Merge branch 'master' into release
diff --git a/.classpath b/.classpath
deleted file mode 100644
index d325996..0000000
--- a/.classpath
+++ /dev/null
@@ -1,21 +0,0 @@
-<?xml version="1.0" encoding="UTF-8"?>
-<classpath>
- <classpathentry kind="src" path="src"/>
- <classpathentry excluding="joshua/corpus/|joshua/decoder/DecoderThreadTest.java|joshua/decoder/ff/|joshua/decoder/ff/lm/|joshua/lattice/|joshua/util/io/|joshua/zmert/" including="joshua/" kind="src" path="test"/>
- <classpathentry kind="lib" path="lib/asm-3.1.jar"/>
- <classpathentry kind="lib" path="lib/collections-generic-4.01.jar"/>
- <classpathentry kind="lib" path="lib/jaxen-1.1.1.jar"/>
- <classpathentry kind="lib" path="lib/jung-algorithms-2.0.jar"/>
- <classpathentry kind="lib" path="lib/jung-api-2.0.jar"/>
- <classpathentry kind="lib" path="lib/jung-graph-impl-2.0.jar"/>
- <classpathentry kind="lib" path="lib/jung-visualization-2.0.jar"/>
- <classpathentry kind="lib" path="lib/pmd-4.2.5.jar"/>
- <classpathentry kind="lib" path="lib/berkeleyaligner.jar"/>
- <classpathentry kind="lib" path="lib/berkeleylm.jar"/>
- <classpathentry kind="lib" path="lib/testng-6.7.jar"/>
- <classpathentry kind="lib" path="lib/mockito-all-1.9.5.jar"/>
- <classpathentry kind="lib" path="lib/junit-4.10.jar"/>
- <classpathentry kind="con" path="org.eclipse.jdt.launching.JRE_CONTAINER"/>
- <classpathentry kind="lib" path="lib/commons-cli-1.2.jar"/>
- <classpathentry kind="output" path="class"/>
-</classpath>
diff --git a/.gitignore b/.gitignore
index ec45214..869300e 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,3 +1,5 @@
+VERSION
+
class/
src/joshua/decoder/ff/lm/srilm/SWIGTYPE_p_Ngram.java
src/joshua/decoder/ff/lm/srilm/SWIGTYPE_p_Vocab.java
diff --git a/.project b/.project
index 6d8c332..7b6ed8e 100644
--- a/.project
+++ b/.project
@@ -1,33 +1,18 @@
<?xml version="1.0" encoding="UTF-8"?>
<projectDescription>
- <name>joshua</name>
- <comment></comment>
- <projects>
- </projects>
- <buildSpec>
- <buildCommand>
- <name>org.eclipse.jdt.core.javabuilder</name>
- <arguments>
- </arguments>
- </buildCommand>
- <buildCommand>
- <name>org.eclipse.ui.externaltools.ExternalToolBuilder</name>
- <triggers>full,incremental,</triggers>
- <arguments>
- <dictionary>
- <key>LaunchConfigHandle</key>
- <value><project>/.externalToolBuilders/Create jar.launch</value>
- </dictionary>
- </arguments>
- </buildCommand>
- </buildSpec>
- <natures>
- <nature>org.eclipse.jdt.core.javanature</nature>
- </natures>
- <variableList>
- <variable>
- <name>JOSHUA</name>
- <value>$%7BPROJECT_LOC%7D</value>
- </variable>
- </variableList>
-</projectDescription>
+ <name>joshua</name>
+ <comment>
+ </comment>
+ <projects>
+ </projects>
+ <buildSpec>
+ <buildCommand>
+ <name>org.eclipse.jdt.core.javabuilder</name>
+ <arguments>
+ </arguments>
+ </buildCommand>
+ </buildSpec>
+ <natures>
+ <nature>org.eclipse.jdt.core.javanature</nature>
+ </natures>
+</projectDescription>
\ No newline at end of file
diff --git a/CHANGELOG b/CHANGELOG
index 0720620..c90dc35 100644
--- a/CHANGELOG
+++ b/CHANGELOG
@@ -1,3 +1,22 @@
+6.0.5 (October 23, 2015)
+========================
+
+- KenLM updated, includes recently improved cmake-based build
+- Fix for grammar packing that previously limited the size of grammars (esp. Hiero)
+- Support for decoding with multiple packed grammars (if packed together)
+- Feature functions now report dense features, for more efficient handling
+- Added AdaGrad and internal MIRA
+- Pipeline:
+ - Alignment of different chunks now parallelized
+ - Computes meteor scores if $METEOR is defined
+ - Updated to use Hadoop 2.5.2
+ - Reworked how multiple tuning runs (for optimizer instability) function
+- Maven compatibility
+- Developers
+ - Ant eclipse target
+ - Added code formatting spec for Eclipse import
+- Many bugfixes and other improvements
+
6.0.4 (June 15, 2015)
=====================
diff --git a/LICENSE b/LICENSE
index 538a1ce..dc5b296 100644
--- a/LICENSE
+++ b/LICENSE
@@ -1,4 +1,4 @@
-Copyright (c) 2014, Johns Hopkins University
+Copyright (c) 2015, Johns Hopkins University
All rights reserved.
BSD 2-clause license
diff --git a/bin/meteor b/bin/meteor
new file mode 100755
index 0000000..5f98a26
--- /dev/null
+++ b/bin/meteor
@@ -0,0 +1,12 @@
+#!/usr/bin/env bash
+
+if [[ -z $3 ]]; then
+ echo "Usage: meteor output reference lang"
+ exit 1
+fi
+
+output=$1
+reference=$2
+lang=$3
+
+java -Xmx2G -jar $METEOR/meteor-*.jar $output $reference -l $lang
diff --git a/build.xml b/build.xml
index 7723abf..8c6fabe 100644
--- a/build.xml
+++ b/build.xml
@@ -7,6 +7,7 @@
<property environment="env" />
<property name="JOSHUA" value="${basedir}" />
+ <property name="KENLM" value="${basedir}/src/kenlm" />
<property name="src" value="${JOSHUA}/src" />
<property name="test" value="${JOSHUA}/test" />
<property name="build" value="${JOSHUA}/class" />
@@ -14,23 +15,17 @@
<property name="thraxlib" value="${JOSHUA}/thrax/bin" />
<property name="doc" value="${JOSHUA}/doc" />
- <!-- TODO: should these be made into 'path's instead of 'property's? -->
- <property name="testng" value="${lib}/testng-6.7.jar" />
- <!-- <property name="bdb" value="${lib}/je-3.2.23.jar"/> -->
-
- <property name="cli" value="commons-cli-1.2.jar"/>
-
<path id="compile.all.classpath">
<fileset dir="${lib}">
- <include name="${cli}" />
- <!-- Jung uses a BSD-like license, see ./lib/LICENSES/LICENSE-jung.txt -->
<include name="jung-api-2.0.jar" />
<include name="jung-graph-impl-2.0.jar" />
<include name="jung-algorithms-2.0.jar" />
<include name="jung-visualization-2.0.jar" />
- <include name="collections-generic-4.01.jar" />
<include name="berkeleylm.jar" />
<include name="junit-4.10.jar" />
+ <include name="commons-cli-1.2.jar" />
+ <include name="collections-generic-4.01.jar" />
+ <include name="args4j-2.0.29.jar" />
</fileset>
<fileset dir="${thraxlib}">
<include name="thrax.jar" />
@@ -57,33 +52,13 @@
<!-- </if> -->
</target>
- <!-- Set git version -->
- <target name="git-version" description="--> Store latest git commit ID">
- <exec executable="git">
- <arg value="rev-parse"/>
- <arg value="--short"/>
- <arg value="HEAD"/>
- </exec>
- </target>
-
- <target name="set-joshua-version" unless="env.JOSHUA_VERSION" description="--> Prompt user to set Joshua version environment variable">
- <fail message="Please set the $JOSHUA_VERSION environment variable." />
- </target>
-
<target name="set-java-home" unless="env.JAVA_HOME" description="--> Prompt user to set JAVA_HOME environment variable">
<fail message="Please set the $JAVA_HOME environment variable." />
<!-- TODO: add suggestion to use /System/Library/Frameworks/JavaVM.framework/Home/ iff on OSX -->
</target>
- <target name="kenlm" depends="check-joshua-home" description="--> 'Make' the kenlm sofrtware in ${src}/joshua/decoder/ff/lm/kenlm/">
- <exec executable="make">
- <arg value="-j" />
- <arg value="4" />
- <arg value="-C" />
- <arg value="${src}/joshua/decoder/ff/lm/kenlm/" />
- <arg value="install" />
- <arg value="BOOST_ROOT=${env.BOOST_ROOT}" />
- </exec>
+ <target name="kenlm" depends="check-joshua-home" description="--> Build KenLM">
+ <exec executable="./build_kenlm.sh" dir="${JOSHUA}/jni" />
</target>
<target name="giza" depends="check-joshua-home" description="--> 'Make' the giza software in scripts/training/giza-pp/">
@@ -91,22 +66,15 @@
<arg value="-j" />
<arg value="4" />
<arg value="-C" />
- <arg value="scripts/training/giza-pp/" />
+ <arg value="${src}/giza-pp/" />
<arg value="all" />
<arg value="install" />
</exec>
<exec executable="make">
<arg value="-C" />
- <arg value="scripts/training/symal/" />
+ <arg value="${src}/symal" />
<arg value="all" />
</exec>
- <!-- <exec executable="./configure" dir="scripts/training/MGIZA" /> -->
- <!-- <exec executable="make"> -->
- <!-- <arg value="-C scripts/training/MGIZA/" /> -->
- <!-- </exec> -->
- <!-- <exec executable="cp"> -->
- <!-- <arg value="-C scripts/training/MGIZA/" /> -->
- <!-- </exec> -->
</target>
<target name="parallelize" depends="check-joshua-home" description="--> 'Make' the parallelize software in scripts/training/parallelize/">
@@ -131,10 +99,6 @@
<target name="thrax" depends="check-joshua-home" description="--> Build Thrax">
<subant buildpath="thrax">
- <property name="env.HADOOP" value="${lib}" />
- <property name="env.HADOOP_VERSION" value="0.20.203.0" />
- <property name="env.AWS_SDK" value="${JOSHUA}" />
- <property name="env.AWS_VERSION" value="1.1.3" />
</subant>
</target>
@@ -142,7 +106,7 @@
pipeline.pl and test/hadoop/ -->
<target name="download-hadoop" depends="check-joshua-home" description="--> Download the Hadoop software">
<get
- src="http://archive.apache.org/dist/hadoop/core/hadoop-0.20.2/hadoop-0.20.2.tar.gz"
+ src="http://archive.apache.org/dist/hadoop/core/hadoop-2.5.2/hadoop-2.5.2.tar.gz"
dest="${JOSHUA}/lib"
usetimestamp="true"
skipexisting="true" />
@@ -170,9 +134,9 @@
</target>
<!-- Compile the Java code. -->
- <target name="java" depends="check-joshua-home" description="--> Compile the Java code">
+ <target name="java" depends="check-joshua-home,version" description="--> Compile the Java code">
<mkdir dir="${build}" />
- <javac compiler="javac1.7" srcdir="${src}" destdir="${build}" classpathref="compile.all.classpath" debug="on" encoding="utf8" sourcepath="" includeantruntime="false">
+ <javac srcdir="${src}" destdir="${build}" classpathref="compile.all.classpath" debug="on" encoding="utf8" sourcepath="" includeantruntime="false">
<!-- We nullify the sourcepath in order to disable Ant's usual resolution mechanism. This makes it an error for our basic code to call into code that has external dependencies, rather than auto-including those files and then having a classpath error. -->
<include name="**/*.java" />
@@ -184,7 +148,7 @@
<!-- Create a JAR file -->
<target name="jar" depends="java,check-joshua-home" description="--> Create a JAR file of compiles classes">
- <jar destfile="${build}/joshua.jar" index="true">
+ <jar destfile="${lib}/joshua.jar" index="true">
<fileset dir="${build}">
<include name="**/*.class" />
</fileset>
@@ -211,15 +175,19 @@
</target>
<!-- Create a versioned release -->
- <target name="release" depends="set-joshua-version, devel-clean, init, thrax" description="--> Create a versioned release">
- <exec executable="./scripts/support/make-release.sh">
- <arg value="${env.JOSHUA_VERSION}" />
+ <target name="release" depends="devel-clean, init, thrax, version" description="--> Create a versioned release">
+ <exec executable="./scripts/support/make-release.sh" />
+ </target>
+
+ <!-- Set git version -->
+	<target name="version" description="--> Build VERSION file">
+ <exec executable="./scripts/support/write-version.sh">
</exec>
</target>
<!-- Create a JAR file of the source code -->
<target name="source-jar" description="--> Create a JAR file of the source code">
- <jar destfile="${build}/joshua-src.jar">
+ <jar destfile="${lib}/joshua-src.jar">
<fileset dir="${build}">
<include name="**/*.java" />
</fileset>
@@ -229,21 +197,27 @@
<!-- ~~~~~ Cleaning tasks ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -->
<!-- Delete the compiled files -->
- <target name="clean" depends="clean-thrax,clean-java" description="--> Clean (remove) all build directories">
- <exec executable="make">
- <arg value="-C" />
- <arg value="scripts/training/giza-pp" />
- <arg value="clean" />
- </exec>
- <exec executable="/bin/bash" dir="src/joshua/decoder/ff/lm/kenlm">
- <arg value="clean" />
- </exec>
+ <target name="clean" depends="clean-thrax,clean-java,clean-kenlm,clean-giza" description="--> Clean (remove) all build directories">
<delete file="${lib}/libken.so" />
<delete file="tree_visualizer/tree_visualizer.jar" />
<delete dir="doc/html" />
<delete dir="doc/latex" />
</target>
+ <!-- Clean up GIZA and symal code -->
+ <target name="clean-giza" description="--> Clean GIZA and symal">
+ <exec executable="make">
+ <arg value="-C" />
+ <arg value="${src}/giza-pp" />
+ <arg value="clean" />
+ </exec>
+ <exec executable="make">
+ <arg value="-C" />
+ <arg value="${src}/symal" />
+ <arg value="clean" />
+ </exec>
+ </target>
+
<!-- Delete just the java files -->
<target name="clean-java" description="--> Delete just the java files">
<delete verbose="true" quiet="true">
@@ -254,44 +228,17 @@
</target>
<target name="clean-thrax" description="--> Delete just the thrax files">
- <delete verbose="true" quiet="true">
+ <delete verbose="true" quiet="true">
<fileset dir="${thraxlib}">
<include name="**/*.class"/>
</fileset>
</delete>
</target>
- <!-- EXPERIMENTAL: Delete *all* generated files -->
- <target name="distclean" depends="clean" description="--> EXPERIMENTAL: Delete *all* generated files">
- <!-- BUG: this doesn't delete empty folders (neither ${build} itself, nor the class dirs (the latter makes sense since we don't traverse them)) -->
- <delete verbose="true" quiet="true" includeEmptyDirs="true">
- <fileset dir="${build}">
- <include name="joshua.jar" />
- <include name="joshua-ui.jar" />
- </fileset>
- </delete>
-
- <!-- HACK: these two work perfectly, but maybe problematic if people reset ${test} or ${doc} -->
- <delete verbose="true" quiet="true" includeEmptyDirs="true">
- <fileset dir="${test}-output" />
- </delete>
-
- <delete verbose="true" quiet="true" includeEmptyDirs="true">
- <fileset dir="${doc}" includes="**/*.tmp" />
- <fileset dir="${doc}/html" />
- <fileset dir="${doc}/latex" />
- </delete>
-
- <delete verbose="true" quiet="true" file="joshua-fb.xml" />
-
- <delete verbose="true" quiet="true" includeEmptyDirs="true">
- <fileset dir="tree_visualizer">
- <include name="tree_visualizer.jar" />
- </fileset>
- </delete>
+	<target name="clean-kenlm" description="--> Clean KenLM">
+ <delete dir="${KENLM}/build" />
</target>
-
<!-- Delete *all* generated files,
including files and directories not tracked by git -->
<target name="devel-clean" description="--> Delete *all* generated files, including files and directories not tracked by git">
@@ -313,17 +260,6 @@
</exec>
</target>
- <!-- ~~~~~ Javadoc ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -->
-
- <!-- Compile the Javadocs into web pages -->
- <target name="javadoc" description="--> Compile the Javadocs into web pages">
- <mkdir dir="${doc}" />
- <javadoc packagenames="joshua.*" classpath="${cli}" sourcepath="${src}" destdir="${doc}" author="true" version="true" charset="utf-8" overview="src/overview.html">
- <link href="http://docs.oracle.com/javase/7/docs/api/" />
- <link href="http://commons.apache.org/cli/api-release" />
- </javadoc>
- </target>
-
<!-- ~~~~~ Doxygen ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -->
<taskdef uri="antlib:org.doxygen.tools" resource="org/doxygen/tools/antlib.xml" classpath="${lib}/ant-doxygen-1.6.1.jar" />
@@ -341,28 +277,7 @@
</exec>
</target>
- <!-- ~~~~~ TestNG ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -->
-
- <!-- Define the unit testing task -->
- <taskdef resource="testngtasks" classpath="${testng}" />
-
-
- <!-- Compile the unit test code -->
- <!-- FIXME: why is this broken out from the 'test' target? -->
- <target name="compile-tests" depends="java" description="--> Compile the unit test code">
- <javac compiler="javac1.5" srcdir="${test}/joshua/ui" destdir="${build}" classpath="${testng}:${build}" debug="on" encoding="utf8" />
- </target>
-
-
- <!-- Run the unit tests -->
- <target name="testng" depends="all,compile-tests" description="--> Run the unit tests">
- <testng classpath="${build}" sourcedir="${test}">
- <jvmarg value="-Dfile.encoding=UTF8" />
- <jvmarg value="-Xms256m" />
- <jvmarg value="-Xmx256m" />
- <xmlfileset dir="${test}" includes="testng.xml" />
- </testng>
- </target>
+ <!-- ~~~~~ Ivy ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -->
<property name="ivy.install.version" value="2.2.0" />
<condition property="ivy.home" value="${env.IVY_HOME}">
@@ -406,7 +321,64 @@
<property name="ivy.default.ivy.user.dir" value="${JOSHUA}/lib" />
<ivy:configure file="${JOSHUA}/lib/ivysettings.xml" />
<ivy:resolve file="${JOSHUA}/lib/ivy.xml" />
- <ivy:retrieve type="jar" />
+ <ivy:retrieve type="jar,bundle" />
+ </target>
+
+ <!-- target: clean-eclipse ============================================ -->
+ <target name="clean-eclipse" depends="clean"
+ description="--> cleans the eclipse project"/>
+
+ <!-- ================================================================== -->
+ <!-- Eclipse targets -->
+ <!-- ================================================================== -->
+
+ <!-- classpath for generating eclipse project -->
+ <path id="eclipse.classpath">
+ <fileset dir="${lib}">
+ <include name="**.jar" />
+ <exclude name="ant-eclipse-1.0-jvm1.2.jar" />
+ </fileset>
+ <fileset dir="${lib}">
+ <include name="**/*.jar" />
+ </fileset>
+ </path>
+
+ <!-- target: ant-eclipse-download =================================== -->
+ <target name="ant-eclipse-download" description="--> Downloads the ant-eclipse binary.">
+ <get src="http://downloads.sourceforge.net/project/ant-eclipse/ant-eclipse/1.0/ant-eclipse-1.0.bin.tar.bz2"
+ dest="${src}/ant-eclipse-1.0.bin.tar.bz2" usetimestamp="false" />
+
+ <untar src="${src}/ant-eclipse-1.0.bin.tar.bz2"
+ dest="${src}" compression="bzip2">
+ <patternset>
+ <include name="lib/ant-eclipse-1.0-jvm1.2.jar"/>
+ </patternset>
+ </untar>
+
+ <delete file="${src}/ant-eclipse-1.0.bin.tar.bz2" />
+ </target>
+
+ <!-- target: eclipse ================================================ -->
+ <target name="eclipse"
+ depends="init,jar,ant-eclipse-download"
+ description="--> Create eclipse project files">
+
+ <pathconvert property="eclipse.project">
+ <path path="${basedir}"/>
+ <regexpmapper from="^.*/([^/]+)$$" to="\1" handledirsep="yes"/>
+ </pathconvert>
+
+ <taskdef name="eclipse"
+ classname="prantl.ant.eclipse.EclipseTask"
+ classpath="${src}/lib/ant-eclipse-1.0-jvm1.2.jar" />
+ <eclipse updatealways="true">
+ <project name="${eclipse.project}" />
+ <classpath>
+ <library pathref="eclipse.classpath" exported="false" />
+ <source path="${basedir}/src" />
+ <output path="${build}" />
+ </classpath>
+ </eclipse>
</target>
</project>
diff --git a/eclipse-codeformat.xml b/eclipse-codeformat.xml
new file mode 100644
index 0000000..0a4e7c4
--- /dev/null
+++ b/eclipse-codeformat.xml
@@ -0,0 +1,269 @@
+<?xml version="1.0" encoding="UTF-8" standalone="no"?>
+<profiles version="11">
+<profile kind="CodeFormatterProfile" name="Joshua" version="11">
+<setting id="org.eclipse.jdt.core.formatter.comment.insert_new_line_before_root_tags" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_after_comma_in_annotation" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_comma_in_type_parameters" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_opening_brace_in_type_declaration" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_after_comma_in_type_arguments" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.brace_position_for_anonymous_type_declaration" value="end_of_line"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_colon_in_case" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_after_opening_brace_in_array_initializer" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_new_line_in_empty_annotation_declaration" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_new_line_before_closing_brace_in_array_initializer" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_annotation" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.blank_lines_before_field" value="0"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_while" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_between_empty_parens_in_annotation_type_member_declaration" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_new_line_before_else_in_if_statement" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_after_prefix_operator" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.keep_else_statement_on_same_line" value="false"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_after_ellipsis" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.comment.insert_new_line_for_parameter" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_opening_brace_in_annotation_type_declaration" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.indent_breaks_compare_to_cases" value="true"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_after_at_in_annotation" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.alignment_for_multiple_fields" value="16"/>
+<setting id="org.eclipse.jdt.core.formatter.alignment_for_expressions_in_array_initializer" value="16"/>
+<setting id="org.eclipse.jdt.core.formatter.alignment_for_conditional_expression" value="80"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_for" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_after_binary_operator" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_question_in_wildcard" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.brace_position_for_array_initializer" value="end_of_line"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_between_empty_parens_in_enum_constant" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_new_line_before_finally_in_try_statement" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_new_line_after_annotation_on_local_variable" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_new_line_before_catch_in_try_statement" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_while" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.blank_lines_after_package" value="1"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_after_comma_in_type_parameters" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.continuation_indentation" value="2"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_after_postfix_operator" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.alignment_for_arguments_in_method_invocation" value="16"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_closing_angle_bracket_in_type_arguments" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_comma_in_superinterfaces" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.blank_lines_before_new_chunk" value="1"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_binary_operator" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.blank_lines_before_package" value="0"/>
+<setting id="org.eclipse.jdt.core.compiler.source" value="1.5"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_after_comma_in_enum_constant_arguments" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_constructor_declaration" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_after_closing_angle_bracket_in_type_arguments" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.comment.format_line_comments" value="true"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_after_comma_in_enum_declarations" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.join_wrapped_lines" value="true"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_opening_brace_in_block" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.alignment_for_arguments_in_explicit_constructor_call" value="16"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_comma_in_method_invocation_arguments" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.blank_lines_before_member_type" value="1"/>
+<setting id="org.eclipse.jdt.core.formatter.align_type_members_on_columns" value="false"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_enum_constant" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_for" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_opening_brace_in_method_declaration" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.alignment_for_selector_in_method_invocation" value="16"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_switch" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_after_unary_operator" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_after_colon_in_case" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.comment.indent_parameter_description" value="true"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_method_declaration" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_switch" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_opening_brace_in_enum_declaration" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_opening_angle_bracket_in_type_parameters" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_new_line_in_empty_type_declaration" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.comment.clear_blank_lines_in_block_comment" value="false"/>
+<setting id="org.eclipse.jdt.core.formatter.lineSplit" value="100"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_if" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_between_brackets_in_array_type_reference" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_parenthesized_expression" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_comma_in_explicitconstructorcall_arguments" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_opening_brace_in_constructor_declaration" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.blank_lines_before_first_class_body_declaration" value="0"/>
+<setting id="org.eclipse.jdt.core.formatter.indentation.size" value="2"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_between_empty_parens_in_method_declaration" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_enum_constant" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.alignment_for_superclass_in_type_declaration" value="16"/>
+<setting id="org.eclipse.jdt.core.formatter.alignment_for_assignment" value="0"/>
+<setting id="org.eclipse.jdt.core.compiler.problem.assertIdentifier" value="error"/>
+<setting id="org.eclipse.jdt.core.formatter.tabulation.char" value="space"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_after_comma_in_constructor_declaration_parameters" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_prefix_operator" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.indent_statements_compare_to_body" value="true"/>
+<setting id="org.eclipse.jdt.core.formatter.blank_lines_before_method" value="1"/>
+<setting id="org.eclipse.jdt.core.formatter.format_guardian_clause_on_one_line" value="false"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_colon_in_for" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_cast" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.alignment_for_parameters_in_constructor_declaration" value="16"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_after_colon_in_labeled_statement" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.brace_position_for_annotation_type_declaration" value="end_of_line"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_new_line_in_empty_method_body" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_method_invocation" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_after_opening_bracket_in_array_allocation_expression" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_opening_brace_in_enum_constant" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_comma_in_annotation" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_after_at_in_annotation_type_declaration" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_comma_in_method_declaration_throws" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_if" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.brace_position_for_switch" value="end_of_line"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_after_comma_in_method_declaration_throws" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_parenthesized_expression_in_return" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_annotation" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_after_question_in_conditional" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_after_question_in_wildcard" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_closing_bracket_in_array_allocation_expression" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_parenthesized_expression_in_throw" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_comma_in_type_arguments" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.compiler.problem.enumIdentifier" value="error"/>
+<setting id="org.eclipse.jdt.core.formatter.indent_switchstatements_compare_to_switch" value="false"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_ellipsis" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.brace_position_for_block" value="end_of_line"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_comma_in_for_inits" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.brace_position_for_method_declaration" value="end_of_line"/>
+<setting id="org.eclipse.jdt.core.formatter.compact_else_if" value="true"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_comma_in_array_initializer" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_after_comma_in_for_increments" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_closing_bracket_in_array_reference" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.brace_position_for_enum_constant" value="end_of_line"/>
+<setting id="org.eclipse.jdt.core.formatter.comment.indent_root_tags" value="true"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_comma_in_enum_declarations" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_after_comma_in_explicitconstructorcall_arguments" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_opening_brace_in_switch" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_after_comma_in_superinterfaces" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_comma_in_method_declaration_parameters" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_comma_in_allocation_expression" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.tabulation.size" value="2"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_opening_bracket_in_array_type_reference" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_new_line_after_opening_brace_in_array_initializer" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_after_closing_brace_in_block" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_opening_bracket_in_array_reference" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_new_line_in_empty_enum_constant" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_after_opening_angle_bracket_in_type_arguments" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_constructor_declaration" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_if" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_comma_in_constructor_declaration_throws" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.comment.clear_blank_lines_in_javadoc_comment" value="false"/>
+<setting id="org.eclipse.jdt.core.formatter.alignment_for_throws_clause_in_constructor_declaration" value="16"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_after_assignment_operator" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_assignment_operator" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.indent_empty_lines" value="false"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_synchronized" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_after_closing_paren_in_cast" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_after_comma_in_method_declaration_parameters" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.brace_position_for_block_in_case" value="end_of_line"/>
+<setting id="org.eclipse.jdt.core.formatter.number_of_empty_lines_to_preserve" value="1"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_method_declaration" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_catch" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_constructor_declaration" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_method_invocation" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_after_opening_bracket_in_array_reference" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_after_and_in_type_parameter" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.alignment_for_arguments_in_qualified_allocation_expression" value="16"/>
+<setting id="org.eclipse.jdt.core.compiler.compliance" value="1.5"/>
+<setting id="org.eclipse.jdt.core.formatter.continuation_indentation_for_array_initializer" value="2"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_between_empty_brackets_in_array_allocation_expression" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_at_in_annotation_type_declaration" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.alignment_for_arguments_in_allocation_expression" value="16"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_cast" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_unary_operator" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_closing_angle_bracket_in_parameterized_type_reference" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_opening_brace_in_anonymous_type_declaration" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.keep_empty_array_initializer_on_one_line" value="false"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_new_line_in_empty_enum_declaration" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.keep_imple_if_on_one_line" value="false"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_comma_in_constructor_declaration_parameters" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_after_closing_angle_bracket_in_type_parameters" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_new_line_at_end_of_file_if_missing" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_after_colon_in_for" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_colon_in_labeled_statement" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_comma_in_parameterized_type_reference" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.alignment_for_superinterfaces_in_type_declaration" value="16"/>
+<setting id="org.eclipse.jdt.core.formatter.alignment_for_binary_expression" value="16"/>
+<setting id="org.eclipse.jdt.core.formatter.brace_position_for_enum_declaration" value="end_of_line"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_while" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.compiler.codegen.inlineJsrBytecode" value="enabled"/>
+<setting id="org.eclipse.jdt.core.formatter.put_empty_statement_on_new_line" value="true"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_new_line_after_annotation_on_parameter" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_after_opening_angle_bracket_in_type_parameters" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_between_empty_parens_in_method_invocation" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_new_line_before_while_in_do_statement" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.alignment_for_arguments_in_enum_constant" value="16"/>
+<setting id="org.eclipse.jdt.core.formatter.comment.format_javadoc_comments" value="true"/>
+<setting id="org.eclipse.jdt.core.formatter.comment.line_length" value="80"/>
+<setting id="org.eclipse.jdt.core.formatter.blank_lines_between_import_groups" value="1"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_comma_in_enum_constant_arguments" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_semicolon" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.brace_position_for_constructor_declaration" value="end_of_line"/>
+<setting id="org.eclipse.jdt.core.formatter.number_of_blank_lines_at_beginning_of_method_body" value="0"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_colon_in_conditional" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.indent_body_declarations_compare_to_type_header" value="true"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_annotation_type_member_declaration" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.wrap_before_binary_operator" value="true"/>
+<setting id="org.eclipse.jdt.core.formatter.indent_body_declarations_compare_to_enum_declaration_header" value="true"/>
+<setting id="org.eclipse.jdt.core.formatter.blank_lines_between_type_declarations" value="1"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_synchronized" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.indent_statements_compare_to_block" value="true"/>
+<setting id="org.eclipse.jdt.core.formatter.alignment_for_superinterfaces_in_enum_declaration" value="16"/>
+<setting id="org.eclipse.jdt.core.formatter.join_lines_in_comments" value="true"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_question_in_conditional" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_comma_in_multiple_field_declarations" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.alignment_for_compact_if" value="16"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_after_comma_in_for_inits" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.indent_switchstatements_compare_to_cases" value="true"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_after_comma_in_array_initializer" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_colon_in_default" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_and_in_type_parameter" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_between_empty_parens_in_constructor_declaration" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_after_colon_in_assert" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.blank_lines_before_imports" value="1"/>
+<setting id="org.eclipse.jdt.core.formatter.comment.format_html" value="true"/>
+<setting id="org.eclipse.jdt.core.formatter.alignment_for_throws_clause_in_method_declaration" value="16"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_closing_angle_bracket_in_type_parameters" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_opening_bracket_in_array_allocation_expression" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_new_line_in_empty_anonymous_type_declaration" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_after_colon_in_conditional" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_after_opening_angle_bracket_in_parameterized_type_reference" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_for" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_postfix_operator" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.comment.format_source_code" value="true"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_synchronized" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_after_comma_in_allocation_expression" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_after_comma_in_constructor_declaration_throws" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.alignment_for_parameters_in_method_declaration" value="16"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_closing_brace_in_array_initializer" value="insert"/>
+<setting id="org.eclipse.jdt.core.compiler.codegen.targetPlatform" value="1.5"/>
+<setting id="org.eclipse.jdt.core.formatter.use_tabs_only_for_leading_indentations" value="true"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_new_line_after_annotation_on_member" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.comment.format_header" value="false"/>
+<setting id="org.eclipse.jdt.core.formatter.comment.format_block_comments" value="true"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_enum_constant" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.alignment_for_enum_constants" value="0"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_new_line_in_empty_block" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.indent_body_declarations_compare_to_annotation_declaration_header" value="true"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_parenthesized_expression" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_parenthesized_expression" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_catch" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_comma_in_multiple_local_declarations" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_switch" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_comma_in_for_increments" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_method_invocation" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_colon_in_assert" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.brace_position_for_type_declaration" value="end_of_line"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_opening_brace_in_array_initializer" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_between_empty_braces_in_array_initializer" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_method_declaration" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_semicolon_in_for" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_catch" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_opening_angle_bracket_in_parameterized_type_reference" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_after_comma_in_multiple_field_declarations" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_annotation" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_after_comma_in_parameterized_type_reference" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_after_comma_in_method_invocation_arguments" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.blank_lines_after_imports" value="1"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_after_comma_in_multiple_local_declarations" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.indent_body_declarations_compare_to_enum_constant_header" value="true"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_after_semicolon_in_for" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.never_indent_line_comments_on_first_column" value="false"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_opening_angle_bracket_in_type_arguments" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.never_indent_block_comments_on_first_column" value="false"/>
+<setting id="org.eclipse.jdt.core.formatter.keep_then_statement_on_same_line" value="false"/>
+</profile>
+</profiles>
diff --git a/jni/build_kenlm.sh b/jni/build_kenlm.sh
new file mode 100755
index 0000000..107802b
--- /dev/null
+++ b/jni/build_kenlm.sh
@@ -0,0 +1,25 @@
+#!/bin/bash
+
+set -u
+
+export KENLM_MAX_ORDER=10
+export CXXFLAGS+=" -O3 -fPIC -DHAVE_ZLIB"
+export LDFLAGS+=" -lz"
+export CXX=${CXX:-g++}
+
+cd $JOSHUA/src/kenlm
+[[ ! -d build ]] && mkdir build
+cd build
+cmake .. -DKENLM_MAX_ORDER=$KENLM_MAX_ORDER -DCMAKE_BUILD_TYPE=Release
+make
+cp bin/{query,lmplz,build_binary} $JOSHUA/bin
+
+if [ "$(uname)" == Darwin ]; then
+ SUFFIX=dylib
+ RT=""
+else
+ RT=-lrt
+ SUFFIX=so
+fi
+
+$CXX -std=gnu++11 -I. -DKENLM_MAX_ORDER=$KENLM_MAX_ORDER -I$JAVA_HOME/include -I$JOSHUA/src/kenlm -I$JAVA_HOME/include/linux -I$JAVA_HOME/include/darwin $JOSHUA/jni/kenlm_wrap.cc lm/CMakeFiles/kenlm.dir/*.o util/CMakeFiles/kenlm_util.dir/*.o util/CMakeFiles/kenlm_util.dir/double-conversion/*.o -shared -o $JOSHUA/lib/libken.$SUFFIX $CXXFLAGS $LDFLAGS $RT
diff --git a/jni/kenlm_wrap.cc b/jni/kenlm_wrap.cc
new file mode 100644
index 0000000..6d66f37
--- /dev/null
+++ b/jni/kenlm_wrap.cc
@@ -0,0 +1,400 @@
+#include "lm/enumerate_vocab.hh"
+#include "lm/model.hh"
+#include "lm/left.hh"
+#include "lm/state.hh"
+#include "util/murmur_hash.hh"
+#include "util/pool.hh"
+
+#include <iostream>
+
+#include <string.h>
+#include <stdlib.h>
+#include <jni.h>
+#include <pthread.h>
+
+// Grr. Everybody's compiler is slightly different and I'm trying to not depend on boost.
+#include <unordered_map>
+
+// Verify that jint and lm::ngram::WordIndex are the same size. If this breaks
+// for you, there's a need to revise probString.
+namespace {
+
+template<bool> struct StaticCheck {
+};
+
+template<> struct StaticCheck<true> {
+ typedef bool StaticAssertionPassed;
+};
+
+typedef StaticCheck<sizeof(jint) == sizeof(lm::WordIndex)>::StaticAssertionPassed FloatSize;
+
+typedef std::unordered_map<uint64_t, lm::ngram::ChartState*> PoolHash;
+
+/**
+ * A Chart bundles together a hash_map that maps ChartState signatures to a single object
+ * instantiated using a pool. This allows duplicate states to avoid allocating separate
+ * state objects at multiple places throughout a sentence, and also allows state to be
+ * shared across KenLMs for the same sentence.
+ */
+struct Chart {
+ // A cache for allocated chart objects
+ PoolHash* poolHash;
+ // Pool used to allocate new ones
+ util::Pool* pool;
+
+ Chart() {
+ poolHash = new PoolHash();
+ pool = new util::Pool();
+ }
+
+ ~Chart() {
+ delete poolHash;
+ pool->FreeAll();
+ delete pool;
+ }
+
+ lm::ngram::ChartState* put(const lm::ngram::ChartState& state) {
+ uint64_t hashValue = lm::ngram::hash_value(state);
+
+ if (poolHash->find(hashValue) == poolHash->end()) {
+ lm::ngram::ChartState* pointer = (lm::ngram::ChartState *)pool->Allocate(sizeof(lm::ngram::ChartState));
+ *pointer = state;
+ (*poolHash)[hashValue] = pointer;
+ }
+
+ return (*poolHash)[hashValue];
+ }
+};
+
+// Vocab ids above what the vocabulary knows about are unknown and should
+// be mapped to that.
+void MapArray(const std::vector<lm::WordIndex>& map, jint *begin, jint *end) {
+ for (jint *i = begin; i < end; ++i) {
+ *i = map[*i];
+ }
+}
+
+char *PieceCopy(const StringPiece &str) {
+ char *ret = (char*) malloc(str.size() + 1);
+ memcpy(ret, str.data(), str.size());
+ ret[str.size()] = 0;
+ return ret;
+}
+
+// Rather than handle several different instantiations over JNI, we'll just
+// do virtual calls C++-side.
+class VirtualBase {
+public:
+ virtual ~VirtualBase() {
+ }
+
+ virtual float Prob(jint *begin, jint *end) const = 0;
+
+ virtual float ProbRule(jlong *begin, jlong *end, lm::ngram::ChartState& state) const = 0;
+
+ virtual float ProbString(jint * const begin, jint * const end,
+ jint start) const = 0;
+
+ virtual float EstimateRule(jlong *begin, jlong *end) const = 0;
+
+ virtual uint8_t Order() const = 0;
+
+ virtual bool RegisterWord(const StringPiece& word, const int joshua_id) = 0;
+
+ void RememberReturnMethod(jclass chart_pair, jmethodID chart_pair_init) {
+ chart_pair_ = chart_pair;
+ chart_pair_init_ = chart_pair_init;
+ }
+
+ jclass ChartPair() const { return chart_pair_; }
+ jmethodID ChartPairInit() const { return chart_pair_init_; }
+
+protected:
+ VirtualBase() {
+ }
+
+private:
+ // Hack: these are remembered so we can avoid looking them up every time.
+ jclass chart_pair_;
+ jmethodID chart_pair_init_;
+};
+
+template<class Model> class VirtualImpl: public VirtualBase {
+public:
+ VirtualImpl(const char *name) :
+ m_(name) {
+ // Insert unknown id mapping.
+ map_.push_back(0);
+ }
+
+ ~VirtualImpl() {
+ }
+
+ float Prob(jint * const begin, jint * const end) const {
+ MapArray(map_, begin, end);
+
+ std::reverse(begin, end - 1);
+ lm::ngram::State ignored;
+ return m_.FullScoreForgotState(
+ reinterpret_cast<const lm::WordIndex*>(begin),
+ reinterpret_cast<const lm::WordIndex*>(end - 1), *(end - 1),
+ ignored).prob;
+ }
+
+ float ProbRule(jlong * const begin, jlong * const end, lm::ngram::ChartState& state) const {
+ if (begin == end) return 0.0;
+ lm::ngram::RuleScore<Model> ruleScore(m_, state);
+
+ if (*begin < 0) {
+ ruleScore.BeginNonTerminal(*reinterpret_cast<const lm::ngram::ChartState*>(-*begin));
+ } else {
+ const lm::WordIndex word = map_[*begin];
+ if (word == m_.GetVocabulary().BeginSentence()) {
+ ruleScore.BeginSentence();
+ } else {
+ ruleScore.Terminal(word);
+ }
+ }
+ for (jlong* i = begin + 1; i != end; i++) {
+ long word = *i;
+ if (word < 0)
+ ruleScore.NonTerminal(*reinterpret_cast<const lm::ngram::ChartState*>(-word));
+ else
+ ruleScore.Terminal(map_[word]);
+ }
+ return ruleScore.Finish();
+ }
+
+ float EstimateRule(jlong * const begin, jlong * const end) const {
+ if (begin == end) return 0.0;
+ lm::ngram::ChartState nullState;
+ lm::ngram::RuleScore<Model> ruleScore(m_, nullState);
+
+ if (*begin < 0) {
+ ruleScore.Reset();
+ } else {
+ const lm::WordIndex word = map_[*begin];
+ if (word == m_.GetVocabulary().BeginSentence()) {
+ ruleScore.BeginSentence();
+ } else {
+ ruleScore.Terminal(word);
+ }
+ }
+ for (jlong* i = begin + 1; i != end; i++) {
+ long word = *i;
+ if (word < 0)
+ ruleScore.Reset();
+ else
+ ruleScore.Terminal(map_[word]);
+ }
+ return ruleScore.Finish();
+ }
+
+ float ProbString(jint * const begin, jint * const end, jint start) const {
+ MapArray(map_, begin, end);
+
+ float prob;
+ lm::ngram::State state;
+ if (start == 0) {
+ prob = 0;
+ state = m_.NullContextState();
+ } else {
+ std::reverse(begin, begin + start);
+ prob = m_.FullScoreForgotState(
+ reinterpret_cast<const lm::WordIndex*>(begin),
+ reinterpret_cast<const lm::WordIndex*>(begin + start),
+ begin[start], state).prob;
+ ++start;
+ }
+ lm::ngram::State state2;
+ for (const jint *i = begin + start;;) {
+ if (i >= end)
+ break;
+ float got = m_.Score(state, *i, state2);
+ i++;
+ prob += got;
+ if (i >= end)
+ break;
+ got = m_.Score(state2, *i, state);
+ i++;
+ prob += got;
+ }
+ return prob;
+ }
+
+ uint8_t Order() const {
+ return m_.Order();
+ }
+
+ bool RegisterWord(const StringPiece& word, const int joshua_id) {
+ if (map_.size() <= joshua_id) {
+ map_.resize(joshua_id + 1, 0);
+ }
+ bool already_present = false;
+ if (map_[joshua_id] != 0)
+ already_present = true;
+ map_[joshua_id] = m_.GetVocabulary().Index(word);
+ return already_present;
+ }
+
+private:
+ Model m_;
+ std::vector<lm::WordIndex> map_;
+};
+
+VirtualBase *ConstructModel(const char *file_name) {
+ using namespace lm::ngram;
+ ModelType model_type;
+ if (!RecognizeBinary(file_name, model_type))
+ model_type = HASH_PROBING;
+ switch (model_type) {
+ case PROBING:
+ return new VirtualImpl<ProbingModel>(file_name);
+ case REST_PROBING:
+ return new VirtualImpl<RestProbingModel>(file_name);
+ case TRIE:
+ return new VirtualImpl<TrieModel>(file_name);
+ case ARRAY_TRIE:
+ return new VirtualImpl<ArrayTrieModel>(file_name);
+ case QUANT_TRIE:
+ return new VirtualImpl<QuantTrieModel>(file_name);
+ case QUANT_ARRAY_TRIE:
+ return new VirtualImpl<QuantArrayTrieModel>(file_name);
+ default:
+ UTIL_THROW(
+ lm::FormatLoadException,
+ "Unrecognized file format " << (unsigned) model_type
+ << " in file " << file_name);
+ }
+}
+
+} // namespace
+
+extern "C" {
+
+JNIEXPORT jlong JNICALL Java_joshua_decoder_ff_lm_KenLM_construct(
+ JNIEnv *env, jclass, jstring file_name) {
+ const char *str = env->GetStringUTFChars(file_name, 0);
+ if (!str)
+ return 0;
+
+ VirtualBase *ret;
+ try {
+ ret = ConstructModel(str);
+
+ // Get a class reference for the KenLM$StateProbPair class we return
+ jclass local_chart_pair = env->FindClass("joshua/decoder/ff/lm/KenLM$StateProbPair");
+ UTIL_THROW_IF(!local_chart_pair, util::Exception, "Failed to find joshua/decoder/ff/lm/KenLM$StateProbPair");
+ jclass chart_pair = (jclass)env->NewGlobalRef(local_chart_pair);
+ env->DeleteLocalRef(local_chart_pair);
+
+ // Get the Method ID of the constructor which takes an int
+ jmethodID chart_pair_init = env->GetMethodID(chart_pair, "<init>", "(JF)V");
+ UTIL_THROW_IF(!chart_pair_init, util::Exception, "Failed to find init method");
+
+ ret->RememberReturnMethod(chart_pair, chart_pair_init);
+ } catch (std::exception &e) {
+ std::cerr << e.what() << std::endl;
+ abort();
+ }
+ env->ReleaseStringUTFChars(file_name, str);
+ return reinterpret_cast<jlong>(ret);
+}
+
+JNIEXPORT void JNICALL Java_joshua_decoder_ff_lm_KenLM_destroy(
+ JNIEnv *env, jclass, jlong pointer) {
+ VirtualBase *base = reinterpret_cast<VirtualBase*>(pointer);
+ env->DeleteGlobalRef(base->ChartPair());
+ delete base;
+}
+
+JNIEXPORT long JNICALL Java_joshua_decoder_ff_lm_KenLM_createPool(
+ JNIEnv *env, jclass) {
+ return reinterpret_cast<long>(new Chart());
+}
+
+JNIEXPORT void JNICALL Java_joshua_decoder_ff_lm_KenLM_destroyPool(
+ JNIEnv *env, jclass, jlong pointer) {
+ Chart* chart = reinterpret_cast<Chart*>(pointer);
+ delete chart;
+}
+
+JNIEXPORT jint JNICALL Java_joshua_decoder_ff_lm_KenLM_order(
+ JNIEnv *env, jclass, jlong pointer) {
+ return reinterpret_cast<VirtualBase*>(pointer)->Order();
+}
+
+JNIEXPORT jboolean JNICALL Java_joshua_decoder_ff_lm_KenLM_registerWord(
+ JNIEnv *env, jclass, jlong pointer, jstring word, jint id) {
+ const char *str = env->GetStringUTFChars(word, 0);
+ if (!str)
+ return false;
+ jint ret;
+ try {
+ ret = reinterpret_cast<VirtualBase*>(pointer)->RegisterWord(str, id);
+ } catch (std::exception &e) {
+ std::cerr << e.what() << std::endl;
+ abort();
+ }
+ env->ReleaseStringUTFChars(word, str);
+ return ret;
+}
+
+JNIEXPORT jfloat JNICALL Java_joshua_decoder_ff_lm_KenLM_prob(
+ JNIEnv *env, jclass, jlong pointer, jintArray arr) {
+ jint length = env->GetArrayLength(arr);
+ if (length <= 0)
+ return 0.0;
+ // GCC only.
+ jint values[length];
+ env->GetIntArrayRegion(arr, 0, length, values);
+
+ return reinterpret_cast<const VirtualBase*>(pointer)->Prob(values,
+ values + length);
+}
+
+JNIEXPORT jfloat JNICALL Java_joshua_decoder_ff_lm_KenLM_probString(
+ JNIEnv *env, jclass, jlong pointer, jintArray arr, jint start) {
+ jint length = env->GetArrayLength(arr);
+ if (length <= start)
+ return 0.0;
+ // GCC only.
+ jint values[length];
+ env->GetIntArrayRegion(arr, 0, length, values);
+
+ return reinterpret_cast<const VirtualBase*>(pointer)->ProbString(values,
+ values + length, start);
+}
+
+JNIEXPORT jobject JNICALL Java_joshua_decoder_ff_lm_KenLM_probRule(
+ JNIEnv *env, jclass, jlong pointer, jlong chartPtr, jlongArray arr) {
+ jint length = env->GetArrayLength(arr);
+ // GCC only.
+ jlong values[length];
+ env->GetLongArrayRegion(arr, 0, length, values);
+
+ // Compute the probability
+ lm::ngram::ChartState outState;
+ const VirtualBase *base = reinterpret_cast<const VirtualBase*>(pointer);
+ float prob = base->ProbRule(values, values + length, outState);
+
+ Chart* chart = reinterpret_cast<Chart*>(chartPtr);
+ lm::ngram::ChartState* outStatePtr = chart->put(outState);
+
+ // Call back constructor to allocate a new instance, with an int argument
+ return env->NewObject(base->ChartPair(), base->ChartPairInit(), (long)outStatePtr, prob);
+}
+
+JNIEXPORT jfloat JNICALL Java_joshua_decoder_ff_lm_KenLM_estimateRule(
+ JNIEnv *env, jclass, jlong pointer, jlongArray arr) {
+ jint length = env->GetArrayLength(arr);
+ // GCC only.
+ jlong values[length];
+ env->GetLongArrayRegion(arr, 0, length, values);
+
+ // Compute the probability
+ return reinterpret_cast<const VirtualBase*>(pointer)->EstimateRule(values,
+ values + length);
+}
+
+} // extern
diff --git a/lib/collections-generic-4.01.jar b/lib/collections-generic-4.01.jar
deleted file mode 100644
index 92d009c..0000000
--- a/lib/collections-generic-4.01.jar
+++ /dev/null
Binary files differ
diff --git a/lib/ivy.xml b/lib/ivy.xml
index ababada..bed2d84 100644
--- a/lib/ivy.xml
+++ b/lib/ivy.xml
@@ -2,19 +2,14 @@
<info organisation="joshua" module="joshua"/>
<dependencies>
<dependency org="net.sourceforge.ant-doxygen" name="ant-doxygen" rev="1.6.1" />
- <dependency org="asm" name="asm" rev="3.1"/>
- <dependency org="com.amazonaws" name="aws-java-sdk" rev="1.1.3"/>
- <dependency org="org.apache.commons" name="commons-cli" rev="1.2"/>
- <dependency org="commons-logging" name="commons-logging" rev="1.1.1"/>
- <dependency org="jaxen" name="jaxen" rev="1.1.1"/>
<dependency org="net.sf.jung" name="jung-algorithms" rev="2.0"/>
<dependency org="net.sf.jung" name="jung-api" rev="2.0"/>
<dependency org="net.sf.jung" name="jung-graph-impl" rev="2.0"/>
<dependency org="net.sf.jung" name="jung-visualization" rev="2.0"/>
- <dependency org="org.apache.hadoop" name="hadoop-core" rev="0.20.203.0"/>
+ <dependency org="org.apache.commons" name="commons-cli" rev="1.2"/>
<dependency org="org.testng" name="testng" rev="6.7"/>
- <dependency org="org.mockito" name="mockito-all" rev="1.9.5"/>
- <dependency org="pmd" name="pmd" rev="4.2.5"/>
<dependency org="junit" name="junit" rev="4.10" />
+ <dependency org="net.sourceforge.collections" name="collections-generic" rev="4.01"/>
+ <dependency org="args4j" name="args4j" rev="2.0.29" />
</dependencies>
</ivy-module>
diff --git a/lib/ivysettings.xml b/lib/ivysettings.xml
index 6dadd3d..a6fd635 100644
--- a/lib/ivysettings.xml
+++ b/lib/ivysettings.xml
@@ -9,7 +9,7 @@
</filesystem>
<ibiblio name="central" m2compatible="true"/>
<ibiblio name="tools.gbif.org" m2compatible="true" root="http://tools.gbif.org/maven/repository/" />
- <packager name="roundup" buildRoot="${env.JOSHUA}/lib/packager/build" resourceCache="${env.JOSHUA}/lib/cache">
+ <packager name="roundup" buildRoot="${JOSHUA}/lib/packager/build" resourceCache="${JOSHUA}/lib/cache">
<ivy pattern="http://ivyroundup.googlecode.com/svn/trunk/repo/modules/[organisation]/[module]/[revision]/ivy.xml"/>
<artifact pattern="http://ivyroundup.googlecode.com/svn/trunk/repo/modules/[organisation]/[module]/[revision]/packager.xml"/>
</packager>
diff --git a/pom.xml b/pom.xml
new file mode 100644
index 0000000..3b4aac1
--- /dev/null
+++ b/pom.xml
@@ -0,0 +1,126 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+ Apache Maven 2 POM generated by Apache Ivy
+ http://ant.apache.org/ivy/
+ Apache Ivy version: 2.2.0 20100923230623
+-->
+<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+ xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
+
+ <modelVersion>4.0.0</modelVersion>
+ <groupId>joshua</groupId>
+ <artifactId>joshua</artifactId>
+ <packaging>jar</packaging>
+ <version>6.0.4</version>
+ <build>
+ <sourceDirectory>src</sourceDirectory>
+ <plugins>
+ <plugin>
+ <artifactId>maven-compiler-plugin</artifactId>
+ <version>3.1</version>
+ <configuration>
+ <source>1.8</source>
+ <target>1.8</target>
+ </configuration>
+ </plugin>
+ </plugins>
+ </build>
+ <dependencies>
+ <!-- <dependency> -->
+ <!-- <groupId>net.sourceforge.ant-doxygen</groupId> -->
+ <!-- <artifactId>ant-doxygen</artifactId> -->
+ <!-- <version>1.6.1</version> -->
+ <!-- <optional>true</optional> -->
+ <!-- </dependency> -->
+ <dependency>
+ <groupId>edu.berkeley.nlp</groupId>
+ <artifactId>berkeleylm</artifactId>
+ <version>1.1.2</version>
+ </dependency>
+ <dependency>
+ <groupId>asm</groupId>
+ <artifactId>asm</artifactId>
+ <version>3.1</version>
+ <optional>true</optional>
+ </dependency>
+ <dependency>
+ <groupId>com.amazonaws</groupId>
+ <artifactId>aws-java-sdk</artifactId>
+ <version>1.1.3</version>
+ <optional>true</optional>
+ </dependency>
+ <dependency>
+ <!-- <groupId>org.apache.commons</groupId> -->
+ <groupId>commons-cli</groupId>
+ <artifactId>commons-cli</artifactId>
+ <!-- <optional>true</optional> -->
+ <version>1.2</version>
+ </dependency>
+ <dependency>
+ <groupId>commons-logging</groupId>
+ <artifactId>commons-logging</artifactId>
+ <version>1.1.1</version>
+ <optional>true</optional>
+ </dependency>
+ <dependency>
+ <groupId>jaxen</groupId>
+ <artifactId>jaxen</artifactId>
+ <version>1.1.1</version>
+ <optional>true</optional>
+ </dependency>
+ <dependency>
+ <groupId>net.sf.jung</groupId>
+ <artifactId>jung-algorithms</artifactId>
+ <version>2.0</version>
+ <optional>true</optional>
+ </dependency>
+ <dependency>
+ <groupId>net.sf.jung</groupId>
+ <artifactId>jung-api</artifactId>
+ <version>2.0</version>
+ <optional>true</optional>
+ </dependency>
+ <dependency>
+ <groupId>net.sf.jung</groupId>
+ <artifactId>jung-graph-impl</artifactId>
+ <version>2.0</version>
+ <optional>true</optional>
+ </dependency>
+ <dependency>
+ <groupId>net.sf.jung</groupId>
+ <artifactId>jung-visualization</artifactId>
+ <version>2.0</version>
+ <optional>true</optional>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.hadoop</groupId>
+ <artifactId>hadoop-core</artifactId>
+ <version>0.20.203.0</version>
+ <optional>true</optional>
+ </dependency>
+ <dependency>
+ <groupId>org.testng</groupId>
+ <artifactId>testng</artifactId>
+ <version>6.7</version>
+ <optional>true</optional>
+ </dependency>
+ <dependency>
+ <groupId>org.mockito</groupId>
+ <artifactId>mockito-all</artifactId>
+ <version>1.9.5</version>
+ <optional>true</optional>
+ </dependency>
+ <dependency>
+ <groupId>pmd</groupId>
+ <artifactId>pmd</artifactId>
+ <version>4.2.5</version>
+ <optional>true</optional>
+ </dependency>
+ <dependency>
+ <groupId>junit</groupId>
+ <artifactId>junit</artifactId>
+ <version>4.10</version>
+ <optional>true</optional>
+ </dependency>
+ </dependencies>
+</project>
diff --git a/scripts/copy-config.pl b/scripts/copy-config.pl
index 867b470..2bb44f0 100755
--- a/scripts/copy-config.pl
+++ b/scripts/copy-config.pl
@@ -26,7 +26,7 @@
# Step 1. process command-line arguments for key/value pairs. The keys are matched next to the
# config file and the configfile values replaced with those found on the command-line.
-my (%params,%weights,@features);
+my (%params,%weights,%features);
while (my $key = shift @ARGV) {
# make sure the parameter has a leading dash
if ($key !~ /^-/) {
@@ -43,7 +43,7 @@
# -feature-function lines are gathered, other keys can be present only once
if ($key eq "featurefunction") {
- push(@features, $value);
+ $features{$value} = $value;
} elsif ($key eq "weights") {
my @tokens = split(' ', $value);
for (my $i = 0; $i < @tokens; $i += 2) {
@@ -105,6 +105,13 @@
}
}
+ # If an exact feature function line is in the config file, delete
+ # it from the command-line arguments so it doesn't get printed
+ # later. All features not found in the config file are appended.
+ if ($norm_key eq "featurefunction" and exists $features{$value}) {
+ delete $features{$value};
+ }
+
# if the parameter was found on the command line, print out its replaced value
if (exists $params{$norm_key}) {
print "$key = " . $params{$norm_key} . "\n";
@@ -130,13 +137,14 @@
if (scalar(keys(%params))) {
print $/;
foreach my $key (keys %params) {
+ next if $key =~ /^tm/; # skip unused tm flags
print STDERR "* WARNING: no key '$key' found in config file (appending to end)\n";
print "$key = $params{$key}\n";
}
}
# print out the feature functions
-map { print "feature-function = $_\n" } @features;
+map { print "feature-function = $_\n" } (keys %features);
print $/;
# Print out the weights
diff --git a/scripts/features/addSparseFeatures.py b/scripts/features/addSparseFeatures.py
new file mode 100755
index 0000000..b711f67
--- /dev/null
+++ b/scripts/features/addSparseFeatures.py
@@ -0,0 +1,85 @@
+#!/usr/bin/env python
+
+import sys
+import gzip
+import argparse
+
+parser = argparse.ArgumentParser("Adds sparse features to a Moses ttable")
+parser.add_argument("-p", "--ttable", dest="filteredPT", help="A phrase table, preferably a filtered one")
+parser.add_argument("-o", "--output", dest="featurizedPT", help="The location of the output ttable", default="tuning/filtered.1/phrase-table.ft.0-0.1.1.gz")
+parser.add_argument("-f", "--sparse_f", dest="sparseF", help="Source sparse features", default="model/sparse-features.1.en.top1000")
+parser.add_argument("-e", "--sparse_e", dest="sparseE", help="Target sparse features", default="model/sparse-features.1.es.top1000")
+opts = parser.parse_args()
+
+if opts.filteredPT is None:
+ parser.print_help()
+ sys.exit()
+
+filteredPT = opts.filteredPT
+featurizedPT = gzip.open(opts.featurizedPT, 'wb')
+sparseE = opts.sparseE
+sparseF = opts.sparseF
+
+featsE = []
+featsF = []
+
+# First read off the sparse features and store them
+with open(sparseE) as sE:
+ for line in sE:
+ line = line.strip()
+ featsE.append(line)
+with open(sparseF) as sF:
+ for line in sF:
+ line = line.strip()
+ featsF.append(line)
+
+wt = set()
+phraseWT = []
+
+pt = gzip.open(filteredPT, 'rb')
+for line in pt:
+ lineComp = line.split("|||")
+ assert len(lineComp) > 3
+ sPhrase = lineComp[0].strip().split()
+ tPhrase = lineComp[1].strip().split()
+ alignment = lineComp[3].strip().split()
+ # Cache phrase features for use later
+ localWT = set()
+ # Read alignment information
+ for item in alignment:
+ item = item.split("-")
+ # Add seen word translations to a set
+ # if they were seen in the lexical features
+ sWord = sPhrase[int(item[0])]
+ tWord = tPhrase[int(item[1])]
+ if sWord in featsF and tWord in featsE:
+ wt.add((sWord, tWord))
+ localWT.add((sWord, tWord))
+
+ phraseWT.append(localWT)
+
+pt.seek(0)
+# Convert to a list
+wt = list(wt)
+for i, line in enumerate(pt):
+ lineComp = line.split("|||")
+ assert len(lineComp) > 3
+ sPhrase = lineComp[0].strip().split()
+ tPhrase = lineComp[1].strip().split()
+ # Lexical sparse features
+ # SD = Source word deletion, TI = Target word insertion
+ # WT = word translation
+ sd_features = ["SD_"+token+"=1" for token in sPhrase if token in featsF]
+ ti_features = ["TI_"+token+"=1" for token in tPhrase if token in featsE]
+ wt_features = []
+ for feat in phraseWT[i]:
+ wt_features.append("WT_" + feat[0] + "~" + feat[1] + "=1")
+
+ all_feats = sd_features + ti_features + wt_features
+ # wt_features = ["1" if feat in phraseWT[i] else "0" for feat in wt]
+ lineComp[2] += " ".join(all_feats) + " "
+ featurizedPT.write("|||".join(lineComp))
+ sys.stdout.write("\r%f%%" % (float(i * 100)/len(phraseWT)))
+ sys.stdout.flush()
+
+featurizedPT.close()
diff --git a/scripts/support/grammar-packer.pl b/scripts/support/grammar-packer.pl
index 66b4d0f..533bcae 100755
--- a/scripts/support/grammar-packer.pl
+++ b/scripts/support/grammar-packer.pl
@@ -20,60 +20,80 @@
use File::Basename qw/basename/;
my %opts = (
+ g => '', # comma-separated list of grammars to pack
+ o => '', # comma-separated list of grammar output directories
m => '8g', # amount of memory to give the packer
T => '/tmp', # location of temporary space
v => 0, # verbose
);
-getopts("m:T:v", \%opts);
+getopts("m:T:vg:o:", \%opts) || die usage();
my $JOSHUA = $ENV{JOSHUA} or die "you must defined \$JOSHUA";
my $CAT = "$JOSHUA/scripts/training/scat";
sub usage {
- print "Usage: grammar-packer.pl [-m MEM] [-T /path/to/tmp] input-grammar [output-dir=grammar.packed]\n";
+ print "Usage: grammar-packer.pl [-m MEM] [-T /path/to/tmp] -g 'grammar [grammar2 ...]' -o 'grammar.packed [grammar2.packed ...]'\n";
exit 1;
}
-my $grammar = shift or usage();
-my $output_dir = shift || "grammar.packed";
+my @grammars = split(' ', $opts{g});
+my @outputs = split(' ', $opts{o});
-if (! -e $grammar) {
- print "* FATAL: Can't find grammar '$grammar'\n";
- exit 1;
+# make sure the outputs list is the same size as the grammars list, or empty
+die usage() if (scalar(@outputs) != 0 && scalar(@outputs) != scalar(@grammars));
+
+# if no outputs given, generate default names
+if (scalar(@outputs) < scalar(@grammars)) {
+ for (my $i = 1; $i < @grammars; $i++) {
+ push(@outputs, $i == 1 ? "grammar.packed" : "grammar$i.packed");
+ }
}
-# Sort the grammar or phrase table
-my $name = basename($grammar);
-my (undef,$sorted_grammar) = tempfile("${name}XXXX", DIR => $opts{T}, UNLINK => 1);
-print STDERR "Sorting grammar to $sorted_grammar...\n" if $opts{v};
-
-# We need to sort by source side, which is field 0 (for phrase tables not listing the LHS)
-# or field 1 (convention, Thrax format)
-chomp(my $first_line = `$CAT $grammar | head -n1`);
-if ($first_line =~ /^\[/) {
- # regular grammar
- if (system("$CAT $grammar | sort -k3,3 --buffer-size=$opts{m} -T $opts{T} | gzip -9n > $sorted_grammar")) {
- print STDERR "* FATAL: Couldn't sort the grammar (not enough memory? short on tmp space?)\n";
- exit 2;
+my $grammar_no = 0;
+my @sorted_grammars;
+foreach my $grammar (@grammars) {
+ $grammar_no++;
+ if (! -e $grammar) {
+ print "* FATAL: Can't find grammar '$grammar'\n";
+ exit 1;
}
-} else {
- # phrase-based grammar -- prepend nonterminal symbol
- if (system("$CAT $grammar | $JOSHUA/scripts/support/moses_phrase_to_joshua.pl | sort -k3,3 --buffer-size=$opts{m} -T $opts{T} | gzip -9n > $sorted_grammar")) {
- print STDERR "* FATAL: Couldn't sort the grammar (not enough memory? short on tmp space?)\n";
- exit 2;
- }
-}
-#my $source_field = ($first_line =~ /^\[/) ? "3,3" : "1,1";
-$grammar = $sorted_grammar;
+ # Sort the grammar or phrase table
+ my $name = basename($grammar);
+ my (undef,$sorted_grammar) = tempfile("${name}XXXX", DIR => $opts{T}, UNLINK => 1);
+ print STDERR "Sorting grammar to $sorted_grammar...\n" if $opts{v};
+
+ # We need to sort by source side, which is field 1 (for phrase tables not listing the LHS)
+ # or field 2 (convention, Thrax format)
+ chomp(my $first_line = `$CAT $grammar | head -n1`);
+ if ($first_line =~ /^\[/) {
+ # regular grammar
+ if (system("$CAT $grammar | sed 's/ ||| /\t/g' | sort -k2,2 -k3,3 --buffer-size=$opts{m} -T $opts{T} | sed 's/\t/ ||| /g' | gzip -9n > $sorted_grammar")) {
+ print STDERR "* FATAL: Couldn't sort the grammar (not enough memory? short on tmp space?)\n";
+ exit 2;
+ }
+ } else {
+ # Moses phrase-based grammar -- prepend nonterminal symbol and -log() the weights
+ if (system("$CAT $grammar | $JOSHUA/scripts/support/moses_phrase_to_joshua.pl | sort -k3,3 --buffer-size=$opts{m} -T $opts{T} | gzip -9n > $sorted_grammar")) {
+ print STDERR "* FATAL: Couldn't sort the grammar (not enough memory? short on tmp space?)\n";
+ exit 2;
+ }
+ }
+
+ push(@sorted_grammars, $sorted_grammar);
+}
+
# Do the packing using the config.
-my $cmd = "java -Xmx$opts{m} -cp $JOSHUA/class joshua.tools.GrammarPacker -p $output_dir -g $grammar";
+my $grammars = join(" ", @sorted_grammars);
+my $outputs = join(" ", @outputs);
+my $cmd = "java -Xmx$opts{m} -cp $JOSHUA/lib/args4j-2.0.29.jar:$JOSHUA/class joshua.tools.GrammarPackerCli -g $grammars --outputs $outputs";
print STDERR "Packing with $cmd...\n" if $opts{v};
+
my $retval = system($cmd);
if ($retval == 0) {
- unlink($sorted_grammar);
+ map { unlink($_) } @sorted_grammars;
} else {
print STDERR "* FATAL: Couldn't pack the grammar.\n";
exit 1;
diff --git a/scripts/support/make-release.sh b/scripts/support/make-release.sh
index 935f922..6489bdf 100755
--- a/scripts/support/make-release.sh
+++ b/scripts/support/make-release.sh
@@ -4,31 +4,40 @@
set -u
-version=$1
-
cd $JOSHUA
-ant clean java
+ant java version
+
+if [[ ! -e VERSION ]]; then
+ echo "* FATAL: can't find the version file!"
+ exit
+fi
+
+version=$(grep ^release VERSION | awk '{print $NF}')
+commit=$(grep ^current VERSION | awk '{print $NF}')
+clean=$(echo $commit | cut -d- -f2)
+
+if [[ $clean != "0" ]]; then
+ version=$commit
+fi
+
+echo "Bundling up joshua-$version"
+
[[ ! -d release ]] && mkdir release
rm -f joshua-$version && ln -s $JOSHUA joshua-$version
-wget -qr joshua-decoder.org
-
-# Save the current version and commit to a file
-echo "release version: $version" > VERSION
-echo "current commit: $(git rev-parse --verify HEAD)" >> VERSION
+wget -r http://joshua-decoder.org/
tar czf release/joshua-$version.tgz \
--exclude='*~' --exclude='#*' \
- joshua-$version/{README.md,VERSION,build.xml,logging.properties} \
+ joshua-$version/{README.md,VERSION,CHANGELOG,build.xml,logging.properties} \
joshua-$version/src \
joshua-$version/bin \
joshua-$version/class \
- joshua-$version/lib/{*jar,eng_sm6.gr,hadoop-0.20.2.tar.gz,README,LICENSES} \
+ joshua-$version/lib/{*jar,eng_sm6.gr,hadoop-2.5.2.tar.gz,README,LICENSES} \
joshua-$version/scripts \
joshua-$version/test \
joshua-$version/examples \
joshua-$version/thrax/bin/thrax.jar \
- joshua-$version/thrax/scripts \
joshua-$version/joshua-decoder.org
rm -f joshua-$version
diff --git a/scripts/support/moses2joshua_grammar.pl b/scripts/support/moses2joshua_grammar.pl
index 06e4b73..ab10a23 100755
--- a/scripts/support/moses2joshua_grammar.pl
+++ b/scripts/support/moses2joshua_grammar.pl
@@ -9,11 +9,11 @@
#
# (1) The rule syntax. Moses' rules look like this:
#
-# der [X][NN] [X] ||| of the [X][NN] [PP] ||| 0-0 0-1 1-2 ||| 1 ||| |||
+# der [X][NN] [X] ||| of the [X][NN] [PP] ||| 0-0 0-1 1-2 ||| .301 .6989 ||| |||
#
# Whereas the corresponding Joshua rule looks like this:
#
-# [PP] ||| der [NN] ||| of the [NN] ||| 1
+# [PP] ||| der [NN,1] ||| of the [NN,1] ||| 0.5 0.2
#
# (This doesn't apply to phrase tables, of course).
#
diff --git a/scripts/support/moses_phrase_to_joshua.pl b/scripts/support/moses_phrase_to_joshua.pl
index 4dc8198..5b04a5e 100755
--- a/scripts/support/moses_phrase_to_joshua.pl
+++ b/scripts/support/moses_phrase_to_joshua.pl
@@ -11,7 +11,12 @@
my @tokens = split(/ \|\|\| /, $line);
unshift(@tokens, "[X]");
- $tokens[3] = join(" ", map { -log($_) } split(' ', $tokens[3]));
+ $tokens[3] = join(" ", map { -mylog($_) } split(' ', $tokens[3]));
print join(" ||| ", @tokens);
}
+
+sub mylog {
+ my ($num) = @_;
+ return ($num == 0) ? -100 : log($num);
+}
diff --git a/scripts/support/run_bundler.py b/scripts/support/run_bundler.py
index 9675f27..9311e69 100755
--- a/scripts/support/run_bundler.py
+++ b/scripts/support/run_bundler.py
@@ -15,7 +15,6 @@
from subprocess import CalledProcessError, Popen, PIPE
import sys
-
EXAMPLE = r"""
Example invocation:
@@ -23,17 +22,17 @@
--force \
--verbose \
/path/to/origin/directory/test/model/joshua.config \
- --root /path/to/origin/directory \
+ --root /path/to/origin/directory/test/model \
new-bundle-directory \
--copy-config-options \
'-top-n 1 -output-format %S -mark-oovs false' \
- --pack-tm 'pt /path/to/origin/directory/grammar.gz'
+ --pack-tm /path/to/origin/directory/grammar.gz
Note: The options included in the value string for the --copy-config-options
argument can either be Joshua options or options for the
$JOSHUA/scripts/copy-config.pl script. The order of the --[pack-]tm options must
be in the same order as the grammar configuration lines they intend to
-override in the joshua.config file.
+override in the joshua.config file, and there can be only one --pack-tm option.
"""
README_TEMPLATE = """Joshua Configuration Run Bundle
@@ -104,6 +103,8 @@
"""
JOSHUA_PATH = os.environ.get('JOSHUA')
+default_normalizer = os.path.join(JOSHUA_PATH, "scripts/training/normalize.pl")
+default_tokenizer = os.path.join(JOSHUA_PATH, "scripts/training/penn-treebank-tokenizer.perl")
FILE_TYPE_TOKENS = ['lm', 'tm']
FILE_TYPE_OPTIONS = ['-path', '-lm_file']
@@ -325,6 +326,7 @@
def run_grammar_packer(src_path, dest_path):
cmd = [os.path.join(JOSHUA_PATH, "scripts/support/grammar-packer.pl"),
+ "-T", opts.tmpdir,
src_path, dest_path]
logging.info(
'Running the grammar-packer.pl script with the command: %s'
@@ -555,6 +557,18 @@
parser.add_argument(
'--absolute', dest='absolute', action='store_true', default=False,
help="Use absolute instead of relative paths for model file locations")
+ parser.add_argument(
+ '--source', dest='source',
+ help="Source language two-character code (ISO 639-1)")
+ parser.add_argument(
+ '--normalizer', default=default_normalizer,
+ help="source sentence normalizer that was applied to the model")
+ parser.add_argument(
+ '--tokenizer', default=default_tokenizer,
+ help="source sentence tokenizer that was applied to the model")
+ parser.add_argument(
+ '-T', dest='tmpdir', default='/tmp',
+ help="temp directory")
return parser.parse_args(clargs)
@@ -704,6 +718,7 @@
def main(argv):
+ global opts
opts = handle_args(argv[1:])
logging.basicConfig(
diff --git a/scripts/support/write-version.sh b/scripts/support/write-version.sh
new file mode 100755
index 0000000..720e513
--- /dev/null
+++ b/scripts/support/write-version.sh
@@ -0,0 +1,10 @@
+#!/bin/bash
+
+set -u
+
+version=$(git describe --abbrev=0 --dirty)
+
+# Save the current version and commit to a file
+echo "release version: $(git describe --abbrev=0)" > $JOSHUA/VERSION
+echo "current commit: $(git describe --long --dirty)" >> $JOSHUA/VERSION
+
diff --git a/scripts/training/MGIZA/.cvsignore b/scripts/training/MGIZA/.cvsignore
deleted file mode 100644
index 99be563..0000000
--- a/scripts/training/MGIZA/.cvsignore
+++ /dev/null
@@ -1,37 +0,0 @@
-.anjuta
-.tm_project*
-.libs
-.deps
-.*swp
-.nautilus-metafile.xml
-*.autosave
-*.pws
-*.bak
-*~
-#*#
-*.gladep
-*.la
-*.lo
-*.o
-*.class
-*.pyc
-aclocal.m4
-autom4te.cache
-config.h
-config.h.in
-config.log
-config.status
-configure
-intltool-extract*
-intltool-merge*
-intltool-modules*
-intltool-update*
-libtool
-prepare.sh
-stamp-h*
-ltmain.sh
-mkinstalldirs
-config.guess
-config.sub
-Makefile
-Makefile.in
diff --git a/scripts/training/MGIZA/AUTHORS b/scripts/training/MGIZA/AUTHORS
deleted file mode 100644
index e69de29..0000000
--- a/scripts/training/MGIZA/AUTHORS
+++ /dev/null
diff --git a/scripts/training/MGIZA/CMakeLists.txt b/scripts/training/MGIZA/CMakeLists.txt
deleted file mode 100644
index b638e51..0000000
--- a/scripts/training/MGIZA/CMakeLists.txt
+++ /dev/null
@@ -1,79 +0,0 @@
-PROJECT (mgiza)
-
-SET(MGIZA_VERSION_MAJOR "0")
-SET(MGIZA_VERSION_MINOR "7")
-SET(MGIZA_VERSION_PATCH "0")
-
-MATH(EXPR MGIZA_INT_VERSION "(${MGIZA_VERSION_MAJOR} * 10000) + (${MGIZA_VERSION_MINOR} * 100) + (${MGIZA_VERSION_PATCH} * 1)" )
-
-SET(MGIZA_VERSION "${MGIZA_VERSION_MAJOR}.${MGIZA_VERSION_MINOR}.${MGIZA_VERSION_PATCH}")
-
-#CMake 2.6+ is recommended to an improved Boost module
-CMAKE_MINIMUM_REQUIRED(VERSION 2.6.0 FATAL_ERROR)
-
-IF(COMMAND cmake_policy)
- cmake_policy(SET CMP0003 NEW)
-ENDIF(COMMAND cmake_policy)
-
-IF(CMAKE_INSTALL_PREFIX_INITIALIZED_TO_DEFAULT)
- SET(CMAKE_INSTALL_PREFIX
- "inst" CACHE PATH "MGIZA install prefix" FORCE
- )
- MESSAGE(STATUS "You have not set the install dir, default to './inst', if
- you want to set it, use cmake -DCMAKE_INSTALL_PREFIX to do so")
-ENDIF(CMAKE_INSTALL_PREFIX_INITIALIZED_TO_DEFAULT)
-
-#set various platform specific global options
-IF(WIN32)
- SET(CMAKE_DEBUG_POSTFIX "d")
- OPTION( USE_64_BIT "Set to on if you want to compile Win64" OFF )
-ENDIF(WIN32)
-
-
-
-INCLUDE(cmake/FindTR1.cmake)
-
-# include specific modules
-SET(CMAKE_MODULE_PATH "${CMAKE_SOURCE_DIR}/cmake")
-
-
-# Find Boost library, specify
-# BOOST_ROOT=/e/programs/boost_1_35_0
-# BOOST_LIBRARYDIR=$BOOST_ROOT/stage/lib
-
-set(Boost_USE_STATIC_LIBS ON)
-set(Boost_USE_MULTITHREADED ON)
-set(Boost_USE_STATIC_RUNTIME OFF)
-
-
-FIND_PACKAGE( Boost 1.41 COMPONENTS thread)
-
- IF(Boost_FOUND)
- IF (NOT _boost_IN_CACHE)
- MESSAGE( "Boost found" )
- message(STATUS "Boost_INCLUDE_DIR : ${Boost_INCLUDE_DIR}")
- ENDIF (NOT _boost_IN_CACHE)
- LINK_DIRECTORIES ( ${Boost_LIBRARY_DIRS} )
- INCLUDE_DIRECTORIES ( ${Boost_INCLUDE_DIRS} )
-ELSE()
- MESSAGE(FATAL_ERROR "Boost not found, please set the BOOST_ROOT and BOOST_LIBRARYDIR environment variables " )
-ENDIF()
-
-ADD_SUBDIRECTORY (src)
-ADD_SUBDIRECTORY (src/mkcls)
-ADD_SUBDIRECTORY (scripts)
-
-IF (WIN32)
-MESSAGE( STATUS "-------------------------------------------------------------------------------" )
-MESSAGE( STATUS "USE_64_BIT = ${USE_64_BIT}" )
-MESSAGE( STATUS "Change a value with: cmake -D<Variable>=<Value>" )
-MESSAGE( STATUS "-------------------------------------------------------------------------------" )
-
-
-SET( USE_64_BIT "${USE_64_BIT}" CACHE BOOL
- "Set to ON to build Win64" FORCE )
-ENDIF()
-
-
-
-
diff --git a/scripts/training/MGIZA/COPYING b/scripts/training/MGIZA/COPYING
deleted file mode 100644
index 94a9ed0..0000000
--- a/scripts/training/MGIZA/COPYING
+++ /dev/null
@@ -1,674 +0,0 @@
- GNU GENERAL PUBLIC LICENSE
- Version 3, 29 June 2007
-
- Copyright (C) 2007 Free Software Foundation, Inc. <http://fsf.org/>
- Everyone is permitted to copy and distribute verbatim copies
- of this license document, but changing it is not allowed.
-
- Preamble
-
- The GNU General Public License is a free, copyleft license for
-software and other kinds of works.
-
- The licenses for most software and other practical works are designed
-to take away your freedom to share and change the works. By contrast,
-the GNU General Public License is intended to guarantee your freedom to
-share and change all versions of a program--to make sure it remains free
-software for all its users. We, the Free Software Foundation, use the
-GNU General Public License for most of our software; it applies also to
-any other work released this way by its authors. You can apply it to
-your programs, too.
-
- When we speak of free software, we are referring to freedom, not
-price. Our General Public Licenses are designed to make sure that you
-have the freedom to distribute copies of free software (and charge for
-them if you wish), that you receive source code or can get it if you
-want it, that you can change the software or use pieces of it in new
-free programs, and that you know you can do these things.
-
- To protect your rights, we need to prevent others from denying you
-these rights or asking you to surrender the rights. Therefore, you have
-certain responsibilities if you distribute copies of the software, or if
-you modify it: responsibilities to respect the freedom of others.
-
- For example, if you distribute copies of such a program, whether
-gratis or for a fee, you must pass on to the recipients the same
-freedoms that you received. You must make sure that they, too, receive
-or can get the source code. And you must show them these terms so they
-know their rights.
-
- Developers that use the GNU GPL protect your rights with two steps:
-(1) assert copyright on the software, and (2) offer you this License
-giving you legal permission to copy, distribute and/or modify it.
-
- For the developers' and authors' protection, the GPL clearly explains
-that there is no warranty for this free software. For both users' and
-authors' sake, the GPL requires that modified versions be marked as
-changed, so that their problems will not be attributed erroneously to
-authors of previous versions.
-
- Some devices are designed to deny users access to install or run
-modified versions of the software inside them, although the manufacturer
-can do so. This is fundamentally incompatible with the aim of
-protecting users' freedom to change the software. The systematic
-pattern of such abuse occurs in the area of products for individuals to
-use, which is precisely where it is most unacceptable. Therefore, we
-have designed this version of the GPL to prohibit the practice for those
-products. If such problems arise substantially in other domains, we
-stand ready to extend this provision to those domains in future versions
-of the GPL, as needed to protect the freedom of users.
-
- Finally, every program is threatened constantly by software patents.
-States should not allow patents to restrict development and use of
-software on general-purpose computers, but in those that do, we wish to
-avoid the special danger that patents applied to a free program could
-make it effectively proprietary. To prevent this, the GPL assures that
-patents cannot be used to render the program non-free.
-
- The precise terms and conditions for copying, distribution and
-modification follow.
-
- TERMS AND CONDITIONS
-
- 0. Definitions.
-
- "This License" refers to version 3 of the GNU General Public License.
-
- "Copyright" also means copyright-like laws that apply to other kinds of
-works, such as semiconductor masks.
-
- "The Program" refers to any copyrightable work licensed under this
-License. Each licensee is addressed as "you". "Licensees" and
-"recipients" may be individuals or organizations.
-
- To "modify" a work means to copy from or adapt all or part of the work
-in a fashion requiring copyright permission, other than the making of an
-exact copy. The resulting work is called a "modified version" of the
-earlier work or a work "based on" the earlier work.
-
- A "covered work" means either the unmodified Program or a work based
-on the Program.
-
- To "propagate" a work means to do anything with it that, without
-permission, would make you directly or secondarily liable for
-infringement under applicable copyright law, except executing it on a
-computer or modifying a private copy. Propagation includes copying,
-distribution (with or without modification), making available to the
-public, and in some countries other activities as well.
-
- To "convey" a work means any kind of propagation that enables other
-parties to make or receive copies. Mere interaction with a user through
-a computer network, with no transfer of a copy, is not conveying.
-
- An interactive user interface displays "Appropriate Legal Notices"
-to the extent that it includes a convenient and prominently visible
-feature that (1) displays an appropriate copyright notice, and (2)
-tells the user that there is no warranty for the work (except to the
-extent that warranties are provided), that licensees may convey the
-work under this License, and how to view a copy of this License. If
-the interface presents a list of user commands or options, such as a
-menu, a prominent item in the list meets this criterion.
-
- 1. Source Code.
-
- The "source code" for a work means the preferred form of the work
-for making modifications to it. "Object code" means any non-source
-form of a work.
-
- A "Standard Interface" means an interface that either is an official
-standard defined by a recognized standards body, or, in the case of
-interfaces specified for a particular programming language, one that
-is widely used among developers working in that language.
-
- The "System Libraries" of an executable work include anything, other
-than the work as a whole, that (a) is included in the normal form of
-packaging a Major Component, but which is not part of that Major
-Component, and (b) serves only to enable use of the work with that
-Major Component, or to implement a Standard Interface for which an
-implementation is available to the public in source code form. A
-"Major Component", in this context, means a major essential component
-(kernel, window system, and so on) of the specific operating system
-(if any) on which the executable work runs, or a compiler used to
-produce the work, or an object code interpreter used to run it.
-
- The "Corresponding Source" for a work in object code form means all
-the source code needed to generate, install, and (for an executable
-work) run the object code and to modify the work, including scripts to
-control those activities. However, it does not include the work's
-System Libraries, or general-purpose tools or generally available free
-programs which are used unmodified in performing those activities but
-which are not part of the work. For example, Corresponding Source
-includes interface definition files associated with source files for
-the work, and the source code for shared libraries and dynamically
-linked subprograms that the work is specifically designed to require,
-such as by intimate data communication or control flow between those
-subprograms and other parts of the work.
-
- The Corresponding Source need not include anything that users
-can regenerate automatically from other parts of the Corresponding
-Source.
-
- The Corresponding Source for a work in source code form is that
-same work.
-
- 2. Basic Permissions.
-
- All rights granted under this License are granted for the term of
-copyright on the Program, and are irrevocable provided the stated
-conditions are met. This License explicitly affirms your unlimited
-permission to run the unmodified Program. The output from running a
-covered work is covered by this License only if the output, given its
-content, constitutes a covered work. This License acknowledges your
-rights of fair use or other equivalent, as provided by copyright law.
-
- You may make, run and propagate covered works that you do not
-convey, without conditions so long as your license otherwise remains
-in force. You may convey covered works to others for the sole purpose
-of having them make modifications exclusively for you, or provide you
-with facilities for running those works, provided that you comply with
-the terms of this License in conveying all material for which you do
-not control copyright. Those thus making or running the covered works
-for you must do so exclusively on your behalf, under your direction
-and control, on terms that prohibit them from making any copies of
-your copyrighted material outside their relationship with you.
-
- Conveying under any other circumstances is permitted solely under
-the conditions stated below. Sublicensing is not allowed; section 10
-makes it unnecessary.
-
- 3. Protecting Users' Legal Rights From Anti-Circumvention Law.
-
- No covered work shall be deemed part of an effective technological
-measure under any applicable law fulfilling obligations under article
-11 of the WIPO copyright treaty adopted on 20 December 1996, or
-similar laws prohibiting or restricting circumvention of such
-measures.
-
- When you convey a covered work, you waive any legal power to forbid
-circumvention of technological measures to the extent such circumvention
-is effected by exercising rights under this License with respect to
-the covered work, and you disclaim any intention to limit operation or
-modification of the work as a means of enforcing, against the work's
-users, your or third parties' legal rights to forbid circumvention of
-technological measures.
-
- 4. Conveying Verbatim Copies.
-
- You may convey verbatim copies of the Program's source code as you
-receive it, in any medium, provided that you conspicuously and
-appropriately publish on each copy an appropriate copyright notice;
-keep intact all notices stating that this License and any
-non-permissive terms added in accord with section 7 apply to the code;
-keep intact all notices of the absence of any warranty; and give all
-recipients a copy of this License along with the Program.
-
- You may charge any price or no price for each copy that you convey,
-and you may offer support or warranty protection for a fee.
-
- 5. Conveying Modified Source Versions.
-
- You may convey a work based on the Program, or the modifications to
-produce it from the Program, in the form of source code under the
-terms of section 4, provided that you also meet all of these conditions:
-
- a) The work must carry prominent notices stating that you modified
- it, and giving a relevant date.
-
- b) The work must carry prominent notices stating that it is
- released under this License and any conditions added under section
- 7. This requirement modifies the requirement in section 4 to
- "keep intact all notices".
-
- c) You must license the entire work, as a whole, under this
- License to anyone who comes into possession of a copy. This
- License will therefore apply, along with any applicable section 7
- additional terms, to the whole of the work, and all its parts,
- regardless of how they are packaged. This License gives no
- permission to license the work in any other way, but it does not
- invalidate such permission if you have separately received it.
-
- d) If the work has interactive user interfaces, each must display
- Appropriate Legal Notices; however, if the Program has interactive
- interfaces that do not display Appropriate Legal Notices, your
- work need not make them do so.
-
- A compilation of a covered work with other separate and independent
-works, which are not by their nature extensions of the covered work,
-and which are not combined with it such as to form a larger program,
-in or on a volume of a storage or distribution medium, is called an
-"aggregate" if the compilation and its resulting copyright are not
-used to limit the access or legal rights of the compilation's users
-beyond what the individual works permit. Inclusion of a covered work
-in an aggregate does not cause this License to apply to the other
-parts of the aggregate.
-
- 6. Conveying Non-Source Forms.
-
- You may convey a covered work in object code form under the terms
-of sections 4 and 5, provided that you also convey the
-machine-readable Corresponding Source under the terms of this License,
-in one of these ways:
-
- a) Convey the object code in, or embodied in, a physical product
- (including a physical distribution medium), accompanied by the
- Corresponding Source fixed on a durable physical medium
- customarily used for software interchange.
-
- b) Convey the object code in, or embodied in, a physical product
- (including a physical distribution medium), accompanied by a
- written offer, valid for at least three years and valid for as
- long as you offer spare parts or customer support for that product
- model, to give anyone who possesses the object code either (1) a
- copy of the Corresponding Source for all the software in the
- product that is covered by this License, on a durable physical
- medium customarily used for software interchange, for a price no
- more than your reasonable cost of physically performing this
- conveying of source, or (2) access to copy the
- Corresponding Source from a network server at no charge.
-
- c) Convey individual copies of the object code with a copy of the
- written offer to provide the Corresponding Source. This
- alternative is allowed only occasionally and noncommercially, and
- only if you received the object code with such an offer, in accord
- with subsection 6b.
-
- d) Convey the object code by offering access from a designated
- place (gratis or for a charge), and offer equivalent access to the
- Corresponding Source in the same way through the same place at no
- further charge. You need not require recipients to copy the
- Corresponding Source along with the object code. If the place to
- copy the object code is a network server, the Corresponding Source
- may be on a different server (operated by you or a third party)
- that supports equivalent copying facilities, provided you maintain
- clear directions next to the object code saying where to find the
- Corresponding Source. Regardless of what server hosts the
- Corresponding Source, you remain obligated to ensure that it is
- available for as long as needed to satisfy these requirements.
-
- e) Convey the object code using peer-to-peer transmission, provided
- you inform other peers where the object code and Corresponding
- Source of the work are being offered to the general public at no
- charge under subsection 6d.
-
- A separable portion of the object code, whose source code is excluded
-from the Corresponding Source as a System Library, need not be
-included in conveying the object code work.
-
- A "User Product" is either (1) a "consumer product", which means any
-tangible personal property which is normally used for personal, family,
-or household purposes, or (2) anything designed or sold for incorporation
-into a dwelling. In determining whether a product is a consumer product,
-doubtful cases shall be resolved in favor of coverage. For a particular
-product received by a particular user, "normally used" refers to a
-typical or common use of that class of product, regardless of the status
-of the particular user or of the way in which the particular user
-actually uses, or expects or is expected to use, the product. A product
-is a consumer product regardless of whether the product has substantial
-commercial, industrial or non-consumer uses, unless such uses represent
-the only significant mode of use of the product.
-
- "Installation Information" for a User Product means any methods,
-procedures, authorization keys, or other information required to install
-and execute modified versions of a covered work in that User Product from
-a modified version of its Corresponding Source. The information must
-suffice to ensure that the continued functioning of the modified object
-code is in no case prevented or interfered with solely because
-modification has been made.
-
- If you convey an object code work under this section in, or with, or
-specifically for use in, a User Product, and the conveying occurs as
-part of a transaction in which the right of possession and use of the
-User Product is transferred to the recipient in perpetuity or for a
-fixed term (regardless of how the transaction is characterized), the
-Corresponding Source conveyed under this section must be accompanied
-by the Installation Information. But this requirement does not apply
-if neither you nor any third party retains the ability to install
-modified object code on the User Product (for example, the work has
-been installed in ROM).
-
- The requirement to provide Installation Information does not include a
-requirement to continue to provide support service, warranty, or updates
-for a work that has been modified or installed by the recipient, or for
-the User Product in which it has been modified or installed. Access to a
-network may be denied when the modification itself materially and
-adversely affects the operation of the network or violates the rules and
-protocols for communication across the network.
-
- Corresponding Source conveyed, and Installation Information provided,
-in accord with this section must be in a format that is publicly
-documented (and with an implementation available to the public in
-source code form), and must require no special password or key for
-unpacking, reading or copying.
-
- 7. Additional Terms.
-
- "Additional permissions" are terms that supplement the terms of this
-License by making exceptions from one or more of its conditions.
-Additional permissions that are applicable to the entire Program shall
-be treated as though they were included in this License, to the extent
-that they are valid under applicable law. If additional permissions
-apply only to part of the Program, that part may be used separately
-under those permissions, but the entire Program remains governed by
-this License without regard to the additional permissions.
-
- When you convey a copy of a covered work, you may at your option
-remove any additional permissions from that copy, or from any part of
-it. (Additional permissions may be written to require their own
-removal in certain cases when you modify the work.) You may place
-additional permissions on material, added by you to a covered work,
-for which you have or can give appropriate copyright permission.
-
- Notwithstanding any other provision of this License, for material you
-add to a covered work, you may (if authorized by the copyright holders of
-that material) supplement the terms of this License with terms:
-
- a) Disclaiming warranty or limiting liability differently from the
- terms of sections 15 and 16 of this License; or
-
- b) Requiring preservation of specified reasonable legal notices or
- author attributions in that material or in the Appropriate Legal
- Notices displayed by works containing it; or
-
- c) Prohibiting misrepresentation of the origin of that material, or
- requiring that modified versions of such material be marked in
- reasonable ways as different from the original version; or
-
- d) Limiting the use for publicity purposes of names of licensors or
- authors of the material; or
-
- e) Declining to grant rights under trademark law for use of some
- trade names, trademarks, or service marks; or
-
- f) Requiring indemnification of licensors and authors of that
- material by anyone who conveys the material (or modified versions of
- it) with contractual assumptions of liability to the recipient, for
- any liability that these contractual assumptions directly impose on
- those licensors and authors.
-
- All other non-permissive additional terms are considered "further
-restrictions" within the meaning of section 10. If the Program as you
-received it, or any part of it, contains a notice stating that it is
-governed by this License along with a term that is a further
-restriction, you may remove that term. If a license document contains
-a further restriction but permits relicensing or conveying under this
-License, you may add to a covered work material governed by the terms
-of that license document, provided that the further restriction does
-not survive such relicensing or conveying.
-
- If you add terms to a covered work in accord with this section, you
-must place, in the relevant source files, a statement of the
-additional terms that apply to those files, or a notice indicating
-where to find the applicable terms.
-
- Additional terms, permissive or non-permissive, may be stated in the
-form of a separately written license, or stated as exceptions;
-the above requirements apply either way.
-
- 8. Termination.
-
- You may not propagate or modify a covered work except as expressly
-provided under this License. Any attempt otherwise to propagate or
-modify it is void, and will automatically terminate your rights under
-this License (including any patent licenses granted under the third
-paragraph of section 11).
-
- However, if you cease all violation of this License, then your
-license from a particular copyright holder is reinstated (a)
-provisionally, unless and until the copyright holder explicitly and
-finally terminates your license, and (b) permanently, if the copyright
-holder fails to notify you of the violation by some reasonable means
-prior to 60 days after the cessation.
-
- Moreover, your license from a particular copyright holder is
-reinstated permanently if the copyright holder notifies you of the
-violation by some reasonable means, this is the first time you have
-received notice of violation of this License (for any work) from that
-copyright holder, and you cure the violation prior to 30 days after
-your receipt of the notice.
-
- Termination of your rights under this section does not terminate the
-licenses of parties who have received copies or rights from you under
-this License. If your rights have been terminated and not permanently
-reinstated, you do not qualify to receive new licenses for the same
-material under section 10.
-
- 9. Acceptance Not Required for Having Copies.
-
- You are not required to accept this License in order to receive or
-run a copy of the Program. Ancillary propagation of a covered work
-occurring solely as a consequence of using peer-to-peer transmission
-to receive a copy likewise does not require acceptance. However,
-nothing other than this License grants you permission to propagate or
-modify any covered work. These actions infringe copyright if you do
-not accept this License. Therefore, by modifying or propagating a
-covered work, you indicate your acceptance of this License to do so.
-
- 10. Automatic Licensing of Downstream Recipients.
-
- Each time you convey a covered work, the recipient automatically
-receives a license from the original licensors, to run, modify and
-propagate that work, subject to this License. You are not responsible
-for enforcing compliance by third parties with this License.
-
- An "entity transaction" is a transaction transferring control of an
-organization, or substantially all assets of one, or subdividing an
-organization, or merging organizations. If propagation of a covered
-work results from an entity transaction, each party to that
-transaction who receives a copy of the work also receives whatever
-licenses to the work the party's predecessor in interest had or could
-give under the previous paragraph, plus a right to possession of the
-Corresponding Source of the work from the predecessor in interest, if
-the predecessor has it or can get it with reasonable efforts.
-
- You may not impose any further restrictions on the exercise of the
-rights granted or affirmed under this License. For example, you may
-not impose a license fee, royalty, or other charge for exercise of
-rights granted under this License, and you may not initiate litigation
-(including a cross-claim or counterclaim in a lawsuit) alleging that
-any patent claim is infringed by making, using, selling, offering for
-sale, or importing the Program or any portion of it.
-
- 11. Patents.
-
- A "contributor" is a copyright holder who authorizes use under this
-License of the Program or a work on which the Program is based. The
-work thus licensed is called the contributor's "contributor version".
-
- A contributor's "essential patent claims" are all patent claims
-owned or controlled by the contributor, whether already acquired or
-hereafter acquired, that would be infringed by some manner, permitted
-by this License, of making, using, or selling its contributor version,
-but do not include claims that would be infringed only as a
-consequence of further modification of the contributor version. For
-purposes of this definition, "control" includes the right to grant
-patent sublicenses in a manner consistent with the requirements of
-this License.
-
- Each contributor grants you a non-exclusive, worldwide, royalty-free
-patent license under the contributor's essential patent claims, to
-make, use, sell, offer for sale, import and otherwise run, modify and
-propagate the contents of its contributor version.
-
- In the following three paragraphs, a "patent license" is any express
-agreement or commitment, however denominated, not to enforce a patent
-(such as an express permission to practice a patent or covenant not to
-sue for patent infringement). To "grant" such a patent license to a
-party means to make such an agreement or commitment not to enforce a
-patent against the party.
-
- If you convey a covered work, knowingly relying on a patent license,
-and the Corresponding Source of the work is not available for anyone
-to copy, free of charge and under the terms of this License, through a
-publicly available network server or other readily accessible means,
-then you must either (1) cause the Corresponding Source to be so
-available, or (2) arrange to deprive yourself of the benefit of the
-patent license for this particular work, or (3) arrange, in a manner
-consistent with the requirements of this License, to extend the patent
-license to downstream recipients. "Knowingly relying" means you have
-actual knowledge that, but for the patent license, your conveying the
-covered work in a country, or your recipient's use of the covered work
-in a country, would infringe one or more identifiable patents in that
-country that you have reason to believe are valid.
-
- If, pursuant to or in connection with a single transaction or
-arrangement, you convey, or propagate by procuring conveyance of, a
-covered work, and grant a patent license to some of the parties
-receiving the covered work authorizing them to use, propagate, modify
-or convey a specific copy of the covered work, then the patent license
-you grant is automatically extended to all recipients of the covered
-work and works based on it.
-
- A patent license is "discriminatory" if it does not include within
-the scope of its coverage, prohibits the exercise of, or is
-conditioned on the non-exercise of one or more of the rights that are
-specifically granted under this License. You may not convey a covered
-work if you are a party to an arrangement with a third party that is
-in the business of distributing software, under which you make payment
-to the third party based on the extent of your activity of conveying
-the work, and under which the third party grants, to any of the
-parties who would receive the covered work from you, a discriminatory
-patent license (a) in connection with copies of the covered work
-conveyed by you (or copies made from those copies), or (b) primarily
-for and in connection with specific products or compilations that
-contain the covered work, unless you entered into that arrangement,
-or that patent license was granted, prior to 28 March 2007.
-
- Nothing in this License shall be construed as excluding or limiting
-any implied license or other defenses to infringement that may
-otherwise be available to you under applicable patent law.
-
- 12. No Surrender of Others' Freedom.
-
- If conditions are imposed on you (whether by court order, agreement or
-otherwise) that contradict the conditions of this License, they do not
-excuse you from the conditions of this License. If you cannot convey a
-covered work so as to satisfy simultaneously your obligations under this
-License and any other pertinent obligations, then as a consequence you may
-not convey it at all. For example, if you agree to terms that obligate you
-to collect a royalty for further conveying from those to whom you convey
-the Program, the only way you could satisfy both those terms and this
-License would be to refrain entirely from conveying the Program.
-
- 13. Use with the GNU Affero General Public License.
-
- Notwithstanding any other provision of this License, you have
-permission to link or combine any covered work with a work licensed
-under version 3 of the GNU Affero General Public License into a single
-combined work, and to convey the resulting work. The terms of this
-License will continue to apply to the part which is the covered work,
-but the special requirements of the GNU Affero General Public License,
-section 13, concerning interaction through a network will apply to the
-combination as such.
-
- 14. Revised Versions of this License.
-
- The Free Software Foundation may publish revised and/or new versions of
-the GNU General Public License from time to time. Such new versions will
-be similar in spirit to the present version, but may differ in detail to
-address new problems or concerns.
-
- Each version is given a distinguishing version number. If the
-Program specifies that a certain numbered version of the GNU General
-Public License "or any later version" applies to it, you have the
-option of following the terms and conditions either of that numbered
-version or of any later version published by the Free Software
-Foundation. If the Program does not specify a version number of the
-GNU General Public License, you may choose any version ever published
-by the Free Software Foundation.
-
- If the Program specifies that a proxy can decide which future
-versions of the GNU General Public License can be used, that proxy's
-public statement of acceptance of a version permanently authorizes you
-to choose that version for the Program.
-
- Later license versions may give you additional or different
-permissions. However, no additional obligations are imposed on any
-author or copyright holder as a result of your choosing to follow a
-later version.
-
- 15. Disclaimer of Warranty.
-
- THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY
-APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT
-HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY
-OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO,
-THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
-PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM
-IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF
-ALL NECESSARY SERVICING, REPAIR OR CORRECTION.
-
- 16. Limitation of Liability.
-
- IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
-WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS
-THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY
-GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE
-USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF
-DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD
-PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS),
-EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF
-SUCH DAMAGES.
-
- 17. Interpretation of Sections 15 and 16.
-
- If the disclaimer of warranty and limitation of liability provided
-above cannot be given local legal effect according to their terms,
-reviewing courts shall apply local law that most closely approximates
-an absolute waiver of all civil liability in connection with the
-Program, unless a warranty or assumption of liability accompanies a
-copy of the Program in return for a fee.
-
- END OF TERMS AND CONDITIONS
-
- How to Apply These Terms to Your New Programs
-
- If you develop a new program, and you want it to be of the greatest
-possible use to the public, the best way to achieve this is to make it
-free software which everyone can redistribute and change under these terms.
-
- To do so, attach the following notices to the program. It is safest
-to attach them to the start of each source file to most effectively
-state the exclusion of warranty; and each file should have at least
-the "copyright" line and a pointer to where the full notice is found.
-
- <one line to give the program's name and a brief idea of what it does.>
- Copyright (C) <year> <name of author>
-
- This program is free software: you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published by
- the Free Software Foundation, either version 3 of the License, or
- (at your option) any later version.
-
- This program is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- GNU General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see <http://www.gnu.org/licenses/>.
-
-Also add information on how to contact you by electronic and paper mail.
-
- If the program does terminal interaction, make it output a short
-notice like this when it starts in an interactive mode:
-
- <program> Copyright (C) <year> <name of author>
- This program comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
- This is free software, and you are welcome to redistribute it
- under certain conditions; type `show c' for details.
-
-The hypothetical commands `show w' and `show c' should show the appropriate
-parts of the General Public License. Of course, your program's commands
-might be different; for a GUI interface, you would use an "about box".
-
- You should also get your employer (if you work as a programmer) or school,
-if any, to sign a "copyright disclaimer" for the program, if necessary.
-For more information on this, and how to apply and follow the GNU GPL, see
-<http://www.gnu.org/licenses/>.
-
- The GNU General Public License does not permit incorporating your program
-into proprietary programs. If your program is a subroutine library, you
-may consider it more useful to permit linking proprietary applications with
-the library. If this is what you want to do, use the GNU Lesser General
-Public License instead of this License. But first, please read
-<http://www.gnu.org/philosophy/why-not-lgpl.html>.
diff --git a/scripts/training/MGIZA/ChangeLog b/scripts/training/MGIZA/ChangeLog
deleted file mode 100644
index e69de29..0000000
--- a/scripts/training/MGIZA/ChangeLog
+++ /dev/null
diff --git a/scripts/training/MGIZA/INSTALL b/scripts/training/MGIZA/INSTALL
deleted file mode 100644
index 27e3dca..0000000
--- a/scripts/training/MGIZA/INSTALL
+++ /dev/null
@@ -1,11 +0,0 @@
-cmake .
-make
-make install
-
-If you want to install to a custom location, add the following flag when you run cmake:
--DCMAKE_INSTALL_PREFIX=/path/to/custom/location
-
-
-NOTE: Boost Version 1.48 has problem with the code, you can use either 1.46 or 1.50+. Unfortunately 1.48 is shipped with Ubuntu 12.04 LTS, you can either download and compile libboost 1.50+ from their website, or just do this:
-
-sudo apt-get install libboost1.46-all-dev
diff --git a/scripts/training/MGIZA/Makefile.am b/scripts/training/MGIZA/Makefile.am
deleted file mode 100644
index 8b28f81..0000000
--- a/scripts/training/MGIZA/Makefile.am
+++ /dev/null
@@ -1,29 +0,0 @@
-## Process this file with automake to produce Makefile.in
-## Created by Anjuta
-
-SUBDIRS = src
-
-mgizadocdir = ${prefix}/doc/mgiza
-mgizadoc_DATA = \
- README\
- COPYING\
- AUTHORS\
- ChangeLog\
- INSTALL\
- NEWS
-
-mgizascriptsdir = ${prefix}/scripts/
-
-mgizascripts_SCRIPTS = \
- scripts/*
-
-EXTRA_DIST = $(mgizadoc_DATA) \
- ${mgizascripts_SCRIPTS}
-# Copy all the spec files. Of cource, only one is actually used.
-dist-hook:
- for specfile in *.spec; do \
- if test -f $$specfile; then \
- cp -p $$specfile $(distdir); \
- fi \
- done
-
diff --git a/scripts/training/MGIZA/Makefile.in b/scripts/training/MGIZA/Makefile.in
deleted file mode 100644
index 7540d62..0000000
--- a/scripts/training/MGIZA/Makefile.in
+++ /dev/null
@@ -1,683 +0,0 @@
-# Makefile.in generated by automake 1.10.1 from Makefile.am.
-# @configure_input@
-
-# Copyright (C) 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002,
-# 2003, 2004, 2005, 2006, 2007, 2008 Free Software Foundation, Inc.
-# This Makefile.in is free software; the Free Software Foundation
-# gives unlimited permission to copy and/or distribute it,
-# with or without modifications, as long as this notice is preserved.
-
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY, to the extent permitted by law; without
-# even the implied warranty of MERCHANTABILITY or FITNESS FOR A
-# PARTICULAR PURPOSE.
-
-@SET_MAKE@
-
-
-VPATH = @srcdir@
-pkgdatadir = $(datadir)/@PACKAGE@
-pkglibdir = $(libdir)/@PACKAGE@
-pkgincludedir = $(includedir)/@PACKAGE@
-am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd
-install_sh_DATA = $(install_sh) -c -m 644
-install_sh_PROGRAM = $(install_sh) -c
-install_sh_SCRIPT = $(install_sh) -c
-INSTALL_HEADER = $(INSTALL_DATA)
-transform = $(program_transform_name)
-NORMAL_INSTALL = :
-PRE_INSTALL = :
-POST_INSTALL = :
-NORMAL_UNINSTALL = :
-PRE_UNINSTALL = :
-POST_UNINSTALL = :
-subdir = .
-DIST_COMMON = README $(am__configure_deps) $(srcdir)/Makefile.am \
- $(srcdir)/Makefile.in $(srcdir)/config.h.in \
- $(top_srcdir)/configure AUTHORS COPYING ChangeLog INSTALL NEWS \
- config.guess config.sub depcomp install-sh ltmain.sh missing
-ACLOCAL_M4 = $(top_srcdir)/aclocal.m4
-am__aclocal_m4_deps = $(top_srcdir)/configure.ac
-am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \
- $(ACLOCAL_M4)
-am__CONFIG_DISTCLEAN_FILES = config.status config.cache config.log \
- configure.lineno config.status.lineno
-mkinstalldirs = $(install_sh) -d
-CONFIG_HEADER = config.h
-CONFIG_CLEAN_FILES =
-am__installdirs = "$(DESTDIR)$(mgizascriptsdir)" \
- "$(DESTDIR)$(mgizadocdir)"
-mgizascriptsSCRIPT_INSTALL = $(INSTALL_SCRIPT)
-SCRIPTS = $(mgizascripts_SCRIPTS)
-SOURCES =
-DIST_SOURCES =
-RECURSIVE_TARGETS = all-recursive check-recursive dvi-recursive \
- html-recursive info-recursive install-data-recursive \
- install-dvi-recursive install-exec-recursive \
- install-html-recursive install-info-recursive \
- install-pdf-recursive install-ps-recursive install-recursive \
- installcheck-recursive installdirs-recursive pdf-recursive \
- ps-recursive uninstall-recursive
-am__vpath_adj_setup = srcdirstrip=`echo "$(srcdir)" | sed 's|.|.|g'`;
-am__vpath_adj = case $$p in \
- $(srcdir)/*) f=`echo "$$p" | sed "s|^$$srcdirstrip/||"`;; \
- *) f=$$p;; \
- esac;
-am__strip_dir = `echo $$p | sed -e 's|^.*/||'`;
-mgizadocDATA_INSTALL = $(INSTALL_DATA)
-DATA = $(mgizadoc_DATA)
-RECURSIVE_CLEAN_TARGETS = mostlyclean-recursive clean-recursive \
- distclean-recursive maintainer-clean-recursive
-ETAGS = etags
-CTAGS = ctags
-DIST_SUBDIRS = $(SUBDIRS)
-DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST)
-distdir = $(PACKAGE)-$(VERSION)
-top_distdir = $(distdir)
-am__remove_distdir = \
- { test ! -d $(distdir) \
- || { find $(distdir) -type d ! -perm -200 -exec chmod u+w {} ';' \
- && rm -fr $(distdir); }; }
-DIST_ARCHIVES = $(distdir).tar.gz
-GZIP_ENV = --best
-distuninstallcheck_listfiles = find . -type f -print
-distcleancheck_listfiles = find . -type f -print
-ACLOCAL = @ACLOCAL@
-AMTAR = @AMTAR@
-AUTOCONF = @AUTOCONF@
-AUTOHEADER = @AUTOHEADER@
-AUTOMAKE = @AUTOMAKE@
-AWK = @AWK@
-CC = @CC@
-CCDEPMODE = @CCDEPMODE@
-CFLAGS = @CFLAGS@
-CPP = @CPP@
-CPPFLAGS = @CPPFLAGS@
-CXX = @CXX@
-CXXDEPMODE = @CXXDEPMODE@
-CXXFLAGS = @CXXFLAGS@
-CYGPATH_W = @CYGPATH_W@
-DEFS = @DEFS@
-DEPDIR = @DEPDIR@
-ECHO_C = @ECHO_C@
-ECHO_N = @ECHO_N@
-ECHO_T = @ECHO_T@
-EGREP = @EGREP@
-EXEEXT = @EXEEXT@
-GREP = @GREP@
-INSTALL = @INSTALL@
-INSTALL_DATA = @INSTALL_DATA@
-INSTALL_PROGRAM = @INSTALL_PROGRAM@
-INSTALL_SCRIPT = @INSTALL_SCRIPT@
-INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@
-LDFLAGS = @LDFLAGS@
-LIBOBJS = @LIBOBJS@
-LIBS = @LIBS@
-LTLIBOBJS = @LTLIBOBJS@
-MAINT = @MAINT@
-MAKEINFO = @MAKEINFO@
-MKDIR_P = @MKDIR_P@
-OBJEXT = @OBJEXT@
-PACKAGE = @PACKAGE@
-PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@
-PACKAGE_NAME = @PACKAGE_NAME@
-PACKAGE_STRING = @PACKAGE_STRING@
-PACKAGE_TARNAME = @PACKAGE_TARNAME@
-PACKAGE_VERSION = @PACKAGE_VERSION@
-PATH_SEPARATOR = @PATH_SEPARATOR@
-RANLIB = @RANLIB@
-SET_MAKE = @SET_MAKE@
-SHELL = @SHELL@
-STRIP = @STRIP@
-VERSION = @VERSION@
-abs_builddir = @abs_builddir@
-abs_srcdir = @abs_srcdir@
-abs_top_builddir = @abs_top_builddir@
-abs_top_srcdir = @abs_top_srcdir@
-ac_ct_CC = @ac_ct_CC@
-ac_ct_CXX = @ac_ct_CXX@
-am__include = @am__include@
-am__leading_dot = @am__leading_dot@
-am__quote = @am__quote@
-am__tar = @am__tar@
-am__untar = @am__untar@
-bindir = @bindir@
-build_alias = @build_alias@
-builddir = @builddir@
-datadir = @datadir@
-datarootdir = @datarootdir@
-docdir = @docdir@
-dvidir = @dvidir@
-exec_prefix = @exec_prefix@
-host_alias = @host_alias@
-htmldir = @htmldir@
-includedir = @includedir@
-infodir = @infodir@
-install_sh = @install_sh@
-libdir = @libdir@
-libexecdir = @libexecdir@
-localedir = @localedir@
-localstatedir = @localstatedir@
-mandir = @mandir@
-mkdir_p = @mkdir_p@
-oldincludedir = @oldincludedir@
-pdfdir = @pdfdir@
-prefix = @prefix@
-program_transform_name = @program_transform_name@
-psdir = @psdir@
-sbindir = @sbindir@
-sharedstatedir = @sharedstatedir@
-srcdir = @srcdir@
-sysconfdir = @sysconfdir@
-target_alias = @target_alias@
-top_build_prefix = @top_build_prefix@
-top_builddir = @top_builddir@
-top_srcdir = @top_srcdir@
-SUBDIRS = src
-mgizadocdir = ${prefix}/doc/mgiza
-mgizadoc_DATA = \
- README\
- COPYING\
- AUTHORS\
- ChangeLog\
- INSTALL\
- NEWS
-
-mgizascriptsdir = ${prefix}/scripts/
-mgizascripts_SCRIPTS = \
- scripts/*
-
-EXTRA_DIST = $(mgizadoc_DATA) \
- ${mgizascripts_SCRIPTS}
-
-all: config.h
- $(MAKE) $(AM_MAKEFLAGS) all-recursive
-
-.SUFFIXES:
-am--refresh:
- @:
-$(srcdir)/Makefile.in: @MAINTAINER_MODE_TRUE@ $(srcdir)/Makefile.am $(am__configure_deps)
- @for dep in $?; do \
- case '$(am__configure_deps)' in \
- *$$dep*) \
- echo ' cd $(srcdir) && $(AUTOMAKE) --gnu '; \
- cd $(srcdir) && $(AUTOMAKE) --gnu \
- && exit 0; \
- exit 1;; \
- esac; \
- done; \
- echo ' cd $(top_srcdir) && $(AUTOMAKE) --gnu Makefile'; \
- cd $(top_srcdir) && \
- $(AUTOMAKE) --gnu Makefile
-.PRECIOUS: Makefile
-Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status
- @case '$?' in \
- *config.status*) \
- echo ' $(SHELL) ./config.status'; \
- $(SHELL) ./config.status;; \
- *) \
- echo ' cd $(top_builddir) && $(SHELL) ./config.status $@ $(am__depfiles_maybe)'; \
- cd $(top_builddir) && $(SHELL) ./config.status $@ $(am__depfiles_maybe);; \
- esac;
-
-$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES)
- $(SHELL) ./config.status --recheck
-
-$(top_srcdir)/configure: @MAINTAINER_MODE_TRUE@ $(am__configure_deps)
- cd $(srcdir) && $(AUTOCONF)
-$(ACLOCAL_M4): @MAINTAINER_MODE_TRUE@ $(am__aclocal_m4_deps)
- cd $(srcdir) && $(ACLOCAL) $(ACLOCAL_AMFLAGS)
-
-config.h: stamp-h1
- @if test ! -f $@; then \
- rm -f stamp-h1; \
- $(MAKE) $(AM_MAKEFLAGS) stamp-h1; \
- else :; fi
-
-stamp-h1: $(srcdir)/config.h.in $(top_builddir)/config.status
- @rm -f stamp-h1
- cd $(top_builddir) && $(SHELL) ./config.status config.h
-$(srcdir)/config.h.in: @MAINTAINER_MODE_TRUE@ $(am__configure_deps)
- cd $(top_srcdir) && $(AUTOHEADER)
- rm -f stamp-h1
- touch $@
-
-distclean-hdr:
- -rm -f config.h stamp-h1
-install-mgizascriptsSCRIPTS: $(mgizascripts_SCRIPTS)
- @$(NORMAL_INSTALL)
- test -z "$(mgizascriptsdir)" || $(MKDIR_P) "$(DESTDIR)$(mgizascriptsdir)"
- @list='$(mgizascripts_SCRIPTS)'; for p in $$list; do \
- if test -f "$$p"; then d=; else d="$(srcdir)/"; fi; \
- if test -f $$d$$p; then \
- f=`echo "$$p" | sed 's|^.*/||;$(transform)'`; \
- echo " $(mgizascriptsSCRIPT_INSTALL) '$$d$$p' '$(DESTDIR)$(mgizascriptsdir)/$$f'"; \
- $(mgizascriptsSCRIPT_INSTALL) "$$d$$p" "$(DESTDIR)$(mgizascriptsdir)/$$f"; \
- else :; fi; \
- done
-
-uninstall-mgizascriptsSCRIPTS:
- @$(NORMAL_UNINSTALL)
- @list='$(mgizascripts_SCRIPTS)'; for p in $$list; do \
- f=`echo "$$p" | sed 's|^.*/||;$(transform)'`; \
- echo " rm -f '$(DESTDIR)$(mgizascriptsdir)/$$f'"; \
- rm -f "$(DESTDIR)$(mgizascriptsdir)/$$f"; \
- done
-install-mgizadocDATA: $(mgizadoc_DATA)
- @$(NORMAL_INSTALL)
- test -z "$(mgizadocdir)" || $(MKDIR_P) "$(DESTDIR)$(mgizadocdir)"
- @list='$(mgizadoc_DATA)'; for p in $$list; do \
- if test -f "$$p"; then d=; else d="$(srcdir)/"; fi; \
- f=$(am__strip_dir) \
- echo " $(mgizadocDATA_INSTALL) '$$d$$p' '$(DESTDIR)$(mgizadocdir)/$$f'"; \
- $(mgizadocDATA_INSTALL) "$$d$$p" "$(DESTDIR)$(mgizadocdir)/$$f"; \
- done
-
-uninstall-mgizadocDATA:
- @$(NORMAL_UNINSTALL)
- @list='$(mgizadoc_DATA)'; for p in $$list; do \
- f=$(am__strip_dir) \
- echo " rm -f '$(DESTDIR)$(mgizadocdir)/$$f'"; \
- rm -f "$(DESTDIR)$(mgizadocdir)/$$f"; \
- done
-
-# This directory's subdirectories are mostly independent; you can cd
-# into them and run `make' without going through this Makefile.
-# To change the values of `make' variables: instead of editing Makefiles,
-# (1) if the variable is set in `config.status', edit `config.status'
-# (which will cause the Makefiles to be regenerated when you run `make');
-# (2) otherwise, pass the desired values on the `make' command line.
-$(RECURSIVE_TARGETS):
- @failcom='exit 1'; \
- for f in x $$MAKEFLAGS; do \
- case $$f in \
- *=* | --[!k]*);; \
- *k*) failcom='fail=yes';; \
- esac; \
- done; \
- dot_seen=no; \
- target=`echo $@ | sed s/-recursive//`; \
- list='$(SUBDIRS)'; for subdir in $$list; do \
- echo "Making $$target in $$subdir"; \
- if test "$$subdir" = "."; then \
- dot_seen=yes; \
- local_target="$$target-am"; \
- else \
- local_target="$$target"; \
- fi; \
- (cd $$subdir && $(MAKE) $(AM_MAKEFLAGS) $$local_target) \
- || eval $$failcom; \
- done; \
- if test "$$dot_seen" = "no"; then \
- $(MAKE) $(AM_MAKEFLAGS) "$$target-am" || exit 1; \
- fi; test -z "$$fail"
-
-$(RECURSIVE_CLEAN_TARGETS):
- @failcom='exit 1'; \
- for f in x $$MAKEFLAGS; do \
- case $$f in \
- *=* | --[!k]*);; \
- *k*) failcom='fail=yes';; \
- esac; \
- done; \
- dot_seen=no; \
- case "$@" in \
- distclean-* | maintainer-clean-*) list='$(DIST_SUBDIRS)' ;; \
- *) list='$(SUBDIRS)' ;; \
- esac; \
- rev=''; for subdir in $$list; do \
- if test "$$subdir" = "."; then :; else \
- rev="$$subdir $$rev"; \
- fi; \
- done; \
- rev="$$rev ."; \
- target=`echo $@ | sed s/-recursive//`; \
- for subdir in $$rev; do \
- echo "Making $$target in $$subdir"; \
- if test "$$subdir" = "."; then \
- local_target="$$target-am"; \
- else \
- local_target="$$target"; \
- fi; \
- (cd $$subdir && $(MAKE) $(AM_MAKEFLAGS) $$local_target) \
- || eval $$failcom; \
- done && test -z "$$fail"
-tags-recursive:
- list='$(SUBDIRS)'; for subdir in $$list; do \
- test "$$subdir" = . || (cd $$subdir && $(MAKE) $(AM_MAKEFLAGS) tags); \
- done
-ctags-recursive:
- list='$(SUBDIRS)'; for subdir in $$list; do \
- test "$$subdir" = . || (cd $$subdir && $(MAKE) $(AM_MAKEFLAGS) ctags); \
- done
-
-ID: $(HEADERS) $(SOURCES) $(LISP) $(TAGS_FILES)
- list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \
- unique=`for i in $$list; do \
- if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
- done | \
- $(AWK) '{ files[$$0] = 1; nonemtpy = 1; } \
- END { if (nonempty) { for (i in files) print i; }; }'`; \
- mkid -fID $$unique
-tags: TAGS
-
-TAGS: tags-recursive $(HEADERS) $(SOURCES) config.h.in $(TAGS_DEPENDENCIES) \
- $(TAGS_FILES) $(LISP)
- tags=; \
- here=`pwd`; \
- if ($(ETAGS) --etags-include --version) >/dev/null 2>&1; then \
- include_option=--etags-include; \
- empty_fix=.; \
- else \
- include_option=--include; \
- empty_fix=; \
- fi; \
- list='$(SUBDIRS)'; for subdir in $$list; do \
- if test "$$subdir" = .; then :; else \
- test ! -f $$subdir/TAGS || \
- tags="$$tags $$include_option=$$here/$$subdir/TAGS"; \
- fi; \
- done; \
- list='$(SOURCES) $(HEADERS) config.h.in $(LISP) $(TAGS_FILES)'; \
- unique=`for i in $$list; do \
- if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
- done | \
- $(AWK) '{ files[$$0] = 1; nonempty = 1; } \
- END { if (nonempty) { for (i in files) print i; }; }'`; \
- if test -z "$(ETAGS_ARGS)$$tags$$unique"; then :; else \
- test -n "$$unique" || unique=$$empty_fix; \
- $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \
- $$tags $$unique; \
- fi
-ctags: CTAGS
-CTAGS: ctags-recursive $(HEADERS) $(SOURCES) config.h.in $(TAGS_DEPENDENCIES) \
- $(TAGS_FILES) $(LISP)
- tags=; \
- list='$(SOURCES) $(HEADERS) config.h.in $(LISP) $(TAGS_FILES)'; \
- unique=`for i in $$list; do \
- if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
- done | \
- $(AWK) '{ files[$$0] = 1; nonempty = 1; } \
- END { if (nonempty) { for (i in files) print i; }; }'`; \
- test -z "$(CTAGS_ARGS)$$tags$$unique" \
- || $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \
- $$tags $$unique
-
-GTAGS:
- here=`$(am__cd) $(top_builddir) && pwd` \
- && cd $(top_srcdir) \
- && gtags -i $(GTAGS_ARGS) $$here
-
-distclean-tags:
- -rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH tags
-
-distdir: $(DISTFILES)
- $(am__remove_distdir)
- test -d $(distdir) || mkdir $(distdir)
- @srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \
- topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \
- list='$(DISTFILES)'; \
- dist_files=`for file in $$list; do echo $$file; done | \
- sed -e "s|^$$srcdirstrip/||;t" \
- -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \
- case $$dist_files in \
- */*) $(MKDIR_P) `echo "$$dist_files" | \
- sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \
- sort -u` ;; \
- esac; \
- for file in $$dist_files; do \
- if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \
- if test -d $$d/$$file; then \
- dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \
- if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \
- cp -pR $(srcdir)/$$file $(distdir)$$dir || exit 1; \
- fi; \
- cp -pR $$d/$$file $(distdir)$$dir || exit 1; \
- else \
- test -f $(distdir)/$$file \
- || cp -p $$d/$$file $(distdir)/$$file \
- || exit 1; \
- fi; \
- done
- list='$(DIST_SUBDIRS)'; for subdir in $$list; do \
- if test "$$subdir" = .; then :; else \
- test -d "$(distdir)/$$subdir" \
- || $(MKDIR_P) "$(distdir)/$$subdir" \
- || exit 1; \
- distdir=`$(am__cd) $(distdir) && pwd`; \
- top_distdir=`$(am__cd) $(top_distdir) && pwd`; \
- (cd $$subdir && \
- $(MAKE) $(AM_MAKEFLAGS) \
- top_distdir="$$top_distdir" \
- distdir="$$distdir/$$subdir" \
- am__remove_distdir=: \
- am__skip_length_check=: \
- distdir) \
- || exit 1; \
- fi; \
- done
- $(MAKE) $(AM_MAKEFLAGS) \
- top_distdir="$(top_distdir)" distdir="$(distdir)" \
- dist-hook
- -find $(distdir) -type d ! -perm -777 -exec chmod a+rwx {} \; -o \
- ! -type d ! -perm -444 -links 1 -exec chmod a+r {} \; -o \
- ! -type d ! -perm -400 -exec chmod a+r {} \; -o \
- ! -type d ! -perm -444 -exec $(install_sh) -c -m a+r {} {} \; \
- || chmod -R a+r $(distdir)
-dist-gzip: distdir
- tardir=$(distdir) && $(am__tar) | GZIP=$(GZIP_ENV) gzip -c >$(distdir).tar.gz
- $(am__remove_distdir)
-
-dist-bzip2: distdir
- tardir=$(distdir) && $(am__tar) | bzip2 -9 -c >$(distdir).tar.bz2
- $(am__remove_distdir)
-
-dist-lzma: distdir
- tardir=$(distdir) && $(am__tar) | lzma -9 -c >$(distdir).tar.lzma
- $(am__remove_distdir)
-
-dist-tarZ: distdir
- tardir=$(distdir) && $(am__tar) | compress -c >$(distdir).tar.Z
- $(am__remove_distdir)
-
-dist-shar: distdir
- shar $(distdir) | GZIP=$(GZIP_ENV) gzip -c >$(distdir).shar.gz
- $(am__remove_distdir)
-
-dist-zip: distdir
- -rm -f $(distdir).zip
- zip -rq $(distdir).zip $(distdir)
- $(am__remove_distdir)
-
-dist dist-all: distdir
- tardir=$(distdir) && $(am__tar) | GZIP=$(GZIP_ENV) gzip -c >$(distdir).tar.gz
- $(am__remove_distdir)
-
-# This target untars the dist file and tries a VPATH configuration. Then
-# it guarantees that the distribution is self-contained by making another
-# tarfile.
-distcheck: dist
- case '$(DIST_ARCHIVES)' in \
- *.tar.gz*) \
- GZIP=$(GZIP_ENV) gunzip -c $(distdir).tar.gz | $(am__untar) ;;\
- *.tar.bz2*) \
- bunzip2 -c $(distdir).tar.bz2 | $(am__untar) ;;\
- *.tar.lzma*) \
- unlzma -c $(distdir).tar.lzma | $(am__untar) ;;\
- *.tar.Z*) \
- uncompress -c $(distdir).tar.Z | $(am__untar) ;;\
- *.shar.gz*) \
- GZIP=$(GZIP_ENV) gunzip -c $(distdir).shar.gz | unshar ;;\
- *.zip*) \
- unzip $(distdir).zip ;;\
- esac
- chmod -R a-w $(distdir); chmod a+w $(distdir)
- mkdir $(distdir)/_build
- mkdir $(distdir)/_inst
- chmod a-w $(distdir)
- dc_install_base=`$(am__cd) $(distdir)/_inst && pwd | sed -e 's,^[^:\\/]:[\\/],/,'` \
- && dc_destdir="$${TMPDIR-/tmp}/am-dc-$$$$/" \
- && cd $(distdir)/_build \
- && ../configure --srcdir=.. --prefix="$$dc_install_base" \
- $(DISTCHECK_CONFIGURE_FLAGS) \
- && $(MAKE) $(AM_MAKEFLAGS) \
- && $(MAKE) $(AM_MAKEFLAGS) dvi \
- && $(MAKE) $(AM_MAKEFLAGS) check \
- && $(MAKE) $(AM_MAKEFLAGS) install \
- && $(MAKE) $(AM_MAKEFLAGS) installcheck \
- && $(MAKE) $(AM_MAKEFLAGS) uninstall \
- && $(MAKE) $(AM_MAKEFLAGS) distuninstallcheck_dir="$$dc_install_base" \
- distuninstallcheck \
- && chmod -R a-w "$$dc_install_base" \
- && ({ \
- (cd ../.. && umask 077 && mkdir "$$dc_destdir") \
- && $(MAKE) $(AM_MAKEFLAGS) DESTDIR="$$dc_destdir" install \
- && $(MAKE) $(AM_MAKEFLAGS) DESTDIR="$$dc_destdir" uninstall \
- && $(MAKE) $(AM_MAKEFLAGS) DESTDIR="$$dc_destdir" \
- distuninstallcheck_dir="$$dc_destdir" distuninstallcheck; \
- } || { rm -rf "$$dc_destdir"; exit 1; }) \
- && rm -rf "$$dc_destdir" \
- && $(MAKE) $(AM_MAKEFLAGS) dist \
- && rm -rf $(DIST_ARCHIVES) \
- && $(MAKE) $(AM_MAKEFLAGS) distcleancheck
- $(am__remove_distdir)
- @(echo "$(distdir) archives ready for distribution: "; \
- list='$(DIST_ARCHIVES)'; for i in $$list; do echo $$i; done) | \
- sed -e 1h -e 1s/./=/g -e 1p -e 1x -e '$$p' -e '$$x'
-distuninstallcheck:
- @cd $(distuninstallcheck_dir) \
- && test `$(distuninstallcheck_listfiles) | wc -l` -le 1 \
- || { echo "ERROR: files left after uninstall:" ; \
- if test -n "$(DESTDIR)"; then \
- echo " (check DESTDIR support)"; \
- fi ; \
- $(distuninstallcheck_listfiles) ; \
- exit 1; } >&2
-distcleancheck: distclean
- @if test '$(srcdir)' = . ; then \
- echo "ERROR: distcleancheck can only run from a VPATH build" ; \
- exit 1 ; \
- fi
- @test `$(distcleancheck_listfiles) | wc -l` -eq 0 \
- || { echo "ERROR: files left in build directory after distclean:" ; \
- $(distcleancheck_listfiles) ; \
- exit 1; } >&2
-check-am: all-am
-check: check-recursive
-all-am: Makefile $(SCRIPTS) $(DATA) config.h
-installdirs: installdirs-recursive
-installdirs-am:
- for dir in "$(DESTDIR)$(mgizascriptsdir)" "$(DESTDIR)$(mgizadocdir)"; do \
- test -z "$$dir" || $(MKDIR_P) "$$dir"; \
- done
-install: install-recursive
-install-exec: install-exec-recursive
-install-data: install-data-recursive
-uninstall: uninstall-recursive
-
-install-am: all-am
- @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am
-
-installcheck: installcheck-recursive
-install-strip:
- $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
- install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
- `test -z '$(STRIP)' || \
- echo "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'"` install
-mostlyclean-generic:
-
-clean-generic:
-
-distclean-generic:
- -test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES)
-
-maintainer-clean-generic:
- @echo "This command is intended for maintainers to use"
- @echo "it deletes files that may require special tools to rebuild."
-clean: clean-recursive
-
-clean-am: clean-generic mostlyclean-am
-
-distclean: distclean-recursive
- -rm -f $(am__CONFIG_DISTCLEAN_FILES)
- -rm -f Makefile
-distclean-am: clean-am distclean-generic distclean-hdr distclean-tags
-
-dvi: dvi-recursive
-
-dvi-am:
-
-html: html-recursive
-
-info: info-recursive
-
-info-am:
-
-install-data-am: install-mgizadocDATA install-mgizascriptsSCRIPTS
-
-install-dvi: install-dvi-recursive
-
-install-exec-am:
-
-install-html: install-html-recursive
-
-install-info: install-info-recursive
-
-install-man:
-
-install-pdf: install-pdf-recursive
-
-install-ps: install-ps-recursive
-
-installcheck-am:
-
-maintainer-clean: maintainer-clean-recursive
- -rm -f $(am__CONFIG_DISTCLEAN_FILES)
- -rm -rf $(top_srcdir)/autom4te.cache
- -rm -f Makefile
-maintainer-clean-am: distclean-am maintainer-clean-generic
-
-mostlyclean: mostlyclean-recursive
-
-mostlyclean-am: mostlyclean-generic
-
-pdf: pdf-recursive
-
-pdf-am:
-
-ps: ps-recursive
-
-ps-am:
-
-uninstall-am: uninstall-mgizadocDATA uninstall-mgizascriptsSCRIPTS
-
-.MAKE: $(RECURSIVE_CLEAN_TARGETS) $(RECURSIVE_TARGETS) install-am \
- install-strip
-
-.PHONY: $(RECURSIVE_CLEAN_TARGETS) $(RECURSIVE_TARGETS) CTAGS GTAGS \
- all all-am am--refresh check check-am clean clean-generic \
- ctags ctags-recursive dist dist-all dist-bzip2 dist-gzip \
- dist-hook dist-lzma dist-shar dist-tarZ dist-zip distcheck \
- distclean distclean-generic distclean-hdr distclean-tags \
- distcleancheck distdir distuninstallcheck dvi dvi-am html \
- html-am info info-am install install-am install-data \
- install-data-am install-dvi install-dvi-am install-exec \
- install-exec-am install-html install-html-am install-info \
- install-info-am install-man install-mgizadocDATA \
- install-mgizascriptsSCRIPTS install-pdf install-pdf-am \
- install-ps install-ps-am install-strip installcheck \
- installcheck-am installdirs installdirs-am maintainer-clean \
- maintainer-clean-generic mostlyclean mostlyclean-generic pdf \
- pdf-am ps ps-am tags tags-recursive uninstall uninstall-am \
- uninstall-mgizadocDATA uninstall-mgizascriptsSCRIPTS
-
-# Copy all the spec files. Of cource, only one is actually used.
-dist-hook:
- for specfile in *.spec; do \
- if test -f $$specfile; then \
- cp -p $$specfile $(distdir); \
- fi \
- done
-# Tell versions [3.59,3.63) of GNU make to not export all variables.
-# Otherwise a system limit (for SysV at least) may be exceeded.
-.NOEXPORT:
diff --git a/scripts/training/MGIZA/NEWS b/scripts/training/MGIZA/NEWS
deleted file mode 100644
index e69de29..0000000
--- a/scripts/training/MGIZA/NEWS
+++ /dev/null
diff --git a/scripts/training/MGIZA/README b/scripts/training/MGIZA/README
deleted file mode 100644
index e69de29..0000000
--- a/scripts/training/MGIZA/README
+++ /dev/null
diff --git a/scripts/training/MGIZA/RELEASE_NOTE.txt b/scripts/training/MGIZA/RELEASE_NOTE.txt
deleted file mode 100644
index 7b1f685..0000000
--- a/scripts/training/MGIZA/RELEASE_NOTE.txt
+++ /dev/null
@@ -1,54 +0,0 @@
-Release Notes
-0.7.0
-
-Overview
-
-The version provide a new building system by cmake and native MS Windows support. Please be advised that the code has undergone a lot of modifications, if you are not using MS Windows, we suggest you use old 0.6.3 version until more test.
-
-You are welcomed to help testing it and report bugs.
-
-Thank you.
-
-New Building System
-
-In additional to GNU autotools, MGIZA++ now supports CMake. Use
-
-cmake
-make
-make install
-
-to build and install, on both Windows and Linux.
-
-Native MS Windows support
-
-The code has been modified to provide native Windows support. You can now generate Visual Studio solution and build it by invoking:
-
-cmake -G "Visual Studio 10"
-msbuild /p:Configuration=Release mgiza.sln
-
-Download link https://sourceforge.net/projects/mgizapp/files/mgizapp-0.7.0.tar.gz/download.
-0.6.3
-
-Memory optimization
-
-Filter vocabulary / word class and eliminate duplications. Being able to train with 34M sentence pairs and keep memory below 2G.
-
-Bug fix
-
-When log file was specified, model 3/4/5 training will occasionally encounter racing condition and crash. The unnecessary logging information is removed, because the same message is already printed on screen.
-
-Download link http://www.cs.cmu.edu/~qing/release/mgiza-0.6.3-10-01-11.tar.gz.
-0.6.2
-
-Minor interface change to keep compatibility with Chaski 0.2.2, the symal.sh script need file name to be specified instead of directly outputs to STDOUT.
-
-Download link http://www.cs.cmu.edu/~qing/release/mgiza-0.6.2-09-12-07.tar.gz.
-0.6.1
-
-Since the this release the MGIZA is separated from QMT package and therefore the dependencies are removed. Currently the only dependency is pthread library.
-
-Scripts for force alignment / resume training is included in the package. Please refer to forcealignment
-
-Added error tolerance functionality for hmmnorm executable, which allows ignoring a number of trunks.
-
-Download link: http://www.cs.cmu.edu/~qing/release/mgiza-0.6.1-09-11-17.tar.gz
diff --git a/scripts/training/MGIZA/TODO.tasks b/scripts/training/MGIZA/TODO.tasks
deleted file mode 100644
index d1fa282..0000000
--- a/scripts/training/MGIZA/TODO.tasks
+++ /dev/null
@@ -1,6 +0,0 @@
-<?xml version="1.0"?>
-<gtodo>
- <category title="Personal" place="0"/>
- <category title="Business" place="1"/>
- <category title="Unfiled" place="2"/>
-</gtodo>
diff --git a/scripts/training/MGIZA/aclocal.m4 b/scripts/training/MGIZA/aclocal.m4
deleted file mode 100644
index 9919c47..0000000
--- a/scripts/training/MGIZA/aclocal.m4
+++ /dev/null
@@ -1,932 +0,0 @@
-# generated automatically by aclocal 1.10.1 -*- Autoconf -*-
-
-# Copyright (C) 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004,
-# 2005, 2006, 2007, 2008 Free Software Foundation, Inc.
-# This file is free software; the Free Software Foundation
-# gives unlimited permission to copy and/or distribute it,
-# with or without modifications, as long as this notice is preserved.
-
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY, to the extent permitted by law; without
-# even the implied warranty of MERCHANTABILITY or FITNESS FOR A
-# PARTICULAR PURPOSE.
-
-m4_ifndef([AC_AUTOCONF_VERSION],
- [m4_copy([m4_PACKAGE_VERSION], [AC_AUTOCONF_VERSION])])dnl
-m4_if(AC_AUTOCONF_VERSION, [2.62],,
-[m4_warning([this file was generated for autoconf 2.62.
-You have another version of autoconf. It may work, but is not guaranteed to.
-If you have problems, you may need to regenerate the build system entirely.
-To do so, use the procedure documented by the package, typically `autoreconf'.])])
-
-# Copyright (C) 2002, 2003, 2005, 2006, 2007 Free Software Foundation, Inc.
-#
-# This file is free software; the Free Software Foundation
-# gives unlimited permission to copy and/or distribute it,
-# with or without modifications, as long as this notice is preserved.
-
-# AM_AUTOMAKE_VERSION(VERSION)
-# ----------------------------
-# Automake X.Y traces this macro to ensure aclocal.m4 has been
-# generated from the m4 files accompanying Automake X.Y.
-# (This private macro should not be called outside this file.)
-AC_DEFUN([AM_AUTOMAKE_VERSION],
-[am__api_version='1.10'
-dnl Some users find AM_AUTOMAKE_VERSION and mistake it for a way to
-dnl require some minimum version. Point them to the right macro.
-m4_if([$1], [1.10.1], [],
- [AC_FATAL([Do not call $0, use AM_INIT_AUTOMAKE([$1]).])])dnl
-])
-
-# _AM_AUTOCONF_VERSION(VERSION)
-# -----------------------------
-# aclocal traces this macro to find the Autoconf version.
-# This is a private macro too. Using m4_define simplifies
-# the logic in aclocal, which can simply ignore this definition.
-m4_define([_AM_AUTOCONF_VERSION], [])
-
-# AM_SET_CURRENT_AUTOMAKE_VERSION
-# -------------------------------
-# Call AM_AUTOMAKE_VERSION and AM_AUTOMAKE_VERSION so they can be traced.
-# This function is AC_REQUIREd by AC_INIT_AUTOMAKE.
-AC_DEFUN([AM_SET_CURRENT_AUTOMAKE_VERSION],
-[AM_AUTOMAKE_VERSION([1.10.1])dnl
-m4_ifndef([AC_AUTOCONF_VERSION],
- [m4_copy([m4_PACKAGE_VERSION], [AC_AUTOCONF_VERSION])])dnl
-_AM_AUTOCONF_VERSION(AC_AUTOCONF_VERSION)])
-
-# AM_AUX_DIR_EXPAND -*- Autoconf -*-
-
-# Copyright (C) 2001, 2003, 2005 Free Software Foundation, Inc.
-#
-# This file is free software; the Free Software Foundation
-# gives unlimited permission to copy and/or distribute it,
-# with or without modifications, as long as this notice is preserved.
-
-# For projects using AC_CONFIG_AUX_DIR([foo]), Autoconf sets
-# $ac_aux_dir to `$srcdir/foo'. In other projects, it is set to
-# `$srcdir', `$srcdir/..', or `$srcdir/../..'.
-#
-# Of course, Automake must honor this variable whenever it calls a
-# tool from the auxiliary directory. The problem is that $srcdir (and
-# therefore $ac_aux_dir as well) can be either absolute or relative,
-# depending on how configure is run. This is pretty annoying, since
-# it makes $ac_aux_dir quite unusable in subdirectories: in the top
-# source directory, any form will work fine, but in subdirectories a
-# relative path needs to be adjusted first.
-#
-# $ac_aux_dir/missing
-# fails when called from a subdirectory if $ac_aux_dir is relative
-# $top_srcdir/$ac_aux_dir/missing
-# fails if $ac_aux_dir is absolute,
-# fails when called from a subdirectory in a VPATH build with
-# a relative $ac_aux_dir
-#
-# The reason of the latter failure is that $top_srcdir and $ac_aux_dir
-# are both prefixed by $srcdir. In an in-source build this is usually
-# harmless because $srcdir is `.', but things will broke when you
-# start a VPATH build or use an absolute $srcdir.
-#
-# So we could use something similar to $top_srcdir/$ac_aux_dir/missing,
-# iff we strip the leading $srcdir from $ac_aux_dir. That would be:
-# am_aux_dir='\$(top_srcdir)/'`expr "$ac_aux_dir" : "$srcdir//*\(.*\)"`
-# and then we would define $MISSING as
-# MISSING="\${SHELL} $am_aux_dir/missing"
-# This will work as long as MISSING is not called from configure, because
-# unfortunately $(top_srcdir) has no meaning in configure.
-# However there are other variables, like CC, which are often used in
-# configure, and could therefore not use this "fixed" $ac_aux_dir.
-#
-# Another solution, used here, is to always expand $ac_aux_dir to an
-# absolute PATH. The drawback is that using absolute paths prevent a
-# configured tree to be moved without reconfiguration.
-
-AC_DEFUN([AM_AUX_DIR_EXPAND],
-[dnl Rely on autoconf to set up CDPATH properly.
-AC_PREREQ([2.50])dnl
-# expand $ac_aux_dir to an absolute path
-am_aux_dir=`cd $ac_aux_dir && pwd`
-])
-
-
-# Copyright (C) 1996, 1997, 1999, 2000, 2001, 2002, 2003, 2005
-# Free Software Foundation, Inc.
-#
-# This file is free software; the Free Software Foundation
-# gives unlimited permission to copy and/or distribute it,
-# with or without modifications, as long as this notice is preserved.
-
-# serial 4
-
-# This was merged into AC_PROG_CC in Autoconf.
-
-AU_DEFUN([AM_PROG_CC_STDC],
-[AC_PROG_CC
-AC_DIAGNOSE([obsolete], [$0:
- your code should no longer depend upon `am_cv_prog_cc_stdc', but upon
- `ac_cv_prog_cc_stdc'. Remove this warning and the assignment when
- you adjust the code. You can also remove the above call to
- AC_PROG_CC if you already called it elsewhere.])
-am_cv_prog_cc_stdc=$ac_cv_prog_cc_stdc
-])
-AU_DEFUN([fp_PROG_CC_STDC])
-
-# AM_CONDITIONAL -*- Autoconf -*-
-
-# Copyright (C) 1997, 2000, 2001, 2003, 2004, 2005, 2006
-# Free Software Foundation, Inc.
-#
-# This file is free software; the Free Software Foundation
-# gives unlimited permission to copy and/or distribute it,
-# with or without modifications, as long as this notice is preserved.
-
-# serial 8
-
-# AM_CONDITIONAL(NAME, SHELL-CONDITION)
-# -------------------------------------
-# Define a conditional.
-AC_DEFUN([AM_CONDITIONAL],
-[AC_PREREQ(2.52)dnl
- ifelse([$1], [TRUE], [AC_FATAL([$0: invalid condition: $1])],
- [$1], [FALSE], [AC_FATAL([$0: invalid condition: $1])])dnl
-AC_SUBST([$1_TRUE])dnl
-AC_SUBST([$1_FALSE])dnl
-_AM_SUBST_NOTMAKE([$1_TRUE])dnl
-_AM_SUBST_NOTMAKE([$1_FALSE])dnl
-if $2; then
- $1_TRUE=
- $1_FALSE='#'
-else
- $1_TRUE='#'
- $1_FALSE=
-fi
-AC_CONFIG_COMMANDS_PRE(
-[if test -z "${$1_TRUE}" && test -z "${$1_FALSE}"; then
- AC_MSG_ERROR([[conditional "$1" was never defined.
-Usually this means the macro was only invoked conditionally.]])
-fi])])
-
-# Copyright (C) 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006
-# Free Software Foundation, Inc.
-#
-# This file is free software; the Free Software Foundation
-# gives unlimited permission to copy and/or distribute it,
-# with or without modifications, as long as this notice is preserved.
-
-# serial 9
-
-# There are a few dirty hacks below to avoid letting `AC_PROG_CC' be
-# written in clear, in which case automake, when reading aclocal.m4,
-# will think it sees a *use*, and therefore will trigger all it's
-# C support machinery. Also note that it means that autoscan, seeing
-# CC etc. in the Makefile, will ask for an AC_PROG_CC use...
-
-
-# _AM_DEPENDENCIES(NAME)
-# ----------------------
-# See how the compiler implements dependency checking.
-# NAME is "CC", "CXX", "GCJ", or "OBJC".
-# We try a few techniques and use that to set a single cache variable.
-#
-# We don't AC_REQUIRE the corresponding AC_PROG_CC since the latter was
-# modified to invoke _AM_DEPENDENCIES(CC); we would have a circular
-# dependency, and given that the user is not expected to run this macro,
-# just rely on AC_PROG_CC.
-AC_DEFUN([_AM_DEPENDENCIES],
-[AC_REQUIRE([AM_SET_DEPDIR])dnl
-AC_REQUIRE([AM_OUTPUT_DEPENDENCY_COMMANDS])dnl
-AC_REQUIRE([AM_MAKE_INCLUDE])dnl
-AC_REQUIRE([AM_DEP_TRACK])dnl
-
-ifelse([$1], CC, [depcc="$CC" am_compiler_list=],
- [$1], CXX, [depcc="$CXX" am_compiler_list=],
- [$1], OBJC, [depcc="$OBJC" am_compiler_list='gcc3 gcc'],
- [$1], UPC, [depcc="$UPC" am_compiler_list=],
- [$1], GCJ, [depcc="$GCJ" am_compiler_list='gcc3 gcc'],
- [depcc="$$1" am_compiler_list=])
-
-AC_CACHE_CHECK([dependency style of $depcc],
- [am_cv_$1_dependencies_compiler_type],
-[if test -z "$AMDEP_TRUE" && test -f "$am_depcomp"; then
- # We make a subdir and do the tests there. Otherwise we can end up
- # making bogus files that we don't know about and never remove. For
- # instance it was reported that on HP-UX the gcc test will end up
- # making a dummy file named `D' -- because `-MD' means `put the output
- # in D'.
- mkdir conftest.dir
- # Copy depcomp to subdir because otherwise we won't find it if we're
- # using a relative directory.
- cp "$am_depcomp" conftest.dir
- cd conftest.dir
- # We will build objects and dependencies in a subdirectory because
- # it helps to detect inapplicable dependency modes. For instance
- # both Tru64's cc and ICC support -MD to output dependencies as a
- # side effect of compilation, but ICC will put the dependencies in
- # the current directory while Tru64 will put them in the object
- # directory.
- mkdir sub
-
- am_cv_$1_dependencies_compiler_type=none
- if test "$am_compiler_list" = ""; then
- am_compiler_list=`sed -n ['s/^#*\([a-zA-Z0-9]*\))$/\1/p'] < ./depcomp`
- fi
- for depmode in $am_compiler_list; do
- # Setup a source with many dependencies, because some compilers
- # like to wrap large dependency lists on column 80 (with \), and
- # we should not choose a depcomp mode which is confused by this.
- #
- # We need to recreate these files for each test, as the compiler may
- # overwrite some of them when testing with obscure command lines.
- # This happens at least with the AIX C compiler.
- : > sub/conftest.c
- for i in 1 2 3 4 5 6; do
- echo '#include "conftst'$i'.h"' >> sub/conftest.c
- # Using `: > sub/conftst$i.h' creates only sub/conftst1.h with
- # Solaris 8's {/usr,}/bin/sh.
- touch sub/conftst$i.h
- done
- echo "${am__include} ${am__quote}sub/conftest.Po${am__quote}" > confmf
-
- case $depmode in
- nosideeffect)
- # after this tag, mechanisms are not by side-effect, so they'll
- # only be used when explicitly requested
- if test "x$enable_dependency_tracking" = xyes; then
- continue
- else
- break
- fi
- ;;
- none) break ;;
- esac
- # We check with `-c' and `-o' for the sake of the "dashmstdout"
- # mode. It turns out that the SunPro C++ compiler does not properly
- # handle `-M -o', and we need to detect this.
- if depmode=$depmode \
- source=sub/conftest.c object=sub/conftest.${OBJEXT-o} \
- depfile=sub/conftest.Po tmpdepfile=sub/conftest.TPo \
- $SHELL ./depcomp $depcc -c -o sub/conftest.${OBJEXT-o} sub/conftest.c \
- >/dev/null 2>conftest.err &&
- grep sub/conftst1.h sub/conftest.Po > /dev/null 2>&1 &&
- grep sub/conftst6.h sub/conftest.Po > /dev/null 2>&1 &&
- grep sub/conftest.${OBJEXT-o} sub/conftest.Po > /dev/null 2>&1 &&
- ${MAKE-make} -s -f confmf > /dev/null 2>&1; then
- # icc doesn't choke on unknown options, it will just issue warnings
- # or remarks (even with -Werror). So we grep stderr for any message
- # that says an option was ignored or not supported.
- # When given -MP, icc 7.0 and 7.1 complain thusly:
- # icc: Command line warning: ignoring option '-M'; no argument required
- # The diagnosis changed in icc 8.0:
- # icc: Command line remark: option '-MP' not supported
- if (grep 'ignoring option' conftest.err ||
- grep 'not supported' conftest.err) >/dev/null 2>&1; then :; else
- am_cv_$1_dependencies_compiler_type=$depmode
- break
- fi
- fi
- done
-
- cd ..
- rm -rf conftest.dir
-else
- am_cv_$1_dependencies_compiler_type=none
-fi
-])
-AC_SUBST([$1DEPMODE], [depmode=$am_cv_$1_dependencies_compiler_type])
-AM_CONDITIONAL([am__fastdep$1], [
- test "x$enable_dependency_tracking" != xno \
- && test "$am_cv_$1_dependencies_compiler_type" = gcc3])
-])
-
-
-# AM_SET_DEPDIR
-# -------------
-# Choose a directory name for dependency files.
-# This macro is AC_REQUIREd in _AM_DEPENDENCIES
-AC_DEFUN([AM_SET_DEPDIR],
-[AC_REQUIRE([AM_SET_LEADING_DOT])dnl
-AC_SUBST([DEPDIR], ["${am__leading_dot}deps"])dnl
-])
-
-
-# AM_DEP_TRACK
-# ------------
-AC_DEFUN([AM_DEP_TRACK],
-[AC_ARG_ENABLE(dependency-tracking,
-[ --disable-dependency-tracking speeds up one-time build
- --enable-dependency-tracking do not reject slow dependency extractors])
-if test "x$enable_dependency_tracking" != xno; then
- am_depcomp="$ac_aux_dir/depcomp"
- AMDEPBACKSLASH='\'
-fi
-AM_CONDITIONAL([AMDEP], [test "x$enable_dependency_tracking" != xno])
-AC_SUBST([AMDEPBACKSLASH])dnl
-_AM_SUBST_NOTMAKE([AMDEPBACKSLASH])dnl
-])
-
-# Generate code to set up dependency tracking. -*- Autoconf -*-
-
-# Copyright (C) 1999, 2000, 2001, 2002, 2003, 2004, 2005
-# Free Software Foundation, Inc.
-#
-# This file is free software; the Free Software Foundation
-# gives unlimited permission to copy and/or distribute it,
-# with or without modifications, as long as this notice is preserved.
-
-#serial 3
-
-# _AM_OUTPUT_DEPENDENCY_COMMANDS
-# ------------------------------
-AC_DEFUN([_AM_OUTPUT_DEPENDENCY_COMMANDS],
-[for mf in $CONFIG_FILES; do
- # Strip MF so we end up with the name of the file.
- mf=`echo "$mf" | sed -e 's/:.*$//'`
- # Check whether this is an Automake generated Makefile or not.
- # We used to match only the files named `Makefile.in', but
- # some people rename them; so instead we look at the file content.
- # Grep'ing the first line is not enough: some people post-process
- # each Makefile.in and add a new line on top of each file to say so.
- # Grep'ing the whole file is not good either: AIX grep has a line
- # limit of 2048, but all sed's we know have understand at least 4000.
- if sed -n 's,^#.*generated by automake.*,X,p' "$mf" | grep X >/dev/null 2>&1; then
- dirpart=`AS_DIRNAME("$mf")`
- else
- continue
- fi
- # Extract the definition of DEPDIR, am__include, and am__quote
- # from the Makefile without running `make'.
- DEPDIR=`sed -n 's/^DEPDIR = //p' < "$mf"`
- test -z "$DEPDIR" && continue
- am__include=`sed -n 's/^am__include = //p' < "$mf"`
- test -z "am__include" && continue
- am__quote=`sed -n 's/^am__quote = //p' < "$mf"`
- # When using ansi2knr, U may be empty or an underscore; expand it
- U=`sed -n 's/^U = //p' < "$mf"`
- # Find all dependency output files, they are included files with
- # $(DEPDIR) in their names. We invoke sed twice because it is the
- # simplest approach to changing $(DEPDIR) to its actual value in the
- # expansion.
- for file in `sed -n "
- s/^$am__include $am__quote\(.*(DEPDIR).*\)$am__quote"'$/\1/p' <"$mf" | \
- sed -e 's/\$(DEPDIR)/'"$DEPDIR"'/g' -e 's/\$U/'"$U"'/g'`; do
- # Make sure the directory exists.
- test -f "$dirpart/$file" && continue
- fdir=`AS_DIRNAME(["$file"])`
- AS_MKDIR_P([$dirpart/$fdir])
- # echo "creating $dirpart/$file"
- echo '# dummy' > "$dirpart/$file"
- done
-done
-])# _AM_OUTPUT_DEPENDENCY_COMMANDS
-
-
-# AM_OUTPUT_DEPENDENCY_COMMANDS
-# -----------------------------
-# This macro should only be invoked once -- use via AC_REQUIRE.
-#
-# This code is only required when automatic dependency tracking
-# is enabled. FIXME. This creates each `.P' file that we will
-# need in order to bootstrap the dependency handling code.
-AC_DEFUN([AM_OUTPUT_DEPENDENCY_COMMANDS],
-[AC_CONFIG_COMMANDS([depfiles],
- [test x"$AMDEP_TRUE" != x"" || _AM_OUTPUT_DEPENDENCY_COMMANDS],
- [AMDEP_TRUE="$AMDEP_TRUE" ac_aux_dir="$ac_aux_dir"])
-])
-
-# Copyright (C) 1996, 1997, 2000, 2001, 2003, 2005
-# Free Software Foundation, Inc.
-#
-# This file is free software; the Free Software Foundation
-# gives unlimited permission to copy and/or distribute it,
-# with or without modifications, as long as this notice is preserved.
-
-# serial 8
-
-# AM_CONFIG_HEADER is obsolete. It has been replaced by AC_CONFIG_HEADERS.
-AU_DEFUN([AM_CONFIG_HEADER], [AC_CONFIG_HEADERS($@)])
-
-# Do all the work for Automake. -*- Autoconf -*-
-
-# Copyright (C) 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004,
-# 2005, 2006, 2008 Free Software Foundation, Inc.
-#
-# This file is free software; the Free Software Foundation
-# gives unlimited permission to copy and/or distribute it,
-# with or without modifications, as long as this notice is preserved.
-
-# serial 13
-
-# This macro actually does too much. Some checks are only needed if
-# your package does certain things. But this isn't really a big deal.
-
-# AM_INIT_AUTOMAKE(PACKAGE, VERSION, [NO-DEFINE])
-# AM_INIT_AUTOMAKE([OPTIONS])
-# -----------------------------------------------
-# The call with PACKAGE and VERSION arguments is the old style
-# call (pre autoconf-2.50), which is being phased out. PACKAGE
-# and VERSION should now be passed to AC_INIT and removed from
-# the call to AM_INIT_AUTOMAKE.
-# We support both call styles for the transition. After
-# the next Automake release, Autoconf can make the AC_INIT
-# arguments mandatory, and then we can depend on a new Autoconf
-# release and drop the old call support.
-AC_DEFUN([AM_INIT_AUTOMAKE],
-[AC_PREREQ([2.60])dnl
-dnl Autoconf wants to disallow AM_ names. We explicitly allow
-dnl the ones we care about.
-m4_pattern_allow([^AM_[A-Z]+FLAGS$])dnl
-AC_REQUIRE([AM_SET_CURRENT_AUTOMAKE_VERSION])dnl
-AC_REQUIRE([AC_PROG_INSTALL])dnl
-if test "`cd $srcdir && pwd`" != "`pwd`"; then
- # Use -I$(srcdir) only when $(srcdir) != ., so that make's output
- # is not polluted with repeated "-I."
- AC_SUBST([am__isrc], [' -I$(srcdir)'])_AM_SUBST_NOTMAKE([am__isrc])dnl
- # test to see if srcdir already configured
- if test -f $srcdir/config.status; then
- AC_MSG_ERROR([source directory already configured; run "make distclean" there first])
- fi
-fi
-
-# test whether we have cygpath
-if test -z "$CYGPATH_W"; then
- if (cygpath --version) >/dev/null 2>/dev/null; then
- CYGPATH_W='cygpath -w'
- else
- CYGPATH_W=echo
- fi
-fi
-AC_SUBST([CYGPATH_W])
-
-# Define the identity of the package.
-dnl Distinguish between old-style and new-style calls.
-m4_ifval([$2],
-[m4_ifval([$3], [_AM_SET_OPTION([no-define])])dnl
- AC_SUBST([PACKAGE], [$1])dnl
- AC_SUBST([VERSION], [$2])],
-[_AM_SET_OPTIONS([$1])dnl
-dnl Diagnose old-style AC_INIT with new-style AM_AUTOMAKE_INIT.
-m4_if(m4_ifdef([AC_PACKAGE_NAME], 1)m4_ifdef([AC_PACKAGE_VERSION], 1), 11,,
- [m4_fatal([AC_INIT should be called with package and version arguments])])dnl
- AC_SUBST([PACKAGE], ['AC_PACKAGE_TARNAME'])dnl
- AC_SUBST([VERSION], ['AC_PACKAGE_VERSION'])])dnl
-
-_AM_IF_OPTION([no-define],,
-[AC_DEFINE_UNQUOTED(PACKAGE, "$PACKAGE", [Name of package])
- AC_DEFINE_UNQUOTED(VERSION, "$VERSION", [Version number of package])])dnl
-
-# Some tools Automake needs.
-AC_REQUIRE([AM_SANITY_CHECK])dnl
-AC_REQUIRE([AC_ARG_PROGRAM])dnl
-AM_MISSING_PROG(ACLOCAL, aclocal-${am__api_version})
-AM_MISSING_PROG(AUTOCONF, autoconf)
-AM_MISSING_PROG(AUTOMAKE, automake-${am__api_version})
-AM_MISSING_PROG(AUTOHEADER, autoheader)
-AM_MISSING_PROG(MAKEINFO, makeinfo)
-AM_PROG_INSTALL_SH
-AM_PROG_INSTALL_STRIP
-AC_REQUIRE([AM_PROG_MKDIR_P])dnl
-# We need awk for the "check" target. The system "awk" is bad on
-# some platforms.
-AC_REQUIRE([AC_PROG_AWK])dnl
-AC_REQUIRE([AC_PROG_MAKE_SET])dnl
-AC_REQUIRE([AM_SET_LEADING_DOT])dnl
-_AM_IF_OPTION([tar-ustar], [_AM_PROG_TAR([ustar])],
- [_AM_IF_OPTION([tar-pax], [_AM_PROG_TAR([pax])],
- [_AM_PROG_TAR([v7])])])
-_AM_IF_OPTION([no-dependencies],,
-[AC_PROVIDE_IFELSE([AC_PROG_CC],
- [_AM_DEPENDENCIES(CC)],
- [define([AC_PROG_CC],
- defn([AC_PROG_CC])[_AM_DEPENDENCIES(CC)])])dnl
-AC_PROVIDE_IFELSE([AC_PROG_CXX],
- [_AM_DEPENDENCIES(CXX)],
- [define([AC_PROG_CXX],
- defn([AC_PROG_CXX])[_AM_DEPENDENCIES(CXX)])])dnl
-AC_PROVIDE_IFELSE([AC_PROG_OBJC],
- [_AM_DEPENDENCIES(OBJC)],
- [define([AC_PROG_OBJC],
- defn([AC_PROG_OBJC])[_AM_DEPENDENCIES(OBJC)])])dnl
-])
-])
-
-
-# When config.status generates a header, we must update the stamp-h file.
-# This file resides in the same directory as the config header
-# that is generated. The stamp files are numbered to have different names.
-
-# Autoconf calls _AC_AM_CONFIG_HEADER_HOOK (when defined) in the
-# loop where config.status creates the headers, so we can generate
-# our stamp files there.
-AC_DEFUN([_AC_AM_CONFIG_HEADER_HOOK],
-[# Compute $1's index in $config_headers.
-_am_arg=$1
-_am_stamp_count=1
-for _am_header in $config_headers :; do
- case $_am_header in
- $_am_arg | $_am_arg:* )
- break ;;
- * )
- _am_stamp_count=`expr $_am_stamp_count + 1` ;;
- esac
-done
-echo "timestamp for $_am_arg" >`AS_DIRNAME(["$_am_arg"])`/stamp-h[]$_am_stamp_count])
-
-# Copyright (C) 2001, 2003, 2005 Free Software Foundation, Inc.
-#
-# This file is free software; the Free Software Foundation
-# gives unlimited permission to copy and/or distribute it,
-# with or without modifications, as long as this notice is preserved.
-
-# AM_PROG_INSTALL_SH
-# ------------------
-# Define $install_sh.
-AC_DEFUN([AM_PROG_INSTALL_SH],
-[AC_REQUIRE([AM_AUX_DIR_EXPAND])dnl
-install_sh=${install_sh-"\$(SHELL) $am_aux_dir/install-sh"}
-AC_SUBST(install_sh)])
-
-# Copyright (C) 2003, 2005 Free Software Foundation, Inc.
-#
-# This file is free software; the Free Software Foundation
-# gives unlimited permission to copy and/or distribute it,
-# with or without modifications, as long as this notice is preserved.
-
-# serial 2
-
-# Check whether the underlying file-system supports filenames
-# with a leading dot. For instance MS-DOS doesn't.
-AC_DEFUN([AM_SET_LEADING_DOT],
-[rm -rf .tst 2>/dev/null
-mkdir .tst 2>/dev/null
-if test -d .tst; then
- am__leading_dot=.
-else
- am__leading_dot=_
-fi
-rmdir .tst 2>/dev/null
-AC_SUBST([am__leading_dot])])
-
-# Add --enable-maintainer-mode option to configure. -*- Autoconf -*-
-# From Jim Meyering
-
-# Copyright (C) 1996, 1998, 2000, 2001, 2002, 2003, 2004, 2005
-# Free Software Foundation, Inc.
-#
-# This file is free software; the Free Software Foundation
-# gives unlimited permission to copy and/or distribute it,
-# with or without modifications, as long as this notice is preserved.
-
-# serial 4
-
-AC_DEFUN([AM_MAINTAINER_MODE],
-[AC_MSG_CHECKING([whether to enable maintainer-specific portions of Makefiles])
- dnl maintainer-mode is disabled by default
- AC_ARG_ENABLE(maintainer-mode,
-[ --enable-maintainer-mode enable make rules and dependencies not useful
- (and sometimes confusing) to the casual installer],
- USE_MAINTAINER_MODE=$enableval,
- USE_MAINTAINER_MODE=no)
- AC_MSG_RESULT([$USE_MAINTAINER_MODE])
- AM_CONDITIONAL(MAINTAINER_MODE, [test $USE_MAINTAINER_MODE = yes])
- MAINT=$MAINTAINER_MODE_TRUE
- AC_SUBST(MAINT)dnl
-]
-)
-
-AU_DEFUN([jm_MAINTAINER_MODE], [AM_MAINTAINER_MODE])
-
-# Check to see how 'make' treats includes. -*- Autoconf -*-
-
-# Copyright (C) 2001, 2002, 2003, 2005 Free Software Foundation, Inc.
-#
-# This file is free software; the Free Software Foundation
-# gives unlimited permission to copy and/or distribute it,
-# with or without modifications, as long as this notice is preserved.
-
-# serial 3
-
-# AM_MAKE_INCLUDE()
-# -----------------
-# Check to see how make treats includes.
-AC_DEFUN([AM_MAKE_INCLUDE],
-[am_make=${MAKE-make}
-cat > confinc << 'END'
-am__doit:
- @echo done
-.PHONY: am__doit
-END
-# If we don't find an include directive, just comment out the code.
-AC_MSG_CHECKING([for style of include used by $am_make])
-am__include="#"
-am__quote=
-_am_result=none
-# First try GNU make style include.
-echo "include confinc" > confmf
-# We grep out `Entering directory' and `Leaving directory'
-# messages which can occur if `w' ends up in MAKEFLAGS.
-# In particular we don't look at `^make:' because GNU make might
-# be invoked under some other name (usually "gmake"), in which
-# case it prints its new name instead of `make'.
-if test "`$am_make -s -f confmf 2> /dev/null | grep -v 'ing directory'`" = "done"; then
- am__include=include
- am__quote=
- _am_result=GNU
-fi
-# Now try BSD make style include.
-if test "$am__include" = "#"; then
- echo '.include "confinc"' > confmf
- if test "`$am_make -s -f confmf 2> /dev/null`" = "done"; then
- am__include=.include
- am__quote="\""
- _am_result=BSD
- fi
-fi
-AC_SUBST([am__include])
-AC_SUBST([am__quote])
-AC_MSG_RESULT([$_am_result])
-rm -f confinc confmf
-])
-
-# Fake the existence of programs that GNU maintainers use. -*- Autoconf -*-
-
-# Copyright (C) 1997, 1999, 2000, 2001, 2003, 2004, 2005
-# Free Software Foundation, Inc.
-#
-# This file is free software; the Free Software Foundation
-# gives unlimited permission to copy and/or distribute it,
-# with or without modifications, as long as this notice is preserved.
-
-# serial 5
-
-# AM_MISSING_PROG(NAME, PROGRAM)
-# ------------------------------
-AC_DEFUN([AM_MISSING_PROG],
-[AC_REQUIRE([AM_MISSING_HAS_RUN])
-$1=${$1-"${am_missing_run}$2"}
-AC_SUBST($1)])
-
-
-# AM_MISSING_HAS_RUN
-# ------------------
-# Define MISSING if not defined so far and test if it supports --run.
-# If it does, set am_missing_run to use it, otherwise, to nothing.
-AC_DEFUN([AM_MISSING_HAS_RUN],
-[AC_REQUIRE([AM_AUX_DIR_EXPAND])dnl
-AC_REQUIRE_AUX_FILE([missing])dnl
-test x"${MISSING+set}" = xset || MISSING="\${SHELL} $am_aux_dir/missing"
-# Use eval to expand $SHELL
-if eval "$MISSING --run true"; then
- am_missing_run="$MISSING --run "
-else
- am_missing_run=
- AC_MSG_WARN([`missing' script is too old or missing])
-fi
-])
-
-# Copyright (C) 2003, 2004, 2005, 2006 Free Software Foundation, Inc.
-#
-# This file is free software; the Free Software Foundation
-# gives unlimited permission to copy and/or distribute it,
-# with or without modifications, as long as this notice is preserved.
-
-# AM_PROG_MKDIR_P
-# ---------------
-# Check for `mkdir -p'.
-AC_DEFUN([AM_PROG_MKDIR_P],
-[AC_PREREQ([2.60])dnl
-AC_REQUIRE([AC_PROG_MKDIR_P])dnl
-dnl Automake 1.8 to 1.9.6 used to define mkdir_p. We now use MKDIR_P,
-dnl while keeping a definition of mkdir_p for backward compatibility.
-dnl @MKDIR_P@ is magic: AC_OUTPUT adjusts its value for each Makefile.
-dnl However we cannot define mkdir_p as $(MKDIR_P) for the sake of
-dnl Makefile.ins that do not define MKDIR_P, so we do our own
-dnl adjustment using top_builddir (which is defined more often than
-dnl MKDIR_P).
-AC_SUBST([mkdir_p], ["$MKDIR_P"])dnl
-case $mkdir_p in
- [[\\/$]]* | ?:[[\\/]]*) ;;
- */*) mkdir_p="\$(top_builddir)/$mkdir_p" ;;
-esac
-])
-
-# Helper functions for option handling. -*- Autoconf -*-
-
-# Copyright (C) 2001, 2002, 2003, 2005 Free Software Foundation, Inc.
-#
-# This file is free software; the Free Software Foundation
-# gives unlimited permission to copy and/or distribute it,
-# with or without modifications, as long as this notice is preserved.
-
-# serial 3
-
-# _AM_MANGLE_OPTION(NAME)
-# -----------------------
-AC_DEFUN([_AM_MANGLE_OPTION],
-[[_AM_OPTION_]m4_bpatsubst($1, [[^a-zA-Z0-9_]], [_])])
-
-# _AM_SET_OPTION(NAME)
-# ------------------------------
-# Set option NAME. Presently that only means defining a flag for this option.
-AC_DEFUN([_AM_SET_OPTION],
-[m4_define(_AM_MANGLE_OPTION([$1]), 1)])
-
-# _AM_SET_OPTIONS(OPTIONS)
-# ----------------------------------
-# OPTIONS is a space-separated list of Automake options.
-AC_DEFUN([_AM_SET_OPTIONS],
-[AC_FOREACH([_AM_Option], [$1], [_AM_SET_OPTION(_AM_Option)])])
-
-# _AM_IF_OPTION(OPTION, IF-SET, [IF-NOT-SET])
-# -------------------------------------------
-# Execute IF-SET if OPTION is set, IF-NOT-SET otherwise.
-AC_DEFUN([_AM_IF_OPTION],
-[m4_ifset(_AM_MANGLE_OPTION([$1]), [$2], [$3])])
-
-# Check to make sure that the build environment is sane. -*- Autoconf -*-
-
-# Copyright (C) 1996, 1997, 2000, 2001, 2003, 2005
-# Free Software Foundation, Inc.
-#
-# This file is free software; the Free Software Foundation
-# gives unlimited permission to copy and/or distribute it,
-# with or without modifications, as long as this notice is preserved.
-
-# serial 4
-
-# AM_SANITY_CHECK
-# ---------------
-AC_DEFUN([AM_SANITY_CHECK],
-[AC_MSG_CHECKING([whether build environment is sane])
-# Just in case
-sleep 1
-echo timestamp > conftest.file
-# Do `set' in a subshell so we don't clobber the current shell's
-# arguments. Must try -L first in case configure is actually a
-# symlink; some systems play weird games with the mod time of symlinks
-# (eg FreeBSD returns the mod time of the symlink's containing
-# directory).
-if (
- set X `ls -Lt $srcdir/configure conftest.file 2> /dev/null`
- if test "$[*]" = "X"; then
- # -L didn't work.
- set X `ls -t $srcdir/configure conftest.file`
- fi
- rm -f conftest.file
- if test "$[*]" != "X $srcdir/configure conftest.file" \
- && test "$[*]" != "X conftest.file $srcdir/configure"; then
-
- # If neither matched, then we have a broken ls. This can happen
- # if, for instance, CONFIG_SHELL is bash and it inherits a
- # broken ls alias from the environment. This has actually
- # happened. Such a system could not be considered "sane".
- AC_MSG_ERROR([ls -t appears to fail. Make sure there is not a broken
-alias in your environment])
- fi
-
- test "$[2]" = conftest.file
- )
-then
- # Ok.
- :
-else
- AC_MSG_ERROR([newly created file is older than distributed files!
-Check your system clock])
-fi
-AC_MSG_RESULT(yes)])
-
-# Copyright (C) 2001, 2003, 2005 Free Software Foundation, Inc.
-#
-# This file is free software; the Free Software Foundation
-# gives unlimited permission to copy and/or distribute it,
-# with or without modifications, as long as this notice is preserved.
-
-# AM_PROG_INSTALL_STRIP
-# ---------------------
-# One issue with vendor `install' (even GNU) is that you can't
-# specify the program used to strip binaries. This is especially
-# annoying in cross-compiling environments, where the build's strip
-# is unlikely to handle the host's binaries.
-# Fortunately install-sh will honor a STRIPPROG variable, so we
-# always use install-sh in `make install-strip', and initialize
-# STRIPPROG with the value of the STRIP variable (set by the user).
-AC_DEFUN([AM_PROG_INSTALL_STRIP],
-[AC_REQUIRE([AM_PROG_INSTALL_SH])dnl
-# Installed binaries are usually stripped using `strip' when the user
-# run `make install-strip'. However `strip' might not be the right
-# tool to use in cross-compilation environments, therefore Automake
-# will honor the `STRIP' environment variable to overrule this program.
-dnl Don't test for $cross_compiling = yes, because it might be `maybe'.
-if test "$cross_compiling" != no; then
- AC_CHECK_TOOL([STRIP], [strip], :)
-fi
-INSTALL_STRIP_PROGRAM="\$(install_sh) -c -s"
-AC_SUBST([INSTALL_STRIP_PROGRAM])])
-
-# Copyright (C) 2006 Free Software Foundation, Inc.
-#
-# This file is free software; the Free Software Foundation
-# gives unlimited permission to copy and/or distribute it,
-# with or without modifications, as long as this notice is preserved.
-
-# _AM_SUBST_NOTMAKE(VARIABLE)
-# ---------------------------
-# Prevent Automake from outputting VARIABLE = @VARIABLE@ in Makefile.in.
-# This macro is traced by Automake.
-AC_DEFUN([_AM_SUBST_NOTMAKE])
-
-# Check how to create a tarball. -*- Autoconf -*-
-
-# Copyright (C) 2004, 2005 Free Software Foundation, Inc.
-#
-# This file is free software; the Free Software Foundation
-# gives unlimited permission to copy and/or distribute it,
-# with or without modifications, as long as this notice is preserved.
-
-# serial 2
-
-# _AM_PROG_TAR(FORMAT)
-# --------------------
-# Check how to create a tarball in format FORMAT.
-# FORMAT should be one of `v7', `ustar', or `pax'.
-#
-# Substitute a variable $(am__tar) that is a command
-# writing to stdout a FORMAT-tarball containing the directory
-# $tardir.
-# tardir=directory && $(am__tar) > result.tar
-#
-# Substitute a variable $(am__untar) that extract such
-# a tarball read from stdin.
-# $(am__untar) < result.tar
-AC_DEFUN([_AM_PROG_TAR],
-[# Always define AMTAR for backward compatibility.
-AM_MISSING_PROG([AMTAR], [tar])
-m4_if([$1], [v7],
- [am__tar='${AMTAR} chof - "$$tardir"'; am__untar='${AMTAR} xf -'],
- [m4_case([$1], [ustar],, [pax],,
- [m4_fatal([Unknown tar format])])
-AC_MSG_CHECKING([how to create a $1 tar archive])
-# Loop over all known methods to create a tar archive until one works.
-_am_tools='gnutar m4_if([$1], [ustar], [plaintar]) pax cpio none'
-_am_tools=${am_cv_prog_tar_$1-$_am_tools}
-# Do not fold the above two line into one, because Tru64 sh and
-# Solaris sh will not grok spaces in the rhs of `-'.
-for _am_tool in $_am_tools
-do
- case $_am_tool in
- gnutar)
- for _am_tar in tar gnutar gtar;
- do
- AM_RUN_LOG([$_am_tar --version]) && break
- done
- am__tar="$_am_tar --format=m4_if([$1], [pax], [posix], [$1]) -chf - "'"$$tardir"'
- am__tar_="$_am_tar --format=m4_if([$1], [pax], [posix], [$1]) -chf - "'"$tardir"'
- am__untar="$_am_tar -xf -"
- ;;
- plaintar)
- # Must skip GNU tar: if it does not support --format= it doesn't create
- # ustar tarball either.
- (tar --version) >/dev/null 2>&1 && continue
- am__tar='tar chf - "$$tardir"'
- am__tar_='tar chf - "$tardir"'
- am__untar='tar xf -'
- ;;
- pax)
- am__tar='pax -L -x $1 -w "$$tardir"'
- am__tar_='pax -L -x $1 -w "$tardir"'
- am__untar='pax -r'
- ;;
- cpio)
- am__tar='find "$$tardir" -print | cpio -o -H $1 -L'
- am__tar_='find "$tardir" -print | cpio -o -H $1 -L'
- am__untar='cpio -i -H $1 -d'
- ;;
- none)
- am__tar=false
- am__tar_=false
- am__untar=false
- ;;
- esac
-
- # If the value was cached, stop now. We just wanted to have am__tar
- # and am__untar set.
- test -n "${am_cv_prog_tar_$1}" && break
-
- # tar/untar a dummy directory, and stop if the command works
- rm -rf conftest.dir
- mkdir conftest.dir
- echo GrepMe > conftest.dir/file
- AM_RUN_LOG([tardir=conftest.dir && eval $am__tar_ >conftest.tar])
- rm -rf conftest.dir
- if test -s conftest.tar; then
- AM_RUN_LOG([$am__untar <conftest.tar])
- grep GrepMe conftest.dir/file >/dev/null 2>&1 && break
- fi
-done
-rm -rf conftest.dir
-
-AC_CACHE_VAL([am_cv_prog_tar_$1], [am_cv_prog_tar_$1=$_am_tool])
-AC_MSG_RESULT([$am_cv_prog_tar_$1])])
-AC_SUBST([am__tar])
-AC_SUBST([am__untar])
-]) # _AM_PROG_TAR
-
diff --git a/scripts/training/MGIZA/autogen.sh b/scripts/training/MGIZA/autogen.sh
deleted file mode 100755
index 9ab346a..0000000
--- a/scripts/training/MGIZA/autogen.sh
+++ /dev/null
@@ -1,159 +0,0 @@
-#!/bin/sh
-# Run this to generate all the initial makefiles, etc.
-
-srcdir=`dirname $0`
-test -z "$srcdir" && srcdir=.
-
-DIE=0
-
-if [ -n "$GNOME2_DIR" ]; then
- ACLOCAL_FLAGS="-I $GNOME2_DIR/share/aclocal $ACLOCAL_FLAGS"
- LD_LIBRARY_PATH="$GNOME2_DIR/lib:$LD_LIBRARY_PATH"
- PATH="$GNOME2_DIR/bin:$PATH"
- export PATH
- export LD_LIBRARY_PATH
-fi
-
-(test -f $srcdir/configure.ac) || {
- echo -n "**Error**: Directory "\`$srcdir\'" does not look like the"
- echo " top-level package directory"
- exit 1
-}
-
-(autoconf --version) < /dev/null > /dev/null 2>&1 || {
- echo
- echo "**Error**: You must have \`autoconf' installed."
- echo "Download the appropriate package for your distribution,"
- echo "or get the source tarball at ftp://ftp.gnu.org/pub/gnu/"
- DIE=1
-}
-
-(grep "^IT_PROG_INTLTOOL" $srcdir/configure.ac >/dev/null) && {
- (intltoolize --version) < /dev/null > /dev/null 2>&1 || {
- echo
- echo "**Error**: You must have \`intltool' installed."
- echo "You can get it from:"
- echo " ftp://ftp.gnome.org/pub/GNOME/"
- DIE=1
- }
-}
-
-(grep "^AM_PROG_XML_I18N_TOOLS" $srcdir/configure.ac >/dev/null) && {
- (xml-i18n-toolize --version) < /dev/null > /dev/null 2>&1 || {
- echo
- echo "**Error**: You must have \`xml-i18n-toolize' installed."
- echo "You can get it from:"
- echo " ftp://ftp.gnome.org/pub/GNOME/"
- DIE=1
- }
-}
-
-(grep "^AM_PROG_LIBTOOL" $srcdir/configure.ac >/dev/null) && {
- (libtool --version) < /dev/null > /dev/null 2>&1 || {
- echo
- echo "**Error**: You must have \`libtool' installed."
- echo "You can get it from: ftp://ftp.gnu.org/pub/gnu/"
- DIE=1
- }
-}
-
-(grep "^AM_GLIB_GNU_GETTEXT" $srcdir/configure.ac >/dev/null) && {
- (grep "sed.*POTFILES" $srcdir/configure.ac) > /dev/null || \
- (glib-gettextize --version) < /dev/null > /dev/null 2>&1 || {
- echo
- echo "**Error**: You must have \`glib' installed."
- echo "You can get it from: ftp://ftp.gtk.org/pub/gtk"
- DIE=1
- }
-}
-
-(automake --version) < /dev/null > /dev/null 2>&1 || {
- echo
- echo "**Error**: You must have \`automake' installed."
- echo "You can get it from: ftp://ftp.gnu.org/pub/gnu/"
- DIE=1
- NO_AUTOMAKE=yes
-}
-
-
-# if no automake, don't bother testing for aclocal
-test -n "$NO_AUTOMAKE" || (aclocal --version) < /dev/null > /dev/null 2>&1 || {
- echo
- echo "**Error**: Missing \`aclocal'. The version of \`automake'"
- echo "installed doesn't appear recent enough."
- echo "You can get automake from ftp://ftp.gnu.org/pub/gnu/"
- DIE=1
-}
-
-if test "$DIE" -eq 1; then
- exit 1
-fi
-
-if test -z "$*"; then
- echo "**Warning**: I am going to run \`configure' with no arguments."
- echo "If you wish to pass any to it, please specify them on the"
- echo \`$0\'" command line."
- echo
-fi
-
-case $CC in
-xlc )
- am_opt=--include-deps;;
-esac
-
-for coin in `find $srcdir -path $srcdir/CVS -prune -o -name configure.ac -print`
-do
- dr=`dirname $coin`
- if test -f $dr/NO-AUTO-GEN; then
- echo skipping $dr -- flagged as no auto-gen
- else
- echo processing $dr
- ( cd $dr
-
- aclocalinclude="$ACLOCAL_FLAGS"
-
- if grep "^AM_GLIB_GNU_GETTEXT" configure.ac >/dev/null; then
- echo "Creating $dr/aclocal.m4 ..."
- test -r $dr/aclocal.m4 || touch $dr/aclocal.m4
- echo "Running glib-gettextize... Ignore non-fatal messages."
- echo "no" | glib-gettextize --force --copy
- echo "Making $dr/aclocal.m4 writable ..."
- test -r $dr/aclocal.m4 && chmod u+w $dr/aclocal.m4
- fi
- if grep "^IT_PROG_INTLTOOL" configure.ac >/dev/null; then
- echo "Running intltoolize..."
- intltoolize --copy --force --automake
- fi
- if grep "^AM_PROG_XML_I18N_TOOLS" configure.ac >/dev/null; then
- echo "Running xml-i18n-toolize..."
- xml-i18n-toolize --copy --force --automake
- fi
- if grep "^AM_PROG_LIBTOOL" configure.ac >/dev/null; then
- if test -z "$NO_LIBTOOLIZE" ; then
- echo "Running libtoolize..."
- libtoolize --force --copy
- fi
- fi
- echo "Running aclocal $aclocalinclude ..."
- aclocal $aclocalinclude
- if grep "^AM_CONFIG_HEADER" configure.ac >/dev/null; then
- echo "Running autoheader..."
- autoheader
- fi
- echo "Running automake --gnu $am_opt ..."
- automake --add-missing --gnu $am_opt
- echo "Running autoconf ..."
- autoconf
- )
- fi
-done
-
-conf_flags="--enable-maintainer-mode"
-
-if test x$NOCONFIGURE = x; then
- echo Running $srcdir/configure $conf_flags "$@" ...
- $srcdir/configure $conf_flags "$@" \
- && echo Now type \`make\' to compile. || exit 1
-else
- echo Skipping configure process.
-fi
diff --git a/scripts/training/MGIZA/cmake/CheckCXXSourceCompiles.cmake b/scripts/training/MGIZA/cmake/CheckCXXSourceCompiles.cmake
deleted file mode 100644
index 3921c89..0000000
--- a/scripts/training/MGIZA/cmake/CheckCXXSourceCompiles.cmake
+++ /dev/null
@@ -1,60 +0,0 @@
-# - Check if the source code provided in the SOURCE argument compiles.
-# CHECK_CXX_SOURCE_COMPILES(SOURCE VAR)
-# - macro which checks if the source code compiles
-# SOURCE - source code to try to compile
-# VAR - variable to store whether the source code compiled
-#
-# The following variables may be set before calling this macro to
-# modify the way the check is run:
-#
-# CMAKE_REQUIRED_FLAGS = string of compile command line flags
-# CMAKE_REQUIRED_DEFINITIONS = list of macros to define (-DFOO=bar)
-# CMAKE_REQUIRED_INCLUDES = list of include directories
-# CMAKE_REQUIRED_LIBRARIES = list of libraries to link
-
-MACRO(CHECK_CXX_SOURCE_COMPILES SOURCE VAR)
- IF("${VAR}" MATCHES "^${VAR}$")
- SET(MACRO_CHECK_FUNCTION_DEFINITIONS
- "-D${VAR} ${CMAKE_REQUIRED_FLAGS}")
- IF(CMAKE_REQUIRED_LIBRARIES)
- SET(CHECK_CXX_SOURCE_COMPILES_ADD_LIBRARIES
- "-DLINK_LIBRARIES:STRING=${CMAKE_REQUIRED_LIBRARIES}")
- ELSE(CMAKE_REQUIRED_LIBRARIES)
- SET(CHECK_CXX_SOURCE_COMPILES_ADD_LIBRARIES)
- ENDIF(CMAKE_REQUIRED_LIBRARIES)
- IF(CMAKE_REQUIRED_INCLUDES)
- SET(CHECK_CXX_SOURCE_COMPILES_ADD_INCLUDES
- "-DINCLUDE_DIRECTORIES:STRING=${CMAKE_REQUIRED_INCLUDES}")
- ELSE(CMAKE_REQUIRED_INCLUDES)
- SET(CHECK_CXX_SOURCE_COMPILES_ADD_INCLUDES)
- ENDIF(CMAKE_REQUIRED_INCLUDES)
- FILE(WRITE "${CMAKE_BINARY_DIR}${CMAKE_FILES_DIRECTORY}/CMakeTmp/src.cxx"
- "${SOURCE}\n")
-
- MESSAGE(STATUS "Performing Test ${VAR}")
- TRY_COMPILE(${VAR}
- ${CMAKE_BINARY_DIR}
- ${CMAKE_BINARY_DIR}${CMAKE_FILES_DIRECTORY}/CMakeTmp/src.cxx
- COMPILE_DEFINITIONS ${CMAKE_REQUIRED_DEFINITIONS}
- CMAKE_FLAGS -DCOMPILE_DEFINITIONS:STRING=${MACRO_CHECK_FUNCTION_DEFINITIONS}
- "${CHECK_CXX_SOURCE_COMPILES_ADD_LIBRARIES}"
- "${CHECK_CXX_SOURCE_COMPILES_ADD_INCLUDES}"
- OUTPUT_VARIABLE OUTPUT)
- IF(${VAR})
- SET(${VAR} 1 CACHE INTERNAL "Test ${VAR}")
- MESSAGE(STATUS "Performing Test ${VAR} - Success")
- FILE(APPEND ${CMAKE_BINARY_DIR}${CMAKE_FILES_DIRECTORY}/CMakeOutput.log
- "Performing C++ SOURCE FILE Test ${VAR} succeded with the following output:\n"
- "${OUTPUT}\n"
- "Source file was:\n${SOURCE}\n")
- ELSE(${VAR})
- MESSAGE(STATUS "Performing Test ${VAR} - Failed")
- SET(${VAR} "" CACHE INTERNAL "Test ${VAR}")
- FILE(APPEND ${CMAKE_BINARY_DIR}${CMAKE_FILES_DIRECTORY}/CMakeError.log
- "Performing C++ SOURCE FILE Test ${VAR} failed with the following output:\n"
- "${OUTPUT}\n"
- "Source file was:\n${SOURCE}\n")
- ENDIF(${VAR})
- ENDIF("${VAR}" MATCHES "^${VAR}$")
-ENDMACRO(CHECK_CXX_SOURCE_COMPILES)
-
diff --git a/scripts/training/MGIZA/cmake/FindTR1.cmake b/scripts/training/MGIZA/cmake/FindTR1.cmake
deleted file mode 100644
index 3ef2b44..0000000
--- a/scripts/training/MGIZA/cmake/FindTR1.cmake
+++ /dev/null
@@ -1,84 +0,0 @@
-# Check availability of C++ TR1 contents.
-
-# Sets the following variables:
-#
-# TR1_SHARED_PTR_FOUND -- std::tr1::shared_ptr1<T> available
-# TR1_SHARED_PTR_USE_TR1_MEMORY -- #include <tr1/memory>
-# TR1_SHARED_PTR_USE_MEMORY -- #include <memory>
-
-# We need to have at least this version to support the VERSION_LESS argument to 'if' (2.6.2) and unset (2.6.3)
-cmake_policy(PUSH)
- cmake_minimum_required(VERSION 2.6.3)
-cmake_policy(POP)
-
-INCLUDE(${PROJECT_SOURCE_DIR}/cmake/CheckCXXSourceCompiles.cmake)
-# ---------------------------------------------------------------------------
-# std::tr1::shared_ptr<T>
-# ---------------------------------------------------------------------------
-
-check_cxx_source_compiles(
- "
- #include <tr1/memory>
- int main() {
- std::tr1::shared_ptr<int> ptr;
- return 0;
- }
- "
-TR1_SHARED_PTR_USE_TR1_MEMORY)
-check_cxx_source_compiles(
- "
- #include <memory>
- int main() {
- std::tr1::shared_ptr<int> ptr;
- return 0;
- }
- "
-TR1_SHARED_PTR_USE_MEMORY)
-
-set (TR1_SHARED_PTR -NOTFOUND)
-if (TR1_SHARED_PTR_USE_TR1_MEMORY)
-set (TR1_SHARED_PTR_FOUND TRUE)
-endif (TR1_SHARED_PTR_USE_TR1_MEMORY)
-if (TR1_SHARED_PTR_USE_MEMORY)
-set (TR1_SHARED_PTR_FOUND TRUE)
-endif (TR1_SHARED_PTR_USE_MEMORY)
-
-mark_as_advanced (TR1_SHARED_PTR_FOUND)
-mark_as_advanced (TR1_SHARED_PTR_USE_TR1_MEMORY)
-mark_as_advanced (TR1_SHARED_PTR_USE_MEMORY)
-
-# ---------------------------------------------------------------------------
-# std::tr1::unordered_map<K, V>
-# ---------------------------------------------------------------------------
-
-check_cxx_source_compiles(
- "
- #include <tr1/unordered_map>
- int main() {
- std::tr1::unordered_map<int, int> m;
- return 0;
- }
- "
- TR1_UNORDERED_MAP_USE_TR1_UNORDERED_MAP)
-check_cxx_source_compiles(
- "
- #include <unordered_map>
- int main() {
- std::tr1::unordered_map<int, int> m;
- return 0;
- }
- "
- TR1_UNORDERED_MAP_USE_UNORDERED_MAP)
-
-set (TR1_UNORDERED_MAP -NOTFOUND)
-if (TR1_UNORDERED_MAP_USE_TR1_UNORDERED_MAP)
-set (TR1_UNORDERED_MAP_FOUND TRUE)
-endif (TR1_UNORDERED_MAP_USE_TR1_UNORDERED_MAP)
-if (TR1_UNORDERED_MAP_USE_UNORDERED_MAP)
-set (TR1_UNORDERED_MAP_FOUND TRUE)
-endif (TR1_UNORDERED_MAP_USE_UNORDERED_MAP)
-
-mark_as_advanced (TR1_UNORDERED_MAP_FOUND)
-mark_as_advanced (TR1_UNORDERED_MAP_USE_TR1_UNORDERED_MAP)
-mark_as_advanced (TR1_UNORDERED_MAP_USE_UNORDERED_MAP)
-
diff --git a/scripts/training/MGIZA/cmake/SRLBoost.cmake b/scripts/training/MGIZA/cmake/SRLBoost.cmake
deleted file mode 100644
index af3a956..0000000
--- a/scripts/training/MGIZA/cmake/SRLBoost.cmake
+++ /dev/null
@@ -1,21 +0,0 @@
-#Locate Boost libs. Windows users: make sure BOOST_ROOT and BOOST_PATH are set correctly on your environment.
-#See the site FAQ for more details.
-
-MACRO (GET_BOOST_INCLUDE_PATH path libs)
- #todo: allow this to fall back on a local distributed copy, so user doesn't have to d/l Boost seperately
-
- #todo: limit Boost version?
- #todo: use COMPONENTS threads to locate boost_threads without breaking the current support
- IF(Boost_FOUND)
- IF (NOT _boost_IN_CACHE)
- MESSAGE( "Boost found" )
- message(STATUS "Boost_INCLUDE_DIR : ${Boost_INCLUDE_DIR}")
- ENDIF (NOT _boost_IN_CACHE)
- SET(${path} ${Boost_INCLUDE_DIRS} )
- SET(${libs} ${Boost_LIBRARIES} )
- link_directories ( ${Boost_LIBRARY_DIRS} )
- ELSE()
- MESSAGE(FATAL_ERROR "Boost not found, please set the BOOST_ROOT environment variable " )
- ENDIF()
-ENDMACRO (GET_BOOST_INCLUDE_PATH path libs)
-
diff --git a/scripts/training/MGIZA/config.guess b/scripts/training/MGIZA/config.guess
deleted file mode 100755
index f32079a..0000000
--- a/scripts/training/MGIZA/config.guess
+++ /dev/null
@@ -1,1526 +0,0 @@
-#! /bin/sh
-# Attempt to guess a canonical system name.
-# Copyright (C) 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999,
-# 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008
-# Free Software Foundation, Inc.
-
-timestamp='2008-01-23'
-
-# This file is free software; you can redistribute it and/or modify it
-# under the terms of the GNU General Public License as published by
-# the Free Software Foundation; either version 2 of the License, or
-# (at your option) any later version.
-#
-# This program is distributed in the hope that it will be useful, but
-# WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-# General Public License for more details.
-#
-# You should have received a copy of the GNU General Public License
-# along with this program; if not, write to the Free Software
-# Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston, MA
-# 02110-1301, USA.
-#
-# As a special exception to the GNU General Public License, if you
-# distribute this file as part of a program that contains a
-# configuration script generated by Autoconf, you may include it under
-# the same distribution terms that you use for the rest of that program.
-
-
-# Originally written by Per Bothner <per@bothner.com>.
-# Please send patches to <config-patches@gnu.org>. Submit a context
-# diff and a properly formatted ChangeLog entry.
-#
-# This script attempts to guess a canonical system name similar to
-# config.sub. If it succeeds, it prints the system name on stdout, and
-# exits with 0. Otherwise, it exits with 1.
-#
-# The plan is that this can be called by configure scripts if you
-# don't specify an explicit build system type.
-
-me=`echo "$0" | sed -e 's,.*/,,'`
-
-usage="\
-Usage: $0 [OPTION]
-
-Output the configuration name of the system \`$me' is run on.
-
-Operation modes:
- -h, --help print this help, then exit
- -t, --time-stamp print date of last modification, then exit
- -v, --version print version number, then exit
-
-Report bugs and patches to <config-patches@gnu.org>."
-
-version="\
-GNU config.guess ($timestamp)
-
-Originally written by Per Bothner.
-Copyright (C) 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
-2002, 2003, 2004, 2005, 2006, 2007, 2008 Free Software Foundation, Inc.
-
-This is free software; see the source for copying conditions. There is NO
-warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE."
-
-help="
-Try \`$me --help' for more information."
-
-# Parse command line
-while test $# -gt 0 ; do
- case $1 in
- --time-stamp | --time* | -t )
- echo "$timestamp" ; exit ;;
- --version | -v )
- echo "$version" ; exit ;;
- --help | --h* | -h )
- echo "$usage"; exit ;;
- -- ) # Stop option processing
- shift; break ;;
- - ) # Use stdin as input.
- break ;;
- -* )
- echo "$me: invalid option $1$help" >&2
- exit 1 ;;
- * )
- break ;;
- esac
-done
-
-if test $# != 0; then
- echo "$me: too many arguments$help" >&2
- exit 1
-fi
-
-trap 'exit 1' 1 2 15
-
-# CC_FOR_BUILD -- compiler used by this script. Note that the use of a
-# compiler to aid in system detection is discouraged as it requires
-# temporary files to be created and, as you can see below, it is a
-# headache to deal with in a portable fashion.
-
-# Historically, `CC_FOR_BUILD' used to be named `HOST_CC'. We still
-# use `HOST_CC' if defined, but it is deprecated.
-
-# Portable tmp directory creation inspired by the Autoconf team.
-
-set_cc_for_build='
-trap "exitcode=\$?; (rm -f \$tmpfiles 2>/dev/null; rmdir \$tmp 2>/dev/null) && exit \$exitcode" 0 ;
-trap "rm -f \$tmpfiles 2>/dev/null; rmdir \$tmp 2>/dev/null; exit 1" 1 2 13 15 ;
-: ${TMPDIR=/tmp} ;
- { tmp=`(umask 077 && mktemp -d "$TMPDIR/cgXXXXXX") 2>/dev/null` && test -n "$tmp" && test -d "$tmp" ; } ||
- { test -n "$RANDOM" && tmp=$TMPDIR/cg$$-$RANDOM && (umask 077 && mkdir $tmp) ; } ||
- { tmp=$TMPDIR/cg-$$ && (umask 077 && mkdir $tmp) && echo "Warning: creating insecure temp directory" >&2 ; } ||
- { echo "$me: cannot create a temporary directory in $TMPDIR" >&2 ; exit 1 ; } ;
-dummy=$tmp/dummy ;
-tmpfiles="$dummy.c $dummy.o $dummy.rel $dummy" ;
-case $CC_FOR_BUILD,$HOST_CC,$CC in
- ,,) echo "int x;" > $dummy.c ;
- for c in cc gcc c89 c99 ; do
- if ($c -c -o $dummy.o $dummy.c) >/dev/null 2>&1 ; then
- CC_FOR_BUILD="$c"; break ;
- fi ;
- done ;
- if test x"$CC_FOR_BUILD" = x ; then
- CC_FOR_BUILD=no_compiler_found ;
- fi
- ;;
- ,,*) CC_FOR_BUILD=$CC ;;
- ,*,*) CC_FOR_BUILD=$HOST_CC ;;
-esac ; set_cc_for_build= ;'
-
-# This is needed to find uname on a Pyramid OSx when run in the BSD universe.
-# (ghazi@noc.rutgers.edu 1994-08-24)
-if (test -f /.attbin/uname) >/dev/null 2>&1 ; then
- PATH=$PATH:/.attbin ; export PATH
-fi
-
-UNAME_MACHINE=`(uname -m) 2>/dev/null` || UNAME_MACHINE=unknown
-UNAME_RELEASE=`(uname -r) 2>/dev/null` || UNAME_RELEASE=unknown
-UNAME_SYSTEM=`(uname -s) 2>/dev/null` || UNAME_SYSTEM=unknown
-UNAME_VERSION=`(uname -v) 2>/dev/null` || UNAME_VERSION=unknown
-
-# Note: order is significant - the case branches are not exclusive.
-
-case "${UNAME_MACHINE}:${UNAME_SYSTEM}:${UNAME_RELEASE}:${UNAME_VERSION}" in
- *:NetBSD:*:*)
- # NetBSD (nbsd) targets should (where applicable) match one or
- # more of the tupples: *-*-netbsdelf*, *-*-netbsdaout*,
- # *-*-netbsdecoff* and *-*-netbsd*. For targets that recently
- # switched to ELF, *-*-netbsd* would select the old
- # object file format. This provides both forward
- # compatibility and a consistent mechanism for selecting the
- # object file format.
- #
- # Note: NetBSD doesn't particularly care about the vendor
- # portion of the name. We always set it to "unknown".
- sysctl="sysctl -n hw.machine_arch"
- UNAME_MACHINE_ARCH=`(/sbin/$sysctl 2>/dev/null || \
- /usr/sbin/$sysctl 2>/dev/null || echo unknown)`
- case "${UNAME_MACHINE_ARCH}" in
- armeb) machine=armeb-unknown ;;
- arm*) machine=arm-unknown ;;
- sh3el) machine=shl-unknown ;;
- sh3eb) machine=sh-unknown ;;
- sh5el) machine=sh5le-unknown ;;
- *) machine=${UNAME_MACHINE_ARCH}-unknown ;;
- esac
- # The Operating System including object format, if it has switched
- # to ELF recently, or will in the future.
- case "${UNAME_MACHINE_ARCH}" in
- arm*|i386|m68k|ns32k|sh3*|sparc|vax)
- eval $set_cc_for_build
- if echo __ELF__ | $CC_FOR_BUILD -E - 2>/dev/null \
- | grep __ELF__ >/dev/null
- then
- # Once all utilities can be ECOFF (netbsdecoff) or a.out (netbsdaout).
- # Return netbsd for either. FIX?
- os=netbsd
- else
- os=netbsdelf
- fi
- ;;
- *)
- os=netbsd
- ;;
- esac
- # The OS release
- # Debian GNU/NetBSD machines have a different userland, and
- # thus, need a distinct triplet. However, they do not need
- # kernel version information, so it can be replaced with a
- # suitable tag, in the style of linux-gnu.
- case "${UNAME_VERSION}" in
- Debian*)
- release='-gnu'
- ;;
- *)
- release=`echo ${UNAME_RELEASE}|sed -e 's/[-_].*/\./'`
- ;;
- esac
- # Since CPU_TYPE-MANUFACTURER-KERNEL-OPERATING_SYSTEM:
- # contains redundant information, the shorter form:
- # CPU_TYPE-MANUFACTURER-OPERATING_SYSTEM is used.
- echo "${machine}-${os}${release}"
- exit ;;
- *:OpenBSD:*:*)
- UNAME_MACHINE_ARCH=`arch | sed 's/OpenBSD.//'`
- echo ${UNAME_MACHINE_ARCH}-unknown-openbsd${UNAME_RELEASE}
- exit ;;
- *:ekkoBSD:*:*)
- echo ${UNAME_MACHINE}-unknown-ekkobsd${UNAME_RELEASE}
- exit ;;
- *:SolidBSD:*:*)
- echo ${UNAME_MACHINE}-unknown-solidbsd${UNAME_RELEASE}
- exit ;;
- macppc:MirBSD:*:*)
- echo powerpc-unknown-mirbsd${UNAME_RELEASE}
- exit ;;
- *:MirBSD:*:*)
- echo ${UNAME_MACHINE}-unknown-mirbsd${UNAME_RELEASE}
- exit ;;
- alpha:OSF1:*:*)
- case $UNAME_RELEASE in
- *4.0)
- UNAME_RELEASE=`/usr/sbin/sizer -v | awk '{print $3}'`
- ;;
- *5.*)
- UNAME_RELEASE=`/usr/sbin/sizer -v | awk '{print $4}'`
- ;;
- esac
- # According to Compaq, /usr/sbin/psrinfo has been available on
- # OSF/1 and Tru64 systems produced since 1995. I hope that
- # covers most systems running today. This code pipes the CPU
- # types through head -n 1, so we only detect the type of CPU 0.
- ALPHA_CPU_TYPE=`/usr/sbin/psrinfo -v | sed -n -e 's/^ The alpha \(.*\) processor.*$/\1/p' | head -n 1`
- case "$ALPHA_CPU_TYPE" in
- "EV4 (21064)")
- UNAME_MACHINE="alpha" ;;
- "EV4.5 (21064)")
- UNAME_MACHINE="alpha" ;;
- "LCA4 (21066/21068)")
- UNAME_MACHINE="alpha" ;;
- "EV5 (21164)")
- UNAME_MACHINE="alphaev5" ;;
- "EV5.6 (21164A)")
- UNAME_MACHINE="alphaev56" ;;
- "EV5.6 (21164PC)")
- UNAME_MACHINE="alphapca56" ;;
- "EV5.7 (21164PC)")
- UNAME_MACHINE="alphapca57" ;;
- "EV6 (21264)")
- UNAME_MACHINE="alphaev6" ;;
- "EV6.7 (21264A)")
- UNAME_MACHINE="alphaev67" ;;
- "EV6.8CB (21264C)")
- UNAME_MACHINE="alphaev68" ;;
- "EV6.8AL (21264B)")
- UNAME_MACHINE="alphaev68" ;;
- "EV6.8CX (21264D)")
- UNAME_MACHINE="alphaev68" ;;
- "EV6.9A (21264/EV69A)")
- UNAME_MACHINE="alphaev69" ;;
- "EV7 (21364)")
- UNAME_MACHINE="alphaev7" ;;
- "EV7.9 (21364A)")
- UNAME_MACHINE="alphaev79" ;;
- esac
- # A Pn.n version is a patched version.
- # A Vn.n version is a released version.
- # A Tn.n version is a released field test version.
- # A Xn.n version is an unreleased experimental baselevel.
- # 1.2 uses "1.2" for uname -r.
- echo ${UNAME_MACHINE}-dec-osf`echo ${UNAME_RELEASE} | sed -e 's/^[PVTX]//' | tr 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' 'abcdefghijklmnopqrstuvwxyz'`
- exit ;;
- Alpha\ *:Windows_NT*:*)
- # How do we know it's Interix rather than the generic POSIX subsystem?
- # Should we change UNAME_MACHINE based on the output of uname instead
- # of the specific Alpha model?
- echo alpha-pc-interix
- exit ;;
- 21064:Windows_NT:50:3)
- echo alpha-dec-winnt3.5
- exit ;;
- Amiga*:UNIX_System_V:4.0:*)
- echo m68k-unknown-sysv4
- exit ;;
- *:[Aa]miga[Oo][Ss]:*:*)
- echo ${UNAME_MACHINE}-unknown-amigaos
- exit ;;
- *:[Mm]orph[Oo][Ss]:*:*)
- echo ${UNAME_MACHINE}-unknown-morphos
- exit ;;
- *:OS/390:*:*)
- echo i370-ibm-openedition
- exit ;;
- *:z/VM:*:*)
- echo s390-ibm-zvmoe
- exit ;;
- *:OS400:*:*)
- echo powerpc-ibm-os400
- exit ;;
- arm:RISC*:1.[012]*:*|arm:riscix:1.[012]*:*)
- echo arm-acorn-riscix${UNAME_RELEASE}
- exit ;;
- arm:riscos:*:*|arm:RISCOS:*:*)
- echo arm-unknown-riscos
- exit ;;
- SR2?01:HI-UX/MPP:*:* | SR8000:HI-UX/MPP:*:*)
- echo hppa1.1-hitachi-hiuxmpp
- exit ;;
- Pyramid*:OSx*:*:* | MIS*:OSx*:*:* | MIS*:SMP_DC-OSx*:*:*)
- # akee@wpdis03.wpafb.af.mil (Earle F. Ake) contributed MIS and NILE.
- if test "`(/bin/universe) 2>/dev/null`" = att ; then
- echo pyramid-pyramid-sysv3
- else
- echo pyramid-pyramid-bsd
- fi
- exit ;;
- NILE*:*:*:dcosx)
- echo pyramid-pyramid-svr4
- exit ;;
- DRS?6000:unix:4.0:6*)
- echo sparc-icl-nx6
- exit ;;
- DRS?6000:UNIX_SV:4.2*:7* | DRS?6000:isis:4.2*:7*)
- case `/usr/bin/uname -p` in
- sparc) echo sparc-icl-nx7; exit ;;
- esac ;;
- sun4H:SunOS:5.*:*)
- echo sparc-hal-solaris2`echo ${UNAME_RELEASE}|sed -e 's/[^.]*//'`
- exit ;;
- sun4*:SunOS:5.*:* | tadpole*:SunOS:5.*:*)
- echo sparc-sun-solaris2`echo ${UNAME_RELEASE}|sed -e 's/[^.]*//'`
- exit ;;
- i86pc:SunOS:5.*:* | i86xen:SunOS:5.*:*)
- echo i386-pc-solaris2`echo ${UNAME_RELEASE}|sed -e 's/[^.]*//'`
- exit ;;
- sun4*:SunOS:6*:*)
- # According to config.sub, this is the proper way to canonicalize
- # SunOS6. Hard to guess exactly what SunOS6 will be like, but
- # it's likely to be more like Solaris than SunOS4.
- echo sparc-sun-solaris3`echo ${UNAME_RELEASE}|sed -e 's/[^.]*//'`
- exit ;;
- sun4*:SunOS:*:*)
- case "`/usr/bin/arch -k`" in
- Series*|S4*)
- UNAME_RELEASE=`uname -v`
- ;;
- esac
- # Japanese Language versions have a version number like `4.1.3-JL'.
- echo sparc-sun-sunos`echo ${UNAME_RELEASE}|sed -e 's/-/_/'`
- exit ;;
- sun3*:SunOS:*:*)
- echo m68k-sun-sunos${UNAME_RELEASE}
- exit ;;
- sun*:*:4.2BSD:*)
- UNAME_RELEASE=`(sed 1q /etc/motd | awk '{print substr($5,1,3)}') 2>/dev/null`
- test "x${UNAME_RELEASE}" = "x" && UNAME_RELEASE=3
- case "`/bin/arch`" in
- sun3)
- echo m68k-sun-sunos${UNAME_RELEASE}
- ;;
- sun4)
- echo sparc-sun-sunos${UNAME_RELEASE}
- ;;
- esac
- exit ;;
- aushp:SunOS:*:*)
- echo sparc-auspex-sunos${UNAME_RELEASE}
- exit ;;
- # The situation for MiNT is a little confusing. The machine name
- # can be virtually everything (everything which is not
- # "atarist" or "atariste" at least should have a processor
- # > m68000). The system name ranges from "MiNT" over "FreeMiNT"
- # to the lowercase version "mint" (or "freemint"). Finally
- # the system name "TOS" denotes a system which is actually not
- # MiNT. But MiNT is downward compatible to TOS, so this should
- # be no problem.
- atarist[e]:*MiNT:*:* | atarist[e]:*mint:*:* | atarist[e]:*TOS:*:*)
- echo m68k-atari-mint${UNAME_RELEASE}
- exit ;;
- atari*:*MiNT:*:* | atari*:*mint:*:* | atarist[e]:*TOS:*:*)
- echo m68k-atari-mint${UNAME_RELEASE}
- exit ;;
- *falcon*:*MiNT:*:* | *falcon*:*mint:*:* | *falcon*:*TOS:*:*)
- echo m68k-atari-mint${UNAME_RELEASE}
- exit ;;
- milan*:*MiNT:*:* | milan*:*mint:*:* | *milan*:*TOS:*:*)
- echo m68k-milan-mint${UNAME_RELEASE}
- exit ;;
- hades*:*MiNT:*:* | hades*:*mint:*:* | *hades*:*TOS:*:*)
- echo m68k-hades-mint${UNAME_RELEASE}
- exit ;;
- *:*MiNT:*:* | *:*mint:*:* | *:*TOS:*:*)
- echo m68k-unknown-mint${UNAME_RELEASE}
- exit ;;
- m68k:machten:*:*)
- echo m68k-apple-machten${UNAME_RELEASE}
- exit ;;
- powerpc:machten:*:*)
- echo powerpc-apple-machten${UNAME_RELEASE}
- exit ;;
- RISC*:Mach:*:*)
- echo mips-dec-mach_bsd4.3
- exit ;;
- RISC*:ULTRIX:*:*)
- echo mips-dec-ultrix${UNAME_RELEASE}
- exit ;;
- VAX*:ULTRIX*:*:*)
- echo vax-dec-ultrix${UNAME_RELEASE}
- exit ;;
- 2020:CLIX:*:* | 2430:CLIX:*:*)
- echo clipper-intergraph-clix${UNAME_RELEASE}
- exit ;;
- mips:*:*:UMIPS | mips:*:*:RISCos)
- eval $set_cc_for_build
- sed 's/^ //' << EOF >$dummy.c
-#ifdef __cplusplus
-#include <stdio.h> /* for printf() prototype */
- int main (int argc, char *argv[]) {
-#else
- int main (argc, argv) int argc; char *argv[]; {
-#endif
- #if defined (host_mips) && defined (MIPSEB)
- #if defined (SYSTYPE_SYSV)
- printf ("mips-mips-riscos%ssysv\n", argv[1]); exit (0);
- #endif
- #if defined (SYSTYPE_SVR4)
- printf ("mips-mips-riscos%ssvr4\n", argv[1]); exit (0);
- #endif
- #if defined (SYSTYPE_BSD43) || defined(SYSTYPE_BSD)
- printf ("mips-mips-riscos%sbsd\n", argv[1]); exit (0);
- #endif
- #endif
- exit (-1);
- }
-EOF
- $CC_FOR_BUILD -o $dummy $dummy.c &&
- dummyarg=`echo "${UNAME_RELEASE}" | sed -n 's/\([0-9]*\).*/\1/p'` &&
- SYSTEM_NAME=`$dummy $dummyarg` &&
- { echo "$SYSTEM_NAME"; exit; }
- echo mips-mips-riscos${UNAME_RELEASE}
- exit ;;
- Motorola:PowerMAX_OS:*:*)
- echo powerpc-motorola-powermax
- exit ;;
- Motorola:*:4.3:PL8-*)
- echo powerpc-harris-powermax
- exit ;;
- Night_Hawk:*:*:PowerMAX_OS | Synergy:PowerMAX_OS:*:*)
- echo powerpc-harris-powermax
- exit ;;
- Night_Hawk:Power_UNIX:*:*)
- echo powerpc-harris-powerunix
- exit ;;
- m88k:CX/UX:7*:*)
- echo m88k-harris-cxux7
- exit ;;
- m88k:*:4*:R4*)
- echo m88k-motorola-sysv4
- exit ;;
- m88k:*:3*:R3*)
- echo m88k-motorola-sysv3
- exit ;;
- AViiON:dgux:*:*)
- # DG/UX returns AViiON for all architectures
- UNAME_PROCESSOR=`/usr/bin/uname -p`
- if [ $UNAME_PROCESSOR = mc88100 ] || [ $UNAME_PROCESSOR = mc88110 ]
- then
- if [ ${TARGET_BINARY_INTERFACE}x = m88kdguxelfx ] || \
- [ ${TARGET_BINARY_INTERFACE}x = x ]
- then
- echo m88k-dg-dgux${UNAME_RELEASE}
- else
- echo m88k-dg-dguxbcs${UNAME_RELEASE}
- fi
- else
- echo i586-dg-dgux${UNAME_RELEASE}
- fi
- exit ;;
- M88*:DolphinOS:*:*) # DolphinOS (SVR3)
- echo m88k-dolphin-sysv3
- exit ;;
- M88*:*:R3*:*)
- # Delta 88k system running SVR3
- echo m88k-motorola-sysv3
- exit ;;
- XD88*:*:*:*) # Tektronix XD88 system running UTekV (SVR3)
- echo m88k-tektronix-sysv3
- exit ;;
- Tek43[0-9][0-9]:UTek:*:*) # Tektronix 4300 system running UTek (BSD)
- echo m68k-tektronix-bsd
- exit ;;
- *:IRIX*:*:*)
- echo mips-sgi-irix`echo ${UNAME_RELEASE}|sed -e 's/-/_/g'`
- exit ;;
- ????????:AIX?:[12].1:2) # AIX 2.2.1 or AIX 2.1.1 is RT/PC AIX.
- echo romp-ibm-aix # uname -m gives an 8 hex-code CPU id
- exit ;; # Note that: echo "'`uname -s`'" gives 'AIX '
- i*86:AIX:*:*)
- echo i386-ibm-aix
- exit ;;
- ia64:AIX:*:*)
- if [ -x /usr/bin/oslevel ] ; then
- IBM_REV=`/usr/bin/oslevel`
- else
- IBM_REV=${UNAME_VERSION}.${UNAME_RELEASE}
- fi
- echo ${UNAME_MACHINE}-ibm-aix${IBM_REV}
- exit ;;
- *:AIX:2:3)
- if grep bos325 /usr/include/stdio.h >/dev/null 2>&1; then
- eval $set_cc_for_build
- sed 's/^ //' << EOF >$dummy.c
- #include <sys/systemcfg.h>
-
- main()
- {
- if (!__power_pc())
- exit(1);
- puts("powerpc-ibm-aix3.2.5");
- exit(0);
- }
-EOF
- if $CC_FOR_BUILD -o $dummy $dummy.c && SYSTEM_NAME=`$dummy`
- then
- echo "$SYSTEM_NAME"
- else
- echo rs6000-ibm-aix3.2.5
- fi
- elif grep bos324 /usr/include/stdio.h >/dev/null 2>&1; then
- echo rs6000-ibm-aix3.2.4
- else
- echo rs6000-ibm-aix3.2
- fi
- exit ;;
- *:AIX:*:[456])
- IBM_CPU_ID=`/usr/sbin/lsdev -C -c processor -S available | sed 1q | awk '{ print $1 }'`
- if /usr/sbin/lsattr -El ${IBM_CPU_ID} | grep ' POWER' >/dev/null 2>&1; then
- IBM_ARCH=rs6000
- else
- IBM_ARCH=powerpc
- fi
- if [ -x /usr/bin/oslevel ] ; then
- IBM_REV=`/usr/bin/oslevel`
- else
- IBM_REV=${UNAME_VERSION}.${UNAME_RELEASE}
- fi
- echo ${IBM_ARCH}-ibm-aix${IBM_REV}
- exit ;;
- *:AIX:*:*)
- echo rs6000-ibm-aix
- exit ;;
- ibmrt:4.4BSD:*|romp-ibm:BSD:*)
- echo romp-ibm-bsd4.4
- exit ;;
- ibmrt:*BSD:*|romp-ibm:BSD:*) # covers RT/PC BSD and
- echo romp-ibm-bsd${UNAME_RELEASE} # 4.3 with uname added to
- exit ;; # report: romp-ibm BSD 4.3
- *:BOSX:*:*)
- echo rs6000-bull-bosx
- exit ;;
- DPX/2?00:B.O.S.:*:*)
- echo m68k-bull-sysv3
- exit ;;
- 9000/[34]??:4.3bsd:1.*:*)
- echo m68k-hp-bsd
- exit ;;
- hp300:4.4BSD:*:* | 9000/[34]??:4.3bsd:2.*:*)
- echo m68k-hp-bsd4.4
- exit ;;
- 9000/[34678]??:HP-UX:*:*)
- HPUX_REV=`echo ${UNAME_RELEASE}|sed -e 's/[^.]*.[0B]*//'`
- case "${UNAME_MACHINE}" in
- 9000/31? ) HP_ARCH=m68000 ;;
- 9000/[34]?? ) HP_ARCH=m68k ;;
- 9000/[678][0-9][0-9])
- if [ -x /usr/bin/getconf ]; then
- sc_cpu_version=`/usr/bin/getconf SC_CPU_VERSION 2>/dev/null`
- sc_kernel_bits=`/usr/bin/getconf SC_KERNEL_BITS 2>/dev/null`
- case "${sc_cpu_version}" in
- 523) HP_ARCH="hppa1.0" ;; # CPU_PA_RISC1_0
- 528) HP_ARCH="hppa1.1" ;; # CPU_PA_RISC1_1
- 532) # CPU_PA_RISC2_0
- case "${sc_kernel_bits}" in
- 32) HP_ARCH="hppa2.0n" ;;
- 64) HP_ARCH="hppa2.0w" ;;
- '') HP_ARCH="hppa2.0" ;; # HP-UX 10.20
- esac ;;
- esac
- fi
- if [ "${HP_ARCH}" = "" ]; then
- eval $set_cc_for_build
- sed 's/^ //' << EOF >$dummy.c
-
- #define _HPUX_SOURCE
- #include <stdlib.h>
- #include <unistd.h>
-
- int main ()
- {
- #if defined(_SC_KERNEL_BITS)
- long bits = sysconf(_SC_KERNEL_BITS);
- #endif
- long cpu = sysconf (_SC_CPU_VERSION);
-
- switch (cpu)
- {
- case CPU_PA_RISC1_0: puts ("hppa1.0"); break;
- case CPU_PA_RISC1_1: puts ("hppa1.1"); break;
- case CPU_PA_RISC2_0:
- #if defined(_SC_KERNEL_BITS)
- switch (bits)
- {
- case 64: puts ("hppa2.0w"); break;
- case 32: puts ("hppa2.0n"); break;
- default: puts ("hppa2.0"); break;
- } break;
- #else /* !defined(_SC_KERNEL_BITS) */
- puts ("hppa2.0"); break;
- #endif
- default: puts ("hppa1.0"); break;
- }
- exit (0);
- }
-EOF
- (CCOPTS= $CC_FOR_BUILD -o $dummy $dummy.c 2>/dev/null) && HP_ARCH=`$dummy`
- test -z "$HP_ARCH" && HP_ARCH=hppa
- fi ;;
- esac
- if [ ${HP_ARCH} = "hppa2.0w" ]
- then
- eval $set_cc_for_build
-
- # hppa2.0w-hp-hpux* has a 64-bit kernel and a compiler generating
- # 32-bit code. hppa64-hp-hpux* has the same kernel and a compiler
- # generating 64-bit code. GNU and HP use different nomenclature:
- #
- # $ CC_FOR_BUILD=cc ./config.guess
- # => hppa2.0w-hp-hpux11.23
- # $ CC_FOR_BUILD="cc +DA2.0w" ./config.guess
- # => hppa64-hp-hpux11.23
-
- if echo __LP64__ | (CCOPTS= $CC_FOR_BUILD -E - 2>/dev/null) |
- grep __LP64__ >/dev/null
- then
- HP_ARCH="hppa2.0w"
- else
- HP_ARCH="hppa64"
- fi
- fi
- echo ${HP_ARCH}-hp-hpux${HPUX_REV}
- exit ;;
- ia64:HP-UX:*:*)
- HPUX_REV=`echo ${UNAME_RELEASE}|sed -e 's/[^.]*.[0B]*//'`
- echo ia64-hp-hpux${HPUX_REV}
- exit ;;
- 3050*:HI-UX:*:*)
- eval $set_cc_for_build
- sed 's/^ //' << EOF >$dummy.c
- #include <unistd.h>
- int
- main ()
- {
- long cpu = sysconf (_SC_CPU_VERSION);
- /* The order matters, because CPU_IS_HP_MC68K erroneously returns
- true for CPU_PA_RISC1_0. CPU_IS_PA_RISC returns correct
- results, however. */
- if (CPU_IS_PA_RISC (cpu))
- {
- switch (cpu)
- {
- case CPU_PA_RISC1_0: puts ("hppa1.0-hitachi-hiuxwe2"); break;
- case CPU_PA_RISC1_1: puts ("hppa1.1-hitachi-hiuxwe2"); break;
- case CPU_PA_RISC2_0: puts ("hppa2.0-hitachi-hiuxwe2"); break;
- default: puts ("hppa-hitachi-hiuxwe2"); break;
- }
- }
- else if (CPU_IS_HP_MC68K (cpu))
- puts ("m68k-hitachi-hiuxwe2");
- else puts ("unknown-hitachi-hiuxwe2");
- exit (0);
- }
-EOF
- $CC_FOR_BUILD -o $dummy $dummy.c && SYSTEM_NAME=`$dummy` &&
- { echo "$SYSTEM_NAME"; exit; }
- echo unknown-hitachi-hiuxwe2
- exit ;;
- 9000/7??:4.3bsd:*:* | 9000/8?[79]:4.3bsd:*:* )
- echo hppa1.1-hp-bsd
- exit ;;
- 9000/8??:4.3bsd:*:*)
- echo hppa1.0-hp-bsd
- exit ;;
- *9??*:MPE/iX:*:* | *3000*:MPE/iX:*:*)
- echo hppa1.0-hp-mpeix
- exit ;;
- hp7??:OSF1:*:* | hp8?[79]:OSF1:*:* )
- echo hppa1.1-hp-osf
- exit ;;
- hp8??:OSF1:*:*)
- echo hppa1.0-hp-osf
- exit ;;
- i*86:OSF1:*:*)
- if [ -x /usr/sbin/sysversion ] ; then
- echo ${UNAME_MACHINE}-unknown-osf1mk
- else
- echo ${UNAME_MACHINE}-unknown-osf1
- fi
- exit ;;
- parisc*:Lites*:*:*)
- echo hppa1.1-hp-lites
- exit ;;
- C1*:ConvexOS:*:* | convex:ConvexOS:C1*:*)
- echo c1-convex-bsd
- exit ;;
- C2*:ConvexOS:*:* | convex:ConvexOS:C2*:*)
- if getsysinfo -f scalar_acc
- then echo c32-convex-bsd
- else echo c2-convex-bsd
- fi
- exit ;;
- C34*:ConvexOS:*:* | convex:ConvexOS:C34*:*)
- echo c34-convex-bsd
- exit ;;
- C38*:ConvexOS:*:* | convex:ConvexOS:C38*:*)
- echo c38-convex-bsd
- exit ;;
- C4*:ConvexOS:*:* | convex:ConvexOS:C4*:*)
- echo c4-convex-bsd
- exit ;;
- CRAY*Y-MP:*:*:*)
- echo ymp-cray-unicos${UNAME_RELEASE} | sed -e 's/\.[^.]*$/.X/'
- exit ;;
- CRAY*[A-Z]90:*:*:*)
- echo ${UNAME_MACHINE}-cray-unicos${UNAME_RELEASE} \
- | sed -e 's/CRAY.*\([A-Z]90\)/\1/' \
- -e y/ABCDEFGHIJKLMNOPQRSTUVWXYZ/abcdefghijklmnopqrstuvwxyz/ \
- -e 's/\.[^.]*$/.X/'
- exit ;;
- CRAY*TS:*:*:*)
- echo t90-cray-unicos${UNAME_RELEASE} | sed -e 's/\.[^.]*$/.X/'
- exit ;;
- CRAY*T3E:*:*:*)
- echo alphaev5-cray-unicosmk${UNAME_RELEASE} | sed -e 's/\.[^.]*$/.X/'
- exit ;;
- CRAY*SV1:*:*:*)
- echo sv1-cray-unicos${UNAME_RELEASE} | sed -e 's/\.[^.]*$/.X/'
- exit ;;
- *:UNICOS/mp:*:*)
- echo craynv-cray-unicosmp${UNAME_RELEASE} | sed -e 's/\.[^.]*$/.X/'
- exit ;;
- F30[01]:UNIX_System_V:*:* | F700:UNIX_System_V:*:*)
- FUJITSU_PROC=`uname -m | tr 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' 'abcdefghijklmnopqrstuvwxyz'`
- FUJITSU_SYS=`uname -p | tr 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' 'abcdefghijklmnopqrstuvwxyz' | sed -e 's/\///'`
- FUJITSU_REL=`echo ${UNAME_RELEASE} | sed -e 's/ /_/'`
- echo "${FUJITSU_PROC}-fujitsu-${FUJITSU_SYS}${FUJITSU_REL}"
- exit ;;
- 5000:UNIX_System_V:4.*:*)
- FUJITSU_SYS=`uname -p | tr 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' 'abcdefghijklmnopqrstuvwxyz' | sed -e 's/\///'`
- FUJITSU_REL=`echo ${UNAME_RELEASE} | tr 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' 'abcdefghijklmnopqrstuvwxyz' | sed -e 's/ /_/'`
- echo "sparc-fujitsu-${FUJITSU_SYS}${FUJITSU_REL}"
- exit ;;
- i*86:BSD/386:*:* | i*86:BSD/OS:*:* | *:Ascend\ Embedded/OS:*:*)
- echo ${UNAME_MACHINE}-pc-bsdi${UNAME_RELEASE}
- exit ;;
- sparc*:BSD/OS:*:*)
- echo sparc-unknown-bsdi${UNAME_RELEASE}
- exit ;;
- *:BSD/OS:*:*)
- echo ${UNAME_MACHINE}-unknown-bsdi${UNAME_RELEASE}
- exit ;;
- *:FreeBSD:*:*)
- case ${UNAME_MACHINE} in
- pc98)
- echo i386-unknown-freebsd`echo ${UNAME_RELEASE}|sed -e 's/[-(].*//'` ;;
- amd64)
- echo x86_64-unknown-freebsd`echo ${UNAME_RELEASE}|sed -e 's/[-(].*//'` ;;
- *)
- echo ${UNAME_MACHINE}-unknown-freebsd`echo ${UNAME_RELEASE}|sed -e 's/[-(].*//'` ;;
- esac
- exit ;;
- i*:CYGWIN*:*)
- echo ${UNAME_MACHINE}-pc-cygwin
- exit ;;
- *:MINGW*:*)
- echo ${UNAME_MACHINE}-pc-mingw32
- exit ;;
- i*:windows32*:*)
- # uname -m includes "-pc" on this system.
- echo ${UNAME_MACHINE}-mingw32
- exit ;;
- i*:PW*:*)
- echo ${UNAME_MACHINE}-pc-pw32
- exit ;;
- *:Interix*:[3456]*)
- case ${UNAME_MACHINE} in
- x86)
- echo i586-pc-interix${UNAME_RELEASE}
- exit ;;
- EM64T | authenticamd)
- echo x86_64-unknown-interix${UNAME_RELEASE}
- exit ;;
- IA64)
- echo ia64-unknown-interix${UNAME_RELEASE}
- exit ;;
- esac ;;
- [345]86:Windows_95:* | [345]86:Windows_98:* | [345]86:Windows_NT:*)
- echo i${UNAME_MACHINE}-pc-mks
- exit ;;
- i*:Windows_NT*:* | Pentium*:Windows_NT*:*)
- # How do we know it's Interix rather than the generic POSIX subsystem?
- # It also conflicts with pre-2.0 versions of AT&T UWIN. Should we
- # UNAME_MACHINE based on the output of uname instead of i386?
- echo i586-pc-interix
- exit ;;
- i*:UWIN*:*)
- echo ${UNAME_MACHINE}-pc-uwin
- exit ;;
- amd64:CYGWIN*:*:* | x86_64:CYGWIN*:*:*)
- echo x86_64-unknown-cygwin
- exit ;;
- p*:CYGWIN*:*)
- echo powerpcle-unknown-cygwin
- exit ;;
- prep*:SunOS:5.*:*)
- echo powerpcle-unknown-solaris2`echo ${UNAME_RELEASE}|sed -e 's/[^.]*//'`
- exit ;;
- *:GNU:*:*)
- # the GNU system
- echo `echo ${UNAME_MACHINE}|sed -e 's,[-/].*$,,'`-unknown-gnu`echo ${UNAME_RELEASE}|sed -e 's,/.*$,,'`
- exit ;;
- *:GNU/*:*:*)
- # other systems with GNU libc and userland
- echo ${UNAME_MACHINE}-unknown-`echo ${UNAME_SYSTEM} | sed 's,^[^/]*/,,' | tr '[A-Z]' '[a-z]'``echo ${UNAME_RELEASE}|sed -e 's/[-(].*//'`-gnu
- exit ;;
- i*86:Minix:*:*)
- echo ${UNAME_MACHINE}-pc-minix
- exit ;;
- arm*:Linux:*:*)
- eval $set_cc_for_build
- if echo __ARM_EABI__ | $CC_FOR_BUILD -E - 2>/dev/null \
- | grep -q __ARM_EABI__
- then
- echo ${UNAME_MACHINE}-unknown-linux-gnu
- else
- echo ${UNAME_MACHINE}-unknown-linux-gnueabi
- fi
- exit ;;
- avr32*:Linux:*:*)
- echo ${UNAME_MACHINE}-unknown-linux-gnu
- exit ;;
- cris:Linux:*:*)
- echo cris-axis-linux-gnu
- exit ;;
- crisv32:Linux:*:*)
- echo crisv32-axis-linux-gnu
- exit ;;
- frv:Linux:*:*)
- echo frv-unknown-linux-gnu
- exit ;;
- ia64:Linux:*:*)
- echo ${UNAME_MACHINE}-unknown-linux-gnu
- exit ;;
- m32r*:Linux:*:*)
- echo ${UNAME_MACHINE}-unknown-linux-gnu
- exit ;;
- m68*:Linux:*:*)
- echo ${UNAME_MACHINE}-unknown-linux-gnu
- exit ;;
- mips:Linux:*:*)
- eval $set_cc_for_build
- sed 's/^ //' << EOF >$dummy.c
- #undef CPU
- #undef mips
- #undef mipsel
- #if defined(__MIPSEL__) || defined(__MIPSEL) || defined(_MIPSEL) || defined(MIPSEL)
- CPU=mipsel
- #else
- #if defined(__MIPSEB__) || defined(__MIPSEB) || defined(_MIPSEB) || defined(MIPSEB)
- CPU=mips
- #else
- CPU=
- #endif
- #endif
-EOF
- eval "`$CC_FOR_BUILD -E $dummy.c 2>/dev/null | sed -n '
- /^CPU/{
- s: ::g
- p
- }'`"
- test x"${CPU}" != x && { echo "${CPU}-unknown-linux-gnu"; exit; }
- ;;
- mips64:Linux:*:*)
- eval $set_cc_for_build
- sed 's/^ //' << EOF >$dummy.c
- #undef CPU
- #undef mips64
- #undef mips64el
- #if defined(__MIPSEL__) || defined(__MIPSEL) || defined(_MIPSEL) || defined(MIPSEL)
- CPU=mips64el
- #else
- #if defined(__MIPSEB__) || defined(__MIPSEB) || defined(_MIPSEB) || defined(MIPSEB)
- CPU=mips64
- #else
- CPU=
- #endif
- #endif
-EOF
- eval "`$CC_FOR_BUILD -E $dummy.c 2>/dev/null | sed -n '
- /^CPU/{
- s: ::g
- p
- }'`"
- test x"${CPU}" != x && { echo "${CPU}-unknown-linux-gnu"; exit; }
- ;;
- or32:Linux:*:*)
- echo or32-unknown-linux-gnu
- exit ;;
- ppc:Linux:*:*)
- echo powerpc-unknown-linux-gnu
- exit ;;
- ppc64:Linux:*:*)
- echo powerpc64-unknown-linux-gnu
- exit ;;
- alpha:Linux:*:*)
- case `sed -n '/^cpu model/s/^.*: \(.*\)/\1/p' < /proc/cpuinfo` in
- EV5) UNAME_MACHINE=alphaev5 ;;
- EV56) UNAME_MACHINE=alphaev56 ;;
- PCA56) UNAME_MACHINE=alphapca56 ;;
- PCA57) UNAME_MACHINE=alphapca56 ;;
- EV6) UNAME_MACHINE=alphaev6 ;;
- EV67) UNAME_MACHINE=alphaev67 ;;
- EV68*) UNAME_MACHINE=alphaev68 ;;
- esac
- objdump --private-headers /bin/sh | grep ld.so.1 >/dev/null
- if test "$?" = 0 ; then LIBC="libc1" ; else LIBC="" ; fi
- echo ${UNAME_MACHINE}-unknown-linux-gnu${LIBC}
- exit ;;
- parisc:Linux:*:* | hppa:Linux:*:*)
- # Look for CPU level
- case `grep '^cpu[^a-z]*:' /proc/cpuinfo 2>/dev/null | cut -d' ' -f2` in
- PA7*) echo hppa1.1-unknown-linux-gnu ;;
- PA8*) echo hppa2.0-unknown-linux-gnu ;;
- *) echo hppa-unknown-linux-gnu ;;
- esac
- exit ;;
- parisc64:Linux:*:* | hppa64:Linux:*:*)
- echo hppa64-unknown-linux-gnu
- exit ;;
- s390:Linux:*:* | s390x:Linux:*:*)
- echo ${UNAME_MACHINE}-ibm-linux
- exit ;;
- sh64*:Linux:*:*)
- echo ${UNAME_MACHINE}-unknown-linux-gnu
- exit ;;
- sh*:Linux:*:*)
- echo ${UNAME_MACHINE}-unknown-linux-gnu
- exit ;;
- sparc:Linux:*:* | sparc64:Linux:*:*)
- echo ${UNAME_MACHINE}-unknown-linux-gnu
- exit ;;
- vax:Linux:*:*)
- echo ${UNAME_MACHINE}-dec-linux-gnu
- exit ;;
- x86_64:Linux:*:*)
- echo x86_64-unknown-linux-gnu
- exit ;;
- xtensa*:Linux:*:*)
- echo ${UNAME_MACHINE}-unknown-linux-gnu
- exit ;;
- i*86:Linux:*:*)
- # The BFD linker knows what the default object file format is, so
- # first see if it will tell us. cd to the root directory to prevent
- # problems with other programs or directories called `ld' in the path.
- # Set LC_ALL=C to ensure ld outputs messages in English.
- ld_supported_targets=`cd /; LC_ALL=C ld --help 2>&1 \
- | sed -ne '/supported targets:/!d
- s/[ ][ ]*/ /g
- s/.*supported targets: *//
- s/ .*//
- p'`
- case "$ld_supported_targets" in
- elf32-i386)
- TENTATIVE="${UNAME_MACHINE}-pc-linux-gnu"
- ;;
- a.out-i386-linux)
- echo "${UNAME_MACHINE}-pc-linux-gnuaout"
- exit ;;
- coff-i386)
- echo "${UNAME_MACHINE}-pc-linux-gnucoff"
- exit ;;
- "")
- # Either a pre-BFD a.out linker (linux-gnuoldld) or
- # one that does not give us useful --help.
- echo "${UNAME_MACHINE}-pc-linux-gnuoldld"
- exit ;;
- esac
- # Determine whether the default compiler is a.out or elf
- eval $set_cc_for_build
- sed 's/^ //' << EOF >$dummy.c
- #include <features.h>
- #ifdef __ELF__
- # ifdef __GLIBC__
- # if __GLIBC__ >= 2
- LIBC=gnu
- # else
- LIBC=gnulibc1
- # endif
- # else
- LIBC=gnulibc1
- # endif
- #else
- #if defined(__INTEL_COMPILER) || defined(__PGI) || defined(__SUNPRO_C) || defined(__SUNPRO_CC)
- LIBC=gnu
- #else
- LIBC=gnuaout
- #endif
- #endif
- #ifdef __dietlibc__
- LIBC=dietlibc
- #endif
-EOF
- eval "`$CC_FOR_BUILD -E $dummy.c 2>/dev/null | sed -n '
- /^LIBC/{
- s: ::g
- p
- }'`"
- test x"${LIBC}" != x && {
- echo "${UNAME_MACHINE}-pc-linux-${LIBC}"
- exit
- }
- test x"${TENTATIVE}" != x && { echo "${TENTATIVE}"; exit; }
- ;;
- i*86:DYNIX/ptx:4*:*)
- # ptx 4.0 does uname -s correctly, with DYNIX/ptx in there.
- # earlier versions are messed up and put the nodename in both
- # sysname and nodename.
- echo i386-sequent-sysv4
- exit ;;
- i*86:UNIX_SV:4.2MP:2.*)
- # Unixware is an offshoot of SVR4, but it has its own version
- # number series starting with 2...
- # I am not positive that other SVR4 systems won't match this,
- # I just have to hope. -- rms.
- # Use sysv4.2uw... so that sysv4* matches it.
- echo ${UNAME_MACHINE}-pc-sysv4.2uw${UNAME_VERSION}
- exit ;;
- i*86:OS/2:*:*)
- # If we were able to find `uname', then EMX Unix compatibility
- # is probably installed.
- echo ${UNAME_MACHINE}-pc-os2-emx
- exit ;;
- i*86:XTS-300:*:STOP)
- echo ${UNAME_MACHINE}-unknown-stop
- exit ;;
- i*86:atheos:*:*)
- echo ${UNAME_MACHINE}-unknown-atheos
- exit ;;
- i*86:syllable:*:*)
- echo ${UNAME_MACHINE}-pc-syllable
- exit ;;
- i*86:LynxOS:2.*:* | i*86:LynxOS:3.[01]*:* | i*86:LynxOS:4.0*:*)
- echo i386-unknown-lynxos${UNAME_RELEASE}
- exit ;;
- i*86:*DOS:*:*)
- echo ${UNAME_MACHINE}-pc-msdosdjgpp
- exit ;;
- i*86:*:4.*:* | i*86:SYSTEM_V:4.*:*)
- UNAME_REL=`echo ${UNAME_RELEASE} | sed 's/\/MP$//'`
- if grep Novell /usr/include/link.h >/dev/null 2>/dev/null; then
- echo ${UNAME_MACHINE}-univel-sysv${UNAME_REL}
- else
- echo ${UNAME_MACHINE}-pc-sysv${UNAME_REL}
- fi
- exit ;;
- i*86:*:5:[678]*)
- # UnixWare 7.x, OpenUNIX and OpenServer 6.
- case `/bin/uname -X | grep "^Machine"` in
- *486*) UNAME_MACHINE=i486 ;;
- *Pentium) UNAME_MACHINE=i586 ;;
- *Pent*|*Celeron) UNAME_MACHINE=i686 ;;
- esac
- echo ${UNAME_MACHINE}-unknown-sysv${UNAME_RELEASE}${UNAME_SYSTEM}${UNAME_VERSION}
- exit ;;
- i*86:*:3.2:*)
- if test -f /usr/options/cb.name; then
- UNAME_REL=`sed -n 's/.*Version //p' </usr/options/cb.name`
- echo ${UNAME_MACHINE}-pc-isc$UNAME_REL
- elif /bin/uname -X 2>/dev/null >/dev/null ; then
- UNAME_REL=`(/bin/uname -X|grep Release|sed -e 's/.*= //')`
- (/bin/uname -X|grep i80486 >/dev/null) && UNAME_MACHINE=i486
- (/bin/uname -X|grep '^Machine.*Pentium' >/dev/null) \
- && UNAME_MACHINE=i586
- (/bin/uname -X|grep '^Machine.*Pent *II' >/dev/null) \
- && UNAME_MACHINE=i686
- (/bin/uname -X|grep '^Machine.*Pentium Pro' >/dev/null) \
- && UNAME_MACHINE=i686
- echo ${UNAME_MACHINE}-pc-sco$UNAME_REL
- else
- echo ${UNAME_MACHINE}-pc-sysv32
- fi
- exit ;;
- pc:*:*:*)
- # Left here for compatibility:
- # uname -m prints for DJGPP always 'pc', but it prints nothing about
- # the processor, so we play safe by assuming i386.
- echo i386-pc-msdosdjgpp
- exit ;;
- Intel:Mach:3*:*)
- echo i386-pc-mach3
- exit ;;
- paragon:*:*:*)
- echo i860-intel-osf1
- exit ;;
- i860:*:4.*:*) # i860-SVR4
- if grep Stardent /usr/include/sys/uadmin.h >/dev/null 2>&1 ; then
- echo i860-stardent-sysv${UNAME_RELEASE} # Stardent Vistra i860-SVR4
- else # Add other i860-SVR4 vendors below as they are discovered.
- echo i860-unknown-sysv${UNAME_RELEASE} # Unknown i860-SVR4
- fi
- exit ;;
- mini*:CTIX:SYS*5:*)
- # "miniframe"
- echo m68010-convergent-sysv
- exit ;;
- mc68k:UNIX:SYSTEM5:3.51m)
- echo m68k-convergent-sysv
- exit ;;
- M680?0:D-NIX:5.3:*)
- echo m68k-diab-dnix
- exit ;;
- M68*:*:R3V[5678]*:*)
- test -r /sysV68 && { echo 'm68k-motorola-sysv'; exit; } ;;
- 3[345]??:*:4.0:3.0 | 3[34]??A:*:4.0:3.0 | 3[34]??,*:*:4.0:3.0 | 3[34]??/*:*:4.0:3.0 | 4400:*:4.0:3.0 | 4850:*:4.0:3.0 | SKA40:*:4.0:3.0 | SDS2:*:4.0:3.0 | SHG2:*:4.0:3.0 | S7501*:*:4.0:3.0)
- OS_REL=''
- test -r /etc/.relid \
- && OS_REL=.`sed -n 's/[^ ]* [^ ]* \([0-9][0-9]\).*/\1/p' < /etc/.relid`
- /bin/uname -p 2>/dev/null | grep 86 >/dev/null \
- && { echo i486-ncr-sysv4.3${OS_REL}; exit; }
- /bin/uname -p 2>/dev/null | /bin/grep entium >/dev/null \
- && { echo i586-ncr-sysv4.3${OS_REL}; exit; } ;;
- 3[34]??:*:4.0:* | 3[34]??,*:*:4.0:*)
- /bin/uname -p 2>/dev/null | grep 86 >/dev/null \
- && { echo i486-ncr-sysv4; exit; } ;;
- m68*:LynxOS:2.*:* | m68*:LynxOS:3.0*:*)
- echo m68k-unknown-lynxos${UNAME_RELEASE}
- exit ;;
- mc68030:UNIX_System_V:4.*:*)
- echo m68k-atari-sysv4
- exit ;;
- TSUNAMI:LynxOS:2.*:*)
- echo sparc-unknown-lynxos${UNAME_RELEASE}
- exit ;;
- rs6000:LynxOS:2.*:*)
- echo rs6000-unknown-lynxos${UNAME_RELEASE}
- exit ;;
- PowerPC:LynxOS:2.*:* | PowerPC:LynxOS:3.[01]*:* | PowerPC:LynxOS:4.0*:*)
- echo powerpc-unknown-lynxos${UNAME_RELEASE}
- exit ;;
- SM[BE]S:UNIX_SV:*:*)
- echo mips-dde-sysv${UNAME_RELEASE}
- exit ;;
- RM*:ReliantUNIX-*:*:*)
- echo mips-sni-sysv4
- exit ;;
- RM*:SINIX-*:*:*)
- echo mips-sni-sysv4
- exit ;;
- *:SINIX-*:*:*)
- if uname -p 2>/dev/null >/dev/null ; then
- UNAME_MACHINE=`(uname -p) 2>/dev/null`
- echo ${UNAME_MACHINE}-sni-sysv4
- else
- echo ns32k-sni-sysv
- fi
- exit ;;
- PENTIUM:*:4.0*:*) # Unisys `ClearPath HMP IX 4000' SVR4/MP effort
- # says <Richard.M.Bartel@ccMail.Census.GOV>
- echo i586-unisys-sysv4
- exit ;;
- *:UNIX_System_V:4*:FTX*)
- # From Gerald Hewes <hewes@openmarket.com>.
- # How about differentiating between stratus architectures? -djm
- echo hppa1.1-stratus-sysv4
- exit ;;
- *:*:*:FTX*)
- # From seanf@swdc.stratus.com.
- echo i860-stratus-sysv4
- exit ;;
- i*86:VOS:*:*)
- # From Paul.Green@stratus.com.
- echo ${UNAME_MACHINE}-stratus-vos
- exit ;;
- *:VOS:*:*)
- # From Paul.Green@stratus.com.
- echo hppa1.1-stratus-vos
- exit ;;
- mc68*:A/UX:*:*)
- echo m68k-apple-aux${UNAME_RELEASE}
- exit ;;
- news*:NEWS-OS:6*:*)
- echo mips-sony-newsos6
- exit ;;
- R[34]000:*System_V*:*:* | R4000:UNIX_SYSV:*:* | R*000:UNIX_SV:*:*)
- if [ -d /usr/nec ]; then
- echo mips-nec-sysv${UNAME_RELEASE}
- else
- echo mips-unknown-sysv${UNAME_RELEASE}
- fi
- exit ;;
- BeBox:BeOS:*:*) # BeOS running on hardware made by Be, PPC only.
- echo powerpc-be-beos
- exit ;;
- BeMac:BeOS:*:*) # BeOS running on Mac or Mac clone, PPC only.
- echo powerpc-apple-beos
- exit ;;
- BePC:BeOS:*:*) # BeOS running on Intel PC compatible.
- echo i586-pc-beos
- exit ;;
- SX-4:SUPER-UX:*:*)
- echo sx4-nec-superux${UNAME_RELEASE}
- exit ;;
- SX-5:SUPER-UX:*:*)
- echo sx5-nec-superux${UNAME_RELEASE}
- exit ;;
- SX-6:SUPER-UX:*:*)
- echo sx6-nec-superux${UNAME_RELEASE}
- exit ;;
- SX-7:SUPER-UX:*:*)
- echo sx7-nec-superux${UNAME_RELEASE}
- exit ;;
- SX-8:SUPER-UX:*:*)
- echo sx8-nec-superux${UNAME_RELEASE}
- exit ;;
- SX-8R:SUPER-UX:*:*)
- echo sx8r-nec-superux${UNAME_RELEASE}
- exit ;;
- Power*:Rhapsody:*:*)
- echo powerpc-apple-rhapsody${UNAME_RELEASE}
- exit ;;
- *:Rhapsody:*:*)
- echo ${UNAME_MACHINE}-apple-rhapsody${UNAME_RELEASE}
- exit ;;
- *:Darwin:*:*)
- UNAME_PROCESSOR=`uname -p` || UNAME_PROCESSOR=unknown
- case $UNAME_PROCESSOR in
- unknown) UNAME_PROCESSOR=powerpc ;;
- esac
- echo ${UNAME_PROCESSOR}-apple-darwin${UNAME_RELEASE}
- exit ;;
- *:procnto*:*:* | *:QNX:[0123456789]*:*)
- UNAME_PROCESSOR=`uname -p`
- if test "$UNAME_PROCESSOR" = "x86"; then
- UNAME_PROCESSOR=i386
- UNAME_MACHINE=pc
- fi
- echo ${UNAME_PROCESSOR}-${UNAME_MACHINE}-nto-qnx${UNAME_RELEASE}
- exit ;;
- *:QNX:*:4*)
- echo i386-pc-qnx
- exit ;;
- NSE-?:NONSTOP_KERNEL:*:*)
- echo nse-tandem-nsk${UNAME_RELEASE}
- exit ;;
- NSR-?:NONSTOP_KERNEL:*:*)
- echo nsr-tandem-nsk${UNAME_RELEASE}
- exit ;;
- *:NonStop-UX:*:*)
- echo mips-compaq-nonstopux
- exit ;;
- BS2000:POSIX*:*:*)
- echo bs2000-siemens-sysv
- exit ;;
- DS/*:UNIX_System_V:*:*)
- echo ${UNAME_MACHINE}-${UNAME_SYSTEM}-${UNAME_RELEASE}
- exit ;;
- *:Plan9:*:*)
- # "uname -m" is not consistent, so use $cputype instead. 386
- # is converted to i386 for consistency with other x86
- # operating systems.
- if test "$cputype" = "386"; then
- UNAME_MACHINE=i386
- else
- UNAME_MACHINE="$cputype"
- fi
- echo ${UNAME_MACHINE}-unknown-plan9
- exit ;;
- *:TOPS-10:*:*)
- echo pdp10-unknown-tops10
- exit ;;
- *:TENEX:*:*)
- echo pdp10-unknown-tenex
- exit ;;
- KS10:TOPS-20:*:* | KL10:TOPS-20:*:* | TYPE4:TOPS-20:*:*)
- echo pdp10-dec-tops20
- exit ;;
- XKL-1:TOPS-20:*:* | TYPE5:TOPS-20:*:*)
- echo pdp10-xkl-tops20
- exit ;;
- *:TOPS-20:*:*)
- echo pdp10-unknown-tops20
- exit ;;
- *:ITS:*:*)
- echo pdp10-unknown-its
- exit ;;
- SEI:*:*:SEIUX)
- echo mips-sei-seiux${UNAME_RELEASE}
- exit ;;
- *:DragonFly:*:*)
- echo ${UNAME_MACHINE}-unknown-dragonfly`echo ${UNAME_RELEASE}|sed -e 's/[-(].*//'`
- exit ;;
- *:*VMS:*:*)
- UNAME_MACHINE=`(uname -p) 2>/dev/null`
- case "${UNAME_MACHINE}" in
- A*) echo alpha-dec-vms ; exit ;;
- I*) echo ia64-dec-vms ; exit ;;
- V*) echo vax-dec-vms ; exit ;;
- esac ;;
- *:XENIX:*:SysV)
- echo i386-pc-xenix
- exit ;;
- i*86:skyos:*:*)
- echo ${UNAME_MACHINE}-pc-skyos`echo ${UNAME_RELEASE}` | sed -e 's/ .*$//'
- exit ;;
- i*86:rdos:*:*)
- echo ${UNAME_MACHINE}-pc-rdos
- exit ;;
-esac
-
-#echo '(No uname command or uname output not recognized.)' 1>&2
-#echo "${UNAME_MACHINE}:${UNAME_SYSTEM}:${UNAME_RELEASE}:${UNAME_VERSION}" 1>&2
-
-eval $set_cc_for_build
-cat >$dummy.c <<EOF
-#ifdef _SEQUENT_
-# include <sys/types.h>
-# include <sys/utsname.h>
-#endif
-main ()
-{
-#if defined (sony)
-#if defined (MIPSEB)
- /* BFD wants "bsd" instead of "newsos". Perhaps BFD should be changed,
- I don't know.... */
- printf ("mips-sony-bsd\n"); exit (0);
-#else
-#include <sys/param.h>
- printf ("m68k-sony-newsos%s\n",
-#ifdef NEWSOS4
- "4"
-#else
- ""
-#endif
- ); exit (0);
-#endif
-#endif
-
-#if defined (__arm) && defined (__acorn) && defined (__unix)
- printf ("arm-acorn-riscix\n"); exit (0);
-#endif
-
-#if defined (hp300) && !defined (hpux)
- printf ("m68k-hp-bsd\n"); exit (0);
-#endif
-
-#if defined (NeXT)
-#if !defined (__ARCHITECTURE__)
-#define __ARCHITECTURE__ "m68k"
-#endif
- int version;
- version=`(hostinfo | sed -n 's/.*NeXT Mach \([0-9]*\).*/\1/p') 2>/dev/null`;
- if (version < 4)
- printf ("%s-next-nextstep%d\n", __ARCHITECTURE__, version);
- else
- printf ("%s-next-openstep%d\n", __ARCHITECTURE__, version);
- exit (0);
-#endif
-
-#if defined (MULTIMAX) || defined (n16)
-#if defined (UMAXV)
- printf ("ns32k-encore-sysv\n"); exit (0);
-#else
-#if defined (CMU)
- printf ("ns32k-encore-mach\n"); exit (0);
-#else
- printf ("ns32k-encore-bsd\n"); exit (0);
-#endif
-#endif
-#endif
-
-#if defined (__386BSD__)
- printf ("i386-pc-bsd\n"); exit (0);
-#endif
-
-#if defined (sequent)
-#if defined (i386)
- printf ("i386-sequent-dynix\n"); exit (0);
-#endif
-#if defined (ns32000)
- printf ("ns32k-sequent-dynix\n"); exit (0);
-#endif
-#endif
-
-#if defined (_SEQUENT_)
- struct utsname un;
-
- uname(&un);
-
- if (strncmp(un.version, "V2", 2) == 0) {
- printf ("i386-sequent-ptx2\n"); exit (0);
- }
- if (strncmp(un.version, "V1", 2) == 0) { /* XXX is V1 correct? */
- printf ("i386-sequent-ptx1\n"); exit (0);
- }
- printf ("i386-sequent-ptx\n"); exit (0);
-
-#endif
-
-#if defined (vax)
-# if !defined (ultrix)
-# include <sys/param.h>
-# if defined (BSD)
-# if BSD == 43
- printf ("vax-dec-bsd4.3\n"); exit (0);
-# else
-# if BSD == 199006
- printf ("vax-dec-bsd4.3reno\n"); exit (0);
-# else
- printf ("vax-dec-bsd\n"); exit (0);
-# endif
-# endif
-# else
- printf ("vax-dec-bsd\n"); exit (0);
-# endif
-# else
- printf ("vax-dec-ultrix\n"); exit (0);
-# endif
-#endif
-
-#if defined (alliant) && defined (i860)
- printf ("i860-alliant-bsd\n"); exit (0);
-#endif
-
- exit (1);
-}
-EOF
-
-$CC_FOR_BUILD -o $dummy $dummy.c 2>/dev/null && SYSTEM_NAME=`$dummy` &&
- { echo "$SYSTEM_NAME"; exit; }
-
-# Apollos put the system type in the environment.
-
-test -d /usr/apollo && { echo ${ISP}-apollo-${SYSTYPE}; exit; }
-
-# Convex versions that predate uname can use getsysinfo(1)
-
-if [ -x /usr/convex/getsysinfo ]
-then
- case `getsysinfo -f cpu_type` in
- c1*)
- echo c1-convex-bsd
- exit ;;
- c2*)
- if getsysinfo -f scalar_acc
- then echo c32-convex-bsd
- else echo c2-convex-bsd
- fi
- exit ;;
- c34*)
- echo c34-convex-bsd
- exit ;;
- c38*)
- echo c38-convex-bsd
- exit ;;
- c4*)
- echo c4-convex-bsd
- exit ;;
- esac
-fi
-
-cat >&2 <<EOF
-$0: unable to guess system type
-
-This script, last modified $timestamp, has failed to recognize
-the operating system you are using. It is advised that you
-download the most up to date version of the config scripts from
-
- http://git.savannah.gnu.org/gitweb/?p=config.git;a=blob_plain;f=config.guess;hb=HEAD
-and
- http://git.savannah.gnu.org/gitweb/?p=config.git;a=blob_plain;f=config.sub;hb=HEAD
-
-If the version you run ($0) is already up to date, please
-send the following data and any information you think might be
-pertinent to <config-patches@gnu.org> in order to provide the needed
-information to handle your system.
-
-config.guess timestamp = $timestamp
-
-uname -m = `(uname -m) 2>/dev/null || echo unknown`
-uname -r = `(uname -r) 2>/dev/null || echo unknown`
-uname -s = `(uname -s) 2>/dev/null || echo unknown`
-uname -v = `(uname -v) 2>/dev/null || echo unknown`
-
-/usr/bin/uname -p = `(/usr/bin/uname -p) 2>/dev/null`
-/bin/uname -X = `(/bin/uname -X) 2>/dev/null`
-
-hostinfo = `(hostinfo) 2>/dev/null`
-/bin/universe = `(/bin/universe) 2>/dev/null`
-/usr/bin/arch -k = `(/usr/bin/arch -k) 2>/dev/null`
-/bin/arch = `(/bin/arch) 2>/dev/null`
-/usr/bin/oslevel = `(/usr/bin/oslevel) 2>/dev/null`
-/usr/convex/getsysinfo = `(/usr/convex/getsysinfo) 2>/dev/null`
-
-UNAME_MACHINE = ${UNAME_MACHINE}
-UNAME_RELEASE = ${UNAME_RELEASE}
-UNAME_SYSTEM = ${UNAME_SYSTEM}
-UNAME_VERSION = ${UNAME_VERSION}
-EOF
-
-exit 1
-
-# Local variables:
-# eval: (add-hook 'write-file-hooks 'time-stamp)
-# time-stamp-start: "timestamp='"
-# time-stamp-format: "%:y-%02m-%02d"
-# time-stamp-end: "'"
-# End:
diff --git a/scripts/training/MGIZA/config.h.in b/scripts/training/MGIZA/config.h.in
deleted file mode 100644
index b168b3e..0000000
--- a/scripts/training/MGIZA/config.h.in
+++ /dev/null
@@ -1,25 +0,0 @@
-/* config.h.in. Generated from configure.ac by autoheader. */
-
-/* Name of package */
-#undef PACKAGE
-
-/* Define to the address where bug reports for this package should be sent. */
-#undef PACKAGE_BUGREPORT
-
-/* Define to the full name of this package. */
-#undef PACKAGE_NAME
-
-/* Define to the full name and version of this package. */
-#undef PACKAGE_STRING
-
-/* Define to the one symbol short name of this package. */
-#undef PACKAGE_TARNAME
-
-/* Define to the version of this package. */
-#undef PACKAGE_VERSION
-
-/* Define to 1 if you have the ANSI C header files. */
-#undef STDC_HEADERS
-
-/* Version number of package */
-#undef VERSION
diff --git a/scripts/training/MGIZA/config.sub b/scripts/training/MGIZA/config.sub
deleted file mode 100755
index 6759825..0000000
--- a/scripts/training/MGIZA/config.sub
+++ /dev/null
@@ -1,1658 +0,0 @@
-#! /bin/sh
-# Configuration validation subroutine script.
-# Copyright (C) 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999,
-# 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008
-# Free Software Foundation, Inc.
-
-timestamp='2008-01-16'
-
-# This file is (in principle) common to ALL GNU software.
-# The presence of a machine in this file suggests that SOME GNU software
-# can handle that machine. It does not imply ALL GNU software can.
-#
-# This file is free software; you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published by
-# the Free Software Foundation; either version 2 of the License, or
-# (at your option) any later version.
-#
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-# GNU General Public License for more details.
-#
-# You should have received a copy of the GNU General Public License
-# along with this program; if not, write to the Free Software
-# Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston, MA
-# 02110-1301, USA.
-#
-# As a special exception to the GNU General Public License, if you
-# distribute this file as part of a program that contains a
-# configuration script generated by Autoconf, you may include it under
-# the same distribution terms that you use for the rest of that program.
-
-
-# Please send patches to <config-patches@gnu.org>. Submit a context
-# diff and a properly formatted ChangeLog entry.
-#
-# Configuration subroutine to validate and canonicalize a configuration type.
-# Supply the specified configuration type as an argument.
-# If it is invalid, we print an error message on stderr and exit with code 1.
-# Otherwise, we print the canonical config type on stdout and succeed.
-
-# This file is supposed to be the same for all GNU packages
-# and recognize all the CPU types, system types and aliases
-# that are meaningful with *any* GNU software.
-# Each package is responsible for reporting which valid configurations
-# it does not support. The user should be able to distinguish
-# a failure to support a valid configuration from a meaningless
-# configuration.
-
-# The goal of this file is to map all the various variations of a given
-# machine specification into a single specification in the form:
-# CPU_TYPE-MANUFACTURER-OPERATING_SYSTEM
-# or in some cases, the newer four-part form:
-# CPU_TYPE-MANUFACTURER-KERNEL-OPERATING_SYSTEM
-# It is wrong to echo any other type of specification.
-
-me=`echo "$0" | sed -e 's,.*/,,'`
-
-usage="\
-Usage: $0 [OPTION] CPU-MFR-OPSYS
- $0 [OPTION] ALIAS
-
-Canonicalize a configuration name.
-
-Operation modes:
- -h, --help print this help, then exit
- -t, --time-stamp print date of last modification, then exit
- -v, --version print version number, then exit
-
-Report bugs and patches to <config-patches@gnu.org>."
-
-version="\
-GNU config.sub ($timestamp)
-
-Copyright (C) 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
-2002, 2003, 2004, 2005, 2006, 2007, 2008 Free Software Foundation, Inc.
-
-This is free software; see the source for copying conditions. There is NO
-warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE."
-
-help="
-Try \`$me --help' for more information."
-
-# Parse command line
-while test $# -gt 0 ; do
- case $1 in
- --time-stamp | --time* | -t )
- echo "$timestamp" ; exit ;;
- --version | -v )
- echo "$version" ; exit ;;
- --help | --h* | -h )
- echo "$usage"; exit ;;
- -- ) # Stop option processing
- shift; break ;;
- - ) # Use stdin as input.
- break ;;
- -* )
- echo "$me: invalid option $1$help"
- exit 1 ;;
-
- *local*)
- # First pass through any local machine types.
- echo $1
- exit ;;
-
- * )
- break ;;
- esac
-done
-
-case $# in
- 0) echo "$me: missing argument$help" >&2
- exit 1;;
- 1) ;;
- *) echo "$me: too many arguments$help" >&2
- exit 1;;
-esac
-
-# Separate what the user gave into CPU-COMPANY and OS or KERNEL-OS (if any).
-# Here we must recognize all the valid KERNEL-OS combinations.
-maybe_os=`echo $1 | sed 's/^\(.*\)-\([^-]*-[^-]*\)$/\2/'`
-case $maybe_os in
- nto-qnx* | linux-gnu* | linux-dietlibc | linux-newlib* | linux-uclibc* | \
- uclinux-uclibc* | uclinux-gnu* | kfreebsd*-gnu* | knetbsd*-gnu* | netbsd*-gnu* | \
- storm-chaos* | os2-emx* | rtmk-nova*)
- os=-$maybe_os
- basic_machine=`echo $1 | sed 's/^\(.*\)-\([^-]*-[^-]*\)$/\1/'`
- ;;
- *)
- basic_machine=`echo $1 | sed 's/-[^-]*$//'`
- if [ $basic_machine != $1 ]
- then os=`echo $1 | sed 's/.*-/-/'`
- else os=; fi
- ;;
-esac
-
-### Let's recognize common machines as not being operating systems so
-### that things like config.sub decstation-3100 work. We also
-### recognize some manufacturers as not being operating systems, so we
-### can provide default operating systems below.
-case $os in
- -sun*os*)
- # Prevent following clause from handling this invalid input.
- ;;
- -dec* | -mips* | -sequent* | -encore* | -pc532* | -sgi* | -sony* | \
- -att* | -7300* | -3300* | -delta* | -motorola* | -sun[234]* | \
- -unicom* | -ibm* | -next | -hp | -isi* | -apollo | -altos* | \
- -convergent* | -ncr* | -news | -32* | -3600* | -3100* | -hitachi* |\
- -c[123]* | -convex* | -sun | -crds | -omron* | -dg | -ultra | -tti* | \
- -harris | -dolphin | -highlevel | -gould | -cbm | -ns | -masscomp | \
- -apple | -axis | -knuth | -cray)
- os=
- basic_machine=$1
- ;;
- -sim | -cisco | -oki | -wec | -winbond)
- os=
- basic_machine=$1
- ;;
- -scout)
- ;;
- -wrs)
- os=-vxworks
- basic_machine=$1
- ;;
- -chorusos*)
- os=-chorusos
- basic_machine=$1
- ;;
- -chorusrdb)
- os=-chorusrdb
- basic_machine=$1
- ;;
- -hiux*)
- os=-hiuxwe2
- ;;
- -sco6)
- os=-sco5v6
- basic_machine=`echo $1 | sed -e 's/86-.*/86-pc/'`
- ;;
- -sco5)
- os=-sco3.2v5
- basic_machine=`echo $1 | sed -e 's/86-.*/86-pc/'`
- ;;
- -sco4)
- os=-sco3.2v4
- basic_machine=`echo $1 | sed -e 's/86-.*/86-pc/'`
- ;;
- -sco3.2.[4-9]*)
- os=`echo $os | sed -e 's/sco3.2./sco3.2v/'`
- basic_machine=`echo $1 | sed -e 's/86-.*/86-pc/'`
- ;;
- -sco3.2v[4-9]*)
- # Don't forget version if it is 3.2v4 or newer.
- basic_machine=`echo $1 | sed -e 's/86-.*/86-pc/'`
- ;;
- -sco5v6*)
- # Don't forget version if it is 3.2v4 or newer.
- basic_machine=`echo $1 | sed -e 's/86-.*/86-pc/'`
- ;;
- -sco*)
- os=-sco3.2v2
- basic_machine=`echo $1 | sed -e 's/86-.*/86-pc/'`
- ;;
- -udk*)
- basic_machine=`echo $1 | sed -e 's/86-.*/86-pc/'`
- ;;
- -isc)
- os=-isc2.2
- basic_machine=`echo $1 | sed -e 's/86-.*/86-pc/'`
- ;;
- -clix*)
- basic_machine=clipper-intergraph
- ;;
- -isc*)
- basic_machine=`echo $1 | sed -e 's/86-.*/86-pc/'`
- ;;
- -lynx*)
- os=-lynxos
- ;;
- -ptx*)
- basic_machine=`echo $1 | sed -e 's/86-.*/86-sequent/'`
- ;;
- -windowsnt*)
- os=`echo $os | sed -e 's/windowsnt/winnt/'`
- ;;
- -psos*)
- os=-psos
- ;;
- -mint | -mint[0-9]*)
- basic_machine=m68k-atari
- os=-mint
- ;;
-esac
-
-# Decode aliases for certain CPU-COMPANY combinations.
-case $basic_machine in
- # Recognize the basic CPU types without company name.
- # Some are omitted here because they have special meanings below.
- 1750a | 580 \
- | a29k \
- | alpha | alphaev[4-8] | alphaev56 | alphaev6[78] | alphapca5[67] \
- | alpha64 | alpha64ev[4-8] | alpha64ev56 | alpha64ev6[78] | alpha64pca5[67] \
- | am33_2.0 \
- | arc | arm | arm[bl]e | arme[lb] | armv[2345] | armv[345][lb] | avr | avr32 \
- | bfin \
- | c4x | clipper \
- | d10v | d30v | dlx | dsp16xx \
- | fido | fr30 | frv \
- | h8300 | h8500 | hppa | hppa1.[01] | hppa2.0 | hppa2.0[nw] | hppa64 \
- | i370 | i860 | i960 | ia64 \
- | ip2k | iq2000 \
- | m32c | m32r | m32rle | m68000 | m68k | m88k \
- | maxq | mb | microblaze | mcore | mep \
- | mips | mipsbe | mipseb | mipsel | mipsle \
- | mips16 \
- | mips64 | mips64el \
- | mips64vr | mips64vrel \
- | mips64orion | mips64orionel \
- | mips64vr4100 | mips64vr4100el \
- | mips64vr4300 | mips64vr4300el \
- | mips64vr5000 | mips64vr5000el \
- | mips64vr5900 | mips64vr5900el \
- | mipsisa32 | mipsisa32el \
- | mipsisa32r2 | mipsisa32r2el \
- | mipsisa64 | mipsisa64el \
- | mipsisa64r2 | mipsisa64r2el \
- | mipsisa64sb1 | mipsisa64sb1el \
- | mipsisa64sr71k | mipsisa64sr71kel \
- | mipstx39 | mipstx39el \
- | mn10200 | mn10300 \
- | mt \
- | msp430 \
- | nios | nios2 \
- | ns16k | ns32k \
- | or32 \
- | pdp10 | pdp11 | pj | pjl \
- | powerpc | powerpc64 | powerpc64le | powerpcle | ppcbe \
- | pyramid \
- | score \
- | sh | sh[1234] | sh[24]a | sh[23]e | sh[34]eb | sheb | shbe | shle | sh[1234]le | sh3ele \
- | sh64 | sh64le \
- | sparc | sparc64 | sparc64b | sparc64v | sparc86x | sparclet | sparclite \
- | sparcv8 | sparcv9 | sparcv9b | sparcv9v \
- | spu | strongarm \
- | tahoe | thumb | tic4x | tic80 | tron \
- | v850 | v850e \
- | we32k \
- | x86 | xc16x | xscale | xscalee[bl] | xstormy16 | xtensa \
- | z8k)
- basic_machine=$basic_machine-unknown
- ;;
- m6811 | m68hc11 | m6812 | m68hc12)
- # Motorola 68HC11/12.
- basic_machine=$basic_machine-unknown
- os=-none
- ;;
- m88110 | m680[12346]0 | m683?2 | m68360 | m5200 | v70 | w65 | z8k)
- ;;
- ms1)
- basic_machine=mt-unknown
- ;;
-
- # We use `pc' rather than `unknown'
- # because (1) that's what they normally are, and
- # (2) the word "unknown" tends to confuse beginning users.
- i*86 | x86_64)
- basic_machine=$basic_machine-pc
- ;;
- # Object if more than one company name word.
- *-*-*)
- echo Invalid configuration \`$1\': machine \`$basic_machine\' not recognized 1>&2
- exit 1
- ;;
- # Recognize the basic CPU types with company name.
- 580-* \
- | a29k-* \
- | alpha-* | alphaev[4-8]-* | alphaev56-* | alphaev6[78]-* \
- | alpha64-* | alpha64ev[4-8]-* | alpha64ev56-* | alpha64ev6[78]-* \
- | alphapca5[67]-* | alpha64pca5[67]-* | arc-* \
- | arm-* | armbe-* | armle-* | armeb-* | armv*-* \
- | avr-* | avr32-* \
- | bfin-* | bs2000-* \
- | c[123]* | c30-* | [cjt]90-* | c4x-* | c54x-* | c55x-* | c6x-* \
- | clipper-* | craynv-* | cydra-* \
- | d10v-* | d30v-* | dlx-* \
- | elxsi-* \
- | f30[01]-* | f700-* | fido-* | fr30-* | frv-* | fx80-* \
- | h8300-* | h8500-* \
- | hppa-* | hppa1.[01]-* | hppa2.0-* | hppa2.0[nw]-* | hppa64-* \
- | i*86-* | i860-* | i960-* | ia64-* \
- | ip2k-* | iq2000-* \
- | m32c-* | m32r-* | m32rle-* \
- | m68000-* | m680[012346]0-* | m68360-* | m683?2-* | m68k-* \
- | m88110-* | m88k-* | maxq-* | mcore-* \
- | mips-* | mipsbe-* | mipseb-* | mipsel-* | mipsle-* \
- | mips16-* \
- | mips64-* | mips64el-* \
- | mips64vr-* | mips64vrel-* \
- | mips64orion-* | mips64orionel-* \
- | mips64vr4100-* | mips64vr4100el-* \
- | mips64vr4300-* | mips64vr4300el-* \
- | mips64vr5000-* | mips64vr5000el-* \
- | mips64vr5900-* | mips64vr5900el-* \
- | mipsisa32-* | mipsisa32el-* \
- | mipsisa32r2-* | mipsisa32r2el-* \
- | mipsisa64-* | mipsisa64el-* \
- | mipsisa64r2-* | mipsisa64r2el-* \
- | mipsisa64sb1-* | mipsisa64sb1el-* \
- | mipsisa64sr71k-* | mipsisa64sr71kel-* \
- | mipstx39-* | mipstx39el-* \
- | mmix-* \
- | mt-* \
- | msp430-* \
- | nios-* | nios2-* \
- | none-* | np1-* | ns16k-* | ns32k-* \
- | orion-* \
- | pdp10-* | pdp11-* | pj-* | pjl-* | pn-* | power-* \
- | powerpc-* | powerpc64-* | powerpc64le-* | powerpcle-* | ppcbe-* \
- | pyramid-* \
- | romp-* | rs6000-* \
- | sh-* | sh[1234]-* | sh[24]a-* | sh[23]e-* | sh[34]eb-* | sheb-* | shbe-* \
- | shle-* | sh[1234]le-* | sh3ele-* | sh64-* | sh64le-* \
- | sparc-* | sparc64-* | sparc64b-* | sparc64v-* | sparc86x-* | sparclet-* \
- | sparclite-* \
- | sparcv8-* | sparcv9-* | sparcv9b-* | sparcv9v-* | strongarm-* | sv1-* | sx?-* \
- | tahoe-* | thumb-* \
- | tic30-* | tic4x-* | tic54x-* | tic55x-* | tic6x-* | tic80-* \
- | tron-* \
- | v850-* | v850e-* | vax-* \
- | we32k-* \
- | x86-* | x86_64-* | xc16x-* | xps100-* | xscale-* | xscalee[bl]-* \
- | xstormy16-* | xtensa*-* \
- | ymp-* \
- | z8k-*)
- ;;
- # Recognize the basic CPU types without company name, with glob match.
- xtensa*)
- basic_machine=$basic_machine-unknown
- ;;
- # Recognize the various machine names and aliases which stand
- # for a CPU type and a company and sometimes even an OS.
- 386bsd)
- basic_machine=i386-unknown
- os=-bsd
- ;;
- 3b1 | 7300 | 7300-att | att-7300 | pc7300 | safari | unixpc)
- basic_machine=m68000-att
- ;;
- 3b*)
- basic_machine=we32k-att
- ;;
- a29khif)
- basic_machine=a29k-amd
- os=-udi
- ;;
- abacus)
- basic_machine=abacus-unknown
- ;;
- adobe68k)
- basic_machine=m68010-adobe
- os=-scout
- ;;
- alliant | fx80)
- basic_machine=fx80-alliant
- ;;
- altos | altos3068)
- basic_machine=m68k-altos
- ;;
- am29k)
- basic_machine=a29k-none
- os=-bsd
- ;;
- amd64)
- basic_machine=x86_64-pc
- ;;
- amd64-*)
- basic_machine=x86_64-`echo $basic_machine | sed 's/^[^-]*-//'`
- ;;
- amdahl)
- basic_machine=580-amdahl
- os=-sysv
- ;;
- amiga | amiga-*)
- basic_machine=m68k-unknown
- ;;
- amigaos | amigados)
- basic_machine=m68k-unknown
- os=-amigaos
- ;;
- amigaunix | amix)
- basic_machine=m68k-unknown
- os=-sysv4
- ;;
- apollo68)
- basic_machine=m68k-apollo
- os=-sysv
- ;;
- apollo68bsd)
- basic_machine=m68k-apollo
- os=-bsd
- ;;
- aux)
- basic_machine=m68k-apple
- os=-aux
- ;;
- balance)
- basic_machine=ns32k-sequent
- os=-dynix
- ;;
- blackfin)
- basic_machine=bfin-unknown
- os=-linux
- ;;
- blackfin-*)
- basic_machine=bfin-`echo $basic_machine | sed 's/^[^-]*-//'`
- os=-linux
- ;;
- c90)
- basic_machine=c90-cray
- os=-unicos
- ;;
- convex-c1)
- basic_machine=c1-convex
- os=-bsd
- ;;
- convex-c2)
- basic_machine=c2-convex
- os=-bsd
- ;;
- convex-c32)
- basic_machine=c32-convex
- os=-bsd
- ;;
- convex-c34)
- basic_machine=c34-convex
- os=-bsd
- ;;
- convex-c38)
- basic_machine=c38-convex
- os=-bsd
- ;;
- cray | j90)
- basic_machine=j90-cray
- os=-unicos
- ;;
- craynv)
- basic_machine=craynv-cray
- os=-unicosmp
- ;;
- cr16)
- basic_machine=cr16-unknown
- os=-elf
- ;;
- crds | unos)
- basic_machine=m68k-crds
- ;;
- crisv32 | crisv32-* | etraxfs*)
- basic_machine=crisv32-axis
- ;;
- cris | cris-* | etrax*)
- basic_machine=cris-axis
- ;;
- crx)
- basic_machine=crx-unknown
- os=-elf
- ;;
- da30 | da30-*)
- basic_machine=m68k-da30
- ;;
- decstation | decstation-3100 | pmax | pmax-* | pmin | dec3100 | decstatn)
- basic_machine=mips-dec
- ;;
- decsystem10* | dec10*)
- basic_machine=pdp10-dec
- os=-tops10
- ;;
- decsystem20* | dec20*)
- basic_machine=pdp10-dec
- os=-tops20
- ;;
- delta | 3300 | motorola-3300 | motorola-delta \
- | 3300-motorola | delta-motorola)
- basic_machine=m68k-motorola
- ;;
- delta88)
- basic_machine=m88k-motorola
- os=-sysv3
- ;;
- djgpp)
- basic_machine=i586-pc
- os=-msdosdjgpp
- ;;
- dpx20 | dpx20-*)
- basic_machine=rs6000-bull
- os=-bosx
- ;;
- dpx2* | dpx2*-bull)
- basic_machine=m68k-bull
- os=-sysv3
- ;;
- ebmon29k)
- basic_machine=a29k-amd
- os=-ebmon
- ;;
- elxsi)
- basic_machine=elxsi-elxsi
- os=-bsd
- ;;
- encore | umax | mmax)
- basic_machine=ns32k-encore
- ;;
- es1800 | OSE68k | ose68k | ose | OSE)
- basic_machine=m68k-ericsson
- os=-ose
- ;;
- fx2800)
- basic_machine=i860-alliant
- ;;
- genix)
- basic_machine=ns32k-ns
- ;;
- gmicro)
- basic_machine=tron-gmicro
- os=-sysv
- ;;
- go32)
- basic_machine=i386-pc
- os=-go32
- ;;
- h3050r* | hiux*)
- basic_machine=hppa1.1-hitachi
- os=-hiuxwe2
- ;;
- h8300hms)
- basic_machine=h8300-hitachi
- os=-hms
- ;;
- h8300xray)
- basic_machine=h8300-hitachi
- os=-xray
- ;;
- h8500hms)
- basic_machine=h8500-hitachi
- os=-hms
- ;;
- harris)
- basic_machine=m88k-harris
- os=-sysv3
- ;;
- hp300-*)
- basic_machine=m68k-hp
- ;;
- hp300bsd)
- basic_machine=m68k-hp
- os=-bsd
- ;;
- hp300hpux)
- basic_machine=m68k-hp
- os=-hpux
- ;;
- hp3k9[0-9][0-9] | hp9[0-9][0-9])
- basic_machine=hppa1.0-hp
- ;;
- hp9k2[0-9][0-9] | hp9k31[0-9])
- basic_machine=m68000-hp
- ;;
- hp9k3[2-9][0-9])
- basic_machine=m68k-hp
- ;;
- hp9k6[0-9][0-9] | hp6[0-9][0-9])
- basic_machine=hppa1.0-hp
- ;;
- hp9k7[0-79][0-9] | hp7[0-79][0-9])
- basic_machine=hppa1.1-hp
- ;;
- hp9k78[0-9] | hp78[0-9])
- # FIXME: really hppa2.0-hp
- basic_machine=hppa1.1-hp
- ;;
- hp9k8[67]1 | hp8[67]1 | hp9k80[24] | hp80[24] | hp9k8[78]9 | hp8[78]9 | hp9k893 | hp893)
- # FIXME: really hppa2.0-hp
- basic_machine=hppa1.1-hp
- ;;
- hp9k8[0-9][13679] | hp8[0-9][13679])
- basic_machine=hppa1.1-hp
- ;;
- hp9k8[0-9][0-9] | hp8[0-9][0-9])
- basic_machine=hppa1.0-hp
- ;;
- hppa-next)
- os=-nextstep3
- ;;
- hppaosf)
- basic_machine=hppa1.1-hp
- os=-osf
- ;;
- hppro)
- basic_machine=hppa1.1-hp
- os=-proelf
- ;;
- i370-ibm* | ibm*)
- basic_machine=i370-ibm
- ;;
-# I'm not sure what "Sysv32" means. Should this be sysv3.2?
- i*86v32)
- basic_machine=`echo $1 | sed -e 's/86.*/86-pc/'`
- os=-sysv32
- ;;
- i*86v4*)
- basic_machine=`echo $1 | sed -e 's/86.*/86-pc/'`
- os=-sysv4
- ;;
- i*86v)
- basic_machine=`echo $1 | sed -e 's/86.*/86-pc/'`
- os=-sysv
- ;;
- i*86sol2)
- basic_machine=`echo $1 | sed -e 's/86.*/86-pc/'`
- os=-solaris2
- ;;
- i386mach)
- basic_machine=i386-mach
- os=-mach
- ;;
- i386-vsta | vsta)
- basic_machine=i386-unknown
- os=-vsta
- ;;
- iris | iris4d)
- basic_machine=mips-sgi
- case $os in
- -irix*)
- ;;
- *)
- os=-irix4
- ;;
- esac
- ;;
- isi68 | isi)
- basic_machine=m68k-isi
- os=-sysv
- ;;
- m68knommu)
- basic_machine=m68k-unknown
- os=-linux
- ;;
- m68knommu-*)
- basic_machine=m68k-`echo $basic_machine | sed 's/^[^-]*-//'`
- os=-linux
- ;;
- m88k-omron*)
- basic_machine=m88k-omron
- ;;
- magnum | m3230)
- basic_machine=mips-mips
- os=-sysv
- ;;
- merlin)
- basic_machine=ns32k-utek
- os=-sysv
- ;;
- mingw32)
- basic_machine=i386-pc
- os=-mingw32
- ;;
- mingw32ce)
- basic_machine=arm-unknown
- os=-mingw32ce
- ;;
- miniframe)
- basic_machine=m68000-convergent
- ;;
- *mint | -mint[0-9]* | *MiNT | *MiNT[0-9]*)
- basic_machine=m68k-atari
- os=-mint
- ;;
- mips3*-*)
- basic_machine=`echo $basic_machine | sed -e 's/mips3/mips64/'`
- ;;
- mips3*)
- basic_machine=`echo $basic_machine | sed -e 's/mips3/mips64/'`-unknown
- ;;
- monitor)
- basic_machine=m68k-rom68k
- os=-coff
- ;;
- morphos)
- basic_machine=powerpc-unknown
- os=-morphos
- ;;
- msdos)
- basic_machine=i386-pc
- os=-msdos
- ;;
- ms1-*)
- basic_machine=`echo $basic_machine | sed -e 's/ms1-/mt-/'`
- ;;
- mvs)
- basic_machine=i370-ibm
- os=-mvs
- ;;
- ncr3000)
- basic_machine=i486-ncr
- os=-sysv4
- ;;
- netbsd386)
- basic_machine=i386-unknown
- os=-netbsd
- ;;
- netwinder)
- basic_machine=armv4l-rebel
- os=-linux
- ;;
- news | news700 | news800 | news900)
- basic_machine=m68k-sony
- os=-newsos
- ;;
- news1000)
- basic_machine=m68030-sony
- os=-newsos
- ;;
- news-3600 | risc-news)
- basic_machine=mips-sony
- os=-newsos
- ;;
- necv70)
- basic_machine=v70-nec
- os=-sysv
- ;;
- next | m*-next )
- basic_machine=m68k-next
- case $os in
- -nextstep* )
- ;;
- -ns2*)
- os=-nextstep2
- ;;
- *)
- os=-nextstep3
- ;;
- esac
- ;;
- nh3000)
- basic_machine=m68k-harris
- os=-cxux
- ;;
- nh[45]000)
- basic_machine=m88k-harris
- os=-cxux
- ;;
- nindy960)
- basic_machine=i960-intel
- os=-nindy
- ;;
- mon960)
- basic_machine=i960-intel
- os=-mon960
- ;;
- nonstopux)
- basic_machine=mips-compaq
- os=-nonstopux
- ;;
- np1)
- basic_machine=np1-gould
- ;;
- nsr-tandem)
- basic_machine=nsr-tandem
- ;;
- op50n-* | op60c-*)
- basic_machine=hppa1.1-oki
- os=-proelf
- ;;
- openrisc | openrisc-*)
- basic_machine=or32-unknown
- ;;
- os400)
- basic_machine=powerpc-ibm
- os=-os400
- ;;
- OSE68000 | ose68000)
- basic_machine=m68000-ericsson
- os=-ose
- ;;
- os68k)
- basic_machine=m68k-none
- os=-os68k
- ;;
- pa-hitachi)
- basic_machine=hppa1.1-hitachi
- os=-hiuxwe2
- ;;
- paragon)
- basic_machine=i860-intel
- os=-osf
- ;;
- parisc)
- basic_machine=hppa-unknown
- os=-linux
- ;;
- parisc-*)
- basic_machine=hppa-`echo $basic_machine | sed 's/^[^-]*-//'`
- os=-linux
- ;;
- pbd)
- basic_machine=sparc-tti
- ;;
- pbb)
- basic_machine=m68k-tti
- ;;
- pc532 | pc532-*)
- basic_machine=ns32k-pc532
- ;;
- pc98)
- basic_machine=i386-pc
- ;;
- pc98-*)
- basic_machine=i386-`echo $basic_machine | sed 's/^[^-]*-//'`
- ;;
- pentium | p5 | k5 | k6 | nexgen | viac3)
- basic_machine=i586-pc
- ;;
- pentiumpro | p6 | 6x86 | athlon | athlon_*)
- basic_machine=i686-pc
- ;;
- pentiumii | pentium2 | pentiumiii | pentium3)
- basic_machine=i686-pc
- ;;
- pentium4)
- basic_machine=i786-pc
- ;;
- pentium-* | p5-* | k5-* | k6-* | nexgen-* | viac3-*)
- basic_machine=i586-`echo $basic_machine | sed 's/^[^-]*-//'`
- ;;
- pentiumpro-* | p6-* | 6x86-* | athlon-*)
- basic_machine=i686-`echo $basic_machine | sed 's/^[^-]*-//'`
- ;;
- pentiumii-* | pentium2-* | pentiumiii-* | pentium3-*)
- basic_machine=i686-`echo $basic_machine | sed 's/^[^-]*-//'`
- ;;
- pentium4-*)
- basic_machine=i786-`echo $basic_machine | sed 's/^[^-]*-//'`
- ;;
- pn)
- basic_machine=pn-gould
- ;;
- power) basic_machine=power-ibm
- ;;
- ppc) basic_machine=powerpc-unknown
- ;;
- ppc-*) basic_machine=powerpc-`echo $basic_machine | sed 's/^[^-]*-//'`
- ;;
- ppcle | powerpclittle | ppc-le | powerpc-little)
- basic_machine=powerpcle-unknown
- ;;
- ppcle-* | powerpclittle-*)
- basic_machine=powerpcle-`echo $basic_machine | sed 's/^[^-]*-//'`
- ;;
- ppc64) basic_machine=powerpc64-unknown
- ;;
- ppc64-*) basic_machine=powerpc64-`echo $basic_machine | sed 's/^[^-]*-//'`
- ;;
- ppc64le | powerpc64little | ppc64-le | powerpc64-little)
- basic_machine=powerpc64le-unknown
- ;;
- ppc64le-* | powerpc64little-*)
- basic_machine=powerpc64le-`echo $basic_machine | sed 's/^[^-]*-//'`
- ;;
- ps2)
- basic_machine=i386-ibm
- ;;
- pw32)
- basic_machine=i586-unknown
- os=-pw32
- ;;
- rdos)
- basic_machine=i386-pc
- os=-rdos
- ;;
- rom68k)
- basic_machine=m68k-rom68k
- os=-coff
- ;;
- rm[46]00)
- basic_machine=mips-siemens
- ;;
- rtpc | rtpc-*)
- basic_machine=romp-ibm
- ;;
- s390 | s390-*)
- basic_machine=s390-ibm
- ;;
- s390x | s390x-*)
- basic_machine=s390x-ibm
- ;;
- sa29200)
- basic_machine=a29k-amd
- os=-udi
- ;;
- sb1)
- basic_machine=mipsisa64sb1-unknown
- ;;
- sb1el)
- basic_machine=mipsisa64sb1el-unknown
- ;;
- sde)
- basic_machine=mipsisa32-sde
- os=-elf
- ;;
- sei)
- basic_machine=mips-sei
- os=-seiux
- ;;
- sequent)
- basic_machine=i386-sequent
- ;;
- sh)
- basic_machine=sh-hitachi
- os=-hms
- ;;
- sh5el)
- basic_machine=sh5le-unknown
- ;;
- sh64)
- basic_machine=sh64-unknown
- ;;
- sparclite-wrs | simso-wrs)
- basic_machine=sparclite-wrs
- os=-vxworks
- ;;
- sps7)
- basic_machine=m68k-bull
- os=-sysv2
- ;;
- spur)
- basic_machine=spur-unknown
- ;;
- st2000)
- basic_machine=m68k-tandem
- ;;
- stratus)
- basic_machine=i860-stratus
- os=-sysv4
- ;;
- sun2)
- basic_machine=m68000-sun
- ;;
- sun2os3)
- basic_machine=m68000-sun
- os=-sunos3
- ;;
- sun2os4)
- basic_machine=m68000-sun
- os=-sunos4
- ;;
- sun3os3)
- basic_machine=m68k-sun
- os=-sunos3
- ;;
- sun3os4)
- basic_machine=m68k-sun
- os=-sunos4
- ;;
- sun4os3)
- basic_machine=sparc-sun
- os=-sunos3
- ;;
- sun4os4)
- basic_machine=sparc-sun
- os=-sunos4
- ;;
- sun4sol2)
- basic_machine=sparc-sun
- os=-solaris2
- ;;
- sun3 | sun3-*)
- basic_machine=m68k-sun
- ;;
- sun4)
- basic_machine=sparc-sun
- ;;
- sun386 | sun386i | roadrunner)
- basic_machine=i386-sun
- ;;
- sv1)
- basic_machine=sv1-cray
- os=-unicos
- ;;
- symmetry)
- basic_machine=i386-sequent
- os=-dynix
- ;;
- t3e)
- basic_machine=alphaev5-cray
- os=-unicos
- ;;
- t90)
- basic_machine=t90-cray
- os=-unicos
- ;;
- tic54x | c54x*)
- basic_machine=tic54x-unknown
- os=-coff
- ;;
- tic55x | c55x*)
- basic_machine=tic55x-unknown
- os=-coff
- ;;
- tic6x | c6x*)
- basic_machine=tic6x-unknown
- os=-coff
- ;;
- tile*)
- basic_machine=tile-unknown
- os=-linux-gnu
- ;;
- tx39)
- basic_machine=mipstx39-unknown
- ;;
- tx39el)
- basic_machine=mipstx39el-unknown
- ;;
- toad1)
- basic_machine=pdp10-xkl
- os=-tops20
- ;;
- tower | tower-32)
- basic_machine=m68k-ncr
- ;;
- tpf)
- basic_machine=s390x-ibm
- os=-tpf
- ;;
- udi29k)
- basic_machine=a29k-amd
- os=-udi
- ;;
- ultra3)
- basic_machine=a29k-nyu
- os=-sym1
- ;;
- v810 | necv810)
- basic_machine=v810-nec
- os=-none
- ;;
- vaxv)
- basic_machine=vax-dec
- os=-sysv
- ;;
- vms)
- basic_machine=vax-dec
- os=-vms
- ;;
- vpp*|vx|vx-*)
- basic_machine=f301-fujitsu
- ;;
- vxworks960)
- basic_machine=i960-wrs
- os=-vxworks
- ;;
- vxworks68)
- basic_machine=m68k-wrs
- os=-vxworks
- ;;
- vxworks29k)
- basic_machine=a29k-wrs
- os=-vxworks
- ;;
- w65*)
- basic_machine=w65-wdc
- os=-none
- ;;
- w89k-*)
- basic_machine=hppa1.1-winbond
- os=-proelf
- ;;
- xbox)
- basic_machine=i686-pc
- os=-mingw32
- ;;
- xps | xps100)
- basic_machine=xps100-honeywell
- ;;
- ymp)
- basic_machine=ymp-cray
- os=-unicos
- ;;
- z8k-*-coff)
- basic_machine=z8k-unknown
- os=-sim
- ;;
- none)
- basic_machine=none-none
- os=-none
- ;;
-
-# Here we handle the default manufacturer of certain CPU types. It is in
-# some cases the only manufacturer, in others, it is the most popular.
- w89k)
- basic_machine=hppa1.1-winbond
- ;;
- op50n)
- basic_machine=hppa1.1-oki
- ;;
- op60c)
- basic_machine=hppa1.1-oki
- ;;
- romp)
- basic_machine=romp-ibm
- ;;
- mmix)
- basic_machine=mmix-knuth
- ;;
- rs6000)
- basic_machine=rs6000-ibm
- ;;
- vax)
- basic_machine=vax-dec
- ;;
- pdp10)
- # there are many clones, so DEC is not a safe bet
- basic_machine=pdp10-unknown
- ;;
- pdp11)
- basic_machine=pdp11-dec
- ;;
- we32k)
- basic_machine=we32k-att
- ;;
- sh[1234] | sh[24]a | sh[34]eb | sh[1234]le | sh[23]ele)
- basic_machine=sh-unknown
- ;;
- sparc | sparcv8 | sparcv9 | sparcv9b | sparcv9v)
- basic_machine=sparc-sun
- ;;
- cydra)
- basic_machine=cydra-cydrome
- ;;
- orion)
- basic_machine=orion-highlevel
- ;;
- orion105)
- basic_machine=clipper-highlevel
- ;;
- mac | mpw | mac-mpw)
- basic_machine=m68k-apple
- ;;
- pmac | pmac-mpw)
- basic_machine=powerpc-apple
- ;;
- *-unknown)
- # Make sure to match an already-canonicalized machine name.
- ;;
- *)
- echo Invalid configuration \`$1\': machine \`$basic_machine\' not recognized 1>&2
- exit 1
- ;;
-esac
-
-# Here we canonicalize certain aliases for manufacturers.
-case $basic_machine in
- *-digital*)
- basic_machine=`echo $basic_machine | sed 's/digital.*/dec/'`
- ;;
- *-commodore*)
- basic_machine=`echo $basic_machine | sed 's/commodore.*/cbm/'`
- ;;
- *)
- ;;
-esac
-
-# Decode manufacturer-specific aliases for certain operating systems.
-
-if [ x"$os" != x"" ]
-then
-case $os in
- # First match some system type aliases
- # that might get confused with valid system types.
- # -solaris* is a basic system type, with this one exception.
- -solaris1 | -solaris1.*)
- os=`echo $os | sed -e 's|solaris1|sunos4|'`
- ;;
- -solaris)
- os=-solaris2
- ;;
- -svr4*)
- os=-sysv4
- ;;
- -unixware*)
- os=-sysv4.2uw
- ;;
- -gnu/linux*)
- os=`echo $os | sed -e 's|gnu/linux|linux-gnu|'`
- ;;
- # First accept the basic system types.
- # The portable systems comes first.
- # Each alternative MUST END IN A *, to match a version number.
- # -sysv* is not here because it comes later, after sysvr4.
- -gnu* | -bsd* | -mach* | -minix* | -genix* | -ultrix* | -irix* \
- | -*vms* | -sco* | -esix* | -isc* | -aix* | -sunos | -sunos[34]*\
- | -hpux* | -unos* | -osf* | -luna* | -dgux* | -solaris* | -sym* \
- | -amigaos* | -amigados* | -msdos* | -newsos* | -unicos* | -aof* \
- | -aos* \
- | -nindy* | -vxsim* | -vxworks* | -ebmon* | -hms* | -mvs* \
- | -clix* | -riscos* | -uniplus* | -iris* | -rtu* | -xenix* \
- | -hiux* | -386bsd* | -knetbsd* | -mirbsd* | -netbsd* \
- | -openbsd* | -solidbsd* \
- | -ekkobsd* | -kfreebsd* | -freebsd* | -riscix* | -lynxos* \
- | -bosx* | -nextstep* | -cxux* | -aout* | -elf* | -oabi* \
- | -ptx* | -coff* | -ecoff* | -winnt* | -domain* | -vsta* \
- | -udi* | -eabi* | -lites* | -ieee* | -go32* | -aux* \
- | -chorusos* | -chorusrdb* \
- | -cygwin* | -pe* | -psos* | -moss* | -proelf* | -rtems* \
- | -mingw32* | -linux-gnu* | -linux-newlib* | -linux-uclibc* \
- | -uxpv* | -beos* | -mpeix* | -udk* \
- | -interix* | -uwin* | -mks* | -rhapsody* | -darwin* | -opened* \
- | -openstep* | -oskit* | -conix* | -pw32* | -nonstopux* \
- | -storm-chaos* | -tops10* | -tenex* | -tops20* | -its* \
- | -os2* | -vos* | -palmos* | -uclinux* | -nucleus* \
- | -morphos* | -superux* | -rtmk* | -rtmk-nova* | -windiss* \
- | -powermax* | -dnix* | -nx6 | -nx7 | -sei* | -dragonfly* \
- | -skyos* | -haiku* | -rdos* | -toppers* | -drops*)
- # Remember, each alternative MUST END IN *, to match a version number.
- ;;
- -qnx*)
- case $basic_machine in
- x86-* | i*86-*)
- ;;
- *)
- os=-nto$os
- ;;
- esac
- ;;
- -nto-qnx*)
- ;;
- -nto*)
- os=`echo $os | sed -e 's|nto|nto-qnx|'`
- ;;
- -sim | -es1800* | -hms* | -xray | -os68k* | -none* | -v88r* \
- | -windows* | -osx | -abug | -netware* | -os9* | -beos* | -haiku* \
- | -macos* | -mpw* | -magic* | -mmixware* | -mon960* | -lnews*)
- ;;
- -mac*)
- os=`echo $os | sed -e 's|mac|macos|'`
- ;;
- -linux-dietlibc)
- os=-linux-dietlibc
- ;;
- -linux*)
- os=`echo $os | sed -e 's|linux|linux-gnu|'`
- ;;
- -sunos5*)
- os=`echo $os | sed -e 's|sunos5|solaris2|'`
- ;;
- -sunos6*)
- os=`echo $os | sed -e 's|sunos6|solaris3|'`
- ;;
- -opened*)
- os=-openedition
- ;;
- -os400*)
- os=-os400
- ;;
- -wince*)
- os=-wince
- ;;
- -osfrose*)
- os=-osfrose
- ;;
- -osf*)
- os=-osf
- ;;
- -utek*)
- os=-bsd
- ;;
- -dynix*)
- os=-bsd
- ;;
- -acis*)
- os=-aos
- ;;
- -atheos*)
- os=-atheos
- ;;
- -syllable*)
- os=-syllable
- ;;
- -386bsd)
- os=-bsd
- ;;
- -ctix* | -uts*)
- os=-sysv
- ;;
- -nova*)
- os=-rtmk-nova
- ;;
- -ns2 )
- os=-nextstep2
- ;;
- -nsk*)
- os=-nsk
- ;;
- # Preserve the version number of sinix5.
- -sinix5.*)
- os=`echo $os | sed -e 's|sinix|sysv|'`
- ;;
- -sinix*)
- os=-sysv4
- ;;
- -tpf*)
- os=-tpf
- ;;
- -triton*)
- os=-sysv3
- ;;
- -oss*)
- os=-sysv3
- ;;
- -svr4)
- os=-sysv4
- ;;
- -svr3)
- os=-sysv3
- ;;
- -sysvr4)
- os=-sysv4
- ;;
- # This must come after -sysvr4.
- -sysv*)
- ;;
- -ose*)
- os=-ose
- ;;
- -es1800*)
- os=-ose
- ;;
- -xenix)
- os=-xenix
- ;;
- -*mint | -mint[0-9]* | -*MiNT | -MiNT[0-9]*)
- os=-mint
- ;;
- -aros*)
- os=-aros
- ;;
- -kaos*)
- os=-kaos
- ;;
- -zvmoe)
- os=-zvmoe
- ;;
- -none)
- ;;
- *)
- # Get rid of the `-' at the beginning of $os.
- os=`echo $os | sed 's/[^-]*-//'`
- echo Invalid configuration \`$1\': system \`$os\' not recognized 1>&2
- exit 1
- ;;
-esac
-else
-
-# Here we handle the default operating systems that come with various machines.
-# The value should be what the vendor currently ships out the door with their
-# machine or put another way, the most popular os provided with the machine.
-
-# Note that if you're going to try to match "-MANUFACTURER" here (say,
-# "-sun"), then you have to tell the case statement up towards the top
-# that MANUFACTURER isn't an operating system. Otherwise, code above
-# will signal an error saying that MANUFACTURER isn't an operating
-# system, and we'll never get to this point.
-
-case $basic_machine in
- score-*)
- os=-elf
- ;;
- spu-*)
- os=-elf
- ;;
- *-acorn)
- os=-riscix1.2
- ;;
- arm*-rebel)
- os=-linux
- ;;
- arm*-semi)
- os=-aout
- ;;
- c4x-* | tic4x-*)
- os=-coff
- ;;
- # This must come before the *-dec entry.
- pdp10-*)
- os=-tops20
- ;;
- pdp11-*)
- os=-none
- ;;
- *-dec | vax-*)
- os=-ultrix4.2
- ;;
- m68*-apollo)
- os=-domain
- ;;
- i386-sun)
- os=-sunos4.0.2
- ;;
- m68000-sun)
- os=-sunos3
- # This also exists in the configure program, but was not the
- # default.
- # os=-sunos4
- ;;
- m68*-cisco)
- os=-aout
- ;;
- mep-*)
- os=-elf
- ;;
- mips*-cisco)
- os=-elf
- ;;
- mips*-*)
- os=-elf
- ;;
- or32-*)
- os=-coff
- ;;
- *-tti) # must be before sparc entry or we get the wrong os.
- os=-sysv3
- ;;
- sparc-* | *-sun)
- os=-sunos4.1.1
- ;;
- *-be)
- os=-beos
- ;;
- *-haiku)
- os=-haiku
- ;;
- *-ibm)
- os=-aix
- ;;
- *-knuth)
- os=-mmixware
- ;;
- *-wec)
- os=-proelf
- ;;
- *-winbond)
- os=-proelf
- ;;
- *-oki)
- os=-proelf
- ;;
- *-hp)
- os=-hpux
- ;;
- *-hitachi)
- os=-hiux
- ;;
- i860-* | *-att | *-ncr | *-altos | *-motorola | *-convergent)
- os=-sysv
- ;;
- *-cbm)
- os=-amigaos
- ;;
- *-dg)
- os=-dgux
- ;;
- *-dolphin)
- os=-sysv3
- ;;
- m68k-ccur)
- os=-rtu
- ;;
- m88k-omron*)
- os=-luna
- ;;
- *-next )
- os=-nextstep
- ;;
- *-sequent)
- os=-ptx
- ;;
- *-crds)
- os=-unos
- ;;
- *-ns)
- os=-genix
- ;;
- i370-*)
- os=-mvs
- ;;
- *-next)
- os=-nextstep3
- ;;
- *-gould)
- os=-sysv
- ;;
- *-highlevel)
- os=-bsd
- ;;
- *-encore)
- os=-bsd
- ;;
- *-sgi)
- os=-irix
- ;;
- *-siemens)
- os=-sysv4
- ;;
- *-masscomp)
- os=-rtu
- ;;
- f30[01]-fujitsu | f700-fujitsu)
- os=-uxpv
- ;;
- *-rom68k)
- os=-coff
- ;;
- *-*bug)
- os=-coff
- ;;
- *-apple)
- os=-macos
- ;;
- *-atari*)
- os=-mint
- ;;
- *)
- os=-none
- ;;
-esac
-fi
-
-# Here we handle the case where we know the os, and the CPU type, but not the
-# manufacturer. We pick the logical manufacturer.
-vendor=unknown
-case $basic_machine in
- *-unknown)
- case $os in
- -riscix*)
- vendor=acorn
- ;;
- -sunos*)
- vendor=sun
- ;;
- -aix*)
- vendor=ibm
- ;;
- -beos*)
- vendor=be
- ;;
- -hpux*)
- vendor=hp
- ;;
- -mpeix*)
- vendor=hp
- ;;
- -hiux*)
- vendor=hitachi
- ;;
- -unos*)
- vendor=crds
- ;;
- -dgux*)
- vendor=dg
- ;;
- -luna*)
- vendor=omron
- ;;
- -genix*)
- vendor=ns
- ;;
- -mvs* | -opened*)
- vendor=ibm
- ;;
- -os400*)
- vendor=ibm
- ;;
- -ptx*)
- vendor=sequent
- ;;
- -tpf*)
- vendor=ibm
- ;;
- -vxsim* | -vxworks* | -windiss*)
- vendor=wrs
- ;;
- -aux*)
- vendor=apple
- ;;
- -hms*)
- vendor=hitachi
- ;;
- -mpw* | -macos*)
- vendor=apple
- ;;
- -*mint | -mint[0-9]* | -*MiNT | -MiNT[0-9]*)
- vendor=atari
- ;;
- -vos*)
- vendor=stratus
- ;;
- esac
- basic_machine=`echo $basic_machine | sed "s/unknown/$vendor/"`
- ;;
-esac
-
-echo $basic_machine$os
-exit
-
-# Local variables:
-# eval: (add-hook 'write-file-hooks 'time-stamp)
-# time-stamp-start: "timestamp='"
-# time-stamp-format: "%:y-%02m-%02d"
-# time-stamp-end: "'"
-# End:
diff --git a/scripts/training/MGIZA/configure b/scripts/training/MGIZA/configure
deleted file mode 100755
index 2d5ca3d..0000000
--- a/scripts/training/MGIZA/configure
+++ /dev/null
Binary files differ
diff --git a/scripts/training/MGIZA/configure.ac b/scripts/training/MGIZA/configure.ac
deleted file mode 100644
index f4696d0..0000000
--- a/scripts/training/MGIZA/configure.ac
+++ /dev/null
@@ -1,28 +0,0 @@
-dnl Process this file with autoconf to produce a configure script.
-dnl Created by Anjuta application wizard.
-
-AC_INIT(mgiza, 1.0)
-
-AM_INIT_AUTOMAKE(AC_PACKAGE_NAME, AC_PACKAGE_VERSION)
-AM_CONFIG_HEADER(config.h)
-AM_MAINTAINER_MODE
-
-AC_ISC_POSIX
-AC_PROG_CXX
-AM_PROG_CC_STDC
-AC_HEADER_STDC
-
-
-
-AC_PROG_RANLIB
-AM_PROG_LIBTOOL
-AC_PROG_LIBTOOL
-
-
-
-
-AC_OUTPUT([
-Makefile
-src/Makefile
-src/mkcls/Makefile
-])
diff --git a/scripts/training/MGIZA/depcomp b/scripts/training/MGIZA/depcomp
deleted file mode 100755
index e5f9736..0000000
--- a/scripts/training/MGIZA/depcomp
+++ /dev/null
@@ -1,589 +0,0 @@
-#! /bin/sh
-# depcomp - compile a program generating dependencies as side-effects
-
-scriptversion=2007-03-29.01
-
-# Copyright (C) 1999, 2000, 2003, 2004, 2005, 2006, 2007 Free Software
-# Foundation, Inc.
-
-# This program is free software; you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published by
-# the Free Software Foundation; either version 2, or (at your option)
-# any later version.
-
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-# GNU General Public License for more details.
-
-# You should have received a copy of the GNU General Public License
-# along with this program; if not, write to the Free Software
-# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
-# 02110-1301, USA.
-
-# As a special exception to the GNU General Public License, if you
-# distribute this file as part of a program that contains a
-# configuration script generated by Autoconf, you may include it under
-# the same distribution terms that you use for the rest of that program.
-
-# Originally written by Alexandre Oliva <oliva@dcc.unicamp.br>.
-
-case $1 in
- '')
- echo "$0: No command. Try \`$0 --help' for more information." 1>&2
- exit 1;
- ;;
- -h | --h*)
- cat <<\EOF
-Usage: depcomp [--help] [--version] PROGRAM [ARGS]
-
-Run PROGRAMS ARGS to compile a file, generating dependencies
-as side-effects.
-
-Environment variables:
- depmode Dependency tracking mode.
- source Source file read by `PROGRAMS ARGS'.
- object Object file output by `PROGRAMS ARGS'.
- DEPDIR directory where to store dependencies.
- depfile Dependency file to output.
- tmpdepfile Temporary file to use when outputing dependencies.
- libtool Whether libtool is used (yes/no).
-
-Report bugs to <bug-automake@gnu.org>.
-EOF
- exit $?
- ;;
- -v | --v*)
- echo "depcomp $scriptversion"
- exit $?
- ;;
-esac
-
-if test -z "$depmode" || test -z "$source" || test -z "$object"; then
- echo "depcomp: Variables source, object and depmode must be set" 1>&2
- exit 1
-fi
-
-# Dependencies for sub/bar.o or sub/bar.obj go into sub/.deps/bar.Po.
-depfile=${depfile-`echo "$object" |
- sed 's|[^\\/]*$|'${DEPDIR-.deps}'/&|;s|\.\([^.]*\)$|.P\1|;s|Pobj$|Po|'`}
-tmpdepfile=${tmpdepfile-`echo "$depfile" | sed 's/\.\([^.]*\)$/.T\1/'`}
-
-rm -f "$tmpdepfile"
-
-# Some modes work just like other modes, but use different flags. We
-# parameterize here, but still list the modes in the big case below,
-# to make depend.m4 easier to write. Note that we *cannot* use a case
-# here, because this file can only contain one case statement.
-if test "$depmode" = hp; then
- # HP compiler uses -M and no extra arg.
- gccflag=-M
- depmode=gcc
-fi
-
-if test "$depmode" = dashXmstdout; then
- # This is just like dashmstdout with a different argument.
- dashmflag=-xM
- depmode=dashmstdout
-fi
-
-case "$depmode" in
-gcc3)
-## gcc 3 implements dependency tracking that does exactly what
-## we want. Yay! Note: for some reason libtool 1.4 doesn't like
-## it if -MD -MP comes after the -MF stuff. Hmm.
-## Unfortunately, FreeBSD c89 acceptance of flags depends upon
-## the command line argument order; so add the flags where they
-## appear in depend2.am. Note that the slowdown incurred here
-## affects only configure: in makefiles, %FASTDEP% shortcuts this.
- for arg
- do
- case $arg in
- -c) set fnord "$@" -MT "$object" -MD -MP -MF "$tmpdepfile" "$arg" ;;
- *) set fnord "$@" "$arg" ;;
- esac
- shift # fnord
- shift # $arg
- done
- "$@"
- stat=$?
- if test $stat -eq 0; then :
- else
- rm -f "$tmpdepfile"
- exit $stat
- fi
- mv "$tmpdepfile" "$depfile"
- ;;
-
-gcc)
-## There are various ways to get dependency output from gcc. Here's
-## why we pick this rather obscure method:
-## - Don't want to use -MD because we'd like the dependencies to end
-## up in a subdir. Having to rename by hand is ugly.
-## (We might end up doing this anyway to support other compilers.)
-## - The DEPENDENCIES_OUTPUT environment variable makes gcc act like
-## -MM, not -M (despite what the docs say).
-## - Using -M directly means running the compiler twice (even worse
-## than renaming).
- if test -z "$gccflag"; then
- gccflag=-MD,
- fi
- "$@" -Wp,"$gccflag$tmpdepfile"
- stat=$?
- if test $stat -eq 0; then :
- else
- rm -f "$tmpdepfile"
- exit $stat
- fi
- rm -f "$depfile"
- echo "$object : \\" > "$depfile"
- alpha=ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz
-## The second -e expression handles DOS-style file names with drive letters.
- sed -e 's/^[^:]*: / /' \
- -e 's/^['$alpha']:\/[^:]*: / /' < "$tmpdepfile" >> "$depfile"
-## This next piece of magic avoids the `deleted header file' problem.
-## The problem is that when a header file which appears in a .P file
-## is deleted, the dependency causes make to die (because there is
-## typically no way to rebuild the header). We avoid this by adding
-## dummy dependencies for each header file. Too bad gcc doesn't do
-## this for us directly.
- tr ' ' '
-' < "$tmpdepfile" |
-## Some versions of gcc put a space before the `:'. On the theory
-## that the space means something, we add a space to the output as
-## well.
-## Some versions of the HPUX 10.20 sed can't process this invocation
-## correctly. Breaking it into two sed invocations is a workaround.
- sed -e 's/^\\$//' -e '/^$/d' -e '/:$/d' | sed -e 's/$/ :/' >> "$depfile"
- rm -f "$tmpdepfile"
- ;;
-
-hp)
- # This case exists only to let depend.m4 do its work. It works by
- # looking at the text of this script. This case will never be run,
- # since it is checked for above.
- exit 1
- ;;
-
-sgi)
- if test "$libtool" = yes; then
- "$@" "-Wp,-MDupdate,$tmpdepfile"
- else
- "$@" -MDupdate "$tmpdepfile"
- fi
- stat=$?
- if test $stat -eq 0; then :
- else
- rm -f "$tmpdepfile"
- exit $stat
- fi
- rm -f "$depfile"
-
- if test -f "$tmpdepfile"; then # yes, the sourcefile depend on other files
- echo "$object : \\" > "$depfile"
-
- # Clip off the initial element (the dependent). Don't try to be
- # clever and replace this with sed code, as IRIX sed won't handle
- # lines with more than a fixed number of characters (4096 in
- # IRIX 6.2 sed, 8192 in IRIX 6.5). We also remove comment lines;
- # the IRIX cc adds comments like `#:fec' to the end of the
- # dependency line.
- tr ' ' '
-' < "$tmpdepfile" \
- | sed -e 's/^.*\.o://' -e 's/#.*$//' -e '/^$/ d' | \
- tr '
-' ' ' >> $depfile
- echo >> $depfile
-
- # The second pass generates a dummy entry for each header file.
- tr ' ' '
-' < "$tmpdepfile" \
- | sed -e 's/^.*\.o://' -e 's/#.*$//' -e '/^$/ d' -e 's/$/:/' \
- >> $depfile
- else
- # The sourcefile does not contain any dependencies, so just
- # store a dummy comment line, to avoid errors with the Makefile
- # "include basename.Plo" scheme.
- echo "#dummy" > "$depfile"
- fi
- rm -f "$tmpdepfile"
- ;;
-
-aix)
- # The C for AIX Compiler uses -M and outputs the dependencies
- # in a .u file. In older versions, this file always lives in the
- # current directory. Also, the AIX compiler puts `$object:' at the
- # start of each line; $object doesn't have directory information.
- # Version 6 uses the directory in both cases.
- dir=`echo "$object" | sed -e 's|/[^/]*$|/|'`
- test "x$dir" = "x$object" && dir=
- base=`echo "$object" | sed -e 's|^.*/||' -e 's/\.o$//' -e 's/\.lo$//'`
- if test "$libtool" = yes; then
- tmpdepfile1=$dir$base.u
- tmpdepfile2=$base.u
- tmpdepfile3=$dir.libs/$base.u
- "$@" -Wc,-M
- else
- tmpdepfile1=$dir$base.u
- tmpdepfile2=$dir$base.u
- tmpdepfile3=$dir$base.u
- "$@" -M
- fi
- stat=$?
-
- if test $stat -eq 0; then :
- else
- rm -f "$tmpdepfile1" "$tmpdepfile2" "$tmpdepfile3"
- exit $stat
- fi
-
- for tmpdepfile in "$tmpdepfile1" "$tmpdepfile2" "$tmpdepfile3"
- do
- test -f "$tmpdepfile" && break
- done
- if test -f "$tmpdepfile"; then
- # Each line is of the form `foo.o: dependent.h'.
- # Do two passes, one to just change these to
- # `$object: dependent.h' and one to simply `dependent.h:'.
- sed -e "s,^.*\.[a-z]*:,$object:," < "$tmpdepfile" > "$depfile"
- # That's a tab and a space in the [].
- sed -e 's,^.*\.[a-z]*:[ ]*,,' -e 's,$,:,' < "$tmpdepfile" >> "$depfile"
- else
- # The sourcefile does not contain any dependencies, so just
- # store a dummy comment line, to avoid errors with the Makefile
- # "include basename.Plo" scheme.
- echo "#dummy" > "$depfile"
- fi
- rm -f "$tmpdepfile"
- ;;
-
-icc)
- # Intel's C compiler understands `-MD -MF file'. However on
- # icc -MD -MF foo.d -c -o sub/foo.o sub/foo.c
- # ICC 7.0 will fill foo.d with something like
- # foo.o: sub/foo.c
- # foo.o: sub/foo.h
- # which is wrong. We want:
- # sub/foo.o: sub/foo.c
- # sub/foo.o: sub/foo.h
- # sub/foo.c:
- # sub/foo.h:
- # ICC 7.1 will output
- # foo.o: sub/foo.c sub/foo.h
- # and will wrap long lines using \ :
- # foo.o: sub/foo.c ... \
- # sub/foo.h ... \
- # ...
-
- "$@" -MD -MF "$tmpdepfile"
- stat=$?
- if test $stat -eq 0; then :
- else
- rm -f "$tmpdepfile"
- exit $stat
- fi
- rm -f "$depfile"
- # Each line is of the form `foo.o: dependent.h',
- # or `foo.o: dep1.h dep2.h \', or ` dep3.h dep4.h \'.
- # Do two passes, one to just change these to
- # `$object: dependent.h' and one to simply `dependent.h:'.
- sed "s,^[^:]*:,$object :," < "$tmpdepfile" > "$depfile"
- # Some versions of the HPUX 10.20 sed can't process this invocation
- # correctly. Breaking it into two sed invocations is a workaround.
- sed 's,^[^:]*: \(.*\)$,\1,;s/^\\$//;/^$/d;/:$/d' < "$tmpdepfile" |
- sed -e 's/$/ :/' >> "$depfile"
- rm -f "$tmpdepfile"
- ;;
-
-hp2)
- # The "hp" stanza above does not work with aCC (C++) and HP's ia64
- # compilers, which have integrated preprocessors. The correct option
- # to use with these is +Maked; it writes dependencies to a file named
- # 'foo.d', which lands next to the object file, wherever that
- # happens to be.
- # Much of this is similar to the tru64 case; see comments there.
- dir=`echo "$object" | sed -e 's|/[^/]*$|/|'`
- test "x$dir" = "x$object" && dir=
- base=`echo "$object" | sed -e 's|^.*/||' -e 's/\.o$//' -e 's/\.lo$//'`
- if test "$libtool" = yes; then
- tmpdepfile1=$dir$base.d
- tmpdepfile2=$dir.libs/$base.d
- "$@" -Wc,+Maked
- else
- tmpdepfile1=$dir$base.d
- tmpdepfile2=$dir$base.d
- "$@" +Maked
- fi
- stat=$?
- if test $stat -eq 0; then :
- else
- rm -f "$tmpdepfile1" "$tmpdepfile2"
- exit $stat
- fi
-
- for tmpdepfile in "$tmpdepfile1" "$tmpdepfile2"
- do
- test -f "$tmpdepfile" && break
- done
- if test -f "$tmpdepfile"; then
- sed -e "s,^.*\.[a-z]*:,$object:," "$tmpdepfile" > "$depfile"
- # Add `dependent.h:' lines.
- sed -ne '2,${; s/^ *//; s/ \\*$//; s/$/:/; p;}' "$tmpdepfile" >> "$depfile"
- else
- echo "#dummy" > "$depfile"
- fi
- rm -f "$tmpdepfile" "$tmpdepfile2"
- ;;
-
-tru64)
- # The Tru64 compiler uses -MD to generate dependencies as a side
- # effect. `cc -MD -o foo.o ...' puts the dependencies into `foo.o.d'.
- # At least on Alpha/Redhat 6.1, Compaq CCC V6.2-504 seems to put
- # dependencies in `foo.d' instead, so we check for that too.
- # Subdirectories are respected.
- dir=`echo "$object" | sed -e 's|/[^/]*$|/|'`
- test "x$dir" = "x$object" && dir=
- base=`echo "$object" | sed -e 's|^.*/||' -e 's/\.o$//' -e 's/\.lo$//'`
-
- if test "$libtool" = yes; then
- # With Tru64 cc, shared objects can also be used to make a
- # static library. This mechanism is used in libtool 1.4 series to
- # handle both shared and static libraries in a single compilation.
- # With libtool 1.4, dependencies were output in $dir.libs/$base.lo.d.
- #
- # With libtool 1.5 this exception was removed, and libtool now
- # generates 2 separate objects for the 2 libraries. These two
- # compilations output dependencies in $dir.libs/$base.o.d and
- # in $dir$base.o.d. We have to check for both files, because
- # one of the two compilations can be disabled. We should prefer
- # $dir$base.o.d over $dir.libs/$base.o.d because the latter is
- # automatically cleaned when .libs/ is deleted, while ignoring
- # the former would cause a distcleancheck panic.
- tmpdepfile1=$dir.libs/$base.lo.d # libtool 1.4
- tmpdepfile2=$dir$base.o.d # libtool 1.5
- tmpdepfile3=$dir.libs/$base.o.d # libtool 1.5
- tmpdepfile4=$dir.libs/$base.d # Compaq CCC V6.2-504
- "$@" -Wc,-MD
- else
- tmpdepfile1=$dir$base.o.d
- tmpdepfile2=$dir$base.d
- tmpdepfile3=$dir$base.d
- tmpdepfile4=$dir$base.d
- "$@" -MD
- fi
-
- stat=$?
- if test $stat -eq 0; then :
- else
- rm -f "$tmpdepfile1" "$tmpdepfile2" "$tmpdepfile3" "$tmpdepfile4"
- exit $stat
- fi
-
- for tmpdepfile in "$tmpdepfile1" "$tmpdepfile2" "$tmpdepfile3" "$tmpdepfile4"
- do
- test -f "$tmpdepfile" && break
- done
- if test -f "$tmpdepfile"; then
- sed -e "s,^.*\.[a-z]*:,$object:," < "$tmpdepfile" > "$depfile"
- # That's a tab and a space in the [].
- sed -e 's,^.*\.[a-z]*:[ ]*,,' -e 's,$,:,' < "$tmpdepfile" >> "$depfile"
- else
- echo "#dummy" > "$depfile"
- fi
- rm -f "$tmpdepfile"
- ;;
-
-#nosideeffect)
- # This comment above is used by automake to tell side-effect
- # dependency tracking mechanisms from slower ones.
-
-dashmstdout)
- # Important note: in order to support this mode, a compiler *must*
- # always write the preprocessed file to stdout, regardless of -o.
- "$@" || exit $?
-
- # Remove the call to Libtool.
- if test "$libtool" = yes; then
- while test $1 != '--mode=compile'; do
- shift
- done
- shift
- fi
-
- # Remove `-o $object'.
- IFS=" "
- for arg
- do
- case $arg in
- -o)
- shift
- ;;
- $object)
- shift
- ;;
- *)
- set fnord "$@" "$arg"
- shift # fnord
- shift # $arg
- ;;
- esac
- done
-
- test -z "$dashmflag" && dashmflag=-M
- # Require at least two characters before searching for `:'
- # in the target name. This is to cope with DOS-style filenames:
- # a dependency such as `c:/foo/bar' could be seen as target `c' otherwise.
- "$@" $dashmflag |
- sed 's:^[ ]*[^: ][^:][^:]*\:[ ]*:'"$object"'\: :' > "$tmpdepfile"
- rm -f "$depfile"
- cat < "$tmpdepfile" > "$depfile"
- tr ' ' '
-' < "$tmpdepfile" | \
-## Some versions of the HPUX 10.20 sed can't process this invocation
-## correctly. Breaking it into two sed invocations is a workaround.
- sed -e 's/^\\$//' -e '/^$/d' -e '/:$/d' | sed -e 's/$/ :/' >> "$depfile"
- rm -f "$tmpdepfile"
- ;;
-
-dashXmstdout)
- # This case only exists to satisfy depend.m4. It is never actually
- # run, as this mode is specially recognized in the preamble.
- exit 1
- ;;
-
-makedepend)
- "$@" || exit $?
- # Remove any Libtool call
- if test "$libtool" = yes; then
- while test $1 != '--mode=compile'; do
- shift
- done
- shift
- fi
- # X makedepend
- shift
- cleared=no
- for arg in "$@"; do
- case $cleared in
- no)
- set ""; shift
- cleared=yes ;;
- esac
- case "$arg" in
- -D*|-I*)
- set fnord "$@" "$arg"; shift ;;
- # Strip any option that makedepend may not understand. Remove
- # the object too, otherwise makedepend will parse it as a source file.
- -*|$object)
- ;;
- *)
- set fnord "$@" "$arg"; shift ;;
- esac
- done
- obj_suffix="`echo $object | sed 's/^.*\././'`"
- touch "$tmpdepfile"
- ${MAKEDEPEND-makedepend} -o"$obj_suffix" -f"$tmpdepfile" "$@"
- rm -f "$depfile"
- cat < "$tmpdepfile" > "$depfile"
- sed '1,2d' "$tmpdepfile" | tr ' ' '
-' | \
-## Some versions of the HPUX 10.20 sed can't process this invocation
-## correctly. Breaking it into two sed invocations is a workaround.
- sed -e 's/^\\$//' -e '/^$/d' -e '/:$/d' | sed -e 's/$/ :/' >> "$depfile"
- rm -f "$tmpdepfile" "$tmpdepfile".bak
- ;;
-
-cpp)
- # Important note: in order to support this mode, a compiler *must*
- # always write the preprocessed file to stdout.
- "$@" || exit $?
-
- # Remove the call to Libtool.
- if test "$libtool" = yes; then
- while test $1 != '--mode=compile'; do
- shift
- done
- shift
- fi
-
- # Remove `-o $object'.
- IFS=" "
- for arg
- do
- case $arg in
- -o)
- shift
- ;;
- $object)
- shift
- ;;
- *)
- set fnord "$@" "$arg"
- shift # fnord
- shift # $arg
- ;;
- esac
- done
-
- "$@" -E |
- sed -n -e '/^# [0-9][0-9]* "\([^"]*\)".*/ s:: \1 \\:p' \
- -e '/^#line [0-9][0-9]* "\([^"]*\)".*/ s:: \1 \\:p' |
- sed '$ s: \\$::' > "$tmpdepfile"
- rm -f "$depfile"
- echo "$object : \\" > "$depfile"
- cat < "$tmpdepfile" >> "$depfile"
- sed < "$tmpdepfile" '/^$/d;s/^ //;s/ \\$//;s/$/ :/' >> "$depfile"
- rm -f "$tmpdepfile"
- ;;
-
-msvisualcpp)
- # Important note: in order to support this mode, a compiler *must*
- # always write the preprocessed file to stdout, regardless of -o,
- # because we must use -o when running libtool.
- "$@" || exit $?
- IFS=" "
- for arg
- do
- case "$arg" in
- "-Gm"|"/Gm"|"-Gi"|"/Gi"|"-ZI"|"/ZI")
- set fnord "$@"
- shift
- shift
- ;;
- *)
- set fnord "$@" "$arg"
- shift
- shift
- ;;
- esac
- done
- "$@" -E |
- sed -n '/^#line [0-9][0-9]* "\([^"]*\)"/ s::echo "`cygpath -u \\"\1\\"`":p' | sort | uniq > "$tmpdepfile"
- rm -f "$depfile"
- echo "$object : \\" > "$depfile"
- . "$tmpdepfile" | sed 's% %\\ %g' | sed -n '/^\(.*\)$/ s:: \1 \\:p' >> "$depfile"
- echo " " >> "$depfile"
- . "$tmpdepfile" | sed 's% %\\ %g' | sed -n '/^\(.*\)$/ s::\1\::p' >> "$depfile"
- rm -f "$tmpdepfile"
- ;;
-
-none)
- exec "$@"
- ;;
-
-*)
- echo "Unknown depmode $depmode" 1>&2
- exit 1
- ;;
-esac
-
-exit 0
-
-# Local Variables:
-# mode: shell-script
-# sh-indentation: 2
-# eval: (add-hook 'write-file-hooks 'time-stamp)
-# time-stamp-start: "scriptversion="
-# time-stamp-format: "%:y-%02m-%02d.%02H"
-# time-stamp-end: "$"
-# End:
diff --git a/scripts/training/MGIZA/install-sh b/scripts/training/MGIZA/install-sh
deleted file mode 100755
index a5897de..0000000
--- a/scripts/training/MGIZA/install-sh
+++ /dev/null
@@ -1,519 +0,0 @@
-#!/bin/sh
-# install - install a program, script, or datafile
-
-scriptversion=2006-12-25.00
-
-# This originates from X11R5 (mit/util/scripts/install.sh), which was
-# later released in X11R6 (xc/config/util/install.sh) with the
-# following copyright and license.
-#
-# Copyright (C) 1994 X Consortium
-#
-# Permission is hereby granted, free of charge, to any person obtaining a copy
-# of this software and associated documentation files (the "Software"), to
-# deal in the Software without restriction, including without limitation the
-# rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
-# sell copies of the Software, and to permit persons to whom the Software is
-# furnished to do so, subject to the following conditions:
-#
-# The above copyright notice and this permission notice shall be included in
-# all copies or substantial portions of the Software.
-#
-# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-# X CONSORTIUM BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
-# AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNEC-
-# TION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
-#
-# Except as contained in this notice, the name of the X Consortium shall not
-# be used in advertising or otherwise to promote the sale, use or other deal-
-# ings in this Software without prior written authorization from the X Consor-
-# tium.
-#
-#
-# FSF changes to this file are in the public domain.
-#
-# Calling this script install-sh is preferred over install.sh, to prevent
-# `make' implicit rules from creating a file called install from it
-# when there is no Makefile.
-#
-# This script is compatible with the BSD install script, but was written
-# from scratch.
-
-nl='
-'
-IFS=" "" $nl"
-
-# set DOITPROG to echo to test this script
-
-# Don't use :- since 4.3BSD and earlier shells don't like it.
-doit=${DOITPROG-}
-if test -z "$doit"; then
- doit_exec=exec
-else
- doit_exec=$doit
-fi
-
-# Put in absolute file names if you don't have them in your path;
-# or use environment vars.
-
-chgrpprog=${CHGRPPROG-chgrp}
-chmodprog=${CHMODPROG-chmod}
-chownprog=${CHOWNPROG-chown}
-cmpprog=${CMPPROG-cmp}
-cpprog=${CPPROG-cp}
-mkdirprog=${MKDIRPROG-mkdir}
-mvprog=${MVPROG-mv}
-rmprog=${RMPROG-rm}
-stripprog=${STRIPPROG-strip}
-
-posix_glob='?'
-initialize_posix_glob='
- test "$posix_glob" != "?" || {
- if (set -f) 2>/dev/null; then
- posix_glob=
- else
- posix_glob=:
- fi
- }
-'
-
-posix_mkdir=
-
-# Desired mode of installed file.
-mode=0755
-
-chgrpcmd=
-chmodcmd=$chmodprog
-chowncmd=
-mvcmd=$mvprog
-rmcmd="$rmprog -f"
-stripcmd=
-
-src=
-dst=
-dir_arg=
-dst_arg=
-
-copy_on_change=false
-no_target_directory=
-
-usage="\
-Usage: $0 [OPTION]... [-T] SRCFILE DSTFILE
- or: $0 [OPTION]... SRCFILES... DIRECTORY
- or: $0 [OPTION]... -t DIRECTORY SRCFILES...
- or: $0 [OPTION]... -d DIRECTORIES...
-
-In the 1st form, copy SRCFILE to DSTFILE.
-In the 2nd and 3rd, copy all SRCFILES to DIRECTORY.
-In the 4th, create DIRECTORIES.
-
-Options:
- --help display this help and exit.
- --version display version info and exit.
-
- -c (ignored)
- -C install only if different (preserve the last data modification time)
- -d create directories instead of installing files.
- -g GROUP $chgrpprog installed files to GROUP.
- -m MODE $chmodprog installed files to MODE.
- -o USER $chownprog installed files to USER.
- -s $stripprog installed files.
- -t DIRECTORY install into DIRECTORY.
- -T report an error if DSTFILE is a directory.
-
-Environment variables override the default commands:
- CHGRPPROG CHMODPROG CHOWNPROG CMPPROG CPPROG MKDIRPROG MVPROG
- RMPROG STRIPPROG
-"
-
-while test $# -ne 0; do
- case $1 in
- -c) ;;
-
- -C) copy_on_change=true;;
-
- -d) dir_arg=true;;
-
- -g) chgrpcmd="$chgrpprog $2"
- shift;;
-
- --help) echo "$usage"; exit $?;;
-
- -m) mode=$2
- case $mode in
- *' '* | *' '* | *'
-'* | *'*'* | *'?'* | *'['*)
- echo "$0: invalid mode: $mode" >&2
- exit 1;;
- esac
- shift;;
-
- -o) chowncmd="$chownprog $2"
- shift;;
-
- -s) stripcmd=$stripprog;;
-
- -t) dst_arg=$2
- shift;;
-
- -T) no_target_directory=true;;
-
- --version) echo "$0 $scriptversion"; exit $?;;
-
- --) shift
- break;;
-
- -*) echo "$0: invalid option: $1" >&2
- exit 1;;
-
- *) break;;
- esac
- shift
-done
-
-if test $# -ne 0 && test -z "$dir_arg$dst_arg"; then
- # When -d is used, all remaining arguments are directories to create.
- # When -t is used, the destination is already specified.
- # Otherwise, the last argument is the destination. Remove it from $@.
- for arg
- do
- if test -n "$dst_arg"; then
- # $@ is not empty: it contains at least $arg.
- set fnord "$@" "$dst_arg"
- shift # fnord
- fi
- shift # arg
- dst_arg=$arg
- done
-fi
-
-if test $# -eq 0; then
- if test -z "$dir_arg"; then
- echo "$0: no input file specified." >&2
- exit 1
- fi
- # It's OK to call `install-sh -d' without argument.
- # This can happen when creating conditional directories.
- exit 0
-fi
-
-if test -z "$dir_arg"; then
- trap '(exit $?); exit' 1 2 13 15
-
- # Set umask so as not to create temps with too-generous modes.
- # However, 'strip' requires both read and write access to temps.
- case $mode in
- # Optimize common cases.
- *644) cp_umask=133;;
- *755) cp_umask=22;;
-
- *[0-7])
- if test -z "$stripcmd"; then
- u_plus_rw=
- else
- u_plus_rw='% 200'
- fi
- cp_umask=`expr '(' 777 - $mode % 1000 ')' $u_plus_rw`;;
- *)
- if test -z "$stripcmd"; then
- u_plus_rw=
- else
- u_plus_rw=,u+rw
- fi
- cp_umask=$mode$u_plus_rw;;
- esac
-fi
-
-for src
-do
- # Protect names starting with `-'.
- case $src in
- -*) src=./$src;;
- esac
-
- if test -n "$dir_arg"; then
- dst=$src
- dstdir=$dst
- test -d "$dstdir"
- dstdir_status=$?
- else
-
- # Waiting for this to be detected by the "$cpprog $src $dsttmp" command
- # might cause directories to be created, which would be especially bad
- # if $src (and thus $dsttmp) contains '*'.
- if test ! -f "$src" && test ! -d "$src"; then
- echo "$0: $src does not exist." >&2
- exit 1
- fi
-
- if test -z "$dst_arg"; then
- echo "$0: no destination specified." >&2
- exit 1
- fi
-
- dst=$dst_arg
- # Protect names starting with `-'.
- case $dst in
- -*) dst=./$dst;;
- esac
-
- # If destination is a directory, append the input filename; won't work
- # if double slashes aren't ignored.
- if test -d "$dst"; then
- if test -n "$no_target_directory"; then
- echo "$0: $dst_arg: Is a directory" >&2
- exit 1
- fi
- dstdir=$dst
- dst=$dstdir/`basename "$src"`
- dstdir_status=0
- else
- # Prefer dirname, but fall back on a substitute if dirname fails.
- dstdir=`
- (dirname "$dst") 2>/dev/null ||
- expr X"$dst" : 'X\(.*[^/]\)//*[^/][^/]*/*$' \| \
- X"$dst" : 'X\(//\)[^/]' \| \
- X"$dst" : 'X\(//\)$' \| \
- X"$dst" : 'X\(/\)' \| . 2>/dev/null ||
- echo X"$dst" |
- sed '/^X\(.*[^/]\)\/\/*[^/][^/]*\/*$/{
- s//\1/
- q
- }
- /^X\(\/\/\)[^/].*/{
- s//\1/
- q
- }
- /^X\(\/\/\)$/{
- s//\1/
- q
- }
- /^X\(\/\).*/{
- s//\1/
- q
- }
- s/.*/./; q'
- `
-
- test -d "$dstdir"
- dstdir_status=$?
- fi
- fi
-
- obsolete_mkdir_used=false
-
- if test $dstdir_status != 0; then
- case $posix_mkdir in
- '')
- # Create intermediate dirs using mode 755 as modified by the umask.
- # This is like FreeBSD 'install' as of 1997-10-28.
- umask=`umask`
- case $stripcmd.$umask in
- # Optimize common cases.
- *[2367][2367]) mkdir_umask=$umask;;
- .*0[02][02] | .[02][02] | .[02]) mkdir_umask=22;;
-
- *[0-7])
- mkdir_umask=`expr $umask + 22 \
- - $umask % 100 % 40 + $umask % 20 \
- - $umask % 10 % 4 + $umask % 2
- `;;
- *) mkdir_umask=$umask,go-w;;
- esac
-
- # With -d, create the new directory with the user-specified mode.
- # Otherwise, rely on $mkdir_umask.
- if test -n "$dir_arg"; then
- mkdir_mode=-m$mode
- else
- mkdir_mode=
- fi
-
- posix_mkdir=false
- case $umask in
- *[123567][0-7][0-7])
- # POSIX mkdir -p sets u+wx bits regardless of umask, which
- # is incompatible with FreeBSD 'install' when (umask & 300) != 0.
- ;;
- *)
- tmpdir=${TMPDIR-/tmp}/ins$RANDOM-$$
- trap 'ret=$?; rmdir "$tmpdir/d" "$tmpdir" 2>/dev/null; exit $ret' 0
-
- if (umask $mkdir_umask &&
- exec $mkdirprog $mkdir_mode -p -- "$tmpdir/d") >/dev/null 2>&1
- then
- if test -z "$dir_arg" || {
- # Check for POSIX incompatibilities with -m.
- # HP-UX 11.23 and IRIX 6.5 mkdir -m -p sets group- or
- # other-writeable bit of parent directory when it shouldn't.
- # FreeBSD 6.1 mkdir -m -p sets mode of existing directory.
- ls_ld_tmpdir=`ls -ld "$tmpdir"`
- case $ls_ld_tmpdir in
- d????-?r-*) different_mode=700;;
- d????-?--*) different_mode=755;;
- *) false;;
- esac &&
- $mkdirprog -m$different_mode -p -- "$tmpdir" && {
- ls_ld_tmpdir_1=`ls -ld "$tmpdir"`
- test "$ls_ld_tmpdir" = "$ls_ld_tmpdir_1"
- }
- }
- then posix_mkdir=:
- fi
- rmdir "$tmpdir/d" "$tmpdir"
- else
- # Remove any dirs left behind by ancient mkdir implementations.
- rmdir ./$mkdir_mode ./-p ./-- 2>/dev/null
- fi
- trap '' 0;;
- esac;;
- esac
-
- if
- $posix_mkdir && (
- umask $mkdir_umask &&
- $doit_exec $mkdirprog $mkdir_mode -p -- "$dstdir"
- )
- then :
- else
-
- # The umask is ridiculous, or mkdir does not conform to POSIX,
- # or it failed possibly due to a race condition. Create the
- # directory the slow way, step by step, checking for races as we go.
-
- case $dstdir in
- /*) prefix='/';;
- -*) prefix='./';;
- *) prefix='';;
- esac
-
- eval "$initialize_posix_glob"
-
- oIFS=$IFS
- IFS=/
- $posix_glob set -f
- set fnord $dstdir
- shift
- $posix_glob set +f
- IFS=$oIFS
-
- prefixes=
-
- for d
- do
- test -z "$d" && continue
-
- prefix=$prefix$d
- if test -d "$prefix"; then
- prefixes=
- else
- if $posix_mkdir; then
- (umask=$mkdir_umask &&
- $doit_exec $mkdirprog $mkdir_mode -p -- "$dstdir") && break
- # Don't fail if two instances are running concurrently.
- test -d "$prefix" || exit 1
- else
- case $prefix in
- *\'*) qprefix=`echo "$prefix" | sed "s/'/'\\\\\\\\''/g"`;;
- *) qprefix=$prefix;;
- esac
- prefixes="$prefixes '$qprefix'"
- fi
- fi
- prefix=$prefix/
- done
-
- if test -n "$prefixes"; then
- # Don't fail if two instances are running concurrently.
- (umask $mkdir_umask &&
- eval "\$doit_exec \$mkdirprog $prefixes") ||
- test -d "$dstdir" || exit 1
- obsolete_mkdir_used=true
- fi
- fi
- fi
-
- if test -n "$dir_arg"; then
- { test -z "$chowncmd" || $doit $chowncmd "$dst"; } &&
- { test -z "$chgrpcmd" || $doit $chgrpcmd "$dst"; } &&
- { test "$obsolete_mkdir_used$chowncmd$chgrpcmd" = false ||
- test -z "$chmodcmd" || $doit $chmodcmd $mode "$dst"; } || exit 1
- else
-
- # Make a couple of temp file names in the proper directory.
- dsttmp=$dstdir/_inst.$$_
- rmtmp=$dstdir/_rm.$$_
-
- # Trap to clean up those temp files at exit.
- trap 'ret=$?; rm -f "$dsttmp" "$rmtmp" && exit $ret' 0
-
- # Copy the file name to the temp name.
- (umask $cp_umask && $doit_exec $cpprog "$src" "$dsttmp") &&
-
- # and set any options; do chmod last to preserve setuid bits.
- #
- # If any of these fail, we abort the whole thing. If we want to
- # ignore errors from any of these, just make sure not to ignore
- # errors from the above "$doit $cpprog $src $dsttmp" command.
- #
- { test -z "$chowncmd" || $doit $chowncmd "$dsttmp"; } &&
- { test -z "$chgrpcmd" || $doit $chgrpcmd "$dsttmp"; } &&
- { test -z "$stripcmd" || $doit $stripcmd "$dsttmp"; } &&
- { test -z "$chmodcmd" || $doit $chmodcmd $mode "$dsttmp"; } &&
-
- # If -C, don't bother to copy if it wouldn't change the file.
- if $copy_on_change &&
- old=`LC_ALL=C ls -dlL "$dst" 2>/dev/null` &&
- new=`LC_ALL=C ls -dlL "$dsttmp" 2>/dev/null` &&
-
- eval "$initialize_posix_glob" &&
- $posix_glob set -f &&
- set X $old && old=:$2:$4:$5:$6 &&
- set X $new && new=:$2:$4:$5:$6 &&
- $posix_glob set +f &&
-
- test "$old" = "$new" &&
- $cmpprog "$dst" "$dsttmp" >/dev/null 2>&1
- then
- rm -f "$dsttmp"
- else
- # Rename the file to the real destination.
- $doit $mvcmd -f "$dsttmp" "$dst" 2>/dev/null ||
-
- # The rename failed, perhaps because mv can't rename something else
- # to itself, or perhaps because mv is so ancient that it does not
- # support -f.
- {
- # Now remove or move aside any old file at destination location.
- # We try this two ways since rm can't unlink itself on some
- # systems and the destination file might be busy for other
- # reasons. In this case, the final cleanup might fail but the new
- # file should still install successfully.
- {
- test ! -f "$dst" ||
- $doit $rmcmd -f "$dst" 2>/dev/null ||
- { $doit $mvcmd -f "$dst" "$rmtmp" 2>/dev/null &&
- { $doit $rmcmd -f "$rmtmp" 2>/dev/null; :; }
- } ||
- { echo "$0: cannot unlink or rename $dst" >&2
- (exit 1); exit 1
- }
- } &&
-
- # Now rename the file to the real destination.
- $doit $mvcmd "$dsttmp" "$dst"
- }
- fi || exit 1
-
- trap '' 0
- fi
-done
-
-# Local variables:
-# eval: (add-hook 'write-file-hooks 'time-stamp)
-# time-stamp-start: "scriptversion="
-# time-stamp-format: "%:y-%02m-%02d.%02H"
-# time-stamp-end: "$"
-# End:
diff --git a/scripts/training/MGIZA/ltmain.sh b/scripts/training/MGIZA/ltmain.sh
deleted file mode 100644
index 3506ead..0000000
--- a/scripts/training/MGIZA/ltmain.sh
+++ /dev/null
@@ -1,8413 +0,0 @@
-# Generated from ltmain.m4sh.
-
-# ltmain.sh (GNU libtool) 2.2.6
-# Written by Gordon Matzigkeit <gord@gnu.ai.mit.edu>, 1996
-
-# Copyright (C) 1996, 1997, 1998, 1999, 2000, 2001, 2003, 2004, 2005, 2006, 2007 2008 Free Software Foundation, Inc.
-# This is free software; see the source for copying conditions. There is NO
-# warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
-
-# GNU Libtool is free software; you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published by
-# the Free Software Foundation; either version 2 of the License, or
-# (at your option) any later version.
-#
-# As a special exception to the GNU General Public License,
-# if you distribute this file as part of a program or library that
-# is built using GNU Libtool, you may include this file under the
-# same distribution terms that you use for the rest of that program.
-#
-# GNU Libtool is distributed in the hope that it will be useful, but
-# WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-# General Public License for more details.
-#
-# You should have received a copy of the GNU General Public License
-# along with GNU Libtool; see the file COPYING. If not, a copy
-# can be downloaded from http://www.gnu.org/licenses/gpl.html,
-# or obtained by writing to the Free Software Foundation, Inc.,
-# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
-
-# Usage: $progname [OPTION]... [MODE-ARG]...
-#
-# Provide generalized library-building support services.
-#
-# --config show all configuration variables
-# --debug enable verbose shell tracing
-# -n, --dry-run display commands without modifying any files
-# --features display basic configuration information and exit
-# --mode=MODE use operation mode MODE
-# --preserve-dup-deps don't remove duplicate dependency libraries
-# --quiet, --silent don't print informational messages
-# --tag=TAG use configuration variables from tag TAG
-# -v, --verbose print informational messages (default)
-# --version print version information
-# -h, --help print short or long help message
-#
-# MODE must be one of the following:
-#
-# clean remove files from the build directory
-# compile compile a source file into a libtool object
-# execute automatically set library path, then run a program
-# finish complete the installation of libtool libraries
-# install install libraries or executables
-# link create a library or an executable
-# uninstall remove libraries from an installed directory
-#
-# MODE-ARGS vary depending on the MODE.
-# Try `$progname --help --mode=MODE' for a more detailed description of MODE.
-#
-# When reporting a bug, please describe a test case to reproduce it and
-# include the following information:
-#
-# host-triplet: $host
-# shell: $SHELL
-# compiler: $LTCC
-# compiler flags: $LTCFLAGS
-# linker: $LD (gnu? $with_gnu_ld)
-# $progname: (GNU libtool) 2.2.6 Debian-2.2.6a-4
-# automake: $automake_version
-# autoconf: $autoconf_version
-#
-# Report bugs to <bug-libtool@gnu.org>.
-
-PROGRAM=ltmain.sh
-PACKAGE=libtool
-VERSION="2.2.6 Debian-2.2.6a-4"
-TIMESTAMP=""
-package_revision=1.3012
-
-# Be Bourne compatible
-if test -n "${ZSH_VERSION+set}" && (emulate sh) >/dev/null 2>&1; then
- emulate sh
- NULLCMD=:
- # Zsh 3.x and 4.x performs word splitting on ${1+"$@"}, which
- # is contrary to our usage. Disable this feature.
- alias -g '${1+"$@"}'='"$@"'
- setopt NO_GLOB_SUBST
-else
- case `(set -o) 2>/dev/null` in *posix*) set -o posix;; esac
-fi
-BIN_SH=xpg4; export BIN_SH # for Tru64
-DUALCASE=1; export DUALCASE # for MKS sh
-
-# NLS nuisances: We save the old values to restore during execute mode.
-# Only set LANG and LC_ALL to C if already set.
-# These must not be set unconditionally because not all systems understand
-# e.g. LANG=C (notably SCO).
-lt_user_locale=
-lt_safe_locale=
-for lt_var in LANG LANGUAGE LC_ALL LC_CTYPE LC_COLLATE LC_MESSAGES
-do
- eval "if test \"\${$lt_var+set}\" = set; then
- save_$lt_var=\$$lt_var
- $lt_var=C
- export $lt_var
- lt_user_locale=\"$lt_var=\\\$save_\$lt_var; \$lt_user_locale\"
- lt_safe_locale=\"$lt_var=C; \$lt_safe_locale\"
- fi"
-done
-
-$lt_unset CDPATH
-
-
-
-
-
-: ${CP="cp -f"}
-: ${ECHO="echo"}
-: ${EGREP="/bin/grep -E"}
-: ${FGREP="/bin/grep -F"}
-: ${GREP="/bin/grep"}
-: ${LN_S="ln -s"}
-: ${MAKE="make"}
-: ${MKDIR="mkdir"}
-: ${MV="mv -f"}
-: ${RM="rm -f"}
-: ${SED="/bin/sed"}
-: ${SHELL="${CONFIG_SHELL-/bin/sh}"}
-: ${Xsed="$SED -e 1s/^X//"}
-
-# Global variables:
-EXIT_SUCCESS=0
-EXIT_FAILURE=1
-EXIT_MISMATCH=63 # $? = 63 is used to indicate version mismatch to missing.
-EXIT_SKIP=77 # $? = 77 is used to indicate a skipped test to automake.
-
-exit_status=$EXIT_SUCCESS
-
-# Make sure IFS has a sensible default
-lt_nl='
-'
-IFS=" $lt_nl"
-
-dirname="s,/[^/]*$,,"
-basename="s,^.*/,,"
-
-# func_dirname_and_basename file append nondir_replacement
-# perform func_basename and func_dirname in a single function
-# call:
-# dirname: Compute the dirname of FILE. If nonempty,
-# add APPEND to the result, otherwise set result
-# to NONDIR_REPLACEMENT.
-# value returned in "$func_dirname_result"
-# basename: Compute filename of FILE.
-# value retuned in "$func_basename_result"
-# Implementation must be kept synchronized with func_dirname
-# and func_basename. For efficiency, we do not delegate to
-# those functions but instead duplicate the functionality here.
-func_dirname_and_basename ()
-{
- # Extract subdirectory from the argument.
- func_dirname_result=`$ECHO "X${1}" | $Xsed -e "$dirname"`
- if test "X$func_dirname_result" = "X${1}"; then
- func_dirname_result="${3}"
- else
- func_dirname_result="$func_dirname_result${2}"
- fi
- func_basename_result=`$ECHO "X${1}" | $Xsed -e "$basename"`
-}
-
-# Generated shell functions inserted here.
-
-# Work around backward compatibility issue on IRIX 6.5. On IRIX 6.4+, sh
-# is ksh but when the shell is invoked as "sh" and the current value of
-# the _XPG environment variable is not equal to 1 (one), the special
-# positional parameter $0, within a function call, is the name of the
-# function.
-progpath="$0"
-
-# The name of this program:
-# In the unlikely event $progname began with a '-', it would play havoc with
-# func_echo (imagine progname=-n), so we prepend ./ in that case:
-func_dirname_and_basename "$progpath"
-progname=$func_basename_result
-case $progname in
- -*) progname=./$progname ;;
-esac
-
-# Make sure we have an absolute path for reexecution:
-case $progpath in
- [\\/]*|[A-Za-z]:\\*) ;;
- *[\\/]*)
- progdir=$func_dirname_result
- progdir=`cd "$progdir" && pwd`
- progpath="$progdir/$progname"
- ;;
- *)
- save_IFS="$IFS"
- IFS=:
- for progdir in $PATH; do
- IFS="$save_IFS"
- test -x "$progdir/$progname" && break
- done
- IFS="$save_IFS"
- test -n "$progdir" || progdir=`pwd`
- progpath="$progdir/$progname"
- ;;
-esac
-
-# Sed substitution that helps us do robust quoting. It backslashifies
-# metacharacters that are still active within double-quoted strings.
-Xsed="${SED}"' -e 1s/^X//'
-sed_quote_subst='s/\([`"$\\]\)/\\\1/g'
-
-# Same as above, but do not quote variable references.
-double_quote_subst='s/\(["`\\]\)/\\\1/g'
-
-# Re-`\' parameter expansions in output of double_quote_subst that were
-# `\'-ed in input to the same. If an odd number of `\' preceded a '$'
-# in input to double_quote_subst, that '$' was protected from expansion.
-# Since each input `\' is now two `\'s, look for any number of runs of
-# four `\'s followed by two `\'s and then a '$'. `\' that '$'.
-bs='\\'
-bs2='\\\\'
-bs4='\\\\\\\\'
-dollar='\$'
-sed_double_backslash="\
- s/$bs4/&\\
-/g
- s/^$bs2$dollar/$bs&/
- s/\\([^$bs]\\)$bs2$dollar/\\1$bs2$bs$dollar/g
- s/\n//g"
-
-# Standard options:
-opt_dry_run=false
-opt_help=false
-opt_quiet=false
-opt_verbose=false
-opt_warning=:
-
-# func_echo arg...
-# Echo program name prefixed message, along with the current mode
-# name if it has been set yet.
-func_echo ()
-{
- $ECHO "$progname${mode+: }$mode: $*"
-}
-
-# func_verbose arg...
-# Echo program name prefixed message in verbose mode only.
-func_verbose ()
-{
- $opt_verbose && func_echo ${1+"$@"}
-
- # A bug in bash halts the script if the last line of a function
- # fails when set -e is in force, so we need another command to
- # work around that:
- :
-}
-
-# func_error arg...
-# Echo program name prefixed message to standard error.
-func_error ()
-{
- $ECHO "$progname${mode+: }$mode: "${1+"$@"} 1>&2
-}
-
-# func_warning arg...
-# Echo program name prefixed warning message to standard error.
-func_warning ()
-{
- $opt_warning && $ECHO "$progname${mode+: }$mode: warning: "${1+"$@"} 1>&2
-
- # bash bug again:
- :
-}
-
-# func_fatal_error arg...
-# Echo program name prefixed message to standard error, and exit.
-func_fatal_error ()
-{
- func_error ${1+"$@"}
- exit $EXIT_FAILURE
-}
-
-# func_fatal_help arg...
-# Echo program name prefixed message to standard error, followed by
-# a help hint, and exit.
-func_fatal_help ()
-{
- func_error ${1+"$@"}
- func_fatal_error "$help"
-}
-help="Try \`$progname --help' for more information." ## default
-
-
-# func_grep expression filename
-# Check whether EXPRESSION matches any line of FILENAME, without output.
-func_grep ()
-{
- $GREP "$1" "$2" >/dev/null 2>&1
-}
-
-
-# func_mkdir_p directory-path
-# Make sure the entire path to DIRECTORY-PATH is available.
-func_mkdir_p ()
-{
- my_directory_path="$1"
- my_dir_list=
-
- if test -n "$my_directory_path" && test "$opt_dry_run" != ":"; then
-
- # Protect directory names starting with `-'
- case $my_directory_path in
- -*) my_directory_path="./$my_directory_path" ;;
- esac
-
- # While some portion of DIR does not yet exist...
- while test ! -d "$my_directory_path"; do
- # ...make a list in topmost first order. Use a colon delimited
- # list incase some portion of path contains whitespace.
- my_dir_list="$my_directory_path:$my_dir_list"
-
- # If the last portion added has no slash in it, the list is done
- case $my_directory_path in */*) ;; *) break ;; esac
-
- # ...otherwise throw away the child directory and loop
- my_directory_path=`$ECHO "X$my_directory_path" | $Xsed -e "$dirname"`
- done
- my_dir_list=`$ECHO "X$my_dir_list" | $Xsed -e 's,:*$,,'`
-
- save_mkdir_p_IFS="$IFS"; IFS=':'
- for my_dir in $my_dir_list; do
- IFS="$save_mkdir_p_IFS"
- # mkdir can fail with a `File exist' error if two processes
- # try to create one of the directories concurrently. Don't
- # stop in that case!
- $MKDIR "$my_dir" 2>/dev/null || :
- done
- IFS="$save_mkdir_p_IFS"
-
- # Bail out if we (or some other process) failed to create a directory.
- test -d "$my_directory_path" || \
- func_fatal_error "Failed to create \`$1'"
- fi
-}
-
-
-# func_mktempdir [string]
-# Make a temporary directory that won't clash with other running
-# libtool processes, and avoids race conditions if possible. If
-# given, STRING is the basename for that directory.
-func_mktempdir ()
-{
- my_template="${TMPDIR-/tmp}/${1-$progname}"
-
- if test "$opt_dry_run" = ":"; then
- # Return a directory name, but don't create it in dry-run mode
- my_tmpdir="${my_template}-$$"
- else
-
- # If mktemp works, use that first and foremost
- my_tmpdir=`mktemp -d "${my_template}-XXXXXXXX" 2>/dev/null`
-
- if test ! -d "$my_tmpdir"; then
- # Failing that, at least try and use $RANDOM to avoid a race
- my_tmpdir="${my_template}-${RANDOM-0}$$"
-
- save_mktempdir_umask=`umask`
- umask 0077
- $MKDIR "$my_tmpdir"
- umask $save_mktempdir_umask
- fi
-
- # If we're not in dry-run mode, bomb out on failure
- test -d "$my_tmpdir" || \
- func_fatal_error "cannot create temporary directory \`$my_tmpdir'"
- fi
-
- $ECHO "X$my_tmpdir" | $Xsed
-}
-
-
-# func_quote_for_eval arg
-# Aesthetically quote ARG to be evaled later.
-# This function returns two values: FUNC_QUOTE_FOR_EVAL_RESULT
-# is double-quoted, suitable for a subsequent eval, whereas
-# FUNC_QUOTE_FOR_EVAL_UNQUOTED_RESULT has merely all characters
-# which are still active within double quotes backslashified.
-func_quote_for_eval ()
-{
- case $1 in
- *[\\\`\"\$]*)
- func_quote_for_eval_unquoted_result=`$ECHO "X$1" | $Xsed -e "$sed_quote_subst"` ;;
- *)
- func_quote_for_eval_unquoted_result="$1" ;;
- esac
-
- case $func_quote_for_eval_unquoted_result in
- # Double-quote args containing shell metacharacters to delay
- # word splitting, command substitution and and variable
- # expansion for a subsequent eval.
- # Many Bourne shells cannot handle close brackets correctly
- # in scan sets, so we specify it separately.
- *[\[\~\#\^\&\*\(\)\{\}\|\;\<\>\?\'\ \ ]*|*]*|"")
- func_quote_for_eval_result="\"$func_quote_for_eval_unquoted_result\""
- ;;
- *)
- func_quote_for_eval_result="$func_quote_for_eval_unquoted_result"
- esac
-}
-
-
-# func_quote_for_expand arg
-# Aesthetically quote ARG to be evaled later; same as above,
-# but do not quote variable references.
-func_quote_for_expand ()
-{
- case $1 in
- *[\\\`\"]*)
- my_arg=`$ECHO "X$1" | $Xsed \
- -e "$double_quote_subst" -e "$sed_double_backslash"` ;;
- *)
- my_arg="$1" ;;
- esac
-
- case $my_arg in
- # Double-quote args containing shell metacharacters to delay
- # word splitting and command substitution for a subsequent eval.
- # Many Bourne shells cannot handle close brackets correctly
- # in scan sets, so we specify it separately.
- *[\[\~\#\^\&\*\(\)\{\}\|\;\<\>\?\'\ \ ]*|*]*|"")
- my_arg="\"$my_arg\""
- ;;
- esac
-
- func_quote_for_expand_result="$my_arg"
-}
-
-
-# func_show_eval cmd [fail_exp]
-# Unless opt_silent is true, then output CMD. Then, if opt_dryrun is
-# not true, evaluate CMD. If the evaluation of CMD fails, and FAIL_EXP
-# is given, then evaluate it.
-func_show_eval ()
-{
- my_cmd="$1"
- my_fail_exp="${2-:}"
-
- ${opt_silent-false} || {
- func_quote_for_expand "$my_cmd"
- eval "func_echo $func_quote_for_expand_result"
- }
-
- if ${opt_dry_run-false}; then :; else
- eval "$my_cmd"
- my_status=$?
- if test "$my_status" -eq 0; then :; else
- eval "(exit $my_status); $my_fail_exp"
- fi
- fi
-}
-
-
-# func_show_eval_locale cmd [fail_exp]
-# Unless opt_silent is true, then output CMD. Then, if opt_dryrun is
-# not true, evaluate CMD. If the evaluation of CMD fails, and FAIL_EXP
-# is given, then evaluate it. Use the saved locale for evaluation.
-func_show_eval_locale ()
-{
- my_cmd="$1"
- my_fail_exp="${2-:}"
-
- ${opt_silent-false} || {
- func_quote_for_expand "$my_cmd"
- eval "func_echo $func_quote_for_expand_result"
- }
-
- if ${opt_dry_run-false}; then :; else
- eval "$lt_user_locale
- $my_cmd"
- my_status=$?
- eval "$lt_safe_locale"
- if test "$my_status" -eq 0; then :; else
- eval "(exit $my_status); $my_fail_exp"
- fi
- fi
-}
-
-
-
-
-
-# func_version
-# Echo version message to standard output and exit.
-func_version ()
-{
- $SED -n '/^# '$PROGRAM' (GNU /,/# warranty; / {
- s/^# //
- s/^# *$//
- s/\((C)\)[ 0-9,-]*\( [1-9][0-9]*\)/\1\2/
- p
- }' < "$progpath"
- exit $?
-}
-
-# func_usage
-# Echo short help message to standard output and exit.
-func_usage ()
-{
- $SED -n '/^# Usage:/,/# -h/ {
- s/^# //
- s/^# *$//
- s/\$progname/'$progname'/
- p
- }' < "$progpath"
- $ECHO
- $ECHO "run \`$progname --help | more' for full usage"
- exit $?
-}
-
-# func_help
-# Echo long help message to standard output and exit.
-func_help ()
-{
- $SED -n '/^# Usage:/,/# Report bugs to/ {
- s/^# //
- s/^# *$//
- s*\$progname*'$progname'*
- s*\$host*'"$host"'*
- s*\$SHELL*'"$SHELL"'*
- s*\$LTCC*'"$LTCC"'*
- s*\$LTCFLAGS*'"$LTCFLAGS"'*
- s*\$LD*'"$LD"'*
- s/\$with_gnu_ld/'"$with_gnu_ld"'/
- s/\$automake_version/'"`(automake --version) 2>/dev/null |$SED 1q`"'/
- s/\$autoconf_version/'"`(autoconf --version) 2>/dev/null |$SED 1q`"'/
- p
- }' < "$progpath"
- exit $?
-}
-
-# func_missing_arg argname
-# Echo program name prefixed message to standard error and set global
-# exit_cmd.
-func_missing_arg ()
-{
- func_error "missing argument for $1"
- exit_cmd=exit
-}
-
-exit_cmd=:
-
-
-
-
-
-# Check that we have a working $ECHO.
-if test "X$1" = X--no-reexec; then
- # Discard the --no-reexec flag, and continue.
- shift
-elif test "X$1" = X--fallback-echo; then
- # Avoid inline document here, it may be left over
- :
-elif test "X`{ $ECHO '\t'; } 2>/dev/null`" = 'X\t'; then
- # Yippee, $ECHO works!
- :
-else
- # Restart under the correct shell, and then maybe $ECHO will work.
- exec $SHELL "$progpath" --no-reexec ${1+"$@"}
-fi
-
-if test "X$1" = X--fallback-echo; then
- # used as fallback echo
- shift
- cat <<EOF
-$*
-EOF
- exit $EXIT_SUCCESS
-fi
-
-magic="%%%MAGIC variable%%%"
-magic_exe="%%%MAGIC EXE variable%%%"
-
-# Global variables.
-# $mode is unset
-nonopt=
-execute_dlfiles=
-preserve_args=
-lo2o="s/\\.lo\$/.${objext}/"
-o2lo="s/\\.${objext}\$/.lo/"
-extracted_archives=
-extracted_serial=0
-
-opt_dry_run=false
-opt_duplicate_deps=false
-opt_silent=false
-opt_debug=:
-
-# If this variable is set in any of the actions, the command in it
-# will be execed at the end. This prevents here-documents from being
-# left over by shells.
-exec_cmd=
-
-# func_fatal_configuration arg...
-# Echo program name prefixed message to standard error, followed by
-# a configuration failure hint, and exit.
-func_fatal_configuration ()
-{
- func_error ${1+"$@"}
- func_error "See the $PACKAGE documentation for more information."
- func_fatal_error "Fatal configuration error."
-}
-
-
-# func_config
-# Display the configuration for all the tags in this script.
-func_config ()
-{
- re_begincf='^# ### BEGIN LIBTOOL'
- re_endcf='^# ### END LIBTOOL'
-
- # Default configuration.
- $SED "1,/$re_begincf CONFIG/d;/$re_endcf CONFIG/,\$d" < "$progpath"
-
- # Now print the configurations for the tags.
- for tagname in $taglist; do
- $SED -n "/$re_begincf TAG CONFIG: $tagname\$/,/$re_endcf TAG CONFIG: $tagname\$/p" < "$progpath"
- done
-
- exit $?
-}
-
-# func_features
-# Display the features supported by this script.
-func_features ()
-{
- $ECHO "host: $host"
- if test "$build_libtool_libs" = yes; then
- $ECHO "enable shared libraries"
- else
- $ECHO "disable shared libraries"
- fi
- if test "$build_old_libs" = yes; then
- $ECHO "enable static libraries"
- else
- $ECHO "disable static libraries"
- fi
-
- exit $?
-}
-
-# func_enable_tag tagname
-# Verify that TAGNAME is valid, and either flag an error and exit, or
-# enable the TAGNAME tag. We also add TAGNAME to the global $taglist
-# variable here.
-func_enable_tag ()
-{
- # Global variable:
- tagname="$1"
-
- re_begincf="^# ### BEGIN LIBTOOL TAG CONFIG: $tagname\$"
- re_endcf="^# ### END LIBTOOL TAG CONFIG: $tagname\$"
- sed_extractcf="/$re_begincf/,/$re_endcf/p"
-
- # Validate tagname.
- case $tagname in
- *[!-_A-Za-z0-9,/]*)
- func_fatal_error "invalid tag name: $tagname"
- ;;
- esac
-
- # Don't test for the "default" C tag, as we know it's
- # there but not specially marked.
- case $tagname in
- CC) ;;
- *)
- if $GREP "$re_begincf" "$progpath" >/dev/null 2>&1; then
- taglist="$taglist $tagname"
-
- # Evaluate the configuration. Be careful to quote the path
- # and the sed script, to avoid splitting on whitespace, but
- # also don't use non-portable quotes within backquotes within
- # quotes we have to do it in 2 steps:
- extractedcf=`$SED -n -e "$sed_extractcf" < "$progpath"`
- eval "$extractedcf"
- else
- func_error "ignoring unknown tag $tagname"
- fi
- ;;
- esac
-}
-
-# Parse options once, thoroughly. This comes as soon as possible in
-# the script to make things like `libtool --version' happen quickly.
-{
-
- # Shorthand for --mode=foo, only valid as the first argument
- case $1 in
- clean|clea|cle|cl)
- shift; set dummy --mode clean ${1+"$@"}; shift
- ;;
- compile|compil|compi|comp|com|co|c)
- shift; set dummy --mode compile ${1+"$@"}; shift
- ;;
- execute|execut|execu|exec|exe|ex|e)
- shift; set dummy --mode execute ${1+"$@"}; shift
- ;;
- finish|finis|fini|fin|fi|f)
- shift; set dummy --mode finish ${1+"$@"}; shift
- ;;
- install|instal|insta|inst|ins|in|i)
- shift; set dummy --mode install ${1+"$@"}; shift
- ;;
- link|lin|li|l)
- shift; set dummy --mode link ${1+"$@"}; shift
- ;;
- uninstall|uninstal|uninsta|uninst|unins|unin|uni|un|u)
- shift; set dummy --mode uninstall ${1+"$@"}; shift
- ;;
- esac
-
- # Parse non-mode specific arguments:
- while test "$#" -gt 0; do
- opt="$1"
- shift
-
- case $opt in
- --config) func_config ;;
-
- --debug) preserve_args="$preserve_args $opt"
- func_echo "enabling shell trace mode"
- opt_debug='set -x'
- $opt_debug
- ;;
-
- -dlopen) test "$#" -eq 0 && func_missing_arg "$opt" && break
- execute_dlfiles="$execute_dlfiles $1"
- shift
- ;;
-
- --dry-run | -n) opt_dry_run=: ;;
- --features) func_features ;;
- --finish) mode="finish" ;;
-
- --mode) test "$#" -eq 0 && func_missing_arg "$opt" && break
- case $1 in
- # Valid mode arguments:
- clean) ;;
- compile) ;;
- execute) ;;
- finish) ;;
- install) ;;
- link) ;;
- relink) ;;
- uninstall) ;;
-
- # Catch anything else as an error
- *) func_error "invalid argument for $opt"
- exit_cmd=exit
- break
- ;;
- esac
-
- mode="$1"
- shift
- ;;
-
- --preserve-dup-deps)
- opt_duplicate_deps=: ;;
-
- --quiet|--silent) preserve_args="$preserve_args $opt"
- opt_silent=:
- ;;
-
- --verbose| -v) preserve_args="$preserve_args $opt"
- opt_silent=false
- ;;
-
- --tag) test "$#" -eq 0 && func_missing_arg "$opt" && break
- preserve_args="$preserve_args $opt $1"
- func_enable_tag "$1" # tagname is set here
- shift
- ;;
-
- # Separate optargs to long options:
- -dlopen=*|--mode=*|--tag=*)
- func_opt_split "$opt"
- set dummy "$func_opt_split_opt" "$func_opt_split_arg" ${1+"$@"}
- shift
- ;;
-
- -\?|-h) func_usage ;;
- --help) opt_help=: ;;
- --version) func_version ;;
-
- -*) func_fatal_help "unrecognized option \`$opt'" ;;
-
- *) nonopt="$opt"
- break
- ;;
- esac
- done
-
-
- case $host in
- *cygwin* | *mingw* | *pw32* | *cegcc*)
- # don't eliminate duplications in $postdeps and $predeps
- opt_duplicate_compiler_generated_deps=:
- ;;
- *)
- opt_duplicate_compiler_generated_deps=$opt_duplicate_deps
- ;;
- esac
-
- # Having warned about all mis-specified options, bail out if
- # anything was wrong.
- $exit_cmd $EXIT_FAILURE
-}
-
-# func_check_version_match
-# Ensure that we are using m4 macros, and libtool script from the same
-# release of libtool.
-func_check_version_match ()
-{
- if test "$package_revision" != "$macro_revision"; then
- if test "$VERSION" != "$macro_version"; then
- if test -z "$macro_version"; then
- cat >&2 <<_LT_EOF
-$progname: Version mismatch error. This is $PACKAGE $VERSION, but the
-$progname: definition of this LT_INIT comes from an older release.
-$progname: You should recreate aclocal.m4 with macros from $PACKAGE $VERSION
-$progname: and run autoconf again.
-_LT_EOF
- else
- cat >&2 <<_LT_EOF
-$progname: Version mismatch error. This is $PACKAGE $VERSION, but the
-$progname: definition of this LT_INIT comes from $PACKAGE $macro_version.
-$progname: You should recreate aclocal.m4 with macros from $PACKAGE $VERSION
-$progname: and run autoconf again.
-_LT_EOF
- fi
- else
- cat >&2 <<_LT_EOF
-$progname: Version mismatch error. This is $PACKAGE $VERSION, revision $package_revision,
-$progname: but the definition of this LT_INIT comes from revision $macro_revision.
-$progname: You should recreate aclocal.m4 with macros from revision $package_revision
-$progname: of $PACKAGE $VERSION and run autoconf again.
-_LT_EOF
- fi
-
- exit $EXIT_MISMATCH
- fi
-}
-
-
-## ----------- ##
-## Main. ##
-## ----------- ##
-
-$opt_help || {
- # Sanity checks first:
- func_check_version_match
-
- if test "$build_libtool_libs" != yes && test "$build_old_libs" != yes; then
- func_fatal_configuration "not configured to build any kind of library"
- fi
-
- test -z "$mode" && func_fatal_error "error: you must specify a MODE."
-
-
- # Darwin sucks
- eval std_shrext=\"$shrext_cmds\"
-
-
- # Only execute mode is allowed to have -dlopen flags.
- if test -n "$execute_dlfiles" && test "$mode" != execute; then
- func_error "unrecognized option \`-dlopen'"
- $ECHO "$help" 1>&2
- exit $EXIT_FAILURE
- fi
-
- # Change the help message to a mode-specific one.
- generic_help="$help"
- help="Try \`$progname --help --mode=$mode' for more information."
-}
-
-
-# func_lalib_p file
-# True iff FILE is a libtool `.la' library or `.lo' object file.
-# This function is only a basic sanity check; it will hardly flush out
-# determined imposters.
-func_lalib_p ()
-{
- test -f "$1" &&
- $SED -e 4q "$1" 2>/dev/null \
- | $GREP "^# Generated by .*$PACKAGE" > /dev/null 2>&1
-}
-
-# func_lalib_unsafe_p file
-# True iff FILE is a libtool `.la' library or `.lo' object file.
-# This function implements the same check as func_lalib_p without
-# resorting to external programs. To this end, it redirects stdin and
-# closes it afterwards, without saving the original file descriptor.
-# As a safety measure, use it only where a negative result would be
-# fatal anyway. Works if `file' does not exist.
-func_lalib_unsafe_p ()
-{
- lalib_p=no
- if test -f "$1" && test -r "$1" && exec 5<&0 <"$1"; then
- for lalib_p_l in 1 2 3 4
- do
- read lalib_p_line
- case "$lalib_p_line" in
- \#\ Generated\ by\ *$PACKAGE* ) lalib_p=yes; break;;
- esac
- done
- exec 0<&5 5<&-
- fi
- test "$lalib_p" = yes
-}
-
-# func_ltwrapper_script_p file
-# True iff FILE is a libtool wrapper script
-# This function is only a basic sanity check; it will hardly flush out
-# determined imposters.
-func_ltwrapper_script_p ()
-{
- func_lalib_p "$1"
-}
-
-# func_ltwrapper_executable_p file
-# True iff FILE is a libtool wrapper executable
-# This function is only a basic sanity check; it will hardly flush out
-# determined imposters.
-func_ltwrapper_executable_p ()
-{
- func_ltwrapper_exec_suffix=
- case $1 in
- *.exe) ;;
- *) func_ltwrapper_exec_suffix=.exe ;;
- esac
- $GREP "$magic_exe" "$1$func_ltwrapper_exec_suffix" >/dev/null 2>&1
-}
-
-# func_ltwrapper_scriptname file
-# Assumes file is an ltwrapper_executable
-# uses $file to determine the appropriate filename for a
-# temporary ltwrapper_script.
-func_ltwrapper_scriptname ()
-{
- func_ltwrapper_scriptname_result=""
- if func_ltwrapper_executable_p "$1"; then
- func_dirname_and_basename "$1" "" "."
- func_stripname '' '.exe' "$func_basename_result"
- func_ltwrapper_scriptname_result="$func_dirname_result/$objdir/${func_stripname_result}_ltshwrapper"
- fi
-}
-
-# func_ltwrapper_p file
-# True iff FILE is a libtool wrapper script or wrapper executable
-# This function is only a basic sanity check; it will hardly flush out
-# determined imposters.
-func_ltwrapper_p ()
-{
- func_ltwrapper_script_p "$1" || func_ltwrapper_executable_p "$1"
-}
-
-
-# func_execute_cmds commands fail_cmd
-# Execute tilde-delimited COMMANDS.
-# If FAIL_CMD is given, eval that upon failure.
-# FAIL_CMD may read-access the current command in variable CMD!
-func_execute_cmds ()
-{
- $opt_debug
- save_ifs=$IFS; IFS='~'
- for cmd in $1; do
- IFS=$save_ifs
- eval cmd=\"$cmd\"
- func_show_eval "$cmd" "${2-:}"
- done
- IFS=$save_ifs
-}
-
-
-# func_source file
-# Source FILE, adding directory component if necessary.
-# Note that it is not necessary on cygwin/mingw to append a dot to
-# FILE even if both FILE and FILE.exe exist: automatic-append-.exe
-# behavior happens only for exec(3), not for open(2)! Also, sourcing
-# `FILE.' does not work on cygwin managed mounts.
-func_source ()
-{
- $opt_debug
- case $1 in
- */* | *\\*) . "$1" ;;
- *) . "./$1" ;;
- esac
-}
-
-
-# func_infer_tag arg
-# Infer tagged configuration to use if any are available and
-# if one wasn't chosen via the "--tag" command line option.
-# Only attempt this if the compiler in the base compile
-# command doesn't match the default compiler.
-# arg is usually of the form 'gcc ...'
-func_infer_tag ()
-{
- $opt_debug
- if test -n "$available_tags" && test -z "$tagname"; then
- CC_quoted=
- for arg in $CC; do
- func_quote_for_eval "$arg"
- CC_quoted="$CC_quoted $func_quote_for_eval_result"
- done
- case $@ in
- # Blanks in the command may have been stripped by the calling shell,
- # but not from the CC environment variable when configure was run.
- " $CC "* | "$CC "* | " `$ECHO $CC` "* | "`$ECHO $CC` "* | " $CC_quoted"* | "$CC_quoted "* | " `$ECHO $CC_quoted` "* | "`$ECHO $CC_quoted` "*) ;;
- # Blanks at the start of $base_compile will cause this to fail
- # if we don't check for them as well.
- *)
- for z in $available_tags; do
- if $GREP "^# ### BEGIN LIBTOOL TAG CONFIG: $z$" < "$progpath" > /dev/null; then
- # Evaluate the configuration.
- eval "`${SED} -n -e '/^# ### BEGIN LIBTOOL TAG CONFIG: '$z'$/,/^# ### END LIBTOOL TAG CONFIG: '$z'$/p' < $progpath`"
- CC_quoted=
- for arg in $CC; do
- # Double-quote args containing other shell metacharacters.
- func_quote_for_eval "$arg"
- CC_quoted="$CC_quoted $func_quote_for_eval_result"
- done
- case "$@ " in
- " $CC "* | "$CC "* | " `$ECHO $CC` "* | "`$ECHO $CC` "* | " $CC_quoted"* | "$CC_quoted "* | " `$ECHO $CC_quoted` "* | "`$ECHO $CC_quoted` "*)
- # The compiler in the base compile command matches
- # the one in the tagged configuration.
- # Assume this is the tagged configuration we want.
- tagname=$z
- break
- ;;
- esac
- fi
- done
- # If $tagname still isn't set, then no tagged configuration
- # was found and let the user know that the "--tag" command
- # line option must be used.
- if test -z "$tagname"; then
- func_echo "unable to infer tagged configuration"
- func_fatal_error "specify a tag with \`--tag'"
-# else
-# func_verbose "using $tagname tagged configuration"
- fi
- ;;
- esac
- fi
-}
-
-
-
-# func_write_libtool_object output_name pic_name nonpic_name
-# Create a libtool object file (analogous to a ".la" file),
-# but don't create it if we're doing a dry run.
-func_write_libtool_object ()
-{
- write_libobj=${1}
- if test "$build_libtool_libs" = yes; then
- write_lobj=\'${2}\'
- else
- write_lobj=none
- fi
-
- if test "$build_old_libs" = yes; then
- write_oldobj=\'${3}\'
- else
- write_oldobj=none
- fi
-
- $opt_dry_run || {
- cat >${write_libobj}T <<EOF
-# $write_libobj - a libtool object file
-# Generated by $PROGRAM (GNU $PACKAGE$TIMESTAMP) $VERSION
-#
-# Please DO NOT delete this file!
-# It is necessary for linking the library.
-
-# Name of the PIC object.
-pic_object=$write_lobj
-
-# Name of the non-PIC object
-non_pic_object=$write_oldobj
-
-EOF
- $MV "${write_libobj}T" "${write_libobj}"
- }
-}
-
-# func_mode_compile arg...
-func_mode_compile ()
-{
- $opt_debug
- # Get the compilation command and the source file.
- base_compile=
- srcfile="$nonopt" # always keep a non-empty value in "srcfile"
- suppress_opt=yes
- suppress_output=
- arg_mode=normal
- libobj=
- later=
- pie_flag=
-
- for arg
- do
- case $arg_mode in
- arg )
- # do not "continue". Instead, add this to base_compile
- lastarg="$arg"
- arg_mode=normal
- ;;
-
- target )
- libobj="$arg"
- arg_mode=normal
- continue
- ;;
-
- normal )
- # Accept any command-line options.
- case $arg in
- -o)
- test -n "$libobj" && \
- func_fatal_error "you cannot specify \`-o' more than once"
- arg_mode=target
- continue
- ;;
-
- -pie | -fpie | -fPIE)
- pie_flag="$pie_flag $arg"
- continue
- ;;
-
- -shared | -static | -prefer-pic | -prefer-non-pic)
- later="$later $arg"
- continue
- ;;
-
- -no-suppress)
- suppress_opt=no
- continue
- ;;
-
- -Xcompiler)
- arg_mode=arg # the next one goes into the "base_compile" arg list
- continue # The current "srcfile" will either be retained or
- ;; # replaced later. I would guess that would be a bug.
-
- -Wc,*)
- func_stripname '-Wc,' '' "$arg"
- args=$func_stripname_result
- lastarg=
- save_ifs="$IFS"; IFS=','
- for arg in $args; do
- IFS="$save_ifs"
- func_quote_for_eval "$arg"
- lastarg="$lastarg $func_quote_for_eval_result"
- done
- IFS="$save_ifs"
- func_stripname ' ' '' "$lastarg"
- lastarg=$func_stripname_result
-
- # Add the arguments to base_compile.
- base_compile="$base_compile $lastarg"
- continue
- ;;
-
- *)
- # Accept the current argument as the source file.
- # The previous "srcfile" becomes the current argument.
- #
- lastarg="$srcfile"
- srcfile="$arg"
- ;;
- esac # case $arg
- ;;
- esac # case $arg_mode
-
- # Aesthetically quote the previous argument.
- func_quote_for_eval "$lastarg"
- base_compile="$base_compile $func_quote_for_eval_result"
- done # for arg
-
- case $arg_mode in
- arg)
- func_fatal_error "you must specify an argument for -Xcompile"
- ;;
- target)
- func_fatal_error "you must specify a target with \`-o'"
- ;;
- *)
- # Get the name of the library object.
- test -z "$libobj" && {
- func_basename "$srcfile"
- libobj="$func_basename_result"
- }
- ;;
- esac
-
- # Recognize several different file suffixes.
- # If the user specifies -o file.o, it is replaced with file.lo
- case $libobj in
- *.[cCFSifmso] | \
- *.ada | *.adb | *.ads | *.asm | \
- *.c++ | *.cc | *.ii | *.class | *.cpp | *.cxx | \
- *.[fF][09]? | *.for | *.java | *.obj | *.sx)
- func_xform "$libobj"
- libobj=$func_xform_result
- ;;
- esac
-
- case $libobj in
- *.lo) func_lo2o "$libobj"; obj=$func_lo2o_result ;;
- *)
- func_fatal_error "cannot determine name of library object from \`$libobj'"
- ;;
- esac
-
- func_infer_tag $base_compile
-
- for arg in $later; do
- case $arg in
- -shared)
- test "$build_libtool_libs" != yes && \
- func_fatal_configuration "can not build a shared library"
- build_old_libs=no
- continue
- ;;
-
- -static)
- build_libtool_libs=no
- build_old_libs=yes
- continue
- ;;
-
- -prefer-pic)
- pic_mode=yes
- continue
- ;;
-
- -prefer-non-pic)
- pic_mode=no
- continue
- ;;
- esac
- done
-
- func_quote_for_eval "$libobj"
- test "X$libobj" != "X$func_quote_for_eval_result" \
- && $ECHO "X$libobj" | $GREP '[]~#^*{};<>?"'"'"' &()|`$[]' \
- && func_warning "libobj name \`$libobj' may not contain shell special characters."
- func_dirname_and_basename "$obj" "/" ""
- objname="$func_basename_result"
- xdir="$func_dirname_result"
- lobj=${xdir}$objdir/$objname
-
- test -z "$base_compile" && \
- func_fatal_help "you must specify a compilation command"
-
- # Delete any leftover library objects.
- if test "$build_old_libs" = yes; then
- removelist="$obj $lobj $libobj ${libobj}T"
- else
- removelist="$lobj $libobj ${libobj}T"
- fi
-
- # On Cygwin there's no "real" PIC flag so we must build both object types
- case $host_os in
- cygwin* | mingw* | pw32* | os2* | cegcc*)
- pic_mode=default
- ;;
- esac
- if test "$pic_mode" = no && test "$deplibs_check_method" != pass_all; then
- # non-PIC code in shared libraries is not supported
- pic_mode=default
- fi
-
- # Calculate the filename of the output object if compiler does
- # not support -o with -c
- if test "$compiler_c_o" = no; then
- output_obj=`$ECHO "X$srcfile" | $Xsed -e 's%^.*/%%' -e 's%\.[^.]*$%%'`.${objext}
- lockfile="$output_obj.lock"
- else
- output_obj=
- need_locks=no
- lockfile=
- fi
-
- # Lock this critical section if it is needed
- # We use this script file to make the link, it avoids creating a new file
- if test "$need_locks" = yes; then
- until $opt_dry_run || ln "$progpath" "$lockfile" 2>/dev/null; do
- func_echo "Waiting for $lockfile to be removed"
- sleep 2
- done
- elif test "$need_locks" = warn; then
- if test -f "$lockfile"; then
- $ECHO "\
-*** ERROR, $lockfile exists and contains:
-`cat $lockfile 2>/dev/null`
-
-This indicates that another process is trying to use the same
-temporary object file, and libtool could not work around it because
-your compiler does not support \`-c' and \`-o' together. If you
-repeat this compilation, it may succeed, by chance, but you had better
-avoid parallel builds (make -j) in this platform, or get a better
-compiler."
-
- $opt_dry_run || $RM $removelist
- exit $EXIT_FAILURE
- fi
- removelist="$removelist $output_obj"
- $ECHO "$srcfile" > "$lockfile"
- fi
-
- $opt_dry_run || $RM $removelist
- removelist="$removelist $lockfile"
- trap '$opt_dry_run || $RM $removelist; exit $EXIT_FAILURE' 1 2 15
-
- if test -n "$fix_srcfile_path"; then
- eval srcfile=\"$fix_srcfile_path\"
- fi
- func_quote_for_eval "$srcfile"
- qsrcfile=$func_quote_for_eval_result
-
- # Only build a PIC object if we are building libtool libraries.
- if test "$build_libtool_libs" = yes; then
- # Without this assignment, base_compile gets emptied.
- fbsd_hideous_sh_bug=$base_compile
-
- if test "$pic_mode" != no; then
- command="$base_compile $qsrcfile $pic_flag"
- else
- # Don't build PIC code
- command="$base_compile $qsrcfile"
- fi
-
- func_mkdir_p "$xdir$objdir"
-
- if test -z "$output_obj"; then
- # Place PIC objects in $objdir
- command="$command -o $lobj"
- fi
-
- func_show_eval_locale "$command" \
- 'test -n "$output_obj" && $RM $removelist; exit $EXIT_FAILURE'
-
- if test "$need_locks" = warn &&
- test "X`cat $lockfile 2>/dev/null`" != "X$srcfile"; then
- $ECHO "\
-*** ERROR, $lockfile contains:
-`cat $lockfile 2>/dev/null`
-
-but it should contain:
-$srcfile
-
-This indicates that another process is trying to use the same
-temporary object file, and libtool could not work around it because
-your compiler does not support \`-c' and \`-o' together. If you
-repeat this compilation, it may succeed, by chance, but you had better
-avoid parallel builds (make -j) in this platform, or get a better
-compiler."
-
- $opt_dry_run || $RM $removelist
- exit $EXIT_FAILURE
- fi
-
- # Just move the object if needed, then go on to compile the next one
- if test -n "$output_obj" && test "X$output_obj" != "X$lobj"; then
- func_show_eval '$MV "$output_obj" "$lobj"' \
- 'error=$?; $opt_dry_run || $RM $removelist; exit $error'
- fi
-
- # Allow error messages only from the first compilation.
- if test "$suppress_opt" = yes; then
- suppress_output=' >/dev/null 2>&1'
- fi
- fi
-
- # Only build a position-dependent object if we build old libraries.
- if test "$build_old_libs" = yes; then
- if test "$pic_mode" != yes; then
- # Don't build PIC code
- command="$base_compile $qsrcfile$pie_flag"
- else
- command="$base_compile $qsrcfile $pic_flag"
- fi
- if test "$compiler_c_o" = yes; then
- command="$command -o $obj"
- fi
-
- # Suppress compiler output if we already did a PIC compilation.
- command="$command$suppress_output"
- func_show_eval_locale "$command" \
- '$opt_dry_run || $RM $removelist; exit $EXIT_FAILURE'
-
- if test "$need_locks" = warn &&
- test "X`cat $lockfile 2>/dev/null`" != "X$srcfile"; then
- $ECHO "\
-*** ERROR, $lockfile contains:
-`cat $lockfile 2>/dev/null`
-
-but it should contain:
-$srcfile
-
-This indicates that another process is trying to use the same
-temporary object file, and libtool could not work around it because
-your compiler does not support \`-c' and \`-o' together. If you
-repeat this compilation, it may succeed, by chance, but you had better
-avoid parallel builds (make -j) in this platform, or get a better
-compiler."
-
- $opt_dry_run || $RM $removelist
- exit $EXIT_FAILURE
- fi
-
- # Just move the object if needed
- if test -n "$output_obj" && test "X$output_obj" != "X$obj"; then
- func_show_eval '$MV "$output_obj" "$obj"' \
- 'error=$?; $opt_dry_run || $RM $removelist; exit $error'
- fi
- fi
-
- $opt_dry_run || {
- func_write_libtool_object "$libobj" "$objdir/$objname" "$objname"
-
- # Unlock the critical section if it was locked
- if test "$need_locks" != no; then
- removelist=$lockfile
- $RM "$lockfile"
- fi
- }
-
- exit $EXIT_SUCCESS
-}
-
-$opt_help || {
-test "$mode" = compile && func_mode_compile ${1+"$@"}
-}
-
-func_mode_help ()
-{
- # We need to display help for each of the modes.
- case $mode in
- "")
- # Generic help is extracted from the usage comments
- # at the start of this file.
- func_help
- ;;
-
- clean)
- $ECHO \
-"Usage: $progname [OPTION]... --mode=clean RM [RM-OPTION]... FILE...
-
-Remove files from the build directory.
-
-RM is the name of the program to use to delete files associated with each FILE
-(typically \`/bin/rm'). RM-OPTIONS are options (such as \`-f') to be passed
-to RM.
-
-If FILE is a libtool library, object or program, all the files associated
-with it are deleted. Otherwise, only FILE itself is deleted using RM."
- ;;
-
- compile)
- $ECHO \
-"Usage: $progname [OPTION]... --mode=compile COMPILE-COMMAND... SOURCEFILE
-
-Compile a source file into a libtool library object.
-
-This mode accepts the following additional options:
-
- -o OUTPUT-FILE set the output file name to OUTPUT-FILE
- -no-suppress do not suppress compiler output for multiple passes
- -prefer-pic try to building PIC objects only
- -prefer-non-pic try to building non-PIC objects only
- -shared do not build a \`.o' file suitable for static linking
- -static only build a \`.o' file suitable for static linking
-
-COMPILE-COMMAND is a command to be used in creating a \`standard' object file
-from the given SOURCEFILE.
-
-The output file name is determined by removing the directory component from
-SOURCEFILE, then substituting the C source code suffix \`.c' with the
-library object suffix, \`.lo'."
- ;;
-
- execute)
- $ECHO \
-"Usage: $progname [OPTION]... --mode=execute COMMAND [ARGS]...
-
-Automatically set library path, then run a program.
-
-This mode accepts the following additional options:
-
- -dlopen FILE add the directory containing FILE to the library path
-
-This mode sets the library path environment variable according to \`-dlopen'
-flags.
-
-If any of the ARGS are libtool executable wrappers, then they are translated
-into their corresponding uninstalled binary, and any of their required library
-directories are added to the library path.
-
-Then, COMMAND is executed, with ARGS as arguments."
- ;;
-
- finish)
- $ECHO \
-"Usage: $progname [OPTION]... --mode=finish [LIBDIR]...
-
-Complete the installation of libtool libraries.
-
-Each LIBDIR is a directory that contains libtool libraries.
-
-The commands that this mode executes may require superuser privileges. Use
-the \`--dry-run' option if you just want to see what would be executed."
- ;;
-
- install)
- $ECHO \
-"Usage: $progname [OPTION]... --mode=install INSTALL-COMMAND...
-
-Install executables or libraries.
-
-INSTALL-COMMAND is the installation command. The first component should be
-either the \`install' or \`cp' program.
-
-The following components of INSTALL-COMMAND are treated specially:
-
- -inst-prefix PREFIX-DIR Use PREFIX-DIR as a staging area for installation
-
-The rest of the components are interpreted as arguments to that command (only
-BSD-compatible install options are recognized)."
- ;;
-
- link)
- $ECHO \
-"Usage: $progname [OPTION]... --mode=link LINK-COMMAND...
-
-Link object files or libraries together to form another library, or to
-create an executable program.
-
-LINK-COMMAND is a command using the C compiler that you would use to create
-a program from several object files.
-
-The following components of LINK-COMMAND are treated specially:
-
- -all-static do not do any dynamic linking at all
- -avoid-version do not add a version suffix if possible
- -dlopen FILE \`-dlpreopen' FILE if it cannot be dlopened at runtime
- -dlpreopen FILE link in FILE and add its symbols to lt_preloaded_symbols
- -export-dynamic allow symbols from OUTPUT-FILE to be resolved with dlsym(3)
- -export-symbols SYMFILE
- try to export only the symbols listed in SYMFILE
- -export-symbols-regex REGEX
- try to export only the symbols matching REGEX
- -LLIBDIR search LIBDIR for required installed libraries
- -lNAME OUTPUT-FILE requires the installed library libNAME
- -module build a library that can dlopened
- -no-fast-install disable the fast-install mode
- -no-install link a not-installable executable
- -no-undefined declare that a library does not refer to external symbols
- -o OUTPUT-FILE create OUTPUT-FILE from the specified objects
- -objectlist FILE Use a list of object files found in FILE to specify objects
- -precious-files-regex REGEX
- don't remove output files matching REGEX
- -release RELEASE specify package release information
- -rpath LIBDIR the created library will eventually be installed in LIBDIR
- -R[ ]LIBDIR add LIBDIR to the runtime path of programs and libraries
- -shared only do dynamic linking of libtool libraries
- -shrext SUFFIX override the standard shared library file extension
- -static do not do any dynamic linking of uninstalled libtool libraries
- -static-libtool-libs
- do not do any dynamic linking of libtool libraries
- -version-info CURRENT[:REVISION[:AGE]]
- specify library version info [each variable defaults to 0]
- -weak LIBNAME declare that the target provides the LIBNAME interface
-
-All other options (arguments beginning with \`-') are ignored.
-
-Every other argument is treated as a filename. Files ending in \`.la' are
-treated as uninstalled libtool libraries, other files are standard or library
-object files.
-
-If the OUTPUT-FILE ends in \`.la', then a libtool library is created,
-only library objects (\`.lo' files) may be specified, and \`-rpath' is
-required, except when creating a convenience library.
-
-If OUTPUT-FILE ends in \`.a' or \`.lib', then a standard library is created
-using \`ar' and \`ranlib', or on Windows using \`lib'.
-
-If OUTPUT-FILE ends in \`.lo' or \`.${objext}', then a reloadable object file
-is created, otherwise an executable program is created."
- ;;
-
- uninstall)
- $ECHO \
-"Usage: $progname [OPTION]... --mode=uninstall RM [RM-OPTION]... FILE...
-
-Remove libraries from an installation directory.
-
-RM is the name of the program to use to delete files associated with each FILE
-(typically \`/bin/rm'). RM-OPTIONS are options (such as \`-f') to be passed
-to RM.
-
-If FILE is a libtool library, all the files associated with it are deleted.
-Otherwise, only FILE itself is deleted using RM."
- ;;
-
- *)
- func_fatal_help "invalid operation mode \`$mode'"
- ;;
- esac
-
- $ECHO
- $ECHO "Try \`$progname --help' for more information about other modes."
-
- exit $?
-}
-
- # Now that we've collected a possible --mode arg, show help if necessary
- $opt_help && func_mode_help
-
-
-# func_mode_execute arg...
-func_mode_execute ()
-{
- $opt_debug
- # The first argument is the command name.
- cmd="$nonopt"
- test -z "$cmd" && \
- func_fatal_help "you must specify a COMMAND"
-
- # Handle -dlopen flags immediately.
- for file in $execute_dlfiles; do
- test -f "$file" \
- || func_fatal_help "\`$file' is not a file"
-
- dir=
- case $file in
- *.la)
- # Check to see that this really is a libtool archive.
- func_lalib_unsafe_p "$file" \
- || func_fatal_help "\`$lib' is not a valid libtool archive"
-
- # Read the libtool library.
- dlname=
- library_names=
- func_source "$file"
-
- # Skip this library if it cannot be dlopened.
- if test -z "$dlname"; then
- # Warn if it was a shared library.
- test -n "$library_names" && \
- func_warning "\`$file' was not linked with \`-export-dynamic'"
- continue
- fi
-
- func_dirname "$file" "" "."
- dir="$func_dirname_result"
-
- if test -f "$dir/$objdir/$dlname"; then
- dir="$dir/$objdir"
- else
- if test ! -f "$dir/$dlname"; then
- func_fatal_error "cannot find \`$dlname' in \`$dir' or \`$dir/$objdir'"
- fi
- fi
- ;;
-
- *.lo)
- # Just add the directory containing the .lo file.
- func_dirname "$file" "" "."
- dir="$func_dirname_result"
- ;;
-
- *)
- func_warning "\`-dlopen' is ignored for non-libtool libraries and objects"
- continue
- ;;
- esac
-
- # Get the absolute pathname.
- absdir=`cd "$dir" && pwd`
- test -n "$absdir" && dir="$absdir"
-
- # Now add the directory to shlibpath_var.
- if eval "test -z \"\$$shlibpath_var\""; then
- eval "$shlibpath_var=\"\$dir\""
- else
- eval "$shlibpath_var=\"\$dir:\$$shlibpath_var\""
- fi
- done
-
- # This variable tells wrapper scripts just to set shlibpath_var
- # rather than running their programs.
- libtool_execute_magic="$magic"
-
- # Check if any of the arguments is a wrapper script.
- args=
- for file
- do
- case $file in
- -*) ;;
- *)
- # Do a test to see if this is really a libtool program.
- if func_ltwrapper_script_p "$file"; then
- func_source "$file"
- # Transform arg to wrapped name.
- file="$progdir/$program"
- elif func_ltwrapper_executable_p "$file"; then
- func_ltwrapper_scriptname "$file"
- func_source "$func_ltwrapper_scriptname_result"
- # Transform arg to wrapped name.
- file="$progdir/$program"
- fi
- ;;
- esac
- # Quote arguments (to preserve shell metacharacters).
- func_quote_for_eval "$file"
- args="$args $func_quote_for_eval_result"
- done
-
- if test "X$opt_dry_run" = Xfalse; then
- if test -n "$shlibpath_var"; then
- # Export the shlibpath_var.
- eval "export $shlibpath_var"
- fi
-
- # Restore saved environment variables
- for lt_var in LANG LANGUAGE LC_ALL LC_CTYPE LC_COLLATE LC_MESSAGES
- do
- eval "if test \"\${save_$lt_var+set}\" = set; then
- $lt_var=\$save_$lt_var; export $lt_var
- else
- $lt_unset $lt_var
- fi"
- done
-
- # Now prepare to actually exec the command.
- exec_cmd="\$cmd$args"
- else
- # Display what would be done.
- if test -n "$shlibpath_var"; then
- eval "\$ECHO \"\$shlibpath_var=\$$shlibpath_var\""
- $ECHO "export $shlibpath_var"
- fi
- $ECHO "$cmd$args"
- exit $EXIT_SUCCESS
- fi
-}
-
-test "$mode" = execute && func_mode_execute ${1+"$@"}
-
-
-# func_mode_finish arg...
-func_mode_finish ()
-{
- $opt_debug
- libdirs="$nonopt"
- admincmds=
-
- if test -n "$finish_cmds$finish_eval" && test -n "$libdirs"; then
- for dir
- do
- libdirs="$libdirs $dir"
- done
-
- for libdir in $libdirs; do
- if test -n "$finish_cmds"; then
- # Do each command in the finish commands.
- func_execute_cmds "$finish_cmds" 'admincmds="$admincmds
-'"$cmd"'"'
- fi
- if test -n "$finish_eval"; then
- # Do the single finish_eval.
- eval cmds=\"$finish_eval\"
- $opt_dry_run || eval "$cmds" || admincmds="$admincmds
- $cmds"
- fi
- done
- fi
-
- # Exit here if they wanted silent mode.
- $opt_silent && exit $EXIT_SUCCESS
-
- $ECHO "X----------------------------------------------------------------------" | $Xsed
- $ECHO "Libraries have been installed in:"
- for libdir in $libdirs; do
- $ECHO " $libdir"
- done
- $ECHO
- $ECHO "If you ever happen to want to link against installed libraries"
- $ECHO "in a given directory, LIBDIR, you must either use libtool, and"
- $ECHO "specify the full pathname of the library, or use the \`-LLIBDIR'"
- $ECHO "flag during linking and do at least one of the following:"
- if test -n "$shlibpath_var"; then
- $ECHO " - add LIBDIR to the \`$shlibpath_var' environment variable"
- $ECHO " during execution"
- fi
- if test -n "$runpath_var"; then
- $ECHO " - add LIBDIR to the \`$runpath_var' environment variable"
- $ECHO " during linking"
- fi
- if test -n "$hardcode_libdir_flag_spec"; then
- libdir=LIBDIR
- eval flag=\"$hardcode_libdir_flag_spec\"
-
- $ECHO " - use the \`$flag' linker flag"
- fi
- if test -n "$admincmds"; then
- $ECHO " - have your system administrator run these commands:$admincmds"
- fi
- if test -f /etc/ld.so.conf; then
- $ECHO " - have your system administrator add LIBDIR to \`/etc/ld.so.conf'"
- fi
- $ECHO
-
- $ECHO "See any operating system documentation about shared libraries for"
- case $host in
- solaris2.[6789]|solaris2.1[0-9])
- $ECHO "more information, such as the ld(1), crle(1) and ld.so(8) manual"
- $ECHO "pages."
- ;;
- *)
- $ECHO "more information, such as the ld(1) and ld.so(8) manual pages."
- ;;
- esac
- $ECHO "X----------------------------------------------------------------------" | $Xsed
- exit $EXIT_SUCCESS
-}
-
-test "$mode" = finish && func_mode_finish ${1+"$@"}
-
-
-# func_mode_install arg...
-func_mode_install ()
-{
- $opt_debug
- # There may be an optional sh(1) argument at the beginning of
- # install_prog (especially on Windows NT).
- if test "$nonopt" = "$SHELL" || test "$nonopt" = /bin/sh ||
- # Allow the use of GNU shtool's install command.
- $ECHO "X$nonopt" | $GREP shtool >/dev/null; then
- # Aesthetically quote it.
- func_quote_for_eval "$nonopt"
- install_prog="$func_quote_for_eval_result "
- arg=$1
- shift
- else
- install_prog=
- arg=$nonopt
- fi
-
- # The real first argument should be the name of the installation program.
- # Aesthetically quote it.
- func_quote_for_eval "$arg"
- install_prog="$install_prog$func_quote_for_eval_result"
-
- # We need to accept at least all the BSD install flags.
- dest=
- files=
- opts=
- prev=
- install_type=
- isdir=no
- stripme=
- for arg
- do
- if test -n "$dest"; then
- files="$files $dest"
- dest=$arg
- continue
- fi
-
- case $arg in
- -d) isdir=yes ;;
- -f)
- case " $install_prog " in
- *[\\\ /]cp\ *) ;;
- *) prev=$arg ;;
- esac
- ;;
- -g | -m | -o)
- prev=$arg
- ;;
- -s)
- stripme=" -s"
- continue
- ;;
- -*)
- ;;
- *)
- # If the previous option needed an argument, then skip it.
- if test -n "$prev"; then
- prev=
- else
- dest=$arg
- continue
- fi
- ;;
- esac
-
- # Aesthetically quote the argument.
- func_quote_for_eval "$arg"
- install_prog="$install_prog $func_quote_for_eval_result"
- done
-
- test -z "$install_prog" && \
- func_fatal_help "you must specify an install program"
-
- test -n "$prev" && \
- func_fatal_help "the \`$prev' option requires an argument"
-
- if test -z "$files"; then
- if test -z "$dest"; then
- func_fatal_help "no file or destination specified"
- else
- func_fatal_help "you must specify a destination"
- fi
- fi
-
- # Strip any trailing slash from the destination.
- func_stripname '' '/' "$dest"
- dest=$func_stripname_result
-
- # Check to see that the destination is a directory.
- test -d "$dest" && isdir=yes
- if test "$isdir" = yes; then
- destdir="$dest"
- destname=
- else
- func_dirname_and_basename "$dest" "" "."
- destdir="$func_dirname_result"
- destname="$func_basename_result"
-
- # Not a directory, so check to see that there is only one file specified.
- set dummy $files; shift
- test "$#" -gt 1 && \
- func_fatal_help "\`$dest' is not a directory"
- fi
- case $destdir in
- [\\/]* | [A-Za-z]:[\\/]*) ;;
- *)
- for file in $files; do
- case $file in
- *.lo) ;;
- *)
- func_fatal_help "\`$destdir' must be an absolute directory name"
- ;;
- esac
- done
- ;;
- esac
-
- # This variable tells wrapper scripts just to set variables rather
- # than running their programs.
- libtool_install_magic="$magic"
-
- staticlibs=
- future_libdirs=
- current_libdirs=
- for file in $files; do
-
- # Do each installation.
- case $file in
- *.$libext)
- # Do the static libraries later.
- staticlibs="$staticlibs $file"
- ;;
-
- *.la)
- # Check to see that this really is a libtool archive.
- func_lalib_unsafe_p "$file" \
- || func_fatal_help "\`$file' is not a valid libtool archive"
-
- library_names=
- old_library=
- relink_command=
- func_source "$file"
-
- # Add the libdir to current_libdirs if it is the destination.
- if test "X$destdir" = "X$libdir"; then
- case "$current_libdirs " in
- *" $libdir "*) ;;
- *) current_libdirs="$current_libdirs $libdir" ;;
- esac
- else
- # Note the libdir as a future libdir.
- case "$future_libdirs " in
- *" $libdir "*) ;;
- *) future_libdirs="$future_libdirs $libdir" ;;
- esac
- fi
-
- func_dirname "$file" "/" ""
- dir="$func_dirname_result"
- dir="$dir$objdir"
-
- if test -n "$relink_command"; then
- # Determine the prefix the user has applied to our future dir.
- inst_prefix_dir=`$ECHO "X$destdir" | $Xsed -e "s%$libdir\$%%"`
-
- # Don't allow the user to place us outside of our expected
- # location b/c this prevents finding dependent libraries that
- # are installed to the same prefix.
- # At present, this check doesn't affect windows .dll's that
- # are installed into $libdir/../bin (currently, that works fine)
- # but it's something to keep an eye on.
- test "$inst_prefix_dir" = "$destdir" && \
- func_fatal_error "error: cannot install \`$file' to a directory not ending in $libdir"
-
- if test -n "$inst_prefix_dir"; then
- # Stick the inst_prefix_dir data into the link command.
- relink_command=`$ECHO "X$relink_command" | $Xsed -e "s%@inst_prefix_dir@%-inst-prefix-dir $inst_prefix_dir%"`
- else
- relink_command=`$ECHO "X$relink_command" | $Xsed -e "s%@inst_prefix_dir@%%"`
- fi
-
- func_warning "relinking \`$file'"
- func_show_eval "$relink_command" \
- 'func_fatal_error "error: relink \`$file'\'' with the above command before installing it"'
- fi
-
- # See the names of the shared library.
- set dummy $library_names; shift
- if test -n "$1"; then
- realname="$1"
- shift
-
- srcname="$realname"
- test -n "$relink_command" && srcname="$realname"T
-
- # Install the shared library and build the symlinks.
- func_show_eval "$install_prog $dir/$srcname $destdir/$realname" \
- 'exit $?'
- tstripme="$stripme"
- case $host_os in
- cygwin* | mingw* | pw32* | cegcc*)
- case $realname in
- *.dll.a)
- tstripme=""
- ;;
- esac
- ;;
- esac
- if test -n "$tstripme" && test -n "$striplib"; then
- func_show_eval "$striplib $destdir/$realname" 'exit $?'
- fi
-
- if test "$#" -gt 0; then
- # Delete the old symlinks, and create new ones.
- # Try `ln -sf' first, because the `ln' binary might depend on
- # the symlink we replace! Solaris /bin/ln does not understand -f,
- # so we also need to try rm && ln -s.
- for linkname
- do
- test "$linkname" != "$realname" \
- && func_show_eval "(cd $destdir && { $LN_S -f $realname $linkname || { $RM $linkname && $LN_S $realname $linkname; }; })"
- done
- fi
-
- # Do each command in the postinstall commands.
- lib="$destdir/$realname"
- func_execute_cmds "$postinstall_cmds" 'exit $?'
- fi
-
- # Install the pseudo-library for information purposes.
- func_basename "$file"
- name="$func_basename_result"
- instname="$dir/$name"i
- func_show_eval "$install_prog $instname $destdir/$name" 'exit $?'
-
- # Maybe install the static library, too.
- test -n "$old_library" && staticlibs="$staticlibs $dir/$old_library"
- ;;
-
- *.lo)
- # Install (i.e. copy) a libtool object.
-
- # Figure out destination file name, if it wasn't already specified.
- if test -n "$destname"; then
- destfile="$destdir/$destname"
- else
- func_basename "$file"
- destfile="$func_basename_result"
- destfile="$destdir/$destfile"
- fi
-
- # Deduce the name of the destination old-style object file.
- case $destfile in
- *.lo)
- func_lo2o "$destfile"
- staticdest=$func_lo2o_result
- ;;
- *.$objext)
- staticdest="$destfile"
- destfile=
- ;;
- *)
- func_fatal_help "cannot copy a libtool object to \`$destfile'"
- ;;
- esac
-
- # Install the libtool object if requested.
- test -n "$destfile" && \
- func_show_eval "$install_prog $file $destfile" 'exit $?'
-
- # Install the old object if enabled.
- if test "$build_old_libs" = yes; then
- # Deduce the name of the old-style object file.
- func_lo2o "$file"
- staticobj=$func_lo2o_result
- func_show_eval "$install_prog \$staticobj \$staticdest" 'exit $?'
- fi
- exit $EXIT_SUCCESS
- ;;
-
- *)
- # Figure out destination file name, if it wasn't already specified.
- if test -n "$destname"; then
- destfile="$destdir/$destname"
- else
- func_basename "$file"
- destfile="$func_basename_result"
- destfile="$destdir/$destfile"
- fi
-
- # If the file is missing, and there is a .exe on the end, strip it
- # because it is most likely a libtool script we actually want to
- # install
- stripped_ext=""
- case $file in
- *.exe)
- if test ! -f "$file"; then
- func_stripname '' '.exe' "$file"
- file=$func_stripname_result
- stripped_ext=".exe"
- fi
- ;;
- esac
-
- # Do a test to see if this is really a libtool program.
- case $host in
- *cygwin* | *mingw*)
- if func_ltwrapper_executable_p "$file"; then
- func_ltwrapper_scriptname "$file"
- wrapper=$func_ltwrapper_scriptname_result
- else
- func_stripname '' '.exe' "$file"
- wrapper=$func_stripname_result
- fi
- ;;
- *)
- wrapper=$file
- ;;
- esac
- if func_ltwrapper_script_p "$wrapper"; then
- notinst_deplibs=
- relink_command=
-
- func_source "$wrapper"
-
- # Check the variables that should have been set.
- test -z "$generated_by_libtool_version" && \
- func_fatal_error "invalid libtool wrapper script \`$wrapper'"
-
- finalize=yes
- for lib in $notinst_deplibs; do
- # Check to see that each library is installed.
- libdir=
- if test -f "$lib"; then
- func_source "$lib"
- fi
- libfile="$libdir/"`$ECHO "X$lib" | $Xsed -e 's%^.*/%%g'` ### testsuite: skip nested quoting test
- if test -n "$libdir" && test ! -f "$libfile"; then
- func_warning "\`$lib' has not been installed in \`$libdir'"
- finalize=no
- fi
- done
-
- relink_command=
- func_source "$wrapper"
-
- outputname=
- if test "$fast_install" = no && test -n "$relink_command"; then
- $opt_dry_run || {
- if test "$finalize" = yes; then
- tmpdir=`func_mktempdir`
- func_basename "$file$stripped_ext"
- file="$func_basename_result"
- outputname="$tmpdir/$file"
- # Replace the output file specification.
- relink_command=`$ECHO "X$relink_command" | $Xsed -e 's%@OUTPUT@%'"$outputname"'%g'`
-
- $opt_silent || {
- func_quote_for_expand "$relink_command"
- eval "func_echo $func_quote_for_expand_result"
- }
- if eval "$relink_command"; then :
- else
- func_error "error: relink \`$file' with the above command before installing it"
- $opt_dry_run || ${RM}r "$tmpdir"
- continue
- fi
- file="$outputname"
- else
- func_warning "cannot relink \`$file'"
- fi
- }
- else
- # Install the binary that we compiled earlier.
- file=`$ECHO "X$file$stripped_ext" | $Xsed -e "s%\([^/]*\)$%$objdir/\1%"`
- fi
- fi
-
- # remove .exe since cygwin /usr/bin/install will append another
- # one anyway
- case $install_prog,$host in
- */usr/bin/install*,*cygwin*)
- case $file:$destfile in
- *.exe:*.exe)
- # this is ok
- ;;
- *.exe:*)
- destfile=$destfile.exe
- ;;
- *:*.exe)
- func_stripname '' '.exe' "$destfile"
- destfile=$func_stripname_result
- ;;
- esac
- ;;
- esac
- func_show_eval "$install_prog\$stripme \$file \$destfile" 'exit $?'
- $opt_dry_run || if test -n "$outputname"; then
- ${RM}r "$tmpdir"
- fi
- ;;
- esac
- done
-
- for file in $staticlibs; do
- func_basename "$file"
- name="$func_basename_result"
-
- # Set up the ranlib parameters.
- oldlib="$destdir/$name"
-
- func_show_eval "$install_prog \$file \$oldlib" 'exit $?'
-
- if test -n "$stripme" && test -n "$old_striplib"; then
- func_show_eval "$old_striplib $oldlib" 'exit $?'
- fi
-
- # Do each command in the postinstall commands.
- func_execute_cmds "$old_postinstall_cmds" 'exit $?'
- done
-
- test -n "$future_libdirs" && \
- func_warning "remember to run \`$progname --finish$future_libdirs'"
-
- if test -n "$current_libdirs"; then
- # Maybe just do a dry run.
- $opt_dry_run && current_libdirs=" -n$current_libdirs"
- exec_cmd='$SHELL $progpath $preserve_args --finish$current_libdirs'
- else
- exit $EXIT_SUCCESS
- fi
-}
-
-test "$mode" = install && func_mode_install ${1+"$@"}
-
-
-# func_generate_dlsyms outputname originator pic_p
-# Extract symbols from dlprefiles and create ${outputname}S.o with
-# a dlpreopen symbol table.
-func_generate_dlsyms ()
-{
- $opt_debug
- my_outputname="$1"
- my_originator="$2"
- my_pic_p="${3-no}"
- my_prefix=`$ECHO "$my_originator" | sed 's%[^a-zA-Z0-9]%_%g'`
- my_dlsyms=
-
- if test -n "$dlfiles$dlprefiles" || test "$dlself" != no; then
- if test -n "$NM" && test -n "$global_symbol_pipe"; then
- my_dlsyms="${my_outputname}S.c"
- else
- func_error "not configured to extract global symbols from dlpreopened files"
- fi
- fi
-
- if test -n "$my_dlsyms"; then
- case $my_dlsyms in
- "") ;;
- *.c)
- # Discover the nlist of each of the dlfiles.
- nlist="$output_objdir/${my_outputname}.nm"
-
- func_show_eval "$RM $nlist ${nlist}S ${nlist}T"
-
- # Parse the name list into a source file.
- func_verbose "creating $output_objdir/$my_dlsyms"
-
- $opt_dry_run || $ECHO > "$output_objdir/$my_dlsyms" "\
-/* $my_dlsyms - symbol resolution table for \`$my_outputname' dlsym emulation. */
-/* Generated by $PROGRAM (GNU $PACKAGE$TIMESTAMP) $VERSION */
-
-#ifdef __cplusplus
-extern \"C\" {
-#endif
-
-/* External symbol declarations for the compiler. */\
-"
-
- if test "$dlself" = yes; then
- func_verbose "generating symbol list for \`$output'"
-
- $opt_dry_run || echo ': @PROGRAM@ ' > "$nlist"
-
- # Add our own program objects to the symbol list.
- progfiles=`$ECHO "X$objs$old_deplibs" | $SP2NL | $Xsed -e "$lo2o" | $NL2SP`
- for progfile in $progfiles; do
- func_verbose "extracting global C symbols from \`$progfile'"
- $opt_dry_run || eval "$NM $progfile | $global_symbol_pipe >> '$nlist'"
- done
-
- if test -n "$exclude_expsyms"; then
- $opt_dry_run || {
- eval '$EGREP -v " ($exclude_expsyms)$" "$nlist" > "$nlist"T'
- eval '$MV "$nlist"T "$nlist"'
- }
- fi
-
- if test -n "$export_symbols_regex"; then
- $opt_dry_run || {
- eval '$EGREP -e "$export_symbols_regex" "$nlist" > "$nlist"T'
- eval '$MV "$nlist"T "$nlist"'
- }
- fi
-
- # Prepare the list of exported symbols
- if test -z "$export_symbols"; then
- export_symbols="$output_objdir/$outputname.exp"
- $opt_dry_run || {
- $RM $export_symbols
- eval "${SED} -n -e '/^: @PROGRAM@ $/d' -e 's/^.* \(.*\)$/\1/p' "'< "$nlist" > "$export_symbols"'
- case $host in
- *cygwin* | *mingw* | *cegcc* )
- eval "echo EXPORTS "'> "$output_objdir/$outputname.def"'
- eval 'cat "$export_symbols" >> "$output_objdir/$outputname.def"'
- ;;
- esac
- }
- else
- $opt_dry_run || {
- eval "${SED} -e 's/\([].[*^$]\)/\\\\\1/g' -e 's/^/ /' -e 's/$/$/'"' < "$export_symbols" > "$output_objdir/$outputname.exp"'
- eval '$GREP -f "$output_objdir/$outputname.exp" < "$nlist" > "$nlist"T'
- eval '$MV "$nlist"T "$nlist"'
- case $host in
- *cygwin | *mingw* | *cegcc* )
- eval "echo EXPORTS "'> "$output_objdir/$outputname.def"'
- eval 'cat "$nlist" >> "$output_objdir/$outputname.def"'
- ;;
- esac
- }
- fi
- fi
-
- for dlprefile in $dlprefiles; do
- func_verbose "extracting global C symbols from \`$dlprefile'"
- func_basename "$dlprefile"
- name="$func_basename_result"
- $opt_dry_run || {
- eval '$ECHO ": $name " >> "$nlist"'
- eval "$NM $dlprefile 2>/dev/null | $global_symbol_pipe >> '$nlist'"
- }
- done
-
- $opt_dry_run || {
- # Make sure we have at least an empty file.
- test -f "$nlist" || : > "$nlist"
-
- if test -n "$exclude_expsyms"; then
- $EGREP -v " ($exclude_expsyms)$" "$nlist" > "$nlist"T
- $MV "$nlist"T "$nlist"
- fi
-
- # Try sorting and uniquifying the output.
- if $GREP -v "^: " < "$nlist" |
- if sort -k 3 </dev/null >/dev/null 2>&1; then
- sort -k 3
- else
- sort +2
- fi |
- uniq > "$nlist"S; then
- :
- else
- $GREP -v "^: " < "$nlist" > "$nlist"S
- fi
-
- if test -f "$nlist"S; then
- eval "$global_symbol_to_cdecl"' < "$nlist"S >> "$output_objdir/$my_dlsyms"'
- else
- $ECHO '/* NONE */' >> "$output_objdir/$my_dlsyms"
- fi
-
- $ECHO >> "$output_objdir/$my_dlsyms" "\
-
-/* The mapping between symbol names and symbols. */
-typedef struct {
- const char *name;
- void *address;
-} lt_dlsymlist;
-"
- case $host in
- *cygwin* | *mingw* | *cegcc* )
- $ECHO >> "$output_objdir/$my_dlsyms" "\
-/* DATA imports from DLLs on WIN32 con't be const, because
- runtime relocations are performed -- see ld's documentation
- on pseudo-relocs. */"
- lt_dlsym_const= ;;
- *osf5*)
- echo >> "$output_objdir/$my_dlsyms" "\
-/* This system does not cope well with relocations in const data */"
- lt_dlsym_const= ;;
- *)
- lt_dlsym_const=const ;;
- esac
-
- $ECHO >> "$output_objdir/$my_dlsyms" "\
-extern $lt_dlsym_const lt_dlsymlist
-lt_${my_prefix}_LTX_preloaded_symbols[];
-$lt_dlsym_const lt_dlsymlist
-lt_${my_prefix}_LTX_preloaded_symbols[] =
-{\
- { \"$my_originator\", (void *) 0 },"
-
- case $need_lib_prefix in
- no)
- eval "$global_symbol_to_c_name_address" < "$nlist" >> "$output_objdir/$my_dlsyms"
- ;;
- *)
- eval "$global_symbol_to_c_name_address_lib_prefix" < "$nlist" >> "$output_objdir/$my_dlsyms"
- ;;
- esac
- $ECHO >> "$output_objdir/$my_dlsyms" "\
- {0, (void *) 0}
-};
-
-/* This works around a problem in FreeBSD linker */
-#ifdef FREEBSD_WORKAROUND
-static const void *lt_preloaded_setup() {
- return lt_${my_prefix}_LTX_preloaded_symbols;
-}
-#endif
-
-#ifdef __cplusplus
-}
-#endif\
-"
- } # !$opt_dry_run
-
- pic_flag_for_symtable=
- case "$compile_command " in
- *" -static "*) ;;
- *)
- case $host in
- # compiling the symbol table file with pic_flag works around
- # a FreeBSD bug that causes programs to crash when -lm is
- # linked before any other PIC object. But we must not use
- # pic_flag when linking with -static. The problem exists in
- # FreeBSD 2.2.6 and is fixed in FreeBSD 3.1.
- *-*-freebsd2*|*-*-freebsd3.0*|*-*-freebsdelf3.0*)
- pic_flag_for_symtable=" $pic_flag -DFREEBSD_WORKAROUND" ;;
- *-*-hpux*)
- pic_flag_for_symtable=" $pic_flag" ;;
- *)
- if test "X$my_pic_p" != Xno; then
- pic_flag_for_symtable=" $pic_flag"
- fi
- ;;
- esac
- ;;
- esac
- symtab_cflags=
- for arg in $LTCFLAGS; do
- case $arg in
- -pie | -fpie | -fPIE) ;;
- *) symtab_cflags="$symtab_cflags $arg" ;;
- esac
- done
-
- # Now compile the dynamic symbol file.
- func_show_eval '(cd $output_objdir && $LTCC$symtab_cflags -c$no_builtin_flag$pic_flag_for_symtable "$my_dlsyms")' 'exit $?'
-
- # Clean up the generated files.
- func_show_eval '$RM "$output_objdir/$my_dlsyms" "$nlist" "${nlist}S" "${nlist}T"'
-
- # Transform the symbol file into the correct name.
- symfileobj="$output_objdir/${my_outputname}S.$objext"
- case $host in
- *cygwin* | *mingw* | *cegcc* )
- if test -f "$output_objdir/$my_outputname.def"; then
- compile_command=`$ECHO "X$compile_command" | $Xsed -e "s%@SYMFILE@%$output_objdir/$my_outputname.def $symfileobj%"`
- finalize_command=`$ECHO "X$finalize_command" | $Xsed -e "s%@SYMFILE@%$output_objdir/$my_outputname.def $symfileobj%"`
- else
- compile_command=`$ECHO "X$compile_command" | $Xsed -e "s%@SYMFILE@%$symfileobj%"`
- finalize_command=`$ECHO "X$finalize_command" | $Xsed -e "s%@SYMFILE@%$symfileobj%"`
- fi
- ;;
- *)
- compile_command=`$ECHO "X$compile_command" | $Xsed -e "s%@SYMFILE@%$symfileobj%"`
- finalize_command=`$ECHO "X$finalize_command" | $Xsed -e "s%@SYMFILE@%$symfileobj%"`
- ;;
- esac
- ;;
- *)
- func_fatal_error "unknown suffix for \`$my_dlsyms'"
- ;;
- esac
- else
- # We keep going just in case the user didn't refer to
- # lt_preloaded_symbols. The linker will fail if global_symbol_pipe
- # really was required.
-
- # Nullify the symbol file.
- compile_command=`$ECHO "X$compile_command" | $Xsed -e "s% @SYMFILE@%%"`
- finalize_command=`$ECHO "X$finalize_command" | $Xsed -e "s% @SYMFILE@%%"`
- fi
-}
-
-# func_win32_libid arg
-# return the library type of file 'arg'
-#
-# Need a lot of goo to handle *both* DLLs and import libs
-# Has to be a shell function in order to 'eat' the argument
-# that is supplied when $file_magic_command is called.
-func_win32_libid ()
-{
- $opt_debug
- win32_libid_type="unknown"
- win32_fileres=`file -L $1 2>/dev/null`
- case $win32_fileres in
- *ar\ archive\ import\ library*) # definitely import
- win32_libid_type="x86 archive import"
- ;;
- *ar\ archive*) # could be an import, or static
- if eval $OBJDUMP -f $1 | $SED -e '10q' 2>/dev/null |
- $EGREP 'file format pe-i386(.*architecture: i386)?' >/dev/null ; then
- win32_nmres=`eval $NM -f posix -A $1 |
- $SED -n -e '
- 1,100{
- / I /{
- s,.*,import,
- p
- q
- }
- }'`
- case $win32_nmres in
- import*) win32_libid_type="x86 archive import";;
- *) win32_libid_type="x86 archive static";;
- esac
- fi
- ;;
- *DLL*)
- win32_libid_type="x86 DLL"
- ;;
- *executable*) # but shell scripts are "executable" too...
- case $win32_fileres in
- *MS\ Windows\ PE\ Intel*)
- win32_libid_type="x86 DLL"
- ;;
- esac
- ;;
- esac
- $ECHO "$win32_libid_type"
-}
-
-
-
-# func_extract_an_archive dir oldlib
-func_extract_an_archive ()
-{
- $opt_debug
- f_ex_an_ar_dir="$1"; shift
- f_ex_an_ar_oldlib="$1"
- func_show_eval "(cd \$f_ex_an_ar_dir && $AR x \"\$f_ex_an_ar_oldlib\")" 'exit $?'
- if ($AR t "$f_ex_an_ar_oldlib" | sort | sort -uc >/dev/null 2>&1); then
- :
- else
- func_fatal_error "object name conflicts in archive: $f_ex_an_ar_dir/$f_ex_an_ar_oldlib"
- fi
-}
-
-
-# func_extract_archives gentop oldlib ...
-func_extract_archives ()
-{
- $opt_debug
- my_gentop="$1"; shift
- my_oldlibs=${1+"$@"}
- my_oldobjs=""
- my_xlib=""
- my_xabs=""
- my_xdir=""
-
- for my_xlib in $my_oldlibs; do
- # Extract the objects.
- case $my_xlib in
- [\\/]* | [A-Za-z]:[\\/]*) my_xabs="$my_xlib" ;;
- *) my_xabs=`pwd`"/$my_xlib" ;;
- esac
- func_basename "$my_xlib"
- my_xlib="$func_basename_result"
- my_xlib_u=$my_xlib
- while :; do
- case " $extracted_archives " in
- *" $my_xlib_u "*)
- func_arith $extracted_serial + 1
- extracted_serial=$func_arith_result
- my_xlib_u=lt$extracted_serial-$my_xlib ;;
- *) break ;;
- esac
- done
- extracted_archives="$extracted_archives $my_xlib_u"
- my_xdir="$my_gentop/$my_xlib_u"
-
- func_mkdir_p "$my_xdir"
-
- case $host in
- *-darwin*)
- func_verbose "Extracting $my_xabs"
- # Do not bother doing anything if just a dry run
- $opt_dry_run || {
- darwin_orig_dir=`pwd`
- cd $my_xdir || exit $?
- darwin_archive=$my_xabs
- darwin_curdir=`pwd`
- darwin_base_archive=`basename "$darwin_archive"`
- darwin_arches=`$LIPO -info "$darwin_archive" 2>/dev/null | $GREP Architectures 2>/dev/null || true`
- if test -n "$darwin_arches"; then
- darwin_arches=`$ECHO "$darwin_arches" | $SED -e 's/.*are://'`
- darwin_arch=
- func_verbose "$darwin_base_archive has multiple architectures $darwin_arches"
- for darwin_arch in $darwin_arches ; do
- func_mkdir_p "unfat-$$/${darwin_base_archive}-${darwin_arch}"
- $LIPO -thin $darwin_arch -output "unfat-$$/${darwin_base_archive}-${darwin_arch}/${darwin_base_archive}" "${darwin_archive}"
- cd "unfat-$$/${darwin_base_archive}-${darwin_arch}"
- func_extract_an_archive "`pwd`" "${darwin_base_archive}"
- cd "$darwin_curdir"
- $RM "unfat-$$/${darwin_base_archive}-${darwin_arch}/${darwin_base_archive}"
- done # $darwin_arches
- ## Okay now we've a bunch of thin objects, gotta fatten them up :)
- darwin_filelist=`find unfat-$$ -type f -name \*.o -print -o -name \*.lo -print | $SED -e "$basename" | sort -u`
- darwin_file=
- darwin_files=
- for darwin_file in $darwin_filelist; do
- darwin_files=`find unfat-$$ -name $darwin_file -print | $NL2SP`
- $LIPO -create -output "$darwin_file" $darwin_files
- done # $darwin_filelist
- $RM -rf unfat-$$
- cd "$darwin_orig_dir"
- else
- cd $darwin_orig_dir
- func_extract_an_archive "$my_xdir" "$my_xabs"
- fi # $darwin_arches
- } # !$opt_dry_run
- ;;
- *)
- func_extract_an_archive "$my_xdir" "$my_xabs"
- ;;
- esac
- my_oldobjs="$my_oldobjs "`find $my_xdir -name \*.$objext -print -o -name \*.lo -print | $NL2SP`
- done
-
- func_extract_archives_result="$my_oldobjs"
-}
-
-
-
-# func_emit_wrapper_part1 [arg=no]
-#
-# Emit the first part of a libtool wrapper script on stdout.
-# For more information, see the description associated with
-# func_emit_wrapper(), below.
-func_emit_wrapper_part1 ()
-{
- func_emit_wrapper_part1_arg1=no
- if test -n "$1" ; then
- func_emit_wrapper_part1_arg1=$1
- fi
-
- $ECHO "\
-#! $SHELL
-
-# $output - temporary wrapper script for $objdir/$outputname
-# Generated by $PROGRAM (GNU $PACKAGE$TIMESTAMP) $VERSION
-#
-# The $output program cannot be directly executed until all the libtool
-# libraries that it depends on are installed.
-#
-# This wrapper script should never be moved out of the build directory.
-# If it is, it will not operate correctly.
-
-# Sed substitution that helps us do robust quoting. It backslashifies
-# metacharacters that are still active within double-quoted strings.
-Xsed='${SED} -e 1s/^X//'
-sed_quote_subst='$sed_quote_subst'
-
-# Be Bourne compatible
-if test -n \"\${ZSH_VERSION+set}\" && (emulate sh) >/dev/null 2>&1; then
- emulate sh
- NULLCMD=:
- # Zsh 3.x and 4.x performs word splitting on \${1+\"\$@\"}, which
- # is contrary to our usage. Disable this feature.
- alias -g '\${1+\"\$@\"}'='\"\$@\"'
- setopt NO_GLOB_SUBST
-else
- case \`(set -o) 2>/dev/null\` in *posix*) set -o posix;; esac
-fi
-BIN_SH=xpg4; export BIN_SH # for Tru64
-DUALCASE=1; export DUALCASE # for MKS sh
-
-# The HP-UX ksh and POSIX shell print the target directory to stdout
-# if CDPATH is set.
-(unset CDPATH) >/dev/null 2>&1 && unset CDPATH
-
-relink_command=\"$relink_command\"
-
-# This environment variable determines our operation mode.
-if test \"\$libtool_install_magic\" = \"$magic\"; then
- # install mode needs the following variables:
- generated_by_libtool_version='$macro_version'
- notinst_deplibs='$notinst_deplibs'
-else
- # When we are sourced in execute mode, \$file and \$ECHO are already set.
- if test \"\$libtool_execute_magic\" != \"$magic\"; then
- ECHO=\"$qecho\"
- file=\"\$0\"
- # Make sure echo works.
- if test \"X\$1\" = X--no-reexec; then
- # Discard the --no-reexec flag, and continue.
- shift
- elif test \"X\`{ \$ECHO '\t'; } 2>/dev/null\`\" = 'X\t'; then
- # Yippee, \$ECHO works!
- :
- else
- # Restart under the correct shell, and then maybe \$ECHO will work.
- exec $SHELL \"\$0\" --no-reexec \${1+\"\$@\"}
- fi
- fi\
-"
- $ECHO "\
-
- # Find the directory that this script lives in.
- thisdir=\`\$ECHO \"X\$file\" | \$Xsed -e 's%/[^/]*$%%'\`
- test \"x\$thisdir\" = \"x\$file\" && thisdir=.
-
- # Follow symbolic links until we get to the real thisdir.
- file=\`ls -ld \"\$file\" | ${SED} -n 's/.*-> //p'\`
- while test -n \"\$file\"; do
- destdir=\`\$ECHO \"X\$file\" | \$Xsed -e 's%/[^/]*\$%%'\`
-
- # If there was a directory component, then change thisdir.
- if test \"x\$destdir\" != \"x\$file\"; then
- case \"\$destdir\" in
- [\\\\/]* | [A-Za-z]:[\\\\/]*) thisdir=\"\$destdir\" ;;
- *) thisdir=\"\$thisdir/\$destdir\" ;;
- esac
- fi
-
- file=\`\$ECHO \"X\$file\" | \$Xsed -e 's%^.*/%%'\`
- file=\`ls -ld \"\$thisdir/\$file\" | ${SED} -n 's/.*-> //p'\`
- done
-"
-}
-# end: func_emit_wrapper_part1
-
-# func_emit_wrapper_part2 [arg=no]
-#
-# Emit the second part of a libtool wrapper script on stdout.
-# For more information, see the description associated with
-# func_emit_wrapper(), below.
-func_emit_wrapper_part2 ()
-{
- func_emit_wrapper_part2_arg1=no
- if test -n "$1" ; then
- func_emit_wrapper_part2_arg1=$1
- fi
-
- $ECHO "\
-
- # Usually 'no', except on cygwin/mingw when embedded into
- # the cwrapper.
- WRAPPER_SCRIPT_BELONGS_IN_OBJDIR=$func_emit_wrapper_part2_arg1
- if test \"\$WRAPPER_SCRIPT_BELONGS_IN_OBJDIR\" = \"yes\"; then
- # special case for '.'
- if test \"\$thisdir\" = \".\"; then
- thisdir=\`pwd\`
- fi
- # remove .libs from thisdir
- case \"\$thisdir\" in
- *[\\\\/]$objdir ) thisdir=\`\$ECHO \"X\$thisdir\" | \$Xsed -e 's%[\\\\/][^\\\\/]*$%%'\` ;;
- $objdir ) thisdir=. ;;
- esac
- fi
-
- # Try to get the absolute directory name.
- absdir=\`cd \"\$thisdir\" && pwd\`
- test -n \"\$absdir\" && thisdir=\"\$absdir\"
-"
-
- if test "$fast_install" = yes; then
- $ECHO "\
- program=lt-'$outputname'$exeext
- progdir=\"\$thisdir/$objdir\"
-
- if test ! -f \"\$progdir/\$program\" ||
- { file=\`ls -1dt \"\$progdir/\$program\" \"\$progdir/../\$program\" 2>/dev/null | ${SED} 1q\`; \\
- test \"X\$file\" != \"X\$progdir/\$program\"; }; then
-
- file=\"\$\$-\$program\"
-
- if test ! -d \"\$progdir\"; then
- $MKDIR \"\$progdir\"
- else
- $RM \"\$progdir/\$file\"
- fi"
-
- $ECHO "\
-
- # relink executable if necessary
- if test -n \"\$relink_command\"; then
- if relink_command_output=\`eval \$relink_command 2>&1\`; then :
- else
- $ECHO \"\$relink_command_output\" >&2
- $RM \"\$progdir/\$file\"
- exit 1
- fi
- fi
-
- $MV \"\$progdir/\$file\" \"\$progdir/\$program\" 2>/dev/null ||
- { $RM \"\$progdir/\$program\";
- $MV \"\$progdir/\$file\" \"\$progdir/\$program\"; }
- $RM \"\$progdir/\$file\"
- fi"
- else
- $ECHO "\
- program='$outputname'
- progdir=\"\$thisdir/$objdir\"
-"
- fi
-
- $ECHO "\
-
- if test -f \"\$progdir/\$program\"; then"
-
- # Export our shlibpath_var if we have one.
- if test "$shlibpath_overrides_runpath" = yes && test -n "$shlibpath_var" && test -n "$temp_rpath"; then
- $ECHO "\
- # Add our own library path to $shlibpath_var
- $shlibpath_var=\"$temp_rpath\$$shlibpath_var\"
-
- # Some systems cannot cope with colon-terminated $shlibpath_var
- # The second colon is a workaround for a bug in BeOS R4 sed
- $shlibpath_var=\`\$ECHO \"X\$$shlibpath_var\" | \$Xsed -e 's/::*\$//'\`
-
- export $shlibpath_var
-"
- fi
-
- # fixup the dll searchpath if we need to.
- if test -n "$dllsearchpath"; then
- $ECHO "\
- # Add the dll search path components to the executable PATH
- PATH=$dllsearchpath:\$PATH
-"
- fi
-
- $ECHO "\
- if test \"\$libtool_execute_magic\" != \"$magic\"; then
- # Run the actual program with our arguments.
-"
- case $host in
- # Backslashes separate directories on plain windows
- *-*-mingw | *-*-os2* | *-cegcc*)
- $ECHO "\
- exec \"\$progdir\\\\\$program\" \${1+\"\$@\"}
-"
- ;;
-
- *)
- $ECHO "\
- exec \"\$progdir/\$program\" \${1+\"\$@\"}
-"
- ;;
- esac
- $ECHO "\
- \$ECHO \"\$0: cannot exec \$program \$*\" 1>&2
- exit 1
- fi
- else
- # The program doesn't exist.
- \$ECHO \"\$0: error: \\\`\$progdir/\$program' does not exist\" 1>&2
- \$ECHO \"This script is just a wrapper for \$program.\" 1>&2
- $ECHO \"See the $PACKAGE documentation for more information.\" 1>&2
- exit 1
- fi
-fi\
-"
-}
-# end: func_emit_wrapper_part2
-
-
-# func_emit_wrapper [arg=no]
-#
-# Emit a libtool wrapper script on stdout.
-# Don't directly open a file because we may want to
-# incorporate the script contents within a cygwin/mingw
-# wrapper executable. Must ONLY be called from within
-# func_mode_link because it depends on a number of variables
-# set therein.
-#
-# ARG is the value that the WRAPPER_SCRIPT_BELONGS_IN_OBJDIR
-# variable will take. If 'yes', then the emitted script
-# will assume that the directory in which it is stored is
-# the $objdir directory. This is a cygwin/mingw-specific
-# behavior.
-func_emit_wrapper ()
-{
- func_emit_wrapper_arg1=no
- if test -n "$1" ; then
- func_emit_wrapper_arg1=$1
- fi
-
- # split this up so that func_emit_cwrapperexe_src
- # can call each part independently.
- func_emit_wrapper_part1 "${func_emit_wrapper_arg1}"
- func_emit_wrapper_part2 "${func_emit_wrapper_arg1}"
-}
-
-
-# func_to_host_path arg
-#
-# Convert paths to host format when used with build tools.
-# Intended for use with "native" mingw (where libtool itself
-# is running under the msys shell), or in the following cross-
-# build environments:
-# $build $host
-# mingw (msys) mingw [e.g. native]
-# cygwin mingw
-# *nix + wine mingw
-# where wine is equipped with the `winepath' executable.
-# In the native mingw case, the (msys) shell automatically
-# converts paths for any non-msys applications it launches,
-# but that facility isn't available from inside the cwrapper.
-# Similar accommodations are necessary for $host mingw and
-# $build cygwin. Calling this function does no harm for other
-# $host/$build combinations not listed above.
-#
-# ARG is the path (on $build) that should be converted to
-# the proper representation for $host. The result is stored
-# in $func_to_host_path_result.
-func_to_host_path ()
-{
- func_to_host_path_result="$1"
- if test -n "$1" ; then
- case $host in
- *mingw* )
- lt_sed_naive_backslashify='s|\\\\*|\\|g;s|/|\\|g;s|\\|\\\\|g'
- case $build in
- *mingw* ) # actually, msys
- # awkward: cmd appends spaces to result
- lt_sed_strip_trailing_spaces="s/[ ]*\$//"
- func_to_host_path_tmp1=`( cmd //c echo "$1" |\
- $SED -e "$lt_sed_strip_trailing_spaces" ) 2>/dev/null || echo ""`
- func_to_host_path_result=`echo "$func_to_host_path_tmp1" |\
- $SED -e "$lt_sed_naive_backslashify"`
- ;;
- *cygwin* )
- func_to_host_path_tmp1=`cygpath -w "$1"`
- func_to_host_path_result=`echo "$func_to_host_path_tmp1" |\
- $SED -e "$lt_sed_naive_backslashify"`
- ;;
- * )
- # Unfortunately, winepath does not exit with a non-zero
- # error code, so we are forced to check the contents of
- # stdout. On the other hand, if the command is not
- # found, the shell will set an exit code of 127 and print
- # *an error message* to stdout. So we must check for both
- # error code of zero AND non-empty stdout, which explains
- # the odd construction:
- func_to_host_path_tmp1=`winepath -w "$1" 2>/dev/null`
- if test "$?" -eq 0 && test -n "${func_to_host_path_tmp1}"; then
- func_to_host_path_result=`echo "$func_to_host_path_tmp1" |\
- $SED -e "$lt_sed_naive_backslashify"`
- else
- # Allow warning below.
- func_to_host_path_result=""
- fi
- ;;
- esac
- if test -z "$func_to_host_path_result" ; then
- func_error "Could not determine host path corresponding to"
- func_error " '$1'"
- func_error "Continuing, but uninstalled executables may not work."
- # Fallback:
- func_to_host_path_result="$1"
- fi
- ;;
- esac
- fi
-}
-# end: func_to_host_path
-
-# func_to_host_pathlist arg
-#
-# Convert pathlists to host format when used with build tools.
-# See func_to_host_path(), above. This function supports the
-# following $build/$host combinations (but does no harm for
-# combinations not listed here):
-# $build $host
-# mingw (msys) mingw [e.g. native]
-# cygwin mingw
-# *nix + wine mingw
-#
-# Path separators are also converted from $build format to
-# $host format. If ARG begins or ends with a path separator
-# character, it is preserved (but converted to $host format)
-# on output.
-#
-# ARG is a pathlist (on $build) that should be converted to
-# the proper representation on $host. The result is stored
-# in $func_to_host_pathlist_result.
-func_to_host_pathlist ()
-{
- func_to_host_pathlist_result="$1"
- if test -n "$1" ; then
- case $host in
- *mingw* )
- lt_sed_naive_backslashify='s|\\\\*|\\|g;s|/|\\|g;s|\\|\\\\|g'
- # Remove leading and trailing path separator characters from
- # ARG. msys behavior is inconsistent here, cygpath turns them
- # into '.;' and ';.', and winepath ignores them completely.
- func_to_host_pathlist_tmp2="$1"
- # Once set for this call, this variable should not be
- # reassigned. It is used in tha fallback case.
- func_to_host_pathlist_tmp1=`echo "$func_to_host_pathlist_tmp2" |\
- $SED -e 's|^:*||' -e 's|:*$||'`
- case $build in
- *mingw* ) # Actually, msys.
- # Awkward: cmd appends spaces to result.
- lt_sed_strip_trailing_spaces="s/[ ]*\$//"
- func_to_host_pathlist_tmp2=`( cmd //c echo "$func_to_host_pathlist_tmp1" |\
- $SED -e "$lt_sed_strip_trailing_spaces" ) 2>/dev/null || echo ""`
- func_to_host_pathlist_result=`echo "$func_to_host_pathlist_tmp2" |\
- $SED -e "$lt_sed_naive_backslashify"`
- ;;
- *cygwin* )
- func_to_host_pathlist_tmp2=`cygpath -w -p "$func_to_host_pathlist_tmp1"`
- func_to_host_pathlist_result=`echo "$func_to_host_pathlist_tmp2" |\
- $SED -e "$lt_sed_naive_backslashify"`
- ;;
- * )
- # unfortunately, winepath doesn't convert pathlists
- func_to_host_pathlist_result=""
- func_to_host_pathlist_oldIFS=$IFS
- IFS=:
- for func_to_host_pathlist_f in $func_to_host_pathlist_tmp1 ; do
- IFS=$func_to_host_pathlist_oldIFS
- if test -n "$func_to_host_pathlist_f" ; then
- func_to_host_path "$func_to_host_pathlist_f"
- if test -n "$func_to_host_path_result" ; then
- if test -z "$func_to_host_pathlist_result" ; then
- func_to_host_pathlist_result="$func_to_host_path_result"
- else
- func_to_host_pathlist_result="$func_to_host_pathlist_result;$func_to_host_path_result"
- fi
- fi
- fi
- IFS=:
- done
- IFS=$func_to_host_pathlist_oldIFS
- ;;
- esac
- if test -z "$func_to_host_pathlist_result" ; then
- func_error "Could not determine the host path(s) corresponding to"
- func_error " '$1'"
- func_error "Continuing, but uninstalled executables may not work."
- # Fallback. This may break if $1 contains DOS-style drive
- # specifications. The fix is not to complicate the expression
- # below, but for the user to provide a working wine installation
- # with winepath so that path translation in the cross-to-mingw
- # case works properly.
- lt_replace_pathsep_nix_to_dos="s|:|;|g"
- func_to_host_pathlist_result=`echo "$func_to_host_pathlist_tmp1" |\
- $SED -e "$lt_replace_pathsep_nix_to_dos"`
- fi
- # Now, add the leading and trailing path separators back
- case "$1" in
- :* ) func_to_host_pathlist_result=";$func_to_host_pathlist_result"
- ;;
- esac
- case "$1" in
- *: ) func_to_host_pathlist_result="$func_to_host_pathlist_result;"
- ;;
- esac
- ;;
- esac
- fi
-}
-# end: func_to_host_pathlist
-
-# func_emit_cwrapperexe_src
-# emit the source code for a wrapper executable on stdout
-# Must ONLY be called from within func_mode_link because
-# it depends on a number of variable set therein.
-func_emit_cwrapperexe_src ()
-{
- cat <<EOF
-
-/* $cwrappersource - temporary wrapper executable for $objdir/$outputname
- Generated by $PROGRAM (GNU $PACKAGE$TIMESTAMP) $VERSION
-
- The $output program cannot be directly executed until all the libtool
- libraries that it depends on are installed.
-
- This wrapper executable should never be moved out of the build directory.
- If it is, it will not operate correctly.
-
- Currently, it simply execs the wrapper *script* "$SHELL $output",
- but could eventually absorb all of the scripts functionality and
- exec $objdir/$outputname directly.
-*/
-EOF
- cat <<"EOF"
-#include <stdio.h>
-#include <stdlib.h>
-#ifdef _MSC_VER
-# include <direct.h>
-# include <process.h>
-# include <io.h>
-# define setmode _setmode
-#else
-# include <unistd.h>
-# include <stdint.h>
-# ifdef __CYGWIN__
-# include <io.h>
-# define HAVE_SETENV
-# ifdef __STRICT_ANSI__
-char *realpath (const char *, char *);
-int putenv (char *);
-int setenv (const char *, const char *, int);
-# endif
-# endif
-#endif
-#include <malloc.h>
-#include <stdarg.h>
-#include <assert.h>
-#include <string.h>
-#include <ctype.h>
-#include <errno.h>
-#include <fcntl.h>
-#include <sys/stat.h>
-
-#if defined(PATH_MAX)
-# define LT_PATHMAX PATH_MAX
-#elif defined(MAXPATHLEN)
-# define LT_PATHMAX MAXPATHLEN
-#else
-# define LT_PATHMAX 1024
-#endif
-
-#ifndef S_IXOTH
-# define S_IXOTH 0
-#endif
-#ifndef S_IXGRP
-# define S_IXGRP 0
-#endif
-
-#ifdef _MSC_VER
-# define S_IXUSR _S_IEXEC
-# define stat _stat
-# ifndef _INTPTR_T_DEFINED
-# define intptr_t int
-# endif
-#endif
-
-#ifndef DIR_SEPARATOR
-# define DIR_SEPARATOR '/'
-# define PATH_SEPARATOR ':'
-#endif
-
-#if defined (_WIN32) || defined (__MSDOS__) || defined (__DJGPP__) || \
- defined (__OS2__)
-# define HAVE_DOS_BASED_FILE_SYSTEM
-# define FOPEN_WB "wb"
-# ifndef DIR_SEPARATOR_2
-# define DIR_SEPARATOR_2 '\\'
-# endif
-# ifndef PATH_SEPARATOR_2
-# define PATH_SEPARATOR_2 ';'
-# endif
-#endif
-
-#ifndef DIR_SEPARATOR_2
-# define IS_DIR_SEPARATOR(ch) ((ch) == DIR_SEPARATOR)
-#else /* DIR_SEPARATOR_2 */
-# define IS_DIR_SEPARATOR(ch) \
- (((ch) == DIR_SEPARATOR) || ((ch) == DIR_SEPARATOR_2))
-#endif /* DIR_SEPARATOR_2 */
-
-#ifndef PATH_SEPARATOR_2
-# define IS_PATH_SEPARATOR(ch) ((ch) == PATH_SEPARATOR)
-#else /* PATH_SEPARATOR_2 */
-# define IS_PATH_SEPARATOR(ch) ((ch) == PATH_SEPARATOR_2)
-#endif /* PATH_SEPARATOR_2 */
-
-#ifdef __CYGWIN__
-# define FOPEN_WB "wb"
-#endif
-
-#ifndef FOPEN_WB
-# define FOPEN_WB "w"
-#endif
-#ifndef _O_BINARY
-# define _O_BINARY 0
-#endif
-
-#define XMALLOC(type, num) ((type *) xmalloc ((num) * sizeof(type)))
-#define XFREE(stale) do { \
- if (stale) { free ((void *) stale); stale = 0; } \
-} while (0)
-
-#undef LTWRAPPER_DEBUGPRINTF
-#if defined DEBUGWRAPPER
-# define LTWRAPPER_DEBUGPRINTF(args) ltwrapper_debugprintf args
-static void
-ltwrapper_debugprintf (const char *fmt, ...)
-{
- va_list args;
- va_start (args, fmt);
- (void) vfprintf (stderr, fmt, args);
- va_end (args);
-}
-#else
-# define LTWRAPPER_DEBUGPRINTF(args)
-#endif
-
-const char *program_name = NULL;
-
-void *xmalloc (size_t num);
-char *xstrdup (const char *string);
-const char *base_name (const char *name);
-char *find_executable (const char *wrapper);
-char *chase_symlinks (const char *pathspec);
-int make_executable (const char *path);
-int check_executable (const char *path);
-char *strendzap (char *str, const char *pat);
-void lt_fatal (const char *message, ...);
-void lt_setenv (const char *name, const char *value);
-char *lt_extend_str (const char *orig_value, const char *add, int to_end);
-void lt_opt_process_env_set (const char *arg);
-void lt_opt_process_env_prepend (const char *arg);
-void lt_opt_process_env_append (const char *arg);
-int lt_split_name_value (const char *arg, char** name, char** value);
-void lt_update_exe_path (const char *name, const char *value);
-void lt_update_lib_path (const char *name, const char *value);
-
-static const char *script_text_part1 =
-EOF
-
- func_emit_wrapper_part1 yes |
- $SED -e 's/\([\\"]\)/\\\1/g' \
- -e 's/^/ "/' -e 's/$/\\n"/'
- echo ";"
- cat <<EOF
-
-static const char *script_text_part2 =
-EOF
- func_emit_wrapper_part2 yes |
- $SED -e 's/\([\\"]\)/\\\1/g' \
- -e 's/^/ "/' -e 's/$/\\n"/'
- echo ";"
-
- cat <<EOF
-const char * MAGIC_EXE = "$magic_exe";
-const char * LIB_PATH_VARNAME = "$shlibpath_var";
-EOF
-
- if test "$shlibpath_overrides_runpath" = yes && test -n "$shlibpath_var" && test -n "$temp_rpath"; then
- func_to_host_pathlist "$temp_rpath"
- cat <<EOF
-const char * LIB_PATH_VALUE = "$func_to_host_pathlist_result";
-EOF
- else
- cat <<"EOF"
-const char * LIB_PATH_VALUE = "";
-EOF
- fi
-
- if test -n "$dllsearchpath"; then
- func_to_host_pathlist "$dllsearchpath:"
- cat <<EOF
-const char * EXE_PATH_VARNAME = "PATH";
-const char * EXE_PATH_VALUE = "$func_to_host_pathlist_result";
-EOF
- else
- cat <<"EOF"
-const char * EXE_PATH_VARNAME = "";
-const char * EXE_PATH_VALUE = "";
-EOF
- fi
-
- if test "$fast_install" = yes; then
- cat <<EOF
-const char * TARGET_PROGRAM_NAME = "lt-$outputname"; /* hopefully, no .exe */
-EOF
- else
- cat <<EOF
-const char * TARGET_PROGRAM_NAME = "$outputname"; /* hopefully, no .exe */
-EOF
- fi
-
-
- cat <<"EOF"
-
-#define LTWRAPPER_OPTION_PREFIX "--lt-"
-#define LTWRAPPER_OPTION_PREFIX_LENGTH 5
-
-static const size_t opt_prefix_len = LTWRAPPER_OPTION_PREFIX_LENGTH;
-static const char *ltwrapper_option_prefix = LTWRAPPER_OPTION_PREFIX;
-
-static const char *dumpscript_opt = LTWRAPPER_OPTION_PREFIX "dump-script";
-
-static const size_t env_set_opt_len = LTWRAPPER_OPTION_PREFIX_LENGTH + 7;
-static const char *env_set_opt = LTWRAPPER_OPTION_PREFIX "env-set";
- /* argument is putenv-style "foo=bar", value of foo is set to bar */
-
-static const size_t env_prepend_opt_len = LTWRAPPER_OPTION_PREFIX_LENGTH + 11;
-static const char *env_prepend_opt = LTWRAPPER_OPTION_PREFIX "env-prepend";
- /* argument is putenv-style "foo=bar", new value of foo is bar${foo} */
-
-static const size_t env_append_opt_len = LTWRAPPER_OPTION_PREFIX_LENGTH + 10;
-static const char *env_append_opt = LTWRAPPER_OPTION_PREFIX "env-append";
- /* argument is putenv-style "foo=bar", new value of foo is ${foo}bar */
-
-int
-main (int argc, char *argv[])
-{
- char **newargz;
- int newargc;
- char *tmp_pathspec;
- char *actual_cwrapper_path;
- char *actual_cwrapper_name;
- char *target_name;
- char *lt_argv_zero;
- intptr_t rval = 127;
-
- int i;
-
- program_name = (char *) xstrdup (base_name (argv[0]));
- LTWRAPPER_DEBUGPRINTF (("(main) argv[0] : %s\n", argv[0]));
- LTWRAPPER_DEBUGPRINTF (("(main) program_name : %s\n", program_name));
-
- /* very simple arg parsing; don't want to rely on getopt */
- for (i = 1; i < argc; i++)
- {
- if (strcmp (argv[i], dumpscript_opt) == 0)
- {
-EOF
- case "$host" in
- *mingw* | *cygwin* )
- # make stdout use "unix" line endings
- echo " setmode(1,_O_BINARY);"
- ;;
- esac
-
- cat <<"EOF"
- printf ("%s", script_text_part1);
- printf ("%s", script_text_part2);
- return 0;
- }
- }
-
- newargz = XMALLOC (char *, argc + 1);
- tmp_pathspec = find_executable (argv[0]);
- if (tmp_pathspec == NULL)
- lt_fatal ("Couldn't find %s", argv[0]);
- LTWRAPPER_DEBUGPRINTF (("(main) found exe (before symlink chase) at : %s\n",
- tmp_pathspec));
-
- actual_cwrapper_path = chase_symlinks (tmp_pathspec);
- LTWRAPPER_DEBUGPRINTF (("(main) found exe (after symlink chase) at : %s\n",
- actual_cwrapper_path));
- XFREE (tmp_pathspec);
-
- actual_cwrapper_name = xstrdup( base_name (actual_cwrapper_path));
- strendzap (actual_cwrapper_path, actual_cwrapper_name);
-
- /* wrapper name transforms */
- strendzap (actual_cwrapper_name, ".exe");
- tmp_pathspec = lt_extend_str (actual_cwrapper_name, ".exe", 1);
- XFREE (actual_cwrapper_name);
- actual_cwrapper_name = tmp_pathspec;
- tmp_pathspec = 0;
-
- /* target_name transforms -- use actual target program name; might have lt- prefix */
- target_name = xstrdup (base_name (TARGET_PROGRAM_NAME));
- strendzap (target_name, ".exe");
- tmp_pathspec = lt_extend_str (target_name, ".exe", 1);
- XFREE (target_name);
- target_name = tmp_pathspec;
- tmp_pathspec = 0;
-
- LTWRAPPER_DEBUGPRINTF (("(main) libtool target name: %s\n",
- target_name));
-EOF
-
- cat <<EOF
- newargz[0] =
- XMALLOC (char, (strlen (actual_cwrapper_path) +
- strlen ("$objdir") + 1 + strlen (actual_cwrapper_name) + 1));
- strcpy (newargz[0], actual_cwrapper_path);
- strcat (newargz[0], "$objdir");
- strcat (newargz[0], "/");
-EOF
-
- cat <<"EOF"
- /* stop here, and copy so we don't have to do this twice */
- tmp_pathspec = xstrdup (newargz[0]);
-
- /* do NOT want the lt- prefix here, so use actual_cwrapper_name */
- strcat (newargz[0], actual_cwrapper_name);
-
- /* DO want the lt- prefix here if it exists, so use target_name */
- lt_argv_zero = lt_extend_str (tmp_pathspec, target_name, 1);
- XFREE (tmp_pathspec);
- tmp_pathspec = NULL;
-EOF
-
- case $host_os in
- mingw*)
- cat <<"EOF"
- {
- char* p;
- while ((p = strchr (newargz[0], '\\')) != NULL)
- {
- *p = '/';
- }
- while ((p = strchr (lt_argv_zero, '\\')) != NULL)
- {
- *p = '/';
- }
- }
-EOF
- ;;
- esac
-
- cat <<"EOF"
- XFREE (target_name);
- XFREE (actual_cwrapper_path);
- XFREE (actual_cwrapper_name);
-
- lt_setenv ("BIN_SH", "xpg4"); /* for Tru64 */
- lt_setenv ("DUALCASE", "1"); /* for MSK sh */
- lt_update_lib_path (LIB_PATH_VARNAME, LIB_PATH_VALUE);
- lt_update_exe_path (EXE_PATH_VARNAME, EXE_PATH_VALUE);
-
- newargc=0;
- for (i = 1; i < argc; i++)
- {
- if (strncmp (argv[i], env_set_opt, env_set_opt_len) == 0)
- {
- if (argv[i][env_set_opt_len] == '=')
- {
- const char *p = argv[i] + env_set_opt_len + 1;
- lt_opt_process_env_set (p);
- }
- else if (argv[i][env_set_opt_len] == '\0' && i + 1 < argc)
- {
- lt_opt_process_env_set (argv[++i]); /* don't copy */
- }
- else
- lt_fatal ("%s missing required argument", env_set_opt);
- continue;
- }
- if (strncmp (argv[i], env_prepend_opt, env_prepend_opt_len) == 0)
- {
- if (argv[i][env_prepend_opt_len] == '=')
- {
- const char *p = argv[i] + env_prepend_opt_len + 1;
- lt_opt_process_env_prepend (p);
- }
- else if (argv[i][env_prepend_opt_len] == '\0' && i + 1 < argc)
- {
- lt_opt_process_env_prepend (argv[++i]); /* don't copy */
- }
- else
- lt_fatal ("%s missing required argument", env_prepend_opt);
- continue;
- }
- if (strncmp (argv[i], env_append_opt, env_append_opt_len) == 0)
- {
- if (argv[i][env_append_opt_len] == '=')
- {
- const char *p = argv[i] + env_append_opt_len + 1;
- lt_opt_process_env_append (p);
- }
- else if (argv[i][env_append_opt_len] == '\0' && i + 1 < argc)
- {
- lt_opt_process_env_append (argv[++i]); /* don't copy */
- }
- else
- lt_fatal ("%s missing required argument", env_append_opt);
- continue;
- }
- if (strncmp (argv[i], ltwrapper_option_prefix, opt_prefix_len) == 0)
- {
- /* however, if there is an option in the LTWRAPPER_OPTION_PREFIX
- namespace, but it is not one of the ones we know about and
- have already dealt with, above (inluding dump-script), then
- report an error. Otherwise, targets might begin to believe
- they are allowed to use options in the LTWRAPPER_OPTION_PREFIX
- namespace. The first time any user complains about this, we'll
- need to make LTWRAPPER_OPTION_PREFIX a configure-time option
- or a configure.ac-settable value.
- */
- lt_fatal ("Unrecognized option in %s namespace: '%s'",
- ltwrapper_option_prefix, argv[i]);
- }
- /* otherwise ... */
- newargz[++newargc] = xstrdup (argv[i]);
- }
- newargz[++newargc] = NULL;
-
- LTWRAPPER_DEBUGPRINTF (("(main) lt_argv_zero : %s\n", (lt_argv_zero ? lt_argv_zero : "<NULL>")));
- for (i = 0; i < newargc; i++)
- {
- LTWRAPPER_DEBUGPRINTF (("(main) newargz[%d] : %s\n", i, (newargz[i] ? newargz[i] : "<NULL>")));
- }
-
-EOF
-
- case $host_os in
- mingw*)
- cat <<"EOF"
- /* execv doesn't actually work on mingw as expected on unix */
- rval = _spawnv (_P_WAIT, lt_argv_zero, (const char * const *) newargz);
- if (rval == -1)
- {
- /* failed to start process */
- LTWRAPPER_DEBUGPRINTF (("(main) failed to launch target \"%s\": errno = %d\n", lt_argv_zero, errno));
- return 127;
- }
- return rval;
-EOF
- ;;
- *)
- cat <<"EOF"
- execv (lt_argv_zero, newargz);
- return rval; /* =127, but avoids unused variable warning */
-EOF
- ;;
- esac
-
- cat <<"EOF"
-}
-
-void *
-xmalloc (size_t num)
-{
- void *p = (void *) malloc (num);
- if (!p)
- lt_fatal ("Memory exhausted");
-
- return p;
-}
-
-char *
-xstrdup (const char *string)
-{
- return string ? strcpy ((char *) xmalloc (strlen (string) + 1),
- string) : NULL;
-}
-
-const char *
-base_name (const char *name)
-{
- const char *base;
-
-#if defined (HAVE_DOS_BASED_FILE_SYSTEM)
- /* Skip over the disk name in MSDOS pathnames. */
- if (isalpha ((unsigned char) name[0]) && name[1] == ':')
- name += 2;
-#endif
-
- for (base = name; *name; name++)
- if (IS_DIR_SEPARATOR (*name))
- base = name + 1;
- return base;
-}
-
-int
-check_executable (const char *path)
-{
- struct stat st;
-
- LTWRAPPER_DEBUGPRINTF (("(check_executable) : %s\n",
- path ? (*path ? path : "EMPTY!") : "NULL!"));
- if ((!path) || (!*path))
- return 0;
-
- if ((stat (path, &st) >= 0)
- && (st.st_mode & (S_IXUSR | S_IXGRP | S_IXOTH)))
- return 1;
- else
- return 0;
-}
-
-int
-make_executable (const char *path)
-{
- int rval = 0;
- struct stat st;
-
- LTWRAPPER_DEBUGPRINTF (("(make_executable) : %s\n",
- path ? (*path ? path : "EMPTY!") : "NULL!"));
- if ((!path) || (!*path))
- return 0;
-
- if (stat (path, &st) >= 0)
- {
- rval = chmod (path, st.st_mode | S_IXOTH | S_IXGRP | S_IXUSR);
- }
- return rval;
-}
-
-/* Searches for the full path of the wrapper. Returns
- newly allocated full path name if found, NULL otherwise
- Does not chase symlinks, even on platforms that support them.
-*/
-char *
-find_executable (const char *wrapper)
-{
- int has_slash = 0;
- const char *p;
- const char *p_next;
- /* static buffer for getcwd */
- char tmp[LT_PATHMAX + 1];
- int tmp_len;
- char *concat_name;
-
- LTWRAPPER_DEBUGPRINTF (("(find_executable) : %s\n",
- wrapper ? (*wrapper ? wrapper : "EMPTY!") : "NULL!"));
-
- if ((wrapper == NULL) || (*wrapper == '\0'))
- return NULL;
-
- /* Absolute path? */
-#if defined (HAVE_DOS_BASED_FILE_SYSTEM)
- if (isalpha ((unsigned char) wrapper[0]) && wrapper[1] == ':')
- {
- concat_name = xstrdup (wrapper);
- if (check_executable (concat_name))
- return concat_name;
- XFREE (concat_name);
- }
- else
- {
-#endif
- if (IS_DIR_SEPARATOR (wrapper[0]))
- {
- concat_name = xstrdup (wrapper);
- if (check_executable (concat_name))
- return concat_name;
- XFREE (concat_name);
- }
-#if defined (HAVE_DOS_BASED_FILE_SYSTEM)
- }
-#endif
-
- for (p = wrapper; *p; p++)
- if (*p == '/')
- {
- has_slash = 1;
- break;
- }
- if (!has_slash)
- {
- /* no slashes; search PATH */
- const char *path = getenv ("PATH");
- if (path != NULL)
- {
- for (p = path; *p; p = p_next)
- {
- const char *q;
- size_t p_len;
- for (q = p; *q; q++)
- if (IS_PATH_SEPARATOR (*q))
- break;
- p_len = q - p;
- p_next = (*q == '\0' ? q : q + 1);
- if (p_len == 0)
- {
- /* empty path: current directory */
- if (getcwd (tmp, LT_PATHMAX) == NULL)
- lt_fatal ("getcwd failed");
- tmp_len = strlen (tmp);
- concat_name =
- XMALLOC (char, tmp_len + 1 + strlen (wrapper) + 1);
- memcpy (concat_name, tmp, tmp_len);
- concat_name[tmp_len] = '/';
- strcpy (concat_name + tmp_len + 1, wrapper);
- }
- else
- {
- concat_name =
- XMALLOC (char, p_len + 1 + strlen (wrapper) + 1);
- memcpy (concat_name, p, p_len);
- concat_name[p_len] = '/';
- strcpy (concat_name + p_len + 1, wrapper);
- }
- if (check_executable (concat_name))
- return concat_name;
- XFREE (concat_name);
- }
- }
- /* not found in PATH; assume curdir */
- }
- /* Relative path | not found in path: prepend cwd */
- if (getcwd (tmp, LT_PATHMAX) == NULL)
- lt_fatal ("getcwd failed");
- tmp_len = strlen (tmp);
- concat_name = XMALLOC (char, tmp_len + 1 + strlen (wrapper) + 1);
- memcpy (concat_name, tmp, tmp_len);
- concat_name[tmp_len] = '/';
- strcpy (concat_name + tmp_len + 1, wrapper);
-
- if (check_executable (concat_name))
- return concat_name;
- XFREE (concat_name);
- return NULL;
-}
-
-char *
-chase_symlinks (const char *pathspec)
-{
-#ifndef S_ISLNK
- return xstrdup (pathspec);
-#else
- char buf[LT_PATHMAX];
- struct stat s;
- char *tmp_pathspec = xstrdup (pathspec);
- char *p;
- int has_symlinks = 0;
- while (strlen (tmp_pathspec) && !has_symlinks)
- {
- LTWRAPPER_DEBUGPRINTF (("checking path component for symlinks: %s\n",
- tmp_pathspec));
- if (lstat (tmp_pathspec, &s) == 0)
- {
- if (S_ISLNK (s.st_mode) != 0)
- {
- has_symlinks = 1;
- break;
- }
-
- /* search backwards for last DIR_SEPARATOR */
- p = tmp_pathspec + strlen (tmp_pathspec) - 1;
- while ((p > tmp_pathspec) && (!IS_DIR_SEPARATOR (*p)))
- p--;
- if ((p == tmp_pathspec) && (!IS_DIR_SEPARATOR (*p)))
- {
- /* no more DIR_SEPARATORS left */
- break;
- }
- *p = '\0';
- }
- else
- {
- char *errstr = strerror (errno);
- lt_fatal ("Error accessing file %s (%s)", tmp_pathspec, errstr);
- }
- }
- XFREE (tmp_pathspec);
-
- if (!has_symlinks)
- {
- return xstrdup (pathspec);
- }
-
- tmp_pathspec = realpath (pathspec, buf);
- if (tmp_pathspec == 0)
- {
- lt_fatal ("Could not follow symlinks for %s", pathspec);
- }
- return xstrdup (tmp_pathspec);
-#endif
-}
-
-char *
-strendzap (char *str, const char *pat)
-{
- size_t len, patlen;
-
- assert (str != NULL);
- assert (pat != NULL);
-
- len = strlen (str);
- patlen = strlen (pat);
-
- if (patlen <= len)
- {
- str += len - patlen;
- if (strcmp (str, pat) == 0)
- *str = '\0';
- }
- return str;
-}
-
-static void
-lt_error_core (int exit_status, const char *mode,
- const char *message, va_list ap)
-{
- fprintf (stderr, "%s: %s: ", program_name, mode);
- vfprintf (stderr, message, ap);
- fprintf (stderr, ".\n");
-
- if (exit_status >= 0)
- exit (exit_status);
-}
-
-void
-lt_fatal (const char *message, ...)
-{
- va_list ap;
- va_start (ap, message);
- lt_error_core (EXIT_FAILURE, "FATAL", message, ap);
- va_end (ap);
-}
-
-void
-lt_setenv (const char *name, const char *value)
-{
- LTWRAPPER_DEBUGPRINTF (("(lt_setenv) setting '%s' to '%s'\n",
- (name ? name : "<NULL>"),
- (value ? value : "<NULL>")));
- {
-#ifdef HAVE_SETENV
- /* always make a copy, for consistency with !HAVE_SETENV */
- char *str = xstrdup (value);
- setenv (name, str, 1);
-#else
- int len = strlen (name) + 1 + strlen (value) + 1;
- char *str = XMALLOC (char, len);
- sprintf (str, "%s=%s", name, value);
- if (putenv (str) != EXIT_SUCCESS)
- {
- XFREE (str);
- }
-#endif
- }
-}
-
-char *
-lt_extend_str (const char *orig_value, const char *add, int to_end)
-{
- char *new_value;
- if (orig_value && *orig_value)
- {
- int orig_value_len = strlen (orig_value);
- int add_len = strlen (add);
- new_value = XMALLOC (char, add_len + orig_value_len + 1);
- if (to_end)
- {
- strcpy (new_value, orig_value);
- strcpy (new_value + orig_value_len, add);
- }
- else
- {
- strcpy (new_value, add);
- strcpy (new_value + add_len, orig_value);
- }
- }
- else
- {
- new_value = xstrdup (add);
- }
- return new_value;
-}
-
-int
-lt_split_name_value (const char *arg, char** name, char** value)
-{
- const char *p;
- int len;
- if (!arg || !*arg)
- return 1;
-
- p = strchr (arg, (int)'=');
-
- if (!p)
- return 1;
-
- *value = xstrdup (++p);
-
- len = strlen (arg) - strlen (*value);
- *name = XMALLOC (char, len);
- strncpy (*name, arg, len-1);
- (*name)[len - 1] = '\0';
-
- return 0;
-}
-
-void
-lt_opt_process_env_set (const char *arg)
-{
- char *name = NULL;
- char *value = NULL;
-
- if (lt_split_name_value (arg, &name, &value) != 0)
- {
- XFREE (name);
- XFREE (value);
- lt_fatal ("bad argument for %s: '%s'", env_set_opt, arg);
- }
-
- lt_setenv (name, value);
- XFREE (name);
- XFREE (value);
-}
-
-void
-lt_opt_process_env_prepend (const char *arg)
-{
- char *name = NULL;
- char *value = NULL;
- char *new_value = NULL;
-
- if (lt_split_name_value (arg, &name, &value) != 0)
- {
- XFREE (name);
- XFREE (value);
- lt_fatal ("bad argument for %s: '%s'", env_prepend_opt, arg);
- }
-
- new_value = lt_extend_str (getenv (name), value, 0);
- lt_setenv (name, new_value);
- XFREE (new_value);
- XFREE (name);
- XFREE (value);
-}
-
-void
-lt_opt_process_env_append (const char *arg)
-{
- char *name = NULL;
- char *value = NULL;
- char *new_value = NULL;
-
- if (lt_split_name_value (arg, &name, &value) != 0)
- {
- XFREE (name);
- XFREE (value);
- lt_fatal ("bad argument for %s: '%s'", env_append_opt, arg);
- }
-
- new_value = lt_extend_str (getenv (name), value, 1);
- lt_setenv (name, new_value);
- XFREE (new_value);
- XFREE (name);
- XFREE (value);
-}
-
-void
-lt_update_exe_path (const char *name, const char *value)
-{
- LTWRAPPER_DEBUGPRINTF (("(lt_update_exe_path) modifying '%s' by prepending '%s'\n",
- (name ? name : "<NULL>"),
- (value ? value : "<NULL>")));
-
- if (name && *name && value && *value)
- {
- char *new_value = lt_extend_str (getenv (name), value, 0);
- /* some systems can't cope with a ':'-terminated path #' */
- int len = strlen (new_value);
- while (((len = strlen (new_value)) > 0) && IS_PATH_SEPARATOR (new_value[len-1]))
- {
- new_value[len-1] = '\0';
- }
- lt_setenv (name, new_value);
- XFREE (new_value);
- }
-}
-
-void
-lt_update_lib_path (const char *name, const char *value)
-{
- LTWRAPPER_DEBUGPRINTF (("(lt_update_lib_path) modifying '%s' by prepending '%s'\n",
- (name ? name : "<NULL>"),
- (value ? value : "<NULL>")));
-
- if (name && *name && value && *value)
- {
- char *new_value = lt_extend_str (getenv (name), value, 0);
- lt_setenv (name, new_value);
- XFREE (new_value);
- }
-}
-
-
-EOF
-}
-# end: func_emit_cwrapperexe_src
-
-# func_mode_link arg...
-func_mode_link ()
-{
- $opt_debug
- case $host in
- *-*-cygwin* | *-*-mingw* | *-*-pw32* | *-*-os2* | *-cegcc*)
- # It is impossible to link a dll without this setting, and
- # we shouldn't force the makefile maintainer to figure out
- # which system we are compiling for in order to pass an extra
- # flag for every libtool invocation.
- # allow_undefined=no
-
- # FIXME: Unfortunately, there are problems with the above when trying
- # to make a dll which has undefined symbols, in which case not
- # even a static library is built. For now, we need to specify
- # -no-undefined on the libtool link line when we can be certain
- # that all symbols are satisfied, otherwise we get a static library.
- allow_undefined=yes
- ;;
- *)
- allow_undefined=yes
- ;;
- esac
- libtool_args=$nonopt
- base_compile="$nonopt $@"
- compile_command=$nonopt
- finalize_command=$nonopt
-
- compile_rpath=
- finalize_rpath=
- compile_shlibpath=
- finalize_shlibpath=
- convenience=
- old_convenience=
- deplibs=
- old_deplibs=
- compiler_flags=
- linker_flags=
- dllsearchpath=
- lib_search_path=`pwd`
- inst_prefix_dir=
- new_inherited_linker_flags=
-
- avoid_version=no
- dlfiles=
- dlprefiles=
- dlself=no
- export_dynamic=no
- export_symbols=
- export_symbols_regex=
- generated=
- libobjs=
- ltlibs=
- module=no
- no_install=no
- objs=
- non_pic_objects=
- precious_files_regex=
- prefer_static_libs=no
- preload=no
- prev=
- prevarg=
- release=
- rpath=
- xrpath=
- perm_rpath=
- temp_rpath=
- thread_safe=no
- vinfo=
- vinfo_number=no
- weak_libs=
- single_module="${wl}-single_module"
- func_infer_tag $base_compile
-
- # We need to know -static, to get the right output filenames.
- for arg
- do
- case $arg in
- -shared)
- test "$build_libtool_libs" != yes && \
- func_fatal_configuration "can not build a shared library"
- build_old_libs=no
- break
- ;;
- -all-static | -static | -static-libtool-libs)
- case $arg in
- -all-static)
- if test "$build_libtool_libs" = yes && test -z "$link_static_flag"; then
- func_warning "complete static linking is impossible in this configuration"
- fi
- if test -n "$link_static_flag"; then
- dlopen_self=$dlopen_self_static
- fi
- prefer_static_libs=yes
- ;;
- -static)
- if test -z "$pic_flag" && test -n "$link_static_flag"; then
- dlopen_self=$dlopen_self_static
- fi
- prefer_static_libs=built
- ;;
- -static-libtool-libs)
- if test -z "$pic_flag" && test -n "$link_static_flag"; then
- dlopen_self=$dlopen_self_static
- fi
- prefer_static_libs=yes
- ;;
- esac
- build_libtool_libs=no
- build_old_libs=yes
- break
- ;;
- esac
- done
-
- # See if our shared archives depend on static archives.
- test -n "$old_archive_from_new_cmds" && build_old_libs=yes
-
- # Go through the arguments, transforming them on the way.
- while test "$#" -gt 0; do
- arg="$1"
- shift
- func_quote_for_eval "$arg"
- qarg=$func_quote_for_eval_unquoted_result
- func_append libtool_args " $func_quote_for_eval_result"
-
- # If the previous option needs an argument, assign it.
- if test -n "$prev"; then
- case $prev in
- output)
- func_append compile_command " @OUTPUT@"
- func_append finalize_command " @OUTPUT@"
- ;;
- esac
-
- case $prev in
- dlfiles|dlprefiles)
- if test "$preload" = no; then
- # Add the symbol object into the linking commands.
- func_append compile_command " @SYMFILE@"
- func_append finalize_command " @SYMFILE@"
- preload=yes
- fi
- case $arg in
- *.la | *.lo) ;; # We handle these cases below.
- force)
- if test "$dlself" = no; then
- dlself=needless
- export_dynamic=yes
- fi
- prev=
- continue
- ;;
- self)
- if test "$prev" = dlprefiles; then
- dlself=yes
- elif test "$prev" = dlfiles && test "$dlopen_self" != yes; then
- dlself=yes
- else
- dlself=needless
- export_dynamic=yes
- fi
- prev=
- continue
- ;;
- *)
- if test "$prev" = dlfiles; then
- dlfiles="$dlfiles $arg"
- else
- dlprefiles="$dlprefiles $arg"
- fi
- prev=
- continue
- ;;
- esac
- ;;
- expsyms)
- export_symbols="$arg"
- test -f "$arg" \
- || func_fatal_error "symbol file \`$arg' does not exist"
- prev=
- continue
- ;;
- expsyms_regex)
- export_symbols_regex="$arg"
- prev=
- continue
- ;;
- framework)
- case $host in
- *-*-darwin*)
- case "$deplibs " in
- *" $qarg.ltframework "*) ;;
- *) deplibs="$deplibs $qarg.ltframework" # this is fixed later
- ;;
- esac
- ;;
- esac
- prev=
- continue
- ;;
- inst_prefix)
- inst_prefix_dir="$arg"
- prev=
- continue
- ;;
- objectlist)
- if test -f "$arg"; then
- save_arg=$arg
- moreargs=
- for fil in `cat "$save_arg"`
- do
-# moreargs="$moreargs $fil"
- arg=$fil
- # A libtool-controlled object.
-
- # Check to see that this really is a libtool object.
- if func_lalib_unsafe_p "$arg"; then
- pic_object=
- non_pic_object=
-
- # Read the .lo file
- func_source "$arg"
-
- if test -z "$pic_object" ||
- test -z "$non_pic_object" ||
- test "$pic_object" = none &&
- test "$non_pic_object" = none; then
- func_fatal_error "cannot find name of object for \`$arg'"
- fi
-
- # Extract subdirectory from the argument.
- func_dirname "$arg" "/" ""
- xdir="$func_dirname_result"
-
- if test "$pic_object" != none; then
- # Prepend the subdirectory the object is found in.
- pic_object="$xdir$pic_object"
-
- if test "$prev" = dlfiles; then
- if test "$build_libtool_libs" = yes && test "$dlopen_support" = yes; then
- dlfiles="$dlfiles $pic_object"
- prev=
- continue
- else
- # If libtool objects are unsupported, then we need to preload.
- prev=dlprefiles
- fi
- fi
-
- # CHECK ME: I think I busted this. -Ossama
- if test "$prev" = dlprefiles; then
- # Preload the old-style object.
- dlprefiles="$dlprefiles $pic_object"
- prev=
- fi
-
- # A PIC object.
- func_append libobjs " $pic_object"
- arg="$pic_object"
- fi
-
- # Non-PIC object.
- if test "$non_pic_object" != none; then
- # Prepend the subdirectory the object is found in.
- non_pic_object="$xdir$non_pic_object"
-
- # A standard non-PIC object
- func_append non_pic_objects " $non_pic_object"
- if test -z "$pic_object" || test "$pic_object" = none ; then
- arg="$non_pic_object"
- fi
- else
- # If the PIC object exists, use it instead.
- # $xdir was prepended to $pic_object above.
- non_pic_object="$pic_object"
- func_append non_pic_objects " $non_pic_object"
- fi
- else
- # Only an error if not doing a dry-run.
- if $opt_dry_run; then
- # Extract subdirectory from the argument.
- func_dirname "$arg" "/" ""
- xdir="$func_dirname_result"
-
- func_lo2o "$arg"
- pic_object=$xdir$objdir/$func_lo2o_result
- non_pic_object=$xdir$func_lo2o_result
- func_append libobjs " $pic_object"
- func_append non_pic_objects " $non_pic_object"
- else
- func_fatal_error "\`$arg' is not a valid libtool object"
- fi
- fi
- done
- else
- func_fatal_error "link input file \`$arg' does not exist"
- fi
- arg=$save_arg
- prev=
- continue
- ;;
- precious_regex)
- precious_files_regex="$arg"
- prev=
- continue
- ;;
- release)
- release="-$arg"
- prev=
- continue
- ;;
- rpath | xrpath)
- # We need an absolute path.
- case $arg in
- [\\/]* | [A-Za-z]:[\\/]*) ;;
- *)
- func_fatal_error "only absolute run-paths are allowed"
- ;;
- esac
- if test "$prev" = rpath; then
- case "$rpath " in
- *" $arg "*) ;;
- *) rpath="$rpath $arg" ;;
- esac
- else
- case "$xrpath " in
- *" $arg "*) ;;
- *) xrpath="$xrpath $arg" ;;
- esac
- fi
- prev=
- continue
- ;;
- shrext)
- shrext_cmds="$arg"
- prev=
- continue
- ;;
- weak)
- weak_libs="$weak_libs $arg"
- prev=
- continue
- ;;
- xcclinker)
- linker_flags="$linker_flags $qarg"
- compiler_flags="$compiler_flags $qarg"
- prev=
- func_append compile_command " $qarg"
- func_append finalize_command " $qarg"
- continue
- ;;
- xcompiler)
- compiler_flags="$compiler_flags $qarg"
- prev=
- func_append compile_command " $qarg"
- func_append finalize_command " $qarg"
- continue
- ;;
- xlinker)
- linker_flags="$linker_flags $qarg"
- compiler_flags="$compiler_flags $wl$qarg"
- prev=
- func_append compile_command " $wl$qarg"
- func_append finalize_command " $wl$qarg"
- continue
- ;;
- *)
- eval "$prev=\"\$arg\""
- prev=
- continue
- ;;
- esac
- fi # test -n "$prev"
-
- prevarg="$arg"
-
- case $arg in
- -all-static)
- if test -n "$link_static_flag"; then
- # See comment for -static flag below, for more details.
- func_append compile_command " $link_static_flag"
- func_append finalize_command " $link_static_flag"
- fi
- continue
- ;;
-
- -allow-undefined)
- # FIXME: remove this flag sometime in the future.
- func_fatal_error "\`-allow-undefined' must not be used because it is the default"
- ;;
-
- -avoid-version)
- avoid_version=yes
- continue
- ;;
-
- -dlopen)
- prev=dlfiles
- continue
- ;;
-
- -dlpreopen)
- prev=dlprefiles
- continue
- ;;
-
- -export-dynamic)
- export_dynamic=yes
- continue
- ;;
-
- -export-symbols | -export-symbols-regex)
- if test -n "$export_symbols" || test -n "$export_symbols_regex"; then
- func_fatal_error "more than one -exported-symbols argument is not allowed"
- fi
- if test "X$arg" = "X-export-symbols"; then
- prev=expsyms
- else
- prev=expsyms_regex
- fi
- continue
- ;;
-
- -framework)
- prev=framework
- continue
- ;;
-
- -inst-prefix-dir)
- prev=inst_prefix
- continue
- ;;
-
- # The native IRIX linker understands -LANG:*, -LIST:* and -LNO:*
- # so, if we see these flags be careful not to treat them like -L
- -L[A-Z][A-Z]*:*)
- case $with_gcc/$host in
- no/*-*-irix* | /*-*-irix*)
- func_append compile_command " $arg"
- func_append finalize_command " $arg"
- ;;
- esac
- continue
- ;;
-
- -L*)
- func_stripname '-L' '' "$arg"
- dir=$func_stripname_result
- if test -z "$dir"; then
- if test "$#" -gt 0; then
- func_fatal_error "require no space between \`-L' and \`$1'"
- else
- func_fatal_error "need path for \`-L' option"
- fi
- fi
- # We need an absolute path.
- case $dir in
- [\\/]* | [A-Za-z]:[\\/]*) ;;
- *)
- absdir=`cd "$dir" && pwd`
- test -z "$absdir" && \
- func_fatal_error "cannot determine absolute directory name of \`$dir'"
- dir="$absdir"
- ;;
- esac
- case "$deplibs " in
- *" -L$dir "*) ;;
- *)
- deplibs="$deplibs -L$dir"
- lib_search_path="$lib_search_path $dir"
- ;;
- esac
- case $host in
- *-*-cygwin* | *-*-mingw* | *-*-pw32* | *-*-os2* | *-cegcc*)
- testbindir=`$ECHO "X$dir" | $Xsed -e 's*/lib$*/bin*'`
- case :$dllsearchpath: in
- *":$dir:"*) ;;
- ::) dllsearchpath=$dir;;
- *) dllsearchpath="$dllsearchpath:$dir";;
- esac
- case :$dllsearchpath: in
- *":$testbindir:"*) ;;
- ::) dllsearchpath=$testbindir;;
- *) dllsearchpath="$dllsearchpath:$testbindir";;
- esac
- ;;
- esac
- continue
- ;;
-
- -l*)
- if test "X$arg" = "X-lc" || test "X$arg" = "X-lm"; then
- case $host in
- *-*-cygwin* | *-*-mingw* | *-*-pw32* | *-*-beos* | *-cegcc*)
- # These systems don't actually have a C or math library (as such)
- continue
- ;;
- *-*-os2*)
- # These systems don't actually have a C library (as such)
- test "X$arg" = "X-lc" && continue
- ;;
- *-*-openbsd* | *-*-freebsd* | *-*-dragonfly*)
- # Do not include libc due to us having libc/libc_r.
- test "X$arg" = "X-lc" && continue
- ;;
- *-*-rhapsody* | *-*-darwin1.[012])
- # Rhapsody C and math libraries are in the System framework
- deplibs="$deplibs System.ltframework"
- continue
- ;;
- *-*-sco3.2v5* | *-*-sco5v6*)
- # Causes problems with __ctype
- test "X$arg" = "X-lc" && continue
- ;;
- *-*-sysv4.2uw2* | *-*-sysv5* | *-*-unixware* | *-*-OpenUNIX*)
- # Compiler inserts libc in the correct place for threads to work
- test "X$arg" = "X-lc" && continue
- ;;
- esac
- elif test "X$arg" = "X-lc_r"; then
- case $host in
- *-*-openbsd* | *-*-freebsd* | *-*-dragonfly*)
- # Do not include libc_r directly, use -pthread flag.
- continue
- ;;
- esac
- fi
- deplibs="$deplibs $arg"
- continue
- ;;
-
- -module)
- module=yes
- continue
- ;;
-
- # Tru64 UNIX uses -model [arg] to determine the layout of C++
- # classes, name mangling, and exception handling.
- # Darwin uses the -arch flag to determine output architecture.
- -model|-arch|-isysroot)
- compiler_flags="$compiler_flags $arg"
- func_append compile_command " $arg"
- func_append finalize_command " $arg"
- prev=xcompiler
- continue
- ;;
-
- -mt|-mthreads|-kthread|-Kthread|-pthread|-pthreads|--thread-safe|-threads)
- compiler_flags="$compiler_flags $arg"
- func_append compile_command " $arg"
- func_append finalize_command " $arg"
- case "$new_inherited_linker_flags " in
- *" $arg "*) ;;
- * ) new_inherited_linker_flags="$new_inherited_linker_flags $arg" ;;
- esac
- continue
- ;;
-
- -multi_module)
- single_module="${wl}-multi_module"
- continue
- ;;
-
- -no-fast-install)
- fast_install=no
- continue
- ;;
-
- -no-install)
- case $host in
- *-*-cygwin* | *-*-mingw* | *-*-pw32* | *-*-os2* | *-*-darwin* | *-cegcc*)
- # The PATH hackery in wrapper scripts is required on Windows
- # and Darwin in order for the loader to find any dlls it needs.
- func_warning "\`-no-install' is ignored for $host"
- func_warning "assuming \`-no-fast-install' instead"
- fast_install=no
- ;;
- *) no_install=yes ;;
- esac
- continue
- ;;
-
- -no-undefined)
- allow_undefined=no
- continue
- ;;
-
- -objectlist)
- prev=objectlist
- continue
- ;;
-
- -o) prev=output ;;
-
- -precious-files-regex)
- prev=precious_regex
- continue
- ;;
-
- -release)
- prev=release
- continue
- ;;
-
- -rpath)
- prev=rpath
- continue
- ;;
-
- -R)
- prev=xrpath
- continue
- ;;
-
- -R*)
- func_stripname '-R' '' "$arg"
- dir=$func_stripname_result
- # We need an absolute path.
- case $dir in
- [\\/]* | [A-Za-z]:[\\/]*) ;;
- *)
- func_fatal_error "only absolute run-paths are allowed"
- ;;
- esac
- case "$xrpath " in
- *" $dir "*) ;;
- *) xrpath="$xrpath $dir" ;;
- esac
- continue
- ;;
-
- -shared)
- # The effects of -shared are defined in a previous loop.
- continue
- ;;
-
- -shrext)
- prev=shrext
- continue
- ;;
-
- -static | -static-libtool-libs)
- # The effects of -static are defined in a previous loop.
- # We used to do the same as -all-static on platforms that
- # didn't have a PIC flag, but the assumption that the effects
- # would be equivalent was wrong. It would break on at least
- # Digital Unix and AIX.
- continue
- ;;
-
- -thread-safe)
- thread_safe=yes
- continue
- ;;
-
- -version-info)
- prev=vinfo
- continue
- ;;
-
- -version-number)
- prev=vinfo
- vinfo_number=yes
- continue
- ;;
-
- -weak)
- prev=weak
- continue
- ;;
-
- -Wc,*)
- func_stripname '-Wc,' '' "$arg"
- args=$func_stripname_result
- arg=
- save_ifs="$IFS"; IFS=','
- for flag in $args; do
- IFS="$save_ifs"
- func_quote_for_eval "$flag"
- arg="$arg $wl$func_quote_for_eval_result"
- compiler_flags="$compiler_flags $func_quote_for_eval_result"
- done
- IFS="$save_ifs"
- func_stripname ' ' '' "$arg"
- arg=$func_stripname_result
- ;;
-
- -Wl,*)
- func_stripname '-Wl,' '' "$arg"
- args=$func_stripname_result
- arg=
- save_ifs="$IFS"; IFS=','
- for flag in $args; do
- IFS="$save_ifs"
- func_quote_for_eval "$flag"
- arg="$arg $wl$func_quote_for_eval_result"
- compiler_flags="$compiler_flags $wl$func_quote_for_eval_result"
- linker_flags="$linker_flags $func_quote_for_eval_result"
- done
- IFS="$save_ifs"
- func_stripname ' ' '' "$arg"
- arg=$func_stripname_result
- ;;
-
- -Xcompiler)
- prev=xcompiler
- continue
- ;;
-
- -Xlinker)
- prev=xlinker
- continue
- ;;
-
- -XCClinker)
- prev=xcclinker
- continue
- ;;
-
- # -msg_* for osf cc
- -msg_*)
- func_quote_for_eval "$arg"
- arg="$func_quote_for_eval_result"
- ;;
-
- # -64, -mips[0-9] enable 64-bit mode on the SGI compiler
- # -r[0-9][0-9]* specifies the processor on the SGI compiler
- # -xarch=*, -xtarget=* enable 64-bit mode on the Sun compiler
- # +DA*, +DD* enable 64-bit mode on the HP compiler
- # -q* pass through compiler args for the IBM compiler
- # -m*, -t[45]*, -txscale* pass through architecture-specific
- # compiler args for GCC
- # -F/path gives path to uninstalled frameworks, gcc on darwin
- # -p, -pg, --coverage, -fprofile-* pass through profiling flag for GCC
- # @file GCC response files
- -64|-mips[0-9]|-r[0-9][0-9]*|-xarch=*|-xtarget=*|+DA*|+DD*|-q*|-m*| \
- -t[45]*|-txscale*|-p|-pg|--coverage|-fprofile-*|-F*|@*)
- func_quote_for_eval "$arg"
- arg="$func_quote_for_eval_result"
- func_append compile_command " $arg"
- func_append finalize_command " $arg"
- compiler_flags="$compiler_flags $arg"
- continue
- ;;
-
- # Some other compiler flag.
- -* | +*)
- func_quote_for_eval "$arg"
- arg="$func_quote_for_eval_result"
- ;;
-
- *.$objext)
- # A standard object.
- objs="$objs $arg"
- ;;
-
- *.lo)
- # A libtool-controlled object.
-
- # Check to see that this really is a libtool object.
- if func_lalib_unsafe_p "$arg"; then
- pic_object=
- non_pic_object=
-
- # Read the .lo file
- func_source "$arg"
-
- if test -z "$pic_object" ||
- test -z "$non_pic_object" ||
- test "$pic_object" = none &&
- test "$non_pic_object" = none; then
- func_fatal_error "cannot find name of object for \`$arg'"
- fi
-
- # Extract subdirectory from the argument.
- func_dirname "$arg" "/" ""
- xdir="$func_dirname_result"
-
- if test "$pic_object" != none; then
- # Prepend the subdirectory the object is found in.
- pic_object="$xdir$pic_object"
-
- if test "$prev" = dlfiles; then
- if test "$build_libtool_libs" = yes && test "$dlopen_support" = yes; then
- dlfiles="$dlfiles $pic_object"
- prev=
- continue
- else
- # If libtool objects are unsupported, then we need to preload.
- prev=dlprefiles
- fi
- fi
-
- # CHECK ME: I think I busted this. -Ossama
- if test "$prev" = dlprefiles; then
- # Preload the old-style object.
- dlprefiles="$dlprefiles $pic_object"
- prev=
- fi
-
- # A PIC object.
- func_append libobjs " $pic_object"
- arg="$pic_object"
- fi
-
- # Non-PIC object.
- if test "$non_pic_object" != none; then
- # Prepend the subdirectory the object is found in.
- non_pic_object="$xdir$non_pic_object"
-
- # A standard non-PIC object
- func_append non_pic_objects " $non_pic_object"
- if test -z "$pic_object" || test "$pic_object" = none ; then
- arg="$non_pic_object"
- fi
- else
- # If the PIC object exists, use it instead.
- # $xdir was prepended to $pic_object above.
- non_pic_object="$pic_object"
- func_append non_pic_objects " $non_pic_object"
- fi
- else
- # Only an error if not doing a dry-run.
- if $opt_dry_run; then
- # Extract subdirectory from the argument.
- func_dirname "$arg" "/" ""
- xdir="$func_dirname_result"
-
- func_lo2o "$arg"
- pic_object=$xdir$objdir/$func_lo2o_result
- non_pic_object=$xdir$func_lo2o_result
- func_append libobjs " $pic_object"
- func_append non_pic_objects " $non_pic_object"
- else
- func_fatal_error "\`$arg' is not a valid libtool object"
- fi
- fi
- ;;
-
- *.$libext)
- # An archive.
- deplibs="$deplibs $arg"
- old_deplibs="$old_deplibs $arg"
- continue
- ;;
-
- *.la)
- # A libtool-controlled library.
-
- if test "$prev" = dlfiles; then
- # This library was specified with -dlopen.
- dlfiles="$dlfiles $arg"
- prev=
- elif test "$prev" = dlprefiles; then
- # The library was specified with -dlpreopen.
- dlprefiles="$dlprefiles $arg"
- prev=
- else
- deplibs="$deplibs $arg"
- fi
- continue
- ;;
-
- # Some other compiler argument.
- *)
- # Unknown arguments in both finalize_command and compile_command need
- # to be aesthetically quoted because they are evaled later.
- func_quote_for_eval "$arg"
- arg="$func_quote_for_eval_result"
- ;;
- esac # arg
-
- # Now actually substitute the argument into the commands.
- if test -n "$arg"; then
- func_append compile_command " $arg"
- func_append finalize_command " $arg"
- fi
- done # argument parsing loop
-
- test -n "$prev" && \
- func_fatal_help "the \`$prevarg' option requires an argument"
-
- if test "$export_dynamic" = yes && test -n "$export_dynamic_flag_spec"; then
- eval arg=\"$export_dynamic_flag_spec\"
- func_append compile_command " $arg"
- func_append finalize_command " $arg"
- fi
-
- oldlibs=
- # calculate the name of the file, without its directory
- func_basename "$output"
- outputname="$func_basename_result"
- libobjs_save="$libobjs"
-
- if test -n "$shlibpath_var"; then
- # get the directories listed in $shlibpath_var
- eval shlib_search_path=\`\$ECHO \"X\${$shlibpath_var}\" \| \$Xsed -e \'s/:/ /g\'\`
- else
- shlib_search_path=
- fi
- eval sys_lib_search_path=\"$sys_lib_search_path_spec\"
- eval sys_lib_dlsearch_path=\"$sys_lib_dlsearch_path_spec\"
-
- func_dirname "$output" "/" ""
- output_objdir="$func_dirname_result$objdir"
- # Create the object directory.
- func_mkdir_p "$output_objdir"
-
- # Determine the type of output
- case $output in
- "")
- func_fatal_help "you must specify an output file"
- ;;
- *.$libext) linkmode=oldlib ;;
- *.lo | *.$objext) linkmode=obj ;;
- *.la) linkmode=lib ;;
- *) linkmode=prog ;; # Anything else should be a program.
- esac
-
- specialdeplibs=
-
- libs=
- # Find all interdependent deplibs by searching for libraries
- # that are linked more than once (e.g. -la -lb -la)
- for deplib in $deplibs; do
- if $opt_duplicate_deps ; then
- case "$libs " in
- *" $deplib "*) specialdeplibs="$specialdeplibs $deplib" ;;
- esac
- fi
- libs="$libs $deplib"
- done
-
- if test "$linkmode" = lib; then
- libs="$predeps $libs $compiler_lib_search_path $postdeps"
-
- # Compute libraries that are listed more than once in $predeps
- # $postdeps and mark them as special (i.e., whose duplicates are
- # not to be eliminated).
- pre_post_deps=
- if $opt_duplicate_compiler_generated_deps; then
- for pre_post_dep in $predeps $postdeps; do
- case "$pre_post_deps " in
- *" $pre_post_dep "*) specialdeplibs="$specialdeplibs $pre_post_deps" ;;
- esac
- pre_post_deps="$pre_post_deps $pre_post_dep"
- done
- fi
- pre_post_deps=
- fi
-
- deplibs=
- newdependency_libs=
- newlib_search_path=
- need_relink=no # whether we're linking any uninstalled libtool libraries
- notinst_deplibs= # not-installed libtool libraries
- notinst_path= # paths that contain not-installed libtool libraries
-
- case $linkmode in
- lib)
- passes="conv dlpreopen link"
- for file in $dlfiles $dlprefiles; do
- case $file in
- *.la) ;;
- *)
- func_fatal_help "libraries can \`-dlopen' only libtool libraries: $file"
- ;;
- esac
- done
- ;;
- prog)
- compile_deplibs=
- finalize_deplibs=
- alldeplibs=no
- newdlfiles=
- newdlprefiles=
- passes="conv scan dlopen dlpreopen link"
- ;;
- *) passes="conv"
- ;;
- esac
-
- for pass in $passes; do
- # The preopen pass in lib mode reverses $deplibs; put it back here
- # so that -L comes before libs that need it for instance...
- if test "$linkmode,$pass" = "lib,link"; then
- ## FIXME: Find the place where the list is rebuilt in the wrong
- ## order, and fix it there properly
- tmp_deplibs=
- for deplib in $deplibs; do
- tmp_deplibs="$deplib $tmp_deplibs"
- done
- deplibs="$tmp_deplibs"
- fi
-
- if test "$linkmode,$pass" = "lib,link" ||
- test "$linkmode,$pass" = "prog,scan"; then
- libs="$deplibs"
- deplibs=
- fi
- if test "$linkmode" = prog; then
- case $pass in
- dlopen) libs="$dlfiles" ;;
- dlpreopen) libs="$dlprefiles" ;;
- link)
- libs="$deplibs %DEPLIBS%"
- test "X$link_all_deplibs" != Xno && libs="$libs $dependency_libs"
- ;;
- esac
- fi
- if test "$linkmode,$pass" = "lib,dlpreopen"; then
- # Collect and forward deplibs of preopened libtool libs
- for lib in $dlprefiles; do
- # Ignore non-libtool-libs
- dependency_libs=
- case $lib in
- *.la) func_source "$lib" ;;
- esac
-
- # Collect preopened libtool deplibs, except any this library
- # has declared as weak libs
- for deplib in $dependency_libs; do
- deplib_base=`$ECHO "X$deplib" | $Xsed -e "$basename"`
- case " $weak_libs " in
- *" $deplib_base "*) ;;
- *) deplibs="$deplibs $deplib" ;;
- esac
- done
- done
- libs="$dlprefiles"
- fi
- if test "$pass" = dlopen; then
- # Collect dlpreopened libraries
- save_deplibs="$deplibs"
- deplibs=
- fi
-
- for deplib in $libs; do
- lib=
- found=no
- case $deplib in
- -mt|-mthreads|-kthread|-Kthread|-pthread|-pthreads|--thread-safe|-threads)
- if test "$linkmode,$pass" = "prog,link"; then
- compile_deplibs="$deplib $compile_deplibs"
- finalize_deplibs="$deplib $finalize_deplibs"
- else
- compiler_flags="$compiler_flags $deplib"
- if test "$linkmode" = lib ; then
- case "$new_inherited_linker_flags " in
- *" $deplib "*) ;;
- * ) new_inherited_linker_flags="$new_inherited_linker_flags $deplib" ;;
- esac
- fi
- fi
- continue
- ;;
- -l*)
- if test "$linkmode" != lib && test "$linkmode" != prog; then
- func_warning "\`-l' is ignored for archives/objects"
- continue
- fi
- func_stripname '-l' '' "$deplib"
- name=$func_stripname_result
- if test "$linkmode" = lib; then
- searchdirs="$newlib_search_path $lib_search_path $compiler_lib_search_dirs $sys_lib_search_path $shlib_search_path"
- else
- searchdirs="$newlib_search_path $lib_search_path $sys_lib_search_path $shlib_search_path"
- fi
- for searchdir in $searchdirs; do
- for search_ext in .la $std_shrext .so .a; do
- # Search the libtool library
- lib="$searchdir/lib${name}${search_ext}"
- if test -f "$lib"; then
- if test "$search_ext" = ".la"; then
- found=yes
- else
- found=no
- fi
- break 2
- fi
- done
- done
- if test "$found" != yes; then
- # deplib doesn't seem to be a libtool library
- if test "$linkmode,$pass" = "prog,link"; then
- compile_deplibs="$deplib $compile_deplibs"
- finalize_deplibs="$deplib $finalize_deplibs"
- else
- deplibs="$deplib $deplibs"
- test "$linkmode" = lib && newdependency_libs="$deplib $newdependency_libs"
- fi
- continue
- else # deplib is a libtool library
- # If $allow_libtool_libs_with_static_runtimes && $deplib is a stdlib,
- # We need to do some special things here, and not later.
- if test "X$allow_libtool_libs_with_static_runtimes" = "Xyes" ; then
- case " $predeps $postdeps " in
- *" $deplib "*)
- if func_lalib_p "$lib"; then
- library_names=
- old_library=
- func_source "$lib"
- for l in $old_library $library_names; do
- ll="$l"
- done
- if test "X$ll" = "X$old_library" ; then # only static version available
- found=no
- func_dirname "$lib" "" "."
- ladir="$func_dirname_result"
- lib=$ladir/$old_library
- if test "$linkmode,$pass" = "prog,link"; then
- compile_deplibs="$deplib $compile_deplibs"
- finalize_deplibs="$deplib $finalize_deplibs"
- else
- deplibs="$deplib $deplibs"
- test "$linkmode" = lib && newdependency_libs="$deplib $newdependency_libs"
- fi
- continue
- fi
- fi
- ;;
- *) ;;
- esac
- fi
- fi
- ;; # -l
- *.ltframework)
- if test "$linkmode,$pass" = "prog,link"; then
- compile_deplibs="$deplib $compile_deplibs"
- finalize_deplibs="$deplib $finalize_deplibs"
- else
- deplibs="$deplib $deplibs"
- if test "$linkmode" = lib ; then
- case "$new_inherited_linker_flags " in
- *" $deplib "*) ;;
- * ) new_inherited_linker_flags="$new_inherited_linker_flags $deplib" ;;
- esac
- fi
- fi
- continue
- ;;
- -L*)
- case $linkmode in
- lib)
- deplibs="$deplib $deplibs"
- test "$pass" = conv && continue
- newdependency_libs="$deplib $newdependency_libs"
- func_stripname '-L' '' "$deplib"
- newlib_search_path="$newlib_search_path $func_stripname_result"
- ;;
- prog)
- if test "$pass" = conv; then
- deplibs="$deplib $deplibs"
- continue
- fi
- if test "$pass" = scan; then
- deplibs="$deplib $deplibs"
- else
- compile_deplibs="$deplib $compile_deplibs"
- finalize_deplibs="$deplib $finalize_deplibs"
- fi
- func_stripname '-L' '' "$deplib"
- newlib_search_path="$newlib_search_path $func_stripname_result"
- ;;
- *)
- func_warning "\`-L' is ignored for archives/objects"
- ;;
- esac # linkmode
- continue
- ;; # -L
- -R*)
- if test "$pass" = link; then
- func_stripname '-R' '' "$deplib"
- dir=$func_stripname_result
- # Make sure the xrpath contains only unique directories.
- case "$xrpath " in
- *" $dir "*) ;;
- *) xrpath="$xrpath $dir" ;;
- esac
- fi
- deplibs="$deplib $deplibs"
- continue
- ;;
- *.la) lib="$deplib" ;;
- *.$libext)
- if test "$pass" = conv; then
- deplibs="$deplib $deplibs"
- continue
- fi
- case $linkmode in
- lib)
- # Linking convenience modules into shared libraries is allowed,
- # but linking other static libraries is non-portable.
- case " $dlpreconveniencelibs " in
- *" $deplib "*) ;;
- *)
- valid_a_lib=no
- case $deplibs_check_method in
- match_pattern*)
- set dummy $deplibs_check_method; shift
- match_pattern_regex=`expr "$deplibs_check_method" : "$1 \(.*\)"`
- if eval "\$ECHO \"X$deplib\"" 2>/dev/null | $Xsed -e 10q \
- | $EGREP "$match_pattern_regex" > /dev/null; then
- valid_a_lib=yes
- fi
- ;;
- pass_all)
- valid_a_lib=yes
- ;;
- esac
- if test "$valid_a_lib" != yes; then
- $ECHO
- $ECHO "*** Warning: Trying to link with static lib archive $deplib."
- $ECHO "*** I have the capability to make that library automatically link in when"
- $ECHO "*** you link to this library. But I can only do this if you have a"
- $ECHO "*** shared version of the library, which you do not appear to have"
- $ECHO "*** because the file extensions .$libext of this argument makes me believe"
- $ECHO "*** that it is just a static archive that I should not use here."
- else
- $ECHO
- $ECHO "*** Warning: Linking the shared library $output against the"
- $ECHO "*** static library $deplib is not portable!"
- deplibs="$deplib $deplibs"
- fi
- ;;
- esac
- continue
- ;;
- prog)
- if test "$pass" != link; then
- deplibs="$deplib $deplibs"
- else
- compile_deplibs="$deplib $compile_deplibs"
- finalize_deplibs="$deplib $finalize_deplibs"
- fi
- continue
- ;;
- esac # linkmode
- ;; # *.$libext
- *.lo | *.$objext)
- if test "$pass" = conv; then
- deplibs="$deplib $deplibs"
- elif test "$linkmode" = prog; then
- if test "$pass" = dlpreopen || test "$dlopen_support" != yes || test "$build_libtool_libs" = no; then
- # If there is no dlopen support or we're linking statically,
- # we need to preload.
- newdlprefiles="$newdlprefiles $deplib"
- compile_deplibs="$deplib $compile_deplibs"
- finalize_deplibs="$deplib $finalize_deplibs"
- else
- newdlfiles="$newdlfiles $deplib"
- fi
- fi
- continue
- ;;
- %DEPLIBS%)
- alldeplibs=yes
- continue
- ;;
- esac # case $deplib
-
- if test "$found" = yes || test -f "$lib"; then :
- else
- func_fatal_error "cannot find the library \`$lib' or unhandled argument \`$deplib'"
- fi
-
- # Check to see that this really is a libtool archive.
- func_lalib_unsafe_p "$lib" \
- || func_fatal_error "\`$lib' is not a valid libtool archive"
-
- func_dirname "$lib" "" "."
- ladir="$func_dirname_result"
-
- dlname=
- dlopen=
- dlpreopen=
- libdir=
- library_names=
- old_library=
- inherited_linker_flags=
- # If the library was installed with an old release of libtool,
- # it will not redefine variables installed, or shouldnotlink
- installed=yes
- shouldnotlink=no
- avoidtemprpath=
-
-
- # Read the .la file
- func_source "$lib"
-
- # Convert "-framework foo" to "foo.ltframework"
- if test -n "$inherited_linker_flags"; then
- tmp_inherited_linker_flags=`$ECHO "X$inherited_linker_flags" | $Xsed -e 's/-framework \([^ $]*\)/\1.ltframework/g'`
- for tmp_inherited_linker_flag in $tmp_inherited_linker_flags; do
- case " $new_inherited_linker_flags " in
- *" $tmp_inherited_linker_flag "*) ;;
- *) new_inherited_linker_flags="$new_inherited_linker_flags $tmp_inherited_linker_flag";;
- esac
- done
- fi
- dependency_libs=`$ECHO "X $dependency_libs" | $Xsed -e 's% \([^ $]*\).ltframework% -framework \1%g'`
- if test "$linkmode,$pass" = "lib,link" ||
- test "$linkmode,$pass" = "prog,scan" ||
- { test "$linkmode" != prog && test "$linkmode" != lib; }; then
- test -n "$dlopen" && dlfiles="$dlfiles $dlopen"
- test -n "$dlpreopen" && dlprefiles="$dlprefiles $dlpreopen"
- fi
-
- if test "$pass" = conv; then
- # Only check for convenience libraries
- deplibs="$lib $deplibs"
- if test -z "$libdir"; then
- if test -z "$old_library"; then
- func_fatal_error "cannot find name of link library for \`$lib'"
- fi
- # It is a libtool convenience library, so add in its objects.
- convenience="$convenience $ladir/$objdir/$old_library"
- old_convenience="$old_convenience $ladir/$objdir/$old_library"
- tmp_libs=
- for deplib in $dependency_libs; do
- deplibs="$deplib $deplibs"
- if $opt_duplicate_deps ; then
- case "$tmp_libs " in
- *" $deplib "*) specialdeplibs="$specialdeplibs $deplib" ;;
- esac
- fi
- tmp_libs="$tmp_libs $deplib"
- done
- elif test "$linkmode" != prog && test "$linkmode" != lib; then
- func_fatal_error "\`$lib' is not a convenience library"
- fi
- continue
- fi # $pass = conv
-
-
- # Get the name of the library we link against.
- linklib=
- for l in $old_library $library_names; do
- linklib="$l"
- done
- if test -z "$linklib"; then
- func_fatal_error "cannot find name of link library for \`$lib'"
- fi
-
- # This library was specified with -dlopen.
- if test "$pass" = dlopen; then
- if test -z "$libdir"; then
- func_fatal_error "cannot -dlopen a convenience library: \`$lib'"
- fi
- if test -z "$dlname" ||
- test "$dlopen_support" != yes ||
- test "$build_libtool_libs" = no; then
- # If there is no dlname, no dlopen support or we're linking
- # statically, we need to preload. We also need to preload any
- # dependent libraries so libltdl's deplib preloader doesn't
- # bomb out in the load deplibs phase.
- dlprefiles="$dlprefiles $lib $dependency_libs"
- else
- newdlfiles="$newdlfiles $lib"
- fi
- continue
- fi # $pass = dlopen
-
- # We need an absolute path.
- case $ladir in
- [\\/]* | [A-Za-z]:[\\/]*) abs_ladir="$ladir" ;;
- *)
- abs_ladir=`cd "$ladir" && pwd`
- if test -z "$abs_ladir"; then
- func_warning "cannot determine absolute directory name of \`$ladir'"
- func_warning "passing it literally to the linker, although it might fail"
- abs_ladir="$ladir"
- fi
- ;;
- esac
- func_basename "$lib"
- laname="$func_basename_result"
-
- # Find the relevant object directory and library name.
- if test "X$installed" = Xyes; then
- if test ! -f "$libdir/$linklib" && test -f "$abs_ladir/$linklib"; then
- func_warning "library \`$lib' was moved."
- dir="$ladir"
- absdir="$abs_ladir"
- libdir="$abs_ladir"
- else
- dir="$libdir"
- absdir="$libdir"
- fi
- test "X$hardcode_automatic" = Xyes && avoidtemprpath=yes
- else
- if test ! -f "$ladir/$objdir/$linklib" && test -f "$abs_ladir/$linklib"; then
- dir="$ladir"
- absdir="$abs_ladir"
- # Remove this search path later
- notinst_path="$notinst_path $abs_ladir"
- else
- dir="$ladir/$objdir"
- absdir="$abs_ladir/$objdir"
- # Remove this search path later
- notinst_path="$notinst_path $abs_ladir"
- fi
- fi # $installed = yes
- func_stripname 'lib' '.la' "$laname"
- name=$func_stripname_result
-
- # This library was specified with -dlpreopen.
- if test "$pass" = dlpreopen; then
- if test -z "$libdir" && test "$linkmode" = prog; then
- func_fatal_error "only libraries may -dlpreopen a convenience library: \`$lib'"
- fi
- # Prefer using a static library (so that no silly _DYNAMIC symbols
- # are required to link).
- if test -n "$old_library"; then
- newdlprefiles="$newdlprefiles $dir/$old_library"
- # Keep a list of preopened convenience libraries to check
- # that they are being used correctly in the link pass.
- test -z "$libdir" && \
- dlpreconveniencelibs="$dlpreconveniencelibs $dir/$old_library"
- # Otherwise, use the dlname, so that lt_dlopen finds it.
- elif test -n "$dlname"; then
- newdlprefiles="$newdlprefiles $dir/$dlname"
- else
- newdlprefiles="$newdlprefiles $dir/$linklib"
- fi
- fi # $pass = dlpreopen
-
- if test -z "$libdir"; then
- # Link the convenience library
- if test "$linkmode" = lib; then
- deplibs="$dir/$old_library $deplibs"
- elif test "$linkmode,$pass" = "prog,link"; then
- compile_deplibs="$dir/$old_library $compile_deplibs"
- finalize_deplibs="$dir/$old_library $finalize_deplibs"
- else
- deplibs="$lib $deplibs" # used for prog,scan pass
- fi
- continue
- fi
-
-
- if test "$linkmode" = prog && test "$pass" != link; then
- newlib_search_path="$newlib_search_path $ladir"
- deplibs="$lib $deplibs"
-
- linkalldeplibs=no
- if test "$link_all_deplibs" != no || test -z "$library_names" ||
- test "$build_libtool_libs" = no; then
- linkalldeplibs=yes
- fi
-
- tmp_libs=
- for deplib in $dependency_libs; do
- case $deplib in
- -L*) func_stripname '-L' '' "$deplib"
- newlib_search_path="$newlib_search_path $func_stripname_result"
- ;;
- esac
- # Need to link against all dependency_libs?
- if test "$linkalldeplibs" = yes; then
- deplibs="$deplib $deplibs"
- else
- # Need to hardcode shared library paths
- # or/and link against static libraries
- newdependency_libs="$deplib $newdependency_libs"
- fi
- if $opt_duplicate_deps ; then
- case "$tmp_libs " in
- *" $deplib "*) specialdeplibs="$specialdeplibs $deplib" ;;
- esac
- fi
- tmp_libs="$tmp_libs $deplib"
- done # for deplib
- continue
- fi # $linkmode = prog...
-
- if test "$linkmode,$pass" = "prog,link"; then
- if test -n "$library_names" &&
- { { test "$prefer_static_libs" = no ||
- test "$prefer_static_libs,$installed" = "built,yes"; } ||
- test -z "$old_library"; }; then
- # We need to hardcode the library path
- if test -n "$shlibpath_var" && test -z "$avoidtemprpath" ; then
- # Make sure the rpath contains only unique directories.
- case "$temp_rpath:" in
- *"$absdir:"*) ;;
- *) temp_rpath="$temp_rpath$absdir:" ;;
- esac
- fi
-
- # Hardcode the library path.
- # Skip directories that are in the system default run-time
- # search path.
- case " $sys_lib_dlsearch_path " in
- *" $absdir "*) ;;
- *)
- case "$compile_rpath " in
- *" $absdir "*) ;;
- *) compile_rpath="$compile_rpath $absdir"
- esac
- ;;
- esac
- case " $sys_lib_dlsearch_path " in
- *" $libdir "*) ;;
- *)
- case "$finalize_rpath " in
- *" $libdir "*) ;;
- *) finalize_rpath="$finalize_rpath $libdir"
- esac
- ;;
- esac
- fi # $linkmode,$pass = prog,link...
-
- if test "$alldeplibs" = yes &&
- { test "$deplibs_check_method" = pass_all ||
- { test "$build_libtool_libs" = yes &&
- test -n "$library_names"; }; }; then
- # We only need to search for static libraries
- continue
- fi
- fi
-
- link_static=no # Whether the deplib will be linked statically
- use_static_libs=$prefer_static_libs
- if test "$use_static_libs" = built && test "$installed" = yes; then
- use_static_libs=no
- fi
- if test -n "$library_names" &&
- { test "$use_static_libs" = no || test -z "$old_library"; }; then
- case $host in
- *cygwin* | *mingw* | *cegcc*)
- # No point in relinking DLLs because paths are not encoded
- notinst_deplibs="$notinst_deplibs $lib"
- need_relink=no
- ;;
- *)
- if test "$installed" = no; then
- notinst_deplibs="$notinst_deplibs $lib"
- need_relink=yes
- fi
- ;;
- esac
- # This is a shared library
-
- # Warn about portability, can't link against -module's on some
- # systems (darwin). Don't bleat about dlopened modules though!
- dlopenmodule=""
- for dlpremoduletest in $dlprefiles; do
- if test "X$dlpremoduletest" = "X$lib"; then
- dlopenmodule="$dlpremoduletest"
- break
- fi
- done
- if test -z "$dlopenmodule" && test "$shouldnotlink" = yes && test "$pass" = link; then
- $ECHO
- if test "$linkmode" = prog; then
- $ECHO "*** Warning: Linking the executable $output against the loadable module"
- else
- $ECHO "*** Warning: Linking the shared library $output against the loadable module"
- fi
- $ECHO "*** $linklib is not portable!"
- fi
- if test "$linkmode" = lib &&
- test "$hardcode_into_libs" = yes; then
- # Hardcode the library path.
- # Skip directories that are in the system default run-time
- # search path.
- case " $sys_lib_dlsearch_path " in
- *" $absdir "*) ;;
- *)
- case "$compile_rpath " in
- *" $absdir "*) ;;
- *) compile_rpath="$compile_rpath $absdir"
- esac
- ;;
- esac
- case " $sys_lib_dlsearch_path " in
- *" $libdir "*) ;;
- *)
- case "$finalize_rpath " in
- *" $libdir "*) ;;
- *) finalize_rpath="$finalize_rpath $libdir"
- esac
- ;;
- esac
- fi
-
- if test -n "$old_archive_from_expsyms_cmds"; then
- # figure out the soname
- set dummy $library_names
- shift
- realname="$1"
- shift
- libname=`eval "\\$ECHO \"$libname_spec\""`
- # use dlname if we got it. it's perfectly good, no?
- if test -n "$dlname"; then
- soname="$dlname"
- elif test -n "$soname_spec"; then
- # bleh windows
- case $host in
- *cygwin* | mingw* | *cegcc*)
- func_arith $current - $age
- major=$func_arith_result
- versuffix="-$major"
- ;;
- esac
- eval soname=\"$soname_spec\"
- else
- soname="$realname"
- fi
-
- # Make a new name for the extract_expsyms_cmds to use
- soroot="$soname"
- func_basename "$soroot"
- soname="$func_basename_result"
- func_stripname 'lib' '.dll' "$soname"
- newlib=libimp-$func_stripname_result.a
-
- # If the library has no export list, then create one now
- if test -f "$output_objdir/$soname-def"; then :
- else
- func_verbose "extracting exported symbol list from \`$soname'"
- func_execute_cmds "$extract_expsyms_cmds" 'exit $?'
- fi
-
- # Create $newlib
- if test -f "$output_objdir/$newlib"; then :; else
- func_verbose "generating import library for \`$soname'"
- func_execute_cmds "$old_archive_from_expsyms_cmds" 'exit $?'
- fi
- # make sure the library variables are pointing to the new library
- dir=$output_objdir
- linklib=$newlib
- fi # test -n "$old_archive_from_expsyms_cmds"
-
- if test "$linkmode" = prog || test "$mode" != relink; then
- add_shlibpath=
- add_dir=
- add=
- lib_linked=yes
- case $hardcode_action in
- immediate | unsupported)
- if test "$hardcode_direct" = no; then
- add="$dir/$linklib"
- case $host in
- *-*-sco3.2v5.0.[024]*) add_dir="-L$dir" ;;
- *-*-sysv4*uw2*) add_dir="-L$dir" ;;
- *-*-sysv5OpenUNIX* | *-*-sysv5UnixWare7.[01].[10]* | \
- *-*-unixware7*) add_dir="-L$dir" ;;
- *-*-darwin* )
- # if the lib is a (non-dlopened) module then we can not
- # link against it, someone is ignoring the earlier warnings
- if /usr/bin/file -L $add 2> /dev/null |
- $GREP ": [^:]* bundle" >/dev/null ; then
- if test "X$dlopenmodule" != "X$lib"; then
- $ECHO "*** Warning: lib $linklib is a module, not a shared library"
- if test -z "$old_library" ; then
- $ECHO
- $ECHO "*** And there doesn't seem to be a static archive available"
- $ECHO "*** The link will probably fail, sorry"
- else
- add="$dir/$old_library"
- fi
- elif test -n "$old_library"; then
- add="$dir/$old_library"
- fi
- fi
- esac
- elif test "$hardcode_minus_L" = no; then
- case $host in
- *-*-sunos*) add_shlibpath="$dir" ;;
- esac
- add_dir="-L$dir"
- add="-l$name"
- elif test "$hardcode_shlibpath_var" = no; then
- add_shlibpath="$dir"
- add="-l$name"
- else
- lib_linked=no
- fi
- ;;
- relink)
- if test "$hardcode_direct" = yes &&
- test "$hardcode_direct_absolute" = no; then
- add="$dir/$linklib"
- elif test "$hardcode_minus_L" = yes; then
- add_dir="-L$dir"
- # Try looking first in the location we're being installed to.
- if test -n "$inst_prefix_dir"; then
- case $libdir in
- [\\/]*)
- add_dir="$add_dir -L$inst_prefix_dir$libdir"
- ;;
- esac
- fi
- add="-l$name"
- elif test "$hardcode_shlibpath_var" = yes; then
- add_shlibpath="$dir"
- add="-l$name"
- else
- lib_linked=no
- fi
- ;;
- *) lib_linked=no ;;
- esac
-
- if test "$lib_linked" != yes; then
- func_fatal_configuration "unsupported hardcode properties"
- fi
-
- if test -n "$add_shlibpath"; then
- case :$compile_shlibpath: in
- *":$add_shlibpath:"*) ;;
- *) compile_shlibpath="$compile_shlibpath$add_shlibpath:" ;;
- esac
- fi
- if test "$linkmode" = prog; then
- test -n "$add_dir" && compile_deplibs="$add_dir $compile_deplibs"
- test -n "$add" && compile_deplibs="$add $compile_deplibs"
- else
- test -n "$add_dir" && deplibs="$add_dir $deplibs"
- test -n "$add" && deplibs="$add $deplibs"
- if test "$hardcode_direct" != yes &&
- test "$hardcode_minus_L" != yes &&
- test "$hardcode_shlibpath_var" = yes; then
- case :$finalize_shlibpath: in
- *":$libdir:"*) ;;
- *) finalize_shlibpath="$finalize_shlibpath$libdir:" ;;
- esac
- fi
- fi
- fi
-
- if test "$linkmode" = prog || test "$mode" = relink; then
- add_shlibpath=
- add_dir=
- add=
- # Finalize command for both is simple: just hardcode it.
- if test "$hardcode_direct" = yes &&
- test "$hardcode_direct_absolute" = no; then
- add="$libdir/$linklib"
- elif test "$hardcode_minus_L" = yes; then
- add_dir="-L$libdir"
- add="-l$name"
- elif test "$hardcode_shlibpath_var" = yes; then
- case :$finalize_shlibpath: in
- *":$libdir:"*) ;;
- *) finalize_shlibpath="$finalize_shlibpath$libdir:" ;;
- esac
- add="-l$name"
- elif test "$hardcode_automatic" = yes; then
- if test -n "$inst_prefix_dir" &&
- test -f "$inst_prefix_dir$libdir/$linklib" ; then
- add="$inst_prefix_dir$libdir/$linklib"
- else
- add="$libdir/$linklib"
- fi
- else
- # We cannot seem to hardcode it, guess we'll fake it.
- add_dir="-L$libdir"
- # Try looking first in the location we're being installed to.
- if test -n "$inst_prefix_dir"; then
- case $libdir in
- [\\/]*)
- add_dir="$add_dir -L$inst_prefix_dir$libdir"
- ;;
- esac
- fi
- add="-l$name"
- fi
-
- if test "$linkmode" = prog; then
- test -n "$add_dir" && finalize_deplibs="$add_dir $finalize_deplibs"
- test -n "$add" && finalize_deplibs="$add $finalize_deplibs"
- else
- test -n "$add_dir" && deplibs="$add_dir $deplibs"
- test -n "$add" && deplibs="$add $deplibs"
- fi
- fi
- elif test "$linkmode" = prog; then
- # Here we assume that one of hardcode_direct or hardcode_minus_L
- # is not unsupported. This is valid on all known static and
- # shared platforms.
- if test "$hardcode_direct" != unsupported; then
- test -n "$old_library" && linklib="$old_library"
- compile_deplibs="$dir/$linklib $compile_deplibs"
- finalize_deplibs="$dir/$linklib $finalize_deplibs"
- else
- compile_deplibs="-l$name -L$dir $compile_deplibs"
- finalize_deplibs="-l$name -L$dir $finalize_deplibs"
- fi
- elif test "$build_libtool_libs" = yes; then
- # Not a shared library
- if test "$deplibs_check_method" != pass_all; then
- # We're trying link a shared library against a static one
- # but the system doesn't support it.
-
- # Just print a warning and add the library to dependency_libs so
- # that the program can be linked against the static library.
- $ECHO
- $ECHO "*** Warning: This system can not link to static lib archive $lib."
- $ECHO "*** I have the capability to make that library automatically link in when"
- $ECHO "*** you link to this library. But I can only do this if you have a"
- $ECHO "*** shared version of the library, which you do not appear to have."
- if test "$module" = yes; then
- $ECHO "*** But as you try to build a module library, libtool will still create "
- $ECHO "*** a static module, that should work as long as the dlopening application"
- $ECHO "*** is linked with the -dlopen flag to resolve symbols at runtime."
- if test -z "$global_symbol_pipe"; then
- $ECHO
- $ECHO "*** However, this would only work if libtool was able to extract symbol"
- $ECHO "*** lists from a program, using \`nm' or equivalent, but libtool could"
- $ECHO "*** not find such a program. So, this module is probably useless."
- $ECHO "*** \`nm' from GNU binutils and a full rebuild may help."
- fi
- if test "$build_old_libs" = no; then
- build_libtool_libs=module
- build_old_libs=yes
- else
- build_libtool_libs=no
- fi
- fi
- else
- deplibs="$dir/$old_library $deplibs"
- link_static=yes
- fi
- fi # link shared/static library?
-
- if test "$linkmode" = lib; then
- if test -n "$dependency_libs" &&
- { test "$hardcode_into_libs" != yes ||
- test "$build_old_libs" = yes ||
- test "$link_static" = yes; }; then
- # Extract -R from dependency_libs
- temp_deplibs=
- for libdir in $dependency_libs; do
- case $libdir in
- -R*) func_stripname '-R' '' "$libdir"
- temp_xrpath=$func_stripname_result
- case " $xrpath " in
- *" $temp_xrpath "*) ;;
- *) xrpath="$xrpath $temp_xrpath";;
- esac;;
- *) temp_deplibs="$temp_deplibs $libdir";;
- esac
- done
- dependency_libs="$temp_deplibs"
- fi
-
- newlib_search_path="$newlib_search_path $absdir"
- # Link against this library
- test "$link_static" = no && newdependency_libs="$abs_ladir/$laname $newdependency_libs"
- # ... and its dependency_libs
- tmp_libs=
- for deplib in $dependency_libs; do
- newdependency_libs="$deplib $newdependency_libs"
- if $opt_duplicate_deps ; then
- case "$tmp_libs " in
- *" $deplib "*) specialdeplibs="$specialdeplibs $deplib" ;;
- esac
- fi
- tmp_libs="$tmp_libs $deplib"
- done
-
- if test "$link_all_deplibs" != no; then
- # Add the search paths of all dependency libraries
- for deplib in $dependency_libs; do
- path=
- case $deplib in
- -L*) path="$deplib" ;;
- *.la)
- func_dirname "$deplib" "" "."
- dir="$func_dirname_result"
- # We need an absolute path.
- case $dir in
- [\\/]* | [A-Za-z]:[\\/]*) absdir="$dir" ;;
- *)
- absdir=`cd "$dir" && pwd`
- if test -z "$absdir"; then
- func_warning "cannot determine absolute directory name of \`$dir'"
- absdir="$dir"
- fi
- ;;
- esac
- if $GREP "^installed=no" $deplib > /dev/null; then
- case $host in
- *-*-darwin*)
- depdepl=
- eval deplibrary_names=`${SED} -n -e 's/^library_names=\(.*\)$/\1/p' $deplib`
- if test -n "$deplibrary_names" ; then
- for tmp in $deplibrary_names ; do
- depdepl=$tmp
- done
- if test -f "$absdir/$objdir/$depdepl" ; then
- depdepl="$absdir/$objdir/$depdepl"
- darwin_install_name=`${OTOOL} -L $depdepl | awk '{if (NR == 2) {print $1;exit}}'`
- if test -z "$darwin_install_name"; then
- darwin_install_name=`${OTOOL64} -L $depdepl | awk '{if (NR == 2) {print $1;exit}}'`
- fi
- compiler_flags="$compiler_flags ${wl}-dylib_file ${wl}${darwin_install_name}:${depdepl}"
- linker_flags="$linker_flags -dylib_file ${darwin_install_name}:${depdepl}"
- path=
- fi
- fi
- ;;
- *)
- path="-L$absdir/$objdir"
- ;;
- esac
- else
- eval libdir=`${SED} -n -e 's/^libdir=\(.*\)$/\1/p' $deplib`
- test -z "$libdir" && \
- func_fatal_error "\`$deplib' is not a valid libtool archive"
- test "$absdir" != "$libdir" && \
- func_warning "\`$deplib' seems to be moved"
-
- path="-L$absdir"
- fi
- ;;
- esac
- case " $deplibs " in
- *" $path "*) ;;
- *) deplibs="$path $deplibs" ;;
- esac
- done
- fi # link_all_deplibs != no
- fi # linkmode = lib
- done # for deplib in $libs
- if test "$pass" = link; then
- if test "$linkmode" = "prog"; then
- compile_deplibs="$new_inherited_linker_flags $compile_deplibs"
- finalize_deplibs="$new_inherited_linker_flags $finalize_deplibs"
- else
- compiler_flags="$compiler_flags "`$ECHO "X $new_inherited_linker_flags" | $Xsed -e 's% \([^ $]*\).ltframework% -framework \1%g'`
- fi
- fi
- dependency_libs="$newdependency_libs"
- if test "$pass" = dlpreopen; then
- # Link the dlpreopened libraries before other libraries
- for deplib in $save_deplibs; do
- deplibs="$deplib $deplibs"
- done
- fi
- if test "$pass" != dlopen; then
- if test "$pass" != conv; then
- # Make sure lib_search_path contains only unique directories.
- lib_search_path=
- for dir in $newlib_search_path; do
- case "$lib_search_path " in
- *" $dir "*) ;;
- *) lib_search_path="$lib_search_path $dir" ;;
- esac
- done
- newlib_search_path=
- fi
-
- if test "$linkmode,$pass" != "prog,link"; then
- vars="deplibs"
- else
- vars="compile_deplibs finalize_deplibs"
- fi
- for var in $vars dependency_libs; do
- # Add libraries to $var in reverse order
- eval tmp_libs=\"\$$var\"
- new_libs=
- for deplib in $tmp_libs; do
- # FIXME: Pedantically, this is the right thing to do, so
- # that some nasty dependency loop isn't accidentally
- # broken:
- #new_libs="$deplib $new_libs"
- # Pragmatically, this seems to cause very few problems in
- # practice:
- case $deplib in
- -L*) new_libs="$deplib $new_libs" ;;
- -R*) ;;
- *)
- # And here is the reason: when a library appears more
- # than once as an explicit dependence of a library, or
- # is implicitly linked in more than once by the
- # compiler, it is considered special, and multiple
- # occurrences thereof are not removed. Compare this
- # with having the same library being listed as a
- # dependency of multiple other libraries: in this case,
- # we know (pedantically, we assume) the library does not
- # need to be listed more than once, so we keep only the
- # last copy. This is not always right, but it is rare
- # enough that we require users that really mean to play
- # such unportable linking tricks to link the library
- # using -Wl,-lname, so that libtool does not consider it
- # for duplicate removal.
- case " $specialdeplibs " in
- *" $deplib "*) new_libs="$deplib $new_libs" ;;
- *)
- case " $new_libs " in
- *" $deplib "*) ;;
- *) new_libs="$deplib $new_libs" ;;
- esac
- ;;
- esac
- ;;
- esac
- done
- tmp_libs=
- for deplib in $new_libs; do
- case $deplib in
- -L*)
- case " $tmp_libs " in
- *" $deplib "*) ;;
- *) tmp_libs="$tmp_libs $deplib" ;;
- esac
- ;;
- *) tmp_libs="$tmp_libs $deplib" ;;
- esac
- done
- eval $var=\"$tmp_libs\"
- done # for var
- fi
- # Last step: remove runtime libs from dependency_libs
- # (they stay in deplibs)
- tmp_libs=
- for i in $dependency_libs ; do
- case " $predeps $postdeps $compiler_lib_search_path " in
- *" $i "*)
- i=""
- ;;
- esac
- if test -n "$i" ; then
- tmp_libs="$tmp_libs $i"
- fi
- done
- dependency_libs=$tmp_libs
- done # for pass
- if test "$linkmode" = prog; then
- dlfiles="$newdlfiles"
- fi
- if test "$linkmode" = prog || test "$linkmode" = lib; then
- dlprefiles="$newdlprefiles"
- fi
-
- case $linkmode in
- oldlib)
- if test -n "$dlfiles$dlprefiles" || test "$dlself" != no; then
- func_warning "\`-dlopen' is ignored for archives"
- fi
-
- case " $deplibs" in
- *\ -l* | *\ -L*)
- func_warning "\`-l' and \`-L' are ignored for archives" ;;
- esac
-
- test -n "$rpath" && \
- func_warning "\`-rpath' is ignored for archives"
-
- test -n "$xrpath" && \
- func_warning "\`-R' is ignored for archives"
-
- test -n "$vinfo" && \
- func_warning "\`-version-info/-version-number' is ignored for archives"
-
- test -n "$release" && \
- func_warning "\`-release' is ignored for archives"
-
- test -n "$export_symbols$export_symbols_regex" && \
- func_warning "\`-export-symbols' is ignored for archives"
-
- # Now set the variables for building old libraries.
- build_libtool_libs=no
- oldlibs="$output"
- objs="$objs$old_deplibs"
- ;;
-
- lib)
- # Make sure we only generate libraries of the form `libNAME.la'.
- case $outputname in
- lib*)
- func_stripname 'lib' '.la' "$outputname"
- name=$func_stripname_result
- eval shared_ext=\"$shrext_cmds\"
- eval libname=\"$libname_spec\"
- ;;
- *)
- test "$module" = no && \
- func_fatal_help "libtool library \`$output' must begin with \`lib'"
-
- if test "$need_lib_prefix" != no; then
- # Add the "lib" prefix for modules if required
- func_stripname '' '.la' "$outputname"
- name=$func_stripname_result
- eval shared_ext=\"$shrext_cmds\"
- eval libname=\"$libname_spec\"
- else
- func_stripname '' '.la' "$outputname"
- libname=$func_stripname_result
- fi
- ;;
- esac
-
- if test -n "$objs"; then
- if test "$deplibs_check_method" != pass_all; then
- func_fatal_error "cannot build libtool library \`$output' from non-libtool objects on this host:$objs"
- else
- $ECHO
- $ECHO "*** Warning: Linking the shared library $output against the non-libtool"
- $ECHO "*** objects $objs is not portable!"
- libobjs="$libobjs $objs"
- fi
- fi
-
- test "$dlself" != no && \
- func_warning "\`-dlopen self' is ignored for libtool libraries"
-
- set dummy $rpath
- shift
- test "$#" -gt 1 && \
- func_warning "ignoring multiple \`-rpath's for a libtool library"
-
- install_libdir="$1"
-
- oldlibs=
- if test -z "$rpath"; then
- if test "$build_libtool_libs" = yes; then
- # Building a libtool convenience library.
- # Some compilers have problems with a `.al' extension so
- # convenience libraries should have the same extension an
- # archive normally would.
- oldlibs="$output_objdir/$libname.$libext $oldlibs"
- build_libtool_libs=convenience
- build_old_libs=yes
- fi
-
- test -n "$vinfo" && \
- func_warning "\`-version-info/-version-number' is ignored for convenience libraries"
-
- test -n "$release" && \
- func_warning "\`-release' is ignored for convenience libraries"
- else
-
- # Parse the version information argument.
- save_ifs="$IFS"; IFS=':'
- set dummy $vinfo 0 0 0
- shift
- IFS="$save_ifs"
-
- test -n "$7" && \
- func_fatal_help "too many parameters to \`-version-info'"
-
- # convert absolute version numbers to libtool ages
- # this retains compatibility with .la files and attempts
- # to make the code below a bit more comprehensible
-
- case $vinfo_number in
- yes)
- number_major="$1"
- number_minor="$2"
- number_revision="$3"
- #
- # There are really only two kinds -- those that
- # use the current revision as the major version
- # and those that subtract age and use age as
- # a minor version. But, then there is irix
- # which has an extra 1 added just for fun
- #
- case $version_type in
- darwin|linux|osf|windows|none)
- func_arith $number_major + $number_minor
- current=$func_arith_result
- age="$number_minor"
- revision="$number_revision"
- ;;
- freebsd-aout|freebsd-elf|sunos)
- current="$number_major"
- revision="$number_minor"
- age="0"
- ;;
- irix|nonstopux)
- func_arith $number_major + $number_minor
- current=$func_arith_result
- age="$number_minor"
- revision="$number_minor"
- lt_irix_increment=no
- ;;
- *)
- func_fatal_configuration "$modename: unknown library version type \`$version_type'"
- ;;
- esac
- ;;
- no)
- current="$1"
- revision="$2"
- age="$3"
- ;;
- esac
-
- # Check that each of the things are valid numbers.
- case $current in
- 0|[1-9]|[1-9][0-9]|[1-9][0-9][0-9]|[1-9][0-9][0-9][0-9]|[1-9][0-9][0-9][0-9][0-9]) ;;
- *)
- func_error "CURRENT \`$current' must be a nonnegative integer"
- func_fatal_error "\`$vinfo' is not valid version information"
- ;;
- esac
-
- case $revision in
- 0|[1-9]|[1-9][0-9]|[1-9][0-9][0-9]|[1-9][0-9][0-9][0-9]|[1-9][0-9][0-9][0-9][0-9]) ;;
- *)
- func_error "REVISION \`$revision' must be a nonnegative integer"
- func_fatal_error "\`$vinfo' is not valid version information"
- ;;
- esac
-
- case $age in
- 0|[1-9]|[1-9][0-9]|[1-9][0-9][0-9]|[1-9][0-9][0-9][0-9]|[1-9][0-9][0-9][0-9][0-9]) ;;
- *)
- func_error "AGE \`$age' must be a nonnegative integer"
- func_fatal_error "\`$vinfo' is not valid version information"
- ;;
- esac
-
- if test "$age" -gt "$current"; then
- func_error "AGE \`$age' is greater than the current interface number \`$current'"
- func_fatal_error "\`$vinfo' is not valid version information"
- fi
-
- # Calculate the version variables.
- major=
- versuffix=
- verstring=
- case $version_type in
- none) ;;
-
- darwin)
- # Like Linux, but with the current version available in
- # verstring for coding it into the library header
- func_arith $current - $age
- major=.$func_arith_result
- versuffix="$major.$age.$revision"
- # Darwin ld doesn't like 0 for these options...
- func_arith $current + 1
- minor_current=$func_arith_result
- xlcverstring="${wl}-compatibility_version ${wl}$minor_current ${wl}-current_version ${wl}$minor_current.$revision"
- verstring="-compatibility_version $minor_current -current_version $minor_current.$revision"
- ;;
-
- freebsd-aout)
- major=".$current"
- versuffix=".$current.$revision";
- ;;
-
- freebsd-elf)
- major=".$current"
- versuffix=".$current"
- ;;
-
- irix | nonstopux)
- if test "X$lt_irix_increment" = "Xno"; then
- func_arith $current - $age
- else
- func_arith $current - $age + 1
- fi
- major=$func_arith_result
-
- case $version_type in
- nonstopux) verstring_prefix=nonstopux ;;
- *) verstring_prefix=sgi ;;
- esac
- verstring="$verstring_prefix$major.$revision"
-
- # Add in all the interfaces that we are compatible with.
- loop=$revision
- while test "$loop" -ne 0; do
- func_arith $revision - $loop
- iface=$func_arith_result
- func_arith $loop - 1
- loop=$func_arith_result
- verstring="$verstring_prefix$major.$iface:$verstring"
- done
-
- # Before this point, $major must not contain `.'.
- major=.$major
- versuffix="$major.$revision"
- ;;
-
- linux)
- func_arith $current - $age
- major=.$func_arith_result
- versuffix="$major.$age.$revision"
- ;;
-
- osf)
- func_arith $current - $age
- major=.$func_arith_result
- versuffix=".$current.$age.$revision"
- verstring="$current.$age.$revision"
-
- # Add in all the interfaces that we are compatible with.
- loop=$age
- while test "$loop" -ne 0; do
- func_arith $current - $loop
- iface=$func_arith_result
- func_arith $loop - 1
- loop=$func_arith_result
- verstring="$verstring:${iface}.0"
- done
-
- # Make executables depend on our current version.
- verstring="$verstring:${current}.0"
- ;;
-
- qnx)
- major=".$current"
- versuffix=".$current"
- ;;
-
- sunos)
- major=".$current"
- versuffix=".$current.$revision"
- ;;
-
- windows)
- # Use '-' rather than '.', since we only want one
- # extension on DOS 8.3 filesystems.
- func_arith $current - $age
- major=$func_arith_result
- versuffix="-$major"
- ;;
-
- *)
- func_fatal_configuration "unknown library version type \`$version_type'"
- ;;
- esac
-
- # Clear the version info if we defaulted, and they specified a release.
- if test -z "$vinfo" && test -n "$release"; then
- major=
- case $version_type in
- darwin)
- # we can't check for "0.0" in archive_cmds due to quoting
- # problems, so we reset it completely
- verstring=
- ;;
- *)
- verstring="0.0"
- ;;
- esac
- if test "$need_version" = no; then
- versuffix=
- else
- versuffix=".0.0"
- fi
- fi
-
- # Remove version info from name if versioning should be avoided
- if test "$avoid_version" = yes && test "$need_version" = no; then
- major=
- versuffix=
- verstring=""
- fi
-
- # Check to see if the archive will have undefined symbols.
- if test "$allow_undefined" = yes; then
- if test "$allow_undefined_flag" = unsupported; then
- func_warning "undefined symbols not allowed in $host shared libraries"
- build_libtool_libs=no
- build_old_libs=yes
- fi
- else
- # Don't allow undefined symbols.
- allow_undefined_flag="$no_undefined_flag"
- fi
-
- fi
-
- func_generate_dlsyms "$libname" "$libname" "yes"
- libobjs="$libobjs $symfileobj"
- test "X$libobjs" = "X " && libobjs=
-
- if test "$mode" != relink; then
- # Remove our outputs, but don't remove object files since they
- # may have been created when compiling PIC objects.
- removelist=
- tempremovelist=`$ECHO "$output_objdir/*"`
- for p in $tempremovelist; do
- case $p in
- *.$objext | *.gcno)
- ;;
- $output_objdir/$outputname | $output_objdir/$libname.* | $output_objdir/${libname}${release}.*)
- if test "X$precious_files_regex" != "X"; then
- if $ECHO "$p" | $EGREP -e "$precious_files_regex" >/dev/null 2>&1
- then
- continue
- fi
- fi
- removelist="$removelist $p"
- ;;
- *) ;;
- esac
- done
- test -n "$removelist" && \
- func_show_eval "${RM}r \$removelist"
- fi
-
- # Now set the variables for building old libraries.
- if test "$build_old_libs" = yes && test "$build_libtool_libs" != convenience ; then
- oldlibs="$oldlibs $output_objdir/$libname.$libext"
-
- # Transform .lo files to .o files.
- oldobjs="$objs "`$ECHO "X$libobjs" | $SP2NL | $Xsed -e '/\.'${libext}'$/d' -e "$lo2o" | $NL2SP`
- fi
-
- # Eliminate all temporary directories.
- #for path in $notinst_path; do
- # lib_search_path=`$ECHO "X$lib_search_path " | $Xsed -e "s% $path % %g"`
- # deplibs=`$ECHO "X$deplibs " | $Xsed -e "s% -L$path % %g"`
- # dependency_libs=`$ECHO "X$dependency_libs " | $Xsed -e "s% -L$path % %g"`
- #done
-
- if test -n "$xrpath"; then
- # If the user specified any rpath flags, then add them.
- temp_xrpath=
- for libdir in $xrpath; do
- temp_xrpath="$temp_xrpath -R$libdir"
- case "$finalize_rpath " in
- *" $libdir "*) ;;
- *) finalize_rpath="$finalize_rpath $libdir" ;;
- esac
- done
- if test "$hardcode_into_libs" != yes || test "$build_old_libs" = yes; then
- dependency_libs="$temp_xrpath $dependency_libs"
- fi
- fi
-
- # Make sure dlfiles contains only unique files that won't be dlpreopened
- old_dlfiles="$dlfiles"
- dlfiles=
- for lib in $old_dlfiles; do
- case " $dlprefiles $dlfiles " in
- *" $lib "*) ;;
- *) dlfiles="$dlfiles $lib" ;;
- esac
- done
-
- # Make sure dlprefiles contains only unique files
- old_dlprefiles="$dlprefiles"
- dlprefiles=
- for lib in $old_dlprefiles; do
- case "$dlprefiles " in
- *" $lib "*) ;;
- *) dlprefiles="$dlprefiles $lib" ;;
- esac
- done
-
- if test "$build_libtool_libs" = yes; then
- if test -n "$rpath"; then
- case $host in
- *-*-cygwin* | *-*-mingw* | *-*-pw32* | *-*-os2* | *-*-beos* | *-cegcc*)
- # these systems don't actually have a c library (as such)!
- ;;
- *-*-rhapsody* | *-*-darwin1.[012])
- # Rhapsody C library is in the System framework
- deplibs="$deplibs System.ltframework"
- ;;
- *-*-netbsd*)
- # Don't link with libc until the a.out ld.so is fixed.
- ;;
- *-*-openbsd* | *-*-freebsd* | *-*-dragonfly*)
- # Do not include libc due to us having libc/libc_r.
- ;;
- *-*-sco3.2v5* | *-*-sco5v6*)
- # Causes problems with __ctype
- ;;
- *-*-sysv4.2uw2* | *-*-sysv5* | *-*-unixware* | *-*-OpenUNIX*)
- # Compiler inserts libc in the correct place for threads to work
- ;;
- *)
- # Add libc to deplibs on all other systems if necessary.
- if test "$build_libtool_need_lc" = "yes"; then
- deplibs="$deplibs -lc"
- fi
- ;;
- esac
- fi
-
- # Transform deplibs into only deplibs that can be linked in shared.
- name_save=$name
- libname_save=$libname
- release_save=$release
- versuffix_save=$versuffix
- major_save=$major
- # I'm not sure if I'm treating the release correctly. I think
- # release should show up in the -l (ie -lgmp5) so we don't want to
- # add it in twice. Is that correct?
- release=""
- versuffix=""
- major=""
- newdeplibs=
- droppeddeps=no
- case $deplibs_check_method in
- pass_all)
- # Don't check for shared/static. Everything works.
- # This might be a little naive. We might want to check
- # whether the library exists or not. But this is on
- # osf3 & osf4 and I'm not really sure... Just
- # implementing what was already the behavior.
- newdeplibs=$deplibs
- ;;
- test_compile)
- # This code stresses the "libraries are programs" paradigm to its
- # limits. Maybe even breaks it. We compile a program, linking it
- # against the deplibs as a proxy for the library. Then we can check
- # whether they linked in statically or dynamically with ldd.
- $opt_dry_run || $RM conftest.c
- cat > conftest.c <<EOF
- int main() { return 0; }
-EOF
- $opt_dry_run || $RM conftest
- if $LTCC $LTCFLAGS -o conftest conftest.c $deplibs; then
- ldd_output=`ldd conftest`
- for i in $deplibs; do
- case $i in
- -l*)
- func_stripname -l '' "$i"
- name=$func_stripname_result
- if test "X$allow_libtool_libs_with_static_runtimes" = "Xyes" ; then
- case " $predeps $postdeps " in
- *" $i "*)
- newdeplibs="$newdeplibs $i"
- i=""
- ;;
- esac
- fi
- if test -n "$i" ; then
- libname=`eval "\\$ECHO \"$libname_spec\""`
- deplib_matches=`eval "\\$ECHO \"$library_names_spec\""`
- set dummy $deplib_matches; shift
- deplib_match=$1
- if test `expr "$ldd_output" : ".*$deplib_match"` -ne 0 ; then
- newdeplibs="$newdeplibs $i"
- else
- droppeddeps=yes
- $ECHO
- $ECHO "*** Warning: dynamic linker does not accept needed library $i."
- $ECHO "*** I have the capability to make that library automatically link in when"
- $ECHO "*** you link to this library. But I can only do this if you have a"
- $ECHO "*** shared version of the library, which I believe you do not have"
- $ECHO "*** because a test_compile did reveal that the linker did not use it for"
- $ECHO "*** its dynamic dependency list that programs get resolved with at runtime."
- fi
- fi
- ;;
- *)
- newdeplibs="$newdeplibs $i"
- ;;
- esac
- done
- else
- # Error occurred in the first compile. Let's try to salvage
- # the situation: Compile a separate program for each library.
- for i in $deplibs; do
- case $i in
- -l*)
- func_stripname -l '' "$i"
- name=$func_stripname_result
- $opt_dry_run || $RM conftest
- if $LTCC $LTCFLAGS -o conftest conftest.c $i; then
- ldd_output=`ldd conftest`
- if test "X$allow_libtool_libs_with_static_runtimes" = "Xyes" ; then
- case " $predeps $postdeps " in
- *" $i "*)
- newdeplibs="$newdeplibs $i"
- i=""
- ;;
- esac
- fi
- if test -n "$i" ; then
- libname=`eval "\\$ECHO \"$libname_spec\""`
- deplib_matches=`eval "\\$ECHO \"$library_names_spec\""`
- set dummy $deplib_matches; shift
- deplib_match=$1
- if test `expr "$ldd_output" : ".*$deplib_match"` -ne 0 ; then
- newdeplibs="$newdeplibs $i"
- else
- droppeddeps=yes
- $ECHO
- $ECHO "*** Warning: dynamic linker does not accept needed library $i."
- $ECHO "*** I have the capability to make that library automatically link in when"
- $ECHO "*** you link to this library. But I can only do this if you have a"
- $ECHO "*** shared version of the library, which you do not appear to have"
- $ECHO "*** because a test_compile did reveal that the linker did not use this one"
- $ECHO "*** as a dynamic dependency that programs can get resolved with at runtime."
- fi
- fi
- else
- droppeddeps=yes
- $ECHO
- $ECHO "*** Warning! Library $i is needed by this library but I was not able to"
- $ECHO "*** make it link in! You will probably need to install it or some"
- $ECHO "*** library that it depends on before this library will be fully"
- $ECHO "*** functional. Installing it before continuing would be even better."
- fi
- ;;
- *)
- newdeplibs="$newdeplibs $i"
- ;;
- esac
- done
- fi
- ;;
- file_magic*)
- set dummy $deplibs_check_method; shift
- file_magic_regex=`expr "$deplibs_check_method" : "$1 \(.*\)"`
- for a_deplib in $deplibs; do
- case $a_deplib in
- -l*)
- func_stripname -l '' "$a_deplib"
- name=$func_stripname_result
- if test "X$allow_libtool_libs_with_static_runtimes" = "Xyes" ; then
- case " $predeps $postdeps " in
- *" $a_deplib "*)
- newdeplibs="$newdeplibs $a_deplib"
- a_deplib=""
- ;;
- esac
- fi
- if test -n "$a_deplib" ; then
- libname=`eval "\\$ECHO \"$libname_spec\""`
- for i in $lib_search_path $sys_lib_search_path $shlib_search_path; do
- potential_libs=`ls $i/$libname[.-]* 2>/dev/null`
- for potent_lib in $potential_libs; do
- # Follow soft links.
- if ls -lLd "$potent_lib" 2>/dev/null |
- $GREP " -> " >/dev/null; then
- continue
- fi
- # The statement above tries to avoid entering an
- # endless loop below, in case of cyclic links.
- # We might still enter an endless loop, since a link
- # loop can be closed while we follow links,
- # but so what?
- potlib="$potent_lib"
- while test -h "$potlib" 2>/dev/null; do
- potliblink=`ls -ld $potlib | ${SED} 's/.* -> //'`
- case $potliblink in
- [\\/]* | [A-Za-z]:[\\/]*) potlib="$potliblink";;
- *) potlib=`$ECHO "X$potlib" | $Xsed -e 's,[^/]*$,,'`"$potliblink";;
- esac
- done
- if eval $file_magic_cmd \"\$potlib\" 2>/dev/null |
- $SED -e 10q |
- $EGREP "$file_magic_regex" > /dev/null; then
- newdeplibs="$newdeplibs $a_deplib"
- a_deplib=""
- break 2
- fi
- done
- done
- fi
- if test -n "$a_deplib" ; then
- droppeddeps=yes
- $ECHO
- $ECHO "*** Warning: linker path does not have real file for library $a_deplib."
- $ECHO "*** I have the capability to make that library automatically link in when"
- $ECHO "*** you link to this library. But I can only do this if you have a"
- $ECHO "*** shared version of the library, which you do not appear to have"
- $ECHO "*** because I did check the linker path looking for a file starting"
- if test -z "$potlib" ; then
- $ECHO "*** with $libname but no candidates were found. (...for file magic test)"
- else
- $ECHO "*** with $libname and none of the candidates passed a file format test"
- $ECHO "*** using a file magic. Last file checked: $potlib"
- fi
- fi
- ;;
- *)
- # Add a -L argument.
- newdeplibs="$newdeplibs $a_deplib"
- ;;
- esac
- done # Gone through all deplibs.
- ;;
- match_pattern*)
- set dummy $deplibs_check_method; shift
- match_pattern_regex=`expr "$deplibs_check_method" : "$1 \(.*\)"`
- for a_deplib in $deplibs; do
- case $a_deplib in
- -l*)
- func_stripname -l '' "$a_deplib"
- name=$func_stripname_result
- if test "X$allow_libtool_libs_with_static_runtimes" = "Xyes" ; then
- case " $predeps $postdeps " in
- *" $a_deplib "*)
- newdeplibs="$newdeplibs $a_deplib"
- a_deplib=""
- ;;
- esac
- fi
- if test -n "$a_deplib" ; then
- libname=`eval "\\$ECHO \"$libname_spec\""`
- for i in $lib_search_path $sys_lib_search_path $shlib_search_path; do
- potential_libs=`ls $i/$libname[.-]* 2>/dev/null`
- for potent_lib in $potential_libs; do
- potlib="$potent_lib" # see symlink-check above in file_magic test
- if eval "\$ECHO \"X$potent_lib\"" 2>/dev/null | $Xsed -e 10q | \
- $EGREP "$match_pattern_regex" > /dev/null; then
- newdeplibs="$newdeplibs $a_deplib"
- a_deplib=""
- break 2
- fi
- done
- done
- fi
- if test -n "$a_deplib" ; then
- droppeddeps=yes
- $ECHO
- $ECHO "*** Warning: linker path does not have real file for library $a_deplib."
- $ECHO "*** I have the capability to make that library automatically link in when"
- $ECHO "*** you link to this library. But I can only do this if you have a"
- $ECHO "*** shared version of the library, which you do not appear to have"
- $ECHO "*** because I did check the linker path looking for a file starting"
- if test -z "$potlib" ; then
- $ECHO "*** with $libname but no candidates were found. (...for regex pattern test)"
- else
- $ECHO "*** with $libname and none of the candidates passed a file format test"
- $ECHO "*** using a regex pattern. Last file checked: $potlib"
- fi
- fi
- ;;
- *)
- # Add a -L argument.
- newdeplibs="$newdeplibs $a_deplib"
- ;;
- esac
- done # Gone through all deplibs.
- ;;
- none | unknown | *)
- newdeplibs=""
- tmp_deplibs=`$ECHO "X $deplibs" | $Xsed \
- -e 's/ -lc$//' -e 's/ -[LR][^ ]*//g'`
- if test "X$allow_libtool_libs_with_static_runtimes" = "Xyes" ; then
- for i in $predeps $postdeps ; do
- # can't use Xsed below, because $i might contain '/'
- tmp_deplibs=`$ECHO "X $tmp_deplibs" | $Xsed -e "s,$i,,"`
- done
- fi
- if $ECHO "X $tmp_deplibs" | $Xsed -e 's/[ ]//g' |
- $GREP . >/dev/null; then
- $ECHO
- if test "X$deplibs_check_method" = "Xnone"; then
- $ECHO "*** Warning: inter-library dependencies are not supported in this platform."
- else
- $ECHO "*** Warning: inter-library dependencies are not known to be supported."
- fi
- $ECHO "*** All declared inter-library dependencies are being dropped."
- droppeddeps=yes
- fi
- ;;
- esac
- versuffix=$versuffix_save
- major=$major_save
- release=$release_save
- libname=$libname_save
- name=$name_save
-
- case $host in
- *-*-rhapsody* | *-*-darwin1.[012])
- # On Rhapsody replace the C library with the System framework
- newdeplibs=`$ECHO "X $newdeplibs" | $Xsed -e 's/ -lc / System.ltframework /'`
- ;;
- esac
-
- if test "$droppeddeps" = yes; then
- if test "$module" = yes; then
- $ECHO
- $ECHO "*** Warning: libtool could not satisfy all declared inter-library"
- $ECHO "*** dependencies of module $libname. Therefore, libtool will create"
- $ECHO "*** a static module, that should work as long as the dlopening"
- $ECHO "*** application is linked with the -dlopen flag."
- if test -z "$global_symbol_pipe"; then
- $ECHO
- $ECHO "*** However, this would only work if libtool was able to extract symbol"
- $ECHO "*** lists from a program, using \`nm' or equivalent, but libtool could"
- $ECHO "*** not find such a program. So, this module is probably useless."
- $ECHO "*** \`nm' from GNU binutils and a full rebuild may help."
- fi
- if test "$build_old_libs" = no; then
- oldlibs="$output_objdir/$libname.$libext"
- build_libtool_libs=module
- build_old_libs=yes
- else
- build_libtool_libs=no
- fi
- else
- $ECHO "*** The inter-library dependencies that have been dropped here will be"
- $ECHO "*** automatically added whenever a program is linked with this library"
- $ECHO "*** or is declared to -dlopen it."
-
- if test "$allow_undefined" = no; then
- $ECHO
- $ECHO "*** Since this library must not contain undefined symbols,"
- $ECHO "*** because either the platform does not support them or"
- $ECHO "*** it was explicitly requested with -no-undefined,"
- $ECHO "*** libtool will only create a static version of it."
- if test "$build_old_libs" = no; then
- oldlibs="$output_objdir/$libname.$libext"
- build_libtool_libs=module
- build_old_libs=yes
- else
- build_libtool_libs=no
- fi
- fi
- fi
- fi
- # Done checking deplibs!
- deplibs=$newdeplibs
- fi
- # Time to change all our "foo.ltframework" stuff back to "-framework foo"
- case $host in
- *-*-darwin*)
- newdeplibs=`$ECHO "X $newdeplibs" | $Xsed -e 's% \([^ $]*\).ltframework% -framework \1%g'`
- new_inherited_linker_flags=`$ECHO "X $new_inherited_linker_flags" | $Xsed -e 's% \([^ $]*\).ltframework% -framework \1%g'`
- deplibs=`$ECHO "X $deplibs" | $Xsed -e 's% \([^ $]*\).ltframework% -framework \1%g'`
- ;;
- esac
-
- # move library search paths that coincide with paths to not yet
- # installed libraries to the beginning of the library search list
- new_libs=
- for path in $notinst_path; do
- case " $new_libs " in
- *" -L$path/$objdir "*) ;;
- *)
- case " $deplibs " in
- *" -L$path/$objdir "*)
- new_libs="$new_libs -L$path/$objdir" ;;
- esac
- ;;
- esac
- done
- for deplib in $deplibs; do
- case $deplib in
- -L*)
- case " $new_libs " in
- *" $deplib "*) ;;
- *) new_libs="$new_libs $deplib" ;;
- esac
- ;;
- *) new_libs="$new_libs $deplib" ;;
- esac
- done
- deplibs="$new_libs"
-
- # All the library-specific variables (install_libdir is set above).
- library_names=
- old_library=
- dlname=
-
- # Test again, we may have decided not to build it any more
- if test "$build_libtool_libs" = yes; then
- if test "$hardcode_into_libs" = yes; then
- # Hardcode the library paths
- hardcode_libdirs=
- dep_rpath=
- rpath="$finalize_rpath"
- test "$mode" != relink && rpath="$compile_rpath$rpath"
- for libdir in $rpath; do
- if test -n "$hardcode_libdir_flag_spec"; then
- if test -n "$hardcode_libdir_separator"; then
- if test -z "$hardcode_libdirs"; then
- hardcode_libdirs="$libdir"
- else
- # Just accumulate the unique libdirs.
- case $hardcode_libdir_separator$hardcode_libdirs$hardcode_libdir_separator in
- *"$hardcode_libdir_separator$libdir$hardcode_libdir_separator"*)
- ;;
- *)
- hardcode_libdirs="$hardcode_libdirs$hardcode_libdir_separator$libdir"
- ;;
- esac
- fi
- else
- eval flag=\"$hardcode_libdir_flag_spec\"
- dep_rpath="$dep_rpath $flag"
- fi
- elif test -n "$runpath_var"; then
- case "$perm_rpath " in
- *" $libdir "*) ;;
- *) perm_rpath="$perm_rpath $libdir" ;;
- esac
- fi
- done
- # Substitute the hardcoded libdirs into the rpath.
- if test -n "$hardcode_libdir_separator" &&
- test -n "$hardcode_libdirs"; then
- libdir="$hardcode_libdirs"
- if test -n "$hardcode_libdir_flag_spec_ld"; then
- eval dep_rpath=\"$hardcode_libdir_flag_spec_ld\"
- else
- eval dep_rpath=\"$hardcode_libdir_flag_spec\"
- fi
- fi
- if test -n "$runpath_var" && test -n "$perm_rpath"; then
- # We should set the runpath_var.
- rpath=
- for dir in $perm_rpath; do
- rpath="$rpath$dir:"
- done
- eval "$runpath_var='$rpath\$$runpath_var'; export $runpath_var"
- fi
- test -n "$dep_rpath" && deplibs="$dep_rpath $deplibs"
- fi
-
- shlibpath="$finalize_shlibpath"
- test "$mode" != relink && shlibpath="$compile_shlibpath$shlibpath"
- if test -n "$shlibpath"; then
- eval "$shlibpath_var='$shlibpath\$$shlibpath_var'; export $shlibpath_var"
- fi
-
- # Get the real and link names of the library.
- eval shared_ext=\"$shrext_cmds\"
- eval library_names=\"$library_names_spec\"
- set dummy $library_names
- shift
- realname="$1"
- shift
-
- if test -n "$soname_spec"; then
- eval soname=\"$soname_spec\"
- else
- soname="$realname"
- fi
- if test -z "$dlname"; then
- dlname=$soname
- fi
-
- lib="$output_objdir/$realname"
- linknames=
- for link
- do
- linknames="$linknames $link"
- done
-
- # Use standard objects if they are pic
- test -z "$pic_flag" && libobjs=`$ECHO "X$libobjs" | $SP2NL | $Xsed -e "$lo2o" | $NL2SP`
- test "X$libobjs" = "X " && libobjs=
-
- delfiles=
- if test -n "$export_symbols" && test -n "$include_expsyms"; then
- $opt_dry_run || cp "$export_symbols" "$output_objdir/$libname.uexp"
- export_symbols="$output_objdir/$libname.uexp"
- delfiles="$delfiles $export_symbols"
- fi
-
- orig_export_symbols=
- case $host_os in
- cygwin* | mingw* | cegcc*)
- if test -n "$export_symbols" && test -z "$export_symbols_regex"; then
- # exporting using user supplied symfile
- if test "x`$SED 1q $export_symbols`" != xEXPORTS; then
- # and it's NOT already a .def file. Must figure out
- # which of the given symbols are data symbols and tag
- # them as such. So, trigger use of export_symbols_cmds.
- # export_symbols gets reassigned inside the "prepare
- # the list of exported symbols" if statement, so the
- # include_expsyms logic still works.
- orig_export_symbols="$export_symbols"
- export_symbols=
- always_export_symbols=yes
- fi
- fi
- ;;
- esac
-
- # Prepare the list of exported symbols
- if test -z "$export_symbols"; then
- if test "$always_export_symbols" = yes || test -n "$export_symbols_regex"; then
- func_verbose "generating symbol list for \`$libname.la'"
- export_symbols="$output_objdir/$libname.exp"
- $opt_dry_run || $RM $export_symbols
- cmds=$export_symbols_cmds
- save_ifs="$IFS"; IFS='~'
- for cmd in $cmds; do
- IFS="$save_ifs"
- eval cmd=\"$cmd\"
- func_len " $cmd"
- len=$func_len_result
- if test "$len" -lt "$max_cmd_len" || test "$max_cmd_len" -le -1; then
- func_show_eval "$cmd" 'exit $?'
- skipped_export=false
- else
- # The command line is too long to execute in one step.
- func_verbose "using reloadable object file for export list..."
- skipped_export=:
- # Break out early, otherwise skipped_export may be
- # set to false by a later but shorter cmd.
- break
- fi
- done
- IFS="$save_ifs"
- if test -n "$export_symbols_regex" && test "X$skipped_export" != "X:"; then
- func_show_eval '$EGREP -e "$export_symbols_regex" "$export_symbols" > "${export_symbols}T"'
- func_show_eval '$MV "${export_symbols}T" "$export_symbols"'
- fi
- fi
- fi
-
- if test -n "$export_symbols" && test -n "$include_expsyms"; then
- tmp_export_symbols="$export_symbols"
- test -n "$orig_export_symbols" && tmp_export_symbols="$orig_export_symbols"
- $opt_dry_run || eval '$ECHO "X$include_expsyms" | $Xsed | $SP2NL >> "$tmp_export_symbols"'
- fi
-
- if test "X$skipped_export" != "X:" && test -n "$orig_export_symbols"; then
- # The given exports_symbols file has to be filtered, so filter it.
- func_verbose "filter symbol list for \`$libname.la' to tag DATA exports"
- # FIXME: $output_objdir/$libname.filter potentially contains lots of
- # 's' commands which not all seds can handle. GNU sed should be fine
- # though. Also, the filter scales superlinearly with the number of
- # global variables. join(1) would be nice here, but unfortunately
- # isn't a blessed tool.
- $opt_dry_run || $SED -e '/[ ,]DATA/!d;s,\(.*\)\([ \,].*\),s|^\1$|\1\2|,' < $export_symbols > $output_objdir/$libname.filter
- delfiles="$delfiles $export_symbols $output_objdir/$libname.filter"
- export_symbols=$output_objdir/$libname.def
- $opt_dry_run || $SED -f $output_objdir/$libname.filter < $orig_export_symbols > $export_symbols
- fi
-
- tmp_deplibs=
- for test_deplib in $deplibs; do
- case " $convenience " in
- *" $test_deplib "*) ;;
- *)
- tmp_deplibs="$tmp_deplibs $test_deplib"
- ;;
- esac
- done
- deplibs="$tmp_deplibs"
-
- if test -n "$convenience"; then
- if test -n "$whole_archive_flag_spec" &&
- test "$compiler_needs_object" = yes &&
- test -z "$libobjs"; then
- # extract the archives, so we have objects to list.
- # TODO: could optimize this to just extract one archive.
- whole_archive_flag_spec=
- fi
- if test -n "$whole_archive_flag_spec"; then
- save_libobjs=$libobjs
- eval libobjs=\"\$libobjs $whole_archive_flag_spec\"
- test "X$libobjs" = "X " && libobjs=
- else
- gentop="$output_objdir/${outputname}x"
- generated="$generated $gentop"
-
- func_extract_archives $gentop $convenience
- libobjs="$libobjs $func_extract_archives_result"
- test "X$libobjs" = "X " && libobjs=
- fi
- fi
-
- if test "$thread_safe" = yes && test -n "$thread_safe_flag_spec"; then
- eval flag=\"$thread_safe_flag_spec\"
- linker_flags="$linker_flags $flag"
- fi
-
- # Make a backup of the uninstalled library when relinking
- if test "$mode" = relink; then
- $opt_dry_run || eval '(cd $output_objdir && $RM ${realname}U && $MV $realname ${realname}U)' || exit $?
- fi
-
- # Do each of the archive commands.
- if test "$module" = yes && test -n "$module_cmds" ; then
- if test -n "$export_symbols" && test -n "$module_expsym_cmds"; then
- eval test_cmds=\"$module_expsym_cmds\"
- cmds=$module_expsym_cmds
- else
- eval test_cmds=\"$module_cmds\"
- cmds=$module_cmds
- fi
- else
- if test -n "$export_symbols" && test -n "$archive_expsym_cmds"; then
- eval test_cmds=\"$archive_expsym_cmds\"
- cmds=$archive_expsym_cmds
- else
- eval test_cmds=\"$archive_cmds\"
- cmds=$archive_cmds
- fi
- fi
-
- if test "X$skipped_export" != "X:" &&
- func_len " $test_cmds" &&
- len=$func_len_result &&
- test "$len" -lt "$max_cmd_len" || test "$max_cmd_len" -le -1; then
- :
- else
- # The command line is too long to link in one step, link piecewise
- # or, if using GNU ld and skipped_export is not :, use a linker
- # script.
-
- # Save the value of $output and $libobjs because we want to
- # use them later. If we have whole_archive_flag_spec, we
- # want to use save_libobjs as it was before
- # whole_archive_flag_spec was expanded, because we can't
- # assume the linker understands whole_archive_flag_spec.
- # This may have to be revisited, in case too many
- # convenience libraries get linked in and end up exceeding
- # the spec.
- if test -z "$convenience" || test -z "$whole_archive_flag_spec"; then
- save_libobjs=$libobjs
- fi
- save_output=$output
- output_la=`$ECHO "X$output" | $Xsed -e "$basename"`
-
- # Clear the reloadable object creation command queue and
- # initialize k to one.
- test_cmds=
- concat_cmds=
- objlist=
- last_robj=
- k=1
-
- if test -n "$save_libobjs" && test "X$skipped_export" != "X:" && test "$with_gnu_ld" = yes; then
- output=${output_objdir}/${output_la}.lnkscript
- func_verbose "creating GNU ld script: $output"
- $ECHO 'INPUT (' > $output
- for obj in $save_libobjs
- do
- $ECHO "$obj" >> $output
- done
- $ECHO ')' >> $output
- delfiles="$delfiles $output"
- elif test -n "$save_libobjs" && test "X$skipped_export" != "X:" && test "X$file_list_spec" != X; then
- output=${output_objdir}/${output_la}.lnk
- func_verbose "creating linker input file list: $output"
- : > $output
- set x $save_libobjs
- shift
- firstobj=
- if test "$compiler_needs_object" = yes; then
- firstobj="$1 "
- shift
- fi
- for obj
- do
- $ECHO "$obj" >> $output
- done
- delfiles="$delfiles $output"
- output=$firstobj\"$file_list_spec$output\"
- else
- if test -n "$save_libobjs"; then
- func_verbose "creating reloadable object files..."
- output=$output_objdir/$output_la-${k}.$objext
- eval test_cmds=\"$reload_cmds\"
- func_len " $test_cmds"
- len0=$func_len_result
- len=$len0
-
- # Loop over the list of objects to be linked.
- for obj in $save_libobjs
- do
- func_len " $obj"
- func_arith $len + $func_len_result
- len=$func_arith_result
- if test "X$objlist" = X ||
- test "$len" -lt "$max_cmd_len"; then
- func_append objlist " $obj"
- else
- # The command $test_cmds is almost too long, add a
- # command to the queue.
- if test "$k" -eq 1 ; then
- # The first file doesn't have a previous command to add.
- eval concat_cmds=\"$reload_cmds $objlist $last_robj\"
- else
- # All subsequent reloadable object files will link in
- # the last one created.
- eval concat_cmds=\"\$concat_cmds~$reload_cmds $objlist $last_robj~\$RM $last_robj\"
- fi
- last_robj=$output_objdir/$output_la-${k}.$objext
- func_arith $k + 1
- k=$func_arith_result
- output=$output_objdir/$output_la-${k}.$objext
- objlist=$obj
- func_len " $last_robj"
- func_arith $len0 + $func_len_result
- len=$func_arith_result
- fi
- done
- # Handle the remaining objects by creating one last
- # reloadable object file. All subsequent reloadable object
- # files will link in the last one created.
- test -z "$concat_cmds" || concat_cmds=$concat_cmds~
- eval concat_cmds=\"\${concat_cmds}$reload_cmds $objlist $last_robj\"
- if test -n "$last_robj"; then
- eval concat_cmds=\"\${concat_cmds}~\$RM $last_robj\"
- fi
- delfiles="$delfiles $output"
-
- else
- output=
- fi
-
- if ${skipped_export-false}; then
- func_verbose "generating symbol list for \`$libname.la'"
- export_symbols="$output_objdir/$libname.exp"
- $opt_dry_run || $RM $export_symbols
- libobjs=$output
- # Append the command to create the export file.
- test -z "$concat_cmds" || concat_cmds=$concat_cmds~
- eval concat_cmds=\"\$concat_cmds$export_symbols_cmds\"
- if test -n "$last_robj"; then
- eval concat_cmds=\"\$concat_cmds~\$RM $last_robj\"
- fi
- fi
-
- test -n "$save_libobjs" &&
- func_verbose "creating a temporary reloadable object file: $output"
-
- # Loop through the commands generated above and execute them.
- save_ifs="$IFS"; IFS='~'
- for cmd in $concat_cmds; do
- IFS="$save_ifs"
- $opt_silent || {
- func_quote_for_expand "$cmd"
- eval "func_echo $func_quote_for_expand_result"
- }
- $opt_dry_run || eval "$cmd" || {
- lt_exit=$?
-
- # Restore the uninstalled library and exit
- if test "$mode" = relink; then
- ( cd "$output_objdir" && \
- $RM "${realname}T" && \
- $MV "${realname}U" "$realname" )
- fi
-
- exit $lt_exit
- }
- done
- IFS="$save_ifs"
-
- if test -n "$export_symbols_regex" && ${skipped_export-false}; then
- func_show_eval '$EGREP -e "$export_symbols_regex" "$export_symbols" > "${export_symbols}T"'
- func_show_eval '$MV "${export_symbols}T" "$export_symbols"'
- fi
- fi
-
- if ${skipped_export-false}; then
- if test -n "$export_symbols" && test -n "$include_expsyms"; then
- tmp_export_symbols="$export_symbols"
- test -n "$orig_export_symbols" && tmp_export_symbols="$orig_export_symbols"
- $opt_dry_run || eval '$ECHO "X$include_expsyms" | $Xsed | $SP2NL >> "$tmp_export_symbols"'
- fi
-
- if test -n "$orig_export_symbols"; then
- # The given exports_symbols file has to be filtered, so filter it.
- func_verbose "filter symbol list for \`$libname.la' to tag DATA exports"
- # FIXME: $output_objdir/$libname.filter potentially contains lots of
- # 's' commands which not all seds can handle. GNU sed should be fine
- # though. Also, the filter scales superlinearly with the number of
- # global variables. join(1) would be nice here, but unfortunately
- # isn't a blessed tool.
- $opt_dry_run || $SED -e '/[ ,]DATA/!d;s,\(.*\)\([ \,].*\),s|^\1$|\1\2|,' < $export_symbols > $output_objdir/$libname.filter
- delfiles="$delfiles $export_symbols $output_objdir/$libname.filter"
- export_symbols=$output_objdir/$libname.def
- $opt_dry_run || $SED -f $output_objdir/$libname.filter < $orig_export_symbols > $export_symbols
- fi
- fi
-
- libobjs=$output
- # Restore the value of output.
- output=$save_output
-
- if test -n "$convenience" && test -n "$whole_archive_flag_spec"; then
- eval libobjs=\"\$libobjs $whole_archive_flag_spec\"
- test "X$libobjs" = "X " && libobjs=
- fi
- # Expand the library linking commands again to reset the
- # value of $libobjs for piecewise linking.
-
- # Do each of the archive commands.
- if test "$module" = yes && test -n "$module_cmds" ; then
- if test -n "$export_symbols" && test -n "$module_expsym_cmds"; then
- cmds=$module_expsym_cmds
- else
- cmds=$module_cmds
- fi
- else
- if test -n "$export_symbols" && test -n "$archive_expsym_cmds"; then
- cmds=$archive_expsym_cmds
- else
- cmds=$archive_cmds
- fi
- fi
- fi
-
- if test -n "$delfiles"; then
- # Append the command to remove temporary files to $cmds.
- eval cmds=\"\$cmds~\$RM $delfiles\"
- fi
-
- # Add any objects from preloaded convenience libraries
- if test -n "$dlprefiles"; then
- gentop="$output_objdir/${outputname}x"
- generated="$generated $gentop"
-
- func_extract_archives $gentop $dlprefiles
- libobjs="$libobjs $func_extract_archives_result"
- test "X$libobjs" = "X " && libobjs=
- fi
-
- save_ifs="$IFS"; IFS='~'
- for cmd in $cmds; do
- IFS="$save_ifs"
- eval cmd=\"$cmd\"
- $opt_silent || {
- func_quote_for_expand "$cmd"
- eval "func_echo $func_quote_for_expand_result"
- }
- $opt_dry_run || eval "$cmd" || {
- lt_exit=$?
-
- # Restore the uninstalled library and exit
- if test "$mode" = relink; then
- ( cd "$output_objdir" && \
- $RM "${realname}T" && \
- $MV "${realname}U" "$realname" )
- fi
-
- exit $lt_exit
- }
- done
- IFS="$save_ifs"
-
- # Restore the uninstalled library and exit
- if test "$mode" = relink; then
- $opt_dry_run || eval '(cd $output_objdir && $RM ${realname}T && $MV $realname ${realname}T && $MV ${realname}U $realname)' || exit $?
-
- if test -n "$convenience"; then
- if test -z "$whole_archive_flag_spec"; then
- func_show_eval '${RM}r "$gentop"'
- fi
- fi
-
- exit $EXIT_SUCCESS
- fi
-
- # Create links to the real library.
- for linkname in $linknames; do
- if test "$realname" != "$linkname"; then
- func_show_eval '(cd "$output_objdir" && $RM "$linkname" && $LN_S "$realname" "$linkname")' 'exit $?'
- fi
- done
-
- # If -module or -export-dynamic was specified, set the dlname.
- if test "$module" = yes || test "$export_dynamic" = yes; then
- # On all known operating systems, these are identical.
- dlname="$soname"
- fi
- fi
- ;;
-
- obj)
- if test -n "$dlfiles$dlprefiles" || test "$dlself" != no; then
- func_warning "\`-dlopen' is ignored for objects"
- fi
-
- case " $deplibs" in
- *\ -l* | *\ -L*)
- func_warning "\`-l' and \`-L' are ignored for objects" ;;
- esac
-
- test -n "$rpath" && \
- func_warning "\`-rpath' is ignored for objects"
-
- test -n "$xrpath" && \
- func_warning "\`-R' is ignored for objects"
-
- test -n "$vinfo" && \
- func_warning "\`-version-info' is ignored for objects"
-
- test -n "$release" && \
- func_warning "\`-release' is ignored for objects"
-
- case $output in
- *.lo)
- test -n "$objs$old_deplibs" && \
- func_fatal_error "cannot build library object \`$output' from non-libtool objects"
-
- libobj=$output
- func_lo2o "$libobj"
- obj=$func_lo2o_result
- ;;
- *)
- libobj=
- obj="$output"
- ;;
- esac
-
- # Delete the old objects.
- $opt_dry_run || $RM $obj $libobj
-
- # Objects from convenience libraries. This assumes
- # single-version convenience libraries. Whenever we create
- # different ones for PIC/non-PIC, this we'll have to duplicate
- # the extraction.
- reload_conv_objs=
- gentop=
- # reload_cmds runs $LD directly, so let us get rid of
- # -Wl from whole_archive_flag_spec and hope we can get by with
- # turning comma into space..
- wl=
-
- if test -n "$convenience"; then
- if test -n "$whole_archive_flag_spec"; then
- eval tmp_whole_archive_flags=\"$whole_archive_flag_spec\"
- reload_conv_objs=$reload_objs\ `$ECHO "X$tmp_whole_archive_flags" | $Xsed -e 's|,| |g'`
- else
- gentop="$output_objdir/${obj}x"
- generated="$generated $gentop"
-
- func_extract_archives $gentop $convenience
- reload_conv_objs="$reload_objs $func_extract_archives_result"
- fi
- fi
-
- # Create the old-style object.
- reload_objs="$objs$old_deplibs "`$ECHO "X$libobjs" | $SP2NL | $Xsed -e '/\.'${libext}$'/d' -e '/\.lib$/d' -e "$lo2o" | $NL2SP`" $reload_conv_objs" ### testsuite: skip nested quoting test
-
- output="$obj"
- func_execute_cmds "$reload_cmds" 'exit $?'
-
- # Exit if we aren't doing a library object file.
- if test -z "$libobj"; then
- if test -n "$gentop"; then
- func_show_eval '${RM}r "$gentop"'
- fi
-
- exit $EXIT_SUCCESS
- fi
-
- if test "$build_libtool_libs" != yes; then
- if test -n "$gentop"; then
- func_show_eval '${RM}r "$gentop"'
- fi
-
- # Create an invalid libtool object if no PIC, so that we don't
- # accidentally link it into a program.
- # $show "echo timestamp > $libobj"
- # $opt_dry_run || eval "echo timestamp > $libobj" || exit $?
- exit $EXIT_SUCCESS
- fi
-
- if test -n "$pic_flag" || test "$pic_mode" != default; then
- # Only do commands if we really have different PIC objects.
- reload_objs="$libobjs $reload_conv_objs"
- output="$libobj"
- func_execute_cmds "$reload_cmds" 'exit $?'
- fi
-
- if test -n "$gentop"; then
- func_show_eval '${RM}r "$gentop"'
- fi
-
- exit $EXIT_SUCCESS
- ;;
-
- prog)
- case $host in
- *cygwin*) func_stripname '' '.exe' "$output"
- output=$func_stripname_result.exe;;
- esac
- test -n "$vinfo" && \
- func_warning "\`-version-info' is ignored for programs"
-
- test -n "$release" && \
- func_warning "\`-release' is ignored for programs"
-
- test "$preload" = yes \
- && test "$dlopen_support" = unknown \
- && test "$dlopen_self" = unknown \
- && test "$dlopen_self_static" = unknown && \
- func_warning "\`LT_INIT([dlopen])' not used. Assuming no dlopen support."
-
- case $host in
- *-*-rhapsody* | *-*-darwin1.[012])
- # On Rhapsody replace the C library is the System framework
- compile_deplibs=`$ECHO "X $compile_deplibs" | $Xsed -e 's/ -lc / System.ltframework /'`
- finalize_deplibs=`$ECHO "X $finalize_deplibs" | $Xsed -e 's/ -lc / System.ltframework /'`
- ;;
- esac
-
- case $host in
- *-*-darwin*)
- # Don't allow lazy linking, it breaks C++ global constructors
- # But is supposedly fixed on 10.4 or later (yay!).
- if test "$tagname" = CXX ; then
- case ${MACOSX_DEPLOYMENT_TARGET-10.0} in
- 10.[0123])
- compile_command="$compile_command ${wl}-bind_at_load"
- finalize_command="$finalize_command ${wl}-bind_at_load"
- ;;
- esac
- fi
- # Time to change all our "foo.ltframework" stuff back to "-framework foo"
- compile_deplibs=`$ECHO "X $compile_deplibs" | $Xsed -e 's% \([^ $]*\).ltframework% -framework \1%g'`
- finalize_deplibs=`$ECHO "X $finalize_deplibs" | $Xsed -e 's% \([^ $]*\).ltframework% -framework \1%g'`
- ;;
- esac
-
-
- # move library search paths that coincide with paths to not yet
- # installed libraries to the beginning of the library search list
- new_libs=
- for path in $notinst_path; do
- case " $new_libs " in
- *" -L$path/$objdir "*) ;;
- *)
- case " $compile_deplibs " in
- *" -L$path/$objdir "*)
- new_libs="$new_libs -L$path/$objdir" ;;
- esac
- ;;
- esac
- done
- for deplib in $compile_deplibs; do
- case $deplib in
- -L*)
- case " $new_libs " in
- *" $deplib "*) ;;
- *) new_libs="$new_libs $deplib" ;;
- esac
- ;;
- *) new_libs="$new_libs $deplib" ;;
- esac
- done
- compile_deplibs="$new_libs"
-
-
- compile_command="$compile_command $compile_deplibs"
- finalize_command="$finalize_command $finalize_deplibs"
-
- if test -n "$rpath$xrpath"; then
- # If the user specified any rpath flags, then add them.
- for libdir in $rpath $xrpath; do
- # This is the magic to use -rpath.
- case "$finalize_rpath " in
- *" $libdir "*) ;;
- *) finalize_rpath="$finalize_rpath $libdir" ;;
- esac
- done
- fi
-
- # Now hardcode the library paths
- rpath=
- hardcode_libdirs=
- for libdir in $compile_rpath $finalize_rpath; do
- if test -n "$hardcode_libdir_flag_spec"; then
- if test -n "$hardcode_libdir_separator"; then
- if test -z "$hardcode_libdirs"; then
- hardcode_libdirs="$libdir"
- else
- # Just accumulate the unique libdirs.
- case $hardcode_libdir_separator$hardcode_libdirs$hardcode_libdir_separator in
- *"$hardcode_libdir_separator$libdir$hardcode_libdir_separator"*)
- ;;
- *)
- hardcode_libdirs="$hardcode_libdirs$hardcode_libdir_separator$libdir"
- ;;
- esac
- fi
- else
- eval flag=\"$hardcode_libdir_flag_spec\"
- rpath="$rpath $flag"
- fi
- elif test -n "$runpath_var"; then
- case "$perm_rpath " in
- *" $libdir "*) ;;
- *) perm_rpath="$perm_rpath $libdir" ;;
- esac
- fi
- case $host in
- *-*-cygwin* | *-*-mingw* | *-*-pw32* | *-*-os2* | *-cegcc*)
- testbindir=`${ECHO} "$libdir" | ${SED} -e 's*/lib$*/bin*'`
- case :$dllsearchpath: in
- *":$libdir:"*) ;;
- ::) dllsearchpath=$libdir;;
- *) dllsearchpath="$dllsearchpath:$libdir";;
- esac
- case :$dllsearchpath: in
- *":$testbindir:"*) ;;
- ::) dllsearchpath=$testbindir;;
- *) dllsearchpath="$dllsearchpath:$testbindir";;
- esac
- ;;
- esac
- done
- # Substitute the hardcoded libdirs into the rpath.
- if test -n "$hardcode_libdir_separator" &&
- test -n "$hardcode_libdirs"; then
- libdir="$hardcode_libdirs"
- eval rpath=\" $hardcode_libdir_flag_spec\"
- fi
- compile_rpath="$rpath"
-
- rpath=
- hardcode_libdirs=
- for libdir in $finalize_rpath; do
- if test -n "$hardcode_libdir_flag_spec"; then
- if test -n "$hardcode_libdir_separator"; then
- if test -z "$hardcode_libdirs"; then
- hardcode_libdirs="$libdir"
- else
- # Just accumulate the unique libdirs.
- case $hardcode_libdir_separator$hardcode_libdirs$hardcode_libdir_separator in
- *"$hardcode_libdir_separator$libdir$hardcode_libdir_separator"*)
- ;;
- *)
- hardcode_libdirs="$hardcode_libdirs$hardcode_libdir_separator$libdir"
- ;;
- esac
- fi
- else
- eval flag=\"$hardcode_libdir_flag_spec\"
- rpath="$rpath $flag"
- fi
- elif test -n "$runpath_var"; then
- case "$finalize_perm_rpath " in
- *" $libdir "*) ;;
- *) finalize_perm_rpath="$finalize_perm_rpath $libdir" ;;
- esac
- fi
- done
- # Substitute the hardcoded libdirs into the rpath.
- if test -n "$hardcode_libdir_separator" &&
- test -n "$hardcode_libdirs"; then
- libdir="$hardcode_libdirs"
- eval rpath=\" $hardcode_libdir_flag_spec\"
- fi
- finalize_rpath="$rpath"
-
- if test -n "$libobjs" && test "$build_old_libs" = yes; then
- # Transform all the library objects into standard objects.
- compile_command=`$ECHO "X$compile_command" | $SP2NL | $Xsed -e "$lo2o" | $NL2SP`
- finalize_command=`$ECHO "X$finalize_command" | $SP2NL | $Xsed -e "$lo2o" | $NL2SP`
- fi
-
- func_generate_dlsyms "$outputname" "@PROGRAM@" "no"
-
- # template prelinking step
- if test -n "$prelink_cmds"; then
- func_execute_cmds "$prelink_cmds" 'exit $?'
- fi
-
- wrappers_required=yes
- case $host in
- *cygwin* | *mingw* )
- if test "$build_libtool_libs" != yes; then
- wrappers_required=no
- fi
- ;;
- *cegcc)
- # Disable wrappers for cegcc, we are cross compiling anyway.
- wrappers_required=no
- ;;
- *)
- if test "$need_relink" = no || test "$build_libtool_libs" != yes; then
- wrappers_required=no
- fi
- ;;
- esac
- if test "$wrappers_required" = no; then
- # Replace the output file specification.
- compile_command=`$ECHO "X$compile_command" | $Xsed -e 's%@OUTPUT@%'"$output"'%g'`
- link_command="$compile_command$compile_rpath"
-
- # We have no uninstalled library dependencies, so finalize right now.
- exit_status=0
- func_show_eval "$link_command" 'exit_status=$?'
-
- # Delete the generated files.
- if test -f "$output_objdir/${outputname}S.${objext}"; then
- func_show_eval '$RM "$output_objdir/${outputname}S.${objext}"'
- fi
-
- exit $exit_status
- fi
-
- if test -n "$compile_shlibpath$finalize_shlibpath"; then
- compile_command="$shlibpath_var=\"$compile_shlibpath$finalize_shlibpath\$$shlibpath_var\" $compile_command"
- fi
- if test -n "$finalize_shlibpath"; then
- finalize_command="$shlibpath_var=\"$finalize_shlibpath\$$shlibpath_var\" $finalize_command"
- fi
-
- compile_var=
- finalize_var=
- if test -n "$runpath_var"; then
- if test -n "$perm_rpath"; then
- # We should set the runpath_var.
- rpath=
- for dir in $perm_rpath; do
- rpath="$rpath$dir:"
- done
- compile_var="$runpath_var=\"$rpath\$$runpath_var\" "
- fi
- if test -n "$finalize_perm_rpath"; then
- # We should set the runpath_var.
- rpath=
- for dir in $finalize_perm_rpath; do
- rpath="$rpath$dir:"
- done
- finalize_var="$runpath_var=\"$rpath\$$runpath_var\" "
- fi
- fi
-
- if test "$no_install" = yes; then
- # We don't need to create a wrapper script.
- link_command="$compile_var$compile_command$compile_rpath"
- # Replace the output file specification.
- link_command=`$ECHO "X$link_command" | $Xsed -e 's%@OUTPUT@%'"$output"'%g'`
- # Delete the old output file.
- $opt_dry_run || $RM $output
- # Link the executable and exit
- func_show_eval "$link_command" 'exit $?'
- exit $EXIT_SUCCESS
- fi
-
- if test "$hardcode_action" = relink; then
- # Fast installation is not supported
- link_command="$compile_var$compile_command$compile_rpath"
- relink_command="$finalize_var$finalize_command$finalize_rpath"
-
- func_warning "this platform does not like uninstalled shared libraries"
- func_warning "\`$output' will be relinked during installation"
- else
- if test "$fast_install" != no; then
- link_command="$finalize_var$compile_command$finalize_rpath"
- if test "$fast_install" = yes; then
- relink_command=`$ECHO "X$compile_var$compile_command$compile_rpath" | $Xsed -e 's%@OUTPUT@%\$progdir/\$file%g'`
- else
- # fast_install is set to needless
- relink_command=
- fi
- else
- link_command="$compile_var$compile_command$compile_rpath"
- relink_command="$finalize_var$finalize_command$finalize_rpath"
- fi
- fi
-
- # Replace the output file specification.
- link_command=`$ECHO "X$link_command" | $Xsed -e 's%@OUTPUT@%'"$output_objdir/$outputname"'%g'`
-
- # Delete the old output files.
- $opt_dry_run || $RM $output $output_objdir/$outputname $output_objdir/lt-$outputname
-
- func_show_eval "$link_command" 'exit $?'
-
- # Now create the wrapper script.
- func_verbose "creating $output"
-
- # Quote the relink command for shipping.
- if test -n "$relink_command"; then
- # Preserve any variables that may affect compiler behavior
- for var in $variables_saved_for_relink; do
- if eval test -z \"\${$var+set}\"; then
- relink_command="{ test -z \"\${$var+set}\" || $lt_unset $var || { $var=; export $var; }; }; $relink_command"
- elif eval var_value=\$$var; test -z "$var_value"; then
- relink_command="$var=; export $var; $relink_command"
- else
- func_quote_for_eval "$var_value"
- relink_command="$var=$func_quote_for_eval_result; export $var; $relink_command"
- fi
- done
- relink_command="(cd `pwd`; $relink_command)"
- relink_command=`$ECHO "X$relink_command" | $Xsed -e "$sed_quote_subst"`
- fi
-
- # Quote $ECHO for shipping.
- if test "X$ECHO" = "X$SHELL $progpath --fallback-echo"; then
- case $progpath in
- [\\/]* | [A-Za-z]:[\\/]*) qecho="$SHELL $progpath --fallback-echo";;
- *) qecho="$SHELL `pwd`/$progpath --fallback-echo";;
- esac
- qecho=`$ECHO "X$qecho" | $Xsed -e "$sed_quote_subst"`
- else
- qecho=`$ECHO "X$ECHO" | $Xsed -e "$sed_quote_subst"`
- fi
-
- # Only actually do things if not in dry run mode.
- $opt_dry_run || {
- # win32 will think the script is a binary if it has
- # a .exe suffix, so we strip it off here.
- case $output in
- *.exe) func_stripname '' '.exe' "$output"
- output=$func_stripname_result ;;
- esac
- # test for cygwin because mv fails w/o .exe extensions
- case $host in
- *cygwin*)
- exeext=.exe
- func_stripname '' '.exe' "$outputname"
- outputname=$func_stripname_result ;;
- *) exeext= ;;
- esac
- case $host in
- *cygwin* | *mingw* )
- func_dirname_and_basename "$output" "" "."
- output_name=$func_basename_result
- output_path=$func_dirname_result
- cwrappersource="$output_path/$objdir/lt-$output_name.c"
- cwrapper="$output_path/$output_name.exe"
- $RM $cwrappersource $cwrapper
- trap "$RM $cwrappersource $cwrapper; exit $EXIT_FAILURE" 1 2 15
-
- func_emit_cwrapperexe_src > $cwrappersource
-
- # The wrapper executable is built using the $host compiler,
- # because it contains $host paths and files. If cross-
- # compiling, it, like the target executable, must be
- # executed on the $host or under an emulation environment.
- $opt_dry_run || {
- $LTCC $LTCFLAGS -o $cwrapper $cwrappersource
- $STRIP $cwrapper
- }
-
- # Now, create the wrapper script for func_source use:
- func_ltwrapper_scriptname $cwrapper
- $RM $func_ltwrapper_scriptname_result
- trap "$RM $func_ltwrapper_scriptname_result; exit $EXIT_FAILURE" 1 2 15
- $opt_dry_run || {
- # note: this script will not be executed, so do not chmod.
- if test "x$build" = "x$host" ; then
- $cwrapper --lt-dump-script > $func_ltwrapper_scriptname_result
- else
- func_emit_wrapper no > $func_ltwrapper_scriptname_result
- fi
- }
- ;;
- * )
- $RM $output
- trap "$RM $output; exit $EXIT_FAILURE" 1 2 15
-
- func_emit_wrapper no > $output
- chmod +x $output
- ;;
- esac
- }
- exit $EXIT_SUCCESS
- ;;
- esac
-
- # See if we need to build an old-fashioned archive.
- for oldlib in $oldlibs; do
-
- if test "$build_libtool_libs" = convenience; then
- oldobjs="$libobjs_save $symfileobj"
- addlibs="$convenience"
- build_libtool_libs=no
- else
- if test "$build_libtool_libs" = module; then
- oldobjs="$libobjs_save"
- build_libtool_libs=no
- else
- oldobjs="$old_deplibs $non_pic_objects"
- if test "$preload" = yes && test -f "$symfileobj"; then
- oldobjs="$oldobjs $symfileobj"
- fi
- fi
- addlibs="$old_convenience"
- fi
-
- if test -n "$addlibs"; then
- gentop="$output_objdir/${outputname}x"
- generated="$generated $gentop"
-
- func_extract_archives $gentop $addlibs
- oldobjs="$oldobjs $func_extract_archives_result"
- fi
-
- # Do each command in the archive commands.
- if test -n "$old_archive_from_new_cmds" && test "$build_libtool_libs" = yes; then
- cmds=$old_archive_from_new_cmds
- else
-
- # Add any objects from preloaded convenience libraries
- if test -n "$dlprefiles"; then
- gentop="$output_objdir/${outputname}x"
- generated="$generated $gentop"
-
- func_extract_archives $gentop $dlprefiles
- oldobjs="$oldobjs $func_extract_archives_result"
- fi
-
- # POSIX demands no paths to be encoded in archives. We have
- # to avoid creating archives with duplicate basenames if we
- # might have to extract them afterwards, e.g., when creating a
- # static archive out of a convenience library, or when linking
- # the entirety of a libtool archive into another (currently
- # not supported by libtool).
- if (for obj in $oldobjs
- do
- func_basename "$obj"
- $ECHO "$func_basename_result"
- done | sort | sort -uc >/dev/null 2>&1); then
- :
- else
- $ECHO "copying selected object files to avoid basename conflicts..."
- gentop="$output_objdir/${outputname}x"
- generated="$generated $gentop"
- func_mkdir_p "$gentop"
- save_oldobjs=$oldobjs
- oldobjs=
- counter=1
- for obj in $save_oldobjs
- do
- func_basename "$obj"
- objbase="$func_basename_result"
- case " $oldobjs " in
- " ") oldobjs=$obj ;;
- *[\ /]"$objbase "*)
- while :; do
- # Make sure we don't pick an alternate name that also
- # overlaps.
- newobj=lt$counter-$objbase
- func_arith $counter + 1
- counter=$func_arith_result
- case " $oldobjs " in
- *[\ /]"$newobj "*) ;;
- *) if test ! -f "$gentop/$newobj"; then break; fi ;;
- esac
- done
- func_show_eval "ln $obj $gentop/$newobj || cp $obj $gentop/$newobj"
- oldobjs="$oldobjs $gentop/$newobj"
- ;;
- *) oldobjs="$oldobjs $obj" ;;
- esac
- done
- fi
- eval cmds=\"$old_archive_cmds\"
-
- func_len " $cmds"
- len=$func_len_result
- if test "$len" -lt "$max_cmd_len" || test "$max_cmd_len" -le -1; then
- cmds=$old_archive_cmds
- else
- # the command line is too long to link in one step, link in parts
- func_verbose "using piecewise archive linking..."
- save_RANLIB=$RANLIB
- RANLIB=:
- objlist=
- concat_cmds=
- save_oldobjs=$oldobjs
- oldobjs=
- # Is there a better way of finding the last object in the list?
- for obj in $save_oldobjs
- do
- last_oldobj=$obj
- done
- eval test_cmds=\"$old_archive_cmds\"
- func_len " $test_cmds"
- len0=$func_len_result
- len=$len0
- for obj in $save_oldobjs
- do
- func_len " $obj"
- func_arith $len + $func_len_result
- len=$func_arith_result
- func_append objlist " $obj"
- if test "$len" -lt "$max_cmd_len"; then
- :
- else
- # the above command should be used before it gets too long
- oldobjs=$objlist
- if test "$obj" = "$last_oldobj" ; then
- RANLIB=$save_RANLIB
- fi
- test -z "$concat_cmds" || concat_cmds=$concat_cmds~
- eval concat_cmds=\"\${concat_cmds}$old_archive_cmds\"
- objlist=
- len=$len0
- fi
- done
- RANLIB=$save_RANLIB
- oldobjs=$objlist
- if test "X$oldobjs" = "X" ; then
- eval cmds=\"\$concat_cmds\"
- else
- eval cmds=\"\$concat_cmds~\$old_archive_cmds\"
- fi
- fi
- fi
- func_execute_cmds "$cmds" 'exit $?'
- done
-
- test -n "$generated" && \
- func_show_eval "${RM}r$generated"
-
- # Now create the libtool archive.
- case $output in
- *.la)
- old_library=
- test "$build_old_libs" = yes && old_library="$libname.$libext"
- func_verbose "creating $output"
-
- # Preserve any variables that may affect compiler behavior
- for var in $variables_saved_for_relink; do
- if eval test -z \"\${$var+set}\"; then
- relink_command="{ test -z \"\${$var+set}\" || $lt_unset $var || { $var=; export $var; }; }; $relink_command"
- elif eval var_value=\$$var; test -z "$var_value"; then
- relink_command="$var=; export $var; $relink_command"
- else
- func_quote_for_eval "$var_value"
- relink_command="$var=$func_quote_for_eval_result; export $var; $relink_command"
- fi
- done
- # Quote the link command for shipping.
- relink_command="(cd `pwd`; $SHELL $progpath $preserve_args --mode=relink $libtool_args @inst_prefix_dir@)"
- relink_command=`$ECHO "X$relink_command" | $Xsed -e "$sed_quote_subst"`
- if test "$hardcode_automatic" = yes ; then
- relink_command=
- fi
-
- # Only create the output if not a dry run.
- $opt_dry_run || {
- for installed in no yes; do
- if test "$installed" = yes; then
- if test -z "$install_libdir"; then
- break
- fi
- output="$output_objdir/$outputname"i
- # Replace all uninstalled libtool libraries with the installed ones
- newdependency_libs=
- for deplib in $dependency_libs; do
- case $deplib in
- *.la)
- func_basename "$deplib"
- name="$func_basename_result"
- eval libdir=`${SED} -n -e 's/^libdir=\(.*\)$/\1/p' $deplib`
- test -z "$libdir" && \
- func_fatal_error "\`$deplib' is not a valid libtool archive"
- newdependency_libs="$newdependency_libs $libdir/$name"
- ;;
- *) newdependency_libs="$newdependency_libs $deplib" ;;
- esac
- done
- dependency_libs="$newdependency_libs"
- newdlfiles=
-
- for lib in $dlfiles; do
- case $lib in
- *.la)
- func_basename "$lib"
- name="$func_basename_result"
- eval libdir=`${SED} -n -e 's/^libdir=\(.*\)$/\1/p' $lib`
- test -z "$libdir" && \
- func_fatal_error "\`$lib' is not a valid libtool archive"
- newdlfiles="$newdlfiles $libdir/$name"
- ;;
- *) newdlfiles="$newdlfiles $lib" ;;
- esac
- done
- dlfiles="$newdlfiles"
- newdlprefiles=
- for lib in $dlprefiles; do
- case $lib in
- *.la)
- # Only pass preopened files to the pseudo-archive (for
- # eventual linking with the app. that links it) if we
- # didn't already link the preopened objects directly into
- # the library:
- func_basename "$lib"
- name="$func_basename_result"
- eval libdir=`${SED} -n -e 's/^libdir=\(.*\)$/\1/p' $lib`
- test -z "$libdir" && \
- func_fatal_error "\`$lib' is not a valid libtool archive"
- newdlprefiles="$newdlprefiles $libdir/$name"
- ;;
- esac
- done
- dlprefiles="$newdlprefiles"
- else
- newdlfiles=
- for lib in $dlfiles; do
- case $lib in
- [\\/]* | [A-Za-z]:[\\/]*) abs="$lib" ;;
- *) abs=`pwd`"/$lib" ;;
- esac
- newdlfiles="$newdlfiles $abs"
- done
- dlfiles="$newdlfiles"
- newdlprefiles=
- for lib in $dlprefiles; do
- case $lib in
- [\\/]* | [A-Za-z]:[\\/]*) abs="$lib" ;;
- *) abs=`pwd`"/$lib" ;;
- esac
- newdlprefiles="$newdlprefiles $abs"
- done
- dlprefiles="$newdlprefiles"
- fi
- $RM $output
- # place dlname in correct position for cygwin
- tdlname=$dlname
- case $host,$output,$installed,$module,$dlname in
- *cygwin*,*lai,yes,no,*.dll | *mingw*,*lai,yes,no,*.dll | *cegcc*,*lai,yes,no,*.dll) tdlname=../bin/$dlname ;;
- esac
- $ECHO > $output "\
-# $outputname - a libtool library file
-# Generated by $PROGRAM (GNU $PACKAGE$TIMESTAMP) $VERSION
-#
-# Please DO NOT delete this file!
-# It is necessary for linking the library.
-
-# The name that we can dlopen(3).
-dlname='$tdlname'
-
-# Names of this library.
-library_names='$library_names'
-
-# The name of the static archive.
-old_library='$old_library'
-
-# Linker flags that can not go in dependency_libs.
-inherited_linker_flags='$new_inherited_linker_flags'
-
-# Libraries that this one depends upon.
-dependency_libs='$dependency_libs'
-
-# Names of additional weak libraries provided by this library
-weak_library_names='$weak_libs'
-
-# Version information for $libname.
-current=$current
-age=$age
-revision=$revision
-
-# Is this an already installed library?
-installed=$installed
-
-# Should we warn about portability when linking against -modules?
-shouldnotlink=$module
-
-# Files to dlopen/dlpreopen
-dlopen='$dlfiles'
-dlpreopen='$dlprefiles'
-
-# Directory that this library needs to be installed in:
-libdir='$install_libdir'"
- if test "$installed" = no && test "$need_relink" = yes; then
- $ECHO >> $output "\
-relink_command=\"$relink_command\""
- fi
- done
- }
-
- # Do a symbolic link so that the libtool archive can be found in
- # LD_LIBRARY_PATH before the program is installed.
- func_show_eval '( cd "$output_objdir" && $RM "$outputname" && $LN_S "../$outputname" "$outputname" )' 'exit $?'
- ;;
- esac
- exit $EXIT_SUCCESS
-}
-
-{ test "$mode" = link || test "$mode" = relink; } &&
- func_mode_link ${1+"$@"}
-
-
-# func_mode_uninstall arg...
-func_mode_uninstall ()
-{
- $opt_debug
- RM="$nonopt"
- files=
- rmforce=
- exit_status=0
-
- # This variable tells wrapper scripts just to set variables rather
- # than running their programs.
- libtool_install_magic="$magic"
-
- for arg
- do
- case $arg in
- -f) RM="$RM $arg"; rmforce=yes ;;
- -*) RM="$RM $arg" ;;
- *) files="$files $arg" ;;
- esac
- done
-
- test -z "$RM" && \
- func_fatal_help "you must specify an RM program"
-
- rmdirs=
-
- origobjdir="$objdir"
- for file in $files; do
- func_dirname "$file" "" "."
- dir="$func_dirname_result"
- if test "X$dir" = X.; then
- objdir="$origobjdir"
- else
- objdir="$dir/$origobjdir"
- fi
- func_basename "$file"
- name="$func_basename_result"
- test "$mode" = uninstall && objdir="$dir"
-
- # Remember objdir for removal later, being careful to avoid duplicates
- if test "$mode" = clean; then
- case " $rmdirs " in
- *" $objdir "*) ;;
- *) rmdirs="$rmdirs $objdir" ;;
- esac
- fi
-
- # Don't error if the file doesn't exist and rm -f was used.
- if { test -L "$file"; } >/dev/null 2>&1 ||
- { test -h "$file"; } >/dev/null 2>&1 ||
- test -f "$file"; then
- :
- elif test -d "$file"; then
- exit_status=1
- continue
- elif test "$rmforce" = yes; then
- continue
- fi
-
- rmfiles="$file"
-
- case $name in
- *.la)
- # Possibly a libtool archive, so verify it.
- if func_lalib_p "$file"; then
- func_source $dir/$name
-
- # Delete the libtool libraries and symlinks.
- for n in $library_names; do
- rmfiles="$rmfiles $objdir/$n"
- done
- test -n "$old_library" && rmfiles="$rmfiles $objdir/$old_library"
-
- case "$mode" in
- clean)
- case " $library_names " in
- # " " in the beginning catches empty $dlname
- *" $dlname "*) ;;
- *) rmfiles="$rmfiles $objdir/$dlname" ;;
- esac
- test -n "$libdir" && rmfiles="$rmfiles $objdir/$name $objdir/${name}i"
- ;;
- uninstall)
- if test -n "$library_names"; then
- # Do each command in the postuninstall commands.
- func_execute_cmds "$postuninstall_cmds" 'test "$rmforce" = yes || exit_status=1'
- fi
-
- if test -n "$old_library"; then
- # Do each command in the old_postuninstall commands.
- func_execute_cmds "$old_postuninstall_cmds" 'test "$rmforce" = yes || exit_status=1'
- fi
- # FIXME: should reinstall the best remaining shared library.
- ;;
- esac
- fi
- ;;
-
- *.lo)
- # Possibly a libtool object, so verify it.
- if func_lalib_p "$file"; then
-
- # Read the .lo file
- func_source $dir/$name
-
- # Add PIC object to the list of files to remove.
- if test -n "$pic_object" &&
- test "$pic_object" != none; then
- rmfiles="$rmfiles $dir/$pic_object"
- fi
-
- # Add non-PIC object to the list of files to remove.
- if test -n "$non_pic_object" &&
- test "$non_pic_object" != none; then
- rmfiles="$rmfiles $dir/$non_pic_object"
- fi
- fi
- ;;
-
- *)
- if test "$mode" = clean ; then
- noexename=$name
- case $file in
- *.exe)
- func_stripname '' '.exe' "$file"
- file=$func_stripname_result
- func_stripname '' '.exe' "$name"
- noexename=$func_stripname_result
- # $file with .exe has already been added to rmfiles,
- # add $file without .exe
- rmfiles="$rmfiles $file"
- ;;
- esac
- # Do a test to see if this is a libtool program.
- if func_ltwrapper_p "$file"; then
- if func_ltwrapper_executable_p "$file"; then
- func_ltwrapper_scriptname "$file"
- relink_command=
- func_source $func_ltwrapper_scriptname_result
- rmfiles="$rmfiles $func_ltwrapper_scriptname_result"
- else
- relink_command=
- func_source $dir/$noexename
- fi
-
- # note $name still contains .exe if it was in $file originally
- # as does the version of $file that was added into $rmfiles
- rmfiles="$rmfiles $objdir/$name $objdir/${name}S.${objext}"
- if test "$fast_install" = yes && test -n "$relink_command"; then
- rmfiles="$rmfiles $objdir/lt-$name"
- fi
- if test "X$noexename" != "X$name" ; then
- rmfiles="$rmfiles $objdir/lt-${noexename}.c"
- fi
- fi
- fi
- ;;
- esac
- func_show_eval "$RM $rmfiles" 'exit_status=1'
- done
- objdir="$origobjdir"
-
- # Try to remove the ${objdir}s in the directories where we deleted files
- for dir in $rmdirs; do
- if test -d "$dir"; then
- func_show_eval "rmdir $dir >/dev/null 2>&1"
- fi
- done
-
- exit $exit_status
-}
-
-{ test "$mode" = uninstall || test "$mode" = clean; } &&
- func_mode_uninstall ${1+"$@"}
-
-test -z "$mode" && {
- help="$generic_help"
- func_fatal_help "you must specify a MODE"
-}
-
-test -z "$exec_cmd" && \
- func_fatal_help "invalid operation mode \`$mode'"
-
-if test -n "$exec_cmd"; then
- eval exec "$exec_cmd"
- exit $EXIT_FAILURE
-fi
-
-exit $exit_status
-
-
-# The TAGs below are defined such that we never get into a situation
-# in which we disable both kinds of libraries. Given conflicting
-# choices, we go for a static library, that is the most portable,
-# since we can't tell whether shared libraries were disabled because
-# the user asked for that or because the platform doesn't support
-# them. This is particularly important on AIX, because we don't
-# support having both static and shared libraries enabled at the same
-# time on that platform, so we default to a shared-only configuration.
-# If a disable-shared tag is given, we'll fallback to a static-only
-# configuration. But we'll never go from static-only to shared-only.
-
-# ### BEGIN LIBTOOL TAG CONFIG: disable-shared
-build_libtool_libs=no
-build_old_libs=yes
-# ### END LIBTOOL TAG CONFIG: disable-shared
-
-# ### BEGIN LIBTOOL TAG CONFIG: disable-static
-build_old_libs=`case $build_libtool_libs in yes) echo no;; *) echo yes;; esac`
-# ### END LIBTOOL TAG CONFIG: disable-static
-
-# Local Variables:
-# mode:shell-script
-# sh-indentation:2
-# End:
-# vi:sw=2
-
diff --git a/scripts/training/MGIZA/manual-compile/compile.sh b/scripts/training/MGIZA/manual-compile/compile.sh
deleted file mode 100755
index 1120901..0000000
--- a/scripts/training/MGIZA/manual-compile/compile.sh
+++ /dev/null
@@ -1,71 +0,0 @@
-#GCC=gcc
-#GPP=g++
-#LDFLAGS="-static"
-
-# mac, 'cos OSX doesn't support static linking and other such nonsense
-GCC=gcc-mp-4.5
-GPP=g++-mp-4.5
-
-SRC_DIR=/Users/hieuhoang/workspace/mgizapp/trunk/mgizapp/src
-BOOST_ROOT=/Users/hieuhoang/workspace/boost/boost_1_52_0
-BOOST_LIBRARYDIR=/Users/hieuhoang/workspace/boost/boost_1_52_0/lib64/
-
-
-rm *.o libmgiza.a d4norm hmmnorm mgiza plain2snt snt2cooc snt2cooc-reduce-mem-preprocess snt2plain symal mkcls
-
-$GPP -I$SRC_DIR -I$BOOST_ROOT/include -c -fPIC \
- $SRC_DIR/alignment.cpp \
- $SRC_DIR/AlignTables.cpp \
- $SRC_DIR/ATables.cpp \
- $SRC_DIR/collCounts.cpp \
- $SRC_DIR/Dictionary.cpp \
- $SRC_DIR/ForwardBackward.cpp \
- $SRC_DIR/getSentence.cpp \
- $SRC_DIR/hmm.cpp \
- $SRC_DIR/HMMTables.cpp \
- $SRC_DIR/logprob.cpp \
- $SRC_DIR/model1.cpp \
- $SRC_DIR/model2.cpp \
- $SRC_DIR/model2to3.cpp \
- $SRC_DIR/model345-peg.cpp \
- $SRC_DIR/model3.cpp \
- $SRC_DIR/model3_viterbi.cpp \
- $SRC_DIR/model3_viterbi_with_tricks.cpp \
- $SRC_DIR/MoveSwapMatrix.cpp \
- $SRC_DIR/myassert.cpp \
- $SRC_DIR/NTables.cpp \
- $SRC_DIR/Parameter.cpp \
- $SRC_DIR/parse.cpp \
- $SRC_DIR/Perplexity.cpp \
- $SRC_DIR/reports.cpp \
- $SRC_DIR/SetArray.cpp \
- $SRC_DIR/transpair_model3.cpp \
- $SRC_DIR/transpair_model4.cpp \
- $SRC_DIR/transpair_model5.cpp \
- $SRC_DIR/TTables.cpp \
- $SRC_DIR/utility.cpp \
- $SRC_DIR/vocab.cpp
-
-$GCC -c -fPIC $SRC_DIR/cmd.c
-
-ar rvs libmgiza.a *.o
-
-$GPP -o d4norm $SRC_DIR/d4norm.cxx $LDFLAGS -I$BOOST_ROOT -I$SRC_DIR -L. -lmgiza -L$BOOST_LIBRARYDIR -lboost_system-mt -lboost_thread-mt -lpthread
-
-$GPP -o hmmnorm $SRC_DIR/hmmnorm.cxx $LDFLAGS -I$BOOST_ROOT -I$SRC_DIR ./libmgiza.a -L$BOOST_LIBRARYDIR -lboost_system-mt -lboost_thread-mt -lpthread
-
-$GPP -o mgiza $SRC_DIR/main.cpp $LDFLAGS -I$BOOST_ROOT -I$SRC_DIR ./libmgiza.a -L$BOOST_LIBRARYDIR -lboost_system-mt -lboost_thread-mt -lpthread
-
-$GPP -o plain2snt $SRC_DIR/plain2snt.cpp
-
-$GPP -o snt2cooc $SRC_DIR/snt2cooc.cpp
-
-$GPP -o snt2cooc-reduce-mem-preprocess $SRC_DIR/snt2cooc-reduce-mem-preprocess.cpp
-
-$GPP -o snt2plain $SRC_DIR/snt2plain.cpp
-
-$GPP -o symal $SRC_DIR/symal.cpp $LDFLAGS -I$BOOST_ROOT -I$SRC_DIR ./libmgiza.a -L$BOOST_LIBRARYDIR -lboost_system-mt -lboost_thread-mt -lpthread
-
-$GPP -I$SRC_DIR/mkcls -o mkcls $SRC_DIR/mkcls/mkcls.cpp $SRC_DIR/mkcls/general.cpp $SRC_DIR/mkcls/KategProblemKBC.cpp $SRC_DIR/mkcls/KategProblem.cpp $SRC_DIR/mkcls/Problem.cpp $SRC_DIR/mkcls/ProblemTest.cpp $SRC_DIR/mkcls/IterOptimization.cpp $SRC_DIR/mkcls/StatVar.cpp $SRC_DIR/mkcls/TAOptimization.cpp $SRC_DIR/mkcls/SAOptimization.cpp $SRC_DIR/mkcls/GDAOptimization.cpp $SRC_DIR/mkcls/MYOptimization.cpp $SRC_DIR/mkcls/RRTOptimization.cpp $SRC_DIR/mkcls/HCOptimization.cpp $SRC_DIR/mkcls/Optimization.cpp $SRC_DIR/mkcls/KategProblemWBC.cpp $SRC_DIR/mkcls/KategProblemTest.cpp
-
-
diff --git a/scripts/training/MGIZA/manual-compile/transfer.sh b/scripts/training/MGIZA/manual-compile/transfer.sh
deleted file mode 100755
index b635396..0000000
--- a/scripts/training/MGIZA/manual-compile/transfer.sh
+++ /dev/null
@@ -1,2 +0,0 @@
-scp mgiza symal snt2plain snt2cooc-reduce-mem-preprocess snt2cooc plain2snt mkcls hmmnorm d4norm ../scripts/merge_alignment.py s0565741@thor:/home/s0565741/RELEASE-1.0/binaries/mac-64bit/training-tools/mgiza
-
diff --git a/scripts/training/MGIZA/mgiza.anjuta b/scripts/training/MGIZA/mgiza.anjuta
deleted file mode 100644
index 2a7dcf1..0000000
--- a/scripts/training/MGIZA/mgiza.anjuta
+++ /dev/null
@@ -1,37 +0,0 @@
-<?xml version="1.0"?>
-<anjuta>
- <plugin name="GBF Project Manager"
- url="http://anjuta.org/plugins/"
- mandatory="yes">
- <require group="Anjuta Plugin"
- attribute="Interfaces"
- value="IAnjutaProjectManager"/>
- <require group="Project"
- attribute="Supported-Project-Types"
- value="automake"/>
- </plugin>
- <plugin name="Make Build System"
- url="http://anjuta.org/plugins/"
- mandatory="yes">
- <require group="Anjuta Plugin"
- attribute="Interfaces"
- value="IAnjutaBuildable"/>
- <require group="Build"
- attribute="Supported-Build-Types"
- value="make"/>
- </plugin>
- <plugin name="Task Manager"
- url="http://anjuta.org/plugins/"
- mandatory="no">
- <require group="Anjuta Plugin"
- attribute="Interfaces"
- value="IAnjutaTodo"/>
- </plugin>
- <plugin name="Debug Manager"
- url="http://anjuta.org/plugins/"
- mandatory="no">
- <require group="Anjuta Plugin"
- attribute="Interfaces"
- value="IAnjutaDebuggerManager"/>
- </plugin>
-</anjuta>
diff --git a/scripts/training/MGIZA/missing b/scripts/training/MGIZA/missing
deleted file mode 100755
index 1c8ff70..0000000
--- a/scripts/training/MGIZA/missing
+++ /dev/null
@@ -1,367 +0,0 @@
-#! /bin/sh
-# Common stub for a few missing GNU programs while installing.
-
-scriptversion=2006-05-10.23
-
-# Copyright (C) 1996, 1997, 1999, 2000, 2002, 2003, 2004, 2005, 2006
-# Free Software Foundation, Inc.
-# Originally by Fran,cois Pinard <pinard@iro.umontreal.ca>, 1996.
-
-# This program is free software; you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published by
-# the Free Software Foundation; either version 2, or (at your option)
-# any later version.
-
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-# GNU General Public License for more details.
-
-# You should have received a copy of the GNU General Public License
-# along with this program; if not, write to the Free Software
-# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
-# 02110-1301, USA.
-
-# As a special exception to the GNU General Public License, if you
-# distribute this file as part of a program that contains a
-# configuration script generated by Autoconf, you may include it under
-# the same distribution terms that you use for the rest of that program.
-
-if test $# -eq 0; then
- echo 1>&2 "Try \`$0 --help' for more information"
- exit 1
-fi
-
-run=:
-sed_output='s/.* --output[ =]\([^ ]*\).*/\1/p'
-sed_minuso='s/.* -o \([^ ]*\).*/\1/p'
-
-# In the cases where this matters, `missing' is being run in the
-# srcdir already.
-if test -f configure.ac; then
- configure_ac=configure.ac
-else
- configure_ac=configure.in
-fi
-
-msg="missing on your system"
-
-case $1 in
---run)
- # Try to run requested program, and just exit if it succeeds.
- run=
- shift
- "$@" && exit 0
- # Exit code 63 means version mismatch. This often happens
- # when the user try to use an ancient version of a tool on
- # a file that requires a minimum version. In this case we
- # we should proceed has if the program had been absent, or
- # if --run hadn't been passed.
- if test $? = 63; then
- run=:
- msg="probably too old"
- fi
- ;;
-
- -h|--h|--he|--hel|--help)
- echo "\
-$0 [OPTION]... PROGRAM [ARGUMENT]...
-
-Handle \`PROGRAM [ARGUMENT]...' for when PROGRAM is missing, or return an
-error status if there is no known handling for PROGRAM.
-
-Options:
- -h, --help display this help and exit
- -v, --version output version information and exit
- --run try to run the given command, and emulate it if it fails
-
-Supported PROGRAM values:
- aclocal touch file \`aclocal.m4'
- autoconf touch file \`configure'
- autoheader touch file \`config.h.in'
- autom4te touch the output file, or create a stub one
- automake touch all \`Makefile.in' files
- bison create \`y.tab.[ch]', if possible, from existing .[ch]
- flex create \`lex.yy.c', if possible, from existing .c
- help2man touch the output file
- lex create \`lex.yy.c', if possible, from existing .c
- makeinfo touch the output file
- tar try tar, gnutar, gtar, then tar without non-portable flags
- yacc create \`y.tab.[ch]', if possible, from existing .[ch]
-
-Send bug reports to <bug-automake@gnu.org>."
- exit $?
- ;;
-
- -v|--v|--ve|--ver|--vers|--versi|--versio|--version)
- echo "missing $scriptversion (GNU Automake)"
- exit $?
- ;;
-
- -*)
- echo 1>&2 "$0: Unknown \`$1' option"
- echo 1>&2 "Try \`$0 --help' for more information"
- exit 1
- ;;
-
-esac
-
-# Now exit if we have it, but it failed. Also exit now if we
-# don't have it and --version was passed (most likely to detect
-# the program).
-case $1 in
- lex|yacc)
- # Not GNU programs, they don't have --version.
- ;;
-
- tar)
- if test -n "$run"; then
- echo 1>&2 "ERROR: \`tar' requires --run"
- exit 1
- elif test "x$2" = "x--version" || test "x$2" = "x--help"; then
- exit 1
- fi
- ;;
-
- *)
- if test -z "$run" && ($1 --version) > /dev/null 2>&1; then
- # We have it, but it failed.
- exit 1
- elif test "x$2" = "x--version" || test "x$2" = "x--help"; then
- # Could not run --version or --help. This is probably someone
- # running `$TOOL --version' or `$TOOL --help' to check whether
- # $TOOL exists and not knowing $TOOL uses missing.
- exit 1
- fi
- ;;
-esac
-
-# If it does not exist, or fails to run (possibly an outdated version),
-# try to emulate it.
-case $1 in
- aclocal*)
- echo 1>&2 "\
-WARNING: \`$1' is $msg. You should only need it if
- you modified \`acinclude.m4' or \`${configure_ac}'. You might want
- to install the \`Automake' and \`Perl' packages. Grab them from
- any GNU archive site."
- touch aclocal.m4
- ;;
-
- autoconf)
- echo 1>&2 "\
-WARNING: \`$1' is $msg. You should only need it if
- you modified \`${configure_ac}'. You might want to install the
- \`Autoconf' and \`GNU m4' packages. Grab them from any GNU
- archive site."
- touch configure
- ;;
-
- autoheader)
- echo 1>&2 "\
-WARNING: \`$1' is $msg. You should only need it if
- you modified \`acconfig.h' or \`${configure_ac}'. You might want
- to install the \`Autoconf' and \`GNU m4' packages. Grab them
- from any GNU archive site."
- files=`sed -n 's/^[ ]*A[CM]_CONFIG_HEADER(\([^)]*\)).*/\1/p' ${configure_ac}`
- test -z "$files" && files="config.h"
- touch_files=
- for f in $files; do
- case $f in
- *:*) touch_files="$touch_files "`echo "$f" |
- sed -e 's/^[^:]*://' -e 's/:.*//'`;;
- *) touch_files="$touch_files $f.in";;
- esac
- done
- touch $touch_files
- ;;
-
- automake*)
- echo 1>&2 "\
-WARNING: \`$1' is $msg. You should only need it if
- you modified \`Makefile.am', \`acinclude.m4' or \`${configure_ac}'.
- You might want to install the \`Automake' and \`Perl' packages.
- Grab them from any GNU archive site."
- find . -type f -name Makefile.am -print |
- sed 's/\.am$/.in/' |
- while read f; do touch "$f"; done
- ;;
-
- autom4te)
- echo 1>&2 "\
-WARNING: \`$1' is needed, but is $msg.
- You might have modified some files without having the
- proper tools for further handling them.
- You can get \`$1' as part of \`Autoconf' from any GNU
- archive site."
-
- file=`echo "$*" | sed -n "$sed_output"`
- test -z "$file" && file=`echo "$*" | sed -n "$sed_minuso"`
- if test -f "$file"; then
- touch $file
- else
- test -z "$file" || exec >$file
- echo "#! /bin/sh"
- echo "# Created by GNU Automake missing as a replacement of"
- echo "# $ $@"
- echo "exit 0"
- chmod +x $file
- exit 1
- fi
- ;;
-
- bison|yacc)
- echo 1>&2 "\
-WARNING: \`$1' $msg. You should only need it if
- you modified a \`.y' file. You may need the \`Bison' package
- in order for those modifications to take effect. You can get
- \`Bison' from any GNU archive site."
- rm -f y.tab.c y.tab.h
- if test $# -ne 1; then
- eval LASTARG="\${$#}"
- case $LASTARG in
- *.y)
- SRCFILE=`echo "$LASTARG" | sed 's/y$/c/'`
- if test -f "$SRCFILE"; then
- cp "$SRCFILE" y.tab.c
- fi
- SRCFILE=`echo "$LASTARG" | sed 's/y$/h/'`
- if test -f "$SRCFILE"; then
- cp "$SRCFILE" y.tab.h
- fi
- ;;
- esac
- fi
- if test ! -f y.tab.h; then
- echo >y.tab.h
- fi
- if test ! -f y.tab.c; then
- echo 'main() { return 0; }' >y.tab.c
- fi
- ;;
-
- lex|flex)
- echo 1>&2 "\
-WARNING: \`$1' is $msg. You should only need it if
- you modified a \`.l' file. You may need the \`Flex' package
- in order for those modifications to take effect. You can get
- \`Flex' from any GNU archive site."
- rm -f lex.yy.c
- if test $# -ne 1; then
- eval LASTARG="\${$#}"
- case $LASTARG in
- *.l)
- SRCFILE=`echo "$LASTARG" | sed 's/l$/c/'`
- if test -f "$SRCFILE"; then
- cp "$SRCFILE" lex.yy.c
- fi
- ;;
- esac
- fi
- if test ! -f lex.yy.c; then
- echo 'main() { return 0; }' >lex.yy.c
- fi
- ;;
-
- help2man)
- echo 1>&2 "\
-WARNING: \`$1' is $msg. You should only need it if
- you modified a dependency of a manual page. You may need the
- \`Help2man' package in order for those modifications to take
- effect. You can get \`Help2man' from any GNU archive site."
-
- file=`echo "$*" | sed -n "$sed_output"`
- test -z "$file" && file=`echo "$*" | sed -n "$sed_minuso"`
- if test -f "$file"; then
- touch $file
- else
- test -z "$file" || exec >$file
- echo ".ab help2man is required to generate this page"
- exit 1
- fi
- ;;
-
- makeinfo)
- echo 1>&2 "\
-WARNING: \`$1' is $msg. You should only need it if
- you modified a \`.texi' or \`.texinfo' file, or any other file
- indirectly affecting the aspect of the manual. The spurious
- call might also be the consequence of using a buggy \`make' (AIX,
- DU, IRIX). You might want to install the \`Texinfo' package or
- the \`GNU make' package. Grab either from any GNU archive site."
- # The file to touch is that specified with -o ...
- file=`echo "$*" | sed -n "$sed_output"`
- test -z "$file" && file=`echo "$*" | sed -n "$sed_minuso"`
- if test -z "$file"; then
- # ... or it is the one specified with @setfilename ...
- infile=`echo "$*" | sed 's/.* \([^ ]*\) *$/\1/'`
- file=`sed -n '
- /^@setfilename/{
- s/.* \([^ ]*\) *$/\1/
- p
- q
- }' $infile`
- # ... or it is derived from the source name (dir/f.texi becomes f.info)
- test -z "$file" && file=`echo "$infile" | sed 's,.*/,,;s,.[^.]*$,,'`.info
- fi
- # If the file does not exist, the user really needs makeinfo;
- # let's fail without touching anything.
- test -f $file || exit 1
- touch $file
- ;;
-
- tar)
- shift
-
- # We have already tried tar in the generic part.
- # Look for gnutar/gtar before invocation to avoid ugly error
- # messages.
- if (gnutar --version > /dev/null 2>&1); then
- gnutar "$@" && exit 0
- fi
- if (gtar --version > /dev/null 2>&1); then
- gtar "$@" && exit 0
- fi
- firstarg="$1"
- if shift; then
- case $firstarg in
- *o*)
- firstarg=`echo "$firstarg" | sed s/o//`
- tar "$firstarg" "$@" && exit 0
- ;;
- esac
- case $firstarg in
- *h*)
- firstarg=`echo "$firstarg" | sed s/h//`
- tar "$firstarg" "$@" && exit 0
- ;;
- esac
- fi
-
- echo 1>&2 "\
-WARNING: I can't seem to be able to run \`tar' with the given arguments.
- You may want to install GNU tar or Free paxutils, or check the
- command line arguments."
- exit 1
- ;;
-
- *)
- echo 1>&2 "\
-WARNING: \`$1' is needed, and is $msg.
- You might have modified some files without having the
- proper tools for further handling them. Check the \`README' file,
- it often tells you about the needed prerequisites for installing
- this package. You may also peek at any GNU archive site, in case
- some other package would contain this missing \`$1' program."
- exit 1
- ;;
-esac
-
-exit 0
-
-# Local variables:
-# eval: (add-hook 'write-file-hooks 'time-stamp)
-# time-stamp-start: "scriptversion="
-# time-stamp-format: "%:y-%02m-%02d.%02H"
-# time-stamp-end: "$"
-# End:
diff --git a/scripts/training/MGIZA/scripts/CMakeLists.txt b/scripts/training/MGIZA/scripts/CMakeLists.txt
deleted file mode 100644
index 72d0f33..0000000
--- a/scripts/training/MGIZA/scripts/CMakeLists.txt
+++ /dev/null
@@ -1,4 +0,0 @@
-INSTALL(PROGRAMS force-align-moses.sh giza2bal.pl merge_alignment.py plain2snt-hasvcb.py sntpostproc.py force-align-moses-old.sh run.sh snt2cooc.pl
- DESTINATION scripts
- PERMISSIONS OWNER_EXECUTE OWNER_READ OWNER_WRITE
- )
diff --git a/scripts/training/MGIZA/scripts/TrimBlanks.sh b/scripts/training/MGIZA/scripts/TrimBlanks.sh
deleted file mode 100755
index 78bd28b..0000000
--- a/scripts/training/MGIZA/scripts/TrimBlanks.sh
+++ /dev/null
@@ -1,2 +0,0 @@
-sed -e 's/^[ \t]*//' -e 's/[ \t][ \t]*/ /g' -e 's/[ \t]*$//'
-
diff --git a/scripts/training/MGIZA/scripts/force-align-moses-old.sh b/scripts/training/MGIZA/scripts/force-align-moses-old.sh
deleted file mode 100755
index fd4cf12..0000000
--- a/scripts/training/MGIZA/scripts/force-align-moses-old.sh
+++ /dev/null
@@ -1,48 +0,0 @@
-#!/usr/bin/env bash
-
-MGIZA=${QMT_HOME}/bin/mgiza
-
-if [ $# -lt 4 ]; then
- echo "OK, this is simple, put me into your Moses training directory, link your source/target corpus" 1>&2
- echo "and run " $0 " PREFIX src_tag tgt_tag root-dir." 1>&2
- echo "and get force-aligned data: root-dir/giza.[src-tgt|tgt-src]/*.A3.final.* " 1>&2
- echo "make sure I can find PREFIX.src_tag-tgt_tag and PREFIX.tgt_tag-src_tag, and \${QMT_HOME} is set" 1>&2
- exit
-fi
-
-PRE=$1
-SRC=$2
-TGT=$3
-ROOT=$4
-
-mkdir -p $ROOT/giza.${SRC}-${TGT}
-mkdir -p $ROOT/giza.${TGT}-${SRC}
-mkdir -p $ROOT/corpus
-
-echo "Generating corpus file " 1>&2
-
-${QMT_HOME}/scripts/plain2snt-hasvcb.py corpus/$SRC.vcb corpus/$TGT.vcb ${PRE}.${SRC} ${PRE}.${TGT} $ROOT/corpus/${TGT}-${SRC}.snt $ROOT/corpus/${SRC}-${TGT}.snt $ROOT/corpus/$SRC.vcb $ROOT/corpus/$TGT.vcb
-
-ln -sf $PWD/corpus/$SRC.vcb.classes $PWD/corpus/$TGT.vcb.classes $ROOT/corpus/
-
-echo "Generating co-occurrence file " 1>&2
-
-${QMT_HOME}/bin/snt2cooc $ROOT/giza.${TGT}-${SRC}/$TGT-${SRC}.cooc $ROOT/corpus/$SRC.vcb $ROOT/corpus/$TGT.vcb $ROOT/corpus/${TGT}-${SRC}.snt
-${QMT_HOME}/bin//snt2cooc $ROOT/giza.${SRC}-${TGT}/$SRC-${TGT}.cooc $ROOT/corpus/$TGT.vcb $ROOT/corpus/$SRC.vcb $ROOT/corpus/${SRC}-${TGT}.snt
-
-echo "Running force alignment " 1>&2
-
-$MGIZA giza.$TGT-$SRC/$TGT-$SRC.gizacfg -c $ROOT/corpus/$TGT-$SRC.snt -o $ROOT/giza.${TGT}-${SRC}/$TGT-${SRC} \
--s $ROOT/corpus/$SRC.vcb -t $ROOT/corpus/$TGT.vcb -m1 0 -m2 0 -mh 0 -coocurrence $ROOT/giza.${TGT}-${SRC}/$TGT-${SRC}.cooc \
--restart 11 -previoust giza.$TGT-$SRC/$TGT-$SRC.t3.final \
--previousa giza.$TGT-$SRC/$TGT-$SRC.a3.final -previousd giza.$TGT-$SRC/$TGT-$SRC.d3.final \
--previousn giza.$TGT-$SRC/$TGT-$SRC.n3.final -previousd4 giza.$TGT-$SRC/$TGT-$SRC.d4.final \
--previousd42 giza.$TGT-$SRC/$TGT-$SRC.D4.final -m3 0 -m4 1
-
-$MGIZA giza.$SRC-$TGT/$SRC-$TGT.gizacfg -c $ROOT/corpus/$SRC-$TGT.snt -o $ROOT/giza.${SRC}-${TGT}/$SRC-${TGT} \
--s $ROOT/corpus/$TGT.vcb -t $ROOT/corpus/$SRC.vcb -m1 0 -m2 0 -mh 0 -coocurrence $ROOT/giza.${SRC}-${TGT}/$SRC-${TGT}.cooc \
--restart 11 -previoust giza.$SRC-$TGT/$SRC-$TGT.t3.final \
--previousa giza.$SRC-$TGT/$SRC-$TGT.a3.final -previousd giza.$SRC-$TGT/$SRC-$TGT.d3.final \
--previousn giza.$SRC-$TGT/$SRC-$TGT.n3.final -previousd4 giza.$SRC-$TGT/$SRC-$TGT.d4.final \
--previousd42 giza.$SRC-$TGT/$SRC-$TGT.D4.final -m3 0 -m4 1
-
diff --git a/scripts/training/MGIZA/scripts/force-align-moses.sh b/scripts/training/MGIZA/scripts/force-align-moses.sh
deleted file mode 100755
index ac95bcb..0000000
--- a/scripts/training/MGIZA/scripts/force-align-moses.sh
+++ /dev/null
@@ -1,49 +0,0 @@
-#!/usr/bin/env bash
-
-MGIZA=${QMT_HOME}/bin/mgiza
-
-if [ $# -lt 4 ]; then
- echo "OK, this is simple, put me into your Moses training directory, link your source/target corpus" 1>&2
- echo "and run " $0 " PREFIX src_tag tgt_tag root-dir." 1>&2
- echo "and get force-aligned data: root-dir/giza.[src-tgt|tgt-src]/*.A3.final.* " 1>&2
- echo "make sure I can find PREFIX.src_tag-tgt_tag and PREFIX.tgt_tag-src_tag, and \${QMT_HOME} is set" 1>&2
- exit
-fi
-
-PRE=$1
-SRC=$2
-TGT=$3
-ROOT=$4
-NUM=$5
-
-mkdir -p $ROOT/giza-inverse.${NUM}
-mkdir -p $ROOT/giza.${NUM}
-mkdir -p $ROOT/prepared.${NUM}
-
-echo "Generating corpus file " 1>&2
-
-${QMT_HOME}/scripts/plain2snt-hasvcb.py prepared.${NUM}/$SRC.vcb prepared.${NUM}/$TGT.vcb ${PRE}.${SRC} ${PRE}.${TGT} $ROOT/prepared.${NUM}/${TGT}-${SRC}.snt $ROOT/prepared.${NUM}/${SRC}-${TGT}.snt $ROOT/prepared.${NUM}/$SRC.vcb $ROOT/prepared.${NUM}/$TGT.vcb
-
-ln -sf $PWD/prepared.${NUM}/$SRC.vcb.classes $PWD/prepared.${NUM}/$TGT.vcb.classes $ROOT/prepared.${NUM}/
-
-echo "Generating co-occurrence file " 1>&2
-
-${QMT_HOME}/bin/snt2cooc $ROOT/giza.${NUM}/$TGT-${SRC}.cooc $ROOT/prepared.${NUM}/$SRC.vcb $ROOT/prepared.${NUM}/$TGT.vcb $ROOT/prepared.${NUM}/${TGT}-${SRC}.snt
-${QMT_HOME}/bin//snt2cooc $ROOT/giza-inverse.${NUM}/$SRC-${TGT}.cooc $ROOT/prepared.${NUM}/$TGT.vcb $ROOT/prepared.${NUM}/$SRC.vcb $ROOT/prepared.${NUM}/${SRC}-${TGT}.snt
-
-echo "Running force alignment " 1>&2
-
-$MGIZA giza.$TGT-$SRC/$TGT-$SRC.gizacfg -c $ROOT/prepared.${NUM}/$TGT-$SRC.snt -o $ROOT/giza.${NUM}/$TGT-${SRC} \
--s $ROOT/prepared.${NUM}/$SRC.vcb -t $ROOT/prepared.${NUM}/$TGT.vcb -m1 0 -m2 0 -mh 0 -coocurrence $ROOT/giza.${NUM}/$TGT-${SRC}.cooc \
--restart 11 -previoust giza.$TGT-$SRC/$TGT-$SRC.t3.final \
--previousa giza.$TGT-$SRC/$TGT-$SRC.a3.final -previousd giza.$TGT-$SRC/$TGT-$SRC.d3.final \
--previousn giza.$TGT-$SRC/$TGT-$SRC.n3.final -previousd4 giza.$TGT-$SRC/$TGT-$SRC.d4.final \
--previousd42 giza.$TGT-$SRC/$TGT-$SRC.D4.final -m3 0 -m4 1
-
-$MGIZA giza.$SRC-$TGT/$SRC-$TGT.gizacfg -c $ROOT/prepared.${NUM}/$SRC-$TGT.snt -o $ROOT/giza-inverse.${NUM}/$SRC-${TGT} \
--s $ROOT/prepared.${NUM}/$TGT.vcb -t $ROOT/prepared.${NUM}/$SRC.vcb -m1 0 -m2 0 -mh 0 -coocurrence $ROOT/giza-inverse.${NUM}/$SRC-${TGT}.cooc \
--restart 11 -previoust giza.$SRC-$TGT/$SRC-$TGT.t3.final \
--previousa giza.$SRC-$TGT/$SRC-$TGT.a3.final -previousd giza.$SRC-$TGT/$SRC-$TGT.d3.final \
--previousn giza.$SRC-$TGT/$SRC-$TGT.n3.final -previousd4 giza.$SRC-$TGT/$SRC-$TGT.d4.final \
--previousd42 giza.$SRC-$TGT/$SRC-$TGT.D4.final -m3 0 -m4 1
-
diff --git a/scripts/training/MGIZA/scripts/giza2bal.pl b/scripts/training/MGIZA/scripts/giza2bal.pl
deleted file mode 100755
index fb134c0..0000000
--- a/scripts/training/MGIZA/scripts/giza2bal.pl
+++ /dev/null
@@ -1,112 +0,0 @@
-#! /usr/bin/perl
-
-# $Id: giza2bal.pl 1562 2008-02-19 20:48:14Z redpony $
-#Converts direct and inverted alignments into a more compact
-#bi-alignment format. It optionally reads the counting file
-#produced by giza containing the frequency of each traning sentence.
-
-#Copyright Marcello Federico, November 2004
-
-($cnt,$dir,$inv)=();
-
-while ($w=shift @ARGV){
- $dir=shift(@ARGV),next if $w eq "-d";
- $inv=shift(@ARGV),next if $w eq "-i";
- $cnt=shift(@ARGV),next if $w eq "-c";
-}
-
-my $lc = 0;
-
-if (!$dir || !inv){
- print "usage: giza2bal.pl [-c <count-file>] -d <dir-align-file> -i <inv-align-file>\n";
- print "input files can be also commands, e.g. -d \"gunzip -c file.gz\"\n";
- exit(0);
-}
-
-$|=1;
-
-open(DIR,"<$dir") || open(DIR,"$dir|") || die "cannot open $dir\n";
-open(INV,"<$inv") || open(INV,"$inv|") || die "cannot open $dir\n";
-
-if ($cnt){
-open(CNT,"<$cnt") || open(CNT,"$cnt|") || die "cannot open $dir\n";
-}
-
-
-sub ReadBiAlign{
- local($fd0,$fd1,$fd2,*s1,*s2,*a,*b,*c)=@_;
- local($dummy,$n);
-
- chop($c=<$fd0>); ## count
- $dummy=<$fd0>; ## header
- $dummy=<$fd0>; ## header
- $c=1 if !$c;
-
- $dummy=<$fd1>; ## header
- chop($s1=<$fd1>);
- chop($t1=<$fd1>);
-
- $dummy=<$fd2>; ## header
- chop($s2=<$fd2>);
- chop($t2=<$fd2>);
-
- @a=@b=();
- $lc++;
-
- #get target statistics
- $n=1;
- $t1=~s/NULL \(\{((\s+\d+)*)\s+\}\)//;
- while ($t1=~s/(\S+)\s+\(\{((\s+\d+)*)\s+\}\)//){
- grep($a[$_]=$n,split(/\s+/,$2));
- $n++;
- }
-
- $m=1;
- $t2=~s/NULL \(\{((\s+\d+)*)\s+\}\)//;
- while ($t2=~s/(\S+)\s+\(\{((\s+\d+)*)\s+\}\)//){
- grep($b[$_]=$m,split(/\s+/,$2));
- $m++;
- }
-
- $M=split(/\s+/,$s1);
- $N=split(/\s+/,$s2);
-
- if ($m != ($M+1) || $n != ($N+1)) {
- print STDERR "Sentence mismatch error! Line #$lc\n";
- $s1 = "ALIGN_ERR";
- $s2 = "ALIGN_ERR";
- @a=(); @b=();
- for ($j=1;$j<2;$j++){ $a[$j]=1; }
- for ($i=1;$i<2;$i++){ $b[$i]=1; }
- return 1;
- }
-
- for ($j=1;$j<$m;$j++){
- $a[$j]=0 if !$a[$j];
- }
-
- for ($i=1;$i<$n;$i++){
- $b[$i]=0 if !$b[$i];
- }
-
-
- return 1;
-}
-
-$skip=0;
-$ccc=0;
-while(!eof(DIR)){
-
- if (ReadBiAlign(CNT,DIR,INV,*src,*tgt,*a,*b,*c))
- {
- $ccc++;
- print "$c\n";
- print $#a," $src \# @a[1..$#a]\n";
- print $#b," $tgt \# @b[1..$#b]\n";
- }
- else{
- print "\n";
- print STDERR "." if !(++$skip % 1000);
- }
-};
-print STDERR "skip=<$skip> counts=<$ccc>\n";
diff --git a/scripts/training/MGIZA/scripts/merge_alignment.py b/scripts/training/MGIZA/scripts/merge_alignment.py
deleted file mode 100755
index 626bc68..0000000
--- a/scripts/training/MGIZA/scripts/merge_alignment.py
+++ /dev/null
@@ -1,80 +0,0 @@
-#!/usr/bin/env python
-# Author : Qin Gao
-# Date : Dec 31, 2007
-# Purpose: Combine multiple alignment files into a single one, the files are
-# prodcuced by MGIZA, which has sentence IDs, and every file is
-# ordered inside
-
-import sys
-import re
-
-if len(sys.argv)<2:
- sys.stderr.write("Provide me the file names (at least 2)\n");
- sys.exit();
-
-sent_id = 0;
-
-files = [];
-ids = [];
-
-sents = [];
-done = [];
-
-for i in range(1,len(sys.argv)):
- files.append(open(sys.argv[i],"r"));
- ids.append(0);
- sents.append("");
- done.append(False);
-
-r = re.compile("\\((\\d+)\\)");
-i = 0;
-while i< len(files):
- st1 = files[i].readline();
- st2 = files[i].readline();
- st3 = files[i].readline();
- if len(st1)==0 or len(st2)==0 or len(st3)==0:
- done[i] = True;
- else:
- mt = r.search(st1);
- id = int(mt.group(1));
- ids[i] = id;
- sents[i] = (st1, st2, st3);
- i += 1
-
-cont = True;
-while (cont):
- sent_id += 1;
- writeOne = False;
-# Now try to read more sentences
- i = 0;
- cont = False;
- while i < len(files):
- if done[i]:
- i+=1
- continue;
- cont = True;
- if ids[i] == sent_id:
- sys.stdout.write("%s%s%s"%(sents[i][0],sents[i][1],sents[i][2]));
- writeOne = True;
- st1 = files[i].readline();
- st2 = files[i].readline();
- st3 = files[i].readline();
- if len(st1)==0 or len(st2)==0 or len(st3)==0:
- done[i] = True;
- else:
- mt = r.search(st1);
- id = int(mt.group(1));
- ids[i] = id;
- sents[i] = (st1, st2, st3);
- cont = True;
- break;
- elif ids[i] < sent_id:
- sys.stderr.write("ERROR! DUPLICATED ENTRY %d\n" % ids[i]);
- sys.exit();
- else:
- cont = True;
- i+=1;
- if (not writeOne) and cont:
- sys.stderr.write("ERROR! MISSING ENTRy %d\n" % sent_id);
- #sys.exit();
-sys.stderr.write("Combined %d files, totally %d sents \n" %(len(files),sent_id-1));
diff --git a/scripts/training/MGIZA/scripts/plain2snt-hasvcb.py b/scripts/training/MGIZA/scripts/plain2snt-hasvcb.py
deleted file mode 100755
index 490c493..0000000
--- a/scripts/training/MGIZA/scripts/plain2snt-hasvcb.py
+++ /dev/null
@@ -1,93 +0,0 @@
-#!/usr/bin/env python
-
-from sys import *
-
-def loadvcb(fname,out):
- dict={};
- df = open(fname,"r");
- for line in df:
- out.write(line);
- ws = line.strip().split();
- id = int(ws[0]);
- wd = ws[1];
- dict[wd]=id;
- return dict;
-
-if len(argv)<9:
- stderr.write("Error, the input should be \n");
- stderr.write("%s evcb fvcb etxt ftxt esnt(out) fsnt(out) evcbx(out) fvcbx(out)\n" % argv[0]);
- stderr.write("You should concatenate the evcbx and fvcbx to existing vcb files\n");
- exit();
-
-ein = open(argv[3],"r");
-fin = open(argv[4],"r");
-
-eout = open(argv[5],"w");
-fout = open(argv[6],"w");
-
-evcbx = open(argv[7],"w");
-fvcbx = open(argv[8],"w");
-evcb = loadvcb(argv[1],evcbx);
-fvcb = loadvcb(argv[2],fvcbx);
-
-i=0
-while True:
- i+=1;
- eline=ein.readline();
- fline=fin.readline();
- if len(eline)==0 or len(fline)==0:
- break;
- ewords = eline.strip().split();
- fwords = fline.strip().split();
- el = [];
- fl = [];
- j=0;
- for w in ewords:
- j+=1
- if evcb.has_key(w):
- el.append(evcb[w]);
- else:
- if evcb.has_key(w.lower()):
- el.append(evcb[w.lower()]);
- else:
- ##stdout.write("#E %d %d %s\n" % (i,j,w))
- #el.append(1);
- nid = len(evcb)+1;
- evcb[w.lower()] = nid;
- evcbx.write("%d %s 1\n" % (nid, w));
- el.append(nid);
-
- j=0;
- for w in fwords:
- j+=1
- if fvcb.has_key(w):
- fl.append(fvcb[w]);
- else:
- if fvcb.has_key(w.lower()):
- fl.append(fvcb[w.lower()]);
- else:
- #stdout.write("#F %d %d %s\n" % (i,j,w))
- nid = len(fvcb)+1;
- fvcb[w.lower()] = nid;
- fvcbx.write("%d %s 1\n" % (nid, w));
- fl.append(nid);
- #fl.append(1);
- eout.write("1\n");
- fout.write("1\n");
- for I in el:
- eout.write("%d " % I);
- eout.write("\n");
- for I in fl:
- eout.write("%d " % I);
- fout.write("%d " % I);
- eout.write("\n");
- fout.write("\n");
- for I in el:
- fout.write("%d " % I);
- fout.write("\n");
-
-fout.close();
-eout.close();
-fvcbx.close();
-evcbx.close();
-
diff --git a/scripts/training/MGIZA/scripts/run.sh b/scripts/training/MGIZA/scripts/run.sh
deleted file mode 100755
index 2bb3972..0000000
--- a/scripts/training/MGIZA/scripts/run.sh
+++ /dev/null
@@ -1,23 +0,0 @@
-PRE=test
-SRC=fr
-TGT=en
-NUM=1
-SCRIPT_DIR=/opt/AO/sw/edinburgh-code/scripts-20110926-1425
-
-export QMT_HOME=/root/workspace/mgizapp
-
-rm -rf out
-
-$QMT_HOME/scripts/force-align-moses.sh $PRE $SRC $TGT out 1
-
-echo "FINISHED forced alignment"
-
-$SCRIPT_DIR/../merge_alignment.py out/giza-inverse.$NUM/$SRC-$TGT.A3.final.part* | gzip -c > out/giza-inverse.$NUM/$SRC-$TGT.A3.final.gz
-$SCRIPT_DIR/../merge_alignment.py out/giza.$NUM/$TGT-$SRC.A3.final.part* | gzip -c > out/giza.$NUM/$TGT-$SRC.A3.final.gz
-
-$SCRIPT_DIR/training/symal/giza2bal.pl -d "gzip -cd out/giza.$NUM/$TGT-$SRC.A3.final.gz" -i "gzip -cd out/giza-inverse.$NUM/$SRC-$TGT.A3.final.gz" | $SCRIPT_DIR/training/symal/symal -alignment="grow" -diagonal="yes" -final="yes" -both="yes" > out/aligned.1.grow-diag-final-and
-
-echo "FINISHED giza2bal & symal"
-
-$SCRIPT_DIR/training/phrase-extract/extract $PRE.$TGT $PRE.$SRC out/aligned.1.grow-diag-final-and out/extract.1 7 orientation --model wbe-msd
-
diff --git a/scripts/training/MGIZA/scripts/snt2cooc.pl b/scripts/training/MGIZA/scripts/snt2cooc.pl
deleted file mode 100755
index 5bbefab..0000000
--- a/scripts/training/MGIZA/scripts/snt2cooc.pl
+++ /dev/null
@@ -1,62 +0,0 @@
-#!/usr/bin/perl -w
-
-# sntcooc.perl [-sort-buffer-size 200M] [-sort-batch-size 253] [-sort-compress gzip] output vcb1 vcb2 snt12
-
-use strict;
-use File::Basename;
-use FindBin qw($Bin);
-
-sub systemCheck($);
-
-my $sortArgs = "";
-for (my $i = 0; $i < (@ARGV - 4); ++$i)
-{
- my $arg = $ARGV[$i];
- if ($arg eq "-sort-buffer-size")
- {
- $sortArgs .= " -S " .$ARGV[++$i];
- }
- elsif ($arg eq "-sort-batch-size")
- {
- $sortArgs .= " --batch-size " .$ARGV[++$i];
- }
- elsif ($arg eq "-sort-compress")
- {
- $sortArgs .= " --compress-program " .$ARGV[++$i];
- }
-}
-
-my $out = $ARGV[@ARGV - 4];
-my $vcb1 = $ARGV[@ARGV - 3];
-my $vcb2 = $ARGV[@ARGV - 2];
-my $snt12 = $ARGV[@ARGV - 1];
-
-my $SORT_EXEC = `gsort --help 2>/dev/null`;
-if($SORT_EXEC) {
- $SORT_EXEC = 'gsort';
-}
-else {
- $SORT_EXEC = 'sort';
-}
-
-my $TMPDIR=dirname($out);
-
-my $cmd;
-$cmd = "$Bin/snt2coocrmp $vcb1 $vcb2 $snt12 ";
-$cmd .= "| $SORT_EXEC $sortArgs -T $TMPDIR -nk 1 -nk 2 | uniq > $out";
-systemCheck($cmd);
-
-#############################
-
-sub systemCheck($)
-{
- my $cmd = shift;
- print STDERR "Executing $cmd \n";
-
- my $retVal = system($cmd);
- if ($retVal != 0)
- {
- exit(1);
- }
-}
-
diff --git a/scripts/training/MGIZA/scripts/sntpostproc.py b/scripts/training/MGIZA/scripts/sntpostproc.py
deleted file mode 100755
index b3bf528..0000000
--- a/scripts/training/MGIZA/scripts/sntpostproc.py
+++ /dev/null
@@ -1,116 +0,0 @@
-#!/usr/bin/env python
-
-# This script post process the snt file -- either in single-line format or in multi-line format
-# The output, however, will always be in single-line format
-
-from sys import *
-from optparse import OptionParser
-import re;
-usage = """
-The script post process the snt file, the input could be single-line snt
-file or multi-line, (triple line) and can insert sentence weight to the
-file (-w) or add partial alignment to the file (-a)
-Usage %prog -s sntfile -w weight-file -a alignfile -o outputfile
-"""
-parser = OptionParser(usage=usage)
-
-
-parser = OptionParser()
-
-parser.add_option("-s", "--snt", dest="snt",default=None,
- help="The input snt file", metavar="FILE")
-
-parser.add_option("-w", "--weight", dest="weight",default=None,
- help="The input weight file", metavar="FILE")
-
-
-parser.add_option("-o", "--output", dest="output",default="-",
- help="The input partial alignment file, one sentence per line", metavar="FILE")
-
-parser.add_option("-a", "--align", dest="align",default=None,
- help="The input partial alignment file, one sentence per line", metavar="FILE")
-
-
-(options, args) = parser.parse_args()
-
-if options.snt == None:
- parser.print_help();
- exit();
-else:
- sfile = open(options.snt,"r");
-
-if options.output=="-":
- ofile = stdout;
-else:
- ofile = open(options.output,"w");
-
-wfile = None;
-
-if options.weight <> None:
- wfile = open(options.weight,"r");
-
-afile = None;
-if options.align <> None:
- afile = open(options.align,"r");
-
-rr = re.compile("[\\|\\#\\*]");
-wt = 0.0;
-al = {};
-e = "";
-f = "";
-
-def parse_ax(line):
- alq = {};
- als = line.strip().split(" ");
- for e in als:
- if len(e.strip())>0:
- alo = e.split("-");
- if len(alo)==2:
- alq[tuple(alo)] = 1;
- return alq;
-
-
-
-
-
-
-while True:
- l = sfile.readline();
- if len(l) == 0:
- break;
- lp = rr.split(l.strip());
- if len(lp)>=3:
- wt = float(lp[0]);
- e = lp[1];
- f = lp[2];
- if len(lp) > 3:
- al = parse_ax(lp[3]);
- else:
- al = {};
- else:
- wt = float(l);
- e = sfile.readline().strip();
- f = sfile.readline().strip();
- al={}
- if wfile <> None:
- lw = wfile.readline().strip();
- if len(lw)>0:
- wt = float(lw);
- else:
- wt = 1;
- if afile <> None:
- la = afile.readline().strip();
- if len(la)>0:
- al1 = parse_ax(la);
- for entry in al1.keys():
- al[entry] = 1;
-
- ofile.write("%g | %s | %s" % (wt, e, f));
- if len(al)>0:
- ofile.write(" |");
-
- for entry in al.keys():
- ofile.write(" %s-%s" % entry);
- ofile.write("\n");
-
-
diff --git a/scripts/training/MGIZA/scripts/symal.sh b/scripts/training/MGIZA/scripts/symal.sh
deleted file mode 100644
index a446beb..0000000
--- a/scripts/training/MGIZA/scripts/symal.sh
+++ /dev/null
@@ -1,15 +0,0 @@
-#!/usr/bin/env bash
-
-OUTPUT=$1
-shift
-GIZA2BAL=$1
-shift
-SYMAL=$1
-shift
-STOT=$1
-shift
-TTOS=$1
-shift
-
-perl $GIZA2BAL -d ${STOT} -i ${TTOS} | $SYMAL $* > $OUTPUT
-
diff --git a/scripts/training/MGIZA/src/.cvsignore b/scripts/training/MGIZA/src/.cvsignore
deleted file mode 100644
index 6c5271f..0000000
--- a/scripts/training/MGIZA/src/.cvsignore
+++ /dev/null
@@ -1,17 +0,0 @@
-.libs
-.deps
-.*swp
-.nautilus-metafile.xml
-*.autosave
-*.bak
-*~
-#*#
-*.gladep
-*.la
-*.lo
-*.o
-*.class
-*.pyc
-*.plugin
-Makefile
-Makefile.in
diff --git a/scripts/training/MGIZA/src/ATables.cpp b/scripts/training/MGIZA/src/ATables.cpp
deleted file mode 100644
index 8cc4ad2..0000000
--- a/scripts/training/MGIZA/src/ATables.cpp
+++ /dev/null
@@ -1,212 +0,0 @@
-/*
-
-EGYPT Toolkit for Statistical Machine Translation
-Written by Yaser Al-Onaizan, Jan Curin, Michael Jahr, Kevin Knight, John Lafferty, Dan Melamed, David Purdy, Franz Och, Noah Smith, and David Yarowsky.
-
-This program is free software; you can redistribute it and/or
-modify it under the terms of the GNU General Public License
-as published by the Free Software Foundation; either version 2
-of the License, or (at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with this program; if not, write to the Free Software
-Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
-USA.
-
-*/
-#include "ATables.h"
-#include "Globals.h"
-#include "myassert.h"
-#include "Parameter.h"
-
-GLOBAL_PARAMETER(bool,CompactADTable,"compactadtable","1: only 3-dimensional alignment table for IBM-2 and IBM-3",PARLEV_MODELS,1);
-GLOBAL_PARAMETER(float,amodel_smooth_factor,"model23SmoothFactor","smoothing parameter for IBM-2/3 (interpolation with constant)",PARLEV_SMOOTH,0.0);
-
-template <class VALTYPE>
-void amodel<VALTYPE>::printTable(const char *filename) const{
- // print amodel to file with the name filename (it'll be created or overwritten
- // format : for a table :
- // aj j l m val
- // where aj is source word pos, j target word pos, l source sentence length,
- // m is target sentence length.
- //
- //return;
- if (is_distortion)
- cout << "Dumping pruned distortion table (d) to file:" << filename <<'\n';
- else
- cout << "Dumping pruned alignment table (a) to file:" << filename <<'\n';
-
- ofstream of(filename);
- double ssum=0.0;
- for(WordIndex l=0; l < MaxSentLength; l++){
- for(WordIndex m=0;m<MaxSentLength;m++){
- if( CompactADTable && l!=m )
- continue;
- unsigned int L=((CompactADTable&&is_distortion)?MaxSentLength:(l+1))-1;
- unsigned int M=((CompactADTable&&!is_distortion)?MaxSentLength:(m+1))-1;
- if( is_distortion==0 ){
- for(WordIndex j=1;j<=M; j++){
- double sum=0.0;
- for(WordIndex i=0;i<=L; i++){
- VALTYPE x=getValue(i, j, L, M);
- if( x>PROB_SMOOTH ){
- of << i << ' ' << j << ' ' << L << ' ' << M << ' ' << x << '\n';
- sum+=x;
- }
- }
- ssum+=sum;
- }
- }else{
- for(WordIndex i=0;i<=L;i++){
- double sum=0.0;
- for(WordIndex j=1;j<=M;j++){
- VALTYPE x=getValue(j, i, L, M);
- if( x>PROB_SMOOTH ){
- of << j << ' ' << i << ' ' << L << ' ' << M << ' ' << x << '\n';
- sum+=x;
- }
- }
- ssum+=sum;
- }
- }
- }
- }
-}
-
-template <class VALTYPE>
-void amodel<VALTYPE>::printRealTable(const char *filename) const{
- // print amodel to file with the name filename (it'll be created or overwritten
- // format : for a table :
- // aj j l m val
- // where aj is source word pos, j target word pos, l source sentence length,
- // m is target sentence length.
- //
- //return;
- if (is_distortion)
- cout << "Dumping not pruned distortion table (d) to file:" << filename <<'\n';
- else
- cout << "Dumping not pruned alignment table (a) to file:" << filename <<'\n';
-
- ofstream of(filename);
- for(WordIndex l=0; l < MaxSentLength; l++){
- for(WordIndex m=0;m<MaxSentLength;m++){
- if( CompactADTable && l!=m )
- continue;
- unsigned int L=((CompactADTable&&is_distortion)?MaxSentLength:(l+1))-1;
- unsigned int M=((CompactADTable&&!is_distortion)?MaxSentLength:(m+1))-1;
- if( is_distortion==0 ){
- for(WordIndex j=1;j<=M; j++){
- for(WordIndex i=0;i<=L; i++){
- VALTYPE x=getValue(i, j, L, M);
- if( x>MINCOUNTINCREASE )
- of << i << ' ' << j << ' ' << L << ' ' << M << ' ' << x << '\n';
- }
- }
- }else{
- for(WordIndex i=0;i<=L;i++){
- for(WordIndex j=1;j<=M;j++){
- VALTYPE x=getValue(j, i, L, M);
- if( x>MINCOUNTINCREASE )
- of << j << ' ' << i << ' ' << L << ' ' << M << ' ' << x << '\n';
- }
- }
- }
- }
- }
-}
-
-extern short NoEmptyWord;
-
-template <class VALTYPE>
-bool amodel<VALTYPE>::readTable(const char *filename){
- /* This function reads the a table from a file.
- Each line is of the format: aj j l m val
- where aj is the source word position, j the target word position,
- l the source sentence length, and m the target sentence length
-
- This function also works for a d table, where the positions
- of aj and i are swapped. Both the a and d tables are 4 dimensional
- hashes; this function will simply read in the four values and keep
- them in that order when hashing the fifth value.
- NAS, 7/11/99
- */
- ifstream inf(filename);
- cout << "Reading a/d table from " << filename << "\n";
- if(!inf){
- cerr << "\nERROR: Cannot open " << filename<<"\n";
- return false;
- }
- WordIndex w, x, l, m;
- VALTYPE prob;
- while(inf >> w >> x >> l >> m >> prob )
- // the NULL word is added to the length
- // of the sentence in the tables, but discount it when you write the tables.
- setValue(w, x, l, m, prob);
- return true;
-}
-
-template <class VALTYPE>
-bool amodel<VALTYPE>::readAugTable(const char *filename){
- /* This function reads the a table from a file.
- Each line is of the format: aj j l m val
- where aj is the source word position, j the target word position,
- l the source sentence length, and m the target sentence length
-
- This function also works for a d table, where the positions
- of aj and i are swapped. Both the a and d tables are 4 dimensional
- hashes; this function will simply read in the four values and keep
- them in that order when hashing the fifth value.
- NAS, 7/11/99
- */
- ifstream inf(filename);
- cout << "Reading a/d table from " << filename << "\n";
- if(!inf){
- cerr << "\nERROR: Cannot open " << filename<<"\n";
- return false;
- }
- WordIndex w, x, l, m;
- VALTYPE prob;
- while(inf >> w >> x >> l >> m >> prob )
- // the NULL word is added to the length
- // of the sentence in the tables, but discount it when you write the tables.
- addValue(w, x, l, m, prob);
- return true;
-}
-
-template <class VALTYPE>
-bool amodel<VALTYPE>::merge(amodel<VALTYPE>& am){
- cout << "start merging " <<"\n";
- for(WordIndex l=0; l < MaxSentLength; l++){
- for(WordIndex m=0;m<MaxSentLength;m++){
- if( CompactADTable && l!=m )
- continue;
- unsigned int L=((CompactADTable&&is_distortion)?MaxSentLength:(l+1))-1;
- unsigned int M=((CompactADTable&&!is_distortion)?MaxSentLength:(m+1))-1;
- if( is_distortion==0 ){
- for(WordIndex j=1;j<=M; j++){
- for(WordIndex i=0;i<=L; i++){
- VALTYPE x=am.getValue(i, j, L, M);
- addValue(i,j,L,M,x);
- }
- }
- }else{
- for(WordIndex i=0;i<=L;i++){
- for(WordIndex j=1;j<=M;j++){
- VALTYPE x=am.getValue(j, i, L, M);
- addValue(j,i,L,M,x);
- }
- }
- }
- }
- }
- return true;
-}
-
-
-template class amodel<COUNT> ;
-//template class amodel<PROB> ;
diff --git a/scripts/training/MGIZA/src/ATables.h b/scripts/training/MGIZA/src/ATables.h
deleted file mode 100644
index 9db77b1..0000000
--- a/scripts/training/MGIZA/src/ATables.h
+++ /dev/null
@@ -1,193 +0,0 @@
-/*
-
-EGYPT Toolkit for Statistical Machine Translation
-Written by Yaser Al-Onaizan, Jan Curin, Michael Jahr, Kevin Knight, John Lafferty, Dan Melamed, David Purdy, Franz Och, Noah Smith, and David Yarowsky.
-
-This program is free software; you can redistribute it and/or
-modify it under the terms of the GNU General Public License
-as published by the Free Software Foundation; either version 2
-of the License, or (at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with this program; if not, write to the Free Software
-Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
-USA.
-
-*/
-/* --------------------------------------------------------------------------*
- * *
- * Module :ATables *
- * *
- * Prototypes File: ATables.h *
- * *
- * Objective: Defines clases and methods for handling I/O for distortion & *
- * alignment tables. *
- *****************************************************************************/
-
-#ifndef _atables_h
-#define _atables_h 1
-
-#include "defs.h"
-#include <cassert>
-#include <iostream>
-#include <algorithm>
-#include <functional>
-#include <map>
-#include <set>
-#include "Vector.h"
-#include <utility>
-#if __GNUC__>2
-#include <ext/hash_map>
-using __gnu_cxx::hash_map;
-#else
-#include <hash_map>
-#endif
-#include <fstream>
-#include "Array4.h"
-#include "myassert.h"
-#include "Globals.h"
-#include "syncObj.h"
-
-extern bool CompactADTable;
-extern float amodel_smooth_factor;
-extern short NoEmptyWord;
-
-/* ------------------- Class Defintions of amodel ---------------------------*/
-/* Class Name: amodel:
- Objective: This defines the underlying data structure for distortiont prob.
- and count tables. They are defined as a hash table. Each entry in the hash
- table is the probability (d(j/l,m,i), where j is word target position, i is
- source word position connected to it, m is target sentence length, and l is
- source sentence length) or count collected for it. The probability and the
- count are represented as log integer probability as
- defined by the class LogProb .
-
- This class is used to represents a Tables (probabiliity) and d (distortion)
- tables and also their corresponding count tables .
-
- *--------------------------------------------------------------------------*/
-
-inline int Mabs(int a){
- if(a<0)
- return -a;
- else
- return a;
-}
-
-template <class VALTYPE>
-class amodel{
-public:
- Array4<VALTYPE> a;
- bool is_distortion ;
- WordIndex MaxSentLength;
- bool ignoreL, ignoreM;
- VALTYPE get(WordIndex aj, WordIndex j, WordIndex l, WordIndex m)const{
- massert( (!is_distortion) || aj<=m );massert( (!is_distortion) || j<=l );massert( (!is_distortion) || aj!=0 );
- massert( is_distortion || aj<=l );massert( is_distortion || j<=m );massert( (is_distortion) || j!=0 );
- massert( l<MaxSentLength );massert( m<MaxSentLength );
- return a.get(aj, j, (CompactADTable&&is_distortion)?MaxSentLength:(l+1),(CompactADTable&&!is_distortion)?MaxSentLength:(m+1));
- }
-
- static float smooth_factor;
- amodel(bool flag = false)
- : a(MAX_SENTENCE_LENGTH+1,0.0), is_distortion(flag), MaxSentLength(MAX_SENTENCE_LENGTH)
- {lock = new Mutex();};
-
- ~amodel(){delete lock;};
-
-protected:
- VALTYPE&getRef(WordIndex aj, WordIndex j, WordIndex l, WordIndex m){
- massert( (!is_distortion) || aj<=m );massert( (!is_distortion) || j<=l );
- massert( is_distortion || aj<=l );massert( is_distortion || j<=m );massert( (is_distortion) || j!=0 );
- massert( l<MaxSentLength );massert( m<MaxSentLength );
- return a(aj, j, (CompactADTable&&is_distortion)?MaxSentLength:(l+1),(CompactADTable&&!is_distortion)?MaxSentLength:(m+1));
- }
-public:
- void setValue(WordIndex aj, WordIndex j, WordIndex l, WordIndex m, VALTYPE val) {
- lock->lock();
- getRef(aj, j, l, m)=val;
- lock->unlock();
- }
-
- Mutex* lock;
-public:
- /**
- By Qin
- */
- void addValue(WordIndex aj, WordIndex j, WordIndex l, WordIndex m, VALTYPE val) {
- lock->lock();
- getRef(aj, j, l, m)+=val;
- lock->unlock();
- }
- bool merge(amodel<VALTYPE>& am);
- VALTYPE getValue(WordIndex aj, WordIndex j, WordIndex l, WordIndex m) const{
- if( is_distortion==0 )
- return max(double(PROB_SMOOTH),amodel_smooth_factor/(l+1)+(1.0-amodel_smooth_factor)*get(aj, j, l, m));
- else
- return max(double(PROB_SMOOTH),amodel_smooth_factor/m+(1.0-amodel_smooth_factor)*get(aj, j, l, m));
- }
-
- void printTable(const char* filename)const ;
- void printRealTable(const char* filename)const ;
- template<class COUNT>
- void normalize(amodel<COUNT>& aTable)const
- {
- WordIndex i, j, l, m ;
- COUNT total;
- int nParam=0;
- for(l=0;l<MaxSentLength;l++){
- for(m=0;m<MaxSentLength;m++){
- if( CompactADTable && l!=m )
- continue;
- unsigned int L=((CompactADTable&&is_distortion)?MaxSentLength:(l+1))-1;
- unsigned int M=((CompactADTable&&!is_distortion)?MaxSentLength:(m+1))-1;
- if( is_distortion==0 ){
- for(j=1;j<=M; j++){
- total=0.0;
- for(i=0;i<=L;i++){
- total+=get(i, j, L, M);
- }
- if( total ){
- for(i=0;i<=L;i++){
- nParam++;
- aTable.getRef(i, j, L, M)=get(i, j, L, M)/total;
- massert(aTable.getRef(i,j,L,M)<=1.0);
- if( NoEmptyWord&&i==0 )
- aTable.getRef(i,j,L,M)=0;
- }
- }
- }
- }else{
- for(i=0;i<=L;i++){
- total=0.0;
- for(j=1;j<=M;j++)
- total+=get(j, i, L, M);
- if( total )
- for(j=1;j<=M;j++){
- aTable.getRef(j, i, L, M)=amodel_smooth_factor/M+(1.0-amodel_smooth_factor)*get(j, i, L, M)/total;
- nParam++;
- massert(aTable.getRef(j,i,L,M)<=1.0);
- if( NoEmptyWord&&i==0 )
- aTable.getRef(j,i,L,M)=0;
- }
- }
- }
- }
- }
- cout << "A/D table contains " << nParam << " parameters.\n";
- }
-
- bool readTable(const char *filename);
- bool readAugTable(const char *filename);
- void clear()
- {a.clear();}
-};
-
-/* ------------------- End of amodel Class Definitions ----------------------*/
-
-#endif
diff --git a/scripts/training/MGIZA/src/AlignTables.cpp b/scripts/training/MGIZA/src/AlignTables.cpp
deleted file mode 100644
index 8c35b77..0000000
--- a/scripts/training/MGIZA/src/AlignTables.cpp
+++ /dev/null
@@ -1,44 +0,0 @@
-/*
-
-EGYPT Toolkit for Statistical Machine Translation
-Written by Yaser Al-Onaizan, Jan Curin, Michael Jahr, Kevin Knight, John Lafferty, Dan Melamed, David Purdy, Franz Och, Noah Smith, and David Yarowsky.
-
-This program is free software; you can redistribute it and/or
-modify it under the terms of the GNU General Public License
-as published by the Free Software Foundation; either version 2
-of the License, or (at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with this program; if not, write to the Free Software
-Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
-USA.
-
-*/
-#include "AlignTables.h"
-
-bool alignmodel::insert(Vector<WordIndex>& aj, LogProb val)
-{
- alignment_hash::iterator i;
- i = a.find(aj);
- if(i != a.end() || val <= 0)
- return false ;
- a.insert(pair<const Vector<WordIndex>, LogProb>(aj, val));
- return true ;
-}
-
-
-LogProb alignmodel::getValue(Vector<WordIndex>& align) const
-{
- const LogProb zero = 0.0 ;
- alignment_hash::const_iterator i;
- i = a.find(align);
- if(i == a.end())
- return zero;
- else
- return (*i).second;
-}
diff --git a/scripts/training/MGIZA/src/AlignTables.h b/scripts/training/MGIZA/src/AlignTables.h
deleted file mode 100644
index fcca7a6..0000000
--- a/scripts/training/MGIZA/src/AlignTables.h
+++ /dev/null
@@ -1,154 +0,0 @@
-/*
-
-EGYPT Toolkit for Statistical Machine Translation
-Written by Yaser Al-Onaizan, Jan Curin, Michael Jahr, Kevin Knight, John Lafferty, Dan Melamed, David Purdy, Franz Och, Noah Smith, and David Yarowsky.
-
-This program is free software; you can redistribute it and/or
-modify it under the terms of the GNU General Public License
-as published by the Free Software Foundation; either version 2
-of the License, or (at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with this program; if not, write to the Free Software
-Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
-USA.
-
-*/
-#ifndef _aligntables_h
-#define _aligntables_h 1
-
-#include "defs.h"
-
-
-#include <cassert>
-
-#include <iostream>
-#include <algorithm>
-#include <functional>
-#include <map>
-#include <set>
-//#include <vector>
-#include "Vector.h"
-#include <utility>
-#if __GNUC__>2
-#include <ext/hash_map>
-using __gnu_cxx::hash_map;
-#else
-#include <hash_map>
-#endif
-#include <math.h>
-#include <fstream>
-#include "transpair_model1.h"
-
-
-/* ----------------- Class Defintions for hashmyalignment --------------------
- Objective: This class is used to define a hash mapping function to map
- an alignment (defined as a vector of integers) into a hash key
- ----------------------------------------------------------------------------*/
-
-class hashmyalignment : public unary_function< Vector<WordIndex>, size_t >
-{
-public:
- size_t operator() (const Vector<WordIndex>& key) const
- // to define the mapping function. it takes an alignment (a vector of
- // integers) and it returns an integer value (hash key).
- {
- WordIndex j ;
- size_t s ;
- size_t key_sum = 0 ;
- // logmsg << "For alignment:" ;
- for (j = 1 ; j < key.size() ; j++){
- // logmsg << " " << key[j] ;
- key_sum += (size_t) (int) pow(double(key[j]), double((j % 6)+1));
- }
- // logmsg << " , Key value was : " << key_sum;
- s = key_sum % 1000000 ;
- // logmsg << " h(k) = " << s << endl ;
- return(s);
- }
-#ifdef WIN32
- enum
- { // parameters for hash table
- bucket_size = 1 // 0 < bucket_size
- };
-
- bool operator()(const Vector<WordIndex> t1,
- const Vector<WordIndex> t2) const
- {WordIndex j ;
- if (t1.size() != t2.size())
- return(false);
- for (j = 1 ; j < t1.size() ; j++)
- if (t1[j] != t2[j])
- return(false);
- return(true);
- }
-#endif
-};
-
-#ifndef WIN32
-class equal_to_myalignment{
- // returns true if two alignments are the same (two vectors have same enties)
-public:
- bool operator()(const Vector<WordIndex> t1,
- const Vector<WordIndex> t2) const
- {WordIndex j ;
- if (t1.size() != t2.size())
- return(false);
- for (j = 1 ; j < t1.size() ; j++)
- if (t1[j] != t2[j])
- return(false);
- return(true);
- }
-
-};
-#endif
-
-/* ---------------- End of Class Defnition for hashmyalignment --------------*/
-
-
-/* ------------------ Class Defintions for alignmodel -----------------------
- Class Name: alignmodel
- Objective: Alignments neighborhhoods (collection of alignments) are stored in
- a hash table (for easy lookup). Each alignment vector is mapped into a hash
- key using the operator defined above.
- *--------------------------------------------------------------------------*/
-
-class alignmodel{
-private:
-#ifdef WIN32
- typedef hash_map<Vector<WordIndex>, LogProb, hashmyalignment > alignment_hash;
-
-#else
- typedef hash_map<Vector<WordIndex>, LogProb, hashmyalignment, equal_to_myalignment > alignment_hash;
-
-#endif
- alignment_hash a;
-private:
- // void erase(Vector<WordIndex>&);
-public:
-
- // methods;
-
- inline alignment_hash::iterator begin(void){return a.begin();} // begining of hash
- inline alignment_hash::iterator end(void){return a.end();} // end of hash
- inline const alignment_hash& getHash() const {return a;}; // reference to hash table
- bool insert(Vector<WordIndex>&, LogProb val=0.0); // add a alignmnet
- // void setValue(Vector<WordIndex>&, LogProb val); // not needed
- LogProb getValue(Vector<WordIndex>&)const; // retrieve prob. of alignment
- inline void clear(void){ a.clear();}; // clear hash table
- // void printTable(const char* filename);
- inline void resize(WordIndex n) {
-#ifndef WIN32
- a.resize(n);
-#endif
- }; // resize table
-
-};
-
-/* -------------- End of alignmode Class Definitions ------------------------*/
-#endif
diff --git a/scripts/training/MGIZA/src/Array.h b/scripts/training/MGIZA/src/Array.h
deleted file mode 100644
index eae58d4..0000000
--- a/scripts/training/MGIZA/src/Array.h
+++ /dev/null
@@ -1,5 +0,0 @@
-#ifndef GIZA_ARRAY_H_DEFINED
-#define GIZA_ARRAY_H_DEFINED
-#include "Vector.h"
-#define Array Vector
-#endif
diff --git a/scripts/training/MGIZA/src/Array2.h b/scripts/training/MGIZA/src/Array2.h
deleted file mode 100644
index 8ea2d9e..0000000
--- a/scripts/training/MGIZA/src/Array2.h
+++ /dev/null
@@ -1,126 +0,0 @@
-/*
-
- EGYPT Toolkit for Statistical Machine Translation
- Written by Yaser Al-Onaizan, Jan Curin, Michael Jahr, Kevin Knight, John Lafferty, Dan Melamed, David Purdy, Franz Och, Noah Smith, and David Yarowsky.
-
- This program is free software; you can redistribute it and/or
- modify it under the terms of the GNU General Public License
- as published by the Free Software Foundation; either version 2
- of the License, or (at your option) any later version.
-
- This program is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- GNU General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program; if not, write to the Free Software
- Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
- USA.
-
- */
-/*--
- Array2: Implementation of a twodimensional checked array allowing for
- a specified underlieing one-dimensional data-structure.
-
- Franz Josef Och (30/07/99)
- --*/
-#ifndef CLASS_Array2_DEFINED
-#define CLASS_Array2_DEFINED
-
-#include "mystl.h"
-#include <string>
-#include <vector>
-
-template<class T, class Y=vector<T> > class Array2 {
-public:
- Y p;
- // short h1, h2;
- unsigned int h1, h2;
-public:
- Array2(unsigned int _h1, unsigned int _h2) :
- p(_h1*_h2), h1(_h1), h2(_h2) {
- }
- Array2(unsigned int _h1, unsigned int _h2, const T&_init) :
- p(_h1*_h2, _init), h1(_h1), h2(_h2) {
- }
- Array2() :
- h1(0), h2(0) {
- }
- inline T &operator()(unsigned int i, unsigned int j) {
- assert(i<h1);
- assert(j<h2);
- return p[i*h2+j];
- }
- inline const T&operator()(unsigned int i, unsigned int j) const {
- assert(i<h1);
- assert(j<h2);
- return p[i*h2+j];
- }
- inline T get(unsigned int i, unsigned int j) {
- assert(i<h1);
- assert(j<h2);
- return p[i*h2+j];
- }
- inline void set(unsigned int i, unsigned int j, T x) {
- assert(i<h1);
- assert(j<h2);
- p[i*h2+j]=x;
- }
- inline const T get(unsigned int i, unsigned int j) const {
- assert(i<h1);
- assert(j<h2);
- return p[i*h2+j];
- }
- inline unsigned int getLen1() const {
- return h1;
- }
- inline unsigned int getLen2() const {
- return h2;
- }
-
- inline T*begin() {
- if (h1==0||h2==0)
- return 0;
- return &(p[0]);
- }
- inline T*end() {
- if (h1==0||h2==0)
- return 0;
- return &(p[0])+p.size();
- }
-
- inline const T*begin() const {
- return p.begin();
- }
- inline const T*end() const {
- return p.end();
- }
-
- friend ostream&operator<<(ostream&out, const Array2<T, Y>&ar) {
- for (unsigned int i=0; i<ar.getLen1(); i++) {
- //out << i << ": ";
- for (unsigned int j=0; j<ar.getLen2(); j++)
- out << ar(i, j) << ' ';
- out << '\n';
- }
- return out << endl;
- }
- inline void resize(unsigned int a, unsigned int b) {
- if ( !(a==h1&&b==h2)) {
- h1=a;
- h2=b;
- p.resize(h1*h2);
- }
- }
- inline void resize(unsigned int a, unsigned int b, const T&t) {
- if ( !(a==h1&&b==h2)) {
- h1=a;
- h2=b;
- p.resize(h1*h2);
- fill(p.begin(), p.end(), t);
- }
- }
-};
-
-#endif
diff --git a/scripts/training/MGIZA/src/Array4.h b/scripts/training/MGIZA/src/Array4.h
deleted file mode 100644
index 4e57a2e..0000000
--- a/scripts/training/MGIZA/src/Array4.h
+++ /dev/null
@@ -1,78 +0,0 @@
-/*
-
-Copyright (C) 2000,2001 Franz Josef Och (RWTH Aachen - Lehrstuhl fuer Informatik VI)
-
-This file is part of GIZA++ ( extension of GIZA ).
-
-This program is free software; you can redistribute it and/or
-modify it under the terms of the GNU General Public License
-as published by the Free Software Foundation; either version 2
-of the License, or (at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with this program; if not, write to the Free Software
-Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
-USA.
-
-*/
-#ifndef AlignmentArray4_h_DEFINED
-#define AlignmentArray4_h_DEFINED
-
-#include "Array2.h"
-template<class T> class Array4
-{
- private:
- Array2< Array2<T>* > A;
- int M;
- T init;
- public:
- Array4(int m,const T&_init)
- : A(m,m,0),M(m),init(_init) {}
- ~Array4()
- {
- for(int l=0;l<M;++l)
- for(int m=0;m<M;++m)
- delete A(l,m);
- }
- const T&operator()(int i, int j, int l, int m)const
- {
- if( A(l,m)==0 )
- return init;
- else
- return (*A(l,m))(i,j);
- }
- const T&get(int i, int j, int l, int m)const
- {
- if( A(l,m)==0 )
- return init;
- else
- return (*A(l,m))(i,j);
- }
- T&operator()(int i, int j, int l, int m)
- {
- if( A(l,m)==0 )
- {
- A(l,m)=new Array2<T>(max(l+1,m+1),max(l+1,m+1),init);
- }
- return (*A(l,m))(i,j);
- }
- void clear()
- {
- for(int l=0;l<M;++l)
- for(int m=0;m<M;++m)
- if( A(l,m) )
- {
- Array2<T>&a=*A(l,m);
- for(int i=0;i<=l;++i)
- for(int j=0;j<=m;++j)
- a(i,j)=0.0;
- }
- }
-};
-
-#endif
diff --git a/scripts/training/MGIZA/src/CMakeLists.txt b/scripts/training/MGIZA/src/CMakeLists.txt
deleted file mode 100644
index c86312b..0000000
--- a/scripts/training/MGIZA/src/CMakeLists.txt
+++ /dev/null
@@ -1,123 +0,0 @@
-
-# Set output directory
-
-FIND_PACKAGE(Threads)
-
-
-SET(EXECUTABLE_OUTPUT_PATH ${PROJECT_BINARY_DIR}/bin)
-SET(LIBRARY_OUTPUT_PATH ${PROJECT_BINARY_DIR}/lib)
-
-ADD_DEFINITIONS("-DNDEBUG")
-ADD_DEFINITIONS("-DWORDINDEX_WITH_4_BYTE")
-ADD_DEFINITIONS("-DBINARY_SEARCH_FOR_TTABLE")
-ADD_DEFINITIONS("-DDEBUG")
-IF (WIN32)
-
-ELSE()
-ADD_DEFINITIONS("-Wno-deprecated")
-ADD_DEFINITIONS("-Wno-write-strings")
-ENDIF()
-
-SET( LIBMGIZA_SRC
- alignment.cpp alignment.h
- AlignTables.cpp AlignTables.h
- Array2.h Array4.h
- Array.h ATables.cpp
- ATables.h cmd.c
- cmd.h collCounts.cpp
- collCounts.h common.h
- D4Tables.h
- D5Tables.h defs.h
- Dictionary.cpp Dictionary.h
- file_spec.h FlexArray.h
- ForwardBackward.cpp ForwardBackward.h
- getSentence.cpp getSentence.h
- Globals.h hmm.cpp
- hmm.h
- HMMTables.cpp HMMTables.h
- logprob.cpp logprob.h
- model1.cpp
- model1.h model2.cpp
- model2.h model2to3.cpp
- model345-peg.cpp model3.cpp
- model3.h model3_viterbi.cpp
- model3_viterbi_with_tricks.cpp MoveSwapMatrix.cpp
- MoveSwapMatrix.h myassert.cpp
- myassert.h mymath.h
- mystl.h NTables.cpp
- NTables.h Parameter.cpp
- Parameter.h parse.cpp
- Perplexity.cpp Perplexity.h
- Pointer.h
- reports.cpp SetArray.cpp
- SetArray.h
- syncObj.h transpair_model1.h
- transpair_model2.h transpair_model3.cpp
- transpair_model3.h transpair_model4.cpp
- transpair_model4.h transpair_model5.cpp
- transpair_model5.h transpair_modelhmm.h
- ttableDiff.hpp TTables.cpp
- TTables.h types.h
- utility.cpp utility.h
- Vector.h vocab.cpp
- vocab.h WordClasses.h
-)
-
-ADD_LIBRARY(mgiza_lib STATIC ${LIBMGIZA_SRC})
-SET_TARGET_PROPERTIES(mgiza_lib PROPERTIES OUTPUT_NAME "mgiza")
-
-INCLUDE_DIRECTORIES( ${PROJECT_SOURCE_DIR} )
-IF (WIN32)
- INCLUDE_DIRECTORIES( ${PROJECT_SOURCE_DIR}/w32 )
- LINK_DIRECTORIES ( ${PROJECT_SOURCE_DIR}/w32 )
- IF( USE_64_BIT )
- SET(CMAKE_THREAD_LIBS_INIT pthread64)
- ELSE( USE_64_BIT )
- SET(CMAKE_THREAD_LIBS_INIT pthread)
- ENDIF( )
-
-
-ENDIF()
-INCLUDE_DIRECTORIES( ${PROJECT_SOURCE_DIR}/src/ )
-LINK_DIRECTORIES ( ${LIBRARY_OUTPUT_PATH} )
-
-SET( MGIZA_SRC main.cpp )
-
-ADD_EXECUTABLE( mgiza ${MGIZA_SRC} )
-
-TARGET_LINK_LIBRARIES (
- mgiza
- mgiza_lib
- ${Boost_LIBRARIES}
- ${CMAKE_THREAD_LIBS_INIT}
- )
-
-
-ADD_EXECUTABLE(snt2cooc snt2cooc.cpp)
-ADD_EXECUTABLE(snt2coocrmp snt2cooc-reduce-mem-preprocess.cpp)
-ADD_EXECUTABLE(snt2plain snt2plain.cpp)
-ADD_EXECUTABLE(plain2snt plain2snt.cpp)
-ADD_EXECUTABLE(symal symal.cpp cmd.c)
-ADD_EXECUTABLE(hmmnorm hmmnorm.cxx)
-ADD_EXECUTABLE(d4norm d4norm.cxx)
-TARGET_LINK_LIBRARIES (
- hmmnorm
- mgiza_lib
- ${Boost_LIBRARIES}
- ${CMAKE_THREAD_LIBS_INIT}
- )
-TARGET_LINK_LIBRARIES (
- d4norm
- mgiza_lib
- ${Boost_LIBRARIES}
- ${CMAKE_THREAD_LIBS_INIT}
- )
-
-INSTALL(TARGETS mgiza_lib mgiza snt2cooc snt2plain plain2snt symal hmmnorm d4norm snt2coocrmp
- RUNTIME DESTINATION bin
- LIBRARY DESTINATION lib
- ARCHIVE DESTINATION lib
- )
-
-
-
diff --git a/scripts/training/MGIZA/src/D4Tables.h b/scripts/training/MGIZA/src/D4Tables.h
deleted file mode 100644
index 3e60dc8..0000000
--- a/scripts/training/MGIZA/src/D4Tables.h
+++ /dev/null
@@ -1,785 +0,0 @@
-/*
-
- Copyright (C) 1999,2000,2001 Franz Josef Och (RWTH Aachen - Lehrstuhl fuer Informatik VI)
-
- This file is part of GIZA++ ( extension of GIZA ).
-
- This program is free software; you can redistribute it and/or
- modify it under the terms of the GNU General Public License
- as published by the Free Software Foundation; either version 2
- of the License, or (at your option) any later version.
-
- This program is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- GNU General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program; if not, write to the Free Software
- Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
- USA.
-
- */
-#ifndef _d4tables_h_define
-#define _d4tables_h_define
-#include <math.h>
-#include "WordClasses.h"
-#include "Globals.h"
-#include "myassert.h"
-#include "syncObj.h"
-extern float d4modelsmooth_factor;
-
-class m4_key {
-public:
- int deps;
- int l;
- int m;
- int F;
- int E;
- int prevj;
- int vacancies1, vacancies2;
- m4_key(int _deps, int _l, int _m, int _F, int _E, int _prevj, int _v1,
- int _v2) :
- deps(_deps), l(_l), m(_m), F(_F), E(_E), prevj(_prevj),
- vacancies1(_v1), vacancies2(_v2) {
- }
- friend ostream&print1(ostream&out, const m4_key&x, const WordClasses&wce,
- const WordClasses&wcf) {
- if (x.deps&DEP_MODEL_l)
- out << "l: " << x.l<<' ';
- if (x.deps&DEP_MODEL_m)
- out << "m: " << x.m<<' ';
- if (x.deps&DEP_MODEL_F)
- out << "F: " << wcf.classString(x.F)<< ' ';
- if (x.deps&DEP_MODEL_E)
- out << "E: " << wce.classString(x.E)<< ' ';
- // if(x.deps&DEP_MODEL_pj)out << "j-1: " << x.prevj<<' ';
- if (x.vacancies1!=-1)
- out << "v1: " << x.vacancies1 << ' ';
- if (x.vacancies2!=-1)
- out << "v2: " << x.vacancies2 << ' ';
- return out << '\n';
- }
-
- friend ostream&print1_m5(ostream&out, const m4_key&x,
- const WordClasses&wce, const WordClasses&wcf) {
- out << ((x.deps&DEP_MODEL_E) ? wce.classString(x.E) : string("0"))
- << ' ';
- out << ((x.deps&DEP_MODEL_F) ? wcf.classString(x.F) : string("0"))
- << ' ';
- out << x.vacancies1 << ' ';
- out << x.vacancies2 << ' ';
- return out;
- }
-
- friend ostream&printb1(ostream&out, const m4_key&x, const WordClasses&wce,
- const WordClasses&wcf) {
- if (x.deps&DEP_MODELb_l)
- out << "l: " << x.l<<' ';
- if (x.deps&DEP_MODELb_m)
- out << "m: " << x.m<<' ';
- if (x.deps&DEP_MODELb_F)
- out << "F: " << wcf.classString(x.F) << ' ';
- if (x.deps&DEP_MODELb_E)
- out << "E: " << wce.classString(x.E) << ' ';
- if (x.vacancies1!=-1)
- out << "v1: " << x.vacancies1 << ' ';
- if (x.vacancies2!=-1)
- out << "v2: " << x.vacancies2 << ' ';
- return out << '\n';
- }
- friend ostream&printb1_m5(ostream&out, const m4_key&x,
- const WordClasses&wcf) {
- out << "-1 " << ((x.deps&DEP_MODEL_F) ? wcf.classString(x.F)
- : string("0"))<< ' ';
- out << x.vacancies1 << ' ';
- out << x.vacancies2 << ' ';
- return out;
- }
-};
-
-class compare1 {
-private:
- int deps;
-public:
- compare1(int _deps) :
- deps(_deps) {
- }
- bool operator()(const m4_key&a, const m4_key&b) const {
- if (deps&DEP_MODEL_l) {
- if (a.l<b.l)
- return 1;
- if (b.l<a.l)
- return 0;
- }
- if (deps&DEP_MODEL_m) {
- if (a.m<b.m)
- return 1;
- if (b.m<a.m)
- return 0;
- }
- if (deps&DEP_MODEL_F) {
- if (a.F<b.F)
- return 1;
- if (b.F<a.F)
- return 0;
- }
- if (deps&DEP_MODEL_E) {
- if (a.E<b.E)
- return 1;
- if (b.E<a.E)
- return 0;
- }
- //if(deps&DEP_MODEL_pj){if( a.prevj<b.prevj )return 1;if( b.prevj<a.prevj )return 0;}
- if (a.vacancies1<b.vacancies1)
- return 1;
- if (b.vacancies1<a.vacancies1)
- return 0;
- if (a.vacancies2<b.vacancies2)
- return 1;
- if (b.vacancies2<a.vacancies2)
- return 0;
- return 0;
- }
-};
-
-class compareb1 {
-private:
- int deps;
-public:
- compareb1(int _deps) :
- deps(_deps) {
- }
- bool operator()(const m4_key&a, const m4_key&b) const {
- if (deps&DEP_MODELb_l) {
- if (a.l<b.l)
- return 1;
- if (b.l<a.l)
- return 0;
- }
- if (deps&DEP_MODELb_m) {
- if (a.m<b.m)
- return 1;
- if (b.m<a.m)
- return 0;
- }
- if (deps&DEP_MODELb_F) {
- if (a.F<b.F)
- return 1;
- if (b.F<a.F)
- return 0;
- }
- if (deps&DEP_MODELb_E) {
- if (a.E<b.E)
- return 1;
- if (b.E<a.E)
- return 0;
- }
- //if(deps&DEP_MODELb_pj){if( a.prevJ<b.prevJ )return 1;if( b.prevJ<a.prevJ )return 0;}
- if (a.vacancies1<b.vacancies1)
- return 1;
- if (b.vacancies1<a.vacancies1)
- return 0;
- if (a.vacancies2<b.vacancies2)
- return 1;
- if (b.vacancies2<a.vacancies2)
- return 0;
- return 0;
- }
-};
-
-inline void tokenize(const string&in, Vector<string>&out) {
- string s;
- istrstream l(in.c_str());
- while (l>>s)
- out.push_back(s);
-}
-
-class d4model {
-public:
- typedef Vector<pair<COUNT,PROB> > Vpff;
- map<m4_key,Vpff,compare1 > D1;
- map<m4_key,Vpff,compareb1> Db1;
- PositionIndex msl;
- WordClasses* ewordclasses;
- WordClasses* fwordclasses;
- template<class MAPPER> void makeWordClasses(const MAPPER&m1,
- const MAPPER&m2, string efile, string ffile, const vcbList& elist,
- const vcbList& flist) {
- ifstream estrm(efile.c_str()), fstrm(ffile.c_str());
- if ( !estrm) {
- cerr << "ERROR: can not read " << efile << endl;
- } else
- ewordclasses->read(estrm, m1,elist);
- if ( !fstrm)
- cerr << "ERROR: can not read " << ffile << endl;
- else
- fwordclasses->read(fstrm, m2,flist);
- }
- d4model(PositionIndex _msl, WordClasses& e, WordClasses& f) :
- D1(compare1(M4_Dependencies)), Db1(compareb1(M4_Dependencies)),
- msl(_msl),ewordclasses(&e),fwordclasses(&f) {
- }
-
-protected:
- inline COUNT&getCountRef_first(WordIndex j, WordIndex j_cp, int E, int F, int l,
- int m) {
- assert(j>=1);
- m4_key key(M4_Dependencies, l, m, F, E, j_cp, -1, -1);
- map<m4_key,Vpff,compare1 >::iterator p=D1.find(key);
- if (p==D1.end())
- p=D1.insert(make_pair(key,Vpff(msl*2+1,pair<COUNT,PROB>(0.0,0.0)))).first;
- assert(p!=D1.end());
- return (p->second)[j-j_cp+msl].first;
- };
-
- inline COUNT&getCountRef_bigger(WordIndex j, WordIndex j_prev, int E, int F,
- int l, int m) {
- assert(j>=1);
- assert(j_prev>=1);
- m4_key key(M4_Dependencies, l, m, F, E, j_prev, -1, -1);
- map<m4_key,Vpff,compareb1 >::iterator p=Db1.find(key);
- if (p==Db1.end())
- p=Db1.insert(make_pair(key,Vpff(msl*2+1,pair<COUNT,PROB>(0.0,0.0)))).first;
- assert(p!=Db1.end());
- return (p->second)[j-j_prev+msl].first;
- };
- Mutex lock_f,lock_b;
-public:
- inline void augCountRef_first(WordIndex j, WordIndex j_cp, int E, int F, int l,
- int m, const COUNT& v){
- lock_f.lock();
- getCountRef_first(j,j_cp,E,F,l,m)+=v;
- lock_f.unlock();
- }
-
- inline void augCountRef_bigger(WordIndex j, WordIndex j_prev, int E, int F,
- int l, int m, const COUNT& v){
- lock_b.lock();
- getCountRef_bigger(j,j_prev,E,F,l,m)+=v;
- lock_b.unlock();
- }
-
-
-
- void merge(d4model &d) {
- map<m4_key,Vpff,compare1 >::iterator it;
- for (it = d.D1.begin(); it!=d.D1.end(); it++) {
- map<m4_key,Vpff,compare1 >::iterator p=D1.find(it->first);
- if (p==D1.end())
- p=D1.insert(make_pair(it->first,Vpff(msl*2+1,pair<COUNT,PROB>(0.0,0.0)))).first;
- int i;
- for (i=0; i<it->second.size(); i++) {
- p->second[i].second+=it->second[i].second;
- }
- }
-#ifdef WIN32
- map<m4_key,Vpff,compareb1 >::iterator it1;
- for (it1 = d.Db1.begin(); it1!=d.Db1.end(); it1++) {
- map<m4_key,Vpff,compareb1 >::iterator p=Db1.find(it->first);
- if (p==Db1.end())
- p=Db1.insert(make_pair(it1->first,Vpff(msl*2+1,pair<COUNT,PROB>(0.0,0.0)))).first;
- int i;
- for (i=0; i<it->second.size(); i++) {
- p->second[i].second+=it1->second[i].second;
- }
- }
-#else
- for (it = d.Db1.begin(); it!=d.Db1.end(); it++) {
- map<m4_key,Vpff,compare1 >::iterator p=Db1.find(it->first);
- if (p==Db1.end())
- p=Db1.insert(make_pair(it->first,Vpff(msl*2+1,pair<COUNT,PROB>(0.0,0.0)))).first;
- int i;
- for (i=0; i<it->second.size(); i++) {
- p->second[i].second+=it->second[i].second;
- }
- }
-#endif
- }
-
- bool augCount(const char* fD1, const char* fDb) {
- ifstream ifsd(fD1);
- int deps;
- int l;
- int m;
- int F;
- int E;
- int prevj;
- int vacancies1, vacancies2;
- int len;
- double count;
- if (!ifsd) {
- cerr << "Failed in " << fD1 << endl;
- return false;
- }
- {
- while (ifsd >> deps >> l >> m >>F >> E >> prevj >> vacancies1
- >>vacancies2>>len) {
- m4_key key(M4_Dependencies, l, m, F, E, prevj, vacancies1,
- vacancies2);
- map<m4_key,Vpff,compare1 >::iterator p=D1.find(key);
- if (p==D1.end())
- p=D1.insert(make_pair(key,Vpff(msl*2+1,pair<COUNT,PROB>(0.0,0.0)))).first;
- assert(p!=D1.end());
- int i;
- for (i=0; i<len; i++) {
- ifsd >> count;
- p->second[i].first+=count;
- }
-
- }
- }
- ifstream ifsd1(fDb);
- if (!ifsd1) {
- cerr << "Failed in " << fDb << endl;
- return false;
- }
- {
- while (ifsd1 >> deps >> l >> m >>F >> E >> prevj >> vacancies1
- >>vacancies2>>len) {
- m4_key key(M4_Dependencies, l, m, F, E, prevj, vacancies1,
- vacancies2);
- map<m4_key,Vpff,compareb1 >::iterator p=Db1.find(key);
- if (p==Db1.end())
- p=Db1.insert(make_pair(key,Vpff(msl*2+1,pair<COUNT,PROB>(0.0,0.0)))).first;
- assert(p!=D1.end());
- int i;
- for (i=0; i<len; i++) {
- ifsd1 >> count;
- p->second[i].first+=count;
- }
-
- }
- }
- return true;
- }
-
- bool readProbTable(const char* fD1, const char* fDb){
- ifstream ifsd(fD1);
- int deps;
- int l;
- int m;
- int F;
- int E;
- int prevj;
- int vacancies1,vacancies2;
- int len;
- double count;
- if(!ifsd){
- cerr << "Failed in " << fD1 << endl;
- return false;
- }
- {
- while(ifsd >> deps >> l >> m >>F >> E >> prevj >> vacancies1>>vacancies2>>len){
- m4_key key(M4_Dependencies,l,m,F,E,prevj,vacancies1,vacancies2);
- map<m4_key,Vpff,compare1 >::iterator p=D1.find(key);
- if(p==D1.end())p=D1.insert(make_pair(key,Vpff(msl*2+1,pair<COUNT,PROB>(0.0,0.0)))).first;
- assert(p!=D1.end());
- int i;
- for(i=0;i<len;i++){
- ifsd >> count;
- p->second[i].second=count;
- }
-
- }
- }
- ifstream ifsd1(fDb);
- if(!ifsd1){
- cerr << "Failed in " << fDb << endl;
- return false;
- }
- {
- while(ifsd1 >> deps >> l >> m >>F >> E >> prevj >> vacancies1>>vacancies2>>len){
- m4_key key(M4_Dependencies,l,m,F,E,prevj,vacancies1,vacancies2);
- map<m4_key,Vpff,compareb1 >::iterator p=Db1.find(key);
- if(p==Db1.end())p=Db1.insert(make_pair(key,Vpff(msl*2+1,pair<COUNT,PROB>(0.0,0.0)))).first;
- assert(p!=D1.end());
- int i;
- for(i=0;i<len;i++){
- ifsd1 >> count;
- p->second[i].second=count;
- }
-
- }
- }
- return true;
- }
-
-
- bool printProbTable(const char* fD1, const char* fDb) {
- ofstream ofsd(fD1);
- if (!ofsd.is_open()) {
- return false;
- }
- {
- map<m4_key,Vpff,compare1 >::iterator it;
- for (it = D1.begin(); it!=D1.end(); it++) {
- ofsd << it->first.deps << " " << it->first.l << " "
- << it->first.m << " " << it->first.F << " "
- << it->first.E << " " << it->first.prevj << " "
- << it->first.vacancies1 << " " << it->first.vacancies2
- << " " << it->second.size() << " ";
- int i;
- for (i=0; i<it->second.size(); i++) {
- ofsd << it->second[i].second << " ";
- }
- ofsd << endl;
- }
-
- }
-
- ofstream ofsdb(fDb);
- if (!ofsdb.is_open()) {
- return false;
- }
-
- map<m4_key,Vpff,compareb1 >::iterator it;
- for (it = Db1.begin(); it!=Db1.end(); it++) {
- ofsdb << it->first.deps << " " << it->first.l << " " << it->first.m
- << " " << it->first.F << " " << it->first.E << " "
- << it->first.prevj << " " << it->first.vacancies1 << " "
- << it->first.vacancies2 << " " << it->second.size()<< endl;
- int i;
- for (i=0; i<it->second.size(); i++) {
- ofsdb << it->second[i].second << " ";
- }
- ofsdb << endl;
- }
- return true;
- }
-
- bool dumpCount(const char* fD1, const char* fDb){
- ofstream ofsd(fD1);
- if(!ofsd.is_open()){
- return false;
- }
- {
- map<m4_key,Vpff,compare1 >::iterator it;
- for(it = D1.begin(); it!=D1.end();it++){
- ofsd << it->first.deps << " "
- << it->first.l << " "
- << it->first.m << " "
- << it->first.F << " "
- << it->first.E << " "
- << it->first.prevj << " "
- << it->first.vacancies1 << " "
- << it->first.vacancies2 << " "
- << it->second.size() << " ";
- int i;
- for(i=0;i<it->second.size();i++){
- ofsd << it->second[i].first << " ";
- }
- ofsd << endl;
- }
-
- }
-
- ofstream ofsdb(fDb);
- if(!ofsdb.is_open()){
- return false;
- }
-
- map<m4_key,Vpff,compareb1 >::iterator it;
- for(it = Db1.begin(); it!=Db1.end();it++){
- ofsdb << it->first.deps << " "
- << it->first.l << " "
- << it->first.m << " "
- << it->first.F << " "
- << it->first.E << " "
- << it->first.prevj << " "
- << it->first.vacancies1 << " "
- << it->first.vacancies2 << " "
- << it->second.size()<< endl;
- int i;
- for(i=0;i<it->second.size();i++){
- ofsdb << it->second[i].first << " ";
- }
- ofsdb << endl;
- }
- return true;
- }
- map<m4_key,Vpff,compare1 >::const_iterator getProb_first_iterator(int E,
- int F, int l, int m) const {
- return D1.find(m4_key(M4_Dependencies, l, m, F, E, 0, -1, -1));
- }
- PROB getProb_first_withiterator(WordIndex j, WordIndex j_cp, int m,
- const map<m4_key,Vpff,compare1 >::const_iterator& p) const {
- assert(j>=1);
- //assert(j_cp>=0);
- assert(j<=msl);
- assert(j_cp<=msl);
- if (p==D1.end()) {
- return PROB_SMOOTH;
- } else {
- massert((p->second)[j-j_cp+msl].second<=1.0);
- return max(PROB_SMOOTH, d4modelsmooth_factor/(2*m-1)+(1
- -d4modelsmooth_factor)*(p->second)[j-j_cp+msl].second);
- }
- }
-
- PROB getProb_first(WordIndex j, WordIndex j_cp, int E, int F, int l, int m) const {
- assert(j>=1);
- //assert(j_cp>=0);
- assert(j<=msl);
- assert(j_cp<=msl);
- m4_key key(M4_Dependencies, l, m, F, E, j_cp, -1, -1);
- map<m4_key,Vpff,compare1 >::const_iterator p=D1.find(key);
- if (p==D1.end()) {
- return PROB_SMOOTH;
- } else {
- massert((p->second)[j-j_cp+msl].second<=1.0);
- return max(PROB_SMOOTH, d4modelsmooth_factor/(2*m-1)+(1
- -d4modelsmooth_factor)*(p->second)[j-j_cp+msl].second);
- }
- }
- map<m4_key,Vpff,compareb1 >::const_iterator getProb_bigger_iterator(int E,
- int F, int l, int m) const {
- return Db1.find(m4_key(M4_Dependencies, l, m, F, E, 0, -1, -1));
- }
- PROB getProb_bigger_withiterator(WordIndex j, WordIndex j_prev, int m,
- const map<m4_key,Vpff,compareb1 >::const_iterator&p) const {
- massert(j>=1);
- massert(j_prev>=1);
- massert(j>j_prev);
- massert(j<=msl);
- massert(j_prev<=msl);
- if (p==Db1.end()) {
- return PROB_SMOOTH;
- } else {
- massert((p->second)[j-j_prev+msl].second<=1.0 );
- return max(PROB_SMOOTH, d4modelsmooth_factor/(m-1)+(1
- -d4modelsmooth_factor)*(p->second)[j-j_prev+msl].second);
- }
- }
-
- PROB getProb_bigger(WordIndex j, WordIndex j_prev, int E, int F, int l,
- int m) const {
- massert(j>=1);
- massert(j_prev>=1);
- massert(j>j_prev);
- massert(j<=msl);
- massert(j_prev<=msl);
- m4_key key(M4_Dependencies, l, m, F, E, j_prev, -1, -1);
- map<m4_key,Vpff,compareb1 >::const_iterator p=Db1.find(key);
- if (p==Db1.end()) {
- return PROB_SMOOTH;
- } else {
- massert((p->second)[j-j_prev+msl].second<=1.0 );
- return max(PROB_SMOOTH, d4modelsmooth_factor/(m-1)+(1
- -d4modelsmooth_factor)*(p->second)[j-j_prev+msl].second);
- }
- }
-
- void normalizeTable() {
- int nParams=0;
- for (map<m4_key,Vpff,compare1 >::iterator i=D1.begin(); i!=D1.end(); ++i) {
- Vpff&d1=i->second;
- double sum=0.0;
- for (PositionIndex i=0; i<d1.size(); i++)
- sum+=d1[i].first;
- for (PositionIndex i=0; i<d1.size(); i++) {
- d1[i].second=sum ? (d1[i].first/sum) : (1.0/d1.size());
- nParams++;
- }
- }
- for (map<m4_key,Vpff,compareb1 >::iterator i=Db1.begin(); i!=Db1.end(); ++i) {
- Vpff&db1=i->second;
- double sum=0.0;
- for (PositionIndex i=0; i<db1.size(); i++)
- sum+=db1[i].first;
- for (PositionIndex i=0; i<db1.size(); i++) {
- db1[i].second=sum ? (db1[i].first/sum) : (1.0/db1.size());
- nParams++;
- }
- }
- cout << "D4 table contains " << nParams << " parameters.\n";
- }
-
- void clear() {
- for (map<m4_key,Vpff,compare1 >::iterator i=D1.begin(); i!=D1.end(); ++i) {
- Vpff&d1=i->second;
- for (PositionIndex i=0; i<d1.size(); i++)
- d1[i].first=0.0;
- }
- for (map<m4_key,Vpff,compareb1 >::iterator i=Db1.begin(); i!=Db1.end(); ++i) {
- Vpff&db1=i->second;
- for (PositionIndex i=0; i<db1.size(); i++)
- db1[i].first=0.0;
- }
- }
-
- /*void printProbTable(const char*fname1,const char*fname2)
- {
- ofstream out(fname1);
- double ssum=0.0;
- out << "# Translation tables for Model 4 .\n";
- out << "# Table for head of cept.\n";
- for(map<m4_key,Vpff,compare1 >::const_iterator i=D1.begin();i!=D1.end();++i){
- const Vpff&d1=i->second;
- double sum=0.0;
- for(PositionIndex ii=0;ii<d1.size();ii++)sum+=d1[ii].first;
- if ( sum ){
- print1(out,i->first,ewordclasses,fwordclasses);
- out << "SUM: " << sum << ' '<< '\n';
- for(unsigned ii=0;ii<d1.size();ii++)
- if( d1[ii].first )
- out << (int)(ii)-(int)(msl) << ' ' << d1[ii].first << '\n';
- out << endl;
- }
- ssum+=sum;
- }
- out << "# Table for non-head of cept.\n";
- for(map<m4_key,Vpff,compareb1 >::const_iterator i=Db1.begin();i!=Db1.end();++i)
- {
- const Vpff&db1=i->second;
- double sum=0.0;
- for(PositionIndex ii=0;ii<db1.size();++ii)sum+=db1[ii].first;
- if( sum ){
- printb1(out,i->first,ewordclasses,fwordclasses);
- out << "SUM: " << sum << ' '<<'\n';
- for(unsigned ii=0;ii<db1.size();ii++)
- if( db1[ii].first )
- {
- out << (int)(ii)-(int)(msl) << ' ' << db1[ii].first << '\n';
- }
- out << endl;
- }
- ssum+=sum;
- }
- out << endl << "FULL-SUM: " << ssum << endl;
- if( M4_Dependencies==76 ){
- ofstream out2(fname2);
- for(map<m4_key,Vpff,compare1 >::const_iterator i=D1.begin();i!=D1.end();++i)
- {
- const Vpff&d1=i->second;
- for(unsigned ii=0;ii<d1.size();ii++)
- if( d1[ii].first )
- out2 << ewordclasses.classString(i->first.E) << ' ' << fwordclasses.classString(i->first.F) << ' ' << (int)(ii)-(int)(msl) << ' ' << d1[ii].second << '\n';
- }
- for(map<m4_key,Vpff,compareb1 >::const_iterator i=Db1.begin();i!=Db1.end();++i) {
- const Vpff&db1=i->second;
- for(unsigned ii=0;ii<db1.size();ii++)
- if( db1[ii].first )
- out2 << -1 << ' ' << fwordclasses.classString(i->first.F) << ' ' << (int)(ii)-(int)(msl) << ' ' << db1[ii].second << '\n';
- }
- }
- }*/
-
- bool readProbTable(const char *fname) {
- cerr << "Reading D4Tables from " << fname << endl;
- ifstream file(fname);
- string line;
- do {
- getline(file, line);
- } while (line.length()&&line[0]=='#');
-
- do {
- while (line.length()==0)
- getline(file, line);
- if (line[0]=='#')
- break;
- Vector<string> linestr;
- tokenize(line, linestr);
- m4_key k(M4_Dependencies, 0, 0, 0, 0, 0, -1, -1);
- for (unsigned int i=0; i<linestr.size(); i+=2) {
- if (linestr[i]=="l:") {
- k.l=atoi(linestr[i+1].c_str());
- iassert(M4_Dependencies&DEP_MODEL_l);
- }
- if (linestr[i]=="m:") {
- k.m=atoi(linestr[i+1].c_str());
- iassert(M4_Dependencies&DEP_MODEL_m);
- }
- if (linestr[i]=="F:") {
- k.F=(*fwordclasses)(linestr[i+1]);
- iassert(M4_Dependencies&DEP_MODEL_F);
- }
- if (linestr[i]=="E:") {
- k.E=(*ewordclasses)(linestr[i+1]);
- iassert(M4_Dependencies&DEP_MODEL_E);
- }
- //if( linestr[i]=="j-1:" ){k.prevj=atoi(linestr[i+1].c_str());iassert(M4_Dependencies&DEP_MODEL_pj);}
- }
- string str;
- double sum;
- file >> str >> sum;
- iassert(str=="SUM:");
- if (str!="SUM:")
- cerr << "ERROR: string is " << str << " and not sum " << endl;
-
- do {
- int value;
- double count;
- getline(file, line);
- istrstream twonumbers(line.c_str());
- if (twonumbers >> value >> count) {
- if (D1.count(k)==0)
- D1.insert(make_pair(k, Vpff(msl*2+1, pair<COUNT, PROB>(
- 0.0, 0.0))));
- D1[k][value+msl]=make_pair(count, count/sum);
- }
- } while (line.length());
- } while (file);
- do {
- getline(file, line);
- } while (line.length()&&line[0]=='#');
- do {
- while (line.length()==0)
- getline(file, line);
- if (line[0]=='#')
- break;
- Vector<string> linestr;
- tokenize(line, linestr);
- m4_key k(M4_Dependencies, 0, 0, 0, 0, 0, -1, -1);
- bool sumRead=0;
- for (unsigned int i=0; i<linestr.size(); i+=2) {
- if (linestr[i]=="l:") {
- k.l=atoi(linestr[i+1].c_str());
- iassert(M4_Dependencies&DEP_MODELb_l);
- } else if (linestr[i]=="m:") {
- k.m=atoi(linestr[i+1].c_str());
- iassert(M4_Dependencies&DEP_MODELb_m);
- } else if (linestr[i]=="F:") {
- k.F=(*fwordclasses)(linestr[i+1]);
- iassert(M4_Dependencies&DEP_MODELb_F);
- } else if (linestr[i]=="E:") {
- k.E=(*ewordclasses)(linestr[i+1]);
- iassert(M4_Dependencies&DEP_MODELb_E);
- } else if (linestr[i]=="SUM:") {
- cerr << "Warning: obviously no dependency.\n";
- sumRead=1;
- } else if (linestr[i]=="FULL-SUM:") {
- break;
- } else {
- cerr << "ERROR: error in reading d4 tables: " << linestr[i]
- << ' ' << linestr[i+1] << endl;
- }
- }
- string str;
- double sum;
- if (sumRead==0)
- file >> str >> sum;
- else {
- str=linestr[0];
- sum=atof(linestr[1].c_str());
- }
- if (str!="SUM:")
- cerr << "ERROR: should read SUM but read " << str << endl;
- do {
- int value;
- double count;
- getline(file, line);
- istrstream twonumbers(line.c_str());
- if (twonumbers >> value >> count) {
- if (Db1.count(k)==0)
- Db1.insert(make_pair(k, Vpff(msl*2+1,
- pair<COUNT, PROB>(0.0, 0.0))));
- Db1[k][value+msl]=make_pair(count, count/sum);
- }
- } while (file&&line.length());
- } while (file);
- return 1;
- }
-};
-
-#endif
diff --git a/scripts/training/MGIZA/src/D5Tables.h b/scripts/training/MGIZA/src/D5Tables.h
deleted file mode 100644
index 74693f0..0000000
--- a/scripts/training/MGIZA/src/D5Tables.h
+++ /dev/null
@@ -1,233 +0,0 @@
-/*
-
-Copyright (C) 2000,2001 Franz Josef Och (RWTH Aachen - Lehrstuhl fuer Informatik VI)
-
-This file is part of GIZA++ ( extension of GIZA ).
-
-This program is free software; you can redistribute it and/or
-modify it under the terms of the GNU General Public License
-as published by the Free Software Foundation; either version 2
-of the License, or (at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with this program; if not, write to the Free Software
-Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
-USA.
-
-*/
-#ifndef _d5tables_h_define
-#define _d5tables_h_define
-#include <math.h>
-#include "D4Tables.h"
-
-extern float d5modelsmooth_countoffset;
-extern float d5modelsmooth_factor;
-
-#define UNSEENPROB (1.0/vacancies_total)
-
-class d5model
-{
- private:
- typedef Vector < pair < COUNT,PROB > >Vpff;
- map< m4_key,Vpff,compare1 > D1;
- map< m4_key,Vpff,compareb1 > Db1;
- public:
- d4model&d4m;
- WordClasses* ewordclasses;
- WordClasses* fwordclasses;
- template<class MAPPER>
- void makeWordClasses(const MAPPER&m1,const MAPPER&m2,string efile,string ffile
- , const vcbList& elist,
- const vcbList& flist)
- {
- ifstream estrm(efile.c_str()),fstrm(ffile.c_str());
- if( !estrm )
- cerr << "ERROR: can not read classes from " << efile << endl;
- else
- ewordclasses->read(estrm,m1,elist);
- if( !fstrm )
- cerr << "ERROR: can not read classes from " << ffile << endl;
- else
- fwordclasses->read(fstrm,m2,flist);
- }
- d5model (d4model&_d4m)
- :D1 (compare1(M5_Dependencies)), Db1 (compareb1(M5_Dependencies)),d4m(_d4m),
- ewordclasses(_d4m.ewordclasses),fwordclasses(_d4m.fwordclasses)
- {}
- COUNT &getCountRef_first (PositionIndex vacancies_j,
- PositionIndex vacancies_jp, int F,
- PositionIndex l, PositionIndex m,
- PositionIndex vacancies_total)
- {
- massert(vacancies_j>0);
- massert(vacancies_total>0);
- //massert(vacancies_jp<=vacancies_total);
- massert(vacancies_j <=vacancies_total);
- massert(vacancies_total<=m);
- m4_key key(M5_Dependencies,l,m,F,0,0,vacancies_jp,vacancies_total);
- map<m4_key,Vpff,compare1 >::iterator p=D1.find(key);
- if(p==D1.end())
- p=D1.insert(make_pair(key,Vpff(vacancies_total+1,make_pair(0,UNSEENPROB)))).first; // !!! constrain length
- massert(p!=D1.end());
- return (p->second)[vacancies_j].first;
- }
- COUNT &getCountRef_bigger (PositionIndex vacancies_j,
- PositionIndex vacancies_jp, int F,
- PositionIndex l, PositionIndex m,
- PositionIndex vacancies_total)
- {
- massert(vacancies_j>0);
- massert(vacancies_total>0);
- massert (vacancies_jp <= vacancies_j);
- massert (vacancies_j-vacancies_jp <= vacancies_total);
- m4_key key(M5_Dependencies,l,m,F,0,0,-1,vacancies_total);
- map<m4_key,Vpff,compareb1 >::iterator p=Db1.find(key);
- if(p==Db1.end())
- p=Db1.insert(make_pair(key,Vpff(vacancies_total+1,make_pair(0,UNSEENPROB)))).first; // !!! constrain length
- massert(p!=Db1.end());
- return (p->second)[vacancies_j - vacancies_jp].first;
- }
- PROB getProb_first (PositionIndex vacancies_j, PositionIndex vacancies_jp,
- int F, PositionIndex l, PositionIndex m,
- PositionIndex vacancies_total) const
- {
- massert(vacancies_j>0);
- massert(vacancies_total>0);
- //massert(vacancies_jp<=vacancies_total);
- massert(vacancies_j <=vacancies_total);
- massert(vacancies_total<=m);
- m4_key key(M5_Dependencies,l,m,F,0,0,vacancies_jp,vacancies_total);
- map<m4_key,Vpff,compare1 >::const_iterator p=D1.find(key);
- if( p==D1.end() )
- return UNSEENPROB;
- else
- return max(PROB_SMOOTH,d5modelsmooth_factor/(vacancies_total)+(1-d5modelsmooth_factor)*(p->second)[vacancies_j].second);
- }
- PROB getProb_bigger (PositionIndex vacancies_j, PositionIndex vacancies_jp,
- int F, PositionIndex l, PositionIndex m,
- PositionIndex vacancies_total) const
- {
- massert(vacancies_j>0);
- massert(vacancies_total>0);
- massert (vacancies_jp <= vacancies_j);
- massert (vacancies_j-vacancies_jp <= vacancies_total);
- m4_key key(M5_Dependencies,l,m,F,0,0,-1,vacancies_total);
- map<m4_key,Vpff,compareb1 >::const_iterator p=Db1.find(key);
- if(p==Db1.end())
- return UNSEENPROB;
- else
- return max(PROB_SMOOTH,d5modelsmooth_factor/(vacancies_total)+(1-d5modelsmooth_factor)*(p->second)[vacancies_j - vacancies_jp].second);
- }
- void normalizeTable ()
- {
- int nParams=0;
- for(map<m4_key,Vpff,compare1 >::iterator i=D1.begin();i!=D1.end();++i)
- {
- Vpff&d1=i->second;
- COUNT sum=0.0;
- for(PositionIndex i=0;i<d1.size();i++)
- sum+=d1[i].first+d5modelsmooth_countoffset;
- for(PositionIndex i=0;i<d1.size();i++)
- {
- d1[i].second=sum?((d1[i].first+d5modelsmooth_countoffset)/sum):(1.0/d1.size());
- nParams++;
- }
- }
- for(map<m4_key,Vpff,compareb1 >::iterator i=Db1.begin();i!=Db1.end();++i)
- {
- Vpff&db1=i->second;
- double sum=0.0;
- for(PositionIndex i=0;i<db1.size();i++)
- sum+=db1[i].first+d5modelsmooth_countoffset;
- for(PositionIndex i=0;i<db1.size();i++)
- {
- db1[i].second=sum?((db1[i].first+d5modelsmooth_countoffset)/sum):(1.0/db1.size());
- nParams++;
- }
- }
- cout << "D5 table contains " << nParams << " parameters.\n";
- }
-
-friend ostream&operator<<(ostream&out,d5model&d5m) {
- out << "# Translation tables for Model 5 .\n";
- out << "# Table for head of cept.\n";
- for(map<m4_key,Vpff,compare1 >::const_iterator i=d5m.D1.begin();i!=d5m.D1.end();++i){
- const Vpff&d1=i->second;
- COUNT sum=0.0;
- for(PositionIndex ii=0;ii<d1.size();ii++)sum+=d1[ii].first;
- if ( sum ) {
- for(unsigned ii=0;ii<d1.size();ii++)
- {
- print1_m5(out,i->first,*d5m.ewordclasses,*d5m.fwordclasses);
- out << (int)(ii) << ' ' << d1[ii].second << ' ' << d1[ii].first << '\n';
- }
- out << endl;
- }
- }
- out << "# Table for non-head of cept.\n";
- for(map<m4_key,Vpff,compareb1 >::const_iterator i=d5m.Db1.begin();i!=d5m.Db1.end();++i){
- const Vpff&db1=i->second;
- double sum=0.0;
- for(PositionIndex ii=0;ii<db1.size();++ii)sum+=db1[ii].first;
- if( sum ){
- for(unsigned ii=0;ii<db1.size();ii++){
- printb1_m5(out,i->first,*d5m.fwordclasses);
- out << (int)(ii) << ' ' << db1[ii].second << ' ' << db1[ii].first << '\n';
- }
- out << endl;
- }
- }
- return out;
-}
- void readProbTable(const char*x)
- {
- ifstream f(x);
- string l;
- while(getline(f,l))
- {
- if(l.length()&&l[0]=='#')
- continue;
- istrstream is(l.c_str());
- string E,F;
- int v1,v2,ii;
- double prob,count;
- if(is>>E>>F>>v1>>v2>>ii>>prob>>count)
- {
- //cerr << "Read: " << E << " " << F << " " << v1 << " " << v2 << " " << prob<< endl;
- if( count>0 )
- if( E=="-1")
- getCountRef_bigger(ii,0,(*fwordclasses)(F),1000,1000,v2)+=count;
- else
- getCountRef_first(ii,v1,(*fwordclasses)(F),1000,1000,v2)+=count;
- }
- }
- normalizeTable();
- //ofstream of("M5FILE");
- //of << (*this);
- }
- void clear()
- {
- for(map<m4_key,Vpff,compare1 >::iterator i=D1.begin();i!=D1.end();++i)
- {
- Vpff&d1=i->second;
- for(PositionIndex i=0;i<d1.size();i++)
- d1[i].first=0.0;
- }
- for(map<m4_key,Vpff,compareb1 >::iterator i=Db1.begin();i!=Db1.end();++i)
- {
- Vpff&db1=i->second;
- for(PositionIndex i=0;i<db1.size();i++)
- db1[i].first=0.0;
- }
- }
-};
-
-#endif
-
-
-
diff --git a/scripts/training/MGIZA/src/Dictionary.cpp b/scripts/training/MGIZA/src/Dictionary.cpp
deleted file mode 100644
index a0d9dc3..0000000
--- a/scripts/training/MGIZA/src/Dictionary.cpp
+++ /dev/null
@@ -1,93 +0,0 @@
-/*
-
-EGYPT Toolkit for Statistical Machine Translation
-Written by Yaser Al-Onaizan, Jan Curin, Michael Jahr, Kevin Knight, John Lafferty, Dan Melamed, David Purdy, Franz Och, Noah Smith, and David Yarowsky.
-
-This program is free software; you can redistribute it and/or
-modify it under the terms of the GNU General Public License
-as published by the Free Software Foundation; either version 2
-of the License, or (at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with this program; if not, write to the Free Software
-Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
-USA.
-
-*/
-/* Noah A. Smith
- Dictionary object for dictionary filter in Model 1 training
-
- Dictionary file must be in order (sorted) by Foreign vocab id, but English
- vocab ids may be in any order.
-
- 9 August 1999
-*/
-
-#include "Dictionary.h"
-#include <string.h>
-
-Dictionary::Dictionary(const char *filename){
- if(!strcmp(filename, "")){
- dead = true;
- return;
- }
- dead = false;
- cout << "Reading dictionary from: " << filename << '\n';
- ifstream dFile(filename);
- if(!dFile){
- cerr << "ERROR: Can't open dictionary: " << filename << '\n';
- exit(1);
- }
-
- currindexmin = 0;
- currindexmax = 0;
- currval = 0;
- int p, q;
- while((dFile >> p >> q)){
- pairs[0].push_back(p);
- pairs[1].push_back(q);
- }
- cout << "Dictionary read; " << pairs[0].size() << " pairs loaded." << '\n';
- dFile.close();
-}
-
-
-bool Dictionary::indict(int p, int q){
- if(dead) return false;
- if(p == 0 && q == 0) return false;
- if(currval == p){
- for(int i = currindexmin; i <= currindexmax; i++)
- if(pairs[1][i] == q) return true;
- return false;
- }
- else{
- int begin = 0, end = pairs[0].size() - 1, middle = 0;
- unsigned int t;
- bool ret = false;
- while(begin <= end){
- middle = begin + ((end - begin) >> 1);
- if(p < pairs[0][middle]) end = middle - 1;
- else if(p > pairs[0][middle]) begin = middle + 1;
- else{
- break;
- }
- }
- t = middle;
- while(pairs[0][t] == p )
- if(pairs[1][t--] == q) ret = true;
- currindexmin = t + 1;
- t = middle + 1;
- while(pairs[0][t] == p && t < pairs[0].size())
- if(pairs[1][t++] == q) ret = true;
- currindexmax = t - 1;
- currval = p;
- return ret;
- }
-}
-
-
diff --git a/scripts/training/MGIZA/src/Dictionary.h b/scripts/training/MGIZA/src/Dictionary.h
deleted file mode 100644
index 3a5c71e..0000000
--- a/scripts/training/MGIZA/src/Dictionary.h
+++ /dev/null
@@ -1,48 +0,0 @@
-/*
-
-EGYPT Toolkit for Statistical Machine Translation
-Written by Yaser Al-Onaizan, Jan Curin, Michael Jahr, Kevin Knight, John Lafferty, Dan Melamed, David Purdy, Franz Och, Noah Smith, and David Yarowsky.
-
-This program is free software; you can redistribute it and/or
-modify it under the terms of the GNU General Public License
-as published by the Free Software Foundation; either version 2
-of the License, or (at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with this program; if not, write to the Free Software
-Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
-USA.
-
-*/
-/* Noah A. Smith
- Dictionary object for dictionary filter in Model 1 training
-
- 9 August 1999
-*/
-
-#include <iostream>
-#include <fstream>
-
-#include "Vector.h"
-
-#ifndef DICTIONARY_H
-#define DICTIONARY_H
-
-class Dictionary{
- private:
- Vector<int> pairs[2];
- int currval;
- int currindexmin;
- int currindexmax;
- bool dead;
- public:
- Dictionary(const char *);
- bool indict(int, int);
-};
-
-#endif
diff --git a/scripts/training/MGIZA/src/FlexArray.h b/scripts/training/MGIZA/src/FlexArray.h
deleted file mode 100644
index 1dd73ed..0000000
--- a/scripts/training/MGIZA/src/FlexArray.h
+++ /dev/null
@@ -1,63 +0,0 @@
-/*
-
-Copyright (C) 1988,1999,2000,2001 Franz Josef Och (RWTH Aachen - Lehrstuhl fuer Informatik VI)
-
-This file is part of GIZA++ ( extension of GIZA ).
-
-This program is free software; you can redistribute it and/or
-modify it under the terms of the GNU General Public License
-as published by the Free Software Foundation; either version 2
-of the License, or (at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with this program; if not, write to the Free Software
-Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
-USA.
-
-*/
-#ifndef CLASS_FlexArray_defined
-#define CLASS_FlexArray_defined
-#include "Array.h"
-#include <iostream>
-#include <fstream>
-template<class T>
-class FlexArray
-{
-private:
- Array<T> p;
- int start,End;
-public:
- FlexArray(int _start=0,int _end=-1)
- : p(_end-_start+1),start(_start),End(_end) {}
- FlexArray(int _start,int _end,const T&init)
- : p(_end-_start+1,init),start(_start),End(_end) {}
- T&operator[](int i)
- {return p[i-start];}
- const T&operator[](int i)const
- {return p[i-start];}
- int low()const{return start;}
- int high()const{return End;}
-#ifdef WIN32
- T*begin(){return const_cast<double*>(&p[0]);}
- T*end(){return const_cast<double*>(&(p[0])+p.size());}
-#else
- T*begin(){return conv<double>(p.begin());}
- T*end(){return conv<double>(p.end());}
-#endif
-};
-
-template<class T>
-inline ostream&operator<<(ostream&out,const FlexArray<T>&x)
-{
- for(int i=x.low();i<=x.high();++i)
- out << i << ':' << x[i] << ';' << ' ';
- return out;
-}
-
-
-#endif
diff --git a/scripts/training/MGIZA/src/ForwardBackward.cpp b/scripts/training/MGIZA/src/ForwardBackward.cpp
deleted file mode 100644
index e477dd0..0000000
--- a/scripts/training/MGIZA/src/ForwardBackward.cpp
+++ /dev/null
@@ -1,284 +0,0 @@
-/*
-
-Copyright (C) 1999,2000,2001 Franz Josef Och (RWTH Aachen - Lehrstuhl fuer Informatik VI)
-
-This file is part of GIZA++ ( extension of GIZA ).
-
-This program is free software; you can redistribute it and/or
-modify it under the terms of the GNU General Public License
-as published by the Free Software Foundation; either version 2
-of the License, or (at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with this program; if not, write to the Free Software
-Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
-USA.
-
-*/
-#ifndef NO_TRAINING
-#include "ForwardBackward.h"
-#include "Globals.h"
-#include "myassert.h"
-#include "HMMTables.h"
-#include "mymath.h"
-
-
-double ForwardBackwardTraining(const HMMNetwork&net,Array<double>&g,Array<Array2<double> >&E){
- const int I=net.size1(),J=net.size2(),N=I*J;
- Array<double> alpha(N,0),beta(N,0),sum(J);
- for(int i=0;i<I;i++)
- beta[N-I+i]=net.getBetainit(i);
-#ifdef WIN32
- double * cur_beta=const_cast<double*>(&(beta[0]))+N-I-1;
-#else
- double * cur_beta=conv<double>(beta.begin())+N-I-1;
-#endif
- for(int j=J-2;j>=0;--j)
- for(int ti=I-1;ti>=0;--ti,--cur_beta) {
- const double *next_beta=conv<double>(beta.begin())+(j+1)*I;
- const double *alprob=&net.outProb(j,ti,0),*next_node=&net.nodeProb(0,j+1);
- for(int ni=0;ni<I;++ni,(next_node+=J)){
- massert(cur_beta<next_beta&& &net.outProb(j,ti,ni)==alprob);
- massert(next_node == &net.nodeProb(ni,j+1));
- /* if( VERB&&(*next_beta)*(*alprob)*(*next_node) )
- cout << "B= " << (int)(cur_beta-beta.begin()) << " += " << (*next_beta) << "("
- << next_beta-beta.begin() << ") alprob:" << (*alprob) << " lexprob:" << (*next_node) << endl;*/
- (*cur_beta)+=(*next_beta++)*(*alprob++)*(*next_node);
- }
- }
- for(int i=0;i<I;i++)
- alpha[i]=net.getAlphainit(i)*net.nodeProb(i,0);
-#ifdef WIN32
- double* cur_alpha=const_cast<double*>(&(alpha[0]))+I;
- cur_beta=const_cast<double*>(&(beta[0]))+I;
-#else
- double* cur_alpha=conv<double>(alpha.begin())+I;
- cur_beta=conv<double>(beta.begin())+I;
-#endif
-
- for(int j=1;j<J;j++){
- Array2<double>&e=E[ (E.size()==1)?0:(j-1) ];
- if( (E.size()!=1) || j==1 )
- {
- e.resize(I,I);
- fill(e.begin(),e.end(),0.0);
- }
-
- for(int ti=0;ti<I;++ti,++cur_alpha,++cur_beta) {
- const double * prev_alpha=conv<double>(alpha.begin())+I*(j-1);
- double *cur_e= &e(ti,0);
- double this_node=net.nodeProb(ti,j);
- const double* alprob= &net.outProb(j-1,0,ti);
- for(int pi=0;pi<I;++pi,++prev_alpha,(alprob+=I)){
- massert(prev_alpha<cur_alpha&& &net.outProb(j-1,pi,ti)==alprob);
- massert(&e(ti,pi)==cur_e);
- const double alpha_increment= *prev_alpha*(*alprob)*this_node;
- (*cur_alpha)+=alpha_increment;
- (*cur_e++)+=alpha_increment*(*cur_beta);
- }
- }
- }
- g.resize(N);
- transform(alpha.begin(),alpha.end(),beta.begin(),g.begin(),multiplies<double>());
- double bsum=0,esum=0,esum2;
- for(int i=0;i<I;i++)
- bsum+=beta[i]*net.nodeProb(i,0)*net.getAlphainit(i);
- for(unsigned int j=0;j<(unsigned int)E.size();j++)
- {
- Array2<double>&e=E[j];
- const double *epe=e.end();
- for(const double*ep=e.begin();ep!=epe;++ep)
- esum+=*ep;
- }
- if( J>1 )
- esum2=esum/(J-1);
- else
- esum2=0.0;
- if(!(esum2==0.0||mfabs(esum2-bsum)/bsum<1e-3*I))
- cout << "ERROR2: " << esum2 <<" " <<bsum << " " << esum << net << endl;
-#ifdef WIN32
- double * sumptr=const_cast<double*>(&(sum[0]));
- double* ge=const_cast<double*>(&(g[0])+g.size());
- for(double* gp=const_cast<double*>(&(g[0]));gp!=ge;gp+=I)
- {
- *sumptr++=normalize_if_possible(gp,gp+I);
- if(bsum && !(mfabs((*(sumptr-1)-bsum)/bsum)<1e-3*I))
- cout << "ERROR: " << *(sumptr-1) << " " << bsum << " " << mfabs((*(sumptr-1)-bsum)/bsum) << ' ' << I << ' ' << J << endl;
- }
-#else
- double * sumptr=conv<double>(sum.begin());
- double* ge=conv<double>(g.end());
- for(double* gp=conv<double>(g.begin());gp!=ge;gp+=I)
- {
- *sumptr++=normalize_if_possible(gp,gp+I);
- if(bsum && !(mfabs((*(sumptr-1)-bsum)/bsum)<1e-3*I))
- cout << "ERROR: " << *(sumptr-1) << " " << bsum << " " << mfabs((*(sumptr-1)-bsum)/bsum) << ' ' << I << ' ' << J << endl;
- }
-#endif
- for(unsigned int j=0;j<(unsigned int)E.size();j++)
- {
- Array2<double>&e=E[j];
- double* epe=e.end();
- if( esum )
- for(double*ep=e.begin();ep!=epe;++ep)
- *ep/=esum;
- else
- for(double*ep=e.begin();ep!=epe;++ep)
- *ep/=1.0/(max(I*I,I*I*(J-1)));
- }
- if( sum.size() )
- return sum[0];
- else
- return 1.0;
-}
-void HMMViterbi(const HMMNetwork&net,Array<int>&vit) {
- const int I=net.size1(),J=net.size2();
- vit.resize(J);
- Array<double>g;
- Array<Array2<double> >e(1);
- ForwardBackwardTraining(net,g,e);
- for(int j=0;j<J;j++) {
-#ifdef WIN32
- double * begin=const_cast<double*>(&(g[0]))+I*j;
-#else
- double * begin=conv<double>(g.begin())+I*j;
-#endif
- vit[j]=max_element(begin,begin+I)-begin;
- }
-}
-void HMMViterbi(const HMMNetwork&net,Array<double>&g,Array<int>&vit) {
- const int I=net.size1(),J=net.size2();
- vit.resize(J);
- for(int j=0;j<J;j++) {
-#ifdef WIN32
- double* begin=const_cast<double*>(&(g[0]))+I*j;
-#else
- double* begin=conv<double>(g.begin())+I*j;
-#endif
- vit[j]=max_element(begin,begin+I)-begin;
- }
-}
-
-double HMMRealViterbi(const HMMNetwork&net,Array<int>&vitar,int pegi,int pegj,bool verbose){
- const int I=net.size1(),J=net.size2(),N=I*J;
- Array<double> alpha(N,-1);
- Array<double*> bp(N,(double*)0);
- vitar.resize(J);
- if( J==0 )
- return 1.0;
- for(int i=0;i<I;i++)
- {
- alpha[i]=net.getAlphainit(i)*net.nodeProb(i,0);
- if( i>I/2 )
- alpha[i]=0; // only first empty word can be chosen
- bp[i]=0;
- }
-#ifdef WIN32
- double *cur_alpha=const_cast<double*>(&alpha[0])+I;
- double **cur_bp=const_cast<double**>(&bp[0])+I;
-#else
- double *cur_alpha=conv<double>(alpha.begin())+I;
- double **cur_bp=conv<double*>(bp.begin())+I;
-#endif
- for(int j=1;j<J;j++)
- {
- if( pegj+1==j)
- for(int ti=0;ti<I;ti++)
- if( (pegi!=-1&&ti!=pegi)||(pegi==-1&&ti<I/2) )
- (cur_alpha-I)[ti]=0.0;
- for(int ti=0;ti<I;++ti,++cur_alpha,++cur_bp) {
-#ifdef WIN32
- double* prev_alpha=const_cast<double*>(&(alpha[0]))+I*(j-1);
-#else
- double* prev_alpha=conv<double>(alpha.begin())+I*(j-1);
-#endif
- double this_node=net.nodeProb(ti,j);
- const double *alprob= &net.outProb(j-1,0,ti);
- for(int pi=0;pi<I;++pi,++prev_alpha,(alprob+=I)){
- massert(prev_alpha<cur_alpha&& &net.outProb(j-1,pi,ti)==alprob);
- const double alpha_increment= *prev_alpha*(*alprob)*this_node;
- if( alpha_increment> *cur_alpha )
- {
- (*cur_alpha)=alpha_increment;
- (*cur_bp)=prev_alpha;
- }
- }
- }
- }
- for(int i=0;i<I;i++)
- alpha[N-I+i]*=net.getBetainit(i);
- if( pegj==J-1)
- for(int ti=0;ti<I;ti++)
- if( (pegi!=-1&&ti!=pegi)||(pegi==-1&&ti<I/2) )
- (alpha)[N-I+ti]=0.0;
-
- int j=J-1;
-#ifdef WIN32
- cur_alpha=const_cast<double*>(&(alpha[0]))+j*I;
-#else
- cur_alpha=conv<double>(alpha.begin())+j*I;
-#endif
- vitar[J-1]=max_element(cur_alpha,cur_alpha+I)-cur_alpha;
- double ret= *max_element(cur_alpha,cur_alpha+I);
- while(bp[vitar[j]+j*I])
- {
- cur_alpha-=I;
- vitar[j-1]=bp[vitar[j]+j*I]-cur_alpha;
- massert(vitar[j-1]<I&&vitar[j-1]>=0);
- j--;
- }
- massert(j==0);
- if( verbose )
- {
- cout << "VERB:PEG: " << pegi << ' ' << pegj << endl;
- for(int j=0;j<J;j++)
- cout << "NP " << net.nodeProb(vitar[j],j) << ' ' << "AP " << ((j==0)?net.getAlphainit(vitar[j]):net.outProb(j-1,vitar[j-1],vitar[j])) << " j:" << j << " i:" << vitar[j] << "; ";
- cout << endl;
- }
- return ret;
-}
-
-double MaximumTraining(const HMMNetwork&net,Array<double>&g,Array<Array2<double> >&E){
- Array<int> vitar;
- double ret=HMMRealViterbi(net,vitar);
- const int I=net.size1(),J=net.size2();
- if( E.size()==1 )
- {
- Array2<double>&e=E[0];
- e.resize(I,I);
- g.resize(I*J);
- fill(g.begin(),g.end(),0.0);
- fill(e.begin(),e.end(),0.0);
- for(int i=0;i<J;++i)
- {
- g[i*I+vitar[i]]=1.0;
- if( i>0 )
- e(vitar[i],vitar[i-1])++;
- }
- }
- else
- {
- g.resize(I*J);
- fill(g.begin(),g.end(),0.0);
- for(int i=0;i<J;++i)
- {
- g[i*I+vitar[i]]=1.0;
- if( i>0 )
- {
- Array2<double>&e=E[i-1];
- e.resize(I,I);
- fill(e.begin(),e.end(),0.0);
- e(vitar[i],vitar[i-1])++;
- }
- }
- }
- return ret;
-}
-
-#endif
-
diff --git a/scripts/training/MGIZA/src/ForwardBackward.h b/scripts/training/MGIZA/src/ForwardBackward.h
deleted file mode 100644
index 42449d3..0000000
--- a/scripts/training/MGIZA/src/ForwardBackward.h
+++ /dev/null
@@ -1,62 +0,0 @@
-/*
-
-Copyright (C) 1999,2000,2001 Franz Josef Och (RWTH Aachen - Lehrstuhl fuer Informatik VI)
-
-This file is part of GIZA++ ( extension of GIZA ).
-
-This program is free software; you can redistribute it and/or
-modify it under the terms of the GNU General Public License
-as published by the Free Software Foundation; either version 2
-of the License, or (at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with this program; if not, write to the Free Software
-Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
-USA.
-
-*/
-#ifndef NO_EM_MARKOF_ZEUGS_DEFINED
-#define NO_EM_MARKOF_ZEUGS_DEFINED
-#ifndef NO_TRAINING
-#include "myassert.h"
-#include "Array.h"
-#include "Array2.h"
-
-class HMMNetwork
-{
- public:
- int as,bs;
- Array2<double> n;
- Array<Array2<double> > e;
- Array<double> alphainit;
- Array<double> betainit;
- int ab;
- double finalMultiply;
- HMMNetwork(int I,int J)
- : as(I),bs(J),n(as,bs),/*e(as,as,0.0),*/e(0),alphainit(as,1.0/as),betainit(as,1.0),ab(as*bs),finalMultiply(1.0)
- {}
- double getAlphainit(int i)const{return alphainit[i];}
- double getBetainit(int i)const{return betainit[i];}
- inline int size1()const{return as;}
- inline int size2()const{return bs;}
- inline const double&nodeProb(int i,int j)const
- {return n(i,j);}
- inline const double&outProb(int j,int i1,int i2)const
- {/*massert(e[min(int(e.size())-1,j)](i1,i2) );*/ return e[min(int(e.size())-1,j)](i1,i2);}
- friend ostream&operator<<(ostream&out,const HMMNetwork&x)
- {
- return out <<"N: \n"<< x.n << endl << "E: \n" << x.e << "A:\n" << x.alphainit << "B:\n" << x.betainit << endl;
- }
-};
-double ForwardBackwardTraining(const HMMNetwork&mc,Array<double>&gamma,Array<Array2<double> >&epsilon);
-void HMMViterbi(const HMMNetwork&mc,Array<int>&vit);
-double HMMRealViterbi(const HMMNetwork&net,Array<int>&vit,int pegi=-1,int pegj=-1,bool verbose=0);
-double MaximumTraining(const HMMNetwork&net,Array<double>&g,Array<Array2<double> >&e);
-void HMMViterbi(const HMMNetwork&net,Array<double>&g,Array<int>&vit);
-#endif
-#endif
diff --git a/scripts/training/MGIZA/src/Globals.h b/scripts/training/MGIZA/src/Globals.h
deleted file mode 100644
index 693a117..0000000
--- a/scripts/training/MGIZA/src/Globals.h
+++ /dev/null
@@ -1,76 +0,0 @@
-/*
-
-EGYPT Toolkit for Statistical Machine Translation
-Written by Yaser Al-Onaizan, Jan Curin, Michael Jahr, Kevin Knight, John Lafferty, Dan Melamed, David Purdy, Franz Och, Noah Smith, and David Yarowsky.
-
-This program is free software; you can redistribute it and/or
-modify it under the terms of the GNU General Public License
-as published by the Free Software Foundation; either version 2
-of the License, or (at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with this program; if not, write to the Free Software
-Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
-USA.
-
-*/
-#ifndef Globals_asdf_defined
-#define Globals_asdf_defined
-#include <string>
-#include <fstream>
-#include <map>
-#include <syncObj.h>
-#include "defs.h"
-#include "Vector.h"
-
-extern float PROB_SMOOTH,MINCOUNTINCREASE;
-extern bool Verbose, Log, Peg, Transfer, Transfer2to3, useDict ;
-extern string Prefix, LogFilename, OPath,
- SourceVocabFilename, TargetVocabFilename, CorpusFilename, TestCorpusFilename,
- SourceVocabClassesFilename, TargetVocabClassesFilename,
- t_Filename, a_Filename, p0_Filename, d_Filename, n_Filename, dictionary_Filename;
-extern ofstream logmsg ;
-extern Mutex logmsg_lock;
-extern double M5P0,P0 ;
-extern bool NODUMPS, FEWDUMPS ;
-extern string Usage ;
-extern unsigned int MAX_SENTENCE_LENGTH ;
-extern int PegUntil;
-
-extern short DeficientDistortionForEmptyWord;
-
-extern int M4_Dependencies;
-extern int M5_Dependencies;
-
-extern short OutputInAachenFormat;
-
-#define DEP_MODEL_l 1
-#define DEP_MODEL_m 2
-#define DEP_MODEL_F 4
-#define DEP_MODEL_E 8
-
-#define DEP_MODELb_l 16
-#define DEP_MODELb_m 32
-#define DEP_MODELb_F 64
-#define DEP_MODELb_E 128
-
-#define DEP_SUM 256
-
-class vcbList;
-
-extern vcbList *globeTrainVcbList, *globfTrainVcbList;
-
-extern short PredictionInAlignments;
-extern short SmoothHMM;
-#define VERB Verbose
-
-double ErrorsInAlignment(const map< pair<int,int>,char >&reference,const Vector<WordIndex>&test,int l,int&missing,int&toomuch,int&eventsMissing,int&eventsToomuch,int);
-extern Vector<map< pair<int,int>,char > > ReferenceAlignment;
-void printGIZAPars(ostream&out);
-
-#endif
diff --git a/scripts/training/MGIZA/src/HMMTables.cpp b/scripts/training/MGIZA/src/HMMTables.cpp
deleted file mode 100644
index c3ec741..0000000
--- a/scripts/training/MGIZA/src/HMMTables.cpp
+++ /dev/null
@@ -1,598 +0,0 @@
-/*
-
- Copyright (C) 1998,1999,2000,2001 Franz Josef Och (RWTH Aachen - Lehrstuhl fuer Informatik VI)
-
- This file is part of GIZA++ ( extension of GIZA ).
-
- This program is free software; you can redistribute it and/or
- modify it under the terms of the GNU General Public License
- as published by the Free Software Foundation; either version 2
- of the License, or (at your option) any later version.
-
- This program is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- GNU General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program; if not, write to the Free Software
- Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
- USA.
-
- */
-#include "HMMTables.h"
-#include <fstream>
-#include <sstream>
-#include "Globals.h"
-#include "Parameter.h"
-
-template<class CLS, class MAPPERCLASSTOSTRING> void HMMTables<CLS,
- MAPPERCLASSTOSTRING>::writeJumps(ostream&out) const {
- double ssum=0.0;
- for (typename map<AlDeps<CLS>,FlexArray<double> >::const_iterator i=
- alProb.begin(); i!=alProb.end(); ++i) {
- double sum=0.0;
- out << "\n\nDistribution for: ";
- printAlDeps(out, i->first, *mapper1, *mapper2);
- out << ' ';
- for (int a=i->second.low(); a<=i->second.high(); ++a)
- if (i->second[a]) {
- out << a << ':' << i->second[a] << ';' << ' ';
- sum+=i->second[a];
- }
- out << '\n' << '\n';
- out << "SUM: " << sum << '\n';
- ssum+=sum;
- }
- out << "FULL-SUM: " << ssum << '\n';
-}
-template<class CLS, class MAPPERCLASSTOSTRING> void HMMTables<CLS,
- MAPPERCLASSTOSTRING>::readJumps(istream&) {
-}
-template<class CLS, class MAPPERCLASSTOSTRING> double HMMTables<CLS,
- MAPPERCLASSTOSTRING>::getAlProb(int istrich, int k, int sentLength,
- int J, CLS w1, CLS w2, int j, int iter) const {
- massert(k<sentLength&&k>=0);
- massert(istrich<sentLength&&istrich>=-1);
- int pos=istrich-k;
- switch (PredictionInAlignments) {
- case 0:
- pos=istrich-k;
- break;
- case 1:
- pos=k;
- break;
- case 2:
- pos=(k*J-j*sentLength);
- if (pos>0)
- pos+=J/2;
- else
- pos-=J/2;
- pos/=J;
- break;
- default:
- abort();
- }
- lock->lock();
- typename map<AlDeps<CLS>,FlexArray<double> >::const_iterator p=
- alProb.find(AlDeps<CLS>(sentLength, istrich, j, w1, w2));
- if (p!=alProb.end() ) {
- lock->unlock();
- return (p->second)[pos];
- } else {
- if (iter>0&&iter<5000)
- cout << "WARNING: Not found: " << ' ' << J << ' ' << sentLength
- << '\n';;
- lock->unlock();
- return 1.0/(2*sentLength-1);
- }
- lock->unlock();
-}
-
-template<class CLS, class MAPPERCLASSTOSTRING> void HMMTables<CLS,
- MAPPERCLASSTOSTRING>::addAlCount(int istrich, int k, int sentLength,
- int J, CLS w1, CLS w2, int j, double value, double valuePredicted) {
-
-
- int pos=istrich-k;
- switch (PredictionInAlignments) {
- case 0:
- pos=istrich-k;
- break;
- case 1:
- pos=k;
- break;
- case 2:
- pos=(k*J-j*sentLength);
- if (pos>0)
- pos+=J/2;
- else
- pos-=J/2;
- pos/=J;
- break;
- default:
- abort();
- }
-
-
- AlDeps<CLS> deps(AlDeps<CLS>(sentLength, istrich, j, w1, w2));
-
- {
- lock->lock();
- typename map<AlDeps<CLS>,FlexArray<double> >::iterator p=
- alProb.find(deps);
- if (p==alProb.end() ) {
- if ( (CompareAlDeps&1)==0)
- p=alProb.insert(make_pair(deps,FlexArray<double> (-MAX_SENTENCE_LENGTH,MAX_SENTENCE_LENGTH,0.0))).first;
- else
- p=alProb.insert(make_pair(deps,FlexArray<double> (-sentLength,sentLength,0.0))).first;
- }
- p->second[pos]+=value;
- lock->unlock();
- }
-
- if (valuePredicted) {
- lock->lock();
- typename map<AlDeps<CLS>,FlexArray<double> >::iterator p=
- alProbPredicted.find(deps);
- if (p==alProbPredicted.end() ) {
- if ( (CompareAlDeps&1)==0)
- p
- =alProbPredicted.insert(make_pair(deps,FlexArray<double> (-MAX_SENTENCE_LENGTH,MAX_SENTENCE_LENGTH,0.0))).first;
- else
- p=alProbPredicted.insert(make_pair(deps,FlexArray<double> (-sentLength,sentLength,0.0))).first;
- }
- p->second[pos]+=valuePredicted;
- lock->unlock();
- }
-
-}
-
-template<class CLS, class MAPPERCLASSTOSTRING>
-hmmentry_type& HMMTables<CLS,MAPPERCLASSTOSTRING>::doGetAlphaInit(int I)
-{
- alphalock->lock();
- if( !init_alpha.count(I) ){
-#ifdef WIN32
- init_alpha[I]=hmmentry_type(Array<double>(I,0),new Mutex());
-#else
- init_alpha[I]=hmmentry_type(Array<double>(I,0),Mutex());
-#endif
- }
- hmmentry_type& ret = init_alpha[I];
- alphalock->unlock();
- return ret;
-}
-template<class CLS, class MAPPERCLASSTOSTRING>
-hmmentry_type& HMMTables<CLS,MAPPERCLASSTOSTRING>::doGetBetaInit(int I)
-{
- betalock->lock();
- if( !init_beta.count(I) ){
-#ifdef WIN32
- init_beta[I]=hmmentry_type(Array<double>(I,0),new Mutex());
-#else
- init_beta[I]=pair<Array<double>,Mutex>(Array<double>(I,0),Mutex());
-#endif
- }
- hmmentry_type& ret = init_beta[I];
- betalock->unlock();
- return ret;
-}
-
-template<class CLS, class MAPPERCLASSTOSTRING> bool HMMTables<CLS,
- MAPPERCLASSTOSTRING>::getAlphaInit(int I, Array<double>&x) const {
- alphalock->lock();
- hash_map<int,hmmentry_type >::const_iterator i=init_alpha.find(I);
- if (i==init_alpha.end() ){
- alphalock->unlock();
- return 0;
- }
- else {
- x=i->second.first;
- alphalock->unlock();
- for (unsigned int j=x.size()/2+1; j<x.size(); ++j)
- // only first empty word can be chosen
- x[j]=0;
- return 1;
- }
- alphalock->unlock();
-}
-template<class CLS, class MAPPERCLASSTOSTRING> bool HMMTables<CLS,
- MAPPERCLASSTOSTRING>::getBetaInit(int I, Array<double>&x) const {
- betalock->lock();
- hash_map<int,hmmentry_type >::const_iterator i=init_beta.find(I);
- if (i==init_beta.end() ){
- betalock->unlock();
- return 0;
- }
- else {
- x=i->second.first;
- betalock->unlock();
- return 1;
- }
- betalock->unlock();
-}
-
-/***********************************
- By Edward Gao
- ************************************/
-
-template<class CLS, class MAPPERCLASSTOSTRING> bool HMMTables<CLS,
- MAPPERCLASSTOSTRING>::writeJumps(const char* alprob,
- const char* alpredict, const char* alpha, const char* beta) const {
- if (alprob) {
- ofstream ofs(alprob);
- if (!ofs.is_open()) {
- cerr << "Cannot open file for HMM output " << alprob << endl;
- return false;
- }
- cerr << "Dumping HMM table to " << alprob << endl;
-
- for (typename map<AlDeps<CLS>,FlexArray<double> >::const_iterator i=
- alProb.begin(); i!=alProb.end(); ++i) {
- double sum=0.0;
- ofs <<i->first.englishSentenceLength << " "
- << i->first.classPrevious << " " << i->first.previous
- << " " << i->first.j << " " << i->first.Cj <<" "
- << i->second.low() <<" " << i->second.high()<< " ";
- for (int a=i->second.low(); a<=i->second.high(); ++a)
- if (i->second[a]) {
- ofs << a << ' ' << i->second[a] << ' ';
- sum+=i->second[a];
- }
- ofs << endl;
- }
- ofs.close();
- }
- if (alpredict) {
- ofstream ofs(alpredict);
- if (!ofs.is_open()) {
- cerr << "Cannot open file for HMM output " << alpredict << endl;
- return false;
- }
- cerr << "Dumping HMM table to " << alpredict << endl;
- for (typename map<AlDeps<CLS>,FlexArray<double> >::const_iterator i=
- alProbPredicted.begin(); i!=alProbPredicted.end(); ++i) {
- double sum=0.0;
- ofs << i->first.englishSentenceLength << " "
- << i->first.classPrevious << " " << i->first.previous
- << " " << i->first.j << " " << i->first.Cj <<" "
- << i->second.low() <<" " << i->second.high()<< " ";
- for (int a=i->second.low(); a<=i->second.high(); ++a)
- if (i->second[a]) {
- ofs << a << ' ' << i->second[a] << ' ';
- sum+=i->second[a];
- }
- ofs << endl;
- }
- ofs.close();
- }
- if (alpha) {
- ofstream ofs(alpha);
-
- if (!ofs.is_open()) {
- cerr << "Cannot open file for HMM output " << alpha << endl;
- return false;
- }
- cerr << "Dumping HMM table to " << alpha << endl;
- for (typename hash_map<int,hmmentry_type>::const_iterator i=
- init_alpha.begin(); i!=init_alpha.end(); i++) {
- ofs << i->first << " " << i->second.first.size() <<" ";
- int j;
- for (j=0; j<i->second.first.size(); j++) {
- ofs << i->second.first[j] << " ";
- }
- ofs<<endl;
- }
- ofs.close();
- }
- if (beta) {
- ofstream ofs(beta);
- if (!ofs.is_open()) {
- cerr << "Cannot open file for HMM output " << beta << endl;
- return false;
- }
- cerr << "Dumping HMM table to " << beta << endl;
- for (typename hash_map<int,hmmentry_type>::const_iterator i=
- init_beta.begin(); i!=init_beta.end(); i++) {
- ofs << i->first << " " << i->second.first.size() << " ";
- int j;
- for (j=0; j<i->second.first.size(); j++) {
- ofs << i->second.first[j] << " ";
- }
- ofs << endl;
- }
- ofs.close();
- }
- return true;
-}
-
-template<class CLS, class MAPPERCLASSTOSTRING> bool HMMTables<CLS,
- MAPPERCLASSTOSTRING>::readJumps(const char* alprob,
- const char* alpredict, const char* alpha, const char* beta) {
- if (alprob) {
- ifstream ifs(alprob);
- if (!ifs.is_open()) {
- cerr << "Cannot open file for HMM input " << alprob << endl;
- return false;
- }
- cerr << "Reading HMM table from " << alprob << endl;
- string strLine="";
- bool expect_data = false;
- while (!ifs.eof()) {
- strLine = "";
- getline(ifs, strLine);
- if (strLine.length()) {
- stringstream ss(strLine.c_str());
- AlDeps<CLS> dep;
- int low, high;
- ss >> dep.englishSentenceLength >> dep.classPrevious
- >> dep.previous >> dep.j >> dep.Cj >> low >> high;
- typename map<AlDeps<CLS>,FlexArray<double> >::iterator p=
- alProb.find(dep);
- if (p==alProb.end() ) {
- p=alProb.insert(make_pair(dep,FlexArray<double> (low,high,0.0))).first;
- }
- int pos;
- double val;
- while (!ss.eof()) {
- pos = low-1;
- val = 0;
- ss >> pos >> val;
- if (pos>low-1) {
- p->second[pos]+=val;
- }
- }
- }
- }
- }
- if (alpredict) {
- ifstream ifs(alpredict);
- if (!ifs.is_open()) {
- cerr << "Cannot open file for HMM input " << alpredict << endl;
- return false;
- }
- cerr << "Reading HMM table from " << alpredict << endl;
- string strLine="";
- bool expect_data = false;
- while (!ifs.eof()) {
- strLine = "";
- getline(ifs, strLine);
- if (strLine.length()) {
- stringstream ss(strLine.c_str());
- AlDeps<CLS> dep;
- int low, high;
- ss >> dep.englishSentenceLength >> dep.classPrevious
- >> dep.previous >> dep.j >> dep.Cj >> low >> high;
- typename map<AlDeps<CLS>,FlexArray<double> >::iterator p=
- alProbPredicted.find(dep);
- if (p==alProbPredicted.end() ) {
- p=alProbPredicted.insert(make_pair(dep,FlexArray<double> (low,high,0.0))).first;
- }
- int pos;
- double val;
-
- while (!ss.eof()) {
- pos = low-1;
- val = 0;
- ss >> pos >> val;
- if (pos>low-1) {
- p->second[pos]+=val;
- }
- }
- }
- }
- }
-
- if (alpha) {
- ifstream ifs(alpha);
-
- if (!ifs.is_open()) {
- cerr << "Cannot open file for HMM input " << alpha << endl;
- return false;
- }
- string strLine="";
- bool expect_data = false;
- while (!ifs.eof()) {
- strLine = "";
- getline(ifs, strLine);
- if (strLine.length()) {
- stringstream ss(strLine.c_str());
- int id = -1, size = -1;
- ss >> id >> size;
- if (id<0||size<0||id!=size) {
- cerr << "Mismatch in alpha init table!" << endl;
- return false;
- }
- hmmentry_type&alp = doGetAlphaInit(id);
- Array<double>& gk = alp.first;
- int j;
- double v;
-#ifdef WIN32
- alp.second->lock();
-#else
- alp.second.lock();
-#endif
- for (j=0; j<gk.size(); j++) {
- ss >> v;
- gk[j]+=v;
- }
-#ifdef WIN32
- alp.second->unlock();
-#else
- alp.second.unlock();
-#endif
-
- }
- }
- }
-
- if (beta) {
- ifstream ifs(beta);
-
- if (!ifs.is_open()) {
- cerr << "Cannot open file for HMM input " << beta << endl;
- return false;
- }
- string strLine="";
- bool expect_data = false;
- while (!ifs.eof()) {
- strLine = "";
- getline(ifs, strLine);
- if (strLine.length()) {
- stringstream ss(strLine.c_str());
- int id = -1, size = -1;
- ss >> id >> size;
- if (id<0||size<0||id!=size) {
- cerr << "Mismatch in alpha init table!" << endl;
- return false;
- }
- hmmentry_type&bet1 = doGetBetaInit(id);
- Array<double>&bet = bet1.first;
-
- int j;
- double v;
-#ifdef WIN32
- bet1.second->lock();
-#else
- bet1.second.lock();
-#endif
- for (j=0; j<bet.size(); j++) {
- ss >> v;
- bet[j]+=v;
- }
-#ifdef WIN32
- bet1.second->unlock();
-#else
- bet1.second.unlock();
-#endif
- }
- }
- }
-
- return true;
-}
-
-template<class CLS, class MAPPERCLASSTOSTRING> bool HMMTables<CLS,
- MAPPERCLASSTOSTRING>::merge(HMMTables<CLS,MAPPERCLASSTOSTRING> & ht) {
-
- for (typename map<AlDeps<CLS>,FlexArray<double> >::const_iterator i=
- ht.alProb.begin(); i!=ht.alProb.end(); ++i) {
- typename map<AlDeps<CLS>,FlexArray<double> >::iterator p=
- alProb.find(i->first);
- if (p==alProb.end() ) {
- p=alProb.insert(make_pair(i->first,FlexArray<double> (i->second.low(),i->second.high(),0.0))).first;
- }
- for (int a=i->second.low(); a<=i->second.high(); ++a)
- if (i->second[a]) {
- p->second[a] += i->second[a];
- }
-
- }
-
- for (typename map<AlDeps<CLS>,FlexArray<double> >::const_iterator i=
- ht.alProbPredicted.begin(); i!=ht.alProbPredicted.end(); ++i) {
- typename map<AlDeps<CLS>,FlexArray<double> >::iterator p=
- alProbPredicted.find(i->first);
- if (p==alProbPredicted.end() ) {
- p=alProbPredicted.insert(make_pair(i->first,FlexArray<double> (i->second.low(),i->second.high(),0.0))).first;
- }
- for (int a=i->second.low(); a<=i->second.high(); ++a)
- if (i->second[a]) {
- p->second[a] += i->second[a];
- }
-
- }
-
- for (typename hash_map<int,hmmentry_type>::iterator i=
- ht.init_alpha.begin(); i!=ht.init_alpha.end(); i++) {
- hmmentry_type& alp = doGetAlphaInit(i->first);
- int j;
- double v;
- for (j=0; j<alp.first.size(); j++) {
- alp.first[j]+=i->second.first[j];
- }
- }
- for (typename hash_map<int,hmmentry_type>::iterator i=
- ht.init_beta.begin(); i!=ht.init_beta.end(); i++) {
- hmmentry_type&alp = doGetBetaInit(i->first);
- int j;
- double v;
- for (j=0; j<alp.first.size(); j++) {
- alp.first[j]+=i->second.first[j];
- }
- }
-
- return true;
-
-}
-
-//////////////////////////////////////
-template<class CLS, class MAPPERCLASSTOSTRING> HMMTables<CLS,
- MAPPERCLASSTOSTRING>::HMMTables(double _probForEmpty,
- const MAPPERCLASSTOSTRING&m1, const MAPPERCLASSTOSTRING&m2) :
- probabilityForEmpty(mfabs(_probForEmpty)),
- updateProbabilityForEmpty(_probForEmpty<0.0), mapper1(&m1),
- mapper2(&m2) {
- lock = new Mutex();
- alphalock = new Mutex();
- betalock = new Mutex();
-}
-
-template<class CLS, class MAPPERCLASSTOSTRING> HMMTables<CLS,
- MAPPERCLASSTOSTRING>::HMMTables(const HMMTables& ref):
-mapper1(ref.mapper1), mapper2(ref.mapper2)
-{
- probabilityForEmpty=ref.probabilityForEmpty;
- updateProbabilityForEmpty=ref.updateProbabilityForEmpty;
- init_alpha=ref.init_alpha;
- init_beta=ref.init_beta;
- alProb=ref.alProb;
- alProbPredicted=ref.alProbPredicted;
- globalCounter=ref.globalCounter;
- divSum=ref.divSum;
- p0_count=ref.p0_count;
- np0_count=ref.np0_count;
-}
-template<class CLS, class MAPPERCLASSTOSTRING> void HMMTables<CLS,
- MAPPERCLASSTOSTRING>::operator=(const HMMTables& ref){
- probabilityForEmpty=ref.probabilityForEmpty;
- updateProbabilityForEmpty=ref.updateProbabilityForEmpty;
- init_alpha=ref.init_alpha;
- init_beta=ref.init_beta;
- alProb=ref.alProb;
- alProbPredicted=ref.alProbPredicted;
- globalCounter=ref.globalCounter;
- divSum=ref.divSum;
- p0_count=ref.p0_count;
- np0_count=ref.np0_count;
-}
-
-
-template<class CLS, class MAPPERCLASSTOSTRING> HMMTables<CLS,
- MAPPERCLASSTOSTRING>::~HMMTables() {
-#if 0
- for (typename hash_map<int,hmmentry_type>::iterator i=
- init_alpha.begin(); i!=init_alpha.end(); i++) {
- i->second.second->unlock();
- }
- for (typename hash_map<int,hmmentry_type>::iterator i=
- init_beta.begin(); i!=init_beta.end(); i++) {
- i->second.second->unlock();
- }
-
-
- delete lock;
- delete alphalock;
- delete betalock;
-
- for (typename hash_map<int,hmmentry_type>::iterator i=
- init_alpha.begin(); i!=init_alpha.end(); i++) {
- delete i->second.second;
- }
- for (typename hash_map<int,hmmentry_type>::iterator i=
- init_beta.begin(); i!=init_beta.end(); i++) {
- delete i->second.second;
- }
-#endif
-}
diff --git a/scripts/training/MGIZA/src/HMMTables.h b/scripts/training/MGIZA/src/HMMTables.h
deleted file mode 100644
index 944b173..0000000
--- a/scripts/training/MGIZA/src/HMMTables.h
+++ /dev/null
@@ -1,188 +0,0 @@
-/*
-
-Copyright (C) 1999,2000,2001 Franz Josef Och (RWTH Aachen - Lehrstuhl fuer Informatik VI)
-
-This file is part of GIZA++ ( extension of GIZA ).
-
-This program is free software; you can redistribute it and/or
-modify it under the terms of the GNU General Public License
-as published by the Free Software Foundation; either version 2
-of the License, or (at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with this program; if not, write to the Free Software
-Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
-USA.
-
-*/
-#ifndef HMM_TABLES_H_ASDF_DEFINED
-#define HMM_TABLES_H_ASDF_DEFINED
-#include "FlexArray.h"
-
-#if __GNUC__>2
-#include <ext/hash_map>
-using __gnu_cxx::hash_map;
-#else
-#include <hash_map>
-#endif
-#include "Array.h"
-#include <map>
-#include "mymath.h"
-#include "syncObj.h"
-
-template<class T>
-T normalize_if_possible(T*a,T*b){
- T sum=0;
- for(T*i=a;i!=b;++i)
- sum+=*i;
- if( sum )
- for(T*i=a;i!=b;++i)
- *i/=sum;
- else
- fill(a,b,1.0/(b-a));
- return sum;
-}
-
-extern short CompareAlDeps;
-template<class CLS>
-class AlDeps{
-public:
- int englishSentenceLength;
- CLS classPrevious;
- int previous;
- int j;
- CLS Cj;
- AlDeps(){};
- AlDeps(int l,int p=0,int _j=0,CLS s1=0,CLS _Cj=0)
- : englishSentenceLength(l),classPrevious(s1),previous(p),j(_j),Cj(_Cj)
- {}
- friend bool operator<(const AlDeps&x,const AlDeps&y){
- if( (CompareAlDeps&1) && x.englishSentenceLength<y.englishSentenceLength ) return 1;
- if( (CompareAlDeps&1) && y.englishSentenceLength<x.englishSentenceLength ) return 0;
- if( (CompareAlDeps&2) && x.classPrevious<y.classPrevious ) return 1;
- if( (CompareAlDeps&2) && y.classPrevious<x.classPrevious ) return 0;
- if( (CompareAlDeps&4) && x.previous<y.previous ) return 1;
- if( (CompareAlDeps&4) && y.previous<x.previous ) return 0;
- if( (CompareAlDeps&8) && x.j<y.j ) return 1;
- if( (CompareAlDeps&8) && y.j<x.j ) return 0;
- if( (CompareAlDeps&16) && x.Cj<y.Cj ) return 1;
- if( (CompareAlDeps&16) && y.Cj<x.Cj ) return 0;
- return 0;
- }
- friend bool operator==(const AlDeps&x,const AlDeps&y)
- { return !( x<y || y<x ); }
-};
-
-template<class CLS>
-class Hash_AlDeps{
-public:
- unsigned
- int
- operator()
- (const AlDeps<CLS>&x)
- const
- {
- unsigned int hash=0;
- if( (CompareAlDeps&1) ) { hash=hash+x.englishSentenceLength;hash*=31;}
- if( (CompareAlDeps&2) ) { hash=hash+x.classPrevious;hash*=31;}
- if( (CompareAlDeps&4) ) { hash=hash+x.previous;hash*=31;}
- if( (CompareAlDeps&8) ) { hash=hash+x.j;hash*=31;}
- if( (CompareAlDeps&16) ) { hash=hash+x.Cj;hash*=31;}
- return hash;
-
- }
-};
-
-#ifdef WIN32
-typedef pair<Array<double>,Mutex*> hmmentry_type;
-#else
-typedef pair<Array<double>,Mutex> hmmentry_type;
-#endif
-
-template<class CLS,class MAPPERCLASSTOSTRING>
-class HMMTables
-{
- Mutex* lock;
- Mutex* alphalock,*betalock;
-public:
-
- double probabilityForEmpty;
- bool updateProbabilityForEmpty;
- hash_map<int, hmmentry_type > init_alpha;
- hash_map<int, hmmentry_type > init_beta;
- map<AlDeps<CLS>,FlexArray<double> > alProb;
- map<AlDeps<CLS>,FlexArray<double> > alProbPredicted;
- int globalCounter;
- double divSum;
- double p0_count,np0_count;
- const MAPPERCLASSTOSTRING*mapper1;
- const MAPPERCLASSTOSTRING*mapper2;
-public:
- bool merge(HMMTables<CLS,MAPPERCLASSTOSTRING> & ht);
- const HMMTables<CLS,MAPPERCLASSTOSTRING>*getThis()const {return this;}
- HMMTables(double _probForEmpty,const MAPPERCLASSTOSTRING&m1,const MAPPERCLASSTOSTRING&m2);
- HMMTables(const HMMTables& ref);
- void operator=(const HMMTables& ref);
- virtual ~HMMTables();
- virtual double getAlProb(int i,int k,int sentLength,int J,CLS w1,CLS w2,int j,int iter=0) const;
- virtual void writeJumps(ostream&) const;
- /**By Edward Gao, write out all things needed to rebuild the count table*/
- virtual bool writeJumps(const char* alprob, const char* alpredict, const char* alpha, const char* beta )const;
- virtual bool readJumps(const char* alprob, const char* alpredict, const char* alpha, const char* beta );
- void addAlCount(int i,int k,int sentLength,int J,CLS w1,CLS w2,int j,double value,double valuePredicted);
- virtual void readJumps(istream&);
- virtual bool getAlphaInit(int I,Array<double>&x)const;
- virtual bool getBetaInit(int I,Array<double> &x)const;
- hmmentry_type &doGetAlphaInit(int I);
- hmmentry_type &doGetBetaInit(int I);
- virtual double getProbabilityForEmpty()const
- {return probabilityForEmpty;}
- void performGISIteration(const HMMTables<CLS,MAPPERCLASSTOSTRING>*old){
- cout << "OLDSIZE: " << (old?(old->alProb.size()):0) << " NEWSIZE:"<< alProb.size()<< endl;
- for(typename map<AlDeps<CLS>,FlexArray<double> >::iterator i=alProb.begin();i!=alProb.end();++i) {
- if( alProbPredicted.count(i->first)){
- normalize_if_possible(i->second.begin(),i->second.end());
- normalize_if_possible(alProbPredicted[i->first].begin(),alProbPredicted[i->first].end());
- for(int j=i->second.low();j<=i->second.high();++j){
- if( i->second[j] )
- if(alProbPredicted[i->first][j]>0.0 )
- {
- double op=1.0;
- if( old && old->alProb.count(i->first) )
- op=(old->alProb.find(i->first)->second)[j];
- //cerr << "GIS: " << j << ' ' << " OLD:"
- // << op << "*true:"
- // << i->second[j] << "/pred:" << alProbPredicted[i->first][j] << " -> ";
-
-
- i->second[j]= op*(i->second[j]/alProbPredicted[i->first][j]);
- //cerr << i->second[j] << endl;
- }
- else{
- cerr << "ERROR2 in performGISiteration: " << i->second[j] << endl;
- }
- }
- }
- else
- cerr << "ERROR in performGISIteration: " << alProbPredicted.count(i->first) << endl;
- }
- }
-};
-
-template<class CLS,class MAPPERCLASSTOSTRING>
-inline void printAlDeps(ostream&out,const AlDeps<CLS>&x,const MAPPERCLASSTOSTRING&mapper1,const MAPPERCLASSTOSTRING&mapper2)
-{
- if( (CompareAlDeps&1) ) out << "sentenceLength: " << x.englishSentenceLength<< ' ';
- if( (CompareAlDeps&2) ) out << "previousClass: " << mapper1.classString(x.classPrevious) << ' ';
- if( (CompareAlDeps&4) ) out << "previousPosition: " << x.previous << ' ';
- if( (CompareAlDeps&8) ) out << "FrenchPosition: " << x.j << ' ';
- if( (CompareAlDeps&16) ) out << "FrenchClass: " << mapper2.classString(x.Cj) << ' ';
- //out << '\n';
-}
-
-#endif
diff --git a/scripts/training/MGIZA/src/Makefile.am b/scripts/training/MGIZA/src/Makefile.am
deleted file mode 100644
index 55d688e..0000000
--- a/scripts/training/MGIZA/src/Makefile.am
+++ /dev/null
@@ -1,217 +0,0 @@
-## Process this file with automake to produce Makefile.in
-
-## Created by Anjuta
-
-INCLUDES = \
- -DPACKAGE_LOCALE_DIR=\""$(prefix)/$(DATADIRNAME)/locale"\" \
- -DPACKAGE_SRC_DIR=\""$(srcdir)"\" \
- -DPACKAGE_DATA_DIR=\""$(datadir)"\"
-
-AM_CFLAGS =\
- -Wall\
- -g
-
-bin_PROGRAMS = mgiza \
- snt2cooc\
- snt2plain\
- plain2snt \
- symal \
- hmmnorm \
- d4norm
-
-d4norm_SOURCES = \
- d4norm.cxx
-
-d4norm_LDADD = \
- -lgiza \
- -lpthread
-
-d4norm_LDFLAGS = \
- -L.
-
-d4norm_DEPENDENCIES = \
- libgiza.a
-
-d4norm_CXXFLAGS = \
- -MT \
- -MD \
- -MP \
- -MF \
- -O6
-
-d4norm_CPPFLAGS = \
- -DNDEBUG \
- -DWORDINDEX_WITH_4_BYTE \
- -DBINARY_SEARCH_FOR_TTABLE \
- -DDEBUG
-
-hmmnorm_SOURCES = \
- hmmnorm.cxx
-
-hmmnorm_LDADD = \
- -lgiza \
- -lpthread
-
-hmmnorm_LDFLAGS = \
- -L.
-
-hmmnorm_DEPENDENCIES = \
- libgiza.a
-
-hmmnorm_CXXFLAGS = \
- -MT \
- -MD \
- -MP \
- -MF \
- -O6
-
-hmmnorm_CPPFLAGS = \
- -DNDEBUG \
- -DWORDINDEX_WITH_4_BYTE \
- -DBINARY_SEARCH_FOR_TTABLE \
- -DDEBUG
-
-symal_SOURCES = \
- cmd.c \
- cmd.h \
- symal.cpp
-
-plain2snt_SOURCES = \
- plain2snt.cpp
-
-snt2plain_SOURCES = \
- snt2plain.cpp
-
-snt2cooc_SOURCES = \
- snt2cooc.cpp
-
-snt2cooc_CPPFLAGS = \
- -DNDEBUG \
- -DWORDINDEX_WITH_4_BYTE \
- -DBINARY_SEARCH_FOR_TTABLE \
- -DDEBUG
-
-mgiza_SOURCES = \
- main.cpp
-
-mgiza_DEPENDENCIES = \
- libgiza.a
-
-mgiza_CXXFLAGS = \
- -MT \
- -MD \
- -MP \
- -MF \
- -O6
-
-mgiza_CPPFLAGS = \
- -DNDEBUG \
- -DWORDINDEX_WITH_4_BYTE \
- -DBINARY_SEARCH_FOR_TTABLE \
- -DDEBUG
-
-mgiza_LDFLAGS = \
- -L.
-
-mgiza_LDADD = \
- -lgiza \
- -lpthread
-
-lib_LIBRARIES = \
- libgiza.a
-
-libgiza_a_SOURCES = \
- alignment.cpp\
- alignment.h \
- AlignTables.cpp \
- AlignTables.h \
- Array.h \
- Array2.h \
- Array4.h \
- ATables.cpp \
- ATables.h \
- collCounts.cpp \
- collCounts.h \
- common.h \
- D4Tables.h \
- D5Tables.h \
- defs.h \
- Dictionary.cpp \
- Dictionary.h \
- file_spec.h \
- FlexArray.h \
- ForwardBackward.cpp \
- ForwardBackward.h \
- getSentence.cpp \
- getSentence.h \
- Globals.h \
- hmm.cpp \
- hmm.h \
- HMMTables.cpp \
- HMMTables.h \
- logprob.cpp \
- logprob.h \
- model1.cpp \
- model1.h \
- model2.cpp \
- model2.h \
- model2to3.cpp \
- model3.cpp \
- model3.h \
- model3_viterbi.cpp \
- model3_viterbi_with_tricks.cpp \
- model345-peg.cpp \
- MoveSwapMatrix.cpp \
- MoveSwapMatrix.h \
- myassert.cpp \
- myassert.h \
- mymath.h \
- mystl.h \
- NTables.cpp \
- NTables.h \
- Parameter.cpp \
- Parameter.h \
- parse.cpp \
- Perplexity.cpp \
- Perplexity.h \
- Pointer.h \
- reports.cpp \
- SetArray.cpp \
- SetArray.h \
- syncObj.h \
- transpair_model1.h \
- transpair_model2.h \
- transpair_model3.cpp \
- transpair_model3.h \
- transpair_model4.cpp \
- transpair_model4.h \
- transpair_model5.cpp \
- transpair_model5.h \
- transpair_modelhmm.h \
- ttableDiff.hpp \
- TTables.cpp \
- TTables.h \
- types.h \
- utility.cpp \
- utility.h \
- Vector.h \
- vocab.cpp \
- vocab.h \
- WordClasses.h
-
-libgiza_a_CXXFLAGS = \
- -MD \
- -MP \
- -MF \
- -MT \
- -O6
-
-libgiza_a_CPPFLAGS = \
- -DNDEBUG \
- -DWORDINDEX_WITH_4_BYTE \
- -DBINARY_SEARCH_FOR_TTABLE \
- -DDEBUG
-
-SUBDIRS = \
- mkcls
-
diff --git a/scripts/training/MGIZA/src/Makefile.in b/scripts/training/MGIZA/src/Makefile.in
deleted file mode 100644
index cc9f259..0000000
--- a/scripts/training/MGIZA/src/Makefile.in
+++ /dev/null
@@ -1,1407 +0,0 @@
-# Makefile.in generated by automake 1.10.1 from Makefile.am.
-# @configure_input@
-
-# Copyright (C) 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002,
-# 2003, 2004, 2005, 2006, 2007, 2008 Free Software Foundation, Inc.
-# This Makefile.in is free software; the Free Software Foundation
-# gives unlimited permission to copy and/or distribute it,
-# with or without modifications, as long as this notice is preserved.
-
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY, to the extent permitted by law; without
-# even the implied warranty of MERCHANTABILITY or FITNESS FOR A
-# PARTICULAR PURPOSE.
-
-@SET_MAKE@
-
-
-VPATH = @srcdir@
-pkgdatadir = $(datadir)/@PACKAGE@
-pkglibdir = $(libdir)/@PACKAGE@
-pkgincludedir = $(includedir)/@PACKAGE@
-am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd
-install_sh_DATA = $(install_sh) -c -m 644
-install_sh_PROGRAM = $(install_sh) -c
-install_sh_SCRIPT = $(install_sh) -c
-INSTALL_HEADER = $(INSTALL_DATA)
-transform = $(program_transform_name)
-NORMAL_INSTALL = :
-PRE_INSTALL = :
-POST_INSTALL = :
-NORMAL_UNINSTALL = :
-PRE_UNINSTALL = :
-POST_UNINSTALL = :
-bin_PROGRAMS = mgiza$(EXEEXT) snt2cooc$(EXEEXT) snt2plain$(EXEEXT) \
- plain2snt$(EXEEXT) symal$(EXEEXT) hmmnorm$(EXEEXT) \
- d4norm$(EXEEXT)
-subdir = src
-DIST_COMMON = $(srcdir)/Makefile.am $(srcdir)/Makefile.in
-ACLOCAL_M4 = $(top_srcdir)/aclocal.m4
-am__aclocal_m4_deps = $(top_srcdir)/configure.ac
-am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \
- $(ACLOCAL_M4)
-mkinstalldirs = $(install_sh) -d
-CONFIG_HEADER = $(top_builddir)/config.h
-CONFIG_CLEAN_FILES =
-am__vpath_adj_setup = srcdirstrip=`echo "$(srcdir)" | sed 's|.|.|g'`;
-am__vpath_adj = case $$p in \
- $(srcdir)/*) f=`echo "$$p" | sed "s|^$$srcdirstrip/||"`;; \
- *) f=$$p;; \
- esac;
-am__strip_dir = `echo $$p | sed -e 's|^.*/||'`;
-am__installdirs = "$(DESTDIR)$(libdir)" "$(DESTDIR)$(bindir)"
-libLIBRARIES_INSTALL = $(INSTALL_DATA)
-LIBRARIES = $(lib_LIBRARIES)
-AR = ar
-ARFLAGS = cru
-libgiza_a_AR = $(AR) $(ARFLAGS)
-libgiza_a_LIBADD =
-am_libgiza_a_OBJECTS = libgiza_a-alignment.$(OBJEXT) \
- libgiza_a-AlignTables.$(OBJEXT) libgiza_a-ATables.$(OBJEXT) \
- libgiza_a-collCounts.$(OBJEXT) libgiza_a-Dictionary.$(OBJEXT) \
- libgiza_a-ForwardBackward.$(OBJEXT) \
- libgiza_a-getSentence.$(OBJEXT) libgiza_a-hmm.$(OBJEXT) \
- libgiza_a-HMMTables.$(OBJEXT) libgiza_a-logprob.$(OBJEXT) \
- libgiza_a-model1.$(OBJEXT) libgiza_a-model2.$(OBJEXT) \
- libgiza_a-model2to3.$(OBJEXT) libgiza_a-model3.$(OBJEXT) \
- libgiza_a-model3_viterbi.$(OBJEXT) \
- libgiza_a-model3_viterbi_with_tricks.$(OBJEXT) \
- libgiza_a-model345-peg.$(OBJEXT) \
- libgiza_a-MoveSwapMatrix.$(OBJEXT) \
- libgiza_a-myassert.$(OBJEXT) libgiza_a-NTables.$(OBJEXT) \
- libgiza_a-Parameter.$(OBJEXT) libgiza_a-parse.$(OBJEXT) \
- libgiza_a-Perplexity.$(OBJEXT) libgiza_a-reports.$(OBJEXT) \
- libgiza_a-SetArray.$(OBJEXT) \
- libgiza_a-transpair_model3.$(OBJEXT) \
- libgiza_a-transpair_model4.$(OBJEXT) \
- libgiza_a-transpair_model5.$(OBJEXT) \
- libgiza_a-TTables.$(OBJEXT) libgiza_a-utility.$(OBJEXT) \
- libgiza_a-vocab.$(OBJEXT)
-libgiza_a_OBJECTS = $(am_libgiza_a_OBJECTS)
-binPROGRAMS_INSTALL = $(INSTALL_PROGRAM)
-PROGRAMS = $(bin_PROGRAMS)
-am_d4norm_OBJECTS = d4norm-d4norm.$(OBJEXT)
-d4norm_OBJECTS = $(am_d4norm_OBJECTS)
-d4norm_LINK = $(CXXLD) $(d4norm_CXXFLAGS) $(CXXFLAGS) \
- $(d4norm_LDFLAGS) $(LDFLAGS) -o $@
-am_hmmnorm_OBJECTS = hmmnorm-hmmnorm.$(OBJEXT)
-hmmnorm_OBJECTS = $(am_hmmnorm_OBJECTS)
-hmmnorm_LINK = $(CXXLD) $(hmmnorm_CXXFLAGS) $(CXXFLAGS) \
- $(hmmnorm_LDFLAGS) $(LDFLAGS) -o $@
-am_mgiza_OBJECTS = mgiza-main.$(OBJEXT)
-mgiza_OBJECTS = $(am_mgiza_OBJECTS)
-mgiza_LINK = $(CXXLD) $(mgiza_CXXFLAGS) $(CXXFLAGS) $(mgiza_LDFLAGS) \
- $(LDFLAGS) -o $@
-am_plain2snt_OBJECTS = plain2snt.$(OBJEXT)
-plain2snt_OBJECTS = $(am_plain2snt_OBJECTS)
-plain2snt_LDADD = $(LDADD)
-am_snt2cooc_OBJECTS = snt2cooc-snt2cooc.$(OBJEXT)
-snt2cooc_OBJECTS = $(am_snt2cooc_OBJECTS)
-snt2cooc_LDADD = $(LDADD)
-am_snt2plain_OBJECTS = snt2plain.$(OBJEXT)
-snt2plain_OBJECTS = $(am_snt2plain_OBJECTS)
-snt2plain_LDADD = $(LDADD)
-am_symal_OBJECTS = cmd.$(OBJEXT) symal.$(OBJEXT)
-symal_OBJECTS = $(am_symal_OBJECTS)
-symal_LDADD = $(LDADD)
-DEFAULT_INCLUDES = -I.@am__isrc@ -I$(top_builddir)
-depcomp = $(SHELL) $(top_srcdir)/depcomp
-am__depfiles_maybe = depfiles
-COMPILE = $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) \
- $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS)
-CCLD = $(CC)
-LINK = $(CCLD) $(AM_CFLAGS) $(CFLAGS) $(AM_LDFLAGS) $(LDFLAGS) -o $@
-CXXCOMPILE = $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) \
- $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CXXFLAGS) $(CXXFLAGS)
-CXXLD = $(CXX)
-CXXLINK = $(CXXLD) $(AM_CXXFLAGS) $(CXXFLAGS) $(AM_LDFLAGS) $(LDFLAGS) \
- -o $@
-SOURCES = $(libgiza_a_SOURCES) $(d4norm_SOURCES) $(hmmnorm_SOURCES) \
- $(mgiza_SOURCES) $(plain2snt_SOURCES) $(snt2cooc_SOURCES) \
- $(snt2plain_SOURCES) $(symal_SOURCES)
-DIST_SOURCES = $(libgiza_a_SOURCES) $(d4norm_SOURCES) \
- $(hmmnorm_SOURCES) $(mgiza_SOURCES) $(plain2snt_SOURCES) \
- $(snt2cooc_SOURCES) $(snt2plain_SOURCES) $(symal_SOURCES)
-RECURSIVE_TARGETS = all-recursive check-recursive dvi-recursive \
- html-recursive info-recursive install-data-recursive \
- install-dvi-recursive install-exec-recursive \
- install-html-recursive install-info-recursive \
- install-pdf-recursive install-ps-recursive install-recursive \
- installcheck-recursive installdirs-recursive pdf-recursive \
- ps-recursive uninstall-recursive
-RECURSIVE_CLEAN_TARGETS = mostlyclean-recursive clean-recursive \
- distclean-recursive maintainer-clean-recursive
-ETAGS = etags
-CTAGS = ctags
-DIST_SUBDIRS = $(SUBDIRS)
-DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST)
-ACLOCAL = @ACLOCAL@
-AMTAR = @AMTAR@
-AUTOCONF = @AUTOCONF@
-AUTOHEADER = @AUTOHEADER@
-AUTOMAKE = @AUTOMAKE@
-AWK = @AWK@
-CC = @CC@
-CCDEPMODE = @CCDEPMODE@
-CFLAGS = @CFLAGS@
-CPP = @CPP@
-CPPFLAGS = @CPPFLAGS@
-CXX = @CXX@
-CXXDEPMODE = @CXXDEPMODE@
-CXXFLAGS = @CXXFLAGS@
-CYGPATH_W = @CYGPATH_W@
-DEFS = @DEFS@
-DEPDIR = @DEPDIR@
-ECHO_C = @ECHO_C@
-ECHO_N = @ECHO_N@
-ECHO_T = @ECHO_T@
-EGREP = @EGREP@
-EXEEXT = @EXEEXT@
-GREP = @GREP@
-INSTALL = @INSTALL@
-INSTALL_DATA = @INSTALL_DATA@
-INSTALL_PROGRAM = @INSTALL_PROGRAM@
-INSTALL_SCRIPT = @INSTALL_SCRIPT@
-INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@
-LDFLAGS = @LDFLAGS@
-LIBOBJS = @LIBOBJS@
-LIBS = @LIBS@
-LTLIBOBJS = @LTLIBOBJS@
-MAINT = @MAINT@
-MAKEINFO = @MAKEINFO@
-MKDIR_P = @MKDIR_P@
-OBJEXT = @OBJEXT@
-PACKAGE = @PACKAGE@
-PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@
-PACKAGE_NAME = @PACKAGE_NAME@
-PACKAGE_STRING = @PACKAGE_STRING@
-PACKAGE_TARNAME = @PACKAGE_TARNAME@
-PACKAGE_VERSION = @PACKAGE_VERSION@
-PATH_SEPARATOR = @PATH_SEPARATOR@
-RANLIB = @RANLIB@
-SET_MAKE = @SET_MAKE@
-SHELL = @SHELL@
-STRIP = @STRIP@
-VERSION = @VERSION@
-abs_builddir = @abs_builddir@
-abs_srcdir = @abs_srcdir@
-abs_top_builddir = @abs_top_builddir@
-abs_top_srcdir = @abs_top_srcdir@
-ac_ct_CC = @ac_ct_CC@
-ac_ct_CXX = @ac_ct_CXX@
-am__include = @am__include@
-am__leading_dot = @am__leading_dot@
-am__quote = @am__quote@
-am__tar = @am__tar@
-am__untar = @am__untar@
-bindir = @bindir@
-build_alias = @build_alias@
-builddir = @builddir@
-datadir = @datadir@
-datarootdir = @datarootdir@
-docdir = @docdir@
-dvidir = @dvidir@
-exec_prefix = @exec_prefix@
-host_alias = @host_alias@
-htmldir = @htmldir@
-includedir = @includedir@
-infodir = @infodir@
-install_sh = @install_sh@
-libdir = @libdir@
-libexecdir = @libexecdir@
-localedir = @localedir@
-localstatedir = @localstatedir@
-mandir = @mandir@
-mkdir_p = @mkdir_p@
-oldincludedir = @oldincludedir@
-pdfdir = @pdfdir@
-prefix = @prefix@
-program_transform_name = @program_transform_name@
-psdir = @psdir@
-sbindir = @sbindir@
-sharedstatedir = @sharedstatedir@
-srcdir = @srcdir@
-sysconfdir = @sysconfdir@
-target_alias = @target_alias@
-top_build_prefix = @top_build_prefix@
-top_builddir = @top_builddir@
-top_srcdir = @top_srcdir@
-INCLUDES = \
- -DPACKAGE_LOCALE_DIR=\""$(prefix)/$(DATADIRNAME)/locale"\" \
- -DPACKAGE_SRC_DIR=\""$(srcdir)"\" \
- -DPACKAGE_DATA_DIR=\""$(datadir)"\"
-
-AM_CFLAGS = \
- -Wall\
- -g
-
-d4norm_SOURCES = \
- d4norm.cxx
-
-d4norm_LDADD = \
- -lgiza \
- -lpthread
-
-d4norm_LDFLAGS = \
- -L.
-
-d4norm_DEPENDENCIES = \
- libgiza.a
-
-d4norm_CXXFLAGS = \
- -MT \
- -MD \
- -MP \
- -MF \
- -O6
-
-d4norm_CPPFLAGS = \
- -DNDEBUG \
- -DWORDINDEX_WITH_4_BYTE \
- -DBINARY_SEARCH_FOR_TTABLE \
- -DDEBUG
-
-hmmnorm_SOURCES = \
- hmmnorm.cxx
-
-hmmnorm_LDADD = \
- -lgiza \
- -lpthread
-
-hmmnorm_LDFLAGS = \
- -L.
-
-hmmnorm_DEPENDENCIES = \
- libgiza.a
-
-hmmnorm_CXXFLAGS = \
- -MT \
- -MD \
- -MP \
- -MF \
- -O6
-
-hmmnorm_CPPFLAGS = \
- -DNDEBUG \
- -DWORDINDEX_WITH_4_BYTE \
- -DBINARY_SEARCH_FOR_TTABLE \
- -DDEBUG
-
-symal_SOURCES = \
- cmd.c \
- cmd.h \
- symal.cpp
-
-plain2snt_SOURCES = \
- plain2snt.cpp
-
-snt2plain_SOURCES = \
- snt2plain.cpp
-
-snt2cooc_SOURCES = \
- snt2cooc.cpp
-
-snt2cooc_CPPFLAGS = \
- -DNDEBUG \
- -DWORDINDEX_WITH_4_BYTE \
- -DBINARY_SEARCH_FOR_TTABLE \
- -DDEBUG
-
-mgiza_SOURCES = \
- main.cpp
-
-mgiza_DEPENDENCIES = \
- libgiza.a
-
-mgiza_CXXFLAGS = \
- -MT \
- -MD \
- -MP \
- -MF \
- -O6
-
-mgiza_CPPFLAGS = \
- -DNDEBUG \
- -DWORDINDEX_WITH_4_BYTE \
- -DBINARY_SEARCH_FOR_TTABLE \
- -DDEBUG
-
-mgiza_LDFLAGS = \
- -L.
-
-mgiza_LDADD = \
- -lgiza \
- -lpthread
-
-lib_LIBRARIES = \
- libgiza.a
-
-libgiza_a_SOURCES = \
- alignment.cpp\
- alignment.h \
- AlignTables.cpp \
- AlignTables.h \
- Array.h \
- Array2.h \
- Array4.h \
- ATables.cpp \
- ATables.h \
- collCounts.cpp \
- collCounts.h \
- common.h \
- D4Tables.h \
- D5Tables.h \
- defs.h \
- Dictionary.cpp \
- Dictionary.h \
- file_spec.h \
- FlexArray.h \
- ForwardBackward.cpp \
- ForwardBackward.h \
- getSentence.cpp \
- getSentence.h \
- Globals.h \
- hmm.cpp \
- hmm.h \
- HMMTables.cpp \
- HMMTables.h \
- logprob.cpp \
- logprob.h \
- model1.cpp \
- model1.h \
- model2.cpp \
- model2.h \
- model2to3.cpp \
- model3.cpp \
- model3.h \
- model3_viterbi.cpp \
- model3_viterbi_with_tricks.cpp \
- model345-peg.cpp \
- MoveSwapMatrix.cpp \
- MoveSwapMatrix.h \
- myassert.cpp \
- myassert.h \
- mymath.h \
- mystl.h \
- NTables.cpp \
- NTables.h \
- Parameter.cpp \
- Parameter.h \
- parse.cpp \
- Perplexity.cpp \
- Perplexity.h \
- Pointer.h \
- reports.cpp \
- SetArray.cpp \
- SetArray.h \
- syncObj.h \
- transpair_model1.h \
- transpair_model2.h \
- transpair_model3.cpp \
- transpair_model3.h \
- transpair_model4.cpp \
- transpair_model4.h \
- transpair_model5.cpp \
- transpair_model5.h \
- transpair_modelhmm.h \
- ttableDiff.hpp \
- TTables.cpp \
- TTables.h \
- types.h \
- utility.cpp \
- utility.h \
- Vector.h \
- vocab.cpp \
- vocab.h \
- WordClasses.h
-
-libgiza_a_CXXFLAGS = \
- -MD \
- -MP \
- -MF \
- -MT \
- -O6
-
-libgiza_a_CPPFLAGS = \
- -DNDEBUG \
- -DWORDINDEX_WITH_4_BYTE \
- -DBINARY_SEARCH_FOR_TTABLE \
- -DDEBUG
-
-SUBDIRS = \
- mkcls
-
-all: all-recursive
-
-.SUFFIXES:
-.SUFFIXES: .c .cpp .cxx .o .obj
-$(srcdir)/Makefile.in: @MAINTAINER_MODE_TRUE@ $(srcdir)/Makefile.am $(am__configure_deps)
- @for dep in $?; do \
- case '$(am__configure_deps)' in \
- *$$dep*) \
- cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh \
- && exit 0; \
- exit 1;; \
- esac; \
- done; \
- echo ' cd $(top_srcdir) && $(AUTOMAKE) --gnu src/Makefile'; \
- cd $(top_srcdir) && \
- $(AUTOMAKE) --gnu src/Makefile
-.PRECIOUS: Makefile
-Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status
- @case '$?' in \
- *config.status*) \
- cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \
- *) \
- echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe)'; \
- cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe);; \
- esac;
-
-$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES)
- cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
-
-$(top_srcdir)/configure: @MAINTAINER_MODE_TRUE@ $(am__configure_deps)
- cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
-$(ACLOCAL_M4): @MAINTAINER_MODE_TRUE@ $(am__aclocal_m4_deps)
- cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
-install-libLIBRARIES: $(lib_LIBRARIES)
- @$(NORMAL_INSTALL)
- test -z "$(libdir)" || $(MKDIR_P) "$(DESTDIR)$(libdir)"
- @list='$(lib_LIBRARIES)'; for p in $$list; do \
- if test -f $$p; then \
- f=$(am__strip_dir) \
- echo " $(libLIBRARIES_INSTALL) '$$p' '$(DESTDIR)$(libdir)/$$f'"; \
- $(libLIBRARIES_INSTALL) "$$p" "$(DESTDIR)$(libdir)/$$f"; \
- else :; fi; \
- done
- @$(POST_INSTALL)
- @list='$(lib_LIBRARIES)'; for p in $$list; do \
- if test -f $$p; then \
- p=$(am__strip_dir) \
- echo " $(RANLIB) '$(DESTDIR)$(libdir)/$$p'"; \
- $(RANLIB) "$(DESTDIR)$(libdir)/$$p"; \
- else :; fi; \
- done
-
-uninstall-libLIBRARIES:
- @$(NORMAL_UNINSTALL)
- @list='$(lib_LIBRARIES)'; for p in $$list; do \
- p=$(am__strip_dir) \
- echo " rm -f '$(DESTDIR)$(libdir)/$$p'"; \
- rm -f "$(DESTDIR)$(libdir)/$$p"; \
- done
-
-clean-libLIBRARIES:
- -test -z "$(lib_LIBRARIES)" || rm -f $(lib_LIBRARIES)
-libgiza.a: $(libgiza_a_OBJECTS) $(libgiza_a_DEPENDENCIES)
- -rm -f libgiza.a
- $(libgiza_a_AR) libgiza.a $(libgiza_a_OBJECTS) $(libgiza_a_LIBADD)
- $(RANLIB) libgiza.a
-install-binPROGRAMS: $(bin_PROGRAMS)
- @$(NORMAL_INSTALL)
- test -z "$(bindir)" || $(MKDIR_P) "$(DESTDIR)$(bindir)"
- @list='$(bin_PROGRAMS)'; for p in $$list; do \
- p1=`echo $$p|sed 's/$(EXEEXT)$$//'`; \
- if test -f $$p \
- ; then \
- f=`echo "$$p1" | sed 's,^.*/,,;$(transform);s/$$/$(EXEEXT)/'`; \
- echo " $(INSTALL_PROGRAM_ENV) $(binPROGRAMS_INSTALL) '$$p' '$(DESTDIR)$(bindir)/$$f'"; \
- $(INSTALL_PROGRAM_ENV) $(binPROGRAMS_INSTALL) "$$p" "$(DESTDIR)$(bindir)/$$f" || exit 1; \
- else :; fi; \
- done
-
-uninstall-binPROGRAMS:
- @$(NORMAL_UNINSTALL)
- @list='$(bin_PROGRAMS)'; for p in $$list; do \
- f=`echo "$$p" | sed 's,^.*/,,;s/$(EXEEXT)$$//;$(transform);s/$$/$(EXEEXT)/'`; \
- echo " rm -f '$(DESTDIR)$(bindir)/$$f'"; \
- rm -f "$(DESTDIR)$(bindir)/$$f"; \
- done
-
-clean-binPROGRAMS:
- -test -z "$(bin_PROGRAMS)" || rm -f $(bin_PROGRAMS)
-d4norm$(EXEEXT): $(d4norm_OBJECTS) $(d4norm_DEPENDENCIES)
- @rm -f d4norm$(EXEEXT)
- $(d4norm_LINK) $(d4norm_OBJECTS) $(d4norm_LDADD) $(LIBS)
-hmmnorm$(EXEEXT): $(hmmnorm_OBJECTS) $(hmmnorm_DEPENDENCIES)
- @rm -f hmmnorm$(EXEEXT)
- $(hmmnorm_LINK) $(hmmnorm_OBJECTS) $(hmmnorm_LDADD) $(LIBS)
-mgiza$(EXEEXT): $(mgiza_OBJECTS) $(mgiza_DEPENDENCIES)
- @rm -f mgiza$(EXEEXT)
- $(mgiza_LINK) $(mgiza_OBJECTS) $(mgiza_LDADD) $(LIBS)
-plain2snt$(EXEEXT): $(plain2snt_OBJECTS) $(plain2snt_DEPENDENCIES)
- @rm -f plain2snt$(EXEEXT)
- $(CXXLINK) $(plain2snt_OBJECTS) $(plain2snt_LDADD) $(LIBS)
-snt2cooc$(EXEEXT): $(snt2cooc_OBJECTS) $(snt2cooc_DEPENDENCIES)
- @rm -f snt2cooc$(EXEEXT)
- $(CXXLINK) $(snt2cooc_OBJECTS) $(snt2cooc_LDADD) $(LIBS)
-snt2plain$(EXEEXT): $(snt2plain_OBJECTS) $(snt2plain_DEPENDENCIES)
- @rm -f snt2plain$(EXEEXT)
- $(CXXLINK) $(snt2plain_OBJECTS) $(snt2plain_LDADD) $(LIBS)
-symal$(EXEEXT): $(symal_OBJECTS) $(symal_DEPENDENCIES)
- @rm -f symal$(EXEEXT)
- $(CXXLINK) $(symal_OBJECTS) $(symal_LDADD) $(LIBS)
-
-mostlyclean-compile:
- -rm -f *.$(OBJEXT)
-
-distclean-compile:
- -rm -f *.tab.c
-
-@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/cmd.Po@am__quote@
-@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/d4norm-d4norm.Po@am__quote@
-@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/hmmnorm-hmmnorm.Po@am__quote@
-@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libgiza_a-ATables.Po@am__quote@
-@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libgiza_a-AlignTables.Po@am__quote@
-@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libgiza_a-Dictionary.Po@am__quote@
-@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libgiza_a-ForwardBackward.Po@am__quote@
-@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libgiza_a-HMMTables.Po@am__quote@
-@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libgiza_a-MoveSwapMatrix.Po@am__quote@
-@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libgiza_a-NTables.Po@am__quote@
-@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libgiza_a-Parameter.Po@am__quote@
-@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libgiza_a-Perplexity.Po@am__quote@
-@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libgiza_a-SetArray.Po@am__quote@
-@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libgiza_a-TTables.Po@am__quote@
-@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libgiza_a-alignment.Po@am__quote@
-@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libgiza_a-collCounts.Po@am__quote@
-@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libgiza_a-getSentence.Po@am__quote@
-@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libgiza_a-hmm.Po@am__quote@
-@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libgiza_a-logprob.Po@am__quote@
-@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libgiza_a-model1.Po@am__quote@
-@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libgiza_a-model2.Po@am__quote@
-@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libgiza_a-model2to3.Po@am__quote@
-@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libgiza_a-model3.Po@am__quote@
-@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libgiza_a-model345-peg.Po@am__quote@
-@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libgiza_a-model3_viterbi.Po@am__quote@
-@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libgiza_a-model3_viterbi_with_tricks.Po@am__quote@
-@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libgiza_a-myassert.Po@am__quote@
-@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libgiza_a-parse.Po@am__quote@
-@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libgiza_a-reports.Po@am__quote@
-@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libgiza_a-transpair_model3.Po@am__quote@
-@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libgiza_a-transpair_model4.Po@am__quote@
-@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libgiza_a-transpair_model5.Po@am__quote@
-@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libgiza_a-utility.Po@am__quote@
-@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libgiza_a-vocab.Po@am__quote@
-@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/mgiza-main.Po@am__quote@
-@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/plain2snt.Po@am__quote@
-@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/snt2cooc-snt2cooc.Po@am__quote@
-@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/snt2plain.Po@am__quote@
-@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/symal.Po@am__quote@
-
-.c.o:
-@am__fastdepCC_TRUE@ $(COMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ $<
-@am__fastdepCC_TRUE@ mv -f $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Po
-@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='$<' object='$@' libtool=no @AMDEPBACKSLASH@
-@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
-@am__fastdepCC_FALSE@ $(COMPILE) -c $<
-
-.c.obj:
-@am__fastdepCC_TRUE@ $(COMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ `$(CYGPATH_W) '$<'`
-@am__fastdepCC_TRUE@ mv -f $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Po
-@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='$<' object='$@' libtool=no @AMDEPBACKSLASH@
-@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
-@am__fastdepCC_FALSE@ $(COMPILE) -c `$(CYGPATH_W) '$<'`
-
-.cpp.o:
-@am__fastdepCXX_TRUE@ $(CXXCOMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ $<
-@am__fastdepCXX_TRUE@ mv -f $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Po
-@AMDEP_TRUE@@am__fastdepCXX_FALSE@ source='$<' object='$@' libtool=no @AMDEPBACKSLASH@
-@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@
-@am__fastdepCXX_FALSE@ $(CXXCOMPILE) -c -o $@ $<
-
-.cpp.obj:
-@am__fastdepCXX_TRUE@ $(CXXCOMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ `$(CYGPATH_W) '$<'`
-@am__fastdepCXX_TRUE@ mv -f $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Po
-@AMDEP_TRUE@@am__fastdepCXX_FALSE@ source='$<' object='$@' libtool=no @AMDEPBACKSLASH@
-@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@
-@am__fastdepCXX_FALSE@ $(CXXCOMPILE) -c -o $@ `$(CYGPATH_W) '$<'`
-
-libgiza_a-alignment.o: alignment.cpp
-@am__fastdepCXX_TRUE@ $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libgiza_a_CPPFLAGS) $(CPPFLAGS) $(libgiza_a_CXXFLAGS) $(CXXFLAGS) -MT libgiza_a-alignment.o -MD -MP -MF $(DEPDIR)/libgiza_a-alignment.Tpo -c -o libgiza_a-alignment.o `test -f 'alignment.cpp' || echo '$(srcdir)/'`alignment.cpp
-@am__fastdepCXX_TRUE@ mv -f $(DEPDIR)/libgiza_a-alignment.Tpo $(DEPDIR)/libgiza_a-alignment.Po
-@AMDEP_TRUE@@am__fastdepCXX_FALSE@ source='alignment.cpp' object='libgiza_a-alignment.o' libtool=no @AMDEPBACKSLASH@
-@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@
-@am__fastdepCXX_FALSE@ $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libgiza_a_CPPFLAGS) $(CPPFLAGS) $(libgiza_a_CXXFLAGS) $(CXXFLAGS) -c -o libgiza_a-alignment.o `test -f 'alignment.cpp' || echo '$(srcdir)/'`alignment.cpp
-
-libgiza_a-alignment.obj: alignment.cpp
-@am__fastdepCXX_TRUE@ $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libgiza_a_CPPFLAGS) $(CPPFLAGS) $(libgiza_a_CXXFLAGS) $(CXXFLAGS) -MT libgiza_a-alignment.obj -MD -MP -MF $(DEPDIR)/libgiza_a-alignment.Tpo -c -o libgiza_a-alignment.obj `if test -f 'alignment.cpp'; then $(CYGPATH_W) 'alignment.cpp'; else $(CYGPATH_W) '$(srcdir)/alignment.cpp'; fi`
-@am__fastdepCXX_TRUE@ mv -f $(DEPDIR)/libgiza_a-alignment.Tpo $(DEPDIR)/libgiza_a-alignment.Po
-@AMDEP_TRUE@@am__fastdepCXX_FALSE@ source='alignment.cpp' object='libgiza_a-alignment.obj' libtool=no @AMDEPBACKSLASH@
-@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@
-@am__fastdepCXX_FALSE@ $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libgiza_a_CPPFLAGS) $(CPPFLAGS) $(libgiza_a_CXXFLAGS) $(CXXFLAGS) -c -o libgiza_a-alignment.obj `if test -f 'alignment.cpp'; then $(CYGPATH_W) 'alignment.cpp'; else $(CYGPATH_W) '$(srcdir)/alignment.cpp'; fi`
-
-libgiza_a-AlignTables.o: AlignTables.cpp
-@am__fastdepCXX_TRUE@ $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libgiza_a_CPPFLAGS) $(CPPFLAGS) $(libgiza_a_CXXFLAGS) $(CXXFLAGS) -MT libgiza_a-AlignTables.o -MD -MP -MF $(DEPDIR)/libgiza_a-AlignTables.Tpo -c -o libgiza_a-AlignTables.o `test -f 'AlignTables.cpp' || echo '$(srcdir)/'`AlignTables.cpp
-@am__fastdepCXX_TRUE@ mv -f $(DEPDIR)/libgiza_a-AlignTables.Tpo $(DEPDIR)/libgiza_a-AlignTables.Po
-@AMDEP_TRUE@@am__fastdepCXX_FALSE@ source='AlignTables.cpp' object='libgiza_a-AlignTables.o' libtool=no @AMDEPBACKSLASH@
-@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@
-@am__fastdepCXX_FALSE@ $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libgiza_a_CPPFLAGS) $(CPPFLAGS) $(libgiza_a_CXXFLAGS) $(CXXFLAGS) -c -o libgiza_a-AlignTables.o `test -f 'AlignTables.cpp' || echo '$(srcdir)/'`AlignTables.cpp
-
-libgiza_a-AlignTables.obj: AlignTables.cpp
-@am__fastdepCXX_TRUE@ $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libgiza_a_CPPFLAGS) $(CPPFLAGS) $(libgiza_a_CXXFLAGS) $(CXXFLAGS) -MT libgiza_a-AlignTables.obj -MD -MP -MF $(DEPDIR)/libgiza_a-AlignTables.Tpo -c -o libgiza_a-AlignTables.obj `if test -f 'AlignTables.cpp'; then $(CYGPATH_W) 'AlignTables.cpp'; else $(CYGPATH_W) '$(srcdir)/AlignTables.cpp'; fi`
-@am__fastdepCXX_TRUE@ mv -f $(DEPDIR)/libgiza_a-AlignTables.Tpo $(DEPDIR)/libgiza_a-AlignTables.Po
-@AMDEP_TRUE@@am__fastdepCXX_FALSE@ source='AlignTables.cpp' object='libgiza_a-AlignTables.obj' libtool=no @AMDEPBACKSLASH@
-@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@
-@am__fastdepCXX_FALSE@ $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libgiza_a_CPPFLAGS) $(CPPFLAGS) $(libgiza_a_CXXFLAGS) $(CXXFLAGS) -c -o libgiza_a-AlignTables.obj `if test -f 'AlignTables.cpp'; then $(CYGPATH_W) 'AlignTables.cpp'; else $(CYGPATH_W) '$(srcdir)/AlignTables.cpp'; fi`
-
-libgiza_a-ATables.o: ATables.cpp
-@am__fastdepCXX_TRUE@ $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libgiza_a_CPPFLAGS) $(CPPFLAGS) $(libgiza_a_CXXFLAGS) $(CXXFLAGS) -MT libgiza_a-ATables.o -MD -MP -MF $(DEPDIR)/libgiza_a-ATables.Tpo -c -o libgiza_a-ATables.o `test -f 'ATables.cpp' || echo '$(srcdir)/'`ATables.cpp
-@am__fastdepCXX_TRUE@ mv -f $(DEPDIR)/libgiza_a-ATables.Tpo $(DEPDIR)/libgiza_a-ATables.Po
-@AMDEP_TRUE@@am__fastdepCXX_FALSE@ source='ATables.cpp' object='libgiza_a-ATables.o' libtool=no @AMDEPBACKSLASH@
-@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@
-@am__fastdepCXX_FALSE@ $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libgiza_a_CPPFLAGS) $(CPPFLAGS) $(libgiza_a_CXXFLAGS) $(CXXFLAGS) -c -o libgiza_a-ATables.o `test -f 'ATables.cpp' || echo '$(srcdir)/'`ATables.cpp
-
-libgiza_a-ATables.obj: ATables.cpp
-@am__fastdepCXX_TRUE@ $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libgiza_a_CPPFLAGS) $(CPPFLAGS) $(libgiza_a_CXXFLAGS) $(CXXFLAGS) -MT libgiza_a-ATables.obj -MD -MP -MF $(DEPDIR)/libgiza_a-ATables.Tpo -c -o libgiza_a-ATables.obj `if test -f 'ATables.cpp'; then $(CYGPATH_W) 'ATables.cpp'; else $(CYGPATH_W) '$(srcdir)/ATables.cpp'; fi`
-@am__fastdepCXX_TRUE@ mv -f $(DEPDIR)/libgiza_a-ATables.Tpo $(DEPDIR)/libgiza_a-ATables.Po
-@AMDEP_TRUE@@am__fastdepCXX_FALSE@ source='ATables.cpp' object='libgiza_a-ATables.obj' libtool=no @AMDEPBACKSLASH@
-@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@
-@am__fastdepCXX_FALSE@ $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libgiza_a_CPPFLAGS) $(CPPFLAGS) $(libgiza_a_CXXFLAGS) $(CXXFLAGS) -c -o libgiza_a-ATables.obj `if test -f 'ATables.cpp'; then $(CYGPATH_W) 'ATables.cpp'; else $(CYGPATH_W) '$(srcdir)/ATables.cpp'; fi`
-
-libgiza_a-collCounts.o: collCounts.cpp
-@am__fastdepCXX_TRUE@ $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libgiza_a_CPPFLAGS) $(CPPFLAGS) $(libgiza_a_CXXFLAGS) $(CXXFLAGS) -MT libgiza_a-collCounts.o -MD -MP -MF $(DEPDIR)/libgiza_a-collCounts.Tpo -c -o libgiza_a-collCounts.o `test -f 'collCounts.cpp' || echo '$(srcdir)/'`collCounts.cpp
-@am__fastdepCXX_TRUE@ mv -f $(DEPDIR)/libgiza_a-collCounts.Tpo $(DEPDIR)/libgiza_a-collCounts.Po
-@AMDEP_TRUE@@am__fastdepCXX_FALSE@ source='collCounts.cpp' object='libgiza_a-collCounts.o' libtool=no @AMDEPBACKSLASH@
-@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@
-@am__fastdepCXX_FALSE@ $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libgiza_a_CPPFLAGS) $(CPPFLAGS) $(libgiza_a_CXXFLAGS) $(CXXFLAGS) -c -o libgiza_a-collCounts.o `test -f 'collCounts.cpp' || echo '$(srcdir)/'`collCounts.cpp
-
-libgiza_a-collCounts.obj: collCounts.cpp
-@am__fastdepCXX_TRUE@ $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libgiza_a_CPPFLAGS) $(CPPFLAGS) $(libgiza_a_CXXFLAGS) $(CXXFLAGS) -MT libgiza_a-collCounts.obj -MD -MP -MF $(DEPDIR)/libgiza_a-collCounts.Tpo -c -o libgiza_a-collCounts.obj `if test -f 'collCounts.cpp'; then $(CYGPATH_W) 'collCounts.cpp'; else $(CYGPATH_W) '$(srcdir)/collCounts.cpp'; fi`
-@am__fastdepCXX_TRUE@ mv -f $(DEPDIR)/libgiza_a-collCounts.Tpo $(DEPDIR)/libgiza_a-collCounts.Po
-@AMDEP_TRUE@@am__fastdepCXX_FALSE@ source='collCounts.cpp' object='libgiza_a-collCounts.obj' libtool=no @AMDEPBACKSLASH@
-@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@
-@am__fastdepCXX_FALSE@ $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libgiza_a_CPPFLAGS) $(CPPFLAGS) $(libgiza_a_CXXFLAGS) $(CXXFLAGS) -c -o libgiza_a-collCounts.obj `if test -f 'collCounts.cpp'; then $(CYGPATH_W) 'collCounts.cpp'; else $(CYGPATH_W) '$(srcdir)/collCounts.cpp'; fi`
-
-libgiza_a-Dictionary.o: Dictionary.cpp
-@am__fastdepCXX_TRUE@ $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libgiza_a_CPPFLAGS) $(CPPFLAGS) $(libgiza_a_CXXFLAGS) $(CXXFLAGS) -MT libgiza_a-Dictionary.o -MD -MP -MF $(DEPDIR)/libgiza_a-Dictionary.Tpo -c -o libgiza_a-Dictionary.o `test -f 'Dictionary.cpp' || echo '$(srcdir)/'`Dictionary.cpp
-@am__fastdepCXX_TRUE@ mv -f $(DEPDIR)/libgiza_a-Dictionary.Tpo $(DEPDIR)/libgiza_a-Dictionary.Po
-@AMDEP_TRUE@@am__fastdepCXX_FALSE@ source='Dictionary.cpp' object='libgiza_a-Dictionary.o' libtool=no @AMDEPBACKSLASH@
-@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@
-@am__fastdepCXX_FALSE@ $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libgiza_a_CPPFLAGS) $(CPPFLAGS) $(libgiza_a_CXXFLAGS) $(CXXFLAGS) -c -o libgiza_a-Dictionary.o `test -f 'Dictionary.cpp' || echo '$(srcdir)/'`Dictionary.cpp
-
-libgiza_a-Dictionary.obj: Dictionary.cpp
-@am__fastdepCXX_TRUE@ $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libgiza_a_CPPFLAGS) $(CPPFLAGS) $(libgiza_a_CXXFLAGS) $(CXXFLAGS) -MT libgiza_a-Dictionary.obj -MD -MP -MF $(DEPDIR)/libgiza_a-Dictionary.Tpo -c -o libgiza_a-Dictionary.obj `if test -f 'Dictionary.cpp'; then $(CYGPATH_W) 'Dictionary.cpp'; else $(CYGPATH_W) '$(srcdir)/Dictionary.cpp'; fi`
-@am__fastdepCXX_TRUE@ mv -f $(DEPDIR)/libgiza_a-Dictionary.Tpo $(DEPDIR)/libgiza_a-Dictionary.Po
-@AMDEP_TRUE@@am__fastdepCXX_FALSE@ source='Dictionary.cpp' object='libgiza_a-Dictionary.obj' libtool=no @AMDEPBACKSLASH@
-@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@
-@am__fastdepCXX_FALSE@ $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libgiza_a_CPPFLAGS) $(CPPFLAGS) $(libgiza_a_CXXFLAGS) $(CXXFLAGS) -c -o libgiza_a-Dictionary.obj `if test -f 'Dictionary.cpp'; then $(CYGPATH_W) 'Dictionary.cpp'; else $(CYGPATH_W) '$(srcdir)/Dictionary.cpp'; fi`
-
-libgiza_a-ForwardBackward.o: ForwardBackward.cpp
-@am__fastdepCXX_TRUE@ $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libgiza_a_CPPFLAGS) $(CPPFLAGS) $(libgiza_a_CXXFLAGS) $(CXXFLAGS) -MT libgiza_a-ForwardBackward.o -MD -MP -MF $(DEPDIR)/libgiza_a-ForwardBackward.Tpo -c -o libgiza_a-ForwardBackward.o `test -f 'ForwardBackward.cpp' || echo '$(srcdir)/'`ForwardBackward.cpp
-@am__fastdepCXX_TRUE@ mv -f $(DEPDIR)/libgiza_a-ForwardBackward.Tpo $(DEPDIR)/libgiza_a-ForwardBackward.Po
-@AMDEP_TRUE@@am__fastdepCXX_FALSE@ source='ForwardBackward.cpp' object='libgiza_a-ForwardBackward.o' libtool=no @AMDEPBACKSLASH@
-@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@
-@am__fastdepCXX_FALSE@ $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libgiza_a_CPPFLAGS) $(CPPFLAGS) $(libgiza_a_CXXFLAGS) $(CXXFLAGS) -c -o libgiza_a-ForwardBackward.o `test -f 'ForwardBackward.cpp' || echo '$(srcdir)/'`ForwardBackward.cpp
-
-libgiza_a-ForwardBackward.obj: ForwardBackward.cpp
-@am__fastdepCXX_TRUE@ $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libgiza_a_CPPFLAGS) $(CPPFLAGS) $(libgiza_a_CXXFLAGS) $(CXXFLAGS) -MT libgiza_a-ForwardBackward.obj -MD -MP -MF $(DEPDIR)/libgiza_a-ForwardBackward.Tpo -c -o libgiza_a-ForwardBackward.obj `if test -f 'ForwardBackward.cpp'; then $(CYGPATH_W) 'ForwardBackward.cpp'; else $(CYGPATH_W) '$(srcdir)/ForwardBackward.cpp'; fi`
-@am__fastdepCXX_TRUE@ mv -f $(DEPDIR)/libgiza_a-ForwardBackward.Tpo $(DEPDIR)/libgiza_a-ForwardBackward.Po
-@AMDEP_TRUE@@am__fastdepCXX_FALSE@ source='ForwardBackward.cpp' object='libgiza_a-ForwardBackward.obj' libtool=no @AMDEPBACKSLASH@
-@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@
-@am__fastdepCXX_FALSE@ $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libgiza_a_CPPFLAGS) $(CPPFLAGS) $(libgiza_a_CXXFLAGS) $(CXXFLAGS) -c -o libgiza_a-ForwardBackward.obj `if test -f 'ForwardBackward.cpp'; then $(CYGPATH_W) 'ForwardBackward.cpp'; else $(CYGPATH_W) '$(srcdir)/ForwardBackward.cpp'; fi`
-
-libgiza_a-getSentence.o: getSentence.cpp
-@am__fastdepCXX_TRUE@ $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libgiza_a_CPPFLAGS) $(CPPFLAGS) $(libgiza_a_CXXFLAGS) $(CXXFLAGS) -MT libgiza_a-getSentence.o -MD -MP -MF $(DEPDIR)/libgiza_a-getSentence.Tpo -c -o libgiza_a-getSentence.o `test -f 'getSentence.cpp' || echo '$(srcdir)/'`getSentence.cpp
-@am__fastdepCXX_TRUE@ mv -f $(DEPDIR)/libgiza_a-getSentence.Tpo $(DEPDIR)/libgiza_a-getSentence.Po
-@AMDEP_TRUE@@am__fastdepCXX_FALSE@ source='getSentence.cpp' object='libgiza_a-getSentence.o' libtool=no @AMDEPBACKSLASH@
-@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@
-@am__fastdepCXX_FALSE@ $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libgiza_a_CPPFLAGS) $(CPPFLAGS) $(libgiza_a_CXXFLAGS) $(CXXFLAGS) -c -o libgiza_a-getSentence.o `test -f 'getSentence.cpp' || echo '$(srcdir)/'`getSentence.cpp
-
-libgiza_a-getSentence.obj: getSentence.cpp
-@am__fastdepCXX_TRUE@ $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libgiza_a_CPPFLAGS) $(CPPFLAGS) $(libgiza_a_CXXFLAGS) $(CXXFLAGS) -MT libgiza_a-getSentence.obj -MD -MP -MF $(DEPDIR)/libgiza_a-getSentence.Tpo -c -o libgiza_a-getSentence.obj `if test -f 'getSentence.cpp'; then $(CYGPATH_W) 'getSentence.cpp'; else $(CYGPATH_W) '$(srcdir)/getSentence.cpp'; fi`
-@am__fastdepCXX_TRUE@ mv -f $(DEPDIR)/libgiza_a-getSentence.Tpo $(DEPDIR)/libgiza_a-getSentence.Po
-@AMDEP_TRUE@@am__fastdepCXX_FALSE@ source='getSentence.cpp' object='libgiza_a-getSentence.obj' libtool=no @AMDEPBACKSLASH@
-@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@
-@am__fastdepCXX_FALSE@ $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libgiza_a_CPPFLAGS) $(CPPFLAGS) $(libgiza_a_CXXFLAGS) $(CXXFLAGS) -c -o libgiza_a-getSentence.obj `if test -f 'getSentence.cpp'; then $(CYGPATH_W) 'getSentence.cpp'; else $(CYGPATH_W) '$(srcdir)/getSentence.cpp'; fi`
-
-libgiza_a-hmm.o: hmm.cpp
-@am__fastdepCXX_TRUE@ $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libgiza_a_CPPFLAGS) $(CPPFLAGS) $(libgiza_a_CXXFLAGS) $(CXXFLAGS) -MT libgiza_a-hmm.o -MD -MP -MF $(DEPDIR)/libgiza_a-hmm.Tpo -c -o libgiza_a-hmm.o `test -f 'hmm.cpp' || echo '$(srcdir)/'`hmm.cpp
-@am__fastdepCXX_TRUE@ mv -f $(DEPDIR)/libgiza_a-hmm.Tpo $(DEPDIR)/libgiza_a-hmm.Po
-@AMDEP_TRUE@@am__fastdepCXX_FALSE@ source='hmm.cpp' object='libgiza_a-hmm.o' libtool=no @AMDEPBACKSLASH@
-@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@
-@am__fastdepCXX_FALSE@ $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libgiza_a_CPPFLAGS) $(CPPFLAGS) $(libgiza_a_CXXFLAGS) $(CXXFLAGS) -c -o libgiza_a-hmm.o `test -f 'hmm.cpp' || echo '$(srcdir)/'`hmm.cpp
-
-libgiza_a-hmm.obj: hmm.cpp
-@am__fastdepCXX_TRUE@ $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libgiza_a_CPPFLAGS) $(CPPFLAGS) $(libgiza_a_CXXFLAGS) $(CXXFLAGS) -MT libgiza_a-hmm.obj -MD -MP -MF $(DEPDIR)/libgiza_a-hmm.Tpo -c -o libgiza_a-hmm.obj `if test -f 'hmm.cpp'; then $(CYGPATH_W) 'hmm.cpp'; else $(CYGPATH_W) '$(srcdir)/hmm.cpp'; fi`
-@am__fastdepCXX_TRUE@ mv -f $(DEPDIR)/libgiza_a-hmm.Tpo $(DEPDIR)/libgiza_a-hmm.Po
-@AMDEP_TRUE@@am__fastdepCXX_FALSE@ source='hmm.cpp' object='libgiza_a-hmm.obj' libtool=no @AMDEPBACKSLASH@
-@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@
-@am__fastdepCXX_FALSE@ $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libgiza_a_CPPFLAGS) $(CPPFLAGS) $(libgiza_a_CXXFLAGS) $(CXXFLAGS) -c -o libgiza_a-hmm.obj `if test -f 'hmm.cpp'; then $(CYGPATH_W) 'hmm.cpp'; else $(CYGPATH_W) '$(srcdir)/hmm.cpp'; fi`
-
-libgiza_a-HMMTables.o: HMMTables.cpp
-@am__fastdepCXX_TRUE@ $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libgiza_a_CPPFLAGS) $(CPPFLAGS) $(libgiza_a_CXXFLAGS) $(CXXFLAGS) -MT libgiza_a-HMMTables.o -MD -MP -MF $(DEPDIR)/libgiza_a-HMMTables.Tpo -c -o libgiza_a-HMMTables.o `test -f 'HMMTables.cpp' || echo '$(srcdir)/'`HMMTables.cpp
-@am__fastdepCXX_TRUE@ mv -f $(DEPDIR)/libgiza_a-HMMTables.Tpo $(DEPDIR)/libgiza_a-HMMTables.Po
-@AMDEP_TRUE@@am__fastdepCXX_FALSE@ source='HMMTables.cpp' object='libgiza_a-HMMTables.o' libtool=no @AMDEPBACKSLASH@
-@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@
-@am__fastdepCXX_FALSE@ $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libgiza_a_CPPFLAGS) $(CPPFLAGS) $(libgiza_a_CXXFLAGS) $(CXXFLAGS) -c -o libgiza_a-HMMTables.o `test -f 'HMMTables.cpp' || echo '$(srcdir)/'`HMMTables.cpp
-
-libgiza_a-HMMTables.obj: HMMTables.cpp
-@am__fastdepCXX_TRUE@ $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libgiza_a_CPPFLAGS) $(CPPFLAGS) $(libgiza_a_CXXFLAGS) $(CXXFLAGS) -MT libgiza_a-HMMTables.obj -MD -MP -MF $(DEPDIR)/libgiza_a-HMMTables.Tpo -c -o libgiza_a-HMMTables.obj `if test -f 'HMMTables.cpp'; then $(CYGPATH_W) 'HMMTables.cpp'; else $(CYGPATH_W) '$(srcdir)/HMMTables.cpp'; fi`
-@am__fastdepCXX_TRUE@ mv -f $(DEPDIR)/libgiza_a-HMMTables.Tpo $(DEPDIR)/libgiza_a-HMMTables.Po
-@AMDEP_TRUE@@am__fastdepCXX_FALSE@ source='HMMTables.cpp' object='libgiza_a-HMMTables.obj' libtool=no @AMDEPBACKSLASH@
-@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@
-@am__fastdepCXX_FALSE@ $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libgiza_a_CPPFLAGS) $(CPPFLAGS) $(libgiza_a_CXXFLAGS) $(CXXFLAGS) -c -o libgiza_a-HMMTables.obj `if test -f 'HMMTables.cpp'; then $(CYGPATH_W) 'HMMTables.cpp'; else $(CYGPATH_W) '$(srcdir)/HMMTables.cpp'; fi`
-
-libgiza_a-logprob.o: logprob.cpp
-@am__fastdepCXX_TRUE@ $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libgiza_a_CPPFLAGS) $(CPPFLAGS) $(libgiza_a_CXXFLAGS) $(CXXFLAGS) -MT libgiza_a-logprob.o -MD -MP -MF $(DEPDIR)/libgiza_a-logprob.Tpo -c -o libgiza_a-logprob.o `test -f 'logprob.cpp' || echo '$(srcdir)/'`logprob.cpp
-@am__fastdepCXX_TRUE@ mv -f $(DEPDIR)/libgiza_a-logprob.Tpo $(DEPDIR)/libgiza_a-logprob.Po
-@AMDEP_TRUE@@am__fastdepCXX_FALSE@ source='logprob.cpp' object='libgiza_a-logprob.o' libtool=no @AMDEPBACKSLASH@
-@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@
-@am__fastdepCXX_FALSE@ $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libgiza_a_CPPFLAGS) $(CPPFLAGS) $(libgiza_a_CXXFLAGS) $(CXXFLAGS) -c -o libgiza_a-logprob.o `test -f 'logprob.cpp' || echo '$(srcdir)/'`logprob.cpp
-
-libgiza_a-logprob.obj: logprob.cpp
-@am__fastdepCXX_TRUE@ $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libgiza_a_CPPFLAGS) $(CPPFLAGS) $(libgiza_a_CXXFLAGS) $(CXXFLAGS) -MT libgiza_a-logprob.obj -MD -MP -MF $(DEPDIR)/libgiza_a-logprob.Tpo -c -o libgiza_a-logprob.obj `if test -f 'logprob.cpp'; then $(CYGPATH_W) 'logprob.cpp'; else $(CYGPATH_W) '$(srcdir)/logprob.cpp'; fi`
-@am__fastdepCXX_TRUE@ mv -f $(DEPDIR)/libgiza_a-logprob.Tpo $(DEPDIR)/libgiza_a-logprob.Po
-@AMDEP_TRUE@@am__fastdepCXX_FALSE@ source='logprob.cpp' object='libgiza_a-logprob.obj' libtool=no @AMDEPBACKSLASH@
-@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@
-@am__fastdepCXX_FALSE@ $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libgiza_a_CPPFLAGS) $(CPPFLAGS) $(libgiza_a_CXXFLAGS) $(CXXFLAGS) -c -o libgiza_a-logprob.obj `if test -f 'logprob.cpp'; then $(CYGPATH_W) 'logprob.cpp'; else $(CYGPATH_W) '$(srcdir)/logprob.cpp'; fi`
-
-libgiza_a-model1.o: model1.cpp
-@am__fastdepCXX_TRUE@ $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libgiza_a_CPPFLAGS) $(CPPFLAGS) $(libgiza_a_CXXFLAGS) $(CXXFLAGS) -MT libgiza_a-model1.o -MD -MP -MF $(DEPDIR)/libgiza_a-model1.Tpo -c -o libgiza_a-model1.o `test -f 'model1.cpp' || echo '$(srcdir)/'`model1.cpp
-@am__fastdepCXX_TRUE@ mv -f $(DEPDIR)/libgiza_a-model1.Tpo $(DEPDIR)/libgiza_a-model1.Po
-@AMDEP_TRUE@@am__fastdepCXX_FALSE@ source='model1.cpp' object='libgiza_a-model1.o' libtool=no @AMDEPBACKSLASH@
-@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@
-@am__fastdepCXX_FALSE@ $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libgiza_a_CPPFLAGS) $(CPPFLAGS) $(libgiza_a_CXXFLAGS) $(CXXFLAGS) -c -o libgiza_a-model1.o `test -f 'model1.cpp' || echo '$(srcdir)/'`model1.cpp
-
-libgiza_a-model1.obj: model1.cpp
-@am__fastdepCXX_TRUE@ $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libgiza_a_CPPFLAGS) $(CPPFLAGS) $(libgiza_a_CXXFLAGS) $(CXXFLAGS) -MT libgiza_a-model1.obj -MD -MP -MF $(DEPDIR)/libgiza_a-model1.Tpo -c -o libgiza_a-model1.obj `if test -f 'model1.cpp'; then $(CYGPATH_W) 'model1.cpp'; else $(CYGPATH_W) '$(srcdir)/model1.cpp'; fi`
-@am__fastdepCXX_TRUE@ mv -f $(DEPDIR)/libgiza_a-model1.Tpo $(DEPDIR)/libgiza_a-model1.Po
-@AMDEP_TRUE@@am__fastdepCXX_FALSE@ source='model1.cpp' object='libgiza_a-model1.obj' libtool=no @AMDEPBACKSLASH@
-@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@
-@am__fastdepCXX_FALSE@ $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libgiza_a_CPPFLAGS) $(CPPFLAGS) $(libgiza_a_CXXFLAGS) $(CXXFLAGS) -c -o libgiza_a-model1.obj `if test -f 'model1.cpp'; then $(CYGPATH_W) 'model1.cpp'; else $(CYGPATH_W) '$(srcdir)/model1.cpp'; fi`
-
-libgiza_a-model2.o: model2.cpp
-@am__fastdepCXX_TRUE@ $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libgiza_a_CPPFLAGS) $(CPPFLAGS) $(libgiza_a_CXXFLAGS) $(CXXFLAGS) -MT libgiza_a-model2.o -MD -MP -MF $(DEPDIR)/libgiza_a-model2.Tpo -c -o libgiza_a-model2.o `test -f 'model2.cpp' || echo '$(srcdir)/'`model2.cpp
-@am__fastdepCXX_TRUE@ mv -f $(DEPDIR)/libgiza_a-model2.Tpo $(DEPDIR)/libgiza_a-model2.Po
-@AMDEP_TRUE@@am__fastdepCXX_FALSE@ source='model2.cpp' object='libgiza_a-model2.o' libtool=no @AMDEPBACKSLASH@
-@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@
-@am__fastdepCXX_FALSE@ $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libgiza_a_CPPFLAGS) $(CPPFLAGS) $(libgiza_a_CXXFLAGS) $(CXXFLAGS) -c -o libgiza_a-model2.o `test -f 'model2.cpp' || echo '$(srcdir)/'`model2.cpp
-
-libgiza_a-model2.obj: model2.cpp
-@am__fastdepCXX_TRUE@ $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libgiza_a_CPPFLAGS) $(CPPFLAGS) $(libgiza_a_CXXFLAGS) $(CXXFLAGS) -MT libgiza_a-model2.obj -MD -MP -MF $(DEPDIR)/libgiza_a-model2.Tpo -c -o libgiza_a-model2.obj `if test -f 'model2.cpp'; then $(CYGPATH_W) 'model2.cpp'; else $(CYGPATH_W) '$(srcdir)/model2.cpp'; fi`
-@am__fastdepCXX_TRUE@ mv -f $(DEPDIR)/libgiza_a-model2.Tpo $(DEPDIR)/libgiza_a-model2.Po
-@AMDEP_TRUE@@am__fastdepCXX_FALSE@ source='model2.cpp' object='libgiza_a-model2.obj' libtool=no @AMDEPBACKSLASH@
-@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@
-@am__fastdepCXX_FALSE@ $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libgiza_a_CPPFLAGS) $(CPPFLAGS) $(libgiza_a_CXXFLAGS) $(CXXFLAGS) -c -o libgiza_a-model2.obj `if test -f 'model2.cpp'; then $(CYGPATH_W) 'model2.cpp'; else $(CYGPATH_W) '$(srcdir)/model2.cpp'; fi`
-
-libgiza_a-model2to3.o: model2to3.cpp
-@am__fastdepCXX_TRUE@ $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libgiza_a_CPPFLAGS) $(CPPFLAGS) $(libgiza_a_CXXFLAGS) $(CXXFLAGS) -MT libgiza_a-model2to3.o -MD -MP -MF $(DEPDIR)/libgiza_a-model2to3.Tpo -c -o libgiza_a-model2to3.o `test -f 'model2to3.cpp' || echo '$(srcdir)/'`model2to3.cpp
-@am__fastdepCXX_TRUE@ mv -f $(DEPDIR)/libgiza_a-model2to3.Tpo $(DEPDIR)/libgiza_a-model2to3.Po
-@AMDEP_TRUE@@am__fastdepCXX_FALSE@ source='model2to3.cpp' object='libgiza_a-model2to3.o' libtool=no @AMDEPBACKSLASH@
-@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@
-@am__fastdepCXX_FALSE@ $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libgiza_a_CPPFLAGS) $(CPPFLAGS) $(libgiza_a_CXXFLAGS) $(CXXFLAGS) -c -o libgiza_a-model2to3.o `test -f 'model2to3.cpp' || echo '$(srcdir)/'`model2to3.cpp
-
-libgiza_a-model2to3.obj: model2to3.cpp
-@am__fastdepCXX_TRUE@ $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libgiza_a_CPPFLAGS) $(CPPFLAGS) $(libgiza_a_CXXFLAGS) $(CXXFLAGS) -MT libgiza_a-model2to3.obj -MD -MP -MF $(DEPDIR)/libgiza_a-model2to3.Tpo -c -o libgiza_a-model2to3.obj `if test -f 'model2to3.cpp'; then $(CYGPATH_W) 'model2to3.cpp'; else $(CYGPATH_W) '$(srcdir)/model2to3.cpp'; fi`
-@am__fastdepCXX_TRUE@ mv -f $(DEPDIR)/libgiza_a-model2to3.Tpo $(DEPDIR)/libgiza_a-model2to3.Po
-@AMDEP_TRUE@@am__fastdepCXX_FALSE@ source='model2to3.cpp' object='libgiza_a-model2to3.obj' libtool=no @AMDEPBACKSLASH@
-@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@
-@am__fastdepCXX_FALSE@ $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libgiza_a_CPPFLAGS) $(CPPFLAGS) $(libgiza_a_CXXFLAGS) $(CXXFLAGS) -c -o libgiza_a-model2to3.obj `if test -f 'model2to3.cpp'; then $(CYGPATH_W) 'model2to3.cpp'; else $(CYGPATH_W) '$(srcdir)/model2to3.cpp'; fi`
-
-libgiza_a-model3.o: model3.cpp
-@am__fastdepCXX_TRUE@ $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libgiza_a_CPPFLAGS) $(CPPFLAGS) $(libgiza_a_CXXFLAGS) $(CXXFLAGS) -MT libgiza_a-model3.o -MD -MP -MF $(DEPDIR)/libgiza_a-model3.Tpo -c -o libgiza_a-model3.o `test -f 'model3.cpp' || echo '$(srcdir)/'`model3.cpp
-@am__fastdepCXX_TRUE@ mv -f $(DEPDIR)/libgiza_a-model3.Tpo $(DEPDIR)/libgiza_a-model3.Po
-@AMDEP_TRUE@@am__fastdepCXX_FALSE@ source='model3.cpp' object='libgiza_a-model3.o' libtool=no @AMDEPBACKSLASH@
-@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@
-@am__fastdepCXX_FALSE@ $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libgiza_a_CPPFLAGS) $(CPPFLAGS) $(libgiza_a_CXXFLAGS) $(CXXFLAGS) -c -o libgiza_a-model3.o `test -f 'model3.cpp' || echo '$(srcdir)/'`model3.cpp
-
-libgiza_a-model3.obj: model3.cpp
-@am__fastdepCXX_TRUE@ $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libgiza_a_CPPFLAGS) $(CPPFLAGS) $(libgiza_a_CXXFLAGS) $(CXXFLAGS) -MT libgiza_a-model3.obj -MD -MP -MF $(DEPDIR)/libgiza_a-model3.Tpo -c -o libgiza_a-model3.obj `if test -f 'model3.cpp'; then $(CYGPATH_W) 'model3.cpp'; else $(CYGPATH_W) '$(srcdir)/model3.cpp'; fi`
-@am__fastdepCXX_TRUE@ mv -f $(DEPDIR)/libgiza_a-model3.Tpo $(DEPDIR)/libgiza_a-model3.Po
-@AMDEP_TRUE@@am__fastdepCXX_FALSE@ source='model3.cpp' object='libgiza_a-model3.obj' libtool=no @AMDEPBACKSLASH@
-@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@
-@am__fastdepCXX_FALSE@ $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libgiza_a_CPPFLAGS) $(CPPFLAGS) $(libgiza_a_CXXFLAGS) $(CXXFLAGS) -c -o libgiza_a-model3.obj `if test -f 'model3.cpp'; then $(CYGPATH_W) 'model3.cpp'; else $(CYGPATH_W) '$(srcdir)/model3.cpp'; fi`
-
-libgiza_a-model3_viterbi.o: model3_viterbi.cpp
-@am__fastdepCXX_TRUE@ $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libgiza_a_CPPFLAGS) $(CPPFLAGS) $(libgiza_a_CXXFLAGS) $(CXXFLAGS) -MT libgiza_a-model3_viterbi.o -MD -MP -MF $(DEPDIR)/libgiza_a-model3_viterbi.Tpo -c -o libgiza_a-model3_viterbi.o `test -f 'model3_viterbi.cpp' || echo '$(srcdir)/'`model3_viterbi.cpp
-@am__fastdepCXX_TRUE@ mv -f $(DEPDIR)/libgiza_a-model3_viterbi.Tpo $(DEPDIR)/libgiza_a-model3_viterbi.Po
-@AMDEP_TRUE@@am__fastdepCXX_FALSE@ source='model3_viterbi.cpp' object='libgiza_a-model3_viterbi.o' libtool=no @AMDEPBACKSLASH@
-@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@
-@am__fastdepCXX_FALSE@ $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libgiza_a_CPPFLAGS) $(CPPFLAGS) $(libgiza_a_CXXFLAGS) $(CXXFLAGS) -c -o libgiza_a-model3_viterbi.o `test -f 'model3_viterbi.cpp' || echo '$(srcdir)/'`model3_viterbi.cpp
-
-libgiza_a-model3_viterbi.obj: model3_viterbi.cpp
-@am__fastdepCXX_TRUE@ $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libgiza_a_CPPFLAGS) $(CPPFLAGS) $(libgiza_a_CXXFLAGS) $(CXXFLAGS) -MT libgiza_a-model3_viterbi.obj -MD -MP -MF $(DEPDIR)/libgiza_a-model3_viterbi.Tpo -c -o libgiza_a-model3_viterbi.obj `if test -f 'model3_viterbi.cpp'; then $(CYGPATH_W) 'model3_viterbi.cpp'; else $(CYGPATH_W) '$(srcdir)/model3_viterbi.cpp'; fi`
-@am__fastdepCXX_TRUE@ mv -f $(DEPDIR)/libgiza_a-model3_viterbi.Tpo $(DEPDIR)/libgiza_a-model3_viterbi.Po
-@AMDEP_TRUE@@am__fastdepCXX_FALSE@ source='model3_viterbi.cpp' object='libgiza_a-model3_viterbi.obj' libtool=no @AMDEPBACKSLASH@
-@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@
-@am__fastdepCXX_FALSE@ $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libgiza_a_CPPFLAGS) $(CPPFLAGS) $(libgiza_a_CXXFLAGS) $(CXXFLAGS) -c -o libgiza_a-model3_viterbi.obj `if test -f 'model3_viterbi.cpp'; then $(CYGPATH_W) 'model3_viterbi.cpp'; else $(CYGPATH_W) '$(srcdir)/model3_viterbi.cpp'; fi`
-
-libgiza_a-model3_viterbi_with_tricks.o: model3_viterbi_with_tricks.cpp
-@am__fastdepCXX_TRUE@ $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libgiza_a_CPPFLAGS) $(CPPFLAGS) $(libgiza_a_CXXFLAGS) $(CXXFLAGS) -MT libgiza_a-model3_viterbi_with_tricks.o -MD -MP -MF $(DEPDIR)/libgiza_a-model3_viterbi_with_tricks.Tpo -c -o libgiza_a-model3_viterbi_with_tricks.o `test -f 'model3_viterbi_with_tricks.cpp' || echo '$(srcdir)/'`model3_viterbi_with_tricks.cpp
-@am__fastdepCXX_TRUE@ mv -f $(DEPDIR)/libgiza_a-model3_viterbi_with_tricks.Tpo $(DEPDIR)/libgiza_a-model3_viterbi_with_tricks.Po
-@AMDEP_TRUE@@am__fastdepCXX_FALSE@ source='model3_viterbi_with_tricks.cpp' object='libgiza_a-model3_viterbi_with_tricks.o' libtool=no @AMDEPBACKSLASH@
-@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@
-@am__fastdepCXX_FALSE@ $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libgiza_a_CPPFLAGS) $(CPPFLAGS) $(libgiza_a_CXXFLAGS) $(CXXFLAGS) -c -o libgiza_a-model3_viterbi_with_tricks.o `test -f 'model3_viterbi_with_tricks.cpp' || echo '$(srcdir)/'`model3_viterbi_with_tricks.cpp
-
-libgiza_a-model3_viterbi_with_tricks.obj: model3_viterbi_with_tricks.cpp
-@am__fastdepCXX_TRUE@ $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libgiza_a_CPPFLAGS) $(CPPFLAGS) $(libgiza_a_CXXFLAGS) $(CXXFLAGS) -MT libgiza_a-model3_viterbi_with_tricks.obj -MD -MP -MF $(DEPDIR)/libgiza_a-model3_viterbi_with_tricks.Tpo -c -o libgiza_a-model3_viterbi_with_tricks.obj `if test -f 'model3_viterbi_with_tricks.cpp'; then $(CYGPATH_W) 'model3_viterbi_with_tricks.cpp'; else $(CYGPATH_W) '$(srcdir)/model3_viterbi_with_tricks.cpp'; fi`
-@am__fastdepCXX_TRUE@ mv -f $(DEPDIR)/libgiza_a-model3_viterbi_with_tricks.Tpo $(DEPDIR)/libgiza_a-model3_viterbi_with_tricks.Po
-@AMDEP_TRUE@@am__fastdepCXX_FALSE@ source='model3_viterbi_with_tricks.cpp' object='libgiza_a-model3_viterbi_with_tricks.obj' libtool=no @AMDEPBACKSLASH@
-@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@
-@am__fastdepCXX_FALSE@ $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libgiza_a_CPPFLAGS) $(CPPFLAGS) $(libgiza_a_CXXFLAGS) $(CXXFLAGS) -c -o libgiza_a-model3_viterbi_with_tricks.obj `if test -f 'model3_viterbi_with_tricks.cpp'; then $(CYGPATH_W) 'model3_viterbi_with_tricks.cpp'; else $(CYGPATH_W) '$(srcdir)/model3_viterbi_with_tricks.cpp'; fi`
-
-libgiza_a-model345-peg.o: model345-peg.cpp
-@am__fastdepCXX_TRUE@ $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libgiza_a_CPPFLAGS) $(CPPFLAGS) $(libgiza_a_CXXFLAGS) $(CXXFLAGS) -MT libgiza_a-model345-peg.o -MD -MP -MF $(DEPDIR)/libgiza_a-model345-peg.Tpo -c -o libgiza_a-model345-peg.o `test -f 'model345-peg.cpp' || echo '$(srcdir)/'`model345-peg.cpp
-@am__fastdepCXX_TRUE@ mv -f $(DEPDIR)/libgiza_a-model345-peg.Tpo $(DEPDIR)/libgiza_a-model345-peg.Po
-@AMDEP_TRUE@@am__fastdepCXX_FALSE@ source='model345-peg.cpp' object='libgiza_a-model345-peg.o' libtool=no @AMDEPBACKSLASH@
-@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@
-@am__fastdepCXX_FALSE@ $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libgiza_a_CPPFLAGS) $(CPPFLAGS) $(libgiza_a_CXXFLAGS) $(CXXFLAGS) -c -o libgiza_a-model345-peg.o `test -f 'model345-peg.cpp' || echo '$(srcdir)/'`model345-peg.cpp
-
-libgiza_a-model345-peg.obj: model345-peg.cpp
-@am__fastdepCXX_TRUE@ $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libgiza_a_CPPFLAGS) $(CPPFLAGS) $(libgiza_a_CXXFLAGS) $(CXXFLAGS) -MT libgiza_a-model345-peg.obj -MD -MP -MF $(DEPDIR)/libgiza_a-model345-peg.Tpo -c -o libgiza_a-model345-peg.obj `if test -f 'model345-peg.cpp'; then $(CYGPATH_W) 'model345-peg.cpp'; else $(CYGPATH_W) '$(srcdir)/model345-peg.cpp'; fi`
-@am__fastdepCXX_TRUE@ mv -f $(DEPDIR)/libgiza_a-model345-peg.Tpo $(DEPDIR)/libgiza_a-model345-peg.Po
-@AMDEP_TRUE@@am__fastdepCXX_FALSE@ source='model345-peg.cpp' object='libgiza_a-model345-peg.obj' libtool=no @AMDEPBACKSLASH@
-@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@
-@am__fastdepCXX_FALSE@ $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libgiza_a_CPPFLAGS) $(CPPFLAGS) $(libgiza_a_CXXFLAGS) $(CXXFLAGS) -c -o libgiza_a-model345-peg.obj `if test -f 'model345-peg.cpp'; then $(CYGPATH_W) 'model345-peg.cpp'; else $(CYGPATH_W) '$(srcdir)/model345-peg.cpp'; fi`
-
-libgiza_a-MoveSwapMatrix.o: MoveSwapMatrix.cpp
-@am__fastdepCXX_TRUE@ $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libgiza_a_CPPFLAGS) $(CPPFLAGS) $(libgiza_a_CXXFLAGS) $(CXXFLAGS) -MT libgiza_a-MoveSwapMatrix.o -MD -MP -MF $(DEPDIR)/libgiza_a-MoveSwapMatrix.Tpo -c -o libgiza_a-MoveSwapMatrix.o `test -f 'MoveSwapMatrix.cpp' || echo '$(srcdir)/'`MoveSwapMatrix.cpp
-@am__fastdepCXX_TRUE@ mv -f $(DEPDIR)/libgiza_a-MoveSwapMatrix.Tpo $(DEPDIR)/libgiza_a-MoveSwapMatrix.Po
-@AMDEP_TRUE@@am__fastdepCXX_FALSE@ source='MoveSwapMatrix.cpp' object='libgiza_a-MoveSwapMatrix.o' libtool=no @AMDEPBACKSLASH@
-@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@
-@am__fastdepCXX_FALSE@ $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libgiza_a_CPPFLAGS) $(CPPFLAGS) $(libgiza_a_CXXFLAGS) $(CXXFLAGS) -c -o libgiza_a-MoveSwapMatrix.o `test -f 'MoveSwapMatrix.cpp' || echo '$(srcdir)/'`MoveSwapMatrix.cpp
-
-libgiza_a-MoveSwapMatrix.obj: MoveSwapMatrix.cpp
-@am__fastdepCXX_TRUE@ $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libgiza_a_CPPFLAGS) $(CPPFLAGS) $(libgiza_a_CXXFLAGS) $(CXXFLAGS) -MT libgiza_a-MoveSwapMatrix.obj -MD -MP -MF $(DEPDIR)/libgiza_a-MoveSwapMatrix.Tpo -c -o libgiza_a-MoveSwapMatrix.obj `if test -f 'MoveSwapMatrix.cpp'; then $(CYGPATH_W) 'MoveSwapMatrix.cpp'; else $(CYGPATH_W) '$(srcdir)/MoveSwapMatrix.cpp'; fi`
-@am__fastdepCXX_TRUE@ mv -f $(DEPDIR)/libgiza_a-MoveSwapMatrix.Tpo $(DEPDIR)/libgiza_a-MoveSwapMatrix.Po
-@AMDEP_TRUE@@am__fastdepCXX_FALSE@ source='MoveSwapMatrix.cpp' object='libgiza_a-MoveSwapMatrix.obj' libtool=no @AMDEPBACKSLASH@
-@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@
-@am__fastdepCXX_FALSE@ $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libgiza_a_CPPFLAGS) $(CPPFLAGS) $(libgiza_a_CXXFLAGS) $(CXXFLAGS) -c -o libgiza_a-MoveSwapMatrix.obj `if test -f 'MoveSwapMatrix.cpp'; then $(CYGPATH_W) 'MoveSwapMatrix.cpp'; else $(CYGPATH_W) '$(srcdir)/MoveSwapMatrix.cpp'; fi`
-
-libgiza_a-myassert.o: myassert.cpp
-@am__fastdepCXX_TRUE@ $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libgiza_a_CPPFLAGS) $(CPPFLAGS) $(libgiza_a_CXXFLAGS) $(CXXFLAGS) -MT libgiza_a-myassert.o -MD -MP -MF $(DEPDIR)/libgiza_a-myassert.Tpo -c -o libgiza_a-myassert.o `test -f 'myassert.cpp' || echo '$(srcdir)/'`myassert.cpp
-@am__fastdepCXX_TRUE@ mv -f $(DEPDIR)/libgiza_a-myassert.Tpo $(DEPDIR)/libgiza_a-myassert.Po
-@AMDEP_TRUE@@am__fastdepCXX_FALSE@ source='myassert.cpp' object='libgiza_a-myassert.o' libtool=no @AMDEPBACKSLASH@
-@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@
-@am__fastdepCXX_FALSE@ $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libgiza_a_CPPFLAGS) $(CPPFLAGS) $(libgiza_a_CXXFLAGS) $(CXXFLAGS) -c -o libgiza_a-myassert.o `test -f 'myassert.cpp' || echo '$(srcdir)/'`myassert.cpp
-
-libgiza_a-myassert.obj: myassert.cpp
-@am__fastdepCXX_TRUE@ $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libgiza_a_CPPFLAGS) $(CPPFLAGS) $(libgiza_a_CXXFLAGS) $(CXXFLAGS) -MT libgiza_a-myassert.obj -MD -MP -MF $(DEPDIR)/libgiza_a-myassert.Tpo -c -o libgiza_a-myassert.obj `if test -f 'myassert.cpp'; then $(CYGPATH_W) 'myassert.cpp'; else $(CYGPATH_W) '$(srcdir)/myassert.cpp'; fi`
-@am__fastdepCXX_TRUE@ mv -f $(DEPDIR)/libgiza_a-myassert.Tpo $(DEPDIR)/libgiza_a-myassert.Po
-@AMDEP_TRUE@@am__fastdepCXX_FALSE@ source='myassert.cpp' object='libgiza_a-myassert.obj' libtool=no @AMDEPBACKSLASH@
-@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@
-@am__fastdepCXX_FALSE@ $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libgiza_a_CPPFLAGS) $(CPPFLAGS) $(libgiza_a_CXXFLAGS) $(CXXFLAGS) -c -o libgiza_a-myassert.obj `if test -f 'myassert.cpp'; then $(CYGPATH_W) 'myassert.cpp'; else $(CYGPATH_W) '$(srcdir)/myassert.cpp'; fi`
-
-libgiza_a-NTables.o: NTables.cpp
-@am__fastdepCXX_TRUE@ $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libgiza_a_CPPFLAGS) $(CPPFLAGS) $(libgiza_a_CXXFLAGS) $(CXXFLAGS) -MT libgiza_a-NTables.o -MD -MP -MF $(DEPDIR)/libgiza_a-NTables.Tpo -c -o libgiza_a-NTables.o `test -f 'NTables.cpp' || echo '$(srcdir)/'`NTables.cpp
-@am__fastdepCXX_TRUE@ mv -f $(DEPDIR)/libgiza_a-NTables.Tpo $(DEPDIR)/libgiza_a-NTables.Po
-@AMDEP_TRUE@@am__fastdepCXX_FALSE@ source='NTables.cpp' object='libgiza_a-NTables.o' libtool=no @AMDEPBACKSLASH@
-@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@
-@am__fastdepCXX_FALSE@ $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libgiza_a_CPPFLAGS) $(CPPFLAGS) $(libgiza_a_CXXFLAGS) $(CXXFLAGS) -c -o libgiza_a-NTables.o `test -f 'NTables.cpp' || echo '$(srcdir)/'`NTables.cpp
-
-libgiza_a-NTables.obj: NTables.cpp
-@am__fastdepCXX_TRUE@ $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libgiza_a_CPPFLAGS) $(CPPFLAGS) $(libgiza_a_CXXFLAGS) $(CXXFLAGS) -MT libgiza_a-NTables.obj -MD -MP -MF $(DEPDIR)/libgiza_a-NTables.Tpo -c -o libgiza_a-NTables.obj `if test -f 'NTables.cpp'; then $(CYGPATH_W) 'NTables.cpp'; else $(CYGPATH_W) '$(srcdir)/NTables.cpp'; fi`
-@am__fastdepCXX_TRUE@ mv -f $(DEPDIR)/libgiza_a-NTables.Tpo $(DEPDIR)/libgiza_a-NTables.Po
-@AMDEP_TRUE@@am__fastdepCXX_FALSE@ source='NTables.cpp' object='libgiza_a-NTables.obj' libtool=no @AMDEPBACKSLASH@
-@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@
-@am__fastdepCXX_FALSE@ $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libgiza_a_CPPFLAGS) $(CPPFLAGS) $(libgiza_a_CXXFLAGS) $(CXXFLAGS) -c -o libgiza_a-NTables.obj `if test -f 'NTables.cpp'; then $(CYGPATH_W) 'NTables.cpp'; else $(CYGPATH_W) '$(srcdir)/NTables.cpp'; fi`
-
-libgiza_a-Parameter.o: Parameter.cpp
-@am__fastdepCXX_TRUE@ $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libgiza_a_CPPFLAGS) $(CPPFLAGS) $(libgiza_a_CXXFLAGS) $(CXXFLAGS) -MT libgiza_a-Parameter.o -MD -MP -MF $(DEPDIR)/libgiza_a-Parameter.Tpo -c -o libgiza_a-Parameter.o `test -f 'Parameter.cpp' || echo '$(srcdir)/'`Parameter.cpp
-@am__fastdepCXX_TRUE@ mv -f $(DEPDIR)/libgiza_a-Parameter.Tpo $(DEPDIR)/libgiza_a-Parameter.Po
-@AMDEP_TRUE@@am__fastdepCXX_FALSE@ source='Parameter.cpp' object='libgiza_a-Parameter.o' libtool=no @AMDEPBACKSLASH@
-@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@
-@am__fastdepCXX_FALSE@ $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libgiza_a_CPPFLAGS) $(CPPFLAGS) $(libgiza_a_CXXFLAGS) $(CXXFLAGS) -c -o libgiza_a-Parameter.o `test -f 'Parameter.cpp' || echo '$(srcdir)/'`Parameter.cpp
-
-libgiza_a-Parameter.obj: Parameter.cpp
-@am__fastdepCXX_TRUE@ $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libgiza_a_CPPFLAGS) $(CPPFLAGS) $(libgiza_a_CXXFLAGS) $(CXXFLAGS) -MT libgiza_a-Parameter.obj -MD -MP -MF $(DEPDIR)/libgiza_a-Parameter.Tpo -c -o libgiza_a-Parameter.obj `if test -f 'Parameter.cpp'; then $(CYGPATH_W) 'Parameter.cpp'; else $(CYGPATH_W) '$(srcdir)/Parameter.cpp'; fi`
-@am__fastdepCXX_TRUE@ mv -f $(DEPDIR)/libgiza_a-Parameter.Tpo $(DEPDIR)/libgiza_a-Parameter.Po
-@AMDEP_TRUE@@am__fastdepCXX_FALSE@ source='Parameter.cpp' object='libgiza_a-Parameter.obj' libtool=no @AMDEPBACKSLASH@
-@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@
-@am__fastdepCXX_FALSE@ $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libgiza_a_CPPFLAGS) $(CPPFLAGS) $(libgiza_a_CXXFLAGS) $(CXXFLAGS) -c -o libgiza_a-Parameter.obj `if test -f 'Parameter.cpp'; then $(CYGPATH_W) 'Parameter.cpp'; else $(CYGPATH_W) '$(srcdir)/Parameter.cpp'; fi`
-
-libgiza_a-parse.o: parse.cpp
-@am__fastdepCXX_TRUE@ $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libgiza_a_CPPFLAGS) $(CPPFLAGS) $(libgiza_a_CXXFLAGS) $(CXXFLAGS) -MT libgiza_a-parse.o -MD -MP -MF $(DEPDIR)/libgiza_a-parse.Tpo -c -o libgiza_a-parse.o `test -f 'parse.cpp' || echo '$(srcdir)/'`parse.cpp
-@am__fastdepCXX_TRUE@ mv -f $(DEPDIR)/libgiza_a-parse.Tpo $(DEPDIR)/libgiza_a-parse.Po
-@AMDEP_TRUE@@am__fastdepCXX_FALSE@ source='parse.cpp' object='libgiza_a-parse.o' libtool=no @AMDEPBACKSLASH@
-@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@
-@am__fastdepCXX_FALSE@ $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libgiza_a_CPPFLAGS) $(CPPFLAGS) $(libgiza_a_CXXFLAGS) $(CXXFLAGS) -c -o libgiza_a-parse.o `test -f 'parse.cpp' || echo '$(srcdir)/'`parse.cpp
-
-libgiza_a-parse.obj: parse.cpp
-@am__fastdepCXX_TRUE@ $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libgiza_a_CPPFLAGS) $(CPPFLAGS) $(libgiza_a_CXXFLAGS) $(CXXFLAGS) -MT libgiza_a-parse.obj -MD -MP -MF $(DEPDIR)/libgiza_a-parse.Tpo -c -o libgiza_a-parse.obj `if test -f 'parse.cpp'; then $(CYGPATH_W) 'parse.cpp'; else $(CYGPATH_W) '$(srcdir)/parse.cpp'; fi`
-@am__fastdepCXX_TRUE@ mv -f $(DEPDIR)/libgiza_a-parse.Tpo $(DEPDIR)/libgiza_a-parse.Po
-@AMDEP_TRUE@@am__fastdepCXX_FALSE@ source='parse.cpp' object='libgiza_a-parse.obj' libtool=no @AMDEPBACKSLASH@
-@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@
-@am__fastdepCXX_FALSE@ $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libgiza_a_CPPFLAGS) $(CPPFLAGS) $(libgiza_a_CXXFLAGS) $(CXXFLAGS) -c -o libgiza_a-parse.obj `if test -f 'parse.cpp'; then $(CYGPATH_W) 'parse.cpp'; else $(CYGPATH_W) '$(srcdir)/parse.cpp'; fi`
-
-libgiza_a-Perplexity.o: Perplexity.cpp
-@am__fastdepCXX_TRUE@ $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libgiza_a_CPPFLAGS) $(CPPFLAGS) $(libgiza_a_CXXFLAGS) $(CXXFLAGS) -MT libgiza_a-Perplexity.o -MD -MP -MF $(DEPDIR)/libgiza_a-Perplexity.Tpo -c -o libgiza_a-Perplexity.o `test -f 'Perplexity.cpp' || echo '$(srcdir)/'`Perplexity.cpp
-@am__fastdepCXX_TRUE@ mv -f $(DEPDIR)/libgiza_a-Perplexity.Tpo $(DEPDIR)/libgiza_a-Perplexity.Po
-@AMDEP_TRUE@@am__fastdepCXX_FALSE@ source='Perplexity.cpp' object='libgiza_a-Perplexity.o' libtool=no @AMDEPBACKSLASH@
-@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@
-@am__fastdepCXX_FALSE@ $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libgiza_a_CPPFLAGS) $(CPPFLAGS) $(libgiza_a_CXXFLAGS) $(CXXFLAGS) -c -o libgiza_a-Perplexity.o `test -f 'Perplexity.cpp' || echo '$(srcdir)/'`Perplexity.cpp
-
-libgiza_a-Perplexity.obj: Perplexity.cpp
-@am__fastdepCXX_TRUE@ $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libgiza_a_CPPFLAGS) $(CPPFLAGS) $(libgiza_a_CXXFLAGS) $(CXXFLAGS) -MT libgiza_a-Perplexity.obj -MD -MP -MF $(DEPDIR)/libgiza_a-Perplexity.Tpo -c -o libgiza_a-Perplexity.obj `if test -f 'Perplexity.cpp'; then $(CYGPATH_W) 'Perplexity.cpp'; else $(CYGPATH_W) '$(srcdir)/Perplexity.cpp'; fi`
-@am__fastdepCXX_TRUE@ mv -f $(DEPDIR)/libgiza_a-Perplexity.Tpo $(DEPDIR)/libgiza_a-Perplexity.Po
-@AMDEP_TRUE@@am__fastdepCXX_FALSE@ source='Perplexity.cpp' object='libgiza_a-Perplexity.obj' libtool=no @AMDEPBACKSLASH@
-@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@
-@am__fastdepCXX_FALSE@ $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libgiza_a_CPPFLAGS) $(CPPFLAGS) $(libgiza_a_CXXFLAGS) $(CXXFLAGS) -c -o libgiza_a-Perplexity.obj `if test -f 'Perplexity.cpp'; then $(CYGPATH_W) 'Perplexity.cpp'; else $(CYGPATH_W) '$(srcdir)/Perplexity.cpp'; fi`
-
-libgiza_a-reports.o: reports.cpp
-@am__fastdepCXX_TRUE@ $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libgiza_a_CPPFLAGS) $(CPPFLAGS) $(libgiza_a_CXXFLAGS) $(CXXFLAGS) -MT libgiza_a-reports.o -MD -MP -MF $(DEPDIR)/libgiza_a-reports.Tpo -c -o libgiza_a-reports.o `test -f 'reports.cpp' || echo '$(srcdir)/'`reports.cpp
-@am__fastdepCXX_TRUE@ mv -f $(DEPDIR)/libgiza_a-reports.Tpo $(DEPDIR)/libgiza_a-reports.Po
-@AMDEP_TRUE@@am__fastdepCXX_FALSE@ source='reports.cpp' object='libgiza_a-reports.o' libtool=no @AMDEPBACKSLASH@
-@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@
-@am__fastdepCXX_FALSE@ $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libgiza_a_CPPFLAGS) $(CPPFLAGS) $(libgiza_a_CXXFLAGS) $(CXXFLAGS) -c -o libgiza_a-reports.o `test -f 'reports.cpp' || echo '$(srcdir)/'`reports.cpp
-
-libgiza_a-reports.obj: reports.cpp
-@am__fastdepCXX_TRUE@ $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libgiza_a_CPPFLAGS) $(CPPFLAGS) $(libgiza_a_CXXFLAGS) $(CXXFLAGS) -MT libgiza_a-reports.obj -MD -MP -MF $(DEPDIR)/libgiza_a-reports.Tpo -c -o libgiza_a-reports.obj `if test -f 'reports.cpp'; then $(CYGPATH_W) 'reports.cpp'; else $(CYGPATH_W) '$(srcdir)/reports.cpp'; fi`
-@am__fastdepCXX_TRUE@ mv -f $(DEPDIR)/libgiza_a-reports.Tpo $(DEPDIR)/libgiza_a-reports.Po
-@AMDEP_TRUE@@am__fastdepCXX_FALSE@ source='reports.cpp' object='libgiza_a-reports.obj' libtool=no @AMDEPBACKSLASH@
-@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@
-@am__fastdepCXX_FALSE@ $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libgiza_a_CPPFLAGS) $(CPPFLAGS) $(libgiza_a_CXXFLAGS) $(CXXFLAGS) -c -o libgiza_a-reports.obj `if test -f 'reports.cpp'; then $(CYGPATH_W) 'reports.cpp'; else $(CYGPATH_W) '$(srcdir)/reports.cpp'; fi`
-
-libgiza_a-SetArray.o: SetArray.cpp
-@am__fastdepCXX_TRUE@ $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libgiza_a_CPPFLAGS) $(CPPFLAGS) $(libgiza_a_CXXFLAGS) $(CXXFLAGS) -MT libgiza_a-SetArray.o -MD -MP -MF $(DEPDIR)/libgiza_a-SetArray.Tpo -c -o libgiza_a-SetArray.o `test -f 'SetArray.cpp' || echo '$(srcdir)/'`SetArray.cpp
-@am__fastdepCXX_TRUE@ mv -f $(DEPDIR)/libgiza_a-SetArray.Tpo $(DEPDIR)/libgiza_a-SetArray.Po
-@AMDEP_TRUE@@am__fastdepCXX_FALSE@ source='SetArray.cpp' object='libgiza_a-SetArray.o' libtool=no @AMDEPBACKSLASH@
-@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@
-@am__fastdepCXX_FALSE@ $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libgiza_a_CPPFLAGS) $(CPPFLAGS) $(libgiza_a_CXXFLAGS) $(CXXFLAGS) -c -o libgiza_a-SetArray.o `test -f 'SetArray.cpp' || echo '$(srcdir)/'`SetArray.cpp
-
-libgiza_a-SetArray.obj: SetArray.cpp
-@am__fastdepCXX_TRUE@ $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libgiza_a_CPPFLAGS) $(CPPFLAGS) $(libgiza_a_CXXFLAGS) $(CXXFLAGS) -MT libgiza_a-SetArray.obj -MD -MP -MF $(DEPDIR)/libgiza_a-SetArray.Tpo -c -o libgiza_a-SetArray.obj `if test -f 'SetArray.cpp'; then $(CYGPATH_W) 'SetArray.cpp'; else $(CYGPATH_W) '$(srcdir)/SetArray.cpp'; fi`
-@am__fastdepCXX_TRUE@ mv -f $(DEPDIR)/libgiza_a-SetArray.Tpo $(DEPDIR)/libgiza_a-SetArray.Po
-@AMDEP_TRUE@@am__fastdepCXX_FALSE@ source='SetArray.cpp' object='libgiza_a-SetArray.obj' libtool=no @AMDEPBACKSLASH@
-@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@
-@am__fastdepCXX_FALSE@ $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libgiza_a_CPPFLAGS) $(CPPFLAGS) $(libgiza_a_CXXFLAGS) $(CXXFLAGS) -c -o libgiza_a-SetArray.obj `if test -f 'SetArray.cpp'; then $(CYGPATH_W) 'SetArray.cpp'; else $(CYGPATH_W) '$(srcdir)/SetArray.cpp'; fi`
-
-libgiza_a-transpair_model3.o: transpair_model3.cpp
-@am__fastdepCXX_TRUE@ $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libgiza_a_CPPFLAGS) $(CPPFLAGS) $(libgiza_a_CXXFLAGS) $(CXXFLAGS) -MT libgiza_a-transpair_model3.o -MD -MP -MF $(DEPDIR)/libgiza_a-transpair_model3.Tpo -c -o libgiza_a-transpair_model3.o `test -f 'transpair_model3.cpp' || echo '$(srcdir)/'`transpair_model3.cpp
-@am__fastdepCXX_TRUE@ mv -f $(DEPDIR)/libgiza_a-transpair_model3.Tpo $(DEPDIR)/libgiza_a-transpair_model3.Po
-@AMDEP_TRUE@@am__fastdepCXX_FALSE@ source='transpair_model3.cpp' object='libgiza_a-transpair_model3.o' libtool=no @AMDEPBACKSLASH@
-@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@
-@am__fastdepCXX_FALSE@ $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libgiza_a_CPPFLAGS) $(CPPFLAGS) $(libgiza_a_CXXFLAGS) $(CXXFLAGS) -c -o libgiza_a-transpair_model3.o `test -f 'transpair_model3.cpp' || echo '$(srcdir)/'`transpair_model3.cpp
-
-libgiza_a-transpair_model3.obj: transpair_model3.cpp
-@am__fastdepCXX_TRUE@ $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libgiza_a_CPPFLAGS) $(CPPFLAGS) $(libgiza_a_CXXFLAGS) $(CXXFLAGS) -MT libgiza_a-transpair_model3.obj -MD -MP -MF $(DEPDIR)/libgiza_a-transpair_model3.Tpo -c -o libgiza_a-transpair_model3.obj `if test -f 'transpair_model3.cpp'; then $(CYGPATH_W) 'transpair_model3.cpp'; else $(CYGPATH_W) '$(srcdir)/transpair_model3.cpp'; fi`
-@am__fastdepCXX_TRUE@ mv -f $(DEPDIR)/libgiza_a-transpair_model3.Tpo $(DEPDIR)/libgiza_a-transpair_model3.Po
-@AMDEP_TRUE@@am__fastdepCXX_FALSE@ source='transpair_model3.cpp' object='libgiza_a-transpair_model3.obj' libtool=no @AMDEPBACKSLASH@
-@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@
-@am__fastdepCXX_FALSE@ $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libgiza_a_CPPFLAGS) $(CPPFLAGS) $(libgiza_a_CXXFLAGS) $(CXXFLAGS) -c -o libgiza_a-transpair_model3.obj `if test -f 'transpair_model3.cpp'; then $(CYGPATH_W) 'transpair_model3.cpp'; else $(CYGPATH_W) '$(srcdir)/transpair_model3.cpp'; fi`
-
-libgiza_a-transpair_model4.o: transpair_model4.cpp
-@am__fastdepCXX_TRUE@ $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libgiza_a_CPPFLAGS) $(CPPFLAGS) $(libgiza_a_CXXFLAGS) $(CXXFLAGS) -MT libgiza_a-transpair_model4.o -MD -MP -MF $(DEPDIR)/libgiza_a-transpair_model4.Tpo -c -o libgiza_a-transpair_model4.o `test -f 'transpair_model4.cpp' || echo '$(srcdir)/'`transpair_model4.cpp
-@am__fastdepCXX_TRUE@ mv -f $(DEPDIR)/libgiza_a-transpair_model4.Tpo $(DEPDIR)/libgiza_a-transpair_model4.Po
-@AMDEP_TRUE@@am__fastdepCXX_FALSE@ source='transpair_model4.cpp' object='libgiza_a-transpair_model4.o' libtool=no @AMDEPBACKSLASH@
-@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@
-@am__fastdepCXX_FALSE@ $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libgiza_a_CPPFLAGS) $(CPPFLAGS) $(libgiza_a_CXXFLAGS) $(CXXFLAGS) -c -o libgiza_a-transpair_model4.o `test -f 'transpair_model4.cpp' || echo '$(srcdir)/'`transpair_model4.cpp
-
-libgiza_a-transpair_model4.obj: transpair_model4.cpp
-@am__fastdepCXX_TRUE@ $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libgiza_a_CPPFLAGS) $(CPPFLAGS) $(libgiza_a_CXXFLAGS) $(CXXFLAGS) -MT libgiza_a-transpair_model4.obj -MD -MP -MF $(DEPDIR)/libgiza_a-transpair_model4.Tpo -c -o libgiza_a-transpair_model4.obj `if test -f 'transpair_model4.cpp'; then $(CYGPATH_W) 'transpair_model4.cpp'; else $(CYGPATH_W) '$(srcdir)/transpair_model4.cpp'; fi`
-@am__fastdepCXX_TRUE@ mv -f $(DEPDIR)/libgiza_a-transpair_model4.Tpo $(DEPDIR)/libgiza_a-transpair_model4.Po
-@AMDEP_TRUE@@am__fastdepCXX_FALSE@ source='transpair_model4.cpp' object='libgiza_a-transpair_model4.obj' libtool=no @AMDEPBACKSLASH@
-@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@
-@am__fastdepCXX_FALSE@ $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libgiza_a_CPPFLAGS) $(CPPFLAGS) $(libgiza_a_CXXFLAGS) $(CXXFLAGS) -c -o libgiza_a-transpair_model4.obj `if test -f 'transpair_model4.cpp'; then $(CYGPATH_W) 'transpair_model4.cpp'; else $(CYGPATH_W) '$(srcdir)/transpair_model4.cpp'; fi`
-
-libgiza_a-transpair_model5.o: transpair_model5.cpp
-@am__fastdepCXX_TRUE@ $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libgiza_a_CPPFLAGS) $(CPPFLAGS) $(libgiza_a_CXXFLAGS) $(CXXFLAGS) -MT libgiza_a-transpair_model5.o -MD -MP -MF $(DEPDIR)/libgiza_a-transpair_model5.Tpo -c -o libgiza_a-transpair_model5.o `test -f 'transpair_model5.cpp' || echo '$(srcdir)/'`transpair_model5.cpp
-@am__fastdepCXX_TRUE@ mv -f $(DEPDIR)/libgiza_a-transpair_model5.Tpo $(DEPDIR)/libgiza_a-transpair_model5.Po
-@AMDEP_TRUE@@am__fastdepCXX_FALSE@ source='transpair_model5.cpp' object='libgiza_a-transpair_model5.o' libtool=no @AMDEPBACKSLASH@
-@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@
-@am__fastdepCXX_FALSE@ $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libgiza_a_CPPFLAGS) $(CPPFLAGS) $(libgiza_a_CXXFLAGS) $(CXXFLAGS) -c -o libgiza_a-transpair_model5.o `test -f 'transpair_model5.cpp' || echo '$(srcdir)/'`transpair_model5.cpp
-
-libgiza_a-transpair_model5.obj: transpair_model5.cpp
-@am__fastdepCXX_TRUE@ $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libgiza_a_CPPFLAGS) $(CPPFLAGS) $(libgiza_a_CXXFLAGS) $(CXXFLAGS) -MT libgiza_a-transpair_model5.obj -MD -MP -MF $(DEPDIR)/libgiza_a-transpair_model5.Tpo -c -o libgiza_a-transpair_model5.obj `if test -f 'transpair_model5.cpp'; then $(CYGPATH_W) 'transpair_model5.cpp'; else $(CYGPATH_W) '$(srcdir)/transpair_model5.cpp'; fi`
-@am__fastdepCXX_TRUE@ mv -f $(DEPDIR)/libgiza_a-transpair_model5.Tpo $(DEPDIR)/libgiza_a-transpair_model5.Po
-@AMDEP_TRUE@@am__fastdepCXX_FALSE@ source='transpair_model5.cpp' object='libgiza_a-transpair_model5.obj' libtool=no @AMDEPBACKSLASH@
-@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@
-@am__fastdepCXX_FALSE@ $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libgiza_a_CPPFLAGS) $(CPPFLAGS) $(libgiza_a_CXXFLAGS) $(CXXFLAGS) -c -o libgiza_a-transpair_model5.obj `if test -f 'transpair_model5.cpp'; then $(CYGPATH_W) 'transpair_model5.cpp'; else $(CYGPATH_W) '$(srcdir)/transpair_model5.cpp'; fi`
-
-libgiza_a-TTables.o: TTables.cpp
-@am__fastdepCXX_TRUE@ $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libgiza_a_CPPFLAGS) $(CPPFLAGS) $(libgiza_a_CXXFLAGS) $(CXXFLAGS) -MT libgiza_a-TTables.o -MD -MP -MF $(DEPDIR)/libgiza_a-TTables.Tpo -c -o libgiza_a-TTables.o `test -f 'TTables.cpp' || echo '$(srcdir)/'`TTables.cpp
-@am__fastdepCXX_TRUE@ mv -f $(DEPDIR)/libgiza_a-TTables.Tpo $(DEPDIR)/libgiza_a-TTables.Po
-@AMDEP_TRUE@@am__fastdepCXX_FALSE@ source='TTables.cpp' object='libgiza_a-TTables.o' libtool=no @AMDEPBACKSLASH@
-@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@
-@am__fastdepCXX_FALSE@ $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libgiza_a_CPPFLAGS) $(CPPFLAGS) $(libgiza_a_CXXFLAGS) $(CXXFLAGS) -c -o libgiza_a-TTables.o `test -f 'TTables.cpp' || echo '$(srcdir)/'`TTables.cpp
-
-libgiza_a-TTables.obj: TTables.cpp
-@am__fastdepCXX_TRUE@ $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libgiza_a_CPPFLAGS) $(CPPFLAGS) $(libgiza_a_CXXFLAGS) $(CXXFLAGS) -MT libgiza_a-TTables.obj -MD -MP -MF $(DEPDIR)/libgiza_a-TTables.Tpo -c -o libgiza_a-TTables.obj `if test -f 'TTables.cpp'; then $(CYGPATH_W) 'TTables.cpp'; else $(CYGPATH_W) '$(srcdir)/TTables.cpp'; fi`
-@am__fastdepCXX_TRUE@ mv -f $(DEPDIR)/libgiza_a-TTables.Tpo $(DEPDIR)/libgiza_a-TTables.Po
-@AMDEP_TRUE@@am__fastdepCXX_FALSE@ source='TTables.cpp' object='libgiza_a-TTables.obj' libtool=no @AMDEPBACKSLASH@
-@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@
-@am__fastdepCXX_FALSE@ $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libgiza_a_CPPFLAGS) $(CPPFLAGS) $(libgiza_a_CXXFLAGS) $(CXXFLAGS) -c -o libgiza_a-TTables.obj `if test -f 'TTables.cpp'; then $(CYGPATH_W) 'TTables.cpp'; else $(CYGPATH_W) '$(srcdir)/TTables.cpp'; fi`
-
-libgiza_a-utility.o: utility.cpp
-@am__fastdepCXX_TRUE@ $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libgiza_a_CPPFLAGS) $(CPPFLAGS) $(libgiza_a_CXXFLAGS) $(CXXFLAGS) -MT libgiza_a-utility.o -MD -MP -MF $(DEPDIR)/libgiza_a-utility.Tpo -c -o libgiza_a-utility.o `test -f 'utility.cpp' || echo '$(srcdir)/'`utility.cpp
-@am__fastdepCXX_TRUE@ mv -f $(DEPDIR)/libgiza_a-utility.Tpo $(DEPDIR)/libgiza_a-utility.Po
-@AMDEP_TRUE@@am__fastdepCXX_FALSE@ source='utility.cpp' object='libgiza_a-utility.o' libtool=no @AMDEPBACKSLASH@
-@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@
-@am__fastdepCXX_FALSE@ $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libgiza_a_CPPFLAGS) $(CPPFLAGS) $(libgiza_a_CXXFLAGS) $(CXXFLAGS) -c -o libgiza_a-utility.o `test -f 'utility.cpp' || echo '$(srcdir)/'`utility.cpp
-
-libgiza_a-utility.obj: utility.cpp
-@am__fastdepCXX_TRUE@ $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libgiza_a_CPPFLAGS) $(CPPFLAGS) $(libgiza_a_CXXFLAGS) $(CXXFLAGS) -MT libgiza_a-utility.obj -MD -MP -MF $(DEPDIR)/libgiza_a-utility.Tpo -c -o libgiza_a-utility.obj `if test -f 'utility.cpp'; then $(CYGPATH_W) 'utility.cpp'; else $(CYGPATH_W) '$(srcdir)/utility.cpp'; fi`
-@am__fastdepCXX_TRUE@ mv -f $(DEPDIR)/libgiza_a-utility.Tpo $(DEPDIR)/libgiza_a-utility.Po
-@AMDEP_TRUE@@am__fastdepCXX_FALSE@ source='utility.cpp' object='libgiza_a-utility.obj' libtool=no @AMDEPBACKSLASH@
-@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@
-@am__fastdepCXX_FALSE@ $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libgiza_a_CPPFLAGS) $(CPPFLAGS) $(libgiza_a_CXXFLAGS) $(CXXFLAGS) -c -o libgiza_a-utility.obj `if test -f 'utility.cpp'; then $(CYGPATH_W) 'utility.cpp'; else $(CYGPATH_W) '$(srcdir)/utility.cpp'; fi`
-
-libgiza_a-vocab.o: vocab.cpp
-@am__fastdepCXX_TRUE@ $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libgiza_a_CPPFLAGS) $(CPPFLAGS) $(libgiza_a_CXXFLAGS) $(CXXFLAGS) -MT libgiza_a-vocab.o -MD -MP -MF $(DEPDIR)/libgiza_a-vocab.Tpo -c -o libgiza_a-vocab.o `test -f 'vocab.cpp' || echo '$(srcdir)/'`vocab.cpp
-@am__fastdepCXX_TRUE@ mv -f $(DEPDIR)/libgiza_a-vocab.Tpo $(DEPDIR)/libgiza_a-vocab.Po
-@AMDEP_TRUE@@am__fastdepCXX_FALSE@ source='vocab.cpp' object='libgiza_a-vocab.o' libtool=no @AMDEPBACKSLASH@
-@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@
-@am__fastdepCXX_FALSE@ $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libgiza_a_CPPFLAGS) $(CPPFLAGS) $(libgiza_a_CXXFLAGS) $(CXXFLAGS) -c -o libgiza_a-vocab.o `test -f 'vocab.cpp' || echo '$(srcdir)/'`vocab.cpp
-
-libgiza_a-vocab.obj: vocab.cpp
-@am__fastdepCXX_TRUE@ $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libgiza_a_CPPFLAGS) $(CPPFLAGS) $(libgiza_a_CXXFLAGS) $(CXXFLAGS) -MT libgiza_a-vocab.obj -MD -MP -MF $(DEPDIR)/libgiza_a-vocab.Tpo -c -o libgiza_a-vocab.obj `if test -f 'vocab.cpp'; then $(CYGPATH_W) 'vocab.cpp'; else $(CYGPATH_W) '$(srcdir)/vocab.cpp'; fi`
-@am__fastdepCXX_TRUE@ mv -f $(DEPDIR)/libgiza_a-vocab.Tpo $(DEPDIR)/libgiza_a-vocab.Po
-@AMDEP_TRUE@@am__fastdepCXX_FALSE@ source='vocab.cpp' object='libgiza_a-vocab.obj' libtool=no @AMDEPBACKSLASH@
-@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@
-@am__fastdepCXX_FALSE@ $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libgiza_a_CPPFLAGS) $(CPPFLAGS) $(libgiza_a_CXXFLAGS) $(CXXFLAGS) -c -o libgiza_a-vocab.obj `if test -f 'vocab.cpp'; then $(CYGPATH_W) 'vocab.cpp'; else $(CYGPATH_W) '$(srcdir)/vocab.cpp'; fi`
-
-d4norm-d4norm.o: d4norm.cxx
-@am__fastdepCXX_TRUE@ $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(d4norm_CPPFLAGS) $(CPPFLAGS) $(d4norm_CXXFLAGS) $(CXXFLAGS) -MT d4norm-d4norm.o -MD -MP -MF $(DEPDIR)/d4norm-d4norm.Tpo -c -o d4norm-d4norm.o `test -f 'd4norm.cxx' || echo '$(srcdir)/'`d4norm.cxx
-@am__fastdepCXX_TRUE@ mv -f $(DEPDIR)/d4norm-d4norm.Tpo $(DEPDIR)/d4norm-d4norm.Po
-@AMDEP_TRUE@@am__fastdepCXX_FALSE@ source='d4norm.cxx' object='d4norm-d4norm.o' libtool=no @AMDEPBACKSLASH@
-@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@
-@am__fastdepCXX_FALSE@ $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(d4norm_CPPFLAGS) $(CPPFLAGS) $(d4norm_CXXFLAGS) $(CXXFLAGS) -c -o d4norm-d4norm.o `test -f 'd4norm.cxx' || echo '$(srcdir)/'`d4norm.cxx
-
-d4norm-d4norm.obj: d4norm.cxx
-@am__fastdepCXX_TRUE@ $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(d4norm_CPPFLAGS) $(CPPFLAGS) $(d4norm_CXXFLAGS) $(CXXFLAGS) -MT d4norm-d4norm.obj -MD -MP -MF $(DEPDIR)/d4norm-d4norm.Tpo -c -o d4norm-d4norm.obj `if test -f 'd4norm.cxx'; then $(CYGPATH_W) 'd4norm.cxx'; else $(CYGPATH_W) '$(srcdir)/d4norm.cxx'; fi`
-@am__fastdepCXX_TRUE@ mv -f $(DEPDIR)/d4norm-d4norm.Tpo $(DEPDIR)/d4norm-d4norm.Po
-@AMDEP_TRUE@@am__fastdepCXX_FALSE@ source='d4norm.cxx' object='d4norm-d4norm.obj' libtool=no @AMDEPBACKSLASH@
-@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@
-@am__fastdepCXX_FALSE@ $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(d4norm_CPPFLAGS) $(CPPFLAGS) $(d4norm_CXXFLAGS) $(CXXFLAGS) -c -o d4norm-d4norm.obj `if test -f 'd4norm.cxx'; then $(CYGPATH_W) 'd4norm.cxx'; else $(CYGPATH_W) '$(srcdir)/d4norm.cxx'; fi`
-
-hmmnorm-hmmnorm.o: hmmnorm.cxx
-@am__fastdepCXX_TRUE@ $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(hmmnorm_CPPFLAGS) $(CPPFLAGS) $(hmmnorm_CXXFLAGS) $(CXXFLAGS) -MT hmmnorm-hmmnorm.o -MD -MP -MF $(DEPDIR)/hmmnorm-hmmnorm.Tpo -c -o hmmnorm-hmmnorm.o `test -f 'hmmnorm.cxx' || echo '$(srcdir)/'`hmmnorm.cxx
-@am__fastdepCXX_TRUE@ mv -f $(DEPDIR)/hmmnorm-hmmnorm.Tpo $(DEPDIR)/hmmnorm-hmmnorm.Po
-@AMDEP_TRUE@@am__fastdepCXX_FALSE@ source='hmmnorm.cxx' object='hmmnorm-hmmnorm.o' libtool=no @AMDEPBACKSLASH@
-@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@
-@am__fastdepCXX_FALSE@ $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(hmmnorm_CPPFLAGS) $(CPPFLAGS) $(hmmnorm_CXXFLAGS) $(CXXFLAGS) -c -o hmmnorm-hmmnorm.o `test -f 'hmmnorm.cxx' || echo '$(srcdir)/'`hmmnorm.cxx
-
-hmmnorm-hmmnorm.obj: hmmnorm.cxx
-@am__fastdepCXX_TRUE@ $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(hmmnorm_CPPFLAGS) $(CPPFLAGS) $(hmmnorm_CXXFLAGS) $(CXXFLAGS) -MT hmmnorm-hmmnorm.obj -MD -MP -MF $(DEPDIR)/hmmnorm-hmmnorm.Tpo -c -o hmmnorm-hmmnorm.obj `if test -f 'hmmnorm.cxx'; then $(CYGPATH_W) 'hmmnorm.cxx'; else $(CYGPATH_W) '$(srcdir)/hmmnorm.cxx'; fi`
-@am__fastdepCXX_TRUE@ mv -f $(DEPDIR)/hmmnorm-hmmnorm.Tpo $(DEPDIR)/hmmnorm-hmmnorm.Po
-@AMDEP_TRUE@@am__fastdepCXX_FALSE@ source='hmmnorm.cxx' object='hmmnorm-hmmnorm.obj' libtool=no @AMDEPBACKSLASH@
-@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@
-@am__fastdepCXX_FALSE@ $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(hmmnorm_CPPFLAGS) $(CPPFLAGS) $(hmmnorm_CXXFLAGS) $(CXXFLAGS) -c -o hmmnorm-hmmnorm.obj `if test -f 'hmmnorm.cxx'; then $(CYGPATH_W) 'hmmnorm.cxx'; else $(CYGPATH_W) '$(srcdir)/hmmnorm.cxx'; fi`
-
-mgiza-main.o: main.cpp
-@am__fastdepCXX_TRUE@ $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(mgiza_CPPFLAGS) $(CPPFLAGS) $(mgiza_CXXFLAGS) $(CXXFLAGS) -MT mgiza-main.o -MD -MP -MF $(DEPDIR)/mgiza-main.Tpo -c -o mgiza-main.o `test -f 'main.cpp' || echo '$(srcdir)/'`main.cpp
-@am__fastdepCXX_TRUE@ mv -f $(DEPDIR)/mgiza-main.Tpo $(DEPDIR)/mgiza-main.Po
-@AMDEP_TRUE@@am__fastdepCXX_FALSE@ source='main.cpp' object='mgiza-main.o' libtool=no @AMDEPBACKSLASH@
-@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@
-@am__fastdepCXX_FALSE@ $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(mgiza_CPPFLAGS) $(CPPFLAGS) $(mgiza_CXXFLAGS) $(CXXFLAGS) -c -o mgiza-main.o `test -f 'main.cpp' || echo '$(srcdir)/'`main.cpp
-
-mgiza-main.obj: main.cpp
-@am__fastdepCXX_TRUE@ $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(mgiza_CPPFLAGS) $(CPPFLAGS) $(mgiza_CXXFLAGS) $(CXXFLAGS) -MT mgiza-main.obj -MD -MP -MF $(DEPDIR)/mgiza-main.Tpo -c -o mgiza-main.obj `if test -f 'main.cpp'; then $(CYGPATH_W) 'main.cpp'; else $(CYGPATH_W) '$(srcdir)/main.cpp'; fi`
-@am__fastdepCXX_TRUE@ mv -f $(DEPDIR)/mgiza-main.Tpo $(DEPDIR)/mgiza-main.Po
-@AMDEP_TRUE@@am__fastdepCXX_FALSE@ source='main.cpp' object='mgiza-main.obj' libtool=no @AMDEPBACKSLASH@
-@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@
-@am__fastdepCXX_FALSE@ $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(mgiza_CPPFLAGS) $(CPPFLAGS) $(mgiza_CXXFLAGS) $(CXXFLAGS) -c -o mgiza-main.obj `if test -f 'main.cpp'; then $(CYGPATH_W) 'main.cpp'; else $(CYGPATH_W) '$(srcdir)/main.cpp'; fi`
-
-snt2cooc-snt2cooc.o: snt2cooc.cpp
-@am__fastdepCXX_TRUE@ $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(snt2cooc_CPPFLAGS) $(CPPFLAGS) $(AM_CXXFLAGS) $(CXXFLAGS) -MT snt2cooc-snt2cooc.o -MD -MP -MF $(DEPDIR)/snt2cooc-snt2cooc.Tpo -c -o snt2cooc-snt2cooc.o `test -f 'snt2cooc.cpp' || echo '$(srcdir)/'`snt2cooc.cpp
-@am__fastdepCXX_TRUE@ mv -f $(DEPDIR)/snt2cooc-snt2cooc.Tpo $(DEPDIR)/snt2cooc-snt2cooc.Po
-@AMDEP_TRUE@@am__fastdepCXX_FALSE@ source='snt2cooc.cpp' object='snt2cooc-snt2cooc.o' libtool=no @AMDEPBACKSLASH@
-@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@
-@am__fastdepCXX_FALSE@ $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(snt2cooc_CPPFLAGS) $(CPPFLAGS) $(AM_CXXFLAGS) $(CXXFLAGS) -c -o snt2cooc-snt2cooc.o `test -f 'snt2cooc.cpp' || echo '$(srcdir)/'`snt2cooc.cpp
-
-snt2cooc-snt2cooc.obj: snt2cooc.cpp
-@am__fastdepCXX_TRUE@ $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(snt2cooc_CPPFLAGS) $(CPPFLAGS) $(AM_CXXFLAGS) $(CXXFLAGS) -MT snt2cooc-snt2cooc.obj -MD -MP -MF $(DEPDIR)/snt2cooc-snt2cooc.Tpo -c -o snt2cooc-snt2cooc.obj `if test -f 'snt2cooc.cpp'; then $(CYGPATH_W) 'snt2cooc.cpp'; else $(CYGPATH_W) '$(srcdir)/snt2cooc.cpp'; fi`
-@am__fastdepCXX_TRUE@ mv -f $(DEPDIR)/snt2cooc-snt2cooc.Tpo $(DEPDIR)/snt2cooc-snt2cooc.Po
-@AMDEP_TRUE@@am__fastdepCXX_FALSE@ source='snt2cooc.cpp' object='snt2cooc-snt2cooc.obj' libtool=no @AMDEPBACKSLASH@
-@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@
-@am__fastdepCXX_FALSE@ $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(snt2cooc_CPPFLAGS) $(CPPFLAGS) $(AM_CXXFLAGS) $(CXXFLAGS) -c -o snt2cooc-snt2cooc.obj `if test -f 'snt2cooc.cpp'; then $(CYGPATH_W) 'snt2cooc.cpp'; else $(CYGPATH_W) '$(srcdir)/snt2cooc.cpp'; fi`
-
-.cxx.o:
-@am__fastdepCXX_TRUE@ $(CXXCOMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ $<
-@am__fastdepCXX_TRUE@ mv -f $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Po
-@AMDEP_TRUE@@am__fastdepCXX_FALSE@ source='$<' object='$@' libtool=no @AMDEPBACKSLASH@
-@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@
-@am__fastdepCXX_FALSE@ $(CXXCOMPILE) -c -o $@ $<
-
-.cxx.obj:
-@am__fastdepCXX_TRUE@ $(CXXCOMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ `$(CYGPATH_W) '$<'`
-@am__fastdepCXX_TRUE@ mv -f $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Po
-@AMDEP_TRUE@@am__fastdepCXX_FALSE@ source='$<' object='$@' libtool=no @AMDEPBACKSLASH@
-@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@
-@am__fastdepCXX_FALSE@ $(CXXCOMPILE) -c -o $@ `$(CYGPATH_W) '$<'`
-
-# This directory's subdirectories are mostly independent; you can cd
-# into them and run `make' without going through this Makefile.
-# To change the values of `make' variables: instead of editing Makefiles,
-# (1) if the variable is set in `config.status', edit `config.status'
-# (which will cause the Makefiles to be regenerated when you run `make');
-# (2) otherwise, pass the desired values on the `make' command line.
-$(RECURSIVE_TARGETS):
- @failcom='exit 1'; \
- for f in x $$MAKEFLAGS; do \
- case $$f in \
- *=* | --[!k]*);; \
- *k*) failcom='fail=yes';; \
- esac; \
- done; \
- dot_seen=no; \
- target=`echo $@ | sed s/-recursive//`; \
- list='$(SUBDIRS)'; for subdir in $$list; do \
- echo "Making $$target in $$subdir"; \
- if test "$$subdir" = "."; then \
- dot_seen=yes; \
- local_target="$$target-am"; \
- else \
- local_target="$$target"; \
- fi; \
- (cd $$subdir && $(MAKE) $(AM_MAKEFLAGS) $$local_target) \
- || eval $$failcom; \
- done; \
- if test "$$dot_seen" = "no"; then \
- $(MAKE) $(AM_MAKEFLAGS) "$$target-am" || exit 1; \
- fi; test -z "$$fail"
-
-$(RECURSIVE_CLEAN_TARGETS):
- @failcom='exit 1'; \
- for f in x $$MAKEFLAGS; do \
- case $$f in \
- *=* | --[!k]*);; \
- *k*) failcom='fail=yes';; \
- esac; \
- done; \
- dot_seen=no; \
- case "$@" in \
- distclean-* | maintainer-clean-*) list='$(DIST_SUBDIRS)' ;; \
- *) list='$(SUBDIRS)' ;; \
- esac; \
- rev=''; for subdir in $$list; do \
- if test "$$subdir" = "."; then :; else \
- rev="$$subdir $$rev"; \
- fi; \
- done; \
- rev="$$rev ."; \
- target=`echo $@ | sed s/-recursive//`; \
- for subdir in $$rev; do \
- echo "Making $$target in $$subdir"; \
- if test "$$subdir" = "."; then \
- local_target="$$target-am"; \
- else \
- local_target="$$target"; \
- fi; \
- (cd $$subdir && $(MAKE) $(AM_MAKEFLAGS) $$local_target) \
- || eval $$failcom; \
- done && test -z "$$fail"
-tags-recursive:
- list='$(SUBDIRS)'; for subdir in $$list; do \
- test "$$subdir" = . || (cd $$subdir && $(MAKE) $(AM_MAKEFLAGS) tags); \
- done
-ctags-recursive:
- list='$(SUBDIRS)'; for subdir in $$list; do \
- test "$$subdir" = . || (cd $$subdir && $(MAKE) $(AM_MAKEFLAGS) ctags); \
- done
-
-ID: $(HEADERS) $(SOURCES) $(LISP) $(TAGS_FILES)
- list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \
- unique=`for i in $$list; do \
- if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
- done | \
- $(AWK) '{ files[$$0] = 1; nonemtpy = 1; } \
- END { if (nonempty) { for (i in files) print i; }; }'`; \
- mkid -fID $$unique
-tags: TAGS
-
-TAGS: tags-recursive $(HEADERS) $(SOURCES) $(TAGS_DEPENDENCIES) \
- $(TAGS_FILES) $(LISP)
- tags=; \
- here=`pwd`; \
- if ($(ETAGS) --etags-include --version) >/dev/null 2>&1; then \
- include_option=--etags-include; \
- empty_fix=.; \
- else \
- include_option=--include; \
- empty_fix=; \
- fi; \
- list='$(SUBDIRS)'; for subdir in $$list; do \
- if test "$$subdir" = .; then :; else \
- test ! -f $$subdir/TAGS || \
- tags="$$tags $$include_option=$$here/$$subdir/TAGS"; \
- fi; \
- done; \
- list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \
- unique=`for i in $$list; do \
- if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
- done | \
- $(AWK) '{ files[$$0] = 1; nonempty = 1; } \
- END { if (nonempty) { for (i in files) print i; }; }'`; \
- if test -z "$(ETAGS_ARGS)$$tags$$unique"; then :; else \
- test -n "$$unique" || unique=$$empty_fix; \
- $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \
- $$tags $$unique; \
- fi
-ctags: CTAGS
-CTAGS: ctags-recursive $(HEADERS) $(SOURCES) $(TAGS_DEPENDENCIES) \
- $(TAGS_FILES) $(LISP)
- tags=; \
- list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \
- unique=`for i in $$list; do \
- if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
- done | \
- $(AWK) '{ files[$$0] = 1; nonempty = 1; } \
- END { if (nonempty) { for (i in files) print i; }; }'`; \
- test -z "$(CTAGS_ARGS)$$tags$$unique" \
- || $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \
- $$tags $$unique
-
-GTAGS:
- here=`$(am__cd) $(top_builddir) && pwd` \
- && cd $(top_srcdir) \
- && gtags -i $(GTAGS_ARGS) $$here
-
-distclean-tags:
- -rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH tags
-
-distdir: $(DISTFILES)
- @srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \
- topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \
- list='$(DISTFILES)'; \
- dist_files=`for file in $$list; do echo $$file; done | \
- sed -e "s|^$$srcdirstrip/||;t" \
- -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \
- case $$dist_files in \
- */*) $(MKDIR_P) `echo "$$dist_files" | \
- sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \
- sort -u` ;; \
- esac; \
- for file in $$dist_files; do \
- if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \
- if test -d $$d/$$file; then \
- dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \
- if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \
- cp -pR $(srcdir)/$$file $(distdir)$$dir || exit 1; \
- fi; \
- cp -pR $$d/$$file $(distdir)$$dir || exit 1; \
- else \
- test -f $(distdir)/$$file \
- || cp -p $$d/$$file $(distdir)/$$file \
- || exit 1; \
- fi; \
- done
- list='$(DIST_SUBDIRS)'; for subdir in $$list; do \
- if test "$$subdir" = .; then :; else \
- test -d "$(distdir)/$$subdir" \
- || $(MKDIR_P) "$(distdir)/$$subdir" \
- || exit 1; \
- distdir=`$(am__cd) $(distdir) && pwd`; \
- top_distdir=`$(am__cd) $(top_distdir) && pwd`; \
- (cd $$subdir && \
- $(MAKE) $(AM_MAKEFLAGS) \
- top_distdir="$$top_distdir" \
- distdir="$$distdir/$$subdir" \
- am__remove_distdir=: \
- am__skip_length_check=: \
- distdir) \
- || exit 1; \
- fi; \
- done
-check-am: all-am
-check: check-recursive
-all-am: Makefile $(LIBRARIES) $(PROGRAMS)
-installdirs: installdirs-recursive
-installdirs-am:
- for dir in "$(DESTDIR)$(libdir)" "$(DESTDIR)$(bindir)"; do \
- test -z "$$dir" || $(MKDIR_P) "$$dir"; \
- done
-install: install-recursive
-install-exec: install-exec-recursive
-install-data: install-data-recursive
-uninstall: uninstall-recursive
-
-install-am: all-am
- @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am
-
-installcheck: installcheck-recursive
-install-strip:
- $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
- install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
- `test -z '$(STRIP)' || \
- echo "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'"` install
-mostlyclean-generic:
-
-clean-generic:
-
-distclean-generic:
- -test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES)
-
-maintainer-clean-generic:
- @echo "This command is intended for maintainers to use"
- @echo "it deletes files that may require special tools to rebuild."
-clean: clean-recursive
-
-clean-am: clean-binPROGRAMS clean-generic clean-libLIBRARIES \
- mostlyclean-am
-
-distclean: distclean-recursive
- -rm -rf ./$(DEPDIR)
- -rm -f Makefile
-distclean-am: clean-am distclean-compile distclean-generic \
- distclean-tags
-
-dvi: dvi-recursive
-
-dvi-am:
-
-html: html-recursive
-
-info: info-recursive
-
-info-am:
-
-install-data-am:
-
-install-dvi: install-dvi-recursive
-
-install-exec-am: install-binPROGRAMS install-libLIBRARIES
-
-install-html: install-html-recursive
-
-install-info: install-info-recursive
-
-install-man:
-
-install-pdf: install-pdf-recursive
-
-install-ps: install-ps-recursive
-
-installcheck-am:
-
-maintainer-clean: maintainer-clean-recursive
- -rm -rf ./$(DEPDIR)
- -rm -f Makefile
-maintainer-clean-am: distclean-am maintainer-clean-generic
-
-mostlyclean: mostlyclean-recursive
-
-mostlyclean-am: mostlyclean-compile mostlyclean-generic
-
-pdf: pdf-recursive
-
-pdf-am:
-
-ps: ps-recursive
-
-ps-am:
-
-uninstall-am: uninstall-binPROGRAMS uninstall-libLIBRARIES
-
-.MAKE: $(RECURSIVE_CLEAN_TARGETS) $(RECURSIVE_TARGETS) install-am \
- install-strip
-
-.PHONY: $(RECURSIVE_CLEAN_TARGETS) $(RECURSIVE_TARGETS) CTAGS GTAGS \
- all all-am check check-am clean clean-binPROGRAMS \
- clean-generic clean-libLIBRARIES ctags ctags-recursive \
- distclean distclean-compile distclean-generic distclean-tags \
- distdir dvi dvi-am html html-am info info-am install \
- install-am install-binPROGRAMS install-data install-data-am \
- install-dvi install-dvi-am install-exec install-exec-am \
- install-html install-html-am install-info install-info-am \
- install-libLIBRARIES install-man install-pdf install-pdf-am \
- install-ps install-ps-am install-strip installcheck \
- installcheck-am installdirs installdirs-am maintainer-clean \
- maintainer-clean-generic mostlyclean mostlyclean-compile \
- mostlyclean-generic pdf pdf-am ps ps-am tags tags-recursive \
- uninstall uninstall-am uninstall-binPROGRAMS \
- uninstall-libLIBRARIES
-
-# Tell versions [3.59,3.63) of GNU make to not export all variables.
-# Otherwise a system limit (for SysV at least) may be exceeded.
-.NOEXPORT:
diff --git a/scripts/training/MGIZA/src/MoveSwapMatrix.cpp b/scripts/training/MGIZA/src/MoveSwapMatrix.cpp
deleted file mode 100644
index 2b0c3a3..0000000
--- a/scripts/training/MGIZA/src/MoveSwapMatrix.cpp
+++ /dev/null
@@ -1,231 +0,0 @@
-/*
-
-Copyright (C) 1999,2000,2001 Franz Josef Och (RWTH Aachen - Lehrstuhl fuer Informatik VI)
-
-This file is part of GIZA++ ( extension of GIZA ).
-
-This program is free software; you can redistribute it and/or
-modify it under the terms of the GNU General Public License
-as published by the Free Software Foundation; either version 2
-of the License, or (at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with this program; if not, write to the Free Software
-Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
-USA.
-
-*/
-#include "MoveSwapMatrix.h"
-
-template<class TRANSPAIR>
-MoveSwapMatrix<TRANSPAIR>::MoveSwapMatrix(const TRANSPAIR&_ef, const alignment&_a)
- : alignment(_a), ef(_ef), l(ef.get_l()), m(ef.get_m()), _cmove(l+1, m+1), _cswap(m+1, m+1),
- delmove(l+1, m+1,0),delswap(m+1, m+1,0),changed(l+2, 0), changedCounter(1),
- modelnr(_ef.modelnr()),lazyEvaluation(0),centerDeleted(0)
-{
- double thisValue=ef.scoreOfAlignmentForChange((*this));
- if( lazyEvaluation==0)
- for(WordIndex j=1;j<=m;j++)updateJ(j, 0,thisValue);
-}
-
-template<class TRANSPAIR>
-void MoveSwapMatrix<TRANSPAIR>::updateJ(WordIndex j, bool useChanged,double thisValue)
-{
- massert( lazyEvaluation==0 );
- for(WordIndex i=0;i<=l;i++)
- if( (useChanged==0||changed[i]!=changedCounter) )
- if( get_al(j)!=i )
- _cmove(i, j)=ef.scoreOfMove((*this), i, j,thisValue);
- else
- _cmove(i, j)=1.0;
- for(WordIndex j2=j+1;j2<=m;j2++)
- if( get_al(j)!=get_al(j2) )
- _cswap(j, j2)=ef.scoreOfSwap((*this), j, j2,thisValue);
- else
- _cswap(j, j2)=1.0;
- for(WordIndex j2=1;j2<j;j2++)
- if( get_al(j)!=get_al(j2) )
- _cswap(j2, j)=ef.scoreOfSwap((*this), j2, j,thisValue);
- else
- _cswap(j2, j)=1.0;
-}
-template<class TRANSPAIR>
-void MoveSwapMatrix<TRANSPAIR>::updateI(WordIndex i,double thisValue)
-{
- massert( lazyEvaluation==0);
- for(WordIndex j=1;j<=m;j++)
- if( get_al(j)!=i )
- _cmove(i, j)=ef.scoreOfMove((*this), i, j,thisValue);
- else
- _cmove(i, j)=1.0;
-}
-
-template<class TRANSPAIR>
-void MoveSwapMatrix<TRANSPAIR>::printWrongs()const{
- for(WordIndex i=0;i<=l;i++)
- {
- for(WordIndex j=1;j<=m;j++)
- if( get_al(j)==i)
- cout << "A";
- else
- {
- LogProb real=_cmove(i, j), wanted=ef.scoreOfMove((*this), i, j);
- if( fabs(1.0-real/wanted)>1e-3 )
- cout << 'b';
- else if(fabs(1.0-real/wanted)>1e-10 )
- cout << 'e';
- else if(real!=wanted)
- cout << 'E';
- else
- cout << ' ';
- }
- cout << endl;
- }
- cout << endl;
- for(WordIndex j=1;j<=m;j++)
- {
- for(WordIndex j1=1;j1<=m;j1++)
- if( j1>j )
- {
- if( get_al(j)==get_al(j1) )
- cout << 'A';
- else
- cout << (_cswap(j, j1)==ef.scoreOfSwap((*this), j, j1));
- }
- else
- cout << ' ';
- cout << endl;
- }
- massert(0);
-}
-template<class TRANSPAIR>
-bool MoveSwapMatrix<TRANSPAIR>::isRight()const{
- if( lazyEvaluation )
- return 1;
- for(WordIndex i=0;i<=l;i++)
- for(WordIndex j=1;j<=m;j++)
- if( get_al(j)!=i && (!(doubleEqual(_cmove(i, j), ef.scoreOfMove((*this), i, j)))) )
- {
- cerr << "DIFF: " << i << " " << j << " " << _cmove(i, j) << " " << ef.scoreOfMove((*this), i, j) << endl;
- return 0;
- }
- for(WordIndex j=1;j<=m;j++)
- for(WordIndex j1=1;j1<=m;j1++)
- if( j1>j&&get_al(j)!=get_al(j1)&&(!doubleEqual(_cswap(j, j1), ef.scoreOfSwap((*this), j, j1))) )
- {
- cerr << "DIFFERENT: " << j << " " << j1 << " " << _cswap(j, j1) << " " << ef.scoreOfSwap((*this), j, j1) << endl;
- return 0;
- }
- return 1;
-}
-
-template<class TRANSPAIR>
-void MoveSwapMatrix<TRANSPAIR>::doMove(WordIndex _i, WordIndex _j)
-{
- WordIndex old_i=get_al(_j);
- if( lazyEvaluation )
- set(_j,_i);
- else
- {
- if ( modelnr==5||modelnr==6 )
- {
- set(_j, _i);
- double thisValue=ef.scoreOfAlignmentForChange((*this));
- for(WordIndex j=1;j<=m;j++)updateJ(j, 0,thisValue);
- }
- else if ( modelnr==4 )
- {
- changedCounter++;
- for(unsigned int k=prev_cept(old_i);k<=next_cept(old_i);++k)changed[k]=changedCounter;
- for(unsigned int k=prev_cept(_i);k<=next_cept(_i);++k)changed[k]=changedCounter;
- set(_j, _i);
- for(unsigned int k=prev_cept(old_i);k<=next_cept(old_i);++k)changed[k]=changedCounter;
- for(unsigned int k=prev_cept(_i);k<=next_cept(_i);++k)changed[k]=changedCounter;
- double thisValue=ef.scoreOfAlignmentForChange((*this));
- for(unsigned int i=0;i<=l;i++)
- if(changed[i]==changedCounter)
- updateI(i,thisValue);
- for(unsigned int j=1;j<=m;j++)
- if( changed[get_al(j)]==changedCounter )
- updateJ(j, 1,thisValue);
- }
- else
- {
- assert(modelnr==3);
- set(_j, _i);
- changedCounter++;
- double thisValue=ef.scoreOfAlignmentForChange((*this));
- updateI(old_i,thisValue);
- changed[old_i]=changedCounter;
- updateI(_i,thisValue);
- changed[_i]=changedCounter;
- for(WordIndex j=1;j<=m;j++)
- if( get_al(j)==_i || get_al(j)==old_i )
- updateJ(j, 1,thisValue);
- }
- }
-}
-template<class TRANSPAIR>
-void MoveSwapMatrix<TRANSPAIR>::doSwap(WordIndex _j1, WordIndex _j2)
-{
- assert( cswap(_j1, _j2)>1 );
- WordIndex i1=get_al(_j1), i2=get_al(_j2);
- if( lazyEvaluation==1 )
- {
- set(_j1, i2);
- set(_j2, i1);
- }
- else
- {
- if ( modelnr==5||modelnr==6 )
- {
- set(_j1, i2);
- set(_j2, i1);
- double thisValue=ef.scoreOfAlignmentForChange((*this));
- for(WordIndex j=1;j<=m;j++)updateJ(j, 0,thisValue);
- }
- else if( modelnr==4 )
- {
- changedCounter++;
- for(unsigned int k=prev_cept(i1);k<=next_cept(i1);++k)changed[k]=changedCounter;
- for(unsigned int k=prev_cept(i2);k<=next_cept(i2);++k)changed[k]=changedCounter;
- set(_j1, i2);
- set(_j2, i1);
- double thisValue=ef.scoreOfAlignmentForChange((*this));
- for(unsigned int i=0;i<=l;i++)
- if(changed[i]==changedCounter)
- updateI(i,thisValue);
- for(unsigned int j=1;j<=m;j++)
- if( changed[get_al(j)]==changedCounter )
- updateJ(j, 1,thisValue);
- }
- else
- {
- assert(modelnr==3);
- set(_j1, i2);
- set(_j2, i1);
- changedCounter++;
- double thisValue=ef.scoreOfAlignmentForChange((*this));
- updateI(i1,thisValue);
- changed[i1]=changedCounter;
- updateI(i2,thisValue);
- changed[i2]=changedCounter;
- updateJ(_j1, 1,thisValue);
- updateJ(_j2, 1,thisValue);
- }
- }
-}
-
-#include "transpair_model3.h"
-#include "transpair_model4.h"
-#include "transpair_model5.h"
-#include "transpair_modelhmm.h"
-template class MoveSwapMatrix<transpair_model3>;
-template class MoveSwapMatrix<transpair_model4>;
-template class MoveSwapMatrix<transpair_model5>;
-template class MoveSwapMatrix<transpair_modelhmm>;
diff --git a/scripts/training/MGIZA/src/MoveSwapMatrix.h b/scripts/training/MGIZA/src/MoveSwapMatrix.h
deleted file mode 100644
index b1bbf15..0000000
--- a/scripts/training/MGIZA/src/MoveSwapMatrix.h
+++ /dev/null
@@ -1,116 +0,0 @@
-/*
-
-EGYPT Toolkit for Statistical Machine Translation
-Written by Yaser Al-Onaizan, Jan Curin, Michael Jahr, Kevin Knight, John Lafferty, Dan Melamed, David Purdy, Franz Och, Noah Smith, and David Yarowsky.
-
-This program is free software; you can redistribute it and/or
-modify it under the terms of the GNU General Public License
-as published by the Free Software Foundation; either version 2
-of the License, or (at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with this program; if not, write to the Free Software
-Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
-USA.
-
-*/
-/*--
-MoveSwapMatrix: Efficient representation for moving and swapping
-around in IBM3 training.
-Franz Josef Och (30/07/99)
---*/
-#ifndef moveswap2_costs_h_defined
-#define moveswap2_costs_h_defined
-#include "alignment.h"
-#include "transpair_model3.h"
-#include "myassert.h"
-
-extern short DoViterbiTraining;
-
-template<class TRANSPAIR>
-class MoveSwapMatrix : public alignment
-{
- private:
- const TRANSPAIR&ef;
- const WordIndex l, m;
- Array2<LogProb, Vector<LogProb> > _cmove, _cswap;
- Array2<char,Vector<char> > delmove,delswap;
- Vector<int> changed;
- int changedCounter;
- const int modelnr;
- bool lazyEvaluation;
- bool centerDeleted;
- public:
- bool check()const
- {
- return 1;
- }
- const TRANSPAIR&get_ef()const
- {return ef;}
- bool isCenterDeleted()const
- {return centerDeleted;}
- bool isLazy()const
- {return lazyEvaluation;}
- MoveSwapMatrix(const TRANSPAIR&_ef, const alignment&_a);
- void updateJ(WordIndex j, bool,double thisValue);
- void updateI(WordIndex i,double thisValue);
- void doMove(WordIndex _i, WordIndex _j);
- void doSwap(WordIndex _j1, WordIndex _j2);
- void delCenter()
- {
- centerDeleted=1;
- }
- void delMove(WordIndex x, WordIndex y)
- {
- delmove(x,y)=1;
- }
- void delSwap(WordIndex x, WordIndex y)
- {
- massert(y>x);
- delswap(x,y)=1;
- delswap(y,x)=1;
- }
- bool isDelMove(WordIndex x, WordIndex y)const
- {
- return DoViterbiTraining||delmove(x,y);
- }
- bool isDelSwap(WordIndex x, WordIndex y)const
- {
- massert(y>x);
- return DoViterbiTraining||delswap(x,y);
- }
- LogProb cmove(WordIndex x, WordIndex y)const
- {
- massert( get_al(y)!=x );
- massert( delmove(x,y)==0 );
- if( lazyEvaluation )
- return ef.scoreOfMove(*this,x,y);
- else
- {
- return _cmove(x, y);
- }
- }
- LogProb cswap(WordIndex x, WordIndex y)const
- {
- massert(x<y);
- massert(delswap(x,y)==0);
- massert(get_al(x)!=get_al(y));
- if( lazyEvaluation )
- return ef.scoreOfSwap(*this,x,y);
- else
- {
- massert(y>x);
- return _cswap(x, y);
- }
- }
- void printWrongs()const;
- bool isRight()const;
- friend ostream&operator<<(ostream&out, const MoveSwapMatrix<TRANSPAIR>&m)
- {return out << (alignment)m << "\nEF:\n"<< m.ef << "\nCMOVE\n"<<m._cmove << "\nCSWAP\n" << m._cswap << endl;};
-};
-#endif
diff --git a/scripts/training/MGIZA/src/NTables.cpp b/scripts/training/MGIZA/src/NTables.cpp
deleted file mode 100644
index e5676d5..0000000
--- a/scripts/training/MGIZA/src/NTables.cpp
+++ /dev/null
@@ -1,184 +0,0 @@
-/*
-
-EGYPT Toolkit for Statistical Machine Translation
-Written by Yaser Al-Onaizan, Jan Curin, Michael Jahr, Kevin Knight, John Lafferty, Dan Melamed, David Purdy, Franz Och, Noah Smith, and David Yarowsky.
-
-This program is free software; you can redistribute it and/or
-modify it under the terms of the GNU General Public License
-as published by the Free Software Foundation; either version 2
-of the License, or (at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with this program; if not, write to the Free Software
-Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
-USA.
-
-*/
-#include "NTables.h"
-#include <iostream>
-#include "defs.h"
-#include <fstream>
-#include "Parameter.h"
-
-GLOBAL_PARAMETER(double,NTablesFactorGraphemes,"nSmooth","smoothing for fertility parameters (good value: 64): weight for wordlength-dependent fertility parameters",PARLEV_SMOOTH,64.0);
-GLOBAL_PARAMETER(double,NTablesFactorGeneral,"nSmoothGeneral","smoothing for fertility parameters (default: 0): weight for word-independent fertility parameters",PARLEV_SMOOTH,0.0);
-
-template <class VALTYPE>
-void nmodel<VALTYPE>::printNTable(int noEW, const char* filename,
- const Vector<WordEntry>& evlist,
- bool actual) const
- // prints the fertility table but with actual sourcce words (not their id)
-{
- cerr << "Dumping nTable to: " << filename << '\n';
- ofstream of(filename);
- VALTYPE p ;
- WordIndex k, i ;
- for(i=1; int(i) < noEW; i++){
- if (evlist[i].freq > 0){
- if (actual)
- of << evlist[i].word << ' ' ;
- else
- of << i << ' ' ;
- for( k=0; k < MAX_FERTILITY; k++){
- p = getValue(i, k);
- if (p <= PROB_SMOOTH)
- p = 0;
- of << p << ' ';
- }
- of << '\n';
- }
- }
-}
-
-template <class VALTYPE>
-void nmodel<VALTYPE>::printRealNTable(int noEW, const char* filename,
- const Vector<WordEntry>& evlist,
- bool actual) const
- // prints the fertility table but with actual sourcce words (not their id)
-{
- cerr << "Dumping nTable to: " << filename << '\n';
- ofstream of(filename);
- VALTYPE p ;
- WordIndex k, i ;
- for(i=1; int(i) < noEW; i++){
- if (evlist[i].freq > 0){
- if (actual)
- of << evlist[i].word << ' ' ;
- else
- of << i << ' ' ;
- for( k=0; k < MAX_FERTILITY; k++){
- p = getValue(i, k);
-// if (p <= PROB_SMOOTH)
-// p = 0;
- of << p << ' ';
- }
- of << '\n';
- }
- }
-}
-
-template <class VALTYPE>
-bool nmodel<VALTYPE>::readNTable(const char *filename){
- /* This function reads the n table from a file.
- Each line is of the format: source_word_id p0 p1 p2 ... pn
- This is the inverse operation of the printTable function.
- NAS, 7/11/99
- */
- ifstream inf(filename);
- if(!inf.is_open()){
- return false;
- }
- cerr << "Reading fertility table from " << filename << "\n";
- if(!inf){
- cerr << "\nERROR: Cannot open " << filename <<"\n";
- return false;
- }
-
- VALTYPE prob;
- WordIndex tok, i;
- int nFert=0;
- while(!inf.eof()){
- nFert++;
- inf >> ws >> tok;
- if (tok > MAX_VOCAB_SIZE){
- cerr << "NTables:readNTable(): unrecognized token id: " << tok
- <<'\n';
- exit(-1);
- }
- for(i = 0; i < MAX_FERTILITY; i++){
- inf >> ws >> prob;
- getRef(tok, i)=prob;
- }
- }
- cerr << "Read " << nFert << " entries in fertility table.\n";
- inf.close();
- return true;
-}
-
-template <class VALTYPE>
-bool nmodel<VALTYPE>::merge(nmodel<VALTYPE>& n,int noEW, const Vector<WordEntry>& evlist){
- /* This function reads the n table from a file.
- Each line is of the format: source_word_id p0 p1 p2 ... pn
- This is the inverse operation of the printTable function.
- NAS, 7/11/99
- */
-
-
- VALTYPE p ;
- WordIndex k, i ;
- for(i=1; int(i) < noEW; i++){
- if (evlist[i].freq > 0){
- for( k=0; k < MAX_FERTILITY; k++){
- p = n.getValue(i, k);
- getRef(i,k)+=p;
- }
- }
- }
- return true;
-}
-
-template <class VALTYPE>
-bool nmodel<VALTYPE>::readAugNTable(const char *filename){
- /* This function reads the n table from a file.
- Each line is of the format: source_word_id p0 p1 p2 ... pn
- This is the inverse operation of the printTable function.
- NAS, 7/11/99
- */
- ifstream inf(filename);
- if(!inf.is_open()){
- return false;
- }
- cerr << "Reading fertility table from " << filename << "\n";
- if(!inf){
- cerr << "\nERROR: Cannot open " << filename <<"\n";
- return false;
- }
-
- VALTYPE prob;
- WordIndex tok, i;
- int nFert=0;
- while(!inf.eof()){
- nFert++;
- inf >> ws >> tok;
- if (tok > MAX_VOCAB_SIZE){
- cerr << "NTables:readNTable(): unrecognized token id: " << tok
- <<'\n';
- exit(-1);
- }
- for(i = 0; i < MAX_FERTILITY; i++){
- inf >> ws >> prob;
- getRef(tok, i)+=prob;
- }
- }
- cerr << "Read " << nFert << " entries in fertility table.\n";
- inf.close();
- return true;
-}
-
-template class nmodel<COUNT>;
-//template class nmodel<PROB>;
diff --git a/scripts/training/MGIZA/src/NTables.h b/scripts/training/MGIZA/src/NTables.h
deleted file mode 100644
index 90d1b3d..0000000
--- a/scripts/training/MGIZA/src/NTables.h
+++ /dev/null
@@ -1,145 +0,0 @@
-/*
-
- EGYPT Toolkit for Statistical Machine Translation
- Written by Yaser Al-Onaizan, Jan Curin, Michael Jahr, Kevin Knight, John Lafferty, Dan Melamed, David Purdy, Franz Och, Noah Smith, and David Yarowsky.
-
- This program is free software; you can redistribute it and/or
- modify it under the terms of the GNU General Public License
- as published by the Free Software Foundation; either version 2
- of the License, or (at your option) any later version.
-
- This program is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- GNU General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program; if not, write to the Free Software
- Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
- USA.
-
- */
-#ifndef _ntables_h
-#define _ntables_h 1
-#include "Array2.h"
-#include "Vector.h"
-#include <assert.h>
-#include "defs.h"
-#include "vocab.h"
-#include "myassert.h"
-#include "Globals.h"
-#include "syncObj.h"
-
-extern double NTablesFactorGraphemes, NTablesFactorGeneral;
-
-template<class VALTYPE> class nmodel {
-private:
- Array2<VALTYPE, Vector<VALTYPE> > ntab;
-public:
- nmodel(int maxw, int maxn) :
- ntab(maxw, maxn, 0.0) {
- }
- VALTYPE getValue(int w, unsigned int n) const {
- massert(w!=0);
- if (n>=ntab.getLen2())
- return 0.0;
- else
- return max(ntab(w, n), VALTYPE(PROB_SMOOTH));
- }
-protected:
- inline VALTYPE&getRef(int w, int n) {
- //massert(w!=0);
- return ntab(w, n);
- };
- Mutex lock;
-public:
- inline void addValue(int w , int n,const VALTYPE& t){lock.lock();ntab(w,n)+=t;lock.unlock();};
-public:
- template<class COUNT> void normalize(nmodel<COUNT>&write,
- const Vector<WordEntry>* _evlist) const {
- int h1=ntab.getLen1(), h2=ntab.getLen2();
- int nParams=0;
- if (_evlist&&(NTablesFactorGraphemes||NTablesFactorGeneral)) {
- size_t maxlen=0;
- const Vector<WordEntry>&evlist=*_evlist;
- for (unsigned int i=1; i<evlist.size(); i++)
- maxlen=max(maxlen, evlist[i].word.length());
- Array2<COUNT,Vector<COUNT> > counts(maxlen+1, MAX_FERTILITY+1, 0.0);
- Vector<COUNT> nprob_general(MAX_FERTILITY+1,0.0);
- for (unsigned int i=1; i<min((unsigned int)h1,
- (unsigned int)evlist.size()); i++) {
- int l=evlist[i].word.length();
- for (int k=0; k<h2; k++) {
- counts(l, k)+=getValue(i, k);
- nprob_general[k]+=getValue(i, k);
- }
- }
- COUNT sum2=0;
- for (unsigned int i=1; i<maxlen+1; i++) {
- COUNT sum=0.0;
- for (int k=0; k<h2; k++)
- sum+=counts(i, k);
- sum2+=sum;
- if (sum) {
- double average=0.0;
- //cerr << "l: " << i << " " << sum << " ";
- for (int k=0; k<h2; k++) {
- counts(i, k)/=sum;
- //cerr << counts(i,k) << ' ';
- average+=k*counts(i, k);
- }
- //cerr << "avg: " << average << endl;
- //cerr << '\n';
- }
- }
- for (unsigned int k=0; k<nprob_general.size(); k++)
- nprob_general[k]/=sum2;
-
- for (int i=1; i<h1; i++) {
- int l=-1;
- if ((unsigned int)i<evlist.size())
- l=evlist[i].word.length();
- COUNT sum=0.0;
- for (int k=0; k<h2; k++)
- sum+=getValue(i, k)+((l==-1) ? 0.0 : (counts(l, k)
- *NTablesFactorGraphemes)) + NTablesFactorGeneral
- *nprob_general[k];
- assert(sum);
- for (int k=0; k<h2; k++) {
- write.getRef(i, k)=(getValue(i, k)+((l==-1) ? 0.0
- : (counts(l, k)*NTablesFactorGraphemes)))/sum
- + NTablesFactorGeneral*nprob_general[k];
- nParams++;
- }
- }
- } else
- for (int i=1; i<h1; i++) {
- COUNT sum=0.0;
- for (int k=0; k<h2; k++)
- sum+=getValue(i, k);
- assert(sum);
- for (int k=0; k<h2; k++) {
- write.getRef(i, k)=getValue(i, k)/sum;
- nParams++;
- }
- }
- cerr << "NTable contains " << nParams << " parameter.\n";
- }
-
- bool merge(nmodel<VALTYPE>& n, int noEW, const Vector<WordEntry>& evlist);
- void clear() {
- int h1=ntab.getLen1(), h2=ntab.getLen2();
- for (int i=0; i<h1; i++)
- for (int k=0; k<h2; k++)
- ntab(i, k)=0;
- }
- void printNTable(int noEW, const char* filename,
- const Vector<WordEntry>& evlist, bool) const;
- void printRealNTable(int noEW, const char* filename,
- const Vector<WordEntry>& evlist, bool) const;
- bool readAugNTable(const char *filename);
- bool readNTable(const char *filename);
-
-};
-
-#endif
diff --git a/scripts/training/MGIZA/src/Parameter.cpp b/scripts/training/MGIZA/src/Parameter.cpp
deleted file mode 100644
index 1175ec7..0000000
--- a/scripts/training/MGIZA/src/Parameter.cpp
+++ /dev/null
@@ -1,149 +0,0 @@
-/*
-
-Copyright (C) 1997,1998,1999,2000,2001 Franz Josef Och (RWTH Aachen - Lehrstuhl fuer Informatik VI)
-
-This file is part of GIZA++ ( extension of GIZA ).
-
-This program is free software; you can redistribute it and/or
-modify it under the terms of the GNU General Public License
-as published by the Free Software Foundation; either version 2
-of the License, or (at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with this program; if not, write to the Free Software
-Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
-USA.
-
-*/
-#include "Parameter.h"
-#include "fstream"
-#ifndef WIN32
-#include "unistd.h"
-#else
-#include <direct.h>
-#define getcwd _getcwd
-#endif
-#include <strstream>
-
-
-bool absolutePathNames=0;
-string ParameterPathPrefix;
-bool ParameterChangedFlag=0;
-
-bool writeParameters(ofstream&of,const ParSet&parset,int level)
-{
- if(!of)return 0;
- for(ParSet::const_iterator i=parset.begin();i!=parset.end();++i)
- {
- if(((*i)->getLevel()==level||level==-1)&&(*i)->onlyCopy==0)
- {
- ostrstream os;
- (*i)->printValue(os);
- os << ends;
- string s(os.str());
- of << (*i)->getString() << " ";
- if( absolutePathNames&&(*i)->isFilename()&&s.length()&&s[0]!='/' )
- {
- char path[1024];
- getcwd(path,1024);
- of << path << '/';
- }
- if( ParameterPathPrefix.length()&&(*i)->isFilename()&&s.length()&&s[0]!='/' )
- of << ParameterPathPrefix << '/';
- (*i)->printValue(of);
- of << endl;
- }
- }
- return 1;
-}
-
-bool readParameters(ifstream&f,const ParSet&parset,int verb,int level)
-{
- string s;
- if(!f)return 0;
- while(getline(f,s))
- {
- istrstream eingabe(s.c_str());
- string s1,s2;
- eingabe>>s1>>s2;
- if(makeSetCommand(s1,s2,parset,verb,level)==0)
- cerr << "ERROR: could not set: (C) " << s1 << " " << s2 << endl;
- }
- return 1;
-}
-
-
-bool makeSetCommand(string _s1,string s2,const ParSet&parset,int verb,int level)
-{
- ParPtr anf;
- int anfset=0;
- string s1=simpleString(_s1);
- for(ParSet::const_iterator i=parset.begin();i!=parset.end();++i)
- {
- if( *(*i)==s1 )
- {
- if( level==-1 || level==(*i)->getLevel() )
- (*i)->setParameter(s2,verb);
- else if(verb>1)
- cerr << "ERROR: Could not set: (A) " << s1 << " " << s2 << " " << level << " " << (*i)->getLevel() << endl;
- return 1;
- }
- else if( (*i)->getString().substr(0,s1.length())==s1 )
- {
- anf=(*i);anfset++;
- }
- }
- if(anfset==1)
- {
- if( level==-1 || level==anf->getLevel() )
- anf->setParameter(s2,verb);
- else if( verb>1 )
- cerr << "ERROR: Could not set: (B) " << s1 << " " << s2 << " " << level << " " << anf->getLevel() << endl;
- return 1;
- }
- if( anfset>1 )
- cerr << "ERROR: ambiguous parameter '" << s1 << "'.\n";
- if( anfset==0 )
- cerr << "ERROR: parameter '" << s1 << "' does not exist.\n";
- return 0;
-}
-
-ostream& printPars(ostream&of,const ParSet&parset,int level)
-{
- if(!of)return of;
- for(ParSet::const_iterator i=parset.begin();i!=parset.end();++i)
- {
- if(((*i)->getLevel()==level||level==-1)&&(*i)->onlyCopy==0)
- {
- (*i)->printAt(of);
- of << endl;
- }
- }
- return of;
-}
-
-string simpleString(const string s)
-{
- string k;
- for(unsigned int i=0;i<s.length();++i)
- {
- char c[2];
- c[0]=tolower(s[i]);
- c[1]=0;
- if( (c[0]>='a'&&c[0]<='z')||(c[0]>='0'&&c[0]<='9') )
- k += c;
- }
- return k;
-}
-
-
-ParSet&getGlobalParSet()
-{
- static ParSet x;
- return x;
-}
diff --git a/scripts/training/MGIZA/src/Parameter.h b/scripts/training/MGIZA/src/Parameter.h
deleted file mode 100644
index 312bfbc..0000000
--- a/scripts/training/MGIZA/src/Parameter.h
+++ /dev/null
@@ -1,204 +0,0 @@
-/*
-
-Copyright (C) 1997,1998,1999,2000,2001 Franz Josef Och (RWTH Aachen - Lehrstuhl fuer Informatik VI)
-
-This file is part of GIZA++ ( extension of GIZA ).
-
-This program is free software; you can redistribute it and/or
-modify it under the terms of the GNU General Public License
-as published by the Free Software Foundation; either version 2
-of the License, or (at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with this program; if not, write to the Free Software
-Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
-USA.
-
-*/
-#ifndef PARAMETER_H_DEFINED
-#define PARAMETER_H_DEFINED
-
-#include "mystl.h"
-#include <set>
-#include "Pointer.h"
-#include <string>
-#include "Globals.h"
-#include <fstream>
-#include <string.h>
-
-#ifdef WIN32
-#define strcasecmp _strcmpi
-#endif
-
-inline unsigned int mConvert(const string&s,unsigned int &i)
-{
- if( strcasecmp(s.c_str(),"yes")==0 || strcasecmp(s.c_str(),"y")==0 || strcasecmp(s.c_str(),"true")==0 || strcasecmp(s.c_str(),"t")==0 ) { cerr << "TRUE\n";return i=1; }
- if( strcasecmp(s.c_str(),"no")==0 || strcasecmp(s.c_str(),"n")==0 || strcasecmp(s.c_str(),"false")==0 || strcasecmp(s.c_str(),"f")==0 ) { cerr << "FALSE\n";return i=0;}
- return i=atoi(s.c_str());
-}
-inline int mConvert(const string&s,int &i){
- if( strcasecmp(s.c_str(),"yes")==0 || strcasecmp(s.c_str(),"y")==0 || strcasecmp(s.c_str(),"true")==0 || strcasecmp(s.c_str(),"t")==0 ) { cerr << "TRUE\n";return i=1;}
- if( strcasecmp(s.c_str(),"no")==0 || strcasecmp(s.c_str(),"n")==0 || strcasecmp(s.c_str(),"false")==0 || strcasecmp(s.c_str(),"f")==0 ) { cerr << "FALSE\n";return i=0;}
- return i=atoi(s.c_str());
-}
-inline double mConvert(const string&s,double &d) { return d=atof(s.c_str()); }
-inline double mConvert(const string&s,float &d) { return d=atof(s.c_str()); }
-inline string mConvert(const string&s,string&n) { return n=s; }
-inline bool mConvert(const string&s,bool&n) {
- if( strcasecmp(s.c_str(),"yes")==0 || strcasecmp(s.c_str(),"y")==0 || strcasecmp(s.c_str(),"true")==0 || strcasecmp(s.c_str(),"t")==0 ) { cerr << "TRUE\n";return n=1;}
- if( strcasecmp(s.c_str(),"no")==0 || strcasecmp(s.c_str(),"n")==0 || strcasecmp(s.c_str(),"false")==0 || strcasecmp(s.c_str(),"f")==0 ) { cerr << "FALSE\n";return n=0;}
- return n=atoi(s.c_str());
-}
-inline short mConvert(const string&s,short&n) {
- if( strcasecmp(s.c_str(),"yes")==0 || strcasecmp(s.c_str(),"y")==0 || strcasecmp(s.c_str(),"true")==0 || strcasecmp(s.c_str(),"t")==0 ) { cerr << "TRUE\n";return n=1;}
- if( strcasecmp(s.c_str(),"no")==0 || strcasecmp(s.c_str(),"n")==0 || strcasecmp(s.c_str(),"false")==0 || strcasecmp(s.c_str(),"f")==0 ) { cerr << "FALSE\n";return n=0;}
- return n=atoi(s.c_str());
-}
-inline unsigned short mConvert(const string&s,unsigned short&n) {
- if( strcasecmp(s.c_str(),"yes")==0 || strcasecmp(s.c_str(),"y")==0 || strcasecmp(s.c_str(),"true")==0 || strcasecmp(s.c_str(),"t")==0 ) { cerr << "TRUE\n";return n=1;}
- if( strcasecmp(s.c_str(),"no")==0 || strcasecmp(s.c_str(),"n")==0 || strcasecmp(s.c_str(),"false")==0 || strcasecmp(s.c_str(),"f")==0 ) { cerr << "FALSE\n";return n=0;}
- return n=atoi(s.c_str());
-}
-
-string simpleString(const string s);
-
-inline int Hashstring(const string& s)
-{
- int sum=0;
- string::const_iterator i=s.begin(),end=s.end();
- for(;i!=end;i++)sum=5*sum+(*i);
- return sum;
-}
-
-class _Parameter
-{
- protected:
- string name;
- bool *ifChanged;
- string description;
- int level;
- bool filename;
- public:
- int onlyCopy;
- _Parameter(string n,bool&b,string desc,int _level,bool _onlyCopy)
- : name(simpleString(n)),ifChanged(&b),description(desc),level(_level),filename(0),onlyCopy(_onlyCopy) {}
- virtual ~_Parameter(){};
- bool operator==(const string&s)const
- { return name== simpleString(s); }
- void setChanged()
- { *ifChanged=true; }
- virtual bool setParameter(string s2,int)=0;
- virtual ostream&printAt(ostream&out)=0;
- virtual ostream&printValue(ostream&out)=0;
- const string&getString() const { return name; }
- int getLevel() const { return level;}
- bool isFilename() { return filename;}
- void setFilename(bool x=1) { filename=x;}
- friend bool operator==(const _Parameter&a,const _Parameter&b)
- { return a.name==b.name; }
- friend bool operator<(const _Parameter&a,const _Parameter&b)
- { return a.name<b.name; }
- friend int Hash(const _Parameter&aaa)
- { return Hashstring(aaa.name); }
- friend ostream&operator<<(ostream&out,const _Parameter&p)
- { return out<<"Parameter: "<<p.name <<endl;}
-};
-
-template<class T>
-class Parameter : public _Parameter
-{
- private:
- T*t;
- public:
- Parameter(string n,bool&b,string desc,T&_t,int level=0,bool onlyCopy=0)
- : _Parameter(n,b,desc,level,onlyCopy),t(&_t) {}
- virtual ~Parameter(){}
- virtual bool setParameter(string s2,int verb)
- {
- T x;
- if( !(*t==mConvert(s2,x)))
- {
- bool printedFirst=0;
- if( verb>1 )
- {
- cout << "Parameter '"<<name <<"' changed from '"<<*t<<"' to '";
- printedFirst=1;
- }
- mConvert(s2,*t);
- if( printedFirst )
- cout << *t <<"'\n";
- setChanged();
- return 1;
- }
- return 0;
- }
- virtual ostream&printAt(ostream&out)
- {return out << name << " = " << *t << " (" << description << ")";}
- virtual ostream&printValue(ostream&out)
- {return out << *t;}
-};
-
-typedef MP<_Parameter> ParPtr;
-
-class ParSet : public set<ParPtr>
-{
- public:
- void insert(const ParPtr&x)
- {
- if( count(x)!=0 )
- cerr << "ERROR: element " << x->getString() << " already inserted.\n";
- set<ParPtr>::insert(x);
- }
-};
-
-bool makeSetCommand(string s1,string s2,const ParSet&pars,int verb=1,int level= -1);
-ostream&printPars(ostream&out,const ParSet&pars,int level=-1);
-bool writeParameters(ofstream&of,const ParSet&parset,int level=0);
-bool readParameters(ifstream&f,const ParSet&parset,int verb=2,int level=0);
-ParSet&getGlobalParSet();
-extern bool ParameterChangedFlag;
-template<class T>const T&addGlobalParameter(const char *name,const char *description,int level,T*adr,const T&init)
-{
- *adr=init;
- getGlobalParSet().insert(new Parameter<T>(name,ParameterChangedFlag,description,*adr,level));
- return init;
-}
-template<class T>const T&addGlobalParameter(const char *name,const char *name2,const char *description,int level,T*adr,const T&init)
-{
- *adr=init;
- getGlobalParSet().insert(new Parameter<T>(name,ParameterChangedFlag,description,*adr,level));
- getGlobalParSet().insert(new Parameter<T>(name2,ParameterChangedFlag,description,*adr,-1));
- return init;
-}
-template<class T>const T&addGlobalParameter(const char *name,const char *name2,const char *name3,const char *description,int level,T*adr,const T&init)
-{
- *adr=init;
- getGlobalParSet().insert(new Parameter<T>(name,ParameterChangedFlag,description,*adr,level));
- getGlobalParSet().insert(new Parameter<T>(name2,ParameterChangedFlag,description,*adr,-1));
- getGlobalParSet().insert(new Parameter<T>(name3,ParameterChangedFlag,description,*adr,-1));
- return init;
-}
-template<class T>const T&addGlobalParameter(const char *name,const char *name2,const char *name3,const char *name4,const char *description,int level,T*adr,const T&init)
-{
- *adr=init;
- getGlobalParSet().insert(new Parameter<T>(name,ParameterChangedFlag,description,*adr,level));
- getGlobalParSet().insert(new Parameter<T>(name2,ParameterChangedFlag,description,*adr,-1));
- getGlobalParSet().insert(new Parameter<T>(name3,ParameterChangedFlag,description,*adr,-1));
- getGlobalParSet().insert(new Parameter<T>(name4,ParameterChangedFlag,description,*adr,-1));
- return init;
-}
-void MakeParameterOptimizing(istream&file,string resultingParameters);
-
-#define GLOBAL_PARAMETER(TYP,VARNAME,NAME,DESCRIPTION,LEVEL,INIT) TYP VARNAME=addGlobalParameter< TYP >(NAME,DESCRIPTION,LEVEL,&VARNAME,INIT);
-#define GLOBAL_PARAMETER2(TYP,VARNAME,NAME,NAME2,DESCRIPTION,LEVEL,INIT) TYP VARNAME=addGlobalParameter< TYP >(NAME,NAME2,DESCRIPTION,LEVEL,&VARNAME,INIT);
-#define GLOBAL_PARAMETER3(TYP,VARNAME,NAME,NAME2,NAME3,DESCRIPTION,LEVEL,INIT) TYP VARNAME=addGlobalParameter< TYP >(NAME,NAME2,NAME3,DESCRIPTION,LEVEL,&VARNAME,INIT);
-#define GLOBAL_PARAMETER4(TYP,VARNAME,NAME,NAME2,NAME3,NAME4,DESCRIPTION,LEVEL,INIT) TYP VARNAME=addGlobalParameter< TYP >(NAME,NAME2,NAME3,NAME4,DESCRIPTION,LEVEL,&VARNAME,INIT);
-
-void setParameterLevelName(unsigned int i,string x);
-
-#endif
diff --git a/scripts/training/MGIZA/src/Perplexity.cpp b/scripts/training/MGIZA/src/Perplexity.cpp
deleted file mode 100644
index faa1f81..0000000
--- a/scripts/training/MGIZA/src/Perplexity.cpp
+++ /dev/null
@@ -1,42 +0,0 @@
-/*
-
-EGYPT Toolkit for Statistical Machine Translation
-Written by Yaser Al-Onaizan, Jan Curin, Michael Jahr, Kevin Knight, John Lafferty, Dan Melamed, David Purdy, Franz Och, Noah Smith, and David Yarowsky.
-
-This program is free software; you can redistribute it and/or
-modify it under the terms of the GNU General Public License
-as published by the Free Software Foundation; either version 2
-of the License, or (at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with this program; if not, write to the Free Software
-Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
-USA.
-
-*/
-/* Perplexity.cc
- * =============
- * Mike Jahr, 7/21/99
- * Machine Translation group, WS99
- * Center for Language and Speech Processing
- *
- * Last Modified by: Yaser Al-Onaizan, August 17, 1999
- *
- * Simple class used to calculate cross entropy and perplexity
- * of models.
- */
-
-#include "Perplexity.h"
-
-void Perplexity::record(string model){
- mutex.lock();
- modelid.push_back(model);
- perp.push_back(perplexity());
- ce.push_back(cross_entropy());
- mutex.unlock();
-}
diff --git a/scripts/training/MGIZA/src/Perplexity.h b/scripts/training/MGIZA/src/Perplexity.h
deleted file mode 100644
index 45cc84f..0000000
--- a/scripts/training/MGIZA/src/Perplexity.h
+++ /dev/null
@@ -1,115 +0,0 @@
-/*
-
-EGYPT Toolkit for Statistical Machine Translation
-Written by Yaser Al-Onaizan, Jan Curin, Michael Jahr, Kevin Knight, John Lafferty, Dan Melamed, David Purdy, Franz Och, Noah Smith, and David Yarowsky.
-
-This program is free software; you can redistribute it and/or
-modify it under the terms of the GNU General Public License
-as published by the Free Software Foundation; either version 2
-of the License, or (at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with this program; if not, write to the Free Software
-Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
-USA.
-
-*/
-/* Perplexity.h
- * ============
- * Mike Jahr, 7/15/99
- * Machine Translation group, WS99
- * Center for Language and Speech Processing
- *
- * Last Modified by: Yaser Al-Onaizan, August 17, 1999
- *
- * Simple class used to calculate cross entropy and perplexity
- * of models.
- */
-
-#ifndef _PERPLEXITY_H
-#define _PERPLEXITY_H
-
-#include <math.h>
-#include <fstream>
-#include "Vector.h"
-#include "defs.h"
-#include "Array2.h"
-#include "Globals.h"
-#include "syncObj.h"
-
-#define CROSS_ENTROPY_BASE 2
-
-class Perplexity {
- private:
- double sum;
- double wc;
- Array2<double, Vector<double> > *E_M_L;
- Vector<string> modelid;
- Vector<double > perp;
- Vector<double > ce;
- Vector<string> name ;
- Mutex mutex;
- public:
- ~Perplexity() { delete E_M_L;}
- Perplexity() {
- E_M_L = new Array2<double, Vector<double> >(MAX_SENTENCE_LENGTH,MAX_SENTENCE_LENGTH);
- unsigned int l, m ;
- Vector<double> fact(MAX_SENTENCE_LENGTH, 1.0);
- for (m = 2 ; m < MAX_SENTENCE_LENGTH ; m++)
- fact[m] = fact[m-1] * m ;
- for (m = 1 ; m < MAX_SENTENCE_LENGTH ; m++)
- for (l = 1 ; l < MAX_SENTENCE_LENGTH ; l++) {
- (*E_M_L)(l, m) = log (pow((LAMBDA * l), double(m)) * exp(-LAMBDA * double(l)) /
- (fact[m])) ;
- }
- sum = 0 ;
- wc = 0;
- perp.clear();
- ce.clear();
- name.clear();
- }
- inline void clear() {
- mutex.lock();
- sum = 0 ;
- wc = 0 ;
- mutex.unlock();
- }
- size_t size() const {return(min(perp.size(), ce.size()));}
- inline void addFactor(const double p, const double count, const int l,
- const int m,bool withPoisson) {
- mutex.lock();
- wc += count * m ; // number of french words
- sum += count * ( (withPoisson?((*E_M_L)(l, m)):0.0) + p) ;
- mutex.unlock();
- }
-
- inline double perplexity() const {
- return exp( -1*sum / wc);
- }
-
- inline double cross_entropy() const {
- return (-1.0*sum / (log(double(CROSS_ENTROPY_BASE)) * wc));
- }
-
- inline double word_count() const {
- return wc;
- }
-
- inline double getSum() const {
- return sum ;
- }
-
- void record(string model);
-
- friend void generatePerplexityReport(const Perplexity&, const Perplexity&,
- const Perplexity&, const Perplexity&,
- ostream&, int, int, bool);
-};
-
-
-#endif
diff --git a/scripts/training/MGIZA/src/Pointer.h b/scripts/training/MGIZA/src/Pointer.h
deleted file mode 100644
index 58de60c..0000000
--- a/scripts/training/MGIZA/src/Pointer.h
+++ /dev/null
@@ -1,175 +0,0 @@
-/*
-
-Copyright (C) 1997,1998,1999,2000,2001 Franz Josef Och (RWTH Aachen - Lehrstuhl fuer Informatik VI)
-
-This file is part of GIZA++ ( extension of GIZA ).
-
-This program is free software; you can redistribute it and/or
-modify it under the terms of the GNU General Public License
-as published by the Free Software Foundation; either version 2
-of the License, or (at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with this program; if not, write to the Free Software
-Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
-USA.
-
-*/
-#ifndef HEADER_Pointer_DEFINED
-#define HEADER_Pointer_DEFINED
-
-#include <assert.h>
-#include <iostream>
-
-template<class T>
-class SmartPointer
-{
- protected:
- T*p;
- public:
- SmartPointer(T*_p=0)
- : p(_p) {}
- inline T&operator*() const
- {return *p;}
- inline T*operator->() const
- {return p;}
- inline operator bool() const
- {return p!=0;}
- inline T*ptr() const
- { return p; }
-};
-template<class T> inline ostream &operator<<(ostream&out,const SmartPointer<T>&s)
-{if( s.ptr() )return out << *s;else return out <<"nullpointer";}
-
-
-template<class T>
-class SmartPointerConst
-{
- protected:
- const T*p;
- public:
- SmartPointerConst(const T*_p=0)
- : p(_p) {}
- inline const T&operator*() const
- {return *p;}
- inline const T*operator->() const
- {return p;}
- inline operator bool() const
- {return p!=0;}
- inline const T*ptr() const
- { return p; }
-};
-template<class T> inline ostream &operator<<(ostream&out,const SmartPointerConst<T>&s)
-{if( s.ptr() )return out << *s;else return out <<"nullpointer";}
-
-template <class T>
-class UP : public SmartPointer<T>
-{
- public:
- UP(T*_p=0)
- : SmartPointer<T>(_p) {}
-};
-template<class T> inline bool operator==(const UP<T>&s1,const UP<T>&s2)
-{return s1.ptr()==s2.ptr();}
-template<class T> inline bool operator<(const UP<T>&s1,const UP<T>&s2)
-{return s1.ptr() < s2.ptr();}
-template<class T> inline int Hash(const UP<T> &wp)
-{if(wp.ptr())return Hash(*wp);else return 0;}
-
-
-template <class T>
-class UPConst : public SmartPointerConst<T>
-{
- public:
- UPConst(const T*_p=0)
- : SmartPointerConst<T>(_p) {}
-};
-template<class T> inline bool operator==(const UPConst<T>&s1,const UPConst<T>&s2)
-{return s1.ptr()==s2.ptr();}
-template<class T> inline bool operator<(const UPConst<T>&s1,const UPConst<T>&s2)
-{return s1.ptr()<s2.ptr();}
-template<class T> inline int Hash(const UPConst<T> &wp)
-{if(wp.ptr())return Hash(*wp);else return 0;}
-
-
-template <class T>
-class MP : public SmartPointer<T>
-{
- public:
- MP(T*_p=0)
- : SmartPointer<T>(_p) {}
-};
-template <class T> inline bool operator==(const MP<T>&s1,const MP<T>&s2)
-{assert(s1);assert(s2);return *s1==*s2;}
-template <class T> inline bool operator<(const MP<T>&s1,const MP<T>&s2)
-{assert(s1);assert(s2);return *s1 < *s2;}
-template <class T> inline int Hash(const MP<T> &wp)
-{if(wp.ptr())return Hash(*wp);else return 0;}
-
-
-template <class T>
-class MPConst : public SmartPointerConst<T>
-{
- public:
- MPConst(const T*_p=0)
- : SmartPointerConst<T>(_p) {}
-};
-template <class T> inline bool operator==(const MPConst<T>&s1,const MPConst<T>&s2)
-{assert(s1);assert(s2);return *s1== *s2;}
-template <class T> inline bool operator<(const MPConst<T>&s1,const MPConst<T>&s2)
-{assert(s1);assert(s2);return *s1 < *s2;}
-template <class T> inline int Hash(const MPConst<T> &wp)
-{if(wp.ptr())return Hash(*wp);else return 0;}
-
-
-template <class T>
-class DELP : public SmartPointer<T>
-{
- private:
- DELP(const DELP<T>&x);
- public:
- const DELP<T>&operator=(DELP<T>&x)
- {
- delete this->p;
- this->p=x.p;x.p=0;
- return *this;
- }
-
- ~DELP()
- { delete this->p;this->p=0;}
- DELP(T*_p=0)
- : SmartPointer<T>(_p) {}
- void set(T*_p)
- {
- delete this->p;
- this->p=_p;
- }
- friend bool operator==(const DELP<T>&s1,const DELP<T>&s2)
- {
- return *(s1.p)== *(s2.p);
- }
- friend bool operator<(const DELP<T>&s1,const DELP<T>&s2)
- {
- return *(s1.p) < *(s2.p);
- }
- friend inline int Hash(const DELP<T> &wp)
- {
- if(wp.p)
- return Hash(*wp.p);
- else
- return 0;
- }
-};
-#endif
-
-
-
-
-
-
-
diff --git a/scripts/training/MGIZA/src/SetArray.cpp b/scripts/training/MGIZA/src/SetArray.cpp
deleted file mode 100644
index d819188..0000000
--- a/scripts/training/MGIZA/src/SetArray.cpp
+++ /dev/null
@@ -1,5 +0,0 @@
-
-//#include "SetArray.h"
-
-#include "Parameter.h"
-
diff --git a/scripts/training/MGIZA/src/SetArray.h b/scripts/training/MGIZA/src/SetArray.h
deleted file mode 100644
index 9c49239..0000000
--- a/scripts/training/MGIZA/src/SetArray.h
+++ /dev/null
@@ -1,159 +0,0 @@
-/*
-Array of set, for fast access of dictionary, and most important,
-be threadsafe
-*/
-
-
-#ifndef __SET_ARRAY_H__
-#define __SET_ARRAY_H__
-
-#include <map>
-#include <vector>
-#include "defs.h"
-#include "vocab.h"
-#include <stdio.h>
-#include <stdlib.h>
-#include <pthread.h>
-#include "syncObj.h"
-
-template <class COUNT, class PROB>
-class LpPair {
-public:
- COUNT count ;
- PROB prob ;
-public: // constructor
- LpPair():count(0), prob(0){} ;
- LpPair(COUNT c, PROB p):count(c), prob(p){};
-} ;
-
-
-
-
-template <class COUNT, class PROB>
-class SetArray{
-public:
- typedef LpPair<COUNT, PROB> CPPair;
-protected:
-
- /*Information stores here*/
- std::vector<std::map<size_t,CPPair> > store;
- std::vector<Mutex> muts;
- size_t nEnglishWord;
- size_t nFrenchWord;
- void _init(){
- store.resize(nEnglishWord);
- muts.resize(nFrenchWord);
- }
-
-public:
-
- /*
- Get reference, not creating
- */
- CPPair* find(size_t fi, size_t si){
- /*HERE: lock, unlock after we get the pointer*/
- muts[fi].lock();
- /* Sync-ed */
- std::map<size_t,CPPair>& w = store[fi];
- typename std::map<size_t,CPPair>::iterator it = w.find((size_t)si);
- CPPair* q = ( it!=store[fi].end() ? &(it->second) : 0);
-// for(it = w.begin(); it!=w.end();it++){
- // cout << it->first << endl;
- // }
- /* End Synced*/
- muts[fi].unlock();
- return q;
- };
-
- /*
- Get reference, creating it
- */
- inline CPPair& findRef(size_t fi, size_t si){
- std::map<size_t,CPPair> &x = store[fi];
- muts[fi].lock();
- /* Sync-ed */
- CPPair& ref= x[si];
- /* End Synced */
- muts[fi].unlock();
- };
-
-
- void insert(size_t fi, size_t si, COUNT count = 0, PROB prob = 0){
- muts[fi].lock();
- /*Syced*/
- std::map<size_t,CPPair> &x = store[fi];
- CPPair& v= x[si];
- v.count = count;
- v.prob = prob;
- muts[fi].unlock();
- }
-
- void incCount(size_t e, size_t f, COUNT inc)
- // increments the count of the given word pair. if the pair does not exist,
- // it creates it with the given value.
- {
- if( inc ){
- std::map<size_t,CPPair> &x = store[e];
- muts[e].lock();
- CPPair& ref= x[f];
- ref.count += inc;
- muts[e].unlock();
- }
- }
-
- PROB getProb(size_t e, size_t f) const
- // read probability value for P(fj/ei) from the hash table
- // if pair does not exist, return floor value PROB_SMOOTH
- {
- muts[e].lock();
- typename std::map<size_t,CPPair >::const_iterator it = store[e].find(f);
- PROB b;
- if(it == store[e].end())
- b = PROB_SMOOTH;
- else
- b=max((it->second).prob, PROB_SMOOTH);
- muts[e].unlock();
- return b;
- }
-
- COUNT getCount(size_t e, size_t f) const
- /* read count value for entry pair (fj/ei) from the hash table */
- {
- muts[e].lock();
- typename std::map<size_t,CPPair >::const_iterator it = store[e].find(f);
- COUNT c;
- if(it == store[e].end())
- c = 0;
- else
- c = ((*it).second).count;
- muts[e].unlock();
- }
-
- void erase(size_t e, size_t f)
- // In: a source and a target token ids.
- // removes the entry with that pair from table
- {
- muts[e].lock();
- store[e].erase(f);
- muts[e].unlock();
- };
-
- inline void setNumberOfEnlish(size_t e){nEnglishWord=e;_init();};
- inline void setNumberOfFrench(size_t f){nFrenchWord = f;};
-
- const std::map<size_t,CPPair>& getMap(size_t i) const{
- return store[i];
- }
-
- std::map<size_t,CPPair>& getMap1(size_t i){
- return store[i];
- }
-
- SetArray(size_t e, size_t f): nEnglishWord(e), nFrenchWord(f){
- _init();
- }
-};
-
-
-
-#endif
diff --git a/scripts/training/MGIZA/src/TTables.cpp b/scripts/training/MGIZA/src/TTables.cpp
deleted file mode 100644
index 1e4f3b6..0000000
--- a/scripts/training/MGIZA/src/TTables.cpp
+++ /dev/null
@@ -1,177 +0,0 @@
-/*
-
-EGYPT Toolkit for Statistical Machine Translation
-Written by Yaser Al-Onaizan, Jan Curin, Michael Jahr, Kevin Knight, John Lafferty, Dan Melamed, David Purdy, Franz Och, Noah Smith, and David Yarowsky.
-
-This program is free software; you can redistribute it and/or
-modify it under the terms of the GNU General Public License
-as published by the Free Software Foundation; either version 2
-of the License, or (at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with this program; if not, write to the Free Software
-Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
-USA.
-
-*/
-#include "TTables.h"
-#include "Parameter.h"
-#include<iostream>
-#include <fstream>
-
-GLOBAL_PARAMETER(float,PROB_CUTOFF,"PROB CUTOFF","Probability cutoff threshold for lexicon probabilities",PARLEV_OPTHEUR,1e-7);
-GLOBAL_PARAMETER2(float, COUNTINCREASE_CUTOFF,"COUNTINCREASE CUTOFF","countCutoff","Counts increment cutoff threshold",PARLEV_OPTHEUR,1e-6);
-
-
-/* ------------------ Method Definiotns for Class tmodel --------------------*/
-
-
-// To output to STDOUT, submit filename as NULL
-template <class COUNT, class PROB>
-void tmodel<COUNT, PROB>::printCountTable(const char *filename,
- const Vector<WordEntry>& evlist,
- const Vector<WordEntry>& fvlist,
- const bool actual) const
-{
- ostream *tof;
-
- if(filename)
- tof = new ofstream(filename);
- else
- tof = & cout;
-
- ostream &of = *tof;
- /* for(unsigned int i=0;i<es.size()-1;++i)
- for(unsigned int j=es[i];j<es[i+1];++j)
- {
- const CPPair&x=fs[j].second;
- WordIndex e=i,f=fs[j].first;
- if( actual )
- of << evlist[e].word << ' ' << fvlist[f].word << ' ' << x.prob << '\n';
- else
- of << e << ' ' << f << ' ' << x.prob << '\n';
- }*/
- for(unsigned int i=0;i<lexmat.size();++i){
- if( lexmat[i] ){
- for(unsigned int j=0;j<lexmat[i]->size();++j)
- {
- const CPPair&x=(*lexmat[i])[j].second;
- WordIndex e=i,f=(*lexmat[i])[j].first;
- if( x.prob>MINCOUNTINCREASE ){
- if( actual ){
- of << evlist[e].word << ' ' << fvlist[f].word << ' ' << x.count << '\n';
- }else{
- of << e << ' ' << f << ' ' << x.count << '\n';
- }
- }
- }
- }
- }
-
- if(filename){
- ((ofstream*)tof)->close();
- delete tof;
- }
-}
-
-template <class COUNT, class PROB>
-void tmodel<COUNT, PROB>::printProbTable(const char *filename,
- const Vector<WordEntry>& evlist,
- const Vector<WordEntry>& fvlist,
- const bool actual) const
-{
- ofstream of(filename);
- /* for(unsigned int i=0;i<es.size()-1;++i)
- for(unsigned int j=es[i];j<es[i+1];++j)
- {
- const CPPair&x=fs[j].second;
- WordIndex e=i,f=fs[j].first;
- if( actual )
- of << evlist[e].word << ' ' << fvlist[f].word << ' ' << x.prob << '\n';
- else
- of << e << ' ' << f << ' ' << x.prob << '\n';
- }*/
- for(unsigned int i=0;i<lexmat.size();++i){
- if( lexmat[i] ){
- for(unsigned int j=0;j<lexmat[i]->size();++j)
- {
- const CPPair&x=(*lexmat[i])[j].second;
- WordIndex e=i,f=(*lexmat[i])[j].first;
- if( x.prob>PROB_SMOOTH ){
- if( actual ){
- of << evlist[e].word << ' ' << fvlist[f].word << ' ' << x.prob << '\n';
- }else{
- of << e << ' ' << f << ' ' << x.prob << '\n';
- }
- }
- }
- }
- }
-}
-
-template <class COUNT, class PROB>
-void tmodel<COUNT, PROB>::printProbTableInverse(const char *,
- const Vector<WordEntry>&,
- const Vector<WordEntry>&,
- const double,
- const double,
- const bool ) const
-{
-}
-template <class COUNT, class PROB>
-void tmodel<COUNT, PROB>::normalizeTable(const vcbList&, const vcbList&, int)
-{
- for(unsigned int i=0;i<lexmat.size();++i){
- double c=0.0;
- if( lexmat[i] ){
- unsigned int lSize=lexmat[i]->size();
- for(unsigned int j=0;j<lSize;++j)
- c+=(*lexmat[i])[j].second.count;
- for(unsigned int j=0;j<lSize;++j) {
- if( c==0 )
- (*lexmat[i])[j].second.prob=1.0/(lSize);
- else
- (*lexmat[i])[j].second.prob=(*lexmat[i])[j].second.count/c;
- (*lexmat[i])[j].second.count=0;
- }
- }
- }
-}
-
-template <class COUNT, class PROB>
-bool tmodel<COUNT, PROB>::readProbTable(const char *filename){
- /* This function reads the t table from a file.
- Each line is of the format: source_word_id target_word_id p(target_word|source_word)
- This is the inverse operation of the printTable function.
- NAS, 7/11/99
- */
- ifstream inf(filename);
- cerr << "Reading t prob. table from " << filename << "\n";
- if (!inf) {
- cerr << "\nERROR: Cannot open " << filename << "\n";
- return false;
- }
- WordIndex src_id, trg_id;
- PROB prob;
- int nEntry=0;
- while (inf >> src_id >> trg_id >> prob) {
- insert(src_id, trg_id, 0.0, prob);
- nEntry++;
- }
- cerr << "Read " << nEntry << " entries in prob. table.\n";
- return true;
-}
-
-
-
-template class tmodel<COUNT,PROB> ;
-
-/* ---------------- End of Method Definitions of class tmodel ---------------*/
-
-
-
diff --git a/scripts/training/MGIZA/src/TTables.h b/scripts/training/MGIZA/src/TTables.h
deleted file mode 100644
index f6be652..0000000
--- a/scripts/training/MGIZA/src/TTables.h
+++ /dev/null
@@ -1,351 +0,0 @@
-/*
-
-EGYPT Toolkit for Statistical Machine Translation
-Written by Yaser Al-Onaizan, Jan Curin, Michael Jahr, Kevin Knight, John Lafferty, Dan Melamed, David Purdy, Franz Och, Noah Smith, and David Yarowsky.
-
-This program is free software; you can redistribute it and/or
-modify it under the terms of the GNU General Public License
-as published by the Free Software Foundation; either version 2
-of the License, or (at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with this program; if not, write to the Free Software
-Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
-USA.
-
-*/
-/* --------------------------------------------------------------------------*
- * *
- * Module : TTables *
- * *
- * Prototypes File: TTables.h *
- * *
- * Objective: Defines clases and methods for handling I/O for Probability & *
- * Count tables and also alignment tables *
- *****************************************************************************/
-
-#ifndef _ttables_h
-#define _ttables_h 1
-
-
-#include "defs.h"
-#include "vocab.h"
-
-#include <assert.h>
-
-#include <iostream>
-#include <algorithm>
-#include <functional>
-#include <map>
-#include <set>
-#include "Vector.h"
-#include <utility>
-#include "syncObj.h"
-
-#if __GNUC__>2
-#include <ext/hash_map>
-using __gnu_cxx::hash_map;
-#else
-#include <hash_map>
-#endif
-
-#include <fstream>
-
-#include "Globals.h"
-
-
-/* The tables defined in the following classes are defined as hash tables. For
- example. the t-table is a hash function of a word pair; an alignment is
- a hash function of a vector of integer numbers (sentence positions) and so
- on */
-
-
-/*----------- Defnition of Hash Function for class tmodel ------- -----------*/
-
-typedef pair<WordIndex, WordIndex> wordPairIds;
-
-
-
-class hashpair : public unary_function< pair<WordIndex, WordIndex>, size_t >
-{
-public:
- size_t operator() (const pair<WordIndex, WordIndex>& key) const
- {
- return (size_t) MAX_W*key.first + key.second; /* hash function and it
- is guarnteed to have
- unique id for each
- unique pair */
- }
- #ifdef WIN32
- inline bool operator() (const pair<WordIndex, WordIndex>& key, const pair<WordIndex, WordIndex>& key2){
- return key.first==key2.first && key.second==key2.second;
- }
- enum
- { // parameters for hash table
- bucket_size = 1 // 0 < bucket_size
- };
- #endif
-};
-
-
-
-/* ------------------ Class Prototype Definitions ---------------------------*
- Class Name: tmodel
- Objective: This defines the underlying data structur for t Tables and t
- Count Tables. They are defined as a hash table. Each entry in the hash table
- is the probability (P(fj/ei) ) or count collected for ( C(fj/ei)). The
- probability and the count are represented as log integer probability as
- defined by the class LogProb .
-
- This class is used to represents t Tables (probabiliity) and n (fertility
- Tables and also their corresponding count tables .
-
- *---------------------------------------------------------------------------*/
-
-//typedef float COUNT ;
-//typedef LogProb PROB ;
-template <class COUNT, class PROB>
-class LpPair {
-public:
- COUNT count ;
- PROB prob ;
-public: // constructor
- LpPair():count(0), prob(0){} ;
- LpPair(COUNT c, PROB p):count(c), prob(p){};
-} ;
-
-template<class T>
-T*mbinary_search(T*x,T*y,unsigned int val)
-{
- if( y-x==0 )
- return 0;
- if( x->first==val)
- return x;
- if( y-x<2 )
- return 0;
- T*mid=x+(y-x)/2;
- if( val < mid->first )
- return mbinary_search(x,mid,val);
- else
- return mbinary_search(mid,y,val);
-
-}
-
-template<class T>
-const T*mbinary_search(const T*x,const T*y,unsigned int val)
-{
- if( y-x==0 )
- return 0;
- if( x->first==val)
- return x;
- if( y-x<2 )
- return 0;
- const T*mid=x+(y-x)/2;
- if( val < mid->first )
- return mbinary_search(x,mid,val);
- else
- return mbinary_search(mid,y,val);
-
-}
-
-template <class COUNT, class PROB>
-class tmodel{
- typedef LpPair<COUNT, PROB> CPPair;
-public:
- bool recordDiff;
-
-public:
- int noEnglishWords; // total number of unique source words
- int noFrenchWords; // total number of unique target words
- //vector<pair<unsigned int,CPPair> > fs;
- //vector<unsigned int> es;
- vector< vector<pair<unsigned int,CPPair> >* > lexmat;
- vector< Mutex* > mutex;
-
- void erase(WordIndex e, WordIndex f){
- CPPair *p=find(e,f);
- if(p)
- *p=CPPair(0,0);
- };
-
- CPPair*find(int e,int f){
- //pair<unsigned int,CPPair> *be=&(fs[0])+es[e];
- //pair<unsigned int,CPPair> *en=&(fs[0])+es[e+1];
- if(e>=lexmat.size()||lexmat[e]==NULL){
- return NULL;
- }
- pair<unsigned int,CPPair> *be=&(*lexmat[e])[0];
- pair<unsigned int,CPPair> *en=&(*lexmat[e])[0]+(*lexmat[e]).size();
- pair<unsigned int,CPPair> *x= mbinary_search(be,en,f);
- if( x==0 ){
- //cerr << "A:DID NOT FIND ENTRY: " << e << " " << f << '\n';
- //abort();
- return 0;
- }
- return &(x->second);
- }
-
- const CPPair*find(int e,int f)const{
- if(e>=lexmat.size()||lexmat[e]==NULL){
- return NULL;
- }
- const pair<unsigned int,CPPair> *be=&(*lexmat[e])[0];
- const pair<unsigned int,CPPair> *en=&(*lexmat[e])[0]+(*lexmat[e]).size();
- //const pair<unsigned int,CPPair> *be=&(fs[0])+es[e];
- //const pair<unsigned int,CPPair> *en=&(fs[0])+es[e+1];
- const pair<unsigned int,CPPair> *x= mbinary_search(be,en,f);
- if( x==0 ){
- //cerr << "B:DID NOT FIND ENTRY: " << e << " " << f << '\n';
- //abort();
- return 0;
- }
-
- return &(x->second);
- }
-public:
- void insert(WordIndex e, WordIndex f, COUNT cval=0.0, PROB pval = 0.0){
- CPPair* found = find(e,f);
- if(found)
- *found=CPPair(cval,pval);
- }
-
- CPPair*getPtr(int e,int f){return find(e,f);}
-
- tmodel(){};
- tmodel(const string&fn) {
- recordDiff = false;
- int count=0,count2=0;
- ifstream infile2(fn.c_str());
- cerr << "Inputfile in " << fn << endl;
- int e,f,olde=-1,oldf=-1;
- pair<unsigned int,CPPair> cp;
- vector< pair<unsigned int,CPPair> > cps;
- while(infile2>>e>>f){
- cp.first=f;
- assert(e>=olde);
- assert(e>olde ||f>oldf);
- if( e!=olde&&olde>=0 ){
- int oldsize=lexmat.size();
- lexmat.resize(olde+1);
- for(unsigned int i=oldsize;i<lexmat.size();++i)
- lexmat[i]=0;
- lexmat[olde]=new vector< pair<unsigned int,CPPair> > (cps);
- cps.clear();
- if( !((*lexmat[olde]).size()==(*lexmat[olde]).capacity()) )
- cerr << "eRROR: waste of memory: " << (*lexmat[olde]).size() << " " << (*lexmat[olde]).capacity() << endl;
- count2+=lexmat[olde]->capacity();
- }
- cps.push_back(cp);
- olde=e;
- oldf=f;
- count++;
- }
- lexmat.resize(olde+1);
- lexmat[olde]=new vector< pair<unsigned int,CPPair> > (cps);
- count2+=lexmat[olde]->capacity();
- cout << "There are " << count << " " << count2 << " entries in table" << '\n';
- mutex.resize(lexmat.size());
- for(int _i = 0; _i< lexmat.size();_i++){
- mutex[_i] = new Mutex();
- }
- /* Create mutex */
- }
-
- ~tmodel(){
- for(int _i = 0; _i< lexmat.size();_i++){
- delete mutex[_i];
- }
-
- }
-
-
- /* tmodel(const string&fn)
- {
- size_t count=0;
- {
- ifstream infile1(fn.c_str());
- if( !infile1 )
- {
- cerr << "ERROR: can't read coocurrence file " << fn << '\n';
- abort();
- }
- int e,f;
- while(infile1>>e>>f)
- count++;
- }
- cout << "There are " << count << " entries in table" << '\n';
- ifstream infile2(fn.c_str());
- fs.resize(count);
- int e,f,olde=-1,oldf=-1;
- pair<unsigned int,CPPair> cp;
- count=0;
- while(infile2>>e>>f)
- {
- assert(e>=olde);
- assert(e>olde ||f>oldf);
- if( e!=olde )
- {
- es.resize(e+1);
- for(unsigned int i=olde+1;int(i)<=e;++i)
- es[i]=count;
- }
- cp.first=f;
- assert(count<fs.size());
- fs[count]=cp;
- //fs.push_back(cp);
- olde=e;
- oldf=f;
- count++;
- }
- assert(count==fs.size());
- es.push_back(fs.size());
- cout << fs.size() << " " << count << " coocurrences read" << '\n';
- }*/
-
- void incCount(WordIndex e, WordIndex f, COUNT inc) {
- if( inc ){
- CPPair *p=find(e,f);
- if( p ){
- mutex[e]->lock();
- p->count += inc ;
- mutex[e]->unlock();
- }
- }
- }
-
- PROB getProb(WordIndex e, WordIndex f) const{
- const CPPair *p=find(e,f);
- if( p )
- return max(p->prob, PROB_SMOOTH);
- else
- return PROB_SMOOTH;
- }
-
- COUNT getCount(WordIndex e, WordIndex f) const
- {
- const CPPair *p=find(e,f);
- if( p )
- return p->count;
- else
- return 0.0;
- }
-
- void printProbTable(const char* filename, const Vector<WordEntry>&, const Vector<WordEntry>&,bool actual) const;
- void printCountTable(const char* filename, const Vector<WordEntry>&, const Vector<WordEntry>&,bool actual) const;
- void printProbTableInverse(const char *filename,
- const Vector<WordEntry>& evlist,
- const Vector<WordEntry>& fvlist,
- const double eTotal,
- const double fTotal,
- const bool actual = false ) const;
- void normalizeTable(const vcbList&engl, const vcbList&french, int iter=2);
- bool readProbTable(const char *filename);
- bool readSubSampledProbTable(const char* filename, std::set<WordIndex> &e, std::set<WordIndex> &f);
-};
-
-#endif
diff --git a/scripts/training/MGIZA/src/Vector.h b/scripts/training/MGIZA/src/Vector.h
deleted file mode 100644
index a48fd3e..0000000
--- a/scripts/training/MGIZA/src/Vector.h
+++ /dev/null
@@ -1,423 +0,0 @@
-/*
-
-EGYPT Toolkit for Statistical Machine Translation
-Written by Yaser Al-Onaizan, Jan Curin, Michael Jahr, Kevin Knight, John Lafferty, Dan Melamed, David Purdy, Franz Och, Noah Smith, and David Yarowsky.
-
-This program is free software; you can redistribute it and/or
-modify it under the terms of the GNU General Public License
-as published by the Free Software Foundation; either version 2
-of the License, or (at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with this program; if not, write to the Free Software
-Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
-USA.
-
-*/
-/*--
-Vector: checked vector implementation
-
-Franz Josef Och (30/07/99)
---*/
-#ifndef ARRAY_H_DEFINED
-#define ARRAY_H_DEFINED
-#include "mystl.h"
-#include <algorithm>
-#include <string>
-#include <utility>
-#include <functional>
-#include <assert.h>
-
-
-#ifdef NDEBUG
-
-#include <vector>
-#define Vector vector
-template<class T> ostream& operator<<(ostream&o, const Vector<T>&a)
-{
- o << "Vector(" << a.size() << "){ ";
- for(unsigned int iii=0;iii<a.size();iii++)
- o << " " << iii<< ": " << a[iii]<<" ;";
- return o << "}\n";
-}
-
-#else
-
-#define ARRAY_DEBUG
-#define memo_del(a, b)
-#define memo_new(a)
-
-template<class T> class Vector
-{
-private:
- T *p;
- int realSize;
- int maxWritten;
-
- void copy(T *a, const T *b, int n);
- void copy(T *a, T *b, int n);
- void _expand();
-public:
- Vector()
- : p(0), realSize(0), maxWritten(-1)
- {
-#ifdef VERY_ARRAY_DEBUG
- cout << "MAKE ARRAY: " << this<<" "<<(void*)p << '\n';
-#endif
- }
- Vector(const Vector<T> &x)
- : p(new T[x.maxWritten+1]), realSize(x.maxWritten+1), maxWritten(x.maxWritten)
- {
- memo_new(p);
- copy(p, x.p, realSize);
-#ifdef VERY_ARRAY_DEBUG
- cout << "MAKE ARRAY copy: " << this << " " << realSize <<" "<<(void*)p<< '\n';
-#endif
- }
- explicit Vector(int n)
- : p(new T[n]), realSize(n), maxWritten(n-1)
- {
- memo_new(p);
-#ifdef VERY_ARRAY_DEBUG
- cout << "MAKE ARRAY with parameter n: " << this << " " << realSize<<" "<<(void*)p << '\n';
-#endif
- }
- Vector(int n, const T&_init)
- : p(new T[n]), realSize(n), maxWritten(n-1)
- {
- memo_new(p);
- for(int iii=0;iii<n;iii++)p[iii]=_init;
-#ifdef VERY_ARRAY_DEBUG
- cout << "MAKE ARRAY with parameter n and init: " << this << " " << realSize<<" "<<(void*)p << '\n';
-#endif
- }
-
- ~Vector()
- {
-#ifdef VERY_ARRAY_DEBUG
- cout << "FREE ARRAY: " << this << " " << realSize<<" "<<(void*)p << '\n';
-#endif
- delete [] p;
- memo_del(p, 1);
-#ifndef NDEBUG
- p=0;realSize=-1;maxWritten=-1;
-#endif
- }
-
- Vector<T>& operator=(const Vector<T>&x)
- {
- if( this!= &x )
- {
-#ifdef VERY_ARRAY_DEBUG
- cout << "FREE ARRAY because of operator=: " << this << " " << realSize<<" "<<(void*)p << '\n';
-#endif
- delete [] p;
- memo_del(p, 1);
- realSize = x.maxWritten+1;
- maxWritten = x.maxWritten;
- p = new T[realSize];
- memo_new(p);
- copy(p, x.p, realSize);
-#ifdef VERY_ARRAY_DEBUG
- cout << "NEW ARRAY because of operator=: " << this << " " << realSize<<" "<<(void*)p << '\n';
-#endif
- }
- return *this;
- }
-
- Vector<T>& operator=(Vector<T>&x)
- {
- if( this!= &x )
- {
-#ifdef VERY_ARRAY_DEBUG
- cout << "FREE ARRAY because of operator=: " << this << " " << realSize<<" "<<(void*)p << '\n';
-#endif
- delete [] p;
- memo_del(p, 1);
- realSize = x.maxWritten+1;
- maxWritten = x.maxWritten;
- p = new T[realSize];
- memo_new(p);
- copy(p, x.p, realSize);
-#ifdef VERY_ARRAY_DEBUG
- cout << "NEW ARRAY because of operator=: " << this << " " << realSize<<" "<<(void*)p << '\n';
-#endif
- }
- return *this;
- }
-
- void allowAccess(int n)
- {
- while( realSize<=n )
- _expand();
- maxWritten=max(maxWritten, n);
- assert( maxWritten<realSize );
- }
- void resize(int n)
- {
- while( realSize<n )
- _expand();
- maxWritten=n-1;
- }
- void clear()
- {
- resize(0);
- }
- void reserve(int n)
- {
- int maxOld=maxWritten;
- resize(n);
- maxWritten=maxOld;
- }
- void sort(int until=-1)
- {
- if( until== -1 ) until=size();
- std::sort(p, p+until);
- }
- void invsort(int until=-1)
- {
- if( until== -1 ) until=size();
- std::sort(p, p+until, greater<T>());
- }
- void init(int n, const T&_init)
- {
-#ifdef VERY_ARRAY_DEBUG
- cout << "FREE ARRAY because of init: " << this << " " << realSize<<" "<<(void*)p << '\n';
-#endif
- delete []p;
- memo_del(p, 1);
- p=new T[n];
- memo_new(p);
- realSize=n;
- maxWritten=n-1;
- for(int iii=0;iii<n;iii++)p[iii]=_init;
-#ifdef VERY_ARRAY_DEBUG
- cout << "NEW ARRAY because of init: " << this << " " << realSize<<" "<<(void*)p << '\n';
-#endif
- }
- inline unsigned int size() const
- {assert( maxWritten<realSize );
- return maxWritten+1;}
- inline int low() const
- { return 0; }
- inline int high() const
- { return maxWritten; }
- int findMax() const;
- int findMin() const;
- void errorAccess(int n) const;
- inline T*getPointerToData(){return p;}
- inline T*begin(){return p;}
- inline T*end(){return p+maxWritten+1;}
- inline T& operator[](int n)
- {
-#ifndef NDEBUG
- if( n<0 || n>maxWritten )
- errorAccess(n);
-#endif
- return p[n];
- }
- inline const T& operator[](int n) const
- {
-#ifndef NDEBUG
- if(n<0 || n>maxWritten )
- errorAccess(n);
-#endif
- return p[n];
- }
- inline const T& get(int n) const
- {
-#ifndef NDEBUG
- if(n<0 || n>maxWritten )
- errorAccess(n);
-#endif
- return p[n];
- }
- const T&top(int n=0) const
- {return (*this)[maxWritten-n];}
- T&top(int n=0)
- {return (*this)[maxWritten-n];}
- const T&back(int n=0) const
- {return (*this)[maxWritten-n];}
- T&back(int n=0)
- {return (*this)[maxWritten-n];}
- T&push_back(const T&x)
- {
- allowAccess(maxWritten+1);
- (*this)[maxWritten]=x;
- return top();
- }
- bool writeTo(ostream&out) const
- {
- out << "Vector ";
- out << size() << " ";
- //out << a << '\n';
- for(int iv=0;iv<=maxWritten;iv++)
- {
- writeOb(out, (*this)[iv]);
- out << '\n';
- }
- return 1;
- }
- bool readFrom(istream&in)
- {
- string s;
- if( !in )
- {
- cerr << "ERROR(Vector): file cannot be opened.\n";
- return 0;
- }
- in >> s;
- if( !(s=="Vector") )
- {
- cerr << "ERROR(Vector): Vector!='"<<s<<"'\n";
- return 0;
- }
- int biggest;
- in >> biggest;
- // in >> a;
- resize(biggest);
- for(int iv=0;iv<size();iv++)
- {
- readOb(in, (*this)[iv]);
- }
- return 1;
- }
-};
-
-template<class T> bool operator==(const Vector<T> &x, const Vector<T> &y)
-{
- if( &x == &y )
- return 1;
- else
- {
- if( y.size()!=x.size() )
- return 0;
- else
- {
- for(unsigned int iii=0;iii<x.size();iii++)
- if( !(x[iii]==y[iii]) )
- return 0;
- return 1;
- }
- }
-}
-template<class T> bool operator!=(const Vector<T> &x, const Vector<T> &y)
-{
- return !(x==y);
-}
-
-template<class T> bool operator<(const Vector<T> &x, const Vector<T> &y)
-{
- if( &x == &y )
- return 0;
- else
- {
- if( y.size()<x.size() )
- return !(y<x);
- for(int iii=0;iii<x.size();iii++)
- {
- assert( iii!=y.size() );
- if( x[iii]<y[iii] )
- return 1;
- else if( y[iii]<x[iii] )
- return 0;
- }
- return x.size()!=y.size();//??
- }
-}
-
-
-template<class T> void Vector<T>:: errorAccess(int n) const
-{
- cerr << "ERROR: Access to array element " << n
- << " (" << maxWritten << ", " << realSize << ", " << (void*)p << ")\n";
- cout << "ERROR: Access to array element " << n
- << " (" << maxWritten << ", " << realSize << ", " << (void*)p << ")\n";
- assert(0);
-#ifndef DEBUG
- abort();
-#endif
-}
-
-template<class T> ostream& operator<<(ostream&o, const Vector<T>&a)
-{
- o << "Vector(" << a.size() << "){ ";
- for(unsigned int iii=0;iii<a.size();iii++)
- o << " " << iii<< ": " << a[iii]<<" ;";
- return o << "}\n";
-}
-
-template<class T> istream& operator>>(istream&in, Vector<T>&)
-{return in;}
-
-template<class T> int Hash(const Vector<T>&a)
-{
- int n=0;
- for(int iii=0;iii<a.size();iii++)
- n+=Hash(a[iii])*(iii+1);
- return n+a.size()*47;
-}
-template<class T> void Vector<T>::copy(T *aa, const T *bb, int n)
-{
- for(int iii=0;iii<n;iii++)
- aa[iii]=bb[iii];
-}
-template<class T> void Vector<T>::copy(T *aa, T *bb, int n)
-{
- for(int iii=0;iii<n;iii++)
- aa[iii]=bb[iii];
-}
-
-template<class T> void Vector<T>::_expand()
-{
-#ifdef VERY_ARRAY_DEBUG
- cout << "FREE ARRAY because of _expand: " << this << " " << realSize<<" "<<(void*)p << '\n';
-#endif
- T *oldp=p;
- int oldsize=realSize;
- realSize=realSize*2+1;
- p=new T[realSize];
- memo_new(p);
- copy(p, oldp, oldsize);
- delete [] oldp;
- memo_del(oldp, 1);
-#ifdef VERY_ARRAY_DEBUG
- cout << "NEW ARRAY because of _expand: " << this << " " << realSize<<" "<<(void*)p << '\n';
-#endif
-}
-
-template<class T> int Vector<T>::findMax() const
-{
- if( size()==0 )
- return -1;
- else
- {
- int maxPos=0;
- for(int iii=1;iii<size();iii++)
- if( (*this)[maxPos]<(*this)[iii] )
- maxPos=iii;
- return maxPos;
- }
-}
-template<class T> int Vector<T>::findMin() const
-{
- if( size()==0 )
- return -1;
- else
- {
- int minPos=0;
- for(int iii=1;iii<size();iii++)
- if( (*this)[iii]<(*this)[minPos] )
- minPos=iii;
- return minPos;
- }
-}
-
-#endif
-
-#endif
diff --git a/scripts/training/MGIZA/src/WordClasses.h b/scripts/training/MGIZA/src/WordClasses.h
deleted file mode 100644
index 7992553..0000000
--- a/scripts/training/MGIZA/src/WordClasses.h
+++ /dev/null
@@ -1,103 +0,0 @@
-/*
-
-Copyright (C) 2000,2001 Franz Josef Och (RWTH Aachen - Lehrstuhl fuer Informatik VI)
-
-This file is part of GIZA++ ( extension of GIZA ).
-
-This program is free software; you can redistribute it and/or
-modify it under the terms of the GNU General Public License
-as published by the Free Software Foundation; either version 2
-of the License, or (at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with this program; if not, write to the Free Software
-Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
-USA.
-
-*/
-#ifndef WordClasses_h_DEFINED
-#define WordClasses_h_DEFINED
-#include <map>
-#include <string>
-#include <set>
-#include "vocab.h"
-
-class WordClasses
-{
- private:
- map<string,string> Sw2c;
- map<string,int> Sc2int;
- Vector<string> Sint2c;
- Vector<int> w2c;
- unsigned int classes;
- public:
- WordClasses()
- : classes(1)
- {
- Sint2c.push_back("0");
- Sc2int["0"]=0;
- }
- template<class MAPPER> bool read(istream&in,const MAPPER&m,const vcbList& vcb)
- {
- string sline;
- int maxword=0;
- int readWord=0, putWord=0;
- while(getline(in,sline))
- {
- readWord ++;
- string word,wclass;
- istrstream iline(sline.c_str());
- iline>>word>>wclass;
-
- if( !Sc2int.count(wclass) )
- {
- Sc2int[wclass]=classes++;
- Sint2c.push_back(wclass);
- assert(classes==Sint2c.size());
- }
- if(vcb.has_word(word)){
- maxword=max(m(word),maxword);
- assert(Sw2c.count(word)==0);
- Sw2c[word]=wclass;
- putWord++;
- }
- }
- w2c=Vector<int>(maxword+1,0);
- for(map<string,string>::const_iterator i=Sw2c.begin();i!=Sw2c.end();++i)
- w2c[m(i->first)]=Sc2int[i->second];
- cout << "Read classes: #words: " << maxword << " " << " #classes: "<< classes <<endl;
- cout << "Actual number of read words: " << readWord << " stored words: " << putWord << endl;
- return 1;
- }
- int getClass(int w)const
- {
- if(w>=0&&int(w)<int(w2c.size()) )
- return w2c[w];
- else
- return 0;
- }
- int operator()(const string&x)const
- {
- if( Sc2int.count(x) )
- return Sc2int.find(x)->second;
- else
- {
- cerr << "WARNING: class " << x << " not found.\n";
- return 0;
- }
- }
- string classString(unsigned int cnr)const
- {
- if( cnr<Sint2c.size())
- return Sint2c[cnr];
- else
- return string("0");
- }
-};
-
-#endif
diff --git a/scripts/training/MGIZA/src/alignment.cpp b/scripts/training/MGIZA/src/alignment.cpp
deleted file mode 100644
index 55a2e5c..0000000
--- a/scripts/training/MGIZA/src/alignment.cpp
+++ /dev/null
@@ -1,38 +0,0 @@
-/*
-
-EGYPT Toolkit for Statistical Machine Translation
-Written by Yaser Al-Onaizan, Jan Curin, Michael Jahr, Kevin Knight, John Lafferty, Dan Melamed, David Purdy, Franz Och, Noah Smith, and David Yarowsky.
-
-This program is free software; you can redistribute it and/or
-modify it under the terms of the GNU General Public License
-as published by the Free Software Foundation; either version 2
-of the License, or (at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with this program; if not, write to the Free Software
-Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
-USA.
-
-*/
-/*--
-alignment: 'checked' alignment representation with automatic calculation
- of fertilities
-Franz Josef Och (30/07/99)
---*/
-#include "alignment.h"
-
-ostream&operator<<(ostream&out, const alignment&a)
-{
- int m=a.a.size()-1,l=a.f.size()-1;
- out << "AL(l:"<<l<<",m:"<<m<<")(a: ";
- for(int j=1;j<=m;j++)out << a(j) << ' ';
- out << ")(fert: ";
- for(int i=0;i<=l;i++)out << a.fert(i) << ' ';
- return out << ") c:"<<"\n";
-}
-
diff --git a/scripts/training/MGIZA/src/alignment.h b/scripts/training/MGIZA/src/alignment.h
deleted file mode 100644
index 17774c6..0000000
--- a/scripts/training/MGIZA/src/alignment.h
+++ /dev/null
@@ -1,227 +0,0 @@
-/*
-
-EGYPT Toolkit for Statistical Machine Translation
-Written by Yaser Al-Onaizan, Jan Curin, Michael Jahr, Kevin Knight, John Lafferty, Dan Melamed, David Purdy, Franz Och, Noah Smith, and David Yarowsky.
-
-This program is free software; you can redistribute it and/or
-modify it under the terms of the GNU General Public License
-as published by the Free Software Foundation; either version 2
-of the License, or (at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with this program; if not, write to the Free Software
-Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
-USA.
-
-*/
-/*--
-alignment: 'checked' alignment representation with autom. calc. of fertilities
-Franz Josef Och (30/07/99)
---*/
-#ifndef alignment_h_fjo_defined
-#define alignment_h_fjo_defined
-#include "Vector.h"
-#include <assert.h>
-#include "defs.h"
-#include "myassert.h"
-
-class al_struct
-{
- public:
- al_struct()
- : prev(0),next(0){}
- PositionIndex prev,next;
-};
-
-
-class alignment
-{
- private:
- Vector<PositionIndex> a;
- Vector<PositionIndex> positionSum,f;
- public:
- Vector<PositionIndex> als_i;
- Vector<al_struct> als_j;
- PositionIndex l,m;
- alignment()
- {}
- alignment(PositionIndex _l, PositionIndex _m)
- : a(_m+1, (PositionIndex)0),
- positionSum(_l+1, (PositionIndex)0), f(_l+1, (PositionIndex)0), als_i(_l+1,0),als_j(_m+1),l(_l), m(_m)
- {
- f[0]=m;
- for(PositionIndex j=1;j<=m;j++)
- {
- if( j>1 )
- als_j[j].prev= j-1;
- if( j<m )
- als_j[j].next= j+1;
- }
- als_i[0]=1;
- }
- PositionIndex get_l()const
- {return l;}
- PositionIndex get_m()const
- {return m;}
- void doMove(int i,int j)
- {
- set(j,i);
- }
- void doSwap(int j1,int j2)
- {
- int aj1=a[j1],aj2=a[j2];
- set(j1,aj2);
- set(j2,aj1);
- }
- void set(PositionIndex j, PositionIndex aj)
- {
- PositionIndex old_aj=a[j];
- massert(j<a.size());massert(aj<f.size());
- massert(old_aj<f.size());massert(f[old_aj]>0);
- massert(j>0);
- positionSum[old_aj]-=j;
- // ausfuegen
- PositionIndex prev=als_j[j].prev;
- PositionIndex next=als_j[j].next;
- if( next )
- als_j[next].prev=prev;
- if( prev )
- als_j[prev].next=next;
- else
- als_i[old_aj]=next;
-
- // neue Position suchen
- PositionIndex lfd=als_i[aj],llfd=0;
- while( lfd && lfd<j )
- lfd = als_j[llfd=lfd].next;
-
- // einfuegen
- als_j[j].prev=llfd;
- als_j[j].next=lfd;
- if( llfd )
- als_j[llfd].next=j;
- else
- als_i[aj]=j;
- if( lfd )
- als_j[lfd].prev=j;
-
- f[old_aj]--;
- positionSum[aj]+=j;
- f[aj]++;
- a[j]=aj;
- }
- const Vector<PositionIndex>& getAlignment() const
- {return a ;}
- PositionIndex get_al(PositionIndex j)const
- {
- massert(j<a.size());
- return a[j];
- }
- PositionIndex operator()(PositionIndex j)const
- {
- massert(j<a.size());
- return a[j];
- }
- PositionIndex fert(PositionIndex i)const
- {
- massert(i<f.size());
- return f[i];
- }
- PositionIndex get_head(PositionIndex i)const
- {
- massert( als_i[i]==_get_head(i) );
- return als_i[i];
- }
- PositionIndex get_center(PositionIndex i)const
- {
- if( i==0 )return 0;
- massert(((positionSum[i]+f[i]-1)/f[i]==_get_center(i)));
- return (positionSum[i]+f[i]-1)/f[i];
- }
- PositionIndex _get_head(PositionIndex i)const
- {
- if( fert(i)==0 )return 0;
- for(PositionIndex j=1;j<=m;j++)
- if( a[j]==i )
- return j;
- return 0;
- }
- PositionIndex _get_center(PositionIndex i)const
- {
- if( i==0 )return 0;
- massert(fert(i));
- PositionIndex sum=0;
- for(PositionIndex j=1;j<=m;j++)
- if( a[j]==i )
- sum+=j;
- return (sum+fert(i)-1)/fert(i);
- }
- PositionIndex prev_cept(PositionIndex i)const
- {
- if( i==0 )return 0;
- PositionIndex k=i-1;
- while(k&&fert(k)==0)
- k--;
- return k;
- }
- PositionIndex next_cept(PositionIndex i)const
- {
- PositionIndex k=i+1;
- while(k<l+1&&fert(k)==0)
- k++;
- return k;
- }
- PositionIndex prev_in_cept(PositionIndex j)const
- {
- //PositionIndex k=j-1;
- //while(k&&a[k]!=a[j])
- //k--;
- //assert( als_j[j].prev==k );
- //assert(k);
- //return k;
- massert(als_j[j].prev==0||a[als_j[j].prev]==a[j]);
- return als_j[j].prev;
- }
- friend ostream &operator<<(ostream&out, const alignment&a);
- friend bool operator==(const alignment&a, const alignment&b)
- {
- massert(a.a.size()==b.a.size());
- for(PositionIndex j=1;j<=a.get_m();j++)
- if(a(j)!=b(j))
- return 0;
- return 1;
- }
- friend bool operator<(const alignment&x, const alignment&y)
- {
- massert(x.get_m()==y.get_m());
- for(PositionIndex j=1;j<=x.get_m();j++)
- if( x(j)<y(j) )
- return 1;
- else if( y(j)<x(j) )
- return 0;
- return 0;
- }
- friend int differences(const alignment&x, const alignment&y){
- int count=0;
- massert(x.get_m()==y.get_m());
- for(PositionIndex j=1;j<=x.get_m();j++)
- count += (x(j)!=y(j));
- return count;
- }
- bool valid()const
- {
- if( 2*f[0]>m )
- return 0;
- for(unsigned int i=1;i<=l;i++)
- if( f[i]>=MAX_FERTILITY )
- return 0;
- return 1;
- }
- friend class transpair_model5;
-};
-#endif
diff --git a/scripts/training/MGIZA/src/cmd.c b/scripts/training/MGIZA/src/cmd.c
deleted file mode 100644
index 323b5a9..0000000
--- a/scripts/training/MGIZA/src/cmd.c
+++ /dev/null
@@ -1,655 +0,0 @@
-
-// $Id: cmd.c 1307 2007-03-14 22:22:36Z hieuhoang1972 $
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <ctype.h>
-#include <string.h>
-
-
-#include "cmd.h"
-
-#ifdef WIN32
-# define popen _popen
-# define pclose _pclose
-#include <stdarg.h>
-#endif
-
-static Enum_T BoolEnum[] = {
- { "FALSE", 0 },
- { "TRUE", 1 },
- { 0, 0 }
-};
-
-#ifdef NEEDSTRDUP
-char *strdup();
-#endif
-
-#define FALSE 0
-#define TRUE 1
-
-#define LINSIZ 10240
-#define MAXPARAM 256
-
-static char *GetLine(),
- **str2array();
-static int Scan(),
- SetParam(),
- SetEnum(),
- SetSubrange(),
- SetStrArray(),
- SetGte(),
- SetLte(),
- CmdError(),
- EnumError(),
- SubrangeError(),
- GteError(),
- LteError(),
- PrintParam(),
- PrintEnum(),
- PrintStrArray();
-
-static Cmd_T cmds[MAXPARAM+1];
-static char *SepString = " \t\n";
-
-#if defined(__STDC__)
-#include <stdarg.h>
-int DeclareParams(char *ParName, ...)
-#else
-#ifdef WIN32
-int DeclareParams(char *ParName, ...)
-#else
-#include <varargs.h>
-int DeclareParams(ParName, va_alist)
-char *ParName;
-va_dcl
-#endif
-#endif
-{
- va_list args;
- static int ParamN = 0;
- int j,
- c;
- char *s;
-
-#if defined(__STDC__) || defined (WIN32)
- va_start(args, ParName);
-#else
- va_start(args);
-#endif
- for(;ParName;) {
- if(ParamN==MAXPARAM) {
- fprintf(stderr, "Too many parameters !!\n");
- break;
- }
- for(j=0,c=1; j<ParamN&&(c=strcmp(cmds[j].Name,ParName))<0; j++)
- ;
- if(!c) {
- fprintf(stderr,
- "Warning: parameter \"%s\" declared twice.\n",
- ParName);
- }
- for(c=ParamN; c>j; c--) {
- cmds[c] = cmds[c-1];
- }
- cmds[j].Name = ParName;
- cmds[j].Type = va_arg(args, int);
- cmds[j].Val = va_arg(args, void *);
- switch(cmds[j].Type) {
- case CMDENUMTYPE: /* get the pointer to Enum_T struct */
- cmds[j].p = va_arg(args, void *);
- break;
- case CMDSUBRANGETYPE: /* get the two extremes */
- cmds[j].p = (void*) calloc(2, sizeof(int));
- ((int*)cmds[j].p)[0] = va_arg(args, int);
- ((int*)cmds[j].p)[1] = va_arg(args, int);
- break;
- case CMDGTETYPE: /* get lower or upper bound */
- case CMDLTETYPE:
- cmds[j].p = (void*) calloc(1, sizeof(int));
- ((int*)cmds[j].p)[0] = va_arg(args, int);
- break;
- case CMDSTRARRAYTYPE: /* get the separators string */
- cmds[j].p = (s=va_arg(args, char*))
- ? (void*)strdup(s) : 0;
- break;
- case CMDBOOLTYPE:
- cmds[j].Type = CMDENUMTYPE;
- cmds[j].p = BoolEnum;
- break;
- case CMDDOUBLETYPE: /* nothing else is needed */
- case CMDINTTYPE:
- case CMDSTRINGTYPE:
- break;
- default:
- fprintf(stderr, "%s: %s %d %s \"%s\"\n",
- "DeclareParam()", "Unknown Type",
- cmds[j].Type, "for parameter", cmds[j].Name);
- exit(1);
- }
- ParamN++;
- ParName = va_arg(args, char *);
- }
- cmds[ParamN].Name = NULL;
- va_end(args);
- return 0;
-}
-
-int GetParams(n, a, CmdFileName)
-int *n;
-char ***a;
-char *CmdFileName;
-{
- char *Line,
- *ProgName;
- int argc = *n;
- char **argv = *a,
- *s;
- FILE *fp;
- int IsPipe;
-
-#ifdef MSDOS
-#define PATHSEP '\\'
- char *dot = NULL;
-#else
-#define PATHSEP '/'
-#endif
-
- if(!(Line=malloc(LINSIZ))) {
- fprintf(stderr, "GetParams(): Unable to alloc %d bytes\n",
- LINSIZ);
- exit(1);
- }
- if((ProgName=strrchr(*argv, PATHSEP))) {
- ++ProgName;
- } else {
- ProgName = *argv;
- }
-#ifdef MSDOS
- if(dot=strchr(ProgName, '.')) *dot = 0;
-#endif
- --argc;
- ++argv;
- for(;;) {
- if(argc && argv[0][0]=='-' && argv[0][1]=='=') {
- CmdFileName = argv[0]+2;
- ++argv;
- --argc;
- }
- if(!CmdFileName) {
- break;
- }
- IsPipe = !strncmp(CmdFileName, "@@", 2);
- fp = IsPipe
- ? popen(CmdFileName+2, "r")
- : strcmp(CmdFileName, "-")
- ? fopen(CmdFileName, "r")
- : stdin;
- if(!fp) {
- fprintf(stderr, "Unable to open command file %s\n",
- CmdFileName);
- exit(1);
- }
- while(GetLine(fp, LINSIZ, Line) && strcmp(Line, "\\End")) {
- if(Scan(ProgName, cmds, Line)) {
- CmdError(Line);
- }
- }
- if(fp!=stdin) {
- if(IsPipe) pclose(fp); else fclose(fp);
- }
- CmdFileName = NULL;
- }
- while(argc && **argv=='-' && (s=strchr(*argv, '='))) {
- *s = ' ';
- sprintf(Line, "%s/%s", ProgName, *argv+1);
- *s = '=';
- if(Scan(ProgName, cmds, Line)) CmdError(*argv);
- --argc;
- ++argv;
- }
- *n = argc;
- *a = argv;
-#ifdef MSDOS
- if(dot) *dot = '.';
-#endif
- free(Line);
- return 0;
-}
-
-int PrintParams(ValFlag, fp)
-int ValFlag;
-FILE *fp;
-{
- int i;
-
- fflush(fp);
- if(ValFlag) {
- fprintf(fp, "Parameters Values:\n");
- } else {
- fprintf(fp, "Parameters:\n");
- }
- for(i=0; cmds[i].Name; i++) PrintParam(cmds+i, ValFlag, fp);
- fprintf(fp, "\n");
- fflush(fp);
- return 0;
-}
-
-int SPrintParams(a, pfx)
-char ***a,
- *pfx;
-{
- int l,
- n;
- Cmd_T *cmd;
-
- if(!pfx) pfx="";
- l = strlen(pfx);
- for(n=0, cmd=cmds; cmd->Name; cmd++) n += !!cmd->ArgStr;
- a[0] = calloc(n, sizeof(char*));
- for(n=0, cmd=cmds; cmd->Name; cmd++) {
- if(!cmd->ArgStr) continue;
- a[0][n] = malloc(strlen(cmd->Name)+strlen(cmd->ArgStr)+l+2);
- sprintf(a[0][n], "%s%s=%s", pfx, cmd->Name, cmd->ArgStr);
- ++n;
- }
- return n;
-}
-
-static int CmdError(opt)
-char *opt;
-{
- fprintf(stderr, "Invalid option \"%s\"\n", opt);
- fprintf(stderr, "This program expectes the following parameters:\n");
- PrintParams(FALSE, stderr);
- exit(0);
-}
-
-static int PrintParam(cmd, ValFlag, fp)
-Cmd_T *cmd;
-int ValFlag;
-FILE *fp;
-{
- fprintf(fp, "%4s", "");
- switch(cmd->Type) {
- case CMDDOUBLETYPE:
- fprintf(fp, "%s", cmd->Name);
- if(ValFlag) fprintf(fp, ": %22.15e", *(double *)cmd->Val);
- fprintf(fp, "\n");
- break;
- case CMDENUMTYPE:
- PrintEnum(cmd, ValFlag, fp);
- break;
- case CMDINTTYPE:
- case CMDSUBRANGETYPE:
- case CMDGTETYPE:
- case CMDLTETYPE:
- fprintf(fp, "%s", cmd->Name);
- if(ValFlag) fprintf(fp, ": %d", *(int *)cmd->Val);
- fprintf(fp, "\n");
- break;
- case CMDSTRINGTYPE:
- fprintf(fp, "%s", cmd->Name);
- if(ValFlag) {
- if(*(char **)cmd->Val) {
- fprintf(fp, ": \"%s\"", *(char **)cmd->Val);
- } else {
- fprintf(fp, ": %s", "NULL");
- }
- }
- fprintf(fp, "\n");
- break;
- case CMDSTRARRAYTYPE:
- PrintStrArray(cmd, ValFlag, fp);
- break;
- default:
- fprintf(stderr, "%s: %s %d %s \"%s\"\n",
- "PrintParam",
- "Unknown Type",
- cmd->Type,
- "for parameter",
- cmd->Name);
- exit(1);
- }
- return 0;
-}
-
-static char *GetLine(fp, n, Line)
-FILE *fp;
-int n;
-char *Line;
-{
- int j,
- l,
- offs=0;
-
- for(;;) {
- if(!fgets(Line+offs, n-offs, fp)) {
- return NULL;
- }
- if(Line[offs]=='#') continue;
- l = strlen(Line+offs)-1;
- Line[offs+l] = 0;
- for(j=offs; Line[j] && isspace(Line[j]); j++, l--)
- ;
- if(l<1) continue;
- if(j > offs) {
- char *s = Line+offs,
- *q = Line+j;
-
- while((*s++=*q++))
- ;
- }
- if(Line[offs+l-1]=='\\') {
- offs += l;
- Line[offs-1] = ' ';
- } else {
- break;
- }
- }
- return Line;
-}
-
-static int Scan(ProgName, cmds, Line)
-char *ProgName,
- *Line;
-Cmd_T *cmds;
-{
- char *q,
- *p;
- int i,
- hl,
- HasToMatch = FALSE,
- c0,
- c;
-
- p = Line+strspn(Line, SepString);
- if(!(hl=strcspn(p, SepString))) {
- return 0;
- }
- if((q=strchr(p, '/')) && q-p<hl) {
- *q = 0;
- if(strcmp(p, ProgName)) {
- *q = '/';
- return 0;
- }
- *q = '/';
- HasToMatch=TRUE;
- p = q+1;
- }
- if(!(hl = strcspn(p, SepString))) {
- return 0;
- }
- c0 = p[hl];
- p[hl] = 0;
- for(i=0, c=1; cmds[i].Name&&(c=strcmp(cmds[i].Name, p))<0; i++)
- ;
- p[hl] = c0;
- if(!c) return SetParam(cmds+i, p+hl+strspn(p+hl, SepString));
- return HasToMatch && c;
-}
-
-static int SetParam(cmd, s)
-Cmd_T *cmd;
-char *s;
-{
- if(!*s && cmd->Type != CMDSTRINGTYPE) {
- fprintf(stderr,
- "WARNING: No value specified for parameter \"%s\"\n",
- cmd->Name);
- return 0;
- }
- switch(cmd->Type) {
- case CMDDOUBLETYPE:
- if(sscanf(s, "%lf", (double*)cmd->Val)!=1) {
- fprintf(stderr,
- "Float value required for parameter \"%s\"\n",
- cmd->Name);
- exit(1);
- }
- break;
- case CMDENUMTYPE:
- SetEnum(cmd, s);
- break;
- case CMDINTTYPE:
- if(sscanf(s, "%d", (int*)cmd->Val)!=1) {
- fprintf(stderr,
- "Integer value required for parameter \"%s\"\n",
- cmd->Name);
- exit(1);
- }
- break;
- case CMDSTRINGTYPE:
- *(char **)cmd->Val = (strcmp(s, "<NULL>") && strcmp(s, "NULL"))
- ? strdup(s)
- : 0;
- break;
- case CMDSTRARRAYTYPE:
- SetStrArray(cmd, s);
- break;
- case CMDGTETYPE:
- SetGte(cmd, s);
- break;
- case CMDLTETYPE:
- SetLte(cmd, s);
- break;
- case CMDSUBRANGETYPE:
- SetSubrange(cmd, s);
- break;
- default:
- fprintf(stderr, "%s: %s %d %s \"%s\"\n",
- "SetParam",
- "Unknown Type",
- cmd->Type,
- "for parameter",
- cmd->Name);
- exit(1);
- }
- cmd->ArgStr = strdup(s);
- return 0;
-}
-
-static int SetEnum(cmd, s)
-Cmd_T *cmd;
-char *s;
-{
- Enum_T *en;
-
- for(en=(Enum_T *)cmd->p; en->Name; en++) {
- if(*en->Name && !strcmp(s, en->Name)) {
- *(int *) cmd->Val = en->Idx;
- return 0;
- }
- }
- return EnumError(cmd, s);
-}
-
-static int SetSubrange(cmd, s)
-Cmd_T *cmd;
-char *s;
-{
- int n;
-
- if(sscanf(s, "%d", &n)!=1) {
- fprintf(stderr,
- "Integer value required for parameter \"%s\"\n",
- cmd->Name);
- exit(1);
- }
- if(n < *(int *)cmd->p || n > *((int *)cmd->p+1)) {
- return SubrangeError(cmd, n);
- }
- *(int *)cmd->Val = n;
- return 0;
-}
-
-static int SetGte(cmd, s)
-Cmd_T *cmd;
-char *s;
-{
- int n;
-
- if(sscanf(s, "%d", &n)!=1) {
- fprintf(stderr,
- "Integer value required for parameter \"%s\"\n",
- cmd->Name);
- exit(1);
- }
- if(n<*(int *)cmd->p) {
- return GteError(cmd, n);
- }
- *(int *)cmd->Val = n;
- return 0;
-}
-
-static int SetStrArray(cmd, s)
-Cmd_T *cmd;
-char *s;
-{
- *(char***)cmd->Val = str2array(s, (char*)cmd->p);
- return 0;
-}
-
-static int SetLte(cmd, s)
-Cmd_T *cmd;
-char *s;
-{
- int n;
-
- if(sscanf(s, "%d", &n)!=1) {
- fprintf(stderr,
- "Integer value required for parameter \"%s\"\n",
- cmd->Name);
- exit(1);
- }
- if(n > *(int *)cmd->p) {
- return LteError(cmd, n);
- }
- *(int *)cmd->Val = n;
- return 0;
-}
-
-static int EnumError(cmd, s)
-Cmd_T *cmd;
-char *s;
-{
- Enum_T *en;
-
- fprintf(stderr,
- "Invalid value \"%s\" for parameter \"%s\"\n", s, cmd->Name);
- fprintf(stderr, "Valid values are:\n");
- for(en=(Enum_T *)cmd->p; en->Name; en++) {
- if(*en->Name) {
- fprintf(stderr, " %s\n", en->Name);
- }
- }
- fprintf(stderr, "\n");
- exit(1);
-}
-
-static int GteError(cmd, n)
-Cmd_T *cmd;
-int n;
-{
- fprintf(stderr,
- "Value %d out of range for parameter \"%s\"\n", n, cmd->Name);
- fprintf(stderr, "Valid values must be greater than or equal to %d\n",
- *(int *)cmd->p);
- exit(1);
-}
-
-static int LteError(cmd, n)
-Cmd_T *cmd;
-int n;
-{
- fprintf(stderr,
- "Value %d out of range for parameter \"%s\"\n", n, cmd->Name);
- fprintf(stderr, "Valid values must be less than or equal to %d\n",
- *(int *)cmd->p);
- exit(1);
-}
-
-static int SubrangeError(cmd, n)
-Cmd_T *cmd;
-int n;
-{
- fprintf(stderr,
- "Value %d out of range for parameter \"%s\"\n", n, cmd->Name);
- fprintf(stderr, "Valid values range from %d to %d\n",
- *(int *)cmd->p, *((int *)cmd->p+1));
- exit(1);
-}
-
-static int PrintEnum(cmd, ValFlag, fp)
-Cmd_T *cmd;
-int ValFlag;
-FILE *fp;
-{
- Enum_T *en;
-
- fprintf(fp, "%s", cmd->Name);
- if(ValFlag) {
- for(en=(Enum_T *)cmd->p; en->Name; en++) {
- if(*en->Name && en->Idx==*(int *)cmd->Val) {
- fprintf(fp, ": %s", en->Name);
- }
- }
- }
- fprintf(fp, "\n");
- return 0;
-}
-
-static int PrintStrArray(cmd, ValFlag, fp)
-Cmd_T *cmd;
-int ValFlag;
-FILE *fp;
-{
- char *indent,
- **s = *(char***)cmd->Val;
- int l = 4+strlen(cmd->Name);
-
- fprintf(fp, "%s", cmd->Name);
- indent = malloc(l+2);
- memset(indent, ' ', l+1);
- indent[l+1] = 0;
- if(ValFlag) {
- fprintf(fp, ": %s", s ? (*s ? *s++ : "NULL") : "");
- if(s) while(*s) {
- fprintf(fp, "\n%s %s", indent, *s++);
- }
- }
- free(indent);
- fprintf(fp, "\n");
- return 0;
-}
-
-static char **str2array(s, sep)
-char *s,
- *sep;
-{
- char *p,
- **a;
- int n = 0,
- l;
-
- if(!sep) sep = SepString;
- p = s += strspn(s, sep);
- while(*p) {
- p += strcspn(p, sep);
- p += strspn(p, sep);
- ++n;
- }
- a = calloc(n+1, sizeof(char *));
- p = s;
- n = 0;
- while(*p) {
- l = strcspn(p, sep);
- a[n] = malloc(l+1);
- memcpy(a[n], p, l);
- a[n][l] = 0;
- ++n;
- p += l;
- p += strspn(p, sep);
- }
- return a;
-}
diff --git a/scripts/training/MGIZA/src/cmd.h b/scripts/training/MGIZA/src/cmd.h
deleted file mode 100644
index 6d39753..0000000
--- a/scripts/training/MGIZA/src/cmd.h
+++ /dev/null
@@ -1,52 +0,0 @@
-
-// $Id: cmd.h 1307 2007-03-14 22:22:36Z hieuhoang1972 $
-
-#if !defined(CMD_H)
-
-#define CMD_H
-
-#define CMDDOUBLETYPE 1
-#define CMDENUMTYPE 2
-#define CMDINTTYPE 3
-#define CMDSTRINGTYPE 4
-#define CMDSUBRANGETYPE 5
-#define CMDGTETYPE 6
-#define CMDLTETYPE 7
-#define CMDSTRARRAYTYPE 8
-#define CMDBOOLTYPE 9
-
-typedef struct {
- char *Name;
- int Idx;
-} Enum_T;
-
-typedef struct {
- int Type;
- char *Name,
- *ArgStr;
- void *Val,
- *p;
-} Cmd_T;
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-#if defined(__STDC__) || defined(WIN32)
-int DeclareParams(char *, ...);
-#else
-
-int DeclareParams();
-#endif
-
-int GetParams(int *n, char ***a,char *CmdFileName),
- SPrintParams(),
- PrintParams();
-
-#ifdef __cplusplus
-}
-#endif
-#endif
-
-
-
diff --git a/scripts/training/MGIZA/src/collCounts.cpp b/scripts/training/MGIZA/src/collCounts.cpp
deleted file mode 100644
index 698e915..0000000
--- a/scripts/training/MGIZA/src/collCounts.cpp
+++ /dev/null
@@ -1,290 +0,0 @@
-/*
-
- Copyright (C) 1999,2000,2001 Franz Josef Och (RWTH Aachen - Lehrstuhl fuer Informatik VI)
-
- This file is part of GIZA++ ( extension of GIZA ).
-
- This program is free software; you can redistribute it and/or
- modify it under the terms of the GNU General Public License
- as published by the Free Software Foundation; either version 2
- of the License, or (at your option) any later version.
-
- This program is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- GNU General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program; if not, write to the Free Software
- Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
- USA.
-
- */
-#include "alignment.h"
-#include "transpair_model3.h"
-#include <map>
-#include "collCounts.h"
-#include "MoveSwapMatrix.h"
-#include "D5Tables.h"
-#include "transpair_model5.h"
-#include "transpair_modelhmm.h"
-#include "Parameter.h"
-
-extern float COUNTINCREASE_CUTOFF_AL;
-// unifies collectCountsOverAlignments and findAlignmentNeighborhood FJO-20/07/99
-template<class TRANSPAIR> int collectCountsOverNeighborhood(
- const MoveSwapMatrix<TRANSPAIR>&msc, LogProb ascore,
- Array2<LogProb,Vector<LogProb> >&dtcount,
- Array2<LogProb,Vector<LogProb> >&ncount, LogProb&p1count,
- LogProb&p0count, LogProb&total_count) {
- int nAl=0;
- const PositionIndex l=msc.get_l(), m=msc.get_m();
- Array2<LogProb,Vector<LogProb> > cmove(l+1, m+1), cswap(l+1, m+1);
- Vector<LogProb> negmove(m+1),negswap(m+1),plus1fert(l+1),minus1fert(l+1);
- LogProb total_move, total_swap;
- if (msc.isCenterDeleted()==0) {
- total_move+=ascore;
- nAl++;
- }
- for (PositionIndex j=1; j<=m; j++) {
- for (PositionIndex i=0; i<=l; i++) {
- if (msc(j)!=i && !msc.isDelMove(i, j) ) {
- LogProb newscore=ascore*msc.cmove(i, j);
- total_move+=newscore;
- nAl++;
- cmove(i, j)+=newscore;
- negmove[j]+=newscore;
- plus1fert[i]+=newscore;
- minus1fert[msc(j)]+=newscore;
- }
- }
- }
- for (PositionIndex j1=1; j1<=m; j1++) {
- for (PositionIndex j2=j1+1; j2<=m; j2++) {
- if (msc(j1)!=msc(j2) && !msc.isDelSwap(j1, j2) ) {
- LogProb newscore=ascore*msc.cswap(j1, j2);
- total_swap+=newscore;
- nAl++;
- cswap(msc(j1), j2)+=newscore;
- cswap(msc(j2), j1)+=newscore;
- negswap[j1]+=newscore;
- negswap[j2]+=newscore;
- }
- }
- }
- total_count+=total_move+total_swap;
- for (PositionIndex j=1; j<=m; j++)
- for (PositionIndex i=0; i<=l; i++)
- dtcount(i, j) += ((i==msc(j)) ? (total_count
- -(negmove[j]+negswap[j])) : (cswap(i, j)+cmove(i, j)));
- for (PositionIndex i=1; i<=l; i++) {
- LogProb temp=minus1fert[i]+plus1fert[i];
- if (msc.fert(i)<MAX_FERTILITY)
- ncount(i, msc.fert(i))+=total_count-temp;
- if (msc.fert(i)>0&&msc.fert(i)-1<MAX_FERTILITY)
- ncount(i, msc.fert(i)-1)+=minus1fert[i];
- else if (minus1fert[i]!=0.0)
- cerr << "ERROR: M1Fa: " << minus1fert[i] << ' ' << i << ' '
- << msc.fert(i)<< endl;
- if (msc.fert(i)+1<MAX_FERTILITY)
- ncount(i, msc.fert(i)+1)+=plus1fert[i];
- }
- LogProb temp=minus1fert[0]+plus1fert[0];
- p1count += (total_count-temp)*(LogProb)msc.fert(0);
- p0count += (total_count-temp)*(LogProb)(m-2*msc.fert(0));
- if (msc.fert(0)>0) {
- p1count += (minus1fert[0])*(LogProb)(msc.fert(0)-1);
- p0count += (minus1fert[0])*(LogProb)(m-2*(msc.fert(0)-1));
- } else if (minus1fert[0]!=0.0)
- cerr << "ERROR: M1Fb: " << minus1fert[0] << endl;
- if (int(m)-2*(int(msc.fert(0))+1)>=0) {
- p1count += (plus1fert[0])*(LogProb)(msc.fert(0)+1);
- p0count += (plus1fert[0])*(LogProb)(m-2*(msc.fert(0)+1));
- }
- msc.check();
- return nAl;
-}
-;
-
-template<class TRANSPAIR> double collectCountsOverNeighborhoodForSophisticatedModels(
- const MoveSwapMatrix<TRANSPAIR>&, LogProb, void*) {
- return 0.0;
-}
-
-template<class TRANSPAIR> void _collectCountsOverNeighborhoodForSophisticatedModels(
- const MoveSwapMatrix<TRANSPAIR>&Mmsc, const alignment&msc,
- const TRANSPAIR&ef, LogProb normalized_ascore, d4model*d4Table) {
- Mmsc.check();
- const PositionIndex m=msc.get_m(), l=msc.get_l();
- for (PositionIndex j=1; j<=m; ++j)
- if (msc(j)!=0)
- if (msc.get_head(msc(j))==j) {
- int ep=msc.prev_cept(msc(j));
- d4Table->augCountRef_first(j, msc.get_center(ep),
- d4Table->ewordclasses->getClass(ef.get_es(ep)),
- d4Table->fwordclasses->getClass(ef.get_fs(j)), l, m,normalized_ascore);
- } else {
- //massert( &d4Table->getCountRef_bigger(j,msc.prev_in_cept(j),0,d4Table->fwordclasses.getClass(ef.get_fs(j)),l,m) == ef.getCountSecond(j,msc.prev_in_cept(j) ));
- d4Table->augCountRef_bigger(j, msc.prev_in_cept(j), 0,
- d4Table->fwordclasses->getClass(ef.get_fs(j)), l, m,normalized_ascore);
- }
-}
-
-template<class TRANSPAIR> void _collectCountsOverNeighborhoodForSophisticatedModels(
- const MoveSwapMatrix<TRANSPAIR>&Mmsc, const alignment&msc,
- const TRANSPAIR&ef, LogProb normalized_ascore, d5model*d5Table) {
- Mmsc.check();
- _collectCountsOverNeighborhoodForSophisticatedModels(Mmsc, msc, ef,
- normalized_ascore, &d5Table->d4m);
- Mmsc.check();
- const PositionIndex m=msc.get_m(), l=msc.get_l();
- PositionIndex prev_cept=0;
- PositionIndex vac_all=m;
- Vector<char> vac(m+1,0);
- for (PositionIndex i=1; i<=l; i++) {
- PositionIndex cur_j=msc.als_i[i];
- PositionIndex prev_j=0;
- PositionIndex k=0;
- if (cur_j) { // process first word of cept
- k++;
- d5Table->getCountRef_first(vacancies(vac, cur_j), vacancies(vac,
- msc.get_center(prev_cept)),
- d5Table->fwordclasses->getClass(ef.get_fs(cur_j)), l, m,
- vac_all-msc.fert(i)+k) +=normalized_ascore;
- vac_all--;
- assert(vac[cur_j]==0);
- vac[cur_j]=1;
- Mmsc.check();
- prev_j=cur_j;
- cur_j=msc.als_j[cur_j].next;
- }
- while (cur_j) { // process following words of cept
- k++;
- int vprev=vacancies(vac, prev_j);
- d5Table->getCountRef_bigger(vacancies(vac, cur_j), vprev,
- d5Table->fwordclasses->getClass(ef.get_fs(cur_j)), l, m,
- vac_all-vprev/*war weg*/-msc.fert(i)+k)+=normalized_ascore;
- vac_all--;
- vac[cur_j]=1;
- Mmsc.check();
- prev_j=cur_j;
- cur_j=msc.als_j[cur_j].next;
- }
- assert(k==msc.fert(i));
- if (k)
- prev_cept=i;
- }
- assert(vac_all==msc.fert(0));
-}
-
-extern int NumberOfAlignmentsInSophisticatedCountCollection;
-template<class TRANSPAIR, class MODEL> double collectCountsOverNeighborhoodForSophisticatedModels(
- const MoveSwapMatrix<TRANSPAIR>&msc, LogProb normalized_ascore,
- MODEL*d5Table) {
- const PositionIndex m=msc.get_m(), l=msc.get_l();
- alignment x(msc);
- double sum=0;
- msc.check();
- if ( !msc.isCenterDeleted() ) {
- _collectCountsOverNeighborhoodForSophisticatedModels<TRANSPAIR>(msc, x,
- msc.get_ef(), normalized_ascore, d5Table);
- NumberOfAlignmentsInSophisticatedCountCollection++;
- sum+=normalized_ascore;
- }
- msc.check();
- for (WordIndex j=1; j<=m; j++)
- for (WordIndex i=0; i<=l; i++) {
- WordIndex old=x(j);
- if (i!=old&& !msc.isDelMove(i, j) ) {
- msc.check();
- double c=msc.cmove(i, j)*normalized_ascore;
- if (c > COUNTINCREASE_CUTOFF_AL) {
- x.set(j, i);
- _collectCountsOverNeighborhoodForSophisticatedModels<
- TRANSPAIR>(msc, x, msc.get_ef(), c, d5Table);
- NumberOfAlignmentsInSophisticatedCountCollection++;
- x.set(j, old);
- sum+=c;
- }
- msc.check();
- }
- }
- for (PositionIndex j1=1; j1<=m; j1++) {
- for (PositionIndex j2=j1+1; j2<=m; j2++) {
- if (msc(j1)!=msc(j2) && !msc.isDelSwap(j1, j2) ) {
- double c=msc.cswap(j1, j2)*normalized_ascore;
- msc.check();
- if (c > COUNTINCREASE_CUTOFF_AL) {
- int old1=msc(j1), old2=msc(j2);
- x.set(j1, old2);
- x.set(j2, old1);
- _collectCountsOverNeighborhoodForSophisticatedModels<
- TRANSPAIR>(msc, x, msc.get_ef(), c, d5Table);
- NumberOfAlignmentsInSophisticatedCountCollection++;
- x.set(j1, old1);
- x.set(j2, old2);
- sum+=c;
- }
- msc.check();
- }
- }
- }
- msc.check();
- return sum;
-}
-
-template<class TRANSPAIR, class MODEL> int collectCountsOverNeighborhood(
- const Vector<pair<MoveSwapMatrix<TRANSPAIR>*,LogProb> >&smsc,
- Vector<WordIndex>&es, Vector<WordIndex>&fs, tmodel<COUNT,PROB>&tTable,
- amodel<COUNT>&aCountTable, amodel<COUNT>&dCountTable,
- nmodel<COUNT>&nCountTable, SyncDouble&p1count, SyncDouble&p0count,
- LogProb&_total, float count, bool addCounts, MODEL*d4Table) {
- int nAl=0;
- const PositionIndex l=es.size()-1, m=fs.size()-1;
- Array2<LogProb,Vector<LogProb> > dtcount(l+1, m+1), ncount(l+1,
- MAX_FERTILITY+1);
- LogProb p0=0, p1=0, all_total=0;
- for (unsigned int i=0; i<smsc.size(); ++i) {
- LogProb this_total=0;
- nAl+=collectCountsOverNeighborhood(*smsc[i].first, smsc[i].second,
- dtcount, ncount, p1, p0, this_total);
- all_total+=this_total;
- }
- _total=all_total;
- all_total/=(double)count;
- double sum2=0;
- if (addCounts && d4Table) {
- for (unsigned int i=0; i<smsc.size(); ++i) {
- //for(WordIndex j=1;j<=m;j++)for(WordIndex ii=0;ii<=l;ii++)
- // (*smsc[i].first).cmove(ii,j);
- sum2+=collectCountsOverNeighborhoodForSophisticatedModels(
- *smsc[i].first, smsc[i].second/all_total, d4Table);
- }
- if (!(fabs(count-sum2)<0.05))
- cerr << "WARNING: DIFFERENT SUMS: (" << count << ") (" << sum2
- << ")\n";
- }
-
- /**
- NOTE! HERE IS THE UPDATE PROCESS!
- */
- if (addCounts) {
- for (PositionIndex i=0; i<=l; i++) {
- for (PositionIndex j=1; j<=m; j++) {
- LogProb ijadd=dtcount(i, j)/all_total;
- if (ijadd>COUNTINCREASE_CUTOFF_AL) {
- tTable.incCount(es[i], fs[j], ijadd);
- dCountTable.addValue(j, i, l, m, ijadd);
- aCountTable.addValue(i, j, l, m, ijadd);
- }
- }
- if (i>0)
- for (PositionIndex n=0; n<MAX_FERTILITY; n++)
- nCountTable.addValue(es[i], n, ncount(i, n)/all_total);
- }
- p0count+=p0/all_total;
- p1count+=p1/all_total;
- }
- return nAl;
-}
-
diff --git a/scripts/training/MGIZA/src/collCounts.h b/scripts/training/MGIZA/src/collCounts.h
deleted file mode 100644
index 9a0529b..0000000
--- a/scripts/training/MGIZA/src/collCounts.h
+++ /dev/null
@@ -1,80 +0,0 @@
-/*
-
-Copyright (C) 1999,2000,2001 Franz Josef Och (RWTH Aachen - Lehrstuhl fuer Informatik VI)
-
-This file is part of GIZA++ ( extension of GIZA ).
-
-This program is free software; you can redistribute it and/or
-modify it under the terms of the GNU General Public License
-as published by the Free Software Foundation; either version 2
-of the License, or (at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with this program; if not, write to the Free Software
-Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
-USA.
-
-*/
-#ifndef collCounts_h_defined
-#define collCounts_h_defined
-#include "alignment.h"
-#include "transpair_model3.h"
-#include <map>
-#include "MoveSwapMatrix.h"
-#include "D4Tables.h"
-#include "transpair_model4.h"
-
-class OneMoveSwap
-{
- public:
- short type;
- short a,b;
- OneMoveSwap(short _type,short _a,short _b)
- : type(_type),a(_a),b(_b)
- {}
- OneMoveSwap()
- : type(0){}
-};
-
-inline bool operator<(const OneMoveSwap&a,const OneMoveSwap&b)
-{
- if(a.type<b.type)return 1;
- else if(b.type<a.type)return 0;
- else if(a.a<b.a)return 1;
- else if(b.a<a.a)return 0;
- else return a.b<b.b;
-}
-
-inline bool operator==(const OneMoveSwap&a,const OneMoveSwap&b)
-{
- return a.type==b.type&&a.a==b.a&&a.b==b.b;
-}
-
-inline ostream&operator<<(ostream&out,const OneMoveSwap&o)
-{
- return out << '(' << o.type << "," << o.a << "," << o.b << ")";
-}
-
-inline ostream &operator<<(ostream &out,const set<OneMoveSwap>&s)
-{
- for(set<OneMoveSwap>::const_iterator i=s.begin();i!=s.end();++i)
- cout << *i << ' ';
- return out;
-}
-
-bool makeOneMoveSwap(const alignment&a,const alignment&b,set<OneMoveSwap>&oms);
-
-template<class TRANSPAIR,class MODEL>
-int collectCountsOverNeighborhood(const Vector<pair<MoveSwapMatrix<TRANSPAIR>*,LogProb> >&smsc,
- Vector<WordIndex>&es,
- Vector<WordIndex>&fs,tmodel<COUNT,PROB>&tTable,
- amodel<COUNT>&aCountTable,amodel<COUNT>&dCountTable,
- nmodel<COUNT>&nCountTable,double&p1count,double&p0count,
- LogProb&_total,float count,bool addCounts,MODEL*d4Table=0);
-
-#endif
diff --git a/scripts/training/MGIZA/src/common.h b/scripts/training/MGIZA/src/common.h
deleted file mode 100644
index 511c1f1..0000000
--- a/scripts/training/MGIZA/src/common.h
+++ /dev/null
@@ -1,3 +0,0 @@
-/*!
-This is global definition for all main files of the program set
-*/
diff --git a/scripts/training/MGIZA/src/d4norm.cxx b/scripts/training/MGIZA/src/d4norm.cxx
deleted file mode 100644
index a790a62..0000000
--- a/scripts/training/MGIZA/src/d4norm.cxx
+++ /dev/null
@@ -1,128 +0,0 @@
-// D4 Normalization executable
-
-#include <iostream>
-#include <strstream>
-#include <string>
-#include "hmm.h"
-#include "D4Tables.h"
-#include "Parameter.h"
-#define ITER_M2 0
-#define ITER_MH 5
-GLOBAL_PARAMETER3(int,Model1_Iterations,"Model1_Iterations","NO. ITERATIONS MODEL 1","m1","number of iterations for Model 1",PARLEV_ITER,5);
-GLOBAL_PARAMETER3(int,Model2_Iterations,"Model2_Iterations","NO. ITERATIONS MODEL 2","m2","number of iterations for Model 2",PARLEV_ITER,ITER_M2);
-GLOBAL_PARAMETER3(int,HMM_Iterations,"HMM_Iterations","mh","number of iterations for HMM alignment model","mh", PARLEV_ITER,ITER_MH);
-GLOBAL_PARAMETER3(int,Model3_Iterations,"Model3_Iterations","NO. ITERATIONS MODEL 3","m3","number of iterations for Model 3",PARLEV_ITER,5);
-GLOBAL_PARAMETER3(int,Model4_Iterations,"Model4_Iterations","NO. ITERATIONS MODEL 4","m4","number of iterations for Model 4",PARLEV_ITER,5);
-GLOBAL_PARAMETER3(int,Model5_Iterations,"Model5_Iterations","NO. ITERATIONS MODEL 5","m5","number of iterations for Model 5",PARLEV_ITER,0);
-GLOBAL_PARAMETER3(int,Model6_Iterations,"Model6_Iterations","NO. ITERATIONS MODEL 6","m6","number of iterations for Model 6",PARLEV_ITER,0);
-
-GLOBAL_PARAMETER(float, PROB_SMOOTH,"probSmooth","probability smoothing (floor) value ",PARLEV_OPTHEUR,1e-7);
-GLOBAL_PARAMETER(float, MINCOUNTINCREASE,"minCountIncrease","minimal count increase",PARLEV_OPTHEUR,1e-7);
-
-GLOBAL_PARAMETER2(int,Transfer_Dump_Freq,"TRANSFER DUMP FREQUENCY","t2to3","output: dump of transfer from Model 2 to 3",PARLEV_OUTPUT,0);
-GLOBAL_PARAMETER2(bool,Verbose,"verbose","v","0: not verbose; 1: verbose",PARLEV_OUTPUT,0);
-GLOBAL_PARAMETER(bool,Log,"log","0: no logfile; 1: logfile",PARLEV_OUTPUT,0);
-
-GLOBAL_PARAMETER(double,P0,"p0","fixed value for parameter p_0 in IBM-3/4 (if negative then it is determined in training)",PARLEV_EM,-1.0);
-GLOBAL_PARAMETER(double,M5P0,"m5p0","fixed value for parameter p_0 in IBM-5 (if negative then it is determined in training)",PARLEV_EM,-1.0);
-GLOBAL_PARAMETER3(bool,Peg,"pegging","p","DO PEGGING? (Y/N)","0: no pegging; 1: do pegging",PARLEV_EM,0);
-
-GLOBAL_PARAMETER(short,OldADBACKOFF,"adbackoff","",-1,0);
-GLOBAL_PARAMETER2(unsigned int,MAX_SENTENCE_LENGTH,"ml","MAX SENTENCE LENGTH","maximum sentence length",0,MAX_SENTENCE_LENGTH_ALLOWED);
-
-GLOBAL_PARAMETER(short, DeficientDistortionForEmptyWord,"DeficientDistortionForEmptyWord","0: IBM-3/IBM-4 as described in (Brown et al. 1993); 1: distortion model of empty word is deficient; 2: distoriton model of empty word is deficient (differently); setting this parameter also helps to avoid that during IBM-3 and IBM-4 training too many words are aligned with the empty word",PARLEV_MODELS,0);
-
-/**
-Here are parameters to support Load models and dump models
-*/
-
-GLOBAL_PARAMETER(int,restart,"restart","Restart training from a level,0: Normal restart, from model 1, 1: Model 1, 2: Model 2 Init (Using Model 1 model input and train model 2), 3: Model 2, (using model 2 input and train model 2), 4 : HMM Init (Using Model 1 model and train HMM), 5: HMM (Using Model 2 model and train HMM) 6 : HMM (Using HMM Model and train HMM), 7: Model 3 Init (Use HMM model and train model 3) 8: Model 3 Init (Use Model 2 model and train model 3) 9: Model 3, 10: Model 4 Init (Use Model 3 model and train Model 4) 11: Model 4 and on, ",PARLEV_INPUT,0);
-GLOBAL_PARAMETER(bool,dumpCount,"dumpcount","Whether we are going to dump count (in addition to) final output?",PARLEV_OUTPUT,false);
-GLOBAL_PARAMETER(bool,dumpCountUsingWordString,"dumpcountusingwordstring","In count table, should actual word appears or just the id? default is id",PARLEV_OUTPUT,false);
-/// END
-short OutputInAachenFormat=0;
-bool Transfer=TRANSFER;
-bool Transfer2to3=0;
-short NoEmptyWord=0;
-bool FEWDUMPS=0;
-GLOBAL_PARAMETER(bool,ONLYALDUMPS,"ONLYALDUMPS","1: do not write any files",PARLEV_OUTPUT,0);
-GLOBAL_PARAMETER(short,NCPUS,"NCPUS","Number of CPUS",PARLEV_EM,2);
-GLOBAL_PARAMETER(short,CompactAlignmentFormat,"CompactAlignmentFormat","0: detailled alignment format, 1: compact alignment format ",PARLEV_OUTPUT,0);
-GLOBAL_PARAMETER2(bool,NODUMPS,"NODUMPS","NO FILE DUMPS? (Y/N)","1: do not write any files",PARLEV_OUTPUT,0);
-
-GLOBAL_PARAMETER(WordIndex, MAX_FERTILITY, "MAX_FERTILITY",
- "maximal fertility for fertility models", PARLEV_EM, 10);
-
-using namespace std;
-string Prefix, LogFilename, OPath, Usage, SourceVocabFilename,
- TargetVocabFilename, CorpusFilename, TestCorpusFilename, t_Filename,
- SourceVocabClassesFilename, TargetVocabClassesFilename,
- a_Filename, p0_Filename, d_Filename, n_Filename, dictionary_Filename;
-
-
-int main(int argc, char* argv[]){
- if(argc < 5){
- cerr << "Usage: " << argv[0] << " vcb1 vcb2 outputFile baseFile [additional1 ]..." << endl;
- return 1;
- }
- WordClasses ewc,fwc;
- d4model d4m(MAX_SENTENCE_LENGTH,ewc,fwc);
- Vector<WordEntry> evlist,fvlist;
- vcbList eTrainVcbList(evlist), fTrainVcbList(fvlist);
- TargetVocabFilename = argv[2];
- SourceVocabFilename = argv[1];
- eTrainVcbList.setName(argv[1]);
- fTrainVcbList.setName(argv[2]);
- eTrainVcbList.readVocabList();
- fTrainVcbList.readVocabList();
- SourceVocabClassesFilename = argv[1];
- TargetVocabClassesFilename = argv[2];
- SourceVocabClassesFilename += ".classes";
- TargetVocabClassesFilename += ".classes";
- d4m.makeWordClasses(eTrainVcbList, fTrainVcbList, SourceVocabClassesFilename.c_str(), TargetVocabClassesFilename.c_str(),eTrainVcbList,fTrainVcbList);
- // Start iteration:
- for(int i =4; i< argc ; i++){
- string name = argv[i];
- string nameA = name ;
- string nameB = name + ".b";
- if(d4m.augCount(nameA.c_str(),nameB.c_str())){
- cerr << "Loading (d4) table " << nameA << "/" << nameB << " OK" << endl;
-
- }else{
- cerr << "ERROR Loading (d) table " << nameA << " " << nameB << endl;
- }
- }
-
- d4m.normalizeTable();
- string DiffOPath = argv[3];
- string diff1 = DiffOPath;
- string diff2 = DiffOPath+".b";
- cerr << "Outputing d4 table to " << diff1 << " " << diff2;
- d4m.printProbTable(diff1.c_str(),diff2.c_str());
-
-
-}
-
-// Some utility functions to get it compile..
-
-ofstream logmsg;
-const string str2Num(int n) {
- string number = "";
- do {
- number.insert((size_t)0, 1, (char)(n % 10 + '0'));
- } while ((n /= 10) > 0);
- return (number);
-}
-double LAMBDA=1.09;
-
-Vector<map< pair<int,int>,char > > ReferenceAlignment;
-
-double ErrorsInAlignment(const map< pair<int,int>,char >&reference,
- const Vector<WordIndex>&test, int l, int&missing, int&toomuch,
- int&eventsMissing, int&eventsToomuch, int pair_no){
- return 0;
- }
-
-void printGIZAPars(ostream&out){
-}
-
diff --git a/scripts/training/MGIZA/src/defs.h b/scripts/training/MGIZA/src/defs.h
deleted file mode 100644
index e94addd..0000000
--- a/scripts/training/MGIZA/src/defs.h
+++ /dev/null
@@ -1,78 +0,0 @@
-/*
-
-EGYPT Toolkit for Statistical Machine Translation
-Written by Yaser Al-Onaizan, Jan Curin, Michael Jahr, Kevin Knight, John Lafferty, Dan Melamed, David Purdy, Franz Och, Noah Smith, and David Yarowsky.
-
-This program is free software; you can redistribute it and/or
-modify it under the terms of the GNU General Public License
-as published by the Free Software Foundation; either version 2
-of the License, or (at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with this program; if not, write to the Free Software
-Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
-USA.
-
-*/
-#ifndef _defs_h
-#define _defs_h 1
-#include <string>
-#include <math.h>
-#include <limits.h>
-
-const int TRANSFER_SIMPLE=1;
-const int TRANSFER=0;
-
-const unsigned int MAX_SENTENCE_LENGTH_ALLOWED=101;
-const int TRAIN_BUFFER_SIZE= 50000;
-//#ifdef WORDINDEX_WITH_4_BYTE
-typedef unsigned int WordIndex;
-const unsigned int MAX_VOCAB_SIZE=UINT_MAX;
-typedef unsigned int PositionIndex;
-//#else
-//typedef unsigned short WordIndex;
-//const unsigned int MAX_VOCAB_SIZE=USHRT_MAX;
-//typedef unsigned short PositionIndex;
-//#endif
-extern WordIndex MAX_FERTILITY;
-
-const int MAX_W=457979;
-extern double LAMBDA; // Lambda that is used to scale cross_entropy factor
-
-typedef float PROB ;
-typedef float COUNT ;
-
-class LogProb {
- private:
- double x ;
- public:
- LogProb():x(0){}
- LogProb(double y):x(y){}
- LogProb(float y):x(y){}
- LogProb(int y):x(y){}
- LogProb(WordIndex y):x(y){}
- operator double() const {return x;}
- LogProb operator *= (double y) { x *= y ; return *this;}
- LogProb operator *= (LogProb y) { x *= y.x ; return *this;}
- LogProb operator /= (double y) { x /= y ; return *this;}
- LogProb operator /= (LogProb y) { x /= y.x ; return *this;}
- LogProb operator += (double y) { x += y ; return *this;}
- LogProb operator += (LogProb y) { x += y.x ; return *this;}
-};
-
-const int PARLEV_ITER=1;
-const int PARLEV_OPTHEUR=2;
-const int PARLEV_OUTPUT=3;
-const int PARLEV_SMOOTH=4;
-const int PARLEV_EM=5;
-const int PARLEV_MODELS=6;
-const int PARLEV_SPECIAL=7;
-const int PARLEV_INPUT=8;
-
-#endif
-
diff --git a/scripts/training/MGIZA/src/file_spec.h b/scripts/training/MGIZA/src/file_spec.h
deleted file mode 100644
index 945aa5b..0000000
--- a/scripts/training/MGIZA/src/file_spec.h
+++ /dev/null
@@ -1,63 +0,0 @@
-/*
-
-EGYPT Toolkit for Statistical Machine Translation
-Written by Yaser Al-Onaizan, Jan Curin, Michael Jahr, Kevin Knight, John Lafferty, Dan Melamed, David Purdy, Franz Och, Noah Smith, and David Yarowsky.
-
-This program is free software; you can redistribute it and/or
-modify it under the terms of the GNU General Public License
-as published by the Free Software Foundation; either version 2
-of the License, or (at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with this program; if not, write to the Free Software
-Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
-USA.
-
-*/
-#ifndef FILE_SPEC_H
-#define FILE_SPEC_H
-
-#include <time.h>
-#include <stdlib.h>
-#include <string.h>
-#include <stdio.h>
-
-/* This function returns a string, locally called file_spec. This
- string is the concatenation of the date and time of execution
- and the user who is performing the execution */
-/* Originally implemented in C by Yaser Al-Onaizan;
- editions for C++ and formatting by Noah A. Smith, 9 July 1999 */
-
-char *Get_File_Spec (){
- struct tm *local;
- time_t t;
- const char *user;
- char time_stmp[57];
- char *file_spec = 0;
-
- t = time(NULL);
- local = localtime(&t);
-
- sprintf(time_stmp, "%02d-%02d-%02d.%02d%02d%02d.", local->tm_year,
- (local->tm_mon + 1), local->tm_mday, local->tm_hour,
- local->tm_min, local->tm_sec);
-#ifdef WIN32
- user = "WINUSER";
-#else
- user = getenv("USER");
-#endif
-
- file_spec = (char *)malloc(sizeof(char) *
- (strlen(time_stmp) + strlen(user) + 1));
- file_spec[0] = '\0';
- strcat(file_spec, time_stmp) ;
- strcat(file_spec, user);
- return file_spec;
-}
-
-#endif
diff --git a/scripts/training/MGIZA/src/getSentence.cpp b/scripts/training/MGIZA/src/getSentence.cpp
deleted file mode 100644
index ec1ad9d..0000000
--- a/scripts/training/MGIZA/src/getSentence.cpp
+++ /dev/null
@@ -1,446 +0,0 @@
-/*
-
-EGYPT Toolkit for Statistical Machine Translation
-Written by Yaser Al-Onaizan, Jan Curin, Michael Jahr, Kevin Knight, John Lafferty, Dan Melamed, David Purdy, Franz Och, Noah Smith, and David Yarowsky.
-
-This program is free software; you can redistribute it and/or
-modify it under the terms of the GNU General Public License
-as published by the Free Software Foundation; either version 2
-of the License, or (at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with this program; if not, write to the Free Software
-Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
-USA.
-
-*/
-/* --------------------------------------------------------------------------*
- * *
- * Module : getSentece *
- * *
- * Method Definitions File: getSentence.cc *
- * *
- * Objective: Defines clases and methods for handling I/O for the parallel *
- * corpus. *
- *****************************************************************************/
-
-
-#include "getSentence.h"
-#include <iostream>
-#include <strstream>
-#include <stdio.h>
-#include <stdlib.h>
-#include <boost/algorithm/string.hpp>
-#include <vector>
-#include <set>
-#include <pthread.h>
-#include "Parameter.h"
-#include "errno.h"
-
-int PrintedTooLong=0;
-
-/* -------------- Method Defnitions for Class sentenceHandler ---------------*/
-
-GLOBAL_PARAMETER(double,ManlexMAX_MULTIPLICITY,"manlexMAX_MULTIPLICITY","",PARLEV_EM,20.0);
-GLOBAL_PARAMETER(double,Manlexfactor1,"manlexfactor1","",PARLEV_EM,0.0);
-GLOBAL_PARAMETER(double,Manlexfactor2,"manlexfactor2","",PARLEV_EM,0.0);
-
-sentenceHandler::sentenceHandler(const char* filename, vcbList* elist,
- vcbList* flist) : realCount(0)
- // This method is the constructor of the class, it also intitializes the
- // sentence pair sequential number (count) to zero.
-{
- readsent_mutex=new boost::mutex();
- setprob_mutex = new boost::mutex();
-
- position = 0;
- readflag = false ;
- allInMemory = false ;
- inputFilename = filename ;
- inputFile = new ifstream(filename);
- pair_no = 0 ;
- if(!(*inputFile)){
- cerr << "\nERROR:(a) Cannot open " << filename;
- exit(1);
- }
- currentSentence = 0;
- totalPairs1 = 0 ;
- totalPairs2 =0;
- pair_no = 0 ;
- noSentInBuffer = 0 ;
- Buffer.clear();
- bool isNegative=0;
- std::set<WordIndex> evoc,fvoc;
- evoc.insert(0);
- fvoc.insert(0);
- if (elist && flist){
- cout << "Calculating vocabulary frequencies from corpus " << filename << '\n';
- sentPair s ;
- while (getNextSentence(s, elist, flist))
- {
- for(int i = 0 ; i< s.eSent.size() ; i++){
- evoc.insert(s.eSent[i]);
- }
- for(int i = 0 ; i< s.fSent.size() ; i++){
- fvoc.insert(s.fSent[i]);
- }
- totalPairs1++;
- totalPairs2+=s.realCount;
- // NOTE: this value might change during training
- // for words from the manual dictionary, yet this is ignored!
-
- if( s.noOcc<0 )
- isNegative=1;
- }
- }
- if( isNegative==1 )
- {
- cerr << "WARNING: corpus contains negative occurrency frequencies => these are interpreted as entries of a manual dictionary.\n";
- realCount=new Vector<double>(totalPairs1,1.0);
- }
- else
- realCount=0;
- elist->compact(evoc);
- flist->compact(fvoc);
-}
-
-sentenceHandler::sentenceHandler(const char* filename, vcbList* elist,
- vcbList* flist,std::set<WordIndex>& eapp, std::set<WordIndex>& fapp) : realCount(0)
- // This method is the constructor of the class, it also intitializes the
- // sentence pair sequential number (count) to z
-{
- readsent_mutex=new boost::mutex();
- setprob_mutex=new boost::mutex();
- position = 0;
- readflag = false ;
- allInMemory = false ;
- inputFilename = filename ;
- inputFile = new ifstream(filename);
- pair_no = 0 ;
- if(!(*inputFile)){
- cerr << "\nERROR:(a) Cannot open " << filename;
- exit(1);
- }
- currentSentence = 0;
- totalPairs1 = 0 ;
- totalPairs2 =0;
- pair_no = 0 ;
- noSentInBuffer = 0 ;
- Buffer.clear();
- bool isNegative=0;
- if (elist && flist){
- cout << "Calculating vocabulary frequencies from corpus " << filename << '\n';
- sentPair s ;
- while (getNextSentence(s, elist, flist))
- {
- int k;
- for(k=0;k<s.eSent.size();k++){
- eapp.insert(s.eSent[k]);
- }
- for(k=0;k<s.fSent.size();k++){
- fapp.insert(s.fSent[k]);
- }
- totalPairs1++;
- totalPairs2+=s.realCount;
- // NOTE: this value might change during training
- // for words from the manual dictionary, yet this is ignored!
-
- if( s.noOcc<0 )
- isNegative=1;
- }
- }
- if( isNegative==1 )
- {
- cerr << "WARNING: corpus contains negative occurrency frequencies => these are interpreted as entries of a manual dictionary.\n";
- realCount=new Vector<double>(totalPairs1,1.0);
- }
- else
- realCount=0;
-}
-
-void sentenceHandler::rewind()
-{
- readsent_mutex->lock();
- position = 0;
- currentSentence = 0;
- readflag = false ;
- if (!allInMemory ||
- !(Buffer.size() >= 1 && Buffer[currentSentence].sentenceNo == 1)){
- // check if the buffer doe not already has the first chunk of pairs
- if (Buffer.size() > 0)
- cerr << ' ' << Buffer[currentSentence].sentenceNo << '\n';
- // totalPairs = 0 ;
- pair_no = 0 ;
- noSentInBuffer = 0 ;
- Buffer.clear();
- }
- if (!allInMemory){
- delete inputFile;
- inputFile = new ifstream(inputFilename);
- if(!(*inputFile)){
- cerr << "\nERROR:(b) Cannot open " << inputFilename << " " << (int)errno;
- }
- }
- readsent_mutex->unlock();
-}
-
-
-int sentenceHandler::getNextSentence(sentPair& sent, vcbList* elist, vcbList* flist)
-{
- readsent_mutex->lock();
-
- do{
- sentPair s ;
- if (readflag){
- cerr << "Attempting to read from the end of corpus, rewinding\n";
- //rewind();
- break;
- }
- if (currentSentence >= noSentInBuffer){
- if (allInMemory)
- break;
- /* no more sentences in buffer */
- noSentInBuffer = 0 ;
- currentSentence = 0 ;
- Buffer.clear();
- cout << "Reading more sentence pairs into memory ... \n";
- while((noSentInBuffer < TRAIN_BUFFER_SIZE) && readNextSentence(s)){
- if ((s.fSent.size()-1) > (MAX_FERTILITY-1) * (s.eSent.size()-1)){
- cerr << "WARNING: The following sentence pair has source/target sentence length ration more than\n"<<
- "the maximum allowed limit for a source word fertility\n"<<
- " source length = " << s.eSent.size()-1 << " target length = " << s.fSent.size()-1 <<
- " ratio " << double(s.fSent.size()-1)/ (s.eSent.size()-1) << " ferility limit : " <<
- MAX_FERTILITY-1 << '\n';
- cerr << "Shortening sentence \n";
- cerr << s;
- s.eSent.resize(min(s.eSent.size(),s.fSent.size()));
- s.fSent.resize(min(s.eSent.size(),s.fSent.size()));
- }
- Buffer.push_back(s) ;
- if (elist && flist){
- if ((*elist).size() > 0)
- for (WordIndex i= 0 ; i < s.eSent.size() ; i++){
- if (s.eSent[i] >= (*elist).uniqTokens()){
- if( PrintedTooLong++<100)
- cerr << "ERROR: source word " << s.eSent[i] << " is not in the vocabulary list \n";
- exit(-1);
- }
- (*elist).incFreq(s.eSent[i], s.realCount);
- }
- if ((*flist).size() > 0)
- for (WordIndex j= 1 ; j < s.fSent.size() ; j++){
- if (s.fSent[j] >= (*flist).uniqTokens()){
- cerr << "ERROR: target word " << s.fSent[j] << " is not in the vocabulary list \n";
- exit(-1);
- }
- (*flist).incFreq(s.fSent[j], s.realCount);
- }
- }
- noSentInBuffer++;
- }
- if (inputFile->eof()){
- allInMemory = (Buffer.size() >= 1 &&
- Buffer[currentSentence].sentenceNo == 1) ;
- if (allInMemory)
- cout << "Corpus fits in memory, corpus has: " << Buffer.size() <<
- " sentence pairs.\n";
- }
- }
- if(noSentInBuffer <= 0 ){
- //cerr << "# sent in buffer " << noSentInBuffer << '\n';
- readflag = true ;
- break;
- }
- sent = Buffer[currentSentence++] ;
- position ++;
- if( sent.noOcc<0 && realCount ){
- if( Manlexfactor1 && sent.noOcc==-1.0 )
- sent.realCount=Manlexfactor1;
- else if( Manlexfactor2 && sent.noOcc==-2.0 )
- sent.realCount=Manlexfactor2;
- else
- sent.realCount=(*realCount)[sent.getSentenceNo()-1];
- }
- readsent_mutex->unlock();
- return position ;
- }while(false);
- readsent_mutex->unlock();
- return 0;
-}
-bool sentenceHandler::readNextSentence(sentPair& sent)
- /* This method reads in a new pair of sentences, each pair is read from the
- corpus file as line triples. The first line the no of times this line
- pair occured in the corpus, the second line is the source sentence and
- the third is the target sentence. The sentences are represented by a space
- separated positive integer token ids. */
-{
-
- string line;
- bool fail(false) ;
-
- sent.clear();
- vector<string> splits;
- if (getline(*inputFile, line)){
-
- boost::algorithm::split(splits,line,boost::algorithm::is_any_of("|#*"));
-
- if(splits.size() == 1 || splits.size() == 0){
- // continue, no problem
-
- }else if(splits.size()==3){
- line = splits[0];
- }else{
- fail = true;
- return false;
- }
-
- istrstream buffer(line.c_str());
- buffer >> sent.noOcc;
- if( sent.noOcc<0 )
- {
- if( realCount )
- {
- if( Manlexfactor1 && sent.noOcc==-1.0 )
- sent.realCount=Manlexfactor1;
- else if( Manlexfactor2 && sent.noOcc==-2.0 )
- sent.realCount=Manlexfactor2;
- else
- {
- sent.realCount=(*realCount)[pair_no];
- }
- }
- else
- sent.realCount=1.0;
- }
- else
- sent.realCount=sent.noOcc;
- }
- else {
- fail = true ;;
- }
- if (splits.size()==3 || getline(*inputFile, line)){
- if(splits.size()==3){
- line = splits[1];
- }
- istrstream buffer(line.c_str());
- WordIndex w; // w is a local variabe for token id
- sent.eSent.push_back(0); // each source word is assumed to have 0 ==
- // a null word (id 0) at the begining of the sentence.
- while(buffer>>w){ // read source sentece , word by word .
- if (sent.eSent.size() < MAX_SENTENCE_LENGTH)
- sent.eSent.push_back(w);
- else {
- if( PrintedTooLong++<100)
- cerr << "{WARNING:(a)truncated sentence "<<pair_no<<"}";
- //cerr << "ERROR: getSentence.cc:getNextSentence(): sentence exceeds preset length limit of : " << MAX_SENTENCE_LENGTH << '\n';
- //cerr << "The following sentence will be truncated\n" << line;
- break ;
- }
- }
- }
- else {
- fail = true ;
- }
- if (splits.size()==3 ||getline(*inputFile, line)){
- if(splits.size()==3){
- line = splits[2];
- }
- istrstream buffer(line.c_str());
- WordIndex w; // w is a local variabe for token id
- sent.fSent.push_back(0); //0 is inserted for program uniformity
- while(buffer>>w){ // read target sentece , word by word .
- if (sent.fSent.size() < MAX_SENTENCE_LENGTH)
- sent.fSent.push_back(w);
- else {
- if( PrintedTooLong++<100)
- cerr << "{WARNING:(b)truncated sentence "<<pair_no<<"}";
- //cerr << "ERROR: getSentence.cc:getNextSentence(): sentence exceeds preset length limit of : " << MAX_SENTENCE_LENGTH << '\n';
- //cerr << "The following sentence will be truncated\n" << line;
- break ;
- }
- }
- }
- else {
- fail = true ;
- }
- if (fail){
- sent.eSent.clear();
- sent.fSent.clear();
- sent.sentenceNo = 0 ;
- sent.noOcc = 0 ;
- sent.realCount=0;
- return(false);
- }
- if( sent.eSent.size()==1||sent.fSent.size()==1 )
- cerr << "ERROR: Forbidden zero sentence length " << sent.sentenceNo << endl;
- sent.sentenceNo = ++pair_no;
- if(pair_no % 100000 == 0)
- cout << "[sent:" << sent.sentenceNo << "]"<< '\n';
- return true;
-}
-
-double optimize_lambda(Vector<double>&vd)
-{
- Vector<double> l;
- for(double lambda=1.0;lambda<ManlexMAX_MULTIPLICITY;lambda+=0.33)
- {
- double prod=0.0;
- for(unsigned int i=0;i<vd.size();++i)
- {
- prod += vd[i]*exp(lambda*vd[i])/(exp(lambda*vd[i])-1.0);
- }
- l.push_back(fabs(prod-1.0));
- }
- double lam=double(min_element(l.begin(),l.end())-l.begin())*0.33+1.0;
- if( lam<1.0 )
- {
- cerr << "ERROR: lambda is smaller than one: " << lam << endl;
- for(unsigned int i=0;i<vd.size();++i)
- cerr << vd[i] << ' ';
- cerr << endl;
- }
- return lam;
-}
-
-void sentenceHandler::setProbOfSentence(const sentPair&s,double d)
-{
-
- if( realCount==0 )
- return;
- else{
- setprob_mutex->lock();
- if( s.noOcc<=0 )
- {
- double ed=exp(d);
- if( oldPairs.size()>0&&(oldPairs.back().get_eSent()!=s.get_eSent()||oldPairs.back().getSentenceNo()>=s.getSentenceNo()) )
- {
- double lambda=optimize_lambda(oldProbs);
- for(unsigned int i=0;i<oldPairs.size();++i)
- {
- if( oldProbs[i]<1e-5 )
- (*realCount)[oldPairs[i].getSentenceNo()-1]=1.0;
- else
- (*realCount)[oldPairs[i].getSentenceNo()-1]=lambda*oldProbs[i]/(1-exp(-lambda*oldProbs[i]));
- }
- oldPairs.clear();
- oldProbs.clear();
- }
- oldPairs.push_back(s);
- oldProbs.push_back(ed);
- }
- setprob_mutex->unlock();
- }
-}
-
-/* ------------- End of Method Definition of Class sentenceHandler ----------*/
-
-
-
-
-
-
diff --git a/scripts/training/MGIZA/src/getSentence.h b/scripts/training/MGIZA/src/getSentence.h
deleted file mode 100644
index d7d4e53..0000000
--- a/scripts/training/MGIZA/src/getSentence.h
+++ /dev/null
@@ -1,138 +0,0 @@
-/*
-
-EGYPT Toolkit for Statistical Machine Translation
-Written by Yaser Al-Onaizan, Jan Curin, Michael Jahr, Kevin Knight, John Lafferty, Dan Melamed, David Purdy, Franz Och, Noah Smith, and David Yarowsky.
-
-This program is free software; you can redistribute it and/or
-modify it under the terms of the GNU General Public License
-as published by the Free Software Foundation; either version 2
-of the License, or (at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with this program; if not, write to the Free Software
-Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
-USA.
-
-*/
-/* --------------------------------------------------------------------------*
- * *
- * Module : getSentence *
- * *
- * Prototypes File: getSentence.h *
- * *
- * Objective: Defines clases and methods for handling I/O for the parallel *
- * corpus. *
- *****************************************************************************/
-
-
-
-
-
-#ifndef _sentenceHandler_h
-#define _sentenceHandler_h 1
-
-
-#include <iostream>
-#include <fstream>
-#include <string>
-#include <set>
-#include <stdio.h>
-#include <stdlib.h>
-#include <pthread.h>
-#include "Vector.h"
-#include "defs.h"
-#include "vocab.h"
-#include "Globals.h"
-#include <boost/thread/mutex.hpp>
-/*----------------------- Class Prototype Definition ------------------------*
- Class Name: sentenceHandleer
- Objective: This class is defined to handle training sentece pairs from the
- parallel corpus. Each pair has: a target sentece, called here French; a
- source sentece, called here English sentece; and an integer number denoting
- the number of times this pair occured in trining corpus. Both source and
- target senteces are represented as integer vector (variable size arrays),
- each entry is a numeric value which is the token id for the particular token
- in the sentece.
-
- *---------------------------------------------------------------------------*/
-
-class sentPair{
- public:
- int sentenceNo ;
- float noOcc;
- float realCount;
- Vector<WordIndex> eSent ;
- Vector<WordIndex> fSent;
-
- public:
- sentPair(){};
- void clear(){ eSent.clear(); fSent.clear(); noOcc=0; realCount=0; sentenceNo=0;};
- const Vector<WordIndex>&get_eSent()const
- { return eSent; }
- const Vector<WordIndex>&get_fSent()const
- { return fSent; }
- int getSentenceNo()const
- { return sentenceNo; }
- double getCount()const
- { return realCount; }
-
-};
-
-inline ostream&operator<<(ostream&of,const sentPair&s)
-{
- of << "Sent No: " << s.sentenceNo << " , No. Occurrences: " << s.noOcc << '\n';
- if( s.noOcc!=s.realCount )
- of << " Used No. Occurrences: " << s.realCount << '\n';
- unsigned int i;
- for(i=0; i < s.eSent.size(); i++)
- of << s.eSent[i] << ' ';
- of << '\n';
- for(i=1; i < s.fSent.size(); i++)
- of << s.fSent[i] << ' ';
- of << '\n';
- return of;
-}
-
-/*Thread-safe version of sentence handler*/
-class sentenceHandler{
-public:
- const char * inputFilename; // parallel corpus file name, similar for all
- // sentence pair objects
- ifstream *inputFile; // parallel corpus file handler
- Vector<sentPair> Buffer;
- int noSentInBuffer ;
- int currentSentence ;
- int position; /*Sentence position (will be returned)*/
- int totalPairs1 ;
- double totalPairs2;
- bool readflag ; // true if you reach the end of file
- bool allInMemory ;
- int pair_no ;
- Vector<double> *realCount;
-
- Vector<sentPair> oldPairs;
- Vector<double> oldProbs;
- sentenceHandler(){readsent_mutex=new boost::mutex();setprob_mutex=new boost::mutex();};
- sentenceHandler(const char* filename, vcbList* elist=0, vcbList* flist=0);
- sentenceHandler(const char* filename, vcbList* elist, vcbList* flist,set<WordIndex>& eapp, set<WordIndex>& fapp);
- ~sentenceHandler(){delete readsent_mutex; delete setprob_mutex;}
- void rewind();
- int getNextSentence(sentPair&, vcbList* = 0, vcbList* = 0); // will be defined in the definition file, this
- int getTotalNoPairs1()const {return totalPairs1;};
- double getTotalNoPairs2()const {return totalPairs2;};
- // method will read the next pair of sentence from memory buffer
- void setProbOfSentence(const sentPair&s,double d);
-private:
-
- boost::mutex* readsent_mutex;
- boost::mutex* setprob_mutex;
- bool readNextSentence(sentPair&); // will be defined in the definition file, this
-};
-
-#endif
-
diff --git a/scripts/training/MGIZA/src/hmm.cpp b/scripts/training/MGIZA/src/hmm.cpp
deleted file mode 100644
index 3b104a3..0000000
--- a/scripts/training/MGIZA/src/hmm.cpp
+++ /dev/null
@@ -1,1125 +0,0 @@
-/*
-
-Copyright (C) 1998,1999,2000,2001 Franz Josef Och (RWTH Aachen - Lehrstuhl fuer Informatik VI)
-
-This file is part of GIZA++ ( extension of GIZA ).
-
-This program is free software; you can redistribute it and/or
-modify it under the terms of the GNU General Public License
-as published by the Free Software Foundation; either version 2
-of the License, or (at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with this program; if not, write to the Free Software
-Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
-USA.
-
-*/
-#include "hmm.h"
-#include "Globals.h"
-#include "utility.h"
-#include "HMMTables.h"
-#include "ForwardBackward.h"
-#include "Parameter.h"
-#include <iostream>
-#include "syncObj.h"
-using namespace std;
-#define CLASSIFY(i,empty,ianf) bool empty=(i>=l); unsigned int ianf=(i%l);
-#define CLASSIFY2(i,ianf) unsigned int ianf=(i%l);
-
-
-short PredictionInAlignments=0;
-short UniformEntryExit=3;
-short HMMTrainingSpecialFlags=0;
-
-GLOBAL_PARAMETER2(int,ModelH_Dump_Freq,
- "HMM DUMP FREQUENCY","th",
- "dump frequency of HMM",
- PARLEV_OUTPUT,0);
-
-GLOBAL_PARAMETER(short,CompareAlDeps,"emAlignmentDependencies",
- "lextrain: dependencies in the HMM alignment model. "
- " &1: sentence length; &2: previous class; &4: previous position; "
- " &8: French position; &16: French class"
- ,PARLEV_MODELS,2);
-
-GLOBAL_PARAMETER(double,GLOBALProbabilityForEmpty,
- "emProbForEmpty","f-b-trn: probability for empty word",
- PARLEV_MODELS,0.4);
-
-GLOBAL_PARAMETER(short,SmoothHMM,"emSmoothHMM",
- "f-b-trn: smooth HMM model &1: modified counts; &2:perform smoothing with -emAlSmooth",
- PARLEV_SPECIAL,2);
-
-GLOBAL_PARAMETER(double,HMMAlignmentModelSmoothFactor,"emAlSmooth",
- "f-b-trn: smoothing factor for HMM alignment model (can be ignored by -emSmoothHMM)",
- PARLEV_SMOOTH,0.2);
-
-
-/*template<class T>
-void smooth_standard(T*a,T*b,double p)
-{
- int n=b-a;
- if( n==0 )
- return;
- double pp=p/n;
- for(T*i=a;i!=b;++i)
- *i = (1.0-p)*(*i)+pp;
-}*/
-
-
-hmm::hmm(model2&m2,WordClasses &e, WordClasses& f)
-: ewordclasses(e), fwordclasses(f),model2(m2),counts(GLOBALProbabilityForEmpty,ewordclasses,fwordclasses),
-probs(GLOBALProbabilityForEmpty,ewordclasses,fwordclasses)
-{
-}
-
-
-void hmm::initialize_table_uniformly(sentenceHandler&){}
-
-struct hmm_em_loop_t{
- hmm *m;
- int done;
- int valid;
- string alignfile;
- int it;
- bool dump_files;
- bool resume;
- pthread_t thread;
- hmm_em_loop_t():m(0),done(0),valid(0){};
-};
-
-void* hmm_exe_emloop(void *arg){
- hmm_em_loop_t* em =(hmm_em_loop_t *) arg;
- em->m->em_thread(em->it,em->alignfile,em->dump_files,em->resume);
- em->done = -1;
- return arg;
-}
-
-void hmm::em_thread(int it,string alignfile,bool dump_files,bool resume){
- em_loop(perp, sHandler1, dump_files , alignfile.c_str(), trainViterbiPerp, false,it==1 && (!resume),it);
-}
-extern short NCPUS;
-int hmm::em_with_tricks(int noIterations,bool dumpCount,
- const char* dumpCountName, bool useString ,bool resume){
- double minErrors=1.0;int minIter=0;
- string modelName="Hmm",shortModelName="hmm";
- int dumpFreq=ModelH_Dump_Freq;
- time_t it_st, st, it_fn, fn;
- string tfile, afile,afileh, number, alignfile, test_alignfile;
- int pair_no = 0;
- bool dump_files = false ;
- ofstream of2 ;
- st = time(NULL) ;
- sHandler1.rewind();
- cout << "\n==========================================================\n";
- cout << modelName << " Training Started at: " << my_ctime(&st);
- vector<hmm_em_loop_t> th;
- th.resize(NCPUS);
- for(int it=1; it <= noIterations ; it++){
- pair_no = 0;
- it_st = time(NULL) ;
- cout << endl << "-----------\n" << modelName << ": Iteration " << it << '\n';
- dump_files = (dumpFreq != 0) && ((it % dumpFreq) == 0 || it == noIterations) && !NODUMPS;
-
- cerr << "Dump files " << dump_files << " it " << it << " noIterations " << noIterations << " dumpFreq " << dumpFreq <<endl;
- //dump_files = true;
- number = "";
- int n = it;
- do{
- number.insert((size_t)0, 1, (char)(n % 10 + '0'));
- } while((n /= 10) > 0);
- tfile = Prefix + ".t" + shortModelName + "." + number ;
- afile = Prefix + ".a" + shortModelName + "." + number ;
- // acfile = Prefix + ".ac" + shortModelName + "." + number ;
- afileh = Prefix + ".h" + shortModelName + "." + number ;
- alignfile = Prefix + ".A" + shortModelName + "." + number ;
- test_alignfile = Prefix + ".tst.A" + shortModelName + "." + number ;
- counts=HMMTables<int,WordClasses>(GLOBALProbabilityForEmpty,ewordclasses,fwordclasses);
- aCountTable.clear();
- initAL();
- sHandler1.rewind();
- int k;
- char node[2] ;
- node[1] = '\0';
- for (k=1 ; k< NCPUS ; k++){
- th[k].m = this;
- th[k].done = 0;
- th[k].valid = 0;
- th[k].it = it;
- th[k].resume = resume;
- th[k].alignfile = alignfile + ".part";
- node[0] = '0' + k;
- th[k].alignfile += node;
- th[k].dump_files = dump_files;
- th[k].valid = pthread_create(&(th[k].thread),NULL,hmm_exe_emloop,&(th[k]));
- if(th[k].valid){
- cerr << "Error starting thread " << k << endl;
- }
- }
- node[0] = '0';
- alignfile += ".part";
- alignfile += node;
- em_loop(perp, sHandler1, dump_files , alignfile.c_str(), trainViterbiPerp, false,it==1 && (!resume),it);
- for (k=1;k<NCPUS;k++){
- pthread_join((th[k].thread),NULL);
- cerr << "Thread " << k << "done" << endl;
- }
- perp.record("HMM");
- trainViterbiPerp.record("HMM");
- errorReportAL(cout,"HMM");
-
- sHandler1.rewind();
- if( errorsAL()<minErrors ){
- minErrors=errorsAL();
- minIter=it;
- }
- if (testPerp && testHandler){
- testHandler->rewind();
- em_loop(*testPerp, *testHandler, dump_files, test_alignfile.c_str(), *testViterbiPerp, true,it==1 && (!resume),it);
- testHandler->rewind();
- }
- if (dump_files&&OutputInAachenFormat==1)
- tTable.printCountTable(tfile.c_str(),Elist.getVocabList(),Flist.getVocabList(),1);
-
- if(dumpCount && it == noIterations){
- string realTableName = dumpCountName;
- realTableName += ".t.count";
- tTable.printCountTable(realTableName.c_str(),Elist.getVocabList(),Flist.getVocabList(),useString);
- string realATableName = dumpCountName;
- realATableName += ".a.count";
- aCountTable.printRealTable(realATableName.c_str());
- string realHTableName = dumpCountName;
- realHTableName += ".h.count";
- string fnamealpha = realHTableName;
- string fnamebeta = realHTableName;
- fnamealpha += ".alpha";
- fnamebeta += ".beta";
- counts.writeJumps(realHTableName.c_str(),NULL,fnamealpha.c_str(),fnamebeta.c_str());
-
- }
- tTable.normalizeTable(Elist, Flist);
- aCountTable.normalize(aTable);
- probs=counts;
- cout << modelName << ": ("<<it<<") TRAIN CROSS-ENTROPY " << perp.cross_entropy()
- << " PERPLEXITY " << perp.perplexity() << '\n';
- if (testPerp && testHandler)
- cout << modelName << ": ("<<it<<") TEST CROSS-ENTROPY " << (*testPerp).cross_entropy()
- << " PERPLEXITY " << (*testPerp).perplexity()
- << '\n';
- cout << modelName << ": ("<<it<<") VITERBI TRAIN CROSS-ENTROPY " << trainViterbiPerp.cross_entropy()
- << " PERPLEXITY " << trainViterbiPerp.perplexity() << '\n';
- if (testPerp && testHandler)
- cout << modelName << ": ("<<it<<") VITERBI TEST CROSS-ENTROPY " << testViterbiPerp->cross_entropy()
- << " PERPLEXITY " << testViterbiPerp->perplexity()
- << '\n';
- if (dump_files){
- if( OutputInAachenFormat==0)
- tTable.printProbTable(tfile.c_str(),Elist.getVocabList(),Flist.getVocabList(),OutputInAachenFormat);
- // ofstream afilestream(afileh.c_str());
- string fnamealpha = afileh;
- string fnamebeta = afileh;
- fnamealpha += ".alpha";
- fnamebeta += ".beta";
- probs.writeJumps(afileh.c_str(),NULL,fnamealpha.c_str(),fnamebeta.c_str());
-// aCountTable.printTable(acfile.c_str());
- aTable.printTable(afile.c_str());
- }
- it_fn = time(NULL) ;
- cout << "\n" << modelName << " Iteration: " << it<< " took: " <<
- difftime(it_fn, it_st) << " seconds\n";
- } // end of iterations
- fn = time(NULL) ;
- cout << endl << "Entire " << modelName << " Training took: " << difftime(fn, st) << " seconds\n";
- //cout << "tTable contains " << tTable.getHash().bucket_count()
- // << " buckets and " << tTable.getHash().size() << " entries." ;
- cout << "==========================================================\n";
- return minIter;
-}
-
-/*template<class T>
-T normalize_if_possible_with_increment(T*a,T*b,int increment)
-{
- T sum=0;
- for(T*i=a;i!=b;i+=increment)
- sum+=*i;
- if( sum )
- for(T*i=a;i!=b;i+=increment)
- *i/=sum;
- else
- {
- T factor=increment/(b-a);
- for(T*i=a;i!=b;i+=increment)
- *i=factor;
- }
- return sum;
-}*/
-
-void hmm::load_table(const char* aname){
- cout << "Hmm: loading a table not implemented.\n";
- abort();
- ifstream anamefile(aname);
- probs.readJumps(anamefile);
-}
-
-HMMNetwork *hmm::makeHMMNetwork(const Vector<WordIndex>& es,const Vector<WordIndex>&fs,bool doInit)const
-{
- unsigned int i,j;
- unsigned int l = es.size() - 1;
- unsigned int m = fs.size() - 1;
- unsigned int I=2*l,J=m;
- int IJ=I*J;
- bool DependencyOfJ=(CompareAlDeps&(16|8))||(PredictionInAlignments==2);
- bool DependencyOfPrevAJ=(CompareAlDeps&(2|4))||(PredictionInAlignments==0);
- HMMNetwork *net = new HMMNetwork(I,J);
- fill(net->alphainit.begin(),net->alphainit.end(),0.0);
- fill(net->betainit.begin(),net->betainit.end(),0.0);
- for(j=1;j<=m;j++){
- for(i=1;i<=l;i++){
- // cout << es[i] <<" " << fs[j] <<" " << tTable.getProb(es[i], fs[j]) << endl;
- net->n(i-1,j-1)=tTable.getProb(es[i], fs[j]) ;
- }
- double emptyContribution=0;
- emptyContribution=tTable.getProb(es[0],fs[j]) ;
- for(i=1;i<=l;i++)
- net->n(i+l-1,j-1)=emptyContribution;
- net->finalMultiply*=max(normalize_if_possible_with_increment(&net->n(0,j-1),&net->n(0,j-1)+IJ,J),double(1e-12));
- }
- if( DependencyOfJ )
- net->e.resize(m-1);
- else
- net->e.resize(J>1);
- for(j=0;j<net->e.size();j++){
- int frenchClass=fwordclasses.getClass(fs[1+min(int(m)-1,int(j)+1)]);
- net->e[j].resize(I,I,0);
- for(unsigned int i1=0;i1<I;++i1) {
- Array<double> al(l);
- CLASSIFY2(i1,i1real);
- for(unsigned int i2=0;i2<l;i2++)
- al[i2]=probs.getAlProb(i1real,i2,l,m,ewordclasses.getClass(es[1+i1real]),frenchClass
- ,j+1);
- normalize_if_possible(const_cast<double*>(&al[0]),const_cast<double*>((&al[0])+al.size()));
- if( SmoothHMM&2 )
- smooth_standard(const_cast<double*>(&al[0]),const_cast<double*>((&al[0])+al.size()),HMMAlignmentModelSmoothFactor);
- for(unsigned int i2=0;i2<I;i2++) {
- CLASSIFY(i2,empty_i2,i2real);
- net->e[j](i1,i2) = al[i2real];
-
- if( empty_i2 )
- if(i1real!=i2real) {
- net->e[j](i1,i2)=0;
- } else{
- net->e[j](i1,i2)=doInit?al[0]:(probs.getProbabilityForEmpty()); // make first HMM iteration like IBM-1
- }
- }
- normalize_if_possible(&net->e[j](i1,0),&net->e[j](i1,0)+I);
- }
- }
- if( doInit ){
- for(unsigned int i=0;i<I;++i)
- {
- net->alphainit[i]=net->betainit[i]=(i<I/2)?1:(2.0/I);
- net->betainit[i]=1.0;
- }
- }else{
- if( DependencyOfPrevAJ==0 ){
- for(i=0;i<I;i++){
- CLASSIFY2(i,ireal);
- net->alphainit[i]=probs.getAlProb(-1,ireal,l,m,0,fwordclasses.getClass(fs[1+0]),0);
- }
- }else{
- if( UniformEntryExit&2 )probs.getBetaInit(I,net->betainit);
- if( UniformEntryExit&1 )probs.getAlphaInit(I,net->alphainit);
- }
- }
- massert( net->alphainit.size()==I );massert( net->betainit.size()==I );
- normalize_if_possible(const_cast<double*>(&(net->alphainit[0])),const_cast<double*>(&(net->alphainit[0])+net->alphainit.size()));
- normalize_if_possible(const_cast<double*>(&(net->betainit[0])),const_cast<double*>(&(net->betainit[0])+net->betainit.size()));
- transform(net->betainit.begin(),net->betainit.end(),net->betainit.begin(),bind1st(multiplies<double>(),2*l));
- return net;
-}
-extern float MINCOUNTINCREASE;
-
-void hmm::em_loop(Perplexity& perp, sentenceHandler& sHandler1,
- bool dump_alignment, const char* alignfile, Perplexity& viterbi_perp,
- bool test,bool doInit,int
-){
- WordIndex i, j, l, m ;
- double cross_entropy;
- int pair_no=0 ;
- perp.clear();
- viterbi_perp.clear();
- ofstream of2;
- // for each sentence pair in the corpus
- if (dump_alignment||FEWDUMPS )
- of2.open(alignfile);
- sentPair sent ;
-
- while(sHandler1.getNextSentence(sent)){
- const Vector<WordIndex>& es = sent.get_eSent();// #
- const Vector<WordIndex>& fs = sent.get_fSent();
- const float so = sent.getCount();
- l = es.size() - 1;
- m = fs.size() - 1;
- cross_entropy = log(1.0);
- Vector<WordIndex> viterbi_alignment(fs.size());// #
-
- unsigned int I=2*l,J=m;
- bool DependencyOfJ=(CompareAlDeps&(16|8))||(PredictionInAlignments==2);
- bool DependencyOfPrevAJ=(CompareAlDeps&(2|4))||(PredictionInAlignments==0);
- HMMNetwork *net=makeHMMNetwork(es,fs,doInit);
-
- Array<double> gamma;
- Array<Array2<double> > epsilon(DependencyOfJ?(m-1):1);
- double trainProb;
- trainProb=ForwardBackwardTraining(*net,gamma,epsilon);
-
- if( !test ){
-
-#ifdef WIN32
- double *gp=const_cast<double*>(&(gamma[0]));
-#else
- double *gp=conv<double>(gamma.begin());
-#endif
-
- for(unsigned int i2=0;i2<J;i2++)
- for(unsigned int i1=0;i1<I;++i1,++gp){
- if( *gp>MINCOUNTINCREASE ) {
- COUNT add= *gp*so;
- if( i1>=l ){
- tTable.incCount(es[0],fs[1+i2],add);
- aCountTable.addValue(0,i2+1,l,m,add);
- //aCountTable.getRef(0,i2+1,l,m)+=add;
- } else {
- tTable.incCount(es[1+i1],fs[1+i2],add);
- aCountTable.addValue(1+i1,1+i2,l,m,add);
- //aCountTable.getRef(1+i1,1+i2,l,m)+=add;
- }
- }
- }
- double p0c=0.0,np0c=0.0;
- for(unsigned int jj=0;jj<epsilon.size();jj++){
- int frenchClass=fwordclasses.getClass(fs[1+min(int(m)-1,int(jj)+1)]);
- double *ep=epsilon[jj].begin();
- if( ep ){
- //for(i=0;i<I;i++)
- // normalize_if_possible_with_increment(ep+i,ep+i+I*I,I);
- // for(i=0;i<I*I;++i)
- // ep[i] *= I;
- //if( DependencyOfJ )
- // if( J-1 )
- // for(i=0;i<I*I;++i)
- // ep[i] /= (J-1);
- double mult=1.0;
- mult*=l;
- //if( DependencyOfJ && J-1)
- // mult/=(J-1);
- for(i=0;i<I;i++){
- for(unsigned int i_bef=0;i_bef<I;i_bef++,ep++){
- CLASSIFY(i,i_empty,ireal);
- CLASSIFY2(i_bef,i_befreal);
- if( i_empty )
- p0c+=*ep * mult;
- else{
- int v = ewordclasses.getClass(es[1+i_befreal]);
- //cerr << v <<" " << es.size() << " "<< i_befreal << endl;
- counts.addAlCount(i_befreal,ireal,l,m,v,
- frenchClass ,jj+1,*ep * mult,0.0);
- np0c+=*ep * mult;
- }
- massert( &epsilon[jj](i,i_bef)== ep);
- }
- }
- }
- }
-
-#ifdef WIN32
- double *gp1=const_cast<double *>(&(gamma[0])),*gp2=const_cast<double*>(&(gamma[0])+gamma.size())-I;
-#else
- double *gp1=conv<double>(gamma.begin()),*gp2=conv<double>(gamma.end())-I;
-#endif
- hmmentry_type&ai0=counts.doGetAlphaInit(I);
- Array<double>&ai = ai0.first;
- hmmentry_type&bi0=counts.doGetBetaInit(I);
- Array<double>&bi = bi0.first;
- int firstFrenchClass=(fs.size()>1)?(fwordclasses.getClass(fs[1+0])):0;
-#ifdef WIN32
- ai0.second->lock();
-#else
- ai0.second.lock();
-#endif
- for(i=0;i<I;i++,gp1++){
- CLASSIFY(i,i_empty,ireal);
- ai[i]+= *gp1;
- //bi[i]+= *gp2;
- if( DependencyOfPrevAJ==0 ){
- if( i_empty )
- p0c+=*gp1;
- else{
- counts.addAlCount(-1,ireal,l,m,0,firstFrenchClass,0,*gp1,0.0);
- np0c+=*gp1;
- }
- }
- }
-#ifdef WIN32
- ai0.second->unlock();
- bi0.second->lock();
-#else
- ai0.second.unlock();
- bi0.second.lock();
-#endif
-
- for(i=0;i<I;i++,gp2++){
- CLASSIFY(i,i_empty,ireal);
- bi[i]+= *gp2;
- }
-#ifdef WIN32
- bi0.second->unlock();
-#else
- bi0.second.unlock();
-#endif
-
- if( Verbose )
- cout << "l: " << l << "m: " << m << " p0c: " << p0c << " np0c: " << np0c << endl;
- }
-
- cross_entropy+=log(max(trainProb,1e-100))+log(max(net->finalMultiply,1e-100));
- Array<int>vit;
- double viterbi_score=1.0;
- if( (HMMTrainingSpecialFlags&1) )
- HMMViterbi(*net,gamma,vit);
- else
- viterbi_score=HMMRealViterbi(*net,vit);
- for(j=1;j<=m;j++){
- viterbi_alignment[j]=vit[j-1]+1;
- if( viterbi_alignment[j]>l)
- viterbi_alignment[j]=0;
- }
- sHandler1.setProbOfSentence(sent,cross_entropy);
- perp.addFactor(cross_entropy, so, l, m,1);
- viterbi_perp.addFactor(log(viterbi_score)+log(max(net->finalMultiply,1e-100)), so, l, m,1);
- if( Verbose )
- cout << "Viterbi-perp: " << log(viterbi_score) << ' ' << log(max(net->finalMultiply,1e-100)) << ' ' << viterbi_score << ' ' << net->finalMultiply << ' ' << *net << "gamma: " << gamma << endl;
-
- delete net;net=0;
- if (dump_alignment||(FEWDUMPS&&sent.getSentenceNo()<1000) )
- printAlignToFile(es, fs, Elist.getVocabList(), Flist.getVocabList(), of2, viterbi_alignment, sent.getSentenceNo(), viterbi_score);
- addAL(viterbi_alignment,sent.getSentenceNo(),l);
-
- pair_no++;
- } /* of while */
-
-}
-
-void hmm::clearCountTable(){counts=HMMTables<int,WordClasses>(GLOBALProbabilityForEmpty,ewordclasses,fwordclasses);}
-
-#if 0
-CTTableDiff<COUNT,PROB>* hmm::em_loop_1(Perplexity& perp, sentenceHandler& sHandler1,
- bool dump_alignment, const char* alignfile, Perplexity& viterbi_perp,
- bool test,bool doInit,int
-){
- CTTableDiff<COUNT,PROB> *diff = new CTTableDiff<COUNT,PROB>();
- //diff->incCount(1,1,0);
- WordIndex i, j, l, m ;
- double cross_entropy;
- int pair_no=0 ;
- perp.clear();
- viterbi_perp.clear();
- ofstream of2;
- // for each sentence pair in the corpus
- if (dump_alignment||FEWDUMPS )
- of2.open(alignfile);
- sentPair sent ;
- sHandler1.rewind();
- int nnn = 0;
- while(sHandler1.getNextSentence(sent)){
- nnn ++;
- cout << nnn << endl;
- cout << 1 << endl;
- const Vector<WordIndex>& es = sent.get_eSent();
- const Vector<WordIndex>& fs = sent.get_fSent();
- const float so = sent.getCount();
- l = es.size() - 1;
- m = fs.size() - 1;
- cross_entropy = log(1.0);
- Vector<WordIndex> viterbi_alignment(fs.size());
-
- unsigned int I=2*l,J=m;
- bool DependencyOfJ=(CompareAlDeps&(16|8))||(PredictionInAlignments==2);
- bool DependencyOfPrevAJ=(CompareAlDeps&(2|4))||(PredictionInAlignments==0);
- cout << 2 << endl;
- HMMNetwork *net=makeHMMNetwork(es,fs,doInit);
- Array<double> gamma;
- Array<Array2<double> > epsilon(DependencyOfJ?(m-1):1);
- double trainProb;
- cout << 2.5 << endl;
- trainProb=ForwardBackwardTraining(*net,gamma,epsilon);
- cout << 3 << endl;
- if( !test ){
- double *gp=conv<double>(gamma.begin());
- cout << 4 << endl;
- for(unsigned int i2=0;i2<J;i2++)for(unsigned int i1=0;i1<I;++i1,++gp){
- if( *gp>MINCOUNTINCREASE ) {
- COUNT add= *gp*so;
- if( i1>=l ){
- diff->incCount(es[0],fs[1+i2],add);
- //tTable.incCount(es[0],fs[1+i2],add);
- aCountTable.getRef(0,i2+1,l,m)+=add;
- } else {
- diff->incCount(es[1+i1],fs[1+i2],add);
- //tTable.incCount(es[1+i1],fs[1+i2],add);
- aCountTable.getRef(1+i1,1+i2,l,m)+=add;
- }
- }
- }
- cout << 5 << endl;
- double p0c=0.0,np0c=0.0;
- for(unsigned int jj=0;jj<epsilon.size();jj++){
- if (nnn==7779) cout << 1 << endl;
- int frenchClass=fwordclasses.getClass(fs[1+min(int(m)-1,int(jj)+1)]);
- if (nnn==7779) cout << 2 << endl;
- double *ep=epsilon[jj].begin();
- if (nnn==7779) cout << 3 << endl;
- if( ep ){
- //for(i=0;i<I;i++)
- // normalize_if_possible_with_increment(ep+i,ep+i+I*I,I);
- // for(i=0;i<I*I;++i)
- // ep[i] *= I;
- //if( DependencyOfJ )
- // if( J-1 )
- // for(i=0;i<I*I;++i)
- // ep[i] /= (J-1);
- double mult=1.0;
- mult*=l;
- //if( DependencyOfJ && J-1)
- // mult/=(J-1);
- if (nnn==7779) cout << 4 << ":" << I << endl;
- for(i=0;i<I;i++){
- if (nnn==7779) cout << "i:" << i << endl;
- for(unsigned int i_bef=0;i_bef<I;i_bef++,ep++){
- if (nnn==7779) cout << " CL 1" << endl;
- CLASSIFY(i,i_empty,ireal);
- if (nnn==7779) cout << " CL 2 : " << i_bef << " " << (size_t)ep << endl;
- CLASSIFY2(i_bef,i_befreal);
- if((i+1)*(i_bef+1)>epsilon[jj].getLen1()*epsilon[jj].getLen2()){
- continue;
- }
- if( i_empty )
- p0c+=epsilon[jj](i,i_bef)*mult;// p0c+=*ep * mult;
- else{
- if (nnn==7779) cout << "ELSE" << endl;
- if (nnn==7779){
- cout << i_befreal<<" " <<ireal<<" " << l<<" " << m<<" "<< jj<<" "<<epsilon.size()<< " " << epsilon[jj].getLen1() <<" " << epsilon[jj].getLen2()<< endl;
- np0c+=epsilon[jj](i,i_bef)*mult;
- cout <<"..."<<endl;
- cout <<"......"<<ewordclasses.getClass(es[1+i_befreal]) << endl;
- cout <<"......"<<endl;
- counts.addAlCount(i_befreal,ireal,l,m,ewordclasses.getClass(es[1+i_befreal]),
- frenchClass ,jj+1,0,0.0);
- np0c+=epsilon[jj](i,i_bef)*mult;
- }
- else{
- counts.addAlCount(i_befreal,ireal,l,m,ewordclasses.getClass(es[1+i_befreal]),
- frenchClass ,jj+1,epsilon[jj](i,i_bef)*mult,0.0);
- np0c+=epsilon[jj](i,i_bef)*mult;
- }
- }
- if (nnn==7779) cout << "FI" << endl;
- massert( &epsilon[jj](i,i_bef)== ep);
- }
- }
- if (nnn==7779) cout << 5 << endl;
- }
- }
- // cout << 6 << endl;
- double *gp1=conv<double>(gamma.begin()),*gp2=conv<double>(gamma.end())-I;
- Array<double>&ai=counts.doGetAlphaInit(I);/*If it is not get yet, init it, all operation envolved is add*/
- Array<double>&bi=counts.doGetBetaInit(I);
- int firstFrenchClass=(fs.size()>1)?(fwordclasses.getClass(fs[1+0])):0;
- for(i=0;i<I;i++,gp1++,gp2++){
- CLASSIFY(i,i_empty,ireal);
- ai[i]+= *gp1;
- bi[i]+= *gp2;
- if( DependencyOfPrevAJ==0 ){
- if( i_empty )
- p0c+=*gp1;
- else{
- counts.addAlCount(-1,ireal,l,m,0,firstFrenchClass,0,*gp1,0.0);
- np0c+=*gp1;
- }
- }
- }
- // cout << 7 << endl;
- if( Verbose )
- cout << "l: " << l << "m: " << m << " p0c: " << p0c << " np0c: " << np0c << endl;
- }
- //cout << 8 << endl;
- cross_entropy+=log(max(trainProb,1e-100))+log(max(net->finalMultiply,1e-100));
- Array<int>vit;
- double viterbi_score=1.0;
- //cout << 9 << endl;
- if( (HMMTrainingSpecialFlags&1) )
- HMMViterbi(*net,gamma,vit);
- else
- viterbi_score=HMMRealViterbi(*net,vit);
- //cout << 10 << endl;
- for(j=1;j<=m;j++){
- viterbi_alignment[j]=vit[j-1]+1;
- if( viterbi_alignment[j]>l)
- viterbi_alignment[j]=0;
- }
- //cout << 11 << endl;
- sHandler1.setProbOfSentence(sent,cross_entropy);
- //cout << 12 << endl;
- perp.addFactor(cross_entropy, so, l, m,1);
- viterbi_perp.addFactor(log(viterbi_score)+log(max(net->finalMultiply,1e-100)), so, l, m,1);
- if( Verbose )
- cout << "Viterbi-perp: " << log(viterbi_score) << ' ' << log(max(net->finalMultiply,1e-100)) << ' ' << viterbi_score << ' ' << net->finalMultiply << ' ' << *net << "gamma: " << gamma << endl;
- delete net;net=0;
- //cout << 13 << endl;
- if (dump_alignment||(FEWDUMPS&&sent.getSentenceNo()<1000) )
- printAlignToFile(es, fs, Elist.getVocabList(), Flist.getVocabList(), of2, viterbi_alignment, sent.getSentenceNo(), viterbi_score);
- //cout << 14 << endl;
- addAL(viterbi_alignment,sent.getSentenceNo(),l);
- pair_no++;
- } /* of while */
- sHandler1.rewind();
- perp.record("HMM");
- viterbi_perp.record("HMM");
- errorReportAL(cout,"HMM");
- return diff;
-}
-
-#endif
-Mutex mu;
-
-#if 0
-void hmm::em_loop_2(Perplexity& perp, sentenceHandler& sHandler1,
- bool dump_alignment, const char* alignfile, Perplexity& viterbi_perp,
- bool test,bool doInit,int part
-){
- WordIndex i, j, l, m ;
- double cross_entropy;
- int pair_no=0 ;
- perp.clear();
- viterbi_perp.clear();
- ofstream of2;
- // for each sentence pair in the corpus
- if (dump_alignment||FEWDUMPS )
- of2.open(alignfile);
- sentPair sent ;
- //sHandler1.rewind();
- int nnn = 0;
- while(sHandler1.getNextSentence(sent)){
- //nnn ++;
- //cout << nnn << endl;
- //cout << 1 << endl;
- const Vector<WordIndex>& es = sent.get_eSent();
- const Vector<WordIndex>& fs = sent.get_fSent();
- const float so = sent.getCount();
- l = es.size() - 1;
- m = fs.size() - 1;
- cross_entropy = log(1.0);
- Vector<WordIndex> viterbi_alignment(fs.size());
-
- unsigned int I=2*l,J=m;
- bool DependencyOfJ=(CompareAlDeps&(16|8))||(PredictionInAlignments==2);
- bool DependencyOfPrevAJ=(CompareAlDeps&(2|4))||(PredictionInAlignments==0);
-
- HMMNetwork *net=makeHMMNetwork(es,fs,doInit);
- Array<double> gamma;
- Array<Array2<double> > epsilon(DependencyOfJ?(m-1):1);
- double trainProb;
- trainProb=ForwardBackwardTraining(*net,gamma,epsilon);
- if( !test ){
- double *gp=conv<double>(gamma.begin());
- for(unsigned int i2=0;i2<J;i2++)for(unsigned int i1=0;i1<I;++i1,++gp){
- if( *gp>MINCOUNTINCREASE ) {
- COUNT add= *gp*so;
- if( i1>=l ){
- //diff->incCount(es[0],fs[1+i2],add);
- tTable.incCount(es[0],fs[1+i2],add);
- aCountTable.getRef(0,i2+1,l,m)+=add;
- } else {
- //diff->incCount(es[1+i1],fs[1+i2],add);
- tTable.incCount(es[1+i1],fs[1+i2],add);
- aCountTable.getRef(1+i1,1+i2,l,m)+=add;
- }
- }
- }
- double p0c=0.0,np0c=0.0;
- for(unsigned int jj=0;jj<epsilon.size();jj++){
- int frenchClass=fwordclasses.getClass(fs[1+min(int(m)-1,int(jj)+1)]);
- double *ep=epsilon[jj].begin();
- if( ep ){
- double mult=1.0;
- mult*=l;
- //if( DependencyOfJ && J-1)
- // mult/=(J-1);
- for(i=0;i<I;i++){
- for(unsigned int i_bef=0;i_bef<I;i_bef++,ep++){
- CLASSIFY(i,i_empty,ireal);
- CLASSIFY2(i_bef,i_befreal);
- if( i_empty ){
- p0c+=*ep * mult;
- }else{
- //mu.lock();
- //cout<<"\rP "<<part<<" ";
- //cout<<epsilon.size()<<" "<<jj<<" ";
- //cout<<epsilon[jj].h1<<" " << epsilon[jj].h2<<" ";
- //cout<<i<<" "<<i_bef<<" ";
- //cout<<I<<" "<<J<<" ";
-
- cout.flush();
- counts.addAlCount(i_befreal,ireal,l,m,ewordclasses.getClass(es[1+i_befreal]),
- frenchClass ,jj+1,*ep * mult,0.0);
- np0c+=*ep * mult;
- //mu.unlock();
- }
- massert( &epsilon[jj](i,i_bef)== ep);
- }
- }
- }
- }
- double *gp1=conv<double>(gamma.begin()),*gp2=conv<double>(gamma.end())-I;
- Array<double>&ai=counts.doGetAlphaInit(I);/*If it is not get yet, init it, all operation envolved is add*/
- Array<double>&bi=counts.doGetBetaInit(I);
- int firstFrenchClass=(fs.size()>1)?(fwordclasses.getClass(fs[1+0])):0;
- for(i=0;i<I;i++,gp1++,gp2++){
- CLASSIFY(i,i_empty,ireal);
- ai[i]+= *gp1;
- bi[i]+= *gp2;
- if( DependencyOfPrevAJ==0 ){
- if( i_empty )
- p0c+=*gp1;
- else{
- counts.addAlCount(-1,ireal,l,m,0,firstFrenchClass,0,*gp1,0.0);
- np0c+=*gp1;
- }
- }
- }
- // cout << 7 << endl;
- if( Verbose )
- cout << "l: " << l << "m: " << m << " p0c: " << p0c << " np0c: " << np0c << endl;
- }
- //cout << 8 << endl;
- cross_entropy+=log(max(trainProb,1e-100))+log(max(net->finalMultiply,1e-100));
- Array<int>vit;
- double viterbi_score=1.0;
- //cout << 9 << endl;
- if( (HMMTrainingSpecialFlags&1) )
- HMMViterbi(*net,gamma,vit);
- else
- viterbi_score=HMMRealViterbi(*net,vit);
- //cout << 10 << endl;
- for(j=1;j<=m;j++){
- viterbi_alignment[j]=vit[j-1]+1;
- if( viterbi_alignment[j]>l)
- viterbi_alignment[j]=0;
- }
- //cout << 11 << endl;
- sHandler1.setProbOfSentence(sent,cross_entropy);
- //cout << 12 << endl;
- perp.addFactor(cross_entropy, so, l, m,1);
- viterbi_perp.addFactor(log(viterbi_score)+log(max(net->finalMultiply,1e-100)), so, l, m,1);
- if( Verbose )
- cout << "Viterbi-perp: " << log(viterbi_score) << ' ' << log(max(net->finalMultiply,1e-100)) << ' ' << viterbi_score << ' ' << net->finalMultiply << ' ' << *net << "gamma: " << gamma << endl;
- delete net;net=0;
- //cout << 13 << endl;
- if (dump_alignment||(FEWDUMPS&&sent.getSentenceNo()<1000) )
- printAlignToFile(es, fs, Elist.getVocabList(), Flist.getVocabList(), of2, viterbi_alignment, sent.getSentenceNo(), viterbi_score);
- //cout << 14 << endl;
- addAL(viterbi_alignment,sent.getSentenceNo(),l);
- pair_no++;
- } /* of while */
-
-
- return ;
-}
-
-
-CTTableDiff<COUNT,PROB>* hmm::em_one_step(int it){
- double minErrors=1.0;int minIter=0;
- string modelName="Hmm",shortModelName="hmm";
- int dumpFreq=ModelH_Dump_Freq;
- time_t it_st, st, it_fn, fn;
- string tfile, afile,afileh, number, alignfile, test_alignfile;
- int pair_no = 0;
- bool dump_files = false ;
- ofstream of2 ;
- st = time(NULL) ;
- sHandler1.rewind();
- cout << "\n==========================================================\n";
- cout << modelName << " Training Started at: " << my_ctime(&st);
- pair_no = 0;
-
- cout << endl << "-----------\n" << modelName << ": Iteration " << it << '\n';
- dump_files = true ;//(dumpFreq != 0) && ((it % dumpFreq) == 0) && !NODUMPS;
- number = "";
- int n = it;
- do{
- number.insert((size_t)0, 1, (char)(n % 10 + '0'));
- } while((n /= 10) > 0);
- tfile = Prefix + ".t" + shortModelName + "." + number ;
- afile = Prefix + ".a" + shortModelName + "." + number ;
- afileh = Prefix + ".h" + shortModelName + "." + number ;
- alignfile = Prefix + ".AH" ;
- test_alignfile = Prefix + ".tst.A" + shortModelName + "." + number ;
- counts=HMMTables<int,WordClasses>(GLOBALProbabilityForEmpty,ewordclasses,fwordclasses);
- aCountTable.clear();
- initAL();
- CTTableDiff<COUNT,PROB>* diff =em_loop_1(perp, sHandler1, dump_files , alignfile.c_str(), trainViterbiPerp, false,it==1,it);
-
- if( errorsAL()<minErrors ){
- minErrors=errorsAL();
- minIter=it;
- }
- // if (testPerp && testHandler)
-// em_loop(*testPerp, *testHandler, dump_files, test_alignfile.c_str(), *testViterbiPerp, true,it==1,it);
-// if (dump_files&&OutputInAachenFormat==1)
-// tTable.printCountTable(tfile.c_str(),Elist.getVocabList(),Flist.getVocabList(),1);
-// tTable.normalizeTable(Elist, Flist);
-// aCountTable.normalize(aTable);
-// probs=counts;
-// cout << modelName << ": ("<<it<<") TRAIN CROSS-ENTROPY " << perp.cross_entropy()
-// << " PERPLEXITY " << perp.perplexity() << '\n';
-// if (testPerp && testHandler)
-// cout << modelName << ": ("<<it<<") TEST CROSS-ENTROPY " << (*testPerp).cross_entropy()
-// << " PERPLEXITY " << (*testPerp).perplexity()
-// << '\n';
-// cout << modelName << ": ("<<it<<") VITERBI TRAIN CROSS-ENTROPY " << trainViterbiPerp.cross_entropy()
-// << " PERPLEXITY " << trainViterbiPerp.perplexity() << '\n';
-// if (testPerp && testHandler)
-// cout << modelName << ": ("<<it<<") VITERBI TEST CROSS-ENTROPY " << testViterbiPerp->cross_entropy()
-// << " PERPLEXITY " << testViterbiPerp->perplexity()
-// << '\n';
-// if (dump_files){
-// if( OutputInAachenFormat==0)
-/// tTable.printProbTable(tfile.c_str(),Elist.getVocabList(),Flist.getVocabList(),OutputInAachenFormat);
- // ofstream afilestream(afileh.c_str());
- // probs.writeJumps(afilestream);
- // aCountTable.printTable(afile.c_str());
-
- fn = time(NULL) ;
- cout << endl << "Entire " << modelName << " Training took: " << difftime(fn, st) << " seconds\n";
- //cout << "tTable contains " << tTable.getHash().bucket_count()
- // << " buckets and " << tTable.getHash().size() << " entries." ;
- cout << "==========================================================\n";
- return diff;
-}
-
-
-void hmm::em_one_step_2(int it,int part){
- double minErrors=1.0;int minIter=0;
- string modelName="Hmm",shortModelName="hmm";
- int dumpFreq=ModelH_Dump_Freq;
- time_t it_st, st, it_fn, fn;
- string tfile, afile,afileh, number, alignfile, test_alignfile;
- int pair_no = 0;
- bool dump_files = false ;
- ofstream of2 ;
-
- pair_no = 0;
-
-
- dump_files = true ;//(dumpFreq != 0) && ((it % dumpFreq) == 0) && !NODUMPS;
- number = "";
- int n = it;
- do{
- number.insert((size_t)0, 1, (char)(n % 10 + '0'));
- } while((n /= 10) > 0);
- tfile = Prefix + ".t" + shortModelName + "." + number ;
- afile = Prefix + ".a" + shortModelName + "." + number ;
- afileh = Prefix + ".h" + shortModelName + "." + number ;
- alignfile = Prefix + ".Ahmm." ;
- char v[2];
- v[1] = 0;
- v[0] = '0' + it;
- alignfile += v;
- alignfile += ".part";
- v[0] = '0' + part;
- alignfile += v;
-
- counts=HMMTables<int,WordClasses>(GLOBALProbabilityForEmpty,ewordclasses,fwordclasses);
- aCountTable.clear();
- initAL();
- em_loop_2(perp, sHandler1, dump_files , alignfile.c_str(), trainViterbiPerp, false,it==1,part);
-
- if( errorsAL()<minErrors ){
- minErrors=errorsAL();
- minIter=it;
- }
- return ;
-}
-
-struct hmm_align_struct{
- hmm *h;
- int part;
- int iter;
- int valid;
- pthread_t thread;
- int done;
-};
-
-void* em_thread(void *arg){
- hmm_align_struct * hm = (hmm_align_struct*) arg;
- hm->h->em_one_step_2(hm->iter,hm->part);
- hm->done = 1;
- return hm;
-}
-
-
-int multi_thread_em(int noIter, int noThread, hmm* base){
- // First, do one-step EM
- int i;
- int j;
- time_t it_st, st, it_fn, fn;
- fn = time(NULL);
- int dumpFreq=ModelH_Dump_Freq;
- bool dump_files = false ;
- string modelName = "HMM",shortModelName="hmm";
- string tfile, afile,acfile,afileh, number, alignfile, test_alignfile;
- vector<amodel<COUNT> > counts;
- vector<model2 *> m2;
- counts.resize(noThread);
- m2.resize(noThread);
- for(j=1;j<noThread;j++){
- m2[j] = new model2(*((model1*)base),base->aTable,counts[j]);
- }
- st = time(NULL);
- cout << "\n==========================================================\n";
- cout << modelName << " Training Started at: " << my_ctime(&st);
-
- for(i=1;i<=noIter;i++){
- base->perp.clear();
- base->trainViterbiPerp.clear();
- if (base->testPerp && base->testHandler){
- base->testHandler->rewind();
- base->testPerp->clear();
- base->testViterbiPerp->clear();
- }
-
- it_st = time(NULL) ;
-
- cout << endl << "-----------\n" << modelName << ": Iteration " << i << '\n';
- dump_files = (dumpFreq != 0) && ((i % dumpFreq) == 0) && !NODUMPS;
- dump_files = true;
- string number = "";
- int n = i;
- do{
- number.insert((size_t)0, 1, (char)(n % 10 + '0'));
- } while((n /= 10) > 0);
- tfile = Prefix + ".t" + shortModelName + "." + number ;
- afile = Prefix + ".a" + shortModelName + "." + number ;
- acfile = Prefix + ".ac" + shortModelName + "." + number ;
- afileh = Prefix + ".h" + shortModelName + "." + number ;
-
- alignfile = Prefix + ".A" + shortModelName + "." + number ;
- test_alignfile = Prefix + ".tst.A" + shortModelName + "." + number ;
- base->initAL();
- // except the current thread
- vector<hmm_align_struct> args;
- base->sHandler1.rewind();
- args.resize(noThread);
- for(j=1;j<noThread;j++){
- args[j].iter = i;
- args[j].part = j;
- args[j].done = 0;
- counts[j].clear();
- args[j].h = new hmm(*m2[j],base->ewordclasses,base->fwordclasses);
- args[j].h->probs = base->probs;
- args[j].valid = pthread_create(&(args[j].thread),NULL,em_thread,&(args[j]));
- if(args[j].valid){
- cerr << "Error starting thread " << j << endl;
- }
- }
- base->em_one_step_2(i,0);
- //ofstream afilestream(afileh.c_str());
- while(1){
- bool done = true;
- for (j=1;j<noThread;j++){
- //pthread_join((args[j].thread),NULL);
- // Start normalization as soon as possible
- if(args[j].done==1){
- args[j].done = 2;
- base->aCountTable.merge(args[j].h->aCountTable);
- //afilestream << "BEFORE MERGE"<<endl;
- //base->counts.writeJumps(afilestream);
- //afilestream << "MERGING"<<endl;
- //args[j].h->counts.writeJumps(afilestream);
- //afilestream << "MERGED"<<endl;
- base->counts.merge(args[j].h->counts);
- //base->counts.writeJumps(afilestream);
- delete args[j].h;
- args[j].h = 0;
- }else if(args[j].done==2){
- // Nothing
- }else if(args[j].done==0){
- done = false;
- }
- }
- if(done) break;
- }
- base->perp.record("HMM");
- base->trainViterbiPerp.record("HMM");
- base->errorReportAL(cout,"HMM");
-
- // Normalize
-// cout <<" Writing " << afileh <<"\n";
- base->probs = base->counts;
-// cout <<" Writing " << afileh <<"\n";
-// ofstream afilestream(afileh.c_str());
-// base->probs.writeJumps(afilestream);
- base->tTable.normalizeTable(base->Elist, base->Flist);
- base->aCountTable.normalize(base->aTable);
- base->aCountTable.clear();
- if (base->testPerp && base->testHandler)
- base->em_loop(*base->testPerp, *base->testHandler, dump_files, test_alignfile.c_str(), *base->testViterbiPerp, true,i==1,i);
- if (dump_files&&OutputInAachenFormat==1)
- base->tTable.printCountTable(tfile.c_str(),base->Elist.getVocabList(),base->Flist.getVocabList(),1);
- cout << modelName << ": ("<<i<<") TRAIN CROSS-ENTROPY " << base->perp.cross_entropy()
- << " PERPLEXITY " << base->perp.perplexity() << '\n';
- if (base->testPerp && base->testHandler)
- cout << modelName << ": ("<<i<<") TEST CROSS-ENTROPY " << base->testPerp->cross_entropy()
- << " PERPLEXITY " << base->testPerp->perplexity()
- << '\n';
- cout << modelName << ": ("<<i<<") VITERBI TRAIN CROSS-ENTROPY " << base->trainViterbiPerp.cross_entropy()
- << " PERPLEXITY " << base->trainViterbiPerp.perplexity() << '\n';
- if (base->testPerp && base->testHandler)
- cout << modelName << ": ("<<i<<") VITERBI TEST CROSS-ENTROPY " << base->testViterbiPerp->cross_entropy()
- << " PERPLEXITY " << base->testViterbiPerp->perplexity()
- << '\n';
- dump_files = true;
- if (dump_files){
- if( OutputInAachenFormat==0)
- base->tTable.printProbTable(tfile.c_str(),base->Elist.getVocabList(),base->Flist.getVocabList(),OutputInAachenFormat);
- ofstream afilestream(afileh.c_str());
- base->counts.writeJumps(afilestream);
- //base->counts.clear();
- base->aCountTable.printTable(acfile.c_str());
- base->aTable.printTable(afile.c_str());
- }
- it_fn = time(NULL) ;
-
- cout << "\n" << modelName << " Iteration: " << i<< " took: " <<
- difftime(it_fn, it_st) << " seconds\n";
-
- }
- for(j=1;j<noThread;j++){
- delete m2[j];
- }
- cout << endl << "Entire " << modelName << " Training took: " << difftime(fn, st) << " seconds\n";
- return 1;
-}
-
-
-
-#endif
-#include "HMMTables.cpp"
-template class HMMTables<int,WordClasses>;
-
diff --git a/scripts/training/MGIZA/src/hmm.h b/scripts/training/MGIZA/src/hmm.h
deleted file mode 100644
index c518144..0000000
--- a/scripts/training/MGIZA/src/hmm.h
+++ /dev/null
@@ -1,103 +0,0 @@
-/*
-
-EGYPT Toolkit for Statistical Machine Translation
-Written by Yaser Al-Onaizan, Jan Curin, Michael Jahr, Kevin Knight, John Lafferty, Dan Melamed, David Purdy, Franz Och, Noah Smith, and David Yarowsky.
-
-This program is free software; you can redistribute it and/or
-modify it under the terms of the GNU General Public License
-as published by the Free Software Foundation; either version 2
-of the License, or (at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with this program; if not, write to the Free Software
-Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
-USA.
-
-*/
-#ifndef _hmm_h
-#define _hmm_h 1
-
-#include <assert.h>
-
-#include <iostream>
-#include <algorithm>
-#include <functional>
-#include <map>
-#include <set>
-#include "Vector.h"
-#include <utility>
-
-#if __GNUC__>2
-#include <ext/hash_map>
-using __gnu_cxx::hash_map;
-#else
-#include <hash_map>
-#endif
-#include <fstream>
-#include <math.h>
-#include <time.h>
-
-#include "TTables.h"
-#include "ATables.h"
-#include "getSentence.h"
-#include "defs.h"
-#include "model2.h"
-#include "Perplexity.h"
-#include "vocab.h"
-#include "WordClasses.h"
-#include "HMMTables.h"
-#include "ForwardBackward.h"
-#include "ttableDiff.hpp"
-
-class hmm : public model2{
-public:
- WordClasses& ewordclasses;
- WordClasses& fwordclasses;
-public:
- HMMTables<int,WordClasses> counts,probs;
-public:
- template<class MAPPER>
- void makeWordClasses(const MAPPER&m1,const MAPPER&m2,string efile,string ffile){
- ifstream estrm(efile.c_str()),fstrm(ffile.c_str());
- if( !estrm ) {
- cerr << "ERROR: can not read " << efile << endl;
- }else
- ewordclasses.read(estrm,m1,Elist);
- if( !fstrm )
- cerr << "ERROR: can not read " << ffile << endl;
- else
- fwordclasses.read(fstrm,m2,Flist);
- }
- hmm(model2&m2,WordClasses &e, WordClasses& f);
- void initialize_table_uniformly(sentenceHandler&);
- int em_with_tricks(int iterations, bool dumpCount = false,
- const char* dumpCountName = NULL, bool useString = false,bool resume=false);
- CTTableDiff<COUNT,PROB>* em_one_step(int it);
- // void em_one_step_2(int it,int part);
- void load_table(const char* aname);
-
- // void em_loop(Perplexity& perp, sentenceHandler& sHandler1, bool dump_files,
- // const char* alignfile, Perplexity&, bool test,bool doInit,int iter);
- /* CTTableDiff<COUNT,PROB>* em_loop_1(Perplexity& perp, sentenceHandler& sHandler1, bool dump_files,
- const char* alignfile, Perplexity&, bool test,bool doInit,int iter);*/
- /* void em_loop_2( Perplexity& perp, sentenceHandler& sHandler1,
- bool dump_alignment, const char* alignfile, Perplexity& viterbi_perp,
- bool test,bool doInit,int part);*/
- void em_loop(Perplexity& perp, sentenceHandler& sHandler1,
- bool dump_alignment, const char* alignfile, Perplexity& viterbi_perp,
- bool test,bool doInit,int
- );
- void em_thread(int it,string alignfile,bool dump_files,bool resume=false);
- HMMNetwork *makeHMMNetwork(const Vector<WordIndex>& es,const Vector<WordIndex>&fs,bool doInit)const;
- void clearCountTable();
- friend class model3;
-};
-//int multi_thread_em(int noIter, int noThread, hmm* base);
-
-
-#endif
diff --git a/scripts/training/MGIZA/src/hmmnorm.cxx b/scripts/training/MGIZA/src/hmmnorm.cxx
deleted file mode 100644
index 2643102..0000000
--- a/scripts/training/MGIZA/src/hmmnorm.cxx
+++ /dev/null
@@ -1,137 +0,0 @@
-// HMM Normalization executable
-
-#include <iostream>
-#include <strstream>
-#include <string>
-#include "hmm.h"
-#include "Parameter.h"
-#define ITER_M2 0
-#define ITER_MH 5
-GLOBAL_PARAMETER3(int,Model1_Iterations,"Model1_Iterations","NO. ITERATIONS MODEL 1","m1","number of iterations for Model 1",PARLEV_ITER,5);
-GLOBAL_PARAMETER3(int,Model2_Iterations,"Model2_Iterations","NO. ITERATIONS MODEL 2","m2","number of iterations for Model 2",PARLEV_ITER,ITER_M2);
-GLOBAL_PARAMETER3(int,HMM_Iterations,"HMM_Iterations","mh","number of iterations for HMM alignment model","mh", PARLEV_ITER,ITER_MH);
-GLOBAL_PARAMETER3(int,Model3_Iterations,"Model3_Iterations","NO. ITERATIONS MODEL 3","m3","number of iterations for Model 3",PARLEV_ITER,5);
-GLOBAL_PARAMETER3(int,Model4_Iterations,"Model4_Iterations","NO. ITERATIONS MODEL 4","m4","number of iterations for Model 4",PARLEV_ITER,5);
-GLOBAL_PARAMETER3(int,Model5_Iterations,"Model5_Iterations","NO. ITERATIONS MODEL 5","m5","number of iterations for Model 5",PARLEV_ITER,0);
-GLOBAL_PARAMETER3(int,Model6_Iterations,"Model6_Iterations","NO. ITERATIONS MODEL 6","m6","number of iterations for Model 6",PARLEV_ITER,0);
-
-GLOBAL_PARAMETER(float, PROB_SMOOTH,"probSmooth","probability smoothing (floor) value ",PARLEV_OPTHEUR,1e-7);
-GLOBAL_PARAMETER(float, MINCOUNTINCREASE,"minCountIncrease","minimal count increase",PARLEV_OPTHEUR,1e-7);
-
-GLOBAL_PARAMETER2(int,Transfer_Dump_Freq,"TRANSFER DUMP FREQUENCY","t2to3","output: dump of transfer from Model 2 to 3",PARLEV_OUTPUT,0);
-GLOBAL_PARAMETER2(bool,Verbose,"verbose","v","0: not verbose; 1: verbose",PARLEV_OUTPUT,0);
-GLOBAL_PARAMETER(bool,Log,"log","0: no logfile; 1: logfile",PARLEV_OUTPUT,0);
-
-GLOBAL_PARAMETER(double,P0,"p0","fixed value for parameter p_0 in IBM-3/4 (if negative then it is determined in training)",PARLEV_EM,-1.0);
-GLOBAL_PARAMETER(double,M5P0,"m5p0","fixed value for parameter p_0 in IBM-5 (if negative then it is determined in training)",PARLEV_EM,-1.0);
-GLOBAL_PARAMETER3(bool,Peg,"pegging","p","DO PEGGING? (Y/N)","0: no pegging; 1: do pegging",PARLEV_EM,0);
-
-GLOBAL_PARAMETER(short,OldADBACKOFF,"adbackoff","",-1,0);
-GLOBAL_PARAMETER2(unsigned int,MAX_SENTENCE_LENGTH,"ml","MAX SENTENCE LENGTH","maximum sentence length",0,MAX_SENTENCE_LENGTH_ALLOWED);
-
-GLOBAL_PARAMETER(short, DeficientDistortionForEmptyWord,"DeficientDistortionForEmptyWord","0: IBM-3/IBM-4 as described in (Brown et al. 1993); 1: distortion model of empty word is deficient; 2: distoriton model of empty word is deficient (differently); setting this parameter also helps to avoid that during IBM-3 and IBM-4 training too many words are aligned with the empty word",PARLEV_MODELS,0);
-
-/**
-Here are parameters to support Load models and dump models
-*/
-
-GLOBAL_PARAMETER(int,restart,"restart","Restart training from a level,0: Normal restart, from model 1, 1: Model 1, 2: Model 2 Init (Using Model 1 model input and train model 2), 3: Model 2, (using model 2 input and train model 2), 4 : HMM Init (Using Model 1 model and train HMM), 5: HMM (Using Model 2 model and train HMM) 6 : HMM (Using HMM Model and train HMM), 7: Model 3 Init (Use HMM model and train model 3) 8: Model 3 Init (Use Model 2 model and train model 3) 9: Model 3, 10: Model 4 Init (Use Model 3 model and train Model 4) 11: Model 4 and on, ",PARLEV_INPUT,0);
-GLOBAL_PARAMETER(bool,dumpCount,"dumpcount","Whether we are going to dump count (in addition to) final output?",PARLEV_OUTPUT,false);
-GLOBAL_PARAMETER(bool,dumpCountUsingWordString,"dumpcountusingwordstring","In count table, should actual word appears or just the id? default is id",PARLEV_OUTPUT,false);
-/// END
-short OutputInAachenFormat=0;
-bool Transfer=TRANSFER;
-bool Transfer2to3=0;
-short NoEmptyWord=0;
-bool FEWDUMPS=0;
-GLOBAL_PARAMETER(bool,ONLYALDUMPS,"ONLYALDUMPS","1: do not write any files",PARLEV_OUTPUT,0);
-GLOBAL_PARAMETER(short,NCPUS,"NCPUS","Number of CPUS",PARLEV_EM,2);
-GLOBAL_PARAMETER(short,CompactAlignmentFormat,"CompactAlignmentFormat","0: detailled alignment format, 1: compact alignment format ",PARLEV_OUTPUT,0);
-GLOBAL_PARAMETER2(bool,NODUMPS,"NODUMPS","NO FILE DUMPS? (Y/N)","1: do not write any files",PARLEV_OUTPUT,0);
-
-GLOBAL_PARAMETER(WordIndex, MAX_FERTILITY, "MAX_FERTILITY",
- "maximal fertility for fertility models", PARLEV_EM, 10);
-
-using namespace std;
-string Prefix, LogFilename, OPath, Usage, SourceVocabFilename,
- TargetVocabFilename, CorpusFilename, TestCorpusFilename, t_Filename,
- SourceVocabClassesFilename, TargetVocabClassesFilename,
- a_Filename, p0_Filename, d_Filename, n_Filename, dictionary_Filename;
-
-
-int main(int argc, char* argv[]){
- if(argc < 5){
- cerr << "Usage: " << argv[0] << " vcb1 vcb2 outputFile baseFile [additional1 ]..." << endl;
- return 1;
- }
- Vector<WordEntry> evlist,fvlist;
- vcbList eTrainVcbList(evlist), fTrainVcbList(fvlist);
- TargetVocabFilename = argv[2];
- SourceVocabFilename = argv[1];
- eTrainVcbList.setName(argv[1]);
- fTrainVcbList.setName(argv[2]);
- eTrainVcbList.readVocabList();
- fTrainVcbList.readVocabList();
- Perplexity trainPerp, testPerp, trainViterbiPerp, testViterbiPerp;
- tmodel<float, float> tTable;
- sentenceHandler *corpus = new sentenceHandler();
-
-
- model1 m1(CorpusFilename.c_str(), eTrainVcbList, fTrainVcbList, tTable,
- trainPerp, *corpus, &testPerp, corpus, trainViterbiPerp,
- &testViterbiPerp);
- amodel<float> aTable(false);
- amodel<float> aCountTable(false);
- model2 m2(m1, aTable, aCountTable);
- WordClasses french,english;
- hmm h(m2,english,french);
- SourceVocabClassesFilename = argv[1];
- TargetVocabClassesFilename = argv[2];
- SourceVocabClassesFilename += ".classes";
- TargetVocabClassesFilename += ".classes";
- h.makeWordClasses(m1.Elist, m1.Flist, SourceVocabClassesFilename.c_str(), TargetVocabClassesFilename.c_str());
- string base = argv[4];
- string baseA = base+".alpha";
- string baseB = base+".beta";
- string output = argv[3];
- string outputA = output+".alpha";
- string outputB = output+".beta";
- h.probs.readJumps(base.c_str(),NULL,baseA.c_str(), baseB.c_str());
- // Start iteration:
- for(int i = 5; i< argc ; i++){
- string name = argv[i];
- string nameA = name + ".alpha";
- string nameB = name + ".beta";
- if(h.counts.readJumps(name.c_str(),NULL,nameA.c_str(), nameB.c_str()))
- h.probs.merge(h.counts);
- else
- cerr << "Error, cannot load name.c_str()";
- h.clearCountTable();
- }
- h.probs.writeJumps(output.c_str(),NULL,outputA.c_str(), outputB.c_str());
- delete corpus;
-}
-
-// Some utility functions to get it compile..
-
-ofstream logmsg;
-const string str2Num(int n) {
- string number = "";
- do {
- number.insert((size_t)0, 1, (char)(n % 10 + '0'));
- } while ((n /= 10) > 0);
- return (number);
-}
-double LAMBDA=1.09;
-
-Vector<map< pair<int,int>,char > > ReferenceAlignment;
-
-double ErrorsInAlignment(const map< pair<int,int>,char >&reference,
- const Vector<WordIndex>&test, int l, int&missing, int&toomuch,
- int&eventsMissing, int&eventsToomuch, int pair_no){
- return 0;
- }
-
-void printGIZAPars(ostream&out){
-}
-
diff --git a/scripts/training/MGIZA/src/logprob.cpp b/scripts/training/MGIZA/src/logprob.cpp
deleted file mode 100644
index 97464a7..0000000
--- a/scripts/training/MGIZA/src/logprob.cpp
+++ /dev/null
@@ -1,154 +0,0 @@
-/*
-
-EGYPT Toolkit for Statistical Machine Translation
-Written by Yaser Al-Onaizan, Jan Curin, Michael Jahr, Kevin Knight, John Lafferty, Dan Melamed, David Purdy, Franz Och, Noah Smith, and David Yarowsky.
-
-This program is free software; you can redistribute it and/or
-modify it under the terms of the GNU General Public License
-as published by the Free Software Foundation; either version 2
-of the License, or (at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with this program; if not, write to the Free Software
-Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
-USA.
-
-*/
-
-// Routines to perform integer exponential arithmetic.
-// A number x is represented as n, where x = b**n.
-// It is assumed that b > 1, something like b = 1.001;
-
-#include "logprob.h"
-#include <stdlib.h>
-#include <stdio.h>
-#include <iostream>
-#include <fstream>
-#include <string>
-double *LogProb::ntof = NULL; // Tables will be initialized
-int *LogProb::addtbl = NULL; // in Initialize function.
-int *LogProb::subtbl = NULL; //
-
-const int LogProb::max_2byte_integer = 32767;
-const int LogProb::min_2byte_integer = -32768;
-const double LogProb::b = 1.001; // a logarithm basis
-const double LogProb::logb2 = log(b);
-//const int LogProb::nmax = round(78.0E0 * log(1.0E1) / logb2);
-const int LogProb::nmax = round(300.0E0 * log(1.0E1) / logb2);
-const int LogProb::nmin = -nmax;
-const int LogProb::tblbnd = round(log((b-1.0E0)/2.0E0)/logb2);
-const int LogProb::zeron = round(pow((double)-2, (double)23));
-const int LogProb::onen = 0;
-const int LogProb::infn = onen - zeron;
-
-const int LogProb::initialized = LogProb::Initialize();
-const LogProb LogProb::zero(0);
-const LogProb LogProb::one(1);
-const LogProb LogProb::minus2(1e-2);
-const LogProb LogProb::minus4(1e-4);
-const LogProb LogProb::minus6(1e-6);
-const LogProb LogProb::minus8(1e-8);
-const LogProb LogProb::minus10(1e-10);
-const LogProb LogProb::minus12(1e-12);
-const LogProb LogProb::minus14(1e-14);
-const LogProb LogProb::minus16(1e-16);
-
-// static table initialization function
-int LogProb::Initialize()
-{
- int nbytes = sizeof(double)*(nmax-nmin+1) + sizeof(int)*(0-tblbnd+1);
- std::cerr << nbytes << " bytes used for LogProb tables (C++ version)\n";
- ntof = new double[nmax-nmin+1];
- addtbl = new int[-tblbnd+1];
- subtbl = new int[-tblbnd+1];
-
- // char filename[257];
- // string filename ;
- // ifstream ifs;
- // ifs.open(filename.c_str());
- // if (!ifs)
- // {
- int i;
- std::cerr << "Building integer logs conversion tables\n";
- ntof[0] = 0 ;
-
- for (i=nmin+1; i<=nmax; ++i)
- {
- double x = i;
- ntof[i-nmin] = exp(x*logb2);
-
- }
- for (i=tblbnd; i<=0; ++i)
- {
- double x = 1.0 + pow(b, i);
- addtbl[i-tblbnd] = round(log(x)/logb2);
- }
- double sqrtb = exp(0.5*logb2);
- for (i=0; i<=-tblbnd; ++i)
- {
- double x = sqrtb * pow(b, i) - 1.0;
- subtbl[i] = round(log(x)/logb2);
- }
- // if (toolsRoot)
- // {
- // ofstream ofs(filename.c_str());
- // if (!ofs)
- // cerr << "Could not write LogProb data to " << filename << endl;
- // else
- // {
- // ofs.write((const char *)ntof, sizeof(double) * (nmax-nmin+1));
- // ofs.write((const char *)addtbl, sizeof(int) * (-tblbnd+1));
- // ofs.write((const char *)subtbl, sizeof(int) * (-tblbnd+1));
- // }
- // }
- // }
- // else
- // {
- // ifs.read((char *)ntof, sizeof(double) * (nmax - nmin + 1));
- // ifs.read((char *)addtbl, sizeof(int) * (-tblbnd+1));
- // ifs.read((char *)subtbl, sizeof(int) * (-tblbnd+1));
- // }
- return 1;
-}
-
-void LogProb::FreeTables()
-{
- delete [] addtbl;
- delete [] subtbl;
- delete [] ntof;
-}
-
-//---------------------------------------------------------------------------
-// Aritmetic operators
-//---------------------------------------------------------------------------
-
-
-// Subtract two logarithm numbers. Use the following method:
-// b**n - b**m = b**m( b**(n-m) - 1 ), assuming n >= m.
-LogProb& LogProb::operator-=(const LogProb &subs)
-{
- if (subs.logr == zeron)
- return *this;
- int a = logr - subs.logr;
- if (a <= 0)
- {
- if (a < 0)
- {
- std::cerr << "WARNING(logprob): Invalid arguments to nsub" <<(*this)<< " " << subs << std::endl;
- //abort();
- }
- logr = zeron;
- return *this;
- }
- if (a > -tblbnd)
- return *this;
- logr = subs.logr + subtbl[a];
- return *this;
-}
-
-
diff --git a/scripts/training/MGIZA/src/logprob.h b/scripts/training/MGIZA/src/logprob.h
deleted file mode 100644
index 1dfbb72..0000000
--- a/scripts/training/MGIZA/src/logprob.h
+++ /dev/null
@@ -1,219 +0,0 @@
-/*
-
-EGYPT Toolkit for Statistical Machine Translation
-Written by Yaser Al-Onaizan, Jan Curin, Michael Jahr, Kevin Knight, John Lafferty, Dan Melamed, David Purdy, Franz Och, Noah Smith, and David Yarowsky.
-
-This program is free software; you can redistribute it and/or
-modify it under the terms of the GNU General Public License
-as published by the Free Software Foundation; either version 2
-of the License, or (at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with this program; if not, write to the Free Software
-Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
-USA.
-
-*/
-#ifndef _LOGPROB_H
-#define _LOGPROB_H
-
-// Routines to perform integer exponential arithmetic.
-// A number x is represented as n, where x = b**n
-// It is assumed that b > 1, something like b = 1.001
-
-#include <iostream>
-#include <math.h>
-#include <algorithm>
-
-//#define MAX(A,B) ((A) > (B) ? (A) : (B))
-//#define MIN(A,B) ((A) > (B) ? (B) : (A))
-
-#ifdef WIN32
-#define round(x) floor(x+0.5)
-#endif
-class LogProb {
-public:
- // mj for cross entropy
- double base2() const {
- return (logr * logb2 / log((double)2));
- }
-
- // Constructors
- LogProb() : logr(zeron) {}
- LogProb(const LogProb &obj) : logr(obj.logr) {}
- LogProb(double x) : logr(x == 0.0 ? zeron : round(log(x)/logb2)) {}
- // destructor
- ~LogProb() {} // default destructor
-
- operator double() const // converts logr to (double) b**logr
- {
- if (logr < nmin) return ntof[0];
- if (logr > nmax) return ntof[nmax-nmin];
- return ntof[logr-nmin];
- }
-
- LogProb &operator=(const LogProb &obj) { logr = obj.logr; return *this; }
- int operator!() const { return logr == zeron; }
-
- // iostream friend specifications
- friend std::ostream& operator<<(std::ostream& os, const LogProb &obj);
- friend std::istream& operator>>(std::istream& is, LogProb &obj);
- friend std::ostream& operator<<=(std::ostream& os, const LogProb &obj);
- friend std::istream& operator>>=(std::istream& is, LogProb &obj);
-
- // arithmetic operators
- LogProb &operator+=(const LogProb &add) // logr2 = logb ( b**logr2 + b**logr1 )
- // Add two numbers represented as logarithms. Use the following method:
- // b**n + b**m = b**n(1 + b**(m-n)), assuming n >= m.
- {
- if (add.logr == zeron)
- return *this;
- if (logr == zeron)
- {
- logr = add.logr;
- return *this;
- }
- int a = add.logr - logr;
- if (a > 0)
- {
- a = -a;
- logr = add.logr;
- }
- if (a < tblbnd)
- return *this;
- logr += addtbl[a-tblbnd];
- return *this;
- }
-
- LogProb &operator-=(const LogProb &); // logr2 = logb ( b**logr2 + b**logr1 )
- LogProb operator*(const LogProb &mul) const // logr3 = logr2 + logr1
- {
- LogProb result; // start out with result == 0
- if ((logr != zeron) && (mul.logr != zeron))
- result.logr = std::max(logr+mul.logr, zeron);
- return result;
- }
- LogProb operator*(double x) const // logr3 = logr2 + logr1
- {
- return (*this)*(LogProb)x;
- }
- LogProb operator^(const int i) const // logr2 = logr1 * i
- {
- LogProb result; // start out with result == 0
- // if ((logr != zeron) && (mul.logr != zeron))
- result.logr = logr * i ;
- return result;
- }
- LogProb &operator*=(const LogProb &mul) // logr2 += logr1
- {
- if ((logr == zeron) || (mul.logr == zeron))
- logr = zeron;
- else
- logr = std::max(logr+mul.logr, zeron);
- return *this;
- }
- LogProb operator/(const LogProb &div) const // logr3 = logr2 -logr1
- {
- LogProb result;
- if (logr != zeron)
- result.logr = std::max(logr - div.logr, zeron);
- return result;
- }
- LogProb &operator/=(const LogProb &div) // logr2 -= logr1
- {
- if (logr != zeron)
- logr = std::max(logr - div.logr, zeron);
- return *this;
- }
- LogProb operator+(const LogProb &l) const // logr3 = logb ( b**logr2 + b**logr1 )
- { LogProb result(*this); result += l; return result; }
- LogProb operator-(const LogProb &l) const // logr3 = logb ( b**logr2 - b**logr1 )
- { LogProb result(*this); result -= l; return result; }
- LogProb power(const int n) const // logr2 = logr1 * int
- { LogProb result(*this); result.logr *= n; return result; }
-
- // Conditional operators
- int operator<(const LogProb &obj) const { return logr < obj.logr; }
- int operator<=(const LogProb &obj) const { return logr <= obj.logr; }
- int operator>(const LogProb &obj) const { return logr > obj.logr; }
- int operator>=(const LogProb &obj) const { return logr >= obj.logr; }
- int operator==(const LogProb &obj) const { return logr == obj.logr; }
- int operator!=(const LogProb &obj) const { return logr != obj.logr; }
- int operator<(double d) const { return ((double)*this) < d; }
- int operator<=(double d) const { return ((double)*this) <= d; }
- int operator>(double d) const { return ((double)*this) > d; }
- int operator>=(double d) const { return ((double)*this) >= d; }
- int operator==(double d) const { return ((double)*this) == d; }
- int operator!=(double d) const { return ((double)*this) != d; }
-
-
- LogProb &SetZero() { logr = zeron; return *this; } // representation of 0,
- LogProb &SetOne() { logr = onen; return *this; } // 1, and
- LogProb &SetInf() { logr = infn; return *this; } // inf in logarithm domain
-
-private:
- int logr; // a representation of logarithm
- // static constants
- static const int initialized; // initialization flag
- static const double b;
- static const double logb2;
- static const int nmin, nmax;
- static const int tblbnd;
- static const int zeron, onen, infn; // zero, one, and inf in log domain
- static const int max_2byte_integer, min_2byte_integer;
-
- // Arithmetic computation Tables
- static double *ntof;
- static int *addtbl;
- static int *subtbl;
-
- static int Initialize();
-
-public:
- static void FreeTables();
- // constants for initializing LogProbs to 0 or 1
- static const LogProb zero;
- static const LogProb one;
- static const LogProb minus2;
- static const LogProb minus4;
- static const LogProb minus6;
- static const LogProb minus8;
- static const LogProb minus10;
- static const LogProb minus12;
- static const LogProb minus14;
- static const LogProb minus16;
-};
-
-// iostream friend operators
-inline std::ostream &operator<<(std::ostream& os, const LogProb &obj)
-{
- return os << (double) obj; // output in linear domain, b**logr
-}
-
-inline std::istream &operator>>(std::istream& is, LogProb &obj)
-{
- double d;
- is >> d;
- obj = d;
- return is;
-}
-
-inline std::ostream &operator<<=(std::ostream& os, const LogProb &obj) // write binary
-{
- os.write((const char *)&obj.logr, sizeof(obj.logr));
- return os;
-}
-
-inline std::istream &operator>>=(std::istream& is, LogProb &obj)
-{
- is.read((char *)&obj.logr, sizeof(obj.logr));
- return is;
-}
-
-#endif
-
diff --git a/scripts/training/MGIZA/src/main.cpp b/scripts/training/MGIZA/src/main.cpp
deleted file mode 100644
index d3a23c9..0000000
--- a/scripts/training/MGIZA/src/main.cpp
+++ /dev/null
@@ -1,1162 +0,0 @@
-/*
-
- EGYPT Toolkit for Statistical Machine Translation
- Written by Yaser Al-Onaizan, Jan Curin, Michael Jahr, Kevin Knight, John Lafferty, Dan Melamed, David Purdy, Franz Och, Noah Smith, and David Yarowsky.
-
- This program is free software; you can redistribute it and/or
- modify it under the terms of the GNU General Public License
- as published by the Free Software Foundation; either version 2
- of the License, or (at your option) any later version.
-
- This program is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- GNU General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program; if not, write to the Free Software
- Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
- USA.
-
- */
-
-#include <strstream>
-#include "getSentence.h"
-#include "TTables.h"
-#include "model1.h"
-#include "model2.h"
-#include "model3.h"
-#include "hmm.h"
-#include "file_spec.h"
-#include "defs.h"
-#include "vocab.h"
-#include "Perplexity.h"
-#include "Dictionary.h"
-#include "utility.h"
-#include "Parameter.h"
-#include "myassert.h"
-#include "D4Tables.h"
-#include "D5Tables.h"
-#include "transpair_model4.h"
-#include "transpair_model5.h"
-#include <boost/thread/thread.hpp>
-
-#define ITER_M2 0
-#define ITER_MH 5
-
-/**
- Here we can see that Every model is iterated several times, and we do not need to do it
- on all the corpora, instead we will only start a few.
- */
-GLOBAL_PARAMETER3(int,Model1_Iterations,"Model1_Iterations","NO. ITERATIONS MODEL 1","m1","number of iterations for Model 1",PARLEV_ITER,5);
-GLOBAL_PARAMETER3(int,Model2_Iterations,"Model2_Iterations","NO. ITERATIONS MODEL 2","m2","number of iterations for Model 2",PARLEV_ITER,ITER_M2);
-GLOBAL_PARAMETER3(int,HMM_Iterations,"HMM_Iterations","mh","number of iterations for HMM alignment model","mh", PARLEV_ITER,ITER_MH);
-GLOBAL_PARAMETER3(int,Model3_Iterations,"Model3_Iterations","NO. ITERATIONS MODEL 3","m3","number of iterations for Model 3",PARLEV_ITER,5);
-GLOBAL_PARAMETER3(int,Model4_Iterations,"Model4_Iterations","NO. ITERATIONS MODEL 4","m4","number of iterations for Model 4",PARLEV_ITER,5);
-GLOBAL_PARAMETER3(int,Model5_Iterations,"Model5_Iterations","NO. ITERATIONS MODEL 5","m5","number of iterations for Model 5",PARLEV_ITER,0);
-GLOBAL_PARAMETER3(int,Model6_Iterations,"Model6_Iterations","NO. ITERATIONS MODEL 6","m6","number of iterations for Model 6",PARLEV_ITER,0);
-
-GLOBAL_PARAMETER(float, PROB_SMOOTH,"probSmooth","probability smoothing (floor) value ",PARLEV_OPTHEUR,1e-7);
-GLOBAL_PARAMETER(float, MINCOUNTINCREASE,"minCountIncrease","minimal count increase",PARLEV_OPTHEUR,1e-7);
-
-GLOBAL_PARAMETER2(int,Transfer_Dump_Freq,"TRANSFER DUMP FREQUENCY","t2to3","output: dump of transfer from Model 2 to 3",PARLEV_OUTPUT,0);
-GLOBAL_PARAMETER2(bool,Verbose,"verbose","v","0: not verbose; 1: verbose",PARLEV_OUTPUT,0);
-GLOBAL_PARAMETER(bool,Log,"log","0: no logfile; 1: logfile",PARLEV_OUTPUT,0);
-
-GLOBAL_PARAMETER(double,P0,"p0","fixed value for parameter p_0 in IBM-3/4 (if negative then it is determined in training)",PARLEV_EM,-1.0);
-GLOBAL_PARAMETER(double,M5P0,"m5p0","fixed value for parameter p_0 in IBM-5 (if negative then it is determined in training)",PARLEV_EM,-1.0);
-GLOBAL_PARAMETER3(bool,Peg,"pegging","p","DO PEGGING? (Y/N)","0: no pegging; 1: do pegging",PARLEV_EM,0);
-
-GLOBAL_PARAMETER(short,OldADBACKOFF,"adbackoff","",-1,0);
-GLOBAL_PARAMETER2(unsigned int,MAX_SENTENCE_LENGTH,"ml","MAX SENTENCE LENGTH","maximum sentence length",0,MAX_SENTENCE_LENGTH_ALLOWED);
-
-GLOBAL_PARAMETER(short, DeficientDistortionForEmptyWord,"DeficientDistortionForEmptyWord","0: IBM-3/IBM-4 as described in (Brown et al. 1993); 1: distortion model of empty word is deficient; 2: distoriton model of empty word is deficient (differently); setting this parameter also helps to avoid that during IBM-3 and IBM-4 training too many words are aligned with the empty word",PARLEV_MODELS,0);
-
-/**
-Here are parameters to support Load models and dump models
-*/
-
-GLOBAL_PARAMETER(int,restart,"restart","Restart training from a level,0: Normal restart, from model 1, 1: Model 1, 2: Model 2 Init (Using Model 1 model input and train model 2), 3: Model 2, (using model 2 input and train model 2), 4 : HMM Init (Using Model 1 model and train HMM), 5: HMM (Using Model 2 model and train HMM) 6 : HMM (Using HMM Model and train HMM), 7: Model 3 Init (Use HMM model and train model 3) 8: Model 3 Init (Use Model 2 model and train model 3) 9: Model 3, 10: Model 4 Init (Use Model 3 model and train Model 4) 11: Model 4 and on, ",PARLEV_INPUT,0);
-GLOBAL_PARAMETER(bool,dumpCount,"dumpcount","Whether we are going to dump count (in addition to) final output?",PARLEV_OUTPUT,false);
-GLOBAL_PARAMETER(bool,dumpCountUsingWordString,"dumpcountusingwordstring","In count table, should actual word appears or just the id? default is id",PARLEV_OUTPUT,false);
-/// END
-short OutputInAachenFormat=0;
-bool Transfer=TRANSFER;
-bool Transfer2to3=0;
-short NoEmptyWord=0;
-bool FEWDUMPS=0;
-GLOBAL_PARAMETER(bool,ONLYALDUMPS,"ONLYALDUMPS","1: do not write any files",PARLEV_OUTPUT,0);
-GLOBAL_PARAMETER(short,NCPUS,"NCPUS","Number of threads to be executed, use 0 if you just want all CPUs to be used",PARLEV_EM,0);
-GLOBAL_PARAMETER(short,CompactAlignmentFormat,"CompactAlignmentFormat","0: detailled alignment format, 1: compact alignment format ",PARLEV_OUTPUT,0);
-GLOBAL_PARAMETER2(bool,NODUMPS,"NODUMPS","NO FILE DUMPS? (Y/N)","1: do not write any files",PARLEV_OUTPUT,0);
-
-GLOBAL_PARAMETER(WordIndex, MAX_FERTILITY, "MAX_FERTILITY",
- "maximal fertility for fertility models", PARLEV_EM, 10);
-
-Vector<map< pair<int,int>,char > > ReferenceAlignment;
-
-bool useDict = false;
-string CoocurrenceFile;
-string Prefix, LogFilename, OPath, Usage, SourceVocabFilename,
- SourceVocabClassesFilename(""), TargetVocabClassesFilename(""),
- TargetVocabFilename, CorpusFilename, TestCorpusFilename, t_Filename,
- a_Filename, p0_Filename, d_Filename, n_Filename, dictionary_Filename;
-
-
-// QIN: Variables required for reloading model and continue training
-
-string prev_t, prev_p0, prev_a, prev_d, prev_d4,prev_d4_2, prev_hmm,prev_n;
-
-// QIN: And below are for count outputAlignment
-string countPrefix;
-
-Mutex logmsg_lock;
-ofstream logmsg;
-const string str2Num(int n) {
- string number = "";
- do {
- number.insert((size_t)0, 1, (char)(n % 10 + '0'));
- } while ((n /= 10) > 0);
- return (number);
-}
-
-double LAMBDA=1.09;
-sentenceHandler *testCorpus=0, *corpus=0;
-Perplexity trainPerp, testPerp, trainViterbiPerp, testViterbiPerp;
-
-string ReadTablePrefix;
-
-void printGIZAPars(ostream&out) {
- out << "general parameters:\n"
- "-------------------\n";
- printPars(out, getGlobalParSet(), 0);
- out << '\n';
-
- out << "No. of iterations:\n-"
- "------------------\n";
- printPars(out, getGlobalParSet(), PARLEV_ITER);
- out << '\n';
-
- out
- << "parameter for various heuristics in GIZA++ for efficient training:\n"
- "------------------------------------------------------------------\n";
- printPars(out, getGlobalParSet(), PARLEV_OPTHEUR);
- out << '\n';
-
- out << "parameters for describing the type and amount of output:\n"
- "-----------------------------------------------------------\n";
- printPars(out, getGlobalParSet(), PARLEV_OUTPUT);
- out << '\n';
-
- out << "parameters describing input files:\n"
- "----------------------------------\n";
- printPars(out, getGlobalParSet(), PARLEV_INPUT);
- out << '\n';
-
- out << "smoothing parameters:\n"
- "---------------------\n";
- printPars(out, getGlobalParSet(), PARLEV_SMOOTH);
- out << '\n';
-
- out << "parameters modifying the models:\n"
- "--------------------------------\n";
- printPars(out, getGlobalParSet(), PARLEV_MODELS);
- out << '\n';
-
- out << "parameters modifying the EM-algorithm:\n"
- "--------------------------------------\n";
- printPars(out, getGlobalParSet(), PARLEV_EM);
- out << '\n';
-}
-
-const char*stripPath(const char*fullpath)
-// strip the path info from the file name
-{
- const char *ptr = fullpath + strlen(fullpath) - 1;
- while (ptr && ptr > fullpath && *ptr != '/') {
- ptr--;
- }
- if ( *ptr=='/')
- return (ptr+1);
- else
- return ptr;
-}
-
-void printDecoderConfigFile() {
- string decoder_config_file = Prefix + ".Decoder.config";
- cerr << "writing decoder configuration file to "
- << decoder_config_file.c_str() <<'\n';
- ofstream decoder(decoder_config_file.c_str());
- if (!decoder) {
- cerr << "\nCannot write to " << decoder_config_file <<'\n';
- exit(1);
- }
- decoder
- << "# Template for Configuration File for the Rewrite Decoder\n# Syntax:\n"
- << "# <Variable> = <value>\n# '#' is the comment character\n"
- << "#================================================================\n"
- << "#================================================================\n"
- << "# LANGUAGE MODEL FILE\n# The full path and file name of the language model file:\n";
- decoder << "LanguageModelFile =\n";
-
- decoder
- << "#================================================================\n"
- << "#================================================================\n"
- << "# TRANSLATION MODEL FILES\n# The directory where the translation model tables as created\n"
- << "# by Giza are located:\n#\n"
- << "# Notes: - All translation model \"source\" files are assumed to be in\n"
- << "# TM_RawDataDir, the binaries will be put in TM_BinDataDir\n"
- << "#\n# - Attention: RELATIVE PATH NAMES DO NOT WORK!!!\n"
- << "#\n# - Absolute paths (file name starts with /) will override\n"
- << "# the default directory.\n\n";
- // strip file prefix info and leave only the path name in Prefix
- string path = Prefix.substr(0, Prefix.find_last_of("/")+1);
- if (path=="")
- path=".";
- decoder << "TM_RawDataDir = " << path << '\n';
- decoder << "TM_BinDataDir = " << path << '\n' << '\n';
- decoder << "# file names of the TM tables\n# Notes:\n"
- << "# 1. TTable and InversTTable are expected to use word IDs not\n"
- << "# strings (Giza produces both, whereby the *.actual.* files\n"
- << "# use strings and are THE WRONG CHOICE.\n"
- << "# 2. FZeroWords, on the other hand, is a simple list of strings\n"
- << "# with one word per line. This file is typically edited\n"
- << "# manually. Hoeever, this one listed here is generated by GIZA\n\n";
-
- int lastmodel;
- if (Model5_Iterations>0)
- lastmodel = 5;
- else if (Model4_Iterations>0)
- lastmodel = 4;
- else if (Model3_Iterations>0)
- lastmodel = 3;
- else if (Model2_Iterations>0)
- lastmodel = 2;
- else
- lastmodel = 1;
- string lastModelName = str2Num(lastmodel);
- string p=Prefix + ".t" + /*lastModelName*/"3" +".final";
- decoder << "TTable = " << stripPath(p.c_str()) << '\n';
- p = Prefix + ".ti.final";
- decoder << "InverseTTable = " << stripPath(p.c_str()) << '\n';
- p=Prefix + ".n" + /*lastModelName*/"3" + ".final";
- decoder << "NTable = " << stripPath(p.c_str()) << '\n';
- p=Prefix + ".d" + /*lastModelName*/"3" + ".final";
- decoder << "D3Table = " << stripPath(p.c_str()) << '\n';
- p=Prefix + ".D4.final";
- decoder << "D4Table = " << stripPath(p.c_str()) << '\n';
- p=Prefix + ".p0_"+ /*lastModelName*/"3" + ".final";
- decoder << "PZero = " << stripPath(p.c_str()) << '\n';
- decoder << "Source.vcb = " << SourceVocabFilename << '\n';
- decoder << "Target.vcb = " << TargetVocabFilename << '\n';
- // decoder << "Source.classes = " << SourceVocabFilename + ".classes" << '\n';
- // decoder << "Target.classes = " << TargetVocabFilename + ".classes" <<'\n';
- decoder << "Source.classes = " << SourceVocabClassesFilename << '\n';
- decoder << "Target.classes = " << TargetVocabClassesFilename <<'\n';
- p=Prefix + ".fe0_"+ /*lastModelName*/"3" + ".final";
- decoder << "FZeroWords = " <<stripPath(p.c_str()) << '\n';
-
- /* decoder << "# Translation Parameters\n"
- << "# Note: TranslationModel and LanguageModelMode must have NUMBERS as\n"
- << "# values, not words\n"
- << "# CORRECT: LanguageModelMode = 2\n"
- << "# WRONG: LanguageModelMode = bigrams # WRONG, WRONG, WRONG!!!\n";
- decoder << "TMWeight = 0.6 # weight of TM for calculating alignment probability\n";
- decoder << "TranslationModel = "<<lastmodel<<" # which model to use (3 or 4)\n";
- decoder << "LanguageModelMode = 2 # (2 (bigrams) or 3 (trigrams)\n\n";
- decoder << "# Output Options\n"
- << "TellWhatYouAreDoing = TRUE # print diagnostic messages to stderr\n"
- << "PrintOriginal = TRUE # repeat original sentence in the output\n"
- << "TopTranslations = 3 # number of n best translations to be returned\n"
- << "PrintProbabilities = TRUE # give the probabilities for the translations\n\n";
-
- decoder << "# LOGGING OPTIONS\n"
- << "LogFile = - # empty means: no log, dash means: STDOUT\n"
- << "LogLM = true # log language model lookups\n"
- << "LogTM = true # log translation model lookups\n";
- */
-}
-
-void printAllTables(vcbList& eTrainVcbList, vcbList& eTestVcbList,
- vcbList& fTrainVcbList, vcbList& fTestVcbList, model1& m1) {
- cerr << "writing Final tables to Disk \n";
- string t_inv_file = Prefix + ".ti.final";
- if ( !FEWDUMPS)
- m1.getTTable().printProbTableInverse(t_inv_file.c_str(),
- m1.getEnglishVocabList(), m1.getFrenchVocabList(),
- m1.getETotalWCount(), m1.getFTotalWCount());
- t_inv_file = Prefix + ".actual.ti.final";
- if ( !FEWDUMPS)
- m1.getTTable().printProbTableInverse(t_inv_file.c_str(),
- eTrainVcbList.getVocabList(), fTrainVcbList.getVocabList(),
- m1.getETotalWCount(), m1.getFTotalWCount(), true);
-
- string perp_filename = Prefix + ".perp";
- ofstream of_perp(perp_filename.c_str());
-
- cout << "Writing PERPLEXITY report to: " << perp_filename << '\n';
- if (!of_perp) {
- cerr << "\nERROR: Cannot write to " << perp_filename <<'\n';
- exit(1);
- }
-
- if (testCorpus)
- generatePerplexityReport(trainPerp, testPerp, trainViterbiPerp,
- testViterbiPerp, of_perp, (*corpus).getTotalNoPairs1(), (*testCorpus).getTotalNoPairs1(), true);
- else
- generatePerplexityReport(trainPerp, testPerp, trainViterbiPerp,
- testViterbiPerp, of_perp, (*corpus).getTotalNoPairs1(), 0, true);
-
- string eTrainVcbFile = Prefix + ".trn.src.vcb";
- ofstream of_eTrainVcb(eTrainVcbFile.c_str());
- cout << "Writing source vocabulary list to : " << eTrainVcbFile << '\n';
- if (!of_eTrainVcb) {
- cerr << "\nERROR: Cannot write to " << eTrainVcbFile <<'\n';
- exit(1);
- }
- eTrainVcbList.printVocabList(of_eTrainVcb) ;
-
- string fTrainVcbFile = Prefix + ".trn.trg.vcb";
- ofstream of_fTrainVcb(fTrainVcbFile.c_str());
- cout << "Writing source vocabulary list to : " << fTrainVcbFile << '\n';
- if (!of_fTrainVcb) {
- cerr << "\nERROR: Cannot write to " << fTrainVcbFile <<'\n';
- exit(1);
- }
- fTrainVcbList.printVocabList(of_fTrainVcb) ;
-
- //print test vocabulary list
-
- string eTestVcbFile = Prefix + ".tst.src.vcb";
- ofstream of_eTestVcb(eTestVcbFile.c_str());
- cout << "Writing source vocabulary list to : " << eTestVcbFile << '\n';
- if (!of_eTestVcb) {
- cerr << "\nERROR: Cannot write to " << eTestVcbFile <<'\n';
- exit(1);
- }
- eTestVcbList.printVocabList(of_eTestVcb) ;
-
- string fTestVcbFile = Prefix + ".tst.trg.vcb";
- ofstream of_fTestVcb(fTestVcbFile.c_str());
- cout << "Writing source vocabulary list to : " << fTestVcbFile << '\n';
- if (!of_fTestVcb) {
- cerr << "\nERROR: Cannot write to " << fTestVcbFile <<'\n';
- exit(1);
- }
- fTestVcbList.printVocabList(of_fTestVcb) ;
- printDecoderConfigFile();
- if (testCorpus)
- printOverlapReport(m1.getTTable(), *testCorpus, eTrainVcbList,
- fTrainVcbList, eTestVcbList, fTestVcbList);
-
-}
-
-bool readNextSent(istream&is, map< pair<int,int>,char >&s, int&number) {
- string x;
- if ( !(is >> x))
- return 0;
- if (x=="SENT:")
- is >> x;
- int n=atoi(x.c_str());
- if (number==-1)
- number=n;
- else if (number!=n) {
- cerr << "ERROR: readNextSent: DIFFERENT NUMBERS: " << number << " "
- << n << '\n';
- return 0;
- }
- int nS, nP, nO;
- nS=nP=nO=0;
- while (is >> x) {
- if (x=="SENT:")
- return 1;
- int n1, n2;
- is >> n1 >> n2;
- map< pair<int,int>,char >::const_iterator i=s.find(pair<int, int>(n1,
- n2));
- if (i==s.end()||i->second=='P')
- s[pair<int,int>(n1,n2)]=x[0];
- massert(x[0]=='S'||x[0]=='P');
- nS+= (x[0]=='S');
- nP+= (x[0]=='P');
- nO+= (!(x[0]=='S'||x[0]=='P'));
- }
- return 1;
-}
-
-bool emptySent(map< pair<int,int>,char >&x) {
- x = map<pair<int,int>, char>();
- return 1;
-}
-
-void ReadAlignment(const string&x, Vector<map< pair<int,int>,char > >&a) {
- ifstream infile(x.c_str());
- a.clear();
- map< pair<int,int>,char > sent;
- int number=0;
- while (emptySent(sent) && (readNextSent(infile, sent, number))) {
- if (int(a.size())!=number)
- cerr << "ERROR: ReadAlignment: " << a.size() << " " << number
- << '\n';
- a.push_back(sent);
- number++;
- }
- cout << "Read: " << a.size() << " sentences in reference alignment."
- << '\n';
-}
-
-void initGlobals(void) {
- cerr << "DEBUG: Enter";
- NODUMPS = false;
- Prefix = Get_File_Spec();
- cerr << "DEBUG: Prefix";
- LogFilename= Prefix + ".log";
- cerr << "DEBUG: Log";
- MAX_SENTENCE_LENGTH = MAX_SENTENCE_LENGTH_ALLOWED;
-}
-
-void convert(const map< pair<int,int>,char >&reference, alignment&x) {
- int l=x.get_l();
- int m=x.get_m();
- for (map< pair<int,int>,char >::const_iterator i=reference.begin(); i
- !=reference.end(); ++i) {
- if (i->first.first+1>int(m)) {
- cerr << "ERROR m to big: " << i->first.first << " "
- << i->first.second+1 << " " << l << " " << m
- << " is wrong.\n";
- continue;
- }
- if (i->first.second+1>int(l)) {
- cerr << "ERROR l to big: " << i->first.first << " "
- << i->first.second+1 << " " << l << " " << m
- << " is wrong.\n";
- continue;
- }
- if (x(i->first.first+1)!=0)
- cerr << "ERROR: position " << i->first.first+1 << " already set\n";
- x.set(i->first.first+1, i->first.second+1);
- }
-}
-
-double ErrorsInAlignment(const map< pair<int,int>,char >&reference,
- const Vector<WordIndex>&test, int l, int&missing, int&toomuch,
- int&eventsMissing, int&eventsToomuch, int pair_no) {
- int err=0;
- for (unsigned int j=1; j<test.size(); j++) {
- if (test[j]>0) {
- map< pair<int,int>,char >::const_iterator i=
- reference.find(make_pair(test[j]-1, j-1));
- if (i==reference.end() ) {
- toomuch++;
- err++;
- } else {
- if ( !(i->second=='S' || i->second=='P')) {
- cerr << "ERROR: wrong symbol in reference alignment '"
- << i->second << ' ' << int(i->second) << " no:" << pair_no<< "'\n";
- }
- }
- eventsToomuch++;
- }
- }
- for (map< pair<int,int>,char >::const_iterator i=reference.begin(); i
- !=reference.end(); ++i) {
- if (i->second=='S') {
- unsigned int J=i->first.second+1;
- unsigned int I=i->first.first+1;
- if (int(J)>=int(test.size())||int(I)>int(l)||int(J)<1||int(I)<1)
- cerr
- << "ERROR: alignment outside of range in reference alignment"
- << J << " " << test.size() << " (" << I << " " << l
- << ") no:" << pair_no << '\n';
- else {
- if (test[J]!=I) {
- missing++;
- err++;
- }
- }
- eventsMissing++;
- }
- }
- if (Verbose)
- cout << err << " errors in sentence\n";
- if (eventsToomuch+eventsMissing)
- return (toomuch+missing)/(eventsToomuch+eventsMissing);
- else
- return 1.0;
-}
-
-vcbList *globeTrainVcbList, *globfTrainVcbList;
-
-double StartTraining(int&result) {
- double errors=0.0;
- Vector<WordEntry> evlist,fvlist;
- vcbList eTrainVcbList(evlist), fTrainVcbList(fvlist);
- globeTrainVcbList=&eTrainVcbList;
- globfTrainVcbList=&fTrainVcbList;
-
- // What is being done here?
- string repFilename = Prefix + ".gizacfg";
- ofstream of2(repFilename.c_str());
- writeParameters(of2, getGlobalParSet(), -1) ;
- // Write another copy of configure file
-
- cout << "reading vocabulary files \n";
- eTrainVcbList.setName(SourceVocabFilename.c_str());
- fTrainVcbList.setName(TargetVocabFilename.c_str());
- eTrainVcbList.readVocabList();
- fTrainVcbList.readVocabList();
-
- // Vocabulary can be optional ?!
-
- cout << "Source vocabulary list has " << eTrainVcbList.uniqTokens()
- << " unique tokens \n";
-
- cout << "Target vocabulary list has " << fTrainVcbList.uniqTokens()
- << " unique tokens \n";
-
-
-
- corpus = new sentenceHandler(CorpusFilename.c_str(), &eTrainVcbList, &fTrainVcbList);
- vcbList eTestVcbList(eTrainVcbList); // Copied directly
- vcbList fTestVcbList(fTrainVcbList);
- // This portion of code should not be copied to model one
- // training
- if (TestCorpusFilename == "NONE")
- TestCorpusFilename = "";
- /////////////////////////// MODULE_TEST_START //////////////////
- if (TestCorpusFilename != "") {
- cout << "Test corpus will be read from: " << TestCorpusFilename << '\n';
-
- testCorpus= new sentenceHandler(
- TestCorpusFilename.c_str(),
- &eTestVcbList, &fTestVcbList);
-
- cout << " Test total # sentence pairs : " <<(*testCorpus).getTotalNoPairs1() <<" weighted:" <<(*testCorpus).getTotalNoPairs2() <<'\n';
-
- cout << "Size of the source portion of test corpus: "
- << eTestVcbList.totalVocab() << " tokens\n";
- cout << "Size of the target portion of test corpus: "
- << fTestVcbList.totalVocab() << " tokens \n";
- cout << "In source portion of the test corpus, only "
- << eTestVcbList.uniqTokensInCorpus()
- << " unique tokens appeared\n";
- cout << "In target portion of the test corpus, only "
- << fTestVcbList.uniqTokensInCorpus()
- << " unique tokens appeared\n";
- cout << "ratio (target/source) : " << double(fTestVcbList.totalVocab()) / eTestVcbList.totalVocab() << '\n';
- }
- cout << " Train total # sentence pairs (weighted): "
- << corpus->getTotalNoPairs2() << '\n';
- cout << "Size of source portion of the training corpus: "
- << eTrainVcbList.totalVocab()-corpus->getTotalNoPairs2()
- << " tokens\n";
- cout << "Size of the target portion of the training corpus: "
- << fTrainVcbList.totalVocab() << " tokens \n";
- cout << "In source portion of the training corpus, only "
- << eTrainVcbList.uniqTokensInCorpus()
- << " unique tokens appeared\n";
- cout << "In target portion of the training corpus, only "
- << fTrainVcbList.uniqTokensInCorpus()
- << " unique tokens appeared\n";
- cout << "lambda for PP calculation in IBM-1,IBM-2,HMM:= " << double(fTrainVcbList.totalVocab()) << "/(" << eTrainVcbList.totalVocab() << "-"
- << corpus->getTotalNoPairs2() << ")=";
- LAMBDA = double(fTrainVcbList.totalVocab())
- / (eTrainVcbList.totalVocab()-corpus->getTotalNoPairs2());
- cout << "= " << LAMBDA << '\n';
- /////////////////////////// MODULE_TEST_FINISH /////////////////
- // load dictionary
- Dictionary *dictionary;
- if (useDict)
- dictionary = new Dictionary(dictionary_Filename.c_str());
- else
- dictionary = new Dictionary("");
-
- int minIter=0;
- cerr << "Dictionary Loading complete" << endl;
-
- if (CoocurrenceFile.length()==0) {
- cerr << "ERROR: NO COOCURRENCE FILE GIVEN!\n";
- abort();
- }
-
- //ifstream coocs(CoocurrenceFile.c_str());
- tmodel<COUNT, PROB> tTable(CoocurrenceFile);
- cerr << "cooc file loading completed" << endl;
-
-
- // Need to rule out some bad logic
-
- if(restart == 1 && Model1_Iterations == 0) { // Restart on model 1 but not train on model one
- cerr << "You specified to load model 1 and train model 1 (restart == 1) but you specified zero Model 1 iteration, please revise your parameters";
- exit(1);
- }
- if(restart == 2 && Model2_Iterations == 0) { // Restart on model 2 but not train on model 2
- cerr << "You specified to load model 1 and train model 2 (restart == 2) but you specified zero Model 2 iteration, please revise your parameters";
- exit(1);
- }
-
- if(restart == 3 && Model2_Iterations == 0) { // Restart on model 2 but not train on model 2
- cerr << "You specified to load model 2 and train model 2 (restart == 3) but you specified zero Model 2 iteration, please revise your parameters";
- exit(1);
- }
-
- if(restart == 4 && HMM_Iterations == 0) { // Restart on model 2 but not train on model 2
- cerr << "You specified to load model 1 and train hmm (restart == 4) but you specified zero HMM iteration, please revise your parameters";
- exit(1);
- }
-
- if(restart == 5 && HMM_Iterations == 0) { // Restart on model 2 but not train on model 2
- cerr << "You specified to load model 2 and train hmm (restart == 5) but you specified zero HMM iteration, please revise your parameters";
- exit(1);
- }
-
- if(restart == 6 && HMM_Iterations == 0) { // Restart on model 2 but not train on model 2
- cerr << "You specified to load HMM and train hmm (restart == 6) but you specified zero HMM iteration, please revise your parameters";
- exit(1);
- }
-
- if(restart == 7 && Model3_Iterations == 0) { // Restart on model 3 but not train on model 3
- cerr << "You specified to load HMM and train model 3 (restart == 7) but you specified zero Model 3 iteration, please revise your parameters";
- exit(1);
- }
-
- if(restart == 8 && Model3_Iterations == 0) { // Restart on model 3 but not train on model 3
- cerr << "You specified to load model 2 and train model 3 (restart == 8) but you specified zero Model 3 iteration, please revise your parameters";
- exit(1);
- }
-
- if(restart == 9 && Model3_Iterations == 0) { // Restart on model 3 but not train on model 3
- cerr << "You specified to load model 3 and train model 3 (restart == 9) but you specified zero Model 3 iteration, please revise your parameters";
- exit(1);
- }
-
- if(restart == 10 && Model4_Iterations == 0) { // Restart on model 3 but not train on model 3
- cerr << "You specified to load model 3 and train model 4 (restart == 10) but you specified zero Model 4 iteration, please revise your parameters";
- exit(1);
- }
-
- if(restart == 11 && Model4_Iterations == 0) { // Restart on model 3 but not train on model 3
- cerr << "You specified to load model 4 and train model 4 (restart == 10) but you specified zero Model 4 iteration, please revise your parameters";
- exit(1);
- }
-
- //QIN: If restart level is larger than 0, then we need to load
- if (restart > 0){
- cerr << "We are going to load previous model " << prev_t << endl;
- if(!tTable.readProbTable(prev_t.c_str())){
- cerr << "Failed reading " << prev_t << endl;
- exit(1);
- }
- }
-
-
-
- cerr << "TTable initialization OK" << endl;
- // TModel is important!
- model1 m1(CorpusFilename.c_str(), eTrainVcbList, fTrainVcbList, tTable,
- trainPerp, *corpus, &testPerp, testCorpus, trainViterbiPerp,
- &testViterbiPerp);
- cerr << "Model one initalization OK" << endl;
- amodel<PROB> aTable(false);
-
- if (restart >2 && restart != 4 ){ // 1 is model 1, 2 is model 2 init, both just need t-table, 4 is directly train HMM from model one
- // and we do not need a model
- cerr << "We are going to load previous model from " << prev_a << endl;
- if(!aTable.readTable(prev_a.c_str())){
- cerr << "Failed reading " << prev_a << endl;
- exit(1);
- }
- }
-
- amodel<COUNT> aCountTable(false);
- model2 m2(m1, aTable, aCountTable);
- WordClasses french,english;
- hmm h(m2,english,french);
-
- bool hmmvalid = false;
-
- if (restart == 6 || restart ==7){ // If we want to initialize model 3 or continue train hmm, need to read jumps
- string al = prev_hmm + ".alpha";
- string be = prev_hmm + ".beta";
- cerr << "We are going to load previous (HMM) model from " << prev_hmm <<"," << al << "," << be << endl;
- if(!h.probs.readJumps(prev_hmm.c_str(),NULL,al.c_str(),be.c_str())){
- cerr << "Failed reading" << prev_hmm <<"," << al << "," << be << endl;
- exit(1);
- }
- hmmvalid = true;
- }else if (restart > 7){
- if (prev_hmm.length() > 0){
- string al = prev_hmm + ".alpha";
- string be = prev_hmm + ".beta";
- cerr << "We are going to load previous (HMM) model from " << prev_hmm <<"," << al << "," << be << endl;
- if(!h.probs.readJumps(prev_hmm.c_str(),NULL,al.c_str(),be.c_str())){
- cerr << "Failed reading" << prev_hmm <<"," << al << "," << be << endl ;
- cerr << "Continue without hmm" << endl;
- hmmvalid = false;
- }else
- hmmvalid = true;
- }
- }
- nmodel<PROB> nTable(m2.getNoEnglishWords()+1, MAX_FERTILITY);
- amodel<PROB> dTable(true);
-
- if(restart > 8){ // 9, 10, 11 requires ntable and d table,
- cerr << "We are going to load previous N model from " << prev_n << endl;
- if(!nTable.readNTable(prev_n.c_str())){
- cerr << "Failed reading " << prev_n << endl;
- exit(1);
- }
- cerr << "We are going to load previous D model from " << prev_d << endl;
- if(!dTable.readTable(prev_d.c_str())){
- cerr << "Failed reading " << prev_d << endl;
- exit(1);
- }
-
- }
-
-
-
- model3 m3(m2, dTable, nTable);
- if(restart > 8){
- double p0,p1;
- if (P0!=-1.0||prev_p0.length()==0) {
- p0 = P0;
- p1 = 1-P0;
- }else{
- cerr << "We are going to load previous P0 Value model from " << prev_p0 << endl;
- ifstream ifs(prev_p0.c_str());
- ifs >> p0;
- p1 = 1-p0;
- }
- m3.p0 = p0;
- m3.p1 = p1;
- }
-
- // For loading d4 table, we postpone it to model 4 iterations in the line marked with #LOADM4#
-
- if (ReadTablePrefix.length() ) {
- string number = "final";
- string tfile, afilennfile, dfile, d4file, p0file, afile, nfile; //d5file
- tfile = ReadTablePrefix + ".t3." + number;
- afile = ReadTablePrefix + ".a3." + number;
- nfile = ReadTablePrefix + ".n3." + number;
- dfile = ReadTablePrefix + ".d3." + number;
- d4file = ReadTablePrefix + ".d4." + number;
- //d5file = ReadTablePrefix + ".d5." + number ;
- p0file = ReadTablePrefix + ".p0_3." + number;
- tTable.readProbTable(tfile.c_str());
- aTable.readTable(afile.c_str());
- m3.dTable.readTable(dfile.c_str());
- m3.nTable.readNTable(nfile.c_str());
- sentPair sent;
- double p0;
- ifstream p0f(p0file.c_str());
- p0f >> p0;
- d4model d4m(MAX_SENTENCE_LENGTH,*(new WordClasses()), *(new WordClasses()));
-
- //d4m.readProbTable(d4file.c_str());
- //d5model d5m(d4m);
- //d5m.makeWordClasses(m1.Elist,m1.Flist,SourceVocabFilename+".classes",TargetVocabFilename+".classes");
- //d5m.readProbTable(d5file.c_str());
- makeSetCommand("model4smoothfactor", "0.0", getGlobalParSet(), 2);
- //makeSetCommand("model5smoothfactor","0.0",getGlobalParSet(),2);
- if (corpus||testCorpus) {
- sentenceHandler *x=corpus;
- if (x==0)
- x=testCorpus;
- cout << "Text corpus exists.\n";
- x->rewind();
- while (x&&x->getNextSentence(sent)) {
- Vector<WordIndex>& es = sent.eSent;
- Vector<WordIndex>& fs = sent.fSent;
- int l=es.size()-1;
- int m=fs.size()-1;
- transpair_model4 tm4(es, fs, m1.tTable, m2.aTable, m3.dTable,
- m3.nTable, 1-p0, p0, &d4m);
- alignment al(l, m);
- cout << "I use the alignment " << sent.sentenceNo-1 << '\n';
- //convert(ReferenceAlignment[sent.sentenceNo-1],al);
- transpair_model3 tm3(es, fs, m1.tTable, m2.aTable, m3.dTable,
- m3.nTable, 1-p0, p0, 0);
- double p=tm3.prob_of_target_and_alignment_given_source(al, 1);
- cout << "Sentence " << sent.sentenceNo << " has IBM-3 prob "
- << p << '\n';
- p=tm4.prob_of_target_and_alignment_given_source(al, 3, 1);
- cout << "Sentence " << sent.sentenceNo << " has IBM-4 prob "
- << p << '\n';
- //transpair_model5 tm5(es,fs,m1.tTable,m2.aTable,m3.dTable,m3.nTable,1-p0,p0,&d5m);
- //p=tm5.prob_of_target_and_alignment_given_source(al,3,1);
- //cout << "Sentence " << sent.sentenceNo << " has IBM-5 prob " << p << '\n';
- }
- } else {
- cout << "No corpus exists.\n";
- }
- } else {
- // initialize model1
- bool seedModel1 = false;
- if (Model1_Iterations > 0 && restart < 2) {
- if (t_Filename != "NONE" && t_Filename != "") {
- seedModel1 = true;
- m1.load_table(t_Filename.c_str());
- }
-
- if(restart ==1) seedModel1 = true;
- if(Model2_Iterations == 0 && HMM_Iterations == 0 &&
- Model3_Iterations == 0 && Model4_Iterations == 0 &&
- Model5_Iterations == 0 && dumpCount){ // OK we need to output!
- minIter=m1.em_with_tricks(Model1_Iterations, seedModel1,
- *dictionary, useDict,true,
- countPrefix.length() == 0 ? "./" : countPrefix.c_str(),
- dumpCountUsingWordString
- );
- }else{
- minIter=m1.em_with_tricks(Model1_Iterations, true,
- *dictionary, useDict);
- }
-
-
- errors=m1.errorsAL();
- }
- {
- if (Model2_Iterations > 0 && (restart < 2 || restart ==2 || restart == 3)) {
- if(restart == 2) m2.initialize_table_uniformly(*corpus);
- if(HMM_Iterations == 0 &&
- Model3_Iterations == 0 && Model4_Iterations == 0 &&
- Model5_Iterations == 0 && dumpCount){
- minIter=m2.em_with_tricks(Model2_Iterations,true,
- countPrefix.length() == 0 ? "./" : countPrefix.c_str(),
- dumpCountUsingWordString);
- }else{
- minIter=m2.em_with_tricks(Model2_Iterations);
- }
- errors=m2.errorsAL();
- }
- //cout << tTable.getProb(2, 2) << endl;
-
-
- if (HMM_Iterations > 0 && (restart < 2 || restart == 4 || restart == 5 || restart == 6)) {
- cout << "NOTE: I am doing iterations with the HMM model!\n";
-
- h.makeWordClasses(m1.Elist, m1.Flist, SourceVocabClassesFilename
- , TargetVocabClassesFilename);
- if(restart != 6) h.initialize_table_uniformly(*corpus);
-
- if(Model3_Iterations == 0 && Model4_Iterations == 0 &&
- Model5_Iterations == 0 && dumpCount){
- minIter=h.em_with_tricks(HMM_Iterations,true,
- countPrefix.length() == 0 ? NULL : countPrefix.c_str(),
- dumpCountUsingWordString, restart == 6);
- }else{
- minIter=h.em_with_tricks(HMM_Iterations,false,NULL,false,restart==6);
- }
- //multi_thread_em(HMM_Iterations, NCPUS, &h);
- errors=h.errorsAL();
- }
- if ( ((Transfer2to3 && Model2_Iterations>0)||(HMM_Iterations==0&&Model2_Iterations>0)||restart==8) && (restart!=7 && restart < 9)) {
- if (HMM_Iterations>0)
- cout << "WARNING: transfor is not needed, as results "
- "are overwritten bei transfer from HMM.\n";
- string test_alignfile = Prefix +".tst.A2to3";
- if (testCorpus)
- m2.em_loop(testPerp, *testCorpus, Transfer_Dump_Freq==1
- &&!NODUMPS, test_alignfile.c_str(),
- testViterbiPerp, true);
- if (testCorpus)
- cout << "\nTransfer: TEST CROSS-ENTROPY "
- << testPerp.cross_entropy() << " PERPLEXITY "
- << testPerp.perplexity() << "\n\n";
- if (Transfer == TRANSFER_SIMPLE)
- m3.transferSimple(*corpus, Transfer_Dump_Freq==1&&!NODUMPS,
- trainPerp, trainViterbiPerp);
- else
- m3.transfer(*corpus, Transfer_Dump_Freq==1&&!NODUMPS,
- trainPerp, trainViterbiPerp);
- errors=m3.errorsAL();
- }
- if(restart >= 7 && hmmvalid){
- h.makeWordClasses(m1.Elist, m1.Flist, SourceVocabClassesFilename
- , TargetVocabClassesFilename);
- }
- if (HMM_Iterations>0 || restart == 7)
- m3.setHMM(&h);
- else if (restart > 7 && hmmvalid){
- m3.setHMM(&h);
- }
-
- if (Model3_Iterations > 0 || Model4_Iterations > 0
- || Model5_Iterations || Model6_Iterations) {
-
- if(restart == 11){ // Need to load model 4
- if (Model5_Iterations==0 && Model6_Iterations==0 && dumpCount){
- minIter=m3.viterbi(Model3_Iterations,Model4_Iterations,Model5_Iterations,Model6_Iterations,prev_d4.c_str(),prev_d4_2.c_str()
- ,true,
- countPrefix.length() == 0 ? "./" : countPrefix.c_str(),
- dumpCountUsingWordString); // #LOADM4#
- }else{
- minIter=m3.viterbi(Model3_Iterations,Model4_Iterations,Model5_Iterations,Model6_Iterations,prev_d4.c_str(),prev_d4_2.c_str());
- }
- }else{
- if (Model5_Iterations==0 && Model6_Iterations==0 && dumpCount){
- minIter=m3.viterbi(Model3_Iterations,Model4_Iterations,Model5_Iterations,Model6_Iterations,NULL,NULL
- ,true,
- countPrefix.length() == 0 ? "./" : countPrefix.c_str(),
- dumpCountUsingWordString); // #LOADM4#
- }else{
- minIter=m3.viterbi(Model3_Iterations,Model4_Iterations,Model5_Iterations,Model6_Iterations,NULL,NULL);
- }
- }
- /*multi_thread_m34_em(m3, NCPUS, Model3_Iterations,
- Model4_Iterations);*/
- errors=m3.errorsAL();
- }
- if (FEWDUMPS||!NODUMPS) {
- printAllTables(eTrainVcbList, eTestVcbList, fTrainVcbList,
- fTestVcbList, m1);
- }
- }
- }
- result=minIter;
- return errors;
-}
-
-/*!
- Starts here
- */
-int main(int argc, char* argv[]) {
- ////////////////////////////////////////////////////////
- // Setup parameters
- ///////////////////////////////////////////////////////
- cerr << "Starting MGIZA " << endl;
- getGlobalParSet().insert(new Parameter<string>(
- "CoocurrenceFile",
- ParameterChangedFlag,
- "",
- CoocurrenceFile,
- PARLEV_SPECIAL));
- getGlobalParSet().insert(new Parameter<string>(
- "ReadTablePrefix",
- ParameterChangedFlag,
- "optimized",
- ReadTablePrefix,-1));
-
- getGlobalParSet().insert(new Parameter<string>("S",
- ParameterChangedFlag,
- "source vocabulary file name",
- SourceVocabFilename,
- PARLEV_INPUT));
- getGlobalParSet().insert(new Parameter<string>(
- "SOURCE VOCABULARY FILE",
- ParameterChangedFlag,
- "source vocabulary file name",
- SourceVocabFilename,-1));
-
- getGlobalParSet().insert(new Parameter<string>("T",
- ParameterChangedFlag,
- "target vocabulary file name",
- TargetVocabFilename,
- PARLEV_INPUT));
- getGlobalParSet().insert(new Parameter<string>(
- "TARGET VOCABULARY FILE",
- ParameterChangedFlag,
- "target vocabulary file name",
- TargetVocabFilename,-1));
- getGlobalParSet().insert(new Parameter<string>(
- "Source Vocabulary Classes",
- ParameterChangedFlag,
- "source vocabulary classes file name",
- SourceVocabClassesFilename,
- PARLEV_INPUT));
- getGlobalParSet().insert(new Parameter<string>(
- "Target Vocabulary Classes",
- ParameterChangedFlag,
- "target vocabulary classes file name",
- TargetVocabClassesFilename,
- PARLEV_INPUT));
- getGlobalParSet().insert(new Parameter<string>(
- "C",
- ParameterChangedFlag,
- "training corpus file name",
- CorpusFilename,PARLEV_INPUT));
- getGlobalParSet().insert(new Parameter<string>(
- "CORPUS FILE",
- ParameterChangedFlag,
- "training corpus file name",
- CorpusFilename,-1));
- getGlobalParSet().insert(new Parameter<string>("TC",
- ParameterChangedFlag,
- "test corpus file name",
- TestCorpusFilename,
- PARLEV_INPUT));
-
- getGlobalParSet().insert(new Parameter<string>(
- "TEST CORPUS FILE",
- ParameterChangedFlag,
- "test corpus file name",
- TestCorpusFilename,-1));
-
- getGlobalParSet().insert(new Parameter<string>("d",
- ParameterChangedFlag,
- "dictionary file name",
- dictionary_Filename,
- PARLEV_INPUT));
-
- getGlobalParSet().insert(new Parameter<string>(
- "DICTIONARY",
- ParameterChangedFlag,
- "dictionary file name",
- dictionary_Filename,-1));
-
- getGlobalParSet().insert(new Parameter<string>("l",
- ParameterChangedFlag,
- "log file name",
- LogFilename,PARLEV_OUTPUT));
- getGlobalParSet().insert(new Parameter<string>(
- "LOG FILE",
- ParameterChangedFlag,
- "log file name",
- LogFilename,-1));
-
- getGlobalParSet().insert(new Parameter<string>("o",
- ParameterChangedFlag,
- "output file prefix",
- Prefix,PARLEV_OUTPUT));
- getGlobalParSet().insert(new Parameter<string>(
- "OUTPUT FILE PREFIX",
- ParameterChangedFlag,
- "output file prefix",Prefix,-1));
-
- getGlobalParSet().insert(new Parameter<string>(
- "OUTPUT PATH",
- ParameterChangedFlag,
- "output path",
- OPath,PARLEV_OUTPUT));
-
- getGlobalParSet().insert(new Parameter<string>(
- "Previous T",
- ParameterChangedFlag,
- "The t-table of previous step",
- prev_t,PARLEV_INPUT));
-
- getGlobalParSet().insert(new Parameter<string>(
- "Previous A",
- ParameterChangedFlag,
- "The a-table of previous step",
- prev_a,PARLEV_INPUT));
-
- getGlobalParSet().insert(new Parameter<string>(
- "Previous D",
- ParameterChangedFlag,
- "The d-table of previous step",
- prev_d,PARLEV_INPUT));
-
- getGlobalParSet().insert(new Parameter<string>(
- "Previous N",
- ParameterChangedFlag,
- "The n-table of previous step",
- prev_n,PARLEV_INPUT));
-
- getGlobalParSet().insert(new Parameter<string>(
- "Previous D4",
- ParameterChangedFlag,
- "The d4-table of previous step",
- prev_d4,PARLEV_INPUT));
- getGlobalParSet().insert(new Parameter<string>(
- "Previous D42",
- ParameterChangedFlag,
- "The d4-table (2) of previous step",
- prev_d4_2,PARLEV_INPUT));
-
- getGlobalParSet().insert(new Parameter<string>(
- "Previous P0",
- ParameterChangedFlag,
- "The P0 previous step",
- prev_p0,PARLEV_INPUT));
-
- getGlobalParSet().insert(new Parameter<string>(
- "Previous HMM",
- ParameterChangedFlag,
- "The hmm-table of previous step",
- prev_hmm,PARLEV_INPUT));
-
- getGlobalParSet().insert(new Parameter<string>(
- "Count Output Prefix",
- ParameterChangedFlag,
- "The prefix for output counts",
- countPrefix,PARLEV_OUTPUT));
- // Timers
- time_t st1, fn;
- st1 = time(NULL); // starting time
-
- // Program Name
-
- string temp(argv[0]);
- Usage = temp + " <config_file> [options]\n";
-
- // At least, config file should be provided.
- if (argc < 2) {
- printHelp();
- exit(1);
- }
- cerr << "Initializing Global Paras " << endl;
- //
- initGlobals() ;
-
- cerr << "Parsing Arguments " << endl;
- //
- parseArguments(argc, argv);
-
- if (SourceVocabClassesFilename=="") {
- makeSetCommand("sourcevocabularyclasses",SourceVocabFilename+".classes",getGlobalParSet(),2);
- }
-
- if (TargetVocabClassesFilename=="") {
- makeSetCommand("targetvocabularyclasses",TargetVocabFilename+".classes",getGlobalParSet(),2);
- }
-
- // Determine number of threads
-
- if(NCPUS == 0){
- cerr << "Trying to detect number of CPUS...";
- NCPUS = boost::thread::hardware_concurrency();
- if(NCPUS==0){
- cerr << "failed, default to 2 threads" << std::endl;
- NCPUS = 2;
- }
- else{
- cerr << NCPUS << std::endl;
- }
- }
-
- cerr << "Opening Log File " << endl;
- if (Log) {
- logmsg.open(LogFilename.c_str(), ios::out);
- }
-
- cerr << "Printing parameters " << endl;
-
- printGIZAPars(cout);
-
- int a=-1;
-
- double errors=0.0;
-
- if (OldADBACKOFF!=0)
- cerr
- << "WARNING: Parameter -adBackOff does not exist further; use CompactADTable instead.\n";
-
- if (MAX_SENTENCE_LENGTH > MAX_SENTENCE_LENGTH_ALLOWED)
- cerr << "ERROR: MAX_SENTENCE_LENGTH is too big " << MAX_SENTENCE_LENGTH
- << " > " << MAX_SENTENCE_LENGTH_ALLOWED << '\n';
-
- // Actually word is done here
- errors=StartTraining(a);
-
- fn = time(NULL); // finish time
-
- cout << '\n' << "Entire Training took: " << difftime(fn, st1)
- << " seconds\n";
- cout << "Program Finished at: "<< my_ctime(&fn) << '\n';
- cout << "==========================================================\n";
- return 0;
-}
-
diff --git a/scripts/training/MGIZA/src/mkcls/Array.h b/scripts/training/MGIZA/src/mkcls/Array.h
deleted file mode 100644
index 5647fd0..0000000
--- a/scripts/training/MGIZA/src/mkcls/Array.h
+++ /dev/null
@@ -1,370 +0,0 @@
-/*
-
-Copyright (C) 1997,1998,1999,2000,2001 Franz Josef Och
-
-mkcls - a program for making word classes .
-
-This program is free software; you can redistribute it and/or
-modify it under the terms of the GNU General Public License
-as published by the Free Software Foundation; either version 2
-of the License, or (at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with this program; if not, write to the Free Software
-Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
-USA.
-
-*/
-
-
-
-#ifndef ARRAY_H_DEFINED
-#define ARRAY_H_DEFINED
-using namespace std;
-#include "myassert.h"
-#include <algorithm>
-#include <string>
-#include <utility>
-#include <functional>
-#include "my.h"
-
-#define ARRAY_DEBUG
-
-
-template<class T> class Array
-{
- private:
- T *p;
- int realSize;
- int maxWritten;
- char a;
-
- void copy(T *a,const T *b,int n);
- void copy(T *a,T *b,int n);
- void _expand();
-
- public:
- Array()
- : p(0),realSize(0),maxWritten(-1) ,a(1)
- {
-#ifdef VERY_ARRAY_DEBUG
- cout << "MAKE ARRAY: " << this<<" "<<(void*)p << endl;
-#endif
- }
- Array(const Array<T> &x)
- : p(new T[x.maxWritten+1]),realSize(x.maxWritten+1),maxWritten(x.maxWritten),a(x.a)
- {
- copy(p,x.p,realSize);
-#ifdef VERY_ARRAY_DEBUG
- cout << "MAKE ARRAY copy: " << this << " " << realSize <<" "<<(void*)p<< endl;
-#endif
- }
- explicit Array(int n)
- : p(new T[n]),realSize(n),maxWritten(n-1),a(0)
- {
-#ifdef VERY_ARRAY_DEBUG
- cout << "MAKE ARRAY with parameter n: " << this << " " << realSize<<" "<<(void*)p << endl;
-#endif
- }
- Array(int n,const T&_init,int _a=0)
- : p(new T[n]),realSize(n),maxWritten(n-1),a(_a)
- {
- for(int iii=0;iii<n;iii++)p[iii]=_init;
-#ifdef VERY_ARRAY_DEBUG
- cout << "MAKE ARRAY with parameter n and init: " << this << " " << realSize<<" "<<(void*)p << endl;
-#endif
- }
-
- ~Array()
- {
-#ifdef VERY_ARRAY_DEBUG
- cout << "FREE ARRAY: " << this << " " << realSize<<" "<<(void*)p << endl;
-#endif
- delete [] p;
- }
-
- Array<T>& operator=(const Array<T>&x)
- {
- if( this!= &x )
- {
-#ifdef VERY_ARRAY_DEBUG
- cout << "FREE ARRAY because of operator=: " << this << " " << realSize<<" "<<(void*)p << endl;
-#endif
-
- delete [] p;
- realSize = x.maxWritten+1;
- maxWritten = x.maxWritten;
- a = x.a;
- p = new T[realSize];
- copy(p,x.p,realSize);
-#ifdef VERY_ARRAY_DEBUG
- cout << "NEW ARRAY because of operator=: " << this << " " << realSize<<" "<<(void*)p << endl;
-#endif
- }
- return *this;
- }
-
- Array<T>& operator=(Array<T>&x)
- {
- if( this!= &x )
- {
-#ifdef VERY_ARRAY_DEBUG
- cout << "FREE ARRAY because of operator=: " << this << " " << realSize<<" "<<(void*)p << endl;
-#endif
- delete [] p;
- realSize = x.maxWritten+1;
- maxWritten = x.maxWritten;
- a = x.a;
- p = new T[realSize];
- copy(p,x.p,realSize);
-#ifdef VERY_ARRAY_DEBUG
- cout << "NEW ARRAY because of operator=: " << this << " " << realSize<<" "<<(void*)p << endl;
-#endif
- }
- return *this;
- }
-
- void allowAccess(int n)
- {
- while( realSize<=n )
- _expand();
- maxWritten=max(maxWritten,n);
- massert( maxWritten<realSize );
- }
- void resize(int n)
- {
- while( realSize<n )
- _expand();
- maxWritten=n-1;
- }
- void sort(int until=-1)
- {
- if( until== -1 ) until=size();
- std::sort(p,p+until);
- }
- void invsort(int until=-1)
- {
- if( until== -1 ) until=size();
- std::sort(p,p+until,greater<T>());
- }
- void init(int n,const T&_init,bool _a=0)
- {
-#ifdef VERY_ARRAY_DEBUG
- cout << "FREE ARRAY because of init: " << this << " " << realSize<<" "<<(void*)p << endl;
-#endif
- delete []p;
- p=new T[n];
- realSize=n;
- a=_a;
- maxWritten=n-1;
- for(int iii=0;iii<n;iii++)p[iii]=_init;
-#ifdef VERY_ARRAY_DEBUG
- cout << "NEW ARRAY because of init: " << this << " " << realSize<<" "<<(void*)p << endl;
-#endif
- }
- inline int size() const
- {massert( maxWritten<realSize );
- return maxWritten+1;}
- inline int low() const
- { return 0; }
- inline int high() const
- { return maxWritten; }
- inline bool autoexpand() const
- {return a;}
- inline void autoexpand(bool autoExp)
- {a=autoExp;}
- int findMax() const;
- int findMin() const;
- const void errorAccess(int n) const;
- inline T*getPointerToData(){return p;}
-
- inline T& operator[](int n)
- {
- if( a && n==maxWritten+1 )
- allowAccess(n);
- if( n<0 || n>maxWritten )
- errorAccess(n);
- return p[n];
- }
- inline const T& operator[](int n) const
- {
- if(n<0 || n>maxWritten )
- errorAccess(n);
- return p[n];
- }
- const T&top(int n=0) const
- {return (*this)[maxWritten-n];}
- T&top(int n=0)
- {return (*this)[maxWritten-n];}
- T&push(const T&x)
- {
- (*this)[maxWritten+1]=x;
- return top();
- }
- bool writeTo(ostream&out) const
- {
- out << "Array ";
- out << size() << " ";
- out << a << endl;
- for(int iv=0;iv<=maxWritten;iv++)
- {
- writeOb(out,(*this)[iv]);
- out << endl;
- }
- return 1;
- }
- bool readFrom(istream&in)
- {
- string s;
- if( !in )
- {
- cerr << "ERROR(Array): file cannot be opened.\n";
- return 0;
- }
- in >> s;
- if( !(s=="Array") )
- {
- cerr << "ERROR(Array): Array!='"<<s<<"'\n";
- return 0;
- }
- int biggest;
- in >> biggest;
- in >> a;
- resize(biggest);
- for(int iv=0;iv<size();iv++)
- {
- readOb(in,(*this)[iv]);
- }
- return 1;
- }
-};
-
-template<class T> bool operator==(const Array<T> &x, const Array<T> &y)
-{
- if( &x == &y )
- return 1;
- else
- {
- if( y.size()!=x.size() )
- return 0;
- else
- {
- for(int iii=0;iii<x.size();iii++)
- if( !(x[iii]==y[iii]) )
- return 0;
- return 1;
- }
- }
-}
-
-template<class T> bool operator<(const Array<T> &x, const Array<T> &y)
-{
- if( &x == &y )
- return 0;
- else
- {
- if( y.size()<x.size() )
- return !(y<x);
- for(int iii=0;iii<x.size();iii++)
- {
- massert( iii!=y.size() );
- if( x[iii]<y[iii] )
- return 1;
- else if( y[iii]<x[iii] )
- return 0;
- }
- return x.size()!=y.size();
- }
-}
-
-
-template<class T> const void Array<T>:: errorAccess(int n) const
-{
- cerr << "ERROR: Access to array element " << n
- << " (" << maxWritten << "," << realSize << "," << (void*)p << " " << a << ")\n";
- cout << "ERROR: Access to array element " << n
- << " (" << maxWritten << "," << realSize << "," << (void*)p << " " << a << ")\n";
- massert(0);
-#ifndef DEBUG
- abort();
-#endif
-}
-
-template<class T> ostream& operator<<(ostream&o,const Array<T>&a)
-{
- o << "Array(" << a.size() << "," << a.autoexpand() << "){ ";
- for(int iii=0;iii<a.size();iii++)
- o << " " << iii<< ":" << a[iii]<<";";
- return o << "}\n";
-}
-
-template<class T> istream& operator>>(istream&in, Array<T>&)
-{return in;}
-
-template<class T> int Hash(const Array<T>&a)
-{
- int n=0;
- for(int iii=0;iii<a.size();iii++)
- n+=Hash(a[iii])*(iii+1);
- return n+a.size()*47;
-}
-template<class T> void Array<T>::copy(T *aa,const T *bb,int n)
-{
- for(int iii=0;iii<n;iii++)
- aa[iii]=bb[iii];
-}
-template<class T> void Array<T>::copy(T *aa,T *bb,int n)
-{
- for(int iii=0;iii<n;iii++)
- aa[iii]=bb[iii];
-}
-
-template<class T> void Array<T>::_expand()
-{
-#ifdef VERY_ARRAY_DEBUG
- cout << "FREE ARRAY because of _expand: " << this << " " << realSize<<" "<<(void*)p << endl;
-#endif
- T *oldp=p;
- int oldsize=realSize;
- realSize=realSize*2+1;
- p=new T[realSize];
- copy(p,oldp,oldsize);
- delete [] oldp;
-#ifdef VERY_ARRAY_DEBUG
- cout << "NEW ARRAY because of _expand: " << this << " " << realSize<<" "<<(void*)p << endl;
-#endif
-}
-
-template<class T> int Array<T>::findMax() const
-{
- if( size()==0 )
- return -1;
- else
- {
- int maxPos=0;
- for(int iii=1;iii<size();iii++)
- if( (*this)[maxPos]<(*this)[iii] )
- maxPos=iii;
- return maxPos;
- }
-}
-template<class T> int Array<T>::findMin() const
-{
- if( size()==0 )
- return -1;
- else
- {
- int minPos=0;
- for(int iii=1;iii<size();iii++)
- if( (*this)[iii]<(*this)[minPos] )
- minPos=iii;
- return minPos;
- }
-}
-
-#endif
diff --git a/scripts/training/MGIZA/src/mkcls/CMakeLists.txt b/scripts/training/MGIZA/src/mkcls/CMakeLists.txt
deleted file mode 100644
index d94d92a..0000000
--- a/scripts/training/MGIZA/src/mkcls/CMakeLists.txt
+++ /dev/null
@@ -1,76 +0,0 @@
-
-# Set output directory
-
-FIND_PACKAGE(Threads)
-
-
-SET(EXECUTABLE_OUTPUT_PATH ${PROJECT_BINARY_DIR}/bin)
-SET(LIBRARY_OUTPUT_PATH ${PROJECT_BINARY_DIR}/lib)
-
-ADD_DEFINITIONS("-DNDEBUG")
-IF (WIN32)
-
-ELSE()
-ADD_DEFINITIONS("-Wno-deprecated")
-ADD_DEFINITIONS("-Wno-write-strings")
-ENDIF()
-
-SET( MKCLS_SRC
- Array.h
- FixedArray.h
- FlexArray.h
- GDAOptimization.cpp
- GDAOptimization.h
- general.cpp
- general.h
- HCOptimization.cpp
- HCOptimization.h
- IterOptimization.cpp
- IterOptimization.h
- KategProblem.cpp
- KategProblem.h
- KategProblemKBC.cpp
- KategProblemKBC.h
- KategProblemTest.cpp
- KategProblemTest.h
- KategProblemWBC.cpp
- KategProblemWBC.h
- mkcls.cpp
- my.h
- myassert.h
- myleda.h
- MYOptimization.cpp
- MYOptimization.h
- mystl.h
- Optimization.cpp
- Optimization.h
- Problem.cpp
- Problem.h
- ProblemTest.cpp
- ProblemTest.h
- RRTOptimization.cpp
- RRTOptimization.h
- SAOptimization.cpp
- SAOptimization.h
- StatVar.cpp
- StatVar.h
- TAOptimization.cpp
- TAOptimization.h
-
- )
-
-ADD_EXECUTABLE(mkcls ${MKCLS_SRC})
-INCLUDE_DIRECTORIES( ${PROJECT_SOURCE_DIR} )
-INCLUDE_DIRECTORIES( ${PROJECT_SOURCE_DIR}/src/ )
-INCLUDE_DIRECTORIES( ${PROJECT_SOURCE_DIR}/src/mkcls )
-LINK_DIRECTORIES ( ${LIBRARY_OUTPUT_PATH} )
-
-
-INSTALL(TARGETS mkcls
- RUNTIME DESTINATION bin
- LIBRARY DESTINATION lib
- ARCHIVE DESTINATION lib
- )
-
-
-
diff --git a/scripts/training/MGIZA/src/mkcls/FixedArray.h b/scripts/training/MGIZA/src/mkcls/FixedArray.h
deleted file mode 100644
index 39da0b1..0000000
--- a/scripts/training/MGIZA/src/mkcls/FixedArray.h
+++ /dev/null
@@ -1,287 +0,0 @@
-/*
-
-Copyright (C) 1997,1998,1999,2000,2001 Franz Josef Och
-
-mkcls - a program for making word classes .
-
-This program is free software; you can redistribute it and/or
-modify it under the terms of the GNU General Public License
-as published by the Free Software Foundation; either version 2
-of the License, or (at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with this program; if not, write to the Free Software
-Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
-USA.
-
-*/
-
-
-
-
-#ifndef FIXARRAY_H_DEFINED
-#define FIXARRAY_H_DEFINED
-#include <iostream>
-#include <string>
-#include <functional>
-
-template<class T>
-bool writeOb(ostream&out,const T&f)
-{
- out << f << " ";
- return 1;
-}
-
-template<class T>
-bool readOb(istream&in,T&f)
-{
- in >> f;
- char c;
- in.get(c);
- massert(c==' ');
- return 1;
-}
-
-template<class T>
-bool writeOb(ostream&out,const string &s,const T&f)
-{
- out << s << " " << f << " ";
- return 1;
-}
-template<class T>
-bool readOb(istream&in,const string&s,T&f)
-{
- string ss;
- in >> ss;
- if( s!=ss )
- {
- cerr << "ERROR: readOb should be '" << s << "' and is '" << ss << "'" << endl;
- return 0;
- }
- in >> f;
- char c;
- in.get(c);
- massert(c==' ');
- return 1;
-}
-
-template<class T> class FixedArray
-{
- private:
- void copy(T *aa,const T *bb,int nnn)
- {for(int iii=0;iii<nnn;iii++)aa[iii]=bb[iii];}
-
- public:
- T *p;
- int realSize;
- FixedArray()
- : p(0),realSize(0){}
- FixedArray(const FixedArray<T> &x)
- : p(new T[x.realSize]),realSize(x.realSize) {copy(p,x.p,realSize);}
- explicit FixedArray(int n)
- : p(new T[n]),realSize(n){}
- FixedArray(int n,const T&_init)
- : p(new T[n]),realSize(n){for(int z=0;z<n;z++)p[z]=_init;}
- FixedArray(const FixedArray&f,const T&t)
- : p(new T[f.size()+1]),realSize(f.size()+1){for(int z=0;z<f.size();z++)p[z]=f[z];p[f.size()]=t;}
- ~FixedArray()
- { delete [] p;p=0;realSize=-1;}
-
- FixedArray<T>& operator=(const FixedArray<T>&x)
- {
- if( this!= &x )
- {
- delete [] p;
- realSize = x.realSize;
- p = new T[x.realSize];
- copy(p,x.p,realSize);
- }
- return *this;
- }
- void resize(int n)
- {
- if( n<=realSize )
- shrink(n);
- else
- {
- T*np=new T[n];
- copy(np,p,realSize);
- delete []p;
- p=np;
- realSize=n;
- }
- }
- void shrink(int n)
- {
- assert(n<=realSize);
- realSize=n;
- }
- void init(int n,const T&_init)
- {
- delete []p;
- p=new T[n];
- realSize=n;
- for(int l=0;l<n;l++)p[l]=_init;
- }
- inline const T&top(int n=0) const
- {return (*this)[realSize-1-n];}
- inline int size() const
- {return realSize;}
-
- inline T*begin(){ return p; }
- inline T*end(){ return p+realSize; }
-
- inline const T*begin()const{ return p; }
- inline const T*end()const{return p+realSize;}
-
- inline int low() const
- {return 0;}
- inline int high() const
- {return realSize-1;}
- const void errorAccess(int n) const;
-
- inline T& operator[](int n)
- {
- return p[n];
- }
- inline const T& operator[](int n) const
- {
- return p[n];
- }
- bool writeTo(ostream&out) const
- {
- out << "FixedArray ";
- out << size() << " ";
- for(int a=0;a<size();a++)
- {
- writeOb(out,(*this)[a]);
- out << " ";
- }
- out << endl;
- return 1;
- }
- bool readFrom(istream&in)
- {
- string s;
- if( !in )
- {
- cerr << "ERROR(FixedArray): file cannot be opened.\n";
- return 0;
- }
- in >> s;
- if( !(s=="FixedArray") )
- {
- cerr << "ERROR(FixedArray): FixedArray!='"<<s<<"'\n";
- return 0;
- }
- int biggest;
- in >> biggest;
- resize(biggest);
- for(int a=0;a<size();a++)
- readOb(in,(*this)[a]);
- return 1;
- }
- void sort(int until=-1)
- {
- if( until== -1 ) until=size();
- std::sort(p,p+until);
- }
- void invsort(int until=-1)
- {
- if( until== -1 ) until=size();
- std::sort(p,p+until,greater<T>());
- }
- int binary_locate(const T&t)
- {
- T*ppos=std::lower_bound(p,p+size(),t);
- int pos=ppos-p;
- if( pos>=-1&&pos<size() )
- return pos;
- else
- return -1;
- }
- int binary_search(const T&t)
- {
- T*ppos=std::lower_bound(p,p+size(),t);
- int pos=ppos-p;
- if( pos>=0&&pos<size()&& *ppos==t )
- return pos;
- else
- return -1;
- }
- typedef T* iterator;
- typedef const T* const_iterator;
-};
-
-template<class T> bool operator<(const FixedArray<T> &x, const FixedArray<T> &y)
-{
- return lexicographical_compare(x.begin(),x.end(),y.begin(),y.end());
-
-}
-
-
-template<class T> bool operator==(const FixedArray<T> &x, const FixedArray<T> &y)
-{
- if( &x == &y )return 1;
- const int s = x.size();
- if( s !=y.size() )return 0;
- for(int iii=0;iii<s;iii++)
- if( !(x.p[iii]==y.p[iii]) )
- return 0;
- return 1;
-}
-
-template<class T> int Hash(const FixedArray<T>&a)
-{
- int n=0;
- const int s=a.size();
- for(int iii=0;iii<s;iii++)
- n=13*n+Hash(a.p[iii]);
- return n;
-}
-
-template<class T> const void FixedArray<T>:: errorAccess(int n) const
-{
- massert(0);
- cerr << "ERROR: Access to array element " << n
- << " (" << realSize << "," << (void*)p << ")\n";
-}
-
-template<class T> ostream& operator<<(ostream&o,const FixedArray<T>&a)
-{
- o << "FixedArray(" << a.size() << "){ ";
- for(int iii=0;iii<a.size();iii++)
- o << " " << iii<< ":" << a[iii]<<";";
- return o << "}\n";
-}
-
-template<class T> istream& operator>>(istream&in, FixedArray<T>&)
-{ return in;}
-
-template<class T> FixedArray<T> operator+(const FixedArray<T>&a,const FixedArray<T>&b)
-{
- massert(a.size()==b.size());
- FixedArray<T> x(a.size());
- for(int iii=0;iii<a.size();iii++)
- x[iii]=a[iii]+b[iii];
- return x;
-}
-template<class T> FixedArray<T> operator|(const FixedArray<T>&aaa,const FixedArray<T>&bbb)
-{
- iassert(aaa.size()==bbb.size());
-
- FixedArray<T> xxx(aaa.size());
- for(int iii=0;iii<aaa.size();iii++)
- xxx.p[iii]=aaa.p[iii]||bbb.p[iii];
- return xxx;
-}
-
-#endif
-
-
-
diff --git a/scripts/training/MGIZA/src/mkcls/FlexArray.h b/scripts/training/MGIZA/src/mkcls/FlexArray.h
deleted file mode 100644
index ede3e9e..0000000
--- a/scripts/training/MGIZA/src/mkcls/FlexArray.h
+++ /dev/null
@@ -1,48 +0,0 @@
-/*
-
-Copyright (C) 1997,1998,1999,2000,2001 Franz Josef Och
-
-mkcls - a program for making word classes .
-
-This program is free software; you can redistribute it and/or
-modify it under the terms of the GNU General Public License
-as published by the Free Software Foundation; either version 2
-of the License, or (at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with this program; if not, write to the Free Software
-Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
-USA.
-
-*/
-
-
-
-#ifndef CLASS_FlexArray_defined
-#define CLASS_FlexArray_defined
-#include "FixedArray.h"
-
-template<class T>
-class FlexArray
-{
-private:
- FixedArray<T> p;
- int start,end;
-public:
- FlexArray(int _start=0,int _end=-1)
- : p(_end-_start+1),start(_start),end(_end) {}
- T&operator[](int i)
- {return p[i-start];}
- const T&operator[](int i)const
- {returnp[i-start];}
- int low()const{return start;}
- int high()const{return end;}
-};
-
-
-#endif
diff --git a/scripts/training/MGIZA/src/mkcls/GDAOptimization.cpp b/scripts/training/MGIZA/src/mkcls/GDAOptimization.cpp
deleted file mode 100644
index a9e2fa7..0000000
--- a/scripts/training/MGIZA/src/mkcls/GDAOptimization.cpp
+++ /dev/null
@@ -1,159 +0,0 @@
-/*
-
-Copyright (C) 1997,1998,1999,2000,2001 Franz Josef Och
-
-mkcls - a program for making word classes .
-
-This program is free software; you can redistribute it and/or
-modify it under the terms of the GNU General Public License
-as published by the Free Software Foundation; either version 2
-of the License, or (at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with this program; if not, write to the Free Software
-Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
-USA.
-
-*/
-
-
-
-
-#include "GDAOptimization.h"
-#include "ProblemTest.h"
-#include <cmath>
-
-#define GDAOptimization GDAOptimization
-#define IterOptimization IterOptimization
-
-
-
-double GDAOptimization::defaultTemperatur=1e100;
-
-
-double GDAOptimization::defaultAlpha=0.001;
-
-
-
-GDAOptimization::GDAOptimization(Problem &p,int m)
-: IterOptimization(p,m) ,temperatur(defaultTemperatur),alpha(defaultAlpha)
-{
-}
-
-
-GDAOptimization::GDAOptimization(Problem &p,double t,double a,int m)
-: IterOptimization(p,m) ,temperatur(t) ,alpha(a)
-{
-}
-
-
-GDAOptimization::GDAOptimization(GDAOptimization &o)
-: IterOptimization(o)
-{
- temperatur = o.temperatur;
- alpha = o.alpha;
- gdaEndFlag = o.gdaEndFlag;
-}
-
-
-void GDAOptimization::zInitialize()
-{
- IterOptimization::zInitialize();
- if(temperatur==1e100)
- {
- double v=problem.value();
-
-
-
-
-
- temperatur=v;
- }
- assert(alpha>=0);
-}
-
-short GDAOptimization::accept(double delta)
-{
- if( curValue + delta < temperatur )
- return 1;
- else
- return 0;
-}
-
-void GDAOptimization::abkuehlen()
-{
- double newTemperatur = temperatur - alpha*(temperatur - curValue);
- if( fabs(temperatur - newTemperatur)<1e-30 )
- gdaEndFlag=1;
- else
- gdaEndFlag=0;
- temperatur = newTemperatur;
-}
-
-short GDAOptimization::end()
-{
- return ( endFlag>0 ) && ( gdaEndFlag );
-}
-
-void GDAOptimization::makeGraphOutput()
-{
- IterOptimization::makeGraphOutput();
- *GraphOutput << temperatur-curValue;
-}
-
-
-
-
-double GDAOptimization::optimizeValue(Problem &p,int proParameter,int numParameter,int typ,
- int optimierungsschritte,int print)
-{
- if(typ!=1)
- {
- cerr << "Error: wrong parameter-type in GDAOptimization::optimizeValue ("
- << typ << ")\n";
- exit(1);
- }
- else
- {
- double bestPar=-1,best=1e100;
- double now;
- if( print )
- cout << "#GDA-optimizeValues: " << numParameter<<endl;
-
-
- defaultTemperatur=1e100;
-
- for(int i=0;i<=numParameter;i++)
- {
- StatVar end,laufzeit,init;
- defaultAlpha = pow(pow(200,1.0/numParameter),i)*0.002;
- solveProblem(0,p,proParameter,optimierungsschritte,GDA_OPT,now,end,
- laufzeit,init);
- if( best>now )
- {
- best=now;
- bestPar=defaultAlpha;
- }
- if( print )
- {
- cout << defaultAlpha <<" ";
- cout << end.getMean() << " " << end.quantil(0.2) << " "
- << end.quantil(0.79) << " " << laufzeit.getMean() << " "
- << end.quantil(0.0) << " " << end.getSigma() << " "
- << end.getSigmaSmaller()<< " "<< end.getSigmaBigger()<< endl;
- }
- }
- if( print )
- cout << "#Parameter Mittelwert 0.2-Quantil 0.8-Quantil Laufzeit"
- " Bester Sigma SigmaSmaller SigmaBigger\n";
- defaultAlpha=0.03;
- return bestPar;
- }
- return 1e100;
-}
-
diff --git a/scripts/training/MGIZA/src/mkcls/GDAOptimization.h b/scripts/training/MGIZA/src/mkcls/GDAOptimization.h
deleted file mode 100644
index 33bcec3..0000000
--- a/scripts/training/MGIZA/src/mkcls/GDAOptimization.h
+++ /dev/null
@@ -1,80 +0,0 @@
-/*
-
-Copyright (C) 1997,1998,1999,2000,2001 Franz Josef Och
-
-mkcls - a program for making word classes .
-
-This program is free software; you can redistribute it and/or
-modify it under the terms of the GNU General Public License
-as published by the Free Software Foundation; either version 2
-of the License, or (at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with this program; if not, write to the Free Software
-Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
-USA.
-
-*/
-
-
-
-
-
-
-#ifndef GDAOPTIMIZATION
-#define GDAOPTIMIZATION
-#include "IterOptimization.h"
-
-class GDAOptimization : public IterOptimization
-{
-
- private:
- double temperatur;
- double alpha;
- short gdaEndFlag;
-
-
- protected:
- virtual void zInitialize();
-
-
- virtual short accept(double delta);
-
-
- virtual void abkuehlen();
-
-
- virtual short end();
-
-
- virtual void makeGraphOutput();
-
-
- public:
- GDAOptimization(Problem &p,double temperatur,double alpha,
- int maxIter=-1);
-
-
- GDAOptimization(Problem &p,int maxIter=-1);
-
-
- GDAOptimization(GDAOptimization &o);
-
-
- static double optimizeValue(Problem &p,int proParameter,
- int numParameter,int typ,int schritte= -1,int verbose=1);
-
-
-
- static double defaultTemperatur;
- static double defaultAlpha;
-
-};
-#endif
-
-
diff --git a/scripts/training/MGIZA/src/mkcls/HCOptimization.cpp b/scripts/training/MGIZA/src/mkcls/HCOptimization.cpp
deleted file mode 100644
index 0c6a729..0000000
--- a/scripts/training/MGIZA/src/mkcls/HCOptimization.cpp
+++ /dev/null
@@ -1,57 +0,0 @@
-/*
-
-Copyright (C) 1997,1998,1999,2000,2001 Franz Josef Och
-
-mkcls - a program for making word classes .
-
-This program is free software; you can redistribute it and/or
-modify it under the terms of the GNU General Public License
-as published by the Free Software Foundation; either version 2
-of the License, or (at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with this program; if not, write to the Free Software
-Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
-USA.
-
-*/
-
-
-
-
-#include "HCOptimization.h"
-
-HCOptimization::HCOptimization(Problem &p,int m)
-: IterOptimization(p,m)
-{
- if( maxStep<=0 )
- maxStep=(int)(problem.expectedNumberOfIterations());
-}
-HCOptimization::HCOptimization(HCOptimization &o)
-: IterOptimization(o)
-{
-}
-
-
-short HCOptimization::accept(double delta)
-{
- if( delta < 0 )
- return 1;
- else
- return 0;
-}
-short HCOptimization::end()
-{
- return endFlag>0;
-}
-void HCOptimization::abkuehlen()
-{
-}
-
-
-
diff --git a/scripts/training/MGIZA/src/mkcls/HCOptimization.h b/scripts/training/MGIZA/src/mkcls/HCOptimization.h
deleted file mode 100644
index ec147b2..0000000
--- a/scripts/training/MGIZA/src/mkcls/HCOptimization.h
+++ /dev/null
@@ -1,54 +0,0 @@
-/*
-
-Copyright (C) 1997,1998,1999,2000,2001 Franz Josef Och
-
-mkcls - a program for making word classes .
-
-This program is free software; you can redistribute it and/or
-modify it under the terms of the GNU General Public License
-as published by the Free Software Foundation; either version 2
-of the License, or (at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with this program; if not, write to the Free Software
-Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
-USA.
-
-*/
-
-
-
-
-
-
-#ifndef HCOPTIMIZATION
-#define HCOPTIMIZATION
-#include "IterOptimization.h"
-
-class HCOptimization : public IterOptimization
-{
-
- protected:
- virtual short accept(double delta);
-
-
- virtual void abkuehlen();
-
-
- virtual short end();
-
-
- public:
- HCOptimization(Problem &p,int maxIter=-1);
-
-
- HCOptimization(HCOptimization &o);
-
-
-};
-#endif
diff --git a/scripts/training/MGIZA/src/mkcls/IterOptimization.cpp b/scripts/training/MGIZA/src/mkcls/IterOptimization.cpp
deleted file mode 100644
index 258cb1f..0000000
--- a/scripts/training/MGIZA/src/mkcls/IterOptimization.cpp
+++ /dev/null
@@ -1,199 +0,0 @@
-/*
-
-Copyright (C) 1997,1998,1999,2000,2001 Franz Josef Och
-
-mkcls - a program for making word classes .
-
-This program is free software; you can redistribute it and/or
-modify it under the terms of the GNU General Public License
-as published by the Free Software Foundation; either version 2
-of the License, or (at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with this program; if not, write to the Free Software
-Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
-USA.
-
-*/
-
-
-
-#include "IterOptimization.h"
-#include "ProblemTest.h"
-
-ostream *GraphOutput;
-
-
-
-IterOptimization::IterOptimization(Problem& p,int m)
- : maxNonBetterIterations(0),problem(p),maxStep(m),initialisiert(0)
-{
-}
-
-
-
-IterOptimization::IterOptimization(IterOptimization& o) : Optimization(),problem(o.problem)
-{
- maxNonBetterIterations=o.maxNonBetterIterations;
- curValue = o.curValue;
- bestStep = o.bestStep;
- bestValue = o.bestValue;
- maxStep = o.maxStep;
- initialisiert = o.initialisiert;
- endFlag = o.endFlag;
- endFlag2 = o.endFlag2;
-}
-
-
-
-double IterOptimization::minimize(int steps)
-{
- if( !initialisiert )
- zInitialize();
-
- if( steps==0 )
- return curValue;
-
- int t=0;
- int every=(steps<0)?10000:(steps/1000+1);
-
- do
- {
- curStep++;
- t++;
- if(verboseMode&&(curStep%1000==0))
- {
- if(steps>0)
- cout << "Processed: " << 100.0*(curStep/(double)max(maxStep,1)) << " percent. (IterOptimization run) "
- << curValue << " max:" << maxStep << " " << steps << " \r";
- else
- cout << "In step:" << curStep << " currentValue: " << curValue
- << " bestValue: " << bestValue-curValue << " " << curStep-bestStep << ". \r";
- cout.flush();
- }
-
-
- ProblemChange *change= &(problem.change());
-
-
- double delta=problem.valueChange(*change);
-
-
- abkuehlen();
-
-
- if( accept(delta) )
- {
-
- problem.doChange(*change);
-
-
- curValue+=delta;
-
-
- if( curValue<bestValue-1e-10 )
- {
- bestValue=curValue;
- bestStep=curStep;
- endFlag2=endFlag=0;
- }
-
- if( verboseMode>1 )
- cout<<"in step: "<<curStep<<" accepted with : "<<delta<<endl;
- }
-
- if(curStep - bestStep>maxNonBetterIterations && maxNonBetterIterations>0)
- endFlag=1;
- if(curStep - bestStep>2*maxNonBetterIterations && maxNonBetterIterations>0)
- endFlag2=1;
-
-
-
- if( GraphOutput&&((curStep%every)==0) )
- {
- makeGraphOutput();
- *GraphOutput<<" "<<delta<<endl;
- }
-
- delete change;
- } while( t!=steps && (!end()) && (!problem.endCriterion()) );
-
- if( GraphOutput)
- {
- makeGraphOutput();
- *GraphOutput<<endl;
- }
- return curValue;
-}
-
-
-void IterOptimization::zInitialize()
-{
- initialisiert=1;
- bestValue=curValue=problem.value();
- maxNonBetterIterations=problem.maxNonBetterIterations();
- bestStep=curStep=0;
- endFlag2=endFlag=0;
-}
-
-
-void IterOptimization::makeGraphOutput()
-{
-
- *GraphOutput << curStep << " " <<curValue << " ";
-}
-
-
-double IterOptimizationOptimizeParameter(Problem &p,
- double ¶meter,double min,double max,
- int nRun,int nPar,int verfahren,
- double &bv)
-{
- if( nPar<=0 )
- return (max+min)/2;
-
- StatVar end1,time1,init1;
- StatVar end2,time2,init2;
- double mean1,mean2;
- double par1,par2;
-
- parameter = par1 = min + (max-min)/3;
- solveProblem(0,p,nRun,-1,verfahren,mean1,end1,time1,init1);
- cout << parameter << " " << mean1 << " " << end1.quantil(0.0) << " " << end1.quantil(1.0) << endl;
-
- parameter = par2 = min + 2*(max-min)/3;
- solveProblem(0,p,nRun,-1,verfahren,mean2,end2,time2,init2);
- cout << parameter << " " << mean2 << " " << end2.quantil(0.0) << " " << end2.quantil(1.0) << endl;
-
- double bestPar,bestVal;
- if(mean1<mean2)
- {
- bestVal = mean1;
- bestPar=IterOptimizationOptimizeParameter(p,parameter,min,min+2*(max-min)/3,nRun,nPar-2,verfahren,bestVal);
- }
- else
- {
- bestVal = mean2;
- bestPar=IterOptimizationOptimizeParameter(p,parameter,min+(max-min)/3,max,nRun,nPar-2,verfahren,bestVal);
- }
- if( mean1<bestVal&&mean1<=mean2 )
- {
- bv = mean1;
- return par1;
- }
- else if(mean2<bestVal && mean2<=mean1)
- {
- bv = mean2;
- return par2;
- }
- else
- {
- bv = bestVal;
- return bestPar;
- }
-}
diff --git a/scripts/training/MGIZA/src/mkcls/IterOptimization.h b/scripts/training/MGIZA/src/mkcls/IterOptimization.h
deleted file mode 100644
index ba39b55..0000000
--- a/scripts/training/MGIZA/src/mkcls/IterOptimization.h
+++ /dev/null
@@ -1,123 +0,0 @@
-/*
-
-Copyright (C) 1997,1998,1999,2000,2001 Franz Josef Och
-
-mkcls - a program for making word classes .
-
-This program is free software; you can redistribute it and/or
-modify it under the terms of the GNU General Public License
-as published by the Free Software Foundation; either version 2
-of the License, or (at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with this program; if not, write to the Free Software
-Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
-USA.
-
-*/
-
-
-
-
-
-
-
-#ifndef ITEROPTIMIZATION
-#define ITEROPTIMIZATION
-
-#include "Optimization.h"
-
-
-
-
-
-#define ANZ_VERSCHLECHTERUNGEN 500
-
-extern ostream *GraphOutput;
-
-
-class IterOptimization : public Optimization
- {
-
-
- private:
- int maxNonBetterIterations;
-
-
- protected:
- Problem &problem;
- int curStep;
- double curValue;
- int bestStep;
- double bestValue;
- int maxStep;
- int initialisiert;
- short endFlag;
- short endFlag2;
-
-
-
-
- virtual void makeGraphOutput();
-
-
- virtual short end()=0;
-
-
- virtual void abkuehlen()=0;
-
-
- virtual short accept(double delta)=0;
-
-
- virtual void zInitialize();
-
-
- public:
- IterOptimization(Problem &p,int maxIter=-1);
-
-
- IterOptimization(IterOptimization &o);
-
-
- virtual double minimize(int steps=-1);
-
-
- inline int getCurStep();
-
-
- inline double getCurrentValue();
-
-
- inline const Problem& getProblem();
-
-
-};
-
-double IterOptimizationOptimizeParameter(Problem &p,
- double ¶meter,double min,double max,
- int nRun,int nPar,int verfahren,double &bv);
-
-inline int IterOptimization::getCurStep()
-{
- return curStep;
-};
-inline double IterOptimization::getCurrentValue()
-{
- return curValue;
-};
-inline const Problem& IterOptimization::getProblem()
-{
- return problem;
-};
-
-#endif
-
-
-
-
diff --git a/scripts/training/MGIZA/src/mkcls/KategProblem.cpp b/scripts/training/MGIZA/src/mkcls/KategProblem.cpp
deleted file mode 100644
index 7318fb6..0000000
--- a/scripts/training/MGIZA/src/mkcls/KategProblem.cpp
+++ /dev/null
@@ -1,1001 +0,0 @@
-/*
-
-Copyright (C) 1997,1998,1999,2000,2001 Franz Josef Och
-
-mkcls - a program for making word classes .
-
-This program is free software; you can redistribute it and/or
-modify it under the terms of the GNU General Public License
-as published by the Free Software Foundation; either version 2
-of the License, or (at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with this program; if not, write to the Free Software
-Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
-USA.
-
-*/
-
-
-
-#include <stdlib.h>
-#include <stdio.h>
-#include "KategProblem.h"
-#include "KategProblemTest.h"
-
-#include "ProblemTest.h"
-
-extern double SigmaVerfaelschung;
-
-double h_table[MAX_H_TABLE],l_table[MAX_H_TABLE],hmy_table[MAX_H_TABLE],hmy_sigma;
-
-double LWRW_Faktor=0.5;
-
-static int intcompare(const void *p,const void *j)
-{
- return *(int *)p - *(int *)j;
-}
-
-KategProblem::KategProblem(int aw,int mak,int _initialisierung,int _auswertung,
- int _nachbarschaft,int mindestAnzahl)
-: Problem(mak,aw,_initialisierung,_auswertung,_nachbarschaft),
- sigmaVerfaelschung(SigmaVerfaelschung),katWasEmpty(0),nwg(mak+2),ngw(mak+2),_katOfWord(aw,-1),words(0),kats(0),
- wordFreq(aw,mindestAnzahl),katFreq(mak+2,(_auswertung==CRITERION_MY)?SigmaVerfaelschung:0.0),
- initLike(aw,-1)
-
-{
- if( auswertung == CRITERION_MY )
- cout << "Sigma-Verfaelschung: " << sigmaVerfaelschung << endl;
- _maxComp=aw;
- _maxCompVal=mak;
- massert(katFreq.nKats>0);
- massert(mak<=aw);
-
-
- for(int i=1;i<MAX_H_TABLE;i++)
- {
- h_table[i]=i*log((double)(i));
- l_table[i]=log((double)(i));
- hmy_table[i]=i*log(verfaelsche(i,sigmaVerfaelschung));
- }
- hmy_sigma=sigmaVerfaelschung;
- l_table[0]=h_table[0]=0;
-
- if( katwahl()==K_BEST )
- _maxCompVal=1;
-
-}
-
-KategProblem::~KategProblem()
-
-{
- delete words;
- delete kats;
-}
-
-void KategProblem::_initialize(int initTyp)
-{
- _initialize(initTyp,-1);
-}
-
-void KategProblem::_initialize(int initTyp,int specialFixedWord)
-
-{
- massert(wordFreq.filled);
- initialisierung = initTyp;
- int i;
-
- for(i=0;i<katFreq.nKats;i++)
- for(int j=0;j<katFreq.nKats;j++)
- katFreq.setN(i,j,0);
-
-
-
-
- for(i=0;i<wordFreq.nWords;i++)
- {
- setKatOfWord(i,-1);
- if( strcmp(getString(i),"$")==0||strcmp(getString(i),"1$")==0||strcmp(getString(i),"2$")==0||strcmp(getString(i),"3$")==0||strcmp(getString(i),"4$")==0 )
- wordFreq.setDollar(i);
- }
- wordFreq.init(specialFixedWord);
-
-
-
-
- _maxComp=wordFreq.nTranspWords;
-
- switch(initTyp)
- {
- case INIT_OTHER:
-
- if(verboseMode>2)cout << "KategProblem::_initialize(INIT_OTHER)\n";
- for(i=0;i<wordFreq.nWords;i++)
- fastPutWord(i,initLike[i]);
- break;
- case INIT_RAN:
-
- if(verboseMode>2)cout << "KategProblem::_initialize(INIT_RAN)\n";
- for(i=0;i<wordFreq.nWords;i++)
- {
- if( wordFreq.minIndex[i]>0 && wordFreq.maxIndex[i]>0 )
- fastPutWord(i,wordFreq.minIndex[i]+randomInt(wordFreq.maxIndex[i]-wordFreq.minIndex[i]+1));
- else
- fastPutWord(i,2+randomInt(katFreq.nKats-2));
- }
-
-
- break;
- case INIT_AIO:
-
- if(verboseMode>2)cout << "KategProblem::_initialize(INIT_AIO)\n";
- for(i=0;i<wordFreq.nWords;i++)
- fastPutWord(i,2);
- break;
- case INIT_FREQ:
-
- if(verboseMode>2)cout << "KategProblem::_initialize(INIT_FREQ)\n";
- for(i=0;i<wordFreq.nWords;i++)
- {
- int to=i+2;
- if( to>=katFreq.nKats )
- to=katFreq.nKats-1;
- fastPutWord((*(wordFreq.absteigend))[i],to);
- }
- curComp=katFreq.nKats-2;
- break;
- case INIT_LWRW:
-
- {
- Array<int> markList(wordFreq.nWords,1);
- int to=2;
- int i=0;
- if(verboseMode>2)cout << "KategProblem::_initialize(INIT_LWRW)\n";
- for(to=2;to<katFreq.nKats*LWRW_Faktor;to++)
- {
- int w=(*(wordFreq.absteigend))[to-2];
- fastPutWord(w,to);
- markList[w]=0;
- }
- while(to<katFreq.nKats-1 && i<wordFreq.nWords)
- {
- int toFilled=0;
- int word=(*(wordFreq.absteigend))[i];
- if(i%2)
- {
- ManyFreq &after=wordFreq.after[word];
- for(int j=0;j<after.size();j++)
- {
- int w=after[j].w;
- if( markList[w] )
- fastPutWord(w,to),toFilled++;
- markList[w]=0;
- }
- }
- else
- {
- ManyFreq &before=wordFreq.before[word];
- for(int j=0;j<before.size();j++)
- {
- int w=before[j].w;
- if( markList[w] )
- fastPutWord(w,to),toFilled++;
- markList[w]=0;
- }
- }
- i++;
- if( toFilled>0 )
- to++;
- }
- for(i=0;i<wordFreq.nWords;i++)
- if(markList[i])
- fastPutWord(i,katFreq.nKats-1);
- }
- break;
- default:
- cerr << "Wrong _initialize in KategProblem: " << initTyp << endl;
- exit(1);
- }
-
-
-
- for(int word=0;word<wordFreq.nWords;word++)
- {
- Array<OneFreq>& aft=wordFreq.after[word];
-
- int nAft=aft.size();
-
- for(i=0;i<nAft;i++)
- katFreq.addN(katOfWord(word),katOfWord(aft[i].w),aft[i].n);
- }
-
- if(verboseMode>2)
- {
- cout << "\nInitialization of KategProblem:";
- dumpOn(cout);
- }
-}
-
-double KategProblem::valueChange(ProblemChange&c)
-
-{
- numberOfPartEvaluations++;
- KategProblemChange &k=*(KategProblemChange *)&c;
- fillNWG(k.word);
-
- return _valueChange(k);
-}
-
-
-Problem *KategProblem::makeEqualProblem()
-
-{
- KategProblem*p = new KategProblem(wordFreq.nWords,katFreq.nKats-2,initialisierung,
- auswertung,nachbarschaft);
- KategProblemWBC &w=p->wordFreq;
- for(int x=0;x<wordFreq.nWords;x++)
- {
- w.setAfterWords(x,wordFreq.after[x].size());
- w.setBeforeWords(x,wordFreq.before[x].size());
- }
- int i;
- for(i=0;i<wordFreq.nWords;i++)
- {
- for(int j=0;j<wordFreq.after[i].size();j++)
- w.setFreq(i,wordFreq.after[i][j].w,wordFreq.after[i][j].n);
- }
- w.testFull();
- w.mindestAnzahl = wordFreq.mindestAnzahl;
- if(words)
- p->words = new leda_array<string>(*words);
- for(i=0;i<wordFreq.nWords;i++)
- {
- p->setKatOfWord(i,katOfWord(i));
- p->initLike[i]=initLike[i];
- }
- p->setValuesFrom(this);
- return p;
-}
-
-double KategProblem::nicevalue(double val)
-
-{
- double v;
- if( val!=1e100)
- v=val;
- else
- v=value();
- double h=wordFreq.get_h_of_words();
- double n=wordFreq.numberOfWords();
- double k=0;
- if(auswertung == CRITERION_MY)
- k=katFreq.myCriterionTerm();
- return exp((v+h-k)/n);
-}
-
-void KategProblem::makeKats()
-
-{
- if(kats)delete kats;
- kats = new leda_array<intSet>(katFreq.nKats);
- for(int i=0;i<wordFreq.nWords;i++)
- (*kats)[katOfWord(i)].insert(i);
-}
-
-void KategProblem::dumpInfos(ostream &strm)
-
-{
- strm << ";KategProblem:";
- strm << "cats: " << katFreq.nKats-2 << " words: " << wordFreq.nWords
- << endl;
-}
-
-void KategProblem::dumpOn(ostream &strm)
-
-{
- writeClasses(_katOfWord,*this,strm);
- if(PrintBestTo2)
- {
- dumpInfos(*PrintBestTo2);
- makeKats();
- if( kats==0 )
- {
- if( words==0 )
- {
- for(int i=0;i<wordFreq.nWords;i++)
- {
- *PrintBestTo2 << i << ":" << katOfWord(i) << " ";
- }
- }
- else
- {
- for(int i=0;i<wordFreq.nWords;i++)
- *PrintBestTo2 << (*words)[i] << ":" << katOfWord(i) << " ";
- }
- }
- else
- {
- int anzkat=0;
- for(int i=0;i<katFreq.nKats;i++)
- {
- int printed=0;
- *PrintBestTo2 << i << ":";
- leda_set<int>&theSet = (*kats)[i];
- if( words==0 )
- {
- int nr=0;
- forall_set(leda_set<int>,nr,theSet)
- {
- *PrintBestTo2 << nr << ", ";
- printed=1;
- }
- }
- else
- {
- int nr=0;
- forall_set(leda_set<int>,nr,theSet)
- {
- *PrintBestTo2 << (*words)[nr]<< ",";
- printed=1;
- }
- }
- if(printed==1)anzkat++;
- *PrintBestTo2 << endl;
- }
- *PrintBestTo2 << ";I have " << anzkat << " categories used.\n";
- }
- *PrintBestTo2 << endl;
- Problem::dumpOn(*PrintBestTo2);
- }
-}
-
-
-
-
-
-
-const char *KategProblem::getString(int i)
-
-{
- if(words==0)
- return "<>";
- else
- return ((*words)[i]).c_str();
-}
-
-string KategProblem::getTheString(int i)
-{
- return (*words)[i];
-}
-
-int KategProblem::maxNonBetterIterations()
-
-{
- if(katwahl()==K_BEST)
- return wordFreq.nTranspWords;
- else
- return katFreq.nKats*wordFreq.nTranspWords;
-}
-
-int KategProblem::expectedNumberOfIterations()
-
-{
-
- if(katwahl()==K_BEST)
- return 10*wordFreq.nTranspWords;
- else
- return 13*katFreq.nKats*wordFreq.nTranspWords;
-}
-
-void KategProblem::makeTitle(char x[512])
-
-{
- char *ww;
- char *kw;
- char *in;
- switch(wortwahl())
- {
- case W_RAN:
- ww="zufaellig";
- break;
- case W_DET_DECR:
- ww="absteigend";
- break;
- case W_DET_INCR:
- ww="aufsteigend";
- break;
- default:
- cerr << "Error: unknown word selection\n";
- exit(1);
- }
- switch(katwahl())
- {
- case K_DET:
- kw="rotierend";
- break;
- case K_RAN:
- kw="zufaellig";
- break;
- case K_BEST:
- kw="best ";
- break;
- default:
- cout << "Error: unknown cagegory selection\n";
- exit(1);
- }
- switch(initialisierung)
- {
- case INIT_RAN:
- in="zufaellig ";
- break;
- case INIT_AIO:
- in="all-in-one";
- break;
- case INIT_LWRW:
- in="lwrw ";
- break;
- case INIT_FREQ:
- in="freq ";
- break;
- case INIT_OTHER:
- in="other ";
- break;
- default:
- cout << "Error: unknown initialization\n";
- exit(1);
- }
- sprintf(x,"(c:%d,w:%d(%d),ww:%s,kw:%s,in:%s)",katFreq.nKats,wordFreq.nWords,
- wordFreq.nTranspWords,ww,kw,in);
-}
-
-
-
-
-int KategProblem::_change(ProblemChange **p)
-
-{
- *p=0;
- int word=curDimension();
- switch( wortwahl() )
- {
- case W_RAN:
- word=(*(wordFreq.absteigend))[randomInt(wordFreq.nTranspWords)];
- break;
- case W_DET_DECR:
- word=(*(wordFreq.absteigend))[word];
- break;
- case W_DET_INCR:
- word=(*(wordFreq.absteigend))[wordFreq.nTranspWords-word-1];
- break;
- default:
- cerr << "Error: Unknown word selection\n";
- exit(1);
- }
-
- int kat=curDimensionVal()+2;
- switch( katwahl() )
- {
- case K_RAN:
- kat=randomInt(katFreq.nKats-2)+2;
-
- case K_DET:
-
-
- if( kat==katOfWord(word)||(katWasEmpty&&katFreq.n1(kat)==0) )
- return 0;
- else if( wordFreq.minIndex[word]>0 && wordFreq.maxIndex[word]>0 && (kat<wordFreq.minIndex[word]||kat>wordFreq.maxIndex[word]))
- {
-
- return 0;
- }
- else
- {
- KategProblemChange *c = new KategProblemChange;
- c->toKat=kat;
- c->word=word;
- c->fromKat=katOfWord(c->word);
- massert( c->toKat < katFreq.nKats );
- massert( c->fromKat < katFreq.nKats );
- massert( c->word < wordFreq.nWords );
- massert( c->toKat!=0 && c->toKat!=1 );
- massert( c->fromKat!=0 && c->fromKat!=1 );
- if(katFreq.n1(kat)==0)
- katWasEmpty=1;
- *p=c;
- return 1;
- }
- break;
- case K_BEST:
- {
- fillNWG(word);
- double smallest=1e100;
- KategProblemChange &smallestChange = *new KategProblemChange;
- short withEmpty=0;
-
-
- int startKat=2;
- int endKat=katFreq.nKats;
- if( wordFreq.minIndex[word]>0&&wordFreq.maxIndex[word]>0 )
- {
- startKat = max(2,wordFreq.minIndex[word]);
- endKat = min(katFreq.nKats,wordFreq.maxIndex[word]+1);
- }
- for(kat=startKat;kat<endKat;kat++)
- {
- if( kat!=katOfWord(word) && (withEmpty==0 || katFreq.n1(kat)
- || katFreq.n2(kat)) )
- {
- KategProblemChange c;
- c.toKat=kat;
- c.word=word;
- c.fromKat=katOfWord(word);
- double n=_valueChange(c);
- if(n<smallest)
- {
- smallest=n;
- smallestChange=c;
- }
- }
- if( katFreq.n1(kat)==0 && katFreq.n2(kat)==0 )
- withEmpty=1;
- }
- massert(smallest!=1e100);
- *p= &smallestChange;
- return 1;
- }
- break;
- default:
- cerr << "Error: Unknown category selection\n";
- exit(1);
- return 0;
- }
-}
-
-void KategProblem::_doChange(ProblemChange &c)
-
-{
- KategProblemChange &k=*(KategProblemChange *)&c;
- putWord(k.word,k.toKat);
-
-}
-
-void KategProblem::_undoChange(ProblemChange &c)
-
-{
- KategProblemChange &k=*(KategProblemChange *)&c;
- putWord(k.word,k.fromKat);
-
-}
-
-void KategProblem::incrementDirection()
-
-{
- Problem::incrementDirection();
- katWasEmpty=0;
- massert( _maxComp==wordFreq.nTranspWords );
-}
-
-double KategProblem::_value()
-
-{
-
- return katFreq.fullBewertung(auswertung);
-}
-
-
-double mkat_h_full(int n,double tf)
-{
-
-
- if( tf>0 )
- return n*log(tf);
- else
- return 0.0;
-}
-
-double mkat_h_part(int n,double cf)
-{
-
-
- if( cf>0.0 )
- return n*log(cf);
- else
- return 0.0;
-}
-
-double KategProblem::kat_h_full(int n)
-{
- return mkat_h_full(n,verfaelsche(n,sigmaVerfaelschung));
-}
-double KategProblem::kat_h_full(double n)
-{
- abort();
- return mkat_h_full((int)n,verfaelsche(n,sigmaVerfaelschung));
-}
-
-double KategProblem::kat_h_part(int n)
-{
- return mkat_h_part(n,verfaelsche(n,sigmaVerfaelschung));
-}
-double KategProblem::kat_h_part(double n)
-{
- abort();
- return mkat_h_part((int)n,verfaelsche(n,sigmaVerfaelschung));
-}
-
-
-
-
-double KategProblem::nmo_my(int i,int j)
-
-{
- FreqType n=nstrich(i,j),k=katFreq.n(i,j);
- return kat_h_full(n+k)-kat_h_full(k);
-}
-double KategProblem::nmo(int i,int j)
-
-{
- FreqType n=nstrich(i,j),k=katFreq.n(i,j);
- return kat_h(n+k)-kat_h(k);
-}
-double KategProblem::nmo_lo(int i,int j,int &e0,int &e1)
-
-{
- FreqType kij=katFreq.n(i,j);
- FreqType nij=nstrich(i,j)+kij;
- if( kij!=nij)
- {
- if( nij==0 )
- e0++;
- else if(nij==1)
- e1++;
- if( kij==0 )
- e0--;
- else if(kij==1)
- e1--;
- }
- return nij*kat_mlog(nij-1-rhoLo)-kij*kat_mlog(kij-1-rhoLo);
-}
-
-
-double KategProblem::_valueChange(KategProblemChange &k)
-
-{
- double v=0;
- int i=0;
-
- ursprung=k.fromKat;
- ziel=k.toKat;
-
- if( auswertung==CRITERION_LO )
- {
- int e0a=katFreq.eta0,e1a=katFreq.eta1;
- v-=nmo_lo(ursprung,ursprung,e0a,e1a)+nmo_lo(ziel,ziel,e0a,e1a)
- +nmo_lo(ursprung,ziel,e0a,e1a)+nmo_lo(ziel,ursprung,e0a,e1a);
- i=0;
- while(i<nwg.anzNot0)
- {
- int cl=nwg.not0[i];
- if( cl!= ursprung && cl!=ziel )
- v -= nmo_lo(ursprung,cl,e0a,e1a)+nmo_lo(ziel,cl,e0a,e1a);
- i++;
- }
- i=0;
- while(i<ngw.anzNot0)
- {
- int cl=ngw.not0[i];
- if( cl!= ursprung && cl!=ziel )
- v -= nmo_lo(cl,ursprung,e0a,e1a)+nmo_lo(cl,ziel,e0a,e1a);
- i++;
- }
-
- v+=kat_hlo(katFreq.n1(ursprung)-wordFreq.n1(k.word))
- -kat_hlo(katFreq.n1(ursprung))
- +kat_hlo(katFreq.n2(ursprung)-wordFreq.n2(k.word))
- -kat_hlo(katFreq.n2(ursprung))
- +kat_hlo(katFreq.n1(ziel)+wordFreq.n1(k.word))
- -kat_hlo(katFreq.n1(ziel))
- +kat_hlo(katFreq.n2(ziel)+wordFreq.n2(k.word))
- -kat_hlo(katFreq.n2(ziel));
-
- int old0=katFreq.c1_0*katFreq.nKats+katFreq.c2_0*katFreq.nKats
- -katFreq.c1_0*katFreq.c2_0;
- int nc1_0=katFreq.c1_0,nc2_0=katFreq.c2_0;
- if( wordFreq.n1(k.word)>0 && katFreq.n1(ursprung)==wordFreq.n1(k.word) )
- nc1_0++;
- if( wordFreq.n2(k.word)>0 && katFreq.n2(ursprung)==wordFreq.n2(k.word) )
- nc2_0++;
- if( wordFreq.n1(k.word)>0 && katFreq.n1(ziel)==0 ) nc1_0--;
- if( wordFreq.n2(k.word)>0 && katFreq.n2(ziel)==0 ) nc2_0--;
- int new0=nc1_0*katFreq.nKats+nc2_0*katFreq.nKats-nc1_0*nc2_0;
- v-=kat_etaFkt(e0a,e1a,new0,katFreq.nKats)
- -kat_etaFkt(katFreq.eta0,katFreq.eta1,old0,katFreq.nKats);
- vassert(NULLFLOAT(Problem::valueChange(k)-v));
- }
- else if(auswertung==CRITERION_ML)
- {
- v-=nmo(ursprung,ursprung)+nmo(ziel,ziel)
- +nmo(ursprung,ziel)+nmo(ziel,ursprung);
- i=0;
- while(i<nwg.anzNot0)
- {
- int cl=nwg.not0[i];
- if( cl!= ursprung && cl!=ziel )
- v -= nmo(ursprung,cl)+nmo(ziel,cl);
- i++;
- }
- i=0;
- while(i<ngw.anzNot0)
- {
- int cl=ngw.not0[i];
- if( cl!= ursprung && cl!=ziel )
- v -= nmo(cl,ursprung)+nmo(cl,ziel);
- i++;
- }
- v+=kat_h(katFreq.n1(ursprung)-wordFreq.n1(k.word))
- -kat_h(katFreq.n1(ursprung))
- +kat_h(katFreq.n2(ursprung)-wordFreq.n2(k.word))
- -kat_h(katFreq.n2(ursprung))
- +kat_h(katFreq.n1(ziel)+wordFreq.n1(k.word))
- -kat_h(katFreq.n1(ziel))
- +kat_h(katFreq.n2(ziel)+wordFreq.n2(k.word))
- -kat_h(katFreq.n2(ziel));
- }
- else if( auswertung==CRITERION_MY )
- {
- v-=nmo_my(ursprung,ursprung)+nmo_my(ziel,ziel)
- +nmo_my(ursprung,ziel)+nmo_my(ziel,ursprung);
- i=0;
- while(i<nwg.anzNot0)
- {
- int cl=nwg.not0[i];
- if( cl!= ursprung && cl!=ziel )
- v -= nmo_my(ursprung,cl)+nmo_my(ziel,cl);
- i++;
- }
- i=0;
- while(i<ngw.anzNot0)
- {
- int cl=ngw.not0[i];
- if( cl!= ursprung && cl!=ziel )
- v -= nmo_my(cl,ursprung)+nmo_my(cl,ziel);
- i++;
- }
- v+=kat_h_part(katFreq.n1(ursprung)-wordFreq.n1(k.word))
- -kat_h_part(katFreq.n1(ursprung))
- +kat_h_part(katFreq.n2(ursprung)-wordFreq.n2(k.word))
- -kat_h_part(katFreq.n2(ursprung))
- +kat_h_part(katFreq.n1(ziel)+wordFreq.n1(k.word))
- -kat_h_part(katFreq.n1(ziel))
- +kat_h_part(katFreq.n2(ziel)+wordFreq.n2(k.word))
- -kat_h_part(katFreq.n2(ziel));
- double bishZusatz = katFreq.myCriterionTerm();
- _doChange(k);
- double neuZusatz = katFreq.myCriterionTerm();
- _undoChange(k);
- if(verboseMode>2)
- cout << "ZUSATZ: " << bishZusatz << " " << neuZusatz << " " <<neuZusatz-bishZusatz<<" " << v << endl;
- v+=neuZusatz-bishZusatz;
- }
- else
- {
- cerr << "Fatal error: Unknown criterion: '"<<auswertung<<"'\n";
- }
- vassert( NULLFLOAT(Problem::valueChange(k)-v) );
- return v;
-}
-
-
-void KategProblem::fillNWG(int w)
-
-{
- if(nwgWord==w)
- return;
- else
- {
- Array<OneFreq> &after=wordFreq.after[w];
- int size=after.size(),i;
- nww=0;
- nwg.init();
- for(i=0;i<size;i++)
- {
- nwg.addFreq(katOfWord(after[i].w),after[i].n);
- if(after[i].w==w)
- nww=after[i].n;
- }
-
- Array<OneFreq> &before=wordFreq.before[w];
- size=before.size();
- ngw.init();
- for(i=0;i<size;i++)
- ngw.addFreq(katOfWord(before[i].w),before[i].n);
- nwgWord=w;
- }
-}
-
-void KategProblem::vnstrich(int i,int j)
-
-{
- cout << ".) " << katFreq.n(i,j) << " ";
- if( i==ursprung )
- cout << "a) "<<-nwg.getFreq(j) << " ";
- if( i==ziel )
- cout << "b) " <<nwg.getFreq(j) << " ";
-
- if( j==ursprung )
- cout << "c) " <<-ngw.getFreq(i) << " ";
- if( j==ziel )
- cout << "d) " <<+ngw.getFreq(i) << " " ;
-
- if( i==ursprung && j==ursprung )
- cout << "e) " <<+nww << " ";
- if( i==ziel && j==ziel )
- cout << "f) " <<+nww << " " ;
- if( i==ursprung && j==ziel )
- cout << "g) " <<-nww << " ";
- if( i==ziel && j==ursprung )
- cout << "h) " <<-nww << " ";
-}
-
-
-
-
-
-void KategProblem::fastPutWord(int word,int toKat)
-
-{
- massert(toKat>=0 && toKat<katFreq.nKats);
-
-
-
- if( wordFreq.fixedWord[word]>=0 )
- toKat=wordFreq.fixedWord[word];
- massert(katOfWord(word)==-1);
- setKatOfWord(word,toKat);
-}
-
-void KategProblem::fixInitLike()
-{
- int fixed=0,fixed2=0;
- over_arr(initLike,i)
- if(initLike[i]>=0 )
- {
- fixed++;
- if( initLike[i]>=wordFreq.minIndex[i] || initLike[i]==1 )
- wordFreq.fixedWord[i]=initLike[i];
- else
- {
- wordFreq.fixedWord[i]=wordFreq.minIndex[i]+initLike[i]-2;
- fixed2++;
- }
- initLike[i]=-1;
- }
- cout << "Fixed from file are: " << fixed << " " << fixed2 << " words.\n";
-}
-
-void KategProblem::putWord(int word,int toKat)
-
-{
- massert(toKat!=0);massert(toKat!=1);
- massert(word<wordFreq.nWords);
- massert(toKat<katFreq.nKats);
- massert(wordFreq.fixedWord[word]<0);
- int k=katOfWord(word);
- massert(k!=0&&k!=1);
- Array<OneFreq>& aft=wordFreq.after[word];
- Array<OneFreq>& bef=wordFreq.before[word];
- int nAft=aft.size();
- int nBef=bef.size();
- int i;
- if(verboseMode>4)
- cout << "putWord(" << word << "," << toKat << ")" << k << " nAft"
- << nAft << " nBef" << nBef << " k" << k << "\n";
-
- massert( k!=-1 );
- massert( k!=toKat );
-
- for(i=0;i<nAft;i++)
- {
- katFreq.addN(k,katOfWord(aft[i].w),-aft[i].n);
- if(verboseMode>4)
- cout << k << " " << katOfWord(aft[i].w) << " " << -aft[i].n << endl;
- }
- for(i=0;i<nBef;i++)
- if( bef[i].w!=word )
- {
- katFreq.addN(katOfWord(bef[i].w),k,-bef[i].n);
- if(verboseMode>4)
- cout << katOfWord(bef[i].w) << " " << k << " " << -bef[i].n << endl;
- }
-
- setKatOfWord(word,toKat);
-
- for(i=0;i<nAft;i++)
- katFreq.addN(toKat,katOfWord(aft[i].w),aft[i].n);
- for(i=0;i<nBef;i++)
- if( bef[i].w!=word )
- katFreq.addN(katOfWord(bef[i].w),toKat,bef[i].n);
-
-}
-
-
-
-
-
-
-
-
-
-
-static KategProblemChange theOneKategProblemChange;
-static int anzKategProblemChange=0;
-
-void *KategProblemChange::operator new(size_t size)
-{
- anzKategProblemChange++;
- massert(anzKategProblemChange>0);
- massert(anzKategProblemChange<2);
- if( anzKategProblemChange==1 )
- return &theOneKategProblemChange;
- else
- {
- if( verboseMode>1 )
- cout << "generate instance of KategProblemChange: " << size
- << " " << anzKategProblemChange<< endl;
- return malloc(size);
- }
-}
-void KategProblemChange::operator delete(void *ptr,size_t
-)
-{ massert(size==sizeof(KategProblemChange));
- anzKategProblemChange--;
- if( ptr!= &theOneKategProblemChange)
- free(ptr);
-}
-
-
-
-
-
-
-
-
-
-
-
-
-NWG::NWG(int n) : freq(n,0),timeOfFreq(n,0),not0(n),word(-1)
-{
- massert(n>0);
- curTime=1;
- init();
-}
-
-void NWG::init()
-{
- curTime++;
- anzNot0=0;
-}
-
-void NWG::sort()
-{
- qsort(not0.getPointerToData(),anzNot0,sizeof(int),intcompare);
- massert(anzNot0<=not0.size());
-}
-
-
-int KategProblem::maxDimension()
-{
- return _maxComp;
-}
-
-int KategProblem::maxDimensionVal()
-{
- return _maxCompVal;
-}
-
diff --git a/scripts/training/MGIZA/src/mkcls/KategProblem.h b/scripts/training/MGIZA/src/mkcls/KategProblem.h
deleted file mode 100644
index e5a5a46..0000000
--- a/scripts/training/MGIZA/src/mkcls/KategProblem.h
+++ /dev/null
@@ -1,439 +0,0 @@
-/*
-
-Copyright (C) 1997,1998,1999,2000,2001 Franz Josef Och
-
-mkcls - a program for making word classes .
-
-This program is free software; you can redistribute it and/or
-modify it under the terms of the GNU General Public License
-as published by the Free Software Foundation; either version 2
-of the License, or (at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with this program; if not, write to the Free Software
-Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
-USA.
-
-*/
-
-
-
-
-
-
-#ifndef KATEG_OPT_H
-#define KATEG_OPT_H
-#include <string>
-
-#include <stdlib.h>
-#include "Problem.h"
-
-extern double rhoLo;
-
-typedef int Kategory;
-typedef int Word;
-
-
-
-#ifdef FREQTYPE_DOUBLE
-typedef double FreqType;
-#else
-typedef int FreqType;
-#endif
-
-
-#include "KategProblemWBC.h"
-
-
-#include "KategProblemKBC.h"
-
-
-enum {
- INIT_RAN=1,
- INIT_AIO=2,
- INIT_LWRW=3,
- INIT_FREQ=4,
- INIT_OTHER=5
- };
-
-
-enum {
- W_RAN=(8|16),
- W_DET_DECR=(16),
- W_DET_INCR =(32)
-};
-#define CHOOSE_WORD (8|16|32)
-
-
-enum {
- K_DET=(64),
- K_RAN=(128),
- K_BEST=(64|128)
-};
-#define CHOOSE_KAT (64|128)
-
-
-enum {
- CRITERION_ML=0,
- CRITERION_LO=1,
- CRITERION_MY=2
-};
-
-
-
-class NWG
-{
- private:
- Array<FreqType> freq;
-
- Array<int> timeOfFreq;
-
-
-
-
- int curTime;
- public:
- NWG(int n);
- void init();
-
- int anzNot0;
-
-
- Array<int> not0;
-
- int word;
-
- inline void addFreq(int C,FreqType n);
-
- void sort();
-
- FreqType getFreq(int i)
- {
- if( timeOfFreq[i]==curTime )
- return freq[i];
- else
- return 0;
- };
-};
-
-inline void NWG::addFreq(int g,FreqType n)
-{
- if(timeOfFreq[g]==curTime)
- freq[g]+=n;
- else
- {
- timeOfFreq[g]=curTime;
- freq[g]=n;
- not0[anzNot0++]=g;
- }
-}
-
-
-
-struct KategProblemChange : public ProblemChange
-{
- void *operator new(size_t size);
- void operator delete(void *ptr,size_t size);
-
- int word;
- int toKat;
- int fromKat;
-};
-
-class KategProblem : public Problem
-{
- private:
- double kat_h_full(int n);
- double kat_h_full(double n);
- double kat_h_part(int n);
- double kat_h_part(double n);
- double sigmaVerfaelschung;
- short katWasEmpty;
-
-
-
- int nwgWord;
-
- NWG nwg;
- NWG ngw;
- FreqType nww;
-
- int ursprung,ziel;
-
- Array<int> _katOfWord;
-
- int _maxComp,_maxCompVal;
-
- double nmo_my(int i,int j);
- double nmo(int i,int j);
-
-
- double nmo_lo(int i,int j,int &e0,int &e1);
-
-
- void putWord(int word,int to);
-
-
- void fastPutWord(int word,int to);
-
-
- void setKatOfWord(int w,int k)
-{
- if( !(wordFreq.fixedWord[w]==k||wordFreq.fixedWord[w]==-1||k==-1) )
- {
- cout << "mkcls::setKatOfWord::ERROR: " << w << " " << k << " " << wordFreq.fixedWord[w] << " " << (*words)[w] << endl;
- }
- _katOfWord[w]=k;
- nwgWord=-1;
-};
-
-
- void fillNWG(int w);
-
-
- inline FreqType nstrich(int i,int j);
-
-
- void vnstrich(int i,int j);
-
-
-
- protected:
- virtual int _change(ProblemChange **p);
-
-
- virtual void _doChange(ProblemChange &c);
-
-
- virtual void _undoChange(ProblemChange &c);
-
-
- virtual double _value();
-
-
- double _valueChange(KategProblemChange &k);
-
-
- virtual void incrementDirection();
-
-
- virtual int maxDimensionVal(void) ;
-
-
- virtual int maxDimension(void) ;
-
-
-public:
- leda_array<string> *words;
-typedef leda_set<int> intSet;
-
-leda_array<intSet> *kats;
-
- KategProblemWBC wordFreq;
- KategProblemKBC katFreq;
-
- Array<int> initLike;
-
- KategProblem(int aw,int mak,int _initialisierung,int _auswertung,
- int _nachbarschaft,int minw=0);
-
-
- virtual ~KategProblem();
-
-
- virtual void _initialize(int initTyp);
- virtual void _initialize(int initTyp,int specialFixedWord);
-
-
- virtual double valueChange(ProblemChange&c);
-
-
- virtual Problem *makeEqualProblem();
-
-
- virtual double nicevalue(double value=1e100);
-
-
- void makeKats();
-
-
- virtual void dumpOn(ostream &strm);
-
-
- virtual void dumpInfos(ostream &strm);
-
-
-
-
-
- inline void katwahl(int k);
-
-
- inline void wortwahl(int w);
-
-
-
-
-
- inline int katOfWord(int w);
-
-
- inline short wortwahl();
-
-
- inline short katwahl() ;
-
-
- virtual int maxNonBetterIterations();
-
-
- virtual int expectedNumberOfIterations();
-
-
- const char *getString(int i);
- string getTheString(int i);
-
-
- void makeTitle(char x[512]);
-
-
- void fixInitLike();
-
-};
-
-inline int KategProblem::katOfWord(int w){return _katOfWord[w];};
-inline short KategProblem::wortwahl(){return nachbarschaft&CHOOSE_WORD;};
-inline short KategProblem::katwahl() {return nachbarschaft&CHOOSE_KAT;};
-
-inline void KategProblem::katwahl(int k)
- {
- nachbarschaft = (nachbarschaft&(~CHOOSE_KAT)) | k;
- if(k==K_BEST)
- _maxCompVal=1;
- else
- _maxCompVal=katFreq.nKats-2;
- };
-
-inline void KategProblem::wortwahl(int w)
- {
- nachbarschaft = (nachbarschaft&(~CHOOSE_WORD)) | w;
- };
-
-
-
-inline FreqType KategProblem::nstrich(int i,int j)
-{
- FreqType n=0;
-
- if( i==ursprung )
- n-=nwg.getFreq(j);
- if( i==ziel )
- n+=nwg.getFreq(j);
-
- if( j==ursprung )
- n-=ngw.getFreq(i);
- if( j==ziel )
- n+=ngw.getFreq(i);
-
- if( i==ursprung && j==ursprung )
- n+=nww;
- if( i==ziel && j==ziel )
- n+=nww;
-
- if( i==ursprung && j==ziel )
- n-=nww;
- if( i==ziel && j==ursprung )
- n-=nww;
-
- return n;
-}
-
-
-
-
-
-#define MAX_H_TABLE 4000
-extern double h_table[],l_table[],hmy_table[],hmy_sigma;
-
-
-inline double kat_mlog(double x)
-{
- if(x<=1e-9)
- return 0;
- else
- return log(x);
-}
-
-
-inline double kat_mlog(int s)
-{
- if(s<=0)
- return 0;
- else if( s<MAX_H_TABLE )
- {
- massert( s==0 || l_table[s]==log(s) );
- return l_table[s];
- }
- else
- return log((double)(s));
-}
-
-
-
-inline double kat_hlo(int n)
-{
- return n*kat_mlog(n-1);
-}
-
-inline double kat_hlo(double n)
-{
- return n*kat_mlog(n-1);
-}
-
-
-inline double kat_h(int n)
-{
- massert(n>=-1);
- if(n<=0)
- return 0;
- else
- if(n<MAX_H_TABLE)
- {
- massert(n==0||fabs(h_table[n]-n*log((double)n))<1e-8);
- return h_table[n];
- }
- else
- return n*log((double)(n));
-}
-inline double kat_h(double n)
-{
- if(n<=1e-9)
- return 0;
- else
- return n*log(n);
-}
-
-
-inline double kat_etaFkt(int _e0,int e1,int immer0,int cats)
-{
- int e0 = _e0 - immer0;
- int ePlus = cats*cats - _e0;
- if(cats*cats-e0>1)
- return e1*log( (ePlus-1.0)/(e0+1.0)*rhoLo );
- else
- return 0;
-}
-
-double mkat_h_full(int n,double tf);
-double mkat_h_part(int n,double cf);
-
-int Hash(const string& s);
-
-
-#endif
-
diff --git a/scripts/training/MGIZA/src/mkcls/KategProblemKBC.cpp b/scripts/training/MGIZA/src/mkcls/KategProblemKBC.cpp
deleted file mode 100644
index 10a10d9..0000000
--- a/scripts/training/MGIZA/src/mkcls/KategProblemKBC.cpp
+++ /dev/null
@@ -1,251 +0,0 @@
-/*
-
-Copyright (C) 1997,1998,1999,2000,2001 Franz Josef Och
-
-mkcls - a program for making word classes .
-
-This program is free software; you can redistribute it and/or
-modify it under the terms of the GNU General Public License
-as published by the Free Software Foundation; either version 2
-of the License, or (at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with this program; if not, write to the Free Software
-Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
-USA.
-
-*/
-
-
-
-
-#include <stdlib.h>
-#include <math.h>
-#include "KategProblem.h"
-
-#ifdef WIN32
-#include <boost\math\special_functions\erf.hpp>
-using namespace boost::math;
-#endif
-
-
-double rhoLo=0.75;
-#define MAX_VERFAELSCHUNG 5000
-double verfTab[MAX_VERFAELSCHUNG],verfTabSigma=-1.0;
-double verfaelsche(int a,double b)
-{
-
- if( a>=0&&verfTabSigma==b&&a<MAX_VERFAELSCHUNG )
- {
-
- massert(verfTab[a]== b*(erf(10000.0) - erf(a/b))/2+a);
- return verfTab[a];
- }
- else
- {
- double x = b*(erf(10000.0) - erf(a/b))/2+a;
- return x;
- }
-}
-double verfaelsche(double,double b)
-{
- abort();
- return b;
-}
-
-KategProblemKBC::KategProblemKBC(int s,double sv) :
- _n(s),_n1(s,0),_n2(s,0),sigmaVerfaelschung(sv),withVerfaelschung(sv!=0.0),
- _nverf(s),_n1verf(s,0.0),_n2verf(s,0.0),_nWords(0),
- eta0(s*s),eta1(0),c1_0(s),c2_0(s),
- _bigramVerfSum(0.0),_unigramVerfSum1(0.0),_unigramVerfSum2(0.0),nKats(s)
-
-{
- verfInit0=0.0;
- int i;
- if( withVerfaelschung )
- {
- verfInit0=verfaelsche(0,sv);
- cout << "VERFAELSCHUNG wird mitgefuehrt => LANGSAMER!!!\n";
- }
- for(i=0;i<s;i++)
- {
- _n[i].init(s,0);
- _nverf[i].init(s,verfInit0);
- _n1verf[i]=_n2verf[i]=verfInit0;
- _bigramVerfSum+=verfInit0*s;
- _unigramVerfSum1+=verfInit0;
- _unigramVerfSum2+=verfInit0;
- }
- if( withVerfaelschung )
- {
- cout << "VERFAELSCHUNG " << _bigramVerfSum << " " << _unigramVerfSum1 << " " << _unigramVerfSum2 << endl;
- }
- verfTabSigma=sigmaVerfaelschung;
-
-
-
-}
-
-void KategProblemKBC::setN(int w1,int w2, FreqType n)
-
-{
- addN(w1,w2,-_n[w1][w2]);
- addN(w1,w2,n);
-}
-
-
-double KategProblemKBC::fullBewertung(int auswertung)
-{
-
- double bewertung=0;
- int c1,c2;
-
-
- switch( auswertung )
- {
- case CRITERION_ML:
- for(c1=0;c1<nKats;c1++)
- {
- for(c2=0;c2<nKats;c2++)
- bewertung-=kat_h(_n[c1][c2]);
- bewertung+=kat_h(_n1[c1])+kat_h(_n2[c1]);
- }
- break;
- case CRITERION_MY:
- {
- for(c1=0;c1<nKats;c1++)
- {
- for(c2=0;c2<nKats;c2++)
- bewertung-=mkat_h_full((int)n(c1,c2),nverf(c1,c2));
- bewertung+=mkat_h_part((int)(n1(c1)),n1verf(c1))+mkat_h_part((int)(n2(c1)),n2verf(c1));
- }
- double u1=_unigramVerfSum1-verfInit0*c1_0;
- double u2=_unigramVerfSum2-verfInit0*c2_0;
- double b=_bigramVerfSum-verfInit0*(c1_0*nKats+c2_0*nKats-c1_0*c2_0);
- if( verboseMode>1 )
- {
- cout << "CRITERION_MY: " << bewertung << endl;
- cout << "U1:"<<_unigramVerfSum1 << " n:"<<u1<< " "
- << "U2:"<<_unigramVerfSum2 << " n:"<<u2<< " "
- << "U3:"<<_bigramVerfSum << " n:"<<b<< endl;
- }
- if(b>0.000001)
- {
-
-
- if(verboseMode>1 )
- cout << " NEU: " <<_nWords*log( u1 * u2 / b ) << endl;
- bewertung -= _nWords*log( u1 * u2 / b );
- if(verboseMode>1)
- cout << "SCHLUSSBEWERTUNG: " << bewertung << endl;
- }
- else
- cout << "B zu klein " << b << endl;
- }
- break;
- case CRITERION_LO:
- for(c1=0;c1<nKats;c1++)
- {
- for(c2=0;c2<nKats;c2++)
- bewertung-=_n[c1][c2]*kat_mlog(_n[c1][c2]-1-rhoLo);
- bewertung+=_n1[c1]*kat_mlog(_n1[c1]-1)+_n2[c1]*kat_mlog(_n2[c1]-1);
- }
- bewertung-=kat_etaFkt(eta0,eta1,(c1_0*nKats+c2_0*nKats-c1_0*c2_0),nKats);
- break;
- default:
- cerr << "Error: wrong criterion " << auswertung << endl;
- exit(1);
- }
- return bewertung;
-}
-
-double KategProblemKBC::myCriterionTerm()
-{
- iassert( withVerfaelschung );
- double r;
- double u1=_unigramVerfSum1-verfInit0*c1_0;
- double u2=_unigramVerfSum2-verfInit0*c2_0;
- double b=_bigramVerfSum-verfInit0*(c1_0*nKats+c2_0*nKats-c1_0*c2_0);
-
-
- if( verboseMode>1 )
- {
- cout << "nwords divisor:"<<_nWords << " " << u1 * u2 / b << endl;
- cout << "ergebnis: "<<_nWords*log( u1 * u2 / b ) << endl;
- cout << "0: "<<c1_0 << endl;
- }
- r = _nWords*log( u1 * u2 / b );
-
- return -r;
-}
-
-
-
-
-double KategProblemKBC::bigramVerfSum()
-{
- double sum=0;
- for(int c1=0;c1<nKats;c1++)
- for(int c2=0;c2<nKats;c2++)
- sum+=nverf(c1,c2);
- cout << "BIGRAMVERFSUM: " << sum << endl;
- return sum;
-}
-
-double KategProblemKBC::unigramVerfSum1()
-{
- double sum=0;
- for(int c1=0;c1<nKats;c1++)
- sum+=n1verf(c1);
- cout << "UNIGRAMVERFSUM1: " << sum << endl;
- return sum;
-}
-
-double KategProblemKBC::unigramVerfSum2()
-{
- double sum=0;
- for(int c1=0;c1<nKats;c1++)
- sum+=n2verf(c1);
- cout << "UNIGRAMVERFSUM2: " << sum << endl;
- return sum;
-}
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
diff --git a/scripts/training/MGIZA/src/mkcls/KategProblemKBC.h b/scripts/training/MGIZA/src/mkcls/KategProblemKBC.h
deleted file mode 100644
index 4bac62a..0000000
--- a/scripts/training/MGIZA/src/mkcls/KategProblemKBC.h
+++ /dev/null
@@ -1,157 +0,0 @@
-/*
-
-Copyright (C) 1997,1998,1999,2000,2001 Franz Josef Och
-
-mkcls - a program for making word classes .
-
-This program is free software; you can redistribute it and/or
-modify it under the terms of the GNU General Public License
-as published by the Free Software Foundation; either version 2
-of the License, or (at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with this program; if not, write to the Free Software
-Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
-USA.
-
-*/
-
-
-
-
-
-
-
-#ifndef KATEGPROBLEMKBC_H
-#define KATEGPROBLEMKBC_H
-
-typedef Array<FreqType> FreqArray;
-typedef Array<double> FreqArrayReal;
-
-
-double verfaelsche(int a,double b);
-double verfaelsche(double a,double b);
-
-class KategProblemKBC
-
-
-{
- friend class KategProblem;
-
- private:
- Array<FreqArray> _n;
- Array<FreqType> _n1;
-
- Array<FreqType> _n2;
-
-
- double sigmaVerfaelschung;
- short withVerfaelschung;
-
- Array<FreqArrayReal> _nverf;
- Array<double> _n1verf;
- Array<double> _n2verf;
- FreqType _nWords;
-
- protected:
- int eta0;
- int eta1;
- int c1_0;
- int c2_0;
- double _bigramVerfSum;
- double _unigramVerfSum1;
- double _unigramVerfSum2;
- double verfInit0;
-
- public:
- int nKats;
-
- KategProblemKBC(int nKats,double sv);
-
-
- double fullBewertung(int auswertung);
-
-
- FreqType n(int w1,int w2) { return _n[w1][w2]; };
-
-
- FreqType n1(int w) { return _n1[w];};
-
-
- FreqType n2(int w) { return _n2[w];};
-
-
- double bigramVerfSum();
- double unigramVerfSum1();
- double unigramVerfSum2();
-
- double nverf(int w1,int w2) { return _nverf[w1][w2]; }
-
- double n1verf(int w) { return _n1verf[w]; };
-
- double n2verf(int w) { return _n2verf[w]; };
-
- inline void addN(int w1,int w2, FreqType n);
-
-
- void setN(int w1,int w2, FreqType n);
-
-
- double myCriterionTerm();
-
-};
-
-inline void KategProblemKBC::addN(int w1,int w2, FreqType n)
-{
- if(n!=0)
- {
- FreqType &s= _n[w1][w2];
- if(s==0)
- eta0--;
- else if(s==1)
- eta1--;
- if(_n1[w1]==0)
- c1_0--;
- if(_n2[w2]==0)
- c2_0--;
-
- if(withVerfaelschung)
- {
- double verfOld=verfaelsche(s,sigmaVerfaelschung);
- double verfNew=verfaelsche(s+n,sigmaVerfaelschung);
- double verfOld1=verfaelsche(_n1[w1],sigmaVerfaelschung);
- assert(verfOld1==_n1verf[w1]);
- double verfNew1=verfaelsche(_n1[w1]+n,sigmaVerfaelschung);
- double verfOld2=verfaelsche(_n2[w2],sigmaVerfaelschung);
- assert(verfOld2==_n2verf[w2]);
- double verfNew2=verfaelsche(_n2[w2]+n,sigmaVerfaelschung);
- _n1verf[w1]=verfNew1;
- _unigramVerfSum1+=verfNew1-verfOld1;
- _n2verf[w2]=verfNew2;
- _unigramVerfSum2+=verfNew2-verfOld2;
- _nverf[w1][w2]=verfNew;
- _bigramVerfSum+=verfNew-verfOld;
- _nWords+=n;
- }
- s+=n;_n1[w1]+=n;_n2[w2]+=n;
-
- assert(_n[w1][w2]>=0);
- assert(_n1[w1]>=0);
- assert(_n2[w2]>=0);
-
- if(s==0)
- eta0++;
- else if(s==1)
- eta1++;
- if(_n1[w1]==0)
- c1_0++;
- if(_n2[w2]==0)
- c2_0++;
- }
-};
-#endif
diff --git a/scripts/training/MGIZA/src/mkcls/KategProblemTest.cpp b/scripts/training/MGIZA/src/mkcls/KategProblemTest.cpp
deleted file mode 100644
index 8c76ce5..0000000
--- a/scripts/training/MGIZA/src/mkcls/KategProblemTest.cpp
+++ /dev/null
@@ -1,700 +0,0 @@
-/*
-
-Copyright (C) 1997,1998,1999,2000,2001 Franz Josef Och
-
-mkcls - a program for making word classes .
-
-This program is free software; you can redistribute it and/or
-modify it under the terms of the GNU General Public License
-as published by the Free Software Foundation; either version 2
-of the License, or (at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with this program; if not, write to the Free Software
-Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
-USA.
-
-*/
-
-
-
-
-#include "KategProblemTest.h"
-
-#include "ProblemTest.h"
-#include "HCOptimization.h"
-#include "TAOptimization.h"
-#include "RRTOptimization.h"
-#include "GDAOptimization.h"
-
-#include <stdlib.h>
-#include <stdio.h>
-#include <string>
-#include <strstream>
-
-typedef pair<string,string> PSS;
-
-#define NEW_SENTENCE_END "mkcls-mapped-dollar-symbol-$"
-
-#ifdef NeXT
-char *strdup(char *a)
-{
- char *p = (char *)malloc(strlen(a)+1);
- strcpy(p,a);
- return p;
-}
-
-#endif
-
-
-void writeClasses(Array<Kategory> &katOfWord,KategProblem &problem,ostream &to)
-{
- for(int i=0;i<katOfWord.size();i++)
- {
- if( strcmp(problem.getString(i),"$") )
- if( strcmp(problem.getString(i),"mkcls-mapped-dollar-symbol-$")==0 )
- to << "$" << "\t" << katOfWord[i] << endl;
- else
- to << problem.getString(i) << "\t" << katOfWord[i] << endl;
- }
-}
-
-
-void mysplit(const string &s,string &s1,string &s2)
-{
- unsigned int i=0;
- for(;i<s.length();i++)if( s[i]==' ' || s[i]=='\t' || s[i]==' ')break;
- s1=s.substr(0,i);
- for(;i<s.length();i++)if( !(s[i]==' ' || s[i]=='\t' || s[i]==' ') )break;
- s2=s.substr(i,s.length()-i);
-
- iassert(s1.size());
- iassert(s2.size());
-}
-
-
-
-int fromCatFile(KategProblem *p,const char *fname,bool verb)
-{
- leda_h_array<string,int> translation(-1);
- int maxCat=2;
- ifstream in(fname);
- if(!in)
- {
- cerr << "Error: File '" << fname << "' cannot be opened.\n";
- exit(1);
- }
- for(int i=0;i<p->wordFreq.nWords;i++)
- (p->initLike)[i]= -1;
-
-
- translation["1"]=1;
- translation["0"]=0;
-
-
- string s;
- while( getline(in,s) )
- {
- string str,categ;
- mysplit(s,str,categ);
- int i=p->words->binary_locate(str);
- if(i>=0 && (*(p->words))[i]==str )
- {
-
- if( translation[categ]==-1 )
- translation[categ]=maxCat++;
- int cat=translation[categ];
- if( (p->initLike)[i]!= -1 )
- cerr << "Warning: Word '" << ((*(p->words))[i])<< "' is already in a category.\n";
- (p->initLike)[i]=cat;
- }
- else
- cerr << "Warning: Word '" << str << "' " << i << " is not in training corpus.\n";
- }
-
- if( verboseMode )
- cout << "We have " << maxCat << " read non-empty categories"
- " (with words from the corpus).\n";
-
- if(maxCat>p->katFreq.nKats)
- {
- cerr << "Error: Not enough categories reserved (only "
- << p->katFreq.nKats << ", but i need " << maxCat << ").\n";
- exit(1);
- }
-
-
- int i=p->words->binary_locate("$");
- if( i>=0 && (*(p->words))[i]=="$" )
- (p->initLike)[i]=0;
- else
- if( verboseMode )
- cerr << "Warning: No '$' in vocabulary!\n";
-
-
- int errors=0;
- for(i=0;i<p->wordFreq.nWords;i++)
- if((p->initLike)[i]== -1 )
- {
- if( verb ) cerr << "Error: I don't know the category of word " << i
- << " (" << (*(p->words))[i] << ") " << ".\n";
- errors=1;
- }
- return errors;
-}
-
-
-
-KategProblem *makeKategProblem(const leda_h_array<PSS,FreqType>&cTbl,const leda_set<string>&setVokabular, int maxClass,int initialisierung,
- int auswertung,int nachbarschaft,int minWordFrequency)
-{
-
- int nwrd=0;
- leda_array<string>&sVok = *new leda_array<string>(setVokabular.size());
- string s;
- unsigned int ctr=0;
- forall_set(leda_set<string>,s,setVokabular)
- {
- if( verboseMode>2 )
- cout << "mkcls:Wort " << ctr << " " << s << endl;
- sVok[ctr++]=s;
- }
- for(unsigned int z=0;z<ctr-1;z++)
- iassert( sVok[z]<sVok[z+1] );
- sVok.sort();
-
- if( verboseMode>2 )
- cout << "*****Vocabulary: " << sVok;
-
- unsigned int vokSize=sVok.size();
- massert(vokSize==ctr); massert(vokSize==setVokabular.size());
- if(verboseMode)
- {cout << "Size of vocabulary: " << vokSize << "\n";cout.flush();}
-
- KategProblem *k = new KategProblem(vokSize,maxClass,initialisierung,
- auswertung,nachbarschaft,minWordFrequency);
- KategProblemWBC &w=k->wordFreq;
- k->words=&sVok;
-
- Array<int> after(vokSize,0);
- Array<int> before(vokSize,0);
-
-
- nwrd=0;
- {
- PSS s;
- forall_defined_h2(PSS,FreqType,s,cTbl)
- {
- const string&ss1=s.first;
- const string&ss2=s.second;
- if( ss2.length()&&(ss1!="$" || ss2!="$") )
- {
- int i1=sVok.binary_search(ss1);
- int i2=sVok.binary_search(ss2);
- iassert( sVok[i1] == ss1 );iassert( sVok[i2] == ss2 );
- after[i1]++;
- before[i2]++;
- }
- if( verboseMode&&((nwrd++)%10000==0) )
- {cout<<"Statistiken-1 " << nwrd<< ". \r";cout.flush();}
- }
- }
-
- for(unsigned int i=0;i<vokSize;i++)
- {
- w.setAfterWords(i,after[i]);
- w.setBeforeWords(i,before[i]);
- }
-
-
- {
- nwrd=0;
- PSS s;
- forall_defined_h2(PSS,FreqType,s,cTbl)
- {
- const string&ss1=s.first;
- const string&ss2=s.second;
- FreqType p=cTbl[s];
- if( ss2.length()&&(ss1!="$" || ss2!="$") )
- {
- int i1=sVok.binary_search(ss1);
- int i2=sVok.binary_search(ss2);
- iassert( sVok[i1] == ss1 );iassert( sVok[i2] == ss2 );
- w.setFreq(i1,i2,p);
- if( verboseMode>2 )
- cout << "BIGRAMM-HAEUF: " << ss1 << ":" << i1 << " "
- << ss2 << ":" << i2 << " " << p << endl;
- }
- if( verboseMode&&((nwrd++)%10000==0) )
- {cout<<"Statistiken-2 " <<nwrd<< ". \r";cout.flush();}
- }
- }
-
- w.testFull();
- if(verboseMode){cout << "Datenintegritaet getestet.\n";cout.flush();}
- return k;
-}
-
-KategProblem *fromNgrFile(const char *str,int maxClass,int initialisierung,
- int auswertung,int nachbarschaft,int minWordFrequency)
-{
- ifstream file(str);
- if(!file)return 0;
- leda_set<string> setVokabular;
- leda_h_array<PSS,FreqType> cTbl;
- double c=0;
- if( verboseMode )cout << "NGRFILE: " << str << endl;
- string s1,s2;
- while(file >> c >> s1 >> s2)
- {
- if( s1.length()==0||s2.length()==0 )
- {
- cerr << "ERROR: strings are zero: " << s1.length() <<" " << s1 <<" " << s2.length()<<" " << s2 << endl;
- return 0;
- }
- if( c==0 )
- {
- cerr << "Count ist 0 " << s1 << " " << s2 << endl;
- return 0;
- }
- cTbl[pair<string,string>(s1,s2)]=(FreqType)c;
- setVokabular.insert(s1);
- setVokabular.insert(s2);
- if( verboseMode>1 )
- cout << "R: " << s1 << " " << s2 << " " << c << endl;
- c=0;
- }
-
- return makeKategProblem(cTbl,setVokabular,maxClass,initialisierung,auswertung,nachbarschaft,minWordFrequency);
-}
-
-
-
-
-
-
-
-
-KategProblem *fromKModel(const char *str,int maxClass,int initialisierung,
- int auswertung,int nachbarschaft,int minWordFrequency)
-{
- string oldText,text,line;
- ifstream f(str);
- if( !f )
- {
- cerr << "ERROR: can not open file " << str << ".\n";
- return 0;
- }
-
- leda_set<string> setVokabular;
- leda_h_array<PSS,FreqType> cTbl(0);
- oldText="$";
- while(1)
- {
- getline(f,line);
- if(f.fail() && !f.bad() && !f.eof())
- {
- cerr << "WARNING: strange characters in stream (getline) " << endl;f.clear();
- }
- if(!f)break;
-
- istrstream f2(line.c_str());
- while( 1 )
- {
- f2 >> text;
- if(f2.fail() && !f2.bad() && !f2.eof())
- {
- cerr << "WARNING: strange characters in stream (>>) !\n";
- f2.clear(ios::failbit);
- }
- if(!f2){break;}
-
-
-
-
-
-
- if( text == "$" )
- text = "mkcls-mapped-dollar-symbol-$";
- if( !setVokabular.member(text) )setVokabular.insert(text);
- cTbl[pair<string,string>(oldText,text)]++;
- oldText=text;
- }
- text="$";
- if( !setVokabular.member(text) )setVokabular.insert(text);
- cTbl[pair<string,string>(oldText,text)]++;
- oldText=text;
- }
- return makeKategProblem(cTbl,setVokabular,maxClass,initialisierung,auswertung,nachbarschaft,minWordFrequency);
-}
-
-
-
-
-
-void KategProblemSetParameters(KategProblem &p)
-{
- if( p.katwahl()==K_BEST )
- {
- TAOptimization::defaultAnnRate=0.7;
- RRTOptimization::defaultAnnRate=0.95;
- GDAOptimization::defaultAlpha=0.05;
- if( verboseMode )
- cout << "Parameter-setting like W-DET-BEST\n";
- }
- else
- {
- TAOptimization::defaultAnnRate=0.4;
- RRTOptimization::defaultAnnRate=0.6;
- GDAOptimization::defaultAlpha=0.0125;
- if( verboseMode )
- cout << "Parameter-setting like W-DET-DET\n";
- }
-}
-
-
-
-
-KategProblem &makRandom(int ANZ_WORD,int ANZ_CLS,int initValue,
- int auswertung,int nachbarschaft,float relInit)
-{
- KategProblem &k=
- *new KategProblem(ANZ_WORD,ANZ_CLS,initValue,auswertung,nachbarschaft);
- KategProblemWBC &w=k.wordFreq;
- Array<int> after(ANZ_WORD,0);
- Array<int> before(ANZ_WORD,0);
- Array<FreqArray> twoD(ANZ_WORD);
- int i;
- for(i=0;i<ANZ_WORD;i++) twoD[i].init(ANZ_WORD,0);
-
- for(i=0;i<ANZ_WORD;i++)
- {
- massert(after[i]==0);
- massert(before[i]==0);
- for(int j=0;j<ANZ_WORD;j++)
- {
- massert(twoD[i][j]==0);
- }
- }
- for(i=0;i<ANZ_WORD*ANZ_WORD*relInit;i++)
- {
- int x=randomInt(ANZ_WORD);
- int y=randomInt(ANZ_WORD);
- if(twoD[x][y]==0)
- {
- after[x]++;
- before[y]++;
- }
- twoD[x][y]+=randomInt(10)+1;
- }
- for(i=0;i<ANZ_WORD;i++)
- {
- w.setAfterWords(i,after[i]);
- w.setBeforeWords(i,before[i]);
- }
-
- for(i=0;i<ANZ_WORD;i++)
- {
- for(int j=0;j<ANZ_WORD;j++)
- if( twoD[i][j] )
- w.setFreq(i,j,twoD[i][j]);
- }
- w.testFull();
- return k;
-}
-
-
-
-
-char *makeTitle(KategProblem &problem,int verfahren)
-{
- char x[1024];
- switch(verfahren)
- {
- case HC_OPT:
- strcpy(x,"HC ");
- break;
- case SA_OPT:
- strcpy(x,"SA ");
- break;
- case TA_OPT:
- strcpy(x,"TA ");
- break;
- case GDA_OPT:
- strcpy(x,"GDA ");
- break;
- case RRT_OPT:
- strcpy(x,"RRT ");
- break;
- }
- problem.makeTitle(x+strlen(x));
- return strdup(x);
-}
-
-
-
-
-#define MAX_MULTIPLE 10
-
-Array<KategProblem *> &_izrOptimization(Array<KategProblem *> &probs,
-int anzprob,double timeForOneRed,double maxClock,Array<Kategory> &katOfWord,
-int anzIter,int verfahren)
-{
- massert(anzprob>1);
- massert(probs[0]->wordFreq.mindestAnzahl<=1);
- KategProblem *p0=probs[0];
-
- int nWords=p0->wordFreq.nWords;
- int nKats=p0->katFreq.nKats;
- int minimumNumberOfWords = max(1,int(nWords*0.95));
-
- int indexOfDurchschnitt;
- Array<int> newWords(nWords);
- int useAnzprob=anzprob;
- do
- {
- int w,k;
- indexOfDurchschnitt=0;
- for(w=0;w<nWords;w++)
- newWords[w]=-1;
- for(k=0;k<useAnzprob;k++)
- {
- massert(probs[k]->wordFreq.nWords==nWords);
- probs[k]->makeKats();
- }
-
- for(w=0;w<nWords;w++)
- {
- if( newWords[w]==-1 )
- {
-
-
-
- leda_set<int> durchschnitt=(*p0->kats)[p0->katOfWord(w)];
- for(k=1;k<useAnzprob;k++)
- durchschnitt = durchschnitt & (*probs[k]->kats)[probs[k]->katOfWord(w)];
-
-
- int _anzInDurchschnitt=0;
- int nr=0;
- forall_set(leda_set<int>,nr,durchschnitt)
- {
- _anzInDurchschnitt++;
- newWords[nr]=indexOfDurchschnitt;
- }
- if( verboseMode && _anzInDurchschnitt>1 && anzIter==0 )
- {
- cout << "- (";
- forall_set(leda_set<int>,nr,durchschnitt)
- {
- cout << p0->getString(nr);
- if( p0->wordFreq.n1(nr)==1 )
- cout << "* ";
- else
- cout << " ";
- }
- cout << ")\n";
- }
-
-
-
-
- for(k=0;k<useAnzprob;k++)
- {
- durchschnitt = durchschnitt - (*probs[k]->kats)[probs[k]->katOfWord(w)];
- }
- indexOfDurchschnitt++;
- }
- }
-
- if(indexOfDurchschnitt>=minimumNumberOfWords)
- {
- if(useAnzprob==1)
- {
- cout << "useAnzProb==1 => mysterious.\n";
- break;
- }
- useAnzprob--;
- }
- }
- while(indexOfDurchschnitt>=minimumNumberOfWords);
-
-
- Array<KategProblem *> &neu=*new Array<KategProblem *>(MAX_MULTIPLE*anzprob,(KategProblem *)0);
- qsort(probs.getPointerToData(),useAnzprob,sizeof(KategProblem *),compareProblem);
- massert(useAnzprob<=probs.size());
- double startTime=clockSec();
- int i, numberOfNew;
- for(numberOfNew=0; (clockSec()-startTime<timeForOneRed)
- || (numberOfNew < anzprob) ; numberOfNew++)
- {
- int w;
- if( numberOfNew==anzprob*MAX_MULTIPLE-1 )
- break;
- KategProblem *p
- = neu[numberOfNew]
- = new KategProblem(indexOfDurchschnitt,nKats-2,
- p0->initialisierung,p0->auswertung,p0->nachbarschaft);
-
- for(w=0;w<indexOfDurchschnitt;w++)
- {
- p->wordFreq.setAfterWords(w,5);
- p->wordFreq.setBeforeWords(w,5);
- }
- for(w=0;w<nWords;w++)
- {
- Array<OneFreq> &after=p0->wordFreq.after[w];
- int size=after.size();
- for(i=0;i<size;i++)
- p->wordFreq.addFreq(newWords[w],newWords[after[i].w],after[i].n);
- }
- p->wordFreq.testFull(1);
-
-
-
-
-
-
- p->wordFreq.set_h_of_words(p0->wordFreq.get_h_of_words());
- double w1=0.0,w2=0.0;
- if(numberOfNew<useAnzprob)
- {
-
- for(i=0;i<nWords;i++)
- (p->initLike)[newWords[i]]=probs[numberOfNew]->katOfWord(i);
- p->_initialize(5);
- HCOptimization hc(*p,-1);
- if(verboseMode)
- {
- w1=p->nicevalue();
- cout << "from old category system:" << w1 << endl;
- }
- hc.minimize(-1);
- if(verboseMode)
- {
- w2=p->nicevalue();
- if(w2<w1)
- cout << "improvement: " << w1-w2 << endl;
- }
- }
- else
- {
- p->_initialize(1);
- double mean;
- StatVar end,laufzeit,start;
- solveProblem(0,*p,1,-1,verfahren,mean,end,laufzeit,start);
- w2=p->value();
- if(verboseMode)
- cout << "new category system: " << w2 << " (" << p->nicevalue()
- << ") Zeit: " << clockSec() << "\n";
- }
- }
- int p;
- for(p=0;p<probs.size();p++)
- {
- if( probs[p] )
- delete probs[p];
- }
- qsort(neu.getPointerToData(),numberOfNew,sizeof(Problem *),compareProblem);
- massert(numberOfNew<=neu.size());
- if( verboseMode )
- cout << "Iterierte Zustandsraum-Reduktion: " << indexOfDurchschnitt
- << " words. costs: " << neu[0]->value() << " "
- << neu[0]->nicevalue() << " (" << numberOfNew-anzprob << ")" << "time: "
- << clockSec() << endl;
- if( indexOfDurchschnitt<=nKats
- || (clockSec()>maxClock&&maxClock) )
- {
- if( clockSec()>maxClock&&maxClock )
- cout << "STOP (time limit: " << (clockSec()-maxClock) << " s)\n";
- for(i=0;i<nWords;i++)
- katOfWord[i]=neu[0]->katOfWord(newWords[i]);
- return neu;
- }
- else
- {
- Array<Kategory> &newKatOfWord=
- *(new Array<Kategory>(neu[0]->wordFreq.nWords,-1));
- Array<KategProblem *> &erg=_izrOptimization(neu,anzprob,timeForOneRed,
- maxClock,newKatOfWord,
- anzIter+1,verfahren);
- for(i=0;i<nWords;i++)
- katOfWord[i]=newKatOfWord[newWords[i]];
- return erg;
- }
-}
-
-
-
-
-KategProblem *izrOptimization(KategProblem &p,int minN,int firstN,
- double clockForOneRed,double maxClock,int verfahren)
-{
- Array<Kategory> katOfWord(p.wordFreq.nWords,-1);
- int startN;
- if( clockForOneRed<=0 )
- startN=firstN;
- else
- startN=1000;
- Array<KategProblem *> probs(startN);
- double val1=0.0,val2=0.0;
- double endTime=-1;
-
- double startTime=clockSec();
- int i;
- for(i=0;i<startN;i++)
- {
- StatVar end,laufzeit,start;
- double mean;
- probs[i] = (KategProblem *)((KategProblem *)p.makeEqualProblem());
- solveProblem(0,*(probs[i]),1,-1,verfahren,mean,end,laufzeit,start);
- if( i==minN-1 )
- endTime = clockSec();
- if( i>=firstN-1 && (startTime+clockForOneRed>clockSec() || i==999) )
- break;
- }
- if( endTime<0 )
- endTime=clockSec();
- massert(i>=firstN);
-
- qsort(probs.getPointerToData(),i,sizeof(KategProblem *),compareProblem);
- massert(i<=probs.size());
- if( clockForOneRed<=0 )
- {
- clockForOneRed=endTime-startTime;
- if( verboseMode )
- cout << "time for one reduction: " << clockForOneRed << endl;
- }
- _izrOptimization(probs,minN,clockForOneRed,maxClock,katOfWord,0,verfahren);
-
- KategProblem *n=(KategProblem *)(p.makeEqualProblem());
- n->initLike= katOfWord;
- n->_initialize(5);
- if( verboseMode )
- val1=n->value();
- HCOptimization hc(*n,-1);
- hc.minimize(-1);
- val2=n->value();
- if( verboseMode )
- cout << "last improvement: " << val2-val1 << "\n";
- cout << "final costs: " << val2 << " " << n->nicevalue() << endl;
- if(PrintBestTo)
- n->dumpOn(*PrintBestTo);
- return n;
-}
-
-
-
-
-
-
-
-
-
-
-
diff --git a/scripts/training/MGIZA/src/mkcls/KategProblemTest.h b/scripts/training/MGIZA/src/mkcls/KategProblemTest.h
deleted file mode 100644
index 7767b7d..0000000
--- a/scripts/training/MGIZA/src/mkcls/KategProblemTest.h
+++ /dev/null
@@ -1,60 +0,0 @@
-/*
-
-Copyright (C) 1997,1998,1999,2000,2001 Franz Josef Och
-
-mkcls - a program for making word classes .
-
-This program is free software; you can redistribute it and/or
-modify it under the terms of the GNU General Public License
-as published by the Free Software Foundation; either version 2
-of the License, or (at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with this program; if not, write to the Free Software
-Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
-USA.
-
-*/
-
-
-
-
-
-
-
-#include "KategProblem.h"
-
-
-KategProblem &makRandom(int ANZ_WORD,int ANZ_CLS,int initialisierung,
- int auswertung,int nachbarschaft,float relInit=0.1);
-
-
-
-KategProblem *fromKModel(const char *str,int maxClass,int initialisierung,
- int auswertung,int nachbarschaft,int minWordFrequency);
-
-
-KategProblem *fromNgrFile(const char *str,int maxClass,int initialisierung,
- int auswertung,int nachbarschaft,int minWordFrequency);
-
-void writeClasses(Array<Kategory> &katOfWord,KategProblem &problem,ostream &to);
-
-
-
-int fromCatFile(KategProblem *p,const char *s,bool verb=1);
-
-
-
-KategProblem *izrOptimization(KategProblem &p,int minN,int firstN,
-double clockForOneRed,double maxClock,int verfahren);
-
-
-
-void KategProblemSetParameters(KategProblem &p);
-
-
diff --git a/scripts/training/MGIZA/src/mkcls/KategProblemWBC.cpp b/scripts/training/MGIZA/src/mkcls/KategProblemWBC.cpp
deleted file mode 100644
index 1a0d439..0000000
--- a/scripts/training/MGIZA/src/mkcls/KategProblemWBC.cpp
+++ /dev/null
@@ -1,344 +0,0 @@
-/*
-
-Copyright (C) 1997,1998,1999,2000,2001 Franz Josef Och
-
-mkcls - a program for making word classes .
-
-This program is free software; you can redistribute it and/or
-modify it under the terms of the GNU General Public License
-as published by the Free Software Foundation; either version 2
-of the License, or (at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with this program; if not, write to the Free Software
-Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
-USA.
-
-*/
-
-
-
-
-#include <stdlib.h>
-#include "KategProblem.h"
-
-static int oneFreqCompareSteigend(const void *p,const void *j)
-{
-#ifdef FREQTYPE_DOUBLE
- if( (((OneFreq *)p)->n < ((OneFreq *)j)->n) )
- return -1;
- if( (((OneFreq *)p)->n > ((OneFreq *)j)->n) )
- return +1;
- else
- return 0;
-#else
- return ((OneFreq *)p)->n - ((OneFreq *)j)->n;
-#endif
-}
-static int oneFreqCompareFallend(const void *p,const void *j)
-{
-#ifdef FREQTYPE_DOUBLE
- if( (((OneFreq *)p)->n > ((OneFreq *)j)->n) )
- return -1;
- if( (((OneFreq *)p)->n < ((OneFreq *)j)->n) )
- return +1;
- else
- return 0;
-#else
- return -((OneFreq *)p)->n + ((OneFreq *)j)->n;
-#endif
-}
-
-
-KategProblemWBC::KategProblemWBC(int n,int minw)
-: _n1(n,0),_n2(n,0),with_h_of_words(0),afterFilled(n,0),beforeFilled(n,0),filled(0),fixedWord(n,-1),absteigend(0),nWords(n),nTranspWords(0),
- mindestAnzahl(minw),after(n),before(n),minIndex(n,-1),maxIndex(n,-1)
-
-{
-}
-
-KategProblemWBC::~KategProblemWBC()
-
-{
- massert( after.size()==nWords);
- if( absteigend )
- delete absteigend;
-}
-
-void KategProblemWBC::init(int specialFixedWord)
-{
-
- nTranspWords=0;
- int i;
- for(i=0;i<_n1.size();i++)
- {
- if( (_n1[i]<mindestAnzahl && _n2[i]<mindestAnzahl && minIndex[i]<=1) ||i==specialFixedWord )
- {
-
- if(!( fixedWord[i]==1 || fixedWord[i]== -1))
- cerr << "mkcls:KategProblemWBC::init::ERROR: " << i << " " << fixedWord[i] << endl;
- fixedWord[i]=1;
- }
- else if(fixedWord[i]<0)
- nTranspWords++;
- }
- if( absteigend==0 )
- absteigend= &(getSortedList(0));
-
-
-
-
-
- if(verboseMode && nTranspWords!=_n1.size()-1 )
- cout << "Es sind: " <<nTranspWords<<" transportierbar.\n";
-}
-
-void KategProblemWBC::set_h_of_words(double s)
-
-{
- with_h_of_words=1;
- h_of_words = -s;
-}
-
-double KategProblemWBC::get_h_of_words()
-
-{
- if( with_h_of_words )
- return -h_of_words;
- else
- {
- h_of_words=0;
- for(int i=0;i<nWords;i++)
- h_of_words+=0.5*(kat_h(_n2[i])+kat_h(_n1[i]));
- with_h_of_words=1;
- return -h_of_words;
- }
-}
-
-
-void KategProblemWBC::setAfterWords(int w,int anzahl)
-
-{
- OneFreq o;
- o.w=-1;
- o.n=0;
- afterFilled[w]=0;
- after[w].init(anzahl,o,1);
-}
-void KategProblemWBC::setBeforeWords(int w,int anzahl)
-
-{
- OneFreq o;
- o.w=-1;
- o.n=0;
- beforeFilled[w]=0;
- before[w].init(anzahl,o,1);
-}
-
-
-void KategProblemWBC::setFreq(int w1,int w2,FreqType anzahl)
-
-{
- OneFreq o;
- o.n=anzahl;
-
- o.w=w2;
- after[w1][afterFilled[w1]++]=o;
- _n1[w1]+=anzahl;
- o.w=w1;
- before[w2][beforeFilled[w2]++]=o;
- _n2[w2]+=anzahl;
-}
-
-void KategProblemWBC::addFreq(int w1,int w2,FreqType anzahl)
-
-{
- OneFreq o;
- o.n=anzahl;
- int pos=-1,i;
- for(i=0;i<afterFilled[w1];i++)
- if(after[w1][i].w==w2)
- pos=i;
-
- if(pos==-1)
- {
- o.w=w2;
- after[w1][afterFilled[w1]++]=o;
- }
- else
- after[w1][pos].n+=anzahl;
- _n1[w1]+=anzahl;
-
- pos=-1;
- for(i=0;i<beforeFilled[w2];i++)
- if(before[w2][i].w==w1)
- pos=i;
- if(pos==-1)
- {
- o.w=w1;
- before[w2][beforeFilled[w2]++]=o;
- }
- else
- before[w2][pos].n+=anzahl;
- _n2[w2]+=anzahl;
-}
-
-
-short KategProblemWBC::testFull(int doIt)
-
-{
- int enaNom=0;
- int afterFilledSum=0,beforeFilledSum=0;
- int ret=1,i;
- for(i=0;i<nWords;i++)
- {
- if( n1(i)==1 && n2(i)==1 )
- enaNom++;
- afterFilledSum+=afterFilled[i];
- beforeFilledSum+=beforeFilled[i];
- if(afterFilled[i]!=after[i].size())
- {
- ret=0;
- if( doIt )
- after[i].resize(afterFilled[i]);
- }
- if(beforeFilled[i]!=before[i].size())
- {
- ret=0;
- if( doIt )
- before[i].resize(beforeFilled[i]);
- }
-
- }
- if( ret==0 && !doIt )
- {
- cerr << "Error: Unfilled word bigram statistics.\n";
- exit(1);
- }
- else
- filled=1;
- if( verboseMode>1 )
- {
- cout << "MEAN(|L(w)|+|R(w)|)=" << (beforeFilledSum/(float)nWords)
- +(afterFilledSum/(float)nWords) << endl;
- cout << "Hapaslegomena: " << enaNom << endl;
- }
- int symmetrisch=1;
- for(i=0;i<nWords;i++)
- {
- int j;
- massert(before[i].size()==beforeFilled[i]);
- massert( after[i].size()== afterFilled[i]);
- FreqType sum=0;
- for(j=0;j<after[i].size();j++)
- sum+=after[i][j].n;
- massert( sum==_n1[i] );
- sum=0;
- for(j=0;j<before[i].size();j++)
- sum+=before[i][j].n;
- massert(sum==_n2[i]);
- if(_n1[i]!=_n2[i])
- {
- symmetrisch=0;
- if( verboseMode>1 )
- cout << "Asymmetrie: " << i << " " << _n1[i] << " " << _n2[i] << endl;
- }
-
- }
- if(verboseMode && symmetrisch==0)
- cout << "Warning: word bigram statistic is not symmetric "
- "(this is possibly an error)\n";
- return ret;
-}
-
-Array<Word> &KategProblemWBC::getSortedList(int steigend)
-
-{
- int siz=_n2.size(),i;
- massert(filled);
- Array<Word> &sortedList =*new Array<Word>(siz);
- Array<OneFreq> list(siz);
- int pos=0;
- for(i=0;i<siz;i++)
- {
- if( fixedWord[i]<0 )
- {
- list[pos].w=i;
- list[pos].n=_n1[i];
- pos++;
- }
- }
- int anzFree=pos;
- for(i=0;i<siz;i++)
- {
- if( fixedWord[i]>=0 )
- {
- list[pos].w=i;
- list[pos].n=_n1[i];
- pos++;
- }
- }
- massert(pos==siz);
- if(steigend )
- qsort(list.getPointerToData(),anzFree,sizeof(OneFreq),oneFreqCompareSteigend);
- else
- qsort(list.getPointerToData(),anzFree,sizeof(OneFreq),oneFreqCompareFallend);
- massert( anzFree<=list.size() );
-
- for(i=0;i<siz;i++)
- {
- sortedList[i]=list[i].w;
- massert(steigend || i==0 || i>=anzFree || list[i-1].n>=list[i].n );
- massert((!steigend) || i==0 || i>=anzFree || list[i-1].n<=list[i].n );
- }
- return sortedList;
-}
-
-FreqType KategProblemWBC::numberOfWords()
-
-{
- FreqType n1=0,n2=0;
- for(int i=0;i<_n1.size();i++)
- {
- n1+=_n1[i];
- n2+=_n2[i];
- }
- #ifndef FREQTYPE_DOUBLE
- massert(n1==n2);
- #endif
- return n1;
-}
-
-void KategProblemWBC::setDollar(int n)
-
-{
- if( fixedWord[n]<0 )
- nTranspWords--;
- fixedWord[n]=0;
-}
-
-void KategProblemWBC::initializeIndex(const leda_array<string>&words,char firstChar,int unten,int oben,bool noHapas)
-{
- int n=0;
- int i;
- massert(-1<unten);massert(unten<oben);
- if( verboseMode )
- cout << "InitializeIndex: " << firstChar << " u:" << unten << " o:" << oben << " " << noHapas << endl;
- over_array(words,i)
- {
- if( words[i][0]==firstChar && (noHapas || ((short)(n1(i)+0.0001))>=mindestAnzahl || ((short)(n2(i)+0.0001))>=mindestAnzahl) )
- {
- minIndex[i]=unten;
- maxIndex[i]=oben;
- n++;
- }
- }
- if( verboseMode )
- cout << "InitializeIndex gefunden fuer " << n << " Woerter.\n";
-}
-
diff --git a/scripts/training/MGIZA/src/mkcls/KategProblemWBC.h b/scripts/training/MGIZA/src/mkcls/KategProblemWBC.h
deleted file mode 100644
index 8a399e5..0000000
--- a/scripts/training/MGIZA/src/mkcls/KategProblemWBC.h
+++ /dev/null
@@ -1,131 +0,0 @@
-/*
-
-Copyright (C) 1997,1998,1999,2000,2001 Franz Josef Och
-
-mkcls - a program for making word classes .
-
-This program is free software; you can redistribute it and/or
-modify it under the terms of the GNU General Public License
-as published by the Free Software Foundation; either version 2
-of the License, or (at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with this program; if not, write to the Free Software
-Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
-USA.
-
-*/
-
-
-
-
-
-
-
-#ifndef KATEGPROBLEMWBC_H
-#define KATEGPROBLEMWBC_H
-
-struct OneFreq
-{
- int w;
- FreqType n;
-};
-
-typedef Array<OneFreq> ManyFreq;
-
-class KategProblemWBC
-
-{
-
- friend class KategProblem;
-
- private:
- Array<FreqType> _n1;
-
- Array<FreqType> _n2;
-
-
- double h_of_words;
-
-
- short with_h_of_words;
-
- Array<int> afterFilled;
- Array<int> beforeFilled;
-
- Array<int> &getSortedList(int steigend);
-
-
- protected:
- KategProblemWBC(int n,int minw);
-
-
- ~KategProblemWBC();
-
-
- short filled;
-
- Array<int> fixedWord;
- Array<int> *absteigend;
-
- void init(int specialFixedWord=-1);
-
-
- public:
- int nWords;
- int nTranspWords;
- short mindestAnzahl;
- Array<ManyFreq> after;
- Array<ManyFreq> before;
- Array<int> minIndex;
- Array<int> maxIndex;
-
-
-
- void setAfterWords(int w,int anzahl);
-
-
- void setBeforeWords(int w,int anzahl);
-
-
- void setFreq(int w1,int w2, FreqType anzahl);
-
-
- void addFreq(int w1,int w2,FreqType anzahl);
-
-
- void setDollar(int n);
-
-
- int fixed(int w)
- {
- return fixedWord[w];
- }
-
- FreqType n1(int w) { return _n1[w];};
-
-
- FreqType n2(int w) { return _n2[w];};
-
-
- FreqType numberOfWords();
-
-
- short testFull(int doIt=0);
-
-
- double get_h_of_words();
-
-
- void set_h_of_words(double s);
-
-
- void initializeIndex(const leda_array<string>&words,char firstChar,int min,int max,bool noHapas);
-};
-
-#endif
diff --git a/scripts/training/MGIZA/src/mkcls/MSBOptimization.cpp b/scripts/training/MGIZA/src/mkcls/MSBOptimization.cpp
deleted file mode 100644
index 9478826..0000000
--- a/scripts/training/MGIZA/src/mkcls/MSBOptimization.cpp
+++ /dev/null
@@ -1,229 +0,0 @@
-/*
-
-Copyright (C) 1997,1998,1999,2000,2001 Franz Josef Och
-
-mkcls - a program for making word classes .
-
-This program is free software; you can redistribute it and/or
-modify it under the terms of the GNU General Public License
-as published by the Free Software Foundation; either version 2
-of the License, or (at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with this program; if not, write to the Free Software
-Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
-USA.
-
-*/
-
-
-
-
-#include "MSBOptimization.h"
-#include <stdlib.h>
-#include "ProblemTest.h"
-
-#ifdef __GNUC__
-template class Array<double>;
-template class Array<ProbAndOpt>;
-#endif
-
-struct doubleInt { double a; int i; };
-static int doubleintcompare(const void *p,const void *j)
-{
- if(((struct doubleInt *)p)->a < ((doubleInt *)j)->a)
- return -1;
- else if(((struct doubleInt *)p)->a == ((doubleInt *)j)->a)
- return 0;
- else
- return 1;
-}
-
-
-MSBOptimization::MSBOptimization(Problem &p,int verf,int anz,Array<double> &pos,Array<double> &por)
-: PopOptimization(p,verf,anz),
-percentOfSteps(pos),percentOfRun(por),nachMinimierung(0)
-{
-}
-
-
-void MSBOptimization::zInitialize()
-{
- PopOptimization::zInitialize();
-
- int iterationsschritte;
- double mean;
- StatVar end,laufzeit,start;
- zufallSeed();
-
-
-
-
- solveProblem(ProblemTestVerboseMode,*originalProblem,2,-1,verfahren,mean,
- end,laufzeit,start,0,&iterationsschritte);
- expectedSteps=(int)(iterationsschritte);
-
- if(verboseMode)
- cout << "MSB:mean number of steps for one run: " << expectedSteps << endl;
-}
-
-
-double MSBOptimization::minimize(int)
-{
- if( initialisiert==0 )
- zInitialize();
-
- int i;
- int anz=size();
- int numproblems=anz;
-
- if( verboseMode )
- {
- double usedSteps=0;
- for(i=0;i<percentOfSteps.size();i++)
- {
- usedSteps+=expectedSteps*(percentOfSteps[i]-
- (i==0?0:percentOfSteps[i-1]))*numproblems;
- numproblems=(int)(ceil(anz*(1.0-percentOfRun[i])));
- if( numproblems<1 )numproblems=1;
- }
- usedSteps+=expectedSteps*
- (1.0-percentOfSteps[percentOfSteps.size()-1])*numproblems;
- cout << "MSB: speed factor: "
- << (double)usedSteps/(expectedSteps*size()) << endl;
- numproblems=anz=size();
- }
-
- for(i=0;i<percentOfSteps.size();i++)
- {
-
- int steps=(int)(expectedSteps*(percentOfSteps[i]-
- (i==0?0:percentOfSteps[i-1])));
-
-
- for(int a=0;a<numproblems;a++)
- {
-
- double v;
- v= optimization(a)->minimize(steps);
- if(verboseMode)cout << "MSB:" << i << " " << a << ":" << v << endl;
- }
-
- sort();
-
- if(verboseMode)
- cout << "MSB: best:" << problem(0)->value()
- << " worst:" << problem(numproblems-1)->value() << endl;
-
-
- numproblems=(int)(anz*(1.0-percentOfRun[i]));
- if( numproblems<1 )
- numproblems=1;
- if(verboseMode)
- cout << "MSB: now i have : " << numproblems << " Problem's." << endl;
- if(numproblems==1)
- break;
- }
- assert( numproblems>0 );
-
-
- for(int a=0;a<numproblems;a++)
- optimization(a)->minimize(-1);
- sort();
-
- double ergebnisWert = problem(0)->value();
- cout << "MSB: value:" << ergebnisWert << " (nicevalue:"
- << problem(0)->nicevalue() << ")\n";
- nachMinimierung=1;
- return ergebnisWert;
-}
-
-
-
-void MSBOptimization::optimizeValues(Problem &p,int verfahren)
-{
- int i;
- struct doubleInt ri[20];
- double mean;
- StatVar end,laufzeit,start;
- solveProblem(ProblemTestVerboseMode,p,5,-1,verfahren,mean,end,laufzeit,start);
- double fivePercentSteps=(int)(laufzeit.getMean()/20.0);
- double qualitaet[20][20];
- for(i=0;i<20;i++)
- {
- Optimization *o=(Optimization *)genIterOptimizer(verfahren,p,-1);
- for(int a=0;a<20;a++)
- {
- qualitaet[i][a]=o->minimize((int)fivePercentSteps);
- cout << qualitaet[i][a] << " ";
- }
- ri[i].a=o->minimize(-1);
- ri[i].i=i;
- cout << ri[i].a << endl;
- delete o;
- }
- qsort(ri,20,sizeof(struct doubleInt),doubleintcompare);
-
- cout << "#Beschneidungsmatrix, welche die drei besten Laeufe erhaelt: ";
- for(i=0;i<20;i++)
- {
- int a;
- struct doubleInt v[20];
- for(a=0;a<20;a++)
- { v[a].i=a;v[a].a=qualitaet[a][i];}
- qsort(v,20,sizeof(struct doubleInt),doubleintcompare);
- int nr=0;
- for(a=0;a<20;a++)
- if( v[a].i==ri[0].i || v[a].i==ri[1].i || v[a].i==ri[2].i )
- nr=a;
- float percent=(1.0-nr/20.0)*100.0;
- if(nr==2)
- percent=100.0;
- cout << "# " << i << " " << (i/20.0)*100 << "% " << percent << "%\n";
- }
- cout << "#Beschneidungsmatrix, welche die zwei besten Laeufe erhaelt: ";
- for(i=0;i<20;i++)
- {
- int a;
- struct doubleInt v[20];
- for(a=0;a<20;a++)
- { v[a].i=a;v[a].a=qualitaet[a][i];}
- qsort(v,20,sizeof(struct doubleInt),doubleintcompare);
- int nr=0;
- for(a=0;a<20;a++)
- if( v[a].i==ri[0].i || v[a].i==ri[1].i )
- nr=a;
- float percent=(1.0-nr/20.0)*100.0;
- if(nr==1)
- percent=100.0;
- cout << "# " << i << " " << (i/20.0)*100 << "% " << percent << "%\n";
- }
- cout << "#Beschneidungsmatrix, welche den besten Lauf erhaelt: ";
- for(i=0;i<20;i++)
- {int a;
- struct doubleInt v[20];
- for(a=0;a<20;a++)
- { v[a].i=a;v[a].a=qualitaet[a][i];}
- qsort(v,20,sizeof(struct doubleInt),doubleintcompare);
- int nr=0;
- for(a=0;a<20;a++)
- if( v[a].i==ri[0].i )
- nr=a;
- float percent=(1.0-nr/20.0)*100.0;
- if(nr==0)
- percent=100.0;
- cout << "# " << i << " " << (i/20.0)*100 << "% " << percent << "%\n";
- }
-}
-
-
-Problem& MSBOptimization::bestProblem()
-{
- assert(nachMinimierung==1);
- return *(problem(0));
-}
diff --git a/scripts/training/MGIZA/src/mkcls/MSBOptimization.h b/scripts/training/MGIZA/src/mkcls/MSBOptimization.h
deleted file mode 100644
index ab30c98..0000000
--- a/scripts/training/MGIZA/src/mkcls/MSBOptimization.h
+++ /dev/null
@@ -1,77 +0,0 @@
-/*
-
-Copyright (C) 1997,1998,1999,2000,2001 Franz Josef Och
-
-mkcls - a program for making word classes .
-
-This program is free software; you can redistribute it and/or
-modify it under the terms of the GNU General Public License
-as published by the Free Software Foundation; either version 2
-of the License, or (at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with this program; if not, write to the Free Software
-Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
-USA.
-
-*/
-
-
-
-
-
-
-
-#ifndef MSBOPTIMIZATION
-#define MSBOPTIMIZATION
-
-#include "PopOptimization.h"
-
-class MSBOptimization : public PopOptimization
- {
-
- protected:
-
- Array<double> percentOfSteps;
- Array<double> percentOfRun;
-
- int expectedSteps;
- short nachMinimierung;
-
- virtual void zInitialize();
-
-
- public:
- MSBOptimization(Problem &s,int verf,int anz,Array<double> &pos,
- Array<double> &por);
-
-
- virtual ~MSBOptimization(){}
-
- virtual double minimize(int steps=-1);
-
-
- static void optimizeValues(Problem &p,int verfahren);
-
-
- Problem& bestProblem();
-
-
-};
-#endif
-
-
-
-
-
-
-
-
-
-
-
diff --git a/scripts/training/MGIZA/src/mkcls/MYOptimization.cpp b/scripts/training/MGIZA/src/mkcls/MYOptimization.cpp
deleted file mode 100644
index ced9d31..0000000
--- a/scripts/training/MGIZA/src/mkcls/MYOptimization.cpp
+++ /dev/null
@@ -1,85 +0,0 @@
-/*
-
-Copyright (C) 1997,1998,1999,2000,2001 Franz Josef Och
-
-mkcls - a program for making word classes .
-
-This program is free software; you can redistribute it and/or
-modify it under the terms of the GNU General Public License
-as published by the Free Software Foundation; either version 2
-of the License, or (at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with this program; if not, write to the Free Software
-Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
-USA.
-
-*/
-
-
-
-
-#include "MYOptimization.h"
-
-MYOptimization::MYOptimization(Problem &p,int m)
-: IterOptimization(p,m),acceptFlagsNumber(0),acceptions(0),total(0)
-{
-}
-MYOptimization::MYOptimization(MYOptimization &o)
-: IterOptimization(o),acceptFlagsNumber(0),acceptions(0),total(0)
-{
-}
-short MYOptimization::accept(double delta)
- {
- int doIt;
- int verbesserung = delta<0;
- if( delta < 0 )
- doIt=1;
- else
- {
- if(total>=NUMBER_OF_ACCEPTIONS)
- {
- double prob = acceptions/(float)(NUMBER_OF_ACCEPTIONS);
- double zuf = zufall01();
-
- doIt=zuf<prob;
- }
- else
- doIt=0;
- }
- if( total>=NUMBER_OF_ACCEPTIONS )
- {
- if( acceptFlags[acceptFlagsNumber] )
- acceptions--;
- }
- acceptFlags[acceptFlagsNumber]=verbesserung;
- if( verbesserung )
- acceptions++;
- total++;
- acceptFlagsNumber++;
- if(acceptFlagsNumber>=NUMBER_OF_ACCEPTIONS)
- acceptFlagsNumber=0;
- return doIt;
- }
-
-short MYOptimization::end()
- {
- return endFlag>0 && total>NUMBER_OF_ACCEPTIONS && acceptions==0;
- }
-void MYOptimization::abkuehlen()
- {
- }
-
-
-
-void MYOptimization::makeGraphOutput()
-{
- IterOptimization::makeGraphOutput();
- *GraphOutput << acceptions;
-}
-
diff --git a/scripts/training/MGIZA/src/mkcls/MYOptimization.h b/scripts/training/MGIZA/src/mkcls/MYOptimization.h
deleted file mode 100644
index a6ca70c..0000000
--- a/scripts/training/MGIZA/src/mkcls/MYOptimization.h
+++ /dev/null
@@ -1,61 +0,0 @@
-/*
-
-Copyright (C) 1997,1998,1999,2000,2001 Franz Josef Och
-
-mkcls - a program for making word classes .
-
-This program is free software; you can redistribute it and/or
-modify it under the terms of the GNU General Public License
-as published by the Free Software Foundation; either version 2
-of the License, or (at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with this program; if not, write to the Free Software
-Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
-USA.
-
-*/
-
-
-
-
-
-
-#ifndef MYOPTIMIZATION
-#define MYOPTIMIZATION
-#include "IterOptimization.h"
-
-#define NUMBER_OF_ACCEPTIONS 100
-
-class MYOptimization: public IterOptimization {
-
- protected:
- virtual short accept(double delta);
-
-
- virtual void abkuehlen();
-
-
- virtual short end();
-
-
- public:
- MYOptimization(Problem &p,int maxIter=-1);
-
-
- MYOptimization(MYOptimization &o);
-
-
- int acceptFlags[NUMBER_OF_ACCEPTIONS],acceptFlagsNumber;
- int acceptions,total;
-
- void makeGraphOutput();
-
-};
-
-#endif
diff --git a/scripts/training/MGIZA/src/mkcls/Makefile.am b/scripts/training/MGIZA/src/mkcls/Makefile.am
deleted file mode 100644
index b94e630..0000000
--- a/scripts/training/MGIZA/src/mkcls/Makefile.am
+++ /dev/null
@@ -1,53 +0,0 @@
-
-bin_PROGRAMS = \
- mkcls
-
-mkcls_SOURCES = \
- Array.h\
- FixedArray.h \
- FlexArray.h \
- GDAOptimization.cpp \
- GDAOptimization.h \
- general.cpp \
- general.h \
- HCOptimization.cpp \
- HCOptimization.h \
- IterOptimization.cpp \
- IterOptimization.h \
- KategProblem.cpp \
- KategProblem.h \
- KategProblemKBC.cpp \
- KategProblemKBC.h \
- KategProblemTest.cpp \
- KategProblemTest.h \
- KategProblemWBC.cpp \
- KategProblemWBC.h \
- Makefile.am \
- Makefile.am.bak \
- mkcls.cpp \
- my.h \
- myassert.h \
- myleda.h \
- MYOptimization.cpp \
- MYOptimization.h \
- mystl.h \
- Optimization.cpp \
- Optimization.h \
- Problem.cpp \
- Problem.h \
- ProblemTest.cpp \
- ProblemTest.h \
- RRTOptimization.cpp \
- RRTOptimization.h \
- SAOptimization.cpp \
- SAOptimization.h \
- StatVar.cpp \
- StatVar.h \
- TAOptimization.cpp \
- TAOptimization.h
-
-mkcls_CXXFLAGS = \
- -DNDEBUG
-
-## File created by the gnome-build tools
-
diff --git a/scripts/training/MGIZA/src/mkcls/Makefile.in b/scripts/training/MGIZA/src/mkcls/Makefile.in
deleted file mode 100644
index a08b2f2..0000000
--- a/scripts/training/MGIZA/src/mkcls/Makefile.in
+++ /dev/null
@@ -1,729 +0,0 @@
-# Makefile.in generated by automake 1.10.1 from Makefile.am.
-# @configure_input@
-
-# Copyright (C) 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002,
-# 2003, 2004, 2005, 2006, 2007, 2008 Free Software Foundation, Inc.
-# This Makefile.in is free software; the Free Software Foundation
-# gives unlimited permission to copy and/or distribute it,
-# with or without modifications, as long as this notice is preserved.
-
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY, to the extent permitted by law; without
-# even the implied warranty of MERCHANTABILITY or FITNESS FOR A
-# PARTICULAR PURPOSE.
-
-@SET_MAKE@
-
-VPATH = @srcdir@
-pkgdatadir = $(datadir)/@PACKAGE@
-pkglibdir = $(libdir)/@PACKAGE@
-pkgincludedir = $(includedir)/@PACKAGE@
-am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd
-install_sh_DATA = $(install_sh) -c -m 644
-install_sh_PROGRAM = $(install_sh) -c
-install_sh_SCRIPT = $(install_sh) -c
-INSTALL_HEADER = $(INSTALL_DATA)
-transform = $(program_transform_name)
-NORMAL_INSTALL = :
-PRE_INSTALL = :
-POST_INSTALL = :
-NORMAL_UNINSTALL = :
-PRE_UNINSTALL = :
-POST_UNINSTALL = :
-bin_PROGRAMS = mkcls$(EXEEXT)
-subdir = src/mkcls
-DIST_COMMON = $(srcdir)/Makefile.am $(srcdir)/Makefile.in
-ACLOCAL_M4 = $(top_srcdir)/aclocal.m4
-am__aclocal_m4_deps = $(top_srcdir)/configure.ac
-am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \
- $(ACLOCAL_M4)
-mkinstalldirs = $(install_sh) -d
-CONFIG_HEADER = $(top_builddir)/config.h
-CONFIG_CLEAN_FILES =
-am__installdirs = "$(DESTDIR)$(bindir)"
-binPROGRAMS_INSTALL = $(INSTALL_PROGRAM)
-PROGRAMS = $(bin_PROGRAMS)
-am_mkcls_OBJECTS = mkcls-GDAOptimization.$(OBJEXT) \
- mkcls-general.$(OBJEXT) mkcls-HCOptimization.$(OBJEXT) \
- mkcls-IterOptimization.$(OBJEXT) mkcls-KategProblem.$(OBJEXT) \
- mkcls-KategProblemKBC.$(OBJEXT) \
- mkcls-KategProblemTest.$(OBJEXT) \
- mkcls-KategProblemWBC.$(OBJEXT) mkcls-mkcls.$(OBJEXT) \
- mkcls-MYOptimization.$(OBJEXT) mkcls-Optimization.$(OBJEXT) \
- mkcls-Problem.$(OBJEXT) mkcls-ProblemTest.$(OBJEXT) \
- mkcls-RRTOptimization.$(OBJEXT) mkcls-SAOptimization.$(OBJEXT) \
- mkcls-StatVar.$(OBJEXT) mkcls-TAOptimization.$(OBJEXT)
-mkcls_OBJECTS = $(am_mkcls_OBJECTS)
-mkcls_LDADD = $(LDADD)
-mkcls_LINK = $(CXXLD) $(mkcls_CXXFLAGS) $(CXXFLAGS) $(AM_LDFLAGS) \
- $(LDFLAGS) -o $@
-DEFAULT_INCLUDES = -I.@am__isrc@ -I$(top_builddir)
-depcomp = $(SHELL) $(top_srcdir)/depcomp
-am__depfiles_maybe = depfiles
-CXXCOMPILE = $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) \
- $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CXXFLAGS) $(CXXFLAGS)
-CXXLD = $(CXX)
-CXXLINK = $(CXXLD) $(AM_CXXFLAGS) $(CXXFLAGS) $(AM_LDFLAGS) $(LDFLAGS) \
- -o $@
-COMPILE = $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) \
- $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS)
-CCLD = $(CC)
-LINK = $(CCLD) $(AM_CFLAGS) $(CFLAGS) $(AM_LDFLAGS) $(LDFLAGS) -o $@
-SOURCES = $(mkcls_SOURCES)
-DIST_SOURCES = $(mkcls_SOURCES)
-ETAGS = etags
-CTAGS = ctags
-DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST)
-ACLOCAL = @ACLOCAL@
-AMTAR = @AMTAR@
-AUTOCONF = @AUTOCONF@
-AUTOHEADER = @AUTOHEADER@
-AUTOMAKE = @AUTOMAKE@
-AWK = @AWK@
-CC = @CC@
-CCDEPMODE = @CCDEPMODE@
-CFLAGS = @CFLAGS@
-CPP = @CPP@
-CPPFLAGS = @CPPFLAGS@
-CXX = @CXX@
-CXXDEPMODE = @CXXDEPMODE@
-CXXFLAGS = @CXXFLAGS@
-CYGPATH_W = @CYGPATH_W@
-DEFS = @DEFS@
-DEPDIR = @DEPDIR@
-ECHO_C = @ECHO_C@
-ECHO_N = @ECHO_N@
-ECHO_T = @ECHO_T@
-EGREP = @EGREP@
-EXEEXT = @EXEEXT@
-GREP = @GREP@
-INSTALL = @INSTALL@
-INSTALL_DATA = @INSTALL_DATA@
-INSTALL_PROGRAM = @INSTALL_PROGRAM@
-INSTALL_SCRIPT = @INSTALL_SCRIPT@
-INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@
-LDFLAGS = @LDFLAGS@
-LIBOBJS = @LIBOBJS@
-LIBS = @LIBS@
-LTLIBOBJS = @LTLIBOBJS@
-MAINT = @MAINT@
-MAKEINFO = @MAKEINFO@
-MKDIR_P = @MKDIR_P@
-OBJEXT = @OBJEXT@
-PACKAGE = @PACKAGE@
-PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@
-PACKAGE_NAME = @PACKAGE_NAME@
-PACKAGE_STRING = @PACKAGE_STRING@
-PACKAGE_TARNAME = @PACKAGE_TARNAME@
-PACKAGE_VERSION = @PACKAGE_VERSION@
-PATH_SEPARATOR = @PATH_SEPARATOR@
-RANLIB = @RANLIB@
-SET_MAKE = @SET_MAKE@
-SHELL = @SHELL@
-STRIP = @STRIP@
-VERSION = @VERSION@
-abs_builddir = @abs_builddir@
-abs_srcdir = @abs_srcdir@
-abs_top_builddir = @abs_top_builddir@
-abs_top_srcdir = @abs_top_srcdir@
-ac_ct_CC = @ac_ct_CC@
-ac_ct_CXX = @ac_ct_CXX@
-am__include = @am__include@
-am__leading_dot = @am__leading_dot@
-am__quote = @am__quote@
-am__tar = @am__tar@
-am__untar = @am__untar@
-bindir = @bindir@
-build_alias = @build_alias@
-builddir = @builddir@
-datadir = @datadir@
-datarootdir = @datarootdir@
-docdir = @docdir@
-dvidir = @dvidir@
-exec_prefix = @exec_prefix@
-host_alias = @host_alias@
-htmldir = @htmldir@
-includedir = @includedir@
-infodir = @infodir@
-install_sh = @install_sh@
-libdir = @libdir@
-libexecdir = @libexecdir@
-localedir = @localedir@
-localstatedir = @localstatedir@
-mandir = @mandir@
-mkdir_p = @mkdir_p@
-oldincludedir = @oldincludedir@
-pdfdir = @pdfdir@
-prefix = @prefix@
-program_transform_name = @program_transform_name@
-psdir = @psdir@
-sbindir = @sbindir@
-sharedstatedir = @sharedstatedir@
-srcdir = @srcdir@
-sysconfdir = @sysconfdir@
-target_alias = @target_alias@
-top_build_prefix = @top_build_prefix@
-top_builddir = @top_builddir@
-top_srcdir = @top_srcdir@
-mkcls_SOURCES = \
- Array.h\
- FixedArray.h \
- FlexArray.h \
- GDAOptimization.cpp \
- GDAOptimization.h \
- general.cpp \
- general.h \
- HCOptimization.cpp \
- HCOptimization.h \
- IterOptimization.cpp \
- IterOptimization.h \
- KategProblem.cpp \
- KategProblem.h \
- KategProblemKBC.cpp \
- KategProblemKBC.h \
- KategProblemTest.cpp \
- KategProblemTest.h \
- KategProblemWBC.cpp \
- KategProblemWBC.h \
- Makefile.am \
- Makefile.am.bak \
- mkcls.cpp \
- my.h \
- myassert.h \
- myleda.h \
- MYOptimization.cpp \
- MYOptimization.h \
- mystl.h \
- Optimization.cpp \
- Optimization.h \
- Problem.cpp \
- Problem.h \
- ProblemTest.cpp \
- ProblemTest.h \
- RRTOptimization.cpp \
- RRTOptimization.h \
- SAOptimization.cpp \
- SAOptimization.h \
- StatVar.cpp \
- StatVar.h \
- TAOptimization.cpp \
- TAOptimization.h
-
-mkcls_CXXFLAGS = \
- -DNDEBUG
-
-all: all-am
-
-.SUFFIXES:
-.SUFFIXES: .cpp .o .obj
-$(srcdir)/Makefile.in: @MAINTAINER_MODE_TRUE@ $(srcdir)/Makefile.am $(am__configure_deps)
- @for dep in $?; do \
- case '$(am__configure_deps)' in \
- *$$dep*) \
- cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh \
- && exit 0; \
- exit 1;; \
- esac; \
- done; \
- echo ' cd $(top_srcdir) && $(AUTOMAKE) --gnu src/mkcls/Makefile'; \
- cd $(top_srcdir) && \
- $(AUTOMAKE) --gnu src/mkcls/Makefile
-.PRECIOUS: Makefile
-Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status
- @case '$?' in \
- *config.status*) \
- cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \
- *) \
- echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe)'; \
- cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe);; \
- esac;
-
-$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES)
- cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
-
-$(top_srcdir)/configure: @MAINTAINER_MODE_TRUE@ $(am__configure_deps)
- cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
-$(ACLOCAL_M4): @MAINTAINER_MODE_TRUE@ $(am__aclocal_m4_deps)
- cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
-install-binPROGRAMS: $(bin_PROGRAMS)
- @$(NORMAL_INSTALL)
- test -z "$(bindir)" || $(MKDIR_P) "$(DESTDIR)$(bindir)"
- @list='$(bin_PROGRAMS)'; for p in $$list; do \
- p1=`echo $$p|sed 's/$(EXEEXT)$$//'`; \
- if test -f $$p \
- ; then \
- f=`echo "$$p1" | sed 's,^.*/,,;$(transform);s/$$/$(EXEEXT)/'`; \
- echo " $(INSTALL_PROGRAM_ENV) $(binPROGRAMS_INSTALL) '$$p' '$(DESTDIR)$(bindir)/$$f'"; \
- $(INSTALL_PROGRAM_ENV) $(binPROGRAMS_INSTALL) "$$p" "$(DESTDIR)$(bindir)/$$f" || exit 1; \
- else :; fi; \
- done
-
-uninstall-binPROGRAMS:
- @$(NORMAL_UNINSTALL)
- @list='$(bin_PROGRAMS)'; for p in $$list; do \
- f=`echo "$$p" | sed 's,^.*/,,;s/$(EXEEXT)$$//;$(transform);s/$$/$(EXEEXT)/'`; \
- echo " rm -f '$(DESTDIR)$(bindir)/$$f'"; \
- rm -f "$(DESTDIR)$(bindir)/$$f"; \
- done
-
-clean-binPROGRAMS:
- -test -z "$(bin_PROGRAMS)" || rm -f $(bin_PROGRAMS)
-mkcls$(EXEEXT): $(mkcls_OBJECTS) $(mkcls_DEPENDENCIES)
- @rm -f mkcls$(EXEEXT)
- $(mkcls_LINK) $(mkcls_OBJECTS) $(mkcls_LDADD) $(LIBS)
-
-mostlyclean-compile:
- -rm -f *.$(OBJEXT)
-
-distclean-compile:
- -rm -f *.tab.c
-
-@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/mkcls-GDAOptimization.Po@am__quote@
-@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/mkcls-HCOptimization.Po@am__quote@
-@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/mkcls-IterOptimization.Po@am__quote@
-@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/mkcls-KategProblem.Po@am__quote@
-@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/mkcls-KategProblemKBC.Po@am__quote@
-@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/mkcls-KategProblemTest.Po@am__quote@
-@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/mkcls-KategProblemWBC.Po@am__quote@
-@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/mkcls-MYOptimization.Po@am__quote@
-@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/mkcls-Optimization.Po@am__quote@
-@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/mkcls-Problem.Po@am__quote@
-@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/mkcls-ProblemTest.Po@am__quote@
-@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/mkcls-RRTOptimization.Po@am__quote@
-@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/mkcls-SAOptimization.Po@am__quote@
-@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/mkcls-StatVar.Po@am__quote@
-@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/mkcls-TAOptimization.Po@am__quote@
-@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/mkcls-general.Po@am__quote@
-@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/mkcls-mkcls.Po@am__quote@
-
-.cpp.o:
-@am__fastdepCXX_TRUE@ $(CXXCOMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ $<
-@am__fastdepCXX_TRUE@ mv -f $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Po
-@AMDEP_TRUE@@am__fastdepCXX_FALSE@ source='$<' object='$@' libtool=no @AMDEPBACKSLASH@
-@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@
-@am__fastdepCXX_FALSE@ $(CXXCOMPILE) -c -o $@ $<
-
-.cpp.obj:
-@am__fastdepCXX_TRUE@ $(CXXCOMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ `$(CYGPATH_W) '$<'`
-@am__fastdepCXX_TRUE@ mv -f $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Po
-@AMDEP_TRUE@@am__fastdepCXX_FALSE@ source='$<' object='$@' libtool=no @AMDEPBACKSLASH@
-@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@
-@am__fastdepCXX_FALSE@ $(CXXCOMPILE) -c -o $@ `$(CYGPATH_W) '$<'`
-
-mkcls-GDAOptimization.o: GDAOptimization.cpp
-@am__fastdepCXX_TRUE@ $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(mkcls_CXXFLAGS) $(CXXFLAGS) -MT mkcls-GDAOptimization.o -MD -MP -MF $(DEPDIR)/mkcls-GDAOptimization.Tpo -c -o mkcls-GDAOptimization.o `test -f 'GDAOptimization.cpp' || echo '$(srcdir)/'`GDAOptimization.cpp
-@am__fastdepCXX_TRUE@ mv -f $(DEPDIR)/mkcls-GDAOptimization.Tpo $(DEPDIR)/mkcls-GDAOptimization.Po
-@AMDEP_TRUE@@am__fastdepCXX_FALSE@ source='GDAOptimization.cpp' object='mkcls-GDAOptimization.o' libtool=no @AMDEPBACKSLASH@
-@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@
-@am__fastdepCXX_FALSE@ $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(mkcls_CXXFLAGS) $(CXXFLAGS) -c -o mkcls-GDAOptimization.o `test -f 'GDAOptimization.cpp' || echo '$(srcdir)/'`GDAOptimization.cpp
-
-mkcls-GDAOptimization.obj: GDAOptimization.cpp
-@am__fastdepCXX_TRUE@ $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(mkcls_CXXFLAGS) $(CXXFLAGS) -MT mkcls-GDAOptimization.obj -MD -MP -MF $(DEPDIR)/mkcls-GDAOptimization.Tpo -c -o mkcls-GDAOptimization.obj `if test -f 'GDAOptimization.cpp'; then $(CYGPATH_W) 'GDAOptimization.cpp'; else $(CYGPATH_W) '$(srcdir)/GDAOptimization.cpp'; fi`
-@am__fastdepCXX_TRUE@ mv -f $(DEPDIR)/mkcls-GDAOptimization.Tpo $(DEPDIR)/mkcls-GDAOptimization.Po
-@AMDEP_TRUE@@am__fastdepCXX_FALSE@ source='GDAOptimization.cpp' object='mkcls-GDAOptimization.obj' libtool=no @AMDEPBACKSLASH@
-@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@
-@am__fastdepCXX_FALSE@ $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(mkcls_CXXFLAGS) $(CXXFLAGS) -c -o mkcls-GDAOptimization.obj `if test -f 'GDAOptimization.cpp'; then $(CYGPATH_W) 'GDAOptimization.cpp'; else $(CYGPATH_W) '$(srcdir)/GDAOptimization.cpp'; fi`
-
-mkcls-general.o: general.cpp
-@am__fastdepCXX_TRUE@ $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(mkcls_CXXFLAGS) $(CXXFLAGS) -MT mkcls-general.o -MD -MP -MF $(DEPDIR)/mkcls-general.Tpo -c -o mkcls-general.o `test -f 'general.cpp' || echo '$(srcdir)/'`general.cpp
-@am__fastdepCXX_TRUE@ mv -f $(DEPDIR)/mkcls-general.Tpo $(DEPDIR)/mkcls-general.Po
-@AMDEP_TRUE@@am__fastdepCXX_FALSE@ source='general.cpp' object='mkcls-general.o' libtool=no @AMDEPBACKSLASH@
-@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@
-@am__fastdepCXX_FALSE@ $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(mkcls_CXXFLAGS) $(CXXFLAGS) -c -o mkcls-general.o `test -f 'general.cpp' || echo '$(srcdir)/'`general.cpp
-
-mkcls-general.obj: general.cpp
-@am__fastdepCXX_TRUE@ $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(mkcls_CXXFLAGS) $(CXXFLAGS) -MT mkcls-general.obj -MD -MP -MF $(DEPDIR)/mkcls-general.Tpo -c -o mkcls-general.obj `if test -f 'general.cpp'; then $(CYGPATH_W) 'general.cpp'; else $(CYGPATH_W) '$(srcdir)/general.cpp'; fi`
-@am__fastdepCXX_TRUE@ mv -f $(DEPDIR)/mkcls-general.Tpo $(DEPDIR)/mkcls-general.Po
-@AMDEP_TRUE@@am__fastdepCXX_FALSE@ source='general.cpp' object='mkcls-general.obj' libtool=no @AMDEPBACKSLASH@
-@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@
-@am__fastdepCXX_FALSE@ $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(mkcls_CXXFLAGS) $(CXXFLAGS) -c -o mkcls-general.obj `if test -f 'general.cpp'; then $(CYGPATH_W) 'general.cpp'; else $(CYGPATH_W) '$(srcdir)/general.cpp'; fi`
-
-mkcls-HCOptimization.o: HCOptimization.cpp
-@am__fastdepCXX_TRUE@ $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(mkcls_CXXFLAGS) $(CXXFLAGS) -MT mkcls-HCOptimization.o -MD -MP -MF $(DEPDIR)/mkcls-HCOptimization.Tpo -c -o mkcls-HCOptimization.o `test -f 'HCOptimization.cpp' || echo '$(srcdir)/'`HCOptimization.cpp
-@am__fastdepCXX_TRUE@ mv -f $(DEPDIR)/mkcls-HCOptimization.Tpo $(DEPDIR)/mkcls-HCOptimization.Po
-@AMDEP_TRUE@@am__fastdepCXX_FALSE@ source='HCOptimization.cpp' object='mkcls-HCOptimization.o' libtool=no @AMDEPBACKSLASH@
-@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@
-@am__fastdepCXX_FALSE@ $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(mkcls_CXXFLAGS) $(CXXFLAGS) -c -o mkcls-HCOptimization.o `test -f 'HCOptimization.cpp' || echo '$(srcdir)/'`HCOptimization.cpp
-
-mkcls-HCOptimization.obj: HCOptimization.cpp
-@am__fastdepCXX_TRUE@ $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(mkcls_CXXFLAGS) $(CXXFLAGS) -MT mkcls-HCOptimization.obj -MD -MP -MF $(DEPDIR)/mkcls-HCOptimization.Tpo -c -o mkcls-HCOptimization.obj `if test -f 'HCOptimization.cpp'; then $(CYGPATH_W) 'HCOptimization.cpp'; else $(CYGPATH_W) '$(srcdir)/HCOptimization.cpp'; fi`
-@am__fastdepCXX_TRUE@ mv -f $(DEPDIR)/mkcls-HCOptimization.Tpo $(DEPDIR)/mkcls-HCOptimization.Po
-@AMDEP_TRUE@@am__fastdepCXX_FALSE@ source='HCOptimization.cpp' object='mkcls-HCOptimization.obj' libtool=no @AMDEPBACKSLASH@
-@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@
-@am__fastdepCXX_FALSE@ $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(mkcls_CXXFLAGS) $(CXXFLAGS) -c -o mkcls-HCOptimization.obj `if test -f 'HCOptimization.cpp'; then $(CYGPATH_W) 'HCOptimization.cpp'; else $(CYGPATH_W) '$(srcdir)/HCOptimization.cpp'; fi`
-
-mkcls-IterOptimization.o: IterOptimization.cpp
-@am__fastdepCXX_TRUE@ $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(mkcls_CXXFLAGS) $(CXXFLAGS) -MT mkcls-IterOptimization.o -MD -MP -MF $(DEPDIR)/mkcls-IterOptimization.Tpo -c -o mkcls-IterOptimization.o `test -f 'IterOptimization.cpp' || echo '$(srcdir)/'`IterOptimization.cpp
-@am__fastdepCXX_TRUE@ mv -f $(DEPDIR)/mkcls-IterOptimization.Tpo $(DEPDIR)/mkcls-IterOptimization.Po
-@AMDEP_TRUE@@am__fastdepCXX_FALSE@ source='IterOptimization.cpp' object='mkcls-IterOptimization.o' libtool=no @AMDEPBACKSLASH@
-@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@
-@am__fastdepCXX_FALSE@ $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(mkcls_CXXFLAGS) $(CXXFLAGS) -c -o mkcls-IterOptimization.o `test -f 'IterOptimization.cpp' || echo '$(srcdir)/'`IterOptimization.cpp
-
-mkcls-IterOptimization.obj: IterOptimization.cpp
-@am__fastdepCXX_TRUE@ $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(mkcls_CXXFLAGS) $(CXXFLAGS) -MT mkcls-IterOptimization.obj -MD -MP -MF $(DEPDIR)/mkcls-IterOptimization.Tpo -c -o mkcls-IterOptimization.obj `if test -f 'IterOptimization.cpp'; then $(CYGPATH_W) 'IterOptimization.cpp'; else $(CYGPATH_W) '$(srcdir)/IterOptimization.cpp'; fi`
-@am__fastdepCXX_TRUE@ mv -f $(DEPDIR)/mkcls-IterOptimization.Tpo $(DEPDIR)/mkcls-IterOptimization.Po
-@AMDEP_TRUE@@am__fastdepCXX_FALSE@ source='IterOptimization.cpp' object='mkcls-IterOptimization.obj' libtool=no @AMDEPBACKSLASH@
-@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@
-@am__fastdepCXX_FALSE@ $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(mkcls_CXXFLAGS) $(CXXFLAGS) -c -o mkcls-IterOptimization.obj `if test -f 'IterOptimization.cpp'; then $(CYGPATH_W) 'IterOptimization.cpp'; else $(CYGPATH_W) '$(srcdir)/IterOptimization.cpp'; fi`
-
-mkcls-KategProblem.o: KategProblem.cpp
-@am__fastdepCXX_TRUE@ $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(mkcls_CXXFLAGS) $(CXXFLAGS) -MT mkcls-KategProblem.o -MD -MP -MF $(DEPDIR)/mkcls-KategProblem.Tpo -c -o mkcls-KategProblem.o `test -f 'KategProblem.cpp' || echo '$(srcdir)/'`KategProblem.cpp
-@am__fastdepCXX_TRUE@ mv -f $(DEPDIR)/mkcls-KategProblem.Tpo $(DEPDIR)/mkcls-KategProblem.Po
-@AMDEP_TRUE@@am__fastdepCXX_FALSE@ source='KategProblem.cpp' object='mkcls-KategProblem.o' libtool=no @AMDEPBACKSLASH@
-@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@
-@am__fastdepCXX_FALSE@ $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(mkcls_CXXFLAGS) $(CXXFLAGS) -c -o mkcls-KategProblem.o `test -f 'KategProblem.cpp' || echo '$(srcdir)/'`KategProblem.cpp
-
-mkcls-KategProblem.obj: KategProblem.cpp
-@am__fastdepCXX_TRUE@ $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(mkcls_CXXFLAGS) $(CXXFLAGS) -MT mkcls-KategProblem.obj -MD -MP -MF $(DEPDIR)/mkcls-KategProblem.Tpo -c -o mkcls-KategProblem.obj `if test -f 'KategProblem.cpp'; then $(CYGPATH_W) 'KategProblem.cpp'; else $(CYGPATH_W) '$(srcdir)/KategProblem.cpp'; fi`
-@am__fastdepCXX_TRUE@ mv -f $(DEPDIR)/mkcls-KategProblem.Tpo $(DEPDIR)/mkcls-KategProblem.Po
-@AMDEP_TRUE@@am__fastdepCXX_FALSE@ source='KategProblem.cpp' object='mkcls-KategProblem.obj' libtool=no @AMDEPBACKSLASH@
-@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@
-@am__fastdepCXX_FALSE@ $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(mkcls_CXXFLAGS) $(CXXFLAGS) -c -o mkcls-KategProblem.obj `if test -f 'KategProblem.cpp'; then $(CYGPATH_W) 'KategProblem.cpp'; else $(CYGPATH_W) '$(srcdir)/KategProblem.cpp'; fi`
-
-mkcls-KategProblemKBC.o: KategProblemKBC.cpp
-@am__fastdepCXX_TRUE@ $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(mkcls_CXXFLAGS) $(CXXFLAGS) -MT mkcls-KategProblemKBC.o -MD -MP -MF $(DEPDIR)/mkcls-KategProblemKBC.Tpo -c -o mkcls-KategProblemKBC.o `test -f 'KategProblemKBC.cpp' || echo '$(srcdir)/'`KategProblemKBC.cpp
-@am__fastdepCXX_TRUE@ mv -f $(DEPDIR)/mkcls-KategProblemKBC.Tpo $(DEPDIR)/mkcls-KategProblemKBC.Po
-@AMDEP_TRUE@@am__fastdepCXX_FALSE@ source='KategProblemKBC.cpp' object='mkcls-KategProblemKBC.o' libtool=no @AMDEPBACKSLASH@
-@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@
-@am__fastdepCXX_FALSE@ $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(mkcls_CXXFLAGS) $(CXXFLAGS) -c -o mkcls-KategProblemKBC.o `test -f 'KategProblemKBC.cpp' || echo '$(srcdir)/'`KategProblemKBC.cpp
-
-mkcls-KategProblemKBC.obj: KategProblemKBC.cpp
-@am__fastdepCXX_TRUE@ $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(mkcls_CXXFLAGS) $(CXXFLAGS) -MT mkcls-KategProblemKBC.obj -MD -MP -MF $(DEPDIR)/mkcls-KategProblemKBC.Tpo -c -o mkcls-KategProblemKBC.obj `if test -f 'KategProblemKBC.cpp'; then $(CYGPATH_W) 'KategProblemKBC.cpp'; else $(CYGPATH_W) '$(srcdir)/KategProblemKBC.cpp'; fi`
-@am__fastdepCXX_TRUE@ mv -f $(DEPDIR)/mkcls-KategProblemKBC.Tpo $(DEPDIR)/mkcls-KategProblemKBC.Po
-@AMDEP_TRUE@@am__fastdepCXX_FALSE@ source='KategProblemKBC.cpp' object='mkcls-KategProblemKBC.obj' libtool=no @AMDEPBACKSLASH@
-@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@
-@am__fastdepCXX_FALSE@ $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(mkcls_CXXFLAGS) $(CXXFLAGS) -c -o mkcls-KategProblemKBC.obj `if test -f 'KategProblemKBC.cpp'; then $(CYGPATH_W) 'KategProblemKBC.cpp'; else $(CYGPATH_W) '$(srcdir)/KategProblemKBC.cpp'; fi`
-
-mkcls-KategProblemTest.o: KategProblemTest.cpp
-@am__fastdepCXX_TRUE@ $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(mkcls_CXXFLAGS) $(CXXFLAGS) -MT mkcls-KategProblemTest.o -MD -MP -MF $(DEPDIR)/mkcls-KategProblemTest.Tpo -c -o mkcls-KategProblemTest.o `test -f 'KategProblemTest.cpp' || echo '$(srcdir)/'`KategProblemTest.cpp
-@am__fastdepCXX_TRUE@ mv -f $(DEPDIR)/mkcls-KategProblemTest.Tpo $(DEPDIR)/mkcls-KategProblemTest.Po
-@AMDEP_TRUE@@am__fastdepCXX_FALSE@ source='KategProblemTest.cpp' object='mkcls-KategProblemTest.o' libtool=no @AMDEPBACKSLASH@
-@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@
-@am__fastdepCXX_FALSE@ $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(mkcls_CXXFLAGS) $(CXXFLAGS) -c -o mkcls-KategProblemTest.o `test -f 'KategProblemTest.cpp' || echo '$(srcdir)/'`KategProblemTest.cpp
-
-mkcls-KategProblemTest.obj: KategProblemTest.cpp
-@am__fastdepCXX_TRUE@ $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(mkcls_CXXFLAGS) $(CXXFLAGS) -MT mkcls-KategProblemTest.obj -MD -MP -MF $(DEPDIR)/mkcls-KategProblemTest.Tpo -c -o mkcls-KategProblemTest.obj `if test -f 'KategProblemTest.cpp'; then $(CYGPATH_W) 'KategProblemTest.cpp'; else $(CYGPATH_W) '$(srcdir)/KategProblemTest.cpp'; fi`
-@am__fastdepCXX_TRUE@ mv -f $(DEPDIR)/mkcls-KategProblemTest.Tpo $(DEPDIR)/mkcls-KategProblemTest.Po
-@AMDEP_TRUE@@am__fastdepCXX_FALSE@ source='KategProblemTest.cpp' object='mkcls-KategProblemTest.obj' libtool=no @AMDEPBACKSLASH@
-@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@
-@am__fastdepCXX_FALSE@ $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(mkcls_CXXFLAGS) $(CXXFLAGS) -c -o mkcls-KategProblemTest.obj `if test -f 'KategProblemTest.cpp'; then $(CYGPATH_W) 'KategProblemTest.cpp'; else $(CYGPATH_W) '$(srcdir)/KategProblemTest.cpp'; fi`
-
-mkcls-KategProblemWBC.o: KategProblemWBC.cpp
-@am__fastdepCXX_TRUE@ $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(mkcls_CXXFLAGS) $(CXXFLAGS) -MT mkcls-KategProblemWBC.o -MD -MP -MF $(DEPDIR)/mkcls-KategProblemWBC.Tpo -c -o mkcls-KategProblemWBC.o `test -f 'KategProblemWBC.cpp' || echo '$(srcdir)/'`KategProblemWBC.cpp
-@am__fastdepCXX_TRUE@ mv -f $(DEPDIR)/mkcls-KategProblemWBC.Tpo $(DEPDIR)/mkcls-KategProblemWBC.Po
-@AMDEP_TRUE@@am__fastdepCXX_FALSE@ source='KategProblemWBC.cpp' object='mkcls-KategProblemWBC.o' libtool=no @AMDEPBACKSLASH@
-@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@
-@am__fastdepCXX_FALSE@ $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(mkcls_CXXFLAGS) $(CXXFLAGS) -c -o mkcls-KategProblemWBC.o `test -f 'KategProblemWBC.cpp' || echo '$(srcdir)/'`KategProblemWBC.cpp
-
-mkcls-KategProblemWBC.obj: KategProblemWBC.cpp
-@am__fastdepCXX_TRUE@ $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(mkcls_CXXFLAGS) $(CXXFLAGS) -MT mkcls-KategProblemWBC.obj -MD -MP -MF $(DEPDIR)/mkcls-KategProblemWBC.Tpo -c -o mkcls-KategProblemWBC.obj `if test -f 'KategProblemWBC.cpp'; then $(CYGPATH_W) 'KategProblemWBC.cpp'; else $(CYGPATH_W) '$(srcdir)/KategProblemWBC.cpp'; fi`
-@am__fastdepCXX_TRUE@ mv -f $(DEPDIR)/mkcls-KategProblemWBC.Tpo $(DEPDIR)/mkcls-KategProblemWBC.Po
-@AMDEP_TRUE@@am__fastdepCXX_FALSE@ source='KategProblemWBC.cpp' object='mkcls-KategProblemWBC.obj' libtool=no @AMDEPBACKSLASH@
-@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@
-@am__fastdepCXX_FALSE@ $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(mkcls_CXXFLAGS) $(CXXFLAGS) -c -o mkcls-KategProblemWBC.obj `if test -f 'KategProblemWBC.cpp'; then $(CYGPATH_W) 'KategProblemWBC.cpp'; else $(CYGPATH_W) '$(srcdir)/KategProblemWBC.cpp'; fi`
-
-mkcls-mkcls.o: mkcls.cpp
-@am__fastdepCXX_TRUE@ $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(mkcls_CXXFLAGS) $(CXXFLAGS) -MT mkcls-mkcls.o -MD -MP -MF $(DEPDIR)/mkcls-mkcls.Tpo -c -o mkcls-mkcls.o `test -f 'mkcls.cpp' || echo '$(srcdir)/'`mkcls.cpp
-@am__fastdepCXX_TRUE@ mv -f $(DEPDIR)/mkcls-mkcls.Tpo $(DEPDIR)/mkcls-mkcls.Po
-@AMDEP_TRUE@@am__fastdepCXX_FALSE@ source='mkcls.cpp' object='mkcls-mkcls.o' libtool=no @AMDEPBACKSLASH@
-@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@
-@am__fastdepCXX_FALSE@ $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(mkcls_CXXFLAGS) $(CXXFLAGS) -c -o mkcls-mkcls.o `test -f 'mkcls.cpp' || echo '$(srcdir)/'`mkcls.cpp
-
-mkcls-mkcls.obj: mkcls.cpp
-@am__fastdepCXX_TRUE@ $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(mkcls_CXXFLAGS) $(CXXFLAGS) -MT mkcls-mkcls.obj -MD -MP -MF $(DEPDIR)/mkcls-mkcls.Tpo -c -o mkcls-mkcls.obj `if test -f 'mkcls.cpp'; then $(CYGPATH_W) 'mkcls.cpp'; else $(CYGPATH_W) '$(srcdir)/mkcls.cpp'; fi`
-@am__fastdepCXX_TRUE@ mv -f $(DEPDIR)/mkcls-mkcls.Tpo $(DEPDIR)/mkcls-mkcls.Po
-@AMDEP_TRUE@@am__fastdepCXX_FALSE@ source='mkcls.cpp' object='mkcls-mkcls.obj' libtool=no @AMDEPBACKSLASH@
-@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@
-@am__fastdepCXX_FALSE@ $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(mkcls_CXXFLAGS) $(CXXFLAGS) -c -o mkcls-mkcls.obj `if test -f 'mkcls.cpp'; then $(CYGPATH_W) 'mkcls.cpp'; else $(CYGPATH_W) '$(srcdir)/mkcls.cpp'; fi`
-
-mkcls-MYOptimization.o: MYOptimization.cpp
-@am__fastdepCXX_TRUE@ $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(mkcls_CXXFLAGS) $(CXXFLAGS) -MT mkcls-MYOptimization.o -MD -MP -MF $(DEPDIR)/mkcls-MYOptimization.Tpo -c -o mkcls-MYOptimization.o `test -f 'MYOptimization.cpp' || echo '$(srcdir)/'`MYOptimization.cpp
-@am__fastdepCXX_TRUE@ mv -f $(DEPDIR)/mkcls-MYOptimization.Tpo $(DEPDIR)/mkcls-MYOptimization.Po
-@AMDEP_TRUE@@am__fastdepCXX_FALSE@ source='MYOptimization.cpp' object='mkcls-MYOptimization.o' libtool=no @AMDEPBACKSLASH@
-@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@
-@am__fastdepCXX_FALSE@ $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(mkcls_CXXFLAGS) $(CXXFLAGS) -c -o mkcls-MYOptimization.o `test -f 'MYOptimization.cpp' || echo '$(srcdir)/'`MYOptimization.cpp
-
-mkcls-MYOptimization.obj: MYOptimization.cpp
-@am__fastdepCXX_TRUE@ $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(mkcls_CXXFLAGS) $(CXXFLAGS) -MT mkcls-MYOptimization.obj -MD -MP -MF $(DEPDIR)/mkcls-MYOptimization.Tpo -c -o mkcls-MYOptimization.obj `if test -f 'MYOptimization.cpp'; then $(CYGPATH_W) 'MYOptimization.cpp'; else $(CYGPATH_W) '$(srcdir)/MYOptimization.cpp'; fi`
-@am__fastdepCXX_TRUE@ mv -f $(DEPDIR)/mkcls-MYOptimization.Tpo $(DEPDIR)/mkcls-MYOptimization.Po
-@AMDEP_TRUE@@am__fastdepCXX_FALSE@ source='MYOptimization.cpp' object='mkcls-MYOptimization.obj' libtool=no @AMDEPBACKSLASH@
-@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@
-@am__fastdepCXX_FALSE@ $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(mkcls_CXXFLAGS) $(CXXFLAGS) -c -o mkcls-MYOptimization.obj `if test -f 'MYOptimization.cpp'; then $(CYGPATH_W) 'MYOptimization.cpp'; else $(CYGPATH_W) '$(srcdir)/MYOptimization.cpp'; fi`
-
-mkcls-Optimization.o: Optimization.cpp
-@am__fastdepCXX_TRUE@ $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(mkcls_CXXFLAGS) $(CXXFLAGS) -MT mkcls-Optimization.o -MD -MP -MF $(DEPDIR)/mkcls-Optimization.Tpo -c -o mkcls-Optimization.o `test -f 'Optimization.cpp' || echo '$(srcdir)/'`Optimization.cpp
-@am__fastdepCXX_TRUE@ mv -f $(DEPDIR)/mkcls-Optimization.Tpo $(DEPDIR)/mkcls-Optimization.Po
-@AMDEP_TRUE@@am__fastdepCXX_FALSE@ source='Optimization.cpp' object='mkcls-Optimization.o' libtool=no @AMDEPBACKSLASH@
-@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@
-@am__fastdepCXX_FALSE@ $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(mkcls_CXXFLAGS) $(CXXFLAGS) -c -o mkcls-Optimization.o `test -f 'Optimization.cpp' || echo '$(srcdir)/'`Optimization.cpp
-
-mkcls-Optimization.obj: Optimization.cpp
-@am__fastdepCXX_TRUE@ $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(mkcls_CXXFLAGS) $(CXXFLAGS) -MT mkcls-Optimization.obj -MD -MP -MF $(DEPDIR)/mkcls-Optimization.Tpo -c -o mkcls-Optimization.obj `if test -f 'Optimization.cpp'; then $(CYGPATH_W) 'Optimization.cpp'; else $(CYGPATH_W) '$(srcdir)/Optimization.cpp'; fi`
-@am__fastdepCXX_TRUE@ mv -f $(DEPDIR)/mkcls-Optimization.Tpo $(DEPDIR)/mkcls-Optimization.Po
-@AMDEP_TRUE@@am__fastdepCXX_FALSE@ source='Optimization.cpp' object='mkcls-Optimization.obj' libtool=no @AMDEPBACKSLASH@
-@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@
-@am__fastdepCXX_FALSE@ $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(mkcls_CXXFLAGS) $(CXXFLAGS) -c -o mkcls-Optimization.obj `if test -f 'Optimization.cpp'; then $(CYGPATH_W) 'Optimization.cpp'; else $(CYGPATH_W) '$(srcdir)/Optimization.cpp'; fi`
-
-mkcls-Problem.o: Problem.cpp
-@am__fastdepCXX_TRUE@ $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(mkcls_CXXFLAGS) $(CXXFLAGS) -MT mkcls-Problem.o -MD -MP -MF $(DEPDIR)/mkcls-Problem.Tpo -c -o mkcls-Problem.o `test -f 'Problem.cpp' || echo '$(srcdir)/'`Problem.cpp
-@am__fastdepCXX_TRUE@ mv -f $(DEPDIR)/mkcls-Problem.Tpo $(DEPDIR)/mkcls-Problem.Po
-@AMDEP_TRUE@@am__fastdepCXX_FALSE@ source='Problem.cpp' object='mkcls-Problem.o' libtool=no @AMDEPBACKSLASH@
-@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@
-@am__fastdepCXX_FALSE@ $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(mkcls_CXXFLAGS) $(CXXFLAGS) -c -o mkcls-Problem.o `test -f 'Problem.cpp' || echo '$(srcdir)/'`Problem.cpp
-
-mkcls-Problem.obj: Problem.cpp
-@am__fastdepCXX_TRUE@ $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(mkcls_CXXFLAGS) $(CXXFLAGS) -MT mkcls-Problem.obj -MD -MP -MF $(DEPDIR)/mkcls-Problem.Tpo -c -o mkcls-Problem.obj `if test -f 'Problem.cpp'; then $(CYGPATH_W) 'Problem.cpp'; else $(CYGPATH_W) '$(srcdir)/Problem.cpp'; fi`
-@am__fastdepCXX_TRUE@ mv -f $(DEPDIR)/mkcls-Problem.Tpo $(DEPDIR)/mkcls-Problem.Po
-@AMDEP_TRUE@@am__fastdepCXX_FALSE@ source='Problem.cpp' object='mkcls-Problem.obj' libtool=no @AMDEPBACKSLASH@
-@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@
-@am__fastdepCXX_FALSE@ $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(mkcls_CXXFLAGS) $(CXXFLAGS) -c -o mkcls-Problem.obj `if test -f 'Problem.cpp'; then $(CYGPATH_W) 'Problem.cpp'; else $(CYGPATH_W) '$(srcdir)/Problem.cpp'; fi`
-
-mkcls-ProblemTest.o: ProblemTest.cpp
-@am__fastdepCXX_TRUE@ $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(mkcls_CXXFLAGS) $(CXXFLAGS) -MT mkcls-ProblemTest.o -MD -MP -MF $(DEPDIR)/mkcls-ProblemTest.Tpo -c -o mkcls-ProblemTest.o `test -f 'ProblemTest.cpp' || echo '$(srcdir)/'`ProblemTest.cpp
-@am__fastdepCXX_TRUE@ mv -f $(DEPDIR)/mkcls-ProblemTest.Tpo $(DEPDIR)/mkcls-ProblemTest.Po
-@AMDEP_TRUE@@am__fastdepCXX_FALSE@ source='ProblemTest.cpp' object='mkcls-ProblemTest.o' libtool=no @AMDEPBACKSLASH@
-@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@
-@am__fastdepCXX_FALSE@ $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(mkcls_CXXFLAGS) $(CXXFLAGS) -c -o mkcls-ProblemTest.o `test -f 'ProblemTest.cpp' || echo '$(srcdir)/'`ProblemTest.cpp
-
-mkcls-ProblemTest.obj: ProblemTest.cpp
-@am__fastdepCXX_TRUE@ $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(mkcls_CXXFLAGS) $(CXXFLAGS) -MT mkcls-ProblemTest.obj -MD -MP -MF $(DEPDIR)/mkcls-ProblemTest.Tpo -c -o mkcls-ProblemTest.obj `if test -f 'ProblemTest.cpp'; then $(CYGPATH_W) 'ProblemTest.cpp'; else $(CYGPATH_W) '$(srcdir)/ProblemTest.cpp'; fi`
-@am__fastdepCXX_TRUE@ mv -f $(DEPDIR)/mkcls-ProblemTest.Tpo $(DEPDIR)/mkcls-ProblemTest.Po
-@AMDEP_TRUE@@am__fastdepCXX_FALSE@ source='ProblemTest.cpp' object='mkcls-ProblemTest.obj' libtool=no @AMDEPBACKSLASH@
-@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@
-@am__fastdepCXX_FALSE@ $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(mkcls_CXXFLAGS) $(CXXFLAGS) -c -o mkcls-ProblemTest.obj `if test -f 'ProblemTest.cpp'; then $(CYGPATH_W) 'ProblemTest.cpp'; else $(CYGPATH_W) '$(srcdir)/ProblemTest.cpp'; fi`
-
-mkcls-RRTOptimization.o: RRTOptimization.cpp
-@am__fastdepCXX_TRUE@ $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(mkcls_CXXFLAGS) $(CXXFLAGS) -MT mkcls-RRTOptimization.o -MD -MP -MF $(DEPDIR)/mkcls-RRTOptimization.Tpo -c -o mkcls-RRTOptimization.o `test -f 'RRTOptimization.cpp' || echo '$(srcdir)/'`RRTOptimization.cpp
-@am__fastdepCXX_TRUE@ mv -f $(DEPDIR)/mkcls-RRTOptimization.Tpo $(DEPDIR)/mkcls-RRTOptimization.Po
-@AMDEP_TRUE@@am__fastdepCXX_FALSE@ source='RRTOptimization.cpp' object='mkcls-RRTOptimization.o' libtool=no @AMDEPBACKSLASH@
-@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@
-@am__fastdepCXX_FALSE@ $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(mkcls_CXXFLAGS) $(CXXFLAGS) -c -o mkcls-RRTOptimization.o `test -f 'RRTOptimization.cpp' || echo '$(srcdir)/'`RRTOptimization.cpp
-
-mkcls-RRTOptimization.obj: RRTOptimization.cpp
-@am__fastdepCXX_TRUE@ $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(mkcls_CXXFLAGS) $(CXXFLAGS) -MT mkcls-RRTOptimization.obj -MD -MP -MF $(DEPDIR)/mkcls-RRTOptimization.Tpo -c -o mkcls-RRTOptimization.obj `if test -f 'RRTOptimization.cpp'; then $(CYGPATH_W) 'RRTOptimization.cpp'; else $(CYGPATH_W) '$(srcdir)/RRTOptimization.cpp'; fi`
-@am__fastdepCXX_TRUE@ mv -f $(DEPDIR)/mkcls-RRTOptimization.Tpo $(DEPDIR)/mkcls-RRTOptimization.Po
-@AMDEP_TRUE@@am__fastdepCXX_FALSE@ source='RRTOptimization.cpp' object='mkcls-RRTOptimization.obj' libtool=no @AMDEPBACKSLASH@
-@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@
-@am__fastdepCXX_FALSE@ $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(mkcls_CXXFLAGS) $(CXXFLAGS) -c -o mkcls-RRTOptimization.obj `if test -f 'RRTOptimization.cpp'; then $(CYGPATH_W) 'RRTOptimization.cpp'; else $(CYGPATH_W) '$(srcdir)/RRTOptimization.cpp'; fi`
-
-mkcls-SAOptimization.o: SAOptimization.cpp
-@am__fastdepCXX_TRUE@ $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(mkcls_CXXFLAGS) $(CXXFLAGS) -MT mkcls-SAOptimization.o -MD -MP -MF $(DEPDIR)/mkcls-SAOptimization.Tpo -c -o mkcls-SAOptimization.o `test -f 'SAOptimization.cpp' || echo '$(srcdir)/'`SAOptimization.cpp
-@am__fastdepCXX_TRUE@ mv -f $(DEPDIR)/mkcls-SAOptimization.Tpo $(DEPDIR)/mkcls-SAOptimization.Po
-@AMDEP_TRUE@@am__fastdepCXX_FALSE@ source='SAOptimization.cpp' object='mkcls-SAOptimization.o' libtool=no @AMDEPBACKSLASH@
-@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@
-@am__fastdepCXX_FALSE@ $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(mkcls_CXXFLAGS) $(CXXFLAGS) -c -o mkcls-SAOptimization.o `test -f 'SAOptimization.cpp' || echo '$(srcdir)/'`SAOptimization.cpp
-
-mkcls-SAOptimization.obj: SAOptimization.cpp
-@am__fastdepCXX_TRUE@ $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(mkcls_CXXFLAGS) $(CXXFLAGS) -MT mkcls-SAOptimization.obj -MD -MP -MF $(DEPDIR)/mkcls-SAOptimization.Tpo -c -o mkcls-SAOptimization.obj `if test -f 'SAOptimization.cpp'; then $(CYGPATH_W) 'SAOptimization.cpp'; else $(CYGPATH_W) '$(srcdir)/SAOptimization.cpp'; fi`
-@am__fastdepCXX_TRUE@ mv -f $(DEPDIR)/mkcls-SAOptimization.Tpo $(DEPDIR)/mkcls-SAOptimization.Po
-@AMDEP_TRUE@@am__fastdepCXX_FALSE@ source='SAOptimization.cpp' object='mkcls-SAOptimization.obj' libtool=no @AMDEPBACKSLASH@
-@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@
-@am__fastdepCXX_FALSE@ $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(mkcls_CXXFLAGS) $(CXXFLAGS) -c -o mkcls-SAOptimization.obj `if test -f 'SAOptimization.cpp'; then $(CYGPATH_W) 'SAOptimization.cpp'; else $(CYGPATH_W) '$(srcdir)/SAOptimization.cpp'; fi`
-
-mkcls-StatVar.o: StatVar.cpp
-@am__fastdepCXX_TRUE@ $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(mkcls_CXXFLAGS) $(CXXFLAGS) -MT mkcls-StatVar.o -MD -MP -MF $(DEPDIR)/mkcls-StatVar.Tpo -c -o mkcls-StatVar.o `test -f 'StatVar.cpp' || echo '$(srcdir)/'`StatVar.cpp
-@am__fastdepCXX_TRUE@ mv -f $(DEPDIR)/mkcls-StatVar.Tpo $(DEPDIR)/mkcls-StatVar.Po
-@AMDEP_TRUE@@am__fastdepCXX_FALSE@ source='StatVar.cpp' object='mkcls-StatVar.o' libtool=no @AMDEPBACKSLASH@
-@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@
-@am__fastdepCXX_FALSE@ $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(mkcls_CXXFLAGS) $(CXXFLAGS) -c -o mkcls-StatVar.o `test -f 'StatVar.cpp' || echo '$(srcdir)/'`StatVar.cpp
-
-mkcls-StatVar.obj: StatVar.cpp
-@am__fastdepCXX_TRUE@ $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(mkcls_CXXFLAGS) $(CXXFLAGS) -MT mkcls-StatVar.obj -MD -MP -MF $(DEPDIR)/mkcls-StatVar.Tpo -c -o mkcls-StatVar.obj `if test -f 'StatVar.cpp'; then $(CYGPATH_W) 'StatVar.cpp'; else $(CYGPATH_W) '$(srcdir)/StatVar.cpp'; fi`
-@am__fastdepCXX_TRUE@ mv -f $(DEPDIR)/mkcls-StatVar.Tpo $(DEPDIR)/mkcls-StatVar.Po
-@AMDEP_TRUE@@am__fastdepCXX_FALSE@ source='StatVar.cpp' object='mkcls-StatVar.obj' libtool=no @AMDEPBACKSLASH@
-@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@
-@am__fastdepCXX_FALSE@ $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(mkcls_CXXFLAGS) $(CXXFLAGS) -c -o mkcls-StatVar.obj `if test -f 'StatVar.cpp'; then $(CYGPATH_W) 'StatVar.cpp'; else $(CYGPATH_W) '$(srcdir)/StatVar.cpp'; fi`
-
-mkcls-TAOptimization.o: TAOptimization.cpp
-@am__fastdepCXX_TRUE@ $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(mkcls_CXXFLAGS) $(CXXFLAGS) -MT mkcls-TAOptimization.o -MD -MP -MF $(DEPDIR)/mkcls-TAOptimization.Tpo -c -o mkcls-TAOptimization.o `test -f 'TAOptimization.cpp' || echo '$(srcdir)/'`TAOptimization.cpp
-@am__fastdepCXX_TRUE@ mv -f $(DEPDIR)/mkcls-TAOptimization.Tpo $(DEPDIR)/mkcls-TAOptimization.Po
-@AMDEP_TRUE@@am__fastdepCXX_FALSE@ source='TAOptimization.cpp' object='mkcls-TAOptimization.o' libtool=no @AMDEPBACKSLASH@
-@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@
-@am__fastdepCXX_FALSE@ $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(mkcls_CXXFLAGS) $(CXXFLAGS) -c -o mkcls-TAOptimization.o `test -f 'TAOptimization.cpp' || echo '$(srcdir)/'`TAOptimization.cpp
-
-mkcls-TAOptimization.obj: TAOptimization.cpp
-@am__fastdepCXX_TRUE@ $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(mkcls_CXXFLAGS) $(CXXFLAGS) -MT mkcls-TAOptimization.obj -MD -MP -MF $(DEPDIR)/mkcls-TAOptimization.Tpo -c -o mkcls-TAOptimization.obj `if test -f 'TAOptimization.cpp'; then $(CYGPATH_W) 'TAOptimization.cpp'; else $(CYGPATH_W) '$(srcdir)/TAOptimization.cpp'; fi`
-@am__fastdepCXX_TRUE@ mv -f $(DEPDIR)/mkcls-TAOptimization.Tpo $(DEPDIR)/mkcls-TAOptimization.Po
-@AMDEP_TRUE@@am__fastdepCXX_FALSE@ source='TAOptimization.cpp' object='mkcls-TAOptimization.obj' libtool=no @AMDEPBACKSLASH@
-@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@
-@am__fastdepCXX_FALSE@ $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(mkcls_CXXFLAGS) $(CXXFLAGS) -c -o mkcls-TAOptimization.obj `if test -f 'TAOptimization.cpp'; then $(CYGPATH_W) 'TAOptimization.cpp'; else $(CYGPATH_W) '$(srcdir)/TAOptimization.cpp'; fi`
-
-ID: $(HEADERS) $(SOURCES) $(LISP) $(TAGS_FILES)
- list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \
- unique=`for i in $$list; do \
- if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
- done | \
- $(AWK) '{ files[$$0] = 1; nonemtpy = 1; } \
- END { if (nonempty) { for (i in files) print i; }; }'`; \
- mkid -fID $$unique
-tags: TAGS
-
-TAGS: $(HEADERS) $(SOURCES) $(TAGS_DEPENDENCIES) \
- $(TAGS_FILES) $(LISP)
- tags=; \
- here=`pwd`; \
- list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \
- unique=`for i in $$list; do \
- if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
- done | \
- $(AWK) '{ files[$$0] = 1; nonempty = 1; } \
- END { if (nonempty) { for (i in files) print i; }; }'`; \
- if test -z "$(ETAGS_ARGS)$$tags$$unique"; then :; else \
- test -n "$$unique" || unique=$$empty_fix; \
- $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \
- $$tags $$unique; \
- fi
-ctags: CTAGS
-CTAGS: $(HEADERS) $(SOURCES) $(TAGS_DEPENDENCIES) \
- $(TAGS_FILES) $(LISP)
- tags=; \
- list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \
- unique=`for i in $$list; do \
- if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
- done | \
- $(AWK) '{ files[$$0] = 1; nonempty = 1; } \
- END { if (nonempty) { for (i in files) print i; }; }'`; \
- test -z "$(CTAGS_ARGS)$$tags$$unique" \
- || $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \
- $$tags $$unique
-
-GTAGS:
- here=`$(am__cd) $(top_builddir) && pwd` \
- && cd $(top_srcdir) \
- && gtags -i $(GTAGS_ARGS) $$here
-
-distclean-tags:
- -rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH tags
-
-distdir: $(DISTFILES)
- @srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \
- topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \
- list='$(DISTFILES)'; \
- dist_files=`for file in $$list; do echo $$file; done | \
- sed -e "s|^$$srcdirstrip/||;t" \
- -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \
- case $$dist_files in \
- */*) $(MKDIR_P) `echo "$$dist_files" | \
- sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \
- sort -u` ;; \
- esac; \
- for file in $$dist_files; do \
- if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \
- if test -d $$d/$$file; then \
- dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \
- if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \
- cp -pR $(srcdir)/$$file $(distdir)$$dir || exit 1; \
- fi; \
- cp -pR $$d/$$file $(distdir)$$dir || exit 1; \
- else \
- test -f $(distdir)/$$file \
- || cp -p $$d/$$file $(distdir)/$$file \
- || exit 1; \
- fi; \
- done
-check-am: all-am
-check: check-am
-all-am: Makefile $(PROGRAMS)
-installdirs:
- for dir in "$(DESTDIR)$(bindir)"; do \
- test -z "$$dir" || $(MKDIR_P) "$$dir"; \
- done
-install: install-am
-install-exec: install-exec-am
-install-data: install-data-am
-uninstall: uninstall-am
-
-install-am: all-am
- @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am
-
-installcheck: installcheck-am
-install-strip:
- $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
- install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
- `test -z '$(STRIP)' || \
- echo "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'"` install
-mostlyclean-generic:
-
-clean-generic:
-
-distclean-generic:
- -test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES)
-
-maintainer-clean-generic:
- @echo "This command is intended for maintainers to use"
- @echo "it deletes files that may require special tools to rebuild."
-clean: clean-am
-
-clean-am: clean-binPROGRAMS clean-generic mostlyclean-am
-
-distclean: distclean-am
- -rm -rf ./$(DEPDIR)
- -rm -f Makefile
-distclean-am: clean-am distclean-compile distclean-generic \
- distclean-tags
-
-dvi: dvi-am
-
-dvi-am:
-
-html: html-am
-
-info: info-am
-
-info-am:
-
-install-data-am:
-
-install-dvi: install-dvi-am
-
-install-exec-am: install-binPROGRAMS
-
-install-html: install-html-am
-
-install-info: install-info-am
-
-install-man:
-
-install-pdf: install-pdf-am
-
-install-ps: install-ps-am
-
-installcheck-am:
-
-maintainer-clean: maintainer-clean-am
- -rm -rf ./$(DEPDIR)
- -rm -f Makefile
-maintainer-clean-am: distclean-am maintainer-clean-generic
-
-mostlyclean: mostlyclean-am
-
-mostlyclean-am: mostlyclean-compile mostlyclean-generic
-
-pdf: pdf-am
-
-pdf-am:
-
-ps: ps-am
-
-ps-am:
-
-uninstall-am: uninstall-binPROGRAMS
-
-.MAKE: install-am install-strip
-
-.PHONY: CTAGS GTAGS all all-am check check-am clean clean-binPROGRAMS \
- clean-generic ctags distclean distclean-compile \
- distclean-generic distclean-tags distdir dvi dvi-am html \
- html-am info info-am install install-am install-binPROGRAMS \
- install-data install-data-am install-dvi install-dvi-am \
- install-exec install-exec-am install-html install-html-am \
- install-info install-info-am install-man install-pdf \
- install-pdf-am install-ps install-ps-am install-strip \
- installcheck installcheck-am installdirs maintainer-clean \
- maintainer-clean-generic mostlyclean mostlyclean-compile \
- mostlyclean-generic pdf pdf-am ps ps-am tags uninstall \
- uninstall-am uninstall-binPROGRAMS
-
-# Tell versions [3.59,3.63) of GNU make to not export all variables.
-# Otherwise a system limit (for SysV at least) may be exceeded.
-.NOEXPORT:
diff --git a/scripts/training/MGIZA/src/mkcls/Optimization.cpp b/scripts/training/MGIZA/src/mkcls/Optimization.cpp
deleted file mode 100644
index 03e06df..0000000
--- a/scripts/training/MGIZA/src/mkcls/Optimization.cpp
+++ /dev/null
@@ -1,30 +0,0 @@
-/*
-
-Copyright (C) 1997,1998,1999,2000,2001 Franz Josef Och
-
-mkcls - a program for making word classes .
-
-This program is free software; you can redistribute it and/or
-modify it under the terms of the GNU General Public License
-as published by the Free Software Foundation; either version 2
-of the License, or (at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with this program; if not, write to the Free Software
-Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
-USA.
-
-*/
-
-
-
-
-#include "Optimization.h"
-
-Optimization::~Optimization() {}
-
diff --git a/scripts/training/MGIZA/src/mkcls/Optimization.h b/scripts/training/MGIZA/src/mkcls/Optimization.h
deleted file mode 100644
index 4c43427..0000000
--- a/scripts/training/MGIZA/src/mkcls/Optimization.h
+++ /dev/null
@@ -1,49 +0,0 @@
-/*
-
-Copyright (C) 1997,1998,1999,2000,2001 Franz Josef Och
-
-mkcls - a program for making word classes .
-
-This program is free software; you can redistribute it and/or
-modify it under the terms of the GNU General Public License
-as published by the Free Software Foundation; either version 2
-of the License, or (at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with this program; if not, write to the Free Software
-Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
-USA.
-
-*/
-
-
-
-
-
-
-
-#ifndef OPTIMIZATION
-#define OPTIMIZATION
-
-#include "Problem.h"
-#include "general.h"
-
-class Optimization
-{
-
-public:
-
- virtual double minimize(int steps)=0;
- virtual ~Optimization();
-
-};
-#endif
-
-
-
-
diff --git a/scripts/training/MGIZA/src/mkcls/PopOptimization.cpp b/scripts/training/MGIZA/src/mkcls/PopOptimization.cpp
deleted file mode 100644
index 2e65a2c..0000000
--- a/scripts/training/MGIZA/src/mkcls/PopOptimization.cpp
+++ /dev/null
@@ -1,105 +0,0 @@
-/*
-
-Copyright (C) 1997,1998,1999,2000,2001 Franz Josef Och
-
-mkcls - a program for making word classes .
-
-This program is free software; you can redistribute it and/or
-modify it under the terms of the GNU General Public License
-as published by the Free Software Foundation; either version 2
-of the License, or (at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with this program; if not, write to the Free Software
-Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
-USA.
-
-*/
-
-
-
-
-#include "PopOptimization.h"
-#include "ProblemTest.h"
-
-
-int compareProbAndOpt(const void *p,const void *j)
-{
- double a=((ProbAndOpt *)p)->prob->value();
- double b=((ProbAndOpt *)j)->prob->value();
- if(a==b)
- return 0;
- if(a<b)
- return -1;
- else
- return +1;
-}
-bool operator<(const ProbAndOpt&a, const ProbAndOpt&b)
- {
- return a.prob->value()<b.prob->value();
- }
-bool operator==(const ProbAndOpt&a, const ProbAndOpt&b)
- {
- return a.prob->value()==b.prob->value();
- }
-
-ostream& operator<<(ostream&o , const ProbAndOpt&){return o;}
-istream& operator>>(istream&i , ProbAndOpt&){return i;}
-
-
-
-PopOptimization::PopOptimization(Problem &p,int verf,int anz)
-: probandopt(anz),initialisiert(0),verfahren(verf)
-{
- originalProblem = &p;
-}
-
-
-int PopOptimization::size()
-{
- return probandopt.size();
-}
-
-Problem *PopOptimization::problem(int i)
-{
- assert(initialisiert);
- return probandopt[i].prob;
-}
-
-Optimization *PopOptimization::optimization(int i)
-{
- assert(initialisiert);
- return probandopt[i].opt;
-}
-
-void PopOptimization::zInitialize()
-{
- int i;
- zufallSeed();
- for(i=0;i<size();i++)
- {
- probandopt[i].prob=originalProblem->makeEqualProblem();
- probandopt[i].prob->initialize();
- }
-
- zufallSeed();
- for(i=0;i<size();i++)
- probandopt[i].opt=(Optimization *)genIterOptimizer(verfahren,
- *(probandopt[i].prob),-1);
-
- initialisiert=1;
-}
-
-
-void PopOptimization::sort()
-{
- assert(initialisiert);
-
- probandopt.sort(size());
-}
-
diff --git a/scripts/training/MGIZA/src/mkcls/PopOptimization.h b/scripts/training/MGIZA/src/mkcls/PopOptimization.h
deleted file mode 100644
index be8d4a2..0000000
--- a/scripts/training/MGIZA/src/mkcls/PopOptimization.h
+++ /dev/null
@@ -1,89 +0,0 @@
-/*
-
-Copyright (C) 1997,1998,1999,2000,2001 Franz Josef Och
-
-mkcls - a program for making word classes .
-
-This program is free software; you can redistribute it and/or
-modify it under the terms of the GNU General Public License
-as published by the Free Software Foundation; either version 2
-of the License, or (at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with this program; if not, write to the Free Software
-Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
-USA.
-
-*/
-
-
-
-
-
-
-
-#ifndef POPULATIONOPTIMIZATION
-#define POPULATIONOPTIMIZATION
-
-#include "Optimization.h"
-
-typedef struct
-{
- Optimization *opt;
- Problem *prob;
-} ProbAndOpt;
-
-bool operator<(const ProbAndOpt&a, const ProbAndOpt&b);
-bool operator==(const ProbAndOpt&a, const ProbAndOpt&b);
-ostream& operator<<(ostream& , const ProbAndOpt&b);
-istream& operator>>(istream& , ProbAndOpt&b);
-
-inline DEFINE_STANDARD_COMPARE(ProbAndOpt);
-
-int compareProbAndOpt(const void *p,const void *j);
-
-class PopOptimization : public Optimization {
-
-
- private:
- Array<ProbAndOpt> probandopt;
-
- protected:
- int initialisiert;
- Problem *originalProblem;
-
-
- int verfahren;
-
-
- virtual void zInitialize();
-
-
- public:
- PopOptimization(Problem &s,int verf,int anz);
-
-
- virtual ~PopOptimization() {}
-
- int size();
-
-
- void sort();
-
-
- virtual Problem& bestProblem()=0;
-
-
- Problem *problem(int i);
-
-
- Optimization *optimization(int i);
-
-
-};
-#endif
diff --git a/scripts/training/MGIZA/src/mkcls/Problem.cpp b/scripts/training/MGIZA/src/mkcls/Problem.cpp
deleted file mode 100644
index 6e126c8..0000000
--- a/scripts/training/MGIZA/src/mkcls/Problem.cpp
+++ /dev/null
@@ -1,165 +0,0 @@
-/*
-
-Copyright (C) 1997,1998,1999,2000,2001 Franz Josef Och
-
-mkcls - a program for making word classes .
-
-This program is free software; you can redistribute it and/or
-modify it under the terms of the GNU General Public License
-as published by the Free Software Foundation; either version 2
-of the License, or (at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with this program; if not, write to the Free Software
-Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
-USA.
-
-*/
-
-
-
-#include "Problem.h"
-#include "Optimization.h"
-
-Problem::~Problem() {}
-
-Problem::Problem(int max,int anz,int _initialisierung,int _auswertung,
- int _nachbarschaft)
-: initialized(0),curCompVal(0),curCompChange(0),maxCompVal(max),maxComp(anz),curComp(0),
- initialisierung(_initialisierung),auswertung(_auswertung),nachbarschaft(_nachbarschaft),
- numberOfFullEvaluations(0),numberOfPartEvaluations(0),numberOfDoChange(0)
-{
- if( verboseMode>1 )
- cout << "Initialization of Problem: " << maxComp << " " << maxCompVal
- << endl;
-}
-
-void Problem::initialize(int i)
-{
- curComp=curCompVal=curCompChange=0;
- numberOfFullEvaluations=numberOfPartEvaluations=numberOfDoChange=0;
- initialized=1;
- if( i== -23 )
- _initialize(initialisierung);
- else
- _initialize(i);
- maxComp=maxDimension();
- maxCompVal=maxDimensionVal();
-}
-
-void Problem::doChange(ProblemChange &c)
-{
- assert (initialized);
- curCompChange=1;
- _doChange(c);
- numberOfDoChange++;
-}
-
-void Problem::incrementDirection()
-{
- if( maxCompVal==curCompVal )
- curCompVal=0;
- curCompChange=0;
- curComp=(curComp+1)%maxComp;
-}
-
-ProblemChange& Problem::change()
-{
- assert( initialized );
- assert( maxCompVal>=curCompVal);
-
- if( curCompChange||maxCompVal==curCompVal )
- incrementDirection();
-
- ProblemChange *p;
- int changeFound=_change(&p);
- curCompVal++;
- if( changeFound==0 )
- return change();
- else
- return *p;
-}
-double Problem::value()
-{
- numberOfFullEvaluations++;
- if( !initialized )
- initialize();
- return _value();
-}
-
-double Problem::valueChange(ProblemChange &x)
-{
- numberOfPartEvaluations++;
- assert( initialized );
- double currentValue=value();
- _doChange(x);numberOfDoChange++;
- double newValue=value();
- _undoChange(x);numberOfDoChange++;
- assert( currentValue==value() );
- return newValue-currentValue;
-}
-
-void Problem::dumpOn(ostream &strm)
-{
- assert( initialized );
- strm << "Problem(" << initialisierung << "," << auswertung << ","
- << nachbarschaft << ")\n";
- strm << " #value: " << numberOfFullEvaluations << endl;
- strm << "#valueChange: " << numberOfPartEvaluations << endl;
- strm << " #doChange: " << numberOfDoChange << endl;
-}
-
-StatVar& Problem::deviationStatVar(Optimization &s,int anz)
-{
- assert( initialized );
- StatVar &v=*new StatVar;
- double cur=value();
- int howOften=0;
- while( v.getNum()<anz )
- {
- if( howOften++>50000 )
- break;
- double neuer=s.minimize(1);
- if( neuer>cur )
- v.addValue(neuer-cur);
- cur=neuer;
- vassert(NULLFLOAT(cur-value()));
- }
- return v;
-}
-
-void Problem::dumpInfos(ostream &strm)
-{
- strm << "Problem: " << endl;
- assert( initialized );
-}
-
-
-double Problem::nicevalue(double)
-{
- return value();
-}
-
-int Problem::maxDimensionVal(void) {return -1;}
-int Problem::maxDimension(void) {return -1;}
-
-ProblemChange::~ProblemChange()
- {
- }
-
-ProblemChange::ProblemChange()
- {
- }
-
-void Problem::setValuesFrom(Problem *p)
-{
- numberOfFullEvaluations=p->numberOfFullEvaluations;
- numberOfPartEvaluations=p->numberOfPartEvaluations;
- numberOfDoChange=p->numberOfDoChange;
- initialized=p->initialized;
-}
diff --git a/scripts/training/MGIZA/src/mkcls/Problem.h b/scripts/training/MGIZA/src/mkcls/Problem.h
deleted file mode 100644
index 337390e..0000000
--- a/scripts/training/MGIZA/src/mkcls/Problem.h
+++ /dev/null
@@ -1,159 +0,0 @@
-/*
-
-Copyright (C) 1997,1998,1999,2000,2001 Franz Josef Och
-
-mkcls - a program for making word classes .
-
-This program is free software; you can redistribute it and/or
-modify it under the terms of the GNU General Public License
-as published by the Free Software Foundation; either version 2
-of the License, or (at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with this program; if not, write to the Free Software
-Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
-USA.
-
-*/
-
-
-
-
-
-
-
-#ifndef PROBLEMCHANGE
-#define PROBLEMCHANGE
-#include <iostream>
-#include "general.h"
-#include "StatVar.h"
-
-class Optimization;
-
-class ProblemChange
-
-{
- public:
- virtual ~ProblemChange();
- ProblemChange();
-};
-
-class Problem {
-
- private:
- short initialized;
- int curCompVal;
- short curCompChange;
- int maxCompVal;
- int maxComp;
-
-
- protected:
- int curComp;
-
- void setValuesFrom(Problem *p);
-
- virtual int maxDimensionVal(void) ;
-
-
- virtual int maxDimension(void) ;
-
-
- inline int curDimension(void) { assert(maxComp!=-1);return curComp;}
-
-
- inline int curDimensionVal(void) { assert(maxComp!=-1);return curCompVal;}
-
-
-
- virtual void _doChange(ProblemChange &c)=0;
-
-
- virtual int _change(ProblemChange **p)=0;
-
-
- virtual void _undoChange(ProblemChange &c)=0;
-
-
- virtual void _initialize(int initialisierung)=0;
-
-
- virtual double _value()=0;
-
-
- public:
- Problem(int maxCompVal=-1,int maxComp=-1,int _initialisierung=0,
- int _auswertung=0,int _nachbarschaft=0);
-
- virtual ~Problem();
-
-
- void doChange(ProblemChange &c);
-
-
- ProblemChange& change();
-
-
- virtual double value();
-
-
- virtual double valueChange(ProblemChange &c);
-
-
- virtual void initialize(int a= -23);
-
-
- inline virtual short endCriterion();
-
-
- virtual int maxNonBetterIterations()=0;
-
-
- virtual int expectedNumberOfIterations()=0;
-
-
- virtual void dumpOn(ostream &strm);
-
-
- virtual void dumpInfos(ostream &strm);
-
-
- virtual Problem *makeEqualProblem()=0;
-
-
- virtual double nicevalue(double vorher=1e100);
-
-
- virtual StatVar& deviationStatVar(Optimization &s,int anz);
-
-
- virtual void incrementDirection();
-
-
-
-
-
- int initialisierung;
- int auswertung;
- int nachbarschaft;
-
- int numberOfFullEvaluations;
- int numberOfPartEvaluations;
- int numberOfDoChange;
-
-
-
-};
-
-inline short Problem::endCriterion()
-{
- return 0;
-};
-
-#endif
-
diff --git a/scripts/training/MGIZA/src/mkcls/ProblemTest.cpp b/scripts/training/MGIZA/src/mkcls/ProblemTest.cpp
deleted file mode 100644
index 40fea7a..0000000
--- a/scripts/training/MGIZA/src/mkcls/ProblemTest.cpp
+++ /dev/null
@@ -1,264 +0,0 @@
-/*
-
-Copyright (C) 1997,1998,1999,2000,2001 Franz Josef Och
-
-mkcls - a program for making word classes .
-
-This program is free software; you can redistribute it and/or
-modify it under the terms of the GNU General Public License
-as published by the Free Software Foundation; either version 2
-of the License, or (at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with this program; if not, write to the Free Software
-Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
-USA.
-
-*/
-
-
-
-
-#include "ProblemTest.h"
-#include "HCOptimization.h"
-#include "RRTOptimization.h"
-#include "SAOptimization.h"
-#include "TAOptimization.h"
-#include "GDAOptimization.h"
-#include "MYOptimization.h"
-#include <stdio.h>
-#include "general.h"
-#include <stdlib.h>
-
-short ProblemTestVerboseMode=1;
-ofstream *PrintBestTo=0,*PrintBestTo2=0;
-
-
-int compareProblem(const void *p,const void *j)
-{
- double a=(*(Problem **)p)->value();
- double b=(*(Problem **)j)->value();
- if(a==b)
- return 0;
- if(a<b)
- return -1;
- else
- return +1;
-}
-
-
-IterOptimization *genIterOptimizer(int verfahren,Problem &problem,int maxIter)
-{
- IterOptimization *opt;
- switch(verfahren)
- {
- case HC_OPT:
- opt = new HCOptimization(problem,maxIter);
- break;
- case GDA_OPT:
- opt = new GDAOptimization(problem,maxIter);
- break;
- case SA_OPT:
- opt = new SAOptimization(problem,maxIter);
- break;
- case TA_OPT:
- opt = new TAOptimization(problem,maxIter);
- break;
- case RRT_OPT:
- opt = new RRTOptimization(problem,maxIter);
- break;
- case MY_OPT:
- opt = new MYOptimization(problem,maxIter);
- break;
- default:
- return 0;
- }
- problem.initialize();
- return opt;
-}
-
-
-double solveProblem(int verbose,Problem &problem,int versuche,
- int optimierungsschritte,int verfahren,double &mean,
- StatVar &endNice,StatVar &auswertungen,StatVar &startNice,
- double maxClock,int *iterationsschritte)
-{
- double smallestV=1e100;
- Problem *bestP=0;
- StatVar start,end;
- StatVar dauer;
- StatVar iterschritte;
-
- for(int i=0;i<versuche;i++)
- {
- if(verbose>2)
- {
- cout << " " << i << " of " << versuche << ".\n";
- cout.flush();
- }
- double vorher=clockSec();
-
- IterOptimization *opt=genIterOptimizer(verfahren,problem,
- optimierungsschritte);
- problem.numberOfPartEvaluations=0;
-
- startNice.addValue(problem.nicevalue());
- start.addValue(problem.value());
-
- double v=opt->minimize(optimierungsschritte);
-
- if( problem.numberOfPartEvaluations==0)
- auswertungen.addValue(opt->getCurStep());
- else
- auswertungen.addValue(problem.numberOfPartEvaluations);
- iterschritte.addValue(opt->getCurStep());
-
- endNice.addValue(problem.nicevalue());
- end.addValue(problem.value());
- dauer.addValue(clockSec()-vorher);
- if( verbose>2 )
- {
- cout << i << ". " << v << ": ";
- problem.dumpOn(cout);
- }
- delete opt;
- if( v<smallestV && verbose>1 )
- {
- bestP=problem.makeEqualProblem();
- smallestV=v;
- }
- if( verbose>2 )
- cout << " time: " << clockSec() << " best:" << endNice.quantil(0)
- << " this:" << problem.nicevalue() << endl;
- if( maxClock && clockSec()>maxClock )
- {
- if(verbose)
- cout << "Stop because of time limit ( " << (clockSec()-maxClock)
- << " Sekunden)\n";
- break;
- }
- }
-
- if(verbose)
- {
- cout << "\n***** " << start.getNum() << " runs. (algorithm:";
- switch(verfahren)
- {
- case HC_OPT:
- cout << "HC";
- break;
- case RRT_OPT:
- cout << "RRT";
- break;
- case GDA_OPT:
- cout << "GDA";
- break;
- case TA_OPT:
- cout << "TA";
- break;
- case SA_OPT:
- cout << "SA";
- break;
- case MY_OPT:
- cout << "MY";
- break;
- default:
- cout << "!unknown!";
- }
- cout << ")*****\n";
- problem.dumpInfos(cout);
- cout << endl;
- cout << "start-costs: "; start.dumpOn(cout); cout << endl;
- cout << " end-costs: "; end.dumpOn(cout); cout << endl;
- cout << " start-pp: "; startNice.dumpOn(cout); cout << endl;
- cout << " end-pp: "; endNice.dumpOn(cout); cout << endl;
- cout << " iterations: "; auswertungen.dumpOn(cout); cout << endl;
- cout << " time: "; dauer.dumpOn(cout);
- cout << endl;
- }
- if( bestP )
- {
- if(PrintBestTo)
- bestP->dumpOn(*PrintBestTo);
- else
- bestP->dumpOn(cout);
- delete bestP;
- }
- mean = end.getMean();
- if( iterationsschritte )
- *iterationsschritte=(int)(iterschritte.getMean());
- return end.getSmallest();
-}
-
-
-
-void multiSolveProblem(Problem &problem,int versuche,int maxSeconds)
-{
- int i;
- int maxLaeufe;
- double rDummy;
- StatVar end[MAX_OPT_NR],auswertungen[MAX_OPT_NR],start[MAX_OPT_NR];
- double maxClock=clockSec()+maxSeconds;
- if(maxSeconds<=0)maxClock=0;
- solveProblem(ProblemTestVerboseMode,problem,versuche,-1,HC_OPT,rDummy,
- end[HC_OPT],auswertungen[HC_OPT],start[HC_OPT],maxClock);
- maxLaeufe=(int)(auswertungen[HC_OPT].getMean()*5);
- for(i=0;i<MAX_OPT_NR;i++)
- {
- if( i==HC_OPT )
- continue;
- double maxClock=clockSec()+maxSeconds;
- if(maxSeconds<=0)maxClock=0;
- solveProblem(ProblemTestVerboseMode,problem,versuche, -1,i,rDummy,end[i],
- auswertungen[i],start[i],maxClock);
- }
- end[HC_OPT].title = " HC";
- end[SA_OPT].title = " SA";
- end[GDA_OPT].title = " GDA";
- end[RRT_OPT].title = " RRT";
- end[TA_OPT].title = " TA";
- end[MY_OPT].title = " MY";
-
- for(i=0;i<MAX_OPT_NR;i++)
- end[i].quantil(0.5);
-
- cout << "mean: \n";
- compareStatVarQuantil=-1;
- qsort(end,MAX_OPT_NR,sizeof(StatVar),compareStatVar);
- for(i=0;i<MAX_OPT_NR;i++)
- cout << end[i].title << " " << end[i].getMean() << endl;
-
- cout << "\nbest: \n";
- compareStatVarQuantil=0;
- qsort(end,MAX_OPT_NR,sizeof(StatVar),compareStatVar);
- for(i=0;i<MAX_OPT_NR;i++)
- cout << end[i].title << " " << end[i].quantil(compareStatVarQuantil)
- << endl;
-
- cout << "\n20%-quantil: \n";
- compareStatVarQuantil=0.2;
- qsort(end,MAX_OPT_NR,sizeof(StatVar),compareStatVar);
- for(i=0;i<MAX_OPT_NR;i++)
- cout << end[i].title << " " << end[i].quantil(compareStatVarQuantil)
- << endl;
-}
-
-
-void metaOptimization(Problem &tp,int nLaeufe,int nPars)
-{
- double bestPar,bestValue;
-
- bestPar=IterOptimizationOptimizeParameter(tp,TAOptimization::defaultAnnRate,0.0,1.0,nLaeufe,nPars,TA_OPT,bestValue);
- cout << "#TA(defaultAnnRate) BEST-PAR: " << bestPar << " BEST-VAL: " << bestValue << endl;
- bestPar=IterOptimizationOptimizeParameter(tp,RRTOptimization::defaultAnnRate,0.0,1.0,nLaeufe,nPars,RRT_OPT,bestValue);
- cout << "#RRT(defaultAnnRate) BEST-PAR: " << bestPar << " BEST-VAL: " << bestValue << endl;
- bestPar=IterOptimizationOptimizeParameter(tp,GDAOptimization::defaultAlpha,0.0,0.01,nLaeufe,nPars,GDA_OPT,bestValue);
- cout << "#GDA(defaultAlpha) BEST-PAR: " << bestPar << " BEST-VAL: " << bestValue << endl;
- bestPar=IterOptimizationOptimizeParameter(tp,SAOptimization::defaultEndAnnRate,0.0,1.0,nLaeufe,nPars,SA_OPT,bestValue);
- cout << "#SA(defaultEndAnnRate) BEST-PAR: " << bestPar << " BEST-VAL: " << bestValue << endl;
-}
diff --git a/scripts/training/MGIZA/src/mkcls/ProblemTest.h b/scripts/training/MGIZA/src/mkcls/ProblemTest.h
deleted file mode 100644
index 4bd8bda..0000000
--- a/scripts/training/MGIZA/src/mkcls/ProblemTest.h
+++ /dev/null
@@ -1,65 +0,0 @@
-/*
-
-Copyright (C) 1997,1998,1999,2000,2001 Franz Josef Och
-
-mkcls - a program for making word classes .
-
-This program is free software; you can redistribute it and/or
-modify it under the terms of the GNU General Public License
-as published by the Free Software Foundation; either version 2
-of the License, or (at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with this program; if not, write to the Free Software
-Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
-USA.
-
-*/
-
-
-
-
-
-
-
-#ifndef PROBLEMTEST_H
-#define PROBLEMTEST_H
-
-#include "Problem.h"
-#include "StatVar.h"
-#include <fstream>
-
-
-enum {TA_OPT, HC_OPT, SA_OPT,RRT_OPT,GDA_OPT,MAX_OPT_NR,MY_OPT };
-
-class IterOptimization;
-
-extern short ProblemTestVerboseMode;
-
-extern ofstream *PrintBestTo,*PrintBestTo2;
-
-double solveProblem(int verbose,Problem &problem,int versuche,
-int optimierungsschritte,int verfahren,double &mean,StatVar &endValue,
-StatVar &laufzeit,StatVar &initValue,double maxSec= 0,int *iterationsschritte=0);
-
-
-
-int compareProblem(const void *p,const void *j);
-
-
-
-void multiSolveProblem(Problem &problem,int versuche,int maxSeconds);
-
-
-
-IterOptimization *genIterOptimizer(int verfahren,Problem &problem,int maxIter);
-
-
-void metaOptimization(Problem &p,int nLaeufe,int nPars);
-
-#endif
diff --git a/scripts/training/MGIZA/src/mkcls/RRTOptimization.cpp b/scripts/training/MGIZA/src/mkcls/RRTOptimization.cpp
deleted file mode 100644
index 55e2122..0000000
--- a/scripts/training/MGIZA/src/mkcls/RRTOptimization.cpp
+++ /dev/null
@@ -1,217 +0,0 @@
-/*
-
-Copyright (C) 1997,1998,1999,2000,2001 Franz Josef Och
-
-mkcls - a program for making word classes .
-
-This program is free software; you can redistribute it and/or
-modify it under the terms of the GNU General Public License
-as published by the Free Software Foundation; either version 2
-of the License, or (at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with this program; if not, write to the Free Software
-Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
-USA.
-
-*/
-
-
-
-
-#include "RRTOptimization.h"
-#include "ProblemTest.h"
-
-double RRTOptimization::defaultAnnRate=0.6;
-double RRTOptimization::defaultMultiple=2.0;
-
-
-
-RRTOptimization::RRTOptimization(Problem &p,double t,double dt,int m)
-: IterOptimization(p,m),deviation(t),deltaDeviation(dt)
-{
- assert(deviation>=0);
-}
-
-
-
-RRTOptimization:: RRTOptimization(Problem &p,int m)
-: IterOptimization(p,m),deviation(-1),deltaDeviation(0)
-{
-}
-
-
-
-RRTOptimization::RRTOptimization(RRTOptimization &o)
-: IterOptimization(o)
-{
- deviation = o.deviation;
- deltaDeviation= o.deltaDeviation;
- record = o.record;
-}
-
-
-
-void RRTOptimization::zInitialize()
-{
- IterOptimization::zInitialize();
- if( deviation<0 )
- {
-
-
- int n;
-
- StatVar &v=problem.deviationStatVar(*this,ANZ_VERSCHLECHTERUNGEN);
-
- if( maxStep>0 )
- n=(int)(maxStep*4.0/5.0);
- else
- maxStep=n=(int)(problem.expectedNumberOfIterations()*defaultMultiple);
-
- deviation = v.quantil(defaultAnnRate);
- deltaDeviation = deviation/(float)n;
-
- if( verboseMode>0 )
- cout << "#Algorithm: Record-To-Record-Travel: (anfAnnRate="
- << defaultAnnRate << ",T=" << deviation << ",deltaT="
- << deltaDeviation << ")\n";
-
- curStep=0;
- endFlag=0;
- delete &v;
- problem.initialize();
- IterOptimization::zInitialize();
- }
- record=problem.value();
- assert(deviation>=0);
-}
-
-short RRTOptimization::end()
-{
- return ( endFlag>0 && deviation==0.0 );
-}
-void RRTOptimization::abkuehlen()
-{
- if( deviation>=0 )
- {
- deviation -= deltaDeviation;
- if(deviation<0)
- deviation=0;
- }
-}
-short RRTOptimization::accept(double delta)
-{
- if( deviation<0 )
- return 1;
- else
- {
- if( delta + curValue - deviation < record )
- {
- if( delta + curValue < record )
- record = delta+curValue;
- return 1;
- }
- else
- return 0;
- }
-}
-
-void RRTOptimization::makeGraphOutput()
-{
- IterOptimization::makeGraphOutput();
- *GraphOutput << deviation;
-}
-
-
-
-
-double RRTOptimization::optimizeValue(Problem &p,int proParameter,int numParameter,int typ,
- int optimierungsschritte,int print)
-{
- switch(typ)
- {
- case 1:
- {
- double bestPar=-1,best=1e100;
- if( print )
- cout << "#RRT-optimizeValues: Quantil: " << numParameter << endl;
- for(int i=0;i<=numParameter;i++)
- {
- StatVar end,laufzeit,init;
- double now;
- if(i==0) defaultAnnRate=0.2;
- else defaultAnnRate = 0.3+(float)(0.6*i)/numParameter;
- solveProblem(0,p,proParameter,optimierungsschritte,RRT_OPT,now,
- end,laufzeit,init);
- if( best>now )
- {
- best=now;
- bestPar=defaultAnnRate;
- }
- if( print )
- {
- cout << defaultAnnRate << " ";
- cout << end.getMean() << " " << end.quantil(0.2) << " "
- << end.quantil(0.79) << " " << laufzeit.getMean() << " "
- << end.quantil(0.0) << " " << end.getSigma() << " "
- << end.getSigmaSmaller() << " " << end.getSigmaBigger()
- << " " << now << endl;
- }
- }
- if( print )
- cout << "#Parameter Mittelwert 0.2-Quantil 0.8-Quantil Laufzeit "
- "Bester Sigma SigmaSmaller SigmaBigger\n";
- defaultAnnRate=0.8;
- return bestPar;
- }
- break;
- case 10:
- {
- double i;
- double bestPar=-1,best=1e100;
- StatVar end,laufzeit,init;
-
- if( print )
- cout << "#RRT-optimizeValues: defaultMultiple" << 8 << endl;
- for(i=0.5;i<=10;i+=1.5)
- {
- double now;
- defaultMultiple = i;
- solveProblem(0,p,proParameter,optimierungsschritte,RRT_OPT,now,
- end,laufzeit,init);
- if( best>now )
- {
- best=now;
- bestPar=defaultMultiple;
- }
- if( print )
- {
- cout << defaultMultiple << " ";
- cout << end.getMean() << " " << end.quantil(0.2) << " "
- << end.quantil(0.79) << " " << laufzeit.getMean() << " "
- << end.quantil(0.0) << " " << end.getSigma() << " "
- << end.getSigmaSmaller() << " " << end.getSigmaBigger()
- << " " << now << endl;
- }
- }
- if( print )
- cout << "#Parameter Mittelwert 0.2-Quantil 0.8-Quantil Laufzeit "
- "Bester Sigma SigmaSmaller SigmaBigger\n";
- defaultMultiple=2.0;
- return bestPar;
- }
- break;
- default:
- cerr << "Error: wrong parameter-type in RRTOptimization::optimizeValue ("
- << typ << ")\n";
- exit(1);
- }
- return 1e100;
-}
-
-
diff --git a/scripts/training/MGIZA/src/mkcls/RRTOptimization.h b/scripts/training/MGIZA/src/mkcls/RRTOptimization.h
deleted file mode 100644
index 42ec6e2..0000000
--- a/scripts/training/MGIZA/src/mkcls/RRTOptimization.h
+++ /dev/null
@@ -1,79 +0,0 @@
-/*
-
-Copyright (C) 1997,1998,1999,2000,2001 Franz Josef Och
-
-mkcls - a program for making word classes .
-
-This program is free software; you can redistribute it and/or
-modify it under the terms of the GNU General Public License
-as published by the Free Software Foundation; either version 2
-of the License, or (at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with this program; if not, write to the Free Software
-Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
-USA.
-
-*/
-
-
-
-
-
-
-
-#ifndef RRTOPTIMIZATION
-#define RRTOPTIMIZATION
-#include "IterOptimization.h"
-
-class RRTOptimization : public IterOptimization {
-
-
- private:
- double deviation;
- double deltaDeviation;
- double record;
-
- protected:
- virtual void zInitialize();
-
-
- virtual short accept(double delta);
-
-
- virtual void abkuehlen();
-
-
- virtual short end();
-
-
- virtual void makeGraphOutput();
-
-
- public:
- RRTOptimization(Problem &p,double temperatur,
- double deltaTemperatur,int maxIter=-1);
-
-
- RRTOptimization(Problem &p,int maxIter=-1);
-
-
- RRTOptimization(RRTOptimization &o);
-
-
- static double optimizeValue(Problem &p,int proParameter,
- int numParameter,int typ,int schritte= -1,int verbose=1);
-
-
- static double defaultAnnRate;
-
- static double defaultMultiple;
-
-};
-
-#endif
diff --git a/scripts/training/MGIZA/src/mkcls/SAOptimization.cpp b/scripts/training/MGIZA/src/mkcls/SAOptimization.cpp
deleted file mode 100644
index 6ae589a..0000000
--- a/scripts/training/MGIZA/src/mkcls/SAOptimization.cpp
+++ /dev/null
@@ -1,280 +0,0 @@
-/*
-
-Copyright (C) 1997,1998,1999,2000,2001 Franz Josef Och
-
-mkcls - a program for making word classes .
-
-This program is free software; you can redistribute it and/or
-modify it under the terms of the GNU General Public License
-as published by the Free Software Foundation; either version 2
-of the License, or (at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with this program; if not, write to the Free Software
-Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
-USA.
-
-*/
-
-
-
-
-#include <stdlib.h>
-#include <iostream>
-
-#include "SAOptimization.h"
-
-#include "ProblemTest.h"
-
-#define ALPHA 0.95
-
-double SAOptimization::defaultAnfAnnRate=0.9;
-double SAOptimization::defaultEndAnnRate=1e-9;
-double SAOptimization::defaultMultiple=2.0;
-
-
-
-SAOptimization::SAOptimization(Problem &p,int m)
-: IterOptimization(p,m), temperatur(-1)
-{
-}
-
-
-
-
-SAOptimization::SAOptimization(Problem &p,double t,double a,int s,int m)
-: IterOptimization(p,m),temperatur(t), alpha(a),schrittzahl(s)
-{
- assert(alpha<1);
- assert(schrittzahl>0);
- assert(t>0);
-}
-
-
-SAOptimization::SAOptimization(SAOptimization &o)
-: IterOptimization(o)
-{
- temperatur = o.temperatur;
- endTemperatur = o.endTemperatur;
- alpha = o.alpha;
- schrittzahl = o.schrittzahl;
- stepsForAbkuehlung = o.stepsForAbkuehlung;
-}
-
-
-void SAOptimization::zInitialize()
-{
- IterOptimization::zInitialize();
- if( temperatur<0)
- {
-
-
-
- StatVar &v=problem.deviationStatVar(*this,ANZ_VERSCHLECHTERUNGEN);
-
- if( maxStep>0 )
- stepsForAbkuehlung=(int)(maxStep*4.0/5.0);
- else
- maxStep=stepsForAbkuehlung=(int)(problem.expectedNumberOfIterations()*
- defaultMultiple);
-
- temperatur = v.getMean()/log(1/defaultAnfAnnRate);
- endTemperatur = v.getMean()/log(1/defaultEndAnnRate);
- schrittzahl = (int)(stepsForAbkuehlung/(log(endTemperatur/temperatur)/
- log(ALPHA)));
- if(schrittzahl==0)schrittzahl=1;
- alpha = ALPHA;
-
- if( verboseMode )
- cout << "#Algorithm: Simulated Annealing(anfAnnRate="
- << defaultAnfAnnRate <<",(endAnnRate=" << defaultEndAnnRate
- << ",T0=" << temperatur<< ",Te=" << endTemperatur<< ",schrittzahl="
- << schrittzahl<< ",stepsForAbkuehlung=" << stepsForAbkuehlung
- << ")\n";
- curStep=0;
- endFlag=0;
- delete &v;
- problem.initialize();
- IterOptimization::zInitialize();
- }
-}
-
-short SAOptimization::end()
-{
- if( temperatur>endTemperatur )
- bestStep = curStep;
- if( endFlag>0 && temperatur<endTemperatur)
- return 1;
- else
- return 0;
-}
-void SAOptimization::abkuehlen()
-{
- if(temperatur>=0)
- {
- if( curStep%schrittzahl == 0 )
- temperatur=temperatur * alpha;
- if( curStep> stepsForAbkuehlung)
- temperatur = 0;
- }
-}
-short SAOptimization::accept(double delta)
-{
- if( temperatur<0 )
- return 1;
- else
- {
- if( delta > 0 )
- {
- if( temperatur==0 )
- return 0;
- else
- {
- double z=zufall01();
- assert(z!=0.0);
- if(z==0.0)
- z+=1e-20;
- double e=exp(-delta/temperatur);
-
-
-
- return z+0.000000000001<=e;
- }
- }
- else
- return 1;
- }
-}
-
-void SAOptimization::makeGraphOutput()
-{
- IterOptimization::makeGraphOutput();
- *GraphOutput << temperatur;
-}
-
-
-
-
-double SAOptimization::optimizeValue(Problem &p,int proParameter,int numParameter,
- int typ,int optimierungsschritte,int print)
-{
- switch(typ)
- {
- case 1:
- {
- double bestPar=-1,best=1e100;
- double now;
- if( print )
- cout << "#SA-optimizeValues: defaultAnfAnnRate" << endl;
- for(int i=0;i<numParameter;i++)
- {
- StatVar end,laufzeit,init;
- defaultAnfAnnRate=0.1 + (1.0/numParameter)*i;
- solveProblem(0,p,proParameter,optimierungsschritte,SA_OPT,now,
- end,laufzeit,init);
- if( best>now )
- {
- best=now;
- bestPar=defaultAnfAnnRate;
- }
- if( print )
- {
- cout << defaultAnfAnnRate << " ";
- cout << end.getMean() << " " << end.quantil(0.2) << " "
- << end.quantil(0.79) << " " << laufzeit.getMean() << " "
- << end.quantil(0.0) << " " << end.getSigma() << " "
- << end.getSigmaSmaller() << " " << end.getSigmaBigger()
- << " " << now << endl;
- }
- }
- if( print )
- cout << "#Parameter Mittelwert 0.2-Quantil 0.8-Quantil Laufzeit "
- "Bester Sigma SigmaSmaller SigmaBigger\n";
- defaultAnfAnnRate=0.9;
- return bestPar;
- }
- break;
- case 2:
- {
- double bestPar=-1,best=1e100;
- double now;
- if( print )
- cout << "#Optimierung von SA: defaultEndAnnRate" << endl;
- for(int i=1;i<=numParameter;i++)
- {
- StatVar end,laufzeit,init;
- defaultEndAnnRate=1/(pow(10.0,i));
- solveProblem(0,p,proParameter,optimierungsschritte,SA_OPT,now,end,
- laufzeit,init);
- if( best>now )
- {
- best=now;
- bestPar=defaultEndAnnRate;
- }
- if( print )
- {
- cout << defaultEndAnnRate << " ";
- cout << end.getMean() << " " << end.quantil(0.2) << " "
- << end.quantil(0.79) << " " << laufzeit.getMean() << " "
- << end.quantil(0.0) << " " << end.getSigma() << " "
- << end.getSigmaSmaller() << " " << end.getSigmaBigger()
- << " " << now << endl;
- }
- }
- if( print )
- cout << "#Parameter Mittelwert 0.2-Quantil 0.8-Quantil Laufzeit "
- "Bester Sigma SigmaSmaller SigmaBigger\n";
- defaultEndAnnRate=1/10000.0;
- return bestPar;
- }
- break;
- case 10:
- {
- double bestPar=-1,best=1e100;
-
- if( print )
- cout << "#SA-optimizeValues: defaultMultiple " << 8 << endl;
- for(int i=1;i<=6;i++)
- {
- StatVar end,laufzeit,init;
- double now;
- defaultMultiple = i;
- solveProblem(0,p,proParameter,optimierungsschritte,SA_OPT,now,end,
- laufzeit,init);
- if( best>now )
- {
- best=now;
- bestPar=defaultMultiple;
- }
- if( print )
- {
- cout << defaultMultiple << " ";
- cout << end.getMean() << " " << end.quantil(0.2) << " "
- << end.quantil(0.79) << " " << laufzeit.getMean() << " "
- << end.quantil(0.0) << " " << end.getSigma() << " "
- << end.getSigmaSmaller() << " " << end.getSigmaBigger()
- << " " << now << endl;
- }
- }
- if( print )
- cout << "#Parameter Mittelwert 0.2-Quantil 0.8-Quantil Laufzeit "
- "Bester Sigma SigmaSmaller SigmaBigger\n";
- defaultMultiple=2.0;
- return bestPar;
- }
- break;
- default:
- cerr << "Error: wrong parameter-type in SAOptimization::optimizeValue ("
- << typ << ")\n";
- exit(1);
- }
- return 1e100;
-}
-
-
-
diff --git a/scripts/training/MGIZA/src/mkcls/SAOptimization.h b/scripts/training/MGIZA/src/mkcls/SAOptimization.h
deleted file mode 100644
index 97c528b..0000000
--- a/scripts/training/MGIZA/src/mkcls/SAOptimization.h
+++ /dev/null
@@ -1,86 +0,0 @@
-/*
-
-Copyright (C) 1997,1998,1999,2000,2001 Franz Josef Och
-
-mkcls - a program for making word classes .
-
-This program is free software; you can redistribute it and/or
-modify it under the terms of the GNU General Public License
-as published by the Free Software Foundation; either version 2
-of the License, or (at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with this program; if not, write to the Free Software
-Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
-USA.
-
-*/
-
-
-
-
-
-
-
-#ifndef SAOPTIMIZATION
-#define SAOPTIMIZATION
-#include "IterOptimization.h"
-
-class SAOptimization : public IterOptimization
- {
-
-
- private:
- double temperatur;
- double endTemperatur;
- double alpha;
- int schrittzahl;
- int stepsForAbkuehlung;
-
- protected:
- virtual void zInitialize();
-
-
- virtual short accept(double delta);
-
-
- virtual void abkuehlen();
-
-
- virtual short end();
-
-
- virtual void makeGraphOutput();
-
-
- public:
- SAOptimization(Problem &p,double temperatur,double alpha,
- int schrittzahl,int maxIter=-1);
-
-
- SAOptimization(Problem &p,int maxIter=-1);
-
-
- SAOptimization(SAOptimization &o);
-
-
- static double optimizeValue(Problem &p,int proParameter,
- int numParameter,int typ,
- int schritte= -1,int verbose=1);
-
-
- static double defaultAnfAnnRate;
-
- static double defaultEndAnnRate;
-
- static double defaultMultiple;
-
-
-};
-#endif
-
diff --git a/scripts/training/MGIZA/src/mkcls/StatVar.cpp b/scripts/training/MGIZA/src/mkcls/StatVar.cpp
deleted file mode 100644
index dbd76cd..0000000
--- a/scripts/training/MGIZA/src/mkcls/StatVar.cpp
+++ /dev/null
@@ -1,140 +0,0 @@
-/*
-
-Copyright (C) 1997,1998,1999,2000,2001 Franz Josef Och
-
-mkcls - a program for making word classes .
-
-This program is free software; you can redistribute it and/or
-modify it under the terms of the GNU General Public License
-as published by the Free Software Foundation; either version 2
-of the License, or (at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with this program; if not, write to the Free Software
-Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
-USA.
-
-*/
-
-
-
-#include "StatVar.h"
-#include <iostream>
-#include <stdlib.h>
-
-double compareStatVarQuantil=-1;
-
-StatV::~StatV() {}
-
-
-int doublecompare(const void *p,const void *j)
-{
- if( *(double *)p == *(double *)j)
- return 0;
- if( *(double *)p- *(double *)j<0 )
- return -1;
- else
- return 1;
-}
-
-int compareStatVar(const void *p,const void *j)
-{
- double a;
- double b;
- if(compareStatVarQuantil>=0)
- {
- a=((StatVar *)p)->quantil(compareStatVarQuantil);
- b=((StatVar *)j)->quantil(compareStatVarQuantil);
- }
- else
- {
- a=((StatVar *)p)->getMean();
- b=((StatVar *)j)->getMean();
- }
- if(a==b)
- return 0;
- if(a<b)
- return -1;
- else
- return +1;
-}
-
-
-double StatVar::getSigmaSmaller()
-{
- double ss=0;
- int ns=0;
- for(int i=0;i<n;i++)
- {
- if( values[i]<getMean() )
- {
- ss+=(values[i]-getMean())*(values[i]-getMean());
- ns++;
- }
- }
- if( ss/ns>0 )
- return sqrt(ss/ns);
- else
- return 0;
-}
-double StatVar::getSigmaBigger()
-{
- double ss=0;
- int ns=0;
- for(int i=0;i<n;i++)
- if( values[i]>getMean() )
- {
- ss+=(values[i]-getMean())*(values[i]-getMean());
- ns++;
- }
- if( ss/ns>0 )
- return sqrt(ss/ns);
- else
- return 0;
-}
-
-
-
-void StatV::dumpOn(ostream &strm)
-{
- strm << "MEAN: " << getMean() << " (" << smallest << "-" << biggest
- << ") SIGMA:" << getSigma()<< " ";
-}
-
-
-
-double StatVar::quantil(double percent)
-{
- int index=(int)(n*percent);
- if(index==n)
- index=n-1;
- assert(index>=0&&index<n);
- if(sortedFlag==0)
- {
- qsort(values.getPointerToData(),n,sizeof(double),doublecompare);
- assert(n<=values.size());
- sortedFlag=1;
- }
- if(index<0)
- {
- cerr << "WARNING: StatVar.cc\n";
- return 0.0;
- }
- else
- return values[index];
-}
-
-
-void StatVar::printValues(ostream &strm)
-{
- qsort(values.getPointerToData(),n,sizeof(double),doublecompare);
- assert(n<=values.size());
- for(int i=0;i<n;i++)
- strm << i/(double)n << " " << values[i] << endl;
- return;
-}
diff --git a/scripts/training/MGIZA/src/mkcls/StatVar.h b/scripts/training/MGIZA/src/mkcls/StatVar.h
deleted file mode 100644
index bdf1e19..0000000
--- a/scripts/training/MGIZA/src/mkcls/StatVar.h
+++ /dev/null
@@ -1,134 +0,0 @@
-/*
-
-Copyright (C) 1997,1998,1999,2000,2001 Franz Josef Och
-
-mkcls - a program for making word classes .
-
-This program is free software; you can redistribute it and/or
-modify it under the terms of the GNU General Public License
-as published by the Free Software Foundation; either version 2
-of the License, or (at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with this program; if not, write to the Free Software
-Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
-USA.
-
-*/
-
-
-
-#ifndef STATVAR_H
-#define STATVAR_H
-
-#include <stdlib.h>
-#include <iostream>
-#include "Array.h"
-#include "mystl.h"
-#include "myleda.h"
-#include <cmath>
-
-
-extern double compareStatVarQuantil;
-int compareStatVar(const void *p,const void *j);
-
-class StatV
-
-{
- protected:
- int n;
- double sum;
- double squareSum;
- double smallest,biggest;
-
- public:
- const char *title;
- StatV() : n(0),sum(0),squareSum(0),smallest(1e100),biggest(-1e100),title("") {}
- virtual ~StatV();
-
-
- virtual void addValue(double a)
- {
- n++;
- sum+=a;
- squareSum+=a*a;
- if(smallest>a)
- smallest=a;
- if(biggest<a)
- biggest=a;
-
- }
-
-
- double getMean()
- { return sum/n; }
-
-
- double getSigma()
- {
- if(squareSum/n - getMean()*getMean()<=0)
- return 0.0;
- else
- return sqrt(squareSum/n - getMean()*getMean());
- }
-
-
-
- double getBiggest()
- { return biggest; }
-
-
- double getSmallest()
- { return smallest; }
-
-
- int getNum()
- { return n; }
-
-
- void dumpOn(ostream &strm);
-
-
-};
-
-class StatVar : public StatV
-{
- private:
- Array<double> values;
- short sortedFlag;
- public:
- StatVar()
- : values(10,0.0,1),sortedFlag(0) {}
- virtual ~StatVar(){}
- double quantil(double percent=0.5);
-
-
- inline double value(int i)
- {return values[i];}
-
-
- void printValues(ostream &strm);
-
-
- virtual void addValue(double a)
- {
- sortedFlag=0;
- values[n]=a;
- StatV::addValue(a);
- }
-
- double getSigmaSmaller();
-
-
- double getSigmaBigger();
-
-
-};
-
-
-#endif
diff --git a/scripts/training/MGIZA/src/mkcls/TAOptimization.cpp b/scripts/training/MGIZA/src/mkcls/TAOptimization.cpp
deleted file mode 100644
index 074ff62..0000000
--- a/scripts/training/MGIZA/src/mkcls/TAOptimization.cpp
+++ /dev/null
@@ -1,208 +0,0 @@
-/*
-
-Copyright (C) 1997,1998,1999,2000,2001 Franz Josef Och
-
-mkcls - a program for making word classes .
-
-This program is free software; you can redistribute it and/or
-modify it under the terms of the GNU General Public License
-as published by the Free Software Foundation; either version 2
-of the License, or (at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with this program; if not, write to the Free Software
-Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
-USA.
-
-*/
-
-
-
-
-#include "TAOptimization.h"
-#include "ProblemTest.h"
-
-
-double TAOptimization::defaultAnnRate=0.4;
-double TAOptimization::defaultMultiple=2.0;
-
-
-TAOptimization::TAOptimization(Problem &p,double t,double d,int m)
-: IterOptimization(p,m) , temperatur(t) , deltaTemperatur(d)
-{
- assert(t>0 && d>0);
-}
-
-
-
-TAOptimization::TAOptimization(Problem&p,int m)
-: IterOptimization(p,m), temperatur(-1)
-{
-}
-
-
-
-TAOptimization::TAOptimization(TAOptimization &o)
-: IterOptimization(o)
-{
- temperatur= o.temperatur;
- deltaTemperatur= o.deltaTemperatur;
-}
-
-
-
-
-void TAOptimization::zInitialize()
-{
- IterOptimization::zInitialize();
- if( temperatur<0)
- {
-
-
- int n;
-
- StatVar &v=problem.deviationStatVar(*this,ANZ_VERSCHLECHTERUNGEN);
-
- if(maxStep>0)
- n=(int)(maxStep*4.0/5.0);
- else
- maxStep=n=(int)(problem.expectedNumberOfIterations()*defaultMultiple);
-
- temperatur = v.quantil(defaultAnnRate);
- deltaTemperatur = temperatur/n;
-
- if( verboseMode>0 )
- cout << "#TA: (anfAnnRate="
- << defaultAnnRate << ",T=" << temperatur << ",deltaT="
- << deltaTemperatur << ")\n";
- curStep=0;
- endFlag=0;
- delete &v;
- }
-}
-
-
-short TAOptimization::end()
-{
-
-
- if( temperatur>0 )
- {
- endFlag=0;
- bestStep=curStep;
- }
- return endFlag>0;
-}
-
-short TAOptimization::accept(double delta)
-{
- if( temperatur<0 )
- return 1;
- else
- if( delta < temperatur )
- return 1;
- else
- return 0;
-}
-
-void TAOptimization::abkuehlen()
-{
- if( temperatur>=0 )
- temperatur=(temperatur-deltaTemperatur>0)?(temperatur-deltaTemperatur):0;
-}
-
-void TAOptimization::makeGraphOutput()
-{
- IterOptimization::makeGraphOutput();
- *GraphOutput << temperatur;
-}
-
-
-
-
-double TAOptimization::optimizeValue(Problem &p,int proParameter,int numParameter,int typ,
- int optimierungsschritte,int print)
-{
- switch(typ)
- {
- case 1:
- {
- double bestPar=-1,best=1e100;
- if(print)cout << "#TA-optimizeValues: " << numParameter << endl;
- for(int i=0;i<=numParameter;i++)
- {
- StatVar end,laufzeit,init;
- double now;
- defaultAnnRate = (float)(i)/numParameter;
- solveProblem(0,p,proParameter,optimierungsschritte,TA_OPT,now,end,
- laufzeit,init);
- if( best>now )
- {
- best=now;
- bestPar=defaultAnnRate;
- }
- if( print)
- {
- cout << defaultAnnRate << " ";
- cout << end.getMean() << " " << end.quantil(0.2) << " "
- << end.quantil(0.79) << " " << laufzeit.getMean() << " "
- << end.quantil(0.0) << " " << end.getSigma() << " "
- << end.getSigmaSmaller() << " " << end.getSigmaBigger()
- << " " << now << endl;
- }
- }
- if( print )
- cout << "#Mittelwert 0.2-Quantil 0.8-Quantil Laufzeit Bester"
- " Sigma SigmaSmaller SigmaBigger\n";
- defaultAnnRate=0.5;
- return bestPar;
- }
- break;
- case 10:
- {
- double bestPar=-1,best=1e100;
- if( print )
- cout << "#TA-optimizeValues: defaultMultiple " << 10 << endl;
- for(int i=1;i<=6;i++)
- {
- StatVar end,laufzeit,init;
- double now;
- defaultMultiple = i;
- solveProblem(0,p,proParameter,optimierungsschritte,TA_OPT,now,
- end,laufzeit,init);
- if( best>now )
- {
- best=now;
- bestPar=defaultMultiple;
- }
- if( print )
- {
- cout << defaultMultiple << " ";
- cout << end.getMean() << " " << end.quantil(0.2) << " "
- << end.quantil(0.79) << " " << laufzeit.getMean() << " "
- << end.quantil(0.0) << " " << end.getSigma() << " "
- << end.getSigmaSmaller() << " " << end.getSigmaBigger()
- << " " << now << endl;
- }
- }
- if( print )
- cout << "#Mittelwert 0.2-Quantil 0.8-Quantil Laufzeit Bester Sigma "
- " SigmaSmaller SigmaBigger\n";
- defaultMultiple=2.0;
- return bestPar;
- }
- break;
- default:
- cerr << "Error: wrong parameter-type in TAOptimization::optimizeValue ("
- << typ << ")\n";
- exit(1);
- }
- return 1e100;
-}
-
-
diff --git a/scripts/training/MGIZA/src/mkcls/TAOptimization.h b/scripts/training/MGIZA/src/mkcls/TAOptimization.h
deleted file mode 100644
index 3382306..0000000
--- a/scripts/training/MGIZA/src/mkcls/TAOptimization.h
+++ /dev/null
@@ -1,78 +0,0 @@
-/*
-
-Copyright (C) 1997,1998,1999,2000,2001 Franz Josef Och
-
-mkcls - a program for making word classes .
-
-This program is free software; you can redistribute it and/or
-modify it under the terms of the GNU General Public License
-as published by the Free Software Foundation; either version 2
-of the License, or (at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with this program; if not, write to the Free Software
-Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
-USA.
-
-*/
-
-
-
-
-
-
-
-#ifndef TAOPTIMIZATION
-#define TAOPTIMIZATION
-
-#include "IterOptimization.h"
-
-class TAOptimization : public IterOptimization {
-
-
- private:
- double temperatur;
- double deltaTemperatur;
-
- protected:
- virtual void zInitialize();
-
-
- virtual short accept(double delta);
-
-
- virtual void abkuehlen();
-
-
- virtual short end();
-
-
- virtual void makeGraphOutput();
-
-
- public:
- TAOptimization(Problem &p,double temperatur,
- double deltaTemperatur,int maxIter=-1);
-
-
- TAOptimization(Problem &p,int maxIter=-1);
-
-
- TAOptimization(TAOptimization &o);
-
-
- static double optimizeValue(Problem &p,int proParameter,
- int numParameter,int typ,int schritte= -1,int verbose=1);
-
-
- static double defaultAnnRate;
-
- static double defaultMultiple;
-
-};
-#endif
diff --git a/scripts/training/MGIZA/src/mkcls/general.cpp b/scripts/training/MGIZA/src/mkcls/general.cpp
deleted file mode 100644
index cb3f27d..0000000
--- a/scripts/training/MGIZA/src/mkcls/general.cpp
+++ /dev/null
@@ -1,129 +0,0 @@
-/*
-
-Copyright (C) 1997,1998,1999,2000,2001 Franz Josef Och
-
-mkcls - a program for making word classes .
-
-This program is free software; you can redistribute it and/or
-modify it under the terms of the GNU General Public License
-as published by the Free Software Foundation; either version 2
-of the License, or (at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with this program; if not, write to the Free Software
-Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
-USA.
-
-*/
-
-
-
-
-
-#include <stdlib.h>
-#include <stdio.h>
-
-
-extern "C" {
-
-#ifndef WIN32
-#include <sys/time.h>
-#include <sys/resource.h>
-#else
-#define srand48 srand
-#define drand48() (rand()/RAND_MAX)
-#endif
-
-}
-
-#include "general.h"
-
-extern "C" {
-#ifndef __linux__
-int getrusage(int who, struct rusage *rusage);
-#endif
-};
-int verboseMode=0;
-
-#ifdef aNeXT
-#define NO_TEMPLATES
-#endif
-
-
-void myerror(int line,const char *file,const char *expression)
-{
- cerr << "(general.h):Assertion failed: '" << expression << "' ::: b "
- << file << ":" << line << endl;
-}
-
-
-void imyerror(int line,const char *file,const char *expression)
-{
- cerr << "Error: '" << expression << "' ::: in Source " << file
- << ":" << line << endl;
- #ifndef DEBUG
-
- #endif
-}
-
-
-
-void zufallSeed(int z)
-{
-#ifdef NeXT
- srandom(z);
-#else
- srand48(z);
-#endif
-}
-
-
-
-double zufall01()
-{
-#ifdef NeXT
- return (double)(random()%65536)/65536.0;
-#else
- return drand48();
-#endif
-}
-
-
-
-double zufall(double min,double max)
-{
- double z=zufall01()*(max-min)+min;
- assert(z>=min&&z<max);
- return z;
-}
-
-
-
-int randomInt(int exclusive)
-{
- int i=(int)zufall(0,exclusive);
- assert(i>=0);
- assert(i<exclusive);
- return i;
-}
-
-double clockSec()
-{
-#ifdef WIN32
- return 0;
-#else
-#ifdef linux
- enum __rusage_who who=RUSAGE_SELF;
-#else
- int who=RUSAGE_SELF;
-#endif
- struct rusage rusage;
- getrusage(who, &rusage);
- return rusage.ru_utime.tv_sec+rusage.ru_utime.tv_usec/1000000.0;
-#endif
-}
diff --git a/scripts/training/MGIZA/src/mkcls/general.h b/scripts/training/MGIZA/src/mkcls/general.h
deleted file mode 100644
index 8db48aa..0000000
--- a/scripts/training/MGIZA/src/mkcls/general.h
+++ /dev/null
@@ -1,89 +0,0 @@
-/*
-
-Copyright (C) 1997,1998,1999,2000,2001 Franz Josef Och
-
-mkcls - a program for making word classes .
-
-This program is free software; you can redistribute it and/or
-modify it under the terms of the GNU General Public License
-as published by the Free Software Foundation; either version 2
-of the License, or (at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with this program; if not, write to the Free Software
-Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
-USA.
-
-*/
-
-
-
-
-
-
-
-
-#ifndef GENERAL_HEADER
-#define GENERAL_HEADER
-
-#include <iostream>
-#ifdef NeXT
-#include <minmax.h>
-#endif
-#include <string.h>
-
-
-
-#define NULLFLOAT(x) ( fabs(x)<=0.0000001 )
-#define EQUALFLOAT(x,y) ( fabs(x-y)<(fabs(x)+fabs(y))/10000000.0 )
-
-
-
-
-#define TEST_RANDOM_SEED 532567487
-
-double zufall01();
-
-
-double zufall(double min,double max);
-
-
-int randomInt(int exclusive);
-
-
-void zufallSeed(int z =TEST_RANDOM_SEED);
-
-
-
-
-#include "myassert.h"
-#include <cassert>
-#include "Array.h"
-
-
-
-
-
-
-double clockSec();
-
-extern int verboseMode;
-
-
-
-inline string operator&(const string&a,const string&b)
-{
- string c(a);
- c+=b;
- return c;
-}
-
-
-
-#endif
-
diff --git a/scripts/training/MGIZA/src/mkcls/mkcls.cpp b/scripts/training/MGIZA/src/mkcls/mkcls.cpp
deleted file mode 100644
index a2bf695..0000000
--- a/scripts/training/MGIZA/src/mkcls/mkcls.cpp
+++ /dev/null
@@ -1,620 +0,0 @@
-/*
-
-Copyright (C) 1997,1998,1999,2000,2001 Franz Josef Och
-
-mkcls - a program for making word classes .
-
-This program is free software; you can redistribute it and/or
-modify it under the terms of the GNU General Public License
-as published by the Free Software Foundation; either version 2
-of the License, or (at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with this program; if not, write to the Free Software
-Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
-USA.
-
-*/
-
-
-#ifdef WIN32
-#define strcasecmp strcmpi
-#endif
-
-#include <stdio.h>
-#include <iostream>
-#include <stdlib.h>
-#include <ctype.h>
-#include "general.h"
-
-#include "KategProblem.h"
-#include "KategProblemTest.h"
-
-#include "ProblemTest.h"
-#include "TAOptimization.h"
-#include "GDAOptimization.h"
-#include "RRTOptimization.h"
-#include "SAOptimization.h"
-#include "HCOptimization.h"
-
-
-double SigmaVerfaelschung=5.0;
-int OneWithHapas=1;
-char *hapaxInitName=0;
-
-
-
-
-
-static int nLaeufe=1,nLaeufeReduce=3;
-
-
-static int optimizeParameterAnzahl=10;
-
-
-static int IterOptVerf=TA_OPT;
-
-
-static int MaxIterOptSteps= -1;
-
-
-static int MaxSecs=0;
-
-
-
-
-
-static int InitValue=INIT_RAN;
-
-
-static int Criterion=CRITERION_ML;
-
-
-static int Wwahl=W_DET_DECR;
-
-
-static int Kwahl=K_BEST;
-
-
-static int NumberCategories=100;
-
-
-static int MinWordFrequency=0;
-
-
-static int IterOptSet=0;
-
-
-static KategProblem *p = 0;
-
-
-char korpusName[1024]="train";
-int korpusIsText=1;
-
-
-char *FileForOther=0;
-
-void printUsage(int r)
-{
- cout <<
- "mkcls - a program for making word classes: Usage: \n"
- " mkcls [-nnum] [-ptrain] [-Vfile] opt\n"
-
-
-
-
-
-
- "-V output classes (Default: no file)\n"
-
-
- "-n number of optimization runs (Default: 1); larger number => better results\n"
-
- "-p filename of training corpus (Default: 'train')\n"
-
-
-
-
-
-
-
-
- "Example:\n"
- " mkcls -c80 -n10 -pin -Vout opt\n"
- " (generates 80 classes for the corpus 'in' and writes the classes in 'out')\n"
- "Literature: \n"
- " Franz Josef Och: »Maximum-Likelihood-Schätzung von Wortkategorien mit Verfahren\n"
- " der kombinatorischen Optimierung?Studienarbeit, Universität Erlangen-Nürnberg,\n"
- " Germany,1995. \n";
- exit(r);
-}
-
-
-
-
-
-
-
-void makeIterOpt()
-{
- double maxTime=clockSec()+MaxSecs;
- if(MaxSecs==0)maxTime=0;
- double mean;
- StatVar end,laufzeit,init;
- solveProblem(1+(PrintBestTo!=0),*p,nLaeufe,MaxIterOptSteps,IterOptVerf,
- mean,end,laufzeit,init,maxTime);
- if( verboseMode>1 )
- p->dumpOn(cout);
-}
-
-
-
-void makeIzrOpt()
-{
- double maxTime=clockSec()+MaxSecs;
- if(MaxSecs==0)maxTime=0;
- izrOptimization(*p,nLaeufeReduce,nLaeufeReduce,0,maxTime,IterOptVerf);
-}
-
-
-
-int makeMetaOpt(int argc,char **argv)
-{
- int ret=0;
-
- if(argc==4 || argc==3)
- {
- int typ=0;
- if( argc==4 )
- {
- sscanf(argv[3],"%d",&typ);
- assert(typ>0 && typ<=11 );
- }
- if( isdigit(argv[2][0]) )
- {
- int a;
- sscanf(argv[2],"%d",&a);
- switch(a)
- {
- case 1:
- SAOptimization::optimizeValue(*p,nLaeufe,
- optimizeParameterAnzahl,1);
- break;
- case 2:
- SAOptimization::optimizeValue(*p,nLaeufe,
- optimizeParameterAnzahl,2);
- break;
- case 3:
- SAOptimization::optimizeValue(*p,nLaeufe,
- optimizeParameterAnzahl,10);
- break;
- case 4:
- TAOptimization::optimizeValue(*p,nLaeufe,
- optimizeParameterAnzahl,1);
- break;
- case 5:
- TAOptimization::optimizeValue(*p,nLaeufe,
- optimizeParameterAnzahl,10);
- break;
- case 6:
- RRTOptimization::optimizeValue(*p,nLaeufe,
- optimizeParameterAnzahl,1);
- break;
- case 7:
- RRTOptimization::optimizeValue(*p,nLaeufe,
- optimizeParameterAnzahl,10);
- break;
- case 8:
- GDAOptimization::optimizeValue(*p,nLaeufe,
- optimizeParameterAnzahl,1);
- break;
- default:
- cerr << "Error: Wrong number of parameter (" << argv[2]
- << ").\n";
- printUsage(1);
- }
- }
- else
- {
- if(strcasecmp(argv[2],"gda")==0)
- {
- GDAOptimization::optimizeValue(*p,nLaeufe,
- optimizeParameterAnzahl,typ);
- }
- else if(strcasecmp(argv[2],"ta")==0)
- {
- TAOptimization::optimizeValue(*p,nLaeufe,
- optimizeParameterAnzahl,typ);
- }
- else if(strcasecmp(argv[2],"rrt")==0)
- {
- RRTOptimization::optimizeValue(*p,nLaeufe,
- optimizeParameterAnzahl,typ);
- }
- else if(strcasecmp(argv[2],"sa")==0)
- {
- SAOptimization::optimizeValue(*p,nLaeufe,
- optimizeParameterAnzahl,typ);
- }
-
-
-
-
- else
- {
- cerr << "Error: unknown algorithm" << argv[2] << endl;
- printUsage(1);
- }
- }
- }
- else
- {
- cerr << "Error: wrong number of arguments: " << argc << endl;
- printUsage(1);
- }
- return ret;
-}
-
-
-
-
-
-
-
-
-
-
-void setVerfahren(char *p)
-{
- if(strcasecmp(p,"rrt")==0 )
- IterOptVerf=RRT_OPT;
- else if(strcasecmp(p,"ta")==0)
- IterOptVerf=TA_OPT;
- else if(strcasecmp(p,"gda")==0)
- IterOptVerf=GDA_OPT;
- else if(strcasecmp(p,"sa")==0)
- IterOptVerf=SA_OPT;
- else if(strcasecmp(p,"hc")==0)
- IterOptVerf=HC_OPT;
- else
- {
- cerr << "Error: Unknown iterativ-optimizing algorithm '" << p << "'.\n";
- printUsage(1);
- }
-}
-
-
-
-void setInitValue(char *iv,char *fileForOther)
-{
- if(strcasecmp(iv,"ran")==0 )
- InitValue=INIT_RAN;
- else if(strcasecmp(iv,"aio")==0)
- InitValue=INIT_AIO;
- else if(strcasecmp(iv,"gda")==0)
- InitValue=INIT_LWRW;
- else if(strcasecmp(iv,"freq")==0)
- InitValue=INIT_FREQ;
- else if(strcasecmp(iv,"other")==0)
- {
- InitValue=INIT_OTHER;
- FileForOther=strdup(fileForOther);
- }
- else
- {
- cerr << "Error: Unknown initialization '" << p << "'.\n";;
- printUsage(1);
- }
-}
-
-
-void setWwahl(const char *ww)
-{
- if(strcasecmp(ww,"ran")==0 )
- Wwahl=W_RAN;
- else if(strcasecmp(ww,"det")==0)
- Wwahl=W_DET_DECR;
- else if(strcasecmp(ww,"incr")==0)
- Wwahl=W_DET_INCR;
- else
- {
- cerr << "Error: Unknown word-selection '" << ww << "'.\n";;
- printUsage(1);
- }
-}
-
-
-void setKwahl(const char *kw)
-{
- if( strcasecmp(kw,"det")==0 )
- Kwahl=K_DET;
- else if(strcasecmp(kw,"ran")==0 )
- Kwahl=K_RAN;
- else if(strcasecmp(kw,"best")==0)
- Kwahl=K_BEST;
- else
- {
- cerr << "Error: Unknown category-selection '" << kw << "'.\n";
- printUsage(1);
- }
-}
-
-
-void setParameter(const char *nr1,const char *nr2)
-{
- int n1;
- float n2;
- sscanf(nr1,"%d",&n1);
- sscanf(nr2,"%f",&n2);
- IterOptSet=1;
- switch(n1)
- {
- case 1:
- SAOptimization::defaultAnfAnnRate=n2;
- if(verboseMode)cout << "Parameter gamma_0 (SA) set to "
- << SAOptimization::defaultAnfAnnRate << endl;
- iassert(0<=SAOptimization::defaultAnfAnnRate&&
- SAOptimization::defaultAnfAnnRate<=1);
- break;
- case 2:
- SAOptimization::defaultEndAnnRate=n2;
- if(verboseMode)cout << "Parameter gamma_e (SA) set to "
- << SAOptimization::defaultEndAnnRate << endl;
- iassert(0<=SAOptimization::defaultEndAnnRate
- &&SAOptimization::defaultEndAnnRate<=1);
- break;
- case 3:
- SAOptimization::defaultMultiple=n2;
- if(verboseMode)cout << "Parameter nu_e (SA) set to "
- << SAOptimization::defaultMultiple << endl;
- iassert( SAOptimization::defaultMultiple>0 );
- break;
- case 4:
- TAOptimization::defaultAnnRate=n2;
- if(verboseMode)cout << "Parameter gamma_{TA} set to "
- << TAOptimization::defaultAnnRate << endl;
- iassert(0<=TAOptimization::defaultAnnRate
- &&TAOptimization::defaultAnnRate<=1);
- break;
- case 5:
- TAOptimization::defaultMultiple=n2;
- if(verboseMode)cout << "Parameter nu_{TA} set to "
- << TAOptimization::defaultMultiple << endl;
- iassert( TAOptimization::defaultMultiple>0 );
- break;
- case 6:
- RRTOptimization::defaultAnnRate=n2;
- if(verboseMode)cout << "Parameter gamma_{RRT} set to "
- << RRTOptimization::defaultAnnRate << endl;
- iassert(0<=RRTOptimization::defaultAnnRate
- && RRTOptimization::defaultAnnRate<=1);
- break;
- case 7:
- RRTOptimization::defaultMultiple=n2;
- if(verboseMode)cout << "Parameter nu_{RRT} set to "
- << RRTOptimization::defaultMultiple << endl;
- iassert( RRTOptimization::defaultMultiple>0 );
- break;
- case 8:
- GDAOptimization::defaultAlpha=n2;
- if(verboseMode)cout << "Parameter alpha set to "
- << GDAOptimization::defaultAlpha << endl;
- iassert(0<=GDAOptimization::defaultAlpha
- && GDAOptimization::defaultAlpha<1 );
- break;
- default:
- cerr << "Error: Wrong parameter number " << nr1 << " " << n1 << endl;
- printUsage(1);
- }
-}
-
-
-
-void setKorpusName(const char *s)
-{
- strcpy(korpusName,s);
-}
-
-void setHapaxInitName(const char *s)
-{
- hapaxInitName=strdup(s);
-}
-
-void setKorpus()
-{
- if( korpusIsText )
- {
- if( (p=fromKModel(korpusName,NumberCategories,InitValue,Criterion,Wwahl|Kwahl,
- MinWordFrequency))==0)
- {
- cerr << "Error: Could not read the file '" << korpusName << "'.\n";
- printUsage(1);
- }
- }
- else
- {
- if( (p=fromNgrFile(korpusName,NumberCategories,InitValue,Criterion,Wwahl|Kwahl,
- MinWordFrequency))==0)
- {
- cerr << "Error: Could not read the file '" << korpusName << "'.\n";
- printUsage(1);
- }
- p->wordFreq.initializeIndex(*(p->words),'1',2,1+NumberCategories/2,!OneWithHapas);
- p->wordFreq.initializeIndex(*(p->words),'2',2+NumberCategories/2,1+NumberCategories,OneWithHapas);
- }
- if( IterOptSet==0 )
- KategProblemSetParameters(*p);
-}
-
-
-
-
-
-
-int main(int argc,char **argv)
-{
- double startTime=clockSec();
- zufallSeed();
- while( argc>1 && argv[1][0]=='-' )
- {
-
- switch(argv[1][1])
- {
- case 'v':
- sscanf(argv[1]+2,"%d",&verboseMode);
- iassert(verboseMode>=0);
- break;
- case 'O':
- sscanf(argv[1]+2,"%d",&OneWithHapas);
- cout << "OneWithHapas: " << OneWithHapas << endl;
- break;
- case 'n':
- sscanf(argv[1]+2,"%d",&nLaeufe);
- nLaeufeReduce=nLaeufe;
- iassert( nLaeufe>=1 );
- break;
- case 'l':
- Criterion=1;
- if( argv[1][2] )
- {
- sscanf(argv[1]+2,"%lf",&rhoLo);
- if( verboseMode )
- cout << "Parameter rho (for LO) set to" << rhoLo << ".\n";
- iassert(0<=rhoLo && rhoLo<=1);
- }
- if( verboseMode )
- cout << "Criterion LO used.\n";
- break;
- case 'y':
- Criterion=2;
- if( argv[1][2] )
- {
- sscanf(argv[1]+2,"%lf",&SigmaVerfaelschung);
- if( verboseMode )
- cout << "Parameter rho (for LO) set to" << SigmaVerfaelschung << ".\n";
- iassert(0<SigmaVerfaelschung);
- }
- if( verboseMode )
- cout << "My special criterion used.\n";
- break;
- case 'p':
- setKorpusName(argv[1]+2);
- assert(argv[2]&&argv[2][0]!='-' || argv[2][0]!='i');
- break;
- case 'P':
- setKorpusName(argv[1]+2);
- korpusIsText=0;
- assert(argv[2]&&argv[2][0]!='-' || argv[2][0]!='i');
- break;
- case 'i':
- setInitValue(argv[1]+2,argv[2]);
- if( InitValue==INIT_OTHER )
- argv++,argc--;
- break;
- case 'h':
- setHapaxInitName(argv[1]+2);
- break;
- case 'k':
- setKwahl(argv[1]+2);
- break;
- case 'w':
- setWwahl(argv[1]+2);
- break;
- case 'c':
- sscanf(argv[1]+2,"%d",&NumberCategories);
- iassert(NumberCategories>=2);
- break;
- case 'm':
- sscanf(argv[1]+2,"%d",&MinWordFrequency);
- break;
- case 'e':
- setParameter(argv[1]+2,argv[2]);
- argv++,argc--;
- break;
- case 'a':
- setVerfahren(argv[1]+2);
- break;
- case 'r':
- {
- int s;
- sscanf(argv[1]+2,"%d",&s);
- zufallSeed(s);
- }
- break;
- case 'V':
- if(argv[1][2])
- {
- char str[1024];
- strcpy(str,argv[1]+2);
- PrintBestTo=new ofstream(str);
- strcat(str,".cats");
- PrintBestTo2=new ofstream(str);
- }
- else
- cout << "AUSGABE auf cout\n";
- break;
- case 'M':
- sscanf(argv[1]+2,"%d",&MaxIterOptSteps);
- break;
- case 's':
- sscanf(argv[1]+2,"%d",&MaxSecs);
- break;
- case 'N':
- sscanf(argv[1]+2,"%d",&optimizeParameterAnzahl);
- break;
- case 'o':
- GraphOutput = new ofstream(argv[1]+2);
- if( GraphOutput==0 )
- cerr << "Warning: Open failed for file '" << argv[1]+2 << "'.\n";
- break;
- default:
- cerr << "Fehlerhafte Option: " << argv[1] << endl;
- printUsage(1);
- }
- argv++;
- argc--;
- }
-
-
- setKorpus();
- if( FileForOther )
- {
- fromCatFile(p,FileForOther);
- p->initialisierung=InitValue;
- p->_initialize(InitValue);
- }
-
- if( hapaxInitName )
- {
- fromCatFile(p,hapaxInitName,0);
- p->fixInitLike();
- }
-
- double start2Time=clockSec();
-
- if(argc>=2 && strcasecmp(argv[1],"opt")==0 )
- makeIterOpt();
- else if(argc>=2 && strcasecmp(argv[1],"meta-opt")==0)
- makeMetaOpt(argc,argv);
- else if(argc>=2 && strcasecmp(argv[1],"izr-opt")==0)
- makeIzrOpt();
-
-
- else
- {
- makeIterOpt();
- }
-
- if( verboseMode )
- {
- cout << " full-time: " << clockSec()-startTime << endl;
- cout << "optimize-time: " << clockSec()-start2Time << endl;
- }
- return 0;
-}
-
diff --git a/scripts/training/MGIZA/src/mkcls/my.h b/scripts/training/MGIZA/src/mkcls/my.h
deleted file mode 100644
index ba06657..0000000
--- a/scripts/training/MGIZA/src/mkcls/my.h
+++ /dev/null
@@ -1,54 +0,0 @@
-/*
-
-Copyright (C) 1997,1998,1999,2000,2001 Franz Josef Och
-
-mkcls - a program for making word classes .
-
-This program is free software; you can redistribute it and/or
-modify it under the terms of the GNU General Public License
-as published by the Free Software Foundation; either version 2
-of the License, or (at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with this program; if not, write to the Free Software
-Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
-USA.
-
-*/
-
-
-
-#ifndef HEADER_my_DEFINED
-#define HEADER_my_DEFINED
-
-#define over_array(a,i) for(i=(a).low();i<=(a).high();i++)
-#define backwards_array(a,i) for(i=(a).high();i>=(a).low();i--)
-#define over_arr(a,i) for(int i=(a).low();i<=(a).high();i++)
-#define over_arrMAX(a,i,max) for(int i=(a).low();i<=min((a).high(),max-1);i++)
-#define backwards_arr(a,i) for(int i=(a).high();i>=(a).low();i--)
-
-extern double n1mult,n2mult,n3mult;
-
-inline double realProb(int n1,int n2)
-{
- massert(n1<=n2);
- iassert(n1>=0&&n2>0);
- if(n2==0)n2=1;
- return ((double)n1)/(double)n2;
-}
-
-inline double verfProb(int n1,int n2)
-{
- double prob = realProb(n1,n2);
- if( n1==1 )return prob*n1mult;
- else if( n1==2 )return prob*n2mult;
- else if( n1==3 )return prob*n3mult;
- else return prob;
-}
-
-#endif
diff --git a/scripts/training/MGIZA/src/mkcls/myassert.h b/scripts/training/MGIZA/src/mkcls/myassert.h
deleted file mode 100644
index da86ffb..0000000
--- a/scripts/training/MGIZA/src/mkcls/myassert.h
+++ /dev/null
@@ -1,44 +0,0 @@
-/*
-
-Copyright (C) 1997,1998,1999,2000,2001 Franz Josef Och
-
-mkcls - a program for making word classes .
-
-This program is free software; you can redistribute it and/or
-modify it under the terms of the GNU General Public License
-as published by the Free Software Foundation; either version 2
-of the License, or (at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with this program; if not, write to the Free Software
-Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
-USA.
-
-*/
-
-
-
-#ifndef MY_ASSERT_DEFINED
-#define MY_ASSERT_DEFINED
-void myerror(int line,const char *file,const char *expression);
-void imyerror(int line,const char *file,const char *expression);
-
-#define iassert(expression) do {if (!(expression)) {imyerror(__LINE__,__FILE__,#expression);}} while (0)
-
-#define massert(expr) do {} while(0)
-
-#define vassert(expr) do {} while(0)
-
-#include <assert.h>
-
-#endif
-
-
-
-
-
diff --git a/scripts/training/MGIZA/src/mkcls/myleda.h b/scripts/training/MGIZA/src/mkcls/myleda.h
deleted file mode 100644
index 715f846..0000000
--- a/scripts/training/MGIZA/src/mkcls/myleda.h
+++ /dev/null
@@ -1,319 +0,0 @@
-/*
-
-Copyright (C) 1997,1998,1999,2000,2001 Franz Josef Och
-
-mkcls - a program for making word classes .
-
-This program is free software; you can redistribute it and/or
-modify it under the terms of the GNU General Public License
-as published by the Free Software Foundation; either version 2
-of the License, or (at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with this program; if not, write to the Free Software
-Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
-USA.
-
-*/
-
-
-
-#ifndef myleda_HEADER_defined
-#define myleda_HEADER_defined
-using namespace std;
-#include "myassert.h"
-#ifdef WIN32
-#include<list>
-#endif
-
-
-#if defined(USE_LEDA_array)||defined(USE_LEDA)
-#include <LEDA/array.h>
-#else
-
-#include "FixedArray.h"
-
-template<class T>
-class leda_array : public FixedArray<T>
-{
-public:
- leda_array() {}
- leda_array(int n) : FixedArray<T>(n) {}
-};
-#endif
-
-
-#if defined(USE_LEDA_set)||defined(USE_LEDA)
-#include <LEDA/set.h>
-#define forall_set(a,b,c) forall(b,c)
-#else
-#include <set>
-template<class T>
-class leda_set : public set<T>
-{
-public:
- bool member(const T&m) const
- { return this->count(m)!=0; }
- void del(const T&m)
- { this->erase(m); }
-};
-#define forall_set(a,b,c) for(a::iterator __i__=c.begin();__i__!=c.end()&&((b=*__i__),1);++__i__)
-template<class T>
-leda_set<T> operator&(const leda_set<T>&a,const leda_set<T>&b)
-{
- leda_set<T>c;
-
-#ifdef WIN32
- std::list<T> lst;
- set_intersection(a.begin(),a.end(),b.begin(),b.end(),lst.begin());
- for(std::list<T>::iterator it = lst.begin() ;it!=lst.end();it++){
- c.insert(*it);
- }
-#else
- insert_iterator<set<T> > iter(c,c.begin());
- set_intersection(a.begin(),a.end(),b.begin(),b.end(),iter);
-#endif
- return c;
-}
-template<class T>
-leda_set<T> operator-(const leda_set<T>&a,const leda_set<T>&b)
-{
-
- leda_set<T>c;
-
-
-#ifdef WIN32
- std::list<T> lst;
- set_difference(a.begin(),a.end(),b.begin(),b.end(),lst.begin());
- for(std::list<T>::iterator it = lst.begin() ;it!=lst.end();it++){
- c.insert(*it);
- }
-#else
- insert_iterator<set<T> > iter(c,c.begin());
- set_difference(a.begin(),a.end(),b.begin(),b.end(),iter);
-#endif
- return c;
-}
-
-#endif
-
-
-#if defined(USE_LEDA_d_array)||defined(USE_LEDA)
-#include <LEDA/d_array.h>
-#define forall_defined_d(a,b,c,d) forall_defined(c,d)
-#define forall_d(a,b,c,d) forall(c,d)
-#else
-#include <map>
-template<class A,class B>
-class leda_d_array : public map<A,B>
-{
-private:
- B init;
-public:
- bool defined(const A&a) const
- { return find(a)!=this->end(); }
- const B&operator[](const A&a)const
- {
- typename map<A,B>::const_iterator pos=find(a);
- iassert(pos!=this->end());
- if( pos==this->end() )
- return init;
- else
- return pos->second;
- }
- B&operator[](const A&a)
- {
- typename map<A,B>::iterator pos=find(a);
- if( pos==this->end() )
- {
- insert(map<A,B>::value_type(a,init));
- pos=find(a);
- iassert(pos!=this->end());
- }
- return pos->second;
- }
-};
-
-#define forall_defined_d(a,b,c,d) for(typename leda_d_array<a,b>::const_iterator __ii__=(d).begin();__ii__!=(d).end()&&((c=__ii__->first),1) ;++__ii__)
-#define forall_d(a,b,c,d) for(typename leda_d_array<a,b>::const_iterator __ii__=(d).begin();__ii__!=(d).end()&&((c=__ii__->second),1);++__ii__)
-#endif
-
-
-#if defined(USE_LEDA_h_array)||defined(USE_LEDA)
-#include <LEDA/h_array.h>
-#define forall_defined_h(a,b,c,d) forall_defined(c,d)
-#define forall_h(a,b,c,d) forall(c,d)
-#else
-
-double used_time();
-#if 0
-
-#include "my_hashmap.h"
-#define leda_h_array my_hashmap
-
-#else
-
-template<class T ,class _Pr = less<T> >
-class my_hash
-{
-public:
- int operator()(const T&t)const {return Hash(t);}
-#ifdef WIN32
- enum
- { // parameters for hash table
- bucket_size = 1 // 0 < bucket_size
- };
- my_hash()
- : comp()
- { // construct with default comparator
- }
-
- my_hash(_Pr _Pred)
- : comp(_Pred)
- { // construct with _Pred comparator
- }
-protected:
- _Pr comp;
-public:
- int operator()(const T&t , const T&t1)const {return comp(t,t1);}
-#endif
-};
-
-inline int Hash(int value) { return value; }
-#define MY_HASH_BASE hash_map<A,B,my_hash<A> >
-#if __GNUC__>2
-#include <ext/hash_map>
-using __gnu_cxx::hash_map;
-using __gnu_cxx::hash;
-#else
-#include <hash_map>
-#endif
-template<class A,class B>
-class leda_h_array : public MY_HASH_BASE
-{
-private:
- B init;
-public:
- leda_h_array() {}
- leda_h_array(const B&_init)
- : MY_HASH_BASE(),init(_init) {}
- bool defined(const A&a) const
- { return find(a)!=this->end(); }
- const B&operator[](const A&a)const
- {
- typename MY_HASH_BASE::const_iterator pos=this->find(a);
-
- if( pos==this->end() )
- return init;
- else
- return pos->second;
- }
- B&operator[](const A&a)
- {
- typename MY_HASH_BASE::iterator pos=this->find(a);
- if( pos==this->end() )
- {
- this->insert(typename MY_HASH_BASE::value_type(a,init));
- pos=this->find(a);
- iassert(pos!=this->end());
- }
- return pos->second;
- }
-};
-
-#define forall_defined_h(a,b,c,d) for(typename leda_h_array<a,b>::const_iterator __jj__=(d).begin();__jj__!=(d).end()&&((c=__jj__->first),1); ++__jj__)
-#define forall_defined_h2(a,b,c,d) for(leda_h_array<a,b>::const_iterator __jj__=(d).begin();__jj__!=(d).end()&&((c=__jj__->first),1); ++__jj__)
-#define forall_h(a,b,c,d) for(typename leda_h_array<a,b>::const_iterator __jjj__=(d).begin();__jjj__!=(d).end()&&((c=__jjj__->second),1);++__jjj__)
-
-#endif
-
-#endif
-
-
-
-template<class T> int compare(const T&a,const T&b)
-{if(a==b)return 0; else if(a<b) return -1; else return 1;}
-
-template<class T,class U>
-ostream & operator<<(ostream&out,const leda_h_array<T,U>&w)
-{
- T t;
- bool makeNl=0;
- out << "h_array{";
- forall_defined_h(T,U,t,w)
- {
- if( makeNl )
- out << "\n ";
- out << "EL:" << t << " INH:" << w[t] << ".";
- makeNl=1;
- }
- return out << "}\n";
-}
-template<class T,class U>
-ostream & operator<<(ostream&out,const leda_d_array<T,U>&w)
-{
- T t;
- bool makeNl=0;
- out << "h_array{";
- forall_defined_h(T,U,t,w)
- {
- if( makeNl )
- out << "\n ";
- out << "EL:" << t << " INH:" << w[t] << ".";
- makeNl=1;
- }
- return out << "}\n";
-}
-
-template<class T>
-ostream&printSet(ostream&out,const leda_set<T>&s)
-{
- bool first=1;
- T t;
- out << "{";
- forall_set(typename set<T>,t,s)
- {
- if( first==0 )
- out << ", ";
- out << t;
- first=0;
- }
- return out << "}\n";
-}
-
-template<class T,class U>
-istream & operator>>(istream&in,leda_h_array<T,U>&)
-{
- return in;
-}
-
-template<class A,class B>
-bool operator==(const leda_h_array<A,B>&p1,const leda_h_array<A,B>&p2)
-{
- A v;
- forall_defined_h(A,B,v,p1)
- if( !( p1[v]==p2[v]) ) return 0;
- forall_defined_h(A,B,v,p2)
- if( !( p1[v]==p2[v]) ) return 0;
- return 1;
-}
-template<class A,class B>
-bool operator==(const leda_d_array<A,B>&p1,const leda_d_array<A,B>&p2)
-{
- A v;
- forall_defined_d(A,B,v,p1)
- if( !( p1[v]==p2[v]) ) return 0;
- forall_defined_d(A,B,v,p2)
- if( !( p1[v]==p2[v]) ) return 0;
- return 1;
-}
-
-
-
-
-#endif
diff --git a/scripts/training/MGIZA/src/mkcls/mystl.h b/scripts/training/MGIZA/src/mkcls/mystl.h
deleted file mode 100644
index bcda88d..0000000
--- a/scripts/training/MGIZA/src/mkcls/mystl.h
+++ /dev/null
@@ -1,124 +0,0 @@
-/*
-
-Copyright (C) 1997,1998,1999,2000,2001 Franz Josef Och
-
-mkcls - a program for making word classes .
-
-This program is free software; you can redistribute it and/or
-modify it under the terms of the GNU General Public License
-as published by the Free Software Foundation; either version 2
-of the License, or (at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with this program; if not, write to the Free Software
-Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
-USA.
-
-*/
-
-
-
-#ifndef MY_STL_H_DEFINED
-#define MY_STL_H_DEFINED
-#include <string>
-#include <utility>
-#if __GNUC__>2
-#include <ext/hash_map>
-using __gnu_cxx::hash_map;
-using __gnu_cxx::hash;
-#else
-#include <hash_map>
-#endif
-#include <cmath>
-
-using namespace std;
-
-#define over_string(a,i) for(unsigned int i=0;i<a.length();i++)
-
-inline int Hash(const string& s)
-{
- int sum=0;
- string::const_iterator i=s.begin(),end=s.end();
- for(;i!=end;i++)sum=5*sum+(*i);
- return sum;
-}
-
-template<class V> int Hash(const pair<V,V>&a)
-{ return Hash(a.first)+4*Hash(a.second); }
-
-template<class T1,class T2>
-istream& operator>>(istream &in,pair<T1,T2> &ir)
-{
- char c;
-
- do in.get(c); while (in && isspace(c));
-
- if (!in) return in;
-
- if (c != '(') in.putback(c);
-
- in >> ir.first;
-
- do in.get(c); while (isspace(c));
- if (c != ',') in.putback(c);
-
- in >> ir.second;
-
- do in.get(c); while (c == ' ');
- if (c != ')') in.putback(c);
-
- return in;
-}
-
-template<class T1,class T2>
-ostream& operator<<(ostream &out,const pair<T1,T2> &ir)
-{
- out << "(" << ir.first << "," << ir.second << ")";
- return out;
-}
-
-void printSpaces(ostream&out,int n);
-void mysplit(const string &s,string &s1,string &s2);
-string untilChar(const string&s,char c);
-
-template<class A,class B,class C>
-class tri
-{
-public:
- A a;
- B b;
- C c;
- tri(){};
- tri(const A&_a,const B&_b,const C&_c)
- : a(_a),b(_b),c(_c) {}
-};
-template<class A,class B,class C>
-bool operator==(const tri<A,B,C>&x,const tri<A,B,C>&y)
-{ return x.a==y.a&&x.b==y.b&&x.c==y.c;}
-
-template<class A,class B,class C>
-bool operator<(const tri<A,B,C>&x,const tri<A,B,C>&y)
-{
- if(x.a<y.a)
- return 1;
- if(y.a<x.a)
- return 0;
-
- if(x.b<y.b)
- return 1;
- if(y.b<x.b)
- return 0;
-
- if(x.c<y.c)
- return 1;
- if(y.c<x.c)
- return 0;
- return 0;
-}
-
-#endif
diff --git a/scripts/training/MGIZA/src/model1.cpp b/scripts/training/MGIZA/src/model1.cpp
deleted file mode 100644
index 122869f..0000000
--- a/scripts/training/MGIZA/src/model1.cpp
+++ /dev/null
@@ -1,632 +0,0 @@
-/*
-
-EGYPT Toolkit for Statistical Machine Translation
-Written by Yaser Al-Onaizan, Jan Curin, Michael Jahr, Kevin Knight, John Lafferty, Dan Melamed, David Purdy, Franz Och, Noah Smith, and David Yarowsky.
-
-This program is free software; you can redistribute it and/or
-modify it under the terms of the GNU General Public License
-as published by the Free Software Foundation; either version 2
-of the License, or (at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with this program; if not, write to the Free Software
-Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
-USA.
-
-*/
-#include "model1.h"
-#include "Globals.h"
-#include "utility.h"
-#include "Parameter.h"
-
-extern short NoEmptyWord;
-extern int VerboseSentence;
-
-extern short NCPUS;
-
-GLOBAL_PARAMETER2(int,Model1_Dump_Freq,"MODEL 1 DUMP FREQUENCY","t1","dump frequency of Model 1",PARLEV_OUTPUT,0);
-int NumberOfVALIalignments=100;
-
-model1::model1(const char* efname, vcbList& evcblist, vcbList& fvcblist,tmodel<COUNT, PROB>&_tTable,Perplexity& _perp,
- sentenceHandler& _sHandler1,
- Perplexity* _testPerp,
- sentenceHandler* _testHandler,
- Perplexity& _trainViterbiPerp,
- Perplexity* _testViterbiPerp):
- report_info(_perp,_sHandler1,_testPerp,_testHandler,_trainViterbiPerp,_testViterbiPerp),
- efFilename(efname), Elist(evcblist), Flist(fvcblist),
- eTotalWCount(Elist.totalVocab()), fTotalWCount(Flist.totalVocab()),
- noEnglishWords(Elist.size()), noFrenchWords(Flist.size()), tTable(_tTable),
- evlist(Elist.getVocabList()), fvlist(Flist.getVocabList())
-{}
-
-model1::model1 (const model1& m1, int _threadID):
-report_info(m1),efFilename(m1.efFilename),
-Elist(m1.Elist),Flist(m1.Flist),eTotalWCount(m1.eTotalWCount),fTotalWCount(m1.fTotalWCount),
-noEnglishWords(m1.noEnglishWords),noFrenchWords(m1.noFrenchWords),tTable(m1.tTable),
-evlist(m1.evlist),fvlist(m1.fvlist)
-{}
-
-void model1::initialize_table_uniformly(sentenceHandler& sHandler1){
- WordIndex i, j;
-
- cout << "Initialize tTable\n";
-
- sentPair sent ;
- sHandler1.rewind();
- while(sHandler1.getNextSentence(sent)){
- Vector<WordIndex>& es = sent.eSent;
- Vector<WordIndex>& fs = sent.fSent;
- PROB uniform = 1.0/es.size() ;
- for( i=0; i < es.size(); i++)
- for(j=1; j < fs.size(); j++)
- tTable.insert(es[i],fs[j],0,uniform);
- }
-}
-
-struct em_loop_t{
- model1 *m1;
- int it;
- int nthread;
- Dictionary *dict;
- bool useDict;
- int result;
- pthread_t thread;
- int valid ;
-};
-
-void* exe_emloop(void *arg){
- em_loop_t* em =(em_loop_t *) arg;
- em->result = em->m1->em_thread(em->it,em->nthread,*em->dict,em->useDict);
- return arg;
-}
-
-int model1::em_thread(int noIterations, int nthread, /*Perplexity& perp, sentenceHandler& sHandler1, */
- Dictionary& dictionary, bool useDict /*Perplexity* testPerp, sentenceHandler* testHandler,
- Perplexity& trainViterbiPerp, Perplexity* testViterbiPerp */ )
-{
- double minErrors=1.0;int minIter=0;
- string modelName="Model1",shortModelName="1";
- char b[2];
- b[1] = '\0';
- b[0] = '0' + nthread;
- time_t st, it_st, fn, it_fn;
- string tfile, number, alignfile, test_alignfile;
- int pair_no;
- bool dump_files = false ;
- cout << "==========================================================\n";
- cout << modelName << " Training Started at: "<< my_ctime(&st) << "\n";
- int it = noIterations;
- pair_no = 0 ;
- it_st = time(NULL);
- cout << "-----------\n" << modelName << ": Iteration " << it << '\n';
- dump_files = (Model1_Dump_Freq != 0) && ((it % Model1_Dump_Freq) == 0 || noIterations == it) && !NODUMPS ;
-// dump_files = true;
- number = "";
- int n = it;
- do{
- number.insert((size_t)0, 1, (char)(n % 10 + '0'));
- } while((n /= 10) > 0);
- alignfile = Prefix + ".A" + shortModelName + "." + number + ".part" ;
- alignfile = alignfile + b;
-
- em_loop(it,perp, sHandler1, false, dump_files, alignfile.c_str(), dictionary, useDict, trainViterbiPerp);
- return minIter;
-}
-
-int model1::em_with_tricks(int noIterations, /*Perplexity& perp, sentenceHandler& sHandler1, */
- bool seedModel1, Dictionary& dictionary, bool useDict /*Perplexity* testPerp, sentenceHandler* testHandler,
- Perplexity& trainViterbiPerp, Perplexity* testViterbiPerp */
-, bool dumpCount , const char* dumpCountName, bool useString) // If specified, then will dump files before last iteration
-{
- double minErrors=1.0;int minIter=0;
- string modelName="Model1",shortModelName="1";
- time_t st, it_st, fn, it_fn;
- string tfile, number, alignfile, test_alignfile;
- int pair_no;
- bool dump_files = false ;
- st = time(NULL);
- sHandler1.rewind();
- cout << "==========================================================\n";
- cout << modelName << " Training Started at: "<< my_ctime(&st) << "\n";
- for(int it = 1; it <= noIterations; it++){
- pair_no = 0 ;
- it_st = time(NULL);
- cout << "-----------\n" << modelName << ": Iteration " << it << '\n';
- dump_files = (Model1_Dump_Freq != 0) && ((it % Model1_Dump_Freq) == 0 || it == noIterations) && !NODUMPS ;
- //dump_files = true;
- number = "";
- int n = it;
- do{
- number.insert((size_t)0, 1, (char)(n % 10 + '0'));
- } while((n /= 10) > 0);
- tfile = Prefix + ".t" + shortModelName + "." + number ;
- alignfile = Prefix + ".A" + shortModelName + "." + number+".part0" ;
- test_alignfile = Prefix +".tst.A" + shortModelName + "." + number ;
- initAL();
- threadID = 0;
- int th;
- vector<em_loop_t> ths;
- ths.resize(NCPUS);
- sHandler1.rewind();
- for (th=1;th<NCPUS;th++){
- ths[th].m1=this;
- ths[th].it = it;
- ths[th].nthread = th;
- ths[th].dict = & dictionary;
- ths[th].useDict = useDict;
- ths[th].result = 0;
- ths[th].valid = pthread_create(&(ths[th].thread),NULL,exe_emloop,&(ths[th]));
- if(ths[th].valid){
- cerr << "Error starting thread " << th << endl;
- }
- }
- em_loop(it,perp, sHandler1, seedModel1, dump_files, alignfile.c_str(), dictionary, useDict, trainViterbiPerp);
- perp.record("Model1");
- trainViterbiPerp.record("Model1");
- errorReportAL(cout, "IBM-1");
-
- cerr << "Main thread done, waiting" << endl;;
- for (th=1;th<NCPUS;th++){
- pthread_join((ths[th].thread),NULL);
- cerr << "Thread " << th << "done" << endl;
- }
- if (testPerp && testHandler) // calculate test perplexity
- em_loop(it,*testPerp, *testHandler, seedModel1, dump_files, test_alignfile.c_str(), dictionary, useDict, *testViterbiPerp, true);
- if( errorsAL()<minErrors ) {
- minErrors=errorsAL();
- minIter=it;
- }
- //if (dump_files){
- // if( OutputInAachenFormat==1 )
- // tTable.printCountTable(tfile.c_str(),Elist.getVocabList(),Flist.getVocabList(),1);
- //}
- cerr << "Normalizing T " << endl;
-
- /**
- If asked for dumping count table, just dump it.
- */
- if(dumpCount && it == noIterations){
- string realTableName = dumpCountName;
- realTableName += ".t.count";
- tTable.printCountTable(realTableName.c_str(),Elist.getVocabList(),Flist.getVocabList(),useString);
- }
-
- tTable.normalizeTable(Elist, Flist);
- //cout << tTable.getProb(2,2) << endl;
- cerr << " DONE Normalizing " << endl;
- cout << modelName << ": ("<<it<<") TRAIN CROSS-ENTROPY " << perp.cross_entropy()
- << " PERPLEXITY " << perp.perplexity() << '\n';
- if (testPerp && testHandler)
- cout << modelName << ": ("<<it<<") TEST CROSS-ENTROPY " << (*testPerp).cross_entropy()
- << " PERPLEXITY " << (*testPerp).perplexity()
- << '\n';
- cout << modelName << ": ("<<it<<") VITERBI TRAIN CROSS-ENTROPY " << trainViterbiPerp.cross_entropy()
- << " PERPLEXITY " << trainViterbiPerp.perplexity() << '\n';
- if (testPerp && testHandler)
- cout << modelName << ": ("<<
- it<<") VITERBI TEST CROSS-ENTROPY "
- << (*testViterbiPerp).cross_entropy()
- << " PERPLEXITY " << (*testViterbiPerp).perplexity()
- << '\n';
- if (dump_files){
- if( OutputInAachenFormat==0 )
- tTable.printProbTable(tfile.c_str(),Elist.getVocabList(),
- Flist.getVocabList(),OutputInAachenFormat);
- }
- it_fn = time(NULL);
- cout << "Model 1 Iteration: " << it<< " took: " << difftime(it_fn, it_st) << " seconds\n";
-
-
- }
- fn = time(NULL) ;
- cout << "Entire " << modelName << " Training took: " << difftime(fn, st) << " seconds\n";
- return minIter;
-}
-
-bool model1::load_table(const char* tname){
- /* This function loads the t table from the given file; use it
- when you want to load results from previous t training
- without doing any new training.
- NAS, 7/11/99
- */
- cout << "Model1: loading t table \n" ;
- return tTable.readProbTable(tname);
-}
-
-
-extern float MINCOUNTINCREASE;
-void model1::em_loop(int it,Perplexity& perp, sentenceHandler& sHandler1, bool seedModel1,
- bool dump_alignment, const char* alignfile, Dictionary& dict, bool useDict, Perplexity& viterbi_perp, bool test)
-{
- WordIndex i, j, l, m ;
- double cross_entropy;
- int pair_no=0 ;
- perp.clear();
- viterbi_perp.clear();
- ofstream of2;
- // for each sentence pair in the corpus
- if (dump_alignment||FEWDUMPS)
- of2.open(alignfile);
- PROB uniform = 1.0/noFrenchWords ;
- sentPair sent ;
-
- while(sHandler1.getNextSentence(sent)){
- Vector<WordIndex>& es = sent.eSent;
- Vector<WordIndex>& fs = sent.fSent;
- const float so = sent.getCount();
- l = es.size() - 1;
- m = fs.size() - 1;
- cross_entropy = log(1.0);
- Vector<WordIndex> viterbi_alignment(fs.size());
- double viterbi_score = 1 ;
-
-#ifdef WIN32
- bool *eindict = new bool[l + 1];
- bool *findict = new bool[m + 1];
- bool **indict = new bool*[m + 1];
- for(int _i = 0; _i < m+1; _i++)
- indict[_i] = new bool[l + 1];
-#else
- bool eindict[l + 1];
- bool findict[m + 1];
- bool indict[m + 1][l + 1];
-#endif
- if(it == 1 && useDict){
- for(unsigned int dummy = 0; dummy <= l; dummy++) eindict[dummy] = false;
- for(unsigned int dummy = 0; dummy <= m; dummy++){
- findict[dummy] = false;
- for(unsigned int dummy2 = 0; dummy2 <= l; dummy2++)
- indict[dummy][dummy2] = false;
- }
- for(j = 0; j <= m; j++)
- for(i = 0; i <= l; i++)
- if(dict.indict(fs[j], es[i])){
- eindict[i] = findict[j] = indict[j][i] = true;
- }
- }
-
- for(j=1; j <= m; j++){
- // entries that map fs to all possible ei in this sentence.
- Vector<LpPair<COUNT,PROB> *> sPtrCache(es.size(),0); // cache pointers to table
- LpPair<COUNT,PROB> **sPtrCachePtr;
-
- PROB denom = 0.0;
- WordIndex best_i = 0 ; // i for which fj is best maped to ei
- PROB word_best_score = 0 ; // score for the best mapping of fj
- if (it == 1 && !seedModel1){
- denom = uniform * es.size() ;
- word_best_score = uniform ;
- }
- else
- for((i=0),(sPtrCachePtr=&sPtrCache[0]); i <= l; i++,sPtrCachePtr++){
- PROB e(0.0) ;
- (*sPtrCachePtr) = tTable.getPtr(es[i], fs[j]) ;
- if ((*sPtrCachePtr) != 0 && (*((*sPtrCachePtr))).prob > PROB_SMOOTH)
- e = (*((*sPtrCachePtr))).prob;
- else e = PROB_SMOOTH ;
- denom += e ;
- if (e > word_best_score){
- word_best_score = e ;
- best_i = i ;
- }
- }
- viterbi_alignment[j] = best_i ;
- viterbi_score *= word_best_score ; /// denom ;
- if (denom == 0){
- if (test)
- cerr << "WARNING: denom is zero (TEST)\n";
- else
- cerr << "WARNING: denom is zero (TRAIN)\n";
- }
- cross_entropy += log(denom) ;
- if (!test){
- if(denom > 0){
- COUNT val = COUNT(so) / (COUNT) double(denom) ;
- /* this if loop implements a constraint on counting:
- count(es[i], fs[j]) is implemented if and only if
- es[i] and fs[j] occur together in the dictionary,
- OR
- es[i] does not occur in the dictionary with any fs[x] and
- fs[j] does not occur in the dictionary with any es[y]
- */
- if(it == 1 && useDict){
- for((i=0),(sPtrCachePtr=&sPtrCache[0]); i <= l; i++,sPtrCachePtr++){
- if(indict[j][i] || (!findict[j] && !eindict[i])){
- PROB e(0.0) ;
- if (it == 1 && !seedModel1)
- e = uniform ;
- else if ((*sPtrCachePtr) != 0 && (*((*sPtrCachePtr))).prob > PROB_SMOOTH)
- e = (*((*sPtrCachePtr))).prob;
- else e = PROB_SMOOTH ;
- COUNT x=e*val;
- if( (it==1 && !seedModel1)||x>MINCOUNTINCREASE )
- /* if ((*sPtrCachePtr) != 0)
- (*((*sPtrCachePtr))).count += x;
- else */
- tTable.incCount(es[i], fs[j], x);
- } /* end of if */
- } /* end of for i */
- } /* end of it == 1 */
- // Old code:
- else{
- for((i=0),(sPtrCachePtr=&sPtrCache[0]); i <= l; i++,sPtrCachePtr++){
- //for(i=0; i <= l; i++) {
- PROB e(0.0) ;
- if (it == 1 && !seedModel1)
- e = uniform ;
- else if ((*sPtrCachePtr) != 0 && (*((*sPtrCachePtr))).prob > PROB_SMOOTH)
- e = (*((*sPtrCachePtr))).prob;
- else e = PROB_SMOOTH ;
- //if( !(i==0) )
- //cout << "COUNT(e): " << e << " " << MINCOUNTINCREASE << endl;
- COUNT x=e*val;
- if( pair_no==VerboseSentence )
- cout << i << "(" << evlist[es[i]].word << ")," << j << "(" << fvlist[fs[j]].word << ")=" << x << endl;
- if( (it==1 && !seedModel1)||x>MINCOUNTINCREASE ){
- /*if( NoEmptyWord==0 || i!=0 )
- if ((*sPtrCachePtr) != 0)
- (*((*sPtrCachePtr))).count += x;
- else */
- //cerr << i << " " << j << " (+) " << endl;
- //cerr.flush();
- //cerr << es[i] << " " << fs[j] << " (=) "<< endl;
- //cerr.flush();
- tTable.incCount(es[i], fs[j], x);
- //cerr << es[i] << " " << fs[j] << " (-) "<< endl;
- //cerr.flush();
- }
- } /* end of for i */
- } // end of else
- } // end of if (denom > 0)
- }// if (!test)
- } // end of for (j) ;
- sHandler1.setProbOfSentence(sent,cross_entropy);
- //cerr << sent << "CE: " << cross_entropy << " " << so << endl;
- perp.addFactor(cross_entropy-m*log(l+1.0), so, l, m,1);
- viterbi_perp.addFactor(log(viterbi_score)-m*log(l+1.0), so, l, m,1);
- if (dump_alignment||(FEWDUMPS&&sent.sentenceNo<1000))
- printAlignToFile(es, fs, evlist, fvlist, of2, viterbi_alignment, sent.sentenceNo, viterbi_score);
- addAL(viterbi_alignment,sent.sentenceNo,l);
- pair_no++;
-#ifdef WIN32
- delete[] eindict;
- delete[] findict;
- for(int _i = 0; _i < m+1; _i++)
- delete[] indict[_i];
- delete[] indict;
-#endif
- } /* of while */
-}
-
-CTTableDiff<COUNT,PROB>* model1::one_step_em(int it, bool seedModel1,
- Dictionary& dictionary, bool useDict){
- CTTableDiff<COUNT,PROB> *diff = new CTTableDiff<COUNT,PROB>();
- double minErrors=1.0;int minIter=0;
- string modelName="Model1",shortModelName="1";
- time_t st, it_st, fn, it_fn;
- string tfile, number, alignfile, test_alignfile;
- int pair_no;
- bool dump_files = false ;
- st = time(NULL);
- sHandler1.rewind();
- cout << "==========================================================\n";
- cout << modelName << " Training Started at: "<< my_ctime(&st) << "\n";
- pair_no = 0 ;
- it_st = time(NULL);
- cout << "-----------\n" << modelName << ": Iteration " << it << '\n';
- dump_files = (Model1_Dump_Freq != 0) && ((it % Model1_Dump_Freq) == 0) && !NODUMPS ;
- number = "";
- int n = it;
- do{
- number.insert((size_t)0, 1, (char)(n % 10 + '0'));
- } while((n /= 10) > 0);
- tfile = Prefix + ".t" + shortModelName + "." + number ;
- alignfile = Prefix + ".A1" ;
- test_alignfile = Prefix +".tst.A" + shortModelName + "." + number ;
- initAL();
- em_loop_1(diff,it,perp, sHandler1, seedModel1,
- dump_files, alignfile.c_str(), dictionary, useDict, trainViterbiPerp);
- //if (testPerp && testHandler) // calculate test perplexity
- // em_loop(it,*testPerp, *testHandler, seedModel1, dump_files, test_alignfile.c_str(), dictionary, useDict, *testViterbiPerp, true);
- if( errorsAL()<minErrors ){
- minErrors=errorsAL();
- minIter=it;
- }
- fn = time(NULL) ;
- cout << "Partial " << modelName << " Training took: " << difftime(fn, it_st) << " seconds\n";
- return diff;
- }
-
- void model1::combine_one(CTTableDiff<COUNT,PROB>* cb){
- cb->AugmentTTable(tTable);
- }
-
- void model1::recombine(){
- tTable.normalizeTable(Elist, Flist);
- }
-
- void save_table(const char* tname){
-/* if (dump_files){
- * if( OutputInAachenFormat==0 )
- * tTable.printProbTable(tfile.c_str(),Elist.getVocabList(),Flist.getVocabList(),OutputInAachenFormat);
- */
-
- }
-
-
-void model1::em_loop_1(CTTableDiff<COUNT,PROB> *diff,int it,Perplexity& perp, sentenceHandler& sHandler1, bool seedModel1,
- bool dump_alignment, const char* alignfile, Dictionary& dict, bool useDict, Perplexity& viterbi_perp, bool test) {
- WordIndex i, j, l, m ;
- double cross_entropy;
- int pair_no=0 ;
- perp.clear();
- viterbi_perp.clear();
- ofstream of2;
- // for each sentence pair in the corpus
- if (dump_alignment||FEWDUMPS)
- of2.open(alignfile);
- PROB uniform = 1.0/noFrenchWords ;
- sentPair sent ;
- sHandler1.rewind();
- while(sHandler1.getNextSentence(sent)){
- Vector<WordIndex>& es = sent.eSent;
- Vector<WordIndex>& fs = sent.fSent;
- const float so = sent.getCount();
- l = es.size() - 1;
- m = fs.size() - 1;
- cross_entropy = log(1.0);
- Vector<WordIndex> viterbi_alignment(fs.size());
- double viterbi_score = 1 ;
-
-#ifdef WIN32
- bool *eindict = new bool[l + 1];
- bool *findict = new bool[m + 1];
- bool **indict = new bool*[m + 1];
- for(int _i = 0; _i < m+1; _i++)
- indict[_i] = new bool[l + 1];
-#else
- bool eindict[l + 1];
- bool findict[m + 1];
- bool indict[m + 1][l + 1];
-#endif
- if(it == 1 && useDict){
- for(unsigned int dummy = 0; dummy <= l; dummy++) eindict[dummy] = false;
- for(unsigned int dummy = 0; dummy <= m; dummy++){
- findict[dummy] = false;
- for(unsigned int dummy2 = 0; dummy2 <= l; dummy2++)
- indict[dummy][dummy2] = false;
- }
- for(j = 0; j <= m; j++)
- for(i = 0; i <= l; i++)
- if(dict.indict(fs[j], es[i])){
- eindict[i] = findict[j] = indict[j][i] = true;
- }
- }
-
- for(j=1; j <= m; j++){
- // entries that map fs to all possible ei in this sentence.
- Vector<LpPair<COUNT,PROB> *> sPtrCache(es.size(),0); // cache pointers to table
- //Vector<COUNT *> sPtrCacheDif(es.size(),0); // cache pointers to table
- LpPair<COUNT,PROB> **sPtrCachePtr;
- //COUNT **sPtrCachePtrDif;
-
- PROB denom = 0.0;
- WordIndex best_i = 0 ; // i for which fj is best maped to ei
- PROB word_best_score = 0 ; // score for the best mapping of fj
- if (it == 1 && !seedModel1){
- denom = uniform * es.size() ;
- word_best_score = uniform ;
- }
- else {
- for((i=0),(sPtrCachePtr=&sPtrCache[0]); i <= l; i++,sPtrCachePtr++){
- PROB e(0.0) ;
- (*sPtrCachePtr) = tTable.getPtr(es[i], fs[j]) ;
- //(*sPtrCachePtrDif) = diff->GetPtr(es[i], fs[j]) ;
- if ((*sPtrCachePtr) != 0 && (*((*sPtrCachePtr))).prob > PROB_SMOOTH)
- e = (*((*sPtrCachePtr))).prob;
- else e = PROB_SMOOTH ;
- denom += e ;
- if (e > word_best_score){
- word_best_score = e ;
- best_i = i ;
- }
- }
- }
- viterbi_alignment[j] = best_i ;
- viterbi_score *= word_best_score ; /// denom ;
- if (denom == 0){
- if (test)
- cerr << "WARNING: denom is zero (TEST)\n";
- else
- cerr << "WARNING: denom is zero (TRAIN)\n";
- }
- cross_entropy += log(denom) ;
- if (!test){
- if(denom > 0){
- COUNT val = COUNT(so) / (COUNT) double(denom) ;
- /* this if loop implements a constraint on counting:
- count(es[i], fs[j]) is implemented if and only if
- es[i] and fs[j] occur together in the dictionary,
- OR
- es[i] does not occur in the dictionary with any fs[x] and
- fs[j] does not occur in the dictionary with any es[y]
- */
- if(it == 1 && useDict){
- for((i=0),(sPtrCachePtr=&sPtrCache[0]);
- i <= l; i++,sPtrCachePtr++){
- if(indict[j][i] || (!findict[j] && !eindict[i])){
- PROB e(0.0) ;
- if (it == 1 && !seedModel1)
- e = uniform ;
- else if ((*sPtrCachePtr) != 0 && (*((*sPtrCachePtr))).prob > PROB_SMOOTH)
- e = (*((*sPtrCachePtr))).prob;
- else e = PROB_SMOOTH ;
- COUNT x=e*val;
- if( it==1||x>MINCOUNTINCREASE ){
- /*if ((*sPtrCachePtr) != 0){
- (*((*sPtrCachePtr))).count += x;
- } else {*/
- tTable.incCount(es[i], fs[j], x);
- //}
- diff->incCount(es[i], fs[j], x);
- }
- } /* end of if */
- } /* end of for i */
- } /* end of it == 1 */
- // Old code:
- else{
- for((i=0),(sPtrCachePtr=&sPtrCache[0]); i <= l; i++,sPtrCachePtr++){
- //for(i=0; i <= l; i++) {
- PROB e(0.0) ;
- if (it == 1 && !seedModel1)
- e = uniform ;
- else if ((*sPtrCachePtr) != 0 && (*((*sPtrCachePtr))).prob > PROB_SMOOTH)
- e = (*((*sPtrCachePtr))).prob;
- else e = PROB_SMOOTH ;
- //if( !(i==0) )
- //cout << "COUNT(e): " << e << " " << MINCOUNTINCREASE << endl;
- COUNT x=e*val;
- if( pair_no==VerboseSentence )
- cout << i << "(" << evlist[es[i]].word << "),"
- << j << "(" << fvlist[fs[j]].word << ")=" << x << endl;
- if( it==1||x>MINCOUNTINCREASE )
- if( NoEmptyWord==0 || ( NoEmptyWord==0 || i!=0 )){
- /*if ((*sPtrCachePtr) != 0){
- (*((*sPtrCachePtr))).count += x;
- } else */
- tTable.incCount(es[i], fs[j], x);
- diff->incCount(es[i], fs[j], x);
- }
- } /* end of for i */
- } // end of else
- } // end of if (denom > 0)
- }// if (!test)
- } // end of for (j) ;
- sHandler1.setProbOfSentence(sent,cross_entropy);
- //cerr << sent << "CE: " << cross_entropy << " " << so << endl;
- perp.addFactor(cross_entropy-m*log(l+1.0), so, l, m,1);
- viterbi_perp.addFactor(log(viterbi_score)-m*log(l+1.0), so, l, m,1);
- if (dump_alignment||(FEWDUMPS&&sent.sentenceNo<1000))
- printAlignToFile(es, fs, evlist, fvlist, of2, viterbi_alignment, sent.sentenceNo, viterbi_score);
- addAL(viterbi_alignment,sent.sentenceNo,l);
- pair_no++;
-#ifdef WIN32
- delete[] eindict;
- delete[] findict;
- for(int _i = 0; _i < m+1; _i++)
- delete[] indict[_i];
- delete[] indict;
-#endif
- } /* of while */
- sHandler1.rewind();
- perp.record("Model1");
- viterbi_perp.record("Model1");
- errorReportAL(cout, "IBM-1");
-
- }
diff --git a/scripts/training/MGIZA/src/model1.h b/scripts/training/MGIZA/src/model1.h
deleted file mode 100644
index cadc1e3..0000000
--- a/scripts/training/MGIZA/src/model1.h
+++ /dev/null
@@ -1,205 +0,0 @@
-/*
-
-EGYPT Toolkit for Statistical Machine Translation
-Written by Yaser Al-Onaizan, Jan Curin, Michael Jahr, Kevin Knight, John Lafferty, Dan Melamed, David Purdy, Franz Och, Noah Smith, and David Yarowsky.
-
-This program is free software; you can redistribute it and/or
-modify it under the terms of the GNU General Public License
-as published by the Free Software Foundation; either version 2
-of the License, or (at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with this program; if not, write to the Free Software
-Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
-USA.
-
-*/
-#ifndef _model1_h
-#define _model1_h 1
-
-#include <assert.h>
-
-#include <iostream>
-#include <strstream>
-#include <algorithm>
-#include <functional>
-#include <list>
-#include <map>
-#include <set>
-#include <utility>
-
-#if __GNUC__>2
-#include <ext/hash_map>
-using __gnu_cxx::hash_map;
-#else
-#include <hash_map>
-#endif
-#include <time.h>
-#include <fstream>
-#include <math.h>
-#include <stdio.h>
-
-#include "Vector.h"
-#include "vocab.h"
-#include "TTables.h"
-#include "getSentence.h"
-#include "Perplexity.h"
-#include "vocab.h"
-#include "Dictionary.h"
-#include "ttableDiff.hpp"
-#include "syncObj.h"
-
-extern int NumberOfVALIalignments;
-
-class report_info{
- public:
- Mutex alLock;
- Perplexity& perp;
- sentenceHandler& sHandler1;
- Perplexity* testPerp;
- sentenceHandler* testHandler;
- Perplexity& trainViterbiPerp;
- Perplexity* testViterbiPerp;
- report_info(Perplexity& _perp,
- sentenceHandler& _sHandler1,
- Perplexity* _testPerp,
- sentenceHandler* _testHandler,
- Perplexity& _trainViterbiPerp,
- Perplexity* _testViterbiPerp)
- : perp(_perp),sHandler1(_sHandler1),testPerp(_testPerp),testHandler(_testHandler),trainViterbiPerp(_trainViterbiPerp),testViterbiPerp(_testViterbiPerp)
- {}
-
- report_info(const report_info & rp):
- perp(rp.perp),sHandler1(rp.sHandler1), testPerp(rp.testPerp),
- trainViterbiPerp(rp.trainViterbiPerp), testViterbiPerp(rp.testViterbiPerp),
- testHandler(rp.testHandler)
- {}
-};
-
-
-class model1 : public report_info{
-public:
- string efFilename;
- vcbList& Elist ;
- vcbList& Flist ;
- double eTotalWCount ; // size of source copus in number of words
- double fTotalWCount ; // size of target corpus in number of words
- int noEnglishWords;
- int noFrenchWords;
- tmodel<COUNT, PROB>&tTable;
- Vector<WordEntry>& evlist ;
- Vector<WordEntry>& fvlist ;
- int threadID;
-public:
- int ALmissing,ALtoomuch,ALeventsMissing,ALeventsToomuch;
- int ALmissingVALI,ALtoomuchVALI,ALeventsMissingVALI,ALeventsToomuchVALI;
- int ALmissingTEST,ALtoomuchTEST,ALeventsMissingTEST,ALeventsToomuchTEST;
- model1 (const char* efname, vcbList& evcblist, vcbList& fvcblist,tmodel<COUNT, PROB>&_tTable,Perplexity& _perp,
- sentenceHandler& _sHandler1,
- Perplexity* _testPerp,
- sentenceHandler* _testHandler,
- Perplexity& _trainViterbiPerp,
- Perplexity* _testViterbiPerp);
-
- model1 (const model1& m1, int _threadID=0);
- void initialize_table_uniformly(sentenceHandler& sHandler1);
-
- int em_with_tricks(int noIterations,
- bool seedModel1, Dictionary& dictionary, bool useDict, bool dumpCount = false,
- const char* dumpCountName = NULL, bool useString = false);
- int em_thread(int noIterations, int thread,Dictionary& dictionary, bool useDict);
- bool load_table(const char* tname);
- void readVocabFile(const char* fname, Vector<WordEntry>& vlist, int& vsize,
- int& total);
- inline Vector<WordEntry>& getEnglishVocabList(void)const {return Elist.getVocabList();};
- inline Vector<WordEntry>& getFrenchVocabList(void)const {return Flist.getVocabList();};
- inline double getETotalWCount(void) const {return eTotalWCount;};
- inline double getFTotalWCount(void) const {return fTotalWCount;};
- inline int getNoEnglishWords(void) const {return noEnglishWords;};
- inline int getNoFrenchWords(void) const {return noFrenchWords;};
- inline tmodel<COUNT, PROB>& getTTable(void) {return tTable;};
- inline string& getEFFilename(void) {return efFilename;};
-
-////////////////////////////////////////////////////////////////
-// Added by Qin Gao To Enable Parallel Training
-////////////////////////////////////////////////////////////////
-
- CTTableDiff<COUNT,PROB>* one_step_em(int it ,bool seedModel1, Dictionary& dictionary,
- bool useDict);
-
- void recombine();
-
- void combine_one(CTTableDiff<COUNT,PROB>* cb);
-
- void save_table(const char* tname);
-
-
-
-
-////////////////////////////////////////////////////////////////
-// END OF QIN GAO's CODE
-////////////////////////////////////////////////////////////////
-private:
- void em_loop(int it,Perplexity& perp, sentenceHandler& sHandler1, bool seedModel1, bool , const char*, Dictionary& dictionary, bool useDict,
- Perplexity& viterbiperp, bool=false);
- void em_loop_1(CTTableDiff<COUNT,PROB> *diff,int it,Perplexity& perp, sentenceHandler& sHandler1, bool seedModel1, bool , const char*, Dictionary& dictionary, bool useDict,
- Perplexity& viterbiperp, bool=false);
- friend class model2;
- friend class hmm;
-public:
- void addAL(const Vector<WordIndex>& viterbi_alignment,int pair_no,int l){
- alLock.lock();
- if( pair_no<=int(ReferenceAlignment.size()) ){
- //cerr << "AL: " << viterbi_alignment << " " << pair_no << endl;
- ErrorsInAlignment(ReferenceAlignment[pair_no-1],viterbi_alignment,l,ALmissing,ALtoomuch,ALeventsMissing,ALeventsToomuch,pair_no);
- if( pair_no<=NumberOfVALIalignments ){
- ErrorsInAlignment(ReferenceAlignment[pair_no-1],viterbi_alignment,l,ALmissingVALI,ALtoomuchVALI,ALeventsMissingVALI,ALeventsToomuchVALI,pair_no);
- }
- if( pair_no>NumberOfVALIalignments ){
- ErrorsInAlignment(ReferenceAlignment[pair_no-1],viterbi_alignment,l,ALmissingTEST,ALtoomuchTEST,ALeventsMissingTEST,ALeventsToomuchTEST,pair_no);
- }
- }
- alLock.unlock();
- }
- void initAL(){ALmissingVALI=ALtoomuchVALI=ALeventsMissingVALI=ALeventsToomuchVALI=ALmissingTEST=ALtoomuchTEST=ALeventsMissingTEST=ALeventsToomuchTEST=ALmissing=ALtoomuch=ALeventsMissing=ALeventsToomuch=0;}
- double errorsAL()const{
- if( ALeventsMissingVALI+ALeventsToomuchVALI ){
- return (ALmissingVALI+ALtoomuchVALI)/double(ALeventsMissingVALI+ALeventsToomuchVALI);
- }else{
- return 0.0;
- }
- }
- void errorReportAL(ostream&out,string m)const{
- if( ALeventsMissing+ALeventsToomuch ){
- out << "alignmentErrors (" << m << "): "
- << 100.0*(ALmissing+ALtoomuch)/double(ALeventsMissing+ALeventsToomuch)
- << " recall: " << 100.0*(1.0-ALmissing/double(ALeventsMissing))
- << " precision: " << 100.0*(1.0-ALtoomuch/double(ALeventsToomuch))
- << " (missing:" << ALmissing << "/" << ALeventsMissing << " " << ALtoomuch
- << " " << ALeventsToomuch << ")\n";
- }
- if( ALeventsMissingVALI+ALeventsToomuchVALI ){
- out << "alignmentErrors VALI (" << m << "): "
- << 100.0*(ALmissingVALI+ALtoomuchVALI)/double(ALeventsMissingVALI+ALeventsToomuchVALI)
- << " recall: " << 100.0*(1.0-ALmissingVALI/double(ALeventsMissingVALI))
- << " precision: " << 100.0*(1.0-ALtoomuchVALI/double(ALeventsToomuchVALI))
- << " (missing:" << ALmissingVALI << "/" << ALeventsMissingVALI << " " << ALtoomuchVALI
- << " " << ALeventsToomuchVALI << ")\n";
- }
- if( ALeventsMissingTEST+ALeventsToomuchTEST ){
- out << "alignmentErrors TEST(" << m << "): "
- << 100.0*(ALmissingTEST+ALtoomuchTEST)/double(ALeventsMissingTEST+ALeventsToomuchTEST)
- << " recall: " << 100.0*(1.0-ALmissingTEST/double(ALeventsMissingTEST))
- << " precision: " << 100.0*(1.0-ALtoomuchTEST/double(ALeventsToomuchTEST))
- << " (missing:" << ALmissingTEST << "/" << ALeventsMissingTEST << " " << ALtoomuchTEST
- << " " << ALeventsToomuchTEST << ")\n";
- }
- }
-};
-
-#endif
diff --git a/scripts/training/MGIZA/src/model2.cpp b/scripts/training/MGIZA/src/model2.cpp
deleted file mode 100644
index dddde77..0000000
--- a/scripts/training/MGIZA/src/model2.cpp
+++ /dev/null
@@ -1,238 +0,0 @@
-/*
-
-EGYPT Toolkit for Statistical Machine Translation
-Written by Yaser Al-Onaizan, Jan Curin, Michael Jahr, Kevin Knight, John Lafferty, Dan Melamed, David Purdy, Franz Och, Noah Smith, and David Yarowsky.
-
-This program is free software; you can redistribute it and/or
-modify it under the terms of the GNU General Public License
-as published by the Free Software Foundation; either version 2
-of the License, or (at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with this program; if not, write to the Free Software
-Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
-USA.
-
-*/
-#include "model2.h"
-#include "Globals.h"
-#include "utility.h"
-#include "Parameter.h"
-#include "defs.h"
-
-extern short NoEmptyWord;
-
-
-GLOBAL_PARAMETER2(int,Model2_Dump_Freq,"MODEL 2 DUMP FREQUENCY","t2","dump frequency of Model 2",PARLEV_OUTPUT,0);
-
-model2::model2(model1& m,amodel<PROB>&_aTable,amodel<COUNT>&_aCountTable):
- model1(m),aTable(_aTable),aCountTable(_aCountTable)
-{ }
-
-void model2::initialize_table_uniformly(sentenceHandler& sHandler1){
- // initialize the aTable uniformly (run this before running em_with_tricks)
- int n=0;
- sentPair sent ;
- sHandler1.rewind();
- while(sHandler1.getNextSentence(sent)){
- Vector<WordIndex>& es = sent.eSent;
- Vector<WordIndex>& fs = sent.fSent;
- WordIndex l = es.size() - 1;
- WordIndex m = fs.size() - 1;
- n++;
- if(1<=m&&aTable.getValue(l,m,l,m)<=PROB_SMOOTH){
- PROB uniform_val = 1.0 / (l+1) ;
- for(WordIndex j=1; j <= m; j++)
- for(WordIndex i=0; i <= l; i++)
- aTable.setValue(i,j, l, m, uniform_val);
- }
- }
-}
-
-int model2::em_with_tricks(int noIterations,bool dumpCount,
- const char* dumpCountName, bool useString){
- double minErrors=1.0;int minIter=0;
- string modelName="Model2",shortModelName="2";
- time_t it_st, st, it_fn, fn;
- string tfile, afile, number, alignfile, test_alignfile;
- int pair_no = 0;
- bool dump_files = false ;
- ofstream of2 ;
- st = time(NULL) ;
- sHandler1.rewind();
- cout << "\n==========================================================\n";
- cout << modelName << " Training Started at: " << my_ctime(&st) << " iter: " << noIterations << "\n";
- for(int it=1; it <= noIterations ; it++){
- pair_no = 0;
- it_st = time(NULL) ;
- cout << endl << "-----------\n" << modelName << ": Iteration " << it << '\n';
- dump_files = (Model2_Dump_Freq != 0) && ((it % Model2_Dump_Freq) == 0) && !NODUMPS;
- number = "";
- int n = it;
- do{
- number.insert((size_t)0, 1, (char)(n % 10 + '0'));
- } while((n /= 10) > 0);
- tfile = Prefix + ".t" + shortModelName + "." + number ;
- afile = Prefix + ".a" + shortModelName + "." + number ;
- alignfile = Prefix + ".A" + shortModelName + "." + number ;
- test_alignfile = Prefix + ".tst.A" + shortModelName + "." + number ;
- aCountTable.clear();
- initAL();
- em_loop(perp, sHandler1, dump_files, alignfile.c_str(), trainViterbiPerp, false);
- if( errorsAL()<minErrors ){
- minErrors=errorsAL();
- minIter=it;
- }
- if (testPerp && testHandler)
- em_loop(*testPerp, *testHandler, dump_files, test_alignfile.c_str(), *testViterbiPerp, true);
- if (dump_files&&OutputInAachenFormat==1)
- tTable.printCountTable(tfile.c_str(),Elist.getVocabList(),Flist.getVocabList(),1);
-
- if(dumpCount && it == noIterations){
- string realTableName = dumpCountName;
- realTableName += ".t.count";
- tTable.printCountTable(realTableName.c_str(),Elist.getVocabList(),Flist.getVocabList(),useString);
- string realATableName = dumpCountName;
- realATableName += ".a.count";
- aCountTable.printRealTable(realATableName.c_str());
- }
- tTable.normalizeTable(Elist, Flist);
- aCountTable.normalize(aTable);
- cout << modelName << ": ("<<it<<") TRAIN CROSS-ENTROPY " << perp.cross_entropy()
- << " PERPLEXITY " << perp.perplexity() << '\n';
- if (testPerp && testHandler)
- cout << modelName << ": ("<<it<<") TEST CROSS-ENTROPY " << (*testPerp).cross_entropy()
- << " PERPLEXITY " << (*testPerp).perplexity()
- << '\n';
- cout << modelName << ": ("<<it<<") VITERBI TRAIN CROSS-ENTROPY " << trainViterbiPerp.cross_entropy()
- << " PERPLEXITY " << trainViterbiPerp.perplexity() << '\n';
- if (testPerp && testHandler)
- cout << modelName << ": ("<<it<<") VITERBI TEST CROSS-ENTROPY " << testViterbiPerp->cross_entropy()
- << " PERPLEXITY " << testViterbiPerp->perplexity()
- << '\n';
- if (dump_files) {
- if(OutputInAachenFormat==0)
- tTable.printProbTable(tfile.c_str(),Elist.getVocabList(),Flist.getVocabList(),OutputInAachenFormat);
- aCountTable.printTable(afile.c_str());
- }
- it_fn = time(NULL) ;
- cout << modelName << " Iteration: " << it<< " took: " << difftime(it_fn, it_st) << " seconds\n";
- } // end of iterations
- aCountTable.clear();
- fn = time(NULL) ;
- cout << endl << "Entire " << modelName << " Training took: " << difftime(fn, st) << " seconds\n";
- // cout << "tTable contains " << tTable.getHash().bucket_count()
- // << " buckets and " << tTable.getHash().size() << " entries." ;
- cout << "==========================================================\n";
- return minIter;
-}
-
-void model2::load_table(const char* aname){
- /* This function loads the a table from the given file; use it
- when you want to load results from previous a training without
- doing any new training.
- NAS, 7/11/99
- */
- cout << "Model2: loading a table \n";
- aTable.readTable(aname);
-}
-
-
-void model2::em_loop(Perplexity& perp, sentenceHandler& sHandler1,
- bool dump_alignment, const char* alignfile, Perplexity& viterbi_perp,
- bool test)
-{
- massert( aTable.is_distortion==0 );
- massert( aCountTable.is_distortion==0 );
- WordIndex i, j, l, m ;
- double cross_entropy;
- int pair_no=0 ;
- perp.clear();
- viterbi_perp.clear();
- ofstream of2;
- // for each sentence pair in the corpus
- if (dump_alignment||FEWDUMPS )
- of2.open(alignfile);
- sentPair sent ;
-
- vector<double> ferts(evlist.size());
-
- sHandler1.rewind();
- while(sHandler1.getNextSentence(sent)){
- Vector<WordIndex>& es = sent.eSent;
- Vector<WordIndex>& fs = sent.fSent;
- const float so = sent.getCount();
- l = es.size() - 1;
- m = fs.size() - 1;
- cross_entropy = log(1.0);
- Vector<WordIndex> viterbi_alignment(fs.size());
- double viterbi_score = 1;
- for(j=1; j <= m; j++){
- Vector<LpPair<COUNT,PROB> *> sPtrCache(es.size(),0); // cache pointers to table
- // entries that map fs to all possible ei in this sentence.
- PROB denom = 0.0;
- PROB e = 0.0, word_best_score = 0;
- WordIndex best_i = 0 ; // i for which fj is best maped to ei
- for(i=0; i <= l; i++){
- sPtrCache[i] = tTable.getPtr(es[i], fs[j]) ;
- if (sPtrCache[i] != 0 &&(*(sPtrCache[i])).prob > PROB_SMOOTH )
- e = (*(sPtrCache[i])).prob * aTable.getValue(i,j, l, m) ;
- else e = PROB_SMOOTH * aTable.getValue(i,j, l, m);
- denom += e ;
- if (e > word_best_score){
- word_best_score = e ;
- best_i = i ;
- }
- }
- viterbi_alignment[j] = best_i ;
- viterbi_score *= word_best_score; ///denom ;
- cross_entropy += log(denom) ;
- if (denom == 0){
- if (test)
- cerr << "WARNING: denom is zero (TEST)\n";
- else
- cerr << "WARNING: denom is zero (TRAIN)\n";
- }
- if (!test){
- if(denom > 0){
- COUNT val = COUNT(so) / (COUNT) double(denom) ;
- for( i=0; i <= l; i++){
- PROB e(0.0);
- if (sPtrCache[i] != 0 && (*(sPtrCache[i])).prob > PROB_SMOOTH)
- e = (*(sPtrCache[i])).prob ;
- else e = PROB_SMOOTH ;
- e *= aTable.getValue(i,j, l, m);
- COUNT temp = COUNT(e) * val ;
- if( NoEmptyWord==0 || i!=0 )
- if (sPtrCache[i] != 0)
- (*(sPtrCache[i])).count += temp ;
- else
- tTable.incCount(es[i], fs[j], temp);
- aCountTable.addValue(i,j, l, m,temp) ;
- } /* end of for i */
- } // end of if (denom > 0)
- }// if (!test)
- } // end of for (j) ;
- sHandler1.setProbOfSentence(sent,cross_entropy);
- perp.addFactor(cross_entropy, so, l, m,1);
- viterbi_perp.addFactor(log(viterbi_score), so, l, m,1);
- if (dump_alignment||(FEWDUMPS&&sent.sentenceNo<1000) )
- printAlignToFile(es, fs, Elist.getVocabList(), Flist.getVocabList(), of2, viterbi_alignment, sent.sentenceNo, viterbi_score);
- addAL(viterbi_alignment,sent.sentenceNo,l);
- pair_no++;
- } /* of while */
- sHandler1.rewind();
- perp.record("Model2");
- viterbi_perp.record("Model2");
- errorReportAL(cout,"IBM-2");
-}
-
-
-
-
-
diff --git a/scripts/training/MGIZA/src/model2.h b/scripts/training/MGIZA/src/model2.h
deleted file mode 100644
index ff8e993..0000000
--- a/scripts/training/MGIZA/src/model2.h
+++ /dev/null
@@ -1,70 +0,0 @@
-/*
-
-EGYPT Toolkit for Statistical Machine Translation
-Written by Yaser Al-Onaizan, Jan Curin, Michael Jahr, Kevin Knight, John Lafferty, Dan Melamed, David Purdy, Franz Och, Noah Smith, and David Yarowsky.
-
-This program is free software; you can redistribute it and/or
-modify it under the terms of the GNU General Public License
-as published by the Free Software Foundation; either version 2
-of the License, or (at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with this program; if not, write to the Free Software
-Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
-USA.
-
-*/
-#ifndef _model2_h
-#define _model2_h 1
-
-#include <assert.h>
-
-#include <iostream>
-#include <algorithm>
-#include <functional>
-#include <map>
-#include <set>
-#include "Vector.h"
-#include <utility>
-
-#if __GNUC__>2
-#include <ext/hash_map>
-using __gnu_cxx::hash_map;
-#else
-#include <hash_map>
-#endif
-
-#include <fstream>
-#include <math.h>
-#include <time.h>
-
-#include "TTables.h"
-#include "ATables.h"
-#include "getSentence.h"
-#include "defs.h"
-#include "model1.h"
-#include "Perplexity.h"
-#include "vocab.h"
-
-class model2 : public model1{
-public:
- amodel<PROB>&aTable;
- amodel<COUNT>&aCountTable;
-public:
- model2(model1& m1,amodel<PROB>&,amodel<COUNT>&);
- void initialize_table_uniformly(sentenceHandler&);
- int em_with_tricks(int iterations,bool dumpCount = false,
- const char* dumpCountName = NULL, bool useString = false);
- void load_table(const char* aname);
- inline amodel<PROB>& getATable(void) {return aTable;};
- inline amodel<COUNT>& getACountTable(void) {return aCountTable;};
- void em_loop(Perplexity& perp,sentenceHandler& sHandler1, bool dump_files,const char* alignfile, Perplexity&, bool test);
- friend class model3;
-};
-
-#endif
diff --git a/scripts/training/MGIZA/src/model2to3.cpp b/scripts/training/MGIZA/src/model2to3.cpp
deleted file mode 100644
index 4c6d729..0000000
--- a/scripts/training/MGIZA/src/model2to3.cpp
+++ /dev/null
@@ -1,398 +0,0 @@
-/*
-
-EGYPT Toolkit for Statistical Machine Translation
-Written by Yaser Al-Onaizan, Jan Curin, Michael Jahr, Kevin Knight, John Lafferty, Dan Melamed, David Purdy, Franz Och, Noah Smith, and David Yarowsky.
-
-This program is free software; you can redistribute it and/or
-modify it under the terms of the GNU General Public License
-as published by the Free Software Foundation; either version 2
-of the License, or (at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with this program; if not, write to the Free Software
-Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
-USA.
-
-*/
-#include "model3.h"
-#include "utility.h"
-#include "Globals.h"
-
-#define _MAX_FERTILITY 10
-
-double get_sum_of_partitions(int n, int source_pos, double alpha[_MAX_FERTILITY][MAX_SENTENCE_LENGTH_ALLOWED])
-{
- int done, init ;
- double sum = 0, prod ;
- int s, w, u, v;
- WordIndex k, k1, i ;
- WordIndex num_parts = 0 ;
- int total_partitions_considered = 0;
-
- int part[_MAX_FERTILITY], mult[_MAX_FERTILITY];
-
- done = false ;
- init = true ;
- for (i = 0 ; i < _MAX_FERTILITY ; i++){
- part[i] = mult[i] = 0 ;
- }
-
- //printf("Entering get sum of partitions\n");
- while(! done){
- total_partitions_considered++;
- if (init){
- part[1] = n ;
- mult[1] = 1 ;
- num_parts = 1 ;
- init = false ;
- }
- else {
- if ((part[num_parts] > 1) || (num_parts > 1)){
- if (part[num_parts] == 1){
- s = part[num_parts-1] + mult[num_parts];
- k = num_parts - 1;
- }
- else {
- s = part[num_parts];
- k = num_parts ;
- }
- w = part[k] - 1 ;
- u = s / w ;
- v = s % w ;
- mult[k] -= 1 ;
- if (mult[k] == 0)
- k1 = k ;
- else k1 = k + 1 ;
- mult[k1] = u ;
- part[k1] = w ;
- if (v == 0){
- num_parts = k1 ;
- }
- else {
- mult[k1+1] = 1 ;
- part[k1+1] = v ;
- num_parts = k1 + 1;
- }
- } /* of if num_parts > 1 || part[num_parts] > 1 */
- else {
- done = true ;
- }
- }
- /* of else of if(init) */
- if (!done){
- prod = 1.0 ;
- if (n != 0)
- for (i = 1 ; i <= num_parts ; i++){
- prod *= pow(alpha[part[i]][source_pos], mult[i]) / factorial(mult[i]) ;
- }
- sum += prod ;
- }
- } /* of while */
- if (sum < 0) sum = 0 ;
- return(sum) ;
-}
-
-void model3::estimate_t_a_d(sentenceHandler& sHandler1, Perplexity& perp, Perplexity& trainVPerp,
- bool simple, bool dump_files,bool updateT)
-{
- string tfile, nfile, dfile, p0file, afile, alignfile;
- WordIndex i, j, l, m, max_fertility_here, k ;
- PROB val, temp_mult[MAX_SENTENCE_LENGTH_ALLOWED][MAX_SENTENCE_LENGTH_ALLOWED];
- double cross_entropy;
- double beta, sum,
- alpha[_MAX_FERTILITY][MAX_SENTENCE_LENGTH_ALLOWED];
- double total, temp, r ;
-
- dCountTable.clear();
- aCountTable.clear();
- initAL();
- nCountTable.clear() ;
- if (simple)
- nTable.clear();
- perp.clear() ;
- trainVPerp.clear() ;
- ofstream of2;
- if (dump_files){
- alignfile = Prefix +".A2to3";
- of2.open(alignfile.c_str());
- }
- if (simple) cerr <<"Using simple estimation for fertilties\n";
- sHandler1.rewind() ;
- sentPair sent ;
- while(sHandler1.getNextSentence(sent)){
- Vector<WordIndex>& es = sent.eSent;
- Vector<WordIndex>& fs = sent.fSent;
- const float count = sent.getCount();
- Vector<WordIndex> viterbi_alignment(fs.size());
- l = es.size() - 1;
- m = fs.size() - 1;
- cross_entropy = log(1.0);
- double viterbi_score = 1 ;
- PROB word_best_score ; // score for the best mapping of fj
- for(j = 1 ; j <= m ; j++){
- word_best_score = 0 ; // score for the best mapping of fj
- Vector<LpPair<COUNT,PROB> *> sPtrCache(es.size(),0);
- total = 0 ;
- WordIndex best_i = 0 ;
- for(i = 0; i <= l ; i++){
- sPtrCache[i] = tTable.getPtr(es[i], fs[j]) ;
- if (sPtrCache[i] != 0 && (*(sPtrCache[i])).prob > PROB_SMOOTH) // if valid pointer
- temp_mult[i][j]= (*(sPtrCache[i])).prob * aTable.getValue(i, j, l, m) ;
- else
- temp_mult[i][j] = PROB_SMOOTH * aTable.getValue(i, j, l, m) ;
- total += temp_mult[i][j] ;
- if (temp_mult[i][j] > word_best_score){
- word_best_score = temp_mult[i][j] ;
- best_i = i ;
- }
- } // end of for (i)
- viterbi_alignment[j] = best_i ;
- viterbi_score *= word_best_score ; /// total ;
- cross_entropy += log(total) ;
- if (total == 0){
- cerr << "WARNING: total is zero (TRAIN)\n";
- viterbi_score = 0 ;
- }
- if (total > 0){
- for(i = 0; i <= l ; i++){
- temp_mult[i][j] /= total ;
- if (temp_mult[i][j] == 1) // smooth to prevent underflow
- temp_mult[i][j] = 0.99 ;
- else if (temp_mult[i][j] == 0)
- temp_mult[i][j] = PROB_SMOOTH ;
- val = temp_mult[i][j] * PROB(count) ;
- if ( val > PROB_SMOOTH) {
- if( updateT )
- {
- if (sPtrCache[i] != 0)
- (*(sPtrCache[i])).count += val ;
- else
- tTable.incCount(es[i], fs[j], val);
- }
- aCountTable.addValue(i, j, l, m,val);
- if (0 != i)
- dCountTable.addValue(j, i, l, m,val);
- }
- } // for (i = ..)
- } // for (if total ...)
- } // end of for (j ...)
- if (dump_files)
- printAlignToFile(es, fs, Elist.getVocabList(), Flist.getVocabList(), of2, viterbi_alignment, sent.sentenceNo, viterbi_score);
- addAL(viterbi_alignment,sent.sentenceNo,l);
- if (!simple){
- max_fertility_here = min(WordIndex(m+1), MAX_FERTILITY);
- for (i = 1; i <= l ; i++) {
- for ( k = 1; k < max_fertility_here; k++){
- beta = 0 ;
- alpha[k][i] = 0 ;
- for (j = 1 ; j <= m ; j++){
- temp = temp_mult[i][j];
- if (temp > 0.95) temp = 0.95; // smooth to prevent under/over flow
- else if (temp < 0.05) temp = 0.05;
- beta += pow(temp/(1.0-temp), (double) k);
- }
- alpha[k][i] = beta * pow((double) -1, (double) (k+1)) / (double) k ;
- }
- }
- for (i = 1; i <= l ; i++){
- r = 1;
- for (j = 1 ; j <= m ; j++)
- r *= (1 - temp_mult[i][j]);
- for (k = 0 ; k < max_fertility_here ; k++){
- sum = get_sum_of_partitions(k, i, alpha);
- temp = r * sum * count;
- nCountTable.addValue(es[i], k,temp);
- } // end of for (k ..)
- } // end of for (i == ..)
- } // end of if (!simple)
- perp.addFactor(cross_entropy, count, l, m,1);
- trainVPerp.addFactor(log(viterbi_score), count, l, m,1);
- } // end of while
- sHandler1.rewind();
- cerr << "Normalizing t, a, d, n count tables now ... " ;
- if( dump_files && OutputInAachenFormat==1 )
- {
- tfile = Prefix + ".t2to3" ;
- tTable.printCountTable(tfile.c_str(),Elist.getVocabList(),Flist.getVocabList(),1);
- }
- if( updateT )
- tTable.normalizeTable(Elist, Flist);
- aCountTable.normalize(aTable);
- dCountTable.normalize(dTable);
- if (!simple)
- nCountTable.normalize(nTable,&Elist.getVocabList());
- else {
- for (i = 0 ; i< Elist.uniqTokens() ; i++){
- if (0 < MAX_FERTILITY){
- nTable.addValue(i,0,PROB(0.2));
- if (1 < MAX_FERTILITY){
- nTable.addValue(i,1,PROB(0.65));
- if (2 < MAX_FERTILITY){
- nTable.addValue(i,2,PROB(0.1));
- if (3 < MAX_FERTILITY)
- nTable.addValue(i,3,PROB(0.04));
- PROB val = 0.01/(MAX_FERTILITY-4);
- for (k = 4 ; k < MAX_FERTILITY ; k++)
- nTable.addValue(i, k,val);
- }
- }
- }
- }
- } // end of else (!simple)
- p0 = 0.95;
- p1 = 0.05;
- if (dump_files){
- tfile = Prefix + ".t2to3" ;
- afile = Prefix + ".a2to3" ;
- nfile = Prefix + ".n2to3" ;
- dfile = Prefix + ".d2to3" ;
- p0file = Prefix + ".p0_2to3" ;
-
- if( OutputInAachenFormat==0 )
- tTable.printProbTable(tfile.c_str(),Elist.getVocabList(),Flist.getVocabList(),OutputInAachenFormat);
- aTable.printTable(afile.c_str());
- dTable.printTable(dfile.c_str());
- nCountTable.printNTable(Elist.uniqTokens(), nfile.c_str(), Elist.getVocabList(),OutputInAachenFormat);
- ofstream of(p0file.c_str());
- of << p0;
- of.close();
- }
- errorReportAL(cerr,"IBM-2");
- if(simple)
- {
- perp.record("T2To3");
- trainVPerp.record("T2To3");
- }
- else
- {
- perp.record("ST2To3");
- trainVPerp.record("ST2To3");
- }
-}
-
-void model3::transferSimple(/*model1& m1, model2& m2, */ sentenceHandler& sHandler1,
- bool dump_files, Perplexity& perp, Perplexity& trainVPerp,bool updateT)
-{
- /*
- This function performs simple Model 2 -> Model 3 transfer.
- It sets values for n and p without considering Model 2's ideas.
- It sets d values based on a.
- */
- time_t st, fn;
- // just inherit these from the previous models, to avoid data duplication
-
- st = time(NULL);
- cerr << "==========================================================\n";
- cerr << "\nTransfer started at: "<< my_ctime(&st) << '\n';
-
- cerr << "Simple tranfer of Model2 --> Model3 (i.e. estimating initial parameters of Model3 from Model2 tables)\n";
-
- estimate_t_a_d(sHandler1, perp, trainVPerp, true, dump_files,updateT) ;
- fn = time(NULL) ;
- cerr << "\nTransfer: TRAIN CROSS-ENTROPY " << perp.cross_entropy()
- << " PERPLEXITY " << perp.perplexity() << '\n';
- cerr << "\nTransfer took: " << difftime(fn, st) << " seconds\n";
- cerr << "\nTransfer Finished at: "<< my_ctime(&fn) << '\n';
- cerr << "==========================================================\n";
-
-}
-
-
-void model3::transfer(sentenceHandler& sHandler1,bool dump_files, Perplexity& perp, Perplexity& trainVPerp,bool updateT)
-{
- if (Transfer == TRANSFER_SIMPLE)
- transferSimple(sHandler1,dump_files,perp, trainVPerp,updateT);
- {
- time_t st, fn ;
-
- st = time(NULL);
- cerr << "==========================================================\n";
- cerr << "\nTransfer started at: "<< my_ctime(&st) << '\n';
- cerr << "Transfering Model2 --> Model3 (i.e. estimating initial parameters of Model3 from Model2 tables)\n";
-
- p1_count = p0_count = 0 ;
-
- estimate_t_a_d(sHandler1, perp, trainVPerp, false, dump_files,updateT);
-
-
-
- /* Below is a made-up stab at transferring t & a probs to p0/p1.
- (Method not documented in IBM paper).
- It seems to give p0 = .96, which may be right for Model 2, or may not.
- I'm commenting it out for now and hardwiring p0 = .90 as above. -Kevin
-
- // compute p0, p1 counts
- Vector<LogProb> nm(Elist.uniqTokens(),0.0);
-
- for(i=0; i < Elist.uniqTokens(); i++){
- for(k=1; k < MAX_FERTILITY; k++){
- nm[i] += nTable.getValue(i, k) * (LogProb) k;
- }
- }
-
- LogProb mprime;
- // sentenceHandler sHandler1(efFilename.c_str());
- // sentPair sent ;
-
- while(sHandler1.getNextSentence(sent)){
- Vector<WordIndex>& es = sent.eSent;
- Vector<WordIndex>& fs = sent.fSent;
- const float count = sent.noOccurrences;
-
- l = es.size() - 1;
- m = fs.size() - 1;
- mprime = 0 ;
- for (i = 1; i <= l ; i++){
- mprime += nm[es[i]] ;
- }
- mprime = LogProb((int((double) mprime + 0.5))); // round mprime to nearest integer
- if ((mprime < m) && (2 * mprime >= m)) {
- // cerr << "updating both p0_count and p1_count, mprime: " << mprime <<
- // "m = " << m << "\n";
- p1_count += (m - (double) mprime) * count ;
- p0_count += (2 * (double) mprime - m) * count ;
- // cerr << "p0_count = "<<p0_count << " , p1_count = " << p1_count << endl ;
- }
- else {
- // p1_count += 0 ;
- // cerr << "updating only p0_count, mprime: " << mprime <<
- // "m = " << m << "\n";
- p0_count += double(m * count) ;
- // cerr << "p0_count = "<<p0_count << " , p1_count = " << p1_count << endl ;
- }
- }
-
- // normalize p1, p0
-
- cerr << "p0_count = "<<p0_count << " , p1_count = " << p1_count << endl ;
- p1 = p1_count / (p1_count + p0_count ) ;
- p0 = 1 - p1;
- cerr << "p0 = "<<p0 << " , p1 = " << p1 << endl ;
- // Smooth p0 probability to avoid getting zero probability.
- if (0 == p0){
- p0 = (LogProb) SMOOTH_THRESHOLD ;
- p1 = p1 - (LogProb) SMOOTH_THRESHOLD ;
- }
- if (0 == p1){
- p1 = (LogProb) SMOOTH_THRESHOLD ;
- p0 = p0 - (LogProb) SMOOTH_THRESHOLD ;
- }
- */
-
- fn = time(NULL) ;
- cerr << "\nTransfer: TRAIN CROSS-ENTROPY " << perp.cross_entropy()
- << " PERPLEXITY " << perp.perplexity() << '\n';
- // cerr << "tTable contains " << tTable.getHash().bucket_count()
- // << " buckets and " << tTable.getHash().size() << " entries." ;
- cerr << "\nTransfer took: " << difftime(fn, st) << " seconds\n";
- cerr << "\nTransfer Finished at: "<< my_ctime(&fn) << endl;
- cerr << "==========================================================\n";
-
- }
-
-}
diff --git a/scripts/training/MGIZA/src/model3.cpp b/scripts/training/MGIZA/src/model3.cpp
deleted file mode 100644
index ec3c701..0000000
--- a/scripts/training/MGIZA/src/model3.cpp
+++ /dev/null
@@ -1,1370 +0,0 @@
-/*
-
- EGYPT Toolkit for Statistical Machine Translation
- Written by Yaser Al-Onaizan, Jan Curin, Michael Jahr, Kevin Knight, John Lafferty, Dan Melamed, David Purdy, Franz Och, Noah Smith, and David Yarowsky.
-
- This program is free software; you can redistribute it and/or
- modify it under the terms of the GNU General Public License
- as published by the Free Software Foundation; either version 2
- of the License, or (at your option) any later version.
-
- This program is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- GNU General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program; if not, write to the Free Software
- Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
- USA.
-
- */
-#include "model3.h"
-#include "collCounts.h"
-#include "Globals.h"
-#include "utility.h"
-#include "transpair_model5.h"
-#include "transpair_modelhmm.h"
-#include "Parameter.h"
-
-#define TRICKY_IBM3_TRAINING
-
-GLOBAL_PARAMETER(int,M4_Dependencies,"depm4","d_{=1}: &1:l, &2:m, &4:F, &8:E, d_{>1}&16:l, &32:m, &64:F, &128:E",PARLEV_MODELS,76)
-;
-GLOBAL_PARAMETER(int,M5_Dependencies,"depm5","d_{=1}: &1:l, &2:m, &4:F, &8:E, d_{>1}&16:l, &32:m, &64:F, &128:E",PARLEV_MODELS,68)
-;
-GLOBAL_PARAMETER4(int,Model3_Dump_Freq,"MODEL 345 DUMP FREQUENCY","MODEL 3 DUMP FREQUENCY","t3","t345","dump frequency of Model 3/4/5",PARLEV_OUTPUT,0)
-;
-
-/*model3::model3(model2& m2) :
- model2(m2),dTable( amodel<PROB>(true)), dCountTable(true),
- nTable( nmodel<PROB>(m2.getNoEnglishWords()+1, MAX_FERTILITY)),
- nCountTable(m2.getNoEnglishWords()+1, MAX_FERTILITY),h(0)
- {}*/
-
-extern int Transfer_Dump_Freq;
-
-model3::model3(model2& m2, amodel<PROB>& d, nmodel<PROB>& n) :
- model2(m2), dTable(d), dCountTable(true), nTable(n),//m2.getNoEnglishWords()+1, MAX_FERTILITY),
- nCountTable(m2.getNoEnglishWords()+1, MAX_FERTILITY), h(0) {
- ewordclasses = fwordclasses = NULL;
-}
-
-model3::model3(model3& m3, amodel<PROB>& d, nmodel<PROB>& n, amodel<COUNT>& a) :
- model2(*(&m3), m3.aTable, a), dTable(d), dCountTable(true), nTable(n),//m2.getNoEnglishWords()+1, MAX_FERTILITY),
- nCountTable(m3.getNoEnglishWords()+1, MAX_FERTILITY), h(0) {
- ewordclasses = fwordclasses = NULL;
-}
-
-void model3::load_tables(const char *nfile, const char *dfile,
- const char *p0file) {
- cout << "Model3: loading n, d, p0 tables \n";
-
- nTable.readNTable(nfile);
- dTable.readTable(dfile);
- ifstream inf(p0file);
- if ( !inf)
- cerr << "Can not open: " << p0file << '\n';
- else {
- cout << "Reading p0 value from " << p0file << "\n";
- inf >> p0;
- inf.close();
- p1 = 1 - p0;
- }
- cout << "p0 is: " << p0 << " p1:" << p1 << '\n';
-}
-
-model3::~model3() {
- dTable.clear();
- dCountTable.clear();
- nTable.clear();
- nCountTable.clear();
- if(h==NULL && ewordclasses!=NULL && fwordclasses!=NULL){
- delete ewordclasses;
- delete fwordclasses;
- }
-}
-
-void model3::em(int noIterations, sentenceHandler& sHandler1) {
-
- LogProb all_prob, aprob, temp;
- WordIndex i, j, l, m;
- time_t it_st, st, it_fn, fn;
- string tfile, dfile, nfile, p0file, afile, number;
-
- st = time(NULL) ;
- cout << "\n" << "Starting Model3: Training";
- // sentenceHandler sHandler1(efFilename.c_str());
- sHandler1.rewind();
- for (int it=1; it <= noIterations; it++) {
- it_st = time(NULL) ;
- cout << "\n" << "Model3: Iteration " << it;
-
- // set up the names of the files where the tables will be printed
- int n = it;
- number = "";
- do {
- //mj changed next line
- number.insert((size_t) 0, 1, (char)(n % 10 + '0'));
- } while ((n /= 10) > 0);
- tfile = Prefix + ".t3." + number;
- afile = Prefix + ".a3." + number;
- nfile = Prefix + ".n3." + number;
- dfile = Prefix + ".d3." + number;
- p0file = Prefix + ".p0_3." + number;
- // tCountTable.clear();
- dCountTable.clear();
- nCountTable.clear();
- p0_count = 0.0;
- p1_count = 0.0;
- all_prob = 0;
- sentPair sent;
- while (sHandler1.getNextSentence(sent)) {
- Vector<WordIndex>& es = sent.eSent;
- Vector<WordIndex>& fs = sent.fSent;
- const float count = sent.getCount();
- if ((sent.sentenceNo % 1000) == 0)
- cout <<sent.sentenceNo << '\n';
- Vector<WordIndex> A(fs.size(),/*-1*/0);
- Vector<WordIndex> Fert(es.size(),0);
- LogProb lcount=(LogProb)count;
- l = es.size()-1;
- m = fs.size()-1;
- WordIndex x, y;
- all_prob = prob_of_target_given_source(tTable, fs, es);
- if (all_prob == 0)
- cout << "\n" <<"all_prob = 0";
-
- for (x = 0; x < pow(l+1.0, double(m)) ; x++) { // For all possible alignmets A
- y = x;
- for (j = 1; j <= m; j++) {
- A[j] = y % (l+1);
- y /= (l+1);
- }
- for (i = 0; i <= l; i++)
- Fert[i] = 0;
- for (j = 1; j <= m; j++)
- Fert[A[j]]++;
- if (2 * Fert[0] <= m) { /* consider alignments that has Fert[0] less than
- half the number of words in French sentence */
- aprob = prob_of_target_and_alignment_given_source(A, Fert,
- tTable, fs, es);
- temp = aprob/all_prob;
- LogProb templcount = temp*lcount;
-
- for (j = 1; j <= m; j++) {
- tTable.incCount(es[A[j]], fs[j], templcount);
- if (0 != A[j])
- dCountTable.addValue(j, A[j], l, m, templcount);
- }
- for (i = 0; i <= l; i++) {
- nCountTable.addValue(es[i], Fert[i], templcount);
- //cout << "AFTER INC2: " << templcount << " " << nCountTable.getRef(es[i], Fert[i]) << '\n';
- }
- p1_count += double(temp) * (Fert[0] * count);
- p0_count += double(temp) * ((m - 2 * Fert[0]) * count);
- }
- } /* of looping over all alignments */
- } /* of sentence pair E, F */
- sHandler1.rewind();
-
- // normalize tables
- if (OutputInAachenFormat==1)
- tTable.printCountTable(tfile.c_str(), Elist.getVocabList(),
- Flist.getVocabList(), 1);
- tTable.normalizeTable(Elist, Flist);
- aCountTable.normalize(aTable);
- dCountTable.normalize(dTable);
- nCountTable.normalize(nTable, &Elist.getVocabList());
-
- // normalize p1 & p0
-
- if (p1_count + p0_count != 0) {
- p1 = p1_count / (p1_count + p0_count );
- p0 = 1 - p1;
- } else {
- p1 = p0 = 0;
- }
- // print tables
- if (OutputInAachenFormat==0)
- tTable.printProbTable(tfile.c_str(), Elist.getVocabList(),
- Flist.getVocabList(), OutputInAachenFormat);
- dTable.printTable(dfile.c_str());
- nTable.printNTable(Elist.uniqTokens(), nfile.c_str(),
- Elist.getVocabList(), OutputInAachenFormat);
- ofstream of(p0file.c_str());
- of << p0;
- of.close();
- it_fn = time(NULL) ;
- cout << "\n" << "Model3 Iteration "<<it<<" took: " << difftime(it_fn,
- it_st) << " seconds\n";
-
- } /* of iterations */
- fn = time(NULL) ;
- cout << "\n" << "Entire Model3 Training took: " << difftime(fn, st)
- << " seconds\n";
-}
-
-//-----------------------------------------------------------------------
-
-/*
- void simpleModel3Test()
- {
- PositionIndex l=6;
- PositionIndex m=8;
- alignment al(l,m);
- al.set(1,1);
- al.set(2,2);
- al.set(3,3);
- al.set(4,2);
- al.set(5,0);
- al.set(6,6);
- al.set(7,3);
- al.set(8,4);
- cout << al;
- PositionIndex prev_cept=0;
- PositionIndex vac_all=m;
- Vector<char> vac(m+1,0);
- for(PositionIndex i=1;i<=l;i++)
- {
- PositionIndex cur_j=al.als_i[i];
- cout << "LOOP: " << i << " " << cur_j << '\n';
- PositionIndex prev_j=0;
- PositionIndex k=0;
- if(cur_j) { // process first word of cept
- k++;
- vac_all--;
- assert(vac[cur_j]==0);
- vac[cur_j]=1;
- for(unsigned int q=0;q<vac.size();q++)cout << (vac[q]?'1':'0') << ' ';
- cout << '\n';
- cout << i << " " << cur_j << ": d1(" << vacancies(vac,cur_j) << "|" << vacancies(vac,al.get_center(prev_cept)) << "," << vac_all << "+" << -al.fert(i)<< "+" << +k << ")\n" << '\n';
- prev_j=cur_j;
- cur_j=al.als_j[cur_j].next;
- }
- while(cur_j) { // process following words of cept
- k++;
- vac_all--;
- vac[cur_j]=1;
- int vprev=vacancies(vac,prev_j);
- cout << "PREV: " << prev_j << '\n';
- for(unsigned int q=0;q<vac.size();q++)cout << (vac[q]?'1':'0') << ' ';
- cout << '\n';
- cout << i << " " << cur_j << ": d>1(" << vacancies(vac,cur_j) << "-" << vprev << "|" << vac_all<< "+" << -al.fert(i)<< "+" << +k << ")\n" << '\n';
- prev_j=cur_j;
- cur_j=al.als_j[cur_j].next;
- }
- assert(k==al.fert(i));
- if( k )
- prev_cept=i;
- }
- assert(vac_all==al.fert(0));
- }
- */
-
-extern short DoViterbiTraining;
-
-struct m3_em_loop_t {
- model3 *m;
- int done;
- int valid;
- string alignfile;
- string modelName;
- int it;
- bool dump_files;
- char toModel, fromModel;
- pthread_t thread;
- d4model* d4;
- d5model* d5;
- bool final;
- m3_em_loop_t() :
- m(0), done(0), valid(0),d4(0),d5(0) {
- }
- ;
-};
-
-void* m3_exe_emloop(void *arg) {
- m3_em_loop_t* em =(m3_em_loop_t *) arg;
- em->m->viterbi_thread(em->it, em->alignfile, em->dump_files, *(em->d4),*(em->d5),em->final,em->fromModel,em->toModel,em->modelName);
- em->done = -1;
- return arg;
-}
-
-void model3::viterbi_thread(int it, string alignfile, bool dump_files,d4model& d4m,d5model& d5m,bool final,char fromModel,char toModel,string& modelName) {
-#define TRAIN_ARGS perp, trainViterbiPerp, sHandler1, dump_files, alignfile.c_str(), true, modelName,final
- switch (toModel) {
- case '3':{
- switch (fromModel) {
- case 'H':
- viterbi_loop_with_tricks<transpair_modelhmm, const hmm>(TRAIN_ARGS,h,(void*)0);
- break;
- case '3':
- viterbi_loop_with_tricks<transpair_model3>( TRAIN_ARGS, (void*)0,(void*)0);
- break;
- default:
- abort();
- }
- break;
- }
- case '4': {
- switch (fromModel) {
- case 'H':
- viterbi_loop_with_tricks<transpair_modelhmm, const hmm, d4model>(TRAIN_ARGS,h,&d4m);
- break;
- case '3':
- viterbi_loop_with_tricks<transpair_model3, void, d4model>(TRAIN_ARGS, (void*)0,&d4m);
- break;
- case '4':
- viterbi_loop_with_tricks<transpair_model4, d4model, d4model>(TRAIN_ARGS , &d4m,&d4m);
- break;
- default:
- abort();
- }
- }
- break;
- case '5': {
- switch (fromModel) {
- case 'H':
- viterbi_loop_with_tricks<transpair_modelhmm, const hmm, d5model>(TRAIN_ARGS,h,&d5m);
- break;
- case '3':
- viterbi_loop_with_tricks<transpair_model3, void, d5model>(TRAIN_ARGS, (void*)0,&d5m);
- break;
- case '4':
- viterbi_loop_with_tricks<transpair_model4, d4model, d5model>(TRAIN_ARGS, &d4m,&d5m);
- break;
- case '5':
- viterbi_loop_with_tricks<transpair_model5, d5model, d5model>(TRAIN_ARGS, &d5m,&d5m);
- break;
- default:
- abort();
- }
- }
- break;
- default:
- abort();
- }
-
-}
-extern short NCPUS;
-
-int model3::viterbi(int noIterationsModel3, int noIterationsModel4,
- int noIterationsModel5, int noIterationsModel6, const char* prev_d4,const char* prev_d4_2,bool dumpCount,
- const char* dumpCountName, bool useString) {
- double minErrors=1.0;
- int minIter=0;
- if(ewordclasses==NULL)
- ewordclasses = new WordClasses;
- if(fwordclasses==NULL)
- fwordclasses = new WordClasses;
- d4model d4m(MAX_SENTENCE_LENGTH,*ewordclasses,*fwordclasses);
- if(prev_d4){
- string previous_d4model = prev_d4;
-
- string previous_d4model_1 = prev_d4_2;
- cerr << "We are going to read d4 table from " << previous_d4model << "," << previous_d4model_1 << endl;
- d4m.readProbTable(previous_d4model.c_str(),previous_d4model_1.c_str());
- }
- if(h==NULL)
- d4m.makeWordClasses(Elist, Flist, SourceVocabClassesFilename,
- TargetVocabClassesFilename,Elist,Flist);
-
- d5model d5m(d4m);
- //d5m.makeWordClasses(Elist, Flist, SourceVocabFilename+".classes",
- // TargetVocabFilename+".classes");
- time_t it_st, st, it_fn, fn;
- bool dump_files = false;
- string tfile, tfile_actual, dfile, afile, nfile, nfile_actual, p0file,
- alignfile, number, test_alignfile, d4file, d5file, zeroFertFile;
- st = time(NULL);
- sHandler1.rewind();
- if (testPerp && testHandler)
- (*testHandler).rewind();
- string trainingString;
-
- trainingString+=(prev_d4 ? '4' : (h ? 'H' : '3'));
- for (int i=0; i<noIterationsModel3; ++i)
- trainingString+='3';
- for (int i=0; i<noIterationsModel4; ++i)
- trainingString+='4';
- for (int i=0; i<noIterationsModel5; ++i)
- trainingString+='5';
- for (int i=0; i<noIterationsModel6; ++i)
- trainingString+='6';
- cout << "\n==========================================================\n";
- cout << "Starting "<<trainingString<<": Viterbi Training";
- cout << "\n "<<trainingString<<" Training Started at: "<< my_ctime(&st)
- << '\n';
-
-
- vector<m3_em_loop_t> th;
- th.resize(NCPUS);
-
- int k;
-
- for(k = 1; k< NCPUS; k++){
- th[k].m = this;
- th[k].d4 = &d4m;
- th[k].d5 = &d5m;
- }
-
- for (unsigned int it=1; it < trainingString.length(); it++) {
- bool final=0;
- if (it==trainingString.length()-1)
- final=1;
- string modelName;
- char fromModel=trainingString[it-1], toModel=trainingString[it];
- if (fromModel==toModel)
- modelName=string("Model")+fromModel;
- else
- modelName=string("T")+fromModel+"To"+toModel;
- it_st = time(NULL);
- cout <<"\n---------------------\n"<<modelName<<": Iteration " << it
- <<'\n';
- dump_files = (final || ((Model3_Dump_Freq != 0) && ((it
- % Model3_Dump_Freq) == 0))) && !NODUMPS;
- string d4file2;
- {
- // set up the names of the files where the tables will be printed
- int n = it;
- number = "";
- do {
- //mj changed next line
- number.insert((size_t) 0, 1, (char)(n % 10 + '0'));
- } while ((n /= 10) > 0);
- if (final)
- number="final";
- tfile = Prefix + ".t3." + number;
- tfile_actual = Prefix + ".actual.t3." + number;
- afile = Prefix + ".a3." + number;
- nfile = Prefix + ".n3." + number;
- nfile_actual = Prefix + ".actual.n3." + number;
- dfile = Prefix + ".d3." + number;
- d4file = Prefix + ".d4." + number;
- d4file2 = Prefix + ".D4." + number;
- d5file = Prefix + ".d5." + number;
- alignfile = Prefix + ".A3." + number;
- test_alignfile = Prefix + ".tst.A3." + number;
- p0file = Prefix + ".p0_3." + number;
- }
- // clear count tables
- // tCountTable.clear();
- dCountTable.clear();
- aCountTable.clear();
- initAL();
- nCountTable.clear();
- d4m.clear();
- p0_count = p1_count = 0;
- //dump_files=true;
-
- sHandler1.rewind();
- if (testPerp && testHandler)
- (*testHandler).rewind();
-
- char node[2] ;
- node[1] = '\0';
- for (k=1 ; k< NCPUS ; k++){
- th[k].m = this;
- th[k].done = 0;
- th[k].valid = 0;
- th[k].it = it;
- th[k].final = final;
- th[k].alignfile = alignfile + ".part";
- node[0] = '0' + k;
- th[k].alignfile += node;
- th[k].dump_files = dump_files;
- th[k].fromModel = fromModel;
- th[k].toModel = toModel;
- th[k].modelName = modelName;
- th[k].valid = pthread_create(&(th[k].thread),NULL,m3_exe_emloop,&(th[k]));
- if(th[k].valid){
- cerr << "Error starting thread " << k << endl;
- }
- }
- node[0] = '0';
- alignfile = alignfile + ".part";
- alignfile += node;
-
-#ifdef TRICKY_IBM3_TRAINING
-
-#define TRAIN_ARGS perp, trainViterbiPerp, sHandler1, dump_files, alignfile.c_str(), true, modelName,final
-#define TEST_ARGS *testPerp, *testViterbiPerp, *testHandler, dump_files, test_alignfile.c_str(),false, modelName,final
- switch (toModel) {
- case '3':
- switch (fromModel) {
- case 'H':
- viterbi_loop_with_tricks<transpair_modelhmm, const hmm>(TRAIN_ARGS,h,(void*)0);
- if (testPerp && testHandler)
- viterbi_loop_with_tricks<transpair_modelhmm, const hmm>(TEST_ARGS, h,(void*)0);
- break;
- case '3':
- viterbi_loop_with_tricks<transpair_model3>( TRAIN_ARGS, (void*)0,(void*)0);
- if (testPerp && testHandler)
- viterbi_loop_with_tricks<transpair_model3>( TEST_ARGS, (void*)0,(void*)0);
- break;
- default:
- abort();
- }
- break;
- case '4': {
- switch (fromModel) {
- case 'H':
- viterbi_loop_with_tricks<transpair_modelhmm, const hmm, d4model>(TRAIN_ARGS,h,&d4m);
- if (testPerp && testHandler)
- viterbi_loop_with_tricks<transpair_modelhmm, const hmm,
- d4model>(TEST_ARGS, h,&d4m);
- break;
- case '3':
- viterbi_loop_with_tricks<transpair_model3, void, d4model>(TRAIN_ARGS, (void*)0,&d4m);
- if (testPerp && testHandler)
- viterbi_loop_with_tricks<transpair_model3, void, d4model>( TEST_ARGS , (void*)0,&d4m);
- break;
- case '4':
- viterbi_loop_with_tricks<transpair_model4, d4model, d4model>(TRAIN_ARGS , &d4m,&d4m);
- if (testPerp && testHandler)
- viterbi_loop_with_tricks<transpair_model4, d4model, d4model>( TEST_ARGS, &d4m,&d4m);
- break;
- default:
- abort();
- }
- if(dumpCount && it == trainingString.length()-1){
- string realD4TableName = dumpCountName;
- realD4TableName += ".d4.count";
- string realD4bTableName = realD4TableName+".b";
- if(!d4m.dumpCount(realD4TableName.c_str(),realD4bTableName.c_str()))
- cerr <<"Error writing count file to" << realD4TableName << endl;
- }
- d4m.normalizeTable();
- if (dump_files)
- d4m.printProbTable(d4file.c_str(), d4file2.c_str());
- }
- break;
- case '5': {
- switch (fromModel) {
- case 'H':
- viterbi_loop_with_tricks<transpair_modelhmm, const hmm, d5model>(TRAIN_ARGS,h,&d5m);
- if (testPerp && testHandler)
- viterbi_loop_with_tricks<transpair_modelhmm, const hmm,
- d5model>(TEST_ARGS, h,&d5m);
- break;
- case '3':
- viterbi_loop_with_tricks<transpair_model3, void, d5model>(TRAIN_ARGS, (void*)0,&d5m);
- if (testPerp && testHandler)
- viterbi_loop_with_tricks<transpair_model3, void, d5model>( TEST_ARGS , (void*)0,&d5m);
- break;
- case '4':
- viterbi_loop_with_tricks<transpair_model4, d4model, d5model>(TRAIN_ARGS, &d4m,&d5m);
- if (testPerp && testHandler)
- viterbi_loop_with_tricks<transpair_model4, d4model, d5model>( TEST_ARGS, &d4m,&d5m);
- break;
- case '5':
- viterbi_loop_with_tricks<transpair_model5, d5model, d5model>(TRAIN_ARGS, &d5m,&d5m);
- if (testPerp && testHandler)
- viterbi_loop_with_tricks<transpair_model5, d5model, d5model>( TEST_ARGS, &d5m,&d5m);
- break;
- default:
- abort();
- }
- if(dumpCount && it == trainingString.length()-1){
- string realD4TableName = dumpCountName;
- realD4TableName += ".d4";
- string realD4bTableName = realD4TableName+".b";
- if(!d5m.d4m.dumpCount(realD4TableName.c_str(),realD4bTableName.c_str()))
- cerr <<"Error writing count file to" << realD4TableName << endl;
- }
- d5m.d4m.normalizeTable();
- if (dump_files)
- d5m.d4m.printProbTable(d4file.c_str(), d4file2.c_str());
- d5m.normalizeTable();
- if (dump_files) {
- ofstream d5output(d5file.c_str());
- d5output << d5m;
- }
- }
- break;
- default:
- abort();
- }
-
-#else
- viterbi_loop(perp, trainViterbiPerp, sHandler1, dump_files,
- alignfile.c_str(), true, model);
- if (testPerp && testHandler)
- viterbi_loop(*testPerp, *testViterbiPerp, *testHandler,
- dump_files, test_alignfile.c_str(), false, model);
-#endif
- for (k=1;k<NCPUS;k++){
- pthread_join((th[k].thread),NULL);
- cerr << "Thread " << k << "done" << endl;
- }
- if (errorsAL()<minErrors) {
- minErrors=errorsAL();
- minIter=it;
- }
- // now normalize count tables
-// dump_files = true;
- if (dump_files&&OutputInAachenFormat==1)
- tTable.printCountTable(tfile.c_str(), Elist.getVocabList(),
- Flist.getVocabList(), 1);
- perp.record(modelName);
- errorReportAL(cerr, modelName);
- trainViterbiPerp.record(modelName);
-
- if(dumpCount && it == trainingString.length()-1){
- string realTableName = dumpCountName;
- realTableName += ".t.count";
- tTable.printCountTable(realTableName.c_str(),Elist.getVocabList(),Flist.getVocabList(),useString);
- string realATableName = dumpCountName;
- realATableName += ".a.count";
- aCountTable.printRealTable(realATableName.c_str());
- string realDTableName = dumpCountName;
- realDTableName += ".d.count";
- dCountTable.printRealTable(realDTableName.c_str());
- string realNTableName = dumpCountName;
- realNTableName += ".n.count";
- nCountTable.printRealNTable(Elist.uniqTokens(),realNTableName.c_str(),Elist.getVocabList(),useString);
- }
-
- tTable.normalizeTable(Elist, Flist);
- aCountTable.normalize(aTable);
- dCountTable.normalize(dTable);
- nCountTable.normalize(nTable, &Elist.getVocabList());
- sHandler1.rewind();
- //testHandler->rewind();
- // cout << "tTable contains " <<
- // tTable.getHash().bucket_count() << " buckets and "<<
- //tTable.getHash().size() << " entries.\n";
-
- // normalize p1 & p0
-
- cout << "p0_count is " << p0_count << " and p1 is " << p1_count << "; ";
- if (P0!=-1.0) {
- p0 = P0;
- p1 = 1-P0;
- } else {
- if (p1_count + p0_count != 0) {
- p1 = p1_count / (p1_count + p0_count );
- p0 = 1 - p1;
- } else {
- p1 = p0 = 0;
- cerr << "ERROR: p0_count+p1_count is zero!!!\n";
- }
- }
-
- cout << "p0 is " << p0 << " p1: " << p1 << '\n';
-
- cout << modelName<<": TRAIN CROSS-ENTROPY " << perp.cross_entropy()
- << " PERPLEXITY " << perp.perplexity() << '\n';
- if (testPerp && testHandler)
- cout << modelName << ":("<<it<<" TEST CROSS-ENTROPY " << (*testPerp).cross_entropy() << " PERPLEXITY " << (*testPerp).perplexity() << " sum: " << (*testPerp).getSum()<< " wc: " << (*testPerp).word_count() << '\n';
- cout << modelName << ": ("<<it<<") TRAIN VITERBI CROSS-ENTROPY "
- << trainViterbiPerp.cross_entropy() << " PERPLEXITY "
- << trainViterbiPerp.perplexity() << '\n';
- if (testPerp && testHandler)
- cout << modelName << ": ("<<it<<")TEST VITERBI CROSS-ENTROPY "
- << (*testViterbiPerp).cross_entropy() << " PERPLEXITY "
- << (*testViterbiPerp).perplexity() << " Sum: " << (*testViterbiPerp).getSum() << " wc: " << (*testViterbiPerp).word_count() << '\n';
- //dump_files = true;
- if (dump_files) {
- if (OutputInAachenFormat==0)
- tTable.printProbTable(tfile.c_str(), Elist.getVocabList(),
- Flist.getVocabList(), OutputInAachenFormat);
- aTable.printTable(afile.c_str());
- dTable.printTable(dfile.c_str());
- nTable.printNTable(Elist.uniqTokens(), nfile.c_str(),
- Elist.getVocabList(), OutputInAachenFormat);
- ofstream of(p0file.c_str());
- of << p0;
- of.close();
- }
- it_fn = time(NULL) ;
- cout << "\n" << modelName << " Viterbi Iteration : "<<it<< " took: "
- << difftime(it_fn, it_st) << " seconds\n";
- } /* of iterations */
- fn = time(NULL);
- cout << trainingString <<" Training Finished at: " << my_ctime(&fn) << "\n";
- cout << "\n" << "Entire Viterbi "<<trainingString<<" Training took: "
- << difftime(fn, st) << " seconds\n";
- cout << "==========================================================\n";
- if (noIterationsModel4||noIterationsModel5)
- minIter-=noIterationsModel3;
- if (noIterationsModel5)
- minIter-=noIterationsModel4;
- return minIter;
-}
-
-int model3::viterbi_hto3() {
-
- double minErrors=1.0;
- int minIter=0;
- time_t it_st, st, it_fn, fn;
- bool dump_files = false;
- string tfile, tfile_actual, dfile, afile, nfile, nfile_actual, p0file,
- alignfile, number, test_alignfile, d4file, d5file, zeroFertFile;
- st = time(NULL);
- cout << "Starting HMM To Model 3 Viterbi Training";
- cout << "\n hto3 Training Started at: "<< my_ctime(&st) << '\n';
- string modelName="H23";
- //cout <<"\n---------------------\n"<<modelName<<": Iteration " << it<<'\n';
- int it = 1;
- bool final =false;
- ///ump_files = true;
- dump_files = (final || ((Model3_Dump_Freq != 0) && ((it % Model3_Dump_Freq)
- == 0))) && !NODUMPS;
- string d4file2;
- {
- // set up the names of the files where the tables will be printed
- int n = it;
- number = "";
- do {
- //mj changed next line
- number.insert((size_t) 0, 1, (char)(n % 10 + '0'));
- } while ((n /= 10) > 0);
- if (final)
- number="final";
- tfile = Prefix + ".t3." + number;
- tfile_actual = Prefix + ".actual.t3." + number;
- afile = Prefix + ".a3." + number;
- nfile = Prefix + ".n3." + number;
- nfile_actual = Prefix + ".actual.n3." + number;
- dfile = Prefix + ".d3." + number;
- d4file = Prefix + ".d4." + number;
- d4file2 = Prefix + ".D4." + number;
- d5file = Prefix + ".d5." + number;
- alignfile = Prefix + ".AH3_";
- char _p[2];
- _p[1] = 0;
- _p[0] = iter + '0';
- alignfile += _p;
- alignfile += ".part";
- _p[1] = 0;
- _p[0] = part + '0';
- alignfile += _p;
- test_alignfile = Prefix + ".tst.A3." + number;
- test_alignfile = Prefix + ".tst.A3." + number;
- p0file = Prefix + ".p0_3." + number;
- }
- // clear count tables
- // tCountTable.clear();
- dCountTable.clear();
- aCountTable.clear();
- initAL();
- nCountTable.clear();
- p0_count = p1_count = 0;
-
-#ifdef TRICKY_IBM3_TRAINING
-
-#define TRAIN_ARGS perp, trainViterbiPerp, sHandler1, true, alignfile.c_str(), true, modelName,final
-#define TEST_ARGS *testPerp, *testViterbiPerp, *testHandler, dump_files, test_alignfile.c_str(),false, modelName,final
- viterbi_loop_with_tricks<transpair_modelhmm, const hmm>(TRAIN_ARGS,h,(void*)0);
- if (testPerp && testHandler)
- viterbi_loop_with_tricks<transpair_modelhmm, const hmm>(TEST_ARGS, h,(void*)0);
-
-#else
- viterbi_loop(perp, trainViterbiPerp, sHandler1, dump_files,
- alignfile.c_str(), true, model);
- if (testPerp && testHandler)
- viterbi_loop(*testPerp, *testViterbiPerp, *testHandler,
- dump_files, test_alignfile.c_str(), false, model);
-#endif
- if (errorsAL()<minErrors) {
- minErrors=errorsAL();
- minIter=it;
- }
- return minIter;
-}
-
-int model3::viterbi_3to3() {
- bool final = false;
- double minErrors=1.0;
- int minIter=0;
- time_t it_st, st, it_fn, fn;
- bool dump_files = false;
- string tfile, tfile_actual, dfile, afile, nfile, nfile_actual, p0file,
- alignfile, number, test_alignfile, d4file, d5file, zeroFertFile;
- st = time(NULL);
- cout << "Starting HMM To Model 3 Viterbi Training";
- cout << "\n hto3 Training Started at: "<< my_ctime(&st) << '\n';
- string modelName="H23";
- int it = 1;
-
- // cout <<"\n---------------------\n"<<modelName<<": Iteration " << it<<'\n';
-
- dump_files = (final || ((Model3_Dump_Freq != 0) && ((it % Model3_Dump_Freq)
- == 0))) && !NODUMPS;
- dump_files = true;
- string d4file2;
- {
- // set up the names of the files where the tables will be printed
- int n = it;
- number = "";
- do {
- //mj changed next line
- number.insert((size_t) 0, 1, (char)(n % 10 + '0'));
- } while ((n /= 10) > 0);
- if (final)
- number="final";
- tfile = Prefix + ".t3." + number;
- tfile_actual = Prefix + ".actual.t3." + number;
- afile = Prefix + ".a3." + number;
- nfile = Prefix + ".n3." + number;
- nfile_actual = Prefix + ".actual.n3." + number;
- dfile = Prefix + ".d3." + number;
- d4file = Prefix + ".d4." + number;
- d4file2 = Prefix + ".D4." + number;
- d5file = Prefix + ".d5." + number;
- alignfile = Prefix + ".A3_";
- char _p[2];
- _p[1] = 0;
- _p[0] = iter + '0';
- alignfile += _p;
- alignfile += ".part";
- _p[1] = 0;
- _p[0] = part + '0';
- alignfile += _p;
- test_alignfile = Prefix + ".tst.A3." + number;
- p0file = Prefix + ".p0_3." + number;
- }
- // clear count tables
- // tCountTable.clear();
- dCountTable.clear();
- aCountTable.clear();
- initAL();
- nCountTable.clear();
- p0_count = p1_count = 0;
-
-#ifdef TRICKY_IBM3_TRAINING
-
-#define TRAIN_ARGS perp, trainViterbiPerp, sHandler1, true, alignfile.c_str(), true, modelName,final
-#define TEST_ARGS *testPerp, *testViterbiPerp, *testHandler, dump_files, test_alignfile.c_str(),false, modelName,final
- viterbi_loop_with_tricks<transpair_model3>( TRAIN_ARGS, (void*)0,(void*)0);
- if (testPerp && testHandler)
- viterbi_loop_with_tricks<transpair_model3>( TEST_ARGS, (void*)0,(void*)0);
-
-#else
- viterbi_loop(perp, trainViterbiPerp, sHandler1, dump_files,
- alignfile.c_str(), true, model);
- if (testPerp && testHandler)
- viterbi_loop(*testPerp, *testViterbiPerp, *testHandler,
- dump_files, test_alignfile.c_str(), false, model);
-#endif
- if (errorsAL()<minErrors) {
- minErrors=errorsAL();
- minIter=it;
- }
- return minIter;
-}
-
-d4model* model3::viterbi_3to4() {
- double minErrors=1.0;
- int minIter=0;
- time_t it_st, st, it_fn, fn;
- bool final = false;
- bool dump_files = false;
- if(ewordclasses==NULL)
- ewordclasses = new WordClasses;
- if(fwordclasses==NULL)
- fwordclasses = new WordClasses;
-
- d4model *dm1 = new d4model(MAX_SENTENCE_LENGTH,*ewordclasses,*fwordclasses);
- d4model& d4m = *dm1;
- //d4m.makeWordClasses(Elist, Flist, SourceVocabFilename+".classes",
- // TargetVocabFilename+".classes");
-
- string tfile, tfile_actual, dfile, afile, nfile, nfile_actual, p0file,
- alignfile, number, test_alignfile, d4file, d5file, zeroFertFile;
- st = time(NULL);
- cout << "Starting Model 3 To Model 4 Viterbi Training";
- cout << "\n hto3 Training Started at: "<< my_ctime(&st) << '\n';
- string modelName="34";
- int it = 1;
- //cout <<"\n---------------------\n"<<modelName<<": Iteration " << it<<'\n';
-
- dump_files = (final || ((Model3_Dump_Freq != 0) && ((it % Model3_Dump_Freq)
- == 0))) && !NODUMPS;
- dump_files = true;
- string d4file2;
- {
- // set up the names of the files where the tables will be printed
- int n = it;
- number = "";
- do {
- //mj changed next line
- number.insert((size_t) 0, 1, (char)(n % 10 + '0'));
- } while ((n /= 10) > 0);
- if (final)
- number="final";
- tfile = Prefix + ".t3." + number;
- tfile_actual = Prefix + ".actual.t3." + number;
- afile = Prefix + ".a3." + number;
- nfile = Prefix + ".n3." + number;
- nfile_actual = Prefix + ".actual.n3." + number;
- dfile = Prefix + ".d3." + number;
- d4file = Prefix + ".d4." + number;
- d4file2 = Prefix + ".D4." + number;
- d5file = Prefix + ".d5." + number;
- alignfile = Prefix + ".A34_";
- char _p[2];
- _p[1] = 0;
- _p[0] = iter + '0';
- alignfile += _p;
- alignfile += ".part";
- _p[1] = 0;
- _p[0] = part + '0';
- alignfile += _p;
- test_alignfile = Prefix + ".tst.A3." + number;
- p0file = Prefix + ".p0_3." + number;
- }
- // clear count tables
- // tCountTable.clear();
- dCountTable.clear();
- aCountTable.clear();
- initAL();
- nCountTable.clear();
- p0_count = p1_count = 0;
-
-#ifdef TRICKY_IBM3_TRAINING
-
-#define TRAIN_ARGS perp, trainViterbiPerp, sHandler1, true, alignfile.c_str(), true, modelName,final
-#define TEST_ARGS *testPerp, *testViterbiPerp, *testHandler, dump_files, test_alignfile.c_str(),false, modelName,final
- viterbi_loop_with_tricks<transpair_model3, void, d4model>(TRAIN_ARGS, (void*)0,&d4m);
- if (testPerp && testHandler)
- viterbi_loop_with_tricks<transpair_model3, void, d4model>( TEST_ARGS , (void*)0,&d4m);
-
-#else
- viterbi_loop(perp, trainViterbiPerp, sHandler1, dump_files,
- alignfile.c_str(), true, model);
- if (testPerp && testHandler)
- viterbi_loop(*testPerp, *testViterbiPerp, *testHandler,
- dump_files, test_alignfile.c_str(), false, model);
-#endif
- if (errorsAL()<minErrors) {
- minErrors=errorsAL();
- minIter=it;
- }
- return dm1;
-}
-
-int model3::viterbi_4to4(d4model& d4m) {
- double minErrors=1.0;
- int minIter=0;
- bool dump_files = false;
-
- //d4model d4m(MAX_SENTENCE_LENGTH);
- //d4m.makeWordClasses(Elist, Flist, SourceVocabFilename+".classes",
- // TargetVocabFilename+".classes");
-
- string tfile, tfile_actual, dfile, afile, nfile, nfile_actual, p0file,
- alignfile, number, test_alignfile, d4file, d5file, zeroFertFile;
-
- cout << "Starting Model4 To Model 4 Viterbi Training";
- int it = 1;
- bool final = false;
- dump_files = (final || ((Model3_Dump_Freq != 0) && ((it % Model3_Dump_Freq)
- == 0))) && !NODUMPS;
- dump_files = true;
-
- string modelName="H23";
- //cout <<"\n---------------------\n"<<modelName<<": Iteration " << it<<'\n';
- string d4file2;
- {
- // set up the names of the files where the tables will be printed
- int n = it;
- number = "";
- do {
- //mj changed next line
- number.insert((size_t) 0, 1, (char)(n % 10 + '0'));
- } while ((n /= 10) > 0);
- tfile = Prefix + ".t3." + number;
- tfile_actual = Prefix + ".actual.t3." + number;
- afile = Prefix + ".a3." + number;
- nfile = Prefix + ".n3." + number;
- nfile_actual = Prefix + ".actual.n3." + number;
- dfile = Prefix + ".d3." + number;
- d4file = Prefix + ".d4." + number;
- d4file2 = Prefix + ".D4." + number;
- d5file = Prefix + ".d5." + number;
- alignfile = Prefix + ".A4_";
- char _p[2];
- _p[1] = 0;
- _p[0] = iter + '0';
- alignfile += _p;
- alignfile += ".part";
- _p[1] = 0;
- _p[0] = part + '0';
- alignfile += _p;
- test_alignfile = Prefix + ".tst.A3." + number;
- p0file = Prefix + ".p0_3." + number;
- }
- // clear count tables
- // tCountTable.clear();
- dCountTable.clear();
- aCountTable.clear();
- initAL();
- nCountTable.clear();
- p0_count = p1_count = 0;
-
-#ifdef TRICKY_IBM3_TRAINING
-
-#define TRAIN_ARGS perp, trainViterbiPerp, sHandler1, true, alignfile.c_str(), true, modelName,final
-#define TEST_ARGS *testPerp, *testViterbiPerp, *testHandler, dump_files, test_alignfile.c_str(),false, modelName,final
-
- viterbi_loop_with_tricks<transpair_model4, d4model, d4model>(TRAIN_ARGS , &d4m,&d4m);
-
- if (testPerp && testHandler)
- viterbi_loop_with_tricks<transpair_model4, d4model, d4model>( TEST_ARGS, &d4m,&d4m);
-
-#else
- viterbi_loop(perp, trainViterbiPerp, sHandler1, dump_files,
- alignfile.c_str(), true, model);
- if (testPerp && testHandler)
- viterbi_loop(*testPerp, *testViterbiPerp, *testHandler,
- dump_files, test_alignfile.c_str(), false, model);
-#endif
- if (errorsAL()<minErrors) {
- minErrors=errorsAL();
- minIter=it;
- }
- return minIter;
-}
-
-struct model3_align_struct {
- model3 *m;
- int part;
- int iter;
- int valid;
- pthread_t thread;
- int done;
- d4model *d4;
- int result;
- model3_align_struct() :
- m(NULL), part(0), iter(0), valid(0), done(0), d4(NULL) {
- }
-
-};
-
-void* em_thread_h23(void *arg) {
- model3_align_struct * m3 = (model3_align_struct*) arg;
- m3->m->initAL();
- m3->result = m3->m->viterbi_hto3();
- m3->done = 1;
- return m3;
-}
-
-void* em_thread_323(void *arg) {
- model3_align_struct * m3 = (model3_align_struct*) arg;
- m3->m->initAL();
- m3->result = m3->m->viterbi_3to3();
- m3->done = 1;
- return m3;
-}
-
-void* em_thread_324(void *arg) {
- model3_align_struct * m3 = (model3_align_struct*) arg;
- m3->m->initAL();
- m3->d4 = m3->m->viterbi_3to4();
- m3->done = 1;
- return m3;
-}
-
-void* em_thread_424(void *arg) {
- model3_align_struct * m3 = (model3_align_struct*) arg;
- m3->m->initAL();
- m3->result = m3->m->viterbi_4to4(*(m3->d4));
- m3->done = 1;
- return m3;
-}
-
-void multi_thread_m34_em(model3& m3, int ncpu, int Model3_Iterations,
- int Model4_Iterations) {
- string tfile, tfile_actual, dfile, afile, nfile, nfile_actual, p0file,
- alignfile, number, test_alignfile, d4file, d5file, zeroFertFile;
- vector<model3_align_struct> threads;
- bool dump_files = false;
- threads.resize(ncpu);
- time_t it_st, st, it_fn, fn;
- int i, j;
- int H = 0;
- int T4 = Model3_Iterations;
- ncpu=1;
- vector<amodel<COUNT> > counts;
- counts.resize(ncpu);
- m3.part=0;
- for (i=1; i<ncpu; i++) {
- threads[i].m = new model3(m3,m3.dTable,m3.nTable,counts[i]);
- threads[i].m->setHMM(m3.h);
- threads[i].m->part = i;
- }
- d4model *d4m= NULL;
- st = time(NULL);
-
- string trainingString;
- trainingString+=(m3.h ? 'H' : '3');
- for (int i=0; i<Model3_Iterations; ++i)
- trainingString+='3';
- for (int i=0; i<Model4_Iterations; ++i)
- trainingString+='4';
- cout << "\n==========================================================\n";
- cout << "Starting "<<trainingString<<": Viterbi Training";
- cout << "\n "<<trainingString<<" Training Started at: "<< my_ctime(&st)
- << '\n';
-
- for (i=0; i<Model3_Iterations+Model4_Iterations; i++) {
- m3.perp.clear();
- m3.trainViterbiPerp.clear();
- m3.iter = i;
- bool final = (i==Model3_Iterations-1 || i == Model4_Iterations
- +Model3_Iterations-1);
- dump_files = (final || ((Model3_Dump_Freq != 0) && ((i
- % Model3_Dump_Freq) == 0))) && !NODUMPS;
- m3.sHandler1.rewind();
- m3.perp.clear() ; // clears cross_entrop & perplexity
- m3.trainViterbiPerp.clear() ; // clears cross_entrop & perplexity
- string modelName;
- it_st = time(NULL);
- dump_files = (final || ((Model3_Dump_Freq != 0) && ((i
- % Model3_Dump_Freq) == 0))) && !NODUMPS;
- string d4file2;
- {
- // set up the names of the files where the tables will be printed
- int n = i;
- number = "";
- do {
- //mj changed next line
- number.insert((size_t) 0, 1, (char)(n % 10 + '0'));
- } while ((n /= 10) > 0);
- if (final)
- number="final";
- tfile = Prefix + ".t3." + number;
- tfile_actual = Prefix + ".actual.t3." + number;
- afile = Prefix + ".a3." + number;
- nfile = Prefix + ".n3." + number;
- nfile_actual = Prefix + ".actual.n3." + number;
- dfile = Prefix + ".d3." + number;
- d4file = Prefix + ".d4." + number;
- d4file2 = Prefix + ".D4." + number;
- d5file = Prefix + ".d5." + number;
- alignfile = Prefix + ".A3." + number;
- test_alignfile = Prefix + ".tst.A3." + number;
- p0file = Prefix + ".p0_3." + number;
- }
- if (m3.testPerp && m3.testHandler) {
- m3.testHandler->rewind();
- m3.testPerp->clear();
- m3.testViterbiPerp->clear();
- }
-
- for (j=1; j<ncpu; j++) {
- threads[j].m->p0 = m3.p0;
- threads[j].m->p1 = m3.p1;
- threads[j].m->p0_count = 0;
- threads[j].m->p1_count = 0;
- threads[j].m->nCountTable.clear();
- threads[j].m->dCountTable.clear();
- threads[j].m->aCountTable.clear();
- threads[j].m->iter = i;
- if (threads[j].d4) {
- *(threads[j].d4) = *d4m;
- threads[j].d4->clear();
- }
- }
- if (i==0) { // H23
- for (j=1; j<ncpu; j++) {
- threads[j].valid = pthread_create(&(threads[j].thread), NULL,
- em_thread_h23, &(threads[j]));
- if (threads[j].valid) {
- cerr << "Error Starting Thread " << j << endl;
- }
- }
- modelName = "HTO3";
- m3.viterbi_hto3();
- while (1) {
- bool done = true;
- for (j=1; j<ncpu; j++) {
- //pthread_join((args[j].thread),NULL);
- // Start normalization as soon as possible
- if (threads[j].done==1) {
- threads[j].done = 2;
- m3.aCountTable.merge(threads[j].m->aCountTable);
- m3.dCountTable.merge(threads[j].m->dCountTable);
- m3.nCountTable.merge(threads[j].m->nCountTable,
- m3.Elist.uniqTokens(), m3.Elist.getVocabList());
- m3.p0_count += threads[j].m->p0_count;
- m3.p1_count += threads[j].m->p1_count;
- } else if (threads[j].done==2) {
- // Nothing
- } else if (threads[j].done==0) {
- done = false;
- }
- }
- if (done)
- break;
- }
- } else if (i>0 && i< Model3_Iterations) {
- modelName = "3TO3";
- for (j=1; j<ncpu; j++) {
- threads[j].valid = pthread_create(&(threads[j].thread), NULL,
- em_thread_323, &(threads[j]));
- if (threads[j].valid) {
- cerr << "Error Starting Thread " << j << endl;
- }
- }
- m3.viterbi_3to3();
- while (1) {
- bool done = true;
- for (j=1; j<ncpu; j++) {
- //pthread_join((args[j].thread),NULL);
- // Start normalization as soon as possible
- if (threads[j].done==1) {
- threads[j].done = 2;
- m3.aCountTable.merge(threads[j].m->aCountTable);
- m3.dCountTable.merge(threads[j].m->dCountTable);
- m3.nCountTable.merge(threads[j].m->nCountTable,
- m3.Elist.uniqTokens(), m3.Elist.getVocabList());
- m3.p0_count += threads[j].m->p0_count;
- m3.p1_count += threads[j].m->p1_count;
- } else if (threads[j].done==2) {
- // Nothing
- } else if (threads[j].done==0) {
- done = false;
- }
- }
- if (done)
- break;
- }
- } else if (i==Model3_Iterations) {
- modelName = "3TO4";
- for (j=1; j<ncpu; j++) {
- threads[j].valid = pthread_create(&(threads[j].thread), NULL,
- em_thread_324, &(threads[j]));
- if (threads[j].valid) {
- cerr << "Error Starting Thread " << j << endl;
- }
- }
- d4m = m3.viterbi_3to4();
- while (1) {
- bool done = true;
- for (j=1; j<ncpu; j++) {
- //pthread_join((args[j].thread),NULL);
- // Start normalization as soon as possible
- if (threads[j].done==1) {
- threads[j].done = 2;
- m3.aCountTable.merge(threads[j].m->aCountTable);
- m3.dCountTable.merge(threads[j].m->dCountTable);
- m3.nCountTable.merge(threads[j].m->nCountTable,
- m3.Elist.uniqTokens(), m3.Elist.getVocabList());
- m3.p0_count += threads[j].m->p0_count;
- m3.p1_count += threads[j].m->p1_count;
- d4m->merge(*threads[j].d4);
- } else if (threads[j].done==2) {
- // Nothing
- } else if (threads[j].done==0) {
- done = false;
- }
- }
- if (done)
- break;
- }
- } else if (i>Model3_Iterations) {
- modelName = "4TO4";
- for (j=1; j<ncpu; j++) {
- threads[j].valid = pthread_create(&(threads[j].thread), NULL,
- em_thread_424, &(threads[j]));
- if (threads[j].valid) {
- cerr << "Error Starting Thread " << j << endl;
- }
- }
- m3.viterbi_4to4(*d4m);
- while (1) {
- bool done = true;
- for (j=1; j<ncpu; j++) {
- //pthread_join((args[j].thread),NULL);
- // Start normalization as soon as possible
- if (threads[j].done==1) {
- threads[j].done = 2;
- m3.aCountTable.merge(threads[j].m->aCountTable);
- m3.dCountTable.merge(threads[j].m->dCountTable);
- m3.nCountTable.merge(threads[j].m->nCountTable,
- m3.Elist.uniqTokens(), m3.Elist.getVocabList());
- m3.p0_count += threads[j].m->p0_count;
- m3.p1_count += threads[j].m->p1_count;
- d4m->merge(*(threads[j].d4));
- } else if (threads[j].done==2) {
- // Nothing
- } else if (threads[j].done==0) {
- done = false;
- }
- }
- if (done)
- break;
- }
- }
- m3.perp.record(modelName);
- m3.errorReportAL(cerr, modelName);
- m3.trainViterbiPerp.record(modelName);
-
- m3.tTable.normalizeTable(m3.Elist, m3.Flist);
- m3.aCountTable.normalize(m3.aTable);
- m3.aCountTable.clear();
- m3.dCountTable.normalize(m3.dTable);
- m3.dCountTable.clear();
- m3.nCountTable.normalize(m3.nTable, &(m3.Elist.getVocabList()));
- m3.nCountTable.clear();
- cout << "p0_count is " << m3.p0_count << " and p1 is " << m3.p1_count
- << "; ";
- if (P0!=-1.0) {
- m3.p0 = P0;
- m3.p1 = 1-P0;
- } else {
- if (m3.p1_count + m3.p0_count != 0) {
- m3.p1 = m3.p1_count / (m3.p1_count + m3.p0_count );
- m3.p0 = 1 - m3.p1;
- } else {
- m3.p1 = m3.p0 = 0;
- cerr << "ERROR: p0_count+p1_count is zero!!!\n";
- }
- }
- m3.p0_count = m3.p1_count = 0;
- cout << "p0 is " << m3.p0 << " p1: " << m3.p1 << '\n';
- if (d4m) {
- d4m->normalizeTable();
- d4m->clear();
- }
-
- cout << modelName<<": TRAIN CROSS-ENTROPY " << m3.perp.cross_entropy()
- << " PERPLEXITY " << m3.perp.perplexity() << '\n';
- if (m3.testPerp && m3.testHandler)
- cout << modelName << ":("<<i<<" TEST CROSS-ENTROPY "
- << m3.testPerp->cross_entropy() << " PERPLEXITY "
- << m3.testPerp->perplexity() << " sum: "
- << m3.testPerp->getSum()<< " wc: "
- << m3.testPerp->word_count() << '\n';
- cout << modelName << ": ("<<i<<") TRAIN VITERBI CROSS-ENTROPY "
- << m3.trainViterbiPerp.cross_entropy() << " PERPLEXITY "
- << m3.trainViterbiPerp.perplexity() << '\n';
- bool dump_files = true;
- if (dump_files) {
- if (OutputInAachenFormat==0)
- m3.tTable.printProbTable(tfile.c_str(),
- m3.Elist.getVocabList(), m3.Flist.getVocabList(),
- OutputInAachenFormat);
- m3.aTable.printTable(afile.c_str());
- m3.dTable.printTable(dfile.c_str());
- m3.nTable.printNTable(m3.Elist.uniqTokens(), nfile.c_str(),
- m3.Elist.getVocabList(), OutputInAachenFormat);
- ofstream of(p0file.c_str());
- of << m3.p0;
- of.close();
- }
- it_fn = time(NULL);
- cout << "\n" << modelName << " Viterbi Iteration : "<<i<< " took: "
- << difftime(it_fn, it_st) << " seconds\n";
- }
- fn = time(NULL);
- cout << trainingString <<" Training Finished at: " << my_ctime(&fn) << "\n";
- cout << "\n" << "Entire Viterbi "<<trainingString<<" Training took: "
- << difftime(fn, st) << " seconds\n";
- cout << "==========================================================\n";
-
-}
-
diff --git a/scripts/training/MGIZA/src/model3.h b/scripts/training/MGIZA/src/model3.h
deleted file mode 100644
index c3e7d28..0000000
--- a/scripts/training/MGIZA/src/model3.h
+++ /dev/null
@@ -1,154 +0,0 @@
-/*
-
- EGYPT Toolkit for Statistical Machine Translation
- Written by Yaser Al-Onaizan, Jan Curin, Michael Jahr, Kevin Knight, John Lafferty, Dan Melamed, David Purdy, Franz Och, Noah Smith, and David Yarowsky.
-
- This program is free software; you can redistribute it and/or
- modify it under the terms of the GNU General Public License
- as published by the Free Software Foundation; either version 2
- of the License, or (at your option) any later version.
-
- This program is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- GNU General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program; if not, write to the Free Software
- Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
- USA.
-
- */
-#ifndef _model3_h
-#define _model3_h 1
-#include <assert.h>
-#include <iostream>
-#include <algorithm>
-#include <functional>
-#include <map>
-#include <set>
-#include "Vector.h"
-#include <utility>
-#if __GNUC__>2
-#include <ext/hash_map>
-using __gnu_cxx::hash_map;
-#else
-#include <hash_map>
-#endif
-
-#include <time.h>
-#include <fstream>
-#include <math.h>
-#include "MoveSwapMatrix.h"
-#include "TTables.h"
-#include "ATables.h"
-#include "NTables.h"
-#include "getSentence.h"
-#include "defs.h"
-#include "model2.h"
-#include "Perplexity.h"
-#include "transpair_model3.h"
-#include "transpair_modelhmm.h"
-#include "alignment.h"
-#include "vocab.h"
-#include "D4Tables.h"
-#include "D5Tables.h"
-#include "AlignTables.h"
-#include "syncObj.h"
-class model3 : public model2 {
- Mutex plock;
-public:
- amodel<PROB>& dTable;
- amodel<COUNT> dCountTable;
-
- PROB p0, p1;
- SyncDouble p0_count, p1_count;
-
- nmodel<PROB>& nTable;
- nmodel<COUNT> nCountTable;
- hmm*h;
- int part;
- int iter;
-
-private:
- WordClasses* ewordclasses;
- WordClasses* fwordclasses;
-public:
-
- model3(model2& m2);
- void setHMM(hmm*_h) {
- ewordclasses = &(_h->ewordclasses);
- fwordclasses = &(_h->fwordclasses);
- h=_h;
- }
- model3(model2& m2, amodel<PROB>& d, nmodel<PROB>& n);
- model3(model3& m3, amodel<PROB>& d, nmodel<PROB>& n, amodel<COUNT>&);
- ~model3();
- // methods
- void transfer(sentenceHandler&, bool, Perplexity&, Perplexity&,
- bool updateT=1);
- void transferSimple(sentenceHandler&, bool, Perplexity&, Perplexity&,
- bool updateT=1);
- void load_tables(const char *nfile, const char *dfile, const char *p0file);
-
- void em(int, sentenceHandler&);
- int viterbi(int, int, int, int,const char* prev_d4 = NULL,const char* prev_d4_2=NULL,bool dumpCount = false,
- const char* dumpCountName = NULL, bool useString = false);
- int viterbi_hto3();
- d4model* viterbi_3to4();
- int viterbi_3to3();
- int viterbi_4to4(d4model& d4m);
- void viterbi_thread(int it, string alignfile, bool dump_files,d4model& d4m,d5model& d5m,bool final,char fromModel,char toModel,string& modelName);
-private:
-
-
- LogProb prob_of_special(Vector<WordIndex>&, Vector<WordIndex>&,
- tmodel<COUNT, PROB>&, Vector<WordIndex>&, Vector<WordIndex>&);
-
- LogProb prob_of_target_and_alignment_given_source(Vector<WordIndex>&,
- Vector<WordIndex>&, tmodel<COUNT, PROB>&, Vector<WordIndex>&,
- Vector<WordIndex>&);
- LogProb prob_of_target_given_source(tmodel<COUNT, PROB>&,
- Vector<WordIndex>&, Vector<WordIndex>&);
-
- LogProb scoreOfMove(Vector<WordIndex>&, Vector<WordIndex>&,
- Vector<WordIndex>&, Vector<WordIndex>&, tmodel<COUNT, PROB>&,
- WordIndex, WordIndex);
-
- LogProb scoreOfSwap(Vector<WordIndex>&, Vector<WordIndex>&,
- Vector<WordIndex>&, tmodel<COUNT, PROB>&, int, int);
-
- void hillClimb(Vector<WordIndex>&, Vector<WordIndex>&, Vector<WordIndex>&,
- Vector<WordIndex>&, LogProb&, tmodel<COUNT, PROB>&, int, int);
-
- void findBestAlignment(Vector<WordIndex>&, Vector<WordIndex>&,
- Vector<WordIndex>&, Vector<WordIndex>&, LogProb&, int, int);
-
- void findAlignmentsNeighborhood(Vector<WordIndex>&, Vector<WordIndex>&,
- LogProb&align_total_count, alignmodel&neighborhood, int, int);
- void collectCountsOverAlignement(const Vector<WordIndex>& es,
- const Vector<WordIndex>& fs, const Vector<WordIndex>&, LogProb,
- float count);
- LogProb viterbi_model2(const transpair_model3&ef, alignment&output,
- int pair_no, int i_peg = -1, int j_peg = -1) const;
- LogProb _viterbi_model2(const transpair_model2&ef, alignment&output,
- int i_peg = -1, int j_peg = -1) const;
- LogProb viterbi_model2(const transpair_modelhmm&ef, alignment&output,
- int pair_no, int i_peg = -1, int j_peg = -1) const;
-
-private:
- void estimate_t_a_d(sentenceHandler& sHandler1, Perplexity& perp, Perplexity& perp1,
- bool simple, bool dump_files, bool updateT);
- void viterbi_loop(Perplexity&, Perplexity&, sentenceHandler&, bool,
- const char*, bool, string model);
-
- template<class MODEL_TYPE, class A, class B> void viterbi_loop_with_tricks(
- Perplexity&, Perplexity&, sentenceHandler&, bool, const char*,
- bool, string model, bool final, A*d4m, B*d5m);
-
-};
-
-void multi_thread_m34_em(model3& m3, int ncpu, int Model3_Iterations,
- int Model4_Iterations);
-
-#endif
diff --git a/scripts/training/MGIZA/src/model345-peg.cpp b/scripts/training/MGIZA/src/model345-peg.cpp
deleted file mode 100644
index 8c1bde6..0000000
--- a/scripts/training/MGIZA/src/model345-peg.cpp
+++ /dev/null
@@ -1,191 +0,0 @@
-/*
-
-Copyright (C) 2000,2001 Franz Josef Och (RWTH Aachen - Lehrstuhl fuer Informatik VI)
-
-This file is part of GIZA++ ( extension of GIZA ).
-
-This program is free software; you can redistribute it and/or
-modify it under the terms of the GNU General Public License
-as published by the Free Software Foundation; either version 2
-of the License, or (at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with this program; if not, write to the Free Software
-Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
-USA.
-
-*/
-#include "model3.h"
-#include "collCounts.h"
-
-bool makeOneMoveSwap(const alignment&x,const alignment&y,set<OneMoveSwap>&soms)
-{
- OneMoveSwap oms;
- oms.type=0;
- int count=0;
- Vector<int> positions(4);
- assert(x.get_m()==y.get_m());
- for(PositionIndex j=1;j<=x.get_m();j++)
- if(x(j)!=y(j))
- {
- if(count==4)
- return 0;
- positions[count]=j;
- count++;
- }
- assert(count>0);
- if(count==1)
- {
- oms.type=1;
- oms.a=positions[0];
- oms.b=y(positions[0]);
- soms.insert(oms);
- for(unsigned int j=1;j<=x.get_m();++j)
- {
- if( int(j)!=positions[0]&&y(j)==y(positions[0]))
- {
- oms.type=3;
- oms.a=j;
- oms.b=x(positions[0]);
- soms.insert(oms);
- }
- }
- for(unsigned int j=1;j<=x.get_m();++j)
- {
- if( int(j)!=positions[0]&&x(j)==x(positions[0]))
- {
- oms.type=2;
- oms.a=positions[0];
- oms.b=j;
- if( oms.b<oms.a)swap(oms.b,oms.a);
- soms.insert(oms);
- }
- }
- return 1;
- }
- else if(count==2)
- {
- if(x(positions[0])==y(positions[1]) && x(positions[1])==y(positions[0]))
- {
- oms.type=4;
- oms.a=positions[0];
- oms.b=positions[1];
- soms.insert(oms);
- for(unsigned int j=1;j<=x.get_m();++j)
- {
- if( int(j)!=positions[0]&&y(j)==y(positions[0]))
- {
- oms.type=2;oms.a=j;oms.b=positions[1];if( oms.b<oms.a)swap(oms.b,oms.a);soms.insert(oms);
- }
- if( int(j)!=positions[1]&&y(j)==y(positions[1]))
- {
- oms.type=2;oms.a=j;oms.b=positions[0];if( oms.b<oms.a)swap(oms.b,oms.a);soms.insert(oms);
- }
- }
- }
- else if(x(positions[0])==y(positions[1]) )
- {
- oms.type=3;
- oms.a=positions[0];
- oms.b=x(positions[1]);
- soms.insert(oms);
- oms.type=2;
- oms.a=positions[0];
- oms.b=positions[1];
- soms.insert(oms);
- }
- else if( x(positions[1])==y(positions[0]) )
- {
- oms.type=3;
- oms.a=positions[1];
- oms.b=x(positions[0]);
- soms.insert(oms);
- oms.type=2;
- oms.a=positions[0];
- oms.b=positions[1];
- soms.insert(oms);
- }
- oms.type=3;
- oms.a=positions[0];
- oms.b=x(positions[0]);
- soms.insert(oms);
- oms.a=positions[1];
- oms.b=x(positions[1]);
- soms.insert(oms);
- return 1;
- }
- else if( count==3 )
- { // three differences and three different numbers
- Vector<int> xx(3),yy(3);
- xx[0]=x(positions[0]);xx[1]=x(positions[1]);xx[2]=x(positions[2]);
- yy[0]=y(positions[0]);yy[1]=y(positions[1]);yy[2]=y(positions[2]);
- sort(xx.begin(),xx.end());
- sort(yy.begin(),yy.end());
- if(xx==yy)
- {
- oms.type=2;oms.a=positions[0];oms.b=positions[1];soms.insert(oms);
- oms.type=2;oms.a=positions[0];oms.b=positions[2];soms.insert(oms);
- oms.type=2;oms.a=positions[1];oms.b=positions[2];soms.insert(oms);
- }
- else
- {
- //cout << "HERE.\n";
- if( x(positions[0])==y(positions[1])&&x(positions[1])==y(positions[0]) )
- {
- oms.type=2;oms.a=positions[0];oms.b=positions[1];
- if( oms.b<oms.a) swap(oms.b,oms.a);
- soms.insert(oms);
- oms.type=3;oms.a=positions[2];oms.b=x(positions[2]);soms.insert(oms);
- }
- if( x(positions[2])==y(positions[1])&&x(positions[1])==y(positions[2]) )
- {
- oms.type=2;oms.a=positions[2];oms.b=positions[1];
- if( oms.b<oms.a) swap(oms.b,oms.a);
- soms.insert(oms);
- oms.type=3;oms.a=positions[0];oms.b=x(positions[0]);soms.insert(oms);
- }
- if( x(positions[0])==y(positions[2])&&x(positions[2])==y(positions[0]) )
- {
- oms.type=2;oms.a=positions[0];oms.b=positions[2];
- if( oms.b<oms.a) swap(oms.b,oms.a);
- soms.insert(oms);
- oms.type=3;oms.a=positions[1];oms.b=x(positions[1]);soms.insert(oms);
- }
- }
- return 1;
- }
- else if(count==4)
- {
- Vector<int> xx(4),yy(4);
- for(int i=0;i<4;++i)
- {
- xx[i]=x(positions[i]);
- yy[i]=y(positions[i]);
- }
- sort(xx.begin(),xx.end());
- sort(yy.begin(),yy.end());
- if(xx==yy)
- {
- oms.type=2;
- for(int j1=0;j1<4;j1++)
- for(int j2=j1+1;j2<4;j2++)
- {
- if(x(positions[j1])!=x(positions[j2])&&
- x(positions[j1])==y(positions[j2])&&
- x(positions[j2])==y(positions[j1]))
- {
- oms.type=2;oms.a=positions[j1];oms.b=positions[j2];
- soms.insert(oms);
- }
- }
- }
- return 1;
- }
- else
- return 0;
-}
diff --git a/scripts/training/MGIZA/src/model3_viterbi.cpp b/scripts/training/MGIZA/src/model3_viterbi.cpp
deleted file mode 100644
index 9398116..0000000
--- a/scripts/training/MGIZA/src/model3_viterbi.cpp
+++ /dev/null
@@ -1,566 +0,0 @@
-/*
-
- EGYPT Toolkit for Statistical Machine Translation
- Written by Yaser Al-Onaizan, Jan Curin, Michael Jahr, Kevin Knight, John Lafferty, Dan Melamed, David Purdy, Franz Och, Noah Smith, and David Yarowsky.
-
- This program is free software; you can redistribute it and/or
- modify it under the terms of the GNU General Public License
- as published by the Free Software Foundation; either version 2
- of the License, or (at your option) any later version.
-
- This program is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- GNU General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program; if not, write to the Free Software
- Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
- USA.
-
- */
-#include "model3.h"
-#include "utility.h"
-#include "Globals.h"
-#include "AlignTables.h"
-#ifdef WIN32
-typedef hash_map<Vector<WordIndex>, LogProb, hashmyalignment > alignment_hash;
-#else
-typedef hash_map<Vector<WordIndex>, LogProb, hashmyalignment, equal_to_myalignment > alignment_hash;
-
-#endif
-
-LogProb model3::prob_of_target_and_alignment_given_source(Vector<WordIndex>& A,
- Vector<WordIndex>& Fert, tmodel<COUNT, PROB>& tTable,
- Vector<WordIndex>& fs, Vector<WordIndex>& es) {
- LogProb total = 1.0;
- LogProb temp = 0.0;
- const LogProb zero = 0.0;
- WordIndex l = es.size()-1, m = fs.size()-1;
- WordIndex i, j;
-
- total *= pow(double(1-p1), m-2.0 * Fert[0]) * pow(double(p1), double(Fert[0]));
- if (total == 0)
- return (zero);
- for (i = 1; i <= Fert[0]; i++) { // loop caculates m-fert[0] choose fert[0]
- total *= double(m - Fert[0] - i + 1) / i;
- if (total == 0)
- return (zero);
- }
- for (i = 1; i <= l; i++) { // this loop calculates fertilities term
- total *= double(nTable.getValue(es[i], Fert[i])) * (LogProb) factorial(Fert[i]);
- if (total == 0)
- return (zero);
- }
- for (j = 1; j <= m; j++) {
- // temp = tTable.getValue(es[A[j]], fs[j]) ;
- temp = double(tTable.getProb(es[A[j]], fs[j]));
- total *= temp;
- if (0 != A[j])
- total *= double(dTable.getValue(j, A[j], l, m));
- if (total == 0)
- return (zero);
- }
- return (total);
-}
-
-LogProb model3::prob_of_target_given_source(tmodel<COUNT, PROB>& tTable,
- Vector<WordIndex>& fs, Vector<WordIndex>& es) {
-
- WordIndex x, y;
- LogProb total = 0;
- // WordIndex l = es.size(), m = fs.size();
- WordIndex l = es.size()-1, m = fs.size()-1;
- Vector<WordIndex> A(fs.size(),/*-1*/0);
- Vector<WordIndex> Fert(es.size(),0);
- WordIndex i, j;
-
- for (x = 0; x < pow(l+1.0, double(m)) ; x++) { // For all possible alignmets A
- y = x;
- // for (j = 1 ; j < m ; j++){
- for (j = 1; j <= m; j++) {
- A[j] = y % (l+1);
- y /= (l+1);
- }
- // for(i = 0 ; i < l ; i++)
- for (i = 0; i <= l; i++)
- Fert[i] = 0;
- // for (j = 1 ; j < m ; j++)
- for (j = 1; j <= m; j++)
- Fert[A[j]]++;
- // if (2 * Fert[0] < m){
- if (2 * Fert[0] <= m) { /* consider alignments that has Fert[0] less than
- half the length of french sentence */
- total += prob_of_target_and_alignment_given_source(A, Fert, tTable,
- fs, es);
- }
- }
- return (total);
-}
-
-LogProb model3::scoreOfMove(Vector<WordIndex>& es, Vector<WordIndex>& fs,
- Vector<WordIndex>& A, Vector<WordIndex>& Fert,
- tmodel<COUNT, PROB>& tTable, WordIndex j, WordIndex i)
-// returns the scaling factor of the original score if A[j] is linked to
-// i, no change is really made to A
-// but the score is calculated if the move is to be taken (i.e.
-// no side effects on Alignment A nor its Fertility Fert
-// If the value of the scaling factor is:
-// 1: then the score of the new alignment if the move is taken will
-// not change.
-// 0.5: the new score is half the score of the original alignment.
-// 2.0: the new score will be twice as much.
-//
-{
- // LogProb score;
- LogProb change;
- WordIndex m, l;
-
- m = fs.size() - 1;
- l = es.size() - 1;
-
- if (A[j] == i)
- // return(original_score);
- return (1);
- else if (A[j] == 0) { // a move from position zero to something else
- change = double(p0*p0)/p1 * (double((Fert[0]*(m-Fert[0]+1))) / ((m-2*Fert[0]+1)*(m-2*Fert[0]
- +2))) * (Fert[i]+1) * double(nTable.getValue(es[i], Fert[i]+1)) / double(nTable.getValue(es[i], Fert[i])) * double(tTable.getProb(es[i], fs[j])) / double(tTable.getProb(es[A[j]], fs[j])) * double(dTable.getValue(j, i, l, m));
- } else if (i == 0) { // a move to position zero
- change= ((double(p1) / (p0*p0)) * (double((m-2*Fert[0])*(m-2*Fert[0]-1))/((Fert[0]+1)*(m-Fert[0]))) * (double(1)/Fert[A[j]]) * double(nTable.getValue(es[A[j]], Fert[A[j]]-1)) / double(nTable.getValue(es[A[j]], Fert[A[j]]))* double(tTable.getProb(es[i], fs[j])) / double(tTable.getProb(es[A[j]], fs[j])) * 1.0 / double(dTable.getValue(j, A[j], l, m)));
- } else { // a move that does not involve position zero
- change = ((double(Fert[i]+1)/Fert[A[j]]) * double(nTable.getValue(es[A[j]], Fert[A[j]]-1)) / double(nTable.getValue(es[A[j]], Fert[A[j]])) * double(nTable.getValue(es[i], Fert[i]+1)) / double(nTable.getValue(es[i], Fert[i])) * double(tTable.getProb(es[i], fs[j]))/ double(tTable.getProb(es[A[j]], fs[j])) * double(dTable.getValue(j, i, l, m))/ double(dTable.getValue(j, A[j], l, m)));
- }
- return (change);
-}
-
-LogProb model3::scoreOfSwap(Vector<WordIndex>& es, Vector<WordIndex>& fs,
- Vector<WordIndex>& A, tmodel<COUNT, PROB>& tTable, int j1, int j2)
-// returns the scaling factor of the original score if the swap to
-// take place,
-// No side effects here (none of the parameters passed is changed!
-// (i.e. the alignment A is not really changed)
-// If the value of the scaling factor is:
-// 1: then the score of the new alignment if the move is taken will
-// not change.
-// 0.5: the new score is half the score of the original alignment.
-// 2.0: the new score will be twice as much.
-//
-{
- LogProb score;
- WordIndex i1, i2, m, l;
-
- m = fs.size() - 1;
- l = es.size() - 1;
- if (j1 == j2 || A[j1] == A[j2]) // if swapping same position return ratio 1
- return (1);
- else {
- i1 = A[j1];
- i2 = A[j2];
- score = double(tTable.getProb(es[i2], fs[j1]))/double(tTable.getProb(es[i1], fs[j1])) * double(tTable.getProb(es[i1], fs[j2]))/double(tTable.getProb(es[i2], fs[j2]));
- if (i1 != 0) {
- score *= double(dTable.getValue(j2, i1, l, m))/double(dTable.getValue(j1, i1, l, m));
- }
- if (i2 != 0) {
- score *= double(dTable.getValue(j1, i2, l, m))/double(dTable.getValue(j2, i2, l, m));
- }
- return (score);
- }
-}
-
-void model3::hillClimb(Vector<WordIndex>& es, Vector<WordIndex>& fs,
- Vector<WordIndex>& A, Vector<WordIndex>& Fert, LogProb& best_score,
- tmodel<COUNT, PROB>& tTable, int = -1, int j_peg = -1)
-// Hill climbing given alignment A .
-// Alignment A will be updated and also best_score
-// if no pegging is needed i_peg == -1, and j_peg == -1
-{
- WordIndex i, j, l, m, j1, old_i;
- LogProb change;
- bool local_minima;
- int level = 0;
- LogProb best_change_so_far, best_change;
- Vector<WordIndex> A_so_far;
- Vector<WordIndex> Fert_so_far;
-
- l = es.size() - 1;
- m = fs.size() - 1;
- best_change = 1; // overall scaling factor (i.e. from the begining of climb
- do {
- best_change_so_far = 1; // best scaling factor of this level of hill climb
- local_minima = true;
- for (j = 1; j <= m; j++) {
- if (int(j) != j_peg) { // make sure not to change the pegged link
- for (j1 = j + 1; j1 <= m; j1++) {
- // for all possible swaps
- // make sure you are not swapping at same position
- if ((A[j] != A[j1]) && (int(j1) != j_peg)) {
- // change = scoreOfSwap(es, fs, A, best_score, tTable, j, j1);
- change = scoreOfSwap(es, fs, A, tTable, j, j1);
- if (change > best_change_so_far) { // if better alignment found, keep it
- local_minima = false;
- best_change_so_far = change;
- A_so_far = A;
- Fert_so_far = Fert;
- old_i = A_so_far[j];
- A_so_far[j] = A_so_far[j1];
- A_so_far[j1] = old_i;
- } // end of if (change > best_change_so_far)
- } // end of if (A[j] != A[j1] ..)
- } // of for (j1 = j+1 ....)
- // for (i = 0 ; i < l ; i++){ // all possible moves
- for (i = 0; i <= l; i++) { // all possible moves
- if (i != A[j]) { // make sure not to move to same position
- if (i != 0 || (m >= 2 * (Fert[0]+1))) { // if moving to NULL word
- // (pos 0), make sure not to violate the fertility restriction
- // i.e. NULL can not take more than half the target words
- // change = scoreOfMove(es, fs, A, Fert, best_score, tTable, j, i);
- change = scoreOfMove(es, fs, A, Fert, tTable, j, i);
- if (change > best_change_so_far) { // if better alignment found, keep it
- best_change_so_far = change;
- local_minima = false;
- A_so_far = A;
- Fert_so_far = Fert;
- old_i = A_so_far[j];
- A_so_far[j] = i;
- Fert_so_far[old_i]--;
- Fert_so_far[i]++;
- } // end of if (change > best_change_so_far)
- } // end of if ((i!=0) ...
- } // end of if (i != A[j] )
- } // end of for (i = 0 ; ....)
- } // end of if(j != j_peg)
- } // end of for (j = 1 ; ...)
- level++;
- if (!local_minima) {
- if (best_change_so_far > 1) { // if current chage is improving
- A = A_so_far;
- Fert = Fert_so_far;
- best_change *= best_change_so_far;
- } else {
- local_minima = true;
- }
- } // end of if(!local_minima)
- if (level> 15)
- cerr << ".";
- } while (local_minima == false);
- if (level > 15)
- cerr << "\nHill Climb Level: " << level << " score: scaling old: "
- <<(best_score*best_change);
- best_score = prob_of_target_and_alignment_given_source(A, Fert, tTable, fs,
- es);
- if (level>15)
- cerr << " using new calc: " << best_score << '\n';
-}
-
-void model3::findBestAlignment(Vector<WordIndex>& es, Vector<WordIndex>& fs,
- Vector<WordIndex>& A, Vector<WordIndex>& Fert, LogProb& best_score,
- /*tmodel<COUNT, PROB>& tTable,
- amodel<PROB>& aTable, */
- int i_peg = -1, int j_peg = -1)
-// This finds the best Model2 alignment (i.e. no fertilities stuff) in A
-// for the given sentence pair. Its score is returned in A. Its fertility
-// info in Fert.
-// if j_peg == -1 && i_peg == -1 then No pegging is performed.
-{
- WordIndex i, j, l, m, best_i=0;
- LogProb temp, score, ss;
-
- l = es.size() - 1;
- m = fs.size() - 1;
- for (i=0; i <= l; i++)
- Fert[i] = 0;
- ss = 1;
- if ((j_peg != -1) && (i_peg != -1)) { // if you're doing pegging
- A[j_peg] = i_peg;
- Fert[i_peg] = 1;
- ss *= double(tTable.getProb(es[i_peg], fs[j_peg])) * double(aTable.getValue(i_peg, j_peg, l, m));
- }
- for (j = 1; j <= m; j++) {
- if (int(j) != j_peg) {
- score = 0;
- for (i = 0; i <= l; i++) {
- // first make sure that connecting target word at pos j to source word
- // at pos i will not lead to a violation on Fertility restrictions
- // (e.g. maximum fertility for a word, max fertility for NULL word, etc)
- if ((Fert[i]+1 < MAX_FERTILITY) && ((i == 0 && (m >= 2*(Fert[0]
- +1))) || (i != 0))) {
- temp = double(tTable.getProb(es[i], fs[j])) * double(aTable.getValue(i, j, l, m));
- if (temp > score) {
- best_i = i;
- score = temp;
- } // end of if (temp > score)
- } // end of if (((i == 0 ...)
- } // end of for (i= 0 ...)
- if (score == 0) {
- cerr << "WARNING: In searching for model2 best alignment\n ";
- cerr << "Nothing was set for target token " << fs[j]
- << "at position j: " << j << "\n";
- for (i = 0; i <= l; i++) {
- cerr << "i: " << i << "ttable("<<es[i]<<", "<<fs[j]<<") = "
- << tTable.getProb(es[i], fs[j]) << " atable(" << i
- <<", "<<j<<", "<< l<<", "<<m<<") = "
- << aTable.getValue(i, j, l, m) << " product "
- << double(tTable.getProb(es[i], fs[j])) * double(aTable.getValue(i, j, l, m)) << '\n';
- if ((Fert[i]+1 < MAX_FERTILITY) && ((i == 0 && (m >= 2
- *(Fert[0]+1))) || (i != 0)))
- cerr <<"Passed fertility condition \n";
- else
- cerr <<"Failed fertility condition \n";
- }
-
- } // end of if (score == 0)
- else {
- Fert[best_i]++;
- A[j] = best_i;
- }
- ss *= score;
- } // end of if (j != j_peg)
- } // end of for (j == 1 ; ...)
- if (ss <= 0) {
- cerr
- << "WARNING: Model2 viterbi alignment has zero score for sentence pair:\n";
- printSentencePair(es, fs, cerr);
- }
- best_score = prob_of_target_and_alignment_given_source(A, Fert, tTable, fs,
- es);
-}
-
-void model3::collectCountsOverAlignement(const Vector<WordIndex>& es,
- const Vector<WordIndex>& fs, const Vector<WordIndex>& A, LogProb score,
- float count) {
- WordIndex j, i, l, m;
- Vector<WordIndex> Fert(es.size(),0);
- l = es.size() - 1;
- m = fs.size() - 1;
- score *= LogProb(count);
- COUNT temp = COUNT(score) ;
- for (i=0; i <= l; i++)
- Fert[i] = 0;
- for (j = 1; j <= m; j++) {
- Fert[A[j]]++;
- tTable.incCount(es[A[j]], fs[j], temp);
- // tCountTable.getRef(es[A[j]], fs[j])+=score;
- if (A[j])
- dCountTable.addValue(j, A[j], l, m, temp);
- aCountTable.addValue(A[j], j, l, m, temp);
- }
- for (i = 0; i <= l; i++)
- nCountTable.addValue(es[i], Fert[i], temp);
- // p1_count += score * (LogProb) (Fert[0]) ;
- // p0_count += score * (LogProb) ((m - 2 * Fert[0])) ;
- p1_count += temp * (Fert[0]);
- p0_count += temp * ((m - 2 * Fert[0]));
-}
-
-void model3::findAlignmentsNeighborhood(Vector<WordIndex>& es,
- Vector<WordIndex>& fs, LogProb&align_total_count,
- alignmodel&neighborhood, int i_peg = -1, int j_peg = -1)
-// Finding the Neigborhood of a best viterbi alignment after hill climbing
-// if (i_peg == -1 and j_peg == -1, then No Pegging is done.
-{
- LogProb best_score, score;
- WordIndex i, j, l, m, old_i, j1;
- Vector<WordIndex> A(fs.size(),0);
- Vector<WordIndex> Fert(es.size(),0);
- time_t it_st;
-
- best_score = 0;
- l = es.size() - 1;
- m = fs.size() - 1;
- findBestAlignment(es, fs, A, Fert, best_score, /*tTable, aTable,*/i_peg,
- j_peg);
- if (best_score == 0) {
- cerr
- << "WARNING: viterbi alignment score is zero for the following pair\n";
- printSentencePair(es, fs, cerr);
- }
- hillClimb(es, fs, A, Fert, best_score, tTable, i_peg, j_peg);
- if (best_score <= 0) {
- cerr
- << "WARNING: Hill Climbing yielded a zero score viterbi alignment for the following pair:\n";
- printSentencePair(es, fs, cerr);
- } else { // best_score > 0
- // if (2 * Fert[0] < m ){
- if (2*Fert[0] <= m) {
- /* consider alignments that has Fert[0] less than
- half the number of words in French sentence */
- if (neighborhood.insert(A, best_score)) {
- align_total_count += best_score;
- }
- } else { // else part is added for debugging / Yaser
- cerr
- << "WARNING:Best Alignment found violates Fertility requiremnets !!\n";
- for (i = 0; i <= l; i++)
- cerr << "Fert["<<i<<"] = "<< Fert[i] << "\n";
- for (j = 1; j <= m; j++) {
- cerr << "A["<<j<<"] = "<< A[j] <<"\n";
- }
- cerr << "Condition violated : 2 * Fert[0] <= m " << 2*Fert[0] <<"?"
- << m << "\n";
- } // end of added code for debugging // Yaser
- it_st = time(NULL) ;
-
- // Now find add all neighbors of the best alignmet to the collection
- for (j = 1; j <= m; j++) {
- for (j1 = j + 1; j1 <= m; j1++) { // all possible swaps
- if (A[j] != A[j1]) {// make sure you are not swapping at same position
- // score = best_score * scoreOfSwap(es, fs, A, best_score, tTable, j, j1);
- score = best_score * scoreOfSwap(es, fs, A, tTable, j, j1);
- // ADD A and its score to list of alig. to collect counts over
- if (2 * Fert[0] <= m && score > 0) {
- /* consider alignments that has Fert[0] less than
- half the number of words in French sentence */
- old_i = A[j];
- A[j] = A[j1];
- A[j1] = old_i;
- if (neighborhood.insert(A, score)) {
- align_total_count += score;
- }
- // restore original alignment
- old_i = A[j];
- A[j] = A[j1];
- A[j1] = old_i;
- }
- }
- }
- for (i = 0; i <= l; i++) { // all possible moves
- if (i != A[j]) { // make sure not to move to same position
- if ((Fert[i]+1 < MAX_FERTILITY) && ((i == 0 && (m >= 2
- *(Fert[0]+1))) || (i != 0))) {
- // consider legal alignments only
- score = best_score * scoreOfMove(es, fs, A, Fert,
- tTable, j, i);
- // ADD A and its score to list of alig. to collect counts over
- if (score > 0) {
- old_i = A[j];
- A[j] = i;
- Fert[old_i]--;
- Fert[i]++;
- // add to list of alignemts here ******************
- if (neighborhood.insert(A, score)) {
- align_total_count += score;
- }
- // now resotre alignment and fertilities to previoud values
- A[j] = old_i;
- Fert[old_i]++;
- Fert[i]--;
- } // end of if (score > 0)
- } // end of if (i == 0 ...)
- } // end of if (i != A[j])
- }// end of for(i = 0 ; ...)
- }// end of for (j = 1 ; ...)
- } // of else best_score <= 0
-}
-
-void model3::viterbi_loop(Perplexity& perp, Perplexity& viterbiPerp,
- sentenceHandler& sHandler1, bool dump_files, const char* alignfile,
- bool collect_counts, string model) {
- WordIndex i, j, l, m;
- ofstream of2;
- int pair_no;
- LogProb temp;
-
- if (dump_files)
- of2.open(alignfile);
- pair_no = 0; // sentence pair number
- // for each sentence pair in the corpus
- perp.clear() ; // clears cross_entrop & perplexity
- viterbiPerp.clear();
- sentPair sent;
- while (sHandler1.getNextSentence(sent)) {
- Vector<WordIndex>& es = sent.eSent;
- Vector<WordIndex>& fs = sent.fSent;
- const float count = sent.getCount();
- if ((sent.sentenceNo % 1000) == 0)
- cerr <<sent.sentenceNo << '\n';
- time_t sent_s = time(NULL) ;
- pair_no++;
- l = es.size() - 1;
- m = fs.size() - 1;
-
- LogProb align_total_count=0;
- // LogProb best_score;
-
- Vector<WordIndex> viterbi_alignment;
- LogProb viterbi_score;
- alignmodel neighborhood;
- neighborhood.clear();
- align_total_count = 0;
- findAlignmentsNeighborhood(
- /*tTable, aTable,*//*p1_count, p0_count,*/es, fs,
- align_total_count, neighborhood) ;
- if (Peg) {
- for (i = 0; i <= l; i++)
- for (j = 1; j <= m; j++) {
- if ( (tTable.getProb(es[i], fs[j]) > PROB_SMOOTH)
- && (aTable.getValue(i, j, l, m) > PROB_SMOOTH)
- && (dTable.getValue(j, i, l, m) > PROB_SMOOTH))
- findAlignmentsNeighborhood(/*tTable, aTable,*//*p1_count,
- p0_count, */es, fs, align_total_count, neighborhood, i,
- j);
- }
- }
- // Now Collect counts over saved neighborhoods
- viterbi_score = 0;
- if (Verbose)
- cerr << "\nCollecting counts over found alignments, total prob: "
- << align_total_count << "\n";
- alignment_hash::iterator align;
- int acount = 0;
- if (align_total_count == 0) {
- cerr << " WARNINIG: For the following sentence pair : \n";
- printSentencePair(es, fs, cerr);
- cerr << "The collection of alignments found have 0 probability!!\n";
- cerr << "No counts will be collected of it \n";
- } else {
- if (collect_counts) {
- for (align = neighborhood.begin(); align != neighborhood.end(); align++) {
- temp = (*align).second/align_total_count;
- collectCountsOverAlignement(/*tTable, aCountTable, */es,
- fs, /*p1_count,
- p0_count ,*/((*align).first), temp, count);
- acount++;
- if (viterbi_score < temp) {
- viterbi_alignment = ((*align).first);
- viterbi_score = temp;
- }
- }
- } // end of if (collect_counts)
- perp.addFactor(log(double(align_total_count)), count, l, m, 0);
- viterbiPerp.addFactor(log(double(viterbi_score)), count, l, m, 0);
-
- if (Verbose) {
- cerr << "Collected counts over "<<acount <<" (of " << pow(
- double(m), double(l+1)) <<") differnet alignments\n";
- cerr << "Bucket count of alignments hash: "
- << neighborhood.getHash().bucket_count()<< ", size "
- << neighborhood.getHash().size() << "\n";
- }
- } // end of else
- // write best alignment (viterbi) for this sentence pair to alignment file
- if (collect_counts) {
- if (viterbi_score <= 0) {
- cerr << "Viterbi Alignment for this pair have score zero!!\n";
- of2 << "\n\n";
- } else {
- if (dump_files)
- printAlignToFile(es, fs, Elist.getVocabList(),
- Flist.getVocabList(), of2, viterbi_alignment,
- pair_no, viterbi_score);
- addAL(viterbi_alignment, sent.sentenceNo, l);
- }
- } // end of if (collect_counts)
- double period = difftime(time(NULL), sent_s);
- if (Verbose)
- cerr << "processing this sentence pair took : " << period
- << " seconds\n";
-
- } /* of sentence pair E, F */
- sHandler1.rewind();
- errorReportAL(cerr, model);
- perp.record(model);
- viterbiPerp.record(model);
- if (dump_files)
- of2.close();
-
-}
diff --git a/scripts/training/MGIZA/src/model3_viterbi_with_tricks.cpp b/scripts/training/MGIZA/src/model3_viterbi_with_tricks.cpp
deleted file mode 100644
index 1034533..0000000
--- a/scripts/training/MGIZA/src/model3_viterbi_with_tricks.cpp
+++ /dev/null
@@ -1,865 +0,0 @@
-/*
-
-EGYPT Toolkit for Statistical Machine Translation
-Written by Yaser Al-Onaizan, Jan Curin, Michael Jahr, Kevin Knight, John Lafferty, Dan Melamed, David Purdy, Franz Och, Noah Smith, and David Yarowsky.
-
-This program is free software; you can redistribute it and/or
-modify it under the terms of the GNU General Public License
-as published by the Free Software Foundation; either version 2
-of the License, or (at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with this program; if not, write to the Free Software
-Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
-USA.
-
-*/
-#include "mystl.h"
-#include "model3.h"
-#include "collCounts.h"
-#include "utility.h"
-#include "Globals.h"
-#include "D5Tables.h"
-#include "transpair_model5.h"
-#include "transpair_modelhmm.h"
-#include "myassert.h"
-#include "Parameter.h"
-
-GLOBAL_PARAMETER(float,PrintN,"nbestalignments","for printing the n best alignments",PARLEV_OUTPUT,0);
-
-const short LogHillClimb=0,LogPeg=0;
-const short UseHMMViterbiAlignmentIfPossible=1;
-short DoViterbiTraining=0;
-
-GLOBAL_PARAMETER(int,VerboseSentence,"VerboseSentence","number of sentence for which a lot of information should be printed (negative: no output)",PARLEV_OUTPUT,-10);
-GLOBAL_PARAMETER(double,PEGGED_CUTOFF,"PEGGED_CUTOFF","relative cutoff probability for alignment-centers in pegging",PARLEV_OPTHEUR,3e-2);
-GLOBAL_PARAMETER2(float, COUNTINCREASE_CUTOFF_AL,"COUNTINCREASE CUTOFF AL","countCutoffAl","Counts increment cutoff threshold for alignments in training of fertility models",PARLEV_OPTHEUR,1e-5);
-
-//int SentNr;
-bool UseLinkCache=1; /// optimization for pegging
-int NumberOfAlignmentsInSophisticatedCountCollection;
-
-extern bool ONLYALDUMPS;
-
-int PrintHillClimbWarning=0;
-int PrintZeroScoreWarning=0;
-
-
-LogProb model3::viterbi_model2(const transpair_modelhmm&ef, alignment&output, int
-#ifdef STORE_HMM_ALIGNMENTS
-pair_no
-#endif
-, int i_peg , int j_peg )const
-{
- static Vector<pair<alignment,LogProb> > viterbis;
- Vector<int>vit;
- int m=ef.get_m();
- int l=ef.get_l();
- double ret=0.0;
- //#define STORE_HMM_ALIGNMENTS
-#ifdef STORE_HMM_ALIGNMENTS
- if( i_peg==-1 && j_peg==-1 && viterbis.size()>pair_no ){
- output=viterbis[pair_no].first;
- ret=viterbis[pair_no].second;
- massert( ret==HMMRealViterbi(*ef.net,vit,i_peg-1,j_peg-1)*ef.net->finalMultiply );
- } else{
- ret=HMMRealViterbi(*ef.net,vit,i_peg-1,j_peg-1)*ef.net->finalMultiply;
- for(int j=1;j<=m;j++){
- if( vit[j-1]+1>l )
- output.set(j,0);
- else
- output.set(j,vit[j-1]+1);
- massert( (j==j_peg&&int(output(j))==i_peg) || j_peg!=j);
- }
- if( i_peg==-1 && j_peg==-1 ){
- iassert(viterbis.size()==pair_no);
- viterbis.push_back(make_pair(output,ret));
- }
- }
-#else
- ret=HMMRealViterbi(*ef.net,vit,i_peg-1,j_peg-1)*ef.net->finalMultiply;
- for(int j=1;j<=m;j++){
- if( vit[j-1]+1>l )
- output.set(j,0);
- else
- output.set(j,vit[j-1]+1);
- massert( (j==j_peg&&int(output(j))==i_peg) || j_peg!=j);
- }
-#endif
- massert( j_peg==-1 || int(output(j_peg))==i_peg );
- if( j_peg!=-1 )
- massert(int(output(j_peg))==i_peg);
- if( output.valid() )
- return ret;
- else{
- return _viterbi_model2(ef,output,i_peg,j_peg);
- }
-}
-
-LogProb model3::_viterbi_model2(const transpair_model2&ef, alignment&output, int i_peg, int j_peg)const{
- WordIndex best_i=0;
- LogProb ss=1;
- PositionIndex l = ef.get_l(), m=ef.get_m();
- Vector<WordIndex> Fert(l+1, (WordIndex)0);
- if ((j_peg != -1) && (i_peg != -1)){
- output.set(j_peg, i_peg);
- ss *= ef.get_t(i_peg, j_peg) * ef.get_a(i_peg, j_peg);
- if( ss==0 )
- cerr << "WARNING: already starting is zero: " << ef.get_t(i_peg, j_peg) << " " << ef.get_a(i_peg, j_peg) << '\n';
- }else
- ss=1;
- for (PositionIndex j = 1 ; j <= m ; j++)if (int(j) != j_peg){
- LogProb score = 0 ;
- for (PositionIndex i = 0 ; i <= l ; i++){
- if( Fert[i]+1<MAX_FERTILITY && (i != 0 || m>=(2 * (Fert[0] + 1)))){
- LogProb temp = ef.get_t(i, j) * ef.get_a(i, j);
- if (temp > score ){
- best_i = i ;
- score = temp ;
- }
- }
- }
- if (score == 0){
- cerr << "WARNING: In searching for model2 best alignment\n";
- cerr << "Nothing was set for target token at position j: " << j << "\n";
- for (PositionIndex i = 0 ; i <= l ; i++){
- cerr << "i: " << i << "ttable("<<i<<", "<<j<<") = " <<
- ef.get_t(i, j) << " atable(" << i<<", "<<j<<", "<<
- l<<", "<<m<<") = "<< ef.get_a(i, j) << " product " <<
- ef.get_t(i, j) * ef.get_a(i, j) ;
- if ((Fert[i]+1 < MAX_FERTILITY) && ((i == 0 && (m >= 2*(Fert[0]+1)))
- || (i != 0)))
- cerr <<"Passed fertility condition \n";
- else
- cerr <<"Failed fertility condition \n";
- }
- }else{
- output.set(j, best_i);
- Fert[best_i]++;
- }
- ss *= score;
- }
- if (ss <= 0){
- //cerr << ef;
- cerr << "WARNING: Model2 viterbi alignment has zero score.\n" ;
- cerr << "Here are the different elements that made this alignment probability zero \n";
- cerr << "Source length " << l << " target length " << m << '\n';
- LogProb gg=1 ; // for debugging only .....
- for (PositionIndex j = 1 ; j <= m ; j++)if (int(j) != j_peg){
- LogProb score = 0 ;
- LogProb a = 0, t =0 ;
- for (PositionIndex i = 0 ; i <= l ; i++){
- // if( Debug_Fert[i]+1<MAX_FERTILITY && (i != 0 || m>=(2 * (Debug_Fert[0] + 1)))){
- LogProb temp = ef.get_t(i, j) * ef.get_a(i, j);
- if (temp > score ){
- score = temp ;
- best_i = i ;
- a = ef.get_a(i, j);
- t = ef.get_t(i, j) ;
- }
- // }
- }
- gg *= score ;
- cerr << "best: fs[" << j << "] "<< j <<" : es[" << best_i << "] " <<
- best_i << " , a: " << ef.get_a(best_i, j) << " t: " << t << " score " << score << " product : " << gg << " ss " <<
- ss << '\n';
- }
- for(PositionIndex i = 0 ; i <= l ; i++)
- cerr << "Fert["<<i<<"] selected " << Fert[i] << '\n';
- }
- massert(output.valid());
- return ss;
-}
-LogProb model3::viterbi_model2(const transpair_model3&ef, alignment&output, int pair_no,int i_peg , int j_peg )const
-{
- if( h&&UseHMMViterbiAlignmentIfPossible ){
- transpair_modelhmm efhmm(ef.E,ef.F,tTable,aTable,dTable,nTable,0.0,0.0,h);
- LogProb ret=viterbi_model2(efhmm,output,pair_no,i_peg,j_peg);
- massert(output.valid());
- return ret;
- }
- return _viterbi_model2(ef,output,i_peg,j_peg);
-}
-
-//int HillClimbingSteps=0;
-
-template<class TRANSPAIR>
-LogProb greedyClimb_WithIBM3Scoring(MoveSwapMatrix<TRANSPAIR>&msc2,int& HillClimbingSteps,int j_peg=-1)
-{
- PositionIndex l = msc2.get_l(), m=msc2.get_m();
- int changed=0;
- int iter=0;
- bool hereVERB=0;
- do
- {
- MoveSwapMatrix<typename TRANSPAIR::simpler_transpair_model> msc_IBM3(msc2.get_ef(),alignment(msc2));
- vector<pair<double,OneMoveSwap> > msvec;
- for (PositionIndex j = 1 ; j <= m ; j++)if (int(j) != j_peg)
- {
- WordIndex aj=msc2(j);
- for (PositionIndex j1 = j + 1 ; j1 <= m; j1++)
- if((aj != msc2(j1)) && (int(j1) != j_peg))
- msvec.push_back(pair<double,OneMoveSwap>(-msc_IBM3.cswap(j,j1),OneMoveSwap(1,j,j1)));
- for (PositionIndex i = 0 ; i <= l ; i++)
- if(i != aj &&(i != 0 || (m >= 2 * (msc2.fert(0)+1))) && msc2.fert(i)+1<MAX_FERTILITY)
- msvec.push_back(pair<double,OneMoveSwap>(-msc_IBM3.cmove(i,j),OneMoveSwap(2,i,j)));
- }
- sort(msvec.begin(),msvec.end());
- HillClimbingSteps++;
- int iused=-1;
- changed=0;
- for(unsigned int i=0;i<msvec.size()&&changed==0;++i)
- {
- LogProb csts;
- const OneMoveSwap &oms=msvec[i].second;
- if( oms.type==1&&(csts=msc2.cswap(oms.a,oms.b))>1.0001 )
- {
- if( hereVERB==1 )
- cerr << "SWAP: " << csts << '\n';
- msc2.doSwap(oms.a,oms.b);
- changed=1;
- iused=i;
- break;
- }
- if( oms.type==2&&(csts=msc2.cmove(oms.a,oms.b))>1.0001 )
- {
- if( hereVERB==1 )
- cerr << "MOVE: " << csts << '\n';
- msc2.doMove(oms.a,oms.b);
- changed=1;
- iused=i;
- break;
- }
- }
- if( ++iter>30 )
- {
- //msc2.ef.verboseTP=1;
- hereVERB=1;
- cerr << "ERROR: more than 30 iterations in hill-climbing: " << iused
- << " improvement: " << msvec[iused].first << " value:" << msvec[iused].second
- << '\n' << msc2 << '\n';
- for(int a=0;a<20;++a)
- cout << a << ' ' << msvec[a].first << ' ' << msvec[a].second << '\n';
- //cerr << msvec << '\n';
- }
- if( iter>50 )
- break;
- } while(changed);
- return msc2.get_ef().prob_of_target_and_alignment_given_source(msc2);
-}
-
-template<class TRANSPAIR>
-LogProb greedyClimb(MoveSwapMatrix<TRANSPAIR>&msc2, int& HillClimbingSteps, int j_peg = -1)
-{
- if( msc2.get_ef().greedyHillClimbing()==1 )
- return greedyClimb_WithIBM3Scoring(msc2,HillClimbingSteps,j_peg);
- PositionIndex l = msc2.get_l(), m=msc2.get_m();
- int changed=0;
- do
- {
- HillClimbingSteps++;
- changed=0;
- for (PositionIndex j = 1 ; j <= m ; j++)if (int(j) != j_peg)
- {
- WordIndex aj=msc2(j);
- for (PositionIndex j1 = j + 1 ; j1 <= m; j1++)if((aj != msc2(j1)) && (int(j1) != j_peg)&&msc2.cswap(j, j1) > 1.0)
- msc2.doSwap(j, j1), changed=1;
- for (PositionIndex i = 0 ; i <= l ; i++)if(i != aj &&(i != 0 || (m >= 2 * (msc2.fert(0)+1))) && msc2.fert(i)+1<MAX_FERTILITY && msc2.cmove(i, j)>1.0)
- msc2.doMove(i, j), changed=1;
- }
- } while (changed);
- return msc2.get_ef().prob_of_target_and_alignment_given_source(msc2);
-}
-
-template<class TRANSPAIR>
-LogProb hillClimb_std(MoveSwapMatrix<TRANSPAIR>&msc2, int &HillClimbingSteps,int= -1,int j_peg = -1)
-{
- if( msc2.isLazy() )
- return greedyClimb_WithIBM3Scoring(msc2,HillClimbingSteps,j_peg);
- if( LogHillClimb>1 )
- cout << msc2 << '\n';
- PositionIndex l = msc2.get_l(), m=msc2.get_m();
- int changes=0;
- int best_change_type=-1, best_change_v1=-1, best_change_v2=-1;
- do
- {
- HillClimbingSteps++;
- LogProb best_change_so_far = 1.00001 ;
- best_change_type=0;
- for (PositionIndex j = 1 ; j <= m ; j++)if (int(j) != j_peg)
- {
- WordIndex aj=msc2(j);
- for (PositionIndex j1 = j + 1 ; j1 <= m; j1++)if((aj != msc2(j1)) && (int(j1) != j_peg))
- {
- LogProb change = msc2.cswap(j, j1);
- if (change > best_change_so_far)
- {
- best_change_so_far = change ;
- best_change_type=1;
- best_change_v1=j;
- best_change_v2=j1;
- if( LogHillClimb )
- cerr << "CLIMB: " << best_change_type << " " << best_change_v1 << " " << best_change_v2 << " " << best_change_so_far << msc2 << '\n';
- massert(msc2.get_ef().isSubOptimal()==1);
- }
- }
- for (PositionIndex i = 0 ; i <= l ; i++)if(i != aj &&(i != 0 || (m >= 2 * (msc2.fert(0)+1))) && msc2.fert(i)+1<MAX_FERTILITY)
- {
- LogProb change = msc2.cmove(i, j);
- if (change > best_change_so_far)
- {
- best_change_so_far = change ;
- best_change_type=2;
- best_change_v1=j;
- best_change_v2=i;
- if( LogHillClimb )
- cerr << "CLIMB: " << best_change_type << " " << best_change_v1 << " " << best_change_v2 << " " << best_change_so_far << msc2 << '\n';
- massert(msc2.get_ef().isSubOptimal()==1);
- }
- }
- }
- if (best_change_type==1)
- {
- msc2.doSwap(best_change_v1, best_change_v2);
- if( LogHillClimb )
- cerr << "SW-CLIMB-DONE: " << j_peg << msc2 << '\n';
- }
- if (best_change_type==2)
- {
- msc2.doMove(best_change_v2, best_change_v1);
- if( LogHillClimb )
- cerr << "MO-CLIMB-DONE: " << j_peg << msc2 << '\n';
- }
- changes++;
- if( changes>40 )
- {
- if( PrintHillClimbWarning++<1000 )
- cerr << "WARNING: already " << changes << " iterations in hillclimb: " << best_change_so_far << " " << best_change_type << " " << best_change_v1 << " " << best_change_v2 << '\n';
- else if (PrintHillClimbWarning==1000)
- cerr << "ERROR: too many hill climbing warnings => I do not print more.\n";
- }
- if(changes>60 )
- {
- cerr << msc2 << '\n';
- break;
- }
- } while (best_change_type);
- return msc2.get_ef().prob_of_target_and_alignment_given_source(msc2);
-}
-
-template<class MODEL_TYPE>
-bool extendCenterList(Vector<pair<MoveSwapMatrix<MODEL_TYPE>*,LogProb> >&setOfGoodCenters,MoveSwapMatrix<MODEL_TYPE> *msc,double peggedAlignmentScore)
-{
- unsigned int l=msc->get_ef().get_l();
- set<OneMoveSwap> alreadyCovered;
- for(unsigned int nr=0;nr<setOfGoodCenters.size();nr++)
- makeOneMoveSwap(*setOfGoodCenters[nr].first,*msc,alreadyCovered);
- for(set<OneMoveSwap>::const_iterator i=alreadyCovered.begin();i!=alreadyCovered.end();++i)
- {
- if( i->type==1||i->type==4)
- msc->delCenter();
- if( i->type==1 )
- {
- for(unsigned int ii=0;ii<=l;++ii)
- if( (*msc)(i->a)!=ii )
- msc->delMove(ii,i->a);
- }
- else if( i->type==2||i->type==4 )
- msc->delSwap(i->a,i->b);
- else if( i->type==3 )
- msc->delMove(i->b,i->a);
- else abort();
- }
- setOfGoodCenters.push_back(make_pair(msc,peggedAlignmentScore));
- return 1;
-}
-
-bool OldLog=0;
-short OldLogPeg=0,OldLogHillClimb=0;
-class Als
-{
-public:
- int s,a,b;
- double v;
- Als(int _s,int _a,int _b,double _v)
- : s(_s),a(_a),b(_b),v(_v) {}
-};
-
-inline bool operator<(const Als&x,const Als&y)
-{return x.v>y.v;}
-
-template<class MODEL_TYPE, class ADDITIONAL_MODEL_DATA_IN,class ADDITIONAL_MODEL_DATA_OUT>
-void model3::viterbi_loop_with_tricks(Perplexity& perp, Perplexity& viterbiPerp, sentenceHandler& sHandler1,
- bool dump_files, const char* alignfile,
- bool collect_counts, string model, bool final,
- ADDITIONAL_MODEL_DATA_IN*dm_in,
- ADDITIONAL_MODEL_DATA_OUT*dm_out){
- ofstream *writeNBestErrorsFile=0;
- if( (dump_files||FEWDUMPS)&&PrintN&&ReferenceAlignment.size()>0 ) {
- string x=alignfile+string("NBEST");
- writeNBestErrorsFile= new ofstream(x.c_str());
- }
- ofstream *of3=0;
- PositionIndex i, j, l, m ;
- ofstream of2;
- int pair_no;
- int HillClimbingSteps=0;
- NumberOfAlignmentsInSophisticatedCountCollection=0;
- if (dump_files||FEWDUMPS||(final&&(ONLYALDUMPS)) ){
- of2.open(alignfile);
- if(of2.is_open()){
- cout << "I will write alignment to " << alignfile << endl;
- }
- }
-/* if(!of2.is_open()){
- cerr << "I don't know why you do not let me dump file " << alignfile << endl;
- }*/
- if( dump_files&&PrintN&&final ){
- string x=alignfile+string("NBEST");
- of3= new ofstream(x.c_str());
- }
- pair_no = 0 ; // sentence pair number
- // for each sentence pair in the corpus
- perp.clear() ; // clears cross_entrop & perplexity
- viterbiPerp.clear() ; // clears cross_entrop & perplexity
- sentPair sent ;
- int NCenter=0,NHillClimbed=0,NAlignment=0,NTotal=0,NBetterByPegging=0;
- while(sHandler1.getNextSentence(sent)){
- if( sent.eSent.size()==1||sent.fSent.size()==1 )
- continue;
-// SentNr=sent.sentenceNo;
- Vector<WordIndex>& es = sent.eSent;
- Vector<WordIndex>& fs = sent.fSent;
- const float count = sent.getCount();
- if ((sent.sentenceNo % 10000) == 0)
- cerr <<sent.sentenceNo << '\n';
- time_t sent_s = time(NULL) ;
- pair_no++ ;
- l = es.size() - 1 ;
- m = fs.size() - 1 ;
-
- LogProb align_total_count=0;
- alignment viterbi2alignment(l,m);
- MODEL_TYPE ef(es,fs,tTable,aTable,dTable,nTable,p1,p0,dm_in);
- viterbi_model2(ef,viterbi2alignment,pair_no-1);
- Vector<pair<MoveSwapMatrix<MODEL_TYPE>*,LogProb> >setOfGoodCenters(1);
- set<alignment> alignments;
- MoveSwapMatrix<MODEL_TYPE> *best = (setOfGoodCenters[0].first = new MoveSwapMatrix<MODEL_TYPE>(ef, viterbi2alignment));
- MoveSwapMatrix<MODEL_TYPE> _viterbi(*best), *viterbi=&_viterbi; // please, don't delete this line (FJO)
- if( ef.isSubOptimal() )
- setOfGoodCenters[0].second = hillClimb_std(*best,HillClimbingSteps);
- else{
- setOfGoodCenters[0].second = best->get_ef().prob_of_target_and_alignment_given_source(*best);
- if( setOfGoodCenters[0].second==0 ){
- cerr << "PROBLEM: alignment is 0.\n";
- best->get_ef().prob_of_target_and_alignment_given_source(*best,1);
- }
- }
- int bestAlignment=0;
-
- for(unsigned int i=0;i<setOfGoodCenters.size();++i)
- setOfGoodCenters[i].first->check();
- alignments.insert(*best);
- if (setOfGoodCenters[bestAlignment].second <= 0){
- if( PrintZeroScoreWarning++<100 ){
- cerr << "WARNING: Hill Climbing yielded a zero score viterbi alignment for the following pair:\n";
- cerr << alignment(*setOfGoodCenters[bestAlignment].first) ;
- printSentencePair(es, fs, cerr);
- }
- else if(PrintZeroScoreWarning==100) {
- cerr << "ERROR: too many zero score warnings => no additional one will be printed\n";
- }
- setOfGoodCenters[bestAlignment].second=1e-300;
- continue;
- }
- int nHillClimbed=1,nAlignment=1;
- bool flagBetterByPegging=0;
- if ( Peg ){
- const MoveSwapMatrix<MODEL_TYPE> *useMatrix=viterbi; // it is faster using 'best', ... (FJO)
- Array2<short, vector<short> > linkCache(l+1, m+1, false);
- if(UseLinkCache)for(unsigned int j=1;j<=m;j++)linkCache((*useMatrix)(j), j)=1;
- for(PositionIndex j=1;j<=m;j++)for(PositionIndex i=0;i<=l;i++){
- nAlignment++;
- if( i!=(*useMatrix)(j) && (UseLinkCache==0||linkCache(i,j)==0) &&
- ef.get_t(i,j)>ef.get_t((*useMatrix)(j),j)*PEGGED_CUTOFF &&
- (i != 0 || (m >= 2 * (useMatrix->fert(0)+1)))){
- MoveSwapMatrix<MODEL_TYPE> *BESTPEGGED=0;
- LogProb peggedAlignmentScore;
- nHillClimbed++;
- if( ef.isSubOptimal() ){
- BESTPEGGED = new MoveSwapMatrix<MODEL_TYPE>(*useMatrix);
- BESTPEGGED->doMove(i, j);
- peggedAlignmentScore= hillClimb_std(*BESTPEGGED,HillClimbingSteps, i,j);
- }else{
- alignment pegAlignment(l,m);
- peggedAlignmentScore=viterbi_model2(ef,pegAlignment,pair_no-1,i,j);
- BESTPEGGED = new MoveSwapMatrix<MODEL_TYPE>(ef,pegAlignment);
- massert( pegAlignment(j)==i );
- }
- if(UseLinkCache)
- for(unsigned int j=1;j<=m;j++)
- linkCache((*BESTPEGGED)(j), j)=1;
- if( peggedAlignmentScore>setOfGoodCenters[bestAlignment].second*(LogProb)PEGGED_CUTOFF && alignments.count(*BESTPEGGED)==0 ){
- if(extendCenterList(setOfGoodCenters,BESTPEGGED,peggedAlignmentScore)){
- alignments.insert(*BESTPEGGED);
- if( peggedAlignmentScore>1.00001*setOfGoodCenters[bestAlignment].second ){
- if( LogPeg ){
- cerr << "found better alignment by pegging " << pair_no << " " << peggedAlignmentScore/setOfGoodCenters[bestAlignment].second << '\n';
- cerr << "NEW BEST: " << alignment(*BESTPEGGED);
- cerr << "OLD : " << alignment(*setOfGoodCenters[bestAlignment].first);
- }
- flagBetterByPegging=1;
- bestAlignment=alignments.size()-1;
- }
- }
- assert( differences(*BESTPEGGED, *best)!=0 );
- BESTPEGGED=0;
- }else
- delete BESTPEGGED;
- }
- }
- } // end of if(Peg)
- NBetterByPegging+=flagBetterByPegging;
- for(unsigned int i=0;i<setOfGoodCenters.size();++i)
- setOfGoodCenters[i].first->check();
- if( LogPeg>1 )
- cout << "PEGGED: " << setOfGoodCenters.size() << " HILLCLIMBED:" << nHillClimbed << " TOTAL:" << nAlignment << " alignments." << '\n';
- int alTotal=collectCountsOverNeighborhood(setOfGoodCenters,es, fs, tTable, aCountTable,
- dCountTable, nCountTable, p1_count, p0_count,
- align_total_count, count, collect_counts, dm_out);
- if( LogPeg>1 ){
- cout << "ALL: " << alTotal << " from " << pow(float(l+1),float(m)) << '\n';
- massert(alTotal<=pow(double(l+1),double(m)));
- }
- NCenter+=setOfGoodCenters.size();NHillClimbed+=nHillClimbed;NAlignment+=nAlignment;NTotal+=alTotal;
- perp.addFactor(log(double(align_total_count)), count, l, m,0);
- viterbiPerp.addFactor(log(double(setOfGoodCenters[bestAlignment].second)), count, l, m,0);
- massert(log(double(setOfGoodCenters[bestAlignment].second)) <= log(double(align_total_count)));
- if (dump_files||(FEWDUMPS&&sent.sentenceNo<1000)||(final&&(ONLYALDUMPS)) )
- printAlignToFile(es, fs, Elist.getVocabList(), Flist.getVocabList(), of2, (setOfGoodCenters[bestAlignment].first)->getAlignment(), sent.sentenceNo,
- setOfGoodCenters[bestAlignment].second);
- for(unsigned int i=0;i<setOfGoodCenters.size();++i)
- setOfGoodCenters[i].first->check();
- if( of3||(writeNBestErrorsFile&&pair_no<int(ReferenceAlignment.size())) ){
- vector<Als> als;
- for(unsigned int s=0;s<setOfGoodCenters.size();++s){
- const MoveSwapMatrix<MODEL_TYPE>&msc= *setOfGoodCenters[s].first;
- msc.check();
- double normalized_ascore=setOfGoodCenters[s].second;
- if( !msc.isCenterDeleted() )
- als.push_back( Als(s,0,0,normalized_ascore) );
-
- for(WordIndex j=1;j<=m;j++)
- for(WordIndex i=0;i<=l;i++)
- if( i!=msc(j)&& !msc.isDelMove(i,j) )
- als.push_back( Als(s,i,j,msc.cmove(i,j)*normalized_ascore));
- for(PositionIndex j1=1;j1<=m;j1++)
- for(PositionIndex j2=j1+1;j2<=m;j2++)
- if( msc(j1)!=msc(j2) && !msc.isDelSwap(j1,j2) )
- als.push_back( Als(s,-j1,-j2,msc.cswap(j1,j2)*normalized_ascore));
- }
- sort(als.begin(),als.end());
- double sum=0,sum2=0;
- for(unsigned int i=0;i<als.size();++i)
- sum+=als[i].v;
- for(unsigned int i=0;i<min((unsigned int)als.size(),(unsigned int)PrintN);++i){
- alignment x=*setOfGoodCenters[als[i].s].first;
- if( !(als[i].a==0 && als[i].b==0) ){
- if( als[i].a<=0&&als[i].b<=0 )
- x.doSwap(-als[i].a,-als[i].b);
- else
- x.doMove(als[i].a,als[i].b);
- }
- if( of3&&i<PrintN )
- printAlignToFile(es, fs, Elist.getVocabList(), Flist.getVocabList(),*of3,x.getAlignment(), sent.sentenceNo,
- als[i].v/sum*count);
- sum2+=als[i].v;
- if( writeNBestErrorsFile ){
- if( pair_no<int(ReferenceAlignment.size()) ){
- int ALmissing=0,ALtoomuch=0,ALeventsMissing=0,ALeventsToomuch=0;
- vector<double> scores;
- ErrorsInAlignment(ReferenceAlignment[pair_no-1],x.getAlignment(),l,ALmissing,ALtoomuch,ALeventsMissing,ALeventsToomuch,pair_no);
- ef.computeScores(x,scores);
- *writeNBestErrorsFile << ALmissing+ALtoomuch << ' ';
- for(unsigned int i=0;i<scores.size();++i)
- *writeNBestErrorsFile << ((scores[i]>0.0)?(-log(scores[i])):1.0e6) << ' ';
- *writeNBestErrorsFile << '\n';
- }
- }
- }
- if( writeNBestErrorsFile )
- *writeNBestErrorsFile << '\n';
- }
- addAL((setOfGoodCenters[bestAlignment].first)->getAlignment(),sent.sentenceNo,l);
- for(unsigned int i=0;i<setOfGoodCenters.size();i++)
- delete setOfGoodCenters[i].first;
- double period = difftime(time(NULL), sent_s);
- if (Verbose)
- cerr << "processing this sentence pair took : " << period
- << " seconds\n";
-
- } /* of sentence pair E, F */
- //sHandler1.rewind();
- if (dump_files||FEWDUMPS||(final&&(ONLYALDUMPS)) )
- of2.close();
- delete of3;
- delete writeNBestErrorsFile;
- double FSent=pair_no;
- cout << "#centers(pre/hillclimbed/real): " << NAlignment/FSent << " " << NHillClimbed/FSent << " " << NCenter/FSent << " #al: " << NTotal/FSent << " #alsophisticatedcountcollection: " << NumberOfAlignmentsInSophisticatedCountCollection/FSent << " #hcsteps: " << HillClimbingSteps/FSent << '\n';
- cout << "#peggingImprovements: " << NBetterByPegging/FSent << '\n';
-}
-
-/*Perform only one step of viterbi alignment*/
-#if 0
-template<class MODEL_TYPE, class ADDITIONAL_MODEL_DATA_IN,class ADDITIONAL_MODEL_DATA_OUT>
-void model3::viterbi_loop_with_tricks_1(Perplexity& perp, Perplexity& viterbiPerp, sentenceHandler& sHandler1,
- bool dump_files, const char* alignfile,
- bool collect_counts, string model, bool final,
- ADDITIONAL_MODEL_DATA_IN*dm_in,
- ADDITIONAL_MODEL_DATA_OUT*dm_out){
- ofstream *writeNBestErrorsFile=0;
- if( (dump_files||FEWDUMPS)&&PrintN&&ReferenceAlignment.size()>0 ) {
- string x=alignfile+string("NBEST");
- writeNBestErrorsFile= new ofstream(x.c_str());
- }
- ofstream *of3=0;
- ofstream of2;
- int pair_no;
- HillClimbingSteps=0;
- NumberOfAlignmentsInSophisticatedCountCollection=0;
- if (dump_files||FEWDUMPS||(final&&(ONLYALDUMPS)) )
- of2.open(alignfile);
- if( dump_files&&PrintN&&final ){
- string x=alignfile+string("NBEST");
- of3= new ofstream(x.c_str());
- }
- pair_no = 0 ; // sentence pair number
- // for each sentence pair in the corpus
- perp.clear() ; // clears cross_entrop & perplexity
- viterbiPerp.clear() ; // clears cross_entrop & perplexity
- sentPair sent ;
- int NCenter=0,NHillClimbed=0,NAlignment=0,NTotal=0,NBetterByPegging=0;
- while(sHandler1.getNextSentence(sent)){
- if( sent.eSent.size()==1||sent.fSent.size()==1 )
- continue;
- SentNr=sent.sentenceNo;
- Vector<WordIndex>& es = sent.eSent;
- Vector<WordIndex>& fs = sent.fSent;
- const float count = sent.getCount();
- if ((sent.sentenceNo % 10000) == 0)
- cerr <<sent.sentenceNo << '\n';
- time_t sent_s = time(NULL) ;
- pair_no++ ;
- l = es.size() - 1 ;
- m = fs.size() - 1 ;
- if (Log){
- logmsg << "Processing sentence pair:\n\t";
- printSentencePair(es, fs, logmsg);
- for (i = 0 ; i <= l ; i++)
- logmsg << Elist.getVocabList()[es[i]].word << " ";
- logmsg << "\n\t";
- for (j = 1 ; j <= m ; j++)
- logmsg << Flist.getVocabList()[fs[j]].word << " ";
- logmsg << "\n";
- }
-
- LogProb align_total_count=0;
- alignment viterbi2alignment(l,m);
- MODEL_TYPE ef(es,fs,tTable,aTable,dTable,nTable,p1,p0,dm_in);
- viterbi_model2(ef,viterbi2alignment,pair_no-1);
- Vector<pair<MoveSwapMatrix<MODEL_TYPE>*,LogProb> >setOfGoodCenters(1);
- set<alignment> alignments;
- MoveSwapMatrix<MODEL_TYPE> *best = (setOfGoodCenters[0].first = new MoveSwapMatrix<MODEL_TYPE>(ef, viterbi2alignment));
- MoveSwapMatrix<MODEL_TYPE> _viterbi(*best), *viterbi=&_viterbi; // please, don't delete this line (FJO)
- if (Log)
- logmsg << "VITERBI: " << alignment(_viterbi);
- if( ef.isSubOptimal() )
- setOfGoodCenters[0].second = hillClimb_std(*best);
- else{
- setOfGoodCenters[0].second = best->get_ef().prob_of_target_and_alignment_given_source(*best);
- if( setOfGoodCenters[0].second==0 ){
- cerr << "PROBLEM: alignment is 0.\n";
- best->get_ef().prob_of_target_and_alignment_given_source(*best,1);
- }
- }
- int bestAlignment=0;
-
- for(unsigned int i=0;i<setOfGoodCenters.size();++i)
- setOfGoodCenters[i].first->check();
- alignments.insert(*best);
- if (setOfGoodCenters[bestAlignment].second <= 0){
- if( PrintZeroScoreWarning++<100 ){
- cerr << "WARNING: Hill Climbing yielded a zero score viterbi alignment for the following pair:\n";
- cerr << alignment(*setOfGoodCenters[bestAlignment].first) ;
- printSentencePair(es, fs, cerr);
- if(Log){
- logmsg << "WARNING: Hill Climbing yielded a zero score viterbi alignment for the following pair:\n";
- printSentencePair(es, fs, logmsg);
- }
- }
- else if(PrintZeroScoreWarning==100) {
- cerr << "ERROR: too many zero score warnings => no additional one will be printed\n";
- }
- setOfGoodCenters[bestAlignment].second=1e-300;
- continue;
- }
- int nHillClimbed=1,nAlignment=1;
- bool flagBetterByPegging=0;
- if ( Peg ){
- const MoveSwapMatrix<MODEL_TYPE> *useMatrix=viterbi; // it is faster using 'best', ... (FJO)
- Array2<short, vector<short> > linkCache(l+1, m+1, false);
- if(UseLinkCache)for(unsigned int j=1;j<=m;j++)linkCache((*useMatrix)(j), j)=1;
- for(PositionIndex j=1;j<=m;j++)for(PositionIndex i=0;i<=l;i++){
- nAlignment++;
- if( i!=(*useMatrix)(j) && (UseLinkCache==0||linkCache(i,j)==0) &&
- ef.get_t(i,j)>ef.get_t((*useMatrix)(j),j)*PEGGED_CUTOFF &&
- (i != 0 || (m >= 2 * (useMatrix->fert(0)+1)))){
- MoveSwapMatrix<MODEL_TYPE> *BESTPEGGED=0;
- LogProb peggedAlignmentScore;
- nHillClimbed++;
- if( ef.isSubOptimal() ){
- BESTPEGGED = new MoveSwapMatrix<MODEL_TYPE>(*useMatrix);
- BESTPEGGED->doMove(i, j);
- peggedAlignmentScore= hillClimb_std(*BESTPEGGED, i,j);
- }else{
- alignment pegAlignment(l,m);
- peggedAlignmentScore=viterbi_model2(ef,pegAlignment,pair_no-1,i,j);
- BESTPEGGED = new MoveSwapMatrix<MODEL_TYPE>(ef,pegAlignment);
- massert( pegAlignment(j)==i );
- }
- if(UseLinkCache)
- for(unsigned int j=1;j<=m;j++)
- linkCache((*BESTPEGGED)(j), j)=1;
- if( peggedAlignmentScore>setOfGoodCenters[bestAlignment].second*(LogProb)PEGGED_CUTOFF && alignments.count(*BESTPEGGED)==0 ){
- if(extendCenterList(setOfGoodCenters,BESTPEGGED,peggedAlignmentScore)){
- alignments.insert(*BESTPEGGED);
- if( peggedAlignmentScore>1.00001*setOfGoodCenters[bestAlignment].second ){
- if( LogPeg ){
- cerr << "found better alignment by pegging " << pair_no << " " << peggedAlignmentScore/setOfGoodCenters[bestAlignment].second << '\n';
- cerr << "NEW BEST: " << alignment(*BESTPEGGED);
- cerr << "OLD : " << alignment(*setOfGoodCenters[bestAlignment].first);
- }
- flagBetterByPegging=1;
- bestAlignment=alignments.size()-1;
- }
- }
- assert( differences(*BESTPEGGED, *best)!=0 );
- BESTPEGGED=0;
- }else
- delete BESTPEGGED;
- }
- }
- } // end of if(Peg)
- NBetterByPegging+=flagBetterByPegging;
- for(unsigned int i=0;i<setOfGoodCenters.size();++i)
- setOfGoodCenters[i].first->check();
- if( LogPeg>1 )
- cout << "PEGGED: " << setOfGoodCenters.size() << " HILLCLIMBED:" << nHillClimbed << " TOTAL:" << nAlignment << " alignments." << '\n';
- int alTotal=collectCountsOverNeighborhood(setOfGoodCenters,es, fs, tTable, aCountTable,
- dCountTable, nCountTable, p1_count, p0_count,
- align_total_count, count, collect_counts, dm_out);
- if( LogPeg>1 ){
- cout << "ALL: " << alTotal << " from " << pow(float(l+1),float(m)) << '\n';
- massert(alTotal<=pow(double(l+1),double(m)));
- }
- NCenter+=setOfGoodCenters.size();NHillClimbed+=nHillClimbed;NAlignment+=nAlignment;NTotal+=alTotal;
- perp.addFactor(log(double(align_total_count)), count, l, m,0);
- viterbiPerp.addFactor(log(double(setOfGoodCenters[bestAlignment].second)), count, l, m,0);
- massert(log(double(setOfGoodCenters[bestAlignment].second)) <= log(double(align_total_count)));
- if (dump_files||(FEWDUMPS&&sent.sentenceNo<1000)||(final&&(ONLYALDUMPS)) )
- printAlignToFile(es, fs, Elist.getVocabList(), Flist.getVocabList(), of2, (setOfGoodCenters[bestAlignment].first)->getAlignment(), sent.sentenceNo,
- setOfGoodCenters[bestAlignment].second);
- for(unsigned int i=0;i<setOfGoodCenters.size();++i)
- setOfGoodCenters[i].first->check();
- if( of3||(writeNBestErrorsFile&&pair_no<int(ReferenceAlignment.size())) ){
- vector<Als> als;
- for(unsigned int s=0;s<setOfGoodCenters.size();++s){
- const MoveSwapMatrix<MODEL_TYPE>&msc= *setOfGoodCenters[s].first;
- msc.check();
- double normalized_ascore=setOfGoodCenters[s].second;
- if( !msc.isCenterDeleted() )
- als.push_back( Als(s,0,0,normalized_ascore) );
-
- for(WordIndex j=1;j<=m;j++)
- for(WordIndex i=0;i<=l;i++)
- if( i!=msc(j)&& !msc.isDelMove(i,j) )
- als.push_back( Als(s,i,j,msc.cmove(i,j)*normalized_ascore));
- for(PositionIndex j1=1;j1<=m;j1++)
- for(PositionIndex j2=j1+1;j2<=m;j2++)
- if( msc(j1)!=msc(j2) && !msc.isDelSwap(j1,j2) )
- als.push_back( Als(s,-j1,-j2,msc.cswap(j1,j2)*normalized_ascore));
- }
- sort(als.begin(),als.end());
- double sum=0,sum2=0;
- for(unsigned int i=0;i<als.size();++i)
- sum+=als[i].v;
- for(unsigned int i=0;i<min((unsigned int)als.size(),(unsigned int)PrintN);++i){
- alignment x=*setOfGoodCenters[als[i].s].first;
- if( !(als[i].a==0 && als[i].b==0) ){
- if( als[i].a<=0&&als[i].b<=0 )
- x.doSwap(-als[i].a,-als[i].b);
- else
- x.doMove(als[i].a,als[i].b);
- }
- if( of3&&i<PrintN )
- printAlignToFile(es, fs, Elist.getVocabList(), Flist.getVocabList(),*of3,x.getAlignment(), sent.sentenceNo,
- als[i].v/sum*count);
- sum2+=als[i].v;
- if( writeNBestErrorsFile ){
- if( pair_no<int(ReferenceAlignment.size()) ){
- int ALmissing=0,ALtoomuch=0,ALeventsMissing=0,ALeventsToomuch=0;
- vector<double> scores;
- ErrorsInAlignment(ReferenceAlignment[pair_no-1],x.getAlignment(),l,ALmissing,ALtoomuch,ALeventsMissing,ALeventsToomuch,pair_no);
- ef.computeScores(x,scores);
- *writeNBestErrorsFile << ALmissing+ALtoomuch << ' ';
- for(unsigned int i=0;i<scores.size();++i)
- *writeNBestErrorsFile << ((scores[i]>0.0)?(-log(scores[i])):1.0e6) << ' ';
- *writeNBestErrorsFile << '\n';
- }
- }
- }
- if( writeNBestErrorsFile )
- *writeNBestErrorsFile << '\n';
- }
- addAL((setOfGoodCenters[bestAlignment].first)->getAlignment(),sent.sentenceNo,l);
- for(unsigned int i=0;i<setOfGoodCenters.size();i++)
- delete setOfGoodCenters[i].first;
- double period = difftime(time(NULL), sent_s);
- if (Verbose)
- cerr << "processing this sentence pair took : " << period
- << " seconds\n";
-
- } /* of sentence pair E, F */
- //sHandler1.rewind();
- if (dump_files||FEWDUMPS||(final&&(ONLYALDUMPS)) )
- of2.close();
- delete of3;
- delete writeNBestErrorsFile;
- double FSent=pair_no;
- cout << "#centers(pre/hillclimbed/real): " << NAlignment/FSent << " " << NHillClimbed/FSent << " " << NCenter/FSent << " #al: " << NTotal/FSent << " #alsophisticatedcountcollection: " << NumberOfAlignmentsInSophisticatedCountCollection/FSent << " #hcsteps: " << HillClimbingSteps/FSent << '\n';
- cout << "#peggingImprovements: " << NBetterByPegging/FSent << '\n';
-}
-#endif
-
-
-
-#include "collCounts.cpp"
-#define INSTANTIATE(A,B,C) template \
-void model3::viterbi_loop_with_tricks<A,B,C>(Perplexity& perp, Perplexity& viterbiPerp, sentenceHandler& sHandler1, \
- bool dump_files, const char* alignfile,bool collect_counts, string, bool final,\
- B*d4m,C*d5m);
-
-INSTANTIATE(transpair_model3, void, void);
-INSTANTIATE(transpair_modelhmm, const hmm, void);
-INSTANTIATE(transpair_modelhmm, const hmm, d4model);
-INSTANTIATE(transpair_modelhmm, const hmm, d5model);
-INSTANTIATE(transpair_model3, void,d4model);
-INSTANTIATE(transpair_model3, void,d5model);
-INSTANTIATE(transpair_model4, d4model,d4model);
-INSTANTIATE(transpair_model4, d4model,d5model);
-INSTANTIATE(transpair_model5, d5model,d5model);
diff --git a/scripts/training/MGIZA/src/myassert.cpp b/scripts/training/MGIZA/src/myassert.cpp
deleted file mode 100644
index 2d49be8..0000000
--- a/scripts/training/MGIZA/src/myassert.cpp
+++ /dev/null
@@ -1,20 +0,0 @@
-#include "mystl.h"
-#include <iostream>
-#include "myassert.h"
-
-#ifndef STANDARD_ASSERT
-void myerror(int line,const char *file,const char *expression)
-{
- cerr << "(general.h):Assertion failed: '" << expression << "' ::: b "
- << file << ":" << line << endl;
- cout << "(general.h):Assertion failed: '" << expression << "' ::: b "
- << file << ":" << line << endl;
-}
-void imyerror(int line,const char *file,const char *expression)
-{
- cerr << "Error: '" << expression << "' ::: in Source " << file
- << ":" << line << endl;
-}
-
-#endif
-
diff --git a/scripts/training/MGIZA/src/myassert.h b/scripts/training/MGIZA/src/myassert.h
deleted file mode 100644
index b648fdd..0000000
--- a/scripts/training/MGIZA/src/myassert.h
+++ /dev/null
@@ -1,20 +0,0 @@
-#ifndef MY_ASSERT_DEFINED
-#define MY_ASSERT_DEFINED
-void myerror(int line,const char *file,const char *expression);
-void imyerror(int line,const char *file,const char *expression);
-
-#define iassert(expression) do {if (!(expression)) {imyerror(__LINE__,__FILE__,#expression);}} while (0)
-
-#
-#define massert(expr) do {} while(0)
-
-#define vassert(expr) do {} while(0)
-
-#include <cassert>
-
-#endif
-
-
-
-
-
diff --git a/scripts/training/MGIZA/src/mymath.h b/scripts/training/MGIZA/src/mymath.h
deleted file mode 100644
index f8ad926..0000000
--- a/scripts/training/MGIZA/src/mymath.h
+++ /dev/null
@@ -1,9 +0,0 @@
-/* ---------------------------------------------------------------- */
-/* Copyright 1998 (c) by RWTH Aachen - Lehrstuhl fuer Informatik VI */
-/* Franz Josef Och */
-/* ---------------------------------------------------------------- */
-#ifndef HEADER_MYMATH_DEFINED
-#define HEADER_MYMATH_DEFINED
-inline double mfabs(double x){return (x<0)?(-x):x;}
-#include <math.h>
-#endif
diff --git a/scripts/training/MGIZA/src/mystl.h b/scripts/training/MGIZA/src/mystl.h
deleted file mode 100644
index 29fa778..0000000
--- a/scripts/training/MGIZA/src/mystl.h
+++ /dev/null
@@ -1,342 +0,0 @@
-/* ---------------------------------------------------------------- */
-/* Copyright 1998 (c) by RWTH Aachen - Lehrstuhl fuer Informatik VI */
-/* Franz Josef Och */
-/* ---------------------------------------------------------------- */
-#ifndef MY_STL_H_DEFINED
-#define MY_STL_H_DEFINED
-
-#include <string>
-using namespace std;
-#ifdef USE_STLPORT
-#ifdef __STL_DEBUG
-using namespace _STLD;
-#else
-using namespace _STL;
-#endif
-#endif
-
-#include "myassert.h"
-#include <string>
-#include <utility>
-#if __GNUC__>2
-#include <ext/hash_map>
-using __gnu_cxx::hash_map;
-#else
-#include <hash_map>
-#endif
-#include <iostream>
-#include "mymath.h"
-#include "Array2.h"
-
-#define over_string(a,i) for(unsigned int i=0;i<a.length();i++)
-#define over_array(a,i) for(i=(a).low();i<=(a).high();i++)
-#define backwards_array(a,i) for(i=(a).high();i>=(a).low();i--)
-#define over_arr(a,i) for(int i=(a).low();i<=(a).high();i++)
-#define over_arrMAX(a,i,max) for(int i=(a).low();i<=min((a).high(),max-1);i++)
-#define backwards_arr(a,i) for(int i=(a).high();i>=(a).low();i--)
-
-extern double n1mult,n2mult,n3mult;
-
-inline double realProb(int n1,int n2)
-{
- massert(n1<=n2);
- iassert(n1>=0&&n2>0);
- if(n2==0)n2=1;
- return ((double)n1)/(double)n2;
-}
-
-inline double verfProb(int n1,int n2)
-{
- double prob = realProb(n1,n2);
- if( n1==1 )return prob*n1mult;
- else if( n1==2 )return prob*n2mult;
- else if( n1==3 )return prob*n3mult;
- else
- return prob;
-}
-
-inline bool prefix(const string&x,const string&y)
-{
- if(y.size()>x.size() )
- return 0;
- for(unsigned int i=0;i<y.size();++i)
- if( y[i]!=x[i] )
- return 0;
- return 1;
-}
-
-
-/*template<class T>
-int lev(const T&s1,const T&s2)
-{
- Array2<int,vector<int> > a(s1.size()+1,s2.size()+1,1000);
- Array2<pair<int,int>,vector<pair<int,int> > > back(s1.size()+1,s2.size()+1,pair<int,int>(0,0));
- for(unsigned int i=0;i<=s1.size();i++)
- for(unsigned int j=0;j<=s2.size();j++)
- {
- if( i==0&&j==0 )
- a(i,j)=0;
- else
- {
- int aDEL=100,aINS=100,aSUB=100;
- if(i>0)
- aDEL=a(i-1,j)+1;
- if(j>0)
- aINS=a(i,j-1)+1;
- if(i>0&&j>0)
- aSUB=a(i-1,j-1)+ !(s1[i-1]==s2[j-1]);
- if( aSUB<=aDEL && aSUB<=aINS )
- {
- a(i,j)=aSUB;
- back(i,j)=pair<int,int>(i-1,j-1);
- }
- else if( aDEL<=aSUB && aDEL<=aINS )
- {
- a(i,j)=aDEL;
- back(i,j)=pair<int,int>(i-1,j);
- }
- else
- {
- a(i,j)=aINS;
- back(i,j)=pair<int,int>(i,j-1);
- }
- }
- }
- return a(s1.size(),s2.size());
-}
-
-template<class T>
-float rel_lev(const T&s1,const T&s2)
-{
- if( s1.size()==0 )
- return s2.size()==0;
- else
- return min(1.0,lev(s1,s2)/(double)s1.size());
-}*/
-
-template<class V> int Hash(const pair<V,V>&a)
-{ return Hash(a.first)+13001*Hash(a.second); }
-
-template<class T1,class T2>
-ostream& operator<<(ostream &out,const pair<T1,T2> &ir)
-{
- out << "(" << ir.first << "," << ir.second << ")";
- return out;
-}
-
-inline int Hash(const string& s)
-{
- int sum=0;
- string::const_iterator i=s.begin(),end=s.end();
- for(;i!=end;i++)sum=5*sum+(*i);
- return sum;
-}
-template<class A,class B,class C>
-class tri
-{
-public:
- A a;
- B b;
- C c;
- tri(){};
- tri(const A&_a,const B&_b,const C&_c)
- : a(_a),b(_b),c(_c) {}
-};
-template<class A,class B,class C>
-bool operator==(const tri<A,B,C>&x,const tri<A,B,C>&y)
-{ return x.a==y.a&&x.b==y.b&&x.c==y.c;}
-
-template<class A,class B,class C>
-bool operator<(const tri<A,B,C>&x,const tri<A,B,C>&y)
-{
- if(x.a<y.a)return 1;
- if(y.a<x.a)return 0;
- if(x.b<y.b)return 1;
- if(y.b<x.b)return 0;
- if(x.c<y.c)return 1;
- if(y.c<x.c)return 0;
- return 0;
-}
-
-double used_time();
-
-
-template<class T ,class _Pr = less<T> >
-class my_hash
-{
-public:
- int operator()(const T&t)const {return Hash(t);}
-#ifdef WIN32
- enum
- { // parameters for hash table
- bucket_size = 1 // 0 < bucket_size
- };
- my_hash()
- : comp()
- { // construct with default comparator
- }
-
- my_hash(_Pr _Pred)
- : comp(_Pred)
- { // construct with _Pred comparator
- }
-protected:
- _Pr comp;
-public:
- int operator()(const T&t , const T&t1)const {return comp(t,t1);}
-#endif
-};
-
-inline int Hash(int value) { return value; }
-#define MY_HASH_BASE hash_map<A,B,my_hash<A> >
-
-template<class A,class B>
-class leda_h_array : public MY_HASH_BASE
-{
-private:
- B init;
-public:
- leda_h_array() : MY_HASH_BASE() {}
- leda_h_array(const B&_init)
- : MY_HASH_BASE(),init(_init) {}
- bool defined(const A&a) const
- { return find(a)!=this->end(); }
- const B&operator[](const A&a)const
- {
- typename MY_HASH_BASE::const_iterator pos=find(a);
- if( pos==this->end() )
- return init;
- else
- return pos->second;
- }
- B&operator[](const A&a)
- {
- typename MY_HASH_BASE::iterator pos=find(a);
- if( pos==this->end() )
- {
- insert(MY_HASH_BASE::value_type(a,init));
- pos=find(a);
- iassert(pos!=this->end());
- }
- return pos->second;
- }
- const B&initValue()const
- {return init;}
-};
-
-#define forall_defined_h(a,b,c,d) for(typename leda_h_array<a,b>::const_iterator __jj__=(d).begin();__jj__!=(d).end()&&((c=__jj__->first),1); ++__jj__)
-template<class T,class U>
-ostream & operator<<(ostream&out,const leda_h_array<T,U>&w)
-{
- T t;
- bool makeNl=0;
- out << "h_array{";
- forall_defined_h(T,U,t,w)
- {
- if( makeNl )
- out << "\n ";
- out << "EL:" << t << " INH:" << w[t] << ".";
- makeNl=1;
- }
- return out << "}\n";
-}
-
-template<class T,class U>
-istream & operator>>(istream&in,leda_h_array<T,U>&)
-{
- return in;
-}
-
-template<class A,class B>
-bool operator==(const leda_h_array<A,B>&p1,const leda_h_array<A,B>&p2)
-{
- A v;
- forall_defined_h(A,B,v,p1)
- if( !( p1[v]==p2[v]) ) return 0;
- forall_defined_h(A,B,v,p2)
- if( !( p1[v]==p2[v]) ) return 0;
- return 1;
-}
-
-template<class T>
-int count_elements(T a,T b)
-{
- int c=0;
- while(a!=b)
- {
- a++;
- c++;
- }
- return c;
-}
-
-template<class T>
-T normalize_if_possible_with_increment(T*a,T*b,int increment)
-{
- T sum=0;
- for(T*i=a;i!=b;i+=increment)
- sum+=*i;
- if( sum )
- for(T*i=a;i!=b;i+=increment)
- *i/=sum;
- else
- {
- T factor=increment/(b-a);
- for(T*i=a;i!=b;i+=increment)
- *i=factor;
- }
- return sum;
-}
-
-template<class T>
-inline int m_comp_3way(T a,T b,int n)
-{
- int _n=0;
- while((_n++<n) && a && b)
- {
- const typename T::value_type &aa=*a;
- const typename T::value_type &bb=*b;
- if( aa<bb )return 1;
- if( bb<aa )return -1;
- ++a;
- ++b;
- }
- return 0;
-}
-
-template<class T>
-void smooth_standard(T*a,T*b,double p)
-{
- int n=b-a;
- if( n==0 )
- return;
- double pp=p/n;
- for(T*i=a;i!=b;++i)
- *i = (1.0-p)*(*i)+pp;
-}
-
-template<class T>
-const T *conv(typename vector<T>::const_iterator i)
-{
- return &(*i);
-}
-#if __GNUC__>2
-template<class T>
-T *conv(typename vector<T>::iterator i)
-{
- return &(*i);
-}
-#endif
-
-/*template<class T>
-const T *conv(const T*x)
-{
- return x;
-}*/
-template<class T>
-T *conv(T*x)
-{
- return x;
-}
-
-#endif
diff --git a/scripts/training/MGIZA/src/parse.cpp b/scripts/training/MGIZA/src/parse.cpp
deleted file mode 100644
index c8eb570..0000000
--- a/scripts/training/MGIZA/src/parse.cpp
+++ /dev/null
@@ -1,150 +0,0 @@
-/*
-
-EGYPT Toolkit for Statistical Machine Translation
-Written by Yaser Al-Onaizan, Jan Curin, Michael Jahr, Kevin Knight, John Lafferty, Dan Melamed, David Purdy, Franz Och, Noah Smith, and David Yarowsky.
-
-This program is free software; you can redistribute it and/or
-modify it under the terms of the GNU General Public License
-as published by the Free Software Foundation; either version 2
-of the License, or (at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with this program; if not, write to the Free Software
-Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
-USA.
-
-*/
-
-/* FJO 01/2001: completely reorganized parameter processing */
-
-#include <strstream>
-#include <string>
-#include <fstream>
-#include "defs.h"
-#include "utility.h"
-#include "Globals.h"
-#include "D4Tables.h"
-#include "D5Tables.h"
-#include "ATables.h"
-#include "Parameter.h"
-
-extern bool ONLYALDUMPS;
-
-void parseConfigFile (char * fname )
- // This functions reads in the configuration file to set up some run-time
- // parameters. The parameters are global variables that are defined in
- // main.cc and used all over the place in the program
- // The format of the configuration file can be explained in the following way
- // FORMAT:
- // the character '\n' separates lines ..
- // lines that start with "//" (skipping over white spaces are considered
- // as comments and will be ignored.
- // Any other line is considered as an attribute setting instruction and it
- // is divided into haves (separated by a colon ":"). The first half is the
- // attribute value which consists of the concatenation of all non-white space
- // tokens before the colon. These tokens will have spaces eseparating them.
- // The attribute vlue is the first token after the colon (any thing after
- // it will be ignored ;
- // For example :
- // if the configuration file has the following entry:
- //
- // NO. ITERATIONS MODEL 2 : 10
- //
- // then the attribute is "NO. ITERATIONS MODEL 2" , and the attribute value
- // is "10" (these do not include the quotation marks).
-
-{
-
- string line, word, attrib, attribval ;
- ifstream Config_File(fname);
- if(!Config_File){
- cerr << "ERROR: Cannot open configuration file " << fname << "!\n" ;
- exit(1);
- }
-
- cout << "The following options are from the config file and will be overwritten by any command line options.\n";
-
- while(getline(Config_File, line)){
-
- istrstream buffer(line.c_str());
- word = attrib = attribval = "" ;
- buffer >> word ;
- if (word != "//"){ // if line does not start with "//" (i.e. not a comment)
- attrib = word ;
- while((buffer >> word) && (word != ":")){
- attrib += " " + word ;
- }
- if(!(buffer >> attribval))
- {
- istrstream buffer2(line.c_str());
- buffer2>>attrib;
- buffer2>>attribval;
- }
-
- // This# is where (1) the configuration file is defined and
- // (2) parsing of its attributes occurs.
-
- if(attrib == "t FILE"){
- t_Filename = attribval;
- cout << "\tt file: " << t_Filename << '\n';
- }
- else if(attrib == "a FILE"){
- a_Filename = attribval;
- cout << "\ta file: " << a_Filename << '\n';
- }
- else if(attrib == "d FILE"){
- d_Filename = attribval;
- cout << "\td file: " << d_Filename << '\n';
- }
- else if(attrib == "n FILE"){
- n_Filename = attribval;
- cout << "\tn file: " << n_Filename << '\n';
- }
- else if(attrib == "p0 FILE"){
- p0_Filename = attribval;
- cout << "\tp0 file: " << p0_Filename << '\n';
- }
- else if ( line == ""){}
- else if( !makeSetCommand(attrib,attribval,getGlobalParSet(),2) )
- cerr << "ERROR: Unrecognized attribute :" << attrib << '\n';
- }
- }
-}
-
-
-void parseArguments(int argc, char *argv[])
-{
- int arg = 1;
-
- if(!strcmp(argv[1], "--h") || !strcmp(argv[1], "--help")){
- printHelp();
- exit(0);
- }
- if( argv[1][0]=='-' )
- arg=0;
- else
- parseConfigFile(argv[1]);
- while(++arg<argc){
- if( strlen(argv[arg])>2 && argv[arg][0]=='-' && argv[arg][1]=='-' ) {
- if( !makeSetCommand(argv[arg]+1,"1",getGlobalParSet(),2))
- cerr << "WARNING: ignoring unrecognized option: "<< argv[arg] << '\n' ;
- }
- else if( arg+1<argc && !makeSetCommand(argv[arg],argv[arg+1],getGlobalParSet(),2))
- cerr << "WARNING: ignoring unrecognized option: "<< argv[arg] << '\n' ;
- else
- {
- arg++;
- }
- }
- if( OPath.length() )
- OPath+="/";
- Prefix = (OPath + Prefix);
- LogFilename = (OPath + LogFilename);
- printGIZAPars(cout);
-}
-
diff --git a/scripts/training/MGIZA/src/plain2snt.cpp b/scripts/training/MGIZA/src/plain2snt.cpp
deleted file mode 100644
index 2aae066..0000000
--- a/scripts/training/MGIZA/src/plain2snt.cpp
+++ /dev/null
@@ -1,143 +0,0 @@
-#include <iostream>
-#include <string>
-#include <strstream>
-#include <fstream>
-#include <map>
-#include <vector>
-#include <stdlib.h>
-#include <stdio.h>
-
-using namespace std;
-
-int main(int argc,char**argv)
-{
- string snt1(""), snt2(""), vcb1(""), vcb2("");
- vector<double>weights;
- vector<string>filenames;
- for(int i=1;i<argc;++i)
- if(string(argv[i])=="-weight")
- weights.push_back(atof(argv[++i]));
- else if(string(argv[i])=="-snt1")
- snt1=argv[++i];
- else if(string(argv[i])=="-snt2")
- snt2=argv[++i];
- else if(string(argv[i])=="-vcb1")
- vcb1=argv[++i];
- else if(string(argv[i])=="-vcb2")
- vcb2=argv[++i];
- else
- filenames.push_back(argv[i]);
-
- if((filenames.size()%2)==1||filenames.size()==0 )
- {
- cerr << argv[0] << " txt1 txt2 [txt3 txt4 -weight w -vcb1 output1.vcb -vcb2 output2.vcb -snt1 output1_output2.snt -snt2 output2_output1.snt]\n";
- cerr << " Converts plain text into GIZA++ snt-format.\n";
- exit(1);
- }
- string line1,line2,word;
- map<string,int> v1,v2;
- map<string,int> id1,id2;
- vector<string> iid1(2),iid2(2);
-
- string w1(filenames[0]);
- string w2(filenames[1]);
-
- if( w1.length()>4&&w2.length()>4&&((w1.substr(w1.length()-4,w1.length())==".tok" && w2.substr(w2.length()-4,w2.length())==".tok" )||
- (w1.substr(w1.length()-4,w1.length())==".txt" && w2.substr(w2.length()-4,w2.length())==".txt" ) ))
- {
- w1=w1.substr(0,w1.length()-4);
- w2=w2.substr(0,w2.length()-4);
- cerr << "w1:"<< w1 << " w2:" << w2 << endl;
- }
-
- string vocab1(w1),vocab2(w2);
- unsigned int slashpos=vocab1.rfind('/')+1;
-#ifdef WIN32
- if(slashpos==0) slashpos=vocab1.rfind('\\')+1;
-#endif
- if( slashpos>=vocab1.length() ) slashpos=0;
- string vocab1x(vocab1.substr(slashpos,vocab1.length()));
- cout << vocab1 << " -> " << vocab1x << endl;
- slashpos=vocab2.rfind('/')+1;
-#ifdef WIN32
- if(slashpos==0) slashpos=vocab1.rfind('\\')+1;
-#endif
- if( slashpos>=vocab2.length() ) slashpos=0;
- string vocab2x(vocab2.substr(slashpos,vocab2.length()));
- cout << vocab2 << " -> " << vocab2x << endl;
- if (snt1=="") {
- snt1=vocab1+"_"+vocab2x+string(".snt");
- }
- if (snt2=="") {
- snt2=vocab2+"_"+vocab1x+string(".snt");
- }
- if (vcb1=="") {
- vocab1+=string(".vcb");
- } else {
- vocab1=vcb1;
- }
- if (vcb2=="") {
- vocab2+=string(".vcb");
- } else {
- vocab2=vcb2;
- }
-
- ofstream ovocab1(vocab1.c_str()),ovocab2(vocab2.c_str()),osnt1(snt1.c_str()),osnt2(snt2.c_str());
- for(unsigned int i=0;i<filenames.size();i+=2)
- {
- ifstream i1(filenames[i].c_str()),i2(filenames[i+1].c_str());
- if(!i1)cerr << "WARNING: " << filenames[i] << " cannot be read.\n";
- if(!i2)cerr << "WARNING: " << filenames[i+1] << " cannot be read.\n";
- while(getline(i1,line1) && getline(i2,line2) )
- {
- vector<string> t1,t2;
- istrstream ii1(line1.c_str());
- while(ii1>>word)
- {
- t1.push_back(word);
- v1[word]++;
- if( id1.find(word)==id1.end() )
- {
- iid1.push_back(word);
- id1[word]=iid1.size()-1;
- }
- }
- istrstream ii2(line2.c_str());
- while(ii2>>word)
- {
- t2.push_back(word);
- v2[word]++;
- if( id2.find(word)==id2.end() )
- {
- iid2.push_back(word);
- id2[word]=iid2.size()-1;
- }
- }
- double w=1.0;
- if( i/2<weights.size() )
- w=weights[i/2];
- if( t1.size()&&t2.size() )
- {
- osnt1 << w << "\n";
- for(unsigned int j=0;j<t1.size();++j)osnt1 << id1[t1[j]] << ' ';
- osnt1 << '\n';
- for(unsigned int j=0;j<t2.size();++j)osnt1 << id2[t2[j]] << ' ';
- osnt1 << '\n';
-
- osnt2 << w << "\n";
- for(unsigned int j=0;j<t2.size();++j)osnt2 << id2[t2[j]] << ' ';
- osnt2 << '\n';
- for(unsigned int j=0;j<t1.size();++j)osnt2 << id1[t1[j]] << ' ';
- osnt2 << '\n';
- }
- else
- cerr << "WARNING: filtered out empty sentence (source: " << filenames[i] << " " << t1.size() <<
- " target: " << filenames[i+1] << " " << t2.size() << ").\n";
- }
- }
-
- for(unsigned int i=2;i<iid1.size();++i)
- ovocab1 << i << ' ' << iid1[i] << ' ' << v1[iid1[i]] << '\n';
- for(unsigned int i=2;i<iid2.size();++i)
- ovocab2 << i << ' ' << iid2[i] << ' ' << v2[iid2[i]] << '\n';
-}
diff --git a/scripts/training/MGIZA/src/reports.cpp b/scripts/training/MGIZA/src/reports.cpp
deleted file mode 100644
index 4d5873a..0000000
--- a/scripts/training/MGIZA/src/reports.cpp
+++ /dev/null
@@ -1,211 +0,0 @@
-/*
-
-EGYPT Toolkit for Statistical Machine Translation
-Written by Yaser Al-Onaizan, Jan Curin, Michael Jahr, Kevin Knight, John Lafferty, Dan Melamed, David Purdy, Franz Och, Noah Smith, and David Yarowsky.
-
-This program is free software; you can redistribute it and/or
-modify it under the terms of the GNU General Public License
-as published by the Free Software Foundation; either version 2
-of the License, or (at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with this program; if not, write to the Free Software
-Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
-USA.
-
-*/
-#include <strstream>
-#include <time.h>
-#include <set>
-#include "defs.h"
-#include "vocab.h"
-#include "Perplexity.h"
-#include "getSentence.h"
-#include "TTables.h"
-#include "Globals.h"
-#include "Parameter.h"
-
-void printHelp(void)
-{
- cerr << "Usage:\n\n" << Usage << '\n';
- cerr << "Options (these override parameters set in the config file):\n\n";
- cerr << "\t--v \t\t print verbose message, Warning this is not very descriptive and not systematic.\n";
- cerr << "\t--NODUMPS \t Do not write any files to disk (This will over write dump frequency options).\n";
- cerr << "\t--h[elp]\t\tprint this help\n";
- cerr << "\t--p\t\tUse pegging when generating alignments for Model3 training. (Default NO PEGGING)\n";
- cerr << "\t--st\t\tto use a fixed ditribution for the fertility parameters when tranfering from model 2 to model 3 (Default complicated estimation)\n";
- printGIZAPars(cout);
-}
-
-
-void generatePerplexityReport(const Perplexity& trainperp,
- const Perplexity& testperp,
- const Perplexity& trainVperp,
- const Perplexity& testVperp,
- ostream& of, int trainsize, int testsize,
- bool)
-{
- unsigned int i, m;
- unsigned int m1 = max(trainperp.size(), testperp.size());
- unsigned int m2 = max(trainVperp.size(), testVperp.size());
- m = max(m1,m2);
- of << "#trnsz\ttstsz\titer\tmodel\ttrn-pp\t\ttest-pp\t\ttrn-vit-pp\t\ttst-vit-pp\n";
- for (i = 0 ; i <m ; i++){
- of << trainsize << '\t' << testsize << '\t' << i<< '\t' << trainperp.modelid[i] << '\t';
- if (i < trainperp.perp.size())
- of << trainperp.perp[i] << "\t\t" ;
- else
- of << "N/A\t\t";
- if (i<testperp.perp.size())
- of << testperp.perp[i] << "\t\t" ;
- else
- of << "N/A\t\t";
- if (i < trainVperp.perp.size())
- of << trainVperp.perp[i] << "\t\t" ;
- else
- of << "N/A\t";
- if (i< testVperp.perp.size())
- of << testVperp.perp[i] << '\n' ;
- else
- of << "N/A\n";
- }
-}
-
-void printSentencePair(Vector<WordIndex>& es,
- Vector<WordIndex>& fs,
- ostream& of)
-
- // just writes a sentece pair to the give output stream, one sentence pair line
- // it writes token ids not actual tokens.
-{
- WordIndex i, j, l, m;
- l = es.size() - 1;
- m = fs.size() - 1;
- of << "Source sentence length : " << l << " , target : " << m << "\n";
- for (i = 1 ; i <= l ; i++)
- of << es[i] << ' ';
- of << "\n";
- for (j = 1 ; j <= m ; j++)
- of << fs[j] << ' ';
- of << "\n";
-
-}
-
-extern short CompactAlignmentFormat;
-void printAlignToFile(const Vector<WordIndex>& es,
- const Vector<WordIndex>& fs,
- const Vector<WordEntry>& evlist,
- const Vector<WordEntry>& fvlist,
- ostream& of2,
- const Vector<WordIndex>& viterbi_alignment,
- int pair_no, double alignment_score)
-
- // prints the given alignment to alignments file (given it stream pointer)
- // in a format recognizable by the draw-alignment tool ... which is of the
- // example (each line triple is one sentence pair):
- // # sentence caption
- // target_word_1 target_word_2 ..... target_word_m
- // source_word_1 ({ x y z }) source_word_2 ({ }) .. source_word_n ({w})
- // where x, y, z, and w are positions of target words that each source word
- // is connected to.
-
-{
- WordIndex l, m;
- Vector<Vector<WordIndex> > translations(es.size()); // each english words has a vector
- // of zero or more translations .
- l = es.size() - 1;
- m = fs.size() - 1;
- if( CompactAlignmentFormat )
- {
- for (WordIndex j = 1 ; j <= m ; j++)
- if( viterbi_alignment[j] )
- of2 << viterbi_alignment[j]-1 << ' ' << j-1 << ' ';
- of2 << '\n';
- }
- else
- {
- of2 << "# Sentence pair (" << pair_no <<") source length " << l << " target length "<< m <<
- " alignment score : "<< alignment_score << '\n';
- for (WordIndex j = 1 ; j <= m ; j++){
- of2 << fvlist[fs[j]].word << " " ;
- translations[viterbi_alignment[j]].push_back(j);
- }
- of2 << '\n';
-
- for (WordIndex i = 0 ; i <= l ; i++){
- of2 << evlist[es[i]].word << " ({ " ;
- for (WordIndex j = 0 ; j < translations[i].size() ; j++)
- of2 << translations[i][j] << " " ;
- of2 << "}) ";
- }
- of2 << '\n';
- }
-}
-
-
-void printOverlapReport(const tmodel<COUNT, PROB>& tTable,
- sentenceHandler& testHandler, vcbList& trainEList,
- vcbList& trainFList, vcbList& testEList, vcbList& testFList)
-{
- set<pair<WordIndex, WordIndex> > testCoocur ;
- sentPair s ;
- /* string unseenCoocurFile = Prefix + ".tst.unseen.cooc" ;
- ofstream of_unseenCoocur(unseenCoocurFile.c_str());
-
- string seenCoocurFile = Prefix + ".tst.seen.cooc" ;
- ofstream of_seenCoocur(seenCoocurFile.c_str());
- */
- testHandler.rewind();
- int seen_coocur = 0, unseen_coocur = 0, srcUnk = 0, trgUnk = 0 ;
- while(testHandler.getNextSentence(s)){
- for (WordIndex i = 1 ; i < s.eSent.size() ; i++)
- for (WordIndex j = 1 ; j < s.fSent.size() ; j++)
- testCoocur.insert(pair<WordIndex, WordIndex> (s.eSent[i], s.fSent[j])) ;
- }
- set<pair<WordIndex, WordIndex> >::const_iterator i ;
- for (i = testCoocur.begin() ; i != testCoocur.end() ; ++i){
- if (tTable.getProb((*i).first, (*i).second) > PROB_SMOOTH){
- seen_coocur ++ ;
- // of_seenCoocur << (*i).first << ' ' << (*i).second << '\n';
- }
- else {
- unseen_coocur++;
- // of_unseenCoocur << (*i).first << ' ' << (*i).second << '\n';
- }
- }
-
- string trgUnkFile = Prefix + ".tst.trg.unk" ;
- ofstream of_trgUnk(trgUnkFile.c_str());
-
- for (WordIndex i = 0 ; i < testFList.getVocabList().size() && i < testFList.uniqTokens();i++)
- if (testFList.getVocabList()[i].freq > 0 && trainFList.getVocabList()[i].freq <= 0){
- of_trgUnk << i << ' ' << testFList.getVocabList()[i].word << ' ' << testFList.getVocabList()[i].freq
- << '\n';
- trgUnk++ ;
- }
- string srcUnkFile = Prefix + ".tst.src.unk" ;
- ofstream of_srcUnk(srcUnkFile.c_str());
-
- for (WordIndex j = 0 ; j < testEList.getVocabList().size() && j < testEList.uniqTokens();j++)
- if (testEList.getVocabList()[j].freq > 0 && trainEList.getVocabList()[j].freq <= 0){
- srcUnk++ ;
- of_srcUnk << j << ' ' << testEList.getVocabList()[j].word << ' ' << testEList.getVocabList()[j].freq
- << '\n';
- }
- string summaryFile = Prefix + ".tst.stats" ;
- ofstream of_summary(summaryFile.c_str());
- of_summary << "\t\t STATISTICS ABOUT TEST CORPUS\n\n";
- of_summary << "source unique tokens: " << testEList.uniqTokens() << '\n';
- of_summary << "target unique tokens: " << testFList.uniqTokens() << '\n';
- of_summary << "unique unseen source tokens: " << srcUnk << '\n';
- of_summary << "unique unseen target tokens: " << trgUnk << '\n';
- of_summary << "cooccurrences not found in the final t table: " << unseen_coocur << '\n';
- of_summary << "cooccurrences found in the final t table: " << seen_coocur << '\n';
-
-}
-
diff --git a/scripts/training/MGIZA/src/snt2cooc-reduce-mem-preprocess.cpp b/scripts/training/MGIZA/src/snt2cooc-reduce-mem-preprocess.cpp
deleted file mode 100644
index 3309a7e..0000000
--- a/scripts/training/MGIZA/src/snt2cooc-reduce-mem-preprocess.cpp
+++ /dev/null
@@ -1,79 +0,0 @@
-#include <iostream>
-#include <string>
-#include <strstream>
-#include <fstream>
-#include <map>
-#include <vector>
-#include <set>
-#include <stdio.h>
-#include <stdlib.h>
-
-using namespace std;
-
-void readVoc(istream&in,map<string,string>&voc)
-{
- string line,s1,s2;
- voc["1"]="UNK";
- if( !in )cerr <<"Vocabulary does not exist.\n";
- while(getline(in,line))
- {
- istrstream eingabe(line.c_str());
- if( !(eingabe>>s1>>s2))
- cerr << "ERROR in vocabulary '" << line << "'\n";
- voc[s1]=s2;
- }
-}
-
-int maxElems=0;
-int main(int argc,char **argv)
-{
- if( argc!=4&&argc!=5 )
- {
- cerr << "Usage: " << argv[0] << " output vcb1 vcb2 snt12 \n";
- cerr << "Converts GIZA++ snt-format into plain text.\n";
- exit(1);
- }
- bool counts=0;
- if( argc==6 )
- {
- if(string(argv[4])!="-counts")
- cerr << "ERROR: wrong option " << argv[5] << endl;
- counts=1;
- maxElems=10000000;
- }
- ifstream v1(argv[1]),v2(argv[2]),t(argv[3]);
- map<string,string>voc1,voc2;
- readVoc(v1,voc1);
- readVoc(v2,voc2);
- string line1,line2,line3;
- int nLine=0;
- int totalElems=0;
- while(getline(t,line1)&&getline(t,line2)&&getline(t,line3))
- {
- istrstream eingabe1(line1.c_str()),eingabe2(line2.c_str()),eingabe3(line3.c_str());
- double count;
- string word;
- eingabe1>>count;
- vector<int>l1,l2;
- while(eingabe2>>word)
- l1.push_back(atoi(word.c_str()));
- while(eingabe3>>word)
- l2.push_back(atoi(word.c_str()));
- if( ((++nLine)%1000)==0 )
- cerr << "line " << nLine << '\n';
- for(unsigned int j=0; j<l2.size(); ++j)
- {
- cout << 0 << " " << l2[j] << endl;
- }
- for(unsigned int i=0; i<l1.size(); ++i)
- {
- for(unsigned int j=0; j<l2.size(); ++j)
- {
- cout << l1[i] << " " << l2[j] << endl;
- }
- }
- }
- cerr << "END.\n";
-}
-
-
diff --git a/scripts/training/MGIZA/src/snt2cooc.cpp b/scripts/training/MGIZA/src/snt2cooc.cpp
deleted file mode 100644
index 0eb2aaf..0000000
--- a/scripts/training/MGIZA/src/snt2cooc.cpp
+++ /dev/null
@@ -1,111 +0,0 @@
-#include <iostream>
-#include <string>
-#include <strstream>
-#include <fstream>
-#include <map>
-#include <vector>
-#include <set>
-#include <stdio.h>
-#include <stdlib.h>
-
-using namespace std;
-
-void readVoc(istream&in,map<string,string>&voc)
-{
- string line,s1,s2;
- voc["1"]="UNK";
- if( !in )cerr <<"Vocabulary does not exist.\n";
- while(getline(in,line))
- {
- istrstream eingabe(line.c_str());
- if( !(eingabe>>s1>>s2))
- cerr << "ERROR in vocabulary '" << line << "'\n";
- voc[s1]=s2;
- }
-}
-
-int maxElems=0;
-int main(int argc,char **argv)
-{
- if( argc!=5&&argc!=6 )
- {
- cerr << "Usage: " << argv[0] << " output vcb1 vcb2 snt12 \n";
- cerr << "Converts GIZA++ snt-format into plain text.\n";
- exit(1);
- }
- bool counts=0;
- if( argc==6 )
- {
- if(string(argv[5])!="-counts")
- cerr << "ERROR: wrong option " << argv[6] << endl;
- counts=1;
- maxElems=10000000;
- }
- ifstream v1(argv[2]),v2(argv[3]),t(argv[4]);
- ofstream ou(argv[1]);
- map<string,string>voc1,voc2;
- readVoc(v1,voc1);
- readVoc(v2,voc2);
- string line1,line2,line3;
- vector<map<int,int> > vsi(voc1.size()+1000);
- int nLine=0;
- int totalElems=0;
- while(getline(t,line1)&&getline(t,line2)&&getline(t,line3))
- {
- istrstream eingabe1(line1.c_str()),eingabe2(line2.c_str()),eingabe3(line3.c_str());
- double count;
- string word;
- eingabe1>>count;
- vector<int>l1,l2;
- while(eingabe2>>word)
- l1.push_back(atoi(word.c_str()));
- while(eingabe3>>word)
- l2.push_back(atoi(word.c_str()));
- if( ((++nLine)%1000)==0 )
- cerr << "line " << nLine << '\n';
- totalElems-=vsi[0].size();
- for(unsigned int j=0;j<l2.size();++j)
- vsi[0][l2[j]]++;
- totalElems+=vsi[0].size();
- for(unsigned int i=0;i<l1.size();++i)
- {
- if( l1[i]>=int(vsi.size()) )
- {
- cerr << "I have to resize: " << l1[i] << endl;
- vsi.resize(l1[i]+1);
- }
- map<int,int>&theset=vsi[l1[i]];
- totalElems-=theset.size();
- for(unsigned int j=0;j<l2.size();++j)
- theset[l2[j]]++;
- totalElems+=theset.size();
- }
- if( totalElems>maxElems&&maxElems )
- {
- cerr << "INFO: print out " << totalElems << " entries.\n";
- for(unsigned int i=0;i<vsi.size();++i)
- for(map<int,int>::const_iterator j=vsi[i].begin();j!=vsi[i].end();++j)
- {
- if(counts==1 )
- ou << j->second << " " << i << " " << j->first << '\n';
- else
- ou << i << " " << j->first << '\n';
- }
- totalElems=0;
- vsi.clear();
- vsi.resize(voc1.size()+1000);
- }
- }
- cerr << "END.\n";
- for(unsigned int i=0;i<vsi.size();++i)
- for(map<int,int>::const_iterator j=vsi[i].begin();j!=vsi[i].end();++j)
- {
- if(counts==1 )
- ou << j->second << " " << i << " " << j->first << '\n';
- else
- ou << i << " " << j->first << '\n';
- }
- ou.flush();
- ou.close();
-}
-
diff --git a/scripts/training/MGIZA/src/snt2plain.cpp b/scripts/training/MGIZA/src/snt2plain.cpp
deleted file mode 100644
index 23dacbe..0000000
--- a/scripts/training/MGIZA/src/snt2plain.cpp
+++ /dev/null
@@ -1,93 +0,0 @@
-#include <iostream>
-#include <string>
-#include <strstream>
-#include <fstream>
-#include <map>
-#include <vector>
-#include <stdio.h>
-#include <stdlib.h>
-
-
-using namespace std;
-
-void readVoc(istream&in,map<string,string>&voc)
-{
- string line,s1,s2;
- voc["1"]="UNK";
- if( !in )cerr <<"Vocabulary does not exist.\n";
- while(getline(in,line))
- {
- istrstream eingabe(line.c_str());
- if( !(eingabe>>s1>>s2))
- cerr << "ERROR in vocabulary '" << line << "'\n";
- voc[s1]=s2;
- }
-}
-
-int main(int argc,char **argv)
-{
- if( argc!=5&&argc!=6 )
- {
- cerr << "Usage: " << argv[0] << " vcb1 vcb2 snt12 output_prefix [ -counts ]\n";
- cerr << "Converts GIZA++ snt-format into plain text.\n";
- exit(1);
- }
- bool counts=0;
- if( argc==6 )
- {
- if(string(argv[5])!="-counts")
- cerr << "ERROR: wrong option " << argv[5] << endl;
- counts=1;
- }
- ifstream v1(argv[1]),v2(argv[2]),t(argv[3]);
- string prefix(argv[4]);
- string outfil1=prefix+"1.txt";
- string outfil2=prefix+"2.txt";
- ofstream out1(outfil1.c_str());
- ofstream out2(outfil2.c_str());
- map<string,string>voc1,voc2;
- readVoc(v1,voc1);
- readVoc(v2,voc2);
- int source=0,target=0;
- string line1,line2,line3;
- int printed=0;
- while(getline(t,line1)&&getline(t,line2)&&getline(t,line3))
- {
- istrstream eingabe1(line1.c_str()),eingabe2(line2.c_str()),eingabe3(line3.c_str());
- double count;
- string word;
- eingabe1>>count;
- vector<string>l1,l2;
- while(eingabe2>>word)
- l1.push_back(word);
- while(eingabe3>>word)
- l2.push_back(word);
- if( counts )
- cout << count << '\n';
- for(unsigned int p=0;p<l1.size();p++)
- {
- if(voc1.count(l1[p])==0)
- {
- if( printed++==0)
- cerr << "ERROR: source vocabulary entry " << l1[p] << " unknown.\n";
- out1 << l1[p]<<' ';
- }
- else
- out1 << voc1[l1[p]] << ' ';
- source++;
- }
- for(unsigned int p=0;p<l2.size();p++)
- {
- if(voc2.count(l2[p])==0)
- {
- if( printed++ ==0)
- cerr << "ERROR: target vocabulary entry " << l2[p] << " unknown.\n";
- out2 <<l2[p]<<' ';
- }
- out2 << voc2[l2[p]] << ' ';
- target++;
- }
- out1<<'\n';
- out2<<'\n';
- }
-}
diff --git a/scripts/training/MGIZA/src/symal.cpp b/scripts/training/MGIZA/src/symal.cpp
deleted file mode 100644
index 5767859..0000000
--- a/scripts/training/MGIZA/src/symal.cpp
+++ /dev/null
@@ -1,501 +0,0 @@
-// $Id: symal.cpp 1905 2008-10-16 21:14:38Z phkoehn $
-
-#include <cassert>
-#include <iomanip>
-#include <iostream>
-#include <fstream>
-#include <sstream>
-#include <string>
-#include <list>
-#include <vector>
-#include <set>
-#include <algorithm>
-#include <cstring>
-#include "cmd.h"
-
-using namespace std;
-
-#define MAX_WORD 1000 //maximum lengthsource/target strings
-#define MAX_M 200 //maximum length of source strings
-#define MAX_N 200 //maximum length of target strings
-
-#define UNION 1
-#define INTERSECT 2
-#define GROW 3
-#define SRCTOTGT 4
-#define TGTTOSRC 5
-#define BOOL_YES 1
-#define BOOL_NO 0
-
-#define END_ENUM { (char*)0, 0 }
-
-static Enum_T AlignEnum [] = {
-{ "union", UNION },
-{ "u", UNION },
-{ "intersect", INTERSECT},
-{ "i", INTERSECT},
-{ "grow", GROW },
-{ "g", GROW },
-{ "srctotgt", SRCTOTGT },
-{ "s2t", SRCTOTGT },
-{ "tgttosrc", TGTTOSRC },
-{ "t2s", TGTTOSRC },
- END_ENUM
-};
-
-static Enum_T BoolEnum [] = {
- { "true", BOOL_YES },
- { "yes", BOOL_YES },
- { "y", BOOL_YES },
- { "false", BOOL_NO },
- { "no", BOOL_NO },
- { "n", BOOL_NO },
- END_ENUM
-};
-
-
-
-// global variables and constants
-
-int* fa; //counters of covered foreign positions
-int* ea; //counters of covered english positions
-int** A; //alignment matrix with information symmetric/direct/inverse alignments
-
-int verbose=0;
-
-//read an alignment pair from the input stream.
-
-int lc = 0;
-
-int getals(fstream& inp,int& m, int *a,int& n, int *b,ostream& out)
-{
- char w[MAX_WORD], dummy[10];
- string tgtsent;
- int i,j,freq;
- if (inp >> freq){
- ++lc;
- //target sentence
- inp >> n; assert(n<MAX_N);
- for (i=1;i<=n;i++){
- inp >> setw(MAX_WORD) >> w;
- if (strlen(w)>=MAX_WORD-1) {
- cerr << lc << ": target len=" << strlen(w) << " is not less than MAX_WORD-1="
- << MAX_WORD-1 << endl;
- assert(strlen(w)<MAX_WORD-1);
- }
- tgtsent+=w;
- tgtsent+=" ";
- }
-
- inp >> dummy; //# separator
- // inverse alignment
- for (i=1;i<=n;i++) inp >> b[i];
-
- //source sentence
- inp >> m; assert(m<MAX_M);
- for (j=1;j<=m;j++){
- inp >> setw(MAX_WORD) >> w;
- if (strlen(w)>=MAX_WORD-1) {
- cerr << lc << ": source len=" << strlen(w) << " is not less than MAX_WORD-1="
- << MAX_WORD-1 << endl;
- assert(strlen(w)<MAX_WORD-1);
- }
- out << w << " ";
- }
- out << "{##} " << tgtsent << "{##} ";
-
-
- inp >> dummy; //# separator
-
- // direct alignment
- for (j=1;j<=m;j++) {
- inp >> a[j];
- assert(0<=a[j] && a[j]<=n);
- }
-
- //check inverse alignemnt
- for (i=1;i<=n;i++)
- assert(0<=b[i] && b[i]<=m);
-
- return 1;
-
- }
- else
- return 0;
-};
-
-
-//compute union alignment
-int prunionalignment(fstream& out,int m,int *a,int n,int* b){
-
- ostringstream sout;
-
- for (int j=1;j<=m;j++)
- if (a[j])
- sout << j-1 << "-" << a[j]-1 << " ";
-
- for (int i=1;i<=n;i++)
- if (b[i] && a[b[i]]!=i)
- sout << b[i]-1 << "-" << i-1 << " ";
-
- //fix the last " "
- string str = sout.str();
- if (str.length() == 0)
- str = "\n";
- else
- str.replace(str.length()-1,1,"\n");
-
- out << str;
- out.flush();
-
- return 1;
-}
-
-
-//Compute intersection alignment
-
-int printersect(fstream& out,int m,int *a,int n,int* b){
-
- ostringstream sout;
-
- for (int j=1;j<=m;j++)
- if (a[j] && b[a[j]]==j)
- sout << j-1 << "-" << a[j]-1 << " ";
-
- //fix the last " "
- string str = sout.str();
- if (str.length() == 0)
- str = "\n";
- else
- str.replace(str.length()-1,1,"\n");
-
- out << str;
- out.flush();
-
- return 1;
-}
-
-//Compute target-to-source alignment
-
-int printtgttosrc(fstream& out,int m,int *a,int n,int* b){
-
- ostringstream sout;
-
- for (int i=1;i<=n;i++)
- if (b[i])
- sout << b[i]-1 << "-" << i-1 << " ";
-
- //fix the last " "
- string str = sout.str();
- if (str.length() == 0)
- str = "\n";
- else
- str.replace(str.length()-1,1,"\n");
-
- out << str;
- out.flush();
-
- return 1;
-}
-
-//Compute source-to-target alignment
-
-int printsrctotgt(fstream& out,int m,int *a,int n,int* b){
-
- ostringstream sout;
-
- for (int j=1;j<=m;j++)
- if (a[j])
- sout << j-1 << "-" << a[j]-1 << " ";
-
- //fix the last " "
- string str = sout.str();
- if (str.length() == 0)
- str = "\n";
- else
- str.replace(str.length()-1,1,"\n");
-
- out << str;
- out.flush();
-
- return 1;
-}
-
-//Compute Grow Diagonal Alignment
-//Nice property: you will never introduce more points
-//than the unionalignment alignemt. Hence, you will always be able
-//to represent the grow alignment as the unionalignment of a
-//directed and inverted alignment
-
-int printgrow(fstream& out,int m,int *a,int n,int* b, bool diagonal=false,bool final=false,bool bothuncovered=false){
-
- ostringstream sout;
-
- vector <pair <int,int> > neighbors; //neighbors
-
- pair <int,int> entry;
-
- neighbors.push_back(make_pair(-1,-0));
- neighbors.push_back(make_pair(0,-1));
- neighbors.push_back(make_pair(1,0));
- neighbors.push_back(make_pair(0,1));
-
-
- if (diagonal){
- neighbors.push_back(make_pair(-1,-1));
- neighbors.push_back(make_pair(-1,1));
- neighbors.push_back(make_pair(1,-1));
- neighbors.push_back(make_pair(1,1));
- }
-
-
- int i,j,o;
-
-
- //covered foreign and english positions
-
- memset(fa,0,(m+1)*sizeof(int));
- memset(ea,0,(n+1)*sizeof(int));
-
- //matrix to quickly check if one point is in the symmetric
- //alignment (value=2), direct alignment (=1) and inverse alignment
-
- for (int i=1;i<=n;i++) memset(A[i],0,(m+1)*sizeof(int));
-
- set <pair <int,int> > currentpoints; //symmetric alignment
- set <pair <int,int> > unionalignment; //union alignment
-
- pair <int,int> point; //variable to store points
- set<pair <int,int> >::const_iterator k; //iterator over sets
-
- //fill in the alignments
- for (j=1;j<=m;j++){
- if (a[j]){
- unionalignment.insert(make_pair(a[j],j));
- if (b[a[j]]==j){
- fa[j]=1;ea[a[j]]=1;
- A[a[j]][j]=2;
- currentpoints.insert(make_pair(a[j],j));
- }
- else
- A[a[j]][j]=-1;
- }
- }
-
- for (i=1;i<=n;i++)
- if (b[i] && a[b[i]]!=i){ //not intersection
- unionalignment.insert(make_pair(i,b[i]));
- A[i][b[i]]=1;
- }
-
-
- int added=1;
-
- while (added){
- added=0;
- ///scan the current alignment
- for (k=currentpoints.begin();k!=currentpoints.end();k++){
- //cout << "{"<< (k->second)-1 << "-" << (k->first)-1 << "}";
- for (o=0;o<neighbors.size();o++){
- //cout << "go over check all neighbors\n";
- point.first=k->first+neighbors[o].first;
- point.second=k->second+neighbors[o].second;
- //cout << point.second-1 << " " << point.first-1 << "\n";
- //check if neighbor is inside 'matrix'
- if (point.first>0 && point.first <=n && point.second>0 && point.second<=m)
- //check if neighbor is in the unionalignment alignment
- if (b[point.first]==point.second || a[point.second]==point.first){
- //cout << "In unionalignment ";cout.flush();
- //check if it connects at least one uncovered word
- if (!(ea[point.first] && fa[point.second]))
- {
- //insert point in currentpoints!
- currentpoints.insert(point);
- A[point.first][point.second]=2;
- ea[point.first]=1; fa[point.second]=1;
- added=1;
- //cout << "added grow: " << point.second-1 << "-" << point.first-1 << "\n";cout.flush();
- }
- }
- }
- }
- }
-
- if (final){
- for (k=unionalignment.begin();k!=unionalignment.end();k++)
- if (A[k->first][k->second]==1)
- {
- point.first=k->first;point.second=k->second;
- //one of the two words is not covered yet
- //cout << "{" << point.second-1 << "-" << point.first-1 << "} ";
- if ((bothuncovered && !ea[point.first] && !fa[point.second]) ||
- (!bothuncovered && !(ea[point.first] && fa[point.second])))
- {
- //add it!
- currentpoints.insert(point);
- A[point.first][point.second]=2;
- //keep track of new covered positions
- ea[point.first]=1;fa[point.second]=1;
-
- //added=1;
- //cout << "added final: " << point.second-1 << "-" << point.first-1 << "\n";
- }
- }
-
- for (k=unionalignment.begin();k!=unionalignment.end();k++)
- if (A[k->first][k->second]==-1)
- {
- point.first=k->first;point.second=k->second;
- //one of the two words is not covered yet
- //cout << "{" << point.second-1 << "-" << point.first-1 << "} ";
- if ((bothuncovered && !ea[point.first] && !fa[point.second]) ||
- (!bothuncovered && !(ea[point.first] && fa[point.second])))
- {
- //add it!
- currentpoints.insert(point);
- A[point.first][point.second]=2;
- //keep track of new covered positions
- ea[point.first]=1;fa[point.second]=1;
-
- //added=1;
- //cout << "added final: " << point.second-1 << "-" << point.first-1 << "\n";
- }
- }
- }
-
-
- for (k=currentpoints.begin();k!=currentpoints.end();k++)
- sout << k->second-1 << "-" << k->first-1 << " ";
-
-
- //fix the last " "
- string str = sout.str();
- if (str.length() == 0)
- str = "\n";
- else
- str.replace(str.length()-1,1,"\n");
-
- out << str;
- out.flush();
- return 1;
-
- return 1;
-}
-
-
-
-//Main file here
-
-
-int main(int argc, char** argv){
-
-int alignment=0;
-char* input="/dev/stdin";
-char* output="/dev/stdout";
-int diagonal=false;
-int final=false;
-int bothuncovered=false;
-
-
- DeclareParams("a", CMDENUMTYPE, &alignment, AlignEnum,
- "alignment", CMDENUMTYPE, &alignment, AlignEnum,
- "d", CMDENUMTYPE, &diagonal, BoolEnum,
- "diagonal", CMDENUMTYPE, &diagonal, BoolEnum,
- "f", CMDENUMTYPE, &final, BoolEnum,
- "final", CMDENUMTYPE, &final, BoolEnum,
- "b", CMDENUMTYPE, &bothuncovered, BoolEnum,
- "both", CMDENUMTYPE, &bothuncovered, BoolEnum,
- "i", CMDSTRINGTYPE, &input,
- "o", CMDSTRINGTYPE, &output,
- "v", CMDENUMTYPE, &verbose, BoolEnum,
- "verbose", CMDENUMTYPE, &verbose, BoolEnum,
-
- (char *)NULL);
-
- GetParams(&argc, &argv, (char*) NULL);
-
- if (alignment==0){
- cerr << "usage: symal [-i=<inputfile>] [-o=<outputfile>] -a=[u|i|g] -d=[yes|no] -b=[yes|no] -f=[yes|no] \n"
- << "Input file or std must be in .bal format (see script giza2bal.pl).\n";
-
- exit(1);
-
- }
-
- fstream inp(input,ios::in);
- fstream out(output,ios::out);
-
- if (!inp.is_open()){
- cerr << "cannot open " << input << "\n";
- exit(1);
- }
-
- if (!out.is_open()){
- cerr << "cannot open " << output << "\n";
- exit(1);
- }
-
-
- int a[MAX_M],b[MAX_N],m,n;
- fa=new int[MAX_M+1];
- ea=new int[MAX_N+1];
-
-
- int sents = 0;
- A=new int *[MAX_N+1];
- for (int i=1;i<=MAX_N;i++) A[i]=new int[MAX_M+1];
-
- switch (alignment){
- case UNION:
- cerr << "symal: computing union alignment\n";
- while(getals(inp,m,a,n,b,out)) {
- prunionalignment(out,m,a,n,b);
- sents++;
- }
- cerr << "Sents: " << sents << endl;
- break;
- case INTERSECT:
- cerr << "symal: computing intersect alignment\n";
- while(getals(inp,m,a,n,b,out)) {
- printersect(out,m,a,n,b);
- sents++;
- }
- cerr << "Sents: " << sents << endl;
- break;
- case GROW:
- cerr << "symal: computing grow alignment: diagonal ("
- << diagonal << ") final ("<< final << ")"
- << "both-uncovered (" << bothuncovered <<")\n";
-
- while(getals(inp,m,a,n,b,out))
- printgrow(out,m,a,n,b,diagonal,final,bothuncovered);
-
- break;
- case TGTTOSRC:
- cerr << "symal: computing target-to-source alignment\n";
-
- while(getals(inp,m,a,n,b,out)){
- printtgttosrc(out,m,a,n,b);
- sents++;
- }
- cerr << "Sents: " << sents << endl;
- break;
- case SRCTOTGT:
- cerr << "symal: computing source-to-target alignment\n";
-
- while(getals(inp,m,a,n,b,out)){
- printsrctotgt(out,m,a,n,b);
- sents++;
- }
- cerr << "Sents: " << sents << endl;
- break;
- default:
- exit(1);
- }
-
- delete [] fa; delete [] ea;
- for (int i=1;i<=MAX_N;i++) delete [] A[i];
- delete [] A;
-
- exit(0);
-}
diff --git a/scripts/training/MGIZA/src/syncObj.h b/scripts/training/MGIZA/src/syncObj.h
deleted file mode 100644
index 86af946..0000000
--- a/scripts/training/MGIZA/src/syncObj.h
+++ /dev/null
@@ -1,74 +0,0 @@
-#ifndef __SYNC_OBJ_H__
-#define __SYNC_OBJ_H__
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <pthread.h>
-#include <iostream>
-
-#ifdef WIN32
-#include <boost/thread/mutex.hpp>
-class Mutex{
- private:
- mutable boost::mutex* my_mutex;
- Mutex(const Mutex&){
-
- }
- public:
- Mutex(){
- my_mutex = new boost::mutex();
- };
- ~Mutex(){delete my_mutex;my_mutex = 0;}
-
- inline void operator=(const Mutex& ref){}
-
- public:
- inline void lock() const{my_mutex->lock();};
- inline void unlock() const{my_mutex->unlock();};
-};
-
-#else
-
-class Mutex{
-private:
- mutable pthread_mutex_t mutex;
-
-public:
- Mutex(){
- pthread_mutex_init(&mutex,NULL);
- };
- ~Mutex(){pthread_mutex_destroy(&mutex);}
-
-public:
- inline void lock() const{pthread_mutex_lock(&mutex);};
- inline void unlock() const{pthread_mutex_unlock(&mutex);};
-};
-
-#endif
-class SyncDouble{
-private:
- double i;
- Mutex m;
-public:
- SyncDouble(double d) {i=d;};
- SyncDouble() {i=0;};
- //inline operator const double()const{return i;}
- inline bool operator ==(const double& r) const{return i == r;};
- inline void operator +=(const double& r) {m.lock();i += r;m.unlock();};
- inline void operator +=(const SyncDouble& r) {m.lock();i += r.i;m.unlock();};
- inline void operator -=(const double& r) {m.lock();i -= r;m.unlock();};
- inline void operator *=(const double& r) {m.lock();i *= r;m.unlock();};
- inline void operator /=(const double& r) {m.lock();i /= r;m.unlock();};
- inline double operator =(const double& r) {m.lock();i = r;m.unlock();return i;};
- inline double operator =(const int& r) {m.lock();i = r;m.unlock();return i;};
- inline void operator ++() {m.lock();i++;m.unlock();};
- inline double operator +(const SyncDouble& r){return r.i+i;};
- inline double operator /(const SyncDouble& r){return i/r.i;};
- //inline void operator --() {m.lock();i--;m.unlock();};
- //inline const istream& operator<<(const istream& p)const{p<<i;return p;};
- friend ostream& operator<<( ostream& p,const SyncDouble& d);
-};
-
-inline ostream& operator<<( ostream& p, const SyncDouble& d){p<<d.i;return p;};
-
-#endif
diff --git a/scripts/training/MGIZA/src/transpair_model1.h b/scripts/training/MGIZA/src/transpair_model1.h
deleted file mode 100644
index 00d7875..0000000
--- a/scripts/training/MGIZA/src/transpair_model1.h
+++ /dev/null
@@ -1,108 +0,0 @@
-/*
-
-Copyright (C) 2000,2001 Franz Josef Och (RWTH Aachen - Lehrstuhl fuer Informatik VI)
-
-This file is part of GIZA++ ( extension of GIZA ).
-
-This program is free software; you can redistribute it and/or
-modify it under the terms of the GNU General Public License
-as published by the Free Software Foundation; either version 2
-of the License, or (at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with this program; if not, write to the Free Software
-Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
-USA.
-
-*/
-#ifndef transpair_model1_h_fjo_defined
-#define transpair_model1_h_fjo_defined
-//#include "logprob.h"
-#include "defs.h"
-#include "Array2.h"
-#include "defs.h"
-#include "Vector.h"
-#include "NTables.h"
-#include "ATables.h"
-#include "TTables.h"
-#include "alignment.h"
-#include <math.h>
-#include <algorithm>
-#include "Array2.h"
-#include "mystl.h"
-
-class transpair_model1
-{
- public:
- bool verboseTP;
- Array2<PROB, Vector<PROB> > t;
- WordIndex l, m;
- Vector<WordIndex> E,F;
- void setMode(bool)
- {}
- transpair_model1(const Vector<WordIndex>&es, const Vector<WordIndex>&fs, const tmodel<COUNT, PROB>&tTable)
- : verboseTP(0),t(es.size(), fs.size()),l(es.size()-1), m(fs.size()-1),E(es),F(fs)
- {
- WordIndex l=es.size()-1,m=fs.size()-1;
- for(WordIndex i=0;i<=l;i++)
- for(WordIndex j=1;j<=m;j++)
- {
- t(i, j)=tTable.getProb(es[i], fs[j]);
- if( !(t(i,j)>=PROB_SMOOTH) )
- cerr << "ERROR IN PROBABILITY: " << t(i,j) << " " << PROB_SMOOTH << endl;
- }
- }
- /* transpair_model1(const Vector<WordIndex>&es, const Vector<WordIndex>&fs)
- : verboseTP(0),t(es.size(), fs.size()),l(es.size()-1), m(fs.size()-1),E(es),F(fs)
- {
- WordIndex l=es.size()-1,m=fs.size()-1;
- for(WordIndex i=0;i<=l;i++)
- for(WordIndex j=1;j<=m;j++)
- {
- const string&estr=globeTrainVcbList->getVocabList()[es[i]].word;
- const string&fstr=globfTrainVcbList->getVocabList()[fs[j]].word;
- if( lev(estr,fstr)==0 )
- t(i,j)=1.0;
- else
- t(i,j)=1/100.0;
- massert( t(i,j)>=PROB_SMOOTH );
- }
-}*/
- WordIndex get_l()const
- {return l;}
- WordIndex get_m()const
- {return m;}
- const PROB&get_t(WordIndex i, WordIndex j)const
- {massert( t(i,j)>=PROB_SMOOTH);
- return t(i, j);}
- WordIndex get_es(int i)const {return E[i];}
- WordIndex get_fs(int j)const {return F[j];}
- bool greedyHillClimbing()const
- {return 0;}
- void computeScores(const alignment&,vector<double>&)const
- {}
- LogProb scoreOfMove(const alignment&a, WordIndex new_i, WordIndex j,double=-1.0)const
- {
- int old_i=a(j);
- return (t(new_i, j) /t(old_i, j));
- }
- LogProb scoreOfSwap(const alignment&a, WordIndex j1, WordIndex j2,double=-1.0)const
- {
- WordIndex i1=a(j1), i2=a(j2);
- return (t(i2, j1)/t(i1, j1))*(t(i1, j2)/t(i2, j2));
- }
- LogProb prob_of_target_and_alignment_given_source(const alignment&al)const
- {
- LogProb prob=1.0;
- int lp1=al.get_l()+1;
- for(unsigned int j=1;j<=al.get_m();++j)
- prob*=t(al(j),j)/lp1;
- return prob;
- }
-};
-#endif
diff --git a/scripts/training/MGIZA/src/transpair_model2.h b/scripts/training/MGIZA/src/transpair_model2.h
deleted file mode 100644
index f3d53e3..0000000
--- a/scripts/training/MGIZA/src/transpair_model2.h
+++ /dev/null
@@ -1,52 +0,0 @@
-/*
-
-Copyright (C) 2000,2001 Franz Josef Och (RWTH Aachen - Lehrstuhl fuer Informatik VI)
-
-This file is part of GIZA++ ( extension of GIZA ).
-
-This program is free software; you can redistribute it and/or
-modify it under the terms of the GNU General Public License
-as published by the Free Software Foundation; either version 2
-of the License, or (at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with this program; if not, write to the Free Software
-Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
-USA.
-
-*/
-#ifndef transpair_model2_defined_h
-#define transpair_model2_defined_h
-
-#include "defs.h"
-#include "Vector.h"
-#include "NTables.h"
-#include "ATables.h"
-#include "TTables.h"
-#include "alignment.h"
-#include <math.h>
-#include "transpair_model1.h"
-
-
-class transpair_model2 : public transpair_model1
-{
- protected:
- Array2<PROB, Vector<PROB> > a;
- public:
- transpair_model2(const Vector<WordIndex>&es, const Vector<WordIndex>&fs, const tmodel<COUNT, PROB>&tTable,
- const amodel<PROB>&aTable)
- : transpair_model1(es,fs,tTable),a(es.size(),fs.size())
- {
- for(WordIndex i=0;i<=l;i++)
- for(WordIndex j=1;j<=m;j++)
- a(i, j)=aTable.getValue(i, j, l, m);
- }
- const PROB&get_a(WordIndex i, WordIndex j)const
- {return a(i, j);}
-};
-#endif
diff --git a/scripts/training/MGIZA/src/transpair_model3.cpp b/scripts/training/MGIZA/src/transpair_model3.cpp
deleted file mode 100644
index 0ab4c54..0000000
--- a/scripts/training/MGIZA/src/transpair_model3.cpp
+++ /dev/null
@@ -1,197 +0,0 @@
-/*
-
-EGYPT Toolkit for Statistical Machine Translation
-Written by Yaser Al-Onaizan, Jan Curin, Michael Jahr, Kevin Knight, John Lafferty, Dan Melamed, David Purdy, Franz Och, Noah Smith, and David Yarowsky.
-
-This program is free software; you can redistribute it and/or
-modify it under the terms of the GNU General Public License
-as published by the Free Software Foundation; either version 2
-of the License, or (at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with this program; if not, write to the Free Software
-Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
-USA.
-
-*/
-/*--
-transpair_model3: representation of a translation pair for model3 training
-allowing for fast access (esp. to t table).
-
-Franz Josef Och (30/07/99)
---*/
-#include "transpair_model3.h"
-#include <algorithm>
-
-transpair_model3::transpair_model3(const Vector<WordIndex>&es, const Vector<WordIndex>&fs, tmodel<COUNT, PROB>&tTable, amodel<PROB>&aTable, amodel<PROB>&dTable, nmodel<PROB>&nTable, double _p1, double _p0, void*)
- : transpair_model2(es,fs,tTable,aTable),d(es.size(), fs.size()),n(es.size(), MAX_FERTILITY+1), p0(_p0), p1(_p1)
-{
- WordIndex l=es.size()-1,m=fs.size()-1;
- for(WordIndex i=0;i<=l;i++)
- {
- for(WordIndex j=1;j<=m;j++)
- d(i, j)=dTable.getValue(j, i, l, m);
- if( i>0 )
- {
- for(WordIndex f=0;f<MAX_FERTILITY;f++)
- n(i, f)=nTable.getValue(es[i], f);
- n(i,MAX_FERTILITY)=PROB_SMOOTH;
- }
- }
-}
-
-LogProb transpair_model3::scoreOfMove(const alignment&a, WordIndex new_i, WordIndex j, double,bool forModel3)const
-{
- LogProb change;
- const WordIndex old_i=a(j);
- WordIndex f0=a.fert(0);
- if (old_i == new_i)
- change=1.0;
- else if (old_i == 0)
- change=((double)p0*p0/p1) *
- (( (DeficientDistortionForEmptyWord?(max(2,int(m))/DeficientDistortionForEmptyWord):f0)*(m-f0+1.0)) / ((m-2*f0+1)*(m-2*f0+2.0))) *
- ((PROB)(forModel3?(a.fert(new_i)+1.0):1.0)) *
- (get_fertility(new_i, a.fert(new_i)+1) / get_fertility(new_i, a.fert(new_i)))*
- (t(new_i, j)/t(old_i, j))*
- (forModel3?d(new_i, j):1.0);
- else if (new_i == 0)
- change=(double(p1) / (p0*p0)) *
- (double((m-2*f0)*(m-2*f0-1))/( (DeficientDistortionForEmptyWord?(max(2,int(m))/DeficientDistortionForEmptyWord):(1+f0))*(m-f0))) *
- (forModel3?(1.0/a.fert(old_i)):1.0) *
- (get_fertility(old_i, a.fert(old_i)-1) /get_fertility(old_i, a.fert(old_i)))*
- (t(new_i, j) /t(old_i, j)) *
- (forModel3?(1.0 / d(old_i, j)):1.0);
- else
- change=(forModel3?((a.fert(new_i)+1.0)/a.fert(old_i)):1.0) *
- (get_fertility(old_i,a.fert(old_i)-1) / get_fertility(old_i,a.fert(old_i))) *
- (get_fertility(new_i,a.fert(new_i)+1) /get_fertility(new_i,a.fert(new_i))) *
- (t(new_i,j)/t(old_i,j)) *
- (forModel3?(d(new_i,j)/d(old_i,j)):1.0);
- return change;
-}
-
-LogProb transpair_model3::scoreOfSwap(const alignment&a, WordIndex j1, WordIndex j2, double,bool forModel3)const
-{
- PROB score=1;
- assert(j1<j2);
- WordIndex i1=a(j1), i2=a(j2);
- if (i1!=i2)
- {
- score=(t(i2, j1)/t(i1, j1))*(t(i1, j2)/t(i2, j2));
- if( forModel3 )
- {
- if (i1)
- score *= d(i1, j2)/d(i1, j1);
- if (i2)
- score *= d(i2, j1)/d(i2, j2);
- }
- }
- return score;
-}
-
-ostream&operator<<(ostream&out, const transpair_model3&m)
-{
- for(WordIndex i=0;i<=m.get_l();i++)
- {
- out << "EF-I:"<<i<<' ';
- for(WordIndex j=1;j<=m.get_m();j++)
- out << "("<<m.t(i,j)<<","<<m.d(i,j)<<")";
- for(WordIndex j=1;j<MAX_FERTILITY;j++)
- if( i>0 )
- out << "(fert:"<<m.get_fertility(i,j)<<")";
- out << '\n';
- }
- out << "T:" << m.t << "D:" << m.d << "A:" << m.a << "N:" << m.n << m.p0 << m.p1 << '\n';
- return out;
-}
-
-LogProb transpair_model3::_scoreOfMove(const alignment&a, WordIndex new_i, WordIndex j,double)const
-{
- alignment b(a);
- b.set(j, new_i);
- LogProb a_prob=prob_of_target_and_alignment_given_source(a);
- LogProb b_prob=prob_of_target_and_alignment_given_source(b);
- if( a_prob )
- return b_prob/a_prob;
- else if( b_prob )
- return 1e20;
- else
- return 1.0;
-}
-
-LogProb transpair_model3::_scoreOfSwap(const alignment&a, WordIndex j1, WordIndex j2,double thisValue)const
-{
- alignment b(a);
- b.set(j1, a(j2));
- b.set(j2, a(j1));
- LogProb a_prob=thisValue;
- if( a_prob<0.0 )
- a_prob=prob_of_target_and_alignment_given_source(a);
- massert(a_prob==prob_of_target_and_alignment_given_source(a));
- LogProb b_prob=prob_of_target_and_alignment_given_source(b);
- if( a_prob )
- return b_prob/a_prob;
- else if( b_prob )
- return 1e20;
- else
- return 1.0;
-}
-
-LogProb transpair_model3::prob_of_target_and_alignment_given_source(const alignment&al,bool verb)const
-{
- LogProb total = 1.0 ;
- static const LogProb zero = 1E-299 ;
- total *= pow(double(1-p1), m-2.0 * al.fert(0)) * pow(double(p1), double(al.fert(0)));
- if( verb) cerr << "IBM-3: (1-p1)^(m-2 f0)*p1^f0: " << total << '\n';
- for (WordIndex i = 1 ; i <= al.fert(0) ; i++)
- total *= double(m - al.fert(0) - i + 1) / (double(DeficientDistortionForEmptyWord?(max(2,int(m))/DeficientDistortionForEmptyWord):i)) ;
- if( verb) cerr << "IBM-3: +NULL:binomial+distortion " << total << '\n';
- for (WordIndex i = 1 ; i <= l ; i++)
- {
- total *= get_fertility(i, al.fert(i)) * (LogProb) factorial(al.fert(i));
- if( verb) cerr << "IBM-3: fertility of " << i << " with factorial " << get_fertility(i, al.fert(i)) * (LogProb) factorial(al.fert(i)) << " -> " << total << '\n';
- }
- for (WordIndex j = 1 ; j <= m ; j++)
- {
- total*= get_t(al(j), j) ;
- massert( get_t(al(j), j)>=PROB_SMOOTH );
- if( verb) cerr << "IBM-3: t of " << j << " " << al(j) << ": " << get_t(al(j), j) << " -> " << total << '\n';
- if (al(j))
- {
- total *= get_d(al(j), j);
- if( verb) cerr << "IBM-3: d of " << j << ": " << get_d(al(j), j) << " -> " << total << '\n';
- }
- }
- return total?total:zero;
-}
-
-
-void transpair_model3::computeScores(const alignment&al,vector<double>&d)const
-{
- LogProb total1 = 1.0,total2=1.0,total3=1.0,total4=1.0 ;
- total1 *= pow(double(1-p1), m-2.0 * al.fert(0)) * pow(double(p1), double(al.fert(0)));
- for (WordIndex i = 1 ; i <= al.fert(0) ; i++)
- total1 *= double(m - al.fert(0) - i + 1) / (double(DeficientDistortionForEmptyWord?(max(2,int(m))/DeficientDistortionForEmptyWord):i)) ;
- for (WordIndex i = 1 ; i <= l ; i++)
- {
- total2 *= get_fertility(i, al.fert(i)) * (LogProb) factorial(al.fert(i));
- }
- for (WordIndex j = 1 ; j <= m ; j++)
- {
- total3*= get_t(al(j), j) ;
- massert( get_t(al(j), j)>=PROB_SMOOTH );
- if (al(j))
- {
- total4 *= get_d(al(j), j);
- }
- }
- d.push_back(total1);//5
- d.push_back(total2);//6
- d.push_back(total3);//7
- d.push_back(total4);//8
-}
diff --git a/scripts/training/MGIZA/src/transpair_model3.h b/scripts/training/MGIZA/src/transpair_model3.h
deleted file mode 100644
index 5fa43db..0000000
--- a/scripts/training/MGIZA/src/transpair_model3.h
+++ /dev/null
@@ -1,84 +0,0 @@
-/*
-
-EGYPT Toolkit for Statistical Machine Translation
-Written by Yaser Al-Onaizan, Jan Curin, Michael Jahr, Kevin Knight, John Lafferty, Dan Melamed, David Purdy, Franz Och, Noah Smith, and David Yarowsky.
-
-This program is free software; you can redistribute it and/or
-modify it under the terms of the GNU General Public License
-as published by the Free Software Foundation; either version 2
-of the License, or (at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with this program; if not, write to the Free Software
-Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
-USA.
-
-*/
-/*--
-transpair_model3: representation of a translation pair for model3 training
-allowing for fast access (esp. to t table).
-
-Franz Josef Och (30/07/99)
---*/
-#ifndef transpair_model3_h_fjo_defined
-#define transpair_model3_h_fjo_defined
-#include "Array2.h"
-#include "defs.h"
-#include "Vector.h"
-#include "NTables.h"
-#include "ATables.h"
-#include "TTables.h"
-#include "alignment.h"
-#include <math.h>
-#include "transpair_model2.h"
-
-extern double factorial(int n);
-inline bool doubleEqual(const double a, const double b)
-{
- if( a==b )
- return 1.0;
- bool bl=fabs(1.0-a/b)<1e-10;
- if( bl )
- return 1;
- else
- {
- cerr << "DIFFERENT: " << a << " " << b << " " << a/b << " " << 1.0-a/b << endl;
- return 0;
- }
-}
-
-
-class transpair_model3 : public transpair_model2
-{
- protected:
- Array2<PROB, Vector<PROB> > d, n;
- PROB p0, p1;
- public:
- typedef transpair_model3 simpler_transpair_model;
- transpair_model3(const Vector<WordIndex>&es, const Vector<WordIndex>&fs, tmodel<COUNT, PROB>&tTable,
- amodel<PROB>&aTable, amodel<PROB>&dTable, nmodel<PROB>&nTable,
- double _p1, double _p0, void*x=0);
- const PROB&get_d(WordIndex i, WordIndex j)const
- {return d(i, j);}
- const PROB&get_a(WordIndex i, WordIndex j)const
- {return a(i, j);}
- const PROB&get_fertility(WordIndex i, WordIndex f)const
- {massert(i>0);return (f>=MAX_FERTILITY)?n(i, MAX_FERTILITY):n(i, f);}
- int modelnr()const{return 3;}
- LogProb scoreOfAlignmentForChange(const alignment&)const
- {return -1.0; }
- LogProb scoreOfMove(const alignment&a, WordIndex new_i, WordIndex j, double thisValue=-1.0,bool withDistortions=1)const;
- LogProb scoreOfSwap(const alignment&a, WordIndex j1, WordIndex j2, double thisValue=-1.0,bool withDistortions=1)const ;
- LogProb _scoreOfMove(const alignment&a, WordIndex new_i, WordIndex j,double thisValue=-1.0)const;
- LogProb _scoreOfSwap(const alignment&a, WordIndex j1, WordIndex j2,double thisValue=-1.0)const;
- friend ostream&operator<<(ostream&out, const transpair_model3&m);
- LogProb prob_of_target_and_alignment_given_source(const alignment&al,bool verb=0)const;
- bool isSubOptimal()const{return 1;}
- void computeScores(const alignment&al,vector<double>&d)const;
-};
-#endif
diff --git a/scripts/training/MGIZA/src/transpair_model4.cpp b/scripts/training/MGIZA/src/transpair_model4.cpp
deleted file mode 100644
index ebc2666..0000000
--- a/scripts/training/MGIZA/src/transpair_model4.cpp
+++ /dev/null
@@ -1,179 +0,0 @@
-/*
-
-Copyright (C) 2000,2001 Franz Josef Och (RWTH Aachen - Lehrstuhl fuer Informatik VI)
-
-This file is part of GIZA++ ( extension of GIZA ).
-
-This program is free software; you can redistribute it and/or
-modify it under the terms of the GNU General Public License
-as published by the Free Software Foundation; either version 2
-of the License, or (at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with this program; if not, write to the Free Software
-Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
-USA.
-
-*/
-#include "transpair_model4.h"
-#include "Parameter.h"
-
-GLOBAL_PARAMETER(float,d4modelsmooth_factor,"model4SmoothFactor","smooting parameter for alignment probabilities in Model 4",PARLEV_SMOOTH,0.2);
-
-LogProb transpair_model4::_scoreOfMove(const alignment&a, WordIndex new_i, WordIndex j,double)const
-{
- LogProb a_prob=prob_of_target_and_alignment_given_source(a);
- alignment b(a);
- b.set(j, new_i);
- LogProb b_prob=prob_of_target_and_alignment_given_source(b);
- if( a_prob )
- return b_prob/a_prob;
- else if( b_prob )
- return 1e20;
- else
- return 1.0;
-}
-LogProb transpair_model4::_scoreOfSwap(const alignment&a, WordIndex j1, WordIndex j2,double)const
-{
- LogProb a_prob=prob_of_target_and_alignment_given_source(a);
- alignment b(a);
- b.set(j1, a(j2));
- b.set(j2, a(j1));
- LogProb b_prob=prob_of_target_and_alignment_given_source(b);
- if( a_prob )
- return b_prob/a_prob;
- else if( b_prob )
- return 1e20;
- else
- return 1.0;
-}
-//increasing efficiency: no copy of alignment (calc. everything incrementally)
-LogProb transpair_model4::scoreOfMove(const alignment&a, WordIndex new_i, WordIndex j,double thisValue)const
-{
- if( a(j)==new_i )
- return 1.0;
- LogProb change=transpair_model3::scoreOfMove(a,new_i,j,-1.0,0);
- LogProb a_prob=thisValue;
- if(a_prob<0.0 )
- a_prob=prob_of_target_and_alignment_given_source(a,2);
- massert(a_prob==prob_of_target_and_alignment_given_source(a,2));
- WordIndex old_i=a(j);
- //alignment b(a);
- const_cast<alignment&>(a).set(j,new_i);
- LogProb b_prob=prob_of_target_and_alignment_given_source(a,2);
- const_cast<alignment&>(a).set(j,old_i);
- change*=b_prob/a_prob;
- return change;
-}
-//increasing efficiency: no copy of alignment (calc. everything incrementally)
-LogProb transpair_model4::scoreOfSwap(const alignment&a, WordIndex j1, WordIndex j2,double thisValue)const
-{
- WordIndex aj1=a(j1),aj2=a(j2);
- if( aj1==aj2 )
- return 1.0;
- LogProb change=transpair_model3::scoreOfSwap(a,j1,j2,-1.0,0);
- LogProb a_prob=thisValue;
- if( a_prob<0.0 )
- a_prob=prob_of_target_and_alignment_given_source(a,2);
- massert(a_prob==prob_of_target_and_alignment_given_source(a,2));
-
- //alignment b(a);
- const_cast<alignment&>(a).set(j1,aj2);
- const_cast<alignment&>(a).set(j2,aj1);
- LogProb b_prob=prob_of_target_and_alignment_given_source(a,2);
- const_cast<alignment&>(a).set(j1,aj1);
- const_cast<alignment&>(a).set(j2,aj2);
-
- if( verboseTP )
- cerr << "scoreOfSwap: " << change << ' ' << a_prob << ' ' << b_prob << ' ' << endl;
- change*=b_prob/a_prob;
- if( verboseTP )
- cerr << "resulting: " << change << " should be " << _scoreOfSwap(a,j1,j2) << endl;
- return change;
-}
-
-LogProb transpair_model4::prob_of_target_and_alignment_given_source_1(const alignment&al,bool verb)const
-{
- LogProb total = 1.0 ;
- total *= pow(double(1-p1), m-2.0 * al.fert(0)) * pow(double(p1), double(al.fert(0)));
- if( verb) cerr << "IBM-4: (1-p1)^(m-2 f0)*p1^f0: " << total << endl;
- for (WordIndex i = 1 ; i <= al.fert(0) ; i++)
- total *= double(m - al.fert(0) - i + 1) / (double(DeficientDistortionForEmptyWord?(max(2,int(m))/DeficientDistortionForEmptyWord):i)) ;
- if( verb) cerr << "IBM-4: +NULL:binomial+distortion " << total << endl;
- for (WordIndex i = 1 ; i <= l ; i++)
- {
- total *= get_fertility(i, al.fert(i));// * (LogProb) factorial(al.fert(i));
- if( verb) cerr << "IBM-4: fertility of " << i << " " << get_fertility(i, al.fert(i)) << " -> " << total << endl;
- }
- for (WordIndex j = 1 ; j <= m ; j++)
- {
- total*= get_t(al(j), j) ;
- if( verb) cerr << "IBM-4: t of j:" << j << " i:" << al(j) << ": " << get_t(al(j), j) << " -> " << total << endl;
- }
- return total;
-}
-
-LogProb transpair_model4::prob_of_target_and_alignment_given_source(const alignment&al, short distortionType,bool verb)const
-{
- LogProb total = 1.0 ;
- static const LogProb almostZero = 1E-299 ;
- if( distortionType&1 )
- {
- total *= prob_of_target_and_alignment_given_source_1(al,verb);
- }
- if( distortionType&2 )
- {
- for(WordIndex j=1;j<=m;j++)
- if( al(j) )
- if( al.get_head(al(j))==j)
- {
- int ep=al.prev_cept(al(j));
- float x2=probFirst[ep](j,al.get_center(ep));
- massert(x2<=1.0);
- total*=x2;
- if( verb) cerr << "IBM-4: d=1 of " << j << ": " << x2 << " -> " << total << endl;
- }
- else
- {
- float x2=probSecond(j,al.prev_in_cept(j));
- massert(x2<=1.0);
- total*=x2;
- if( verb) cerr << "IBM-4: d>1 of " << j << ": " << x2 << " -> " << total << endl;
- }
- }
- return total?total:almostZero;
-}
-
-void transpair_model4::computeScores(const alignment&al,vector<double>&d)const
-{
- LogProb total1 = 1.0,total2=1.0,total3=1.0,total4=1.0 ;
- total1 *= pow(double(1-p1), m-2.0 * al.fert(0)) * pow(double(p1), double(al.fert(0)));
- for (WordIndex i = 1 ; i <= al.fert(0) ; i++)
- total1 *= double(m - al.fert(0) - i + 1) / (double(DeficientDistortionForEmptyWord?(max(2,int(m))/DeficientDistortionForEmptyWord):i)) ;
- for (WordIndex i = 1 ; i <= l ; i++)
- total2 *= get_fertility(i, al.fert(i));// * (LogProb) factorial(al.fert(i));
- for (WordIndex j = 1 ; j <= m ; j++)
- total3*= get_t(al(j), j) ;
- for(WordIndex j=1;j<=m;j++)
- if( al(j) )
- if( al.get_head(al(j))==j)
- {
- int ep=al.prev_cept(al(j));
- float x2=probFirst[ep](j,al.get_center(ep));
- total4*=x2;
- }
- else
- {
- float x2=probSecond(j,al.prev_in_cept(j));
- total4*=x2;
- }
- d.push_back(total1);//9
- d.push_back(total2);//10
- d.push_back(total3);//11
- d.push_back(total4);//12
-}
diff --git a/scripts/training/MGIZA/src/transpair_model4.h b/scripts/training/MGIZA/src/transpair_model4.h
deleted file mode 100644
index c8e1853..0000000
--- a/scripts/training/MGIZA/src/transpair_model4.h
+++ /dev/null
@@ -1,79 +0,0 @@
-/*
-
-Copyright (C) 2000,2001 Franz Josef Och (RWTH Aachen - Lehrstuhl fuer Informatik VI)
-
-This file is part of GIZA++ ( extension of GIZA ).
-
-This program is free software; you can redistribute it and/or
-modify it under the terms of the GNU General Public License
-as published by the Free Software Foundation; either version 2
-of the License, or (at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with this program; if not, write to the Free Software
-Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
-USA.
-
-*/
-#ifndef transpair_model4_h_fjo_defined
-#define transpair_model4_h_fjo_defined
-#include "Array2.h"
-#include "defs.h"
-#include "Vector.h"
-#include "NTables.h"
-#include "ATables.h"
-#include "TTables.h"
-#include "alignment.h"
-#include "D4Tables.h"
-#include "transpair_model3.h"
-
-extern double factorial(int n);
-
-class transpair_model4 : public transpair_model3
-{
- private:
- d4model&d4m;
- Array2<double> probSecond;
- Vector<Array2<double> > probFirst;
- public:
- typedef transpair_model3 simpler_transpair_model;
- transpair_model4(const Vector<WordIndex>&es, const Vector<WordIndex>&fs, tmodel<COUNT, PROB>&tTable, amodel<PROB>&aTable, amodel<PROB>&dTable, nmodel<PROB>&nTable, double _p1, double _p0,d4model*_d4m)
- : transpair_model3(es, fs, tTable, aTable, dTable, nTable, _p1, _p0),
- d4m(*_d4m),probSecond(m+1,m+1,0.0),probFirst(l+1)
- {
- for(unsigned int j1=1;j1<=m;++j1)
- for(unsigned int j2=1;j2<j1;++j2)
- {
- probSecond(j1,j2)=d4m.getProb_bigger(j1,j2,0,d4m.fwordclasses->getClass(get_fs(j1)),l,m);
- }
- for(unsigned int i=0;i<=l;++i)
- {
- Array2<double> &pf=probFirst[i]=Array2<double>(m+1,m+1,0.0);
- for(unsigned int j1=1;j1<=m;++j1)
- {
- map<m4_key,d4model::Vpff,compare1 >::const_iterator ci=d4m.getProb_first_iterator(d4m.ewordclasses->getClass(get_es(i)),d4m.fwordclasses->getClass(get_fs(j1)),l,m);
- for(unsigned int j2=0;j2<=m;++j2)
- {
- pf(j1,j2)=d4m.getProb_first_withiterator(j1,j2,m,ci);
- massert(pf(j1,j2)==d4m.getProb_first(j1,j2,d4m.ewordclasses->getClass(get_es(i)),d4m.fwordclasses->getClass(get_fs(j1)),l,m));
- }
- }
- }
- }
- LogProb prob_of_target_and_alignment_given_source_1(const alignment&al,bool verb)const;
- LogProb scoreOfAlignmentForChange(const alignment&a)const
- {return prob_of_target_and_alignment_given_source(a,2); }
- LogProb scoreOfMove(const alignment&a, WordIndex new_i, WordIndex j,double thisValue=-1.0)const;
- LogProb scoreOfSwap(const alignment&a, WordIndex j1, WordIndex j2,double thisValue=-1.0)const ;
- LogProb _scoreOfMove(const alignment&a, WordIndex new_i, WordIndex j,double thisValue=-1.0)const;
- LogProb _scoreOfSwap(const alignment&a, WordIndex j1, WordIndex j2,double thisValue=-1.0)const ;
- int modelnr()const{return 4;}
- LogProb prob_of_target_and_alignment_given_source(const alignment&al, short distortionType=3,bool verb=0)const;
- void computeScores(const alignment&al,vector<double>&d)const;
-};
-#endif
diff --git a/scripts/training/MGIZA/src/transpair_model5.cpp b/scripts/training/MGIZA/src/transpair_model5.cpp
deleted file mode 100644
index c621206..0000000
--- a/scripts/training/MGIZA/src/transpair_model5.cpp
+++ /dev/null
@@ -1,243 +0,0 @@
-/*
-
-Copyright (C) 2000,2001 Franz Josef Och (RWTH Aachen - Lehrstuhl fuer Informatik VI)
-
-This file is part of GIZA++ ( extension of GIZA ).
-
-This program is free software; you can redistribute it and/or
-modify it under the terms of the GNU General Public License
-as published by the Free Software Foundation; either version 2
-of the License, or (at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with this program; if not, write to the Free Software
-Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
-USA.
-
-*/
-#include "transpair_model5.h"
-#include "Parameter.h"
-
-int m5scorefound=0,m5scorenotfound=0;
-
-GLOBAL_PARAMETER(float,d5modelsmooth_factor,"model5SmoothFactor","smooting parameter for distortion probabilities in Model 5 (linear interpolation with constant)",PARLEV_SMOOTH,0.1);
-float d5modelsmooth_countoffset=0.0;
-
-LogProb transpair_model5::_scoreOfMove(const alignment&a, WordIndex new_i, WordIndex j,double)const
-{
- if( doModel4Scoring )
- return transpair_model4::_scoreOfMove(a,new_i,j);
- alignment b(a);
- b.set(j, new_i);
- LogProb a_prob=prob_of_target_and_alignment_given_source(a);
- LogProb b_prob=prob_of_target_and_alignment_given_source(b);
- if( a_prob )
- return b_prob/a_prob;
- else if( b_prob )
- return 1e20;
- else
- return 1.0;
-}
-LogProb transpair_model5::_scoreOfSwap(const alignment&a, WordIndex j1, WordIndex j2,double thisValue)const
-{
- if( doModel4Scoring )
- return transpair_model4::_scoreOfSwap(a,j1,j2,thisValue);
- alignment b(a);
- b.set(j1, a(j2));
- b.set(j2, a(j1));
- LogProb a_prob=prob_of_target_and_alignment_given_source(a);
- LogProb b_prob=prob_of_target_and_alignment_given_source(b);
- assert(a_prob);
- assert(b_prob);
- if( a_prob )
- return b_prob/a_prob;
- else if( b_prob )
- return 1e20;
- else
- return 1.0;
-}
-
-//increasing efficiency: no copy of alignment (calc. everything incrementally)
-LogProb transpair_model5::scoreOfMove(const alignment&a, WordIndex new_i, WordIndex j,double thisValue)const
-{
- if( doModel4Scoring )
- return transpair_model4::scoreOfMove(a,new_i,j,thisValue);
- alignment b(a);
- b.set(j,new_i);
-
- LogProb change;
- const WordIndex old_i=a(j);
- WordIndex f0=a.fert(0);
- if (old_i == new_i)
- change=1.0;
- else if (old_i == 0)
- change=((double)p0*p0/p1) *
- ((f0*(m-f0+1.0)) / ((m-2*f0+1)*(m-2*f0+2.0))) *
- ((PROB)(1.0)) *
- (get_fertility(new_i, a.fert(new_i)+1) / get_fertility(new_i, a.fert(new_i)))*
- (t(new_i, j)/t(old_i, j))*
- 1.0;
- else if (new_i == 0)
- change=(double(p1) / (p0*p0)) *
- (double((m-2*f0)*(m-2*f0-1))/((1+f0)*(m-f0))) *
- (1.0) *
- (get_fertility(old_i, a.fert(old_i)-1) /get_fertility(old_i, a.fert(old_i)))*
- (t(new_i, j) /t(old_i, j)) *
- (1.0);
- else
- change=(1.0) *
- (get_fertility(old_i,a.fert(old_i)-1) / get_fertility(old_i,a.fert(old_i))) *
- (get_fertility(new_i,a.fert(new_i)+1) /get_fertility(new_i,a.fert(new_i))) *
- (t(new_i,j)/t(old_i,j)) *
- (1.0);
- LogProb a_prob=thisValue;
- if( a_prob<0.0 )
- a_prob=prob_of_target_and_alignment_given_source(a,2);
- massert(a_prob==prob_of_target_and_alignment_given_source(a,2));
-
- LogProb b_prob=prob_of_target_and_alignment_given_source(b,2);
- change*=b_prob/a_prob;
- return change;
-}
-LogProb transpair_model5::scoreOfSwap(const alignment&a, WordIndex j1, WordIndex j2,double thisValue)const
-{
- if( doModel4Scoring )
- return transpair_model4::scoreOfSwap(a,j1,j2,thisValue);
- alignment b(a);
- b.set(j1,a(j2));
- b.set(j2,a(j1));
- LogProb change=transpair_model3::scoreOfSwap(a,j1,j2,-1.0,0);
- LogProb a_prob=thisValue;
- if( a_prob<0.0 )
- a_prob=prob_of_target_and_alignment_given_source(a,2);
- massert(a_prob==prob_of_target_and_alignment_given_source(a,2));
- LogProb b_prob=prob_of_target_and_alignment_given_source(b,2);
- change*=b_prob/a_prob;
- return change;
-}
-
-LogProb transpair_model5::prob_of_target_and_alignment_given_source(const alignment&al, short distortionType,bool verb)const
-{
- if( doModel4Scoring )
- return transpair_model4::prob_of_target_and_alignment_given_source(al,distortionType);
- LogProb total = 1.0 ;
- static const LogProb almostZero = 1E-299 ;
- double x2;
- if( distortionType&1 )
- {
- total *= pow(double(1-p1), m-2.0 * al.fert(0)) * pow(double(p1), double(al.fert(0)));
- if( verb) cerr << "IBM-5: (1-p1)^(m-2 f0)*p1^f0: " << total << endl;
- for (WordIndex i = 1 ; i <= al.fert(0) ; i++)
- total *= double(m - al.fert(0) - i + 1) / i ; // IBM-5 is not deficient!
- if( verb) cerr << "IBM-5: +NULL:binomial+distortion " << total << endl;
- for (WordIndex i = 1 ; i <= l ; i++)
- {
- total *= get_fertility(i, al.fert(i));
- if( verb) cerr << "IBM-5: fertility of " << i << " " << get_fertility(i, al.fert(i)) << " -> " << total << endl;
- }
- for (WordIndex j = 1 ; j <= m ; j++)
- {
- total*= get_t(al(j), j) ;
- if( verb) cerr << "IBM-5: t of j:" << j << " i:" << al(j) << ": " << get_t(al(j), j) << " -> " << total << endl;
- }
- }
- if( distortionType&2 )
- {
- PositionIndex prev_cept=0;
- PositionIndex vac_all=m;
- Vector<char> vac(m+1,0);
- for(WordIndex i=1;i<=l;i++)
- {
- PositionIndex cur_j=al.als_i[i];
- PositionIndex prev_j=0;
- PositionIndex k=0;
- if(cur_j) { // process first word of cept
- k++;
- // previous position
- total*= (x2=d5m.getProb_first(vacancies(vac,cur_j),vacancies(vac,al.get_center(prev_cept)),d5m.fwordclasses->getClass(get_fs(cur_j)),l,m,vac_all-al.fert(i)+k));
-
- vac_all--;
- assert(vac[cur_j]==0);
- vac[cur_j]=1;
-
- if( verb) cerr << "IBM-5: d=1 of " << cur_j << ": " << x2 << " -> " << total << endl;
- prev_j=cur_j;
- cur_j=al.als_j[cur_j].next;
- }
- while(cur_j) { // process following words of cept
- k++;
- // previous position
- int vprev=vacancies(vac,prev_j);
- total*= (x2=d5m.getProb_bigger(vacancies(vac,cur_j),vprev,d5m.fwordclasses->getClass(get_fs(cur_j)),l,m,vac_all-vprev/*war weg*/-al.fert(i)+k));
-
-
- vac_all--;
- vac[cur_j]=1;
-
-
- if( verb) cerr << "IBM-5: d>1 of " << cur_j << ": " << x2 << " -> " << total << endl;
- prev_j=cur_j;
- cur_j=al.als_j[cur_j].next;
- }
- assert(k==al.fert(i));
- if( k )
- prev_cept=i;
- }
- assert(vac_all==al.fert(0));
- }
- total = total?total:almostZero;
- return total;
-}
-
-
-void transpair_model5::computeScores(const alignment&al,vector<double>&d)const
-{
- LogProb total1 = 1.0,total2=1.0,total3=1.0,total4=1.0 ;
- total1 *= pow(double(1-p1), m-2.0 * al.fert(0)) * pow(double(p1), double(al.fert(0)));
- for (WordIndex i = 1 ; i <= al.fert(0) ; i++)
- total1 *= double(m - al.fert(0) - i + 1) / i ; // IBM-5 is not deficient!
- for (WordIndex i = 1 ; i <= l ; i++)
- total2 *= get_fertility(i, al.fert(i));
- for (WordIndex j = 1 ; j <= m ; j++)
- total3*= get_t(al(j), j) ;
- PositionIndex prev_cept=0;
- PositionIndex vac_all=m;
- Vector<char> vac(m+1,0);
- for(WordIndex i=1;i<=l;i++)
- {
- PositionIndex cur_j=al.als_i[i];
- PositionIndex prev_j=0;
- PositionIndex k=0;
- if(cur_j) { // process first word of cept
- k++;
- total4*=d5m.getProb_first(vacancies(vac,cur_j),vacancies(vac,al.get_center(prev_cept)),d5m.fwordclasses->getClass(get_fs(cur_j)),l,m,vac_all-al.fert(i)+k);
- vac_all--;
- assert(vac[cur_j]==0);
- vac[cur_j]=1;
- prev_j=cur_j;
- cur_j=al.als_j[cur_j].next;
- }
- while(cur_j) { // process following words of cept
- k++;
- int vprev=vacancies(vac,prev_j);
- total4*=d5m.getProb_bigger(vacancies(vac,cur_j),vprev,d5m.fwordclasses->getClass(get_fs(cur_j)),l,m,vac_all-vprev/*war weg*/-al.fert(i)+k);
- vac_all--;
- vac[cur_j]=1;
- prev_j=cur_j;
- cur_j=al.als_j[cur_j].next;
- }
- assert(k==al.fert(i));
- if( k )
- prev_cept=i;
- }
- assert(vac_all==al.fert(0));
- d.push_back(total1);//13
- d.push_back(total2);//14
- d.push_back(total3);//15
- d.push_back(total4);//16
-}
diff --git a/scripts/training/MGIZA/src/transpair_model5.h b/scripts/training/MGIZA/src/transpair_model5.h
deleted file mode 100644
index 5ecf49d..0000000
--- a/scripts/training/MGIZA/src/transpair_model5.h
+++ /dev/null
@@ -1,74 +0,0 @@
-/*
-
-Copyright (C) 2000,2001 Franz Josef Och (RWTH Aachen - Lehrstuhl fuer Informatik VI)
-
-This file is part of GIZA++ ( extension of GIZA ).
-
-This program is free software; you can redistribute it and/or
-modify it under the terms of the GNU General Public License
-as published by the Free Software Foundation; either version 2
-of the License, or (at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with this program; if not, write to the Free Software
-Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
-USA.
-
-*/
-#ifndef transpair_model5_h_fjo_defined
-#define transpair_model5_h_fjo_defined
-#include "Array2.h"
-#include "defs.h"
-#include "Vector.h"
-#include "NTables.h"
-#include "ATables.h"
-#include "TTables.h"
-#include "alignment.h"
-#include "D5Tables.h"
-#include "transpair_model4.h"
-
-extern double factorial(int n);
-
-inline int vacancies(const Vector<char>&vac,int u)
-{
- int n=0;
- const char *i=&(vac[0])+1;
- const char *end=&(vac[0])+u+1;
- while(i<end)
- n+= ((*i++)==0);
- return n;
-}
-
-class transpair_model5 : public transpair_model4
-{
- private:
- const d5model&d5m;
- bool doModel4Scoring;
- public:
- typedef transpair_model3 simpler_transpair_model;
- mutable map<Vector<PositionIndex>,LogProb> scores[4];
- transpair_model5(const Vector<WordIndex>&es, const Vector<WordIndex>&fs, tmodel<COUNT, PROB>&tTable,
- amodel<PROB>&aTable, amodel<PROB>&dTable, nmodel<PROB>&nTable, double _p1, double _p0,
- const d5model*_d5m)
- : transpair_model4(es, fs, tTable, aTable, dTable, nTable, _p1, _p0,&_d5m->d4m),d5m(*_d5m),doModel4Scoring(0) {}
- LogProb scoreOfAlignmentForChange(const alignment&a)const
- {
- if( doModel4Scoring )
- return transpair_model4::prob_of_target_and_alignment_given_source(a,2);
- else
- return prob_of_target_and_alignment_given_source(a,2);
- }
- LogProb scoreOfMove(const alignment&a, WordIndex new_i, WordIndex j,double thisValue=-1.0)const;
- LogProb scoreOfSwap(const alignment&a, WordIndex j1, WordIndex j2,double thisValue=-1.0)const ;
- LogProb _scoreOfMove(const alignment&a, WordIndex new_i, WordIndex j,double thisValue=-1.0)const;
- LogProb _scoreOfSwap(const alignment&a, WordIndex j1, WordIndex j2,double thisValue=-1.0)const ;
- int modelnr()const{return 5;}
- LogProb prob_of_target_and_alignment_given_source(const alignment&al, short distortionType=3,bool verb=0)const;
- void computeScores(const alignment&al,vector<double>&d)const;
-};
-#endif
diff --git a/scripts/training/MGIZA/src/transpair_modelhmm.h b/scripts/training/MGIZA/src/transpair_modelhmm.h
deleted file mode 100644
index 2b38913..0000000
--- a/scripts/training/MGIZA/src/transpair_modelhmm.h
+++ /dev/null
@@ -1,223 +0,0 @@
-/*
-
-Copyright (C) 2000,2001 Franz Josef Och (RWTH Aachen - Lehrstuhl fuer Informatik VI)
-
-This file is part of GIZA++ ( extension of GIZA ).
-
-This program is free software; you can redistribute it and/or
-modify it under the terms of the GNU General Public License
-as published by the Free Software Foundation; either version 2
-of the License, or (at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with this program; if not, write to the Free Software
-Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
-USA.
-
-*/
-#ifndef transpair_modelhmm_h_fjo_defined
-#define transpair_modelhmm_h_fjo_defined
-#include "Array2.h"
-#include "defs.h"
-#include "Vector.h"
-#include "NTables.h"
-#include "ATables.h"
-#include "TTables.h"
-#include "alignment.h"
-#include <math.h>
-#include "transpair_model2.h"
-#include "ForwardBackward.h"
-#include "hmm.h"
-
-class transpair_modelhmm : public transpair_model2
-{
- public:
- typedef transpair_modelhmm simpler_transpair_model;
- HMMNetwork*net;
- transpair_modelhmm(const Vector<WordIndex>&es, const Vector<WordIndex>&fs, const tmodel<COUNT, PROB>&tTable,
- const amodel<PROB>&aTable,const amodel<PROB>&,const nmodel<PROB>&,
- double, double,const hmm*h)
- : transpair_model2(es,fs,tTable,aTable),net(h->makeHMMNetwork(es,fs,0))
- {}
- ~transpair_modelhmm() { delete net; }
- int modelnr()const{return 6;}
- LogProb scoreOfMove(const alignment&a, WordIndex _new_i, WordIndex j,double=-1.0)const
- {
- int new_i=_new_i;
- LogProb change=1.0;
- int old_i=a(j);
- if (old_i == new_i)
- change=1.0;
- else
- {
- int theJ=j-1;
- old_i--;
- new_i--;
- int jj=j-1;
- while(jj>0&&a(jj)==0)
- jj--;
- int theIPrev= (jj>0)?(a(jj)-1):0;
- if( j>1&&a(j-1)==0 )
- theIPrev+=l;
- if( old_i==-1 ){old_i = theIPrev;if(old_i<int(l))old_i+=l;}
- if( new_i==-1 ){new_i = theIPrev;if(new_i<int(l))new_i+=l;}
- int theIPrevOld=theIPrev,theIPrevNew=theIPrev;
- if( theJ==0 )
- {
- change*=net->getAlphainit(new_i)/net->getAlphainit(old_i);
- }
- do
- {
- if( new_i!=old_i )
- {
- change*=net->nodeProb(new_i,theJ)/net->nodeProb(old_i,theJ);
- }
- if( theJ>0)
- change*=net->outProb(theJ,theIPrevNew,new_i)/net->outProb(theJ,theIPrevOld,old_i);
- theIPrevOld=old_i;
- theIPrevNew=new_i;
- theJ++;
- if( theJ<int(m) && a(theJ+1)==0 )
- {
- if( new_i<int(l)) new_i+=l;
- if( old_i<int(l)) old_i+=l;
- }
- } while( theJ<int(m) && a(theJ+1)==0 );
- if(theJ==int(m))
- {
- change*=net->getBetainit(new_i)/net->getBetainit(old_i);
- }
- else
- {
- new_i=a(theJ+1)-1;
- if( new_i==-1)
- new_i=theIPrevNew;
- change*=net->outProb(theJ,theIPrevNew,new_i)/net->outProb(theJ,theIPrevOld,new_i);
- }
- }
- return change;
- }
- LogProb scoreOfAlignmentForChange(const alignment&)const
- {return -1.0; }
- LogProb scoreOfSwap(const alignment&a, WordIndex j1, WordIndex j2,double=-1.0)const
- {
- return _scoreOfSwap(a,j1,j2);
- }
- LogProb _scoreOfMove(const alignment&a, WordIndex new_i, WordIndex j,double=-1.0)const
- {
- alignment b(a);
- b.set(j, new_i);
- LogProb a_prob=prob_of_target_and_alignment_given_source(a);
- LogProb b_prob=prob_of_target_and_alignment_given_source(b);
- if( a_prob )
- return b_prob/a_prob;
- else if( b_prob )
- return 1e20;
- else
- return 1.0;
- }
- LogProb _scoreOfSwap(const alignment&a, WordIndex j1, WordIndex j2,double=-1.0)const
- {
- WordIndex aj1=a(j1),aj2=a(j2);
- if( aj1==aj2 )
- return 1.0;
- LogProb a_prob=prob_of_target_and_alignment_given_source(a);
-
- /*alignment b(a);
- b.set(j1, a(j2));
- b.set(j2, a(j1));
- LogProb b_prob=prob_of_target_and_alignment_given_source(b);*/
-
- const_cast<alignment&>(a).set(j1,aj2);
- const_cast<alignment&>(a).set(j2,aj1);
- LogProb b_prob=prob_of_target_and_alignment_given_source(a);
- const_cast<alignment&>(a).set(j1,aj1);
- const_cast<alignment&>(a).set(j2,aj2);
-
- if( a_prob )
- return b_prob/a_prob;
- else if( b_prob )
- return 1e20;
- else
- return 1.0;
- }
- inline friend ostream&operator<<(ostream&out, const transpair_modelhmm&)
- {
- return out << "NO-OUTPUT for transpair_modelhmm\n";
- }
- LogProb prob_of_target_and_alignment_given_source(const alignment&al,bool verbose=0)const
- {
- double prob=1.0;
- int theIPrev=0;
- for(unsigned int j=1;j<=m;j++)
- {
- int theJ=j-1;
- int theI=al(j)-1;
- if( theI==-1 )
- theI=(theIPrev%l)+l;
- prob*=net->nodeProb(theI,theJ);
- if( verbose )
- cout << "NP " << net->nodeProb(theI,theJ) << ' ';
- if( j==1 )
- {
- prob*=net->getAlphainit(theI);
- if( verbose )
- cout << "AP0 " << net->getAlphainit(theI) << ' ';
- }
- else
- {
- prob*=net->outProb(theJ,theIPrev,theI);
- if( verbose )
- cout << "AP1 " << net->outProb(theJ,theIPrev,theI) << ' ';
- }
- theIPrev=theI;
- if( j==m )
- {
- prob*=net->getBetainit(theI);
- if( verbose )
- cout << "AP2 " << net->getBetainit(theI) << ' ';
- }
- if( verbose )
- cout << "j:"<<theJ<<" i:"<<theI << "; ";
- }
- if( verbose )
- cout << '\n';
- return prob*net->finalMultiply;
- }
- void computeScores(const alignment&al,vector<double>&d)const
- {
- double prob1=1.0,prob2=1.0;
- int theIPrev=0;
- for(unsigned int j=1;j<=m;j++)
- {
- int theJ=j-1;
- int theI=al(j)-1;
- if( theI==-1 )
- theI=(theIPrev%l)+l;
- prob1*=net->nodeProb(theI,theJ);
- if( j==1 )
- {
- prob2*=net->getAlphainit(theI);
- }
- else
- {
- prob2*=net->outProb(theJ,theIPrev,theI);
- }
- theIPrev=theI;
- if( j==m )
- {
- prob2*=net->getBetainit(theI);
- }
- }
- d.push_back(prob1);
- d.push_back(prob2);
- }
-
- bool isSubOptimal()const{return 0;}
-};
-#endif
diff --git a/scripts/training/MGIZA/src/ttableDiff.hpp b/scripts/training/MGIZA/src/ttableDiff.hpp
deleted file mode 100644
index 280cadd..0000000
--- a/scripts/training/MGIZA/src/ttableDiff.hpp
+++ /dev/null
@@ -1,119 +0,0 @@
-/* -*- Mode: C; indent-tabs-mode: t; c-basic-offset: 4; tab-width: 4 -*- */
-/*
- * newgiza
- * Copyright (C) Qin Gao 2007 <qing@cs.cmu.edu>
- *
- * newgiza is free software.
- *
- * You may redistribute it and/or modify it under the terms of the
- * GNU General Public License, as published by the Free Software
- * Foundation; either version 2 of the License, or (at your option)
- * any later version.
- *
- * newgiza is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
- * See the GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with newgiza. If not, write to:
- * The Free Software Foundation, Inc.,
- * 51 Franklin Street, Fifth Floor
- * Boston, MA 02110-1301, USA.
- */
-
-
-#ifndef _TTABLEDIFF_HPP_
-#define _TTABLEDIFF_HPP_
-#include "TTables.h"
-#include <sstream>
-#include <string>
-#include "types.h"
-
-using namespace std;
-#ifdef WIN32
- typedef hash_map<wordPairIds, COUNT, hashpair> wordpair_hash;
-#else
- typedef hash_map<wordPairIds, COUNT, hashpair, equal_to<wordPairIds> > wordpair_hash;
-#endif
-/*!
-This class is meant to create a difference file in order to make
-GIZA paralell.
-*/
-template <class COUNT,class PROB>
-class CTTableDiff{
-private:
- INT32 noEnglishWords; // total number of unique source words
- INT32 noFrenchWords; // total number of unique target words
- /*!
- Store only the counting*/
- wordpair_hash ef;
-
-public:
- INT32 SaveToFile(const char* filename){
- ofstream ofs(filename);
- if(!ofs.is_open()){
- return -1;
- }else{
- wordpair_hash::iterator it;
- for( it = ef.begin() ; it != ef.end(); it++){
- ofs << it->first.first << " " << it->first.second << " "
- << it->second << std::endl;
- }
- }
- return SUCCESS;
- }
-
- INT32 LoadFromFile(const char* filename){
- ef.clear();
- ifstream ifs(filename);
- if(!ifs.is_open()){
- return -1;
- }
- string sline;
- while(!ifs.eof()){
- sline = "";
- std::getline(ifs,sline);
- if(sline.length()){
- //cout << sline << endl;
- stringstream ss(sline.c_str());
- WordIndex we=-1,wf=-1;
- COUNT ct=-1 ;
- ss >> we >> wf >> ct;
- if(we==-1||wf==-1||ct==-1)
- continue;
- ef[wordPairIds(we,wf)] = ct;
- }
- }
- return SUCCESS;
- }
-
- COUNT * GetPtr(WordIndex e, WordIndex f){
- // look up this pair and return its position
- wordpair_hash::iterator i = ef.find(wordPairIds(e, f));
- if(i != ef.end()) // if it exists, return a pointer to it.
- return(&((*i).second));
- else return(0) ; // else return NULL pointer
- }
-
- void incCount(WordIndex e, WordIndex f, COUNT inc)
- // increments the count of the given word pair. if the pair does not exist,
- // it creates it with the given value.
- {
- if( inc )
- ef[wordPairIds(e, f)] += inc ;
- }
-
- INT32 AugmentTTable(tmodel<COUNT,PROB>& ttable){
- wordpair_hash::iterator it;
- for( it = ef.begin() ; it != ef.end(); it++){
- ttable.incCount(it->first.first,it->first.second,it->second);
- }
- return SUCCESS;
- }
-
-protected:
-
-};
-
-#endif // _TTABLEDIFF_HPP_
diff --git a/scripts/training/MGIZA/src/types.h b/scripts/training/MGIZA/src/types.h
deleted file mode 100644
index b26db96..0000000
--- a/scripts/training/MGIZA/src/types.h
+++ /dev/null
@@ -1,12 +0,0 @@
-/*!
-A set of definition of types, when port to X64, something should
-be changed.
-*/
-
-
-typedef int INT32;
-typedef float FLOAT32;
-typedef double FLOAT64;
-
-#define SUCCESS 0
-#define IS_SUCCESS(x) (x==0)
diff --git a/scripts/training/MGIZA/src/utility.cpp b/scripts/training/MGIZA/src/utility.cpp
deleted file mode 100644
index 15b4783..0000000
--- a/scripts/training/MGIZA/src/utility.cpp
+++ /dev/null
@@ -1,46 +0,0 @@
-/*
-
-EGYPT Toolkit for Statistical Machine Translation
-Written by Yaser Al-Onaizan, Jan Curin, Michael Jahr, Kevin Knight, John Lafferty, Dan Melamed, David Purdy, Franz Och, Noah Smith, and David Yarowsky.
-
-This program is free software; you can redistribute it and/or
-modify it under the terms of the GNU General Public License
-as published by the Free Software Foundation; either version 2
-of the License, or (at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with this program; if not, write to the Free Software
-Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
-USA.
-
-*/
-#include <string>
-#include <time.h>
-#include "mymath.h"
-
-using namespace std;
-
-double factorial(int n)
-{
- double f=1;
- for(int i=2; i <= n; i++)
- f *= i;
- return f;
-}
-#ifdef WIN32
-string my_ctime(const time_t* t){
- char buffer[256];
- ctime_s(buffer,256,t);
- return buffer;
-}
-#else
-
-string my_ctime(const time_t* t){
- return ctime(t);
-}
-#endif
diff --git a/scripts/training/MGIZA/src/utility.h b/scripts/training/MGIZA/src/utility.h
deleted file mode 100644
index fc6cded..0000000
--- a/scripts/training/MGIZA/src/utility.h
+++ /dev/null
@@ -1,59 +0,0 @@
-/*
-
-EGYPT Toolkit for Statistical Machine Translation
-Written by Yaser Al-Onaizan, Jan Curin, Michael Jahr, Kevin Knight, John Lafferty, Dan Melamed, David Purdy, Franz Och, Noah Smith, and David Yarowsky.
-
-This program is free software; you can redistribute it and/or
-modify it under the terms of the GNU General Public License
-as published by the Free Software Foundation; either version 2
-of the License, or (at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with this program; if not, write to the Free Software
-Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
-USA.
-
-*/
-#ifndef utility_h
-#define utility_h
-#include <iostream>
-#include <time.h>
-#include "Perplexity.h"
-#include "Vector.h"
-#include "TTables.h"
-#include "getSentence.h"
-#include "vocab.h"
-
-extern void printHelp(void);
-extern void parseConfigFile (char * fname );
-extern void parseArguments(int argc, char *argv[]);
-extern void generatePerplexityReport(const Perplexity& trainperp,
- const Perplexity& testperp,
- const Perplexity& trainVperp,
- const Perplexity& testVperp,
- ostream& of, int trainsize,
- int testsize, unsigned int last, bool);
-
-extern void printSentencePair(Vector<WordIndex>& es, Vector<WordIndex>& fs, ostream& of);
-
-extern void printOverlapReport(const tmodel<COUNT, PROB>& tTable,
- sentenceHandler& testHandler, vcbList& trainEList,
- vcbList& trainFList, vcbList& testEList, vcbList& testFList);
-
-extern void printAlignToFile(const Vector<WordIndex>& es, const Vector<WordIndex>& fs,
- const Vector<WordEntry>& evlist, const Vector<WordEntry>& fvlist,
- ostream& of2, const Vector<WordIndex>& viterbi_alignment, int pair_no,
- double viterbi_score);
-
-extern double factorial(int) ;
-
-string my_ctime(const time_t* t);
-
-
-
-#endif
diff --git a/scripts/training/MGIZA/src/vocab.cpp b/scripts/training/MGIZA/src/vocab.cpp
deleted file mode 100644
index e7bf13a..0000000
--- a/scripts/training/MGIZA/src/vocab.cpp
+++ /dev/null
@@ -1,120 +0,0 @@
-/*
-
-EGYPT Toolkit for Statistical Machine Translation
-Written by Yaser Al-Onaizan, Jan Curin, Michael Jahr, Kevin Knight, John Lafferty, Dan Melamed, David Purdy, Franz Och, Noah Smith, and David Yarowsky.
-
-This program is free software; you can redistribute it and/or
-modify it under the terms of the GNU General Public License
-as published by the Free Software Foundation; either version 2
-of the License, or (at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with this program; if not, write to the Free Software
-Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
-USA.
-
-*/
-#include "vocab.h"
-
-void vcbList::readVocabList()
- // reads a vocabulary file from fname. It expects the following format:
- //
- // token_id token_string frequency
-{
-
- int freq=0;
- WordIndex word_id ;
- WordEntry entry("NULL",0) ;
-
- string line, word ;
- cerr << "Reading vocabulary file from:" << fname << "\n";
- // total = 0 ;
- ifstream ifs(fname);
-
- if(!ifs){
- cerr << "\nCannot open vocabulary file " << fname << "file";
- exit(1);
- }
- size_t sline = 0;
- while(getline(ifs, line)){
- sline ++;
- }
-
- ifs.close();
-
- ifstream vFile(fname);
- if(!vFile){
- cerr << "\nCannot open vocabulary file " << fname << "file";
- exit(1);
- }
-
- list.reserve(sline+100); // Reserve space to prevent re-allocating
-
- list.push_back(entry);
- s2i[entry.word]=list.size()-1;
-
- while(getline(vFile, line)){
- istrstream buffer(line.c_str());
- if(!(buffer >> word_id >> word >> freq))
- cerr << "ERROR: reading vocabulary; " << word_id << ' ' << word << ' ' << freq << endl;
- if (word_id == 0){
- cerr << "ERROR: TOKEN ID 0 is reserved for special token NULL, in line: \n"<< line<<"\n" ;
- exit(-1);
- }
- else if (word_id >= MAX_VOCAB_SIZE){
- cerr << "ERROR: TOKEN ID is greater than maximum vocabulary size "
- << MAX_VOCAB_SIZE << " in line :\n"<< line <<"\n" ;
- exit(-1);
- }
- else if (freq < 0){
- cerr << "ERROR: frequency must be a positive integer, in line :\n"
- << line <<"\n";
- exit(-1);
- }
- else if(word_id >= list.size()){
- list.resize(word_id+1);
- list[word_id].word = word ;
- s2i[word]=word_id;
- list[word_id].freq = 0 ;
- noUniqueTokens = word_id + 1 ;
- // noUniqueTokens++ ;
- // total += freq ;
- }
- else if(list[word_id].word != "\0"){
- cerr << "ERROR: TOKEN ID must be unique for each token, in line :\n"
- << line <<"\n";
- cerr << "TOKEN ID " << word_id << " has already been assigned to: " <<
- list[word_id].word << "\n";
- exit(-1);
- }
- else { // line has valid information
- list[word_id].word = word ;
- s2i[word]=word_id;
- list[word_id].freq = 0 ;
- // noUniqueTokens++ ;
- noUniqueTokens = word_id + 1 ;
- // total += freq ;
- }
- } // end of while
-}
-
-
-void vcbList::compact(const std::set<WordIndex>& evoc){
- int del = 0;
- for(int i=0; i< list.size() ; i++){
- if(evoc.find(i)==evoc.end()){ // Not appear in corpus
- s2i.erase(list[i].word);
- list[i].word = "";
- del++;
- }
- }
- cerr << "Compacted Vocabulary, eliminated " << del << " entries "
- << s2i.size() << " remains " << endl;
-}
-
-
diff --git a/scripts/training/MGIZA/src/vocab.h b/scripts/training/MGIZA/src/vocab.h
deleted file mode 100644
index 8bf5de7..0000000
--- a/scripts/training/MGIZA/src/vocab.h
+++ /dev/null
@@ -1,110 +0,0 @@
-/*
-
-EGYPT Toolkit for Statistical Machine Translation
-Written by Yaser Al-Onaizan, Jan Curin, Michael Jahr, Kevin Knight, John Lafferty, Dan Melamed, David Purdy, Franz Och, Noah Smith, and David Yarowsky.
-
-This program is free software; you can redistribute it and/or
-modify it under the terms of the GNU General Public License
-as published by the Free Software Foundation; either version 2
-of the License, or (at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with this program; if not, write to the Free Software
-Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
-USA.
-
-*/
-#ifndef _vocab_h
-#define _vocab_h 1
-
-#include "defs.h"
-#include "Vector.h"
-
-#include <fstream>
-#include <strstream>
-#include <map>
-#include <set>
-
-class WordEntry {
- public:
- string word ;
- double freq ;
- WordEntry():word("\0"), freq(0){};
- WordEntry(string w, int f):word(w), freq(f){};
-};
-
-class vcbList{
- private:
- Vector<WordEntry>& list ;
- map<string,int> s2i;
- double total;
- WordIndex noUniqueTokens ;
- WordIndex noUniqueTokensInCorpus ;
- const char* fname ;
- public:
- vcbList(Vector<WordEntry>& vcb,const char* f=0):list(vcb), total(0), noUniqueTokens(0), noUniqueTokensInCorpus(0), fname(f){};
- void setName(const char*f)
- { fname=f; }
- vcbList(const vcbList& a):list(a.list), total(a.total), noUniqueTokens(a.noUniqueTokens), noUniqueTokensInCorpus(0), fname(a.fname){};
- void compact(const std::set<WordIndex>& evoc);
- inline WordIndex size()const {return (list.size());};
- inline WordIndex uniqTokens()const {return noUniqueTokens;};
- inline WordIndex uniqTokensInCorpus()const {return noUniqueTokensInCorpus;};
- inline double totalVocab() const {return total;};
- inline Vector<WordEntry>& getVocabList() { return(list);};
- inline const Vector<WordEntry>& getVocabList()const { return(list);};
- void readVocabList();
- void incFreq(WordIndex id , double f){
- if(id < list.size()){
- if (list[id].freq == 0)
- noUniqueTokensInCorpus++;
- list[id].freq += f ;
- total += f ;
- }
- };
- void clearAllFreq(){
- for (WordIndex id = 0 ; id < list.size() ; id++)
- list[id].freq = 0 ;
- total = 0 ;
- noUniqueTokensInCorpus = 0 ;
- };
-
- const bool has_word(const string& x) const{
- map<string,int>::const_iterator i=s2i.find(x);
- return i!=s2i.end();
- }
- int operator()(const string&x)const
- {
- map<string,int>::const_iterator i=s2i.find(x);
- if( i!=s2i.end() )
- return i->second;
- else
- {
- cerr << "ERROR: no word index for '"<<x<<"'\n";
- return 0;
- }
- }
- const string operator()(WordIndex id) const { // Yaser - 2000-12-13
- if (id < list.size())
- return list[id].word ;
- else return 0 ;
- }
- const string operator[](WordIndex id) const { // Yaser - 2000-12-13
- if (id < list.size())
- return list[id].word ;
- else return 0 ;
- }
- void printVocabList(ostream& of){
- for (WordIndex i = 1 ; i < list.size() ; i++){
- if (list[i].word != "" && list[i].freq > 0)
- of << i << ' ' << list[i].word << ' ' << list[i].freq << '\n';
- }
- }
-
-};
-#endif
diff --git a/scripts/training/giza-pp/GIZA++-v2/mystl.h b/scripts/training/giza-pp/GIZA++-v2/mystl.h
deleted file mode 100644
index a3a6e41..0000000
--- a/scripts/training/giza-pp/GIZA++-v2/mystl.h
+++ /dev/null
@@ -1,329 +0,0 @@
-/* ---------------------------------------------------------------- */
-/* Copyright 1998 (c) by RWTH Aachen - Lehrstuhl fuer Informatik VI */
-/* Franz Josef Och */
-/* ---------------------------------------------------------------- */
-#ifndef MY_STL_H_DEFINED
-#define MY_STL_H_DEFINED
-
-#include <string>
-using namespace std;
-#ifdef USE_STLPORT
-#ifdef __STL_DEBUG
-using namespace _STLD;
-#else
-using namespace _STL;
-#endif
-#endif
-
-#include "myassert.h"
-#include <string>
-#include <utility>
-
-#if __GNUC__==2
-#include <hash_map>
-#elsif __GNUC__==3
-#include <ext/hash_map>
-using __gnu_cxx::hash_map;
-#else
-#include <tr1/unordered_map>
-#define hash_map unordered_map
-using namespace std::tr1;
-#endif
-
-#include <vector>
-#include <iostream>
-#include "mymath.h"
-#include "Array2.h"
-
-#define over_string(a,i) for(unsigned int i=0;i<a.length();i++)
-#define over_array(a,i) for(i=(a).low();i<=(a).high();i++)
-#define backwards_array(a,i) for(i=(a).high();i>=(a).low();i--)
-#define over_arr(a,i) for(int i=(a).low();i<=(a).high();i++)
-#define over_arrMAX(a,i,max) for(int i=(a).low();i<=min((a).high(),max-1);i++)
-#define backwards_arr(a,i) for(int i=(a).high();i>=(a).low();i--)
-
-extern double n1mult,n2mult,n3mult;
-
-inline double realProb(int n1,int n2)
-{
- massert(n1<=n2);
- iassert(n1>=0&&n2>0);
- if(n2==0)n2=1;
- return ((double)n1)/(double)n2;
-}
-
-inline double verfProb(int n1,int n2)
-{
- double prob = realProb(n1,n2);
- if( n1==1 )return prob*n1mult;
- else if( n1==2 )return prob*n2mult;
- else if( n1==3 )return prob*n3mult;
- else
- return prob;
-}
-
-inline bool prefix(const string&x,const string&y)
-{
- if(y.size()>x.size() )
- return 0;
- for(unsigned int i=0;i<y.size();++i)
- if( y[i]!=x[i] )
- return 0;
- return 1;
-}
-
-
-/*template<class T>
-int lev(const T&s1,const T&s2)
-{
- Array2<int,vector<int> > a(s1.size()+1,s2.size()+1,1000);
- Array2<pair<int,int>,vector<pair<int,int> > > back(s1.size()+1,s2.size()+1,pair<int,int>(0,0));
- for(unsigned int i=0;i<=s1.size();i++)
- for(unsigned int j=0;j<=s2.size();j++)
- {
- if( i==0&&j==0 )
- a(i,j)=0;
- else
- {
- int aDEL=100,aINS=100,aSUB=100;
- if(i>0)
- aDEL=a(i-1,j)+1;
- if(j>0)
- aINS=a(i,j-1)+1;
- if(i>0&&j>0)
- aSUB=a(i-1,j-1)+ !(s1[i-1]==s2[j-1]);
- if( aSUB<=aDEL && aSUB<=aINS )
- {
- a(i,j)=aSUB;
- back(i,j)=pair<int,int>(i-1,j-1);
- }
- else if( aDEL<=aSUB && aDEL<=aINS )
- {
- a(i,j)=aDEL;
- back(i,j)=pair<int,int>(i-1,j);
- }
- else
- {
- a(i,j)=aINS;
- back(i,j)=pair<int,int>(i,j-1);
- }
- }
- }
- return a(s1.size(),s2.size());
-}
-
-template<class T>
-float rel_lev(const T&s1,const T&s2)
-{
- if( s1.size()==0 )
- return s2.size()==0;
- else
- return min(1.0,lev(s1,s2)/(double)s1.size());
-}*/
-
-template<class V> int Hash(const pair<V,V>&a)
-{ return Hash(a.first)+13001*Hash(a.second); }
-
-template<class T1,class T2>
-ostream& operator<<(ostream &out,const pair<T1,T2> &ir)
-{
- out << "(" << ir.first << "," << ir.second << ")";
- return out;
-}
-
-inline int Hash(const string& s)
-{
- int sum=0;
- string::const_iterator i=s.begin(),end=s.end();
- for(;i!=end;i++)sum=5*sum+(*i);
- return sum;
-}
-template<class A,class B,class C>
-class tri
-{
-public:
- A a;
- B b;
- C c;
- tri(){};
- tri(const A&_a,const B&_b,const C&_c)
- : a(_a),b(_b),c(_c) {}
-};
-template<class A,class B,class C>
-bool operator==(const tri<A,B,C>&x,const tri<A,B,C>&y)
-{ return x.a==y.a&&x.b==y.b&&x.c==y.c;}
-
-template<class A,class B,class C>
-bool operator<(const tri<A,B,C>&x,const tri<A,B,C>&y)
-{
- if(x.a<y.a)return 1;
- if(y.a<x.a)return 0;
- if(x.b<y.b)return 1;
- if(y.b<x.b)return 0;
- if(x.c<y.c)return 1;
- if(y.c<x.c)return 0;
- return 0;
-}
-
-double used_time();
-
-template<class T>
-class my_hash
-{
-public:
- int operator()(const T&t)const {return Hash(t);}
-};
-
-inline int Hash(int value) { return value; }
-#define MY_HASH_BASE hash_map<A,B,my_hash<A> >
-
-template<class A,class B>
-class leda_h_array : public MY_HASH_BASE
-{
-private:
- B init;
-public:
- leda_h_array() : MY_HASH_BASE() {}
- leda_h_array(const B&_init)
- : MY_HASH_BASE(),init(_init) {}
- bool defined(const A&a) const
- { return find(a)!=this->end(); }
- const B&operator[](const A&a)const
- {
- typename MY_HASH_BASE::const_iterator pos=find(a);
- if( pos==this->end() )
- return init;
- else
- return pos->second;
- }
- B&operator[](const A&a)
- {
- typename MY_HASH_BASE::iterator pos=find(a);
- if( pos==this->end() )
- {
- insert(MY_HASH_BASE::value_type(a,init));
- pos=find(a);
- iassert(pos!=this->end());
- }
- return pos->second;
- }
- const B&initValue()const
- {return init;}
-};
-
-#define forall_defined_h(a,b,c,d) for(typename leda_h_array<a,b>::const_iterator __jj__=(d).begin();__jj__!=(d).end()&&((c=__jj__->first),1); ++__jj__)
-template<class T,class U>
-ostream & operator<<(ostream&out,const leda_h_array<T,U>&w)
-{
- T t;
- bool makeNl=0;
- out << "h_array{";
- forall_defined_h(T,U,t,w)
- {
- if( makeNl )
- out << "\n ";
- out << "EL:" << t << " INH:" << w[t] << ".";
- makeNl=1;
- }
- return out << "}\n";
-}
-
-template<class T,class U>
-istream & operator>>(istream&in,leda_h_array<T,U>&)
-{
- return in;
-}
-
-template<class A,class B>
-bool operator==(const leda_h_array<A,B>&p1,const leda_h_array<A,B>&p2)
-{
- A v;
- forall_defined_h(A,B,v,p1)
- if( !( p1[v]==p2[v]) ) return 0;
- forall_defined_h(A,B,v,p2)
- if( !( p1[v]==p2[v]) ) return 0;
- return 1;
-}
-
-template<class T>
-int count_elements(T a,T b)
-{
- int c=0;
- while(a!=b)
- {
- a++;
- c++;
- }
- return c;
-}
-
-template<class T>
-T normalize_if_possible_with_increment(T*a,T*b,int increment)
-{
- T sum=0;
- for(T*i=a;i!=b;i+=increment)
- sum+=*i;
- if( sum )
- for(T*i=a;i!=b;i+=increment)
- *i/=sum;
- else
- {
- T factor=increment/(b-a);
- for(T*i=a;i!=b;i+=increment)
- *i=factor;
- }
- return sum;
-}
-
-template<class T>
-inline int m_comp_3way(T a,T b,int n)
-{
- int _n=0;
- while((_n++<n) && a && b)
- {
- const typename T::value_type &aa=*a;
- const typename T::value_type &bb=*b;
- if( aa<bb )return 1;
- if( bb<aa )return -1;
- ++a;
- ++b;
- }
- return 0;
-}
-
-template<class T>
-void smooth_standard(T*a,T*b,double p)
-{
- int n=b-a;
- if( n==0 )
- return;
- double pp=p/n;
- for(T*i=a;i!=b;++i)
- *i = (1.0-p)*(*i)+pp;
-}
-
-template<class T>
-const T *conv(typename vector<T>::const_iterator i)
-{
- return &(*i);
-}
-#if __GNUC__>2
-template<class T>
-T *conv(typename vector<T>::iterator i)
-{
- return &(*i);
-}
-#endif
-
-/*template<class T>
-const T *conv(const T*x)
-{
- return x;
-}*/
-template<class T>
-T *conv(T*x)
-{
- return x;
-}
-
-#endif
diff --git a/scripts/training/giza-pp/mkcls-v2/myleda.h b/scripts/training/giza-pp/mkcls-v2/myleda.h
deleted file mode 100644
index 81091c9..0000000
--- a/scripts/training/giza-pp/mkcls-v2/myleda.h
+++ /dev/null
@@ -1,232 +0,0 @@
-/*
-
-Copyright (C) 1997,1998,1999,2000,2001 Franz Josef Och
-
-mkcls - a program for making word classes .
-
-This program is free software; you can redistribute it and/or
-modify it under the terms of the GNU General Public License
-as published by the Free Software Foundation; either version 2
-of the License, or (at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with this program; if not, write to the Free Software
-Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
-USA.
-
-*/
-
-
-
-#ifndef myleda_HEADER_defined
-#define myleda_HEADER_defined
-#include <map>
-#include <set>
-#include <tr1/unordered_map>
-#include "myassert.h"
-#include "FixedArray.h"
-using namespace std;
-
-template<class T>
-class leda_array : public FixedArray<T>
-{
-public:
- leda_array() {}
- leda_array(int n) : FixedArray<T>(n) {}
-};
-
-template<class T>
-class leda_set : public set<T>
-{
-public:
- bool member(const T&m) const
- { return this->count(m)!=0; }
- void del(const T&m)
- { this->erase(m); }
-};
-#define forall_set(a,b,c) for(a::iterator __i__=c.begin();__i__!=c.end()&&((b=*__i__),1);++__i__)
-template<class T>
-leda_set<T> operator&(const leda_set<T>&a,const leda_set<T>&b)
-{
- leda_set<T>c;
- insert_iterator<set<T> > iter(c,c.begin());
- set_intersection(a.begin(),a.end(),b.begin(),b.end(),iter);
- return c;
-}
-template<class T>
-leda_set<T> operator-(const leda_set<T>&a,const leda_set<T>&b)
-{
- leda_set<T>c;
- insert_iterator<set<T> > iter(c,c.begin());
- set_difference(a.begin(),a.end(),b.begin(),b.end(),iter);
- return c;
-}
-
-template<class A,class B>
-class leda_d_array : public map<A,B>
-{
-private:
- B init;
-public:
- bool defined(const A&a) const
- { return find(a)!=this->end(); }
- const B&operator[](const A&a)const
- {
- typename map<A,B>::const_iterator pos=find(a);
- iassert(pos!=this->end());
- if( pos==this->end() )
- return init;
- else
- return pos->second;
- }
- B&operator[](const A&a)
- {
- typename map<A,B>::iterator pos=find(a);
- if( pos==this->end() )
- {
- insert(map<A,B>::value_type(a,init));
- pos=find(a);
- iassert(pos!=this->end());
- }
- return pos->second;
- }
-};
-
-#define forall_defined_d(a,b,c,d) for(typename leda_d_array<a,b>::const_iterator __ii__=(d).begin();__ii__!=(d).end()&&((c=__ii__->first),1) ;++__ii__)
-#define forall_d(a,b,c,d) for(typename leda_d_array<a,b>::const_iterator __ii__=(d).begin();__ii__!=(d).end()&&((c=__ii__->second),1);++__ii__)
-
-double used_time();
-
-template<class T>
-class my_hash
-{
-public:
- int operator()(const T&t)const {return Hash(t);}
-};
-
-inline int Hash(int value) { return value; }
-#define MY_HASH_BASE std::tr1::unordered_map<A,B>
-
-template<class A,class B>
-class leda_h_array : public MY_HASH_BASE
-{
-private:
- B init;
-public:
- leda_h_array() {}
- leda_h_array(const B&_init)
- : MY_HASH_BASE(),init(_init) {}
- bool defined(const A&a) const
- { return find(a)!=this->end(); }
- const B&operator[](const A&a)const
- {
- typename MY_HASH_BASE::const_iterator pos=this->find(a);
-
- if( pos==this->end() )
- return init;
- else
- return pos->second;
- }
- B&operator[](const A&a)
- {
- typename MY_HASH_BASE::iterator pos=this->find(a);
- if( pos==this->end() )
- {
- this->insert(typename MY_HASH_BASE::value_type(a,init));
- pos=this->find(a);
- iassert(pos!=this->end());
- }
- return pos->second;
- }
-};
-
-#define forall_defined_h(a,b,c,d) for(typename leda_h_array<a,b>::const_iterator __jj__=(d).begin();__jj__!=(d).end()&&((c=__jj__->first),1); ++__jj__)
-#define forall_defined_h2(a,b,c,d) for(leda_h_array<a,b>::const_iterator __jj__=(d).begin();__jj__!=(d).end()&&((c=__jj__->first),1); ++__jj__)
-#define forall_h(a,b,c,d) for(typename leda_h_array<a,b>::const_iterator __jjj__=(d).begin();__jjj__!=(d).end()&&((c=__jjj__->second),1);++__jjj__)
-
-
-template<class T> int compare(const T&a,const T&b)
-{if(a==b)return 0; else if(a<b) return -1; else return 1;}
-
-template<class T,class U>
-ostream & operator<<(ostream&out,const leda_h_array<T,U>&w)
-{
- T t;
- bool makeNl=0;
- out << "h_array{";
- forall_defined_h(T,U,t,w)
- {
- if( makeNl )
- out << "\n ";
- out << "EL:" << t << " INH:" << w[t] << ".";
- makeNl=1;
- }
- return out << "}\n";
-}
-template<class T,class U>
-ostream & operator<<(ostream&out,const leda_d_array<T,U>&w)
-{
- T t;
- bool makeNl=0;
- out << "h_array{";
- forall_defined_h(T,U,t,w)
- {
- if( makeNl )
- out << "\n ";
- out << "EL:" << t << " INH:" << w[t] << ".";
- makeNl=1;
- }
- return out << "}\n";
-}
-
-template<class T>
-ostream&printSet(ostream&out,const leda_set<T>&s)
-{
- bool first=1;
- T t;
- out << "{";
- forall_set(typename set<T>,t,s)
- {
- if( first==0 )
- out << ", ";
- out << t;
- first=0;
- }
- return out << "}\n";
-}
-
-template<class T,class U>
-istream & operator>>(istream&in,leda_h_array<T,U>&)
-{
- return in;
-}
-
-template<class A,class B>
-bool operator==(const leda_h_array<A,B>&p1,const leda_h_array<A,B>&p2)
-{
- A v;
- forall_defined_h(A,B,v,p1)
- if( !( p1[v]==p2[v]) ) return 0;
- forall_defined_h(A,B,v,p2)
- if( !( p1[v]==p2[v]) ) return 0;
- return 1;
-}
-template<class A,class B>
-bool operator==(const leda_d_array<A,B>&p1,const leda_d_array<A,B>&p2)
-{
- A v;
- forall_defined_d(A,B,v,p1)
- if( !( p1[v]==p2[v]) ) return 0;
- forall_defined_d(A,B,v,p2)
- if( !( p1[v]==p2[v]) ) return 0;
- return 1;
-}
-
-
-
-#endif
diff --git a/scripts/training/giza-pp/mkcls-v2/mystl.h b/scripts/training/giza-pp/mkcls-v2/mystl.h
deleted file mode 100644
index 0eb311f..0000000
--- a/scripts/training/giza-pp/mkcls-v2/mystl.h
+++ /dev/null
@@ -1,118 +0,0 @@
-/*
-
-Copyright (C) 1997,1998,1999,2000,2001 Franz Josef Och
-
-mkcls - a program for making word classes .
-
-This program is free software; you can redistribute it and/or
-modify it under the terms of the GNU General Public License
-as published by the Free Software Foundation; either version 2
-of the License, or (at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with this program; if not, write to the Free Software
-Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
-USA.
-
-*/
-
-
-
-#ifndef MY_STL_H_DEFINED
-#define MY_STL_H_DEFINED
-#include <string>
-#include <utility>
-#include <tr1/unordered_map>
-#include <cmath>
-
-using namespace std;
-
-namespace std {
- namespace tr1 {
- template <typename T, typename V>
- struct hash<pair<T, V> > {
- static inline void hash_combine(std::size_t & seed, const T & v) {
- hash<T> hasher;
- seed ^= hasher(v) + 0x9e3779b9 + (seed << 6) + (seed >> 2);
- }
-
- size_t operator()(const std::pair<T, V>& x) const {
- size_t h = 0;
- hash_combine(h, x.first);
- hash_combine(h, x.second);
- return h;
- }
- };
- }
-}
-
-#define over_string(a,i) for(unsigned int i=0;i<a.length();i++)
-
-template<class T1,class T2>
-istream& operator>>(istream &in,pair<T1,T2> &ir)
-{
- char c;
- do in.get(c); while (in && isspace(c));
- if (!in) return in;
- if (c != '(') in.putback(c);
- in >> ir.first;
- do in.get(c); while (isspace(c));
- if (c != ',') in.putback(c);
- in >> ir.second;
- do in.get(c); while (c == ' ');
- if (c != ')') in.putback(c);
- return in;
-}
-
-template<class T1,class T2>
-ostream& operator<<(ostream &out,const pair<T1,T2> &ir)
-{
- out << "(" << ir.first << "," << ir.second << ")";
- return out;
-}
-
-void printSpaces(ostream&out,int n);
-void mysplit(const string &s,string &s1,string &s2);
-string untilChar(const string&s,char c);
-
-template<class A,class B,class C>
-class tri
-{
-public:
- A a;
- B b;
- C c;
- tri(){};
- tri(const A&_a,const B&_b,const C&_c)
- : a(_a),b(_b),c(_c) {}
-};
-template<class A,class B,class C>
-bool operator==(const tri<A,B,C>&x,const tri<A,B,C>&y)
-{ return x.a==y.a&&x.b==y.b&&x.c==y.c;}
-
-template<class A,class B,class C>
-bool operator<(const tri<A,B,C>&x,const tri<A,B,C>&y)
-{
- if(x.a<y.a)
- return 1;
- if(y.a<x.a)
- return 0;
-
- if(x.b<y.b)
- return 1;
- if(y.b<x.b)
- return 0;
-
- if(x.c<y.c)
- return 1;
- if(y.c<x.c)
- return 0;
- return 0;
-}
-
-#endif
diff --git a/scripts/training/normalize-punctuation.pl b/scripts/training/normalize-punctuation.pl
index f37e586..8f3f082 100755
--- a/scripts/training/normalize-punctuation.pl
+++ b/scripts/training/normalize-punctuation.pl
@@ -7,6 +7,7 @@
use strict;
use warnings;
use utf8;
+use v5.12;
binmode(STDIN, ":utf8");
binmode(STDOUT, ":utf8");
@@ -78,7 +79,7 @@
s/(\.+)\"(\s*[^<])/\"$1$2/g; # don't fix period at end of sentence
}
- print STDERR $_ if //;
+# print STDERR "BAD LINE (with <feff>): $_" if //;
if ($language eq "de" || $language eq "es" || $language eq "cz" || $language eq "cs" || $language eq "fr") {
s/(\d)\xA0(\d)/$1,$2/g;
diff --git a/scripts/training/paralign.pl b/scripts/training/paralign.pl
index d198b54..d5159a7 100755
--- a/scripts/training/paralign.pl
+++ b/scripts/training/paralign.pl
@@ -24,6 +24,8 @@
$args{lc $key} = $value;
}
+my $aligner_conf = $args{conf} || "$JOSHUA/scripts/training/templates/alignment/word-align.conf";
+
my $cachepipe = new CachePipe();
$cachepipe->omit_cmd();
@@ -39,7 +41,7 @@
if ($args{aligner} eq "giza") {
run_giza($chunkdir, $chunkno, $args{num_threads} > 1);
} elsif ($args{aligner} eq "berkeley") {
- run_berkeley_aligner($chunkdir, $chunkno);
+ run_berkeley_aligner($chunkdir, $chunkno, $aligner_conf);
} elsif ($args{aligner} eq "jacana") {
run_jacana_aligner($chunkdir, $chunkno);
}
@@ -59,10 +61,10 @@
}
sub run_berkeley_aligner {
- my ($chunkdir,$chunkno) = @_;
+ my ($chunkdir, $chunkno, $aligner_conf) = @_;
# copy and modify the config file
- open FROM, "$JOSHUA/scripts/training/templates/alignment/word-align.conf" or die "can't read berkeley alignment template";
+ open FROM, $aligner_conf or die "can't read berkeley alignment template";
open TO, ">", "alignments/$chunkno/word-align.conf" or die "can't write to 'alignments/$chunkno/word-align.conf'";
while (<FROM>) {
s/<SOURCE>/$args{source}.$chunkno/g;
diff --git a/scripts/training/paste b/scripts/training/paste
new file mode 100755
index 0000000..4a6f9bd
--- /dev/null
+++ b/scripts/training/paste
@@ -0,0 +1,27 @@
+#!/usr/bin/perl
+
+# "Safe" version of paste that dies if the input files have different lengths.
+
+use strict;
+use warnings;
+use FileHandle;
+use List::Util qw/sum reduce/;
+
+binmode STDIN, ':utf8';
+binmode STDOUT, ':utf8';
+
+my @fh = map { new FileHandle($_, "< :encoding(UTF-8)") } @ARGV;
+
+for (;;) {
+ my @lines = map { $_->getline } @fh;
+
+ my $num_done = grep { ! defined $_ } @lines;
+ if ($num_done > 0 and $num_done < @lines) {
+ print STDERR "* FATAL! unequal file lengths.\n";
+ exit 1;
+ }
+ last if $num_done == @lines;
+
+ chomp(@lines);
+ print join("\t", @lines) . $/;
+}
diff --git a/scripts/training/penn-treebank-tokenizer.perl b/scripts/training/penn-treebank-tokenizer.perl
index ca49763..2b2165e 100755
--- a/scripts/training/penn-treebank-tokenizer.perl
+++ b/scripts/training/penn-treebank-tokenizer.perl
@@ -6,18 +6,15 @@
binmode(STDIN, ":encoding(utf8)");
binmode(STDOUT, ":encoding(utf8)");
-use FindBin qw($Bin);
use strict;
-#use Time::HiRes;
-my $mydir = "$Bin/nonbreaking_prefixes";
+my $mydir = "$ENV{JOSHUA}/scripts/training/nonbreaking_prefixes";
my %NONBREAKING_PREFIX = ();
my $language = "en";
-my $QUIET = 0;
+my $QUIET = 1;
my $HELP = 0;
-
my $use_penn_treebank_tokenization = 1;
#my $start = [ Time::HiRes::gettimeofday( ) ];
@@ -25,7 +22,7 @@
while (@ARGV) {
$_ = shift;
/^-l$/ && ($language = shift, next);
- /^-q$/ && ($QUIET = 1, next);
+ /^-v$/ && ($QUIET = 0, next);
/^-h$/ && ($HELP = 1, next);
}
diff --git a/scripts/training/pipeline.pl b/scripts/training/pipeline.pl
index ae42532..92ddbb5 100755
--- a/scripts/training/pipeline.pl
+++ b/scripts/training/pipeline.pl
@@ -39,14 +39,14 @@
my $HADOOP = $ENV{HADOOP};
my $MOSES = $ENV{MOSES};
-delete $ENV{GREP_OPTIONS};
-
+my $METEOR = $ENV{METEOR};
my $THRAX = "$JOSHUA/thrax";
+delete $ENV{GREP_OPTIONS};
die not_defined("JAVA_HOME") unless exists $ENV{JAVA_HOME};
my (@CORPORA,$TUNE,$TEST,$ALIGNMENT,$SOURCE,$TARGET,@LMFILES,$GRAMMAR_FILE,$GLUE_GRAMMAR_FILE,$_TUNE_GRAMMAR_FILE,$_TEST_GRAMMAR_FILE,$THRAX_CONF_FILE, $_JOSHUA_CONFIG, $_JOSHUA_ARGS);
-my $FIRST_STEP = "FIRST";
+my $FIRST_STEP = "SUBSAMPLE";
my $LAST_STEP = "LAST";
my $LMFILTER = "$ENV{HOME}/code/filter/filter";
@@ -77,7 +77,7 @@
my $BUNDLER = "$JOSHUA/scripts/support/run_bundler.py";
my $STARTDIR;
my $RUNDIR = $STARTDIR = getcwd();
-my $GRAMMAR_TYPE = "hiero"; # or "itg" or "samt" or "ghkm" or "phrase" or "phrasal"
+my $GRAMMAR_TYPE = undef; # hiero, itg, samt, ghkm, phrase, or moses
my $SEARCH_ALGORITHM = "cky"; # or "stack" (for phrase-based)
# Which GHKM extractor to use ("galley" or "moses")
@@ -92,6 +92,9 @@
# gzip-aware cat
my $CAT = "$SCRIPTDIR/training/scat";
+# custom version of paste that dies on unequal file lengths
+my $PASTE = "$SCRIPTDIR/training/paste";
+
# where processed data files are stored
my $DATA_DIR = "data";
@@ -100,6 +103,7 @@
# Which aligner to use. The options are "giza" or "berkeley".
my $ALIGNER = "giza"; # "berkeley" or "giza" or "jacana"
+my $ALIGNER_CONF = "$JOSHUA/scripts/training/templates/alignment/word-align.conf";
# Filter rules to the following maximum scope (Hopkins & Langmead, 2011).
my $SCOPE = 3;
@@ -166,19 +170,19 @@
my $DO_BUILD_CLASS_LM = 0;
my $CLASS_LM_CORPUS = undef;
my $CLASS_MAP = undef;
-my $CLASS_LM_ORDER = 9;
+my $CLASS_LM_ORDER = 5;
# whether to tokenize and lowercase training, tuning, and test data
my $DO_PREPARE_CORPORA = 1;
-# how many optimizer runs to perform
-my $OPTIMIZER_RUNS = 1;
+# compute the nth optimizer run
+my $OPTIMIZER_RUN = 1;
# what to use to create language models ("berkeleylm" or "srilm")
my $LM_GEN = "kenlm";
my $LM_OPTIONS = "";
-my @STEPS = qw[FIRST SUBSAMPLE ALIGN PARSE THRAX GRAMMAR PHRASE TUNE MERT PRO TEST LAST];
+my @STEPS = qw[FIRST SUBSAMPLE ALIGN PARSE THRAX MODEL GRAMMAR PHRASE TUNE MERT PRO TEST LAST];
my %STEPS = map { $STEPS[$_] => $_ + 1 } (0..$#STEPS);
# Methods to use for merging alignments (see Koehn et al., 2003).
@@ -189,7 +193,11 @@
my $MERGE_LMS = 0;
# Which tuner to use by default
-my $TUNER = "mert"; # or pro, mira, or kbmira (the latter calling out to Moses)
+my @TUNERS = ("mert", "pro", "mira", "adagrad", "kbmira");
+my $TUNER = "mert";
+
+# The metric to update to
+my $METRIC = "BLEU 4 closest";
# The number of iterations of the mira to run
my $TUNER_ITERATIONS = 15;
@@ -221,6 +229,7 @@
"aligner=s" => \$ALIGNER,
"alignment=s" => \$ALIGNMENT,
"aligner-mem=s" => \$ALIGNER_MEM,
+ "aligner-conf=s" => \$ALIGNER_CONF,
"giza-merge=s" => \$GIZA_MERGE,
"source=s" => \$SOURCE,
"target=s" => \$TARGET,
@@ -239,7 +248,7 @@
"tune-grammar=s" => \$_TUNE_GRAMMAR_FILE,
"test-grammar=s" => \$_TEST_GRAMMAR_FILE,
"grammar=s" => \$GRAMMAR_FILE,
- "glue-grammar=s" => \$GLUE_GRAMMAR_FILE,
+ "model=s" => \$GRAMMAR_FILE,
"maxspan=i" => \$MAXSPAN,
"mbr!" => \$DO_MBR,
"type=s" => \$GRAMMAR_TYPE,
@@ -250,6 +259,7 @@
"maxlen-test=i" => \$MAXLEN_TEST,
"tokenizer-source=s" => \$TOKENIZER_SOURCE,
"tokenizer-target=s" => \$TOKENIZER_TARGET,
+ "normalizer=s" => \$NORMALIZER,
"joshua-config=s" => \$_JOSHUA_CONFIG,
"joshua-args=s" => \$_JOSHUA_ARGS,
"joshua-mem=s" => \$JOSHUA_MEM,
@@ -261,6 +271,7 @@
"tuner=s" => \$TUNER,
"tuner-mem=s" => \$TUNER_MEM,
"tuner-iterations=i" => \$TUNER_ITERATIONS,
+ "tuner-metric=s" => \$METRIC,
"thrax=s" => \$THRAX,
"thrax-conf=s" => \$THRAX_CONF_FILE,
"jobs=i" => \$NUM_JOBS,
@@ -281,6 +292,7 @@
"class-lm!" => \$DO_BUILD_CLASS_LM,
"class-lm-corpus=s" => \$CLASS_LM_CORPUS,
"class-map=s" => \$CLASS_MAP,
+ "optimizer-run=i" => \$OPTIMIZER_RUN,
);
if (! $retval) {
@@ -294,8 +306,7 @@
my $DOING_LATTICES = 0;
-# Prepend a space to the arguments list if it's non-empty and doesn't already have the space.
-my $JOSHUA_ARGS = $_JOSHUA_ARGS;
+my $JOSHUA_ARGS = (defined $_JOSHUA_ARGS) ? $_JOSHUA_ARGS : "";
my %DATA_DIRS = (
train => get_absolute_path("$RUNDIR/$DATA_DIR/train"),
@@ -303,6 +314,11 @@
test => get_absolute_path("$RUNDIR/$DATA_DIR/test"),
);
+if (! -x $NORMALIZER) {
+ print "* FATAL: couldn't find normalizer '$NORMALIZER'\n";
+ exit 1;
+}
+
# capitalize these to offset a common error:
$FIRST_STEP = uc($FIRST_STEP);
$LAST_STEP = uc($LAST_STEP);
@@ -364,10 +380,16 @@
}
}
+my @GRAMMAR_TYPES = qw/hiero samt ghkm phrase moses/;
+if (! defined $GRAMMAR_TYPE or ! in($GRAMMAR_TYPE,\@GRAMMAR_TYPES)) {
+ print "* FATAL: You must define --type (" . join("|", @GRAMMAR_TYPES) . ")\n";
+ exit 47;
+}
+
# case-normalize this
$GRAMMAR_TYPE = lc $GRAMMAR_TYPE;
-if ($GRAMMAR_TYPE eq "phrase") {
+if ($GRAMMAR_TYPE eq "phrase" or $GRAMMAR_TYPE eq "moses") {
$SEARCH_ALGORITHM = "stack";
$MAXSPAN = 0;
}
@@ -503,13 +525,13 @@
exit 1;
}
-if ($GRAMMAR_TYPE eq "phrase" and ! defined $MOSES) {
- print "* FATAL: building phrase-based models (--type phrase) requires setting the MOSES environment variable\n";
+if ($GRAMMAR_TYPE eq "moses" and ! defined $MOSES) {
+ print "* FATAL: building Moses phrase-based models (--type moses) requires setting the MOSES environment variable\n";
exit 1;
}
-if ($TUNER ne "mert" and $TUNER ne "zmert" and $TUNER ne "mira" and $TUNER ne "local-mira" and $TUNER ne "pro" and $TUNER ne "kbmira") {
- print "* FATAL: --tuner must be one of '[z]mert', 'pro', '[local]-mira', or 'kbmira'.\n";
+if (! in($TUNER, \@TUNERS)) {
+ print "* FATAL: --tuner must be one of " . join(", ", @TUNERS) . $/;
exit 1;
}
@@ -536,7 +558,7 @@
## Dependent variable setting ######################################################################
####################################################################################################
-my $OOV = ($GRAMMAR_TYPE eq "hiero" or $GRAMMAR_TYPE eq "itg" or $GRAMMAR_TYPE eq "phrase") ? "X" : "OOV";
+my $OOV = ($GRAMMAR_TYPE eq "hiero" or $GRAMMAR_TYPE eq "itg" or $GRAMMAR_TYPE eq "phrase" or $GRAMMAR_TYPE eq "moses") ? "X" : "OOV";
# The phrasal system should use the ITG grammar, allowing for limited distortion
if ($GRAMMAR_TYPE eq "phrasal") {
@@ -590,33 +612,25 @@
}
}
-if ($FIRST_STEP ne "FIRST") {
- if (@CORPORA > 1) {
- print "* FATAL: you can't skip steps if you specify more than one --corpus\n";
- exit(1);
- }
-
- if (eval { goto $FIRST_STEP }) {
- print "* Skipping to step $FIRST_STEP\n";
- goto $FIRST_STEP;
- } else {
- print "* No such step $FIRST_STEP\n";
- exit 1;
- }
-}
+# Record the preprocessing scripts that were used
+mkdir("scripts") unless -e "scripts";
+unlink "scripts/normalize.$SOURCE";
+unlink "scripts/normalize.$TARGET";
+symlink $NORMALIZER, "scripts/normalize.$SOURCE";
+symlink $NORMALIZER, "scripts/normalize.$TARGET";
+symlink $TOKENIZER_SOURCE, "scripts/tokenize.$SOURCE";
+symlink $TOKENIZER_TARGET, "scripts/tokenize.$TARGET";
## STEP 1: filter and preprocess corpora #############################
-FIRST:
- ;
-if (defined $ALIGNMENT) {
+if (defined $ALIGNMENT and $STEPS{$FIRST_STEP} < $STEPS{ALIGN}) {
print "* FATAL: it doesn't make sense to provide an alignment and then do\n";
print " tokenization. Either remove --alignment or specify a first step\n";
print " of Thrax (--first-step THRAX)\n";
exit 1;
}
-if (@CORPORA == 0) {
+if (@CORPORA == 0 and $STEPS{$FIRST_STEP} < $STEPS{TUNE}) {
print "* FATAL: need at least one training corpus (--corpus)\n";
exit 1;
}
@@ -625,11 +639,9 @@
my %PREPPED = (
TRAIN => 0,
TUNE => 0,
- TEST => 0
- );
+ TEST => 0);
-
-if ($DO_PREPARE_CORPORA) {
+if (@CORPORA > 0 && $DO_PREPARE_CORPORA) {
my $prefixes = prepare_data("train",\@CORPORA,$MAXLEN);
# used for parsing
@@ -664,7 +676,14 @@
$PREPPED{TEST} = 1;
}
-maybe_quit("FIRST");
+## Use of GOTO considered very useful
+if (eval { goto $FIRST_STEP }) {
+ print "* Skipping to step $FIRST_STEP\n";
+ goto $FIRST_STEP;
+} else {
+ print "* No such step $FIRST_STEP\n";
+ exit 1;
+}
## SUBSAMPLE #########################################################
@@ -672,32 +691,32 @@
;
# subsample
- if ($DO_SUBSAMPLE) {
- mkdir("$DATA_DIRS{train}/subsampled") unless -d "$DATA_DIRS{train}/subsampled";
+if ($DO_SUBSAMPLE) {
+ mkdir("$DATA_DIRS{train}/subsampled") unless -d "$DATA_DIRS{train}/subsampled";
- $cachepipe->cmd("subsample-manifest",
- "echo corpus > $DATA_DIRS{train}/subsampled/manifest",
- "$DATA_DIRS{train}/subsampled/manifest");
+ $cachepipe->cmd("subsample-manifest",
+ "echo corpus > $DATA_DIRS{train}/subsampled/manifest",
+ "$DATA_DIRS{train}/subsampled/manifest");
- $cachepipe->cmd("subsample-testdata",
- "cat $TUNE{source} $TEST{source} > $DATA_DIRS{train}/subsampled/test-data",
- $TUNE{source},
- $TEST{source},
- "$DATA_DIRS{train}/subsampled/test-data");
+ $cachepipe->cmd("subsample-testdata",
+ "cat $TUNE{source} $TEST{source} > $DATA_DIRS{train}/subsampled/test-data",
+ $TUNE{source},
+ $TEST{source},
+ "$DATA_DIRS{train}/subsampled/test-data");
- $cachepipe->cmd("subsample",
- "java -Xmx4g -Dfile.encoding=utf8 -cp $JOSHUA/bin:$JOSHUA/lib/commons-cli-2.0-SNAPSHOT.jar joshua.subsample.Subsampler -e $TARGET -f $SOURCE -epath $DATA_DIRS{train}/ -fpath $DATA_DIRS{train}/ -output $DATA_DIRS{train}/subsampled/subsampled.$MAXLEN -ratio 1.04 -test $DATA_DIRS{train}/subsampled/test-data -training $DATA_DIRS{train}/subsampled/manifest",
- "$DATA_DIRS{train}/subsampled/manifest",
- "$DATA_DIRS{train}/subsampled/test-data",
- $TRAIN{source},
- $TRAIN{target},
- "$DATA_DIRS{train}/subsampled/subsampled.$MAXLEN.$TARGET",
- "$DATA_DIRS{train}/subsampled/subsampled.$MAXLEN.$SOURCE");
+ $cachepipe->cmd("subsample",
+ "java -Xmx4g -Dfile.encoding=utf8 -cp $JOSHUA/bin:$JOSHUA/lib/commons-cli-2.0-SNAPSHOT.jar joshua.subsample.Subsampler -e $TARGET -f $SOURCE -epath $DATA_DIRS{train}/ -fpath $DATA_DIRS{train}/ -output $DATA_DIRS{train}/subsampled/subsampled.$MAXLEN -ratio 1.04 -test $DATA_DIRS{train}/subsampled/test-data -training $DATA_DIRS{train}/subsampled/manifest",
+ "$DATA_DIRS{train}/subsampled/manifest",
+ "$DATA_DIRS{train}/subsampled/test-data",
+ $TRAIN{source},
+ $TRAIN{target},
+ "$DATA_DIRS{train}/subsampled/subsampled.$MAXLEN.$TARGET",
+ "$DATA_DIRS{train}/subsampled/subsampled.$MAXLEN.$SOURCE");
- # rewrite the symlinks to point to the subsampled corpus
- foreach my $lang ($TARGET,$SOURCE) {
- system("ln -sf subsampled/subsampled.$MAXLEN.$lang $DATA_DIRS{train}/corpus.$lang");
- }
+ # rewrite the symlinks to point to the subsampled corpus
+ foreach my $lang ($TARGET,$SOURCE) {
+ system("ln -sf subsampled/subsampled.$MAXLEN.$lang $DATA_DIRS{train}/corpus.$lang");
+ }
}
maybe_quit("SUBSAMPLE");
@@ -785,7 +804,8 @@
my $aligner_cmd = (
"$SCRIPTDIR/training/paralign.pl "
. " -aligner $ALIGNER"
- . " -num_threads 1"
+ . " -conf $ALIGNER_CONF"
+ . " -num_threads 2"
. " -giza_merge $GIZA_MERGE"
. " -aligner_mem $ALIGNER_MEM"
. " -source $SOURCE"
@@ -815,7 +835,6 @@
while (@children) {
my $old_child = shift @children;
waitpid( $old_child, 0 );
- print "child finished\n";
if ($next_chunk < $lastchunk + 1) {
my $new_child = fork();
@@ -862,8 +881,8 @@
# Parsing only happens for SAMT grammars.
-if ($FIRST_STEP eq "PARSE" and ($GRAMMAR_TYPE eq "hiero" or $GRAMMAR_TYPE eq "phrasal" or $GRAMMAR_TYPE eq "phrase")) {
- print STDERR "* FATAL: parsing doesn't apply to hiero grammars; You need to add '--type samt|ghkm'\n";
+if ($FIRST_STEP eq "PARSE" and ($GRAMMAR_TYPE eq "hiero" or $GRAMMAR_TYPE eq "phrasal" or $GRAMMAR_TYPE eq "phrase" or $GRAMMAR_TYPE eq "moses")) {
+ print STDERR "* FATAL: parsing only applies to GHKM and SAMT grammars; you need to add '--type samt|ghkm'\n";
exit;
}
@@ -919,6 +938,8 @@
## THRAX #############################################################
+MODEL:
+ ;
GRAMMAR:
;
THRAX:
@@ -976,17 +997,18 @@
exit(1);
}
-# Look for a pre-existing grammar, since building it is expensive, and something we want to
-# avoid if this is a rerun
+
+# Since this is an expensive step, we short-circuit it if the grammar file is present. I'm not
+# sure that this is the right behavior.
if (-e "grammar.gz" && ! -z "grammar.gz") {
chomp(my $is_empty = `gzip -cd grammar.gz | head | wc -l`);
$GRAMMAR_FILE = "grammar.gz" unless ($is_empty == 0);
}
-# If the grammar file wasn't specified
+# If the grammar file wasn't specified, or found, we need to build it!
if (! defined $GRAMMAR_FILE) {
- my $target_file = ($GRAMMAR_TYPE eq "hiero" or $GRAMMAR_TYPE eq "phrasal" or $GRAMMAR_TYPE eq "phrase") ? $TRAIN{target} : $TRAIN{parsed};
+ my $target_file = ($GRAMMAR_TYPE eq "ghkm" or $GRAMMAR_TYPE eq "samt") ? $TRAIN{parsed} : $TRAIN{target};
if ($GRAMMAR_TYPE eq "ghkm") {
if ($GHKM_EXTRACTOR eq "galley") {
@@ -1037,7 +1059,7 @@
$GRAMMAR_FILE = "grammar.gz";
- } elsif ($GRAMMAR_TYPE eq "phrase") {
+ } elsif ($GRAMMAR_TYPE eq "moses") {
mkdir("model") unless -d "model";
@@ -1071,23 +1093,20 @@
$cachepipe->cmd("build-ttable",
"$MOSES/scripts/training/train-model.perl -mgiza -mgiza-cpus $NUM_THREADS -dont-zip -first-step 6 -last-step 6 -external-bin-dir $MOSES/bin -f $SOURCE -e $TARGET -alignment grow-diag-final-and -max-phrase-length $MAX_PHRASE_LEN -score-options '--GoodTuring' -parallel -extract-file model/extract -lexical-file model/lex -phrase-translation-table model/phrase-table",
"model/lex.e2f",
- "model/extract.sorted.gz"
+ "model/extract.sorted.gz",
+ "model/phrase-table.gz",
);
$GRAMMAR_FILE = "model/phrase-table.gz";
- } elsif ($GRAMMAR_TYPE eq "samt" or $GRAMMAR_TYPE eq "hiero") {
-
- # Since this is an expensive step, we short-circuit it if the grammar file is present. I'm not
- # sure that this is the right behavior.
+ } elsif ($GRAMMAR_TYPE eq "samt" or $GRAMMAR_TYPE eq "hiero" or $GRAMMAR_TYPE eq "phrase") {
# create the input file
$cachepipe->cmd("thrax-input-file",
- "paste $TRAIN{source} $target_file $ALIGNMENT | perl -pe 's/\\t/ ||| /g' | grep -v '()' | grep -v '||| \\+\$' > $DATA_DIRS{train}/thrax-input-file",
+ "$PASTE $TRAIN{source} $target_file $ALIGNMENT | perl -pe 's/\\t/ ||| /g' | grep -v '()' | grep -v '||| \\+\$' > $DATA_DIRS{train}/thrax-input-file",
$TRAIN{source}, $target_file, $ALIGNMENT,
"$DATA_DIRS{train}/thrax-input-file");
-
# Rollout the hadoop cluster if needed. This causes $HADOOP to be defined (pointing to the
# unrolled directory).
start_hadoop_cluster() unless defined $HADOOP;
@@ -1095,7 +1114,7 @@
# put the hadoop files in place
my $THRAXDIR;
my $thrax_input;
- if ($HADOOP eq "hadoop") {
+ if (! defined $HADOOP or $HADOOP eq "") {
$THRAXDIR = "thrax";
$thrax_input = "$DATA_DIRS{train}/thrax-input-file"
@@ -1119,8 +1138,7 @@
system("mv $thrax_file.tmp $thrax_file");
$cachepipe->cmd("thrax-run",
- "$HADOOP/bin/hadoop jar $THRAX/bin/thrax.jar -D mapred.child.java.opts='-Xmx$HADOOP_MEM' $thrax_file $THRAXDIR > thrax.log 2>&1; rm -f grammar grammar.gz; $HADOOP/bin/hadoop fs -getmerge $THRAXDIR/final/ grammar.gz",
-# "$HADOOP/bin/hadoop jar $THRAX/bin/thrax.jar -D mapred.child.java.opts='-Xmx$HADOOP_MEM' $thrax_file $THRAXDIR > thrax.log 2>&1; rm -f grammar grammar.gz; $HADOOP/bin/hadoop fs -getmerge $THRAXDIR/final/ grammar.gz; $HADOOP/bin/hadoop fs -rmr $THRAXDIR",
+ "$HADOOP/bin/hadoop jar $THRAX/bin/thrax.jar -D mapred.child.java.opts='-Xmx$HADOOP_MEM' -D hadoop.tmp.dir=$TMPDIR $thrax_file $THRAXDIR > thrax.log 2>&1; rm -f grammar grammar.gz; $HADOOP/bin/hadoop fs -getmerge $THRAXDIR/final/ grammar.gz", #; $HADOOP/bin/hadoop fs -rm -r $THRAXDIR",
"$DATA_DIRS{train}/thrax-input-file",
$thrax_file,
"grammar.gz");
@@ -1129,14 +1147,14 @@
stop_hadoop_cluster() if $HADOOP eq "hadoop";
# cache the thrax-prep step, which depends on grammar.gz
- if ($HADOOP ne "hadoop") {
- $cachepipe->cmd("thrax-prep", "--cache-only");
- }
+# if ($HADOOP ne "hadoop") {
+# $cachepipe->cmd("thrax-prep", "--cache-only");
+# }
# clean up
# TODO: clean up real hadoop clusters too
# if ($HADOOP eq "hadoop") {
- # system("rm -rf $THRAXDIR hadoop hadoop-0.20.2");
+ # system("rm -rf $THRAXDIR hadoop hadoop-2.5.2");
# }
$GRAMMAR_FILE = "grammar.gz";
@@ -1166,6 +1184,29 @@
$PREPPED{TUNE} = 1;
}
+
+# figure out how many references there are
+my $numrefs = get_numrefs($TUNE{target});
+
+# make sure the dev source exists
+if (! -e $TUNE{source}) {
+ print STDERR "* FATAL: couldn't find tuning source file '$TUNE{source}'\n";
+ exit 1;
+}
+if ($numrefs > 1) {
+ for my $i (0..$numrefs-1) {
+ if (! -e "$TUNE{target}.$i") {
+ print STDERR "* FATAL: couldn't find tuning reference file '$TUNE{target}.$i'\n";
+ exit 1;
+ }
+ }
+} else {
+ if (! -e $TUNE{target}) {
+ print STDERR "* FATAL: couldn't find tuning reference file '$TUNE{target}'\n";
+ exit 1;
+ }
+}
+
sub compile_lm($) {
my $lmfile = shift;
if ($LM_TYPE eq "kenlm") {
@@ -1282,7 +1323,7 @@
my $mem = uc $BUILDLM_MEM;
my $class_lmfile = "class_lm.gz";
$cachepipe->cmd("classlm",
- "$JOSHUA/bin/lmplz -o 9 -T $TMPDIR -S $mem --discount_fallback=0.5 1 1.5 --verbose_header --text $CLASS_LM_CORPUS $LM_OPTIONS | gzip -9n > $class_lmfile",
+ "$JOSHUA/bin/lmplz -o $CLASS_LM_ORDER -T $TMPDIR -S $mem --discount_fallback=0.5 1 1.5 --verbose_header --text $CLASS_LM_CORPUS $LM_OPTIONS | gzip -9n > $class_lmfile",
"$CLASS_LM_CORPUS",
$class_lmfile);
}
@@ -1317,49 +1358,29 @@
system("mkdir -p $DATA_DIRS{tune}") unless -d $DATA_DIRS{tune};
-# figure out how many references there are
-my $numrefs = get_numrefs($TUNE{target});
-
-# make sure the dev source exist
-if (! -e $TUNE{source}) {
- print STDERR "* FATAL: couldn't fine tuning source file '$TUNE{source}'\n";
- exit 1;
-}
-if ($numrefs > 1) {
- for my $i (0..$numrefs-1) {
- if (! -e "$TUNE{target}.$i") {
- print STDERR "* FATAL: couldn't find tuning reference file '$TUNE{target}.$i'\n";
- exit 1;
- }
- }
-} else {
- if (! -e $TUNE{target}) {
- print STDERR "* FATAL: couldn't find tuning reference file '$TUNE{target}'\n";
- exit 1;
- }
-}
-
# Set $TUNE_GRAMMAR to a specifically-passed tuning grammar or the
# main default grammar. Then update it if filtering was requested and
# is possible.
my $TUNE_GRAMMAR = $_TUNE_GRAMMAR_FILE || $GRAMMAR_FILE;
-if ($DO_FILTER_TM and defined $TUNE_GRAMMAR and ! $DOING_LATTICES and ! defined $_TUNE_GRAMMAR_FILE) {
+if ($DO_FILTER_TM and defined $GRAMMAR_FILE and ! $DOING_LATTICES and ! defined $_TUNE_GRAMMAR_FILE) {
$TUNE_GRAMMAR = "$DATA_DIRS{tune}/grammar.filtered.gz";
- $cachepipe->cmd("filter-tune",
- "$SCRIPTDIR/support/filter_grammar.sh -g $GRAMMAR_FILE $FILTERING -v $TUNE{source} | $SCRIPTDIR/training/filter-rules.pl -bus$SCOPE | gzip -9n > $TUNE_GRAMMAR",
- $GRAMMAR_FILE,
- $TUNE{source},
- $TUNE_GRAMMAR);
+ if ($OPTIMIZER_RUN == 1 and ! is_packed($TUNE_GRAMMAR)) {
+ $cachepipe->cmd("filter-tune",
+ "$SCRIPTDIR/support/filter_grammar.sh -g $GRAMMAR_FILE $FILTERING -v $TUNE{source} | $SCRIPTDIR/training/filter-rules.pl -bus$SCOPE | gzip -9n > $TUNE_GRAMMAR",
+ $GRAMMAR_FILE,
+ $TUNE{source},
+ "$DATA_DIRS{tune}/grammar.filtered.gz");
+ }
}
# Create the glue grammars. This is done by looking at all the symbols in the grammar file and
# creating all the needed rules. This is only done if there is a $TUNE_GRAMMAR defined (which
# can be skipped if we skip straight to the tuning step).
-if (defined $TUNE_GRAMMAR and $GRAMMAR_TYPE ne "phrase") {
+if ($OPTIMIZER_RUN == 1 and defined $TUNE_GRAMMAR and $GRAMMAR_TYPE ne "phrase" and $GRAMMAR_TYPE ne "moses") {
if (! defined $GLUE_GRAMMAR_FILE) {
$cachepipe->cmd("glue-tune",
- "java -Xmx2g -cp $JOSHUA/lib/*:$THRAX/bin/thrax.jar edu.jhu.thrax.util.CreateGlueGrammar $TUNE_GRAMMAR > $DATA_DIRS{tune}/grammar.glue",
+ "java -Xmx2g -cp $JOSHUA/lib/args4j-2.0.29.jar:$JOSHUA/class joshua.decoder.ff.tm.CreateGlueGrammar -g $TUNE_GRAMMAR > $DATA_DIRS{tune}/grammar.glue",
get_file_from_grammar($TUNE_GRAMMAR),
"$DATA_DIRS{tune}/grammar.glue");
$GLUE_GRAMMAR_FILE = "$DATA_DIRS{tune}/grammar.glue";
@@ -1392,8 +1413,10 @@
if ($DOING_LATTICES) {
push(@feature_functions, "SourcePath");
+
+ $weightstr .= "SourcePath 1.0 ";
}
-if ($GRAMMAR_TYPE eq "phrase") {
+if ($GRAMMAR_TYPE eq "phrase" or $GRAMMAR_TYPE eq "moses") {
push(@feature_functions, "Distortion");
push(@feature_functions, "PhrasePenalty");
@@ -1412,14 +1435,14 @@
}
# Glue grammars are only needed for hierarchical models
- if ($GRAMMAR_TYPE ne "phrase") {
+ if ($GRAMMAR_TYPE ne "phrase" and $GRAMMAR_TYPE ne "moses") {
# Glue grammar
$weightstr .= "tm_${GLUE_OWNER}_0 1 ";
}
}
my $tm_type = $GRAMMAR_TYPE;
-if ($GRAMMAR_TYPE eq "phrase") {
+if ($GRAMMAR_TYPE eq "moses") {
$tm_type = "moses";
}
@@ -1432,11 +1455,22 @@
return $file;
}
-my $tunedir = "$RUNDIR/tune";
+# The first tuning run is just a symlink to the tune/ directory (for backward compat.)
+# Subsequent runs are under their run number
+my $tunedir;
+if ($OPTIMIZER_RUN == 1) {
+ $tunedir = "$RUNDIR/tune";
+ system("mkdir -p $tunedir") unless -d $tunedir;
+ symlink "$RUNDIR/tune", "$RUNDIR/tune/1";
+} else {
+ $tunedir = "$RUNDIR/tune/$OPTIMIZER_RUN";
+ system("mkdir -p $tunedir") unless -d $tunedir;
+}
+
system("mkdir -p $tunedir") unless -d $tunedir;
# Build the filtered tuning model
-my $tunemodeldir = "$tunedir/model";
+my $tunemodeldir = "$RUNDIR/tune/model";
# We build up this string with TMs to substitute in, if any are provided
my $tm_switch = "";
@@ -1447,19 +1481,22 @@
$tm_copy_config_args = " -tm0/type $tm_type -tm0/owner ${TM_OWNER} -tm0/maxspan $MAXSPAN";
}
# If we specified a new glue grammar, put that in
-if (defined $GLUE_GRAMMAR_FILE) {
- $tm_switch .= " --tm $GLUE_GRAMMAR_FILE";
- $tm_copy_config_args .= " -tm1/owner ${GLUE_OWNER}";
-} else {
+if ($GRAMMAR_TYPE eq "phrase" or $GRAMMAR_TYPE eq "moses") {
# if there is no glue grammar, remove it from the config template
$tm_copy_config_args .= " -tm1 DELETE";
+} elsif (defined $GLUE_GRAMMAR_FILE) {
+ $tm_switch .= " --tm $GLUE_GRAMMAR_FILE";
+ $tm_copy_config_args .= " -tm1/owner ${GLUE_OWNER}";
}
# Now build the bundle
-$cachepipe->cmd("tune-bundle",
- "$BUNDLER --force --symlink --absolute --verbose $JOSHUA_CONFIG $tunemodeldir --copy-config-options '-top-n $NBEST -output-format \"%i ||| %s ||| %f ||| %c\" -mark-oovs false -search $SEARCH_ALGORITHM -weights \"$weightstr\" $feature_functions ${tm_copy_config_args}' ${tm_switch}",
- $JOSHUA_CONFIG,
- get_file_from_grammar($TUNE_GRAMMAR) || $JOSHUA_CONFIG,);
+if ($OPTIMIZER_RUN == 1) {
+ $cachepipe->cmd("tune-bundle",
+ "$BUNDLER --force --symlink --absolute --verbose -T $TMPDIR $JOSHUA_CONFIG $tunemodeldir --copy-config-options '-top-n $NBEST -output-format \"%i ||| %s ||| %f ||| %c\" -mark-oovs false -search $SEARCH_ALGORITHM -weights \"$weightstr\" $feature_functions ${tm_copy_config_args}' ${tm_switch}",
+ $JOSHUA_CONFIG,
+ get_file_from_grammar($TUNE_GRAMMAR) || $JOSHUA_CONFIG,
+ "$tunemodeldir/run-joshua.sh");
+}
# Update the tune grammar to its new location in the bundle
if (defined $TUNE_GRAMMAR) {
@@ -1475,36 +1512,44 @@
}
}
-# Update the config file location
-$JOSHUA_CONFIG = "$tunedir/model/joshua.config";
+# Copy the generated config to the tunedir, and update the config file location
+system("cp $tunemodeldir/joshua.config $tunedir/joshua.config");
+$JOSHUA_CONFIG = "$tunedir/joshua.config";
# Write the decoder run command. The decoder will use the config file in the bundled
# directory, continually updating it.
+
+# If we're decoding a lattice, also output the source side path we chose
+$JOSHUA_ARGS = "";
+if ($DOING_LATTICES) {
+ $JOSHUA_ARGS .= " -maxlen 0 -lattice-decoding";
+}
$JOSHUA_ARGS .= " -output-format \"%i ||| %s ||| %f ||| %c\"";
+$JOSHUA_ARGS .= " $_JOSHUA_ARGS" if defined $_JOSHUA_ARGS;
open DEC_CMD, ">$tunedir/decoder_command";
-print DEC_CMD "cat $TUNE{source} | $tunedir/model/run-joshua.sh -m $JOSHUA_MEM -config $JOSHUA_CONFIG -threads $NUM_THREADS $JOSHUA_ARGS > $tunedir/output.nbest 2> $tunedir/joshua.log\n";
+print DEC_CMD "cat $TUNE{source} | $tunemodeldir/run-joshua.sh -m $JOSHUA_MEM -config $JOSHUA_CONFIG -threads $NUM_THREADS $JOSHUA_ARGS > $tunedir/output.nbest 2> $tunedir/joshua.log\n";
close(DEC_CMD);
chmod(0755,"$tunedir/decoder_command");
# tune
-if ($TUNER eq "mert" or $TUNER eq "zmert" or $TUNER eq "pro" or $TUNER eq "mira" or $TUNER eq "local-mira") {
- $cachepipe->cmd($TUNER,
- "$SCRIPTDIR/training/run_tuner.py $TUNE{source} $TUNE{target} --tunedir $tunedir --tuner $TUNER --decoder-config $JOSHUA_CONFIG --iterations $TUNER_ITERATIONS",
+if ($TUNER ne "kbmira") {
+ $cachepipe->cmd("${TUNER}-${OPTIMIZER_RUN}",
+ "$SCRIPTDIR/training/run_tuner.py $TUNE{source} $TUNE{target} --tunedir $tunedir --tuner $TUNER --decoder $tunedir/decoder_command --decoder-config $JOSHUA_CONFIG --decoder-output-file $tunedir/output.nbest --decoder-log-file $tunedir/joshua.log --iterations $TUNER_ITERATIONS --metric '$METRIC'",
$TUNE{source},
$JOSHUA_CONFIG,
get_file_from_grammar($TUNE_GRAMMAR) || $JOSHUA_CONFIG,
"$tunedir/joshua.config.final");
-} elsif ($TUNER eq "kbmira") { # Moses' batch MIRA
+} else { # Moses' batch kbmira
my $refs_path = $TUNE{target};
$refs_path .= "." if (get_numrefs($TUNE{target}) > 1);
my $extra_args = $JOSHUA_ARGS;
$extra_args =~ s/"/\\"/g;
- $cachepipe->cmd("mira",
- "$SCRIPTDIR/training/mira/run-mira.pl --mertdir $MOSES/bin --rootdir $MOSES/scripts --batch-mira --working-dir $tunedir --maximum-iterations $TUNER_ITERATIONS --nbest $NBEST --no-filter-phrase-table --decoder-flags \"-m $JOSHUA_MEM -threads $NUM_THREADS -moses $extra_args\" $TUNE{source} $refs_path $tunedir/model/run-joshua.sh $tunedir/model/joshua.config > $tunedir/mira.log 2>&1",
- get_file_from_grammar($TUNE_GRAMMAR),
+ $cachepipe->cmd("kbmira-${OPTIMIZER_RUN}",
+ "$SCRIPTDIR/training/mira/run-mira.pl --mertdir $MOSES/bin --rootdir $MOSES/scripts --batch-mira --working-dir $tunedir --maximum-iterations $TUNER_ITERATIONS --nbest $NBEST --no-filter-phrase-table --decoder-flags \"-m $JOSHUA_MEM -threads $NUM_THREADS -moses $extra_args\" $TUNE{source} $refs_path $tunemodeldir/run-joshua.sh $JOSHUA_CONFIG > $tunedir/mira.log 2>&1",
+ get_file_from_grammar($TUNE_GRAMMAR) || $JOSHUA_CONFIG,
$TUNE{source},
"$tunedir/joshua.config.final");
}
@@ -1512,10 +1557,7 @@
$JOSHUA_CONFIG = "$tunedir/joshua.config.final";
# Go to the next tuning run if tuning is the last step.
-if ($LAST_STEP eq "TUNE") {
- next;
-}
-
+maybe_quit("TUNE");
#################################################################
## TESTING ######################################################
@@ -1525,7 +1567,7 @@
;
# prepare the testing data
-if (! $PREPPED{TEST} and $DO_PREPARE_CORPORA) {
+if (! $PREPPED{TEST} and $DO_PREPARE_CORPORA and $OPTIMIZER_RUN == 1) {
my $prefixes = prepare_data("test",[$TEST],$MAXLEN_TEST);
$TEST{source} = "$DATA_DIRS{test}/$prefixes->{lowercased}.$SOURCE";
$TEST{target} = "$DATA_DIRS{test}/$prefixes->{lowercased}.$TARGET";
@@ -1537,26 +1579,26 @@
# Define the test grammar, if it was provided
my $TEST_GRAMMAR = $_TEST_GRAMMAR_FILE || $GRAMMAR_FILE;
-# Now filter, if its defined and should be done
-if ($DO_FILTER_TM and defined $TEST_GRAMMAR and ! $DOING_LATTICES and ! defined $_TEST_GRAMMAR_FILE) {
- $TEST_GRAMMAR = "$DATA_DIRS{test}/grammar.filtered.gz";
-
- $cachepipe->cmd("filter-test",
- "$SCRIPTDIR/support/filter_grammar.sh -g $GRAMMAR_FILE $FILTERING -v $TEST{source} | $SCRIPTDIR/training/filter-rules.pl -bus$SCOPE | gzip -9n > $TEST_GRAMMAR",
- $GRAMMAR_FILE,
- $TEST{source},
- $TEST_GRAMMAR);
+if ($DO_FILTER_TM and defined $GRAMMAR_FILE and ! $DOING_LATTICES and ! defined $_TEST_GRAMMAR_FILE) {
+ # On the first test run, we take some pains to prepare and pack the model, which won't have
+ # to be done for subsequent runs
+ if ($OPTIMIZER_RUN == 1 and ! is_packed($TEST_GRAMMAR)) {
+ $TEST_GRAMMAR = "$DATA_DIRS{test}/grammar.filtered.gz";
+
+ $cachepipe->cmd("filter-test",
+ "$SCRIPTDIR/support/filter_grammar.sh -g $GRAMMAR_FILE $FILTERING -v $TEST{source} | $SCRIPTDIR/training/filter-rules.pl -bus$SCOPE | gzip -9n > $TEST_GRAMMAR",
+ $GRAMMAR_FILE,
+ $TEST{source},
+ "$DATA_DIRS{test}/grammar.filtered.gz");
+ }
}
-my $testdir = "$RUNDIR/test";
-
-# Create and update the glue file, if the test grammar was provided (if not, we assume these
-# are in the $JOSHUA_CONFIG)
-if (defined $TEST_GRAMMAR and $GRAMMAR_TYPE ne "phrase") {
+# Create the glue grammar
+if ($OPTIMIZER_RUN == 1 and defined $TEST_GRAMMAR and $GRAMMAR_TYPE ne "phrase" and $GRAMMAR_TYPE ne "moses") {
if (! defined $GLUE_GRAMMAR_FILE) {
$cachepipe->cmd("glue-test",
- "java -Xmx1g -cp $JOSHUA/lib/*:$THRAX/bin/thrax.jar edu.jhu.thrax.util.CreateGlueGrammar $TEST_GRAMMAR > $DATA_DIRS{test}/grammar.glue",
- $TEST_GRAMMAR,
+ "java -Xmx2g -cp $JOSHUA/lib/args4j-2.0.29.jar:$JOSHUA/class joshua.decoder.ff.tm.CreateGlueGrammar -g $TEST_GRAMMAR > $DATA_DIRS{test}/grammar.glue",
+ get_file_from_grammar($TEST_GRAMMAR),
"$DATA_DIRS{test}/grammar.glue");
$GLUE_GRAMMAR_FILE = "$DATA_DIRS{test}/grammar.glue";
@@ -1571,45 +1613,70 @@
}
}
+# Create the test directory
+my $testdir;
+if ($OPTIMIZER_RUN == 1) {
+ $testdir = "$RUNDIR/test";
+ system("mkdir -p $testdir") unless -d $testdir;
+ symlink("$RUNDIR/test", "$RUNDIR/test/1");
+} else {
+ $testdir = "$RUNDIR/test/$OPTIMIZER_RUN";
+ system("mkdir -p $testdir") unless -d $testdir;
+}
+
$tm_switch = "";
$tm_copy_config_args = "";
-if (defined $TEST_GRAMMAR) {
- $tm_switch .= ($DO_PACK_GRAMMARS) ? "--pack-tm" : "--tm";
- $tm_switch .= " $TEST_GRAMMAR";
+if ($DO_PACK_GRAMMARS) {
+ my $packed_dir = "$DATA_DIRS{test}/grammar.packed";
+ if ($OPTIMIZER_RUN == 1 and ! is_packed($TEST_GRAMMAR)) {
+ $cachepipe->cmd("test-pack",
+ "$SCRIPTDIR/support/grammar-packer.pl -T $TMPDIR -m $PACKER_MEM -g $TEST_GRAMMAR -o $packed_dir",
+ $TEST_GRAMMAR,
+ "$packed_dir/vocabulary",
+ "$packed_dir/encoding",
+ "$packed_dir/slice_00000.source");
+ }
+ $TEST_GRAMMAR = $packed_dir;
+
+ $tm_switch .= " --pack-tm $TEST_GRAMMAR";
+} else {
+ $tm_switch .= " --tm $TEST_GRAMMAR";
}
+
+# Add in the glue grammar
if (defined $GLUE_GRAMMAR_FILE) {
$tm_switch .= " --tm $GLUE_GRAMMAR_FILE";
}
-# Build the filtered testing model
-$cachepipe->cmd("test-bundle",
- "$BUNDLER --force --symlink --verbose $JOSHUA_CONFIG test/model --copy-config-options '-top-n $NBEST -output-format \"%i ||| %s ||| %f ||| %c\" -mark-oovs false' ${tm_switch}",
+# Build the test model
+my $testmodeldir = "$RUNDIR/test/$OPTIMIZER_RUN/model";
+$cachepipe->cmd("test-bundle-${OPTIMIZER_RUN}",
+ "$BUNDLER --force --symlink --absolute --verbose -T $TMPDIR $JOSHUA_CONFIG $testmodeldir --copy-config-options '-top-n $NBEST -pop-limit 5000 -output-format \"%i ||| %s ||| %f ||| %c\" -mark-oovs false' ${tm_switch}",
$JOSHUA_CONFIG,
get_file_from_grammar($TEST_GRAMMAR) || $JOSHUA_CONFIG,
- "$testdir/model/joshua.config");
+ "$testmodeldir/joshua.config");
if (defined $TEST_GRAMMAR) {
# Update the test grammar (if defined) to its new path
my $basename = basename($TEST_GRAMMAR);
- if (-e "$testdir/model/$basename") {
- $TEST_GRAMMAR = "$testdir/model/$basename";
- } elsif (-e "$testdir/model/$basename.packed") {
- $TEST_GRAMMAR = "$testdir/model/$basename.packed";
+ if (-e "$testmodeldir/$basename") {
+ $TEST_GRAMMAR = "$testmodeldir/$basename";
+ } elsif (-e "$testmodeldir/$basename.packed") {
+ $TEST_GRAMMAR = "$testmodeldir/$basename.packed";
} else {
print STDERR "* FATAL: test model bundling didn't produce a grammar?";
exit 1;
}
}
-my $testrun = get_absolute_path("test", $RUNDIR);
-my $bestoutput = "$testrun/output";
-my $nbestoutput = "$testrun/output.nbest";
+my $bestoutput = "$testdir/output";
+my $nbestoutput = "$testdir/output.nbest";
my $output;
# If we're decoding a lattice, also output the source side path we chose
-$JOSHUA_ARGS = $_JOSHUA_ARGS;
+$JOSHUA_ARGS = "";
if ($DOING_LATTICES) {
- $JOSHUA_ARGS .= " -maxlen 0 -output-format \"%i ||| %s ||| %e ||| %f ||| %c\"";
+ $JOSHUA_ARGS .= " -maxlen 0 -lattice-decoding -output-format \"%i ||| %s ||| %e ||| %f ||| %c\"";
}
if ($DO_MBR) {
@@ -1619,21 +1686,22 @@
$JOSHUA_ARGS .= " -top-n 0 -output-format %s";
$output = $bestoutput;
}
+$JOSHUA_ARGS .= " $_JOSHUA_ARGS" if defined $_JOSHUA_ARGS;
# Write the decoder run command
-open DEC_CMD, ">$testrun/decoder_command";
-print DEC_CMD "cat $TEST{source} | $testrun/model/run-joshua.sh -m $JOSHUA_MEM -threads $NUM_THREADS $JOSHUA_ARGS > $output 2> $testrun/joshua.log\n";
+open DEC_CMD, ">$testdir/decoder_command";
+print DEC_CMD "cat $TEST{source} | $testmodeldir/run-joshua.sh -m $JOSHUA_MEM -threads $NUM_THREADS $JOSHUA_ARGS > $output 2> $testdir/joshua.log\n";
close(DEC_CMD);
-chmod(0755,"$testrun/decoder_command");
+chmod(0755,"$testdir/decoder_command");
# Decode. $output here is either $nbestoutput (if doing MBR decoding, in which case we'll
# need the n-best output) or $bestoutput (which only outputs the hypothesis but is tons faster)
-$cachepipe->cmd("test-decode",
- "$testrun/decoder_command",
+$cachepipe->cmd("test-decode-${OPTIMIZER_RUN}",
+ "$testdir/decoder_command",
$TEST{source},
- "$testrun/decoder_command",
- "$testrun/model/joshua.config",
- get_file_from_grammar($TEST_GRAMMAR) || "$testrun/model/joshua.config",
+ "$testdir/decoder_command",
+ "$testmodeldir/joshua.config",
+ get_file_from_grammar($TEST_GRAMMAR) || "$testmodeldir/joshua.config",
$output);
# $cachepipe->cmd("remove-oov",
@@ -1643,46 +1711,54 @@
# Extract the 1-best output from the n-best file if the n-best file alone was output
if ($DO_MBR) {
- $cachepipe->cmd("test-extract-onebest",
+ $cachepipe->cmd("test-extract-onebest-${OPTIMIZER_RUN}",
"java -Xmx500m -cp $JOSHUA/class -Dfile.encoding=utf8 joshua.util.ExtractTopCand $nbestoutput $bestoutput",
$nbestoutput,
$bestoutput);
}
# Now compute the BLEU score on the 1-best output
-$cachepipe->cmd("test-bleu",
- "$JOSHUA/bin/bleu $output $TEST{target} > $testrun/bleu",
+$cachepipe->cmd("test-bleu-${OPTIMIZER_RUN}",
+ "$JOSHUA/bin/bleu $output $TEST{target} > $testdir/bleu",
$bestoutput,
- "$testrun/bleu");
+ "$testdir/bleu");
# Update the BLEU summary.
-compute_bleu_summary("$testrun/bleu", "$testrun/final-bleu");
+compute_bleu_summary("test/*/bleu", "test/final-bleu");
+
+if (defined $METEOR) {
+ $cachepipe->cmd("test-meteor-${OPTIMIZER_RUN}",
+ "$JOSHUA/bin/meteor $output $TEST{target} $TARGET > $testdir/meteor",
+ $bestoutput,
+ "$testdir/meteor");
+ compute_meteor_summary("test/*/meteor", "test/final-meteor");
+}
if ($DO_MBR) {
my $numlines = `cat $TEST{source} | wc -l`;
$numlines--;
- my $mbr_output = "$testrun/output.mbr";
+ my $mbr_output = "$testdir/output.mbr";
- $cachepipe->cmd("test-onebest-parmbr",
+ $cachepipe->cmd("test-onebest-parmbr-${OPTIMIZER_RUN}",
"cat $nbestoutput | java -Xmx1700m -cp $JOSHUA/class -Dfile.encoding=utf8 joshua.decoder.NbestMinRiskReranker false 1 $NUM_THREADS > $mbr_output",
$nbestoutput,
$mbr_output);
- $cachepipe->cmd("test-bleu-mbr",
- "$JOSHUA/bin/bleu output $TEST{target} $numrefs > $testrun/bleu.mbr",
+ $cachepipe->cmd("test-bleu-mbr-${OPTIMIZER_RUN}",
+ "$JOSHUA/bin/bleu output $TEST{target} $numrefs > $testdir/bleu.mbr",
$mbr_output,
- "$testrun/bleu.mbr");
+ "$testdir/bleu.mbr");
- compute_bleu_summary("$testrun/bleu.mbr", "$testrun/final-bleu-mbr");
+ compute_bleu_summary("test/*/bleu.mbr", "test/final-bleu-mbr");
}
-compute_time_summary("$testrun/joshua.log", "$testrun/final-times");
+compute_time_summary("test/*/joshua.log", "test/final-times");
# Now do the analysis
if ($DOING_LATTICES) {
# extract the source
- my $source = "$testrun/test.lattice-path.txt";
- $cachepipe->cmd("test-lattice-extract-source",
+ my $source = "$testdir/test.lattice-path.txt";
+ $cachepipe->cmd("test-lattice-extract-source-${OPTIMIZER_RUN}",
"$JOSHUA/bin/extract-1best $nbestoutput 2 | perl -pe 's/<s> //' > $source",
$nbestoutput, $source);
@@ -1745,9 +1821,16 @@
my $infiles = join(" ", @infiles);
my $outfiles = join(" ", @outfiles);
- $cachepipe->cmd("$label-copy-and-filter",
- "paste $infiles | $SCRIPTDIR/training/filter-empty-lines.pl | $SCRIPTDIR/training/split2files.pl $outfiles",
- @indeps, @outfiles);
+ # only skip blank lines for training data
+ if ($label eq "train") {
+ $cachepipe->cmd("$label-copy-and-filter",
+ "$PASTE $infiles | $SCRIPTDIR/training/filter-empty-lines.pl | $SCRIPTDIR/training/split2files.pl $outfiles",
+ @indeps, @outfiles);
+ } else {
+ $cachepipe->cmd("$label-copy-and-filter",
+ "$PASTE $infiles | $SCRIPTDIR/training/split2files.pl $outfiles",
+ @indeps, @outfiles);
+ }
# Done concatenating and filtering files
my $prefix = "$label";
@@ -1787,7 +1870,7 @@
# trim training data
$cachepipe->cmd("$label-trim",
- "paste $infilelist | $SCRIPTDIR/training/trim_parallel_corpus.pl $maxlen | $SCRIPTDIR/training/split2files.pl $outfilelist",
+ "$PASTE $infilelist | $SCRIPTDIR/training/trim_parallel_corpus.pl $maxlen | $SCRIPTDIR/training/split2files.pl $outfilelist",
@infiles,
@outfiles);
$prefix .= ".$maxlen";
@@ -1889,9 +1972,9 @@
# if it's not already unpacked, unpack it
if (! -d "hadoop") {
- my $hadoop_tmp_dir = tempdir("hadoop-0.20.2.XXXX", DIR => $TMPDIR, CLEANUP => 1);
- system("tar xzf $JOSHUA/lib/hadoop-0.20.2.tar.gz -C $hadoop_tmp_dir");
- system("ln -sf $hadoop_tmp_dir/hadoop-0.20.2 hadoop");
+ my $hadoop_tmp_dir = tempdir("hadoop-XXXX", DIR => $TMPDIR, CLEANUP => 0);
+ system("tar xzf $JOSHUA/lib/hadoop-2.5.2.tar.gz -C $hadoop_tmp_dir");
+ system("ln -sf $hadoop_tmp_dir/hadoop-2.5.2 hadoop");
if (defined $HADOOP_CONF) {
print STDERR "Copying HADOOP_CONF($HADOOP_CONF) to hadoop/conf/core-site.xml\n";
system("cp $HADOOP_CONF hadoop/conf/core-site.xml");
@@ -1908,10 +1991,10 @@
}
}
-sub teardown_hadoop_cluster {
- stop_hadoop_cluster();
- system("rm -f hadoop");
-}
+#sub teardown_hadoop_cluster {
+# stop_hadoop_cluster();
+# system("rm -f hadoop");
+#}
sub is_lattice {
my $file = shift;
@@ -1927,6 +2010,12 @@
}
}
+# Set membership: is value in array?
+sub in {
+ my ($value, $array) = @_;
+ return grep( /^$value$/, @$array );
+}
+
# This function retrieves the names of all the features in the grammar. Dense features
# are named with consecutive integers starting at 0, while sparse features can have any name.
# To get the feature names from an unpacked grammar, we have to read through the whole grammar,
@@ -2001,12 +2090,32 @@
my $references = join(" -r ", @references);
- $cachepipe->cmd("analyze-test",
+ $cachepipe->cmd("analyze-test-${OPTIMIZER_RUN}",
"$SCRIPTDIR/analysis/sentence-by-sentence.pl -s $source -r $references $output > $dir/analysis/sentence-by-sentence.html",
$output,
"$dir/analysis/sentence-by-sentence.html");
}
+sub compute_meteor_summary {
+ my ($filepattern, $outputfile) = @_;
+
+ # Average the runs, report result
+ my @scores;
+ my $numrecs = 0;
+ open CMD, "grep '^Final score' $filepattern |";
+ my @F = split(' ', <CMD>);
+ close(CMD);
+ push(@scores, 1.0 * $F[-1]);
+
+ if (scalar @scores) {
+ my $final_score = sum(@scores) / (scalar @scores);
+
+ open SUMMARY, ">$outputfile" or die "Can't write to $outputfile";
+ printf(SUMMARY "%s / %d = %.4f\n", join(" + ", @scores), scalar @scores, $final_score);
+ close(SUMMARY);
+ }
+}
+
sub compute_bleu_summary {
my ($filepattern, $outputfile) = @_;
diff --git a/scripts/training/preprocess.sh b/scripts/training/preprocess.sh
new file mode 100755
index 0000000..69e4337
--- /dev/null
+++ b/scripts/training/preprocess.sh
@@ -0,0 +1,9 @@
+#!/bin/bash
+
+# Strings together the preprocessing scripts
+
+set -u
+
+lang=$1
+
+$JOSHUA/scripts/training/normalize-punctuation.pl $lang | $JOSHUA/scripts/training/penn-treebank-tokenizer.perl -l $lang | $JOSHUA/scripts/lowercase.perl
diff --git a/scripts/training/run_tuner.py b/scripts/training/run_tuner.py
index 7396bd6..dd1c086 100755
--- a/scripts/training/run_tuner.py
+++ b/scripts/training/run_tuner.py
@@ -44,7 +44,7 @@
-r <REF>
-rps <NUMREFS> # references per sentence
-p <TUNEDIR>/params.txt # parameter file
--m BLEU 4 closest # evaluation metric and its options
+-m <METRIC> # evaluation metric and its options
-maxIt <ITERATIONS> # maximum MERT iterations
-ipi 20 # number of intermediate initial points per iteration
-cmd <DECODER_COMMAND> # file containing commands to run decoder
@@ -65,7 +65,7 @@
-p <TUNEDIR>/params.txt
#metric setting:
--m BLEU 4 closest
+-m <METRIC>
#-m TER nocase punc 5 5 joshua/zmert/tercom-0.7.25/tercom.7.25.jar 1
#-m TER-BLEU nocase punc 20 50 joshua/zmert/tercom-0.7.25/tercom.7.25.jar 1 4 closest
#-m METEOR en norm_yes keepPunc 2 #old meteor interface #Z-MERT Meteor interface(not working)
@@ -130,7 +130,7 @@
-p <TUNEDIR>/params.txt
#metric setting:
--m BLEU 4 closest
+-m <METRIC>
#-m TER nocase punc 5 5 joshua/zmert/tercom-0.7.25/tercom.7.25.jar 1
#-m TER-BLEU nocase punc 20 50 joshua/zmert/tercom-0.7.25/tercom.7.25.jar 1 4 closest
#-m METEOR en norm_yes keepPunc 2 #old meteor interface #Z-MERT Meteor interface(not working)
@@ -194,13 +194,93 @@
-runPercep 0
"""
+ADAGRAD_CONFIG_TEMPLATE = """### Part 1: parameters similar to Z-MERT
+# target sentences file name (in this case, file name prefix)
+-r <REF>
+
+# references per sentence
+-rps <NUMREFS>
+
+# parameter file
+-p <TUNEDIR>/params.txt
+
+#metric setting:
+-m <METRIC>
+#-m TER nocase punc 5 5 joshua/zmert/tercom-0.7.25/tercom.7.25.jar 1
+#-m TER-BLEU nocase punc 20 50 joshua/zmert/tercom-0.7.25/tercom.7.25.jar 1 4 closest
+#-m METEOR en norm_yes keepPunc 2 #old meteor interface #Z-MERT Meteor interface(not working)
+#-m Meteor en lowercase '0.5 1.0 0.5 0.5' 'exact stem synonym paraphrase' '1.0 0.5 0.5 0.5' #CMU meteor interface
+
+# maximum iterations
+-maxIt <ITERATIONS>
+
+# file containing commands to run decoder
+-cmd <DECODER_COMMAND>
+
+# file produced by decoder
+-decOut <DECODER_OUTPUT>
+
+# decoder config file
+-dcfg <DECODER_CONFIG>
+
+# size of N-best list
+-N 300
+
+# verbosity level (0-2; higher value => more verbose)
+-v 1
+
+### PART 2: AdaGrad parameters
+#oracle selection method:
+#1: "hope"(default)
+#2: best metric score(ex: max BLEU)
+-oracleSelection 1
+
+#prediction selection method:
+#1: "fear"(default)
+#2: max model score
+#3: worst metric score(ex: min BLEU)
+-predictionSelection 1
+
+#shuffle the training samples? (default:1)
+-needShuffle 1
+
+#average the weights after each epoch? (default:1)
+-needAvg 1
+
+#return the best weights during tuning? (default:1)
+-returnBest 1
+
+#when use BLEU/TER-BLEU as metric, use the pseudo corpus to compute BLEU? (default:1)
+-usePseudoCorpus 1
+
+#corpus decay coefficient (only valid when pseudo corpus is used for BLEU, default:0.99)
+-corpusDecay 0.99
+
+#scale the model score(in order to make it comparable to the metric score)?(default:1)
+-needScaling 1
+
+#options for scaling (only valid when -needScaling=1)
+-scoreRatio 5 #scale the model score so that abs(model_score/metric_score) \approx scoreRatio (default:5)
+
+#regularization (0: no reg 1: l1-reg; 2: l2-reg. Default: 2)
+-regularization 2
+
+#regularization coefficient
+-lambda 0.1
+
+#step size coefficient
+-eta 0.1
+
+#mini-batch size (default: 10)
+-batchSize 10
+"""
+
PARAMS_TEMPLATE = """<PARAMS>
WordPenalty ||| -2.844814 Opt -Inf +Inf -5 0
OOVPenalty ||| 1 Fix 0 0 0 0
normalization = absval 1 lm_0
"""
-
def write_template(template, path, lookup):
"""Writes a template file, substituting variables of the form <NAME> for values found
in the 'lookup' hash.
@@ -248,16 +328,16 @@
return (owner, maxspan, path)
-def get_features(grammar_path):
- """Opens the grammar at grammar_path and returns the list of features. Works for
- both packed grammars and unpacked grammars. For packed grammars, the feature list
- is complete, but for unpacked ones, only the features found on the first line are
- returned. Dense features (unlabeled ones) are returned as sequential numbers
- starting at 0."""
+def get_features(config_file):
+ """Queries the decoder for all dense features that will be fired by the feature
+ functions activated in the config file"""
- features = check_output("%s/scripts/training/get_grammar_features.pl %s" % (JOSHUA, grammar_path), shell=True)
- return features.strip().split('\n')
-
+ output = check_output("%s/bin/joshua-decoder -c %s -show-weights -v 0" % (JOSHUA, config_file), shell=True)
+ features = []
+ for index, item in enumerate(output.split('\n')):
+ if item != "":
+ features.append(tuple(item.split()))
+ return features
def get_num_refs(prefix):
"""Determines how many references there are."""
@@ -276,13 +356,13 @@
def safe_symlink(to_path, from_path):
- if (os.path.isfile(from_path)):
+ if os.path.isfile(from_path) or os.path.islink(from_path):
os.unlink(from_path)
os.symlink(to_path, from_path)
-def setup_configs(template, template_dest, target, num_refs, tunedir, command, config, output, iterations):
+def setup_configs(template, template_dest, target, num_refs, tunedir, command, config, output, metric, iterations):
"""Writes the config files for both Z-MERT and PRO (which run on the same codebase).
Both of them write the file "params.txt", but they use different names for the config file,
so that is a parameter."""
@@ -291,37 +371,20 @@
{ 'REF': target,
'NUMREFS': num_refs,
'TUNEDIR': tunedir,
+ 'METRIC': metric,
'ITERATIONS': `iterations`,
'DECODER_COMMAND': command,
'DECODER_CONFIG': config,
'DECODER_OUTPUT': output })
- # Parse the config file, looking for tms, lms, and feature
- # functions for which we need to provide initial weights
+ # Query the decoder for the list of dense parameters. These need to be listed in the
+ # config file or MERT will not know about them, despite them being listed in params.txt.
params = []
- lm_i = 0
- for line in open(config):
- if line.startswith('tm ='):
- owner, span, path = parse_tm_line(line)
-
- if not os.path.isabs(path):
- path = os.path.join(os.path.dirname(config), path)
-
- for f in get_features(path):
- if re.match(r'^\d+$', f):
- params.append('tm_%s_%s ||| 1.0 Opt -Inf +Inf -1 +1' % (owner, f))
- else:
- params.append('%s ||| 0.0 Opt -Inf +Inf -1 +1' % (f))
-
- elif line.startswith('feature-function'):
- if 'LanguageModel' in line:
- params.append('lm_%d ||| 1.0 Opt 0.1 +Inf +0.5 +1.5' % (lm_i))
- lm_i += 1
- else:
- ff = line.strip().split(' ', 2)[2]
- if ff in ['SourcePath', 'PhrasePenalty', 'Distortion']:
- params.append('%s ||| 1.0 Opt -Inf +Inf -1 +1' % (ff))
-
+ for feature,weight in get_features(config):
+ if feature.startswith('lm_'):
+ params.append('%s ||| %s Opt 0.1 +Inf +0.5 +1.5' % (feature, weight))
+ else:
+ params.append('%s ||| %s Opt -Inf +Inf -1 +1' % (feature, weight))
paramstr = '\n'.join(params)
write_template(PARAMS_TEMPLATE, '%s/params.txt' % (tunedir),
{ 'REF': target,
@@ -331,11 +394,11 @@
def run_zmert(tunedir, source, target, command, config, output, opts):
- """Runs Z-MERT after setting up all its crazy file requirements."""
+ """Runs Z-MERT after setting up all its file requirements."""
setup_configs(ZMERT_CONFIG_TEMPLATE, '%s/mert.config' % (tunedir),
target, get_num_refs(target), tunedir, command, config, output,
- opts.iterations or 10)
+ opts.metric, opts.iterations or 10)
tuner_mem = '4g'
call("java -d64 -Xmx%s -cp %s/class joshua.zmert.ZMERT -maxMem 4000 %s/mert.config > %s/mert.log 2>&1" % (tuner_mem, JOSHUA, tunedir, tunedir), shell=True)
@@ -345,11 +408,11 @@
def run_pro(tunedir, source, target, command, config, output, opts):
- """Runs PRO after setting up all its crazy file requirements."""
+ """Runs PRO after setting up all its file requirements."""
setup_configs(PRO_CONFIG_TEMPLATE, '%s/pro.config' % (tunedir),
target, get_num_refs(target), tunedir, command, config, output,
- opts.iterations or 30)
+ opts.metric, opts.iterations or 30)
tuner_mem = '4g'
call("java -d64 -Xmx%s -cp %s/class joshua.pro.PRO %s/pro.config > %s/pro.log 2>&1" % (tuner_mem, JOSHUA, tunedir, tunedir), shell=True)
@@ -359,11 +422,11 @@
def run_mira(tunedir, source, target, command, config, output, opts):
- """Runs MIRA after setting up all its crazy file requirements."""
+ """Runs MIRA after setting up all its file requirements."""
setup_configs(MIRA_CONFIG_TEMPLATE, '%s/mira.config' % (tunedir),
target, get_num_refs(target), tunedir, command, config, output,
- opts.iterations or 5)
+ opts.metric, opts.iterations or 5)
tuner_mem = '4g'
call("java -d64 -Xmx%s -cp %s/class joshua.mira.MIRA %s/mira.config > %s/mira.log 2>&1" % (tuner_mem, JOSHUA, tunedir, tunedir), shell=True)
@@ -371,7 +434,19 @@
safe_symlink(os.path.join(os.path.dirname(config),'joshua.config.MIRA.final'),
os.path.join(tunedir, 'joshua.config.final'))
-
+def run_adagrad(tunedir, source, target, command, config, output, opts):
+ """Runs ADAGRAD after setting up all its file requirements."""
+
+ setup_configs(ADAGRAD_CONFIG_TEMPLATE, '%s/adagrad.config' % (tunedir),
+ target, get_num_refs(target), tunedir, command, config, output,
+ opts.metric, opts.iterations or 10)
+
+ tuner_mem = '4g'
+ call("java -d64 -Xmx%s -cp %s/class joshua.adagrad.AdaGrad %s/adagrad.config > %s/adagrad.log 2>&1" % (tuner_mem, JOSHUA, tunedir, tunedir), shell=True)
+
+ safe_symlink(os.path.join(os.path.dirname(config),'joshua.config.ADAGRAD.final'),
+ os.path.join(tunedir, 'joshua.config.final'))
+
def error_quit(message):
logging.error(message)
sys.exit(2)
@@ -397,7 +472,7 @@
help='path to tuning directory')
parser.add_argument(
'--tuner', default='zmert',
- help='which tuner to use: zmert, pro, or mira')
+ help='which tuner to use: zmert, pro, mira, or adagrad')
parser.add_argument(
'--decoder', default='tune/decoder_command',
help='The path to the decoder or wrapper script. This script is responsible for '
@@ -420,6 +495,9 @@
'-i', '--iterations', type=int,
help='the maximum number of iterations to run the tuner for')
parser.add_argument(
+ '-m', '--metric', default='BLEU 4 closest',
+ help='the metric to optimize')
+ parser.add_argument(
'-v', '--verbose', action='store_true',
help='print informational messages'
)
@@ -447,6 +525,9 @@
elif 'mira' in opts.tuner:
run_mira(opts.tunedir, opts.source, opts.target, opts.decoder, opts.decoder_config, opts.decoder_output_file, opts)
+ elif 'adagrad' in opts.tuner:
+ run_adagrad(opts.tunedir, opts.source, opts.target, opts.decoder, opts.decoder_config, opts.decoder_output_file, opts)
+
if __name__ == "__main__":
try:
assert JOSHUA
diff --git a/scripts/training/summarize.pl b/scripts/training/summarize.pl
index 3b44e09..f7ff6fe 100755
--- a/scripts/training/summarize.pl
+++ b/scripts/training/summarize.pl
@@ -3,24 +3,28 @@
use strict;
use warnings;
-opendir DIR, "." or die;
+my $basedir = shift(@ARGV) || ".";
+
+opendir DIR, $basedir or die;
my @dirs = sort { $a <=> $b } grep /^\d[\d\.]*$/, readdir DIR;
closedir DIR;
foreach my $dir (@dirs) {
- chomp(my $readme = `cat $dir/README`);
- my $bleu = get_bleu("$dir/test/final-bleu");
- my $time = get_time("$dir/test/final-times");
+ chomp(my $readme = `[[ -e "$basedir/$dir/README" ]] && cat $basedir/$dir/README`);
+ my $bleu = get_scores("$basedir/$dir/test/final-bleu", 100.0);
+ my $meteor = get_scores("$basedir/$dir/test/final-meteor", 100.0);
+ my $time = get_time("$basedir/$dir/test/final-times");
# my $mbr = get_bleu("$dir/test/final-bleu-mbr");
my $dirstring = dirstring($dir);
# print "$dirstring\t$bleu\t$mbr\t$readme\n";
- print "$dirstring\t$bleu\t$time\t$readme\n";
+ print "$dirstring\t$bleu\t$meteor\t$time\t$readme\n";
}
-sub get_bleu {
- my ($file) = @_;
+sub get_scores {
+ my ($file, $factor) = @_;
+ $factor = 1.0 unless $factor;
my $score = 0.0;
my $num_scores = 0;
@@ -32,9 +36,11 @@
$num_scores++ if $token eq "+";
}
- $score = $tokens[-1] * 100;
+ $score = $tokens[-1] * $factor;
}
+ $score /= $factor if $score > $factor;
+
return sprintf("%5.2f", $score) . "($num_scores)";
}
diff --git a/scripts/training/templates/thrax-phrase.conf b/scripts/training/templates/thrax-phrase.conf
new file mode 100644
index 0000000..630c76f
--- /dev/null
+++ b/scripts/training/templates/thrax-phrase.conf
@@ -0,0 +1,80 @@
+# this is an example Thrax configuration file
+# <- this symbol indicates a comment
+# each line should be a key-value pair separated by whitespace
+
+###
+### GRAMMAR OPTIONS
+###
+
+grammar hiero # or samt
+reverse false
+source-is-parsed false
+target-is-parsed false
+# default-nt X # X is the default anyway
+
+min-rule-count 1
+
+# the number of reducers
+reducers 16
+
+# not only do these next six options have the suggested values as given
+# in Chiang's "Hierarchical Phrase-based Translation" (CL), they are also
+# Thrax's default values! You could comment them out and the resulting grammar
+# would be identical.
+
+# maximum length of initial phrase pairs
+initial-phrase-length 10
+lex-source-words 10
+lex-target-words 10
+
+# maximum number of NTs in a rule
+arity 0
+
+# minimum number of aligned terminals in a rule
+lexicality 1
+
+# allow adjacent nonterminals on source side
+adjacent-nts false
+
+# allow unaligned words at boundaries of phrases
+loose true
+
+allow-abstract-rules false
+allow-nonlexical-x false
+allow-full-sentence-rules false
+
+nonlex-source-length 5
+nonlex-target-length 5
+nonlex-source-words 5
+nonlex-target-words 5
+
+allow-double-plus false
+
+rule-span-limit 12
+
+phrase-penalty 2.718
+
+# a whitespace seperated list of features
+# in this example, the features are phrase translation probability,
+# lexical probability, and phrase penalty
+# features phrase-penalty e2fphrase f2ephrase lexprob lexical abstract adjacent x-rule source-terminals-without-target target-terminals-without-source monotonic glue-rule rarity target-word-count unaligned-count
+features e_given_f_phrase f_given_e_phrase e_given_f_lex f_given_e_lex rarity phrase-penalty alignment
+
+# the only option and default later we will want to add formats for other decoders such as moses and
+# cdec, if they use other formats
+output-format joshua
+
+# label feature scores? each score will be output as name=score
+label-feature-scores false
+
+amazon-work s3://edu.jhu.cs.jonny/wmt11/fr-en/hiero
+amazon-jar s3://edu.jhu.cs.jonny/thrax.jar
+amazon-num-instances 15
+
+max-split-size 8388608
+
+# the format should be:
+# foreign sentence ||| english sentence ||| alignment
+# where the english is either parsed or not depending on whether you want
+# SAMT or you want Hiero.
+#input-file s3://edu.jhu.cs.jonny/wmt11/corpus.fr-en
diff --git a/scripts/training/giza-pp/GIZA++-v2/ATables.cpp b/src/giza-pp/GIZA++-v2/ATables.cpp
similarity index 100%
rename from scripts/training/giza-pp/GIZA++-v2/ATables.cpp
rename to src/giza-pp/GIZA++-v2/ATables.cpp
diff --git a/scripts/training/giza-pp/GIZA++-v2/ATables.h b/src/giza-pp/GIZA++-v2/ATables.h
similarity index 100%
rename from scripts/training/giza-pp/GIZA++-v2/ATables.h
rename to src/giza-pp/GIZA++-v2/ATables.h
diff --git a/scripts/training/giza-pp/GIZA++-v2/AlignTables.cpp b/src/giza-pp/GIZA++-v2/AlignTables.cpp
similarity index 100%
rename from scripts/training/giza-pp/GIZA++-v2/AlignTables.cpp
rename to src/giza-pp/GIZA++-v2/AlignTables.cpp
diff --git a/scripts/training/giza-pp/GIZA++-v2/AlignTables.h b/src/giza-pp/GIZA++-v2/AlignTables.h
similarity index 100%
rename from scripts/training/giza-pp/GIZA++-v2/AlignTables.h
rename to src/giza-pp/GIZA++-v2/AlignTables.h
diff --git a/scripts/training/giza-pp/GIZA++-v2/Array.h b/src/giza-pp/GIZA++-v2/Array.h
similarity index 100%
rename from scripts/training/giza-pp/GIZA++-v2/Array.h
rename to src/giza-pp/GIZA++-v2/Array.h
diff --git a/scripts/training/giza-pp/GIZA++-v2/Array2.h b/src/giza-pp/GIZA++-v2/Array2.h
similarity index 100%
rename from scripts/training/giza-pp/GIZA++-v2/Array2.h
rename to src/giza-pp/GIZA++-v2/Array2.h
diff --git a/scripts/training/giza-pp/GIZA++-v2/Array4.h b/src/giza-pp/GIZA++-v2/Array4.h
similarity index 100%
rename from scripts/training/giza-pp/GIZA++-v2/Array4.h
rename to src/giza-pp/GIZA++-v2/Array4.h
diff --git a/scripts/training/giza-pp/GIZA++-v2/D4Tables.h b/src/giza-pp/GIZA++-v2/D4Tables.h
similarity index 100%
rename from scripts/training/giza-pp/GIZA++-v2/D4Tables.h
rename to src/giza-pp/GIZA++-v2/D4Tables.h
diff --git a/scripts/training/giza-pp/GIZA++-v2/D5Tables.h b/src/giza-pp/GIZA++-v2/D5Tables.h
similarity index 100%
rename from scripts/training/giza-pp/GIZA++-v2/D5Tables.h
rename to src/giza-pp/GIZA++-v2/D5Tables.h
diff --git a/scripts/training/giza-pp/GIZA++-v2/Dictionary.cpp b/src/giza-pp/GIZA++-v2/Dictionary.cpp
similarity index 100%
rename from scripts/training/giza-pp/GIZA++-v2/Dictionary.cpp
rename to src/giza-pp/GIZA++-v2/Dictionary.cpp
diff --git a/scripts/training/giza-pp/GIZA++-v2/Dictionary.h b/src/giza-pp/GIZA++-v2/Dictionary.h
similarity index 100%
rename from scripts/training/giza-pp/GIZA++-v2/Dictionary.h
rename to src/giza-pp/GIZA++-v2/Dictionary.h
diff --git a/scripts/training/giza-pp/GIZA++-v2/FlexArray.h b/src/giza-pp/GIZA++-v2/FlexArray.h
similarity index 100%
rename from scripts/training/giza-pp/GIZA++-v2/FlexArray.h
rename to src/giza-pp/GIZA++-v2/FlexArray.h
diff --git a/scripts/training/giza-pp/GIZA++-v2/ForwardBackward.cpp b/src/giza-pp/GIZA++-v2/ForwardBackward.cpp
similarity index 100%
rename from scripts/training/giza-pp/GIZA++-v2/ForwardBackward.cpp
rename to src/giza-pp/GIZA++-v2/ForwardBackward.cpp
diff --git a/scripts/training/giza-pp/GIZA++-v2/ForwardBackward.h b/src/giza-pp/GIZA++-v2/ForwardBackward.h
similarity index 100%
rename from scripts/training/giza-pp/GIZA++-v2/ForwardBackward.h
rename to src/giza-pp/GIZA++-v2/ForwardBackward.h
diff --git a/scripts/training/giza-pp/GIZA++-v2/GNU.GPL b/src/giza-pp/GIZA++-v2/GNU.GPL
similarity index 100%
rename from scripts/training/giza-pp/GIZA++-v2/GNU.GPL
rename to src/giza-pp/GIZA++-v2/GNU.GPL
diff --git a/scripts/training/giza-pp/GIZA++-v2/Globals.h b/src/giza-pp/GIZA++-v2/Globals.h
similarity index 100%
rename from scripts/training/giza-pp/GIZA++-v2/Globals.h
rename to src/giza-pp/GIZA++-v2/Globals.h
diff --git a/scripts/training/giza-pp/GIZA++-v2/HMMTables.cpp b/src/giza-pp/GIZA++-v2/HMMTables.cpp
similarity index 100%
rename from scripts/training/giza-pp/GIZA++-v2/HMMTables.cpp
rename to src/giza-pp/GIZA++-v2/HMMTables.cpp
diff --git a/scripts/training/giza-pp/GIZA++-v2/HMMTables.h b/src/giza-pp/GIZA++-v2/HMMTables.h
similarity index 100%
rename from scripts/training/giza-pp/GIZA++-v2/HMMTables.h
rename to src/giza-pp/GIZA++-v2/HMMTables.h
diff --git a/scripts/training/giza-pp/GIZA++-v2/LICENSE b/src/giza-pp/GIZA++-v2/LICENSE
similarity index 100%
rename from scripts/training/giza-pp/GIZA++-v2/LICENSE
rename to src/giza-pp/GIZA++-v2/LICENSE
diff --git a/scripts/training/giza-pp/GIZA++-v2/Makefile b/src/giza-pp/GIZA++-v2/Makefile
similarity index 100%
rename from scripts/training/giza-pp/GIZA++-v2/Makefile
rename to src/giza-pp/GIZA++-v2/Makefile
diff --git a/scripts/training/giza-pp/GIZA++-v2/Makefile.definitions b/src/giza-pp/GIZA++-v2/Makefile.definitions
similarity index 100%
rename from scripts/training/giza-pp/GIZA++-v2/Makefile.definitions
rename to src/giza-pp/GIZA++-v2/Makefile.definitions
diff --git a/scripts/training/giza-pp/GIZA++-v2/Makefile.src b/src/giza-pp/GIZA++-v2/Makefile.src
similarity index 100%
rename from scripts/training/giza-pp/GIZA++-v2/Makefile.src
rename to src/giza-pp/GIZA++-v2/Makefile.src
diff --git a/scripts/training/giza-pp/GIZA++-v2/MoveSwapMatrix.cpp b/src/giza-pp/GIZA++-v2/MoveSwapMatrix.cpp
similarity index 100%
rename from scripts/training/giza-pp/GIZA++-v2/MoveSwapMatrix.cpp
rename to src/giza-pp/GIZA++-v2/MoveSwapMatrix.cpp
diff --git a/scripts/training/giza-pp/GIZA++-v2/MoveSwapMatrix.h b/src/giza-pp/GIZA++-v2/MoveSwapMatrix.h
similarity index 100%
rename from scripts/training/giza-pp/GIZA++-v2/MoveSwapMatrix.h
rename to src/giza-pp/GIZA++-v2/MoveSwapMatrix.h
diff --git a/scripts/training/giza-pp/GIZA++-v2/NTables.cpp b/src/giza-pp/GIZA++-v2/NTables.cpp
similarity index 100%
rename from scripts/training/giza-pp/GIZA++-v2/NTables.cpp
rename to src/giza-pp/GIZA++-v2/NTables.cpp
diff --git a/scripts/training/giza-pp/GIZA++-v2/NTables.h b/src/giza-pp/GIZA++-v2/NTables.h
similarity index 100%
rename from scripts/training/giza-pp/GIZA++-v2/NTables.h
rename to src/giza-pp/GIZA++-v2/NTables.h
diff --git a/scripts/training/giza-pp/GIZA++-v2/Parameter.cpp b/src/giza-pp/GIZA++-v2/Parameter.cpp
similarity index 100%
rename from scripts/training/giza-pp/GIZA++-v2/Parameter.cpp
rename to src/giza-pp/GIZA++-v2/Parameter.cpp
diff --git a/scripts/training/giza-pp/GIZA++-v2/Parameter.h b/src/giza-pp/GIZA++-v2/Parameter.h
similarity index 100%
rename from scripts/training/giza-pp/GIZA++-v2/Parameter.h
rename to src/giza-pp/GIZA++-v2/Parameter.h
diff --git a/scripts/training/giza-pp/GIZA++-v2/Perplexity.cpp b/src/giza-pp/GIZA++-v2/Perplexity.cpp
similarity index 100%
rename from scripts/training/giza-pp/GIZA++-v2/Perplexity.cpp
rename to src/giza-pp/GIZA++-v2/Perplexity.cpp
diff --git a/scripts/training/giza-pp/GIZA++-v2/Perplexity.h b/src/giza-pp/GIZA++-v2/Perplexity.h
similarity index 100%
rename from scripts/training/giza-pp/GIZA++-v2/Perplexity.h
rename to src/giza-pp/GIZA++-v2/Perplexity.h
diff --git a/scripts/training/giza-pp/GIZA++-v2/Pointer.h b/src/giza-pp/GIZA++-v2/Pointer.h
similarity index 100%
rename from scripts/training/giza-pp/GIZA++-v2/Pointer.h
rename to src/giza-pp/GIZA++-v2/Pointer.h
diff --git a/scripts/training/giza-pp/GIZA++-v2/README b/src/giza-pp/GIZA++-v2/README
similarity index 100%
rename from scripts/training/giza-pp/GIZA++-v2/README
rename to src/giza-pp/GIZA++-v2/README
diff --git a/scripts/training/giza-pp/GIZA++-v2/TTables.cpp b/src/giza-pp/GIZA++-v2/TTables.cpp
similarity index 100%
rename from scripts/training/giza-pp/GIZA++-v2/TTables.cpp
rename to src/giza-pp/GIZA++-v2/TTables.cpp
diff --git a/scripts/training/giza-pp/GIZA++-v2/TTables.h b/src/giza-pp/GIZA++-v2/TTables.h
similarity index 100%
rename from scripts/training/giza-pp/GIZA++-v2/TTables.h
rename to src/giza-pp/GIZA++-v2/TTables.h
diff --git a/scripts/training/giza-pp/GIZA++-v2/Vector.h b/src/giza-pp/GIZA++-v2/Vector.h
similarity index 100%
rename from scripts/training/giza-pp/GIZA++-v2/Vector.h
rename to src/giza-pp/GIZA++-v2/Vector.h
diff --git a/scripts/training/giza-pp/GIZA++-v2/WordClasses.h b/src/giza-pp/GIZA++-v2/WordClasses.h
similarity index 100%
rename from scripts/training/giza-pp/GIZA++-v2/WordClasses.h
rename to src/giza-pp/GIZA++-v2/WordClasses.h
diff --git a/scripts/training/giza-pp/GIZA++-v2/alignment.cpp b/src/giza-pp/GIZA++-v2/alignment.cpp
similarity index 100%
rename from scripts/training/giza-pp/GIZA++-v2/alignment.cpp
rename to src/giza-pp/GIZA++-v2/alignment.cpp
diff --git a/scripts/training/giza-pp/GIZA++-v2/alignment.h b/src/giza-pp/GIZA++-v2/alignment.h
similarity index 100%
rename from scripts/training/giza-pp/GIZA++-v2/alignment.h
rename to src/giza-pp/GIZA++-v2/alignment.h
diff --git a/scripts/training/giza-pp/GIZA++-v2/collCounts.cpp b/src/giza-pp/GIZA++-v2/collCounts.cpp
similarity index 100%
rename from scripts/training/giza-pp/GIZA++-v2/collCounts.cpp
rename to src/giza-pp/GIZA++-v2/collCounts.cpp
diff --git a/scripts/training/giza-pp/GIZA++-v2/collCounts.h b/src/giza-pp/GIZA++-v2/collCounts.h
similarity index 100%
rename from scripts/training/giza-pp/GIZA++-v2/collCounts.h
rename to src/giza-pp/GIZA++-v2/collCounts.h
diff --git a/scripts/training/giza-pp/GIZA++-v2/defs.h b/src/giza-pp/GIZA++-v2/defs.h
similarity index 100%
rename from scripts/training/giza-pp/GIZA++-v2/defs.h
rename to src/giza-pp/GIZA++-v2/defs.h
diff --git a/scripts/training/giza-pp/GIZA++-v2/dependencies b/src/giza-pp/GIZA++-v2/dependencies
similarity index 100%
rename from scripts/training/giza-pp/GIZA++-v2/dependencies
rename to src/giza-pp/GIZA++-v2/dependencies
diff --git a/scripts/training/giza-pp/GIZA++-v2/file_spec.h b/src/giza-pp/GIZA++-v2/file_spec.h
similarity index 100%
rename from scripts/training/giza-pp/GIZA++-v2/file_spec.h
rename to src/giza-pp/GIZA++-v2/file_spec.h
diff --git a/scripts/training/giza-pp/GIZA++-v2/getSentence.cpp b/src/giza-pp/GIZA++-v2/getSentence.cpp
similarity index 100%
rename from scripts/training/giza-pp/GIZA++-v2/getSentence.cpp
rename to src/giza-pp/GIZA++-v2/getSentence.cpp
diff --git a/scripts/training/giza-pp/GIZA++-v2/getSentence.h b/src/giza-pp/GIZA++-v2/getSentence.h
similarity index 100%
rename from scripts/training/giza-pp/GIZA++-v2/getSentence.h
rename to src/giza-pp/GIZA++-v2/getSentence.h
diff --git a/scripts/training/giza-pp/GIZA++-v2/hmm.cpp b/src/giza-pp/GIZA++-v2/hmm.cpp
similarity index 100%
rename from scripts/training/giza-pp/GIZA++-v2/hmm.cpp
rename to src/giza-pp/GIZA++-v2/hmm.cpp
diff --git a/scripts/training/giza-pp/GIZA++-v2/hmm.h b/src/giza-pp/GIZA++-v2/hmm.h
similarity index 100%
rename from scripts/training/giza-pp/GIZA++-v2/hmm.h
rename to src/giza-pp/GIZA++-v2/hmm.h
diff --git a/scripts/training/giza-pp/GIZA++-v2/logprob.cpp b/src/giza-pp/GIZA++-v2/logprob.cpp
similarity index 100%
rename from scripts/training/giza-pp/GIZA++-v2/logprob.cpp
rename to src/giza-pp/GIZA++-v2/logprob.cpp
diff --git a/scripts/training/giza-pp/GIZA++-v2/logprob.h b/src/giza-pp/GIZA++-v2/logprob.h
similarity index 100%
rename from scripts/training/giza-pp/GIZA++-v2/logprob.h
rename to src/giza-pp/GIZA++-v2/logprob.h
diff --git a/scripts/training/giza-pp/GIZA++-v2/main.cpp b/src/giza-pp/GIZA++-v2/main.cpp
similarity index 100%
rename from scripts/training/giza-pp/GIZA++-v2/main.cpp
rename to src/giza-pp/GIZA++-v2/main.cpp
diff --git a/scripts/training/giza-pp/GIZA++-v2/model1.cpp b/src/giza-pp/GIZA++-v2/model1.cpp
similarity index 100%
rename from scripts/training/giza-pp/GIZA++-v2/model1.cpp
rename to src/giza-pp/GIZA++-v2/model1.cpp
diff --git a/scripts/training/giza-pp/GIZA++-v2/model1.h b/src/giza-pp/GIZA++-v2/model1.h
similarity index 100%
rename from scripts/training/giza-pp/GIZA++-v2/model1.h
rename to src/giza-pp/GIZA++-v2/model1.h
diff --git a/scripts/training/giza-pp/GIZA++-v2/model2.cpp b/src/giza-pp/GIZA++-v2/model2.cpp
similarity index 100%
rename from scripts/training/giza-pp/GIZA++-v2/model2.cpp
rename to src/giza-pp/GIZA++-v2/model2.cpp
diff --git a/scripts/training/giza-pp/GIZA++-v2/model2.h b/src/giza-pp/GIZA++-v2/model2.h
similarity index 100%
rename from scripts/training/giza-pp/GIZA++-v2/model2.h
rename to src/giza-pp/GIZA++-v2/model2.h
diff --git a/scripts/training/giza-pp/GIZA++-v2/model2to3.cpp b/src/giza-pp/GIZA++-v2/model2to3.cpp
similarity index 100%
rename from scripts/training/giza-pp/GIZA++-v2/model2to3.cpp
rename to src/giza-pp/GIZA++-v2/model2to3.cpp
diff --git a/scripts/training/giza-pp/GIZA++-v2/model3.cpp b/src/giza-pp/GIZA++-v2/model3.cpp
similarity index 100%
rename from scripts/training/giza-pp/GIZA++-v2/model3.cpp
rename to src/giza-pp/GIZA++-v2/model3.cpp
diff --git a/scripts/training/giza-pp/GIZA++-v2/model3.h b/src/giza-pp/GIZA++-v2/model3.h
similarity index 100%
rename from scripts/training/giza-pp/GIZA++-v2/model3.h
rename to src/giza-pp/GIZA++-v2/model3.h
diff --git a/scripts/training/giza-pp/GIZA++-v2/model345-peg.cpp b/src/giza-pp/GIZA++-v2/model345-peg.cpp
similarity index 100%
rename from scripts/training/giza-pp/GIZA++-v2/model345-peg.cpp
rename to src/giza-pp/GIZA++-v2/model345-peg.cpp
diff --git a/scripts/training/giza-pp/GIZA++-v2/model3_viterbi.cpp b/src/giza-pp/GIZA++-v2/model3_viterbi.cpp
similarity index 100%
rename from scripts/training/giza-pp/GIZA++-v2/model3_viterbi.cpp
rename to src/giza-pp/GIZA++-v2/model3_viterbi.cpp
diff --git a/scripts/training/giza-pp/GIZA++-v2/model3_viterbi_with_tricks.cpp b/src/giza-pp/GIZA++-v2/model3_viterbi_with_tricks.cpp
similarity index 100%
rename from scripts/training/giza-pp/GIZA++-v2/model3_viterbi_with_tricks.cpp
rename to src/giza-pp/GIZA++-v2/model3_viterbi_with_tricks.cpp
diff --git a/scripts/training/giza-pp/GIZA++-v2/myassert.cpp b/src/giza-pp/GIZA++-v2/myassert.cpp
similarity index 100%
rename from scripts/training/giza-pp/GIZA++-v2/myassert.cpp
rename to src/giza-pp/GIZA++-v2/myassert.cpp
diff --git a/scripts/training/giza-pp/GIZA++-v2/myassert.h b/src/giza-pp/GIZA++-v2/myassert.h
similarity index 100%
rename from scripts/training/giza-pp/GIZA++-v2/myassert.h
rename to src/giza-pp/GIZA++-v2/myassert.h
diff --git a/scripts/training/giza-pp/GIZA++-v2/mymath.h b/src/giza-pp/GIZA++-v2/mymath.h
similarity index 100%
rename from scripts/training/giza-pp/GIZA++-v2/mymath.h
rename to src/giza-pp/GIZA++-v2/mymath.h
diff --git a/src/giza-pp/GIZA++-v2/mystl.h b/src/giza-pp/GIZA++-v2/mystl.h
new file mode 100644
index 0000000..2046e11
--- /dev/null
+++ b/src/giza-pp/GIZA++-v2/mystl.h
@@ -0,0 +1,321 @@
+/* ---------------------------------------------------------------- */
+/* Copyright 1998 (c) by RWTH Aachen - Lehrstuhl fuer Informatik VI */
+/* Franz Josef Och */
+/* ---------------------------------------------------------------- */
+#ifndef MY_STL_H_DEFINED
+#define MY_STL_H_DEFINED
+
+#include <string>
+using namespace std;
+#ifdef USE_STLPORT
+#ifdef __STL_DEBUG
+using namespace _STLD;
+#else
+using namespace _STL;
+#endif
+#endif
+
+#include "myassert.h"
+#include <string>
+#include <utility>
+
+#include <unordered_map>
+#define hash_map unordered_map
+
+#include <vector>
+#include <iostream>
+#include "mymath.h"
+#include "Array2.h"
+
+#define over_string(a,i) for(unsigned int i=0;i<a.length();i++)
+#define over_array(a,i) for(i=(a).low();i<=(a).high();i++)
+#define backwards_array(a,i) for(i=(a).high();i>=(a).low();i--)
+#define over_arr(a,i) for(int i=(a).low();i<=(a).high();i++)
+#define over_arrMAX(a,i,max) for(int i=(a).low();i<=min((a).high(),max-1);i++)
+#define backwards_arr(a,i) for(int i=(a).high();i>=(a).low();i--)
+
+extern double n1mult,n2mult,n3mult;
+
+inline double realProb(int n1,int n2)
+{
+ massert(n1<=n2);
+ iassert(n1>=0&&n2>0);
+ if(n2==0)n2=1;
+ return ((double)n1)/(double)n2;
+}
+
+inline double verfProb(int n1,int n2)
+{
+ double prob = realProb(n1,n2);
+ if( n1==1 )return prob*n1mult;
+ else if( n1==2 )return prob*n2mult;
+ else if( n1==3 )return prob*n3mult;
+ else
+ return prob;
+}
+
+inline bool prefix(const string&x,const string&y)
+{
+ if(y.size()>x.size() )
+ return 0;
+ for(unsigned int i=0;i<y.size();++i)
+ if( y[i]!=x[i] )
+ return 0;
+ return 1;
+}
+
+
+/*template<class T>
+int lev(const T&s1,const T&s2)
+{
+ Array2<int,vector<int> > a(s1.size()+1,s2.size()+1,1000);
+ Array2<pair<int,int>,vector<pair<int,int> > > back(s1.size()+1,s2.size()+1,pair<int,int>(0,0));
+ for(unsigned int i=0;i<=s1.size();i++)
+ for(unsigned int j=0;j<=s2.size();j++)
+ {
+ if( i==0&&j==0 )
+ a(i,j)=0;
+ else
+ {
+ int aDEL=100,aINS=100,aSUB=100;
+ if(i>0)
+ aDEL=a(i-1,j)+1;
+ if(j>0)
+ aINS=a(i,j-1)+1;
+ if(i>0&&j>0)
+ aSUB=a(i-1,j-1)+ !(s1[i-1]==s2[j-1]);
+ if( aSUB<=aDEL && aSUB<=aINS )
+ {
+ a(i,j)=aSUB;
+ back(i,j)=pair<int,int>(i-1,j-1);
+ }
+ else if( aDEL<=aSUB && aDEL<=aINS )
+ {
+ a(i,j)=aDEL;
+ back(i,j)=pair<int,int>(i-1,j);
+ }
+ else
+ {
+ a(i,j)=aINS;
+ back(i,j)=pair<int,int>(i,j-1);
+ }
+ }
+ }
+ return a(s1.size(),s2.size());
+}
+
+template<class T>
+float rel_lev(const T&s1,const T&s2)
+{
+ if( s1.size()==0 )
+ return s2.size()==0;
+ else
+ return min(1.0,lev(s1,s2)/(double)s1.size());
+}*/
+
+template<class V> int Hash(const pair<V,V>&a)
+{ return Hash(a.first)+13001*Hash(a.second); }
+
+template<class T1,class T2>
+ostream& operator<<(ostream &out,const pair<T1,T2> &ir)
+{
+ out << "(" << ir.first << "," << ir.second << ")";
+ return out;
+}
+
+inline int Hash(const string& s)
+{
+ int sum=0;
+ string::const_iterator i=s.begin(),end=s.end();
+ for(;i!=end;i++)sum=5*sum+(*i);
+ return sum;
+}
+template<class A,class B,class C>
+class tri
+{
+public:
+ A a;
+ B b;
+ C c;
+ tri(){};
+ tri(const A&_a,const B&_b,const C&_c)
+ : a(_a),b(_b),c(_c) {}
+};
+template<class A,class B,class C>
+bool operator==(const tri<A,B,C>&x,const tri<A,B,C>&y)
+{ return x.a==y.a&&x.b==y.b&&x.c==y.c;}
+
+template<class A,class B,class C>
+bool operator<(const tri<A,B,C>&x,const tri<A,B,C>&y)
+{
+ if(x.a<y.a)return 1;
+ if(y.a<x.a)return 0;
+ if(x.b<y.b)return 1;
+ if(y.b<x.b)return 0;
+ if(x.c<y.c)return 1;
+ if(y.c<x.c)return 0;
+ return 0;
+}
+
+double used_time();
+
+template<class T>
+class my_hash
+{
+public:
+ int operator()(const T&t)const {return Hash(t);}
+};
+
+inline int Hash(int value) { return value; }
+#define MY_HASH_BASE hash_map<A,B,my_hash<A> >
+
+template<class A,class B>
+class leda_h_array : public MY_HASH_BASE
+{
+private:
+ B init;
+public:
+ leda_h_array() : MY_HASH_BASE() {}
+ leda_h_array(const B&_init)
+ : MY_HASH_BASE(),init(_init) {}
+ bool defined(const A&a) const
+ { return find(a)!=this->end(); }
+ const B&operator[](const A&a)const
+ {
+ typename MY_HASH_BASE::const_iterator pos=find(a);
+ if( pos==this->end() )
+ return init;
+ else
+ return pos->second;
+ }
+ B&operator[](const A&a)
+ {
+ typename MY_HASH_BASE::iterator pos=find(a);
+ if( pos==this->end() )
+ {
+ insert(MY_HASH_BASE::value_type(a,init));
+ pos=find(a);
+ iassert(pos!=this->end());
+ }
+ return pos->second;
+ }
+ const B&initValue()const
+ {return init;}
+};
+
+#define forall_defined_h(a,b,c,d) for(typename leda_h_array<a,b>::const_iterator __jj__=(d).begin();__jj__!=(d).end()&&((c=__jj__->first),1); ++__jj__)
+template<class T,class U>
+ostream & operator<<(ostream&out,const leda_h_array<T,U>&w)
+{
+ T t;
+ bool makeNl=0;
+ out << "h_array{";
+ forall_defined_h(T,U,t,w)
+ {
+ if( makeNl )
+ out << "\n ";
+ out << "EL:" << t << " INH:" << w[t] << ".";
+ makeNl=1;
+ }
+ return out << "}\n";
+}
+
+template<class T,class U>
+istream & operator>>(istream&in,leda_h_array<T,U>&)
+{
+ return in;
+}
+
+template<class A,class B>
+bool operator==(const leda_h_array<A,B>&p1,const leda_h_array<A,B>&p2)
+{
+ A v;
+ forall_defined_h(A,B,v,p1)
+ if( !( p1[v]==p2[v]) ) return 0;
+ forall_defined_h(A,B,v,p2)
+ if( !( p1[v]==p2[v]) ) return 0;
+ return 1;
+}
+
+template<class T>
+int count_elements(T a,T b)
+{
+ int c=0;
+ while(a!=b)
+ {
+ a++;
+ c++;
+ }
+ return c;
+}
+
+template<class T>
+T normalize_if_possible_with_increment(T*a,T*b,int increment)
+{
+ T sum=0;
+ for(T*i=a;i!=b;i+=increment)
+ sum+=*i;
+ if( sum )
+ for(T*i=a;i!=b;i+=increment)
+ *i/=sum;
+ else
+ {
+ T factor=increment/(b-a);
+ for(T*i=a;i!=b;i+=increment)
+ *i=factor;
+ }
+ return sum;
+}
+
+template<class T>
+inline int m_comp_3way(T a,T b,int n)
+{
+ int _n=0;
+ while((_n++<n) && a && b)
+ {
+ const typename T::value_type &aa=*a;
+ const typename T::value_type &bb=*b;
+ if( aa<bb )return 1;
+ if( bb<aa )return -1;
+ ++a;
+ ++b;
+ }
+ return 0;
+}
+
+template<class T>
+void smooth_standard(T*a,T*b,double p)
+{
+ int n=b-a;
+ if( n==0 )
+ return;
+ double pp=p/n;
+ for(T*i=a;i!=b;++i)
+ *i = (1.0-p)*(*i)+pp;
+}
+
+template<class T>
+const T *conv(typename vector<T>::const_iterator i)
+{
+ return &(*i);
+}
+#if __GNUC__>2
+template<class T>
+T *conv(typename vector<T>::iterator i)
+{
+ return &(*i);
+}
+#endif
+
+/*template<class T>
+const T *conv(const T*x)
+{
+ return x;
+}*/
+template<class T>
+T *conv(T*x)
+{
+ return x;
+}
+
+#endif
diff --git a/scripts/training/giza-pp/GIZA++-v2/parse.cpp b/src/giza-pp/GIZA++-v2/parse.cpp
similarity index 100%
rename from scripts/training/giza-pp/GIZA++-v2/parse.cpp
rename to src/giza-pp/GIZA++-v2/parse.cpp
diff --git a/scripts/training/giza-pp/GIZA++-v2/plain2snt.cpp b/src/giza-pp/GIZA++-v2/plain2snt.cpp
similarity index 100%
rename from scripts/training/giza-pp/GIZA++-v2/plain2snt.cpp
rename to src/giza-pp/GIZA++-v2/plain2snt.cpp
diff --git a/scripts/training/giza-pp/GIZA++-v2/reports.cpp b/src/giza-pp/GIZA++-v2/reports.cpp
similarity index 100%
rename from scripts/training/giza-pp/GIZA++-v2/reports.cpp
rename to src/giza-pp/GIZA++-v2/reports.cpp
diff --git a/scripts/training/giza-pp/GIZA++-v2/small_snt2cooc.cpp b/src/giza-pp/GIZA++-v2/small_snt2cooc.cpp
similarity index 100%
rename from scripts/training/giza-pp/GIZA++-v2/small_snt2cooc.cpp
rename to src/giza-pp/GIZA++-v2/small_snt2cooc.cpp
diff --git a/scripts/training/giza-pp/GIZA++-v2/snt2cooc.cpp b/src/giza-pp/GIZA++-v2/snt2cooc.cpp
similarity index 100%
rename from scripts/training/giza-pp/GIZA++-v2/snt2cooc.cpp
rename to src/giza-pp/GIZA++-v2/snt2cooc.cpp
diff --git a/scripts/training/giza-pp/GIZA++-v2/snt2plain.cpp b/src/giza-pp/GIZA++-v2/snt2plain.cpp
similarity index 100%
rename from scripts/training/giza-pp/GIZA++-v2/snt2plain.cpp
rename to src/giza-pp/GIZA++-v2/snt2plain.cpp
diff --git a/scripts/training/giza-pp/GIZA++-v2/trainGIZA++.sh b/src/giza-pp/GIZA++-v2/trainGIZA++.sh
similarity index 100%
rename from scripts/training/giza-pp/GIZA++-v2/trainGIZA++.sh
rename to src/giza-pp/GIZA++-v2/trainGIZA++.sh
diff --git a/scripts/training/giza-pp/GIZA++-v2/transpair_model1.h b/src/giza-pp/GIZA++-v2/transpair_model1.h
similarity index 100%
rename from scripts/training/giza-pp/GIZA++-v2/transpair_model1.h
rename to src/giza-pp/GIZA++-v2/transpair_model1.h
diff --git a/scripts/training/giza-pp/GIZA++-v2/transpair_model2.h b/src/giza-pp/GIZA++-v2/transpair_model2.h
similarity index 100%
rename from scripts/training/giza-pp/GIZA++-v2/transpair_model2.h
rename to src/giza-pp/GIZA++-v2/transpair_model2.h
diff --git a/scripts/training/giza-pp/GIZA++-v2/transpair_model3.cpp b/src/giza-pp/GIZA++-v2/transpair_model3.cpp
similarity index 100%
rename from scripts/training/giza-pp/GIZA++-v2/transpair_model3.cpp
rename to src/giza-pp/GIZA++-v2/transpair_model3.cpp
diff --git a/scripts/training/giza-pp/GIZA++-v2/transpair_model3.h b/src/giza-pp/GIZA++-v2/transpair_model3.h
similarity index 100%
rename from scripts/training/giza-pp/GIZA++-v2/transpair_model3.h
rename to src/giza-pp/GIZA++-v2/transpair_model3.h
diff --git a/scripts/training/giza-pp/GIZA++-v2/transpair_model4.cpp b/src/giza-pp/GIZA++-v2/transpair_model4.cpp
similarity index 100%
rename from scripts/training/giza-pp/GIZA++-v2/transpair_model4.cpp
rename to src/giza-pp/GIZA++-v2/transpair_model4.cpp
diff --git a/scripts/training/giza-pp/GIZA++-v2/transpair_model4.h b/src/giza-pp/GIZA++-v2/transpair_model4.h
similarity index 100%
rename from scripts/training/giza-pp/GIZA++-v2/transpair_model4.h
rename to src/giza-pp/GIZA++-v2/transpair_model4.h
diff --git a/scripts/training/giza-pp/GIZA++-v2/transpair_model5.cpp b/src/giza-pp/GIZA++-v2/transpair_model5.cpp
similarity index 100%
rename from scripts/training/giza-pp/GIZA++-v2/transpair_model5.cpp
rename to src/giza-pp/GIZA++-v2/transpair_model5.cpp
diff --git a/scripts/training/giza-pp/GIZA++-v2/transpair_model5.h b/src/giza-pp/GIZA++-v2/transpair_model5.h
similarity index 100%
rename from scripts/training/giza-pp/GIZA++-v2/transpair_model5.h
rename to src/giza-pp/GIZA++-v2/transpair_model5.h
diff --git a/scripts/training/giza-pp/GIZA++-v2/transpair_modelhmm.h b/src/giza-pp/GIZA++-v2/transpair_modelhmm.h
similarity index 100%
rename from scripts/training/giza-pp/GIZA++-v2/transpair_modelhmm.h
rename to src/giza-pp/GIZA++-v2/transpair_modelhmm.h
diff --git a/scripts/training/giza-pp/GIZA++-v2/utility.cpp b/src/giza-pp/GIZA++-v2/utility.cpp
similarity index 100%
rename from scripts/training/giza-pp/GIZA++-v2/utility.cpp
rename to src/giza-pp/GIZA++-v2/utility.cpp
diff --git a/scripts/training/giza-pp/GIZA++-v2/utility.h b/src/giza-pp/GIZA++-v2/utility.h
similarity index 100%
rename from scripts/training/giza-pp/GIZA++-v2/utility.h
rename to src/giza-pp/GIZA++-v2/utility.h
diff --git a/scripts/training/giza-pp/GIZA++-v2/vocab.cpp b/src/giza-pp/GIZA++-v2/vocab.cpp
similarity index 100%
rename from scripts/training/giza-pp/GIZA++-v2/vocab.cpp
rename to src/giza-pp/GIZA++-v2/vocab.cpp
diff --git a/scripts/training/giza-pp/GIZA++-v2/vocab.h b/src/giza-pp/GIZA++-v2/vocab.h
similarity index 100%
rename from scripts/training/giza-pp/GIZA++-v2/vocab.h
rename to src/giza-pp/GIZA++-v2/vocab.h
diff --git a/scripts/training/giza-pp/Makefile b/src/giza-pp/Makefile
similarity index 100%
rename from scripts/training/giza-pp/Makefile
rename to src/giza-pp/Makefile
diff --git a/scripts/training/giza-pp/README b/src/giza-pp/README
similarity index 100%
rename from scripts/training/giza-pp/README
rename to src/giza-pp/README
diff --git a/scripts/training/giza-pp/mkcls-v2/Array.h b/src/giza-pp/mkcls-v2/Array.h
similarity index 100%
rename from scripts/training/giza-pp/mkcls-v2/Array.h
rename to src/giza-pp/mkcls-v2/Array.h
diff --git a/scripts/training/giza-pp/mkcls-v2/FixedArray.h b/src/giza-pp/mkcls-v2/FixedArray.h
similarity index 100%
rename from scripts/training/giza-pp/mkcls-v2/FixedArray.h
rename to src/giza-pp/mkcls-v2/FixedArray.h
diff --git a/scripts/training/giza-pp/mkcls-v2/FlexArray.h b/src/giza-pp/mkcls-v2/FlexArray.h
similarity index 100%
rename from scripts/training/giza-pp/mkcls-v2/FlexArray.h
rename to src/giza-pp/mkcls-v2/FlexArray.h
diff --git a/scripts/training/giza-pp/mkcls-v2/GDAOptimization.cpp b/src/giza-pp/mkcls-v2/GDAOptimization.cpp
similarity index 100%
rename from scripts/training/giza-pp/mkcls-v2/GDAOptimization.cpp
rename to src/giza-pp/mkcls-v2/GDAOptimization.cpp
diff --git a/scripts/training/giza-pp/mkcls-v2/GDAOptimization.h b/src/giza-pp/mkcls-v2/GDAOptimization.h
similarity index 100%
rename from scripts/training/giza-pp/mkcls-v2/GDAOptimization.h
rename to src/giza-pp/mkcls-v2/GDAOptimization.h
diff --git a/scripts/training/giza-pp/mkcls-v2/GNU.GPL b/src/giza-pp/mkcls-v2/GNU.GPL
similarity index 100%
rename from scripts/training/giza-pp/mkcls-v2/GNU.GPL
rename to src/giza-pp/mkcls-v2/GNU.GPL
diff --git a/scripts/training/giza-pp/mkcls-v2/HCOptimization.cpp b/src/giza-pp/mkcls-v2/HCOptimization.cpp
similarity index 100%
rename from scripts/training/giza-pp/mkcls-v2/HCOptimization.cpp
rename to src/giza-pp/mkcls-v2/HCOptimization.cpp
diff --git a/scripts/training/giza-pp/mkcls-v2/HCOptimization.h b/src/giza-pp/mkcls-v2/HCOptimization.h
similarity index 100%
rename from scripts/training/giza-pp/mkcls-v2/HCOptimization.h
rename to src/giza-pp/mkcls-v2/HCOptimization.h
diff --git a/scripts/training/giza-pp/mkcls-v2/IterOptimization.cpp b/src/giza-pp/mkcls-v2/IterOptimization.cpp
similarity index 100%
rename from scripts/training/giza-pp/mkcls-v2/IterOptimization.cpp
rename to src/giza-pp/mkcls-v2/IterOptimization.cpp
diff --git a/scripts/training/giza-pp/mkcls-v2/IterOptimization.h b/src/giza-pp/mkcls-v2/IterOptimization.h
similarity index 100%
rename from scripts/training/giza-pp/mkcls-v2/IterOptimization.h
rename to src/giza-pp/mkcls-v2/IterOptimization.h
diff --git a/scripts/training/giza-pp/mkcls-v2/KategProblem.cpp b/src/giza-pp/mkcls-v2/KategProblem.cpp
similarity index 100%
rename from scripts/training/giza-pp/mkcls-v2/KategProblem.cpp
rename to src/giza-pp/mkcls-v2/KategProblem.cpp
diff --git a/scripts/training/giza-pp/mkcls-v2/KategProblem.h b/src/giza-pp/mkcls-v2/KategProblem.h
similarity index 100%
rename from scripts/training/giza-pp/mkcls-v2/KategProblem.h
rename to src/giza-pp/mkcls-v2/KategProblem.h
diff --git a/scripts/training/giza-pp/mkcls-v2/KategProblemKBC.cpp b/src/giza-pp/mkcls-v2/KategProblemKBC.cpp
similarity index 100%
rename from scripts/training/giza-pp/mkcls-v2/KategProblemKBC.cpp
rename to src/giza-pp/mkcls-v2/KategProblemKBC.cpp
diff --git a/scripts/training/giza-pp/mkcls-v2/KategProblemKBC.h b/src/giza-pp/mkcls-v2/KategProblemKBC.h
similarity index 100%
rename from scripts/training/giza-pp/mkcls-v2/KategProblemKBC.h
rename to src/giza-pp/mkcls-v2/KategProblemKBC.h
diff --git a/scripts/training/giza-pp/mkcls-v2/KategProblemTest.cpp b/src/giza-pp/mkcls-v2/KategProblemTest.cpp
similarity index 100%
rename from scripts/training/giza-pp/mkcls-v2/KategProblemTest.cpp
rename to src/giza-pp/mkcls-v2/KategProblemTest.cpp
diff --git a/scripts/training/giza-pp/mkcls-v2/KategProblemTest.h b/src/giza-pp/mkcls-v2/KategProblemTest.h
similarity index 100%
rename from scripts/training/giza-pp/mkcls-v2/KategProblemTest.h
rename to src/giza-pp/mkcls-v2/KategProblemTest.h
diff --git a/scripts/training/giza-pp/mkcls-v2/KategProblemWBC.cpp b/src/giza-pp/mkcls-v2/KategProblemWBC.cpp
similarity index 100%
rename from scripts/training/giza-pp/mkcls-v2/KategProblemWBC.cpp
rename to src/giza-pp/mkcls-v2/KategProblemWBC.cpp
diff --git a/scripts/training/giza-pp/mkcls-v2/KategProblemWBC.h b/src/giza-pp/mkcls-v2/KategProblemWBC.h
similarity index 100%
rename from scripts/training/giza-pp/mkcls-v2/KategProblemWBC.h
rename to src/giza-pp/mkcls-v2/KategProblemWBC.h
diff --git a/scripts/training/giza-pp/mkcls-v2/LICENSE b/src/giza-pp/mkcls-v2/LICENSE
similarity index 100%
rename from scripts/training/giza-pp/mkcls-v2/LICENSE
rename to src/giza-pp/mkcls-v2/LICENSE
diff --git a/scripts/training/giza-pp/mkcls-v2/MSBOptimization.cpp b/src/giza-pp/mkcls-v2/MSBOptimization.cpp
similarity index 100%
rename from scripts/training/giza-pp/mkcls-v2/MSBOptimization.cpp
rename to src/giza-pp/mkcls-v2/MSBOptimization.cpp
diff --git a/scripts/training/giza-pp/mkcls-v2/MSBOptimization.h b/src/giza-pp/mkcls-v2/MSBOptimization.h
similarity index 100%
rename from scripts/training/giza-pp/mkcls-v2/MSBOptimization.h
rename to src/giza-pp/mkcls-v2/MSBOptimization.h
diff --git a/scripts/training/giza-pp/mkcls-v2/MYOptimization.cpp b/src/giza-pp/mkcls-v2/MYOptimization.cpp
similarity index 100%
rename from scripts/training/giza-pp/mkcls-v2/MYOptimization.cpp
rename to src/giza-pp/mkcls-v2/MYOptimization.cpp
diff --git a/scripts/training/giza-pp/mkcls-v2/MYOptimization.h b/src/giza-pp/mkcls-v2/MYOptimization.h
similarity index 100%
rename from scripts/training/giza-pp/mkcls-v2/MYOptimization.h
rename to src/giza-pp/mkcls-v2/MYOptimization.h
diff --git a/scripts/training/giza-pp/mkcls-v2/Makefile b/src/giza-pp/mkcls-v2/Makefile
similarity index 100%
rename from scripts/training/giza-pp/mkcls-v2/Makefile
rename to src/giza-pp/mkcls-v2/Makefile
diff --git a/scripts/training/giza-pp/mkcls-v2/Optimization.cpp b/src/giza-pp/mkcls-v2/Optimization.cpp
similarity index 100%
rename from scripts/training/giza-pp/mkcls-v2/Optimization.cpp
rename to src/giza-pp/mkcls-v2/Optimization.cpp
diff --git a/scripts/training/giza-pp/mkcls-v2/Optimization.h b/src/giza-pp/mkcls-v2/Optimization.h
similarity index 100%
rename from scripts/training/giza-pp/mkcls-v2/Optimization.h
rename to src/giza-pp/mkcls-v2/Optimization.h
diff --git a/scripts/training/giza-pp/mkcls-v2/PopOptimization.cpp b/src/giza-pp/mkcls-v2/PopOptimization.cpp
similarity index 100%
rename from scripts/training/giza-pp/mkcls-v2/PopOptimization.cpp
rename to src/giza-pp/mkcls-v2/PopOptimization.cpp
diff --git a/scripts/training/giza-pp/mkcls-v2/PopOptimization.h b/src/giza-pp/mkcls-v2/PopOptimization.h
similarity index 100%
rename from scripts/training/giza-pp/mkcls-v2/PopOptimization.h
rename to src/giza-pp/mkcls-v2/PopOptimization.h
diff --git a/scripts/training/giza-pp/mkcls-v2/Problem.cpp b/src/giza-pp/mkcls-v2/Problem.cpp
similarity index 100%
rename from scripts/training/giza-pp/mkcls-v2/Problem.cpp
rename to src/giza-pp/mkcls-v2/Problem.cpp
diff --git a/scripts/training/giza-pp/mkcls-v2/Problem.h b/src/giza-pp/mkcls-v2/Problem.h
similarity index 100%
rename from scripts/training/giza-pp/mkcls-v2/Problem.h
rename to src/giza-pp/mkcls-v2/Problem.h
diff --git a/scripts/training/giza-pp/mkcls-v2/ProblemTest.cpp b/src/giza-pp/mkcls-v2/ProblemTest.cpp
similarity index 100%
rename from scripts/training/giza-pp/mkcls-v2/ProblemTest.cpp
rename to src/giza-pp/mkcls-v2/ProblemTest.cpp
diff --git a/scripts/training/giza-pp/mkcls-v2/ProblemTest.h b/src/giza-pp/mkcls-v2/ProblemTest.h
similarity index 100%
rename from scripts/training/giza-pp/mkcls-v2/ProblemTest.h
rename to src/giza-pp/mkcls-v2/ProblemTest.h
diff --git a/scripts/training/giza-pp/mkcls-v2/README b/src/giza-pp/mkcls-v2/README
similarity index 100%
rename from scripts/training/giza-pp/mkcls-v2/README
rename to src/giza-pp/mkcls-v2/README
diff --git a/scripts/training/giza-pp/mkcls-v2/RRTOptimization.cpp b/src/giza-pp/mkcls-v2/RRTOptimization.cpp
similarity index 100%
rename from scripts/training/giza-pp/mkcls-v2/RRTOptimization.cpp
rename to src/giza-pp/mkcls-v2/RRTOptimization.cpp
diff --git a/scripts/training/giza-pp/mkcls-v2/RRTOptimization.h b/src/giza-pp/mkcls-v2/RRTOptimization.h
similarity index 100%
rename from scripts/training/giza-pp/mkcls-v2/RRTOptimization.h
rename to src/giza-pp/mkcls-v2/RRTOptimization.h
diff --git a/scripts/training/giza-pp/mkcls-v2/SAOptimization.cpp b/src/giza-pp/mkcls-v2/SAOptimization.cpp
similarity index 100%
rename from scripts/training/giza-pp/mkcls-v2/SAOptimization.cpp
rename to src/giza-pp/mkcls-v2/SAOptimization.cpp
diff --git a/scripts/training/giza-pp/mkcls-v2/SAOptimization.h b/src/giza-pp/mkcls-v2/SAOptimization.h
similarity index 100%
rename from scripts/training/giza-pp/mkcls-v2/SAOptimization.h
rename to src/giza-pp/mkcls-v2/SAOptimization.h
diff --git a/scripts/training/giza-pp/mkcls-v2/StatVar.cpp b/src/giza-pp/mkcls-v2/StatVar.cpp
similarity index 100%
rename from scripts/training/giza-pp/mkcls-v2/StatVar.cpp
rename to src/giza-pp/mkcls-v2/StatVar.cpp
diff --git a/scripts/training/giza-pp/mkcls-v2/StatVar.h b/src/giza-pp/mkcls-v2/StatVar.h
similarity index 100%
rename from scripts/training/giza-pp/mkcls-v2/StatVar.h
rename to src/giza-pp/mkcls-v2/StatVar.h
diff --git a/scripts/training/giza-pp/mkcls-v2/TAOptimization.cpp b/src/giza-pp/mkcls-v2/TAOptimization.cpp
similarity index 100%
rename from scripts/training/giza-pp/mkcls-v2/TAOptimization.cpp
rename to src/giza-pp/mkcls-v2/TAOptimization.cpp
diff --git a/scripts/training/giza-pp/mkcls-v2/TAOptimization.h b/src/giza-pp/mkcls-v2/TAOptimization.h
similarity index 100%
rename from scripts/training/giza-pp/mkcls-v2/TAOptimization.h
rename to src/giza-pp/mkcls-v2/TAOptimization.h
diff --git a/scripts/training/giza-pp/mkcls-v2/general.cpp b/src/giza-pp/mkcls-v2/general.cpp
similarity index 100%
rename from scripts/training/giza-pp/mkcls-v2/general.cpp
rename to src/giza-pp/mkcls-v2/general.cpp
diff --git a/scripts/training/giza-pp/mkcls-v2/general.h b/src/giza-pp/mkcls-v2/general.h
similarity index 100%
rename from scripts/training/giza-pp/mkcls-v2/general.h
rename to src/giza-pp/mkcls-v2/general.h
diff --git a/scripts/training/giza-pp/mkcls-v2/makePackage.sh b/src/giza-pp/mkcls-v2/makePackage.sh
similarity index 100%
rename from scripts/training/giza-pp/mkcls-v2/makePackage.sh
rename to src/giza-pp/mkcls-v2/makePackage.sh
diff --git a/scripts/training/giza-pp/mkcls-v2/mkcls.cpp b/src/giza-pp/mkcls-v2/mkcls.cpp
similarity index 100%
rename from scripts/training/giza-pp/mkcls-v2/mkcls.cpp
rename to src/giza-pp/mkcls-v2/mkcls.cpp
diff --git a/scripts/training/giza-pp/mkcls-v2/my.h b/src/giza-pp/mkcls-v2/my.h
similarity index 100%
rename from scripts/training/giza-pp/mkcls-v2/my.h
rename to src/giza-pp/mkcls-v2/my.h
diff --git a/scripts/training/giza-pp/mkcls-v2/myassert.h b/src/giza-pp/mkcls-v2/myassert.h
similarity index 100%
rename from scripts/training/giza-pp/mkcls-v2/myassert.h
rename to src/giza-pp/mkcls-v2/myassert.h
diff --git a/src/giza-pp/mkcls-v2/myleda.h b/src/giza-pp/mkcls-v2/myleda.h
new file mode 100644
index 0000000..adf3845
--- /dev/null
+++ b/src/giza-pp/mkcls-v2/myleda.h
@@ -0,0 +1,232 @@
+/*
+
+Copyright (C) 1997,1998,1999,2000,2001 Franz Josef Och
+
+mkcls - a program for making word classes .
+
+This program is free software; you can redistribute it and/or
+modify it under the terms of the GNU General Public License
+as published by the Free Software Foundation; either version 2
+of the License, or (at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
+USA.
+
+*/
+
+
+
+#ifndef myleda_HEADER_defined
+#define myleda_HEADER_defined
+#include <map>
+#include <set>
+#include <unordered_map>
+#include "myassert.h"
+#include "FixedArray.h"
+using namespace std;
+
+template<class T>
+class leda_array : public FixedArray<T>
+{
+public:
+ leda_array() {}
+ leda_array(int n) : FixedArray<T>(n) {}
+};
+
+template<class T>
+class leda_set : public set<T>
+{
+public:
+ bool member(const T&m) const
+ { return this->count(m)!=0; }
+ void del(const T&m)
+ { this->erase(m); }
+};
+#define forall_set(a,b,c) for(a::iterator __i__=c.begin();__i__!=c.end()&&((b=*__i__),1);++__i__)
+template<class T>
+leda_set<T> operator&(const leda_set<T>&a,const leda_set<T>&b)
+{
+ leda_set<T>c;
+ insert_iterator<set<T> > iter(c,c.begin());
+ set_intersection(a.begin(),a.end(),b.begin(),b.end(),iter);
+ return c;
+}
+template<class T>
+leda_set<T> operator-(const leda_set<T>&a,const leda_set<T>&b)
+{
+ leda_set<T>c;
+ insert_iterator<set<T> > iter(c,c.begin());
+ set_difference(a.begin(),a.end(),b.begin(),b.end(),iter);
+ return c;
+}
+
+template<class A,class B>
+class leda_d_array : public map<A,B>
+{
+private:
+ B init;
+public:
+ bool defined(const A&a) const
+ { return find(a)!=this->end(); }
+ const B&operator[](const A&a)const
+ {
+ typename map<A,B>::const_iterator pos=find(a);
+ iassert(pos!=this->end());
+ if( pos==this->end() )
+ return init;
+ else
+ return pos->second;
+ }
+ B&operator[](const A&a)
+ {
+ typename map<A,B>::iterator pos=find(a);
+ if( pos==this->end() )
+ {
+ insert(map<A,B>::value_type(a,init));
+ pos=find(a);
+ iassert(pos!=this->end());
+ }
+ return pos->second;
+ }
+};
+
+#define forall_defined_d(a,b,c,d) for(typename leda_d_array<a,b>::const_iterator __ii__=(d).begin();__ii__!=(d).end()&&((c=__ii__->first),1) ;++__ii__)
+#define forall_d(a,b,c,d) for(typename leda_d_array<a,b>::const_iterator __ii__=(d).begin();__ii__!=(d).end()&&((c=__ii__->second),1);++__ii__)
+
+double used_time();
+
+template<class T>
+class my_hash
+{
+public:
+ int operator()(const T&t)const {return Hash(t);}
+};
+
+inline int Hash(int value) { return value; }
+#define MY_HASH_BASE std::unordered_map<A,B>
+
+template<class A,class B>
+class leda_h_array : public MY_HASH_BASE
+{
+private:
+ B init;
+public:
+ leda_h_array() {}
+ leda_h_array(const B&_init)
+ : MY_HASH_BASE(),init(_init) {}
+ bool defined(const A&a) const
+ { return find(a)!=this->end(); }
+ const B&operator[](const A&a)const
+ {
+ typename MY_HASH_BASE::const_iterator pos=this->find(a);
+
+ if( pos==this->end() )
+ return init;
+ else
+ return pos->second;
+ }
+ B&operator[](const A&a)
+ {
+ typename MY_HASH_BASE::iterator pos=this->find(a);
+ if( pos==this->end() )
+ {
+ this->insert(typename MY_HASH_BASE::value_type(a,init));
+ pos=this->find(a);
+ iassert(pos!=this->end());
+ }
+ return pos->second;
+ }
+};
+
+#define forall_defined_h(a,b,c,d) for(typename leda_h_array<a,b>::const_iterator __jj__=(d).begin();__jj__!=(d).end()&&((c=__jj__->first),1); ++__jj__)
+#define forall_defined_h2(a,b,c,d) for(leda_h_array<a,b>::const_iterator __jj__=(d).begin();__jj__!=(d).end()&&((c=__jj__->first),1); ++__jj__)
+#define forall_h(a,b,c,d) for(typename leda_h_array<a,b>::const_iterator __jjj__=(d).begin();__jjj__!=(d).end()&&((c=__jjj__->second),1);++__jjj__)
+
+
+template<class T> int compare(const T&a,const T&b)
+{if(a==b)return 0; else if(a<b) return -1; else return 1;}
+
+template<class T,class U>
+ostream & operator<<(ostream&out,const leda_h_array<T,U>&w)
+{
+ T t;
+ bool makeNl=0;
+ out << "h_array{";
+ forall_defined_h(T,U,t,w)
+ {
+ if( makeNl )
+ out << "\n ";
+ out << "EL:" << t << " INH:" << w[t] << ".";
+ makeNl=1;
+ }
+ return out << "}\n";
+}
+template<class T,class U>
+ostream & operator<<(ostream&out,const leda_d_array<T,U>&w)
+{
+ T t;
+ bool makeNl=0;
+ out << "h_array{";
+ forall_defined_h(T,U,t,w)
+ {
+ if( makeNl )
+ out << "\n ";
+ out << "EL:" << t << " INH:" << w[t] << ".";
+ makeNl=1;
+ }
+ return out << "}\n";
+}
+
+template<class T>
+ostream&printSet(ostream&out,const leda_set<T>&s)
+{
+ bool first=1;
+ T t;
+ out << "{";
+ forall_set(typename set<T>,t,s)
+ {
+ if( first==0 )
+ out << ", ";
+ out << t;
+ first=0;
+ }
+ return out << "}\n";
+}
+
+template<class T,class U>
+istream & operator>>(istream&in,leda_h_array<T,U>&)
+{
+ return in;
+}
+
+template<class A,class B>
+bool operator==(const leda_h_array<A,B>&p1,const leda_h_array<A,B>&p2)
+{
+ A v;
+ forall_defined_h(A,B,v,p1)
+ if( !( p1[v]==p2[v]) ) return 0;
+ forall_defined_h(A,B,v,p2)
+ if( !( p1[v]==p2[v]) ) return 0;
+ return 1;
+}
+template<class A,class B>
+bool operator==(const leda_d_array<A,B>&p1,const leda_d_array<A,B>&p2)
+{
+ A v;
+ forall_defined_d(A,B,v,p1)
+ if( !( p1[v]==p2[v]) ) return 0;
+ forall_defined_d(A,B,v,p2)
+ if( !( p1[v]==p2[v]) ) return 0;
+ return 1;
+}
+
+
+
+#endif
diff --git a/src/giza-pp/mkcls-v2/mystl.h b/src/giza-pp/mkcls-v2/mystl.h
new file mode 100644
index 0000000..99f7965
--- /dev/null
+++ b/src/giza-pp/mkcls-v2/mystl.h
@@ -0,0 +1,116 @@
+/*
+
+Copyright (C) 1997,1998,1999,2000,2001 Franz Josef Och
+
+mkcls - a program for making word classes .
+
+This program is free software; you can redistribute it and/or
+modify it under the terms of the GNU General Public License
+as published by the Free Software Foundation; either version 2
+of the License, or (at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
+USA.
+
+*/
+
+
+
+#ifndef MY_STL_H_DEFINED
+#define MY_STL_H_DEFINED
+#include <string>
+#include <utility>
+#include <unordered_map>
+#include <cmath>
+
+using namespace std;
+
+namespace std {
+ template <typename T, typename V>
+ struct hash<pair<T, V> > {
+ static inline void hash_combine(std::size_t & seed, const T & v) {
+ hash<T> hasher;
+ seed ^= hasher(v) + 0x9e3779b9 + (seed << 6) + (seed >> 2);
+ }
+
+ size_t operator()(const std::pair<T, V>& x) const {
+ size_t h = 0;
+ hash_combine(h, x.first);
+ hash_combine(h, x.second);
+ return h;
+ }
+ };
+}
+
+#define over_string(a,i) for(unsigned int i=0;i<a.length();i++)
+
+template<class T1,class T2>
+istream& operator>>(istream &in,pair<T1,T2> &ir)
+{
+ char c;
+ do in.get(c); while (in && isspace(c));
+ if (!in) return in;
+ if (c != '(') in.putback(c);
+ in >> ir.first;
+ do in.get(c); while (isspace(c));
+ if (c != ',') in.putback(c);
+ in >> ir.second;
+ do in.get(c); while (c == ' ');
+ if (c != ')') in.putback(c);
+ return in;
+}
+
+template<class T1,class T2>
+ostream& operator<<(ostream &out,const pair<T1,T2> &ir)
+{
+ out << "(" << ir.first << "," << ir.second << ")";
+ return out;
+}
+
+void printSpaces(ostream&out,int n);
+void mysplit(const string &s,string &s1,string &s2);
+string untilChar(const string&s,char c);
+
+template<class A,class B,class C>
+class tri
+{
+public:
+ A a;
+ B b;
+ C c;
+ tri(){};
+ tri(const A&_a,const B&_b,const C&_c)
+ : a(_a),b(_b),c(_c) {}
+};
+template<class A,class B,class C>
+bool operator==(const tri<A,B,C>&x,const tri<A,B,C>&y)
+{ return x.a==y.a&&x.b==y.b&&x.c==y.c;}
+
+template<class A,class B,class C>
+bool operator<(const tri<A,B,C>&x,const tri<A,B,C>&y)
+{
+ if(x.a<y.a)
+ return 1;
+ if(y.a<x.a)
+ return 0;
+
+ if(x.b<y.b)
+ return 1;
+ if(y.b<x.b)
+ return 0;
+
+ if(x.c<y.c)
+ return 1;
+ if(y.c<x.c)
+ return 0;
+ return 0;
+}
+
+#endif
diff --git a/src/joshua/adagrad/AdaGrad.java b/src/joshua/adagrad/AdaGrad.java
new file mode 100755
index 0000000..206abd3
--- /dev/null
+++ b/src/joshua/adagrad/AdaGrad.java
@@ -0,0 +1,142 @@
+package joshua.adagrad;
+
+import joshua.decoder.JoshuaConfiguration;
+import joshua.util.FileUtility;
+import joshua.util.StreamGobbler;
+
+public class AdaGrad {
+ public static void main(String[] args) throws Exception {
+ JoshuaConfiguration joshuaConfiguration = new JoshuaConfiguration();
+ boolean external = false; // should each AdaGrad iteration be launched externally?
+
+ if (args.length == 1) {
+ if (args[0].equals("-h")) {
+ printAdaGradUsage(args.length, true);
+ System.exit(2);
+ } else {
+ external = false;
+ }
+ } else if (args.length == 3) {
+ external = true;
+ } else {
+ printAdaGradUsage(args.length, false);
+ System.exit(1);
+ }
+
+ if (!external) {
+ AdaGradCore myAdaGrad = new AdaGradCore(args[0], joshuaConfiguration);
+ myAdaGrad.run_AdaGrad(); // optimize lambda[]
+ myAdaGrad.finish();
+ } else {
+
+ int maxMem = Integer.parseInt(args[1]);
+ String configFileName = args[2];
+ String stateFileName = FileUtility.dirname(configFileName) + "/AdaGrad.temp.state";
+ String cp = System.getProperty("java.class.path");
+ boolean done = false;
+ int iteration = 0;
+
+ while (!done) {
+ ++iteration;
+ Runtime rt = Runtime.getRuntime();
+ Process p =
+ rt.exec("java -Xmx" + maxMem + "m -cp " + cp + " joshua.adagrad.AdaGradCore " + configFileName
+ + " " + stateFileName + " " + iteration);
+ /*
+ * BufferedReader br_i = new BufferedReader(new InputStreamReader(p.getInputStream()));
+ * BufferedReader br_e = new BufferedReader(new InputStreamReader(p.getErrorStream()));
+ * String dummy_line = null; while ((dummy_line = br_i.readLine()) != null) {
+ * System.out.println(dummy_line); } while ((dummy_line = br_e.readLine()) != null) {
+ * System.out.println(dummy_line); }
+ */
+ StreamGobbler errorGobbler = new StreamGobbler(p.getErrorStream(), 1);
+ StreamGobbler outputGobbler = new StreamGobbler(p.getInputStream(), 1);
+
+ errorGobbler.start();
+ outputGobbler.start();
+
+ int status = p.waitFor();
+
+ if (status == 90) {
+ done = true;
+ } else if (status == 91) {
+ done = false;
+ } else {
+ System.out.println("AdaGrad exiting prematurely (AdaGradCore returned " + status + ")...");
+ break;
+ }
+ }
+ }
+
+ System.exit(0);
+
+ } // main(String[] args)
+
+ public static void printAdaGradUsage(int argsLen, boolean detailed) {
+ if (!detailed) {
+ println("Oops, you provided " + argsLen + " args!");
+ println("");
+ println("Usage:");
+ println(" AdaGrad -maxMem maxMemoryInMB AdaGrad_configFile");
+ println("");
+ println("Where -maxMem specifies the maximum amount of memory (in MB) AdaGrad is");
+ println("allowed to use when performing its calculations (no memroy is needed while");
+ println("the decoder is running),");
+ println("and the config file contains any subset of AdaGrad's 20-some parameters,");
+ println("one per line. Run AdaGrad -h for more details on those parameters.");
+ } else {
+ println("Usage:");
+ println(" AdaGrad -maxMem maxMemoryInMB AdaGrad_configFile");
+ println("");
+ println("Where -maxMem specifies the maximum amount of memory (in MB) AdaGrad is");
+ println("allowed to use when performing its calculations (no memroy is needed while");
+ println("the decoder is running),");
+ println("and the config file contains any subset of AdaGrad's 20-some parameters,");
+ println("one per line. Those parameters, and their default values, are:");
+ println("");
+ println("Relevant files:");
+ println(" -dir dirPrefix: working directory\n [[default: null string (i.e. they are in the current directory)]]");
+ println(" -s sourceFile: source sentences (foreign sentences) of the AdaGrad dataset\n [[default: null string (i.e. file name is not needed by AdaGrad)]]");
+ println(" -r refFile: target sentences (reference translations) of the AdaGrad dataset\n [[default: reference.txt]]");
+ println(" -rps refsPerSen: number of reference translations per sentence\n [[default: 1]]");
+ //println(" -txtNrm textNormMethod: how should text be normalized?\n (0) don't normalize text,\n or (1) \"NIST-style\", and also rejoin 're, *'s, n't, etc,\n or (2) apply 1 and also rejoin dashes between letters,\n or (3) apply 1 and also drop non-ASCII characters,\n or (4) apply 1+2+3\n [[default: 1]]");
+ println(" -p paramsFile: file containing parameter names, initial values, and ranges\n [[default: params.txt]]");
+ //println(" -docInfo documentInfoFile: file informing AdaGrad which document each\n sentence belongs to\n [[default: null string (i.e. all sentences are in one 'document')]]");
+ println(" -fin finalLambda: file name for final lambda[] values\n [[default: null string (i.e. no such file will be created)]]");
+ println("");
+ println("AdaGrad specs:");
+ println(" -m metricName metric options: name of evaluation metric and its options\n [[default: BLEU 4 closest]]");
+ println(" -maxIt maxAdaGradIts: maximum number of AdaGrad iterations\n [[default: 20]]");
+ println(" -prevIt prevAdaGradIts: maximum number of previous AdaGrad iterations to\n construct candidate sets from\n [[default: 20]]");
+ println(" -minIt minAdaGradIts: number of iterations before considering an early exit\n [[default: 5]]");
+ println(" -stopIt stopMinIts: some early stopping criterion must be satisfied in\n stopMinIts *consecutive* iterations before an early exit\n [[default: 3]]");
+ println(" -stopSig sigValue: early AdaGrad exit if no weight changes by more than sigValue\n [[default: -1 (i.e. this criterion is never investigated)]]");
+ //println(" -thrCnt threadCount: number of threads to run in parallel when optimizing\n [[default: 1]]");
+ println(" -save saveInter: save intermediate cfg files (1) or decoder outputs (2)\n or both (3) or neither (0)\n [[default: 3]]");
+ println(" -compress compressFiles: should AdaGrad compress the files it produces (1)\n or not (0)\n [[default: 0]]");
+ //println(" -ipi initsPerIt: number of intermediate initial points per iteration\n [[default: 20]]");
+ //println(" -opi oncePerIt: modify a parameter only once per iteration (1) or not (0)\n [[default: 0]]");
+ //println(" -rand randInit: choose initial point randomly (1) or from paramsFile (0)\n [[default: 0]]");
+ //println(" -seed seed: seed used to initialize random number generator\n [[default: time (i.e. value returned by System.currentTimeMillis()]]");
+ // println(" -ud useDisk: reliance on disk (0-2; higher value => more reliance)\n [[default: 2]]");
+ println("");
+ println("Decoder specs:");
+ println(" -cmd commandFile: name of file containing commands to run the decoder\n [[default: null string (i.e. decoder is a JoshuaDecoder object)]]");
+ println(" -passIt passIterationToDecoder: should iteration number be passed\n to command file (1) or not (0)\n [[default: 0]]");
+ println(" -decOut decoderOutFile: name of the output file produced by the decoder\n [[default: output.nbest]]");
+ println(" -decExit validExit: value returned by decoder to indicate success\n [[default: 0]]");
+ println(" -dcfg decConfigFile: name of decoder config file\n [[default: dec_cfg.txt]]");
+ println(" -N N: size of N-best list (per sentence) generated in each AdaGrad iteration\n [[default: 100]]");
+ println("");
+ println("Output specs:");
+ println(" -v verbosity: AdaGrad verbosity level (0-2; higher value => more verbose)\n [[default: 1]]");
+ println(" -decV decVerbosity: should decoder output be printed (1) or ignored (0)\n [[default: 0]]");
+ println("");
+ }
+ }
+
+ private static void println(Object obj) {
+ System.out.println(obj);
+ }
+
+}
diff --git a/src/joshua/adagrad/AdaGradCore.java b/src/joshua/adagrad/AdaGradCore.java
new file mode 100755
index 0000000..fa16bed
--- /dev/null
+++ b/src/joshua/adagrad/AdaGradCore.java
@@ -0,0 +1,3195 @@
+package joshua.adagrad;
+
+import java.io.BufferedReader;
+import java.io.BufferedWriter;
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.FileNotFoundException;
+import java.io.FileOutputStream;
+import java.io.FileReader;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.InputStreamReader;
+import java.io.OutputStream;
+import java.io.OutputStreamWriter;
+import java.io.PrintWriter;
+import java.text.DecimalFormat;
+import java.util.ArrayList;
+import java.util.Date;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.Random;
+import java.util.Scanner;
+import java.util.TreeSet;
+import java.util.Vector;
+import java.util.concurrent.ConcurrentHashMap;
+import java.util.zip.GZIPInputStream;
+import java.util.zip.GZIPOutputStream;
+
+import joshua.decoder.Decoder;
+import joshua.decoder.JoshuaConfiguration;
+import joshua.metrics.EvaluationMetric;
+import joshua.util.StreamGobbler;
+import joshua.corpus.Vocabulary;
+
+/**
+ * This code was originally written by Yuan Cao, who copied the MERT code to produce this file.
+ */
+
+public class AdaGradCore {
+  private final JoshuaConfiguration joshuaConfiguration;
+  private TreeSet<Integer>[] indicesOfInterest_all; // one TreeSet of candidate indices per sentence (allocated in initialize)
+
+  private final static DecimalFormat f4 = new DecimalFormat("###0.0000"); // 4-decimal formatter for printed weights
+  private final Runtime myRuntime = Runtime.getRuntime();
+
+  private final static double NegInf = (-1.0 / 0.0);
+  private final static double PosInf = (+1.0 / 0.0);
+  private final static double epsilon = 1.0 / 1000000;
+
+  private int progress;
+
+  private int verbosity; // anything of priority <= verbosity will be printed
+  // (lower value for priority means more important)
+
+  private Random randGen;
+  private int generatedRands; // how many doubles have been drawn from randGen so far
+
+  private int numSentences;
+  // number of sentences in the dev set
+  // (aka the "MERT training" set)
+
+  private int numDocuments;
+  // number of documents in the dev set
+  // this should be 1, unless doing doc-level optimization
+
+  private int[] docOfSentence;
+  // docOfSentence[i] stores which document contains the i'th sentence.
+  // docOfSentence is 0-indexed, as are the documents (i.e. first doc is indexed 0)
+
+  private int[] docSubsetInfo;
+  // stores information regarding which subset of the documents are evaluated
+  // [0]: method (0-6)
+  // [1]: first (1-indexed)
+  // [2]: last (1-indexed)
+  // [3]: size
+  // [4]: center
+  // [5]: arg1
+  // [6]: arg2
+  // [1-6] are 0 for method 0, [6] is 0 for methods 1-4 as well
+  // only [1] and [2] are needed for optimization. The rest are only needed for an output message.
+
+  private int refsPerSen;
+  // number of reference translations per sentence
+
+  private int textNormMethod;
+  // 0: no normalization, 1: "NIST-style" tokenization, and also rejoin 'm, 're, *'s, 've, 'll, 'd,
+  // and n't,
+  // 2: apply 1 and also rejoin dashes between letters, 3: apply 1 and also drop non-ASCII
+  // characters
+  // 4: apply 1+2+3
+
+  private int numParams;
+  // total number of firing features
+  // this number may increase over time as new n-best lists are decoded
+  // initially it is equal to the # of params in the parameter config file
+  private int numParamsOld;
+  // number of features before observing the new features fired in the current iteration
+
+  private double[] normalizationOptions;
+  // How should a lambda[] vector be normalized (before decoding)?
+  // nO[0] = 0: no normalization
+  // nO[0] = 1: scale so that parameter nO[2] has absolute value nO[1]
+  // nO[0] = 2: scale so that the maximum absolute value is nO[1]
+  // nO[0] = 3: scale so that the minimum absolute value is nO[1]
+  // nO[0] = 4: scale so that the L-nO[1] norm equals nO[2]
+
+  /* *********************************************************** */
+  /* NOTE: indexing starts at 1 in the following few arrays: */
+  /* *********************************************************** */
+
+  // private double[] lambda;
+  private ArrayList<Double> lambda = new ArrayList<Double>();
+  // the current weight vector. NOTE: indexing starts at 1.
+  private ArrayList<Double> bestLambda = new ArrayList<Double>();
+  // the best weight vector across all iterations
+
+  private boolean[] isOptimizable;
+  // isOptimizable[c] = true iff lambda[c] should be optimized
+
+  private double[] minRandValue;
+  private double[] maxRandValue;
+  // when choosing a random value for the lambda[c] parameter, it will be
+  // chosen from the [minRandValue[c],maxRandValue[c]] range.
+  // (*) minRandValue and maxRandValue must be real values, but not -Inf or +Inf
+
+  private double[] defaultLambda;
+  // "default" parameter values; simply the values read in the parameter file
+  // USED FOR NON-OPTIMIZABLE (FIXED) FEATURES
+
+  /* *********************************************************** */
+  /* *********************************************************** */
+
+  private Decoder myDecoder;
+  // COMMENT OUT if decoder is not Joshua
+
+  private String decoderCommand;
+  // the command that runs the decoder; read from decoderCommandFileName
+
+  private int decVerbosity;
+  // verbosity level for decoder output. If 0, decoder output is ignored.
+  // If 1, decoder output is printed.
+
+  private int validDecoderExitValue;
+  // return value from running the decoder command that indicates success
+
+  private int numOptThreads;
+  // number of threads to run things in parallel
+
+  private int saveInterFiles;
+  // 0: nothing, 1: only configs, 2: only n-bests, 3: both configs and n-bests
+
+  private int compressFiles;
+  // should AdaGrad gzip the large files? If 0, no compression takes place.
+  // If 1, compression is performed on: decoder output files, temp sents files,
+  // and temp feats files.
+
+  private int sizeOfNBest;
+  // size of N-best list generated by decoder at each iteration
+  // (aka simply N, but N is a bad variable name)
+
+  private long seed;
+  // seed used to create random number generators
+
+  private boolean randInit;
+  // if true, parameters are initialized randomly. If false, parameters
+  // are initialized using values from parameter file.
+
+  private int maxMERTIterations, minMERTIterations, prevMERTIterations;
+  // max: maximum number of MERT iterations
+  // min: minimum number of MERT iterations before an early MERT exit
+  // prev: number of previous MERT iterations from which to consider candidates (in addition to
+  // the candidates from the current iteration)
+
+  private double stopSigValue;
+  // early MERT exit if no weight changes by more than stopSigValue
+  // (but see minMERTIterations above and stopMinIts below)
+
+  private int stopMinIts;
+  // some early stopping criterion must be satisfied in stopMinIts *consecutive* iterations
+  // before an early exit (but see minMERTIterations above)
+
+  private boolean oneModificationPerIteration;
+  // if true, each MERT iteration performs at most one parameter modification.
+  // If false, a new MERT iteration starts (i.e. a new N-best list is
+  // generated) only after the previous iteration reaches a local maximum.
+
+  private String metricName;
+  // name of evaluation metric optimized by MERT
+
+  private String metricName_display;
+  // name of evaluation metric optimized by MERT, possibly with "doc-level " prefixed
+
+  private String[] metricOptions;
+  // options for the evaluation metric (e.g. for BLEU, maxGramLength and effLengthMethod)
+
+  private EvaluationMetric evalMetric;
+  // the evaluation metric used by MERT
+
+  private int suffStatsCount;
+  // number of sufficient statistics for the evaluation metric
+
+  private String tmpDirPrefix;
+  // prefix for the AdaGrad.temp.* files
+
+  private boolean passIterationToDecoder;
+  // should the iteration number be passed as an argument to decoderCommandFileName?
+
+  // used by adagrad
+  private boolean needShuffle = true; // shuffle the training sentences or not
+  private boolean needAvg = true; // average the weights or not?
+  private boolean usePseudoBleu = true; // need to use pseudo corpus to compute bleu?
+  private boolean returnBest = true; // return the best weight during tuning
+  private boolean needScale = true; // need scaling?
+  private String trainingMode;
+  private int oraSelectMode = 1;
+  private int predSelectMode = 1;
+  private int adagradIter = 1;
+  private int regularization = 2;
+  private int batchSize = 1;
+  private double eta; // presumably the AdaGrad step size, set from the config — confirm
+  private double lam; // presumably the regularization constant, set from the config — confirm
+  private double R = 0.99; // corpus decay when pseudo corpus is used for bleu computation
+  // private double sentForScale = 0.15; //percentage of sentences for scale factor estimation
+  private double scoreRatio = 5.0; // scale so that model_score/metric_score = scoreRatio
+  private double prevMetricScore = 0; // final metric score of the previous iteration, used only
+  // when returnBest = true
+
+  private String dirPrefix; // where are all these files located?
+  private String paramsFileName, docInfoFileName, finalLambdaFileName;
+  private String sourceFileName, refFileName, decoderOutFileName;
+  private String decoderConfigFileName, decoderCommandFileName;
+  private String fakeFileNameTemplate, fakeFileNamePrefix, fakeFileNameSuffix;
+
+  // e.g. output.it[1-x].someOldRun would be specified as:
+  // output.it?.someOldRun
+  // and we'd have prefix = "output.it" and suffix = ".someOldRun"
+
+  // private int useDisk;
+
+  public AdaGradCore(JoshuaConfiguration joshuaConfiguration) { // bare constructor: caller must configure and initialize separately
+    this.joshuaConfiguration = joshuaConfiguration;
+  }
+
+  public AdaGradCore(String[] args, JoshuaConfiguration joshuaConfiguration) { // configure from a command-line-style argument array
+    this.joshuaConfiguration = joshuaConfiguration;
+    EvaluationMetric.set_knownMetrics();
+    processArgsArray(args);
+    initialize(0); // 0 = fresh run: no RNG draws to replay
+  }
+
+  public AdaGradCore(String configFileName, JoshuaConfiguration joshuaConfiguration) { // configure from a config file
+    this.joshuaConfiguration = joshuaConfiguration;
+    EvaluationMetric.set_knownMetrics();
+    processArgsArray(cfgFileToArgsArray(configFileName)); // config file is converted to an args array, then handled as above
+    initialize(0);
+  }
+
+  private void initialize(int randsToSkip) { // randsToSkip > 0 presumably means a resumed run whose RNG draws must be replayed — confirm; visible callers pass 0
+    println("NegInf: " + NegInf + ", PosInf: " + PosInf + ", epsilon: " + epsilon, 4);
+
+    randGen = new Random(seed);
+    for (int r = 1; r <= randsToSkip; ++r) { // fast-forward the RNG so a resumed run is reproducible
+      randGen.nextDouble();
+    }
+    generatedRands = randsToSkip;
+
+    if (randsToSkip == 0) {
+      println("----------------------------------------------------", 1);
+      println("Initializing...", 1);
+      println("----------------------------------------------------", 1);
+      println("", 1);
+
+      println("Random number generator initialized using seed: " + seed, 1);
+      println("", 1);
+    }
+
+    // count the total num of sentences to be decoded, reffilename is the combined reference file
+    // name(auto generated)
+    numSentences = countLines(refFileName) / refsPerSen;
+
+    // ??
+    processDocInfo();
+    // sets numDocuments and docOfSentence[]
+
+    if (numDocuments > 1)
+      metricName_display = "doc-level " + metricName;
+
+    // ??
+    set_docSubsetInfo(docSubsetInfo);
+
+    // count the number of initial features
+    numParams = countNonEmptyLines(paramsFileName) - 1; // -1: the last non-empty line is the normalization method, not a feature
+    numParamsOld = numParams;
+
+    // read parameter config file
+    try {
+      // read dense parameter names
+      BufferedReader inFile_names = new BufferedReader(new FileReader(paramsFileName));
+
+      for (int c = 1; c <= numParams; ++c) {
+        String line = "";
+        while (line != null && line.length() == 0) { // skip empty lines
+          line = inFile_names.readLine();
+        }
+
+        // save feature names
+        String paramName = (line.substring(0, line.indexOf("|||"))).trim(); // format: "<name> ||| ..."
+        Vocabulary.id(paramName);
+        // System.err.println(String.format("VOCAB(%s) = %d", paramName, id));
+      }
+
+      inFile_names.close();
+    } catch (FileNotFoundException e) {
+      System.err.println("FileNotFoundException in AdaGradCore.initialize(int): " + e.getMessage());
+      System.exit(99901);
+    } catch (IOException e) {
+      System.err.println("IOException in AdaGradCore.initialize(int): " + e.getMessage());
+      System.exit(99902);
+    }
+
+    // the parameter file contains one line per parameter
+    // and one line for the normalization method
+    // indexing starts at 1 in these arrays
+    for (int p = 0; p <= numParams; ++p)
+      lambda.add(new Double(0));
+    bestLambda.add(new Double(0)); // NOTE(review): NOT inside the loop above (no braces) — bestLambda gets a single element here; confirm intended
+    // why only lambda is a list? because the size of lambda
+    // may increase over time, but other arrays are specified in
+    // the param config file, only used for initialization
+    isOptimizable = new boolean[1 + numParams];
+    minRandValue = new double[1 + numParams];
+    maxRandValue = new double[1 + numParams];
+    defaultLambda = new double[1 + numParams];
+    normalizationOptions = new double[3];
+
+    // read initial param values
+    processParamFile();
+    // sets the arrays declared just above
+
+    // SentenceInfo.createV(); // uncomment ONLY IF using vocabulary implementation of SentenceInfo
+
+    String[][] refSentences = new String[numSentences][refsPerSen];
+
+    try {
+
+      // read in reference sentences
+      InputStream inStream_refs = new FileInputStream(new File(refFileName));
+      BufferedReader inFile_refs = new BufferedReader(new InputStreamReader(inStream_refs, "utf8"));
+
+      for (int i = 0; i < numSentences; ++i) {
+        for (int r = 0; r < refsPerSen; ++r) {
+          // read the rth reference translation for the ith sentence
+          refSentences[i][r] = inFile_refs.readLine();
+        }
+      }
+
+      inFile_refs.close();
+
+      // normalize reference sentences
+      for (int i = 0; i < numSentences; ++i) {
+        for (int r = 0; r < refsPerSen; ++r) {
+          // normalize the rth reference translation for the ith sentence
+          refSentences[i][r] = normalize(refSentences[i][r], textNormMethod);
+        }
+      }
+
+      // read in decoder command, if any
+      decoderCommand = null;
+      if (decoderCommandFileName != null) {
+        if (fileExists(decoderCommandFileName)) {
+          BufferedReader inFile_comm = new BufferedReader(new FileReader(decoderCommandFileName));
+          decoderCommand = inFile_comm.readLine(); // READ IN DECODE COMMAND
+          inFile_comm.close();
+        }
+      }
+    } catch (FileNotFoundException e) {
+      System.err.println("FileNotFoundException in AdaGradCore.initialize(int): " + e.getMessage());
+      System.exit(99901);
+    } catch (IOException e) {
+      System.err.println("IOException in AdaGradCore.initialize(int): " + e.getMessage());
+      System.exit(99902);
+    }
+
+    // set static data members for the EvaluationMetric class
+    EvaluationMetric.set_numSentences(numSentences);
+    EvaluationMetric.set_numDocuments(numDocuments);
+    EvaluationMetric.set_refsPerSen(refsPerSen);
+    EvaluationMetric.set_refSentences(refSentences);
+    EvaluationMetric.set_tmpDirPrefix(tmpDirPrefix);
+
+    evalMetric = EvaluationMetric.getMetric(metricName, metricOptions);
+    // used only if returnBest = true
+    prevMetricScore = evalMetric.getToBeMinimized() ? PosInf : NegInf; // start from the worst possible score for the metric's direction
+
+    // length of sufficient statistics
+    // for bleu: suffstatscount=8 (2*ngram+2)
+    suffStatsCount = evalMetric.get_suffStatsCount();
+
+    // set static data members for the IntermediateOptimizer class
+    /*
+     * IntermediateOptimizer.set_MERTparams(numSentences, numDocuments, docOfSentence,
+     * docSubsetInfo, numParams, normalizationOptions, isOptimizable oneModificationPerIteration,
+     * evalMetric, tmpDirPrefix, verbosity);
+     */
+
+    // print info
+    if (randsToSkip == 0) { // i.e. first iteration
+      println("Number of sentences: " + numSentences, 1);
+      println("Number of documents: " + numDocuments, 1);
+      println("Optimizing " + metricName_display, 1);
+
+      /*
+       * print("docSubsetInfo: {", 1); for (int f = 0; f < 6; ++f) print(docSubsetInfo[f] + ", ",
+       * 1); println(docSubsetInfo[6] + "}", 1);
+       */
+
+      println("Number of initial features: " + numParams, 1);
+      print("Initial feature names: {", 1);
+
+      for (int c = 1; c <= numParams; ++c)
+        print("\"" + Vocabulary.word(c) + "\"", 1);
+      println("}", 1);
+      println("", 1);
+
+      // TODO just print the correct info
+      println("c Default value\tOptimizable?\tRand. val. range", 1);
+
+      for (int c = 1; c <= numParams; ++c) {
+        print(c + " " + f4.format(lambda.get(c).doubleValue()) + "\t\t", 1);
+
+        if (!isOptimizable[c]) {
+          println(" No", 1);
+        } else {
+          print(" Yes\t\t", 1);
+          print(" [" + minRandValue[c] + "," + maxRandValue[c] + "]", 1);
+          println("", 1);
+        }
+      }
+
+      println("", 1);
+      print("Weight vector normalization method: ", 1);
+      if (normalizationOptions[0] == 0) {
+        println("none.", 1);
+      } else if (normalizationOptions[0] == 1) {
+        println(
+            "weights will be scaled so that the \""
+                + Vocabulary.word((int) normalizationOptions[2])
+                + "\" weight has an absolute value of " + normalizationOptions[1] + ".", 1);
+      } else if (normalizationOptions[0] == 2) {
+        println("weights will be scaled so that the maximum absolute value is "
+            + normalizationOptions[1] + ".", 1);
+      } else if (normalizationOptions[0] == 3) {
+        println("weights will be scaled so that the minimum absolute value is "
+            + normalizationOptions[1] + ".", 1);
+      } else if (normalizationOptions[0] == 4) {
+        println("weights will be scaled so that the L-" + normalizationOptions[1] + " norm is "
+            + normalizationOptions[2] + ".", 1);
+      }
+
+      println("", 1);
+
+      println("----------------------------------------------------", 1);
+      println("", 1);
+
+      // rename original config file so it doesn't get overwritten
+      // (original name will be restored in finish())
+      renameFile(decoderConfigFileName, decoderConfigFileName + ".AdaGrad.orig");
+    } // if (randsToSkip == 0)
+
+    // by default, load joshua decoder
+    if (decoderCommand == null && fakeFileNameTemplate == null) {
+      println("Loading Joshua decoder...", 1);
+      myDecoder = new Decoder(joshuaConfiguration, decoderConfigFileName + ".AdaGrad.orig");
+      println("...finished loading @ " + (new Date()), 1);
+      println("");
+    } else {
+      myDecoder = null; // external or fake decoder will be used instead
+    }
+
+    @SuppressWarnings("unchecked")
+    TreeSet<Integer>[] temp_TSA = new TreeSet[numSentences]; // generic array creation needs the raw-type workaround
+    indicesOfInterest_all = temp_TSA;
+
+    for (int i = 0; i < numSentences; ++i) {
+      indicesOfInterest_all[i] = new TreeSet<Integer>();
+    }
+  } // void initialize(...)
+
+ // -------------------------
+
+  public void run_AdaGrad() { // run with the iteration bounds read from the config
+    run_AdaGrad(minMERTIterations, maxMERTIterations, prevMERTIterations);
+  }
+
+  public void run_AdaGrad(int minIts, int maxIts, int prevIts) { // main driver: clean temp files, iterate run_single_iteration, clean up
+    // FIRST, CLEAN ALL PREVIOUS TEMP FILES
+    String dir;
+    int k = tmpDirPrefix.lastIndexOf("/");
+    if (k >= 0) {
+      dir = tmpDirPrefix.substring(0, k + 1); // directory part of the prefix, including trailing "/"
+    } else {
+      dir = "./";
+    }
+    String files;
+    File folder = new File(dir);
+
+    if (folder.exists()) {
+      File[] listOfFiles = folder.listFiles();
+
+      for (int i = 0; i < listOfFiles.length; i++) {
+        if (listOfFiles[i].isFile()) {
+          files = listOfFiles[i].getName();
+          if (files.startsWith("AdaGrad.temp")) {
+            deleteFile(files); // NOTE(review): deletes by bare name (resolved against CWD), not dir + name — confirm behavior when tmpDirPrefix contains a path
+          }
+        }
+      }
+    }
+
+    println("----------------------------------------------------", 1);
+    println("AdaGrad run started @ " + (new Date()), 1);
+    // printMemoryUsage();
+    println("----------------------------------------------------", 1);
+    println("", 1);
+
+    // if no default lambda is provided
+    if (randInit) {
+      println("Initializing lambda[] randomly.", 1);
+      // initialize optimizable parameters randomly (sampling uniformly from
+      // that parameter's random value range)
+      lambda = randomLambda();
+    }
+
+    println("Initial lambda[]: " + lambdaToString(lambda), 1);
+    println("", 1);
+
+    int[] maxIndex = new int[numSentences];
+
+    // HashMap<Integer,int[]>[] suffStats_array = new HashMap[numSentences];
+    // suffStats_array[i] maps candidates of interest for sentence i to an array
+    // storing the sufficient statistics for that candidate
+
+    int earlyStop = 0;
+    // number of consecutive iterations in which an early stopping criterion was satisfied
+
+    for (int iteration = 1;; ++iteration) {
+
+      // what does "A" contain?
+      // retA[0]: FINAL_score
+      // retA[1]: earlyStop
+      // retA[2]: should this be the last iteration?
+      double[] A = run_single_iteration(iteration, minIts, maxIts, prevIts, earlyStop, maxIndex);
+      if (A != null) {
+        earlyStop = (int) A[1];
+        if (A[2] == 1)
+          break; // iteration signalled that it should be the last one
+      } else {
+        break; // null return also terminates the loop
+      }
+
+    } // for (iteration)
+
+    println("", 1);
+
+    println("----------------------------------------------------", 1);
+    println("AdaGrad run ended @ " + (new Date()), 1);
+    // printMemoryUsage();
+    println("----------------------------------------------------", 1);
+    println("", 1);
+    if (!returnBest)
+      println("FINAL lambda: " + lambdaToString(lambda), 1);
+    // + " (" + metricName_display + ": " + FINAL_score + ")",1);
+    else
+      println("BEST lambda: " + lambdaToString(lambda), 1);
+
+    // delete intermediate .temp.*.it* decoder output files
+    for (int iteration = 1; iteration <= maxIts; ++iteration) {
+      if (compressFiles == 1) {
+        deleteFile(tmpDirPrefix + "temp.sents.it" + iteration + ".gz");
+        deleteFile(tmpDirPrefix + "temp.feats.it" + iteration + ".gz");
+        if (fileExists(tmpDirPrefix + "temp.stats.it" + iteration + ".copy.gz")) {
+          deleteFile(tmpDirPrefix + "temp.stats.it" + iteration + ".copy.gz");
+        } else {
+          deleteFile(tmpDirPrefix + "temp.stats.it" + iteration + ".gz");
+        }
+      } else {
+        deleteFile(tmpDirPrefix + "temp.sents.it" + iteration);
+        deleteFile(tmpDirPrefix + "temp.feats.it" + iteration);
+        if (fileExists(tmpDirPrefix + "temp.stats.it" + iteration + ".copy")) {
+          deleteFile(tmpDirPrefix + "temp.stats.it" + iteration + ".copy");
+        } else {
+          deleteFile(tmpDirPrefix + "temp.stats.it" + iteration);
+        }
+      }
+    }
+  } // void run_AdaGrad(int maxIts)
+
+ // this is the key function!
+ @SuppressWarnings("unchecked")
+ public double[] run_single_iteration(int iteration, int minIts, int maxIts, int prevIts,
+ int earlyStop, int[] maxIndex) {
+ double FINAL_score = 0;
+
+ double[] retA = new double[3];
+ // retA[0]: FINAL_score
+ // retA[1]: earlyStop
+ // retA[2]: should this be the last iteration?
+
+ boolean done = false;
+ retA[2] = 1; // will only be made 0 if we don't break from the following loop
+
+ // save feats and stats for all candidates(old & new)
+ HashMap<String, String>[] feat_hash = new HashMap[numSentences];
+ for (int i = 0; i < numSentences; i++)
+ feat_hash[i] = new HashMap<String, String>();
+
+ HashMap<String, String>[] stats_hash = new HashMap[numSentences];
+ for (int i = 0; i < numSentences; i++)
+ stats_hash[i] = new HashMap<String, String>();
+
+ while (!done) { // NOTE: this "loop" will only be carried out once
+ println("--- Starting AdaGrad iteration #" + iteration + " @ " + (new Date()) + " ---", 1);
+
+ // printMemoryUsage();
+
+ /******************************/
+ // CREATE DECODER CONFIG FILE //
+ /******************************/
+
+ createConfigFile(lambda, decoderConfigFileName, decoderConfigFileName + ".AdaGrad.orig");
+ // i.e. use the original config file as a template
+
+ /***************/
+ // RUN DECODER //
+ /***************/
+
+ if (iteration == 1) {
+ println("Decoding using initial weight vector " + lambdaToString(lambda), 1);
+ } else {
+ println("Redecoding using weight vector " + lambdaToString(lambda), 1);
+ }
+
+ // generate the n-best file after decoding
+ String[] decRunResult = run_decoder(iteration); // iteration passed in case fake decoder will
+ // be used
+ // [0] name of file to be processed
+ // [1] indicates how the output file was obtained:
+ // 1: external decoder
+ // 2: fake decoder
+ // 3: internal decoder
+
+ if (!decRunResult[1].equals("2")) {
+ println("...finished decoding @ " + (new Date()), 1);
+ }
+
+ checkFile(decRunResult[0]);
+
+ /************* END OF DECODING **************/
+
+ println("Producing temp files for iteration " + iteration, 3);
+
+ produceTempFiles(decRunResult[0], iteration);
+
+ // save intermedidate output files
+ // save joshua.config.adagrad.it*
+ if (saveInterFiles == 1 || saveInterFiles == 3) { // make copy of intermediate config file
+ if (!copyFile(decoderConfigFileName, decoderConfigFileName + ".AdaGrad.it" + iteration)) {
+ println("Warning: attempt to make copy of decoder config file (to create"
+ + decoderConfigFileName + ".AdaGrad.it" + iteration + ") was unsuccessful!", 1);
+ }
+ }
+
+ // save output.nest.AdaGrad.it*
+ if (saveInterFiles == 2 || saveInterFiles == 3) { // make copy of intermediate decoder output
+ // file...
+
+ if (!decRunResult[1].equals("2")) { // ...but only if no fake decoder
+ if (!decRunResult[0].endsWith(".gz")) {
+ if (!copyFile(decRunResult[0], decRunResult[0] + ".AdaGrad.it" + iteration)) {
+ println("Warning: attempt to make copy of decoder output file (to create"
+ + decRunResult[0] + ".AdaGrad.it" + iteration + ") was unsuccessful!", 1);
+ }
+ } else {
+ String prefix = decRunResult[0].substring(0, decRunResult[0].length() - 3);
+ if (!copyFile(prefix + ".gz", prefix + ".AdaGrad.it" + iteration + ".gz")) {
+ println("Warning: attempt to make copy of decoder output file (to create" + prefix
+ + ".AdaGrad.it" + iteration + ".gz" + ") was unsuccessful!", 1);
+ }
+ }
+
+ if (compressFiles == 1 && !decRunResult[0].endsWith(".gz")) {
+ gzipFile(decRunResult[0] + ".AdaGrad.it" + iteration);
+ }
+ } // if (!fake)
+ }
+
+ // ------------- end of saving .adagrad.it* files ---------------
+
+ int[] candCount = new int[numSentences];
+ int[] lastUsedIndex = new int[numSentences];
+
+ ConcurrentHashMap<Integer, int[]>[] suffStats_array = new ConcurrentHashMap[numSentences];
+ for (int i = 0; i < numSentences; ++i) {
+ candCount[i] = 0;
+ lastUsedIndex[i] = -1;
+ // suffStats_array[i].clear();
+ suffStats_array[i] = new ConcurrentHashMap<Integer, int[]>();
+ }
+
+ // initLambda[0] is not used!
+ double[] initialLambda = new double[1 + numParams];
+ for (int i = 1; i <= numParams; ++i)
+ initialLambda[i] = lambda.get(i);
+
+ // the "score" in initialScore refers to that
+ // assigned by the evaluation metric)
+
+ // you may consider all candidates from iter 1, or from iter (iteration-prevIts) to current
+ // iteration
+ int firstIt = Math.max(1, iteration - prevIts);
+ // i.e. only process candidates from the current iteration and candidates
+ // from up to prevIts previous iterations.
+ println("Reading candidate translations from iterations " + firstIt + "-" + iteration, 1);
+ println("(and computing " + metricName
+ + " sufficient statistics for previously unseen candidates)", 1);
+ print(" Progress: ");
+
+ int[] newCandidatesAdded = new int[1 + iteration];
+ for (int it = 1; it <= iteration; ++it)
+ newCandidatesAdded[it] = 0;
+
+ try {
+ // read temp files from all past iterations
+ // 3 types of temp files:
+ // 1. output hypo at iter i
+ // 2. feature value of each hypo at iter i
+ // 3. suff stats of each hypo at iter i
+
+ // each inFile corresponds to the output of an iteration
+ // (index 0 is not used; no corresponding index for the current iteration)
+ BufferedReader[] inFile_sents = new BufferedReader[iteration];
+ BufferedReader[] inFile_feats = new BufferedReader[iteration];
+ BufferedReader[] inFile_stats = new BufferedReader[iteration];
+
+ // temp file(array) from previous iterations
+ for (int it = firstIt; it < iteration; ++it) {
+ InputStream inStream_sents, inStream_feats, inStream_stats;
+ if (compressFiles == 0) {
+ inStream_sents = new FileInputStream(tmpDirPrefix + "temp.sents.it" + it);
+ inStream_feats = new FileInputStream(tmpDirPrefix + "temp.feats.it" + it);
+ inStream_stats = new FileInputStream(tmpDirPrefix + "temp.stats.it" + it);
+ } else {
+ inStream_sents = new GZIPInputStream(new FileInputStream(tmpDirPrefix + "temp.sents.it"
+ + it + ".gz"));
+ inStream_feats = new GZIPInputStream(new FileInputStream(tmpDirPrefix + "temp.feats.it"
+ + it + ".gz"));
+ inStream_stats = new GZIPInputStream(new FileInputStream(tmpDirPrefix + "temp.stats.it"
+ + it + ".gz"));
+ }
+
+ inFile_sents[it] = new BufferedReader(new InputStreamReader(inStream_sents, "utf8"));
+ inFile_feats[it] = new BufferedReader(new InputStreamReader(inStream_feats, "utf8"));
+ inFile_stats[it] = new BufferedReader(new InputStreamReader(inStream_stats, "utf8"));
+ }
+
+ InputStream inStream_sentsCurrIt, inStream_featsCurrIt, inStream_statsCurrIt;
+ // temp file for current iteration!
+ if (compressFiles == 0) {
+ inStream_sentsCurrIt = new FileInputStream(tmpDirPrefix + "temp.sents.it" + iteration);
+ inStream_featsCurrIt = new FileInputStream(tmpDirPrefix + "temp.feats.it" + iteration);
+ } else {
+ inStream_sentsCurrIt = new GZIPInputStream(new FileInputStream(tmpDirPrefix
+ + "temp.sents.it" + iteration + ".gz"));
+ inStream_featsCurrIt = new GZIPInputStream(new FileInputStream(tmpDirPrefix
+ + "temp.feats.it" + iteration + ".gz"));
+ }
+
+ BufferedReader inFile_sentsCurrIt = new BufferedReader(new InputStreamReader(
+ inStream_sentsCurrIt, "utf8"));
+ BufferedReader inFile_featsCurrIt = new BufferedReader(new InputStreamReader(
+ inStream_featsCurrIt, "utf8"));
+
+ BufferedReader inFile_statsCurrIt = null; // will only be used if statsCurrIt_exists below
+ // is set to true
+ PrintWriter outFile_statsCurrIt = null; // will only be used if statsCurrIt_exists below is
+ // set to false
+
+ // just to check if temp.stat.it.iteration exists
+ boolean statsCurrIt_exists = false;
+
+ if (fileExists(tmpDirPrefix + "temp.stats.it" + iteration)) {
+ inStream_statsCurrIt = new FileInputStream(tmpDirPrefix + "temp.stats.it" + iteration);
+ inFile_statsCurrIt = new BufferedReader(new InputStreamReader(inStream_statsCurrIt,
+ "utf8"));
+ statsCurrIt_exists = true;
+ copyFile(tmpDirPrefix + "temp.stats.it" + iteration, tmpDirPrefix + "temp.stats.it"
+ + iteration + ".copy");
+ } else if (fileExists(tmpDirPrefix + "temp.stats.it" + iteration + ".gz")) {
+ inStream_statsCurrIt = new GZIPInputStream(new FileInputStream(tmpDirPrefix
+ + "temp.stats.it" + iteration + ".gz"));
+ inFile_statsCurrIt = new BufferedReader(new InputStreamReader(inStream_statsCurrIt,
+ "utf8"));
+ statsCurrIt_exists = true;
+ copyFile(tmpDirPrefix + "temp.stats.it" + iteration + ".gz", tmpDirPrefix
+ + "temp.stats.it" + iteration + ".copy.gz");
+ } else {
+ outFile_statsCurrIt = new PrintWriter(tmpDirPrefix + "temp.stats.it" + iteration);
+ }
+
+ // output the 4^th temp file: *.temp.stats.merged
+ PrintWriter outFile_statsMerged = new PrintWriter(tmpDirPrefix + "temp.stats.merged");
+ // write sufficient statistics from all the sentences
+ // from the output files into a single file
+ PrintWriter outFile_statsMergedKnown = new PrintWriter(tmpDirPrefix
+ + "temp.stats.mergedKnown");
+ // write sufficient statistics from all the sentences
+ // from the output files into a single file
+
+ // output the 5^th 6^th temp file, but will be deleted at the end of the function
+ FileOutputStream outStream_unknownCands = new FileOutputStream(tmpDirPrefix
+ + "temp.currIt.unknownCands", false);
+ OutputStreamWriter outStreamWriter_unknownCands = new OutputStreamWriter(
+ outStream_unknownCands, "utf8");
+ BufferedWriter outFile_unknownCands = new BufferedWriter(outStreamWriter_unknownCands);
+
+ PrintWriter outFile_unknownIndices = new PrintWriter(tmpDirPrefix
+ + "temp.currIt.unknownIndices");
+
+ String sents_str, feats_str, stats_str;
+
+ // BUG: this assumes a candidate string cannot be produced for two
+ // different source sentences, which is not necessarily true
+ // (It's not actually a bug, but only because existingCandStats gets
+ // cleared before moving to the next source sentence.)
+ // FIX: should be made an array, indexed by i
+ HashMap<String, String> existingCandStats = new HashMap<String, String>();
+ // VERY IMPORTANT:
+ // A CANDIDATE X MAY APPEARED IN ITER 1, ITER 3
+ // BUT IF THE USER SPECIFIED TO CONSIDER ITERATIONS FROM ONLY ITER 2, THEN
+ // X IS NOT A "REPEATED" CANDIDATE IN ITER 3. THEREFORE WE WANT TO KEEP THE
+ // SUFF STATS FOR EACH CANDIDATE(TO SAVE COMPUTATION IN THE FUTURE)
+
+ // Stores precalculated sufficient statistics for candidates, in case
+ // the same candidate is seen again. (SS stored as a String.)
+ // Q: Why do we care? If we see the same candidate again, aren't we going
+ // to ignore it? So, why do we care about the SS of this repeat candidate?
+ // A: A "repeat" candidate may not be a repeat candidate in later
+ // iterations if the user specifies a value for prevMERTIterations
+ // that causes MERT to skip candidates from early iterations.
+
+ double[] currFeatVal = new double[1 + numParams];
+ String[] featVal_str;
+
+ int totalCandidateCount = 0;
+
+ // new candidate size for each sentence
+ int[] sizeUnknown_currIt = new int[numSentences];
+
+ for (int i = 0; i < numSentences; ++i) {
+ // process candidates from previous iterations
+ // low efficiency? for each iteration, it reads in all previous iteration outputs
+ // therefore a lot of overlapping jobs
+ // this is an easy implementation to deal with the situation in which user only specified
+ // "previt" and hopes to consider only the previous previt
+      // iterations, then for each iteration the existing candidates will be different
+ for (int it = firstIt; it < iteration; ++it) {
+ // Why up to but *excluding* iteration?
+ // Because the last iteration is handled a little differently, since
+ // the SS must be calculated (and the corresponding file created),
+ // which is not true for previous iterations.
+
+ for (int n = 0; n <= sizeOfNBest; ++n) {
+ // note that in all temp files, "||||||" is a separator between 2 n-best lists
+
+ // Why up to and *including* sizeOfNBest?
+ // So that it would read the "||||||" separator even if there is
+ // a complete list of sizeOfNBest candidates.
+
+ // for the nth candidate for the ith sentence, read the sentence, feature values,
+ // and sufficient statistics from the various temp files
+
+ // read one line of temp.sent, temp.feat, temp.stats from iteration it
+ sents_str = inFile_sents[it].readLine();
+ feats_str = inFile_feats[it].readLine();
+ stats_str = inFile_stats[it].readLine();
+
+ if (sents_str.equals("||||||")) {
+ n = sizeOfNBest + 1; // move on to the next n-best list
+ } else if (!existingCandStats.containsKey(sents_str)) // if this candidate does not
+ // exist
+ {
+ outFile_statsMergedKnown.println(stats_str);
+
+ // save feats & stats
+ feat_hash[i].put(sents_str, feats_str);
+ stats_hash[i].put(sents_str, stats_str);
+
+ // extract feature value
+ featVal_str = feats_str.split("\\s+");
+
+ if (feats_str.indexOf('=') != -1) {
+ for (String featurePair : featVal_str) {
+ String[] pair = featurePair.split("=");
+ String name = pair[0];
+ Double value = Double.parseDouble(pair[1]);
+ }
+ }
+ existingCandStats.put(sents_str, stats_str);
+ candCount[i] += 1;
+ newCandidatesAdded[it] += 1;
+
+ } // if unseen candidate
+ } // for (n)
+ } // for (it)
+
+ outFile_statsMergedKnown.println("||||||");
+
+ // ---------- end of processing previous iterations ----------
+ // ---------- now start processing new candidates ----------
+
+ // now process the candidates of the current iteration
+ // now determine the new candidates of the current iteration
+
+ /*
+ * remember: BufferedReader inFile_sentsCurrIt BufferedReader inFile_featsCurrIt
+ * PrintWriter outFile_statsCurrIt
+ */
+
+ String[] sentsCurrIt_currSrcSent = new String[sizeOfNBest + 1];
+
+ Vector<String> unknownCands_V = new Vector<String>();
+ // which candidates (of the i'th source sentence) have not been seen before
+ // this iteration?
+
+ for (int n = 0; n <= sizeOfNBest; ++n) {
+ // Why up to and *including* sizeOfNBest?
+ // So that it would read the "||||||" separator even if there is
+ // a complete list of sizeOfNBest candidates.
+
+ // for the nth candidate for the ith sentence, read the sentence,
+ // and store it in the sentsCurrIt_currSrcSent array
+
+ sents_str = inFile_sentsCurrIt.readLine(); // read one candidate from the current
+ // iteration
+ sentsCurrIt_currSrcSent[n] = sents_str; // Note: possibly "||||||"
+
+ if (sents_str.equals("||||||")) {
+ n = sizeOfNBest + 1;
+ } else if (!existingCandStats.containsKey(sents_str)) {
+ unknownCands_V.add(sents_str); // NEW CANDIDATE FROM THIS ITERATION
+ writeLine(sents_str, outFile_unknownCands);
+ outFile_unknownIndices.println(i); // INDEX OF THE NEW CANDIDATES
+ newCandidatesAdded[iteration] += 1;
+ existingCandStats.put(sents_str, "U"); // i.e. unknown
+ // we add sents_str to avoid duplicate entries in unknownCands_V
+ }
+ } // for (n)
+
+ // only compute suff stats for new candidates
+ // now unknownCands_V has the candidates for which we need to calculate
+ // sufficient statistics (for the i'th source sentence)
+ int sizeUnknown = unknownCands_V.size();
+ sizeUnknown_currIt[i] = sizeUnknown;
+
+ existingCandStats.clear();
+
+ } // for (i) each sentence
+
+ // ---------- end of merging candidates stats from previous iterations
+ // and finding new candidates ------------
+
+ /*
+ * int[][] newSuffStats = null; if (!statsCurrIt_exists && sizeUnknown > 0) { newSuffStats =
+ * evalMetric.suffStats(unknownCands, indices); }
+ */
+
+ outFile_statsMergedKnown.close();
+ outFile_unknownCands.close();
+ outFile_unknownIndices.close();
+
+ // want to re-open all temp files and start from scratch again?
+ for (int it = firstIt; it < iteration; ++it) // previous iterations temp files
+ {
+ inFile_sents[it].close();
+ inFile_stats[it].close();
+
+ InputStream inStream_sents, inStream_stats;
+ if (compressFiles == 0) {
+ inStream_sents = new FileInputStream(tmpDirPrefix + "temp.sents.it" + it);
+ inStream_stats = new FileInputStream(tmpDirPrefix + "temp.stats.it" + it);
+ } else {
+ inStream_sents = new GZIPInputStream(new FileInputStream(tmpDirPrefix + "temp.sents.it"
+ + it + ".gz"));
+ inStream_stats = new GZIPInputStream(new FileInputStream(tmpDirPrefix + "temp.stats.it"
+ + it + ".gz"));
+ }
+
+ inFile_sents[it] = new BufferedReader(new InputStreamReader(inStream_sents, "utf8"));
+ inFile_stats[it] = new BufferedReader(new InputStreamReader(inStream_stats, "utf8"));
+ }
+
+ inFile_sentsCurrIt.close();
+ // current iteration temp files
+ if (compressFiles == 0) {
+ inStream_sentsCurrIt = new FileInputStream(tmpDirPrefix + "temp.sents.it" + iteration);
+ } else {
+ inStream_sentsCurrIt = new GZIPInputStream(new FileInputStream(tmpDirPrefix
+ + "temp.sents.it" + iteration + ".gz"));
+ }
+ inFile_sentsCurrIt = new BufferedReader(new InputStreamReader(inStream_sentsCurrIt, "utf8"));
+
+ // calculate SS for unseen candidates and write them to file
+ FileInputStream inStream_statsCurrIt_unknown = null;
+ BufferedReader inFile_statsCurrIt_unknown = null;
+
+ if (!statsCurrIt_exists && newCandidatesAdded[iteration] > 0) {
+ // create the file...
+ evalMetric.createSuffStatsFile(tmpDirPrefix + "temp.currIt.unknownCands", tmpDirPrefix
+ + "temp.currIt.unknownIndices", tmpDirPrefix + "temp.stats.unknown", sizeOfNBest);
+
+ // ...and open it
+ inStream_statsCurrIt_unknown = new FileInputStream(tmpDirPrefix + "temp.stats.unknown");
+ inFile_statsCurrIt_unknown = new BufferedReader(new InputStreamReader(
+ inStream_statsCurrIt_unknown, "utf8"));
+ }
+
+ // open mergedKnown file
+ // newly created by the big loop above
+ FileInputStream instream_statsMergedKnown = new FileInputStream(tmpDirPrefix
+ + "temp.stats.mergedKnown");
+ BufferedReader inFile_statsMergedKnown = new BufferedReader(new InputStreamReader(
+ instream_statsMergedKnown, "utf8"));
+
+ // num of features before observing new firing features from this iteration
+ numParamsOld = numParams;
+
+ for (int i = 0; i < numSentences; ++i) {
+ // reprocess candidates from previous iterations
+ for (int it = firstIt; it < iteration; ++it) {
+ for (int n = 0; n <= sizeOfNBest; ++n) {
+ sents_str = inFile_sents[it].readLine();
+ stats_str = inFile_stats[it].readLine();
+
+ if (sents_str.equals("||||||")) {
+ n = sizeOfNBest + 1;
+ } else if (!existingCandStats.containsKey(sents_str)) {
+ existingCandStats.put(sents_str, stats_str);
+ } // if unseen candidate
+ } // for (n)
+ } // for (it)
+
+ // copy relevant portion from mergedKnown to the merged file
+ String line_mergedKnown = inFile_statsMergedKnown.readLine();
+ while (!line_mergedKnown.equals("||||||")) {
+ outFile_statsMerged.println(line_mergedKnown);
+ line_mergedKnown = inFile_statsMergedKnown.readLine();
+ }
+
+ int[] stats = new int[suffStatsCount];
+
+ for (int n = 0; n <= sizeOfNBest; ++n) {
+ sents_str = inFile_sentsCurrIt.readLine();
+ feats_str = inFile_featsCurrIt.readLine();
+
+ if (sents_str.equals("||||||")) {
+ n = sizeOfNBest + 1;
+ } else if (!existingCandStats.containsKey(sents_str)) {
+
+ if (!statsCurrIt_exists) {
+ stats_str = inFile_statsCurrIt_unknown.readLine();
+
+ String[] temp_stats = stats_str.split("\\s+");
+ for (int s = 0; s < suffStatsCount; ++s) {
+ stats[s] = Integer.parseInt(temp_stats[s]);
+ }
+
+ outFile_statsCurrIt.println(stats_str);
+ } else {
+ stats_str = inFile_statsCurrIt.readLine();
+
+ String[] temp_stats = stats_str.split("\\s+");
+ for (int s = 0; s < suffStatsCount; ++s) {
+ stats[s] = Integer.parseInt(temp_stats[s]);
+ }
+ }
+
+ outFile_statsMerged.println(stats_str);
+
+ // save feats & stats
+ // System.out.println(sents_str+" "+feats_str);
+
+ feat_hash[i].put(sents_str, feats_str);
+ stats_hash[i].put(sents_str, stats_str);
+
+ featVal_str = feats_str.split("\\s+");
+
+ if (feats_str.indexOf('=') != -1) {
+ for (String featurePair : featVal_str) {
+ String[] pair = featurePair.split("=");
+ String name = pair[0];
+ Double value = Double.parseDouble(pair[1]);
+ int featId = Vocabulary.id(name);
+
+ // need to identify newly fired feats here
+ // in this case currFeatVal is not given the value
+ // of the new feat, since the corresponding weight is
+ // initialized as zero anyway
+ if (featId > numParams) {
+ ++numParams;
+ lambda.add(new Double(0));
+ }
+ }
+ }
+ existingCandStats.put(sents_str, stats_str);
+ candCount[i] += 1;
+
+ // newCandidatesAdded[iteration] += 1;
+ // moved to code above detecting new candidates
+ } else {
+ if (statsCurrIt_exists)
+ inFile_statsCurrIt.readLine();
+ else {
+ // write SS to outFile_statsCurrIt
+ stats_str = existingCandStats.get(sents_str);
+ outFile_statsCurrIt.println(stats_str);
+ }
+ }
+
+ } // for (n)
+
+ // now d = sizeUnknown_currIt[i] - 1
+
+ if (statsCurrIt_exists)
+ inFile_statsCurrIt.readLine();
+ else
+ outFile_statsCurrIt.println("||||||");
+
+ existingCandStats.clear();
+ totalCandidateCount += candCount[i];
+
+ // output sentence progress
+ if ((i + 1) % 500 == 0) {
+ print((i + 1) + "\n" + " ", 1);
+ } else if ((i + 1) % 100 == 0) {
+ print("+", 1);
+ } else if ((i + 1) % 25 == 0) {
+ print(".", 1);
+ }
+
+ } // for (i)
+
+ inFile_statsMergedKnown.close();
+ outFile_statsMerged.close();
+
+ // for testing
+ /*
+ * int total_sent = 0; for( int i=0; i<numSentences; i++ ) {
+ * System.out.println(feat_hash[i].size()+" "+candCount[i]); total_sent +=
+ * feat_hash[i].size(); feat_hash[i].clear(); }
+ * System.out.println("----------------total sent: "+total_sent); total_sent = 0; for( int
+ * i=0; i<numSentences; i++ ) { System.out.println(stats_hash[i].size()+" "+candCount[i]);
+ * total_sent += stats_hash[i].size(); stats_hash[i].clear(); }
+ * System.out.println("*****************total sent: "+total_sent);
+ */
+
+ println("", 1); // finish progress line
+
+ for (int it = firstIt; it < iteration; ++it) {
+ inFile_sents[it].close();
+ inFile_feats[it].close();
+ inFile_stats[it].close();
+ }
+
+ inFile_sentsCurrIt.close();
+ inFile_featsCurrIt.close();
+ if (statsCurrIt_exists)
+ inFile_statsCurrIt.close();
+ else
+ outFile_statsCurrIt.close();
+
+ if (compressFiles == 1 && !statsCurrIt_exists) {
+ gzipFile(tmpDirPrefix + "temp.stats.it" + iteration);
+ }
+
+ // clear temp files
+ deleteFile(tmpDirPrefix + "temp.currIt.unknownCands");
+ deleteFile(tmpDirPrefix + "temp.currIt.unknownIndices");
+ deleteFile(tmpDirPrefix + "temp.stats.unknown");
+ deleteFile(tmpDirPrefix + "temp.stats.mergedKnown");
+
+ // cleanupMemory();
+
+ println("Processed " + totalCandidateCount + " distinct candidates " + "(about "
+ + totalCandidateCount / numSentences + " per sentence):", 1);
+ for (int it = firstIt; it <= iteration; ++it) {
+ println("newCandidatesAdded[it=" + it + "] = " + newCandidatesAdded[it] + " (about "
+ + newCandidatesAdded[it] / numSentences + " per sentence)", 1);
+ }
+
+ println("", 1);
+
+ println("Number of features observed so far: " + numParams);
+ println("", 1);
+
+ } catch (FileNotFoundException e) {
+ System.err.println("FileNotFoundException in AdaGradCore.run_single_iteration(6): "
+ + e.getMessage());
+ System.exit(99901);
+ } catch (IOException e) {
+ System.err.println("IOException in AdaGradCore.run_single_iteration(6): " + e.getMessage());
+ System.exit(99902);
+ }
+
+ // n-best list converges
+ if (newCandidatesAdded[iteration] == 0) {
+ if (!oneModificationPerIteration) {
+ println("No new candidates added in this iteration; exiting AdaGrad.", 1);
+ println("", 1);
+ println("--- AdaGrad iteration #" + iteration + " ending @ " + (new Date()) + " ---", 1);
+ println("", 1);
+ deleteFile(tmpDirPrefix + "temp.stats.merged");
+
+ if (returnBest) {
+ // note that bestLambda.size() <= lambda.size()
+ for (int p = 1; p < bestLambda.size(); ++p)
+ lambda.set(p, bestLambda.get(p));
+ // and set the rest of lambda to be 0
+ for (int p = 0; p < lambda.size() - bestLambda.size(); ++p)
+ lambda.set(p + bestLambda.size(), new Double(0));
+ }
+
+ return null; // this means that the old values should be kept by the caller
+ } else {
+ println("Note: No new candidates added in this iteration.", 1);
+ }
+ }
+
+ /************* start optimization **************/
+
+ /*
+ * for( int v=1; v<initialLambda[1].length; v++ ) System.out.print(initialLambda[1][v]+" ");
+ * System.exit(0);
+ */
+
+ Optimizer.sentNum = numSentences; // total number of training sentences
+ Optimizer.needShuffle = needShuffle;
+ Optimizer.adagradIter = adagradIter;
+ Optimizer.oraSelectMode = oraSelectMode;
+ Optimizer.predSelectMode = predSelectMode;
+ Optimizer.needAvg = needAvg;
+ // Optimizer.sentForScale = sentForScale;
+ Optimizer.scoreRatio = scoreRatio;
+ Optimizer.evalMetric = evalMetric;
+ Optimizer.normalizationOptions = normalizationOptions;
+ Optimizer.needScale = needScale;
+ Optimizer.regularization = regularization;
+ Optimizer.batchSize = batchSize;
+ Optimizer.eta = eta;
+ Optimizer.lam = lam;
+
+ // if need to use bleu stats history
+ if (iteration == 1) {
+ if (evalMetric.get_metricName().equals("BLEU") && usePseudoBleu) {
+ Optimizer.initBleuHistory(numSentences, evalMetric.get_suffStatsCount());
+ Optimizer.usePseudoBleu = usePseudoBleu;
+ Optimizer.R = R;
+ }
+ if (evalMetric.get_metricName().equals("TER-BLEU") && usePseudoBleu) {
+ Optimizer.initBleuHistory(numSentences, evalMetric.get_suffStatsCount() - 2); // Stats
+ // count of
+ // TER=2
+ Optimizer.usePseudoBleu = usePseudoBleu;
+ Optimizer.R = R;
+ }
+ }
+
+ Vector<String> output = new Vector<String>();
+
+ // note: initialLambda[] has length = numParamsOld
+ // augmented with new feature weights, initial values are 0
+ double[] initialLambdaNew = new double[1 + numParams];
+ System.arraycopy(initialLambda, 1, initialLambdaNew, 1, numParamsOld);
+
+ // finalLambda[] has length = numParams (considering new features)
+ double[] finalLambda = new double[1 + numParams];
+
+ Optimizer opt = new Optimizer(output, isOptimizable, initialLambdaNew, feat_hash, stats_hash);
+ finalLambda = opt.runOptimizer();
+
+ if (returnBest) {
+ double metricScore = opt.getMetricScore();
+ if (!evalMetric.getToBeMinimized()) {
+ if (metricScore > prevMetricScore) {
+ prevMetricScore = metricScore;
+ for (int p = 1; p < bestLambda.size(); ++p)
+ bestLambda.set(p, finalLambda[p]);
+ if (1 + numParams > bestLambda.size()) {
+ for (int p = bestLambda.size(); p <= numParams; ++p)
+ bestLambda.add(p, finalLambda[p]);
+ }
+ }
+ } else {
+ if (metricScore < prevMetricScore) {
+ prevMetricScore = metricScore;
+ for (int p = 1; p < bestLambda.size(); ++p)
+ bestLambda.set(p, finalLambda[p]);
+ if (1 + numParams > bestLambda.size()) {
+ for (int p = bestLambda.size(); p <= numParams; ++p)
+ bestLambda.add(p, finalLambda[p]);
+ }
+ }
+ }
+ }
+
+ // System.out.println(finalLambda.length);
+ // for( int i=0; i<finalLambda.length-1; i++ )
+ // System.out.println(finalLambda[i+1]);
+
+ /************* end optimization **************/
+
+ for (int i = 0; i < output.size(); i++)
+ println(output.get(i));
+
+ // check if any parameter has been updated
+ boolean anyParamChanged = false;
+ boolean anyParamChangedSignificantly = false;
+
+ for (int c = 1; c <= numParams; ++c) {
+ if (finalLambda[c] != lambda.get(c)) {
+ anyParamChanged = true;
+ }
+ if (Math.abs(finalLambda[c] - lambda.get(c)) > stopSigValue) {
+ anyParamChangedSignificantly = true;
+ }
+ }
+
+ // System.arraycopy(finalLambda,1,lambda,1,numParams);
+
+ println("--- AdaGrad iteration #" + iteration + " ending @ " + (new Date()) + " ---", 1);
+ println("", 1);
+
+ if (!anyParamChanged) {
+ println("No parameter value changed in this iteration; exiting AdaGrad.", 1);
+ println("", 1);
+ break; // exit for (iteration) loop preemptively
+ }
+
+ // was an early stopping criterion satisfied?
+ boolean critSatisfied = false;
+ if (!anyParamChangedSignificantly && stopSigValue >= 0) {
+ println("Note: No parameter value changed significantly " + "(i.e. by more than "
+ + stopSigValue + ") in this iteration.", 1);
+ critSatisfied = true;
+ }
+
+ if (critSatisfied) {
+ ++earlyStop;
+ println("", 1);
+ } else {
+ earlyStop = 0;
+ }
+
+ // if min number of iterations executed, investigate if early exit should happen
+ if (iteration >= minIts && earlyStop >= stopMinIts) {
+ println("Some early stopping criteria has been observed " + "in " + stopMinIts
+ + " consecutive iterations; exiting AdaGrad.", 1);
+ println("", 1);
+
+ if (returnBest) {
+ for (int f = 1; f <= bestLambda.size() - 1; ++f)
+ lambda.set(f, bestLambda.get(f));
+ } else {
+ for (int f = 1; f <= numParams; ++f)
+ lambda.set(f, finalLambda[f]);
+ }
+
+ break; // exit for (iteration) loop preemptively
+ }
+
+ // if max number of iterations executed, exit
+ if (iteration >= maxIts) {
+ println("Maximum number of AdaGrad iterations reached; exiting AdaGrad.", 1);
+ println("", 1);
+
+ if (returnBest) {
+ for (int f = 1; f <= bestLambda.size() - 1; ++f)
+ lambda.set(f, bestLambda.get(f));
+ } else {
+ for (int f = 1; f <= numParams; ++f)
+ lambda.set(f, finalLambda[f]);
+ }
+
+ break; // exit for (iteration) loop
+ }
+
+ // use the new wt vector to decode the next iteration
+ // (interpolation with previous wt vector)
+ double interCoef = 1.0; // no interpolation for now
+ for (int i = 1; i <= numParams; i++)
+ lambda.set(i, interCoef * finalLambda[i] + (1 - interCoef) * lambda.get(i).doubleValue());
+
+ println("Next iteration will decode with lambda: " + lambdaToString(lambda), 1);
+ println("", 1);
+
+ // printMemoryUsage();
+ for (int i = 0; i < numSentences; ++i) {
+ suffStats_array[i].clear();
+ }
+ // cleanupMemory();
+ // println("",2);
+
+ retA[2] = 0; // i.e. this should NOT be the last iteration
+ done = true;
+
+ } // while (!done) // NOTE: this "loop" will only be carried out once
+
+ // delete .temp.stats.merged file, since it is not needed in the next
+ // iteration (it will be recreated from scratch)
+ deleteFile(tmpDirPrefix + "temp.stats.merged");
+
+ retA[0] = FINAL_score;
+ retA[1] = earlyStop;
+ return retA;
+
+ } // run_single_iteration
+
+  /**
+   * Renders the leading lambda weights as a string of the form
+   * "{(listing the first K lambdas)w_1, ..., w_K}", where K = min(numParams, 15).
+   *
+   * @param lambdaA 1-indexed feature weights (index 0 is unused)
+   * @return formatted listing of the first K weights, 4 decimal places each
+   */
+  private String lambdaToString(ArrayList<Double> lambdaA) {
+    String retStr = "{";
+    int featToPrint = numParams > 15 ? 15 : numParams;
+    // print at most the first 15 features
+
+    retStr += "(listing the first " + featToPrint + " lambdas)";
+    for (int c = 1; c <= featToPrint - 1; ++c) {
+      retStr += "" + String.format("%.4f", lambdaA.get(c).doubleValue()) + ", ";
+    }
+    // BUG FIX: this used to print lambdaA.get(numParams) — the weight of the
+    // *last* feature — as the final entry, which is wrong whenever
+    // numParams > 15; print the featToPrint'th weight instead so the listing
+    // really is the first featToPrint lambdas.
+    retStr += "" + String.format("%.4f", lambdaA.get(featToPrint).doubleValue()) + "}";
+
+    return retStr;
+  }
+
+  /**
+   * Obtains the n-best output for this iteration. If a pre-computed ("fake")
+   * decoder output file matching fakeFileNameTemplate exists for this
+   * iteration, it is used directly; otherwise the external decoder command is
+   * run as a subprocess. Exits the JVM if the decoder returns an unexpected
+   * exit value (30) or on I/O / interruption failures (99902 / 99903).
+   *
+   * @param iteration current AdaGrad iteration number
+   * @return retSA: [0] = name of the n-best file to be processed,
+   *         [1] = how the file was obtained ("1": external decoder,
+   *         "2": fake decoder, "3": internal decoder)
+   */
+  private String[] run_decoder(int iteration) {
+    String[] retSA = new String[2];
+
+    // retSA saves the output file name (nbest-file)
+    // and the decoder type
+
+    // [0] name of file to be processed
+    // [1] indicates how the output file was obtained:
+    // 1: external decoder
+    // 2: fake decoder
+    // 3: internal decoder
+
+    // use fake decoder if a pre-computed output file exists for this iteration
+    if (fakeFileNameTemplate != null
+        && fileExists(fakeFileNamePrefix + iteration + fakeFileNameSuffix)) {
+      String fakeFileName = fakeFileNamePrefix + iteration + fakeFileNameSuffix;
+      println("Not running decoder; using " + fakeFileName + " instead.", 1);
+      /*
+       * if (fakeFileName.endsWith(".gz")) { copyFile(fakeFileName,decoderOutFileName+".gz");
+       * gunzipFile(decoderOutFileName+".gz"); } else { copyFile(fakeFileName,decoderOutFileName); }
+       */
+      retSA[0] = fakeFileName;
+      retSA[1] = "2";
+
+    } else {
+      println("Running external decoder...", 1);
+
+      try {
+        ArrayList<String> cmd = new ArrayList<String>();
+        cmd.add(decoderCommandFileName);
+
+        // optionally tell the decoder command which iteration this is
+        if (passIterationToDecoder)
+          cmd.add(Integer.toString(iteration));
+
+        ProcessBuilder pb = new ProcessBuilder(cmd);
+        // this merges the error and output streams of the subprocess
+        pb.redirectErrorStream(true);
+        Process p = pb.start();
+
+        // capture the sub-command's output (echoed at decVerbosity)
+        new StreamGobbler(p.getInputStream(), decVerbosity).start();
+
+        // block until the decoder finishes, then check its exit status
+        int decStatus = p.waitFor();
+        if (decStatus != validDecoderExitValue) {
+          println("Call to decoder returned " + decStatus + "; was expecting "
+              + validDecoderExitValue + ".");
+          System.exit(30);
+        }
+      } catch (IOException e) {
+        System.err.println("IOException in AdaGradCore.run_decoder(int): " + e.getMessage());
+        System.exit(99902);
+      } catch (InterruptedException e) {
+        System.err.println("InterruptedException in AdaGradCore.run_decoder(int): "
+            + e.getMessage());
+        System.exit(99903);
+      }
+
+      retSA[0] = decoderOutFileName;
+      retSA[1] = "1";
+
+    }
+
+    return retSA;
+  }
+
+  /**
+   * Splits the decoder's n-best output file into two per-iteration temp files:
+   * temp.sents.it&lt;iteration&gt;, holding one normalized candidate translation per
+   * line, and temp.feats.it&lt;iteration&gt;, holding the matching feature-value
+   * strings. A "||||||" separator line ends each source sentence's n-best
+   * list, and is also written when a sentence's list has fewer than
+   * sizeOfNBest entries. If compressFiles == 1 both temp files are gzipped.
+   *
+   * @param nbestFileName decoder output to read (gzip-compressed if it ends in ".gz")
+   * @param iteration current iteration number, used in the temp file names
+   */
+  private void produceTempFiles(String nbestFileName, int iteration) {
+    try {
+      String sentsFileName = tmpDirPrefix + "temp.sents.it" + iteration;
+      String featsFileName = tmpDirPrefix + "temp.feats.it" + iteration;
+
+      FileOutputStream outStream_sents = new FileOutputStream(sentsFileName, false);
+      OutputStreamWriter outStreamWriter_sents = new OutputStreamWriter(outStream_sents, "utf8");
+      BufferedWriter outFile_sents = new BufferedWriter(outStreamWriter_sents);
+
+      PrintWriter outFile_feats = new PrintWriter(featsFileName);
+
+      // the n-best input may be plain text or gzipped
+      InputStream inStream_nbest = null;
+      if (nbestFileName.endsWith(".gz")) {
+        inStream_nbest = new GZIPInputStream(new FileInputStream(nbestFileName));
+      } else {
+        inStream_nbest = new FileInputStream(nbestFileName);
+      }
+      BufferedReader inFile_nbest = new BufferedReader(
+          new InputStreamReader(inStream_nbest, "utf8"));
+
+      String line; // , prevLine;
+      String candidate_str = "";
+      String feats_str = "";
+
+      int i = 0; // index of the source sentence currently being processed
+      int n = 0; // number of candidates seen so far for sentence i
+      line = inFile_nbest.readLine();
+
+      while (line != null) {
+
+        /*
+         * line format:
+         *
+         * i ||| words of candidate translation . ||| feat-1_val feat-2_val ... feat-numParams_val
+         * .*
+         */
+
+        // in a well formed file, we'd find the nth candidate for the ith sentence
+
+        int read_i = Integer.parseInt((line.substring(0, line.indexOf("|||"))).trim());
+
+        if (read_i != i) {
+          // sentence i's list ended early (fewer than sizeOfNBest candidates);
+          // close it off with a separator and move to the next sentence
+          writeLine("||||||", outFile_sents);
+          outFile_feats.println("||||||");
+          n = 0;
+          ++i;
+        }
+
+        line = (line.substring(line.indexOf("|||") + 3)).trim(); // get rid of initial text
+
+        candidate_str = (line.substring(0, line.indexOf("|||"))).trim();
+        feats_str = (line.substring(line.indexOf("|||") + 3)).trim();
+        // get rid of candidate string
+
+        // drop any trailing "|||"-delimited fields after the feature values
+        int junk_i = feats_str.indexOf("|||");
+        if (junk_i >= 0) {
+          feats_str = (feats_str.substring(0, junk_i)).trim();
+        }
+
+        writeLine(normalize(candidate_str, textNormMethod), outFile_sents);
+        outFile_feats.println(feats_str);
+
+        ++n;
+        if (n == sizeOfNBest) {
+          // sentence i has a full list; write the separator and advance
+          writeLine("||||||", outFile_sents);
+          outFile_feats.println("||||||");
+          n = 0;
+          ++i;
+        }
+
+        line = inFile_nbest.readLine();
+      }
+
+      if (i != numSentences) { // last sentence had too few candidates
+        writeLine("||||||", outFile_sents);
+        outFile_feats.println("||||||");
+      }
+
+      inFile_nbest.close();
+      outFile_sents.close();
+      outFile_feats.close();
+
+      if (compressFiles == 1) {
+        gzipFile(sentsFileName);
+        gzipFile(featsFileName);
+      }
+
+    } catch (FileNotFoundException e) {
+      System.err.println("FileNotFoundException in AdaGradCore.produceTempFiles(int): "
+          + e.getMessage());
+      System.exit(99901);
+    } catch (IOException e) {
+      System.err.println("IOException in AdaGradCore.produceTempFiles(int): " + e.getMessage());
+      System.exit(99902);
+    }
+
+  }
+
+  /**
+   * Creates cfgFileName as a copy of templateFileName in which each feature
+   * line (a line starting with "&lt;featureName&gt; ") is rewritten with the weight
+   * from params. Features whose weight is effectively zero (|w| &lt;= 1e-20) are
+   * omitted, and weights of features absent from the template are appended at
+   * the end. Exits the JVM with code 99902 on I/O failure.
+   *
+   * @param params 1-indexed feature weights (index 0 unused)
+   * @param cfgFileName path of the config file to create
+   * @param templateFileName path of the template config file to read
+   */
+  private void createConfigFile(ArrayList<Double> params, String cfgFileName,
+      String templateFileName) {
+    try {
+      // i.e. create cfgFileName, which is similar to templateFileName, but with
+      // params[] as parameter values
+
+      BufferedReader inFile = new BufferedReader(new FileReader(templateFileName));
+      PrintWriter outFile = new PrintWriter(cfgFileName);
+
+      // CLEANUP: removed the unused locals inFeatDefFile / outFeatDefFile that
+      // were declared here but never read or written.
+      int origFeatNum = 0; // number of known features found in the template file
+
+      String line = inFile.readLine();
+      while (line != null) {
+        // is this line the weight setting for one of the known features?
+        int c_match = -1;
+        for (int c = 1; c <= numParams; ++c) {
+          if (line.startsWith(Vocabulary.word(c) + " ")) {
+            c_match = c;
+            ++origFeatNum;
+            break;
+          }
+        }
+
+        if (c_match == -1) {
+          // not a feature line; copy it through unchanged
+          outFile.println(line);
+        } else {
+          // rewrite with the new weight, dropping effectively-zero weights
+          if (Math.abs(params.get(c_match).doubleValue()) > 1e-20)
+            outFile.println(Vocabulary.word(c_match) + " " + params.get(c_match));
+        }
+
+        line = inFile.readLine();
+      }
+
+      // now append weights of new features (those not seen in the template)
+      for (int c = origFeatNum + 1; c <= numParams; ++c) {
+        if (Math.abs(params.get(c).doubleValue()) > 1e-20)
+          outFile.println(Vocabulary.word(c) + " " + params.get(c));
+      }
+
+      inFile.close();
+      outFile.close();
+    } catch (IOException e) {
+      System.err.println("IOException in AdaGradCore.createConfigFile(double[],String,String): "
+          + e.getMessage());
+      System.exit(99902);
+    }
+  }
+
+  /**
+   * Reads the parameter file named by paramsFileName and initializes lambda[],
+   * defaultLambda[], isOptimizable[], minRandValue[]/maxRandValue[], and
+   * normalizationOptions[].
+   *
+   * Each of the numParams parameter lines has the ZMERT format:
+   *   &lt;name&gt; ||| &lt;defaultValue&gt; &lt;Opt|Fix&gt; &lt;unused&gt; &lt;unused&gt; &lt;minRand&gt; &lt;maxRand&gt;
+   * followed by a "normalization = ..." line selecting one of:
+   * none, absval, maxabsval, minabsval, LNorm.
+   *
+   * Exits the JVM on malformed input (21) or a missing file (99901).
+   */
+  private void processParamFile() {
+    // process parameter file
+    Scanner inFile_init = null;
+    try {
+      inFile_init = new Scanner(new FileReader(paramsFileName));
+    } catch (FileNotFoundException e) {
+      System.err.println("FileNotFoundException in AdaGradCore.processParamFile(): "
+          + e.getMessage());
+      System.exit(99901);
+    }
+
+    String dummy = "";
+
+    // initialize lambda[] and other related arrays
+    for (int c = 1; c <= numParams; ++c) {
+      // skip parameter name (everything up to and including the "|||" separator)
+      while (!dummy.equals("|||")) {
+        dummy = inFile_init.next();
+      }
+
+      // read default value
+      lambda.set(c, inFile_init.nextDouble());
+      defaultLambda[c] = lambda.get(c).doubleValue();
+
+      // read isOptimizable
+      dummy = inFile_init.next();
+      if (dummy.equals("Opt")) {
+        isOptimizable[c] = true;
+      } else if (dummy.equals("Fix")) {
+        isOptimizable[c] = false;
+      } else {
+        println("Unknown isOptimizable string " + dummy + " (must be either Opt or Fix)");
+        System.exit(21);
+      }
+
+      if (!isOptimizable[c]) { // skip next two values
+        dummy = inFile_init.next();
+        dummy = inFile_init.next();
+        dummy = inFile_init.next();
+        dummy = inFile_init.next();
+      } else {
+        // the next two values are not used, only to be consistent with ZMERT's params file format
+        dummy = inFile_init.next();
+        dummy = inFile_init.next();
+        // set minRandValue[c] and maxRandValue[c] (range for random values)
+        dummy = inFile_init.next();
+        if (dummy.equals("-Inf") || dummy.equals("+Inf")) {
+          println("minRandValue[" + c + "] cannot be -Inf or +Inf!");
+          System.exit(21);
+        } else {
+          minRandValue[c] = Double.parseDouble(dummy);
+        }
+
+        dummy = inFile_init.next();
+        if (dummy.equals("-Inf") || dummy.equals("+Inf")) {
+          println("maxRandValue[" + c + "] cannot be -Inf or +Inf!");
+          System.exit(21);
+        } else {
+          maxRandValue[c] = Double.parseDouble(dummy);
+        }
+
+        // check for illogical values
+        if (minRandValue[c] > maxRandValue[c]) {
+          println("minRandValue[" + c + "]=" + minRandValue[c] + " > " + maxRandValue[c]
+              + "=maxRandValue[" + c + "]!");
+          System.exit(21);
+        }
+
+        // check for odd values
+        if (minRandValue[c] == maxRandValue[c]) {
+          println("Warning: lambda[" + c + "] has " + "minRandValue = maxRandValue = "
+              + minRandValue[c] + ".", 1);
+        }
+      } // if (!isOptimizable[c])
+
+      /*
+       * precision[c] = inFile_init.nextDouble(); if (precision[c] < 0) { println("precision[" + c +
+       * "]=" + precision[c] + " < 0! Must be non-negative."); System.exit(21); }
+       */
+
+    }
+
+    // set normalizationOptions[]: skip blank lines up to the "normalization = ..." line
+    String origLine = "";
+    while (origLine != null && origLine.length() == 0) {
+      origLine = inFile_init.nextLine();
+    }
+
+    // How should a lambda[] vector be normalized (before decoding)?
+    // nO[0] = 0: no normalization
+    // nO[0] = 1: scale so that parameter nO[2] has absolute value nO[1]
+    // nO[0] = 2: scale so that the maximum absolute value is nO[1]
+    // nO[0] = 3: scale so that the minimum absolute value is nO[1]
+    // nO[0] = 4: scale so that the L-nO[1] norm equals nO[2]
+
+    // normalization = none
+    // normalization = absval 1 lm
+    // normalization = maxabsval 1
+    // normalization = minabsval 1
+    // normalization = LNorm 2 1
+
+    dummy = (origLine.substring(origLine.indexOf("=") + 1)).trim();
+    String[] dummyA = dummy.split("\\s+");
+
+    if (dummyA[0].equals("none")) {
+      normalizationOptions[0] = 0;
+    } else if (dummyA[0].equals("absval")) {
+      normalizationOptions[0] = 1;
+      normalizationOptions[1] = Double.parseDouble(dummyA[1]);
+      String pName = dummyA[2];
+      for (int i = 3; i < dummyA.length; ++i) { // in case parameter name has multiple words
+        pName = pName + " " + dummyA[i];
+      }
+      normalizationOptions[2] = Vocabulary.id(pName);
+
+      if (normalizationOptions[1] <= 0) {
+        println("Value for the absval normalization method must be positive.");
+        System.exit(21);
+      }
+      if (normalizationOptions[2] == 0) {
+        // BUG FIX: the old message printed normalizationOptions[2] (always 0 on
+        // this path) instead of the feature name the user actually wrote.
+        println("Unrecognized feature name " + pName
+            + " for absval normalization method.", 1);
+        System.exit(21);
+      }
+    } else if (dummyA[0].equals("maxabsval")) {
+      normalizationOptions[0] = 2;
+      normalizationOptions[1] = Double.parseDouble(dummyA[1]);
+      if (normalizationOptions[1] <= 0) {
+        println("Value for the maxabsval normalization method must be positive.");
+        System.exit(21);
+      }
+    } else if (dummyA[0].equals("minabsval")) {
+      normalizationOptions[0] = 3;
+      normalizationOptions[1] = Double.parseDouble(dummyA[1]);
+      if (normalizationOptions[1] <= 0) {
+        println("Value for the minabsval normalization method must be positive.");
+        System.exit(21);
+      }
+    } else if (dummyA[0].equals("LNorm")) {
+      normalizationOptions[0] = 4;
+      normalizationOptions[1] = Double.parseDouble(dummyA[1]);
+      normalizationOptions[2] = Double.parseDouble(dummyA[2]);
+      if (normalizationOptions[1] <= 0 || normalizationOptions[2] <= 0) {
+        println("Both values for the LNorm normalization method must be positive.");
+        System.exit(21);
+      }
+    } else {
+      println("Unrecognized normalization method " + dummyA[0] + "; "
+          + "must be one of none, absval, maxabsval, and LNorm.");
+      System.exit(21);
+    } // if (dummyA[0])
+
+    inFile_init.close();
+  } // processParamFile()
+
+  /**
+   * Sets numDocuments and docOfSentence[], the (0-indexed) document each
+   * source sentence belongs to. With no docInfoFileName, all sentences are
+   * placed in a single document 0; otherwise the doc-info file is parsed
+   * according to whichever of the four formats below it matches.
+   */
+  private void processDocInfo() {
+    // sets numDocuments and docOfSentence[]
+    docOfSentence = new int[numSentences];
+
+    if (docInfoFileName == null) {
+      // no document information: treat the whole set as one document
+      for (int i = 0; i < numSentences; ++i)
+        docOfSentence[i] = 0;
+      numDocuments = 1;
+    } else {
+
+      try {
+
+        // 4 possible formats:
+        // 1) List of numbers, one per document, indicating # sentences in each document.
+        // 2) List of "docName size" pairs, one per document, indicating name of document and #
+        // sentences.
+        // 3) List of docName's, one per sentence, indicating which document each sentence belongs
+        // to.
+        // 4) List of docName_number's, one per sentence, indicating which document each sentence
+        // belongs to,
+        // and its order in that document. (can also use '-' instead of '_')
+
+        int docInfoSize = countNonEmptyLines(docInfoFileName);
+
+        if (docInfoSize < numSentences) { // format #1 or #2
+          numDocuments = docInfoSize;
+          int i = 0;
+
+          BufferedReader inFile = new BufferedReader(new FileReader(docInfoFileName));
+          String line = inFile.readLine();
+          // format #1 lines are bare numbers (no space); format #2 lines are "docName size"
+          boolean format1 = (!(line.contains(" ")));
+
+          for (int doc = 0; doc < numDocuments; ++doc) {
+
+            if (doc != 0)
+              line = inFile.readLine();
+
+            int docSize = 0;
+            if (format1) {
+              docSize = Integer.parseInt(line);
+            } else {
+              docSize = Integer.parseInt(line.split("\\s+")[1]);
+            }
+
+            // assign the next docSize sentences to this document
+            for (int i2 = 1; i2 <= docSize; ++i2) {
+              docOfSentence[i] = doc;
+              ++i;
+            }
+
+          }
+
+          // now i == numSentences
+
+          inFile.close();
+
+        } else if (docInfoSize == numSentences) { // format #3 or #4
+
+          boolean format3 = false;
+
+          // first pass: format #3 (bare doc names) is detected by a repeated
+          // line, since a document's name appears once per sentence it contains
+          HashSet<String> seenStrings = new HashSet<String>();
+          BufferedReader inFile = new BufferedReader(new FileReader(docInfoFileName));
+          for (int i = 0; i < numSentences; ++i) {
+            // set format3 = true if a duplicate is found
+            String line = inFile.readLine();
+            if (seenStrings.contains(line))
+              format3 = true;
+            seenStrings.add(line);
+          }
+
+          inFile.close();
+
+          // second pass: map each sentence to its document's first-seen order
+          HashSet<String> seenDocNames = new HashSet<String>();
+          HashMap<String, Integer> docOrder = new HashMap<String, Integer>();
+          // maps a document name to the order (0-indexed) in which it was seen
+
+          inFile = new BufferedReader(new FileReader(docInfoFileName));
+          for (int i = 0; i < numSentences; ++i) {
+            String line = inFile.readLine();
+
+            String docName = "";
+            if (format3) {
+              docName = line;
+            } else {
+              // format #4: strip the trailing _number (or -number) suffix
+              int sep_i = Math.max(line.lastIndexOf('_'), line.lastIndexOf('-'));
+              docName = line.substring(0, sep_i);
+            }
+
+            if (!seenDocNames.contains(docName)) {
+              seenDocNames.add(docName);
+              docOrder.put(docName, seenDocNames.size() - 1);
+            }
+
+            int docOrder_i = docOrder.get(docName);
+
+            docOfSentence[i] = docOrder_i;
+
+          }
+
+          inFile.close();
+
+          numDocuments = seenDocNames.size();
+
+        } else { // badly formatted
+          // NOTE(review): docInfoSize > numSentences is silently ignored here;
+          // presumably this should be reported as an error — confirm.
+        }
+
+      } catch (FileNotFoundException e) {
+        System.err.println("FileNotFoundException in AdaGradCore.processDocInfo(): "
+            + e.getMessage());
+        System.exit(99901);
+      } catch (IOException e) {
+        System.err.println("IOException in AdaGradCore.processDocInfo(): " + e.getMessage());
+        System.exit(99902);
+      }
+    }
+
+  }
+
+ /**
+  * Copies origFileName to newFileName byte-for-byte.
+  *
+  * @param origFileName path of the file to copy
+  * @param newFileName path of the copy to create (overwritten if it exists)
+  * @return true on success, false if either file could not be opened, read, or written
+  */
+ private boolean copyFile(String origFileName, String newFileName) {
+   InputStream in = null;
+   OutputStream out = null;
+   try {
+     in = new FileInputStream(new File(origFileName));
+     out = new FileOutputStream(new File(newFileName));
+
+     byte[] buffer = new byte[1024];
+     int len;
+     while ((len = in.read(buffer)) > 0) {
+       out.write(buffer, 0, len);
+     }
+     return true;
+   } catch (FileNotFoundException e) {
+     System.err.println("FileNotFoundException in AdaGradCore.copyFile(String,String): "
+         + e.getMessage());
+     return false;
+   } catch (IOException e) {
+     System.err.println("IOException in AdaGradCore.copyFile(String,String): " + e.getMessage());
+     return false;
+   } finally {
+     // close both streams even when the copy fails part-way
+     // (the original leaked them on any exception thrown during the copy loop)
+     if (in != null) {
+       try { in.close(); } catch (IOException e) { /* best-effort close */ }
+     }
+     if (out != null) {
+       try { out.close(); } catch (IOException e) { /* best-effort close */ }
+     }
+   }
+ }
+
+ // Renames origFileName to newFileName, first removing any existing file at the
+ // target path; prints a priority-1 warning if the source is missing or the rename fails.
+ private void renameFile(String origFileName, String newFileName) {
+   if (!fileExists(origFileName)) {
+     println("Warning: file " + origFileName + " does not exist! (in AdaGradCore.renameFile)", 1);
+     return;
+   }
+   deleteFile(newFileName); // renameTo may fail if the target already exists
+   File source = new File(origFileName);
+   File target = new File(newFileName);
+   boolean renamed = source.renameTo(target);
+   if (!renamed) {
+     println("Warning: attempt to rename " + origFileName + " to " + newFileName
+         + " was unsuccessful!", 1);
+   }
+ }
+
+ // Deletes fileName if it exists; prints a priority-1 warning when deletion fails.
+ private void deleteFile(String fileName) {
+   if (!fileExists(fileName))
+     return;
+   File target = new File(fileName);
+   boolean deleted = target.delete();
+   if (!deleted) {
+     println("Warning: attempt to delete " + fileName + " was unsuccessful!", 1);
+   }
+ }
+
+ // Writes one line of text followed by a platform line separator, flushing
+ // immediately so output is visible even if the program aborts later.
+ private void writeLine(String line, BufferedWriter writer) throws IOException {
+   writer.write(line); // equivalent to write(line, 0, line.length())
+   writer.newLine();
+   writer.flush();
+ }
+
+ // need to re-write to handle different forms of lambda
+ /**
+  * Shuts down the tuner: cleans up the decoder, writes the final config file,
+  * restores the original decoder config file name, and (optionally) dumps the
+  * final feature weights to finalLambdaFileName in "name ||| value" format.
+  */
+ public void finish() {
+ if (myDecoder != null) {
+ myDecoder.cleanUp();
+ }
+
+ // create config file with final values
+ createConfigFile(lambda, decoderConfigFileName + ".AdaGrad.final", decoderConfigFileName
+ + ".AdaGrad.orig");
+
+ // delete current decoder config file and decoder output
+ deleteFile(decoderConfigFileName);
+ deleteFile(decoderOutFileName);
+
+ // restore original name for config file (name was changed
+ // in initialize() so it doesn't get overwritten)
+ renameFile(decoderConfigFileName + ".AdaGrad.orig", decoderConfigFileName);
+
+ if (finalLambdaFileName != null) {
+ try {
+ // one weight per line, indexed by the Vocabulary id of each feature (1..numParams)
+ PrintWriter outFile_lambdas = new PrintWriter(finalLambdaFileName);
+ for (int c = 1; c <= numParams; ++c) {
+ outFile_lambdas.println(Vocabulary.word(c) + " ||| " + lambda.get(c).doubleValue());
+ }
+ outFile_lambdas.close();
+
+ } catch (IOException e) {
+ System.err.println("IOException in AdaGradCore.finish(): " + e.getMessage());
+ System.exit(99902);
+ }
+ }
+
+ }
+
+ /**
+  * Reads a configuration file and flattens it into a command-line-style args array.
+  * Lines starting with '#' (or the part of a line after '#') are discarded. Each
+  * remaining line is tokenized on whitespace, with single quotes grouping a
+  * multi-word value into one token. A line must be "-option value", except -m and
+  * -docSet which may carry extra option tokens; anything else aborts with exit 70.
+  */
+ private String[] cfgFileToArgsArray(String fileName) {
+ checkFile(fileName);
+
+ Vector<String> argsVector = new Vector<String>();
+
+ BufferedReader inFile = null;
+ try {
+ inFile = new BufferedReader(new FileReader(fileName));
+ String line, origLine;
+ do {
+ line = inFile.readLine();
+ origLine = line; // for error reporting purposes
+
+ if (line != null && line.length() > 0 && line.charAt(0) != '#') {
+
+ if (line.indexOf("#") != -1) { // discard comment
+ line = line.substring(0, line.indexOf("#"));
+ }
+
+ line = line.trim();
+
+ // now line should look like "-xxx XXX"
+
+ /*
+ * OBSOLETE MODIFICATION //SPECIAL HANDLING FOR AdaGrad CLASSIFIER PARAMETERS String[]
+ * paramA = line.split("\\s+");
+ *
+ * if( paramA[0].equals("-classifierParams") ) { String classifierParam = ""; for(int p=1;
+ * p<=paramA.length-1; p++) classifierParam += paramA[p]+" ";
+ *
+ * if(paramA.length>=2) { String[] tmpParamA = new String[2]; tmpParamA[0] = paramA[0];
+ * tmpParamA[1] = classifierParam; paramA = tmpParamA; } else {
+ * println("Malformed line in config file:"); println(origLine); System.exit(70); } }//END
+ * MODIFICATION
+ */
+
+ // cmu modification(from meteor for zmert)
+ // Parse args: whitespace splits tokens unless inside single quotes
+ ArrayList<String> argList = new ArrayList<String>();
+ StringBuilder arg = new StringBuilder();
+ boolean quoted = false;
+ for (int i = 0; i < line.length(); i++) {
+ if (Character.isWhitespace(line.charAt(i))) {
+ if (quoted)
+ arg.append(line.charAt(i));
+ else if (arg.length() > 0) {
+ argList.add(arg.toString());
+ arg = new StringBuilder();
+ }
+ } else if (line.charAt(i) == '\'') {
+ // closing quote ends the current token; opening quote starts literal mode
+ if (quoted) {
+ argList.add(arg.toString());
+ arg = new StringBuilder();
+ }
+ quoted = !quoted;
+ } else
+ arg.append(line.charAt(i));
+ }
+ if (arg.length() > 0)
+ argList.add(arg.toString());
+ // Create paramA
+ String[] paramA = new String[argList.size()];
+ for (int i = 0; i < paramA.length; paramA[i] = argList.get(i++))
+ ;
+ // END CMU MODIFICATION
+
+ if (paramA.length == 2 && paramA[0].charAt(0) == '-') {
+ argsVector.add(paramA[0]);
+ argsVector.add(paramA[1]);
+ } else if (paramA.length > 2 && (paramA[0].equals("-m") || paramA[0].equals("-docSet"))) {
+ // -m (metricName), -docSet are allowed to have extra options
+ for (int opt = 0; opt < paramA.length; ++opt) {
+ argsVector.add(paramA[opt]);
+ }
+ } else {
+ println("Malformed line in config file:");
+ println(origLine);
+ System.exit(70);
+ }
+
+ }
+ } while (line != null);
+
+ inFile.close();
+ } catch (FileNotFoundException e) {
+ println("AdaGrad configuration file " + fileName + " was not found!");
+ System.err.println("FileNotFoundException in AdaGradCore.cfgFileToArgsArray(String): "
+ + e.getMessage());
+ System.exit(99901);
+ } catch (IOException e) {
+ System.err
+ .println("IOException in AdaGradCore.cfgFileToArgsArray(String): " + e.getMessage());
+ System.exit(99902);
+ }
+
+ String[] argsArray = new String[argsVector.size()];
+
+ for (int i = 0; i < argsVector.size(); ++i) {
+ argsArray[i] = argsVector.elementAt(i);
+ }
+
+ return argsArray;
+ }
+
+ // Convenience overload: processes args as a first-time invocation (warnings enabled).
+ private void processArgsArray(String[] args) {
+ processArgsArray(args, true);
+ }
+
+ /**
+  * Parses the flattened args array (see cfgFileToArgsArray), resetting every option
+  * to its default first, then validates the resulting configuration (file existence,
+  * iteration bounds, decoder availability) and resolves file names against dirPrefix.
+  * Exits with code 10 on any malformed option value.
+  *
+  * @param args option/value pairs; each loop iteration consumes two entries, with
+  *        multi-valued options (-m, -docSet) advancing i further as needed
+  * @param firstTime when true, prints one-time warnings and echoes the args array
+  */
+ private void processArgsArray(String[] args, boolean firstTime) {
+ /* set default values */
+ // Relevant files
+ dirPrefix = null;
+ sourceFileName = null;
+ refFileName = "reference.txt";
+ refsPerSen = 1;
+ textNormMethod = 1;
+ paramsFileName = "params.txt";
+ docInfoFileName = null;
+ finalLambdaFileName = null;
+ // MERT specs
+ metricName = "BLEU";
+ metricName_display = metricName;
+ metricOptions = new String[2];
+ metricOptions[0] = "4";
+ metricOptions[1] = "closest";
+ docSubsetInfo = new int[7];
+ docSubsetInfo[0] = 0;
+ maxMERTIterations = 20;
+ prevMERTIterations = 20;
+ minMERTIterations = 5;
+ stopMinIts = 3;
+ stopSigValue = -1;
+ //
+ // /* possibly other early stopping criteria here */
+ //
+ numOptThreads = 1;
+ saveInterFiles = 3;
+ compressFiles = 0;
+ oneModificationPerIteration = false;
+ randInit = false;
+ seed = System.currentTimeMillis();
+ // useDisk = 2;
+ // Decoder specs
+ decoderCommandFileName = null;
+ passIterationToDecoder = false;
+ decoderOutFileName = "output.nbest";
+ validDecoderExitValue = 0;
+ decoderConfigFileName = "dec_cfg.txt";
+ sizeOfNBest = 100;
+ fakeFileNameTemplate = null;
+ fakeFileNamePrefix = null;
+ fakeFileNameSuffix = null;
+ // Output specs
+ verbosity = 1;
+ decVerbosity = 0;
+
+ int i = 0;
+
+ while (i < args.length) {
+ String option = args[i];
+ // Relevant files
+ if (option.equals("-dir")) {
+ dirPrefix = args[i + 1];
+ } else if (option.equals("-s")) {
+ sourceFileName = args[i + 1];
+ } else if (option.equals("-r")) {
+ refFileName = args[i + 1];
+ } else if (option.equals("-rps")) {
+ refsPerSen = Integer.parseInt(args[i + 1]);
+ if (refsPerSen < 1) {
+ println("refsPerSen must be positive.");
+ System.exit(10);
+ }
+ } else if (option.equals("-txtNrm")) {
+ textNormMethod = Integer.parseInt(args[i + 1]);
+ if (textNormMethod < 0 || textNormMethod > 4) {
+ println("textNormMethod should be between 0 and 4");
+ System.exit(10);
+ }
+ } else if (option.equals("-p")) {
+ paramsFileName = args[i + 1];
+ } else if (option.equals("-docInfo")) {
+ docInfoFileName = args[i + 1];
+ } else if (option.equals("-fin")) {
+ finalLambdaFileName = args[i + 1];
+ // MERT specs
+ } else if (option.equals("-m")) {
+ metricName = args[i + 1];
+ metricName_display = metricName;
+ if (EvaluationMetric.knownMetricName(metricName)) {
+ // consume the metric's extra option tokens in addition to the usual pair
+ int optionCount = EvaluationMetric.metricOptionCount(metricName);
+ metricOptions = new String[optionCount];
+ for (int opt = 0; opt < optionCount; ++opt) {
+ metricOptions[opt] = args[i + opt + 2];
+ }
+ i += optionCount;
+ } else {
+ println("Unknown metric name " + metricName + ".");
+ System.exit(10);
+ }
+ } else if (option.equals("-docSet")) {
+ String method = args[i + 1];
+
+ if (method.equals("all")) {
+ docSubsetInfo[0] = 0;
+ i += 0;
+ } else if (method.equals("bottom")) {
+ // "8d" selects a document count; "25%" selects a percentage
+ String a = args[i + 2];
+ if (a.endsWith("d")) {
+ docSubsetInfo[0] = 1;
+ a = a.substring(0, a.indexOf("d"));
+ } else {
+ docSubsetInfo[0] = 2;
+ a = a.substring(0, a.indexOf("%"));
+ }
+ docSubsetInfo[5] = Integer.parseInt(a);
+ i += 1;
+ } else if (method.equals("top")) {
+ String a = args[i + 2];
+ if (a.endsWith("d")) {
+ docSubsetInfo[0] = 3;
+ a = a.substring(0, a.indexOf("d"));
+ } else {
+ docSubsetInfo[0] = 4;
+ a = a.substring(0, a.indexOf("%"));
+ }
+ docSubsetInfo[5] = Integer.parseInt(a);
+ i += 1;
+ } else if (method.equals("window")) {
+ // e.g. "window 11d around 90percentile" or "window 11d around 40rank"
+ String a1 = args[i + 2];
+ a1 = a1.substring(0, a1.indexOf("d")); // size of window
+ String a2 = args[i + 4];
+ if (a2.indexOf("p") > 0) {
+ docSubsetInfo[0] = 5;
+ a2 = a2.substring(0, a2.indexOf("p"));
+ } else {
+ docSubsetInfo[0] = 6;
+ a2 = a2.substring(0, a2.indexOf("r"));
+ }
+ docSubsetInfo[5] = Integer.parseInt(a1);
+ docSubsetInfo[6] = Integer.parseInt(a2);
+ i += 3;
+ } else {
+ println("Unknown docSet method " + method + ".");
+ System.exit(10);
+ }
+ } else if (option.equals("-maxIt")) {
+ maxMERTIterations = Integer.parseInt(args[i + 1]);
+ if (maxMERTIterations < 1) {
+ println("maxIt must be positive.");
+ System.exit(10);
+ }
+ } else if (option.equals("-minIt")) {
+ minMERTIterations = Integer.parseInt(args[i + 1]);
+ if (minMERTIterations < 1) {
+ println("minIt must be positive.");
+ System.exit(10);
+ }
+ } else if (option.equals("-prevIt")) {
+ prevMERTIterations = Integer.parseInt(args[i + 1]);
+ if (prevMERTIterations < 0) {
+ println("prevIt must be non-negative.");
+ System.exit(10);
+ }
+ } else if (option.equals("-stopIt")) {
+ stopMinIts = Integer.parseInt(args[i + 1]);
+ if (stopMinIts < 1) {
+ println("stopIts must be positive.");
+ System.exit(10);
+ }
+ } else if (option.equals("-stopSig")) {
+ stopSigValue = Double.parseDouble(args[i + 1]);
+ }
+ //
+ // /* possibly other early stopping criteria here */
+ //
+ else if (option.equals("-thrCnt")) {
+ numOptThreads = Integer.parseInt(args[i + 1]);
+ if (numOptThreads < 1) {
+ println("threadCount must be positive.");
+ System.exit(10);
+ }
+ } else if (option.equals("-save")) {
+ saveInterFiles = Integer.parseInt(args[i + 1]);
+ if (saveInterFiles < 0 || saveInterFiles > 3) {
+ println("save should be between 0 and 3");
+ System.exit(10);
+ }
+ } else if (option.equals("-compress")) {
+ compressFiles = Integer.parseInt(args[i + 1]);
+ if (compressFiles < 0 || compressFiles > 1) {
+ println("compressFiles should be either 0 or 1");
+ System.exit(10);
+ }
+ } else if (option.equals("-opi")) {
+ int opi = Integer.parseInt(args[i + 1]);
+ if (opi == 1) {
+ oneModificationPerIteration = true;
+ } else if (opi == 0) {
+ oneModificationPerIteration = false;
+ } else {
+ println("oncePerIt must be either 0 or 1.");
+ System.exit(10);
+ }
+ } else if (option.equals("-rand")) {
+ int rand = Integer.parseInt(args[i + 1]);
+ if (rand == 1) {
+ randInit = true;
+ } else if (rand == 0) {
+ randInit = false;
+ } else {
+ println("randInit must be either 0 or 1.");
+ System.exit(10);
+ }
+ } else if (option.equals("-seed")) {
+ if (args[i + 1].equals("time")) {
+ seed = System.currentTimeMillis();
+ } else {
+ seed = Long.parseLong(args[i + 1]);
+ }
+ }
+ /*
+ * else if (option.equals("-ud")) { useDisk = Integer.parseInt(args[i+1]); if (useDisk < 0 ||
+ * useDisk > 2) { println("useDisk should be between 0 and 2"); System.exit(10); } }
+ */
+
+ // for adagrad:
+ else if (option.equals("-needShuffle")) {
+ int shuffle = Integer.parseInt(args[i + 1]);
+ if (shuffle == 1)
+ needShuffle = true;
+ else if (shuffle == 0)
+ needShuffle = false;
+ else {
+ println("-needShuffle must be either 0 or 1.");
+ System.exit(10);
+ }
+ }
+ // average weights after each epoch or not
+ else if (option.equals("-needAvg")) {
+ int avg = Integer.parseInt(args[i + 1]);
+ if (avg == 1)
+ needAvg = true;
+ else if (avg == 0)
+ needAvg = false;
+ else {
+ println("-needAvg must be either 0 or 1.");
+ System.exit(10);
+ }
+ }
+ // return the best weight during tuning or not
+ else if (option.equals("-returnBest")) {
+ int retBest = Integer.parseInt(args[i + 1]);
+ if (retBest == 1)
+ returnBest = true;
+ else if (retBest == 0)
+ returnBest = false;
+ else {
+ println("-returnBest must be either 0 or 1.");
+ System.exit(10);
+ }
+ }
+ // mini-batch size
+ else if (option.equals("-batchSize")) {
+ batchSize = Integer.parseInt(args[i + 1]);
+ }
+ // regularization: l1 or l2
+ else if (option.equals("-regularization")) {
+ regularization = Integer.parseInt(args[i + 1]);
+ }
+ // step size coefficient
+ else if (option.equals("-eta")) {
+ eta = Double.parseDouble(args[i + 1]);
+ }
+ // regularization coefficient
+ else if (option.equals("-lambda")) {
+ lam = Double.parseDouble(args[i + 1]);
+ }
+ // (removed a duplicate, unreachable "-regularization" branch: the option is
+ // already handled earlier in this chain, so the second test could never match)
+ // oracle selection mode
+ else if (option.equals("-oracleSelection")) {
+ oraSelectMode = Integer.parseInt(args[i + 1]);
+ }
+ // prediction selection mode
+ else if (option.equals("-predictionSelection")) {
+ predSelectMode = Integer.parseInt(args[i + 1]);
+ }
+ // AdaGrad internal iterations
+ else if (option.equals("-adagradIter")) {
+ adagradIter = Integer.parseInt(args[i + 1]);
+ }
+ // else if (option.equals("-sentForScaling")) {
+ // sentForScale = Double.parseDouble(args[i + 1]);
+ // if(sentForScale>1 || sentForScale<0) {
+ // println("-sentForScaling must be in [0,1]");
+ // System.exit(10);
+ // }
+ // }
+ else if (option.equals("-scoreRatio")) {
+ scoreRatio = Double.parseDouble(args[i + 1]);
+ if (scoreRatio <= 0) {
+ println("-scoreRatio must be positive");
+ System.exit(10);
+ }
+ } else if (option.equals("-needScaling")) {
+ int scale = Integer.parseInt(args[i + 1]);
+ if (scale == 1)
+ needScale = true;
+ else if (scale == 0)
+ needScale = false;
+ else {
+ println("-needScaling must be either 0 or 1.");
+ System.exit(10);
+ }
+ } else if (option.equals("-usePseudoCorpus")) {
+ int use = Integer.parseInt(args[i + 1]);
+ if (use == 1)
+ usePseudoBleu = true;
+ else if (use == 0)
+ usePseudoBleu = false;
+ else {
+ println("-usePseudoCorpus must be either 0 or 1.");
+ System.exit(10);
+ }
+ } else if (option.equals("-corpusDecay")) {
+ R = Double.parseDouble(args[i + 1]);
+ }
+
+ // Decoder specs
+ else if (option.equals("-cmd")) {
+ decoderCommandFileName = args[i + 1];
+ } else if (option.equals("-passIt")) {
+ int val = Integer.parseInt(args[i + 1]);
+ if (val < 0 || val > 1) {
+ println("passIterationToDecoder should be either 0 or 1");
+ System.exit(10);
+ }
+ passIterationToDecoder = (val == 1) ? true : false;
+ } else if (option.equals("-decOut")) {
+ decoderOutFileName = args[i + 1];
+ } else if (option.equals("-decExit")) {
+ validDecoderExitValue = Integer.parseInt(args[i + 1]);
+ } else if (option.equals("-dcfg")) {
+ decoderConfigFileName = args[i + 1];
+ } else if (option.equals("-N")) {
+ sizeOfNBest = Integer.parseInt(args[i + 1]);
+ if (sizeOfNBest < 1) {
+ println("N must be positive.");
+ System.exit(10);
+ }
+ }
+ // Output specs
+ else if (option.equals("-v")) {
+ verbosity = Integer.parseInt(args[i + 1]);
+ if (verbosity < 0 || verbosity > 4) {
+ println("verbosity should be between 0 and 4");
+ System.exit(10);
+ }
+ } else if (option.equals("-decV")) {
+ decVerbosity = Integer.parseInt(args[i + 1]);
+ if (decVerbosity < 0 || decVerbosity > 1) {
+ println("decVerbosity should be either 0 or 1");
+ System.exit(10);
+ }
+ } else if (option.equals("-fake")) {
+ fakeFileNameTemplate = args[i + 1];
+ int QM_i = fakeFileNameTemplate.indexOf("?");
+ if (QM_i <= 0) {
+ println("fakeFileNameTemplate must contain '?' to indicate position of iteration number");
+ System.exit(10);
+ }
+ fakeFileNamePrefix = fakeFileNameTemplate.substring(0, QM_i);
+ fakeFileNameSuffix = fakeFileNameTemplate.substring(QM_i + 1);
+ } else {
+ println("Unknown option " + option);
+ System.exit(10);
+ }
+
+ i += 2;
+
+ } // while (i)
+
+ if (maxMERTIterations < minMERTIterations) {
+
+ if (firstTime)
+ println("Warning: maxMERTIts is smaller than minMERTIts; " + "decreasing minMERTIts from "
+ + minMERTIterations + " to maxMERTIts " + "(i.e. " + maxMERTIterations + ").", 1);
+
+ minMERTIterations = maxMERTIterations;
+ }
+
+ if (dirPrefix != null) { // append dirPrefix to file names
+ refFileName = fullPath(dirPrefix, refFileName);
+ decoderOutFileName = fullPath(dirPrefix, decoderOutFileName);
+ paramsFileName = fullPath(dirPrefix, paramsFileName);
+ decoderConfigFileName = fullPath(dirPrefix, decoderConfigFileName);
+
+ if (sourceFileName != null) {
+ sourceFileName = fullPath(dirPrefix, sourceFileName);
+ }
+ if (docInfoFileName != null) {
+ docInfoFileName = fullPath(dirPrefix, docInfoFileName);
+ }
+ if (finalLambdaFileName != null) {
+ finalLambdaFileName = fullPath(dirPrefix, finalLambdaFileName);
+ }
+ if (decoderCommandFileName != null) {
+ decoderCommandFileName = fullPath(dirPrefix, decoderCommandFileName);
+ }
+ if (fakeFileNamePrefix != null) {
+ fakeFileNamePrefix = fullPath(dirPrefix, fakeFileNamePrefix);
+ }
+ }
+
+ // TODO: make this an argument
+ // TODO: also use this for the state file? could be tricky, since that file is created by
+ // ZMERT.java
+ // TODO: change name from tmpDirPrefix to tmpFilePrefix?
+ int k = decoderOutFileName.lastIndexOf("/");
+ if (k >= 0) {
+ tmpDirPrefix = decoderOutFileName.substring(0, k + 1) + "AdaGrad.";
+ } else {
+ tmpDirPrefix = "AdaGrad.";
+ }
+ println("tmpDirPrefix: " + tmpDirPrefix);
+
+ checkFile(paramsFileName);
+ checkFile(decoderConfigFileName);
+
+ boolean canRunCommand = fileExists(decoderCommandFileName);
+ if (decoderCommandFileName != null && !canRunCommand) {
+ // i.e. a decoder command file was specified, but it was not found
+ if (firstTime)
+ println("Warning: specified decoder command file " + decoderCommandFileName
+ + " was not found.", 1);
+ }
+ boolean canRunJoshua = fileExists(sourceFileName);
+ if (sourceFileName != null && !canRunJoshua) {
+ // i.e. a source file was specified, but it was not found
+ if (firstTime)
+ println("Warning: specified source file " + sourceFileName + " was not found.", 1);
+ }
+ boolean canRunFake = (fakeFileNameTemplate != null);
+
+ if (!canRunCommand && !canRunJoshua) { // can only run fake decoder
+
+ if (!canRunFake) {
+ println("AdaGrad cannot decode; must provide one of: command file (for external decoder),");
+ println("                                           source file (for Joshua decoder),");
+ println("                                           or prefix for existing output files (for fake decoder).");
+ System.exit(12);
+ }
+
+ // determine how many iterations' worth of fake decoder output already exists
+ int lastGoodIt = 0;
+ for (int it = 1; it <= maxMERTIterations; ++it) {
+ if (fileExists(fakeFileNamePrefix + it + fakeFileNameSuffix)) {
+ lastGoodIt = it;
+ } else {
+ break; // from for (it) loop
+ }
+ }
+
+ if (lastGoodIt == 0) {
+ println("Fake decoder cannot find first output file "
+ + (fakeFileNamePrefix + 1 + fakeFileNameSuffix));
+ System.exit(13);
+ } else if (lastGoodIt < maxMERTIterations) {
+ if (firstTime)
+ println("Warning: can only run fake decoder; existing output files "
+ + "are only available for the first " + lastGoodIt + " iteration(s).", 1);
+ }
+
+ }
+
+ if (refsPerSen > 1) {
+ // the provided refFileName might be a prefix
+ File dummy = new File(refFileName);
+ if (!dummy.exists()) {
+ refFileName = createUnifiedRefFile(refFileName, refsPerSen);
+ }
+ } else {
+ checkFile(refFileName);
+ }
+
+ if (firstTime) {
+ println("Processed the following args array:", 1);
+ print("  ", 1);
+ for (i = 0; i < args.length; ++i) {
+ print(args[i] + " ", 1);
+ }
+ println("", 1);
+ println("", 1);
+ }
+
+ } // processArgs(String[] args)
+
+ /*
+  * Fills in the derived fields of the docSet descriptor. Examples:
+  * 1: -docSet bottom 8d      2: -docSet bottom 25% (the bottom ceil(0.25*numDocs) documents)
+  * 3: -docSet top 8d         4: -docSet top 25% (the top ceil(0.25*numDocs) documents)
+  * 5: -docSet window 11d around 90percentile (11 docs centered on the 90th percentile;
+  *    complain if not enough docs; don't adjust)
+  * 6: -docSet window 11d around 40rank (11 docs centered on doc ranked 40;
+  *    complain if not enough docs; don't adjust)
+  *
+  * Layout: [0]: method (0-6)  [1]: first (1-indexed)  [2]: last (1-indexed)
+  *         [3]: size  [4]: center  [5]: arg1 (-1 for method 0)  [6]: arg2 (-1 for methods 0-4)
+  */
+ private void set_docSubsetInfo(int[] info) {
+   switch (info[0]) {
+     case 0: // all documents
+       info[1] = 1;
+       info[2] = numDocuments;
+       info[3] = numDocuments;
+       break;
+     case 1: // bottom, by document count
+       info[3] = info[5];
+       info[2] = numDocuments;
+       info[1] = numDocuments - info[3] + 1;
+       break;
+     case 2: // bottom, by percentage
+       info[3] = (int) (Math.ceil((info[5] / 100.0) * numDocuments));
+       info[2] = numDocuments;
+       info[1] = numDocuments - info[3] + 1;
+       break;
+     case 3: // top, by document count
+       info[3] = info[5];
+       info[1] = 1;
+       info[2] = info[3];
+       break;
+     case 4: // top, by percentage
+       info[3] = (int) (Math.ceil((info[5] / 100.0) * numDocuments));
+       info[1] = 1;
+       info[2] = info[3];
+       break;
+     case 5: // window centered on a percentile
+       info[3] = info[5];
+       info[4] = (int) (Math.floor((info[6] / 100.0) * numDocuments));
+       info[1] = info[4] - ((info[3] - 1) / 2);
+       info[2] = info[4] + ((info[3] - 1) / 2);
+       break;
+     case 6: // window centered on a rank
+       info[3] = info[5];
+       info[4] = info[6];
+       info[1] = info[4] - ((info[3] - 1) / 2);
+       info[2] = info[4] + ((info[3] - 1) / 2);
+       break;
+   }
+
+   // for methods 0-4 the center is simply midway between first and last
+   if (info[0] >= 0 && info[0] <= 4) {
+     info[4] = (info[1] + info[2]) / 2;
+   }
+ }
+
+ // Aborts the program (exit code 40) if the given file does not exist.
+ private void checkFile(String fileName) {
+   if (fileExists(fileName))
+     return;
+   println("The file " + fileName + " was not found!");
+   System.exit(40);
+ }
+
+ // Returns whether fileName names an existing file; a null name counts as nonexistent.
+ private boolean fileExists(String fileName) {
+   return (fileName != null) && new File(fileName).exists();
+ }
+
+ // Gzips inputFileName into inputFileName + ".gz" (and deletes the original).
+ private void gzipFile(String inputFileName) {
+ gzipFile(inputFileName, inputFileName + ".gz");
+ }
+
+ /**
+  * Compresses inputFileName into gzippedFileName, then deletes the original file.
+  * Exits the JVM (code 99902) on any I/O failure.
+  */
+ private void gzipFile(String inputFileName, String gzippedFileName) {
+   // NOTE: this will delete the original file
+   try {
+     FileInputStream source = new FileInputStream(inputFileName);
+     GZIPOutputStream sink = new GZIPOutputStream(new FileOutputStream(gzippedFileName));
+
+     byte[] chunk = new byte[4096];
+     for (int n = source.read(chunk); n > 0; n = source.read(chunk)) {
+       sink.write(chunk, 0, n);
+     }
+
+     source.close();
+     sink.finish(); // flush the trailing gzip metadata before closing
+     sink.close();
+
+     deleteFile(inputFileName);
+
+   } catch (IOException e) {
+     System.err.println("IOException in AdaGradCore.gzipFile(String,String): " + e.getMessage());
+     System.exit(99902);
+   }
+ }
+
+ @SuppressWarnings("unused")
+ // Decompresses gzippedFileName in place: "x.gz" becomes "x"; any other name gains ".dec".
+ private void gunzipFile(String gzippedFileName) {
+   String outputName = gzippedFileName.endsWith(".gz")
+       ? gzippedFileName.substring(0, gzippedFileName.length() - 3)
+       : gzippedFileName + ".dec";
+   gunzipFile(gzippedFileName, outputName);
+ }
+
+ /**
+  * Decompresses gzippedFileName into outputFileName, then deletes the gzipped file.
+  * Exits the JVM (code 99902) on any I/O failure.
+  */
+ private void gunzipFile(String gzippedFileName, String outputFileName) {
+   // NOTE: this will delete the original (gzipped) file
+   try {
+     GZIPInputStream source = new GZIPInputStream(new FileInputStream(gzippedFileName));
+     FileOutputStream sink = new FileOutputStream(outputFileName);
+
+     byte[] chunk = new byte[4096];
+     for (int n = source.read(chunk); n > 0; n = source.read(chunk)) {
+       sink.write(chunk, 0, n);
+     }
+
+     source.close();
+     sink.close();
+
+     deleteFile(gzippedFileName);
+
+   } catch (IOException e) {
+     System.err.println("IOException in AdaGradCore.gunzipFile(String,String): " + e.getMessage());
+     System.exit(99902);
+   }
+ }
+
+ /**
+  * Interleaves numFiles per-sentence reference files (named prefix+"1".. or
+  * prefix+".1"..; a 0-indexed set prefix+"0".. is also accepted) into a single
+  * file with all references for sentence i on consecutive lines. Returns the
+  * name of the merged file (prefix + "all" or prefix + ".all"), or the prefix
+  * itself when numFiles < 2.
+  */
+ private String createUnifiedRefFile(String prefix, int numFiles) {
+ if (numFiles < 2) {
+ println("Warning: createUnifiedRefFile called with numFiles = " + numFiles + "; "
+ + "doing nothing.", 1);
+ return prefix;
+ } else {
+ File checker;
+ checker = new File(prefix + "1");
+
+ // accept either "prefix1" or "prefix.1" naming for the individual reference files
+ if (!checker.exists()) {
+ checker = new File(prefix + ".1");
+ if (!checker.exists()) {
+ println("Can't find reference files.");
+ System.exit(50);
+ } else {
+ prefix = prefix + ".";
+ }
+ }
+
+ String outFileName;
+ if (prefix.endsWith(".")) {
+ outFileName = prefix + "all";
+ } else {
+ outFileName = prefix + ".all";
+ }
+
+ try {
+ PrintWriter outFile = new PrintWriter(outFileName);
+
+ BufferedReader[] inFile = new BufferedReader[numFiles];
+
+ // the file set may be 0-indexed or 1-indexed; probe for a "0" file first
+ int nextIndex;
+ checker = new File(prefix + "0");
+ if (checker.exists()) {
+ nextIndex = 0;
+ } else {
+ nextIndex = 1;
+ }
+ int lineCount = countLines(prefix + nextIndex);
+
+ // open every reference file, insisting they all have the same line count
+ for (int r = 0; r < numFiles; ++r) {
+ if (countLines(prefix + nextIndex) != lineCount) {
+ println("Line count mismatch in " + (prefix + nextIndex) + ".");
+ System.exit(60);
+ }
+ InputStream inStream = new FileInputStream(new File(prefix + nextIndex));
+ inFile[r] = new BufferedReader(new InputStreamReader(inStream, "utf8"));
+ ++nextIndex;
+ }
+
+ String line;
+
+ // interleave: for each sentence, emit one line from each reference file
+ for (int i = 0; i < lineCount; ++i) {
+ for (int r = 0; r < numFiles; ++r) {
+ line = inFile[r].readLine();
+ outFile.println(line);
+ }
+ }
+
+ outFile.close();
+
+ for (int r = 0; r < numFiles; ++r) {
+ inFile[r].close();
+ }
+ } catch (FileNotFoundException e) {
+ System.err
+ .println("FileNotFoundException in AdaGradCore.createUnifiedRefFile(String,int): "
+ + e.getMessage());
+ System.exit(99901);
+ } catch (IOException e) {
+ System.err.println("IOException in AdaGradCore.createUnifiedRefFile(String,int): "
+ + e.getMessage());
+ System.exit(99902);
+ }
+
+ return outFileName;
+
+ }
+
+ } // createUnifiedRefFile(String prefix, int numFiles)
+
+ /**
+  * Normalizes a sentence for scoring: unescapes HTML/SGML entities, tokenizes
+  * punctuation, rejoins common English contractions, and (depending on
+  * normMethod) rejoins hyphenated words and/or strips non-ASCII characters.
+  *
+  * @param str the sentence to normalize
+  * @param normMethod 0 = no-op; 1 = basic tokenization; 2 adds dash rejoining;
+  *        3 adds non-ASCII removal; 4 applies both extras
+  * @return the normalized, whitespace-trimmed sentence
+  */
+ private String normalize(String str, int normMethod) {
+ if (normMethod == 0)
+ return str;
+
+ // replace HTML/SGML entities with their literal characters
+ // (these literals were garbled in a previous encoding pass; restored from the
+ // standard five XML/SGML entities)
+ str = str.replaceAll("&quot;", "\"");
+ str = str.replaceAll("&amp;", "&");
+ str = str.replaceAll("&lt;", "<");
+ str = str.replaceAll("&gt;", ">");
+ str = str.replaceAll("&apos;", "'");
+
+ // split on these characters:
+ // ! " # $ % & ( ) * + / : ; < = > ? @ [ \ ] ^ _ ` { | } ~
+ // i.e. ASCII 33-126, except alphanumeric, and except "," "-" "." "'"
+
+ // ! "# $%& ( ) * +/:;<=> ?@ [ \ ] ^_` { | }~
+ String split_on = "!\"#\\$%&\\(\\)\\*\\+/:;<=>\\?@\\[\\\\\\]\\^_`\\{\\|\\}~";
+
+ // println("split_on: " + split_on);
+
+ for (int k = 0; k < split_on.length(); ++k) {
+ // for each split character, reprocess the string;
+ // a '\' in split_on escapes the character that follows it, so consume both
+ String regex = "" + split_on.charAt(k);
+ if (regex.equals("\\")) {
+ ++k;
+ regex += split_on.charAt(k);
+ }
+ str = str.replaceAll(regex, " " + regex + " ");
+ }
+
+ // split on "." and "," and "-", conditioned on proper context
+
+ str = " " + str + " ";
+ str = str.replaceAll("\\s+", " ");
+
+ TreeSet<Integer> splitIndices = new TreeSet<Integer>();
+
+ for (int i = 0; i < str.length(); ++i) {
+ char ch = str.charAt(i);
+ if (ch == '.' || ch == ',') {
+ // split if either of the previous or next characters is a non-digit
+ char prev_ch = str.charAt(i - 1);
+ char next_ch = str.charAt(i + 1);
+ if (prev_ch < '0' || prev_ch > '9' || next_ch < '0' || next_ch > '9') {
+ splitIndices.add(i);
+ }
+ } else if (ch == '-') {
+ // split if preceded by a digit
+ char prev_ch = str.charAt(i - 1);
+ if (prev_ch >= '0' && prev_ch <= '9') {
+ splitIndices.add(i);
+ }
+ }
+ }
+
+ // rebuild the string, padding the recorded indices with spaces
+ String str0 = str;
+ str = "";
+
+ for (int i = 0; i < str0.length(); ++i) {
+ if (splitIndices.contains(i)) {
+ str += " " + str0.charAt(i) + " ";
+ } else {
+ str += str0.charAt(i);
+ }
+ }
+
+ // rejoin i'm, we're, *'s, won't, don't, etc
+
+ str = " " + str + " ";
+ str = str.replaceAll("\\s+", " ");
+
+ str = str.replaceAll(" i 'm ", " i'm ");
+ str = str.replaceAll(" we 're ", " we're ");
+ str = str.replaceAll(" 's ", "'s ");
+ str = str.replaceAll(" 've ", "'ve ");
+ str = str.replaceAll(" 'll ", "'ll ");
+ str = str.replaceAll(" 'd ", "'d ");
+ str = str.replaceAll(" n't ", "n't ");
+
+ // remove spaces around dashes
+ if (normMethod == 2 || normMethod == 4) {
+
+ TreeSet<Integer> skipIndices = new TreeSet<Integer>();
+ str = " " + str + " ";
+
+ for (int i = 0; i < str.length(); ++i) {
+ char ch = str.charAt(i);
+ if (ch == '-') {
+ // rejoin if surrounded by spaces, and then letters
+ if (str.charAt(i - 1) == ' ' && str.charAt(i + 1) == ' ') {
+ if (Character.isLetter(str.charAt(i - 2)) && Character.isLetter(str.charAt(i + 2))) {
+ skipIndices.add(i - 1);
+ skipIndices.add(i + 1);
+ }
+ }
+ }
+ }
+
+ str0 = str;
+ str = "";
+
+ for (int i = 0; i < str0.length(); ++i) {
+ if (!skipIndices.contains(i)) {
+ str += str0.charAt(i);
+ }
+ }
+ }
+
+ // drop non-ASCII characters
+ if (normMethod == 3 || normMethod == 4) {
+
+ str0 = str;
+ str = "";
+
+ for (int i = 0; i < str0.length(); ++i) {
+ char ch = str0.charAt(i);
+ if (ch <= 127) { // i.e. if ASCII
+ str += ch;
+ }
+ }
+ }
+
+ str = str.replaceAll("\\s+", " ");
+
+ str = str.trim();
+
+ return str;
+ }
+
+ /**
+  * Returns the number of lines in fileName; exits (code 99902) on I/O failure.
+  */
+ private int countLines(String fileName) {
+   int total = 0;
+
+   try {
+     BufferedReader reader = new BufferedReader(new FileReader(fileName));
+     while (reader.readLine() != null) {
+       ++total;
+     }
+     reader.close();
+   } catch (IOException e) {
+     System.err.println("IOException in AdaGradCore.countLines(String): " + e.getMessage());
+     System.exit(99902);
+   }
+
+   return total;
+ }
+
+ /**
+  * Returns the number of non-empty lines in fileName; exits (code 99902) on I/O failure.
+  */
+ private int countNonEmptyLines(String fileName) {
+   int total = 0;
+
+   try {
+     BufferedReader reader = new BufferedReader(new FileReader(fileName));
+     for (String line = reader.readLine(); line != null; line = reader.readLine()) {
+       if (line.length() > 0)
+         ++total;
+     }
+     reader.close();
+   } catch (IOException e) {
+     System.err
+         .println("IOException in AdaGradCore.countNonEmptyLines(String): " + e.getMessage());
+     System.exit(99902);
+   }
+
+   return total;
+ }
+
+ private String fullPath(String dir, String fileName) {
+ File dummyFile = new File(dir, fileName);
+ return dummyFile.getAbsolutePath();
+ }
+
+ @SuppressWarnings("unused")
+ private void cleanupMemory() {
+ cleanupMemory(100, false);
+ }
+
+ @SuppressWarnings("unused")
+ private void cleanupMemorySilently() {
+ cleanupMemory(100, true);
+ }
+
+ @SuppressWarnings("static-access")
+ private void cleanupMemory(int reps, boolean silent) {
+ int bytesPerMB = 1024 * 1024;
+
+ long totalMemBefore = myRuntime.totalMemory();
+ long freeMemBefore = myRuntime.freeMemory();
+ long usedMemBefore = totalMemBefore - freeMemBefore;
+
+ long usedCurr = usedMemBefore;
+ long usedPrev = usedCurr;
+
+ // perform garbage collection repeatedly, until there is no decrease in
+ // the amount of used memory
+ for (int i = 1; i <= reps; ++i) {
+ myRuntime.runFinalization();
+ myRuntime.gc();
+ (Thread.currentThread()).yield();
+
+ usedPrev = usedCurr;
+ usedCurr = myRuntime.totalMemory() - myRuntime.freeMemory();
+
+ if (usedCurr == usedPrev)
+ break;
+ }
+
+ if (!silent) {
+ long totalMemAfter = myRuntime.totalMemory();
+ long freeMemAfter = myRuntime.freeMemory();
+ long usedMemAfter = totalMemAfter - freeMemAfter;
+
+ println("GC: d_used = " + ((usedMemAfter - usedMemBefore) / bytesPerMB) + " MB "
+ + "(d_tot = " + ((totalMemAfter - totalMemBefore) / bytesPerMB) + " MB).", 2);
+ }
+ }
+
+ @SuppressWarnings("unused")
+ private void printMemoryUsage() {
+ int bytesPerMB = 1024 * 1024;
+ long totalMem = myRuntime.totalMemory();
+ long freeMem = myRuntime.freeMemory();
+ long usedMem = totalMem - freeMem;
+
+ println("Allocated memory: " + (totalMem / bytesPerMB) + " MB " + "(of which "
+ + (usedMem / bytesPerMB) + " MB is being used).", 2);
+ }
+
+ private void println(Object obj, int priority) {
+ if (priority <= verbosity)
+ println(obj);
+ }
+
+ private void print(Object obj, int priority) {
+ if (priority <= verbosity)
+ print(obj);
+ }
+
+ private void println(Object obj) {
+ System.out.println(obj);
+ }
+
+ private void print(Object obj) {
+ System.out.print(obj);
+ }
+
+ @SuppressWarnings("unused")
+ private void showProgress() {
+ ++progress;
+ if (progress % 100000 == 0)
+ print(".", 2);
+ }
+
+ private ArrayList<Double> randomLambda() {
+ ArrayList<Double> retLambda = new ArrayList<Double>(1 + numParams);
+
+ for (int c = 1; c <= numParams; ++c) {
+ if (isOptimizable[c]) {
+ double randVal = randGen.nextDouble(); // number in [0.0,1.0]
+ ++generatedRands;
+ randVal = randVal * (maxRandValue[c] - minRandValue[c]); // number in [0.0,max-min]
+ randVal = minRandValue[c] + randVal; // number in [min,max]
+ retLambda.set(c, randVal);
+ } else {
+ retLambda.set(c, defaultLambda[c]);
+ }
+ }
+
+ return retLambda;
+ }
+
+ private double[] randomPerturbation(double[] origLambda, int i, double method, double param,
+ double mult) {
+ double sigma = 0.0;
+ if (method == 1) {
+ sigma = 1.0 / Math.pow(i, param);
+ } else if (method == 2) {
+ sigma = Math.exp(-param * i);
+ } else if (method == 3) {
+ sigma = Math.max(0.0, 1.0 - (i / param));
+ }
+
+ sigma = mult * sigma;
+
+ double[] retLambda = new double[1 + numParams];
+
+ for (int c = 1; c <= numParams; ++c) {
+ if (isOptimizable[c]) {
+ double randVal = 2 * randGen.nextDouble() - 1.0; // number in [-1.0,1.0]
+ ++generatedRands;
+ randVal = randVal * sigma; // number in [-sigma,sigma]
+ randVal = randVal * origLambda[c]; // number in [-sigma*orig[c],sigma*orig[c]]
+ randVal = randVal + origLambda[c]; // number in
+ // [orig[c]-sigma*orig[c],orig[c]+sigma*orig[c]]
+ // = [orig[c]*(1-sigma),orig[c]*(1+sigma)]
+ retLambda[c] = randVal;
+ } else {
+ retLambda[c] = origLambda[c];
+ }
+ }
+
+ return retLambda;
+ }
+
+ @SuppressWarnings("unused")
+ private HashSet<Integer> indicesToDiscard(double[] slope, double[] offset) {
+ // some lines can be eliminated: the ones that have a lower offset
+ // than some other line with the same slope.
+ // That is, for any k1 and k2:
+ // if slope[k1] = slope[k2] and offset[k1] > offset[k2],
+ // then k2 can be eliminated.
+ // (This is actually important to do as it eliminates a bug.)
+ // print("discarding: ",4);
+
+ int numCandidates = slope.length;
+ HashSet<Integer> discardedIndices = new HashSet<Integer>();
+ HashMap<Double, Integer> indicesOfSlopes = new HashMap<Double, Integer>();
+ // maps slope to index of best candidate that has that slope.
+ // ("best" as in the one with the highest offset)
+
+ for (int k1 = 0; k1 < numCandidates; ++k1) {
+ double currSlope = slope[k1];
+ if (!indicesOfSlopes.containsKey(currSlope)) {
+ indicesOfSlopes.put(currSlope, k1);
+ } else {
+ int existingIndex = indicesOfSlopes.get(currSlope);
+ if (offset[existingIndex] > offset[k1]) {
+ discardedIndices.add(k1);
+ // print(k1 + " ",4);
+ } else if (offset[k1] > offset[existingIndex]) {
+ indicesOfSlopes.put(currSlope, k1);
+ discardedIndices.add(existingIndex);
+ // print(existingIndex + " ",4);
+ }
+ }
+ }
+
+ // old way of doing it; takes quadratic time (vs. linear time above)
+ /*
+ * for (int k1 = 0; k1 < numCandidates; ++k1) { for (int k2 = 0; k2 < numCandidates; ++k2) { if
+ * (k1 != k2 && slope[k1] == slope[k2] && offset[k1] > offset[k2]) { discardedIndices.add(k2);
+ * // print(k2 + " ",4); } } }
+ */
+
+ // println("",4);
+ return discardedIndices;
+ } // indicesToDiscard(double[] slope, double[] offset)
+}
diff --git a/src/joshua/adagrad/Optimizer.java b/src/joshua/adagrad/Optimizer.java
new file mode 100755
index 0000000..1270e12
--- /dev/null
+++ b/src/joshua/adagrad/Optimizer.java
@@ -0,0 +1,710 @@
+package joshua.adagrad;
+
+import java.util.Collections;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.Iterator;
+import java.util.List;
+import java.util.Set;
+import java.util.Vector;
+import java.lang.Math;
+
+import joshua.corpus.Vocabulary;
+import joshua.metrics.EvaluationMetric;
+
+// this class implements the AdaGrad algorithm
+public class Optimizer {
+ public Optimizer(Vector<String>_output, boolean[] _isOptimizable, double[] _initialLambda,
+ HashMap<String, String>[] _feat_hash, HashMap<String, String>[] _stats_hash) {
+ output = _output; // (not used for now)
+ isOptimizable = _isOptimizable;
+ initialLambda = _initialLambda; // initial weights array
+ paramDim = initialLambda.length - 1;
+ initialLambda = _initialLambda;
+ feat_hash = _feat_hash; // feature hash table
+ stats_hash = _stats_hash; // suff. stats hash table
+ finalLambda = new double[initialLambda.length];
+ for(int i = 0; i < finalLambda.length; i++)
+ finalLambda[i] = initialLambda[i];
+ }
+
+ //run AdaGrad for one epoch
+ public double[] runOptimizer() {
+ List<Integer> sents = new ArrayList<Integer>();
+ for( int i = 0; i < sentNum; ++i )
+ sents.add(i);
+ double[] avgLambda = new double[initialLambda.length]; //only needed if averaging is required
+ for( int i = 0; i < initialLambda.length; ++i )
+ avgLambda[i] = 0;
+ for ( int iter = 0; iter < adagradIter; ++iter ) {
+ System.arraycopy(finalLambda, 1, initialLambda, 1, paramDim);
+ if(needShuffle)
+ Collections.shuffle(sents);
+
+ double oraMetric, oraScore, predMetric, predScore;
+ double[] oraPredScore = new double[4];
+ double loss = 0;
+ double diff = 0;
+ double sumMetricScore = 0;
+ double sumModelScore = 0;
+ String oraFeat = "";
+ String predFeat = "";
+ String[] oraPredFeat = new String[2];
+ String[] vecOraFeat;
+ String[] vecPredFeat;
+ String[] featInfo;
+ int thisBatchSize = 0;
+ int numBatch = 0;
+ int numUpdate = 0;
+ Iterator it;
+ Integer diffFeatId;
+
+ //update weights
+ Integer s;
+ int sentCount = 0;
+ double prevLambda = 0;
+ double diffFeatVal = 0;
+ double oldVal = 0;
+ double gdStep = 0;
+ double Hii = 0;
+ double gradiiSquare = 0;
+ int lastUpdateTime = 0;
+ HashMap<Integer, Integer> lastUpdate = new HashMap<Integer, Integer>();
+ HashMap<Integer, Double> lastVal = new HashMap<Integer, Double>();
+ HashMap<Integer, Double> H = new HashMap<Integer, Double>();
+ while( sentCount < sentNum ) {
+ loss = 0;
+ thisBatchSize = batchSize;
+ ++numBatch;
+ HashMap<Integer, Double> featDiff = new HashMap<Integer, Double>();
+ for(int b = 0; b < batchSize; ++b ) {
+ //find out oracle and prediction
+ s = sents.get(sentCount);
+ findOraPred(s, oraPredScore, oraPredFeat, finalLambda, featScale);
+
+ //the model scores here are already scaled in findOraPred
+ oraMetric = oraPredScore[0];
+ oraScore = oraPredScore[1];
+ predMetric = oraPredScore[2];
+ predScore = oraPredScore[3];
+ oraFeat = oraPredFeat[0];
+ predFeat = oraPredFeat[1];
+
+ //update the scale
+ if(needScale) { //otherwise featscale remains 1.0
+ sumMetricScore += Math.abs(oraMetric + predMetric);
+ //restore the original model score
+ sumModelScore += Math.abs(oraScore + predScore) / featScale;
+
+ if(sumModelScore/sumMetricScore > scoreRatio)
+ featScale = sumMetricScore/sumModelScore;
+ }
+ // processedSent++;
+
+ vecOraFeat = oraFeat.split("\\s+");
+ vecPredFeat = predFeat.split("\\s+");
+
+ //accumulate difference feature vector
+ if ( b == 0 ) {
+ for (int i = 0; i < vecOraFeat.length; i++) {
+ featInfo = vecOraFeat[i].split("=");
+ diffFeatId = Integer.parseInt(featInfo[0]);
+ featDiff.put(diffFeatId, Double.parseDouble(featInfo[1]));
+ }
+ for (int i = 0; i < vecPredFeat.length; i++) {
+ featInfo = vecPredFeat[i].split("=");
+ diffFeatId = Integer.parseInt(featInfo[0]);
+ if (featDiff.containsKey(diffFeatId)) { //overlapping features
+ diff = featDiff.get(diffFeatId)-Double.parseDouble(featInfo[1]);
+ if ( Math.abs(diff) > 1e-20 )
+ featDiff.put(diffFeatId, diff);
+ else
+ featDiff.remove(diffFeatId);
+ }
+ else //features only firing in the 2nd feature vector
+ featDiff.put(diffFeatId, -1.0*Double.parseDouble(featInfo[1]));
+ }
+ } else {
+ for (int i = 0; i < vecOraFeat.length; i++) {
+ featInfo = vecOraFeat[i].split("=");
+ diffFeatId = Integer.parseInt(featInfo[0]);
+ if (featDiff.containsKey(diffFeatId)) { //overlapping features
+ diff = featDiff.get(diffFeatId)+Double.parseDouble(featInfo[1]);
+ if ( Math.abs(diff) > 1e-20 )
+ featDiff.put(diffFeatId, diff);
+ else
+ featDiff.remove(diffFeatId);
+ }
+ else //features only firing in the new oracle feature vector
+ featDiff.put(diffFeatId, Double.parseDouble(featInfo[1]));
+ }
+ for (int i = 0; i < vecPredFeat.length; i++) {
+ featInfo = vecPredFeat[i].split("=");
+ diffFeatId = Integer.parseInt(featInfo[0]);
+ if (featDiff.containsKey(diffFeatId)) { //overlapping features
+ diff = featDiff.get(diffFeatId)-Double.parseDouble(featInfo[1]);
+ if ( Math.abs(diff) > 1e-20 )
+ featDiff.put(diffFeatId, diff);
+ else
+ featDiff.remove(diffFeatId);
+ }
+ else //features only firing in the new prediction feature vector
+ featDiff.put(diffFeatId, -1.0*Double.parseDouble(featInfo[1]));
+ }
+ }
+
+ //remember the model scores here are already scaled
+ double singleLoss = evalMetric.getToBeMinimized() ?
+ (predMetric-oraMetric) - (oraScore-predScore)/featScale:
+ (oraMetric-predMetric) - (oraScore-predScore)/featScale;
+ if(singleLoss > 0)
+ loss += singleLoss;
+ ++sentCount;
+ if( sentCount >= sentNum ) {
+ thisBatchSize = b + 1;
+ break;
+ }
+ } //for(int b : batchSize)
+
+ //System.out.println("\n\n"+sentCount+":");
+
+ if( loss > 0 ) {
+ //if(true) {
+ ++numUpdate;
+ //update weights (see Duchi'11, Eq.23. For l1-reg, use lazy update)
+ Set<Integer> diffFeatSet = featDiff.keySet();
+ it = diffFeatSet.iterator();
+ while(it.hasNext()) { //note these are all non-zero gradients!
+ diffFeatId = (Integer)it.next();
+ diffFeatVal = -1.0 * featDiff.get(diffFeatId); //gradient
+ if( regularization > 0 ) {
+ lastUpdateTime =
+ lastUpdate.get(diffFeatId) == null ? 0 : lastUpdate.get(diffFeatId);
+ if( lastUpdateTime < numUpdate - 1 ) {
+ //haven't been updated (gradient=0) for at least 2 steps
+ //lazy compute prevLambda now
+ oldVal =
+ lastVal.get(diffFeatId) == null ? initialLambda[diffFeatId] : lastVal.get(diffFeatId);
+ Hii =
+ H.get(diffFeatId) == null ? 0 : H.get(diffFeatId);
+ if(Math.abs(Hii) > 1e-20) {
+ if( regularization == 1 )
+ prevLambda =
+ Math.signum(oldVal) * clip( Math.abs(oldVal) - lam * eta * (numBatch - 1 - lastUpdateTime) / Hii );
+ else if( regularization == 2 ) {
+ prevLambda =
+ Math.pow( Hii/(lam+Hii), (numUpdate - 1 - lastUpdateTime) ) * oldVal;
+ if(needAvg) { //fill the gap due to lazy update
+ double prevLambdaCopy = prevLambda;
+ double scale = Hii/(lam+Hii);
+ for( int t = 0; t < numUpdate - 1 - lastUpdateTime; ++t ) {
+ avgLambda[diffFeatId] += prevLambdaCopy;
+ prevLambdaCopy /= scale;
+ }
+ }
+ }
+ } else {
+ if( regularization == 1 )
+ prevLambda = 0;
+ else if( regularization == 2 )
+ prevLambda = oldVal;
+ }
+ } else //just updated at last time step or just started
+ prevLambda = finalLambda[diffFeatId];
+ if(H.get(diffFeatId) != null) {
+ gradiiSquare = H.get(diffFeatId);
+ gradiiSquare *= gradiiSquare;
+ gradiiSquare += diffFeatVal * diffFeatVal;
+ Hii = Math.sqrt(gradiiSquare);
+ } else
+ Hii = Math.abs(diffFeatVal);
+ H.put(diffFeatId, Hii);
+ //update the weight
+ if( regularization == 1 ) {
+ gdStep = prevLambda - eta * diffFeatVal / Hii;
+ finalLambda[diffFeatId] = Math.signum(gdStep) * clip( Math.abs(gdStep) - lam * eta / Hii );
+ } else if(regularization == 2 ) {
+ finalLambda[diffFeatId] = (Hii * prevLambda - eta * diffFeatVal) / (lam + Hii);
+ if(needAvg)
+ avgLambda[diffFeatId] += finalLambda[diffFeatId];
+ }
+ lastUpdate.put(diffFeatId, numUpdate);
+ lastVal.put(diffFeatId, finalLambda[diffFeatId]);
+ } else { //if no regularization
+ if(H.get(diffFeatId) != null) {
+ gradiiSquare = H.get(diffFeatId);
+ gradiiSquare *= gradiiSquare;
+ gradiiSquare += diffFeatVal * diffFeatVal;
+ Hii = Math.sqrt(gradiiSquare);
+ } else
+ Hii = Math.abs(diffFeatVal);
+ H.put(diffFeatId, Hii);
+ finalLambda[diffFeatId] = finalLambda[diffFeatId] - eta * diffFeatVal / Hii;
+ if(needAvg)
+ avgLambda[diffFeatId] += finalLambda[diffFeatId];
+ }
+ } //while(it.hasNext())
+ } //if(loss > 0)
+ else { //no loss, therefore the weight update is skipped
+ //however, the avg weights still need to be accumulated
+ if( regularization == 0 ) {
+ for( int i = 1; i < finalLambda.length; ++i )
+ avgLambda[i] += finalLambda[i];
+ } else if( regularization == 2 ) {
+ if(needAvg) {
+ //due to lazy update, we need to figure out the actual
+ //weight vector at this point first...
+ for( int i = 1; i < finalLambda.length; ++i ) {
+ if( lastUpdate.get(i) != null ) {
+ if( lastUpdate.get(i) < numUpdate ) {
+ oldVal = lastVal.get(i);
+ Hii = H.get(i);
+ //lazy compute
+ avgLambda[i] +=
+ Math.pow( Hii/(lam+Hii), (numUpdate - lastUpdate.get(i)) ) * oldVal;
+ } else
+ avgLambda[i] += finalLambda[i];
+ }
+ avgLambda[i] += finalLambda[i];
+ }
+ }
+ }
+ }
+ } //while( sentCount < sentNum )
+ if( regularization > 0 ) {
+ for( int i = 1; i < finalLambda.length; ++i ) {
+ //now lazy compute those weights that haven't been taken care of
+ if( lastUpdate.get(i) == null )
+ finalLambda[i] = 0;
+ else if( lastUpdate.get(i) < numUpdate ) {
+ oldVal = lastVal.get(i);
+ Hii = H.get(i);
+ if( regularization == 1 )
+ finalLambda[i] =
+ Math.signum(oldVal) * clip( Math.abs(oldVal) - lam * eta * (numUpdate - lastUpdate.get(i)) / Hii );
+ else if( regularization == 2 ) {
+ finalLambda[i] =
+ Math.pow( Hii/(lam+Hii), (numUpdate - lastUpdate.get(i)) ) * oldVal;
+ if(needAvg) { //fill the gap due to lazy update
+ double prevLambdaCopy = finalLambda[i];
+ double scale = Hii/(lam+Hii);
+ for( int t = 0; t < numUpdate - lastUpdate.get(i); ++t ) {
+ avgLambda[i] += prevLambdaCopy;
+ prevLambdaCopy /= scale;
+ }
+ }
+ }
+ }
+ if( regularization == 2 && needAvg ) {
+ if( iter == adagradIter - 1 )
+ finalLambda[i] = avgLambda[i] / ( numBatch * adagradIter );
+ }
+ }
+ } else { //if no regularization
+ if( iter == adagradIter - 1 && needAvg ) {
+ for( int i = 1; i < finalLambda.length; ++i )
+ finalLambda[i] = avgLambda[i] / ( numBatch * adagradIter );
+ }
+ }
+
+ double initMetricScore;
+ if (iter == 0) {
+ initMetricScore = computeCorpusMetricScore(initialLambda);
+ finalMetricScore = computeCorpusMetricScore(finalLambda);
+ } else {
+ initMetricScore = finalMetricScore;
+ finalMetricScore = computeCorpusMetricScore(finalLambda);
+ }
+ // prepare the printing info
+ String result = " Initial "
+ + evalMetric.get_metricName() + "=" + String.format("%.4f", initMetricScore) + " Final "
+ + evalMetric.get_metricName() + "=" + String.format("%.4f", finalMetricScore);
+ //print lambda info
+ // int numParamToPrint = 0;
+ // numParamToPrint = paramDim > 10 ? 10 : paramDim; // how many parameters
+ // // to print
+ // result = paramDim > 10 ? "Final lambda (first 10): {" : "Final lambda: {";
+
+ // for (int i = 1; i <= numParamToPrint; ++i)
+ // result += String.format("%.4f", finalLambda[i]) + " ";
+
+ output.add(result);
+ } //for ( int iter = 0; iter < adagradIter; ++iter ) {
+
+ //non-optimizable weights should remain unchanged
+ ArrayList<Double> cpFixWt = new ArrayList<Double>();
+ for ( int i = 1; i < isOptimizable.length; ++i ) {
+ if ( ! isOptimizable[i] )
+ cpFixWt.add(finalLambda[i]);
+ }
+ normalizeLambda(finalLambda);
+ int countNonOpt = 0;
+ for ( int i = 1; i < isOptimizable.length; ++i ) {
+ if ( ! isOptimizable[i] ) {
+ finalLambda[i] = cpFixWt.get(countNonOpt);
+ ++countNonOpt;
+ }
+ }
+ return finalLambda;
+ }
+
+ private double clip(double x) {
+ return x > 0 ? x : 0;
+ }
+
+ public double computeCorpusMetricScore(double[] finalLambda) {
+ int suffStatsCount = evalMetric.get_suffStatsCount();
+ double modelScore;
+ double maxModelScore;
+ Set<String> candSet;
+ String candStr;
+ String[] feat_str;
+ String[] tmpStatsVal = new String[suffStatsCount];
+ int[] corpusStatsVal = new int[suffStatsCount];
+ for (int i = 0; i < suffStatsCount; i++)
+ corpusStatsVal[i] = 0;
+
+ for (int i = 0; i < sentNum; i++) {
+ candSet = feat_hash[i].keySet();
+
+ // find out the 1-best candidate for each sentence
+ // this depends on the training mode
+ maxModelScore = NegInf;
+ for (Iterator it = candSet.iterator(); it.hasNext();) {
+ modelScore = 0.0;
+ candStr = it.next().toString();
+
+ feat_str = feat_hash[i].get(candStr).split("\\s+");
+
+ String[] feat_info;
+
+ for (int f = 0; f < feat_str.length; f++) {
+ feat_info = feat_str[f].split("=");
+ modelScore +=
+ Double.parseDouble(feat_info[1]) * finalLambda[Vocabulary.id(feat_info[0])];
+ }
+
+ if (maxModelScore < modelScore) {
+ maxModelScore = modelScore;
+ tmpStatsVal = stats_hash[i].get(candStr).split("\\s+"); // save the
+ // suff stats
+ }
+ }
+
+ for (int j = 0; j < suffStatsCount; j++)
+ corpusStatsVal[j] += Integer.parseInt(tmpStatsVal[j]); // accumulate
+ // corpus-leve
+ // suff stats
+ } // for( int i=0; i<sentNum; i++ )
+
+ return evalMetric.score(corpusStatsVal);
+ }
+
+ private void findOraPred(int sentId, double[] oraPredScore, String[] oraPredFeat, double[] lambda, double featScale)
+ {
+ double oraMetric=0, oraScore=0, predMetric=0, predScore=0;
+ String oraFeat="", predFeat="";
+ double candMetric = 0, candScore = 0; //metric and model scores for each cand
+ Set<String> candSet = stats_hash[sentId].keySet();
+ String cand = "";
+ String feats = "";
+ String oraCand = ""; //only used when BLEU/TER-BLEU is used as metric
+ String[] featStr;
+ String[] featInfo;
+
+ int actualFeatId;
+ double bestOraScore;
+ double worstPredScore;
+
+ if(oraSelectMode==1)
+ bestOraScore = NegInf; //larger score will be selected
+ else {
+ if(evalMetric.getToBeMinimized())
+ bestOraScore = PosInf; //smaller score will be selected
+ else
+ bestOraScore = NegInf;
+ }
+
+ if(predSelectMode==1 || predSelectMode==2)
+ worstPredScore = NegInf; //larger score will be selected
+ else {
+ if(evalMetric.getToBeMinimized())
+ worstPredScore = NegInf; //larger score will be selected
+ else
+ worstPredScore = PosInf;
+ }
+
+ for (Iterator it = candSet.iterator(); it.hasNext();) {
+ cand = it.next().toString();
+ candMetric = computeSentMetric(sentId, cand); //compute metric score
+
+ //start to compute model score
+ candScore = 0;
+ featStr = feat_hash[sentId].get(cand).split("\\s+");
+ feats = "";
+
+ for (int i = 0; i < featStr.length; i++) {
+ featInfo = featStr[i].split("=");
+ actualFeatId = Vocabulary.id(featInfo[0]);
+ candScore += Double.parseDouble(featInfo[1]) * lambda[actualFeatId];
+ if ( (actualFeatId < isOptimizable.length && isOptimizable[actualFeatId]) ||
+ actualFeatId >= isOptimizable.length )
+ feats += actualFeatId + "=" + Double.parseDouble(featInfo[1]) + " ";
+ }
+
+ candScore *= featScale; //scale the model score
+
+ //is this cand oracle?
+ if(oraSelectMode == 1) {//"hope", b=1, r=1
+ if(evalMetric.getToBeMinimized()) {//if the smaller the metric score, the better
+ if( bestOraScore<=(candScore-candMetric) ) {
+ bestOraScore = candScore-candMetric;
+ oraMetric = candMetric;
+ oraScore = candScore;
+ oraFeat = feats;
+ oraCand = cand;
+ }
+ }
+ else {
+ if( bestOraScore<=(candScore+candMetric) ) {
+ bestOraScore = candScore+candMetric;
+ oraMetric = candMetric;
+ oraScore = candScore;
+ oraFeat = feats;
+ oraCand = cand;
+ }
+ }
+ }
+ else {//best metric score(ex: max BLEU), b=1, r=0
+ if(evalMetric.getToBeMinimized()) {//if the smaller the metric score, the better
+ if( bestOraScore>=candMetric ) {
+ bestOraScore = candMetric;
+ oraMetric = candMetric;
+ oraScore = candScore;
+ oraFeat = feats;
+ oraCand = cand;
+ }
+ }
+ else {
+ if( bestOraScore<=candMetric ) {
+ bestOraScore = candMetric;
+ oraMetric = candMetric;
+ oraScore = candScore;
+ oraFeat = feats;
+ oraCand = cand;
+ }
+ }
+ }
+
+ //is this cand prediction?
+ if(predSelectMode == 1) {//"fear"
+ if(evalMetric.getToBeMinimized()) {//if the smaller the metric score, the better
+ if( worstPredScore<=(candScore+candMetric) ) {
+ worstPredScore = candScore+candMetric;
+ predMetric = candMetric;
+ predScore = candScore;
+ predFeat = feats;
+ }
+ }
+ else {
+ if( worstPredScore<=(candScore-candMetric) ) {
+ worstPredScore = candScore-candMetric;
+ predMetric = candMetric;
+ predScore = candScore;
+ predFeat = feats;
+ }
+ }
+ }
+ else if(predSelectMode == 2) {//model prediction(max model score)
+ if( worstPredScore<=candScore ) {
+ worstPredScore = candScore;
+ predMetric = candMetric;
+ predScore = candScore;
+ predFeat = feats;
+ }
+ }
+ else {//worst metric score(ex: min BLEU)
+ if(evalMetric.getToBeMinimized()) {//if the smaller the metric score, the better
+ if( worstPredScore<=candMetric ) {
+ worstPredScore = candMetric;
+ predMetric = candMetric;
+ predScore = candScore;
+ predFeat = feats;
+ }
+ }
+ else {
+ if( worstPredScore>=candMetric ) {
+ worstPredScore = candMetric;
+ predMetric = candMetric;
+ predScore = candScore;
+ predFeat = feats;
+ }
+ }
+ }
+ }
+
+ oraPredScore[0] = oraMetric;
+ oraPredScore[1] = oraScore;
+ oraPredScore[2] = predMetric;
+ oraPredScore[3] = predScore;
+ oraPredFeat[0] = oraFeat;
+ oraPredFeat[1] = predFeat;
+
+ //update the BLEU metric statistics if pseudo corpus is used to compute BLEU/TER-BLEU
+ if(evalMetric.get_metricName().equals("BLEU") && usePseudoBleu ) {
+ String statString;
+ String[] statVal_str;
+ statString = stats_hash[sentId].get(oraCand);
+ statVal_str = statString.split("\\s+");
+
+ for (int j = 0; j < evalMetric.get_suffStatsCount(); j++)
+ bleuHistory[sentId][j] = R*bleuHistory[sentId][j]+Integer.parseInt(statVal_str[j]);
+ }
+
+ if(evalMetric.get_metricName().equals("TER-BLEU") && usePseudoBleu ) {
+ String statString;
+ String[] statVal_str;
+ statString = stats_hash[sentId].get(oraCand);
+ statVal_str = statString.split("\\s+");
+
+ for (int j = 0; j < evalMetric.get_suffStatsCount()-2; j++)
+ bleuHistory[sentId][j] = R*bleuHistory[sentId][j]+Integer.parseInt(statVal_str[j+2]); //the first 2 stats are TER stats
+ }
+ }
+
+ // compute *sentence-level* metric score for cand
+ private double computeSentMetric(int sentId, String cand) {
+ String statString;
+ String[] statVal_str;
+ int[] statVal = new int[evalMetric.get_suffStatsCount()];
+
+ statString = stats_hash[sentId].get(cand);
+ statVal_str = statString.split("\\s+");
+
+ if(evalMetric.get_metricName().equals("BLEU") && usePseudoBleu) {
+ for (int j = 0; j < evalMetric.get_suffStatsCount(); j++)
+ statVal[j] = (int) (Integer.parseInt(statVal_str[j]) + bleuHistory[sentId][j]);
+ } else if(evalMetric.get_metricName().equals("TER-BLEU") && usePseudoBleu) {
+ for (int j = 0; j < evalMetric.get_suffStatsCount()-2; j++)
+ statVal[j+2] = (int)(Integer.parseInt(statVal_str[j+2]) + bleuHistory[sentId][j]); //only modify the BLEU stats part(TER has 2 stats)
+ } else { //in all other situations, use normal stats
+ for (int j = 0; j < evalMetric.get_suffStatsCount(); j++)
+ statVal[j] = Integer.parseInt(statVal_str[j]);
+ }
+
+ return evalMetric.score(statVal);
+ }
+
+ // from ZMERT
+ private void normalizeLambda(double[] origLambda) {
+ // private String[] normalizationOptions;
+ // How should a lambda[] vector be normalized (before decoding)?
+ // nO[0] = 0: no normalization
+ // nO[0] = 1: scale so that parameter nO[2] has absolute value nO[1]
+ // nO[0] = 2: scale so that the maximum absolute value is nO[1]
+ // nO[0] = 3: scale so that the minimum absolute value is nO[1]
+ // nO[0] = 4: scale so that the L-nO[1] norm equals nO[2]
+
+ int normalizationMethod = (int) normalizationOptions[0];
+ double scalingFactor = 1.0;
+ if (normalizationMethod == 0) {
+ scalingFactor = 1.0;
+ } else if (normalizationMethod == 1) {
+ int c = (int) normalizationOptions[2];
+ scalingFactor = normalizationOptions[1] / Math.abs(origLambda[c]);
+ } else if (normalizationMethod == 2) {
+ double maxAbsVal = -1;
+ int maxAbsVal_c = 0;
+ for (int c = 1; c <= paramDim; ++c) {
+ if (Math.abs(origLambda[c]) > maxAbsVal) {
+ maxAbsVal = Math.abs(origLambda[c]);
+ maxAbsVal_c = c;
+ }
+ }
+ scalingFactor = normalizationOptions[1] / Math.abs(origLambda[maxAbsVal_c]);
+
+ } else if (normalizationMethod == 3) {
+ double minAbsVal = PosInf;
+ int minAbsVal_c = 0;
+
+ for (int c = 1; c <= paramDim; ++c) {
+ if (Math.abs(origLambda[c]) < minAbsVal) {
+ minAbsVal = Math.abs(origLambda[c]);
+ minAbsVal_c = c;
+ }
+ }
+ scalingFactor = normalizationOptions[1] / Math.abs(origLambda[minAbsVal_c]);
+
+ } else if (normalizationMethod == 4) {
+ double pow = normalizationOptions[1];
+ double norm = L_norm(origLambda, pow);
+ scalingFactor = normalizationOptions[2] / norm;
+ }
+
+ for (int c = 1; c <= paramDim; ++c) {
+ origLambda[c] *= scalingFactor;
+ }
+ }
+
+ // from ZMERT
+ private double L_norm(double[] A, double pow) {
+ // calculates the L-pow norm of A[]
+ // NOTE: this calculation ignores A[0]
+ double sum = 0.0;
+ for (int i = 1; i < A.length; ++i)
+ sum += Math.pow(Math.abs(A[i]), pow);
+
+ return Math.pow(sum, 1 / pow);
+ }
+
+ public static double getScale()
+ {
+ return featScale;
+ }
+
+ public static void initBleuHistory(int sentNum, int statCount)
+ {
+ bleuHistory = new double[sentNum][statCount];
+ for(int i=0; i<sentNum; i++) {
+ for(int j=0; j<statCount; j++) {
+ bleuHistory[i][j] = 0.0;
+ }
+ }
+ }
+
+ public double getMetricScore()
+ {
+ return finalMetricScore;
+ }
+
+ private Vector<String> output;
+ private double[] initialLambda;
+ private double[] finalLambda;
+ private double finalMetricScore;
+ private HashMap<String, String>[] feat_hash;
+ private HashMap<String, String>[] stats_hash;
+ private int paramDim;
+ private boolean[] isOptimizable;
+ public static int sentNum;
+ public static int adagradIter; //AdaGrad internal iterations
+ public static int oraSelectMode;
+ public static int predSelectMode;
+ public static int batchSize;
+ public static int regularization;
+ public static boolean needShuffle;
+ public static boolean needScale;
+ public static double scoreRatio;
+ public static boolean needAvg;
+ public static boolean usePseudoBleu;
+ public static double featScale = 1.0; //scale the features in order to make the model score comparable with metric score
+ //updates in each epoch if necessary
+ public static double eta;
+ public static double lam;
+ public static double R; //corpus decay(used only when pseudo corpus is used to compute BLEU)
+ public static EvaluationMetric evalMetric;
+ public static double[] normalizationOptions;
+ public static double[][] bleuHistory;
+
+ private final static double NegInf = (-1.0 / 0.0);
+ private final static double PosInf = (+1.0 / 0.0);
+}
diff --git a/src/joshua/corpus/Vocabulary.java b/src/joshua/corpus/Vocabulary.java
index f0526c8..5dfe657 100644
--- a/src/joshua/corpus/Vocabulary.java
+++ b/src/joshua/corpus/Vocabulary.java
@@ -1,5 +1,7 @@
package joshua.corpus;
+import static joshua.util.FormatUtils.isNonterminal;
+
import java.io.BufferedInputStream;
import java.io.BufferedOutputStream;
import java.io.DataInputStream;
@@ -8,48 +10,40 @@
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
-import java.io.UnsupportedEncodingException;
import java.util.ArrayList;
-import java.util.Iterator;
+import java.util.HashMap;
import java.util.List;
import java.util.Map;
-import java.util.TreeMap;
import joshua.decoder.Decoder;
import joshua.decoder.ff.lm.NGramLanguageModel;
import joshua.util.FormatUtils;
-import joshua.util.MurmurHash;
/**
- * Static singular vocabulary class. Supports vocabulary freezing and (de-)serialization into a
- * vocabulary file.
+ * Static singular vocabulary class.
+ * Supports (de-)serialization into a vocabulary file.
*
* @author Juri Ganitkevitch
*/
public class Vocabulary {
- private static ArrayList<NGramLanguageModel> lms;
+ private final static ArrayList<NGramLanguageModel> lms = new ArrayList<NGramLanguageModel>();
- private static TreeMap<Long, Integer> hashToId;
- private static ArrayList<String> idToString;
- private static TreeMap<Long, String> hashToString;
+ private static List<String> idToString;
+ private static Map<String, Integer> stringToId;
+
+ private static volatile List<Integer> nonTerminalIndices;
private static final Integer lock = new Integer(0);
- private static final int UNKNOWN_ID;
- private static final String UNKNOWN_WORD;
+ static final int UNKNOWN_ID = 0;
+ static final String UNKNOWN_WORD = "<unk>";
public static final String START_SYM = "<s>";
public static final String STOP_SYM = "</s>";
static {
-
- UNKNOWN_ID = 0;
- UNKNOWN_WORD = "<unk>";
-
- lms = new ArrayList<NGramLanguageModel>();
-
clear();
}
@@ -73,9 +67,8 @@
* @return Returns true if vocabulary was read without mismatches or collisions.
* @throws IOException
*/
- public static boolean read(String file_name) throws IOException {
+ public static boolean read(final File vocab_file) throws IOException {
synchronized (lock) {
- File vocab_file = new File(file_name);
DataInputStream vocab_stream =
new DataInputStream(new BufferedInputStream(new FileInputStream(vocab_file)));
int size = vocab_stream.readInt();
@@ -109,42 +102,22 @@
}
}
- public static void freeze() {
- synchronized (lock) {
- int current_id = 1;
-
- TreeMap<Long, Integer> hash_to_id = new TreeMap<Long, Integer>();
- ArrayList<String> id_to_string = new ArrayList<String>(idToString.size() + 1);
- id_to_string.add(UNKNOWN_ID, UNKNOWN_WORD);
-
- Map.Entry<Long, Integer> walker = hashToId.firstEntry();
- while (walker != null) {
- String word = hashToString.get(walker.getKey());
- hash_to_id.put(walker.getKey(), (walker.getValue() < 0 ? -current_id : current_id));
- id_to_string.add(current_id, word);
- current_id++;
- walker = hashToId.higherEntry(walker.getKey());
- }
- idToString = id_to_string;
- hashToId = hash_to_id;
- }
- }
-
+ /**
+ * Get the id of the token if it already exists, new id is created otherwise.
+ *
+ * TODO: currently locks for every call.
+ * Separate constant (frozen) ids from changing (e.g. OOV) ids.
+ * Constant ids could be immutable -> no locking.
+ * Alternatively: could we use ConcurrentHashMap to not have to lock if actually contains it and only lock for modifications?
+ */
public static int id(String token) {
synchronized (lock) {
- long hash = 0;
- try {
- hash = MurmurHash.hash64(token);
- } catch (UnsupportedEncodingException e) {
- e.printStackTrace();
- }
- String hash_word = hashToString.get(hash);
- if (hash_word != null) {
- if (!token.equals(hash_word)) {
- Decoder.LOG(1, String.format("MurmurHash for the following symbols collides: '%s', '%s'", hash_word, token));
- }
- return hashToId.get(hash);
+ if (stringToId.containsKey(token)) {
+ return stringToId.get(token);
} else {
+ if (nonTerminalIndices != null && nt(token)) {
+ throw new IllegalArgumentException("After the nonterminal indices have been set by calling getNonterminalIndices you can't call id on new nonterminals anymore.");
+ }
int id = idToString.size() * (nt(token) ? -1 : 1);
// register this (token,id) mapping with each language
@@ -154,8 +127,7 @@
lm.registerWord(token, Math.abs(id));
idToString.add(token);
- hashToString.put(hash, token);
- hashToId.put(hash, id);
+ stringToId.put(token, id);
return id;
}
}
@@ -180,8 +152,10 @@
}
public static String word(int id) {
- id = Math.abs(id);
- return idToString.get(id);
+ synchronized (lock) {
+ id = Math.abs(id);
+ return idToString.get(id);
+ }
}
public static String getWords(int[] ids) {
@@ -199,27 +173,35 @@
return sb.deleteCharAt(sb.length() - 1).toString();
}
- private static boolean isNonterminal(String word) {
- return (word.substring(0,1).equals("[") && (word.substring(word.length() - 1,word.length()).equals("]")));
- }
-
/**
- * This method returns a list of all indices corresponding to Nonterminals in the Vocabulary
- * @return
+ * This method returns a list of all (positive) indices
+ * corresponding to Nonterminals in the Vocabulary.
*/
public static List<Integer> getNonterminalIndices()
{
- List<Integer> result = new ArrayList<Integer>();
- for(int i = 0; i < idToString.size(); i++)
- {
- String word = idToString.get(i);
- if(isNonterminal(word)){
- result.add(i);
+ if (nonTerminalIndices == null) {
+ synchronized (lock) {
+ if (nonTerminalIndices == null) {
+ nonTerminalIndices = findNonTerminalIndices();
+ }
}
- }
- return result;
+ }
+ return nonTerminalIndices;
}
-
+
+ /**
+ * Iterates over the Vocabulary and finds all non terminal indices.
+ */
+ private static List<Integer> findNonTerminalIndices() {
+ List<Integer> nonTerminalIndices = new ArrayList<Integer>();
+ for(int i = 0; i < idToString.size(); i++) {
+ final String word = idToString.get(i);
+ if(isNonterminal(word)){
+ nonTerminalIndices.add(i);
+ }
+ }
+ return nonTerminalIndices;
+ }
public static int getUnknownId() {
return UNKNOWN_ID;
@@ -253,59 +235,20 @@
return FormatUtils.getNonterminalIndex(word(id));
}
- private static void clear() {
- hashToId = new TreeMap<Long, Integer>();
- hashToString = new TreeMap<Long, String>();
- idToString = new ArrayList<String>();
-
- idToString.add(UNKNOWN_ID, UNKNOWN_WORD);
- }
-
/**
- * Used to indicate that a query has been made for a symbol that is not known.
- *
- * @author Lane Schwartz
+ * Clears the vocabulary and initializes it with an unknown word.
+ * Registered language models are left unchanged.
*/
- public static class UnknownSymbolException extends RuntimeException {
+ public static void clear() {
+ synchronized (lock) {
+ nonTerminalIndices = null;
- private static final long serialVersionUID = 1L;
-
- /**
- * Constructs an exception indicating that the specified identifier cannot be found in the
- * symbol table.
- *
- * @param id Integer identifier
- */
- public UnknownSymbolException(int id) {
- super("Identifier " + id + " cannot be found in the symbol table");
- }
-
- /**
- * Constructs an exception indicating that the specified symbol cannot be found in the symbol
- * table.
- *
- * @param symbol String symbol
- */
- public UnknownSymbolException(String symbol) {
- super("Symbol " + symbol + " cannot be found in the symbol table");
+ idToString = new ArrayList<String>();
+ stringToId = new HashMap<String, Integer>();
+
+ idToString.add(UNKNOWN_ID, UNKNOWN_WORD);
+ stringToId.put(UNKNOWN_WORD, UNKNOWN_ID);
}
}
-
- /**
- * Used to indicate that word hashing has produced a collision.
- *
- * @author Juri Ganitkevitch
- */
- public static class HashCollisionException extends RuntimeException {
-
- private static final long serialVersionUID = 1L;
-
- public HashCollisionException(String first, String second) {
- super("MurmurHash for the following symbols collides: '" + first + "', '" + second + "'");
- }
- }
-
- public static Iterator<String> wordIterator() {
- return idToString.iterator();
- }
+
}
diff --git a/src/joshua/decoder/ArgsParser.java b/src/joshua/decoder/ArgsParser.java
index 270030d..231f5a1 100644
--- a/src/joshua/decoder/ArgsParser.java
+++ b/src/joshua/decoder/ArgsParser.java
@@ -5,6 +5,8 @@
import java.nio.file.Files;
import java.nio.file.Paths;
+import joshua.util.io.LineReader;
+
/**
* @author orluke
*
@@ -18,8 +20,9 @@
* executed from the command line.
*
* @param args
+ * @throws IOException
*/
- public ArgsParser(String[] args, JoshuaConfiguration joshuaConfiguration) {
+ public ArgsParser(String[] args, JoshuaConfiguration joshuaConfiguration) throws IOException {
/*
* Look for a verbose flag, -v.
@@ -35,7 +38,10 @@
}
if (args[i].equals("-version")) {
- System.out.println("The Joshua machine translator, version 6.0 (git: ");
+ LineReader reader = new LineReader(String.format("%s/VERSION", System.getenv("JOSHUA")));
+ reader.readLine();
+ String version = reader.readLine().split("\\s+")[2];
+ System.out.println(String.format("The Joshua machine translator, version %s", version));
System.out.println("joshua-decoder.org");
System.exit(0);
diff --git a/src/joshua/decoder/Decoder.java b/src/joshua/decoder/Decoder.java
index f489a77..caa3258 100644
--- a/src/joshua/decoder/Decoder.java
+++ b/src/joshua/decoder/Decoder.java
@@ -19,7 +19,6 @@
import joshua.decoder.ff.tm.Grammar;
import joshua.decoder.ff.tm.hash_based.MemoryBasedBatchGrammar;
import joshua.decoder.ff.tm.packed.PackedGrammar;
-import joshua.decoder.hypergraph.HyperGraph;
import joshua.decoder.io.TranslationRequest;
import joshua.decoder.phrase.PhraseTable;
import joshua.decoder.segment_file.Sentence;
@@ -63,16 +62,6 @@
private List<Grammar> grammars;
private ArrayList<FeatureFunction> featureFunctions;
- /*
- * A sorted list of the feature names (so they can be output in the order they were read in)
- */
- public static ArrayList<String> feature_names = new ArrayList<String>();
-
- /*
- * Just the dense features.
- */
- public static ArrayList<String> dense_feature_names = new ArrayList<String>();
-
/* The feature weights. */
public static FeatureVector weights;
@@ -363,9 +352,7 @@
*/
private String mosesize(String feature) {
if (joshuaConfiguration.moses) {
- if (feature.equals("OOVPenalty"))
- return "OOV_Penalty";
- else if (feature.startsWith("tm_") || feature.startsWith("lm_"))
+ if (feature.startsWith("tm_") || feature.startsWith("lm_"))
return feature.replace("_", "-");
}
@@ -420,29 +407,11 @@
System.exit(17);
}
- /* Weights could be listed more than once if overridden from the command line */
- if (! weights.containsKey(pair[0])) {
- feature_names.add(pair[0]);
- if (FeatureVector.isDense(pair[0]))
- dense_feature_names.add(pair[0]);
- }
-
- weights.put(pair[0], Float.parseFloat(pair[1]));
+ weights.set(pair[0], Float.parseFloat(pair[1]));
}
- // This is mostly for compatibility with the Moses tuning script
- if (joshuaConfiguration.show_weights_and_quit) {
- for (String key : Decoder.dense_feature_names) {
- System.out.println(String.format("%s= %.5f", mosesize(key), weights.get(key)));
- }
- System.exit(0);
- }
-
- if (!weights.containsKey("BLEU"))
- Decoder.weights.put("BLEU", 0.0f);
-
- Decoder.LOG(1, String.format("Read %d sparse and %d dense weights", weights.size()
- - dense_feature_names.size(), dense_feature_names.size()));
+ Decoder.LOG(1, String.format("Read %d weights (%d of them dense)", weights.size(),
+ weights.DENSE_FEATURE_NAMES.size()));
// Do this before loading the grammars and the LM.
this.featureFunctions = new ArrayList<FeatureFunction>();
@@ -457,6 +426,18 @@
// Initialize the features: requires that LM model has been initialized.
this.initializeFeatureFunctions();
+ // This is mostly for compatibility with the Moses tuning script
+ if (joshuaConfiguration.show_weights_and_quit) {
+ for (int i = 0; i < weights.DENSE_FEATURE_NAMES.size(); i++) {
+ String name = weights.DENSE_FEATURE_NAMES.get(i);
+ if (joshuaConfiguration.moses)
+ System.out.println(String.format("%s= %.5f", mosesize(name), weights.getDense(i)));
+ else
+ System.out.println(String.format("%s %.5f", name, weights.getDense(i)));
+ }
+ System.exit(0);
+ }
+
// Sort the TM grammars (needed to do cube pruning)
if (joshuaConfiguration.amortized_sorting) {
Decoder.LOG(1, "Grammar sorting happening lazily on-demand.");
@@ -495,6 +476,9 @@
private void initializeTranslationGrammars() throws IOException {
if (joshuaConfiguration.tms.size() > 0) {
+
+ // collect packedGrammars to check if they use a shared vocabulary
+ final List<PackedGrammar> packed_grammars = new ArrayList<>();
// tm = {thrax/hiero,packed,samt,moses} OWNER LIMIT FILE
for (String tmLine : joshuaConfiguration.tms) {
@@ -508,10 +492,12 @@
String path = parsedArgs.get("path");
Grammar grammar = null;
- if (! type.equals("moses")) {
+ if (! type.equals("moses") && ! type.equals("phrase")) {
if (new File(path).isDirectory()) {
try {
- grammar = new PackedGrammar(path, span_limit, owner, type, joshuaConfiguration);
+ PackedGrammar packed_grammar = new PackedGrammar(path, span_limit, owner, type, joshuaConfiguration);
+ packed_grammars.add(packed_grammar);
+ grammar = packed_grammar;
} catch (FileNotFoundException e) {
System.err.println(String.format("Couldn't load packed grammar from '%s'", path));
System.err.println("Perhaps it doesn't exist, or it may be an old packed file format.");
@@ -530,11 +516,14 @@
: -1;
joshuaConfiguration.search_algorithm = "stack";
- grammar = new PhraseTable(path, owner, joshuaConfiguration, maxSourceLen);
+ grammar = new PhraseTable(path, owner, type, joshuaConfiguration, maxSourceLen);
}
this.grammars.add(grammar);
}
+
+ checkSharedVocabularyChecksumsForPackedGrammars(packed_grammars);
+
} else {
Decoder.LOG(1, "* WARNING: no grammars supplied! Supplying dummy glue grammar.");
MemoryBasedBatchGrammar glueGrammar = new MemoryBasedBatchGrammar("glue", joshuaConfiguration);
@@ -542,7 +531,7 @@
glueGrammar.addGlueRules(featureFunctions);
this.grammars.add(glueGrammar);
}
-
+
/* Now create a feature function for each owner */
HashSet<String> ownersSeen = new HashSet<String>();
@@ -550,7 +539,7 @@
String owner = Vocabulary.word(grammar.getOwner());
if (! ownersSeen.contains(owner)) {
this.featureFunctions.add(new PhraseModel(weights, new String[] { "tm", "-owner", owner },
- joshuaConfiguration));
+ joshuaConfiguration, grammar));
ownersSeen.add(owner);
}
}
@@ -558,6 +547,26 @@
Decoder.LOG(1, String.format("Memory used %.1f MB",
((Runtime.getRuntime().totalMemory() - Runtime.getRuntime().freeMemory()) / 1000000.0)));
}
+
+ /**
+ * Checks if multiple packedGrammars have the same vocabulary by comparing their vocabulary file checksums.
+ */
+ private static void checkSharedVocabularyChecksumsForPackedGrammars(final List<PackedGrammar> packed_grammars) {
+ String previous_checksum = "";
+ for (PackedGrammar grammar : packed_grammars) {
+ final String checksum = grammar.computeVocabularyChecksum();
+ if (previous_checksum.isEmpty()) {
+ previous_checksum = checksum;
+ } else {
+ if (!checksum.equals(previous_checksum)) {
+ throw new RuntimeException(
+ "Trying to load multiple packed grammars with different vocabularies!" +
+ "Have you packed them jointly?");
+ }
+ previous_checksum = checksum;
+ }
+ }
+ }
/*
* This function reads the weights for the model. Feature names and their weights are listed one
@@ -590,11 +599,7 @@
feature = demoses(feature);
}
- weights.put(feature, value);
- feature_names.add(feature);
- if (FeatureVector.isDense(feature))
- dense_feature_names.add(feature);
-
+ weights.increment(feature, value);
}
} catch (FileNotFoundException ioe) {
System.err.println("* FATAL: Can't find weights-file '" + fileName + "'");
@@ -656,7 +661,10 @@
for (FeatureFunction feature : featureFunctions) {
Decoder.LOG(1, String.format("FEATURE: %s", feature.logString()));
+
}
+
+ weights.registerDenseFeatures(featureFunctions);
}
/**
diff --git a/src/joshua/decoder/JoshuaConfiguration.java b/src/joshua/decoder/JoshuaConfiguration.java
index fe3e142..dd98efa 100644
--- a/src/joshua/decoder/JoshuaConfiguration.java
+++ b/src/joshua/decoder/JoshuaConfiguration.java
@@ -1,5 +1,8 @@
package joshua.decoder;
+import static joshua.util.FormatUtils.cleanNonTerminal;
+import static joshua.util.FormatUtils.markup;
+
import java.io.File;
import java.io.FileWriter;
import java.io.IOException;
@@ -438,11 +441,11 @@
lattice_decoding = true;
} else if (parameter.equals(normalize_key("default-non-terminal"))) {
- default_non_terminal = String.format("[%s]", FormatUtils.cleanNonterminal(fds[1].trim()));
+ default_non_terminal = markup(cleanNonTerminal(fds[1].trim()));
logger.finest(String.format("default_non_terminal: %s", default_non_terminal));
} else if (parameter.equals(normalize_key("goal-symbol"))) {
- goal_symbol = String.format("[%s]", FormatUtils.cleanNonterminal(fds[1].trim()));
+ goal_symbol = markup(cleanNonTerminal(fds[1].trim()));
logger.finest("goalSymbol: " + goal_symbol);
} else if (parameter.equals(normalize_key("weights-file"))) {
diff --git a/src/joshua/decoder/Translation.java b/src/joshua/decoder/Translation.java
index 355e911..427e3d9 100644
--- a/src/joshua/decoder/Translation.java
+++ b/src/joshua/decoder/Translation.java
@@ -47,7 +47,7 @@
// We must put this weight as zero, otherwise we get an error when we try to retrieve it
// without checking
- Decoder.weights.put("BLEU", 0);
+ Decoder.weights.increment("BLEU", 0);
String best = ViterbiExtractor.extractViterbiString(hypergraph.goalNode).trim();
best = best.substring(best.indexOf(' ') + 1, best.lastIndexOf(' '));
@@ -74,10 +74,10 @@
kBestExtractor.lazyKBestExtractOnHG(hypergraph, joshuaConfiguration.topN, out);
if (joshuaConfiguration.rescoreForest) {
- Decoder.weights.put("BLEU", joshuaConfiguration.rescoreForestWeight);
+ Decoder.weights.increment("BLEU", joshuaConfiguration.rescoreForestWeight);
kBestExtractor.lazyKBestExtractOnHG(hypergraph, joshuaConfiguration.topN, out);
- Decoder.weights.put("BLEU", -joshuaConfiguration.rescoreForestWeight);
+ Decoder.weights.increment("BLEU", -joshuaConfiguration.rescoreForestWeight);
kBestExtractor.lazyKBestExtractOnHG(hypergraph, joshuaConfiguration.topN, out);
}
}
diff --git a/src/joshua/decoder/chart_parser/Chart.java b/src/joshua/decoder/chart_parser/Chart.java
index 769b741..d23d4ee 100644
--- a/src/joshua/decoder/chart_parser/Chart.java
+++ b/src/joshua/decoder/chart_parser/Chart.java
@@ -134,7 +134,6 @@
this.dotcharts = new DotChart[this.grammars.length];
for (int i = 0; i < this.grammars.length; i++)
this.dotcharts[i] = new DotChart(this.inputLattice, this.grammars[i], this,
- NonterminalMatcher.createNonterminalMatcher(config2),
this.grammars[i].isRegexpGrammar());
// Begin to do initialization work
diff --git a/src/joshua/decoder/chart_parser/ComputeNodeResult.java b/src/joshua/decoder/chart_parser/ComputeNodeResult.java
index 6fd294c..ddf858e 100644
--- a/src/joshua/decoder/chart_parser/ComputeNodeResult.java
+++ b/src/joshua/decoder/chart_parser/ComputeNodeResult.java
@@ -90,8 +90,8 @@
if (Decoder.VERBOSE >= 4)
System.err.println(String.format("-> FEATURE %s = %.3f * %.3f = %.3f",
- feature.getName(), acc.getScore() / Decoder.weights.get(feature.getName()),
- Decoder.weights.get(feature.getName()), acc.getScore()));
+ feature.getName(), acc.getScore() / Decoder.weights.getSparse(feature.getName()),
+ Decoder.weights.getSparse(feature.getName()), acc.getScore()));
if (feature.isStateful()) {
futureCostEstimate += feature.estimateFutureCost(rule, newState, sentence);
diff --git a/src/joshua/decoder/chart_parser/DotChart.java b/src/joshua/decoder/chart_parser/DotChart.java
index 3caf352..c08cf37 100644
--- a/src/joshua/decoder/chart_parser/DotChart.java
+++ b/src/joshua/decoder/chart_parser/DotChart.java
@@ -74,12 +74,6 @@
/* If enabled, rule terminals are treated as regular expressions. */
private final boolean regexpMatching;
- /*
- * nonTerminalMatcher determines the behavior of nonterminal matching: strict or soft-syntactic
- * matching
- */
- private final NonterminalMatcher nonTerminalMatcher;
-
// ===============================================================
// Static fields
@@ -106,8 +100,7 @@
- public DotChart(Lattice<Token> input, Grammar grammar, Chart chart,
- NonterminalMatcher nonTerminalMatcher, boolean regExpMatching) {
+ public DotChart(Lattice<Token> input, Grammar grammar, Chart chart, boolean regExpMatching) {
this.dotChart = chart;
this.pGrammar = grammar;
@@ -115,7 +108,6 @@
this.sentLen = input.size();
this.dotcells = new ChartSpan<DotCell>(sentLen, null);
- this.nonTerminalMatcher = nonTerminalMatcher;
this.regexpMatching = regExpMatching;
seed();
@@ -269,17 +261,11 @@
* undocumented feature that introduces a complexity, in that the next "word" in the grammar
* rule might match more than one outgoing arc in the grammar trie.
*/
- List<Trie> child_tnodes = nonTerminalMatcher.produceMatchingChildTNodesNonterminalLevel(
- dotNode, superNode);
-
- if (!child_tnodes.isEmpty()) {
- for (Trie child_tnode : child_tnodes) {
- if (child_tnode != null) {
- if ((!skipUnary) || (child_tnode.hasExtensions())) {
- addDotItem(child_tnode, i, j, dotNode.getAntSuperNodes(), superNode, dotNode
- .getSourcePath().extendNonTerminal());
- }
- }
+ Trie child_node = dotNode.getTrieNode().match(superNode.lhs);
+ if (child_node != null) {
+ if ((!skipUnary) || (child_node.hasExtensions())) {
+ addDotItem(child_node, i, j, dotNode.getAntSuperNodes(), superNode, dotNode
+ .getSourcePath().extendNonTerminal());
}
}
}
diff --git a/src/joshua/decoder/chart_parser/NonterminalMatcher.java b/src/joshua/decoder/chart_parser/NonterminalMatcher.java
deleted file mode 100644
index 2f67fc9..0000000
--- a/src/joshua/decoder/chart_parser/NonterminalMatcher.java
+++ /dev/null
@@ -1,264 +0,0 @@
-package joshua.decoder.chart_parser;
-
-import java.util.ArrayList;
-import java.util.Arrays;
-import java.util.HashMap;
-import java.util.Iterator;
-import java.util.List;
-import joshua.corpus.Vocabulary;
-import joshua.decoder.JoshuaConfiguration;
-import joshua.decoder.chart_parser.DotChart.DotNode;
-import joshua.decoder.ff.tm.Trie;
-import joshua.decoder.ff.tm.packed.PackedGrammar.PackedRoot;
-import joshua.decoder.ff.tm.packed.PackedGrammar.PackedSlice.PackedTrie;
-
-/**
- * This abstract class and its implementations serve to refine the behavior of DotChart using
- * strategy. Basically there are different ways that nonterminals of rules can be matched, either
- * strict, or soft syntactic (nonterminals can all match each other). This interface defines a
- * method that produce matching nodes for the nonterminal level. The interface is then implemented
- * in different classes for the different types of matching (currently just strict or
- * soft-syntactic)
- *
- * The factory method produces different flavors of NonterminalMatcher corresponding to strict
- * (basic) matching, Regular Expression matching and soft syntactic matching. Notice that regular
- * expression matching and soft constraint matching can in fact be combined, getting the 'loosest'
- * way of matching possible.
- *
- * @author Gideon Maillette de Buy Wenniger <gemdbw AT gmail DOT com>
- *
- */
-public abstract class NonterminalMatcher {
-
- /**
- * How much nonterminals there may be maximal to use targeted querying rather than matching to
- * find the alternate nonterminals from the children table when doing soft syntactic translation.
- * Note that there is a tradeoff here: looping over all children and matching is rather is
- * expensive if there are many children, which is typically the case for big grammars as there are
- * many possible words at each level in the Trie. Targeted querying is only cheap provided the
- * number of nonterminals is small, otherwise it may in fact become more expensive than just
- * looping and matching.
- */
- private static final int MAX_TOTAL_NON_TERMINALS_FOR_TARGETED_QUERYING = 1000;
-
- protected static boolean isOOVLabelOrGoalLabel(String label,
- JoshuaConfiguration joshuaConfiguration) {
- return (label.equals(joshuaConfiguration.default_non_terminal) || label
- .equals(joshuaConfiguration.goal_symbol));
- }
-
- private static boolean useTargetdQuerying(List<Integer> nonterminalIndicesExceptForGoalAndOOV) {
- if (nonterminalIndicesExceptForGoalAndOOV.size() <= MAX_TOTAL_NON_TERMINALS_FOR_TARGETED_QUERYING) {
- return true;
- }
- return false;
- }
-
- /**
- * This method returns a list of all indices corresponding to Nonterminals in the Vocabulary
- *
- * @return
- */
- public static List<Integer> getAllNonterminalIndicesExceptForGoalAndOOV(
- JoshuaConfiguration joshuaConfiguration) {
- List<Integer> result = new ArrayList<Integer>();
- List<Integer> nonterminalIndices = Vocabulary.getNonterminalIndices();
- for (Integer nonterminalIndex : nonterminalIndices) {
- if (!isOOVLabelOrGoalLabel(Vocabulary.word(nonterminalIndex), joshuaConfiguration)) {
- result.add(nonterminalIndex);
- }
- }
- return result;
- }
-
- public static NonterminalMatcher createNonterminalMatcher(JoshuaConfiguration joshuaConfiguration) {
- List<Integer> allNonterminalIndicesExceptForGoalAndOOV = getAllNonterminalIndicesExceptForGoalAndOOV(joshuaConfiguration);
-
- if (allNonterminalIndicesExceptForGoalAndOOV.isEmpty()) {
- throw new RuntimeException(
- "Error: NonterminalMatcherFactory. createNonterminalMatcher - empty nonterminal indices table");
- }
-
- if (joshuaConfiguration.fuzzy_matching) {
- return new StandardNonterminalMatcherSoftConstraints(joshuaConfiguration,
- allNonterminalIndicesExceptForGoalAndOOV,
- useTargetdQuerying(allNonterminalIndicesExceptForGoalAndOOV));
- } else {
- return new StandardNonterminalMatcherStrict(joshuaConfiguration,
- allNonterminalIndicesExceptForGoalAndOOV,
- useTargetdQuerying(allNonterminalIndicesExceptForGoalAndOOV));
- }
- }
-
- // A list of nonTerminalIndices, to be used for faster retrieval of
- // Nonterminals in
- // soft syntactic matching
- private final List<Integer> nonterminalIndicesExceptForGoalAndOOV;
-
- protected final JoshuaConfiguration joshuaConfiguration;
- private final boolean useTargetQueryingToCollectAlternateNonterminals;
-
- protected NonterminalMatcher(JoshuaConfiguration joshuaConfiguration,
- List<Integer> nonterminalIndicesExceptForGoalAndOOV,
- boolean useTargetQueryingToCollectAlternateNonterminals) {
- this.joshuaConfiguration = joshuaConfiguration;
- this.nonterminalIndicesExceptForGoalAndOOV = nonterminalIndicesExceptForGoalAndOOV;
- this.useTargetQueryingToCollectAlternateNonterminals = useTargetQueryingToCollectAlternateNonterminals;
- }
-
- /**
- * This is the abstract method used to get the matching child nodes for the nonterminal level
- *
- * @param dotNode
- * @param superNode
- * @return
- */
- public abstract List<Trie> produceMatchingChildTNodesNonterminalLevel(DotNode dotNode,
- SuperNode superNode);
-
- private static boolean isNonterminal(int wordIndex) {
- return wordIndex < 0;
- }
-
- /**
- * This method finds Nonterminal entries from the children in the Children HashMap using a
- * targeted querying strategy, based on knowledge of what the Nonterminals are. Storing the
- * Nonterminals and Terminals in the Trie separately would be an even smarter strategy perhaps,
- * but requires a more thorough refactoring of the code
- *
- * @param childrenTbl
- * @return
- */
- private List<Trie> getNonTerminalsListFromChildrenByTargetedQuerying(
- HashMap<Integer, ? extends Trie> childrenTbl) {
- List<Trie> trieList = new ArrayList<Trie>();
-
- if (childrenTbl != null) {
- // get all the extensions, map to string, check for *, build regexp
-
- for (Integer index : this.nonterminalIndicesExceptForGoalAndOOV) {
-
- int nonterminalIndexTrieFormat = -index;
- if (childrenTbl.containsKey(nonterminalIndexTrieFormat)) {
- trieList.add(childrenTbl.get(nonterminalIndexTrieFormat));
- }
- }
- }
- return trieList;
-
- }
-
- private List<Trie> getNonTerminalsListFromChildrenByTrieEnumeration(Trie trie, int wordID) {
- HashMap<Integer, ? extends Trie> childrenTbl = trie.getChildren();
- List<Trie> trieList = new ArrayList<Trie>();
-
- Iterator<Integer> nonterminalIterator = trie.getNonterminalExtensionIterator();
- while (nonterminalIterator.hasNext()) {
- trieList.add(childrenTbl.get(nonterminalIterator.next()));
- }
-
- return trieList;
-
- }
-
- private boolean isPackedTrieType(Trie trie) {
- return (trie instanceof PackedTrie) || (trie instanceof PackedRoot);
- }
-
- protected List<Trie> matchAllEqualOrBothNonTerminalAndNotGoalOrOOV(DotNode dotNode, int wordID) {
-
- // logger.info("wordID: " + wordID + " Vocabulary.word(Math.abs(wordID)) "
- // + Vocabulary.word(Math.abs(wordID)));
-
- if (!isNonterminal(wordID)) {
- throw new RuntimeException("Error : expexted nonterminal, but did not get it "
- + "in matchAllEqualOrBothNonTerminalAndNotGoalOrOOV(DotNode dotNode, int wordID)");
- }
-
- // When we have a packed Trie or the boolean useTargetQueryingToCollectAlternateNonterminals
- // is set to false, we will us the Trie children enumeration to retrieve nonterminals
- // for packed tries this is efficient
- if (isPackedTrieType(dotNode.getTrieNode())
- || (!useTargetQueryingToCollectAlternateNonterminals)) {
- return getNonTerminalsListFromChildrenByTrieEnumeration(dotNode.getTrieNode(), wordID);
- } else {
- HashMap<Integer, ? extends Trie> childrenTbl = dotNode.getTrieNode().getChildren();
- return getNonTerminalsListFromChildrenByTargetedQuerying(childrenTbl);
- }
- }
-
- public static List<Trie> produceStandardMatchingChildTNodesNonterminalLevel(DotNode dotNode,
- SuperNode superNode) {
- Trie child_node = dotNode.getTrieNode().match(superNode.lhs);
- List<Trie> child_tnodes = Arrays.asList(child_node);
- return child_tnodes;
- }
-
- protected abstract static class StandardNonterminalMatcher extends NonterminalMatcher {
-
- protected StandardNonterminalMatcher(JoshuaConfiguration joshuaConfiguration,
- List<Integer> nonterminalIndicesExceptForGoalAndOOV,
- boolean useTargetQueryingToCollectAlternateNonterminals) {
- super(joshuaConfiguration, nonterminalIndicesExceptForGoalAndOOV,
- useTargetQueryingToCollectAlternateNonterminals);
- }
- }
-
- protected static class StandardNonterminalMatcherStrict extends StandardNonterminalMatcher {
-
- protected StandardNonterminalMatcherStrict(JoshuaConfiguration joshuaConfiguration,
- List<Integer> nonterminalIndicesExceptForGoalAndOOV,
- boolean useTargetQueryingToCollectAlternateNonterminals) {
- super(joshuaConfiguration, nonterminalIndicesExceptForGoalAndOOV,
- useTargetQueryingToCollectAlternateNonterminals);
- }
-
- @Override
- public List<Trie> produceMatchingChildTNodesNonterminalLevel(DotNode dotNode,
- SuperNode superNode) {
- return produceStandardMatchingChildTNodesNonterminalLevel(dotNode, superNode);
- }
- }
-
- protected static class StandardNonterminalMatcherSoftConstraints extends
- StandardNonterminalMatcher {
-
- /**
- *
- * @param joshuaConfiguration
- */
- protected StandardNonterminalMatcherSoftConstraints(JoshuaConfiguration joshuaConfiguration,
- List<Integer> nonterminalIndicesExceptForGoalAndOOV,
- boolean useTargetQueryingToCollectAlternateNonterminals) {
- super(joshuaConfiguration, nonterminalIndicesExceptForGoalAndOOV,
- useTargetQueryingToCollectAlternateNonterminals);
- }
-
- /**
- * This method will perform strict matching if the target node superNode is a Goal Symbol.
- * Otherwise it will call a method that produces all available substitutions that correspond to
- * Nonterminals.
- *
- * @param dotNode
- * @param superNode
- */
- public List<Trie> produceMatchingChildTNodesNonterminalLevel(DotNode dotNode,
- SuperNode superNode) {
-
- // We do not allow substitution of other things for GOAL labels or OOV
- // symbols
- if (isOOVLabelOrGoalLabel(Vocabulary.word(superNode.lhs), joshuaConfiguration)) {
- // logger.info("BLAA - Vocabulary.word(superNode.lhs)" +
- // Vocabulary.word(superNode.lhs));
- Trie child_node = dotNode.getTrieNode().match(superNode.lhs);
- // logger.info("child_node.toString()" + child_node);
- List<Trie> child_tnodes = Arrays.asList(child_node);
- return child_tnodes;
- } else {
- // logger.info("Vocabulary.word(superNode.lhs): " +
- // Vocabulary.word(superNode.lhs));
- return matchAllEqualOrBothNonTerminalAndNotGoalOrOOV(dotNode, superNode.lhs);
- }
- }
- }
-}
diff --git a/src/joshua/decoder/ff/ArityPhrasePenalty.java b/src/joshua/decoder/ff/ArityPhrasePenalty.java
index 7e320a2..d9c93cb 100644
--- a/src/joshua/decoder/ff/ArityPhrasePenalty.java
+++ b/src/joshua/decoder/ff/ArityPhrasePenalty.java
@@ -31,9 +31,6 @@
this.owner = Vocabulary.id(parsedArgs.get("owner"));
this.minArity = Integer.parseInt(parsedArgs.get("min-arity"));
this.maxArity = Integer.parseInt(parsedArgs.get("max-arity"));
-
- if (!weights.containsKey(name))
- System.err.println("WARNING: no weight found for feature '" + name + "'");
}
/**
diff --git a/src/joshua/decoder/ff/FeatureFunction.java b/src/joshua/decoder/ff/FeatureFunction.java
index d39cce6..a08ead2 100644
--- a/src/joshua/decoder/ff/FeatureFunction.java
+++ b/src/joshua/decoder/ff/FeatureFunction.java
@@ -1,5 +1,6 @@
package joshua.decoder.ff;
+import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.regex.Matcher;
@@ -53,6 +54,17 @@
* names, for templates that define multiple features.
*/
protected String name = null;
+
+ /*
+ * The list of features each function can contribute, along with the dense feature IDs.
+ */
+ protected String[] denseFeatureNames = null;
+ protected int[] denseFeatureIDs = null;
+
+ /*
+ * The first dense feature index
+ */
+ protected int denseFeatureIndex = -1;
// The list of arguments passed to the feature, and the hash for the parsed args
protected String[] args;
@@ -70,7 +82,7 @@
public String getName() {
return name;
}
-
+
// Whether the feature has state.
public abstract boolean isStateful();
@@ -82,10 +94,22 @@
this.parsedArgs = FeatureFunction.parseArgs(args);
}
+
+ /**
+ * Any feature function can use this to report dense features names to the master code. The
+ * parameter tells the feature function the index of the first available dense feature ID; the feature
+ * function will then use IDs (id..id+names.size()-1).
+ *
+ * @param id the id of the first dense feature id to use
+ * @return a list of dense feature names
+ */
+ public ArrayList<String> reportDenseFeatures(int id) {
+ return new ArrayList<String>();
+ }
public String logString() {
try {
- return String.format("%s (weight %.3f)", name, weights.get(name));
+ return String.format("%s (weight %.3f)", name, weights.getSparse(name));
} catch (RuntimeException e) {
return name;
}
@@ -270,6 +294,7 @@
public interface Accumulator {
public void add(String name, float value);
+ public void add(int id, float value);
}
public class ScoreAccumulator implements Accumulator {
@@ -279,10 +304,14 @@
this.score = 0.0f;
}
+ @Override
public void add(String name, float value) {
- if (weights.containsKey(name)) {
- score += value * weights.get(name);
- }
+ score += value * weights.getSparse(name);
+ }
+
+ @Override
+ public void add(int id, float value) {
+ score += value * weights.getDense(id);
}
public float getScore() {
@@ -297,12 +326,14 @@
this.features = new FeatureVector();
}
+ @Override
public void add(String name, float value) {
- if (features.containsKey(name)) {
- features.put(name, features.get(name) + value);
- } else {
- features.put(name, value);
- }
+ features.increment(name, value);
+ }
+
+ @Override
+ public void add(int id, float value) {
+ features.increment(id, value);
}
public FeatureVector getFeatures() {
diff --git a/src/joshua/decoder/ff/FeatureVector.java b/src/joshua/decoder/ff/FeatureVector.java
index d643159..9ad3436 100644
--- a/src/joshua/decoder/ff/FeatureVector.java
+++ b/src/joshua/decoder/ff/FeatureVector.java
@@ -1,7 +1,5 @@
package joshua.decoder.ff;
-import joshua.decoder.Decoder;
-
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
@@ -13,19 +11,38 @@
* An implementation of a sparse feature vector, using for representing both weights and feature
* values.
*
+ * This class is used to hold both the decoder weights and the feature values accumulated across
+ * each edge. When features are read in upon decoder startup, they all start out as sparse features
+ * and are stored in the hash table. After the feature functions have been loaded, the decoder
+ * queries each of them for their sparse features via {@link registerDenseFeatures}. Those features
+ * returned by each decoder are then *removed* from the sparse feature hash and placed in the dense
+ * feature array. Therefore, when a feature registers a dense feature, it should take care to
+ * query either {@link getDense()} or {@link getSparse} when asking for the feature values later on.
+ *
* @author Matt Post <post@cs.jhu.edu>
*/
public class FeatureVector {
- private HashMap<String, Float> features;
+ /*
+ * A list of the dense feature names. Increased via calls to registerDenseFeatures()
+ */
+ public static ArrayList<String> DENSE_FEATURE_NAMES = new ArrayList<String>();
+
+ /*
+ * The values of each of the dense features, defaulting to 0.
+ */
+ private ArrayList<Float> denseFeatures = null;
+
+ /*
+ * Value of sparse features.
+ */
+ private HashMap<String, Float> sparseFeatures;
public FeatureVector() {
- features = new HashMap<String, Float>();
- }
-
- public FeatureVector(String feature, float value) {
- features = new HashMap<String, Float>();
- features.put(feature, value);
+ sparseFeatures = new HashMap<String, Float>();
+ denseFeatures = new ArrayList<Float>(DENSE_FEATURE_NAMES.size());
+ for (int i = 0; i < denseFeatures.size(); i++)
+ denseFeatures.set(i, 0.0f);
}
/**
@@ -49,6 +66,8 @@
*/
public FeatureVector(String featureString, String prefix) {
+// System.err.println(String.format("FEATURES_OF(%s, %s)", featureString, prefix));
+
/*
* Read through the features on this rule, adding them to the feature vector. Unlabeled features
* are converted to a canonical form.
@@ -56,38 +75,90 @@
* Note that it's bad form to mix unlabeled features and the named feature index they are mapped
* to, but we are being liberal in what we accept.
*
- * IMPORTANT: Note that, for historical reasons, the sign is reversed on all scores.
+ * IMPORTANT: Note that, for historical reasons, the sign is reversed on all *dense* scores.
* This is the source of *no end* of confusion and should be done away with.
*/
- features = new HashMap<String, Float>();
+ sparseFeatures = new HashMap<String, Float>();
+ denseFeatures = new ArrayList<Float>(DENSE_FEATURE_NAMES.size());
+ for (int i = 0; i < denseFeatures.size(); i++)
+ denseFeatures.set(i, 0.0f);
+
int denseFeatureIndex = 0;
if (!featureString.trim().equals("")) {
for (String token : featureString.split("\\s+")) {
if (token.indexOf('=') == -1) {
- features.put(String.format("%s%d", prefix, denseFeatureIndex), -Float.parseFloat(token));
+ /*
+ * If we encounter an unlabeled feature, it is the next dense feature
+ */
+ while (denseFeatures.size() <= denseFeatureIndex)
+ denseFeatures.add(0.0f);
+ denseFeatures.set(denseFeatureIndex, -Float.parseFloat(token));
denseFeatureIndex++;
} else {
+ /*
+ * Labeled features are of two types: if they start with the prefix, they are actually
+ * dense feature in disguise; otherwise, they are proper sparse features.
+ */
int splitPoint = token.indexOf('=');
- features.put(token.substring(0, splitPoint),
- Float.parseFloat(token.substring(splitPoint + 1)));
+ if (token.startsWith(prefix)) {
+// System.err.println(String.format(" PREFIX=%s '%s'.substring(%d,%d) = %s", prefix, token, prefix.length(), splitPoint,
+// token.substring(prefix.length(), splitPoint)));
+ int index = Integer.parseInt(token.substring(prefix.length(), splitPoint));
+ while (denseFeatures.size() <= index)
+ denseFeatures.add(0.0f);
+ denseFeatures.set(index, 1.0f * Float.parseFloat(token.substring(splitPoint + 1)));
+ } else {
+ sparseFeatures.put(token.substring(0, splitPoint),
+ Float.parseFloat(token.substring(splitPoint + 1)));
+ }
}
}
}
}
+
+ /**
+ * Register one or more dense features with the global weight vector. This assumes them global
+ * IDs, and then returns the index of the first feature (from which the calling feature function
+ * can infer them all). This *must* be called by every feature function wishing to register
+ * dense features!
+ *
+ * @param names
+ * @return
+ */
+ public void registerDenseFeatures(ArrayList<FeatureFunction> featureFunctions) {
+ for (FeatureFunction feature: featureFunctions) {
+ ArrayList<String> names = feature.reportDenseFeatures(denseFeatures.size());
+ for (String name: names) {
+ DENSE_FEATURE_NAMES.add(name);
+ denseFeatures.add(getSparse(name));
+ sparseFeatures.remove(name);
+ }
+ }
+ }
+
+ public ArrayList<Float> getDenseFeatures() {
+ return denseFeatures;
+ }
+
+ public HashMap<String,Float> getSparseFeatures() {
+ return sparseFeatures;
+ }
public Set<String> keySet() {
- return features.keySet();
+ return sparseFeatures.keySet();
}
public int size() {
- return features.size();
+ return sparseFeatures.size() + denseFeatures.size();
}
public FeatureVector clone() {
FeatureVector newOne = new FeatureVector();
- for (String key : this.features.keySet())
- newOne.put(key, this.features.get(key));
+ for (String key : this.sparseFeatures.keySet())
+ newOne.set(key, this.sparseFeatures.get(key));
+ for (int i = 0; i < denseFeatures.size(); i++)
+ newOne.set(i, denseFeatures.get(i));
return newOne;
}
@@ -97,9 +168,12 @@
* a value of 0.0f before subtraction.
*/
public void subtract(FeatureVector other) {
+ for (int i = 0; i < denseFeatures.size(); i++)
+ denseFeatures.set(i, denseFeatures.get(i) - other.getDense(i));
+
for (String key : other.keySet()) {
- float oldValue = (features.containsKey(key)) ? features.get(key) : 0.0f;
- features.put(key, oldValue - other.get(key));
+ float oldValue = (sparseFeatures.containsKey(key)) ? sparseFeatures.get(key) : 0.0f;
+ sparseFeatures.put(key, oldValue - other.getSparse(key));
}
}
@@ -108,44 +182,70 @@
* between the two being summed.
*/
public void add(FeatureVector other) {
+ while (denseFeatures.size() < other.denseFeatures.size())
+ denseFeatures.add(0.0f);
+
+ for (int i = 0; i < other.denseFeatures.size(); i++)
+ increment(i, other.getDense(i));
+
for (String key : other.keySet()) {
- if (!features.containsKey(key))
- features.put(key, other.get(key));
+ if (!sparseFeatures.containsKey(key))
+ sparseFeatures.put(key, other.getSparse(key));
else
- features.put(key, features.get(key) + other.get(key));
+ sparseFeatures.put(key, sparseFeatures.get(key) + other.getSparse(key));
}
}
- public boolean containsKey(final String feature) {
- return features.containsKey(feature);
- }
-
/**
- * This method returns the weight of a feature if it exists and otherwise throws a runtime error.
- * It is the duty of the programmer to check using the method containsKey if a feature with a
- * certain name exists. Previously this method would return 0 if the key did not exists, but this
- * lead to bugs in other parts of the code because Feature Names are often specified in capitals
- * but then lowercased, but in using the get method the lowercase form is not used consistently.
- * It is therefore good defensive programming to just throw an error when someone tries to get a
- * feature that does not exist - this will automatically eliminate such hard to debug errors. This
- * is what is now implemented.
+ * Return the weight of a sparse feature, indexed by its name.
*
* @param feature
- * @return
+ * @return the sparse feature's weight, or 0 if not found.
*/
- public float get(String feature) {
- if (features.containsKey(feature))
- return features.get(feature);
-
+ public float getSparse(String feature) {
+ if (sparseFeatures.containsKey(feature))
+ return sparseFeatures.get(feature);
+ return 0.0f;
+ }
+
+ public boolean hasValue(String name) {
+ return sparseFeatures.containsKey(name);
+ }
+
+ /**
+ * Return the weight of a dense feature, indexed by its feature index.
+ *
+ * @param id
+ * @return the dense feature's value, or 0 if not found.
+ */
+ public float getDense(int id) {
+ if (id < denseFeatures.size())
+ return denseFeatures.get(id);
return 0.0f;
}
- public void put(String feature, float value) {
- features.put(feature, value);
+ public void increment(String feature, float value) {
+ sparseFeatures.put(feature, getSparse(feature) + value);
+ }
+
+ public void increment(int id, float value) {
+ while (id >= denseFeatures.size())
+ denseFeatures.add(0.0f);
+ denseFeatures.set(id, getDense(id) + value);
+ }
+
+ public void set(String feature, float value) {
+ sparseFeatures.put(feature, value);
+ }
+
+ public void set(int id, float value) {
+ while (id >= denseFeatures.size())
+ denseFeatures.add(0.0f);
+ denseFeatures.set(id, value);
}
public Map<String, Float> getMap() {
- return features;
+ return sparseFeatures;
}
/**
@@ -153,16 +253,18 @@
*/
public float innerProduct(FeatureVector other) {
float cost = 0.0f;
- for (String key : features.keySet())
- if (other.containsKey(key))
- cost += features.get(key) * other.get(key);
+ for (int i = 0; i < DENSE_FEATURE_NAMES.size(); i++)
+ cost += getDense(i) * other.getDense(i);
+
+ for (String key : sparseFeatures.keySet())
+ cost += sparseFeatures.get(key) * other.getSparse(key);
return cost;
}
public void times(float value) {
- for (String key : features.keySet())
- features.put(key, features.get(key) * value);
+ for (String key : sparseFeatures.keySet())
+ sparseFeatures.put(key, sparseFeatures.get(key) * value);
}
/***
@@ -175,18 +277,17 @@
HashSet<String> printed_keys = new HashSet<String>();
// First print all the dense feature names in order
- for (String key: Decoder.dense_feature_names) {
- float value = features.containsKey(key) ? features.get(key) : 0.0f;
- outputString += String.format("%s=%.3f ", key.replaceAll("_", "-"), value);
- printed_keys.add(key);
+ for (int i = 0; i < DENSE_FEATURE_NAMES.size(); i++) {
+ outputString += String.format("%s=%.3f ", DENSE_FEATURE_NAMES.get(i).replaceAll("_", "-"), denseFeatures.get(i));
+ printed_keys.add(DENSE_FEATURE_NAMES.get(i));
}
// Now print the sparse features
- ArrayList<String> keys = new ArrayList<String>(features.keySet());
+ ArrayList<String> keys = new ArrayList<String>(sparseFeatures.keySet());
Collections.sort(keys);
for (String key: keys) {
if (! printed_keys.contains(key)) {
- float value = features.get(key);
+ float value = sparseFeatures.get(key);
if (key.equals("OOVPenalty"))
// force moses to see it as sparse
key = "OOV_Penalty";
@@ -207,24 +308,18 @@
HashSet<String> printed_keys = new HashSet<String>();
// First print all the dense feature names in order
- for (String key: Decoder.dense_feature_names) {
- float value = features.containsKey(key) ? features.get(key) : 0.0f;
- outputString += String.format("%s=%.3f ", key, value);
- printed_keys.add(key);
+ for (int i = 0; i < DENSE_FEATURE_NAMES.size(); i++) {
+ outputString += String.format("%s=%.3f ", DENSE_FEATURE_NAMES.get(i), getDense(i));
+ printed_keys.add(DENSE_FEATURE_NAMES.get(i));
}
// Now print the rest of the features
- ArrayList<String> keys = new ArrayList<String>(features.keySet());
+ ArrayList<String> keys = new ArrayList<String>(sparseFeatures.keySet());
Collections.sort(keys);
for (String key: keys)
if (! printed_keys.contains(key))
- outputString += String.format("%s=%.3f ", key, features.get(key));
+ outputString += String.format("%s=%.3f ", key, sparseFeatures.get(key));
return outputString.trim();
}
-
- public static boolean isDense(String feature) {
- return feature.startsWith("tm_") || feature.startsWith("lm_") || feature.equals("WordPenalty")
- || feature.equals("Distortion") || feature.equals("PhrasePenalty");
- }
}
diff --git a/src/joshua/decoder/ff/OOVPenalty.java b/src/joshua/decoder/ff/OOVPenalty.java
index 9c80829..27c475f 100644
--- a/src/joshua/decoder/ff/OOVPenalty.java
+++ b/src/joshua/decoder/ff/OOVPenalty.java
@@ -1,5 +1,6 @@
package joshua.decoder.ff;
+import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
@@ -39,6 +40,15 @@
for (OOVItem item: config.oovList)
oovWeights.put(Vocabulary.id(item.label), item.weight);
}
+
+ @Override
+ public ArrayList<String> reportDenseFeatures(int index) {
+ denseFeatureIndex = index;
+
+ ArrayList<String> names = new ArrayList<String>();
+ names.add(name);
+ return names;
+ }
/**
* OOV rules cover exactly one word, and such rules belong to a grammar whose owner is "oov". Each
@@ -50,7 +60,8 @@
Sentence sentence, Accumulator acc) {
if (rule != null && this.ownerID == rule.getOwner()) {
- acc.add(name, getValue(rule.getLHS()));
+// acc.add(name, getValue(rule.getLHS()));
+ acc.add(denseFeatureIndex, getValue(rule.getLHS()));
}
return null;
@@ -66,7 +77,7 @@
@Override
public float estimateCost(Rule rule, Sentence sentence) {
if (rule != null && this.ownerID == rule.getOwner())
- return weights.get(name) * getValue(rule.getLHS());
+ return weights.getDense(denseFeatureIndex) * getValue(rule.getLHS());
return 0.0f;
}
diff --git a/src/joshua/decoder/ff/PhraseModel.java b/src/joshua/decoder/ff/PhraseModel.java
index a64b38d..72e2c2c 100644
--- a/src/joshua/decoder/ff/PhraseModel.java
+++ b/src/joshua/decoder/ff/PhraseModel.java
@@ -1,11 +1,13 @@
package joshua.decoder.ff;
+import java.util.ArrayList;
import java.util.List;
import joshua.corpus.Vocabulary;
import joshua.decoder.JoshuaConfiguration;
import joshua.decoder.chart_parser.SourcePath;
import joshua.decoder.ff.state_maintenance.DPState;
+import joshua.decoder.ff.tm.Grammar;
import joshua.decoder.ff.tm.Rule;
import joshua.decoder.hypergraph.HGNode;
import joshua.decoder.segment_file.Sentence;
@@ -25,55 +27,56 @@
/* The owner of the grammar. */
private int ownerID;
-
+ private String owner;
+
private float[] phrase_weights = null;
- public PhraseModel(FeatureVector weights, String[] args, JoshuaConfiguration config) {
+ public PhraseModel(FeatureVector weights, String[] args, JoshuaConfiguration config, Grammar g) {
super(weights, "tm_", args, config);
String owner = parsedArgs.get("owner");
this.name = String.format("tm_%s", owner);
/*
- * This is an efficiency hack; we cache the full dot product of the weights with the dense
- * features, storing them as a value under the name "tm_OWNER". There won't be a weight for
- * that, so we add a weight to the weights vector. This weight will never be output because when
- * the k-best list is retrieved and the actual feature values asked for, the accumulator will
- * fetch the fine-grained dense features.
+ * Determine the number of features by querying the example grammar that was passed in.
*/
- if (weights.containsKey(name)) {
- System.err.println(String.format(
- "* FATAL: Your weights file contains an entry for '%s', shouldn't", name));
- System.exit(1);
- }
- weights.put(name, 1.0f);
-
- int num_features = 0;
- while (weights.containsKey(String.format("tm_%s_%d", owner, num_features)))
- num_features++;
+ phrase_weights = new float[g.getNumDenseFeatures()];
+// System.err.println(String.format("GOT %d FEATURES FOR %s", g.getNumDenseFeatures(), owner));
+ for (int i = 0; i < phrase_weights.length; i++)
+ phrase_weights[i] = weights.getSparse(String.format("tm_%s_%d", owner, i));
- phrase_weights = new float[num_features];
- for (int i = 0; i < num_features; i++)
- phrase_weights[i] = weights.get(String.format("tm_%s_%d", owner, i));
-
// Store the owner.
+ this.owner = owner;
this.ownerID = Vocabulary.id(owner);
}
/**
- * Estimates the cost of applying this rule, which is just the score of the precomputable
- * feature functions.
+ * Just register a single weight, tm_OWNER, and use that to set its precomputed cost
+ */
+ @Override
+ public ArrayList<String> reportDenseFeatures(int index) {
+ denseFeatureIndex = index;
+
+ ArrayList<String> names = new ArrayList<String>();
+ for (int i = 0; i < phrase_weights.length; i++)
+ names.add(String.format("tm_%s_%d", owner, i));
+ return names;
+ }
+
+ /**
+ * Estimates the cost of applying this rule, which is just the score of the precomputable feature
+ * functions.
*/
@Override
public float estimateCost(final Rule rule, Sentence sentence) {
-
+
if (rule != null && rule.getOwner() == ownerID) {
if (rule.getPrecomputableCost() <= Float.NEGATIVE_INFINITY)
rule.setPrecomputableCost(phrase_weights, weights);
-
+
return rule.getPrecomputableCost();
}
-
+
return 0.0f;
}
@@ -90,17 +93,19 @@
* add each feature, but rather compute the inner product and add *that*. This is totally
* cheating; the Accumulator is supposed to be a generic object. But without this cheat
*/
- if (acc instanceof ScoreAccumulator) {
- if (rule.getPrecomputableCost() <= Float.NEGATIVE_INFINITY) {
-// float score = rule.getFeatureVector().innerProduct(weights);
- rule.setPrecomputableCost(phrase_weights, weights);
- }
- acc.add(name, rule.getPrecomputableCost());
- } else {
- FeatureVector features = rule.getFeatureVector();
- for (String key : features.keySet())
- acc.add(key, features.get(key));
+ if (rule.getPrecomputableCost() <= Float.NEGATIVE_INFINITY) {
+ // float score = rule.getFeatureVector().innerProduct(weights);
+ rule.setPrecomputableCost(phrase_weights, weights);
}
+
+// System.err.println(String.format("RULE = %s / %f", rule.getEnglishWords(), rule.getPrecomputableCost()));
+ for (int k = 0; k < phrase_weights.length; k++) {
+// System.err.println(String.format("k = %d, denseFeatureIndex = %d, owner = %s, ownerID = %d", k, denseFeatureIndex, owner, ownerID));
+ acc.add(k + denseFeatureIndex, rule.getDenseFeature(k));
+ }
+
+ for (String key: rule.getFeatureVector().keySet())
+ acc.add(key, rule.getFeatureVector().getSparse(key));
}
return null;
diff --git a/src/joshua/decoder/ff/PhrasePenalty.java b/src/joshua/decoder/ff/PhrasePenalty.java
index 90622e6..33ee154 100644
--- a/src/joshua/decoder/ff/PhrasePenalty.java
+++ b/src/joshua/decoder/ff/PhrasePenalty.java
@@ -1,5 +1,6 @@
package joshua.decoder.ff;
+import java.util.ArrayList;
import java.util.List;
import joshua.corpus.Vocabulary;
@@ -24,12 +25,10 @@
public class PhrasePenalty extends StatelessFF {
private int owner = 0;
- private float weight = 0.0f;
private float value = 1.0f;
public PhrasePenalty(FeatureVector weights, String[] args, JoshuaConfiguration config) {
super(weights, "PhrasePenalty", args, config);
- this.weight = weights.get(name);
if (parsedArgs.containsKey("owner"))
this.owner = Vocabulary.id(parsedArgs.get("owner"));
else // default
@@ -42,10 +41,18 @@
if (rule != null && rule != Hypothesis.BEGIN_RULE && rule != Hypothesis.END_RULE
&& (owner == 0 || rule.getOwner() == owner))
- acc.add(name, value);
+ acc.add(denseFeatureIndex, value);
return null;
}
+
+ @Override
+ public ArrayList<String> reportDenseFeatures(int index) {
+ denseFeatureIndex = index;
+ ArrayList<String> names = new ArrayList<String>();
+ names.add(name);
+ return names;
+ }
/**
* Returns the *weighted* estimate.
@@ -55,7 +62,7 @@
public float estimateCost(Rule rule, Sentence sentence) {
if (rule != null && rule != Hypothesis.BEGIN_RULE && rule != Hypothesis.END_RULE
&& (owner == 0 || rule.getOwner() == owner))
- return weight * value;
+ return weights.getDense(denseFeatureIndex) * value;
return 0.0f;
}
}
diff --git a/src/joshua/decoder/ff/RuleCountBin.java b/src/joshua/decoder/ff/RuleCountBin.java
index c50aa25..501e9de 100644
--- a/src/joshua/decoder/ff/RuleCountBin.java
+++ b/src/joshua/decoder/ff/RuleCountBin.java
@@ -30,7 +30,7 @@
if (rule.getOwner() != Vocabulary.id("pt"))
return null;
- float rarityPenalty = -rule.getFeatureVector().get(String.format("tm_pt_%d", field));
+ float rarityPenalty = -rule.getFeatureVector().getSparse(String.format("tm_pt_%d", field));
int count = (int) (1.0 - Math.log(rarityPenalty));
String feature = "RuleCountBin_inf";
diff --git a/src/joshua/decoder/ff/SourcePathFF.java b/src/joshua/decoder/ff/SourcePathFF.java
index 4172a46..57414cd 100644
--- a/src/joshua/decoder/ff/SourcePathFF.java
+++ b/src/joshua/decoder/ff/SourcePathFF.java
@@ -1,5 +1,6 @@
package joshua.decoder.ff;
+import java.util.ArrayList;
import java.util.List;
import joshua.decoder.JoshuaConfiguration;
@@ -24,12 +25,21 @@
public SourcePathFF(FeatureVector weights, String[] args, JoshuaConfiguration config) {
super(weights, "SourcePath", args, config);
}
+
+ @Override
+ public ArrayList<String> reportDenseFeatures(int index) {
+ denseFeatureIndex = index;
+
+ ArrayList<String> names = new ArrayList<String>();
+ names.add(name);
+ return names;
+ }
@Override
public DPState compute(Rule rule, List<HGNode> tailNodes, int i, int j, SourcePath sourcePath,
Sentence sentence, Accumulator acc) {
- acc.add(name, sourcePath.getPathCost());
+ acc.add(denseFeatureIndex, sourcePath.getPathCost());
return null;
}
}
diff --git a/src/joshua/decoder/ff/TargetBigram.java b/src/joshua/decoder/ff/TargetBigram.java
index 29dc992..1b842ac 100644
--- a/src/joshua/decoder/ff/TargetBigram.java
+++ b/src/joshua/decoder/ff/TargetBigram.java
@@ -76,8 +76,8 @@
}
} catch (IOException e) {
- // TODO Auto-generated catch block
- e.printStackTrace();
+ System.err.println(String.format("* FATAL: couldn't load TargetBigram vocabulary '%s'", filename));
+ System.exit(1);
}
}
diff --git a/src/joshua/decoder/ff/WordPenalty.java b/src/joshua/decoder/ff/WordPenalty.java
index 62105e8..e73991b 100644
--- a/src/joshua/decoder/ff/WordPenalty.java
+++ b/src/joshua/decoder/ff/WordPenalty.java
@@ -1,5 +1,6 @@
package joshua.decoder.ff;
+import java.util.ArrayList;
import java.util.List;
import joshua.decoder.JoshuaConfiguration;
@@ -21,7 +22,7 @@
public WordPenalty(final FeatureVector weights, String[] args, JoshuaConfiguration config) {
super(weights, "WordPenalty", args, config);
-
+
if (parsedArgs.containsKey("value"))
OMEGA = Float.parseFloat(parsedArgs.get("value"));
}
@@ -29,17 +30,31 @@
@Override
public DPState compute(Rule rule, List<HGNode> tailNodes, int i, int j, SourcePath sourcePath,
Sentence sentence, Accumulator acc) {
-
- if (rule != null && rule != Hypothesis.BEGIN_RULE && rule != Hypothesis.END_RULE)
- acc.add(name, OMEGA * (rule.getEnglish().length - rule.getArity()));
+ if (rule != null) {
+ // TODO: this is an inefficient way to do this. Find a better way to not apply this rule
+ // to start and stop glue rules when phrase-based decoding.
+ if (config.search_algorithm.equals("cky")
+ || (rule != Hypothesis.BEGIN_RULE && rule != Hypothesis.END_RULE))
+ // acc.add(name, OMEGA * (rule.getEnglish().length - rule.getArity()));
+ acc.add(denseFeatureIndex, OMEGA * (rule.getEnglish().length - rule.getArity()));
+ }
+
return null;
}
-
+
+ @Override
+ public ArrayList<String> reportDenseFeatures(int index) {
+ denseFeatureIndex = index;
+ ArrayList<String> names = new ArrayList<String>();
+ names.add(name);
+ return names;
+ }
+
@Override
public float estimateCost(Rule rule, Sentence sentence) {
if (rule != null)
- return weights.get(name) * OMEGA * (rule.getEnglish().length - rule.getArity());
+ return weights.getDense(denseFeatureIndex) * OMEGA * (rule.getEnglish().length - rule.getArity());
return 0.0f;
}
}
diff --git a/src/joshua/decoder/ff/lm/KenLM.java b/src/joshua/decoder/ff/lm/KenLM.java
new file mode 100644
index 0000000..4202e1c
--- /dev/null
+++ b/src/joshua/decoder/ff/lm/KenLM.java
@@ -0,0 +1,173 @@
+package joshua.decoder.ff.lm;
+
+import joshua.decoder.ff.lm.NGramLanguageModel;
+import joshua.decoder.ff.state_maintenance.KenLMState;
+
+/**
+ * JNI wrapper for KenLM. This version of KenLM supports two use cases, implemented by the separate
+ * feature functions KenLMFF and LanguageModelFF. KenLMFF uses the RuleScore() interface in
+ * lm/left.hh, returning a state pointer representing the KenLM state, while LangaugeModelFF handles
+ * state by itself and just passes in the ngrams for scoring.
+ *
+ * @author Kenneth Heafield
+ * @author Matt Post <post@cs.jhu.edu>
+ */
+
+public class KenLM implements NGramLanguageModel, Comparable<KenLM> {
+
+ static {
+ try {
+ System.loadLibrary("ken");
+ } catch (UnsatisfiedLinkError e) {
+ System.err.println("* FATAL: Can't find libken.so (libken.dylib on OS X) in $JOSHUA/lib");
+ System.err.println("* This probably means that the KenLM library didn't compile.");
+ System.err.println("* Make sure that BOOST_ROOT is set to the root of your boost");
+ System.err.println("* installation (it's not /opt/local/, the default), change to");
+ System.err.println("* $JOSHUA, and type 'ant kenlm'. If problems persist, see the");
+ System.err.println("* website (joshua-decoder.org).");
+ System.exit(1);
+ }
+ }
+
+ private final long pointer;
+
+ // this is read from the config file, used to set maximum order
+ private final int ngramOrder;
+ // inferred from model file (may be larger than ngramOrder)
+ private final int N;
+ // whether left-state minimization was requested
+ private boolean minimizing;
+
+ private final static native long construct(String file_name);
+
+ private final static native void destroy(long ptr);
+
+ private final static native int order(long ptr);
+
+ private final static native boolean registerWord(long ptr, String word, int id);
+
+ private final static native float prob(long ptr, int words[]);
+
+ private final static native StateProbPair probRule(long ptr, long pool, long words[]);
+
+ private final static native float estimateRule(long ptr, long words[]);
+
+ private final static native float probString(long ptr, int words[], int start);
+
+ public final static native long createPool();
+ public final static native void destroyPool(long pointer);
+
+ public KenLM(int order, String file_name) {
+ ngramOrder = order;
+
+ pointer = construct(file_name);
+ N = order(pointer);
+ }
+
+ public void destroy() {
+ destroy(pointer);
+ }
+
+ public int getOrder() {
+ return ngramOrder;
+ }
+
+ public boolean registerWord(String word, int id) {
+ return registerWord(pointer, word, id);
+ }
+
+ public float prob(int words[]) {
+ return prob(pointer, words);
+ }
+
+ // Apparently Zhifei starts some array indices at 1. Change to 0-indexing.
+ public float probString(int words[], int start) {
+ return probString(pointer, words, start - 1);
+ }
+
+ /**
+ * This function is the bridge to the interface in kenlm/lm/left.hh, which has KenLM score the
+ * whole rule. It takes a list of words and states retrieved from tail nodes (nonterminals in the
+ * rule). Nonterminals have a negative value so KenLM can distinguish them. The sentence number is
+ * needed so KenLM knows which memory pool to use. When finished, it returns the updated KenLM
+ * state and the LM probability incurred along this rule.
+ *
+ * @param words
+ * @param sentId
+ * @return
+ */
+ public StateProbPair probRule(long[] words, long poolPointer) {
+
+ StateProbPair pair = null;
+ try {
+ pair = probRule(pointer, poolPointer, words);
+ } catch (NoSuchMethodError e) {
+ e.printStackTrace();
+ System.exit(1);
+ }
+
+ return pair;
+ }
+
+ /**
+ * Public facing function that estimates the cost of a rule, which value is used for sorting
+ * rules during cube pruning.
+ *
+ * @param words
+ * @return the estimated cost of the rule (the (partial) n-gram probabilities of all words in the rule)
+ */
+ public float estimateRule(long[] words) {
+ float estimate = 0.0f;
+ try {
+ estimate = estimateRule(pointer, words);
+ } catch (NoSuchMethodError e) {
+ e.printStackTrace();
+ System.exit(1);
+ }
+
+ return estimate;
+ }
+
+
+ /**
+ * Inner class used to hold the results returned from KenLM with left-state minimization. Note
+ * that inner classes have to be static to be accessible from the JNI!
+ */
+ public static class StateProbPair {
+ public KenLMState state = null;
+ public float prob = 0.0f;
+
+ public StateProbPair(long state, float prob) {
+ this.state = new KenLMState(state);
+ this.prob = prob;
+ }
+ }
+
+ @Override
+ public int compareTo(KenLM other) {
+ if (this == other)
+ return 0;
+ else
+ return -1;
+ }
+
+ /**
+ * These functions are used if KenLM is invoked under LanguageModelFF instead of KenLMFF.
+ */
+ @Override
+ public float sentenceLogProbability(int[] sentence, int order, int startIndex) {
+ return probString(sentence, startIndex);
+ }
+
+ @Override
+ public float ngramLogProbability(int[] ngram, int order) {
+ if (order != N && order != ngram.length)
+ throw new RuntimeException("Lower order not supported.");
+ return prob(ngram);
+ }
+
+ @Override
+ public float ngramLogProbability(int[] ngram) {
+ return prob(ngram);
+ }
+}
diff --git a/src/joshua/decoder/ff/lm/LanguageModelFF.java b/src/joshua/decoder/ff/lm/LanguageModelFF.java
index cb01367..75eedef 100644
--- a/src/joshua/decoder/ff/lm/LanguageModelFF.java
+++ b/src/joshua/decoder/ff/lm/LanguageModelFF.java
@@ -1,9 +1,5 @@
package joshua.decoder.ff.lm;
-import java.io.BufferedReader;
-
-import java.io.File;
-import java.io.FileReader;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
@@ -18,7 +14,7 @@
import joshua.decoder.ff.FeatureVector;
import joshua.decoder.ff.StatefulFF;
import joshua.decoder.ff.lm.berkeley_lm.LMGrammarBerkeley;
-import joshua.decoder.ff.lm.kenlm.jni.KenLM;
+import joshua.decoder.ff.lm.KenLM;
import joshua.decoder.ff.state_maintenance.DPState;
import joshua.decoder.ff.state_maintenance.NgramDPState;
import joshua.decoder.ff.tm.Rule;
@@ -65,7 +61,7 @@
*/
protected final int ngramOrder;
- /**
+ /*
* We cache the weight of the feature since there is only one.
*/
protected float weight;
@@ -102,15 +98,16 @@
*/
private void read(String file_name) throws IOException {
- File class_file = new File(file_name);
- BufferedReader br = new BufferedReader(new FileReader(class_file));
- String line;
-
- while ((line = br.readLine()) != null) {
+ int lineno = 0;
+ for (String line: new joshua.util.io.LineReader(file_name, false)) {
+ lineno++;
String[] lineComp = line.trim().split("\\s+");
- this.classMap.put(Vocabulary.id(lineComp[0]), Integer.parseInt(lineComp[1]));
+ try {
+ this.classMap.put(Vocabulary.id(lineComp[0]), Integer.parseInt(lineComp[1]));
+ } catch (java.lang.ArrayIndexOutOfBoundsException e) {
+ System.err.println(String.format("* WARNING: bad vocab line #%d '%s'", lineno, line));
+ }
}
- br.close();
}
}
@@ -121,6 +118,7 @@
this.type = parsedArgs.get("lm_type");
this.ngramOrder = Integer.parseInt(parsedArgs.get("lm_order"));
this.path = parsedArgs.get("lm_file");
+
if (parsedArgs.containsKey("class_map"))
try {
this.isClassLM = true;
@@ -130,10 +128,20 @@
e.printStackTrace();
}
- this.weight = weights.get(name);
+ // The dense feature initialization hasn't happened yet, so we have to retrieve this as sparse
+ this.weight = weights.getSparse(name);
initializeLM();
}
+
+ @Override
+ public ArrayList<String> reportDenseFeatures(int index) {
+ denseFeatureIndex = index;
+
+ ArrayList<String> names = new ArrayList<String>();
+ names.add(name);
+ return names;
+ }
/**
* Initializes the underlying language model.
@@ -406,7 +414,8 @@
}
}
}
- acc.add(name, transitionLogP);
+// acc.add(name, transitionLogP);
+ acc.add(denseFeatureIndex, transitionLogP);
if (left_context != null) {
return new NgramDPState(left_context, Arrays.copyOfRange(current, ccount - this.ngramOrder
@@ -448,7 +457,8 @@
}
// Tell the accumulator
- acc.add(name, res);
+// acc.add(name, res);
+ acc.add(denseFeatureIndex, res);
// State is the same
return new NgramDPState(leftContext, rightContext);
diff --git a/src/joshua/decoder/ff/lm/StateMinimizingLanguageModel.java b/src/joshua/decoder/ff/lm/StateMinimizingLanguageModel.java
index b0d944c..e6a340e 100644
--- a/src/joshua/decoder/ff/lm/StateMinimizingLanguageModel.java
+++ b/src/joshua/decoder/ff/lm/StateMinimizingLanguageModel.java
@@ -1,5 +1,6 @@
package joshua.decoder.ff.lm;
+import java.util.ArrayList;
import java.util.List;
import java.util.concurrent.ConcurrentHashMap;
@@ -7,8 +8,8 @@
import joshua.decoder.JoshuaConfiguration;
import joshua.decoder.chart_parser.SourcePath;
import joshua.decoder.ff.FeatureVector;
-import joshua.decoder.ff.lm.kenlm.jni.KenLM;
-import joshua.decoder.ff.lm.kenlm.jni.KenLM.StateProbPair;
+import joshua.decoder.ff.lm.KenLM;
+import joshua.decoder.ff.lm.KenLM.StateProbPair;
import joshua.decoder.ff.state_maintenance.DPState;
import joshua.decoder.ff.state_maintenance.KenLMState;
import joshua.decoder.ff.tm.Rule;
@@ -35,6 +36,15 @@
System.exit(-1);
}
}
+
+ @Override
+ public ArrayList<String> reportDenseFeatures(int index) {
+ denseFeatureIndex = index;
+
+ ArrayList<String> names = new ArrayList<String>();
+ names.add(name);
+ return names;
+ }
/**
* Initializes the underlying language model.
@@ -125,7 +135,8 @@
StateProbPair pair = ((KenLM) languageModel).probRule(words, poolMap.get(sentID));
// Record the prob
- acc.add(name, pair.prob);
+// acc.add(name, pair.prob);
+ acc.add(denseFeatureIndex, pair.prob);
// Return the state
return pair.state;
diff --git a/src/joshua/decoder/ff/lm/berkeley_lm/LMGrammarBerkeley.java b/src/joshua/decoder/ff/lm/berkeley_lm/LMGrammarBerkeley.java
index 1fd06aa..6c9bae4 100644
--- a/src/joshua/decoder/ff/lm/berkeley_lm/LMGrammarBerkeley.java
+++ b/src/joshua/decoder/ff/lm/berkeley_lm/LMGrammarBerkeley.java
@@ -23,8 +23,6 @@
*/
public class LMGrammarBerkeley extends DefaultNGramLanguageModel {
-
-
private ArrayEncodedNgramLanguageModel<String> lm;
private static final Logger logger = Logger.getLogger(LMGrammarBerkeley.class.getName());
diff --git a/src/joshua/decoder/ff/lm/kenlm/LICENSE b/src/joshua/decoder/ff/lm/kenlm/LICENSE
deleted file mode 100644
index 9e2556e..0000000
--- a/src/joshua/decoder/ff/lm/kenlm/LICENSE
+++ /dev/null
@@ -1,24 +0,0 @@
-Most of the code here is licensed under the LGPL. There are exceptions that
-have their own licenses, listed below. See comments in those files for more
-details.
-
-util/murmur_hash.cc
-util/string_piece.hh and util/string_piece.cc
-util/double-conversion/LICENSE covers util/double-conversion except Jamfile
-util/file.cc contains a modified implementation of mkstemp under the LGPL
-jam-files/LICENSE_1_0.txt covers jam-files except Jamroot
-
-For the rest:
-
- KenLM is free software: you can redistribute it and/or modify
- it under the terms of the GNU Lesser General Public License as published
- by the Free Software Foundation, either version 3 of the License, or
- (at your option) any later version.
-
- KenLM is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- GNU Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public License
- along with Avenue code. If not, see <http://www.gnu.org/licenses/>.
diff --git a/src/joshua/decoder/ff/lm/kenlm/Makefile b/src/joshua/decoder/ff/lm/kenlm/Makefile
deleted file mode 100644
index ecc38c3..0000000
--- a/src/joshua/decoder/ff/lm/kenlm/Makefile
+++ /dev/null
@@ -1,76 +0,0 @@
-all: build_binary ngram_query install
-
-CC = g++
-
-# The max order for the LM, this was increased to incorporate Class LM (9grams)
-MAX_ORDER = 9
-
-# Set to the location of your boost libraries (only set if not in the environment)
-BOOST_ROOT ?= /opt/local
-
-# Should be set to blank or "-mt", depending on whether you have the boost multithreaded
-# libraries installed
-#BOOST_MT = -mt
-
-CORE = lm/bhiksha.o lm/binary_format.o lm/config.o lm/lm_exception.o lm/model.o lm/quantize.o lm/read_arpa.o lm/search_hashed.o lm/search_trie.o lm/sizes.o lm/trie.o lm/trie_sort.o lm/value_build.o lm/virtual_interface.o lm/vocab.o util/bit_packing.o util/ersatz_progress.o util/exception.o util/file.o util/file_piece.o util/mmap.o util/murmur_hash.o util/pool.o util/read_compressed.o util/scoped.o util/usage.o util/double-conversion/bignum.o util/double-conversion/bignum-dtoa.o util/double-conversion/cached-powers.o util/double-conversion/diy-fp.o util/double-conversion/double-conversion.o util/double-conversion/fast-dtoa.o util/double-conversion/fixed-dtoa.o util/double-conversion/strtod.o util/parallel_read.o
-
-HEADERS= lm/bhiksha.hh lm/binary_format.hh lm/blank.hh lm/builder/adjust_counts.hh lm/builder/corpus_count.hh lm/builder/discount.hh lm/builder/header_info.hh lm/builder/initial_probabilities.hh lm/builder/interpolate.hh lm/builder/joint_order.hh lm/builder/multi_stream.hh lm/builder/ngram.hh lm/builder/ngram_stream.hh lm/builder/pipeline.hh lm/builder/print.hh lm/builder/sort.hh lm/config.hh lm/enumerate_vocab.hh lm/facade.hh lm/left.hh lm/lm_exception.hh lm/max_order.hh lm/model.hh lm/model_type.hh lm/ngram_query.hh lm/partial.hh lm/quantize.hh lm/read_arpa.hh lm/return.hh lm/search_hashed.hh lm/search_trie.hh lm/sizes.hh lm/state.hh lm/trie.hh lm/trie_sort.hh lm/value_build.hh lm/value.hh lm/virtual_interface.hh lm/vocab.hh lm/weights.hh lm/word_index.hh util/bit_packing.hh util/ersatz_progress.hh util/exception.hh util/fake_ofstream.hh util/file.hh util/file_piece.hh util/getopt.hh util/have.hh util/joint_sort.hh util/mmap.hh util/multi_intersection.hh util/murmur_hash.hh util/pcqueue.hh util/pool.hh util/probing_hash_table.hh util/proxy_iterator.hh util/read_compressed.hh util/scoped.hh util/sized_iterator.hh util/sorted_uniform.hh util/stream/block.hh util/stream/chain.hh util/stream/config.hh util/stream/io.hh util/stream/line_input.hh util/stream/multi_progress.hh util/stream/sort.hh util/stream/stream.hh util/stream/timer.hh util/string_piece_hash.hh util/string_piece.hh util/thread_pool.hh util/tokenize_piece.hh util/usage.hh util/parallel_read.hh
-
-CPPFLAGS = $(CXXFLAGS) -I. -I$(BOOST_ROOT)/include -O3 -DKENLM_MAX_ORDER=$(MAX_ORDER) -DHAVE_ZLIB -DNDEBUG
-
-.cc.o: $(HEADERS)
- $(CC) -c $(CPPFLAGS) -fPIC -o $@ $<
-
-ifeq ($(shell uname -s),Linux)
-RT=-lrt
-endif
-
-#query-related executables
-EXES=build_binary ngram_query
-build_binary: $(CORE) $(HEADERS) lm/build_binary_main.cc
- $(CC) $(CPPFLAGS) $(CORE) lm/build_binary_main.cc -o build_binary -lz $(RT)
-ngram_query: $(CORE) $(HEADERS) lm/query_main.cc
- $(CC) $(CPPFLAGS) $(CORE) lm/query_main.cc -o ngram_query -lz $(RT)
-
-INSTALL=build_binary ngram_query
-
-#lmplz
-SHELL=bash
-ifeq (,$(wildcard $(BOOST_ROOT)/lib/libboost_thread.a))
- $(error BOOST_ROOT ($(BOOST_ROOT)) does not point to a Boost installation, quitting...)
-endif
-ifeq ($(shell $(CC) -L$(BOOST_ROOT)/lib -lboost_program_options$(BOOST_MT) -lboost_thread$(BOOST_MT) -x c++ - <<<'int main() {}' -o dummy && rm dummy && echo Boost),Boost)
- $(info Detected Boost)
-LMPLZ=lm/builder/adjust_counts.o lm/builder/corpus_count.o lm/builder/initial_probabilities.o lm/builder/interpolate.o lm/builder/lmplz_main.o lm/builder/pipeline.o lm/builder/print.o lm/builder/output.o util/stream/chain.o util/stream/io.o util/stream/line_input.o util/stream/multi_progress.o
-lmplz: $(CORE) $(HEADERS) $(LMPLZ)
- $(CC) $(CPPFLAGS) $(CORE) $(LMPLZ) -o lmplz -lz -L$(BOOST_ROOT)/lib -lboost_program_options$(BOOST_MT) -lboost_thread$(BOOST_MT) -lboost_system$(BOOST_MT) $(RT) -pthread
-EXES+=lmplz
-else
- echo "lmplz (used for building language models) failed to build. Is Boost installed?"
- echo "If yes, maybe you have the multithreaded versions installed. Edit"
- echo "\$JOSHUA/src/joshua/decoder/ff/lm/kenlm/Makefile and uncomment the BOOST_MT line,"
- echo "then rerun 'ant kenlm'"
-endif
-
-#jni object
-ifeq ($(shell uname -s),Darwin)
-libken.dylib: $(CORE) $(HEADERS) jni/wrap.cc
- $(CC) -I /System/Library/Frameworks/JavaVM.framework/Versions/A/Headers -DNO_ICU $(CPPFLAGS) jni/wrap.cc -I/System/Library/Frameworks/JavaVM.framework/Home/include -I/System/Library/Frameworks/JavaVM.framework/Home/include/linux $(CORE) -fpic -dynamiclib -Wl,-headerpad_max_install_names,-undefined,dynamic_lookup -o libken.dylib -lz -Wno-deprecated -pthread
-
-install: libken.dylib $(EXES)
- cp -f libken.dylib $(JOSHUA)/lib/
- cp -f $(EXES) $(JOSHUA)/bin/
-else
-libken.so: $(CORE) $(HEADERS) jni/wrap.cc
- $(CC) -I. -DNO_ICU $(CPPFLAGS) jni/wrap.cc -I$(JAVA_HOME)/include -I$(JAVA_HOME)/include/linux $(CORE) -fpic -shared -Wl,-soname,libken.so -o libken.so -lz -Wno-deprecated -pthread $(RT)
-
-install: libken.so $(EXES)
- cp -f libken.so $(JOSHUA)/lib/libken.so
- cp -f $(EXES) $(JOSHUA)/bin/
-endif
-
-.PHONY: clean
-
-clean:
- find . -name '*.o' -exec rm -f {} \;
- rm -f libken.dylib $(EXES)
diff --git a/src/joshua/decoder/ff/lm/kenlm/README.md b/src/joshua/decoder/ff/lm/kenlm/README.md
deleted file mode 100644
index dcacebc..0000000
--- a/src/joshua/decoder/ff/lm/kenlm/README.md
+++ /dev/null
@@ -1,96 +0,0 @@
-# KenLM
-
-Language model inference code by Kenneth Heafield (kenlm at kheafield.com)
-
-I do development in master on https://github.com/kpu/kenlm/. Normally, it works, but I do not guarantee it will compile, give correct answers, or generate non-broken binary files. For a more stable release, get http://kheafield.com/code/kenlm.tar.gz .
-
-The website http://kheafield.com/code/kenlm/ has more documentation. If you're a decoder developer, please download the latest version from there instead of copying from another decoder.
-
-## Compiling
-See BUILDING.
-
-## Estimation
-lmplz estimates unpruned language models with modified Kneser-Ney smoothing. After compiling with bjam, run
-```bash
-bin/lmplz -o 5 <text >text.arpa
-```
-The algorithm is on-disk, using an amount of memory that you specify. See http://kheafield.com/code/kenlm/estimation/ for more.
-
-MT Marathon 2012 team members Ivan Pouzyrevsky and Mohammed Mediani contributed to the computation design and early implementation. Jon Clark contributed to the design, clarified points about smoothing, and added logging.
-
-## Filtering
-
-filter takes an ARPA or count file and removes entries that will never be queried. The filter criterion can be corpus-level vocabulary, sentence-level vocabulary, or sentence-level phrases. Run
-```bash
-bin/filter
-```
-and see http://kheafield.com/code/kenlm/filter/ for more documentation.
-
-## Querying
-
-Two data structures are supported: probing and trie. Probing is a probing hash table with keys that are 64-bit hashes of n-grams and floats as values. Trie is a fairly standard trie but with bit-level packing so it uses the minimum number of bits to store word indices and pointers. The trie node entries are sorted by word index. Probing is the fastest and uses the most memory. Trie uses the least memory and a bit slower.
-
-With trie, resident memory is 58% of IRST's smallest version and 21% of SRI's compact version. Simultaneously, trie CPU's use is 81% of IRST's fastest version and 84% of SRI's fast version. KenLM's probing hash table implementation goes even faster at the expense of using more memory. See http://kheafield.com/code/kenlm/benchmark/.
-
-Binary format via mmap is supported. Run `./build_binary` to make one then pass the binary file name to the appropriate Model constructor.
-
-## Platforms
-`murmur_hash.cc` and `bit_packing.hh` perform unaligned reads and writes that make the code architecture-dependent.
-It has been sucessfully tested on x86\_64, x86, and PPC64.
-ARM support is reportedly working, at least on the iphone.
-
-Runs on Linux, OS X, Cygwin, and MinGW.
-
-Hideo Okuma and Tomoyuki Yoshimura from NICT contributed ports to ARM and MinGW.
-
-## Compile-time configuration
-There are a number of macros you can set on the g++ command line or in util/have.hh .
-
-* `KENLM_MAX_ORDER` is the maximum order that can be loaded. This is done to make state an efficient POD rather than a vector.
-* `HAVE_ICU` If your code links against ICU, define this to disable the internal StringPiece and replace it with ICU's copy of StringPiece, avoiding naming conflicts.
-
-ARPA files can be read in compressed format with these options:
-* `HAVE_ZLIB` Supports gzip. Link with -lz. I have enabled this by default.
-* `HAVE_BZLIB` Supports bzip2. Link with -lbz2.
-* `HAVE_XZLIB` Supports xz. Link with -llzma.
-
-Note that these macros impact only `read_compressed.cc` and `read_compressed_test.cc`. The bjam build system will auto-detect bzip2 and xz support.
-
-## Decoder developers
-- I recommend copying the code and distributing it with your decoder. However, please send improvements upstream.
-
-- Omit the lm/filter directory if you do not want the language model filter. Only that and tests depend on Boost.
-
-- Select the macros you want, listed in the previous section.
-
-- There are two build systems: compile.sh and Jamroot+Jamfile. They're pretty simple and are intended to be reimplemented in your build system.
-
-- Use either the interface in `lm/model.hh` or `lm/virtual_interface.hh`. Interface documentation is in comments of `lm/virtual_interface.hh` and `lm/model.hh`.
-
-- There are several possible data structures in `model.hh`. Use `RecognizeBinary` in `binary_format.hh` to determine which one a user has provided. You probably already implement feature functions as an abstract virtual base class with several children. I suggest you co-opt this existing virtual dispatch by templatizing the language model feature implementation on the KenLM model identified by `RecognizeBinary`. This is the strategy used in Moses and cdec.
-
-- See `lm/config.hh` for run-time tuning options.
-
-## Contributors
-Contributions to KenLM are welcome. Please base your contributions on https://github.com/kpu/kenlm and send pull requests (or I might give you commit access). Downstream copies in Moses and cdec are maintained by overwriting them so do not make changes there.
-
-## Python module
-Contributed by Victor Chahuneau.
-
-### Installation
-
-```bash
-pip install https://github.com/kpu/kenlm/archive/master.zip
-```
-
-### Basic Usage
-```python
-import kenlm
-model = kenlm.LanguageModel('lm/test.arpa')
-sentence = 'this is a sentence .'
-print(model.score(sentence))
-```
-
----
-
-The name was Hieu Hoang's idea, not mine.
diff --git a/src/joshua/decoder/ff/lm/kenlm/jni/KenLM.java b/src/joshua/decoder/ff/lm/kenlm/jni/KenLM.java
deleted file mode 100644
index a110aa9..0000000
--- a/src/joshua/decoder/ff/lm/kenlm/jni/KenLM.java
+++ /dev/null
@@ -1,173 +0,0 @@
-package joshua.decoder.ff.lm.kenlm.jni;
-
-import joshua.decoder.ff.lm.NGramLanguageModel;
-import joshua.decoder.ff.state_maintenance.KenLMState;
-
-/**
- * JNI wrapper for KenLM. This version of KenLM supports two use cases, implemented by the separate
- * feature functions KenLMFF and LanguageModelFF. KenLMFF uses the RuleScore() interface in
- * lm/left.hh, returning a state pointer representing the KenLM state, while LangaugeModelFF handles
- * state by itself and just passes in the ngrams for scoring.
- *
- * @author Kenneth Heafield
- * @author Matt Post <post@cs.jhu.edu>
- */
-
-public class KenLM implements NGramLanguageModel, Comparable<KenLM> {
-
- static {
- try {
- System.loadLibrary("ken");
- } catch (UnsatisfiedLinkError e) {
- System.err.println("* FATAL: Can't find libken.so (libken.dylib on OS X) in $JOSHUA/lib");
- System.err.println("* This probably means that the KenLM library didn't compile.");
- System.err.println("* Make sure that BOOST_ROOT is set to the root of your boost");
- System.err.println("* installation (it's not /opt/local/, the default), change to");
- System.err.println("* $JOSHUA, and type 'ant kenlm'. If problems persist, see the");
- System.err.println("* website (joshua-decoder.org).");
- System.exit(1);
- }
- }
-
- private final long pointer;
-
- // this is read from the config file, used to set maximum order
- private final int ngramOrder;
- // inferred from model file (may be larger than ngramOrder)
- private final int N;
- // whether left-state minimization was requested
- private boolean minimizing;
-
- private final static native long construct(String file_name);
-
- private final static native void destroy(long ptr);
-
- private final static native int order(long ptr);
-
- private final static native boolean registerWord(long ptr, String word, int id);
-
- private final static native float prob(long ptr, int words[]);
-
- private final static native StateProbPair probRule(long ptr, long pool, long words[]);
-
- private final static native float estimateRule(long ptr, long words[]);
-
- private final static native float probString(long ptr, int words[], int start);
-
- public final static native long createPool();
- public final static native void destroyPool(long pointer);
-
- public KenLM(int order, String file_name) {
- ngramOrder = order;
-
- pointer = construct(file_name);
- N = order(pointer);
- }
-
- public void destroy() {
- destroy(pointer);
- }
-
- public int getOrder() {
- return ngramOrder;
- }
-
- public boolean registerWord(String word, int id) {
- return registerWord(pointer, word, id);
- }
-
- public float prob(int words[]) {
- return prob(pointer, words);
- }
-
- // Apparently Zhifei starts some array indices at 1. Change to 0-indexing.
- public float probString(int words[], int start) {
- return probString(pointer, words, start - 1);
- }
-
- /**
- * This function is the bridge to the interface in kenlm/lm/left.hh, which has KenLM score the
- * whole rule. It takes a list of words and states retrieved from tail nodes (nonterminals in the
- * rule). Nonterminals have a negative value so KenLM can distinguish them. The sentence number is
- * needed so KenLM knows which memory pool to use. When finished, it returns the updated KenLM
- * state and the LM probability incurred along this rule.
- *
- * @param words
- * @param sentId
- * @return
- */
- public StateProbPair probRule(long[] words, long poolPointer) {
-
- StateProbPair pair = null;
- try {
- pair = probRule(pointer, poolPointer, words);
- } catch (NoSuchMethodError e) {
- e.printStackTrace();
- System.exit(1);
- }
-
- return pair;
- }
-
- /**
- * Public facing function that estimates the cost of a rule, which value is used for sorting
- * rules during cube pruning.
- *
- * @param words
- * @return the estimated cost of the rule (the (partial) n-gram probabilities of all words in the rule)
- */
- public float estimateRule(long[] words) {
- float estimate = 0.0f;
- try {
- estimate = estimateRule(pointer, words);
- } catch (NoSuchMethodError e) {
- e.printStackTrace();
- System.exit(1);
- }
-
- return estimate;
- }
-
-
- /**
- * Inner class used to hold the results returned from KenLM with left-state minimization. Note
- * that inner classes have to be static to be accessible from the JNI!
- */
- public static class StateProbPair {
- public KenLMState state = null;
- public float prob = 0.0f;
-
- public StateProbPair(long state, float prob) {
- this.state = new KenLMState(state);
- this.prob = prob;
- }
- }
-
- @Override
- public int compareTo(KenLM other) {
- if (this == other)
- return 0;
- else
- return -1;
- }
-
- /**
- * These functions are used if KenLM is invoked under LanguageModelFF instead of KenLMFF.
- */
- @Override
- public float sentenceLogProbability(int[] sentence, int order, int startIndex) {
- return probString(sentence, startIndex);
- }
-
- @Override
- public float ngramLogProbability(int[] ngram, int order) {
- if (order != N && order != ngram.length)
- throw new RuntimeException("Lower order not supported.");
- return prob(ngram);
- }
-
- @Override
- public float ngramLogProbability(int[] ngram) {
- return prob(ngram);
- }
-}
diff --git a/src/joshua/decoder/ff/lm/kenlm/jni/wrap.cc b/src/joshua/decoder/ff/lm/kenlm/jni/wrap.cc
deleted file mode 100644
index 963dda5..0000000
--- a/src/joshua/decoder/ff/lm/kenlm/jni/wrap.cc
+++ /dev/null
@@ -1,410 +0,0 @@
-#include "lm/enumerate_vocab.hh"
-#include "lm/model.hh"
-#include "lm/left.hh"
-#include "lm/state.hh"
-#include "util/murmur_hash.hh"
-#include "util/pool.hh"
-
-#include <iostream>
-
-#include <string.h>
-#include <stdlib.h>
-#include <jni.h>
-#include <pthread.h>
-
-// Grr. Everybody's compiler is slightly different and I'm trying to not depend on boost.
-#include <ext/hash_map>
-
-// This is needed to compile on OS X Lion / gcc 4.2.1
-namespace __gnu_cxx {
-template<>
-struct hash<unsigned long long int> {
- size_t operator()(unsigned long long int __x) const {
- return __x;
- }
-};
-}
-
-// Verify that jint and lm::ngram::WordIndex are the same size. If this breaks
-// for you, there's a need to revise probString.
-namespace {
-
-template<bool> struct StaticCheck {
-};
-
-template<> struct StaticCheck<true> {
- typedef bool StaticAssertionPassed;
-};
-
-typedef StaticCheck<sizeof(jint) == sizeof(lm::WordIndex)>::StaticAssertionPassed FloatSize;
-
-typedef __gnu_cxx::hash_map<uint64_t, lm::ngram::ChartState*> PoolHash;
-
-/**
- * A Chart bundles together a hash_map that maps ChartState signatures to a single object
- * instantiated using a pool. This allows duplicate states to avoid allocating separate
- * state objects at multiple places throughout a sentence, and also allows state to be
- * shared across KenLMs for the same sentence.
- */
-struct Chart {
- // A cache for allocated chart objects
- PoolHash* poolHash;
- // Pool used to allocate new ones
- util::Pool* pool;
-
- Chart() {
- poolHash = new PoolHash();
- pool = new util::Pool();
- }
-
- ~Chart() {
- delete poolHash;
- pool->FreeAll();
- delete pool;
- }
-
- lm::ngram::ChartState* put(const lm::ngram::ChartState& state) {
- uint64_t hashValue = lm::ngram::hash_value(state);
-
- if (poolHash->find(hashValue) == poolHash->end()) {
- lm::ngram::ChartState* pointer = (lm::ngram::ChartState *)pool->Allocate(sizeof(lm::ngram::ChartState));
- *pointer = state;
- (*poolHash)[hashValue] = pointer;
- }
-
- return (*poolHash)[hashValue];
- }
-};
-
-// Vocab ids above what the vocabulary knows about are unknown and should
-// be mapped to that.
-void MapArray(const std::vector<lm::WordIndex>& map, jint *begin, jint *end) {
- for (jint *i = begin; i < end; ++i) {
- *i = map[*i];
- }
-}
-
-char *PieceCopy(const StringPiece &str) {
- char *ret = (char*) malloc(str.size() + 1);
- memcpy(ret, str.data(), str.size());
- ret[str.size()] = 0;
- return ret;
-}
-
-// Rather than handle several different instantiations over JNI, we'll just
-// do virtual calls C++-side.
-class VirtualBase {
-public:
- virtual ~VirtualBase() {
- }
-
- virtual float Prob(jint *begin, jint *end) const = 0;
-
- virtual float ProbRule(jlong *begin, jlong *end, lm::ngram::ChartState& state) const = 0;
-
- virtual float ProbString(jint * const begin, jint * const end,
- jint start) const = 0;
-
- virtual float EstimateRule(jlong *begin, jlong *end) const = 0;
-
- virtual uint8_t Order() const = 0;
-
- virtual bool RegisterWord(const StringPiece& word, const int joshua_id) = 0;
-
- void RememberReturnMethod(jclass chart_pair, jmethodID chart_pair_init) {
- chart_pair_ = chart_pair;
- chart_pair_init_ = chart_pair_init;
- }
-
- jclass ChartPair() const { return chart_pair_; }
- jmethodID ChartPairInit() const { return chart_pair_init_; }
-
-protected:
- VirtualBase() {
- }
-
-private:
- // Hack: these are remembered so we can avoid looking them up every time.
- jclass chart_pair_;
- jmethodID chart_pair_init_;
-};
-
-template<class Model> class VirtualImpl: public VirtualBase {
-public:
- VirtualImpl(const char *name) :
- m_(name) {
- // Insert unknown id mapping.
- map_.push_back(0);
- }
-
- ~VirtualImpl() {
- }
-
- float Prob(jint * const begin, jint * const end) const {
- MapArray(map_, begin, end);
-
- std::reverse(begin, end - 1);
- lm::ngram::State ignored;
- return m_.FullScoreForgotState(
- reinterpret_cast<const lm::WordIndex*>(begin),
- reinterpret_cast<const lm::WordIndex*>(end - 1), *(end - 1),
- ignored).prob;
- }
-
- float ProbRule(jlong * const begin, jlong * const end, lm::ngram::ChartState& state) const {
- if (begin == end) return 0.0;
- lm::ngram::RuleScore<Model> ruleScore(m_, state);
-
- if (*begin < 0) {
- ruleScore.BeginNonTerminal(*reinterpret_cast<const lm::ngram::ChartState*>(-*begin));
- } else {
- const lm::WordIndex word = map_[*begin];
- if (word == m_.GetVocabulary().BeginSentence()) {
- ruleScore.BeginSentence();
- } else {
- ruleScore.Terminal(word);
- }
- }
- for (jlong* i = begin + 1; i != end; i++) {
- long word = *i;
- if (word < 0)
- ruleScore.NonTerminal(*reinterpret_cast<const lm::ngram::ChartState*>(-word));
- else
- ruleScore.Terminal(map_[word]);
- }
- return ruleScore.Finish();
- }
-
- float EstimateRule(jlong * const begin, jlong * const end) const {
- if (begin == end) return 0.0;
- lm::ngram::ChartState nullState;
- lm::ngram::RuleScore<Model> ruleScore(m_, nullState);
-
- if (*begin < 0) {
- ruleScore.Reset();
- } else {
- const lm::WordIndex word = map_[*begin];
- if (word == m_.GetVocabulary().BeginSentence()) {
- ruleScore.BeginSentence();
- } else {
- ruleScore.Terminal(word);
- }
- }
- for (jlong* i = begin + 1; i != end; i++) {
- long word = *i;
- if (word < 0)
- ruleScore.Reset();
- else
- ruleScore.Terminal(map_[word]);
- }
- return ruleScore.Finish();
- }
-
- float ProbString(jint * const begin, jint * const end, jint start) const {
- MapArray(map_, begin, end);
-
- float prob;
- lm::ngram::State state;
- if (start == 0) {
- prob = 0;
- state = m_.NullContextState();
- } else {
- std::reverse(begin, begin + start);
- prob = m_.FullScoreForgotState(
- reinterpret_cast<const lm::WordIndex*>(begin),
- reinterpret_cast<const lm::WordIndex*>(begin + start),
- begin[start], state).prob;
- ++start;
- }
- lm::ngram::State state2;
- for (const jint *i = begin + start;;) {
- if (i >= end)
- break;
- float got = m_.Score(state, *i, state2);
- i++;
- prob += got;
- if (i >= end)
- break;
- got = m_.Score(state2, *i, state);
- i++;
- prob += got;
- }
- return prob;
- }
-
- uint8_t Order() const {
- return m_.Order();
- }
-
- bool RegisterWord(const StringPiece& word, const int joshua_id) {
- if (map_.size() <= joshua_id) {
- map_.resize(joshua_id + 1, 0);
- }
- bool already_present = false;
- if (map_[joshua_id] != 0)
- already_present = true;
- map_[joshua_id] = m_.GetVocabulary().Index(word);
- return already_present;
- }
-
-private:
- Model m_;
- std::vector<lm::WordIndex> map_;
-};
-
-VirtualBase *ConstructModel(const char *file_name) {
- using namespace lm::ngram;
- ModelType model_type;
- if (!RecognizeBinary(file_name, model_type))
- model_type = HASH_PROBING;
- switch (model_type) {
- case PROBING:
- return new VirtualImpl<ProbingModel>(file_name);
- case REST_PROBING:
- return new VirtualImpl<RestProbingModel>(file_name);
- case TRIE:
- return new VirtualImpl<TrieModel>(file_name);
- case ARRAY_TRIE:
- return new VirtualImpl<ArrayTrieModel>(file_name);
- case QUANT_TRIE:
- return new VirtualImpl<QuantTrieModel>(file_name);
- case QUANT_ARRAY_TRIE:
- return new VirtualImpl<QuantArrayTrieModel>(file_name);
- default:
- UTIL_THROW(
- lm::FormatLoadException,
- "Unrecognized file format " << (unsigned) model_type
- << " in file " << file_name);
- }
-}
-
-} // namespace
-
-extern "C" {
-
-JNIEXPORT jlong JNICALL Java_joshua_decoder_ff_lm_kenlm_jni_KenLM_construct(
- JNIEnv *env, jclass, jstring file_name) {
- const char *str = env->GetStringUTFChars(file_name, 0);
- if (!str)
- return 0;
-
- VirtualBase *ret;
- try {
- ret = ConstructModel(str);
-
- // Get a class reference for the type pair that char
- jclass local_chart_pair = env->FindClass("joshua/decoder/ff/lm/kenlm/jni/KenLM$StateProbPair");
- UTIL_THROW_IF(!local_chart_pair, util::Exception, "Failed to find joshua/decoder/ff/lm/kenlm/jni/KenLM$StateProbPair");
- jclass chart_pair = (jclass)env->NewGlobalRef(local_chart_pair);
- env->DeleteLocalRef(local_chart_pair);
-
- // Get the Method ID of the constructor which takes an int
- jmethodID chart_pair_init = env->GetMethodID(chart_pair, "<init>", "(JF)V");
- UTIL_THROW_IF(!chart_pair_init, util::Exception, "Failed to find init method");
-
- ret->RememberReturnMethod(chart_pair, chart_pair_init);
- } catch (std::exception &e) {
- std::cerr << e.what() << std::endl;
- abort();
- }
- env->ReleaseStringUTFChars(file_name, str);
- return reinterpret_cast<jlong>(ret);
-}
-
-JNIEXPORT void JNICALL Java_joshua_decoder_ff_lm_kenlm_jni_KenLM_destroy(
- JNIEnv *env, jclass, jlong pointer) {
- VirtualBase *base = reinterpret_cast<VirtualBase*>(pointer);
- env->DeleteGlobalRef(base->ChartPair());
- delete base;
-}
-
-JNIEXPORT long JNICALL Java_joshua_decoder_ff_lm_kenlm_jni_KenLM_createPool(
- JNIEnv *env, jclass) {
- return reinterpret_cast<long>(new Chart());
-}
-
-JNIEXPORT void JNICALL Java_joshua_decoder_ff_lm_kenlm_jni_KenLM_destroyPool(
- JNIEnv *env, jclass, jlong pointer) {
- Chart* chart = reinterpret_cast<Chart*>(pointer);
- delete chart;
-}
-
-JNIEXPORT jint JNICALL Java_joshua_decoder_ff_lm_kenlm_jni_KenLM_order(
- JNIEnv *env, jclass, jlong pointer) {
- return reinterpret_cast<VirtualBase*>(pointer)->Order();
-}
-
-JNIEXPORT jboolean JNICALL Java_joshua_decoder_ff_lm_kenlm_jni_KenLM_registerWord(
- JNIEnv *env, jclass, jlong pointer, jstring word, jint id) {
- const char *str = env->GetStringUTFChars(word, 0);
- if (!str)
- return false;
- jint ret;
- try {
- ret = reinterpret_cast<VirtualBase*>(pointer)->RegisterWord(str, id);
- } catch (std::exception &e) {
- std::cerr << e.what() << std::endl;
- abort();
- }
- env->ReleaseStringUTFChars(word, str);
- return ret;
-}
-
-JNIEXPORT jfloat JNICALL Java_joshua_decoder_ff_lm_kenlm_jni_KenLM_prob(
- JNIEnv *env, jclass, jlong pointer, jintArray arr) {
- jint length = env->GetArrayLength(arr);
- if (length <= 0)
- return 0.0;
- // GCC only.
- jint values[length];
- env->GetIntArrayRegion(arr, 0, length, values);
-
- return reinterpret_cast<const VirtualBase*>(pointer)->Prob(values,
- values + length);
-}
-
-JNIEXPORT jfloat JNICALL Java_joshua_decoder_ff_lm_kenlm_jni_KenLM_probString(
- JNIEnv *env, jclass, jlong pointer, jintArray arr, jint start) {
- jint length = env->GetArrayLength(arr);
- if (length <= start)
- return 0.0;
- // GCC only.
- jint values[length];
- env->GetIntArrayRegion(arr, 0, length, values);
-
- return reinterpret_cast<const VirtualBase*>(pointer)->ProbString(values,
- values + length, start);
-}
-
-JNIEXPORT jobject JNICALL Java_joshua_decoder_ff_lm_kenlm_jni_KenLM_probRule(
- JNIEnv *env, jclass, jlong pointer, jlong chartPtr, jlongArray arr) {
- jint length = env->GetArrayLength(arr);
- // GCC only.
- jlong values[length];
- env->GetLongArrayRegion(arr, 0, length, values);
-
- // Compute the probability
- lm::ngram::ChartState outState;
- const VirtualBase *base = reinterpret_cast<const VirtualBase*>(pointer);
- float prob = base->ProbRule(values, values + length, outState);
-
- Chart* chart = reinterpret_cast<Chart*>(chartPtr);
- lm::ngram::ChartState* outStatePtr = chart->put(outState);
-
- // Call back constructor to allocate a new instance, with an int argument
- return env->NewObject(base->ChartPair(), base->ChartPairInit(), (long)outStatePtr, prob);
-}
-
-JNIEXPORT jfloat JNICALL Java_joshua_decoder_ff_lm_kenlm_jni_KenLM_estimateRule(
- JNIEnv *env, jclass, jlong pointer, jlongArray arr) {
- jint length = env->GetArrayLength(arr);
- // GCC only.
- jlong values[length];
- env->GetLongArrayRegion(arr, 0, length, values);
-
- // Compute the probability
- return reinterpret_cast<const VirtualBase*>(pointer)->EstimateRule(values,
- values + length);
-}
-
-} // extern
diff --git a/src/joshua/decoder/ff/lm/kenlm/lm/Jamfile b/src/joshua/decoder/ff/lm/kenlm/lm/Jamfile
deleted file mode 100644
index 227b220..0000000
--- a/src/joshua/decoder/ff/lm/kenlm/lm/Jamfile
+++ /dev/null
@@ -1,40 +0,0 @@
-# If you need higher order, change this option
-# Having this limit means that State can be
-# (KENLM_MAX_ORDER - 1) * sizeof(float) bytes instead of
-# sizeof(float*) + (KENLM_MAX_ORDER - 1) * sizeof(float) + malloc overhead
-max-order = [ option.get "max-kenlm-order" : 6 : 6 ] ;
-if ( $(max-order) != 6 ) {
- echo "Setting KenLM maximum n-gram order to $(max-order)" ;
-}
-max-order = <define>KENLM_MAX_ORDER=$(max-order) ;
-
-path-constant ORDER-LOG : bin/order.log ;
-update-if-changed $(ORDER-LOG) $(max-order) ;
-
-max-order += <dependency>$(ORDER-LOG) ;
-
-wrappers = ;
-local with-nplm = [ option.get "with-nplm" ] ;
-if $(with-nplm) {
- lib neuralLM : : <search>$(with-nplm)/src ;
- obj nplm.o : wrappers/nplm.cc : <include>.. <include>$(with-nplm)/src <cxxflags>-fopenmp ;
- alias nplm : nplm.o neuralLM ..//boost_thread : : : <cxxflags>-fopenmp <linkflags>-fopenmp <define>WITH_NPLM <library>..//boost_thread ;
- wrappers += nplm ;
-}
-
-fakelib kenlm : $(wrappers) [ glob *.cc : *main.cc *test.cc ] ../util//kenutil : <include>.. $(max-order) : : <include>.. $(max-order) ;
-
-import testing ;
-
-run left_test.cc kenlm /top//boost_unit_test_framework : : test.arpa ;
-run model_test.cc kenlm /top//boost_unit_test_framework : : test.arpa test_nounk.arpa ;
-run partial_test.cc kenlm /top//boost_unit_test_framework : : test.arpa ;
-
-exes = ;
-for local p in [ glob *_main.cc ] {
- local name = [ MATCH "(.*)\_main.cc" : $(p) ] ;
- exe $(name) : $(p) kenlm ;
- exes += $(name) ;
-}
-
-alias programs : $(exes) filter//filter builder//dump_counts : <threading>multi:<source>builder//lmplz ;
diff --git a/src/joshua/decoder/ff/lm/kenlm/lm/bhiksha.cc b/src/joshua/decoder/ff/lm/kenlm/lm/bhiksha.cc
deleted file mode 100644
index c8a18df..0000000
--- a/src/joshua/decoder/ff/lm/kenlm/lm/bhiksha.cc
+++ /dev/null
@@ -1,94 +0,0 @@
-#include "lm/bhiksha.hh"
-
-#include "lm/binary_format.hh"
-#include "lm/config.hh"
-#include "util/file.hh"
-#include "util/exception.hh"
-
-#include <limits>
-
-namespace lm {
-namespace ngram {
-namespace trie {
-
-DontBhiksha::DontBhiksha(const void * /*base*/, uint64_t /*max_offset*/, uint64_t max_next, const Config &/*config*/) :
- next_(util::BitsMask::ByMax(max_next)) {}
-
-const uint8_t kArrayBhikshaVersion = 0;
-
-// TODO: put this in binary file header instead when I change the binary file format again.
-void ArrayBhiksha::UpdateConfigFromBinary(const BinaryFormat &file, uint64_t offset, Config &config) {
- uint8_t buffer[2];
- file.ReadForConfig(buffer, 2, offset);
- uint8_t version = buffer[0];
- uint8_t configured_bits = buffer[1];
- if (version != kArrayBhikshaVersion) UTIL_THROW(FormatLoadException, "This file has sorted array compression version " << (unsigned) version << " but the code expects version " << (unsigned)kArrayBhikshaVersion);
- config.pointer_bhiksha_bits = configured_bits;
-}
-
-namespace {
-
-// Find argmin_{chopped \in [0, RequiredBits(max_next)]} ChoppedDelta(max_offset)
-uint8_t ChopBits(uint64_t max_offset, uint64_t max_next, const Config &config) {
- uint8_t required = util::RequiredBits(max_next);
- uint8_t best_chop = 0;
- int64_t lowest_change = std::numeric_limits<int64_t>::max();
- // There are probably faster ways but I don't care because this is only done once per order at construction time.
- for (uint8_t chop = 0; chop <= std::min(required, config.pointer_bhiksha_bits); ++chop) {
- int64_t change = (max_next >> (required - chop)) * 64 /* table cost in bits */
- - max_offset * static_cast<int64_t>(chop); /* savings in bits*/
- if (change < lowest_change) {
- lowest_change = change;
- best_chop = chop;
- }
- }
- return best_chop;
-}
-
-std::size_t ArrayCount(uint64_t max_offset, uint64_t max_next, const Config &config) {
- uint8_t required = util::RequiredBits(max_next);
- uint8_t chopping = ChopBits(max_offset, max_next, config);
- return (max_next >> (required - chopping)) + 1 /* we store 0 too */;
-}
-} // namespace
-
-uint64_t ArrayBhiksha::Size(uint64_t max_offset, uint64_t max_next, const Config &config) {
- return sizeof(uint64_t) * (1 /* header */ + ArrayCount(max_offset, max_next, config)) + 7 /* 8-byte alignment */;
-}
-
-uint8_t ArrayBhiksha::InlineBits(uint64_t max_offset, uint64_t max_next, const Config &config) {
- return util::RequiredBits(max_next) - ChopBits(max_offset, max_next, config);
-}
-
-namespace {
-
-void *AlignTo8(void *from) {
- uint8_t *val = reinterpret_cast<uint8_t*>(from);
- std::size_t remainder = reinterpret_cast<std::size_t>(val) & 7;
- if (!remainder) return val;
- return val + 8 - remainder;
-}
-
-} // namespace
-
-ArrayBhiksha::ArrayBhiksha(void *base, uint64_t max_offset, uint64_t max_next, const Config &config)
- : next_inline_(util::BitsMask::ByBits(InlineBits(max_offset, max_next, config))),
- offset_begin_(reinterpret_cast<const uint64_t*>(AlignTo8(base)) + 1 /* 8-byte header */),
- offset_end_(offset_begin_ + ArrayCount(max_offset, max_next, config)),
- write_to_(reinterpret_cast<uint64_t*>(AlignTo8(base)) + 1 /* 8-byte header */ + 1 /* first entry is 0 */),
- original_base_(base) {}
-
-void ArrayBhiksha::FinishedLoading(const Config &config) {
- // *offset_begin_ = 0 but without a const_cast.
- *(write_to_ - (write_to_ - offset_begin_)) = 0;
-
- if (write_to_ != offset_end_) UTIL_THROW(util::Exception, "Did not get all the array entries that were expected.");
-
- uint8_t *head_write = reinterpret_cast<uint8_t*>(original_base_);
- *(head_write++) = kArrayBhikshaVersion;
- *(head_write++) = config.pointer_bhiksha_bits;
-}
-
-} // namespace trie
-} // namespace ngram
-} // namespace lm
diff --git a/src/joshua/decoder/ff/lm/kenlm/lm/bhiksha.hh b/src/joshua/decoder/ff/lm/kenlm/lm/bhiksha.hh
deleted file mode 100644
index 134beb2..0000000
--- a/src/joshua/decoder/ff/lm/kenlm/lm/bhiksha.hh
+++ /dev/null
@@ -1,123 +0,0 @@
-/* Simple implementation of
- * @inproceedings{bhikshacompression,
- * author={Bhiksha Raj and Ed Whittaker},
- * year={2003},
- * title={Lossless Compression of Language Model Structure and Word Identifiers},
- * booktitle={Proceedings of IEEE International Conference on Acoustics, Speech and Signal Processing},
- * pages={388--391},
- * }
- *
- * Currently only used for next pointers.
- */
-
-#ifndef LM_BHIKSHA_H
-#define LM_BHIKSHA_H
-
-#include "lm/model_type.hh"
-#include "lm/trie.hh"
-#include "util/bit_packing.hh"
-#include "util/sorted_uniform.hh"
-
-#include <algorithm>
-
-#include <stdint.h>
-#include <assert.h>
-
-namespace lm {
-namespace ngram {
-struct Config;
-class BinaryFormat;
-
-namespace trie {
-
-class DontBhiksha {
- public:
- static const ModelType kModelTypeAdd = static_cast<ModelType>(0);
-
- static void UpdateConfigFromBinary(const BinaryFormat &, uint64_t, Config &/*config*/) {}
-
- static uint64_t Size(uint64_t /*max_offset*/, uint64_t /*max_next*/, const Config &/*config*/) { return 0; }
-
- static uint8_t InlineBits(uint64_t /*max_offset*/, uint64_t max_next, const Config &/*config*/) {
- return util::RequiredBits(max_next);
- }
-
- DontBhiksha(const void *base, uint64_t max_offset, uint64_t max_next, const Config &config);
-
- void ReadNext(const void *base, uint64_t bit_offset, uint64_t /*index*/, uint8_t total_bits, NodeRange &out) const {
- out.begin = util::ReadInt57(base, bit_offset, next_.bits, next_.mask);
- out.end = util::ReadInt57(base, bit_offset + total_bits, next_.bits, next_.mask);
- //assert(out.end >= out.begin);
- }
-
- void WriteNext(void *base, uint64_t bit_offset, uint64_t /*index*/, uint64_t value) {
- util::WriteInt57(base, bit_offset, next_.bits, value);
- }
-
- void FinishedLoading(const Config &/*config*/) {}
-
- uint8_t InlineBits() const { return next_.bits; }
-
- private:
- util::BitsMask next_;
-};
-
-class ArrayBhiksha {
- public:
- static const ModelType kModelTypeAdd = kArrayAdd;
-
- static void UpdateConfigFromBinary(const BinaryFormat &file, uint64_t offset, Config &config);
-
- static uint64_t Size(uint64_t max_offset, uint64_t max_next, const Config &config);
-
- static uint8_t InlineBits(uint64_t max_offset, uint64_t max_next, const Config &config);
-
- ArrayBhiksha(void *base, uint64_t max_offset, uint64_t max_value, const Config &config);
-
- void ReadNext(const void *base, uint64_t bit_offset, uint64_t index, uint8_t total_bits, NodeRange &out) const {
- // Some assertions are commented out because they are expensive.
- // assert(*offset_begin_ == 0);
- // std::upper_bound returns the first element that is greater. Want the
- // last element that is <= to the index.
- const uint64_t *begin_it = std::upper_bound(offset_begin_, offset_end_, index) - 1;
- // Since *offset_begin_ == 0, the position should be in range.
- // assert(begin_it >= offset_begin_);
- const uint64_t *end_it;
- for (end_it = begin_it + 1; (end_it < offset_end_) && (*end_it <= index + 1); ++end_it) {}
- // assert(end_it == std::upper_bound(offset_begin_, offset_end_, index + 1));
- --end_it;
- // assert(end_it >= begin_it);
- out.begin = ((begin_it - offset_begin_) << next_inline_.bits) |
- util::ReadInt57(base, bit_offset, next_inline_.bits, next_inline_.mask);
- out.end = ((end_it - offset_begin_) << next_inline_.bits) |
- util::ReadInt57(base, bit_offset + total_bits, next_inline_.bits, next_inline_.mask);
- // If this fails, consider rebuilding your model using KenLM after 1e333d786b748555e8f368d2bbba29a016c98052
- assert(out.end >= out.begin);
- }
-
- void WriteNext(void *base, uint64_t bit_offset, uint64_t index, uint64_t value) {
- uint64_t encode = value >> next_inline_.bits;
- for (; write_to_ <= offset_begin_ + encode; ++write_to_) *write_to_ = index;
- util::WriteInt57(base, bit_offset, next_inline_.bits, value & next_inline_.mask);
- }
-
- void FinishedLoading(const Config &config);
-
- uint8_t InlineBits() const { return next_inline_.bits; }
-
- private:
- const util::BitsMask next_inline_;
-
- const uint64_t *const offset_begin_;
- const uint64_t *const offset_end_;
-
- uint64_t *write_to_;
-
- void *original_base_;
-};
-
-} // namespace trie
-} // namespace ngram
-} // namespace lm
-
-#endif // LM_BHIKSHA_H
diff --git a/src/joshua/decoder/ff/lm/kenlm/lm/binary_format.cc b/src/joshua/decoder/ff/lm/kenlm/lm/binary_format.cc
deleted file mode 100644
index 4811740..0000000
--- a/src/joshua/decoder/ff/lm/kenlm/lm/binary_format.cc
+++ /dev/null
@@ -1,299 +0,0 @@
-#include "lm/binary_format.hh"
-
-#include "lm/lm_exception.hh"
-#include "util/file.hh"
-#include "util/file_piece.hh"
-
-#include <cstddef>
-#include <cstring>
-#include <limits>
-#include <string>
-#include <cstdlib>
-
-#include <stdint.h>
-
-namespace lm {
-namespace ngram {
-
-const char *kModelNames[6] = {"probing hash tables", "probing hash tables with rest costs", "trie", "trie with quantization", "trie with array-compressed pointers", "trie with quantization and array-compressed pointers"};
-
-namespace {
-const char kMagicBeforeVersion[] = "mmap lm http://kheafield.com/code format version";
-const char kMagicBytes[] = "mmap lm http://kheafield.com/code format version 5\n\0";
-// This must be shorter than kMagicBytes and indicates an incomplete binary file (i.e. build failed).
-const char kMagicIncomplete[] = "mmap lm http://kheafield.com/code incomplete\n";
-const long int kMagicVersion = 5;
-
-// Old binary files built on 32-bit machines have this header.
-// TODO: eliminate with next binary release.
-struct OldSanity {
- char magic[sizeof(kMagicBytes)];
- float zero_f, one_f, minus_half_f;
- WordIndex one_word_index, max_word_index;
- uint64_t one_uint64;
-
- void SetToReference() {
- std::memset(this, 0, sizeof(OldSanity));
- std::memcpy(magic, kMagicBytes, sizeof(magic));
- zero_f = 0.0; one_f = 1.0; minus_half_f = -0.5;
- one_word_index = 1;
- max_word_index = std::numeric_limits<WordIndex>::max();
- one_uint64 = 1;
- }
-};
-
-
-// Test values aligned to 8 bytes.
-struct Sanity {
- char magic[ALIGN8(sizeof(kMagicBytes))];
- float zero_f, one_f, minus_half_f;
- WordIndex one_word_index, max_word_index, padding_to_8;
- uint64_t one_uint64;
-
- void SetToReference() {
- std::memset(this, 0, sizeof(Sanity));
- std::memcpy(magic, kMagicBytes, sizeof(kMagicBytes));
- zero_f = 0.0; one_f = 1.0; minus_half_f = -0.5;
- one_word_index = 1;
- max_word_index = std::numeric_limits<WordIndex>::max();
- padding_to_8 = 0;
- one_uint64 = 1;
- }
-};
-
-std::size_t TotalHeaderSize(unsigned char order) {
- return ALIGN8(sizeof(Sanity) + sizeof(FixedWidthParameters) + sizeof(uint64_t) * order);
-}
-
-void WriteHeader(void *to, const Parameters ¶ms) {
- Sanity header = Sanity();
- header.SetToReference();
- std::memcpy(to, &header, sizeof(Sanity));
- char *out = reinterpret_cast<char*>(to) + sizeof(Sanity);
-
- *reinterpret_cast<FixedWidthParameters*>(out) = params.fixed;
- out += sizeof(FixedWidthParameters);
-
- uint64_t *counts = reinterpret_cast<uint64_t*>(out);
- for (std::size_t i = 0; i < params.counts.size(); ++i) {
- counts[i] = params.counts[i];
- }
-}
-
-} // namespace
-
-bool IsBinaryFormat(int fd) {
- const uint64_t size = util::SizeFile(fd);
- if (size == util::kBadSize || (size <= static_cast<uint64_t>(sizeof(Sanity)))) return false;
- // Try reading the header.
- util::scoped_memory memory;
- try {
- util::MapRead(util::LAZY, fd, 0, sizeof(Sanity), memory);
- } catch (const util::Exception &e) {
- return false;
- }
- Sanity reference_header = Sanity();
- reference_header.SetToReference();
- if (!std::memcmp(memory.get(), &reference_header, sizeof(Sanity))) return true;
- if (!std::memcmp(memory.get(), kMagicIncomplete, strlen(kMagicIncomplete))) {
- UTIL_THROW(FormatLoadException, "This binary file did not finish building");
- }
- if (!std::memcmp(memory.get(), kMagicBeforeVersion, strlen(kMagicBeforeVersion))) {
- char *end_ptr;
- const char *begin_version = static_cast<const char*>(memory.get()) + strlen(kMagicBeforeVersion);
- long int version = std::strtol(begin_version, &end_ptr, 10);
- if ((end_ptr != begin_version) && version != kMagicVersion) {
- UTIL_THROW(FormatLoadException, "Binary file has version " << version << " but this implementation expects version " << kMagicVersion << " so you'll have to use the ARPA to rebuild your binary");
- }
-
- OldSanity old_sanity = OldSanity();
- old_sanity.SetToReference();
- UTIL_THROW_IF(!std::memcmp(memory.get(), &old_sanity, sizeof(OldSanity)), FormatLoadException, "Looks like this is an old 32-bit format. The old 32-bit format has been removed so that 64-bit and 32-bit files are exchangeable.");
- UTIL_THROW(FormatLoadException, "File looks like it should be loaded with mmap, but the test values don't match. Try rebuilding the binary format LM using the same code revision, compiler, and architecture");
- }
- return false;
-}
-
-void ReadHeader(int fd, Parameters &out) {
- util::SeekOrThrow(fd, sizeof(Sanity));
- util::ReadOrThrow(fd, &out.fixed, sizeof(out.fixed));
- if (out.fixed.probing_multiplier < 1.0)
- UTIL_THROW(FormatLoadException, "Binary format claims to have a probing multiplier of " << out.fixed.probing_multiplier << " which is < 1.0.");
-
- out.counts.resize(static_cast<std::size_t>(out.fixed.order));
- if (out.fixed.order) util::ReadOrThrow(fd, &*out.counts.begin(), sizeof(uint64_t) * out.fixed.order);
-}
-
-void MatchCheck(ModelType model_type, unsigned int search_version, const Parameters ¶ms) {
- if (params.fixed.model_type != model_type) {
- if (static_cast<unsigned int>(params.fixed.model_type) >= (sizeof(kModelNames) / sizeof(const char *)))
- UTIL_THROW(FormatLoadException, "The binary file claims to be model type " << static_cast<unsigned int>(params.fixed.model_type) << " but this is not implemented for in this inference code.");
- UTIL_THROW(FormatLoadException, "The binary file was built for " << kModelNames[params.fixed.model_type] << " but the inference code is trying to load " << kModelNames[model_type]);
- }
- UTIL_THROW_IF(search_version != params.fixed.search_version, FormatLoadException, "The binary file has " << kModelNames[params.fixed.model_type] << " version " << params.fixed.search_version << " but this code expects " << kModelNames[params.fixed.model_type] << " version " << search_version);
-}
-
-const std::size_t kInvalidSize = static_cast<std::size_t>(-1);
-
-BinaryFormat::BinaryFormat(const Config &config)
- : write_method_(config.write_method), write_mmap_(config.write_mmap), load_method_(config.load_method),
- header_size_(kInvalidSize), vocab_size_(kInvalidSize), vocab_string_offset_(kInvalidOffset) {}
-
-void BinaryFormat::InitializeBinary(int fd, ModelType model_type, unsigned int search_version, Parameters ¶ms) {
- file_.reset(fd);
- write_mmap_ = NULL; // Ignore write requests; this is already in binary format.
- ReadHeader(fd, params);
- MatchCheck(model_type, search_version, params);
- header_size_ = TotalHeaderSize(params.counts.size());
-}
-
-void BinaryFormat::ReadForConfig(void *to, std::size_t amount, uint64_t offset_excluding_header) const {
- assert(header_size_ != kInvalidSize);
- util::ErsatzPRead(file_.get(), to, amount, offset_excluding_header + header_size_);
-}
-
-void *BinaryFormat::LoadBinary(std::size_t size) {
- assert(header_size_ != kInvalidSize);
- const uint64_t file_size = util::SizeFile(file_.get());
- // The header is smaller than a page, so we have to map the whole header as well.
- uint64_t total_map = static_cast<uint64_t>(header_size_) + static_cast<uint64_t>(size);
- UTIL_THROW_IF(file_size != util::kBadSize && file_size < total_map, FormatLoadException, "Binary file has size " << file_size << " but the headers say it should be at least " << total_map);
-
- util::MapRead(load_method_, file_.get(), 0, util::CheckOverflow(total_map), mapping_);
-
- vocab_string_offset_ = total_map;
- return reinterpret_cast<uint8_t*>(mapping_.get()) + header_size_;
-}
-
-void *BinaryFormat::SetupJustVocab(std::size_t memory_size, uint8_t order) {
- vocab_size_ = memory_size;
- if (!write_mmap_) {
- header_size_ = 0;
- util::MapAnonymous(memory_size, memory_vocab_);
- return reinterpret_cast<uint8_t*>(memory_vocab_.get());
- }
- header_size_ = TotalHeaderSize(order);
- std::size_t total = util::CheckOverflow(static_cast<uint64_t>(header_size_) + static_cast<uint64_t>(memory_size));
- file_.reset(util::CreateOrThrow(write_mmap_));
- // some gccs complain about uninitialized variables even though all enum values are covered.
- void *vocab_base = NULL;
- switch (write_method_) {
- case Config::WRITE_MMAP:
- mapping_.reset(util::MapZeroedWrite(file_.get(), total), total, util::scoped_memory::MMAP_ALLOCATED);
- vocab_base = mapping_.get();
- break;
- case Config::WRITE_AFTER:
- util::ResizeOrThrow(file_.get(), 0);
- util::MapAnonymous(total, memory_vocab_);
- vocab_base = memory_vocab_.get();
- break;
- }
- strncpy(reinterpret_cast<char*>(vocab_base), kMagicIncomplete, header_size_);
- return reinterpret_cast<uint8_t*>(vocab_base) + header_size_;
-}
-
-void *BinaryFormat::GrowForSearch(std::size_t memory_size, std::size_t vocab_pad, void *&vocab_base) {
- assert(vocab_size_ != kInvalidSize);
- vocab_pad_ = vocab_pad;
- std::size_t new_size = header_size_ + vocab_size_ + vocab_pad_ + memory_size;
- vocab_string_offset_ = new_size;
- if (!write_mmap_ || write_method_ == Config::WRITE_AFTER) {
- util::MapAnonymous(memory_size, memory_search_);
- assert(header_size_ == 0 || write_mmap_);
- vocab_base = reinterpret_cast<uint8_t*>(memory_vocab_.get()) + header_size_;
- return reinterpret_cast<uint8_t*>(memory_search_.get());
- }
-
- assert(write_method_ == Config::WRITE_MMAP);
- // Also known as total size without vocab words.
- // Grow the file to accomodate the search, using zeros.
- // According to man mmap, behavior is undefined when the file is resized
- // underneath a mmap that is not a multiple of the page size. So to be
- // safe, we'll unmap it and map it again.
- mapping_.reset();
- util::ResizeOrThrow(file_.get(), new_size);
- void *ret;
- MapFile(vocab_base, ret);
- return ret;
-}
-
-void BinaryFormat::WriteVocabWords(const std::string &buffer, void *&vocab_base, void *&search_base) {
- // Checking Config's include_vocab is the responsibility of the caller.
- assert(header_size_ != kInvalidSize && vocab_size_ != kInvalidSize);
- if (!write_mmap_) {
- // Unchanged base.
- vocab_base = reinterpret_cast<uint8_t*>(memory_vocab_.get());
- search_base = reinterpret_cast<uint8_t*>(memory_search_.get());
- return;
- }
- if (write_method_ == Config::WRITE_MMAP) {
- mapping_.reset();
- }
- util::SeekOrThrow(file_.get(), VocabStringReadingOffset());
- util::WriteOrThrow(file_.get(), &buffer[0], buffer.size());
- if (write_method_ == Config::WRITE_MMAP) {
- MapFile(vocab_base, search_base);
- } else {
- vocab_base = reinterpret_cast<uint8_t*>(memory_vocab_.get()) + header_size_;
- search_base = reinterpret_cast<uint8_t*>(memory_search_.get());
- }
-}
-
-void BinaryFormat::FinishFile(const Config &config, ModelType model_type, unsigned int search_version, const std::vector<uint64_t> &counts) {
- if (!write_mmap_) return;
- switch (write_method_) {
- case Config::WRITE_MMAP:
- util::SyncOrThrow(mapping_.get(), mapping_.size());
- break;
- case Config::WRITE_AFTER:
- util::SeekOrThrow(file_.get(), 0);
- util::WriteOrThrow(file_.get(), memory_vocab_.get(), memory_vocab_.size());
- util::SeekOrThrow(file_.get(), header_size_ + vocab_size_ + vocab_pad_);
- util::WriteOrThrow(file_.get(), memory_search_.get(), memory_search_.size());
- util::FSyncOrThrow(file_.get());
- break;
- }
- // header and vocab share the same mmap.
- Parameters params = Parameters();
- memset(¶ms, 0, sizeof(Parameters));
- params.counts = counts;
- params.fixed.order = counts.size();
- params.fixed.probing_multiplier = config.probing_multiplier;
- params.fixed.model_type = model_type;
- params.fixed.has_vocabulary = config.include_vocab;
- params.fixed.search_version = search_version;
- switch (write_method_) {
- case Config::WRITE_MMAP:
- WriteHeader(mapping_.get(), params);
- util::SyncOrThrow(mapping_.get(), mapping_.size());
- break;
- case Config::WRITE_AFTER:
- {
- std::vector<uint8_t> buffer(TotalHeaderSize(counts.size()));
- WriteHeader(&buffer[0], params);
- util::SeekOrThrow(file_.get(), 0);
- util::WriteOrThrow(file_.get(), &buffer[0], buffer.size());
- }
- break;
- }
-}
-
-void BinaryFormat::MapFile(void *&vocab_base, void *&search_base) {
- mapping_.reset(util::MapOrThrow(vocab_string_offset_, true, util::kFileFlags, false, file_.get()), vocab_string_offset_, util::scoped_memory::MMAP_ALLOCATED);
- vocab_base = reinterpret_cast<uint8_t*>(mapping_.get()) + header_size_;
- search_base = reinterpret_cast<uint8_t*>(mapping_.get()) + header_size_ + vocab_size_ + vocab_pad_;
-}
-
-bool RecognizeBinary(const char *file, ModelType &recognized) {
- util::scoped_fd fd(util::OpenReadOrThrow(file));
- if (!IsBinaryFormat(fd.get())) {
- return false;
- }
- Parameters params;
- ReadHeader(fd.get(), params);
- recognized = params.fixed.model_type;
- return true;
-}
-
-} // namespace ngram
-} // namespace lm
diff --git a/src/joshua/decoder/ff/lm/kenlm/lm/binary_format.hh b/src/joshua/decoder/ff/lm/kenlm/lm/binary_format.hh
deleted file mode 100644
index 136d6b1..0000000
--- a/src/joshua/decoder/ff/lm/kenlm/lm/binary_format.hh
+++ /dev/null
@@ -1,106 +0,0 @@
-#ifndef LM_BINARY_FORMAT_H
-#define LM_BINARY_FORMAT_H
-
-#include "lm/config.hh"
-#include "lm/model_type.hh"
-#include "lm/read_arpa.hh"
-
-#include "util/file_piece.hh"
-#include "util/mmap.hh"
-#include "util/scoped.hh"
-
-#include <cstddef>
-#include <vector>
-
-#include <stdint.h>
-
-namespace lm {
-namespace ngram {
-
-extern const char *kModelNames[6];
-
-/*Inspect a file to determine if it is a binary lm. If not, return false.
- * If so, return true and set recognized to the type. This is the only API in
- * this header designed for use by decoder authors.
- */
-bool RecognizeBinary(const char *file, ModelType &recognized);
-
-struct FixedWidthParameters {
- unsigned char order;
- float probing_multiplier;
- // What type of model is this?
- ModelType model_type;
- // Does the end of the file have the actual strings in the vocabulary?
- bool has_vocabulary;
- unsigned int search_version;
-};
-
-// This is a macro instead of an inline function so constants can be assigned using it.
-#define ALIGN8(a) ((std::ptrdiff_t(((a)-1)/8)+1)*8)
-
-// Parameters stored in the header of a binary file.
-struct Parameters {
- FixedWidthParameters fixed;
- std::vector<uint64_t> counts;
-};
-
-class BinaryFormat {
- public:
- explicit BinaryFormat(const Config &config);
-
- // Reading a binary file:
- // Takes ownership of fd
- void InitializeBinary(int fd, ModelType model_type, unsigned int search_version, Parameters ¶ms);
- // Used to read parts of the file to update the config object before figuring out full size.
- void ReadForConfig(void *to, std::size_t amount, uint64_t offset_excluding_header) const;
- // Actually load the binary file and return a pointer to the beginning of the search area.
- void *LoadBinary(std::size_t size);
-
- uint64_t VocabStringReadingOffset() const {
- assert(vocab_string_offset_ != kInvalidOffset);
- return vocab_string_offset_;
- }
-
- // Writing a binary file or initializing in RAM from ARPA:
- // Size for vocabulary.
- void *SetupJustVocab(std::size_t memory_size, uint8_t order);
- // Warning: can change the vocaulary base pointer.
- void *GrowForSearch(std::size_t memory_size, std::size_t vocab_pad, void *&vocab_base);
- // Warning: can change vocabulary and search base addresses.
- void WriteVocabWords(const std::string &buffer, void *&vocab_base, void *&search_base);
- // Write the header at the beginning of the file.
- void FinishFile(const Config &config, ModelType model_type, unsigned int search_version, const std::vector<uint64_t> &counts);
-
- private:
- void MapFile(void *&vocab_base, void *&search_base);
-
- // Copied from configuration.
- const Config::WriteMethod write_method_;
- const char *write_mmap_;
- util::LoadMethod load_method_;
-
- // File behind memory, if any.
- util::scoped_fd file_;
-
- // If there is a file involved, a single mapping.
- util::scoped_memory mapping_;
-
- // If the data is only in memory, separately allocate each because the trie
- // knows vocab's size before it knows search's size (because SRILM might
- // have pruned).
- util::scoped_memory memory_vocab_, memory_search_;
-
- // Memory ranges. Note that these may not be contiguous and may not all
- // exist.
- std::size_t header_size_, vocab_size_, vocab_pad_;
- // aka end of search.
- uint64_t vocab_string_offset_;
-
- static const uint64_t kInvalidOffset = (uint64_t)-1;
-};
-
-bool IsBinaryFormat(int fd);
-
-} // namespace ngram
-} // namespace lm
-#endif // LM_BINARY_FORMAT_H
diff --git a/src/joshua/decoder/ff/lm/kenlm/lm/blank.hh b/src/joshua/decoder/ff/lm/kenlm/lm/blank.hh
deleted file mode 100644
index 94a71ad..0000000
--- a/src/joshua/decoder/ff/lm/kenlm/lm/blank.hh
+++ /dev/null
@@ -1,43 +0,0 @@
-#ifndef LM_BLANK_H
-#define LM_BLANK_H
-
-#include <limits>
-
-#include <stdint.h>
-#include <math.h>
-
-namespace lm {
-namespace ngram {
-
-/* Suppose "foo bar" appears with zero backoff but there is no trigram
- * beginning with these words. Then, when scoring "foo bar", the model could
- * return out_state containing "bar" or even null context if "bar" also has no
- * backoff and is never followed by another word. Then the backoff is set to
- * kNoExtensionBackoff. If the n-gram might be extended, then out_state must
- * contain the full n-gram, in which case kExtensionBackoff is set. In any
- * case, if an n-gram has non-zero backoff, the full state is returned so
- * backoff can be properly charged.
- * These differ only in sign bit because the backoff is in fact zero in either
- * case.
- */
-const float kNoExtensionBackoff = -0.0;
-const float kExtensionBackoff = 0.0;
-const uint64_t kNoExtensionQuant = 0;
-const uint64_t kExtensionQuant = 1;
-
-inline void SetExtension(float &backoff) {
- if (backoff == kNoExtensionBackoff) backoff = kExtensionBackoff;
-}
-
-// This compiles down nicely.
-inline bool HasExtension(const float &backoff) {
- typedef union { float f; uint32_t i; } UnionValue;
- UnionValue compare, interpret;
- compare.f = kNoExtensionBackoff;
- interpret.f = backoff;
- return compare.i != interpret.i;
-}
-
-} // namespace ngram
-} // namespace lm
-#endif // LM_BLANK_H
diff --git a/src/joshua/decoder/ff/lm/kenlm/lm/build_binary_main.cc b/src/joshua/decoder/ff/lm/kenlm/lm/build_binary_main.cc
deleted file mode 100644
index 2af2222..0000000
--- a/src/joshua/decoder/ff/lm/kenlm/lm/build_binary_main.cc
+++ /dev/null
@@ -1,235 +0,0 @@
-#include "lm/model.hh"
-#include "lm/sizes.hh"
-#include "util/file_piece.hh"
-#include "util/usage.hh"
-
-#include <algorithm>
-#include <cstdlib>
-#include <exception>
-#include <iostream>
-#include <iomanip>
-#include <limits>
-
-#include <math.h>
-#include <stdlib.h>
-
-#ifdef WIN32
-#include "util/getopt.hh"
-#else
-#include <unistd.h>
-#endif
-
-namespace lm {
-namespace ngram {
-namespace {
-
-void Usage(const char *name, const char *default_mem) {
- std::cerr << "Usage: " << name << " [-u log10_unknown_probability] [-s] [-i] [-w mmap|after] [-p probing_multiplier] [-T trie_temporary] [-S trie_building_mem] [-q bits] [-b bits] [-a bits] [type] input.arpa [output.mmap]\n\n"
-"-u sets the log10 probability for <unk> if the ARPA file does not have one.\n"
-" Default is -100. The ARPA file will always take precedence.\n"
-"-s allows models to be built even if they do not have <s> and </s>.\n"
-"-i allows buggy models from IRSTLM by mapping positive log probability to 0.\n"
-"-w mmap|after determines how writing is done.\n"
-" mmap maps the binary file and writes to it. Default for trie.\n"
-" after allocates anonymous memory, builds, and writes. Default for probing.\n"
-"-r \"order1.arpa order2 order3 order4\" adds lower-order rest costs from these\n"
-" model files. order1.arpa must be an ARPA file. All others may be ARPA or\n"
-" the same data structure as being built. All files must have the same\n"
-" vocabulary. For probing, the unigrams must be in the same order.\n\n"
-"type is either probing or trie. Default is probing.\n\n"
-"probing uses a probing hash table. It is the fastest but uses the most memory.\n"
-"-p sets the space multiplier and must be >1.0. The default is 1.5.\n\n"
-"trie is a straightforward trie with bit-level packing. It uses the least\n"
-"memory and is still faster than SRI or IRST. Building the trie format uses an\n"
-"on-disk sort to save memory.\n"
-"-T is the temporary directory prefix. Default is the output file name.\n"
-"-S determines memory use for sorting. Default is " << default_mem << ". This is compatible\n"
-" with GNU sort. The number is followed by a unit: \% for percent of physical\n"
-" memory, b for bytes, K for Kilobytes, M for megabytes, then G,T,P,E,Z,Y. \n"
-" Default unit is K for Kilobytes.\n"
-"-q turns quantization on and sets the number of bits (e.g. -q 8).\n"
-"-b sets backoff quantization bits. Requires -q and defaults to that value.\n"
-"-a compresses pointers using an array of offsets. The parameter is the\n"
-" maximum number of bits encoded by the array. Memory is minimized subject\n"
-" to the maximum, so pick 255 to minimize memory.\n\n"
-"-h print this help message.\n\n"
-"Get a memory estimate by passing an ARPA file without an output file name.\n";
- exit(1);
-}
-
-// I could really use boost::lexical_cast right about now.
-float ParseFloat(const char *from) {
- char *end;
- float ret = strtod(from, &end);
- if (*end) throw util::ParseNumberException(from);
- return ret;
-}
-unsigned long int ParseUInt(const char *from) {
- char *end;
- unsigned long int ret = strtoul(from, &end, 10);
- if (*end) throw util::ParseNumberException(from);
- return ret;
-}
-
-uint8_t ParseBitCount(const char *from) {
- unsigned long val = ParseUInt(from);
- if (val > 25) {
- util::ParseNumberException e(from);
- e << " bit counts are limited to 25.";
- }
- return val;
-}
-
-void ParseFileList(const char *from, std::vector<std::string> &to) {
- to.clear();
- while (true) {
- const char *i;
- for (i = from; *i && *i != ' '; ++i) {}
- to.push_back(std::string(from, i - from));
- if (!*i) break;
- from = i + 1;
- }
-}
-
-void ProbingQuantizationUnsupported() {
- std::cerr << "Quantization is only implemented in the trie data structure." << std::endl;
- exit(1);
-}
-
-} // namespace ngram
-} // namespace lm
-} // namespace
-
-int main(int argc, char *argv[]) {
- using namespace lm::ngram;
-
- const char *default_mem = util::GuessPhysicalMemory() ? "80%" : "1G";
-
- if (argc == 2 && !strcmp(argv[1], "--help"))
- Usage(argv[0], default_mem);
-
- try {
- bool quantize = false, set_backoff_bits = false, bhiksha = false, set_write_method = false, rest = false;
- lm::ngram::Config config;
- config.building_memory = util::ParseSize(default_mem);
- int opt;
- while ((opt = getopt(argc, argv, "q:b:a:u:p:t:T:m:S:w:sir:h")) != -1) {
- switch(opt) {
- case 'q':
- config.prob_bits = ParseBitCount(optarg);
- if (!set_backoff_bits) config.backoff_bits = config.prob_bits;
- quantize = true;
- break;
- case 'b':
- config.backoff_bits = ParseBitCount(optarg);
- set_backoff_bits = true;
- break;
- case 'a':
- config.pointer_bhiksha_bits = ParseBitCount(optarg);
- bhiksha = true;
- break;
- case 'u':
- config.unknown_missing_logprob = ParseFloat(optarg);
- break;
- case 'p':
- config.probing_multiplier = ParseFloat(optarg);
- break;
- case 't': // legacy
- case 'T':
- config.temporary_directory_prefix = optarg;
- util::NormalizeTempPrefix(config.temporary_directory_prefix);
- break;
- case 'm': // legacy
- config.building_memory = ParseUInt(optarg) * 1048576;
- break;
- case 'S':
- config.building_memory = std::min(static_cast<uint64_t>(std::numeric_limits<std::size_t>::max()), util::ParseSize(optarg));
- break;
- case 'w':
- set_write_method = true;
- if (!strcmp(optarg, "mmap")) {
- config.write_method = Config::WRITE_MMAP;
- } else if (!strcmp(optarg, "after")) {
- config.write_method = Config::WRITE_AFTER;
- } else {
- Usage(argv[0], default_mem);
- }
- break;
- case 's':
- config.sentence_marker_missing = lm::SILENT;
- break;
- case 'i':
- config.positive_log_probability = lm::SILENT;
- break;
- case 'r':
- rest = true;
- ParseFileList(optarg, config.rest_lower_files);
- config.rest_function = Config::REST_LOWER;
- break;
- case 'h': // help
- default:
- Usage(argv[0], default_mem);
- }
- }
- if (!quantize && set_backoff_bits) {
- std::cerr << "You specified backoff quantization (-b) but not probability quantization (-q)" << std::endl;
- abort();
- }
- if (optind + 1 == argc) {
- ShowSizes(argv[optind], config);
- return 0;
- }
- const char *model_type;
- const char *from_file;
-
- if (optind + 2 == argc) {
- model_type = "probing";
- from_file = argv[optind];
- config.write_mmap = argv[optind + 1];
- } else if (optind + 3 == argc) {
- model_type = argv[optind];
- from_file = argv[optind + 1];
- config.write_mmap = argv[optind + 2];
- } else {
- Usage(argv[0], default_mem);
- return 1;
- }
- if (!strcmp(model_type, "probing")) {
- if (!set_write_method) config.write_method = Config::WRITE_AFTER;
- if (quantize || set_backoff_bits) ProbingQuantizationUnsupported();
- if (rest) {
- RestProbingModel(from_file, config);
- } else {
- ProbingModel(from_file, config);
- }
- } else if (!strcmp(model_type, "trie")) {
- if (rest) {
- std::cerr << "Rest + trie is not supported yet." << std::endl;
- return 1;
- }
- if (!set_write_method) config.write_method = Config::WRITE_MMAP;
- if (quantize) {
- if (bhiksha) {
- QuantArrayTrieModel(from_file, config);
- } else {
- QuantTrieModel(from_file, config);
- }
- } else {
- if (bhiksha) {
- ArrayTrieModel(from_file, config);
- } else {
- TrieModel(from_file, config);
- }
- }
- } else {
- Usage(argv[0], default_mem);
- }
- }
- catch (const std::exception &e) {
- std::cerr << e.what() << std::endl;
- std::cerr << "ERROR" << std::endl;
- return 1;
- }
- std::cerr << "SUCCESS" << std::endl;
- return 0;
-}
diff --git a/src/joshua/decoder/ff/lm/kenlm/lm/builder/Jamfile b/src/joshua/decoder/ff/lm/kenlm/lm/builder/Jamfile
deleted file mode 100644
index 1e0e18b..0000000
--- a/src/joshua/decoder/ff/lm/kenlm/lm/builder/Jamfile
+++ /dev/null
@@ -1,13 +0,0 @@
-fakelib builder : [ glob *.cc : *test.cc *main.cc ]
- ../../util//kenutil ../../util/stream//stream ../../util/double-conversion//double-conversion ..//kenlm
- : : : <library>/top//boost_thread $(timer-link) ;
-
-exe lmplz : lmplz_main.cc builder /top//boost_program_options ;
-
-exe dump_counts : dump_counts_main.cc builder ;
-
-alias programs : lmplz dump_counts ;
-
-import testing ;
-unit-test corpus_count_test : corpus_count_test.cc builder /top//boost_unit_test_framework ;
-unit-test adjust_counts_test : adjust_counts_test.cc builder /top//boost_unit_test_framework ;
diff --git a/src/joshua/decoder/ff/lm/kenlm/lm/builder/adjust_counts.cc b/src/joshua/decoder/ff/lm/kenlm/lm/builder/adjust_counts.cc
deleted file mode 100644
index 03ccbb9..0000000
--- a/src/joshua/decoder/ff/lm/kenlm/lm/builder/adjust_counts.cc
+++ /dev/null
@@ -1,351 +0,0 @@
-#include "lm/builder/adjust_counts.hh"
-#include "lm/builder/ngram_stream.hh"
-#include "util/stream/timer.hh"
-
-#include <algorithm>
-#include <iostream>
-#include <limits>
-
-namespace lm { namespace builder {
-
-BadDiscountException::BadDiscountException() throw() {}
-BadDiscountException::~BadDiscountException() throw() {}
-
-namespace {
-// Return last word in full that is different.
-const WordIndex* FindDifference(const NGram &full, const NGram &lower_last) {
- const WordIndex *cur_word = full.end() - 1;
- const WordIndex *pre_word = lower_last.end() - 1;
- // Find last difference.
- for (; pre_word >= lower_last.begin() && *pre_word == *cur_word; --cur_word, --pre_word) {}
- return cur_word;
-}
-
-class StatCollector {
- public:
- StatCollector(std::size_t order, std::vector<uint64_t> &counts, std::vector<uint64_t> &counts_pruned, std::vector<Discount> &discounts)
- : orders_(order), full_(orders_.back()), counts_(counts), counts_pruned_(counts_pruned), discounts_(discounts) {
- memset(&orders_[0], 0, sizeof(OrderStat) * order);
- }
-
- ~StatCollector() {}
-
- void CalculateDiscounts(const DiscountConfig &config) {
- counts_.resize(orders_.size());
- counts_pruned_.resize(orders_.size());
- for (std::size_t i = 0; i < orders_.size(); ++i) {
- const OrderStat &s = orders_[i];
- counts_[i] = s.count;
- counts_pruned_[i] = s.count_pruned;
- }
-
- discounts_ = config.overwrite;
- discounts_.resize(orders_.size());
- for (std::size_t i = config.overwrite.size(); i < orders_.size(); ++i) {
- const OrderStat &s = orders_[i];
- try {
- for (unsigned j = 1; j < 4; ++j) {
- // TODO: Specialize error message for j == 3, meaning 3+
- UTIL_THROW_IF(s.n[j] == 0, BadDiscountException, "Could not calculate Kneser-Ney discounts for "
- << (i+1) << "-grams with adjusted count " << (j+1) << " because we didn't observe any "
- << (i+1) << "-grams with adjusted count " << j << "; Is this small or artificial data?");
- }
-
- // See equation (26) in Chen and Goodman.
- discounts_[i].amount[0] = 0.0;
- float y = static_cast<float>(s.n[1]) / static_cast<float>(s.n[1] + 2.0 * s.n[2]);
- for (unsigned j = 1; j < 4; ++j) {
- discounts_[i].amount[j] = static_cast<float>(j) - static_cast<float>(j + 1) * y * static_cast<float>(s.n[j+1]) / static_cast<float>(s.n[j]);
- UTIL_THROW_IF(discounts_[i].amount[j] < 0.0 || discounts_[i].amount[j] > j, BadDiscountException, "ERROR: " << (i+1) << "-gram discount out of range for adjusted count " << j << ": " << discounts_[i].amount[j]);
- }
- } catch (const BadDiscountException &e) {
- switch (config.bad_action) {
- case THROW_UP:
- throw;
- case COMPLAIN:
- std::cerr << e.what() << " Substituting fallback discounts D1=" << config.fallback.amount[1] << " D2=" << config.fallback.amount[2] << " D3+=" << config.fallback.amount[3] << std::endl;
- case SILENT:
- break;
- }
- discounts_[i] = config.fallback;
- }
- }
- }
-
- void Add(std::size_t order_minus_1, uint64_t count, bool pruned = false) {
- OrderStat &stat = orders_[order_minus_1];
- ++stat.count;
- if (!pruned)
- ++stat.count_pruned;
- if (count < 5) ++stat.n[count];
- }
-
- void AddFull(uint64_t count, bool pruned = false) {
- ++full_.count;
- if (!pruned)
- ++full_.count_pruned;
- if (count < 5) ++full_.n[count];
- }
-
- private:
- struct OrderStat {
- // n_1 in equation 26 of Chen and Goodman etc
- uint64_t n[5];
- uint64_t count;
- uint64_t count_pruned;
- };
-
- std::vector<OrderStat> orders_;
- OrderStat &full_;
-
- std::vector<uint64_t> &counts_;
- std::vector<uint64_t> &counts_pruned_;
- std::vector<Discount> &discounts_;
-};
-
-// Reads all entries in order like NGramStream does.
-// But deletes any entries that have <s> in the 1st (not 0th) position on the
-// way out by putting other entries in their place. This disrupts the sort
-// order but we don't care because the data is going to be sorted again.
-class CollapseStream {
- public:
- CollapseStream(const util::stream::ChainPosition &position, uint64_t prune_threshold, const std::vector<bool>& prune_words) :
- current_(NULL, NGram::OrderFromSize(position.GetChain().EntrySize())),
- prune_threshold_(prune_threshold),
- prune_words_(prune_words),
- block_(position) {
- StartBlock();
- }
-
- const NGram &operator*() const { return current_; }
- const NGram *operator->() const { return ¤t_; }
-
- operator bool() const { return block_; }
-
- CollapseStream &operator++() {
- assert(block_);
-
- if (current_.begin()[1] == kBOS && current_.Base() < copy_from_) {
- memcpy(current_.Base(), copy_from_, current_.TotalSize());
- UpdateCopyFrom();
-
- // Mark highest order n-grams for later pruning
- if(current_.Count() <= prune_threshold_) {
- current_.Mark();
- }
-
- if(!prune_words_.empty()) {
- for(WordIndex* i = current_.begin(); i != current_.end(); i++) {
- if(prune_words_[*i]) {
- current_.Mark();
- break;
- }
- }
- }
-
- }
-
- current_.NextInMemory();
- uint8_t *block_base = static_cast<uint8_t*>(block_->Get());
- if (current_.Base() == block_base + block_->ValidSize()) {
- block_->SetValidSize(copy_from_ + current_.TotalSize() - block_base);
- ++block_;
- StartBlock();
- }
-
- // Mark highest order n-grams for later pruning
- if(current_.Count() <= prune_threshold_) {
- current_.Mark();
- }
-
- if(!prune_words_.empty()) {
- for(WordIndex* i = current_.begin(); i != current_.end(); i++) {
- if(prune_words_[*i]) {
- current_.Mark();
- break;
- }
- }
- }
-
- return *this;
- }
-
- private:
- void StartBlock() {
- for (; ; ++block_) {
- if (!block_) return;
- if (block_->ValidSize()) break;
- }
- current_.ReBase(block_->Get());
- copy_from_ = static_cast<uint8_t*>(block_->Get()) + block_->ValidSize();
- UpdateCopyFrom();
-
- // Mark highest order n-grams for later pruning
- if(current_.Count() <= prune_threshold_) {
- current_.Mark();
- }
-
- if(!prune_words_.empty()) {
- for(WordIndex* i = current_.begin(); i != current_.end(); i++) {
- if(prune_words_[*i]) {
- current_.Mark();
- break;
- }
- }
- }
-
- }
-
- // Find last without bos.
- void UpdateCopyFrom() {
- for (copy_from_ -= current_.TotalSize(); copy_from_ >= current_.Base(); copy_from_ -= current_.TotalSize()) {
- if (NGram(copy_from_, current_.Order()).begin()[1] != kBOS) break;
- }
- }
-
- NGram current_;
-
- // Goes backwards in the block
- uint8_t *copy_from_;
- uint64_t prune_threshold_;
- const std::vector<bool>& prune_words_;
- util::stream::Link block_;
-};
-
-} // namespace
-
-void AdjustCounts::Run(const util::stream::ChainPositions &positions) {
- UTIL_TIMER("(%w s) Adjusted counts\n");
-
- const std::size_t order = positions.size();
- StatCollector stats(order, counts_, counts_pruned_, discounts_);
- if (order == 1) {
-
- // Only unigrams. Just collect stats.
- for (NGramStream full(positions[0]); full; ++full) {
-
- // Do not prune <s> </s> <unk>
- if(*full->begin() > 2) {
- if(full->Count() <= prune_thresholds_[0])
- full->Mark();
-
- if(!prune_words_.empty() && prune_words_[*full->begin()])
- full->Mark();
- }
-
- stats.AddFull(full->UnmarkedCount(), full->IsMarked());
- }
-
- stats.CalculateDiscounts(discount_config_);
- return;
- }
-
- NGramStreams streams;
- streams.Init(positions, positions.size() - 1);
-
- CollapseStream full(positions[positions.size() - 1], prune_thresholds_.back(), prune_words_);
-
- // Initialization: <unk> has count 0 and so does <s>.
- NGramStream *lower_valid = streams.begin();
- const NGramStream *const streams_begin = streams.begin();
- streams[0]->Count() = 0;
- *streams[0]->begin() = kUNK;
- stats.Add(0, 0);
- (++streams[0])->Count() = 0;
- *streams[0]->begin() = kBOS;
- // <s> is not in stats yet because it will get put in later.
-
- // This keeps track of actual counts for lower orders. It is not output
- // (only adjusted counts are), but used to determine pruning.
- std::vector<uint64_t> actual_counts(positions.size(), 0);
- // Something of a hack: don't prune <s>.
- actual_counts[0] = std::numeric_limits<uint64_t>::max();
-
- // Iterate over full (the stream of the highest order ngrams)
- for (; full; ++full) {
- const WordIndex *different = FindDifference(*full, **lower_valid);
- std::size_t same = full->end() - 1 - different;
-
- // STEP 1: Output all the n-grams that changed.
- for (; lower_valid >= &streams[same]; --lower_valid) {
- uint64_t order_minus_1 = lower_valid - streams_begin;
- if(actual_counts[order_minus_1] <= prune_thresholds_[order_minus_1])
- (*lower_valid)->Mark();
-
- if(!prune_words_.empty()) {
- for(WordIndex* i = (*lower_valid)->begin(); i != (*lower_valid)->end(); i++) {
- if(prune_words_[*i]) {
- (*lower_valid)->Mark();
- break;
- }
- }
- }
-
- stats.Add(order_minus_1, (*lower_valid)->UnmarkedCount(), (*lower_valid)->IsMarked());
- ++*lower_valid;
- }
-
- // STEP 2: Update n-grams that still match.
- // n-grams that match get count from the full entry.
- for (std::size_t i = 0; i < same; ++i) {
- actual_counts[i] += full->UnmarkedCount();
- }
- // Increment the number of unique extensions for the longest match.
- if (same) ++streams[same - 1]->Count();
-
- // STEP 3: Initialize new n-grams.
- // This is here because bos is also const WordIndex *, so copy gets
- // consistent argument types.
- const WordIndex *full_end = full->end();
- // Initialize and mark as valid up to bos.
- const WordIndex *bos;
- for (bos = different; (bos > full->begin()) && (*bos != kBOS); --bos) {
- NGramStream &to = *++lower_valid;
- std::copy(bos, full_end, to->begin());
- to->Count() = 1;
- actual_counts[lower_valid - streams_begin] = full->UnmarkedCount();
- }
- // Now bos indicates where <s> is or is the 0th word of full.
- if (bos != full->begin()) {
- // There is an <s> beyond the 0th word.
- NGramStream &to = *++lower_valid;
- std::copy(bos, full_end, to->begin());
-
- // Anything that begins with <s> has full non adjusted count.
- to->Count() = full->UnmarkedCount();
- actual_counts[lower_valid - streams_begin] = full->UnmarkedCount();
- } else {
- stats.AddFull(full->UnmarkedCount(), full->IsMarked());
- }
- assert(lower_valid >= &streams[0]);
- }
-
- // The above loop outputs n-grams when it observes changes. This outputs
- // the last n-grams.
- for (NGramStream *s = streams.begin(); s <= lower_valid; ++s) {
- uint64_t lower_count = actual_counts[(*s)->Order() - 1];
- if(lower_count <= prune_thresholds_[(*s)->Order() - 1])
- (*s)->Mark();
-
- if(!prune_words_.empty()) {
- for(WordIndex* i = (*s)->begin(); i != (*s)->end(); i++) {
- if(prune_words_[*i]) {
- (*s)->Mark();
- break;
- }
- }
- }
-
- stats.Add(s - streams.begin(), lower_count, (*s)->IsMarked());
- ++*s;
- }
- // Poison everyone! Except the N-grams which were already poisoned by the input.
- for (NGramStream *s = streams.begin(); s != streams.end(); ++s)
- s->Poison();
-
- stats.CalculateDiscounts(discount_config_);
-
- // NOTE: See special early-return case for unigrams near the top of this function
-}
-
-}} // namespaces
diff --git a/src/joshua/decoder/ff/lm/kenlm/lm/builder/adjust_counts.hh b/src/joshua/decoder/ff/lm/kenlm/lm/builder/adjust_counts.hh
deleted file mode 100644
index b169950..0000000
--- a/src/joshua/decoder/ff/lm/kenlm/lm/builder/adjust_counts.hh
+++ /dev/null
@@ -1,72 +0,0 @@
-#ifndef LM_BUILDER_ADJUST_COUNTS_H
-#define LM_BUILDER_ADJUST_COUNTS_H
-
-#include "lm/builder/discount.hh"
-#include "lm/lm_exception.hh"
-#include "util/exception.hh"
-
-#include <vector>
-
-#include <stdint.h>
-
-namespace util { namespace stream { class ChainPositions; } }
-
-namespace lm {
-namespace builder {
-
-class BadDiscountException : public util::Exception {
- public:
- BadDiscountException() throw();
- ~BadDiscountException() throw();
-};
-
-struct DiscountConfig {
- // Overrides discounts for orders [1,discount_override.size()].
- std::vector<Discount> overwrite;
- // If discounting fails for an order, copy them from here.
- Discount fallback;
- // What to do when discounts are out of range or would trigger divison by
- // zero. It it does something other than THROW_UP, use fallback_discount.
- WarningAction bad_action;
-};
-
-/* Compute adjusted counts.
- * Input: unique suffix sorted N-grams (and just the N-grams) with raw counts.
- * Output: [1,N]-grams with adjusted counts.
- * [1,N)-grams are in suffix order
- * N-grams are in undefined order (they're going to be sorted anyway).
- */
-class AdjustCounts {
- public:
- // counts: output
- // counts_pruned: output
- // discounts: mostly output. If the input already has entries, they will be kept.
- // prune_thresholds: input. n-grams with normal (not adjusted) count below this will be pruned.
- AdjustCounts(
- const std::vector<uint64_t> &prune_thresholds,
- std::vector<uint64_t> &counts,
- std::vector<uint64_t> &counts_pruned,
- const std::vector<bool> &prune_words,
- const DiscountConfig &discount_config,
- std::vector<Discount> &discounts)
- : prune_thresholds_(prune_thresholds), counts_(counts), counts_pruned_(counts_pruned),
- prune_words_(prune_words), discount_config_(discount_config), discounts_(discounts)
- {}
-
- void Run(const util::stream::ChainPositions &positions);
-
- private:
- const std::vector<uint64_t> &prune_thresholds_;
- std::vector<uint64_t> &counts_;
- std::vector<uint64_t> &counts_pruned_;
- const std::vector<bool> &prune_words_;
-
- DiscountConfig discount_config_;
- std::vector<Discount> &discounts_;
-};
-
-} // namespace builder
-} // namespace lm
-
-#endif // LM_BUILDER_ADJUST_COUNTS_H
-
diff --git a/src/joshua/decoder/ff/lm/kenlm/lm/builder/adjust_counts_test.cc b/src/joshua/decoder/ff/lm/kenlm/lm/builder/adjust_counts_test.cc
deleted file mode 100644
index 073c5df..0000000
--- a/src/joshua/decoder/ff/lm/kenlm/lm/builder/adjust_counts_test.cc
+++ /dev/null
@@ -1,111 +0,0 @@
-#include "lm/builder/adjust_counts.hh"
-
-#include "lm/builder/ngram_stream.hh"
-#include "util/scoped.hh"
-
-#include <boost/thread/thread.hpp>
-#define BOOST_TEST_MODULE AdjustCounts
-#include <boost/test/unit_test.hpp>
-
-namespace lm { namespace builder { namespace {
-
-class KeepCopy {
- public:
- KeepCopy() : size_(0) {}
-
- void Run(const util::stream::ChainPosition &position) {
- for (util::stream::Link link(position); link; ++link) {
- mem_.call_realloc(size_ + link->ValidSize());
- memcpy(static_cast<uint8_t*>(mem_.get()) + size_, link->Get(), link->ValidSize());
- size_ += link->ValidSize();
- }
- }
-
- uint8_t *Get() { return static_cast<uint8_t*>(mem_.get()); }
- std::size_t Size() const { return size_; }
-
- private:
- util::scoped_malloc mem_;
- std::size_t size_;
-};
-
-struct Gram4 {
- WordIndex ids[4];
- uint64_t count;
-};
-
-class WriteInput {
- public:
- void Run(const util::stream::ChainPosition &position) {
- NGramStream input(position);
- Gram4 grams[] = {
- {{0,0,0,0},10},
- {{0,0,3,0},3},
- // bos
- {{1,1,1,2},5},
- {{0,0,3,2},5},
- };
- for (size_t i = 0; i < sizeof(grams) / sizeof(Gram4); ++i, ++input) {
- memcpy(input->begin(), grams[i].ids, sizeof(WordIndex) * 4);
- input->Count() = grams[i].count;
- }
- input.Poison();
- }
-};
-
-BOOST_AUTO_TEST_CASE(Simple) {
- KeepCopy outputs[4];
- std::vector<uint64_t> counts;
- std::vector<Discount> discount;
- {
- util::stream::ChainConfig config;
- config.total_memory = 100;
- config.block_count = 1;
- util::stream::Chains chains(4);
- for (unsigned i = 0; i < 4; ++i) {
- config.entry_size = NGram::TotalSize(i + 1);
- chains.push_back(config);
- }
-
- chains[3] >> WriteInput();
- util::stream::ChainPositions for_adjust(chains);
- for (unsigned i = 0; i < 4; ++i) {
- chains[i] >> boost::ref(outputs[i]);
- }
- chains >> util::stream::kRecycle;
- std::vector<uint64_t> counts_pruned(4);
- std::vector<uint64_t> prune_thresholds(4);
- DiscountConfig discount_config;
- discount_config.fallback = Discount();
- discount_config.bad_action = THROW_UP;
- BOOST_CHECK_THROW(AdjustCounts(prune_thresholds, counts, counts_pruned, discount_config, discount).Run(for_adjust), BadDiscountException);
- }
- BOOST_REQUIRE_EQUAL(4UL, counts.size());
- BOOST_CHECK_EQUAL(4UL, counts[0]);
- // These are no longer set because the discounts are bad.
-/* BOOST_CHECK_EQUAL(4UL, counts[1]);
- BOOST_CHECK_EQUAL(3UL, counts[2]);
- BOOST_CHECK_EQUAL(3UL, counts[3]);*/
- BOOST_REQUIRE_EQUAL(NGram::TotalSize(1) * 4, outputs[0].Size());
- NGram uni(outputs[0].Get(), 1);
- BOOST_CHECK_EQUAL(kUNK, *uni.begin());
- BOOST_CHECK_EQUAL(0ULL, uni.Count());
- uni.NextInMemory();
- BOOST_CHECK_EQUAL(kBOS, *uni.begin());
- BOOST_CHECK_EQUAL(0ULL, uni.Count());
- uni.NextInMemory();
- BOOST_CHECK_EQUAL(0UL, *uni.begin());
- BOOST_CHECK_EQUAL(2ULL, uni.Count());
- uni.NextInMemory();
- BOOST_CHECK_EQUAL(2ULL, uni.Count());
- BOOST_CHECK_EQUAL(2UL, *uni.begin());
-
- BOOST_REQUIRE_EQUAL(NGram::TotalSize(2) * 4, outputs[1].Size());
- NGram bi(outputs[1].Get(), 2);
- BOOST_CHECK_EQUAL(0UL, *bi.begin());
- BOOST_CHECK_EQUAL(0UL, *(bi.begin() + 1));
- BOOST_CHECK_EQUAL(1ULL, bi.Count());
- bi.NextInMemory();
-}
-
-}}} // namespaces
diff --git a/src/joshua/decoder/ff/lm/kenlm/lm/builder/corpus_count.cc b/src/joshua/decoder/ff/lm/kenlm/lm/builder/corpus_count.cc
deleted file mode 100644
index 7f3dafa..0000000
--- a/src/joshua/decoder/ff/lm/kenlm/lm/builder/corpus_count.cc
+++ /dev/null
@@ -1,255 +0,0 @@
-#include "lm/builder/corpus_count.hh"
-
-#include "lm/builder/ngram.hh"
-#include "lm/lm_exception.hh"
-#include "lm/vocab.hh"
-#include "lm/word_index.hh"
-#include "util/fake_ofstream.hh"
-#include "util/file.hh"
-#include "util/file_piece.hh"
-#include "util/murmur_hash.hh"
-#include "util/probing_hash_table.hh"
-#include "util/scoped.hh"
-#include "util/stream/chain.hh"
-#include "util/stream/timer.hh"
-#include "util/tokenize_piece.hh"
-
-#include <boost/unordered_set.hpp>
-#include <boost/unordered_map.hpp>
-
-#include <functional>
-
-#include <stdint.h>
-
-namespace lm {
-namespace builder {
-namespace {
-
-#pragma pack(push)
-#pragma pack(4)
-struct VocabEntry {
- typedef uint64_t Key;
-
- uint64_t GetKey() const { return key; }
- void SetKey(uint64_t to) { key = to; }
-
- uint64_t key;
- lm::WordIndex value;
-};
-#pragma pack(pop)
-
-class DedupeHash : public std::unary_function<const WordIndex *, bool> {
- public:
- explicit DedupeHash(std::size_t order) : size_(order * sizeof(WordIndex)) {}
-
- std::size_t operator()(const WordIndex *start) const {
- return util::MurmurHashNative(start, size_);
- }
-
- private:
- const std::size_t size_;
-};
-
-class DedupeEquals : public std::binary_function<const WordIndex *, const WordIndex *, bool> {
- public:
- explicit DedupeEquals(std::size_t order) : size_(order * sizeof(WordIndex)) {}
-
- bool operator()(const WordIndex *first, const WordIndex *second) const {
- return !memcmp(first, second, size_);
- }
-
- private:
- const std::size_t size_;
-};
-
-struct DedupeEntry {
- typedef WordIndex *Key;
- Key GetKey() const { return key; }
- void SetKey(WordIndex *to) { key = to; }
- Key key;
- static DedupeEntry Construct(WordIndex *at) {
- DedupeEntry ret;
- ret.key = at;
- return ret;
- }
-};
-
-
-// TODO: don't have this here, should be with probing hash table defaults?
-const float kProbingMultiplier = 1.5;
-
-typedef util::ProbingHashTable<DedupeEntry, DedupeHash, DedupeEquals> Dedupe;
-
-class Writer {
- public:
- Writer(std::size_t order, const util::stream::ChainPosition &position, void *dedupe_mem, std::size_t dedupe_mem_size)
- : block_(position), gram_(block_->Get(), order),
- dedupe_invalid_(order, std::numeric_limits<WordIndex>::max()),
- dedupe_(dedupe_mem, dedupe_mem_size, &dedupe_invalid_[0], DedupeHash(order), DedupeEquals(order)),
- buffer_(new WordIndex[order - 1]),
- block_size_(position.GetChain().BlockSize()) {
- dedupe_.Clear();
- assert(Dedupe::Size(position.GetChain().BlockSize() / position.GetChain().EntrySize(), kProbingMultiplier) == dedupe_mem_size);
- if (order == 1) {
- // Add special words. AdjustCounts is responsible if order != 1.
- AddUnigramWord(kUNK);
- AddUnigramWord(kBOS);
- }
- }
-
- ~Writer() {
- block_->SetValidSize(reinterpret_cast<const uint8_t*>(gram_.begin()) - static_cast<const uint8_t*>(block_->Get()));
- (++block_).Poison();
- }
-
- // Write context with a bunch of <s>
- void StartSentence() {
- for (WordIndex *i = gram_.begin(); i != gram_.end() - 1; ++i) {
- *i = kBOS;
- }
- }
-
- void Append(WordIndex word) {
- *(gram_.end() - 1) = word;
- Dedupe::MutableIterator at;
- bool found = dedupe_.FindOrInsert(DedupeEntry::Construct(gram_.begin()), at);
- if (found) {
- // Already present.
- NGram already(at->key, gram_.Order());
- ++(already.Count());
- // Shift left by one.
- memmove(gram_.begin(), gram_.begin() + 1, sizeof(WordIndex) * (gram_.Order() - 1));
- return;
- }
- // Complete the write.
- gram_.Count() = 1;
- // Prepare the next n-gram.
- if (reinterpret_cast<uint8_t*>(gram_.begin()) + gram_.TotalSize() != static_cast<uint8_t*>(block_->Get()) + block_size_) {
- NGram last(gram_);
- gram_.NextInMemory();
- std::copy(last.begin() + 1, last.end(), gram_.begin());
- return;
- }
- // Block end. Need to store the context in a temporary buffer.
- std::copy(gram_.begin() + 1, gram_.end(), buffer_.get());
- dedupe_.Clear();
- block_->SetValidSize(block_size_);
- gram_.ReBase((++block_)->Get());
- std::copy(buffer_.get(), buffer_.get() + gram_.Order() - 1, gram_.begin());
- }
-
- private:
- void AddUnigramWord(WordIndex index) {
- *gram_.begin() = index;
- gram_.Count() = 0;
- gram_.NextInMemory();
- if (gram_.Base() == static_cast<uint8_t*>(block_->Get()) + block_size_) {
- block_->SetValidSize(block_size_);
- gram_.ReBase((++block_)->Get());
- }
- }
-
- util::stream::Link block_;
-
- NGram gram_;
-
- // This is the memory behind the invalid value in dedupe_.
- std::vector<WordIndex> dedupe_invalid_;
- // Hash table combiner implementation.
- Dedupe dedupe_;
-
- // Small buffer to hold existing ngrams when shifting across a block boundary.
- boost::scoped_array<WordIndex> buffer_;
-
- const std::size_t block_size_;
-};
-
-} // namespace
-
-float CorpusCount::DedupeMultiplier(std::size_t order) {
- return kProbingMultiplier * static_cast<float>(sizeof(DedupeEntry)) / static_cast<float>(NGram::TotalSize(order));
-}
-
-std::size_t CorpusCount::VocabUsage(std::size_t vocab_estimate) {
- return ngram::GrowableVocab<ngram::WriteUniqueWords>::MemUsage(vocab_estimate);
-}
-
-CorpusCount::CorpusCount(util::FilePiece &from, int vocab_write, uint64_t &token_count, WordIndex &type_count, std::vector<bool> &prune_words, const std::string& prune_vocab_filename, std::size_t entries_per_block, WarningAction disallowed_symbol)
- : from_(from), vocab_write_(vocab_write), token_count_(token_count), type_count_(type_count),
- prune_words_(prune_words), prune_vocab_filename_(prune_vocab_filename),
- dedupe_mem_size_(Dedupe::Size(entries_per_block, kProbingMultiplier)),
- dedupe_mem_(util::MallocOrThrow(dedupe_mem_size_)),
- disallowed_symbol_action_(disallowed_symbol) {
-}
-
-namespace {
- void ComplainDisallowed(StringPiece word, WarningAction &action) {
- switch (action) {
- case SILENT:
- return;
- case COMPLAIN:
- std::cerr << "Warning: " << word << " appears in the input. All instances of <s>, </s>, and <unk> will be interpreted as whitespace." << std::endl;
- action = SILENT;
- return;
- case THROW_UP:
- UTIL_THROW(FormatLoadException, "Special word " << word << " is not allowed in the corpus. I plan to support models containing <unk> in the future. Pass --skip_symbols to convert these symbols to whitespace.");
- }
- }
-} // namespace
-
-void CorpusCount::Run(const util::stream::ChainPosition &position) {
- ngram::GrowableVocab<ngram::WriteUniqueWords> vocab(type_count_, vocab_write_);
- token_count_ = 0;
- type_count_ = 0;
- const WordIndex end_sentence = vocab.FindOrInsert("</s>");
- Writer writer(NGram::OrderFromSize(position.GetChain().EntrySize()), position, dedupe_mem_.get(), dedupe_mem_size_);
- uint64_t count = 0;
- bool delimiters[256];
- util::BoolCharacter::Build("\0\t\n\r ", delimiters);
- try {
- while(true) {
- StringPiece line(from_.ReadLine());
- writer.StartSentence();
- for (util::TokenIter<util::BoolCharacter, true> w(line, delimiters); w; ++w) {
- WordIndex word = vocab.FindOrInsert(*w);
- if (word <= 2) {
- ComplainDisallowed(*w, disallowed_symbol_action_);
- continue;
- }
- writer.Append(word);
- ++count;
- }
- writer.Append(end_sentence);
- }
- } catch (const util::EndOfFileException &e) {}
- token_count_ = count;
- type_count_ = vocab.Size();
-
- // Create list of unigrams that are supposed to be pruned
- if (!prune_vocab_filename_.empty()) {
- try {
- util::FilePiece prune_vocab_file(prune_vocab_filename_.c_str());
-
- prune_words_.resize(vocab.Size(), true);
- try {
- while (true) {
- StringPiece line(prune_vocab_file.ReadLine());
- for (util::TokenIter<util::BoolCharacter, true> w(line, delimiters); w; ++w)
- prune_words_[vocab.Index(*w)] = false;
- }
- } catch (const util::EndOfFileException &e) {}
-
- // Never prune <unk>, <s>, </s>
- prune_words_[kUNK] = false;
- prune_words_[kBOS] = false;
- prune_words_[kEOS] = false;
-
- } catch (const util::Exception &e) {
- std::cerr << e.what() << std::endl;
- abort();
- }
- }
-}
-
-} // namespace builder
-} // namespace lm
diff --git a/src/joshua/decoder/ff/lm/kenlm/lm/builder/corpus_count.hh b/src/joshua/decoder/ff/lm/kenlm/lm/builder/corpus_count.hh
deleted file mode 100644
index d3121ca..0000000
--- a/src/joshua/decoder/ff/lm/kenlm/lm/builder/corpus_count.hh
+++ /dev/null
@@ -1,53 +0,0 @@
-#ifndef LM_BUILDER_CORPUS_COUNT_H
-#define LM_BUILDER_CORPUS_COUNT_H
-
-#include "lm/lm_exception.hh"
-#include "lm/word_index.hh"
-#include "util/scoped.hh"
-
-#include <cstddef>
-#include <string>
-#include <stdint.h>
-#include <vector>
-
-namespace util {
-class FilePiece;
-namespace stream {
-class ChainPosition;
-} // namespace stream
-} // namespace util
-
-namespace lm {
-namespace builder {
-
-class CorpusCount {
- public:
- // Memory usage will be DedupeMultipler(order) * block_size + total_chain_size + unknown vocab_hash_size
- static float DedupeMultiplier(std::size_t order);
-
- // How much memory vocabulary will use based on estimated size of the vocab.
- static std::size_t VocabUsage(std::size_t vocab_estimate);
-
- // token_count: out.
- // type_count aka vocabulary size. Initialize to an estimate. It is set to the exact value.
- CorpusCount(util::FilePiece &from, int vocab_write, uint64_t &token_count, WordIndex &type_count, std::vector<bool> &prune_words, const std::string& prune_vocab_filename, std::size_t entries_per_block, WarningAction disallowed_symbol);
-
- void Run(const util::stream::ChainPosition &position);
-
- private:
- util::FilePiece &from_;
- int vocab_write_;
- uint64_t &token_count_;
- WordIndex &type_count_;
- std::vector<bool>& prune_words_;
- const std::string& prune_vocab_filename_;
-
- std::size_t dedupe_mem_size_;
- util::scoped_malloc dedupe_mem_;
-
- WarningAction disallowed_symbol_action_;
-};
-
-} // namespace builder
-} // namespace lm
-#endif // LM_BUILDER_CORPUS_COUNT_H
diff --git a/src/joshua/decoder/ff/lm/kenlm/lm/builder/corpus_count_test.cc b/src/joshua/decoder/ff/lm/kenlm/lm/builder/corpus_count_test.cc
deleted file mode 100644
index 26cb634..0000000
--- a/src/joshua/decoder/ff/lm/kenlm/lm/builder/corpus_count_test.cc
+++ /dev/null
@@ -1,76 +0,0 @@
-#include "lm/builder/corpus_count.hh"
-
-#include "lm/builder/ngram.hh"
-#include "lm/builder/ngram_stream.hh"
-
-#include "util/file.hh"
-#include "util/file_piece.hh"
-#include "util/tokenize_piece.hh"
-#include "util/stream/chain.hh"
-#include "util/stream/stream.hh"
-
-#define BOOST_TEST_MODULE CorpusCountTest
-#include <boost/test/unit_test.hpp>
-
-namespace lm { namespace builder { namespace {
-
-#define Check(str, count) { \
- BOOST_REQUIRE(stream); \
- w = stream->begin(); \
- for (util::TokenIter<util::AnyCharacter, true> t(str, " "); t; ++t, ++w) { \
- BOOST_CHECK_EQUAL(*t, v[*w]); \
- } \
- BOOST_CHECK_EQUAL((uint64_t)count, stream->Count()); \
- ++stream; \
-}
-
-BOOST_AUTO_TEST_CASE(Short) {
- util::scoped_fd input_file(util::MakeTemp("corpus_count_test_temp"));
- const char input[] = "looking on a little more loin\non a little more loin\non foo little more loin\nbar\n\n";
- // Blocks of 10 are
- // looking on a little more loin </s> on a little[duplicate] more[duplicate] loin[duplicate] </s>[duplicate] on[duplicate] foo
- // little more loin </s> bar </s> </s>
-
- util::WriteOrThrow(input_file.get(), input, sizeof(input) - 1);
- util::FilePiece input_piece(input_file.release(), "temp file");
-
- util::stream::ChainConfig config;
- config.entry_size = NGram::TotalSize(3);
- config.total_memory = config.entry_size * 20;
- config.block_count = 2;
-
- util::scoped_fd vocab(util::MakeTemp("corpus_count_test_vocab"));
-
- util::stream::Chain chain(config);
- NGramStream stream;
- uint64_t token_count;
- WordIndex type_count = 10;
- CorpusCount counter(input_piece, vocab.get(), token_count, type_count, chain.BlockSize() / chain.EntrySize(), SILENT);
- chain >> boost::ref(counter) >> stream >> util::stream::kRecycle;
-
- const char *v[] = {"<unk>", "<s>", "</s>", "looking", "on", "a", "little", "more", "loin", "foo", "bar"};
-
- WordIndex *w;
-
- Check("<s> <s> looking", 1);
- Check("<s> looking on", 1);
- Check("looking on a", 1);
- Check("on a little", 2);
- Check("a little more", 2);
- Check("little more loin", 2);
- Check("more loin </s>", 2);
- Check("<s> <s> on", 2);
- Check("<s> on a", 1);
- Check("<s> on foo", 1);
- Check("on foo little", 1);
- Check("foo little more", 1);
- Check("little more loin", 1);
- Check("more loin </s>", 1);
- Check("<s> <s> bar", 1);
- Check("<s> bar </s>", 1);
- Check("<s> <s> </s>", 1);
- BOOST_CHECK(!stream);
- BOOST_CHECK_EQUAL(sizeof(v) / sizeof(const char*), type_count);
-}
-
-}}} // namespaces
diff --git a/src/joshua/decoder/ff/lm/kenlm/lm/builder/dump_counts_main.cc b/src/joshua/decoder/ff/lm/kenlm/lm/builder/dump_counts_main.cc
deleted file mode 100644
index fa00167..0000000
--- a/src/joshua/decoder/ff/lm/kenlm/lm/builder/dump_counts_main.cc
+++ /dev/null
@@ -1,36 +0,0 @@
-#include "lm/builder/print.hh"
-#include "lm/word_index.hh"
-#include "util/file.hh"
-#include "util/read_compressed.hh"
-
-#include <boost/lexical_cast.hpp>
-
-#include <iostream>
-#include <vector>
-
-int main(int argc, char *argv[]) {
- if (argc != 4) {
- std::cerr << "Usage: " << argv[0] << " counts vocabulary order\n"
- "The counts file contains records with 4-byte vocabulary ids followed by 8-byte\n"
- "counts. Each record has order many vocabulary ids.\n"
- "The vocabulary file contains the words delimited by NULL in order of id.\n"
- "The vocabulary file may not be compressed because it is mmapped but the counts\n"
- "file can be compressed.\n";
- return 1;
- }
- util::ReadCompressed counts(util::OpenReadOrThrow(argv[1]));
- util::scoped_fd vocab_file(util::OpenReadOrThrow(argv[2]));
- lm::builder::VocabReconstitute vocab(vocab_file.get());
- unsigned int order = boost::lexical_cast<unsigned int>(argv[3]);
- std::vector<char> record(sizeof(uint32_t) * order + sizeof(uint64_t));
- while (std::size_t got = counts.ReadOrEOF(&*record.begin(), record.size())) {
- UTIL_THROW_IF(got != record.size(), util::Exception, "Read " << got << " bytes at the end of file, which is not a complete record of length " << record.size());
- const lm::WordIndex *words = reinterpret_cast<const lm::WordIndex*>(&*record.begin());
- for (const lm::WordIndex *i = words; i != words + order; ++i) {
- UTIL_THROW_IF(*i >= vocab.Size(), util::Exception, "Vocab ID " << *i << " is larger than the vocab file's maximum of " << vocab.Size() << ". Are you sure you have the right order and vocab file for these counts?");
- std::cout << vocab.Lookup(*i) << ' ';
- }
- // TODO don't use std::cout because it is slow. Add fast uint64_t printing support to FakeOFStream.
- std::cout << *reinterpret_cast<const uint64_t*>(words + order) << '\n';
- }
-}
diff --git a/src/joshua/decoder/ff/lm/kenlm/lm/builder/header_info.hh b/src/joshua/decoder/ff/lm/kenlm/lm/builder/header_info.hh
deleted file mode 100644
index 1461952..0000000
--- a/src/joshua/decoder/ff/lm/kenlm/lm/builder/header_info.hh
+++ /dev/null
@@ -1,24 +0,0 @@
-#ifndef LM_BUILDER_HEADER_INFO_H
-#define LM_BUILDER_HEADER_INFO_H
-
-#include <string>
-#include <vector>
-#include <stdint.h>
-
-// Some configuration info that is used to add
-// comments to the beginning of an ARPA file
-struct HeaderInfo {
- std::string input_file;
- uint64_t token_count;
- std::vector<uint64_t> counts_pruned;
-
- HeaderInfo() {}
-
- HeaderInfo(const std::string& input_file_in, uint64_t token_count_in, const std::vector<uint64_t> &counts_pruned_in)
- : input_file(input_file_in), token_count(token_count_in), counts_pruned(counts_pruned_in) {}
-
- // TODO: Add smoothing type
- // TODO: More info if multiple models were interpolated
-};
-
-#endif
diff --git a/src/joshua/decoder/ff/lm/kenlm/lm/builder/initial_probabilities.cc b/src/joshua/decoder/ff/lm/kenlm/lm/builder/initial_probabilities.cc
deleted file mode 100644
index b1dd96f..0000000
--- a/src/joshua/decoder/ff/lm/kenlm/lm/builder/initial_probabilities.cc
+++ /dev/null
@@ -1,297 +0,0 @@
-#include "lm/builder/initial_probabilities.hh"
-
-#include "lm/builder/discount.hh"
-#include "lm/builder/ngram_stream.hh"
-#include "lm/builder/sort.hh"
-#include "lm/builder/hash_gamma.hh"
-#include "util/murmur_hash.hh"
-#include "util/file.hh"
-#include "util/stream/chain.hh"
-#include "util/stream/io.hh"
-#include "util/stream/stream.hh"
-
-#include <vector>
-
-namespace lm { namespace builder {
-
-namespace {
-struct BufferEntry {
- // Gamma from page 20 of Chen and Goodman.
- float gamma;
- // \sum_w a(c w) for all w.
- float denominator;
-};
-
-struct HashBufferEntry : public BufferEntry {
- // Hash value of ngram. Used to join contexts with backoffs.
- uint64_t hash_value;
-};
-
-// Reads all entries in order like NGramStream does.
-// But deletes any entries that have CutoffCount below or equal to pruning
-// threshold.
-class PruneNGramStream {
- public:
- PruneNGramStream(const util::stream::ChainPosition &position) :
- current_(NULL, NGram::OrderFromSize(position.GetChain().EntrySize())),
- dest_(NULL, NGram::OrderFromSize(position.GetChain().EntrySize())),
- currentCount_(0),
- block_(position)
- {
- StartBlock();
- }
-
- NGram &operator*() { return current_; }
- NGram *operator->() { return ¤t_; }
-
- operator bool() const {
- return block_;
- }
-
- PruneNGramStream &operator++() {
- assert(block_);
-
- if(current_.Order() == 1 && *current_.begin() <= 2)
- dest_.NextInMemory();
- else if(currentCount_ > 0) {
- if(dest_.Base() < current_.Base()) {
- memcpy(dest_.Base(), current_.Base(), current_.TotalSize());
- }
- dest_.NextInMemory();
- }
-
- current_.NextInMemory();
-
- uint8_t *block_base = static_cast<uint8_t*>(block_->Get());
- if (current_.Base() == block_base + block_->ValidSize()) {
- block_->SetValidSize(dest_.Base() - block_base);
- ++block_;
- StartBlock();
- if (block_) {
- currentCount_ = current_.CutoffCount();
- }
- } else {
- currentCount_ = current_.CutoffCount();
- }
-
- return *this;
- }
-
- private:
- void StartBlock() {
- for (; ; ++block_) {
- if (!block_) return;
- if (block_->ValidSize()) break;
- }
- current_.ReBase(block_->Get());
- currentCount_ = current_.CutoffCount();
-
- dest_.ReBase(block_->Get());
- }
-
- NGram current_; // input iterator
- NGram dest_; // output iterator
-
- uint64_t currentCount_;
-
- util::stream::Link block_;
-};
-
-// Extract an array of HashedGamma from an array of BufferEntry.
-class OnlyGamma {
- public:
- OnlyGamma(bool pruning) : pruning_(pruning) {}
-
- void Run(const util::stream::ChainPosition &position) {
- for (util::stream::Link block_it(position); block_it; ++block_it) {
- if(pruning_) {
- const HashBufferEntry *in = static_cast<const HashBufferEntry*>(block_it->Get());
- const HashBufferEntry *end = static_cast<const HashBufferEntry*>(block_it->ValidEnd());
-
- // Just make it point to the beginning of the stream so it can be overwritten
- // With HashGamma values. Do not attempt to interpret the values until set below.
- HashGamma *out = static_cast<HashGamma*>(block_it->Get());
- for (; in < end; out += 1, in += 1) {
- // buffering, otherwise might overwrite values too early
- float gamma_buf = in->gamma;
- uint64_t hash_buf = in->hash_value;
-
- out->gamma = gamma_buf;
- out->hash_value = hash_buf;
- }
- block_it->SetValidSize((block_it->ValidSize() * sizeof(HashGamma)) / sizeof(HashBufferEntry));
- }
- else {
- float *out = static_cast<float*>(block_it->Get());
- const float *in = out;
- const float *end = static_cast<const float*>(block_it->ValidEnd());
- for (out += 1, in += 2; in < end; out += 1, in += 2) {
- *out = *in;
- }
- block_it->SetValidSize(block_it->ValidSize() / 2);
- }
- }
- }
-
- private:
- bool pruning_;
-};
-
-class AddRight {
- public:
- AddRight(const Discount &discount, const util::stream::ChainPosition &input, bool pruning)
- : discount_(discount), input_(input), pruning_(pruning) {}
-
- void Run(const util::stream::ChainPosition &output) {
- NGramStream in(input_);
- util::stream::Stream out(output);
-
- std::vector<WordIndex> previous(in->Order() - 1);
- // Silly windows requires this workaround to just get an invalid pointer when empty.
- void *const previous_raw = previous.empty() ? NULL : static_cast<void*>(&previous[0]);
- const std::size_t size = sizeof(WordIndex) * previous.size();
-
- for(; in; ++out) {
- memcpy(previous_raw, in->begin(), size);
- uint64_t denominator = 0;
- uint64_t normalizer = 0;
-
- uint64_t counts[4];
- memset(counts, 0, sizeof(counts));
- do {
- denominator += in->UnmarkedCount();
-
- // Collect unused probability mass from pruning.
- // Becomes 0 for unpruned ngrams.
- normalizer += in->UnmarkedCount() - in->CutoffCount();
-
- // Chen&Goodman do not mention counting based on cutoffs, but
- // backoff becomes larger than 1 otherwise, so probably needs
- // to count cutoffs. Counts normally without pruning.
- if(in->CutoffCount() > 0)
- ++counts[std::min(in->CutoffCount(), static_cast<uint64_t>(3))];
-
- } while (++in && !memcmp(previous_raw, in->begin(), size));
-
- BufferEntry &entry = *reinterpret_cast<BufferEntry*>(out.Get());
- entry.denominator = static_cast<float>(denominator);
- entry.gamma = 0.0;
- for (unsigned i = 1; i <= 3; ++i) {
- entry.gamma += discount_.Get(i) * static_cast<float>(counts[i]);
- }
-
- // Makes model sum to 1 with pruning (I hope).
- entry.gamma += normalizer;
-
- entry.gamma /= entry.denominator;
-
- if(pruning_) {
- // If pruning is enabled the stream actually contains HashBufferEntry, see InitialProbabilities(...),
- // so add a hash value that identifies the current ngram.
- static_cast<HashBufferEntry*>(&entry)->hash_value = util::MurmurHashNative(previous_raw, size);
- }
- }
- out.Poison();
- }
-
- private:
- const Discount &discount_;
- const util::stream::ChainPosition input_;
- bool pruning_;
-};
-
-class MergeRight {
- public:
- MergeRight(bool interpolate_unigrams, const util::stream::ChainPosition &from_adder, const Discount &discount)
- : interpolate_unigrams_(interpolate_unigrams), from_adder_(from_adder), discount_(discount) {}
-
- // calculate the initial probability of each n-gram (before order-interpolation)
- // Run() gets invoked once for each order
- void Run(const util::stream::ChainPosition &primary) {
- util::stream::Stream summed(from_adder_);
-
- PruneNGramStream grams(primary);
-
- // Without interpolation, the interpolation weight goes to <unk>.
- if (grams->Order() == 1) {
- BufferEntry sums(*static_cast<const BufferEntry*>(summed.Get()));
- // Special case for <unk>
- assert(*grams->begin() == kUNK);
- float gamma_assign;
- if (interpolate_unigrams_) {
- // Default: treat <unk> like a zeroton.
- gamma_assign = sums.gamma;
- grams->Value().uninterp.prob = 0.0;
- } else {
- // SRI: give all the interpolation mass to <unk>
- gamma_assign = 0.0;
- grams->Value().uninterp.prob = sums.gamma;
- }
- grams->Value().uninterp.gamma = gamma_assign;
- ++grams;
-
- // Special case for <s>: probability 1.0. This allows <s> to be
- // explicitly scores as part of the sentence without impacting
- // probability and computes q correctly as b(<s>).
- assert(*grams->begin() == kBOS);
- grams->Value().uninterp.prob = 1.0;
- grams->Value().uninterp.gamma = 0.0;
-
- while (++grams) {
- grams->Value().uninterp.prob = discount_.Apply(grams->Count()) / sums.denominator;
- grams->Value().uninterp.gamma = gamma_assign;
- }
- ++summed;
- return;
- }
-
- std::vector<WordIndex> previous(grams->Order() - 1);
- const std::size_t size = sizeof(WordIndex) * previous.size();
- for (; grams; ++summed) {
- memcpy(&previous[0], grams->begin(), size);
- const BufferEntry &sums = *static_cast<const BufferEntry*>(summed.Get());
-
- do {
- Payload &pay = grams->Value();
- pay.uninterp.prob = discount_.Apply(grams->UnmarkedCount()) / sums.denominator;
- pay.uninterp.gamma = sums.gamma;
- } while (++grams && !memcmp(&previous[0], grams->begin(), size));
- }
- }
-
- private:
- bool interpolate_unigrams_;
- util::stream::ChainPosition from_adder_;
- Discount discount_;
-};
-
-} // namespace
-
-void InitialProbabilities(
- const InitialProbabilitiesConfig &config,
- const std::vector<Discount> &discounts,
- util::stream::Chains &primary,
- util::stream::Chains &second_in,
- util::stream::Chains &gamma_out,
- const std::vector<uint64_t> &prune_thresholds,
- bool prune_vocab) {
- for (size_t i = 0; i < primary.size(); ++i) {
- util::stream::ChainConfig gamma_config = config.adder_out;
- if(prune_vocab || prune_thresholds[i] > 0)
- gamma_config.entry_size = sizeof(HashBufferEntry);
- else
- gamma_config.entry_size = sizeof(BufferEntry);
-
- util::stream::ChainPosition second(second_in[i].Add());
- second_in[i] >> util::stream::kRecycle;
- gamma_out.push_back(gamma_config);
- gamma_out[i] >> AddRight(discounts[i], second, prune_vocab || prune_thresholds[i] > 0);
-
- primary[i] >> MergeRight(config.interpolate_unigrams, gamma_out[i].Add(), discounts[i]);
-
- // Don't bother with the OnlyGamma thread for something to discard.
- if (i) gamma_out[i] >> OnlyGamma(prune_vocab || prune_thresholds[i] > 0);
- }
-}
-
-}} // namespaces
diff --git a/src/joshua/decoder/ff/lm/kenlm/lm/builder/initial_probabilities.hh b/src/joshua/decoder/ff/lm/kenlm/lm/builder/initial_probabilities.hh
deleted file mode 100644
index 57e09cd..0000000
--- a/src/joshua/decoder/ff/lm/kenlm/lm/builder/initial_probabilities.hh
+++ /dev/null
@@ -1,42 +0,0 @@
-#ifndef LM_BUILDER_INITIAL_PROBABILITIES_H
-#define LM_BUILDER_INITIAL_PROBABILITIES_H
-
-#include "lm/builder/discount.hh"
-#include "util/stream/config.hh"
-
-#include <vector>
-
-namespace util { namespace stream { class Chains; } }
-
-namespace lm {
-namespace builder {
-
-struct InitialProbabilitiesConfig {
- // These should be small buffers to keep the adder from getting too far ahead
- util::stream::ChainConfig adder_in;
- util::stream::ChainConfig adder_out;
- // SRILM doesn't normally interpolate unigrams.
- bool interpolate_unigrams;
-};
-
-/* Compute initial (uninterpolated) probabilities
- * primary: the normal chain of n-grams. Incoming is context sorted adjusted
- * counts. Outgoing has uninterpolated probabilities for use by Interpolate.
- * second_in: a second copy of the primary input. Discard the output.
- * gamma_out: Computed gamma values are output on these chains in suffix order.
- * The values are bare floats and should be buffered for interpolation to
- * use.
- */
-void InitialProbabilities(
- const InitialProbabilitiesConfig &config,
- const std::vector<Discount> &discounts,
- util::stream::Chains &primary,
- util::stream::Chains &second_in,
- util::stream::Chains &gamma_out,
- const std::vector<uint64_t> &prune_thresholds,
- bool prune_vocab);
-
-} // namespace builder
-} // namespace lm
-
-#endif // LM_BUILDER_INITIAL_PROBABILITIES_H
diff --git a/src/joshua/decoder/ff/lm/kenlm/lm/builder/interpolate.cc b/src/joshua/decoder/ff/lm/kenlm/lm/builder/interpolate.cc
deleted file mode 100644
index 7de7852..0000000
--- a/src/joshua/decoder/ff/lm/kenlm/lm/builder/interpolate.cc
+++ /dev/null
@@ -1,160 +0,0 @@
-#include "lm/builder/interpolate.hh"
-
-#include "lm/builder/hash_gamma.hh"
-#include "lm/builder/joint_order.hh"
-#include "lm/builder/ngram_stream.hh"
-#include "lm/builder/sort.hh"
-#include "lm/lm_exception.hh"
-#include "util/fixed_array.hh"
-#include "util/murmur_hash.hh"
-
-#include <assert.h>
-#include <math.h>
-
-namespace lm { namespace builder {
-namespace {
-
-/* Calculate q, the collapsed probability and backoff, as defined in
- * @inproceedings{Heafield-rest,
- * author = {Kenneth Heafield and Philipp Koehn and Alon Lavie},
- * title = {Language Model Rest Costs and Space-Efficient Storage},
- * year = {2012},
- * month = {July},
- * booktitle = {Proceedings of the Joint Conference on Empirical Methods in Natural Language Processing and Computational Natural Language Learning},
- * address = {Jeju Island, Korea},
- * pages = {1169--1178},
- * url = {http://kheafield.com/professional/edinburgh/rest\_paper.pdf},
- * }
- * This is particularly convenient to calculate during interpolation because
- * the needed backoff terms are already accessed at the same time.
- */
-class OutputQ {
- public:
- explicit OutputQ(std::size_t order) : q_delta_(order) {}
-
- void Gram(unsigned order_minus_1, float full_backoff, ProbBackoff &out) {
- float &q_del = q_delta_[order_minus_1];
- if (order_minus_1) {
- // Divide by context's backoff (which comes in as out.backoff)
- q_del = q_delta_[order_minus_1 - 1] / out.backoff * full_backoff;
- } else {
- q_del = full_backoff;
- }
- out.prob = log10f(out.prob * q_del);
- // TODO: stop wastefully outputting this!
- out.backoff = 0.0;
- }
-
- private:
- // Product of backoffs in the numerator divided by backoffs in the
- // denominator. Does not include
- std::vector<float> q_delta_;
-};
-
-/* Default: output probability and backoff */
-class OutputProbBackoff {
- public:
- explicit OutputProbBackoff(std::size_t /*order*/) {}
-
- void Gram(unsigned /*order_minus_1*/, float full_backoff, ProbBackoff &out) const {
- // Correcting for numerical precision issues. Take that IRST.
- out.prob = std::min(0.0f, log10f(out.prob));
- out.backoff = log10f(full_backoff);
- }
-};
-
-template <class Output> class Callback {
- public:
- Callback(float uniform_prob, const util::stream::ChainPositions &backoffs, const std::vector<uint64_t> &prune_thresholds, bool prune_vocab)
- : backoffs_(backoffs.size()), probs_(backoffs.size() + 2),
- prune_thresholds_(prune_thresholds),
- prune_vocab_(prune_vocab),
- output_(backoffs.size() + 1 /* order */) {
- probs_[0] = uniform_prob;
- for (std::size_t i = 0; i < backoffs.size(); ++i) {
- backoffs_.push_back(backoffs[i]);
- }
- }
-
- ~Callback() {
- for (std::size_t i = 0; i < backoffs_.size(); ++i) {
- if(prune_vocab_ || prune_thresholds_[i + 1] > 0)
- while(backoffs_[i])
- ++backoffs_[i];
-
- if (backoffs_[i]) {
- std::cerr << "Backoffs do not match for order " << (i + 1) << std::endl;
- abort();
- }
- }
- }
-
- void Enter(unsigned order_minus_1, NGram &gram) {
- Payload &pay = gram.Value();
- pay.complete.prob = pay.uninterp.prob + pay.uninterp.gamma * probs_[order_minus_1];
- probs_[order_minus_1 + 1] = pay.complete.prob;
-
- float out_backoff;
- if (order_minus_1 < backoffs_.size() && *(gram.end() - 1) != kUNK && *(gram.end() - 1) != kEOS && backoffs_[order_minus_1]) {
- if(prune_vocab_ || prune_thresholds_[order_minus_1 + 1] > 0) {
- //Compute hash value for current context
- uint64_t current_hash = util::MurmurHashNative(gram.begin(), gram.Order() * sizeof(WordIndex));
-
- const HashGamma *hashed_backoff = static_cast<const HashGamma*>(backoffs_[order_minus_1].Get());
- while(current_hash != hashed_backoff->hash_value && ++backoffs_[order_minus_1])
- hashed_backoff = static_cast<const HashGamma*>(backoffs_[order_minus_1].Get());
-
- if(current_hash == hashed_backoff->hash_value) {
- out_backoff = hashed_backoff->gamma;
- ++backoffs_[order_minus_1];
- } else {
- // Has been pruned away so it is not a context anymore
- out_backoff = 1.0;
- }
- } else {
- out_backoff = *static_cast<const float*>(backoffs_[order_minus_1].Get());
- ++backoffs_[order_minus_1];
- }
- } else {
- // Not a context.
- out_backoff = 1.0;
- }
-
- output_.Gram(order_minus_1, out_backoff, pay.complete);
- }
-
- void Exit(unsigned, const NGram &) const {}
-
- private:
- util::FixedArray<util::stream::Stream> backoffs_;
-
- std::vector<float> probs_;
- const std::vector<uint64_t>& prune_thresholds_;
- bool prune_vocab_;
-
- Output output_;
-};
-} // namespace
-
-Interpolate::Interpolate(uint64_t vocab_size, const util::stream::ChainPositions &backoffs, const std::vector<uint64_t>& prune_thresholds, bool prune_vocab, bool output_q)
- : uniform_prob_(1.0 / static_cast<float>(vocab_size)), // Includes <unk> but excludes <s>.
- backoffs_(backoffs),
- prune_thresholds_(prune_thresholds),
- prune_vocab_(prune_vocab),
- output_q_(output_q) {}
-
-// perform order-wise interpolation
-void Interpolate::Run(const util::stream::ChainPositions &positions) {
- assert(positions.size() == backoffs_.size() + 1);
- if (output_q_) {
- typedef Callback<OutputQ> C;
- C callback(uniform_prob_, backoffs_, prune_thresholds_, prune_vocab_);
- JointOrder<C, SuffixOrder>(positions, callback);
- } else {
- typedef Callback<OutputProbBackoff> C;
- C callback(uniform_prob_, backoffs_, prune_thresholds_, prune_vocab_);
- JointOrder<C, SuffixOrder>(positions, callback);
- }
-}
-
-}} // namespaces
diff --git a/src/joshua/decoder/ff/lm/kenlm/lm/builder/interpolate.hh b/src/joshua/decoder/ff/lm/kenlm/lm/builder/interpolate.hh
deleted file mode 100644
index adfd919..0000000
--- a/src/joshua/decoder/ff/lm/kenlm/lm/builder/interpolate.hh
+++ /dev/null
@@ -1,34 +0,0 @@
-#ifndef LM_BUILDER_INTERPOLATE_H
-#define LM_BUILDER_INTERPOLATE_H
-
-#include "util/stream/multi_stream.hh"
-
-#include <vector>
-
-#include <stdint.h>
-
-namespace lm { namespace builder {
-
-/* Interpolate step.
- * Input: suffix sorted n-grams with (p_uninterpolated, gamma) from
- * InitialProbabilities.
- * Output: suffix sorted n-grams with complete probability
- */
-class Interpolate {
- public:
- // Normally vocab_size is the unigram count-1 (since p(<s>) = 0) but might
- // be larger when the user specifies a consistent vocabulary size.
- explicit Interpolate(uint64_t vocab_size, const util::stream::ChainPositions &backoffs, const std::vector<uint64_t> &prune_thresholds, bool prune_vocab, bool output_q_);
-
- void Run(const util::stream::ChainPositions &positions);
-
- private:
- float uniform_prob_;
- util::stream::ChainPositions backoffs_;
- const std::vector<uint64_t> prune_thresholds_;
- bool prune_vocab_;
- bool output_q_;
-};
-
-}} // namespaces
-#endif // LM_BUILDER_INTERPOLATE_H
diff --git a/src/joshua/decoder/ff/lm/kenlm/lm/builder/joint_order.hh b/src/joshua/decoder/ff/lm/kenlm/lm/builder/joint_order.hh
deleted file mode 100644
index 9ed8909..0000000
--- a/src/joshua/decoder/ff/lm/kenlm/lm/builder/joint_order.hh
+++ /dev/null
@@ -1,67 +0,0 @@
-#ifndef LM_BUILDER_JOINT_ORDER_H
-#define LM_BUILDER_JOINT_ORDER_H
-
-#include "lm/builder/ngram_stream.hh"
-#include "lm/lm_exception.hh"
-
-#ifdef DEBUG
-#include "util/fixed_array.hh"
-#include <iostream>
-#endif
-
-#include <string.h>
-
-namespace lm { namespace builder {
-
-template <class Callback, class Compare> void JointOrder(const util::stream::ChainPositions &positions, Callback &callback) {
- // Allow matching to reference streams[-1].
- NGramStreams streams_with_dummy;
- streams_with_dummy.InitWithDummy(positions);
- NGramStream *streams = streams_with_dummy.begin() + 1;
-
- unsigned int order;
- for (order = 0; order < positions.size() && streams[order]; ++order) {}
- assert(order); // should always have <unk>.
-
- // Debugging only: call comparison function to sanity check order.
-#ifdef DEBUG
- util::FixedArray<Compare> less_compare(order);
- for (unsigned i = 0; i < order; ++i)
- less_compare.push_back(i + 1);
-#endif // DEBUG
-
- unsigned int current = 0;
- while (true) {
- // Does the context match the lower one?
- if (!memcmp(streams[static_cast<int>(current) - 1]->begin(), streams[current]->begin() + Compare::kMatchOffset, sizeof(WordIndex) * current)) {
- callback.Enter(current, *streams[current]);
- // Transition to looking for extensions.
- if (++current < order) continue;
- }
-#ifdef DEBUG
- // match_check[current - 1] matches current-grams
- // The lower-order stream (which skips fewer current-grams) should always be <= the higher order-stream (which can skip current-grams).
- else if (!less_compare[current - 1](streams[static_cast<int>(current) - 1]->begin(), streams[current]->begin() + Compare::kMatchOffset)) {
- std::cerr << "Stream out of order detected" << std::endl;
- abort();
- }
-#endif // DEBUG
- // No extension left.
- while(true) {
- assert(current > 0);
- --current;
- callback.Exit(current, *streams[current]);
-
- if (++streams[current]) break;
-
- UTIL_THROW_IF(order != current + 1, FormatLoadException, "Detected n-gram without matching suffix");
-
- order = current;
- if (!order) return;
- }
- }
-}
-
-}} // namespaces
-
-#endif // LM_BUILDER_JOINT_ORDER_H
diff --git a/src/joshua/decoder/ff/lm/kenlm/lm/builder/lmplz_main.cc b/src/joshua/decoder/ff/lm/kenlm/lm/builder/lmplz_main.cc
deleted file mode 100644
index d3bd99d..0000000
--- a/src/joshua/decoder/ff/lm/kenlm/lm/builder/lmplz_main.cc
+++ /dev/null
@@ -1,228 +0,0 @@
-#include "lm/builder/output.hh"
-#include "lm/builder/pipeline.hh"
-#include "lm/builder/print.hh"
-#include "lm/lm_exception.hh"
-#include "util/file.hh"
-#include "util/file_piece.hh"
-#include "util/usage.hh"
-
-#include <iostream>
-
-#include <boost/program_options.hpp>
-#include <boost/version.hpp>
-#include <vector>
-
-namespace {
-class SizeNotify {
- public:
- SizeNotify(std::size_t &out) : behind_(out) {}
-
- void operator()(const std::string &from) {
- behind_ = util::ParseSize(from);
- }
-
- private:
- std::size_t &behind_;
-};
-
-boost::program_options::typed_value<std::string> *SizeOption(std::size_t &to, const char *default_value) {
- return boost::program_options::value<std::string>()->notifier(SizeNotify(to))->default_value(default_value);
-}
-
-// Parse and validate pruning thresholds then return vector of threshold counts
-// for each n-grams order.
-std::vector<uint64_t> ParsePruning(const std::vector<std::string> ¶m, std::size_t order) {
- // convert to vector of integers
- std::vector<uint64_t> prune_thresholds;
- prune_thresholds.reserve(order);
- for (std::vector<std::string>::const_iterator it(param.begin()); it != param.end(); ++it) {
- try {
- prune_thresholds.push_back(boost::lexical_cast<uint64_t>(*it));
- } catch(const boost::bad_lexical_cast &) {
- UTIL_THROW(util::Exception, "Bad pruning threshold " << *it);
- }
- }
-
- // Fill with zeros by default.
- if (prune_thresholds.empty()) {
- prune_thresholds.resize(order, 0);
- return prune_thresholds;
- }
-
- // validate pruning threshold if specified
- // throw if each n-gram order has not threshold specified
- UTIL_THROW_IF(prune_thresholds.size() > order, util::Exception, "You specified pruning thresholds for orders 1 through " << prune_thresholds.size() << " but the model only has order " << order);
- // threshold for unigram can only be 0 (no pruning)
-
- // check if threshold are not in decreasing order
- uint64_t lower_threshold = 0;
- for (std::vector<uint64_t>::iterator it = prune_thresholds.begin(); it != prune_thresholds.end(); ++it) {
- UTIL_THROW_IF(lower_threshold > *it, util::Exception, "Pruning thresholds should be in non-decreasing order. Otherwise substrings would be removed, which is bad for query-time data structures.");
- lower_threshold = *it;
- }
-
- // Pad to all orders using the last value.
- prune_thresholds.resize(order, prune_thresholds.back());
- return prune_thresholds;
-}
-
-lm::builder::Discount ParseDiscountFallback(const std::vector<std::string> ¶m) {
- lm::builder::Discount ret;
- UTIL_THROW_IF(param.size() > 3, util::Exception, "Specify at most three fallback discounts: 1, 2, and 3+");
- UTIL_THROW_IF(param.empty(), util::Exception, "Fallback discounting enabled, but no discount specified");
- ret.amount[0] = 0.0;
- for (unsigned i = 0; i < 3; ++i) {
- float discount = boost::lexical_cast<float>(param[i < param.size() ? i : (param.size() - 1)]);
- UTIL_THROW_IF(discount < 0.0 || discount > static_cast<float>(i+1), util::Exception, "The discount for count " << (i+1) << " was parsed as " << discount << " which is not in the range [0, " << (i+1) << "].");
- ret.amount[i + 1] = discount;
- }
- return ret;
-}
-
-} // namespace
-
-int main(int argc, char *argv[]) {
- try {
- namespace po = boost::program_options;
- po::options_description options("Language model building options");
- lm::builder::PipelineConfig pipeline;
-
- std::string text, arpa;
- std::vector<std::string> pruning;
- std::vector<std::string> discount_fallback;
- std::vector<std::string> discount_fallback_default;
- discount_fallback_default.push_back("0.5");
- discount_fallback_default.push_back("1");
- discount_fallback_default.push_back("1.5");
- bool verbose_header;
-
- options.add_options()
- ("help,h", po::bool_switch(), "Show this help message")
- ("order,o", po::value<std::size_t>(&pipeline.order)
-#if BOOST_VERSION >= 104200
- ->required()
-#endif
- , "Order of the model")
- ("interpolate_unigrams", po::value<bool>(&pipeline.initial_probs.interpolate_unigrams)->default_value(true)->implicit_value(true), "Interpolate the unigrams (default) as opposed to giving lots of mass to <unk> like SRI. If you want SRI's behavior with a large <unk> and the old lmplz default, use --interpolate_unigrams 0.")
- ("skip_symbols", po::bool_switch(), "Treat <s>, </s>, and <unk> as whitespace instead of throwing an exception")
- ("temp_prefix,T", po::value<std::string>(&pipeline.sort.temp_prefix)->default_value("/tmp/lm"), "Temporary file prefix")
- ("memory,S", SizeOption(pipeline.sort.total_memory, util::GuessPhysicalMemory() ? "80%" : "1G"), "Sorting memory")
- ("minimum_block", SizeOption(pipeline.minimum_block, "8K"), "Minimum block size to allow")
- ("sort_block", SizeOption(pipeline.sort.buffer_size, "64M"), "Size of IO operations for sort (determines arity)")
- ("block_count", po::value<std::size_t>(&pipeline.block_count)->default_value(2), "Block count (per order)")
- ("vocab_estimate", po::value<lm::WordIndex>(&pipeline.vocab_estimate)->default_value(1000000), "Assume this vocabulary size for purposes of calculating memory in step 1 (corpus count) and pre-sizing the hash table")
- ("vocab_file", po::value<std::string>(&pipeline.vocab_file)->default_value(""), "Location to write a file containing the unique vocabulary strings delimited by null bytes")
- ("vocab_pad", po::value<uint64_t>(&pipeline.vocab_size_for_unk)->default_value(0), "If the vocabulary is smaller than this value, pad with <unk> to reach this size. Requires --interpolate_unigrams")
- ("verbose_header", po::bool_switch(&verbose_header), "Add a verbose header to the ARPA file that includes information such as token count, smoothing type, etc.")
- ("text", po::value<std::string>(&text), "Read text from a file instead of stdin")
- ("arpa", po::value<std::string>(&arpa), "Write ARPA to a file instead of stdout")
- ("collapse_values", po::bool_switch(&pipeline.output_q), "Collapse probability and backoff into a single value, q that yields the same sentence-level probabilities. See http://kheafield.com/professional/edinburgh/rest_paper.pdf for more details, including a proof.")
- ("prune", po::value<std::vector<std::string> >(&pruning)->multitoken(), "Prune n-grams with count less than or equal to the given threshold. Specify one value for each order i.e. 0 0 1 to prune singleton trigrams and above. The sequence of values must be non-decreasing and the last value applies to any remaining orders. Default is to not prune, which is equivalent to --prune 0.")
- ("limit_vocab_file", po::value<std::string>(&pipeline.prune_vocab_file)->default_value(""), "Read allowed vocabulary separated by whitespace. N-grams that contain vocabulary items not in this list will be pruned. Can be combined with --prune arg")
- ("discount_fallback", po::value<std::vector<std::string> >(&discount_fallback)->multitoken()->implicit_value(discount_fallback_default, "0.5 1 1.5"), "The closed-form estimate for Kneser-Ney discounts does not work without singletons or doubletons. It can also fail if these values are out of range. This option falls back to user-specified discounts when the closed-form estimate fails. Note that this option is generally a bad idea: you should deduplicate your corpus instead. However, class-based models need custom discounts because they lack singleton unigrams. Provide up to three discounts (for adjusted counts 1, 2, and 3+), which will be applied to all orders where the closed-form estimates fail.");
- po::variables_map vm;
- po::store(po::parse_command_line(argc, argv, options), vm);
-
- if (argc == 1 || vm["help"].as<bool>()) {
- std::cerr <<
- "Builds unpruned language models with modified Kneser-Ney smoothing.\n\n"
- "Please cite:\n"
- "@inproceedings{Heafield-estimate,\n"
- " author = {Kenneth Heafield and Ivan Pouzyrevsky and Jonathan H. Clark and Philipp Koehn},\n"
- " title = {Scalable Modified {Kneser-Ney} Language Model Estimation},\n"
- " year = {2013},\n"
- " month = {8},\n"
- " booktitle = {Proceedings of the 51st Annual Meeting of the Association for Computational Linguistics},\n"
- " address = {Sofia, Bulgaria},\n"
- " url = {http://kheafield.com/professional/edinburgh/estimate\\_paper.pdf},\n"
- "}\n\n"
- "Provide the corpus on stdin. The ARPA file will be written to stdout. Order of\n"
- "the model (-o) is the only mandatory option. As this is an on-disk program,\n"
- "setting the temporary file location (-T) and sorting memory (-S) is recommended.\n\n"
- "Memory sizes are specified like GNU sort: a number followed by a unit character.\n"
- "Valid units are \% for percentage of memory (supported platforms only) and (in\n"
- "increasing powers of 1024): b, K, M, G, T, P, E, Z, Y. Default is K (*1024).\n";
- uint64_t mem = util::GuessPhysicalMemory();
- if (mem) {
- std::cerr << "This machine has " << mem << " bytes of memory.\n\n";
- } else {
- std::cerr << "Unable to determine the amount of memory on this machine.\n\n";
- }
- std::cerr << options << std::endl;
- return 1;
- }
-
- po::notify(vm);
-
- // required() appeared in Boost 1.42.0.
-#if BOOST_VERSION < 104200
- if (!vm.count("order")) {
- std::cerr << "the option '--order' is required but missing" << std::endl;
- return 1;
- }
-#endif
-
- if (pipeline.vocab_size_for_unk && !pipeline.initial_probs.interpolate_unigrams) {
- std::cerr << "--vocab_pad requires --interpolate_unigrams be on" << std::endl;
- return 1;
- }
-
- if (vm["skip_symbols"].as<bool>()) {
- pipeline.disallowed_symbol_action = lm::COMPLAIN;
- } else {
- pipeline.disallowed_symbol_action = lm::THROW_UP;
- }
-
- if (vm.count("discount_fallback")) {
- pipeline.discount.fallback = ParseDiscountFallback(discount_fallback);
- pipeline.discount.bad_action = lm::COMPLAIN;
- } else {
- // Unused, just here to prevent the compiler from complaining about uninitialized.
- pipeline.discount.fallback = lm::builder::Discount();
- pipeline.discount.bad_action = lm::THROW_UP;
- }
-
- // parse pruning thresholds. These depend on order, so it is not done as a notifier.
- pipeline.prune_thresholds = ParsePruning(pruning, pipeline.order);
-
- if (!vm["limit_vocab_file"].as<std::string>().empty()) {
- pipeline.prune_vocab = true;
- }
- else {
- pipeline.prune_vocab = false;
- }
-
- util::NormalizeTempPrefix(pipeline.sort.temp_prefix);
-
- lm::builder::InitialProbabilitiesConfig &initial = pipeline.initial_probs;
- // TODO: evaluate options for these.
- initial.adder_in.total_memory = 32768;
- initial.adder_in.block_count = 2;
- initial.adder_out.total_memory = 32768;
- initial.adder_out.block_count = 2;
- pipeline.read_backoffs = initial.adder_out;
-
- util::scoped_fd in(0), out(1);
- if (vm.count("text")) {
- in.reset(util::OpenReadOrThrow(text.c_str()));
- }
- if (vm.count("arpa")) {
- out.reset(util::CreateOrThrow(arpa.c_str()));
- }
-
- // Read from stdin
- try {
- lm::builder::Output output;
- output.Add(new lm::builder::PrintARPA(out.release(), verbose_header));
- lm::builder::Pipeline(pipeline, in.release(), output);
- } catch (const util::MallocException &e) {
- std::cerr << e.what() << std::endl;
- std::cerr << "Try rerunning with a more conservative -S setting than " << vm["memory"].as<std::string>() << std::endl;
- return 1;
- }
- util::PrintUsage(std::cerr);
- } catch (const std::exception &e) {
- std::cerr << e.what() << std::endl;
- return 1;
- }
-}
diff --git a/src/joshua/decoder/ff/lm/kenlm/lm/builder/multi_stream.hh b/src/joshua/decoder/ff/lm/kenlm/lm/builder/multi_stream.hh
deleted file mode 100644
index 707a98c..0000000
--- a/src/joshua/decoder/ff/lm/kenlm/lm/builder/multi_stream.hh
+++ /dev/null
@@ -1,180 +0,0 @@
-#ifndef LM_BUILDER_MULTI_STREAM__
-#define LM_BUILDER_MULTI_STREAM__
-
-#include "lm/builder/ngram_stream.hh"
-#include "util/scoped.hh"
-#include "util/stream/chain.hh"
-
-#include <cstddef>
-#include <new>
-
-#include <assert.h>
-#include <stdlib.h>
-
-namespace lm { namespace builder {
-
-template <class T> class FixedArray {
- public:
- explicit FixedArray(std::size_t count) {
- Init(count);
- }
-
- FixedArray() : newed_end_(NULL) {}
-
- void Init(std::size_t count) {
- assert(!block_.get());
- block_.reset(malloc(sizeof(T) * count));
- if (!block_.get()) throw std::bad_alloc();
- newed_end_ = begin();
- }
-
- FixedArray(const FixedArray &from) {
- std::size_t size = from.newed_end_ - static_cast<const T*>(from.block_.get());
- Init(size);
- for (std::size_t i = 0; i < size; ++i) {
- new(end()) T(from[i]);
- Constructed();
- }
- }
-
- ~FixedArray() { clear(); }
-
- T *begin() { return static_cast<T*>(block_.get()); }
- const T *begin() const { return static_cast<const T*>(block_.get()); }
- // Always call Constructed after successful completion of new.
- T *end() { return newed_end_; }
- const T *end() const { return newed_end_; }
-
- T &back() { return *(end() - 1); }
- const T &back() const { return *(end() - 1); }
-
- std::size_t size() const { return end() - begin(); }
- bool empty() const { return begin() == end(); }
-
- T &operator[](std::size_t i) { return begin()[i]; }
- const T &operator[](std::size_t i) const { return begin()[i]; }
-
- template <class C> void push_back(const C &c) {
- new (end()) T(c);
- Constructed();
- }
-
- void clear() {
- for (T *i = begin(); i != end(); ++i)
- i->~T();
- newed_end_ = begin();
- }
-
- protected:
- void Constructed() {
- ++newed_end_;
- }
-
- private:
- util::scoped_malloc block_;
-
- T *newed_end_;
-};
-
-class Chains;
-
-class ChainPositions : public FixedArray<util::stream::ChainPosition> {
- public:
- ChainPositions() {}
-
- void Init(Chains &chains);
-
- explicit ChainPositions(Chains &chains) {
- Init(chains);
- }
-};
-
-class Chains : public FixedArray<util::stream::Chain> {
- private:
- template <class T, void (T::*ptr)(const ChainPositions &) = &T::Run> struct CheckForRun {
- typedef Chains type;
- };
-
- public:
- explicit Chains(std::size_t limit) : FixedArray<util::stream::Chain>(limit) {}
-
- template <class Worker> typename CheckForRun<Worker>::type &operator>>(const Worker &worker) {
- threads_.push_back(new util::stream::Thread(ChainPositions(*this), worker));
- return *this;
- }
-
- template <class Worker> typename CheckForRun<Worker>::type &operator>>(const boost::reference_wrapper<Worker> &worker) {
- threads_.push_back(new util::stream::Thread(ChainPositions(*this), worker));
- return *this;
- }
-
- Chains &operator>>(const util::stream::Recycler &recycler) {
- for (util::stream::Chain *i = begin(); i != end(); ++i)
- *i >> recycler;
- return *this;
- }
-
- void Wait(bool release_memory = true) {
- threads_.clear();
- for (util::stream::Chain *i = begin(); i != end(); ++i) {
- i->Wait(release_memory);
- }
- }
-
- private:
- boost::ptr_vector<util::stream::Thread> threads_;
-
- Chains(const Chains &);
- void operator=(const Chains &);
-};
-
-inline void ChainPositions::Init(Chains &chains) {
- FixedArray<util::stream::ChainPosition>::Init(chains.size());
- for (util::stream::Chain *i = chains.begin(); i != chains.end(); ++i) {
- new (end()) util::stream::ChainPosition(i->Add()); Constructed();
- }
-}
-
-inline Chains &operator>>(Chains &chains, ChainPositions &positions) {
- positions.Init(chains);
- return chains;
-}
-
-class NGramStreams : public FixedArray<NGramStream> {
- public:
- NGramStreams() {}
-
- // This puts a dummy NGramStream at the beginning (useful to algorithms that need to reference something at the beginning).
- void InitWithDummy(const ChainPositions &positions) {
- FixedArray<NGramStream>::Init(positions.size() + 1);
- new (end()) NGramStream(); Constructed();
- for (const util::stream::ChainPosition *i = positions.begin(); i != positions.end(); ++i) {
- push_back(*i);
- }
- }
-
- // Limit restricts to positions[0,limit)
- void Init(const ChainPositions &positions, std::size_t limit) {
- FixedArray<NGramStream>::Init(limit);
- for (const util::stream::ChainPosition *i = positions.begin(); i != positions.begin() + limit; ++i) {
- push_back(*i);
- }
- }
- void Init(const ChainPositions &positions) {
- Init(positions, positions.size());
- }
-
- NGramStreams(const ChainPositions &positions) {
- Init(positions);
- }
-};
-
-inline Chains &operator>>(Chains &chains, NGramStreams &streams) {
- ChainPositions positions;
- chains >> positions;
- streams.Init(positions);
- return chains;
-}
-
-}} // namespaces
-#endif // LM_BUILDER_MULTI_STREAM__
diff --git a/src/joshua/decoder/ff/lm/kenlm/lm/builder/ngram.hh b/src/joshua/decoder/ff/lm/kenlm/lm/builder/ngram.hh
deleted file mode 100644
index 0472bcb..0000000
--- a/src/joshua/decoder/ff/lm/kenlm/lm/builder/ngram.hh
+++ /dev/null
@@ -1,109 +0,0 @@
-#ifndef LM_BUILDER_NGRAM_H
-#define LM_BUILDER_NGRAM_H
-
-#include "lm/weights.hh"
-#include "lm/word_index.hh"
-
-#include <cstddef>
-
-#include <assert.h>
-#include <stdint.h>
-#include <string.h>
-
-namespace lm {
-namespace builder {
-
-struct Uninterpolated {
- float prob; // Uninterpolated probability.
- float gamma; // Interpolation weight for lower order.
-};
-
-union Payload {
- uint64_t count;
- Uninterpolated uninterp;
- ProbBackoff complete;
-};
-
-class NGram {
- public:
- NGram(void *begin, std::size_t order)
- : begin_(static_cast<WordIndex*>(begin)), end_(begin_ + order) {}
-
- const uint8_t *Base() const { return reinterpret_cast<const uint8_t*>(begin_); }
- uint8_t *Base() { return reinterpret_cast<uint8_t*>(begin_); }
-
- void ReBase(void *to) {
- std::size_t difference = end_ - begin_;
- begin_ = reinterpret_cast<WordIndex*>(to);
- end_ = begin_ + difference;
- }
-
- // Would do operator++ but that can get confusing for a stream.
- void NextInMemory() {
- ReBase(&Value() + 1);
- }
-
- // Lower-case in deference to STL.
- const WordIndex *begin() const { return begin_; }
- WordIndex *begin() { return begin_; }
- const WordIndex *end() const { return end_; }
- WordIndex *end() { return end_; }
-
- const Payload &Value() const { return *reinterpret_cast<const Payload *>(end_); }
- Payload &Value() { return *reinterpret_cast<Payload *>(end_); }
-
- uint64_t &Count() { return Value().count; }
- uint64_t Count() const { return Value().count; }
-
- std::size_t Order() const { return end_ - begin_; }
-
- static std::size_t TotalSize(std::size_t order) {
- return order * sizeof(WordIndex) + sizeof(Payload);
- }
- std::size_t TotalSize() const {
- // Compiler should optimize this.
- return TotalSize(Order());
- }
- static std::size_t OrderFromSize(std::size_t size) {
- std::size_t ret = (size - sizeof(Payload)) / sizeof(WordIndex);
- assert(size == TotalSize(ret));
- return ret;
- }
-
- // manipulate msb to signal that ngram can be pruned
- /*mjd**********************************************************************/
-
- bool IsMarked() const {
- return Value().count >> (sizeof(Value().count) * 8 - 1);
- }
-
- void Mark() {
- Value().count |= (1ul << (sizeof(Value().count) * 8 - 1));
- }
-
- void Unmark() {
- Value().count &= ~(1ul << (sizeof(Value().count) * 8 - 1));
- }
-
- uint64_t UnmarkedCount() const {
- return Value().count & ~(1ul << (sizeof(Value().count) * 8 - 1));
- }
-
- uint64_t CutoffCount() const {
- return IsMarked() ? 0 : UnmarkedCount();
- }
-
- /*mjd**********************************************************************/
-
- private:
- WordIndex *begin_, *end_;
-};
-
-const WordIndex kUNK = 0;
-const WordIndex kBOS = 1;
-const WordIndex kEOS = 2;
-
-} // namespace builder
-} // namespace lm
-
-#endif // LM_BUILDER_NGRAM_H
diff --git a/src/joshua/decoder/ff/lm/kenlm/lm/builder/ngram_stream.hh b/src/joshua/decoder/ff/lm/kenlm/lm/builder/ngram_stream.hh
deleted file mode 100644
index ab42734..0000000
--- a/src/joshua/decoder/ff/lm/kenlm/lm/builder/ngram_stream.hh
+++ /dev/null
@@ -1,58 +0,0 @@
-#ifndef LM_BUILDER_NGRAM_STREAM_H
-#define LM_BUILDER_NGRAM_STREAM_H
-
-#include "lm/builder/ngram.hh"
-#include "util/stream/chain.hh"
-#include "util/stream/multi_stream.hh"
-#include "util/stream/stream.hh"
-
-#include <cstddef>
-
-namespace lm { namespace builder {
-
-class NGramStream {
- public:
- NGramStream() : gram_(NULL, 0) {}
-
- NGramStream(const util::stream::ChainPosition &position) : gram_(NULL, 0) {
- Init(position);
- }
-
- void Init(const util::stream::ChainPosition &position) {
- stream_.Init(position);
- gram_ = NGram(stream_.Get(), NGram::OrderFromSize(position.GetChain().EntrySize()));
- }
-
- NGram &operator*() { return gram_; }
- const NGram &operator*() const { return gram_; }
-
- NGram *operator->() { return &gram_; }
- const NGram *operator->() const { return &gram_; }
-
- void *Get() { return stream_.Get(); }
- const void *Get() const { return stream_.Get(); }
-
- operator bool() const { return stream_; }
- bool operator!() const { return !stream_; }
- void Poison() { stream_.Poison(); }
-
- NGramStream &operator++() {
- ++stream_;
- gram_.ReBase(stream_.Get());
- return *this;
- }
-
- private:
- NGram gram_;
- util::stream::Stream stream_;
-};
-
-inline util::stream::Chain &operator>>(util::stream::Chain &chain, NGramStream &str) {
- str.Init(chain.Add());
- return chain;
-}
-
-typedef util::stream::GenericStreams<NGramStream> NGramStreams;
-
-}} // namespaces
-#endif // LM_BUILDER_NGRAM_STREAM_H
diff --git a/src/joshua/decoder/ff/lm/kenlm/lm/builder/output.cc b/src/joshua/decoder/ff/lm/kenlm/lm/builder/output.cc
deleted file mode 100644
index 0fc0197..0000000
--- a/src/joshua/decoder/ff/lm/kenlm/lm/builder/output.cc
+++ /dev/null
@@ -1,14 +0,0 @@
-#include "lm/builder/output.hh"
-#include "util/stream/multi_stream.hh"
-
-#include <boost/ref.hpp>
-
-namespace lm { namespace builder {
-
-OutputHook::~OutputHook() {}
-
-void OutputHook::Apply(util::stream::Chains &chains) {
- chains >> boost::ref(*this);
-}
-
-}} // namespaces
diff --git a/src/joshua/decoder/ff/lm/kenlm/lm/builder/output.hh b/src/joshua/decoder/ff/lm/kenlm/lm/builder/output.hh
deleted file mode 100644
index 0ef769a..0000000
--- a/src/joshua/decoder/ff/lm/kenlm/lm/builder/output.hh
+++ /dev/null
@@ -1,89 +0,0 @@
-#ifndef LM_BUILDER_OUTPUT_H
-#define LM_BUILDER_OUTPUT_H
-
-#include "lm/builder/header_info.hh"
-#include "util/file.hh"
-
-#include <boost/ptr_container/ptr_vector.hpp>
-#include <boost/utility.hpp>
-
-#include <map>
-
-namespace util { namespace stream { class Chains; class ChainPositions; } }
-
-/* Outputs from lmplz: ARPA< sharded files, etc */
-namespace lm { namespace builder {
-
-// These are different types of hooks. Values should be consecutive to enable a vector lookup.
-enum HookType {
- COUNT_HOOK, // Raw N-gram counts, highest order only.
- PROB_PARALLEL_HOOK, // Probability and backoff (or just q). Output must process the orders in parallel or there will be a deadlock.
- PROB_SEQUENTIAL_HOOK, // Probability and backoff (or just q). Output can process orders any way it likes. This requires writing the data to disk then reading. Useful for ARPA files, which put unigrams first etc.
- NUMBER_OF_HOOKS // Keep this last so we know how many values there are.
-};
-
-class Output;
-
-class OutputHook {
- public:
- explicit OutputHook(HookType hook_type) : type_(hook_type), master_(NULL) {}
-
- virtual ~OutputHook();
-
- virtual void Apply(util::stream::Chains &chains);
-
- virtual void Run(const util::stream::ChainPositions &positions) = 0;
-
- protected:
- const HeaderInfo &GetHeader() const;
- int GetVocabFD() const;
-
- private:
- friend class Output;
- const HookType type_;
- const Output *master_;
-};
-
-class Output : boost::noncopyable {
- public:
- Output() {}
-
- // Takes ownership.
- void Add(OutputHook *hook) {
- hook->master_ = this;
- outputs_[hook->type_].push_back(hook);
- }
-
- bool Have(HookType hook_type) const {
- return !outputs_[hook_type].empty();
- }
-
- void SetVocabFD(int to) { vocab_fd_ = to; }
- int GetVocabFD() const { return vocab_fd_; }
-
- void SetHeader(const HeaderInfo &header) { header_ = header; }
- const HeaderInfo &GetHeader() const { return header_; }
-
- void Apply(HookType hook_type, util::stream::Chains &chains) {
- for (boost::ptr_vector<OutputHook>::iterator entry = outputs_[hook_type].begin(); entry != outputs_[hook_type].end(); ++entry) {
- entry->Apply(chains);
- }
- }
-
- private:
- boost::ptr_vector<OutputHook> outputs_[NUMBER_OF_HOOKS];
- int vocab_fd_;
- HeaderInfo header_;
-};
-
-inline const HeaderInfo &OutputHook::GetHeader() const {
- return master_->GetHeader();
-}
-
-inline int OutputHook::GetVocabFD() const {
- return master_->GetVocabFD();
-}
-
-}} // namespaces
-
-#endif // LM_BUILDER_OUTPUT_H
diff --git a/src/joshua/decoder/ff/lm/kenlm/lm/builder/pipeline.cc b/src/joshua/decoder/ff/lm/kenlm/lm/builder/pipeline.cc
deleted file mode 100644
index fced0e3..0000000
--- a/src/joshua/decoder/ff/lm/kenlm/lm/builder/pipeline.cc
+++ /dev/null
@@ -1,344 +0,0 @@
-#include "lm/builder/pipeline.hh"
-
-#include "lm/builder/adjust_counts.hh"
-#include "lm/builder/corpus_count.hh"
-#include "lm/builder/hash_gamma.hh"
-#include "lm/builder/initial_probabilities.hh"
-#include "lm/builder/interpolate.hh"
-#include "lm/builder/output.hh"
-#include "lm/builder/sort.hh"
-
-#include "lm/sizes.hh"
-
-#include "util/exception.hh"
-#include "util/file.hh"
-#include "util/stream/io.hh"
-
-#include <algorithm>
-#include <iostream>
-#include <fstream>
-#include <vector>
-
-namespace lm { namespace builder {
-
-namespace {
-void PrintStatistics(const std::vector<uint64_t> &counts, const std::vector<uint64_t> &counts_pruned, const std::vector<Discount> &discounts) {
- std::cerr << "Statistics:\n";
- for (size_t i = 0; i < counts.size(); ++i) {
- std::cerr << (i + 1) << ' ' << counts_pruned[i];
- if(counts[i] != counts_pruned[i])
- std::cerr << "/" << counts[i];
-
- for (size_t d = 1; d <= 3; ++d)
- std::cerr << " D" << d << (d == 3 ? "+=" : "=") << discounts[i].amount[d];
- std::cerr << '\n';
- }
-}
-
-class Master {
- public:
- explicit Master(PipelineConfig &config)
- : config_(config), chains_(config.order), files_(config.order) {
- config_.minimum_block = std::max(NGram::TotalSize(config_.order), config_.minimum_block);
- }
-
- const PipelineConfig &Config() const { return config_; }
-
- util::stream::Chains &MutableChains() { return chains_; }
-
- template <class T> Master &operator>>(const T &worker) {
- chains_ >> worker;
- return *this;
- }
-
- // This takes the (partially) sorted ngrams and sets up for adjusted counts.
- void InitForAdjust(util::stream::Sort<SuffixOrder, AddCombiner> &ngrams, WordIndex types) {
- const std::size_t each_order_min = config_.minimum_block * config_.block_count;
- // We know how many unigrams there are. Don't allocate more than needed to them.
- const std::size_t min_chains = (config_.order - 1) * each_order_min +
- std::min(types * NGram::TotalSize(1), each_order_min);
- // Do merge sort with calculated laziness.
- const std::size_t merge_using = ngrams.Merge(std::min(config_.TotalMemory() - min_chains, ngrams.DefaultLazy()));
-
- std::vector<uint64_t> count_bounds(1, types);
- CreateChains(config_.TotalMemory() - merge_using, count_bounds);
- ngrams.Output(chains_.back(), merge_using);
-
- // Setup unigram file.
- files_.push_back(util::MakeTemp(config_.TempPrefix()));
- }
-
- // For initial probabilities, but this is generic.
- void SortAndReadTwice(const std::vector<uint64_t> &counts, Sorts<ContextOrder> &sorts, util::stream::Chains &second, util::stream::ChainConfig second_config) {
- // Do merge first before allocating chain memory.
- for (std::size_t i = 1; i < config_.order; ++i) {
- sorts[i - 1].Merge(0);
- }
- // There's no lazy merge, so just divide memory amongst the chains.
- CreateChains(config_.TotalMemory(), counts);
- chains_.back().ActivateProgress();
- chains_[0] >> files_[0].Source();
- second_config.entry_size = NGram::TotalSize(1);
- second.push_back(second_config);
- second.back() >> files_[0].Source();
- for (std::size_t i = 1; i < config_.order; ++i) {
- util::scoped_fd fd(sorts[i - 1].StealCompleted());
- chains_[i].SetProgressTarget(util::SizeOrThrow(fd.get()));
- chains_[i] >> util::stream::PRead(util::DupOrThrow(fd.get()), true);
- second_config.entry_size = NGram::TotalSize(i + 1);
- second.push_back(second_config);
- second.back() >> util::stream::PRead(fd.release(), true);
- }
- }
-
- // There is no sort after this, so go for broke on lazy merging.
- template <class Compare> void MaximumLazyInput(const std::vector<uint64_t> &counts, Sorts<Compare> &sorts) {
- // Determine the minimum we can use for all the chains.
- std::size_t min_chains = 0;
- for (std::size_t i = 0; i < config_.order; ++i) {
- min_chains += std::min(counts[i] * NGram::TotalSize(i + 1), static_cast<uint64_t>(config_.minimum_block));
- }
- std::size_t for_merge = min_chains > config_.TotalMemory() ? 0 : (config_.TotalMemory() - min_chains);
- std::vector<std::size_t> laziness;
- // Prioritize longer n-grams.
- for (util::stream::Sort<SuffixOrder> *i = sorts.end() - 1; i >= sorts.begin(); --i) {
- laziness.push_back(i->Merge(for_merge));
- assert(for_merge >= laziness.back());
- for_merge -= laziness.back();
- }
- std::reverse(laziness.begin(), laziness.end());
-
- CreateChains(for_merge + min_chains, counts);
- chains_.back().ActivateProgress();
- chains_[0] >> files_[0].Source();
- for (std::size_t i = 1; i < config_.order; ++i) {
- sorts[i - 1].Output(chains_[i], laziness[i - 1]);
- }
- }
-
- void BufferFinal(const std::vector<uint64_t> &counts) {
- chains_[0] >> files_[0].Sink();
- for (std::size_t i = 1; i < config_.order; ++i) {
- files_.push_back(util::MakeTemp(config_.TempPrefix()));
- chains_[i] >> files_[i].Sink();
- }
- chains_.Wait(true);
- // Use less memory. Because we can.
- CreateChains(std::min(config_.sort.buffer_size * config_.order, config_.TotalMemory()), counts);
- for (std::size_t i = 0; i < config_.order; ++i) {
- chains_[i] >> files_[i].Source();
- }
- }
-
- template <class Compare> void SetupSorts(Sorts<Compare> &sorts) {
- sorts.Init(config_.order - 1);
- // Unigrams don't get sorted because their order is always the same.
- chains_[0] >> files_[0].Sink();
- for (std::size_t i = 1; i < config_.order; ++i) {
- sorts.push_back(chains_[i], config_.sort, Compare(i + 1));
- }
- chains_.Wait(true);
- }
-
- private:
- // Create chains, allocating memory to them. Totally heuristic. Count
- // bounds are upper bounds on the counts or not present.
- void CreateChains(std::size_t remaining_mem, const std::vector<uint64_t> &count_bounds) {
- std::vector<std::size_t> assignments;
- assignments.reserve(config_.order);
- // Start by assigning maximum memory usage (to be refined later).
- for (std::size_t i = 0; i < count_bounds.size(); ++i) {
- assignments.push_back(static_cast<std::size_t>(std::min(
- static_cast<uint64_t>(remaining_mem),
- count_bounds[i] * static_cast<uint64_t>(NGram::TotalSize(i + 1)))));
- }
- assignments.resize(config_.order, remaining_mem);
-
- // Now we know how much memory everybody wants. How much will they get?
- // Proportional to this.
- std::vector<float> portions;
- // Indices of orders that have yet to be assigned.
- std::vector<std::size_t> unassigned;
- for (std::size_t i = 0; i < config_.order; ++i) {
- portions.push_back(static_cast<float>((i+1) * NGram::TotalSize(i+1)));
- unassigned.push_back(i);
- }
- /*If somebody doesn't eat their full dinner, give it to the rest of the
- * family. Then somebody else might not eat their full dinner etc. Ends
- * when everybody unassigned is hungry.
- */
- float sum;
- bool found_more;
- std::vector<std::size_t> block_count(config_.order);
- do {
- sum = 0.0;
- for (std::size_t i = 0; i < unassigned.size(); ++i) {
- sum += portions[unassigned[i]];
- }
- found_more = false;
- // If the proportional assignment is more than needed, give it just what it needs.
- for (std::vector<std::size_t>::iterator i = unassigned.begin(); i != unassigned.end();) {
- if (assignments[*i] <= remaining_mem * (portions[*i] / sum)) {
- remaining_mem -= assignments[*i];
- block_count[*i] = 1;
- i = unassigned.erase(i);
- found_more = true;
- } else {
- ++i;
- }
- }
- } while (found_more);
- for (std::vector<std::size_t>::iterator i = unassigned.begin(); i != unassigned.end(); ++i) {
- assignments[*i] = remaining_mem * (portions[*i] / sum);
- block_count[*i] = config_.block_count;
- }
- chains_.clear();
- std::cerr << "Chain sizes:";
- for (std::size_t i = 0; i < config_.order; ++i) {
- std::cerr << ' ' << (i+1) << ":" << assignments[i];
- chains_.push_back(util::stream::ChainConfig(NGram::TotalSize(i + 1), block_count[i], assignments[i]));
- }
- std::cerr << std::endl;
- }
-
- PipelineConfig &config_;
-
- util::stream::Chains chains_;
- // Often only unigrams, but sometimes all orders.
- util::FixedArray<util::stream::FileBuffer> files_;
-};
-
-void CountText(int text_file /* input */, int vocab_file /* output */, Master &master, uint64_t &token_count, std::string &text_file_name, std::vector<bool> &prune_words) {
- const PipelineConfig &config = master.Config();
- std::cerr << "=== 1/5 Counting and sorting n-grams ===" << std::endl;
-
- const std::size_t vocab_usage = CorpusCount::VocabUsage(config.vocab_estimate);
- UTIL_THROW_IF(config.TotalMemory() < vocab_usage, util::Exception, "Vocab hash size estimate " << vocab_usage << " exceeds total memory " << config.TotalMemory());
- std::size_t memory_for_chain =
- // This much memory to work with after vocab hash table.
- static_cast<float>(config.TotalMemory() - vocab_usage) /
- // Solve for block size including the dedupe multiplier for one block.
- (static_cast<float>(config.block_count) + CorpusCount::DedupeMultiplier(config.order)) *
- // Chain likes memory expressed in terms of total memory.
- static_cast<float>(config.block_count);
- util::stream::Chain chain(util::stream::ChainConfig(NGram::TotalSize(config.order), config.block_count, memory_for_chain));
-
- WordIndex type_count = config.vocab_estimate;
- util::FilePiece text(text_file, NULL, &std::cerr);
- text_file_name = text.FileName();
- CorpusCount counter(text, vocab_file, token_count, type_count, prune_words, config.prune_vocab_file, chain.BlockSize() / chain.EntrySize(), config.disallowed_symbol_action);
- chain >> boost::ref(counter);
-
- util::stream::Sort<SuffixOrder, AddCombiner> sorter(chain, config.sort, SuffixOrder(config.order), AddCombiner());
- chain.Wait(true);
- std::cerr << "Unigram tokens " << token_count << " types " << type_count << std::endl;
- std::cerr << "=== 2/5 Calculating and sorting adjusted counts ===" << std::endl;
- master.InitForAdjust(sorter, type_count);
-}
-
-void InitialProbabilities(const std::vector<uint64_t> &counts, const std::vector<uint64_t> &counts_pruned, const std::vector<Discount> &discounts, Master &master, Sorts<SuffixOrder> &primary,
- util::FixedArray<util::stream::FileBuffer> &gammas, const std::vector<uint64_t> &prune_thresholds, bool prune_vocab) {
- const PipelineConfig &config = master.Config();
- util::stream::Chains second(config.order);
-
- {
- Sorts<ContextOrder> sorts;
- master.SetupSorts(sorts);
- PrintStatistics(counts, counts_pruned, discounts);
- lm::ngram::ShowSizes(counts_pruned);
- std::cerr << "=== 3/5 Calculating and sorting initial probabilities ===" << std::endl;
- master.SortAndReadTwice(counts_pruned, sorts, second, config.initial_probs.adder_in);
- }
-
- util::stream::Chains gamma_chains(config.order);
- InitialProbabilities(config.initial_probs, discounts, master.MutableChains(), second, gamma_chains, prune_thresholds, prune_vocab);
- // Don't care about gamma for 0.
- gamma_chains[0] >> util::stream::kRecycle;
- gammas.Init(config.order - 1);
- for (std::size_t i = 1; i < config.order; ++i) {
- gammas.push_back(util::MakeTemp(config.TempPrefix()));
- gamma_chains[i] >> gammas[i - 1].Sink();
- }
- // Has to be done here due to gamma_chains scope.
- master.SetupSorts(primary);
-}
-
-void InterpolateProbabilities(const std::vector<uint64_t> &counts, Master &master, Sorts<SuffixOrder> &primary, util::FixedArray<util::stream::FileBuffer> &gammas) {
- std::cerr << "=== 4/5 Calculating and writing order-interpolated probabilities ===" << std::endl;
- const PipelineConfig &config = master.Config();
- master.MaximumLazyInput(counts, primary);
-
- util::stream::Chains gamma_chains(config.order - 1);
- for (std::size_t i = 0; i < config.order - 1; ++i) {
- util::stream::ChainConfig read_backoffs(config.read_backoffs);
-
- if(config.prune_vocab || config.prune_thresholds[i + 1] > 0)
- read_backoffs.entry_size = sizeof(HashGamma);
- else
- read_backoffs.entry_size = sizeof(float);
-
- gamma_chains.push_back(read_backoffs);
- gamma_chains.back() >> gammas[i].Source();
- }
- master >> Interpolate(std::max(master.Config().vocab_size_for_unk, counts[0] - 1 /* <s> is not included */), util::stream::ChainPositions(gamma_chains), config.prune_thresholds, config.prune_vocab, config.output_q);
- gamma_chains >> util::stream::kRecycle;
- master.BufferFinal(counts);
-}
-
-} // namespace
-
-void Pipeline(PipelineConfig &config, int text_file, Output &output) {
- // Some fail-fast sanity checks.
- if (config.sort.buffer_size * 4 > config.TotalMemory()) {
- config.sort.buffer_size = config.TotalMemory() / 4;
- std::cerr << "Warning: changing sort block size to " << config.sort.buffer_size << " bytes due to low total memory." << std::endl;
- }
- if (config.minimum_block < NGram::TotalSize(config.order)) {
- config.minimum_block = NGram::TotalSize(config.order);
- std::cerr << "Warning: raising minimum block to " << config.minimum_block << " to fit an ngram in every block." << std::endl;
- }
- UTIL_THROW_IF(config.sort.buffer_size < config.minimum_block, util::Exception, "Sort block size " << config.sort.buffer_size << " is below the minimum block size " << config.minimum_block << ".");
- UTIL_THROW_IF(config.TotalMemory() < config.minimum_block * config.order * config.block_count, util::Exception,
- "Not enough memory to fit " << (config.order * config.block_count) << " blocks with minimum size " << config.minimum_block << ". Increase memory to " << (config.minimum_block * config.order * config.block_count) << " bytes or decrease the minimum block size.");
-
- UTIL_TIMER("(%w s) Total wall time elapsed\n");
-
- Master master(config);
- // master's destructor will wait for chains. But they might be deadlocked if
- // this thread dies because e.g. it ran out of memory.
- try {
- util::scoped_fd vocab_file(config.vocab_file.empty() ?
- util::MakeTemp(config.TempPrefix()) :
- util::CreateOrThrow(config.vocab_file.c_str()));
- output.SetVocabFD(vocab_file.get());
- uint64_t token_count;
- std::string text_file_name;
-
- std::vector<bool> prune_words;
- CountText(text_file, vocab_file.get(), master, token_count, text_file_name, prune_words);
-
- std::vector<uint64_t> counts;
- std::vector<uint64_t> counts_pruned;
- std::vector<Discount> discounts;
- master >> AdjustCounts(config.prune_thresholds, counts, counts_pruned, prune_words, config.discount, discounts);
-
- {
- util::FixedArray<util::stream::FileBuffer> gammas;
- Sorts<SuffixOrder> primary;
- InitialProbabilities(counts, counts_pruned, discounts, master, primary, gammas, config.prune_thresholds, config.prune_vocab);
- InterpolateProbabilities(counts_pruned, master, primary, gammas);
- }
-
- std::cerr << "=== 5/5 Writing ARPA model ===" << std::endl;
-
- output.SetHeader(HeaderInfo(text_file_name, token_count, counts_pruned));
- output.Apply(PROB_SEQUENTIAL_HOOK, master.MutableChains());
- master >> util::stream::kRecycle;
- master.MutableChains().Wait(true);
- } catch (const util::Exception &e) {
- std::cerr << e.what() << std::endl;
- abort();
- }
-}
-
-}} // namespaces
diff --git a/src/joshua/decoder/ff/lm/kenlm/lm/builder/pipeline.hh b/src/joshua/decoder/ff/lm/kenlm/lm/builder/pipeline.hh
deleted file mode 100644
index 8f4d821..0000000
--- a/src/joshua/decoder/ff/lm/kenlm/lm/builder/pipeline.hh
+++ /dev/null
@@ -1,74 +0,0 @@
-#ifndef LM_BUILDER_PIPELINE_H
-#define LM_BUILDER_PIPELINE_H
-
-#include "lm/builder/adjust_counts.hh"
-#include "lm/builder/initial_probabilities.hh"
-#include "lm/builder/header_info.hh"
-#include "lm/lm_exception.hh"
-#include "lm/word_index.hh"
-#include "util/stream/config.hh"
-#include "util/file_piece.hh"
-
-#include <string>
-#include <cstddef>
-
-namespace lm { namespace builder {
-
-class Output;
-
-struct PipelineConfig {
- std::size_t order;
- std::string vocab_file;
- util::stream::SortConfig sort;
- InitialProbabilitiesConfig initial_probs;
- util::stream::ChainConfig read_backoffs;
-
- // Estimated vocabulary size. Used for sizing CorpusCount memory and
- // initial probing hash table sizing, also in CorpusCount.
- lm::WordIndex vocab_estimate;
-
- // Minimum block size to tolerate.
- std::size_t minimum_block;
-
- // Number of blocks to use. This will be overridden to 1 if everything fits.
- std::size_t block_count;
-
- // n-gram count thresholds for pruning. 0 values means no pruning for
- // corresponding n-gram order
- std::vector<uint64_t> prune_thresholds; //mjd
- bool prune_vocab;
- std::string prune_vocab_file;
-
- // What to do with discount failures.
- DiscountConfig discount;
-
- // Compute collapsed q values instead of probability and backoff
- bool output_q;
-
- /* Computing the perplexity of LMs with different vocabularies is hard. For
- * example, the lowest perplexity is attained by a unigram model that
- * predicts p(<unk>) = 1 and has no other vocabulary. Also, linearly
- * interpolated models will sum to more than 1 because <unk> is duplicated
- * (SRI just pretends p(<unk>) = 0 for these purposes, which makes it sum to
- * 1 but comes with its own problems). This option will make the vocabulary
- * a particular size by replicating <unk> multiple times for purposes of
- * computing vocabulary size. It has no effect if the actual vocabulary is
- * larger. This parameter serves the same purpose as IRSTLM's "dub".
- */
- uint64_t vocab_size_for_unk;
-
- /* What to do the first time <s>, </s>, or <unk> appears in the input. If
- * this is anything but THROW_UP, then the symbol will always be treated as
- * whitespace.
- */
- WarningAction disallowed_symbol_action;
-
- const std::string &TempPrefix() const { return sort.temp_prefix; }
- std::size_t TotalMemory() const { return sort.total_memory; }
-};
-
-// Takes ownership of text_file and out_arpa.
-void Pipeline(PipelineConfig &config, int text_file, Output &output);
-
-}} // namespaces
-#endif // LM_BUILDER_PIPELINE_H
diff --git a/src/joshua/decoder/ff/lm/kenlm/lm/builder/print.cc b/src/joshua/decoder/ff/lm/kenlm/lm/builder/print.cc
deleted file mode 100644
index bb94833..0000000
--- a/src/joshua/decoder/ff/lm/kenlm/lm/builder/print.cc
+++ /dev/null
@@ -1,66 +0,0 @@
-#include "lm/builder/print.hh"
-
-#include "util/fake_ofstream.hh"
-#include "util/file.hh"
-#include "util/mmap.hh"
-#include "util/scoped.hh"
-#include "util/stream/timer.hh"
-
-#include <sstream>
-
-#include <string.h>
-
-namespace lm { namespace builder {
-
-VocabReconstitute::VocabReconstitute(int fd) {
- uint64_t size = util::SizeOrThrow(fd);
- util::MapRead(util::POPULATE_OR_READ, fd, 0, size, memory_);
- const char *const start = static_cast<const char*>(memory_.get());
- const char *i;
- for (i = start; i != start + size; i += strlen(i) + 1) {
- map_.push_back(i);
- }
- // Last one for LookupPiece.
- map_.push_back(i);
-}
-
-void PrintARPA::Run(const util::stream::ChainPositions &positions) {
- VocabReconstitute vocab(GetVocabFD());
-
- // Write header. TODO: integers in FakeOFStream.
- {
- std::stringstream stream;
- if (verbose_header_) {
- stream << "# Input file: " << GetHeader().input_file << '\n';
- stream << "# Token count: " << GetHeader().token_count << '\n';
- stream << "# Smoothing: Modified Kneser-Ney" << '\n';
- }
- stream << "\\data\\\n";
- for (size_t i = 0; i < positions.size(); ++i) {
- stream << "ngram " << (i+1) << '=' << GetHeader().counts_pruned[i] << '\n';
- }
- stream << '\n';
- std::string as_string(stream.str());
- util::WriteOrThrow(out_fd_.get(), as_string.data(), as_string.size());
- }
-
- util::FakeOFStream out(out_fd_.get());
- for (unsigned order = 1; order <= positions.size(); ++order) {
- out << "\\" << order << "-grams:" << '\n';
- for (NGramStream stream(positions[order - 1]); stream; ++stream) {
- // Correcting for numerical precision issues. Take that IRST.
- out << stream->Value().complete.prob << '\t' << vocab.Lookup(*stream->begin());
- for (const WordIndex *i = stream->begin() + 1; i != stream->end(); ++i) {
- out << ' ' << vocab.Lookup(*i);
- }
- if (order != positions.size())
- out << '\t' << stream->Value().complete.backoff;
- out << '\n';
-
- }
- out << '\n';
- }
- out << "\\end\\\n";
-}
-
-}} // namespaces
diff --git a/src/joshua/decoder/ff/lm/kenlm/lm/builder/print.hh b/src/joshua/decoder/ff/lm/kenlm/lm/builder/print.hh
deleted file mode 100644
index ba57f06..0000000
--- a/src/joshua/decoder/ff/lm/kenlm/lm/builder/print.hh
+++ /dev/null
@@ -1,115 +0,0 @@
-#ifndef LM_BUILDER_PRINT_H
-#define LM_BUILDER_PRINT_H
-
-#include "lm/builder/ngram.hh"
-#include "lm/builder/ngram_stream.hh"
-#include "lm/builder/output.hh"
-#include "util/fake_ofstream.hh"
-#include "util/file.hh"
-#include "util/mmap.hh"
-#include "util/string_piece.hh"
-
-#include <ostream>
-
-#include <assert.h>
-
-// Warning: print routines read all unigrams before all bigrams before all
-// trigrams etc. So if other parts of the chain move jointly, you'll have to
-// buffer.
-
-namespace lm { namespace builder {
-
-class VocabReconstitute {
- public:
- // fd must be alive for life of this object; does not take ownership.
- explicit VocabReconstitute(int fd);
-
- const char *Lookup(WordIndex index) const {
- assert(index < map_.size() - 1);
- return map_[index];
- }
-
- StringPiece LookupPiece(WordIndex index) const {
- return StringPiece(map_[index], map_[index + 1] - 1 - map_[index]);
- }
-
- std::size_t Size() const {
- // There's an extra entry to support StringPiece lengths.
- return map_.size() - 1;
- }
-
- private:
- util::scoped_memory memory_;
- std::vector<const char*> map_;
-};
-
-// Not defined, only specialized.
-template <class T> void PrintPayload(util::FakeOFStream &to, const Payload &payload);
-template <> inline void PrintPayload<uint64_t>(util::FakeOFStream &to, const Payload &payload) {
- // TODO slow
- to << boost::lexical_cast<std::string>(payload.count);
-}
-template <> inline void PrintPayload<Uninterpolated>(util::FakeOFStream &to, const Payload &payload) {
- to << log10(payload.uninterp.prob) << ' ' << log10(payload.uninterp.gamma);
-}
-template <> inline void PrintPayload<ProbBackoff>(util::FakeOFStream &to, const Payload &payload) {
- to << payload.complete.prob << ' ' << payload.complete.backoff;
-}
-
-// template parameter is the type stored.
-template <class V> class Print {
- public:
- static void DumpSeparateFiles(const VocabReconstitute &vocab, const std::string &file_base, util::stream::Chains &chains) {
- for (unsigned int i = 0; i < chains.size(); ++i) {
- std::string file(file_base + boost::lexical_cast<std::string>(i));
- chains[i] >> Print(vocab, util::CreateOrThrow(file.c_str()));
- }
- }
-
- explicit Print(const VocabReconstitute &vocab, int fd) : vocab_(vocab), to_(fd) {}
-
- void Run(const util::stream::ChainPositions &chains) {
- util::scoped_fd fd(to_);
- util::FakeOFStream out(to_);
- NGramStreams streams(chains);
- for (NGramStream *s = streams.begin(); s != streams.end(); ++s) {
- DumpStream(*s, out);
- }
- }
-
- void Run(const util::stream::ChainPosition &position) {
- util::scoped_fd fd(to_);
- util::FakeOFStream out(to_);
- NGramStream stream(position);
- DumpStream(stream, out);
- }
-
- private:
- void DumpStream(NGramStream &stream, util::FakeOFStream &to) {
- for (; stream; ++stream) {
- PrintPayload<V>(to, stream->Value());
- for (const WordIndex *w = stream->begin(); w != stream->end(); ++w) {
- to << ' ' << vocab_.Lookup(*w) << '=' << *w;
- }
- to << '\n';
- }
- }
-
- const VocabReconstitute &vocab_;
- int to_;
-};
-
-class PrintARPA : public OutputHook {
- public:
- explicit PrintARPA(int fd, bool verbose_header)
- : OutputHook(PROB_SEQUENTIAL_HOOK), out_fd_(fd), verbose_header_(verbose_header) {}
-
- void Run(const util::stream::ChainPositions &positions);
-
- private:
- util::scoped_fd out_fd_;
- bool verbose_header_;
-};
-
-}} // namespaces
-#endif // LM_BUILDER_PRINT_H
diff --git a/src/joshua/decoder/ff/lm/kenlm/lm/builder/sort.hh b/src/joshua/decoder/ff/lm/kenlm/lm/builder/sort.hh
deleted file mode 100644
index 712bb8e..0000000
--- a/src/joshua/decoder/ff/lm/kenlm/lm/builder/sort.hh
+++ /dev/null
@@ -1,244 +0,0 @@
-#ifndef LM_BUILDER_SORT_H
-#define LM_BUILDER_SORT_H
-
-#include "lm/builder/ngram_stream.hh"
-#include "lm/builder/ngram.hh"
-#include "lm/word_index.hh"
-#include "util/stream/sort.hh"
-
-#include "util/stream/timer.hh"
-
-#include <functional>
-#include <string>
-
-namespace lm {
-namespace builder {
-
-/**
- * Abstract parent class for defining custom n-gram comparators.
- */
-template <class Child> class Comparator : public std::binary_function<const void *, const void *, bool> {
- public:
-
- /**
- * Constructs a comparator capable of comparing two n-grams.
- *
- * @param order Number of words in each n-gram
- */
- explicit Comparator(std::size_t order) : order_(order) {}
-
- /**
- * Applies the comparator using the Compare method that must be defined in any class that inherits from this class.
- *
- * @param lhs A pointer to the n-gram on the left-hand side of the comparison
- * @param rhs A pointer to the n-gram on the right-hand side of the comparison
- *
- * @see ContextOrder::Compare
- * @see PrefixOrder::Compare
- * @see SuffixOrder::Compare
- */
- inline bool operator()(const void *lhs, const void *rhs) const {
- return static_cast<const Child*>(this)->Compare(static_cast<const WordIndex*>(lhs), static_cast<const WordIndex*>(rhs));
- }
-
- /** Gets the n-gram order defined for this comparator. */
- std::size_t Order() const { return order_; }
-
- protected:
- std::size_t order_;
-};
-
-/**
- * N-gram comparator that compares n-grams according to their reverse (suffix) order.
- *
- * This comparator compares n-grams lexicographically, one word at a time,
- * beginning with the last word of each n-gram and ending with the first word of each n-gram.
- *
- * Some examples of n-gram comparisons as defined by this comparator:
- * - a b c == a b c
- * - a b c < a b d
- * - a b c > a d b
- * - a b c > a b b
- * - a b c > x a c
- * - a b c < x y z
- */
-class SuffixOrder : public Comparator<SuffixOrder> {
- public:
-
- /**
- * Constructs a comparator capable of comparing two n-grams.
- *
- * @param order Number of words in each n-gram
- */
- explicit SuffixOrder(std::size_t order) : Comparator<SuffixOrder>(order) {}
-
- /**
- * Compares two n-grams lexicographically, one word at a time,
- * beginning with the last word of each n-gram and ending with the first word of each n-gram.
- *
- * @param lhs A pointer to the n-gram on the left-hand side of the comparison
- * @param rhs A pointer to the n-gram on the right-hand side of the comparison
- */
- inline bool Compare(const WordIndex *lhs, const WordIndex *rhs) const {
- for (std::size_t i = order_ - 1; i != 0; --i) {
- if (lhs[i] != rhs[i])
- return lhs[i] < rhs[i];
- }
- return lhs[0] < rhs[0];
- }
-
- static const unsigned kMatchOffset = 1;
-};
-
-
-/**
- * N-gram comparator that compares n-grams according to the reverse (suffix) order of the n-gram context.
- *
- * This comparator compares n-grams lexicographically, one word at a time,
- * beginning with the penultimate word of each n-gram and ending with the first word of each n-gram;
- * finally, this comparator compares the last word of each n-gram.
- *
- * Some examples of n-gram comparisons as defined by this comparator:
- * - a b c == a b c
- * - a b c < a b d
- * - a b c < a d b
- * - a b c > a b b
- * - a b c > x a c
- * - a b c < x y z
- */
-class ContextOrder : public Comparator<ContextOrder> {
- public:
-
- /**
- * Constructs a comparator capable of comparing two n-grams.
- *
- * @param order Number of words in each n-gram
- */
- explicit ContextOrder(std::size_t order) : Comparator<ContextOrder>(order) {}
-
- /**
- * Compares two n-grams lexicographically, one word at a time,
- * beginning with the penultimate word of each n-gram and ending with the first word of each n-gram;
- * finally, this comparator compares the last word of each n-gram.
- *
- * @param lhs A pointer to the n-gram on the left-hand side of the comparison
- * @param rhs A pointer to the n-gram on the right-hand side of the comparison
- */
- inline bool Compare(const WordIndex *lhs, const WordIndex *rhs) const {
- for (int i = order_ - 2; i >= 0; --i) {
- if (lhs[i] != rhs[i])
- return lhs[i] < rhs[i];
- }
- return lhs[order_ - 1] < rhs[order_ - 1];
- }
-};
-
-/**
- * N-gram comparator that compares n-grams according to their natural (prefix) order.
- *
- * This comparator compares n-grams lexicographically, one word at a time,
- * beginning with the first word of each n-gram and ending with the last word of each n-gram.
- *
- * Some examples of n-gram comparisons as defined by this comparator:
- * - a b c == a b c
- * - a b c < a b d
- * - a b c < a d b
- * - a b c > a b b
- * - a b c < x a c
- * - a b c < x y z
- */
-class PrefixOrder : public Comparator<PrefixOrder> {
- public:
-
- /**
- * Constructs a comparator capable of comparing two n-grams.
- *
- * @param order Number of words in each n-gram
- */
- explicit PrefixOrder(std::size_t order) : Comparator<PrefixOrder>(order) {}
-
- /**
- * Compares two n-grams lexicographically, one word at a time,
- * beginning with the first word of each n-gram and ending with the last word of each n-gram.
- *
- * @param lhs A pointer to the n-gram on the left-hand side of the comparison
- * @param rhs A pointer to the n-gram on the right-hand side of the comparison
- */
- inline bool Compare(const WordIndex *lhs, const WordIndex *rhs) const {
- for (std::size_t i = 0; i < order_; ++i) {
- if (lhs[i] != rhs[i])
- return lhs[i] < rhs[i];
- }
- return false;
- }
-
- static const unsigned kMatchOffset = 0;
-};
-
-// Sum counts for the same n-gram.
-struct AddCombiner {
- bool operator()(void *first_void, const void *second_void, const SuffixOrder &compare) const {
- NGram first(first_void, compare.Order());
- // There isn't a const version of NGram.
- NGram second(const_cast<void*>(second_void), compare.Order());
- if (memcmp(first.begin(), second.begin(), sizeof(WordIndex) * compare.Order())) return false;
- first.Count() += second.Count();
- return true;
- }
-};
-
-// The combiner is only used on a single chain, so I didn't bother to allow
-// that template.
-/**
- * Represents an @ref util::FixedArray "array" capable of storing @ref util::stream::Sort "Sort" objects.
- *
- * In the anticipated use case, an instance of this class will maintain one @ref util::stream::Sort "Sort" object
- * for each n-gram order (ranging from 1 up to the maximum n-gram order being processed).
- * Use in this manner would enable the n-grams each n-gram order to be sorted, in parallel.
- *
- * @tparam Compare An @ref Comparator "ngram comparator" to use during sorting.
- */
-template <class Compare> class Sorts : public util::FixedArray<util::stream::Sort<Compare> > {
- private:
- typedef util::stream::Sort<Compare> S;
- typedef util::FixedArray<S> P;
-
- public:
-
- /**
- * Constructs, but does not initialize.
- *
- * @ref util::FixedArray::Init() "Init" must be called before use.
- *
- * @see util::FixedArray::Init()
- */
- Sorts() {}
-
- /**
- * Constructs an @ref util::FixedArray "array" capable of storing a fixed number of @ref util::stream::Sort "Sort" objects.
- *
- * @param number The maximum number of @ref util::stream::Sort "sorters" that can be held by this @ref util::FixedArray "array"
- * @see util::FixedArray::FixedArray()
- */
- explicit Sorts(std::size_t number) : util::FixedArray<util::stream::Sort<Compare> >(number) {}
-
- /**
- * Constructs a new @ref util::stream::Sort "Sort" object which is stored in this @ref util::FixedArray "array".
- *
- * The new @ref util::stream::Sort "Sort" object is constructed using the provided @ref util::stream::SortConfig "SortConfig" and @ref Comparator "ngram comparator";
- * once constructed, a new worker @ref util::stream::Thread "thread" (owned by the @ref util::stream::Chain "chain") will sort the n-gram data stored
- * in the @ref util::stream::Block "blocks" of the provided @ref util::stream::Chain "chain".
- *
- * @see util::stream::Sort::Sort()
- * @see util::stream::Chain::operator>>()
- */
- void push_back(util::stream::Chain &chain, const util::stream::SortConfig &config, const Compare &compare) {
- new (P::end()) S(chain, config, compare); // use "placement new" syntax to initalize S in an already-allocated memory location
- P::Constructed();
- }
-};
-
-} // namespace builder
-} // namespace lm
-
-#endif // LM_BUILDER_SORT_H
diff --git a/src/joshua/decoder/ff/lm/kenlm/lm/config.hh b/src/joshua/decoder/ff/lm/kenlm/lm/config.hh
deleted file mode 100644
index a4238cd..0000000
--- a/src/joshua/decoder/ff/lm/kenlm/lm/config.hh
+++ /dev/null
@@ -1,123 +0,0 @@
-#ifndef LM_CONFIG_H
-#define LM_CONFIG_H
-
-#include "lm/lm_exception.hh"
-#include "util/mmap.hh"
-
-#include <iosfwd>
-#include <string>
-#include <vector>
-
-/* Configuration for ngram model. Separate header to reduce pollution. */
-
-namespace lm {
-
-class EnumerateVocab;
-
-namespace ngram {
-
-struct Config {
- // EFFECTIVE FOR BOTH ARPA AND BINARY READS
-
- // (default true) print progress bar to messages
- bool show_progress;
-
- // Where to log messages including the progress bar. Set to NULL for
- // silence.
- std::ostream *messages;
-
- std::ostream *ProgressMessages() const {
- return show_progress ? messages : 0;
- }
-
- // This will be called with every string in the vocabulary. See
- // enumerate_vocab.hh for more detail. Config does not take ownership; you
- // are still responsible for deleting it (or stack allocating).
- EnumerateVocab *enumerate_vocab;
-
-
- // ONLY EFFECTIVE WHEN READING ARPA
-
- // What to do when <unk> isn't in the provided model.
- WarningAction unknown_missing;
- // What to do when <s> or </s> is missing from the model.
- // If THROW_UP, the exception will be of type util::SpecialWordMissingException.
- WarningAction sentence_marker_missing;
-
- // What to do with a positive log probability. For COMPLAIN and SILENT, map
- // to 0.
- WarningAction positive_log_probability;
-
- // The probability to substitute for <unk> if it's missing from the model.
- // No effect if the model has <unk> or unknown_missing == THROW_UP.
- float unknown_missing_logprob;
-
- // Size multiplier for probing hash table. Must be > 1. Space is linear in
- // this. Time is probing_multiplier / (probing_multiplier - 1). No effect
- // for sorted variant.
- // If you find yourself setting this to a low number, consider using the
- // TrieModel which has lower memory consumption.
- float probing_multiplier;
-
- // Amount of memory to use for building. The actual memory usage will be
- // higher since this just sets sort buffer size. Only applies to trie
- // models.
- std::size_t building_memory;
-
- // Template for temporary directory appropriate for passing to mkdtemp.
- // The characters XXXXXX are appended before passing to mkdtemp. Only
- // applies to trie. If empty, defaults to write_mmap. If that's NULL,
- // defaults to input file name.
- std::string temporary_directory_prefix;
-
- // Level of complaining to do when loading from ARPA instead of binary format.
- enum ARPALoadComplain {ALL, EXPENSIVE, NONE};
- ARPALoadComplain arpa_complain;
-
- // While loading an ARPA file, also write out this binary format file. Set
- // to NULL to disable.
- const char *write_mmap;
-
- enum WriteMethod {
- WRITE_MMAP, // Map the file directly.
- WRITE_AFTER // Write after we're done.
- };
- WriteMethod write_method;
-
- // Include the vocab in the binary file? Only effective if write_mmap != NULL.
- bool include_vocab;
-
-
- // Left rest options. Only used when the model includes rest costs.
- enum RestFunction {
- REST_MAX, // Maximum of any score to the left
- REST_LOWER, // Use lower-order files given below.
- };
- RestFunction rest_function;
- // Only used for REST_LOWER.
- std::vector<std::string> rest_lower_files;
-
-
- // Quantization options. Only effective for QuantTrieModel. One value is
- // reserved for each of prob and backoff, so 2^bits - 1 buckets will be used
- // to quantize (and one of the remaining backoffs will be 0).
- uint8_t prob_bits, backoff_bits;
-
- // Bhiksha compression (simple form). Only works with trie.
- uint8_t pointer_bhiksha_bits;
-
-
- // ONLY EFFECTIVE WHEN READING BINARY
-
- // How to get the giant array into memory: lazy mmap, populate, read etc.
- // See util/mmap.hh for details of MapMethod.
- util::LoadMethod load_method;
-
-
- // Set defaults.
- Config();
-};
-
-} /* namespace ngram */ } /* namespace lm */
-
-#endif // LM_CONFIG_H
diff --git a/src/joshua/decoder/ff/lm/kenlm/lm/enumerate_vocab.hh b/src/joshua/decoder/ff/lm/kenlm/lm/enumerate_vocab.hh
deleted file mode 100644
index f5ce789..0000000
--- a/src/joshua/decoder/ff/lm/kenlm/lm/enumerate_vocab.hh
+++ /dev/null
@@ -1,28 +0,0 @@
-#ifndef LM_ENUMERATE_VOCAB_H
-#define LM_ENUMERATE_VOCAB_H
-
-#include "lm/word_index.hh"
-#include "util/string_piece.hh"
-
-namespace lm {
-
-/* If you need the actual strings in the vocabulary, inherit from this class
- * and implement Add. Then put a pointer in Config.enumerate_vocab; it does
- * not take ownership. Add is called once per vocab word. index starts at 0
- * and increases by 1 each time. This is only used by the Model constructor;
- * the pointer is not retained by the class.
- */
-class EnumerateVocab {
- public:
- virtual ~EnumerateVocab() {}
-
- virtual void Add(WordIndex index, const StringPiece &str) = 0;
-
- protected:
- EnumerateVocab() {}
-};
-
-} // namespace lm
-
-#endif // LM_ENUMERATE_VOCAB_H
-
diff --git a/src/joshua/decoder/ff/lm/kenlm/lm/facade.hh b/src/joshua/decoder/ff/lm/kenlm/lm/facade.hh
deleted file mode 100644
index 8e12b62..0000000
--- a/src/joshua/decoder/ff/lm/kenlm/lm/facade.hh
+++ /dev/null
@@ -1,73 +0,0 @@
-#ifndef LM_FACADE_H
-#define LM_FACADE_H
-
-#include "lm/virtual_interface.hh"
-#include "util/string_piece.hh"
-
-#include <string>
-
-namespace lm {
-namespace base {
-
-// Common model interface that depends on knowing the specific classes.
-// Curiously recurring template pattern.
-template <class Child, class StateT, class VocabularyT> class ModelFacade : public Model {
- public:
- typedef StateT State;
- typedef VocabularyT Vocabulary;
-
- /* Translate from void* to State */
- FullScoreReturn BaseFullScore(const void *in_state, const WordIndex new_word, void *out_state) const {
- return static_cast<const Child*>(this)->FullScore(
- *reinterpret_cast<const State*>(in_state),
- new_word,
- *reinterpret_cast<State*>(out_state));
- }
-
- FullScoreReturn BaseFullScoreForgotState(const WordIndex *context_rbegin, const WordIndex *context_rend, const WordIndex new_word, void *out_state) const {
- return static_cast<const Child*>(this)->FullScoreForgotState(
- context_rbegin,
- context_rend,
- new_word,
- *reinterpret_cast<State*>(out_state));
- }
-
- // Default Score function calls FullScore. Model can override this.
- float Score(const State &in_state, const WordIndex new_word, State &out_state) const {
- return static_cast<const Child*>(this)->FullScore(in_state, new_word, out_state).prob;
- }
-
- float BaseScore(const void *in_state, const WordIndex new_word, void *out_state) const {
- return static_cast<const Child*>(this)->Score(
- *reinterpret_cast<const State*>(in_state),
- new_word,
- *reinterpret_cast<State*>(out_state));
- }
-
- const State &BeginSentenceState() const { return begin_sentence_; }
- const State &NullContextState() const { return null_context_; }
- const Vocabulary &GetVocabulary() const { return *static_cast<const Vocabulary*>(&BaseVocabulary()); }
-
- protected:
- ModelFacade() : Model(sizeof(State)) {}
-
- virtual ~ModelFacade() {}
-
- // begin_sentence and null_context can disappear after. vocab should stay.
- void Init(const State &begin_sentence, const State &null_context, const Vocabulary &vocab, unsigned char order) {
- begin_sentence_ = begin_sentence;
- null_context_ = null_context;
- begin_sentence_memory_ = &begin_sentence_;
- null_context_memory_ = &null_context_;
- base_vocab_ = &vocab;
- order_ = order;
- }
-
- private:
- State begin_sentence_, null_context_;
-};
-
-} // mamespace base
-} // namespace lm
-
-#endif // LM_FACADE_H
diff --git a/src/joshua/decoder/ff/lm/kenlm/lm/left.hh b/src/joshua/decoder/ff/lm/kenlm/lm/left.hh
deleted file mode 100644
index 36d6136..0000000
--- a/src/joshua/decoder/ff/lm/kenlm/lm/left.hh
+++ /dev/null
@@ -1,216 +0,0 @@
-/* Efficient left and right language model state for sentence fragments.
- * Intended usage:
- * Store ChartState with every chart entry.
- * To do a rule application:
- * 1. Make a ChartState object for your new entry.
- * 2. Construct RuleScore.
- * 3. Going from left to right, call Terminal or NonTerminal.
- * For terminals, just pass the vocab id.
- * For non-terminals, pass that non-terminal's ChartState.
- * If your decoder expects scores inclusive of subtree scores (i.e. you
- * label entries with the highest-scoring path), pass the non-terminal's
- * score as prob.
- * If your decoder expects relative scores and will walk the chart later,
- * pass prob = 0.0.
- * In other words, the only effect of prob is that it gets added to the
- * returned log probability.
- * 4. Call Finish. It returns the log probability.
- *
- * There's a couple more details:
- * Do not pass <s> to Terminal as it is formally not a word in the sentence,
- * only context. Instead, call BeginSentence. If called, it should be the
- * first call after RuleScore is constructed (since <s> is always the
- * leftmost).
- *
- * If the leftmost RHS is a non-terminal, it's faster to call BeginNonTerminal.
- *
- * Hashing and sorting comparison operators are provided. All state objects
- * are POD. If you intend to use memcmp on raw state objects, you must call
- * ZeroRemaining first, as the value of array entries beyond length is
- * otherwise undefined.
- *
- * Usage is of course not limited to chart decoding. Anything that generates
- * sentence fragments missing left context could benefit. For example, a
- * phrase-based decoder could pre-score phrases, storing ChartState with each
- * phrase, even if hypotheses are generated left-to-right.
- */
-
-#ifndef LM_LEFT_H
-#define LM_LEFT_H
-
-#include "lm/max_order.hh"
-#include "lm/state.hh"
-#include "lm/return.hh"
-
-#include "util/murmur_hash.hh"
-
-#include <algorithm>
-
-namespace lm {
-namespace ngram {
-
-template <class M> class RuleScore {
- public:
- explicit RuleScore(const M &model, ChartState &out) : model_(model), out_(&out), left_done_(false), prob_(0.0) {
- out.left.length = 0;
- out.right.length = 0;
- }
-
- void BeginSentence() {
- out_->right = model_.BeginSentenceState();
- // out_->left is empty.
- left_done_ = true;
- }
-
- void Terminal(WordIndex word) {
- State copy(out_->right);
- FullScoreReturn ret(model_.FullScore(copy, word, out_->right));
- if (left_done_) { prob_ += ret.prob; return; }
- if (ret.independent_left) {
- prob_ += ret.prob;
- left_done_ = true;
- return;
- }
- out_->left.pointers[out_->left.length++] = ret.extend_left;
- prob_ += ret.rest;
- if (out_->right.length != copy.length + 1)
- left_done_ = true;
- }
-
- // Faster version of NonTerminal for the case where the rule begins with a non-terminal.
- void BeginNonTerminal(const ChartState &in, float prob = 0.0) {
- prob_ = prob;
- *out_ = in;
- left_done_ = in.left.full;
- }
-
- void NonTerminal(const ChartState &in, float prob = 0.0) {
- prob_ += prob;
-
- if (!in.left.length) {
- if (in.left.full) {
- for (const float *i = out_->right.backoff; i < out_->right.backoff + out_->right.length; ++i) prob_ += *i;
- left_done_ = true;
- out_->right = in.right;
- }
- return;
- }
-
- if (!out_->right.length) {
- out_->right = in.right;
- if (left_done_) {
- prob_ += model_.UnRest(in.left.pointers, in.left.pointers + in.left.length, 1);
- return;
- }
- if (out_->left.length) {
- left_done_ = true;
- } else {
- out_->left = in.left;
- left_done_ = in.left.full;
- }
- return;
- }
-
- float backoffs[KENLM_MAX_ORDER - 1], backoffs2[KENLM_MAX_ORDER - 1];
- float *back = backoffs, *back2 = backoffs2;
- unsigned char next_use = out_->right.length;
-
- // First word
- if (ExtendLeft(in, next_use, 1, out_->right.backoff, back)) return;
-
- // Words after the first, so extending a bigram to begin with
- for (unsigned char extend_length = 2; extend_length <= in.left.length; ++extend_length) {
- if (ExtendLeft(in, next_use, extend_length, back, back2)) return;
- std::swap(back, back2);
- }
-
- if (in.left.full) {
- for (const float *i = back; i != back + next_use; ++i) prob_ += *i;
- left_done_ = true;
- out_->right = in.right;
- return;
- }
-
- // Right state was minimized, so it's already independent of the new words to the left.
- if (in.right.length < in.left.length) {
- out_->right = in.right;
- return;
- }
-
- // Shift exisiting words down.
- for (WordIndex *i = out_->right.words + next_use - 1; i >= out_->right.words; --i) {
- *(i + in.right.length) = *i;
- }
- // Add words from in.right.
- std::copy(in.right.words, in.right.words + in.right.length, out_->right.words);
- // Assemble backoff composed on the existing state's backoff followed by the new state's backoff.
- std::copy(in.right.backoff, in.right.backoff + in.right.length, out_->right.backoff);
- std::copy(back, back + next_use, out_->right.backoff + in.right.length);
- out_->right.length = in.right.length + next_use;
- }
-
- float Finish() {
- // A N-1-gram might extend left and right but we should still set full to true because it's an N-1-gram.
- out_->left.full = left_done_ || (out_->left.length == model_.Order() - 1);
- return prob_;
- }
-
- void Reset() {
- prob_ = 0.0;
- left_done_ = false;
- out_->left.length = 0;
- out_->right.length = 0;
- }
- void Reset(ChartState &replacement) {
- out_ = &replacement;
- Reset();
- }
-
- private:
- bool ExtendLeft(const ChartState &in, unsigned char &next_use, unsigned char extend_length, const float *back_in, float *back_out) {
- ProcessRet(model_.ExtendLeft(
- out_->right.words, out_->right.words + next_use, // Words to extend into
- back_in, // Backoffs to use
- in.left.pointers[extend_length - 1], extend_length, // Words to be extended
- back_out, // Backoffs for the next score
- next_use)); // Length of n-gram to use in next scoring.
- if (next_use != out_->right.length) {
- left_done_ = true;
- if (!next_use) {
- // Early exit.
- out_->right = in.right;
- prob_ += model_.UnRest(in.left.pointers + extend_length, in.left.pointers + in.left.length, extend_length + 1);
- return true;
- }
- }
- // Continue scoring.
- return false;
- }
-
- void ProcessRet(const FullScoreReturn &ret) {
- if (left_done_) {
- prob_ += ret.prob;
- return;
- }
- if (ret.independent_left) {
- prob_ += ret.prob;
- left_done_ = true;
- return;
- }
- out_->left.pointers[out_->left.length++] = ret.extend_left;
- prob_ += ret.rest;
- }
-
- const M &model_;
-
- ChartState *out_;
-
- bool left_done_;
-
- float prob_;
-};
-
-} // namespace ngram
-} // namespace lm
-
-#endif // LM_LEFT_H
diff --git a/src/joshua/decoder/ff/lm/kenlm/lm/left_test.cc b/src/joshua/decoder/ff/lm/kenlm/lm/left_test.cc
deleted file mode 100644
index b456146..0000000
--- a/src/joshua/decoder/ff/lm/kenlm/lm/left_test.cc
+++ /dev/null
@@ -1,397 +0,0 @@
-#include "lm/left.hh"
-#include "lm/model.hh"
-
-#include "util/tokenize_piece.hh"
-
-#include <vector>
-
-#define BOOST_TEST_MODULE LeftTest
-#include <boost/test/unit_test.hpp>
-#include <boost/test/floating_point_comparison.hpp>
-
-namespace lm {
-namespace ngram {
-namespace {
-
-#define Term(word) score.Terminal(m.GetVocabulary().Index(word));
-#define VCheck(word, value) BOOST_CHECK_EQUAL(m.GetVocabulary().Index(word), value);
-
-// Apparently some Boost versions use templates and are pretty strict about types matching.
-#define SLOPPY_CHECK_CLOSE(ref, value, tol) BOOST_CHECK_CLOSE(static_cast<double>(ref), static_cast<double>(value), static_cast<double>(tol));
-
-template <class M> void Short(const M &m) {
- ChartState base;
- {
- RuleScore<M> score(m, base);
- Term("more");
- Term("loin");
- SLOPPY_CHECK_CLOSE(-1.206319 - 0.3561665, score.Finish(), 0.001);
- }
- BOOST_CHECK(base.left.full);
- BOOST_CHECK_EQUAL(2, base.left.length);
- BOOST_CHECK_EQUAL(1, base.right.length);
- VCheck("loin", base.right.words[0]);
-
- ChartState more_left;
- {
- RuleScore<M> score(m, more_left);
- Term("little");
- score.NonTerminal(base, -1.206319 - 0.3561665);
- // p(little more loin | null context)
- SLOPPY_CHECK_CLOSE(-1.56538, score.Finish(), 0.001);
- }
- BOOST_CHECK_EQUAL(3, more_left.left.length);
- BOOST_CHECK_EQUAL(1, more_left.right.length);
- VCheck("loin", more_left.right.words[0]);
- BOOST_CHECK(more_left.left.full);
-
- ChartState shorter;
- {
- RuleScore<M> score(m, shorter);
- Term("to");
- score.NonTerminal(base, -1.206319 - 0.3561665);
- SLOPPY_CHECK_CLOSE(-0.30103 - 1.687872 - 1.206319 - 0.3561665, score.Finish(), 0.01);
- }
- BOOST_CHECK_EQUAL(1, shorter.left.length);
- BOOST_CHECK_EQUAL(1, shorter.right.length);
- VCheck("loin", shorter.right.words[0]);
- BOOST_CHECK(shorter.left.full);
-}
-
-template <class M> void Charge(const M &m) {
- ChartState base;
- {
- RuleScore<M> score(m, base);
- Term("on");
- Term("more");
- SLOPPY_CHECK_CLOSE(-1.509559 -0.4771212 -1.206319, score.Finish(), 0.001);
- }
- BOOST_CHECK_EQUAL(1, base.left.length);
- BOOST_CHECK_EQUAL(1, base.right.length);
- VCheck("more", base.right.words[0]);
- BOOST_CHECK(base.left.full);
-
- ChartState extend;
- {
- RuleScore<M> score(m, extend);
- Term("looking");
- score.NonTerminal(base, -1.509559 -0.4771212 -1.206319);
- SLOPPY_CHECK_CLOSE(-3.91039, score.Finish(), 0.001);
- }
- BOOST_CHECK_EQUAL(2, extend.left.length);
- BOOST_CHECK_EQUAL(1, extend.right.length);
- VCheck("more", extend.right.words[0]);
- BOOST_CHECK(extend.left.full);
-
- ChartState tobos;
- {
- RuleScore<M> score(m, tobos);
- score.BeginSentence();
- score.NonTerminal(extend, -3.91039);
- SLOPPY_CHECK_CLOSE(-3.471169, score.Finish(), 0.001);
- }
- BOOST_CHECK_EQUAL(0, tobos.left.length);
- BOOST_CHECK_EQUAL(1, tobos.right.length);
-}
-
-template <class M> float LeftToRight(const M &m, const std::vector<WordIndex> &words, bool begin_sentence = false) {
- float ret = 0.0;
- State right = begin_sentence ? m.BeginSentenceState() : m.NullContextState();
- for (std::vector<WordIndex>::const_iterator i = words.begin(); i != words.end(); ++i) {
- State copy(right);
- ret += m.Score(copy, *i, right);
- }
- return ret;
-}
-
-template <class M> float RightToLeft(const M &m, const std::vector<WordIndex> &words, bool begin_sentence = false) {
- float ret = 0.0;
- ChartState state;
- state.left.length = 0;
- state.right.length = 0;
- state.left.full = false;
- for (std::vector<WordIndex>::const_reverse_iterator i = words.rbegin(); i != words.rend(); ++i) {
- ChartState copy(state);
- RuleScore<M> score(m, state);
- score.Terminal(*i);
- score.NonTerminal(copy, ret);
- ret = score.Finish();
- }
- if (begin_sentence) {
- ChartState copy(state);
- RuleScore<M> score(m, state);
- score.BeginSentence();
- score.NonTerminal(copy, ret);
- ret = score.Finish();
- }
- return ret;
-}
-
-template <class M> float TreeMiddle(const M &m, const std::vector<WordIndex> &words, bool begin_sentence = false) {
- std::vector<std::pair<ChartState, float> > states(words.size());
- for (unsigned int i = 0; i < words.size(); ++i) {
- RuleScore<M> score(m, states[i].first);
- score.Terminal(words[i]);
- states[i].second = score.Finish();
- }
- while (states.size() > 1) {
- std::vector<std::pair<ChartState, float> > upper((states.size() + 1) / 2);
- for (unsigned int i = 0; i < states.size() / 2; ++i) {
- RuleScore<M> score(m, upper[i].first);
- score.NonTerminal(states[i*2].first, states[i*2].second);
- score.NonTerminal(states[i*2+1].first, states[i*2+1].second);
- upper[i].second = score.Finish();
- }
- if (states.size() % 2) {
- upper.back() = states.back();
- }
- std::swap(states, upper);
- }
-
- if (states.empty()) return 0.0;
-
- if (begin_sentence) {
- ChartState ignored;
- RuleScore<M> score(m, ignored);
- score.BeginSentence();
- score.NonTerminal(states.front().first, states.front().second);
- return score.Finish();
- } else {
- return states.front().second;
- }
-
-}
-
-template <class M> void LookupVocab(const M &m, const StringPiece &str, std::vector<WordIndex> &out) {
- out.clear();
- for (util::TokenIter<util::SingleCharacter, true> i(str, ' '); i; ++i) {
- out.push_back(m.GetVocabulary().Index(*i));
- }
-}
-
-#define TEXT_TEST(str) \
- LookupVocab(m, str, words); \
- expect = LeftToRight(m, words, rest); \
- SLOPPY_CHECK_CLOSE(expect, RightToLeft(m, words, rest), 0.001); \
- SLOPPY_CHECK_CLOSE(expect, TreeMiddle(m, words, rest), 0.001); \
-
-// Build sentences, or parts thereof, from right to left.
-template <class M> void GrowBig(const M &m, bool rest = false) {
- std::vector<WordIndex> words;
- float expect;
- TEXT_TEST("in biarritz watching considering looking . on a little more loin also would consider higher to look good unknown the screening foo bar , unknown however unknown </s>");
- TEXT_TEST("on a little more loin also would consider higher to look good unknown the screening foo bar , unknown however unknown </s>");
- TEXT_TEST("on a little more loin also would consider higher to look good");
- TEXT_TEST("more loin also would consider higher to look good");
- TEXT_TEST("more loin also would consider higher to look");
- TEXT_TEST("also would consider higher to look");
- TEXT_TEST("also would consider higher");
- TEXT_TEST("would consider higher to look");
- TEXT_TEST("consider higher to look");
- TEXT_TEST("consider higher to");
- TEXT_TEST("consider higher");
-}
-
-template <class M> void GrowSmall(const M &m, bool rest = false) {
- std::vector<WordIndex> words;
- float expect;
- TEXT_TEST("in biarritz watching considering looking . </s>");
- TEXT_TEST("in biarritz watching considering looking .");
- TEXT_TEST("in biarritz");
-}
-
-template <class M> void AlsoWouldConsiderHigher(const M &m) {
- ChartState also;
- {
- RuleScore<M> score(m, also);
- score.Terminal(m.GetVocabulary().Index("also"));
- SLOPPY_CHECK_CLOSE(-1.687872, score.Finish(), 0.001);
- }
- ChartState would;
- {
- RuleScore<M> score(m, would);
- score.Terminal(m.GetVocabulary().Index("would"));
- SLOPPY_CHECK_CLOSE(-1.687872, score.Finish(), 0.001);
- }
- ChartState combine_also_would;
- {
- RuleScore<M> score(m, combine_also_would);
- score.NonTerminal(also, -1.687872);
- score.NonTerminal(would, -1.687872);
- SLOPPY_CHECK_CLOSE(-1.687872 - 2.0, score.Finish(), 0.001);
- }
- BOOST_CHECK_EQUAL(2, combine_also_would.right.length);
-
- ChartState also_would;
- {
- RuleScore<M> score(m, also_would);
- score.Terminal(m.GetVocabulary().Index("also"));
- score.Terminal(m.GetVocabulary().Index("would"));
- SLOPPY_CHECK_CLOSE(-1.687872 - 2.0, score.Finish(), 0.001);
- }
- BOOST_CHECK_EQUAL(2, also_would.right.length);
-
- ChartState consider;
- {
- RuleScore<M> score(m, consider);
- score.Terminal(m.GetVocabulary().Index("consider"));
- SLOPPY_CHECK_CLOSE(-1.687872, score.Finish(), 0.001);
- }
- BOOST_CHECK_EQUAL(1, consider.left.length);
- BOOST_CHECK_EQUAL(1, consider.right.length);
- BOOST_CHECK(!consider.left.full);
-
- ChartState higher;
- float higher_score;
- {
- RuleScore<M> score(m, higher);
- score.Terminal(m.GetVocabulary().Index("higher"));
- higher_score = score.Finish();
- }
- SLOPPY_CHECK_CLOSE(-1.509559, higher_score, 0.001);
- BOOST_CHECK_EQUAL(1, higher.left.length);
- BOOST_CHECK_EQUAL(1, higher.right.length);
- BOOST_CHECK(!higher.left.full);
- VCheck("higher", higher.right.words[0]);
- SLOPPY_CHECK_CLOSE(-0.30103, higher.right.backoff[0], 0.001);
-
- ChartState consider_higher;
- {
- RuleScore<M> score(m, consider_higher);
- score.NonTerminal(consider, -1.687872);
- score.NonTerminal(higher, higher_score);
- SLOPPY_CHECK_CLOSE(-1.509559 - 1.687872 - 0.30103, score.Finish(), 0.001);
- }
- BOOST_CHECK_EQUAL(2, consider_higher.left.length);
- BOOST_CHECK(!consider_higher.left.full);
-
- ChartState full;
- {
- RuleScore<M> score(m, full);
- score.NonTerminal(combine_also_would, -1.687872 - 2.0);
- score.NonTerminal(consider_higher, -1.509559 - 1.687872 - 0.30103);
- SLOPPY_CHECK_CLOSE(-10.6879, score.Finish(), 0.001);
- }
- BOOST_CHECK_EQUAL(4, full.right.length);
-}
-
-#define CHECK_SCORE(str, val) \
-{ \
- float got = val; \
- std::vector<WordIndex> indices; \
- LookupVocab(m, str, indices); \
- SLOPPY_CHECK_CLOSE(LeftToRight(m, indices), got, 0.001); \
-}
-
-template <class M> void FullGrow(const M &m) {
- std::vector<WordIndex> words;
- LookupVocab(m, "in biarritz watching considering looking . </s>", words);
-
- ChartState lexical[7];
- float lexical_scores[7];
- for (unsigned int i = 0; i < 7; ++i) {
- RuleScore<M> score(m, lexical[i]);
- score.Terminal(words[i]);
- lexical_scores[i] = score.Finish();
- }
- CHECK_SCORE("in", lexical_scores[0]);
- CHECK_SCORE("biarritz", lexical_scores[1]);
- CHECK_SCORE("watching", lexical_scores[2]);
- CHECK_SCORE("</s>", lexical_scores[6]);
-
- ChartState l1[4];
- float l1_scores[4];
- {
- RuleScore<M> score(m, l1[0]);
- score.NonTerminal(lexical[0], lexical_scores[0]);
- score.NonTerminal(lexical[1], lexical_scores[1]);
- CHECK_SCORE("in biarritz", l1_scores[0] = score.Finish());
- }
- {
- RuleScore<M> score(m, l1[1]);
- score.NonTerminal(lexical[2], lexical_scores[2]);
- score.NonTerminal(lexical[3], lexical_scores[3]);
- CHECK_SCORE("watching considering", l1_scores[1] = score.Finish());
- }
- {
- RuleScore<M> score(m, l1[2]);
- score.NonTerminal(lexical[4], lexical_scores[4]);
- score.NonTerminal(lexical[5], lexical_scores[5]);
- CHECK_SCORE("looking .", l1_scores[2] = score.Finish());
- }
- BOOST_CHECK_EQUAL(l1[2].left.length, 1);
- l1[3] = lexical[6];
- l1_scores[3] = lexical_scores[6];
-
- ChartState l2[2];
- float l2_scores[2];
- {
- RuleScore<M> score(m, l2[0]);
- score.NonTerminal(l1[0], l1_scores[0]);
- score.NonTerminal(l1[1], l1_scores[1]);
- CHECK_SCORE("in biarritz watching considering", l2_scores[0] = score.Finish());
- }
- {
- RuleScore<M> score(m, l2[1]);
- score.NonTerminal(l1[2], l1_scores[2]);
- score.NonTerminal(l1[3], l1_scores[3]);
- CHECK_SCORE("looking . </s>", l2_scores[1] = score.Finish());
- }
- BOOST_CHECK_EQUAL(l2[1].left.length, 1);
- BOOST_CHECK(l2[1].left.full);
-
- ChartState top;
- {
- RuleScore<M> score(m, top);
- score.NonTerminal(l2[0], l2_scores[0]);
- score.NonTerminal(l2[1], l2_scores[1]);
- CHECK_SCORE("in biarritz watching considering looking . </s>", score.Finish());
- }
-}
-
-const char *FileLocation() {
- if (boost::unit_test::framework::master_test_suite().argc < 2) {
- return "test.arpa";
- }
- return boost::unit_test::framework::master_test_suite().argv[1];
-}
-
-template <class M> void Everything() {
- Config config;
- config.messages = NULL;
- M m(FileLocation(), config);
-
- Short(m);
- Charge(m);
- GrowBig(m);
- AlsoWouldConsiderHigher(m);
- GrowSmall(m);
- FullGrow(m);
-}
-
-BOOST_AUTO_TEST_CASE(ProbingAll) {
- Everything<Model>();
-}
-BOOST_AUTO_TEST_CASE(TrieAll) {
- Everything<TrieModel>();
-}
-BOOST_AUTO_TEST_CASE(QuantTrieAll) {
- Everything<QuantTrieModel>();
-}
-BOOST_AUTO_TEST_CASE(ArrayQuantTrieAll) {
- Everything<QuantArrayTrieModel>();
-}
-BOOST_AUTO_TEST_CASE(ArrayTrieAll) {
- Everything<ArrayTrieModel>();
-}
-
-BOOST_AUTO_TEST_CASE(RestProbing) {
- Config config;
- config.messages = NULL;
- RestProbingModel m(FileLocation(), config);
- GrowBig(m, true);
-}
-
-} // namespace
-} // namespace ngram
-} // namespace lm
diff --git a/src/joshua/decoder/ff/lm/kenlm/lm/lm_exception.cc b/src/joshua/decoder/ff/lm/kenlm/lm/lm_exception.cc
deleted file mode 100644
index 0b572e9..0000000
--- a/src/joshua/decoder/ff/lm/kenlm/lm/lm_exception.cc
+++ /dev/null
@@ -1,23 +0,0 @@
-#include "lm/lm_exception.hh"
-
-#include<errno.h>
-#include<stdio.h>
-
-namespace lm {
-
-ConfigException::ConfigException() throw() {}
-ConfigException::~ConfigException() throw() {}
-
-LoadException::LoadException() throw() {}
-LoadException::~LoadException() throw() {}
-
-FormatLoadException::FormatLoadException() throw() {}
-FormatLoadException::~FormatLoadException() throw() {}
-
-VocabLoadException::VocabLoadException() throw() {}
-VocabLoadException::~VocabLoadException() throw() {}
-
-SpecialWordMissingException::SpecialWordMissingException() throw() {}
-SpecialWordMissingException::~SpecialWordMissingException() throw() {}
-
-} // namespace lm
diff --git a/src/joshua/decoder/ff/lm/kenlm/lm/lm_exception.hh b/src/joshua/decoder/ff/lm/kenlm/lm/lm_exception.hh
deleted file mode 100644
index 8bb6108..0000000
--- a/src/joshua/decoder/ff/lm/kenlm/lm/lm_exception.hh
+++ /dev/null
@@ -1,50 +0,0 @@
-#ifndef LM_LM_EXCEPTION_H
-#define LM_LM_EXCEPTION_H
-
-// Named to avoid conflict with util/exception.hh.
-
-#include "util/exception.hh"
-#include "util/string_piece.hh"
-
-#include <exception>
-#include <string>
-
-namespace lm {
-
-typedef enum {THROW_UP, COMPLAIN, SILENT} WarningAction;
-
-class ConfigException : public util::Exception {
- public:
- ConfigException() throw();
- ~ConfigException() throw();
-};
-
-class LoadException : public util::Exception {
- public:
- virtual ~LoadException() throw();
-
- protected:
- LoadException() throw();
-};
-
-class FormatLoadException : public LoadException {
- public:
- FormatLoadException() throw();
- ~FormatLoadException() throw();
-};
-
-class VocabLoadException : public LoadException {
- public:
- virtual ~VocabLoadException() throw();
- VocabLoadException() throw();
-};
-
-class SpecialWordMissingException : public VocabLoadException {
- public:
- explicit SpecialWordMissingException() throw();
- ~SpecialWordMissingException() throw();
-};
-
-} // namespace lm
-
-#endif // LM_LM_EXCEPTION
diff --git a/src/joshua/decoder/ff/lm/kenlm/lm/max_order.hh b/src/joshua/decoder/ff/lm/kenlm/lm/max_order.hh
deleted file mode 100644
index 5f181f3..0000000
--- a/src/joshua/decoder/ff/lm/kenlm/lm/max_order.hh
+++ /dev/null
@@ -1,13 +0,0 @@
-#ifndef LM_MAX_ORDER_H
-#define LM_MAX_ORDER_H
-/* IF YOUR BUILD SYSTEM PASSES -DKENLM_MAX_ORDER, THEN CHANGE THE BUILD SYSTEM.
- * If not, this is the default maximum order.
- * Having this limit means that State can be
- * (kMaxOrder - 1) * sizeof(float) bytes instead of
- * sizeof(float*) + (kMaxOrder - 1) * sizeof(float) + malloc overhead
- */
-#ifndef KENLM_ORDER_MESSAGE
-#define KENLM_ORDER_MESSAGE "If your build system supports changing KENLM_MAX_ORDER, change it there and recompile. In the KenLM tarball or Moses, use e.g. `bjam --max-kenlm-order=6 -a'. Otherwise, edit lm/max_order.hh."
-#endif
-
-#endif // LM_MAX_ORDER_H
diff --git a/src/joshua/decoder/ff/lm/kenlm/lm/model.hh b/src/joshua/decoder/ff/lm/kenlm/lm/model.hh
deleted file mode 100644
index 6925a56..0000000
--- a/src/joshua/decoder/ff/lm/kenlm/lm/model.hh
+++ /dev/null
@@ -1,156 +0,0 @@
-#ifndef LM_MODEL_H
-#define LM_MODEL_H
-
-#include "lm/bhiksha.hh"
-#include "lm/binary_format.hh"
-#include "lm/config.hh"
-#include "lm/facade.hh"
-#include "lm/quantize.hh"
-#include "lm/search_hashed.hh"
-#include "lm/search_trie.hh"
-#include "lm/state.hh"
-#include "lm/value.hh"
-#include "lm/vocab.hh"
-#include "lm/weights.hh"
-
-#include "util/murmur_hash.hh"
-
-#include <algorithm>
-#include <vector>
-
-#include <string.h>
-
-namespace util { class FilePiece; }
-
-namespace lm {
-namespace ngram {
-namespace detail {
-
-// Should return the same results as SRI.
-// ModelFacade typedefs Vocabulary so we use VocabularyT to avoid naming conflicts.
-template <class Search, class VocabularyT> class GenericModel : public base::ModelFacade<GenericModel<Search, VocabularyT>, State, VocabularyT> {
- private:
- typedef base::ModelFacade<GenericModel<Search, VocabularyT>, State, VocabularyT> P;
- public:
- // This is the model type returned by RecognizeBinary.
- static const ModelType kModelType;
-
- static const unsigned int kVersion = Search::kVersion;
-
- /* Get the size of memory that will be mapped given ngram counts. This
- * does not include small non-mapped control structures, such as this class
- * itself.
- */
- static uint64_t Size(const std::vector<uint64_t> &counts, const Config &config = Config());
-
- /* Load the model from a file. It may be an ARPA or binary file. Binary
- * files must have the format expected by this class or you'll get an
- * exception. So TrieModel can only load ARPA or binary created by
- * TrieModel. To classify binary files, call RecognizeBinary in
- * lm/binary_format.hh.
- */
- explicit GenericModel(const char *file, const Config &config = Config());
-
- /* Score p(new_word | in_state) and incorporate new_word into out_state.
- * Note that in_state and out_state must be different references:
- * &in_state != &out_state.
- */
- FullScoreReturn FullScore(const State &in_state, const WordIndex new_word, State &out_state) const;
-
- /* Slower call without in_state. Try to remember state, but sometimes it
- * would cost too much memory or your decoder isn't setup properly.
- * To use this function, make an array of WordIndex containing the context
- * vocabulary ids in reverse order. Then, pass the bounds of the array:
- * [context_rbegin, context_rend). The new_word is not part of the context
- * array unless you intend to repeat words.
- */
- FullScoreReturn FullScoreForgotState(const WordIndex *context_rbegin, const WordIndex *context_rend, const WordIndex new_word, State &out_state) const;
-
- /* Get the state for a context. Don't use this if you can avoid it. Use
- * BeginSentenceState or NullContextState and extend from those. If
- * you're only going to use this state to call FullScore once, use
- * FullScoreForgotState.
- * To use this function, make an array of WordIndex containing the context
- * vocabulary ids in reverse order. Then, pass the bounds of the array:
- * [context_rbegin, context_rend).
- */
- void GetState(const WordIndex *context_rbegin, const WordIndex *context_rend, State &out_state) const;
-
- /* More efficient version of FullScore where a partial n-gram has already
- * been scored.
- * NOTE: THE RETURNED .rest AND .prob ARE RELATIVE TO THE .rest RETURNED BEFORE.
- */
- FullScoreReturn ExtendLeft(
- // Additional context in reverse order. This will update add_rend to
- const WordIndex *add_rbegin, const WordIndex *add_rend,
- // Backoff weights to use.
- const float *backoff_in,
- // extend_left returned by a previous query.
- uint64_t extend_pointer,
- // Length of n-gram that the pointer corresponds to.
- unsigned char extend_length,
- // Where to write additional backoffs for [extend_length + 1, min(Order() - 1, return.ngram_length)]
- float *backoff_out,
- // Amount of additional content that should be considered by the next call.
- unsigned char &next_use) const;
-
- /* Return probabilities minus rest costs for an array of pointers. The
- * first length should be the length of the n-gram to which pointers_begin
- * points.
- */
- float UnRest(const uint64_t *pointers_begin, const uint64_t *pointers_end, unsigned char first_length) const {
- // Compiler should optimize this if away.
- return Search::kDifferentRest ? InternalUnRest(pointers_begin, pointers_end, first_length) : 0.0;
- }
-
- private:
- FullScoreReturn ScoreExceptBackoff(const WordIndex *const context_rbegin, const WordIndex *const context_rend, const WordIndex new_word, State &out_state) const;
-
- // Score bigrams and above. Do not include backoff.
- void ResumeScore(const WordIndex *context_rbegin, const WordIndex *const context_rend, unsigned char starting_order_minus_2, typename Search::Node &node, float *backoff_out, unsigned char &next_use, FullScoreReturn &ret) const;
-
- // Appears after Size in the cc file.
- void SetupMemory(void *start, const std::vector<uint64_t> &counts, const Config &config);
-
- void InitializeFromARPA(int fd, const char *file, const Config &config);
-
- float InternalUnRest(const uint64_t *pointers_begin, const uint64_t *pointers_end, unsigned char first_length) const;
-
- BinaryFormat backing_;
-
- VocabularyT vocab_;
-
- Search search_;
-};
-
-} // namespace detail
-
-// Instead of typedef, inherit. This allows the Model etc to be forward declared.
-// Oh the joys of C and C++.
-#define LM_COMMA() ,
-#define LM_NAME_MODEL(name, from)\
-class name : public from {\
- public:\
- name(const char *file, const Config &config = Config()) : from(file, config) {}\
-};
-
-LM_NAME_MODEL(ProbingModel, detail::GenericModel<detail::HashedSearch<BackoffValue> LM_COMMA() ProbingVocabulary>);
-LM_NAME_MODEL(RestProbingModel, detail::GenericModel<detail::HashedSearch<RestValue> LM_COMMA() ProbingVocabulary>);
-LM_NAME_MODEL(TrieModel, detail::GenericModel<trie::TrieSearch<DontQuantize LM_COMMA() trie::DontBhiksha> LM_COMMA() SortedVocabulary>);
-LM_NAME_MODEL(ArrayTrieModel, detail::GenericModel<trie::TrieSearch<DontQuantize LM_COMMA() trie::ArrayBhiksha> LM_COMMA() SortedVocabulary>);
-LM_NAME_MODEL(QuantTrieModel, detail::GenericModel<trie::TrieSearch<SeparatelyQuantize LM_COMMA() trie::DontBhiksha> LM_COMMA() SortedVocabulary>);
-LM_NAME_MODEL(QuantArrayTrieModel, detail::GenericModel<trie::TrieSearch<SeparatelyQuantize LM_COMMA() trie::ArrayBhiksha> LM_COMMA() SortedVocabulary>);
-
-// Default implementation. No real reason for it to be the default.
-typedef ::lm::ngram::ProbingVocabulary Vocabulary;
-typedef ProbingModel Model;
-
-/* Autorecognize the file type, load, and return the virtual base class. Don't
- * use the virtual base class if you can avoid it. Instead, use the above
- * classes as template arguments to your own virtual feature function.*/
-base::Model *LoadVirtual(const char *file_name, const Config &config = Config(), ModelType if_arpa = PROBING);
-
-} // namespace ngram
-} // namespace lm
-
-#endif // LM_MODEL_H
diff --git a/src/joshua/decoder/ff/lm/kenlm/lm/model_test.cc b/src/joshua/decoder/ff/lm/kenlm/lm/model_test.cc
deleted file mode 100644
index 0f54724..0000000
--- a/src/joshua/decoder/ff/lm/kenlm/lm/model_test.cc
+++ /dev/null
@@ -1,448 +0,0 @@
-#include "lm/model.hh"
-
-#include <stdlib.h>
-#include <string.h>
-
-#define BOOST_TEST_MODULE ModelTest
-#include <boost/test/unit_test.hpp>
-#include <boost/test/floating_point_comparison.hpp>
-
-// Apparently some Boost versions use templates and are pretty strict about types matching.
-#define SLOPPY_CHECK_CLOSE(ref, value, tol) BOOST_CHECK_CLOSE(static_cast<double>(ref), static_cast<double>(value), static_cast<double>(tol));
-
-namespace lm {
-namespace ngram {
-
-std::ostream &operator<<(std::ostream &o, const State &state) {
- o << "State length " << static_cast<unsigned int>(state.length) << ':';
- for (const WordIndex *i = state.words; i < state.words + state.length; ++i) {
- o << ' ' << *i;
- }
- return o;
-}
-
-namespace {
-
-// Stupid bjam reverses the command line arguments randomly.
-const char *TestLocation() {
- if (boost::unit_test::framework::master_test_suite().argc < 3) {
- return "test.arpa";
- }
- char **argv = boost::unit_test::framework::master_test_suite().argv;
- return argv[strstr(argv[1], "nounk") ? 2 : 1];
-}
-const char *TestNoUnkLocation() {
- if (boost::unit_test::framework::master_test_suite().argc < 3) {
- return "test_nounk.arpa";
- }
- char **argv = boost::unit_test::framework::master_test_suite().argv;
- return argv[strstr(argv[1], "nounk") ? 1 : 2];
-}
-
-template <class Model> State GetState(const Model &model, const char *word, const State &in) {
- WordIndex context[in.length + 1];
- context[0] = model.GetVocabulary().Index(word);
- std::copy(in.words, in.words + in.length, context + 1);
- State ret;
- model.GetState(context, context + in.length + 1, ret);
- return ret;
-}
-
-#define StartTest(word, ngram, score, indep_left) \
- ret = model.FullScore( \
- state, \
- model.GetVocabulary().Index(word), \
- out);\
- SLOPPY_CHECK_CLOSE(score, ret.prob, 0.001); \
- BOOST_CHECK_EQUAL(static_cast<unsigned int>(ngram), ret.ngram_length); \
- BOOST_CHECK_GE(std::min<unsigned char>(ngram, 5 - 1), out.length); \
- BOOST_CHECK_EQUAL(indep_left, ret.independent_left); \
- BOOST_CHECK_EQUAL(out, GetState(model, word, state));
-
-#define AppendTest(word, ngram, score, indep_left) \
- StartTest(word, ngram, score, indep_left) \
- state = out;
-
-template <class M> void Starters(const M &model) {
- FullScoreReturn ret;
- Model::State state(model.BeginSentenceState());
- Model::State out;
-
- StartTest("looking", 2, -0.4846522, true);
-
- // , probability plus <s> backoff
- StartTest(",", 1, -1.383514 + -0.4149733, true);
- // <unk> probability plus <s> backoff
- StartTest("this_is_not_found", 1, -1.995635 + -0.4149733, true);
-}
-
-template <class M> void Continuation(const M &model) {
- FullScoreReturn ret;
- Model::State state(model.BeginSentenceState());
- Model::State out;
-
- AppendTest("looking", 2, -0.484652, true);
- AppendTest("on", 3, -0.348837, true);
- AppendTest("a", 4, -0.0155266, true);
- AppendTest("little", 5, -0.00306122, true);
- State preserve = state;
- AppendTest("the", 1, -4.04005, true);
- AppendTest("biarritz", 1, -1.9889, true);
- AppendTest("not_found", 1, -2.29666, true);
- AppendTest("more", 1, -1.20632 - 20.0, true);
- AppendTest(".", 2, -0.51363, true);
- AppendTest("</s>", 3, -0.0191651, true);
- BOOST_CHECK_EQUAL(0, state.length);
-
- state = preserve;
- AppendTest("more", 5, -0.00181395, true);
- BOOST_CHECK_EQUAL(4, state.length);
- AppendTest("loin", 5, -0.0432557, true);
- BOOST_CHECK_EQUAL(1, state.length);
-}
-
-template <class M> void Blanks(const M &model) {
- FullScoreReturn ret;
- State state(model.NullContextState());
- State out;
- AppendTest("also", 1, -1.687872, false);
- AppendTest("would", 2, -2, true);
- AppendTest("consider", 3, -3, true);
- State preserve = state;
- AppendTest("higher", 4, -4, true);
- AppendTest("looking", 5, -5, true);
- BOOST_CHECK_EQUAL(1, state.length);
-
- state = preserve;
- // also would consider not_found
- AppendTest("not_found", 1, -1.995635 - 7.0 - 0.30103, true);
-
- state = model.NullContextState();
- // higher looking is a blank.
- AppendTest("higher", 1, -1.509559, false);
- AppendTest("looking", 2, -1.285941 - 0.30103, false);
-
- State higher_looking = state;
-
- BOOST_CHECK_EQUAL(1, state.length);
- AppendTest("not_found", 1, -1.995635 - 0.4771212, true);
-
- state = higher_looking;
- // higher looking consider
- AppendTest("consider", 1, -1.687872 - 0.4771212, true);
-
- state = model.NullContextState();
- AppendTest("would", 1, -1.687872, false);
- BOOST_CHECK_EQUAL(1, state.length);
- AppendTest("consider", 2, -1.687872 -0.30103, false);
- BOOST_CHECK_EQUAL(2, state.length);
- AppendTest("higher", 3, -1.509559 - 0.30103, false);
- BOOST_CHECK_EQUAL(3, state.length);
- AppendTest("looking", 4, -1.285941 - 0.30103, false);
-}
-
-template <class M> void Unknowns(const M &model) {
- FullScoreReturn ret;
- State state(model.NullContextState());
- State out;
-
- AppendTest("not_found", 1, -1.995635, false);
- State preserve = state;
- AppendTest("not_found2", 2, -15.0, true);
- AppendTest("not_found3", 2, -15.0 - 2.0, true);
-
- state = preserve;
- AppendTest("however", 2, -4, true);
- AppendTest("not_found3", 3, -6, true);
-}
-
-template <class M> void MinimalState(const M &model) {
- FullScoreReturn ret;
- State state(model.NullContextState());
- State out;
-
- AppendTest("baz", 1, -6.535897, true);
- BOOST_CHECK_EQUAL(0, state.length);
- state = model.NullContextState();
- AppendTest("foo", 1, -3.141592, true);
- BOOST_CHECK_EQUAL(1, state.length);
- AppendTest("bar", 2, -6.0, true);
- // Has to include the backoff weight.
- BOOST_CHECK_EQUAL(1, state.length);
- AppendTest("bar", 1, -2.718281 + 3.0, true);
- BOOST_CHECK_EQUAL(1, state.length);
-
- state = model.NullContextState();
- AppendTest("to", 1, -1.687872, false);
- AppendTest("look", 2, -0.2922095, true);
- BOOST_CHECK_EQUAL(2, state.length);
- AppendTest("a", 3, -7, true);
-}
-
-template <class M> void ExtendLeftTest(const M &model) {
- State right;
- FullScoreReturn little(model.FullScore(model.NullContextState(), model.GetVocabulary().Index("little"), right));
- const float kLittleProb = -1.285941;
- SLOPPY_CHECK_CLOSE(kLittleProb, little.prob, 0.001);
- unsigned char next_use;
- float backoff_out[4];
-
- FullScoreReturn extend_none(model.ExtendLeft(NULL, NULL, NULL, little.extend_left, 1, NULL, next_use));
- BOOST_CHECK_EQUAL(0, next_use);
- BOOST_CHECK_EQUAL(little.extend_left, extend_none.extend_left);
- SLOPPY_CHECK_CLOSE(little.prob - little.rest, extend_none.prob, 0.001);
- BOOST_CHECK_EQUAL(1, extend_none.ngram_length);
-
- const WordIndex a = model.GetVocabulary().Index("a");
- float backoff_in = 3.14;
- // a little
- FullScoreReturn extend_a(model.ExtendLeft(&a, &a + 1, &backoff_in, little.extend_left, 1, backoff_out, next_use));
- BOOST_CHECK_EQUAL(1, next_use);
- SLOPPY_CHECK_CLOSE(-0.69897, backoff_out[0], 0.001);
- SLOPPY_CHECK_CLOSE(-0.09132547 - little.rest, extend_a.prob, 0.001);
- BOOST_CHECK_EQUAL(2, extend_a.ngram_length);
- BOOST_CHECK(!extend_a.independent_left);
-
- const WordIndex on = model.GetVocabulary().Index("on");
- FullScoreReturn extend_on(model.ExtendLeft(&on, &on + 1, &backoff_in, extend_a.extend_left, 2, backoff_out, next_use));
- BOOST_CHECK_EQUAL(1, next_use);
- SLOPPY_CHECK_CLOSE(-0.4771212, backoff_out[0], 0.001);
- SLOPPY_CHECK_CLOSE(-0.0283603 - (extend_a.rest + little.rest), extend_on.prob, 0.001);
- BOOST_CHECK_EQUAL(3, extend_on.ngram_length);
- BOOST_CHECK(!extend_on.independent_left);
-
- const WordIndex both[2] = {a, on};
- float backoff_in_arr[4];
- FullScoreReturn extend_both(model.ExtendLeft(both, both + 2, backoff_in_arr, little.extend_left, 1, backoff_out, next_use));
- BOOST_CHECK_EQUAL(2, next_use);
- SLOPPY_CHECK_CLOSE(-0.69897, backoff_out[0], 0.001);
- SLOPPY_CHECK_CLOSE(-0.4771212, backoff_out[1], 0.001);
- SLOPPY_CHECK_CLOSE(-0.0283603 - little.rest, extend_both.prob, 0.001);
- BOOST_CHECK_EQUAL(3, extend_both.ngram_length);
- BOOST_CHECK(!extend_both.independent_left);
- BOOST_CHECK_EQUAL(extend_on.extend_left, extend_both.extend_left);
-}
-
-#define StatelessTest(word, provide, ngram, score) \
- ret = model.FullScoreForgotState(indices + num_words - word, indices + num_words - word + provide, indices[num_words - word - 1], state); \
- SLOPPY_CHECK_CLOSE(score, ret.prob, 0.001); \
- BOOST_CHECK_EQUAL(static_cast<unsigned int>(ngram), ret.ngram_length); \
- model.GetState(indices + num_words - word, indices + num_words - word + provide, before); \
- ret = model.FullScore(before, indices[num_words - word - 1], out); \
- BOOST_CHECK(state == out); \
- SLOPPY_CHECK_CLOSE(score, ret.prob, 0.001); \
- BOOST_CHECK_EQUAL(static_cast<unsigned int>(ngram), ret.ngram_length);
-
-template <class M> void Stateless(const M &model) {
- const char *words[] = {"<s>", "looking", "on", "a", "little", "the", "biarritz", "not_found", "more", ".", "</s>"};
- const size_t num_words = sizeof(words) / sizeof(const char*);
- // Silience "array subscript is above array bounds" when extracting end pointer.
- WordIndex indices[num_words + 1];
- for (unsigned int i = 0; i < num_words; ++i) {
- indices[num_words - 1 - i] = model.GetVocabulary().Index(words[i]);
- }
- FullScoreReturn ret;
- State state, out, before;
-
- ret = model.FullScoreForgotState(indices + num_words - 1, indices + num_words, indices[num_words - 2], state);
- SLOPPY_CHECK_CLOSE(-0.484652, ret.prob, 0.001);
- StatelessTest(1, 1, 2, -0.484652);
-
- // looking
- StatelessTest(1, 2, 2, -0.484652);
- // on
- AppendTest("on", 3, -0.348837, true);
- StatelessTest(2, 3, 3, -0.348837);
- StatelessTest(2, 2, 3, -0.348837);
- StatelessTest(2, 1, 2, -0.4638903);
- // a
- StatelessTest(3, 4, 4, -0.0155266);
- // little
- AppendTest("little", 5, -0.00306122, true);
- StatelessTest(4, 5, 5, -0.00306122);
- // the
- AppendTest("the", 1, -4.04005, true);
- StatelessTest(5, 5, 1, -4.04005);
- // No context of the.
- StatelessTest(5, 0, 1, -1.687872);
- // biarritz
- StatelessTest(6, 1, 1, -1.9889);
- // not found
- StatelessTest(7, 1, 1, -2.29666);
- StatelessTest(7, 0, 1, -1.995635);
-
- WordIndex unk[1];
- unk[0] = 0;
- model.GetState(unk, unk + 1, state);
- BOOST_CHECK_EQUAL(1, state.length);
- BOOST_CHECK_EQUAL(static_cast<WordIndex>(0), state.words[0]);
-}
-
-template <class M> void NoUnkCheck(const M &model) {
- WordIndex unk_index = 0;
- State state;
-
- FullScoreReturn ret = model.FullScoreForgotState(&unk_index, &unk_index + 1, unk_index, state);
- SLOPPY_CHECK_CLOSE(-100.0, ret.prob, 0.001);
-}
-
-template <class M> void Everything(const M &m) {
- Starters(m);
- Continuation(m);
- Blanks(m);
- Unknowns(m);
- MinimalState(m);
- ExtendLeftTest(m);
- Stateless(m);
-}
-
-class ExpectEnumerateVocab : public EnumerateVocab {
- public:
- ExpectEnumerateVocab() {}
-
- void Add(WordIndex index, const StringPiece &str) {
- BOOST_CHECK_EQUAL(seen.size(), index);
- seen.push_back(std::string(str.data(), str.length()));
- }
-
- void Check(const base::Vocabulary &vocab) {
- BOOST_CHECK_EQUAL(37ULL, seen.size());
- BOOST_REQUIRE(!seen.empty());
- BOOST_CHECK_EQUAL("<unk>", seen[0]);
- for (WordIndex i = 0; i < seen.size(); ++i) {
- BOOST_CHECK_EQUAL(i, vocab.Index(seen[i]));
- }
- }
-
- void Clear() {
- seen.clear();
- }
-
- std::vector<std::string> seen;
-};
-
-template <class ModelT> void LoadingTest() {
- Config config;
- config.arpa_complain = Config::NONE;
- config.messages = NULL;
- config.probing_multiplier = 2.0;
- {
- ExpectEnumerateVocab enumerate;
- config.enumerate_vocab = &enumerate;
- ModelT m(TestLocation(), config);
- enumerate.Check(m.GetVocabulary());
- BOOST_CHECK_EQUAL((WordIndex)37, m.GetVocabulary().Bound());
- Everything(m);
- }
- {
- ExpectEnumerateVocab enumerate;
- config.enumerate_vocab = &enumerate;
- ModelT m(TestNoUnkLocation(), config);
- enumerate.Check(m.GetVocabulary());
- BOOST_CHECK_EQUAL((WordIndex)37, m.GetVocabulary().Bound());
- NoUnkCheck(m);
- }
-}
-
-BOOST_AUTO_TEST_CASE(probing) {
- LoadingTest<Model>();
-}
-BOOST_AUTO_TEST_CASE(trie) {
- LoadingTest<TrieModel>();
-}
-BOOST_AUTO_TEST_CASE(quant_trie) {
- LoadingTest<QuantTrieModel>();
-}
-BOOST_AUTO_TEST_CASE(bhiksha_trie) {
- LoadingTest<ArrayTrieModel>();
-}
-BOOST_AUTO_TEST_CASE(quant_bhiksha_trie) {
- LoadingTest<QuantArrayTrieModel>();
-}
-
-template <class ModelT> void BinaryTest(Config::WriteMethod write_method) {
- Config config;
- config.write_mmap = "test.binary";
- config.messages = NULL;
- config.write_method = write_method;
- ExpectEnumerateVocab enumerate;
- config.enumerate_vocab = &enumerate;
-
- {
- ModelT copy_model(TestLocation(), config);
- enumerate.Check(copy_model.GetVocabulary());
- enumerate.Clear();
- Everything(copy_model);
- }
-
- config.write_mmap = NULL;
-
- ModelType type;
- BOOST_REQUIRE(RecognizeBinary("test.binary", type));
- BOOST_CHECK_EQUAL(ModelT::kModelType, type);
-
- {
- ModelT binary("test.binary", config);
- enumerate.Check(binary.GetVocabulary());
- Everything(binary);
- }
- unlink("test.binary");
-
- // Now test without <unk>.
- config.write_mmap = "test_nounk.binary";
- config.messages = NULL;
- enumerate.Clear();
- {
- ModelT copy_model(TestNoUnkLocation(), config);
- enumerate.Check(copy_model.GetVocabulary());
- enumerate.Clear();
- NoUnkCheck(copy_model);
- }
- config.write_mmap = NULL;
- {
- ModelT binary(TestNoUnkLocation(), config);
- enumerate.Check(binary.GetVocabulary());
- NoUnkCheck(binary);
- }
- unlink("test_nounk.binary");
-}
-
-template <class ModelT> void BinaryTest() {
- BinaryTest<ModelT>(Config::WRITE_MMAP);
- BinaryTest<ModelT>(Config::WRITE_AFTER);
-}
-
-BOOST_AUTO_TEST_CASE(write_and_read_probing) {
- BinaryTest<ProbingModel>();
-}
-BOOST_AUTO_TEST_CASE(write_and_read_rest_probing) {
- BinaryTest<RestProbingModel>();
-}
-BOOST_AUTO_TEST_CASE(write_and_read_trie) {
- BinaryTest<TrieModel>();
-}
-BOOST_AUTO_TEST_CASE(write_and_read_quant_trie) {
- BinaryTest<QuantTrieModel>();
-}
-BOOST_AUTO_TEST_CASE(write_and_read_array_trie) {
- BinaryTest<ArrayTrieModel>();
-}
-BOOST_AUTO_TEST_CASE(write_and_read_quant_array_trie) {
- BinaryTest<QuantArrayTrieModel>();
-}
-
-BOOST_AUTO_TEST_CASE(rest_max) {
- Config config;
- config.arpa_complain = Config::NONE;
- config.messages = NULL;
-
- RestProbingModel model(TestLocation(), config);
- State state, out;
- FullScoreReturn ret(model.FullScore(model.NullContextState(), model.GetVocabulary().Index("."), state));
- SLOPPY_CHECK_CLOSE(-0.2705918, ret.rest, 0.001);
- SLOPPY_CHECK_CLOSE(-0.01916512, model.FullScore(state, model.GetVocabulary().EndSentence(), out).rest, 0.001);
-}
-
-} // namespace
-} // namespace ngram
-} // namespace lm
diff --git a/src/joshua/decoder/ff/lm/kenlm/lm/model_type.hh b/src/joshua/decoder/ff/lm/kenlm/lm/model_type.hh
deleted file mode 100644
index fbe1117..0000000
--- a/src/joshua/decoder/ff/lm/kenlm/lm/model_type.hh
+++ /dev/null
@@ -1,23 +0,0 @@
-#ifndef LM_MODEL_TYPE_H
-#define LM_MODEL_TYPE_H
-
-namespace lm {
-namespace ngram {
-
-/* Not the best numbering system, but it grew this way for historical reasons
- * and I want to preserve existing binary files. */
-typedef enum {PROBING=0, REST_PROBING=1, TRIE=2, QUANT_TRIE=3, ARRAY_TRIE=4, QUANT_ARRAY_TRIE=5} ModelType;
-
-// Historical names.
-const ModelType HASH_PROBING = PROBING;
-const ModelType TRIE_SORTED = TRIE;
-const ModelType QUANT_TRIE_SORTED = QUANT_TRIE;
-const ModelType ARRAY_TRIE_SORTED = ARRAY_TRIE;
-const ModelType QUANT_ARRAY_TRIE_SORTED = QUANT_ARRAY_TRIE;
-
-const static ModelType kQuantAdd = static_cast<ModelType>(QUANT_TRIE - TRIE);
-const static ModelType kArrayAdd = static_cast<ModelType>(ARRAY_TRIE - TRIE);
-
-} // namespace ngram
-} // namespace lm
-#endif // LM_MODEL_TYPE_H
diff --git a/src/joshua/decoder/ff/lm/kenlm/lm/ngram_query.hh b/src/joshua/decoder/ff/lm/kenlm/lm/ngram_query.hh
deleted file mode 100644
index 5f330c5..0000000
--- a/src/joshua/decoder/ff/lm/kenlm/lm/ngram_query.hh
+++ /dev/null
@@ -1,110 +0,0 @@
-#ifndef LM_NGRAM_QUERY_H
-#define LM_NGRAM_QUERY_H
-
-#include "lm/enumerate_vocab.hh"
-#include "lm/model.hh"
-#include "util/file_piece.hh"
-#include "util/usage.hh"
-
-#include <cstdlib>
-#include <iostream>
-#include <ostream>
-#include <istream>
-#include <string>
-
-#include <math.h>
-
-namespace lm {
-namespace ngram {
-
-struct BasicPrint {
- void Word(StringPiece, WordIndex, const FullScoreReturn &) const {}
- void Line(uint64_t oov, float total) const {
- std::cout << "Total: " << total << " OOV: " << oov << '\n';
- }
- void Summary(double, double, uint64_t, uint64_t) {}
-
-};
-
-struct FullPrint : public BasicPrint {
- void Word(StringPiece surface, WordIndex vocab, const FullScoreReturn &ret) const {
- std::cout << surface << '=' << vocab << ' ' << static_cast<unsigned int>(ret.ngram_length) << ' ' << ret.prob << '\t';
- }
-
- void Summary(double ppl_including_oov, double ppl_excluding_oov, uint64_t corpus_oov, uint64_t corpus_tokens) {
- std::cout <<
- "Perplexity including OOVs:\t" << ppl_including_oov << "\n"
- "Perplexity excluding OOVs:\t" << ppl_excluding_oov << "\n"
- "OOVs:\t" << corpus_oov << "\n"
- "Tokens:\t" << corpus_tokens << '\n'
- ;
- }
-};
-
-template <class Model, class Printer> void Query(const Model &model, bool sentence_context) {
- Printer printer;
- typename Model::State state, out;
- lm::FullScoreReturn ret;
- StringPiece word;
-
- util::FilePiece in(0);
-
- double corpus_total = 0.0;
- double corpus_total_oov_only = 0.0;
- uint64_t corpus_oov = 0;
- uint64_t corpus_tokens = 0;
-
- while (true) {
- state = sentence_context ? model.BeginSentenceState() : model.NullContextState();
- float total = 0.0;
- uint64_t oov = 0;
-
- while (in.ReadWordSameLine(word)) {
- lm::WordIndex vocab = model.GetVocabulary().Index(word);
- ret = model.FullScore(state, vocab, out);
- if (vocab == model.GetVocabulary().NotFound()) {
- ++oov;
- corpus_total_oov_only += ret.prob;
- }
- total += ret.prob;
- printer.Word(word, vocab, ret);
- ++corpus_tokens;
- state = out;
- }
- // If people don't have a newline after their last query, this won't add a </s>.
- // Sue me.
- try {
- UTIL_THROW_IF('\n' != in.get(), util::Exception, "FilePiece is confused.");
- } catch (const util::EndOfFileException &e) { break; }
- if (sentence_context) {
- ret = model.FullScore(state, model.GetVocabulary().EndSentence(), out);
- total += ret.prob;
- ++corpus_tokens;
- printer.Word("</s>", model.GetVocabulary().EndSentence(), ret);
- }
- printer.Line(oov, total);
- corpus_total += total;
- corpus_oov += oov;
- }
- printer.Summary(
- pow(10.0, -(corpus_total / static_cast<double>(corpus_tokens))), // PPL including OOVs
- pow(10.0, -((corpus_total - corpus_total_oov_only) / static_cast<double>(corpus_tokens - corpus_oov))), // PPL excluding OOVs
- corpus_oov,
- corpus_tokens);
-}
-
-template <class Model> void Query(const char *file, const Config &config, bool sentence_context, bool show_words) {
- Model model(file, config);
- if (show_words) {
- Query<Model, FullPrint>(model, sentence_context);
- } else {
- Query<Model, BasicPrint>(model, sentence_context);
- }
-}
-
-} // namespace ngram
-} // namespace lm
-
-#endif // LM_NGRAM_QUERY_H
-
-
diff --git a/src/joshua/decoder/ff/lm/kenlm/lm/partial.hh b/src/joshua/decoder/ff/lm/kenlm/lm/partial.hh
deleted file mode 100644
index d8adc69..0000000
--- a/src/joshua/decoder/ff/lm/kenlm/lm/partial.hh
+++ /dev/null
@@ -1,167 +0,0 @@
-#ifndef LM_PARTIAL_H
-#define LM_PARTIAL_H
-
-#include "lm/return.hh"
-#include "lm/state.hh"
-
-#include <algorithm>
-
-#include <assert.h>
-
-namespace lm {
-namespace ngram {
-
-struct ExtendReturn {
- float adjust;
- bool make_full;
- unsigned char next_use;
-};
-
-template <class Model> ExtendReturn ExtendLoop(
- const Model &model,
- unsigned char seen, const WordIndex *add_rbegin, const WordIndex *add_rend, const float *backoff_start,
- const uint64_t *pointers, const uint64_t *pointers_end,
- uint64_t *&pointers_write,
- float *backoff_write) {
- unsigned char add_length = add_rend - add_rbegin;
-
- float backoff_buf[2][KENLM_MAX_ORDER - 1];
- float *backoff_in = backoff_buf[0], *backoff_out = backoff_buf[1];
- std::copy(backoff_start, backoff_start + add_length, backoff_in);
-
- ExtendReturn value;
- value.make_full = false;
- value.adjust = 0.0;
- value.next_use = add_length;
-
- unsigned char i = 0;
- unsigned char length = pointers_end - pointers;
- // pointers_write is NULL means that the existing left state is full, so we should use completed probabilities.
- if (pointers_write) {
- // Using full context, writing to new left state.
- for (; i < length; ++i) {
- FullScoreReturn ret(model.ExtendLeft(
- add_rbegin, add_rbegin + value.next_use,
- backoff_in,
- pointers[i], i + seen + 1,
- backoff_out,
- value.next_use));
- std::swap(backoff_in, backoff_out);
- if (ret.independent_left) {
- value.adjust += ret.prob;
- value.make_full = true;
- ++i;
- break;
- }
- value.adjust += ret.rest;
- *pointers_write++ = ret.extend_left;
- if (value.next_use != add_length) {
- value.make_full = true;
- ++i;
- break;
- }
- }
- }
- // Using some of the new context.
- for (; i < length && value.next_use; ++i) {
- FullScoreReturn ret(model.ExtendLeft(
- add_rbegin, add_rbegin + value.next_use,
- backoff_in,
- pointers[i], i + seen + 1,
- backoff_out,
- value.next_use));
- std::swap(backoff_in, backoff_out);
- value.adjust += ret.prob;
- }
- float unrest = model.UnRest(pointers + i, pointers_end, i + seen + 1);
- // Using none of the new context.
- value.adjust += unrest;
-
- std::copy(backoff_in, backoff_in + value.next_use, backoff_write);
- return value;
-}
-
-template <class Model> float RevealBefore(const Model &model, const Right &reveal, const unsigned char seen, bool reveal_full, Left &left, Right &right) {
- assert(seen < reveal.length || reveal_full);
- uint64_t *pointers_write = reveal_full ? NULL : left.pointers;
- float backoff_buffer[KENLM_MAX_ORDER - 1];
- ExtendReturn value(ExtendLoop(
- model,
- seen, reveal.words + seen, reveal.words + reveal.length, reveal.backoff + seen,
- left.pointers, left.pointers + left.length,
- pointers_write,
- left.full ? backoff_buffer : (right.backoff + right.length)));
- if (reveal_full) {
- left.length = 0;
- value.make_full = true;
- } else {
- left.length = pointers_write - left.pointers;
- value.make_full |= (left.length == model.Order() - 1);
- }
- if (left.full) {
- for (unsigned char i = 0; i < value.next_use; ++i) value.adjust += backoff_buffer[i];
- } else {
- // If left wasn't full when it came in, put words into right state.
- std::copy(reveal.words + seen, reveal.words + seen + value.next_use, right.words + right.length);
- right.length += value.next_use;
- left.full = value.make_full || (right.length == model.Order() - 1);
- }
- return value.adjust;
-}
-
-template <class Model> float RevealAfter(const Model &model, Left &left, Right &right, const Left &reveal, unsigned char seen) {
- assert(seen < reveal.length || reveal.full);
- uint64_t *pointers_write = left.full ? NULL : (left.pointers + left.length);
- ExtendReturn value(ExtendLoop(
- model,
- seen, right.words, right.words + right.length, right.backoff,
- reveal.pointers + seen, reveal.pointers + reveal.length,
- pointers_write,
- right.backoff));
- if (reveal.full) {
- for (unsigned char i = 0; i < value.next_use; ++i) value.adjust += right.backoff[i];
- right.length = 0;
- value.make_full = true;
- } else {
- right.length = value.next_use;
- value.make_full |= (right.length == model.Order() - 1);
- }
- if (!left.full) {
- left.length = pointers_write - left.pointers;
- left.full = value.make_full || (left.length == model.Order() - 1);
- }
- return value.adjust;
-}
-
-template <class Model> float Subsume(const Model &model, Left &first_left, const Right &first_right, const Left &second_left, Right &second_right, const unsigned int between_length) {
- assert(first_right.length < KENLM_MAX_ORDER);
- assert(second_left.length < KENLM_MAX_ORDER);
- assert(between_length < KENLM_MAX_ORDER - 1);
- uint64_t *pointers_write = first_left.full ? NULL : (first_left.pointers + first_left.length);
- float backoff_buffer[KENLM_MAX_ORDER - 1];
- ExtendReturn value(ExtendLoop(
- model,
- between_length, first_right.words, first_right.words + first_right.length, first_right.backoff,
- second_left.pointers, second_left.pointers + second_left.length,
- pointers_write,
- second_left.full ? backoff_buffer : (second_right.backoff + second_right.length)));
- if (second_left.full) {
- for (unsigned char i = 0; i < value.next_use; ++i) value.adjust += backoff_buffer[i];
- } else {
- std::copy(first_right.words, first_right.words + value.next_use, second_right.words + second_right.length);
- second_right.length += value.next_use;
- value.make_full |= (second_right.length == model.Order() - 1);
- }
- if (!first_left.full) {
- first_left.length = pointers_write - first_left.pointers;
- first_left.full = value.make_full || second_left.full || (first_left.length == model.Order() - 1);
- }
- assert(first_left.length < KENLM_MAX_ORDER);
- assert(second_right.length < KENLM_MAX_ORDER);
- return value.adjust;
-}
-
-} // namespace ngram
-} // namespace lm
-
-#endif // LM_PARTIAL_H
diff --git a/src/joshua/decoder/ff/lm/kenlm/lm/partial_test.cc b/src/joshua/decoder/ff/lm/kenlm/lm/partial_test.cc
deleted file mode 100644
index 8d309c8..0000000
--- a/src/joshua/decoder/ff/lm/kenlm/lm/partial_test.cc
+++ /dev/null
@@ -1,199 +0,0 @@
-#include "lm/partial.hh"
-
-#include "lm/left.hh"
-#include "lm/model.hh"
-#include "util/tokenize_piece.hh"
-
-#define BOOST_TEST_MODULE PartialTest
-#include <boost/test/unit_test.hpp>
-#include <boost/test/floating_point_comparison.hpp>
-
-namespace lm {
-namespace ngram {
-namespace {
-
-const char *TestLocation() {
- if (boost::unit_test::framework::master_test_suite().argc < 2) {
- return "test.arpa";
- }
- return boost::unit_test::framework::master_test_suite().argv[1];
-}
-
-Config SilentConfig() {
- Config config;
- config.arpa_complain = Config::NONE;
- config.messages = NULL;
- return config;
-}
-
-struct ModelFixture {
- ModelFixture() : m(TestLocation(), SilentConfig()) {}
-
- RestProbingModel m;
-};
-
-BOOST_FIXTURE_TEST_SUITE(suite, ModelFixture)
-
-BOOST_AUTO_TEST_CASE(SimpleBefore) {
- Left left;
- left.full = false;
- left.length = 0;
- Right right;
- right.length = 0;
-
- Right reveal;
- reveal.length = 1;
- WordIndex period = m.GetVocabulary().Index(".");
- reveal.words[0] = period;
- reveal.backoff[0] = -0.845098;
-
- BOOST_CHECK_CLOSE(0.0, RevealBefore(m, reveal, 0, false, left, right), 0.001);
- BOOST_CHECK_EQUAL(0, left.length);
- BOOST_CHECK(!left.full);
- BOOST_CHECK_EQUAL(1, right.length);
- BOOST_CHECK_EQUAL(period, right.words[0]);
- BOOST_CHECK_CLOSE(-0.845098, right.backoff[0], 0.001);
-
- WordIndex more = m.GetVocabulary().Index("more");
- reveal.words[1] = more;
- reveal.backoff[1] = -0.4771212;
- reveal.length = 2;
- BOOST_CHECK_CLOSE(0.0, RevealBefore(m, reveal, 1, false, left, right), 0.001);
- BOOST_CHECK_EQUAL(0, left.length);
- BOOST_CHECK(!left.full);
- BOOST_CHECK_EQUAL(2, right.length);
- BOOST_CHECK_EQUAL(period, right.words[0]);
- BOOST_CHECK_EQUAL(more, right.words[1]);
- BOOST_CHECK_CLOSE(-0.845098, right.backoff[0], 0.001);
- BOOST_CHECK_CLOSE(-0.4771212, right.backoff[1], 0.001);
-}
-
-BOOST_AUTO_TEST_CASE(AlsoWouldConsider) {
- WordIndex would = m.GetVocabulary().Index("would");
- WordIndex consider = m.GetVocabulary().Index("consider");
-
- ChartState current;
- current.left.length = 1;
- current.left.pointers[0] = would;
- current.left.full = false;
- current.right.length = 1;
- current.right.words[0] = would;
- current.right.backoff[0] = -0.30103;
-
- Left after;
- after.full = false;
- after.length = 1;
- after.pointers[0] = consider;
-
- // adjustment for would consider
- BOOST_CHECK_CLOSE(-1.687872 - -0.2922095 - 0.30103, RevealAfter(m, current.left, current.right, after, 0), 0.001);
-
- BOOST_CHECK_EQUAL(2, current.left.length);
- BOOST_CHECK_EQUAL(would, current.left.pointers[0]);
- BOOST_CHECK_EQUAL(false, current.left.full);
-
- WordIndex also = m.GetVocabulary().Index("also");
- Right before;
- before.length = 1;
- before.words[0] = also;
- before.backoff[0] = -0.30103;
- // r(would) = -0.2922095 [i would], r(would -> consider) = -1.988902 [b(would) + p(consider)]
- // p(also -> would) = -2, p(also would -> consider) = -3
- BOOST_CHECK_CLOSE(-2 + 0.2922095 -3 + 1.988902, RevealBefore(m, before, 0, false, current.left, current.right), 0.001);
- BOOST_CHECK_EQUAL(0, current.left.length);
- BOOST_CHECK(current.left.full);
- BOOST_CHECK_EQUAL(2, current.right.length);
- BOOST_CHECK_EQUAL(would, current.right.words[0]);
- BOOST_CHECK_EQUAL(also, current.right.words[1]);
-}
-
-BOOST_AUTO_TEST_CASE(EndSentence) {
- WordIndex loin = m.GetVocabulary().Index("loin");
- WordIndex period = m.GetVocabulary().Index(".");
- WordIndex eos = m.GetVocabulary().EndSentence();
-
- ChartState between;
- between.left.length = 1;
- between.left.pointers[0] = eos;
- between.left.full = true;
- between.right.length = 0;
-
- Right before;
- before.words[0] = period;
- before.words[1] = loin;
- before.backoff[0] = -0.845098;
- before.backoff[1] = 0.0;
-
- before.length = 1;
- BOOST_CHECK_CLOSE(-0.0410707, RevealBefore(m, before, 0, true, between.left, between.right), 0.001);
- BOOST_CHECK_EQUAL(0, between.left.length);
-}
-
-float ScoreFragment(const RestProbingModel &model, unsigned int *begin, unsigned int *end, ChartState &out) {
- RuleScore<RestProbingModel> scorer(model, out);
- for (unsigned int *i = begin; i < end; ++i) {
- scorer.Terminal(*i);
- }
- return scorer.Finish();
-}
-
-void CheckAdjustment(const RestProbingModel &model, float expect, const Right &before_in, bool before_full, ChartState between, const Left &after_in) {
- Right before(before_in);
- Left after(after_in);
- after.full = false;
- float got = 0.0;
- for (unsigned int i = 1; i < 5; ++i) {
- if (before_in.length >= i) {
- before.length = i;
- got += RevealBefore(model, before, i - 1, false, between.left, between.right);
- }
- if (after_in.length >= i) {
- after.length = i;
- got += RevealAfter(model, between.left, between.right, after, i - 1);
- }
- }
- if (after_in.full) {
- after.full = true;
- got += RevealAfter(model, between.left, between.right, after, after.length);
- }
- if (before_full) {
- got += RevealBefore(model, before, before.length, true, between.left, between.right);
- }
- // Sometimes they're zero and BOOST_CHECK_CLOSE fails for this.
- BOOST_CHECK(fabs(expect - got) < 0.001);
-}
-
-void FullDivide(const RestProbingModel &model, StringPiece str) {
- std::vector<WordIndex> indices;
- for (util::TokenIter<util::SingleCharacter, true> i(str, ' '); i; ++i) {
- indices.push_back(model.GetVocabulary().Index(*i));
- }
- ChartState full_state;
- float full = ScoreFragment(model, &indices.front(), &indices.back() + 1, full_state);
-
- ChartState before_state;
- before_state.left.full = false;
- RuleScore<RestProbingModel> before_scorer(model, before_state);
- float before_score = 0.0;
- for (unsigned int before = 0; before < indices.size(); ++before) {
- for (unsigned int after = before; after <= indices.size(); ++after) {
- ChartState after_state, between_state;
- float after_score = ScoreFragment(model, &indices.front() + after, &indices.front() + indices.size(), after_state);
- float between_score = ScoreFragment(model, &indices.front() + before, &indices.front() + after, between_state);
- CheckAdjustment(model, full - before_score - after_score - between_score, before_state.right, before_state.left.full, between_state, after_state.left);
- }
- before_scorer.Terminal(indices[before]);
- before_score = before_scorer.Finish();
- }
-}
-
-BOOST_AUTO_TEST_CASE(Strings) {
- FullDivide(m, "also would consider");
- FullDivide(m, "looking on a little more loin . </s>");
- FullDivide(m, "in biarritz watching considering looking . on a little more loin also would consider higher to look good unknown the screening foo bar , unknown however unknown </s>");
-}
-
-BOOST_AUTO_TEST_SUITE_END()
-} // namespace
-} // namespace ngram
-} // namespace lm
diff --git a/src/joshua/decoder/ff/lm/kenlm/lm/quantize.cc b/src/joshua/decoder/ff/lm/kenlm/lm/quantize.cc
deleted file mode 100644
index 273ea39..0000000
--- a/src/joshua/decoder/ff/lm/kenlm/lm/quantize.cc
+++ /dev/null
@@ -1,93 +0,0 @@
-/* Quantize into bins of equal size as described in
- * M. Federico and N. Bertoldi. 2006. How many bits are needed
- * to store probabilities for phrase-based translation? In Proc.
- * of the Workshop on Statistical Machine Translation, pages
- * 94–101, New York City, June. Association for Computa-
- * tional Linguistics.
- */
-
-#include "lm/quantize.hh"
-
-#include "lm/binary_format.hh"
-#include "lm/lm_exception.hh"
-#include "util/file.hh"
-
-#include <algorithm>
-#include <numeric>
-
-namespace lm {
-namespace ngram {
-
-namespace {
-
-void MakeBins(std::vector<float> &values, float *centers, uint32_t bins) {
- std::sort(values.begin(), values.end());
- std::vector<float>::const_iterator start = values.begin(), finish;
- for (uint32_t i = 0; i < bins; ++i, ++centers, start = finish) {
- finish = values.begin() + ((values.size() * static_cast<uint64_t>(i + 1)) / bins);
- if (finish == start) {
- // zero length bucket.
- *centers = i ? *(centers - 1) : -std::numeric_limits<float>::infinity();
- } else {
- *centers = std::accumulate(start, finish, 0.0) / static_cast<float>(finish - start);
- }
- }
-}
-
-const char kSeparatelyQuantizeVersion = 2;
-
-} // namespace
-
-void SeparatelyQuantize::UpdateConfigFromBinary(const BinaryFormat &file, uint64_t offset, Config &config) {
- unsigned char buffer[3];
- file.ReadForConfig(buffer, 3, offset);
- char version = buffer[0];
- config.prob_bits = buffer[1];
- config.backoff_bits = buffer[2];
- if (version != kSeparatelyQuantizeVersion) UTIL_THROW(FormatLoadException, "This file has quantization version " << (unsigned)version << " but the code expects version " << (unsigned)kSeparatelyQuantizeVersion);
-}
-
-void SeparatelyQuantize::SetupMemory(void *base, unsigned char order, const Config &config) {
- prob_bits_ = config.prob_bits;
- backoff_bits_ = config.backoff_bits;
- // We need the reserved values.
- if (config.prob_bits == 0) UTIL_THROW(ConfigException, "You can't quantize probability to zero");
- if (config.backoff_bits == 0) UTIL_THROW(ConfigException, "You can't quantize backoff to zero");
- if (config.prob_bits > 25) UTIL_THROW(ConfigException, "For efficiency reasons, quantizing probability supports at most 25 bits. Currently you have requested " << static_cast<unsigned>(config.prob_bits) << " bits.");
- if (config.backoff_bits > 25) UTIL_THROW(ConfigException, "For efficiency reasons, quantizing backoff supports at most 25 bits. Currently you have requested " << static_cast<unsigned>(config.backoff_bits) << " bits.");
- // Reserve 8 byte header for bit counts.
- actual_base_ = static_cast<uint8_t*>(base);
- float *start = reinterpret_cast<float*>(actual_base_ + 8);
- for (unsigned char i = 0; i < order - 2; ++i) {
- tables_[i][0] = Bins(prob_bits_, start);
- start += (1ULL << prob_bits_);
- tables_[i][1] = Bins(backoff_bits_, start);
- start += (1ULL << backoff_bits_);
- }
- longest_ = tables_[order - 2][0] = Bins(prob_bits_, start);
-}
-
-void SeparatelyQuantize::Train(uint8_t order, std::vector<float> &prob, std::vector<float> &backoff) {
- TrainProb(order, prob);
-
- // Backoff
- float *centers = tables_[order - 2][1].Populate();
- *(centers++) = kNoExtensionBackoff;
- *(centers++) = kExtensionBackoff;
- MakeBins(backoff, centers, (1ULL << backoff_bits_) - 2);
-}
-
-void SeparatelyQuantize::TrainProb(uint8_t order, std::vector<float> &prob) {
- float *centers = tables_[order - 2][0].Populate();
- MakeBins(prob, centers, (1ULL << prob_bits_));
-}
-
-void SeparatelyQuantize::FinishedLoading(const Config &config) {
- uint8_t *actual_base = actual_base_;
- *(actual_base++) = kSeparatelyQuantizeVersion; // version
- *(actual_base++) = config.prob_bits;
- *(actual_base++) = config.backoff_bits;
-}
-
-} // namespace ngram
-} // namespace lm
diff --git a/src/joshua/decoder/ff/lm/kenlm/lm/quantize.hh b/src/joshua/decoder/ff/lm/kenlm/lm/quantize.hh
deleted file mode 100644
index 84a3087..0000000
--- a/src/joshua/decoder/ff/lm/kenlm/lm/quantize.hh
+++ /dev/null
@@ -1,233 +0,0 @@
-#ifndef LM_QUANTIZE_H
-#define LM_QUANTIZE_H
-
-#include "lm/blank.hh"
-#include "lm/config.hh"
-#include "lm/max_order.hh"
-#include "lm/model_type.hh"
-#include "util/bit_packing.hh"
-
-#include <algorithm>
-#include <vector>
-
-#include <stdint.h>
-
-#include <iostream>
-
-namespace lm {
-namespace ngram {
-
-struct Config;
-class BinaryFormat;
-
-/* Store values directly and don't quantize. */
-class DontQuantize {
- public:
- static const ModelType kModelTypeAdd = static_cast<ModelType>(0);
- static void UpdateConfigFromBinary(const BinaryFormat &, uint64_t, Config &) {}
- static uint64_t Size(uint8_t /*order*/, const Config &/*config*/) { return 0; }
- static uint8_t MiddleBits(const Config &/*config*/) { return 63; }
- static uint8_t LongestBits(const Config &/*config*/) { return 31; }
-
- class MiddlePointer {
- public:
- MiddlePointer(const DontQuantize & /*quant*/, unsigned char /*order_minus_2*/, util::BitAddress address) : address_(address) {}
-
- MiddlePointer() : address_(NULL, 0) {}
-
- bool Found() const {
- return address_.base != NULL;
- }
-
- float Prob() const {
- return util::ReadNonPositiveFloat31(address_.base, address_.offset);
- }
-
- float Backoff() const {
- return util::ReadFloat32(address_.base, address_.offset + 31);
- }
-
- float Rest() const { return Prob(); }
-
- void Write(float prob, float backoff) {
- util::WriteNonPositiveFloat31(address_.base, address_.offset, prob);
- util::WriteFloat32(address_.base, address_.offset + 31, backoff);
- }
-
- private:
- util::BitAddress address_;
- };
-
- class LongestPointer {
- public:
- explicit LongestPointer(const DontQuantize &/*quant*/, util::BitAddress address) : address_(address) {}
-
- LongestPointer() : address_(NULL, 0) {}
-
- bool Found() const {
- return address_.base != NULL;
- }
-
- float Prob() const {
- return util::ReadNonPositiveFloat31(address_.base, address_.offset);
- }
-
- void Write(float prob) {
- util::WriteNonPositiveFloat31(address_.base, address_.offset, prob);
- }
-
- private:
- util::BitAddress address_;
- };
-
- DontQuantize() {}
-
- void SetupMemory(void * /*start*/, unsigned char /*order*/, const Config & /*config*/) {}
-
- static const bool kTrain = false;
- // These should never be called because kTrain is false.
- void Train(uint8_t /*order*/, std::vector<float> &/*prob*/, std::vector<float> &/*backoff*/) {}
- void TrainProb(uint8_t, std::vector<float> &/*prob*/) {}
-
- void FinishedLoading(const Config &) {}
-};
-
-class SeparatelyQuantize {
- private:
- class Bins {
- public:
- // Sigh C++ default constructor
- Bins() {}
-
- Bins(uint8_t bits, float *begin) : begin_(begin), end_(begin_ + (1ULL << bits)), bits_(bits), mask_((1ULL << bits) - 1) {}
-
- float *Populate() { return begin_; }
-
- uint64_t EncodeProb(float value) const {
- return Encode(value, 0);
- }
-
- uint64_t EncodeBackoff(float value) const {
- if (value == 0.0) {
- return HasExtension(value) ? kExtensionQuant : kNoExtensionQuant;
- }
- return Encode(value, 2);
- }
-
- float Decode(std::size_t off) const { return begin_[off]; }
-
- uint8_t Bits() const { return bits_; }
-
- uint64_t Mask() const { return mask_; }
-
- private:
- uint64_t Encode(float value, size_t reserved) const {
- const float *above = std::lower_bound(static_cast<const float*>(begin_) + reserved, end_, value);
- if (above == begin_ + reserved) return reserved;
- if (above == end_) return end_ - begin_ - 1;
- return above - begin_ - (value - *(above - 1) < *above - value);
- }
-
- float *begin_;
- const float *end_;
- uint8_t bits_;
- uint64_t mask_;
- };
-
- public:
- static const ModelType kModelTypeAdd = kQuantAdd;
-
- static void UpdateConfigFromBinary(const BinaryFormat &file, uint64_t offset, Config &config);
-
- static uint64_t Size(uint8_t order, const Config &config) {
- uint64_t longest_table = (static_cast<uint64_t>(1) << static_cast<uint64_t>(config.prob_bits)) * sizeof(float);
- uint64_t middle_table = (static_cast<uint64_t>(1) << static_cast<uint64_t>(config.backoff_bits)) * sizeof(float) + longest_table;
- // unigrams are currently not quantized so no need for a table.
- return (order - 2) * middle_table + longest_table + /* for the bit counts and alignment padding) */ 8;
- }
-
- static uint8_t MiddleBits(const Config &config) { return config.prob_bits + config.backoff_bits; }
- static uint8_t LongestBits(const Config &config) { return config.prob_bits; }
-
- class MiddlePointer {
- public:
- MiddlePointer(const SeparatelyQuantize &quant, unsigned char order_minus_2, const util::BitAddress &address) : bins_(quant.GetTables(order_minus_2)), address_(address) {}
-
- MiddlePointer() : address_(NULL, 0) {}
-
- bool Found() const { return address_.base != NULL; }
-
- float Prob() const {
- return ProbBins().Decode(util::ReadInt25(address_.base, address_.offset + BackoffBins().Bits(), ProbBins().Bits(), ProbBins().Mask()));
- }
-
- float Backoff() const {
- return BackoffBins().Decode(util::ReadInt25(address_.base, address_.offset, BackoffBins().Bits(), BackoffBins().Mask()));
- }
-
- float Rest() const { return Prob(); }
-
- void Write(float prob, float backoff) const {
- util::WriteInt57(address_.base, address_.offset, ProbBins().Bits() + BackoffBins().Bits(),
- (ProbBins().EncodeProb(prob) << BackoffBins().Bits()) | BackoffBins().EncodeBackoff(backoff));
- }
-
- private:
- const Bins &ProbBins() const { return bins_[0]; }
- const Bins &BackoffBins() const { return bins_[1]; }
- const Bins *bins_;
-
- util::BitAddress address_;
- };
-
- class LongestPointer {
- public:
- LongestPointer(const SeparatelyQuantize &quant, const util::BitAddress &address) : table_(&quant.LongestTable()), address_(address) {}
-
- LongestPointer() : address_(NULL, 0) {}
-
- bool Found() const { return address_.base != NULL; }
-
- void Write(float prob) const {
- util::WriteInt25(address_.base, address_.offset, table_->Bits(), table_->EncodeProb(prob));
- }
-
- float Prob() const {
- return table_->Decode(util::ReadInt25(address_.base, address_.offset, table_->Bits(), table_->Mask()));
- }
-
- private:
- const Bins *table_;
- util::BitAddress address_;
- };
-
- SeparatelyQuantize() {}
-
- void SetupMemory(void *start, unsigned char order, const Config &config);
-
- static const bool kTrain = true;
- // Assumes 0.0 is removed from backoff.
- void Train(uint8_t order, std::vector<float> &prob, std::vector<float> &backoff);
- // Train just probabilities (for longest order).
- void TrainProb(uint8_t order, std::vector<float> &prob);
-
- void FinishedLoading(const Config &config);
-
- const Bins *GetTables(unsigned char order_minus_2) const { return tables_[order_minus_2]; }
-
- const Bins &LongestTable() const { return longest_; }
-
- private:
- Bins tables_[KENLM_MAX_ORDER - 1][2];
-
- Bins longest_;
-
- uint8_t *actual_base_;
-
- uint8_t prob_bits_, backoff_bits_;
-};
-
-} // namespace ngram
-} // namespace lm
-
-#endif // LM_QUANTIZE_H
diff --git a/src/joshua/decoder/ff/lm/kenlm/lm/query_main.cc b/src/joshua/decoder/ff/lm/kenlm/lm/query_main.cc
deleted file mode 100644
index 3013ff2..0000000
--- a/src/joshua/decoder/ff/lm/kenlm/lm/query_main.cc
+++ /dev/null
@@ -1,104 +0,0 @@
-#include "lm/ngram_query.hh"
-#include "util/getopt.hh"
-
-#ifdef WITH_NPLM
-#include "lm/wrappers/nplm.hh"
-#endif
-
-#include <stdlib.h>
-
-void Usage(const char *name) {
- std::cerr <<
- "KenLM was compiled with maximum order " << KENLM_MAX_ORDER << ".\n"
- "Usage: " << name << " [-n] [-s] lm_file\n"
- "-n: Do not wrap the input in <s> and </s>.\n"
- "-s: Sentence totals only.\n"
- "-l lazy|populate|read|parallel: Load lazily, with populate, or malloc+read\n"
- "The default loading method is populate on Linux and read on others.\n";
- exit(1);
-}
-
-int main(int argc, char *argv[]) {
- if (argc == 1 || (argc == 2 && !strcmp(argv[1], "--help")))
- Usage(argv[0]);
-
- lm::ngram::Config config;
- bool sentence_context = true;
- bool show_words = true;
-
- int opt;
- while ((opt = getopt(argc, argv, "hnsl:")) != -1) {
- switch (opt) {
- case 'n':
- sentence_context = false;
- break;
- case 's':
- show_words = false;
- break;
- case 'l':
- if (!strcmp(optarg, "lazy")) {
- config.load_method = util::LAZY;
- } else if (!strcmp(optarg, "populate")) {
- config.load_method = util::POPULATE_OR_READ;
- } else if (!strcmp(optarg, "read")) {
- config.load_method = util::READ;
- } else if (!strcmp(optarg, "parallel")) {
- config.load_method = util::PARALLEL_READ;
- } else {
- Usage(argv[0]);
- }
- break;
- case 'h':
- default:
- Usage(argv[0]);
- }
- }
- if (optind + 1 != argc)
- Usage(argv[0]);
- const char *file = argv[optind];
- try {
- using namespace lm::ngram;
- ModelType model_type;
- if (RecognizeBinary(file, model_type)) {
- switch(model_type) {
- case PROBING:
- Query<lm::ngram::ProbingModel>(file, config, sentence_context, show_words);
- break;
- case REST_PROBING:
- Query<lm::ngram::RestProbingModel>(file, config, sentence_context, show_words);
- break;
- case TRIE:
- Query<TrieModel>(file, config, sentence_context, show_words);
- break;
- case QUANT_TRIE:
- Query<QuantTrieModel>(file, config, sentence_context, show_words);
- break;
- case ARRAY_TRIE:
- Query<ArrayTrieModel>(file, config, sentence_context, show_words);
- break;
- case QUANT_ARRAY_TRIE:
- Query<QuantArrayTrieModel>(file, config, sentence_context, show_words);
- break;
- default:
- std::cerr << "Unrecognized kenlm model type " << model_type << std::endl;
- abort();
- }
-#ifdef WITH_NPLM
- } else if (lm::np::Model::Recognize(file)) {
- lm::np::Model model(file);
- if (show_words) {
- Query<lm::np::Model, lm::ngram::FullPrint>(model, sentence_context);
- } else {
- Query<lm::np::Model, lm::ngram::BasicPrint>(model, sentence_context);
- }
-#endif
- } else {
- Query<ProbingModel>(file, config, sentence_context, show_words);
- }
- util::PrintUsage(std::cerr);
- } catch (const std::exception &e) {
- std::cerr << e.what() << std::endl;
- return 1;
- }
- return 0;
-}
diff --git a/src/joshua/decoder/ff/lm/kenlm/lm/read_arpa.cc b/src/joshua/decoder/ff/lm/kenlm/lm/read_arpa.cc
deleted file mode 100644
index fb8bbfa..0000000
--- a/src/joshua/decoder/ff/lm/kenlm/lm/read_arpa.cc
+++ /dev/null
@@ -1,161 +0,0 @@
-#include "lm/read_arpa.hh"
-
-#include "lm/blank.hh"
-#include "util/file.hh"
-
-#include <cmath>
-#include <cstdlib>
-#include <iostream>
-#include <sstream>
-#include <vector>
-
-#include <ctype.h>
-#include <string.h>
-#include <stdint.h>
-
-#ifdef WIN32
-#include <float.h>
-#endif
-
-namespace lm {
-
-// 1 for '\t', '\n', and ' '. This is stricter than isspace.
-const bool kARPASpaces[256] = {0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0};
-
-namespace {
-
-bool IsEntirelyWhiteSpace(const StringPiece &line) {
- for (size_t i = 0; i < static_cast<size_t>(line.size()); ++i) {
- if (!isspace(line.data()[i])) return false;
- }
- return true;
-}
-
-const char kBinaryMagic[] = "mmap lm http://kheafield.com/code";
-
-// strtoull isn't portable enough :-(
-uint64_t ReadCount(const std::string &from) {
- std::stringstream stream(from);
- uint64_t ret;
- stream >> ret;
- UTIL_THROW_IF(!stream, FormatLoadException, "Bad count " << from);
- return ret;
-}
-
-} // namespace
-
-void ReadARPACounts(util::FilePiece &in, std::vector<uint64_t> &number) {
- number.clear();
- StringPiece line = in.ReadLine();
- // In general, ARPA files can have arbitrary text before "\data\"
- // But in KenLM, we require such lines to start with "#", so that
- // we can do stricter error checking
- while (IsEntirelyWhiteSpace(line) || starts_with(line, "#")) {
- line = in.ReadLine();
- }
-
- if (line != "\\data\\") {
- if ((line.size() >= 2) && (line.data()[0] == 0x1f) && (static_cast<unsigned char>(line.data()[1]) == 0x8b)) {
- UTIL_THROW(FormatLoadException, "Looks like a gzip file. If this is an ARPA file, pipe " << in.FileName() << " through zcat. If this already in binary format, you need to decompress it because mmap doesn't work on top of gzip.");
- }
- if (static_cast<size_t>(line.size()) >= strlen(kBinaryMagic) && StringPiece(line.data(), strlen(kBinaryMagic)) == kBinaryMagic)
- UTIL_THROW(FormatLoadException, "This looks like a binary file but got sent to the ARPA parser. Did you compress the binary file or pass a binary file where only ARPA files are accepted?");
- UTIL_THROW_IF(line.size() >= 4 && StringPiece(line.data(), 4) == "blmt", FormatLoadException, "This looks like an IRSTLM binary file. Did you forget to pass --text yes to compile-lm?");
- UTIL_THROW_IF(line == "iARPA", FormatLoadException, "This looks like an IRSTLM iARPA file. You need an ARPA file. Run\n compile-lm --text yes " << in.FileName() << " " << in.FileName() << ".arpa\nfirst.");
- UTIL_THROW(FormatLoadException, "first non-empty line was \"" << line << "\" not \\data\\.");
- }
- while (!IsEntirelyWhiteSpace(line = in.ReadLine())) {
- if (line.size() < 6 || strncmp(line.data(), "ngram ", 6)) UTIL_THROW(FormatLoadException, "count line \"" << line << "\"doesn't begin with \"ngram \"");
- // So strtol doesn't go off the end of line.
- std::string remaining(line.data() + 6, line.size() - 6);
- char *end_ptr;
- unsigned int length = std::strtol(remaining.c_str(), &end_ptr, 10);
- if ((end_ptr == remaining.c_str()) || (length - 1 != number.size())) UTIL_THROW(FormatLoadException, "ngram count lengths should be consecutive starting with 1: " << line);
- if (*end_ptr != '=') UTIL_THROW(FormatLoadException, "Expected = immediately following the first number in the count line " << line);
- ++end_ptr;
- number.push_back(ReadCount(end_ptr));
- }
-}
-
-void ReadNGramHeader(util::FilePiece &in, unsigned int length) {
- StringPiece line;
- while (IsEntirelyWhiteSpace(line = in.ReadLine())) {}
- std::stringstream expected;
- expected << '\\' << length << "-grams:";
- if (line != expected.str()) UTIL_THROW(FormatLoadException, "Was expecting n-gram header " << expected.str() << " but got " << line << " instead");
-}
-
-void ReadBackoff(util::FilePiece &in, Prob &/*weights*/) {
- switch (in.get()) {
- case '\t':
- {
- float got = in.ReadFloat();
- if (got != 0.0)
- UTIL_THROW(FormatLoadException, "Non-zero backoff " << got << " provided for an n-gram that should have no backoff");
- }
- break;
- case '\n':
- break;
- default:
- UTIL_THROW(FormatLoadException, "Expected tab or newline for backoff");
- }
-}
-
-void ReadBackoff(util::FilePiece &in, float &backoff) {
- // Always make zero negative.
- // Negative zero means that no (n+1)-gram has this n-gram as context.
- // Therefore the hypothesis state can be shorter. Of course, many n-grams
- // are context for (n+1)-grams. An algorithm in the data structure will go
- // back and set the backoff to positive zero in these cases.
- switch (in.get()) {
- case '\t':
- backoff = in.ReadFloat();
- if (backoff == ngram::kExtensionBackoff) backoff = ngram::kNoExtensionBackoff;
- {
-#ifdef WIN32
- int float_class = _fpclass(backoff);
- UTIL_THROW_IF(float_class == _FPCLASS_SNAN || float_class == _FPCLASS_QNAN || float_class == _FPCLASS_NINF || float_class == _FPCLASS_PINF, FormatLoadException, "Bad backoff " << backoff);
-#else
- int float_class = std::fpclassify(backoff);
- UTIL_THROW_IF(float_class == FP_NAN || float_class == FP_INFINITE, FormatLoadException, "Bad backoff " << backoff);
-#endif
- }
- UTIL_THROW_IF(in.get() != '\n', FormatLoadException, "Expected newline after backoff");
- break;
- case '\n':
- backoff = ngram::kNoExtensionBackoff;
- break;
- default:
- UTIL_THROW(FormatLoadException, "Expected tab or newline for backoff");
- }
-}
-
-void ReadEnd(util::FilePiece &in) {
- StringPiece line;
- do {
- line = in.ReadLine();
- } while (IsEntirelyWhiteSpace(line));
- if (line != "\\end\\") UTIL_THROW(FormatLoadException, "Expected \\end\\ but the ARPA file has " << line);
-
- try {
- while (true) {
- line = in.ReadLine();
- if (!IsEntirelyWhiteSpace(line)) UTIL_THROW(FormatLoadException, "Trailing line " << line);
- }
- } catch (const util::EndOfFileException &e) {}
-}
-
-void PositiveProbWarn::Warn(float prob) {
- switch (action_) {
- case THROW_UP:
- UTIL_THROW(FormatLoadException, "Positive log probability " << prob << " in the model. This is a bug in IRSTLM; you can set config.positive_log_probability = SILENT or pass -i to build_binary to substitute 0.0 for the log probability. Error");
- case COMPLAIN:
- std::cerr << "There's a positive log probability " << prob << " in the APRA file, probably because of a bug in IRSTLM. This and subsequent entires will be mapped to 0 log probability." << std::endl;
- action_ = SILENT;
- break;
- case SILENT:
- break;
- }
-}
-
-} // namespace lm
diff --git a/src/joshua/decoder/ff/lm/kenlm/lm/return.hh b/src/joshua/decoder/ff/lm/kenlm/lm/return.hh
deleted file mode 100644
index 982ffd6..0000000
--- a/src/joshua/decoder/ff/lm/kenlm/lm/return.hh
+++ /dev/null
@@ -1,42 +0,0 @@
-#ifndef LM_RETURN_H
-#define LM_RETURN_H
-
-#include <stdint.h>
-
-namespace lm {
-/* Structure returned by scoring routines. */
-struct FullScoreReturn {
- // log10 probability
- float prob;
-
- /* The length of n-gram matched. Do not use this for recombination.
- * Consider a model containing only the following n-grams:
- * -1 foo
- * -3.14 bar
- * -2.718 baz -5
- * -6 foo bar
- *
- * If you score ``bar'' then ngram_length is 1 and recombination state is the
- * empty string because bar has zero backoff and does not extend to the
- * right.
- * If you score ``foo'' then ngram_length is 1 and recombination state is
- * ``foo''.
- *
- * Ideally, keep output states around and compare them. Failing that,
- * get out_state.ValidLength() and use that length for recombination.
- */
- unsigned char ngram_length;
-
- /* Left extension information. If independent_left is set, then prob is
- * independent of words to the left (up to additional backoff). Otherwise,
- * extend_left indicates how to efficiently extend further to the left.
- */
- bool independent_left;
- uint64_t extend_left; // Defined only if independent_left
-
- // Rest cost for extension to the left.
- float rest;
-};
-
-} // namespace lm
-#endif // LM_RETURN_H
diff --git a/src/joshua/decoder/ff/lm/kenlm/lm/search_trie.cc b/src/joshua/decoder/ff/lm/kenlm/lm/search_trie.cc
deleted file mode 100644
index 5b0f55f..0000000
--- a/src/joshua/decoder/ff/lm/kenlm/lm/search_trie.cc
+++ /dev/null
@@ -1,600 +0,0 @@
-/* This is where the trie is built. It's on-disk. */
-#include "lm/search_trie.hh"
-
-#include "lm/bhiksha.hh"
-#include "lm/binary_format.hh"
-#include "lm/blank.hh"
-#include "lm/lm_exception.hh"
-#include "lm/max_order.hh"
-#include "lm/quantize.hh"
-#include "lm/trie.hh"
-#include "lm/trie_sort.hh"
-#include "lm/vocab.hh"
-#include "lm/weights.hh"
-#include "lm/word_index.hh"
-#include "util/ersatz_progress.hh"
-#include "util/mmap.hh"
-#include "util/proxy_iterator.hh"
-#include "util/scoped.hh"
-#include "util/sized_iterator.hh"
-
-#include <algorithm>
-#include <cstring>
-#include <cstdio>
-#include <cstdlib>
-#include <queue>
-#include <limits>
-#include <numeric>
-#include <vector>
-
-#if defined(_WIN32) || defined(_WIN64)
-#include <windows.h>
-#endif
-
-namespace lm {
-namespace ngram {
-namespace trie {
-namespace {
-
-void ReadOrThrow(FILE *from, void *data, size_t size) {
- UTIL_THROW_IF(1 != std::fread(data, size, 1, from), util::ErrnoException, "Short read");
-}
-
-int Compare(unsigned char order, const void *first_void, const void *second_void) {
- const WordIndex *first = reinterpret_cast<const WordIndex*>(first_void), *second = reinterpret_cast<const WordIndex*>(second_void);
- const WordIndex *end = first + order;
- for (; first != end; ++first, ++second) {
- if (*first < *second) return -1;
- if (*first > *second) return 1;
- }
- return 0;
-}
-
-struct ProbPointer {
- unsigned char array;
- uint64_t index;
-};
-
-// Array of n-grams and float indices.
-class BackoffMessages {
- public:
- void Init(std::size_t entry_size) {
- current_ = NULL;
- allocated_ = NULL;
- entry_size_ = entry_size;
- }
-
- void Add(const WordIndex *to, ProbPointer index) {
- while (current_ + entry_size_ > allocated_) {
- std::size_t allocated_size = allocated_ - (uint8_t*)backing_.get();
- Resize(std::max<std::size_t>(allocated_size * 2, entry_size_));
- }
- memcpy(current_, to, entry_size_ - sizeof(ProbPointer));
- *reinterpret_cast<ProbPointer*>(current_ + entry_size_ - sizeof(ProbPointer)) = index;
- current_ += entry_size_;
- }
-
- void Apply(float *const *const base, FILE *unigrams) {
- FinishedAdding();
- if (current_ == allocated_) return;
- rewind(unigrams);
- ProbBackoff weights;
- WordIndex unigram = 0;
- ReadOrThrow(unigrams, &weights, sizeof(weights));
- for (; current_ != allocated_; current_ += entry_size_) {
- const WordIndex &cur_word = *reinterpret_cast<const WordIndex*>(current_);
- for (; unigram < cur_word; ++unigram) {
- ReadOrThrow(unigrams, &weights, sizeof(weights));
- }
- if (!HasExtension(weights.backoff)) {
- weights.backoff = kExtensionBackoff;
- UTIL_THROW_IF(fseek(unigrams, -sizeof(weights), SEEK_CUR), util::ErrnoException, "Seeking backwards to denote unigram extension failed.");
- util::WriteOrThrow(unigrams, &weights, sizeof(weights));
- }
- const ProbPointer &write_to = *reinterpret_cast<const ProbPointer*>(current_ + sizeof(WordIndex));
- base[write_to.array][write_to.index] += weights.backoff;
- }
- backing_.reset();
- }
-
- void Apply(float *const *const base, RecordReader &reader) {
- FinishedAdding();
- if (current_ == allocated_) return;
- // We'll also use the same buffer to record messages to blanks that they extend.
- WordIndex *extend_out = reinterpret_cast<WordIndex*>(current_);
- const unsigned char order = (entry_size_ - sizeof(ProbPointer)) / sizeof(WordIndex);
- for (reader.Rewind(); reader && (current_ != allocated_); ) {
- switch (Compare(order, reader.Data(), current_)) {
- case -1:
- ++reader;
- break;
- case 1:
- // Message but nobody to receive it. Write it down at the beginning of the buffer so we can inform this blank that it extends.
- for (const WordIndex *w = reinterpret_cast<const WordIndex *>(current_); w != reinterpret_cast<const WordIndex *>(current_) + order; ++w, ++extend_out) *extend_out = *w;
- current_ += entry_size_;
- break;
- case 0:
- float &backoff = reinterpret_cast<ProbBackoff*>((uint8_t*)reader.Data() + order * sizeof(WordIndex))->backoff;
- if (!HasExtension(backoff)) {
- backoff = kExtensionBackoff;
- reader.Overwrite(&backoff, sizeof(float));
- } else {
- const ProbPointer &write_to = *reinterpret_cast<const ProbPointer*>(current_ + entry_size_ - sizeof(ProbPointer));
- base[write_to.array][write_to.index] += backoff;
- }
- current_ += entry_size_;
- break;
- }
- }
- // Now this is a list of blanks that extend right.
- entry_size_ = sizeof(WordIndex) * order;
- Resize(sizeof(WordIndex) * (extend_out - (const WordIndex*)backing_.get()));
- current_ = (uint8_t*)backing_.get();
- }
-
- // Call after Apply
- bool Extends(unsigned char order, const WordIndex *words) {
- if (current_ == allocated_) return false;
- assert(order * sizeof(WordIndex) == entry_size_);
- while (true) {
- switch(Compare(order, words, current_)) {
- case 1:
- current_ += entry_size_;
- if (current_ == allocated_) return false;
- break;
- case -1:
- return false;
- case 0:
- return true;
- }
- }
- }
-
- private:
- void FinishedAdding() {
- Resize(current_ - (uint8_t*)backing_.get());
- // Sort requests in same order as files.
- std::sort(
- util::SizedIterator(util::SizedProxy(backing_.get(), entry_size_)),
- util::SizedIterator(util::SizedProxy(current_, entry_size_)),
- util::SizedCompare<EntryCompare>(EntryCompare((entry_size_ - sizeof(ProbPointer)) / sizeof(WordIndex))));
- current_ = (uint8_t*)backing_.get();
- }
-
- void Resize(std::size_t to) {
- std::size_t current = current_ - (uint8_t*)backing_.get();
- backing_.call_realloc(to);
- current_ = (uint8_t*)backing_.get() + current;
- allocated_ = (uint8_t*)backing_.get() + to;
- }
-
- util::scoped_malloc backing_;
-
- uint8_t *current_, *allocated_;
-
- std::size_t entry_size_;
-};
-
-const float kBadProb = std::numeric_limits<float>::infinity();
-
-class SRISucks {
- public:
- SRISucks() {
- for (BackoffMessages *i = messages_; i != messages_ + KENLM_MAX_ORDER - 1; ++i)
- i->Init(sizeof(ProbPointer) + sizeof(WordIndex) * (i - messages_ + 1));
- }
-
- void Send(unsigned char begin, unsigned char order, const WordIndex *to, float prob_basis) {
- assert(prob_basis != kBadProb);
- ProbPointer pointer;
- pointer.array = order - 1;
- pointer.index = values_[order - 1].size();
- for (unsigned char i = begin; i < order; ++i) {
- messages_[i - 1].Add(to, pointer);
- }
- values_[order - 1].push_back(prob_basis);
- }
-
- void ObtainBackoffs(unsigned char total_order, FILE *unigram_file, RecordReader *reader) {
- for (unsigned char i = 0; i < KENLM_MAX_ORDER - 1; ++i) {
- it_[i] = values_[i].empty() ? NULL : &*values_[i].begin();
- }
- messages_[0].Apply(it_, unigram_file);
- BackoffMessages *messages = messages_ + 1;
- const RecordReader *end = reader + total_order - 2 /* exclude unigrams and longest order */;
- for (; reader != end; ++messages, ++reader) {
- messages->Apply(it_, *reader);
- }
- }
-
- ProbBackoff GetBlank(unsigned char total_order, unsigned char order, const WordIndex *indices) {
- assert(order > 1);
- ProbBackoff ret;
- ret.prob = *(it_[order - 1]++);
- ret.backoff = ((order != total_order - 1) && messages_[order - 1].Extends(order, indices)) ? kExtensionBackoff : kNoExtensionBackoff;
- return ret;
- }
-
- const std::vector<float> &Values(unsigned char order) const {
- return values_[order - 1];
- }
-
- private:
- // This used to be one array. Then I needed to separate it by order for quantization to work.
- std::vector<float> values_[KENLM_MAX_ORDER - 1];
- BackoffMessages messages_[KENLM_MAX_ORDER - 1];
-
- float *it_[KENLM_MAX_ORDER - 1];
-};
-
-class FindBlanks {
- public:
- FindBlanks(unsigned char order, const ProbBackoff *unigrams, SRISucks &messages)
- : counts_(order), unigrams_(unigrams), sri_(messages) {}
-
- float UnigramProb(WordIndex index) const {
- return unigrams_[index].prob;
- }
-
- void Unigram(WordIndex /*index*/) {
- ++counts_[0];
- }
-
- void MiddleBlank(const unsigned char order, const WordIndex *indices, unsigned char lower, float prob_basis) {
- sri_.Send(lower, order, indices + 1, prob_basis);
- ++counts_[order - 1];
- }
-
- void Middle(const unsigned char order, const void * /*data*/) {
- ++counts_[order - 1];
- }
-
- void Longest(const void * /*data*/) {
- ++counts_.back();
- }
-
- const std::vector<uint64_t> &Counts() const {
- return counts_;
- }
-
- private:
- std::vector<uint64_t> counts_;
-
- const ProbBackoff *unigrams_;
-
- SRISucks &sri_;
-};
-
-// Phase to actually write n-grams to the trie.
-template <class Quant, class Bhiksha> class WriteEntries {
- public:
- WriteEntries(RecordReader *contexts, const Quant &quant, UnigramValue *unigrams, BitPackedMiddle<Bhiksha> *middle, BitPackedLongest &longest, unsigned char order, SRISucks &sri) :
- contexts_(contexts),
- quant_(quant),
- unigrams_(unigrams),
- middle_(middle),
- longest_(longest),
- bigram_pack_((order == 2) ? static_cast<BitPacked&>(longest_) : static_cast<BitPacked&>(*middle_)),
- order_(order),
- sri_(sri) {}
-
- float UnigramProb(WordIndex index) const { return unigrams_[index].weights.prob; }
-
- void Unigram(WordIndex word) {
- unigrams_[word].next = bigram_pack_.InsertIndex();
- }
-
- void MiddleBlank(const unsigned char order, const WordIndex *indices, unsigned char /*lower*/, float /*prob_base*/) {
- ProbBackoff weights = sri_.GetBlank(order_, order, indices);
- typename Quant::MiddlePointer(quant_, order - 2, middle_[order - 2].Insert(indices[order - 1])).Write(weights.prob, weights.backoff);
- }
-
- void Middle(const unsigned char order, const void *data) {
- RecordReader &context = contexts_[order - 1];
- const WordIndex *words = reinterpret_cast<const WordIndex*>(data);
- ProbBackoff weights = *reinterpret_cast<const ProbBackoff*>(words + order);
- if (context && !memcmp(data, context.Data(), sizeof(WordIndex) * order)) {
- SetExtension(weights.backoff);
- ++context;
- }
- typename Quant::MiddlePointer(quant_, order - 2, middle_[order - 2].Insert(words[order - 1])).Write(weights.prob, weights.backoff);
- }
-
- void Longest(const void *data) {
- const WordIndex *words = reinterpret_cast<const WordIndex*>(data);
- typename Quant::LongestPointer(quant_, longest_.Insert(words[order_ - 1])).Write(reinterpret_cast<const Prob*>(words + order_)->prob);
- }
-
- private:
- RecordReader *contexts_;
- const Quant &quant_;
- UnigramValue *const unigrams_;
- BitPackedMiddle<Bhiksha> *const middle_;
- BitPackedLongest &longest_;
- BitPacked &bigram_pack_;
- const unsigned char order_;
- SRISucks &sri_;
-};
-
-struct Gram {
- Gram(const WordIndex *in_begin, unsigned char order) : begin(in_begin), end(in_begin + order) {}
-
- const WordIndex *begin, *end;
-
- // For queue, this is the direction we want.
- bool operator<(const Gram &other) const {
- return std::lexicographical_compare(other.begin, other.end, begin, end);
- }
-};
-
-template <class Doing> class BlankManager {
- public:
- BlankManager(unsigned char total_order, Doing &doing) : total_order_(total_order), been_length_(0), doing_(doing) {
- for (float *i = basis_; i != basis_ + KENLM_MAX_ORDER - 1; ++i) *i = kBadProb;
- }
-
- void Visit(const WordIndex *to, unsigned char length, float prob) {
- basis_[length - 1] = prob;
- unsigned char overlap = std::min<unsigned char>(length - 1, been_length_);
- const WordIndex *cur;
- WordIndex *pre;
- for (cur = to, pre = been_; cur != to + overlap; ++cur, ++pre) {
- if (*pre != *cur) break;
- }
- if (cur == to + length - 1) {
- *pre = *cur;
- been_length_ = length;
- return;
- }
- // There are blanks to insert starting with order blank.
- unsigned char blank = cur - to + 1;
- UTIL_THROW_IF(blank == 1, FormatLoadException, "Missing a unigram that appears as context.");
- const float *lower_basis;
- for (lower_basis = basis_ + blank - 2; *lower_basis == kBadProb; --lower_basis) {}
- unsigned char based_on = lower_basis - basis_ + 1;
- for (; cur != to + length - 1; ++blank, ++cur, ++pre) {
- assert(*lower_basis != kBadProb);
- doing_.MiddleBlank(blank, to, based_on, *lower_basis);
- *pre = *cur;
- // Mark that the probability is a blank so it shouldn't be used as the basis for a later n-gram.
- basis_[blank - 1] = kBadProb;
- }
- *pre = *cur;
- been_length_ = length;
- }
-
- private:
- const unsigned char total_order_;
-
- WordIndex been_[KENLM_MAX_ORDER];
- unsigned char been_length_;
-
- float basis_[KENLM_MAX_ORDER];
-
- Doing &doing_;
-};
-
-template <class Doing> void RecursiveInsert(const unsigned char total_order, const WordIndex unigram_count, RecordReader *input, std::ostream *progress_out, const char *message, Doing &doing) {
- util::ErsatzProgress progress(unigram_count + 1, progress_out, message);
- WordIndex unigram = 0;
- std::priority_queue<Gram> grams;
- if (unigram_count) grams.push(Gram(&unigram, 1));
- for (unsigned char i = 2; i <= total_order; ++i) {
- if (input[i-2]) grams.push(Gram(reinterpret_cast<const WordIndex*>(input[i-2].Data()), i));
- }
-
- BlankManager<Doing> blank(total_order, doing);
-
- while (!grams.empty()) {
- Gram top = grams.top();
- grams.pop();
- unsigned char order = top.end - top.begin;
- if (order == 1) {
- blank.Visit(&unigram, 1, doing.UnigramProb(unigram));
- doing.Unigram(unigram);
- progress.Set(unigram);
- if (++unigram < unigram_count) grams.push(top);
- } else {
- if (order == total_order) {
- blank.Visit(top.begin, order, reinterpret_cast<const Prob*>(top.end)->prob);
- doing.Longest(top.begin);
- } else {
- blank.Visit(top.begin, order, reinterpret_cast<const ProbBackoff*>(top.end)->prob);
- doing.Middle(order, top.begin);
- }
- RecordReader &reader = input[order - 2];
- if (++reader) grams.push(top);
- }
- }
-}
-
-void SanityCheckCounts(const std::vector<uint64_t> &initial, const std::vector<uint64_t> &fixed) {
- if (fixed[0] != initial[0]) UTIL_THROW(util::Exception, "Unigram count should be constant but initial is " << initial[0] << " and recounted is " << fixed[0]);
- if (fixed.back() != initial.back()) UTIL_THROW(util::Exception, "Longest count should be constant but it changed from " << initial.back() << " to " << fixed.back());
- for (unsigned char i = 0; i < initial.size(); ++i) {
- if (fixed[i] < initial[i]) UTIL_THROW(util::Exception, "Counts came out lower than expected. This shouldn't happen");
- }
-}
-
-template <class Quant> void TrainQuantizer(uint8_t order, uint64_t count, const std::vector<float> &additional, RecordReader &reader, util::ErsatzProgress &progress, Quant &quant) {
- std::vector<float> probs(additional), backoffs;
- probs.reserve(count + additional.size());
- backoffs.reserve(count);
- for (reader.Rewind(); reader; ++reader) {
- const ProbBackoff &weights = *reinterpret_cast<const ProbBackoff*>(reinterpret_cast<const uint8_t*>(reader.Data()) + sizeof(WordIndex) * order);
- probs.push_back(weights.prob);
- if (weights.backoff != 0.0) backoffs.push_back(weights.backoff);
- ++progress;
- }
- quant.Train(order, probs, backoffs);
-}
-
-template <class Quant> void TrainProbQuantizer(uint8_t order, uint64_t count, RecordReader &reader, util::ErsatzProgress &progress, Quant &quant) {
- std::vector<float> probs, backoffs;
- probs.reserve(count);
- for (reader.Rewind(); reader; ++reader) {
- const Prob &weights = *reinterpret_cast<const Prob*>(reinterpret_cast<const uint8_t*>(reader.Data()) + sizeof(WordIndex) * order);
- probs.push_back(weights.prob);
- ++progress;
- }
- quant.TrainProb(order, probs);
-}
-
-void PopulateUnigramWeights(FILE *file, WordIndex unigram_count, RecordReader &contexts, UnigramValue *unigrams) {
- // Fill unigram probabilities.
- try {
- rewind(file);
- for (WordIndex i = 0; i < unigram_count; ++i) {
- ReadOrThrow(file, &unigrams[i].weights, sizeof(ProbBackoff));
- if (contexts && *reinterpret_cast<const WordIndex*>(contexts.Data()) == i) {
- SetExtension(unigrams[i].weights.backoff);
- ++contexts;
- }
- }
- } catch (util::Exception &e) {
- e << " while re-reading unigram probabilities";
- throw;
- }
-}
-
-} // namespace
-
-template <class Quant, class Bhiksha> void BuildTrie(SortedFiles &files, std::vector<uint64_t> &counts, const Config &config, TrieSearch<Quant, Bhiksha> &out, Quant &quant, SortedVocabulary &vocab, BinaryFormat &backing) {
- RecordReader inputs[KENLM_MAX_ORDER - 1];
- RecordReader contexts[KENLM_MAX_ORDER - 1];
-
- for (unsigned char i = 2; i <= counts.size(); ++i) {
- inputs[i-2].Init(files.Full(i), i * sizeof(WordIndex) + (i == counts.size() ? sizeof(Prob) : sizeof(ProbBackoff)));
- contexts[i-2].Init(files.Context(i), (i-1) * sizeof(WordIndex));
- }
-
- SRISucks sri;
- std::vector<uint64_t> fixed_counts;
- util::scoped_FILE unigram_file;
- util::scoped_fd unigram_fd(files.StealUnigram());
- {
- util::scoped_memory unigrams;
- MapRead(util::POPULATE_OR_READ, unigram_fd.get(), 0, counts[0] * sizeof(ProbBackoff), unigrams);
- FindBlanks finder(counts.size(), reinterpret_cast<const ProbBackoff*>(unigrams.get()), sri);
- RecursiveInsert(counts.size(), counts[0], inputs, config.ProgressMessages(), "Identifying n-grams omitted by SRI", finder);
- fixed_counts = finder.Counts();
- }
- unigram_file.reset(util::FDOpenOrThrow(unigram_fd));
- for (const RecordReader *i = inputs; i != inputs + counts.size() - 2; ++i) {
- if (*i) UTIL_THROW(FormatLoadException, "There's a bug in the trie implementation: the " << (i - inputs + 2) << "-gram table did not complete reading");
- }
- SanityCheckCounts(counts, fixed_counts);
- counts = fixed_counts;
-
- sri.ObtainBackoffs(counts.size(), unigram_file.get(), inputs);
-
- void *vocab_relocate;
- void *search_base = backing.GrowForSearch(TrieSearch<Quant, Bhiksha>::Size(fixed_counts, config), vocab.UnkCountChangePadding(), vocab_relocate);
- vocab.Relocate(vocab_relocate);
- out.SetupMemory(reinterpret_cast<uint8_t*>(search_base), fixed_counts, config);
-
- for (unsigned char i = 2; i <= counts.size(); ++i) {
- inputs[i-2].Rewind();
- }
- if (Quant::kTrain) {
- util::ErsatzProgress progress(std::accumulate(counts.begin() + 1, counts.end(), 0),
- config.ProgressMessages(), "Quantizing");
- for (unsigned char i = 2; i < counts.size(); ++i) {
- TrainQuantizer(i, counts[i-1], sri.Values(i), inputs[i-2], progress, quant);
- }
- TrainProbQuantizer(counts.size(), counts.back(), inputs[counts.size() - 2], progress, quant);
- quant.FinishedLoading(config);
- }
-
- UnigramValue *unigrams = out.unigram_.Raw();
- PopulateUnigramWeights(unigram_file.get(), counts[0], contexts[0], unigrams);
- unigram_file.reset();
-
- for (unsigned char i = 2; i <= counts.size(); ++i) {
- inputs[i-2].Rewind();
- }
- // Fill entries except unigram probabilities.
- {
- WriteEntries<Quant, Bhiksha> writer(contexts, quant, unigrams, out.middle_begin_, out.longest_, counts.size(), sri);
- RecursiveInsert(counts.size(), counts[0], inputs, config.ProgressMessages(), "Writing trie", writer);
- // Write the last unigram entry, which is the end pointer for the bigrams.
- writer.Unigram(counts[0]);
- }
-
- // Do not disable this error message or else too little state will be returned. Both WriteEntries::Middle and returning state based on found n-grams will need to be fixed to handle this situation.
- for (unsigned char order = 2; order <= counts.size(); ++order) {
- const RecordReader &context = contexts[order - 2];
- if (context) {
- FormatLoadException e;
- e << "A " << static_cast<unsigned int>(order) << "-gram has context";
- const WordIndex *ctx = reinterpret_cast<const WordIndex*>(context.Data());
- for (const WordIndex *i = ctx; i != ctx + order - 1; ++i) {
- e << ' ' << *i;
- }
- e << " so this context must appear in the model as a " << static_cast<unsigned int>(order - 1) << "-gram but it does not";
- throw e;
- }
- }
-
- /* Set ending offsets so the last entry will be sized properly */
- // Last entry for unigrams was already set.
- if (out.middle_begin_ != out.middle_end_) {
- for (typename TrieSearch<Quant, Bhiksha>::Middle *i = out.middle_begin_; i != out.middle_end_ - 1; ++i) {
- i->FinishedLoading((i+1)->InsertIndex(), config);
- }
- (out.middle_end_ - 1)->FinishedLoading(out.longest_.InsertIndex(), config);
- }
-}
-
-template <class Quant, class Bhiksha> uint8_t *TrieSearch<Quant, Bhiksha>::SetupMemory(uint8_t *start, const std::vector<uint64_t> &counts, const Config &config) {
- quant_.SetupMemory(start, counts.size(), config);
- start += Quant::Size(counts.size(), config);
- unigram_.Init(start);
- start += Unigram::Size(counts[0]);
- FreeMiddles();
- middle_begin_ = static_cast<Middle*>(malloc(sizeof(Middle) * (counts.size() - 2)));
- middle_end_ = middle_begin_ + (counts.size() - 2);
- std::vector<uint8_t*> middle_starts(counts.size() - 2);
- for (unsigned char i = 2; i < counts.size(); ++i) {
- middle_starts[i-2] = start;
- start += Middle::Size(Quant::MiddleBits(config), counts[i-1], counts[0], counts[i], config);
- }
- // Crazy backwards thing so we initialize using pointers to ones that have already been initialized
- for (unsigned char i = counts.size() - 1; i >= 2; --i) {
- // use "placement new" syntax to initalize Middle in an already-allocated memory location
- new (middle_begin_ + i - 2) Middle(
- middle_starts[i-2],
- quant_.MiddleBits(config),
- counts[i-1],
- counts[0],
- counts[i],
- (i == counts.size() - 1) ? static_cast<const BitPacked&>(longest_) : static_cast<const BitPacked &>(middle_begin_[i-1]),
- config);
- }
- longest_.Init(start, quant_.LongestBits(config), counts[0]);
- return start + Longest::Size(Quant::LongestBits(config), counts.back(), counts[0]);
-}
-
-template <class Quant, class Bhiksha> void TrieSearch<Quant, Bhiksha>::InitializeFromARPA(const char *file, util::FilePiece &f, std::vector<uint64_t> &counts, const Config &config, SortedVocabulary &vocab, BinaryFormat &backing) {
- std::string temporary_prefix;
- if (!config.temporary_directory_prefix.empty()) {
- temporary_prefix = config.temporary_directory_prefix;
- } else if (config.write_mmap) {
- temporary_prefix = config.write_mmap;
- } else {
- temporary_prefix = file;
- }
- // At least 1MB sorting memory.
- SortedFiles sorted(config, f, counts, std::max<size_t>(config.building_memory, 1048576), temporary_prefix, vocab);
-
- BuildTrie(sorted, counts, config, *this, quant_, vocab, backing);
-}
-
-template class TrieSearch<DontQuantize, DontBhiksha>;
-template class TrieSearch<DontQuantize, ArrayBhiksha>;
-template class TrieSearch<SeparatelyQuantize, DontBhiksha>;
-template class TrieSearch<SeparatelyQuantize, ArrayBhiksha>;
-
-} // namespace trie
-} // namespace ngram
-} // namespace lm
diff --git a/src/joshua/decoder/ff/lm/kenlm/lm/search_trie.hh b/src/joshua/decoder/ff/lm/kenlm/lm/search_trie.hh
deleted file mode 100644
index d8838d2..0000000
--- a/src/joshua/decoder/ff/lm/kenlm/lm/search_trie.hh
+++ /dev/null
@@ -1,130 +0,0 @@
-#ifndef LM_SEARCH_TRIE_H
-#define LM_SEARCH_TRIE_H
-
-#include "lm/config.hh"
-#include "lm/model_type.hh"
-#include "lm/return.hh"
-#include "lm/trie.hh"
-#include "lm/weights.hh"
-
-#include "util/file.hh"
-#include "util/file_piece.hh"
-
-#include <vector>
-#include <cstdlib>
-
-#include <assert.h>
-
-namespace lm {
-namespace ngram {
-class BinaryFormat;
-class SortedVocabulary;
-namespace trie {
-
-template <class Quant, class Bhiksha> class TrieSearch;
-class SortedFiles;
-template <class Quant, class Bhiksha> void BuildTrie(SortedFiles &files, std::vector<uint64_t> &counts, const Config &config, TrieSearch<Quant, Bhiksha> &out, Quant &quant, SortedVocabulary &vocab, BinaryFormat &backing);
-
-template <class Quant, class Bhiksha> class TrieSearch {
- public:
- typedef NodeRange Node;
-
- typedef ::lm::ngram::trie::UnigramPointer UnigramPointer;
- typedef typename Quant::MiddlePointer MiddlePointer;
- typedef typename Quant::LongestPointer LongestPointer;
-
- static const bool kDifferentRest = false;
-
- static const ModelType kModelType = static_cast<ModelType>(TRIE_SORTED + Quant::kModelTypeAdd + Bhiksha::kModelTypeAdd);
-
- static const unsigned int kVersion = 1;
-
- static void UpdateConfigFromBinary(const BinaryFormat &file, const std::vector<uint64_t> &counts, uint64_t offset, Config &config) {
- Quant::UpdateConfigFromBinary(file, offset, config);
- // Currently the unigram pointers are not compresssed, so there will only be a header for order > 2.
- if (counts.size() > 2)
- Bhiksha::UpdateConfigFromBinary(file, offset + Quant::Size(counts.size(), config) + Unigram::Size(counts[0]), config);
- }
-
- static uint64_t Size(const std::vector<uint64_t> &counts, const Config &config) {
- uint64_t ret = Quant::Size(counts.size(), config) + Unigram::Size(counts[0]);
- for (unsigned char i = 1; i < counts.size() - 1; ++i) {
- ret += Middle::Size(Quant::MiddleBits(config), counts[i], counts[0], counts[i+1], config);
- }
- return ret + Longest::Size(Quant::LongestBits(config), counts.back(), counts[0]);
- }
-
- TrieSearch() : middle_begin_(NULL), middle_end_(NULL) {}
-
- ~TrieSearch() { FreeMiddles(); }
-
- uint8_t *SetupMemory(uint8_t *start, const std::vector<uint64_t> &counts, const Config &config);
-
- void InitializeFromARPA(const char *file, util::FilePiece &f, std::vector<uint64_t> &counts, const Config &config, SortedVocabulary &vocab, BinaryFormat &backing);
-
- unsigned char Order() const {
- return middle_end_ - middle_begin_ + 2;
- }
-
- ProbBackoff &UnknownUnigram() { return unigram_.Unknown(); }
-
- UnigramPointer LookupUnigram(WordIndex word, Node &next, bool &independent_left, uint64_t &extend_left) const {
- extend_left = static_cast<uint64_t>(word);
- UnigramPointer ret(unigram_.Find(word, next));
- independent_left = (next.begin == next.end);
- return ret;
- }
-
- MiddlePointer Unpack(uint64_t extend_pointer, unsigned char extend_length, Node &node) const {
- return MiddlePointer(quant_, extend_length - 2, middle_begin_[extend_length - 2].ReadEntry(extend_pointer, node));
- }
-
- MiddlePointer LookupMiddle(unsigned char order_minus_2, WordIndex word, Node &node, bool &independent_left, uint64_t &extend_left) const {
- util::BitAddress address(middle_begin_[order_minus_2].Find(word, node, extend_left));
- independent_left = (address.base == NULL) || (node.begin == node.end);
- return MiddlePointer(quant_, order_minus_2, address);
- }
-
- LongestPointer LookupLongest(WordIndex word, const Node &node) const {
- return LongestPointer(quant_, longest_.Find(word, node));
- }
-
- bool FastMakeNode(const WordIndex *begin, const WordIndex *end, Node &node) const {
- assert(begin != end);
- bool independent_left;
- uint64_t ignored;
- LookupUnigram(*begin, node, independent_left, ignored);
- for (const WordIndex *i = begin + 1; i < end; ++i) {
- if (independent_left || !LookupMiddle(i - begin - 1, *i, node, independent_left, ignored).Found()) return false;
- }
- return true;
- }
-
- private:
- friend void BuildTrie<Quant, Bhiksha>(SortedFiles &files, std::vector<uint64_t> &counts, const Config &config, TrieSearch<Quant, Bhiksha> &out, Quant &quant, SortedVocabulary &vocab, BinaryFormat &backing);
-
- // Middles are managed manually so we can delay construction and they don't have to be copyable.
- void FreeMiddles() {
- for (const Middle *i = middle_begin_; i != middle_end_; ++i) {
- i->~Middle();
- }
- std::free(middle_begin_);
- }
-
- typedef trie::BitPackedMiddle<Bhiksha> Middle;
-
- typedef trie::BitPackedLongest Longest;
- Longest longest_;
-
- Middle *middle_begin_, *middle_end_;
- Quant quant_;
-
- typedef ::lm::ngram::trie::Unigram Unigram;
- Unigram unigram_;
-};
-
-} // namespace trie
-} // namespace ngram
-} // namespace lm
-
-#endif // LM_SEARCH_TRIE_H
diff --git a/src/joshua/decoder/ff/lm/kenlm/lm/sizes.cc b/src/joshua/decoder/ff/lm/kenlm/lm/sizes.cc
deleted file mode 100644
index 55ad586..0000000
--- a/src/joshua/decoder/ff/lm/kenlm/lm/sizes.cc
+++ /dev/null
@@ -1,63 +0,0 @@
-#include "lm/sizes.hh"
-#include "lm/model.hh"
-#include "util/file_piece.hh"
-
-#include <vector>
-#include <iomanip>
-
-namespace lm {
-namespace ngram {
-
-void ShowSizes(const std::vector<uint64_t> &counts, const lm::ngram::Config &config) {
- uint64_t sizes[6];
- sizes[0] = ProbingModel::Size(counts, config);
- sizes[1] = RestProbingModel::Size(counts, config);
- sizes[2] = TrieModel::Size(counts, config);
- sizes[3] = QuantTrieModel::Size(counts, config);
- sizes[4] = ArrayTrieModel::Size(counts, config);
- sizes[5] = QuantArrayTrieModel::Size(counts, config);
- uint64_t max_length = *std::max_element(sizes, sizes + sizeof(sizes) / sizeof(uint64_t));
- uint64_t min_length = *std::min_element(sizes, sizes + sizeof(sizes) / sizeof(uint64_t));
- uint64_t divide;
- char prefix;
- if (min_length < (1 << 10) * 10) {
- prefix = ' ';
- divide = 1;
- } else if (min_length < (1 << 20) * 10) {
- prefix = 'k';
- divide = 1 << 10;
- } else if (min_length < (1ULL << 30) * 10) {
- prefix = 'M';
- divide = 1 << 20;
- } else {
- prefix = 'G';
- divide = 1 << 30;
- }
- long int length = std::max<long int>(2, static_cast<long int>(ceil(log10((double) max_length / divide))));
- std::cerr << "Memory estimate for binary LM:\ntype ";
-
- // right align bytes.
- for (long int i = 0; i < length - 2; ++i) std::cerr << ' ';
-
- std::cerr << prefix << "B\n"
- "probing " << std::setw(length) << (sizes[0] / divide) << " assuming -p " << config.probing_multiplier << "\n"
- "probing " << std::setw(length) << (sizes[1] / divide) << " assuming -r models -p " << config.probing_multiplier << "\n"
- "trie " << std::setw(length) << (sizes[2] / divide) << " without quantization\n"
- "trie " << std::setw(length) << (sizes[3] / divide) << " assuming -q " << (unsigned)config.prob_bits << " -b " << (unsigned)config.backoff_bits << " quantization \n"
- "trie " << std::setw(length) << (sizes[4] / divide) << " assuming -a " << (unsigned)config.pointer_bhiksha_bits << " array pointer compression\n"
- "trie " << std::setw(length) << (sizes[5] / divide) << " assuming -a " << (unsigned)config.pointer_bhiksha_bits << " -q " << (unsigned)config.prob_bits << " -b " << (unsigned)config.backoff_bits<< " array pointer compression and quantization\n";
-}
-
-void ShowSizes(const std::vector<uint64_t> &counts) {
- lm::ngram::Config config;
- ShowSizes(counts, config);
-}
-
-void ShowSizes(const char *file, const lm::ngram::Config &config) {
- std::vector<uint64_t> counts;
- util::FilePiece f(file);
- lm::ReadARPACounts(f, counts);
- ShowSizes(counts, config);
-}
-
-}} //namespaces
diff --git a/src/joshua/decoder/ff/lm/kenlm/lm/state.hh b/src/joshua/decoder/ff/lm/kenlm/lm/state.hh
deleted file mode 100644
index f6c51d6..0000000
--- a/src/joshua/decoder/ff/lm/kenlm/lm/state.hh
+++ /dev/null
@@ -1,125 +0,0 @@
-#ifndef LM_STATE_H
-#define LM_STATE_H
-
-#include "lm/max_order.hh"
-#include "lm/word_index.hh"
-#include "util/murmur_hash.hh"
-
-#include <string.h>
-
-namespace lm {
-namespace ngram {
-
-// This is a POD but if you want memcmp to return the same as operator==, call
-// ZeroRemaining first.
-class State {
- public:
- bool operator==(const State &other) const {
- if (length != other.length) return false;
- return !memcmp(words, other.words, length * sizeof(WordIndex));
- }
-
- // Three way comparison function.
- int Compare(const State &other) const {
- if (length != other.length) return length < other.length ? -1 : 1;
- return memcmp(words, other.words, length * sizeof(WordIndex));
- }
-
- bool operator<(const State &other) const {
- if (length != other.length) return length < other.length;
- return memcmp(words, other.words, length * sizeof(WordIndex)) < 0;
- }
-
- // Call this before using raw memcmp.
- void ZeroRemaining() {
- for (unsigned char i = length; i < KENLM_MAX_ORDER - 1; ++i) {
- words[i] = 0;
- backoff[i] = 0.0;
- }
- }
-
- unsigned char Length() const { return length; }
-
- // You shouldn't need to touch anything below this line, but the members are public so FullState will qualify as a POD.
- // This order minimizes total size of the struct if WordIndex is 64 bit, float is 32 bit, and alignment of 64 bit integers is 64 bit.
- WordIndex words[KENLM_MAX_ORDER - 1];
- float backoff[KENLM_MAX_ORDER - 1];
- unsigned char length;
-};
-
-typedef State Right;
-
-inline uint64_t hash_value(const State &state, uint64_t seed = 0) {
- return util::MurmurHashNative(state.words, sizeof(WordIndex) * state.length, seed);
-}
-
-struct Left {
- bool operator==(const Left &other) const {
- return
- length == other.length &&
- (!length || (pointers[length - 1] == other.pointers[length - 1] && full == other.full));
- }
-
- int Compare(const Left &other) const {
- if (length < other.length) return -1;
- if (length > other.length) return 1;
- if (length == 0) return 0; // Must be full.
- if (pointers[length - 1] > other.pointers[length - 1]) return 1;
- if (pointers[length - 1] < other.pointers[length - 1]) return -1;
- return (int)full - (int)other.full;
- }
-
- bool operator<(const Left &other) const {
- return Compare(other) == -1;
- }
-
- void ZeroRemaining() {
- for (uint64_t * i = pointers + length; i < pointers + KENLM_MAX_ORDER - 1; ++i)
- *i = 0;
- }
-
- uint64_t pointers[KENLM_MAX_ORDER - 1];
- unsigned char length;
- bool full;
-};
-
-inline uint64_t hash_value(const Left &left) {
- unsigned char add[2];
- add[0] = left.length;
- add[1] = left.full;
- return util::MurmurHashNative(add, 2, left.length ? left.pointers[left.length - 1] : 0);
-}
-
-struct ChartState {
- bool operator==(const ChartState &other) const {
- return (right == other.right) && (left == other.left);
- }
-
- int Compare(const ChartState &other) const {
- int lres = left.Compare(other.left);
- if (lres) return lres;
- return right.Compare(other.right);
- }
-
- bool operator<(const ChartState &other) const {
- return Compare(other) < 0;
- }
-
- void ZeroRemaining() {
- left.ZeroRemaining();
- right.ZeroRemaining();
- }
-
- Left left;
- State right;
-};
-
-inline uint64_t hash_value(const ChartState &state) {
- return hash_value(state.right, hash_value(state.left));
-}
-
-
-} // namespace ngram
-} // namespace lm
-
-#endif // LM_STATE_H
diff --git a/src/joshua/decoder/ff/lm/kenlm/lm/trie.cc b/src/joshua/decoder/ff/lm/kenlm/lm/trie.cc
deleted file mode 100644
index 5f8e7ce..0000000
--- a/src/joshua/decoder/ff/lm/kenlm/lm/trie.cc
+++ /dev/null
@@ -1,131 +0,0 @@
-#include "lm/trie.hh"
-
-#include "lm/bhiksha.hh"
-#include "util/bit_packing.hh"
-#include "util/exception.hh"
-#include "util/sorted_uniform.hh"
-
-#include <assert.h>
-
-namespace lm {
-namespace ngram {
-namespace trie {
-namespace {
-
-class KeyAccessor {
- public:
- KeyAccessor(const void *base, uint64_t key_mask, uint8_t key_bits, uint8_t total_bits)
- : base_(reinterpret_cast<const uint8_t*>(base)), key_mask_(key_mask), key_bits_(key_bits), total_bits_(total_bits) {}
-
- typedef uint64_t Key;
-
- Key operator()(uint64_t index) const {
- return util::ReadInt57(base_, index * static_cast<uint64_t>(total_bits_), key_bits_, key_mask_);
- }
-
- private:
- const uint8_t *const base_;
- const WordIndex key_mask_;
- const uint8_t key_bits_, total_bits_;
-};
-
-bool FindBitPacked(const void *base, uint64_t key_mask, uint8_t key_bits, uint8_t total_bits, uint64_t begin_index, uint64_t end_index, const uint64_t max_vocab, const uint64_t key, uint64_t &at_index) {
- KeyAccessor accessor(base, key_mask, key_bits, total_bits);
- if (!util::BoundedSortedUniformFind<uint64_t, KeyAccessor, util::PivotSelect<sizeof(WordIndex)>::T>(accessor, begin_index - 1, (uint64_t)0, end_index, max_vocab, key, at_index)) return false;
- return true;
-}
-} // namespace
-
-uint64_t BitPacked::BaseSize(uint64_t entries, uint64_t max_vocab, uint8_t remaining_bits) {
- uint8_t total_bits = util::RequiredBits(max_vocab) + remaining_bits;
- // Extra entry for next pointer at the end.
- // +7 then / 8 to round up bits and convert to bytes
- // +sizeof(uint64_t) so that ReadInt57 etc don't go segfault.
- // Note that this waste is O(order), not O(number of ngrams).
- return ((1 + entries) * total_bits + 7) / 8 + sizeof(uint64_t);
-}
-
-void BitPacked::BaseInit(void *base, uint64_t max_vocab, uint8_t remaining_bits) {
- util::BitPackingSanity();
- word_bits_ = util::RequiredBits(max_vocab);
- word_mask_ = (1ULL << word_bits_) - 1ULL;
- if (word_bits_ > 57) UTIL_THROW(util::Exception, "Sorry, word indices more than " << (1ULL << 57) << " are not implemented. Edit util/bit_packing.hh and fix the bit packing functions.");
- total_bits_ = word_bits_ + remaining_bits;
-
- base_ = static_cast<uint8_t*>(base);
- insert_index_ = 0;
- max_vocab_ = max_vocab;
-}
-
-template <class Bhiksha> uint64_t BitPackedMiddle<Bhiksha>::Size(uint8_t quant_bits, uint64_t entries, uint64_t max_vocab, uint64_t max_ptr, const Config &config) {
- return Bhiksha::Size(entries + 1, max_ptr, config) + BaseSize(entries, max_vocab, quant_bits + Bhiksha::InlineBits(entries + 1, max_ptr, config));
-}
-
-template <class Bhiksha> BitPackedMiddle<Bhiksha>::BitPackedMiddle(void *base, uint8_t quant_bits, uint64_t entries, uint64_t max_vocab, uint64_t max_next, const BitPacked &next_source, const Config &config) :
- BitPacked(),
- quant_bits_(quant_bits),
- // If the offset of the method changes, also change TrieSearch::UpdateConfigFromBinary.
- bhiksha_(base, entries + 1, max_next, config),
- next_source_(&next_source) {
- if (entries + 1 >= (1ULL << 57) || (max_next >= (1ULL << 57))) UTIL_THROW(util::Exception, "Sorry, this does not support more than " << (1ULL << 57) << " n-grams of a particular order. Edit util/bit_packing.hh and fix the bit packing functions.");
- BaseInit(reinterpret_cast<uint8_t*>(base) + Bhiksha::Size(entries + 1, max_next, config), max_vocab, quant_bits_ + bhiksha_.InlineBits());
-}
-
-template <class Bhiksha> util::BitAddress BitPackedMiddle<Bhiksha>::Insert(WordIndex word) {
- assert(word <= word_mask_);
- uint64_t at_pointer = insert_index_ * total_bits_;
-
- util::WriteInt57(base_, at_pointer, word_bits_, word);
- at_pointer += word_bits_;
- util::BitAddress ret(base_, at_pointer);
- at_pointer += quant_bits_;
- uint64_t next = next_source_->InsertIndex();
- bhiksha_.WriteNext(base_, at_pointer, insert_index_, next);
- ++insert_index_;
- return ret;
-}
-
-template <class Bhiksha> util::BitAddress BitPackedMiddle<Bhiksha>::Find(WordIndex word, NodeRange &range, uint64_t &pointer) const {
- uint64_t at_pointer;
- if (!FindBitPacked(base_, word_mask_, word_bits_, total_bits_, range.begin, range.end, max_vocab_, word, at_pointer)) {
- return util::BitAddress(NULL, 0);
- }
- pointer = at_pointer;
- at_pointer *= total_bits_;
- at_pointer += word_bits_;
- bhiksha_.ReadNext(base_, at_pointer + quant_bits_, pointer, total_bits_, range);
-
- return util::BitAddress(base_, at_pointer);
-}
-
-template <class Bhiksha> void BitPackedMiddle<Bhiksha>::FinishedLoading(uint64_t next_end, const Config &config) {
- // Write at insert_index. . .
- uint64_t last_next_write = insert_index_ * total_bits_ +
- // at the offset where the next pointers are stored.
- (total_bits_ - bhiksha_.InlineBits());
- bhiksha_.WriteNext(base_, last_next_write, insert_index_, next_end);
- bhiksha_.FinishedLoading(config);
-}
-
-util::BitAddress BitPackedLongest::Insert(WordIndex index) {
- assert(index <= word_mask_);
- uint64_t at_pointer = insert_index_ * total_bits_;
- util::WriteInt57(base_, at_pointer, word_bits_, index);
- at_pointer += word_bits_;
- ++insert_index_;
- return util::BitAddress(base_, at_pointer);
-}
-
-util::BitAddress BitPackedLongest::Find(WordIndex word, const NodeRange &range) const {
- uint64_t at_pointer;
- if (!FindBitPacked(base_, word_mask_, word_bits_, total_bits_, range.begin, range.end, max_vocab_, word, at_pointer)) return util::BitAddress(NULL, 0);
- at_pointer = at_pointer * total_bits_ + word_bits_;
- return util::BitAddress(base_, at_pointer);
-}
-
-template class BitPackedMiddle<DontBhiksha>;
-template class BitPackedMiddle<ArrayBhiksha>;
-
-} // namespace trie
-} // namespace ngram
-} // namespace lm
diff --git a/src/joshua/decoder/ff/lm/kenlm/lm/trie.hh b/src/joshua/decoder/ff/lm/kenlm/lm/trie.hh
deleted file mode 100644
index cd39298..0000000
--- a/src/joshua/decoder/ff/lm/kenlm/lm/trie.hh
+++ /dev/null
@@ -1,146 +0,0 @@
-#ifndef LM_TRIE_H
-#define LM_TRIE_H
-
-#include "lm/weights.hh"
-#include "lm/word_index.hh"
-#include "util/bit_packing.hh"
-
-#include <cstddef>
-
-#include <stdint.h>
-
-namespace lm {
-namespace ngram {
-struct Config;
-namespace trie {
-
-struct NodeRange {
- uint64_t begin, end;
-};
-
-// TODO: if the number of unigrams is a concern, also bit pack these records.
-struct UnigramValue {
- ProbBackoff weights;
- uint64_t next;
- uint64_t Next() const { return next; }
-};
-
-class UnigramPointer {
- public:
- explicit UnigramPointer(const ProbBackoff &to) : to_(&to) {}
-
- UnigramPointer() : to_(NULL) {}
-
- bool Found() const { return to_ != NULL; }
-
- float Prob() const { return to_->prob; }
- float Backoff() const { return to_->backoff; }
- float Rest() const { return Prob(); }
-
- private:
- const ProbBackoff *to_;
-};
-
-class Unigram {
- public:
- Unigram() {}
-
- void Init(void *start) {
- unigram_ = static_cast<UnigramValue*>(start);
- }
-
- static uint64_t Size(uint64_t count) {
- // +1 in case unknown doesn't appear. +1 for the final next.
- return (count + 2) * sizeof(UnigramValue);
- }
-
- const ProbBackoff &Lookup(WordIndex index) const { return unigram_[index].weights; }
-
- ProbBackoff &Unknown() { return unigram_[0].weights; }
-
- UnigramValue *Raw() {
- return unigram_;
- }
-
- UnigramPointer Find(WordIndex word, NodeRange &next) const {
- UnigramValue *val = unigram_ + word;
- next.begin = val->next;
- next.end = (val+1)->next;
- return UnigramPointer(val->weights);
- }
-
- private:
- UnigramValue *unigram_;
-};
-
-class BitPacked {
- public:
- BitPacked() {}
-
- uint64_t InsertIndex() const {
- return insert_index_;
- }
-
- protected:
- static uint64_t BaseSize(uint64_t entries, uint64_t max_vocab, uint8_t remaining_bits);
-
- void BaseInit(void *base, uint64_t max_vocab, uint8_t remaining_bits);
-
- uint8_t word_bits_;
- uint8_t total_bits_;
- uint64_t word_mask_;
-
- uint8_t *base_;
-
- uint64_t insert_index_, max_vocab_;
-};
-
-template <class Bhiksha> class BitPackedMiddle : public BitPacked {
- public:
- static uint64_t Size(uint8_t quant_bits, uint64_t entries, uint64_t max_vocab, uint64_t max_next, const Config &config);
-
- // next_source need not be initialized.
- BitPackedMiddle(void *base, uint8_t quant_bits, uint64_t entries, uint64_t max_vocab, uint64_t max_next, const BitPacked &next_source, const Config &config);
-
- util::BitAddress Insert(WordIndex word);
-
- void FinishedLoading(uint64_t next_end, const Config &config);
-
- util::BitAddress Find(WordIndex word, NodeRange &range, uint64_t &pointer) const;
-
- util::BitAddress ReadEntry(uint64_t pointer, NodeRange &range) {
- uint64_t addr = pointer * total_bits_;
- addr += word_bits_;
- bhiksha_.ReadNext(base_, addr + quant_bits_, pointer, total_bits_, range);
- return util::BitAddress(base_, addr);
- }
-
- private:
- uint8_t quant_bits_;
- Bhiksha bhiksha_;
-
- const BitPacked *next_source_;
-};
-
-class BitPackedLongest : public BitPacked {
- public:
- static uint64_t Size(uint8_t quant_bits, uint64_t entries, uint64_t max_vocab) {
- return BaseSize(entries, max_vocab, quant_bits);
- }
-
- BitPackedLongest() {}
-
- void Init(void *base, uint8_t quant_bits, uint64_t max_vocab) {
- BaseInit(base, max_vocab, quant_bits);
- }
-
- util::BitAddress Insert(WordIndex word);
-
- util::BitAddress Find(WordIndex word, const NodeRange &node) const;
-};
-
-} // namespace trie
-} // namespace ngram
-} // namespace lm
-
-#endif // LM_TRIE_H
diff --git a/src/joshua/decoder/ff/lm/kenlm/lm/trie_sort.cc b/src/joshua/decoder/ff/lm/kenlm/lm/trie_sort.cc
deleted file mode 100644
index c3f4687..0000000
--- a/src/joshua/decoder/ff/lm/kenlm/lm/trie_sort.cc
+++ /dev/null
@@ -1,304 +0,0 @@
-#include "lm/trie_sort.hh"
-
-#include "lm/config.hh"
-#include "lm/lm_exception.hh"
-#include "lm/read_arpa.hh"
-#include "lm/vocab.hh"
-#include "lm/weights.hh"
-#include "lm/word_index.hh"
-#include "util/file_piece.hh"
-#include "util/mmap.hh"
-#include "util/proxy_iterator.hh"
-#include "util/sized_iterator.hh"
-
-#include <algorithm>
-#include <cstring>
-#include <cstdio>
-#include <cstdlib>
-#include <deque>
-#include <iterator>
-#include <limits>
-#include <vector>
-
-namespace lm {
-namespace ngram {
-namespace trie {
-namespace {
-
-typedef util::SizedIterator NGramIter;
-
-// Proxy for an entry except there is some extra cruft between the entries. This is used to sort (n-1)-grams using the same memory as the sorted n-grams.
-class PartialViewProxy {
- public:
- PartialViewProxy() : attention_size_(0), inner_() {}
-
- PartialViewProxy(void *ptr, std::size_t block_size, std::size_t attention_size) : attention_size_(attention_size), inner_(ptr, block_size) {}
-
- operator std::string() const {
- return std::string(reinterpret_cast<const char*>(inner_.Data()), attention_size_);
- }
-
- PartialViewProxy &operator=(const PartialViewProxy &from) {
- memcpy(inner_.Data(), from.inner_.Data(), attention_size_);
- return *this;
- }
-
- PartialViewProxy &operator=(const std::string &from) {
- memcpy(inner_.Data(), from.data(), attention_size_);
- return *this;
- }
-
- const void *Data() const { return inner_.Data(); }
- void *Data() { return inner_.Data(); }
-
- friend void swap(PartialViewProxy first, PartialViewProxy second) {
- std::swap_ranges(reinterpret_cast<char*>(first.Data()), reinterpret_cast<char*>(first.Data()) + first.attention_size_, reinterpret_cast<char*>(second.Data()));
- }
-
- private:
- friend class util::ProxyIterator<PartialViewProxy>;
-
- typedef std::string value_type;
-
- const std::size_t attention_size_;
-
- typedef util::SizedInnerIterator InnerIterator;
- InnerIterator &Inner() { return inner_; }
- const InnerIterator &Inner() const { return inner_; }
- InnerIterator inner_;
-};
-
-typedef util::ProxyIterator<PartialViewProxy> PartialIter;
-
-FILE *DiskFlush(const void *mem_begin, const void *mem_end, const std::string &temp_prefix) {
- util::scoped_fd file(util::MakeTemp(temp_prefix));
- util::WriteOrThrow(file.get(), mem_begin, (uint8_t*)mem_end - (uint8_t*)mem_begin);
- return util::FDOpenOrThrow(file);
-}
-
-FILE *WriteContextFile(uint8_t *begin, uint8_t *end, const std::string &temp_prefix, std::size_t entry_size, unsigned char order) {
- const size_t context_size = sizeof(WordIndex) * (order - 1);
- // Sort just the contexts using the same memory.
- PartialIter context_begin(PartialViewProxy(begin + sizeof(WordIndex), entry_size, context_size));
- PartialIter context_end(PartialViewProxy(end + sizeof(WordIndex), entry_size, context_size));
-
-#if defined(_WIN32) || defined(_WIN64)
- std::stable_sort
-#else
- std::sort
-#endif
- (context_begin, context_end, util::SizedCompare<EntryCompare, PartialViewProxy>(EntryCompare(order - 1)));
-
- util::scoped_FILE out(util::FMakeTemp(temp_prefix));
-
- // Write out to file and uniqueify at the same time. Could have used unique_copy if there was an appropriate OutputIterator.
- if (context_begin == context_end) return out.release();
- PartialIter i(context_begin);
- util::WriteOrThrow(out.get(), i->Data(), context_size);
- const void *previous = i->Data();
- ++i;
- for (; i != context_end; ++i) {
- if (memcmp(previous, i->Data(), context_size)) {
- util::WriteOrThrow(out.get(), i->Data(), context_size);
- previous = i->Data();
- }
- }
- return out.release();
-}
-
-struct ThrowCombine {
- void operator()(std::size_t entry_size, unsigned char order, const void *first, const void *second, FILE * /*out*/) const {
- const WordIndex *base = reinterpret_cast<const WordIndex*>(first);
- FormatLoadException e;
- e << "Duplicate n-gram detected with vocab ids";
- for (const WordIndex *i = base; i != base + order; ++i) {
- e << ' ' << *i;
- }
- throw e;
- }
-};
-
-// Useful for context files that just contain records with no value.
-struct FirstCombine {
- void operator()(std::size_t entry_size, unsigned char /*order*/, const void *first, const void * /*second*/, FILE *out) const {
- util::WriteOrThrow(out, first, entry_size);
- }
-};
-
-template <class Combine> FILE *MergeSortedFiles(FILE *first_file, FILE *second_file, const std::string &temp_prefix, std::size_t weights_size, unsigned char order, const Combine &combine) {
- std::size_t entry_size = sizeof(WordIndex) * order + weights_size;
- RecordReader first, second;
- first.Init(first_file, entry_size);
- second.Init(second_file, entry_size);
- util::scoped_FILE out_file(util::FMakeTemp(temp_prefix));
- EntryCompare less(order);
- while (first && second) {
- if (less(first.Data(), second.Data())) {
- util::WriteOrThrow(out_file.get(), first.Data(), entry_size);
- ++first;
- } else if (less(second.Data(), first.Data())) {
- util::WriteOrThrow(out_file.get(), second.Data(), entry_size);
- ++second;
- } else {
- combine(entry_size, order, first.Data(), second.Data(), out_file.get());
- ++first; ++second;
- }
- }
- for (RecordReader &remains = (first ? first : second); remains; ++remains) {
- util::WriteOrThrow(out_file.get(), remains.Data(), entry_size);
- }
- return out_file.release();
-}
-
-} // namespace
-
-void RecordReader::Init(FILE *file, std::size_t entry_size) {
- entry_size_ = entry_size;
- data_.reset(malloc(entry_size));
- UTIL_THROW_IF(!data_.get(), util::ErrnoException, "Failed to malloc read buffer");
- file_ = file;
- if (file) {
- rewind(file);
- remains_ = true;
- ++*this;
- } else {
- remains_ = false;
- }
-}
-
-void RecordReader::Overwrite(const void *start, std::size_t amount) {
- long internal = (uint8_t*)start - (uint8_t*)data_.get();
- UTIL_THROW_IF(fseek(file_, internal - entry_size_, SEEK_CUR), util::ErrnoException, "Couldn't seek backwards for revision");
- util::WriteOrThrow(file_, start, amount);
- long forward = entry_size_ - internal - amount;
-#if !defined(_WIN32) && !defined(_WIN64)
- if (forward)
-#endif
- UTIL_THROW_IF(fseek(file_, forward, SEEK_CUR), util::ErrnoException, "Couldn't seek forwards past revision");
-}
-
-void RecordReader::Rewind() {
- if (file_) {
- rewind(file_);
- remains_ = true;
- ++*this;
- } else {
- remains_ = false;
- }
-}
-
-SortedFiles::SortedFiles(const Config &config, util::FilePiece &f, std::vector<uint64_t> &counts, size_t buffer, const std::string &file_prefix, SortedVocabulary &vocab) {
- PositiveProbWarn warn(config.positive_log_probability);
- unigram_.reset(util::MakeTemp(file_prefix));
- {
- // In case <unk> appears.
- size_t size_out = (counts[0] + 1) * sizeof(ProbBackoff);
- util::scoped_mmap unigram_mmap(util::MapZeroedWrite(unigram_.get(), size_out), size_out);
- Read1Grams(f, counts[0], vocab, reinterpret_cast<ProbBackoff*>(unigram_mmap.get()), warn);
- CheckSpecials(config, vocab);
- if (!vocab.SawUnk()) ++counts[0];
- }
-
- // Only use as much buffer as we need.
- size_t buffer_use = 0;
- for (unsigned int order = 2; order < counts.size(); ++order) {
- buffer_use = std::max<size_t>(buffer_use, static_cast<size_t>((sizeof(WordIndex) * order + 2 * sizeof(float)) * counts[order - 1]));
- }
- buffer_use = std::max<size_t>(buffer_use, static_cast<size_t>((sizeof(WordIndex) * counts.size() + sizeof(float)) * counts.back()));
- buffer = std::min<size_t>(buffer, buffer_use);
-
- util::scoped_malloc mem;
- mem.reset(malloc(buffer));
- if (!mem.get()) UTIL_THROW(util::ErrnoException, "malloc failed for sort buffer size " << buffer);
-
- for (unsigned char order = 2; order <= counts.size(); ++order) {
- ConvertToSorted(f, vocab, counts, file_prefix, order, warn, mem.get(), buffer);
- }
- ReadEnd(f);
-}
-
-namespace {
-class Closer {
- public:
- explicit Closer(std::deque<FILE*> &files) : files_(files) {}
-
- ~Closer() {
- for (std::deque<FILE*>::iterator i = files_.begin(); i != files_.end(); ++i) {
- util::scoped_FILE deleter(*i);
- }
- }
-
- void PopFront() {
- util::scoped_FILE deleter(files_.front());
- files_.pop_front();
- }
- private:
- std::deque<FILE*> &files_;
-};
-} // namespace
-
-void SortedFiles::ConvertToSorted(util::FilePiece &f, const SortedVocabulary &vocab, const std::vector<uint64_t> &counts, const std::string &file_prefix, unsigned char order, PositiveProbWarn &warn, void *mem, std::size_t mem_size) {
- ReadNGramHeader(f, order);
- const size_t count = counts[order - 1];
- // Size of weights. Does it include backoff?
- const size_t words_size = sizeof(WordIndex) * order;
- const size_t weights_size = sizeof(float) + ((order == counts.size()) ? 0 : sizeof(float));
- const size_t entry_size = words_size + weights_size;
- const size_t batch_size = std::min(count, mem_size / entry_size);
- uint8_t *const begin = reinterpret_cast<uint8_t*>(mem);
-
- std::deque<FILE*> files, contexts;
- Closer files_closer(files), contexts_closer(contexts);
-
- for (std::size_t batch = 0, done = 0; done < count; ++batch) {
- uint8_t *out = begin;
- uint8_t *out_end = out + std::min(count - done, batch_size) * entry_size;
- if (order == counts.size()) {
- for (; out != out_end; out += entry_size) {
- std::reverse_iterator<WordIndex*> it(reinterpret_cast<WordIndex*>(out) + order);
- ReadNGram(f, order, vocab, it, *reinterpret_cast<Prob*>(out + words_size), warn);
- }
- } else {
- for (; out != out_end; out += entry_size) {
- std::reverse_iterator<WordIndex*> it(reinterpret_cast<WordIndex*>(out) + order);
- ReadNGram(f, order, vocab, it, *reinterpret_cast<ProbBackoff*>(out + words_size), warn);
- }
- }
- // Sort full records by full n-gram.
- util::SizedProxy proxy_begin(begin, entry_size), proxy_end(out_end, entry_size);
- // parallel_sort uses too much RAM. TODO: figure out why windows sort doesn't like my proxies.
-#if defined(_WIN32) || defined(_WIN64)
- std::stable_sort
-#else
- std::sort
-#endif
- (NGramIter(proxy_begin), NGramIter(proxy_end), util::SizedCompare<EntryCompare>(EntryCompare(order)));
- files.push_back(DiskFlush(begin, out_end, file_prefix));
- contexts.push_back(WriteContextFile(begin, out_end, file_prefix, entry_size, order));
-
- done += (out_end - begin) / entry_size;
- }
-
- // All individual files created. Merge them.
-
- while (files.size() > 1) {
- files.push_back(MergeSortedFiles(files[0], files[1], file_prefix, weights_size, order, ThrowCombine()));
- files_closer.PopFront();
- files_closer.PopFront();
- contexts.push_back(MergeSortedFiles(contexts[0], contexts[1], file_prefix, 0, order - 1, FirstCombine()));
- contexts_closer.PopFront();
- contexts_closer.PopFront();
- }
-
- if (!files.empty()) {
- // Steal from closers.
- full_[order - 2].reset(files.front());
- files.pop_front();
- context_[order - 2].reset(contexts.front());
- contexts.pop_front();
- }
-}
-
-} // namespace trie
-} // namespace ngram
-} // namespace lm
diff --git a/src/joshua/decoder/ff/lm/kenlm/lm/trie_sort.hh b/src/joshua/decoder/ff/lm/kenlm/lm/trie_sort.hh
deleted file mode 100644
index e5406d9..0000000
--- a/src/joshua/decoder/ff/lm/kenlm/lm/trie_sort.hh
+++ /dev/null
@@ -1,114 +0,0 @@
-// Step of trie builder: create sorted files.
-
-#ifndef LM_TRIE_SORT_H
-#define LM_TRIE_SORT_H
-
-#include "lm/max_order.hh"
-#include "lm/word_index.hh"
-
-#include "util/file.hh"
-#include "util/scoped.hh"
-
-#include <cstddef>
-#include <functional>
-#include <string>
-#include <vector>
-
-#include <stdint.h>
-
-namespace util {
-class FilePiece;
-} // namespace util
-
-namespace lm {
-class PositiveProbWarn;
-namespace ngram {
-class SortedVocabulary;
-struct Config;
-
-namespace trie {
-
-class EntryCompare : public std::binary_function<const void*, const void*, bool> {
- public:
- explicit EntryCompare(unsigned char order) : order_(order) {}
-
- bool operator()(const void *first_void, const void *second_void) const {
- const WordIndex *first = static_cast<const WordIndex*>(first_void);
- const WordIndex *second = static_cast<const WordIndex*>(second_void);
- const WordIndex *end = first + order_;
- for (; first != end; ++first, ++second) {
- if (*first < *second) return true;
- if (*first > *second) return false;
- }
- return false;
- }
- private:
- unsigned char order_;
-};
-
-class RecordReader {
- public:
- RecordReader() : remains_(true) {}
-
- void Init(FILE *file, std::size_t entry_size);
-
- void *Data() { return data_.get(); }
- const void *Data() const { return data_.get(); }
-
- RecordReader &operator++() {
- std::size_t ret = fread(data_.get(), entry_size_, 1, file_);
- if (!ret) {
- UTIL_THROW_IF(!feof(file_), util::ErrnoException, "Error reading temporary file");
- remains_ = false;
- }
- return *this;
- }
-
- operator bool() const { return remains_; }
-
- void Rewind();
-
- std::size_t EntrySize() const { return entry_size_; }
-
- void Overwrite(const void *start, std::size_t amount);
-
- private:
- FILE *file_;
-
- util::scoped_malloc data_;
-
- bool remains_;
-
- std::size_t entry_size_;
-};
-
-class SortedFiles {
- public:
- // Build from ARPA
- SortedFiles(const Config &config, util::FilePiece &f, std::vector<uint64_t> &counts, std::size_t buffer, const std::string &file_prefix, SortedVocabulary &vocab);
-
- int StealUnigram() {
- return unigram_.release();
- }
-
- FILE *Full(unsigned char order) {
- return full_[order - 2].get();
- }
-
- FILE *Context(unsigned char of_order) {
- return context_[of_order - 2].get();
- }
-
- private:
- void ConvertToSorted(util::FilePiece &f, const SortedVocabulary &vocab, const std::vector<uint64_t> &counts, const std::string &prefix, unsigned char order, PositiveProbWarn &warn, void *mem, std::size_t mem_size);
-
- util::scoped_fd unigram_;
-
- util::scoped_FILE full_[KENLM_MAX_ORDER - 1], context_[KENLM_MAX_ORDER - 1];
-};
-
-} // namespace trie
-} // namespace ngram
-} // namespace lm
-
-#endif // LM_TRIE_SORT_H
diff --git a/src/joshua/decoder/ff/lm/kenlm/lm/value.hh b/src/joshua/decoder/ff/lm/kenlm/lm/value.hh
deleted file mode 100644
index 36e8708..0000000
--- a/src/joshua/decoder/ff/lm/kenlm/lm/value.hh
+++ /dev/null
@@ -1,157 +0,0 @@
-#ifndef LM_VALUE_H
-#define LM_VALUE_H
-
-#include "lm/model_type.hh"
-#include "lm/value_build.hh"
-#include "lm/weights.hh"
-#include "util/bit_packing.hh"
-
-#include <stdint.h>
-
-namespace lm {
-namespace ngram {
-
-// Template proxy for probing unigrams and middle.
-template <class Weights> class GenericProbingProxy {
- public:
- explicit GenericProbingProxy(const Weights &to) : to_(&to) {}
-
- GenericProbingProxy() : to_(0) {}
-
- bool Found() const { return to_ != 0; }
-
- float Prob() const {
- util::FloatEnc enc;
- enc.f = to_->prob;
- enc.i |= util::kSignBit;
- return enc.f;
- }
-
- float Backoff() const { return to_->backoff; }
-
- bool IndependentLeft() const {
- util::FloatEnc enc;
- enc.f = to_->prob;
- return enc.i & util::kSignBit;
- }
-
- protected:
- const Weights *to_;
-};
-
-// Basic proxy for trie unigrams.
-template <class Weights> class GenericTrieUnigramProxy {
- public:
- explicit GenericTrieUnigramProxy(const Weights &to) : to_(&to) {}
-
- GenericTrieUnigramProxy() : to_(0) {}
-
- bool Found() const { return to_ != 0; }
- float Prob() const { return to_->prob; }
- float Backoff() const { return to_->backoff; }
- float Rest() const { return Prob(); }
-
- protected:
- const Weights *to_;
-};
-
-struct BackoffValue {
- typedef ProbBackoff Weights;
- static const ModelType kProbingModelType = PROBING;
-
- class ProbingProxy : public GenericProbingProxy<Weights> {
- public:
- explicit ProbingProxy(const Weights &to) : GenericProbingProxy<Weights>(to) {}
- ProbingProxy() {}
- float Rest() const { return Prob(); }
- };
-
- class TrieUnigramProxy : public GenericTrieUnigramProxy<Weights> {
- public:
- explicit TrieUnigramProxy(const Weights &to) : GenericTrieUnigramProxy<Weights>(to) {}
- TrieUnigramProxy() {}
- float Rest() const { return Prob(); }
- };
-
- struct ProbingEntry {
- typedef uint64_t Key;
- typedef Weights Value;
- uint64_t key;
- ProbBackoff value;
- uint64_t GetKey() const { return key; }
- };
-
- struct TrieUnigramValue {
- Weights weights;
- uint64_t next;
- uint64_t Next() const { return next; }
- };
-
- const static bool kDifferentRest = false;
-
- template <class Model, class C> void Callback(const Config &, unsigned int, typename Model::Vocabulary &, C &callback) {
- NoRestBuild build;
- callback(build);
- }
-};
-
-struct RestValue {
- typedef RestWeights Weights;
- static const ModelType kProbingModelType = REST_PROBING;
-
- class ProbingProxy : public GenericProbingProxy<RestWeights> {
- public:
- explicit ProbingProxy(const Weights &to) : GenericProbingProxy<RestWeights>(to) {}
- ProbingProxy() {}
- float Rest() const { return to_->rest; }
- };
-
- class TrieUnigramProxy : public GenericTrieUnigramProxy<Weights> {
- public:
- explicit TrieUnigramProxy(const Weights &to) : GenericTrieUnigramProxy<Weights>(to) {}
- TrieUnigramProxy() {}
- float Rest() const { return to_->rest; }
- };
-
-// gcc 4.1 doesn't properly back dependent types :-(.
-#pragma pack(push)
-#pragma pack(4)
- struct ProbingEntry {
- typedef uint64_t Key;
- typedef Weights Value;
- Key key;
- Value value;
- Key GetKey() const { return key; }
- };
-
- struct TrieUnigramValue {
- Weights weights;
- uint64_t next;
- uint64_t Next() const { return next; }
- };
-#pragma pack(pop)
-
- const static bool kDifferentRest = true;
-
- template <class Model, class C> void Callback(const Config &config, unsigned int order, typename Model::Vocabulary &vocab, C &callback) {
- switch (config.rest_function) {
- case Config::REST_MAX:
- {
- MaxRestBuild build;
- callback(build);
- }
- break;
- case Config::REST_LOWER:
- {
- LowerRestBuild<Model> build(config, order, vocab);
- callback(build);
- }
- break;
- }
- }
-};
-
-} // namespace ngram
-} // namespace lm
-
-#endif // LM_VALUE_H
diff --git a/src/joshua/decoder/ff/lm/kenlm/lm/value_build.cc b/src/joshua/decoder/ff/lm/kenlm/lm/value_build.cc
deleted file mode 100644
index 3ec3dce..0000000
--- a/src/joshua/decoder/ff/lm/kenlm/lm/value_build.cc
+++ /dev/null
@@ -1,59 +0,0 @@
-#include "lm/value_build.hh"
-
-#include "lm/model.hh"
-#include "lm/read_arpa.hh"
-
-namespace lm {
-namespace ngram {
-
-template <class Model> LowerRestBuild<Model>::LowerRestBuild(const Config &config, unsigned int order, const typename Model::Vocabulary &vocab) {
- UTIL_THROW_IF(config.rest_lower_files.size() != order - 1, ConfigException, "This model has order " << order << " so there should be " << (order - 1) << " lower-order models for rest cost purposes.");
- Config for_lower = config;
- for_lower.write_mmap = NULL;
- for_lower.rest_lower_files.clear();
-
- // Unigram models aren't supported, so this is a custom loader.
- // TODO: optimize the unigram loading?
- {
- util::FilePiece uni(config.rest_lower_files[0].c_str());
- std::vector<uint64_t> number;
- ReadARPACounts(uni, number);
- UTIL_THROW_IF(number.size() != 1, FormatLoadException, "Expected the unigram model to have order 1, not " << number.size());
- ReadNGramHeader(uni, 1);
- unigrams_.resize(number[0]);
- unigrams_[0] = config.unknown_missing_logprob;
- PositiveProbWarn warn;
- for (uint64_t i = 0; i < number[0]; ++i) {
- WordIndex w;
- Prob entry;
- ReadNGram(uni, 1, vocab, &w, entry, warn);
- unigrams_[w] = entry.prob;
- }
- }
-
- try {
- for (unsigned int i = 2; i < order; ++i) {
- models_.push_back(new Model(config.rest_lower_files[i - 1].c_str(), for_lower));
- UTIL_THROW_IF(models_.back()->Order() != i, FormatLoadException, "Lower order file " << config.rest_lower_files[i-1] << " should have order " << i);
- }
- } catch (...) {
- for (typename std::vector<const Model*>::const_iterator i = models_.begin(); i != models_.end(); ++i) {
- delete *i;
- }
- models_.clear();
- throw;
- }
-
- // TODO: force/check same vocab.
-}
-
-template <class Model> LowerRestBuild<Model>::~LowerRestBuild() {
- for (typename std::vector<const Model*>::const_iterator i = models_.begin(); i != models_.end(); ++i) {
- delete *i;
- }
-}
-
-template class LowerRestBuild<ProbingModel>;
-
-} // namespace ngram
-} // namespace lm
diff --git a/src/joshua/decoder/ff/lm/kenlm/lm/value_build.hh b/src/joshua/decoder/ff/lm/kenlm/lm/value_build.hh
deleted file mode 100644
index 6fd26ef..0000000
--- a/src/joshua/decoder/ff/lm/kenlm/lm/value_build.hh
+++ /dev/null
@@ -1,97 +0,0 @@
-#ifndef LM_VALUE_BUILD_H
-#define LM_VALUE_BUILD_H
-
-#include "lm/weights.hh"
-#include "lm/word_index.hh"
-#include "util/bit_packing.hh"
-
-#include <vector>
-
-namespace lm {
-namespace ngram {
-
-struct Config;
-struct BackoffValue;
-struct RestValue;
-
-class NoRestBuild {
- public:
- typedef BackoffValue Value;
-
- NoRestBuild() {}
-
- void SetRest(const WordIndex *, unsigned int, const Prob &/*prob*/) const {}
- void SetRest(const WordIndex *, unsigned int, const ProbBackoff &) const {}
-
- template <class Second> bool MarkExtends(ProbBackoff &weights, const Second &) const {
- util::UnsetSign(weights.prob);
- return false;
- }
-
- // Probing doesn't need to go back to unigram.
- const static bool kMarkEvenLower = false;
-};
-
-class MaxRestBuild {
- public:
- typedef RestValue Value;
-
- MaxRestBuild() {}
-
- void SetRest(const WordIndex *, unsigned int, const Prob &/*prob*/) const {}
- void SetRest(const WordIndex *, unsigned int, RestWeights &weights) const {
- weights.rest = weights.prob;
- util::SetSign(weights.rest);
- }
-
- bool MarkExtends(RestWeights &weights, const RestWeights &to) const {
- util::UnsetSign(weights.prob);
- if (weights.rest >= to.rest) return false;
- weights.rest = to.rest;
- return true;
- }
- bool MarkExtends(RestWeights &weights, const Prob &to) const {
- util::UnsetSign(weights.prob);
- if (weights.rest >= to.prob) return false;
- weights.rest = to.prob;
- return true;
- }
-
- // Probing does need to go back to unigram.
- const static bool kMarkEvenLower = true;
-};
-
-template <class Model> class LowerRestBuild {
- public:
- typedef RestValue Value;
-
- LowerRestBuild(const Config &config, unsigned int order, const typename Model::Vocabulary &vocab);
-
- ~LowerRestBuild();
-
- void SetRest(const WordIndex *, unsigned int, const Prob &/*prob*/) const {}
- void SetRest(const WordIndex *vocab_ids, unsigned int n, RestWeights &weights) const {
- typename Model::State ignored;
- if (n == 1) {
- weights.rest = unigrams_[*vocab_ids];
- } else {
- weights.rest = models_[n-2]->FullScoreForgotState(vocab_ids + 1, vocab_ids + n, *vocab_ids, ignored).prob;
- }
- }
-
- template <class Second> bool MarkExtends(RestWeights &weights, const Second &) const {
- util::UnsetSign(weights.prob);
- return false;
- }
-
- const static bool kMarkEvenLower = false;
-
- std::vector<float> unigrams_;
-
- std::vector<const Model*> models_;
-};
-
-} // namespace ngram
-} // namespace lm
-
-#endif // LM_VALUE_BUILD_H
diff --git a/src/joshua/decoder/ff/lm/kenlm/lm/virtual_interface.hh b/src/joshua/decoder/ff/lm/kenlm/lm/virtual_interface.hh
deleted file mode 100644
index 2a2690e..0000000
--- a/src/joshua/decoder/ff/lm/kenlm/lm/virtual_interface.hh
+++ /dev/null
@@ -1,160 +0,0 @@
-#ifndef LM_VIRTUAL_INTERFACE_H
-#define LM_VIRTUAL_INTERFACE_H
-
-#include "lm/return.hh"
-#include "lm/word_index.hh"
-#include "util/string_piece.hh"
-
-#include <string>
-#include <string.h>
-
-namespace lm {
-namespace base {
-
-template <class T, class U, class V> class ModelFacade;
-
-/* Vocabulary interface. Call Index(string) and get a word index for use in
- * calling Model. It provides faster convenience functions for <s>, </s>, and
- * <unk> although you can also find these using Index.
- *
- * Some models do not load the mapping from index to string. If you need this,
- * check if the model Vocabulary class implements such a function and access it
- * directly.
- *
- * The Vocabulary object is always owned by the Model and can be retrieved from
- * the Model using BaseVocabulary() for this abstract interface or
- * GetVocabulary() for the actual implementation (in which case you'll need the
- * actual implementation of the Model too).
- */
-class Vocabulary {
- public:
- virtual ~Vocabulary();
-
- WordIndex BeginSentence() const { return begin_sentence_; }
- WordIndex EndSentence() const { return end_sentence_; }
- WordIndex NotFound() const { return not_found_; }
-
- /* Most implementations allow StringPiece lookups and need only override
- * Index(StringPiece). SRI requires null termination and overrides all
- * three methods.
- */
- virtual WordIndex Index(const StringPiece &str) const = 0;
- virtual WordIndex Index(const std::string &str) const {
- return Index(StringPiece(str));
- }
- virtual WordIndex Index(const char *str) const {
- return Index(StringPiece(str));
- }
-
- protected:
- // Call SetSpecial afterward.
- Vocabulary() {}
-
- Vocabulary(WordIndex begin_sentence, WordIndex end_sentence, WordIndex not_found) {
- SetSpecial(begin_sentence, end_sentence, not_found);
- }
-
- void SetSpecial(WordIndex begin_sentence, WordIndex end_sentence, WordIndex not_found);
-
- WordIndex begin_sentence_, end_sentence_, not_found_;
-
- private:
- // Disable copy constructors. They're private and undefined.
- // Ersatz boost::noncopyable.
- Vocabulary(const Vocabulary &);
- Vocabulary &operator=(const Vocabulary &);
-};
-
-/* There are two ways to access a Model.
- *
- *
- * OPTION 1: Access the Model directly (e.g. lm::ngram::Model in model.hh).
- *
- * Every Model implements the scoring function:
- * float Score(
- * const Model::State &in_state,
- * const WordIndex new_word,
- * Model::State &out_state) const;
- *
- * It can also return the length of n-gram matched by the model:
- * FullScoreReturn FullScore(
- * const Model::State &in_state,
- * const WordIndex new_word,
- * Model::State &out_state) const;
- *
- *
- * There are also accessor functions:
- * const State &BeginSentenceState() const;
- * const State &NullContextState() const;
- * const Vocabulary &GetVocabulary() const;
- * unsigned int Order() const;
- *
- * NB: In case you're wondering why the model implementation looks like it's
- * missing these methods, see facade.hh.
- *
- * This is the fastest way to use a model and presents a normal State class to
- * be included in a hypothesis state structure.
- *
- *
- * OPTION 2: Use the virtual interface below.
- *
- * The virtual interface allow you to decide which Model to use at runtime
- * without templatizing everything on the Model type. However, each Model has
- * its own State class, so a single State cannot be efficiently provided (it
- * would require using the maximum memory of any Model's State or memory
- * allocation with each lookup). This means you become responsible for
- * allocating memory with size StateSize() and passing it to the Score or
- * FullScore functions provided here.
- *
- * For example, cdec has a std::string containing the entire state of a
- * hypothesis. It can reserve StateSize bytes in this string for the model
- * state.
- *
- * All the State objects are POD, so it's ok to use raw memory for storing
- * State.
- * in_state and out_state must not have the same address.
- */
-class Model {
- public:
- virtual ~Model();
-
- size_t StateSize() const { return state_size_; }
- const void *BeginSentenceMemory() const { return begin_sentence_memory_; }
- void BeginSentenceWrite(void *to) const { memcpy(to, begin_sentence_memory_, StateSize()); }
- const void *NullContextMemory() const { return null_context_memory_; }
- void NullContextWrite(void *to) const { memcpy(to, null_context_memory_, StateSize()); }
-
- // Requires in_state != out_state
- virtual float BaseScore(const void *in_state, const WordIndex new_word, void *out_state) const = 0;
-
- // Requires in_state != out_state
- virtual FullScoreReturn BaseFullScore(const void *in_state, const WordIndex new_word, void *out_state) const = 0;
-
- // Prefer to use FullScore. The context words should be provided in reverse order.
- virtual FullScoreReturn BaseFullScoreForgotState(const WordIndex *context_rbegin, const WordIndex *context_rend, const WordIndex new_word, void *out_state) const = 0;
-
- unsigned char Order() const { return order_; }
-
- const Vocabulary &BaseVocabulary() const { return *base_vocab_; }
-
- private:
- template <class T, class U, class V> friend class ModelFacade;
- explicit Model(size_t state_size) : state_size_(state_size) {}
-
- const size_t state_size_;
- const void *begin_sentence_memory_, *null_context_memory_;
-
- const Vocabulary *base_vocab_;
-
- unsigned char order_;
-
- // Disable copy constructors. They're private and undefined.
- // Ersatz boost::noncopyable.
- Model(const Model &);
- Model &operator=(const Model &);
-};
-
-} // mamespace base
-} // namespace lm
-
-#endif // LM_VIRTUAL_INTERFACE_H
diff --git a/src/joshua/decoder/ff/lm/kenlm/lm/vocab.cc b/src/joshua/decoder/ff/lm/kenlm/lm/vocab.cc
deleted file mode 100644
index 2285d27..0000000
--- a/src/joshua/decoder/ff/lm/kenlm/lm/vocab.cc
+++ /dev/null
@@ -1,252 +0,0 @@
-#include "lm/vocab.hh"
-
-#include "lm/binary_format.hh"
-#include "lm/enumerate_vocab.hh"
-#include "lm/lm_exception.hh"
-#include "lm/config.hh"
-#include "lm/weights.hh"
-#include "util/exception.hh"
-#include "util/file.hh"
-#include "util/joint_sort.hh"
-#include "util/murmur_hash.hh"
-#include "util/probing_hash_table.hh"
-
-#include <string>
-
-#include <string.h>
-
-namespace lm {
-namespace ngram {
-
-namespace detail {
-uint64_t HashForVocab(const char *str, std::size_t len) {
- // This proved faster than Boost's hash in speed trials: total load time Murmur 67090000, Boost 72210000
- // Chose to use 64A instead of native so binary format will be portable across 64 and 32 bit.
- return util::MurmurHash64A(str, len, 0);
-}
-} // namespace detail
-
-namespace {
-// Normally static initialization is a bad idea but MurmurHash is pure arithmetic, so this is ok.
-const uint64_t kUnknownHash = detail::HashForVocab("<unk>", 5);
-// Sadly some LMs have <UNK>.
-const uint64_t kUnknownCapHash = detail::HashForVocab("<UNK>", 5);
-
-void ReadWords(int fd, EnumerateVocab *enumerate, WordIndex expected_count, uint64_t offset) {
- util::SeekOrThrow(fd, offset);
- // Check that we're at the right place by reading <unk> which is always first.
- char check_unk[6];
- util::ReadOrThrow(fd, check_unk, 6);
- UTIL_THROW_IF(
- memcmp(check_unk, "<unk>", 6),
- FormatLoadException,
- "Vocabulary words are in the wrong place. This could be because the binary file was built with stale gcc and old kenlm. Stale gcc, including the gcc distributed with RedHat and OS X, has a bug that ignores pragma pack for template-dependent types. New kenlm works around this, so you'll save memory but have to rebuild any binary files using the probing data structure.");
- if (!enumerate) return;
- enumerate->Add(0, "<unk>");
-
- // Read all the words after unk.
- const std::size_t kInitialRead = 16384;
- std::string buf;
- buf.reserve(kInitialRead + 100);
- buf.resize(kInitialRead);
- WordIndex index = 1; // Read <unk> already.
- while (true) {
- std::size_t got = util::ReadOrEOF(fd, &buf[0], kInitialRead);
- if (got == 0) break;
- buf.resize(got);
- while (buf[buf.size() - 1]) {
- char next_char;
- util::ReadOrThrow(fd, &next_char, 1);
- buf.push_back(next_char);
- }
- // Ok now we have null terminated strings.
- for (const char *i = buf.data(); i != buf.data() + buf.size();) {
- std::size_t length = strlen(i);
- enumerate->Add(index++, StringPiece(i, length));
- i += length + 1 /* null byte */;
- }
- }
-
- UTIL_THROW_IF(expected_count != index, FormatLoadException, "The binary file has the wrong number of words at the end. This could be caused by a truncated binary file.");
-}
-
-} // namespace
-
-WriteWordsWrapper::WriteWordsWrapper(EnumerateVocab *inner) : inner_(inner) {}
-WriteWordsWrapper::~WriteWordsWrapper() {}
-
-void WriteWordsWrapper::Add(WordIndex index, const StringPiece &str) {
- if (inner_) inner_->Add(index, str);
- buffer_.append(str.data(), str.size());
- buffer_.push_back(0);
-}
-
-SortedVocabulary::SortedVocabulary() : begin_(NULL), end_(NULL), enumerate_(NULL) {}
-
-uint64_t SortedVocabulary::Size(uint64_t entries, const Config &/*config*/) {
- // Lead with the number of entries.
- return sizeof(uint64_t) + sizeof(uint64_t) * entries;
-}
-
-void SortedVocabulary::SetupMemory(void *start, std::size_t allocated, std::size_t entries, const Config &config) {
- assert(allocated >= Size(entries, config));
- // Leave space for number of entries.
- begin_ = reinterpret_cast<uint64_t*>(start) + 1;
- end_ = begin_;
- saw_unk_ = false;
-}
-
-void SortedVocabulary::Relocate(void *new_start) {
- std::size_t delta = end_ - begin_;
- begin_ = reinterpret_cast<uint64_t*>(new_start) + 1;
- end_ = begin_ + delta;
-}
-
-void SortedVocabulary::ConfigureEnumerate(EnumerateVocab *to, std::size_t max_entries) {
- enumerate_ = to;
- if (enumerate_) {
- enumerate_->Add(0, "<unk>");
- strings_to_enumerate_.resize(max_entries);
- }
-}
-
-WordIndex SortedVocabulary::Insert(const StringPiece &str) {
- uint64_t hashed = detail::HashForVocab(str);
- if (hashed == kUnknownHash || hashed == kUnknownCapHash) {
- saw_unk_ = true;
- return 0;
- }
- *end_ = hashed;
- if (enumerate_) {
- void *copied = string_backing_.Allocate(str.size());
- memcpy(copied, str.data(), str.size());
- strings_to_enumerate_[end_ - begin_] = StringPiece(static_cast<const char*>(copied), str.size());
- }
- ++end_;
- // This is 1 + the offset where it was inserted to make room for unk.
- return end_ - begin_;
-}
-
-void SortedVocabulary::FinishedLoading(ProbBackoff *reorder_vocab) {
- if (enumerate_) {
- if (!strings_to_enumerate_.empty()) {
- util::PairedIterator<ProbBackoff*, StringPiece*> values(reorder_vocab + 1, &*strings_to_enumerate_.begin());
- util::JointSort(begin_, end_, values);
- }
- for (WordIndex i = 0; i < static_cast<WordIndex>(end_ - begin_); ++i) {
- // <unk> strikes again: +1 here.
- enumerate_->Add(i + 1, strings_to_enumerate_[i]);
- }
- strings_to_enumerate_.clear();
- string_backing_.FreeAll();
- } else {
- util::JointSort(begin_, end_, reorder_vocab + 1);
- }
- SetSpecial(Index("<s>"), Index("</s>"), 0);
- // Save size. Excludes UNK.
- *(reinterpret_cast<uint64_t*>(begin_) - 1) = end_ - begin_;
- // Includes UNK.
- bound_ = end_ - begin_ + 1;
-}
-
-void SortedVocabulary::LoadedBinary(bool have_words, int fd, EnumerateVocab *to, uint64_t offset) {
- end_ = begin_ + *(reinterpret_cast<const uint64_t*>(begin_) - 1);
- SetSpecial(Index("<s>"), Index("</s>"), 0);
- bound_ = end_ - begin_ + 1;
- if (have_words) ReadWords(fd, to, bound_, offset);
-}
-
-namespace {
-const unsigned int kProbingVocabularyVersion = 0;
-} // namespace
-
-namespace detail {
-struct ProbingVocabularyHeader {
- // Lowest unused vocab id. This is also the number of words, including <unk>.
- unsigned int version;
- WordIndex bound;
-};
-} // namespace detail
-
-ProbingVocabulary::ProbingVocabulary() : enumerate_(NULL) {}
-
-uint64_t ProbingVocabulary::Size(uint64_t entries, float probing_multiplier) {
- return ALIGN8(sizeof(detail::ProbingVocabularyHeader)) + Lookup::Size(entries, probing_multiplier);
-}
-
-uint64_t ProbingVocabulary::Size(uint64_t entries, const Config &config) {
- return Size(entries, config.probing_multiplier);
-}
-
-void ProbingVocabulary::SetupMemory(void *start, std::size_t allocated) {
- header_ = static_cast<detail::ProbingVocabularyHeader*>(start);
- lookup_ = Lookup(static_cast<uint8_t*>(start) + ALIGN8(sizeof(detail::ProbingVocabularyHeader)), allocated);
- bound_ = 1;
- saw_unk_ = false;
-}
-
-void ProbingVocabulary::Relocate(void *new_start) {
- header_ = static_cast<detail::ProbingVocabularyHeader*>(new_start);
- lookup_.Relocate(static_cast<uint8_t*>(new_start) + ALIGN8(sizeof(detail::ProbingVocabularyHeader)));
-}
-
-void ProbingVocabulary::ConfigureEnumerate(EnumerateVocab *to, std::size_t /*max_entries*/) {
- enumerate_ = to;
- if (enumerate_) {
- enumerate_->Add(0, "<unk>");
- }
-}
-
-WordIndex ProbingVocabulary::Insert(const StringPiece &str) {
- uint64_t hashed = detail::HashForVocab(str);
- // Prevent unknown from going into the table.
- if (hashed == kUnknownHash || hashed == kUnknownCapHash) {
- saw_unk_ = true;
- return 0;
- } else {
- if (enumerate_) enumerate_->Add(bound_, str);
- lookup_.Insert(ProbingVocabularyEntry::Make(hashed, bound_));
- return bound_++;
- }
-}
-
-void ProbingVocabulary::FinishedLoading() {
- lookup_.FinishedInserting();
- header_->bound = bound_;
- header_->version = kProbingVocabularyVersion;
- SetSpecial(Index("<s>"), Index("</s>"), 0);
-}
-
-void ProbingVocabulary::LoadedBinary(bool have_words, int fd, EnumerateVocab *to, uint64_t offset) {
- UTIL_THROW_IF(header_->version != kProbingVocabularyVersion, FormatLoadException, "The binary file has probing version " << header_->version << " but the code expects version " << kProbingVocabularyVersion << ". Please rerun build_binary using the same version of the code.");
- bound_ = header_->bound;
- SetSpecial(Index("<s>"), Index("</s>"), 0);
- if (have_words) ReadWords(fd, to, bound_, offset);
-}
-
-void MissingUnknown(const Config &config) throw(SpecialWordMissingException) {
- switch(config.unknown_missing) {
- case SILENT:
- return;
- case COMPLAIN:
- if (config.messages) *config.messages << "The ARPA file is missing <unk>. Substituting log10 probability " << config.unknown_missing_logprob << "." << std::endl;
- break;
- case THROW_UP:
- UTIL_THROW(SpecialWordMissingException, "The ARPA file is missing <unk> and the model is configured to throw an exception.");
- }
-}
-
-void MissingSentenceMarker(const Config &config, const char *str) throw(SpecialWordMissingException) {
- switch (config.sentence_marker_missing) {
- case SILENT:
- return;
- case COMPLAIN:
- if (config.messages) *config.messages << "Missing special word " << str << "; will treat it as <unk>.";
- break;
- case THROW_UP:
- UTIL_THROW(SpecialWordMissingException, "The ARPA file is missing " << str << " and the model is configured to reject these models. Run build_binary -s to disable this check.");
- }
-}
-
-} // namespace ngram
-} // namespace lm
diff --git a/src/joshua/decoder/ff/lm/kenlm/lm/vocab.hh b/src/joshua/decoder/ff/lm/kenlm/lm/vocab.hh
deleted file mode 100644
index d6ae07b..0000000
--- a/src/joshua/decoder/ff/lm/kenlm/lm/vocab.hh
+++ /dev/null
@@ -1,249 +0,0 @@
-#ifndef LM_VOCAB_H
-#define LM_VOCAB_H
-
-#include "lm/enumerate_vocab.hh"
-#include "lm/lm_exception.hh"
-#include "lm/virtual_interface.hh"
-#include "util/fake_ofstream.hh"
-#include "util/murmur_hash.hh"
-#include "util/pool.hh"
-#include "util/probing_hash_table.hh"
-#include "util/sorted_uniform.hh"
-#include "util/string_piece.hh"
-
-#include <limits>
-#include <string>
-#include <vector>
-
-namespace lm {
-struct ProbBackoff;
-class EnumerateVocab;
-
-namespace ngram {
-struct Config;
-
-namespace detail {
-uint64_t HashForVocab(const char *str, std::size_t len);
-inline uint64_t HashForVocab(const StringPiece &str) {
- return HashForVocab(str.data(), str.length());
-}
-struct ProbingVocabularyHeader;
-} // namespace detail
-
-class WriteWordsWrapper : public EnumerateVocab {
- public:
- WriteWordsWrapper(EnumerateVocab *inner);
-
- ~WriteWordsWrapper();
-
- void Add(WordIndex index, const StringPiece &str);
-
- const std::string &Buffer() const { return buffer_; }
-
- private:
- EnumerateVocab *inner_;
-
- std::string buffer_;
-};
-
-// Vocabulary based on sorted uniform find storing only uint64_t values and using their offsets as indices.
-class SortedVocabulary : public base::Vocabulary {
- public:
- SortedVocabulary();
-
- WordIndex Index(const StringPiece &str) const {
- const uint64_t *found;
- if (util::BoundedSortedUniformFind<const uint64_t*, util::IdentityAccessor<uint64_t>, util::Pivot64>(
- util::IdentityAccessor<uint64_t>(),
- begin_ - 1, 0,
- end_, std::numeric_limits<uint64_t>::max(),
- detail::HashForVocab(str), found)) {
- return found - begin_ + 1; // +1 because <unk> is 0 and does not appear in the lookup table.
- } else {
- return 0;
- }
- }
-
- // Size for purposes of file writing
- static uint64_t Size(uint64_t entries, const Config &config);
-
- // Vocab words are [0, Bound()) Only valid after FinishedLoading/LoadedBinary.
- WordIndex Bound() const { return bound_; }
-
- // Everything else is for populating. I'm too lazy to hide and friend these, but you'll only get a const reference anyway.
- void SetupMemory(void *start, std::size_t allocated, std::size_t entries, const Config &config);
-
- void Relocate(void *new_start);
-
- void ConfigureEnumerate(EnumerateVocab *to, std::size_t max_entries);
-
- WordIndex Insert(const StringPiece &str);
-
- // Reorders reorder_vocab so that the IDs are sorted.
- void FinishedLoading(ProbBackoff *reorder_vocab);
-
- // Trie stores the correct counts including <unk> in the header. If this was previously sized based on a count exluding <unk>, padding with 8 bytes will make it the correct size based on a count including <unk>.
- std::size_t UnkCountChangePadding() const { return SawUnk() ? 0 : sizeof(uint64_t); }
-
- bool SawUnk() const { return saw_unk_; }
-
- void LoadedBinary(bool have_words, int fd, EnumerateVocab *to, uint64_t offset);
-
- private:
- uint64_t *begin_, *end_;
-
- WordIndex bound_;
-
- bool saw_unk_;
-
- EnumerateVocab *enumerate_;
-
- // Actual strings. Used only when loading from ARPA and enumerate_ != NULL
- util::Pool string_backing_;
-
- std::vector<StringPiece> strings_to_enumerate_;
-};
-
-#pragma pack(push)
-#pragma pack(4)
-struct ProbingVocabularyEntry {
- uint64_t key;
- WordIndex value;
-
- typedef uint64_t Key;
- uint64_t GetKey() const { return key; }
- void SetKey(uint64_t to) { key = to; }
-
- static ProbingVocabularyEntry Make(uint64_t key, WordIndex value) {
- ProbingVocabularyEntry ret;
- ret.key = key;
- ret.value = value;
- return ret;
- }
-};
-#pragma pack(pop)
-
-// Vocabulary storing a map from uint64_t to WordIndex.
-class ProbingVocabulary : public base::Vocabulary {
- public:
- ProbingVocabulary();
-
- WordIndex Index(const StringPiece &str) const {
- Lookup::ConstIterator i;
- return lookup_.Find(detail::HashForVocab(str), i) ? i->value : 0;
- }
-
- static uint64_t Size(uint64_t entries, float probing_multiplier);
- // This just unwraps Config to get the probing_multiplier.
- static uint64_t Size(uint64_t entries, const Config &config);
-
- // Vocab words are [0, Bound()).
- WordIndex Bound() const { return bound_; }
-
- // Everything else is for populating. I'm too lazy to hide and friend these, but you'll only get a const reference anyway.
- void SetupMemory(void *start, std::size_t allocated);
- void SetupMemory(void *start, std::size_t allocated, std::size_t /*entries*/, const Config &/*config*/) {
- SetupMemory(start, allocated);
- }
-
- void Relocate(void *new_start);
-
- void ConfigureEnumerate(EnumerateVocab *to, std::size_t max_entries);
-
- WordIndex Insert(const StringPiece &str);
-
- template <class Weights> void FinishedLoading(Weights * /*reorder_vocab*/) {
- FinishedLoading();
- }
- void FinishedLoading();
-
- std::size_t UnkCountChangePadding() const { return 0; }
-
- bool SawUnk() const { return saw_unk_; }
-
- void LoadedBinary(bool have_words, int fd, EnumerateVocab *to, uint64_t offset);
-
- private:
- typedef util::ProbingHashTable<ProbingVocabularyEntry, util::IdentityHash> Lookup;
-
- Lookup lookup_;
-
- WordIndex bound_;
-
- bool saw_unk_;
-
- EnumerateVocab *enumerate_;
-
- detail::ProbingVocabularyHeader *header_;
-};
-
-void MissingUnknown(const Config &config) throw(SpecialWordMissingException);
-void MissingSentenceMarker(const Config &config, const char *str) throw(SpecialWordMissingException);
-
-template <class Vocab> void CheckSpecials(const Config &config, const Vocab &vocab) throw(SpecialWordMissingException) {
- if (!vocab.SawUnk()) MissingUnknown(config);
- if (vocab.BeginSentence() == vocab.NotFound()) MissingSentenceMarker(config, "<s>");
- if (vocab.EndSentence() == vocab.NotFound()) MissingSentenceMarker(config, "</s>");
-}
-
-class WriteUniqueWords {
- public:
- explicit WriteUniqueWords(int fd) : word_list_(fd) {}
-
- void operator()(const StringPiece &word) {
- word_list_ << word << '\0';
- }
-
- private:
- util::FakeOFStream word_list_;
-};
-
-class NoOpUniqueWords {
- public:
- NoOpUniqueWords() {}
- void operator()(const StringPiece &word) {}
-};
-
-template <class NewWordAction = NoOpUniqueWords> class GrowableVocab {
- public:
- static std::size_t MemUsage(WordIndex content) {
- return Lookup::MemUsage(content > 2 ? content : 2);
- }
-
- // Does not take ownership of write_wordi
- template <class NewWordConstruct> GrowableVocab(WordIndex initial_size, const NewWordConstruct &new_word_construct = NewWordAction())
- : lookup_(initial_size), new_word_(new_word_construct) {
- FindOrInsert("<unk>"); // Force 0
- FindOrInsert("<s>"); // Force 1
- FindOrInsert("</s>"); // Force 2
- }
-
- WordIndex Index(const StringPiece &str) const {
- Lookup::ConstIterator i;
- return lookup_.Find(detail::HashForVocab(str), i) ? i->value : 0;
- }
-
- WordIndex FindOrInsert(const StringPiece &word) {
- ProbingVocabularyEntry entry = ProbingVocabularyEntry::Make(util::MurmurHashNative(word.data(), word.size()), Size());
- Lookup::MutableIterator it;
- if (!lookup_.FindOrInsert(entry, it)) {
- new_word_(word);
- UTIL_THROW_IF(Size() >= std::numeric_limits<lm::WordIndex>::max(), VocabLoadException, "Too many vocabulary words. Change WordIndex to uint64_t in lm/word_index.hh");
- }
- return it->value;
- }
-
- WordIndex Size() const { return lookup_.Size(); }
-
- private:
- typedef util::AutoProbing<ProbingVocabularyEntry, util::IdentityHash> Lookup;
-
- Lookup lookup_;
-
- NewWordAction new_word_;
-};
-
-} // namespace ngram
-} // namespace lm
-
-#endif // LM_VOCAB_H
diff --git a/src/joshua/decoder/ff/lm/kenlm/lm/weights.hh b/src/joshua/decoder/ff/lm/kenlm/lm/weights.hh
deleted file mode 100644
index da1963d..0000000
--- a/src/joshua/decoder/ff/lm/kenlm/lm/weights.hh
+++ /dev/null
@@ -1,22 +0,0 @@
-#ifndef LM_WEIGHTS_H
-#define LM_WEIGHTS_H
-
-// Weights for n-grams. Probability and possibly a backoff.
-
-namespace lm {
-struct Prob {
- float prob;
-};
-// No inheritance so this will be a POD.
-struct ProbBackoff {
- float prob;
- float backoff;
-};
-struct RestWeights {
- float prob;
- float backoff;
- float rest;
-};
-
-} // namespace lm
-#endif // LM_WEIGHTS_H
diff --git a/src/joshua/decoder/ff/lm/kenlm/lm/word_index.hh b/src/joshua/decoder/ff/lm/kenlm/lm/word_index.hh
deleted file mode 100644
index a5a0fda..0000000
--- a/src/joshua/decoder/ff/lm/kenlm/lm/word_index.hh
+++ /dev/null
@@ -1,14 +0,0 @@
-// Separate header because this is used often.
-#ifndef LM_WORD_INDEX_H
-#define LM_WORD_INDEX_H
-
-#include <limits.h>
-
-namespace lm {
-typedef unsigned int WordIndex;
-const WordIndex kMaxWordIndex = UINT_MAX;
-} // namespace lm
-
-typedef lm::WordIndex LMWordIndex;
-
-#endif
diff --git a/src/joshua/decoder/ff/lm/kenlm/util/Jamfile b/src/joshua/decoder/ff/lm/kenlm/util/Jamfile
deleted file mode 100644
index 18b20a3..0000000
--- a/src/joshua/decoder/ff/lm/kenlm/util/Jamfile
+++ /dev/null
@@ -1,36 +0,0 @@
-local compressed_flags = <include>.. <define>HAVE_ZLIB ;
-local compressed_deps = /top//z ;
-if [ test_library "bz2" ] && [ test_header "bzlib.h" ] {
- external-lib bz2 ;
- compressed_flags += <define>HAVE_BZLIB ;
- compressed_deps += bz2 ;
-}
-if [ test_library "lzma" ] && [ test_header "lzma.h" ] {
- external-lib lzma ;
- compressed_flags += <define>HAVE_XZLIB ;
- compressed_deps += lzma ;
-}
-
-#rt is needed for clock_gettime on linux. But it's already included with threading=multi
-lib rt ;
-
-obj read_compressed.o : read_compressed.cc : $(compressed_flags) ;
-alias read_compressed : read_compressed.o $(compressed_deps) ;
-obj read_compressed_test.o : read_compressed_test.cc /top//boost_unit_test_framework : $(compressed_flags) ;
-obj file_piece_test.o : file_piece_test.cc /top//boost_unit_test_framework : $(compressed_flags) ;
-
-fakelib parallel_read : parallel_read.cc : <threading>multi:<source>/top//boost_thread <threading>multi:<define>WITH_THREADS : : <include>.. ;
-
-fakelib kenutil : bit_packing.cc ersatz_progress.cc exception.cc file.cc file_piece.cc mmap.cc murmur_hash.cc parallel_read pool.cc read_compressed scoped.cc string_piece.cc usage.cc double-conversion//double-conversion : <include>.. <os>LINUX,<threading>single:<source>rt : : <include>.. ;
-
-exe cat_compressed : cat_compressed_main.cc kenutil ;
-
-alias programs : cat_compressed ;
-
-import testing ;
-
-run file_piece_test.o kenutil /top//boost_unit_test_framework : : file_piece.cc ;
-for local t in [ glob *_test.cc : file_piece_test.cc read_compressed_test.cc ] {
- local name = [ MATCH "(.*)\.cc" : $(t) ] ;
- unit-test $(name) : $(t) kenutil /top//boost_unit_test_framework /top//boost_system ;
-}
diff --git a/src/joshua/decoder/ff/lm/kenlm/util/bit_packing.cc b/src/joshua/decoder/ff/lm/kenlm/util/bit_packing.cc
deleted file mode 100644
index 41999b7..0000000
--- a/src/joshua/decoder/ff/lm/kenlm/util/bit_packing.cc
+++ /dev/null
@@ -1,40 +0,0 @@
-#include "util/bit_packing.hh"
-#include "util/exception.hh"
-
-#include <string.h>
-
-namespace util {
-
-namespace {
-template <bool> struct StaticCheck {};
-template <> struct StaticCheck<true> { typedef bool StaticAssertionPassed; };
-
-// If your float isn't 4 bytes, we're hosed.
-typedef StaticCheck<sizeof(float) == 4>::StaticAssertionPassed FloatSize;
-
-} // namespace
-
-uint8_t RequiredBits(uint64_t max_value) {
- if (!max_value) return 0;
- uint8_t ret = 1;
- while (max_value >>= 1) ++ret;
- return ret;
-}
-
-void BitPackingSanity() {
- const FloatEnc neg1 = { -1.0 }, pos1 = { 1.0 };
- if ((neg1.i ^ pos1.i) != 0x80000000) UTIL_THROW(Exception, "Sign bit is not 0x80000000");
- char mem[57+8];
- memset(mem, 0, sizeof(mem));
- const uint64_t test57 = 0x123456789abcdefULL;
- for (uint64_t b = 0; b < 57 * 8; b += 57) {
- WriteInt57(mem, b, 57, test57);
- }
- for (uint64_t b = 0; b < 57 * 8; b += 57) {
- if (test57 != ReadInt57(mem, b, 57, (1ULL << 57) - 1))
- UTIL_THROW(Exception, "The bit packing routines are failing for your architecture. Please send a bug report with your architecture, operating system, and compiler.");
- }
- // TODO: more checks.
-}
-
-} // namespace util
diff --git a/src/joshua/decoder/ff/lm/kenlm/util/bit_packing.hh b/src/joshua/decoder/ff/lm/kenlm/util/bit_packing.hh
deleted file mode 100644
index 1e34d9a..0000000
--- a/src/joshua/decoder/ff/lm/kenlm/util/bit_packing.hh
+++ /dev/null
@@ -1,186 +0,0 @@
-#ifndef UTIL_BIT_PACKING_H
-#define UTIL_BIT_PACKING_H
-
-/* Bit-level packing routines
- *
- * WARNING WARNING WARNING:
- * The write functions assume that memory is zero initially. This makes them
- * faster and is the appropriate case for mmapped language model construction.
- * These routines assume that unaligned access to uint64_t is fast. This is
- * the case on x86_64. I'm not sure how fast unaligned 64-bit access is on
- * x86 but my target audience is large language models for which 64-bit is
- * necessary.
- *
- * Call the BitPackingSanity function to sanity check. Calling once suffices,
- * but it may be called multiple times when that's inconvenient.
- *
- * ARM and MinGW ports contributed by Hideo Okuma and Tomoyuki Yoshimura at
- * NICT.
- */
-
-#include <assert.h>
-#ifdef __APPLE__
-#include <architecture/byte_order.h>
-#elif __linux__
-#include <endian.h>
-#elif !defined(_WIN32) && !defined(_WIN64)
-#include <arpa/nameser_compat.h>
-#endif
-
-#include <stdint.h>
-
-#include <string.h>
-
-namespace util {
-
-// Fun fact: __BYTE_ORDER is wrong on Solaris Sparc, but the version without __ is correct.
-#if BYTE_ORDER == LITTLE_ENDIAN
-inline uint8_t BitPackShift(uint8_t bit, uint8_t /*length*/) {
- return bit;
-}
-#elif BYTE_ORDER == BIG_ENDIAN
-inline uint8_t BitPackShift(uint8_t bit, uint8_t length) {
- return 64 - length - bit;
-}
-#else
-#error "Bit packing code isn't written for your byte order."
-#endif
-
-inline uint64_t ReadOff(const void *base, uint64_t bit_off) {
-#if defined(__arm) || defined(__arm__)
- const uint8_t *base_off = reinterpret_cast<const uint8_t*>(base) + (bit_off >> 3);
- uint64_t value64;
- memcpy(&value64, base_off, sizeof(value64));
- return value64;
-#else
- return *reinterpret_cast<const uint64_t*>(reinterpret_cast<const uint8_t*>(base) + (bit_off >> 3));
-#endif
-}
-
-/* Pack integers up to 57 bits using their least significant digits.
- * The length is specified using mask:
- * Assumes mask == (1 << length) - 1 where length <= 57.
- */
-inline uint64_t ReadInt57(const void *base, uint64_t bit_off, uint8_t length, uint64_t mask) {
- return (ReadOff(base, bit_off) >> BitPackShift(bit_off & 7, length)) & mask;
-}
-/* Assumes value < (1 << length) and length <= 57.
- * Assumes the memory is zero initially.
- */
-inline void WriteInt57(void *base, uint64_t bit_off, uint8_t length, uint64_t value) {
-#if defined(__arm) || defined(__arm__)
- uint8_t *base_off = reinterpret_cast<uint8_t*>(base) + (bit_off >> 3);
- uint64_t value64;
- memcpy(&value64, base_off, sizeof(value64));
- value64 |= (value << BitPackShift(bit_off & 7, length));
- memcpy(base_off, &value64, sizeof(value64));
-#else
- *reinterpret_cast<uint64_t*>(reinterpret_cast<uint8_t*>(base) + (bit_off >> 3)) |=
- (value << BitPackShift(bit_off & 7, length));
-#endif
-}
-
-/* Same caveats as above, but for a 25 bit limit. */
-inline uint32_t ReadInt25(const void *base, uint64_t bit_off, uint8_t length, uint32_t mask) {
-#if defined(__arm) || defined(__arm__)
- const uint8_t *base_off = reinterpret_cast<const uint8_t*>(base) + (bit_off >> 3);
- uint32_t value32;
- memcpy(&value32, base_off, sizeof(value32));
- return (value32 >> BitPackShift(bit_off & 7, length)) & mask;
-#else
- return (*reinterpret_cast<const uint32_t*>(reinterpret_cast<const uint8_t*>(base) + (bit_off >> 3)) >> BitPackShift(bit_off & 7, length)) & mask;
-#endif
-}
-
-inline void WriteInt25(void *base, uint64_t bit_off, uint8_t length, uint32_t value) {
-#if defined(__arm) || defined(__arm__)
- uint8_t *base_off = reinterpret_cast<uint8_t*>(base) + (bit_off >> 3);
- uint32_t value32;
- memcpy(&value32, base_off, sizeof(value32));
- value32 |= (value << BitPackShift(bit_off & 7, length));
- memcpy(base_off, &value32, sizeof(value32));
-#else
- *reinterpret_cast<uint32_t*>(reinterpret_cast<uint8_t*>(base) + (bit_off >> 3)) |=
- (value << BitPackShift(bit_off & 7, length));
-#endif
-}
-
-typedef union { float f; uint32_t i; } FloatEnc;
-
-inline float ReadFloat32(const void *base, uint64_t bit_off) {
- FloatEnc encoded;
- encoded.i = ReadOff(base, bit_off) >> BitPackShift(bit_off & 7, 32);
- return encoded.f;
-}
-inline void WriteFloat32(void *base, uint64_t bit_off, float value) {
- FloatEnc encoded;
- encoded.f = value;
- WriteInt57(base, bit_off, 32, encoded.i);
-}
-
-const uint32_t kSignBit = 0x80000000;
-
-inline void SetSign(float &to) {
- FloatEnc enc;
- enc.f = to;
- enc.i |= kSignBit;
- to = enc.f;
-}
-
-inline void UnsetSign(float &to) {
- FloatEnc enc;
- enc.f = to;
- enc.i &= ~kSignBit;
- to = enc.f;
-}
-
-inline float ReadNonPositiveFloat31(const void *base, uint64_t bit_off) {
- FloatEnc encoded;
- encoded.i = ReadOff(base, bit_off) >> BitPackShift(bit_off & 7, 31);
- // Sign bit set means negative.
- encoded.i |= kSignBit;
- return encoded.f;
-}
-inline void WriteNonPositiveFloat31(void *base, uint64_t bit_off, float value) {
- FloatEnc encoded;
- encoded.f = value;
- encoded.i &= ~kSignBit;
- WriteInt57(base, bit_off, 31, encoded.i);
-}
-
-void BitPackingSanity();
-
-// Return bits required to store integers upto max_value. Not the most
-// efficient implementation, but this is only called a few times to size tries.
-uint8_t RequiredBits(uint64_t max_value);
-
-struct BitsMask {
- static BitsMask ByMax(uint64_t max_value) {
- BitsMask ret;
- ret.FromMax(max_value);
- return ret;
- }
- static BitsMask ByBits(uint8_t bits) {
- BitsMask ret;
- ret.bits = bits;
- ret.mask = (1ULL << bits) - 1;
- return ret;
- }
- void FromMax(uint64_t max_value) {
- bits = RequiredBits(max_value);
- mask = (1ULL << bits) - 1;
- }
- uint8_t bits;
- uint64_t mask;
-};
-
-struct BitAddress {
- BitAddress(void *in_base, uint64_t in_offset) : base(in_base), offset(in_offset) {}
-
- void *base;
- uint64_t offset;
-};
-
-} // namespace util
-
-#endif // UTIL_BIT_PACKING_H
diff --git a/src/joshua/decoder/ff/lm/kenlm/util/bit_packing_test.cc b/src/joshua/decoder/ff/lm/kenlm/util/bit_packing_test.cc
deleted file mode 100644
index 4edc200..0000000
--- a/src/joshua/decoder/ff/lm/kenlm/util/bit_packing_test.cc
+++ /dev/null
@@ -1,59 +0,0 @@
-#include "util/bit_packing.hh"
-
-#define BOOST_TEST_MODULE BitPackingTest
-#include <boost/test/unit_test.hpp>
-
-#include <string.h>
-
-namespace util {
-namespace {
-
-const uint64_t test57 = 0x123456789abcdefULL;
-const uint32_t test25 = 0x1234567;
-
-BOOST_AUTO_TEST_CASE(ZeroBit57) {
- char mem[16];
- memset(mem, 0, sizeof(mem));
- WriteInt57(mem, 0, 57, test57);
- BOOST_CHECK_EQUAL(test57, ReadInt57(mem, 0, 57, (1ULL << 57) - 1));
-}
-
-BOOST_AUTO_TEST_CASE(EachBit57) {
- char mem[16];
- for (uint8_t b = 0; b < 8; ++b) {
- memset(mem, 0, sizeof(mem));
- WriteInt57(mem, b, 57, test57);
- BOOST_CHECK_EQUAL(test57, ReadInt57(mem, b, 57, (1ULL << 57) - 1));
- }
-}
-
-BOOST_AUTO_TEST_CASE(Consecutive57) {
- char mem[57+8];
- memset(mem, 0, sizeof(mem));
- for (uint64_t b = 0; b < 57 * 8; b += 57) {
- WriteInt57(mem, b, 57, test57);
- BOOST_CHECK_EQUAL(test57, ReadInt57(mem, b, 57, (1ULL << 57) - 1));
- }
- for (uint64_t b = 0; b < 57 * 8; b += 57) {
- BOOST_CHECK_EQUAL(test57, ReadInt57(mem, b, 57, (1ULL << 57) - 1));
- }
-}
-
-BOOST_AUTO_TEST_CASE(Consecutive25) {
- char mem[25+8];
- memset(mem, 0, sizeof(mem));
- for (uint64_t b = 0; b < 25 * 8; b += 25) {
- WriteInt25(mem, b, 25, test25);
- BOOST_CHECK_EQUAL(test25, ReadInt25(mem, b, 25, (1ULL << 25) - 1));
- }
- for (uint64_t b = 0; b < 25 * 8; b += 25) {
- BOOST_CHECK_EQUAL(test25, ReadInt25(mem, b, 25, (1ULL << 25) - 1));
- }
-}
-
-BOOST_AUTO_TEST_CASE(Sanity) {
- BitPackingSanity();
-}
-
-} // namespace
-} // namespace util
diff --git a/src/joshua/decoder/ff/lm/kenlm/util/cat_compressed_main.cc b/src/joshua/decoder/ff/lm/kenlm/util/cat_compressed_main.cc
deleted file mode 100644
index 2b4d729..0000000
--- a/src/joshua/decoder/ff/lm/kenlm/util/cat_compressed_main.cc
+++ /dev/null
@@ -1,47 +0,0 @@
-// Like cat but interprets compressed files.
-#include "util/file.hh"
-#include "util/read_compressed.hh"
-
-#include <string.h>
-#include <iostream>
-
-namespace {
-const std::size_t kBufSize = 16384;
-void Copy(util::ReadCompressed &from, int to) {
- util::scoped_malloc buffer(util::MallocOrThrow(kBufSize));
- while (std::size_t amount = from.Read(buffer.get(), kBufSize)) {
- util::WriteOrThrow(to, buffer.get(), amount);
- }
-}
-} // namespace
-
-int main(int argc, char *argv[]) {
- // Lane Schwartz likes -h and --help
- for (int i = 1; i < argc; ++i) {
- char *arg = argv[i];
- if (!strcmp(arg, "--")) break;
- if (!strcmp(arg, "-h") || !strcmp(arg, "--help")) {
- std::cerr <<
- "A cat implementation that interprets compressed files.\n"
- "Usage: " << argv[0] << " [file1] [file2] ...\n"
- "If no file is provided, then stdin is read.\n";
- return 1;
- }
- }
-
- try {
- if (argc == 1) {
- util::ReadCompressed in(0);
- Copy(in, 1);
- } else {
- for (int i = 1; i < argc; ++i) {
- util::ReadCompressed in(util::OpenReadOrThrow(argv[i]));
- Copy(in, 1);
- }
- }
- } catch (const std::exception &e) {
- std::cerr << e.what() << std::endl;
- return 2;
- }
- return 0;
-}
diff --git a/src/joshua/decoder/ff/lm/kenlm/util/double-conversion/bignum-dtoa.cc b/src/joshua/decoder/ff/lm/kenlm/util/double-conversion/bignum-dtoa.cc
deleted file mode 100644
index b6c2e85..0000000
--- a/src/joshua/decoder/ff/lm/kenlm/util/double-conversion/bignum-dtoa.cc
+++ /dev/null
@@ -1,640 +0,0 @@
-// Copyright 2010 the V8 project authors. All rights reserved.
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions are
-// met:
-//
-// * Redistributions of source code must retain the above copyright
-// notice, this list of conditions and the following disclaimer.
-// * Redistributions in binary form must reproduce the above
-// copyright notice, this list of conditions and the following
-// disclaimer in the documentation and/or other materials provided
-// with the distribution.
-// * Neither the name of Google Inc. nor the names of its
-// contributors may be used to endorse or promote products derived
-// from this software without specific prior written permission.
-//
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-#include <math.h>
-
-#include "bignum-dtoa.h"
-
-#include "bignum.h"
-#include "ieee.h"
-
-namespace double_conversion {
-
-static int NormalizedExponent(uint64_t significand, int exponent) {
- ASSERT(significand != 0);
- while ((significand & Double::kHiddenBit) == 0) {
- significand = significand << 1;
- exponent = exponent - 1;
- }
- return exponent;
-}
-
-
-// Forward declarations:
-// Returns an estimation of k such that 10^(k-1) <= v < 10^k.
-static int EstimatePower(int exponent);
-// Computes v / 10^estimated_power exactly, as a ratio of two bignums, numerator
-// and denominator.
-static void InitialScaledStartValues(uint64_t significand,
- int exponent,
- bool lower_boundary_is_closer,
- int estimated_power,
- bool need_boundary_deltas,
- Bignum* numerator,
- Bignum* denominator,
- Bignum* delta_minus,
- Bignum* delta_plus);
-// Multiplies numerator/denominator so that its values lies in the range 1-10.
-// Returns decimal_point s.t.
-// v = numerator'/denominator' * 10^(decimal_point-1)
-// where numerator' and denominator' are the values of numerator and
-// denominator after the call to this function.
-static void FixupMultiply10(int estimated_power, bool is_even,
- int* decimal_point,
- Bignum* numerator, Bignum* denominator,
- Bignum* delta_minus, Bignum* delta_plus);
-// Generates digits from the left to the right and stops when the generated
-// digits yield the shortest decimal representation of v.
-static void GenerateShortestDigits(Bignum* numerator, Bignum* denominator,
- Bignum* delta_minus, Bignum* delta_plus,
- bool is_even,
- Vector<char> buffer, int* length);
-// Generates 'requested_digits' after the decimal point.
-static void BignumToFixed(int requested_digits, int* decimal_point,
- Bignum* numerator, Bignum* denominator,
- Vector<char>(buffer), int* length);
-// Generates 'count' digits of numerator/denominator.
-// Once 'count' digits have been produced rounds the result depending on the
-// remainder (remainders of exactly .5 round upwards). Might update the
-// decimal_point when rounding up (for example for 0.9999).
-static void GenerateCountedDigits(int count, int* decimal_point,
- Bignum* numerator, Bignum* denominator,
- Vector<char>(buffer), int* length);
-
-
-void BignumDtoa(double v, BignumDtoaMode mode, int requested_digits,
- Vector<char> buffer, int* length, int* decimal_point) {
- ASSERT(v > 0);
- ASSERT(!Double(v).IsSpecial());
- uint64_t significand;
- int exponent;
- bool lower_boundary_is_closer;
- if (mode == BIGNUM_DTOA_SHORTEST_SINGLE) {
- float f = static_cast<float>(v);
- ASSERT(f == v);
- significand = Single(f).Significand();
- exponent = Single(f).Exponent();
- lower_boundary_is_closer = Single(f).LowerBoundaryIsCloser();
- } else {
- significand = Double(v).Significand();
- exponent = Double(v).Exponent();
- lower_boundary_is_closer = Double(v).LowerBoundaryIsCloser();
- }
- bool need_boundary_deltas =
- (mode == BIGNUM_DTOA_SHORTEST || mode == BIGNUM_DTOA_SHORTEST_SINGLE);
-
- bool is_even = (significand & 1) == 0;
- int normalized_exponent = NormalizedExponent(significand, exponent);
- // estimated_power might be too low by 1.
- int estimated_power = EstimatePower(normalized_exponent);
-
- // Shortcut for Fixed.
- // The requested digits correspond to the digits after the point. If the
- // number is much too small, then there is no need in trying to get any
- // digits.
- if (mode == BIGNUM_DTOA_FIXED && -estimated_power - 1 > requested_digits) {
- buffer[0] = '\0';
- *length = 0;
- // Set decimal-point to -requested_digits. This is what Gay does.
- // Note that it should not have any effect anyways since the string is
- // empty.
- *decimal_point = -requested_digits;
- return;
- }
-
- Bignum numerator;
- Bignum denominator;
- Bignum delta_minus;
- Bignum delta_plus;
- // Make sure the bignum can grow large enough. The smallest double equals
- // 4e-324. In this case the denominator needs fewer than 324*4 binary digits.
- // The maximum double is 1.7976931348623157e308 which needs fewer than
- // 308*4 binary digits.
- ASSERT(Bignum::kMaxSignificantBits >= 324*4);
- InitialScaledStartValues(significand, exponent, lower_boundary_is_closer,
- estimated_power, need_boundary_deltas,
- &numerator, &denominator,
- &delta_minus, &delta_plus);
- // We now have v = (numerator / denominator) * 10^estimated_power.
- FixupMultiply10(estimated_power, is_even, decimal_point,
- &numerator, &denominator,
- &delta_minus, &delta_plus);
- // We now have v = (numerator / denominator) * 10^(decimal_point-1), and
- // 1 <= (numerator + delta_plus) / denominator < 10
- switch (mode) {
- case BIGNUM_DTOA_SHORTEST:
- case BIGNUM_DTOA_SHORTEST_SINGLE:
- GenerateShortestDigits(&numerator, &denominator,
- &delta_minus, &delta_plus,
- is_even, buffer, length);
- break;
- case BIGNUM_DTOA_FIXED:
- BignumToFixed(requested_digits, decimal_point,
- &numerator, &denominator,
- buffer, length);
- break;
- case BIGNUM_DTOA_PRECISION:
- GenerateCountedDigits(requested_digits, decimal_point,
- &numerator, &denominator,
- buffer, length);
- break;
- default:
- UNREACHABLE();
- }
- buffer[*length] = '\0';
-}
-
-
-// The procedure starts generating digits from the left to the right and stops
-// when the generated digits yield the shortest decimal representation of v. A
-// decimal representation of v is a number lying closer to v than to any other
-// double, so it converts to v when read.
-//
-// This is true if d, the decimal representation, is between m- and m+, the
-// upper and lower boundaries. d must be strictly between them if !is_even.
-// m- := (numerator - delta_minus) / denominator
-// m+ := (numerator + delta_plus) / denominator
-//
-// Precondition: 0 <= (numerator+delta_plus) / denominator < 10.
-// If 1 <= (numerator+delta_plus) / denominator < 10 then no leading 0 digit
-// will be produced. This should be the standard precondition.
-static void GenerateShortestDigits(Bignum* numerator, Bignum* denominator,
- Bignum* delta_minus, Bignum* delta_plus,
- bool is_even,
- Vector<char> buffer, int* length) {
- // Small optimization: if delta_minus and delta_plus are the same just reuse
- // one of the two bignums.
- if (Bignum::Equal(*delta_minus, *delta_plus)) {
- delta_plus = delta_minus;
- }
- *length = 0;
- while (true) {
- uint16_t digit;
- digit = numerator->DivideModuloIntBignum(*denominator);
- ASSERT(digit <= 9); // digit is a uint16_t and therefore always positive.
- // digit = numerator / denominator (integer division).
- // numerator = numerator % denominator.
- buffer[(*length)++] = digit + '0';
-
- // Can we stop already?
- // If the remainder of the division is less than the distance to the lower
- // boundary we can stop. In this case we simply round down (discarding the
- // remainder).
- // Similarly we test if we can round up (using the upper boundary).
- bool in_delta_room_minus;
- bool in_delta_room_plus;
- if (is_even) {
- in_delta_room_minus = Bignum::LessEqual(*numerator, *delta_minus);
- } else {
- in_delta_room_minus = Bignum::Less(*numerator, *delta_minus);
- }
- if (is_even) {
- in_delta_room_plus =
- Bignum::PlusCompare(*numerator, *delta_plus, *denominator) >= 0;
- } else {
- in_delta_room_plus =
- Bignum::PlusCompare(*numerator, *delta_plus, *denominator) > 0;
- }
- if (!in_delta_room_minus && !in_delta_room_plus) {
- // Prepare for next iteration.
- numerator->Times10();
- delta_minus->Times10();
- // We optimized delta_plus to be equal to delta_minus (if they share the
- // same value). So don't multiply delta_plus if they point to the same
- // object.
- if (delta_minus != delta_plus) {
- delta_plus->Times10();
- }
- } else if (in_delta_room_minus && in_delta_room_plus) {
- // Let's see if 2*numerator < denominator.
- // If yes, then the next digit would be < 5 and we can round down.
- int compare = Bignum::PlusCompare(*numerator, *numerator, *denominator);
- if (compare < 0) {
- // Remaining digits are less than .5. -> Round down (== do nothing).
- } else if (compare > 0) {
- // Remaining digits are more than .5 of denominator. -> Round up.
- // Note that the last digit could not be a '9' as otherwise the whole
- // loop would have stopped earlier.
- // We still have an assert here in case the preconditions were not
- // satisfied.
- ASSERT(buffer[(*length) - 1] != '9');
- buffer[(*length) - 1]++;
- } else {
- // Halfway case.
- // TODO(floitsch): need a way to solve half-way cases.
- // For now let's round towards even (since this is what Gay seems to
- // do).
-
- if ((buffer[(*length) - 1] - '0') % 2 == 0) {
- // Round down => Do nothing.
- } else {
- ASSERT(buffer[(*length) - 1] != '9');
- buffer[(*length) - 1]++;
- }
- }
- return;
- } else if (in_delta_room_minus) {
- // Round down (== do nothing).
- return;
- } else { // in_delta_room_plus
- // Round up.
- // Note again that the last digit could not be '9' since this would have
- // stopped the loop earlier.
- // We still have an ASSERT here, in case the preconditions were not
- // satisfied.
- ASSERT(buffer[(*length) -1] != '9');
- buffer[(*length) - 1]++;
- return;
- }
- }
-}
-
-
-// Let v = numerator / denominator < 10.
-// Then we generate 'count' digits of d = x.xxxxx... (without the decimal point)
-// from left to right. Once 'count' digits have been produced we decide wether
-// to round up or down. Remainders of exactly .5 round upwards. Numbers such
-// as 9.999999 propagate a carry all the way, and change the
-// exponent (decimal_point), when rounding upwards.
-static void GenerateCountedDigits(int count, int* decimal_point,
- Bignum* numerator, Bignum* denominator,
- Vector<char>(buffer), int* length) {
- ASSERT(count >= 0);
- for (int i = 0; i < count - 1; ++i) {
- uint16_t digit;
- digit = numerator->DivideModuloIntBignum(*denominator);
- ASSERT(digit <= 9); // digit is a uint16_t and therefore always positive.
- // digit = numerator / denominator (integer division).
- // numerator = numerator % denominator.
- buffer[i] = digit + '0';
- // Prepare for next iteration.
- numerator->Times10();
- }
- // Generate the last digit.
- uint16_t digit;
- digit = numerator->DivideModuloIntBignum(*denominator);
- if (Bignum::PlusCompare(*numerator, *numerator, *denominator) >= 0) {
- digit++;
- }
- buffer[count - 1] = digit + '0';
- // Correct bad digits (in case we had a sequence of '9's). Propagate the
- // carry until we hat a non-'9' or til we reach the first digit.
- for (int i = count - 1; i > 0; --i) {
- if (buffer[i] != '0' + 10) break;
- buffer[i] = '0';
- buffer[i - 1]++;
- }
- if (buffer[0] == '0' + 10) {
- // Propagate a carry past the top place.
- buffer[0] = '1';
- (*decimal_point)++;
- }
- *length = count;
-}
-
-
-// Generates 'requested_digits' after the decimal point. It might omit
-// trailing '0's. If the input number is too small then no digits at all are
-// generated (ex.: 2 fixed digits for 0.00001).
-//
-// Input verifies: 1 <= (numerator + delta) / denominator < 10.
-static void BignumToFixed(int requested_digits, int* decimal_point,
- Bignum* numerator, Bignum* denominator,
- Vector<char>(buffer), int* length) {
- // Note that we have to look at more than just the requested_digits, since
- // a number could be rounded up. Example: v=0.5 with requested_digits=0.
- // Even though the power of v equals 0 we can't just stop here.
- if (-(*decimal_point) > requested_digits) {
- // The number is definitively too small.
- // Ex: 0.001 with requested_digits == 1.
- // Set decimal-point to -requested_digits. This is what Gay does.
- // Note that it should not have any effect anyways since the string is
- // empty.
- *decimal_point = -requested_digits;
- *length = 0;
- return;
- } else if (-(*decimal_point) == requested_digits) {
- // We only need to verify if the number rounds down or up.
- // Ex: 0.04 and 0.06 with requested_digits == 1.
- ASSERT(*decimal_point == -requested_digits);
- // Initially the fraction lies in range (1, 10]. Multiply the denominator
- // by 10 so that we can compare more easily.
- denominator->Times10();
- if (Bignum::PlusCompare(*numerator, *numerator, *denominator) >= 0) {
- // If the fraction is >= 0.5 then we have to include the rounded
- // digit.
- buffer[0] = '1';
- *length = 1;
- (*decimal_point)++;
- } else {
- // Note that we caught most of similar cases earlier.
- *length = 0;
- }
- return;
- } else {
- // The requested digits correspond to the digits after the point.
- // The variable 'needed_digits' includes the digits before the point.
- int needed_digits = (*decimal_point) + requested_digits;
- GenerateCountedDigits(needed_digits, decimal_point,
- numerator, denominator,
- buffer, length);
- }
-}
-
-
-// Returns an estimation of k such that 10^(k-1) <= v < 10^k where
-// v = f * 2^exponent and 2^52 <= f < 2^53.
-// v is hence a normalized double with the given exponent. The output is an
-// approximation for the exponent of the decimal approimation .digits * 10^k.
-//
-// The result might undershoot by 1 in which case 10^k <= v < 10^k+1.
-// Note: this property holds for v's upper boundary m+ too.
-// 10^k <= m+ < 10^k+1.
-// (see explanation below).
-//
-// Examples:
-// EstimatePower(0) => 16
-// EstimatePower(-52) => 0
-//
-// Note: e >= 0 => EstimatedPower(e) > 0. No similar claim can be made for e<0.
-static int EstimatePower(int exponent) {
- // This function estimates log10 of v where v = f*2^e (with e == exponent).
- // Note that 10^floor(log10(v)) <= v, but v <= 10^ceil(log10(v)).
- // Note that f is bounded by its container size. Let p = 53 (the double's
- // significand size). Then 2^(p-1) <= f < 2^p.
- //
- // Given that log10(v) == log2(v)/log2(10) and e+(len(f)-1) is quite close
- // to log2(v) the function is simplified to (e+(len(f)-1)/log2(10)).
- // The computed number undershoots by less than 0.631 (when we compute log3
- // and not log10).
- //
- // Optimization: since we only need an approximated result this computation
- // can be performed on 64 bit integers. On x86/x64 architecture the speedup is
- // not really measurable, though.
- //
- // Since we want to avoid overshooting we decrement by 1e10 so that
- // floating-point imprecisions don't affect us.
- //
- // Explanation for v's boundary m+: the computation takes advantage of
- // the fact that 2^(p-1) <= f < 2^p. Boundaries still satisfy this requirement
- // (even for denormals where the delta can be much more important).
-
- const double k1Log10 = 0.30102999566398114; // 1/lg(10)
-
- // For doubles len(f) == 53 (don't forget the hidden bit).
- const int kSignificandSize = Double::kSignificandSize;
- double estimate = ceil((exponent + kSignificandSize - 1) * k1Log10 - 1e-10);
- return static_cast<int>(estimate);
-}
-
-
-// See comments for InitialScaledStartValues.
-static void InitialScaledStartValuesPositiveExponent(
- uint64_t significand, int exponent,
- int estimated_power, bool need_boundary_deltas,
- Bignum* numerator, Bignum* denominator,
- Bignum* delta_minus, Bignum* delta_plus) {
- // A positive exponent implies a positive power.
- ASSERT(estimated_power >= 0);
- // Since the estimated_power is positive we simply multiply the denominator
- // by 10^estimated_power.
-
- // numerator = v.
- numerator->AssignUInt64(significand);
- numerator->ShiftLeft(exponent);
- // denominator = 10^estimated_power.
- denominator->AssignPowerUInt16(10, estimated_power);
-
- if (need_boundary_deltas) {
- // Introduce a common denominator so that the deltas to the boundaries are
- // integers.
- denominator->ShiftLeft(1);
- numerator->ShiftLeft(1);
- // Let v = f * 2^e, then m+ - v = 1/2 * 2^e; With the common
- // denominator (of 2) delta_plus equals 2^e.
- delta_plus->AssignUInt16(1);
- delta_plus->ShiftLeft(exponent);
- // Same for delta_minus. The adjustments if f == 2^p-1 are done later.
- delta_minus->AssignUInt16(1);
- delta_minus->ShiftLeft(exponent);
- }
-}
-
-
-// See comments for InitialScaledStartValues
-static void InitialScaledStartValuesNegativeExponentPositivePower(
- uint64_t significand, int exponent,
- int estimated_power, bool need_boundary_deltas,
- Bignum* numerator, Bignum* denominator,
- Bignum* delta_minus, Bignum* delta_plus) {
- // v = f * 2^e with e < 0, and with estimated_power >= 0.
- // This means that e is close to 0 (have a look at how estimated_power is
- // computed).
-
- // numerator = significand
- // since v = significand * 2^exponent this is equivalent to
- // numerator = v * / 2^-exponent
- numerator->AssignUInt64(significand);
- // denominator = 10^estimated_power * 2^-exponent (with exponent < 0)
- denominator->AssignPowerUInt16(10, estimated_power);
- denominator->ShiftLeft(-exponent);
-
- if (need_boundary_deltas) {
- // Introduce a common denominator so that the deltas to the boundaries are
- // integers.
- denominator->ShiftLeft(1);
- numerator->ShiftLeft(1);
- // Let v = f * 2^e, then m+ - v = 1/2 * 2^e; With the common
- // denominator (of 2) delta_plus equals 2^e.
- // Given that the denominator already includes v's exponent the distance
- // to the boundaries is simply 1.
- delta_plus->AssignUInt16(1);
- // Same for delta_minus. The adjustments if f == 2^p-1 are done later.
- delta_minus->AssignUInt16(1);
- }
-}
-
-
-// See comments for InitialScaledStartValues
-static void InitialScaledStartValuesNegativeExponentNegativePower(
- uint64_t significand, int exponent,
- int estimated_power, bool need_boundary_deltas,
- Bignum* numerator, Bignum* denominator,
- Bignum* delta_minus, Bignum* delta_plus) {
- // Instead of multiplying the denominator with 10^estimated_power we
- // multiply all values (numerator and deltas) by 10^-estimated_power.
-
- // Use numerator as temporary container for power_ten.
- Bignum* power_ten = numerator;
- power_ten->AssignPowerUInt16(10, -estimated_power);
-
- if (need_boundary_deltas) {
- // Since power_ten == numerator we must make a copy of 10^estimated_power
- // before we complete the computation of the numerator.
- // delta_plus = delta_minus = 10^estimated_power
- delta_plus->AssignBignum(*power_ten);
- delta_minus->AssignBignum(*power_ten);
- }
-
- // numerator = significand * 2 * 10^-estimated_power
- // since v = significand * 2^exponent this is equivalent to
- // numerator = v * 10^-estimated_power * 2 * 2^-exponent.
- // Remember: numerator has been abused as power_ten. So no need to assign it
- // to itself.
- ASSERT(numerator == power_ten);
- numerator->MultiplyByUInt64(significand);
-
- // denominator = 2 * 2^-exponent with exponent < 0.
- denominator->AssignUInt16(1);
- denominator->ShiftLeft(-exponent);
-
- if (need_boundary_deltas) {
- // Introduce a common denominator so that the deltas to the boundaries are
- // integers.
- numerator->ShiftLeft(1);
- denominator->ShiftLeft(1);
- // With this shift the boundaries have their correct value, since
- // delta_plus = 10^-estimated_power, and
- // delta_minus = 10^-estimated_power.
- // These assignments have been done earlier.
- // The adjustments if f == 2^p-1 (lower boundary is closer) are done later.
- }
-}
-
-
-// Let v = significand * 2^exponent.
-// Computes v / 10^estimated_power exactly, as a ratio of two bignums, numerator
-// and denominator. The functions GenerateShortestDigits and
-// GenerateCountedDigits will then convert this ratio to its decimal
-// representation d, with the required accuracy.
-// Then d * 10^estimated_power is the representation of v.
-// (Note: the fraction and the estimated_power might get adjusted before
-// generating the decimal representation.)
-//
-// The initial start values consist of:
-// - a scaled numerator: s.t. numerator/denominator == v / 10^estimated_power.
-// - a scaled (common) denominator.
-// optionally (used by GenerateShortestDigits to decide if it has the shortest
-// decimal converting back to v):
-// - v - m-: the distance to the lower boundary.
-// - m+ - v: the distance to the upper boundary.
-//
-// v, m+, m-, and therefore v - m- and m+ - v all share the same denominator.
-//
-// Let ep == estimated_power, then the returned values will satisfy:
-// v / 10^ep = numerator / denominator.
-// v's boundarys m- and m+:
-// m- / 10^ep == v / 10^ep - delta_minus / denominator
-// m+ / 10^ep == v / 10^ep + delta_plus / denominator
-// Or in other words:
-// m- == v - delta_minus * 10^ep / denominator;
-// m+ == v + delta_plus * 10^ep / denominator;
-//
-// Since 10^(k-1) <= v < 10^k (with k == estimated_power)
-// or 10^k <= v < 10^(k+1)
-// we then have 0.1 <= numerator/denominator < 1
-// or 1 <= numerator/denominator < 10
-//
-// It is then easy to kickstart the digit-generation routine.
-//
-// The boundary-deltas are only filled if the mode equals BIGNUM_DTOA_SHORTEST
-// or BIGNUM_DTOA_SHORTEST_SINGLE.
-
-static void InitialScaledStartValues(uint64_t significand,
- int exponent,
- bool lower_boundary_is_closer,
- int estimated_power,
- bool need_boundary_deltas,
- Bignum* numerator,
- Bignum* denominator,
- Bignum* delta_minus,
- Bignum* delta_plus) {
- if (exponent >= 0) {
- InitialScaledStartValuesPositiveExponent(
- significand, exponent, estimated_power, need_boundary_deltas,
- numerator, denominator, delta_minus, delta_plus);
- } else if (estimated_power >= 0) {
- InitialScaledStartValuesNegativeExponentPositivePower(
- significand, exponent, estimated_power, need_boundary_deltas,
- numerator, denominator, delta_minus, delta_plus);
- } else {
- InitialScaledStartValuesNegativeExponentNegativePower(
- significand, exponent, estimated_power, need_boundary_deltas,
- numerator, denominator, delta_minus, delta_plus);
- }
-
- if (need_boundary_deltas && lower_boundary_is_closer) {
- // The lower boundary is closer at half the distance of "normal" numbers.
- // Increase the common denominator and adapt all but the delta_minus.
- denominator->ShiftLeft(1); // *2
- numerator->ShiftLeft(1); // *2
- delta_plus->ShiftLeft(1); // *2
- }
-}
-
-
-// This routine multiplies numerator/denominator so that its values lies in the
-// range 1-10. That is after a call to this function we have:
-// 1 <= (numerator + delta_plus) /denominator < 10.
-// Let numerator the input before modification and numerator' the argument
-// after modification, then the output-parameter decimal_point is such that
-// numerator / denominator * 10^estimated_power ==
-// numerator' / denominator' * 10^(decimal_point - 1)
-// In some cases estimated_power was too low, and this is already the case. We
-// then simply adjust the power so that 10^(k-1) <= v < 10^k (with k ==
-// estimated_power) but do not touch the numerator or denominator.
-// Otherwise the routine multiplies the numerator and the deltas by 10.
-static void FixupMultiply10(int estimated_power, bool is_even,
- int* decimal_point,
- Bignum* numerator, Bignum* denominator,
- Bignum* delta_minus, Bignum* delta_plus) {
- bool in_range;
- if (is_even) {
- // For IEEE doubles half-way cases (in decimal system numbers ending with 5)
- // are rounded to the closest floating-point number with even significand.
- in_range = Bignum::PlusCompare(*numerator, *delta_plus, *denominator) >= 0;
- } else {
- in_range = Bignum::PlusCompare(*numerator, *delta_plus, *denominator) > 0;
- }
- if (in_range) {
- // Since numerator + delta_plus >= denominator we already have
- // 1 <= numerator/denominator < 10. Simply update the estimated_power.
- *decimal_point = estimated_power + 1;
- } else {
- *decimal_point = estimated_power;
- numerator->Times10();
- if (Bignum::Equal(*delta_minus, *delta_plus)) {
- delta_minus->Times10();
- delta_plus->AssignBignum(*delta_minus);
- } else {
- delta_minus->Times10();
- delta_plus->Times10();
- }
- }
-}
-
-} // namespace double_conversion
diff --git a/src/joshua/decoder/ff/lm/kenlm/util/double-conversion/cached-powers.cc b/src/joshua/decoder/ff/lm/kenlm/util/double-conversion/cached-powers.cc
deleted file mode 100644
index c676429..0000000
--- a/src/joshua/decoder/ff/lm/kenlm/util/double-conversion/cached-powers.cc
+++ /dev/null
@@ -1,175 +0,0 @@
-// Copyright 2006-2008 the V8 project authors. All rights reserved.
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions are
-// met:
-//
-// * Redistributions of source code must retain the above copyright
-// notice, this list of conditions and the following disclaimer.
-// * Redistributions in binary form must reproduce the above
-// copyright notice, this list of conditions and the following
-// disclaimer in the documentation and/or other materials provided
-// with the distribution.
-// * Neither the name of Google Inc. nor the names of its
-// contributors may be used to endorse or promote products derived
-// from this software without specific prior written permission.
-//
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-#include <stdarg.h>
-#include <limits.h>
-#include <math.h>
-
-#include "utils.h"
-
-#include "cached-powers.h"
-
-namespace double_conversion {
-
-struct CachedPower {
- uint64_t significand;
- int16_t binary_exponent;
- int16_t decimal_exponent;
-};
-
-static const CachedPower kCachedPowers[] = {
- {UINT64_2PART_C(0xfa8fd5a0, 081c0288), -1220, -348},
- {UINT64_2PART_C(0xbaaee17f, a23ebf76), -1193, -340},
- {UINT64_2PART_C(0x8b16fb20, 3055ac76), -1166, -332},
- {UINT64_2PART_C(0xcf42894a, 5dce35ea), -1140, -324},
- {UINT64_2PART_C(0x9a6bb0aa, 55653b2d), -1113, -316},
- {UINT64_2PART_C(0xe61acf03, 3d1a45df), -1087, -308},
- {UINT64_2PART_C(0xab70fe17, c79ac6ca), -1060, -300},
- {UINT64_2PART_C(0xff77b1fc, bebcdc4f), -1034, -292},
- {UINT64_2PART_C(0xbe5691ef, 416bd60c), -1007, -284},
- {UINT64_2PART_C(0x8dd01fad, 907ffc3c), -980, -276},
- {UINT64_2PART_C(0xd3515c28, 31559a83), -954, -268},
- {UINT64_2PART_C(0x9d71ac8f, ada6c9b5), -927, -260},
- {UINT64_2PART_C(0xea9c2277, 23ee8bcb), -901, -252},
- {UINT64_2PART_C(0xaecc4991, 4078536d), -874, -244},
- {UINT64_2PART_C(0x823c1279, 5db6ce57), -847, -236},
- {UINT64_2PART_C(0xc2109436, 4dfb5637), -821, -228},
- {UINT64_2PART_C(0x9096ea6f, 3848984f), -794, -220},
- {UINT64_2PART_C(0xd77485cb, 25823ac7), -768, -212},
- {UINT64_2PART_C(0xa086cfcd, 97bf97f4), -741, -204},
- {UINT64_2PART_C(0xef340a98, 172aace5), -715, -196},
- {UINT64_2PART_C(0xb23867fb, 2a35b28e), -688, -188},
- {UINT64_2PART_C(0x84c8d4df, d2c63f3b), -661, -180},
- {UINT64_2PART_C(0xc5dd4427, 1ad3cdba), -635, -172},
- {UINT64_2PART_C(0x936b9fce, bb25c996), -608, -164},
- {UINT64_2PART_C(0xdbac6c24, 7d62a584), -582, -156},
- {UINT64_2PART_C(0xa3ab6658, 0d5fdaf6), -555, -148},
- {UINT64_2PART_C(0xf3e2f893, dec3f126), -529, -140},
- {UINT64_2PART_C(0xb5b5ada8, aaff80b8), -502, -132},
- {UINT64_2PART_C(0x87625f05, 6c7c4a8b), -475, -124},
- {UINT64_2PART_C(0xc9bcff60, 34c13053), -449, -116},
- {UINT64_2PART_C(0x964e858c, 91ba2655), -422, -108},
- {UINT64_2PART_C(0xdff97724, 70297ebd), -396, -100},
- {UINT64_2PART_C(0xa6dfbd9f, b8e5b88f), -369, -92},
- {UINT64_2PART_C(0xf8a95fcf, 88747d94), -343, -84},
- {UINT64_2PART_C(0xb9447093, 8fa89bcf), -316, -76},
- {UINT64_2PART_C(0x8a08f0f8, bf0f156b), -289, -68},
- {UINT64_2PART_C(0xcdb02555, 653131b6), -263, -60},
- {UINT64_2PART_C(0x993fe2c6, d07b7fac), -236, -52},
- {UINT64_2PART_C(0xe45c10c4, 2a2b3b06), -210, -44},
- {UINT64_2PART_C(0xaa242499, 697392d3), -183, -36},
- {UINT64_2PART_C(0xfd87b5f2, 8300ca0e), -157, -28},
- {UINT64_2PART_C(0xbce50864, 92111aeb), -130, -20},
- {UINT64_2PART_C(0x8cbccc09, 6f5088cc), -103, -12},
- {UINT64_2PART_C(0xd1b71758, e219652c), -77, -4},
- {UINT64_2PART_C(0x9c400000, 00000000), -50, 4},
- {UINT64_2PART_C(0xe8d4a510, 00000000), -24, 12},
- {UINT64_2PART_C(0xad78ebc5, ac620000), 3, 20},
- {UINT64_2PART_C(0x813f3978, f8940984), 30, 28},
- {UINT64_2PART_C(0xc097ce7b, c90715b3), 56, 36},
- {UINT64_2PART_C(0x8f7e32ce, 7bea5c70), 83, 44},
- {UINT64_2PART_C(0xd5d238a4, abe98068), 109, 52},
- {UINT64_2PART_C(0x9f4f2726, 179a2245), 136, 60},
- {UINT64_2PART_C(0xed63a231, d4c4fb27), 162, 68},
- {UINT64_2PART_C(0xb0de6538, 8cc8ada8), 189, 76},
- {UINT64_2PART_C(0x83c7088e, 1aab65db), 216, 84},
- {UINT64_2PART_C(0xc45d1df9, 42711d9a), 242, 92},
- {UINT64_2PART_C(0x924d692c, a61be758), 269, 100},
- {UINT64_2PART_C(0xda01ee64, 1a708dea), 295, 108},
- {UINT64_2PART_C(0xa26da399, 9aef774a), 322, 116},
- {UINT64_2PART_C(0xf209787b, b47d6b85), 348, 124},
- {UINT64_2PART_C(0xb454e4a1, 79dd1877), 375, 132},
- {UINT64_2PART_C(0x865b8692, 5b9bc5c2), 402, 140},
- {UINT64_2PART_C(0xc83553c5, c8965d3d), 428, 148},
- {UINT64_2PART_C(0x952ab45c, fa97a0b3), 455, 156},
- {UINT64_2PART_C(0xde469fbd, 99a05fe3), 481, 164},
- {UINT64_2PART_C(0xa59bc234, db398c25), 508, 172},
- {UINT64_2PART_C(0xf6c69a72, a3989f5c), 534, 180},
- {UINT64_2PART_C(0xb7dcbf53, 54e9bece), 561, 188},
- {UINT64_2PART_C(0x88fcf317, f22241e2), 588, 196},
- {UINT64_2PART_C(0xcc20ce9b, d35c78a5), 614, 204},
- {UINT64_2PART_C(0x98165af3, 7b2153df), 641, 212},
- {UINT64_2PART_C(0xe2a0b5dc, 971f303a), 667, 220},
- {UINT64_2PART_C(0xa8d9d153, 5ce3b396), 694, 228},
- {UINT64_2PART_C(0xfb9b7cd9, a4a7443c), 720, 236},
- {UINT64_2PART_C(0xbb764c4c, a7a44410), 747, 244},
- {UINT64_2PART_C(0x8bab8eef, b6409c1a), 774, 252},
- {UINT64_2PART_C(0xd01fef10, a657842c), 800, 260},
- {UINT64_2PART_C(0x9b10a4e5, e9913129), 827, 268},
- {UINT64_2PART_C(0xe7109bfb, a19c0c9d), 853, 276},
- {UINT64_2PART_C(0xac2820d9, 623bf429), 880, 284},
- {UINT64_2PART_C(0x80444b5e, 7aa7cf85), 907, 292},
- {UINT64_2PART_C(0xbf21e440, 03acdd2d), 933, 300},
- {UINT64_2PART_C(0x8e679c2f, 5e44ff8f), 960, 308},
- {UINT64_2PART_C(0xd433179d, 9c8cb841), 986, 316},
- {UINT64_2PART_C(0x9e19db92, b4e31ba9), 1013, 324},
- {UINT64_2PART_C(0xeb96bf6e, badf77d9), 1039, 332},
- {UINT64_2PART_C(0xaf87023b, 9bf0ee6b), 1066, 340},
-};
-
-static const int kCachedPowersLength = ARRAY_SIZE(kCachedPowers);
-static const int kCachedPowersOffset = 348; // -1 * the first decimal_exponent.
-static const double kD_1_LOG2_10 = 0.30102999566398114; // 1 / lg(10)
-// Difference between the decimal exponents in the table above.
-const int PowersOfTenCache::kDecimalExponentDistance = 8;
-const int PowersOfTenCache::kMinDecimalExponent = -348;
-const int PowersOfTenCache::kMaxDecimalExponent = 340;
-
-void PowersOfTenCache::GetCachedPowerForBinaryExponentRange(
- int min_exponent,
- int max_exponent,
- DiyFp* power,
- int* decimal_exponent) {
- int kQ = DiyFp::kSignificandSize;
- double k = ceil((min_exponent + kQ - 1) * kD_1_LOG2_10);
- int foo = kCachedPowersOffset;
- int index =
- (foo + static_cast<int>(k) - 1) / kDecimalExponentDistance + 1;
- ASSERT(0 <= index && index < kCachedPowersLength);
- CachedPower cached_power = kCachedPowers[index];
- ASSERT(min_exponent <= cached_power.binary_exponent);
- ASSERT(cached_power.binary_exponent <= max_exponent);
- *decimal_exponent = cached_power.decimal_exponent;
- *power = DiyFp(cached_power.significand, cached_power.binary_exponent);
-}
-
-
-void PowersOfTenCache::GetCachedPowerForDecimalExponent(int requested_exponent,
- DiyFp* power,
- int* found_exponent) {
- ASSERT(kMinDecimalExponent <= requested_exponent);
- ASSERT(requested_exponent < kMaxDecimalExponent + kDecimalExponentDistance);
- int index =
- (requested_exponent + kCachedPowersOffset) / kDecimalExponentDistance;
- CachedPower cached_power = kCachedPowers[index];
- *power = DiyFp(cached_power.significand, cached_power.binary_exponent);
- *found_exponent = cached_power.decimal_exponent;
- ASSERT(*found_exponent <= requested_exponent);
- ASSERT(requested_exponent < *found_exponent + kDecimalExponentDistance);
-}
-
-} // namespace double_conversion
diff --git a/src/joshua/decoder/ff/lm/kenlm/util/double-conversion/double-conversion.cc b/src/joshua/decoder/ff/lm/kenlm/util/double-conversion/double-conversion.cc
deleted file mode 100644
index febba6c..0000000
--- a/src/joshua/decoder/ff/lm/kenlm/util/double-conversion/double-conversion.cc
+++ /dev/null
@@ -1,889 +0,0 @@
-// Copyright 2010 the V8 project authors. All rights reserved.
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions are
-// met:
-//
-// * Redistributions of source code must retain the above copyright
-// notice, this list of conditions and the following disclaimer.
-// * Redistributions in binary form must reproduce the above
-// copyright notice, this list of conditions and the following
-// disclaimer in the documentation and/or other materials provided
-// with the distribution.
-// * Neither the name of Google Inc. nor the names of its
-// contributors may be used to endorse or promote products derived
-// from this software without specific prior written permission.
-//
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-#include <limits.h>
-#include <math.h>
-
-#include "double-conversion.h"
-
-#include "bignum-dtoa.h"
-#include "fast-dtoa.h"
-#include "fixed-dtoa.h"
-#include "ieee.h"
-#include "strtod.h"
-#include "utils.h"
-
-namespace double_conversion {
-
-const DoubleToStringConverter& DoubleToStringConverter::EcmaScriptConverter() {
- int flags = UNIQUE_ZERO | EMIT_POSITIVE_EXPONENT_SIGN;
- static DoubleToStringConverter converter(flags,
- "Infinity",
- "NaN",
- 'e',
- -6, 21,
- 6, 0);
- return converter;
-}
-
-
-bool DoubleToStringConverter::HandleSpecialValues(
- double value,
- StringBuilder* result_builder) const {
- Double double_inspect(value);
- if (double_inspect.IsInfinite()) {
- if (infinity_symbol_ == NULL) return false;
- if (value < 0) {
- result_builder->AddCharacter('-');
- }
- result_builder->AddString(infinity_symbol_);
- return true;
- }
- if (double_inspect.IsNan()) {
- if (nan_symbol_ == NULL) return false;
- result_builder->AddString(nan_symbol_);
- return true;
- }
- return false;
-}
-
-
-void DoubleToStringConverter::CreateExponentialRepresentation(
- const char* decimal_digits,
- int length,
- int exponent,
- StringBuilder* result_builder) const {
- ASSERT(length != 0);
- result_builder->AddCharacter(decimal_digits[0]);
- if (length != 1) {
- result_builder->AddCharacter('.');
- result_builder->AddSubstring(&decimal_digits[1], length-1);
- }
- result_builder->AddCharacter(exponent_character_);
- if (exponent < 0) {
- result_builder->AddCharacter('-');
- exponent = -exponent;
- } else {
- if ((flags_ & EMIT_POSITIVE_EXPONENT_SIGN) != 0) {
- result_builder->AddCharacter('+');
- }
- }
- if (exponent == 0) {
- result_builder->AddCharacter('0');
- return;
- }
- ASSERT(exponent < 1e4);
- const int kMaxExponentLength = 5;
- char buffer[kMaxExponentLength + 1];
- buffer[kMaxExponentLength] = '\0';
- int first_char_pos = kMaxExponentLength;
- while (exponent > 0) {
- buffer[--first_char_pos] = '0' + (exponent % 10);
- exponent /= 10;
- }
- result_builder->AddSubstring(&buffer[first_char_pos],
- kMaxExponentLength - first_char_pos);
-}
-
-
-void DoubleToStringConverter::CreateDecimalRepresentation(
- const char* decimal_digits,
- int length,
- int decimal_point,
- int digits_after_point,
- StringBuilder* result_builder) const {
- // Create a representation that is padded with zeros if needed.
- if (decimal_point <= 0) {
- // "0.00000decimal_rep".
- result_builder->AddCharacter('0');
- if (digits_after_point > 0) {
- result_builder->AddCharacter('.');
- result_builder->AddPadding('0', -decimal_point);
- ASSERT(length <= digits_after_point - (-decimal_point));
- result_builder->AddSubstring(decimal_digits, length);
- int remaining_digits = digits_after_point - (-decimal_point) - length;
- result_builder->AddPadding('0', remaining_digits);
- }
- } else if (decimal_point >= length) {
- // "decimal_rep0000.00000" or "decimal_rep.0000"
- result_builder->AddSubstring(decimal_digits, length);
- result_builder->AddPadding('0', decimal_point - length);
- if (digits_after_point > 0) {
- result_builder->AddCharacter('.');
- result_builder->AddPadding('0', digits_after_point);
- }
- } else {
- // "decima.l_rep000"
- ASSERT(digits_after_point > 0);
- result_builder->AddSubstring(decimal_digits, decimal_point);
- result_builder->AddCharacter('.');
- ASSERT(length - decimal_point <= digits_after_point);
- result_builder->AddSubstring(&decimal_digits[decimal_point],
- length - decimal_point);
- int remaining_digits = digits_after_point - (length - decimal_point);
- result_builder->AddPadding('0', remaining_digits);
- }
- if (digits_after_point == 0) {
- if ((flags_ & EMIT_TRAILING_DECIMAL_POINT) != 0) {
- result_builder->AddCharacter('.');
- }
- if ((flags_ & EMIT_TRAILING_ZERO_AFTER_POINT) != 0) {
- result_builder->AddCharacter('0');
- }
- }
-}
-
-
-bool DoubleToStringConverter::ToShortestIeeeNumber(
- double value,
- StringBuilder* result_builder,
- DoubleToStringConverter::DtoaMode mode) const {
- ASSERT(mode == SHORTEST || mode == SHORTEST_SINGLE);
- if (Double(value).IsSpecial()) {
- return HandleSpecialValues(value, result_builder);
- }
-
- int decimal_point;
- bool sign;
- const int kDecimalRepCapacity = kBase10MaximalLength + 1;
- char decimal_rep[kDecimalRepCapacity];
- int decimal_rep_length;
-
- DoubleToAscii(value, mode, 0, decimal_rep, kDecimalRepCapacity,
- &sign, &decimal_rep_length, &decimal_point);
-
- bool unique_zero = (flags_ & UNIQUE_ZERO) != 0;
- if (sign && (value != 0.0 || !unique_zero)) {
- result_builder->AddCharacter('-');
- }
-
- int exponent = decimal_point - 1;
- if ((decimal_in_shortest_low_ <= exponent) &&
- (exponent < decimal_in_shortest_high_)) {
- CreateDecimalRepresentation(decimal_rep, decimal_rep_length,
- decimal_point,
- Max(0, decimal_rep_length - decimal_point),
- result_builder);
- } else {
- CreateExponentialRepresentation(decimal_rep, decimal_rep_length, exponent,
- result_builder);
- }
- return true;
-}
-
-
-bool DoubleToStringConverter::ToFixed(double value,
- int requested_digits,
- StringBuilder* result_builder) const {
- ASSERT(kMaxFixedDigitsBeforePoint == 60);
- const double kFirstNonFixed = 1e60;
-
- if (Double(value).IsSpecial()) {
- return HandleSpecialValues(value, result_builder);
- }
-
- if (requested_digits > kMaxFixedDigitsAfterPoint) return false;
- if (value >= kFirstNonFixed || value <= -kFirstNonFixed) return false;
-
- // Find a sufficiently precise decimal representation of n.
- int decimal_point;
- bool sign;
- // Add space for the '\0' byte.
- const int kDecimalRepCapacity =
- kMaxFixedDigitsBeforePoint + kMaxFixedDigitsAfterPoint + 1;
- char decimal_rep[kDecimalRepCapacity];
- int decimal_rep_length;
- DoubleToAscii(value, FIXED, requested_digits,
- decimal_rep, kDecimalRepCapacity,
- &sign, &decimal_rep_length, &decimal_point);
-
- bool unique_zero = ((flags_ & UNIQUE_ZERO) != 0);
- if (sign && (value != 0.0 || !unique_zero)) {
- result_builder->AddCharacter('-');
- }
-
- CreateDecimalRepresentation(decimal_rep, decimal_rep_length, decimal_point,
- requested_digits, result_builder);
- return true;
-}
-
-
-bool DoubleToStringConverter::ToExponential(
- double value,
- int requested_digits,
- StringBuilder* result_builder) const {
- if (Double(value).IsSpecial()) {
- return HandleSpecialValues(value, result_builder);
- }
-
- if (requested_digits < -1) return false;
- if (requested_digits > kMaxExponentialDigits) return false;
-
- int decimal_point;
- bool sign;
- // Add space for digit before the decimal point and the '\0' character.
- const int kDecimalRepCapacity = kMaxExponentialDigits + 2;
- ASSERT(kDecimalRepCapacity > kBase10MaximalLength);
- char decimal_rep[kDecimalRepCapacity];
- int decimal_rep_length;
-
- if (requested_digits == -1) {
- DoubleToAscii(value, SHORTEST, 0,
- decimal_rep, kDecimalRepCapacity,
- &sign, &decimal_rep_length, &decimal_point);
- } else {
- DoubleToAscii(value, PRECISION, requested_digits + 1,
- decimal_rep, kDecimalRepCapacity,
- &sign, &decimal_rep_length, &decimal_point);
- ASSERT(decimal_rep_length <= requested_digits + 1);
-
- for (int i = decimal_rep_length; i < requested_digits + 1; ++i) {
- decimal_rep[i] = '0';
- }
- decimal_rep_length = requested_digits + 1;
- }
-
- bool unique_zero = ((flags_ & UNIQUE_ZERO) != 0);
- if (sign && (value != 0.0 || !unique_zero)) {
- result_builder->AddCharacter('-');
- }
-
- int exponent = decimal_point - 1;
- CreateExponentialRepresentation(decimal_rep,
- decimal_rep_length,
- exponent,
- result_builder);
- return true;
-}
-
-
-bool DoubleToStringConverter::ToPrecision(double value,
- int precision,
- StringBuilder* result_builder) const {
- if (Double(value).IsSpecial()) {
- return HandleSpecialValues(value, result_builder);
- }
-
- if (precision < kMinPrecisionDigits || precision > kMaxPrecisionDigits) {
- return false;
- }
-
- // Find a sufficiently precise decimal representation of n.
- int decimal_point;
- bool sign;
- // Add one for the terminating null character.
- const int kDecimalRepCapacity = kMaxPrecisionDigits + 1;
- char decimal_rep[kDecimalRepCapacity];
- int decimal_rep_length;
-
- DoubleToAscii(value, PRECISION, precision,
- decimal_rep, kDecimalRepCapacity,
- &sign, &decimal_rep_length, &decimal_point);
- ASSERT(decimal_rep_length <= precision);
-
- bool unique_zero = ((flags_ & UNIQUE_ZERO) != 0);
- if (sign && (value != 0.0 || !unique_zero)) {
- result_builder->AddCharacter('-');
- }
-
- // The exponent if we print the number as x.xxeyyy. That is with the
- // decimal point after the first digit.
- int exponent = decimal_point - 1;
-
- int extra_zero = ((flags_ & EMIT_TRAILING_ZERO_AFTER_POINT) != 0) ? 1 : 0;
- if ((-decimal_point + 1 > max_leading_padding_zeroes_in_precision_mode_) ||
- (decimal_point - precision + extra_zero >
- max_trailing_padding_zeroes_in_precision_mode_)) {
- // Fill buffer to contain 'precision' digits.
- // Usually the buffer is already at the correct length, but 'DoubleToAscii'
- // is allowed to return less characters.
- for (int i = decimal_rep_length; i < precision; ++i) {
- decimal_rep[i] = '0';
- }
-
- CreateExponentialRepresentation(decimal_rep,
- precision,
- exponent,
- result_builder);
- } else {
- CreateDecimalRepresentation(decimal_rep, decimal_rep_length, decimal_point,
- Max(0, precision - decimal_point),
- result_builder);
- }
- return true;
-}
-
-
-static BignumDtoaMode DtoaToBignumDtoaMode(
- DoubleToStringConverter::DtoaMode dtoa_mode) {
- switch (dtoa_mode) {
- case DoubleToStringConverter::SHORTEST: return BIGNUM_DTOA_SHORTEST;
- case DoubleToStringConverter::SHORTEST_SINGLE:
- return BIGNUM_DTOA_SHORTEST_SINGLE;
- case DoubleToStringConverter::FIXED: return BIGNUM_DTOA_FIXED;
- case DoubleToStringConverter::PRECISION: return BIGNUM_DTOA_PRECISION;
- default:
- UNREACHABLE();
- return BIGNUM_DTOA_SHORTEST; // To silence compiler.
- }
-}
-
-
-void DoubleToStringConverter::DoubleToAscii(double v,
- DtoaMode mode,
- int requested_digits,
- char* buffer,
- int buffer_length,
- bool* sign,
- int* length,
- int* point) {
- Vector<char> vector(buffer, buffer_length);
- ASSERT(!Double(v).IsSpecial());
- ASSERT(mode == SHORTEST || mode == SHORTEST_SINGLE || requested_digits >= 0);
-
- if (Double(v).Sign() < 0) {
- *sign = true;
- v = -v;
- } else {
- *sign = false;
- }
-
- if (mode == PRECISION && requested_digits == 0) {
- vector[0] = '\0';
- *length = 0;
- return;
- }
-
- if (v == 0) {
- vector[0] = '0';
- vector[1] = '\0';
- *length = 1;
- *point = 1;
- return;
- }
-
- bool fast_worked;
- switch (mode) {
- case SHORTEST:
- fast_worked = FastDtoa(v, FAST_DTOA_SHORTEST, 0, vector, length, point);
- break;
- case SHORTEST_SINGLE:
- fast_worked = FastDtoa(v, FAST_DTOA_SHORTEST_SINGLE, 0,
- vector, length, point);
- break;
- case FIXED:
- fast_worked = FastFixedDtoa(v, requested_digits, vector, length, point);
- break;
- case PRECISION:
- fast_worked = FastDtoa(v, FAST_DTOA_PRECISION, requested_digits,
- vector, length, point);
- break;
- default:
- UNREACHABLE();
- fast_worked = false;
- }
- if (fast_worked) return;
-
- // If the fast dtoa didn't succeed use the slower bignum version.
- BignumDtoaMode bignum_mode = DtoaToBignumDtoaMode(mode);
- BignumDtoa(v, bignum_mode, requested_digits, vector, length, point);
- vector[*length] = '\0';
-}
-
-
-// Consumes the given substring from the iterator.
-// Returns false, if the substring does not match.
-static bool ConsumeSubString(const char** current,
- const char* end,
- const char* substring) {
- ASSERT(**current == *substring);
- for (substring++; *substring != '\0'; substring++) {
- ++*current;
- if (*current == end || **current != *substring) return false;
- }
- ++*current;
- return true;
-}
-
-
-// Maximum number of significant digits in decimal representation.
-// The longest possible double in decimal representation is
-// (2^53 - 1) * 2 ^ -1074 that is (2 ^ 53 - 1) * 5 ^ 1074 / 10 ^ 1074
-// (768 digits). If we parse a number whose first digits are equal to a
-// mean of 2 adjacent doubles (that could have up to 769 digits) the result
-// must be rounded to the bigger one unless the tail consists of zeros, so
-// we don't need to preserve all the digits.
-const int kMaxSignificantDigits = 772;
-
-
-// Returns true if a nonspace found and false if the end has reached.
-static inline bool AdvanceToNonspace(const char** current, const char* end) {
- while (*current != end) {
- if (**current != ' ') return true;
- ++*current;
- }
- return false;
-}
-
-
-static bool isDigit(int x, int radix) {
- return (x >= '0' && x <= '9' && x < '0' + radix)
- || (radix > 10 && x >= 'a' && x < 'a' + radix - 10)
- || (radix > 10 && x >= 'A' && x < 'A' + radix - 10);
-}
-
-
-static double SignedZero(bool sign) {
- return sign ? -0.0 : 0.0;
-}
-
-
-// Parsing integers with radix 2, 4, 8, 16, 32. Assumes current != end.
-template <int radix_log_2>
-static double RadixStringToIeee(const char* current,
- const char* end,
- bool sign,
- bool allow_trailing_junk,
- double junk_string_value,
- bool read_as_double,
- const char** trailing_pointer) {
- ASSERT(current != end);
-
- const int kDoubleSize = Double::kSignificandSize;
- const int kSingleSize = Single::kSignificandSize;
- const int kSignificandSize = read_as_double? kDoubleSize: kSingleSize;
-
- // Skip leading 0s.
- while (*current == '0') {
- ++current;
- if (current == end) {
- *trailing_pointer = end;
- return SignedZero(sign);
- }
- }
-
- int64_t number = 0;
- int exponent = 0;
- const int radix = (1 << radix_log_2);
-
- do {
- int digit;
- if (*current >= '0' && *current <= '9' && *current < '0' + radix) {
- digit = static_cast<char>(*current) - '0';
- } else if (radix > 10 && *current >= 'a' && *current < 'a' + radix - 10) {
- digit = static_cast<char>(*current) - 'a' + 10;
- } else if (radix > 10 && *current >= 'A' && *current < 'A' + radix - 10) {
- digit = static_cast<char>(*current) - 'A' + 10;
- } else {
- if (allow_trailing_junk || !AdvanceToNonspace(¤t, end)) {
- break;
- } else {
- return junk_string_value;
- }
- }
-
- number = number * radix + digit;
- int overflow = static_cast<int>(number >> kSignificandSize);
- if (overflow != 0) {
- // Overflow occurred. Need to determine which direction to round the
- // result.
- int overflow_bits_count = 1;
- while (overflow > 1) {
- overflow_bits_count++;
- overflow >>= 1;
- }
-
- int dropped_bits_mask = ((1 << overflow_bits_count) - 1);
- int dropped_bits = static_cast<int>(number) & dropped_bits_mask;
- number >>= overflow_bits_count;
- exponent = overflow_bits_count;
-
- bool zero_tail = true;
- while (true) {
- ++current;
- if (current == end || !isDigit(*current, radix)) break;
- zero_tail = zero_tail && *current == '0';
- exponent += radix_log_2;
- }
-
- if (!allow_trailing_junk && AdvanceToNonspace(¤t, end)) {
- return junk_string_value;
- }
-
- int middle_value = (1 << (overflow_bits_count - 1));
- if (dropped_bits > middle_value) {
- number++; // Rounding up.
- } else if (dropped_bits == middle_value) {
- // Rounding to even to consistency with decimals: half-way case rounds
- // up if significant part is odd and down otherwise.
- if ((number & 1) != 0 || !zero_tail) {
- number++; // Rounding up.
- }
- }
-
- // Rounding up may cause overflow.
- if ((number & ((int64_t)1 << kSignificandSize)) != 0) {
- exponent++;
- number >>= 1;
- }
- break;
- }
- ++current;
- } while (current != end);
-
- ASSERT(number < ((int64_t)1 << kSignificandSize));
- ASSERT(static_cast<int64_t>(static_cast<double>(number)) == number);
-
- *trailing_pointer = current;
-
- if (exponent == 0) {
- if (sign) {
- if (number == 0) return -0.0;
- number = -number;
- }
- return static_cast<double>(number);
- }
-
- ASSERT(number != 0);
- return Double(DiyFp(number, exponent)).value();
-}
-
-
-double StringToDoubleConverter::StringToIeee(
- const char* input,
- int length,
- int* processed_characters_count,
- bool read_as_double) const {
- const char* current = input;
- const char* end = input + length;
-
- *processed_characters_count = 0;
-
- const bool allow_trailing_junk = (flags_ & ALLOW_TRAILING_JUNK) != 0;
- const bool allow_leading_spaces = (flags_ & ALLOW_LEADING_SPACES) != 0;
- const bool allow_trailing_spaces = (flags_ & ALLOW_TRAILING_SPACES) != 0;
- const bool allow_spaces_after_sign = (flags_ & ALLOW_SPACES_AFTER_SIGN) != 0;
-
- // To make sure that iterator dereferencing is valid the following
- // convention is used:
- // 1. Each '++current' statement is followed by check for equality to 'end'.
- // 2. If AdvanceToNonspace returned false then current == end.
- // 3. If 'current' becomes equal to 'end' the function returns or goes to
- // 'parsing_done'.
- // 4. 'current' is not dereferenced after the 'parsing_done' label.
- // 5. Code before 'parsing_done' may rely on 'current != end'.
- if (current == end) return empty_string_value_;
-
- if (allow_leading_spaces || allow_trailing_spaces) {
- if (!AdvanceToNonspace(¤t, end)) {
- *processed_characters_count = current - input;
- return empty_string_value_;
- }
- if (!allow_leading_spaces && (input != current)) {
- // No leading spaces allowed, but AdvanceToNonspace moved forward.
- return junk_string_value_;
- }
- }
-
- // The longest form of simplified number is: "-<significant digits>.1eXXX\0".
- const int kBufferSize = kMaxSignificantDigits + 10;
- char buffer[kBufferSize]; // NOLINT: size is known at compile time.
- int buffer_pos = 0;
-
- // Exponent will be adjusted if insignificant digits of the integer part
- // or insignificant leading zeros of the fractional part are dropped.
- int exponent = 0;
- int significant_digits = 0;
- int insignificant_digits = 0;
- bool nonzero_digit_dropped = false;
-
- bool sign = false;
-
- if (*current == '+' || *current == '-') {
- sign = (*current == '-');
- ++current;
- const char* next_non_space = current;
- // Skip following spaces (if allowed).
- if (!AdvanceToNonspace(&next_non_space, end)) return junk_string_value_;
- if (!allow_spaces_after_sign && (current != next_non_space)) {
- return junk_string_value_;
- }
- current = next_non_space;
- }
-
- if (infinity_symbol_ != NULL) {
- if (*current == infinity_symbol_[0]) {
- if (!ConsumeSubString(¤t, end, infinity_symbol_)) {
- return junk_string_value_;
- }
-
- if (!(allow_trailing_spaces || allow_trailing_junk) && (current != end)) {
- return junk_string_value_;
- }
- if (!allow_trailing_junk && AdvanceToNonspace(¤t, end)) {
- return junk_string_value_;
- }
-
- ASSERT(buffer_pos == 0);
- *processed_characters_count = current - input;
- return sign ? -Double::Infinity() : Double::Infinity();
- }
- }
-
- if (nan_symbol_ != NULL) {
- if (*current == nan_symbol_[0]) {
- if (!ConsumeSubString(¤t, end, nan_symbol_)) {
- return junk_string_value_;
- }
-
- if (!(allow_trailing_spaces || allow_trailing_junk) && (current != end)) {
- return junk_string_value_;
- }
- if (!allow_trailing_junk && AdvanceToNonspace(¤t, end)) {
- return junk_string_value_;
- }
-
- ASSERT(buffer_pos == 0);
- *processed_characters_count = current - input;
- return sign ? -Double::NaN() : Double::NaN();
- }
- }
-
- bool leading_zero = false;
- if (*current == '0') {
- ++current;
- if (current == end) {
- *processed_characters_count = current - input;
- return SignedZero(sign);
- }
-
- leading_zero = true;
-
- // It could be hexadecimal value.
- if ((flags_ & ALLOW_HEX) && (*current == 'x' || *current == 'X')) {
- ++current;
- if (current == end || !isDigit(*current, 16)) {
- return junk_string_value_; // "0x".
- }
-
- const char* tail_pointer = NULL;
- double result = RadixStringToIeee<4>(current,
- end,
- sign,
- allow_trailing_junk,
- junk_string_value_,
- read_as_double,
- &tail_pointer);
- if (tail_pointer != NULL) {
- if (allow_trailing_spaces) AdvanceToNonspace(&tail_pointer, end);
- *processed_characters_count = tail_pointer - input;
- }
- return result;
- }
-
- // Ignore leading zeros in the integer part.
- while (*current == '0') {
- ++current;
- if (current == end) {
- *processed_characters_count = current - input;
- return SignedZero(sign);
- }
- }
- }
-
- bool octal = leading_zero && (flags_ & ALLOW_OCTALS) != 0;
-
- // Copy significant digits of the integer part (if any) to the buffer.
- while (*current >= '0' && *current <= '9') {
- if (significant_digits < kMaxSignificantDigits) {
- ASSERT(buffer_pos < kBufferSize);
- buffer[buffer_pos++] = static_cast<char>(*current);
- significant_digits++;
- // Will later check if it's an octal in the buffer.
- } else {
- insignificant_digits++; // Move the digit into the exponential part.
- nonzero_digit_dropped = nonzero_digit_dropped || *current != '0';
- }
- octal = octal && *current < '8';
- ++current;
- if (current == end) goto parsing_done;
- }
-
- if (significant_digits == 0) {
- octal = false;
- }
-
- if (*current == '.') {
- if (octal && !allow_trailing_junk) return junk_string_value_;
- if (octal) goto parsing_done;
-
- ++current;
- if (current == end) {
- if (significant_digits == 0 && !leading_zero) {
- return junk_string_value_;
- } else {
- goto parsing_done;
- }
- }
-
- if (significant_digits == 0) {
- // octal = false;
- // Integer part consists of 0 or is absent. Significant digits start after
- // leading zeros (if any).
- while (*current == '0') {
- ++current;
- if (current == end) {
- *processed_characters_count = current - input;
- return SignedZero(sign);
- }
- exponent--; // Move this 0 into the exponent.
- }
- }
-
- // There is a fractional part.
- // We don't emit a '.', but adjust the exponent instead.
- while (*current >= '0' && *current <= '9') {
- if (significant_digits < kMaxSignificantDigits) {
- ASSERT(buffer_pos < kBufferSize);
- buffer[buffer_pos++] = static_cast<char>(*current);
- significant_digits++;
- exponent--;
- } else {
- // Ignore insignificant digits in the fractional part.
- nonzero_digit_dropped = nonzero_digit_dropped || *current != '0';
- }
- ++current;
- if (current == end) goto parsing_done;
- }
- }
-
- if (!leading_zero && exponent == 0 && significant_digits == 0) {
- // If leading_zeros is true then the string contains zeros.
- // If exponent < 0 then string was [+-]\.0*...
- // If significant_digits != 0 the string is not equal to 0.
- // Otherwise there are no digits in the string.
- return junk_string_value_;
- }
-
- // Parse exponential part.
- if (*current == 'e' || *current == 'E') {
- if (octal && !allow_trailing_junk) return junk_string_value_;
- if (octal) goto parsing_done;
- ++current;
- if (current == end) {
- if (allow_trailing_junk) {
- goto parsing_done;
- } else {
- return junk_string_value_;
- }
- }
- char sign = '+';
- if (*current == '+' || *current == '-') {
- sign = static_cast<char>(*current);
- ++current;
- if (current == end) {
- if (allow_trailing_junk) {
- goto parsing_done;
- } else {
- return junk_string_value_;
- }
- }
- }
-
- if (current == end || *current < '0' || *current > '9') {
- if (allow_trailing_junk) {
- goto parsing_done;
- } else {
- return junk_string_value_;
- }
- }
-
- const int max_exponent = INT_MAX / 2;
- ASSERT(-max_exponent / 2 <= exponent && exponent <= max_exponent / 2);
- int num = 0;
- do {
- // Check overflow.
- int digit = *current - '0';
- if (num >= max_exponent / 10
- && !(num == max_exponent / 10 && digit <= max_exponent % 10)) {
- num = max_exponent;
- } else {
- num = num * 10 + digit;
- }
- ++current;
- } while (current != end && *current >= '0' && *current <= '9');
-
- exponent += (sign == '-' ? -num : num);
- }
-
- if (!(allow_trailing_spaces || allow_trailing_junk) && (current != end)) {
- return junk_string_value_;
- }
- if (!allow_trailing_junk && AdvanceToNonspace(¤t, end)) {
- return junk_string_value_;
- }
- if (allow_trailing_spaces) {
- AdvanceToNonspace(¤t, end);
- }
-
- parsing_done:
- exponent += insignificant_digits;
-
- if (octal) {
- double result;
- const char* tail_pointer = NULL;
- result = RadixStringToIeee<3>(buffer,
- buffer + buffer_pos,
- sign,
- allow_trailing_junk,
- junk_string_value_,
- read_as_double,
- &tail_pointer);
- ASSERT(tail_pointer != NULL);
- *processed_characters_count = current - input;
- return result;
- }
-
- if (nonzero_digit_dropped) {
- buffer[buffer_pos++] = '1';
- exponent--;
- }
-
- ASSERT(buffer_pos < kBufferSize);
- buffer[buffer_pos] = '\0';
-
- double converted;
- if (read_as_double) {
- converted = Strtod(Vector<const char>(buffer, buffer_pos), exponent);
- } else {
- converted = Strtof(Vector<const char>(buffer, buffer_pos), exponent);
- }
- *processed_characters_count = current - input;
- return sign? -converted: converted;
-}
-
-} // namespace double_conversion
diff --git a/src/joshua/decoder/ff/lm/kenlm/util/double-conversion/fixed-dtoa.cc b/src/joshua/decoder/ff/lm/kenlm/util/double-conversion/fixed-dtoa.cc
deleted file mode 100644
index d56b144..0000000
--- a/src/joshua/decoder/ff/lm/kenlm/util/double-conversion/fixed-dtoa.cc
+++ /dev/null
@@ -1,402 +0,0 @@
-// Copyright 2010 the V8 project authors. All rights reserved.
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions are
-// met:
-//
-// * Redistributions of source code must retain the above copyright
-// notice, this list of conditions and the following disclaimer.
-// * Redistributions in binary form must reproduce the above
-// copyright notice, this list of conditions and the following
-// disclaimer in the documentation and/or other materials provided
-// with the distribution.
-// * Neither the name of Google Inc. nor the names of its
-// contributors may be used to endorse or promote products derived
-// from this software without specific prior written permission.
-//
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-#include <math.h>
-
-#include "fixed-dtoa.h"
-#include "ieee.h"
-
-namespace double_conversion {
-
-// Represents a 128bit type. This class should be replaced by a native type on
-// platforms that support 128bit integers.
-class UInt128 {
- public:
- UInt128() : high_bits_(0), low_bits_(0) { }
- UInt128(uint64_t high, uint64_t low) : high_bits_(high), low_bits_(low) { }
-
- void Multiply(uint32_t multiplicand) {
- uint64_t accumulator;
-
- accumulator = (low_bits_ & kMask32) * multiplicand;
- uint32_t part = static_cast<uint32_t>(accumulator & kMask32);
- accumulator >>= 32;
- accumulator = accumulator + (low_bits_ >> 32) * multiplicand;
- low_bits_ = (accumulator << 32) + part;
- accumulator >>= 32;
- accumulator = accumulator + (high_bits_ & kMask32) * multiplicand;
- part = static_cast<uint32_t>(accumulator & kMask32);
- accumulator >>= 32;
- accumulator = accumulator + (high_bits_ >> 32) * multiplicand;
- high_bits_ = (accumulator << 32) + part;
- ASSERT((accumulator >> 32) == 0);
- }
-
- void Shift(int shift_amount) {
- ASSERT(-64 <= shift_amount && shift_amount <= 64);
- if (shift_amount == 0) {
- return;
- } else if (shift_amount == -64) {
- high_bits_ = low_bits_;
- low_bits_ = 0;
- } else if (shift_amount == 64) {
- low_bits_ = high_bits_;
- high_bits_ = 0;
- } else if (shift_amount <= 0) {
- high_bits_ <<= -shift_amount;
- high_bits_ += low_bits_ >> (64 + shift_amount);
- low_bits_ <<= -shift_amount;
- } else {
- low_bits_ >>= shift_amount;
- low_bits_ += high_bits_ << (64 - shift_amount);
- high_bits_ >>= shift_amount;
- }
- }
-
- // Modifies *this to *this MOD (2^power).
- // Returns *this DIV (2^power).
- int DivModPowerOf2(int power) {
- if (power >= 64) {
- int result = static_cast<int>(high_bits_ >> (power - 64));
- high_bits_ -= static_cast<uint64_t>(result) << (power - 64);
- return result;
- } else {
- uint64_t part_low = low_bits_ >> power;
- uint64_t part_high = high_bits_ << (64 - power);
- int result = static_cast<int>(part_low + part_high);
- high_bits_ = 0;
- low_bits_ -= part_low << power;
- return result;
- }
- }
-
- bool IsZero() const {
- return high_bits_ == 0 && low_bits_ == 0;
- }
-
- int BitAt(int position) {
- if (position >= 64) {
- return static_cast<int>(high_bits_ >> (position - 64)) & 1;
- } else {
- return static_cast<int>(low_bits_ >> position) & 1;
- }
- }
-
- private:
- static const uint64_t kMask32 = 0xFFFFFFFF;
- // Value == (high_bits_ << 64) + low_bits_
- uint64_t high_bits_;
- uint64_t low_bits_;
-};
-
-
-static const int kDoubleSignificandSize = 53; // Includes the hidden bit.
-
-
-static void FillDigits32FixedLength(uint32_t number, int requested_length,
- Vector<char> buffer, int* length) {
- for (int i = requested_length - 1; i >= 0; --i) {
- buffer[(*length) + i] = '0' + number % 10;
- number /= 10;
- }
- *length += requested_length;
-}
-
-
-static void FillDigits32(uint32_t number, Vector<char> buffer, int* length) {
- int number_length = 0;
- // We fill the digits in reverse order and exchange them afterwards.
- while (number != 0) {
- int digit = number % 10;
- number /= 10;
- buffer[(*length) + number_length] = '0' + digit;
- number_length++;
- }
- // Exchange the digits.
- int i = *length;
- int j = *length + number_length - 1;
- while (i < j) {
- char tmp = buffer[i];
- buffer[i] = buffer[j];
- buffer[j] = tmp;
- i++;
- j--;
- }
- *length += number_length;
-}
-
-
-static void FillDigits64FixedLength(uint64_t number, int requested_length,
- Vector<char> buffer, int* length) {
- const uint32_t kTen7 = 10000000;
- // For efficiency cut the number into 3 uint32_t parts, and print those.
- uint32_t part2 = static_cast<uint32_t>(number % kTen7);
- number /= kTen7;
- uint32_t part1 = static_cast<uint32_t>(number % kTen7);
- uint32_t part0 = static_cast<uint32_t>(number / kTen7);
-
- FillDigits32FixedLength(part0, 3, buffer, length);
- FillDigits32FixedLength(part1, 7, buffer, length);
- FillDigits32FixedLength(part2, 7, buffer, length);
-}
-
-
-static void FillDigits64(uint64_t number, Vector<char> buffer, int* length) {
- const uint32_t kTen7 = 10000000;
- // For efficiency cut the number into 3 uint32_t parts, and print those.
- uint32_t part2 = static_cast<uint32_t>(number % kTen7);
- number /= kTen7;
- uint32_t part1 = static_cast<uint32_t>(number % kTen7);
- uint32_t part0 = static_cast<uint32_t>(number / kTen7);
-
- if (part0 != 0) {
- FillDigits32(part0, buffer, length);
- FillDigits32FixedLength(part1, 7, buffer, length);
- FillDigits32FixedLength(part2, 7, buffer, length);
- } else if (part1 != 0) {
- FillDigits32(part1, buffer, length);
- FillDigits32FixedLength(part2, 7, buffer, length);
- } else {
- FillDigits32(part2, buffer, length);
- }
-}
-
-
-static void RoundUp(Vector<char> buffer, int* length, int* decimal_point) {
- // An empty buffer represents 0.
- if (*length == 0) {
- buffer[0] = '1';
- *decimal_point = 1;
- *length = 1;
- return;
- }
- // Round the last digit until we either have a digit that was not '9' or until
- // we reached the first digit.
- buffer[(*length) - 1]++;
- for (int i = (*length) - 1; i > 0; --i) {
- if (buffer[i] != '0' + 10) {
- return;
- }
- buffer[i] = '0';
- buffer[i - 1]++;
- }
- // If the first digit is now '0' + 10, we would need to set it to '0' and add
- // a '1' in front. However we reach the first digit only if all following
- // digits had been '9' before rounding up. Now all trailing digits are '0' and
- // we simply switch the first digit to '1' and update the decimal-point
- // (indicating that the point is now one digit to the right).
- if (buffer[0] == '0' + 10) {
- buffer[0] = '1';
- (*decimal_point)++;
- }
-}
-
-
-// The given fractionals number represents a fixed-point number with binary
-// point at bit (-exponent).
-// Preconditions:
-// -128 <= exponent <= 0.
-// 0 <= fractionals * 2^exponent < 1
-// The buffer holds the result.
-// The function will round its result. During the rounding-process digits not
-// generated by this function might be updated, and the decimal-point variable
-// might be updated. If this function generates the digits 99 and the buffer
-// already contained "199" (thus yielding a buffer of "19999") then a
-// rounding-up will change the contents of the buffer to "20000".
-static void FillFractionals(uint64_t fractionals, int exponent,
- int fractional_count, Vector<char> buffer,
- int* length, int* decimal_point) {
- ASSERT(-128 <= exponent && exponent <= 0);
- // 'fractionals' is a fixed-point number, with binary point at bit
- // (-exponent). Inside the function the non-converted remainder of fractionals
- // is a fixed-point number, with binary point at bit 'point'.
- if (-exponent <= 64) {
- // One 64 bit number is sufficient.
- ASSERT(fractionals >> 56 == 0);
- int point = -exponent;
- for (int i = 0; i < fractional_count; ++i) {
- if (fractionals == 0) break;
- // Instead of multiplying by 10 we multiply by 5 and adjust the point
- // location. This way the fractionals variable will not overflow.
- // Invariant at the beginning of the loop: fractionals < 2^point.
- // Initially we have: point <= 64 and fractionals < 2^56
- // After each iteration the point is decremented by one.
- // Note that 5^3 = 125 < 128 = 2^7.
- // Therefore three iterations of this loop will not overflow fractionals
- // (even without the subtraction at the end of the loop body). At this
- // time point will satisfy point <= 61 and therefore fractionals < 2^point
- // and any further multiplication of fractionals by 5 will not overflow.
- fractionals *= 5;
- point--;
- int digit = static_cast<int>(fractionals >> point);
- buffer[*length] = '0' + digit;
- (*length)++;
- fractionals -= static_cast<uint64_t>(digit) << point;
- }
- // If the first bit after the point is set we have to round up.
- if (((fractionals >> (point - 1)) & 1) == 1) {
- RoundUp(buffer, length, decimal_point);
- }
- } else { // We need 128 bits.
- ASSERT(64 < -exponent && -exponent <= 128);
- UInt128 fractionals128 = UInt128(fractionals, 0);
- fractionals128.Shift(-exponent - 64);
- int point = 128;
- for (int i = 0; i < fractional_count; ++i) {
- if (fractionals128.IsZero()) break;
- // As before: instead of multiplying by 10 we multiply by 5 and adjust the
- // point location.
- // This multiplication will not overflow for the same reasons as before.
- fractionals128.Multiply(5);
- point--;
- int digit = fractionals128.DivModPowerOf2(point);
- buffer[*length] = '0' + digit;
- (*length)++;
- }
- if (fractionals128.BitAt(point - 1) == 1) {
- RoundUp(buffer, length, decimal_point);
- }
- }
-}
-
-
-// Removes leading and trailing zeros.
-// If leading zeros are removed then the decimal point position is adjusted.
-static void TrimZeros(Vector<char> buffer, int* length, int* decimal_point) {
- while (*length > 0 && buffer[(*length) - 1] == '0') {
- (*length)--;
- }
- int first_non_zero = 0;
- while (first_non_zero < *length && buffer[first_non_zero] == '0') {
- first_non_zero++;
- }
- if (first_non_zero != 0) {
- for (int i = first_non_zero; i < *length; ++i) {
- buffer[i - first_non_zero] = buffer[i];
- }
- *length -= first_non_zero;
- *decimal_point -= first_non_zero;
- }
-}
-
-
-bool FastFixedDtoa(double v,
- int fractional_count,
- Vector<char> buffer,
- int* length,
- int* decimal_point) {
- const uint32_t kMaxUInt32 = 0xFFFFFFFF;
- uint64_t significand = Double(v).Significand();
- int exponent = Double(v).Exponent();
- // v = significand * 2^exponent (with significand a 53bit integer).
- // If the exponent is larger than 20 (i.e. we may have a 73bit number) then we
- // don't know how to compute the representation. 2^73 ~= 9.5*10^21.
- // If necessary this limit could probably be increased, but we don't need
- // more.
- if (exponent > 20) return false;
- if (fractional_count > 20) return false;
- *length = 0;
- // At most kDoubleSignificandSize bits of the significand are non-zero.
- // Given a 64 bit integer we have 11 0s followed by 53 potentially non-zero
- // bits: 0..11*..0xxx..53*..xx
- if (exponent + kDoubleSignificandSize > 64) {
- // The exponent must be > 11.
- //
- // We know that v = significand * 2^exponent.
- // And the exponent > 11.
- // We simplify the task by dividing v by 10^17.
- // The quotient delivers the first digits, and the remainder fits into a 64
- // bit number.
- // Dividing by 10^17 is equivalent to dividing by 5^17*2^17.
- const uint64_t kFive17 = UINT64_2PART_C(0xB1, A2BC2EC5); // 5^17
- uint64_t divisor = kFive17;
- int divisor_power = 17;
- uint64_t dividend = significand;
- uint32_t quotient;
- uint64_t remainder;
- // Let v = f * 2^e with f == significand and e == exponent.
- // Then need q (quotient) and r (remainder) as follows:
- // v = q * 10^17 + r
- // f * 2^e = q * 10^17 + r
- // f * 2^e = q * 5^17 * 2^17 + r
- // If e > 17 then
- // f * 2^(e-17) = q * 5^17 + r/2^17
- // else
- // f = q * 5^17 * 2^(17-e) + r/2^e
- if (exponent > divisor_power) {
- // We only allow exponents of up to 20 and therefore (17 - e) <= 3
- dividend <<= exponent - divisor_power;
- quotient = static_cast<uint32_t>(dividend / divisor);
- remainder = (dividend % divisor) << divisor_power;
- } else {
- divisor <<= divisor_power - exponent;
- quotient = static_cast<uint32_t>(dividend / divisor);
- remainder = (dividend % divisor) << exponent;
- }
- FillDigits32(quotient, buffer, length);
- FillDigits64FixedLength(remainder, divisor_power, buffer, length);
- *decimal_point = *length;
- } else if (exponent >= 0) {
- // 0 <= exponent <= 11
- significand <<= exponent;
- FillDigits64(significand, buffer, length);
- *decimal_point = *length;
- } else if (exponent > -kDoubleSignificandSize) {
- // We have to cut the number.
- uint64_t integrals = significand >> -exponent;
- uint64_t fractionals = significand - (integrals << -exponent);
- if (integrals > kMaxUInt32) {
- FillDigits64(integrals, buffer, length);
- } else {
- FillDigits32(static_cast<uint32_t>(integrals), buffer, length);
- }
- *decimal_point = *length;
- FillFractionals(fractionals, exponent, fractional_count,
- buffer, length, decimal_point);
- } else if (exponent < -128) {
- // This configuration (with at most 20 digits) means that all digits must be
- // 0.
- ASSERT(fractional_count <= 20);
- buffer[0] = '\0';
- *length = 0;
- *decimal_point = -fractional_count;
- } else {
- *decimal_point = 0;
- FillFractionals(significand, exponent, fractional_count,
- buffer, length, decimal_point);
- }
- TrimZeros(buffer, length, decimal_point);
- buffer[*length] = '\0';
- if ((*length) == 0) {
- // The string is empty and the decimal_point thus has no importance. Mimick
- // Gay's dtoa and and set it to -fractional_count.
- *decimal_point = -fractional_count;
- }
- return true;
-}
-
-} // namespace double_conversion
diff --git a/src/joshua/decoder/ff/lm/kenlm/util/double-conversion/strtod.cc b/src/joshua/decoder/ff/lm/kenlm/util/double-conversion/strtod.cc
deleted file mode 100644
index e298766..0000000
--- a/src/joshua/decoder/ff/lm/kenlm/util/double-conversion/strtod.cc
+++ /dev/null
@@ -1,558 +0,0 @@
-// Copyright 2010 the V8 project authors. All rights reserved.
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions are
-// met:
-//
-// * Redistributions of source code must retain the above copyright
-// notice, this list of conditions and the following disclaimer.
-// * Redistributions in binary form must reproduce the above
-// copyright notice, this list of conditions and the following
-// disclaimer in the documentation and/or other materials provided
-// with the distribution.
-// * Neither the name of Google Inc. nor the names of its
-// contributors may be used to endorse or promote products derived
-// from this software without specific prior written permission.
-//
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-#include <stdarg.h>
-#include <limits.h>
-
-#include "strtod.h"
-#include "bignum.h"
-#include "cached-powers.h"
-#include "ieee.h"
-
-namespace double_conversion {
-
-// 2^53 = 9007199254740992.
-// Any integer with at most 15 decimal digits will hence fit into a double
-// (which has a 53bit significand) without loss of precision.
-static const int kMaxExactDoubleIntegerDecimalDigits = 15;
-// 2^64 = 18446744073709551616 > 10^19
-static const int kMaxUint64DecimalDigits = 19;
-
-// Max double: 1.7976931348623157 x 10^308
-// Min non-zero double: 4.9406564584124654 x 10^-324
-// Any x >= 10^309 is interpreted as +infinity.
-// Any x <= 10^-324 is interpreted as 0.
-// Note that 2.5e-324 (despite being smaller than the min double) will be read
-// as non-zero (equal to the min non-zero double).
-static const int kMaxDecimalPower = 309;
-static const int kMinDecimalPower = -324;
-
-// 2^64 = 18446744073709551616
-static const uint64_t kMaxUint64 = UINT64_2PART_C(0xFFFFFFFF, FFFFFFFF);
-
-
-static const double exact_powers_of_ten[] = {
- 1.0, // 10^0
- 10.0,
- 100.0,
- 1000.0,
- 10000.0,
- 100000.0,
- 1000000.0,
- 10000000.0,
- 100000000.0,
- 1000000000.0,
- 10000000000.0, // 10^10
- 100000000000.0,
- 1000000000000.0,
- 10000000000000.0,
- 100000000000000.0,
- 1000000000000000.0,
- 10000000000000000.0,
- 100000000000000000.0,
- 1000000000000000000.0,
- 10000000000000000000.0,
- 100000000000000000000.0, // 10^20
- 1000000000000000000000.0,
- // 10^22 = 0x21e19e0c9bab2400000 = 0x878678326eac9 * 2^22
- 10000000000000000000000.0
-};
-static const int kExactPowersOfTenSize = ARRAY_SIZE(exact_powers_of_ten);
-
-// Maximum number of significant digits in the decimal representation.
-// In fact the value is 772 (see conversions.cc), but to give us some margin
-// we round up to 780.
-static const int kMaxSignificantDecimalDigits = 780;
-
-static Vector<const char> TrimLeadingZeros(Vector<const char> buffer) {
- for (int i = 0; i < buffer.length(); i++) {
- if (buffer[i] != '0') {
- return buffer.SubVector(i, buffer.length());
- }
- }
- return Vector<const char>(buffer.start(), 0);
-}
-
-
-static Vector<const char> TrimTrailingZeros(Vector<const char> buffer) {
- for (int i = buffer.length() - 1; i >= 0; --i) {
- if (buffer[i] != '0') {
- return buffer.SubVector(0, i + 1);
- }
- }
- return Vector<const char>(buffer.start(), 0);
-}
-
-
-static void CutToMaxSignificantDigits(Vector<const char> buffer,
- int exponent,
- char* significant_buffer,
- int* significant_exponent) {
- for (int i = 0; i < kMaxSignificantDecimalDigits - 1; ++i) {
- significant_buffer[i] = buffer[i];
- }
- // The input buffer has been trimmed. Therefore the last digit must be
- // different from '0'.
- ASSERT(buffer[buffer.length() - 1] != '0');
- // Set the last digit to be non-zero. This is sufficient to guarantee
- // correct rounding.
- significant_buffer[kMaxSignificantDecimalDigits - 1] = '1';
- *significant_exponent =
- exponent + (buffer.length() - kMaxSignificantDecimalDigits);
-}
-
-
-// Trims the buffer and cuts it to at most kMaxSignificantDecimalDigits.
-// If possible the input-buffer is reused, but if the buffer needs to be
-// modified (due to cutting), then the input needs to be copied into the
-// buffer_copy_space.
-static void TrimAndCut(Vector<const char> buffer, int exponent,
- char* buffer_copy_space, int space_size,
- Vector<const char>* trimmed, int* updated_exponent) {
- Vector<const char> left_trimmed = TrimLeadingZeros(buffer);
- Vector<const char> right_trimmed = TrimTrailingZeros(left_trimmed);
- exponent += left_trimmed.length() - right_trimmed.length();
- if (right_trimmed.length() > kMaxSignificantDecimalDigits) {
- ASSERT(space_size >= kMaxSignificantDecimalDigits);
- CutToMaxSignificantDigits(right_trimmed, exponent,
- buffer_copy_space, updated_exponent);
- *trimmed = Vector<const char>(buffer_copy_space,
- kMaxSignificantDecimalDigits);
- } else {
- *trimmed = right_trimmed;
- *updated_exponent = exponent;
- }
-}
-
-
-// Reads digits from the buffer and converts them to a uint64.
-// Reads in as many digits as fit into a uint64.
-// When the string starts with "1844674407370955161" no further digit is read.
-// Since 2^64 = 18446744073709551616 it would still be possible read another
-// digit if it was less or equal than 6, but this would complicate the code.
-static uint64_t ReadUint64(Vector<const char> buffer,
- int* number_of_read_digits) {
- uint64_t result = 0;
- int i = 0;
- while (i < buffer.length() && result <= (kMaxUint64 / 10 - 1)) {
- int digit = buffer[i++] - '0';
- ASSERT(0 <= digit && digit <= 9);
- result = 10 * result + digit;
- }
- *number_of_read_digits = i;
- return result;
-}
-
-
-// Reads a DiyFp from the buffer.
-// The returned DiyFp is not necessarily normalized.
-// If remaining_decimals is zero then the returned DiyFp is accurate.
-// Otherwise it has been rounded and has error of at most 1/2 ulp.
-static void ReadDiyFp(Vector<const char> buffer,
- DiyFp* result,
- int* remaining_decimals) {
- int read_digits;
- uint64_t significand = ReadUint64(buffer, &read_digits);
- if (buffer.length() == read_digits) {
- *result = DiyFp(significand, 0);
- *remaining_decimals = 0;
- } else {
- // Round the significand.
- if (buffer[read_digits] >= '5') {
- significand++;
- }
- // Compute the binary exponent.
- int exponent = 0;
- *result = DiyFp(significand, exponent);
- *remaining_decimals = buffer.length() - read_digits;
- }
-}
-
-
-static bool DoubleStrtod(Vector<const char> trimmed,
- int exponent,
- double* result) {
-#if !defined(DOUBLE_CONVERSION_CORRECT_DOUBLE_OPERATIONS)
- // On x86 the floating-point stack can be 64 or 80 bits wide. If it is
- // 80 bits wide (as is the case on Linux) then double-rounding occurs and the
- // result is not accurate.
- // We know that Windows32 uses 64 bits and is therefore accurate.
- // Note that the ARM simulator is compiled for 32bits. It therefore exhibits
- // the same problem.
- return false;
-#endif
- if (trimmed.length() <= kMaxExactDoubleIntegerDecimalDigits) {
- int read_digits;
- // The trimmed input fits into a double.
- // If the 10^exponent (resp. 10^-exponent) fits into a double too then we
- // can compute the result-double simply by multiplying (resp. dividing) the
- // two numbers.
- // This is possible because IEEE guarantees that floating-point operations
- // return the best possible approximation.
- if (exponent < 0 && -exponent < kExactPowersOfTenSize) {
- // 10^-exponent fits into a double.
- *result = static_cast<double>(ReadUint64(trimmed, &read_digits));
- ASSERT(read_digits == trimmed.length());
- *result /= exact_powers_of_ten[-exponent];
- return true;
- }
- if (0 <= exponent && exponent < kExactPowersOfTenSize) {
- // 10^exponent fits into a double.
- *result = static_cast<double>(ReadUint64(trimmed, &read_digits));
- ASSERT(read_digits == trimmed.length());
- *result *= exact_powers_of_ten[exponent];
- return true;
- }
- int remaining_digits =
- kMaxExactDoubleIntegerDecimalDigits - trimmed.length();
- if ((0 <= exponent) &&
- (exponent - remaining_digits < kExactPowersOfTenSize)) {
- // The trimmed string was short and we can multiply it with
- // 10^remaining_digits. As a result the remaining exponent now fits
- // into a double too.
- *result = static_cast<double>(ReadUint64(trimmed, &read_digits));
- ASSERT(read_digits == trimmed.length());
- *result *= exact_powers_of_ten[remaining_digits];
- *result *= exact_powers_of_ten[exponent - remaining_digits];
- return true;
- }
- }
- return false;
-}
-
-
-// Returns 10^exponent as an exact DiyFp.
-// The given exponent must be in the range [1; kDecimalExponentDistance[.
-static DiyFp AdjustmentPowerOfTen(int exponent) {
- ASSERT(0 < exponent);
- ASSERT(exponent < PowersOfTenCache::kDecimalExponentDistance);
- // Simply hardcode the remaining powers for the given decimal exponent
- // distance.
- ASSERT(PowersOfTenCache::kDecimalExponentDistance == 8);
- switch (exponent) {
- case 1: return DiyFp(UINT64_2PART_C(0xa0000000, 00000000), -60);
- case 2: return DiyFp(UINT64_2PART_C(0xc8000000, 00000000), -57);
- case 3: return DiyFp(UINT64_2PART_C(0xfa000000, 00000000), -54);
- case 4: return DiyFp(UINT64_2PART_C(0x9c400000, 00000000), -50);
- case 5: return DiyFp(UINT64_2PART_C(0xc3500000, 00000000), -47);
- case 6: return DiyFp(UINT64_2PART_C(0xf4240000, 00000000), -44);
- case 7: return DiyFp(UINT64_2PART_C(0x98968000, 00000000), -40);
- default:
- UNREACHABLE();
- return DiyFp(0, 0);
- }
-}
-
-
-// If the function returns true then the result is the correct double.
-// Otherwise it is either the correct double or the double that is just below
-// the correct double.
-static bool DiyFpStrtod(Vector<const char> buffer,
- int exponent,
- double* result) {
- DiyFp input;
- int remaining_decimals;
- ReadDiyFp(buffer, &input, &remaining_decimals);
- // Since we may have dropped some digits the input is not accurate.
- // If remaining_decimals is different than 0 than the error is at most
- // .5 ulp (unit in the last place).
- // We don't want to deal with fractions and therefore keep a common
- // denominator.
- const int kDenominatorLog = 3;
- const int kDenominator = 1 << kDenominatorLog;
- // Move the remaining decimals into the exponent.
- exponent += remaining_decimals;
- int error = (remaining_decimals == 0 ? 0 : kDenominator / 2);
-
- int old_e = input.e();
- input.Normalize();
- error <<= old_e - input.e();
-
- ASSERT(exponent <= PowersOfTenCache::kMaxDecimalExponent);
- if (exponent < PowersOfTenCache::kMinDecimalExponent) {
- *result = 0.0;
- return true;
- }
- DiyFp cached_power;
- int cached_decimal_exponent;
- PowersOfTenCache::GetCachedPowerForDecimalExponent(exponent,
- &cached_power,
- &cached_decimal_exponent);
-
- if (cached_decimal_exponent != exponent) {
- int adjustment_exponent = exponent - cached_decimal_exponent;
- DiyFp adjustment_power = AdjustmentPowerOfTen(adjustment_exponent);
- input.Multiply(adjustment_power);
- if (kMaxUint64DecimalDigits - buffer.length() >= adjustment_exponent) {
- // The product of input with the adjustment power fits into a 64 bit
- // integer.
- ASSERT(DiyFp::kSignificandSize == 64);
- } else {
- // The adjustment power is exact. There is hence only an error of 0.5.
- error += kDenominator / 2;
- }
- }
-
- input.Multiply(cached_power);
- // The error introduced by a multiplication of a*b equals
- // error_a + error_b + error_a*error_b/2^64 + 0.5
- // Substituting a with 'input' and b with 'cached_power' we have
- // error_b = 0.5 (all cached powers have an error of less than 0.5 ulp),
- // error_ab = 0 or 1 / kDenominator > error_a*error_b/ 2^64
- int error_b = kDenominator / 2;
- int error_ab = (error == 0 ? 0 : 1); // We round up to 1.
- int fixed_error = kDenominator / 2;
- error += error_b + error_ab + fixed_error;
-
- old_e = input.e();
- input.Normalize();
- error <<= old_e - input.e();
-
- // See if the double's significand changes if we add/subtract the error.
- int order_of_magnitude = DiyFp::kSignificandSize + input.e();
- int effective_significand_size =
- Double::SignificandSizeForOrderOfMagnitude(order_of_magnitude);
- int precision_digits_count =
- DiyFp::kSignificandSize - effective_significand_size;
- if (precision_digits_count + kDenominatorLog >= DiyFp::kSignificandSize) {
- // This can only happen for very small denormals. In this case the
- // half-way multiplied by the denominator exceeds the range of an uint64.
- // Simply shift everything to the right.
- int shift_amount = (precision_digits_count + kDenominatorLog) -
- DiyFp::kSignificandSize + 1;
- input.set_f(input.f() >> shift_amount);
- input.set_e(input.e() + shift_amount);
- // We add 1 for the lost precision of error, and kDenominator for
- // the lost precision of input.f().
- error = (error >> shift_amount) + 1 + kDenominator;
- precision_digits_count -= shift_amount;
- }
- // We use uint64_ts now. This only works if the DiyFp uses uint64_ts too.
- ASSERT(DiyFp::kSignificandSize == 64);
- ASSERT(precision_digits_count < 64);
- uint64_t one64 = 1;
- uint64_t precision_bits_mask = (one64 << precision_digits_count) - 1;
- uint64_t precision_bits = input.f() & precision_bits_mask;
- uint64_t half_way = one64 << (precision_digits_count - 1);
- precision_bits *= kDenominator;
- half_way *= kDenominator;
- DiyFp rounded_input(input.f() >> precision_digits_count,
- input.e() + precision_digits_count);
- if (precision_bits >= half_way + error) {
- rounded_input.set_f(rounded_input.f() + 1);
- }
- // If the last_bits are too close to the half-way case than we are too
- // inaccurate and round down. In this case we return false so that we can
- // fall back to a more precise algorithm.
-
- *result = Double(rounded_input).value();
- if (half_way - error < precision_bits && precision_bits < half_way + error) {
- // Too imprecise. The caller will have to fall back to a slower version.
- // However the returned number is guaranteed to be either the correct
- // double, or the next-lower double.
- return false;
- } else {
- return true;
- }
-}
-
-
-// Returns
-// - -1 if buffer*10^exponent < diy_fp.
-// - 0 if buffer*10^exponent == diy_fp.
-// - +1 if buffer*10^exponent > diy_fp.
-// Preconditions:
-// buffer.length() + exponent <= kMaxDecimalPower + 1
-// buffer.length() + exponent > kMinDecimalPower
-// buffer.length() <= kMaxDecimalSignificantDigits
-static int CompareBufferWithDiyFp(Vector<const char> buffer,
- int exponent,
- DiyFp diy_fp) {
- ASSERT(buffer.length() + exponent <= kMaxDecimalPower + 1);
- ASSERT(buffer.length() + exponent > kMinDecimalPower);
- ASSERT(buffer.length() <= kMaxSignificantDecimalDigits);
- // Make sure that the Bignum will be able to hold all our numbers.
- // Our Bignum implementation has a separate field for exponents. Shifts will
- // consume at most one bigit (< 64 bits).
- // ln(10) == 3.3219...
- ASSERT(((kMaxDecimalPower + 1) * 333 / 100) < Bignum::kMaxSignificantBits);
- Bignum buffer_bignum;
- Bignum diy_fp_bignum;
- buffer_bignum.AssignDecimalString(buffer);
- diy_fp_bignum.AssignUInt64(diy_fp.f());
- if (exponent >= 0) {
- buffer_bignum.MultiplyByPowerOfTen(exponent);
- } else {
- diy_fp_bignum.MultiplyByPowerOfTen(-exponent);
- }
- if (diy_fp.e() > 0) {
- diy_fp_bignum.ShiftLeft(diy_fp.e());
- } else {
- buffer_bignum.ShiftLeft(-diy_fp.e());
- }
- return Bignum::Compare(buffer_bignum, diy_fp_bignum);
-}
-
-
-// Returns true if the guess is the correct double.
-// Returns false, when guess is either correct or the next-lower double.
-static bool ComputeGuess(Vector<const char> trimmed, int exponent,
- double* guess) {
- if (trimmed.length() == 0) {
- *guess = 0.0;
- return true;
- }
- if (exponent + trimmed.length() - 1 >= kMaxDecimalPower) {
- *guess = Double::Infinity();
- return true;
- }
- if (exponent + trimmed.length() <= kMinDecimalPower) {
- *guess = 0.0;
- return true;
- }
-
- if (DoubleStrtod(trimmed, exponent, guess) ||
- DiyFpStrtod(trimmed, exponent, guess)) {
- return true;
- }
- if (*guess == Double::Infinity()) {
- return true;
- }
- return false;
-}
-
-double Strtod(Vector<const char> buffer, int exponent) {
- char copy_buffer[kMaxSignificantDecimalDigits];
- Vector<const char> trimmed;
- int updated_exponent;
- TrimAndCut(buffer, exponent, copy_buffer, kMaxSignificantDecimalDigits,
- &trimmed, &updated_exponent);
- exponent = updated_exponent;
-
- double guess;
- bool is_correct = ComputeGuess(trimmed, exponent, &guess);
- if (is_correct) return guess;
-
- DiyFp upper_boundary = Double(guess).UpperBoundary();
- int comparison = CompareBufferWithDiyFp(trimmed, exponent, upper_boundary);
- if (comparison < 0) {
- return guess;
- } else if (comparison > 0) {
- return Double(guess).NextDouble();
- } else if ((Double(guess).Significand() & 1) == 0) {
- // Round towards even.
- return guess;
- } else {
- return Double(guess).NextDouble();
- }
-}
-
-float Strtof(Vector<const char> buffer, int exponent) {
- char copy_buffer[kMaxSignificantDecimalDigits];
- Vector<const char> trimmed;
- int updated_exponent;
- TrimAndCut(buffer, exponent, copy_buffer, kMaxSignificantDecimalDigits,
- &trimmed, &updated_exponent);
- exponent = updated_exponent;
-
- double double_guess;
- bool is_correct = ComputeGuess(trimmed, exponent, &double_guess);
-
- float float_guess = static_cast<float>(double_guess);
- if (float_guess == double_guess) {
- // This shortcut triggers for integer values.
- return float_guess;
- }
-
- // We must catch double-rounding. Say the double has been rounded up, and is
- // now a boundary of a float, and rounds up again. This is why we have to
- // look at previous too.
- // Example (in decimal numbers):
- // input: 12349
- // high-precision (4 digits): 1235
- // low-precision (3 digits):
- // when read from input: 123
- // when rounded from high precision: 124.
- // To do this we simply look at the neigbors of the correct result and see
- // if they would round to the same float. If the guess is not correct we have
- // to look at four values (since two different doubles could be the correct
- // double).
-
- double double_next = Double(double_guess).NextDouble();
- double double_previous = Double(double_guess).PreviousDouble();
-
- float f1 = static_cast<float>(double_previous);
-#ifndef NDEBUG
- float f2 = float_guess;
-#endif
- float f3 = static_cast<float>(double_next);
- float f4;
- if (is_correct) {
- f4 = f3;
- } else {
- double double_next2 = Double(double_next).NextDouble();
- f4 = static_cast<float>(double_next2);
- }
-#ifndef NDEBUG
- ASSERT(f1 <= f2 && f2 <= f3 && f3 <= f4);
-#endif
-
- // If the guess doesn't lie near a single-precision boundary we can simply
- // return its float-value.
- if (f1 == f4) {
- return float_guess;
- }
-
- ASSERT((f1 != f2 && f2 == f3 && f3 == f4) ||
- (f1 == f2 && f2 != f3 && f3 == f4) ||
- (f1 == f2 && f2 == f3 && f3 != f4));
-
- // guess and next are the two possible canditates (in the same way that
- // double_guess was the lower candidate for a double-precision guess).
- float guess = f1;
- float next = f4;
- DiyFp upper_boundary;
- if (guess == 0.0f) {
- float min_float = 1e-45f;
- upper_boundary = Double(static_cast<double>(min_float) / 2).AsDiyFp();
- } else {
- upper_boundary = Single(guess).UpperBoundary();
- }
- int comparison = CompareBufferWithDiyFp(trimmed, exponent, upper_boundary);
- if (comparison < 0) {
- return guess;
- } else if (comparison > 0) {
- return next;
- } else if ((Single(guess).Significand() & 1) == 0) {
- // Round towards even.
- return guess;
- } else {
- return next;
- }
-}
-
-} // namespace double_conversion
diff --git a/src/joshua/decoder/ff/lm/kenlm/util/ersatz_progress.cc b/src/joshua/decoder/ff/lm/kenlm/util/ersatz_progress.cc
deleted file mode 100644
index 498ab5c..0000000
--- a/src/joshua/decoder/ff/lm/kenlm/util/ersatz_progress.cc
+++ /dev/null
@@ -1,47 +0,0 @@
-#include "util/ersatz_progress.hh"
-
-#include <algorithm>
-#include <ostream>
-#include <limits>
-#include <string>
-
-namespace util {
-
-namespace { const unsigned char kWidth = 100; }
-
-const char kProgressBanner[] = "----5---10---15---20---25---30---35---40---45---50---55---60---65---70---75---80---85---90---95--100\n";
-
-ErsatzProgress::ErsatzProgress() : current_(0), next_(std::numeric_limits<uint64_t>::max()), complete_(next_), out_(NULL) {}
-
-ErsatzProgress::~ErsatzProgress() {
- if (out_) Finished();
-}
-
-ErsatzProgress::ErsatzProgress(uint64_t complete, std::ostream *to, const std::string &message)
- : current_(0), next_(complete / kWidth), complete_(complete), stones_written_(0), out_(to) {
- if (!out_) {
- next_ = std::numeric_limits<uint64_t>::max();
- return;
- }
- if (!message.empty()) *out_ << message << '\n';
- *out_ << kProgressBanner;
-}
-
-void ErsatzProgress::Milestone() {
- if (!out_) { current_ = 0; return; }
- if (!complete_) return;
- unsigned char stone = std::min(static_cast<uint64_t>(kWidth), (current_ * kWidth) / complete_);
-
- for (; stones_written_ < stone; ++stones_written_) {
- (*out_) << '*';
- }
- if (stone == kWidth) {
- (*out_) << std::endl;
- next_ = std::numeric_limits<uint64_t>::max();
- out_ = NULL;
- } else {
- next_ = std::max(next_, ((stone + 1) * complete_ + kWidth - 1) / kWidth);
- }
-}
-
-} // namespace util
diff --git a/src/joshua/decoder/ff/lm/kenlm/util/ersatz_progress.hh b/src/joshua/decoder/ff/lm/kenlm/util/ersatz_progress.hh
deleted file mode 100644
index 535dbde..0000000
--- a/src/joshua/decoder/ff/lm/kenlm/util/ersatz_progress.hh
+++ /dev/null
@@ -1,58 +0,0 @@
-#ifndef UTIL_ERSATZ_PROGRESS_H
-#define UTIL_ERSATZ_PROGRESS_H
-
-#include <iostream>
-#include <string>
-
-#include <stdint.h>
-
-// Ersatz version of boost::progress so core language model doesn't depend on
-// boost. Also adds option to print nothing.
-
-namespace util {
-
-extern const char kProgressBanner[];
-
-class ErsatzProgress {
- public:
- // No output.
- ErsatzProgress();
-
- // Null means no output. The null value is useful for passing along the ostream pointer from another caller.
- explicit ErsatzProgress(uint64_t complete, std::ostream *to = &std::cerr, const std::string &message = "");
-
- ~ErsatzProgress();
-
- ErsatzProgress &operator++() {
- if (++current_ >= next_) Milestone();
- return *this;
- }
-
- ErsatzProgress &operator+=(uint64_t amount) {
- if ((current_ += amount) >= next_) Milestone();
- return *this;
- }
-
- void Set(uint64_t to) {
- if ((current_ = to) >= next_) Milestone();
- }
-
- void Finished() {
- Set(complete_);
- }
-
- private:
- void Milestone();
-
- uint64_t current_, next_, complete_;
- unsigned char stones_written_;
- std::ostream *out_;
-
- // noncopyable
- ErsatzProgress(const ErsatzProgress &other);
- ErsatzProgress &operator=(const ErsatzProgress &other);
-};
-
-} // namespace util
-
-#endif // UTIL_ERSATZ_PROGRESS_H
diff --git a/src/joshua/decoder/ff/lm/kenlm/util/exception.cc b/src/joshua/decoder/ff/lm/kenlm/util/exception.cc
deleted file mode 100644
index eaf4cbc..0000000
--- a/src/joshua/decoder/ff/lm/kenlm/util/exception.cc
+++ /dev/null
@@ -1,98 +0,0 @@
-#include "util/exception.hh"
-
-#ifdef __GXX_RTTI
-#include <typeinfo>
-#endif
-
-#include <errno.h>
-#include <string.h>
-
-namespace util {
-
-Exception::Exception() throw() {}
-Exception::~Exception() throw() {}
-
-Exception::Exception(const Exception &from) : std::exception() {
- stream_ << from.stream_.str();
-}
-
-Exception &Exception::operator=(const Exception &from) {
- stream_ << from.stream_.str();
- return *this;
-}
-
-const char *Exception::what() const throw() {
- text_ = stream_.str();
- return text_.c_str();
-}
-
-void Exception::SetLocation(const char *file, unsigned int line, const char *func, const char *child_name, const char *condition) {
- /* The child class might have set some text, but we want this to come first.
- * Another option would be passing this information to the constructor, but
- * then child classes would have to accept constructor arguments and pass
- * them down.
- */
- text_ = stream_.str();
- stream_.str("");
- stream_ << file << ':' << line;
- if (func) stream_ << " in " << func << " threw ";
- if (child_name) {
- stream_ << child_name;
- } else {
-#ifdef __GXX_RTTI
- stream_ << typeid(this).name();
-#else
- stream_ << "an exception";
-#endif
- }
- if (condition) stream_ << " because `" << condition;
- stream_ << "'.\n";
- stream_ << text_;
-}
-
-namespace {
-
-#ifdef __GNUC__
-const char *HandleStrerror(int ret, const char *buf) __attribute__ ((unused));
-const char *HandleStrerror(const char *ret, const char * /*buf*/) __attribute__ ((unused));
-#endif
-// At least one of these functions will not be called.
-#ifdef __clang__
-#pragma clang diagnostic push
-#pragma clang diagnostic ignored "-Wunused-function"
-#endif
-// The XOPEN version.
-const char *HandleStrerror(int ret, const char *buf) {
- if (!ret) return buf;
- return NULL;
-}
-
-// The GNU version.
-const char *HandleStrerror(const char *ret, const char * /*buf*/) {
- return ret;
-}
-#ifdef __clang__
-#pragma clang diagnostic pop
-#endif
-} // namespace
-
-ErrnoException::ErrnoException() throw() : errno_(errno) {
- char buf[200];
- buf[0] = 0;
-#if defined(sun) || defined(_WIN32) || defined(_WIN64)
- const char *add = strerror(errno);
-#else
- const char *add = HandleStrerror(strerror_r(errno, buf, 200), buf);
-#endif
-
- if (add) {
- *this << add << ' ';
- }
-}
-
-ErrnoException::~ErrnoException() throw() {}
-
-OverflowException::OverflowException() throw() {}
-OverflowException::~OverflowException() throw() {}
-
-} // namespace util
diff --git a/src/joshua/decoder/ff/lm/kenlm/util/exception.hh b/src/joshua/decoder/ff/lm/kenlm/util/exception.hh
deleted file mode 100644
index 4e50a6f..0000000
--- a/src/joshua/decoder/ff/lm/kenlm/util/exception.hh
+++ /dev/null
@@ -1,149 +0,0 @@
-#ifndef UTIL_EXCEPTION_H
-#define UTIL_EXCEPTION_H
-
-#include <exception>
-#include <limits>
-#include <sstream>
-#include <string>
-
-#include <stdint.h>
-
-namespace util {
-
-template <class Except, class Data> typename Except::template ExceptionTag<Except&>::Identity operator<<(Except &e, const Data &data);
-
-class Exception : public std::exception {
- public:
- Exception() throw();
- virtual ~Exception() throw();
-
- Exception(const Exception &from);
- Exception &operator=(const Exception &from);
-
- // Not threadsafe, but probably doesn't matter. FWIW, Boost's exception guidance implies that what() isn't threadsafe.
- const char *what() const throw();
-
- // For use by the UTIL_THROW macros.
- void SetLocation(
- const char *file,
- unsigned int line,
- const char *func,
- const char *child_name,
- const char *condition);
-
- private:
- template <class Except, class Data> friend typename Except::template ExceptionTag<Except&>::Identity operator<<(Except &e, const Data &data);
-
- // This helps restrict operator<< defined below.
- template <class T> struct ExceptionTag {
- typedef T Identity;
- };
-
- std::stringstream stream_;
- mutable std::string text_;
-};
-
-/* This implements the normal operator<< for Exception and all its children.
- * SFINAE means it only applies to Exception. Think of this as an ersatz
- * boost::enable_if.
- */
-template <class Except, class Data> typename Except::template ExceptionTag<Except&>::Identity operator<<(Except &e, const Data &data) {
- e.stream_ << data;
- return e;
-}
-
-#ifdef __GNUC__
-#define UTIL_FUNC_NAME __PRETTY_FUNCTION__
-#else
-#ifdef _WIN32
-#define UTIL_FUNC_NAME __FUNCTION__
-#else
-#define UTIL_FUNC_NAME NULL
-#endif
-#endif
-
-/* Create an instance of Exception, add the message Modify, and throw it.
- * Modify is appended to the what() message and can contain << for ostream
- * operations.
- *
- * do .. while kludge to swallow trailing ; character
- * http://gcc.gnu.org/onlinedocs/cpp/Swallowing-the-Semicolon.html .
- * Arg can be a constructor argument to the exception.
- */
-#define UTIL_THROW_BACKEND(Condition, Exception, Arg, Modify) do { \
- Exception UTIL_e Arg; \
- UTIL_e.SetLocation(__FILE__, __LINE__, UTIL_FUNC_NAME, #Exception, Condition); \
- UTIL_e << Modify; \
- throw UTIL_e; \
-} while (0)
-
-#define UTIL_THROW_ARG(Exception, Arg, Modify) \
- UTIL_THROW_BACKEND(NULL, Exception, Arg, Modify)
-
-#define UTIL_THROW(Exception, Modify) \
- UTIL_THROW_BACKEND(NULL, Exception, , Modify);
-
-#define UTIL_THROW2(Modify) \
- UTIL_THROW_BACKEND(NULL, util::Exception, , Modify);
-
-#if __GNUC__ >= 3
-#define UTIL_UNLIKELY(x) __builtin_expect (!!(x), 0)
-#else
-#define UTIL_UNLIKELY(x) (x)
-#endif
-
-#define UTIL_THROW_IF_ARG(Condition, Exception, Arg, Modify) do { \
- if (UTIL_UNLIKELY(Condition)) { \
- UTIL_THROW_BACKEND(#Condition, Exception, Arg, Modify); \
- } \
-} while (0)
-
-#define UTIL_THROW_IF(Condition, Exception, Modify) \
- UTIL_THROW_IF_ARG(Condition, Exception, , Modify)
-
-#define UTIL_THROW_IF2(Condition, Modify) \
- UTIL_THROW_IF_ARG(Condition, util::Exception, , Modify)
-
-// Exception that records errno and adds it to the message.
-class ErrnoException : public Exception {
- public:
- ErrnoException() throw();
-
- virtual ~ErrnoException() throw();
-
- int Error() const throw() { return errno_; }
-
- private:
- int errno_;
-};
-
-// file wasn't there, or couldn't be open for some reason
-class FileOpenException : public Exception {
- public:
- FileOpenException() throw() {}
- ~FileOpenException() throw() {}
-};
-
-// Utilities for overflow checking.
-class OverflowException : public Exception {
- public:
- OverflowException() throw();
- ~OverflowException() throw();
-};
-
-template <unsigned len> inline std::size_t CheckOverflowInternal(uint64_t value) {
- UTIL_THROW_IF(value > static_cast<uint64_t>(std::numeric_limits<std::size_t>::max()), OverflowException, "Integer overflow detected. This model is too big for 32-bit code.");
- return value;
-}
-
-template <> inline std::size_t CheckOverflowInternal<8>(uint64_t value) {
- return value;
-}
-
-inline std::size_t CheckOverflow(uint64_t value) {
- return CheckOverflowInternal<sizeof(std::size_t)>(value);
-}
-
-} // namespace util
-
-#endif // UTIL_EXCEPTION_H
diff --git a/src/joshua/decoder/ff/lm/kenlm/util/fake_ofstream.hh b/src/joshua/decoder/ff/lm/kenlm/util/fake_ofstream.hh
deleted file mode 100644
index 987fa80..0000000
--- a/src/joshua/decoder/ff/lm/kenlm/util/fake_ofstream.hh
+++ /dev/null
@@ -1,105 +0,0 @@
-/* Like std::ofstream but without being incredibly slow. Backed by a raw fd.
- * Does not support many data types. Currently, it's targeted at writing ARPA
- * files quickly.
- */
-#ifndef UTIL_FAKE_OFSTREAM_H
-#define UTIL_FAKE_OFSTREAM_H
-
-#include "util/double-conversion/double-conversion.h"
-#include "util/double-conversion/utils.h"
-#include "util/file.hh"
-#include "util/scoped.hh"
-#include "util/string_piece.hh"
-
-#define BOOST_LEXICAL_CAST_ASSUME_C_LOCALE
-#include <boost/lexical_cast.hpp>
-
-namespace util {
-class FakeOFStream {
- public:
- // Does not take ownership of out.
- // Allows default constructor, but must call SetFD.
- explicit FakeOFStream(int out = -1, std::size_t buffer_size = 1048576)
- : buf_(util::MallocOrThrow(buffer_size)),
- builder_(static_cast<char*>(buf_.get()), buffer_size),
- // Mostly the default but with inf instead. And no flags.
- convert_(double_conversion::DoubleToStringConverter::NO_FLAGS, "inf", "NaN", 'e', -6, 21, 6, 0),
- fd_(out),
- buffer_size_(buffer_size) {}
-
- ~FakeOFStream() {
- if (buf_.get()) Flush();
- }
-
- void SetFD(int to) {
- if (builder_.position()) Flush();
- fd_ = to;
- }
-
- FakeOFStream &operator<<(float value) {
- // Odd, but this is the largest number found in the comments.
- EnsureRemaining(double_conversion::DoubleToStringConverter::kMaxPrecisionDigits + 8);
- convert_.ToShortestSingle(value, &builder_);
- return *this;
- }
-
- FakeOFStream &operator<<(double value) {
- EnsureRemaining(double_conversion::DoubleToStringConverter::kMaxPrecisionDigits + 8);
- convert_.ToShortest(value, &builder_);
- return *this;
- }
-
- FakeOFStream &operator<<(StringPiece str) {
- if (str.size() > buffer_size_) {
- Flush();
- util::WriteOrThrow(fd_, str.data(), str.size());
- } else {
- EnsureRemaining(str.size());
- builder_.AddSubstring(str.data(), str.size());
- }
- return *this;
- }
-
- // Inefficient! TODO: more efficient implementation
- FakeOFStream &operator<<(unsigned value) {
- return *this << boost::lexical_cast<std::string>(value);
- }
-
- FakeOFStream &operator<<(char c) {
- EnsureRemaining(1);
- builder_.AddCharacter(c);
- return *this;
- }
-
- // Note this does not sync.
- void Flush() {
- util::WriteOrThrow(fd_, buf_.get(), builder_.position());
- builder_.Reset();
- }
-
- // Not necessary, but does assure the data is cleared.
- void Finish() {
- Flush();
- // It will segfault trying to null terminate otherwise.
- builder_.Finalize();
- buf_.reset();
- util::FSyncOrThrow(fd_);
- }
-
- private:
- void EnsureRemaining(std::size_t amount) {
- if (static_cast<std::size_t>(builder_.size() - builder_.position()) <= amount) {
- Flush();
- }
- }
-
- util::scoped_malloc buf_;
- double_conversion::StringBuilder builder_;
- double_conversion::DoubleToStringConverter convert_;
- int fd_;
- const std::size_t buffer_size_;
-};
-
-} // namespace
-
-#endif
diff --git a/src/joshua/decoder/ff/lm/kenlm/util/file.cc b/src/joshua/decoder/ff/lm/kenlm/util/file.cc
deleted file mode 100644
index aa61cf9..0000000
--- a/src/joshua/decoder/ff/lm/kenlm/util/file.cc
+++ /dev/null
@@ -1,556 +0,0 @@
-#define _LARGEFILE64_SOURCE
-#define _FILE_OFFSET_BITS 64
-
-#include "util/file.hh"
-
-#include "util/exception.hh"
-
-#include <algorithm>
-#include <cstdlib>
-#include <cstdio>
-#include <iostream>
-#include <limits>
-#include <sstream>
-
-
-#include <assert.h>
-#include <errno.h>
-#include <limits.h>
-#include <sys/types.h>
-#include <sys/stat.h>
-#include <fcntl.h>
-#include <stdint.h>
-
-#if defined(__MINGW32__)
-#include <windows.h>
-#include <unistd.h>
-#warning "The file functions on MinGW have not been tested for file sizes above 2^31 - 1. Please read https://stackoverflow.com/questions/12539488/determine-64-bit-file-size-in-c-on-mingw-32-bit and fix"
-#elif defined(_WIN32) || defined(_WIN64)
-#include <windows.h>
-#include <io.h>
-#else
-#include <unistd.h>
-#endif
-
-namespace util {
-
-scoped_fd::~scoped_fd() {
- if (fd_ != -1 && close(fd_)) {
- std::cerr << "Could not close file " << fd_ << std::endl;
- std::abort();
- }
-}
-
-void scoped_FILE_closer::Close(std::FILE *file) {
- if (file && std::fclose(file)) {
- std::cerr << "Could not close file " << file << std::endl;
- std::abort();
- }
-}
-
-// Note that ErrnoException records errno before NameFromFD is called.
-FDException::FDException(int fd) throw() : fd_(fd), name_guess_(NameFromFD(fd)) {
- *this << "in " << name_guess_ << ' ';
-}
-
-FDException::~FDException() throw() {}
-
-EndOfFileException::EndOfFileException() throw() {
- *this << "End of file";
-}
-EndOfFileException::~EndOfFileException() throw() {}
-
-int OpenReadOrThrow(const char *name) {
- int ret;
-#if defined(_WIN32) || defined(_WIN64)
- UTIL_THROW_IF(-1 == (ret = _open(name, _O_BINARY | _O_RDONLY)), ErrnoException, "while opening " << name);
-#else
- UTIL_THROW_IF(-1 == (ret = open(name, O_RDONLY)), ErrnoException, "while opening " << name);
-#endif
- return ret;
-}
-
-int CreateOrThrow(const char *name) {
- int ret;
-#if defined(_WIN32) || defined(_WIN64)
- UTIL_THROW_IF(-1 == (ret = _open(name, _O_CREAT | _O_TRUNC | _O_RDWR | _O_BINARY, _S_IREAD | _S_IWRITE)), ErrnoException, "while creating " << name);
-#else
- UTIL_THROW_IF(-1 == (ret = open(name, O_CREAT | O_TRUNC | O_RDWR, S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH)), ErrnoException, "while creating " << name);
-#endif
- return ret;
-}
-
-uint64_t SizeFile(int fd) {
-#if defined __MINGW32__
- struct stat sb;
- // Does this handle 64-bit?
- int ret = fstat(fd, &sb);
- if (ret == -1 || (!sb.st_size && !S_ISREG(sb.st_mode))) return kBadSize;
- return sb.st_size;
-#elif defined(_WIN32) || defined(_WIN64)
- __int64 ret = _filelengthi64(fd);
- return (ret == -1) ? kBadSize : ret;
-#else // Not windows.
-
-#ifdef OS_ANDROID
- struct stat64 sb;
- int ret = fstat64(fd, &sb);
-#else
- struct stat sb;
- int ret = fstat(fd, &sb);
-#endif
- if (ret == -1 || (!sb.st_size && !S_ISREG(sb.st_mode))) return kBadSize;
- return sb.st_size;
-#endif
-}
-
-uint64_t SizeOrThrow(int fd) {
- uint64_t ret = SizeFile(fd);
- UTIL_THROW_IF_ARG(ret == kBadSize, FDException, (fd), "Failed to size");
- return ret;
-}
-
-void ResizeOrThrow(int fd, uint64_t to) {
-#if defined __MINGW32__
- // Does this handle 64-bit?
- int ret = ftruncate
-#elif defined(_WIN32) || defined(_WIN64)
- errno_t ret = _chsize_s
-#elif defined(OS_ANDROID)
- int ret = ftruncate64
-#else
- int ret = ftruncate
-#endif
- (fd, to);
- UTIL_THROW_IF_ARG(ret, FDException, (fd), "while resizing to " << to << " bytes");
-}
-
-namespace {
-std::size_t GuardLarge(std::size_t size) {
- // The following operating systems have broken read/write/pread/pwrite that
- // only supports up to 2^31.
- // OS X man pages claim to support 64-bit, but Kareem M. Darwish had problems
- // building with larger files, so APPLE is also here.
-#if defined(_WIN32) || defined(_WIN64) || defined(__APPLE__) || defined(OS_ANDROID) || defined(__MINGW32__)
- return size < INT_MAX ? size : INT_MAX;
-#else
- return size;
-#endif
-}
-}
-
-std::size_t PartialRead(int fd, void *to, std::size_t amount) {
-#if defined(_WIN32) || defined(_WIN64)
- int ret = _read(fd, to, GuardLarge(amount));
-#else
- errno = 0;
- ssize_t ret;
- do {
- ret = read(fd, to, GuardLarge(amount));
- } while (ret == -1 && errno == EINTR);
-#endif
- UTIL_THROW_IF_ARG(ret < 0, FDException, (fd), "while reading " << amount << " bytes");
- return static_cast<std::size_t>(ret);
-}
-
-void ReadOrThrow(int fd, void *to_void, std::size_t amount) {
- uint8_t *to = static_cast<uint8_t*>(to_void);
- while (amount) {
- std::size_t ret = PartialRead(fd, to, amount);
- UTIL_THROW_IF(ret == 0, EndOfFileException, " in " << NameFromFD(fd) << " but there should be " << amount << " more bytes to read.");
- amount -= ret;
- to += ret;
- }
-}
-
-std::size_t ReadOrEOF(int fd, void *to_void, std::size_t amount) {
- uint8_t *to = static_cast<uint8_t*>(to_void);
- std::size_t remaining = amount;
- while (remaining) {
- std::size_t ret = PartialRead(fd, to, remaining);
- if (!ret) return amount - remaining;
- remaining -= ret;
- to += ret;
- }
- return amount;
-}
-
-void WriteOrThrow(int fd, const void *data_void, std::size_t size) {
- const uint8_t *data = static_cast<const uint8_t*>(data_void);
- while (size) {
-#if defined(_WIN32) || defined(_WIN64)
- int ret;
-#else
- ssize_t ret;
-#endif
- errno = 0;
- do {
- ret =
-#if defined(_WIN32) || defined(_WIN64)
- _write
-#else
- write
-#endif
- (fd, data, GuardLarge(size));
- } while (ret == -1 && errno == EINTR);
- UTIL_THROW_IF_ARG(ret < 1, FDException, (fd), "while writing " << size << " bytes");
- data += ret;
- size -= ret;
- }
-}
-
-void WriteOrThrow(FILE *to, const void *data, std::size_t size) {
- if (!size) return;
- UTIL_THROW_IF(1 != std::fwrite(data, size, 1, to), ErrnoException, "Short write; requested size " << size);
-}
-
-#if defined(_WIN32) || defined(_WIN64)
-namespace {
-const std::size_t kMaxDWORD = static_cast<std::size_t>(4294967295UL);
-} // namespace
-#endif
-
-void ErsatzPRead(int fd, void *to_void, std::size_t size, uint64_t off) {
- uint8_t *to = static_cast<uint8_t*>(to_void);
- while (size) {
-#if defined(_WIN32) || defined(_WIN64)
- /* BROKEN: changes file pointer. Even if you save it and change it back, it won't be safe to use concurrently with write() or read() which lmplz does. */
- // size_t might be 64-bit. DWORD is always 32.
- DWORD reading = static_cast<DWORD>(std::min<std::size_t>(kMaxDWORD, size));
- DWORD ret;
- OVERLAPPED overlapped;
- memset(&overlapped, 0, sizeof(OVERLAPPED));
- overlapped.Offset = static_cast<DWORD>(off);
- overlapped.OffsetHigh = static_cast<DWORD>(off >> 32);
- UTIL_THROW_IF(!ReadFile((HANDLE)_get_osfhandle(fd), to, reading, &ret, &overlapped), Exception, "ReadFile failed for offset " << off);
-#else
- ssize_t ret;
- errno = 0;
- ret =
-#ifdef OS_ANDROID
- pread64
-#else
- pread
-#endif
- (fd, to, GuardLarge(size), off);
- if (ret <= 0) {
- if (ret == -1 && errno == EINTR) continue;
- UTIL_THROW_IF(ret == 0, EndOfFileException, " for reading " << size << " bytes at " << off << " from " << NameFromFD(fd));
- UTIL_THROW_ARG(FDException, (fd), "while reading " << size << " bytes at offset " << off);
- }
-#endif
- size -= ret;
- off += ret;
- to += ret;
- }
-}
-
-void ErsatzPWrite(int fd, const void *from_void, std::size_t size, uint64_t off) {
- const uint8_t *from = static_cast<const uint8_t*>(from_void);
- while(size) {
-#if defined(_WIN32) || defined(_WIN64)
- /* Changes file pointer. Even if you save it and change it back, it won't be safe to use concurrently with write() or read() */
- // size_t might be 64-bit. DWORD is always 32.
- DWORD writing = static_cast<DWORD>(std::min<std::size_t>(kMaxDWORD, size));
- DWORD ret;
- OVERLAPPED overlapped;
- memset(&overlapped, 0, sizeof(OVERLAPPED));
- overlapped.Offset = static_cast<DWORD>(off);
- overlapped.OffsetHigh = static_cast<DWORD>(off >> 32);
- UTIL_THROW_IF(!WriteFile((HANDLE)_get_osfhandle(fd), from, writing, &ret, &overlapped), Exception, "WriteFile failed for offset " << off);
-#else
- ssize_t ret;
- errno = 0;
- ret =
-#ifdef OS_ANDROID
- pwrite64
-#else
- pwrite
-#endif
- (fd, from, GuardLarge(size), off);
- if (ret <= 0) {
- if (ret == -1 && errno == EINTR) continue;
- UTIL_THROW_IF(ret == 0, EndOfFileException, " for writing " << size << " bytes at " << off << " from " << NameFromFD(fd));
- UTIL_THROW_ARG(FDException, (fd), "while writing " << size << " bytes at offset " << off);
- }
-#endif
- size -= ret;
- off += ret;
- from += ret;
- }
-}
-
-
-void FSyncOrThrow(int fd) {
-// Apparently windows doesn't have fsync?
-#if !defined(_WIN32) && !defined(_WIN64)
- UTIL_THROW_IF_ARG(-1 == fsync(fd), FDException, (fd), "while syncing");
-#endif
-}
-
-namespace {
-
-// Static assert for 64-bit off_t size.
-#if !defined(_WIN32) && !defined(_WIN64) && !defined(OS_ANDROID)
-template <unsigned> struct CheckOffT;
-template <> struct CheckOffT<8> {
- struct True {};
-};
-// If there's a compiler error on the next line, then off_t isn't 64 bit. And
-// that makes me a sad panda.
-typedef CheckOffT<sizeof(off_t)>::True IgnoredType;
-#endif
-
-// Can't we all just get along?
-void InternalSeek(int fd, int64_t off, int whence) {
- if (
-#if defined __MINGW32__
- // Does this handle 64-bit?
- (off_t)-1 == lseek(fd, off, whence)
-#elif defined(_WIN32) || defined(_WIN64)
- (__int64)-1 == _lseeki64(fd, off, whence)
-#elif defined(OS_ANDROID)
- (off64_t)-1 == lseek64(fd, off, whence)
-#else
- (off_t)-1 == lseek(fd, off, whence)
-#endif
- ) UTIL_THROW_ARG(FDException, (fd), "while seeking to " << off << " whence " << whence);
-}
-} // namespace
-
-void SeekOrThrow(int fd, uint64_t off) {
- InternalSeek(fd, off, SEEK_SET);
-}
-
-void AdvanceOrThrow(int fd, int64_t off) {
- InternalSeek(fd, off, SEEK_CUR);
-}
-
-void SeekEnd(int fd) {
- InternalSeek(fd, 0, SEEK_END);
-}
-
-std::FILE *FDOpenOrThrow(scoped_fd &file) {
- std::FILE *ret = fdopen(file.get(), "r+b");
- UTIL_THROW_IF_ARG(!ret, FDException, (file.get()), "Could not fdopen for write");
- file.release();
- return ret;
-}
-
-std::FILE *FDOpenReadOrThrow(scoped_fd &file) {
- std::FILE *ret = fdopen(file.get(), "rb");
- UTIL_THROW_IF_ARG(!ret, FDException, (file.get()), "Could not fdopen for read");
- file.release();
- return ret;
-}
-
-// Sigh. Windows temporary file creation is full of race conditions.
-#if defined(_WIN32) || defined(_WIN64)
-/* mkstemp extracted from libc/sysdeps/posix/tempname.c. Copyright
- (C) 1991-1999, 2000, 2001, 2006 Free Software Foundation, Inc.
-
- The GNU C Library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version. */
-
-/* This has been modified from the original version to rename the function and
- * set the Windows temporary flag. */
-
-static const char letters[] =
-"abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789";
-
-/* Generate a temporary file name based on TMPL. TMPL must match the
- rules for mk[s]temp (i.e. end in "XXXXXX"). The name constructed
- does not exist at the time of the call to mkstemp. TMPL is
- overwritten with the result. */
-int
-mkstemp_and_unlink(char *tmpl)
-{
- int len;
- char *XXXXXX;
- static unsigned long long value;
- unsigned long long random_time_bits;
- unsigned int count;
- int fd = -1;
- int save_errno = errno;
-
- /* A lower bound on the number of temporary files to attempt to
- generate. The maximum total number of temporary file names that
- can exist for a given template is 62**6. It should never be
- necessary to try all these combinations. Instead if a reasonable
- number of names is tried (we define reasonable as 62**3) fail to
- give the system administrator the chance to remove the problems. */
-#define ATTEMPTS_MIN (62 * 62 * 62)
-
- /* The number of times to attempt to generate a temporary file. To
- conform to POSIX, this must be no smaller than TMP_MAX. */
-#if ATTEMPTS_MIN < TMP_MAX
- unsigned int attempts = TMP_MAX;
-#else
- unsigned int attempts = ATTEMPTS_MIN;
-#endif
-
- len = strlen (tmpl);
- if (len < 6 || strcmp (&tmpl[len - 6], "XXXXXX"))
- {
- errno = EINVAL;
- return -1;
- }
-
-/* This is where the Xs start. */
- XXXXXX = &tmpl[len - 6];
-
- /* Get some more or less random data. */
- {
- SYSTEMTIME stNow;
- FILETIME ftNow;
-
- // get system time
- GetSystemTime(&stNow);
- stNow.wMilliseconds = 500;
- if (!SystemTimeToFileTime(&stNow, &ftNow))
- {
- errno = -1;
- return -1;
- }
-
- random_time_bits = (((unsigned long long)ftNow.dwHighDateTime << 32)
- | (unsigned long long)ftNow.dwLowDateTime);
- }
- value += random_time_bits ^ (unsigned long long)GetCurrentThreadId ();
-
- for (count = 0; count < attempts; value += 7777, ++count)
- {
- unsigned long long v = value;
-
- /* Fill in the random bits. */
- XXXXXX[0] = letters[v % 62];
- v /= 62;
- XXXXXX[1] = letters[v % 62];
- v /= 62;
- XXXXXX[2] = letters[v % 62];
- v /= 62;
- XXXXXX[3] = letters[v % 62];
- v /= 62;
- XXXXXX[4] = letters[v % 62];
- v /= 62;
- XXXXXX[5] = letters[v % 62];
-
- /* Modified for windows and to unlink */
- // fd = open (tmpl, O_RDWR | O_CREAT | O_EXCL, _S_IREAD | _S_IWRITE);
- int flags = _O_RDWR | _O_CREAT | _O_EXCL | _O_BINARY;
- flags |= _O_TEMPORARY;
- fd = _open (tmpl, flags, _S_IREAD | _S_IWRITE);
- if (fd >= 0)
- {
- errno = save_errno;
- return fd;
- }
- else if (errno != EEXIST)
- return -1;
- }
-
- /* We got out of the loop because we ran out of combinations to try. */
- errno = EEXIST;
- return -1;
-}
-#else
-int
-mkstemp_and_unlink(char *tmpl) {
- int ret = mkstemp(tmpl);
- if (ret != -1) {
- UTIL_THROW_IF(unlink(tmpl), ErrnoException, "while deleting delete " << tmpl);
- }
- return ret;
-}
-#endif
-
-// If it's a directory, add a /. This lets users say -T /tmp without creating
-// /tmpAAAAAA
-void NormalizeTempPrefix(std::string &base) {
- if (base.empty()) return;
- if (base[base.size() - 1] == '/') return;
- struct stat sb;
- // It's fine for it to not exist.
- if (-1 == stat(base.c_str(), &sb)) return;
- if (
-#if defined(_WIN32) || defined(_WIN64)
- sb.st_mode & _S_IFDIR
-#else
- S_ISDIR(sb.st_mode)
-#endif
- ) base += '/';
-}
-
-int MakeTemp(const StringPiece &base) {
- std::string name(base.data(), base.size());
- name += "XXXXXX";
- name.push_back(0);
- int ret;
- UTIL_THROW_IF(-1 == (ret = mkstemp_and_unlink(&name[0])), ErrnoException, "while making a temporary based on " << base);
- return ret;
-}
-
-std::FILE *FMakeTemp(const StringPiece &base) {
- util::scoped_fd file(MakeTemp(base));
- return FDOpenOrThrow(file);
-}
-
-int DupOrThrow(int fd) {
- int ret = dup(fd);
- UTIL_THROW_IF_ARG(ret == -1, FDException, (fd), "in duplicating the file descriptor");
- return ret;
-}
-
-namespace {
-// Try to name things but be willing to fail too.
-bool TryName(int fd, std::string &out) {
-#if defined(_WIN32) || defined(_WIN64)
- return false;
-#else
- std::string name("/proc/self/fd/");
- std::ostringstream convert;
- convert << fd;
- name += convert.str();
-
- struct stat sb;
- if (-1 == lstat(name.c_str(), &sb))
- return false;
- out.resize(sb.st_size + 1);
- // lstat gave us a size, but I've seen it grow, possibly due to symlinks on top of symlinks.
- while (true) {
- ssize_t ret = readlink(name.c_str(), &out[0], out.size());
- if (-1 == ret)
- return false;
- if ((size_t)ret < out.size()) {
- out.resize(ret);
- break;
- }
- // Exponential growth.
- out.resize(out.size() * 2);
- }
- // Don't use the non-file names.
- if (!out.empty() && out[0] != '/')
- return false;
- return true;
-#endif
-}
-} // namespace
-
-std::string NameFromFD(int fd) {
- std::string ret;
- if (TryName(fd, ret)) return ret;
- switch (fd) {
- case 0: return "stdin";
- case 1: return "stdout";
- case 2: return "stderr";
- }
- ret = "fd ";
- std::ostringstream convert;
- convert << fd;
- ret += convert.str();
- return ret;
-}
-
-} // namespace util
diff --git a/src/joshua/decoder/ff/lm/kenlm/util/file.hh b/src/joshua/decoder/ff/lm/kenlm/util/file.hh
deleted file mode 100644
index 7204b6a..0000000
--- a/src/joshua/decoder/ff/lm/kenlm/util/file.hh
+++ /dev/null
@@ -1,133 +0,0 @@
-#ifndef UTIL_FILE_H
-#define UTIL_FILE_H
-
-#include "util/exception.hh"
-#include "util/scoped.hh"
-#include "util/string_piece.hh"
-
-#include <cstddef>
-#include <cstdio>
-#include <string>
-
-#include <stdint.h>
-
-namespace util {
-
-class scoped_fd {
- public:
- scoped_fd() : fd_(-1) {}
-
- explicit scoped_fd(int fd) : fd_(fd) {}
-
- ~scoped_fd();
-
- void reset(int to = -1) {
- scoped_fd other(fd_);
- fd_ = to;
- }
-
- int get() const { return fd_; }
-
- int operator*() const { return fd_; }
-
- int release() {
- int ret = fd_;
- fd_ = -1;
- return ret;
- }
-
- private:
- int fd_;
-
- scoped_fd(const scoped_fd &);
- scoped_fd &operator=(const scoped_fd &);
-};
-
-struct scoped_FILE_closer {
- static void Close(std::FILE *file);
-};
-typedef scoped<std::FILE, scoped_FILE_closer> scoped_FILE;
-
-/* Thrown for any operation where the fd is known. */
-class FDException : public ErrnoException {
- public:
- explicit FDException(int fd) throw();
-
- virtual ~FDException() throw();
-
- // This may no longer be valid if the exception was thrown past open.
- int FD() const { return fd_; }
-
- // Guess from NameFromFD.
- const std::string &NameGuess() const { return name_guess_; }
-
- private:
- int fd_;
-
- std::string name_guess_;
-};
-
-// End of file reached.
-class EndOfFileException : public Exception {
- public:
- EndOfFileException() throw();
- ~EndOfFileException() throw();
-};
-
-// Open for read only.
-int OpenReadOrThrow(const char *name);
-// Create file if it doesn't exist, truncate if it does. Opened for write.
-int CreateOrThrow(const char *name);
-
-// Return value for SizeFile when it can't size properly.
-const uint64_t kBadSize = (uint64_t)-1;
-uint64_t SizeFile(int fd);
-uint64_t SizeOrThrow(int fd);
-
-void ResizeOrThrow(int fd, uint64_t to);
-
-std::size_t PartialRead(int fd, void *to, std::size_t size);
-void ReadOrThrow(int fd, void *to, std::size_t size);
-std::size_t ReadOrEOF(int fd, void *to_void, std::size_t size);
-
-void WriteOrThrow(int fd, const void *data_void, std::size_t size);
-void WriteOrThrow(FILE *to, const void *data, std::size_t size);
-
-/* These call pread/pwrite in a loop. However, on Windows they call ReadFile/
- * WriteFile which changes the file pointer. So it's safe to call ErsatzPRead
- * and ErsatzPWrite concurrently (or any combination thereof). But it changes
- * the file pointer on windows, so it's not safe to call concurrently with
- * anything that uses the implicit file pointer e.g. the Read/Write functions
- * above.
- */
-void ErsatzPRead(int fd, void *to, std::size_t size, uint64_t off);
-void ErsatzPWrite(int fd, const void *data_void, std::size_t size, uint64_t off);
-
-void FSyncOrThrow(int fd);
-
-// Seeking
-void SeekOrThrow(int fd, uint64_t off);
-void AdvanceOrThrow(int fd, int64_t off);
-void SeekEnd(int fd);
-
-std::FILE *FDOpenOrThrow(scoped_fd &file);
-std::FILE *FDOpenReadOrThrow(scoped_fd &file);
-
-// Temporary files
-// Append a / if base is a directory.
-void NormalizeTempPrefix(std::string &base);
-int MakeTemp(const StringPiece &prefix);
-std::FILE *FMakeTemp(const StringPiece &prefix);
-
-// dup an fd.
-int DupOrThrow(int fd);
-
-/* Attempt get file name from fd. This won't always work (i.e. on Windows or
- * a pipe). The file might have been renamed. It's intended for diagnostics
- * and logging only.
- */
-std::string NameFromFD(int fd);
-
-} // namespace util
-
-#endif // UTIL_FILE_H
diff --git a/src/joshua/decoder/ff/lm/kenlm/util/file_piece.cc b/src/joshua/decoder/ff/lm/kenlm/util/file_piece.cc
deleted file mode 100644
index 4aaa250..0000000
--- a/src/joshua/decoder/ff/lm/kenlm/util/file_piece.cc
+++ /dev/null
@@ -1,321 +0,0 @@
-#include "util/file_piece.hh"
-
-#include "util/double-conversion/double-conversion.h"
-#include "util/exception.hh"
-#include "util/file.hh"
-#include "util/mmap.hh"
-
-#if defined(_WIN32) || defined(_WIN64)
-#include <io.h>
-#else
-#include <unistd.h>
-#endif
-
-#include <iostream>
-#include <string>
-#include <limits>
-
-#include <assert.h>
-#include <fcntl.h>
-#include <stdlib.h>
-#include <sys/types.h>
-#include <sys/stat.h>
-
-namespace util {
-
-ParseNumberException::ParseNumberException(StringPiece value) throw() {
- *this << "Could not parse \"" << value << "\" into a number";
-}
-
-// Sigh this is the only way I could come up with to do a _const_ bool. It has ' ', '\f', '\n', '\r', '\t', and '\v' (same as isspace on C locale).
-const bool kSpaces[256] = {0,0,0,0,0,0,0,0,0,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0};
-
-FilePiece::FilePiece(const char *name, std::ostream *show_progress, std::size_t min_buffer) :
- file_(OpenReadOrThrow(name)), total_size_(SizeFile(file_.get())), page_(SizePage()),
- progress_(total_size_, total_size_ == kBadSize ? NULL : show_progress, std::string("Reading ") + name) {
- Initialize(name, show_progress, min_buffer);
-}
-
-namespace {
-std::string NamePossiblyFind(int fd, const char *name) {
- if (name) return name;
- return NameFromFD(fd);
-}
-} // namespace
-
-FilePiece::FilePiece(int fd, const char *name, std::ostream *show_progress, std::size_t min_buffer) :
- file_(fd), total_size_(SizeFile(file_.get())), page_(SizePage()),
- progress_(total_size_, total_size_ == kBadSize ? NULL : show_progress, std::string("Reading ") + NamePossiblyFind(fd, name)) {
- Initialize(NamePossiblyFind(fd, name).c_str(), show_progress, min_buffer);
-}
-
-FilePiece::FilePiece(std::istream &stream, const char *name, std::size_t min_buffer) :
- total_size_(kBadSize), page_(SizePage()) {
- InitializeNoRead("istream", min_buffer);
-
- fallback_to_read_ = true;
- data_.reset(MallocOrThrow(default_map_size_), default_map_size_, scoped_memory::MALLOC_ALLOCATED);
- position_ = data_.begin();
- position_end_ = position_;
-
- fell_back_.Reset(stream);
-}
-
-FilePiece::~FilePiece() {}
-
-StringPiece FilePiece::ReadLine(char delim) {
- std::size_t skip = 0;
- while (true) {
- for (const char *i = position_ + skip; i < position_end_; ++i) {
- if (*i == delim) {
- StringPiece ret(position_, i - position_);
- position_ = i + 1;
- return ret;
- }
- }
- if (at_end_) {
- if (position_ == position_end_) {
- Shift();
- }
- return Consume(position_end_);
- }
- skip = position_end_ - position_;
- Shift();
- }
-}
-
-bool FilePiece::ReadLineOrEOF(StringPiece &to, char delim) {
- try {
- to = ReadLine(delim);
- } catch (const util::EndOfFileException &e) { return false; }
- return true;
-}
-
-float FilePiece::ReadFloat() {
- return ReadNumber<float>();
-}
-double FilePiece::ReadDouble() {
- return ReadNumber<double>();
-}
-long int FilePiece::ReadLong() {
- return ReadNumber<long int>();
-}
-unsigned long int FilePiece::ReadULong() {
- return ReadNumber<unsigned long int>();
-}
-
-// Factored out so that istream can call this.
-void FilePiece::InitializeNoRead(const char *name, std::size_t min_buffer) {
- file_name_ = name;
-
- default_map_size_ = page_ * std::max<std::size_t>((min_buffer / page_ + 1), 2);
- position_ = NULL;
- position_end_ = NULL;
- mapped_offset_ = 0;
- at_end_ = false;
-}
-
-void FilePiece::Initialize(const char *name, std::ostream *show_progress, std::size_t min_buffer) {
- InitializeNoRead(name, min_buffer);
-
- if (total_size_ == kBadSize) {
- // So the assertion passes.
- fallback_to_read_ = false;
- if (show_progress)
- *show_progress << "File " << name << " isn't normal. Using slower read() instead of mmap(). No progress bar." << std::endl;
- TransitionToRead();
- } else {
- fallback_to_read_ = false;
- }
- Shift();
- // gzip detect.
- if ((position_end_ >= position_ + ReadCompressed::kMagicSize) && ReadCompressed::DetectCompressedMagic(position_)) {
- if (!fallback_to_read_) {
- at_end_ = false;
- TransitionToRead();
- }
- }
-}
-
-namespace {
-
-static const double_conversion::StringToDoubleConverter kConverter(
- double_conversion::StringToDoubleConverter::ALLOW_TRAILING_JUNK | double_conversion::StringToDoubleConverter::ALLOW_LEADING_SPACES,
- std::numeric_limits<double>::quiet_NaN(),
- std::numeric_limits<double>::quiet_NaN(),
- "inf",
- "NaN");
-
-void ParseNumber(const char *begin, const char *&end, float &out) {
- int count;
- out = kConverter.StringToFloat(begin, end - begin, &count);
- end = begin + count;
-}
-void ParseNumber(const char *begin, const char *&end, double &out) {
- int count;
- out = kConverter.StringToDouble(begin, end - begin, &count);
- end = begin + count;
-}
-void ParseNumber(const char *begin, const char *&end, long int &out) {
- char *silly_end;
- out = strtol(begin, &silly_end, 10);
- end = silly_end;
-}
-void ParseNumber(const char *begin, const char *&end, unsigned long int &out) {
- char *silly_end;
- out = strtoul(begin, &silly_end, 10);
- end = silly_end;
-}
-} // namespace
-
-template <class T> T FilePiece::ReadNumber() {
- SkipSpaces();
- while (last_space_ < position_) {
- if (at_end_) {
- // Hallucinate a null off the end of the file.
- std::string buffer(position_, position_end_);
- const char *buf = buffer.c_str();
- const char *end = buf + buffer.size();
- T ret;
- ParseNumber(buf, end, ret);
- if (buf == end) throw ParseNumberException(buffer);
- position_ += end - buf;
- return ret;
- }
- Shift();
- }
- const char *end = last_space_;
- T ret;
- ParseNumber(position_, end, ret);
- if (end == position_) throw ParseNumberException(ReadDelimited());
- position_ = end;
- return ret;
-}
-
-const char *FilePiece::FindDelimiterOrEOF(const bool *delim) {
- std::size_t skip = 0;
- while (true) {
- for (const char *i = position_ + skip; i < position_end_; ++i) {
- if (delim[static_cast<unsigned char>(*i)]) return i;
- }
- if (at_end_) {
- if (position_ == position_end_) Shift();
- return position_end_;
- }
- skip = position_end_ - position_;
- Shift();
- }
-}
-
-void FilePiece::Shift() {
- if (at_end_) {
- progress_.Finished();
- throw EndOfFileException();
- }
- uint64_t desired_begin = position_ - data_.begin() + mapped_offset_;
-
- if (!fallback_to_read_) MMapShift(desired_begin);
- // Notice an mmap failure might set the fallback.
- if (fallback_to_read_) ReadShift();
-
- for (last_space_ = position_end_ - 1; last_space_ >= position_; --last_space_) {
- if (kSpaces[static_cast<unsigned char>(*last_space_)]) break;
- }
-}
-
-void FilePiece::MMapShift(uint64_t desired_begin) {
- // Use mmap.
- uint64_t ignore = desired_begin % page_;
- // Duplicate request for Shift means give more data.
- if (position_ == data_.begin() + ignore && position_) {
- default_map_size_ *= 2;
- }
- // Local version so that in case of failure it doesn't overwrite the class variable.
- uint64_t mapped_offset = desired_begin - ignore;
-
- uint64_t mapped_size;
- if (default_map_size_ >= static_cast<std::size_t>(total_size_ - mapped_offset)) {
- at_end_ = true;
- mapped_size = total_size_ - mapped_offset;
- } else {
- mapped_size = default_map_size_;
- }
-
- // Forcibly clear the existing mmap first.
- data_.reset();
- try {
- MapRead(POPULATE_OR_LAZY, *file_, mapped_offset, mapped_size, data_);
- } catch (const util::ErrnoException &e) {
- if (desired_begin) {
- SeekOrThrow(*file_, desired_begin);
- }
- // The mmap was scheduled to end the file, but now we're going to read it.
- at_end_ = false;
- TransitionToRead();
- return;
- }
- mapped_offset_ = mapped_offset;
- position_ = data_.begin() + ignore;
- position_end_ = data_.begin() + mapped_size;
-
- progress_.Set(desired_begin);
-}
-
-void FilePiece::TransitionToRead() {
- assert(!fallback_to_read_);
- fallback_to_read_ = true;
- data_.reset();
- data_.reset(MallocOrThrow(default_map_size_), default_map_size_, scoped_memory::MALLOC_ALLOCATED);
- position_ = data_.begin();
- position_end_ = position_;
-
- try {
- fell_back_.Reset(file_.release());
- } catch (util::Exception &e) {
- e << " in file " << file_name_;
- throw;
- }
-}
-
-void FilePiece::ReadShift() {
- assert(fallback_to_read_);
- // Bytes [data_.begin(), position_) have been consumed.
- // Bytes [position_, position_end_) have been read into the buffer.
-
- // Start at the beginning of the buffer if there's nothing useful in it.
- if (position_ == position_end_) {
- mapped_offset_ += (position_end_ - data_.begin());
- position_ = data_.begin();
- position_end_ = position_;
- }
-
- std::size_t already_read = position_end_ - data_.begin();
-
- if (already_read == default_map_size_) {
- if (position_ == data_.begin()) {
- // Buffer too small.
- std::size_t valid_length = position_end_ - position_;
- default_map_size_ *= 2;
- data_.call_realloc(default_map_size_);
- UTIL_THROW_IF(!data_.get(), ErrnoException, "realloc failed for " << default_map_size_);
- position_ = data_.begin();
- position_end_ = position_ + valid_length;
- } else {
- std::size_t moving = position_end_ - position_;
- memmove(data_.get(), position_, moving);
- position_ = data_.begin();
- position_end_ = position_ + moving;
- already_read = moving;
- }
- }
-
- std::size_t read_return = fell_back_.Read(static_cast<uint8_t*>(data_.get()) + already_read, default_map_size_ - already_read);
- progress_.Set(fell_back_.RawAmount());
-
- if (read_return == 0) {
- at_end_ = true;
- }
- position_end_ += read_return;
-}
-
-} // namespace util
diff --git a/src/joshua/decoder/ff/lm/kenlm/util/file_piece.hh b/src/joshua/decoder/ff/lm/kenlm/util/file_piece.hh
deleted file mode 100644
index 5495ddc..0000000
--- a/src/joshua/decoder/ff/lm/kenlm/util/file_piece.hh
+++ /dev/null
@@ -1,158 +0,0 @@
-#ifndef UTIL_FILE_PIECE_H
-#define UTIL_FILE_PIECE_H
-
-#include "util/ersatz_progress.hh"
-#include "util/exception.hh"
-#include "util/file.hh"
-#include "util/mmap.hh"
-#include "util/read_compressed.hh"
-#include "util/string_piece.hh"
-
-#include <cstddef>
-#include <iosfwd>
-#include <string>
-
-#include <assert.h>
-#include <stdint.h>
-
-namespace util {
-
-class ParseNumberException : public Exception {
- public:
- explicit ParseNumberException(StringPiece value) throw();
- ~ParseNumberException() throw() {}
-};
-
-extern const bool kSpaces[256];
-
-// Memory backing the returned StringPiece may vanish on the next call.
-class FilePiece {
- public:
- // 1 MB default.
- explicit FilePiece(const char *file, std::ostream *show_progress = NULL, std::size_t min_buffer = 1048576);
- // Takes ownership of fd. name is used for messages.
- explicit FilePiece(int fd, const char *name = NULL, std::ostream *show_progress = NULL, std::size_t min_buffer = 1048576);
-
- /* Read from an istream. Don't use this if you can avoid it. Raw fd IO is
- * much faster. But sometimes you just have an istream like Boost's HTTP
- * server and want to parse it the same way.
- * name is just used for messages and FileName().
- */
- explicit FilePiece(std::istream &stream, const char *name = NULL, std::size_t min_buffer = 1048576);
-
- ~FilePiece();
-
- char get() {
- if (position_ == position_end_) {
- Shift();
- if (at_end_) throw EndOfFileException();
- }
- return *(position_++);
- }
-
- // Leaves the delimiter, if any, to be returned by get(). Delimiters defined by isspace().
- StringPiece ReadDelimited(const bool *delim = kSpaces) {
- SkipSpaces(delim);
- return Consume(FindDelimiterOrEOF(delim));
- }
-
- // Read word until the line or file ends.
- bool ReadWordSameLine(StringPiece &to, const bool *delim = kSpaces) {
- assert(delim[static_cast<unsigned char>('\n')]);
- // Skip non-enter spaces.
- for (; ; ++position_) {
- if (position_ == position_end_) {
- try {
- Shift();
- } catch (const util::EndOfFileException &e) { return false; }
- // And break out at end of file.
- if (position_ == position_end_) return false;
- }
- if (!delim[static_cast<unsigned char>(*position_)]) break;
- if (*position_ == '\n') return false;
- }
- // We can't be at the end of file because there's at least one character open.
- to = Consume(FindDelimiterOrEOF(delim));
- return true;
- }
-
- // Unlike ReadDelimited, this includes leading spaces and consumes the delimiter.
- // It is similar to getline in that way.
- StringPiece ReadLine(char delim = '\n');
-
- // Doesn't throw EndOfFileException, just returns false.
- bool ReadLineOrEOF(StringPiece &to, char delim = '\n');
-
- float ReadFloat();
- double ReadDouble();
- long int ReadLong();
- unsigned long int ReadULong();
-
- // Skip spaces defined by isspace.
- void SkipSpaces(const bool *delim = kSpaces) {
- assert(position_ <= position_end_);
- for (; ; ++position_) {
- if (position_ == position_end_) {
- Shift();
- // And break out at end of file.
- if (position_ == position_end_) return;
- }
- assert(position_ < position_end_);
- if (!delim[static_cast<unsigned char>(*position_)]) return;
- }
- }
-
- uint64_t Offset() const {
- return position_ - data_.begin() + mapped_offset_;
- }
-
- const std::string &FileName() const { return file_name_; }
-
- private:
- void InitializeNoRead(const char *name, std::size_t min_buffer);
- // Calls InitializeNoRead, so don't call both.
- void Initialize(const char *name, std::ostream *show_progress, std::size_t min_buffer);
-
- template <class T> T ReadNumber();
-
- StringPiece Consume(const char *to) {
- assert(to >= position_);
- StringPiece ret(position_, to - position_);
- position_ = to;
- return ret;
- }
-
- const char *FindDelimiterOrEOF(const bool *delim = kSpaces);
-
- void Shift();
- // Backends to Shift().
- void MMapShift(uint64_t desired_begin);
-
- void TransitionToRead();
- void ReadShift();
-
- const char *position_, *last_space_, *position_end_;
-
- scoped_fd file_;
- const uint64_t total_size_;
- const uint64_t page_;
-
- std::size_t default_map_size_;
- uint64_t mapped_offset_;
-
- // Order matters: file_ should always be destroyed after this.
- scoped_memory data_;
-
- bool at_end_;
- bool fallback_to_read_;
-
- ErsatzProgress progress_;
-
- std::string file_name_;
-
- ReadCompressed fell_back_;
-};
-
-} // namespace util
-
-#endif // UTIL_FILE_PIECE_H
diff --git a/src/joshua/decoder/ff/lm/kenlm/util/file_piece_test.cc b/src/joshua/decoder/ff/lm/kenlm/util/file_piece_test.cc
deleted file mode 100644
index 4361877..0000000
--- a/src/joshua/decoder/ff/lm/kenlm/util/file_piece_test.cc
+++ /dev/null
@@ -1,138 +0,0 @@
-// Tests might fail if you have creative characters in your path. Sue me.
-#include "util/file_piece.hh"
-
-#include "util/file.hh"
-#include "util/scoped.hh"
-
-#define BOOST_TEST_MODULE FilePieceTest
-#include <boost/test/unit_test.hpp>
-#include <fstream>
-#include <iostream>
-
-#include <stdio.h>
-#include <sys/types.h>
-#include <sys/stat.h>
-
-namespace util {
-namespace {
-
-std::string FileLocation() {
- if (boost::unit_test::framework::master_test_suite().argc < 2) {
- return "file_piece.cc";
- }
- std::string ret(boost::unit_test::framework::master_test_suite().argv[1]);
- return ret;
-}
-
-/* istream */
-BOOST_AUTO_TEST_CASE(IStream) {
- std::fstream ref(FileLocation().c_str(), std::ios::in);
- std::fstream backing(FileLocation().c_str(), std::ios::in);
- FilePiece test(backing);
- std::string ref_line;
- while (getline(ref, ref_line)) {
- StringPiece test_line(test.ReadLine());
- BOOST_CHECK_EQUAL(ref_line, test_line);
- }
- BOOST_CHECK_THROW(test.get(), EndOfFileException);
- BOOST_CHECK_THROW(test.get(), EndOfFileException);
-}
-
-/* mmap implementation */
-BOOST_AUTO_TEST_CASE(MMapReadLine) {
- std::fstream ref(FileLocation().c_str(), std::ios::in);
- FilePiece test(FileLocation().c_str(), NULL, 1);
- std::string ref_line;
- while (getline(ref, ref_line)) {
- StringPiece test_line(test.ReadLine());
- // I submitted a bug report to ICU: http://bugs.icu-project.org/trac/ticket/7924
- if (!test_line.empty() || !ref_line.empty()) {
- BOOST_CHECK_EQUAL(ref_line, test_line);
- }
- }
- BOOST_CHECK_THROW(test.get(), EndOfFileException);
-}
-
-#if !defined(_WIN32) && !defined(_WIN64) && !defined(__APPLE__)
-/* Apple isn't happy with the popen, fileno, dup. And I don't want to
- * reimplement popen. This is an issue with the test.
- */
-/* read() implementation */
-BOOST_AUTO_TEST_CASE(StreamReadLine) {
- std::fstream ref(FileLocation().c_str(), std::ios::in);
-
- std::string popen_args = "cat \"";
- popen_args += FileLocation();
- popen_args += '"';
-
- FILE *catter = popen(popen_args.c_str(), "r");
- BOOST_REQUIRE(catter);
-
- FilePiece test(dup(fileno(catter)), "file_piece.cc", NULL, 1);
- std::string ref_line;
- while (getline(ref, ref_line)) {
- StringPiece test_line(test.ReadLine());
- // I submitted a bug report to ICU: http://bugs.icu-project.org/trac/ticket/7924
- if (!test_line.empty() || !ref_line.empty()) {
- BOOST_CHECK_EQUAL(ref_line, test_line);
- }
- }
- BOOST_CHECK_THROW(test.get(), EndOfFileException);
- BOOST_REQUIRE(!pclose(catter));
-}
-#endif
-
-#ifdef HAVE_ZLIB
-
-// gzip file
-BOOST_AUTO_TEST_CASE(PlainZipReadLine) {
- std::string location(FileLocation());
- std::fstream ref(location.c_str(), std::ios::in);
-
- std::string command("gzip <\"");
- command += location + "\" >\"" + location + "\".gz";
-
- BOOST_REQUIRE_EQUAL(0, system(command.c_str()));
- FilePiece test((location + ".gz").c_str(), NULL, 1);
- unlink((location + ".gz").c_str());
- std::string ref_line;
- while (getline(ref, ref_line)) {
- StringPiece test_line(test.ReadLine());
- // I submitted a bug report to ICU: http://bugs.icu-project.org/trac/ticket/7924
- if (!test_line.empty() || !ref_line.empty()) {
- BOOST_CHECK_EQUAL(ref_line, test_line);
- }
- }
- BOOST_CHECK_THROW(test.get(), EndOfFileException);
-}
-
-// gzip stream. Apple doesn't like popen, fileno, dup. This is an issue with
-// the test.
-#if !defined __APPLE__ && !defined __MINGW32__
-BOOST_AUTO_TEST_CASE(StreamZipReadLine) {
- std::fstream ref(FileLocation().c_str(), std::ios::in);
-
- std::string command("gzip <\"");
- command += FileLocation() + "\"";
-
- FILE * catter = popen(command.c_str(), "r");
- BOOST_REQUIRE(catter);
-
- FilePiece test(dup(fileno(catter)), "file_piece.cc.gz", NULL, 1);
- std::string ref_line;
- while (getline(ref, ref_line)) {
- StringPiece test_line(test.ReadLine());
- // I submitted a bug report to ICU: http://bugs.icu-project.org/trac/ticket/7924
- if (!test_line.empty() || !ref_line.empty()) {
- BOOST_CHECK_EQUAL(ref_line, test_line);
- }
- }
- BOOST_CHECK_THROW(test.get(), EndOfFileException);
- BOOST_REQUIRE(!pclose(catter));
-}
-#endif // __APPLE__
-
-#endif // HAVE_ZLIB
-
-} // namespace
-} // namespace util
diff --git a/src/joshua/decoder/ff/lm/kenlm/util/fixed_array.hh b/src/joshua/decoder/ff/lm/kenlm/util/fixed_array.hh
deleted file mode 100644
index 416b92f..0000000
--- a/src/joshua/decoder/ff/lm/kenlm/util/fixed_array.hh
+++ /dev/null
@@ -1,153 +0,0 @@
-#ifndef UTIL_FIXED_ARRAY_H
-#define UTIL_FIXED_ARRAY_H
-
-#include "util/scoped.hh"
-
-#include <cstddef>
-
-#include <assert.h>
-#include <stdlib.h>
-
-namespace util {
-
-/**
- * Defines a fixed-size collection.
- *
- * Ever want an array of things by they don't have a default constructor or are
- * non-copyable? FixedArray allows constructing one at a time.
- */
-template <class T> class FixedArray {
- public:
- /** Initialize with a given size bound but do not construct the objects. */
- explicit FixedArray(std::size_t limit) {
- Init(limit);
- }
-
- /**
- * Constructs an instance, but does not initialize it.
- *
- * Any objects constructed in this manner must be subsequently @ref FixedArray::Init() "initialized" prior to use.
- *
- * @see FixedArray::Init()
- */
- FixedArray()
- : newed_end_(NULL)
-#ifndef NDEBUG
- , allocated_end_(NULL)
-#endif
- {}
-
- /**
- * Initialize with a given size bound but do not construct the objects.
- *
- * This method is responsible for allocating memory.
- * Objects stored in this array will be constructed in a location within this allocated memory.
- */
- void Init(std::size_t count) {
- assert(!block_.get());
- block_.reset(malloc(sizeof(T) * count));
- if (!block_.get()) throw std::bad_alloc();
- newed_end_ = begin();
-#ifndef NDEBUG
- allocated_end_ = begin() + count;
-#endif
- }
-
- /**
- * Constructs a copy of the provided array.
- *
- * @param from Array whose elements should be copied into this newly-constructed data structure.
- */
- FixedArray(const FixedArray &from) {
- std::size_t size = from.newed_end_ - static_cast<const T*>(from.block_.get());
- Init(size);
- for (std::size_t i = 0; i < size; ++i) {
- push_back(from[i]);
- }
- }
-
- /**
- * Frees the memory held by this object.
- */
- ~FixedArray() { clear(); }
-
- /** Gets a pointer to the first object currently stored in this data structure. */
- T *begin() { return static_cast<T*>(block_.get()); }
-
- /** Gets a const pointer to the last object currently stored in this data structure. */
- const T *begin() const { return static_cast<const T*>(block_.get()); }
-
- /** Gets a pointer to the last object currently stored in this data structure. */
- T *end() { return newed_end_; }
-
- /** Gets a const pointer to the last object currently stored in this data structure. */
- const T *end() const { return newed_end_; }
-
- /** Gets a reference to the last object currently stored in this data structure. */
- T &back() { return *(end() - 1); }
-
- /** Gets a const reference to the last object currently stored in this data structure. */
- const T &back() const { return *(end() - 1); }
-
- /** Gets the number of objects currently stored in this data structure. */
- std::size_t size() const { return end() - begin(); }
-
- /** Returns true if there are no objects currently stored in this data structure. */
- bool empty() const { return begin() == end(); }
-
- /**
- * Gets a reference to the object with index i currently stored in this data structure.
- *
- * @param i Index of the object to reference
- */
- T &operator[](std::size_t i) { return begin()[i]; }
-
- /**
- * Gets a const reference to the object with index i currently stored in this data structure.
- *
- * @param i Index of the object to reference
- */
- const T &operator[](std::size_t i) const { return begin()[i]; }
-
- /**
- * Constructs a new object using the provided parameter,
- * and stores it in this data structure.
- *
- * The memory backing the constructed object is managed by this data structure.
- */
- template <class C> void push_back(const C &c) {
- new (end()) T(c); // use "placement new" syntax to initalize T in an already-allocated memory location
- Constructed();
- }
-
- /**
- * Removes all elements from this array.
- */
- void clear() {
- for (T *i = begin(); i != end(); ++i)
- i->~T();
- newed_end_ = begin();
- }
-
- protected:
- // Always call Constructed after successful completion of new.
- void Constructed() {
- ++newed_end_;
-#ifndef NDEBUG
- assert(newed_end_ <= allocated_end_);
-#endif
- }
-
- private:
- util::scoped_malloc block_;
-
- T *newed_end_;
-
-#ifndef NDEBUG
- T *allocated_end_;
-#endif
-};
-
-} // namespace util
-
-#endif // UTIL_FIXED_ARRAY_H
diff --git a/src/joshua/decoder/ff/lm/kenlm/util/getopt.c b/src/joshua/decoder/ff/lm/kenlm/util/getopt.c
deleted file mode 100644
index 992c96b..0000000
--- a/src/joshua/decoder/ff/lm/kenlm/util/getopt.c
+++ /dev/null
@@ -1,78 +0,0 @@
-/*
-POSIX getopt for Windows
-
-AT&T Public License
-
-Code given out at the 1985 UNIFORUM conference in Dallas.
-*/
-
-#ifndef __GNUC__
-
-#include "getopt.hh"
-#include <stdio.h>
-#include <string.h>
-
-#define NULL 0
-#define EOF (-1)
-#define ERR(s, c) if(opterr){\
- char errbuf[2];\
- errbuf[0] = c; errbuf[1] = '\n';\
- fputs(argv[0], stderr);\
- fputs(s, stderr);\
- fputc(c, stderr);}
- //(void) write(2, argv[0], (unsigned)strlen(argv[0]));\
- //(void) write(2, s, (unsigned)strlen(s));\
- //(void) write(2, errbuf, 2);}
-
-int opterr = 1;
-int optind = 1;
-int optopt;
-char *optarg;
-
-int
-getopt(argc, argv, opts)
-int argc;
-char **argv, *opts;
-{
- static int sp = 1;
- register int c;
- register char *cp;
-
- if(sp == 1)
- if(optind >= argc ||
- argv[optind][0] != '-' || argv[optind][1] == '\0')
- return(EOF);
- else if(strcmp(argv[optind], "--") == NULL) {
- optind++;
- return(EOF);
- }
- optopt = c = argv[optind][sp];
- if(c == ':' || (cp=strchr(opts, c)) == NULL) {
- ERR(": illegal option -- ", c);
- if(argv[optind][++sp] == '\0') {
- optind++;
- sp = 1;
- }
- return('?');
- }
- if(*++cp == ':') {
- if(argv[optind][sp+1] != '\0')
- optarg = &argv[optind++][sp+1];
- else if(++optind >= argc) {
- ERR(": option requires an argument -- ", c);
- sp = 1;
- return('?');
- } else
- optarg = argv[optind++];
- sp = 1;
- } else {
- if(argv[optind][++sp] == '\0') {
- sp = 1;
- optind++;
- }
- optarg = NULL;
- }
- return(c);
-}
-
-#endif /* __GNUC__ */
diff --git a/src/joshua/decoder/ff/lm/kenlm/util/getopt.hh b/src/joshua/decoder/ff/lm/kenlm/util/getopt.hh
deleted file mode 100644
index 50eab56..0000000
--- a/src/joshua/decoder/ff/lm/kenlm/util/getopt.hh
+++ /dev/null
@@ -1,33 +0,0 @@
-/*
-POSIX getopt for Windows
-
-AT&T Public License
-
-Code given out at the 1985 UNIFORUM conference in Dallas.
-*/
-
-#ifdef __GNUC__
-#include <getopt.h>
-#endif
-#ifndef __GNUC__
-
-#ifndef UTIL_GETOPT_H
-#define UTIL_GETOPT_H
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-extern int opterr;
-extern int optind;
-extern int optopt;
-extern char *optarg;
-extern int getopt(int argc, char **argv, char *opts);
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* UTIL_GETOPT_H */
-#endif /* __GNUC__ */
-
diff --git a/src/joshua/decoder/ff/lm/kenlm/util/mmap.cc b/src/joshua/decoder/ff/lm/kenlm/util/mmap.cc
deleted file mode 100644
index a3c8a02..0000000
--- a/src/joshua/decoder/ff/lm/kenlm/util/mmap.cc
+++ /dev/null
@@ -1,259 +0,0 @@
-/* Memory mapping wrappers.
- * ARM and MinGW ports contributed by Hideo Okuma and Tomoyuki Yoshimura at
- * NICT.
- */
-#include "util/mmap.hh"
-
-#include "util/exception.hh"
-#include "util/file.hh"
-#include "util/parallel_read.hh"
-#include "util/scoped.hh"
-
-#include <iostream>
-
-#include <assert.h>
-#include <fcntl.h>
-#include <sys/types.h>
-#include <sys/stat.h>
-#include <stdlib.h>
-
-#if defined(_WIN32) || defined(_WIN64)
-#include <windows.h>
-#include <io.h>
-#else
-#include <sys/mman.h>
-#include <unistd.h>
-#endif
-
-namespace util {
-
-long SizePage() {
-#if defined(_WIN32) || defined(_WIN64)
- SYSTEM_INFO si;
- GetSystemInfo(&si);
- return si.dwAllocationGranularity;
-#else
- return sysconf(_SC_PAGE_SIZE);
-#endif
-}
-
-void SyncOrThrow(void *start, size_t length) {
-#if defined(_WIN32) || defined(_WIN64)
- UTIL_THROW_IF(!::FlushViewOfFile(start, length), ErrnoException, "Failed to sync mmap");
-#else
- UTIL_THROW_IF(length && msync(start, length, MS_SYNC), ErrnoException, "Failed to sync mmap");
-#endif
-}
-
-void UnmapOrThrow(void *start, size_t length) {
-#if defined(_WIN32) || defined(_WIN64)
- UTIL_THROW_IF(!::UnmapViewOfFile(start), ErrnoException, "Failed to unmap a file");
-#else
- UTIL_THROW_IF(munmap(start, length), ErrnoException, "munmap failed");
-#endif
-}
-
-scoped_mmap::~scoped_mmap() {
- if (data_ != (void*)-1) {
- try {
- // Thanks Denis Filimonov for pointing out NFS likes msync first.
- SyncOrThrow(data_, size_);
- UnmapOrThrow(data_, size_);
- } catch (const util::ErrnoException &e) {
- std::cerr << e.what();
- abort();
- }
- }
-}
-
-void scoped_memory::reset(void *data, std::size_t size, Alloc source) {
- switch(source_) {
- case MMAP_ALLOCATED:
- scoped_mmap(data_, size_);
- break;
- case ARRAY_ALLOCATED:
- delete [] reinterpret_cast<char*>(data_);
- break;
- case MALLOC_ALLOCATED:
- free(data_);
- break;
- case NONE_ALLOCATED:
- break;
- }
- data_ = data;
- size_ = size;
- source_ = source;
-}
-
-void scoped_memory::call_realloc(std::size_t size) {
- assert(source_ == MALLOC_ALLOCATED || source_ == NONE_ALLOCATED);
- void *new_data = realloc(data_, size);
- if (!new_data) {
- reset();
- } else {
- data_ = new_data;
- size_ = size;
- source_ = MALLOC_ALLOCATED;
- }
-}
-
-void *MapOrThrow(std::size_t size, bool for_write, int flags, bool prefault, int fd, uint64_t offset) {
-#ifdef MAP_POPULATE // Linux specific
- if (prefault) {
- flags |= MAP_POPULATE;
- }
-#endif
-#if defined(_WIN32) || defined(_WIN64)
- int protectC = for_write ? PAGE_READWRITE : PAGE_READONLY;
- int protectM = for_write ? FILE_MAP_WRITE : FILE_MAP_READ;
- uint64_t total_size = size + offset;
- HANDLE hMapping = CreateFileMapping((HANDLE)_get_osfhandle(fd), NULL, protectC, total_size >> 32, static_cast<DWORD>(total_size), NULL);
- UTIL_THROW_IF(!hMapping, ErrnoException, "CreateFileMapping failed");
- LPVOID ret = MapViewOfFile(hMapping, protectM, offset >> 32, offset, size);
- CloseHandle(hMapping);
- UTIL_THROW_IF(!ret, ErrnoException, "MapViewOfFile failed");
-#else
- int protect = for_write ? (PROT_READ | PROT_WRITE) : PROT_READ;
- void *ret;
- UTIL_THROW_IF((ret = mmap(NULL, size, protect, flags, fd, offset)) == MAP_FAILED, ErrnoException, "mmap failed for size " << size << " at offset " << offset);
-# ifdef MADV_HUGEPAGE
- /* We like huge pages but it's fine if we can't have them. Note that huge
- * pages are not supported for file-backed mmap on linux.
- */
- madvise(ret, size, MADV_HUGEPAGE);
-# endif
-#endif
- return ret;
-}
-
-const int kFileFlags =
-#if defined(_WIN32) || defined(_WIN64)
- 0 // MapOrThrow ignores flags on windows
-#elif defined(MAP_FILE)
- MAP_FILE | MAP_SHARED
-#else
- MAP_SHARED
-#endif
- ;
-
-void MapRead(LoadMethod method, int fd, uint64_t offset, std::size_t size, scoped_memory &out) {
- switch (method) {
- case LAZY:
- out.reset(MapOrThrow(size, false, kFileFlags, false, fd, offset), size, scoped_memory::MMAP_ALLOCATED);
- break;
- case POPULATE_OR_LAZY:
-#ifdef MAP_POPULATE
- case POPULATE_OR_READ:
-#endif
- out.reset(MapOrThrow(size, false, kFileFlags, true, fd, offset), size, scoped_memory::MMAP_ALLOCATED);
- break;
-#ifndef MAP_POPULATE
- case POPULATE_OR_READ:
-#endif
- case READ:
- out.reset(MallocOrThrow(size), size, scoped_memory::MALLOC_ALLOCATED);
- SeekOrThrow(fd, offset);
- ReadOrThrow(fd, out.get(), size);
- break;
- case PARALLEL_READ:
- out.reset(MallocOrThrow(size), size, scoped_memory::MALLOC_ALLOCATED);
- ParallelRead(fd, out.get(), size, offset);
- break;
- }
-}
-
-// Allocates zeroed memory in to.
-void MapAnonymous(std::size_t size, util::scoped_memory &to) {
- to.reset();
-#if defined(_WIN32) || defined(_WIN64)
- to.reset(calloc(1, size), size, scoped_memory::MALLOC_ALLOCATED);
-#else
- to.reset(MapOrThrow(size, true,
-# if defined(MAP_ANONYMOUS)
- MAP_ANONYMOUS | MAP_PRIVATE // Linux
-# else
- MAP_ANON | MAP_PRIVATE // BSD
-# endif
- , false, -1, 0), size, scoped_memory::MMAP_ALLOCATED);
-#endif
-}
-
-void *MapZeroedWrite(int fd, std::size_t size) {
- ResizeOrThrow(fd, 0);
- ResizeOrThrow(fd, size);
- return MapOrThrow(size, true, kFileFlags, false, fd, 0);
-}
-
-void *MapZeroedWrite(const char *name, std::size_t size, scoped_fd &file) {
- file.reset(CreateOrThrow(name));
- try {
- return MapZeroedWrite(file.get(), size);
- } catch (ErrnoException &e) {
- e << " in file " << name;
- throw;
- }
-}
-
-Rolling::Rolling(const Rolling ©_from, uint64_t increase) {
- *this = copy_from;
- IncreaseBase(increase);
-}
-
-Rolling &Rolling::operator=(const Rolling ©_from) {
- fd_ = copy_from.fd_;
- file_begin_ = copy_from.file_begin_;
- file_end_ = copy_from.file_end_;
- for_write_ = copy_from.for_write_;
- block_ = copy_from.block_;
- read_bound_ = copy_from.read_bound_;
-
- current_begin_ = 0;
- if (copy_from.IsPassthrough()) {
- current_end_ = copy_from.current_end_;
- ptr_ = copy_from.ptr_;
- } else {
- // Force call on next mmap.
- current_end_ = 0;
- ptr_ = NULL;
- }
- return *this;
-}
-
-Rolling::Rolling(int fd, bool for_write, std::size_t block, std::size_t read_bound, uint64_t offset, uint64_t amount) {
- current_begin_ = 0;
- current_end_ = 0;
- fd_ = fd;
- file_begin_ = offset;
- file_end_ = offset + amount;
- for_write_ = for_write;
- block_ = block;
- read_bound_ = read_bound;
-}
-
-void *Rolling::ExtractNonRolling(scoped_memory &out, uint64_t index, std::size_t size) {
- out.reset();
- if (IsPassthrough()) return static_cast<uint8_t*>(get()) + index;
- uint64_t offset = index + file_begin_;
- // Round down to multiple of page size.
- uint64_t cruft = offset % static_cast<uint64_t>(SizePage());
- std::size_t map_size = static_cast<std::size_t>(size + cruft);
- out.reset(MapOrThrow(map_size, for_write_, kFileFlags, true, fd_, offset - cruft), map_size, scoped_memory::MMAP_ALLOCATED);
- return static_cast<uint8_t*>(out.get()) + static_cast<std::size_t>(cruft);
-}
-
-void Rolling::Roll(uint64_t index) {
- assert(!IsPassthrough());
- std::size_t amount;
- if (file_end_ - (index + file_begin_) > static_cast<uint64_t>(block_)) {
- amount = block_;
- current_end_ = index + amount - read_bound_;
- } else {
- amount = file_end_ - (index + file_begin_);
- current_end_ = index + amount;
- }
- ptr_ = static_cast<uint8_t*>(ExtractNonRolling(mem_, index, amount)) - index;
-
- current_begin_ = index;
-}
-
-} // namespace util
diff --git a/src/joshua/decoder/ff/lm/kenlm/util/mmap.hh b/src/joshua/decoder/ff/lm/kenlm/util/mmap.hh
deleted file mode 100644
index 9b1e120..0000000
--- a/src/joshua/decoder/ff/lm/kenlm/util/mmap.hh
+++ /dev/null
@@ -1,192 +0,0 @@
-#ifndef UTIL_MMAP_H
-#define UTIL_MMAP_H
-// Utilities for mmaped files.
-
-#include <cstddef>
-#include <limits>
-
-#include <stdint.h>
-#include <sys/types.h>
-
-namespace util {
-
-class scoped_fd;
-
-long SizePage();
-
-// (void*)-1 is MAP_FAILED; this is done to avoid including the mmap header here.
-class scoped_mmap {
- public:
- scoped_mmap() : data_((void*)-1), size_(0) {}
- scoped_mmap(void *data, std::size_t size) : data_(data), size_(size) {}
- ~scoped_mmap();
-
- void *get() const { return data_; }
-
- const uint8_t *begin() const { return reinterpret_cast<uint8_t*>(data_); }
- const uint8_t *end() const { return reinterpret_cast<uint8_t*>(data_) + size_; }
- std::size_t size() const { return size_; }
-
- void reset(void *data, std::size_t size) {
- scoped_mmap other(data_, size_);
- data_ = data;
- size_ = size;
- }
-
- void reset() {
- reset((void*)-1, 0);
- }
-
- private:
- void *data_;
- std::size_t size_;
-
- scoped_mmap(const scoped_mmap &);
- scoped_mmap &operator=(const scoped_mmap &);
-};
-
-/* For when the memory might come from mmap, new char[], or malloc. Uses NULL
- * and 0 for blanks even though mmap signals errors with (void*)-1). The reset
- * function checks that blank for mmap.
- */
-class scoped_memory {
- public:
- typedef enum {MMAP_ALLOCATED, ARRAY_ALLOCATED, MALLOC_ALLOCATED, NONE_ALLOCATED} Alloc;
-
- scoped_memory(void *data, std::size_t size, Alloc source)
- : data_(data), size_(size), source_(source) {}
-
- scoped_memory() : data_(NULL), size_(0), source_(NONE_ALLOCATED) {}
-
- ~scoped_memory() { reset(); }
-
- void *get() const { return data_; }
- const char *begin() const { return reinterpret_cast<char*>(data_); }
- const char *end() const { return reinterpret_cast<char*>(data_) + size_; }
- std::size_t size() const { return size_; }
-
- Alloc source() const { return source_; }
-
- void reset() { reset(NULL, 0, NONE_ALLOCATED); }
-
- void reset(void *data, std::size_t size, Alloc from);
-
- // realloc allows the current data to escape hence the need for this call
- // If realloc fails, destroys the original too and get() returns NULL.
- void call_realloc(std::size_t to);
-
- private:
- void *data_;
- std::size_t size_;
-
- Alloc source_;
-
- scoped_memory(const scoped_memory &);
- scoped_memory &operator=(const scoped_memory &);
-};
-
-typedef enum {
- // mmap with no prepopulate
- LAZY,
- // On linux, pass MAP_POPULATE to mmap.
- POPULATE_OR_LAZY,
- // Populate on Linux. malloc and read on non-Linux.
- POPULATE_OR_READ,
- // malloc and read.
- READ,
- // malloc and read in parallel (recommended for Lustre)
- PARALLEL_READ,
-} LoadMethod;
-
-extern const int kFileFlags;
-
-// Wrapper around mmap to check it worked and hide some platform macros.
-void *MapOrThrow(std::size_t size, bool for_write, int flags, bool prefault, int fd, uint64_t offset = 0);
-
-void MapRead(LoadMethod method, int fd, uint64_t offset, std::size_t size, scoped_memory &out);
-
-void MapAnonymous(std::size_t size, scoped_memory &to);
-
-// Open file name with mmap of size bytes, all of which are initially zero.
-void *MapZeroedWrite(int fd, std::size_t size);
-void *MapZeroedWrite(const char *name, std::size_t size, scoped_fd &file);
-
-// msync wrapper
-void SyncOrThrow(void *start, size_t length);
-
-// Forward rolling memory map with no overlap.
-class Rolling {
- public:
- Rolling() {}
-
- explicit Rolling(void *data) { Init(data); }
-
- Rolling(const Rolling ©_from, uint64_t increase = 0);
- Rolling &operator=(const Rolling ©_from);
-
- // For an actual rolling mmap.
- explicit Rolling(int fd, bool for_write, std::size_t block, std::size_t read_bound, uint64_t offset, uint64_t amount);
-
- // For a static mapping
- void Init(void *data) {
- ptr_ = data;
- current_end_ = std::numeric_limits<uint64_t>::max();
- current_begin_ = 0;
- // Mark as a pass-through.
- fd_ = -1;
- }
-
- void IncreaseBase(uint64_t by) {
- file_begin_ += by;
- ptr_ = static_cast<uint8_t*>(ptr_) + by;
- if (!IsPassthrough()) current_end_ = 0;
- }
-
- void DecreaseBase(uint64_t by) {
- file_begin_ -= by;
- ptr_ = static_cast<uint8_t*>(ptr_) - by;
- if (!IsPassthrough()) current_end_ = 0;
- }
-
- void *ExtractNonRolling(scoped_memory &out, uint64_t index, std::size_t size);
-
- // Returns base pointer
- void *get() const { return ptr_; }
-
- // Returns base pointer.
- void *CheckedBase(uint64_t index) {
- if (index >= current_end_ || index < current_begin_) {
- Roll(index);
- }
- return ptr_;
- }
-
- // Returns indexed pointer.
- void *CheckedIndex(uint64_t index) {
- return static_cast<uint8_t*>(CheckedBase(index)) + index;
- }
-
- private:
- void Roll(uint64_t index);
-
- // True if this is just a thin wrapper on a pointer.
- bool IsPassthrough() const { return fd_ == -1; }
-
- void *ptr_;
- uint64_t current_begin_;
- uint64_t current_end_;
-
- scoped_memory mem_;
-
- int fd_;
- uint64_t file_begin_;
- uint64_t file_end_;
-
- bool for_write_;
- std::size_t block_;
- std::size_t read_bound_;
-};
-
-} // namespace util
-
-#endif // UTIL_MMAP_H
diff --git a/src/joshua/decoder/ff/lm/kenlm/util/multi_intersection.hh b/src/joshua/decoder/ff/lm/kenlm/util/multi_intersection.hh
deleted file mode 100644
index 2955acc..0000000
--- a/src/joshua/decoder/ff/lm/kenlm/util/multi_intersection.hh
+++ /dev/null
@@ -1,80 +0,0 @@
-#ifndef UTIL_MULTI_INTERSECTION_H
-#define UTIL_MULTI_INTERSECTION_H
-
-#include <boost/optional.hpp>
-#include <boost/range/iterator_range.hpp>
-
-#include <algorithm>
-#include <functional>
-#include <vector>
-
-namespace util {
-
-namespace detail {
-template <class Range> struct RangeLessBySize : public std::binary_function<const Range &, const Range &, bool> {
- bool operator()(const Range &left, const Range &right) const {
- return left.size() < right.size();
- }
-};
-
-/* Takes sets specified by their iterators and a boost::optional containing
- * the lowest intersection if any. Each set must be sorted in increasing
- * order. sets is changed to truncate the beginning of each sequence to the
- * location of the match or an empty set. Precondition: sets is not empty
- * since the intersection over null is the universe and this function does not
- * know the universe.
- */
-template <class Iterator, class Less> boost::optional<typename std::iterator_traits<Iterator>::value_type> FirstIntersectionSorted(std::vector<boost::iterator_range<Iterator> > &sets, const Less &less = std::less<typename std::iterator_traits<Iterator>::value_type>()) {
- typedef std::vector<boost::iterator_range<Iterator> > Sets;
- typedef typename std::iterator_traits<Iterator>::value_type Value;
-
- assert(!sets.empty());
-
- if (sets.front().empty()) return boost::optional<Value>();
- // Possibly suboptimal to copy for general Value; makes unsigned int go slightly faster.
- Value highest(sets.front().front());
- for (typename Sets::iterator i(sets.begin()); i != sets.end(); ) {
- i->advance_begin(std::lower_bound(i->begin(), i->end(), highest, less) - i->begin());
- if (i->empty()) return boost::optional<Value>();
- if (less(highest, i->front())) {
- highest = i->front();
- // start over
- i = sets.begin();
- } else {
- ++i;
- }
- }
- return boost::optional<Value>(highest);
-}
-
-} // namespace detail
-
-template <class Iterator, class Less> boost::optional<typename std::iterator_traits<Iterator>::value_type> FirstIntersection(std::vector<boost::iterator_range<Iterator> > &sets, const Less less) {
- assert(!sets.empty());
-
- std::sort(sets.begin(), sets.end(), detail::RangeLessBySize<boost::iterator_range<Iterator> >());
- return detail::FirstIntersectionSorted(sets, less);
-}
-
-template <class Iterator> boost::optional<typename std::iterator_traits<Iterator>::value_type> FirstIntersection(std::vector<boost::iterator_range<Iterator> > &sets) {
- return FirstIntersection(sets, std::less<typename std::iterator_traits<Iterator>::value_type>());
-}
-
-template <class Iterator, class Output, class Less> void AllIntersection(std::vector<boost::iterator_range<Iterator> > &sets, Output &out, const Less less) {
- typedef typename std::iterator_traits<Iterator>::value_type Value;
- assert(!sets.empty());
-
- std::sort(sets.begin(), sets.end(), detail::RangeLessBySize<boost::iterator_range<Iterator> >());
- boost::optional<Value> ret;
- for (boost::optional<Value> ret; (ret = detail::FirstIntersectionSorted(sets, less)); sets.front().advance_begin(1)) {
- out(*ret);
- }
-}
-
-template <class Iterator, class Output> void AllIntersection(std::vector<boost::iterator_range<Iterator> > &sets, Output &out) {
- AllIntersection(sets, out, std::less<typename std::iterator_traits<Iterator>::value_type>());
-}
-
-} // namespace util
-
-#endif // UTIL_MULTI_INTERSECTION_H
diff --git a/src/joshua/decoder/ff/lm/kenlm/util/multi_intersection_test.cc b/src/joshua/decoder/ff/lm/kenlm/util/multi_intersection_test.cc
deleted file mode 100644
index 970afc1..0000000
--- a/src/joshua/decoder/ff/lm/kenlm/util/multi_intersection_test.cc
+++ /dev/null
@@ -1,63 +0,0 @@
-#include "util/multi_intersection.hh"
-
-#define BOOST_TEST_MODULE MultiIntersectionTest
-#include <boost/test/unit_test.hpp>
-
-namespace util {
-namespace {
-
-BOOST_AUTO_TEST_CASE(Empty) {
- std::vector<boost::iterator_range<const unsigned int*> > sets;
-
- sets.push_back(boost::iterator_range<const unsigned int*>(static_cast<const unsigned int*>(NULL), static_cast<const unsigned int*>(NULL)));
- BOOST_CHECK(!FirstIntersection(sets));
-}
-
-BOOST_AUTO_TEST_CASE(Single) {
- std::vector<unsigned int> nums;
- nums.push_back(1);
- nums.push_back(4);
- nums.push_back(100);
- std::vector<boost::iterator_range<std::vector<unsigned int>::const_iterator> > sets;
- sets.push_back(nums);
-
- boost::optional<unsigned int> ret(FirstIntersection(sets));
-
- BOOST_REQUIRE(ret);
- BOOST_CHECK_EQUAL(static_cast<unsigned int>(1), *ret);
-}
-
-template <class T, unsigned int len> boost::iterator_range<const T*> RangeFromArray(const T (&arr)[len]) {
- return boost::iterator_range<const T*>(arr, arr + len);
-}
-
-BOOST_AUTO_TEST_CASE(MultiNone) {
- unsigned int nums0[] = {1, 3, 4, 22};
- unsigned int nums1[] = {2, 5, 12};
- unsigned int nums2[] = {4, 17};
-
- std::vector<boost::iterator_range<const unsigned int*> > sets;
- sets.push_back(RangeFromArray(nums0));
- sets.push_back(RangeFromArray(nums1));
- sets.push_back(RangeFromArray(nums2));
-
- BOOST_CHECK(!FirstIntersection(sets));
-}
-
-BOOST_AUTO_TEST_CASE(MultiOne) {
- unsigned int nums0[] = {1, 3, 4, 17, 22};
- unsigned int nums1[] = {2, 5, 12, 17};
- unsigned int nums2[] = {4, 17};
-
- std::vector<boost::iterator_range<const unsigned int*> > sets;
- sets.push_back(RangeFromArray(nums0));
- sets.push_back(RangeFromArray(nums1));
- sets.push_back(RangeFromArray(nums2));
-
- boost::optional<unsigned int> ret(FirstIntersection(sets));
- BOOST_REQUIRE(ret);
- BOOST_CHECK_EQUAL(static_cast<unsigned int>(17), *ret);
-}
-
-} // namespace
-} // namespace util
diff --git a/src/joshua/decoder/ff/lm/kenlm/util/murmur_hash.cc b/src/joshua/decoder/ff/lm/kenlm/util/murmur_hash.cc
deleted file mode 100644
index 189668c..0000000
--- a/src/joshua/decoder/ff/lm/kenlm/util/murmur_hash.cc
+++ /dev/null
@@ -1,175 +0,0 @@
-/* Downloaded from http://sites.google.com/site/murmurhash/ which says "All
- * code is released to the public domain. For business purposes, Murmurhash is
- * under the MIT license."
- * This is modified from the original:
- * ULL tag on 0xc6a4a7935bd1e995 so this will compile on 32-bit.
- * length changed to unsigned int.
- * placed in namespace util
- * add MurmurHashNative
- * default option = 0 for seed
- * ARM port from NICT
- */
-
-#include "util/murmur_hash.hh"
-#include <string.h>
-
-namespace util {
-
-//-----------------------------------------------------------------------------
-// MurmurHash2, 64-bit versions, by Austin Appleby
-
-// The same caveats as 32-bit MurmurHash2 apply here - beware of alignment
-// and endian-ness issues if used across multiple platforms.
-
-// 64-bit hash for 64-bit platforms
-
-uint64_t MurmurHash64A ( const void * key, std::size_t len, uint64_t seed )
-{
- const uint64_t m = 0xc6a4a7935bd1e995ULL;
- const int r = 47;
-
- uint64_t h = seed ^ (len * m);
-
-#if defined(__arm) || defined(__arm__)
- const size_t ksize = sizeof(uint64_t);
- const unsigned char * data = (const unsigned char *)key;
- const unsigned char * end = data + (std::size_t)(len/8) * ksize;
-#else
- const uint64_t * data = (const uint64_t *)key;
- const uint64_t * end = data + (len/8);
-#endif
-
- while(data != end)
- {
-#if defined(__arm) || defined(__arm__)
- uint64_t k;
- memcpy(&k, data, ksize);
- data += ksize;
-#else
- uint64_t k = *data++;
-#endif
-
- k *= m;
- k ^= k >> r;
- k *= m;
-
- h ^= k;
- h *= m;
- }
-
- const unsigned char * data2 = (const unsigned char*)data;
-
- switch(len & 7)
- {
- case 7: h ^= uint64_t(data2[6]) << 48;
- case 6: h ^= uint64_t(data2[5]) << 40;
- case 5: h ^= uint64_t(data2[4]) << 32;
- case 4: h ^= uint64_t(data2[3]) << 24;
- case 3: h ^= uint64_t(data2[2]) << 16;
- case 2: h ^= uint64_t(data2[1]) << 8;
- case 1: h ^= uint64_t(data2[0]);
- h *= m;
- };
-
- h ^= h >> r;
- h *= m;
- h ^= h >> r;
-
- return h;
-}
-
-
-// 64-bit hash for 32-bit platforms
-
-uint64_t MurmurHash64B ( const void * key, std::size_t len, uint64_t seed )
-{
- const unsigned int m = 0x5bd1e995;
- const int r = 24;
-
- unsigned int h1 = seed ^ len;
- unsigned int h2 = 0;
-
-#if defined(__arm) || defined(__arm__)
- size_t ksize = sizeof(unsigned int);
- const unsigned char * data = (const unsigned char *)key;
-#else
- const unsigned int * data = (const unsigned int *)key;
-#endif
-
- unsigned int k1, k2;
- while(len >= 8)
- {
-#if defined(__arm) || defined(__arm__)
- memcpy(&k1, data, ksize);
- data += ksize;
- memcpy(&k2, data, ksize);
- data += ksize;
-#else
- k1 = *data++;
- k2 = *data++;
-#endif
-
- k1 *= m; k1 ^= k1 >> r; k1 *= m;
- h1 *= m; h1 ^= k1;
- len -= 4;
-
- k2 *= m; k2 ^= k2 >> r; k2 *= m;
- h2 *= m; h2 ^= k2;
- len -= 4;
- }
-
- if(len >= 4)
- {
-#if defined(__arm) || defined(__arm__)
- memcpy(&k1, data, ksize);
- data += ksize;
-#else
- k1 = *data++;
-#endif
- k1 *= m; k1 ^= k1 >> r; k1 *= m;
- h1 *= m; h1 ^= k1;
- len -= 4;
- }
-
- switch(len)
- {
- case 3: h2 ^= ((unsigned char*)data)[2] << 16;
- case 2: h2 ^= ((unsigned char*)data)[1] << 8;
- case 1: h2 ^= ((unsigned char*)data)[0];
- h2 *= m;
- };
-
- h1 ^= h2 >> 18; h1 *= m;
- h2 ^= h1 >> 22; h2 *= m;
- h1 ^= h2 >> 17; h1 *= m;
- h2 ^= h1 >> 19; h2 *= m;
-
- uint64_t h = h1;
-
- h = (h << 32) | h2;
-
- return h;
-}
-
-// Trick to test for 64-bit architecture at compile time.
-namespace {
-#ifdef __clang__
-#pragma clang diagnostic push
-#pragma clang diagnostic ignored "-Wunused-function"
-#endif
-template <unsigned L> inline uint64_t MurmurHashNativeBackend(const void * key, std::size_t len, uint64_t seed) {
- return MurmurHash64A(key, len, seed);
-}
-template <> inline uint64_t MurmurHashNativeBackend<4>(const void * key, std::size_t len, uint64_t seed) {
- return MurmurHash64B(key, len, seed);
-}
-#ifdef __clang__
-#pragma clang diagnostic pop
-#endif
-} // namespace
-
-uint64_t MurmurHashNative(const void * key, std::size_t len, uint64_t seed) {
- return MurmurHashNativeBackend<sizeof(void*)>(key, len, seed);
-}
-
-} // namespace util
diff --git a/src/joshua/decoder/ff/lm/kenlm/util/pcqueue.hh b/src/joshua/decoder/ff/lm/kenlm/util/pcqueue.hh
deleted file mode 100644
index d2ffee7..0000000
--- a/src/joshua/decoder/ff/lm/kenlm/util/pcqueue.hh
+++ /dev/null
@@ -1,156 +0,0 @@
-#ifndef UTIL_PCQUEUE_H
-#define UTIL_PCQUEUE_H
-
-#include "util/exception.hh"
-
-#include <boost/interprocess/sync/interprocess_semaphore.hpp>
-#include <boost/scoped_array.hpp>
-#include <boost/thread/mutex.hpp>
-#include <boost/utility.hpp>
-
-#include <errno.h>
-
-#ifdef __APPLE__
-#include <mach/semaphore.h>
-#include <mach/task.h>
-#include <mach/mach_traps.h>
-#include <mach/mach.h>
-#endif // __APPLE__
-
-namespace util {
-
-/* OS X Maverick and Boost interprocess were doing "Function not implemented."
- * So this is my own wrapper around the mach kernel APIs.
- */
-#ifdef __APPLE__
-
-#define MACH_CALL(call) UTIL_THROW_IF(KERN_SUCCESS != (call), Exception, "Mach call failure")
-
-class Semaphore {
- public:
- explicit Semaphore(int value) : task_(mach_task_self()) {
- MACH_CALL(semaphore_create(task_, &back_, SYNC_POLICY_FIFO, value));
- }
-
- ~Semaphore() {
- MACH_CALL(semaphore_destroy(task_, back_));
- }
-
- void wait() {
- MACH_CALL(semaphore_wait(back_));
- }
-
- void post() {
- MACH_CALL(semaphore_signal(back_));
- }
-
- private:
- semaphore_t back_;
- task_t task_;
-};
-
-inline void WaitSemaphore(Semaphore &semaphore) {
- semaphore.wait();
-}
-
-#else
-typedef boost::interprocess::interprocess_semaphore Semaphore;
-
-inline void WaitSemaphore (Semaphore &on) {
- while (1) {
- try {
- on.wait();
- break;
- }
- catch (boost::interprocess::interprocess_exception &e) {
- if (e.get_native_error() != EINTR) {
- throw;
- }
- }
- }
-}
-
-#endif // __APPLE__
-
-/**
- * Producer consumer queue safe for multiple producers and multiple consumers.
- * T must be default constructable and have operator=.
- * The value is copied twice for Consume(T &out) or three times for Consume(),
- * so larger objects should be passed via pointer.
- * Strong exception guarantee if operator= throws. Undefined if semaphores throw.
- */
-template <class T> class PCQueue : boost::noncopyable {
- public:
- explicit PCQueue(size_t size)
- : empty_(size), used_(0),
- storage_(new T[size]),
- end_(storage_.get() + size),
- produce_at_(storage_.get()),
- consume_at_(storage_.get()) {}
-
- // Add a value to the queue.
- void Produce(const T &val) {
- WaitSemaphore(empty_);
- {
- boost::unique_lock<boost::mutex> produce_lock(produce_at_mutex_);
- try {
- *produce_at_ = val;
- }
- catch (...) {
- empty_.post();
- throw;
- }
- if (++produce_at_ == end_) produce_at_ = storage_.get();
- }
- used_.post();
- }
-
- // Consume a value, assigning it to out.
- T& Consume(T &out) {
- WaitSemaphore(used_);
- {
- boost::unique_lock<boost::mutex> consume_lock(consume_at_mutex_);
- try {
- out = *consume_at_;
- }
- catch (...) {
- used_.post();
- throw;
- }
- if (++consume_at_ == end_) consume_at_ = storage_.get();
- }
- empty_.post();
- return out;
- }
-
- // Convenience version of Consume that copies the value to return.
- // The other version is faster.
- T Consume() {
- T ret;
- Consume(ret);
- return ret;
- }
-
- private:
- // Number of empty spaces in storage_.
- Semaphore empty_;
- // Number of occupied spaces in storage_.
- Semaphore used_;
-
- boost::scoped_array<T> storage_;
-
- T *const end_;
-
- // Index for next write in storage_.
- T *produce_at_;
- boost::mutex produce_at_mutex_;
-
- // Index for next read from storage_.
- T *consume_at_;
- boost::mutex consume_at_mutex_;
-
-};
-
-} // namespace util
-
-#endif // UTIL_PCQUEUE_H
diff --git a/src/joshua/decoder/ff/lm/kenlm/util/pool.cc b/src/joshua/decoder/ff/lm/kenlm/util/pool.cc
deleted file mode 100644
index 429ba15..0000000
--- a/src/joshua/decoder/ff/lm/kenlm/util/pool.cc
+++ /dev/null
@@ -1,36 +0,0 @@
-#include "util/pool.hh"
-
-#include "util/scoped.hh"
-
-#include <stdlib.h>
-
-namespace util {
-
-Pool::Pool() {
- current_ = NULL;
- current_end_ = NULL;
-}
-
-Pool::~Pool() {
- FreeAll();
-}
-
-void Pool::FreeAll() {
- for (std::vector<void *>::const_iterator i(free_list_.begin()); i != free_list_.end(); ++i) {
- free(*i);
- }
- free_list_.clear();
- current_ = NULL;
- current_end_ = NULL;
-}
-
-void *Pool::More(std::size_t size) {
- std::size_t amount = std::max(static_cast<size_t>(32) << free_list_.size(), size);
- uint8_t *ret = static_cast<uint8_t*>(MallocOrThrow(amount));
- free_list_.push_back(ret);
- current_ = ret + size;
- current_end_ = ret + amount;
- return ret;
-}
-
-} // namespace util
diff --git a/src/joshua/decoder/ff/lm/kenlm/util/pool.hh b/src/joshua/decoder/ff/lm/kenlm/util/pool.hh
deleted file mode 100644
index 89e793d..0000000
--- a/src/joshua/decoder/ff/lm/kenlm/util/pool.hh
+++ /dev/null
@@ -1,45 +0,0 @@
-// Very simple pool. It can only allocate memory. And all of the memory it
-// allocates must be freed at the same time.
-
-#ifndef UTIL_POOL_H
-#define UTIL_POOL_H
-
-#include <vector>
-
-#include <stdint.h>
-
-namespace util {
-
-class Pool {
- public:
- Pool();
-
- ~Pool();
-
- void *Allocate(std::size_t size) {
- void *ret = current_;
- current_ += size;
- if (current_ < current_end_) {
- return ret;
- } else {
- return More(size);
- }
- }
-
- void FreeAll();
-
- private:
- void *More(std::size_t size);
-
- std::vector<void *> free_list_;
-
- uint8_t *current_, *current_end_;
-
- // no copying
- Pool(const Pool &);
- Pool &operator=(const Pool &);
-};
-
-} // namespace util
-
-#endif // UTIL_POOL_H
diff --git a/src/joshua/decoder/ff/lm/kenlm/util/probing_hash_table.hh b/src/joshua/decoder/ff/lm/kenlm/util/probing_hash_table.hh
deleted file mode 100644
index ea228dd..0000000
--- a/src/joshua/decoder/ff/lm/kenlm/util/probing_hash_table.hh
+++ /dev/null
@@ -1,331 +0,0 @@
-#ifndef UTIL_PROBING_HASH_TABLE_H
-#define UTIL_PROBING_HASH_TABLE_H
-
-#include "util/exception.hh"
-#include "util/scoped.hh"
-
-#include <algorithm>
-#include <cstddef>
-#include <functional>
-#include <vector>
-
-#include <assert.h>
-#include <stdint.h>
-
-namespace util {
-
-/* Thrown when table grows too large */
-class ProbingSizeException : public Exception {
- public:
- ProbingSizeException() throw() {}
- ~ProbingSizeException() throw() {}
-};
-
-// std::identity is an SGI extension :-(
-struct IdentityHash {
- template <class T> T operator()(T arg) const { return arg; }
-};
-
-template <class EntryT, class HashT, class EqualT> class AutoProbing;
-
-/* Non-standard hash table
- * Buckets must be set at the beginning and must be greater than maximum number
- * of elements, else it throws ProbingSizeException.
- * Memory management and initialization is externalized to make it easier to
- * serialize these to disk and load them quickly.
- * Uses linear probing to find value.
- * Only insert and lookup operations.
- */
-template <class EntryT, class HashT, class EqualT = std::equal_to<typename EntryT::Key> > class ProbingHashTable {
- public:
- typedef EntryT Entry;
- typedef typename Entry::Key Key;
- typedef const Entry *ConstIterator;
- typedef Entry *MutableIterator;
- typedef HashT Hash;
- typedef EqualT Equal;
-
- static uint64_t Size(uint64_t entries, float multiplier) {
- uint64_t buckets = std::max(entries + 1, static_cast<uint64_t>(multiplier * static_cast<float>(entries)));
- return buckets * sizeof(Entry);
- }
-
- // Must be assigned to later.
- ProbingHashTable() : entries_(0)
-#ifdef DEBUG
- , initialized_(false)
-#endif
- {}
-
- ProbingHashTable(void *start, std::size_t allocated, const Key &invalid = Key(), const Hash &hash_func = Hash(), const Equal &equal_func = Equal())
- : begin_(reinterpret_cast<MutableIterator>(start)),
- buckets_(allocated / sizeof(Entry)),
- end_(begin_ + buckets_),
- invalid_(invalid),
- hash_(hash_func),
- equal_(equal_func),
- entries_(0)
-#ifdef DEBUG
- , initialized_(true)
-#endif
- {}
-
- void Relocate(void *new_base) {
- begin_ = reinterpret_cast<MutableIterator>(new_base);
- end_ = begin_ + buckets_;
- }
-
- template <class T> MutableIterator Insert(const T &t) {
-#ifdef DEBUG
- assert(initialized_);
-#endif
- UTIL_THROW_IF(++entries_ >= buckets_, ProbingSizeException, "Hash table with " << buckets_ << " buckets is full.");
- return UncheckedInsert(t);
- }
-
- // Return true if the value was found (and not inserted). This is consistent with Find but the opposite if hash_map!
- template <class T> bool FindOrInsert(const T &t, MutableIterator &out) {
-#ifdef DEBUG
- assert(initialized_);
-#endif
- for (MutableIterator i = Ideal(t);;) {
- Key got(i->GetKey());
- if (equal_(got, t.GetKey())) { out = i; return true; }
- if (equal_(got, invalid_)) {
- UTIL_THROW_IF(++entries_ >= buckets_, ProbingSizeException, "Hash table with " << buckets_ << " buckets is full.");
- *i = t;
- out = i;
- return false;
- }
- if (++i == end_) i = begin_;
- }
- }
-
- void FinishedInserting() {}
-
- // Don't change anything related to GetKey,
- template <class Key> bool UnsafeMutableFind(const Key key, MutableIterator &out) {
-#ifdef DEBUG
- assert(initialized_);
-#endif
- for (MutableIterator i(begin_ + (hash_(key) % buckets_));;) {
- Key got(i->GetKey());
- if (equal_(got, key)) { out = i; return true; }
- if (equal_(got, invalid_)) return false;
- if (++i == end_) i = begin_;
- }
- }
-
- // Like UnsafeMutableFind, but the key must be there.
- template <class Key> MutableIterator UnsafeMutableMustFind(const Key key) {
- for (MutableIterator i(begin_ + (hash_(key) % buckets_));;) {
- Key got(i->GetKey());
- if (equal_(got, key)) { return i; }
- assert(!equal_(got, invalid_));
- if (++i == end_) i = begin_;
- }
- }
-
-
- template <class Key> bool Find(const Key key, ConstIterator &out) const {
-#ifdef DEBUG
- assert(initialized_);
-#endif
- for (ConstIterator i(begin_ + (hash_(key) % buckets_));;) {
- Key got(i->GetKey());
- if (equal_(got, key)) { out = i; return true; }
- if (equal_(got, invalid_)) return false;
- if (++i == end_) i = begin_;
- }
- }
-
- // Like Find but we're sure it must be there.
- template <class Key> ConstIterator MustFind(const Key key) const {
- for (ConstIterator i(begin_ + (hash_(key) % buckets_));;) {
- Key got(i->GetKey());
- if (equal_(got, key)) { return i; }
- assert(!equal_(got, invalid_));
- if (++i == end_) i = begin_;
- }
- }
-
- void Clear() {
- Entry invalid;
- invalid.SetKey(invalid_);
- std::fill(begin_, end_, invalid);
- entries_ = 0;
- }
-
- // Return number of entries assuming no serialization went on.
- std::size_t SizeNoSerialization() const {
- return entries_;
- }
-
- // Return memory size expected by Double.
- std::size_t DoubleTo() const {
- return buckets_ * 2 * sizeof(Entry);
- }
-
- // Inform the table that it has double the amount of memory.
- // Pass clear_new = false if you are sure the new memory is initialized
- // properly (to invalid_) i.e. by mremap.
- void Double(void *new_base, bool clear_new = true) {
- begin_ = static_cast<MutableIterator>(new_base);
- MutableIterator old_end = begin_ + buckets_;
- buckets_ *= 2;
- end_ = begin_ + buckets_;
- if (clear_new) {
- Entry invalid;
- invalid.SetKey(invalid_);
- std::fill(old_end, end_, invalid);
- }
- std::vector<Entry> rolled_over;
- // Move roll-over entries to a buffer because they might not roll over anymore. This should be small.
- for (MutableIterator i = begin_; i != old_end && !equal_(i->GetKey(), invalid_); ++i) {
- rolled_over.push_back(*i);
- i->SetKey(invalid_);
- }
- /* Re-insert everything. Entries might go backwards to take over a
- * recently opened gap, stay, move to new territory, or wrap around. If
- * an entry wraps around, it might go to a pointer greater than i (which
- * can happen at the beginning) and it will be revisited to possibly fill
- * in a gap created later.
- */
- Entry temp;
- for (MutableIterator i = begin_; i != old_end; ++i) {
- if (!equal_(i->GetKey(), invalid_)) {
- temp = *i;
- i->SetKey(invalid_);
- UncheckedInsert(temp);
- }
- }
- // Put the roll-over entries back in.
- for (typename std::vector<Entry>::const_iterator i(rolled_over.begin()); i != rolled_over.end(); ++i) {
- UncheckedInsert(*i);
- }
- }
-
- // Mostly for tests, check consistency of every entry.
- void CheckConsistency() {
- MutableIterator last;
- for (last = end_ - 1; last >= begin_ && !equal_(last->GetKey(), invalid_); --last) {}
- UTIL_THROW_IF(last == begin_, ProbingSizeException, "Completely full");
- MutableIterator i;
- // Beginning can be wrap-arounds.
- for (i = begin_; !equal_(i->GetKey(), invalid_); ++i) {
- MutableIterator ideal = Ideal(*i);
- UTIL_THROW_IF(ideal > i && ideal <= last, Exception, "Inconsistency at position " << (i - begin_) << " should be at " << (ideal - begin_));
- }
- MutableIterator pre_gap = i;
- for (; i != end_; ++i) {
- if (equal_(i->GetKey(), invalid_)) {
- pre_gap = i;
- continue;
- }
- MutableIterator ideal = Ideal(*i);
- UTIL_THROW_IF(ideal > i || ideal <= pre_gap, Exception, "Inconsistency at position " << (i - begin_) << " with ideal " << (ideal - begin_));
- }
- }
-
- private:
- friend class AutoProbing<Entry, Hash, Equal>;
-
- template <class T> MutableIterator Ideal(const T &t) {
- return begin_ + (hash_(t.GetKey()) % buckets_);
- }
-
- template <class T> MutableIterator UncheckedInsert(const T &t) {
- for (MutableIterator i(Ideal(t));;) {
- if (equal_(i->GetKey(), invalid_)) { *i = t; return i; }
- if (++i == end_) { i = begin_; }
- }
- }
-
- MutableIterator begin_;
- std::size_t buckets_;
- MutableIterator end_;
- Key invalid_;
- Hash hash_;
- Equal equal_;
- std::size_t entries_;
-#ifdef DEBUG
- bool initialized_;
-#endif
-};
-
-// Resizable linear probing hash table. This owns the memory.
-template <class EntryT, class HashT, class EqualT = std::equal_to<typename EntryT::Key> > class AutoProbing {
- private:
- typedef ProbingHashTable<EntryT, HashT, EqualT> Backend;
- public:
- static std::size_t MemUsage(std::size_t size, float multiplier = 1.5) {
- return Backend::Size(size, multiplier);
- }
-
- typedef EntryT Entry;
- typedef typename Entry::Key Key;
- typedef const Entry *ConstIterator;
- typedef Entry *MutableIterator;
- typedef HashT Hash;
- typedef EqualT Equal;
-
- AutoProbing(std::size_t initial_size = 10, const Key &invalid = Key(), const Hash &hash_func = Hash(), const Equal &equal_func = Equal()) :
- allocated_(Backend::Size(initial_size, 1.5)), mem_(util::MallocOrThrow(allocated_)), backend_(mem_.get(), allocated_, invalid, hash_func, equal_func) {
- threshold_ = initial_size * 1.2;
- Clear();
- }
-
- // Assumes that the key is unique. Multiple insertions won't cause a failure, just inconsistent lookup.
- template <class T> MutableIterator Insert(const T &t) {
- DoubleIfNeeded();
- return backend_.UncheckedInsert(t);
- }
-
- template <class T> bool FindOrInsert(const T &t, MutableIterator &out) {
- DoubleIfNeeded();
- return backend_.FindOrInsert(t, out);
- }
-
- template <class Key> bool UnsafeMutableFind(const Key key, MutableIterator &out) {
- return backend_.UnsafeMutableFind(key, out);
- }
-
- template <class Key> MutableIterator UnsafeMutableMustFind(const Key key) {
- return backend_.UnsafeMutableMustFind(key);
- }
-
- template <class Key> bool Find(const Key key, ConstIterator &out) const {
- return backend_.Find(key, out);
- }
-
- template <class Key> ConstIterator MustFind(const Key key) const {
- return backend_.MustFind(key);
- }
-
- std::size_t Size() const {
- return backend_.SizeNoSerialization();
- }
-
- void Clear() {
- backend_.Clear();
- }
-
- private:
- void DoubleIfNeeded() {
- if (Size() < threshold_)
- return;
- mem_.call_realloc(backend_.DoubleTo());
- allocated_ = backend_.DoubleTo();
- backend_.Double(mem_.get());
- threshold_ *= 2;
- }
-
- std::size_t allocated_;
- util::scoped_malloc mem_;
- Backend backend_;
- std::size_t threshold_;
-};
-
-} // namespace util
-
-#endif // UTIL_PROBING_HASH_TABLE_H
diff --git a/src/joshua/decoder/ff/lm/kenlm/util/probing_hash_table_test.cc b/src/joshua/decoder/ff/lm/kenlm/util/probing_hash_table_test.cc
deleted file mode 100644
index 9f7948c..0000000
--- a/src/joshua/decoder/ff/lm/kenlm/util/probing_hash_table_test.cc
+++ /dev/null
@@ -1,102 +0,0 @@
-#include "util/probing_hash_table.hh"
-
-#include "util/murmur_hash.hh"
-#include "util/scoped.hh"
-
-#define BOOST_TEST_MODULE ProbingHashTableTest
-#include <boost/test/unit_test.hpp>
-#include <boost/scoped_array.hpp>
-#include <boost/functional/hash.hpp>
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <stdint.h>
-
-namespace util {
-namespace {
-
-struct Entry {
- unsigned char key;
- typedef unsigned char Key;
-
- unsigned char GetKey() const {
- return key;
- }
-
- void SetKey(unsigned char to) {
- key = to;
- }
-
- uint64_t GetValue() const {
- return value;
- }
-
- uint64_t value;
-};
-
-typedef ProbingHashTable<Entry, boost::hash<unsigned char> > Table;
-
-BOOST_AUTO_TEST_CASE(simple) {
- size_t size = Table::Size(10, 1.2);
- boost::scoped_array<char> mem(new char[size]);
- memset(mem.get(), 0, size);
-
- Table table(mem.get(), size);
- const Entry *i = NULL;
- BOOST_CHECK(!table.Find(2, i));
- Entry to_ins;
- to_ins.key = 3;
- to_ins.value = 328920;
- table.Insert(to_ins);
- BOOST_REQUIRE(table.Find(3, i));
- BOOST_CHECK_EQUAL(3, i->GetKey());
- BOOST_CHECK_EQUAL(static_cast<uint64_t>(328920), i->GetValue());
- BOOST_CHECK(!table.Find(2, i));
-}
-
-struct Entry64 {
- uint64_t key;
- typedef uint64_t Key;
-
- Entry64() {}
-
- explicit Entry64(uint64_t key_in) {
- key = key_in;
- }
-
- Key GetKey() const { return key; }
- void SetKey(uint64_t to) { key = to; }
-};
-
-struct MurmurHashEntry64 {
- std::size_t operator()(uint64_t value) const {
- return util::MurmurHash64A(&value, 8);
- }
-};
-
-typedef ProbingHashTable<Entry64, MurmurHashEntry64> Table64;
-
-BOOST_AUTO_TEST_CASE(Double) {
- for (std::size_t initial = 19; initial < 30; ++initial) {
- size_t size = Table64::Size(initial, 1.2);
- scoped_malloc mem(MallocOrThrow(size));
- Table64 table(mem.get(), size, std::numeric_limits<uint64_t>::max());
- table.Clear();
- for (uint64_t i = 0; i < 19; ++i) {
- table.Insert(Entry64(i));
- }
- table.CheckConsistency();
- mem.call_realloc(table.DoubleTo());
- table.Double(mem.get());
- table.CheckConsistency();
- for (uint64_t i = 20; i < 40 ; ++i) {
- table.Insert(Entry64(i));
- }
- mem.call_realloc(table.DoubleTo());
- table.Double(mem.get());
- table.CheckConsistency();
- }
-}
-
-} // namespace
-} // namespace util
diff --git a/src/joshua/decoder/ff/lm/kenlm/util/read_compressed.cc b/src/joshua/decoder/ff/lm/kenlm/util/read_compressed.cc
deleted file mode 100644
index cee9804..0000000
--- a/src/joshua/decoder/ff/lm/kenlm/util/read_compressed.cc
+++ /dev/null
@@ -1,448 +0,0 @@
-#include "util/read_compressed.hh"
-
-#include "util/file.hh"
-#include "util/have.hh"
-#include "util/scoped.hh"
-
-#include <algorithm>
-#include <iostream>
-
-#include <assert.h>
-#include <limits.h>
-#include <stdlib.h>
-#include <string.h>
-
-#ifdef HAVE_ZLIB
-#include <zlib.h>
-#endif
-
-#ifdef HAVE_BZLIB
-#include <bzlib.h>
-#endif
-
-#ifdef HAVE_XZLIB
-#include <lzma.h>
-#endif
-
-namespace util {
-
-CompressedException::CompressedException() throw() {}
-CompressedException::~CompressedException() throw() {}
-
-GZException::GZException() throw() {}
-GZException::~GZException() throw() {}
-
-BZException::BZException() throw() {}
-BZException::~BZException() throw() {}
-
-XZException::XZException() throw() {}
-XZException::~XZException() throw() {}
-
-class ReadBase {
- public:
- virtual ~ReadBase() {}
-
- virtual std::size_t Read(void *to, std::size_t amount, ReadCompressed &thunk) = 0;
-
- protected:
- static void ReplaceThis(ReadBase *with, ReadCompressed &thunk) {
- thunk.internal_.reset(with);
- }
-
- ReadBase *Current(ReadCompressed &thunk) { return thunk.internal_.get(); }
-
- static uint64_t &ReadCount(ReadCompressed &thunk) {
- return thunk.raw_amount_;
- }
-};
-
-namespace {
-
-ReadBase *ReadFactory(int fd, uint64_t &raw_amount, const void *already_data, std::size_t already_size, bool require_compressed);
-
-// Completed file that other classes can thunk to.
-class Complete : public ReadBase {
- public:
- std::size_t Read(void *, std::size_t, ReadCompressed &) {
- return 0;
- }
-};
-
-class Uncompressed : public ReadBase {
- public:
- explicit Uncompressed(int fd) : fd_(fd) {}
-
- std::size_t Read(void *to, std::size_t amount, ReadCompressed &thunk) {
- std::size_t got = PartialRead(fd_.get(), to, amount);
- ReadCount(thunk) += got;
- return got;
- }
-
- private:
- scoped_fd fd_;
-};
-
-class UncompressedWithHeader : public ReadBase {
- public:
- UncompressedWithHeader(int fd, const void *already_data, std::size_t already_size) : fd_(fd) {
- assert(already_size);
- buf_.reset(malloc(already_size));
- if (!buf_.get()) throw std::bad_alloc();
- memcpy(buf_.get(), already_data, already_size);
- remain_ = static_cast<uint8_t*>(buf_.get());
- end_ = remain_ + already_size;
- }
-
- std::size_t Read(void *to, std::size_t amount, ReadCompressed &thunk) {
- assert(buf_.get());
- assert(remain_ != end_);
- std::size_t sending = std::min<std::size_t>(amount, end_ - remain_);
- memcpy(to, remain_, sending);
- remain_ += sending;
- if (remain_ == end_) {
- ReplaceThis(new Uncompressed(fd_.release()), thunk);
- }
- return sending;
- }
-
- private:
- scoped_malloc buf_;
- uint8_t *remain_;
- uint8_t *end_;
-
- scoped_fd fd_;
-};
-
-static const std::size_t kInputBuffer = 16384;
-
-template <class Compression> class StreamCompressed : public ReadBase {
- public:
- StreamCompressed(int fd, const void *already_data, std::size_t already_size)
- : file_(fd),
- in_buffer_(MallocOrThrow(kInputBuffer)),
- back_(memcpy(in_buffer_.get(), already_data, already_size), already_size) {}
-
- std::size_t Read(void *to, std::size_t amount, ReadCompressed &thunk) {
- if (amount == 0) return 0;
- back_.SetOutput(to, amount);
- do {
- if (!back_.Stream().avail_in) ReadInput(thunk);
- if (!back_.Process()) {
- // reached end, at least for the compressed portion.
- std::size_t ret = static_cast<const uint8_t *>(static_cast<void*>(back_.Stream().next_out)) - static_cast<const uint8_t*>(to);
- ReplaceThis(ReadFactory(file_.release(), ReadCount(thunk), back_.Stream().next_in, back_.Stream().avail_in, true), thunk);
- if (ret) return ret;
- // We did not read anything this round, so clients might think EOF. Transfer responsibility to the next reader.
- return Current(thunk)->Read(to, amount, thunk);
- }
- } while (back_.Stream().next_out == to);
- return static_cast<const uint8_t*>(static_cast<void*>(back_.Stream().next_out)) - static_cast<const uint8_t*>(to);
- }
-
- private:
- void ReadInput(ReadCompressed &thunk) {
- assert(!back_.Stream().avail_in);
- std::size_t got = ReadOrEOF(file_.get(), in_buffer_.get(), kInputBuffer);
- back_.SetInput(in_buffer_.get(), got);
- ReadCount(thunk) += got;
- }
-
- scoped_fd file_;
- scoped_malloc in_buffer_;
-
- Compression back_;
-};
-
-#ifdef HAVE_ZLIB
-class GZip {
- public:
- GZip(const void *base, std::size_t amount) {
- SetInput(base, amount);
- stream_.zalloc = Z_NULL;
- stream_.zfree = Z_NULL;
- stream_.opaque = Z_NULL;
- stream_.msg = NULL;
- // 32 for zlib and gzip decoding with automatic header detection.
- // 15 for maximum window size.
- UTIL_THROW_IF(Z_OK != inflateInit2(&stream_, 32 + 15), GZException, "Failed to initialize zlib.");
- }
-
- ~GZip() {
- if (Z_OK != inflateEnd(&stream_)) {
- std::cerr << "zlib could not close properly." << std::endl;
- abort();
- }
- }
-
- void SetOutput(void *to, std::size_t amount) {
- stream_.next_out = static_cast<Bytef*>(to);
- stream_.avail_out = std::min<std::size_t>(std::numeric_limits<uInt>::max(), amount);
- }
-
- void SetInput(const void *base, std::size_t amount) {
- assert(amount < static_cast<std::size_t>(std::numeric_limits<uInt>::max()));
- stream_.next_in = const_cast<Bytef*>(static_cast<const Bytef*>(base));
- stream_.avail_in = amount;
- }
-
- const z_stream &Stream() const { return stream_; }
-
- bool Process() {
- int result = inflate(&stream_, 0);
- switch (result) {
- case Z_OK:
- return true;
- case Z_STREAM_END:
- return false;
- case Z_ERRNO:
- UTIL_THROW(ErrnoException, "zlib error");
- default:
- UTIL_THROW(GZException, "zlib encountered " << (stream_.msg ? stream_.msg : "an error ") << " code " << result);
- }
- }
-
- private:
- z_stream stream_;
-};
-#endif // HAVE_ZLIB
-
-#ifdef HAVE_BZLIB
-class BZip {
- public:
- BZip(const void *base, std::size_t amount) {
- memset(&stream_, 0, sizeof(stream_));
- SetInput(base, amount);
- HandleError(BZ2_bzDecompressInit(&stream_, 0, 0));
- }
-
- ~BZip() {
- try {
- HandleError(BZ2_bzDecompressEnd(&stream_));
- } catch (const std::exception &e) {
- std::cerr << e.what() << std::endl;
- abort();
- }
- }
-
- bool Process() {
- int ret = BZ2_bzDecompress(&stream_);
- if (ret == BZ_STREAM_END) return false;
- HandleError(ret);
- return true;
- }
-
- void SetOutput(void *base, std::size_t amount) {
- stream_.next_out = static_cast<char*>(base);
- stream_.avail_out = std::min<std::size_t>(std::numeric_limits<unsigned int>::max(), amount);
- }
-
- void SetInput(const void *base, std::size_t amount) {
- stream_.next_in = const_cast<char*>(static_cast<const char*>(base));
- stream_.avail_in = amount;
- }
-
- const bz_stream &Stream() const { return stream_; }
-
- private:
- void HandleError(int value) {
- switch(value) {
- case BZ_OK:
- return;
- case BZ_CONFIG_ERROR:
- UTIL_THROW(BZException, "bzip2 seems to be miscompiled.");
- case BZ_PARAM_ERROR:
- UTIL_THROW(BZException, "bzip2 Parameter error");
- case BZ_DATA_ERROR:
- UTIL_THROW(BZException, "bzip2 detected a corrupt file");
- case BZ_DATA_ERROR_MAGIC:
- UTIL_THROW(BZException, "bzip2 detected bad magic bytes. Perhaps this was not a bzip2 file after all?");
- case BZ_MEM_ERROR:
- throw std::bad_alloc();
- default:
- UTIL_THROW(BZException, "Unknown bzip2 error code " << value);
- }
- }
-
- bz_stream stream_;
-};
-#endif // HAVE_BZLIB
-
-#ifdef HAVE_XZLIB
-class XZip {
- public:
- XZip(const void *base, std::size_t amount)
- : stream_(), action_(LZMA_RUN) {
- memset(&stream_, 0, sizeof(stream_));
- SetInput(base, amount);
- HandleError(lzma_stream_decoder(&stream_, UINT64_MAX, 0));
- }
-
- ~XZip() {
- lzma_end(&stream_);
- }
-
- void SetOutput(void *base, std::size_t amount) {
- stream_.next_out = static_cast<uint8_t*>(base);
- stream_.avail_out = amount;
- }
-
- void SetInput(const void *base, std::size_t amount) {
- stream_.next_in = static_cast<const uint8_t*>(base);
- stream_.avail_in = amount;
- if (!amount) action_ = LZMA_FINISH;
- }
-
- const lzma_stream &Stream() const { return stream_; }
-
- bool Process() {
- lzma_ret status = lzma_code(&stream_, action_);
- if (status == LZMA_STREAM_END) return false;
- HandleError(status);
- return true;
- }
-
- private:
- void HandleError(lzma_ret value) {
- switch (value) {
- case LZMA_OK:
- return;
- case LZMA_MEM_ERROR:
- throw std::bad_alloc();
- case LZMA_FORMAT_ERROR:
- UTIL_THROW(XZException, "xzlib says file format not recognized");
- case LZMA_OPTIONS_ERROR:
- UTIL_THROW(XZException, "xzlib says unsupported compression options");
- case LZMA_DATA_ERROR:
- UTIL_THROW(XZException, "xzlib says this file is corrupt");
- case LZMA_BUF_ERROR:
- UTIL_THROW(XZException, "xzlib says unexpected end of input");
- default:
- UTIL_THROW(XZException, "unrecognized xzlib error " << value);
- }
- }
-
- lzma_stream stream_;
- lzma_action action_;
-};
-#endif // HAVE_XZLIB
-
-class IStreamReader : public ReadBase {
- public:
- explicit IStreamReader(std::istream &stream) : stream_(stream) {}
-
- std::size_t Read(void *to, std::size_t amount, ReadCompressed &thunk) {
- if (!stream_.read(static_cast<char*>(to), amount)) {
- UTIL_THROW_IF(!stream_.eof(), ErrnoException, "istream error");
- amount = stream_.gcount();
- }
- ReadCount(thunk) += amount;
- return amount;
- }
-
- private:
- std::istream &stream_;
-};
-
-enum MagicResult {
- UTIL_UNKNOWN, UTIL_GZIP, UTIL_BZIP, UTIL_XZIP
-};
-
-MagicResult DetectMagic(const void *from_void, std::size_t length) {
- const uint8_t *header = static_cast<const uint8_t*>(from_void);
- if (length >= 2 && header[0] == 0x1f && header[1] == 0x8b) {
- return UTIL_GZIP;
- }
- const uint8_t kBZMagic[3] = {'B', 'Z', 'h'};
- if (length >= sizeof(kBZMagic) && !memcmp(header, kBZMagic, sizeof(kBZMagic))) {
- return UTIL_BZIP;
- }
- const uint8_t kXZMagic[6] = { 0xFD, '7', 'z', 'X', 'Z', 0x00 };
- if (length >= sizeof(kXZMagic) && !memcmp(header, kXZMagic, sizeof(kXZMagic))) {
- return UTIL_XZIP;
- }
- return UTIL_UNKNOWN;
-}
-
-ReadBase *ReadFactory(int fd, uint64_t &raw_amount, const void *already_data, const std::size_t already_size, bool require_compressed) {
- scoped_fd hold(fd);
- std::string header(reinterpret_cast<const char*>(already_data), already_size);
- if (header.size() < ReadCompressed::kMagicSize) {
- std::size_t original = header.size();
- header.resize(ReadCompressed::kMagicSize);
- std::size_t got = ReadOrEOF(fd, &header[original], ReadCompressed::kMagicSize - original);
- raw_amount += got;
- header.resize(original + got);
- }
- if (header.empty()) {
- return new Complete();
- }
- switch (DetectMagic(&header[0], header.size())) {
- case UTIL_GZIP:
-#ifdef HAVE_ZLIB
- return new StreamCompressed<GZip>(hold.release(), header.data(), header.size());
-#else
- UTIL_THROW(CompressedException, "This looks like a gzip file but gzip support was not compiled in.");
-#endif
- case UTIL_BZIP:
-#ifdef HAVE_BZLIB
- return new StreamCompressed<BZip>(hold.release(), &header[0], header.size());
-#else
- UTIL_THROW(CompressedException, "This looks like a bzip file (it begins with BZh), but bzip support was not compiled in.");
-#endif
- case UTIL_XZIP:
-#ifdef HAVE_XZLIB
- return new StreamCompressed<XZip>(hold.release(), header.data(), header.size());
-#else
- UTIL_THROW(CompressedException, "This looks like an xz file, but xz support was not compiled in.");
-#endif
- default:
- UTIL_THROW_IF(require_compressed, CompressedException, "Uncompressed data detected after a compresssed file. This could be supported but usually indicates an error.");
- return new UncompressedWithHeader(hold.release(), header.data(), header.size());
- }
-}
-
-} // namespace
-
-bool ReadCompressed::DetectCompressedMagic(const void *from_void) {
- return DetectMagic(from_void, kMagicSize) != UTIL_UNKNOWN;
-}
-
-ReadCompressed::ReadCompressed(int fd) {
- Reset(fd);
-}
-
-ReadCompressed::ReadCompressed(std::istream &in) {
- Reset(in);
-}
-
-ReadCompressed::ReadCompressed() {}
-
-ReadCompressed::~ReadCompressed() {}
-
-void ReadCompressed::Reset(int fd) {
- raw_amount_ = 0;
- internal_.reset();
- internal_.reset(ReadFactory(fd, raw_amount_, NULL, 0, false));
-}
-
-void ReadCompressed::Reset(std::istream &in) {
- internal_.reset();
- internal_.reset(new IStreamReader(in));
-}
-
-std::size_t ReadCompressed::Read(void *to, std::size_t amount) {
- return internal_->Read(to, amount, *this);
-}
-
-std::size_t ReadCompressed::ReadOrEOF(void *const to_in, std::size_t amount) {
- uint8_t *to = reinterpret_cast<uint8_t*>(to_in);
- while (amount) {
- std::size_t got = Read(to, amount);
- if (!got) break;
- to += got;
- amount -= got;
- }
- return to - reinterpret_cast<uint8_t*>(to_in);
-}
-
-} // namespace util
diff --git a/src/joshua/decoder/ff/lm/kenlm/util/read_compressed.hh b/src/joshua/decoder/ff/lm/kenlm/util/read_compressed.hh
deleted file mode 100644
index 767ee94..0000000
--- a/src/joshua/decoder/ff/lm/kenlm/util/read_compressed.hh
+++ /dev/null
@@ -1,85 +0,0 @@
-#ifndef UTIL_READ_COMPRESSED_H
-#define UTIL_READ_COMPRESSED_H
-
-#include "util/exception.hh"
-#include "util/scoped.hh"
-
-#include <cstddef>
-
-#include <stdint.h>
-
-namespace util {
-
-class CompressedException : public Exception {
- public:
- CompressedException() throw();
- virtual ~CompressedException() throw();
-};
-
-class GZException : public CompressedException {
- public:
- GZException() throw();
- ~GZException() throw();
-};
-
-class BZException : public CompressedException {
- public:
- BZException() throw();
- ~BZException() throw();
-};
-
-class XZException : public CompressedException {
- public:
- XZException() throw();
- ~XZException() throw();
-};
-
-class ReadBase;
-
-class ReadCompressed {
- public:
- static const std::size_t kMagicSize = 6;
- // Must have at least kMagicSize bytes.
- static bool DetectCompressedMagic(const void *from);
-
- // Takes ownership of fd.
- explicit ReadCompressed(int fd);
-
- // Try to avoid using this. Use the fd instead.
- // There is no decompression support for istreams.
- explicit ReadCompressed(std::istream &in);
-
- // Must call Reset later.
- ReadCompressed();
-
- ~ReadCompressed();
-
- // Takes ownership of fd.
- void Reset(int fd);
-
- // Same advice as the constructor.
- void Reset(std::istream &in);
-
- std::size_t Read(void *to, std::size_t amount);
-
- // Repeatedly call read to fill a buffer unless EOF is hit.
- // Return number of bytes read.
- std::size_t ReadOrEOF(void *const to, std::size_t amount);
-
- uint64_t RawAmount() const { return raw_amount_; }
-
- private:
- friend class ReadBase;
-
- scoped_ptr<ReadBase> internal_;
-
- uint64_t raw_amount_;
-
- // No copying.
- ReadCompressed(const ReadCompressed &);
- void operator=(const ReadCompressed &);
-};
-
-} // namespace util
-
-#endif // UTIL_READ_COMPRESSED_H
diff --git a/src/joshua/decoder/ff/lm/kenlm/util/read_compressed_test.cc b/src/joshua/decoder/ff/lm/kenlm/util/read_compressed_test.cc
deleted file mode 100644
index 301e8f4..0000000
--- a/src/joshua/decoder/ff/lm/kenlm/util/read_compressed_test.cc
+++ /dev/null
@@ -1,131 +0,0 @@
-#include "util/read_compressed.hh"
-
-#include "util/file.hh"
-#include "util/have.hh"
-
-#define BOOST_TEST_MODULE ReadCompressedTest
-#include <boost/test/unit_test.hpp>
-#include <boost/scoped_ptr.hpp>
-
-#include <fstream>
-#include <string>
-
-#include <stdlib.h>
-
-#if defined __MINGW32__
-#include <time.h>
-#include <fcntl.h>
-
-#if !defined mkstemp
-// TODO insecure
-int mkstemp(char * stemplate)
-{
- char *filename = mktemp(stemplate);
- if (filename == NULL)
- return -1;
- return open(filename, O_RDWR | O_CREAT, 0600);
-}
-#endif
-
-#endif // defined
-
-namespace util {
-namespace {
-
-void ReadLoop(ReadCompressed &reader, void *to_void, std::size_t amount) {
- uint8_t *to = static_cast<uint8_t*>(to_void);
- while (amount) {
- std::size_t ret = reader.Read(to, amount);
- BOOST_REQUIRE(ret);
- to += ret;
- amount -= ret;
- }
-}
-
-const uint32_t kSize4 = 100000 / 4;
-
-std::string WriteRandom() {
- char name[] = "tempXXXXXX";
- scoped_fd original(mkstemp(name));
- BOOST_REQUIRE(original.get() > 0);
- for (uint32_t i = 0; i < kSize4; ++i) {
- WriteOrThrow(original.get(), &i, sizeof(uint32_t));
- }
- return name;
-}
-
-void VerifyRead(ReadCompressed &reader) {
- for (uint32_t i = 0; i < kSize4; ++i) {
- uint32_t got;
- ReadLoop(reader, &got, sizeof(uint32_t));
- BOOST_CHECK_EQUAL(i, got);
- }
-
- char ignored;
- BOOST_CHECK_EQUAL((std::size_t)0, reader.Read(&ignored, 1));
- // Test double EOF call.
- BOOST_CHECK_EQUAL((std::size_t)0, reader.Read(&ignored, 1));
-}
-
-void TestRandom(const char *compressor) {
- std::string name(WriteRandom());
-
- char gzname[] = "tempXXXXXX";
- scoped_fd gzipped(mkstemp(gzname));
-
- std::string command(compressor);
-#ifdef __CYGWIN__
- command += ".exe";
-#endif
- command += " <\"";
- command += name;
- command += "\" >\"";
- command += gzname;
- command += "\"";
- BOOST_REQUIRE_EQUAL(0, system(command.c_str()));
-
- BOOST_CHECK_EQUAL(0, unlink(name.c_str()));
- BOOST_CHECK_EQUAL(0, unlink(gzname));
-
- ReadCompressed reader(gzipped.release());
- VerifyRead(reader);
-}
-
-BOOST_AUTO_TEST_CASE(Uncompressed) {
- TestRandom("cat");
-}
-
-#ifdef HAVE_ZLIB
-BOOST_AUTO_TEST_CASE(ReadGZ) {
- TestRandom("gzip");
-}
-#endif // HAVE_ZLIB
-
-#ifdef HAVE_BZLIB
-BOOST_AUTO_TEST_CASE(ReadBZ) {
- TestRandom("bzip2");
-}
-#endif // HAVE_BZLIB
-
-#ifdef HAVE_XZLIB
-BOOST_AUTO_TEST_CASE(ReadXZ) {
- TestRandom("xz");
-}
-#endif
-
-#ifdef HAVE_ZLIB
-BOOST_AUTO_TEST_CASE(AppendGZ) {
-}
-#endif
-
-BOOST_AUTO_TEST_CASE(IStream) {
- std::string name(WriteRandom());
- std::fstream stream(name.c_str(), std::ios::in);
- BOOST_CHECK_EQUAL(0, unlink(name.c_str()));
- ReadCompressed reader;
- reader.Reset(stream);
- VerifyRead(reader);
-}
-
-} // namespace
-} // namespace util
diff --git a/src/joshua/decoder/ff/lm/kenlm/util/scoped.cc b/src/joshua/decoder/ff/lm/kenlm/util/scoped.cc
deleted file mode 100644
index de1d9e9..0000000
--- a/src/joshua/decoder/ff/lm/kenlm/util/scoped.cc
+++ /dev/null
@@ -1,39 +0,0 @@
-#include "util/scoped.hh"
-
-#include <cstdlib>
-#if !defined(_WIN32) && !defined(_WIN64)
-#include <sys/mman.h>
-#endif
-
-namespace util {
-
-MallocException::MallocException(std::size_t requested) throw() {
- *this << "for " << requested << " bytes ";
-}
-
-MallocException::~MallocException() throw() {}
-
-namespace {
-void *InspectAddr(void *addr, std::size_t requested, const char *func_name) {
- UTIL_THROW_IF_ARG(!addr && requested, MallocException, (requested), "in " << func_name);
- // These routines are often used for large chunks of memory where huge pages help.
-#if MADV_HUGEPAGE
- madvise(addr, requested, MADV_HUGEPAGE);
-#endif
- return addr;
-}
-} // namespace
-
-void *MallocOrThrow(std::size_t requested) {
- return InspectAddr(std::malloc(requested), requested, "malloc");
-}
-
-void *CallocOrThrow(std::size_t requested) {
- return InspectAddr(std::calloc(1, requested), requested, "calloc");
-}
-
-void scoped_malloc::call_realloc(std::size_t requested) {
- p_ = InspectAddr(std::realloc(p_, requested), requested, "realloc");
-}
-
-} // namespace util
diff --git a/src/joshua/decoder/ff/lm/kenlm/util/scoped.hh b/src/joshua/decoder/ff/lm/kenlm/util/scoped.hh
deleted file mode 100644
index 60c36c3..0000000
--- a/src/joshua/decoder/ff/lm/kenlm/util/scoped.hh
+++ /dev/null
@@ -1,109 +0,0 @@
-#ifndef UTIL_SCOPED_H
-#define UTIL_SCOPED_H
-/* Other scoped objects in the style of scoped_ptr. */
-
-#include "util/exception.hh"
-#include <cstddef>
-#include <cstdlib>
-
-namespace util {
-
-class MallocException : public ErrnoException {
- public:
- explicit MallocException(std::size_t requested) throw();
- ~MallocException() throw();
-};
-
-void *MallocOrThrow(std::size_t requested);
-void *CallocOrThrow(std::size_t requested);
-
-/* Unfortunately, defining the operator* for void * makes the compiler complain.
- * So scoped is specialized to void. This includes the functionality common to
- * both, namely everything except reference.
- */
-template <class T, class Closer> class scoped_base {
- public:
- explicit scoped_base(T *p = NULL) : p_(p) {}
-
- ~scoped_base() { Closer::Close(p_); }
-
- void reset(T *p = NULL) {
- scoped_base other(p_);
- p_ = p;
- }
-
- T *get() { return p_; }
- const T *get() const { return p_; }
-
- T *operator->() { return p_; }
- const T *operator->() const { return p_; }
-
- T *release() {
- T *ret = p_;
- p_ = NULL;
- return ret;
- }
-
- protected:
- T *p_;
-
- private:
- scoped_base(const scoped_base &);
- scoped_base &operator=(const scoped_base &);
-};
-
-template <class T, class Closer> class scoped : public scoped_base<T, Closer> {
- public:
- explicit scoped(T *p = NULL) : scoped_base<T, Closer>(p) {}
-
- T &operator*() { return *scoped_base<T, Closer>::p_; }
- const T&operator*() const { return *scoped_base<T, Closer>::p_; }
-};
-
-template <class Closer> class scoped<void, Closer> : public scoped_base<void, Closer> {
- public:
- explicit scoped(void *p = NULL) : scoped_base<void, Closer>(p) {}
-};
-
-/* Closer for c functions like std::free and cmph cleanup functions */
-template <class T, void (*clean)(T*)> struct scoped_c_forward {
- static void Close(T *p) { clean(p); }
-};
-// Call a C function to delete stuff
-template <class T, void (*clean)(T*)> class scoped_c : public scoped<T, scoped_c_forward<T, clean> > {
- public:
- explicit scoped_c(T *p = NULL) : scoped<T, scoped_c_forward<T, clean> >(p) {}
-};
-
-class scoped_malloc : public scoped_c<void, std::free> {
- public:
- explicit scoped_malloc(void *p = NULL) : scoped_c<void, std::free>(p) {}
-
- void call_realloc(std::size_t to);
-};
-
-/* scoped_array using delete[] */
-struct scoped_delete_array_forward {
- template <class T> static void Close(T *p) { delete [] p; }
-};
-// Hat tip to boost.
-template <class T> class scoped_array : public scoped<T, scoped_delete_array_forward> {
- public:
- explicit scoped_array(T *p = NULL) : scoped<T, scoped_delete_array_forward>(p) {}
-
- T &operator[](std::size_t idx) { return scoped<T, scoped_delete_array_forward>::p_[idx]; }
- const T &operator[](std::size_t idx) const { return scoped<T, scoped_delete_array_forward>::p_[idx]; }
-};
-
-/* scoped_ptr using delete. If only there were a template typedef. */
-struct scoped_delete_forward {
- template <class T> static void Close(T *p) { delete p; }
-};
-template <class T> class scoped_ptr : public scoped<T, scoped_delete_forward> {
- public:
- explicit scoped_ptr(T *p = NULL) : scoped<T, scoped_delete_forward>(p) {}
-};
-
-} // namespace util
-
-#endif // UTIL_SCOPED_H
diff --git a/src/joshua/decoder/ff/lm/kenlm/util/sized_iterator.hh b/src/joshua/decoder/ff/lm/kenlm/util/sized_iterator.hh
deleted file mode 100644
index 75f6886..0000000
--- a/src/joshua/decoder/ff/lm/kenlm/util/sized_iterator.hh
+++ /dev/null
@@ -1,120 +0,0 @@
-#ifndef UTIL_SIZED_ITERATOR_H
-#define UTIL_SIZED_ITERATOR_H
-
-#include "util/proxy_iterator.hh"
-
-#include <algorithm>
-#include <functional>
-#include <string>
-
-#include <stdint.h>
-#include <string.h>
-
-namespace util {
-
-class SizedInnerIterator {
- public:
- SizedInnerIterator() {}
-
- SizedInnerIterator(void *ptr, std::size_t size) : ptr_(static_cast<uint8_t*>(ptr)), size_(size) {}
-
- bool operator==(const SizedInnerIterator &other) const {
- return ptr_ == other.ptr_;
- }
- bool operator<(const SizedInnerIterator &other) const {
- return ptr_ < other.ptr_;
- }
- SizedInnerIterator &operator+=(std::ptrdiff_t amount) {
- ptr_ += amount * size_;
- return *this;
- }
- std::ptrdiff_t operator-(const SizedInnerIterator &other) const {
- return (ptr_ - other.ptr_) / size_;
- }
-
- const void *Data() const { return ptr_; }
- void *Data() { return ptr_; }
- std::size_t EntrySize() const { return size_; }
-
- friend void swap(SizedInnerIterator &first, SizedInnerIterator &second) {
- std::swap(first.ptr_, second.ptr_);
- std::swap(first.size_, second.size_);
- }
-
- private:
- uint8_t *ptr_;
- std::size_t size_;
-};
-
-class SizedProxy {
- public:
- SizedProxy() {}
-
- SizedProxy(void *ptr, std::size_t size) : inner_(ptr, size) {}
-
- operator std::string() const {
- return std::string(reinterpret_cast<const char*>(inner_.Data()), inner_.EntrySize());
- }
-
- SizedProxy &operator=(const SizedProxy &from) {
- memcpy(inner_.Data(), from.inner_.Data(), inner_.EntrySize());
- return *this;
- }
-
- SizedProxy &operator=(const std::string &from) {
- memcpy(inner_.Data(), from.data(), inner_.EntrySize());
- return *this;
- }
-
- const void *Data() const { return inner_.Data(); }
- void *Data() { return inner_.Data(); }
-
- friend void swap(SizedProxy first, SizedProxy second) {
- std::swap_ranges(
- static_cast<char*>(first.inner_.Data()),
- static_cast<char*>(first.inner_.Data()) + first.inner_.EntrySize(),
- static_cast<char*>(second.inner_.Data()));
- }
-
- private:
- friend class util::ProxyIterator<SizedProxy>;
-
- typedef std::string value_type;
-
- typedef SizedInnerIterator InnerIterator;
-
- InnerIterator &Inner() { return inner_; }
- const InnerIterator &Inner() const { return inner_; }
- InnerIterator inner_;
-};
-
-typedef ProxyIterator<SizedProxy> SizedIterator;
-
-inline SizedIterator SizedIt(void *ptr, std::size_t size) { return SizedIterator(SizedProxy(ptr, size)); }
-
-// Useful wrapper for a comparison function i.e. sort.
-template <class Delegate, class Proxy = SizedProxy> class SizedCompare : public std::binary_function<const Proxy &, const Proxy &, bool> {
- public:
- explicit SizedCompare(const Delegate &delegate = Delegate()) : delegate_(delegate) {}
-
- bool operator()(const Proxy &first, const Proxy &second) const {
- return delegate_(first.Data(), second.Data());
- }
- bool operator()(const Proxy &first, const std::string &second) const {
- return delegate_(first.Data(), second.data());
- }
- bool operator()(const std::string &first, const Proxy &second) const {
- return delegate_(first.data(), second.Data());
- }
- bool operator()(const std::string &first, const std::string &second) const {
- return delegate_(first.data(), second.data());
- }
-
- const Delegate &GetDelegate() const { return delegate_; }
-
- private:
- const Delegate delegate_;
-};
-
-} // namespace util
-#endif // UTIL_SIZED_ITERATOR_H
diff --git a/src/joshua/decoder/ff/lm/kenlm/util/sorted_uniform.hh b/src/joshua/decoder/ff/lm/kenlm/util/sorted_uniform.hh
deleted file mode 100644
index a3f6d02..0000000
--- a/src/joshua/decoder/ff/lm/kenlm/util/sorted_uniform.hh
+++ /dev/null
@@ -1,106 +0,0 @@
-#ifndef UTIL_SORTED_UNIFORM_H
-#define UTIL_SORTED_UNIFORM_H
-
-#include <algorithm>
-#include <cstddef>
-
-#include <assert.h>
-#include <stdint.h>
-
-namespace util {
-
-template <class T> class IdentityAccessor {
- public:
- typedef T Key;
- T operator()(const T *in) const { return *in; }
-};
-
-struct Pivot64 {
- static inline std::size_t Calc(uint64_t off, uint64_t range, std::size_t width) {
- std::size_t ret = static_cast<std::size_t>(static_cast<float>(off) / static_cast<float>(range) * static_cast<float>(width));
- // Cap for floating point rounding
- return (ret < width) ? ret : width - 1;
- }
-};
-
-// Use when off * width is <2^64. This is guaranteed when each of them is actually a 32-bit value.
-struct Pivot32 {
- static inline std::size_t Calc(uint64_t off, uint64_t range, uint64_t width) {
- return static_cast<std::size_t>((off * width) / (range + 1));
- }
-};
-
-// Usage: PivotSelect<sizeof(DataType)>::T
-template <unsigned> struct PivotSelect;
-template <> struct PivotSelect<8> { typedef Pivot64 T; };
-template <> struct PivotSelect<4> { typedef Pivot32 T; };
-template <> struct PivotSelect<2> { typedef Pivot32 T; };
-
-/* Binary search. */
-template <class Iterator, class Accessor> bool BinaryFind(
- const Accessor &accessor,
- Iterator begin,
- Iterator end,
- const typename Accessor::Key key, Iterator &out) {
- while (end > begin) {
- Iterator pivot(begin + (end - begin) / 2);
- typename Accessor::Key mid(accessor(pivot));
- if (mid < key) {
- begin = pivot + 1;
- } else if (mid > key) {
- end = pivot;
- } else {
- out = pivot;
- return true;
- }
- }
- return false;
-}
-
-// Search the range [before_it + 1, after_it - 1] for key.
-// Preconditions:
-// before_v <= key <= after_v
-// before_v <= all values in the range [before_it + 1, after_it - 1] <= after_v
-// range is sorted.
-template <class Iterator, class Accessor, class Pivot> bool BoundedSortedUniformFind(
- const Accessor &accessor,
- Iterator before_it, typename Accessor::Key before_v,
- Iterator after_it, typename Accessor::Key after_v,
- const typename Accessor::Key key, Iterator &out) {
- while (after_it - before_it > 1) {
- Iterator pivot(before_it + (1 + Pivot::Calc(key - before_v, after_v - before_v, after_it - before_it - 1)));
- typename Accessor::Key mid(accessor(pivot));
- if (mid < key) {
- before_it = pivot;
- before_v = mid;
- } else if (mid > key) {
- after_it = pivot;
- after_v = mid;
- } else {
- out = pivot;
- return true;
- }
- }
- return false;
-}
-
-template <class Iterator, class Accessor, class Pivot> bool SortedUniformFind(const Accessor &accessor, Iterator begin, Iterator end, const typename Accessor::Key key, Iterator &out) {
- if (begin == end) return false;
- typename Accessor::Key below(accessor(begin));
- if (key <= below) {
- if (key == below) { out = begin; return true; }
- return false;
- }
- // Make the range [begin, end].
- --end;
- typename Accessor::Key above(accessor(end));
- if (key >= above) {
- if (key == above) { out = end; return true; }
- return false;
- }
- return BoundedSortedUniformFind<Iterator, Accessor, Pivot>(accessor, begin, below, end, above, key, out);
-}
-
-} // namespace util
-
-#endif // UTIL_SORTED_UNIFORM_H
diff --git a/src/joshua/decoder/ff/lm/kenlm/util/sorted_uniform_test.cc b/src/joshua/decoder/ff/lm/kenlm/util/sorted_uniform_test.cc
deleted file mode 100644
index d9f6fad..0000000
--- a/src/joshua/decoder/ff/lm/kenlm/util/sorted_uniform_test.cc
+++ /dev/null
@@ -1,127 +0,0 @@
-#include "util/sorted_uniform.hh"
-
-#include <boost/random/mersenne_twister.hpp>
-#include <boost/random/uniform_int.hpp>
-#include <boost/random/variate_generator.hpp>
-#include <boost/scoped_array.hpp>
-#include <boost/unordered_map.hpp>
-
-#define BOOST_TEST_MODULE SortedUniformTest
-#include <boost/test/unit_test.hpp>
-
-#include <algorithm>
-#include <limits>
-#include <vector>
-
-namespace util {
-namespace {
-
-template <class KeyT, class ValueT> struct Entry {
- typedef KeyT Key;
- typedef ValueT Value;
-
- Key key;
- Value value;
-
- Key GetKey() const {
- return key;
- }
-
- Value GetValue() const {
- return value;
- }
-
- bool operator<(const Entry<Key,Value> &other) const {
- return key < other.key;
- }
-};
-
-template <class KeyT> struct Accessor {
- typedef KeyT Key;
- template <class Value> Key operator()(const Entry<Key, Value> *entry) const {
- return entry->GetKey();
- }
-};
-
-template <class Key, class Value> void Check(const Entry<Key, Value> *begin, const Entry<Key, Value> *end, const boost::unordered_map<Key, Value> &reference, const Key key) {
- typename boost::unordered_map<Key, Value>::const_iterator ref = reference.find(key);
- typedef const Entry<Key, Value> *It;
- // g++ can't tell that require will crash and burn.
- It i = NULL;
- bool ret = SortedUniformFind<It, Accessor<Key>, Pivot64>(Accessor<Key>(), begin, end, key, i);
- if (ref == reference.end()) {
- BOOST_CHECK(!ret);
- } else {
- BOOST_REQUIRE(ret);
- BOOST_CHECK_EQUAL(ref->second, i->GetValue());
- }
-}
-
-BOOST_AUTO_TEST_CASE(empty) {
- typedef const Entry<uint64_t, float> T;
- const T *i;
- bool ret = SortedUniformFind<const T*, Accessor<uint64_t>, Pivot64>(Accessor<uint64_t>(), (const T*)NULL, (const T*)NULL, (uint64_t)10, i);
- BOOST_CHECK(!ret);
-}
-
-template <class Key> void RandomTest(Key upper, size_t entries, size_t queries) {
- typedef unsigned char Value;
- boost::mt19937 rng;
- boost::uniform_int<Key> range_key(0, upper);
- boost::uniform_int<Value> range_value(0, 255);
- boost::variate_generator<boost::mt19937&, boost::uniform_int<Key> > gen_key(rng, range_key);
- boost::variate_generator<boost::mt19937&, boost::uniform_int<unsigned char> > gen_value(rng, range_value);
-
- typedef Entry<Key, Value> Ent;
- std::vector<Ent> backing;
- boost::unordered_map<Key, unsigned char> reference;
- Ent ent;
- for (size_t i = 0; i < entries; ++i) {
- Key key = gen_key();
- unsigned char value = gen_value();
- if (reference.insert(std::make_pair(key, value)).second) {
- ent.key = key;
- ent.value = value;
- backing.push_back(ent);
- }
- }
- std::sort(backing.begin(), backing.end());
-
- // Random queries.
- for (size_t i = 0; i < queries; ++i) {
- const Key key = gen_key();
- Check<Key, unsigned char>(&*backing.begin(), &*backing.end(), reference, key);
- }
-
- typename boost::unordered_map<Key, unsigned char>::const_iterator it = reference.begin();
- for (size_t i = 0; (i < queries) && (it != reference.end()); ++i, ++it) {
- Check<Key, unsigned char>(&*backing.begin(), &*backing.end(), reference, it->second);
- }
-}
-
-BOOST_AUTO_TEST_CASE(basic) {
- RandomTest<uint8_t>(11, 10, 200);
-}
-
-BOOST_AUTO_TEST_CASE(tiny_dense_random) {
- RandomTest<uint8_t>(11, 50, 200);
-}
-
-BOOST_AUTO_TEST_CASE(small_dense_random) {
- RandomTest<uint8_t>(100, 100, 200);
-}
-
-BOOST_AUTO_TEST_CASE(small_sparse_random) {
- RandomTest<uint8_t>(200, 15, 200);
-}
-
-BOOST_AUTO_TEST_CASE(medium_sparse_random) {
- RandomTest<uint16_t>(32000, 1000, 2000);
-}
-
-BOOST_AUTO_TEST_CASE(sparse_random) {
- RandomTest<uint64_t>(std::numeric_limits<uint64_t>::max(), 100000, 2000);
-}
-
-} // namespace
-} // namespace util
diff --git a/src/joshua/decoder/ff/lm/kenlm/util/stream/Jamfile b/src/joshua/decoder/ff/lm/kenlm/util/stream/Jamfile
deleted file mode 100644
index 2e99979..0000000
--- a/src/joshua/decoder/ff/lm/kenlm/util/stream/Jamfile
+++ /dev/null
@@ -1,12 +0,0 @@
-#if $(BOOST-VERSION) >= 104800 {
-# timer-link = <library>/top//boost_timer ;
-#} else {
-# timer-link = ;
-#}
-
-fakelib stream : chain.cc io.cc line_input.cc multi_progress.cc ..//kenutil /top//boost_thread : : : <library>/top//boost_thread ;
-
-import testing ;
-unit-test io_test : io_test.cc stream /top//boost_unit_test_framework ;
-unit-test stream_test : stream_test.cc stream /top//boost_unit_test_framework ;
-unit-test sort_test : sort_test.cc stream /top//boost_unit_test_framework ;
diff --git a/src/joshua/decoder/ff/lm/kenlm/util/stream/block.hh b/src/joshua/decoder/ff/lm/kenlm/util/stream/block.hh
deleted file mode 100644
index aa7e28b..0000000
--- a/src/joshua/decoder/ff/lm/kenlm/util/stream/block.hh
+++ /dev/null
@@ -1,92 +0,0 @@
-#ifndef UTIL_STREAM_BLOCK_H
-#define UTIL_STREAM_BLOCK_H
-
-#include <cstddef>
-#include <stdint.h>
-
-namespace util {
-namespace stream {
-
-/**
- * Encapsulates a block of memory.
- */
-class Block {
- public:
-
- /**
- * Constructs an empty block.
- */
- Block() : mem_(NULL), valid_size_(0) {}
-
- /**
- * Constructs a block that encapsulates a segment of memory.
- *
- * @param[in] mem The segment of memory to encapsulate
- * @param[in] size The size of the memory segment in bytes
- */
- Block(void *mem, std::size_t size) : mem_(mem), valid_size_(size) {}
-
- /**
- * Set the number of bytes in this block that should be interpreted as valid.
- *
- * @param[in] to Number of bytes
- */
- void SetValidSize(std::size_t to) { valid_size_ = to; }
-
- /**
- * Gets the number of bytes in this block that should be interpreted as valid.
- * This is important because read might fill in less than Allocated at EOF.
- */
- std::size_t ValidSize() const { return valid_size_; }
-
- /** Gets a void pointer to the memory underlying this block. */
- void *Get() { return mem_; }
-
- /** Gets a const void pointer to the memory underlying this block. */
- const void *Get() const { return mem_; }
-
-
- /**
- * Gets a const void pointer to the end of the valid section of memory
- * encapsulated by this block.
- */
- const void *ValidEnd() const {
- return reinterpret_cast<const uint8_t*>(mem_) + valid_size_;
- }
-
- /**
- * Returns true if this block encapsulates a valid (non-NULL) block of memory.
- *
- * This method is a user-defined implicit conversion function to boolean;
- * among other things, this method enables bare instances of this class
- * to be used as the condition of an if statement.
- */
- operator bool() const { return mem_ != NULL; }
-
- /**
- * Returns true if this block is empty.
- *
- * In other words, if Get()==NULL, this method will return true.
- */
- bool operator!() const { return mem_ == NULL; }
-
- private:
- friend class Link;
-
- /**
- * Points this block's memory at NULL.
- *
- * This class defines poison as a block whose memory pointer is NULL.
- */
- void SetToPoison() {
- mem_ = NULL;
- }
-
- void *mem_;
- std::size_t valid_size_;
-};
-
-} // namespace stream
-} // namespace util
-
-#endif // UTIL_STREAM_BLOCK_H
diff --git a/src/joshua/decoder/ff/lm/kenlm/util/stream/chain.cc b/src/joshua/decoder/ff/lm/kenlm/util/stream/chain.cc
deleted file mode 100644
index ce29e42..0000000
--- a/src/joshua/decoder/ff/lm/kenlm/util/stream/chain.cc
+++ /dev/null
@@ -1,165 +0,0 @@
-#include "util/stream/chain.hh"
-
-#include "util/stream/io.hh"
-
-#include "util/exception.hh"
-#include "util/pcqueue.hh"
-
-#include <cstdlib>
-#include <new>
-#include <iostream>
-
-#include <stdint.h>
-#include <stdlib.h>
-
-namespace util {
-namespace stream {
-
-ChainConfigException::ChainConfigException() throw() { *this << "Chain configured with "; }
-ChainConfigException::~ChainConfigException() throw() {}
-
-Thread::~Thread() {
- thread_.join();
-}
-
-void Thread::UnhandledException(const std::exception &e) {
- std::cerr << e.what() << std::endl;
- abort();
-}
-
-void Recycler::Run(const ChainPosition &position) {
- for (Link l(position); l; ++l) {
- l->SetValidSize(position.GetChain().BlockSize());
- }
-}
-
-const Recycler kRecycle = Recycler();
-
-Chain::Chain(const ChainConfig &config) : config_(config), complete_called_(false) {
- UTIL_THROW_IF(!config.entry_size, ChainConfigException, "zero-size entries.");
- UTIL_THROW_IF(!config.block_count, ChainConfigException, "block count zero");
- UTIL_THROW_IF(config.total_memory < config.entry_size * config.block_count, ChainConfigException, config.total_memory << " total memory, too small for " << config.block_count << " blocks of containing entries of size " << config.entry_size);
- // Round down block size to a multiple of entry size.
- block_size_ = config.total_memory / (config.block_count * config.entry_size) * config.entry_size;
-}
-
-Chain::~Chain() {
- Wait();
-}
-
-ChainPosition Chain::Add() {
- if (!Running()) Start();
- PCQueue<Block> &in = queues_.back();
- queues_.push_back(new PCQueue<Block>(config_.block_count));
- return ChainPosition(in, queues_.back(), this, progress_);
-}
-
-Chain &Chain::operator>>(const WriteAndRecycle &writer) {
- threads_.push_back(new Thread(Complete(), writer));
- return *this;
-}
-
-Chain &Chain::operator>>(const PWriteAndRecycle &writer) {
- threads_.push_back(new Thread(Complete(), writer));
- return *this;
-}
-
-void Chain::Wait(bool release_memory) {
- if (queues_.empty()) {
- assert(threads_.empty());
- return; // Nothing to wait for.
- }
- if (!complete_called_) CompleteLoop();
- threads_.clear();
- for (std::size_t i = 0; queues_.front().Consume(); ++i) {
- if (i == config_.block_count) {
- std::cerr << "Chain ending without poison." << std::endl;
- abort();
- }
- }
- queues_.clear();
- progress_.Finished();
- complete_called_ = false;
- if (release_memory) memory_.reset();
-}
-
-void Chain::Start() {
- Wait(false);
- if (!memory_.get()) {
- // Allocate memory.
- assert(threads_.empty());
- assert(queues_.empty());
- std::size_t malloc_size = block_size_ * config_.block_count;
- memory_.reset(MallocOrThrow(malloc_size));
- }
- // This queue can accomodate all blocks.
- queues_.push_back(new PCQueue<Block>(config_.block_count));
- // Populate the lead queue with blocks.
- uint8_t *base = static_cast<uint8_t*>(memory_.get());
- for (std::size_t i = 0; i < config_.block_count; ++i) {
- queues_.front().Produce(Block(base, block_size_));
- base += block_size_;
- }
-}
-
-ChainPosition Chain::Complete() {
- assert(Running());
- UTIL_THROW_IF(complete_called_, util::Exception, "CompleteLoop() called twice");
- complete_called_ = true;
- return ChainPosition(queues_.back(), queues_.front(), this, progress_);
-}
-
-Link::Link() : in_(NULL), out_(NULL), poisoned_(true) {}
-
-void Link::Init(const ChainPosition &position) {
- UTIL_THROW_IF(in_, util::Exception, "Link::Init twice");
- in_ = position.in_;
- out_ = position.out_;
- poisoned_ = false;
- progress_ = position.progress_;
- in_->Consume(current_);
-}
-
-Link::Link(const ChainPosition &position) : in_(NULL) {
- Init(position);
-}
-
-Link::~Link() {
- if (current_) {
- // Probably an exception unwinding.
- std::cerr << "Last input should have been poison." << std::endl;
- // abort();
- } else {
- if (!poisoned_) {
- // Poison is a block whose memory pointer is NULL.
- //
- // Because we're in the else block,
- // we know that the memory pointer of current_ is NULL.
- //
- // Pass the current (poison) block!
- out_->Produce(current_);
- }
- }
-}
-
-Link &Link::operator++() {
- assert(current_);
- progress_ += current_.ValidSize();
- out_->Produce(current_);
- in_->Consume(current_);
- if (!current_) {
- poisoned_ = true;
- out_->Produce(current_);
- }
- return *this;
-}
-
-void Link::Poison() {
- assert(!poisoned_);
- current_.SetToPoison();
- out_->Produce(current_);
- poisoned_ = true;
-}
-
-} // namespace stream
-} // namespace util
diff --git a/src/joshua/decoder/ff/lm/kenlm/util/stream/chain.hh b/src/joshua/decoder/ff/lm/kenlm/util/stream/chain.hh
deleted file mode 100644
index 5086508..0000000
--- a/src/joshua/decoder/ff/lm/kenlm/util/stream/chain.hh
+++ /dev/null
@@ -1,339 +0,0 @@
-#ifndef UTIL_STREAM_CHAIN_H
-#define UTIL_STREAM_CHAIN_H
-
-#include "util/stream/block.hh"
-#include "util/stream/config.hh"
-#include "util/stream/multi_progress.hh"
-#include "util/scoped.hh"
-
-#include <boost/ptr_container/ptr_vector.hpp>
-#include <boost/thread/thread.hpp>
-
-#include <cstddef>
-
-#include <assert.h>
-
-namespace util {
-template <class T> class PCQueue;
-namespace stream {
-
-class ChainConfigException : public Exception {
- public:
- ChainConfigException() throw();
- ~ChainConfigException() throw();
-};
-
-class Chain;
-
-/**
- * Encapsulates a @ref PCQueue "producer queue" and a @ref PCQueue "consumer queue" within a @ref Chain "chain".
- *
- * Specifies position in chain for Link constructor.
- */
-class ChainPosition {
- public:
- const Chain &GetChain() const { return *chain_; }
- private:
- friend class Chain;
- friend class Link;
- ChainPosition(PCQueue<Block> &in, PCQueue<Block> &out, Chain *chain, MultiProgress &progress)
- : in_(&in), out_(&out), chain_(chain), progress_(progress.Add()) {}
-
- PCQueue<Block> *in_, *out_;
-
- Chain *chain_;
-
- WorkerProgress progress_;
-};
-
-
-/**
- * Encapsulates a worker thread processing data at a given position in the chain.
- *
- * Each instance of this class owns one boost thread in which the worker is Run().
- */
-class Thread {
- public:
-
- /**
- * Constructs a new Thread in which the provided Worker is Run().
- *
- * Position is usually ChainPosition but if there are multiple streams involved, this can be ChainPositions.
- *
- * After a call to this constructor, the provided worker will be running within a boost thread owned by the newly constructed Thread object.
- */
- template <class Position, class Worker> Thread(const Position &position, const Worker &worker)
- : thread_(boost::ref(*this), position, worker) {}
-
- ~Thread();
-
- /**
- * Launches the provided worker in this object's boost thread.
- *
- * This method is called automatically by this class's @ref Thread() "constructor".
- */
- template <class Position, class Worker> void operator()(const Position &position, Worker &worker) {
- try {
- worker.Run(position);
- } catch (const std::exception &e) {
- UnhandledException(e);
- }
- }
-
- private:
- void UnhandledException(const std::exception &e);
-
- boost::thread thread_;
-};
-
-/**
- * This resets blocks to full valid size. Used to close the loop in Chain by recycling blocks.
- */
-class Recycler {
- public:
- /**
- * Resets the blocks in the chain such that the blocks' respective valid sizes match the chain's block size.
- *
- * @see Block::SetValidSize()
- * @see Chain::BlockSize()
- */
- void Run(const ChainPosition &position);
-};
-
-extern const Recycler kRecycle;
-class WriteAndRecycle;
-class PWriteAndRecycle;
-
-/**
- * Represents a sequence of workers, through which @ref Block "blocks" can pass.
- */
-class Chain {
- private:
- template <class T, void (T::*ptr)(const ChainPosition &) = &T::Run> struct CheckForRun {
- typedef Chain type;
- };
-
- public:
-
- /**
- * Constructs a configured Chain.
- *
- * @param config Specifies how to configure the Chain.
- */
- explicit Chain(const ChainConfig &config);
-
- /**
- * Destructs a Chain.
- *
- * This method waits for the chain's threads to complete,
- * and frees the memory held by this chain.
- */
- ~Chain();
-
- void ActivateProgress() {
- assert(!Running());
- progress_.Activate();
- }
-
- void SetProgressTarget(uint64_t target) {
- progress_.SetTarget(target);
- }
-
- /**
- * Gets the number of bytes in each record of a Block.
- *
- * @see ChainConfig::entry_size
- */
- std::size_t EntrySize() const {
- return config_.entry_size;
- }
-
- /**
- * Gets the inital @ref Block::ValidSize "valid size" for @ref Block "blocks" in this chain.
- *
- * @see Block::ValidSize
- */
- std::size_t BlockSize() const {
- return block_size_;
- }
-
- /** Two ways to add to the chain: Add() or operator>>. */
- ChainPosition Add();
-
- /**
- * Adds a new worker to this chain,
- * and runs that worker in a new Thread owned by this chain.
- *
- * The worker must have a Run method that accepts a position argument.
- *
- * @see Thread::operator()()
- */
- template <class Worker> typename CheckForRun<Worker>::type &operator>>(const Worker &worker) {
- assert(!complete_called_);
- threads_.push_back(new Thread(Add(), worker));
- return *this;
- }
-
- /**
- * Adds a new worker to this chain (but avoids copying that worker),
- * and runs that worker in a new Thread owned by this chain.
- *
- * The worker must have a Run method that accepts a position argument.
- *
- * @see Thread::operator()()
- */
- template <class Worker> typename CheckForRun<Worker>::type &operator>>(const boost::reference_wrapper<Worker> &worker) {
- assert(!complete_called_);
- threads_.push_back(new Thread(Add(), worker));
- return *this;
- }
-
- // Note that Link and Stream also define operator>> outside this class.
-
- // To complete the loop, call CompleteLoop(), >> kRecycle, or the destructor.
- void CompleteLoop() {
- threads_.push_back(new Thread(Complete(), kRecycle));
- }
-
- /**
- * Adds a Recycler worker to this chain,
- * and runs that worker in a new Thread owned by this chain.
- */
- Chain &operator>>(const Recycler &) {
- CompleteLoop();
- return *this;
- }
-
- /**
- * Adds a WriteAndRecycle worker to this chain,
- * and runs that worker in a new Thread owned by this chain.
- */
- Chain &operator>>(const WriteAndRecycle &writer);
- Chain &operator>>(const PWriteAndRecycle &writer);
-
- // Chains are reusable. Call Wait to wait for everything to finish and free memory.
- void Wait(bool release_memory = true);
-
- // Waits for the current chain to complete (if any) then starts again.
- void Start();
-
- bool Running() const { return !queues_.empty(); }
-
- private:
- ChainPosition Complete();
-
- ChainConfig config_;
-
- std::size_t block_size_;
-
- scoped_malloc memory_;
-
- boost::ptr_vector<PCQueue<Block> > queues_;
-
- bool complete_called_;
-
- boost::ptr_vector<Thread> threads_;
-
- MultiProgress progress_;
-};
-
-// Create the link in the worker thread using the position token.
-/**
- * Represents a C++ style iterator over @ref Block "blocks".
- */
-class Link {
- public:
-
- // Either default construct and Init or just construct all at once.
-
- /**
- * Constructs an @ref Init "initialized" link.
- *
- * @see Init
- */
- explicit Link(const ChainPosition &position);
-
- /**
- * Constructs a link that must subsequently be @ref Init "initialized".
- *
- * @see Init
- */
- Link();
-
- /**
- * Initializes the link with the input @ref PCQueue "consumer queue" and output @ref PCQueue "producer queue" at a given @ref ChainPosition "position" in the @ref Chain "chain".
- *
- * @see Link()
- */
- void Init(const ChainPosition &position);
-
- /**
- * Destructs the link object.
- *
- * If necessary, this method will pass a poison block
- * to this link's output @ref PCQueue "producer queue".
- *
- * @see Block::SetToPoison()
- */
- ~Link();
-
- /**
- * Gets a reference to the @ref Block "block" at this link.
- */
- Block &operator*() { return current_; }
-
- /**
- * Gets a const reference to the @ref Block "block" at this link.
- */
- const Block &operator*() const { return current_; }
-
- /**
- * Gets a pointer to the @ref Block "block" at this link.
- */
- Block *operator->() { return ¤t_; }
-
- /**
- * Gets a const pointer to the @ref Block "block" at this link.
- */
- const Block *operator->() const { return ¤t_; }
-
- /**
- * Gets the link at the next @ref ChainPosition "position" in the @ref Chain "chain".
- */
- Link &operator++();
-
- /**
- * Returns true if the @ref Block "block" at this link encapsulates a valid (non-NULL) block of memory.
- *
- * This method is a user-defined implicit conversion function to boolean;
- * among other things, this method enables bare instances of this class
- * to be used as the condition of an if statement.
- */
- operator bool() const { return current_; }
-
- /**
- * @ref Block::SetToPoison() "Poisons" the @ref Block "block" at this link,
- * and passes this now-poisoned block to this link's output @ref PCQueue "producer queue".
- *
- * @see Block::SetToPoison()
- */
- void Poison();
-
- private:
- Block current_;
- PCQueue<Block> *in_, *out_;
-
- bool poisoned_;
-
- WorkerProgress progress_;
-};
-
-inline Chain &operator>>(Chain &chain, Link &link) {
- link.Init(chain.Add());
- return chain;
-}
-
-} // namespace stream
-} // namespace util
-
-#endif // UTIL_STREAM_CHAIN_H
diff --git a/src/joshua/decoder/ff/lm/kenlm/util/stream/config.hh b/src/joshua/decoder/ff/lm/kenlm/util/stream/config.hh
deleted file mode 100644
index 6bad36b..0000000
--- a/src/joshua/decoder/ff/lm/kenlm/util/stream/config.hh
+++ /dev/null
@@ -1,63 +0,0 @@
-#ifndef UTIL_STREAM_CONFIG_H
-#define UTIL_STREAM_CONFIG_H
-
-#include <cstddef>
-#include <string>
-
-namespace util { namespace stream {
-
-/**
- * Represents how a chain should be configured.
- */
-struct ChainConfig {
-
- /** Constructs an configuration with underspecified (or default) parameters. */
- ChainConfig() {}
-
- /**
- * Constructs a chain configuration object.
- *
- * @param [in] in_entry_size Number of bytes in each record.
- * @param [in] in_block_count Number of blocks in the chain.
- * @param [in] in_total_memory Total number of bytes available to the chain.
- * This value will be divided amongst the blocks in the chain.
- */
- ChainConfig(std::size_t in_entry_size, std::size_t in_block_count, std::size_t in_total_memory)
- : entry_size(in_entry_size), block_count(in_block_count), total_memory(in_total_memory) {}
-
- /**
- * Number of bytes in each record.
- */
- std::size_t entry_size;
-
- /**
- * Number of blocks in the chain.
- */
- std::size_t block_count;
-
- /**
- * Total number of bytes available to the chain.
- * This value will be divided amongst the blocks in the chain.
- * Chain's constructor will make this a multiple of entry_size.
- */
- std::size_t total_memory;
-};
-
-
-/**
- * Represents how a sorter should be configured.
- */
-struct SortConfig {
-
- /** Filename prefix where temporary files should be placed. */
- std::string temp_prefix;
-
- /** Size of each input/output buffer. */
- std::size_t buffer_size;
-
- /** Total memory to use when running alone. */
- std::size_t total_memory;
-};
-
-}} // namespaces
-#endif // UTIL_STREAM_CONFIG_H
diff --git a/src/joshua/decoder/ff/lm/kenlm/util/stream/io.cc b/src/joshua/decoder/ff/lm/kenlm/util/stream/io.cc
deleted file mode 100644
index fa8467a..0000000
--- a/src/joshua/decoder/ff/lm/kenlm/util/stream/io.cc
+++ /dev/null
@@ -1,78 +0,0 @@
-#include "util/stream/io.hh"
-
-#include "util/file.hh"
-#include "util/stream/chain.hh"
-
-#include <cstddef>
-
-namespace util {
-namespace stream {
-
-ReadSizeException::ReadSizeException() throw() {}
-ReadSizeException::~ReadSizeException() throw() {}
-
-void Read::Run(const ChainPosition &position) {
- const std::size_t block_size = position.GetChain().BlockSize();
- const std::size_t entry_size = position.GetChain().EntrySize();
- for (Link link(position); link; ++link) {
- std::size_t got = util::ReadOrEOF(file_, link->Get(), block_size);
- UTIL_THROW_IF(got % entry_size, ReadSizeException, "File ended with " << got << " bytes, not a multiple of " << entry_size << ".");
- if (got == 0) {
- link.Poison();
- return;
- } else {
- link->SetValidSize(got);
- }
- }
-}
-
-void PRead::Run(const ChainPosition &position) {
- scoped_fd owner;
- if (own_) owner.reset(file_);
- const uint64_t size = SizeOrThrow(file_);
- UTIL_THROW_IF(size % static_cast<uint64_t>(position.GetChain().EntrySize()), ReadSizeException, "File size " << file_ << " size is " << size << " not a multiple of " << position.GetChain().EntrySize());
- const std::size_t block_size = position.GetChain().BlockSize();
- const uint64_t block_size64 = static_cast<uint64_t>(block_size);
- Link link(position);
- uint64_t offset = 0;
- for (; offset + block_size64 < size; offset += block_size64, ++link) {
- ErsatzPRead(file_, link->Get(), block_size, offset);
- link->SetValidSize(block_size);
- }
- // size - offset is <= block_size, so it casts to 32-bit fine.
- if (size - offset) {
- ErsatzPRead(file_, link->Get(), size - offset, offset);
- link->SetValidSize(size - offset);
- ++link;
- }
- link.Poison();
-}
-
-void Write::Run(const ChainPosition &position) {
- for (Link link(position); link; ++link) {
- WriteOrThrow(file_, link->Get(), link->ValidSize());
- }
-}
-
-void WriteAndRecycle::Run(const ChainPosition &position) {
- const std::size_t block_size = position.GetChain().BlockSize();
- for (Link link(position); link; ++link) {
- WriteOrThrow(file_, link->Get(), link->ValidSize());
- link->SetValidSize(block_size);
- }
-}
-
-void PWriteAndRecycle::Run(const ChainPosition &position) {
- const std::size_t block_size = position.GetChain().BlockSize();
- uint64_t offset = 0;
- for (Link link(position); link; ++link) {
- ErsatzPWrite(file_, link->Get(), link->ValidSize(), offset);
- offset += link->ValidSize();
- link->SetValidSize(block_size);
- }
- // Trim file to size.
- util::ResizeOrThrow(file_, offset);
-}
-
-} // namespace stream
-} // namespace util
diff --git a/src/joshua/decoder/ff/lm/kenlm/util/stream/io.hh b/src/joshua/decoder/ff/lm/kenlm/util/stream/io.hh
deleted file mode 100644
index 8dae2cb..0000000
--- a/src/joshua/decoder/ff/lm/kenlm/util/stream/io.hh
+++ /dev/null
@@ -1,87 +0,0 @@
-#ifndef UTIL_STREAM_IO_H
-#define UTIL_STREAM_IO_H
-
-#include "util/exception.hh"
-#include "util/file.hh"
-
-namespace util {
-namespace stream {
-
-class ChainPosition;
-
-class ReadSizeException : public util::Exception {
- public:
- ReadSizeException() throw();
- ~ReadSizeException() throw();
-};
-
-class Read {
- public:
- explicit Read(int fd) : file_(fd) {}
- void Run(const ChainPosition &position);
- private:
- int file_;
-};
-
-// Like read but uses pread so that the file can be accessed from multiple threads.
-class PRead {
- public:
- explicit PRead(int fd, bool take_own = false) : file_(fd), own_(take_own) {}
- void Run(const ChainPosition &position);
- private:
- int file_;
- bool own_;
-};
-
-class Write {
- public:
- explicit Write(int fd) : file_(fd) {}
- void Run(const ChainPosition &position);
- private:
- int file_;
-};
-
-// It's a common case that stuff is written and then recycled. So rather than
-// spawn another thread to Recycle, this combines the two roles.
-class WriteAndRecycle {
- public:
- explicit WriteAndRecycle(int fd) : file_(fd) {}
- void Run(const ChainPosition &position);
- private:
- int file_;
-};
-
-class PWriteAndRecycle {
- public:
- explicit PWriteAndRecycle(int fd) : file_(fd) {}
- void Run(const ChainPosition &position);
- private:
- int file_;
-};
-
-
-// Reuse the same file over and over again to buffer output.
-class FileBuffer {
- public:
- explicit FileBuffer(int fd) : file_(fd) {}
-
- PWriteAndRecycle Sink() const {
- util::SeekOrThrow(file_.get(), 0);
- return PWriteAndRecycle(file_.get());
- }
-
- PRead Source() const {
- return PRead(file_.get());
- }
-
- uint64_t Size() const {
- return SizeOrThrow(file_.get());
- }
-
- private:
- scoped_fd file_;
-};
-
-} // namespace stream
-} // namespace util
-#endif // UTIL_STREAM_IO_H
diff --git a/src/joshua/decoder/ff/lm/kenlm/util/stream/line_input.cc b/src/joshua/decoder/ff/lm/kenlm/util/stream/line_input.cc
deleted file mode 100644
index dafa502..0000000
--- a/src/joshua/decoder/ff/lm/kenlm/util/stream/line_input.cc
+++ /dev/null
@@ -1,52 +0,0 @@
-#include "util/stream/line_input.hh"
-
-#include "util/exception.hh"
-#include "util/file.hh"
-#include "util/read_compressed.hh"
-#include "util/stream/chain.hh"
-
-#include <algorithm>
-#include <vector>
-
-namespace util { namespace stream {
-
-void LineInput::Run(const ChainPosition &position) {
- ReadCompressed reader(fd_);
- // Holding area for beginning of line to be placed in next block.
- std::vector<char> carry;
-
- for (Link block(position); ; ++block) {
- char *to = static_cast<char*>(block->Get());
- char *begin = to;
- char *end = to + position.GetChain().BlockSize();
- std::copy(carry.begin(), carry.end(), to);
- to += carry.size();
- while (to != end) {
- std::size_t got = reader.Read(to, end - to);
- if (!got) {
- // EOF
- block->SetValidSize(to - begin);
- ++block;
- block.Poison();
- return;
- }
- to += got;
- }
-
- // Find the last newline.
- char *newline;
- for (newline = to - 1; ; --newline) {
- UTIL_THROW_IF(newline < begin, Exception, "Did not find a newline in " << position.GetChain().BlockSize() << " bytes of input of " << NameFromFD(fd_) << ". Is this a text file?");
- if (*newline == '\n') break;
- }
-
- // Copy everything after the last newline to the carry.
- carry.clear();
- carry.resize(to - (newline + 1));
- std::copy(newline + 1, to, &*carry.begin());
-
- block->SetValidSize(newline + 1 - begin);
- }
-}
-
-}} // namespaces
diff --git a/src/joshua/decoder/ff/lm/kenlm/util/stream/multi_progress.cc b/src/joshua/decoder/ff/lm/kenlm/util/stream/multi_progress.cc
deleted file mode 100644
index 8ba1038..0000000
--- a/src/joshua/decoder/ff/lm/kenlm/util/stream/multi_progress.cc
+++ /dev/null
@@ -1,86 +0,0 @@
-#include "util/stream/multi_progress.hh"
-
-// TODO: merge some functionality with the simple progress bar?
-#include "util/ersatz_progress.hh"
-
-#include <iostream>
-#include <limits>
-
-#include <string.h>
-
-#if !defined(_WIN32) && !defined(_WIN64)
-#include <unistd.h>
-#endif
-
-namespace util { namespace stream {
-
-namespace {
-const char kDisplayCharacters[] = "-+*#0123456789";
-
-uint64_t Next(unsigned char stone, uint64_t complete) {
- return (static_cast<uint64_t>(stone + 1) * complete + MultiProgress::kWidth - 1) / MultiProgress::kWidth;
-}
-
-} // namespace
-
-MultiProgress::MultiProgress() : active_(false), complete_(std::numeric_limits<uint64_t>::max()), character_handout_(0) {}
-
-MultiProgress::~MultiProgress() {
- if (active_ && complete_ != std::numeric_limits<uint64_t>::max())
- std::cerr << '\n';
-}
-
-void MultiProgress::Activate() {
- active_ =
-#if !defined(_WIN32) && !defined(_WIN64)
- // Is stderr a terminal?
- (isatty(2) == 1)
-#else
- true
-#endif
- ;
-}
-
-void MultiProgress::SetTarget(uint64_t complete) {
- if (!active_) return;
- complete_ = complete;
- if (!complete) complete_ = 1;
- memset(display_, 0, sizeof(display_));
- character_handout_ = 0;
- std::cerr << kProgressBanner;
-}
-
-WorkerProgress MultiProgress::Add() {
- if (!active_)
- return WorkerProgress(std::numeric_limits<uint64_t>::max(), *this, '\0');
- std::size_t character_index;
- {
- boost::unique_lock<boost::mutex> lock(mutex_);
- character_index = character_handout_++;
- if (character_handout_ == sizeof(kDisplayCharacters) - 1)
- character_handout_ = 0;
- }
- return WorkerProgress(Next(0, complete_), *this, kDisplayCharacters[character_index]);
-}
-
-void MultiProgress::Finished() {
- if (!active_ || complete_ == std::numeric_limits<uint64_t>::max()) return;
- std::cerr << '\n';
- complete_ = std::numeric_limits<uint64_t>::max();
-}
-
-void MultiProgress::Milestone(WorkerProgress &worker) {
- if (!active_ || complete_ == std::numeric_limits<uint64_t>::max()) return;
- unsigned char stone = std::min(static_cast<uint64_t>(kWidth), worker.current_ * kWidth / complete_);
- for (char *i = &display_[worker.stone_]; i < &display_[stone]; ++i) {
- *i = worker.character_;
- }
- worker.next_ = Next(stone, complete_);
- worker.stone_ = stone;
- {
- boost::unique_lock<boost::mutex> lock(mutex_);
- std::cerr << '\r' << display_ << std::flush;
- }
-}
-
-}} // namespaces
diff --git a/src/joshua/decoder/ff/lm/kenlm/util/stream/multi_progress.hh b/src/joshua/decoder/ff/lm/kenlm/util/stream/multi_progress.hh
deleted file mode 100644
index 82e698a..0000000
--- a/src/joshua/decoder/ff/lm/kenlm/util/stream/multi_progress.hh
+++ /dev/null
@@ -1,90 +0,0 @@
-/* Progress bar suitable for chains of workers */
-#ifndef UTIL_STREAM_MULTI_PROGRESS_H
-#define UTIL_STREAM_MULTI_PROGRESS_H
-
-#include <boost/thread/mutex.hpp>
-
-#include <cstddef>
-
-#include <stdint.h>
-
-namespace util { namespace stream {
-
-class WorkerProgress;
-
-class MultiProgress {
- public:
- static const unsigned char kWidth = 100;
-
- MultiProgress();
-
- ~MultiProgress();
-
- // Turns on showing (requires SetTarget too).
- void Activate();
-
- void SetTarget(uint64_t complete);
-
- WorkerProgress Add();
-
- void Finished();
-
- private:
- friend class WorkerProgress;
- void Milestone(WorkerProgress &worker);
-
- bool active_;
-
- uint64_t complete_;
-
- boost::mutex mutex_;
-
- // \0 at the end.
- char display_[kWidth + 1];
-
- std::size_t character_handout_;
-
- MultiProgress(const MultiProgress &);
- MultiProgress &operator=(const MultiProgress &);
-};
-
-class WorkerProgress {
- public:
- // Default contrutor must be initialized with operator= later.
- WorkerProgress() : parent_(NULL) {}
-
- // Not threadsafe for the same worker by default.
- WorkerProgress &operator++() {
- if (++current_ >= next_) {
- parent_->Milestone(*this);
- }
- return *this;
- }
-
- WorkerProgress &operator+=(uint64_t amount) {
- current_ += amount;
- if (current_ >= next_) {
- parent_->Milestone(*this);
- }
- return *this;
- }
-
- private:
- friend class MultiProgress;
- WorkerProgress(uint64_t next, MultiProgress &parent, char character)
- : current_(0), next_(next), parent_(&parent), stone_(0), character_(character) {}
-
- uint64_t current_, next_;
-
- MultiProgress *parent_;
-
- // Previous milestone reached.
- unsigned char stone_;
-
- // Character to display in bar.
- char character_;
-};
-
-}} // namespaces
-
-#endif // UTIL_STREAM_MULTI_PROGRESS_H
diff --git a/src/joshua/decoder/ff/lm/kenlm/util/stream/multi_stream.hh b/src/joshua/decoder/ff/lm/kenlm/util/stream/multi_stream.hh
deleted file mode 100644
index 0ee7fab..0000000
--- a/src/joshua/decoder/ff/lm/kenlm/util/stream/multi_stream.hh
+++ /dev/null
@@ -1,127 +0,0 @@
-#ifndef UTIL_STREAM_MULTI_STREAM_H
-#define UTIL_STREAM_MULTI_STREAM_H
-
-#include "util/fixed_array.hh"
-#include "util/scoped.hh"
-#include "util/stream/chain.hh"
-#include "util/stream/stream.hh"
-
-#include <cstddef>
-#include <new>
-
-#include <assert.h>
-#include <stdlib.h>
-
-namespace util { namespace stream {
-
-class Chains;
-
-class ChainPositions : public util::FixedArray<util::stream::ChainPosition> {
- public:
- ChainPositions() {}
-
- void Init(Chains &chains);
-
- explicit ChainPositions(Chains &chains) {
- Init(chains);
- }
-};
-
-class Chains : public util::FixedArray<util::stream::Chain> {
- private:
- template <class T, void (T::*ptr)(const ChainPositions &) = &T::Run> struct CheckForRun {
- typedef Chains type;
- };
-
- public:
- // Must call Init.
- Chains() {}
-
- explicit Chains(std::size_t limit) : util::FixedArray<util::stream::Chain>(limit) {}
-
- template <class Worker> typename CheckForRun<Worker>::type &operator>>(const Worker &worker) {
- threads_.push_back(new util::stream::Thread(ChainPositions(*this), worker));
- return *this;
- }
-
- template <class Worker> typename CheckForRun<Worker>::type &operator>>(const boost::reference_wrapper<Worker> &worker) {
- threads_.push_back(new util::stream::Thread(ChainPositions(*this), worker));
- return *this;
- }
-
- Chains &operator>>(const util::stream::Recycler &recycler) {
- for (util::stream::Chain *i = begin(); i != end(); ++i)
- *i >> recycler;
- return *this;
- }
-
- void Wait(bool release_memory = true) {
- threads_.clear();
- for (util::stream::Chain *i = begin(); i != end(); ++i) {
- i->Wait(release_memory);
- }
- }
-
- private:
- boost::ptr_vector<util::stream::Thread> threads_;
-
- Chains(const Chains &);
- void operator=(const Chains &);
-};
-
-inline void ChainPositions::Init(Chains &chains) {
- util::FixedArray<util::stream::ChainPosition>::Init(chains.size());
- for (util::stream::Chain *i = chains.begin(); i != chains.end(); ++i) {
- // use "placement new" syntax to initalize ChainPosition in an already-allocated memory location
- new (end()) util::stream::ChainPosition(i->Add()); Constructed();
- }
-}
-
-inline Chains &operator>>(Chains &chains, ChainPositions &positions) {
- positions.Init(chains);
- return chains;
-}
-
-template <class T> class GenericStreams : public util::FixedArray<T> {
- private:
- typedef util::FixedArray<T> P;
- public:
- GenericStreams() {}
-
- // This puts a dummy T at the beginning (useful to algorithms that need to reference something at the beginning).
- void InitWithDummy(const ChainPositions &positions) {
- P::Init(positions.size() + 1);
- new (P::end()) T(); // use "placement new" syntax to initalize T in an already-allocated memory location
- P::Constructed();
- for (const util::stream::ChainPosition *i = positions.begin(); i != positions.end(); ++i) {
- P::push_back(*i);
- }
- }
-
- // Limit restricts to positions[0,limit)
- void Init(const ChainPositions &positions, std::size_t limit) {
- P::Init(limit);
- for (const util::stream::ChainPosition *i = positions.begin(); i != positions.begin() + limit; ++i) {
- P::push_back(*i);
- }
- }
- void Init(const ChainPositions &positions) {
- Init(positions, positions.size());
- }
-
- GenericStreams(const ChainPositions &positions) {
- Init(positions);
- }
-};
-
-template <class T> inline Chains &operator>>(Chains &chains, GenericStreams<T> &streams) {
- ChainPositions positions;
- chains >> positions;
- streams.Init(positions);
- return chains;
-}
-
-typedef GenericStreams<Stream> Streams;
-
-}} // namespaces
-#endif // UTIL_STREAM_MULTI_STREAM_H
diff --git a/src/joshua/decoder/ff/lm/kenlm/util/stream/sort.hh b/src/joshua/decoder/ff/lm/kenlm/util/stream/sort.hh
deleted file mode 100644
index 9082cfd..0000000
--- a/src/joshua/decoder/ff/lm/kenlm/util/stream/sort.hh
+++ /dev/null
@@ -1,550 +0,0 @@
-/* Usage:
- * Sort<Compare> sorter(temp, compare);
- * Chain(config) >> Read(file) >> sorter.Unsorted();
- * Stream stream;
- * Chain chain(config) >> sorter.Sorted(internal_config, lazy_config) >> stream;
- *
- * Note that sorter must outlive any threads that use Unsorted or Sorted.
- *
- * Combiners take the form:
- * bool operator()(void *into, const void *option, const Compare &compare) const
- * which returns true iff a combination happened. The sorting algorithm
- * guarantees compare(into, option). But it does not guarantee
- * compare(option, into).
- * Currently, combining is only done in merge steps, not during on-the-fly
- * sort. Use a hash table for that.
- */
-
-#ifndef UTIL_STREAM_SORT_H
-#define UTIL_STREAM_SORT_H
-
-#include "util/stream/chain.hh"
-#include "util/stream/config.hh"
-#include "util/stream/io.hh"
-#include "util/stream/stream.hh"
-#include "util/stream/timer.hh"
-
-#include "util/file.hh"
-#include "util/scoped.hh"
-#include "util/sized_iterator.hh"
-
-#include <algorithm>
-#include <iostream>
-#include <queue>
-#include <string>
-
-namespace util {
-namespace stream {
-
-struct NeverCombine {
- template <class Compare> bool operator()(const void *, const void *, const Compare &) const {
- return false;
- }
-};
-
-// Manage the offsets of sorted blocks in a file.
-class Offsets {
- public:
- explicit Offsets(int fd) : log_(fd) {
- Reset();
- }
-
- int File() const { return log_; }
-
- void Append(uint64_t length) {
- if (!length) return;
- ++block_count_;
- if (length == cur_.length) {
- ++cur_.run;
- return;
- }
- WriteOrThrow(log_, &cur_, sizeof(Entry));
- cur_.length = length;
- cur_.run = 1;
- }
-
- void FinishedAppending() {
- WriteOrThrow(log_, &cur_, sizeof(Entry));
- SeekOrThrow(log_, sizeof(Entry)); // Skip 0,0 at beginning.
- cur_.run = 0;
- if (block_count_) {
- ReadOrThrow(log_, &cur_, sizeof(Entry));
- assert(cur_.length);
- assert(cur_.run);
- }
- }
-
- uint64_t RemainingBlocks() const { return block_count_; }
-
- uint64_t TotalOffset() const { return output_sum_; }
-
- uint64_t PeekSize() const {
- return cur_.length;
- }
-
- uint64_t NextSize() {
- assert(block_count_);
- uint64_t ret = cur_.length;
- output_sum_ += ret;
-
- --cur_.run;
- --block_count_;
- if (!cur_.run && block_count_) {
- ReadOrThrow(log_, &cur_, sizeof(Entry));
- assert(cur_.length);
- assert(cur_.run);
- }
- return ret;
- }
-
- void Reset() {
- SeekOrThrow(log_, 0);
- ResizeOrThrow(log_, 0);
- cur_.length = 0;
- cur_.run = 0;
- block_count_ = 0;
- output_sum_ = 0;
- }
-
- private:
- int log_;
-
- struct Entry {
- uint64_t length;
- uint64_t run;
- };
- Entry cur_;
-
- uint64_t block_count_;
-
- uint64_t output_sum_;
-};
-
-// A priority queue of entries backed by file buffers
-template <class Compare> class MergeQueue {
- public:
- MergeQueue(int fd, std::size_t buffer_size, std::size_t entry_size, const Compare &compare)
- : queue_(Greater(compare)), in_(fd), buffer_size_(buffer_size), entry_size_(entry_size) {}
-
- void Push(void *base, uint64_t offset, uint64_t amount) {
- queue_.push(Entry(base, in_, offset, amount, buffer_size_));
- }
-
- const void *Top() const {
- return queue_.top().Current();
- }
-
- void Pop() {
- Entry top(queue_.top());
- queue_.pop();
- if (top.Increment(in_, buffer_size_, entry_size_))
- queue_.push(top);
- }
-
- std::size_t Size() const {
- return queue_.size();
- }
-
- bool Empty() const {
- return queue_.empty();
- }
-
- private:
- // Priority queue contains these entries.
- class Entry {
- public:
- Entry() {}
-
- Entry(void *base, int fd, uint64_t offset, uint64_t amount, std::size_t buf_size) {
- offset_ = offset;
- remaining_ = amount;
- buffer_end_ = static_cast<uint8_t*>(base) + buf_size;
- Read(fd, buf_size);
- }
-
- bool Increment(int fd, std::size_t buf_size, std::size_t entry_size) {
- current_ += entry_size;
- if (current_ != buffer_end_) return true;
- return Read(fd, buf_size);
- }
-
- const void *Current() const { return current_; }
-
- private:
- bool Read(int fd, std::size_t buf_size) {
- current_ = buffer_end_ - buf_size;
- std::size_t amount;
- if (static_cast<uint64_t>(buf_size) < remaining_) {
- amount = buf_size;
- } else if (!remaining_) {
- return false;
- } else {
- amount = remaining_;
- buffer_end_ = current_ + remaining_;
- }
- ErsatzPRead(fd, current_, amount, offset_);
- offset_ += amount;
- assert(current_ <= buffer_end_);
- remaining_ -= amount;
- return true;
- }
-
- // Buffer
- uint8_t *current_, *buffer_end_;
- // File
- uint64_t remaining_, offset_;
- };
-
- // Wrapper comparison function for queue entries.
- class Greater : public std::binary_function<const Entry &, const Entry &, bool> {
- public:
- explicit Greater(const Compare &compare) : compare_(compare) {}
-
- bool operator()(const Entry &first, const Entry &second) const {
- return compare_(second.Current(), first.Current());
- }
-
- private:
- const Compare compare_;
- };
-
- typedef std::priority_queue<Entry, std::vector<Entry>, Greater> Queue;
- Queue queue_;
-
- const int in_;
- const std::size_t buffer_size_;
- const std::size_t entry_size_;
-};
-
-/* A worker object that merges. If the number of pieces to merge exceeds the
- * arity, it outputs multiple sorted blocks, recording to out_offsets.
- * However, users will only every see a single sorted block out output because
- * Sort::Sorted insures the arity is higher than the number of pieces before
- * returning this.
- */
-template <class Compare, class Combine> class MergingReader {
- public:
- MergingReader(int in, Offsets *in_offsets, Offsets *out_offsets, std::size_t buffer_size, std::size_t total_memory, const Compare &compare, const Combine &combine) :
- compare_(compare), combine_(combine),
- in_(in),
- in_offsets_(in_offsets), out_offsets_(out_offsets),
- buffer_size_(buffer_size), total_memory_(total_memory) {}
-
- void Run(const ChainPosition &position) {
- Run(position, false);
- }
-
- void Run(const ChainPosition &position, bool assert_one) {
- // Special case: nothing to read.
- if (!in_offsets_->RemainingBlocks()) {
- Link l(position);
- l.Poison();
- return;
- }
- // If there's just one entry, just read.
- if (in_offsets_->RemainingBlocks() == 1) {
- // Sequencing is important.
- uint64_t offset = in_offsets_->TotalOffset();
- uint64_t amount = in_offsets_->NextSize();
- ReadSingle(offset, amount, position);
- if (out_offsets_) out_offsets_->Append(amount);
- return;
- }
-
- Stream str(position);
- scoped_malloc buffer(MallocOrThrow(total_memory_));
- uint8_t *const buffer_end = static_cast<uint8_t*>(buffer.get()) + total_memory_;
-
- const std::size_t entry_size = position.GetChain().EntrySize();
-
- while (in_offsets_->RemainingBlocks()) {
- // Use bigger buffers if there's less remaining.
- uint64_t per_buffer = static_cast<uint64_t>(std::max<std::size_t>(
- buffer_size_,
- static_cast<std::size_t>((static_cast<uint64_t>(total_memory_) / in_offsets_->RemainingBlocks()))));
- per_buffer -= per_buffer % entry_size;
- assert(per_buffer);
-
- // Populate queue.
- MergeQueue<Compare> queue(in_, per_buffer, entry_size, compare_);
- for (uint8_t *buf = static_cast<uint8_t*>(buffer.get());
- in_offsets_->RemainingBlocks() && (buf + std::min(per_buffer, in_offsets_->PeekSize()) <= buffer_end);) {
- uint64_t offset = in_offsets_->TotalOffset();
- uint64_t size = in_offsets_->NextSize();
- queue.Push(buf, offset, size);
- buf += static_cast<std::size_t>(std::min<uint64_t>(size, per_buffer));
- }
- // This shouldn't happen but it's probably better to die than loop indefinitely.
- if (queue.Size() < 2 && in_offsets_->RemainingBlocks()) {
- std::cerr << "Bug in sort implementation: not merging at least two stripes." << std::endl;
- abort();
- }
- if (assert_one && in_offsets_->RemainingBlocks()) {
- std::cerr << "Bug in sort implementation: should only be one merge group for lazy sort" << std::endl;
- abort();
- }
-
- uint64_t written = 0;
- // Merge including combiner support.
- memcpy(str.Get(), queue.Top(), entry_size);
- for (queue.Pop(); !queue.Empty(); queue.Pop()) {
- if (!combine_(str.Get(), queue.Top(), compare_)) {
- ++written; ++str;
- memcpy(str.Get(), queue.Top(), entry_size);
- }
- }
- ++written; ++str;
- if (out_offsets_)
- out_offsets_->Append(written * entry_size);
- }
- str.Poison();
- }
-
- private:
- void ReadSingle(uint64_t offset, const uint64_t size, const ChainPosition &position) {
- // Special case: only one to read.
- const uint64_t end = offset + size;
- const uint64_t block_size = position.GetChain().BlockSize();
- Link l(position);
- for (; offset + block_size < end; ++l, offset += block_size) {
- ErsatzPRead(in_, l->Get(), block_size, offset);
- l->SetValidSize(block_size);
- }
- ErsatzPRead(in_, l->Get(), end - offset, offset);
- l->SetValidSize(end - offset);
- (++l).Poison();
- return;
- }
-
- Compare compare_;
- Combine combine_;
-
- int in_;
-
- protected:
- Offsets *in_offsets_;
-
- private:
- Offsets *out_offsets_;
-
- std::size_t buffer_size_;
- std::size_t total_memory_;
-};
-
-// The lazy step owns the remaining files. This keeps track of them.
-template <class Compare, class Combine> class OwningMergingReader : public MergingReader<Compare, Combine> {
- private:
- typedef MergingReader<Compare, Combine> P;
- public:
- OwningMergingReader(int data, const Offsets &offsets, std::size_t buffer, std::size_t lazy, const Compare &compare, const Combine &combine)
- : P(data, NULL, NULL, buffer, lazy, compare, combine),
- data_(data),
- offsets_(offsets) {}
-
- void Run(const ChainPosition &position) {
- P::in_offsets_ = &offsets_;
- scoped_fd data(data_);
- scoped_fd offsets_file(offsets_.File());
- P::Run(position, true);
- }
-
- private:
- int data_;
- Offsets offsets_;
-};
-
-// Don't use this directly. Worker that sorts blocks.
-template <class Compare> class BlockSorter {
- public:
- BlockSorter(Offsets &offsets, const Compare &compare) :
- offsets_(&offsets), compare_(compare) {}
-
- void Run(const ChainPosition &position) {
- const std::size_t entry_size = position.GetChain().EntrySize();
- for (Link link(position); link; ++link) {
- // Record the size of each block in a separate file.
- offsets_->Append(link->ValidSize());
- void *end = static_cast<uint8_t*>(link->Get()) + link->ValidSize();
-#if defined(_WIN32) || defined(_WIN64)
- std::stable_sort
-#else
- std::sort
-#endif
- (SizedIt(link->Get(), entry_size),
- SizedIt(end, entry_size),
- compare_);
- }
- offsets_->FinishedAppending();
- }
-
- private:
- Offsets *offsets_;
- SizedCompare<Compare> compare_;
-};
-
-class BadSortConfig : public Exception {
- public:
- BadSortConfig() throw() {}
- ~BadSortConfig() throw() {}
-};
-
-/** Sort */
-template <class Compare, class Combine = NeverCombine> class Sort {
- public:
- /** Constructs an object capable of sorting */
- Sort(Chain &in, const SortConfig &config, const Compare &compare = Compare(), const Combine &combine = Combine())
- : config_(config),
- data_(MakeTemp(config.temp_prefix)),
- offsets_file_(MakeTemp(config.temp_prefix)), offsets_(offsets_file_.get()),
- compare_(compare), combine_(combine),
- entry_size_(in.EntrySize()) {
- UTIL_THROW_IF(!entry_size_, BadSortConfig, "Sorting entries of size 0");
- // Make buffer_size a multiple of the entry_size.
- config_.buffer_size -= config_.buffer_size % entry_size_;
- UTIL_THROW_IF(!config_.buffer_size, BadSortConfig, "Sort buffer too small");
- UTIL_THROW_IF(config_.total_memory < config_.buffer_size * 4, BadSortConfig, "Sorting memory " << config_.total_memory << " is too small for four buffers (two read and two write).");
- in >> BlockSorter<Compare>(offsets_, compare_) >> WriteAndRecycle(data_.get());
- }
-
- uint64_t Size() const {
- return SizeOrThrow(data_.get());
- }
-
- // Do merge sort, terminating when lazy merge could be done with the
- // specified memory. Return the minimum memory necessary to do lazy merge.
- std::size_t Merge(std::size_t lazy_memory) {
- if (offsets_.RemainingBlocks() <= 1) return 0;
- const uint64_t lazy_arity = std::max<uint64_t>(1, lazy_memory / config_.buffer_size);
- uint64_t size = Size();
- /* No overflow because
- * offsets_.RemainingBlocks() * config_.buffer_size <= lazy_memory ||
- * size < lazy_memory
- */
- if (offsets_.RemainingBlocks() <= lazy_arity || size <= static_cast<uint64_t>(lazy_memory))
- return std::min<std::size_t>(size, offsets_.RemainingBlocks() * config_.buffer_size);
-
- scoped_fd data2(MakeTemp(config_.temp_prefix));
- int fd_in = data_.get(), fd_out = data2.get();
- scoped_fd offsets2_file(MakeTemp(config_.temp_prefix));
- Offsets offsets2(offsets2_file.get());
- Offsets *offsets_in = &offsets_, *offsets_out = &offsets2;
-
- // Double buffered writing.
- ChainConfig chain_config;
- chain_config.entry_size = entry_size_;
- chain_config.block_count = 2;
- chain_config.total_memory = config_.buffer_size * 2;
- Chain chain(chain_config);
-
- while (offsets_in->RemainingBlocks() > lazy_arity) {
- if (size <= static_cast<uint64_t>(lazy_memory)) break;
- std::size_t reading_memory = config_.total_memory - 2 * config_.buffer_size;
- if (size < static_cast<uint64_t>(reading_memory)) {
- reading_memory = static_cast<std::size_t>(size);
- }
- SeekOrThrow(fd_in, 0);
- chain >>
- MergingReader<Compare, Combine>(
- fd_in,
- offsets_in, offsets_out,
- config_.buffer_size,
- reading_memory,
- compare_, combine_) >>
- WriteAndRecycle(fd_out);
- chain.Wait();
- offsets_out->FinishedAppending();
- ResizeOrThrow(fd_in, 0);
- offsets_in->Reset();
- std::swap(fd_in, fd_out);
- std::swap(offsets_in, offsets_out);
- size = SizeOrThrow(fd_in);
- }
-
- SeekOrThrow(fd_in, 0);
- if (fd_in == data2.get()) {
- data_.reset(data2.release());
- offsets_file_.reset(offsets2_file.release());
- offsets_ = offsets2;
- }
- if (offsets_.RemainingBlocks() <= 1) return 0;
- // No overflow because the while loop exited.
- return std::min(size, offsets_.RemainingBlocks() * static_cast<uint64_t>(config_.buffer_size));
- }
-
- // Output to chain, using this amount of memory, maximum, for lazy merge
- // sort.
- void Output(Chain &out, std::size_t lazy_memory) {
- Merge(lazy_memory);
- out.SetProgressTarget(Size());
- out >> OwningMergingReader<Compare, Combine>(data_.get(), offsets_, config_.buffer_size, lazy_memory, compare_, combine_);
- data_.release();
- offsets_file_.release();
- }
-
- /* If a pipeline step is reading sorted input and writing to a different
- * sort order, then there's a trade-off between using RAM to read lazily
- * (avoiding copying the file) and using RAM to increase block size and,
- * therefore, decrease the number of merge sort passes in the next
- * iteration.
- *
- * Merge sort takes log_{arity}(pieces) passes. Thus, each time the chain
- * block size is multiplied by arity, the number of output passes decreases
- * by one. Up to a constant, then, log_{arity}(chain) is the number of
- * passes saved. Chain simply divides the memory evenly over all blocks.
- *
- * Lazy sort saves this many passes (up to a constant)
- * log_{arity}((memory-lazy)/block_count) + 1
- * Non-lazy sort saves this many passes (up to the same constant):
- * log_{arity}(memory/block_count)
- * Add log_{arity}(block_count) to both:
- * log_{arity}(memory-lazy) + 1 versus log_{arity}(memory)
- * Take arity to the power of both sizes (arity > 1)
- * (memory - lazy)*arity versus memory
- * Solve for lazy
- * lazy = memory * (arity - 1) / arity
- */
- std::size_t DefaultLazy() {
- float arity = static_cast<float>(config_.total_memory / config_.buffer_size);
- return static_cast<std::size_t>(static_cast<float>(config_.total_memory) * (arity - 1.0) / arity);
- }
-
- // Same as Output with default lazy memory setting.
- void Output(Chain &out) {
- Output(out, DefaultLazy());
- }
-
- // Completely merge sort and transfer ownership to the caller.
- int StealCompleted() {
- // Merge all the way.
- Merge(0);
- SeekOrThrow(data_.get(), 0);
- offsets_file_.reset();
- return data_.release();
- }
-
- private:
- SortConfig config_;
-
- scoped_fd data_;
-
- scoped_fd offsets_file_;
- Offsets offsets_;
-
- const Compare compare_;
- const Combine combine_;
- const std::size_t entry_size_;
-};
-
-// returns bytes to be read on demand.
-template <class Compare, class Combine> uint64_t BlockingSort(Chain &chain, const SortConfig &config, const Compare &compare = Compare(), const Combine &combine = NeverCombine()) {
- Sort<Compare, Combine> sorter(chain, config, compare, combine);
- chain.Wait(true);
- uint64_t size = sorter.Size();
- sorter.Output(chain);
- return size;
-}
-
-} // namespace stream
-} // namespace util
-
-#endif // UTIL_STREAM_SORT_H
diff --git a/src/joshua/decoder/ff/lm/kenlm/util/stream/sort_test.cc b/src/joshua/decoder/ff/lm/kenlm/util/stream/sort_test.cc
deleted file mode 100644
index fd7705c..0000000
--- a/src/joshua/decoder/ff/lm/kenlm/util/stream/sort_test.cc
+++ /dev/null
@@ -1,62 +0,0 @@
-#include "util/stream/sort.hh"
-
-#define BOOST_TEST_MODULE SortTest
-#include <boost/test/unit_test.hpp>
-
-#include <algorithm>
-
-#include <unistd.h>
-
-namespace util { namespace stream { namespace {
-
-struct CompareUInt64 : public std::binary_function<const void *, const void *, bool> {
- bool operator()(const void *first, const void *second) const {
- return *static_cast<const uint64_t*>(first) < *reinterpret_cast<const uint64_t*>(second);
- }
-};
-
-const uint64_t kSize = 100000;
-
-struct Putter {
- Putter(std::vector<uint64_t> &shuffled) : shuffled_(shuffled) {}
-
- void Run(const ChainPosition &position) {
- Stream put_shuffled(position);
- for (uint64_t i = 0; i < shuffled_.size(); ++i, ++put_shuffled) {
- *static_cast<uint64_t*>(put_shuffled.Get()) = shuffled_[i];
- }
- put_shuffled.Poison();
- }
- std::vector<uint64_t> &shuffled_;
-};
-
-BOOST_AUTO_TEST_CASE(FromShuffled) {
- std::vector<uint64_t> shuffled;
- shuffled.reserve(kSize);
- for (uint64_t i = 0; i < kSize; ++i) {
- shuffled.push_back(i);
- }
- std::random_shuffle(shuffled.begin(), shuffled.end());
-
- ChainConfig config;
- config.entry_size = 8;
- config.total_memory = 800;
- config.block_count = 3;
-
- SortConfig merge_config;
- merge_config.temp_prefix = "sort_test_temp";
- merge_config.buffer_size = 800;
- merge_config.total_memory = 3300;
-
- Chain chain(config);
- chain >> Putter(shuffled);
- BlockingSort(chain, merge_config, CompareUInt64(), NeverCombine());
- Stream sorted;
- chain >> sorted >> kRecycle;
- for (uint64_t i = 0; i < kSize; ++i, ++sorted) {
- BOOST_CHECK_EQUAL(i, *static_cast<const uint64_t*>(sorted.Get()));
- }
- BOOST_CHECK(!sorted);
-}
-
-}}} // namespaces
diff --git a/src/joshua/decoder/ff/lm/kenlm/util/stream/stream.hh b/src/joshua/decoder/ff/lm/kenlm/util/stream/stream.hh
deleted file mode 100644
index 7ea1c9f..0000000
--- a/src/joshua/decoder/ff/lm/kenlm/util/stream/stream.hh
+++ /dev/null
@@ -1,77 +0,0 @@
-#ifndef UTIL_STREAM_STREAM_H
-#define UTIL_STREAM_STREAM_H
-
-#include "util/stream/chain.hh"
-
-#include <boost/noncopyable.hpp>
-
-#include <assert.h>
-#include <stdint.h>
-
-namespace util {
-namespace stream {
-
-class Stream : boost::noncopyable {
- public:
- Stream() : current_(NULL), end_(NULL) {}
-
- void Init(const ChainPosition &position) {
- entry_size_ = position.GetChain().EntrySize();
- block_size_ = position.GetChain().BlockSize();
- block_it_.Init(position);
- StartBlock();
- }
-
- explicit Stream(const ChainPosition &position) {
- Init(position);
- }
-
- operator bool() const { return current_ != NULL; }
- bool operator!() const { return current_ == NULL; }
-
- const void *Get() const { return current_; }
- void *Get() { return current_; }
-
- void Poison() {
- block_it_->SetValidSize(current_ - static_cast<uint8_t*>(block_it_->Get()));
- ++block_it_;
- block_it_.Poison();
- }
-
- Stream &operator++() {
- assert(*this);
- assert(current_ < end_);
- current_ += entry_size_;
- if (current_ == end_) {
- ++block_it_;
- StartBlock();
- }
- return *this;
- }
-
- private:
- void StartBlock() {
- for (; block_it_ && !block_it_->ValidSize(); ++block_it_) {}
- current_ = static_cast<uint8_t*>(block_it_->Get());
- end_ = current_ + block_it_->ValidSize();
- }
-
- // The following are pointers to raw memory
- // current_ is the current record
- // end_ is the end of the block (so we know when to move to the next block)
- uint8_t *current_, *end_;
-
- std::size_t entry_size_;
- std::size_t block_size_;
-
- Link block_it_;
-};
-
-inline Chain &operator>>(Chain &chain, Stream &stream) {
- stream.Init(chain.Add());
- return chain;
-}
-
-} // namespace stream
-} // namespace util
-#endif // UTIL_STREAM_STREAM_H
diff --git a/src/joshua/decoder/ff/lm/kenlm/util/stream/timer.hh b/src/joshua/decoder/ff/lm/kenlm/util/stream/timer.hh
deleted file mode 100644
index 06488a1..0000000
--- a/src/joshua/decoder/ff/lm/kenlm/util/stream/timer.hh
+++ /dev/null
@@ -1,16 +0,0 @@
-#ifndef UTIL_STREAM_TIMER_H
-#define UTIL_STREAM_TIMER_H
-
-// Sorry Jon, this was adding library dependencies in Moses and people complained.
-
-/*#include <boost/version.hpp>
-
-#if BOOST_VERSION >= 104800
-#include <boost/timer/timer.hpp>
-#define UTIL_TIMER(str) boost::timer::auto_cpu_timer timer(std::cerr, 1, (str))
-#else
-//#warning Using Boost older than 1.48. Timing information will not be available.*/
-#define UTIL_TIMER(str)
-//#endif
-
-#endif // UTIL_STREAM_TIMER_H
diff --git a/src/joshua/decoder/ff/lm/kenlm/util/string_piece.cc b/src/joshua/decoder/ff/lm/kenlm/util/string_piece.cc
deleted file mode 100644
index ec394b9..0000000
--- a/src/joshua/decoder/ff/lm/kenlm/util/string_piece.cc
+++ /dev/null
@@ -1,193 +0,0 @@
-// Copyright 2004 The RE2 Authors. All Rights Reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in string_piece.hh.
-
-#include "util/string_piece.hh"
-
-#include <algorithm>
-
-#include <limits.h>
-
-#ifndef HAVE_ICU
-
-typedef StringPiece::size_type size_type;
-
-void StringPiece::CopyToString(std::string* target) const {
- target->assign(ptr_, length_);
-}
-
-size_type StringPiece::find(const StringPiece& s, size_type pos) const {
- // Not sure why length_ < 0 was here since it's std::size_t.
- if (/*length_ < 0 || */pos > static_cast<size_type>(length_))
- return npos;
-
- const char* result = std::search(ptr_ + pos, ptr_ + length_,
- s.ptr_, s.ptr_ + s.length_);
- const size_type xpos = result - ptr_;
- return xpos + s.length_ <= length_ ? xpos : npos;
-}
-
-size_type StringPiece::find(char c, size_type pos) const {
- if (length_ <= 0 || pos >= static_cast<size_type>(length_)) {
- return npos;
- }
- const char* result = std::find(ptr_ + pos, ptr_ + length_, c);
- return result != ptr_ + length_ ? result - ptr_ : npos;
-}
-
-size_type StringPiece::rfind(const StringPiece& s, size_type pos) const {
- if (length_ < s.length_) return npos;
- const size_t ulen = length_;
- if (s.length_ == 0) return std::min(ulen, pos);
-
- const char* last = ptr_ + std::min(ulen - s.length_, pos) + s.length_;
- const char* result = std::find_end(ptr_, last, s.ptr_, s.ptr_ + s.length_);
- return result != last ? result - ptr_ : npos;
-}
-
-size_type StringPiece::rfind(char c, size_type pos) const {
- if (length_ <= 0) return npos;
- for (int i = std::min(pos, static_cast<size_type>(length_ - 1));
- i >= 0; --i) {
- if (ptr_[i] == c) {
- return i;
- }
- }
- return npos;
-}
-
-// For each character in characters_wanted, sets the index corresponding
-// to the ASCII code of that character to 1 in table. This is used by
-// the find_.*_of methods below to tell whether or not a character is in
-// the lookup table in constant time.
-// The argument `table' must be an array that is large enough to hold all
-// the possible values of an unsigned char. Thus it should be be declared
-// as follows:
-// bool table[UCHAR_MAX + 1]
-static inline void BuildLookupTable(const StringPiece& characters_wanted,
- bool* table) {
- const size_type length = characters_wanted.length();
- const char* const data = characters_wanted.data();
- for (size_type i = 0; i < length; ++i) {
- table[static_cast<unsigned char>(data[i])] = true;
- }
-}
-
-size_type StringPiece::find_first_of(const StringPiece& s,
- size_type pos) const {
- if (length_ == 0 || s.length_ == 0)
- return npos;
-
- // Avoid the cost of BuildLookupTable() for a single-character search.
- if (s.length_ == 1)
- return find_first_of(s.ptr_[0], pos);
-
- bool lookup[UCHAR_MAX + 1] = { false };
- BuildLookupTable(s, lookup);
- for (size_type i = pos; i < length_; ++i) {
- if (lookup[static_cast<unsigned char>(ptr_[i])]) {
- return i;
- }
- }
- return npos;
-}
-
-size_type StringPiece::find_first_not_of(const StringPiece& s,
- size_type pos) const {
- if (length_ == 0)
- return npos;
-
- if (s.length_ == 0)
- return 0;
-
- // Avoid the cost of BuildLookupTable() for a single-character search.
- if (s.length_ == 1)
- return find_first_not_of(s.ptr_[0], pos);
-
- bool lookup[UCHAR_MAX + 1] = { false };
- BuildLookupTable(s, lookup);
- for (size_type i = pos; i < length_; ++i) {
- if (!lookup[static_cast<unsigned char>(ptr_[i])]) {
- return i;
- }
- }
- return npos;
-}
-
-size_type StringPiece::find_first_not_of(char c, size_type pos) const {
- if (length_ == 0)
- return npos;
-
- for (; pos < length_; ++pos) {
- if (ptr_[pos] != c) {
- return pos;
- }
- }
- return npos;
-}
-
-size_type StringPiece::find_last_of(const StringPiece& s, size_type pos) const {
- if (length_ == 0 || s.length_ == 0)
- return npos;
-
- // Avoid the cost of BuildLookupTable() for a single-character search.
- if (s.length_ == 1)
- return find_last_of(s.ptr_[0], pos);
-
- bool lookup[UCHAR_MAX + 1] = { false };
- BuildLookupTable(s, lookup);
- for (size_type i = std::min(pos, length_ - 1); ; --i) {
- if (lookup[static_cast<unsigned char>(ptr_[i])])
- return i;
- if (i == 0)
- break;
- }
- return npos;
-}
-
-size_type StringPiece::find_last_not_of(const StringPiece& s,
- size_type pos) const {
- if (length_ == 0)
- return npos;
-
- size_type i = std::min(pos, length_ - 1);
- if (s.length_ == 0)
- return i;
-
- // Avoid the cost of BuildLookupTable() for a single-character search.
- if (s.length_ == 1)
- return find_last_not_of(s.ptr_[0], pos);
-
- bool lookup[UCHAR_MAX + 1] = { false };
- BuildLookupTable(s, lookup);
- for (; ; --i) {
- if (!lookup[static_cast<unsigned char>(ptr_[i])])
- return i;
- if (i == 0)
- break;
- }
- return npos;
-}
-
-size_type StringPiece::find_last_not_of(char c, size_type pos) const {
- if (length_ == 0)
- return npos;
-
- for (size_type i = std::min(pos, length_ - 1); ; --i) {
- if (ptr_[i] != c)
- return i;
- if (i == 0)
- break;
- }
- return npos;
-}
-
-StringPiece StringPiece::substr(size_type pos, size_type n) const {
- if (pos > length_) pos = length_;
- if (n > length_ - pos) n = length_ - pos;
- return StringPiece(ptr_ + pos, n);
-}
-
-const size_type StringPiece::npos = size_type(-1);
-
-#endif // !HAVE_ICU
diff --git a/src/joshua/decoder/ff/lm/kenlm/util/string_piece.hh b/src/joshua/decoder/ff/lm/kenlm/util/string_piece.hh
deleted file mode 100644
index 114e254..0000000
--- a/src/joshua/decoder/ff/lm/kenlm/util/string_piece.hh
+++ /dev/null
@@ -1,270 +0,0 @@
-/* If you use ICU in your program, then compile with -DHAVE_ICU -licui18n. If
- * you don't use ICU, then this will use the Google implementation from Chrome.
- * This has been modified from the original version to let you choose.
- */
-
-// Copyright 2008, Google Inc.
-// All rights reserved.
-//
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions are
-// met:
-//
-// * Redistributions of source code must retain the above copyright
-// notice, this list of conditions and the following disclaimer.
-// * Redistributions in binary form must reproduce the above
-// copyright notice, this list of conditions and the following disclaimer
-// in the documentation and/or other materials provided with the
-// distribution.
-// * Neither the name of Google Inc. nor the names of its
-// contributors may be used to endorse or promote products derived from
-// this software without specific prior written permission.
-//
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-// Copied from strings/stringpiece.h with modifications
-//
-// A string-like object that points to a sized piece of memory.
-//
-// Functions or methods may use const StringPiece& parameters to accept either
-// a "const char*" or a "string" value that will be implicitly converted to
-// a StringPiece. The implicit conversion means that it is often appropriate
-// to include this .h file in other files rather than forward-declaring
-// StringPiece as would be appropriate for most other Google classes.
-//
-// Systematic usage of StringPiece is encouraged as it will reduce unnecessary
-// conversions from "const char*" to "string" and back again.
-//
-
-#ifndef UTIL_STRING_PIECE_H
-#define UTIL_STRING_PIECE_H
-
-#include "util/have.hh"
-
-#include <cstring>
-#include <iosfwd>
-#include <ostream>
-
-#ifdef HAVE_ICU
-#include <unicode/stringpiece.h>
-#include <unicode/uversion.h>
-
-// Old versions of ICU don't define operator== and operator!=.
-#if (U_ICU_VERSION_MAJOR_NUM < 4) || ((U_ICU_VERSION_MAJOR_NUM == 4) && (U_ICU_VERSION_MINOR_NUM < 4))
-#warning You are using an old version of ICU. Consider upgrading to ICU >= 4.6.
-inline bool operator==(const StringPiece& x, const StringPiece& y) {
- if (x.size() != y.size())
- return false;
-
- return std::memcmp(x.data(), y.data(), x.size()) == 0;
-}
-
-inline bool operator!=(const StringPiece& x, const StringPiece& y) {
- return !(x == y);
-}
-#endif // old version of ICU
-
-U_NAMESPACE_BEGIN
-
-inline bool starts_with(const StringPiece& longer, const StringPiece& prefix) {
- int longersize = longer.size(), prefixsize = prefix.size();
- return longersize >= prefixsize && std::memcmp(longer.data(), prefix.data(), prefixsize) == 0;
-}
-
-#else
-
-#include <algorithm>
-#include <cstddef>
-#include <string>
-#include <string.h>
-
-#ifdef WIN32
-#undef max
-#undef min
-#endif
-
-class StringPiece {
- public:
- typedef size_t size_type;
-
- private:
- const char* ptr_;
- size_type length_;
-
- public:
- // We provide non-explicit singleton constructors so users can pass
- // in a "const char*" or a "string" wherever a "StringPiece" is
- // expected.
- StringPiece() : ptr_(NULL), length_(0) { }
- StringPiece(const char* str)
- : ptr_(str), length_((str == NULL) ? 0 : strlen(str)) { }
- StringPiece(const std::string& str)
- : ptr_(str.data()), length_(str.size()) { }
- StringPiece(const char* offset, size_type len)
- : ptr_(offset), length_(len) { }
-
- // data() may return a pointer to a buffer with embedded NULs, and the
- // returned buffer may or may not be null terminated. Therefore it is
- // typically a mistake to pass data() to a routine that expects a NUL
- // terminated string.
- const char* data() const { return ptr_; }
- size_type size() const { return length_; }
- size_type length() const { return length_; }
- bool empty() const { return length_ == 0; }
-
- void clear() { ptr_ = NULL; length_ = 0; }
- void set(const char* data, size_type len) { ptr_ = data; length_ = len; }
- void set(const char* str) {
- ptr_ = str;
- length_ = str ? strlen(str) : 0;
- }
- void set(const void* data, size_type len) {
- ptr_ = reinterpret_cast<const char*>(data);
- length_ = len;
- }
-
- char operator[](size_type i) const { return ptr_[i]; }
-
- void remove_prefix(size_type n) {
- ptr_ += n;
- length_ -= n;
- }
-
- void remove_suffix(size_type n) {
- length_ -= n;
- }
-
- int compare(const StringPiece& x) const {
- int r = wordmemcmp(ptr_, x.ptr_, std::min(length_, x.length_));
- if (r == 0) {
- if (length_ < x.length_) r = -1;
- else if (length_ > x.length_) r = +1;
- }
- return r;
- }
-
- std::string as_string() const {
- // std::string doesn't like to take a NULL pointer even with a 0 size.
- return std::string(!empty() ? data() : "", size());
- }
-
- void CopyToString(std::string* target) const;
- void AppendToString(std::string* target) const;
-
- // Does "this" start with "x"
- bool starts_with(const StringPiece& x) const {
- return ((length_ >= x.length_) &&
- (wordmemcmp(ptr_, x.ptr_, x.length_) == 0));
- }
-
- // Does "this" end with "x"
- bool ends_with(const StringPiece& x) const {
- return ((length_ >= x.length_) &&
- (wordmemcmp(ptr_ + (length_-x.length_), x.ptr_, x.length_) == 0));
- }
-
- // standard STL container boilerplate
- typedef char value_type;
- typedef const char* pointer;
- typedef const char& reference;
- typedef const char& const_reference;
- typedef ptrdiff_t difference_type;
- static const size_type npos;
- typedef const char* const_iterator;
- typedef const char* iterator;
- typedef std::reverse_iterator<const_iterator> const_reverse_iterator;
- typedef std::reverse_iterator<iterator> reverse_iterator;
- iterator begin() const { return ptr_; }
- iterator end() const { return ptr_ + length_; }
- const_reverse_iterator rbegin() const {
- return const_reverse_iterator(ptr_ + length_);
- }
- const_reverse_iterator rend() const {
- return const_reverse_iterator(ptr_);
- }
-
- size_type max_size() const { return length_; }
- size_type capacity() const { return length_; }
-
- size_type copy(char* buf, size_type n, size_type pos = 0) const;
-
- size_type find(const StringPiece& s, size_type pos = 0) const;
- size_type find(char c, size_type pos = 0) const;
- size_type rfind(const StringPiece& s, size_type pos = npos) const;
- size_type rfind(char c, size_type pos = npos) const;
-
- size_type find_first_of(const StringPiece& s, size_type pos = 0) const;
- size_type find_first_of(char c, size_type pos = 0) const {
- return find(c, pos);
- }
- size_type find_first_not_of(const StringPiece& s, size_type pos = 0) const;
- size_type find_first_not_of(char c, size_type pos = 0) const;
- size_type find_last_of(const StringPiece& s, size_type pos = npos) const;
- size_type find_last_of(char c, size_type pos = npos) const {
- return rfind(c, pos);
- }
- size_type find_last_not_of(const StringPiece& s, size_type pos = npos) const;
- size_type find_last_not_of(char c, size_type pos = npos) const;
-
- StringPiece substr(size_type pos, size_type n = npos) const;
-
- static int wordmemcmp(const char* p, const char* p2, size_type N) {
- return std::memcmp(p, p2, N);
- }
-};
-
-inline bool operator==(const StringPiece& x, const StringPiece& y) {
- if (x.size() != y.size())
- return false;
-
- return std::memcmp(x.data(), y.data(), x.size()) == 0;
-}
-
-inline bool operator!=(const StringPiece& x, const StringPiece& y) {
- return !(x == y);
-}
-
-inline bool starts_with(const StringPiece& longer, const StringPiece& prefix) {
- return longer.starts_with(prefix);
-}
-
-#endif // HAVE_ICU undefined
-
-inline bool operator<(const StringPiece& x, const StringPiece& y) {
- const int r = std::memcmp(x.data(), y.data(),
- std::min(x.size(), y.size()));
- return ((r < 0) || ((r == 0) && (x.size() < y.size())));
-}
-
-inline bool operator>(const StringPiece& x, const StringPiece& y) {
- return y < x;
-}
-
-inline bool operator<=(const StringPiece& x, const StringPiece& y) {
- return !(x > y);
-}
-
-inline bool operator>=(const StringPiece& x, const StringPiece& y) {
- return !(x < y);
-}
-
-// allow StringPiece to be logged (needed for unit testing).
-inline std::ostream& operator<<(std::ostream& o, const StringPiece& piece) {
- return o.write(piece.data(), static_cast<std::streamsize>(piece.size()));
-}
-
-#ifdef HAVE_ICU
-U_NAMESPACE_END
-using U_NAMESPACE_QUALIFIER StringPiece;
-#endif
-
-#endif // UTIL_STRING_PIECE_H
diff --git a/src/joshua/decoder/ff/lm/kenlm/util/thread_pool.hh b/src/joshua/decoder/ff/lm/kenlm/util/thread_pool.hh
deleted file mode 100644
index d1a883a..0000000
--- a/src/joshua/decoder/ff/lm/kenlm/util/thread_pool.hh
+++ /dev/null
@@ -1,95 +0,0 @@
-#ifndef UTIL_THREAD_POOL_H
-#define UTIL_THREAD_POOL_H
-
-#include "util/pcqueue.hh"
-
-#include <boost/ptr_container/ptr_vector.hpp>
-#include <boost/optional.hpp>
-#include <boost/thread.hpp>
-
-#include <iostream>
-
-#include <stdlib.h>
-
-namespace util {
-
-template <class HandlerT> class Worker : boost::noncopyable {
- public:
- typedef HandlerT Handler;
- typedef typename Handler::Request Request;
-
- template <class Construct> Worker(PCQueue<Request> &in, Construct &construct, const Request &poison)
- : in_(in), handler_(construct), poison_(poison), thread_(boost::ref(*this)) {}
-
- // Only call from thread.
- void operator()() {
- Request request;
- while (1) {
- in_.Consume(request);
- if (request == poison_) return;
- try {
- (*handler_)(request);
- }
- catch(const std::exception &e) {
- std::cerr << "Handler threw " << e.what() << std::endl;
- abort();
- }
- catch(...) {
- std::cerr << "Handler threw an exception, dropping request" << std::endl;
- abort();
- }
- }
- }
-
- void Join() {
- thread_.join();
- }
-
- private:
- PCQueue<Request> &in_;
-
- boost::optional<Handler> handler_;
-
- const Request poison_;
-
- boost::thread thread_;
-};
-
-template <class HandlerT> class ThreadPool : boost::noncopyable {
- public:
- typedef HandlerT Handler;
- typedef typename Handler::Request Request;
-
- template <class Construct> ThreadPool(size_t queue_length, size_t workers, Construct handler_construct, Request poison) : in_(queue_length), poison_(poison) {
- for (size_t i = 0; i < workers; ++i) {
- workers_.push_back(new Worker<Handler>(in_, handler_construct, poison));
- }
- }
-
- ~ThreadPool() {
- for (size_t i = 0; i < workers_.size(); ++i) {
- Produce(poison_);
- }
- for (typename boost::ptr_vector<Worker<Handler> >::iterator i = workers_.begin(); i != workers_.end(); ++i) {
- i->Join();
- }
- }
-
- void Produce(const Request &request) {
- in_.Produce(request);
- }
-
- // For adding to the queue.
- PCQueue<Request> &In() { return in_; }
-
- private:
- PCQueue<Request> in_;
-
- boost::ptr_vector<Worker<Handler> > workers_;
-
- Request poison_;
-};
-
-} // namespace util
-
-#endif // UTIL_THREAD_POOL_H
diff --git a/src/joshua/decoder/ff/lm/kenlm/util/tokenize_piece.hh b/src/joshua/decoder/ff/lm/kenlm/util/tokenize_piece.hh
deleted file mode 100644
index 908c8da..0000000
--- a/src/joshua/decoder/ff/lm/kenlm/util/tokenize_piece.hh
+++ /dev/null
@@ -1,151 +0,0 @@
-#ifndef UTIL_TOKENIZE_PIECE_H
-#define UTIL_TOKENIZE_PIECE_H
-
-#include "util/exception.hh"
-#include "util/string_piece.hh"
-
-#include <boost/iterator/iterator_facade.hpp>
-
-#include <algorithm>
-
-#include <string.h>
-
-namespace util {
-
-// Thrown on dereference when out of tokens to parse
-class OutOfTokens : public Exception {
- public:
- OutOfTokens() throw() {}
- ~OutOfTokens() throw() {}
-};
-
-class SingleCharacter {
- public:
- SingleCharacter() {}
- explicit SingleCharacter(char delim) : delim_(delim) {}
-
- StringPiece Find(const StringPiece &in) const {
- return StringPiece(std::find(in.data(), in.data() + in.size(), delim_), 1);
- }
-
- private:
- char delim_;
-};
-
-class MultiCharacter {
- public:
- MultiCharacter() {}
-
- explicit MultiCharacter(const StringPiece &delimiter) : delimiter_(delimiter) {}
-
- StringPiece Find(const StringPiece &in) const {
- return StringPiece(std::search(in.data(), in.data() + in.size(), delimiter_.data(), delimiter_.data() + delimiter_.size()), delimiter_.size());
- }
-
- private:
- StringPiece delimiter_;
-};
-
-class AnyCharacter {
- public:
- AnyCharacter() {}
- explicit AnyCharacter(const StringPiece &chars) : chars_(chars) {}
-
- StringPiece Find(const StringPiece &in) const {
- return StringPiece(std::find_first_of(in.data(), in.data() + in.size(), chars_.data(), chars_.data() + chars_.size()), 1);
- }
-
- private:
- StringPiece chars_;
-};
-
-class BoolCharacter {
- public:
- BoolCharacter() {}
-
- explicit BoolCharacter(const bool *delimiter) { delimiter_ = delimiter; }
-
- StringPiece Find(const StringPiece &in) const {
- for (const char *i = in.data(); i != in.data() + in.size(); ++i) {
- if (delimiter_[static_cast<unsigned char>(*i)]) return StringPiece(i, 1);
- }
- return StringPiece(in.data() + in.size(), 0);
- }
-
- template <unsigned Length> static void Build(const char (&characters)[Length], bool (&out)[256]) {
- memset(out, 0, sizeof(out));
- for (const char *i = characters; i != characters + Length; ++i) {
- out[static_cast<unsigned char>(*i)] = true;
- }
- }
-
- private:
- const bool *delimiter_;
-};
-
-class AnyCharacterLast {
- public:
- AnyCharacterLast() {}
-
- explicit AnyCharacterLast(const StringPiece &chars) : chars_(chars) {}
-
- StringPiece Find(const StringPiece &in) const {
- return StringPiece(std::find_end(in.data(), in.data() + in.size(), chars_.data(), chars_.data() + chars_.size()), 1);
- }
-
- private:
- StringPiece chars_;
-};
-
-template <class Find, bool SkipEmpty = false> class TokenIter : public boost::iterator_facade<TokenIter<Find, SkipEmpty>, const StringPiece, boost::forward_traversal_tag> {
- public:
- TokenIter() {}
-
- template <class Construct> TokenIter(const StringPiece &str, const Construct &construct) : after_(str), finder_(construct) {
- increment();
- }
-
- bool operator!() const {
- return current_.data() == 0;
- }
- operator bool() const {
- return current_.data() != 0;
- }
-
- static TokenIter<Find, SkipEmpty> end() {
- return TokenIter<Find, SkipEmpty>();
- }
-
- private:
- friend class boost::iterator_core_access;
-
- void increment() {
- do {
- StringPiece found(finder_.Find(after_));
- current_ = StringPiece(after_.data(), found.data() - after_.data());
- if (found.data() == after_.data() + after_.size()) {
- after_ = StringPiece(NULL, 0);
- } else {
- after_ = StringPiece(found.data() + found.size(), after_.data() - found.data() + after_.size() - found.size());
- }
- } while (SkipEmpty && current_.data() && current_.empty()); // Compiler should optimize this away if SkipEmpty is false.
- }
-
- bool equal(const TokenIter<Find, SkipEmpty> &other) const {
- return current_.data() == other.current_.data();
- }
-
- const StringPiece &dereference() const {
- UTIL_THROW_IF(!current_.data(), OutOfTokens, "Ran out of tokens");
- return current_;
- }
-
- StringPiece current_;
- StringPiece after_;
-
- Find finder_;
-};
-
-} // namespace util
-
-#endif // UTIL_TOKENIZE_PIECE_H
diff --git a/src/joshua/decoder/ff/lm/kenlm/util/unistd.hh b/src/joshua/decoder/ff/lm/kenlm/util/unistd.hh
deleted file mode 100644
index 0379c49..0000000
--- a/src/joshua/decoder/ff/lm/kenlm/util/unistd.hh
+++ /dev/null
@@ -1,22 +0,0 @@
-#ifndef UTIL_UNISTD_H
-#define UTIL_UNISTD_H
-
-#if defined(_WIN32) || defined(_WIN64)
-
-// Windows doesn't define <unistd.h>
-//
-// So we define what we need here instead:
-//
-#define STDIN_FILENO=0
-#define STDOUT_FILENO=1
-
-
-#else // Huzzah for POSIX!
-
-#include <unistd.h>
-
-#endif
-
-
-
-#endif // UTIL_UNISTD_H
diff --git a/src/joshua/decoder/ff/lm/kenlm/util/usage.cc b/src/joshua/decoder/ff/lm/kenlm/util/usage.cc
deleted file mode 100644
index a597300..0000000
--- a/src/joshua/decoder/ff/lm/kenlm/util/usage.cc
+++ /dev/null
@@ -1,283 +0,0 @@
-#include "util/usage.hh"
-
-#include "util/exception.hh"
-
-#include <fstream>
-#include <ostream>
-#include <sstream>
-#include <set>
-#include <string>
-
-#include <string.h>
-#include <ctype.h>
-#include <time.h>
-#if defined(_WIN32) || defined(_WIN64)
-// This code lifted from physmem.c in gnulib. See the copyright statement
-// below.
-# define WIN32_LEAN_AND_MEAN
-# include <windows.h>
-/* MEMORYSTATUSEX is missing from older windows headers, so define
- a local replacement. */
-typedef struct
-{
- DWORD dwLength;
- DWORD dwMemoryLoad;
- DWORDLONG ullTotalPhys;
- DWORDLONG ullAvailPhys;
- DWORDLONG ullTotalPageFile;
- DWORDLONG ullAvailPageFile;
- DWORDLONG ullTotalVirtual;
- DWORDLONG ullAvailVirtual;
- DWORDLONG ullAvailExtendedVirtual;
-} lMEMORYSTATUSEX;
-// Is this really supposed to be defined like this?
-typedef int WINBOOL;
-typedef WINBOOL (WINAPI *PFN_MS_EX) (lMEMORYSTATUSEX*);
-#else
-#include <sys/resource.h>
-#include <sys/time.h>
-#include <unistd.h>
-#endif
-
-#if defined(__MACH__) || defined(__FreeBSD__) || defined(__APPLE__)
-#include <sys/types.h>
-#include <sys/sysctl.h>
-#endif
-
-namespace util {
-namespace {
-
-#if defined(__MACH__)
-typedef struct timeval Wall;
-Wall GetWall() {
- struct timeval tv;
- gettimeofday(&tv, NULL);
- return tv;
-}
-#elif defined(_WIN32) || defined(_WIN64)
-typedef time_t Wall;
-Wall GetWall() {
- return time(NULL);
-}
-#else
-typedef struct timespec Wall;
-Wall GetWall() {
- Wall ret;
- clock_gettime(CLOCK_MONOTONIC, &ret);
- return ret;
-}
-#endif
-
-// gcc possible-unused function flags
-#ifdef __GNUC__
-double Subtract(time_t first, time_t second) __attribute__ ((unused));
-double DoubleSec(time_t tv) __attribute__ ((unused));
-#if !defined(_WIN32) && !defined(_WIN64)
-double Subtract(const struct timeval &first, const struct timeval &second) __attribute__ ((unused));
-double Subtract(const struct timespec &first, const struct timespec &second) __attribute__ ((unused));
-double DoubleSec(const struct timeval &tv) __attribute__ ((unused));
-double DoubleSec(const struct timespec &tv) __attribute__ ((unused));
-#endif
-#endif
-
-// Some of these functions are only used on some platforms.
-#ifdef __clang__
-#pragma clang diagnostic push
-#pragma clang diagnostic ignored "-Wunused-function"
-#endif
-// These all assume first > second
-double Subtract(time_t first, time_t second) {
- return difftime(first, second);
-}
-double DoubleSec(time_t tv) {
- return static_cast<double>(tv);
-}
-#if !defined(_WIN32) && !defined(_WIN64)
-double Subtract(const struct timeval &first, const struct timeval &second) {
- return static_cast<double>(first.tv_sec - second.tv_sec) + static_cast<double>(first.tv_usec - second.tv_usec) / 1000000.0;
-}
-double Subtract(const struct timespec &first, const struct timespec &second) {
- return static_cast<double>(first.tv_sec - second.tv_sec) + static_cast<double>(first.tv_nsec - second.tv_nsec) / 1000000000.0;
-}
-double DoubleSec(const struct timeval &tv) {
- return static_cast<double>(tv.tv_sec) + (static_cast<double>(tv.tv_usec) / 1000000.0);
-}
-double DoubleSec(const struct timespec &tv) {
- return static_cast<double>(tv.tv_sec) + (static_cast<double>(tv.tv_nsec) / 1000000000.0);
-}
-#endif
-#ifdef __clang__
-#pragma clang diagnostic pop
-#endif
-
-class RecordStart {
- public:
- RecordStart() {
- started_ = GetWall();
- }
-
- const Wall &Started() const {
- return started_;
- }
-
- private:
- Wall started_;
-};
-
-const RecordStart kRecordStart;
-
-const char *SkipSpaces(const char *at) {
- for (; *at == ' ' || *at == '\t'; ++at) {}
- return at;
-}
-} // namespace
-
-double WallTime() {
- return Subtract(GetWall(), kRecordStart.Started());
-}
-
-void PrintUsage(std::ostream &out) {
-#if !defined(_WIN32) && !defined(_WIN64)
- // Linux doesn't set memory usage in getrusage :-(
- std::set<std::string> headers;
- headers.insert("VmPeak:");
- headers.insert("VmRSS:");
- headers.insert("Name:");
-
- std::ifstream status("/proc/self/status", std::ios::in);
- std::string header, value;
- while ((status >> header) && getline(status, value)) {
- if (headers.find(header) != headers.end()) {
- out << header << SkipSpaces(value.c_str()) << '\t';
- }
- }
-
- struct rusage usage;
- if (getrusage(RUSAGE_SELF, &usage)) {
- perror("getrusage");
- return;
- }
- out << "RSSMax:" << usage.ru_maxrss << " kB" << '\t';
- out << "user:" << DoubleSec(usage.ru_utime) << "\tsys:" << DoubleSec(usage.ru_stime) << '\t';
- out << "CPU:" << (DoubleSec(usage.ru_utime) + DoubleSec(usage.ru_stime));
- out << '\t';
-#endif
-
- out << "real:" << WallTime() << '\n';
-}
-
-/* Adapted from physmem.c in gnulib 831b84c59ef413c57a36b67344467d66a8a2ba70 */
-/* Calculate the size of physical memory.
-
- Copyright (C) 2000-2001, 2003, 2005-2006, 2009-2013 Free Software
- Foundation, Inc.
-
- This program is free software: you can redistribute it and/or modify
- it under the terms of the GNU Lesser General Public License as published by
- the Free Software Foundation; either version 2.1 of the License, or
- (at your option) any later version.
-
- This program is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- GNU Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public License
- along with this program. If not, see <http://www.gnu.org/licenses/>. */
-
-/* Written by Paul Eggert. */
-uint64_t GuessPhysicalMemory() {
-#if defined(_SC_PHYS_PAGES) && defined(_SC_PAGESIZE)
- {
- long pages = sysconf(_SC_PHYS_PAGES);
- long page_size = sysconf(_SC_PAGESIZE);
- if (pages != -1 && page_size != -1)
- return static_cast<uint64_t>(pages) * static_cast<uint64_t>(page_size);
- }
-#endif
-#ifdef HW_PHYSMEM
- { /* This works on *bsd and darwin. */
- unsigned int physmem;
- size_t len = sizeof physmem;
- static int mib[2] = { CTL_HW, HW_PHYSMEM };
-
- if (sysctl (mib, sizeof(mib) / sizeof(mib[0]), &physmem, &len, NULL, 0) == 0
- && len == sizeof (physmem))
- return static_cast<uint64_t>(physmem);
- }
-#endif
-
-#if defined(_WIN32) || defined(_WIN64)
- { /* this works on windows */
- PFN_MS_EX pfnex;
- HMODULE h = GetModuleHandle (TEXT("kernel32.dll"));
-
- if (!h)
- return 0;
-
- /* Use GlobalMemoryStatusEx if available. */
- if ((pfnex = (PFN_MS_EX) GetProcAddress (h, "GlobalMemoryStatusEx")))
- {
- lMEMORYSTATUSEX lms_ex;
- lms_ex.dwLength = sizeof lms_ex;
- if (!pfnex (&lms_ex))
- return 0;
- return lms_ex.ullTotalPhys;
- }
-
- /* Fall back to GlobalMemoryStatus which is always available.
- but returns wrong results for physical memory > 4GB. */
- else
- {
- MEMORYSTATUS ms;
- GlobalMemoryStatus (&ms);
- return ms.dwTotalPhys;
- }
- }
-#endif
- return 0;
-}
-
-namespace {
-class SizeParseError : public Exception {
- public:
- explicit SizeParseError(const std::string &str) throw() {
- *this << "Failed to parse " << str << " into a memory size ";
- }
-};
-
-template <class Num> uint64_t ParseNum(const std::string &arg) {
- std::stringstream stream(arg);
- Num value;
- stream >> value;
- UTIL_THROW_IF_ARG(!stream, SizeParseError, (arg), "for the leading number.");
- std::string after;
- stream >> after;
- UTIL_THROW_IF_ARG(after.size() > 1, SizeParseError, (arg), "because there are more than two characters after the number.");
- std::string throwaway;
- UTIL_THROW_IF_ARG(stream >> throwaway, SizeParseError, (arg), "because there was more cruft " << throwaway << " after the number.");
-
- // Silly sort, using kilobytes as your default unit.
- if (after.empty()) after = "K";
- if (after == "%") {
- uint64_t mem = GuessPhysicalMemory();
- UTIL_THROW_IF_ARG(!mem, SizeParseError, (arg), "because % was specified but the physical memory size could not be determined.");
- return static_cast<uint64_t>(static_cast<double>(value) * static_cast<double>(mem) / 100.0);
- }
-
- std::string units("bKMGTPEZY");
- std::string::size_type index = units.find(after[0]);
- UTIL_THROW_IF_ARG(index == std::string::npos, SizeParseError, (arg), "the allowed suffixes are " << units << "%.");
- for (std::string::size_type i = 0; i < index; ++i) {
- value *= 1024;
- }
- return static_cast<uint64_t>(value);
-}
-
-} // namespace
-
-uint64_t ParseSize(const std::string &arg) {
- return arg.find('.') == std::string::npos ? ParseNum<double>(arg) : ParseNum<uint64_t>(arg);
-}
-
-} // namespace util
diff --git a/src/joshua/decoder/ff/lm/kenlm/util/usage.hh b/src/joshua/decoder/ff/lm/kenlm/util/usage.hh
deleted file mode 100644
index e578b0a..0000000
--- a/src/joshua/decoder/ff/lm/kenlm/util/usage.hh
+++ /dev/null
@@ -1,21 +0,0 @@
-#ifndef UTIL_USAGE_H
-#define UTIL_USAGE_H
-#include <cstddef>
-#include <iosfwd>
-#include <string>
-
-#include <stdint.h>
-
-namespace util {
-// Time in seconds since process started. Zero on unsupported platforms.
-double WallTime();
-
-void PrintUsage(std::ostream &to);
-
-// Determine how much physical memory there is. Return 0 on failure.
-uint64_t GuessPhysicalMemory();
-
-// Parse a size like unix sort. Sadly, this means the default multiplier is K.
-uint64_t ParseSize(const std::string &arg);
-} // namespace util
-#endif // UTIL_USAGE_H
diff --git a/src/joshua/decoder/ff/phrase/Distortion.java b/src/joshua/decoder/ff/phrase/Distortion.java
index e023698..959a0d7 100644
--- a/src/joshua/decoder/ff/phrase/Distortion.java
+++ b/src/joshua/decoder/ff/phrase/Distortion.java
@@ -1,5 +1,6 @@
package joshua.decoder.ff.phrase;
+import java.util.ArrayList;
import java.util.List;
import joshua.decoder.JoshuaConfiguration;
@@ -23,6 +24,15 @@
System.exit(1);
}
}
+
+ @Override
+ public ArrayList<String> reportDenseFeatures(int index) {
+ denseFeatureIndex = index;
+
+ ArrayList<String> names = new ArrayList<String>();
+ names.add(name);
+ return names;
+ }
@Override
public DPState compute(Rule rule, List<HGNode> tailNodes, int i, int j, SourcePath sourcePath,
@@ -32,7 +42,8 @@
int start_point = j - rule.getFrench().length + rule.getArity();
int jump_size = Math.abs(tailNodes.get(0).j - start_point);
- acc.add(name, -jump_size);
+// acc.add(name, -jump_size);
+ acc.add(denseFeatureIndex, -jump_size);
}
// System.err.println(String.format("DISTORTION(%d, %d) from %d = %d", i, j, tailNodes != null ? tailNodes.get(0).j : -1, jump_size));
diff --git a/src/joshua/decoder/ff/tm/CreateGlueGrammar.java b/src/joshua/decoder/ff/tm/CreateGlueGrammar.java
new file mode 100644
index 0000000..14d5ead
--- /dev/null
+++ b/src/joshua/decoder/ff/tm/CreateGlueGrammar.java
@@ -0,0 +1,110 @@
+package joshua.decoder.ff.tm;
+
+import static joshua.decoder.ff.tm.packed.PackedGrammar.VOCABULARY_FILENAME;
+import static joshua.util.FormatUtils.cleanNonTerminal;
+import static joshua.util.FormatUtils.isNonterminal;
+
+import java.io.File;
+import java.io.IOException;
+import java.util.HashSet;
+import java.util.Set;
+import java.util.logging.Logger;
+
+import joshua.corpus.Vocabulary;
+import joshua.decoder.JoshuaConfiguration;
+import joshua.util.io.LineReader;
+
+import org.kohsuke.args4j.CmdLineException;
+import org.kohsuke.args4j.CmdLineParser;
+import org.kohsuke.args4j.Option;
+
+
+public class CreateGlueGrammar {
+
+
+ private final Set<String> nonTerminalSymbols = new HashSet<>();
+ private static final Logger log = Logger.getLogger(CreateGlueGrammar.class.getName());
+
+ @Option(name = "--grammar", aliases = {"-g"}, required = true, usage = "provide grammar to determine list of NonTerminal symbols.")
+ private String grammarPath;
+
+ @Option(name = "--goal", aliases = {"-goal"}, required = false, usage = "specify custom GOAL symbol. Default: 'GOAL'")
+ private String goalSymbol = cleanNonTerminal(new JoshuaConfiguration().goal_symbol);
+
+ /* Rule templates */
+ // [GOAL] ||| <s> ||| <s> ||| 0
+ private static final String R_START = "[%1$s] ||| <s> ||| <s> ||| 0";
+ // [GOAL] ||| [GOAL,1] [X,2] ||| [GOAL,1] [X,2] ||| -1
+ private static final String R_TWO = "[%1$s] ||| [%1$s,1] [%2$s,2] ||| [%1$s,1] [%2$s,2] ||| -1";
+ // [GOAL] ||| [GOAL,1] </s> ||| [GOAL,1] </s> ||| 0
+ private static final String R_END = "[%1$s] ||| [%1$s,1] </s> ||| [%1$s,1] </s> ||| 0";
+ // [GOAL] ||| <s> [X,1] </s> ||| <s> [X,1] </s> ||| 0
+ private static final String R_TOP = "[%1$s] ||| <s> [%2$s,1] </s> ||| <s> [%2$s,1] </s> ||| 0";
+
+ private void run() throws IOException {
+
+ File grammar_file = new File(grammarPath);
+ if (!grammar_file.exists()) {
+ throw new IOException("Grammar file doesn't exist: " + grammarPath);
+ }
+
+ // in case of a packedGrammar, we read the serialized vocabulary,
+ // collecting all cleaned nonTerminal symbols.
+ if (grammar_file.isDirectory()) {
+ Vocabulary.read(new File(grammarPath + File.separator + VOCABULARY_FILENAME));
+ for (int i = 0; i < Vocabulary.size(); ++i) {
+ final String token = Vocabulary.word(i);
+ if (isNonterminal(token)) {
+ nonTerminalSymbols.add(cleanNonTerminal(token));
+ }
+ }
+ // otherwise we collect cleaned left-hand sides from the rules in the text grammar.
+ } else {
+ final LineReader reader = new LineReader(grammarPath);
+ while (reader.hasNext()) {
+ final String line = reader.next();
+ int lhsStart = line.indexOf("[") + 1;
+ int lhsEnd = line.indexOf("]");
+ if (lhsStart < 1 || lhsEnd < 0) {
+ log.info(String.format("malformed rule: %s\n", line));
+ continue;
+ }
+ final String lhs = line.substring(lhsStart, lhsEnd);
+ System.err.println(lhs);
+ nonTerminalSymbols.add(lhs);
+ }
+ }
+
+ log.info(
+ String.format("%d nonTerminal symbols read: %s",
+ nonTerminalSymbols.size(),
+ nonTerminalSymbols.toString()));
+
+ // write glue rules to stdout
+
+ System.out.println(String.format(R_START, goalSymbol));
+
+ for (String nt : nonTerminalSymbols)
+ System.out.println(String.format(R_TWO, goalSymbol, nt));
+
+ System.out.println(String.format(R_END, goalSymbol));
+
+ for (String nt : nonTerminalSymbols)
+ System.out.println(String.format(R_TOP, goalSymbol, nt));
+
+ }
+
+ public static void main(String[] args) throws IOException {
+ final CreateGlueGrammar glueCreator = new CreateGlueGrammar();
+ final CmdLineParser parser = new CmdLineParser(glueCreator);
+
+ try {
+ parser.parseArgument(args);
+ glueCreator.run();
+ } catch (CmdLineException e) {
+ log.info(e.toString());
+ parser.printUsage(System.err);
+ System.exit(1);
+ }
+ }
+}
diff --git a/src/joshua/decoder/ff/tm/Grammar.java b/src/joshua/decoder/ff/tm/Grammar.java
index 72850bb..a3b7062 100644
--- a/src/joshua/decoder/ff/tm/Grammar.java
+++ b/src/joshua/decoder/ff/tm/Grammar.java
@@ -64,6 +64,13 @@
* @return the number of rules stored in the grammar
*/
int getNumRules();
+
+ /**
+ * Returns the number of dense features.
+ *
+ * @return the number of dense features
+ */
+ int getNumDenseFeatures();
/**
* This is used to construct a manual rule supported from outside the grammar, but the owner
diff --git a/src/joshua/decoder/ff/tm/MosesPhraseRule.java b/src/joshua/decoder/ff/tm/MosesPhraseRule.java
deleted file mode 100644
index 84a4792..0000000
--- a/src/joshua/decoder/ff/tm/MosesPhraseRule.java
+++ /dev/null
@@ -1,61 +0,0 @@
-package joshua.decoder.ff.tm;
-
-/***
- * A class for reading in rules from a Moses phrase table. Most of the conversion work is done
- * in {@link joshua.decoder.ff.tm.format.MosesFormatReader}. This includes prepending every
- * rule with a nonterminal, so that the phrase-based decoder can assume the same hypergraph
- * format as the hierarchical decoder (by pretending to be a strictly left-branching grammar and
- * dispensing with the notion of coverage spans). However, prepending the nonterminals means all
- * the alignments are off by 1. We do not want to fix those when reading in due to the expense,
- * so instead we use this rule which adjust the alignments on the fly.
- *
- * Also, we only convert the Moses dense features on the fly, via this class.
- *
- * TODO: this class should also be responsible for prepending the nonterminals.
- *
- * @author Matt Post
- *
- */
-public class MosesPhraseRule extends Rule {
-
- private String mosesFeatureString = null;
-
- public MosesPhraseRule(int lhs, int[] french, int[] english, String sparse_features, int arity,
- String alignment) {
- super(lhs, french, english, null, arity, alignment);
- mosesFeatureString = sparse_features;
- }
-
- /**
- * Moses features are probabilities; we need to convert them here by taking the negative log prob.
- * We do this only when the rule is used to amortize.
- */
- @Override
- public String getFeatureString() {
- if (sparseFeatures == null) {
- StringBuffer values = new StringBuffer();
- for (String value: mosesFeatureString.split(" ")) {
- float f = Float.parseFloat(value);
- values.append(String.format("%f ", f <= 0.0 ? -100 : -Math.log(f)));
- }
- sparseFeatures = values.toString().trim();
- }
- return sparseFeatures;
- }
-
- /**
- * This is the exact same as the parent implementation, but we need to add 1 to each alignment
- * point to account for the nonterminal [X] that was prepended to each rule.
- */
- @Override
- public byte[] getAlignment() {
- if (alignment == null) {
- String[] tokens = getAlignmentString().split("[-\\s]+");
- alignment = new byte[tokens.length + 2];
- alignment[0] = alignment[1] = 0;
- for (int i = 0; i < tokens.length; i++)
- alignment[i + 2] = (byte) (Short.parseShort(tokens[i]) + 1);
- }
- return alignment;
- }
-}
diff --git a/src/joshua/decoder/ff/tm/PhraseRule.java b/src/joshua/decoder/ff/tm/PhraseRule.java
new file mode 100644
index 0000000..e42a7ee
--- /dev/null
+++ b/src/joshua/decoder/ff/tm/PhraseRule.java
@@ -0,0 +1,61 @@
+package joshua.decoder.ff.tm;
+
+/***
+ * A class for reading in rules from a Moses phrase table. Most of the conversion work is done
+ * in {@link joshua.decoder.ff.tm.format.PhraseFormatReader}. This includes prepending every
+ * rule with a nonterminal, so that the phrase-based decoder can assume the same hypergraph
+ * format as the hierarchical decoder (by pretending to be a strictly left-branching grammar and
+ * dispensing with the notion of coverage spans). However, prepending the nonterminals means all
+ * the alignments are off by 1. We do not want to fix those when reading in due to the expense,
+ * so instead we use this rule which adjust the alignments on the fly.
+ *
+ * Also, we only convert the Moses dense features on the fly, via this class.
+ *
+ * TODO: this class should also be responsible for prepending the nonterminals.
+ *
+ * @author Matt Post
+ *
+ */
+public class PhraseRule extends Rule {
+
+ private String mosesFeatureString = null;
+
+ public PhraseRule(int lhs, int[] french, int[] english, String sparse_features, int arity,
+ String alignment) {
+ super(lhs, french, english, null, arity, alignment);
+ mosesFeatureString = sparse_features;
+ }
+
+ /**
+ * Moses features are probabilities; we need to convert them here by taking the negative log prob.
+ * We do this only when the rule is used to amortize.
+ */
+ @Override
+ public String getFeatureString() {
+ if (sparseFeatureString == null) {
+ StringBuffer values = new StringBuffer();
+ for (String value: mosesFeatureString.split(" ")) {
+ float f = Float.parseFloat(value);
+ values.append(String.format("%f ", f <= 0.0 ? -100 : -Math.log(f)));
+ }
+ sparseFeatureString = values.toString().trim();
+ }
+ return sparseFeatureString;
+ }
+
+ /**
+ * This is the exact same as the parent implementation, but we need to add 1 to each alignment
+ * point to account for the nonterminal [X] that was prepended to each rule.
+ */
+ @Override
+ public byte[] getAlignment() {
+ if (alignment == null) {
+ String[] tokens = getAlignmentString().split("[-\\s]+");
+ alignment = new byte[tokens.length + 2];
+ alignment[0] = alignment[1] = 0;
+ for (int i = 0; i < tokens.length; i++)
+ alignment[i + 2] = (byte) (Short.parseShort(tokens[i]) + 1);
+ }
+ return alignment;
+ }
+}
diff --git a/src/joshua/decoder/ff/tm/Rule.java b/src/joshua/decoder/ff/tm/Rule.java
index cf80db6..c701ac6 100644
--- a/src/joshua/decoder/ff/tm/Rule.java
+++ b/src/joshua/decoder/ff/tm/Rule.java
@@ -38,7 +38,8 @@
protected int arity;
// And a string containing the sparse ones
- protected String sparseFeatures;
+ protected FeatureVector features = null;
+ protected String sparseFeatureString;
/*
* a feature function will be fired for this rule only if the owner of the rule matches the owner
@@ -79,7 +80,7 @@
int owner) {
this.lhs = lhs;
this.pFrench = sourceRhs;
- this.sparseFeatures = sparseFeatures;
+ this.sparseFeatureString = sparseFeatures;
this.arity = arity;
this.owner = owner;
this.english = targetRhs;
@@ -89,7 +90,7 @@
public Rule(int lhs, int[] sourceRhs, int[] targetRhs, String sparseFeatures, int arity) {
this.lhs = lhs;
this.pFrench = sourceRhs;
- this.sparseFeatures = sparseFeatures;
+ this.sparseFeatureString = sparseFeatures;
this.arity = arity;
this.owner = -1;
this.english = targetRhs;
@@ -201,10 +202,11 @@
* sparse (labeled) ones, but it's not required.
*/
- FeatureVector features = (owner != -1)
+ if (features == null)
+ features = (owner != -1)
? new FeatureVector(getFeatureString(), "tm_" + Vocabulary.word(owner) + "_")
: new FeatureVector();
-
+
return features;
}
@@ -237,33 +239,26 @@
return precomputableCost;
}
+ public float getDenseFeature(int k) {
+ return getFeatureVector().getDense(k);
+ }
+
public void setPrecomputableCost(float[] phrase_weights, FeatureVector weights) {
- int denseFeatureIndex = 0;
float cost = 0.0f;
-
- if (!getFeatureString().trim().equals("")) {
- StringTokenizer st = new StringTokenizer(getFeatureString());
- while (st.hasMoreTokens()) {
- String token = st.nextToken();
- if (token.indexOf('=') == -1) {
-// System.err.println(String.format("VALUE(%s) = %.5f", token, -Float.parseFloat(token)));
- try {
- cost += phrase_weights[denseFeatureIndex++] * -Float.parseFloat(token);
- } catch (java.lang.ArrayIndexOutOfBoundsException e) {
- /* This occurs if there are more values stored in the rule than there are weights
- * found in the config file. Consistent with treating unfound weights as have a value
- * of 0, we just skip it here.
- */
- ;
- }
- } else {
- int splitPoint = token.indexOf('=');
- String name = token.substring(0, splitPoint);
- float value = Float.parseFloat(token.substring(splitPoint + 1));
- cost += weights.get(name) * value;
- }
- }
+
+// System.err.println(String.format("// Setting precomputable cost for for %s/%s", getEnglishWords(), getFrenchWords()));
+ FeatureVector features = getFeatureVector();
+ for (int i = 0; i < features.getDenseFeatures().size() && i < phrase_weights.length; i++) {
+// System.err.println(String.format(" %d -> %.5f", i, features.get(i)));
+ cost += phrase_weights[i] * features.getDense(i);
}
+
+ for (String key: features.getSparseFeatures().keySet()) {
+// System.err.println(String.format(" %s -> %.5f", key, features.get(key)));
+ cost += weights.getSparse(key) * features.getSparse(key);
+ }
+
+// System.err.println(String.format("-> %f", cost));
this.precomputableCost = cost;
}
@@ -352,7 +347,7 @@
}
public String getFeatureString() {
- return sparseFeatures;
+ return sparseFeatureString;
}
/**
diff --git a/src/joshua/decoder/ff/tm/format/MosesFormatReader.java b/src/joshua/decoder/ff/tm/format/MosesFormatReader.java
deleted file mode 100644
index 93badba..0000000
--- a/src/joshua/decoder/ff/tm/format/MosesFormatReader.java
+++ /dev/null
@@ -1,91 +0,0 @@
-package joshua.decoder.ff.tm.format;
-
-import joshua.corpus.Vocabulary;
-import joshua.decoder.ff.tm.MosesPhraseRule;
-import joshua.util.io.LineReader;
-
-/***
- * This class reads in the Moses phrase table format, with support for the source and target side,
- * list of features, and word alignments. It works by simply casting the phrase-based rules to
- * left-branching hierarchical rules and passing them on to its parent class, {@HieroFormatReader}.
- *
- * There is also a tool to convert the grammars directly, so that they can be suitably packed. Usage:
- *
- * <pre>
- * cat PHRASE_TABLE | java -cp $JOSHUA/class joshua.decoder.ff.tm.format.PhraseFormatReader > grammar
- * </pre>
- *
- * @author Matt Post <post@cs.jhu.edu>
- *
- */
-
-public class MosesFormatReader extends HieroFormatReader {
-
- private int lhs;
-
- public MosesFormatReader(String grammarFile) {
- super(grammarFile);
- this.lhs = Vocabulary.id("[X]");
- }
-
- public MosesFormatReader() {
- super();
- this.lhs = Vocabulary.id("[X]");
- }
-
- /**
- * This munges a Moses-style phrase table into a grammar.
- *
- * mots francaises ||| French words ||| 1 2 3 ||| 0-1 1-0
- *
- * becomes
- *
- * [X] ||| [X,1] mots francaises ||| [X,1] French words ||| 1 2 3 ||| 0-1 1-0
- *
- */
- @Override
- public MosesPhraseRule parseLine(String line) {
- String[] fields = line.split(fieldDelimiter);
-
- int arity = 1;
-
- // foreign side
- String[] foreignWords = fields[0].split("\\s+");
- int[] french = new int[foreignWords.length + 1];
- french[0] = lhs;
- for (int i = 0; i < foreignWords.length; i++) {
- french[i+1] = Vocabulary.id(foreignWords[i]);
- }
-
- // English side
- String[] englishWords = fields[1].split("\\s+");
- int[] english = new int[englishWords.length + 1];
- english[0] = -1;
- for (int i = 0; i < englishWords.length; i++) {
- english[i+1] = Vocabulary.id(englishWords[i]);
- }
-
- // transform feature values
- String sparse_features = fields[2];
-
-// System.out.println(String.format("parseLine: %s\n ->%s", line, sparse_features));
-
- // alignments
- String alignment = (fields.length > 3) ? fields[3] : null;
-
- return new MosesPhraseRule(lhs, french, english, sparse_features, arity, alignment);
- }
-
- /**
- * Converts a Moses phrase table to a Joshua grammar.
- *
- * @param args
- */
- public static void main(String[] args) {
- MosesFormatReader reader = new MosesFormatReader();
- for (String line: new LineReader(System.in)) {
- MosesPhraseRule rule = reader.parseLine(line);
- System.out.println(rule.textFormat());
- }
- }
-}
diff --git a/src/joshua/decoder/ff/tm/format/PhraseFormatReader.java b/src/joshua/decoder/ff/tm/format/PhraseFormatReader.java
new file mode 100644
index 0000000..1d15227
--- /dev/null
+++ b/src/joshua/decoder/ff/tm/format/PhraseFormatReader.java
@@ -0,0 +1,110 @@
+package joshua.decoder.ff.tm.format;
+
+import joshua.corpus.Vocabulary;
+import joshua.decoder.ff.tm.PhraseRule;
+import joshua.util.io.LineReader;
+
+/***
+ * This class reads in the Moses phrase table format, with support for the source and target side,
+ * list of features, and word alignments. It works by simply casting the phrase-based rules to
+ * left-branching hierarchical rules and passing them on to its parent class, {@HieroFormatReader}.
+ *
+ * There is also a tool to convert the grammars directly, so that they can be suitably packed. Usage:
+ *
+ * <pre>
+ * cat PHRASE_TABLE | java -cp $JOSHUA/class joshua.decoder.ff.tm.format.PhraseFormatReader > grammar
+ * </pre>
+ *
+ * @author Matt Post <post@cs.jhu.edu>
+ *
+ */
+
+public class PhraseFormatReader extends HieroFormatReader {
+
+ private int lhs;
+
+ /* Whether we are reading a Moses phrase table or Thrax phrase table */
+ private boolean moses_format = false;
+
+ public PhraseFormatReader(String grammarFile, boolean is_moses) {
+ super(grammarFile);
+ this.lhs = Vocabulary.id("[X]");
+ this.moses_format = is_moses;
+ }
+
+ public PhraseFormatReader() {
+ super();
+ this.lhs = Vocabulary.id("[X]");
+ }
+
+ /**
+ * When dealing with Moses format, this munges a Moses-style phrase table into a grammar.
+ *
+ * mots francaises ||| French words ||| 1 2 3 ||| 0-1 1-0
+ *
+ * becomes
+ *
+ * [X] ||| [X,1] mots francaises ||| [X,1] French words ||| 1 2 3 ||| 0-1 1-0
+ *
+ * For thrax-extracted phrasal grammars, it transforms
+ *
+ * [X] ||| mots francaises ||| French words ||| 1 2 3 ||| 0-1 1-0
+ *
+ * into
+ *
+ * [X] ||| [X,1] mots francaises ||| [X,1] French words ||| 1 2 3 ||| 0-1 1-0
+ */
+ @Override
+ public PhraseRule parseLine(String line) {
+ String[] fields = line.split(fieldDelimiter);
+
+ int arity = 1;
+
+ /* For Thrax phrase-based grammars, skip over the beginning nonterminal */
+ int fieldIndex = 0;
+ if (! moses_format)
+ fieldIndex++;
+
+ // foreign side
+ String[] foreignWords = fields[fieldIndex].split("\\s+");
+ int[] french = new int[foreignWords.length + 1];
+ french[0] = lhs;
+ for (int i = 0; i < foreignWords.length; i++) {
+ french[i+1] = Vocabulary.id(foreignWords[i]);
+ }
+
+ // English side
+ fieldIndex++;
+ String[] englishWords = fields[fieldIndex].split("\\s+");
+ int[] english = new int[englishWords.length + 1];
+ english[0] = -1;
+ for (int i = 0; i < englishWords.length; i++) {
+ english[i+1] = Vocabulary.id(englishWords[i]);
+ }
+
+ // transform feature values
+ fieldIndex++;
+ String sparse_features = fields[fieldIndex];
+
+// System.out.println(String.format("parseLine: %s\n ->%s", line, sparse_features));
+
+ // alignments
+ fieldIndex++;
+ String alignment = (fields.length > fieldIndex) ? fields[fieldIndex] : null;
+
+ return new PhraseRule(lhs, french, english, sparse_features, arity, alignment);
+ }
+
+ /**
+ * Converts a Moses phrase table to a Joshua grammar.
+ *
+ * @param args
+ */
+ public static void main(String[] args) {
+ PhraseFormatReader reader = new PhraseFormatReader();
+ for (String line: new LineReader(System.in)) {
+ PhraseRule rule = reader.parseLine(line);
+ System.out.println(rule.textFormat());
+ }
+ }
+}
diff --git a/src/joshua/decoder/ff/tm/hash_based/MemoryBasedBatchGrammar.java b/src/joshua/decoder/ff/tm/hash_based/MemoryBasedBatchGrammar.java
index 33e56bd..0f67c27 100644
--- a/src/joshua/decoder/ff/tm/hash_based/MemoryBasedBatchGrammar.java
+++ b/src/joshua/decoder/ff/tm/hash_based/MemoryBasedBatchGrammar.java
@@ -1,6 +1,6 @@
package joshua.decoder.ff.tm.hash_based;
-import java.io.IOException;
+import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
@@ -15,7 +15,7 @@
import joshua.decoder.ff.tm.GrammarReader;
import joshua.decoder.ff.tm.Trie;
import joshua.decoder.ff.tm.format.HieroFormatReader;
-import joshua.decoder.ff.tm.format.MosesFormatReader;
+import joshua.decoder.ff.tm.format.PhraseFormatReader;
import joshua.decoder.ff.tm.format.SamtFormatReader;
import joshua.util.FormatUtils;
@@ -41,6 +41,8 @@
/* The number of distinct source sides. */
private int qtyRuleBins = 0;
+ private int numDenseFeatures = 0;
+
/* The trie root. */
private MemoryBasedTrie root = null;
@@ -48,7 +50,7 @@
private String grammarFile;
private GrammarReader<Rule> modelReader;
-
+
/* Whether the grammar's rules contain regular expressions. */
private boolean isRegexpGrammar = false;
@@ -71,7 +73,7 @@
this.owner = Vocabulary.id(owner);
}
- public MemoryBasedBatchGrammar(GrammarReader<Rule> gr,JoshuaConfiguration joshuaConfiguration) {
+ public MemoryBasedBatchGrammar(GrammarReader<Rule> gr, JoshuaConfiguration joshuaConfiguration) {
// this.defaultOwner = Vocabulary.id(defaultOwner);
// this.defaultLHS = Vocabulary.id(defaultLHSSymbol);
this(joshuaConfiguration);
@@ -79,7 +81,8 @@
}
public MemoryBasedBatchGrammar(String formatKeyword, String grammarFile, String owner,
- String defaultLHSSymbol, int spanLimit, JoshuaConfiguration joshuaConfiguration) throws IOException {
+ String defaultLHSSymbol, int spanLimit, JoshuaConfiguration joshuaConfiguration)
+ throws IOException {
this(joshuaConfiguration);
this.owner = Vocabulary.id(owner);
@@ -87,7 +90,7 @@
this.spanLimit = spanLimit;
this.grammarFile = grammarFile;
this.setRegexpGrammar(formatKeyword.equals("regexp"));
-
+
// ==== loading grammar
this.modelReader = createReader(formatKeyword, grammarFile);
if (modelReader != null) {
@@ -98,7 +101,7 @@
}
} else {
Decoder.LOG(1, "Couldn't create a GrammarReader for file " + grammarFile + " with format "
- + formatKeyword);
+ + formatKeyword);
}
this.printGrammar();
@@ -112,7 +115,7 @@
} else if ("samt".equals(format)) {
return new SamtFormatReader(grammarFile);
} else if ("phrase".equals(format) || "moses".equals(format)) {
- return new MosesFormatReader(grammarFile);
+ return new PhraseFormatReader(grammarFile, format.equals("moses"));
} else {
throw new RuntimeException(String.format("* FATAL: unknown grammar format '%s'", format));
}
@@ -120,7 +123,6 @@
return null;
}
-
// ===============================================================
// Methods
// ===============================================================
@@ -128,7 +130,7 @@
public void setSpanLimit(int spanLimit) {
this.spanLimit = spanLimit;
}
-
+
@Override
public int getNumRules() {
return this.qtyRulesRead;
@@ -147,7 +149,8 @@
if (this.spanLimit == -1) { // mono-glue grammar
return (i == 0);
} else {
-// System.err.println(String.format("%s HASRULEFORSPAN(%d,%d,%d)/%d = %s", Vocabulary.word(this.owner), i, j, pathLength, spanLimit, pathLength <= this.spanLimit));
+ // System.err.println(String.format("%s HASRULEFORSPAN(%d,%d,%d)/%d = %s",
+ // Vocabulary.word(this.owner), i, j, pathLength, spanLimit, pathLength <= this.spanLimit));
return (pathLength <= this.spanLimit);
}
}
@@ -163,19 +166,22 @@
// TODO: Why two increments?
this.qtyRulesRead++;
-
-// if (owner == -1) {
-// System.err.println("* FATAL: MemoryBasedBatchGrammar::addRule(): owner not set for grammar");
-// System.exit(1);
-// }
+
+ // if (owner == -1) {
+ // System.err.println("* FATAL: MemoryBasedBatchGrammar::addRule(): owner not set for grammar");
+ // System.exit(1);
+ // }
rule.setOwner(owner);
+ if (numDenseFeatures == 0)
+ numDenseFeatures = rule.getFeatureVector().getDenseFeatures().size();
+
// === identify the position, and insert the trie nodes as necessary
MemoryBasedTrie pos = root;
int[] french = rule.getFrench();
-
+
maxSourcePhraseLength = Math.max(maxSourcePhraseLength, french.length);
-
+
for (int k = 0; k < french.length; k++) {
int curSymID = french[k];
@@ -207,7 +213,8 @@
}
protected void printGrammar() {
- Decoder.LOG(1, String.format("MemoryBasedBatchGrammar: Read %d rules with %d distinct source sides from '%s'",
+ Decoder.LOG(1, String.format(
+ "MemoryBasedBatchGrammar: Read %d rules with %d distinct source sides from '%s'",
this.qtyRulesRead, this.qtyRuleBins, grammarFile));
}
@@ -221,7 +228,7 @@
public boolean isRegexpGrammar() {
return this.isRegexpGrammar;
}
-
+
public void setRegexpGrammar(boolean value) {
this.isRegexpGrammar = value;
}
@@ -234,58 +241,60 @@
*/
@Override
public void addOOVRules(int sourceWord, List<FeatureFunction> featureFunctions) {
-
- // TODO: _OOV shouldn't be outright added, since the word might not be OOV for the LM (but now almost
+
+ // TODO: _OOV shouldn't be outright added, since the word might not be OOV for the LM (but now
+ // almost
// certainly is)
- final int targetWord = this.joshuaConfiguration.mark_oovs
- ? Vocabulary.id(Vocabulary.word(sourceWord) + "_OOV")
- : sourceWord;
+ final int targetWord = this.joshuaConfiguration.mark_oovs ? Vocabulary.id(Vocabulary
+ .word(sourceWord) + "_OOV") : sourceWord;
int[] sourceWords = { sourceWord };
int[] targetWords = { targetWord };
final String oovAlignment = "0-0";
-
+
if (this.joshuaConfiguration.oovList != null && this.joshuaConfiguration.oovList.size() != 0) {
- for (OOVItem item: this.joshuaConfiguration.oovList) {
- Rule oovRule = new Rule(
- Vocabulary.id(item.label), sourceWords, targetWords, "", 0,
+ for (OOVItem item : this.joshuaConfiguration.oovList) {
+ Rule oovRule = new Rule(Vocabulary.id(item.label), sourceWords, targetWords, "", 0,
oovAlignment);
addRule(oovRule);
oovRule.estimateRuleCost(featureFunctions);
}
} else {
int nt_i = Vocabulary.id(this.joshuaConfiguration.default_non_terminal);
- Rule oovRule = new Rule(nt_i, sourceWords, targetWords, "", 0,
- oovAlignment);
+ Rule oovRule = new Rule(nt_i, sourceWords, targetWords, "", 0, oovAlignment);
addRule(oovRule);
oovRule.estimateRuleCost(featureFunctions);
}
}
-
+
/**
* Adds a default set of glue rules.
*
- * @param featureFunctions
+ * @param featureFunctions
*/
public void addGlueRules(ArrayList<FeatureFunction> featureFunctions) {
HieroFormatReader reader = new HieroFormatReader();
- String goalNT = FormatUtils.cleanNonterminal(joshuaConfiguration.goal_symbol);
- String defaultNT = FormatUtils.cleanNonterminal(joshuaConfiguration.default_non_terminal);
-
+ String goalNT = FormatUtils.cleanNonTerminal(joshuaConfiguration.goal_symbol);
+ String defaultNT = FormatUtils.cleanNonTerminal(joshuaConfiguration.default_non_terminal);
+
String[] ruleStrings = new String[] {
String.format("[%s] ||| %s ||| %s ||| 0", goalNT, Vocabulary.START_SYM,
Vocabulary.START_SYM),
- String.format("[%s] ||| [%s,1] [%s,2] ||| [%s,1] [%s,2] ||| -1",
- goalNT, goalNT, defaultNT, goalNT, defaultNT),
- String.format("[%s] ||| [%s,1] %s ||| [%s,1] %s ||| 0",
- goalNT, goalNT, Vocabulary.STOP_SYM, goalNT, Vocabulary.STOP_SYM)
- };
-
- for (String ruleString: ruleStrings) {
+ String.format("[%s] ||| [%s,1] [%s,2] ||| [%s,1] [%s,2] ||| -1", goalNT, goalNT, defaultNT,
+ goalNT, defaultNT),
+ String.format("[%s] ||| [%s,1] %s ||| [%s,1] %s ||| 0", goalNT, goalNT,
+ Vocabulary.STOP_SYM, goalNT, Vocabulary.STOP_SYM) };
+
+ for (String ruleString : ruleStrings) {
Rule rule = reader.parseLine(ruleString);
addRule(rule);
rule.estimateRuleCost(featureFunctions);
}
}
+
+ @Override
+ public int getNumDenseFeatures() {
+ return numDenseFeatures;
+ }
}
diff --git a/src/joshua/decoder/ff/tm/packed/PackedGrammar.java b/src/joshua/decoder/ff/tm/packed/PackedGrammar.java
index 0bc9e79..7a3506e 100644
--- a/src/joshua/decoder/ff/tm/packed/PackedGrammar.java
+++ b/src/joshua/decoder/ff/tm/packed/PackedGrammar.java
@@ -28,25 +28,40 @@
* shared vocabulary, and then rely on Joshua's ability to query multiple grammars for rules to
* solve this problem. This is not currently implemented but could be done directly in the
* Grammar Packer.
+ *
+ * *UPDATE 10/2015*
+ * The introduction of a SliceAggregatingTrie together with sorting the grammar by the full source string
+ * (not just by the first source word) allows distributing rules with the same first source word
+ * across multiple slices.
+ * @author fhieber
*/
+import static java.util.Collections.sort;
+
import java.io.BufferedInputStream;
import java.io.DataInputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
+import java.io.InputStream;
import java.io.RandomAccessFile;
import java.nio.IntBuffer;
import java.nio.MappedByteBuffer;
import java.nio.channels.FileChannel;
import java.nio.channels.FileChannel.MapMode;
+import java.nio.file.Files;
+import java.nio.file.Paths;
+import java.security.DigestInputStream;
+import java.security.MessageDigest;
+import java.security.NoSuchAlgorithmException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Comparator;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
+import java.util.Map;
import joshua.corpus.Vocabulary;
import joshua.decoder.Decoder;
@@ -59,6 +74,7 @@
import joshua.decoder.ff.tm.RuleCollection;
import joshua.decoder.ff.tm.Trie;
import joshua.decoder.ff.tm.hash_based.ExtensionIterator;
+import joshua.decoder.ff.tm.packed.SliceAggregatingTrie;
import joshua.util.encoding.EncoderConfiguration;
import joshua.util.encoding.FloatEncoder;
import joshua.util.io.LineReader;
@@ -69,6 +85,9 @@
private PackedRoot root;
private ArrayList<PackedSlice> slices;
+ private final File vocabFile; // store path to vocabulary file
+
+ public static final String VOCABULARY_FILENAME = "vocabulary";
// The grammar specification keyword (e.g., "thrax" or "moses")
private String type;
@@ -80,9 +99,11 @@
this.type = type;
// Read the vocabulary.
- String vocabFile = grammar_dir + File.separator + "vocabulary";
+ vocabFile = new File(grammar_dir + File.separator + VOCABULARY_FILENAME);
Decoder.LOG(1, String.format("Reading vocabulary: %s", vocabFile));
- Vocabulary.read(vocabFile);
+ if (!Vocabulary.read(vocabFile)) {
+ throw new RuntimeException("mismatches or collisions while reading on-disk vocabulary");
+ }
// Read the config
String configFile = grammar_dir + File.separator + "config";
@@ -99,17 +120,18 @@
// Set phrase owner.
this.owner = Vocabulary.id(owner);
- String[] listing = new File(grammar_dir).list();
+ final List<String> listing = Arrays.asList(new File(grammar_dir).list());
+ sort(listing); // File.list() has arbitrary sort order
slices = new ArrayList<PackedSlice>();
- for (int i = 0; i < listing.length; i++) {
- if (listing[i].startsWith("slice_") && listing[i].endsWith(".source"))
- slices.add(new PackedSlice(grammar_dir + File.separator + listing[i].substring(0, 11)));
+ for (String prefix : listing) {
+ if (prefix.startsWith("slice_") && prefix.endsWith(".source"))
+ slices.add(new PackedSlice(grammar_dir + File.separator + prefix.substring(0, 11)));
}
long count = 0;
for (PackedSlice s : slices)
count += s.estimated.length;
- root = new PackedRoot(this);
+ root = new PackedRoot(slices);
Decoder.LOG(1, String.format("Loaded %d rules", count));
}
@@ -132,30 +154,121 @@
return num_rules;
}
+ @Override
+ public int getNumDenseFeatures() {
+ return encoding.getNumDenseFeatures();
+ }
+
public Rule constructManualRule(int lhs, int[] src, int[] tgt, float[] scores, int arity) {
return null;
}
+
+ /**
+ * Computes the MD5 checksum of the vocabulary file.
+ * Can be used for comparing vocabularies across multiple packedGrammars.
+ */
+ public String computeVocabularyChecksum() {
+ MessageDigest md;
+ try {
+ md = MessageDigest.getInstance("MD5");
+ } catch (NoSuchAlgorithmException e) {
+ throw new RuntimeException("Unknown checksum algorithm");
+ }
+ byte[] buffer = new byte[1024];
+ try (final InputStream is = Files.newInputStream(Paths.get(vocabFile.toString()));
+ DigestInputStream dis = new DigestInputStream(is, md)) {
+ while (dis.read(buffer) != -1) {}
+ } catch (IOException e) {
+ throw new RuntimeException("Can not find vocabulary file. This should not happen.");
+ }
+ byte[] digest = md.digest();
+ // convert the byte to hex format
+ StringBuffer sb = new StringBuffer("");
+ for (int i = 0; i < digest.length; i++) {
+ sb.append(Integer.toString((digest[i] & 0xff) + 0x100, 16).substring(1));
+ }
+ return sb.toString();
+ }
+ /**
+ * PackedRoot represents the root of the packed grammar trie.
+ * Tries for different source-side firstwords are organized in
+ * packedSlices on disk. A packedSlice can contain multiple trie
+ * roots (i.e. multiple source-side firstwords).
+ * The PackedRoot builds a lookup table, mapping from
+ * source-side firstwords to the addresses in the packedSlices
+ * that represent the subtrie for a particular firstword.
+ * If the GrammarPacker has to distribute rules for a
+ * source-side firstword across multiple slices, a
+ * SliceAggregatingTrie node is created that aggregates those
+ * tries to hide
+ * this additional complexity from the grammar interface
+ * This feature allows packing of grammars where the list of rules
+ * for a single source-side firstword would exceed the maximum array
+ * size of Java (2gb).
+ */
public final class PackedRoot implements Trie {
- private HashMap<Integer, PackedSlice> lookup;
+ private final HashMap<Integer, Trie> lookup;
- public PackedRoot(PackedGrammar grammar) {
- lookup = new HashMap<Integer, PackedSlice>();
-
- for (PackedSlice ps : grammar.slices) {
- int num_children = ps.source[0];
- for (int i = 0; i < num_children; i++)
- lookup.put(ps.source[2 * i + 1], ps);
+ public PackedRoot(final List<PackedSlice> slices) {
+ final Map<Integer, List<Trie>> childTries = collectChildTries(slices);
+ lookup = buildLookupTable(childTries);
+ }
+
+ /**
+ * Determines whether trie nodes for source first-words are spread over
+ * multiple packedSlices by counting their occurrences.
+ * @param slices
+ * @return A mapping from first word ids to a list of trie nodes.
+ */
+ private Map<Integer, List<Trie>> collectChildTries(final List<PackedSlice> slices) {
+ final Map<Integer, List<Trie>> childTries = new HashMap<>();
+ for (PackedSlice packedSlice : slices) {
+
+ // number of tries stored in this packedSlice
+ final int num_children = packedSlice.source[0];
+ for (int i = 0; i < num_children; i++) {
+ final int id = packedSlice.source[2 * i + 1];
+
+ /* aggregate tries with same root id
+ * obtain a Trie node, already at the correct address in the packedSlice.
+ * In other words, the lookup index already points to the correct trie node in the packedSlice.
+ * packedRoot.match() thus can directly return the result of lookup.get(id);
+ */
+ if (!childTries.containsKey(id)) {
+ childTries.put(id, new ArrayList<Trie>(1));
+ }
+ final Trie trie = packedSlice.root().match(id);
+ childTries.get(id).add(trie);
+ }
}
+ return childTries;
+ }
+
+ /**
+ * Build a lookup table for children tries.
+ * If the list contains only a single child node, a regular trie node
+ * is inserted into the table; otherwise a SliceAggregatingTrie node is
+ * created that hides this partitioning into multiple packedSlices
+ * upstream.
+ */
+ private HashMap<Integer,Trie> buildLookupTable(final Map<Integer, List<Trie>> childTries) {
+ HashMap<Integer,Trie> lookup = new HashMap<>(childTries.size());
+ for (int id : childTries.keySet()) {
+ final List<Trie> tries = childTries.get(id);
+ if (tries.size() == 1) {
+ lookup.put(id, tries.get(0));
+ } else {
+ lookup.put(id, new SliceAggregatingTrie(tries));
+ }
+ }
+ return lookup;
}
@Override
public Trie match(int word_id) {
- PackedSlice ps = lookup.get(word_id);
- if (ps != null)
- return ps.root().match(word_id);
- return null;
+ return lookup.get(word_id);
}
@Override
@@ -165,19 +278,12 @@
@Override
public HashMap<Integer, ? extends Trie> getChildren() {
- HashMap<Integer, Trie> children = new HashMap<Integer, Trie>();
- for (int key : lookup.keySet())
- children.put(key, match(key));
- return children;
+ return lookup;
}
@Override
public ArrayList<? extends Trie> getExtensions() {
- ArrayList<Trie> tries = new ArrayList<Trie>();
- for (int key : lookup.keySet()) {
- tries.add(match(key));
- }
- return tries;
+ return new ArrayList<>(lookup.values());
}
@Override
@@ -221,6 +327,9 @@
private MappedByteBuffer alignments;
private int[] alignmentLookup;
+ /**
+ * Provides a cache of packedTrie nodes to be used in getTrie.
+ */
private HashMap<Integer, PackedTrie> tries;
public PackedSlice(String prefix) throws IOException {
@@ -515,7 +624,7 @@
ArrayList<Rule> rules = new ArrayList<Rule>(num_rules);
for (int i = 0; i < num_rules; i++) {
- if (type.equals("moses"))
+ if (type.equals("moses") || type.equals("phrase"))
rules.add(new PackedPhrasePair(rule_position + 3 * i));
else
rules.add(new PackedRule(rule_position + 3 * i));
@@ -785,7 +894,7 @@
@Override
public FeatureVector getFeatureVector() {
if (features == null) {
- features = new FeatureVector(getFeatures(source[address + 2]), "");
+ features = new FeatureVector(getFeatures(source[address + 2]), "tm_" + Vocabulary.word(owner) + "_");
}
return features;
diff --git a/src/joshua/decoder/ff/tm/packed/SliceAggregatingTrie.java b/src/joshua/decoder/ff/tm/packed/SliceAggregatingTrie.java
new file mode 100644
index 0000000..62783fd
--- /dev/null
+++ b/src/joshua/decoder/ff/tm/packed/SliceAggregatingTrie.java
@@ -0,0 +1,217 @@
+package joshua.decoder.ff.tm.packed;
+
+import static java.util.Collections.emptyList;
+import static java.util.Collections.unmodifiableList;
+
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collection;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.Iterator;
+import java.util.List;
+import java.util.Set;
+
+import joshua.decoder.ff.FeatureFunction;
+import joshua.decoder.ff.tm.Rule;
+import joshua.decoder.ff.tm.RuleCollection;
+import joshua.decoder.ff.tm.Trie;
+import joshua.decoder.ff.tm.hash_based.ExtensionIterator;
+
+/**
+ * SliceAggregatingTrie collapses multiple tries
+ * with the same source root (i.e. tries from multiple packed slices).
+ *
+ * Consider the example below.
+ * Without SliceAggregatingTries, the following grammar rules could have only
+ * partitioned by splitting rule lists when the first word of SOURCE changes. (">" markers).
+ *
+ * Using a SliceAggregatingTrie allows splitting at changes of second SOURCE words (">>" marker).
+ *
+ * EXAMPLE: (LHS ||| SOURCE ||| TARGET)
+ * [X] ||| - ||| -
+ * >
+ * [X] ||| [X] ||| [X]
+ * >>
+ * [X] ||| [X] a ||| [X] A
+ * [X] ||| [X] a ||| [X] A
+ * >>
+ * [X] ||| [X] b ||| [X] B
+ * >
+ * [X] ||| u ||| u
+ *
+ * A SliceAggregatingTrie node behaves just like a regular Trie node but subsumes a list of extensions/children.
+ * This class hides the complexity of having multiple tries with the same root
+ * from nodes one level up.
+ * Similar to PackedRoot, it maintains a lookup table of children's
+ * source-side words to know
+ * in which subtrie (i.e. packedSlice) it needs to traverse into when
+ * match() is called.
+ * A SliceAggregatingTrie never holds any rules associated with it, thus
+ * rules with the source-side represented by the SliceAggregatingTrie node
+ * must be found in exactly one of the subtries.
+ * (!) This assumption relies on the sort order of the packed grammar.
+ * If the grammar was incorrectly sorted and then packed, construction
+ * of SliceAggregatingTrie nodes fails.
+ *
+ * @author fhieber
+ */
+public class SliceAggregatingTrie implements Trie, RuleCollection {
+
+ /**
+ * A multitude of packedTries with the same source-side
+ * firstword. The order is induced by the
+ * sorting order of the text grammar that was input to the GrammarPacker.
+ * This implies that rules for the node represented by this SliceAggregatingTrie
+ * instance must be found in ONE of the sub tries.
+ * This is checked below in the constructor.
+ */
+ private final List<Trie> tries;
+ /** reference to the only subtrie that can contain rules. Set by buildLookupTable() */
+ private Trie trieWithRules = null;
+
+ /** Maintains an index of all children of all sub tries */
+ private final HashMap<Integer, Trie> lookup = new HashMap<>();
+
+ public SliceAggregatingTrie(final List<Trie> tries) {
+ if (tries == null || tries.isEmpty()) {
+ throw new RuntimeException(
+ "SliceAggregatingTrie node requires at least one packedTrie");
+ }
+ this.tries = unmodifiableList(tries);
+ buildLookupTable();
+ }
+
+ /**
+ * Fills the lookup table for child nodes.
+ * Also performs various checks to ensure correctness of the
+ * PackedTrie aggregation.
+ */
+ private void buildLookupTable() {
+ final Set<Integer> seen_child_ids = new HashSet<>();
+ Trie previous_trie = null;
+ boolean first = true;
+ for (final Trie trie : this.tries) {
+ /*
+ * perform some checks to make sure tries are correctly split.
+ */
+ if (!first) {
+ if (!haveSameSourceSide(previous_trie, trie) || !haveSameArity(previous_trie, trie)) {
+ throw new RuntimeException("SliceAggregatingTrie's subtries differ in sourceSide or arity. Was the text grammar sorted insufficiently?");
+ }
+ } else {
+ first = false;
+ }
+ previous_trie = trie;
+
+ if (trie.hasRules()) {
+ if (trieWithRules != null) {
+ throw new RuntimeException("SliceAggregatingTrie can only have one subtrie with rules. Was the text grammar sorted insufficiently?");
+ }
+ trieWithRules = trie;
+ }
+
+ final HashMap<Integer, ? extends Trie> children = trie.getChildren();
+ for (int id : children.keySet()) {
+ if (seen_child_ids.contains(id)) {
+ throw new RuntimeException("SliceAggregatingTrie's subtries contain non-disjoint child words. Was the text grammar sorted insufficiently?");
+ }
+ seen_child_ids.add(id);
+ lookup.put(id, children.get(id));
+ }
+ }
+ }
+
+ private boolean haveSameSourceSide(final Trie t1, final Trie t2) {
+ return Arrays.equals(
+ t1.getRuleCollection().getSourceSide(),
+ t2.getRuleCollection().getSourceSide());
+ }
+
+ private boolean haveSameArity(final Trie t1, final Trie t2) {
+ return t1.getRuleCollection().getArity() == t2.getRuleCollection().getArity();
+ }
+
+ @Override
+ public Trie match(int wordId) {
+ return lookup.get(wordId);
+ }
+
+ @Override
+ public boolean hasExtensions() {
+ return !lookup.isEmpty();
+ }
+
+ @Override
+ public Collection<? extends Trie> getExtensions() {
+ return new ArrayList<>(lookup.values());
+ }
+
+ @Override
+ public HashMap<Integer, ? extends Trie> getChildren() {
+ return lookup;
+ }
+
+ @Override
+ public Iterator<Integer> getTerminalExtensionIterator() {
+ return new ExtensionIterator(lookup, true);
+ }
+
+ @Override
+ public Iterator<Integer> getNonterminalExtensionIterator() {
+ return new ExtensionIterator(lookup, true);
+ }
+
+ @Override
+ public RuleCollection getRuleCollection() {
+ return this;
+ }
+
+ /*
+ * The following method's return values depend on whether there is
+ * a single subtrie encoding rules (trieWithRules).
+ * All other subtries can only contain rules some levels deeper.
+ */
+
+ @Override
+ public boolean hasRules() {
+ return trieWithRules == null ? false : trieWithRules.hasRules();
+ }
+
+ @Override
+ public List<Rule> getRules() {
+ if (!hasRules()) {
+ return emptyList();
+ }
+ return trieWithRules.getRuleCollection().getRules();
+ }
+
+ @Override
+ public List<Rule> getSortedRules(List<FeatureFunction> models) {
+ if (!hasRules()) {
+ return emptyList();
+ }
+ return trieWithRules.getRuleCollection().getSortedRules(models);
+ }
+
+ @Override
+ public boolean isSorted() {
+ return !hasRules() ? false : trieWithRules.getRuleCollection().isSorted();
+ }
+
+ /*
+ * The constructor checked that all sub tries have the same arity and sourceSide.
+ * We can thus simply return the value from the first in list.
+ */
+
+ @Override
+ public int[] getSourceSide() {
+ return tries.get(0).getRuleCollection().getSourceSide();
+ }
+
+ @Override
+ public int getArity() {
+ return tries.get(0).getRuleCollection().getArity();
+ }
+
+}
diff --git a/src/joshua/decoder/hypergraph/KBestExtractor.java b/src/joshua/decoder/hypergraph/KBestExtractor.java
index bc0e6ae..ef17799 100644
--- a/src/joshua/decoder/hypergraph/KBestExtractor.java
+++ b/src/joshua/decoder/hypergraph/KBestExtractor.java
@@ -626,7 +626,7 @@
* @return
*/
public float getCost() {
- return cost - weights.get("BLEU") * bleu;
+ return cost - weights.getSparse("BLEU") * bleu;
}
public String toString() {
@@ -901,9 +901,9 @@
if (word.startsWith("[") && word.endsWith("]"))
quotedWords += String.format("%s ", word);
else
- quotedWords += String.format(" \"%s\"", word);
+ quotedWords += String.format("\"%s\" ", word);
- return quotedWords.substring(1);
+ return quotedWords.substring(0, quotedWords.length() - 1);
}
@Override
diff --git a/src/joshua/decoder/phrase/ChartState.java b/src/joshua/decoder/phrase/ChartState.java
deleted file mode 100644
index 5cd0c16..0000000
--- a/src/joshua/decoder/phrase/ChartState.java
+++ /dev/null
@@ -1,13 +0,0 @@
-package joshua.decoder.phrase;
-
-public class ChartState {
-
- public Left left;
- public Right right;
-
- public ChartState() {
- left = new Left();
- right = new Right();
- }
-
-}
diff --git a/src/joshua/decoder/phrase/ContextBase.java b/src/joshua/decoder/phrase/ContextBase.java
deleted file mode 100644
index d9f981f..0000000
--- a/src/joshua/decoder/phrase/ContextBase.java
+++ /dev/null
@@ -1,24 +0,0 @@
-package joshua.decoder.phrase;
-
-import joshua.decoder.Decoder;
-import joshua.decoder.JoshuaConfiguration;
-
-public class ContextBase {
- private JoshuaConfiguration config;
-
- public ContextBase(JoshuaConfiguration config) {
- this.config = config;
- }
-
- public int PopLimit() {
- return config.pop_limit;
- }
-
- public float LMWeight() {
- return Decoder.weights.get("lm_0");
- }
-
- public JoshuaConfiguration GetConfig() {
- return config;
- }
-}
diff --git a/src/joshua/decoder/phrase/Left.java b/src/joshua/decoder/phrase/Left.java
deleted file mode 100644
index 2d1d6d7..0000000
--- a/src/joshua/decoder/phrase/Left.java
+++ /dev/null
@@ -1,17 +0,0 @@
-package joshua.decoder.phrase;
-
-public class Left implements State {
-
- public long[] pointers; // KENLM_MAX_ORDER - 1
- public byte length;
- public boolean full;
-
- public byte getLength() {
- return length;
- }
-
- public long identify(byte index) {
- return pointers[index];
- }
-
-}
diff --git a/src/joshua/decoder/phrase/PhraseTable.java b/src/joshua/decoder/phrase/PhraseTable.java
index ef694a3..b335dc2 100644
--- a/src/joshua/decoder/phrase/PhraseTable.java
+++ b/src/joshua/decoder/phrase/PhraseTable.java
@@ -34,13 +34,13 @@
* @param config
* @throws IOException
*/
- public PhraseTable(String grammarFile, String owner, JoshuaConfiguration config, int maxSource)
+ public PhraseTable(String grammarFile, String owner, String type, JoshuaConfiguration config, int maxSource)
throws IOException {
this.config = config;
int spanLimit = 0;
if (new File(grammarFile).isDirectory()) {
- this.backend = new PackedGrammar(grammarFile, spanLimit, owner, "moses", config);
+ this.backend = new PackedGrammar(grammarFile, spanLimit, owner, type, config);
if (this.backend.getMaxSourcePhraseLength() == -1) {
System.err.println("FATAL: Using a packed grammar for a phrase table backend requires that you");
System.err.println(" packed the grammar with Joshua 6.0.2 or greater");
@@ -48,7 +48,7 @@
}
} else {
- this.backend = new MemoryBasedBatchGrammar("moses", grammarFile, owner, "[X]", spanLimit, config);
+ this.backend = new MemoryBasedBatchGrammar(type, grammarFile, owner, "[X]", spanLimit, config);
}
}
@@ -175,4 +175,9 @@
public int getOwner() {
return backend.getOwner();
}
+
+ @Override
+ public int getNumDenseFeatures() {
+ return backend.getNumDenseFeatures();
+ }
}
diff --git a/src/joshua/decoder/phrase/Right.java b/src/joshua/decoder/phrase/Right.java
deleted file mode 100644
index 8bf977a..0000000
--- a/src/joshua/decoder/phrase/Right.java
+++ /dev/null
@@ -1,38 +0,0 @@
-package joshua.decoder.phrase;
-
-public class Right implements State {
-
- public int[] words; // c++ typedef uint WordIndex, KENLM_MAX_ORDER - 1
- public float[] backoff; // KENLM_MAX_ORDER - 1
- public byte length;
-
- public byte getLength() {
- return length;
- }
-
- public long identify(byte index) {
- return words[index];
- }
-
- @Override
- public boolean equals(Object obj) {
- if (! (obj instanceof Right))
- return false;
-
- Right other = (Right)obj;
- if (getLength() != other.getLength())
- return false;
- if (words.length != other.words.length)
- return false;
- for (int i = 0; i < words.length; i++)
- if (words[i] != other.words[i])
- return false;
-
- return true;
- }
-
- @Override
- public int hashCode() {
- return java.util.Arrays.hashCode(words) * length;
- }
-}
diff --git a/src/joshua/decoder/phrase/ScoreHistory.java b/src/joshua/decoder/phrase/ScoreHistory.java
deleted file mode 100644
index e340140..0000000
--- a/src/joshua/decoder/phrase/ScoreHistory.java
+++ /dev/null
@@ -1,8 +0,0 @@
-package joshua.decoder.phrase;
-
-import java.util.Vector;
-
-class ScoreHistory {
- public Vector<Float> scores;
- float total;
-}
diff --git a/src/joshua/decoder/phrase/Scorer.java b/src/joshua/decoder/phrase/Scorer.java
deleted file mode 100644
index 1a22d77..0000000
--- a/src/joshua/decoder/phrase/Scorer.java
+++ /dev/null
@@ -1,76 +0,0 @@
-package joshua.decoder.phrase;
-
-import joshua.corpus.Vocabulary;
-import joshua.decoder.Decoder;
-import joshua.decoder.ff.FeatureVector;
-import joshua.decoder.ff.lm.kenlm.jni.KenLM;
-
-public class Scorer {
-
- private FeatureVector weights;
- private KenLM model;
-
- public Scorer(String lm_file, String weights_file) {
- weights = Decoder.weights;
-
- this.model = new KenLM(5, lm_file);
-
- // The global vocabulary needs to know about language models so that it can map from the
- // global IDs to the LM's private vocabulary
- Vocabulary.registerLanguageModel(this.model);
- System.err.println(String.format("Loaded a %d-gram language model from '%s'",
- this.model.getOrder(), lm_file));
- }
-
- public float parse(String features) {
- int index = 0;
- float sum = 0.0f;
- for (String valuestr : features.split(" ")) {
- float value = Float.parseFloat(valuestr);
- sum += value * weights.get(String.format("tm_%d", index));
- index++;
- }
-
- return sum;
- }
-
- public FeatureVector getWeights() {
- return weights;
- }
-
- public KenLM LanguageModel() {
- return model;
- }
-
- public float LM(int is) {
- return LM(new int[] { is });
- }
-
- /**
- * Provides preliminary scoring for a phrase by scoring each word with only words from that
- * phrase as history.
- *
- * @param is a sequence of word IDs
- * @param state the chart state (unused)
- * @return the language model probability of the phrase
- */
- public float LM(int[] is) {
- float prob = model.prob(is);
- System.err.println(String.format("prob(%s,%d) = %.3f", Vocabulary.getWords(is), is.length, prob));
- return prob;
- }
-
- public float TargetWordCount(int num_words) {
- return weights.get("target_word_insertion") * num_words;
- }
-
- public float transition(Hypothesis hypothesis, TargetPhrases phrases, int source_begin,
- int source_end) {
- int jump_size = Math.abs(hypothesis.LastSourceIndex() - source_begin);
- return (jump_size * weights.get("distortion"));
- }
-
- public float passThrough() {
- return -100.0f;
- }
-}
diff --git a/src/joshua/decoder/phrase/State.java b/src/joshua/decoder/phrase/State.java
deleted file mode 100644
index 8d6b5cd..0000000
--- a/src/joshua/decoder/phrase/State.java
+++ /dev/null
@@ -1,7 +0,0 @@
-package joshua.decoder.phrase;
-
-public interface State {
- byte getLength();
-
- long identify(byte index);
-}
diff --git a/src/joshua/decoder/segment_file/Sentence.java b/src/joshua/decoder/segment_file/Sentence.java
index 3d79250..83230db 100644
--- a/src/joshua/decoder/segment_file/Sentence.java
+++ b/src/joshua/decoder/segment_file/Sentence.java
@@ -94,8 +94,8 @@
.replaceAll("\\]", "-rsb-")
.replaceAll("\\|", "-pipe-");
- // A maxlen of 0 means no limit. Only trim lattices that are linear chains.
- if (joshuaConfiguration.maxlen != 0 && isLinearChain())
+ // Only trim strings
+ if (joshuaConfiguration.lattice_decoding && ! source.startsWith("((("))
adjustForLength(joshuaConfiguration.maxlen);
}
@@ -346,7 +346,7 @@
assert isLinearChain();
List<Token> tokens = new ArrayList<Token>();
for (Node<Token> node: getLattice().getNodes())
- if (node.getOutgoingArcs().size() > 0)
+ if (node != null && node.getOutgoingArcs().size() > 0)
tokens.add(node.getOutgoingArcs().get(0).getLabel());
return tokens;
}
@@ -379,11 +379,17 @@
}
public Lattice<Token> getLattice() {
- if (this.sourceLattice == null)
- this.sourceLattice = (config.lattice_decoding && rawSource().startsWith("((("))
- ? Lattice.createTokenLatticeFromPLF(rawSource())
- : Lattice.createTokenLatticeFromString(String.format("%s %s %s", Vocabulary.START_SYM,
+ if (this.sourceLattice == null) {
+ if (config.lattice_decoding && rawSource().startsWith("(((")) {
+ if (config.search_algorithm.equals("stack")) {
+ System.err.println("* FATAL: lattice decoding currently not supported for stack-based search algorithm.");
+ System.exit(12);
+ }
+ this.sourceLattice = Lattice.createTokenLatticeFromPLF(rawSource());
+ } else
+ this.sourceLattice = Lattice.createTokenLatticeFromString(String.format("%s %s %s", Vocabulary.START_SYM,
rawSource(), Vocabulary.STOP_SYM));
+ }
return this.sourceLattice;
}
diff --git a/src/joshua/lattice/Lattice.java b/src/joshua/lattice/Lattice.java
index c7267a4..e9cfd59 100644
--- a/src/joshua/lattice/Lattice.java
+++ b/src/joshua/lattice/Lattice.java
@@ -133,8 +133,8 @@
}
public static Lattice<Token> createTokenLatticeFromPLF(String data) {
- Map<Integer, Node<Token>> nodes = new HashMap<Integer, Node<Token>>();
-
+ ArrayList<Node<Token>> nodes = new ArrayList<Node<Token>>();
+
// This matches a sequence of tuples, which describe arcs leaving this node
Pattern nodePattern = Pattern.compile("(.+?)\\(\\s*(\\(.+?\\),\\s*)\\s*\\)(.*)");
@@ -151,7 +151,7 @@
int nodeID = 0;
Node<Token> startNode = new Node<Token>(nodeID);
- nodes.put(nodeID, startNode);
+ nodes.add(startNode);
while (nodeMatcher.matches()) {
@@ -161,15 +161,15 @@
nodeID++;
Node<Token> currentNode = null;
- if (nodes.containsKey(nodeID)) {
+ if (nodeID < nodes.size() && nodes.get(nodeID) != null) {
currentNode = nodes.get(nodeID);
} else {
currentNode = new Node<Token>(nodeID);
- nodes.put(nodeID, currentNode);
+ while (nodeID > nodes.size())
+ nodes.add(new Node<Token>(nodes.size()));
+ nodes.add(currentNode);
}
- logger.fine("Node " + nodeID + ":");
-
Matcher arcMatcher = arcPattern.matcher(nodeData);
int numArcs = 0;
if (!arcMatcher.matches()) {
@@ -182,16 +182,17 @@
int destinationNodeID = nodeID + Integer.valueOf(arcMatcher.group(3));
Node<Token> destinationNode;
- if (nodes.containsKey(destinationNodeID)) {
+ if (destinationNodeID < nodes.size() && nodes.get(destinationNodeID) != null) {
destinationNode = nodes.get(destinationNodeID);
} else {
destinationNode = new Node<Token>(destinationNodeID);
- nodes.put(destinationNodeID, destinationNode);
+ while (destinationNodeID > nodes.size())
+ nodes.add(new Node<Token>(nodes.size()));
+ nodes.add(destinationNode);
}
String remainingArcs = arcMatcher.group(4);
- logger.fine("\t" + arcLabel + " " + arcWeight + " " + destinationNodeID);
Token arcToken = new Token(arcLabel);
currentNode.addArc(destinationNode, arcWeight, arcToken);
@@ -204,26 +205,18 @@
}
/* Add <s> to the start of the lattice. */
- if (nodes.containsKey(1)) {
+ if (nodes.size() > 1 && nodes.get(1) != null) {
Node<Token> firstNode = nodes.get(1);
startNode.addArc(firstNode, 0.0f, new Token(Vocabulary.START_SYM));
}
- /* Add </s> as a final state, and connect it to all end-state nodes. */
- Node<Token> endNode = new Node<Token>(++nodeID);
- for (Node<Token> node : nodes.values()) {
- if (node.getOutgoingArcs().size() == 0)
- node.addArc(endNode, 0.0f, new Token(Vocabulary.STOP_SYM));
- }
- // Add the endnode after the above loop so as to avoid a self-loop.
- nodes.put(nodeID, endNode);
+ /* Add </s> as a final state, connect it to the previous end-state */
+ nodeID = nodes.get(nodes.size()-1).getNumber() + 1;
+ Node<Token> endNode = new Node<Token>(nodeID);
+ nodes.get(nodes.size()-1).addArc(endNode, 0.0f, new Token(Vocabulary.STOP_SYM));
+ nodes.add(endNode);
- List<Node<Token>> nodeList = new ArrayList<Node<Token>>(nodes.values());
- Collections.sort(nodeList, new NodeIdentifierComparator());
-
- logger.fine(nodeList.toString());
-
- return new Lattice<Token>(nodeList, latticeIsAmbiguous);
+ return new Lattice<Token>(nodes, latticeIsAmbiguous);
}
/**
@@ -498,4 +491,4 @@
this.nodes = sortedNodes;
}
-}
\ No newline at end of file
+}
diff --git a/src/joshua/mira/MIRACore.java b/src/joshua/mira/MIRACore.java
index e548533..aa8feb9 100755
--- a/src/joshua/mira/MIRACore.java
+++ b/src/joshua/mira/MIRACore.java
@@ -10,8 +10,6 @@
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
-import java.io.ObjectInputStream;
-import java.io.ObjectOutputStream;
import java.io.OutputStream;
import java.io.OutputStreamWriter;
import java.io.PrintWriter;
@@ -92,11 +90,11 @@
// 4: apply 1+2+3
private int numParams;
- //total number of firing features
- //this number may increase overtime as new n-best lists are decoded
- //initially it is equal to the # of params in the parameter config file
+ // total number of firing features
+ // this number may increase overtime as new n-best lists are decoded
+ // initially it is equal to the # of params in the parameter config file
private int numParamsOld;
- //number of features before observing the new features fired in the current iteration
+ // number of features before observing the new features fired in the current iteration
private double[] normalizationOptions;
// How should a lambda[] vector be normalized (before decoding)?
@@ -110,7 +108,7 @@
/* NOTE: indexing starts at 1 in the following few arrays: */
/* *********************************************************** */
- //private double[] lambda;
+ // private double[] lambda;
private ArrayList<Double> lambda = new ArrayList<Double>();
// the current weight vector. NOTE: indexing starts at 1.
private ArrayList<Double> bestLambda = new ArrayList<Double>();
@@ -209,20 +207,22 @@
// used by mira
private boolean needShuffle = true; // shuffle the training sentences or not
- private boolean needAvg = true; //average the weihgts or not?
- private boolean runPercep = false; //run perceptron instead of mira
- private boolean usePseudoBleu = true; //need to use pseudo corpus to compute bleu?
- private boolean returnBest = true; //return the best weight during tuning
- private boolean needScale = true; //need scaling?
+ private boolean needAvg = true; // average the weihgts or not?
+ private boolean runPercep = false; // run perceptron instead of mira
+ private boolean usePseudoBleu = true; // need to use pseudo corpus to compute bleu?
+ private boolean returnBest = false; // return the best weight during tuning
+ private boolean needScale = true; // need scaling?
private String trainingMode;
private int oraSelectMode = 1;
private int predSelectMode = 1;
private int miraIter = 1;
- private double C = 0.01; //relaxation coefficient
- private double R = 0.99; //corpus decay when pseudo corpus is used for bleu computation
- //private double sentForScale = 0.15; //percentage of sentences for scale factor estimation
- private double scoreRatio = 5.0; //sclale so that model_score/metric_score = scoreratio
- private double prevMetricScore = 0; //final metric score of the previous iteration, used only when returnBest = true
+ private int batchSize = 1;
+ private double C = 0.01; // relaxation coefficient
+ private double R = 0.99; // corpus decay when pseudo corpus is used for bleu computation
+ // private double sentForScale = 0.15; //percentage of sentences for scale factor estimation
+ private double scoreRatio = 5.0; // sclale so that model_score/metric_score = scoreratio
+ private double prevMetricScore = 0; // final metric score of the previous iteration, used only
+ // when returnBest = true
private String dirPrefix; // where are all these files located?
private String paramsFileName, docInfoFileName, finalLambdaFileName;
@@ -297,15 +297,15 @@
BufferedReader inFile_names = new BufferedReader(new FileReader(paramsFileName));
for (int c = 1; c <= numParams; ++c) {
- String line = "";
- while (line != null && line.length() == 0) { // skip empty lines
- line = inFile_names.readLine();
- }
-
- // save feature names
- String paramName = (line.substring(0, line.indexOf("|||"))).trim();
- Vocabulary.id(paramName);
- // System.err.println(String.format("VOCAB(%s) = %d", paramName, id));
+ String line = "";
+ while (line != null && line.length() == 0) { // skip empty lines
+ line = inFile_names.readLine();
+ }
+
+ // save feature names
+ String paramName = (line.substring(0, line.indexOf("|||"))).trim();
+ Vocabulary.id(paramName);
+ // System.err.println(String.format("VOCAB(%s) = %d", paramName, id));
}
inFile_names.close();
@@ -319,9 +319,9 @@
// the parameter file contains one line per parameter
// and one line for the normalization method
- // indexing starts at 1 in these arrays
- for ( int p = 0; p <= numParams; ++p )
- lambda.add(new Double(0));
+ // indexing starts at 1 in these arrays
+ for (int p = 0; p <= numParams; ++p)
+ lambda.add(new Double(0));
bestLambda.add(new Double(0));
// why only lambda is a list? because the size of lambda
// may increase over time, but other arrays are specified in
@@ -388,7 +388,7 @@
EvaluationMetric.set_tmpDirPrefix(tmpDirPrefix);
evalMetric = EvaluationMetric.getMetric(metricName, metricOptions);
- //used only if returnBest = true
+ // used only if returnBest = true
prevMetricScore = evalMetric.getToBeMinimized() ? PosInf : NegInf;
// length of sufficient statistics
@@ -398,8 +398,8 @@
// set static data members for the IntermediateOptimizer class
/*
* IntermediateOptimizer.set_MERTparams(numSentences, numDocuments, docOfSentence,
- * docSubsetInfo, numParams, normalizationOptions, isOptimizable
- * oneModificationPerIteration, evalMetric, tmpDirPrefix, verbosity);
+ * docSubsetInfo, numParams, normalizationOptions, isOptimizable oneModificationPerIteration,
+ * evalMetric, tmpDirPrefix, verbosity);
*/
// print info
@@ -425,7 +425,7 @@
println("c Default value\tOptimizable?\tRand. val. range", 1);
for (int c = 1; c <= numParams; ++c) {
- print(c + " " + f4.format(lambda.get(c).doubleValue()) + "\t\t", 1);
+ print(c + " " + f4.format(lambda.get(c).doubleValue()) + "\t\t", 1);
if (!isOptimizable[c]) {
println(" No", 1);
@@ -566,11 +566,11 @@
// printMemoryUsage();
println("----------------------------------------------------", 1);
println("", 1);
- if ( ! returnBest )
- println("FINAL lambda: " + lambdaToString(lambda), 1);
- // + " (" + metricName_display + ": " + FINAL_score + ")",1);
+ if (!returnBest)
+ println("FINAL lambda: " + lambdaToString(lambda), 1);
+ // + " (" + metricName_display + ": " + FINAL_score + ")",1);
else
- println("BEST lambda: " + lambdaToString(lambda), 1);
+ println("BEST lambda: " + lambdaToString(lambda), 1);
// delete intermediate .temp.*.it* decoder output files
for (int iteration = 1; iteration <= maxIts; ++iteration) {
@@ -597,7 +597,7 @@
// this is the key function!
@SuppressWarnings("unchecked")
public double[] run_single_iteration(int iteration, int minIts, int maxIts, int prevIts,
- int earlyStop, int[] maxIndex) {
+ int earlyStop, int[] maxIndex) {
double FINAL_score = 0;
double[] retA = new double[3];
@@ -634,9 +634,9 @@
/***************/
if (iteration == 1) {
- println("Decoding using initial weight vector " + lambdaToString(lambda), 1);
+ println("Decoding using initial weight vector " + lambdaToString(lambda), 1);
} else {
- println("Redecoding using weight vector " + lambdaToString(lambda), 1);
+ println("Redecoding using weight vector " + lambdaToString(lambda), 1);
}
// generate the n-best file after decoding
@@ -709,7 +709,7 @@
// initLambda[0] is not used!
double[] initialLambda = new double[1 + numParams];
for (int i = 1; i <= numParams; ++i)
- initialLambda[i] = lambda.get(i);
+ initialLambda[i] = lambda.get(i);
// the "score" in initialScore refers to that
// assigned by the evaluation metric)
@@ -897,13 +897,6 @@
// extract feature value
featVal_str = feats_str.split("\\s+");
- if (feats_str.indexOf('=') != -1) {
- for (String featurePair : featVal_str) {
- String[] pair = featurePair.split("=");
- String name = pair[0];
- Double value = Double.parseDouble(pair[1]);
- }
- }
existingCandStats.put(sents_str, stats_str);
candCount[i] += 1;
newCandidatesAdded[it] += 1;
@@ -1030,8 +1023,8 @@
BufferedReader inFile_statsMergedKnown = new BufferedReader(new InputStreamReader(
instream_statsMergedKnown, "utf8"));
- //num of features before observing new firing features from this iteration
- numParamsOld = numParams;
+ // num of features before observing new firing features from this iteration
+ numParamsOld = numParams;
for (int i = 0; i < numSentences; ++i) {
// reprocess candidates from previous iterations
@@ -1092,24 +1085,24 @@
stats_hash[i].put(sents_str, stats_str);
featVal_str = feats_str.split("\\s+");
-
- if (feats_str.indexOf('=') != -1) {
- for (String featurePair : featVal_str) {
- String[] pair = featurePair.split("=");
- String name = pair[0];
- Double value = Double.parseDouble(pair[1]);
- int featId = Vocabulary.id(name);
- //need to identify newly fired feats here
- //in this case currFeatVal is not given the value
- //of the new feat, since the corresponding weight is
- //initialized as zero anyway
- if (featId > numParams) {
- ++numParams;
- lambda.add(new Double(0));
- }
+ if (feats_str.indexOf('=') != -1) {
+ for (String featurePair : featVal_str) {
+ String[] pair = featurePair.split("=");
+ String name = pair[0];
+ Double value = Double.parseDouble(pair[1]);
+ int featId = Vocabulary.id(name);
+
+ // need to identify newly fired feats here
+ // in this case currFeatVal is not given the value
+ // of the new feat, since the corresponding weight is
+ // initialized as zero anyway
+ if (featId > numParams) {
+ ++numParams;
+ lambda.add(new Double(0));
}
- }
+ }
+ }
existingCandStats.put(sents_str, stats_str);
candCount[i] += 1;
@@ -1198,8 +1191,8 @@
println("", 1);
- println("Number of features observed so far: " + numParams);
- println("", 1);
+ println("Number of features observed so far: " + numParams);
+ println("", 1);
} catch (FileNotFoundException e) {
System.err.println("FileNotFoundException in MIRACore.run_single_iteration(6): "
@@ -1217,16 +1210,16 @@
println("", 1);
println("--- MIRA iteration #" + iteration + " ending @ " + (new Date()) + " ---", 1);
println("", 1);
- deleteFile(tmpDirPrefix + "temp.stats.merged");
+ deleteFile(tmpDirPrefix + "temp.stats.merged");
- if (returnBest) {
- //note that bestLambda.size() <= lambda.size()
- for ( int p = 1; p < bestLambda.size(); ++p )
- lambda.set(p, bestLambda.get(p));
- //and set the rest of lambda to be 0
- for ( int p = 0; p < lambda.size() - bestLambda.size(); ++p )
- lambda.set(p+bestLambda.size(), new Double(0));
- }
+ if (returnBest) {
+ // note that bestLambda.size() <= lambda.size()
+ for (int p = 1; p < bestLambda.size(); ++p)
+ lambda.set(p, bestLambda.get(p));
+ // and set the rest of lambda to be 0
+ for (int p = 0; p < lambda.size() - bestLambda.size(); ++p)
+ lambda.set(p + bestLambda.size(), new Double(0));
+ }
return null; // this means that the old values should be kept by the caller
} else {
@@ -1249,67 +1242,70 @@
Optimizer.runPercep = runPercep;
Optimizer.C = C;
Optimizer.needAvg = needAvg;
- //Optimizer.sentForScale = sentForScale;
+ // Optimizer.sentForScale = sentForScale;
Optimizer.scoreRatio = scoreRatio;
Optimizer.evalMetric = evalMetric;
Optimizer.normalizationOptions = normalizationOptions;
Optimizer.needScale = needScale;
+ Optimizer.batchSize = batchSize;
- //if need to use bleu stats history
- if( iteration == 1 ) {
- if(evalMetric.get_metricName().equals("BLEU") && usePseudoBleu) {
- Optimizer.initBleuHistory(numSentences, evalMetric.get_suffStatsCount());
- Optimizer.usePseudoBleu = usePseudoBleu;
- Optimizer.R = R;
- }
- if(evalMetric.get_metricName().equals("TER-BLEU") && usePseudoBleu) {
- Optimizer.initBleuHistory(numSentences, evalMetric.get_suffStatsCount()-2); //Stats count of TER=2
- Optimizer.usePseudoBleu = usePseudoBleu;
- Optimizer.R = R;
- }
+ // if need to use bleu stats history
+ if (iteration == 1) {
+ if (evalMetric.get_metricName().equals("BLEU") && usePseudoBleu) {
+ Optimizer.initBleuHistory(numSentences, evalMetric.get_suffStatsCount());
+ Optimizer.usePseudoBleu = usePseudoBleu;
+ Optimizer.R = R;
+ }
+ if (evalMetric.get_metricName().equals("TER-BLEU") && usePseudoBleu) {
+ Optimizer.initBleuHistory(numSentences, evalMetric.get_suffStatsCount() - 2); // Stats
+ // count of
+ // TER=2
+ Optimizer.usePseudoBleu = usePseudoBleu;
+ Optimizer.R = R;
+ }
}
Vector<String> output = new Vector<String>();
- //note: initialLambda[] has length = numParamsOld
- //augmented with new feature weights, initial values are 0
+ // note: initialLambda[] has length = numParamsOld
+ // augmented with new feature weights, initial values are 0
double[] initialLambdaNew = new double[1 + numParams];
System.arraycopy(initialLambda, 1, initialLambdaNew, 1, numParamsOld);
- //finalLambda[] has length = numParams (considering new features)
+ // finalLambda[] has length = numParams (considering new features)
double[] finalLambda = new double[1 + numParams];
Optimizer opt = new Optimizer(output, isOptimizable, initialLambdaNew, feat_hash, stats_hash);
finalLambda = opt.runOptimizer();
- if ( returnBest ) {
- double metricScore = opt.getMetricScore();
- if ( ! evalMetric.getToBeMinimized() ) {
- if ( metricScore > prevMetricScore ) {
- prevMetricScore = metricScore;
- for ( int p = 1; p < bestLambda.size(); ++p )
- bestLambda.set(p, finalLambda[p]);
- if ( 1 + numParams > bestLambda.size() ) {
- for ( int p = bestLambda.size(); p <= numParams; ++p )
- bestLambda.add(p, finalLambda[p]);
- }
- }
- } else {
- if ( metricScore < prevMetricScore ) {
- prevMetricScore = metricScore;
- for ( int p = 1; p < bestLambda.size(); ++p )
- bestLambda.set(p, finalLambda[p]);
- if ( 1 + numParams > bestLambda.size() ) {
- for ( int p = bestLambda.size(); p <= numParams; ++p )
- bestLambda.add(p, finalLambda[p]);
- }
- }
- }
+ if (returnBest) {
+ double metricScore = opt.getMetricScore();
+ if (!evalMetric.getToBeMinimized()) {
+ if (metricScore > prevMetricScore) {
+ prevMetricScore = metricScore;
+ for (int p = 1; p < bestLambda.size(); ++p)
+ bestLambda.set(p, finalLambda[p]);
+ if (1 + numParams > bestLambda.size()) {
+ for (int p = bestLambda.size(); p <= numParams; ++p)
+ bestLambda.add(p, finalLambda[p]);
+ }
+ }
+ } else {
+ if (metricScore < prevMetricScore) {
+ prevMetricScore = metricScore;
+ for (int p = 1; p < bestLambda.size(); ++p)
+ bestLambda.set(p, finalLambda[p]);
+ if (1 + numParams > bestLambda.size()) {
+ for (int p = bestLambda.size(); p <= numParams; ++p)
+ bestLambda.add(p, finalLambda[p]);
+ }
+ }
+ }
}
- // System.out.println(finalLambda.length);
- // for( int i=0; i<finalLambda.length-1; i++ )
- // System.out.println(finalLambda[i+1]);
+ // System.out.println(finalLambda.length);
+ // for( int i=0; i<finalLambda.length-1; i++ )
+ // System.out.println(finalLambda[i+1]);
/************* end optimization **************/
@@ -1321,12 +1317,12 @@
boolean anyParamChangedSignificantly = false;
for (int c = 1; c <= numParams; ++c) {
- if (finalLambda[c] != lambda.get(c)) {
- anyParamChanged = true;
- }
- if (Math.abs(finalLambda[c] - lambda.get(c)) > stopSigValue) {
- anyParamChangedSignificantly = true;
- }
+ if (finalLambda[c] != lambda.get(c)) {
+ anyParamChanged = true;
+ }
+ if (Math.abs(finalLambda[c] - lambda.get(c)) > stopSigValue) {
+ anyParamChangedSignificantly = true;
+ }
}
// System.arraycopy(finalLambda,1,lambda,1,numParams);
@@ -1360,14 +1356,14 @@
println("Some early stopping criteria has been observed " + "in " + stopMinIts
+ " consecutive iterations; exiting MIRA.", 1);
println("", 1);
-
- if ( returnBest ) {
- for ( int f = 1; f <= numParams; ++f )
- lambda.set(f, bestLambda.get(f));
- } else {
- for ( int f = 1; f <= numParams; ++f )
- lambda.set(f, finalLambda[f]);
- }
+
+ if (returnBest) {
+ for (int f = 1; f <= bestLambda.size() - 1; ++f)
+ lambda.set(f, bestLambda.get(f));
+ } else {
+ for (int f = 1; f <= numParams; ++f)
+ lambda.set(f, finalLambda[f]);
+ }
break; // exit for (iteration) loop preemptively
}
@@ -1377,25 +1373,24 @@
println("Maximum number of MIRA iterations reached; exiting MIRA.", 1);
println("", 1);
- if ( returnBest ) {
- for ( int f = 1; f <= numParams; ++f )
- lambda.set(f, bestLambda.get(f));
- } else {
- for ( int f = 1; f <= numParams; ++f )
- lambda.set(f, finalLambda[f]);
- }
+ if (returnBest) {
+ for (int f = 1; f <= bestLambda.size() - 1; ++f)
+ lambda.set(f, bestLambda.get(f));
+ } else {
+ for (int f = 1; f <= numParams; ++f)
+ lambda.set(f, finalLambda[f]);
+ }
break; // exit for (iteration) loop
}
// use the new wt vector to decode the next iteration
// (interpolation with previous wt vector)
- double interCoef = 1.0; //no interpolation for now
+ double interCoef = 1.0; // no interpolation for now
for (int i = 1; i <= numParams; i++)
- lambda.set(i, interCoef * finalLambda[i] + (1 - interCoef) * lambda.get(i).doubleValue());
+ lambda.set(i, interCoef * finalLambda[i] + (1 - interCoef) * lambda.get(i).doubleValue());
- println("Next iteration will decode with lambda: "
- + lambdaToString(lambda), 1);
+ println("Next iteration will decode with lambda: " + lambdaToString(lambda), 1);
println("", 1);
// printMemoryUsage();
@@ -1423,11 +1418,11 @@
private String lambdaToString(ArrayList<Double> lambdaA) {
String retStr = "{";
int featToPrint = numParams > 15 ? 15 : numParams;
- //print at most the first 15 features
+ // print at most the first 15 features
retStr += "(listing the first " + featToPrint + " lambdas)";
for (int c = 1; c <= featToPrint - 1; ++c) {
- retStr += "" + String.format("%.4f", lambdaA.get(c).doubleValue()) + ", ";
+ retStr += "" + String.format("%.4f", lambdaA.get(c).doubleValue()) + ", ";
}
retStr += "" + String.format("%.4f", lambdaA.get(numParams).doubleValue()) + "}";
@@ -1596,7 +1591,8 @@
}
- private void createConfigFile(ArrayList<Double> params, String cfgFileName, String templateFileName) {
+ private void createConfigFile(ArrayList<Double> params, String cfgFileName,
+ String templateFileName) {
try {
// i.e. create cfgFileName, which is similar to templateFileName, but with
// params[] as parameter values
@@ -1606,33 +1602,33 @@
BufferedReader inFeatDefFile = null;
PrintWriter outFeatDefFile = null;
- int origFeatNum = 0; //feat num in the template file
+ int origFeatNum = 0; // feat num in the template file
String line = inFile.readLine();
while (line != null) {
- int c_match = -1;
- for (int c = 1; c <= numParams; ++c) {
- if (line.startsWith(Vocabulary.word(c) + " ")) {
- c_match = c;
- ++origFeatNum;
- break;
- }
+ int c_match = -1;
+ for (int c = 1; c <= numParams; ++c) {
+ if (line.startsWith(Vocabulary.word(c) + " ")) {
+ c_match = c;
+ ++origFeatNum;
+ break;
}
-
- if (c_match == -1) {
- outFile.println(line);
- } else {
- if ( Math.abs(params.get(c_match).doubleValue()) > 1e-20 )
- outFile.println(Vocabulary.word(c_match) + " " + params.get(c_match));
- }
-
- line = inFile.readLine();
+ }
+
+ if (c_match == -1) {
+ outFile.println(line);
+ } else {
+ if (Math.abs(params.get(c_match).doubleValue()) > 1e-20)
+ outFile.println(Vocabulary.word(c_match) + " " + params.get(c_match));
+ }
+
+ line = inFile.readLine();
}
- //now append weights of new features
- for (int c = origFeatNum+1; c <= numParams; ++c) {
- if ( Math.abs(params.get(c).doubleValue()) > 1e-20 )
- outFile.println(Vocabulary.word(c) + " " + params.get(c));
+ // now append weights of new features
+ for (int c = origFeatNum + 1; c <= numParams; ++c) {
+ if (Math.abs(params.get(c).doubleValue()) > 1e-20)
+ outFile.println(Vocabulary.word(c) + " " + params.get(c));
}
inFile.close();
@@ -1681,7 +1677,12 @@
if (!isOptimizable[c]) { // skip next two values
dummy = inFile_init.next();
dummy = inFile_init.next();
+ dummy = inFile_init.next();
+ dummy = inFile_init.next();
} else {
+ // the next two values are not used, only to be consistent with ZMERT's params file format
+ dummy = inFile_init.next();
+ dummy = inFile_init.next();
// set minRandValue[c] and maxRandValue[c] (range for random values)
dummy = inFile_init.next();
if (dummy.equals("-Inf") || dummy.equals("+Inf")) {
@@ -1706,7 +1707,7 @@
System.exit(21);
}
- // check for odd values
+ // check for odd values
if (minRandValue[c] == maxRandValue[c]) {
println("Warning: lambda[" + c + "] has " + "minRandValue = maxRandValue = "
+ minRandValue[c] + ".", 1);
@@ -2000,7 +2001,7 @@
try {
PrintWriter outFile_lambdas = new PrintWriter(finalLambdaFileName);
for (int c = 1; c <= numParams; ++c) {
- outFile_lambdas.println(Vocabulary.word(c) + " ||| " + lambda.get(c).doubleValue());
+ outFile_lambdas.println(Vocabulary.word(c) + " ||| " + lambda.get(c).doubleValue());
}
outFile_lambdas.close();
@@ -2081,8 +2082,7 @@
if (paramA.length == 2 && paramA[0].charAt(0) == '-') {
argsVector.add(paramA[0]);
argsVector.add(paramA[1]);
- } else if (paramA.length > 2
- && (paramA[0].equals("-m") || paramA[0].equals("-docSet"))) {
+ } else if (paramA.length > 2 && (paramA[0].equals("-m") || paramA[0].equals("-docSet"))) {
// -m (metricName), -docSet are allowed to have extra optinos
for (int opt = 0; opt < paramA.length; ++opt) {
argsVector.add(paramA[opt]);
@@ -2340,106 +2340,107 @@
// for mira:
else if (option.equals("-needShuffle")) {
- int shuffle = Integer.parseInt(args[i + 1]);
- if(shuffle==1)
- needShuffle = true;
- else if(shuffle==0)
- needShuffle = false;
- else {
- println("-needShuffle must be either 0 or 1.");
- System.exit(10);
- }
+ int shuffle = Integer.parseInt(args[i + 1]);
+ if (shuffle == 1)
+ needShuffle = true;
+ else if (shuffle == 0)
+ needShuffle = false;
+ else {
+ println("-needShuffle must be either 0 or 1.");
+ System.exit(10);
+ }
}
- //average weights after each epoch or not
+ // average weights after each epoch or not
else if (option.equals("-needAvg")) {
- int avg = Integer.parseInt(args[i + 1]);
- if(avg==1)
- needAvg = true;
- else if(avg==0)
- needAvg = false;
- else {
- println("-needAvg must be either 0 or 1.");
- System.exit(10);
- }
+ int avg = Integer.parseInt(args[i + 1]);
+ if (avg == 1)
+ needAvg = true;
+ else if (avg == 0)
+ needAvg = false;
+ else {
+ println("-needAvg must be either 0 or 1.");
+ System.exit(10);
+ }
}
- //return the best weight during tuning or not
+ // return the best weight during tuning or not
else if (option.equals("-returnBest")) {
- int retBest = Integer.parseInt(args[i + 1]);
- if(retBest == 1)
- returnBest = true;
- else if(retBest == 0)
- returnBest = false;
- else {
- println("-returnBest must be either 0 or 1.");
- System.exit(10);
- }
+ int retBest = Integer.parseInt(args[i + 1]);
+ if (retBest == 1)
+ returnBest = true;
+ else if (retBest == 0)
+ returnBest = false;
+ else {
+ println("-returnBest must be either 0 or 1.");
+ System.exit(10);
+ }
}
- //run perceptron or not
+ // run perceptron or not
else if (option.equals("-runPercep")) {
- int per = Integer.parseInt(args[i + 1]);
- if(per==1)
- runPercep = true;
- else if(per==0)
- runPercep = false;
- else {
- println("-runPercep must be either 0 or 1.");
- System.exit(10);
- }
+ int per = Integer.parseInt(args[i + 1]);
+ if (per == 1)
+ runPercep = true;
+ else if (per == 0)
+ runPercep = false;
+ else {
+ println("-runPercep must be either 0 or 1.");
+ System.exit(10);
+ }
}
// oracle selection mode
else if (option.equals("-oracleSelection")) {
- oraSelectMode = Integer.parseInt(args[i + 1]);
+ oraSelectMode = Integer.parseInt(args[i + 1]);
}
// prediction selection mode
else if (option.equals("-predictionSelection")) {
- predSelectMode = Integer.parseInt(args[i + 1]);
+ predSelectMode = Integer.parseInt(args[i + 1]);
}
// MIRA internal iterations
else if (option.equals("-miraIter")) {
- miraIter = Integer.parseInt(args[i + 1]);
+ miraIter = Integer.parseInt(args[i + 1]);
+ }
+ // mini-batch size
+ else if (option.equals("-batchSize")) {
+ batchSize = Integer.parseInt(args[i + 1]);
}
// relaxation coefficient
else if (option.equals("-C")) {
- C = Double.parseDouble(args[i + 1]);
+ C = Double.parseDouble(args[i + 1]);
}
// else if (option.equals("-sentForScaling")) {
- // sentForScale = Double.parseDouble(args[i + 1]);
- // if(sentForScale>1 || sentForScale<0) {
- // println("-sentForScaling must be in [0,1]");
- // System.exit(10);
- // }
+ // sentForScale = Double.parseDouble(args[i + 1]);
+ // if(sentForScale>1 || sentForScale<0) {
+ // println("-sentForScaling must be in [0,1]");
+ // System.exit(10);
+ // }
// }
else if (option.equals("-scoreRatio")) {
- scoreRatio = Double.parseDouble(args[i + 1]);
- if(scoreRatio<=0) {
- println("-scoreRatio must be positive");
- System.exit(10);
- }
- }
- else if (option.equals("-needScaling")) {
- int scale = Integer.parseInt(args[i + 1]);
- if(scale==1)
- needScale = true;
- else if(scale==0)
- needScale = false;
- else {
- println("-needScaling must be either 0 or 1.");
- System.exit(10);
- }
- }
- else if (option.equals("-usePseudoCorpus")) {
- int use = Integer.parseInt(args[i + 1]);
- if(use==1)
- usePseudoBleu = true;
- else if(use==0)
- usePseudoBleu = false;
- else {
- println("-usePseudoCorpus must be either 0 or 1.");
- System.exit(10);
- }
- }
- else if (option.equals("-corpusDecay")) {
- R = Double.parseDouble(args[i + 1]);
+ scoreRatio = Double.parseDouble(args[i + 1]);
+ if (scoreRatio <= 0) {
+ println("-scoreRatio must be positive");
+ System.exit(10);
+ }
+ } else if (option.equals("-needScaling")) {
+ int scale = Integer.parseInt(args[i + 1]);
+ if (scale == 1)
+ needScale = true;
+ else if (scale == 0)
+ needScale = false;
+ else {
+ println("-needScaling must be either 0 or 1.");
+ System.exit(10);
+ }
+ } else if (option.equals("-usePseudoCorpus")) {
+ int use = Integer.parseInt(args[i + 1]);
+ if (use == 1)
+ usePseudoBleu = true;
+ else if (use == 0)
+ usePseudoBleu = false;
+ else {
+ println("-usePseudoCorpus must be either 0 or 1.");
+ System.exit(10);
+ }
+ } else if (option.equals("-corpusDecay")) {
+ R = Double.parseDouble(args[i + 1]);
}
// Decoder specs
@@ -3085,7 +3086,7 @@
}
private ArrayList<Double> randomLambda() {
- ArrayList<Double> retLambda = new ArrayList<Double>(1+numParams);
+ ArrayList<Double> retLambda = new ArrayList<Double>(1 + numParams);
for (int c = 1; c <= numParams; ++c) {
if (isOptimizable[c]) {
@@ -3095,7 +3096,7 @@
randVal = minRandValue[c] + randVal; // number in [min,max]
retLambda.set(c, randVal);
} else {
- retLambda.set(c, defaultLambda[c]);
+ retLambda.set(c, defaultLambda[c]);
}
}
diff --git a/src/joshua/mira/Optimizer.java b/src/joshua/mira/Optimizer.java
index e2e2fdd..07365e2 100755
--- a/src/joshua/mira/Optimizer.java
+++ b/src/joshua/mira/Optimizer.java
@@ -14,584 +14,584 @@
// this class implements the MIRA algorithm
public class Optimizer {
- public Optimizer(Vector<String>_output, boolean[] _isOptimizable, double[] _initialLambda,
+ public Optimizer(Vector<String> _output, boolean[] _isOptimizable, double[] _initialLambda,
HashMap<String, String>[] _feat_hash, HashMap<String, String>[] _stats_hash) {
output = _output; // (not used for now)
isOptimizable = _isOptimizable;
initialLambda = _initialLambda; // initial weights array
- paramDim = initialLambda.length - 1;
+ paramDim = initialLambda.length - 1;
initialLambda = _initialLambda;
feat_hash = _feat_hash; // feature hash table
stats_hash = _stats_hash; // suff. stats hash table
finalLambda = new double[initialLambda.length];
- for(int i = 0; i < finalLambda.length; i++)
+ for (int i = 0; i < finalLambda.length; i++)
finalLambda[i] = initialLambda[i];
}
- //run MIRA for one epoch
+ // run MIRA for one epoch
public double[] runOptimizer() {
- for ( int iter = 0; iter < miraIter; ++iter ) {
- System.arraycopy(finalLambda, 1, initialLambda, 1, paramDim);
-
- List<Integer> sents = new ArrayList<Integer>();
- for( int i = 0; i < sentNum; ++i )
- sents.add(i);
-
- if(needShuffle)
- Collections.shuffle(sents);
-
- double oraMetric, oraScore, predMetric, predScore;
- double[] oraPredScore = new double[4];
- double eta = 1.0; //learning rate, will not be changed if run percep
- double avgEta = 0; //average eta, just for analysis
- double loss = 0;
- double featNorm = 0;
- double featDiffVal;
- double sumMetricScore = 0;
- double sumModelScore = 0;
- String oraFeat = "";
- String predFeat = "";
- String[] oraPredFeat = new String[2];
- String[] vecOraFeat;
- String[] vecPredFeat;
- String[] featInfo;
- boolean first = true;
- //int processedSent = 0;
- Iterator it;
- Integer diffFeatId;
- double[] avgLambda = new double[initialLambda.length]; //only needed if averaging is required
- for(int i=0; i<initialLambda.length; i++)
- avgLambda[i] = 0.0;
+ List<Integer> sents = new ArrayList<Integer>();
+ for (int i = 0; i < sentNum; ++i)
+ sents.add(i);
+ double[] avgLambda = new double[initialLambda.length]; // only needed if averaging is required
+ for (int i = 0; i < initialLambda.length; i++)
+ avgLambda[i] = 0.0;
+ double[] bestLambda = new double[initialLambda.length]; // only needed if averaging is required
+ for (int i = 0; i < initialLambda.length; i++)
+ bestLambda[i] = 0.0;
+ double bestMetricScore = evalMetric.getToBeMinimized() ? PosInf : NegInf;
+ int bestIter = 0;
+ for (int iter = 0; iter < miraIter; ++iter) {
+ System.arraycopy(finalLambda, 1, initialLambda, 1, paramDim);
+ if (needShuffle)
+ Collections.shuffle(sents);
- //update weights
- for(Integer s : sents) {
+ double oraMetric, oraScore, predMetric, predScore;
+ double[] oraPredScore = new double[4];
+ double eta = 1.0; // learning rate, will not be changed if run percep
+ double avgEta = 0; // average eta, just for analysis
+ double loss = 0;
+ double diff = 0;
+ double featNorm = 0;
+ double sumMetricScore = 0;
+ double sumModelScore = 0;
+ String oraFeat = "";
+ String predFeat = "";
+ String[] oraPredFeat = new String[2];
+ String[] vecOraFeat;
+ String[] vecPredFeat;
+ String[] featInfo;
+ int thisBatchSize = 0;
+ int numBatch = 0;
+ int numUpdate = 0;
+ Iterator it;
+ Integer diffFeatId;
+
+ // update weights
+ Integer s;
+ int sentCount = 0;
+ while( sentCount < sentNum ) {
+ loss = 0;
+ thisBatchSize = batchSize;
+ ++numBatch;
+ HashMap<Integer, Double> featDiff = new HashMap<Integer, Double>();
+ for(int b = 0; b < batchSize; ++b ) {
//find out oracle and prediction
- if(first)
- findOraPred(s, oraPredScore, oraPredFeat, initialLambda, featScale);
- else
- findOraPred(s, oraPredScore, oraPredFeat, finalLambda, featScale);
-
- //the model scores here are already scaled in findOraPred
+ s = sents.get(sentCount);
+ // find out oracle and prediction
+ findOraPred(s, oraPredScore, oraPredFeat, finalLambda, featScale);
+
+ // the model scores here are already scaled in findOraPred
oraMetric = oraPredScore[0];
oraScore = oraPredScore[1];
predMetric = oraPredScore[2];
predScore = oraPredScore[3];
oraFeat = oraPredFeat[0];
predFeat = oraPredFeat[1];
-
- //update the scale
- if(needScale) { //otherwise featscale remains 1.0
- sumMetricScore += java.lang.Math.abs(oraMetric+predMetric);
- sumModelScore += java.lang.Math.abs(oraScore+predScore)/featScale; //restore the original model score
-
- if(sumModelScore/sumMetricScore > scoreRatio)
- featScale = sumMetricScore/sumModelScore;
+
+ // update the scale
+ if (needScale) { // otherwise featscale remains 1.0
+ sumMetricScore += java.lang.Math.abs(oraMetric + predMetric);
+ // restore the original model score
+ sumModelScore += java.lang.Math.abs(oraScore + predScore) / featScale;
- // /* a different scaling strategy
- // if( (1.0*processedSent/sentNum) < sentForScale ) { //still need to scale
- // double newFeatScale = java.lang.Math.abs(scoreRatio*sumMetricDiff / sumModelDiff); //to make sure modelScore*featScale/metricScore = scoreRatio
-
- // //update the scale only when difference is significant
- // if( java.lang.Math.abs(newFeatScale-featScale)/featScale > 0.2 )
- // featScale = newFeatScale;
- // }*/
+ if (sumModelScore / sumMetricScore > scoreRatio)
+ featScale = sumMetricScore / sumModelScore;
}
- // processedSent++;
- HashMap<Integer, Double> allPredFeat = new HashMap<Integer, Double>();
- HashMap<Integer, Double> featDiff = new HashMap<Integer, Double>();
-
vecOraFeat = oraFeat.split("\\s+");
vecPredFeat = predFeat.split("\\s+");
-
- for (int i = 0; i < vecOraFeat.length; i++) {
- featInfo = vecOraFeat[i].split("=");
- diffFeatId = Integer.parseInt(featInfo[0]);
- featDiff.put(diffFeatId, Double.parseDouble(featInfo[1]));
- }
-
- for (int i = 0; i < vecPredFeat.length; i++) {
- featInfo = vecPredFeat[i].split("=");
- diffFeatId = Integer.parseInt(featInfo[0]);
- allPredFeat.put(diffFeatId, Double.parseDouble(featInfo[1]));
- if (featDiff.containsKey(diffFeatId)) //overlapping features
- featDiff.put(diffFeatId, featDiff.get(diffFeatId)-Double.parseDouble(featInfo[1]));
- else //features only firing in the 2nd feature vector
- featDiff.put(diffFeatId, -1.0*Double.parseDouble(featInfo[1]));
- }
-
- if(!runPercep) { //otherwise eta=1.0
- featNorm = 0;
-
- Collection<Double> allDiff = featDiff.values();
- for(it =allDiff.iterator(); it.hasNext();) {
- featDiffVal = (Double) it.next();
- featNorm += featDiffVal*featDiffVal;
+
+ //accumulate difference feature vector
+ if ( b == 0 ) {
+ for (int i = 0; i < vecOraFeat.length; i++) {
+ featInfo = vecOraFeat[i].split("=");
+ diffFeatId = Integer.parseInt(featInfo[0]);
+ featDiff.put(diffFeatId, Double.parseDouble(featInfo[1]));
}
-
- //a few sanity checks
- if(! evalMetric.getToBeMinimized()) {
- if(oraSelectMode==1 && predSelectMode==1) { //"hope-fear" selection
- /* ora_score+ora_metric > pred_score+pred_metric
- * pred_score-pred_metric > ora_score-ora_metric
- * => ora_metric > pred_metric */
- if(oraMetric+1e-10 < predMetric) {
- System.err.println("WARNING: for hope-fear selection, oracle metric score must be greater than prediction metric score!");
- System.err.println("Something is wrong!");
- }
+ for (int i = 0; i < vecPredFeat.length; i++) {
+ featInfo = vecPredFeat[i].split("=");
+ diffFeatId = Integer.parseInt(featInfo[0]);
+ if (featDiff.containsKey(diffFeatId)) { //overlapping features
+ diff = featDiff.get(diffFeatId)-Double.parseDouble(featInfo[1]);
+ if ( Math.abs(diff) > 1e-20 )
+ featDiff.put(diffFeatId, diff);
+ else
+ featDiff.remove(diffFeatId);
}
-
- if(oraSelectMode==2 || predSelectMode==3) {
- if(oraMetric+1e-10 < predMetric) {
- System.err.println("WARNING: for max-metric oracle selection or min-metric prediction selection, the oracle metric " +
- "score must be greater than the prediction metric score!");
- System.err.println("Something is wrong!");
- }
- }
- } else {
- if(oraSelectMode==1 && predSelectMode==1) { //"hope-fear" selection
- /* ora_score-ora_metric > pred_score-pred_metric
- * pred_score+pred_metric > ora_score+ora_metric
- * => ora_metric < pred_metric */
- if(oraMetric-1e-10 > predMetric) {
- System.err.println("WARNING: for hope-fear selection, oracle metric score must be smaller than prediction metric score!");
- System.err.println("Something is wrong!");
- }
- }
-
- if(oraSelectMode==2 || predSelectMode==3) {
- if(oraMetric-1e-10 > predMetric) {
- System.err.println("WARNING: for min-metric oracle selection or max-metric prediction selection, the oracle metric " +
- "score must be smaller than the prediction metric score!");
- System.err.println("Something is wrong!");
- }
- }
+ else //features only firing in the 2nd feature vector
+ featDiff.put(diffFeatId, -1.0*Double.parseDouble(featInfo[1]));
}
-
- if(predSelectMode==2) {
- if(predScore+1e-10 < oraScore) {
- System.err.println("WARNING: for max-model prediction selection, the prediction model score must be greater than oracle model score!");
- System.err.println("Something is wrong!");
+ } else {
+ for (int i = 0; i < vecOraFeat.length; i++) {
+ featInfo = vecOraFeat[i].split("=");
+ diffFeatId = Integer.parseInt(featInfo[0]);
+ if (featDiff.containsKey(diffFeatId)) { //overlapping features
+ diff = featDiff.get(diffFeatId)+Double.parseDouble(featInfo[1]);
+ if ( Math.abs(diff) > 1e-20 )
+ featDiff.put(diffFeatId, diff);
+ else
+ featDiff.remove(diffFeatId);
}
+ else //features only firing in the new oracle feature vector
+ featDiff.put(diffFeatId, Double.parseDouble(featInfo[1]));
}
-
- //cost - margin
- //remember the model scores here are already scaled
- loss = evalMetric.getToBeMinimized() ? //cost should always be non-negative
- (predMetric-oraMetric) - (oraScore-predScore)/featScale:
- (oraMetric-predMetric) - (oraScore-predScore)/featScale;
-
- if( loss <= 0 )
- eta = 0;
- else
- //eta = C < loss/(featNorm*featScale*featScale) ? C : loss/(featNorm*featScale*featScale); //feat vector not scaled before
- eta = C < loss/(featNorm) ? C : loss/(featNorm); //feat vector not scaled before
- }
-
- avgEta += eta;
-
- Set<Integer> diffFeatSet = featDiff.keySet();
- it = diffFeatSet.iterator();
-
- if(first) {
- first = false;
-
- if( eta!=0 ) {
- while(it.hasNext()) {
- diffFeatId = (Integer)it.next();
- finalLambda[diffFeatId] = initialLambda[diffFeatId] + eta*featDiff.get(diffFeatId);
+ for (int i = 0; i < vecPredFeat.length; i++) {
+ featInfo = vecPredFeat[i].split("=");
+ diffFeatId = Integer.parseInt(featInfo[0]);
+ if (featDiff.containsKey(diffFeatId)) { //overlapping features
+ diff = featDiff.get(diffFeatId)-Double.parseDouble(featInfo[1]);
+ if ( Math.abs(diff) > 1e-20 )
+ featDiff.put(diffFeatId, diff);
+ else
+ featDiff.remove(diffFeatId);
}
+ else //features only firing in the new prediction feature vector
+ featDiff.put(diffFeatId, -1.0*Double.parseDouble(featInfo[1]));
}
}
- else {
- if( eta!=0 ) {
- while(it.hasNext()) {
- diffFeatId = (Integer)it.next();
- finalLambda[diffFeatId] = finalLambda[diffFeatId] + eta*featDiff.get(diffFeatId);
- }
- }
+ if (!runPercep) { // otherwise eta=1.0
+ // remember the model scores here are already scaled
+ double singleLoss = evalMetric.getToBeMinimized() ?
+ (predMetric - oraMetric) - (oraScore - predScore) / featScale
+ : (oraMetric - predMetric) - (oraScore - predScore) / featScale;
+ loss += singleLoss;
}
-
- if(needAvg) {
- for(int i=0; i<avgLambda.length; i++)
- avgLambda[i] += finalLambda[i];
+ ++sentCount;
+ if( sentCount >= sentNum ) {
+ thisBatchSize = b + 1;
+ break;
+ }
+ } //for(int b = 0; b < batchSize; ++b)
+
+ if (!runPercep) { // otherwise eta=1.0
+ featNorm = 0;
+ Collection<Double> allDiff = featDiff.values();
+ for (it = allDiff.iterator(); it.hasNext();) {
+ diff = (Double) it.next();
+ featNorm += diff * diff / ( thisBatchSize * thisBatchSize );
}
}
-
- if(needAvg) {
- for(int i=0; i<finalLambda.length; i++)
- finalLambda[i] = avgLambda[i]/sentNum;
+ if( loss <= 0 )
+ eta = 0;
+ else {
+ loss /= thisBatchSize;
+ // feat vector not scaled before
+ eta = C < loss / featNorm ? C : loss / featNorm;
}
-
- avgEta /= sentNum;
+ avgEta += eta;
+ Set<Integer> diffFeatSet = featDiff.keySet();
+ it = diffFeatSet.iterator();
+ if ( java.lang.Math.abs(eta) > 1e-20 ) {
+ while (it.hasNext()) {
+ diffFeatId = (Integer) it.next();
+ finalLambda[diffFeatId] =
+ finalLambda[diffFeatId] + eta * featDiff.get(diffFeatId) / thisBatchSize;
+ }
+ }
+ if (needAvg) {
+ for (int i = 0; i < avgLambda.length; ++i)
+ avgLambda[i] += finalLambda[i];
+ }
+ } //while( sentCount < sentNum )
- /*
- * for( int i=0; i<finalLambda.length; i++ ) System.out.print(finalLambda[i]+" ");
- * System.out.println(); System.exit(0);
- */
+ avgEta /= numBatch;
- double initMetricScore = computeCorpusMetricScore(initialLambda); // compute the initial corpus-level metric score
- finalMetricScore = computeCorpusMetricScore(finalLambda); // compute final corpus-level metric score // the
+ /*
+ * for( int i=0; i<finalLambda.length; i++ ) System.out.print(finalLambda[i]+" ");
+ * System.out.println(); System.exit(0);
+ */
- // prepare the printing info
- String result = "Iter "+iter+": Avg learning rate="+String.format("%.4f", avgEta);
- result += " Initial "
- + evalMetric.get_metricName() + "=" + String.format("%.4f", initMetricScore) + " Final "
- + evalMetric.get_metricName() + "=" + String.format("%.4f", finalMetricScore);
- //print lambda info
- // int numParamToPrint = 0;
- // numParamToPrint = paramDim > 10 ? 10 : paramDim; // how many parameters
- // // to print
- // result = paramDim > 10 ? "Final lambda (first 10): {" : "Final lambda: {";
-
- // for (int i = 1; i <= numParamToPrint; ++i)
- // result += String.format("%.4f", finalLambda[i]) + " ";
-
- output.add(result);
- } //for ( int iter = 0; iter < miraIter; ++iter ) {
-
- //non-optimizable weights should remain unchanged
- ArrayList<Double> cpFixWt = new ArrayList<Double>();
- for ( int i = 1; i < isOptimizable.length; ++i ) {
- if ( ! isOptimizable[i] )
- cpFixWt.add(finalLambda[i]);
+ double initMetricScore;
+ if(iter == 0 ) {
+ initMetricScore = computeCorpusMetricScore(initialLambda);
+ if(needAvg)
+ finalMetricScore = computeCorpusMetricScore(avgLambda);
+ else
+ finalMetricScore = computeCorpusMetricScore(finalLambda);
+ } else {
+ initMetricScore = finalMetricScore;
+ if(needAvg)
+ finalMetricScore = computeCorpusMetricScore(avgLambda);
+ else
+ finalMetricScore = computeCorpusMetricScore(finalLambda);
}
- normalizeLambda(finalLambda);
- int countNonOpt = 0;
- for ( int i = 1; i < isOptimizable.length; ++i ) {
- if ( ! isOptimizable[i] ) {
- finalLambda[i] = cpFixWt.get(countNonOpt);
- ++countNonOpt;
+
+ if(evalMetric.getToBeMinimized()) {
+ if( finalMetricScore < bestMetricScore ) {
+ bestMetricScore = finalMetricScore;
+ bestIter = iter;
+ for( int i = 0; i < finalLambda.length; ++i )
+ bestLambda[i] = needAvg ? avgLambda[i] : finalLambda[i];
+ }
+ } else {
+ if( finalMetricScore > bestMetricScore ) {
+ bestMetricScore = finalMetricScore;
+ bestIter = iter;
+ for( int i = 0; i < finalLambda.length; ++i )
+ bestLambda[i] = needAvg ? avgLambda[i] : finalLambda[i];
}
}
- return finalLambda;
+
+ if ( iter == miraIter - 1 ) {
+ for (int i = 0; i < finalLambda.length; ++i)
+ finalLambda[i] =
+ needAvg ? bestLambda[i] / ( numBatch * ( bestIter + 1 ) ) : bestLambda[i];
+ }
+
+ // prepare the printing info
+ String result = "Iter " + iter + ": Avg learning rate=" + String.format("%.4f", avgEta);
+ result += " Initial " + evalMetric.get_metricName() + "="
+ + String.format("%.4f", initMetricScore) + " Final " + evalMetric.get_metricName() + "="
+ + String.format("%.4f", finalMetricScore);
+ output.add(result);
+ } // for ( int iter = 0; iter < miraIter; ++iter )
+ String result = "Best " + evalMetric.get_metricName() + "="
+ + String.format("%.4f", bestMetricScore)
+ + " (iter = " + bestIter + ")\n";
+ output.add(result);
+ finalMetricScore = bestMetricScore;
+
+ // non-optimizable weights should remain unchanged
+ ArrayList<Double> cpFixWt = new ArrayList<Double>();
+ for (int i = 1; i < isOptimizable.length; ++i) {
+ if (!isOptimizable[i])
+ cpFixWt.add(finalLambda[i]);
+ }
+ normalizeLambda(finalLambda);
+ int countNonOpt = 0;
+ for (int i = 1; i < isOptimizable.length; ++i) {
+ if (!isOptimizable[i]) {
+ finalLambda[i] = cpFixWt.get(countNonOpt);
+ ++countNonOpt;
+ }
+ }
+ return finalLambda;
}
public double computeCorpusMetricScore(double[] finalLambda) {
- int suffStatsCount = evalMetric.get_suffStatsCount();
- double modelScore;
- double maxModelScore;
- Set<String> candSet;
- String candStr;
- String[] feat_str;
- String[] tmpStatsVal = new String[suffStatsCount];
- int[] corpusStatsVal = new int[suffStatsCount];
- for (int i = 0; i < suffStatsCount; i++)
- corpusStatsVal[i] = 0;
+ int suffStatsCount = evalMetric.get_suffStatsCount();
+ double modelScore;
+ double maxModelScore;
+ Set<String> candSet;
+ String candStr;
+ String[] feat_str;
+ String[] tmpStatsVal = new String[suffStatsCount];
+ int[] corpusStatsVal = new int[suffStatsCount];
+ for (int i = 0; i < suffStatsCount; i++)
+ corpusStatsVal[i] = 0;
- for (int i = 0; i < sentNum; i++) {
- candSet = feat_hash[i].keySet();
+ for (int i = 0; i < sentNum; i++) {
+ candSet = feat_hash[i].keySet();
+ // find out the 1-best candidate for each sentence
+ // this depends on the training mode
+ maxModelScore = NegInf;
+ for (Iterator it = candSet.iterator(); it.hasNext();) {
+ modelScore = 0.0;
+ candStr = it.next().toString();
+ feat_str = feat_hash[i].get(candStr).split("\\s+");
+ String[] feat_info;
+ for (int f = 0; f < feat_str.length; f++) {
+ feat_info = feat_str[f].split("=");
+ modelScore += Double.parseDouble(feat_info[1]) * finalLambda[Vocabulary.id(feat_info[0])];
+ }
+ if (maxModelScore < modelScore) {
+ maxModelScore = modelScore;
+ tmpStatsVal = stats_hash[i].get(candStr).split("\\s+"); // save the
+ // suff stats
+ }
+ }
- // find out the 1-best candidate for each sentence
- // this depends on the training mode
- maxModelScore = -99999999999.0;
+ for (int j = 0; j < suffStatsCount; j++)
+ corpusStatsVal[j] += Integer.parseInt(tmpStatsVal[j]); // accumulate
+ // corpus-leve
+ // suff stats
+ } // for( int i=0; i<sentNum; i++ )
+
+ return evalMetric.score(corpusStatsVal);
+ }
+
+ private void findOraPred(int sentId, double[] oraPredScore, String[] oraPredFeat,
+ double[] lambda, double featScale) {
+ double oraMetric = 0, oraScore = 0, predMetric = 0, predScore = 0;
+ String oraFeat = "", predFeat = "";
+ double candMetric = 0, candScore = 0; // metric and model scores for each cand
+ Set<String> candSet = stats_hash[sentId].keySet();
+ String cand = "";
+ String feats = "";
+ String oraCand = ""; // only used when BLEU/TER-BLEU is used as metric
+ String[] featStr;
+ String[] featInfo;
+
+ int actualFeatId;
+ double bestOraScore;
+ double worstPredScore;
+
+ if (oraSelectMode == 1)
+ bestOraScore = NegInf; // larger score will be selected
+ else {
+ if (evalMetric.getToBeMinimized())
+ bestOraScore = PosInf; // smaller score will be selected
+ else
+ bestOraScore = NegInf;
+ }
+
+ if (predSelectMode == 1 || predSelectMode == 2)
+ worstPredScore = NegInf; // larger score will be selected
+ else {
+ if (evalMetric.getToBeMinimized())
+ worstPredScore = NegInf; // larger score will be selected
+ else
+ worstPredScore = PosInf;
+ }
+
for (Iterator it = candSet.iterator(); it.hasNext();) {
- modelScore = 0.0;
- candStr = it.next().toString();
+ cand = it.next().toString();
+ candMetric = computeSentMetric(sentId, cand); // compute metric score
- feat_str = feat_hash[i].get(candStr).split("\\s+");
+ // start to compute model score
+ candScore = 0;
+ featStr = feat_hash[sentId].get(cand).split("\\s+");
+ feats = "";
- String[] feat_info;
+ for (int i = 0; i < featStr.length; i++) {
+ featInfo = featStr[i].split("=");
+ actualFeatId = Vocabulary.id(featInfo[0]);
+ candScore += Double.parseDouble(featInfo[1]) * lambda[actualFeatId];
+ if ((actualFeatId < isOptimizable.length && isOptimizable[actualFeatId])
+ || actualFeatId >= isOptimizable.length)
+ feats += actualFeatId + "=" + Double.parseDouble(featInfo[1]) + " ";
+ }
- for (int f = 0; f < feat_str.length; f++) {
- feat_info = feat_str[f].split("=");
- modelScore +=
- Double.parseDouble(feat_info[1]) * finalLambda[Vocabulary.id(feat_info[0])];
- }
+ candScore *= featScale; // scale the model score
- if (maxModelScore < modelScore) {
- maxModelScore = modelScore;
- tmpStatsVal = stats_hash[i].get(candStr).split("\\s+"); // save the
- // suff stats
- }
+ // is this cand oracle?
+ if (oraSelectMode == 1) {// "hope", b=1, r=1
+ if (evalMetric.getToBeMinimized()) {// if the smaller the metric score, the better
+ if (bestOraScore <= (candScore - candMetric)) {
+ bestOraScore = candScore - candMetric;
+ oraMetric = candMetric;
+ oraScore = candScore;
+ oraFeat = feats;
+ oraCand = cand;
+ }
+ } else {
+ if (bestOraScore <= (candScore + candMetric)) {
+ bestOraScore = candScore + candMetric;
+ oraMetric = candMetric;
+ oraScore = candScore;
+ oraFeat = feats;
+ oraCand = cand;
+ }
+ }
+ } else {// best metric score(ex: max BLEU), b=1, r=0
+ if (evalMetric.getToBeMinimized()) {// if the smaller the metric score, the better
+ if (bestOraScore >= candMetric) {
+ bestOraScore = candMetric;
+ oraMetric = candMetric;
+ oraScore = candScore;
+ oraFeat = feats;
+ oraCand = cand;
+ }
+ } else {
+ if (bestOraScore <= candMetric) {
+ bestOraScore = candMetric;
+ oraMetric = candMetric;
+ oraScore = candScore;
+ oraFeat = feats;
+ oraCand = cand;
+ }
+ }
+ }
+
+ // is this cand prediction?
+ if (predSelectMode == 1) {// "fear"
+ if (evalMetric.getToBeMinimized()) {// if the smaller the metric score, the better
+ if (worstPredScore <= (candScore + candMetric)) {
+ worstPredScore = candScore + candMetric;
+ predMetric = candMetric;
+ predScore = candScore;
+ predFeat = feats;
+ }
+ } else {
+ if (worstPredScore <= (candScore - candMetric)) {
+ worstPredScore = candScore - candMetric;
+ predMetric = candMetric;
+ predScore = candScore;
+ predFeat = feats;
+ }
+ }
+ } else if (predSelectMode == 2) {// model prediction(max model score)
+ if (worstPredScore <= candScore) {
+ worstPredScore = candScore;
+ predMetric = candMetric;
+ predScore = candScore;
+ predFeat = feats;
+ }
+ } else {// worst metric score(ex: min BLEU)
+ if (evalMetric.getToBeMinimized()) {// if the smaller the metric score, the better
+ if (worstPredScore <= candMetric) {
+ worstPredScore = candMetric;
+ predMetric = candMetric;
+ predScore = candScore;
+ predFeat = feats;
+ }
+ } else {
+ if (worstPredScore >= candMetric) {
+ worstPredScore = candMetric;
+ predMetric = candMetric;
+ predScore = candScore;
+ predFeat = feats;
+ }
+ }
+ }
}
- for (int j = 0; j < suffStatsCount; j++)
- corpusStatsVal[j] += Integer.parseInt(tmpStatsVal[j]); // accumulate
- // corpus-leve
- // suff stats
- } // for( int i=0; i<sentNum; i++ )
+ oraPredScore[0] = oraMetric;
+ oraPredScore[1] = oraScore;
+ oraPredScore[2] = predMetric;
+ oraPredScore[3] = predScore;
+ oraPredFeat[0] = oraFeat;
+ oraPredFeat[1] = predFeat;
- return evalMetric.score(corpusStatsVal);
+ // update the BLEU metric statistics if pseudo corpus is used to compute BLEU/TER-BLEU
+ if (evalMetric.get_metricName().equals("BLEU") && usePseudoBleu) {
+ String statString;
+ String[] statVal_str;
+ statString = stats_hash[sentId].get(oraCand);
+ statVal_str = statString.split("\\s+");
+
+ for (int j = 0; j < evalMetric.get_suffStatsCount(); j++)
+ bleuHistory[sentId][j] = R * bleuHistory[sentId][j] + Integer.parseInt(statVal_str[j]);
+ }
+
+ if (evalMetric.get_metricName().equals("TER-BLEU") && usePseudoBleu) {
+ String statString;
+ String[] statVal_str;
+ statString = stats_hash[sentId].get(oraCand);
+ statVal_str = statString.split("\\s+");
+
+ for (int j = 0; j < evalMetric.get_suffStatsCount() - 2; j++)
+ bleuHistory[sentId][j] = R * bleuHistory[sentId][j] + Integer.parseInt(statVal_str[j + 2]); // the
+ // first
+ // 2
+ // stats
+ // are
+ // TER
+ // stats
+ }
}
-
- private void findOraPred(int sentId, double[] oraPredScore, String[] oraPredFeat, double[] lambda, double featScale)
- {
- double oraMetric=0, oraScore=0, predMetric=0, predScore=0;
- String oraFeat="", predFeat="";
- double candMetric = 0, candScore = 0; //metric and model scores for each cand
- Set<String> candSet = stats_hash[sentId].keySet();
- String cand = "";
- String feats = "";
- String oraCand = ""; //only used when BLEU/TER-BLEU is used as metric
- String[] featStr;
- String[] featInfo;
-
- int actualFeatId;
- double bestOraScore;
- double worstPredScore;
-
- if(oraSelectMode==1)
- bestOraScore = NegInf; //larger score will be selected
- else {
- if(evalMetric.getToBeMinimized())
- bestOraScore = PosInf; //smaller score will be selected
- else
- bestOraScore = NegInf;
- }
-
- if(predSelectMode==1 || predSelectMode==2)
- worstPredScore = NegInf; //larger score will be selected
- else {
- if(evalMetric.getToBeMinimized())
- worstPredScore = NegInf; //larger score will be selected
- else
- worstPredScore = PosInf;
- }
-
- for (Iterator it = candSet.iterator(); it.hasNext();) {
- cand = it.next().toString();
- candMetric = computeSentMetric(sentId, cand); //compute metric score
-
- //start to compute model score
- candScore = 0;
- featStr = feat_hash[sentId].get(cand).split("\\s+");
- feats = "";
- for (int i = 0; i < featStr.length; i++) {
- featInfo = featStr[i].split("=");
- actualFeatId = Vocabulary.id(featInfo[0]);
- candScore += Double.parseDouble(featInfo[1]) * lambda[actualFeatId];
- if ( (actualFeatId < isOptimizable.length && isOptimizable[actualFeatId]) ||
- actualFeatId >= isOptimizable.length )
- feats += actualFeatId + "=" + Double.parseDouble(featInfo[1]) + " ";
- }
-
- candScore *= featScale; //scale the model score
-
- //is this cand oracle?
- if(oraSelectMode == 1) {//"hope", b=1, r=1
- if(evalMetric.getToBeMinimized()) {//if the smaller the metric score, the better
- if( bestOraScore<=(candScore-candMetric) ) {
- bestOraScore = candScore-candMetric;
- oraMetric = candMetric;
- oraScore = candScore;
- oraFeat = feats;
- oraCand = cand;
- }
- }
- else {
- if( bestOraScore<=(candScore+candMetric) ) {
- bestOraScore = candScore+candMetric;
- oraMetric = candMetric;
- oraScore = candScore;
- oraFeat = feats;
- oraCand = cand;
- }
- }
- }
- else {//best metric score(ex: max BLEU), b=1, r=0
- if(evalMetric.getToBeMinimized()) {//if the smaller the metric score, the better
- if( bestOraScore>=candMetric ) {
- bestOraScore = candMetric;
- oraMetric = candMetric;
- oraScore = candScore;
- oraFeat = feats;
- oraCand = cand;
- }
- }
- else {
- if( bestOraScore<=candMetric ) {
- bestOraScore = candMetric;
- oraMetric = candMetric;
- oraScore = candScore;
- oraFeat = feats;
- oraCand = cand;
- }
- }
- }
-
- //is this cand prediction?
- if(predSelectMode == 1) {//"fear"
- if(evalMetric.getToBeMinimized()) {//if the smaller the metric score, the better
- if( worstPredScore<=(candScore+candMetric) ) {
- worstPredScore = candScore+candMetric;
- predMetric = candMetric;
- predScore = candScore;
- predFeat = feats;
- }
- }
- else {
- if( worstPredScore<=(candScore-candMetric) ) {
- worstPredScore = candScore-candMetric;
- predMetric = candMetric;
- predScore = candScore;
- predFeat = feats;
- }
- }
- }
- else if(predSelectMode == 2) {//model prediction(max model score)
- if( worstPredScore<=candScore ) {
- worstPredScore = candScore;
- predMetric = candMetric;
- predScore = candScore;
- predFeat = feats;
- }
- }
- else {//worst metric score(ex: min BLEU)
- if(evalMetric.getToBeMinimized()) {//if the smaller the metric score, the better
- if( worstPredScore<=candMetric ) {
- worstPredScore = candMetric;
- predMetric = candMetric;
- predScore = candScore;
- predFeat = feats;
- }
- }
- else {
- if( worstPredScore>=candMetric ) {
- worstPredScore = candMetric;
- predMetric = candMetric;
- predScore = candScore;
- predFeat = feats;
- }
- }
- }
- }
-
- oraPredScore[0] = oraMetric;
- oraPredScore[1] = oraScore;
- oraPredScore[2] = predMetric;
- oraPredScore[3] = predScore;
- oraPredFeat[0] = oraFeat;
- oraPredFeat[1] = predFeat;
-
- //update the BLEU metric statistics if pseudo corpus is used to compute BLEU/TER-BLEU
- if(evalMetric.get_metricName().equals("BLEU") && usePseudoBleu ) {
- String statString;
- String[] statVal_str;
- statString = stats_hash[sentId].get(oraCand);
- statVal_str = statString.split("\\s+");
-
- for (int j = 0; j < evalMetric.get_suffStatsCount(); j++)
- bleuHistory[sentId][j] = R*bleuHistory[sentId][j]+Integer.parseInt(statVal_str[j]);
- }
-
- if(evalMetric.get_metricName().equals("TER-BLEU") && usePseudoBleu ) {
- String statString;
- String[] statVal_str;
- statString = stats_hash[sentId].get(oraCand);
- statVal_str = statString.split("\\s+");
-
- for (int j = 0; j < evalMetric.get_suffStatsCount()-2; j++)
- bleuHistory[sentId][j] = R*bleuHistory[sentId][j]+Integer.parseInt(statVal_str[j+2]); //the first 2 stats are TER stats
- }
- }
-
// compute *sentence-level* metric score for cand
private double computeSentMetric(int sentId, String cand) {
- String statString;
- String[] statVal_str;
- int[] statVal = new int[evalMetric.get_suffStatsCount()];
+ String statString;
+ String[] statVal_str;
+ int[] statVal = new int[evalMetric.get_suffStatsCount()];
- statString = stats_hash[sentId].get(cand);
- statVal_str = statString.split("\\s+");
+ statString = stats_hash[sentId].get(cand);
+ statVal_str = statString.split("\\s+");
- if(evalMetric.get_metricName().equals("BLEU") && usePseudoBleu) {
- for (int j = 0; j < evalMetric.get_suffStatsCount(); j++)
- statVal[j] = (int) (Integer.parseInt(statVal_str[j]) + bleuHistory[sentId][j]);
- } else if(evalMetric.get_metricName().equals("TER-BLEU") && usePseudoBleu) {
- for (int j = 0; j < evalMetric.get_suffStatsCount()-2; j++)
- statVal[j+2] = (int)(Integer.parseInt(statVal_str[j+2]) + bleuHistory[sentId][j]); //only modify the BLEU stats part(TER has 2 stats)
- } else { //in all other situations, use normal stats
- for (int j = 0; j < evalMetric.get_suffStatsCount(); j++)
- statVal[j] = Integer.parseInt(statVal_str[j]);
- }
+ if (evalMetric.get_metricName().equals("BLEU") && usePseudoBleu) {
+ for (int j = 0; j < evalMetric.get_suffStatsCount(); j++)
+ statVal[j] = (int) (Integer.parseInt(statVal_str[j]) + bleuHistory[sentId][j]);
+ } else if (evalMetric.get_metricName().equals("TER-BLEU") && usePseudoBleu) {
+ for (int j = 0; j < evalMetric.get_suffStatsCount() - 2; j++)
+ statVal[j + 2] = (int) (Integer.parseInt(statVal_str[j + 2]) + bleuHistory[sentId][j]); // only
+ // modify
+ // the
+ // BLEU
+ // stats
+ // part(TER
+ // has
+ // 2
+ // stats)
+ } else { // in all other situations, use normal stats
+ for (int j = 0; j < evalMetric.get_suffStatsCount(); j++)
+ statVal[j] = Integer.parseInt(statVal_str[j]);
+ }
- return evalMetric.score(statVal);
+ return evalMetric.score(statVal);
}
// from ZMERT
private void normalizeLambda(double[] origLambda) {
- // private String[] normalizationOptions;
- // How should a lambda[] vector be normalized (before decoding)?
- // nO[0] = 0: no normalization
- // nO[0] = 1: scale so that parameter nO[2] has absolute value nO[1]
- // nO[0] = 2: scale so that the maximum absolute value is nO[1]
- // nO[0] = 3: scale so that the minimum absolute value is nO[1]
- // nO[0] = 4: scale so that the L-nO[1] norm equals nO[2]
+ // private String[] normalizationOptions;
+ // How should a lambda[] vector be normalized (before decoding)?
+ // nO[0] = 0: no normalization
+ // nO[0] = 1: scale so that parameter nO[2] has absolute value nO[1]
+ // nO[0] = 2: scale so that the maximum absolute value is nO[1]
+ // nO[0] = 3: scale so that the minimum absolute value is nO[1]
+ // nO[0] = 4: scale so that the L-nO[1] norm equals nO[2]
- int normalizationMethod = (int) normalizationOptions[0];
- double scalingFactor = 1.0;
- if (normalizationMethod == 0) {
- scalingFactor = 1.0;
- } else if (normalizationMethod == 1) {
- int c = (int) normalizationOptions[2];
- scalingFactor = normalizationOptions[1] / Math.abs(origLambda[c]);
- } else if (normalizationMethod == 2) {
- double maxAbsVal = -1;
- int maxAbsVal_c = 0;
- for (int c = 1; c <= paramDim; ++c) {
- if (Math.abs(origLambda[c]) > maxAbsVal) {
- maxAbsVal = Math.abs(origLambda[c]);
- maxAbsVal_c = c;
- }
+ int normalizationMethod = (int) normalizationOptions[0];
+ double scalingFactor = 1.0;
+ if (normalizationMethod == 0) {
+ scalingFactor = 1.0;
+ } else if (normalizationMethod == 1) {
+ int c = (int) normalizationOptions[2];
+ scalingFactor = normalizationOptions[1] / Math.abs(origLambda[c]);
+ } else if (normalizationMethod == 2) {
+ double maxAbsVal = -1;
+ int maxAbsVal_c = 0;
+ for (int c = 1; c <= paramDim; ++c) {
+ if (Math.abs(origLambda[c]) > maxAbsVal) {
+ maxAbsVal = Math.abs(origLambda[c]);
+ maxAbsVal_c = c;
+ }
+ }
+ scalingFactor = normalizationOptions[1] / Math.abs(origLambda[maxAbsVal_c]);
+
+ } else if (normalizationMethod == 3) {
+ double minAbsVal = PosInf;
+ int minAbsVal_c = 0;
+
+ for (int c = 1; c <= paramDim; ++c) {
+ if (Math.abs(origLambda[c]) < minAbsVal) {
+ minAbsVal = Math.abs(origLambda[c]);
+ minAbsVal_c = c;
+ }
+ }
+ scalingFactor = normalizationOptions[1] / Math.abs(origLambda[minAbsVal_c]);
+
+ } else if (normalizationMethod == 4) {
+ double pow = normalizationOptions[1];
+ double norm = L_norm(origLambda, pow);
+ scalingFactor = normalizationOptions[2] / norm;
}
- scalingFactor = normalizationOptions[1] / Math.abs(origLambda[maxAbsVal_c]);
-
- } else if (normalizationMethod == 3) {
- double minAbsVal = PosInf;
- int minAbsVal_c = 0;
for (int c = 1; c <= paramDim; ++c) {
- if (Math.abs(origLambda[c]) < minAbsVal) {
- minAbsVal = Math.abs(origLambda[c]);
- minAbsVal_c = c;
- }
+ origLambda[c] *= scalingFactor;
}
- scalingFactor = normalizationOptions[1] / Math.abs(origLambda[minAbsVal_c]);
-
- } else if (normalizationMethod == 4) {
- double pow = normalizationOptions[1];
- double norm = L_norm(origLambda, pow);
- scalingFactor = normalizationOptions[2] / norm;
- }
-
- for (int c = 1; c <= paramDim; ++c) {
- origLambda[c] *= scalingFactor;
- }
}
// from ZMERT
private double L_norm(double[] A, double pow) {
- // calculates the L-pow norm of A[]
- // NOTE: this calculation ignores A[0]
- double sum = 0.0;
- for (int i = 1; i < A.length; ++i)
- sum += Math.pow(Math.abs(A[i]), pow);
+ // calculates the L-pow norm of A[]
+ // NOTE: this calculation ignores A[0]
+ double sum = 0.0;
+ for (int i = 1; i < A.length; ++i)
+ sum += Math.pow(Math.abs(A[i]), pow);
- return Math.pow(sum, 1 / pow);
+ return Math.pow(sum, 1 / pow);
}
- public static double getScale()
- {
- return featScale;
+ public static double getScale() {
+ return featScale;
}
-
- public static void initBleuHistory(int sentNum, int statCount)
- {
- bleuHistory = new double[sentNum][statCount];
- for(int i=0; i<sentNum; i++) {
- for(int j=0; j<statCount; j++) {
- bleuHistory[i][j] = 0.0;
+
+ public static void initBleuHistory(int sentNum, int statCount) {
+ bleuHistory = new double[sentNum][statCount];
+ for (int i = 0; i < sentNum; i++) {
+ for (int j = 0; j < statCount; j++) {
+ bleuHistory[i][j] = 0.0;
+ }
}
- }
}
-
- public double getMetricScore()
- {
+
+ public double getMetricScore() {
return finalMetricScore;
}
-
+
private Vector<String> output;
private double[] initialLambda;
private double[] finalLambda;
@@ -601,23 +601,25 @@
private int paramDim;
private boolean[] isOptimizable;
public static int sentNum;
- public static int miraIter; //MIRA internal iterations
+ public static int miraIter; // MIRA internal iterations
public static int oraSelectMode;
public static int predSelectMode;
+ public static int batchSize;
public static boolean needShuffle;
public static boolean needScale;
public static double scoreRatio;
public static boolean runPercep;
public static boolean needAvg;
public static boolean usePseudoBleu;
- public static double featScale = 1.0; //scale the features in order to make the model score comparable with metric score
- //updates in each epoch if necessary
- public static double C; //relaxation coefficient
- public static double R; //corpus decay(used only when pseudo corpus is used to compute BLEU)
+ public static double featScale = 1.0; // scale the features in order to make the model score
+ // comparable with metric score
+ // updates in each epoch if necessary
+ public static double C; // relaxation coefficient
+ public static double R; // corpus decay(used only when pseudo corpus is used to compute BLEU)
public static EvaluationMetric evalMetric;
public static double[] normalizationOptions;
public static double[][] bleuHistory;
-
+
private final static double NegInf = (-1.0 / 0.0);
private final static double PosInf = (+1.0 / 0.0);
}
diff --git a/src/joshua/pro/ClassifierPerceptron.java b/src/joshua/pro/ClassifierPerceptron.java
index df221d5..7f06456 100755
--- a/src/joshua/pro/ClassifierPerceptron.java
+++ b/src/joshua/pro/ClassifierPerceptron.java
@@ -38,12 +38,6 @@
// {
// numPosSamp++;
score = 0;
-
- /*
- * for( int d=0; d<featDim; d++ ) //inner product { //System.out.printf("%.2f ",
- * Double.parseDouble(featVal[d])); score += Double.parseDouble(featVal[d]) * lambda[d+1];
- */
-
for (int d = 0; d < featVal.length - 1; d++) {
feat_info = featVal[d].split(":");
score += Double.parseDouble(feat_info[1]) * lambda[Integer.parseInt(feat_info[0])];
@@ -55,35 +49,15 @@
if (score <= bias) // incorrect classification
{
numError++;
-
- /*
- * for( int d=0; d<featDim; d++ ) { lambda[d+1] += learningRate*label *
- * Double.parseDouble(featVal[d]); sum_lambda[d+1] += learningRate*lambda[d+1]; }
- */
-
- // System.out.println("\t"+s);
- // for (int d = 0; d < lambda.length; ++d )
- // System.out.print(String.format("%.4f",lambda[d])+" ");
- // System.out.println("-----");
-
for (int d = 0; d < featVal.length - 1; d++) {
feat_info = featVal[d].split(":");
int featID = Integer.parseInt(feat_info[0]);
lambda[featID] += learningRate * label * Double.parseDouble(feat_info[1]);
sum_lambda[featID] += lambda[featID];
}
-
- // System.out.println(samples.get(s));
- // System.out.println("-----");
- // for (int d = 0; d < lambda.length; ++d )
- // System.out.print(String.format("%.4f",lambda[d])+" ");
- // System.out.println();
}
// }//if( featVal[featDim].equals("1") )
}
-
- // System.out.printf("(%.2f%%) ",numError*100.0/numPosSamp);
-
if (numError == 0) break;
}
diff --git a/src/joshua/pro/Optimizer.java b/src/joshua/pro/Optimizer.java
index 544a880..bc01d21 100755
--- a/src/joshua/pro/Optimizer.java
+++ b/src/joshua/pro/Optimizer.java
@@ -119,7 +119,7 @@
feat_str = feat_hash[i].get(candStr).split("\\s+");
for (int f = 0; f < feat_str.length; f++) {
- String[] feat_info = feat_str[f].split("[=:]");
+ String[] feat_info = feat_str[f].split("[=]");
modelScore +=
Double.parseDouble(feat_info[1]) * finalLambda[Vocabulary.id(feat_info[0])];
}
@@ -258,7 +258,7 @@
int feat_id;
for (int i = 0; i < feat_str_j1.length; i++) {
- feat_info = feat_str_j1[i].split("[:=]");
+ feat_info = feat_str_j1[i].split("[=]");
feat_id = Vocabulary.id(feat_info[0]);
if ( (feat_id < isOptimizable.length &&
isOptimizable[feat_id]) ||
@@ -266,7 +266,7 @@
feat_diff.put( feat_id, feat_info[1] );
}
for (int i = 0; i < feat_str_j2.length; i++) {
- feat_info = feat_str_j2[i].split("[:=]");
+ feat_info = feat_str_j2[i].split("[=]");
feat_id = Vocabulary.id(feat_info[0]);
if ( (feat_id < isOptimizable.length &&
isOptimizable[feat_id]) ||
diff --git a/src/joshua/pro/PROCore.java b/src/joshua/pro/PROCore.java
index 949667a..05c6abd 100755
--- a/src/joshua/pro/PROCore.java
+++ b/src/joshua/pro/PROCore.java
@@ -10,8 +10,6 @@
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
-import java.io.ObjectInputStream;
-import java.io.ObjectOutputStream;
import java.io.OutputStream;
import java.io.OutputStreamWriter;
import java.io.PrintWriter;
@@ -92,11 +90,11 @@
// 4: apply 1+2+3
private int numParams;
- //total number of firing features
- //this number may increase overtime as new n-best lists are decoded
- //initially it is equal to the # of params in the parameter config file
+ // total number of firing features
+ // this number may increase overtime as new n-best lists are decoded
+ // initially it is equal to the # of params in the parameter config file
private int numParamsOld;
- //number of features before observing the new features fired in the current iteration
+ // number of features before observing the new features fired in the current iteration
private double[] normalizationOptions;
// How should a lambda[] vector be normalized (before decoding)?
@@ -110,7 +108,7 @@
/* NOTE: indexing starts at 1 in the following few arrays: */
/* *********************************************************** */
- //private double[] lambda;
+ // private double[] lambda;
private ArrayList<Double> lambda = new ArrayList<Double>();
// the current weight vector. NOTE: indexing starts at 1.
private ArrayList<Double> bestLambda = new ArrayList<Double>();
@@ -214,8 +212,9 @@
private int Xi;
private double interCoef;
private double metricDiff;
- private double prevMetricScore = 0; //final metric score of the previous iteration, used only when returnBest = true
- private boolean returnBest = true; //return the best weight during tuning
+ private double prevMetricScore = 0; // final metric score of the previous iteration, used only
+ // when returnBest = true
+ private boolean returnBest = false; // return the best weight during tuning
private String dirPrefix; // where are all these files located?
private String paramsFileName, docInfoFileName, finalLambdaFileName;
@@ -290,15 +289,15 @@
BufferedReader inFile_names = new BufferedReader(new FileReader(paramsFileName));
for (int c = 1; c <= numParams; ++c) {
- String line = "";
- while (line != null && line.length() == 0) { // skip empty lines
- line = inFile_names.readLine();
- }
-
- // save feature names
- String paramName = (line.substring(0, line.indexOf("|||"))).trim();
- Vocabulary.id(paramName);
- // System.err.println(String.format("VOCAB(%s) = %d", paramName, id));
+ String line = "";
+ while (line != null && line.length() == 0) { // skip empty lines
+ line = inFile_names.readLine();
+ }
+
+ // save feature names
+ String paramName = (line.substring(0, line.indexOf("|||"))).trim();
+ Vocabulary.id(paramName);
+ // System.err.println(String.format("VOCAB(%s) = %d", paramName, id));
}
inFile_names.close();
@@ -312,9 +311,9 @@
// the parameter file contains one line per parameter
// and one line for the normalization method
- // indexing starts at 1 in these arrays
- for ( int p = 0; p <= numParams; ++p )
- lambda.add(new Double(0));
+ // indexing starts at 1 in these arrays
+ for (int p = 0; p <= numParams; ++p)
+ lambda.add(new Double(0));
bestLambda.add(new Double(0));
// why only lambda is a list? because the size of lambda
// may increase over time, but other arrays are specified in
@@ -381,7 +380,7 @@
EvaluationMetric.set_tmpDirPrefix(tmpDirPrefix);
evalMetric = EvaluationMetric.getMetric(metricName, metricOptions);
- //used only if returnBest = true
+ // used only if returnBest = true
prevMetricScore = evalMetric.getToBeMinimized() ? PosInf : NegInf;
// length of sufficient statistics
@@ -391,8 +390,8 @@
// set static data members for the IntermediateOptimizer class
/*
* IntermediateOptimizer.set_MERTparams(numSentences, numDocuments, docOfSentence,
- * docSubsetInfo, numParams, normalizationOptions, isOptimizable
- * oneModificationPerIteration, evalMetric, tmpDirPrefix, verbosity);
+ * docSubsetInfo, numParams, normalizationOptions, isOptimizable oneModificationPerIteration,
+ * evalMetric, tmpDirPrefix, verbosity);
*/
// print info
@@ -418,7 +417,7 @@
println("c Default value\tOptimizable?\tRand. val. range", 1);
for (int c = 1; c <= numParams; ++c) {
- print(c + " " + f4.format(lambda.get(c).doubleValue()) + "\t\t", 1);
+ print(c + " " + f4.format(lambda.get(c).doubleValue()) + "\t\t", 1);
if (!isOptimizable[c]) {
println(" No", 1);
@@ -560,12 +559,12 @@
println("----------------------------------------------------", 1);
println("", 1);
- if ( ! returnBest )
- println("FINAL lambda: " + lambdaToString(lambda), 1);
- // + " (" + metricName_display + ": " + FINAL_score + ")",1);
+ if (!returnBest)
+ println("FINAL lambda: " + lambdaToString(lambda), 1);
+ // + " (" + metricName_display + ": " + FINAL_score + ")",1);
else
- println("BEST lambda: " + lambdaToString(lambda), 1);
- // + " (" + metricName_display + ": " + FINAL_score + ")",1);
+ println("BEST lambda: " + lambdaToString(lambda), 1);
+ // + " (" + metricName_display + ": " + FINAL_score + ")",1);
// delete intermediate .temp.*.it* decoder output files
for (int iteration = 1; iteration <= maxIts; ++iteration) {
@@ -592,7 +591,7 @@
// this is the key function!
@SuppressWarnings("unchecked")
public double[] run_single_iteration(int iteration, int minIts, int maxIts, int prevIts,
- int earlyStop, int[] maxIndex) {
+ int earlyStop, int[] maxIndex) {
double FINAL_score = 0;
double[] retA = new double[3];
@@ -629,9 +628,9 @@
/***************/
if (iteration == 1) {
- println("Decoding using initial weight vector " + lambdaToString(lambda), 1);
+ println("Decoding using initial weight vector " + lambdaToString(lambda), 1);
} else {
- println("Redecoding using weight vector " + lambdaToString(lambda), 1);
+ println("Redecoding using weight vector " + lambdaToString(lambda), 1);
}
// generate the n-best file after decoding
@@ -704,7 +703,7 @@
// initLambda[0] is not used!
double[] initialLambda = new double[1 + numParams];
for (int i = 1; i <= numParams; ++i)
- initialLambda[i] = lambda.get(i);
+ initialLambda[i] = lambda.get(i);
// the "score" in initialScore refers to that
// assigned by the evaluation metric)
@@ -891,19 +890,19 @@
// extract feature value
featVal_str = feats_str.split("\\s+");
- if (feats_str.indexOf('=') != -1) {
- for (String featurePair : featVal_str) {
- String[] pair = featurePair.split("=");
- String name = pair[0];
- Double value = Double.parseDouble(pair[1]);
- int featId = Vocabulary.id(name);
- //need to identify newly fired feats here
- if (featId > numParams) {
- ++numParams;
- lambda.add(new Double(0));
- }
+ if (feats_str.indexOf('=') != -1) {
+ for (String featurePair : featVal_str) {
+ String[] pair = featurePair.split("=");
+ String name = pair[0];
+ Double value = Double.parseDouble(pair[1]);
+ int featId = Vocabulary.id(name);
+ // need to identify newly fired feats here
+ if (featId > numParams) {
+ ++numParams;
+ lambda.add(new Double(0));
}
- }
+ }
+ }
existingCandStats.put(sents_str, stats_str);
candCount[i] += 1;
newCandidatesAdded[it] += 1;
@@ -1029,8 +1028,8 @@
BufferedReader inFile_statsMergedKnown = new BufferedReader(new InputStreamReader(
instream_statsMergedKnown, "utf8"));
- //num of features before observing new firing features from this iteration
- numParamsOld = numParams;
+ // num of features before observing new firing features from this iteration
+ numParamsOld = numParams;
for (int i = 0; i < numSentences; ++i) {
// reprocess candidates from previous iterations
@@ -1091,20 +1090,19 @@
stats_hash[i].put(sents_str, stats_str);
featVal_str = feats_str.split("\\s+");
-
- if (feats_str.indexOf('=') != -1) {
- for (String featurePair : featVal_str) {
- String[] pair = featurePair.split("=");
- String name = pair[0];
- Double value = Double.parseDouble(pair[1]);
- int featId = Vocabulary.id(name);
- //need to identify newly fired feats here
- if (featId > numParams) {
- ++numParams;
- lambda.add(new Double(0));
- }
- }
- }
+
+ if (feats_str.indexOf('=') != -1) {
+ for (String featurePair : featVal_str) {
+ String[] pair = featurePair.split("=");
+ String name = pair[0];
+ int featId = Vocabulary.id(name);
+ // need to identify newly fired feats here
+ if (featId > numParams) {
+ ++numParams;
+ lambda.add(new Double(0));
+ }
+ }
+ }
existingCandStats.put(sents_str, stats_str);
candCount[i] += 1;
@@ -1193,8 +1191,8 @@
println("", 1);
- println("Number of features observed so far: " + numParams);
- println("", 1);
+ println("Number of features observed so far: " + numParams);
+ println("", 1);
} catch (FileNotFoundException e) {
System.err.println("FileNotFoundException in PROCore.run_single_iteration(6): "
@@ -1212,16 +1210,16 @@
println("", 1);
println("--- PRO iteration #" + iteration + " ending @ " + (new Date()) + " ---", 1);
println("", 1);
- deleteFile(tmpDirPrefix + "temp.stats.merged");
+ deleteFile(tmpDirPrefix + "temp.stats.merged");
- if (returnBest) {
- //note that bestLambda.size() <= lambda.size()
- for ( int p = 1; p < bestLambda.size(); ++p )
- lambda.set(p, bestLambda.get(p));
- //and set the rest of lambda to be 0
- for ( int p = 0; p < lambda.size() - bestLambda.size(); ++p )
- lambda.set(p+bestLambda.size(), new Double(0));
- }
+ if (returnBest) {
+ // note that bestLambda.size() <= lambda.size()
+ for (int p = 1; p < bestLambda.size(); ++p)
+ lambda.set(p, bestLambda.get(p));
+ // and set the rest of lambda to be 0
+ for (int p = 0; p < lambda.size() - bestLambda.size(); ++p)
+ lambda.set(p + bestLambda.size(), new Double(0));
+ }
return null; // this means that the old values should be kept by the caller
} else {
@@ -1238,12 +1236,12 @@
Vector<String> output = new Vector<String>();
- //note: initialLambda[] has length = numParamsOld
- //augmented with new feature weights, initial values are 0
+ // note: initialLambda[] has length = numParamsOld
+ // augmented with new feature weights, initial values are 0
double[] initialLambdaNew = new double[1 + numParams];
System.arraycopy(initialLambda, 1, initialLambdaNew, 1, numParamsOld);
- //finalLambda[] has length = numParams (considering new features)
+ // finalLambda[] has length = numParams (considering new features)
double[] finalLambda = new double[1 + numParams];
Optimizer opt = new Optimizer(seed + iteration, isOptimizable, output, initialLambdaNew,
@@ -1251,34 +1249,34 @@
classifierAlg, classifierParams);
finalLambda = opt.run_Optimizer();
- if ( returnBest ) {
- double metricScore = opt.getMetricScore();
- if ( ! evalMetric.getToBeMinimized() ) {
- if ( metricScore > prevMetricScore ) {
- prevMetricScore = metricScore;
- for ( int p = 1; p < bestLambda.size(); ++p )
- bestLambda.set(p, finalLambda[p]);
- if ( 1 + numParams > bestLambda.size() ) {
- for ( int p = bestLambda.size(); p <= numParams; ++p )
- bestLambda.add(p, finalLambda[p]);
- }
- }
- } else {
- if ( metricScore < prevMetricScore ) {
- prevMetricScore = metricScore;
- for ( int p = 1; p < bestLambda.size(); ++p )
- bestLambda.set(p, finalLambda[p]);
- if ( 1 + numParams > bestLambda.size() ) {
- for ( int p = bestLambda.size(); p <= numParams; ++p )
- bestLambda.add(p, finalLambda[p]);
- }
- }
- }
+ if (returnBest) {
+ double metricScore = opt.getMetricScore();
+ if (!evalMetric.getToBeMinimized()) {
+ if (metricScore > prevMetricScore) {
+ prevMetricScore = metricScore;
+ for (int p = 1; p < bestLambda.size(); ++p)
+ bestLambda.set(p, finalLambda[p]);
+ if (1 + numParams > bestLambda.size()) {
+ for (int p = bestLambda.size(); p <= numParams; ++p)
+ bestLambda.add(p, finalLambda[p]);
+ }
+ }
+ } else {
+ if (metricScore < prevMetricScore) {
+ prevMetricScore = metricScore;
+ for (int p = 1; p < bestLambda.size(); ++p)
+ bestLambda.set(p, finalLambda[p]);
+ if (1 + numParams > bestLambda.size()) {
+ for (int p = bestLambda.size(); p <= numParams; ++p)
+ bestLambda.add(p, finalLambda[p]);
+ }
+ }
+ }
}
// System.out.println(finalLambda.length);
// for( int i=0; i<finalLambda.length-1; i++ )
- // System.out.print(finalLambda[i+1]+" ");
+ // System.out.print(finalLambda[i+1]+" ");
// System.out.println();
/************* end optimization **************/
@@ -1291,12 +1289,12 @@
boolean anyParamChangedSignificantly = false;
for (int c = 1; c <= numParams; ++c) {
- if (finalLambda[c] != lambda.get(c)) {
- anyParamChanged = true;
- }
- if (Math.abs(finalLambda[c] - lambda.get(c)) > stopSigValue) {
- anyParamChangedSignificantly = true;
- }
+ if (finalLambda[c] != lambda.get(c)) {
+ anyParamChanged = true;
+ }
+ if (Math.abs(finalLambda[c] - lambda.get(c)) > stopSigValue) {
+ anyParamChangedSignificantly = true;
+ }
}
// System.arraycopy(finalLambda,1,lambda,1,numParams);
@@ -1331,13 +1329,14 @@
+ " consecutive iterations; exiting PRO.", 1);
println("", 1);
- if ( returnBest ) {
- for ( int f = 1; f <= numParams; ++f )
- lambda.set(f, bestLambda.get(f));
- } else {
- for ( int f = 1; f <= numParams; ++f )
- lambda.set(f, finalLambda[f]);
- }
+ if (returnBest) {
+            // note that numParams >= bestLambda.size()-1 here!
+ for (int f = 1; f <= bestLambda.size() - 1; ++f)
+ lambda.set(f, bestLambda.get(f));
+ } else {
+ for (int f = 1; f <= numParams; ++f)
+ lambda.set(f, finalLambda[f]);
+ }
break; // exit for (iteration) loop preemptively
}
@@ -1347,13 +1346,14 @@
println("Maximum number of PRO iterations reached; exiting PRO.", 1);
println("", 1);
- if ( returnBest ) {
- for ( int f = 1; f <= numParams; ++f )
- lambda.set(f, bestLambda.get(f));
- } else {
- for ( int f = 1; f <= numParams; ++f )
- lambda.set(f, finalLambda[f]);
- }
+ if (returnBest) {
+            // note that numParams >= bestLambda.size()-1 here!
+ for (int f = 1; f <= bestLambda.size() - 1; ++f)
+ lambda.set(f, bestLambda.get(f));
+ } else {
+ for (int f = 1; f <= numParams; ++f)
+ lambda.set(f, finalLambda[f]);
+ }
break; // exit for (iteration) loop
}
@@ -1361,10 +1361,9 @@
// use the new wt vector to decode the next iteration
// (interpolation with previous wt vector)
for (int i = 1; i <= numParams; i++)
- lambda.set(i, interCoef * finalLambda[i] + (1 - interCoef) * lambda.get(i).doubleValue());
+ lambda.set(i, interCoef * finalLambda[i] + (1 - interCoef) * lambda.get(i).doubleValue());
- println("Next iteration will decode with lambda: "
- + lambdaToString(lambda), 1);
+ println("Next iteration will decode with lambda: " + lambdaToString(lambda), 1);
println("", 1);
// printMemoryUsage();
@@ -1392,11 +1391,11 @@
private String lambdaToString(ArrayList<Double> lambdaA) {
String retStr = "{";
int featToPrint = numParams > 15 ? 15 : numParams;
- //print at most the first 15 features
+ // print at most the first 15 features
retStr += "(listing the first " + featToPrint + " lambdas)";
for (int c = 1; c <= featToPrint - 1; ++c) {
- retStr += "" + String.format("%.4f", lambdaA.get(c).doubleValue()) + ", ";
+ retStr += "" + String.format("%.4f", lambdaA.get(c).doubleValue()) + ", ";
}
retStr += "" + String.format("%.4f", lambdaA.get(numParams).doubleValue()) + "}";
@@ -1565,7 +1564,8 @@
}
- private void createConfigFile(ArrayList<Double> params, String cfgFileName, String templateFileName) {
+ private void createConfigFile(ArrayList<Double> params, String cfgFileName,
+ String templateFileName) {
try {
// i.e. create cfgFileName, which is similar to templateFileName, but with
// params[] as parameter values
@@ -1575,33 +1575,33 @@
BufferedReader inFeatDefFile = null;
PrintWriter outFeatDefFile = null;
- int origFeatNum = 0; //feat num in the template file
+ int origFeatNum = 0; // feat num in the template file
String line = inFile.readLine();
while (line != null) {
- int c_match = -1;
- for (int c = 1; c <= numParams; ++c) {
- if (line.startsWith(Vocabulary.word(c) + " ")) {
- c_match = c;
- ++origFeatNum;
- break;
- }
+ int c_match = -1;
+ for (int c = 1; c <= numParams; ++c) {
+ if (line.startsWith(Vocabulary.word(c) + " ")) {
+ c_match = c;
+ ++origFeatNum;
+ break;
}
-
- if (c_match == -1) {
- outFile.println(line);
- } else {
- if ( Math.abs(params.get(c_match).doubleValue()) > 1e-20 )
- outFile.println(Vocabulary.word(c_match) + " " + params.get(c_match));
- }
-
- line = inFile.readLine();
+ }
+
+ if (c_match == -1) {
+ outFile.println(line);
+ } else {
+ if (Math.abs(params.get(c_match).doubleValue()) > 1e-20)
+ outFile.println(Vocabulary.word(c_match) + " " + params.get(c_match));
+ }
+
+ line = inFile.readLine();
}
- //now append weights of new features
- for (int c = origFeatNum+1; c <= numParams; ++c) {
- if ( Math.abs(params.get(c).doubleValue()) > 1e-20 )
- outFile.println(Vocabulary.word(c) + " " + params.get(c));
+ // now append weights of new features
+ for (int c = origFeatNum + 1; c <= numParams; ++c) {
+ if (Math.abs(params.get(c).doubleValue()) > 1e-20)
+ outFile.println(Vocabulary.word(c) + " " + params.get(c));
}
inFile.close();
@@ -1650,7 +1650,12 @@
if (!isOptimizable[c]) { // skip next two values
dummy = inFile_init.next();
dummy = inFile_init.next();
+ dummy = inFile_init.next();
+ dummy = inFile_init.next();
} else {
+ // the next two values are not used, only to be consistent with ZMERT's params file format
+ dummy = inFile_init.next();
+ dummy = inFile_init.next();
// set minRandValue[c] and maxRandValue[c] (range for random values)
dummy = inFile_init.next();
if (dummy.equals("-Inf") || dummy.equals("+Inf")) {
@@ -1675,7 +1680,7 @@
System.exit(21);
}
- // check for odd values
+ // check for odd values
if (minRandValue[c] == maxRandValue[c]) {
println("Warning: lambda[" + c + "] has " + "minRandValue = maxRandValue = "
+ minRandValue[c] + ".", 1);
@@ -1969,7 +1974,7 @@
try {
PrintWriter outFile_lambdas = new PrintWriter(finalLambdaFileName);
for (int c = 1; c <= numParams; ++c) {
- outFile_lambdas.println(Vocabulary.word(c) + " ||| " + lambda.get(c).doubleValue());
+ outFile_lambdas.println(Vocabulary.word(c) + " ||| " + lambda.get(c).doubleValue());
}
outFile_lambdas.close();
@@ -2050,8 +2055,7 @@
if (paramA.length == 2 && paramA[0].charAt(0) == '-') {
argsVector.add(paramA[0]);
argsVector.add(paramA[1]);
- } else if (paramA.length > 2
- && (paramA[0].equals("-m") || paramA[0].equals("-docSet"))) {
+ } else if (paramA.length > 2 && (paramA[0].equals("-m") || paramA[0].equals("-docSet"))) {
// -m (metricName), -docSet are allowed to have extra optinos
for (int opt = 0; opt < paramA.length; ++opt) {
argsVector.add(paramA[opt]);
@@ -2324,17 +2328,17 @@
else if (option.equals("-Xi")) {
Xi = Integer.parseInt(args[i + 1]);
}
- //return the best weight during tuning or not
+ // return the best weight during tuning or not
else if (option.equals("-returnBest")) {
- int retBest = Integer.parseInt(args[i + 1]);
- if(retBest == 1)
- returnBest = true;
- else if(retBest == 0)
- returnBest = false;
- else {
- println("-returnBest must be either 0 or 1.");
- System.exit(10);
- }
+ int retBest = Integer.parseInt(args[i + 1]);
+ if (retBest == 1)
+ returnBest = true;
+ else if (retBest == 0)
+ returnBest = false;
+ else {
+ println("-returnBest must be either 0 or 1.");
+ System.exit(10);
+ }
}
// interpolation coefficient between current & previous weights
else if (option.equals("-interCoef")) {
@@ -2988,7 +2992,7 @@
}
private ArrayList<Double> randomLambda() {
- ArrayList<Double> retLambda = new ArrayList<Double>(1+numParams);
+ ArrayList<Double> retLambda = new ArrayList<Double>(1 + numParams);
for (int c = 1; c <= numParams; ++c) {
if (isOptimizable[c]) {
@@ -2998,7 +3002,7 @@
randVal = minRandValue[c] + randVal; // number in [min,max]
retLambda.set(c, randVal);
} else {
- retLambda.set(c, defaultLambda[c]);
+ retLambda.set(c, defaultLambda[c]);
}
}
diff --git a/src/joshua/tools/GrammarPacker.java b/src/joshua/tools/GrammarPacker.java
index 72280e2..fb13ee4 100644
--- a/src/joshua/tools/GrammarPacker.java
+++ b/src/joshua/tools/GrammarPacker.java
@@ -1,5 +1,7 @@
package joshua.tools;
+import static joshua.decoder.ff.tm.packed.PackedGrammar.VOCABULARY_FILENAME;
+
import java.io.BufferedOutputStream;
import java.io.DataOutputStream;
import java.io.File;
@@ -12,7 +14,6 @@
import java.util.ArrayList;
import java.util.LinkedList;
import java.util.List;
-import java.util.PriorityQueue;
import java.util.Queue;
import java.util.TreeMap;
import java.util.logging.Logger;
@@ -28,12 +29,12 @@
private static final Logger logger = Logger.getLogger(GrammarPacker.class.getName());
- // Approximate maximum size of a slice in number of rules
- private static int SLICE_SIZE;
// Size limit for slice in bytes.
- private static int DATA_SIZE_LIMIT;
+ private static int DATA_SIZE_LIMIT = (int) (Integer.MAX_VALUE * 0.8);
// Estimated average number of feature entries for one rule.
- private static int DATA_SIZE_ESTIMATE;
+ private static int DATA_SIZE_ESTIMATE = 20;
+
+ private static final String SOURCE_WORDS_SEPARATOR = " ||| ";
// Output directory name.
private String output;
@@ -41,6 +42,17 @@
// Input grammar to be packed.
private String grammar;
+ public String getGrammar() {
+ return grammar;
+ }
+
+ public String getOutputDirectory() {
+ return output;
+ }
+
+ // Approximate maximum size of a slice in number of rules
+ private int approximateMaximumSliceSize;
+
private boolean labeled;
private boolean packAlignments;
@@ -54,20 +66,16 @@
private int max_source_len;
- static {
- SLICE_SIZE = 1000000;
- DATA_SIZE_LIMIT = (int) (Integer.MAX_VALUE * 0.8);
- DATA_SIZE_ESTIMATE = 20;
- }
-
public GrammarPacker(String grammar_filename, String config_filename, String output_filename,
- String alignments_filename, String featuredump_filename, boolean grammar_alignments)
+ String alignments_filename, String featuredump_filename, boolean grammar_alignments,
+ int approximateMaximumSliceSize)
throws IOException {
this.labeled = true;
this.grammar = grammar_filename;
this.output = output_filename;
this.dump = featuredump_filename;
this.grammarAlignments = grammar_alignments;
+ this.approximateMaximumSliceSize = approximateMaximumSliceSize;
this.max_source_len = 0;
// TODO: Always open encoder config? This is debatable.
@@ -79,7 +87,7 @@
logger.info("No alignments file or grammar specified, skipping.");
} else if (alignments != null && !new File(alignments_filename).exists()) {
logger.severe("Alignments file does not exist: " + alignments);
- System.exit(0);
+ System.exit(1);
}
if (config_filename != null) {
@@ -88,12 +96,13 @@
} else {
logger.info("No config specified. Attempting auto-detection of feature types.");
}
+ logger.info(String.format("Approximate maximum slice size (in # of rules) set to %s", approximateMaximumSliceSize));
File working_dir = new File(output);
working_dir.mkdir();
if (!working_dir.exists()) {
logger.severe("Failed creating output directory.");
- System.exit(0);
+ System.exit(1);
}
}
@@ -110,11 +119,11 @@
if (fields.length < 2) {
logger.severe("Incomplete line in config.");
- System.exit(0);
+ System.exit(1);
}
if ("slice_size".equals(fields[0])) {
// Number of records to concurrently load into memory for sorting.
- SLICE_SIZE = Integer.parseInt(fields[1]);
+ approximateMaximumSliceSize = Integer.parseInt(fields[1]);
}
}
reader.close();
@@ -150,11 +159,7 @@
logger.info("Writing encoding.");
types.write(output + File.separator + "encoding");
- logger.info("Freezing vocab.");
- Vocabulary.freeze();
-
- logger.info("Writing vocab.");
- Vocabulary.write(output + File.separator + "vocabulary");
+ writeVocabulary();
String configFile = output + File.separator + "config";
logger.info(String.format("Writing config to '%s'", configFile));
@@ -170,13 +175,12 @@
encoderConfig.load(output + File.separator + "encoding");
logger.info("Beginning packing pass.");
- Queue<PackingFileTuple> slices = new PriorityQueue<PackingFileTuple>();
// Actual binarization pass. Slice and pack source, target and data.
grammar_reader = new LineReader(grammar);
if (packAlignments && !grammarAlignments)
alignment_reader = new LineReader(alignments);
- binarize(grammar_reader, alignment_reader, slices);
+ binarize(grammar_reader, alignment_reader);
logger.info("Packing complete.");
logger.info("Packed grammar in: " + output);
@@ -220,18 +224,21 @@
Vocabulary.id(lhs);
try {
- // Add symbols to vocabulary.
+ /* Add symbols to vocabulary.
+ * NOTE: In case of nonterminals, we add both stripped versions ("[X]")
+ * and "[X,1]" to the vocabulary.
+ */
for (String source_word : source) {
- if (FormatUtils.isNonterminal(source_word))
- Vocabulary.id(FormatUtils.stripNt(source_word));
- else
- Vocabulary.id(source_word);
+ Vocabulary.id(source_word);
+ if (FormatUtils.isNonterminal(source_word)) {
+ Vocabulary.id(FormatUtils.stripNonTerminalIndex(source_word));
+ }
}
for (String target_word : target) {
- if (FormatUtils.isNonterminal(target_word))
- Vocabulary.id(FormatUtils.stripNt(target_word));
- else
- Vocabulary.id(target_word);
+ Vocabulary.id(target_word);
+ if (FormatUtils.isNonterminal(target_word)) {
+ Vocabulary.id(FormatUtils.stripNonTerminalIndex(target_word));
+ }
}
} catch (java.lang.StringIndexOutOfBoundsException e) {
System.err.println(String.format("* Skipping bad grammar line '%s'", line));
@@ -255,14 +262,22 @@
}
}
- private void binarize(LineReader grammar_reader, LineReader alignment_reader,
- Queue<PackingFileTuple> slices) throws IOException {
+ /**
+ * Returns a String encoding the first two source words.
+ * If there is only one source word, use empty string for the second.
+ */
+ private String getFirstTwoSourceWords(final String[] source_words) {
+ return source_words[0] + SOURCE_WORDS_SEPARATOR + ((source_words.length > 1) ? source_words[1] : "");
+ }
+
+ private void binarize(LineReader grammar_reader, LineReader alignment_reader) throws IOException {
int counter = 0;
int slice_counter = 0;
int num_slices = 0;
boolean ready_to_flush = false;
- String first_source_word = null;
+ // to determine when flushing is possible
+ String prev_first_two_source_words = null;
PackingTrie<SourceValue> source_trie = new PackingTrie<SourceValue>();
PackingTrie<TargetValue> target_trie = new PackingTrie<TargetValue>();
@@ -306,23 +321,33 @@
// Reached slice limit size, indicate that we're closing up.
if (!ready_to_flush
- && (slice_counter > SLICE_SIZE || feature_buffer.overflowing() || (packAlignments && alignment_buffer
- .overflowing()))) {
+ && (slice_counter > approximateMaximumSliceSize
+ || feature_buffer.overflowing()
+ || (packAlignments && alignment_buffer.overflowing()))) {
ready_to_flush = true;
- first_source_word = source_words[0];
+ // store the first two source words when slice size limit was reached
+ prev_first_two_source_words = getFirstTwoSourceWords(source_words);
}
- // Finished closing up.
- if (ready_to_flush && !first_source_word.equals(source_words[0])) {
- slices.add(flush(source_trie, target_trie, feature_buffer, alignment_buffer, num_slices));
- source_trie.clear();
- target_trie.clear();
- feature_buffer.clear();
- if (packAlignments)
- alignment_buffer.clear();
+ // ready to flush
+ if (ready_to_flush) {
+ final String first_two_source_words = getFirstTwoSourceWords(source_words);
+ // the grammar can only be partitioned at the level of first two source word changes.
+ // Thus, we can only flush if the current first two source words differ from the ones
+ // when the slice size limit was reached.
+ if (!first_two_source_words.equals(prev_first_two_source_words)) {
+ logger.warning(String.format("ready to flush and first two words have changed (%s vs. %s)", prev_first_two_source_words, first_two_source_words));
+ logger.info(String.format("flushing %d rules to slice.", slice_counter));
+ flush(source_trie, target_trie, feature_buffer, alignment_buffer, num_slices);
+ source_trie.clear();
+ target_trie.clear();
+ feature_buffer.clear();
+ if (packAlignments)
+ alignment_buffer.clear();
- num_slices++;
- slice_counter = 0;
- ready_to_flush = false;
+ num_slices++;
+ slice_counter = 0;
+ ready_to_flush = false;
+ }
}
int alignment_index = -1;
@@ -386,7 +411,7 @@
int[] source = new int[source_words.length];
for (int i = 0; i < source_words.length; i++) {
if (FormatUtils.isNonterminal(source_words[i]))
- source[i] = Vocabulary.id(FormatUtils.stripNt(source_words[i]));
+ source[i] = Vocabulary.id(FormatUtils.stripNonTerminalIndex(source_words[i]));
else
source[i] = Vocabulary.id(source_words[i]);
}
@@ -404,7 +429,8 @@
}
target_trie.add(target, tv);
}
- slices.add(flush(source_trie, target_trie, feature_buffer, alignment_buffer, num_slices));
+ // flush last slice and clear buffers
+ flush(source_trie, target_trie, feature_buffer, alignment_buffer, num_slices);
}
/**
@@ -421,7 +447,7 @@
* @param id
* @throws IOException
*/
- private PackingFileTuple flush(PackingTrie<SourceValue> source_trie,
+ private void flush(PackingTrie<SourceValue> source_trie,
PackingTrie<TargetValue> target_trie, FeatureBuffer feature_buffer,
AlignmentBuffer alignment_buffer, int id) throws IOException {
// Make a slice object for this piece of the grammar.
@@ -546,74 +572,12 @@
feature_stream.close();
if (packAlignments)
alignment_stream.close();
-
- return slice;
}
- public static void main(String[] args) throws IOException {
- String grammar_filename = null;
- String config_filename = null;
- String output_prefix = null;
- String alignments_filename = null;
- String featuredump_filename = null;
- boolean grammar_alignments = false;
-
- if (args.length < 1 || args[0].equals("-h")) {
- System.err.println("Usage: " + GrammarPacker.class.toString());
- System.err.println(" -g grammar_file translation grammar to process");
- System.err.println(" -p packed_name prefix for *.packed output directory");
- System.err.println(" [-c config_file packing configuration file]");
- System.err.println(" [-fa alignment_file alignment_file]");
- System.err.println(" [-ga alignments in grammar]");
- System.err.println(" [-d dump_file dump feature stats]");
- System.err.println();
- System.exit(-1);
- }
-
- for (int i = 0; i < args.length; i++) {
- if ("-g".equals(args[i]) && (i < args.length - 1)) {
- grammar_filename = args[++i];
- } else if ("-p".equals(args[i]) && (i < args.length - 1)) {
- output_prefix = args[++i];
- } else if ("-c".equals(args[i]) && (i < args.length - 1)) {
- config_filename = args[++i];
- } else if ("-fa".equals(args[i]) && (i < args.length - 1)) {
- alignments_filename = args[++i];
- } else if ("-ga".equals(args[i])) {
- grammar_alignments = true;
- } else if ("-d".equals(args[i]) && (i < args.length - 1)) {
- featuredump_filename = args[++i];
- }
- }
- if (grammar_filename == null) {
- logger.severe("Grammar file not specified.");
- return;
- }
- if (!new File(grammar_filename).exists()) {
- logger.severe("Grammar file not found: " + grammar_filename);
- }
- if (config_filename != null && !new File(config_filename).exists()) {
- logger.severe("Config file not found: " + config_filename);
- }
-
- String output_filename = null;
- if (output_prefix != null) {
- output_filename = output_prefix;
- } else {
- output_filename = grammar_filename + ".packed";
- }
-
- if (new File(output_filename).exists()) {
- logger.severe("File or directory already exists: " + output_filename);
- logger.severe("Will not overwrite.");
- return;
- } else {
- logger.info("Will be writing to " + output_filename);
- }
-
- GrammarPacker packer = new GrammarPacker(grammar_filename, config_filename, output_filename,
- alignments_filename, featuredump_filename, grammar_alignments);
- packer.pack();
+ public void writeVocabulary() throws IOException {
+ final String vocabularyFilename = output + File.separator + VOCABULARY_FILENAME;
+ logger.info("Writing vocabulary to " + vocabularyFilename);
+ Vocabulary.write(vocabularyFilename);
}
/**
@@ -758,7 +722,7 @@
// Allocate a reasonably-sized buffer for the feature data.
private void allocate() {
- backing = new byte[SLICE_SIZE * DATA_SIZE_ESTIMATE];
+ backing = new byte[approximateMaximumSliceSize * DATA_SIZE_ESTIMATE];
buffer = ByteBuffer.wrap(backing);
}
diff --git a/src/joshua/tools/GrammarPackerCli.java b/src/joshua/tools/GrammarPackerCli.java
new file mode 100644
index 0000000..84eb2eb
--- /dev/null
+++ b/src/joshua/tools/GrammarPackerCli.java
@@ -0,0 +1,137 @@
+package joshua.tools;
+
+import java.io.File;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.logging.Logger;
+
+import org.kohsuke.args4j.CmdLineException;
+import org.kohsuke.args4j.CmdLineParser;
+import org.kohsuke.args4j.Option;
+import org.kohsuke.args4j.spi.StringArrayOptionHandler;
+
+public class GrammarPackerCli {
+
+ private static final Logger log = Logger.getLogger(GrammarPackerCli.class.getName());
+
+ // Input grammars to be packed (with a joint vocabulary)
+ @Option(name = "--grammars", aliases = {"-g", "-i"}, handler = StringArrayOptionHandler.class, required = true, usage = "list of grammars to pack (jointly, i.e. they share the same vocabulary)")
+ private List<String> grammars = new ArrayList<>();
+
+ // Output grammars
+ @Option(name = "--outputs", aliases = {"-p", "-o"}, handler = StringArrayOptionHandler.class, required = true, usage = "output directories of packed grammars.")
+ private List<String> outputs = new ArrayList<>();
+
+ // Output grammars
+ @Option(name = "--alignments", aliases = {"-a", "--fa"}, handler = StringArrayOptionHandler.class, required = false, usage = "alignment files")
+ private List<String> alignments_filenames = new ArrayList<>();
+
+ // Config filename
+ @Option(name = "--config_file", aliases = {"-c"}, required = false, usage = "(optional) packing configuration file")
+ private String config_filename;
+
+ @Option(name = "--dump_files", aliases = {"-d"}, handler = StringArrayOptionHandler.class, usage = "(optional) dump feature stats to file")
+ private List<String> featuredump_filenames = new ArrayList<>();
+
+ @Option(name = "--ga", usage = "whether alignments are present in the grammar")
+ private boolean grammar_alignments = false;
+
+ @Option(name = "--slice_size", aliases = {"-s"}, required = false, usage = "approximate slice size in # of rules (default=1000000)")
+ private int slice_size = 1000000;
+
+
+ private void run() throws IOException {
+
+ final List<String> missingFilenames = new ArrayList<>(grammars.size());
+ for (final String g : grammars) {
+ if (!new File(g).exists()) {
+ missingFilenames.add(g);
+ }
+ }
+ if (!missingFilenames.isEmpty()) {
+ throw new IOException("Input grammar files not found: " + missingFilenames.toString());
+ }
+
+ if (config_filename != null && !new File(config_filename).exists()) {
+ throw new IOException("Config file not found: " + config_filename);
+ }
+
+ if (!outputs.isEmpty()) {
+ if (outputs.size() != grammars.size()) {
+ throw new IOException("Must provide an output directory for each grammar");
+ }
+ final List<String> existingOutputs = new ArrayList<>(outputs.size());
+ for (final String o : outputs) {
+ if (new File(o).exists()) {
+ existingOutputs.add(o);
+ }
+ }
+ if (!existingOutputs.isEmpty()) {
+ throw new IOException("These output directories already exist (will not overwrite): " + existingOutputs.toString());
+ }
+ }
+ if (outputs.isEmpty()) {
+ for (final String g : grammars) {
+ outputs.add(g + ".packed");
+ }
+ }
+
+ if (!alignments_filenames.isEmpty()) {
+ final List<String> missingAlignmentFiles = new ArrayList<>(alignments_filenames.size());
+ for (final String a : alignments_filenames) {
+ if (!new File(a).exists()) {
+ missingAlignmentFiles.add(a);
+ }
+ }
+ if (!missingAlignmentFiles.isEmpty()) {
+ throw new IOException("Alignment files not found: " + missingAlignmentFiles.toString());
+ }
+ }
+
+ // create Packer instances for each grammar
+ final List<GrammarPacker> packers = new ArrayList<>(grammars.size());
+ for (int i = 0; i < grammars.size(); i++) {
+ log.info("Starting GrammarPacker for " + grammars.get(i));
+ final String alignment_filename = alignments_filenames.isEmpty() ? null : alignments_filenames.get(i);
+ final String featuredump_filename = featuredump_filenames.isEmpty() ? null : featuredump_filenames.get(i);
+ final GrammarPacker packer = new GrammarPacker(
+ grammars.get(i),
+ config_filename,
+ outputs.get(i),
+ alignment_filename,
+ featuredump_filename,
+ grammar_alignments,
+ slice_size);
+ packers.add(packer);
+ }
+
+ // run all packers in sequence, accumulating vocabulary items
+ for (final GrammarPacker packer : packers) {
+ log.info("Starting GrammarPacker for " + packer.getGrammar());
+ packer.pack();
+ log.info("PackedGrammar located at " + packer.getOutputDirectory());
+ }
+
+ // for each packed grammar, overwrite the internally serialized vocabulary with the current global one.
+ for (final GrammarPacker packer : packers) {
+ log.info("Writing final common Vocabulary to " + packer.getOutputDirectory());
+ packer.writeVocabulary();
+ }
+ }
+
+ public static void main(String[] args) throws IOException {
+ final GrammarPackerCli cli = new GrammarPackerCli();
+ final CmdLineParser parser = new CmdLineParser(cli);
+
+ try {
+ parser.parseArgument(args);
+ cli.run();
+ } catch (CmdLineException e) {
+ log.info(e.toString());
+ parser.printUsage(System.err);
+ System.exit(1);
+ }
+ }
+
+}
diff --git a/src/joshua/util/FormatUtils.java b/src/joshua/util/FormatUtils.java
index bed0234..0ca6928 100644
--- a/src/joshua/util/FormatUtils.java
+++ b/src/joshua/util/FormatUtils.java
@@ -2,8 +2,6 @@
import java.io.PrintStream;
import java.io.UnsupportedEncodingException;
-import java.util.HashMap;
-import java.util.Map;
import java.util.regex.Pattern;
/**
@@ -13,12 +11,8 @@
* @author Lane Schwartz
*/
public class FormatUtils {
-
- private static Map<String, String> cache;
-
- static {
- cache = new HashMap<String, String>();
- }
+
+ private static final String INDEX_SEPARATOR = ",";
/**
* Determines whether the string is a nonterminal by checking that the first character is [
@@ -34,31 +28,38 @@
/**
* Nonterminals are stored in the vocabulary in square brackets. This removes them when you
* just want the raw nonterminal word.
+ * Supports indexed and non-indexed nonTerminals:
+ * [GOAL] -> GOAL
+ * [X,1] -> [X]
*
* @param nt the nonterminal, e.g., "[GOAL]"
* @return the cleaned nonterminal, e.g., "GOAL"
*/
- public static String cleanNonterminal(String nt) {
- if (isNonterminal(nt))
+ public static String cleanNonTerminal(String nt) {
+ if (isNonterminal(nt)) {
+ if (isIndexedNonTerminal(nt)) {
+ // strip ",.*]"
+ return nt.substring(1, nt.indexOf(INDEX_SEPARATOR));
+ }
+ // strip "]"
return nt.substring(1, nt.length() - 1);
+ }
return nt;
}
-
- public static String cleanIndexedNonterminal(String nt) {
- return nt.substring(1, nt.length() - 3);
+
+ private static boolean isIndexedNonTerminal(String nt) {
+ return nt.contains(INDEX_SEPARATOR);
}
- public static String stripNt(String nt) {
- String stripped = cache.get(nt);
- if (stripped == null) {
- stripped = markup(cleanIndexedNonterminal(nt));
- cache.put(nt, stripped);
- }
- return stripped;
+ /**
+ * Removes the index from a nonTerminal: [X,1] -> [X].
+ */
+ public static String stripNonTerminalIndex(String nt) {
+ return markup(cleanNonTerminal(nt));
}
public static int getNonterminalIndex(String nt) {
- return Integer.parseInt(nt.substring(nt.length() - 2, nt.length() - 1));
+ return Integer.parseInt(nt.substring(nt.indexOf(INDEX_SEPARATOR) + 1, nt.length() - 1));
}
/**
@@ -75,7 +76,7 @@
}
public static String markup(String nt, int index) {
- return "[" + nt + "," + index + "]";
+ return "[" + nt + INDEX_SEPARATOR + index + "]";
}
/**
diff --git a/src/joshua/util/MurmurHash.java b/src/joshua/util/MurmurHash.java
deleted file mode 100644
index d736d24..0000000
--- a/src/joshua/util/MurmurHash.java
+++ /dev/null
@@ -1,208 +0,0 @@
-package joshua.util;
-
-import java.io.UnsupportedEncodingException;
-
-/**
- * MurmurHash 2.0.
- *
- * The murmur hash is a relative fast hash function from http://murmurhash.googlepages.com/ for
- * platforms with efficient multiplication.
- *
- * This is a re-implementation of the original C code plus some additional features.
- *
- * Public domain.
- *
- * @author Viliam Holub
- * @version 1.0.2
- *
- */
-public final class MurmurHash {
-
- private final static String ENCODING = "UTF-16";
-
- /**
- * Generates 32 bit hash from byte array of the given length and seed.
- *
- * @param data byte array to hash
- * @param length length of the array to hash
- * @param seed initial seed value
- * @return 32 bit hash of the given array
- */
- public static int hash32(final byte[] data, int length, int seed) {
- // 'm' and 'r' are mixing constants generated offline.
- // They're not really 'magic', they just happen to work well.
- final int m = 0x5bd1e995;
- final int r = 24;
- // Initialize the hash to a random value
- int h = seed ^ length;
- int length4 = length / 4;
-
- for (int i = 0; i < length4; i++) {
- final int i4 = i * 4;
- int k =
- (data[i4 + 0] & 0xff) + ((data[i4 + 1] & 0xff) << 8) + ((data[i4 + 2] & 0xff) << 16)
- + ((data[i4 + 3] & 0xff) << 24);
- k *= m;
- k ^= k >>> r;
- k *= m;
- h *= m;
- h ^= k;
- }
-
- // Handle the last few bytes of the input array
- switch (length % 4) {
- case 3:
- h ^= (data[(length & ~3) + 2] & 0xff) << 16;
- case 2:
- h ^= (data[(length & ~3) + 1] & 0xff) << 8;
- case 1:
- h ^= (data[length & ~3] & 0xff);
- h *= m;
- }
-
- h ^= h >>> 13;
- h *= m;
- h ^= h >>> 15;
-
- return h;
- }
-
-
- /**
- * Generates 32 bit hash from byte array with default seed value.
- *
- * @param data byte array to hash
- * @param length length of the array to hash
- * @return 32 bit hash of the given array
- */
- public static int hash32(final byte[] data, int length) {
- return hash32(data, length, 0x9747b28c);
- }
-
-
- /**
- * Generates 32 bit hash from a string.
- *
- * @param text string to hash
- * @return 32 bit hash of the given string
- * @throws UnsupportedEncodingException
- */
- public static int hash32(final String text) throws UnsupportedEncodingException {
- final byte[] bytes = text.getBytes(ENCODING);
- return hash32(bytes, bytes.length);
- }
-
-
- /**
- * Generates 32 bit hash from a substring.
- *
- * @param text string to hash
- * @param from starting index
- * @param length length of the substring to hash
- * @return 32 bit hash of the given string
- * @throws UnsupportedEncodingException
- */
- public static int hash32(final String text, int from, int length)
- throws UnsupportedEncodingException {
- return hash32(text.substring(from, from + length));
- }
-
-
- /**
- * Generates 64 bit hash from byte array of the given length and seed.
- *
- * @param data byte array to hash
- * @param length length of the array to hash
- * @param seed initial seed value
- * @return 64 bit hash of the given array
- */
- public static long hash64(final byte[] data, int length, int seed) {
- final long m = 0xc6a4a7935bd1e995L;
- final int r = 47;
-
- long h = (seed & 0xffffffffl) ^ (length * m);
-
- int length8 = length / 8;
-
- for (int i = 0; i < length8; i++) {
- final int i8 = i * 8;
- long k =
- ((long) data[i8 + 0] & 0xff) + (((long) data[i8 + 1] & 0xff) << 8)
- + (((long) data[i8 + 2] & 0xff) << 16) + (((long) data[i8 + 3] & 0xff) << 24)
- + (((long) data[i8 + 4] & 0xff) << 32) + (((long) data[i8 + 5] & 0xff) << 40)
- + (((long) data[i8 + 6] & 0xff) << 48) + (((long) data[i8 + 7] & 0xff) << 56);
-
- k *= m;
- k ^= k >>> r;
- k *= m;
-
- h ^= k;
- h *= m;
- }
-
- switch (length % 8) {
- case 7:
- h ^= (long) (data[(length & ~7) + 6] & 0xff) << 48;
- case 6:
- h ^= (long) (data[(length & ~7) + 5] & 0xff) << 40;
- case 5:
- h ^= (long) (data[(length & ~7) + 4] & 0xff) << 32;
- case 4:
- h ^= (long) (data[(length & ~7) + 3] & 0xff) << 24;
- case 3:
- h ^= (long) (data[(length & ~7) + 2] & 0xff) << 16;
- case 2:
- h ^= (long) (data[(length & ~7) + 1] & 0xff) << 8;
- case 1:
- h ^= (long) (data[length & ~7] & 0xff);
- h *= m;
- };
-
- h ^= h >>> r;
- h *= m;
- h ^= h >>> r;
-
- return h;
- }
-
-
- /**
- * Generates 64 bit hash from byte array with default seed value.
- *
- * @param data byte array to hash
- * @param length length of the array to hash
- * @return 64 bit hash of the given string
- */
- public static long hash64(final byte[] data, int length) {
- return hash64(data, length, 0xe17a1465);
- }
-
-
- /**
- * Generates 64 bit hash from a string.
- *
- * @param text string to hash
- * @return 64 bit hash of the given string
- * @throws UnsupportedEncodingException
- */
- public static long hash64(final String text) throws UnsupportedEncodingException {
- byte[] bytes;
- bytes = text.getBytes(ENCODING);
- return hash64(bytes, bytes.length);
- }
-
-
- /**
- * Generates 64 bit hash from a substring.
- *
- * @param text string to hash
- * @param from starting index
- * @param length length of the substring to hash
- * @return 64 bit hash of the given array
- * @throws UnsupportedEncodingException
- */
- public static long hash64(final String text, int from, int length)
- throws UnsupportedEncodingException {
- return hash64(text.substring(from, from + length));
- }
-}
diff --git a/src/joshua/util/encoding/EncoderConfiguration.java b/src/joshua/util/encoding/EncoderConfiguration.java
index 4547599..da3bc4e 100644
--- a/src/joshua/util/encoding/EncoderConfiguration.java
+++ b/src/joshua/util/encoding/EncoderConfiguration.java
@@ -24,10 +24,16 @@
private boolean labeled;
+ private int numDenseFeatures = 0;
+
public EncoderConfiguration() {
this.outerToInner = new HashMap<Integer, Integer>();
}
+ public int getNumDenseFeatures() {
+ return numDenseFeatures;
+ }
+
public int getNumFeatures() {
return encoders.length;
}
@@ -59,6 +65,10 @@
if (labeled) {
String feature_name = in_stream.readUTF();
outer_id = Vocabulary.id(feature_name);
+ try {
+ Integer.parseInt(feature_name);
+ numDenseFeatures++;
+ } catch (NumberFormatException e) {}
} else {
outer_id = in_stream.readInt();
}
diff --git a/src/kenlm/.gitignore b/src/kenlm/.gitignore
new file mode 100644
index 0000000..6baae3e
--- /dev/null
+++ b/src/kenlm/.gitignore
@@ -0,0 +1,32 @@
+bin/
+lm/bin/
+util/bin/
+util/file_piece.cc.gz
+lib/
+dist
+*.swp
+*.o
+query
+build_binary
+kenlm_max_order
+lm/left_test
+lm/model_test
+util/bit_packing_test
+util/file_piece_test
+util/joint_sort_test
+util/probing_hash_table_test
+util/read_compressed_test
+util/sorted_uniform_test
+previous.sh
+jam-files/bjam-1_55
+jam-files/engine/bin.*/
+jam-files/engine/bootstrap/
+doc/
+._*
+include/
+windows/Win32
+windows/x64
+windows/*.user
+windows/*.sdf
+windows/*.opensdf
+windows/*.suo
diff --git a/src/kenlm/BUILDING b/src/kenlm/BUILDING
new file mode 100644
index 0000000..e706815
--- /dev/null
+++ b/src/kenlm/BUILDING
@@ -0,0 +1,10 @@
+If you have Boost >= 1.36.0 installed:
+ ./bjam
+ ./bjam install --prefix=/usr #optional
+
+If you only want the query code and do not care about compression (.gz, .bz2, and .xz):
+ ./compile_query_only.sh
+
+Windows:
+ The windows directory has visual studio files. Note that you need to compile
+ the kenlm project before build_binary and ngram_query projects.
diff --git a/src/kenlm/CMakeLists.txt b/src/kenlm/CMakeLists.txt
new file mode 100644
index 0000000..15d9931
--- /dev/null
+++ b/src/kenlm/CMakeLists.txt
@@ -0,0 +1,57 @@
+cmake_minimum_required(VERSION 2.8.8)
+#
+# The KenLM cmake files make use of add_library(... OBJECTS ...)
+#
+# This syntax allows grouping of source files when compiling
+# (effectively creating "fake" libraries based on source subdirs).
+#
+# This syntax was only added in cmake version 2.8.8
+#
+# see http://www.cmake.org/Wiki/CMake/Tutorials/Object_Library
+
+
+# This CMake file was created by Lane Schwartz <dowobeha@gmail.com>
+
+
+# Define a single cmake project
+project(kenlm)
+
+# Compile all executables into bin/
+set(EXECUTABLE_OUTPUT_PATH ${PROJECT_BINARY_DIR}/bin)
+
+# Compile all libraries into lib/
+set(LIBRARY_OUTPUT_PATH ${PROJECT_BINARY_DIR}/lib)
+
+# Tell cmake that we want unit tests to be compiled
+include(CTest)
+enable_testing()
+
+# Add our CMake helper functions
+include(cmake/KenLMFunctions.cmake)
+
+# And our helper modules
+list(APPEND CMAKE_MODULE_PATH ${PROJECT_SOURCE_DIR}/cmake/modules)
+
+# We need boost
+find_package(Boost 1.36.0 REQUIRED COMPONENTS
+ program_options
+ system
+ thread
+ unit_test_framework
+)
+
+
+
+
+# Define where include files live
+include_directories(
+ ${PROJECT_SOURCE_DIR}
+ ${Boost_INCLUDE_DIRS}
+)
+
+option(BUILD_INTERPOLATE "Build language model interpolation code" OFF)
+
+# Process subdirectories
+add_subdirectory(util)
+add_subdirectory(lm)
+
diff --git a/src/kenlm/COPYING b/src/kenlm/COPYING
new file mode 100644
index 0000000..4362b49
--- /dev/null
+++ b/src/kenlm/COPYING
@@ -0,0 +1,502 @@
+ GNU LESSER GENERAL PUBLIC LICENSE
+ Version 2.1, February 1999
+
+ Copyright (C) 1991, 1999 Free Software Foundation, Inc.
+ 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ Everyone is permitted to copy and distribute verbatim copies
+ of this license document, but changing it is not allowed.
+
+[This is the first released version of the Lesser GPL. It also counts
+ as the successor of the GNU Library Public License, version 2, hence
+ the version number 2.1.]
+
+ Preamble
+
+ The licenses for most software are designed to take away your
+freedom to share and change it. By contrast, the GNU General Public
+Licenses are intended to guarantee your freedom to share and change
+free software--to make sure the software is free for all its users.
+
+ This license, the Lesser General Public License, applies to some
+specially designated software packages--typically libraries--of the
+Free Software Foundation and other authors who decide to use it. You
+can use it too, but we suggest you first think carefully about whether
+this license or the ordinary General Public License is the better
+strategy to use in any particular case, based on the explanations below.
+
+ When we speak of free software, we are referring to freedom of use,
+not price. Our General Public Licenses are designed to make sure that
+you have the freedom to distribute copies of free software (and charge
+for this service if you wish); that you receive source code or can get
+it if you want it; that you can change the software and use pieces of
+it in new free programs; and that you are informed that you can do
+these things.
+
+ To protect your rights, we need to make restrictions that forbid
+distributors to deny you these rights or to ask you to surrender these
+rights. These restrictions translate to certain responsibilities for
+you if you distribute copies of the library or if you modify it.
+
+ For example, if you distribute copies of the library, whether gratis
+or for a fee, you must give the recipients all the rights that we gave
+you. You must make sure that they, too, receive or can get the source
+code. If you link other code with the library, you must provide
+complete object files to the recipients, so that they can relink them
+with the library after making changes to the library and recompiling
+it. And you must show them these terms so they know their rights.
+
+ We protect your rights with a two-step method: (1) we copyright the
+library, and (2) we offer you this license, which gives you legal
+permission to copy, distribute and/or modify the library.
+
+ To protect each distributor, we want to make it very clear that
+there is no warranty for the free library. Also, if the library is
+modified by someone else and passed on, the recipients should know
+that what they have is not the original version, so that the original
+author's reputation will not be affected by problems that might be
+introduced by others.
+
+ Finally, software patents pose a constant threat to the existence of
+any free program. We wish to make sure that a company cannot
+effectively restrict the users of a free program by obtaining a
+restrictive license from a patent holder. Therefore, we insist that
+any patent license obtained for a version of the library must be
+consistent with the full freedom of use specified in this license.
+
+ Most GNU software, including some libraries, is covered by the
+ordinary GNU General Public License. This license, the GNU Lesser
+General Public License, applies to certain designated libraries, and
+is quite different from the ordinary General Public License. We use
+this license for certain libraries in order to permit linking those
+libraries into non-free programs.
+
+ When a program is linked with a library, whether statically or using
+a shared library, the combination of the two is legally speaking a
+combined work, a derivative of the original library. The ordinary
+General Public License therefore permits such linking only if the
+entire combination fits its criteria of freedom. The Lesser General
+Public License permits more lax criteria for linking other code with
+the library.
+
+ We call this license the "Lesser" General Public License because it
+does Less to protect the user's freedom than the ordinary General
+Public License. It also provides other free software developers Less
+of an advantage over competing non-free programs. These disadvantages
+are the reason we use the ordinary General Public License for many
+libraries. However, the Lesser license provides advantages in certain
+special circumstances.
+
+ For example, on rare occasions, there may be a special need to
+encourage the widest possible use of a certain library, so that it becomes
+a de-facto standard. To achieve this, non-free programs must be
+allowed to use the library. A more frequent case is that a free
+library does the same job as widely used non-free libraries. In this
+case, there is little to gain by limiting the free library to free
+software only, so we use the Lesser General Public License.
+
+ In other cases, permission to use a particular library in non-free
+programs enables a greater number of people to use a large body of
+free software. For example, permission to use the GNU C Library in
+non-free programs enables many more people to use the whole GNU
+operating system, as well as its variant, the GNU/Linux operating
+system.
+
+ Although the Lesser General Public License is Less protective of the
+users' freedom, it does ensure that the user of a program that is
+linked with the Library has the freedom and the wherewithal to run
+that program using a modified version of the Library.
+
+ The precise terms and conditions for copying, distribution and
+modification follow. Pay close attention to the difference between a
+"work based on the library" and a "work that uses the library". The
+former contains code derived from the library, whereas the latter must
+be combined with the library in order to run.
+
+ GNU LESSER GENERAL PUBLIC LICENSE
+ TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
+
+ 0. This License Agreement applies to any software library or other
+program which contains a notice placed by the copyright holder or
+other authorized party saying it may be distributed under the terms of
+this Lesser General Public License (also called "this License").
+Each licensee is addressed as "you".
+
+ A "library" means a collection of software functions and/or data
+prepared so as to be conveniently linked with application programs
+(which use some of those functions and data) to form executables.
+
+ The "Library", below, refers to any such software library or work
+which has been distributed under these terms. A "work based on the
+Library" means either the Library or any derivative work under
+copyright law: that is to say, a work containing the Library or a
+portion of it, either verbatim or with modifications and/or translated
+straightforwardly into another language. (Hereinafter, translation is
+included without limitation in the term "modification".)
+
+ "Source code" for a work means the preferred form of the work for
+making modifications to it. For a library, complete source code means
+all the source code for all modules it contains, plus any associated
+interface definition files, plus the scripts used to control compilation
+and installation of the library.
+
+ Activities other than copying, distribution and modification are not
+covered by this License; they are outside its scope. The act of
+running a program using the Library is not restricted, and output from
+such a program is covered only if its contents constitute a work based
+on the Library (independent of the use of the Library in a tool for
+writing it). Whether that is true depends on what the Library does
+and what the program that uses the Library does.
+
+ 1. You may copy and distribute verbatim copies of the Library's
+complete source code as you receive it, in any medium, provided that
+you conspicuously and appropriately publish on each copy an
+appropriate copyright notice and disclaimer of warranty; keep intact
+all the notices that refer to this License and to the absence of any
+warranty; and distribute a copy of this License along with the
+Library.
+
+ You may charge a fee for the physical act of transferring a copy,
+and you may at your option offer warranty protection in exchange for a
+fee.
+
+ 2. You may modify your copy or copies of the Library or any portion
+of it, thus forming a work based on the Library, and copy and
+distribute such modifications or work under the terms of Section 1
+above, provided that you also meet all of these conditions:
+
+ a) The modified work must itself be a software library.
+
+ b) You must cause the files modified to carry prominent notices
+ stating that you changed the files and the date of any change.
+
+ c) You must cause the whole of the work to be licensed at no
+ charge to all third parties under the terms of this License.
+
+ d) If a facility in the modified Library refers to a function or a
+ table of data to be supplied by an application program that uses
+ the facility, other than as an argument passed when the facility
+ is invoked, then you must make a good faith effort to ensure that,
+ in the event an application does not supply such function or
+ table, the facility still operates, and performs whatever part of
+ its purpose remains meaningful.
+
+ (For example, a function in a library to compute square roots has
+ a purpose that is entirely well-defined independent of the
+ application. Therefore, Subsection 2d requires that any
+ application-supplied function or table used by this function must
+ be optional: if the application does not supply it, the square
+ root function must still compute square roots.)
+
+These requirements apply to the modified work as a whole. If
+identifiable sections of that work are not derived from the Library,
+and can be reasonably considered independent and separate works in
+themselves, then this License, and its terms, do not apply to those
+sections when you distribute them as separate works. But when you
+distribute the same sections as part of a whole which is a work based
+on the Library, the distribution of the whole must be on the terms of
+this License, whose permissions for other licensees extend to the
+entire whole, and thus to each and every part regardless of who wrote
+it.
+
+Thus, it is not the intent of this section to claim rights or contest
+your rights to work written entirely by you; rather, the intent is to
+exercise the right to control the distribution of derivative or
+collective works based on the Library.
+
+In addition, mere aggregation of another work not based on the Library
+with the Library (or with a work based on the Library) on a volume of
+a storage or distribution medium does not bring the other work under
+the scope of this License.
+
+ 3. You may opt to apply the terms of the ordinary GNU General Public
+License instead of this License to a given copy of the Library. To do
+this, you must alter all the notices that refer to this License, so
+that they refer to the ordinary GNU General Public License, version 2,
+instead of to this License. (If a newer version than version 2 of the
+ordinary GNU General Public License has appeared, then you can specify
+that version instead if you wish.) Do not make any other change in
+these notices.
+
+ Once this change is made in a given copy, it is irreversible for
+that copy, so the ordinary GNU General Public License applies to all
+subsequent copies and derivative works made from that copy.
+
+ This option is useful when you wish to copy part of the code of
+the Library into a program that is not a library.
+
+ 4. You may copy and distribute the Library (or a portion or
+derivative of it, under Section 2) in object code or executable form
+under the terms of Sections 1 and 2 above provided that you accompany
+it with the complete corresponding machine-readable source code, which
+must be distributed under the terms of Sections 1 and 2 above on a
+medium customarily used for software interchange.
+
+ If distribution of object code is made by offering access to copy
+from a designated place, then offering equivalent access to copy the
+source code from the same place satisfies the requirement to
+distribute the source code, even though third parties are not
+compelled to copy the source along with the object code.
+
+ 5. A program that contains no derivative of any portion of the
+Library, but is designed to work with the Library by being compiled or
+linked with it, is called a "work that uses the Library". Such a
+work, in isolation, is not a derivative work of the Library, and
+therefore falls outside the scope of this License.
+
+ However, linking a "work that uses the Library" with the Library
+creates an executable that is a derivative of the Library (because it
+contains portions of the Library), rather than a "work that uses the
+library". The executable is therefore covered by this License.
+Section 6 states terms for distribution of such executables.
+
+ When a "work that uses the Library" uses material from a header file
+that is part of the Library, the object code for the work may be a
+derivative work of the Library even though the source code is not.
+Whether this is true is especially significant if the work can be
+linked without the Library, or if the work is itself a library. The
+threshold for this to be true is not precisely defined by law.
+
+ If such an object file uses only numerical parameters, data
+structure layouts and accessors, and small macros and small inline
+functions (ten lines or less in length), then the use of the object
+file is unrestricted, regardless of whether it is legally a derivative
+work. (Executables containing this object code plus portions of the
+Library will still fall under Section 6.)
+
+ Otherwise, if the work is a derivative of the Library, you may
+distribute the object code for the work under the terms of Section 6.
+Any executables containing that work also fall under Section 6,
+whether or not they are linked directly with the Library itself.
+
+ 6. As an exception to the Sections above, you may also combine or
+link a "work that uses the Library" with the Library to produce a
+work containing portions of the Library, and distribute that work
+under terms of your choice, provided that the terms permit
+modification of the work for the customer's own use and reverse
+engineering for debugging such modifications.
+
+ You must give prominent notice with each copy of the work that the
+Library is used in it and that the Library and its use are covered by
+this License. You must supply a copy of this License. If the work
+during execution displays copyright notices, you must include the
+copyright notice for the Library among them, as well as a reference
+directing the user to the copy of this License. Also, you must do one
+of these things:
+
+ a) Accompany the work with the complete corresponding
+ machine-readable source code for the Library including whatever
+ changes were used in the work (which must be distributed under
+ Sections 1 and 2 above); and, if the work is an executable linked
+ with the Library, with the complete machine-readable "work that
+ uses the Library", as object code and/or source code, so that the
+ user can modify the Library and then relink to produce a modified
+ executable containing the modified Library. (It is understood
+ that the user who changes the contents of definitions files in the
+ Library will not necessarily be able to recompile the application
+ to use the modified definitions.)
+
+ b) Use a suitable shared library mechanism for linking with the
+ Library. A suitable mechanism is one that (1) uses at run time a
+ copy of the library already present on the user's computer system,
+ rather than copying library functions into the executable, and (2)
+ will operate properly with a modified version of the library, if
+ the user installs one, as long as the modified version is
+ interface-compatible with the version that the work was made with.
+
+ c) Accompany the work with a written offer, valid for at
+ least three years, to give the same user the materials
+ specified in Subsection 6a, above, for a charge no more
+ than the cost of performing this distribution.
+
+ d) If distribution of the work is made by offering access to copy
+ from a designated place, offer equivalent access to copy the above
+ specified materials from the same place.
+
+ e) Verify that the user has already received a copy of these
+ materials or that you have already sent this user a copy.
+
+ For an executable, the required form of the "work that uses the
+Library" must include any data and utility programs needed for
+reproducing the executable from it. However, as a special exception,
+the materials to be distributed need not include anything that is
+normally distributed (in either source or binary form) with the major
+components (compiler, kernel, and so on) of the operating system on
+which the executable runs, unless that component itself accompanies
+the executable.
+
+ It may happen that this requirement contradicts the license
+restrictions of other proprietary libraries that do not normally
+accompany the operating system. Such a contradiction means you cannot
+use both them and the Library together in an executable that you
+distribute.
+
+ 7. You may place library facilities that are a work based on the
+Library side-by-side in a single library together with other library
+facilities not covered by this License, and distribute such a combined
+library, provided that the separate distribution of the work based on
+the Library and of the other library facilities is otherwise
+permitted, and provided that you do these two things:
+
+ a) Accompany the combined library with a copy of the same work
+ based on the Library, uncombined with any other library
+ facilities. This must be distributed under the terms of the
+ Sections above.
+
+ b) Give prominent notice with the combined library of the fact
+ that part of it is a work based on the Library, and explaining
+ where to find the accompanying uncombined form of the same work.
+
+ 8. You may not copy, modify, sublicense, link with, or distribute
+the Library except as expressly provided under this License. Any
+attempt otherwise to copy, modify, sublicense, link with, or
+distribute the Library is void, and will automatically terminate your
+rights under this License. However, parties who have received copies,
+or rights, from you under this License will not have their licenses
+terminated so long as such parties remain in full compliance.
+
+ 9. You are not required to accept this License, since you have not
+signed it. However, nothing else grants you permission to modify or
+distribute the Library or its derivative works. These actions are
+prohibited by law if you do not accept this License. Therefore, by
+modifying or distributing the Library (or any work based on the
+Library), you indicate your acceptance of this License to do so, and
+all its terms and conditions for copying, distributing or modifying
+the Library or works based on it.
+
+ 10. Each time you redistribute the Library (or any work based on the
+Library), the recipient automatically receives a license from the
+original licensor to copy, distribute, link with or modify the Library
+subject to these terms and conditions. You may not impose any further
+restrictions on the recipients' exercise of the rights granted herein.
+You are not responsible for enforcing compliance by third parties with
+this License.
+
+ 11. If, as a consequence of a court judgment or allegation of patent
+infringement or for any other reason (not limited to patent issues),
+conditions are imposed on you (whether by court order, agreement or
+otherwise) that contradict the conditions of this License, they do not
+excuse you from the conditions of this License. If you cannot
+distribute so as to satisfy simultaneously your obligations under this
+License and any other pertinent obligations, then as a consequence you
+may not distribute the Library at all. For example, if a patent
+license would not permit royalty-free redistribution of the Library by
+all those who receive copies directly or indirectly through you, then
+the only way you could satisfy both it and this License would be to
+refrain entirely from distribution of the Library.
+
+If any portion of this section is held invalid or unenforceable under any
+particular circumstance, the balance of the section is intended to apply,
+and the section as a whole is intended to apply in other circumstances.
+
+It is not the purpose of this section to induce you to infringe any
+patents or other property right claims or to contest validity of any
+such claims; this section has the sole purpose of protecting the
+integrity of the free software distribution system which is
+implemented by public license practices. Many people have made
+generous contributions to the wide range of software distributed
+through that system in reliance on consistent application of that
+system; it is up to the author/donor to decide if he or she is willing
+to distribute software through any other system and a licensee cannot
+impose that choice.
+
+This section is intended to make thoroughly clear what is believed to
+be a consequence of the rest of this License.
+
+ 12. If the distribution and/or use of the Library is restricted in
+certain countries either by patents or by copyrighted interfaces, the
+original copyright holder who places the Library under this License may add
+an explicit geographical distribution limitation excluding those countries,
+so that distribution is permitted only in or among countries not thus
+excluded. In such case, this License incorporates the limitation as if
+written in the body of this License.
+
+ 13. The Free Software Foundation may publish revised and/or new
+versions of the Lesser General Public License from time to time.
+Such new versions will be similar in spirit to the present version,
+but may differ in detail to address new problems or concerns.
+
+Each version is given a distinguishing version number. If the Library
+specifies a version number of this License which applies to it and
+"any later version", you have the option of following the terms and
+conditions either of that version or of any later version published by
+the Free Software Foundation. If the Library does not specify a
+license version number, you may choose any version ever published by
+the Free Software Foundation.
+
+ 14. If you wish to incorporate parts of the Library into other free
+programs whose distribution conditions are incompatible with these,
+write to the author to ask for permission. For software which is
+copyrighted by the Free Software Foundation, write to the Free
+Software Foundation; we sometimes make exceptions for this. Our
+decision will be guided by the two goals of preserving the free status
+of all derivatives of our free software and of promoting the sharing
+and reuse of software generally.
+
+ NO WARRANTY
+
+ 15. BECAUSE THE LIBRARY IS LICENSED FREE OF CHARGE, THERE IS NO
+WARRANTY FOR THE LIBRARY, TO THE EXTENT PERMITTED BY APPLICABLE LAW.
+EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR
+OTHER PARTIES PROVIDE THE LIBRARY "AS IS" WITHOUT WARRANTY OF ANY
+KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE
+LIBRARY IS WITH YOU. SHOULD THE LIBRARY PROVE DEFECTIVE, YOU ASSUME
+THE COST OF ALL NECESSARY SERVICING, REPAIR OR CORRECTION.
+
+ 16. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN
+WRITING WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY
+AND/OR REDISTRIBUTE THE LIBRARY AS PERMITTED ABOVE, BE LIABLE TO YOU
+FOR DAMAGES, INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR
+CONSEQUENTIAL DAMAGES ARISING OUT OF THE USE OR INABILITY TO USE THE
+LIBRARY (INCLUDING BUT NOT LIMITED TO LOSS OF DATA OR DATA BEING
+RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD PARTIES OR A
+FAILURE OF THE LIBRARY TO OPERATE WITH ANY OTHER SOFTWARE), EVEN IF
+SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
+DAMAGES.
+
+ END OF TERMS AND CONDITIONS
+
+ How to Apply These Terms to Your New Libraries
+
+ If you develop a new library, and you want it to be of the greatest
+possible use to the public, we recommend making it free software that
+everyone can redistribute and change. You can do so by permitting
+redistribution under these terms (or, alternatively, under the terms of the
+ordinary General Public License).
+
+ To apply these terms, attach the following notices to the library. It is
+safest to attach them to the start of each source file to most effectively
+convey the exclusion of warranty; and each file should have at least the
+"copyright" line and a pointer to where the full notice is found.
+
+ <one line to give the library's name and a brief idea of what it does.>
+ Copyright (C) <year> <name of author>
+
+ This library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ This library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with this library; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+
+Also add information on how to contact you by electronic and paper mail.
+
+You should also get your employer (if you work as a programmer) or your
+school, if any, to sign a "copyright disclaimer" for the library, if
+necessary. Here is a sample; alter the names:
+
+ Yoyodyne, Inc., hereby disclaims all copyright interest in the
+ library `Frob' (a library for tweaking knobs) written by James Random Hacker.
+
+ <signature of Ty Coon>, 1 April 1990
+ Ty Coon, President of Vice
+
+That's all there is to it!
diff --git a/src/joshua/decoder/ff/lm/kenlm/COPYING b/src/kenlm/COPYING.3
similarity index 100%
rename from src/joshua/decoder/ff/lm/kenlm/COPYING
rename to src/kenlm/COPYING.3
diff --git a/src/joshua/decoder/ff/lm/kenlm/COPYING.LESSER b/src/kenlm/COPYING.LESSER.3
similarity index 100%
rename from src/joshua/decoder/ff/lm/kenlm/COPYING.LESSER
rename to src/kenlm/COPYING.LESSER.3
diff --git a/src/kenlm/Doxyfile b/src/kenlm/Doxyfile
new file mode 100644
index 0000000..3abab65
--- /dev/null
+++ b/src/kenlm/Doxyfile
@@ -0,0 +1,1519 @@
+# Doxyfile 1.6.1
+
+# This file describes the settings to be used by the documentation system
+# doxygen (www.doxygen.org) for a project
+#
+# All text after a hash (#) is considered a comment and will be ignored
+# The format is:
+# TAG = value [value, ...]
+# For lists items can also be appended using:
+# TAG += value [value, ...]
+# Values that contain spaces should be placed between quotes (" ")
+
+#---------------------------------------------------------------------------
+# Project related configuration options
+#---------------------------------------------------------------------------
+
+# This tag specifies the encoding used for all characters in the config file
+# that follow. The default is UTF-8 which is also the encoding used for all
+# text before the first occurrence of this tag. Doxygen uses libiconv (or the
+# iconv built into libc) for the transcoding. See
+# http://www.gnu.org/software/libiconv for the list of possible encodings.
+
+DOXYFILE_ENCODING = UTF-8
+
+# The PROJECT_NAME tag is a single word (or a sequence of words surrounded
+# by quotes) that should identify the project.
+
+PROJECT_NAME = KenLM
+
+# The PROJECT_NUMBER tag can be used to enter a project or revision number.
+# This could be handy for archiving the generated documentation or
+# if some version control system is used.
+
+PROJECT_NUMBER =
+
+# The OUTPUT_DIRECTORY tag is used to specify the (relative or absolute)
+# base path where the generated documentation will be put.
+# If a relative path is entered, it will be relative to the location
+# where doxygen was started. If left blank the current directory will be used.
+
+OUTPUT_DIRECTORY = doc
+
+# If the CREATE_SUBDIRS tag is set to YES, then doxygen will create
+# 4096 sub-directories (in 2 levels) under the output directory of each output
+# format and will distribute the generated files over these directories.
+# Enabling this option can be useful when feeding doxygen a huge amount of
+# source files, where putting all generated files in the same directory would
+# otherwise cause performance problems for the file system.
+
+CREATE_SUBDIRS = NO
+
+# The OUTPUT_LANGUAGE tag is used to specify the language in which all
+# documentation generated by doxygen is written. Doxygen will use this
+# information to generate all constant output in the proper language.
+# The default language is English, other supported languages are:
+# Afrikaans, Arabic, Brazilian, Catalan, Chinese, Chinese-Traditional,
+# Croatian, Czech, Danish, Dutch, Esperanto, Farsi, Finnish, French, German,
+# Greek, Hungarian, Italian, Japanese, Japanese-en (Japanese with English
+# messages), Korean, Korean-en, Lithuanian, Norwegian, Macedonian, Persian,
+# Polish, Portuguese, Romanian, Russian, Serbian, Serbian-Cyrilic, Slovak,
+# Slovene, Spanish, Swedish, Ukrainian, and Vietnamese.
+
+OUTPUT_LANGUAGE = English
+
+# If the BRIEF_MEMBER_DESC tag is set to YES (the default) Doxygen will
+# include brief member descriptions after the members that are listed in
+# the file and class documentation (similar to JavaDoc).
+# Set to NO to disable this.
+
+BRIEF_MEMBER_DESC = YES
+
+# If the REPEAT_BRIEF tag is set to YES (the default) Doxygen will prepend
+# the brief description of a member or function before the detailed description.
+# Note: if both HIDE_UNDOC_MEMBERS and BRIEF_MEMBER_DESC are set to NO, the
+# brief descriptions will be completely suppressed.
+
+REPEAT_BRIEF = YES
+
+# This tag implements a quasi-intelligent brief description abbreviator
+# that is used to form the text in various listings. Each string
+# in this list, if found as the leading text of the brief description, will be
+# stripped from the text and the result after processing the whole list, is
+# used as the annotated text. Otherwise, the brief description is used as-is.
+# If left blank, the following values are used ("$name" is automatically
+# replaced with the name of the entity): "The $name class" "The $name widget"
+# "The $name file" "is" "provides" "specifies" "contains"
+# "represents" "a" "an" "the"
+
+ABBREVIATE_BRIEF =
+
+# If the ALWAYS_DETAILED_SEC and REPEAT_BRIEF tags are both set to YES then
+# Doxygen will generate a detailed section even if there is only a brief
+# description.
+
+ALWAYS_DETAILED_SEC = YES
+
+# If the INLINE_INHERITED_MEMB tag is set to YES, doxygen will show all
+# inherited members of a class in the documentation of that class as if those
+# members were ordinary class members. Constructors, destructors and assignment
+# operators of the base classes will not be shown.
+
+INLINE_INHERITED_MEMB = NO
+
+# If the FULL_PATH_NAMES tag is set to YES then Doxygen will prepend the full
+# path before files name in the file list and in the header files. If set
+# to NO the shortest path that makes the file name unique will be used.
+
+FULL_PATH_NAMES = YES
+
+# If the FULL_PATH_NAMES tag is set to YES then the STRIP_FROM_PATH tag
+# can be used to strip a user-defined part of the path. Stripping is
+# only done if one of the specified strings matches the left-hand part of
+# the path. The tag can be used to show relative paths in the file list.
+# If left blank the directory from which doxygen is run is used as the
+# path to strip.
+
+STRIP_FROM_PATH =
+
+# The STRIP_FROM_INC_PATH tag can be used to strip a user-defined part of
+# the path mentioned in the documentation of a class, which tells
+# the reader which header file to include in order to use a class.
+# If left blank only the name of the header file containing the class
+# definition is used. Otherwise one should specify the include paths that
+# are normally passed to the compiler using the -I flag.
+
+STRIP_FROM_INC_PATH =
+
+# If the SHORT_NAMES tag is set to YES, doxygen will generate much shorter
+# (but less readable) file names. This can be useful if your file system
+# doesn't support long names like on DOS, Mac, or CD-ROM.
+
+SHORT_NAMES = NO
+
+# If the JAVADOC_AUTOBRIEF tag is set to YES then Doxygen
+# will interpret the first line (until the first dot) of a JavaDoc-style
+# comment as the brief description. If set to NO, the JavaDoc
+# comments will behave just like regular Qt-style comments
+# (thus requiring an explicit @brief command for a brief description.)
+
+JAVADOC_AUTOBRIEF = YES
+
+# If the QT_AUTOBRIEF tag is set to YES then Doxygen will
+# interpret the first line (until the first dot) of a Qt-style
+# comment as the brief description. If set to NO, the comments
+# will behave just like regular Qt-style comments (thus requiring
+# an explicit \brief command for a brief description.)
+
+QT_AUTOBRIEF = NO
+
+# The MULTILINE_CPP_IS_BRIEF tag can be set to YES to make Doxygen
+# treat a multi-line C++ special comment block (i.e. a block of //! or ///
+# comments) as a brief description. This used to be the default behaviour.
+# The new default is to treat a multi-line C++ comment block as a detailed
+# description. Set this tag to YES if you prefer the old behaviour instead.
+
+MULTILINE_CPP_IS_BRIEF = NO
+
+# If the INHERIT_DOCS tag is set to YES (the default) then an undocumented
+# member inherits the documentation from any documented member that it
+# re-implements.
+
+INHERIT_DOCS = YES
+
+# If the SEPARATE_MEMBER_PAGES tag is set to YES, then doxygen will produce
+# a new page for each member. If set to NO, the documentation of a member will
+# be part of the file/class/namespace that contains it.
+
+SEPARATE_MEMBER_PAGES = NO
+
+# The TAB_SIZE tag can be used to set the number of spaces in a tab.
+# Doxygen uses this value to replace tabs by spaces in code fragments.
+
+TAB_SIZE = 8
+
+# This tag can be used to specify a number of aliases that acts
+# as commands in the documentation. An alias has the form "name=value".
+# For example adding "sideeffect=\par Side Effects:\n" will allow you to
+# put the command \sideeffect (or @sideeffect) in the documentation, which
+# will result in a user-defined paragraph with heading "Side Effects:".
+# You can put \n's in the value part of an alias to insert newlines.
+
+ALIASES =
+
+# Set the OPTIMIZE_OUTPUT_FOR_C tag to YES if your project consists of C
+# sources only. Doxygen will then generate output that is more tailored for C.
+# For instance, some of the names that are used will be different. The list
+# of all members will be omitted, etc.
+
+OPTIMIZE_OUTPUT_FOR_C = NO
+
+# Set the OPTIMIZE_OUTPUT_JAVA tag to YES if your project consists of Java
+# sources only. Doxygen will then generate output that is more tailored for
+# Java. For instance, namespaces will be presented as packages, qualified
+# scopes will look different, etc.
+
+OPTIMIZE_OUTPUT_JAVA = NO
+
+# Set the OPTIMIZE_FOR_FORTRAN tag to YES if your project consists of Fortran
+# sources only. Doxygen will then generate output that is more tailored for
+# Fortran.
+
+OPTIMIZE_FOR_FORTRAN = NO
+
+# Set the OPTIMIZE_OUTPUT_VHDL tag to YES if your project consists of VHDL
+# sources. Doxygen will then generate output that is tailored for
+# VHDL.
+
+OPTIMIZE_OUTPUT_VHDL = NO
+
+# Doxygen selects the parser to use depending on the extension of the files it parses.
+# With this tag you can assign which parser to use for a given extension.
+# Doxygen has a built-in mapping, but you can override or extend it using this tag.
+# The format is ext=language, where ext is a file extension, and language is one of
+# the parsers supported by doxygen: IDL, Java, Javascript, C#, C, C++, D, PHP,
+# Objective-C, Python, Fortran, VHDL, C, C++. For instance to make doxygen treat
+# .inc files as Fortran files (default is PHP), and .f files as C (default is Fortran),
+# use: inc=Fortran f=C. Note that for custom extensions you also need to set FILE_PATTERNS otherwise the files are not read by doxygen.
+
+EXTENSION_MAPPING =
+
+# If you use STL classes (i.e. std::string, std::vector, etc.) but do not want
+# to include (a tag file for) the STL sources as input, then you should
+# set this tag to YES in order to let doxygen match functions declarations and
+# definitions whose arguments contain STL classes (e.g. func(std::string); v.s.
+# func(std::string) {}). This also make the inheritance and collaboration
+# diagrams that involve STL classes more complete and accurate.
+
+BUILTIN_STL_SUPPORT = NO
+
+# If you use Microsoft's C++/CLI language, you should set this option to YES to
+# enable parsing support.
+
+CPP_CLI_SUPPORT = NO
+
+# Set the SIP_SUPPORT tag to YES if your project consists of sip sources only.
+# Doxygen will parse them like normal C++ but will assume all classes use public
+# instead of private inheritance when no explicit protection keyword is present.
+
+SIP_SUPPORT = NO
+
+# For Microsoft's IDL there are propget and propput attributes to indicate getter
+# and setter methods for a property. Setting this option to YES (the default)
+# will make doxygen to replace the get and set methods by a property in the
+# documentation. This will only work if the methods are indeed getting or
+# setting a simple type. If this is not the case, or you want to show the
+# methods anyway, you should set this option to NO.
+
+IDL_PROPERTY_SUPPORT = YES
+
+# If member grouping is used in the documentation and the DISTRIBUTE_GROUP_DOC
+# tag is set to YES, then doxygen will reuse the documentation of the first
+# member in the group (if any) for the other members of the group. By default
+# all members of a group must be documented explicitly.
+
+DISTRIBUTE_GROUP_DOC = NO
+
+# Set the SUBGROUPING tag to YES (the default) to allow class member groups of
+# the same type (for instance a group of public functions) to be put as a
+# subgroup of that type (e.g. under the Public Functions section). Set it to
+# NO to prevent subgrouping. Alternatively, this can be done per class using
+# the \nosubgrouping command.
+
+SUBGROUPING = YES
+
+# When TYPEDEF_HIDES_STRUCT is enabled, a typedef of a struct, union, or enum
+# is documented as struct, union, or enum with the name of the typedef. So
+# typedef struct TypeS {} TypeT, will appear in the documentation as a struct
+# with name TypeT. When disabled the typedef will appear as a member of a file,
+# namespace, or class. And the struct will be named TypeS. This can typically
+# be useful for C code in case the coding convention dictates that all compound
+# types are typedef'ed and only the typedef is referenced, never the tag name.
+
+TYPEDEF_HIDES_STRUCT = NO
+
+# The SYMBOL_CACHE_SIZE determines the size of the internal cache use to
+# determine which symbols to keep in memory and which to flush to disk.
+# When the cache is full, less often used symbols will be written to disk.
+# For small to medium size projects (<1000 input files) the default value is
+# probably good enough. For larger projects a too small cache size can cause
+# doxygen to be busy swapping symbols to and from disk most of the time
+# causing a significant performance penalty.
+# If the system has enough physical memory increasing the cache will improve the
+# performance by keeping more symbols in memory. Note that the value works on
+# a logarithmic scale so increasing the size by one will roughly double the
+# memory usage. The cache size is given by this formula:
+# 2^(16+SYMBOL_CACHE_SIZE). The valid range is 0..9, the default is 0,
+# corresponding to a cache size of 2^16 = 65536 symbols
+
+SYMBOL_CACHE_SIZE = 0
+
+#---------------------------------------------------------------------------
+# Build related configuration options
+#---------------------------------------------------------------------------
+
+# If the EXTRACT_ALL tag is set to YES doxygen will assume all entities in
+# documentation are documented, even if no documentation was available.
+# Private class members and static file members will be hidden unless
+# the EXTRACT_PRIVATE and EXTRACT_STATIC tags are set to YES
+
+EXTRACT_ALL = NO
+
+# If the EXTRACT_PRIVATE tag is set to YES all private members of a class
+# will be included in the documentation.
+
+EXTRACT_PRIVATE = YES
+
+# If the EXTRACT_STATIC tag is set to YES all static members of a file
+# will be included in the documentation.
+
+EXTRACT_STATIC = NO
+
+# If the EXTRACT_LOCAL_CLASSES tag is set to YES classes (and structs)
+# defined locally in source files will be included in the documentation.
+# If set to NO only classes defined in header files are included.
+
+EXTRACT_LOCAL_CLASSES = YES
+
+# This flag is only useful for Objective-C code. When set to YES local
+# methods, which are defined in the implementation section but not in
+# the interface are included in the documentation.
+# If set to NO (the default) only methods in the interface are included.
+
+EXTRACT_LOCAL_METHODS = NO
+
+# If this flag is set to YES, the members of anonymous namespaces will be
+# extracted and appear in the documentation as a namespace called
+# 'anonymous_namespace{file}', where file will be replaced with the base
+# name of the file that contains the anonymous namespace. By default
+# anonymous namespace are hidden.
+
+EXTRACT_ANON_NSPACES = NO
+
+# If the HIDE_UNDOC_MEMBERS tag is set to YES, Doxygen will hide all
+# undocumented members of documented classes, files or namespaces.
+# If set to NO (the default) these members will be included in the
+# various overviews, but no documentation section is generated.
+# This option has no effect if EXTRACT_ALL is enabled.
+
+HIDE_UNDOC_MEMBERS = YES
+
+# If the HIDE_UNDOC_CLASSES tag is set to YES, Doxygen will hide all
+# undocumented classes that are normally visible in the class hierarchy.
+# If set to NO (the default) these classes will be included in the various
+# overviews. This option has no effect if EXTRACT_ALL is enabled.
+
+HIDE_UNDOC_CLASSES = YES
+
+# If the HIDE_FRIEND_COMPOUNDS tag is set to YES, Doxygen will hide all
+# friend (class|struct|union) declarations.
+# If set to NO (the default) these declarations will be included in the
+# documentation.
+
+HIDE_FRIEND_COMPOUNDS = NO
+
+# If the HIDE_IN_BODY_DOCS tag is set to YES, Doxygen will hide any
+# documentation blocks found inside the body of a function.
+# If set to NO (the default) these blocks will be appended to the
+# function's detailed documentation block.
+
+HIDE_IN_BODY_DOCS = NO
+
+# The INTERNAL_DOCS tag determines if documentation
+# that is typed after a \internal command is included. If the tag is set
+# to NO (the default) then the documentation will be excluded.
+# Set it to YES to include the internal documentation.
+
+INTERNAL_DOCS = NO
+
+# If the CASE_SENSE_NAMES tag is set to NO then Doxygen will only generate
+# file names in lower-case letters. If set to YES upper-case letters are also
+# allowed. This is useful if you have classes or files whose names only differ
+# in case and if your file system supports case sensitive file names. Windows
+# and Mac users are advised to set this option to NO.
+
+CASE_SENSE_NAMES = YES
+
+# If the HIDE_SCOPE_NAMES tag is set to NO (the default) then Doxygen
+# will show members with their full class and namespace scopes in the
+# documentation. If set to YES the scope will be hidden.
+
+HIDE_SCOPE_NAMES = NO
+
+# If the SHOW_INCLUDE_FILES tag is set to YES (the default) then Doxygen
+# will put a list of the files that are included by a file in the documentation
+# of that file.
+
+SHOW_INCLUDE_FILES = YES
+
+# If the INLINE_INFO tag is set to YES (the default) then a tag [inline]
+# is inserted in the documentation for inline members.
+
+INLINE_INFO = YES
+
+# If the SORT_MEMBER_DOCS tag is set to YES (the default) then doxygen
+# will sort the (detailed) documentation of file and class members
+# alphabetically by member name. If set to NO the members will appear in
+# declaration order.
+
+SORT_MEMBER_DOCS = YES
+
+# If the SORT_BRIEF_DOCS tag is set to YES then doxygen will sort the
+# brief documentation of file, namespace and class members alphabetically
+# by member name. If set to NO (the default) the members will appear in
+# declaration order.
+
+SORT_BRIEF_DOCS = NO
+
+# If the SORT_MEMBERS_CTORS_1ST tag is set to YES then doxygen will sort the (brief and detailed) documentation of class members so that constructors and destructors are listed first. If set to NO (the default) the constructors will appear in the respective orders defined by SORT_MEMBER_DOCS and SORT_BRIEF_DOCS. This tag will be ignored for brief docs if SORT_BRIEF_DOCS is set to NO and ignored for detailed docs if SORT_MEMBER_DOCS is set to NO.
+
+SORT_MEMBERS_CTORS_1ST = YES
+
+# If the SORT_GROUP_NAMES tag is set to YES then doxygen will sort the
+# hierarchy of group names into alphabetical order. If set to NO (the default)
+# the group names will appear in their defined order.
+
+SORT_GROUP_NAMES = NO
+
+# If the SORT_BY_SCOPE_NAME tag is set to YES, the class list will be
+# sorted by fully-qualified names, including namespaces. If set to
+# NO (the default), the class list will be sorted only by class name,
+# not including the namespace part.
+# Note: This option is not very useful if HIDE_SCOPE_NAMES is set to YES.
+# Note: This option applies only to the class list, not to the
+# alphabetical list.
+
+SORT_BY_SCOPE_NAME = YES
+
+# The GENERATE_TODOLIST tag can be used to enable (YES) or
+# disable (NO) the todo list. This list is created by putting \todo
+# commands in the documentation.
+
+GENERATE_TODOLIST = YES
+
+# The GENERATE_TESTLIST tag can be used to enable (YES) or
+# disable (NO) the test list. This list is created by putting \test
+# commands in the documentation.
+
+GENERATE_TESTLIST = YES
+
+# The GENERATE_BUGLIST tag can be used to enable (YES) or
+# disable (NO) the bug list. This list is created by putting \bug
+# commands in the documentation.
+
+GENERATE_BUGLIST = YES
+
+# The GENERATE_DEPRECATEDLIST tag can be used to enable (YES) or
+# disable (NO) the deprecated list. This list is created by putting
+# \deprecated commands in the documentation.
+
+GENERATE_DEPRECATEDLIST= YES
+
+# The ENABLED_SECTIONS tag can be used to enable conditional
+# documentation sections, marked by \if sectionname ... \endif.
+
+ENABLED_SECTIONS =
+
+# The MAX_INITIALIZER_LINES tag determines the maximum number of lines
+# the initial value of a variable or define consists of for it to appear in
+# the documentation. If the initializer consists of more lines than specified
+# here it will be hidden. Use a value of 0 to hide initializers completely.
+# The appearance of the initializer of individual variables and defines in the
+# documentation can be controlled using \showinitializer or \hideinitializer
+# command in the documentation regardless of this setting.
+
+MAX_INITIALIZER_LINES = 30
+
+# Set the SHOW_USED_FILES tag to NO to disable the list of files generated
+# at the bottom of the documentation of classes and structs. If set to YES the
+# list will mention the files that were used to generate the documentation.
+
+SHOW_USED_FILES = YES
+
+# If the sources in your project are distributed over multiple directories
+# then setting the SHOW_DIRECTORIES tag to YES will show the directory hierarchy
+# in the documentation. The default is NO.
+
+SHOW_DIRECTORIES = NO
+
+# Set the SHOW_FILES tag to NO to disable the generation of the Files page.
+# This will remove the Files entry from the Quick Index and from the
+# Folder Tree View (if specified). The default is YES.
+
+SHOW_FILES = YES
+
+# Set the SHOW_NAMESPACES tag to NO to disable the generation of the
+# Namespaces page.
+# This will remove the Namespaces entry from the Quick Index
+# and from the Folder Tree View (if specified). The default is YES.
+
+SHOW_NAMESPACES = YES
+
+# The FILE_VERSION_FILTER tag can be used to specify a program or script that
+# doxygen should invoke to get the current version for each file (typically from
+# the version control system). Doxygen will invoke the program by executing (via
+# popen()) the command <command> <input-file>, where <command> is the value of
+# the FILE_VERSION_FILTER tag, and <input-file> is the name of an input file
+# provided by doxygen. Whatever the program writes to standard output
+# is used as the file version. See the manual for examples.
+
+FILE_VERSION_FILTER =
+
+# The LAYOUT_FILE tag can be used to specify a layout file which will be parsed by
+# doxygen. The layout file controls the global structure of the generated output files
+# in an output format independent way. The create the layout file that represents
+# doxygen's defaults, run doxygen with the -l option. You can optionally specify a
+# file name after the option, if omitted DoxygenLayout.xml will be used as the name
+# of the layout file.
+
+LAYOUT_FILE =
+
+#---------------------------------------------------------------------------
+# configuration options related to warning and progress messages
+#---------------------------------------------------------------------------
+
+# The QUIET tag can be used to turn on/off the messages that are generated
+# by doxygen. Possible values are YES and NO. If left blank NO is used.
+
+QUIET = NO
+
+# The WARNINGS tag can be used to turn on/off the warning messages that are
+# generated by doxygen. Possible values are YES and NO. If left blank
+# NO is used.
+
+WARNINGS = YES
+
+# If WARN_IF_UNDOCUMENTED is set to YES, then doxygen will generate warnings
+# for undocumented members. If EXTRACT_ALL is set to YES then this flag will
+# automatically be disabled.
+
+WARN_IF_UNDOCUMENTED = YES
+
+# If WARN_IF_DOC_ERROR is set to YES, doxygen will generate warnings for
+# potential errors in the documentation, such as not documenting some
+# parameters in a documented function, or documenting parameters that
+# don't exist or using markup commands wrongly.
+
+WARN_IF_DOC_ERROR = YES
+
+# This WARN_NO_PARAMDOC option can be enabled to get warnings for
+# functions that are documented, but have no documentation for their parameters
+# or return value. If set to NO (the default) doxygen will only warn about
+# wrong or incomplete parameter documentation, but not about the absence of
+# documentation.
+
+WARN_NO_PARAMDOC = NO
+
+# The WARN_FORMAT tag determines the format of the warning messages that
+# doxygen can produce. The string should contain the $file, $line, and $text
+# tags, which will be replaced by the file and line number from which the
+# warning originated and the warning text. Optionally the format may contain
+# $version, which will be replaced by the version of the file (if it could
+# be obtained via FILE_VERSION_FILTER)
+
+WARN_FORMAT = "$file:$line: $text"
+
+# The WARN_LOGFILE tag can be used to specify a file to which warning
+# and error messages should be written. If left blank the output is written
+# to stderr.
+
+WARN_LOGFILE =
+
+#---------------------------------------------------------------------------
+# configuration options related to the input files
+#---------------------------------------------------------------------------
+
+# The INPUT tag can be used to specify the files and/or directories that contain
+# documented source files. You may enter file names like "myfile.cpp" or
+# directories like "/usr/src/myproject". Separate the files or directories
+# with spaces.
+
+INPUT = lm lm/builder lm/filter lm/interpolate lm/wrappers util util/double-conversion util/stream
+
+# This tag can be used to specify the character encoding of the source files
+# that doxygen parses. Internally doxygen uses the UTF-8 encoding, which is
+# also the default input encoding. Doxygen uses libiconv (or the iconv built
+# into libc) for the transcoding. See http://www.gnu.org/software/libiconv for
+# the list of possible encodings.
+
+INPUT_ENCODING = UTF-8
+
+# If the value of the INPUT tag contains directories, you can use the
+# FILE_PATTERNS tag to specify one or more wildcard pattern (like *.cpp
+# and *.h) to filter out the source-files in the directories. If left
+# blank the following patterns are tested:
+# *.c *.cc *.cxx *.cpp *.c++ *.java *.ii *.ixx *.ipp *.i++ *.inl *.h *.hh *.hxx
+# *.hpp *.h++ *.idl *.odl *.cs *.php *.php3 *.inc *.m *.mm *.py *.f90
+
+FILE_PATTERNS =
+
+# The RECURSIVE tag can be used to specify whether or not subdirectories
+# should be searched for input files as well. Possible values are YES and NO.
+# If left blank NO is used.
+
+RECURSIVE = NO
+
+# The EXCLUDE tag can be used to specify files and/or directories that should
+# excluded from the INPUT source files. This way you can easily exclude a
+# subdirectory from a directory tree whose root is specified with the INPUT tag.
+
+EXCLUDE =
+
+# The EXCLUDE_SYMLINKS tag can be used to select whether or not files or
+# directories that are symbolic links (a Unix filesystem feature) are excluded
+# from the input.
+
+EXCLUDE_SYMLINKS = NO
+
+# If the value of the INPUT tag contains directories, you can use the
+# EXCLUDE_PATTERNS tag to specify one or more wildcard patterns to exclude
+# certain files from those directories. Note that the wildcards are matched
+# against the file with absolute path, so to exclude all test directories
+# for example use the pattern */test/*
+
+EXCLUDE_PATTERNS =
+
+# The EXCLUDE_SYMBOLS tag can be used to specify one or more symbol names
+# (namespaces, classes, functions, etc.) that should be excluded from the
+# output. The symbol name can be a fully qualified name, a word, or if the
+# wildcard * is used, a substring. Examples: ANamespace, AClass,
+# AClass::ANamespace, ANamespace::*Test
+
+EXCLUDE_SYMBOLS =
+
+# The EXAMPLE_PATH tag can be used to specify one or more files or
+# directories that contain example code fragments that are included (see
+# the \include command).
+
+EXAMPLE_PATH =
+
+# If the value of the EXAMPLE_PATH tag contains directories, you can use the
+# EXAMPLE_PATTERNS tag to specify one or more wildcard pattern (like *.cpp
+# and *.h) to filter out the source-files in the directories. If left
+# blank all files are included.
+
+EXAMPLE_PATTERNS =
+
+# If the EXAMPLE_RECURSIVE tag is set to YES then subdirectories will be
+# searched for input files to be used with the \include or \dontinclude
+# commands irrespective of the value of the RECURSIVE tag.
+# Possible values are YES and NO. If left blank NO is used.
+
+EXAMPLE_RECURSIVE = NO
+
+# The IMAGE_PATH tag can be used to specify one or more files or
+# directories that contain images that are included in the documentation (see
+# the \image command).
+
+IMAGE_PATH =
+
+# The INPUT_FILTER tag can be used to specify a program that doxygen should
+# invoke to filter for each input file. Doxygen will invoke the filter program
+# by executing (via popen()) the command <filter> <input-file>, where <filter>
+# is the value of the INPUT_FILTER tag, and <input-file> is the name of an
+# input file. Doxygen will then use the output that the filter program writes
+# to standard output.
+# If FILTER_PATTERNS is specified, this tag will be
+# ignored.
+
+INPUT_FILTER =
+
+# The FILTER_PATTERNS tag can be used to specify filters on a per file pattern
+# basis.
+# Doxygen will compare the file name with each pattern and apply the
+# filter if there is a match.
+# The filters are a list of the form:
+# pattern=filter (like *.cpp=my_cpp_filter). See INPUT_FILTER for further
+# info on how filters are used. If FILTER_PATTERNS is empty, INPUT_FILTER
+# is applied to all files.
+
+FILTER_PATTERNS =
+
+# If the FILTER_SOURCE_FILES tag is set to YES, the input filter (if set using
+# INPUT_FILTER) will be used to filter the input files when producing source
+# files to browse (i.e. when SOURCE_BROWSER is set to YES).
+
+FILTER_SOURCE_FILES = NO
+
+#---------------------------------------------------------------------------
+# configuration options related to source browsing
+#---------------------------------------------------------------------------
+
+# If the SOURCE_BROWSER tag is set to YES then a list of source files will
+# be generated. Documented entities will be cross-referenced with these sources.
+# Note: To get rid of all source code in the generated output, make sure also
+# VERBATIM_HEADERS is set to NO.
+
+SOURCE_BROWSER = NO
+
+# Setting the INLINE_SOURCES tag to YES will include the body
+# of functions and classes directly in the documentation.
+
+INLINE_SOURCES = NO
+
+# Setting the STRIP_CODE_COMMENTS tag to YES (the default) will instruct
+# doxygen to hide any special comment blocks from generated source code
+# fragments. Normal C and C++ comments will always remain visible.
+
+STRIP_CODE_COMMENTS = YES
+
+# If the REFERENCED_BY_RELATION tag is set to YES
+# then for each documented function all documented
+# functions referencing it will be listed.
+
+REFERENCED_BY_RELATION = NO
+
+# If the REFERENCES_RELATION tag is set to YES
+# then for each documented function all documented entities
+# called/used by that function will be listed.
+
+REFERENCES_RELATION = NO
+
+# If the REFERENCES_LINK_SOURCE tag is set to YES (the default)
+# and SOURCE_BROWSER tag is set to YES, then the hyperlinks from
+# functions in REFERENCES_RELATION and REFERENCED_BY_RELATION lists will
+# link to the source code.
+# Otherwise they will link to the documentation.
+
+REFERENCES_LINK_SOURCE = YES
+
+# If the USE_HTAGS tag is set to YES then the references to source code
+# will point to the HTML generated by the htags(1) tool instead of doxygen
+# built-in source browser. The htags tool is part of GNU's global source
+# tagging system (see http://www.gnu.org/software/global/global.html). You
+# will need version 4.8.6 or higher.
+
+USE_HTAGS = NO
+
+# If the VERBATIM_HEADERS tag is set to YES (the default) then Doxygen
+# will generate a verbatim copy of the header file for each class for
+# which an include is specified. Set to NO to disable this.
+
+VERBATIM_HEADERS = YES
+
+#---------------------------------------------------------------------------
+# configuration options related to the alphabetical class index
+#---------------------------------------------------------------------------
+
+# If the ALPHABETICAL_INDEX tag is set to YES, an alphabetical index
+# of all compounds will be generated. Enable this if the project
+# contains a lot of classes, structs, unions or interfaces.
+
+ALPHABETICAL_INDEX = NO
+
+# If the alphabetical index is enabled (see ALPHABETICAL_INDEX) then
+# the COLS_IN_ALPHA_INDEX tag can be used to specify the number of columns
+# in which this list will be split (can be a number in the range [1..20])
+
+COLS_IN_ALPHA_INDEX = 5
+
+# In case all classes in a project start with a common prefix, all
+# classes will be put under the same header in the alphabetical index.
+# The IGNORE_PREFIX tag can be used to specify one or more prefixes that
+# should be ignored while generating the index headers.
+
+IGNORE_PREFIX =
+
+#---------------------------------------------------------------------------
+# configuration options related to the HTML output
+#---------------------------------------------------------------------------
+
+# If the GENERATE_HTML tag is set to YES (the default) Doxygen will
+# generate HTML output.
+
+GENERATE_HTML = YES
+
+# The HTML_OUTPUT tag is used to specify where the HTML docs will be put.
+# If a relative path is entered the value of OUTPUT_DIRECTORY will be
+# put in front of it. If left blank `html' will be used as the default path.
+
+HTML_OUTPUT = html
+
+# The HTML_FILE_EXTENSION tag can be used to specify the file extension for
+# each generated HTML page (for example: .htm,.php,.asp). If it is left blank
+# doxygen will generate files with .html extension.
+
+HTML_FILE_EXTENSION = .html
+
+# The HTML_HEADER tag can be used to specify a personal HTML header for
+# each generated HTML page. If it is left blank doxygen will generate a
+# standard header.
+
+HTML_HEADER =
+
+# The HTML_FOOTER tag can be used to specify a personal HTML footer for
+# each generated HTML page. If it is left blank doxygen will generate a
+# standard footer.
+
+HTML_FOOTER =
+
+# If the HTML_TIMESTAMP tag is set to YES then the generated HTML
+# documentation will contain the timestamp.
+
+HTML_TIMESTAMP = NO
+
+# The HTML_STYLESHEET tag can be used to specify a user-defined cascading
+# style sheet that is used by each HTML page. It can be used to
+# fine-tune the look of the HTML output. If the tag is left blank doxygen
+# will generate a default style sheet. Note that doxygen will try to copy
+# the style sheet file to the HTML output directory, so don't put your own
+# stylesheet in the HTML output directory as well, or it will be erased!
+
+HTML_STYLESHEET =
+
+# If the HTML_ALIGN_MEMBERS tag is set to YES, the members of classes,
+# files or namespaces will be aligned in HTML using tables. If set to
+# NO a bullet list will be used.
+
+HTML_ALIGN_MEMBERS = YES
+
+# If the HTML_DYNAMIC_SECTIONS tag is set to YES then the generated HTML
+# documentation will contain sections that can be hidden and shown after the
+# page has loaded. For this to work a browser that supports
+# JavaScript and DHTML is required (for instance Mozilla 1.0+, Firefox
+# Netscape 6.0+, Internet explorer 5.0+, Konqueror, or Safari).
+
+HTML_DYNAMIC_SECTIONS = NO
+
+# If the GENERATE_DOCSET tag is set to YES, additional index files
+# will be generated that can be used as input for Apple's Xcode 3
+# integrated development environment, introduced with OSX 10.5 (Leopard).
+# To create a documentation set, doxygen will generate a Makefile in the
+# HTML output directory. Running make will produce the docset in that
+# directory and running "make install" will install the docset in
+# ~/Library/Developer/Shared/Documentation/DocSets so that Xcode will find
+# it at startup.
+# See http://developer.apple.com/tools/creatingdocsetswithdoxygen.html for more information.
+
+GENERATE_DOCSET = NO
+
+# When GENERATE_DOCSET tag is set to YES, this tag determines the name of the
+# feed. A documentation feed provides an umbrella under which multiple
+# documentation sets from a single provider (such as a company or product suite)
+# can be grouped.
+
+DOCSET_FEEDNAME = "Doxygen generated docs"
+
+# When GENERATE_DOCSET tag is set to YES, this tag specifies a string that
+# should uniquely identify the documentation set bundle. This should be a
+# reverse domain-name style string, e.g. com.mycompany.MyDocSet. Doxygen
+# will append .docset to the name.
+
+DOCSET_BUNDLE_ID = org.doxygen.Project
+
+# If the GENERATE_HTMLHELP tag is set to YES, additional index files
+# will be generated that can be used as input for tools like the
+# Microsoft HTML help workshop to generate a compiled HTML help file (.chm)
+# of the generated HTML documentation.
+
+GENERATE_HTMLHELP = NO
+
+# If the GENERATE_HTMLHELP tag is set to YES, the CHM_FILE tag can
+# be used to specify the file name of the resulting .chm file. You
+# can add a path in front of the file if the result should not be
+# written to the html output directory.
+
+CHM_FILE =
+
+# If the GENERATE_HTMLHELP tag is set to YES, the HHC_LOCATION tag can
+# be used to specify the location (absolute path including file name) of
+# the HTML help compiler (hhc.exe). If non-empty doxygen will try to run
+# the HTML help compiler on the generated index.hhp.
+
+HHC_LOCATION =
+
+# If the GENERATE_HTMLHELP tag is set to YES, the GENERATE_CHI flag
+# controls if a separate .chi index file is generated (YES) or that
+# it should be included in the master .chm file (NO).
+
+GENERATE_CHI = NO
+
+# If the GENERATE_HTMLHELP tag is set to YES, the CHM_INDEX_ENCODING
+# is used to encode HtmlHelp index (hhk), content (hhc) and project file
+# content.
+
+CHM_INDEX_ENCODING =
+
+# If the GENERATE_HTMLHELP tag is set to YES, the BINARY_TOC flag
+# controls whether a binary table of contents is generated (YES) or a
+# normal table of contents (NO) in the .chm file.
+
+BINARY_TOC = NO
+
+# The TOC_EXPAND flag can be set to YES to add extra items for group members
+# to the contents of the HTML help documentation and to the tree view.
+
+TOC_EXPAND = NO
+
+# If the GENERATE_QHP tag is set to YES and both QHP_NAMESPACE and QHP_VIRTUAL_FOLDER
+# are set, an additional index file will be generated that can be used as input for
+# Qt's qhelpgenerator to generate a Qt Compressed Help (.qch) of the generated
+# HTML documentation.
+
+GENERATE_QHP = NO
+
+# If the QHG_LOCATION tag is specified, the QCH_FILE tag can
+# be used to specify the file name of the resulting .qch file.
+# The path specified is relative to the HTML output folder.
+
+QCH_FILE =
+
+# The QHP_NAMESPACE tag specifies the namespace to use when generating
+# Qt Help Project output. For more information please see
+# http://doc.trolltech.com/qthelpproject.html#namespace
+
+QHP_NAMESPACE =
+
+# The QHP_VIRTUAL_FOLDER tag specifies the namespace to use when generating
+# Qt Help Project output. For more information please see
+# http://doc.trolltech.com/qthelpproject.html#virtual-folders
+
+QHP_VIRTUAL_FOLDER = doc
+
+# If QHP_CUST_FILTER_NAME is set, it specifies the name of a custom filter to add.
+# For more information please see
+# http://doc.trolltech.com/qthelpproject.html#custom-filters
+
+QHP_CUST_FILTER_NAME =
+
+# The QHP_CUST_FILTER_ATTRS tag specifies the list of the attributes of the custom filter to add. For more information please see
+# <a href="http://doc.trolltech.com/qthelpproject.html#custom-filters">Qt Help Project / Custom Filters</a>.
+
+QHP_CUST_FILTER_ATTRS =
+
+# The QHP_SECT_FILTER_ATTRS tag specifies the list of the attributes this project's
+# filter section matches.
+# <a href="http://doc.trolltech.com/qthelpproject.html#filter-attributes">Qt Help Project / Filter Attributes</a>.
+
+QHP_SECT_FILTER_ATTRS =
+
+# If the GENERATE_QHP tag is set to YES, the QHG_LOCATION tag can
+# be used to specify the location of Qt's qhelpgenerator.
+# If non-empty doxygen will try to run qhelpgenerator on the generated
+# .qhp file.
+
+QHG_LOCATION =
+
+# The DISABLE_INDEX tag can be used to turn on/off the condensed index at
+# top of each HTML page. The value NO (the default) enables the index and
+# the value YES disables it.
+
+DISABLE_INDEX = NO
+
+# This tag can be used to set the number of enum values (range [1..20])
+# that doxygen will group on one line in the generated HTML documentation.
+
+ENUM_VALUES_PER_LINE = 4
+
+# The GENERATE_TREEVIEW tag is used to specify whether a tree-like index
+# structure should be generated to display hierarchical information.
+# If the tag value is set to YES, a side panel will be generated
+# containing a tree-like index structure (just like the one that
+# is generated for HTML Help). For this to work a browser that supports
+# JavaScript, DHTML, CSS and frames is required (i.e. any modern browser).
+# Windows users are probably better off using the HTML help feature.
+
+GENERATE_TREEVIEW = NO
+
+# By enabling USE_INLINE_TREES, doxygen will generate the Groups, Directories,
+# and Class Hierarchy pages using a tree view instead of an ordered list.
+
+USE_INLINE_TREES = NO
+
+# If the treeview is enabled (see GENERATE_TREEVIEW) then this tag can be
+# used to set the initial width (in pixels) of the frame in which the tree
+# is shown.
+
+TREEVIEW_WIDTH = 250
+
+# Use this tag to change the font size of Latex formulas included
+# as images in the HTML documentation. The default is 10. Note that
+# when you change the font size after a successful doxygen run you need
+# to manually remove any form_*.png images from the HTML output directory
+# to force them to be regenerated.
+
+FORMULA_FONTSIZE = 10
+
+# When the SEARCHENGINE tag is enabled doxygen will generate a search box for the HTML output. The underlying search engine uses javascript
+# and DHTML and should work on any modern browser. Note that when using HTML help (GENERATE_HTMLHELP) or Qt help (GENERATE_QHP)
+# there is already a search function so this one should typically
+# be disabled.
+
+SEARCHENGINE = YES
+
+#---------------------------------------------------------------------------
+# configuration options related to the LaTeX output
+#---------------------------------------------------------------------------
+
+# If the GENERATE_LATEX tag is set to YES (the default) Doxygen will
+# generate Latex output.
+
+GENERATE_LATEX = YES
+
+# The LATEX_OUTPUT tag is used to specify where the LaTeX docs will be put.
+# If a relative path is entered the value of OUTPUT_DIRECTORY will be
+# put in front of it. If left blank `latex' will be used as the default path.
+
+LATEX_OUTPUT = latex
+
+# The LATEX_CMD_NAME tag can be used to specify the LaTeX command name to be
+# invoked. If left blank `latex' will be used as the default command name.
+
+LATEX_CMD_NAME = latex
+
+# The MAKEINDEX_CMD_NAME tag can be used to specify the command name to
+# generate index for LaTeX. If left blank `makeindex' will be used as the
+# default command name.
+
+MAKEINDEX_CMD_NAME = makeindex
+
+# If the COMPACT_LATEX tag is set to YES Doxygen generates more compact
+# LaTeX documents. This may be useful for small projects and may help to
+# save some trees in general.
+
+COMPACT_LATEX = NO
+
+# The PAPER_TYPE tag can be used to set the paper type that is used
+# by the printer. Possible values are: a4, a4wide, letter, legal and
+# executive. If left blank a4wide will be used.
+
+PAPER_TYPE = a4wide
+
+# The EXTRA_PACKAGES tag can be used to specify one or more names of LaTeX
+# packages that should be included in the LaTeX output.
+
+EXTRA_PACKAGES =
+
+# The LATEX_HEADER tag can be used to specify a personal LaTeX header for
+# the generated latex document. The header should contain everything until
+# the first chapter. If it is left blank doxygen will generate a
+# standard header. Notice: only use this tag if you know what you are doing!
+
+LATEX_HEADER =
+
+# If the PDF_HYPERLINKS tag is set to YES, the LaTeX that is generated
+# is prepared for conversion to pdf (using ps2pdf). The pdf file will
+# contain links (just like the HTML output) instead of page references
+# This makes the output suitable for online browsing using a pdf viewer.
+
+PDF_HYPERLINKS = YES
+
+# If the USE_PDFLATEX tag is set to YES, pdflatex will be used instead of
+# plain latex in the generated Makefile. Set this option to YES to get a
+# higher quality PDF documentation.
+
+USE_PDFLATEX = YES
+
+# If the LATEX_BATCHMODE tag is set to YES, doxygen will add the \\batchmode.
+# command to the generated LaTeX files. This will instruct LaTeX to keep
+# running if errors occur, instead of asking the user for help.
+# This option is also used when generating formulas in HTML.
+
+LATEX_BATCHMODE = NO
+
+# If LATEX_HIDE_INDICES is set to YES then doxygen will not
+# include the index chapters (such as File Index, Compound Index, etc.)
+# in the output.
+
+LATEX_HIDE_INDICES = NO
+
+# If LATEX_SOURCE_CODE is set to YES then doxygen will include source code with syntax highlighting in the LaTeX output. Note that which sources are shown also depends on other settings such as SOURCE_BROWSER.
+
+LATEX_SOURCE_CODE = NO
+
+#---------------------------------------------------------------------------
+# configuration options related to the RTF output
+#---------------------------------------------------------------------------
+
+# If the GENERATE_RTF tag is set to YES Doxygen will generate RTF output
+# The RTF output is optimized for Word 97 and may not look very pretty with
+# other RTF readers or editors.
+
+GENERATE_RTF = NO
+
+# The RTF_OUTPUT tag is used to specify where the RTF docs will be put.
+# If a relative path is entered the value of OUTPUT_DIRECTORY will be
+# put in front of it. If left blank `rtf' will be used as the default path.
+
+RTF_OUTPUT = rtf
+
+# If the COMPACT_RTF tag is set to YES Doxygen generates more compact
+# RTF documents. This may be useful for small projects and may help to
+# save some trees in general.
+
+COMPACT_RTF = NO
+
+# If the RTF_HYPERLINKS tag is set to YES, the RTF that is generated
+# will contain hyperlink fields. The RTF file will
+# contain links (just like the HTML output) instead of page references.
+# This makes the output suitable for online browsing using WORD or other
+# programs which support those fields.
+# Note: wordpad (write) and others do not support links.
+
+RTF_HYPERLINKS = NO
+
+# Load stylesheet definitions from file. Syntax is similar to doxygen's
+# config file, i.e. a series of assignments. You only have to provide
+# replacements, missing definitions are set to their default value.
+
+RTF_STYLESHEET_FILE =
+
+# Set optional variables used in the generation of an rtf document.
+# Syntax is similar to doxygen's config file.
+
+RTF_EXTENSIONS_FILE =
+
+#---------------------------------------------------------------------------
+# configuration options related to the man page output
+#---------------------------------------------------------------------------
+
+# If the GENERATE_MAN tag is set to YES (the default) Doxygen will
+# generate man pages
+
+GENERATE_MAN = NO
+
+# The MAN_OUTPUT tag is used to specify where the man pages will be put.
+# If a relative path is entered the value of OUTPUT_DIRECTORY will be
+# put in front of it. If left blank `man' will be used as the default path.
+
+MAN_OUTPUT = man
+
+# The MAN_EXTENSION tag determines the extension that is added to
+# the generated man pages (default is the subroutine's section .3)
+
+MAN_EXTENSION = .3
+
+# If the MAN_LINKS tag is set to YES and Doxygen generates man output,
+# then it will generate one additional man file for each entity
+# documented in the real man page(s). These additional files
+# only source the real man page, but without them the man command
+# would be unable to find the correct page. The default is NO.
+
+MAN_LINKS = NO
+
+#---------------------------------------------------------------------------
+# configuration options related to the XML output
+#---------------------------------------------------------------------------
+
+# If the GENERATE_XML tag is set to YES Doxygen will
+# generate an XML file that captures the structure of
+# the code including all documentation.
+
+GENERATE_XML = NO
+
+# The XML_OUTPUT tag is used to specify where the XML pages will be put.
+# If a relative path is entered the value of OUTPUT_DIRECTORY will be
+# put in front of it. If left blank `xml' will be used as the default path.
+
+XML_OUTPUT = xml
+
+# The XML_SCHEMA tag can be used to specify an XML schema,
+# which can be used by a validating XML parser to check the
+# syntax of the XML files.
+
+XML_SCHEMA =
+
+# The XML_DTD tag can be used to specify an XML DTD,
+# which can be used by a validating XML parser to check the
+# syntax of the XML files.
+
+XML_DTD =
+
+# If the XML_PROGRAMLISTING tag is set to YES Doxygen will
+# dump the program listings (including syntax highlighting
+# and cross-referencing information) to the XML output. Note that
+# enabling this will significantly increase the size of the XML output.
+
+XML_PROGRAMLISTING = YES
+
+#---------------------------------------------------------------------------
+# configuration options for the AutoGen Definitions output
+#---------------------------------------------------------------------------
+
+# If the GENERATE_AUTOGEN_DEF tag is set to YES Doxygen will
+# generate an AutoGen Definitions (see autogen.sf.net) file
+# that captures the structure of the code including all
+# documentation. Note that this feature is still experimental
+# and incomplete at the moment.
+
+GENERATE_AUTOGEN_DEF = NO
+
+#---------------------------------------------------------------------------
+# configuration options related to the Perl module output
+#---------------------------------------------------------------------------
+
+# If the GENERATE_PERLMOD tag is set to YES Doxygen will
+# generate a Perl module file that captures the structure of
+# the code including all documentation. Note that this
+# feature is still experimental and incomplete at the
+# moment.
+
+GENERATE_PERLMOD = NO
+
+# If the PERLMOD_LATEX tag is set to YES Doxygen will generate
+# the necessary Makefile rules, Perl scripts and LaTeX code to be able
+# to generate PDF and DVI output from the Perl module output.
+
+PERLMOD_LATEX = NO
+
+# If the PERLMOD_PRETTY tag is set to YES the Perl module output will be
+# nicely formatted so it can be parsed by a human reader.
+# This is useful
+# if you want to understand what is going on.
+# On the other hand, if this
+# tag is set to NO the size of the Perl module output will be much smaller
+# and Perl will parse it just the same.
+
+PERLMOD_PRETTY = YES
+
+# The names of the make variables in the generated doxyrules.make file
+# are prefixed with the string contained in PERLMOD_MAKEVAR_PREFIX.
+# This is useful so different doxyrules.make files included by the same
+# Makefile don't overwrite each other's variables.
+
+PERLMOD_MAKEVAR_PREFIX =
+
+#---------------------------------------------------------------------------
+# Configuration options related to the preprocessor
+#---------------------------------------------------------------------------
+
+# If the ENABLE_PREPROCESSING tag is set to YES (the default) Doxygen will
+# evaluate all C-preprocessor directives found in the sources and include
+# files.
+
+ENABLE_PREPROCESSING = YES
+
+# If the MACRO_EXPANSION tag is set to YES Doxygen will expand all macro
+# names in the source code. If set to NO (the default) only conditional
+# compilation will be performed. Macro expansion can be done in a controlled
+# way by setting EXPAND_ONLY_PREDEF to YES.
+
+MACRO_EXPANSION = NO
+
+# If the EXPAND_ONLY_PREDEF and MACRO_EXPANSION tags are both set to YES
+# then the macro expansion is limited to the macros specified with the
+# PREDEFINED and EXPAND_AS_DEFINED tags.
+
+EXPAND_ONLY_PREDEF = NO
+
+# If the SEARCH_INCLUDES tag is set to YES (the default) the includes files
+# in the INCLUDE_PATH (see below) will be search if a #include is found.
+
+SEARCH_INCLUDES = YES
+
+# The INCLUDE_PATH tag can be used to specify one or more directories that
+# contain include files that are not input files but should be processed by
+# the preprocessor.
+
+INCLUDE_PATH =
+
+# You can use the INCLUDE_FILE_PATTERNS tag to specify one or more wildcard
+# patterns (like *.h and *.hpp) to filter out the header-files in the
+# directories. If left blank, the patterns specified with FILE_PATTERNS will
+# be used.
+
+INCLUDE_FILE_PATTERNS =
+
+# The PREDEFINED tag can be used to specify one or more macro names that
+# are defined before the preprocessor is started (similar to the -D option of
+# gcc). The argument of the tag is a list of macros of the form: name
+# or name=definition (no spaces). If the definition and the = are
+# omitted =1 is assumed. To prevent a macro definition from being
+# undefined via #undef or recursively expanded use the := operator
+# instead of the = operator.
+
+PREDEFINED =
+
+# If the MACRO_EXPANSION and EXPAND_ONLY_PREDEF tags are set to YES then
+# this tag can be used to specify a list of macro names that should be expanded.
+# The macro definition that is found in the sources will be used.
+# Use the PREDEFINED tag if you want to use a different macro definition.
+
+EXPAND_AS_DEFINED =
+
+# If the SKIP_FUNCTION_MACROS tag is set to YES (the default) then
+# doxygen's preprocessor will remove all function-like macros that are alone
+# on a line, have an all uppercase name, and do not end with a semicolon. Such
+# function macros are typically used for boiler-plate code, and will confuse
+# the parser if not removed.
+
+SKIP_FUNCTION_MACROS = YES
+
+#---------------------------------------------------------------------------
+# Configuration::additions related to external references
+#---------------------------------------------------------------------------
+
+# The TAGFILES option can be used to specify one or more tagfiles.
+# Optionally an initial location of the external documentation
+# can be added for each tagfile. The format of a tag file without
+# this location is as follows:
+#
+# TAGFILES = file1 file2 ...
+# Adding location for the tag files is done as follows:
+#
+# TAGFILES = file1=loc1 "file2 = loc2" ...
+# where "loc1" and "loc2" can be relative or absolute paths or
+# URLs. If a location is present for each tag, the installdox tool
+# does not have to be run to correct the links.
+# Note that each tag file must have a unique name
+# (where the name does NOT include the path)
+# If a tag file is not located in the directory in which doxygen
+# is run, you must also specify the path to the tagfile here.
+
+TAGFILES =
+
+# When a file name is specified after GENERATE_TAGFILE, doxygen will create
+# a tag file that is based on the input files it reads.
+
+GENERATE_TAGFILE =
+
+# If the ALLEXTERNALS tag is set to YES all external classes will be listed
+# in the class index. If set to NO only the inherited external classes
+# will be listed.
+
+ALLEXTERNALS = NO
+
+# If the EXTERNAL_GROUPS tag is set to YES all external groups will be listed
+# in the modules index. If set to NO, only the current project's groups will
+# be listed.
+
+EXTERNAL_GROUPS = YES
+
+# The PERL_PATH should be the absolute path and name of the perl script
+# interpreter (i.e. the result of `which perl').
+
+PERL_PATH = /usr/bin/perl
+
+#---------------------------------------------------------------------------
+# Configuration options related to the dot tool
+#---------------------------------------------------------------------------
+
+# If the CLASS_DIAGRAMS tag is set to YES (the default) Doxygen will
+# generate an inheritance diagram (in HTML, RTF and LaTeX) for classes with base
+# or super classes. Setting the tag to NO turns the diagrams off. Note that
+# this option is superseded by the HAVE_DOT option below. This is only a
+# fallback. It is recommended to install and use dot, since it yields more
+# powerful graphs.
+
+CLASS_DIAGRAMS = YES
+
+# You can define message sequence charts within doxygen comments using the \msc
+# command. Doxygen will then run the mscgen tool (see
+# http://www.mcternan.me.uk/mscgen/) to produce the chart and insert it in the
+# documentation. The MSCGEN_PATH tag allows you to specify the directory where
+# the mscgen tool resides. If left empty the tool is assumed to be found in the
+# default search path.
+
+MSCGEN_PATH =
+
+# If set to YES, the inheritance and collaboration graphs will hide
+# inheritance and usage relations if the target is undocumented
+# or is not a class.
+
+HIDE_UNDOC_RELATIONS = YES
+
+# If you set the HAVE_DOT tag to YES then doxygen will assume the dot tool is
+# available from the path. This tool is part of Graphviz, a graph visualization
+# toolkit from AT&T and Lucent Bell Labs. The other options in this section
+# have no effect if this option is set to NO (the default)
+
+HAVE_DOT = NO
+
+# By default doxygen will write a font called FreeSans.ttf to the output
+# directory and reference it in all dot files that doxygen generates. This
+# font does not include all possible unicode characters however, so when you need
+# these (or just want a differently looking font) you can specify the font name
+# using DOT_FONTNAME. You need to make sure dot is able to find the font,
+# which can be done by putting it in a standard location or by setting the
+# DOTFONTPATH environment variable or by setting DOT_FONTPATH to the directory
+# containing the font.
+
+DOT_FONTNAME = FreeSans
+
+# The DOT_FONTSIZE tag can be used to set the size of the font of dot graphs.
+# The default size is 10pt.
+
+DOT_FONTSIZE = 10
+
+# By default doxygen will tell dot to use the output directory to look for the
+# FreeSans.ttf font (which doxygen will put there itself). If you specify a
+# different font using DOT_FONTNAME you can set the path where dot
+# can find it using this tag.
+
+DOT_FONTPATH =
+
+# If the CLASS_GRAPH and HAVE_DOT tags are set to YES then doxygen
+# will generate a graph for each documented class showing the direct and
+# indirect inheritance relations. Setting this tag to YES will force
+# the CLASS_DIAGRAMS tag to NO.
+
+CLASS_GRAPH = YES
+
+# If the COLLABORATION_GRAPH and HAVE_DOT tags are set to YES then doxygen
+# will generate a graph for each documented class showing the direct and
+# indirect implementation dependencies (inheritance, containment, and
+# class references variables) of the class with other documented classes.
+
+COLLABORATION_GRAPH = YES
+
+# If the GROUP_GRAPHS and HAVE_DOT tags are set to YES then doxygen
+# will generate a graph for groups, showing the direct groups dependencies
+
+GROUP_GRAPHS = YES
+
+# If the UML_LOOK tag is set to YES doxygen will generate inheritance and
+# collaboration diagrams in a style similar to the OMG's Unified Modeling
+# Language.
+
+UML_LOOK = NO
+
+# If set to YES, the inheritance and collaboration graphs will show the
+# relations between templates and their instances.
+
+TEMPLATE_RELATIONS = NO
+
+# If the ENABLE_PREPROCESSING, SEARCH_INCLUDES, INCLUDE_GRAPH, and HAVE_DOT
+# tags are set to YES then doxygen will generate a graph for each documented
+# file showing the direct and indirect include dependencies of the file with
+# other documented files.
+
+INCLUDE_GRAPH = YES
+
+# If the ENABLE_PREPROCESSING, SEARCH_INCLUDES, INCLUDED_BY_GRAPH, and
+# HAVE_DOT tags are set to YES then doxygen will generate a graph for each
+# documented header file showing the documented files that directly or
+# indirectly include this file.
+
+INCLUDED_BY_GRAPH = YES
+
+# If the CALL_GRAPH and HAVE_DOT options are set to YES then
+# doxygen will generate a call dependency graph for every global function
+# or class method. Note that enabling this option will significantly increase
+# the time of a run. So in most cases it will be better to enable call graphs
+# for selected functions only using the \callgraph command.
+
+CALL_GRAPH = NO
+
+# If the CALLER_GRAPH and HAVE_DOT tags are set to YES then
+# doxygen will generate a caller dependency graph for every global function
+# or class method. Note that enabling this option will significantly increase
+# the time of a run. So in most cases it will be better to enable caller
+# graphs for selected functions only using the \callergraph command.
+
+CALLER_GRAPH = NO
+
+# If the GRAPHICAL_HIERARCHY and HAVE_DOT tags are set to YES then doxygen
+# will show a graphical hierarchy of all classes instead of a textual one.
+
+GRAPHICAL_HIERARCHY = YES
+
+# If the DIRECTORY_GRAPH, SHOW_DIRECTORIES and HAVE_DOT tags are set to YES
+# then doxygen will show the dependencies a directory has on other directories
+# in a graphical way. The dependency relations are determined by the #include
+# relations between the files in the directories.
+
+DIRECTORY_GRAPH = YES
+
+# The DOT_IMAGE_FORMAT tag can be used to set the image format of the images
+# generated by dot. Possible values are png, jpg, or gif
+# If left blank png will be used.
+
+DOT_IMAGE_FORMAT = png
+
+# The tag DOT_PATH can be used to specify the path where the dot tool can be
+# found. If left blank, it is assumed the dot tool can be found in the path.
+
+DOT_PATH =
+
+# The DOTFILE_DIRS tag can be used to specify one or more directories that
+# contain dot files that are included in the documentation (see the
+# \dotfile command).
+
+DOTFILE_DIRS =
+
+# The DOT_GRAPH_MAX_NODES tag can be used to set the maximum number of
+# nodes that will be shown in the graph. If the number of nodes in a graph
+# becomes larger than this value, doxygen will truncate the graph, which is
+# visualized by representing a node as a red box. Note that if the
+# number of direct children of the root node in a graph is already larger than
+# DOT_GRAPH_MAX_NODES then the graph will not be shown at all. Also note
+# that the size of a graph can be further restricted by MAX_DOT_GRAPH_DEPTH.
+
+DOT_GRAPH_MAX_NODES = 50
+
+# The MAX_DOT_GRAPH_DEPTH tag can be used to set the maximum depth of the
+# graphs generated by dot. A depth value of 3 means that only nodes reachable
+# from the root by following a path via at most 3 edges will be shown. Nodes
+# that lay further from the root node will be omitted. Note that setting this
+# option to 1 or 2 may greatly reduce the computation time needed for large
+# code bases. Also note that the size of a graph can be further restricted by
+# DOT_GRAPH_MAX_NODES. Using a depth of 0 means no depth restriction.
+
+MAX_DOT_GRAPH_DEPTH = 0
+
+# Set the DOT_TRANSPARENT tag to YES to generate images with a transparent
+# background. This is disabled by default, because dot on Windows does not
+# seem to support this out of the box. Warning: Depending on the platform used,
+# enabling this option may lead to badly anti-aliased labels on the edges of
+# a graph (i.e. they become hard to read).
+
+DOT_TRANSPARENT = NO
+
+# Set the DOT_MULTI_TARGETS tag to YES allow dot to generate multiple output
+# files in one run (i.e. multiple -o and -T options on the command line). This
+# makes dot run faster, but since only newer versions of dot (>1.8.10)
+# support this, this feature is disabled by default.
+
+DOT_MULTI_TARGETS = NO
+
+# If the GENERATE_LEGEND tag is set to YES (the default) Doxygen will
+# generate a legend page explaining the meaning of the various boxes and
+# arrows in the dot generated graphs.
+
+GENERATE_LEGEND = YES
+
+# If the DOT_CLEANUP tag is set to YES (the default) Doxygen will
+# remove the intermediate dot files that are used to generate
+# the various graphs.
+
+DOT_CLEANUP = YES
diff --git a/src/kenlm/Jamroot b/src/kenlm/Jamroot
new file mode 100644
index 0000000..6570c23
--- /dev/null
+++ b/src/kenlm/Jamroot
@@ -0,0 +1,68 @@
+#BUILDING KENLM
+#
+#DEPENDENCIES
+#Remember that some distributions split packages into development and binary
+#versions. You need the development versions.
+#
+#zlib and Boost >= 1.36.0 are required. Don't have Boost? You can compile the
+#queries part with ./compile_query_only.sh .
+#--with-boost=/path/to/boost uses a Boost install from a custom path.
+#
+#The following optional packages are autodetected:
+#libtcmalloc_minimal from google-perftools
+#bzip2
+#lzma aka xz
+#
+#INSTALLATION
+#--prefix=/path/to/prefix sets the install prefix [default is source root].
+#--bindir=/path/to/prefix/bin sets the bin directory [PREFIX/bin]
+#--libdir=/path/to/prefix/lib sets the lib directory [PREFIX/lib]
+#--includedir=/path/to/prefix/include installs headers.
+# Does not install if missing. No argument defaults to PREFIX/include .
+#--install-scripts=/path/to/scripts copies scripts into a directory.
+# Does not install if missing. No argument defaults to PREFIX/scripts .
+#--git appends the git revision to the prefix directory.
+#
+#BUILD OPTIONS
+# By default, the build is multi-threaded, optimized, and statically linked.
+# Pass these to change the build:
+#
+# threading=single|multi controls threading (default multi)
+#
+# variant=release|debug|profile builds optimized (default), for debug, or for
+# profiling
+#
+# link=static|shared controls preferred linking (default static)
+# --static forces static linking (the default will fall
+# back to shared)
+#
+# debug-symbols=on|off include or exclude (default) debugging
+# information also known as -g
+# --without-libsegfault does not link with libSegFault
+#
+# --max-kenlm-order maximum ngram order that kenlm can process (default 6)
+#
+#CONTROLLING THE BUILD
+#-a to build from scratch
+#-j$NCPUS to compile in parallel
+#--clean to clean
+
+path-constant TOP : . ;
+include $(TOP)/jam-files/sanity.jam ;
+
+boost 103600 ;
+project : requirements $(requirements) <include>. ;
+project : default-build <threading>multi <warnings>on <variant>release <link>static ;
+
+external-lib z ;
+
+build-project lm ;
+build-project util ;
+
+lib kenlm : lm//kenlm ;
+
+install-bin-libs lm//programs util//programs kenlm ;
+install-headers prefix-include : [ glob-tree *.hh : dist include prefix-include ] : . ;
+alias install : prefix-bin prefix-lib prefix-include ;
+explicit headers ;
+explicit install ;
diff --git a/src/kenlm/LICENSE b/src/kenlm/LICENSE
new file mode 100644
index 0000000..6dfbb00
--- /dev/null
+++ b/src/kenlm/LICENSE
@@ -0,0 +1,26 @@
+Most of the code here is licensed under the LGPL. There are exceptions that
+have their own licenses, listed below. See comments in those files for more
+details.
+
+util/getopt.* is getopt for Windows
+util/murmur_hash.cc
+util/string_piece.hh and util/string_piece.cc
+util/double-conversion/LICENSE covers util/double-conversion except Jamfile
+util/file.cc contains a modified implementation of mkstemp under the LGPL
+util/integer_to_string.* is BSD
+jam-files/LICENSE_1_0.txt covers jam-files except Jamroot
+
+For the rest:
+
+ KenLM is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Lesser General Public License as published
+ by the Free Software Foundation, either version 2.1 of the License, or
+ (at your option) any later version.
+
+ KenLM is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License 2.1
+ along with KenLM code. If not, see <http://www.gnu.org/licenses/lgpl-2.1.html>.
diff --git a/src/kenlm/README.md b/src/kenlm/README.md
new file mode 100644
index 0000000..1288504
--- /dev/null
+++ b/src/kenlm/README.md
@@ -0,0 +1,96 @@
+# kenlm
+
+Language model inference code by Kenneth Heafield (kenlm at kheafield.com)
+
+I do development in master on https://github.com/kpu/kenlm/. Normally, it works, but I do not guarantee it will compile, give correct answers, or generate non-broken binary files. For a more stable release, get http://kheafield.com/code/kenlm.tar.gz .
+
+The website http://kheafield.com/code/kenlm/ has more documentation. If you're a decoder developer, please download the latest version from there instead of copying from another decoder.
+
+## Compiling
+See BUILDING.
+
+## Compiling with your own build system
+If you want to compile with your own build system (Makefile etc) or to use as a library, there are a number of macros you can set on the g++ command line or in util/have.hh .
+
+* `KENLM_MAX_ORDER` is the maximum order that can be loaded. This is done to make state an efficient POD rather than a vector.
+* `HAVE_ICU` If your code links against ICU, define this to disable the internal StringPiece and replace it with ICU's copy of StringPiece, avoiding naming conflicts.
+
+ARPA files can be read in compressed format with these options:
+* `HAVE_ZLIB` Supports gzip. Link with -lz.
+* `HAVE_BZLIB` Supports bzip2. Link with -lbz2.
+* `HAVE_XZLIB` Supports xz. Link with -llzma.
+
+Note that these macros impact only `read_compressed.cc` and `read_compressed_test.cc`. The bjam build system will auto-detect bzip2 and xz support.
+
+## Estimation
+lmplz estimates unpruned language models with modified Kneser-Ney smoothing. After compiling with bjam, run
+```bash
+bin/lmplz -o 5 <text >text.arpa
+```
+The algorithm is on-disk, using an amount of memory that you specify. See http://kheafield.com/code/kenlm/estimation/ for more.
+
+MT Marathon 2012 team members Ivan Pouzyrevsky and Mohammed Mediani contributed to the computation design and early implementation. Jon Clark contributed to the design, clarified points about smoothing, and added logging.
+
+## Filtering
+
+filter takes an ARPA or count file and removes entries that will never be queried. The filter criterion can be corpus-level vocabulary, sentence-level vocabulary, or sentence-level phrases. Run
+```bash
+bin/filter
+```
+and see http://kheafield.com/code/kenlm/filter/ for more documentation.
+
+## Querying
+
+Two data structures are supported: probing and trie. Probing is a probing hash table with keys that are 64-bit hashes of n-grams and floats as values. Trie is a fairly standard trie but with bit-level packing so it uses the minimum number of bits to store word indices and pointers. The trie node entries are sorted by word index. Probing is the fastest and uses the most memory. Trie uses the least memory and is a bit slower.
+
+With trie, resident memory is 58% of IRST's smallest version and 21% of SRI's compact version. Simultaneously, trie CPU's use is 81% of IRST's fastest version and 84% of SRI's fast version. KenLM's probing hash table implementation goes even faster at the expense of using more memory. See http://kheafield.com/code/kenlm/benchmark/.
+
+Binary format via mmap is supported. Run `./build_binary` to make one then pass the binary file name to the appropriate Model constructor.
+
+## Platforms
+`murmur_hash.cc` and `bit_packing.hh` perform unaligned reads and writes that make the code architecture-dependent.
+It has been successfully tested on x86\_64, x86, and PPC64.
+ARM support is reportedly working, at least on the iPhone.
+
+Runs on Linux, OS X, Cygwin, and MinGW.
+
+Hideo Okuma and Tomoyuki Yoshimura from NICT contributed ports to ARM and MinGW.
+
+## Decoder developers
+- I recommend copying the code and distributing it with your decoder. However, please send improvements upstream.
+
+- Omit the lm/filter directory if you do not want the language model filter. Only that and tests depend on Boost.
+
+- Select the macros you want, listed in the previous section.
+
+- There are two build systems: compile.sh and Jamroot+Jamfile. They're pretty simple and are intended to be reimplemented in your build system.
+
+- Use either the interface in `lm/model.hh` or `lm/virtual_interface.hh`. Interface documentation is in comments of `lm/virtual_interface.hh` and `lm/model.hh`.
+
+- There are several possible data structures in `model.hh`. Use `RecognizeBinary` in `binary_format.hh` to determine which one a user has provided. You probably already implement feature functions as an abstract virtual base class with several children. I suggest you co-opt this existing virtual dispatch by templatizing the language model feature implementation on the KenLM model identified by `RecognizeBinary`. This is the strategy used in Moses and cdec.
+
+- See `lm/config.hh` for run-time tuning options.
+
+## Contributors
+Contributions to KenLM are welcome. Please base your contributions on https://github.com/kpu/kenlm and send pull requests (or I might give you commit access). Downstream copies in Moses and cdec are maintained by overwriting them so do not make changes there.
+
+## Python module
+Contributed by Victor Chahuneau.
+
+### Installation
+
+```bash
+pip install https://github.com/kpu/kenlm/archive/master.zip
+```
+
+### Basic Usage
+```python
+import kenlm
+model = kenlm.LanguageModel('lm/test.arpa')
+sentence = 'this is a sentence .'
+print(model.score(sentence))
+```
+
+---
+
+The name was Hieu Hoang's idea, not mine.
diff --git a/src/kenlm/bjam b/src/kenlm/bjam
new file mode 100755
index 0000000..939ea99
--- /dev/null
+++ b/src/kenlm/bjam
@@ -0,0 +1,23 @@
+#!/bin/bash
+set -e
+top="$(dirname "$0")"
+if
+ bjam="$(which bjam 2>/dev/null)" && #exists
+ [ ${#bjam} != 0 ] && #paranoia about which printing nothing then returning true
+ ! grep UFIHGUFIHBDJKNCFZXAEVA "${bjam}" </dev/null >/dev/null && #bjam in path isn't this script
+ "${bjam}" --sanity-test 2>/dev/null |grep Sane >/dev/null && #The test in jam-files/sanity.jam passes
+ (cd "${top}/jam-files/fail" && ! "${bjam}") >/dev/null #Returns non-zero on failure
+then
+ #Delegate to system bjam
+ exec "${bjam}" "$@"
+fi
+
+if [ ! -x "$top"/jam-files/bjam-1_55 ]; then
+ pushd "$top/jam-files/engine"
+ ./build.sh
+ cp -f bin.*/bjam ../bjam-1_55
+ popd
+fi
+
+export BOOST_BUILD_PATH="$top"/jam-files/boost-build
+exec "$top"/jam-files/bjam-1_55 "$@"
diff --git a/src/kenlm/clean_query_only.sh b/src/kenlm/clean_query_only.sh
new file mode 100755
index 0000000..2636265
--- /dev/null
+++ b/src/kenlm/clean_query_only.sh
@@ -0,0 +1,2 @@
+#!/bin/bash
+rm -rf {lm,util,util/double-conversion}/*.o bin/{query,build_binary}
diff --git a/src/kenlm/cmake/KenLMFunctions.cmake b/src/kenlm/cmake/KenLMFunctions.cmake
new file mode 100644
index 0000000..cf400c1
--- /dev/null
+++ b/src/kenlm/cmake/KenLMFunctions.cmake
@@ -0,0 +1,82 @@
+# Helper functions used across the CMake build system
+
+include(CMakeParseArguments)
+
+# Adds a bunch of executables to the build, each depending on the specified
+# dependent object files and linking against the specified libraries
+function(AddExes)
+ set(multiValueArgs EXES DEPENDS LIBRARIES)
+ cmake_parse_arguments(AddExes "" "" "${multiValueArgs}" ${ARGN})
+
+ # Iterate through the executable list
+ foreach(exe ${AddExes_EXES})
+
+ # Compile the executable, linking against the requisite dependent object files
+ add_executable(${exe} ${exe}_main.cc ${AddExes_DEPENDS})
+
+ # Link the executable against the supplied libraries
+ if(AddExes_LIBRARIES)
+ target_link_libraries(${exe} ${AddExes_LIBRARIES})
+ endif()
+
+ # Group executables together
+ set_target_properties(${exe} PROPERTIES FOLDER executables)
+
+ # End for loop
+ endforeach(exe)
+
+ # Install the executable files
+ install(TARGETS ${AddExes_EXES} DESTINATION bin)
+endfunction()
+
+# Adds a single test to the build, depending on the specified dependent
+# object files, linking against the specified libraries, and with the
+# specified command line arguments
+function(KenLMAddTest)
+ cmake_parse_arguments(KenLMAddTest "" "TEST"
+ "DEPENDS;LIBRARIES;TEST_ARGS" ${ARGN})
+
+ # Compile the executable, linking against the requisite dependent object files
+ add_executable(${KenLMAddTest_TEST}
+ ${KenLMAddTest_TEST}.cc
+ ${KenLMAddTest_DEPENDS})
+
+ # Require the following compile flag
+ set_target_properties(${KenLMAddTest_TEST} PROPERTIES COMPILE_FLAGS -DBOOST_TEST_DYN_LINK)
+
+ if(KenLMAddTest_LIBRARIES)
+ target_link_libraries(${KenLMAddTest_TEST} ${KenLMAddTest_LIBRARIES})
+ endif()
+
+ set(test_params "")
+ if(KenLMAddTest_TEST_ARGS)
+ set(test_params ${KenLMAddTest_TEST_ARGS})
+ endif()
+
+ # Specify command arguments for how to run each unit test
+ #
+ # Assuming that foo was defined via add_executable(foo ...),
+ # the syntax $<TARGET_FILE:foo> gives the full path to the executable.
+ #
+ add_test(NAME ${KenLMAddTest_TEST}
+ COMMAND $<TARGET_FILE:${KenLMAddTest_TEST}> ${test_params})
+
+ # Group unit tests together
+ set_target_properties(${KenLMAddTest_TEST} PROPERTIES FOLDER "unit_tests")
+endfunction()
+
+# Adds a bunch of tests to the build, each depending on the specified
+# dependent object files and linking against the specified libraries
+function(AddTests)
+ set(multiValueArgs TESTS DEPENDS LIBRARIES TEST_ARGS)
+ cmake_parse_arguments(AddTests "" "" "${multiValueArgs}" ${ARGN})
+
+ # Iterate through the Boost tests list
+ foreach(test ${KENLM_BOOST_TESTS_LIST})
+
+ KenLMAddTest(TEST ${test}
+ DEPENDS ${AddTests_DEPENDS}
+ LIBRARIES ${AddTests_LIBRARIES}
+ TEST_ARGS ${AddTests_TEST_ARGS})
+ endforeach(test)
+endfunction()
diff --git a/src/kenlm/cmake/modules/FindEigen3.cmake b/src/kenlm/cmake/modules/FindEigen3.cmake
new file mode 100644
index 0000000..cea1afe
--- /dev/null
+++ b/src/kenlm/cmake/modules/FindEigen3.cmake
@@ -0,0 +1,90 @@
+# - Try to find Eigen3 lib
+#
+# This module supports requiring a minimum version, e.g. you can do
+# find_package(Eigen3 3.1.2)
+# to require version 3.1.2 or newer of Eigen3.
+#
+# Once done this will define
+#
+# EIGEN3_FOUND - system has eigen lib with correct version
+# EIGEN3_INCLUDE_DIR - the eigen include directory
+# EIGEN3_VERSION - eigen version
+#
+# This module reads hints about search locations from
+# the following environment variables:
+#
+# EIGEN3_ROOT
+# EIGEN3_ROOT_DIR
+
+# Copyright (c) 2006, 2007 Montel Laurent, <montel@kde.org>
+# Copyright (c) 2008, 2009 Gael Guennebaud, <g.gael@free.fr>
+# Copyright (c) 2009 Benoit Jacob <jacob.benoit.1@gmail.com>
+# Redistribution and use is allowed according to the terms of the 2-clause BSD license.
+
+if(NOT Eigen3_FIND_VERSION)
+ if(NOT Eigen3_FIND_VERSION_MAJOR)
+ set(Eigen3_FIND_VERSION_MAJOR 2)
+ endif(NOT Eigen3_FIND_VERSION_MAJOR)
+ if(NOT Eigen3_FIND_VERSION_MINOR)
+ set(Eigen3_FIND_VERSION_MINOR 91)
+ endif(NOT Eigen3_FIND_VERSION_MINOR)
+ if(NOT Eigen3_FIND_VERSION_PATCH)
+ set(Eigen3_FIND_VERSION_PATCH 0)
+ endif(NOT Eigen3_FIND_VERSION_PATCH)
+
+ set(Eigen3_FIND_VERSION "${Eigen3_FIND_VERSION_MAJOR}.${Eigen3_FIND_VERSION_MINOR}.${Eigen3_FIND_VERSION_PATCH}")
+endif(NOT Eigen3_FIND_VERSION)
+
+macro(_eigen3_check_version)
+ file(READ "${EIGEN3_INCLUDE_DIR}/Eigen/src/Core/util/Macros.h" _eigen3_version_header)
+
+ string(REGEX MATCH "define[ \t]+EIGEN_WORLD_VERSION[ \t]+([0-9]+)" _eigen3_world_version_match "${_eigen3_version_header}")
+ set(EIGEN3_WORLD_VERSION "${CMAKE_MATCH_1}")
+ string(REGEX MATCH "define[ \t]+EIGEN_MAJOR_VERSION[ \t]+([0-9]+)" _eigen3_major_version_match "${_eigen3_version_header}")
+ set(EIGEN3_MAJOR_VERSION "${CMAKE_MATCH_1}")
+ string(REGEX MATCH "define[ \t]+EIGEN_MINOR_VERSION[ \t]+([0-9]+)" _eigen3_minor_version_match "${_eigen3_version_header}")
+ set(EIGEN3_MINOR_VERSION "${CMAKE_MATCH_1}")
+
+ set(EIGEN3_VERSION ${EIGEN3_WORLD_VERSION}.${EIGEN3_MAJOR_VERSION}.${EIGEN3_MINOR_VERSION})
+ if(${EIGEN3_VERSION} VERSION_LESS ${Eigen3_FIND_VERSION})
+ set(EIGEN3_VERSION_OK FALSE)
+ else(${EIGEN3_VERSION} VERSION_LESS ${Eigen3_FIND_VERSION})
+ set(EIGEN3_VERSION_OK TRUE)
+ endif(${EIGEN3_VERSION} VERSION_LESS ${Eigen3_FIND_VERSION})
+
+ if(NOT EIGEN3_VERSION_OK)
+
+ message(STATUS "Eigen3 version ${EIGEN3_VERSION} found in ${EIGEN3_INCLUDE_DIR}, "
+ "but at least version ${Eigen3_FIND_VERSION} is required")
+ endif(NOT EIGEN3_VERSION_OK)
+endmacro(_eigen3_check_version)
+
+if (EIGEN3_INCLUDE_DIR)
+
+ # in cache already
+ _eigen3_check_version()
+ set(EIGEN3_FOUND ${EIGEN3_VERSION_OK})
+
+else (EIGEN3_INCLUDE_DIR)
+
+ find_path(EIGEN3_INCLUDE_DIR NAMES signature_of_eigen3_matrix_library
+ HINTS
+ ENV EIGEN3_ROOT
+ ENV EIGEN3_ROOT_DIR
+ PATHS
+ ${CMAKE_INSTALL_PREFIX}/include
+ ${KDE4_INCLUDE_DIR}
+ PATH_SUFFIXES eigen3 eigen
+ )
+
+ if(EIGEN3_INCLUDE_DIR)
+ _eigen3_check_version()
+ endif(EIGEN3_INCLUDE_DIR)
+
+ include(FindPackageHandleStandardArgs)
+ find_package_handle_standard_args(Eigen3 DEFAULT_MSG EIGEN3_INCLUDE_DIR EIGEN3_VERSION_OK)
+
+ mark_as_advanced(EIGEN3_INCLUDE_DIR)
+
+endif(EIGEN3_INCLUDE_DIR)
+
diff --git a/src/kenlm/compile_query_only.sh b/src/kenlm/compile_query_only.sh
new file mode 100755
index 0000000..7a82f49
--- /dev/null
+++ b/src/kenlm/compile_query_only.sh
@@ -0,0 +1,34 @@
+#!/bin/bash
+#This is just an example compilation. You should integrate these files into your build system. Boost jam is provided and preferred.
+
+echo You must use ./bjam if you want language model estimation, filtering, or support for compressed files \(.gz, .bz2, .xz\) 1>&2
+
+rm {lm,util}/*.o 2>/dev/null
+set -e
+
+CXX=${CXX:-g++}
+
+CXXFLAGS+=" -I. -O3 -DNDEBUG -DKENLM_MAX_ORDER=6"
+
+#If this fails for you, consider using bjam.
+if [ ${#NPLM} != 0 ]; then
+ CXXFLAGS+=" -DHAVE_NPLM -lneuralLM -L$NPLM/src -I$NPLM/src -lboost_thread-mt -fopenmp"
+ ADDED_PATHS="lm/wrappers/*.cc"
+fi
+echo 'Compiling with '$CXX $CXXFLAGS
+
+#Grab all cc files in these directories except those ending in test.cc or main.cc
+objects=""
+for i in util/double-conversion/*.cc util/*.cc lm/*.cc $ADDED_PATHS; do
+ if [ "${i%test.cc}" == "$i" ] && [ "${i%main.cc}" == "$i" ]; then
+ $CXX $CXXFLAGS -c $i -o ${i%.cc}.o
+ objects="$objects ${i%.cc}.o"
+ fi
+done
+
+mkdir -p bin
+if [ "$(uname)" != Darwin ]; then
+ CXXFLAGS="$CXXFLAGS -lrt"
+fi
+$CXX lm/build_binary_main.cc $objects -o bin/build_binary $CXXFLAGS $LDFLAGS
+$CXX lm/query_main.cc $objects -o bin/query $CXXFLAGS $LDFLAGS
diff --git a/src/kenlm/jam-files/LICENSE_1_0.txt b/src/kenlm/jam-files/LICENSE_1_0.txt
new file mode 100644
index 0000000..36b7cd9
--- /dev/null
+++ b/src/kenlm/jam-files/LICENSE_1_0.txt
@@ -0,0 +1,23 @@
+Boost Software License - Version 1.0 - August 17th, 2003
+
+Permission is hereby granted, free of charge, to any person or organization
+obtaining a copy of the software and accompanying documentation covered by
+this license (the "Software") to use, reproduce, display, distribute,
+execute, and transmit the Software, and to prepare derivative works of the
+Software, and to permit third-parties to whom the Software is furnished to
+do so, all subject to the following:
+
+The copyright notices in the Software and this entire statement, including
+the above license grant, this restriction and the following disclaimer,
+must be included in all copies of the Software, in whole or in part, and
+all derivative works of the Software, unless such copies or derivative
+works are solely in the form of machine-executable object code generated by
+a source language processor.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT
+SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE
+FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE,
+ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+DEALINGS IN THE SOFTWARE.
diff --git a/src/kenlm/jam-files/boost-build/boost-build.jam b/src/kenlm/jam-files/boost-build/boost-build.jam
new file mode 100644
index 0000000..73db049
--- /dev/null
+++ b/src/kenlm/jam-files/boost-build/boost-build.jam
@@ -0,0 +1,8 @@
+# Copyright 2001, 2002 Dave Abrahams
+# Copyright 2002 Rene Rivera
+# Copyright 2003 Vladimir Prus
+# Distributed under the Boost Software License, Version 1.0.
+# (See accompanying file LICENSE_1_0.txt or http://www.boost.org/LICENSE_1_0.txt)
+
+
+boost-build kernel ;
diff --git a/src/kenlm/jam-files/boost-build/bootstrap.jam b/src/kenlm/jam-files/boost-build/bootstrap.jam
new file mode 100644
index 0000000..af3e8bf
--- /dev/null
+++ b/src/kenlm/jam-files/boost-build/bootstrap.jam
@@ -0,0 +1,18 @@
+# Copyright (c) 2003 Vladimir Prus.
+#
+# Use, modification and distribution is subject to the Boost Software
+# License Version 1.0. (See accompanying file LICENSE_1_0.txt or
+# http://www.boost.org/LICENSE_1_0.txt)
+
+# This file handles initial phase of Boost.Build loading.
+# Boost.Jam has already figured out where Boost.Build is
+# and loads this file, which is responsible for initialization
+# of basic facilities such as a module system and loading the
+# main Boost.Build module, build-system.jam.
+#
+# Exact operation of this module is not interesting, it makes
+# sense to look at build-system.jam right away.
+
+# Load the kernel/bootstrap.jam, which does all the work.
+.bootstrap-file = $(.bootstrap-file:D)/kernel/bootstrap.jam ;
+include $(.bootstrap-file) ;
\ No newline at end of file
diff --git a/src/kenlm/jam-files/boost-build/build-system.jam b/src/kenlm/jam-files/boost-build/build-system.jam
new file mode 100644
index 0000000..247326a
--- /dev/null
+++ b/src/kenlm/jam-files/boost-build/build-system.jam
@@ -0,0 +1,981 @@
+# Copyright 2003, 2005, 2007 Dave Abrahams
+# Copyright 2006, 2007 Rene Rivera
+# Copyright 2003, 2004, 2005, 2006 Vladimir Prus
+# Distributed under the Boost Software License, Version 1.0.
+# (See accompanying file LICENSE_1_0.txt or copy at
+# http://www.boost.org/LICENSE_1_0.txt)
+
+# This file is part of Boost Build version 2. You can think of it as forming the
+# main() routine. It is invoked by the bootstrapping code in bootstrap.jam.
+
+import build-request ;
+import builtin ;
+import "class" : new ;
+import configure ;
+import config-cache ;
+import feature ;
+import generators ;
+import make ;
+import modules ;
+import os ;
+import path ;
+import project ;
+import property ;
+import property-set ;
+import regex ;
+import sequence ;
+import targets ;
+import toolset ;
+import utility ;
+import version ;
+import virtual-target ;
+
+
+################################################################################
+#
+# Module global data.
+#
+################################################################################
+
+# Shortcut used in this module for accessing used command-line parameters.
+.argv = [ modules.peek : ARGV ] ;
+
+# Flag indicating we should display additional debugging information related to
+# locating and loading Boost Build configuration files.
+.debug-config = [ MATCH ^(--debug-configuration)$ : $(.argv) ] ;
+
+# Virtual targets obtained when building main targets references on the command
+# line. When running 'bjam --clean main_target' we want to clean only files
+# belonging to that main target so we need to record which targets are produced
+# for it.
+.results-of-main-targets = ;
+
+# Was an XML dump requested?
+.out-xml = [ MATCH ^--out-xml=(.*)$ : $(.argv) ] ;
+
+# Default toolset & version to be used in case no other toolset has been used
+# explicitly by either the loaded configuration files, the loaded project build
+# scripts or an explicit toolset request on the command line. If not specified,
+# an arbitrary default will be used based on the current host OS. This value,
+# while not strictly necessary, has been added to allow testing Boost-Build's
+# default toolset usage functionality.
+.default-toolset = ;
+.default-toolset-version = ;
+
+
+################################################################################
+#
+# Public rules.
+#
+################################################################################
+
+# Returns the property set with the free features from the currently processed
+# build request.
+#
+rule command-line-free-features ( )
+{
+ return $(.command-line-free-features) ;
+}
+
+
+# Returns the location of the build system. The primary use case is building
+# Boost where it is sometimes needed to get the location of other components
+# (e.g. BoostBook files) and it is convenient to use locations relative to the
+# Boost Build path.
+#
+rule location ( )
+{
+ local r = [ modules.binding build-system ] ;
+ return $(r:P) ;
+}
+
+
+# Sets the default toolset & version to be used in case no other toolset has
+# been used explicitly by either the loaded configuration files, the loaded
+# project build scripts or an explicit toolset request on the command line. For
+# more detailed information see the comment related to used global variables.
+#
+rule set-default-toolset ( toolset : version ? )
+{
+ .default-toolset = $(toolset) ;
+ .default-toolset-version = $(version) ;
+}
+
+rule set-pre-build-hook ( function )
+{
+ .pre-build-hook = $(function) ;
+}
+
+rule set-post-build-hook ( function )
+{
+ .post-build-hook = $(function) ;
+}
+
+################################################################################
+#
+# Local rules.
+#
+################################################################################
+
+# Returns actual Jam targets to be used for executing a clean request.
+#
+local rule actual-clean-targets ( )
+{
+ # The cleaning is tricky. Say, if user says 'bjam --clean foo' where 'foo'
+ # is a directory, then we want to clean targets which are in 'foo' as well
+ # as those in any children Jamfiles under foo but not in any unrelated
+ # Jamfiles. To achieve this we first mark all projects explicitly detected
+ # as targets for this build system run as needing to be cleaned.
+ for local t in $(targets)
+ {
+ if [ class.is-a $(t) : project-target ]
+ {
+ local project = [ $(t).project-module ] ;
+ .should-clean-project.$(project) = true ;
+ }
+ }
+
+ # Construct a list of targets explicitly detected on this build system run
+ # as a result of building main targets.
+ local targets-to-clean ;
+ for local t in $(.results-of-main-targets)
+ {
+ # Do not include roots or sources.
+ targets-to-clean += [ virtual-target.traverse $(t) ] ;
+ }
+ targets-to-clean = [ sequence.unique $(targets-to-clean) ] ;
+
+ local to-clean ;
+ for local t in [ virtual-target.all-targets ]
+ {
+ # Remove only derived targets and only those asked to be cleaned,
+ # whether directly or by belonging to one of the removed projects.
+ local p = [ $(t).project ] ;
+ if [ $(t).action ] && ( $(t) in $(targets-to-clean) ||
+ [ should-clean-project [ $(p).project-module ] ] )
+ {
+ to-clean += $(t) ;
+ }
+ }
+
+ local to-clean-actual ;
+ for local t in $(to-clean)
+ {
+ to-clean-actual += [ $(t).actualize ] ;
+ }
+ return $(to-clean-actual) ;
+}
+
+
+# Given a target id, try to find and return the corresponding target. This is
+# only invoked when there is no Jamfile in ".". This code somewhat duplicates
+# code in project-target.find but we can not reuse that code without a
+# project-targets instance.
+#
+local rule find-target ( target-id )
+{
+ local split = [ MATCH (.*)//(.*) : $(target-id) ] ;
+
+ local pm ;
+ if $(split)
+ {
+ pm = [ project.find $(split[1]) : "." ] ;
+ }
+ else
+ {
+ pm = [ project.find $(target-id) : "." ] ;
+ }
+
+ local result ;
+ if $(pm)
+ {
+ result = [ project.target $(pm) ] ;
+ }
+
+ if $(split)
+ {
+ result = [ $(result).find $(split[2]) ] ;
+ }
+
+ return $(result) ;
+}
+
+
+# Initializes a new configuration module.
+#
+local rule initialize-config-module ( module-name : location ? )
+{
+ project.initialize $(module-name) : $(location) ;
+ if USER_MODULE in [ RULENAMES ]
+ {
+ USER_MODULE $(module-name) ;
+ }
+}
+
+
+# Helper rule used to load configuration files. Loads the first configuration
+# file with the given 'filename' at 'path' into module with name 'module-name'.
+# Not finding the requested file may or may not be treated as an error depending
+# on the must-find parameter. Returns a normalized path to the loaded
+# configuration file or nothing if no file was loaded.
+#
+local rule load-config ( module-name : filename : path + : must-find ? )
+{
+ if $(.debug-config)
+ {
+ local path-string = $(path) ;
+ if $(path-string) = "" { path-string = . ; }
+ ECHO notice: Searching '$(path-string)' for $(module-name)
+ configuration file '$(filename)'. ;
+ }
+ local where = [ GLOB $(path) : $(filename) ] ;
+ if $(where)
+ {
+ where = [ NORMALIZE_PATH $(where[1]) ] ;
+ if $(.debug-config)
+ {
+ local where-string = $(where:D) ;
+ if $(where-string) = "" { where-string = . ; }
+ where-string = '$(where-string)' ;
+ ECHO notice: Loading $(module-name) configuration file '$(filename)'
+ from $(where-string:J=" "). ;
+ }
+
+ # Set source location so that path-constant in config files with
+ # relative paths work. This is of most importance for
+ # project-config.jam, but may be used in other config files as well.
+ local attributes = [ project.attributes $(module-name) ] ;
+ $(attributes).set source-location : $(where:D) : exact ;
+ modules.load $(module-name) : $(filename) : $(path) ;
+ project.load-used-projects $(module-name) ;
+ }
+ else if $(must-find) || $(.debug-config)
+ {
+ local path-string = $(path) ;
+ if $(path-string) = "" { path-string = . ; }
+ path-string = '$(path-string)' ;
+ path-string = $(path-string:J=" ") ;
+ if $(must-find)
+ {
+ import errors ;
+ errors.user-error Configuration file '$(filename)' not found "in"
+ $(path-string). ;
+ }
+ ECHO notice: Configuration file '$(filename)' not found "in"
+ $(path-string). ;
+ }
+ return $(where) ;
+}
+
+
+# Loads all the configuration files used by Boost Build in the following order:
+#
+# -- test-config --
+# Loaded only if specified on the command-line using the --test-config
+# command-line parameter. It is ok for this file not to exist even if specified.
+# If this configuration file is loaded, regular site and user configuration
+# files will not be. If a relative path is specified, file is searched for in
+# the current folder.
+#
+# -- site-config --
+# Always named site-config.jam. Will only be found if located on the system
+# root path (Windows), /etc (non-Windows), user's home folder or the Boost Build
+# path, in that order. Not loaded in case the test-config configuration file is
+# loaded or the --ignore-site-config command-line option is specified.
+#
+# -- user-config --
+# Named user-config.jam by default or may be named explicitly using the
+# --user-config command-line option or the BOOST_BUILD_USER_CONFIG environment
+# variable. If named explicitly the file is looked for from the current working
+# directory and if the default one is used then it is searched for in the
+# user's home directory and the Boost Build path, in that order. Not loaded in
+# case either the test-config configuration file is loaded or an empty file name
+# is explicitly specified. If the file name has been given explicitly then the
+# file must exist.
+#
+# -- project-config --
+# Always named project-config.jam. Looked up in the current working folder and
+# then upwards through its parents up to the root folder.
+#
+# Test configurations have been added primarily for use by Boost Build's
+# internal unit testing system but may be used freely in other places as well.
+#
+# Loads all the configuration files relevant to this build system run: an
+# optional test-config, then site-config, user-config and project-config (the
+# large comment block above documents the lookup rules for each). Finishes by
+# telling the project module that project loading is complete.
+#
+local rule load-configuration-files
+{
+    # Flag indicating that site configuration should not be loaded.
+    local ignore-site-config =
+        [ MATCH ^(--ignore-site-config)$ : $(.argv) ] ;
+
+    initialize-config-module test-config ;
+    local test-config = [ MATCH ^--test-config=(.*)$ : $(.argv) ] ;
+    # Strip quotes possibly left around the file name by the shell.
+    local uq = [ MATCH \"(.*)\" : $(test-config) ] ;
+    if $(uq)
+    {
+        test-config = $(uq) ;
+    }
+    if $(test-config)
+    {
+        local where = [ load-config test-config : $(test-config:BS) :
+            $(test-config:D) ] ;
+        if $(where)
+        {
+            if $(.debug-config)
+            {
+                ECHO "notice: Regular site and user configuration files will" ;
+                ECHO "notice: be ignored due to the test configuration being"
+                    "loaded." ;
+            }
+        }
+        else
+        {
+            # The requested test configuration could not be loaded - fall back
+            # to loading the regular configuration files.
+            test-config = ;
+        }
+    }
+
+    local user-path = [ os.home-directories ] [ os.environ BOOST_BUILD_PATH ] ;
+    local site-path = /etc $(user-path) ;
+    if [ os.name ] in NT CYGWIN
+    {
+        site-path = [ modules.peek : SystemRoot ] $(user-path) ;
+    }
+
+    if $(.debug-config) && ! $(test-config) && $(ignore-site-config)
+    {
+        ECHO "notice: Site configuration files will be ignored due to the" ;
+        ECHO "notice: --ignore-site-config command-line option." ;
+    }
+
+    initialize-config-module site-config ;
+    if ! $(test-config) && ! $(ignore-site-config)
+    {
+        load-config site-config : site-config.jam : $(site-path) ;
+    }
+
+    initialize-config-module user-config ;
+    if ! $(test-config)
+    {
+        local user-config = [ MATCH ^--user-config=(.*)$ : $(.argv) ] ;
+        # If the option is given multiple times the last occurrence wins.
+        user-config = $(user-config[-1]) ;
+        user-config ?= [ os.environ BOOST_BUILD_USER_CONFIG ] ;
+        # Special handling for the case when the OS does not strip the quotes
+        # around the file name, as is the case when using Cygwin bash.
+        user-config = [ utility.unquote $(user-config) ] ;
+        local explicitly-requested = $(user-config) ;
+        user-config ?= user-config.jam ;
+
+        if $(user-config)
+        {
+            if $(explicitly-requested)
+            {
+                # Treat explicitly entered user paths as native OS path
+                # references and, if non-absolute, root them at the current
+                # working directory.
+                user-config = [ path.make $(user-config) ] ;
+                user-config = [ path.root $(user-config) [ path.pwd ] ] ;
+                user-config = [ path.native $(user-config) ] ;
+
+                if $(.debug-config)
+                {
+                    ECHO notice: Loading explicitly specified user configuration
+                        file: ;
+                    ECHO " $(user-config)" ;
+                }
+
+                load-config user-config : $(user-config:BS) : $(user-config:D)
+                    : must-exist ;
+            }
+            else
+            {
+                load-config user-config : $(user-config) : $(user-path) ;
+            }
+        }
+        else if $(.debug-config)
+        {
+            ECHO notice: User configuration file loading explicitly disabled. ;
+        }
+    }
+
+    # We look for project-config.jam from "." upward. I am not sure this is 100%
+    # right decision, we might as well check for it only alongside the Jamroot
+    # file. However:
+    # - We need to load project-config.jam before Jamroot
+    # - We probably need to load project-config.jam even if there is no Jamroot
+    #   - e.g. to implement automake-style out-of-tree builds.
+    local file = [ path.glob "." : project-config.jam ] ;
+    if ! $(file)
+    {
+        file = [ path.glob-in-parents "." : project-config.jam ] ;
+    }
+    if $(file)
+    {
+        initialize-config-module project-config : $(file:D) ;
+        load-config project-config : project-config.jam : $(file:D) ;
+    }
+
+    project.end-load ;
+}
+
+
+# Autoconfigure toolsets based on any instances of --toolset=xx,yy,...zz or
+# toolset=xx,yy,...zz in the command line. May return additional properties to
+# be processed as if they had been specified by the user.
+#
+local rule process-explicit-toolset-requests
+{
+    local extra-properties ;
+
+    # Toolsets may be requested via the --toolset= option or the toolset=
+    # feature; both accept a comma-separated list.
+    local option-toolsets = [ regex.split-list [ MATCH ^--toolset=(.*)$ : $(.argv) ] : "," ] ;
+    local feature-toolsets = [ regex.split-list [ MATCH ^toolset=(.*)$ : $(.argv) ] : "," ] ;
+
+    for local t in $(option-toolsets) $(feature-toolsets)
+    {
+        # Parse toolset-version/properties.
+        local toolset = [ MATCH ([^/]+)/?.* : $(t) ] ;
+        local properties = [ feature.expand-subfeatures <toolset>$(toolset) : true ] ;
+        local toolset-property = [ property.select <toolset> : $(properties) ] ;
+        local known ;
+        if $(toolset-property:G=) in [ feature.values <toolset> ]
+        {
+            known = true ;
+        }
+
+        # If the toolset is not known, configure it now.
+
+        # TODO: we should do 'using $(toolset)' in case no version has been
+        # specified and there are no versions defined for the given toolset to
+        # allow the toolset to configure its default version. For this we need
+        # to know how to detect whether a given toolset has any versions
+        # defined. An alternative would be to do this whenever version is not
+        # specified but that would require that toolsets correctly handle the
+        # case when their default version is configured multiple times which
+        # should be checked for all existing toolsets first.
+
+        if ! $(known)
+        {
+            if $(.debug-config)
+            {
+                ECHO "notice: [cmdline-cfg] toolset $(toolset) not"
+                    "previously configured; attempting to auto-configure now" ;
+            }
+            # Split the request into the toolset name and its optional version.
+            local t,v = [ MATCH ([^-]+)-?(.+)? : $(toolset) ] ;
+            toolset.using $(t,v[1]) : $(t,v[2]) ;
+        }
+
+        # Make sure we get an appropriate property into the build request in
+        # case toolset has been specified using the "--toolset=..." command-line
+        # option form.
+        if ! $(t) in $(.argv) $(feature-toolsets)
+        {
+            if $(.debug-config)
+            {
+                ECHO notice: [cmdline-cfg] adding toolset=$(t) to the build
+                    request. ;
+            }
+            extra-properties += toolset=$(t) ;
+        }
+    }
+
+    return $(extra-properties) ;
+}
+
+
+# Returns whether the given project (identified by its project module) should
+# be cleaned because it or any of its parent projects have already been marked
+# as needing to be cleaned in this build. As an optimization, will explicitly
+# mark all encountered projects as needing to be cleaned in case they have not
+# already been marked so.
+#
+local rule should-clean-project ( project )
+{
+    # The '-is-defined' suffix distinguishes 'not yet computed' from 'computed
+    # with an empty result': appending to an unset (empty) variable still
+    # yields an empty list, while a cached "" yields a non-empty one.
+    if ! $(.should-clean-project.$(project))-is-defined
+    {
+        local r = "" ;
+        if ! [ project.is-jamroot-module $(project) ]
+        {
+            # Inherit the clean flag from the parent project, if any.
+            local parent = [ project.attribute $(project) parent-module ] ;
+            if $(parent)
+            {
+                r = [ should-clean-project $(parent) ] ;
+            }
+        }
+        .should-clean-project.$(project) = $(r) ;
+    }
+
+    return $(.should-clean-project.$(project)) ;
+}
+
+
+################################################################################
+#
+# main()
+# ------
+#
+################################################################################
+
+{
+    # Report the Boost Build version and bail out early if requested.
+    if --version in $(.argv)
+    {
+        version.print ;
+        EXIT ;
+    }
+
+    version.verify-engine-version ;
+
+    load-configuration-files ;
+
+    # Load explicitly specified toolset modules.
+    local extra-properties = [ process-explicit-toolset-requests ] ;
+
+    # Load the actual project build script modules. We always load the project
+    # in the current folder so 'use-project' directives have any chance of being
+    # seen. Otherwise, we would not be able to refer to subprojects using target
+    # ids.
+    local current-project ;
+    {
+        local current-module = [ project.find "." : "." ] ;
+        if $(current-module)
+        {
+            current-project = [ project.target $(current-module) ] ;
+        }
+    }
+
+    # Load the default toolset module if no other has already been specified.
+    if ! [ feature.values <toolset> ]
+    {
+        local default-toolset = $(.default-toolset) ;
+        local default-toolset-version = ;
+        if $(default-toolset)
+        {
+            default-toolset-version = $(.default-toolset-version) ;
+        }
+        else
+        {
+            # No explicit default has been configured - guess one from the OS.
+            default-toolset = gcc ;
+            if [ os.name ] = NT
+            {
+                default-toolset = msvc ;
+            }
+            else if [ os.name ] = MACOSX
+            {
+                default-toolset = darwin ;
+            }
+        }
+
+        ECHO "warning: No toolsets are configured." ;
+        ECHO "warning: Configuring default toolset" \"$(default-toolset)\". ;
+        ECHO "warning: If the default is wrong, your build may not work correctly." ;
+        ECHO "warning: Use the \"toolset=xxxxx\" option to override our guess." ;
+        ECHO "warning: For more configuration options, please consult" ;
+        ECHO "warning: http://boost.org/boost-build2/doc/html/bbv2/advanced/configuration.html" ;
+
+        toolset.using $(default-toolset) : $(default-toolset-version) ;
+    }
+
+
+    # Parse command line for targets and properties. Note that this requires
+    # that all project files already be loaded.
+    # FIXME: This is not entirely true. Additional project files may be loaded
+    # only later via the project.find() rule when dereferencing encountered
+    # target ids containing explicit project references. See what to do about
+    # those as such 'lazy loading' may cause problems that are then extremely
+    # difficult to debug.
+    local build-request = [ build-request.from-command-line $(.argv)
+        $(extra-properties) ] ;
+    local target-ids = [ $(build-request).get-at 1 ] ;
+    local properties = [ $(build-request).get-at 2 ] ;
+
+
+    # Expand properties specified on the command line into multiple property
+    # sets consisting of all legal property combinations. Each expanded property
+    # set will be used for a single build run. E.g. if multiple toolsets are
+    # specified then requested targets will be built with each of them.
+    # NOTE(review): 'expanded' is not declared local here - confirm it is meant
+    # to remain visible after this block.
+    if $(properties)
+    {
+        expanded = [ build-request.expand-no-defaults $(properties) ] ;
+        local xexpanded ;
+        for local e in $(expanded)
+        {
+            xexpanded += [ property-set.create [ feature.split $(e) ] ] ;
+        }
+        expanded = $(xexpanded) ;
+    }
+    else
+    {
+        expanded = [ property-set.empty ] ;
+    }
+
+
+    # Check that we actually found something to build.
+    if ! $(current-project) && ! $(target-ids)
+    {
+        import errors ;
+        errors.user-error no Jamfile "in" current directory found, and no target
+            references specified. ;
+    }
+
+
+    # Flags indicating that this build system run has been started in order to
+    # clean existing instead of create new targets. Note that these are not the
+    # final flag values as they may get changed later on due to some special
+    # targets being specified on the command line.
+    local clean ; if "--clean" in $(.argv) { clean = true ; }
+    local cleanall ; if "--clean-all" in $(.argv) { cleanall = true ; }
+
+
+    # List of explicitly requested files to build. Any target references read
+    # from the command line parameter not recognized as one of the targets
+    # defined in the loaded Jamfiles will be interpreted as an explicitly
+    # requested file to build. If any such files are explicitly requested then
+    # only those files and the targets they depend on will be built and they
+    # will be searched for among targets that would have been built had there
+    # been no explicitly requested files.
+    # FIX: added the statement-terminating ';' missing from the original
+    # declaration - without it the following 'local targets' tokens would be
+    # swallowed into this 'local' statement.
+    local explicitly-requested-files ;
+
+
+    # List of Boost Build meta-targets, virtual-targets and actual Jam targets
+    # constructed in this build system run.
+    local targets ;
+    local virtual-targets ;
+    local actual-targets ;
+
+
+    # Process each target specified on the command-line and convert it into
+    # internal Boost Build target objects. Detect special clean target. If no
+    # main Boost Build targets were explicitly requested use the current project
+    # as the target.
+    for local id in $(target-ids)
+    {
+        if $(id) = clean
+        {
+            clean = true ;
+        }
+        else
+        {
+            local t ;
+            if $(current-project)
+            {
+                t = [ $(current-project).find $(id) : no-error ] ;
+            }
+            else
+            {
+                t = [ find-target $(id) ] ;
+            }
+
+            if ! $(t)
+            {
+                # Unrecognized target references are treated as file names.
+                ECHO "notice: could not find main target" $(id) ;
+                ECHO "notice: assuming it is a name of file to create." ;
+                explicitly-requested-files += $(id) ;
+            }
+            else
+            {
+                targets += $(t) ;
+            }
+        }
+    }
+    if ! $(targets)
+    {
+        targets += [ project.target [ project.module-name "." ] ] ;
+    }
+
+    if [ option.get dump-generators : : true ]
+    {
+        generators.dump ;
+    }
+
+    # We wish to put config.log in the build directory corresponding to Jamroot,
+    # so that the location does not differ depending on the directory we run the
+    # build from. The amount of indirection necessary here is scary.
+    # NOTE(review): Jam list indexing is 1-based; confirm $(targets[0]) behaves
+    # as intended here (vs. $(targets[1])).
+    local first-project = [ $(targets[0]).project ] ;
+    local first-project-root-location = [ $(first-project).get project-root ] ;
+    local first-project-root-module = [ project.load
+        $(first-project-root-location) ] ;
+    local first-project-root = [ project.target $(first-project-root-module) ] ;
+    local first-build-build-dir = [ $(first-project-root).build-dir ] ;
+    configure.set-log-file $(first-build-build-dir)/config.log ;
+    config-cache.load $(first-build-build-dir)/project-cache.jam ;
+
+    # Now that we have a set of targets to build and a set of property sets to
+    # build the targets with, we can start the main build process by using each
+    # property set to generate virtual targets from all of our listed targets
+    # and any of their dependants.
+    for local p in $(expanded)
+    {
+        .command-line-free-features = [ property-set.create [ $(p).free ] ] ;
+        for local t in $(targets)
+        {
+            local g = [ $(t).generate $(p) ] ;
+            # Presumably the first element of the generate result is the usage
+            # requirements and the rest are virtual targets - TODO confirm
+            # against the basic-target generate contract.
+            if ! [ class.is-a $(t) : project-target ]
+            {
+                .results-of-main-targets += $(g[2-]) ;
+            }
+            virtual-targets += $(g[2-]) ;
+        }
+    }
+
+
+    # Convert collected virtual targets into actual raw Jam targets.
+    for t in $(virtual-targets)
+    {
+        actual-targets += [ $(t).actualize ] ;
+    }
+
+    config-cache.save ;
+
+
+    # If XML data output has been requested prepare additional rules and targets
+    # so we can hook into Jam to collect build data while its building and have
+    # it trigger the final XML report generation after all the planned targets
+    # have been built.
+    if $(.out-xml)
+    {
+        # Get a qualified virtual target name.
+        rule full-target-name ( target )
+        {
+            local name = [ $(target).name ] ;
+            local project = [ $(target).project ] ;
+            local project-path = [ $(project).get location ] ;
+            return $(project-path)//$(name) ;
+        }
+
+        # Generate an XML file containing build statistics for each constituent.
+        #
+        rule out-xml ( xml-file : constituents * )
+        {
+            # Prepare valid XML header and footer with some basic info. The
+            # multi-line string assigns a literal newline to nl.
+            local nl = "
+" ;
+            local os = [ modules.peek : OS OSPLAT JAMUNAME ] "" ;
+            local timestamp = [ modules.peek : JAMDATE ] ;
+            local cwd = [ PWD ] ;
+            local command = $(.argv) ;
+            local bb-version = [ version.boost-build ] ;
+            .header on $(xml-file) =
+                "<?xml version=\"1.0\" encoding=\"utf-8\"?>"
+                "$(nl)<build format=\"1.0\" version=\"$(bb-version)\">"
+                "$(nl) <os name=\"$(os[1])\" platform=\"$(os[2])\"><![CDATA[$(os[3-]:J= )]]></os>"
+                "$(nl) <timestamp><![CDATA[$(timestamp)]]></timestamp>"
+                "$(nl) <directory><![CDATA[$(cwd)]]></directory>"
+                "$(nl) <command><![CDATA[\"$(command:J=\" \")\"]]></command>"
+                ;
+            .footer on $(xml-file) =
+                "$(nl)</build>" ;
+
+            # Generate the target dependency graph.
+            .contents on $(xml-file) +=
+                "$(nl) <targets>" ;
+            for local t in [ virtual-target.all-targets ]
+            {
+                local action = [ $(t).action ] ;
+                if $(action)
+                    # If a target has no action, it has no dependencies.
+                {
+                    local name = [ full-target-name $(t) ] ;
+                    local sources = [ $(action).sources ] ;
+                    local dependencies ;
+                    for local s in $(sources)
+                    {
+                        dependencies += [ full-target-name $(s) ] ;
+                    }
+
+                    local path = [ $(t).path ] ;
+                    local jam-target = [ $(t).actual-name ] ;
+
+                    .contents on $(xml-file) +=
+                        "$(nl) <target>"
+                        "$(nl) <name><![CDATA[$(name)]]></name>"
+                        "$(nl) <dependencies>"
+                        "$(nl) <dependency><![CDATA[$(dependencies)]]></dependency>"
+                        "$(nl) </dependencies>"
+                        "$(nl) <path><![CDATA[$(path)]]></path>"
+                        "$(nl) <jam-target><![CDATA[$(jam-target)]]></jam-target>"
+                        "$(nl) </target>"
+                        ;
+                }
+            }
+            .contents on $(xml-file) +=
+                "$(nl) </targets>" ;
+
+            # Build $(xml-file) after $(constituents). Do so even if a
+            # constituent action fails and regenerate the xml on every bjam run.
+            INCLUDES $(xml-file) : $(constituents) ;
+            ALWAYS $(xml-file) ;
+            __ACTION_RULE__ on $(xml-file) =
+                build-system.out-xml.generate-action ;
+            out-xml.generate $(xml-file) ;
+        }
+
+        # The actual build actions are here; if we did this work in the actions
+        # clause we would have to form a valid command line containing the
+        # result of @(...) below (the name of the XML file).
+        #
+        rule out-xml.generate-action ( args * : xml-file
+            : command status start end user system : output ? )
+        {
+            local contents =
+                [ on $(xml-file) return $(.header) $(.contents) $(.footer) ] ;
+            # The @() expansion writes the file as a side effect.
+            local f = @($(xml-file):E=$(contents)) ;
+        }
+
+        # Nothing to do here; the *real* actions happen in
+        # out-xml.generate-action.
+        actions quietly out-xml.generate { }
+
+        # Define the out-xml file target, which depends on all the targets so
+        # that it runs the collection after the targets have run.
+        out-xml $(.out-xml) : $(actual-targets) ;
+
+        # Set up a global __ACTION_RULE__ that records all the available
+        # statistics about each actual target in a variable "on" the --out-xml
+        # target.
+        #
+        rule out-xml.collect ( xml-file : target : command status start end user
+            system : output ? )
+        {
+            local nl = "
+" ;
+            # Open the action with some basic info.
+            .contents on $(xml-file) +=
+                "$(nl) <action status=\"$(status)\" start=\"$(start)\" end=\"$(end)\" user=\"$(user)\" system=\"$(system)\">" ;
+
+            # If we have an action object we can print out more detailed info.
+            local action = [ on $(target) return $(.action) ] ;
+            if $(action)
+            {
+                local action-name = [ $(action).action-name ] ;
+                local action-sources = [ $(action).sources ] ;
+                local action-props = [ $(action).properties ] ;
+
+                # The qualified name of the action which we created the target.
+                .contents on $(xml-file) +=
+                    "$(nl) <name><![CDATA[$(action-name)]]></name>" ;
+
+                # The sources that made up the target.
+                .contents on $(xml-file) +=
+                    "$(nl) <sources>" ;
+                for local source in $(action-sources)
+                {
+                    local source-actual = [ $(source).actual-name ] ;
+                    .contents on $(xml-file) +=
+                        "$(nl) <source><![CDATA[$(source-actual)]]></source>" ;
+                }
+                .contents on $(xml-file) +=
+                    "$(nl) </sources>" ;
+
+                # The properties that define the conditions under which the
+                # target was built.
+                .contents on $(xml-file) +=
+                    "$(nl) <properties>" ;
+                for local prop in [ $(action-props).raw ]
+                {
+                    local prop-name = [ MATCH ^<(.*)>$ : $(prop:G) ] ;
+                    .contents on $(xml-file) +=
+                        "$(nl) <property name=\"$(prop-name)\"><![CDATA[$(prop:G=)]]></property>" ;
+                }
+                .contents on $(xml-file) +=
+                    "$(nl) </properties>" ;
+            }
+
+            local locate = [ on $(target) return $(LOCATE) ] ;
+            locate ?= "" ;
+            .contents on $(xml-file) +=
+                "$(nl) <jam-target><![CDATA[$(target)]]></jam-target>"
+                "$(nl) <path><![CDATA[$(target:G=:R=$(locate))]]></path>"
+                "$(nl) <command><![CDATA[$(command)]]></command>"
+                "$(nl) <output><![CDATA[$(output)]]></output>" ;
+            .contents on $(xml-file) +=
+                "$(nl) </action>" ;
+        }
+
+        # When no __ACTION_RULE__ is set "on" a target, the search falls back to
+        # the global module.
+        module
+        {
+            __ACTION_RULE__ = build-system.out-xml.collect
+                [ modules.peek build-system : .out-xml ] ;
+        }
+
+        IMPORT
+            build-system :
+            out-xml.collect
+            out-xml.generate-action
+            : :
+            build-system.out-xml.collect
+            build-system.out-xml.generate-action
+            ;
+    }
+
+    # Honor the --jobs option by setting the engine's parallelism level.
+    local j = [ option.get jobs ] ;
+    if $(j)
+    {
+        modules.poke : PARALLELISM : $(j) ;
+    }
+
+    local k = [ option.get keep-going : true : true ] ;
+    if $(k) in "on" "yes" "true"
+    {
+        modules.poke : KEEP_GOING : 1 ;
+    }
+    else if $(k) in "off" "no" "false"
+    {
+        modules.poke : KEEP_GOING : 0 ;
+    }
+    else
+    {
+        EXIT "error: Invalid value for the --keep-going option" ;
+    }
+
+    # The 'all' pseudo target is not strictly needed except in the case when we
+    # use it below but people often assume they always have this target
+    # available and do not declare it themselves before use which may cause
+    # build failures with an error message about not being able to build the
+    # 'all' target.
+    NOTFILE all ;
+
+    # And now that all the actual raw Jam targets and all the dependencies
+    # between them have been prepared all that is left is to tell Jam to update
+    # those targets.
+    if $(explicitly-requested-files)
+    {
+        # Note that this case can not be joined with the regular one when only
+        # exact Boost Build targets are requested as here we do not build those
+        # requested targets but only use them to construct the dependency tree
+        # needed to build the explicitly requested files.
+        UPDATE $(explicitly-requested-files:G=e) $(.out-xml) ;
+    }
+    else if $(cleanall)
+    {
+        UPDATE clean-all ;
+    }
+    else if $(clean)
+    {
+        common.Clean clean : [ actual-clean-targets ] ;
+        UPDATE clean ;
+    }
+    else
+    {
+        configure.print-configure-checks-summary ;
+
+        if $(.pre-build-hook)
+        {
+            $(.pre-build-hook) ;
+        }
+
+        DEPENDS all : $(actual-targets) ;
+        if UPDATE_NOW in [ RULENAMES ]
+        {
+            local ok = [ UPDATE_NOW all $(.out-xml) ] ;
+            if $(.post-build-hook)
+            {
+                $(.post-build-hook) $(ok) ;
+            }
+            # Prevent automatic update of the 'all' target, now that we have
+            # explicitly updated what we wanted.
+            UPDATE ;
+        }
+        else
+        {
+            UPDATE all $(.out-xml) ;
+        }
+    }
+}
diff --git a/src/kenlm/jam-files/boost-build/build/ac.jam b/src/kenlm/jam-files/boost-build/build/ac.jam
new file mode 100644
index 0000000..71bc16c
--- /dev/null
+++ b/src/kenlm/jam-files/boost-build/build/ac.jam
@@ -0,0 +1,303 @@
+# Copyright (c) 2010 Vladimir Prus.
+# Copyright (c) 2013 Steven Watanabe
+#
+# Use, modification and distribution is subject to the Boost Software
+# License Version 1.0. (See accompanying file LICENSE_1_0.txt or
+# http://www.boost.org/LICENSE_1_0.txt)
+
+import property-set ;
+import path ;
+import modules ;
+import "class" ;
+import errors ;
+import configure ;
+import project ;
+import virtual-target ;
+import generators ;
+import property ;
+import print ;
+
+project.initialize $(__name__) ;
+.project = [ project.current ] ;
+project ac ;
+
+# Action rule: writes into $(target) a one-line C++ source that includes the
+# header named by the <include> property found in $(properties).
+#
+rule generate-include ( target : sources * : properties * )
+{
+    print.output $(target) ;
+    local header = [ property.select <include> : $(properties) ] ;
+    print.text "#include <$(header:G=)>" : true ;
+}
+
+# Action rule: writes a minimal C++ program into $(target). Used by
+# find-library below as the driver source for link probing.
+#
+rule generate-main ( target : sources * : properties * )
+{
+    print.output $(target) ;
+    print.text "int main() {}" : true ;
+}
+
+# Checks whether $(header) is usable with the given build properties. If
+# $(provided-path) is given and the header exists there, returns that path.
+# Otherwise tries to compile a generated source including the header and, on
+# success, returns the marker value '%default' (no extra include path needed -
+# see ac-library.construct). Returns nothing on failure.
+#
+rule find-include-path ( properties : header : provided-path ? )
+{
+    if $(provided-path) && [ path.exists [ path.root $(header) $(provided-path) ] ]
+    {
+        return $(provided-path) ;
+    }
+    else
+    {
+        # Build a throwaway source that only includes the header and try to
+        # compile it into an object file.
+        local a = [ class.new action : ac.generate-include : [ property-set.create <include>$(header) ] ] ;
+        local cpp = [ class.new file-target $(header).cpp exact : CPP : $(.project) : $(a) ] ;
+        cpp = [ virtual-target.register $(cpp) ] ;
+        local result = [ generators.construct $(.project) $(header) : OBJ : $(properties) : $(cpp) : true ] ;
+        local jam-targets ;
+        for t in $(result[2-])
+        {
+            jam-targets += [ $(t).actualize ] ;
+        }
+        # Run the check immediately, sending output to the configure log.
+        if [ UPDATE_NOW $(jam-targets) : [ modules.peek configure : .log-fd ]
+            : ignore-minus-n : ignore-minus-q ]
+        {
+            return %default ;
+        }
+    }
+}
+
+# Constructs a searched library target for $(name), optionally searched for in
+# $(provided-path).
+#
+rule construct-library ( name : property-set : provided-path ? )
+{
+    # NOTE(review): 'link-opt' (singular) does not appear to be set anywhere in
+    # this module - the caller uses 'link-opts' - so this refine expands to a
+    # no-op; confirm whether $(link-opts[1]) was intended.
+    property-set = [ $(property-set).refine [ property-set.create $(link-opt) ] ] ;
+    local lib-props = [ $(property-set).add-raw <name>$(name) <search>$(provided-path) ] ;
+    return [ generators.construct $(.project) lib-$(name)
+        : SEARCHED_LIB : $(lib-props) : : true ] ;
+}
+
+
+# Searches for a linkable library among the $(names) candidates by trying to
+# link a trivial program against each one, first with the caller's requested
+# <link> variant and then with the other one. Returns the first name that
+# links together with the <link> property that worked, or nothing on failure.
+#
+rule find-library ( properties : names + : provided-path ? )
+{
+    local result ;
+    # Lazily create the trivial main.cpp used as the link-test driver.
+    if ! $(.main.cpp)
+    {
+        local a = [ class.new action : ac.generate-main :
+            [ property-set.empty ] ] ;
+        .main.cpp = [ virtual-target.register
+            [ class.new file-target main.cpp exact
+                : CPP : $(.project) : $(a) ] ] ;
+    }
+    # NOTE(review): 'link-opts' is not declared local, so it is visible in the
+    # dynamic scope of callees - confirm this is intentional.
+    if [ $(properties).get <link> ] = shared
+    {
+        link-opts = <link>shared <link>static ;
+    }
+    else
+    {
+        link-opts = <link>static <link>shared ;
+    }
+    while $(link-opts)
+    {
+        local names-iter = $(names) ;
+        properties = [ $(properties).refine [ property-set.create $(link-opts[1]) ] ] ;
+        while $(names-iter)
+        {
+            local name = $(names-iter[1]) ;
+            local lib = [ construct-library $(name) : $(properties) : $(provided-path) ] ;
+            # Link the trivial driver against the candidate library.
+            local test = [ generators.construct $(.project) $(name) : EXE
+                : [ $(properties).add $(lib[1]) ] : $(.main.cpp) $(lib[2-])
+                : true ] ;
+            local jam-targets ;
+            for t in $(test[2-])
+            {
+                jam-targets += [ $(t).actualize ] ;
+            }
+            if [ UPDATE_NOW $(jam-targets) : [ modules.peek configure : .log-fd ]
+                : ignore-minus-n : ignore-minus-q ]
+            {
+                result = $(name) $(link-opts[1]) ;
+                names-iter = ; link-opts = ; # break
+            }
+            names-iter = $(names-iter[2-]) ;
+        }
+        link-opts = $(link-opts[2-]) ;
+    }
+    return $(result) ;
+}
+
+# Main target class implementing an autoconf-style library check: when built it
+# locates the library (and its header) either from explicitly configured
+# paths/names, from environment variables, or by compile/link probing via the
+# module-level ac.* rules, caching the outcome in the config cache.
+#
+class ac-library : basic-target
+{
+    import errors ;
+    import indirect ;
+    import virtual-target ;
+    import ac ;
+    import configure ;
+    import config-cache ;
+
+    rule __init__ ( name : project : requirements * : include-path ? : library-path ? : library-name ? )
+    {
+        basic-target.__init__ $(name) : $(project) : : $(requirements) ;
+
+        reconfigure $(include-path) : $(library-path) : $(library-name) ;
+    }
+
+    # Sets the header file whose availability the check compiles against.
+    rule set-header ( header )
+    {
+        self.header = $(header) ;
+    }
+
+    # Sets the candidate library names probed when none is configured.
+    rule set-default-names ( names + )
+    {
+        self.default-names = $(names) ;
+    }
+
+    # Records explicitly supplied paths/name. A no-op when called with no
+    # arguments; otherwise the target must not have been configured yet.
+    rule reconfigure ( include-path ? : library-path ? : library-name ? )
+    {
+        if $(include-path) || $(library-path) || $(library-name)
+        {
+            check-not-configured ;
+
+            self.include-path = $(include-path) ;
+            self.library-path = $(library-path) ;
+            self.library-name = $(library-name) ;
+        }
+    }
+
+    # Delegates the whole check to another target instead of probing.
+    rule set-target ( target )
+    {
+        check-not-configured ;
+        self.target = $(target) ;
+    }
+
+    rule check-not-configured ( )
+    {
+        if $(self.include-path) || $(self.library-path) || $(self.library-name) || $(self.target)
+        {
+            errors.user-error [ name ] "is already configured" ;
+        }
+    }
+
+    rule construct ( name : sources * : property-set )
+    {
+        if $(self.target)
+        {
+            return [ $(self.target).generate $(property-set) ] ;
+        }
+        else
+        {
+            # With no explicit configuration at all, consult the NAME_NAME /
+            # NAME_INCLUDE / NAME_LIBRARY_PATH environment variables.
+            local use-environment ;
+            if ! $(self.library-name) && ! $(self.include-path) && ! $(self.library-path)
+            {
+                use-environment = true ;
+            }
+            local libnames = $(self.library-name) ;
+            if ! $(libnames) && $(use-environment)
+            {
+                libnames = [ modules.peek : $(name:U)_NAME ] ;
+                # Backward compatibility only.
+                libnames ?= [ modules.peek : $(name:U)_BINARY ] ;
+            }
+            libnames ?= $(self.default-names) ;
+
+            local include-path = $(self.include-path) ;
+            if ! $(include-path) && $(use-environment)
+            {
+                include-path = [ modules.peek : $(name:U)_INCLUDE ] ;
+            }
+
+            local library-path = $(self.library-path) ;
+            if ! $(library-path) && $(use-environment)
+            {
+                library-path = [ modules.peek : $(name:U)_LIBRARY_PATH ] ;
+                # Backwards compatibility only.
+                library-path ?= [ modules.peek : $(name:U)_LIBPATH ] ;
+            }
+
+            # Cache key built from only the properties that can influence the
+            # search result.
+            local toolset = [ $(property-set).get <toolset> ] ;
+            local toolset-version-property = "<toolset-$(toolset):version>" ;
+            local relevant = [ property.select <target-os> <toolset>
+                $(toolset-version-property) <link> <address-model> <architecture> :
+                [ $(property-set).raw ] ] ;
+
+            local key = ac-library-$(name)-$(relevant:J=-) ;
+            local lookup = [ config-cache.get $(key) ] ;
+
+            if $(lookup)
+            {
+                if $(lookup) = missing
+                {
+                    configure.log-library-search-result $(name) : "no (cached)" ;
+                    return [ property-set.empty ] ;
+                }
+                else
+                {
+                    # Cached hit: lookup = include-path library-name link-prop.
+                    local includes = $(lookup[1]) ;
+                    if $(includes) = %default
+                    {
+                        includes = ;
+                    }
+                    local library = [ ac.construct-library $(lookup[2]) :
+                        [ $(property-set).refine [ property-set.create $(lookup[3]) ] ] : $(library-path) ] ;
+                    configure.log-library-search-result $(name) : "yes (cached)" ;
+                    return [ $(library[1]).add-raw <include>$(includes) ] $(library[2-]) ;
+                }
+            }
+            else
+            {
+                local includes = [ ac.find-include-path $(property-set) : $(self.header) : $(include-path) ] ;
+                local library = [ ac.find-library $(property-set) : $(libnames) : $(library-path) ] ;
+                if $(includes) && $(library)
+                {
+                    config-cache.set $(key) : $(includes) $(library) ;
+                    if $(includes) = %default
+                    {
+                        includes = ;
+                    }
+                    library = [ ac.construct-library $(library[1]) :
+                        [ $(property-set).refine [ property-set.create $(library[2]) ] ] : $(library-path) ] ;
+                    configure.log-library-search-result $(name) : "yes" ;
+                    return [ $(library[1]).add-raw <include>$(includes) ] $(library[2-]) ;
+                }
+                else
+                {
+                    config-cache.set $(key) : missing ;
+                    configure.log-library-search-result $(name) : "no" ;
+                    return [ property-set.empty ] ;
+                }
+            }
+        }
+    }
+}
+
+# Helper object backing the 'check-library' conditional: when its 'check' rule
+# is evaluated it attempts to generate the probed target and yields one of two
+# property lists depending on whether generation succeeded.
+#
+class check-library-worker
+{
+    import property-set ;
+    import targets ;
+    import property ;
+
+    rule __init__ ( target : true-properties * : false-properties * )
+    {
+        self.target = $(target) ;
+        self.true-properties = $(true-properties) ;
+        self.false-properties = $(false-properties) ;
+    }
+
+    # Tries to build the stored target reference in the current context and
+    # returns the matching property list with its conditionals evaluated.
+    rule check ( properties * )
+    {
+        local current-target = [ targets.current ] ;
+        local current-project = [ $(current-target).project ] ;
+        local context = [ property-set.create $(properties) ] ;
+        context = [ $(context).propagated ] ;
+        local generated = [ targets.generate-from-reference $(self.target) :
+            $(current-project) : $(context) ] ;
+        local chosen = $(self.false-properties) ;
+        if $(generated[2])
+        {
+            chosen = $(self.true-properties) ;
+        }
+        return [ property.evaluate-conditionals-in-context $(chosen) :
+            $(properties) ] ;
+    }
+}
+
+# Returns a <conditional> property whose evaluation yields $(true-properties)
+# when $(target) can be built and $(false-properties) otherwise.
+#
+rule check-library ( target : true-properties * : false-properties * )
+{
+    local worker = [ class.new check-library-worker $(target) :
+        $(true-properties) : $(false-properties) ] ;
+    return <conditional>@$(worker).check ;
+}
diff --git a/src/kenlm/jam-files/boost-build/build/alias.jam b/src/kenlm/jam-files/boost-build/build/alias.jam
new file mode 100644
index 0000000..9ac8cb8
--- /dev/null
+++ b/src/kenlm/jam-files/boost-build/build/alias.jam
@@ -0,0 +1,74 @@
+# Copyright 2003, 2004, 2006 Vladimir Prus
+# Distributed under the Boost Software License, Version 1.0.
+# (See accompanying file LICENSE_1_0.txt or copy at
+# http://www.boost.org/LICENSE_1_0.txt)
+
+# This module defines the 'alias' rule and the associated target class.
+#
+# Alias is just a main target which returns its source targets without any
+# processing. For example:
+#
+# alias bin : hello test_hello ;
+# alias lib : helpers xml_parser ;
+#
+# Another important use of 'alias' is to conveniently group source files:
+#
+# alias platform-src : win.cpp : <os>NT ;
+# alias platform-src : linux.cpp : <os>LINUX ;
+# exe main : main.cpp platform-src ;
+#
+# Lastly, it is possible to create a local alias for some target, with different
+# properties:
+#
+# alias big_lib : : @/external_project/big_lib/<link>static ;
+#
+
+import "class" : new ;
+import project ;
+import property-set ;
+import targets ;
+
+
+# Main target class backing the 'alias' rule: its construction result is its
+# sources passed through unaltered, and its usage requirements include those
+# of its sources.
+#
+class alias-target-class : basic-target
+{
+    rule __init__ ( name : project : sources * : requirements *
+        : default-build * : usage-requirements * )
+    {
+        basic-target.__init__ $(name) : $(project) : $(sources) :
+            $(requirements) : $(default-build) : $(usage-requirements) ;
+    }
+
+    # Returns the source targets unchanged, with empty usage requirements of
+    # its own.
+    rule construct ( name : source-targets * : property-set )
+    {
+        return [ property-set.empty ] $(source-targets) ;
+    }
+
+    # Merges the aliased sources' usage requirements into the base result so
+    # they propagate to dependents.
+    rule compute-usage-requirements ( subvariant )
+    {
+        local base = [ basic-target.compute-usage-requirements $(subvariant) ] ;
+        return [ $(base).add [ $(subvariant).sources-usage-requirements ] ] ;
+    }
+}
+
+
+# Declares the 'alias' target. It will process its sources virtual-targets by
+# returning them unaltered as its own constructed virtual-targets.
+#
+rule alias ( name : sources * : requirements * : default-build * :
+    usage-requirements * )
+{
+    local project = [ project.current ] ;
+
+    # Normalize each part of the declaration before registering the new
+    # target alternative.
+    local adjusted-sources = [ targets.main-target-sources $(sources) :
+        $(name) : no-renaming ] ;
+    local adjusted-requirements = [ targets.main-target-requirements
+        $(requirements) : $(project) ] ;
+    local adjusted-default-build = [ targets.main-target-default-build
+        $(default-build) : $(project) ] ;
+    local adjusted-usage = [ targets.main-target-usage-requirements
+        $(usage-requirements) : $(project) ] ;
+
+    targets.main-target-alternative
+        [ new alias-target-class $(name) : $(project) : $(adjusted-sources) :
+            $(adjusted-requirements) : $(adjusted-default-build) :
+            $(adjusted-usage) ] ;
+}
+
+
+IMPORT $(__name__) : alias : : alias ;
diff --git a/src/kenlm/jam-files/boost-build/build/build-request.jam b/src/kenlm/jam-files/boost-build/build/build-request.jam
new file mode 100644
index 0000000..2a1bbb4
--- /dev/null
+++ b/src/kenlm/jam-files/boost-build/build/build-request.jam
@@ -0,0 +1,322 @@
+# Copyright 2002 Dave Abrahams
+# Distributed under the Boost Software License, Version 1.0.
+# (See accompanying file LICENSE_1_0.txt or http://www.boost.org/LICENSE_1_0.txt)
+
+import "class" : new ;
+import sequence ;
+import set ;
+import regex ;
+import feature ;
+import property ;
+import container ;
+import string ;
+
+
+# Transform property-set by applying f to each component property.
+#
+local rule apply-to-property-set ( f property-set )
+{
+ local properties = [ feature.split $(property-set) ] ;
+ return [ string.join [ $(f) $(properties) ] : / ] ;
+}
+
+
+# Expand the given build request by combining all property-sets which do not
+# specify conflicting non-free features. Expects all the project files to
+# already be loaded.
+#
+rule expand-no-defaults ( property-sets * )
+{
+ # First make all features and subfeatures explicit.
+ local expanded-property-sets = [ sequence.transform apply-to-property-set
+ feature.expand-subfeatures : $(property-sets) ] ;
+
+ # Now combine all of the expanded property-sets
+ local product = [ x-product $(expanded-property-sets) : $(feature-space) ] ;
+
+ return $(product) ;
+}
+
+
+# Implementation of x-product, below. Expects all the project files to already
+# be loaded.
+#
+local rule x-product-aux ( property-sets + )
+{
+ local result ;
+ local p = [ feature.split $(property-sets[1]) ] ;
+ local f = [ set.difference $(p:G) : [ feature.free-features ] ] ;
+ local seen ;
+ # No conflict with things used at a higher level?
+ if ! [ set.intersection $(f) : $(x-product-used) ]
+ {
+ local x-product-seen ;
+ {
+ # Do not mix in any conflicting features.
+ local x-product-used = $(x-product-used) $(f) ;
+
+ if $(property-sets[2])
+ {
+ local rest = [ x-product-aux $(property-sets[2-]) : $(feature-space) ] ;
+ result = $(property-sets[1])/$(rest) ;
+ }
+
+ result ?= $(property-sets[1]) ;
+ }
+
+ # If we did not encounter a conflicting feature lower down, do not
+ # recurse again.
+ if ! [ set.intersection $(f) : $(x-product-seen) ]
+ {
+ property-sets = ;
+ }
+
+ seen = $(x-product-seen) ;
+ }
+
+ if $(property-sets[2])
+ {
+ result += [ x-product-aux $(property-sets[2-]) : $(feature-space) ] ;
+ }
+
+ # Note that we have seen these features so that higher levels will recurse
+ # again without them set.
+ x-product-seen += $(f) $(seen) ;
+ return $(result) ;
+}
+
+
+# Return the cross-product of all elements of property-sets, less any that would
+# contain conflicting values for single-valued features. Expects all the project
+# files to already be loaded.
+#
+local rule x-product ( property-sets * )
+{
+ if $(property-sets).non-empty
+ {
+ # Prepare some "scoped globals" that can be used by the implementation
+ # function, x-product-aux.
+ local x-product-seen x-product-used ;
+ return [ x-product-aux $(property-sets) : $(feature-space) ] ;
+ }
+ # Otherwise return empty.
+}
+
+
+# Returns true if either 'v' or the part of 'v' before the first '-' symbol is
+# an implicit value. Expects all the project files to already be loaded.
+#
+local rule looks-like-implicit-value ( v )
+{
+ if [ feature.is-implicit-value $(v) ]
+ {
+ return true ;
+ }
+ else
+ {
+ local split = [ regex.split $(v) - ] ;
+ if [ feature.is-implicit-value $(split[1]) ]
+ {
+ return true ;
+ }
+ }
+}
+
+
+# Takes the command line tokens (such as taken from the ARGV rule) and
+# constructs a build request from them. Returns a vector of two vectors (where
+# "vector" means container.jam's "vector"). First is the set of targets
+# specified in the command line, and second is the set of requested build
+# properties. Expects all the project files to already be loaded.
+#
+rule from-command-line ( command-line * )
+{
+ local targets ;
+ local properties ;
+
+ command-line = $(command-line[2-]) ;
+ local skip-next = ;
+ for local e in $(command-line)
+ {
+ if $(skip-next)
+ {
+ skip-next = ;
+ }
+ else if ! [ MATCH ^(-) : $(e) ]
+ {
+ # Build request spec either has "=" in it or completely consists of
+ # implicit feature values.
+ local fs = feature-space ;
+ if [ MATCH "(.*=.*)" : $(e) ]
+ || [ looks-like-implicit-value $(e:D=) : $(feature-space) ]
+ {
+ properties += [ convert-command-line-element $(e) :
+ $(feature-space) ] ;
+ }
+ else if $(e)
+ {
+ targets += $(e) ;
+ }
+ }
+ else if [ MATCH "^(-[-ldjfsto])$" : $(e) ]
+ {
+ skip-next = true ;
+ }
+ }
+ return [ new vector
+ [ new vector $(targets) ]
+ [ new vector $(properties) ] ] ;
+}
+
+
+# Converts one element of command line build request specification into internal
+# form. Expects all the project files to already be loaded.
+#
+local rule convert-command-line-element ( e )
+{
+ local result ;
+ local parts = [ regex.split $(e) "/" ] ;
+ while $(parts)
+ {
+ local p = $(parts[1]) ;
+ local m = [ MATCH "([^=]*)=(.*)" : $(p) ] ;
+ local lresult ;
+ local feature ;
+ local values ;
+ if $(m)
+ {
+ feature = $(m[1]) ;
+ values = [ regex.split $(m[2]) "," ] ;
+ lresult = <$(feature)>$(values) ;
+ }
+ else
+ {
+ lresult = [ regex.split $(p) "," ] ;
+ }
+
+ if $(feature) && free in [ feature.attributes <$(feature)> ]
+ {
+ # If we have free feature, then the value is everything
+ # until the end of the command line token. Slashes in
+ # the following string are not taked to mean separation
+ # of properties. Commas are also not interpreted specially.
+ values = $(values:J=,) ;
+ values = $(values) $(parts[2-]) ;
+ values = $(values:J=/) ;
+ lresult = <$(feature)>$(values) ;
+ parts = ;
+ }
+
+ if ! [ MATCH (.*-.*) : $(p) ]
+ {
+ # property.validate cannot handle subfeatures, so we avoid the check
+ # here.
+ for local p in $(lresult)
+ {
+ property.validate $(p) : $(feature-space) ;
+ }
+ }
+
+ if ! $(result)
+ {
+ result = $(lresult) ;
+ }
+ else
+ {
+ result = $(result)/$(lresult) ;
+ }
+
+ parts = $(parts[2-]) ;
+ }
+
+ return $(result) ;
+}
+
+
+rule __test__ ( )
+{
+ import assert ;
+ import feature ;
+
+ feature.prepare-test build-request-test-temp ;
+
+ import build-request ;
+ import build-request : expand-no-defaults : build-request.expand-no-defaults ;
+ import errors : try catch ;
+ import feature : feature subfeature ;
+
+ feature toolset : gcc msvc borland : implicit ;
+ subfeature toolset gcc : version : 2.95.2 2.95.3 2.95.4
+ 3.0 3.0.1 3.0.2 : optional ;
+
+ feature variant : debug release : implicit composite ;
+ feature inlining : on off ;
+ feature "include" : : free ;
+
+ feature stdlib : native stlport : implicit ;
+
+ feature runtime-link : dynamic static : symmetric ;
+
+ # Empty build requests should expand to empty.
+ assert.result
+ : build-request.expand-no-defaults ;
+
+ assert.result
+ <toolset>gcc/<toolset-gcc:version>3.0.1/<stdlib>stlport/<variant>debug
+ <toolset>msvc/<stdlib>stlport/<variant>debug
+ <toolset>msvc/<variant>debug
+ : build-request.expand-no-defaults gcc-3.0.1/stlport msvc/stlport msvc debug ;
+
+ assert.result
+ <toolset>gcc/<toolset-gcc:version>3.0.1/<stdlib>stlport/<variant>debug
+ <toolset>msvc/<variant>debug
+ <variant>debug/<toolset>msvc/<stdlib>stlport
+ : build-request.expand-no-defaults gcc-3.0.1/stlport msvc debug msvc/stlport ;
+
+ assert.result
+ <toolset>gcc/<toolset-gcc:version>3.0.1/<stdlib>stlport/<variant>debug/<inlining>off
+ <toolset>gcc/<toolset-gcc:version>3.0.1/<stdlib>stlport/<variant>release/<inlining>off
+ : build-request.expand-no-defaults gcc-3.0.1/stlport debug release <inlining>off ;
+
+ assert.result
+ <include>a/b/c/<toolset>gcc/<toolset-gcc:version>3.0.1/<stdlib>stlport/<variant>debug/<include>x/y/z
+ <include>a/b/c/<toolset>msvc/<stdlib>stlport/<variant>debug/<include>x/y/z
+ <include>a/b/c/<toolset>msvc/<variant>debug/<include>x/y/z
+ : build-request.expand-no-defaults <include>a/b/c gcc-3.0.1/stlport msvc/stlport msvc debug <include>x/y/z ;
+
+ local r ;
+
+ r = [ build-request.from-command-line bjam debug runtime-link=dynamic ] ;
+ assert.equal [ $(r).get-at 1 ] : ;
+ assert.equal [ $(r).get-at 2 ] : debug <runtime-link>dynamic ;
+
+ try ;
+ {
+ build-request.from-command-line bjam gcc/debug runtime-link=dynamic/static ;
+ }
+ catch \"static\" is not an implicit feature value ;
+
+ r = [ build-request.from-command-line bjam -d2 --debug debug target runtime-link=dynamic ] ;
+ assert.equal [ $(r).get-at 1 ] : target ;
+ assert.equal [ $(r).get-at 2 ] : debug <runtime-link>dynamic ;
+
+ r = [ build-request.from-command-line bjam debug runtime-link=dynamic,static ] ;
+ assert.equal [ $(r).get-at 1 ] : ;
+ assert.equal [ $(r).get-at 2 ] : debug <runtime-link>dynamic <runtime-link>static ;
+
+ r = [ build-request.from-command-line bjam debug gcc/runtime-link=dynamic,static ] ;
+ assert.equal [ $(r).get-at 1 ] : ;
+ assert.equal [ $(r).get-at 2 ] : debug gcc/<runtime-link>dynamic
+ gcc/<runtime-link>static ;
+
+ r = [ build-request.from-command-line bjam msvc gcc,borland/runtime-link=static ] ;
+ assert.equal [ $(r).get-at 1 ] : ;
+ assert.equal [ $(r).get-at 2 ] : msvc gcc/<runtime-link>static
+ borland/<runtime-link>static ;
+
+ r = [ build-request.from-command-line bjam gcc-3.0 ] ;
+ assert.equal [ $(r).get-at 1 ] : ;
+ assert.equal [ $(r).get-at 2 ] : gcc-3.0 ;
+
+ feature.finish-test build-request-test-temp ;
+}
diff --git a/src/kenlm/jam-files/boost-build/build/config-cache.jam b/src/kenlm/jam-files/boost-build/build/config-cache.jam
new file mode 100644
index 0000000..5297dbb
--- /dev/null
+++ b/src/kenlm/jam-files/boost-build/build/config-cache.jam
@@ -0,0 +1,64 @@
+# Copyright 2012 Steven Watanabe
+# Distributed under the Boost Software License, Version 1.0.
+# (See accompanying file LICENSE_1_0.txt or http://www.boost.org/LICENSE_1_0.txt)
+
+import modules ;
+import errors ;
+import regex ;
+import path ;
+import project ;
+
+rule get ( name )
+{
+ return $(.vars.$(name)) ;
+}
+
+rule set ( name : value * )
+{
+ .all-vars += $(name) ;
+ .vars.$(name) = $(value) ;
+}
+
+rule save ( )
+{
+ if $(.cache-file)
+ {
+ local cache-file-native = [ path.native $(.cache-file) ] ;
+ local target = <new-cache-file>$(cache-file-native) ;
+ local contents = "# Automatically generated by Boost.Build.\n# Do not edit.\n\nmodule config-cache {\n" ;
+ for local var in $(.all-vars)
+ {
+ local transformed ;
+ for local value in $(.vars.$(var))
+ {
+ transformed += [ regex.escape $(value) : \"\\ : \\ ] ;
+ }
+ local quoted = \"$(transformed)\" ;
+ contents += " set \"$(var)\" : $(quoted:J= ) ;\n" ;
+ }
+ contents += "}\n" ;
+ FILE_CONTENTS on $(target) = $(contents) ;
+ ALWAYS $(target) ;
+ config-cache.write $(target) ;
+ UPDATE_NOW $(target) : [ modules.peek configure : .log-fd ] : ignore-minus-n ;
+ }
+}
+
+actions write
+{
+ @($(STDOUT):E=$(FILE_CONTENTS:J=)) > "$(<)"
+}
+
+rule load ( cache-file )
+{
+ if $(.cache-file)
+ {
+ errors.error duplicate load of cache file ;
+ }
+ cache-file = [ path.native $(cache-file) ] ;
+ if [ path.exists $(cache-file) ] && ! ( --reconfigure in [ modules.peek : ARGV ] )
+ {
+ include <old-cache-file>$(cache-file) ;
+ }
+ .cache-file = $(cache-file) ;
+}
diff --git a/src/kenlm/jam-files/boost-build/build/configure.jam b/src/kenlm/jam-files/boost-build/build/configure.jam
new file mode 100644
index 0000000..66b81b3
--- /dev/null
+++ b/src/kenlm/jam-files/boost-build/build/configure.jam
@@ -0,0 +1,281 @@
+# Copyright (c) 2010 Vladimir Prus.
+#
+# Use, modification and distribution is subject to the Boost Software
+# License Version 1.0. (See accompanying file LICENSE_1_0.txt or
+# http://www.boost.org/LICENSE_1_0.txt)
+
+# This module defines function to help with two main tasks:
+#
+# - Discovering build-time configuration for the purposes of adjusting the build
+# process.
+# - Reporting what is built, and how it is configured.
+
+import "class" : new ;
+import common ;
+import path ;
+import property ;
+import property-set ;
+import targets ;
+import config-cache ;
+
+
+rule log-summary ( )
+{
+}
+
+
+.width = 30 ;
+
+rule set-width ( width )
+{
+ .width = $(width) ;
+}
+
+
+# Declare that the components specified by the parameters exist.
+#
+rule register-components ( components * )
+{
+ .components += $(components) ;
+}
+
+
+# Declare that the components specified by the parameters will be built.
+#
+rule components-building ( components * )
+{
+ .built-components += $(components) ;
+}
+
+
+# Report something about component configuration that the user should better
+# know.
+#
+rule log-component-configuration ( component : message )
+{
+ # FIXME: Implement per-property-set logs.
+ .component-logs.$(component) += $(message) ;
+}
+
+
+rule log-check-result ( result )
+{
+ if ! $(.announced-checks)
+ {
+ ECHO "Performing configuration checks\n" ;
+ .announced-checks = 1 ;
+ }
+
+ ECHO $(result) ;
+ # FIXME: Unfinished code. Nothing seems to set .check-results at the moment.
+ #.check-results += $(result) ;
+}
+
+
+rule log-library-search-result ( library : result )
+{
+ local x = [ PAD " - $(library)" : $(.width) ] ;
+ log-check-result "$(x) : $(result)" ;
+}
+
+
+rule print-component-configuration ( )
+{
+ # FIXME: See what was intended with this initial assignment.
+ # local c = [ sequence.unique $(.components) ] ;
+
+ ECHO "\nComponent configuration:\n" ;
+ local c ;
+ for c in $(.components)
+ {
+ local s ;
+ if $(c) in $(.built-components)
+ {
+ s = "building" ;
+ }
+ else
+ {
+ s = "not building" ;
+ }
+ ECHO [ PAD " - $(c)" : $(.width) ] ": $(s)" ;
+ for local m in $(.component-logs.$(c))
+ {
+ ECHO " -" $(m) ;
+ }
+ }
+ ECHO ;
+}
+
+
+rule print-configure-checks-summary ( )
+{
+ # FIXME: The problem with this approach is that the user sees the checks
+ # summary when all checks are done, and has no progress reporting while the
+ # checks are being executed.
+ if $(.check-results)
+ {
+ ECHO "Configuration checks summary\n" ;
+ for local r in $(.check-results)
+ {
+ ECHO $(r) ;
+ }
+ ECHO ;
+ }
+}
+
+
+# Attempt to build a metatarget named by 'metatarget-reference' in context of
+# 'project' with properties 'ps'. Returns non-empty value if build is OK.
+#
+rule builds-raw ( metatarget-reference : project : ps : what : retry ? )
+{
+ local result ;
+
+ if ! $(retry) && ! $(.$(what)-tested.$(ps))
+ {
+ .$(what)-tested.$(ps) = true ;
+
+ local cache-name = $(what) [ $(ps).raw ] ;
+ cache-name = $(cache-name:J=-) ;
+ local value = [ config-cache.get $(cache-name) ] ;
+
+ local targets = [ targets.generate-from-reference
+ $(metatarget-reference) : $(project) : $(ps) ] ;
+
+ local jam-targets ;
+ for local t in $(targets[2-])
+ {
+ jam-targets += [ $(t).actualize ] ;
+ }
+
+ if $(value)
+ {
+ local x = [ PAD " - $(what)" : $(.width) ] ;
+ if $(value) = true
+ {
+ .$(what)-supported.$(ps) = yes ;
+ result = true ;
+ log-check-result "$(x) : yes (cached)" ;
+ }
+ else
+ {
+ log-check-result "$(x) : no (cached)" ;
+ }
+ }
+ else if ! UPDATE_NOW in [ RULENAMES ]
+ {
+ # Cannot determine. Assume existence.
+ }
+ else
+ {
+ local x = [ PAD " - $(what)" : $(.width) ] ;
+ if [ UPDATE_NOW $(jam-targets) :
+ $(.log-fd) : ignore-minus-n : ignore-minus-q ]
+ {
+ .$(what)-supported.$(ps) = yes ;
+ result = true ;
+ log-check-result "$(x) : yes" ;
+ }
+ else
+ {
+ log-check-result "$(x) : no" ;
+ }
+ }
+ if ! $(value)
+ {
+ if $(result)
+ {
+ config-cache.set $(cache-name) : true ;
+ }
+ else
+ {
+ config-cache.set $(cache-name) : false ;
+ }
+ }
+ return $(result) ;
+ }
+ else
+ {
+ return $(.$(what)-supported.$(ps)) ;
+ }
+}
+
+rule builds ( metatarget-reference : properties * : what ? : retry ? )
+{
+ # FIXME: This should not be hardcoded. Other checks might want to consider a
+ # different set of features as relevant.
+ local toolset = [ property.select <toolset> : $(properties) ] ;
+ local toolset-version-property = "<toolset-$(toolset:G=):version>" ;
+ local relevant = [ property.select <target-os> <toolset>
+ $(toolset-version-property) <address-model> <architecture> :
+ $(properties) ] ;
+ local ps = [ property-set.create $(relevant) ] ;
+ local t = [ targets.current ] ;
+ local p = [ $(t).project ] ;
+
+ if ! $(what)
+ {
+ local resolved = [ targets.resolve-reference $(metatarget-reference) : $(p) ] ;
+ local name = [ $(resolved[1]).name ] ;
+ what = "$(name) builds" ;
+ }
+
+ return [ builds-raw $(metatarget-reference) : $(p) : $(ps) : $(what) :
+ $(retry) ] ;
+}
+
+
+# Called by Boost.Build startup code to specify the file to receive the
+# configuration check results. Should never be called by user code.
+#
+rule set-log-file ( log-file )
+{
+ path.makedirs [ path.parent $(log-file) ] ;
+ .log-fd = [ FILE_OPEN $(log-file) : "w" ] ;
+}
+
+
+# Frontend rules
+
+class check-target-builds-worker
+{
+ import configure ;
+ import property-set ;
+ import targets ;
+ import property ;
+
+ rule __init__ ( target message ? : true-properties * : false-properties * )
+ {
+ self.target = $(target) ;
+ self.message = $(message) ;
+ self.true-properties = $(true-properties) ;
+ self.false-properties = $(false-properties) ;
+ }
+
+ rule check ( properties * )
+ {
+ local choosen ;
+ if [ configure.builds $(self.target) : $(properties) : $(self.message) ]
+ {
+ choosen = $(self.true-properties) ;
+ }
+ else
+ {
+ choosen = $(self.false-properties) ;
+ }
+ return [ property.evaluate-conditionals-in-context $(choosen) :
+ $(properties) ] ;
+ }
+}
+
+
+rule check-target-builds ( target message ? : true-properties * :
+ false-properties * )
+{
+ local instance = [ new check-target-builds-worker $(target) $(message) :
+ $(true-properties) : $(false-properties) ] ;
+ return <conditional>@$(instance).check ;
+}
+
+
+IMPORT $(__name__) : check-target-builds : : check-target-builds ;
diff --git a/src/kenlm/jam-files/boost-build/build/feature.jam b/src/kenlm/jam-files/boost-build/build/feature.jam
new file mode 100644
index 0000000..ee6abc5
--- /dev/null
+++ b/src/kenlm/jam-files/boost-build/build/feature.jam
@@ -0,0 +1,1350 @@
+# Copyright 2001, 2002, 2003 Dave Abrahams
+# Copyright 2002, 2006 Rene Rivera
+# Copyright 2002, 2003, 2004, 2005, 2006 Vladimir Prus
+# Distributed under the Boost Software License, Version 1.0.
+# (See accompanying file LICENSE_1_0.txt or copy at
+# http://www.boost.org/LICENSE_1_0.txt)
+
+import assert : * ;
+import "class" : * ;
+import indirect ;
+import modules ;
+import regex ;
+import sequence ;
+import set ;
+import utility ;
+
+
+local rule setup ( )
+{
+ .all-attributes =
+ implicit
+ composite
+ optional
+ symmetric
+ free
+ incidental
+ path
+ dependency
+ propagated
+ link-incompatible
+ subfeature
+ order-sensitive
+ ;
+
+ .all-features = ;
+ .all-subfeatures = ;
+ .all-top-features = ; # non-subfeatures
+ .all-implicit-values = ;
+}
+setup ;
+
+
+# Prepare a fresh space to test in by moving all global variable settings into
+# the given temporary module and erasing them here.
+#
+rule prepare-test ( temp-module )
+{
+ DELETE_MODULE $(temp-module) ;
+
+ # Transfer globals to temp-module.
+ for local v in [ VARNAMES feature ]
+ {
+ if [ MATCH (\\.) : $(v) ]
+ {
+ modules.poke $(temp-module) : $(v) : $($(v)) ;
+ $(v) = ;
+ }
+ }
+ setup ;
+}
+
+
+# Clear out all global variables and recover all variables from the given
+# temporary module.
+#
+rule finish-test ( temp-module )
+{
+ # Clear globals.
+ for local v in [ VARNAMES feature ]
+ {
+ if [ MATCH (\\.) : $(v) ]
+ {
+ $(v) = ;
+ }
+ }
+
+ for local v in [ VARNAMES $(temp-module) ]
+ {
+ $(v) = [ modules.peek $(temp-module) : $(v) ] ;
+ }
+ DELETE_MODULE $(temp-module) ;
+}
+
+
+# Transform features by bracketing any elements which are not already bracketed
+# by "<>".
+#
+local rule grist ( features * )
+{
+ local empty = "" ;
+ return $(empty:G=$(features)) ;
+}
+
+
+# Declare a new feature with the given name, values, and attributes.
+#
+rule feature (
+ name # Feature name.
+ : values * # Allowable values - may be extended later using feature.extend.
+ : attributes * # Feature attributes (e.g. implicit, free, propagated...).
+)
+{
+ name = [ grist $(name) ] ;
+
+ local error ;
+
+ # Check for any unknown attributes.
+ if ! ( $(attributes) in $(.all-attributes) )
+ {
+ error = unknown attributes:
+ [ set.difference $(attributes) : $(.all-attributes) ] ;
+ }
+ else if $(name) in $(.all-features)
+ {
+ error = feature already defined: ;
+ }
+ else if implicit in $(attributes) && free in $(attributes)
+ {
+ error = free features cannot also be implicit ;
+ }
+ else if free in $(attributes) && propagated in $(attributes)
+ {
+ error = free features cannot be propagated ;
+ }
+ else
+ {
+ local m = [ MATCH (.*=.*) : $(values) ] ;
+ if $(m[1])
+ {
+ error = "feature value may not contain '='" ;
+ }
+ }
+
+ if $(error)
+ {
+ import errors ;
+ errors.error $(error)
+ : "in" feature declaration:
+ : feature [ errors.lol->list $(1) : $(2) : $(3) ] ;
+ }
+
+ $(name).values ?= ;
+ $(name).attributes = $(attributes) ;
+ $(name).subfeatures ?= ;
+ $(attributes).features += $(name) ;
+
+ .all-features += $(name) ;
+ if subfeature in $(attributes)
+ {
+ .all-subfeatures += $(name) ;
+ }
+ else
+ {
+ .all-top-features += $(name) ;
+ }
+ extend $(name) : $(values) ;
+}
+
+
+# Sets the default value of the given feature, overriding any previous default.
+#
+rule set-default ( feature : value )
+{
+ local f = [ grist $(feature) ] ;
+ local a = $($(f).attributes) ;
+ local bad-attribute = ;
+ if free in $(a)
+ {
+ bad-attribute = free ;
+ }
+ else if optional in $(a)
+ {
+ bad-attribute = optional ;
+ }
+ if $(bad-attribute)
+ {
+ import errors ;
+ errors.error $(bad-attribute) property $(f) cannot have a default. ;
+ }
+ if ! $(value) in $($(f).values)
+ {
+ import errors ;
+ errors.error The specified default value, '$(value)' is invalid :
+ allowed values are: $($(f).values) ;
+ }
+ $(f).default = $(value) ;
+}
+
+
+# Returns the default property values for the given features.
+#
+rule defaults ( features * )
+{
+ local result ;
+ for local f in $(features)
+ {
+ local gf = $(:E=:G=$(f)) ;
+ local a = $($(gf).attributes) ;
+ if ( free in $(a) ) || ( optional in $(a) )
+ {
+ }
+ else
+ {
+ result += $(gf)$($(gf).default) ;
+ }
+ }
+ return $(result) ;
+}
+
+
+# Returns true iff all 'names' elements are valid features.
+#
+rule valid ( names + )
+{
+ if $(names) in $(.all-features)
+ {
+ return true ;
+ }
+}
+
+
+# Returns the attributes of the given feature.
+#
+rule attributes ( feature )
+{
+ return $($(feature).attributes) ;
+}
+
+
+# Returns the values of the given feature.
+#
+rule values ( feature )
+{
+ return $($(:E=:G=$(feature)).values) ;
+}
+
+
+# Returns true iff 'value-string' is a value-string of an implicit feature.
+#
+rule is-implicit-value ( value-string )
+{
+ local v = [ regex.split $(value-string) - ] ;
+ local failed ;
+ if ! $(v[1]) in $(.all-implicit-values)
+ {
+ failed = true ;
+ }
+ else
+ {
+ local feature = $($(v[1]).implicit-feature) ;
+ for local subvalue in $(v[2-])
+ {
+ if ! [ find-implied-subfeature $(feature) $(subvalue) : $(v[1]) ]
+ {
+ failed = true ;
+ }
+ }
+ }
+
+ if ! $(failed)
+ {
+ return true ;
+ }
+}
+
+
+# Returns the implicit feature associated with the given implicit value.
+#
+rule implied-feature ( implicit-value )
+{
+ local components = [ regex.split $(implicit-value) "-" ] ;
+ local feature = $($(components[1]).implicit-feature) ;
+ if ! $(feature)
+ {
+ import errors ;
+ errors.error \"$(implicit-value)\" is not an implicit feature value ;
+ feature = "" ; # Keep testing happy; it expects a result.
+ }
+ return $(feature) ;
+}
+
+
+local rule find-implied-subfeature ( feature subvalue : value-string ? )
+{
+ # Feature should be of the form <feature-name>.
+ if $(feature) != $(feature:G)
+ {
+ import errors ;
+ errors.error invalid feature $(feature) ;
+ }
+ return $($(feature)$(value-string:E="")<>$(subvalue).subfeature) ;
+}
+
+
+# Given a feature and a value of one of its subfeatures, find the name of the
+# subfeature. If value-string is supplied, looks for implied subfeatures that
+# are specific to that value of the feature.
+#
+rule implied-subfeature (
+ feature # The main feature name.
+ subvalue # The value of one of its subfeatures.
+ : value-string ? # The value of the main feature.
+)
+{
+ local subfeature = [ find-implied-subfeature $(feature) $(subvalue)
+ : $(value-string) ] ;
+ if ! $(subfeature)
+ {
+ value-string ?= "" ;
+ import errors ;
+ errors.error \"$(subvalue)\" is not a known subfeature value of
+ $(feature)$(value-string) ;
+ }
+ return $(subfeature) ;
+}
+
+
+# Generate an error if the feature is unknown.
+#
+local rule validate-feature ( feature )
+{
+ if ! $(feature) in $(.all-features)
+ {
+ import errors ;
+ errors.error unknown feature \"$(feature)\" ;
+ }
+}
+
+
+# Given a feature and its value or just a value corresponding to an implicit
+# feature, returns a property set consisting of all component subfeatures and
+# their values. For example all the following calls:
+#
+# expand-subfeatures-aux <toolset>gcc-2.95.2-linux-x86
+# expand-subfeatures-aux gcc-2.95.2-linux-x86
+#
+# return:
+#
+# <toolset>gcc <toolset-version>2.95.2 <toolset-os>linux <toolset-cpu>x86
+#
+local rule expand-subfeatures-aux (
+ feature ? # Feature name or empty if value corresponds to an
+ # implicit property.
+ : value # Feature value.
+ : dont-validate ? # If set, no value string validation will be done.
+)
+{
+ if $(feature)
+ {
+ feature = $(feature) ;
+ }
+
+ if ! $(feature)
+ {
+ feature = [ implied-feature $(value) ] ;
+ }
+ else
+ {
+ validate-feature $(feature) ;
+ }
+ if ! $(dont-validate)
+ {
+ validate-value-string $(feature) $(value) ;
+ }
+
+ local components = [ regex.split $(value) "-" ] ;
+
+ # Get the top-level feature's value.
+ local value = $(components[1]:G=) ;
+
+ local result = $(components[1]:G=$(feature)) ;
+
+ local subvalues = $(components[2-]) ;
+ while $(subvalues)
+ {
+ local subvalue = $(subvalues[1]) ; # Pop the head off of subvalues.
+ subvalues = $(subvalues[2-]) ;
+
+ local subfeature = [ find-implied-subfeature $(feature) $(subvalue) :
+ $(value) ] ;
+
+ # If no subfeature was found reconstitute the value string and use that.
+ if ! $(subfeature)
+ {
+ result = $(components:J=-) ;
+ result = $(result:G=$(feature)) ;
+ subvalues = ; # Stop looping.
+ }
+ else
+ {
+ local f = [ MATCH ^<(.*)>$ : $(feature) ] ;
+ result += $(subvalue:G=$(f)-$(subfeature)) ;
+ }
+ }
+
+ return $(result) ;
+}
+
+
+# Make all elements of properties corresponding to implicit features explicit,
+# and express all subfeature values as separate properties in their own right.
+# For example, all of the following properties
+#
+# gcc-2.95.2-linux-x86
+# <toolset>gcc-2.95.2-linux-x86
+#
+# might expand to
+#
+# <toolset>gcc <toolset-version>2.95.2 <toolset-os>linux <toolset-cpu>x86
+#
+rule expand-subfeatures (
+ properties * # Property set with elements of the form
+ # <feature>value-string or just value-string in the case
+ # of implicit features.
+ : dont-validate ?
+)
+{
+ local result ;
+ for local p in $(properties)
+ {
+ # Don't expand subfeatures in subfeatures
+ if ! [ MATCH "(:)" : $(p:G) ]
+ {
+ result += [ expand-subfeatures-aux $(p:G) : $(p:G=) : $(dont-validate) ] ;
+ }
+ else
+ {
+ result += $(p) ;
+ }
+ }
+ return $(result) ;
+}
+
+
+# Helper for extend, below. Handles the feature case.
+#
+local rule extend-feature ( feature : values * )
+{
+ feature = [ grist $(feature) ] ;
+ validate-feature $(feature) ;
+ if implicit in $($(feature).attributes)
+ {
+ for local v in $(values)
+ {
+ if $($(v).implicit-feature)
+ {
+ import errors ;
+ errors.error $(v) is already associated with the
+ \"$($(v).implicit-feature)\" feature ;
+ }
+ $(v).implicit-feature = $(feature) ;
+ }
+
+ .all-implicit-values += $(values) ;
+ }
+ if ! $($(feature).values)
+ {
+ # This is the first value specified for this feature so make it be the
+ # default.
+ $(feature).default = $(values[1]) ;
+ }
+ $(feature).values += $(values) ;
+}
+
+
+# Checks that value-string is a valid value-string for the given feature.
+#
+rule validate-value-string ( feature value-string )
+{
+ if ! (
+ free in $($(feature).attributes)
+ || ( $(value-string) in $(feature).values )
+ )
+ {
+ local values = $(value-string) ;
+
+ if $($(feature).subfeatures)
+ {
+ if ! $(value-string) in $($(feature).values)
+ $($(feature).subfeatures)
+ {
+ values = [ regex.split $(value-string) - ] ;
+ }
+ }
+
+ if ! ( $(values[1]) in $($(feature).values) ) &&
+
+ # An empty value is allowed for optional features.
+ ( $(values[1]) || ! ( optional in $($(feature).attributes) ) )
+ {
+ import errors ;
+ errors.error \"$(values[1])\" is not a known value of feature
+ $(feature) : legal values: \"$($(feature).values)\" ;
+ }
+
+ for local v in $(values[2-])
+ {
+ # This will validate any subfeature values in value-string.
+ implied-subfeature $(feature) $(v) : $(values[1]) ;
+ }
+ }
+}
+
+
+# A helper that computes:
+# * name(s) of module-local variable(s) used to record the correspondence
+# between subvalue(s) and a subfeature
+# * value of that variable when such a subfeature/subvalue has been defined and
+# returns a list consisting of the latter followed by the former.
+#
+local rule subvalue-var (
+ feature # Main feature name.
+ value-string ? # If supplied, specifies a specific value of the main
+ # feature for which the subfeature values are valid.
+ : subfeature # Subfeature name.
+ : subvalues * # Subfeature values.
+)
+{
+ feature = [ grist $(feature) ] ;
+ validate-feature $(feature) ;
+ if $(value-string)
+ {
+ validate-value-string $(feature) $(value-string) ;
+ }
+
+ local subfeature-name = [ get-subfeature-name $(subfeature) $(value-string) ] ;
+
+ return $(subfeature-name)
+ $(feature)$(value-string:E="")<>$(subvalues).subfeature ;
+}
+
+
+# Extends the given subfeature with the subvalues. If the optional value-string
+# is provided, the subvalues are only valid for the given value of the feature.
+# Thus, you could say that <target-platform>mingw is specific to
+# <toolset>gcc-2.95.2 as follows:
+#
+# extend-subfeature toolset gcc-2.95.2 : target-platform : mingw ;
+#
+rule extend-subfeature (
+ feature # The feature whose subfeature is being extended.
+
+ value-string ? # If supplied, specifies a specific value of the main
+ # feature for which the new subfeature values are valid.
+
+ : subfeature # Subfeature name.
+ : subvalues * # Additional subfeature values.
+)
+{
+ local subfeature-vars = [ subvalue-var $(feature) $(value-string)
+ : $(subfeature) : $(subvalues) ] ;
+
+ local f = [ utility.ungrist [ grist $(feature) ] ] ;
+ extend $(f)-$(subfeature-vars[1]) : $(subvalues) ;
+
+ # Provide a way to get from the given feature or property and subfeature
+ # value to the subfeature name.
+ $(subfeature-vars[2-]) = $(subfeature-vars[1]) ;
+}
+
+
+# Returns true iff the subvalues are valid for the feature. When the optional
+# value-string is provided, returns true iff the subvalues are valid for the
+# given value of the feature.
+#
+rule is-subvalue ( feature : value-string ? : subfeature : subvalue )
+{
+ local subfeature-vars = [ subvalue-var $(feature) $(value-string)
+ : $(subfeature) : $(subvalue) ] ;
+
+ if $($(subfeature-vars[2])) = $(subfeature-vars[1])
+ {
+ return true ;
+ }
+}
+
+
+# Can be called three ways:
+#
+# 1. extend feature : values *
+# 2. extend <feature> subfeature : values *
+# 3. extend <feature>value-string subfeature : values *
+#
+# * Form 1 adds the given values to the given feature.
+# * Forms 2 and 3 add subfeature values to the given feature.
+# * Form 3 adds the subfeature values as specific to the given property
+# value-string.
+#
+rule extend ( feature-or-property subfeature ? : values * )
+{
+ local feature ; # If a property was specified this is its feature.
+ local value-string ; # E.g., the gcc-2.95.2 part of <toolset>gcc-2.95.2.
+
+ # If a property was specified.
+ if $(feature-or-property:G) && $(feature-or-property:G=)
+ {
+ # Extract the feature and value-string, if any.
+ feature = $(feature-or-property:G) ;
+ value-string = $(feature-or-property:G=) ;
+ }
+ else
+ {
+ feature = [ grist $(feature-or-property) ] ;
+ }
+
+ # Dispatch to the appropriate handler.
+ if $(subfeature)
+ {
+ extend-subfeature $(feature) $(value-string) : $(subfeature)
+ : $(values) ;
+ }
+ else
+ {
+ # If no subfeature was specified, we do not expect to see a
+ # value-string.
+ if $(value-string)
+ {
+ import errors ;
+ errors.error can only specify a property as the first argument when
+ extending a subfeature
+ : usage:
+ : " extend" feature ":" values...
+ : " | extend" <feature>value-string subfeature ":" values... ;
+ }
+
+ extend-feature $(feature) : $(values) ;
+ }
+}
+
+
+local rule get-subfeature-name ( subfeature value-string ? )
+{
+ local prefix = $(value-string): ;
+ return $(prefix:E="")$(subfeature) ;
+}
+
+
+# Declares a subfeature.
+#
+rule subfeature (
+      feature           # Root feature that is not a subfeature.
+      value-string ?    # A value-string specifying which feature or subfeature
+                        # values this subfeature is specific to, if any.
+    : subfeature        # The name of the subfeature being declared.
+    : subvalues *       # The allowed values of this subfeature.
+    : attributes *      # The attributes of the subfeature.
+)
+{
+    feature = [ grist $(feature) ] ;
+    validate-feature $(feature) ;
+
+    # Add grist to the subfeature name if a value-string was supplied.
+    local subfeature-name = [ get-subfeature-name $(subfeature) $(value-string) ] ;
+
+    # Guard against double declaration for the same (feature, value-string).
+    if $(subfeature-name) in $($(feature).subfeatures)
+    {
+        import errors ;
+        errors.error \"$(subfeature)\" already declared as a subfeature of
+            \"$(feature)\" "specific to "$(value-string) ;
+    }
+    $(feature).subfeatures += $(subfeature-name) ;
+
+    # First declare the subfeature as a feature in its own right.
+    local f = [ utility.ungrist $(feature) ] ;
+    feature $(f)-$(subfeature-name) : $(subvalues) : $(attributes) subfeature ;
+
+    # Now make sure the subfeature values are known.
+    extend-subfeature $(feature) $(value-string) : $(subfeature) : $(subvalues) ;
+}
+
+
+# Set components of the given composite property.
+#
+rule compose ( composite-property : component-properties * )
+{
+    local feature = $(composite-property:G) ;
+    if ! ( composite in [ attributes $(feature) ] )
+    {
+        import errors ;
+        errors.error "$(feature)" is not a composite feature ;
+    }
+
+    # Components may be set only once per composite property value.
+    $(composite-property).components ?= ;
+    if $($(composite-property).components)
+    {
+        import errors ;
+        errors.error components of "$(composite-property)" already set:
+            $($(composite-property).components) ;
+    }
+
+    # A direct self-reference would make expand-composite recurse forever.
+    if $(composite-property) in $(component-properties)
+    {
+        import errors ;
+        errors.error composite property "$(composite-property)" cannot have itself as a component ;
+    }
+    $(composite-property).components = $(component-properties) ;
+}
+
+
+# Recursively expands the given property into itself plus all of its
+# (transitive) composite components.
+#
+local rule expand-composite ( property )
+{
+    return $(property)
+        [ sequence.transform expand-composite : $($(property).components) ] ;
+}
+
+
+# Return all values of the given feature specified by the given property set.
+#
+rule get-values ( feature : properties * )
+{
+    local result ;
+
+    feature = $(:E=:G=$(feature)) ;  # Add <> if necessary.
+    for local p in $(properties)
+    {
+        if $(p:G) = $(feature)
+        {
+            # Use MATCH instead of :G= to get the value, in order to preserve
+            # the value intact instead of having bjam treat it as a decomposable
+            # path.
+            result += [ MATCH ">(.*)" : $(p) ] ;
+        }
+    }
+    return $(result) ;
+}
+
+
+# Returns the list of all features declared with the 'free' attribute.
+#
+rule free-features ( )
+{
+    return $(free.features) ;
+}
+
+
+# Expand all composite properties in the set so that all components are
+# explicitly expressed.
+#
+rule expand-composites ( properties * )
+{
+    # Features directly present in the input; their values win over any value
+    # introduced by composite expansion.
+    local explicit-features = $(properties:G) ;
+    local result ;
+
+    # Now expand composite features.
+    for local p in $(properties)
+    {
+        local expanded = [ expand-composite $(p) ] ;
+
+        for local x in $(expanded)
+        {
+            if ! $(x) in $(result)
+            {
+                local f = $(x:G) ;
+
+                # Free features may legitimately hold multiple values.
+                if $(f) in $(free.features)
+                {
+                    result += $(x) ;
+                }
+                else if ! $(x) in $(properties)  # x is the result of expansion
+                {
+                    if ! $(f) in $(explicit-features)  # not explicitly-specified
+                    {
+                        # Two composites contributing different values for the
+                        # same non-free feature is an unresolvable conflict.
+                        if $(f) in $(result:G)
+                        {
+                            import errors ;
+                            errors.error expansions of composite features result
+                                in conflicting values for $(f)
+                                : values: [ get-values $(f) : $(result) ] $(x:G=)
+                                : one contributing composite property was $(p) ;
+                        }
+                        else
+                        {
+                            result += $(x) ;
+                        }
+                    }
+                }
+                else if $(f) in $(result:G)
+                {
+                    import errors ;
+                    errors.error explicitly-specified values of non-free feature
+                        $(f) conflict :
+                        "existing values:" [ get-values $(f) : $(properties) ] :
+                        "value from expanding " $(p) ":" $(x:G=) ;
+                }
+                else
+                {
+                    result += $(x) ;
+                }
+            }
+        }
+    }
+    return $(result) ;
+}
+
+
+# Return true iff f is an ordinary subfeature of the parent-property's feature,
+# or if f is a subfeature of the parent-property's feature specific to the
+# parent-property's value.
+#
+local rule is-subfeature-of ( parent-property f )
+{
+    if subfeature in $($(f).attributes)
+    {
+        local specific-subfeature = [ MATCH <(.*):(.*)> : $(f) ] ;
+        if $(specific-subfeature)
+        {
+            # The feature has the form <topfeature-topvalue:subfeature>, e.g.
+            # <toolset-msvc:version>.
+            local feature-value = [ split-top-feature $(specific-subfeature[1])
+                ] ;
+            if <$(feature-value[1])>$(feature-value[2]) = $(parent-property)
+            {
+                return true ;
+            }
+        }
+        else
+        {
+            # The feature has the form <topfeature-subfeature>, e.g.
+            # <toolset-version>
+            local top-sub = [ split-top-feature [ utility.ungrist $(f) ] ] ;
+            if $(top-sub[2]) && <$(top-sub[1])> = $(parent-property:G)
+            {
+                return true ;
+            }
+        }
+    }
+}
+
+
+# As for is-subfeature-of but for subproperties: tests the property's feature.
+#
+local rule is-subproperty-of ( parent-property p )
+{
+    return [ is-subfeature-of $(parent-property) $(p:G) ] ;
+}
+
+
+# Given a property, return the subset of features consisting of all ordinary
+# subfeatures of the property's feature, and all specific subfeatures of the
+# property's feature which are conditional on the property's value.
+#
+local rule select-subfeatures ( parent-property : features * )
+{
+    return [ sequence.filter is-subfeature-of $(parent-property) : $(features) ] ;
+}
+
+
+# As for select-subfeatures but for subproperties.
+#
+local rule select-subproperties ( parent-property : properties * )
+{
+    return [ sequence.filter is-subproperty-of $(parent-property) : $(properties) ] ;
+}
+
+
+# Given a property set which may consist of composite and implicit properties
+# and combined subfeature values, returns an expanded, normalized property set
+# with all implicit features expressed explicitly, all subfeature values
+# individually expressed, and all components of composite properties expanded.
+# Non-free features directly expressed in the input properties cause any values
+# of those features due to composite feature expansion to be dropped. If two
+# values of a given non-free feature are directly expressed in the input, an
+# error is issued.
+#
+rule expand ( properties * )
+{
+    # Subfeature expansion must run first so composite expansion sees fully
+    # gristed, individually expressed properties.
+    local expanded = [ expand-subfeatures $(properties) ] ;
+    return [ expand-composites $(expanded) ] ;
+}
+
+
+# Helper rule for minimize. Returns true iff property's feature is present in
+# the contents of the variable named by feature-set-var.
+#
+local rule in-features ( feature-set-var property )
+{
+    if $(property:G) in $($(feature-set-var))
+    {
+        return true ;
+    }
+}
+
+
+# Helper rule for minimize. Returns the list with the same properties, but with
+# all subfeatures moved to the end of the list.
+#
+local rule move-subfeatures-to-the-end ( properties * )
+{
+    local x1 ;  # Non-subfeature properties, in original order.
+    local x2 ;  # Subfeature properties, in original order.
+    for local p in $(properties)
+    {
+        if subfeature in $($(p:G).attributes)
+        {
+            x2 += $(p) ;
+        }
+        else
+        {
+            x1 += $(p) ;
+        }
+    }
+    # Stable partition: parents now always precede their subproperties.
+    return $(x1) $(x2) ;
+}
+
+
+# Given an expanded property set, eliminate all redundancy: properties that are
+# elements of other (composite) properties in the set will be eliminated.
+# Non-symmetric properties equal to default values will be eliminated unless
+# they override a value from some composite property. Implicit properties will
+# be expressed without feature grist, and sub-property values will be expressed
+# as elements joined to the corresponding main property.
+#
+rule minimize ( properties * )
+{
+    # Precondition checking
+    # NOTE(review): $(p) is unbound at this point (the only 'p' below is a
+    # loop-local declared later), so this intersection is always empty and the
+    # error below can never fire. Presumably $(properties) was intended --
+    # confirm against upstream Boost.Build before changing.
+    local implicits = [ set.intersection $(p:G=) : $(p:G) ] ;
+    if $(implicits)
+    {
+        import errors ;
+        errors.error minimize requires an expanded property set, but
+            \"$(implicits[1])\" appears to be the value of an un-expanded
+            implicit feature ;
+    }
+
+    # Remove properties implied by composite features.
+    local components = $($(properties).components) ;
+    local x = [ set.difference $(properties) : $(components) ] ;
+
+    # Handle subfeatures and implicit features.
+    x = [ move-subfeatures-to-the-end $(x) ] ;
+    local result ;
+    while $(x)
+    {
+        # p may lose its grist below; fullp keeps the original gristed form.
+        local p fullp = $(x[1]) ;
+        local f = $(p:G) ;
+        local v = $(p:G=) ;
+
+        # Eliminate features in implicit properties.
+        if implicit in [ attributes $(f) ]
+        {
+            p = $(v) ;
+        }
+
+        # Locate all subproperties of $(x[1]) in the property set.
+        local subproperties = [ select-subproperties $(fullp) : $(x) ] ;
+        if $(subproperties)
+        {
+            # Reconstitute the joined property name, e.g. gcc-3.0.1.
+            local sorted = [ sequence.insertion-sort $(subproperties) ] ;
+            result += $(p)-$(sorted:G="":J=-) ;
+
+            x = [ set.difference $(x[2-]) : $(subproperties) ] ;
+        }
+        else
+        {
+            # Eliminate properties whose value is equal to feature's default,
+            # which are not symmetric and which do not contradict values implied
+            # by composite properties.
+
+            # Since all component properties of composites in the set have been
+            # eliminated, any remaining property whose feature is the same as a
+            # component of a composite in the set must have a non-redundant
+            # value.
+            if $(fullp) != [ defaults $(f) ]
+                || symmetric in [ attributes $(f) ]
+                || $(fullp:G) in $(components:G)
+            {
+                result += $(p) ;
+            }
+
+            x = $(x[2-]) ;
+        }
+    }
+    return $(result) ;
+}
+
+
+# Combine all subproperties into their parent properties
+#
+# Requires: for every subproperty, there is a parent property. All features are
+# explicitly expressed.
+#
+# This rule probably should not be needed, but build-request.expand-no-defaults
+# is being abused for unintended purposes and it needs help.
+#
+rule compress-subproperties ( properties * )
+{
+    local all-subs ;      # Every subproperty seen in the input.
+    local matched-subs ;  # Subproperties matched to some parent property.
+    local result ;
+
+    for local p in $(properties)
+    {
+        if ! $(p:G)
+        {
+            # Expecting fully-gristed properties.
+            assert.variable-not-empty p:G ;
+        }
+
+        if ! subfeature in $($(p:G).attributes)
+        {
+            # Join all of p's subproperty values onto p, sorted for a
+            # deterministic name, e.g. <toolset>gcc-3.0.1.
+            local subs = [ sequence.insertion-sort
+                [ sequence.filter is-subproperty-of $(p) : $(properties) ] ] ;
+
+            matched-subs += $(subs) ;
+
+            local subvalues = -$(subs:G=:J=-) ;
+            subvalues ?= "" ;
+            result += $(p)$(subvalues) ;
+        }
+        else
+        {
+            all-subs += $(p) ;
+        }
+    }
+    # Every subproperty must have found a parent, per the precondition.
+    assert.result true : set.equal $(all-subs) : $(matched-subs) ;
+    return $(result) ;
+}
+
+
+# Given an ungristed string, finds the longest prefix which is a top-level
+# feature name followed by a dash, and return a pair consisting of the parts
+# before and after that dash. More interesting than a simple split because
+# feature names may contain dashes.
+#
+local rule split-top-feature ( feature-plus )
+{
+    local e = [ regex.split $(feature-plus) - ] ;
+    local f = $(e[1]) ;
+    local v ;
+    # Grow the candidate prefix one dash-separated piece at a time, keeping
+    # the last (i.e. longest) prefix that names a known top-level feature.
+    while $(e)
+    {
+        if <$(f)> in $(.all-top-features)
+        {
+            v = $(f) $(e[2-]:J=-) ;
+        }
+        e = $(e[2-]) ;
+        f = $(f)-$(e[1]) ;
+    }
+    return $(v) ;
+}
+
+
+# Given a set of properties, add default values for features not represented in
+# the set.
+#
+# Note: if there's an ordinary feature F1 and a composite feature F2 which
+# includes some value for F1 and both feature have default values then the
+# default value of F1 will be added (as opposed to the value in F2). This might
+# not be the right idea, e.g. consider:
+#
+#     feature variant : debug ... ;
+#         <variant>debug : .... <runtime-debugging>on
+#     feature <runtime-debugging> : off on ;
+#
+# Here, when adding default for an empty property set, we'll get
+#
+#     <variant>debug <runtime_debugging>off
+#
+# and that's kind of strange.
+#
+rule add-defaults ( properties * )
+{
+    for local v in $(properties:G=)
+    {
+        # A value equal to the whole property means the property had no grist,
+        # i.e. an un-expanded implicit value such as plain 'gcc'.
+        if $(v) in $(properties)
+        {
+            import errors ;
+            errors.error add-defaults requires explicitly specified features,
+                but \"$(v)\" appears to be the value of an un-expanded implicit
+                feature ;
+        }
+    }
+    # We don't add default for elements with ":" inside. This catches:
+    # 1. Conditional properties --- we don't want <variant>debug:<define>DEBUG
+    #    to be taken as specified value for <variant>
+    # 2. Free properties with ":" in values. We don't care, since free
+    #    properties don't have defaults.
+    local xproperties = [ MATCH "^([^:]+)$" : $(properties) ] ;
+    local missing-top = [ set.difference $(.all-top-features) : $(xproperties:G) ] ;
+    local more = [ defaults $(missing-top) ] ;
+    properties += $(more) ;
+    xproperties += $(more) ;
+
+    # Add defaults for subfeatures of features which are present.
+    for local p in $(xproperties)
+    {
+        local s = $($(p:G).subfeatures) ;
+        local f = [ utility.ungrist $(p:G) ] ;
+        local missing-subs = [ set.difference <$(f)-$(s)> : $(properties:G) ] ;
+        properties += [ defaults [ select-subfeatures $(p) : $(missing-subs) ] ] ;
+    }
+
+    return $(properties) ;
+}
+
+
+# Given a property-set of the form
+#     v1/v2/...vN-1/<fN>vN/<fN+1>vN+1/...<fM>vM
+#
+# Returns
+#     v1 v2 ... vN-1 <fN>vN <fN+1>vN+1 ... <fM>vM
+#
+# Note that vN...vM may contain slashes. This needs to be resilient to the
+# substitution of backslashes for slashes, since Jam, unbidden, sometimes swaps
+# slash direction on NT.
+#
+rule split ( property-set )
+{
+    local pieces = [ regex.split $(property-set) [\\/] ] ;
+    local result ;
+
+    for local x in $(pieces)
+    {
+        # An ungristed piece following a gristed one is a continuation of the
+        # previous property's value (the value itself contained a slash), so
+        # glue it back on instead of starting a new element.
+        if ( ! $(x:G) ) && $(result[-1]:G)
+        {
+            result = $(result[1--2]) $(result[-1])/$(x) ;
+        }
+        else
+        {
+            result += $(x) ;
+        }
+    }
+
+    return $(result) ;
+}
+
+
+# Tests of module feature.
+#
+rule __test__ ( )
+{
+    # Use a fresh copy of the feature module so declarations made here do not
+    # pollute real builds; finish-test at the end restores the clean state.
+    prepare-test feature-test-temp ;
+
+    import assert ;
+    import errors : try catch ;
+
+    # These are local rules and so must be explicitly reimported into the
+    # testing module.
+    import feature : extend-feature validate-feature select-subfeatures ;
+
+    feature toolset : gcc : implicit ;
+    feature define : : free ;
+    feature runtime-link : dynamic static : symmetric ;
+    feature optimization : on off ;
+    feature variant : debug release profile : implicit composite symmetric ;
+    feature stdlib : native stlport ;
+    feature magic : : free ;
+
+    compose <variant>debug : <define>_DEBUG <optimization>off ;
+    compose <variant>release : <define>NDEBUG <optimization>on ;
+
+    assert.result dynamic static : values <runtime-link> ;
+    assert.result dynamic static : values runtime-link ;
+
+    # Self-referential composites must be rejected.
+    try ;
+    {
+        compose <variant>profile : <variant>profile ;
+    }
+    catch composite property <variant>profile cannot have itself as a component ;
+
+    extend-feature toolset : msvc metrowerks ;
+    subfeature toolset gcc : version : 2.95.2 2.95.3 2.95.4 3.0 3.0.1 3.0.2 ;
+
+    assert.true is-subvalue toolset : gcc : version : 2.95.3 ;
+    assert.false is-subvalue toolset : gcc : version : 1.1 ;
+
+    assert.false is-subvalue toolset : msvc : version : 2.95.3 ;
+    assert.false is-subvalue toolset : : version : yabba ;
+
+    feature yabba ;
+    subfeature yabba : version : dabba ;
+    assert.true is-subvalue yabba : : version : dabba ;
+
+    subfeature toolset gcc : platform : linux cygwin : optional ;
+
+    assert.result <toolset-gcc:version>
+        : select-subfeatures <toolset>gcc
+        : <toolset-gcc:version>
+          <toolset-msvc:version>
+          <toolset-version>
+          <stdlib> ;
+
+    subfeature stdlib : version : 3 4 : optional ;
+
+    assert.result <stdlib-version>
+        : select-subfeatures <stdlib>native
+        : <toolset-gcc:version>
+          <toolset-msvc:version>
+          <toolset-version>
+          <stdlib-version> ;
+
+    assert.result <toolset>gcc <toolset-gcc:version>3.0.1
+        : expand-subfeatures <toolset>gcc-3.0.1 ;
+
+    assert.result <toolset>gcc <toolset-gcc:version>3.0.1 <toolset-gcc:platform>linux
+        : expand-subfeatures <toolset>gcc-3.0.1-linux ;
+
+    assert.result <toolset>gcc <toolset-gcc:version>3.0.1
+        : expand <toolset>gcc <toolset-gcc:version>3.0.1 ;
+
+    # Free feature values containing dashes must not be mistaken for joined
+    # subfeature values.
+    assert.result <define>foo=x-y
+        : expand-subfeatures <define>foo=x-y ;
+
+    assert.result <toolset>gcc <toolset-gcc:version>3.0.1
+        : expand-subfeatures gcc-3.0.1 ;
+
+    assert.result a c e
+        : get-values <x> : <x>a <y>b <x>c <y>d <x>e ;
+
+    assert.result <toolset>gcc <toolset-gcc:version>3.0.1
+        <variant>debug <define>_DEBUG <optimization>on
+        : expand gcc-3.0.1 debug <optimization>on ;
+
+    assert.result <variant>debug <define>_DEBUG <optimization>on
+        : expand debug <optimization>on ;
+
+    assert.result <optimization>on <variant>debug <define>_DEBUG
+        : expand <optimization>on debug ;
+
+    assert.result <runtime-link>dynamic <optimization>on
+        : defaults <runtime-link> <define> <optimization> ;
+
+    # Make sure defaults is resilient to missing grist.
+    assert.result <runtime-link>dynamic <optimization>on
+        : defaults runtime-link define optimization ;
+
+    feature dummy : dummy1 dummy2 ;
+    subfeature dummy : subdummy : x y z : optional ;
+
+    feature fu : fu1 fu2 : optional ;
+    subfeature fu : subfu : x y z : optional ;
+    subfeature fu : subfu2 : q r s ;
+
+    assert.result optional : attributes <fu> ;
+
+    assert.result <runtime-link>static <define>foobar <optimization>on
+        <toolset>gcc:<define>FOO <toolset>gcc <variant>debug <stdlib>native
+        <dummy>dummy1 <toolset-gcc:version>2.95.2
+        : add-defaults <runtime-link>static <define>foobar <optimization>on
+          <toolset>gcc:<define>FOO ;
+
+    assert.result <runtime-link>static <define>foobar <optimization>on
+        <toolset>gcc:<define>FOO <fu>fu1 <toolset>gcc <variant>debug
+        <stdlib>native <dummy>dummy1 <fu-subfu2>q <toolset-gcc:version>2.95.2
+        : add-defaults <runtime-link>static <define>foobar <optimization>on
+          <toolset>gcc:<define>FOO <fu>fu1 ;
+
+    set-default <runtime-link> : static ;
+    assert.result <runtime-link>static : defaults <runtime-link> ;
+
+    assert.result gcc-3.0.1 debug <optimization>on
+        : minimize [ expand gcc-3.0.1 debug <optimization>on <stdlib>native ] ;
+
+    assert.result gcc-3.0.1 debug <runtime-link>dynamic
+        : minimize
+          [ expand gcc-3.0.1 debug <optimization>off <runtime-link>dynamic ] ;
+
+    assert.result gcc-3.0.1 debug
+        : minimize [ expand gcc-3.0.1 debug <optimization>off ] ;
+
+    assert.result debug <optimization>on
+        : minimize [ expand debug <optimization>on ] ;
+
+    assert.result gcc-3.0
+        : minimize <toolset>gcc <toolset-gcc:version>3.0 ;
+
+    assert.result gcc-3.0
+        : minimize <toolset-gcc:version>3.0 <toolset>gcc ;
+
+    assert.result <x>y/z <a>b/c <d>e/f
+        : split <x>y/z/<a>b/c/<d>e/f ;
+
+    assert.result <x>y/z <a>b/c <d>e/f
+        : split <x>y\\z\\<a>b\\c\\<d>e\\f ;
+
+    assert.result a b c <d>e/f/g <h>i/j/k
+        : split a/b/c/<d>e/f/g/<h>i/j/k ;
+
+    assert.result a b c <d>e/f/g <h>i/j/k
+        : split a\\b\\c\\<d>e\\f\\g\\<h>i\\j\\k ;
+
+    # Test error checking.
+
+    try ;
+    {
+        expand release <optimization>off <optimization>on ;
+    }
+    catch explicitly-specified values of non-free feature <optimization> conflict ;
+
+    try ;
+    {
+        validate-feature <foobar> ;
+    }
+    catch unknown feature ;
+
+    validate-value-string <toolset> gcc ;
+    validate-value-string <toolset> gcc-3.0.1 ;
+
+    try ;
+    {
+        validate-value-string <toolset> digital_mars ;
+    }
+    catch \"digital_mars\" is not a known value of <toolset> ;
+
+    try ;
+    {
+        feature foobar : : baz ;
+    }
+    catch unknown attributes: baz ;
+
+    feature feature1 ;
+    try ;
+    {
+        feature feature1 ;
+    }
+    catch feature already defined: ;
+
+    try ;
+    {
+        feature feature2 : : free implicit ;
+    }
+    catch free features cannot also be implicit ;
+
+    try ;
+    {
+        feature feature3 : : free propagated ;
+    }
+    catch free features cannot be propagated ;
+
+    try ;
+    {
+        implied-feature lackluster ;
+    }
+    catch \"lackluster\" is not an implicit feature value ;
+
+    try ;
+    {
+        implied-subfeature <toolset> 3.0.1 ;
+    }
+    catch \"3.0.1\" is not a known subfeature value of <toolset> ;
+
+    try ;
+    {
+        implied-subfeature <toolset> not-a-version : gcc ;
+    }
+    catch \"not-a-version\" is not a known subfeature value of <toolset>gcc ;
+
+    # Leave a clean copy of the features module behind.
+    finish-test feature-test-temp ;
+}
diff --git a/src/kenlm/jam-files/boost-build/build/generators.jam b/src/kenlm/jam-files/boost-build/build/generators.jam
new file mode 100644
index 0000000..ec7183a
--- /dev/null
+++ b/src/kenlm/jam-files/boost-build/build/generators.jam
@@ -0,0 +1,1420 @@
+# Copyright 2002. Vladimir Prus
+# Copyright 2006. Rene Rivera
+#
+# Distributed under the Boost Software License, Version 1.0.
+# (See accompanying file LICENSE_1_0.txt or copy at
+# http://www.boost.org/LICENSE_1_0.txt)
+
+# Manages 'generators' --- objects which can do transformation between different
+# target types and contain algorithm for finding transformation from sources to
+# targets.
+#
+# The main entry point to this module is generators.construct rule. It is given
+# a list of source targets, desired target type and a set of properties. It
+# starts by selecting 'viable generators', which have any chances of producing
+# the desired target type with the required properties. Generators are ranked
+# and a set of the most specific ones is selected.
+#
+# The most specific generators have their 'run' methods called, with the
+# properties and list of sources. Each one selects a target which can be
+# directly consumed, and tries to convert the remaining ones to the types it can
+# consume. This is done by recursively calling 'construct' with all consumable
+# types.
+#
+# If the generator has collected all the targets it needs, it creates targets
+# corresponding to result, and returns it. When all generators have been run,
+# results of one of them are selected and returned as a result.
+#
+# It is quite possible for 'construct' to return more targets than it was asked
+# for. For example, if it were asked to generate a target of type EXE, but the
+# only found generator produces both EXE and TDS (file with debug information).
+# The extra target will be returned.
+#
+# Likewise, when a generator tries to convert sources to consumable types, it
+# can get more targets than it was asked for. The question is what to do with
+# extra targets. Boost.Build attempts to convert them to requested types, and
+# attempts that as early as possible. Specifically, this is done after invoking
+# each generator. TODO: An example is needed to document the rationale for
+# trying extra target conversion at that point.
+#
+# In order for the system to be able to use a specific generator instance 'when
+# needed', the instance needs to be registered with the system using
+# generators.register() or one of its related rules. Unregistered generators may
+# only be run explicitly and will not be considered by Boost.Build when
+# converting between given target types.
+
+import "class" : new ;
+import property-set ;
+import sequence ;
+import set ;
+import type ;
+import utility ;
+import virtual-target ;
+
+
+# Enable verbose generator tracing when requested on the command line.
+# NOTE(review): uses modules.peek although 'modules' does not appear in the
+# import list above -- confirm bjam makes it available here implicitly.
+if "--debug-generators" in [ modules.peek : ARGV ]
+{
+    .debug = true ;
+}
+
+
+# Updated cached viable source target type information as needed after a new
+# target type gets defined. This is needed because if a target type is a viable
+# source target type for some generator then all of the target type's derived
+# target types should automatically be considered as viable source target types
+# for the same generator as well. Does nothing if a non-derived target type is
+# passed to it.
+#
+rule update-cached-information-with-a-new-type ( type )
+{
+    local base-type = [ type.base $(type) ] ;
+    if $(base-type)
+    {
+        # Per-generator cache: .vstg.<generator> holds viable source types.
+        for local g in $(.vstg-cached-generators)
+        {
+            if $(base-type) in $(.vstg.$(g))
+            {
+                .vstg.$(g) += $(type) ;
+            }
+        }
+
+        # Per-target-type cache: .vst.<type> holds viable source types.
+        for local t in $(.vst-cached-types)
+        {
+            if $(base-type) in $(.vst.$(t))
+            {
+                .vst.$(t) += $(type) ;
+            }
+        }
+    }
+}
+
+
+# Clears cached viable source target type information except for target types
+# and generators with all source types listed as viable. Should be called when
+# something invalidates those cached values by possibly causing some new source
+# types to become viable.
+#
+local rule invalidate-extendable-viable-source-target-type-cache ( )
+{
+    # Entries whose cached value is the wildcard '*' can never become stale,
+    # so they are kept; everything else is dropped.
+    local generators-with-cached-source-types = $(.vstg-cached-generators) ;
+    .vstg-cached-generators = ;
+    for local g in $(generators-with-cached-source-types)
+    {
+        if $(.vstg.$(g)) = *
+        {
+            .vstg-cached-generators += $(g) ;
+        }
+        else
+        {
+            .vstg.$(g) = ;
+        }
+    }
+
+    local types-with-cached-source-types = $(.vst-cached-types) ;
+    .vst-cached-types = ;
+    for local t in $(types-with-cached-source-types)
+    {
+        if $(.vst.$(t)) = *
+        {
+            .vst-cached-types += $(t) ;
+        }
+        else
+        {
+            .vst.$(t) = ;
+        }
+    }
+}
+
+
+# Outputs a debug message if generators debugging is on. Each element of
+# 'message' is checked to see if it is a class instance. If so, instead of the
+# value, the result of 'str' call is output.
+#
+local rule generators.dout ( message * )
+{
+    if $(.debug)
+    {
+        ECHO [ sequence.transform utility.str : $(message) ] ;
+    }
+}
+
+
+# Returns the current debug-output indentation prefix.
+#
+local rule indent ( )
+{
+    return $(.indent:J="") ;
+}
+
+
+# Adds one level (four spaces) to the debug-output indentation.
+#
+local rule increase-indent ( )
+{
+    .indent += "    " ;
+}
+
+
+# Removes one level from the debug-output indentation.
+#
+local rule decrease-indent ( )
+{
+    .indent = $(.indent[2-]) ;
+}
+
+
+# Models a generator.
+#
+class generator
+{
+ import "class" : new ;
+ import feature ;
+ import generators : indent increase-indent decrease-indent generators.dout ;
+ import utility ;
+ import path ;
+ import property ;
+ import sequence ;
+ import set ;
+ import type ;
+ import virtual-target ;
+
+ EXPORT class@generator : indent increase-indent decrease-indent
+ generators.dout ;
+
+    rule __init__ (
+          id                          # Identifies the generator - should be name
+                                      # of the rule which sets up the build
+                                      # actions.
+          composing ?                 # Whether generator processes each source
+                                      # target in turn, converting it to required
+                                      # types. Ordinary generators pass all
+                                      # sources together to the recursive
+                                      # generators.construct-types call.
+        : source-types *              # Types that this generator can handle. If
+                                      # empty, the generator can consume anything.
+        : target-types-and-names +    # Types the generator will create and,
+                                      # optionally, names for created targets.
+                                      # Each element should have the form
+                                      # type["(" name-pattern ")"], for example,
+                                      # obj(%_x). Generated target name will be
+                                      # found by replacing % with the name of
+                                      # source, provided an explicit name was not
+                                      # specified.
+        : requirements *              # Properties required for this generator
+                                      # to be viable.
+    )
+    {
+        self.id = $(id) ;
+        self.rule-name = $(id) ;
+        self.composing = $(composing) ;
+        self.source-types = $(source-types) ;
+        self.target-types-and-names = $(target-types-and-names) ;
+        self.requirements = $(requirements) ;
+
+        for local e in $(target-types-and-names)
+        {
+            # Create three parallel lists: one with the list of target types,
+            # and two other with prefixes and postfixes to be added to target
+            # name. We use parallel lists for prefix and postfix (as opposed to
+            # mapping), because given target type might occur several times, for
+            # example "H H(%_symbols)".
+            local m = [ MATCH ([^\\(]*)(\\((.*)%(.*)\\))? : $(e) ] ;
+            self.target-types += $(m[1]) ;
+            self.name-prefix += $(m[3]:E="") ;
+            self.name-postfix += $(m[4]:E="") ;
+        }
+
+        # Split requirements into full properties (<feature>value, must be
+        # present verbatim) and grist-only elements (any value of the feature
+        # suffices); match-rank checks the two groups differently.
+        # NOTE(review): '[ requirements ]' invokes the accessor rule, which
+        # returns the $(self.requirements) assigned above -- confirm this is
+        # intentional rather than a mistyped $(requirements).
+        for local r in [ requirements ]
+        {
+            if $(r:G=)
+            {
+                self.property-requirements += $(r) ;
+            }
+            else
+            {
+                self.feature-requirements += $(r) ;
+            }
+        }
+
+        # Note that 'transform' here, is the same as 'for_each'.
+        sequence.transform type.validate : $(self.source-types) ;
+        sequence.transform type.validate : $(self.target-types) ;
+    }
+
+    ################# End of constructor #################
+
+    # Returns the generator's id (the name of its action rule by default).
+    #
+    rule id ( )
+    {
+        return $(self.id) ;
+    }
+
+    # Returns the list of target type the generator accepts.
+    #
+    rule source-types ( )
+    {
+        return $(self.source-types) ;
+    }
+
+    # Returns the list of target types that this generator produces. It is
+    # assumed to be always the same -- i.e. it can not change depending on some
+    # provided list of sources.
+    #
+    rule target-types ( )
+    {
+        return $(self.target-types) ;
+    }
+
+    # Returns the required properties for this generator. Properties in returned
+    # set must be present in build properties if this generator is to be used.
+    # If result has grist-only element, that build properties must include some
+    # value of that feature.
+    #
+    # XXX: remove this method?
+    #
+    rule requirements ( )
+    {
+        return $(self.requirements) ;
+    }
+
+    # Overrides the name of the rule invoked to set up build actions.
+    #
+    rule set-rule-name ( rule-name )
+    {
+        self.rule-name = $(rule-name) ;
+    }
+
+    # Returns the name of the rule invoked to set up build actions.
+    #
+    rule rule-name ( )
+    {
+        return $(self.rule-name) ;
+    }
+
+    # Returns a true value if the generator can be run with the specified
+    # properties.
+    #
+    rule match-rank ( property-set-to-match )
+    {
+        # See if generator requirements are satisfied by 'properties'. Treat a
+        # feature name in requirements (i.e. grist-only element), as matching
+        # any value of the feature.
+
+        if [ $(property-set-to-match).contains-raw $(self.property-requirements) ] &&
+            [ $(property-set-to-match).contains-features $(self.feature-requirements) ]
+        {
+            return true ;
+        }
+        else
+        {
+            return ;
+        }
+    }
+
+    # Returns another generator which differs from $(self) in
+    #   - id
+    #   - value to <toolset> feature in properties
+    #
+    rule clone ( new-id : new-toolset-properties + )
+    {
+        local g = [ new $(__class__) $(new-id) $(self.composing) :
+            $(self.source-types) : $(self.target-types-and-names) :
+            # Note: this does not remove any subfeatures of <toolset> which
+            # might cause problems.
+            [ property.change $(self.requirements) : <toolset> ]
+            $(new-toolset-properties) ] ;
+        return $(g) ;
+    }
+
+    # Creates another generator that is the same as $(self), except that if
+    # 'base' is in target types of $(self), 'type' will in target types of the
+    # new generator.
+    #
+    rule clone-and-change-target-type ( base : type )
+    {
+        local target-types ;
+        for local t in $(self.target-types-and-names)
+        {
+            # Separate the bare type (m[1]) from its optional "(name-pattern)"
+            # suffix (m[2]) so the pattern survives the type substitution.
+            local m = [ MATCH ([^\\(]*)(\\(.*\\))? : $(t) ] ;
+            if $(m) = $(base)
+            {
+                target-types += $(type)$(m[2]:E="") ;
+            }
+            else
+            {
+                target-types += $(t) ;
+            }
+        }
+
+        local g = [ new $(__class__) $(self.id) $(self.composing) :
+            $(self.source-types) : $(target-types) : $(self.requirements) ] ;
+        # Preserve an explicitly overridden rule name on the clone.
+        if $(self.rule-name)
+        {
+            $(g).set-rule-name $(self.rule-name) ;
+        }
+        return $(g) ;
+    }
+
+    # Tries to invoke this generator on the given sources. Returns a list of
+    # generated targets (instances of 'virtual-target') and optionally a set of
+    # properties to be added to the usage-requirements for all the generated
+    # targets. Returning nothing from run indicates that the generator was
+    # unable to create the target.
+    #
+    rule run
+    (
+        project         # Project for which the targets are generated.
+        name ?          # Used when determining the 'name' attribute for all
+                        # generated targets. See the 'generated-targets' method.
+        : property-set  # Desired properties for generated targets.
+        : sources +     # Source targets.
+    )
+    {
+        generators.dout [ indent ] "  ** generator" $(self.id) ;
+        generators.dout [ indent ] "  composing:" $(self.composing) ;
+
+        # A non-composing multi-source-type generator cannot decide how to
+        # distribute multiple sources across the types.
+        if ! $(self.composing) && $(sources[2]) && $(self.source-types[2])
+        {
+            import errors : error : errors.error ;
+            errors.error "Unsupported source/source-type combination" ;
+        }
+
+        # We do not run composing generators if no name is specified. The reason
+        # is that composing generator combines several targets, which can have
+        # different names, and it cannot decide which name to give for produced
+        # target. Therefore, the name must be passed.
+        #
+        # This in effect, means that composing generators are runnable only at
+        # the top-level of a transformation graph, or if their name is passed
+        # explicitly. Thus, we disallow composing generators in the middle. For
+        # example, the transformation CPP -> OBJ -> STATIC_LIB -> RSP -> EXE
+        # will not be allowed as the OBJ -> STATIC_LIB generator is composing.
+        if ! $(self.composing) || $(name)
+        {
+            run-really $(project) $(name) : $(property-set) : $(sources) ;
+        }
+    }
+
+    # Performs the actual generation after 'run' has validated the request:
+    # converts sources to consumable types, then builds the result graph.
+    #
+    rule run-really ( project name ? : property-set : sources + )
+    {
+        # Targets that this generator will consume directly.
+        local consumed = ;
+        # Targets that can not be consumed and will be returned as-is.
+        local bypassed = ;
+
+        if $(self.composing)
+        {
+            consumed = [ convert-multiple-sources-to-consumable-types $(project)
+                : $(property-set) : $(sources) ] ;
+        }
+        else
+        {
+            consumed = [ convert-to-consumable-types $(project) $(name)
+                : $(property-set) : $(sources) ] ;
+        }
+
+        local result ;
+        if $(consumed)
+        {
+            result = [ construct-result $(consumed) : $(project) $(name) :
+                $(property-set) ] ;
+        }
+
+        if $(result)
+        {
+            generators.dout [ indent ] "  SUCCESS: " $(result) ;
+        }
+        else
+        {
+            generators.dout [ indent ] "  FAILURE" ;
+        }
+        generators.dout ;
+        return $(result) ;
+    }
+
+    # Constructs the dependency graph to be returned by this generator.
+    # Returns the list of virtual targets created from 'consumed'.
+    #
+    rule construct-result
+    (
+        consumed +  # Already prepared list of consumable targets.
+                    # Composing generators may receive multiple sources
+                    # all of which will have types matching those in
+                    # $(self.source-types). Non-composing generators with
+                    # multiple $(self.source-types) will receive exactly
+                    # len $(self.source-types) sources with types matching
+                    # those in $(self.source-types). And non-composing
+                    # generators with only a single source type may
+                    # receive multiple sources with all of them of the
+                    # type listed in $(self.source-types).
+        : project name ?
+        : property-set  # Properties to be used for all actions created here.
+    )
+    {
+        local result ;
+        # If this is a 1->1 transformation, apply it to all consumed targets in
+        # order, producing one set of generated targets per consumed target.
+        if ! $(self.source-types[2]) && ! $(self.composing)
+        {
+            for local r in $(consumed)
+            {
+                result += [ generated-targets $(r) : $(property-set) :
+                    $(project) $(name) ] ;
+            }
+        }
+        else if $(consumed)
+        {
+            result += [ generated-targets $(consumed) : $(property-set) :
+                $(project) $(name) ] ;
+        }
+        return $(result) ;
+    }
+
+    # Determine target name from fullname (maybe including path components)
+    # Place optional prefix and postfix around basename
+    # Both 'prefix' and 'postfix' default to the empty string when omitted.
+    #
+    rule determine-target-name ( fullname : prefix ? : postfix ? )
+    {
+        # See if we need to add directory to the target name.
+        local dir = $(fullname:D) ;
+        local name = $(fullname:B) ;
+
+        name = $(prefix:E=)$(name) ;
+        name = $(name)$(postfix:E=) ;
+
+        if $(dir)
+            # Never append '..' to target path.
+            && ! [ MATCH .*(\\.\\.).* : $(dir) ]
+            && ! [ path.is-rooted $(dir) ]
+        {
+            # Relative path is always relative to the source directory. Retain
+            # it, so that users can have files with the same name in two
+            # different subdirectories.
+            name = $(dir)/$(name) ;
+        }
+        return $(name) ;
+    }
+
+    # Determine the name of the produced target from the names of the sources.
+    #
+    # All sources must share a common basename; otherwise an error is reported,
+    # as the generator cannot decide on a single output name.
+    #
+    rule determine-output-name ( sources + )
+    {
+        # The simple case is when a name of source has single dot. Then, we take
+        # the part before dot. Several dots can be caused by:
+        # - using source file like a.host.cpp, or
+        # - a type whose suffix has a dot. Say, we can type 'host_cpp' with
+        #   extension 'host.cpp'.
+        # In the first case, we want to take the part up to the last dot. In the
+        # second case -- not sure, but for now take the part up to the last dot
+        # too.
+        #
+        # Declared 'local' so the computed name does not leak into the module's
+        # global scope (the original assignment was missing 'local').
+        local name = [ utility.basename [ $(sources[1]).name ] ] ;
+        for local s in $(sources[2-])
+        {
+            if [ utility.basename [ $(s).name ] ] != $(name)
+            {
+                import errors : error : errors.error ;
+                errors.error "$(self.id): source targets have different names: cannot determine target name" ;
+            }
+        }
+        return [ determine-target-name [ $(sources[1]).name ] ] ;
+    }
+
+    # Constructs targets that are created after consuming 'sources'. The result
+    # will be the list of virtual-target, which has the same length as the
+    # 'target-types' attribute and with corresponding types.
+    #
+    # When 'name' is empty, all source targets must have the same 'name'
+    # attribute value, which will be used instead of the 'name' argument.
+    #
+    # The 'name' attribute value for each generated target will be equal to the
+    # 'name' parameter if there is no name pattern for this type. Otherwise, the
+    # '%' symbol in the name pattern will be replaced with the 'name' parameter
+    # to obtain the 'name' attribute.
+    #
+    # For example, if targets types are T1 and T2 (with name pattern "%_x"),
+    # suffixes for T1 and T2 are .t1 and .t2, and source is foo.z, then created
+    # files would be "foo.t1" and "foo_x.t2". The 'name' attribute actually
+    # determines the basename of a file.
+    #
+    # Note that this pattern mechanism has nothing to do with implicit patterns
+    # in make. It is a way to produce a target whose name is different than the
+    # name of its source.
+    #
+    rule generated-targets ( sources + : property-set : project name ? )
+    {
+        if ! $(name)
+        {
+            name = [ determine-output-name $(sources) ] ;
+        }
+
+        # Assign an action for each target. A single action instance is shared
+        # by all targets produced from this invocation.
+        local action = [ action-class ] ;
+        local a = [ class.new $(action) $(sources) : $(self.rule-name) :
+            $(property-set) ] ;
+
+        # Create generated target for each target type.
+        local targets ;
+        local pre = $(self.name-prefix) ;
+        local post = $(self.name-postfix) ;
+        for local t in $(self.target-types)
+        {
+            # Consume one prefix/postfix per produced target type.
+            local generated-name = $(pre[1])$(name:BS)$(post[1]) ;
+            generated-name = $(generated-name:R=$(name:D)) ;
+            pre = $(pre[2-]) ;
+            post = $(post[2-]) ;
+
+            targets += [ class.new file-target $(generated-name) : $(t) :
+                $(project) : $(a) ] ;
+        }
+
+        return [ sequence.transform virtual-target.register : $(targets) ] ;
+    }
+
+    # Attempts to convert 'sources' to targets of types that this generator can
+    # handle. The intention is to produce the set of targets that can be used
+    # when the generator is run.
+    #
+    rule convert-to-consumable-types
+    (
+        project name ?
+        : property-set
+        : sources +
+        : only-one ?  # Convert 'source' to only one of the source types. If
+                      # there is more than one possibility, report an error.
+    )
+    {
+        local _consumed ;
+        local missing-types ;
+
+        if $(sources[2])
+        {
+            # Do not know how to handle several sources yet. Just try to pass
+            # the request to other generator.
+            missing-types = $(self.source-types) ;
+        }
+        else
+        {
+            local temp = [ consume-directly $(sources) ] ;
+            if $(temp[1])
+            {
+                _consumed = $(temp[1]) ;
+            }
+            missing-types = $(temp[2-]) ;
+        }
+
+        # No need to search for transformation if some source type has consumed
+        # source and no more source types are needed.
+        if $(only-one) && $(_consumed)
+        {
+            missing-types = ;
+        }
+
+        # TODO: we should check that only one source type is created if
+        # 'only-one' is true.
+
+        if $(missing-types)
+        {
+            local transformed = [ generators.construct-types $(project) $(name)
+              : $(missing-types) : $(property-set) : $(sources) ] ;
+
+            # Add targets of right type to 'consumed'. Add others to 'bypassed'.
+            # The 'generators.construct' rule has done its best to convert
+            # everything to the required type. There is no need to rerun it on
+            # targets of different types.
+
+            # NOTE: ignoring usage requirements (the first element of
+            # 'transformed').
+            for local t in $(transformed[2-])
+            {
+                if [ $(t).type ] in $(missing-types)
+                {
+                    _consumed += $(t) ;
+                }
+            }
+        }
+
+        return [ sequence.unique $(_consumed) ] ;
+    }
+
+    # Converts several files to consumable types. Called for composing
+    # generators only.
+    #
+    rule convert-multiple-sources-to-consumable-types ( project : property-set :
+        sources * )
+    {
+        local result ;
+        # We process each source one-by-one, trying to convert it to a usable
+        # type.
+        if ! $(self.source-types)
+        {
+            # Anything is acceptable.
+            return $(sources) ;
+        }
+        else
+        {
+            # Sources whose type is, or derives from, one of our source types
+            # are taken as-is; anything else goes through type conversion.
+            local acceptible-types = [ sequence.unique
+                [ sequence.transform type.all-derived : $(self.source-types) ] ] ;
+            for local source in $(sources)
+            {
+                if ! [ $(source).type ] in $(acceptible-types)
+                {
+                    local transformed = [ generators.construct-types $(project)
+                        : $(self.source-types) : $(property-set) : $(source) ] ;
+                    # Only keep conversion results of the required types
+                    # (skipping the usage-requirements element).
+                    for local t in $(transformed[2-])
+                    {
+                        if [ $(t).type ] in $(self.source-types)
+                        {
+                            result += $(t) ;
+                        }
+                    }
+                    if ! $(transformed)
+                    {
+                        generators.dout [ indent ] " failed to convert " $(source) ;
+                    }
+                }
+                else
+                {
+                    result += $(source) ;
+                }
+            }
+            return [ sequence.unique $(result) ] ;
+        }
+    }
+
+    # Checks whether the single 'source' can be consumed by this generator
+    # directly. Returns a list whose first element is either 'source' (when it
+    # is directly consumable) or an empty string, followed by the source types
+    # that could not be matched.
+    #
+    rule consume-directly ( source )
+    {
+        local real-source-type = [ $(source).type ] ;
+
+        # If there are no source types, we can consume anything.
+        local source-types = $(self.source-types) ;
+        source-types ?= $(real-source-type) ;
+
+        # The empty string placeholder keeps the consumed target (or nothing)
+        # in the first position of the returned list.
+        local result = "" ;
+        local missing-types ;
+
+        for local st in $(source-types)
+        {
+            # The 'source' is of the right type already.
+            if $(real-source-type) = $(st) || [ type.is-derived
+                $(real-source-type) $(st) ]
+            {
+                result = $(source) ;
+            }
+            else
+            {
+                missing-types += $(st) ;
+            }
+        }
+        return $(result) $(missing-types) ;
+    }
+
+    # Returns the class to be used for actions. Default implementation returns
+    # "action". Derived generators may override this to attach a custom action
+    # class to the targets they produce.
+    #
+    rule action-class ( )
+    {
+        return "action" ;
+    }
+}
+
+
+# Registers a new generator instance 'g'.
+#
+rule register ( g )
+{
+    .all-generators += $(g) ;
+
+    # A generator can produce several targets of the same type. We want unique
+    # occurrence of that generator in .generators.$(t) in that case, otherwise,
+    # it will be tried twice and we will get a false ambiguity.
+    for local t in [ sequence.unique [ $(g).target-types ] ]
+    {
+        .generators.$(t) += $(g) ;
+    }
+
+    # Update the set of generators for toolset.
+
+    # TODO: should we check that a generator with this id is not already
+    # registered? For example, the fop.jam module intentionally declared two
+    # generators with the same id, so such a check would break it.
+    local id = [ $(g).id ] ;
+
+    # Some generators have multiple periods in their name, so a simple $(id:S=)
+    # will not generate the right toolset name. E.g. if id = gcc.compile.c++,
+    # then .generators-for-toolset.$(id:S=) will append to
+    # .generators-for-toolset.gcc.compile, which is a separate value from
+    # .generators-for-toolset.gcc. Correcting this makes generator inheritance
+    # work properly. See also inherit-generators in the toolset module.
+    local base = $(id) ;
+    while $(base:S)
+    {
+        base = $(base:B) ;
+    }
+    .generators-for-toolset.$(base) += $(g) ;
+
+
+    # After adding a new generator that can construct new target types, we need
+    # to clear the related cached viable source target type information for
+    # constructing a specific target type or using a specific generator. Cached
+    # viable source target type lists affected by this are those containing any
+    # of the target types constructed by the new generator or any of their base
+    # target types.
+    #
+    # A more advanced alternative to clearing that cached viable source target
+    # type information would be to expand it with additional source types or
+    # even better - mark it as needing to be expanded on next use.
+    #
+    # Also see the http://thread.gmane.org/gmane.comp.lib.boost.build/19077
+    # mailing list thread for an even more advanced idea of how we could convert
+    # Boost Build's Jamfile processing, target selection and generator selection
+    # into separate steps which would prevent these caches from ever being
+    # invalidated.
+    #
+    # For now we just clear all the cached viable source target type information
+    # that does not simply state 'all types' and may implement a more detailed
+    # algorithm later on if it becomes needed.
+
+    invalidate-extendable-viable-source-target-type-cache ;
+}
+
+
+# Creates a new non-composing 'generator' class instance and registers it.
+# Returns the created instance. Rationale: the instance is returned so that it
+# is possible to first register a generator and then call its 'run' method,
+# bypassing the whole generator selection process.
+#
+rule register-standard ( id : source-types * : target-types + : requirements * )
+{
+    # Note: the 'composing' argument of the generator constructor is omitted.
+    local g = [ new generator $(id) : $(source-types) : $(target-types) :
+        $(requirements) ] ;
+    register $(g) ;
+    return $(g) ;
+}
+
+
+# Creates a new composing 'generator' class instance and registers it.
+# Returns the created instance.
+#
+rule register-composing ( id : source-types * : target-types + : requirements *
+    )
+{
+    # 'true' marks the generator as composing (see the generator constructor).
+    local g = [ new generator $(id) true : $(source-types) : $(target-types) :
+        $(requirements) ] ;
+    register $(g) ;
+    return $(g) ;
+}
+
+
+# Returns all generators belonging to the given 'toolset', i.e. whose ids are
+# '$(toolset).<something>'. Returns an empty list when no generators have been
+# registered for that toolset.
+#
+rule generators-for-toolset ( toolset )
+{
+    return $(.generators-for-toolset.$(toolset)) ;
+}
+
+
+# Make generator 'overrider-id' be preferred to 'overridee-id'. If, when
+# searching for generators that could produce a target of a certain type, both
+# those generators are among viable generators, the overridden generator is
+# immediately discarded.
+#
+# The overridden generators are discarded immediately after computing the list
+# of viable generators but before running any of them.
+#
+# The special 'overridee-id' value "all" makes the overrider take precedence
+# over every other viable generator (see 'find-viable-generators').
+#
+rule override ( overrider-id : overridee-id )
+{
+    .override.$(overrider-id) += $(overridee-id) ;
+}
+
+
+# Returns a list of source types which can possibly be converted to
+# 'target-type' by some chain of generator invocation.
+#
+# More formally, takes all generators for 'target-type' and returns a union of
+# source types for those generators and result of calling itself recursively on
+# source types.
+#
+# Returns '*' in case any type should be considered a viable source type for the
+# given type.
+#
+local rule viable-source-types-real ( target-type )
+{
+    local result ;
+
+    # 't0' is the initial list of target types we need to process to get a list
+    # of their viable source target types. New target types will not be added to
+    # this list.
+    local t0 = [ type.all-bases $(target-type) ] ;
+
+    # 't' is the list of target types which have not yet been processed to get a
+    # list of their viable source target types. This list will get expanded as
+    # we locate more target types to process.
+    local t = $(t0) ;
+
+    while $(t)
+    {
+        # Find all generators for the current type. Unlike
+        # 'find-viable-generators' we do not care about the property-set.
+        local generators = $(.generators.$(t[1])) ;
+        t = $(t[2-]) ;
+
+        while $(generators)
+        {
+            local g = $(generators[1]) ;
+            generators = $(generators[2-]) ;
+
+            if ! [ $(g).source-types ]
+            {
+                # Empty source types -- everything can be accepted.
+                result = * ;
+                # This will terminate this loop.
+                generators = ;
+                # This will terminate the outer loop.
+                t = ;
+            }
+
+            for local source-type in [ $(g).source-types ]
+            {
+                if ! $(source-type) in $(result)
+                {
+                    # If a generator accepts a 'source-type' it will also
+                    # happily accept any type derived from it.
+                    for local n in [ type.all-derived $(source-type) ]
+                    {
+                        if ! $(n) in $(result)
+                        {
+                            # Here there is no point in adding target types to
+                            # the list of types to process in case they are or
+                            # have already been on that list. We optimize this
+                            # check by realizing that we only need to avoid the
+                            # original target type's base types. Other target
+                            # types that are or have been on the list of target
+                            # types to process have been added to the 'result'
+                            # list as well and have thus already been eliminated
+                            # by the previous if.
+                            if ! $(n) in $(t0)
+                            {
+                                t += $(n) ;
+                            }
+                            result += $(n) ;
+                        }
+                    }
+                }
+            }
+        }
+    }
+
+    return $(result) ;
+}
+
+
+# Helper rule, caches the result of 'viable-source-types-real'.
+#
+# An empty result is cached as the sentinel value 'none' so that negative
+# results are also cached; 'none' is translated back to an empty list on
+# return.
+#
+rule viable-source-types ( target-type )
+{
+    local key = .vst.$(target-type) ;
+    if ! $($(key))
+    {
+        .vst-cached-types += $(target-type) ;
+        local v = [ viable-source-types-real $(target-type) ] ;
+        if ! $(v)
+        {
+            v = none ;
+        }
+        $(key) = $(v) ;
+    }
+
+    if $($(key)) != none
+    {
+        return $($(key)) ;
+    }
+}
+
+
+# Returns the list of source types, which, when passed to 'run' method of
+# 'generator', has some chance of being eventually used (probably after
+# conversion by other generators).
+#
+# Returns '*' in case any type should be considered a viable source type for the
+# given generator.
+#
+rule viable-source-types-for-generator-real ( generator )
+{
+    local source-types = [ $(generator).source-types ] ;
+    if ! $(source-types)
+    {
+        # If generator does not specify any source types, it might be a special
+        # generator like builtin.lib-generator which just relays to other
+        # generators. Return '*' to indicate that any source type is possibly
+        # OK, since we do not know for sure.
+        return * ;
+    }
+    else
+    {
+        local result ;
+        while $(source-types)
+        {
+            local s = $(source-types[1]) ;
+            source-types = $(source-types[2-]) ;
+            local viable-sources = [ generators.viable-source-types $(s) ] ;
+            if $(viable-sources) = *
+            {
+                result = * ;
+                source-types = ; # Terminate the loop.
+            }
+            else
+            {
+                result += [ type.all-derived $(s) ] $(viable-sources) ;
+            }
+        }
+        return [ sequence.unique $(result) ] ;
+    }
+}
+
+
+# Helper rule, caches the result of 'viable-source-types-for-generator'.
+#
+# As with 'viable-source-types', an empty result is cached as the sentinel
+# 'none' and translated back to an empty list on return.
+#
+local rule viable-source-types-for-generator ( generator )
+{
+    local key = .vstg.$(generator) ;
+    if ! $($(key))
+    {
+        .vstg-cached-generators += $(generator) ;
+        local v = [ viable-source-types-for-generator-real $(generator) ] ;
+        if ! $(v)
+        {
+            v = none ;
+        }
+        $(key) = $(v) ;
+    }
+
+    if $($(key)) != none
+    {
+        return $($(key)) ;
+    }
+}
+
+
+# Returns usage requirements + list of created targets.
+#
+# Runs 'generator' on the given sources. When the generator succeeds and the
+# first returned element is a property-set, that element is taken as the usage
+# requirements; otherwise an empty property-set is used.
+#
+local rule try-one-generator-really ( project name ? : generator : target-type
+    : property-set : sources * )
+{
+    local targets =
+        [ $(generator).run $(project) $(name) : $(property-set) : $(sources) ] ;
+
+    local usage-requirements ;
+    local success ;
+
+    generators.dout [ indent ] returned $(targets) ;
+
+    if $(targets)
+    {
+        success = true ;
+
+        if [ class.is-a $(targets[1]) : property-set ]
+        {
+            usage-requirements = $(targets[1]) ;
+            targets = $(targets[2-]) ;
+        }
+        else
+        {
+            usage-requirements = [ property-set.empty ] ;
+        }
+    }
+
+    generators.dout [ indent ] " generator" [ $(generator).id ] " spawned " ;
+    generators.dout [ indent ] " " $(targets) ;
+    if $(usage-requirements)
+    {
+        # Fix: the original printed the never-assigned variable $(x) here, so
+        # the usage requirements were never actually shown in the debug output.
+        generators.dout [ indent ] " with usage requirements:" $(usage-requirements) ;
+    }
+
+    if $(success)
+    {
+        return $(usage-requirements) $(targets) ;
+    }
+}
+
+
+# Checks if generator invocation can be pruned, because it is guaranteed to
+# fail. If so, quickly returns an empty list. Otherwise, calls
+# try-one-generator-really.
+#
+# Pruning happens when the sources' types have no intersection with the
+# generator's viable source types (and the latter is not the wildcard '*').
+#
+local rule try-one-generator ( project name ? : generator : target-type
+    : property-set : sources * )
+{
+    local source-types ;
+    for local s in $(sources)
+    {
+        source-types += [ $(s).type ] ;
+    }
+    local viable-source-types = [ viable-source-types-for-generator $(generator)
+        ] ;
+
+    if $(source-types) && $(viable-source-types) != * &&
+        ! [ set.intersection $(source-types) : $(viable-source-types) ]
+    {
+        local id = [ $(generator).id ] ;
+        generators.dout [ indent ] " ** generator '$(id)' pruned" ;
+        #generators.dout [ indent ] "source-types" '$(source-types)' ;
+        #generators.dout [ indent ] "viable-source-types" '$(viable-source-types)' ;
+    }
+    else
+    {
+        return [ try-one-generator-really $(project) $(name) : $(generator) :
+            $(target-type) : $(property-set) : $(sources) ] ;
+    }
+}
+
+
+# Attempts to construct a target of each of the requested 'target-types' from
+# 'sources'. Returns the combined usage requirements followed by the created
+# targets; when nothing could be constructed, returns the sources unchanged.
+#
+rule construct-types ( project name ? : target-types + : property-set
+    : sources + )
+{
+    local result ;
+    local usage-requirements = [ property-set.empty ] ;
+    for local t in $(target-types)
+    {
+        local r = [ construct $(project) $(name) : $(t) : $(property-set) :
+            $(sources) ] ;
+        if $(r)
+        {
+            # First element of 'r' is the usage requirements; the rest are the
+            # created targets.
+            usage-requirements = [ $(usage-requirements).add $(r[1]) ] ;
+            result += $(r[2-]) ;
+        }
+    }
+    # TODO: have to introduce parameter controlling if several types can be
+    # matched and add appropriate checks.
+
+    # TODO: need to review the documentation for 'construct' to see if it should
+    # return $(source) even if nothing can be done with it. Current docs seem
+    # to imply that, contrary to the behaviour.
+    if $(result)
+    {
+        return $(usage-requirements) $(result) ;
+    }
+    else
+    {
+        return $(usage-requirements) $(sources) ;
+    }
+}
+
+
+# Ensures all 'targets' have their type. If this is not so, exits with an
+# error.
+#
+local rule ensure-type ( targets * )
+{
+    for local t in $(targets)
+    {
+        if ! [ $(t).type ]
+        {
+            import errors ;
+            errors.error "target" [ $(t).str ] "has no type" ;
+        }
+    }
+}
+
+
+# Returns generators which can be used to construct target of specified type
+# with specified properties. Uses the following algorithm:
+# - iterates over requested target-type and all its bases (in the order returned
+#   by type.all-bases).
+# - for each type find all generators that generate that type and whose
+#   requirements are satisfied by properties.
+# - if the set of generators is not empty, returns that set.
+#
+# Note: this algorithm explicitly ignores generators for base classes if there
+# is at least one generator for the requested target-type.
+#
+local rule find-viable-generators-aux ( target-type : property-set )
+{
+    # Select generators that can create the required target type.
+    local viable-generators = ;
+
+    import type ;
+    local t = $(target-type) ;
+
+    if $(.debug)
+    {
+        generators.dout [ indent ] find-viable-generators target-type= $(target-type)
+            property-set= [ $(property-set).as-path ] ;
+        generators.dout [ indent ] "trying type" $(target-type) ;
+    }
+
+    local generators = $(.generators.$(target-type)) ;
+    if $(generators)
+    {
+        if $(.debug)
+        {
+            generators.dout [ indent ] "there are generators for this type" ;
+        }
+    }
+    else
+    {
+        local t = [ type.base $(target-type) ] ;
+
+        # Get the list of generators for the requested type. If no generator is
+        # registered, try base type, and so on.
+        while $(t)
+        {
+            if $(.debug)
+            {
+                generators.dout [ indent ] "trying type" $(t) ;
+            }
+            if $(.generators.$(t))
+            {
+                # NOTE(review): unlike the other messages in this rule, this
+                # one is not guarded by $(.debug).
+                generators.dout [ indent ] "there are generators for this type" ;
+                generators = $(.generators.$(t)) ;
+
+                # We are here because there were no generators found for
+                # target-type but there are some generators for its base type.
+                # We will try to use them, but they will produce targets of
+                # base type, not of 'target-type'. So, we clone the generators
+                # and modify the list of target types.
+                local generators2 ;
+                for local g in $(generators)
+                {
+                    # generators.register adds a generator to the list of
+                    # generators for toolsets, which is a bit strange, but
+                    # should work. That list is only used when inheriting a
+                    # toolset, which should have been done before running
+                    # generators.
+                    generators2 += [ $(g).clone-and-change-target-type $(t) :
+                        $(target-type) ] ;
+                    generators.register $(generators2[-1]) ;
+                }
+                generators = $(generators2) ;
+                t = ;
+            }
+            else
+            {
+                t = [ type.base $(t) ] ;
+            }
+        }
+    }
+
+    for local g in $(generators)
+    {
+        if $(.debug)
+        {
+            generators.dout [ indent ] "trying generator" [ $(g).id ] "(" [ $(g).source-types ] -> [ $(g).target-types ] ")" ;
+        }
+
+        if [ $(g).match-rank $(property-set) ]
+        {
+            if $(.debug)
+            {
+                generators.dout [ indent ] "  is viable" ;
+            }
+            viable-generators += $(g) ;
+        }
+    }
+
+    return $(viable-generators) ;
+}
+
+
+# Caching front-end for 'find-viable-generators-aux'. Additionally filters out
+# generators that are already active on the construction stack and applies the
+# generator override rules (see 'override').
+#
+rule find-viable-generators ( target-type : property-set )
+{
+    local key = $(target-type).$(property-set) ;
+    local l = $(.fv.$(key)) ;
+    if ! $(l)
+    {
+        l = [ find-viable-generators-aux $(target-type) : $(property-set) ] ;
+        if ! $(l)
+        {
+            # Cache the negative result using the 'none' sentinel.
+            l = none ;
+        }
+        .fv.$(key) = $(l) ;
+    }
+
+    if $(l) = none
+    {
+        l = ;
+    }
+
+    local viable-generators ;
+    for local g in $(l)
+    {
+        # Avoid trying the same generator twice on different levels.
+        if ! $(g) in $(.active-generators)
+        {
+            viable-generators += $(g) ;
+        }
+        else
+        {
+            generators.dout [ indent ] " generator " [ $(g).id ] "is active, discarding" ;
+        }
+    }
+
+    # Generators which override 'all'.
+    local all-overrides ;
+    # Generators which are overridden.
+    local overriden-ids ;
+    for local g in $(viable-generators)
+    {
+        local id = [ $(g).id ] ;
+        local this-overrides = $(.override.$(id)) ;
+        overriden-ids += $(this-overrides) ;
+        if all in $(this-overrides)
+        {
+            all-overrides += $(g) ;
+        }
+    }
+    if $(all-overrides)
+    {
+        viable-generators = $(all-overrides) ;
+    }
+    local result ;
+    for local g in $(viable-generators)
+    {
+        if ! [ $(g).id ] in $(overriden-ids)
+        {
+            result += $(g) ;
+        }
+    }
+
+    return $(result) ;
+}
+
+
+.construct-stack = ;
+
+
+# Attempts to construct a target by finding viable generators, running them and
+# selecting the dependency graph. Exits with a diagnostic when more than one
+# generator succeeds (ambiguity).
+#
+local rule construct-really ( project name ? : target-type : property-set :
+    sources * )
+{
+    # Declared 'local' so the list does not leak into the module's global
+    # scope (the original assignment was missing 'local').
+    local viable-generators = [ find-viable-generators $(target-type) :
+        $(property-set) ] ;
+
+    generators.dout [ indent ] "*** " [ sequence.length $(viable-generators) ]
+        " viable generators" ;
+
+    local result ;
+    local generators-that-succeeded ;
+    for local g in $(viable-generators)
+    {
+        # This variable will be restored on exit from this scope.
+        local .active-generators = $(g) $(.active-generators) ;
+
+        local r = [ try-one-generator $(project) $(name) : $(g) : $(target-type)
+            : $(property-set) : $(sources) ] ;
+
+        if $(r)
+        {
+            generators-that-succeeded += $(g) ;
+            if $(result)
+            {
+                ECHO "Error: ambiguity found when searching for best transformation" ;
+                ECHO "Trying to produce type '$(target-type)' from: " ;
+                for local s in $(sources)
+                {
+                    ECHO " - " [ $(s).str ] ;
+                }
+                ECHO "Generators that succeeded:" ;
+                for local g in $(generators-that-succeeded)
+                {
+                    ECHO " - " [ $(g).id ] ;
+                }
+                ECHO "First generator produced: " ;
+                for local t in $(result[2-])
+                {
+                    ECHO " - " [ $(t).str ] ;
+                }
+                ECHO "Second generator produced: " ;
+                for local t in $(r[2-])
+                {
+                    ECHO " - " [ $(t).str ] ;
+                }
+                EXIT ;
+            }
+            else
+            {
+                result = $(r) ;
+            }
+        }
+    }
+
+    return $(result) ;
+}
+
+
+# Attempts to create a target of 'target-type' with 'properties' from 'sources'.
+# The 'sources' are treated as a collection of *possible* ingredients, i.e.
+# there is no obligation to consume them all.
+#
+# Returns a list of targets. When this invocation is first instance of
+# 'construct' in stack, returns only targets of requested 'target-type',
+# otherwise, returns also unused sources and additionally generated targets.
+#
+# If 'top-level' is set, does not suppress generators that are already
+# used in the stack. This may be useful in cases where a generator
+# has to build a metatarget -- for example a target corresponding to
+# built tool.
+#
+rule construct ( project name ? : target-type : property-set * : sources * : top-level ? )
+{
+    # Fix: the original declared 'local saved-stack' but then assigned the
+    # undeclared 'saved-active', leaking a global variable. Declare the
+    # variable that is actually used.
+    local saved-active ;
+    if $(top-level)
+    {
+        saved-active = $(.active-generators) ;
+        .active-generators = ;
+    }
+
+    # Fix: the original tested the literal token '(.construct-stack)', which is
+    # always true. Use variable expansion so 'ensure-type' only runs for
+    # nested invocations, as intended.
+    if $(.construct-stack)
+    {
+        ensure-type $(sources) ;
+    }
+
+    .construct-stack += 1 ;
+
+    increase-indent ;
+
+    if $(.debug)
+    {
+        generators.dout [ indent ] "*** construct" $(target-type) ;
+
+        for local s in $(sources)
+        {
+            generators.dout [ indent ] "    from" $(s) ;
+        }
+        generators.dout [ indent ] "    properties:" [ $(property-set).raw ] ;
+    }
+
+    local result = [ construct-really $(project) $(name) : $(target-type) :
+        $(property-set) : $(sources) ] ;
+
+    decrease-indent ;
+
+    .construct-stack = $(.construct-stack[2-]) ;
+
+    if $(top-level)
+    {
+        .active-generators = $(saved-active) ;
+    }
+
+    return $(result) ;
+}
+
+# Given 'result', obtained from some generator or generators.construct, adds
+# 'raw-properties' as usage requirements to it. If result already contains usage
+# requirements -- that is, the first element of result is an instance of the
+# property-set class -- the existing usage requirements and 'raw-properties' are
+# combined.
+#
+rule add-usage-requirements ( result * : raw-properties * )
+{
+    if $(result)
+    {
+        if [ class.is-a $(result[1]) : property-set ]
+        {
+            return [ $(result[1]).add-raw $(raw-properties) ] $(result[2-]) ;
+        }
+        else
+        {
+            return [ property-set.create $(raw-properties) ] $(result) ;
+        }
+    }
+}
+
+# Debugging helper: prints every registered generator together with its source
+# and target types.
+#
+rule dump ( )
+{
+    for local g in $(.all-generators)
+    {
+        ECHO [ $(g).id ] ":" [ $(g).source-types ] -> [ $(g).target-types ] ;
+    }
+}
+
diff --git a/src/kenlm/jam-files/boost-build/build/project.jam b/src/kenlm/jam-files/boost-build/build/project.jam
new file mode 100644
index 0000000..c9a0909
--- /dev/null
+++ b/src/kenlm/jam-files/boost-build/build/project.jam
@@ -0,0 +1,1228 @@
+# Copyright 2002, 2003 Dave Abrahams
+# Copyright 2002, 2005, 2006 Rene Rivera
+# Copyright 2002, 2003, 2004, 2005, 2006 Vladimir Prus
+# Distributed under the Boost Software License, Version 1.0.
+# (See accompanying file LICENSE_1_0.txt or copy at
+# http://www.boost.org/LICENSE_1_0.txt)
+
+# Implements project representation and loading. Each project is represented by:
+# - a module where all the Jamfile content lives.
+# - an instance of 'project-attributes' class.
+# (given a module name, can be obtained using the 'attributes' rule)
+# - an instance of 'project-target' class (from targets.jam)
+# (given a module name, can be obtained using the 'target' rule)
+#
+# Typically, projects are created as result of loading a Jamfile, which is done
+# by rules 'load' and 'initialize', below. First, a module is prepared and a new
+# project-attributes instance is created. Some rules necessary for all projects
+# are added to the module (see the 'project-rules' module). Default project
+# attributes are set (inheriting parent project attributes, if it exists). After
+# that the Jamfile is read. It can declare its own attributes using the
+# 'project' rule which will be combined with any already set.
+#
+# The 'project' rule can also declare a project id which will be associated with
+# the project module.
+#
+# Besides Jamfile projects, we also support 'standalone' projects created by
+# calling 'initialize' in an arbitrary module and not specifying the project's
+# location. After the call, the module can call the 'project' rule, declare main
+# targets and behave as a regular project except that, since it is not
+# associated with any location, it should only declare prebuilt targets.
+#
+# The list of all loaded Jamfiles is stored in the .project-locations variable.
+# It is possible to obtain a module name for a location using the 'module-name'
+# rule. Standalone projects are not recorded and can only be referenced using
+# their project id.
+
+import "class" : new ;
+import modules ;
+import path ;
+import print ;
+import property-set ;
+import sequence ;
+
+
+.debug-loading = [ MATCH ^(--debug-loading)$ : [ modules.peek : ARGV ] ] ;
+
+
+# Loads the Jamfile at the given location. After loading, project global file
+# and Jamfiles needed by the requested one will be loaded recursively. If the
+# Jamfile at that location is loaded already, does nothing. Returns the project
+# module for the Jamfile.
+#
+rule load ( jamfile-location )
+{
+ local module-name = [ module-name $(jamfile-location) ] ;
+ # If Jamfile is already loaded, do not try again.
+ if ! $(module-name) in $(.jamfile-modules)
+ {
+ if $(.debug-loading)
+ {
+ ECHO Loading Jamfile at '$(jamfile-location)' ;
+ }
+
+ load-jamfile $(jamfile-location) : $(module-name) ;
+
+ # We want to make sure that child projects are loaded only after parent
+ # projects. In particular, because parent projects define attributes
+ # which are then inherited by children, and we do not want children to
+ # be loaded before parent has defined everything.
+ #
+ # While "build-project" and "use-project" can potentially refer to child
+ # projects from parent projects, we do not immediately load child
+ # projects when seeing those attributes. Instead, we record the minimal
+ # information to be used only later.
+ load-used-projects $(module-name) ;
+ }
+ return $(module-name) ;
+}
+
+
+rule load-used-projects ( module-name )
+{
+ local used = [ modules.peek $(module-name) : .used-projects ] ;
+ local location = [ attribute $(module-name) location ] ;
+ while $(used)
+ {
+ local id = $(used[1]) ;
+ local where = [ path.make $(used[2]) ] ;
+ register-id $(id) : [ load [ path.root $(where) $(location) ] ] ;
+ used = $(used[3-]) ;
+ }
+}
+
+
+# Note the use of character groups, as opposed to listing 'Jamroot' and
+# 'jamroot'. With the latter, we would get duplicate matches on Windows and
+# would have to eliminate duplicates.
+JAMROOT ?= [ modules.peek : JAMROOT ] ;
+JAMROOT ?= project-root.jam [Jj]amroot [Jj]amroot.jam ;
+
+
+# Loads parent of Jamfile at 'location'. Issues an error if nothing is found.
+#
+rule load-parent ( location )
+{
+ local found = [ path.glob-in-parents $(location) : $(JAMROOT) $(JAMFILE) ] ;
+ if ! $(found)
+ {
+ import errors ;
+ errors.error Could not find parent "for" project at '$(location)' :
+ Did not find Jamfile.jam or Jamroot.jam "in" any parent directory. ;
+ }
+ return [ load $(found[1]:D) ] ;
+}
+
+
+# Returns the project module corresponding to the given project-id or plain
+# directory name. Returns nothing if such a project can not be found.
+#
+rule find ( name : current-location )
+{
+ local project-module ;
+
+ # Try interpreting name as project id.
+ if [ path.is-rooted $(name) ]
+ {
+ project-module = $($(name).jamfile-module) ;
+ }
+
+ if ! $(project-module)
+ {
+ local location = [ path.root [ path.make $(name) ] $(current-location) ]
+ ;
+
+ # If no project is registered for the given location, try to load it.
+ # First see if we have a Jamfile. If not, then see if we might have a
+ # project root willing to act as a Jamfile. In that case, project root
+ # must be placed in the directory referred to by id.
+
+ project-module = [ module-name $(location) ] ;
+ if ! $(project-module) in $(.jamfile-modules)
+ {
+ if [ path.glob $(location) : $(JAMROOT) $(JAMFILE) ]
+ {
+ project-module = [ load $(location) ] ;
+ }
+ else
+ {
+ project-module = ;
+ }
+ }
+ }
+
+ return $(project-module) ;
+}
+
+
+# Returns the name of the module corresponding to 'jamfile-location'. If no
+# module corresponds to that location yet, associates the default module name
+# with that location.
+#
+rule module-name ( jamfile-location )
+{
+ if ! $(.module.$(jamfile-location))
+ {
+ # Root the path, so that locations are always unambiguous. Without this,
+ # we can not decide if '../../exe/program1' and '.' are the same paths.
+ local normalized = [ path.root $(jamfile-location) [ path.pwd ] ] ;
+
+ # Quick & dirty fix to get the same module name when we supply two
+ # equivalent location paths, e.g. 'd:\Foo' & 'D:\fOo\bar\..' on Windows.
+ # Note that our current implementation will not work correctly if the
+ # given location references an empty folder, but in that case any later
+ # attempt to load a Jamfile from this location will fail anyway.
+ # FIXME: Implement this cleanly. Support for this type of path
+ # normalization already exists internally in Boost Jam and the current
+ # fix relies on the GLOB builtin rule using that support. Most likely we
+ # just need to add a new builtin rule to do this explicitly.
+ normalized = [ NORMALIZE_PATH $(normalized) ] ;
+ local glob-result = [ GLOB [ path.native $(normalized) ] : * ] ;
+ if $(glob-result)
+ {
+ normalized = $(glob-result[1]:D) ;
+ }
+ .module.$(jamfile-location) = Jamfile<$(normalized)> ;
+ }
+ return $(.module.$(jamfile-location)) ;
+}
+
+
+# Default patterns to search for the Jamfiles to use for build declarations.
+#
+JAMFILE = [ modules.peek : JAMFILE ] ;
+JAMFILE ?= [Bb]uild.jam [Jj]amfile.v2 [Jj]amfile [Jj]amfile.jam ;
+
+
+# Find the Jamfile at the given location. This returns the exact names of all
+# the Jamfiles in the given directory. The optional parent-root argument causes
+# this to search not the given directory but the ones above it up to the
+# parent-root directory.
+#
+rule find-jamfile (
+ dir # The directory(s) to look for a Jamfile.
+ parent-root ? # Optional flag indicating to search for the parent Jamfile.
+ : no-errors ?
+ )
+{
+ # Glob for all the possible Jamfiles according to the match pattern.
+ #
+ local jamfile-glob = ;
+ if $(parent-root)
+ {
+ if ! $(.parent-jamfile.$(dir))
+ {
+ .parent-jamfile.$(dir) = [ path.glob-in-parents $(dir) : $(JAMFILE)
+ ] ;
+ }
+ jamfile-glob = $(.parent-jamfile.$(dir)) ;
+ }
+ else
+ {
+ if ! $(.jamfile.$(dir))
+ {
+ .jamfile.$(dir) = [ path.glob $(dir) : $(JAMFILE) ] ;
+ }
+ jamfile-glob = $(.jamfile.$(dir)) ;
+
+ }
+
+ local jamfile-to-load = $(jamfile-glob) ;
+ # Multiple Jamfiles found in the same place. Warn about this and ensure we
+ # use only one of them. As a temporary convenience measure, if there is
+ # Jamfile.v2 among found files, suppress the warning and use it.
+ #
+ if $(jamfile-to-load[2-])
+ {
+ local v2-jamfiles = [ MATCH ^(.*[Jj]amfile\\.v2)|(.*[Bb]uild\\.jam)$ :
+ $(jamfile-to-load) ] ;
+
+ if $(v2-jamfiles) && ! $(v2-jamfiles[2])
+ {
+ jamfile-to-load = $(v2-jamfiles) ;
+ }
+ else
+ {
+ local jamfile = [ path.basename $(jamfile-to-load[1]) ] ;
+ ECHO "warning: Found multiple Jamfiles at '"$(dir)"'!"
+ "Loading the first one: '$(jamfile)'." ;
+ }
+
+ jamfile-to-load = $(jamfile-to-load[1]) ;
+ }
+
+ # Could not find it, error.
+ #
+ if ! $(no-errors) && ! $(jamfile-to-load)
+ {
+ import errors ;
+ errors.error Unable to load Jamfile.
+ : Could not find a Jamfile in directory '$(dir)'.
+ : Attempted to find it with pattern '$(JAMFILE:J=" ")'.
+ : Please consult the documentation at 'http://www.boost.org'. ;
+ }
+
+ return $(jamfile-to-load) ;
+}
+
+
+# Load a Jamfile at the given directory. Returns nothing. Will attempt to load
+# the file as indicated by the JAMFILE patterns. Effect of calling this rule
+# twice with the same 'dir' is undefined.
+#
+local rule load-jamfile ( dir : jamfile-module )
+{
+ # See if the Jamfile is where it should be.
+ #
+ local jamfile-to-load = [ path.glob $(dir) : $(JAMROOT) ] ;
+ if ! $(jamfile-to-load)
+ {
+ jamfile-to-load = [ find-jamfile $(dir) ] ;
+ }
+
+ if $(jamfile-to-load[2])
+ {
+ import errors ;
+ errors.error "Multiple Jamfiles found at '$(dir)'" :
+ "Filenames are: " $(jamfile-to-load:D=) ;
+ }
+
+ # Now load the Jamfile in its own context.
+ # The call to 'initialize' may load the parent Jamfile, which might contain
+ # a 'use-project' or a 'project.load' call, causing a second attempt to load
+ # the same project we are loading now. Checking inside .jamfile-modules
+ # prevents that second attempt from messing things up.
+ if ! $(jamfile-module) in $(.jamfile-modules)
+ {
+ local previous-project = $(.current-project) ;
+
+ # Initialize the Jamfile module before loading.
+ initialize $(jamfile-module) : [ path.parent $(jamfile-to-load) ] :
+ $(jamfile-to-load:BS) ;
+
+ if ! $(jamfile-module) in $(.jamfile-modules)
+ {
+ .jamfile-modules += $(jamfile-module) ;
+
+ local saved-project = $(.current-project) ;
+
+ mark-as-user $(jamfile-module) ;
+ modules.load $(jamfile-module) : [ path.native $(jamfile-to-load) ]
+ : . ;
+ if [ MATCH ^($(JAMROOT))$ : $(jamfile-to-load:BS) ]
+ {
+ jamfile = [ find-jamfile $(dir) : no-errors ] ;
+ if $(jamfile)
+ {
+ load-aux $(jamfile-module) : [ path.native $(jamfile) ] ;
+ }
+ }
+
+ # Now do some checks.
+ if $(.current-project) != $(saved-project)
+ {
+ import errors ;
+ errors.error
+ The value of the .current-project variable has magically
+ : changed after loading a Jamfile. This means some of the
+ : targets might be defined in the wrong project.
+ : after loading $(jamfile-module)
+ : expected value $(saved-project)
+ : actual value $(.current-project) ;
+ }
+
+ end-load $(previous-project) ;
+
+ if $(.global-build-dir)
+ {
+ if [ attribute $(jamfile-module) location ] && ! [ attribute
+ $(jamfile-module) id ]
+ {
+ local project-root = [ attribute $(jamfile-module)
+ project-root ] ;
+ if $(project-root) = $(dir)
+ {
+ ECHO "warning: the --build-dir option was specified" ;
+ ECHO "warning: but Jamroot at '$(dir)'" ;
+ ECHO "warning: specified no project id" ;
+ ECHO "warning: the --build-dir option will be ignored" ;
+ }
+ }
+ }
+ }
+ }
+}
+
+
+# Called when done loading a project module. Restores the current project to its
+# previous value and does some additional checking to make sure our 'currently
+# loaded project' identifier does not get left with an invalid value.
+#
+rule end-load ( previous-project ? )
+{
+ if ! $(.current-project)
+ {
+ import errors ;
+ errors.error Ending project loading requested when there was no project
+ currently being loaded. ;
+ }
+
+ if ! $(previous-project) && $(.saved-current-project)
+ {
+ import errors ;
+ errors.error Ending project loading requested with no 'previous project'
+ when there were other projects still marked as being loaded
+ recursively. ;
+ }
+
+ .current-project = $(previous-project) ;
+}
+
+
+rule mark-as-user ( module-name )
+{
+ if USER_MODULE in [ RULENAMES ]
+ {
+ USER_MODULE $(module-name) ;
+ }
+}
+
+
+rule load-aux ( module-name : file )
+{
+ mark-as-user $(module-name) ;
+
+ module $(module-name)
+ {
+ include $(2) ;
+ local rules = [ RULENAMES $(1) ] ;
+ IMPORT $(1) : $(rules) : $(1) : $(1).$(rules) ;
+ }
+}
+
+
+.global-build-dir = [ MATCH ^--build-dir=(.*)$ : [ modules.peek : ARGV ] ] ;
+if $(.global-build-dir)
+{
+ # If the option is specified several times, take the last value.
+ .global-build-dir = [ path.make $(.global-build-dir[-1]) ] ;
+}
+
+
+# Initialize the module for a project.
+#
+rule initialize (
+ module-name # The name of the project module.
+ : location ? # The location (directory) of the project to initialize. If
+ # not specified, a standalone project will be initialized.
+ : basename ?
+ )
+{
+ if $(.debug-loading)
+ {
+ ECHO "Initializing project '$(module-name)'" ;
+ }
+
+ local jamroot ;
+
+ local parent-module ;
+ if $(module-name) = test-config
+ {
+ # No parent.
+ }
+ else if $(module-name) = site-config
+ {
+ parent-module = test-config ;
+ }
+ else if $(module-name) = user-config
+ {
+ parent-module = site-config ;
+ }
+ else if $(module-name) = project-config
+ {
+ parent-module = user-config ;
+ }
+ else if $(location) && ! [ MATCH ^($(JAMROOT))$ : $(basename) ]
+ {
+ # We search for parent/jamroot only if this is a jamfile project, i.e.
+ # if it is not a standalone or a jamroot project.
+ parent-module = [ load-parent $(location) ] ;
+ }
+ else if $(location)
+ {
+ # We have a jamroot project. Inherit from user-config (or project-config
+ # if it exists).
+ if $(project-config.attributes)
+ {
+ parent-module = project-config ;
+ }
+ else
+ {
+ parent-module = user-config ;
+ }
+ jamroot = true ;
+ }
+
+ # TODO: need to consider if standalone projects can do anything but define
+ # prebuilt targets. If so, we need to give them a more sensible "location",
+ # so that source paths are correct.
+ location ?= "" ;
+ # Create the module for the Jamfile first.
+ module $(module-name)
+ {
+ }
+
+ # load-parent can end up loading this module again. Make sure this is not
+ # duplicated.
+ if ! $($(module-name).attributes)
+ {
+ $(module-name).attributes = [ new project-attributes $(location)
+ $(module-name) ] ;
+ local attributes = $($(module-name).attributes) ;
+
+ if $(location)
+ {
+ $(attributes).set source-location : [ path.make $(location) ] :
+ exact ;
+ }
+ else
+ {
+ local cfgs = project site test user ;
+ if ! $(module-name) in $(cfgs)-config
+ {
+ # This is a standalone project with known location. Set its
+ # source location so it can declare targets. This is needed so
+ # you can put a .jam file with your sources and use it via
+ # 'using'. Standard modules (in the 'tools' subdir) may not
+ # assume source dir is set.
+ local s = [ modules.binding $(module-name) ] ;
+ if ! $(s)
+ {
+ import errors ;
+ errors.error Could not determine project location
+ $(module-name) ;
+ }
+ $(attributes).set source-location : $(s:D) : exact ;
+ }
+ }
+
+ $(attributes).set requirements : [ property-set.empty ] : exact ;
+ $(attributes).set usage-requirements : [ property-set.empty ] : exact ;
+
+ # Import rules common to all project modules from project-rules module,
+ # defined at the end of this file.
+ local rules = [ RULENAMES project-rules ] ;
+ IMPORT project-rules : $(rules) : $(module-name) : $(rules) ;
+
+ if $(parent-module)
+ {
+ inherit-attributes $(module-name) : $(parent-module) ;
+ $(attributes).set parent-module : $(parent-module) : exact ;
+ }
+
+ if $(jamroot)
+ {
+ $(attributes).set project-root : $(location) : exact ;
+ if ! $(.first-project-root)
+ {
+ .first-project-root = $(module-name) ;
+ }
+ }
+
+ local parent ;
+ if $(parent-module)
+ {
+ parent = [ target $(parent-module) ] ;
+ }
+
+ if ! $(.target.$(module-name))
+ {
+ local requirements = [ attribute $(module-name) requirements ] ;
+ .target.$(module-name) = [ new project-target $(module-name) :
+ $(module-name) $(parent) : $(requirements) ] ;
+
+ if $(.debug-loading)
+ {
+ ECHO Assigned project target $(.target.$(module-name)) to
+ '$(module-name)' ;
+ }
+ }
+ }
+
+ .current-project = [ target $(module-name) ] ;
+}
+
+
+# Make 'project-module' inherit attributes of project root and parent module.
+#
+rule inherit-attributes ( project-module : parent-module )
+{
+ local attributes = $($(project-module).attributes) ;
+ local pattributes = [ attributes $(parent-module) ] ;
+ # Parent module might be locationless configuration module.
+ if [ modules.binding $(parent-module) ]
+ {
+ $(attributes).set parent :
+ [ path.parent [ path.make [ modules.binding $(parent-module) ] ] ] ;
+ }
+ $(attributes).set project-root :
+ [ $(pattributes).get project-root ] : exact ;
+ $(attributes).set default-build :
+ [ $(pattributes).get default-build ] ;
+ $(attributes).set requirements :
+ [ $(pattributes).get requirements ] : exact ;
+ $(attributes).set usage-requirements :
+ [ $(pattributes).get usage-requirements ] : exact ;
+
+ local parent-build-dir = [ $(pattributes).get build-dir ] ;
+ if $(parent-build-dir)
+ {
+ # Have to compute relative path from parent dir to our dir. Convert both
+ # paths to absolute, since we cannot find relative path from ".." to
+ # ".".
+
+ local location = [ attribute $(project-module) location ] ;
+ local parent-location = [ attribute $(parent-module) location ] ;
+
+ local pwd = [ path.pwd ] ;
+ local parent-dir = [ path.root $(parent-location) $(pwd) ] ;
+ local our-dir = [ path.root $(location) $(pwd) ] ;
+ $(attributes).set build-dir : [ path.join $(parent-build-dir)
+ [ path.relative $(our-dir) $(parent-dir) ] ] : exact ;
+ }
+}
+
+
+# Returns whether the given string is a valid registered project id.
+#
+rule is-registered-id ( id )
+{
+ return $($(id).jamfile-module) ;
+}
+
+
+# Associate the given id with the given project module. Returns the possibly
+# corrected project id.
+#
+rule register-id ( id : module )
+{
+ id = [ path.root $(id) / ] ;
+
+ if [ MATCH (//) : $(id) ]
+ {
+ import errors ;
+ errors.user-error Project id may not contain two consecutive slash
+ characters (project id: '$(id)'). ;
+ }
+
+ local orig-module = $($(id).jamfile-module) ;
+ if $(orig-module) && $(orig-module) != $(module)
+ {
+ local new-file = [ modules.peek $(module) : __file__ ] ;
+ local new-location = [ project.attribute $(module) location ] ;
+
+ local orig-file = [ modules.peek $(orig-module) : __file__ ] ;
+ local orig-main-id = [ project.attribute $(orig-module) id ] ;
+ local orig-location = [ project.attribute $(orig-module) location ] ;
+ local orig-project = [ target $(orig-module) ] ;
+ local orig-name = [ $(orig-project).name ] ;
+
+ import errors ;
+ errors.user-error Attempt to redeclare already registered project id
+ '$(id)'.
+ : Original project:
+ : " " Name: $(orig-name:E=---)
+ : " " Module: $(orig-module)
+ : " " Main id: $(orig-main-id:E=---)
+ : " " File: $(orig-file:E=---)
+ : " " Location: $(orig-location:E=---)
+ : New project:
+ : " " Module: $(module)
+ : " " File: $(new-file:E=---)
+ : " " Location: $(new-location:E=---) ;
+ }
+
+ $(id).jamfile-module = $(module) ;
+ return $(id) ;
+}
+
+
+# Class keeping all the attributes of a project.
+#
+# The standard attributes are "id", "location", "project-root", "parent"
+# "requirements", "default-build", "source-location" and "projects-to-build".
+#
+class project-attributes
+{
+ import path ;
+ import print ;
+ import project ;
+ import property ;
+ import property-set ;
+ import sequence ;
+
+ rule __init__ ( location project-module )
+ {
+ self.location = $(location) ;
+ self.project-module = $(project-module) ;
+ }
+
+ # Set the named attribute from the specification given by the user. The
+ # value actually set may be different.
+ #
+ rule set ( attribute : specification *
+ : exact ? # Sets value from 'specification' without any processing.
+ )
+ {
+ if $(exact)
+ {
+ self.$(attribute) = $(specification) ;
+ }
+ else if $(attribute) = "requirements"
+ {
+ local result = [ property-set.refine-from-user-input
+ $(self.requirements) : $(specification)
+ : $(self.project-module) : $(self.location) ] ;
+
+ if $(result[1]) = "@error"
+ {
+ import errors : error : errors.error ;
+ errors.error Requirements for project at '$(self.location)'
+ conflict with parent's. : Explanation: $(result[2-]) ;
+ }
+
+ self.requirements = $(result) ;
+ }
+ else if $(attribute) = "usage-requirements"
+ {
+ local unconditional ;
+ for local p in $(specification)
+ {
+ local split = [ property.split-conditional $(p) ] ;
+ split ?= nothing $(p) ;
+ unconditional += $(split[2]) ;
+ }
+
+ local non-free = [ property.remove free : $(unconditional) ] ;
+ if $(non-free)
+ {
+ import errors : error : errors.error ;
+ errors.error usage-requirements $(specification) have non-free
+ properties $(non-free) ;
+ }
+ local t = [ property.translate-paths $(specification) :
+ $(self.location) ] ;
+ if $(self.usage-requirements)
+ {
+ self.usage-requirements = [ property-set.create
+ [ $(self.usage-requirements).raw ] $(t) ] ;
+ }
+ else
+ {
+ self.usage-requirements = [ property-set.create $(t) ] ;
+ }
+ }
+ else if $(attribute) = "default-build"
+ {
+ self.default-build = [ property.make $(specification) ] ;
+ }
+ else if $(attribute) = "source-location"
+ {
+ self.source-location = ;
+ for local src-path in $(specification)
+ {
+ self.source-location += [ path.root [ path.make $(src-path) ]
+ $(self.location) ] ;
+ }
+ }
+ else if $(attribute) = "build-dir"
+ {
+ self.build-dir = [ path.root [ path.make $(specification) ]
+ $(self.location) ] ;
+ }
+ else if $(attribute) = "id"
+ {
+ self.id = [ project.register-id $(specification) :
+ $(self.project-module) ] ;
+ }
+ else if ! $(attribute) in "default-build" "location" "parent"
+ "projects-to-build" "project-root" "source-location"
+ {
+ import errors : error : errors.error ;
+ errors.error Invalid project attribute '$(attribute)' specified for
+ project at '$(self.location)' ;
+ }
+ else
+ {
+ self.$(attribute) = $(specification) ;
+ }
+ }
+
+ # Returns the value of the given attribute.
+ #
+ rule get ( attribute )
+ {
+ return $(self.$(attribute)) ;
+ }
+
+ # Returns whether these attributes belong to a Jamroot project module.
+ #
+ rule is-jamroot ( )
+ {
+ if $(self.location) && $(self.project-root) = $(self.location)
+ {
+ return true ;
+ }
+ }
+
+ # Prints the project attributes.
+ #
+ rule print ( )
+ {
+ local id = '$(self.id)' ;
+ print.section $(id:E=(none)) ;
+ print.list-start ;
+ print.list-item "Parent project:" $(self.parent:E=(none)) ;
+ print.list-item "Requirements:" [ $(self.requirements).raw ] ;
+ print.list-item "Default build:" $(self.default-build) ;
+ print.list-item "Source location:" $(self.source-location) ;
+ print.list-item "Projects to build:" [ sequence.insertion-sort
+ $(self.projects-to-build) ] ;
+ print.list-end ;
+ }
+}
+
+
+# Returns the build directory for standalone projects
+#
+rule standalone-build-dir ( )
+{
+ project = [ target $(.first-project-root) ] ;
+ return [ path.join [ $(project).build-dir ] standalone ] ;
+}
+
+# Returns the project which is currently being loaded.
+#
+rule current ( )
+{
+ if ! $(.current-project)
+ {
+ import errors ;
+ errors.error Reference to the project currently being loaded requested
+ when there was no project module being loaded. ;
+ }
+ return $(.current-project) ;
+}
+
+
+# Temporarily changes the current project to 'project'. Should be followed by
+# 'pop-current'.
+#
+rule push-current ( project )
+{
+ .saved-current-project += $(.current-project) ;
+ .current-project = $(project) ;
+}
+
+
+rule pop-current ( )
+{
+ .current-project = $(.saved-current-project[-1]) ;
+ .saved-current-project = $(.saved-current-project[1--2]) ;
+}
+
+
+# Returns the project-attribute instance for the specified Jamfile module.
+#
+rule attributes ( project )
+{
+ return $($(project).attributes) ;
+}
+
+
+# Returns the value of the specified attribute in the specified Jamfile module.
+#
+rule attribute ( project attribute )
+{
+ return [ $($(project).attributes).get $(attribute) ] ;
+}
+
+
+# Returns whether a project module is one of Boost Build's configuration
+# modules.
+#
+rule is-config-module ( project )
+{
+ local cfgs = project site test user ;
+ if $(project) in $(cfgs)-config
+ {
+ return true ;
+ }
+}
+
+
+# Returns whether a project module is a Jamroot project module.
+#
+rule is-jamroot-module ( project )
+{
+ return [ $($(project).attributes).is-jamroot ] ;
+}
+
+
+# Returns a project's parent jamroot module. Returns nothing if there is no such
+# module, i.e. if this is a standalone project or one of the internal Boost
+# Build configuration projects.
+#
+rule get-jamroot-module ( project )
+{
+ local jamroot-location = [ attribute $(project) project-root ] ;
+ if $(jamroot-location)
+ {
+ return [ module-name $(jamroot-location) ] ;
+ }
+}
+
+
+# Returns the project target corresponding to the 'project-module'.
+#
+rule target ( project-module )
+{
+ if ! $(.target.$(project-module))
+ {
+ import errors ;
+ errors.user-error Project target requested but not yet assigned for
+ module '$(project-module)'. ;
+ }
+ return $(.target.$(project-module)) ;
+}
+
+
+# Defines a Boost.Build extension project. Such extensions usually contain
+# library targets and features that can be used by many people. Even though
+# extensions are really projects, they can be initialized as a module would be
+# with the "using" (project.project-rules.using) mechanism.
+#
+rule extension ( id : options * : * )
+{
+ # The caller is a standalone module for the extension.
+ local mod = [ CALLER_MODULE ] ;
+
+ # We need to do the rest within the extension module.
+ module $(mod)
+ {
+ import path ;
+
+ # Find the root project.
+ local root-project = [ project.current ] ;
+ root-project = [ $(root-project).project-module ] ;
+ while
+ [ project.attribute $(root-project) parent-module ] &&
+ [ project.attribute $(root-project) parent-module ] != user-config
+ {
+ root-project = [ project.attribute $(root-project) parent-module ] ;
+ }
+
+ # Create the project data, and bring in the project rules into the
+ # module.
+ project.initialize $(__name__) : [ path.join [ project.attribute
+ $(root-project) location ] ext $(1:L) ] ;
+
+ # Create the project itself, i.e. the attributes. All extensions are
+ # created in the "/ext" project space.
+ project /ext/$(1) : $(2) : $(3) : $(4) : $(5) : $(6) : $(7) : $(8) :
+ $(9) : $(10) : $(11) : $(12) : $(13) : $(14) : $(15) : $(16) : $(17)
+ : $(18) : $(19) ;
+ local attributes = [ project.attributes $(__name__) ] ;
+
+ # Inherit from the root project of whomever is defining us.
+ project.inherit-attributes $(__name__) : $(root-project) ;
+ $(attributes).set parent-module : $(root-project) : exact ;
+ }
+}
+
+
+rule glob-internal ( project : wildcards + : excludes * : rule-name )
+{
+ local location = [ $(project).get source-location ] ;
+
+ local result ;
+ local paths = [ path.$(rule-name) $(location) :
+ [ sequence.transform path.make : $(wildcards) ] :
+ [ sequence.transform path.make : $(excludes) ] ] ;
+ if $(wildcards:D) || $(rule-name) != glob
+ {
+ # The paths we have found are relative to the current directory, but the
+ # names specified in the sources list are assumed to be relative to the
+ # source directory of the corresponding project. So, just make the names
+ # absolute.
+ for local p in $(paths)
+ {
+ # If the path is below source location, use relative path.
+ # Otherwise, use full path just to avoid any ambiguities.
+ local rel = [ path.relative $(p) $(location) : no-error ] ;
+ if $(rel) = not-a-child
+ {
+ result += [ path.root $(p) [ path.pwd ] ] ;
+ }
+ else
+ {
+ result += $(rel) ;
+ }
+ }
+ }
+ else
+ {
+ # There were no wildcards in the directory path, so the files are all in
+ # the source directory of the project. Just drop the directory, instead
+ # of making paths absolute.
+ result = $(paths:D="") ;
+ }
+
+ return $(result) ;
+}
+
+
+# This module defines rules common to all projects.
+#
+module project-rules
+{
+ import modules ;
+
+ rule using ( toolset-module : * )
+ {
+ import toolset ;
+
+ local saved-project = [ modules.peek project : .current-project ] ;
+
+ # Temporarily change the search path so the module referred to by
+ # 'using' can be placed in the same directory as Jamfile. User will
+ # expect the module to be found even though the directory is not in
+ # BOOST_BUILD_PATH.
+ local x = [ modules.peek : BOOST_BUILD_PATH ] ;
+ local caller = [ CALLER_MODULE ] ;
+ local caller-location = [ modules.binding $(caller) ] ;
+ modules.poke : BOOST_BUILD_PATH : $(caller-location:D) $(x) ;
+ toolset.using $(1) : $(2) : $(3) : $(4) : $(5) : $(6) : $(7) : $(8) :
+ $(9) : $(10) : $(11) : $(12) : $(13) : $(14) : $(15) : $(16) : $(17)
+ : $(18) : $(19) ;
+ modules.poke : BOOST_BUILD_PATH : $(x) ;
+
+ # The above might have clobbered .current-project in case it caused a
+ # new project instance to be created (which would then automatically
+ # get set as the 'current' project). Restore the correct value so any
+ # main targets declared after this do not get mapped to the loaded
+ # module's project.
+ modules.poke project : .current-project : $(saved-project) ;
+ }
+
+ rule import ( * : * : * )
+ {
+ local caller = [ CALLER_MODULE ] ;
+ local saved-project = [ modules.peek project : .current-project ] ;
+ module $(caller)
+ {
+ modules.import $(1) : $(2) : $(3) ;
+ }
+
+ # The above might have clobbered .current-project in case it caused a
+ # new project instance to be created (which would then automatically
+ # get set as the 'current' project). Restore the correct value so any
+ # main targets declared after this do not get mapped to the loaded
+ # module's project.
+ modules.poke project : .current-project : $(saved-project) ;
+ }
+
+ rule project ( id ? : options * : * )
+ {
+ import path ;
+ import project ;
+
+ local caller = [ CALLER_MODULE ] ;
+ local attributes = [ project.attributes $(caller) ] ;
+ if $(id)
+ {
+ $(attributes).set id : $(id) ;
+ }
+
+ local explicit-build-dir ;
+
+ for n in 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19
+ {
+ local option = $($(n)) ;
+ if $(option)
+ {
+ $(attributes).set $(option[1]) : $(option[2-]) ;
+ }
+ if $(option[1]) = "build-dir"
+ {
+ explicit-build-dir = [ path.make $(option[2-]) ] ;
+ }
+ }
+
+ # If '--build-dir' is specified, change the build dir for the project.
+ local global-build-dir = [ modules.peek project : .global-build-dir ] ;
+
+ if $(global-build-dir)
+ {
+ local location = [ $(attributes).get location ] ;
+ # Project with an empty location is a 'standalone' project such as
+ # user-config or qt. It has no build dir. If we try to set build dir
+ # for user-config, we shall then try to inherit it, with either
+ # weird or wrong consequences.
+ if $(location) && $(location) = [ $(attributes).get project-root ]
+ {
+ # Re-read the project id, since it might have been modified a
+ # bit when setting the project's id attribute, e.g. might have
+ # been prefixed by a slash if it was not already.
+ id = [ $(attributes).get id ] ;
+ # This is Jamroot.
+ if $(id)
+ {
+ if $(explicit-build-dir) &&
+ [ path.is-rooted $(explicit-build-dir) ]
+ {
+ import errors ;
+ errors.user-error Absolute directory specified via
+ 'build-dir' project attribute : Do not know how to
+ combine that with the --build-dir option. ;
+ }
+ # Strip the leading slash from id.
+ local rid = [ MATCH ^/(.*) : $(id) ] ;
+ local p = [ path.join $(global-build-dir) $(rid)
+ $(explicit-build-dir) ] ;
+
+ $(attributes).set build-dir : $(p) : exact ;
+ }
+ }
+ else
+ {
+ # Not Jamroot.
+ if $(explicit-build-dir)
+ {
+ import errors ;
+ errors.user-error When --build-dir is specified, the
+ 'build-dir' project : attribute is allowed only for
+ top-level 'project' invocations ;
+ }
+ }
+ }
+ }
+
+ # Declare and set a project global constant. Project global constants are
+ # normal variables but should not be changed. They are applied to every
+ # child Jamfile.
+ #
+ rule constant ( name : value + )
+ {
+ import project ;
+ local caller = [ CALLER_MODULE ] ;
+ local p = [ project.target $(caller) ] ;
+ $(p).add-constant $(name) : $(value) ;
+ }
+
+ # Declare and set a project global constant, whose value is a path. The path
+ # is adjusted to be relative to the invocation directory. The given value
+ # path is taken to be either absolute, or relative to this project root.
+ #
+ rule path-constant ( name : value + )
+ {
+ import project ;
+ local caller = [ CALLER_MODULE ] ;
+ local p = [ project.target $(caller) ] ;
+ $(p).add-constant $(name) : $(value) : path ;
+ }
+
+ rule use-project ( id : where )
+ {
+ # See comment in 'load' for explanation.
+ local caller = [ CALLER_MODULE ] ;
+ modules.poke $(caller) : .used-projects : [ modules.peek $(caller) :
+ .used-projects ] $(id) $(where) ;
+ }
+
+ rule build-project ( dir )
+ {
+ import project ;
+ local caller = [ CALLER_MODULE ] ;
+ local attributes = [ project.attributes $(caller) ] ;
+ local now = [ $(attributes).get projects-to-build ] ;
+ $(attributes).set projects-to-build : $(now) $(dir) ;
+ }
+
+ rule explicit ( target-names * )
+ {
+ import project ;
+ # If 'explicit' is used in a helper rule defined in Jamroot and
+ # inherited by children, then most of the time we want 'explicit' to
+ # operate on the Jamfile where the helper rule is invoked.
+ local t = [ project.current ] ;
+ for local n in $(target-names)
+ {
+ $(t).mark-target-as-explicit $(n) ;
+ }
+ }
+
+ rule always ( target-names * )
+ {
+ import project ;
+ local t = [ project.current ] ;
+ for local n in $(target-names)
+ {
+ $(t).mark-target-as-always $(n) ;
+ }
+ }
+
+ rule glob ( wildcards + : excludes * )
+ {
+ import project ;
+ return [ project.glob-internal [ project.current ] : $(wildcards) :
+ $(excludes) : glob ] ;
+ }
+
+ rule glob-tree ( wildcards + : excludes * )
+ {
+ import project ;
+ if $(wildcards:D) || $(excludes:D)
+ {
+ import errors ;
+ errors.user-error The patterns to 'glob-tree' may not include
+ directory ;
+ }
+ return [ project.glob-internal [ project.current ] : $(wildcards) :
+ $(excludes) : glob-tree ] ;
+ }
+
+ # Calculates conditional requirements for multiple requirements at once.
+ # This is a shorthand to reduce duplication and to keep an inline
+ # declarative syntax. For example:
+ #
+ # lib x : x.cpp : [ conditional <toolset>gcc <variant>debug :
+ # <define>DEBUG_EXCEPTION <define>DEBUG_TRACE ] ;
+ #
+ rule conditional ( condition + : requirements * )
+ {
+ local condition = $(condition:J=,) ;
+ if [ MATCH (:) : $(condition) ]
+ {
+ return $(condition)$(requirements) ;
+ }
+ else
+ {
+ return $(condition):$(requirements) ;
+ }
+ }
+
+ rule option ( name : value )
+ {
+ local m = [ CALLER_MODULE ] ;
+ local cfgs = project site test user ;
+ if ! $(m) in $(cfgs)-config
+ {
+ import errors ;
+ errors.error The 'option' rule may only be used "in" Boost Build
+ configuration files. ;
+ }
+ import option ;
+ option.set $(name) : $(value) ;
+ }
+}
diff --git a/src/kenlm/jam-files/boost-build/build/property-set.jam b/src/kenlm/jam-files/boost-build/build/property-set.jam
new file mode 100644
index 0000000..55cb556
--- /dev/null
+++ b/src/kenlm/jam-files/boost-build/build/property-set.jam
@@ -0,0 +1,517 @@
+# Copyright 2003 Dave Abrahams
+# Copyright 2003, 2004, 2005, 2006 Vladimir Prus
+# Distributed under the Boost Software License, Version 1.0.
+# (See accompanying file LICENSE_1_0.txt or copy at
+# http://www.boost.org/LICENSE_1_0.txt)
+
+import "class" : new ;
+import feature ;
+import path ;
+import project ;
+import property ;
+import sequence ;
+import set ;
+import option ;
+
+# Class for storing a set of properties.
+#
+# There is 1<->1 correspondence between identity and value. No two instances
+# of the class are equal. To maintain this property, the 'property-set.create'
+# rule should be used to create new instances. Instances are immutable.
+#
+# Each property is classified with regard to its effect on build results.
+# Incidental properties have no effect on build results, from Boost.Build's
+# point of view. Others are either free, or non-free and we refer to non-free
+# ones as 'base'. Each property belongs to exactly one of those categories.
+#
+# It is possible to get a list of properties belonging to each category as
+# well as a list of properties with a specific attribute.
+#
+# Several operations, like refine and as-path, are provided. They all use
+# caching whenever possible.
+#
+class property-set
+{
+ import errors ;
+ import feature ;
+ import path ;
+ import property ;
+ import property-set ;
+ import set ;
+
+ rule __init__ ( raw-properties * )
+ {
+ self.raw = $(raw-properties) ;
+
+ for local p in $(raw-properties)
+ {
+ if ! $(p:G)
+ {
+ errors.error "Invalid property: '$(p)'" ;
+ }
+ }
+ }
+
+ # Returns Jam list of stored properties.
+ #
+ rule raw ( )
+ {
+ return $(self.raw) ;
+ }
+
+ rule str ( )
+ {
+ return "[" $(self.raw) "]" ;
+ }
+
+ # Returns properties that are neither incidental nor free.
+ #
+ rule base ( )
+ {
+ if ! $(self.base-initialized)
+ {
+ init-base ;
+ }
+ return $(self.base) ;
+ }
+
+ # Returns free properties which are not incidental.
+ #
+ rule free ( )
+ {
+ if ! $(self.base-initialized)
+ {
+ init-base ;
+ }
+ return $(self.free) ;
+ }
+
+ # Returns dependency properties.
+ #
+ rule dependency ( )
+ {
+ if ! $(self.dependency-initialized)
+ {
+ init-dependency ;
+ }
+ return $(self.dependency) ;
+ }
+
+ rule non-dependency ( )
+ {
+ if ! $(self.dependency-initialized)
+ {
+ init-dependency ;
+ }
+ return $(self.non-dependency) ;
+ }
+
+ rule conditional ( )
+ {
+ if ! $(self.conditional-initialized)
+ {
+ init-conditional ;
+ }
+ return $(self.conditional) ;
+ }
+
+ rule non-conditional ( )
+ {
+ if ! $(self.conditional-initialized)
+ {
+ init-conditional ;
+ }
+ return $(self.non-conditional) ;
+ }
+
+ # Returns incidental properties.
+ #
+ rule incidental ( )
+ {
+ if ! $(self.base-initialized)
+ {
+ init-base ;
+ }
+ return $(self.incidental) ;
+ }
+
+ rule refine ( ps )
+ {
+ if ! $(self.refined.$(ps))
+ {
+ local r = [ property.refine $(self.raw) : [ $(ps).raw ] ] ;
+ if $(r[1]) != "@error"
+ {
+ self.refined.$(ps) = [ property-set.create $(r) ] ;
+ }
+ else
+ {
+ self.refined.$(ps) = $(r) ;
+ }
+ }
+ return $(self.refined.$(ps)) ;
+ }
+
+ rule expand ( )
+ {
+ if ! $(self.expanded)
+ {
+ self.expanded = [ property-set.create [ feature.expand $(self.raw) ]
+ ] ;
+ }
+ return $(self.expanded) ;
+ }
+
+ rule expand-composites ( )
+ {
+ if ! $(self.composites)
+ {
+ self.composites = [ property-set.create
+ [ feature.expand-composites $(self.raw) ] ] ;
+ }
+ return $(self.composites) ;
+ }
+
+ rule evaluate-conditionals ( context ? )
+ {
+ context ?= $(__name__) ;
+ if ! $(self.evaluated.$(context))
+ {
+ self.evaluated.$(context) = [ property-set.create
+ [ property.evaluate-conditionals-in-context $(self.raw) : [
+ $(context).raw ] ] ] ;
+ }
+ return $(self.evaluated.$(context)) ;
+ }
+
+ rule propagated ( )
+ {
+ if ! $(self.propagated-ps)
+ {
+ local result ;
+ for local p in $(self.raw)
+ {
+ if propagated in [ feature.attributes $(p:G) ]
+ {
+ result += $(p) ;
+ }
+ }
+ self.propagated-ps = [ property-set.create $(result) ] ;
+ }
+ return $(self.propagated-ps) ;
+ }
+
+ rule add-defaults ( )
+ {
+ if ! $(self.defaults)
+ {
+ self.defaults = [ property-set.create
+ [ feature.add-defaults $(self.raw) ] ] ;
+ }
+ return $(self.defaults) ;
+ }
+
+ rule as-path ( )
+ {
+ if ! $(self.as-path)
+ {
+ self.as-path = [ property.as-path [ base ] ] ;
+ }
+ return $(self.as-path) ;
+ }
+
+ # Computes the path to be used for a target with the given properties.
+ # Returns a list of
+ # - the computed path
+ # - if the path is relative to the build directory, a value of 'true'.
+ #
+ rule target-path ( )
+ {
+ if ! $(self.target-path)
+ {
+ # The <location> feature can be used to explicitly change the
+ # location of generated targets.
+ local l = [ get <location> ] ;
+ if $(l)
+ {
+ self.target-path = $(l) ;
+ }
+ else
+ {
+ local p = [ property-set.hash-maybe [ as-path ] ] ;
+
+ # A real ugly hack. Boost regression test system requires
+ # specific target paths, and it seems that changing it to handle
+ # other directory layout is really hard. For that reason, we
+ # teach V2 to do the things regression system requires. The
+ # value of '<location-prefix>' is prepended to the path.
+ local prefix = [ get <location-prefix> ] ;
+ if $(prefix)
+ {
+ self.target-path = [ path.join $(prefix) $(p) ] ;
+ }
+ else
+ {
+ self.target-path = $(p) ;
+ }
+ if ! $(self.target-path)
+ {
+ self.target-path = . ;
+ }
+ # The path is relative to build dir.
+ self.target-path += true ;
+ }
+ }
+ return $(self.target-path) ;
+ }
+
+ rule add ( ps )
+ {
+ if ! $(self.added.$(ps))
+ {
+ self.added.$(ps) = [ property-set.create $(self.raw) [ $(ps).raw ] ]
+ ;
+ }
+ return $(self.added.$(ps)) ;
+ }
+
+ rule add-raw ( properties * )
+ {
+ return [ add [ property-set.create $(properties) ] ] ;
+ }
+
+ # Returns all values of 'feature'.
+ #
+ rule get ( feature )
+ {
+ if ! $(self.map-built)
+ {
+ # For each feature, create a member var and assign all values to it.
+ # Since all regular member vars start with 'self', there will be no
+ # conflicts between names.
+ self.map-built = true ;
+ for local v in $(self.raw)
+ {
+ $(v:G) += $(v:G=) ;
+ }
+ }
+ return $($(feature)) ;
+ }
+
+ # Returns true if the property-set contains all the
+ # specified properties.
+ #
+ rule contains-raw ( properties * )
+ {
+ if $(properties) in $(self.raw)
+ {
+ return true ;
+ }
+ }
+
+ # Returns true if the property-set has values for
+ # all the specified features
+ #
+ rule contains-features ( features * )
+ {
+ if $(features) in $(self.raw:G)
+ {
+ return true ;
+ }
+ }
+
+ # private
+
+ rule init-base ( )
+ {
+ for local p in $(self.raw)
+ {
+ local att = [ feature.attributes $(p:G) ] ;
+ # A feature can be both incidental and free, in which case we add it
+ # to incidental.
+ if incidental in $(att)
+ {
+ self.incidental += $(p) ;
+ }
+ else if free in $(att)
+ {
+ self.free += $(p) ;
+ }
+ else
+ {
+ self.base += $(p) ;
+ }
+ }
+ self.base-initialized = true ;
+ }
+
+ rule init-dependency ( )
+ {
+ for local p in $(self.raw)
+ {
+ if dependency in [ feature.attributes $(p:G) ]
+ {
+ self.dependency += $(p) ;
+ }
+ else
+ {
+ self.non-dependency += $(p) ;
+ }
+ }
+ self.dependency-initialized = true ;
+ }
+
+ rule init-conditional ( )
+ {
+ for local p in $(self.raw)
+ {
+ # TODO: Note that non-conditional properties may contain colon (':')
+ # characters as well, e.g. free or indirect properties. Indirect
+ # properties for example contain a full Jamfile path in their value
+ # which on Windows file systems contains ':' as the drive separator.
+ if [ MATCH (:) : $(p:G=) ]
+ {
+ self.conditional += $(p) ;
+ }
+ else
+ {
+ self.non-conditional += $(p) ;
+ }
+ }
+ self.conditional-initialized = true ;
+ }
+}
+
+
+# Creates a new 'property-set' instance for the given raw properties or returns
+# an already existing one.
+#
+rule create ( raw-properties * )
+{
+ raw-properties = [ sequence.unique
+ [ sequence.insertion-sort $(raw-properties) ] ] ;
+
+ local key = $(raw-properties:J=-:E=) ;
+
+ if ! $(.ps.$(key))
+ {
+ .ps.$(key) = [ new property-set $(raw-properties) ] ;
+ }
+ return $(.ps.$(key)) ;
+}
+NATIVE_RULE property-set : create ;
+
+if [ HAS_NATIVE_RULE class@property-set : get : 1 ]
+{
+ NATIVE_RULE class@property-set : get ;
+}
+
+if [ HAS_NATIVE_RULE class@property-set : contains-features : 1 ]
+{
+ NATIVE_RULE class@property-set : contains-features ;
+}
+
+# Creates a new 'property-set' instance after checking that all properties are
+# valid and converting implicit properties into gristed form.
+#
+rule create-with-validation ( raw-properties * )
+{
+ property.validate $(raw-properties) ;
+ return [ create [ property.make $(raw-properties) ] ] ;
+}
+
+
+# Creates a property-set from the input given by the user, in the context of
+# 'jamfile-module' at 'location'.
+#
+rule create-from-user-input ( raw-properties * : jamfile-module location )
+{
+ local project-id = [ project.attribute $(jamfile-module) id ] ;
+ project-id ?= [ path.root $(location) [ path.pwd ] ] ;
+ return [ property-set.create [ property.translate $(raw-properties)
+ : $(project-id) : $(location) : $(jamfile-module) ] ] ;
+}
+
+
+# Refines requirements with requirements provided by the user. Specially handles
+# "-<property>value" syntax in specification to remove given requirements.
+# - parent-requirements -- property-set object with requirements to refine.
+# - specification -- string list of requirements provided by the user.
+# - project-module -- module to which context indirect features will be
+# bound.
+# - location -- path to which path features are relative.
+#
+rule refine-from-user-input ( parent-requirements : specification * :
+ project-module : location )
+{
+ if ! $(specification)
+ {
+ return $(parent-requirements) ;
+ }
+ else
+ {
+ local add-requirements ;
+ local remove-requirements ;
+
+ for local r in $(specification)
+ {
+ local m = [ MATCH "^-(.*)" : $(r) ] ;
+ if $(m)
+ {
+ remove-requirements += $(m) ;
+ }
+ else
+ {
+ add-requirements += $(r) ;
+ }
+ }
+
+ if $(remove-requirements)
+ {
+ # Need to create a property set, so that path features and indirect
+ # features are translated just like they are in project
+ # requirements.
+ local ps = [ property-set.create-from-user-input
+ $(remove-requirements) : $(project-module) $(location) ] ;
+
+ parent-requirements = [ property-set.create
+ [ set.difference [ $(parent-requirements).raw ]
+ : [ $(ps).raw ] ] ] ;
+ specification = $(add-requirements) ;
+ }
+
+ local requirements = [ property-set.create-from-user-input
+ $(specification) : $(project-module) $(location) ] ;
+
+ return [ $(parent-requirements).refine $(requirements) ] ;
+ }
+}
+
+
+# Returns a property-set with an empty set of properties.
+#
+rule empty ( )
+{
+ if ! $(.empty)
+ {
+ .empty = [ create ] ;
+ }
+ return $(.empty) ;
+}
+
+
+if [ option.get hash : : yes ] = yes
+{
+ rule hash-maybe ( path ? )
+ {
+ path ?= "" ;
+ return [ MD5 $(path) ] ;
+ }
+}
+else
+{
+ rule hash-maybe ( path ? )
+ {
+ return $(path) ;
+ }
+}
diff --git a/src/kenlm/jam-files/boost-build/build/property.jam b/src/kenlm/jam-files/boost-build/build/property.jam
new file mode 100644
index 0000000..dc9dbd8
--- /dev/null
+++ b/src/kenlm/jam-files/boost-build/build/property.jam
@@ -0,0 +1,905 @@
+# Copyright 2001, 2002, 2003 Dave Abrahams
+# Copyright 2006 Rene Rivera
+# Copyright 2002, 2003, 2004, 2005, 2006 Vladimir Prus
+# Distributed under the Boost Software License, Version 1.0.
+# (See accompanying file LICENSE_1_0.txt or copy at
+# http://www.boost.org/LICENSE_1_0.txt)
+
+import feature ;
+import indirect ;
+import path ;
+import regex ;
+import string ;
+import sequence ;
+import set ;
+import utility ;
+
+
+# Refines 'properties' by overriding any non-free and non-conditional properties
+# for which a different value is specified in 'requirements'. Returns the
+# resulting list of properties.
+#
+rule refine ( properties * : requirements * )
+{
+ local result ;
+ local unset ;
+
+ # Collect all non-free features in requirements
+ for local r in $(requirements)
+ {
+ # Do not consider conditional requirements.
+ if ! [ MATCH (:) : $(r:G=) ] && ! free in [ feature.attributes $(r:G) ]
+ {
+ unset += $(r:G) ;
+ }
+ }
+
+ # Remove properties that are overridden by requirements
+ for local p in $(properties)
+ {
+ if [ MATCH (:) : $(p:G=) ] || ! $(p:G) in $(unset)
+ {
+ result += $(p) ;
+ }
+ }
+
+ return [ sequence.unique $(result) $(requirements) ] ;
+}
+
+
+# Removes all conditional properties whose conditions are not met. For those
+# with met conditions, removes the condition. Properties in conditions are
+# looked up in 'context'.
+#
+rule evaluate-conditionals-in-context ( properties * : context * )
+{
+ local base ;
+ local conditionals ;
+ for local p in $(properties)
+ {
+ if [ MATCH (:<) : $(p) ]
+ {
+ conditionals += $(p) ;
+ }
+ else
+ {
+ base += $(p) ;
+ }
+ }
+
+ local result = $(base) ;
+ for local p in $(conditionals)
+ {
+ # Separate condition and property.
+ local s = [ MATCH ^(.*):(<.*) : $(p) ] ;
+ # Split condition into individual properties.
+ local condition = [ regex.split $(s[1]) "," ] ;
+ # Evaluate condition.
+ if ! [ MATCH ^(!).* : $(condition:G=) ]
+ {
+ # Only positive checks
+ if $(condition) in $(context)
+ {
+ result += $(s[2]) ;
+ }
+ }
+ else
+ {
+ # Have negative checks
+ local fail ;
+ while $(condition)
+ {
+ local c = $(condition[1]) ;
+ local m = [ MATCH ^!(.*) : $(c) ] ;
+ if $(m)
+ {
+ local p = $(m:G=$(c:G)) ;
+ if $(p) in $(context)
+ {
+ fail = true ;
+ c = ;
+ }
+ }
+ else
+ {
+ if ! $(c) in $(context)
+ {
+ fail = true ;
+ c = ;
+ }
+ }
+ condition = $(condition[2-]) ;
+ }
+ if ! $(fail)
+ {
+ result += $(s[2]) ;
+ }
+ }
+ }
+ return $(result) ;
+}
+
+
+rule expand-subfeatures-in-conditions ( properties * )
+{
+ local result ;
+ for local p in $(properties)
+ {
+ local s = [ MATCH ^(.*):(<.*) : $(p) ] ;
+ if ! $(s)
+ {
+ result += $(p) ;
+ }
+ else
+ {
+ local condition = $(s[1]) ;
+ local value = $(s[2]) ;
+ # Condition might include several elements.
+ condition = [ regex.split $(condition) "," ] ;
+ local e ;
+ for local c in $(condition)
+ {
+ # It is common for a condition to include a toolset or
+ # subfeatures that have not been defined. In that case we want
+ # the condition to simply 'never be satisfied' and validation
+ # would only produce a spurious error so we prevent it by
+ # passing 'true' as the second parameter.
+ e += [ feature.expand-subfeatures $(c) : true ] ;
+ }
+ if $(e) = $(condition)
+ {
+ # (todo)
+ # This is just an optimization and possibly a premature one at
+ # that.
+ # (todo) (12.07.2008.) (Jurko)
+ result += $(p) ;
+ }
+ else
+ {
+ result += $(e:J=,):$(value) ;
+ }
+ }
+ }
+ return $(result) ;
+}
+
+
+# Helper for as-path, below. Orders properties with the implicit ones first, and
+# within the two sections in alphabetical order of feature name.
+#
+local rule path-order ( x y )
+{
+ if $(y:G) && ! $(x:G)
+ {
+ return true ;
+ }
+ else if $(x:G) && ! $(y:G)
+ {
+ return ;
+ }
+ else
+ {
+ if ! $(x:G)
+ {
+ x = [ feature.expand-subfeatures $(x) ] ;
+ y = [ feature.expand-subfeatures $(y) ] ;
+ }
+
+ if $(x[1]) < $(y[1])
+ {
+ return true ;
+ }
+ }
+}
+
+
+local rule abbreviate-dashed ( string )
+{
+ local r ;
+ for local part in [ regex.split $(string) - ]
+ {
+ r += [ string.abbreviate $(part) ] ;
+ }
+ return $(r:J=-) ;
+}
+
+
+local rule identity ( string )
+{
+ return $(string) ;
+}
+
+
+if --abbreviate-paths in [ modules.peek : ARGV ]
+{
+ .abbrev = abbreviate-dashed ;
+}
+else
+{
+ .abbrev = identity ;
+}
+
+
+# Returns a path representing the given expanded property set.
+#
+rule as-path ( properties * )
+{
+ local entry = .result.$(properties:J=-) ;
+
+ if ! $($(entry))
+ {
+ # Trim redundancy.
+ properties = [ feature.minimize $(properties) ] ;
+
+ # Sort according to path-order.
+ properties = [ sequence.insertion-sort $(properties) : path-order ] ;
+
+ local components ;
+ for local p in $(properties)
+ {
+ if $(p:G)
+ {
+ local f = [ utility.ungrist $(p:G) ] ;
+ p = $(f)-$(p:G=) ;
+ }
+ components += [ $(.abbrev) $(p) ] ;
+ }
+
+ $(entry) = $(components:J=/) ;
+ }
+
+ return $($(entry)) ;
+}
+
+
+# Exit with error if property is not valid.
+#
+local rule validate1 ( property )
+{
+ local msg ;
+ if $(property:G)
+ {
+ local feature = $(property:G) ;
+ local value = $(property:G=) ;
+
+ if ! [ feature.valid $(feature) ]
+ {
+ # Ungrist for better error messages.
+ feature = [ utility.ungrist $(property:G) ] ;
+ msg = "unknown feature '$(feature)'" ;
+ }
+ else if $(value) && ! free in [ feature.attributes $(feature) ]
+ {
+ feature.validate-value-string $(feature) $(value) ;
+ }
+ else if ! ( $(value) || ( optional in [ feature.attributes $(feature) ] ) )
+ {
+ # Ungrist for better error messages.
+ feature = [ utility.ungrist $(property:G) ] ;
+ msg = "No value specified for feature '$(feature)'" ;
+ }
+ }
+ else
+ {
+ local feature = [ feature.implied-feature $(property) ] ;
+ feature.validate-value-string $(feature) $(property) ;
+ }
+ if $(msg)
+ {
+ import errors ;
+ errors.error "Invalid property "'$(property:J=" ")'": "$(msg:J=" "). ;
+ }
+}
+
+
+rule validate ( properties * )
+{
+ for local p in $(properties)
+ {
+ validate1 $(p) ;
+ }
+}
+
+
+rule validate-property-sets ( property-sets * )
+{
+ for local s in $(property-sets)
+ {
+ validate [ feature.split $(s) ] ;
+ }
+}
+
+
+# Expands any implicit property values in the given property 'specification' so
+# they explicitly state their feature.
+#
+rule make ( specification * )
+{
+ local result ;
+ for local e in $(specification)
+ {
+ if $(e:G)
+ {
+ result += $(e) ;
+ }
+ else if [ feature.is-implicit-value $(e) ]
+ {
+ local feature = [ feature.implied-feature $(e) ] ;
+ result += $(feature)$(e) ;
+ }
+ else
+ {
+ import errors ;
+ errors.error "'$(e)' is not a valid property specification" ;
+ }
+ }
+ return $(result) ;
+}
+
+
+# Returns a property set containing all the elements in 'properties' that do not
+# have their attributes listed in 'attributes'.
+#
+rule remove ( attributes + : properties * )
+{
+ local result ;
+ for local e in $(properties)
+ {
+ if ! [ set.intersection $(attributes) : [ feature.attributes $(e:G) ] ]
+ {
+ result += $(e) ;
+ }
+ }
+ return $(result) ;
+}
+
+
+# Returns a property set containing all the elements in 'properties' that have
+# their attributes listed in 'attributes'.
+#
+rule take ( attributes + : properties * )
+{
+ local result ;
+ for local e in $(properties)
+ {
+ if [ set.intersection $(attributes) : [ feature.attributes $(e:G) ] ]
+ {
+ result += $(e) ;
+ }
+ }
+ return $(result) ;
+}
+
+
+# Selects properties corresponding to any of the given features.
+#
+rule select ( features * : properties * )
+{
+ local result ;
+
+ # Add any missing angle brackets.
+ local empty = "" ;
+ features = $(empty:G=$(features)) ;
+
+ for local p in $(properties)
+ {
+ if $(p:G) in $(features)
+ {
+ result += $(p) ;
+ }
+ }
+ return $(result) ;
+}
+
+
+# Returns a modified version of properties with all values of the given feature
+# replaced by the given value. If 'value' is empty the feature will be removed.
+#
+rule change ( properties * : feature value ? )
+{
+ local result ;
+ for local p in $(properties)
+ {
+ if $(p:G) = $(feature)
+ {
+ result += $(value:G=$(feature)) ;
+ }
+ else
+ {
+ result += $(p) ;
+ }
+ }
+ return $(result) ;
+}
+
+
+# If 'property' is a conditional property, returns the condition and the
+# property. E.g. <variant>debug,<toolset>gcc:<inlining>full will become
+# <variant>debug,<toolset>gcc <inlining>full. Otherwise, returns an empty
+# string.
+#
+rule split-conditional ( property )
+{
+ return [ MATCH "^(.+):(<.+)" : $(property) ] ;
+}
+
+
+rule translate-path-value ( value : path )
+{
+ local t ;
+ for local v in [ regex.split $(value) "&&" ]
+ {
+ t += [ path.root [ path.make $(v) ] $(path) ] ;
+ }
+ return $(t:TJ="&&") ;
+}
+
+rule translate-dependency-value ( value : project-id : project-location )
+{
+ local split-target = [ regex.match ^(.*)//(.*) : $(value) ] ;
+ if $(split-target)
+ {
+ local rooted = [ path.root [ path.make $(split-target[1]) ]
+ [ path.root $(project-location) [ path.pwd ] ] ] ;
+ return $(rooted)//$(split-target[2]) ;
+ }
+ else if [ path.is-rooted $(value) ]
+ {
+ return $(value) ;
+ }
+ else
+ {
+ return $(project-id)//$(value) ;
+ }
+}
+
+rule translate-indirect-value ( rulename : context-module )
+{
+ if [ MATCH "^([^%]*)%([^%]+)$" : $(rulename) ]
+ {
+ # Rule is already in the 'indirect-rule' format.
+ return @$(rulename) ;
+ }
+ else
+ {
+ local v ;
+ if ! [ MATCH "([.])" : $(rulename) ]
+ {
+ # This is an unqualified rule name. The user might want to
+ # set flags on this rule name and toolset.flag
+ # auto-qualifies it. Need to do the same here so flag
+ # setting works. We can arrange for toolset.flag to *not*
+ # auto-qualify the argument but then two rules defined in
+ # two Jamfiles would conflict.
+ rulename = $(context-module).$(rulename) ;
+ }
+ v = [ indirect.make $(rulename) : $(context-module) ] ;
+ return @$(v) ;
+ }
+
+}
+
+# Equivalent to calling all of:
+# translate-path
+# translate-indirect
+# translate-dependency
+# expand-subfeatures-in-conditions
+# make
+#
+rule translate ( properties * : project-id : project-location : context-module )
+{
+ local result ;
+ for local p in $(properties)
+ {
+ local split = [ split-conditional $(p) ] ;
+ local condition property ;
+
+ if $(split)
+ {
+ condition = $(split[1]) ;
+ property = $(split[2]) ;
+
+ local e ;
+ for local c in [ regex.split $(condition) "," ]
+ {
+ e += [ feature.expand-subfeatures $(c) : true ] ;
+ }
+
+ condition = $(e:J=,): ;
+ }
+ else
+ {
+ property = $(p) ;
+ }
+
+ local feature = $(property:G) ;
+ if ! $(feature)
+ {
+ if [ feature.is-implicit-value $(property) ]
+ {
+ feature = [ feature.implied-feature $(property) ] ;
+ result += $(condition:E=)$(feature)$(property) ;
+ }
+ else
+ {
+ import errors ;
+ errors.error "'$(e)' is not a valid property specification" ;
+ }
+ } else {
+ local attributes = [ feature.attributes $(feature) ] ;
+ local value ;
+ # Only free features should be translated
+ if free in $(attributes)
+ {
+ if path in $(attributes)
+ {
+ value = [ translate-path-value $(property:G=) : $(project-location) ] ;
+ result += $(condition:E=)$(feature)$(value) ;
+ }
+ else if dependency in $(attributes)
+ {
+ value = [ translate-dependency-value $(property:G=) : $(project-id) : $(project-location) ] ;
+ result += $(condition:E=)$(feature)$(value) ;
+ }
+ else
+ {
+ local m = [ MATCH ^@(.+) : $(property:G=) ] ;
+ if $(m)
+ {
+ value = [ translate-indirect-value $(m) : $(context-module) ] ;
+ result += $(condition:E=)$(feature)$(value) ;
+ }
+ else
+ {
+ result += $(condition:E=)$(property) ;
+ }
+ }
+ }
+ else
+ {
+ result += $(condition:E=)$(property) ;
+ }
+ }
+ }
+ return $(result) ;
+}
+
+# Interpret all path properties in 'properties' as relative to 'path'. The
+# property values are assumed to be in system-specific form, and will be
+# translated into normalized form.
+#
+rule translate-paths ( properties * : path )
+{
+ local result ;
+ for local p in $(properties)
+ {
+ local split = [ split-conditional $(p) ] ;
+ local condition = "" ;
+ if $(split)
+ {
+ condition = $(split[1]): ;
+ p = $(split[2]) ;
+ }
+
+ if path in [ feature.attributes $(p:G) ]
+ {
+ local values = [ regex.split $(p:TG=) "&&" ] ;
+ local t ;
+ for local v in $(values)
+ {
+ t += [ path.root [ path.make $(v) ] $(path) ] ;
+ }
+ t = $(t:J="&&") ;
+ result += $(condition)$(t:TG=$(p:G)) ;
+ }
+ else
+ {
+ result += $(condition)$(p) ;
+ }
+ }
+ return $(result) ;
+}
+
+
+# Assumes that all feature values that start with '@' are names of rules, used
+# in 'context-module'. Such rules can be either local to the module or global.
+# Converts such values into 'indirect-rule' format (see indirect.jam), so they
+# can be called from other modules. Does nothing for such values that are
+# already in the 'indirect-rule' format.
+#
+rule translate-indirect ( specification * : context-module )
+{
+ local result ;
+ for local p in $(specification)
+ {
+ local m = [ MATCH ^@(.+) : $(p:G=) ] ;
+ if $(m)
+ {
+ local v ;
+ if [ MATCH "^([^%]*)%([^%]+)$" : $(m) ]
+ {
+ # Rule is already in the 'indirect-rule' format.
+ v = $(m) ;
+ }
+ else
+ {
+ if ! [ MATCH "([.])" : $(m) ]
+ {
+ # This is an unqualified rule name. The user might want to
+ # set flags on this rule name and toolset.flag
+ # auto-qualifies it. Need to do the same here so flag
+ # setting works. We can arrange for toolset.flag to *not*
+ # auto-qualify the argument but then two rules defined in
+ # two Jamfiles would conflict.
+ m = $(context-module).$(m) ;
+ }
+ v = [ indirect.make $(m) : $(context-module) ] ;
+ }
+
+ v = @$(v) ;
+ result += $(v:G=$(p:G)) ;
+ }
+ else
+ {
+ result += $(p) ;
+ }
+ }
+ return $(result) ;
+}
+
+
+# Binds all dependency properties in a list relative to the given project.
+# Targets with absolute paths will be left unchanged and targets which have a
+# project specified will have the path to the project interpreted relative to
+# the specified location.
+#
+rule translate-dependencies ( specification * : project-id : location )
+{
+ local result ;
+ for local p in $(specification)
+ {
+ local split = [ split-conditional $(p) ] ;
+ local condition = "" ;
+ if $(split)
+ {
+ condition = $(split[1]): ;
+ p = $(split[2]) ;
+ }
+ if dependency in [ feature.attributes $(p:G) ]
+ {
+ local split-target = [ regex.match ^(.*)//(.*) : $(p:G=) ] ;
+ if $(split-target)
+ {
+ local rooted = [ path.root [ path.make $(split-target[1]) ]
+ [ path.root $(location) [ path.pwd ] ] ] ;
+ result += $(condition)$(p:G)$(rooted)//$(split-target[2]) ;
+ }
+ else if [ path.is-rooted $(p:G=) ]
+ {
+ result += $(condition)$(p) ;
+ }
+ else
+ {
+ result += $(condition)$(p:G)$(project-id)//$(p:G=) ;
+ }
+ }
+ else
+ {
+ result += $(condition)$(p) ;
+ }
+ }
+ return $(result) ;
+}
+
+
+# Class maintaining a property set -> string mapping.
+#
+class property-map
+{
+ import numbers ;
+ import sequence ;
+
+ rule __init__ ( )
+ {
+ self.next-flag = 1 ;
+ }
+
+ # Associate 'value' with 'properties'.
+ #
+ rule insert ( properties * : value )
+ {
+ self.all-flags += self.$(self.next-flag) ;
+ self.$(self.next-flag) = $(value) $(properties) ;
+
+ self.next-flag = [ numbers.increment $(self.next-flag) ] ;
+ }
+
+ # Returns the value associated with 'properties' or any subset of it. If
+ # more than one subset has a value assigned to it, returns the value for the
+ # longest subset, if it is unique.
+ #
+ rule find ( property-set )
+ {
+ # First find all matches.
+ local matches ;
+ local match-ranks ;
+ for local i in $(self.all-flags)
+ {
+ local list = $($(i)) ;
+ if [ $(property-set).contains-raw $(list[2-]) ]
+ {
+ matches += $(list[1]) ;
+ match-ranks += [ sequence.length $(list) ] ;
+ }
+ }
+ local best = [ sequence.select-highest-ranked $(matches)
+ : $(match-ranks) ] ;
+ if $(best[2])
+ {
+ import errors : error : errors.error ;
+ errors.error "Ambiguous key $(properties:J= :E=)" ;
+ }
+ return $(best) ;
+ }
+
+ # Returns the value associated with 'properties'. If 'value' parameter is
+ # given, replaces the found value.
+ #
+ rule find-replace ( properties * : value ? )
+ {
+ # First find all matches.
+ local matches ;
+ local match-ranks ;
+ for local i in $(self.all-flags)
+ {
+ if $($(i)[2-]) in $(properties)
+ {
+ matches += $(i) ;
+ match-ranks += [ sequence.length $($(i)) ] ;
+ }
+ }
+ local best = [ sequence.select-highest-ranked $(matches)
+ : $(match-ranks) ] ;
+ if $(best[2])
+ {
+ import errors : error : errors.error ;
+ errors.error "Ambiguous key $(properties:J= :E=)" ;
+ }
+ local original = $($(best)[1]) ;
+ if $(value)
+ {
+ $(best) = $(value) $($(best)[2-]) ;
+ }
+ return $(original) ;
+ }
+}
+
+
+rule __test__ ( )
+{
+ import assert ;
+ import "class" : new ;
+ import errors : try catch ;
+ import feature ;
+
+ # Local rules must be explicitly re-imported.
+ import property : path-order abbreviate-dashed ;
+
+ feature.prepare-test property-test-temp ;
+
+ feature.feature toolset : gcc : implicit symmetric ;
+ feature.subfeature toolset gcc : version : 2.95.2 2.95.3 2.95.4 3.0 3.0.1
+ 3.0.2 : optional ;
+ feature.feature define : : free ;
+ feature.feature runtime-link : dynamic static : symmetric link-incompatible ;
+ feature.feature optimization : on off ;
+ feature.feature variant : debug release : implicit composite symmetric ;
+ feature.feature rtti : on off : link-incompatible ;
+
+ feature.compose <variant>debug : <define>_DEBUG <optimization>off ;
+ feature.compose <variant>release : <define>NDEBUG <optimization>on ;
+
+ validate <toolset>gcc <toolset>gcc-3.0.1 : $(test-space) ;
+
+ assert.true path-order $(test-space) debug <define>foo ;
+ assert.false path-order $(test-space) <define>foo debug ;
+ assert.true path-order $(test-space) gcc debug ;
+ assert.false path-order $(test-space) debug gcc ;
+ assert.true path-order $(test-space) <optimization>on <rtti>on ;
+ assert.false path-order $(test-space) <rtti>on <optimization>on ;
+
+ assert.result-set-equal <toolset>gcc <rtti>off <define>FOO
+ : refine <toolset>gcc <rtti>off
+ : <define>FOO
+ : $(test-space) ;
+
+ assert.result-set-equal <toolset>gcc <optimization>on
+ : refine <toolset>gcc <optimization>off
+ : <optimization>on
+ : $(test-space) ;
+
+ assert.result-set-equal <toolset>gcc <rtti>off
+ : refine <toolset>gcc : <rtti>off : $(test-space) ;
+
+ assert.result-set-equal <toolset>gcc <rtti>off <rtti>off:<define>FOO
+ : refine <toolset>gcc : <rtti>off <rtti>off:<define>FOO
+ : $(test-space) ;
+
+ assert.result-set-equal <toolset>gcc:<define>foo <toolset>gcc:<define>bar
+ : refine <toolset>gcc:<define>foo : <toolset>gcc:<define>bar
+ : $(test-space) ;
+
+ assert.result <define>MY_RELEASE
+ : evaluate-conditionals-in-context
+ <variant>release,<rtti>off:<define>MY_RELEASE
+ : <toolset>gcc <variant>release <rtti>off ;
+
+ assert.result debug
+ : as-path <optimization>off <variant>debug
+ : $(test-space) ;
+
+ assert.result gcc/debug/rtti-off
+ : as-path <toolset>gcc <optimization>off <rtti>off <variant>debug
+ : $(test-space) ;
+
+ assert.result optmz-off : abbreviate-dashed optimization-off ;
+ assert.result rntm-lnk-sttc : abbreviate-dashed runtime-link-static ;
+
+ try ;
+ validate <feature>value : $(test-space) ;
+ catch "Invalid property '<feature>value': unknown feature 'feature'." ;
+
+ try ;
+ validate <rtti>default : $(test-space) ;
+ catch \"default\" is not a known value of feature <rtti> ;
+
+ validate <define>WHATEVER : $(test-space) ;
+
+ try ;
+ validate <rtti> : $(test-space) ;
+ catch "Invalid property '<rtti>': No value specified for feature 'rtti'." ;
+
+ try ;
+ validate value : $(test-space) ;
+ catch \"value\" is not an implicit feature value ;
+
+ assert.result-set-equal <rtti>on
+ : remove free implicit : <toolset>gcc <define>foo <rtti>on : $(test-space) ;
+
+ assert.result-set-equal <include>a
+ : select include : <include>a <toolset>gcc ;
+
+ assert.result-set-equal <include>a
+ : select include bar : <include>a <toolset>gcc ;
+
+ assert.result-set-equal <include>a <toolset>gcc
+ : select include <bar> <toolset> : <include>a <toolset>gcc ;
+
+ assert.result-set-equal <toolset>kylix <include>a
+ : change <toolset>gcc <include>a : <toolset> kylix ;
+
+ pm = [ new property-map ] ;
+ $(pm).insert <toolset>gcc : o ;
+ $(pm).insert <toolset>gcc <os>NT : obj ;
+ $(pm).insert <toolset>gcc <os>CYGWIN : obj ;
+
+ assert.equal o : [ $(pm).find-replace <toolset>gcc ] ;
+
+ assert.equal obj : [ $(pm).find-replace <toolset>gcc <os>NT ] ;
+
+ try ;
+ $(pm).find-replace <toolset>gcc <os>NT <os>CYGWIN ;
+ catch "Ambiguous key <toolset>gcc <os>NT <os>CYGWIN" ;
+
+ # Test ordinary properties.
+ assert.result : split-conditional <toolset>gcc ;
+
+ # Test properties with ":".
+ assert.result : split-conditional <define>FOO=A::B ;
+
+ # Test conditional feature.
+ assert.result-set-equal <toolset>gcc,<toolset-gcc:version>3.0 <define>FOO
+ : split-conditional <toolset>gcc,<toolset-gcc:version>3.0:<define>FOO ;
+
+ feature.finish-test property-test-temp ;
+}
diff --git a/src/kenlm/jam-files/boost-build/build/readme.txt b/src/kenlm/jam-files/boost-build/build/readme.txt
new file mode 100644
index 0000000..b15055b
--- /dev/null
+++ b/src/kenlm/jam-files/boost-build/build/readme.txt
@@ -0,0 +1,11 @@
+Copyright 2001, 2002 Dave Abrahams
+Copyright 2002 Vladimir Prus
+Distributed under the Boost Software License, Version 1.0.
+(See accompanying file LICENSE_1_0.txt or copy at
+http://www.boost.org/LICENSE_1_0.txt)
+
+Development code for new build system. To run unit tests for jam code, execute:
+
+ bjam --debug --build-system=test
+
+Comprehensive tests require Python. See ../test/readme.txt
diff --git a/src/kenlm/jam-files/boost-build/build/scanner.jam b/src/kenlm/jam-files/boost-build/build/scanner.jam
new file mode 100644
index 0000000..ed55070
--- /dev/null
+++ b/src/kenlm/jam-files/boost-build/build/scanner.jam
@@ -0,0 +1,163 @@
+# Copyright 2003 Dave Abrahams
+# Copyright 2002, 2003, 2004, 2005 Vladimir Prus
+# Distributed under the Boost Software License, Version 1.0.
+# (See accompanying file LICENSE_1_0.txt or copy at
+# http://www.boost.org/LICENSE_1_0.txt)
+
+# Implements scanners: objects computing implicit dependencies for files, such
+# as includes in C++.
+#
+# A scanner has a regular expression used to find the dependencies, some data
+# needed to interpret those dependencies (e.g., include paths), and code which
+# establishing needed relationships between actual jam targets.
+#
+# Scanner objects are created by actions when they try to actualize virtual
+# targets, passed to the virtual-target.actualize() method and are then
+# associated with actual targets. It is possible to use several scanners for a
+# single virtual-target. For example, a single source file might be compiled
+# twice - each time using a different include path. In this case, two separate
+# actual targets will be created, each having a scanner of its own.
+#
+# Typically, scanners are created from target type and the action's properties,
+# using the rule 'get' in this module. Directly creating scanners is not
+# recommended, as it might create multiple equivalent but different instances,
+# and lead to unnecessary actual target duplication. However, actions can also
+# create scanners in a special way, instead of relying on just the target type.
+
+import "class" : new ;
+import property ;
+import property-set ;
+import virtual-target ;
+
+# Base scanner class.
+#
+class scanner
+{
+ rule __init__ ( )
+ {
+ }
+
+ # Returns a pattern to use for scanning.
+ #
+ rule pattern ( )
+ {
+ import errors : error : errors.error ;
+ errors.error "method must be overriden" ;
+ }
+
+ # Establish necessary relationship between targets, given an actual target
+    # being scanned and a list of pattern matches in that file.
+ #
+ rule process ( target : matches * )
+ {
+ import errors : error : errors.error ;
+ errors.error "method must be overriden" ;
+ }
+}
+
+
+# Registers a new generator class, specifying a set of properties relevant to
+# this scanner. Constructor for that class should have one parameter: a list of
+# properties.
+#
+rule register ( scanner-class : relevant-properties * )
+{
+ .registered += $(scanner-class) ;
+ .relevant-properties.$(scanner-class) = $(relevant-properties) ;
+}
+
+
+# Common scanner class, usable when there is only one kind of includes (unlike
+# C, where "" and <> includes have different search paths).
+#
+class common-scanner : scanner
+{
+ import scanner ;
+
+ rule __init__ ( includes * )
+ {
+ scanner.__init__ ;
+ self.includes = $(includes) ;
+ }
+
+ rule process ( target : matches * : binding )
+ {
+ local target_path = [ NORMALIZE_PATH $(binding:D) ] ;
+
+ NOCARE $(matches) ;
+ INCLUDES $(target) : $(matches) ;
+ SEARCH on $(matches) = $(target_path) $(self.includes:G=) ;
+ ISFILE $(matches) ;
+
+ scanner.propagate $(__name__) : $(matches) : $(target) ;
+ }
+}
+
+
+# Returns an instance of a previously registered scanner, with the specified
+# properties.
+#
+rule get ( scanner-class : property-set )
+{
+ if ! $(scanner-class) in $(.registered)
+ {
+ import errors ;
+ errors.error "attempt to get an unregisted scanner" ;
+ }
+
+ local r = $(.rv-cache.$(property-set)) ;
+ if ! $(r)
+ {
+ r = [ property-set.create
+ [ property.select $(.relevant-properties.$(scanner-class)) :
+ [ $(property-set).raw ] ] ] ;
+ .rv-cache.$(property-set) = $(r) ;
+ }
+
+ if ! $(scanner.$(scanner-class).$(r:J=-))
+ {
+ local s = [ new $(scanner-class) [ $(r).raw ] ] ;
+ scanner.$(scanner-class).$(r:J=-) = $(s) ;
+ }
+ return $(scanner.$(scanner-class).$(r:J=-)) ;
+}
+
+
+# Installs the specified scanner on the actual target 'target'.
+#
+rule install ( scanner : target )
+{
+ HDRSCAN on $(target) = [ $(scanner).pattern ] ;
+ SCANNER on $(target) = $(scanner) ;
+ HDRRULE on $(target) = scanner.hdrrule ;
+
+ # Scanner reflects differences in properties affecting binding of 'target',
+ # which will be known when processing includes for it, and give information
+ # on how to interpret different include types (e.g. quoted vs. those in
+ # angle brackets in C files).
+ HDRGRIST on $(target) = $(scanner) ;
+}
+
+
+# Propagate scanner settings from 'including-target' to 'targets'.
+#
+rule propagate ( scanner : targets * : including-target )
+{
+ HDRSCAN on $(targets) = [ on $(including-target) return $(HDRSCAN) ] ;
+ SCANNER on $(targets) = $(scanner) ;
+ HDRRULE on $(targets) = scanner.hdrrule ;
+ HDRGRIST on $(targets) = [ on $(including-target) return $(HDRGRIST) ] ;
+}
+
+
+rule hdrrule ( target : matches * : binding )
+{
+ local scanner = [ on $(target) return $(SCANNER) ] ;
+ $(scanner).process $(target) : $(matches) : $(binding) ;
+}
+
+
+# hdrrule must be available at global scope so it can be invoked by header
+# scanning.
+#
+IMPORT scanner : hdrrule : : scanner.hdrrule ;
diff --git a/src/kenlm/jam-files/boost-build/build/targets.jam b/src/kenlm/jam-files/boost-build/build/targets.jam
new file mode 100644
index 0000000..3bd39a3
--- /dev/null
+++ b/src/kenlm/jam-files/boost-build/build/targets.jam
@@ -0,0 +1,1694 @@
+# Copyright Vladimir Prus 2002.
+# Copyright Rene Rivera 2006.
+# Distributed under the Boost Software License, Version 1.0.
+# (See accompanying file LICENSE_1_0.txt or copy at
+# http://www.boost.org/LICENSE_1_0.txt)
+
+# Supports 'abstract' targets, which are targets explicitly defined in a
+# Jamfile.
+#
+# Abstract targets are represented by classes derived from 'abstract-target'
+# class. The first abstract target is 'project-target', which is created for
+# each Jamfile, and can be obtained by the 'target' rule in the Jamfile's module
+# (see project.jam).
+#
+# Project targets keep a list of 'main-target' instances. A main target is what
+# the user explicitly defines in a Jamfile. It is possible to have several
+# definitions for a main target, for example to have different lists of sources
+# for different platforms. So, main targets keep a list of alternatives.
+#
+# Each alternative is an instance of 'abstract-target'. When a main target
+# subvariant is defined by some rule, that rule will decide what class to use,
+# create an instance of that class and add it to the list of alternatives for
+# the main target.
+#
+# Rules supplied by the build system will use only targets derived from the
+# 'basic-target' class, which will provide some default behaviour. There will be
+# different classes derived from it such as 'make-target', created by the 'make'
+# rule, and 'typed-target', created by rules such as 'exe' and 'lib'.
+#
+# +--------------------------+
+# | abstract-target |
+# +==========================+
+# | name |
+# | project |
+# | |
+# | generate(properties) = 0 |
+# +-------------+------------+
+# |
+# ^
+# / \
+# +-+-+
+# |
+# |
+# +------------------+-----+-------------------------------+
+# | | |
+# | | |
+# +-----------+----------+ +------+------+ +-------+------+
+# | project-target | | main-target | | basic-target |
+# +======================+ 1 * +=============+ alternatives +==============+
+# | generate(properties) |o-----+ generate |<>------------->| generate |
+# | main-target | +-------------+ | construct = 0|
+# +----------------------+ +-------+------+
+# |
+# ^
+# / \
+# +-+-+
+# |
+# |
+# ...--+-----------------+-----------------+------------------+
+# | | | |
+# | | | |
+# ... ---+-----+ +-------+------+ +------+------+ +-------+------+
+# | | typed-target | | make-target | | stage-target |
+# . +==============+ +=============+ +==============+
+# . | construct | | construct | | construct |
+# +--------------+ +-------------+ +--------------+
+
+import assert ;
+import build-request ;
+import "class" : new ;
+import feature ;
+import indirect ;
+import path ;
+import property ;
+import property-set ;
+import sequence ;
+import set ;
+import toolset ;
+
+
+# Base class for all abstract targets.
+#
+class abstract-target
+{
+ import assert ;
+ import "class" ;
+ import errors ;
+ import project ;
+
+ rule __init__ ( name # Name of the target in Jamfile.
+ : project-target # The project target to which this one belongs.
+ )
+ {
+ # Note: it might seem that we don't need either name or project at all.
+ # However, there are places where we really need it. One example is
+ # error messages which should name problematic targets. Another is
+ # setting correct paths for sources and generated files.
+
+ self.name = $(name) ;
+ self.project = $(project-target) ;
+ self.location = [ errors.nearest-user-location ] ;
+ }
+
+ # Returns the name of this target.
+ rule name ( )
+ {
+ return $(self.name) ;
+ }
+
+ # Returns the project for this target.
+ rule project ( )
+ {
+ return $(self.project) ;
+ }
+
+ # Return the location where the target was declared.
+ rule location ( )
+ {
+ return $(self.location) ;
+ }
+
+ # Returns a user-readable name for this target.
+ rule full-name ( )
+ {
+ local location = [ $(self.project).get location ] ;
+ return $(location)/$(self.name) ;
+ }
+
+ # Generates virtual targets for this abstract target using the specified
+ # properties, unless a different value of some feature is required by the
+ # target.
+ # On success, returns:
+ # - a property-set with the usage requirements to be applied to dependants
+ # - a list of produced virtual targets, which may be empty.
+ # If 'property-set' is empty, performs the default build of this target, in
+ # a way specific to the derived class.
+ #
+ rule generate ( property-set )
+ {
+ errors.error "method should be defined in derived classes" ;
+ }
+
+ rule rename ( new-name )
+ {
+ self.name = $(new-name) ;
+ }
+}
+
+
+if --debug-building in [ modules.peek : ARGV ]
+{
+ modules.poke : .debug-building : true ;
+}
+
+
+rule indent ( )
+{
+ return $(.indent:J="") ;
+}
+
+
+rule increase-indent ( )
+{
+ .indent += " " ;
+}
+
+
+rule decrease-indent ( )
+{
+ .indent = $(.indent[2-]) ;
+}
+
+
+# Project target class (derived from 'abstract-target').
+#
+# This class has the following responsibilities:
+# - Maintaining a list of main targets in this project and building them.
+#
+# Main targets are constructed in two stages:
+# - When Jamfile is read, a number of calls to 'add-alternative' is made. At
+# that time, alternatives can also be renamed to account for inline targets.
+# - The first time 'main-target' or 'has-main-target' rule is called, all
+# alternatives are enumerated and main targets are created.
+#
+class project-target : abstract-target
+{
+ import project ;
+ import targets ;
+ import path ;
+ import print ;
+ import property-set ;
+ import set ;
+ import sequence ;
+ import "class" : new ;
+
+ rule __init__ ( name : project-module parent-project ?
+ : requirements * : default-build * )
+ {
+ abstract-target.__init__ $(name) : $(__name__) ;
+
+ self.project-module = $(project-module) ;
+ self.location = [ project.attribute $(project-module) location ] ;
+ self.requirements = $(requirements) ;
+ self.default-build = $(default-build) ;
+
+ if $(parent-project)
+ {
+ inherit $(parent-project) ;
+ }
+ }
+
+ # This is needed only by the 'make' rule. Need to find a way to make 'make'
+ # work without this method.
+ #
+ rule project-module ( )
+ {
+ return $(self.project-module) ;
+ }
+
+ rule get ( attribute )
+ {
+ return [ project.attribute $(self.project-module) $(attribute) ] ;
+ }
+
+ rule build-dir ( )
+ {
+ if ! $(self.build-dir)
+ {
+ self.build-dir = [ get build-dir ] ;
+ if ! $(self.build-dir)
+ {
+ local location = [ $(self.project).get location ] ;
+ if $(location)
+ {
+ self.build-dir = [ path.join $(location) bin ] ;
+ }
+ else
+ {
+ local id = [ get id ] ;
+ if $(id)
+ {
+ local rid = [ MATCH ^/(.*) : $(id) ] ;
+ self.build-dir = [ path.join [ project.standalone-build-dir ] $(rid) ] ;
+ }
+ else
+ {
+ errors.error "Could not create build-dir for standalone project $(self.project-module:E=)."
+ : "Missing project id" ;
+ }
+ }
+ }
+ }
+ return $(self.build-dir) ;
+ }
+
+ # Generates all possible targets contained in this project.
+ #
+ rule generate ( property-set * )
+ {
+ if [ modules.peek : .debug-building ]
+ {
+ ECHO [ targets.indent ] "building project" [ name ]
+ " ('$(__name__)') with" [ $(property-set).raw ] ;
+ targets.increase-indent ;
+ }
+
+ local usage-requirements = [ property-set.empty ] ;
+ local targets ;
+
+ for local t in [ targets-to-build ]
+ {
+ local g = [ $(t).generate $(property-set) ] ;
+ usage-requirements = [ $(usage-requirements).add $(g[1]) ] ;
+ targets += $(g[2-]) ;
+ }
+ targets.decrease-indent ;
+ return $(usage-requirements) [ sequence.unique $(targets) ] ;
+ }
+
+ # Computes and returns a list of abstract-target instances which must be
+ # built when this project is built.
+ #
+ rule targets-to-build ( )
+ {
+ local result ;
+
+ if ! $(self.built-main-targets)
+ {
+ build-main-targets ;
+ }
+
+ # Collect all main targets here, except for "explicit" ones.
+ for local t in $(self.main-targets)
+ {
+ if ! [ $(t).name ] in $(self.explicit-targets)
+ {
+ result += $(t) ;
+ }
+ }
+
+ # Collect all projects referenced via "projects-to-build" attribute.
+ local self-location = [ get location ] ;
+ for local pn in [ get projects-to-build ]
+ {
+ result += [ find $(pn)/ ] ;
+ }
+
+ return $(result) ;
+ }
+
+    # Add 'target' to the list of targets in this project that should be built
+ # only by explicit request
+ #
+ rule mark-target-as-explicit ( target-name * )
+ {
+ # Record the name of the target, not instance, since this rule is called
+ # before main target instances are created.
+ self.explicit-targets += $(target-name) ;
+ }
+
+ rule mark-target-as-always ( target-name * )
+ {
+ # Record the name of the target, not instance, since this rule is called
+ # before main target instances are created.
+ self.always-targets += $(target-name) ;
+ }
+
+ # Add new target alternative
+ #
+ rule add-alternative ( target-instance )
+ {
+ if $(self.built-main-targets)
+ {
+ import errors : error : errors.error ;
+ errors.error add-alternative called when main targets are already
+ created. : in project [ full-name ] ;
+ }
+ self.alternatives += $(target-instance) ;
+ }
+
+ # Returns a 'main-target' class instance corresponding to 'name'.
+ #
+ rule main-target ( name )
+ {
+ if ! $(self.built-main-targets)
+ {
+ build-main-targets ;
+ }
+ return $(self.main-target.$(name)) ;
+ }
+
+ # Returns whether a main target with the specified name exists.
+ #
+ rule has-main-target ( name )
+ {
+ if ! $(self.built-main-targets)
+ {
+ build-main-targets ;
+ }
+
+ if $(self.main-target.$(name))
+ {
+ return true ;
+ }
+ }
+
+ # Worker function for the find rule not implementing any caching and simply
+ # returning nothing in case the target can not be found.
+ #
+ rule find-really ( id )
+ {
+ local result ;
+ local current-location = [ get location ] ;
+
+ local split = [ MATCH ^(.*)//(.*)$ : $(id) ] ;
+ local project-part = $(split[1]) ;
+ local target-part = $(split[2]) ;
+
+ local extra-error-message ;
+ if $(project-part)
+ {
+ # There is an explicitly specified project part in id. Looks up the
+ # project and passes the request to it.
+ local pm = [ project.find $(project-part) : $(current-location) ] ;
+ if $(pm)
+ {
+ project-target = [ project.target $(pm) ] ;
+ result = [ $(project-target).find $(target-part) : no-error ] ;
+ }
+ else
+ {
+ extra-error-message = could not resolve project reference
+ '$(project-part)' ;
+ if ! [ path.is-rooted $(project-part) ]
+ {
+ local rooted = [ path.root $(project-part) / ] ;
+ if [ project.is-registered-id $(rooted) ]
+ {
+ extra-error-message += - possibly missing a leading
+ slash ('/') character. ;
+ }
+ }
+ }
+ }
+ else
+ {
+ # Interpret target-name as name of main target. Need to do this
+ # before checking for file. Consider the following scenario with a
+ # toolset not modifying its executable's names, e.g. gcc on
+ # Unix-like platforms:
+ #
+ # exe test : test.cpp ;
+ # install s : test : <location>. ;
+ #
+ # After the first build we would have a target named 'test' in the
+ # Jamfile and a file named 'test' on the disk. We need the target to
+ # override the file.
+ result = [ main-target $(id) ] ;
+
+ # Interpret id as an existing file reference.
+ if ! $(result)
+ {
+ result = [ new file-reference [ path.make $(id) ] :
+ $(self.project) ] ;
+ if ! [ $(result).exists ]
+ {
+ result = ;
+ }
+ }
+
+ # Interpret id as project-id.
+ if ! $(result)
+ {
+ local project-module = [ project.find $(id) :
+ $(current-location) ] ;
+ if $(project-module)
+ {
+ result = [ project.target $(project-module) ] ;
+ }
+ }
+ }
+
+ return $(result:E="") $(extra-error-message) ;
+ }
+
+ # Find and return the target with the specified id, treated relative to
+ # self. Id may specify either a target or a file name with the target taking
+ # priority. May report an error or return nothing if the target is not found
+ # depending on the 'no-error' parameter.
+ #
+ rule find ( id : no-error ? )
+ {
+ local v = $(.id.$(id)) ;
+ local extra-error-message ;
+ if ! $(v)
+ {
+ local r = [ find-really $(id) ] ;
+ v = $(r[1]) ;
+ extra-error-message = $(r[2-]) ;
+ if ! $(v)
+ {
+ v = none ;
+ }
+ .id.$(id) = $(v) ;
+ }
+
+ if $(v) != none
+ {
+ return $(v) ;
+ }
+ else if ! $(no-error)
+ {
+ local current-location = [ get location ] ;
+ import errors : user-error : errors.user-error ;
+ errors.user-error Unable to find file or target named
+ : " " '$(id)'
+ : referred to from project at
+ : " " '$(current-location)'
+ : $(extra-error-message) ;
+ }
+ }
+
+ rule build-main-targets ( )
+ {
+ self.built-main-targets = true ;
+ for local a in $(self.alternatives)
+ {
+ local name = [ $(a).name ] ;
+ local target = $(self.main-target.$(name)) ;
+ if ! $(target)
+ {
+ local t = [ new main-target $(name) : $(self.project) ] ;
+ self.main-target.$(name) = $(t) ;
+ self.main-targets += $(t) ;
+ target = $(self.main-target.$(name)) ;
+ }
+
+ if $(name) in $(self.always-targets)
+ {
+ $(a).always ;
+ }
+
+ $(target).add-alternative $(a) ;
+ }
+ }
+
+ # Accessor, add a constant.
+ #
+ rule add-constant (
+ name # Variable name of the constant.
+ : value + # Value of the constant.
+ : type ? # Optional type of value.
+ )
+ {
+ switch $(type)
+ {
+ case path :
+ local r ;
+ for local v in $(value)
+ {
+ local l = $(self.location) ;
+ if ! $(l)
+ {
+ # Project corresponding to config files do not have
+ # 'location' attribute, but do have source location. It
+ # might be more reasonable to make every project have a
+ # location and use some other approach to prevent buildable
+ # targets in config files, but that has been left for later.
+ l = [ get source-location ] ;
+ }
+ v = [ path.root [ path.make $(v) ] $(l) ] ;
+ # Now make the value absolute path.
+ v = [ path.root $(v) [ path.pwd ] ] ;
+ # Constants should be in platform-native form.
+ v = [ path.native $(v) ] ;
+ r += $(v) ;
+ }
+ value = $(r) ;
+ }
+ if ! $(name) in $(self.constants)
+ {
+ self.constants += $(name) ;
+ }
+ self.constant.$(name) = $(value) ;
+ # Inject the constant in the scope of the Jamroot module.
+ modules.poke $(self.project-module) : $(name) : $(value) ;
+ }
+
+ rule inherit ( parent )
+ {
+ for local c in [ modules.peek $(parent) : self.constants ]
+ {
+ # No need to pass the type. Path constants were converted to
+ # absolute paths already by parent.
+ add-constant $(c) : [ modules.peek $(parent) : self.constant.$(c) ]
+ ;
+ }
+
+ # Import rules from parent.
+ local this-module = [ project-module ] ;
+ local parent-module = [ $(parent).project-module ] ;
+ # Do not import rules coming from 'project-rules' as they must be
+ # imported localized.
+ local user-rules = [ set.difference
+ [ RULENAMES $(parent-module) ] :
+ [ RULENAMES project-rules ] ] ;
+ IMPORT $(parent-module) : $(user-rules) : $(this-module) : $(user-rules)
+ ;
+ EXPORT $(this-module) : $(user-rules) ;
+ }
+}
+
+
+# Helper rules to detect cycles in main target references.
+#
+local rule start-building ( main-target-instance )
+{
+ if $(main-target-instance) in $(.targets-being-built)
+ {
+ local names ;
+ for local t in $(.targets-being-built) $(main-target-instance)
+ {
+ names += [ $(t).full-name ] ;
+ }
+
+ import errors ;
+ errors.error "Recursion in main target references"
+ : "the following target are being built currently:"
+ : $(names) ;
+ }
+ .targets-being-built += $(main-target-instance) ;
+}
+
+
+local rule end-building ( main-target-instance )
+{
+ .targets-being-built = $(.targets-being-built[1--2]) ;
+}
+
+
+# A named top-level target in Jamfile.
+#
+class main-target : abstract-target
+{
+ import assert ;
+ import feature ;
+ import print ;
+ import property-set ;
+ import sequence ;
+ import targets : start-building end-building ;
+
+ rule __init__ ( name : project )
+ {
+ abstract-target.__init__ $(name) : $(project) ;
+ }
+
+ # Add a new alternative for this target
+ rule add-alternative ( target )
+ {
+ local d = [ $(target).default-build ] ;
+ if $(self.alternatives) && ( $(self.default-build) != $(d) )
+ {
+ import errors : error : errors.error ;
+ errors.error "default build must be identical in all alternatives"
+ : "main target is" [ full-name ]
+ : "with" [ $(d).raw ]
+ : "differing from previous default build"
+ [ $(self.default-build).raw ] ;
+ }
+ else
+ {
+ self.default-build = $(d) ;
+ }
+ self.alternatives += $(target) ;
+ }
+
+ # Returns the best viable alternative for this property-set. See the
+ # documentation for selection rules.
+ #
+ local rule select-alternatives ( property-set debug ? )
+ {
+ # When selecting alternatives we have to consider defaults, for example:
+ # lib l : l.cpp : <variant>debug ;
+ # lib l : l_opt.cpp : <variant>release ;
+ # will not work unless we add default value <variant>debug.
+ property-set = [ $(p).add-defaults ] ;
+
+ # The algorithm: we keep the current best viable alternative. When we
+ # encounter a new best viable alternative, we compare it with the
+ # current one.
+
+ local best ;
+ local best-properties ;
+
+ if $(self.alternatives[2-])
+ {
+ local bad ;
+ local worklist = $(self.alternatives) ;
+ while $(worklist) && ! $(bad)
+ {
+ local v = $(worklist[1]) ;
+ local properties = [ $(v).match $(property-set) $(debug) ] ;
+
+ if $(properties) != no-match
+ {
+ if ! $(best)
+ {
+ best = $(v) ;
+ best-properties = $(properties) ;
+ }
+ else
+ {
+ if $(properties) = $(best-properties)
+ {
+ bad = true ;
+ }
+ else if $(properties) in $(best-properties)
+ {
+ # Do nothing, this alternative is worse
+ }
+ else if $(best-properties) in $(properties)
+ {
+ best = $(v) ;
+ best-properties = $(properties) ;
+ }
+ else
+ {
+ bad = true ;
+ }
+ }
+ }
+ worklist = $(worklist[2-]) ;
+ }
+ if ! $(bad)
+ {
+ return $(best) ;
+ }
+ }
+ else
+ {
+ return $(self.alternatives) ;
+ }
+ }
+
+ rule apply-default-build ( property-set )
+ {
+ return [ targets.apply-default-build $(property-set) :
+ $(self.default-build) ] ;
+ }
+
+ # Select an alternative for this main target, by finding all alternatives
+ # whose requirements are satisfied by 'properties' and picking the one with
+ # the longest requirements set. Returns the result of calling 'generate' on
+ # that alternative.
+ #
+ rule generate ( property-set )
+ {
+ start-building $(__name__) ;
+
+ # We want composite properties in the build request to act as if all the
+ # properties they expand to have been explicitly specified.
+ property-set = [ $(property-set).expand ] ;
+
+ local all-property-sets = [ apply-default-build $(property-set) ] ;
+ local usage-requirements = [ property-set.empty ] ;
+ local result ;
+ for local p in $(all-property-sets)
+ {
+ local r = [ generate-really $(p) ] ;
+ if $(r)
+ {
+ usage-requirements = [ $(usage-requirements).add $(r[1]) ] ;
+ result += $(r[2-]) ;
+ }
+ }
+ end-building $(__name__) ;
+ return $(usage-requirements) [ sequence.unique $(result) ] ;
+ }
+
+ # Generates the main target with the given property set and returns a list
+ # which first element is property-set object containing usage-requirements
+ # of generated target and with generated virtual target in other elements.
+ # It is possible that no targets are generated.
+ #
+ local rule generate-really ( property-set )
+ {
+ local best-alternatives = [ select-alternatives $(property-set) ] ;
+ if ! $(best-alternatives)
+ {
+ ECHO "error: No best alternative for" [ full-name ] ;
+ select-alternatives $(property-set) debug ;
+ return [ property-set.empty ] ;
+ }
+ else
+ {
+ # Now return virtual targets for the only alternative.
+ return [ $(best-alternatives).generate $(property-set) ] ;
+ }
+ }
+
+ rule rename ( new-name )
+ {
+ abstract-target.rename $(new-name) ;
+ for local a in $(self.alternatives)
+ {
+ $(a).rename $(new-name) ;
+ }
+ }
+}
+
+
+# Abstract target referring to a source file. This is an artificial entity
+# allowing sources to a target to be represented using a list of abstract target
+# instances.
+#
+class file-reference : abstract-target
+{
+ import virtual-target ;
+ import property-set ;
+ import path ;
+
+ rule __init__ ( file : project )
+ {
+ abstract-target.__init__ $(file) : $(project) ;
+ }
+
+ rule generate ( properties )
+ {
+ return [ property-set.empty ] [ virtual-target.from-file $(self.name) :
+ [ location ] : $(self.project) ] ;
+ }
+
+ # Returns true if the referred file really exists.
+ rule exists ( )
+ {
+ location ;
+ return $(self.file-path) ;
+ }
+
+ # Returns the location of target. Needed by 'testing.jam'.
+ rule location ( )
+ {
+ if ! $(self.file-location)
+ {
+ local source-location = [ $(self.project).get source-location ] ;
+ for local src-dir in $(source-location)
+ {
+ if ! $(self.file-location)
+ {
+ local location = [ path.root $(self.name) $(src-dir) ] ;
+ if [ CHECK_IF_FILE [ path.native $(location) ] ]
+ {
+ self.file-location = $(src-dir) ;
+ self.file-path = $(location) ;
+ }
+ }
+ }
+ }
+ return $(self.file-location) ;
+ }
+}
+
+
+# Given a target-reference, made in context of 'project', returns the
+# abstract-target instance that is referred to, as well as properties explicitly
+# specified for this reference.
+#
+rule resolve-reference ( target-reference : project )
+{
+ # Separate target name from properties override.
+ local split = [ MATCH "^([^<]*)(/(<.*))?$" : $(target-reference) ] ;
+ local id = $(split[1]) ;
+ local sproperties = ;
+ if $(split[3])
+ {
+ sproperties = [ property.make [ feature.split $(split[3]) ] ] ;
+ sproperties = [ feature.expand-composites $(sproperties) ] ;
+ }
+
+ # Find the target.
+ local target = [ $(project).find $(id) ] ;
+
+ return $(target) [ property-set.create $(sproperties) ] ;
+}
+
+
+# Attempts to generate the target given by target reference, which can refer
+# both to a main target or to a file. Returns a list consisting of
+# - usage requirements
+# - generated virtual targets, if any
+#
+rule generate-from-reference (
+ target-reference # Target reference.
+ : project # Project where the reference is made.
+ : property-set # Properties of the main target that makes the reference.
+)
+{
+ local r = [ resolve-reference $(target-reference) : $(project) ] ;
+ local target = $(r[1]) ;
+ local sproperties = $(r[2]) ;
+
+ # Take properties which should be propagated and refine them with
+ # source-specific requirements.
+ local propagated = [ $(property-set).propagated ] ;
+ local rproperties = [ $(propagated).refine $(sproperties) ] ;
+ if $(rproperties[1]) = "@error"
+ {
+ import errors ;
+ errors.error
+ "When building" [ full-name ] " with properties " $(properties) :
+ "Invalid properties specified for " $(source) ":"
+ $(rproperties[2-]) ;
+ }
+ return [ $(target).generate $(rproperties) ] ;
+}
+
+
+rule apply-default-build ( property-set : default-build )
+{
+ # 1. First, see what properties from default-build are already present in
+ # property-set.
+
+ local raw = [ $(property-set).raw ] ;
+ local specified-features = $(raw:G) ;
+
+ local defaults-to-apply ;
+ for local d in [ $(default-build).raw ]
+ {
+ if ! $(d:G) in $(specified-features)
+ {
+ defaults-to-apply += $(d) ;
+ }
+ }
+
+ # 2. If there are any defaults to be applied, form a new build request. Pass
+ # it through to 'expand-no-defaults' since default-build might contain
+ # "release debug" resulting in two property-sets.
+ local result ;
+ if $(defaults-to-apply)
+ {
+ # We have to compress subproperties here to prevent property lists like:
+ # <toolset>msvc <toolset-msvc:version>7.1 <threading>multi
+ #
+ # from being expanded into:
+ # <toolset-msvc:version>7.1/<threading>multi
+ # <toolset>msvc/<toolset-msvc:version>7.1/<threading>multi
+ #
+ # due to a cross-product property combination. That may be an indication
+ # that build-request.expand-no-defaults is the wrong rule to use here.
+ properties = [ build-request.expand-no-defaults
+ [ feature.compress-subproperties $(raw) ] $(defaults-to-apply) ] ;
+
+ if $(properties)
+ {
+ for local p in $(properties)
+ {
+ result += [ property-set.create
+ [ feature.expand [ feature.split $(p) ] ] ] ;
+ }
+ }
+ else
+ {
+ result = [ property-set.empty ] ;
+ }
+ }
+ else
+ {
+ result = $(property-set) ;
+ }
+ return $(result) ;
+}
+
+
+# Given a build request and requirements, return properties common to dependency
+# build request and target requirements.
+#
+# TODO: Document exactly what 'common properties' are, whether they should
+# include default property values, whether they should contain any conditional
+# properties or should those be already processed, etc. See whether there are
+# any differences between use cases with empty and non-empty build-request as
+# well as with requirements containing and those not containing any non-free
+# features.
+#
+rule common-properties ( build-request requirements )
+{
+ # For optimization, we add free requirements directly, without using a
+ # complex algorithm. This gives the complex algorithm a better chance of
+ # caching results.
+ local free = [ $(requirements).free ] ;
+ local non-free = [ property-set.create [ $(requirements).base ]
+ [ $(requirements).incidental ] ] ;
+
+ local key = .rp.$(build-request)-$(non-free) ;
+ if ! $($(key))
+ {
+ $(key) = [ common-properties2 $(build-request) $(non-free) ] ;
+ }
+ return [ $($(key)).add-raw $(free) ] ;
+}
+
+
+# Given a 'context' -- a set of already present properties, and 'requirements',
+# decide which extra properties should be applied to 'context'. For conditional
+# requirements, this means evaluating the condition. For indirect conditional
+# requirements, this means calling a rule. Ordinary requirements are always
+# applied.
+#
+# Handles the situation where evaluating one conditional requirement affects
+# conditions of another conditional requirements, such as:
+# <toolset>gcc:<variant>release <variant>release:<define>RELEASE
+#
+# If 'what' is 'refined' returns context refined with new requirements. If
+# 'what' is 'added' returns just the requirements to be applied.
+#
+rule evaluate-requirements ( requirements : context : what )
+{
+ # Apply non-conditional requirements. It is possible that further
+    # conditional requirements change a value set by non-conditional
+ # requirements. For example:
+ #
+ # exe a : a.cpp : <threading>single <toolset>foo:<threading>multi ;
+ #
+ # I am not sure if this should be an error, or not, especially given that
+ #
+ # <threading>single
+ #
+ # might come from project's requirements.
+
+ local unconditional = [ feature.expand [ $(requirements).non-conditional ] ]
+ ;
+
+ local raw = [ $(context).raw ] ;
+ raw = [ property.refine $(raw) : $(unconditional) ] ;
+
+ # We have collected properties that surely must be present in common
+ # properties. We now try to figure out what other properties should be added
+ # in order to satisfy rules (4)-(6) from the docs.
+
+ local conditionals = [ $(requirements).conditional ] ;
+ # The 'count' variable has one element for each conditional feature and for
+ # each occurrence of '<indirect-conditional>' feature. It is used as a loop
+ # counter: for each iteration of the loop before we remove one element and
+ # the property set should stabilize before we are done. It is assumed that
+ # #conditionals iterations should be enough for properties to propagate
+ # along conditions in any direction.
+ local count = $(conditionals) [ $(requirements).get <conditional> ]
+ and-once-more ;
+
+ local added-requirements ;
+
+ local current = $(raw) ;
+
+ # It is assumed that ordinary conditional requirements can not add
+ # <conditional> properties (a.k.a. indirect conditional properties), and
+ # that rules referred to by <conditional> properties can not add new
+ # <conditional> properties. So the list of indirect conditionals does not
+ # change.
+ local indirect = [ $(requirements).get <conditional> ] ;
+ indirect = [ MATCH ^@(.*) : $(indirect) ] ;
+
+ local ok ;
+ while $(count)
+ {
+ # Evaluate conditionals in context of current properties.
+ local e = [ property.evaluate-conditionals-in-context $(conditionals) :
+ $(current) ] ;
+
+ # Evaluate indirect conditionals.
+ for local i in $(indirect)
+ {
+ local t = [ current ] ;
+ local p = [ $(t).project ] ;
+ local new = [ indirect.call $(i) $(current) ] ;
+ e += [ property.translate-paths $(new) : [ $(p).location ] ] ;
+ }
+
+ if $(e) = $(added-requirements)
+ {
+ # If we got the same result, we have found the final properties.
+ count = ;
+ ok = true ;
+ }
+ else
+ {
+ # Oops, conditional evaluation results have changed. Also 'current'
+ # contains leftovers from a previous evaluation. Recompute 'current'
+ # using initial properties and conditional requirements.
+ added-requirements = $(e) ;
+ current = [ property.refine $(raw) : [ feature.expand $(e) ] ] ;
+ }
+ count = $(count[2-]) ;
+ }
+ if ! $(ok)
+ {
+ import errors ;
+ errors.error Can not evaluate conditional properties $(conditionals) ;
+ }
+
+ if $(what) = added
+ {
+ return [ property-set.create $(unconditional) $(added-requirements) ] ;
+ }
+ else if $(what) = refined
+ {
+ return [ property-set.create $(current) ] ;
+ }
+ else
+ {
+ import errors ;
+ errors.error "Invalid value of the 'what' parameter." ;
+ }
+}
+
+
+# Implements the uncached part of 'common-properties': expands the build
+# request with feature defaults and evaluates 'requirements' against it.
+#
+rule common-properties2 ( build-request requirements )
+{
+    # This guarantees that default properties are present in the result, unless
+    # they are overriden by some requirement. FIXME: There is a possibility that
+    # we have added <foo>bar, which is composite and expands to <foo2>bar2, but
+    # default value of <foo2> is not bar2, in which case it is not clear what to
+    # do.
+    #
+    build-request = [ $(build-request).add-defaults ] ;
+    # Features added by 'add-defaults' can be composite and expand to features
+    # without default values -- which therefore have not been added yet. It
+    # could be clearer/faster to expand only newly added properties but that is
+    # not critical.
+    build-request = [ $(build-request).expand ] ;
+
+    return [ evaluate-requirements $(requirements) : $(build-request) :
+        refined ] ;
+}
+
+
+# Pushes 'target' onto the stack of metatargets currently being generated.
+#
+rule push-target ( target )
+{
+    .targets = $(target) $(.targets) ;
+}
+
+# Pops the most recently pushed metatarget off the generation stack.
+#
+rule pop-target ( )
+{
+    .targets = $(.targets[2-]) ;
+}
+
+# Return the metatarget that is currently being generated, i.e. the head of
+# the stack maintained by 'push-target' / 'pop-target'.
+rule current ( )
+{
+    return $(.targets[1]) ;
+}
+
+
+# Implements the most standard way of constructing main target alternative from
+# sources. Allows sources to be either file or other main target and handles
+# generation of those dependency targets.
+#
+class basic-target : abstract-target
+{
+    import build-request ;
+    import build-system ;
+    import "class" : new ;
+    import feature ;
+    import property ;
+    import property-set ;
+    import sequence ;
+    import set ;
+    import targets ;
+    import virtual-target ;
+
+    rule __init__ ( name : project : sources * : requirements * :
+        default-build * : usage-requirements * )
+    {
+        abstract-target.__init__ $(name) : $(project) ;
+
+        self.sources = $(sources) ;
+        # Unspecified property-sets default to the (interned) empty set so
+        # later code never has to special-case an empty list.
+        if ! $(requirements)
+        {
+            requirements = [ property-set.empty ] ;
+        }
+        self.requirements = $(requirements) ;
+        if ! $(default-build)
+        {
+            default-build = [ property-set.empty ] ;
+        }
+        self.default-build = $(default-build) ;
+        if ! $(usage-requirements)
+        {
+            usage-requirements = [ property-set.empty ] ;
+        }
+        self.usage-requirements = $(usage-requirements) ;
+
+        # A grist on a source element means a property was passed where a
+        # source reference was expected.
+        if $(sources:G)
+        {
+            import errors : user-error : errors.user-error ;
+            errors.user-error properties found "in" the 'sources' parameter
+                "for" [ full-name ] ;
+        }
+    }
+
+    # Requests that virtual targets produced by this main target be marked as
+    # always rebuilt (consumed in 'generate' after 'construct' returns).
+    rule always ( )
+    {
+        self.always = 1 ;
+    }
+
+    # Returns the list of abstract-targets which are used as sources. The extra
+    # properties specified for sources are not represented. The only user for
+    # this rule at the moment is the "--dump-tests" feature of the test system.
+    #
+    rule sources ( )
+    {
+        # Resolved lazily and cached on first use.
+        if ! $(self.source-targets)
+        {
+            for local s in $(self.sources)
+            {
+                self.source-targets += [ targets.resolve-reference $(s) :
+                    $(self.project) ] ;
+            }
+        }
+        return $(self.source-targets) ;
+    }
+
+    # Returns this alternative's requirements property-set.
+    rule requirements ( )
+    {
+        return $(self.requirements) ;
+    }
+
+    # Returns this alternative's default-build property-set.
+    rule default-build ( )
+    {
+        return $(self.default-build) ;
+    }
+
+    # Returns the alternative condition for this alternative, if the condition
+    # is satisfied by 'property-set'.
+    #
+    rule match ( property-set debug ? )
+    {
+        # The condition is composed of all base non-conditional properties. It
+        # is not clear if we should expand 'self.requirements' or not. For one
+        # thing, it would be nice to be able to put
+        #    <toolset>msvc-6.0
+        # in requirements. On the other hand, if we have <variant>release as a
+        # condition it does not make sense to require <optimization>full to be
+        # in the build request just to select this variant.
+        local bcondition = [ $(self.requirements).base ] ;
+        local ccondition = [ $(self.requirements).conditional ] ;
+        local condition = [ set.difference $(bcondition) : $(ccondition) ] ;
+        if $(debug)
+        {
+            ECHO "    next alternative: required properties:"
+                $(condition:E=(empty)) ;
+        }
+
+        if $(condition) in [ $(property-set).raw ]
+        {
+            if $(debug)
+            {
+                ECHO "        matched" ;
+            }
+            return $(condition) ;
+        }
+        else
+        {
+            if $(debug)
+            {
+                ECHO "        not matched" ;
+            }
+            return no-match ;
+        }
+    }
+
+    # Takes a target reference, which might be either target id or a dependency
+    # property, and generates that target using 'property-set' as a build
+    # request.
+    #
+    # The results are added to the variable called 'result-var'. Usage
+    # requirements are added to the variable called 'usage-requirements-var'.
+    #
+    rule generate-dependencies ( dependencies * : property-set : result-var
+        usage-requirements-var )
+    {
+        for local dependency in $(dependencies)
+        {
+            local grist = $(dependency:G) ;
+            local id = $(dependency:G=) ;
+            # First element of the generation result is the usage-requirement
+            # property-set; the rest are virtual targets, re-gristed to match
+            # the dependency property they came from.
+            local result = [ targets.generate-from-reference $(id) :
+                $(self.project) : $(property-set) ] ;
+
+            $(result-var) += $(result[2-]:G=$(grist)) ;
+            $(usage-requirements-var) += [ $(result[1]).raw ] ;
+        }
+    }
+
+    # Determines final build properties, generates sources, and calls
+    # 'construct'. This method should not be overridden.
+    #
+    rule generate ( property-set )
+    {
+        if [ modules.peek : .debug-building ]
+        {
+            ECHO ;
+            local fn = [ full-name ] ;
+            ECHO [ targets.indent ] "Building target '$(fn)'" ;
+            targets.increase-indent ;
+            ECHO [ targets.indent ] Build request: $(property-set)
+                [ $(property-set).raw ] ;
+            local cf = [ build-system.command-line-free-features ] ;
+            ECHO [ targets.indent ] Command line free features: [ $(cf).raw ] ;
+            ECHO [ targets.indent ] Target requirements:
+                [ $(self.requirements).raw ] ;
+        }
+        targets.push-target $(__name__) ;
+
+        # Apply free features from the command line. If user said
+        #   define=FOO
+        # he most likely wants this define to be set for all compiles.
+        # Make it before check for already built.
+        property-set = [ $(property-set).refine
+            [ build-system.command-line-free-features ] ] ;
+
+        # Generation results are memoized per refined build request.
+        if ! $(self.generated.$(property-set))
+        {
+            local rproperties = [ targets.common-properties $(property-set)
+                $(self.requirements) ] ;
+
+            if [ modules.peek : .debug-building ]
+            {
+                ECHO ;
+                ECHO [ targets.indent ] "Common properties: "
+                    [ $(rproperties).raw ] ;
+            }
+
+            if ( $(rproperties[1]) != "@error" ) && ( [ $(rproperties).get
+                <build> ] != no )
+            {
+                local source-targets ;
+                local properties = [ $(rproperties).non-dependency ] ;
+                local usage-requirements ;
+
+                generate-dependencies [ $(rproperties).dependency ] :
+                    $(rproperties) : properties usage-requirements ;
+
+                generate-dependencies $(self.sources) : $(rproperties) :
+                    source-targets usage-requirements ;
+
+                if [ modules.peek : .debug-building ]
+                {
+                    ECHO ;
+                    ECHO [ targets.indent ] "Usage requirements for"
+                        $(self.name)": " $(usage-requirements) ;
+                }
+
+                rproperties = [ property-set.create $(properties)
+                    $(usage-requirements) ] ;
+                usage-requirements = [ property-set.create $(usage-requirements)
+                    ] ;
+
+                if [ modules.peek : .debug-building ]
+                {
+                    ECHO [ targets.indent ] "Build properties: "
+                        [ $(rproperties).raw ] ;
+                }
+
+                local extra = [ $(rproperties).get <source> ] ;
+                source-targets += $(extra:G=) ;
+                # We might get duplicate sources, for example if we link to two
+                # libraries having the same <library> usage requirement. Use
+                # stable sort, since for some targets the order is important,
+                # e.g. RUN_PY targets need a python source to come first.
+                source-targets = [ sequence.unique $(source-targets) : stable ]
+                    ;
+
+                local result = [ construct $(self.name) : $(source-targets) :
+                    $(rproperties) ] ;
+
+                if $(result)
+                {
+                    # 'construct' returns usage requirements first, then the
+                    # created virtual targets.
+                    local gur = $(result[1]) ;
+                    result = $(result[2-]) ;
+
+                    if $(self.always)
+                    {
+                        for local t in $(result)
+                        {
+                            $(t).always ;
+                        }
+                    }
+
+                    local s = [ create-subvariant $(result)
+                        : [ virtual-target.recent-targets ]
+                        : $(property-set) : $(source-targets)
+                        : $(rproperties) : $(usage-requirements) ] ;
+                    virtual-target.clear-recent-targets ;
+
+                    local ur = [ compute-usage-requirements $(s) ] ;
+                    ur = [ $(ur).add $(gur) ] ;
+                    $(s).set-usage-requirements $(ur) ;
+                    if [ modules.peek : .debug-building ]
+                    {
+                        ECHO [ targets.indent ] "Usage requirements from"
+                            $(self.name)": " [ $(ur).raw ] ;
+                    }
+
+                    # Cached as: usage-requirement property-set followed by
+                    # the virtual targets.
+                    self.generated.$(property-set) = $(ur) $(result) ;
+                }
+            }
+            else
+            {
+                if $(rproperties[1]) = "@error"
+                {
+                    ECHO [ targets.indent ] "Skipping build of:" [ full-name ]
+                        "cannot compute common properties" ;
+                }
+                else if [ $(rproperties).get <build> ] = no
+                {
+                    # If we just see <build>no, we cannot produce any reasonable
+                    # diagnostics. The code that adds this property is expected
+                    # to explain why a target is not built, for example using
+                    # the configure.log-component-configuration function.
+                }
+                else
+                {
+                    ECHO [ targets.indent ] "Skipping build of: " [ full-name ]
+                        " unknown reason" ;
+                }
+
+                # We are here either because there has been an error computing
+                # properties or there is <build>no in properties. In the latter
+                # case we do not want any diagnostic. In the former case, we
+                # need diagnostics. FIXME
+
+                # If this target fails to build, add <build>no to properties to
+                # cause any parent target to fail to build. Except that it
+                # - does not work now, since we check for <build>no only in
+                #   common properties, but not in properties that came from
+                #   dependencies
+                # - it is not clear if that is a good idea anyway. The alias
+                #   target, for example, should not fail to build if a
+                #   dependency fails.
+                self.generated.$(property-set) = [ property-set.create <build>no
+                    ] ;
+            }
+        }
+        else
+        {
+            if [ modules.peek : .debug-building ]
+            {
+                ECHO [ targets.indent ] "Already built" ;
+                local ur = $(self.generated.$(property-set)) ;
+                # NOTE(review): Jam list indices are 1-based, so $(ur[0])
+                # expands to an empty list and this debug trace prints no
+                # usage requirements -- $(ur[1]) was presumably intended.
+                # Verify against upstream before changing.
+                ur = $(ur[0]) ;
+                targets.increase-indent ;
+                ECHO [ targets.indent ] "Usage requirements from"
+                    $(self.name)": " [ $(ur).raw ] ;
+                targets.decrease-indent ;
+            }
+        }
+
+        targets.pop-target ;
+        # NOTE(review): this decrease is unconditional while the matching
+        # increase-indent above happens only under .debug-building -- harmless
+        # when debugging is off (popping an empty stack is a no-op), but worth
+        # confirming the asymmetry is intended.
+        targets.decrease-indent ;
+        return $(self.generated.$(property-set)) ;
+    }
+
+    # Given the set of generated targets, and refined build properties,
+    # determines and sets appropriate usage requirements on those targets.
+    #
+    rule compute-usage-requirements ( subvariant )
+    {
+        local rproperties = [ $(subvariant).build-properties ] ;
+        # NOTE(review): 'xusage-requirements' is assigned without 'local' and
+        # therefore persists in the instance module's scope across calls --
+        # confirm this is intentional before tightening it.
+        xusage-requirements = [ targets.evaluate-requirements
+            $(self.usage-requirements) : $(rproperties) : added ] ;
+
+        # We generate all dependency properties and add them, as well as their
+        # usage requirements, to the result.
+        local extra ;
+        generate-dependencies [ $(xusage-requirements).dependency ] :
+            $(rproperties) : extra extra ;
+
+        local result = [ property-set.create
+            [ $(xusage-requirements).non-dependency ] $(extra) ] ;
+
+        # Propagate usage requirements we got from sources, except for the
+        # <pch-header> and <pch-file> features.
+        #
+        # That feature specifies which pch file to use, and should apply only to
+        # direct dependents. Consider:
+        #
+        #   pch pch1 : ...
+        #   lib lib1 : ..... pch1 ;
+        #   pch pch2 :
+        #   lib lib2 : pch2 lib1 ;
+        #
+        # Here, lib2 should not get <pch-header> property from pch1.
+        #
+        # Essentially, when those two features are in usage requirements, they
+        # are propagated only to direct dependents. We might need a more general
+        # mechanism, but for now, only those two features are special.
+        #
+        # TODO - Actually there are more possible candidates like for instance
+        # when listing static library X as a source for another static library.
+        # Then static library X will be added as a <source> property to the
+        # second library's usage requirements but those requirements should last
+        # only up to the first executable or shared library that actually links
+        # to it.
+        local raw = [ $(subvariant).sources-usage-requirements ] ;
+        raw = [ $(raw).raw ] ;
+        raw = [ property.change $(raw) : <pch-header> ] ;
+        raw = [ property.change $(raw) : <pch-file> ] ;
+        return [ $(result).add [ property-set.create $(raw) ] ] ;
+    }
+
+    # Creates new subvariant instances for 'targets'.
+    #   'root-targets'  - virtual targets to be returned to dependants
+    #   'all-targets'   - virtual targets created while building this main target
+    #   'build-request' - property-set instance with requested build properties
+    #
+    local rule create-subvariant ( root-targets * : all-targets * :
+        build-request : sources * : rproperties : usage-requirements )
+    {
+        for local e in $(root-targets)
+        {
+            $(e).root true ;
+        }
+
+        # Process all virtual targets that will be created if this main target
+        # is created.
+        local s = [ new subvariant $(__name__) : $(build-request) : $(sources) :
+            $(rproperties) : $(usage-requirements) : $(all-targets) ] ;
+        for local v in $(all-targets)
+        {
+            if ! [ $(v).creating-subvariant ]
+            {
+                $(v).creating-subvariant $(s) ;
+            }
+        }
+        return $(s) ;
+    }
+
+    # Constructs virtual targets for this abstract target and the dependency
+    # graph. Returns a usage-requirements property-set and a list of virtual
+    # targets. Should be overriden in derived classes.
+    #
+    rule construct ( name : source-targets * : properties * )
+    {
+        import errors : error : errors.error ;
+        errors.error "method should be defined in derived classes" ;
+    }
+}
+
+
+# A basic-target that produces virtual targets of a fixed type (e.g. EXE, LIB)
+# by delegating construction to the generators module.
+#
+class typed-target : basic-target
+{
+    import generators ;
+
+    rule __init__ ( name : project : type : sources * : requirements * :
+        default-build * : usage-requirements * )
+    {
+        basic-target.__init__ $(name) : $(project) : $(sources) :
+            $(requirements) : $(default-build) : $(usage-requirements) ;
+
+        self.type = $(type) ;
+    }
+
+    # Returns the main target type this target produces.
+    rule type ( )
+    {
+        return $(self.type) ;
+    }
+
+    # Overrides basic-target.construct: asks the generators module to build
+    # targets of 'self.type' from 'source-targets', tagging the property set
+    # with <main-target-type>. Prints diagnostics and returns nothing when no
+    # generator could produce the target.
+    rule construct ( name : source-targets * : property-set )
+    {
+        local r = [ generators.construct $(self.project) $(name:S=)
+            : $(self.type)
+            : [ property-set.create [ $(property-set).raw ]
+                <main-target-type>$(self.type) ]
+            : $(source-targets) : true ] ;
+        if ! $(r)
+        {
+            ECHO "warn: Unable to construct" [ full-name ] ;
+
+            # Are there any top-level generators for this type/property set.
+            if ! [ generators.find-viable-generators $(self.type) :
+                $(property-set) ]
+            {
+                ECHO "error: no generators were found for type '$(self.type)'" ;
+                ECHO "error: and the requested properties" ;
+                ECHO "error: make sure you've configured the needed tools" ;
+                ECHO "See http://boost.org/boost-build2/doc/html/bbv2/advanced/configuration.html" ;
+                EXIT "To debug this problem, try the --debug-generators option."
+                    ;
+            }
+        }
+        return $(r) ;
+    }
+}
+
+
+# Return the list of sources to use, if main target rule is invoked with
+# 'sources'. If there are any objects in 'sources', they are treated as main
+# target instances, and the name of such targets are adjusted to be
+# '<name_of_this_target>__<name_of_source_target>'. Such renaming is disabled if
+# a non-empty value is passed as the 'no-renaming' parameter.
+#
+rule main-target-sources ( sources * : main-target-name : no-renaming ? )
+{
+    local result ;
+    for local t in $(sources)
+    {
+        # Class instances in the source list are inline main targets; plain
+        # strings are passed through unchanged.
+        if [ class.is-instance $(t) ]
+        {
+            local name = [ $(t).name ] ;
+            if ! $(no-renaming)
+            {
+                name = $(main-target-name)__$(name) ;
+                $(t).rename $(name) ;
+            }
+            # Inline targets are not built by default.
+            local p = [ $(t).project ] ;
+            $(p).mark-target-as-explicit $(name) ;
+            result += $(name) ;
+        }
+        else
+        {
+            result += $(t) ;
+        }
+    }
+    return $(result) ;
+}
+
+
+# Returns the requirements to use when declaring a main target, obtained by
+# translating all specified property paths and refining project requirements
+# with the ones specified for the target.
+#
+rule main-target-requirements (
+    specification *  # Properties explicitly specified for the main target.
+    : project        # Project where the main target is to be declared.
+)
+{
+    local requirements = [ property-set.refine-from-user-input
+        [ $(project).get requirements ] : $(specification) :
+        [ $(project).project-module ] : [ $(project).get location ] ] ;
+    # Refinement reports conflicts by returning "@error" as the first element.
+    if $(requirements[1]) = "@error"
+    {
+        import errors ;
+        errors.error "Conflicting requirements for target:" $(requirements) ;
+    }
+    # Fold in any toolset-level requirements as well.
+    return [ $(requirements).add [ toolset.requirements ] ] ;
+}
+
+
+# Returns the usage requirements to use when declaring a main target, which are
+# obtained by translating all specified property paths and adding project's
+# usage requirements.
+#
+rule main-target-usage-requirements (
+    specification *  # Use-properties explicitly specified for a main target.
+    : project        # Project where the main target is to be declared.
+)
+{
+    local project-usage-requirements = [ $(project).get usage-requirements ] ;
+
+    # We do not use 'refine-from-user-input' because:
+    # - I am not sure if removing parent's usage requirements makes sense
+    # - refining usage requirements is not needed, since usage requirements are
+    #   always free.
+    local usage-requirements = [ property-set.create-from-user-input
+        $(specification)
+        : [ $(project).project-module ] [ $(project).get location ] ] ;
+
+    return [ $(project-usage-requirements).add $(usage-requirements) ] ;
+}
+
+
+# Return the default build value to use when declaring a main target: the
+# explicitly specified value when non-empty, the declaring project's
+# 'default-build' attribute otherwise. The chosen value is validated and
+# returned as a property-set.
+#
+rule main-target-default-build (
+    specification *  # Default build explicitly specified for a main target.
+    : project        # Project where the main target is to be declared.
+)
+{
+    # '?=' assigns only when the variable is still empty, so an explicit
+    # specification takes precedence over the project attribute.
+    local chosen = $(specification) ;
+    chosen ?= [ $(project).get default-build ] ;
+    return [ property-set.create-with-validation $(chosen) ] ;
+}
+
+
+# Registers the specified target as a main target alternative with its owning
+# project and hands it back so callers can chain the registration.
+#
+rule main-target-alternative ( target )
+{
+    local owning-project = [ $(target).project ] ;
+    $(owning-project).add-alternative $(target) ;
+    return $(target) ;
+}
+
+
+# Creates a metatarget with the specified properties, using 'klass' as the
+# class. The 'name', 'sources', 'requirements', 'default-build' and
+# 'usage-requirements' are assumed to be in the form specified by the user in
+# the Jamfile corresponding to 'project'.
+#
+rule create-metatarget ( klass : project : name : sources * : requirements * :
+    default-build * : usage-requirements * )
+{
+    # Normalize every user-supplied list through the main-target-* helpers,
+    # then register the new instance as an alternative.
+    return [ targets.main-target-alternative [ new $(klass) $(name) : $(project)
+        : [ targets.main-target-sources $(sources) : $(name) ]
+        : [ targets.main-target-requirements $(requirements) : $(project) ]
+        : [ targets.main-target-default-build $(default-build) : $(project) ]
+        : [ targets.main-target-usage-requirements $(usage-requirements) :
+            $(project) ] ] ] ;
+}
+
+
+# Creates a typed-target with the specified properties. The 'name', 'sources',
+# 'requirements', 'default-build' and 'usage-requirements' are assumed to be in
+# the form specified by the user in the Jamfile corresponding to 'project'.
+#
+rule create-typed-target ( type : project : name : sources * : requirements * :
+    default-build * : usage-requirements * )
+{
+    # Same normalization as create-metatarget, specialized to 'typed-target'.
+    return [ targets.main-target-alternative [ new typed-target $(name) :
+        $(project) : $(type)
+        : [ targets.main-target-sources $(sources) : $(name) ]
+        : [ targets.main-target-requirements $(requirements) : $(project) ]
+        : [ targets.main-target-default-build $(default-build) : $(project) ]
+        : [ targets.main-target-usage-requirements $(usage-requirements) :
+            $(project) ] ] ] ;
+}
diff --git a/src/kenlm/jam-files/boost-build/build/toolset.jam b/src/kenlm/jam-files/boost-build/build/toolset.jam
new file mode 100644
index 0000000..6e5b980
--- /dev/null
+++ b/src/kenlm/jam-files/boost-build/build/toolset.jam
@@ -0,0 +1,582 @@
+# Copyright 2003 Dave Abrahams
+# Copyright 2005 Rene Rivera
+# Copyright 2002, 2003, 2004, 2005, 2006 Vladimir Prus
+# Distributed under the Boost Software License, Version 1.0.
+# (See accompanying file LICENSE_1_0.txt or copy at
+# http://www.boost.org/LICENSE_1_0.txt)
+
+# Support for toolset definition.
+
+import errors ;
+import feature ;
+import generators ;
+import numbers ;
+import path ;
+import property ;
+import regex ;
+import sequence ;
+import set ;
+import property-set ;
+
+
+# Monotonically increasing id handed to each registered flag setting (see the
+# 'add-flag' rule, which consumes and increments it).
+.flag-no = 1 ;
+
+.ignore-requirements = ;
+
+# This is used only for testing, to make sure we do not get random extra
+# elements in paths.
+if --ignore-toolset-requirements in [ modules.peek : ARGV ]
+{
+    .ignore-requirements = 1 ;
+}
+
+
+# Initializes an additional toolset-like module. First load the 'toolset-module'
+# and then calls its 'init' rule with trailing arguments.
+#
+rule using ( toolset-module : * )
+{
+    import $(toolset-module) ;
+    # Jam cannot forward a variable number of ':'-separated lists generically,
+    # hence the explicit forwarding of argument lists $(2) through $(9).
+    $(toolset-module).init $(2) : $(3) : $(4) : $(5) : $(6) : $(7) : $(8) : $(9)
+        ;
+}
+
+
+# Expands subfeatures in each property set, e.g. '<toolset>gcc-3.2' will be
+# converted to '<toolset>gcc/<toolset-version>3.2'.
+#
+local rule normalize-condition ( property-sets * )
+{
+    local result ;
+    for local p in $(property-sets)
+    {
+        # Split the '/'-separated property set into individual properties,
+        # expand implied subfeatures, and join the expansion back together.
+        # (The original computed feature.split twice, leaving the first result
+        # in an unused local -- the single computation is reused here.)
+        local expanded = [ feature.expand-subfeatures [ feature.split $(p) ] ] ;
+        result += $(expanded:J=/) ;
+    }
+    return $(result) ;
+}
+
+
+# Specifies if the 'flags' rule should check that the invoking module is the
+# same as the module we are setting the flag for. 'v' can be either 'checked' or
+# 'unchecked'. Subsequent call to 'pop-checking-for-flags-module' will restore
+# the setting that was in effect before calling this rule.
+#
+rule push-checking-for-flags-module ( v )
+{
+    # Settings form a stack; the head is the one currently in effect.
+    .flags-module-checking = $(v) $(.flags-module-checking) ;
+}
+
+# Restores the 'flags' module-checking setting that was in effect before the
+# matching push-checking-for-flags-module call.
+rule pop-checking-for-flags-module ( )
+{
+    .flags-module-checking = $(.flags-module-checking[2-]) ;
+}
+
+
+# Specifies the flags (variables) that must be set on targets under certain
+# conditions, described by arguments.
+#
+rule flags (
+    rule-or-module  # If contains a dot, should be a rule name. The flags will
+                    # be applied when that rule is used to set up build
+                    # actions.
+                    #
+                    # If does not contain dot, should be a module name. The
+                    # flag will be applied for all rules in that module. If
+                    # module for rule is different from the calling module, an
+                    # error is issued.
+
+    variable-name   # Variable that should be set on target.
+    condition * :   # A condition when this flag should be applied. Should be a
+                    # set of property sets. If one of those property sets is
+                    # contained in the build properties, the flag will be used.
+                    # Implied values are not allowed: "<toolset>gcc" should be
+                    # used, not just "gcc". Subfeatures, like in
+                    # "<toolset>gcc-3.2" are allowed. If left empty, the flag
+                    # will be used unconditionally.
+                    #
+                    # Propery sets may use value-less properties ('<a>' vs.
+                    # '<a>value') to match absent properties. This allows to
+                    # separately match:
+                    #
+                    #   <architecture>/<address-model>64
+                    #   <architecture>ia64/<address-model>
+                    #
+                    # Where both features are optional. Without this syntax
+                    # we would be forced to define "default" values.
+
+    values * :      # The value to add to variable. If <feature> is specified,
+                    # then the value of 'feature' will be added.
+    unchecked ?     # If value 'unchecked' is passed, will not test that flags
+                    # are set for the calling module.
+    : hack-hack ?   # For
+                    #   flags rule OPTIONS <cxx-abi> : -model ansi
+                    # Treat <cxx-abi> as condition
+                    # FIXME: ugly hack.
+)
+{
+    local caller = [ CALLER_MODULE ] ;
+    # An unqualified name invoked from a Jamfile module gets qualified with
+    # the caller; otherwise verify the caller owns the named module (unless
+    # checking is suppressed).
+    if ! [ MATCH ".*([.]).*" : $(rule-or-module) ]
+        && [ MATCH "(Jamfile<.*)" : $(caller) ]
+    {
+        # Unqualified rule name, used inside Jamfile. Most likely used with
+        # 'make' or 'notfile' rules. This prevents setting flags on the entire
+        # Jamfile module (this will be considered as rule), but who cares?
+        # Probably, 'flags' rule should be split into 'flags' and
+        # 'flags-on-module'.
+        rule-or-module = $(caller).$(rule-or-module) ;
+    }
+    else
+    {
+        local module_ = [ MATCH "([^.]*).*" : $(rule-or-module) ] ;
+        if $(unchecked) != unchecked
+            && $(.flags-module-checking[1]) != unchecked
+            && $(module_) != $(caller)
+        {
+            errors.error "Module $(caller) attempted to set flags for module $(module_)" ;
+        }
+    }
+
+    if $(condition) && ! $(condition:G=) && ! $(hack-hack)
+    {
+        # We have condition in the form '<feature>', that is, without value.
+        # That is an older syntax:
+        #   flags gcc.link RPATH <dll-path> ;
+        # for compatibility, convert it to
+        #   flags gcc.link RPATH : <dll-path> ;
+        values = $(condition) ;
+        condition = ;
+    }
+
+    if $(condition)
+    {
+        property.validate-property-sets $(condition) ;
+        condition = [ normalize-condition $(condition) ] ;
+    }
+
+    add-flag $(rule-or-module) : $(variable-name) : $(condition) : $(values) ;
+}
+
+
+# Adds a new flag setting with the specified values. Does no checking.
+#
+local rule add-flag ( rule-or-module : variable-name : condition * : values * )
+{
+    # Register the new flag id under its rule/module.
+    .$(rule-or-module).flags += $(.flag-no) ;
+
+    # Store all flags for a module.
+    local module_ = [ MATCH "([^.]*).*" : $(rule-or-module) ] ;
+    .module-flags.$(module_) += $(.flag-no) ;
+    # Store flag-no -> rule-or-module mapping.
+    .rule-or-module.$(.flag-no) = $(rule-or-module) ;
+
+    # The flag's variable, values and condition are kept in per-id variables.
+    .$(rule-or-module).variable.$(.flag-no) += $(variable-name) ;
+    .$(rule-or-module).values.$(.flag-no) += $(values) ;
+    .$(rule-or-module).condition.$(.flag-no) += $(condition) ;
+
+    .flag-no = [ numbers.increment $(.flag-no) ] ;
+}
+
+
+# Returns the first element of 'property-sets' which is a subset of
+# 'properties' or an empty list if no such element exists.
+#
+rule find-property-subset ( property-sets * : properties * )
+{
+    # Cut property values off.
+    local prop-keys = $(properties:G) ;
+
+    local result ;
+    for local s in $(property-sets)
+    {
+        # Once a match is found, remaining candidates are skipped so the
+        # first matching set wins.
+        if ! $(result)
+        {
+            # Handle value-less properties like '<architecture>' (compare with
+            # '<architecture>x86').
+
+            local set = [ feature.split $(s) ] ;
+
+            # Find the set of features that
+            # - have no property specified in required property set
+            # - are omitted in the build property set.
+            local default-props ;
+            for local i in $(set)
+            {
+                # If $(i) is a value-less property it should match default value
+                # of an optional property. See the first line in the example
+                # below:
+                #
+                #  property set     properties     result
+                #  <a>              <b>foo         match
+                #  <a>              <a>foo <b>foo  no match
+                #  <a>foo           <b>foo         no match
+                #  <a>foo           <a>foo <b>foo  match
+                if ! ( $(i:G=) || ( $(i:G) in $(prop-keys) ) )
+                {
+                    default-props += $(i) ;
+                }
+            }
+
+            # The candidate matches when every one of its properties appears
+            # either in the build properties or among the defaults above.
+            if $(set) in $(properties) $(default-props)
+            {
+                result = $(s) ;
+            }
+        }
+    }
+    return $(result) ;
+}
+
+
+# Returns a value to be added to some flag for some target based on the flag's
+# value definition and the given target's property set.
+#
+rule handle-flag-value ( value * : properties * )
+{
+    local result ;
+    # A gristed value such as '<feature>' means "substitute the matching
+    # property's value(s)"; an ungristed value is used literally.
+    if $(value:G)
+    {
+        local matches = [ property.select $(value) : $(properties) ] ;
+        for local p in $(matches)
+        {
+            local att = [ feature.attributes $(p:G) ] ;
+            if dependency in $(att)
+            {
+                # The value of a dependency feature is a target and needs to be
+                # actualized.
+                result += [ $(p:G=).actualize ] ;
+            }
+            else if path in $(att) || free in $(att)
+            {
+                local values ;
+                # Treat features with && in the value specially -- each
+                # &&-separated element is considered a separate value. This is
+                # needed to handle searched libraries or include paths, which
+                # may need to be in a specific order.
+                if ! [ MATCH (&&) : $(p:G=) ]
+                {
+                    values = $(p:G=) ;
+                }
+                else
+                {
+                    values = [ regex.split $(p:G=) "&&" ] ;
+                }
+                if path in $(att)
+                {
+                    result += [ sequence.transform path.native : $(values) ] ;
+                }
+                else
+                {
+                    result += $(values) ;
+                }
+            }
+            else
+            {
+                result += $(p:G=) ;
+            }
+        }
+    }
+    else
+    {
+        result += $(value) ;
+    }
+    return $(result) ;
+}
+
+
+# Given a rule name and a property set, returns a list of interleaved variable
+# names and values which must be set on targets for that rule/property-set
+# combination.
+#
+rule set-target-variables-aux ( rule-or-module : property-set )
+{
+    local result ;
+    # 'local' added: the original assigned to 'properties' without declaring
+    # it, leaking the raw property list into the enclosing module's scope.
+    local properties = [ $(property-set).raw ] ;
+    for local f in $(.$(rule-or-module).flags)
+    {
+        local variable = $(.$(rule-or-module).variable.$(f)) ;
+        local condition = $(.$(rule-or-module).condition.$(f)) ;
+        local values = $(.$(rule-or-module).values.$(f)) ;
+
+        # A flag applies either unconditionally or when one of its condition
+        # property sets is a subset of the build properties.
+        if ! $(condition) ||
+            [ find-property-subset $(condition) : $(properties) ]
+        {
+            local processed ;
+            for local v in $(values)
+            {
+                # The value might be <feature-name> so needs special treatment.
+                processed += [ handle-flag-value $(v) : $(properties) ] ;
+            }
+            # Interleave variable names with each resulting value.
+            for local r in $(processed)
+            {
+                result += $(variable) $(r) ;
+            }
+        }
+    }
+
+    # Strip away last dot separated part and recurse.
+    local next = [ MATCH ^(.+)\\.([^\\.])* : $(rule-or-module) ] ;
+    if $(next)
+    {
+        result += [ set-target-variables-aux $(next[1]) : $(property-set) ] ;
+    }
+    return $(result) ;
+}
+
+# Returns the grists of all features referenced by the flag definitions
+# registered for 'rule-or-module' (and for the modules it is nested in,
+# obtained by stripping trailing dot-separated components). The result is
+# memoized per rule-or-module.
+#
+rule relevant-features ( rule-or-module )
+{
+    local result ;
+    if ! $(.relevant-features.$(rule-or-module))
+    {
+        for local f in $(.$(rule-or-module).flags)
+        {
+            local condition = $(.$(rule-or-module).condition.$(f)) ;
+            local values = $(.$(rule-or-module).values.$(f)) ;
+
+            # Collect feature grists mentioned in flag conditions.
+            for local c in $(condition)
+            {
+                for local p in [ feature.split $(c) ]
+                {
+                    if $(p:G)
+                    {
+                        result += $(p:G) ;
+                    }
+                    else
+                    {
+                        # Implied value (e.g. 'gcc-3.2'): expand it to recover
+                        # the grists of the corresponding (sub)features.
+                        local temp = [ feature.expand-subfeatures $(p) ] ;
+                        result += $(temp:G) ;
+                    }
+                }
+            }
+
+            # Gristed flag values ('<feature-name>') reference features too.
+            for local v in $(values)
+            {
+                if $(v:G)
+                {
+                    result += $(v:G) ;
+                }
+            }
+        }
+
+        # Strip away last dot separated part and recurse.
+        local next = [ MATCH ^(.+)\\.([^\\.])* : $(rule-or-module) ] ;
+        if $(next)
+        {
+            result += [ relevant-features $(next[1]) ] ;
+        }
+        result = [ sequence.unique $(result) ] ;
+        # Removed a dead branch here: the original contained
+        #   if $(result[1]) = "" { result = $(result) ; }
+        # whose body reassigned 'result' to itself -- a no-op in all cases.
+        .relevant-features.$(rule-or-module) = $(result) ;
+        return $(result) ;
+    }
+    else
+    {
+        return $(.relevant-features.$(rule-or-module)) ;
+    }
+}
+
+# Returns a property-set containing only those properties from 'property-set'
+# whose features are relevant to the flag definitions of 'rule-or-module'.
+# Results are memoized per (rule-or-module, property-set) pair.
+rule filter-property-set ( rule-or-module : property-set )
+{
+    local key = .filtered.property-set.$(rule-or-module).$(property-set) ;
+    if ! $($(key))
+    {
+        local relevant = [ relevant-features $(rule-or-module) ] ;
+        local result ;
+        for local p in [ $(property-set).raw ]
+        {
+            # Keep a property only when its grist is among the relevant ones.
+            if $(p:G) in $(relevant)
+            {
+                result += $(p) ;
+            }
+        }
+        $(key) = [ property-set.create $(result) ] ;
+    }
+    return $($(key)) ;
+}
+
+rule set-target-variables ( rule-or-module targets + : property-set )
+{
+ property-set = [ filter-property-set $(rule-or-module) : $(property-set) ] ;
+ local key = .stv.$(rule-or-module).$(property-set) ;
+ local settings = $($(key)) ;
+ if ! $(settings)
+ {
+ settings = [ set-target-variables-aux $(rule-or-module) :
+ $(property-set) ] ;
+
+ if ! $(settings)
+ {
+ settings = none ;
+ }
+ $(key) = $(settings) ;
+ }
+
+ if $(settings) != none
+ {
+ local var-name = ;
+ for local name-or-value in $(settings)
+ {
+ if $(var-name)
+ {
+ $(var-name) on $(targets) += $(name-or-value) ;
+ var-name = ;
+ }
+ else
+ {
+ var-name = $(name-or-value) ;
+ }
+ }
+ }
+}
+
+
+# Make toolset 'toolset', defined in a module of the same name, inherit from
+# 'base'.
+# 1. The 'init' rule from 'base' is imported into 'toolset' with full name.
+# Another 'init' is called, which forwards to the base one.
+# 2. All generators from 'base' are cloned. The ids are adjusted and <toolset>
+# property in requires is adjusted too.
+# 3. All flags are inherited.
+# 4. All rules are imported.
+#
+rule inherit ( toolset : base )
+{
+ import $(base) ;
+ inherit-generators $(toolset) : $(base) ;
+ inherit-flags $(toolset) : $(base) ;
+ inherit-rules $(toolset) : $(base) ;
+}
+
+
+rule inherit-generators ( toolset properties * : base : generators-to-ignore * )
+{
+ properties ?= <toolset>$(toolset) ;
+ local base-generators = [ generators.generators-for-toolset $(base) ] ;
+ for local g in $(base-generators)
+ {
+ local id = [ $(g).id ] ;
+
+ if ! $(id) in $(generators-to-ignore)
+ {
+ # Some generator names have multiple periods in their name, so
+ # $(id:B=$(toolset)) does not generate the right new-id name. E.g.
+ # if id = gcc.compile.c++ then $(id:B=darwin) = darwin.c++, which is
+ # not what we want. Manually parse the base and suffix. If there is
+ # a better way to do this, I would love to see it. See also the
+ # register() rule in the generators module.
+ local base = $(id) ;
+ local suffix = "" ;
+ while $(base:S)
+ {
+ suffix = $(base:S)$(suffix) ;
+ base = $(base:B) ;
+ }
+ local new-id = $(toolset)$(suffix) ;
+
+ generators.register [ $(g).clone $(new-id) : $(properties) ] ;
+ }
+ }
+}
+
+
+# Brings all flag definitions from the 'base' toolset into the 'toolset'
+# toolset. Flag definitions whose conditions make use of properties in
+# 'prohibited-properties' are ignored. Note that a prohibited entry names a full
+# property, not a feature: <debug-symbols>on and <debug-symbols>off are distinct
+# properties, so blocking one of them does not block the other one.
+#
+# The flag conditions are not altered at all, so if a condition includes a name,
+# or version of a base toolset, it will not ever match the inheriting toolset.
+# When such flag settings must be inherited, define a rule in base toolset
+# module and call it as needed.
+#
+rule inherit-flags ( toolset : base : prohibited-properties * : prohibited-vars * )
+{
+ for local f in $(.module-flags.$(base))
+ {
+ local rule-or-module = $(.rule-or-module.$(f)) ;
+ if ( [ set.difference
+ $(.$(rule-or-module).condition.$(f)) :
+ $(prohibited-properties) ]
+ || ! $(.$(rule-or-module).condition.$(f))
+ ) && ( ! $(.$(rule-or-module).variable.$(f)) in $(prohibited-vars) )
+ {
+ local rule_ = [ MATCH "[^.]*\.(.*)" : $(rule-or-module) ] ;
+ local new-rule-or-module ;
+ if $(rule_)
+ {
+ new-rule-or-module = $(toolset).$(rule_) ;
+ }
+ else
+ {
+ new-rule-or-module = $(toolset) ;
+ }
+
+ add-flag
+ $(new-rule-or-module)
+ : $(.$(rule-or-module).variable.$(f))
+ : $(.$(rule-or-module).condition.$(f))
+ : $(.$(rule-or-module).values.$(f)) ;
+ }
+ }
+}
+
+
+rule inherit-rules ( toolset : base : localize ? )
+{
+ # It appears that "action" creates a local rule.
+ local base-generators = [ generators.generators-for-toolset $(base) ] ;
+ local rules ;
+ for local g in $(base-generators)
+ {
+ rules += [ MATCH "[^.]*\.(.*)" : [ $(g).rule-name ] ] ;
+ }
+ rules = [ sequence.unique $(rules) ] ;
+ IMPORT $(base) : $(rules) : $(toolset) : $(rules) : $(localize) ;
+ IMPORT $(toolset) : $(rules) : : $(toolset).$(rules) ;
+}
+
+.requirements = [ property-set.empty ] ;
+
+# Return the list of global 'toolset requirements'. Those requirements will be
+# automatically added to the requirements of any main target.
+#
+rule requirements ( )
+{
+ return $(.requirements) ;
+}
+
+
+# Adds elements to the list of global 'toolset requirements'. The requirements
+# will be automatically added to the requirements for all main targets, as if
+# they were specified literally. For best results, all requirements added should
+# be conditional or indirect conditional.
+#
+rule add-requirements ( requirements * )
+{
+ if ! $(.ignore-requirements)
+ {
+ requirements = [ property.translate-indirect $(specification) : [ CALLER_MODULE ] ] ;
+ requirements = [ property.expand-subfeatures-in-conditions $(requirements) ] ;
+ requirements = [ property.make $(requirements) ] ;
+ .requirements = [ $(.requirements).add-raw $(requirements) ] ;
+ }
+}
+
+
+rule __test__ ( )
+{
+ import assert ;
+ local p = <b>0 <c>1 <d>2 <e>3 <f>4 ;
+ assert.result <c>1/<d>2/<e>3 : find-property-subset <c>1/<d>2/<e>3 <a>0/<b>0/<c>1 <d>2/<e>5 <a>9 : $(p) ;
+ assert.result : find-property-subset <a>0/<b>0/<c>9/<d>9/<e>5 <a>9 : $(p) ;
+
+ local p-set = <a>/<b> <a>0/<b> <a>/<b>1 <a>0/<b>1 ;
+ assert.result <a>/<b> : find-property-subset $(p-set) : ;
+ assert.result <a>0/<b> : find-property-subset $(p-set) : <a>0 <c>2 ;
+ assert.result <a>/<b>1 : find-property-subset $(p-set) : <b>1 <c>2 ;
+ assert.result <a>0/<b>1 : find-property-subset $(p-set) : <a>0 <b>1 ;
+}
diff --git a/src/kenlm/jam-files/boost-build/build/type.jam b/src/kenlm/jam-files/boost-build/build/type.jam
new file mode 100644
index 0000000..e8cc44e
--- /dev/null
+++ b/src/kenlm/jam-files/boost-build/build/type.jam
@@ -0,0 +1,401 @@
+# Copyright 2002, 2003 Dave Abrahams
+# Copyright 2002, 2003, 2004, 2005, 2006 Vladimir Prus
+# Distributed under the Boost Software License, Version 1.0.
+# (See accompanying file LICENSE_1_0.txt or http://www.boost.org/LICENSE_1_0.txt)
+
+# Deals with target type declaration and defines target class which supports
+# typed targets.
+
+import "class" : new ;
+import feature ;
+import generators : * ;
+import os ;
+import project ;
+import property ;
+import scanner ;
+
+# The following import would create a circular dependency:
+# project -> project-root -> builtin -> type -> targets -> project
+# import targets ;
+
+# The feature is optional so it would never get added implicitly. It is used
+# only for internal purposes and in all cases we want to use it explicitly.
+feature.feature target-type : : composite optional ;
+
+feature.feature main-target-type : : optional incidental ;
+feature.feature base-target-type : : composite optional free ;
+
+
+# Registers a target type, possible derived from a 'base-type'. Providing a list
+# of 'suffixes' here is a shortcut for separately calling the register-suffixes
+# rule with the given suffixes and the set-generated-target-suffix rule with the
+# first given suffix.
+#
+rule register ( type : suffixes * : base-type ? )
+{
+ # Type names cannot contain hyphens, because when used as feature-values
+ # they would be interpreted as composite features which need to be
+ # decomposed.
+ switch $(type)
+ {
+ case *-* :
+ import errors ;
+ errors.error "type name \"$(type)\" contains a hyphen" ;
+ }
+
+ if $(type) in $(.types)
+ {
+ import errors ;
+ errors.error "Type $(type) is already registered." ;
+ }
+
+ {
+ .types += $(type) ;
+ .base.$(type) = $(base-type) ;
+ .derived.$(base-type) += $(type) ;
+ .bases.$(type) = $(type) $(.bases.$(base-type)) ;
+
+ # Store suffixes for generated targets.
+ .suffixes.$(type) = [ new property-map ] ;
+
+ # Store prefixes for generated targets (e.g. "lib" for library).
+ .prefixes.$(type) = [ new property-map ] ;
+
+ if $(suffixes)-is-defined
+ {
+ # Specify mapping from suffixes to type.
+ register-suffixes $(suffixes) : $(type) ;
+ # By default generated targets of 'type' will use the first of
+ # 'suffixes'. This may be overridden.
+ set-generated-target-suffix $(type) : : $(suffixes[1]) ;
+ }
+
+ feature.extend target-type : $(type) ;
+ feature.extend main-target-type : $(type) ;
+ feature.extend base-target-type : $(type) ;
+
+ feature.compose <target-type>$(type) : $(base-type:G=<base-target-type>) ;
+ feature.compose <base-target-type>$(type) : <base-target-type>$(base-type) ;
+
+ # We used to declare the main target rule only when a 'main' parameter
+ # has been specified. However, it is hard to decide that a type will
+ # *never* need a main target rule and so from time to time we needed to
+ # make yet another type 'main'. So now a main target rule is defined for
+ # each type.
+ main-rule-name = [ type-to-rule-name $(type) ] ;
+ .main-target-type.$(main-rule-name) = $(type) ;
+ IMPORT $(__name__) : main-target-rule : : $(main-rule-name) ;
+
+ # Adding a new derived type affects generator selection so we need to
+ # make the generator selection module update any of its cached
+ # information related to a new derived type being defined.
+ generators.update-cached-information-with-a-new-type $(type) ;
+ }
+}
+
+
+# Given a type, returns the name of the main target rule which creates targets
+# of that type.
+#
+rule type-to-rule-name ( type )
+{
+ # Lowercase everything. Convert underscores to dashes.
+ import regex ;
+ local n = [ regex.split $(type:L) "_" ] ;
+ return $(n:J=-) ;
+}
+
+
+# Given a main target rule name, returns the type for which it creates targets.
+#
+rule type-from-rule-name ( rule-name )
+{
+ return $(.main-target-type.$(rule-name)) ;
+}
+
+
+# Specifies that files with suffix from 'suffixes' be recognized as targets of
+# type 'type'. Issues an error if a different type is already specified for any
+# of the suffixes.
+#
+rule register-suffixes ( suffixes + : type )
+{
+ for local s in $(suffixes)
+ {
+ if ! $(.type.$(s))
+ {
+ .type.$(s) = $(type) ;
+ }
+ else if $(.type.$(s)) != $(type)
+ {
+ import errors ;
+ errors.error Attempting to specify multiple types for suffix
+ \"$(s)\" : "Old type $(.type.$(s)), New type $(type)" ;
+ }
+ }
+}
+
+
+# Returns true iff type has been registered.
+#
+rule registered ( type )
+{
+ if $(type) in $(.types)
+ {
+ return true ;
+ }
+}
+
+
+# Issues an error if 'type' is unknown.
+#
+rule validate ( type )
+{
+ if ! [ registered $(type) ]
+ {
+ import errors ;
+ errors.error "Unknown target type $(type)" ;
+ }
+}
+
+
+# Sets a scanner class that will be used for this 'type'.
+#
+rule set-scanner ( type : scanner )
+{
+ validate $(type) ;
+ .scanner.$(type) = $(scanner) ;
+}
+
+
+# Returns a scanner instance appropriate to 'type' and 'properties'.
+#
+rule get-scanner ( type : property-set )
+{
+ if $(.scanner.$(type))
+ {
+ return [ scanner.get $(.scanner.$(type)) : $(property-set) ] ;
+ }
+}
+
+
+# Returns a base type for the given type or nothing in case the given type is
+# not derived.
+#
+rule base ( type )
+{
+ return $(.base.$(type)) ;
+}
+
+
+# Returns the given type and all of its base types in order of their distance
+# from type.
+#
+rule all-bases ( type )
+{
+ return $(.bases.$(type)) ;
+}
+
+
+# Returns the given type and all of its derived types in order of their distance
+# from type.
+#
+rule all-derived ( type )
+{
+ local result = $(type) ;
+ for local d in $(.derived.$(type))
+ {
+ result += [ all-derived $(d) ] ;
+ }
+ return $(result) ;
+}
+
+
+# Returns true if 'type' is equal to 'base' or has 'base' as its direct or
+# indirect base.
+#
+rule is-derived ( type base )
+{
+ if $(base) in $(.bases.$(type))
+ {
+ return true ;
+ }
+}
+
+# Returns true if 'type' is either derived from or is equal to 'base'.
+#
+# TODO: It might be that is-derived and is-subtype were meant to be different
+# rules - one returning true for type = base and one not, but as currently
+# implemented they are actually the same. Clean this up.
+#
+rule is-subtype ( type base )
+{
+ return [ is-derived $(type) $(base) ] ;
+}
+
+
+
+
+# Sets a file suffix to be used when generating a target of 'type' with the
+# specified properties. Can be called with no properties if no suffix has
+# already been specified for the 'type'. The 'suffix' parameter can be an empty
+# string ("") to indicate that no suffix should be used.
+#
+# Note that this does not cause files with 'suffix' to be automatically
+# recognized as being of 'type'. Two different types can use the same suffix for
+# their generated files but only one type can be auto-detected for a file with
+# that suffix. User should explicitly specify which one using the
+# register-suffixes rule.
+#
+rule set-generated-target-suffix ( type : properties * : suffix )
+{
+ set-generated-target-ps suffix : $(type) : $(properties) : $(suffix) ;
+}
+
+
+# Change the suffix previously registered for this type/properties combination.
+# If suffix is not yet specified, sets it.
+#
+rule change-generated-target-suffix ( type : properties * : suffix )
+{
+ change-generated-target-ps suffix : $(type) : $(properties) : $(suffix) ;
+}
+
+
+# Returns the suffix used when generating a file of 'type' with the given
+# properties.
+#
+rule generated-target-suffix ( type : property-set )
+{
+ return [ generated-target-ps suffix : $(type) : $(property-set) ] ;
+}
+
+
+# Sets a target prefix that should be used when generating targets of 'type'
+# with the specified properties. Can be called with empty properties if no
+# prefix for 'type' has been specified yet.
+#
+# The 'prefix' parameter can be empty string ("") to indicate that no prefix
+# should be used.
+#
+# Usage example: library names use the "lib" prefix on unix.
+#
+rule set-generated-target-prefix ( type : properties * : prefix )
+{
+ set-generated-target-ps prefix : $(type) : $(properties) : $(prefix) ;
+}
+
+
+# Change the prefix previously registered for this type/properties combination.
+# If prefix is not yet specified, sets it.
+#
+rule change-generated-target-prefix ( type : properties * : prefix )
+{
+ change-generated-target-ps prefix : $(type) : $(properties) : $(prefix) ;
+}
+
+
+rule generated-target-prefix ( type : property-set )
+{
+ return [ generated-target-ps prefix : $(type) : $(property-set) ] ;
+}
+
+
+# Common rules for prefix/suffix provisioning follow.
+
+local rule set-generated-target-ps ( ps : type : properties * : psval )
+{
+ $(.$(ps)es.$(type)).insert $(properties) : $(psval) ;
+}
+
+
+local rule change-generated-target-ps ( ps : type : properties * : psval )
+{
+ local prev = [ $(.$(ps)es.$(type)).find-replace $(properties) : $(psval) ] ;
+ if ! $(prev)
+ {
+ set-generated-target-ps $(ps) : $(type) : $(properties) : $(psval) ;
+ }
+}
+
+
+# Returns either prefix or suffix (as indicated by 'ps') that should be used
+# when generating a target of 'type' with the specified properties. Parameter
+# 'ps' can be either "prefix" or "suffix". If no prefix/suffix is specified for
+# 'type', returns prefix/suffix for base type, if any.
+#
+local rule generated-target-ps ( ps : type : property-set )
+{
+ local result ;
+ local found ;
+ while $(type) && ! $(found)
+ {
+ result = [ $(.$(ps)es.$(type)).find $(property-set) ] ;
+ # If the prefix/suffix is explicitly set to an empty string, we consider
+ # prefix/suffix to be found. If we were not to compare with "", there
+ # would be no way to specify an empty prefix/suffix.
+ if $(result)-is-defined
+ {
+ found = true ;
+ }
+ type = $(.base.$(type)) ;
+ }
+ if $(result) = ""
+ {
+ result = ;
+ }
+ return $(result) ;
+}
+
+
+# Returns file type given its name. If there are several dots in filename, tries
+# each suffix. E.g. for name of "file.so.1.2" suffixes "2", "1", and "so" will
+# be tried.
+#
+rule type ( filename )
+{
+ if [ os.name ] in NT CYGWIN
+ {
+ filename = $(filename:L) ;
+ }
+ local type ;
+ while ! $(type) && $(filename:S)
+ {
+ local suffix = $(filename:S) ;
+ type = $(.type$(suffix)) ;
+ filename = $(filename:S=) ;
+ }
+ return $(type) ;
+}
+
+
+# Rule used to construct all main targets. Note that this rule gets imported
+# into the global namespace under different alias names and the exact target
+# type to construct is selected based on the alias used to actually invoke this
+# rule.
+#
+rule main-target-rule ( name : sources * : requirements * : default-build * :
+ usage-requirements * )
+{
+ # First discover the required target type based on the exact alias used to
+ # invoke this rule.
+ local bt = [ BACKTRACE 1 ] ;
+ local rulename = $(bt[4]) ;
+ local target-type = [ type-from-rule-name $(rulename) ] ;
+
+ # This is a circular module dependency and so must be imported here.
+ import targets ;
+
+ return [ targets.create-typed-target $(target-type) : [ project.current ] :
+ $(name) : $(sources) : $(requirements) : $(default-build) :
+ $(usage-requirements) ] ;
+}
+
+
+rule __test__ ( )
+{
+ import assert ;
+
+ # TODO: Add tests for all the is-derived, is-base & related type relation
+ # checking rules.
+}
diff --git a/src/kenlm/jam-files/boost-build/build/version.jam b/src/kenlm/jam-files/boost-build/build/version.jam
new file mode 100644
index 0000000..d4b787d
--- /dev/null
+++ b/src/kenlm/jam-files/boost-build/build/version.jam
@@ -0,0 +1,165 @@
+# Copyright 2002, 2003, 2004, 2006 Vladimir Prus
+# Copyright 2008, 2012 Jurko Gospodnetic
+# Distributed under the Boost Software License, Version 1.0.
+# (See accompanying file LICENSE_1_0.txt or http://www.boost.org/LICENSE_1_0.txt)
+
+import numbers ;
+
+
+.major = "2011" ;
+.minor = "12" ;
+
+
+rule boost-build ( )
+{
+ return "$(.major).$(.minor)-svn" ;
+}
+
+
+rule print ( )
+{
+ if [ verify-engine-version ]
+ {
+ ECHO "Boost.Build" [ boost-build ] ;
+ }
+}
+
+
+rule verify-engine-version ( )
+{
+ local v = [ modules.peek : JAM_VERSION ] ;
+
+ if $(v[1]) != $(.major) || $(v[2]) != $(.minor)
+ {
+ local argv = [ modules.peek : ARGV ] ;
+ local e = $(argv[1]) ;
+ local l = [ modules.binding version ] ;
+ l = $(l:D) ;
+ l = $(l:D) ;
+ ECHO "warning: mismatched versions of Boost.Build engine and core" ;
+ ECHO "warning: Boost.Build engine ($(e)) is $(v:J=.)" ;
+ ECHO "warning: Boost.Build core (at $(l)) is" [ boost-build ] ;
+ }
+ else
+ {
+ return true ;
+ }
+}
+
+
+# Utility rule for testing whether all elements in a sequence are equal to 0.
+#
+local rule is-all-zeroes ( sequence * )
+{
+ local result = "true" ;
+ for local e in $(sequence)
+ {
+ if $(e) != "0"
+ {
+ result = "" ;
+ }
+ }
+ return $(result) ;
+}
+
+
+# Returns "true" if the first version is less than the second one.
+#
+rule version-less ( lhs + : rhs + )
+{
+ numbers.check $(lhs) ;
+ numbers.check $(rhs) ;
+
+ local done ;
+ local result ;
+
+ while ! $(done) && $(lhs) && $(rhs)
+ {
+ if [ numbers.less $(lhs[1]) $(rhs[1]) ]
+ {
+ done = "true" ;
+ result = "true" ;
+ }
+ else if [ numbers.less $(rhs[1]) $(lhs[1]) ]
+ {
+ done = "true" ;
+ }
+ else
+ {
+ lhs = $(lhs[2-]) ;
+ rhs = $(rhs[2-]) ;
+ }
+ }
+ if ( ! $(done) && ! $(lhs) && ! [ is-all-zeroes $(rhs) ] )
+ {
+ result = "true" ;
+ }
+
+ return $(result) ;
+}
+
+
+# Returns "true" if the current JAM version is at least the given
+# version.
+#
+rule check-jam-version ( version + )
+{
+ local version-tag = $(version:J=.) ;
+ if ! $(version-tag)
+ {
+ import errors ;
+ errors.error Invalid version specifier: : $(version:E="(undefined)") ;
+ }
+
+ if ! $(.jam-version-check.$(version-tag))-is-defined
+ {
+ local jam-version = [ modules.peek : JAM_VERSION ] ;
+ if ! $(jam-version)
+ {
+ import errors ;
+ errors.error "Unable to deduce Boost Jam version. Your Boost Jam"
+ "installation is most likely terribly outdated." ;
+ }
+ .jam-version-check.$(version-tag) = "true" ;
+ if [ version-less [ modules.peek : JAM_VERSION ] : $(version) ]
+ {
+ .jam-version-check.$(version-tag) = "" ;
+ }
+ }
+ return $(.jam-version-check.$(version-tag)) ;
+}
+
+
+rule __test__ ( )
+{
+ import assert ;
+
+ local jam-version = [ modules.peek : JAM_VERSION ] ;
+ local future-version = $(jam-version) ;
+ future-version += "1" ;
+
+ assert.true check-jam-version $(jam-version) ;
+ assert.false check-jam-version $(future-version) ;
+
+ assert.true version-less 0 : 1 ;
+ assert.false version-less 0 : 0 ;
+ assert.true version-less 1 : 2 ;
+ assert.false version-less 1 : 1 ;
+ assert.false version-less 2 : 1 ;
+ assert.true version-less 3 1 20 : 3 4 10 ;
+ assert.false version-less 3 1 10 : 3 1 10 ;
+ assert.false version-less 3 4 10 : 3 1 20 ;
+ assert.true version-less 3 1 20 5 1 : 3 4 10 ;
+ assert.false version-less 3 1 10 5 1 : 3 1 10 ;
+ assert.false version-less 3 4 10 5 1 : 3 1 20 ;
+ assert.true version-less 3 1 20 : 3 4 10 5 1 ;
+ assert.true version-less 3 1 10 : 3 1 10 5 1 ;
+ assert.false version-less 3 4 10 : 3 1 20 5 1 ;
+ assert.false version-less 3 1 10 : 3 1 10 0 0 ;
+ assert.false version-less 3 1 10 0 0 : 3 1 10 ;
+ assert.false version-less 3 1 10 0 : 3 1 10 0 0 ;
+ assert.false version-less 3 1 10 0 : 03 1 10 0 0 ;
+ assert.false version-less 03 1 10 0 : 3 1 10 0 0 ;
+
+ # TODO: Add tests for invalid input data being sent to version-less.
+}
diff --git a/src/kenlm/jam-files/boost-build/build/virtual-target.jam b/src/kenlm/jam-files/boost-build/build/virtual-target.jam
new file mode 100644
index 0000000..f62eadb
--- /dev/null
+++ b/src/kenlm/jam-files/boost-build/build/virtual-target.jam
@@ -0,0 +1,1339 @@
+# Copyright 2003 Dave Abrahams
+# Copyright 2005, 2006 Rene Rivera
+# Copyright 2002, 2003, 2004, 2005, 2006 Vladimir Prus
+# Distributed under the Boost Software License, Version 1.0.
+# (See accompanying file LICENSE_1_0.txt or copy at
+# http://www.boost.org/LICENSE_1_0.txt)
+
+# Implements virtual targets, which correspond to actual files created during a
+# build, but are not yet targets in Jam sense. They are needed, for example,
+# when searching for possible transformation sequences, when it is not yet known
+# whether a particular target should be created at all.
+#
+# +--------------------------+
+# | virtual-target |
+# +==========================+
+# | actualize |
+# +--------------------------+
+# | actualize-action() = 0 |
+# | actualize-location() = 0 |
+# +----------------+---------+
+# |
+# ^
+# / \
+# +-+-+
+# |
+# +---------------------+ +-------+--------------+
+# | action | | abstract-file-target |
+# +=====================| * +======================+
+# | action-name | +--+ action |
+# | properties | | +----------------------+
+# +---------------------+--+ | actualize-action() |
+# | actualize() |0..1 +-----------+----------+
+# | path() | |
+# | adjust-properties() | sources |
+# | actualize-sources() | targets |
+# +------+--------------+ ^
+# | / \
+# ^ +-+-+
+# / \ |
+# +-+-+ +-------------+-------------+
+# | | |
+# | +------+---------------+ +--------+-------------+
+# | | file-target | | searched-lib-target |
+# | +======================+ +======================+
+# | | actualize-location() | | actualize-location() |
+# | +----------------------+ +----------------------+
+# |
+# +-+------------------------------+
+# | |
+# +----+----------------+ +---------+-----------+
+# | compile-action | | link-action |
+# +=====================+ +=====================+
+# | adjust-properties() | | adjust-properties() |
+# +---------------------+ | actualize-sources() |
+# +---------------------+
+#
+# The 'compile-action' and 'link-action' classes are not defined here but in
+# builtin.jam modules. They are shown in the diagram to give the big picture.
+
+import "class" : new ;
+import path ;
+import sequence ;
+import set ;
+import type ;
+import utility ;
+
+
+# Models a potential target. It can be converted into a Jam target and used in
+# building, if needed. However, it can be also dropped, which allows us to
+# search for different transformations and select only one.
+#
+class virtual-target
+{
+ import scanner ;
+ import sequence ;
+ import utility ;
+ import virtual-target ;
+
+ rule __init__ (
+ name # Target/project name.
+ : project # Project to which this target belongs.
+ )
+ {
+ self.name = $(name) ;
+ self.project = $(project) ;
+ self.dependencies = ;
+ }
+
+ # Name of this target.
+ #
+ rule name ( )
+ {
+ return $(self.name) ;
+ }
+
+ # Project of this target.
+ #
+ rule project ( )
+ {
+ return $(self.project) ;
+ }
+
+ # Adds additional 'virtual-target' instances this one depends on.
+ #
+ rule depends ( d + )
+ {
+ self.dependencies = [ sequence.merge $(self.dependencies) :
+ [ sequence.insertion-sort $(d) ] ] ;
+ }
+
+ rule dependencies ( )
+ {
+ return $(self.dependencies) ;
+ }
+
+ rule always ( )
+ {
+ .always = 1 ;
+ }
+
+ # Generates all the actual targets and sets up build actions for this
+ # target.
+ #
+ # If 'scanner' is specified, creates an additional target with the same
+ # location as the actual target, which will depend on the actual target and
+ # be associated with a 'scanner'. That additional target is returned. See
+ # the docs (#dependency_scanning) for rationale. Target must correspond to a
+ # file if 'scanner' is specified.
+ #
+ # If scanner is not specified then the actual target is returned.
+ #
+ rule actualize ( scanner ? )
+ {
+ local actual-name = [ actualize-no-scanner ] ;
+
+ if $(.always)
+ {
+ ALWAYS $(actual-name) ;
+ }
+
+ if ! $(scanner)
+ {
+ return $(actual-name) ;
+ }
+ else
+ {
+ # Add the scanner instance to the grist for name.
+ local g = [ sequence.join [ utility.ungrist $(actual-name:G) ]
+ $(scanner) : - ] ;
+ local name = $(actual-name:G=$(g)) ;
+
+ if ! $(self.made.$(scanner))
+ {
+ self.made.$(scanner) = true ;
+ actualize-location $(name) ;
+ scanner.install $(scanner) : $(name) ;
+ }
+ return $(name) ;
+ }
+ }
+
+# private: (overridables)
+
+ # Sets up build actions for 'target'. Should call appropriate rules and set
+ # target variables.
+ #
+ rule actualize-action ( target )
+ {
+ import errors : error : errors.error ;
+ errors.error "method should be defined in derived classes" ;
+ }
+
+ # Sets up variables on 'target' which specify its location.
+ #
+ rule actualize-location ( target )
+ {
+ import errors : error : errors.error ;
+ errors.error "method should be defined in derived classes" ;
+ }
+
+ # If the target is a generated one, returns the path where it will be
+ # generated. Otherwise, returns an empty list.
+ #
+ rule path ( )
+ {
+ import errors : error : errors.error ;
+ errors.error "method should be defined in derived classes" ;
+ }
+
+ # Returns the actual target name to be used in case when no scanner is
+ # involved.
+ #
+ rule actual-name ( )
+ {
+ import errors : error : errors.error ;
+ errors.error "method should be defined in derived classes" ;
+ }
+
+# implementation
+ rule actualize-no-scanner ( )
+ {
+ # In fact, we just need to merge virtual-target with
+ # abstract-file-target as the latter is the only class derived from the
+ # former. But that has been left for later.
+
+ import errors : error : errors.error ;
+ errors.error "method should be defined in derived classes" ;
+ }
+}
+
+
+# Target corresponding to a file. The exact mapping for file is not yet
+# specified in this class. (TODO: Actually, the class name could be better...)
+#
+# May be a source file (when no action is specified) or a derived file
+# (otherwise).
+#
+# The target's grist is a concatenation of its project's location, action
+# properties (for derived targets) and, optionally, value identifying the main
+# target.
+#
+class abstract-file-target : virtual-target
+{
+ import project ;
+ import regex ;
+ import sequence ;
+ import path ;
+ import type ;
+ import property-set ;
+ import indirect ;
+
+ rule __init__ (
+ name # Target's name.
+ exact ? # If non-empty, the name is exactly the name created file
+ # should have. Otherwise, the '__init__' method will add a
+ # suffix obtained from 'type' by calling
+ # 'type.generated-target-suffix'.
+ : type ? # Target's type.
+ : project
+ : action ?
+ )
+ {
+ virtual-target.__init__ $(name) : $(project) ;
+
+ self.type = $(type) ;
+ self.action = $(action) ;
+ if $(action)
+ {
+ $(action).add-targets $(__name__) ;
+
+ if $(self.type) && ! $(exact)
+ {
+ _adjust-name $(name) ;
+ }
+ }
+ }
+
+ rule type ( )
+ {
+ return $(self.type) ;
+ }
+
+ # Sets the path. When generating target name, it will override any path
+ # computation from properties.
+ #
+ rule set-path ( path )
+ {
+ self.path = [ path.native $(path) ] ;
+ }
+
+ # Returns the currently set action.
+ #
+ rule action ( )
+ {
+ return $(self.action) ;
+ }
+
+ # Sets/gets the 'root' flag. Target is root if it directly corresponds to
+ # some variant of a main target.
+ #
+ rule root ( set ? )
+ {
+ if $(set)
+ {
+ self.root = true ;
+ }
+ return $(self.root) ;
+ }
+
+ # Gets or sets the subvariant which created this target. Subvariant is set
+ # when target is brought into existance and is never changed after that. In
+ # particular, if a target is shared by multiple subvariants, only the first
+ # one is stored.
+ #
+ rule creating-subvariant ( s ? # If specified, specifies the value to set,
+ # which should be a 'subvariant' class
+ # instance.
+ )
+ {
+ if $(s) && ! $(self.creating-subvariant)
+ {
+ self.creating-subvariant = $(s) ;
+ }
+ return $(self.creating-subvariant) ;
+ }
+
+ rule actualize-action ( target )
+ {
+ if $(self.action)
+ {
+ $(self.action).actualize ;
+ }
+ }
+
+ # Return a human-readable representation of this target. If this target has
+ # an action, that is:
+ #
+ # { <action-name>-<self.name>.<self.type> <action-sources>... }
+ #
+ # otherwise, it is:
+ #
+ # { <self.name>.<self.type> }
+ #
+ rule str ( )
+ {
+ local action = [ action ] ;
+ local name-dot-type = [ sequence.join $(self.name) "." $(self.type) ] ;
+
+ if $(action)
+ {
+ local sources = [ $(action).sources ] ;
+ local action-name = [ $(action).action-name ] ;
+
+ local ss ;
+ for local s in $(sources)
+ {
+ ss += [ $(s).str ] ;
+ }
+
+ return "{" $(action-name)-$(name-dot-type) $(ss) "}" ;
+ }
+ else
+ {
+ return "{" $(name-dot-type) "}" ;
+ }
+ }
+
+ rule less ( a )
+ {
+ if [ str ] < [ $(a).str ]
+ {
+ return true ;
+ }
+ }
+
+ rule equal ( a )
+ {
+ if [ str ] = [ $(a).str ]
+ {
+ return true ;
+ }
+ }
+
+# private:
+ rule actual-name ( )
+ {
+ if ! $(self.actual-name)
+ {
+ local grist = [ grist ] ;
+ local basename = [ path.native $(self.name) ] ;
+ self.actual-name = <$(grist)>$(basename) ;
+ }
+ return $(self.actual-name) ;
+ }
+
+ # Helper to 'actual-name', above. Computes a unique prefix used to
+ # distinguish this target from other targets with the same name creating
+ # different files.
+ #
+ rule grist ( )
+ {
+ # Depending on target, there may be different approaches to generating
+ # unique prefixes. We generate prefixes in the form:
+ # <one letter approach code> <the actual prefix>
+ local path = [ path ] ;
+ if $(path)
+ {
+ # The target will be generated to a known path. Just use the path
+ # for identification, since path is as unique as it can get.
+ return p$(path) ;
+ }
+ else
+ {
+ # File is either source, which will be searched for, or is not a
+ # file at all. Use the location of project for distinguishing.
+ local project-location = [ $(self.project).get location ] ;
+ local location-grist = [ sequence.join [ regex.split
+ $(project-location) "/" ] : "!" ] ;
+
+ if $(self.action)
+ {
+ local ps = [ $(self.action).properties ] ;
+ local property-grist = [ $(ps).as-path ] ;
+ # 'property-grist' can be empty when 'ps' is an empty property
+ # set.
+ if $(property-grist)
+ {
+ location-grist = $(location-grist)/$(property-grist) ;
+ }
+ }
+
+ return l$(location-grist) ;
+ }
+ }
+
+ # Given the target name specified in constructor, returns the name which
+ # should be really used, by looking at the <tag> properties. Tag properties
+ # need to be specified as <tag>@rule-name. This makes Boost Build call the
+ # specified rule with the target name, type and properties to get the new
+ # name. If no <tag> property is specified or the rule specified by <tag>
+ # returns nothing, returns the result of calling
+ # virtual-target.add-prefix-and-suffix.
+ #
+ rule _adjust-name ( specified-name )
+ {
+ local ps ;
+ if $(self.action)
+ {
+ ps = [ $(self.action).properties ] ;
+ }
+ else
+ {
+ ps = [ property-set.empty ] ;
+ }
+
+ local tag = [ $(ps).get <tag> ] ;
+
+ if $(tag)
+ {
+ local rule-name = [ MATCH ^@(.*) : $(tag) ] ;
+ if $(rule-name)
+ {
+ if $(tag[2])
+ {
+ # 'errors' is imported here, on demand -- NOTE(review):
+ # presumably to avoid a circular module dependency at class
+ # load time; confirm against the module import graph.
+ import errors : error : errors.error ;
+ errors.error <tag>@rulename is present but is not the only
+ <tag> feature. ;
+ }
+
+ self.name = [ indirect.call $(rule-name) $(specified-name)
+ : $(self.type) : $(ps) ] ;
+ }
+ else
+ {
+ import errors : error : errors.error ;
+ errors.error <tag> property value must be '@rule-name'. ;
+ }
+ }
+
+ # If there is no tag or the tag rule returned nothing.
+ if ! $(tag) || ! $(self.name)
+ {
+ self.name = [ virtual-target.add-prefix-and-suffix $(specified-name)
+ : $(self.type) : $(ps) ] ;
+ }
+ }
+
+ # Creates the actual Jam target for this virtual target without attaching
+ # any scanner. Idempotent: registration, dependency setup, location and
+ # action binding happen only on the first call (guarded by
+ # 'self.made-no-scanner'); later calls just return the actual name.
+ rule actualize-no-scanner ( )
+ {
+ local name = [ actual-name ] ;
+
+ # Do anything only on the first invocation.
+ if ! $(self.made-no-scanner)
+ {
+ self.made-no-scanner = true ;
+
+ if $(self.action)
+ {
+ # For non-derived target, we do not care if there are several
+ # virtual targets that refer to the same name. One case when
+ # this is unavoidable is when the file name is main.cpp and two
+ # targets have types CPP (for compiling) and MOCCABLE_CPP (for
+ # conversion to H via Qt tools).
+ virtual-target.register-actual-name $(name) : $(__name__) ;
+ }
+
+ for local i in $(self.dependencies)
+ {
+ DEPENDS $(name) : [ $(i).actualize ] ;
+ }
+
+ actualize-location $(name) ;
+ actualize-action $(name) ;
+ }
+ return $(name) ;
+ }
+}
+
+
+# Appends the suffix and prepends the prefix appropriate to the
+# 'type/property-set' combination to the specified name and returns the
+# result.
+#
+rule add-prefix-and-suffix ( specified-name : type ? : property-set )
+{
+ local suffix = [ type.generated-target-suffix $(type) : $(property-set) ] ;
+
+ # Handle suffixes for which no leading dot is desired. Those are specified
+ # by enclosing them in <...>. Needed by python so it can create "_d.so"
+ # extensions, for example.
+ if $(suffix:G)
+ {
+ suffix = [ utility.ungrist $(suffix) ] ;
+ }
+ else
+ {
+ suffix = .$(suffix) ;
+ }
+
+ local prefix = [ type.generated-target-prefix $(type) : $(property-set) ] ;
+
+ # Do not prepend the prefix if the specified name already carries it.
+ if [ MATCH ^($(prefix)) : $(specified-name) ]
+ {
+ prefix = ;
+ }
+ # :E="" supplies an empty default so a missing prefix/suffix expands to
+ # nothing rather than dropping the whole product expansion.
+ return $(prefix:E="")$(specified-name)$(suffix:E="") ;
+}
+
+
+# File targets with explicitly known location.
+#
+# The file path is determined as
+# * Value passed to the 'set-path' method, if any.
+# * For derived files, project's build dir, joined with components that
+# describe action properties. If free properties are not equal to the
+# project's reference properties an element with the name of the main
+# target is added.
+# * For source files, project's source dir.
+#
+# The file suffix is determined as:
+# * The value passed to the 'suffix' method, if any.
+# * The suffix corresponding to the target's type.
+#
+class file-target : abstract-file-target
+{
+ import "class" : new ;
+ import common ;
+
+ rule __init__ (
+ name exact ?
+ : type ? # Optional type for this target.
+ : project
+ : action ?
+ : path ?
+ )
+ {
+ abstract-file-target.__init__ $(name) $(exact) : $(type) : $(project) :
+ $(action) ;
+
+ self.path = $(path) ;
+ }
+
+ # Returns a copy of this target with the same name, project, action and
+ # path, but with the given 'new-type'.
+ rule clone-with-different-type ( new-type )
+ {
+ return [ new file-target $(self.name) exact : $(new-type) :
+ $(self.project) : $(self.action) : $(self.path) ] ;
+ }
+
+ rule actualize-location ( target )
+ {
+ # Scanner targets are always bound to already existing files in already
+ # existing folder. They need to be marked as depending on their base
+ # target (i.e. the target being scanned) but, unlike regular
+ # dependencies set up by the DEPENDS rule, they must not depend on any
+ # targets already marked as included by the base target. Otherwise such
+ # an included file being newer than the file being scanned would cause
+ # the scanner target to be updated, further causing any target depending
+ # on that scanner target to be rebuilt. This is the exact relationship
+ # as set up by Boost Jam's SEARCH binding method (needed to support
+ # searching for generated targets) so we want to bind scanner targets
+ # using this method instead of explicitly specifying their location
+ # using LOCATE.
+ #
+ # FIXME: We recognize scanner targets by their given name being
+ # different from this target's actual name. This is a hack and should be
+ # cleaned up by reorganizing who knows about scanners in the
+ # virtual-target/abstract-file-target/file-target/notfile-target/
+ # searched-lib-target/... class hierarchy.
+ local is-scanner-target ;
+ if $(target) != [ actual-name ]
+ {
+ is-scanner-target = true ;
+ }
+
+ if $(self.action) && ! $(is-scanner-target)
+ {
+ # This is a derived file.
+ local path = [ path ] ;
+ LOCATE on $(target) = $(path) ;
+
+ # Make sure the path exists.
+ DEPENDS $(target) : $(path) ;
+ common.MkDir $(path) ;
+
+ # It is possible that the target name includes a directory too, for
+ # example when installing headers. Create that directory.
+ if $(target:D)
+ {
+ local d = $(target:D) ;
+ d = $(d:R=$(path)) ;
+ DEPENDS $(target) : $(d) ;
+ common.MkDir $(d) ;
+ }
+
+ # For a real file target, we create a fake target depending on the
+ # real target. This allows us to run
+ #
+ # b2 hello.o
+ #
+ # without trying to guess the name of the real target. Note that the
+ # target has no directory name and uses a special <e> grist.
+ #
+ # First, that means that "b2 hello.o" will build all known hello.o
+ # targets. Second, the <e> grist makes sure this target will not be
+ # confused with other targets, for example, if we have subdir 'test'
+ # with target 'test' in it that includes a 'test.o' file, then the
+ # target for directory will be just 'test' the target for test.o
+ # will be <ptest/bin/gcc/debug>test.o and the target we create below
+ # will be <e>test.o
+ DEPENDS $(target:G=e) : $(target) ;
+ # Allow b2 <path-to-file>/<file> to work. This will not catch all
+ # possible ways to refer to the path (relative/absolute, extra ".",
+ # various "..", but should help in obvious cases.
+ DEPENDS $(target:G=e:R=$(path)) : $(target) ;
+ }
+ else
+ {
+ SEARCH on $(target) = [ path.native $(self.path) ] ;
+ }
+ }
+
+ # Returns the directory for this target. Computed lazily and cached in
+ # 'self.path'; only derived targets (those with an action) get a path
+ # computed here -- for others, nothing is returned unless a path was
+ # supplied at construction time.
+ #
+ rule path ( )
+ {
+ if ! $(self.path)
+ {
+ if $(self.action)
+ {
+ local p = [ $(self.action).properties ] ;
+ # NB: "path,relative-to-build-dir" is a single Jam variable
+ # name (commas are valid in names) holding a two-element list.
+ local path,relative-to-build-dir = [ $(p).target-path ] ;
+ local path = $(path,relative-to-build-dir[1]) ;
+ local relative-to-build-dir = $(path,relative-to-build-dir[2]) ;
+
+ if $(relative-to-build-dir)
+ {
+ path = [ path.join [ $(self.project).build-dir ] $(path) ] ;
+ }
+
+ self.path = [ path.native $(path) ] ;
+ }
+ }
+ return $(self.path) ;
+ }
+}
+
+
+# A virtual target that corresponds to no file on disk. Its actual Jam
+# target is marked NOTFILE/ALWAYS/NOUPDATE, so its action runs on every
+# build without any file binding or timestamp checks.
+class notfile-target : abstract-file-target
+{
+ rule __init__ ( name : project : action ? )
+ {
+ abstract-file-target.__init__ $(name) : : $(project) : $(action) ;
+ }
+
+ # Returns nothing to indicate that the target's path is not known.
+ #
+ rule path ( )
+ {
+ return ;
+ }
+
+ rule actualize-location ( target )
+ {
+ NOTFILE $(target) ;
+ ALWAYS $(target) ;
+ # TEMPORARY $(target) ;
+ NOUPDATE $(target) ;
+ }
+}
+
+
+# Class representing an action. Both 'targets' and 'sources' should list
+# instances of 'virtual-target'. Action name should name a rule with this
+# prototype:
+# rule action-name ( targets + : sources * : properties * )
+# Targets and sources are passed as actual Jam targets. The rule may not
+# establish additional dependency relationships.
+#
+class action
+{
+ import "class" ;
+ import indirect ;
+ import path ;
+ import property-set ;
+ import set : difference ;
+ import toolset ;
+ import type ;
+
+ rule __init__ ( sources * : action-name + : property-set ? )
+ {
+ self.sources = $(sources) ;
+
+ self.action-name = [ indirect.make-qualified $(action-name) ] ;
+
+ if ! $(property-set)
+ {
+ property-set = [ property-set.empty ] ;
+ }
+
+ if ! [ class.is-instance $(property-set) ]
+ {
+ import errors : error : errors.error ;
+ errors.error "Property set instance required" ;
+ }
+
+ self.properties = $(property-set) ;
+ }
+
+ rule add-targets ( targets * )
+ {
+ self.targets += $(targets) ;
+ }
+
+ # Removes 'old-targets' from, and appends 'new-targets' to, this action's
+ # target list.
+ # NOTE(review): called as 'set.difference' although only 'difference' is
+ # imported above -- presumably resolved through the class module's
+ # environment; confirm against other class modules in this file.
+ rule replace-targets ( old-targets * : new-targets * )
+ {
+ self.targets = [ set.difference $(self.targets) : $(old-targets) ] ;
+ self.targets += $(new-targets) ;
+ }
+
+ rule targets ( )
+ {
+ return $(self.targets) ;
+ }
+
+ rule sources ( )
+ {
+ return $(self.sources) ;
+ }
+
+ rule action-name ( )
+ {
+ return $(self.action-name) ;
+ }
+
+ rule properties ( )
+ {
+ return $(self.properties) ;
+ }
+
+ # Generates actual build instructions. Idempotent: guarded by
+ # 'self.actualized'.
+ #
+ rule actualize ( )
+ {
+ if ! $(self.actualized)
+ {
+ self.actualized = true ;
+
+ local ps = [ properties ] ;
+ local properties = [ adjust-properties $(ps) ] ;
+
+ local actual-targets ;
+ for local i in [ targets ]
+ {
+ actual-targets += [ $(i).actualize ] ;
+ }
+
+ actualize-sources [ sources ] : $(properties) ;
+
+ DEPENDS $(actual-targets) : $(self.actual-sources)
+ $(self.dependency-only-sources) ;
+
+ # Action name can include additional rule arguments, which should
+ # not be passed to 'set-target-variables'.
+ toolset.set-target-variables
+ [ indirect.get-rule $(self.action-name[1]) ] $(actual-targets)
+ : $(properties) ;
+
+ # Reflect ourselves in a variable for the target. This allows
+ # looking up additional info for the action given the raw target.
+ # For example to debug or output action information from action
+ # rules.
+ .action on $(actual-targets) = $(__name__) ;
+
+ indirect.call $(self.action-name) $(actual-targets)
+ : $(self.actual-sources) : [ $(properties).raw ] ;
+
+ # Since we set up the creating action here, we set up the action for
+ # cleaning up as well.
+ # NOTE(review): 'common' is not in this class's import list above --
+ # verify where 'common.Clean' resolves from.
+ common.Clean clean-all : $(actual-targets) ;
+ }
+ }
+
+ # Helper for 'actualize-sources'. For each passed source, actualizes it with
+ # the appropriate scanner. Returns the actualized virtual targets.
+ #
+ rule actualize-source-type ( sources * : property-set )
+ {
+ local result = ;
+ for local i in $(sources)
+ {
+ local scanner ;
+ if [ $(i).type ]
+ {
+ scanner = [ type.get-scanner [ $(i).type ] : $(property-set) ] ;
+ }
+ result += [ $(i).actualize $(scanner) ] ;
+ }
+ return $(result) ;
+ }
+
+ # Creates actual Jam targets for sources. Initializes the following member
+ # variables:
+ # 'self.actual-sources' -- sources passed to the updating action.
+ # 'self.dependency-only-sources' -- sources marked as dependencies, but
+ # are not used otherwise.
+ #
+ # New values will be *appended* to the variables. They may be non-empty if
+ # caller wants it.
+ #
+ rule actualize-sources ( sources * : property-set )
+ {
+ local dependencies = [ $(self.properties).get <dependency> ] ;
+
+ self.dependency-only-sources +=
+ [ actualize-source-type $(dependencies) : $(property-set) ] ;
+ self.actual-sources +=
+ [ actualize-source-type $(sources) : $(property-set) ] ;
+
+ # This is used to help b2 find dependencies in generated headers and
+ # other main targets, e.g. in:
+ #
+ # make a.h : ....... ;
+ # exe hello : hello.cpp : <implicit-dependency>a.h ;
+ #
+ # For b2 to find the dependency the generated target must be
+ # actualized (i.e. have its Jam target constructed). In the above case,
+ # if we are building just hello ("b2 hello"), 'a.h' will not be
+ # actualized unless we do it here.
+ local implicit = [ $(self.properties).get <implicit-dependency> ] ;
+ for local i in $(implicit)
+ {
+ $(i:G=).actualize ;
+ }
+ }
+
+ # Determines real properties when trying to build with 'properties'. This is
+ # the last chance to fix properties, for example to adjust includes to get
+ # generated headers correctly. Default implementation simply returns its
+ # argument.
+ #
+ rule adjust-properties ( property-set )
+ {
+ return $(property-set) ;
+ }
+}
+
+
+# Action class which does nothing --- it produces the targets with specific
+# properties out of nowhere. It is needed to distinguish virtual targets with
+# different properties that are known to exist and have no actions which create
+# them.
+#
+class null-action : action
+{
+ rule __init__ ( property-set ? )
+ {
+ action.__init__ : .no-action : $(property-set) ;
+ }
+
+ # Overrides action.actualize: only actualizes the produced targets; there
+ # is no command to run and no sources to set up.
+ rule actualize ( )
+ {
+ if ! $(self.actualized)
+ {
+ self.actualized = true ;
+ for local i in [ targets ]
+ {
+ $(i).actualize ;
+ }
+ }
+ }
+}
+
+
+# Class which acts exactly like 'action', except that its sources are not
+# scanned for dependencies.
+#
+class non-scanning-action : action
+{
+ rule __init__ ( sources * : action-name + : property-set ? )
+ {
+ action.__init__ $(sources) : $(action-name) : $(property-set) ;
+ }
+
+ # Overrides action.actualize-source-type: actualizes each source without
+ # ever attaching a scanner.
+ rule actualize-source-type ( sources * : property-set )
+ {
+ local result ;
+ for local i in $(sources)
+ {
+ result += [ $(i).actualize ] ;
+ }
+ return $(result) ;
+ }
+}
+
+
+# Creates a virtual target with an appropriate name and type from 'file'. If a
+# target with that name in that project already exists, returns that already
+# created target.
+#
+# FIXME: a more correct way would be to compute the path to the file, based on
+# name and source location for the project, and use that path to determine if
+# the target has already been created. This logic should be shared with how we
+# usually find targets identified by a specific target id. It should also be
+# updated to work correctly when the file is specified using both relative and
+# absolute paths.
+#
+# TODO: passing a project with all virtual targets is starting to be annoying.
+#
+rule from-file ( file : file-loc : project )
+{
+ import type ; # Had to do this here to break a circular dependency.
+
+ # Check whether we already created a target corresponding to this file.
+ # The cache key is the file path rooted first at 'file-loc' and then at
+ # the current working directory, i.e. an absolute path.
+ local path = [ path.root [ path.root $(file) $(file-loc) ] [ path.pwd ] ] ;
+
+ if $(.files.$(path))
+ {
+ return $(.files.$(path)) ;
+ }
+ else
+ {
+ local name = [ path.make $(file) ] ;
+ local type = [ type.type $(file) ] ;
+ local result ;
+
+ result = [ new file-target $(file) : $(type) : $(project) : :
+ $(file-loc) ] ;
+
+ .files.$(path) = $(result) ;
+ return $(result) ;
+ }
+}
+
+
+# Registers a new virtual target. Checks if there is already a registered target
+# with the same name, type, project and subvariant properties as well as the
+# same sources and equal action. If such target is found it is returned and a
+# new 'target' is not registered. Otherwise, 'target' is registered and
+# returned.
+#
+rule register ( target )
+{
+ # Candidate duplicates are bucketed by a "path-name" signature; within a
+ # bucket, equality additionally requires matching actions (same action
+ # name, same sources, same non-incidental properties).
+ local signature = [ sequence.join [ $(target).path ] [ $(target).name ] : -
+ ] ;
+
+ local result ;
+ for local t in $(.cache.$(signature))
+ {
+ local a1 = [ $(t).action ] ;
+ local a2 = [ $(target).action ] ;
+
+ if ! $(result)
+ {
+ if ! $(a1) && ! $(a2)
+ {
+ result = $(t) ;
+ }
+ else if $(a1) && $(a2) &&
+ ( [ $(a1).action-name ] = [ $(a2).action-name ] ) &&
+ ( [ $(a1).sources ] = [ $(a2).sources ] )
+ {
+ local ps1 = [ $(a1).properties ] ;
+ local ps2 = [ $(a2).properties ] ;
+ # Incidental dependency properties are excluded from the
+ # comparison.
+ local p1 = [ $(ps1).base ] [ $(ps1).free ] [ set.difference
+ [ $(ps1).dependency ] : [ $(ps1).incidental ] ] ;
+ local p2 = [ $(ps2).base ] [ $(ps2).free ] [ set.difference
+ [ $(ps2).dependency ] : [ $(ps2).incidental ] ] ;
+ if $(p1) = $(p2)
+ {
+ result = $(t) ;
+ }
+ }
+ }
+ }
+
+ if ! $(result)
+ {
+ .cache.$(signature) += $(target) ;
+ result = $(target) ;
+ }
+
+ .recent-targets += $(result) ;
+ .all-targets += $(result) ;
+
+ return $(result) ;
+}
+
+
+# Each target returned by 'register' is added to the .recent-targets list,
+# returned by this function. This allows us to find all virtual targets created
+# when building a specific main target, even those constructed only as
+# intermediate targets.
+#
+rule recent-targets ( )
+{
+ return $(.recent-targets) ;
+}
+
+
+# Resets the list returned by 'recent-targets'.
+#
+rule clear-recent-targets ( )
+{
+ .recent-targets = ;
+}
+
+
+# Returns all virtual targets ever created. Unlike '.recent-targets', the
+# '.all-targets' list is only ever appended to (in 'register') and is never
+# cleared.
+#
+rule all-targets ( )
+{
+ return $(.all-targets) ;
+}
+
+
+# Returns all targets from 'targets' with types equal to 'type' or derived from
+# it.
+#
+# type : type name to filter by.
+# targets : virtual-target instances to filter.
+#
+rule select-by-type ( type : targets * )
+{
+ local result ;
+ for local t in $(targets)
+ {
+ if [ type.is-subtype [ $(t).type ] $(type) ]
+ {
+ result += $(t) ;
+ }
+ }
+ return $(result) ;
+}
+
+
+# Binds the given actual Jam target name to 'virtual-target'. If a different
+# virtual target has already claimed the same actual name, reports a fatal
+# error with a detailed diagnostic: the two conflicting targets, the main
+# targets that created them, and the property differences between their
+# actions.
+#
+rule register-actual-name ( actual-name : virtual-target )
+{
+ if $(.actual.$(actual-name))
+ {
+ local cs1 = [ $(.actual.$(actual-name)).creating-subvariant ] ;
+ local cmt1-name ;
+ # The "-is-defined" suffix turns the condition into an emptiness test:
+ # if $(cs1) is empty, the whole product expansion is empty, i.e. false.
+ if $(cs1)-is-defined
+ {
+ local cmt1 = [ $(cs1).main-target ] ;
+ cmt1-name = [ $(cmt1).full-name ] ;
+ }
+ local cs2 = [ $(virtual-target).creating-subvariant ] ;
+ local cmt2-name ;
+ if $(cs2)-is-defined
+ {
+ local cmt2 = [ $(cs2).main-target ] ;
+ cmt2-name = [ $(cmt2).full-name ] ;
+ }
+ local extra-error-information ;
+ if ! $(cs1)-is-defined || ! $(cs2)-is-defined
+ {
+ extra-error-information = Encountered a virtual-target without a
+ creating subvariant. It could be the virtual target has not been
+ registered via the virtual-target.register rule. ;
+ }
+
+ local action1 = [ $(.actual.$(actual-name)).action ] ;
+ local action2 = [ $(virtual-target).action ] ;
+ local properties-added ;
+ local properties-removed ;
+ if $(action1) && $(action2)
+ {
+ local p1 = [ $(action1).properties ] ;
+ p1 = [ $(p1).raw ] ;
+ local p2 = [ $(action2).properties ] ;
+ p2 = [ $(p2).raw ] ;
+ properties-removed = [ set.difference $(p1) : $(p2) ] ;
+ properties-removed ?= "none" ;
+ properties-added = [ set.difference $(p2) : $(p1) ] ;
+ properties-added ?= "none" ;
+ }
+ import errors : error : errors.error ;
+ errors.error "Duplicate name of actual target:" $(actual-name)
+ : "previous virtual target" [ $(.actual.$(actual-name)).str ]
+ : "created from" $(cmt1-name)
+ : "another virtual target" [ $(virtual-target).str ]
+ : "created from" $(cmt2-name)
+ : "added properties:" $(properties-added)
+ : "removed properties:" $(properties-removed)
+ : $(extra-error-information) ;
+ }
+ else
+ {
+ .actual.$(actual-name) = $(virtual-target) ;
+ }
+}
+
+
+# Traverses the dependency graph of 'target' and return all targets that will be
+# created before this one is created. If the root of some dependency graph is
+# found during traversal, it is either included or not, depending on the
+# 'include-roots' value. In either case traversal stops at root targets, i.e.
+# root target sources are not traversed.
+#
+rule traverse ( target : include-roots ? : include-sources ? )
+{
+ local result ;
+ if [ $(target).action ]
+ {
+ local action = [ $(target).action ] ;
+ # This includes the 'target' as well.
+ result += [ $(action).targets ] ;
+
+ for local t in [ $(action).sources ]
+ {
+ if ! [ $(t).root ]
+ {
+ result += [ traverse $(t) : $(include-roots) :
+ $(include-sources) ] ;
+ }
+ else if $(include-roots)
+ {
+ result += $(t) ;
+ }
+ }
+ }
+ else if $(include-sources)
+ {
+ # 'target' has no action, i.e. it is a source file; include it only if
+ # requested.
+ result = $(target) ;
+ }
+ return $(result) ;
+}
+
+
+# Takes an 'action' instance and creates a new instance of it and all targets
+# produced by the action. The rule-name and properties are set to
+# 'new-rule-name' and 'new-properties', if those are specified. Returns the
+# cloned action.
+#
+rule clone-action ( action : new-project : new-action-name ? : new-properties ?
+ )
+{
+ if ! $(new-action-name)
+ {
+ new-action-name = [ $(action).action-name ] ;
+ }
+ if ! $(new-properties)
+ {
+ new-properties = [ $(action).properties ] ;
+ }
+
+ # Look up the action's concrete class dynamically so that instances of
+ # action subclasses are cloned as the same subclass.
+ local action-class = [ modules.peek $(action) : __class__ ] ;
+ local cloned-action = [ class.new $(action-class)
+ [ $(action).sources ] : $(new-action-name) : $(new-properties) ] ;
+
+ local cloned-targets ;
+ for local target in [ $(action).targets ]
+ {
+ local n = [ $(target).name ] ;
+ # Do not modify produced target names.
+ local cloned-target = [ class.new file-target $(n) exact :
+ [ $(target).type ] : $(new-project) : $(cloned-action) ] ;
+ local d = [ $(target).dependencies ] ;
+ if $(d)
+ {
+ $(cloned-target).depends $(d) ;
+ }
+ $(cloned-target).root [ $(target).root ] ;
+ $(cloned-target).creating-subvariant [ $(target).creating-subvariant ] ;
+
+ cloned-targets += $(cloned-target) ;
+ }
+
+ return $(cloned-action) ;
+}
+
+
+# Records everything about one built variant of a main target: the requested
+# and actually-used properties, the sources, the top-level created targets,
+# and links to the subvariants of implicit dependencies.
+class subvariant
+{
+ import sequence ;
+ import type ;
+
+ rule __init__ ( main-target # The instance of main-target class.
+ : property-set # Properties requested for this target.
+ : sources *
+ : build-properties # Actually used properties.
+ : sources-usage-requirements # Properties propagated from sources.
+ : created-targets * ) # Top-level created targets.
+ {
+ self.main-target = $(main-target) ;
+ self.properties = $(property-set) ;
+ self.sources = $(sources) ;
+ self.build-properties = $(build-properties) ;
+ self.sources-usage-requirements = $(sources-usage-requirements) ;
+ self.created-targets = $(created-targets) ;
+
+ # Pre-compose a list of other dependency graphs this one depends on.
+ local deps = [ $(build-properties).get <implicit-dependency> ] ;
+ for local d in $(deps)
+ {
+ self.other-dg += [ $(d:G=).creating-subvariant ] ;
+ }
+
+ self.other-dg = [ sequence.unique $(self.other-dg) ] ;
+ }
+
+ rule main-target ( )
+ {
+ return $(self.main-target) ;
+ }
+
+ rule created-targets ( )
+ {
+ return $(self.created-targets) ;
+ }
+
+ rule requested-properties ( )
+ {
+ return $(self.properties) ;
+ }
+
+ rule build-properties ( )
+ {
+ return $(self.build-properties) ;
+ }
+
+ rule sources-usage-requirements ( )
+ {
+ return $(self.sources-usage-requirements) ;
+ }
+
+ rule set-usage-requirements ( usage-requirements )
+ {
+ self.usage-requirements = $(usage-requirements) ;
+ }
+
+ rule usage-requirements ( )
+ {
+ return $(self.usage-requirements) ;
+ }
+
+ # Returns all targets referenced by this subvariant, either directly or
+ # indirectly, and either as sources, or as dependency properties. Targets
+ # referred to using the dependency property are returned as properties, not
+ # targets.
+ #
+ # 'theset' is mutated in place; it must support 'contains' and 'add'.
+ #
+ rule all-referenced-targets ( theset )
+ {
+ # Find directly referenced targets.
+ local deps = [ $(self.build-properties).dependency ] ;
+ local all-targets = $(self.sources) $(deps) ;
+
+ # Find other subvariants.
+ local r ;
+ for local t in $(all-targets)
+ {
+ if ! [ $(theset).contains $(t) ]
+ {
+ $(theset).add $(t) ;
+ r += [ $(t:G=).creating-subvariant ] ;
+ }
+ }
+ r = [ sequence.unique $(r) ] ;
+ for local s in $(r)
+ {
+ if $(s) != $(__name__)
+ {
+ $(s).all-referenced-targets $(theset) ;
+ }
+ }
+ }
+
+ # Returns the properties specifying implicit include paths to generated
+ # headers. This traverses all targets in this subvariant and subvariants
+ # referred by <implicit-dependency> properties. For all targets of type
+ # 'target-type' (or for all targets, if 'target-type' is not specified), the
+ # result will contain <$(feature)>path-to-that-target.
+ #
+ rule implicit-includes ( feature : target-type ? )
+ {
+ local key = ii$(feature)-$(target-type:E="") ;
+ if ! $($(key))-is-not-empty
+ {
+ local target-paths = [ all-target-directories $(target-type) ] ;
+ target-paths = [ sequence.unique $(target-paths) ] ;
+ local result = $(target-paths:G=$(feature)) ;
+ # Cache an empty string to distinguish "computed but empty" from
+ # "not yet computed".
+ if ! $(result)
+ {
+ result = "" ;
+ }
+ $(key) = $(result) ;
+ }
+ if $($(key)) = ""
+ {
+ return ;
+ }
+ else
+ {
+ return $($(key)) ;
+ }
+ }
+
+ # Returns the directories of all created targets (optionally filtered by
+ # 'target-type'), computed lazily and cached in 'self.target-directories'.
+ rule all-target-directories ( target-type ? )
+ {
+ if ! $(self.target-directories)
+ {
+ compute-target-directories $(target-type) ;
+ }
+ return $(self.target-directories) ;
+ }
+
+ rule compute-target-directories ( target-type ? )
+ {
+ local result ;
+ for local t in $(self.created-targets)
+ {
+ # Skip targets of the wrong type.
+ if ! $(target-type) ||
+ [ type.is-derived [ $(t).type ] $(target-type) ]
+ {
+ result = [ sequence.merge $(result) : [ $(t).path ] ] ;
+ }
+ }
+ for local d in $(self.other-dg)
+ {
+ result += [ $(d).all-target-directories $(target-type) ] ;
+ }
+ self.target-directories = $(result) ;
+ }
+}
diff --git a/src/kenlm/jam-files/boost-build/kernel/boost-build.jam b/src/kenlm/jam-files/boost-build/kernel/boost-build.jam
new file mode 100644
index 0000000..377f6ec
--- /dev/null
+++ b/src/kenlm/jam-files/boost-build/kernel/boost-build.jam
@@ -0,0 +1,5 @@
+# Copyright 2003 Dave Abrahams
+# Distributed under the Boost Software License, Version 1.0.
+# (See accompanying file LICENSE_1_0.txt or http://www.boost.org/LICENSE_1_0.txt)
+
+boost-build . ;
diff --git a/src/kenlm/jam-files/boost-build/kernel/bootstrap.jam b/src/kenlm/jam-files/boost-build/kernel/bootstrap.jam
new file mode 100644
index 0000000..c4320dc
--- /dev/null
+++ b/src/kenlm/jam-files/boost-build/kernel/bootstrap.jam
@@ -0,0 +1,266 @@
+# Copyright 2003 Dave Abrahams
+# Copyright 2003, 2005, 2006 Rene Rivera
+# Copyright 2003, 2005, 2006 Vladimir Prus
+# Distributed under the Boost Software License, Version 1.0.
+# (See accompanying file LICENSE_1_0.txt or copy at
+# http://www.boost.org/LICENSE_1_0.txt)
+
+# First of all, check the jam version.
+if $(JAM_VERSION:J="") < 030112
+{
+ ECHO "error: Boost.Jam version 3.1.12 or later required" ;
+ EXIT ;
+}
+
+local required-rules = GLOB-RECURSIVELY HAS_NATIVE_RULE ;
+for local r in $(required-rules)
+{
+ if ! $(r) in [ RULENAMES ]
+ {
+ ECHO "error: builtin rule '$(r)' is not present" ;
+ ECHO "error: your version of bjam is likely out of date" ;
+ ECHO "error: please get a fresh version from SVN." ;
+ EXIT ;
+ }
+}
+
+local native = regex transform 2 ;
+while $(native)
+{
+ if ! [ HAS_NATIVE_RULE $(native[1]) : $(native[2]) : $(native[3]) ]
+ {
+ ECHO "error: missing native rule '$(native[1]).$(native[2])'" ;
+ ECHO "error: or interface version of that rule is too low" ;
+ ECHO "error: your version of bjam is likely out of date" ;
+ ECHO "error: please get a fresh version from SVN." ;
+ EXIT ;
+ }
+ native = $(native[4-]) ;
+}
+
+
+# Check that the builtin .ENVIRON module is present. We do not have a builtin to
+# check that a module is present, so we assume that the PATH environment
+# variable is always set and verify that the .ENVIRON module has a non-empty
+# value of that variable.
+module .ENVIRON
+{
+ local p = $(PATH) $(Path) $(path) ;
+ if ! $(p)
+ {
+ ECHO "error: no builtin module .ENVIRON is found" ;
+ ECHO "error: your version of bjam is likely out of date" ;
+ ECHO "error: please get a fresh version from SVN." ;
+ EXIT ;
+ }
+}
+
+# Check that @() functionality is present. Similarly to modules, we do not have
+# a way to test this directly. Instead we check that $(TMPNAME) functionality is
+# present which was added at roughly the same time (more precisely, it was added
+# just before).
+{
+ if ! $(TMPNAME)
+ {
+ ECHO "error: no @() functionality found" ;
+ ECHO "error: your version of b2 is likely out of date" ;
+ ECHO "error: please get a fresh version from SVN." ;
+ EXIT ;
+ }
+}
+
+# Make sure that \n escape is available.
+if "\n" = "n"
+{
+ if $(OS) = CYGWIN
+ {
+ ECHO "warning: escape sequences are not supported" ;
+ ECHO "warning: this will cause major misbehaviour on cygwin" ;
+ ECHO "warning: your version of b2 is likely out of date" ;
+ ECHO "warning: please get a fresh version from SVN." ;
+ }
+}
+
+
+# Bootstrap the module system. Then bring the import rule into the global module.
+#
+SEARCH on <module@>modules.jam = $(.bootstrap-file:D) ;
+module modules { include <module@>modules.jam ; }
+IMPORT modules : import : : import ;
+
+{
+ # Add module subdirectories to the BOOST_BUILD_PATH, which allows us to make
+ # incremental refactoring steps by moving modules to appropriate
+ # subdirectories, thereby achieving some physical separation of different
+ # layers without changing all of our code to specify subdirectories in
+ # import statements or use an extra level of qualification on imported
+ # names.
+
+ local subdirs =
+ kernel # only the most-intrinsic modules: modules, errors
+ util # low-level substrate: string/number handling, etc.
+ build # essential elements of the build system architecture
+ tools # toolsets for handling specific build jobs and targets.
+ contrib # user contributed (unreviewed) modules
+ . # build-system.jam lives here
+ ;
+ local whereami = [ NORMALIZE_PATH $(.bootstrap-file:DT) ] ;
+ BOOST_BUILD_PATH += $(whereami:D)/$(subdirs) ;
+
+ modules.poke .ENVIRON : BOOST_BUILD_PATH : $(BOOST_BUILD_PATH) ;
+
+ modules.poke : EXTRA_PYTHONPATH : $(whereami) ;
+}
+
+# Reload the modules, to clean up things. The modules module can tolerate being
+# imported twice.
+#
+import modules ;
+
+# Process option plugins first to allow them to prevent loading the rest of the
+# build system.
+#
+import option ;
+local dont-build = [ option.process ] ;
+
+# Should we skip building, i.e. loading the build system, according to the
+# options processed?
+#
+if ! $(dont-build)
+{
+ if ! --python in $(ARGV)
+ {
+ # Allow users to override the build system file from the command-line
+ # (mostly for testing).
+ local build-system = [ MATCH --build-system=(.*) : $(ARGV) ] ;
+ build-system ?= build-system ;
+
+ # Use last element in case of multiple command-line options.
+ import $(build-system[-1]) ;
+ }
+ else
+ {
+ ECHO "Boost.Build V2 Python port (experimental)" ;
+
+ # Define additional interface exposed to Python code. Python code will
+ # also have access to select bjam builtins in the 'bjam' module, but
+ # some things are easier to define outside C.
+ module python_interface
+ {
+ rule load ( module-name : location )
+ {
+ USER_MODULE $(module-name) ;
+ # Make all rules in the loaded module available in the global
+ # namespace, so that we do not have to bother specifying the
+ # "correct" module when calling from Python.
+ module $(module-name)
+ {
+ __name__ = $(1) ;
+ include $(2) ;
+ local rules = [ RULENAMES $(1) ] ;
+ IMPORT $(1) : $(rules) : $(1) : $(1).$(rules) ;
+ }
+ }
+
+ rule peek ( module-name ? : variables + )
+ {
+ module $(<)
+ {
+ return $($(>)) ;
+ }
+ }
+
+ rule set-variable ( module-name : name : value * )
+ {
+ module $(<)
+ {
+ $(>) = $(3) ;
+ }
+ }
+
+ rule set-top-level-targets ( targets * )
+ {
+ DEPENDS all : $(targets) ;
+ }
+
+ rule call-in-module ( m : rulename : * )
+ {
+ module $(m)
+ {
+ return [ $(2) $(3) : $(4) : $(5) : $(6) : $(7) : $(8) : $(9)
+ : $(10) : $(11) : $(12) : $(13) : $(14) : $(15) : $(16)
+ : $(17) : $(18) : $(19) ] ;
+ }
+ }
+
+
+ rule set-update-action ( action : targets * : sources * :
+ properties * )
+ {
+ $(action) $(targets) : $(sources) : $(properties) ;
+ }
+
+ rule set-update-action-in-module ( m : action : targets * :
+ sources * : properties * )
+ {
+ module $(m)
+ {
+ $(2) $(3) : $(4) : $(5) ;
+ }
+ }
+
+ rule set-target-variable ( targets + : variable : value * : append ?
+ )
+ {
+ if $(append)
+ {
+ $(variable) on $(targets) += $(value) ;
+ }
+ else
+ {
+ $(variable) on $(targets) = $(value) ;
+ }
+ }
+
+ rule get-target-variable ( targets + : variable )
+ {
+ return [ on $(targets) return $($(variable)) ] ;
+ }
+
+ rule import-rules-from-parent ( parent-module : this-module :
+ user-rules * )
+ {
+ IMPORT $(parent-module) : $(user-rules) : $(this-module) :
+ $(user-rules) ;
+ EXPORT $(this-module) : $(user-rules) ;
+ }
+
+ rule mark-included ( targets * : includes * )
+ {
+ NOCARE $(includes) ;
+ INCLUDES $(targets) : $(includes) ;
+ ISFILE $(includes) ;
+ }
+ }
+
+ PYTHON_IMPORT_RULE bootstrap : bootstrap : PyBB : bootstrap ;
+ modules.poke PyBB : root : [ NORMALIZE_PATH $(.bootstrap-file:DT)/.. ] ;
+
+ module PyBB
+ {
+ local ok = [ bootstrap $(root) ] ;
+ if ! $(ok)
+ {
+ EXIT ;
+ }
+ }
+
+
+ #PYTHON_IMPORT_RULE boost.build.build_system : main : PyBB : main ;
+
+ #module PyBB
+ #{
+ # main ;
+ #}
+ }
+}
diff --git a/src/kenlm/jam-files/boost-build/kernel/class.jam b/src/kenlm/jam-files/boost-build/kernel/class.jam
new file mode 100644
index 0000000..e48ab6d
--- /dev/null
+++ b/src/kenlm/jam-files/boost-build/kernel/class.jam
@@ -0,0 +1,420 @@
+# Copyright 2001, 2002, 2003 Dave Abrahams
+# Copyright 2002, 2005 Rene Rivera
+# Copyright 2002, 2003 Vladimir Prus
+# Distributed under the Boost Software License, Version 1.0.
+# (See accompanying file LICENSE_1_0.txt or copy at
+# http://www.boost.org/LICENSE_1_0.txt)
+
+# Polymorphic class system built on top of core Jam facilities.
+#
+# Classes are defined by 'class' keywords:
+#
+# class myclass
+# {
+# rule __init__ ( arg1 ) # constructor
+# {
+# self.attribute = $(arg1) ;
+# }
+#
+# rule method1 ( ) # method
+# {
+# return [ method2 ] ;
+# }
+#
+# rule method2 ( ) # method
+# {
+# return $(self.attribute) ;
+# }
+# }
+#
+# The __init__ rule is the constructor, and sets member variables.
+#
+# New instances are created by invoking [ new <class> <args...> ]:
+#
+# local x = [ new myclass foo ] ; # x is a new myclass object
+# assert.result foo : [ $(x).method1 ] ; # $(x).method1 returns "foo"
+#
+# Derived classes are created by mentioning base classes in the declaration::
+#
+# class derived : myclass
+# {
+# rule __init__ ( arg )
+# {
+# myclass.__init__ $(arg) ; # call base __init__
+#
+# }
+#
+# rule method2 ( ) # method override
+# {
+# return $(self.attribute)XXX ;
+# }
+# }
+#
+# All methods operate virtually, replacing behavior in the base classes. For
+# example::
+#
+# local y = [ new derived foo ] ; # y is a new derived object
+#     assert.result fooXXX : [ $(y).method1 ] ; # $(y).method1 returns "fooXXX"
+#
+# Each class instance is its own core Jam module. All instance attributes and
+# methods are accessible without additional qualification from within the class
+# instance. All rules imported in the class declaration, or visible in base classes
+# are also visible. Base methods are available in qualified form:
+# base-name.method-name. By convention, attribute names are prefixed with
+# "self.".
+
+import modules ;
+import numbers ;
+
+
+rule xinit ( instance : class )
+{
+ module $(instance)
+ {
+ __class__ = $(2) ;
+ __name__ = $(1) ;
+ }
+}
+
+
+rule new ( class args * : * )
+{
+ .next-instance ?= 1 ;
+ local id = object($(class))@$(.next-instance) ;
+
+ INSTANCE $(id) : class@$(class) ;
+ xinit $(id) : $(class) ;
+ IMPORT_MODULE $(id) ;
+ $(id).__init__ $(args) : $(2) : $(3) : $(4) : $(5) : $(6) : $(7) : $(8) :
+ $(9) : $(10) : $(11) : $(12) : $(13) : $(14) : $(15) : $(16) : $(17) :
+ $(18) : $(19) ;
+
+ # Bump the next unique object name.
+ .next-instance = [ numbers.increment $(.next-instance) ] ;
+
+ # Return the name of the new instance.
+ return $(id) ;
+}
+
+
+rule bases ( class )
+{
+ module class@$(class)
+ {
+ return $(__bases__) ;
+ }
+}
+
+
+rule is-derived ( class : bases + )
+{
+ local stack = $(class) ;
+ local visited found ;
+ while ! $(found) && $(stack)
+ {
+ local top = $(stack[1]) ;
+ stack = $(stack[2-]) ;
+ if ! ( $(top) in $(visited) )
+ {
+ visited += $(top) ;
+ stack += [ bases $(top) ] ;
+
+ if $(bases) in $(visited)
+ {
+ found = true ;
+ }
+ }
+ }
+ return $(found) ;
+}
+
+
+# Returns true if the 'value' is a class instance.
+#
+rule is-instance ( value )
+{
+ return [ MATCH "^(object\\()[^@]+\\)@.*" : $(value) ] ;
+}
+
+
+# Check if the given value is of the given type.
+#
+rule is-a (
+ instance # The value to check.
+ : type # The type to test for.
+)
+{
+ if [ is-instance $(instance) ]
+ {
+ return [ class.is-derived [ modules.peek $(instance) : __class__ ] : $(type) ] ;
+ }
+}
+
+
+local rule typecheck ( x )
+{
+ local class-name = [ MATCH "^\\[(.*)\\]$" : [ BACKTRACE 1 ] ] ;
+ if ! [ is-a $(x) : $(class-name) ]
+ {
+ return "Expected an instance of "$(class-name)" but got \""$(x)"\" for argument" ;
+ }
+}
+
+
+rule __test__ ( )
+{
+ import assert ;
+ import "class" : new ;
+ import errors : try catch ;
+
+ # This will be the construction function for a class called 'myclass'.
+ #
+ class myclass
+ {
+ import assert ;
+
+ rule __init__ ( x_ * : y_ * )
+ {
+ # Set some instance variables.
+ x = $(x_) ;
+ y = $(y_) ;
+ foo += 10 ;
+ }
+
+ rule set-x ( newx * )
+ {
+ x = $(newx) ;
+ }
+
+ rule get-x ( )
+ {
+ return $(x) ;
+ }
+
+ rule set-y ( newy * )
+ {
+ y = $(newy) ;
+ }
+
+ rule get-y ( )
+ {
+ return $(y) ;
+ }
+
+ rule f ( )
+ {
+ return [ g $(x) ] ;
+ }
+
+ rule g ( args * )
+ {
+ if $(x) in $(y)
+ {
+ return $(x) ;
+ }
+ else if $(y) in $(x)
+ {
+ return $(y) ;
+ }
+ else
+ {
+ return ;
+ }
+ }
+
+ rule get-class ( )
+ {
+ return $(__class__) ;
+ }
+
+ rule get-instance ( )
+ {
+ return $(__name__) ;
+ }
+
+ rule invariant ( )
+ {
+ assert.equal 1 : 1 ;
+ }
+
+ rule get-foo ( )
+ {
+ return $(foo) ;
+ }
+ } # class myclass ;
+
+ class derived1 : myclass
+ {
+ rule __init__ ( z_ )
+ {
+ myclass.__init__ $(z_) : X ;
+ z = $(z_) ;
+ }
+
+ # Override g.
+ #
+ rule g ( args * )
+ {
+ return derived1.g ;
+ }
+
+ rule h ( )
+ {
+ return derived1.h ;
+ }
+
+ rule get-z ( )
+ {
+ return $(z) ;
+ }
+
+ # Check that 'assert.equal' visible in base class is visible here.
+ #
+ rule invariant2 ( )
+ {
+ assert.equal 2 : 2 ;
+ }
+
+ # Check that 'assert.variable-not-empty' visible in base class is
+ # visible here.
+ #
+ rule invariant3 ( )
+ {
+ local v = 10 ;
+ assert.variable-not-empty v ;
+ }
+ } # class derived1 : myclass ;
+
+ class derived2 : myclass
+ {
+ rule __init__ ( )
+ {
+ myclass.__init__ 1 : 2 ;
+ }
+
+ # Override g.
+ #
+ rule g ( args * )
+ {
+ return derived2.g ;
+ }
+
+ # Test the ability to call base class functions with qualification.
+ #
+ rule get-x ( )
+ {
+ return [ myclass.get-x ] ;
+ }
+ } # class derived2 : myclass ;
+
+ class derived2a : derived2
+ {
+ rule __init__
+ {
+ derived2.__init__ ;
+ }
+ } # class derived2a : derived2 ;
+
+ local rule expect_derived2 ( [derived2] x ) { }
+
+ local a = [ new myclass 3 4 5 : 4 5 ] ;
+ local b = [ new derived1 4 ] ;
+ local b2 = [ new derived1 4 ] ;
+ local c = [ new derived2 ] ;
+ local d = [ new derived2 ] ;
+ local e = [ new derived2a ] ;
+
+ expect_derived2 $(d) ;
+ expect_derived2 $(e) ;
+
+ # Argument checking is set up to call exit(1) directly on failure, and we
+ # can not hijack that with try, so we should better not do this test by
+ # default. We could fix this by having errors look up and invoke the EXIT
+ # rule instead; EXIT can be hijacked (;-)
+ if --fail-typecheck in [ modules.peek : ARGV ]
+ {
+ try ;
+ {
+ expect_derived2 $(a) ;
+ }
+ catch
+ "Expected an instance of derived2 but got" instead
+ ;
+ }
+
+ #try ;
+ #{
+ # new bad_subclass ;
+ #}
+ #catch
+ # bad_subclass.bad_subclass failed to call base class constructor
+ # myclass.__init__
+ # ;
+
+ #try ;
+ #{
+ # class bad_subclass ;
+ #}
+ #catch bad_subclass has already been declared ;
+
+ assert.result 3 4 5 : $(a).get-x ;
+ assert.result 4 5 : $(a).get-y ;
+ assert.result 4 : $(b).get-x ;
+ assert.result X : $(b).get-y ;
+ assert.result 4 : $(b).get-z ;
+ assert.result 1 : $(c).get-x ;
+ assert.result 2 : $(c).get-y ;
+ assert.result 4 5 : $(a).f ;
+ assert.result derived1.g : $(b).f ;
+ assert.result derived2.g : $(c).f ;
+ assert.result derived2.g : $(d).f ;
+
+ assert.result 10 : $(b).get-foo ;
+
+ $(a).invariant ;
+ $(b).invariant2 ;
+ $(b).invariant3 ;
+
+ # Check that the __class__ attribute is getting properly set.
+ assert.result myclass : $(a).get-class ;
+ assert.result derived1 : $(b).get-class ;
+ assert.result $(a) : $(a).get-instance ;
+
+ $(a).set-x a.x ;
+ $(b).set-x b.x ;
+ $(c).set-x c.x ;
+ $(d).set-x d.x ;
+ assert.result a.x : $(a).get-x ;
+ assert.result b.x : $(b).get-x ;
+ assert.result c.x : $(c).get-x ;
+ assert.result d.x : $(d).get-x ;
+
+ class derived3 : derived1 derived2
+ {
+ rule __init__ ( )
+ {
+ }
+ }
+
+ assert.result : bases myclass ;
+ assert.result myclass : bases derived1 ;
+ assert.result myclass : bases derived2 ;
+ assert.result derived1 derived2 : bases derived3 ;
+
+ assert.true is-derived derived1 : myclass ;
+ assert.true is-derived derived2 : myclass ;
+ assert.true is-derived derived3 : derived1 ;
+ assert.true is-derived derived3 : derived2 ;
+ assert.true is-derived derived3 : derived1 derived2 myclass ;
+ assert.true is-derived derived3 : myclass ;
+
+ assert.false is-derived myclass : derived1 ;
+
+ assert.true is-instance $(a) ;
+ assert.false is-instance bar ;
+
+ assert.true is-a $(a) : myclass ;
+ assert.true is-a $(c) : derived2 ;
+ assert.true is-a $(d) : myclass ;
+ assert.false is-a literal : myclass ;
+}
diff --git a/src/kenlm/jam-files/boost-build/kernel/errors.jam b/src/kenlm/jam-files/boost-build/kernel/errors.jam
new file mode 100644
index 0000000..9563396
--- /dev/null
+++ b/src/kenlm/jam-files/boost-build/kernel/errors.jam
@@ -0,0 +1,287 @@
+# Copyright 2003 Dave Abrahams
+# Copyright 2004 Vladimir Prus
+# Distributed under the Boost Software License, Version 1.0.
+# (See accompanying file LICENSE_1_0.txt or copy at
+# http://www.boost.org/LICENSE_1_0.txt)
+
+# Print a stack backtrace leading to this rule's caller. Each argument
+# represents a line of output to be printed after the first line of the
+# backtrace.
+#
+rule backtrace ( skip-frames prefix messages * : * )
+{
+ local frame-skips = 5 9 13 17 21 25 29 33 37 41 45 49 53 57 61 65 69 73 77 81 ;
+ local drop-elements = $(frame-skips[$(skip-frames)]) ;
+ if ! ( $(skip-frames) in 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 )
+ {
+ ECHO "warning: backtrace doesn't support skipping $(skip-frames) "
+ "frames; using 1 instead." ;
+ drop-elements = 5 ;
+ }
+
+ local args = $(.args) ;
+ if $(.user-modules-only)
+ {
+ local bt = [ nearest-user-location ] ;
+ if $(bt)
+ {
+ ECHO $(prefix) at $(bt) ;
+ }
+ for local n in $(args)
+ {
+ if $($(n))-is-defined
+ {
+ ECHO $(prefix) $($(n)) ;
+ }
+ }
+ }
+ else
+ {
+ # Get the whole backtrace, then drop the initial quadruples
+ # corresponding to the frames that must be skipped.
+ local bt = [ BACKTRACE ] ;
+ bt = $(bt[$(drop-elements)-]) ;
+
+ while $(bt)
+ {
+ local m = [ MATCH ^(.+)\\.$ : $(bt[3]) ] ;
+ ECHO $(bt[1]):$(bt[2]): "in" $(bt[4]) "from module" $(m) ;
+
+ # The first time through, print each argument on a separate line.
+ for local n in $(args)
+ {
+ if $($(n))-is-defined
+ {
+ ECHO $(prefix) $($(n)) ;
+ }
+ }
+ args = ; # Kill args so that this never happens again.
+
+ # Move on to the next quadruple.
+ bt = $(bt[5-]) ;
+ }
+ }
+}
+
+.args ?= messages 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 ;
+.disabled ?= ;
+.last-error-$(.args) ?= ;
+
+
+# try-catch --
+#
+# This is not really an exception-handling mechanism, but it does allow us to
+# perform some error-checking on our error-checking. Errors are suppressed after
+# a try, and the first one is recorded. Use catch to check that the error
+# message matched expectations.
+
+# Begin looking for error messages.
+#
+rule try ( )
+{
+ .disabled += true ;
+ .last-error-$(.args) = ;
+}
+
+
+# Stop looking for error messages; generate an error if an argument of messages
+# is not found in the corresponding argument in the error call.
+#
+rule catch ( messages * : * )
+{
+ .disabled = $(.disabled[2-]) ; # Pop the stack.
+
+ import sequence ;
+
+ if ! $(.last-error-$(.args))-is-defined
+ {
+ error-skip-frames 3 expected an error, but none occurred ;
+ }
+ else
+ {
+ for local n in $(.args)
+ {
+ if ! $($(n)) in $(.last-error-$(n))
+ {
+ local v = [ sequence.join $($(n)) : " " ] ;
+ v ?= "" ;
+ local joined = [ sequence.join $(.last-error-$(n)) : " " ] ;
+
+ .last-error-$(.args) = ;
+ error-skip-frames 3 expected \"$(v)\" in argument $(n) of error
+ : got \"$(joined)\" instead ;
+ }
+ }
+ }
+}
+
+
+rule error-skip-frames ( skip-frames messages * : * )
+{
+ if ! $(.disabled)
+ {
+ backtrace $(skip-frames) error: $(messages) : $(2) : $(3) : $(4) : $(5)
+ : $(6) : $(7) : $(8) : $(9) : $(10) : $(11) : $(12) : $(13) : $(14)
+ : $(15) : $(16) : $(17) : $(18) : $(19) ;
+ EXIT ;
+ }
+ else if ! $(.last-error-$(.args))
+ {
+ for local n in $(.args)
+ {
+ # Add an extra empty string so that we always have something in the
+ # event of an error.
+ .last-error-$(n) = $($(n)) "" ;
+ }
+ }
+}
+
+if --no-error-backtrace in [ modules.peek : ARGV ]
+{
+ .no-error-backtrace = true ;
+}
+
+
+# Print an error message with a stack backtrace and exit.
+#
+rule error ( messages * : * )
+{
+ if $(.no-error-backtrace)
+ {
+ local first-printed ;
+ # Print each argument on a separate line.
+ for local n in $(.args)
+ {
+ if $($(n))-is-defined
+ {
+ if ! $(first-printed)
+ {
+ ECHO error: $($(n)) ;
+ first-printed = true ;
+ }
+ else
+ {
+ ECHO $($(n)) ;
+ }
+ }
+ }
+ EXIT ;
+ }
+ else
+ {
+ error-skip-frames 3 $(1) : $(2) : $(3) : $(4) : $(5) : $(6) : $(7) :
+ $(8) : $(9) : $(10) : $(11) : $(12) : $(13) : $(14) : $(15) : $(16)
+ : $(17) : $(18) : $(19) ;
+ }
+}
+
+
+# Same as 'error', but the generated backtrace will include only user files.
+#
+rule user-error ( messages * : * )
+{
+ .user-modules-only = 1 ;
+ error-skip-frames 3 $(1) : $(2) : $(3) : $(4) : $(5) : $(6) : $(7) : $(8) :
+ $(9) : $(10) : $(11) : $(12) : $(13) : $(14) : $(15) : $(16) : $(17) :
+ $(18) : $(19) ;
+}
+
+
+# Print a warning message with a stack backtrace and exit.
+#
+rule warning
+{
+ backtrace 2 warning: $(1) : $(2) : $(3) : $(4) : $(5) : $(6) : $(7) : $(8) :
+ $(9) : $(10) : $(11) : $(12) : $(13) : $(14) : $(15) : $(16) : $(17) :
+ $(18) : $(19) ;
+}
+
+
+# Convert an arbitrary argument list into a list with ":" separators and quoted
+# elements representing the same information. This is mostly useful for
+# formatting descriptions of arguments with which a rule was called when
+# reporting an error.
+#
+rule lol->list ( * )
+{
+ local result ;
+ local remaining = 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 ;
+ while $($(remaining))
+ {
+ local n = $(remaining[1]) ;
+ remaining = $(remaining[2-]) ;
+
+ if $(n) != 1
+ {
+ result += ":" ;
+ }
+ result += \"$($(n))\" ;
+ }
+ return $(result) ;
+}
+
+
+# Return the file:line for the nearest entry in backtrace which correspond to a
+# user module.
+#
+rule nearest-user-location ( )
+{
+ local bt = [ BACKTRACE ] ;
+
+ local result ;
+ while $(bt) && ! $(result)
+ {
+ local m = [ MATCH ^(.+)\\.$ : $(bt[3]) ] ;
+ local user-modules = ([Jj]amroot(.jam|.v2|)|([Jj]amfile(.jam|.v2|)|user-config.jam|site-config.jam|project-config.jam|project-root.jam) ;
+
+ if [ MATCH $(user-modules) : $(bt[1]:D=) ]
+ {
+ result = $(bt[1]):$(bt[2]) ;
+ }
+ bt = $(bt[5-]) ;
+ }
+ return $(result) ;
+}
+
+
+# If optimized rule is available in Jam, use it.
+if NEAREST_USER_LOCATION in [ RULENAMES ]
+{
+ rule nearest-user-location ( )
+ {
+ local r = [ NEAREST_USER_LOCATION ] ;
+ return $(r[1]):$(r[2]) ;
+ }
+}
+
+
+rule __test__ ( )
+{
+ # Show that we can correctly catch an expected error.
+ try ;
+ {
+ error an error occurred : somewhere ;
+ }
+ catch an error occurred : somewhere ;
+
+ # Show that unexpected errors generate real errors.
+ try ;
+ {
+ try ;
+ {
+ error an error occurred : somewhere ;
+ }
+ catch an error occurred : nowhere ;
+ }
+ catch expected \"nowhere\" in argument 2 ;
+
+ # Show that not catching an error where one was expected is an error.
+ try ;
+ {
+ try ;
+ {
+ }
+ catch ;
+ }
+ catch expected an error, but none occurred ;
+}
diff --git a/src/kenlm/jam-files/boost-build/kernel/modules.jam b/src/kenlm/jam-files/boost-build/kernel/modules.jam
new file mode 100644
index 0000000..4258225
--- /dev/null
+++ b/src/kenlm/jam-files/boost-build/kernel/modules.jam
@@ -0,0 +1,359 @@
+# Copyright 2003 Dave Abrahams
+# Copyright 2003, 2005 Vladimir Prus
+# Distributed under the Boost Software License, Version 1.0.
+# (See accompanying file LICENSE_1_0.txt or copy at
+# http://www.boost.org/LICENSE_1_0.txt)
+
+# Essentially an include guard; ensures that no module is loaded multiple times.
+.loaded ?= ;
+
+# A list of modules currently being loaded for error reporting of circular
+# dependencies.
+.loading ?= ;
+
+# A list of modules needing to be tested using their __test__ rule.
+.untested ?= ;
+
+# A list of modules which have been tested using their __test__ rule.
+.tested ?= ;
+
+
+# Runs internal Boost Build unit tests for the specified module. The module's
+# __test__ rule is executed in its own module to eliminate any inadvertent
+# effects of testing module dependencies (such as assert) on the module itself.
+#
+local rule run-module-test ( m )
+{
+ local tested-modules = [ modules.peek modules : .tested ] ;
+
+ if ( ! $(m) in $(tested-modules) ) # Avoid recursive test invocations.
+ && ( ( --debug in $(argv) ) || ( --debug-module=$(m) in $(argv) ) )
+ {
+ modules.poke modules : .tested : $(tested-modules) $(m) ;
+
+ if ! ( __test__ in [ RULENAMES $(m) ] )
+ {
+ local argv = [ peek : ARGV ] ;
+ if ! ( --quiet in $(argv) ) && ( --debug-tests in $(argv) )
+ {
+ ECHO warning: no __test__ rule defined in module $(m) ;
+ }
+ }
+ else
+ {
+ if ! ( --quiet in $(argv) )
+ {
+ ECHO testing module $(m)... ;
+ }
+
+ local test-module = __test-$(m)__ ;
+ IMPORT $(m) : [ RULENAMES $(m) ] : $(test-module) : [ RULENAMES $(m)
+ ] ;
+ IMPORT $(m) : __test__ : $(test-module) : __test__ : LOCALIZE ;
+ module $(test-module)
+ {
+ __test__ ;
+ }
+ }
+ }
+}
+
+
+# Return the binding of the given module.
+#
+rule binding ( module )
+{
+ return $($(module).__binding__) ;
+}
+
+
+# Sets the module-local value of a variable. This is the most reliable way to
+# set a module-local variable in a different module; it eliminates issues of
+# name shadowing due to dynamic scoping.
+#
+rule poke ( module-name ? : variables + : value * )
+{
+ module $(<)
+ {
+ $(>) = $(3) ;
+ }
+}
+
+
+# Returns the module-local value of a variable. This is the most reliable way to
+# examine a module-local variable in a different module; it eliminates issues of
+# name shadowing due to dynamic scoping.
+#
+rule peek ( module-name ? : variables + )
+{
+ module $(<)
+ {
+ return $($(>)) ;
+ }
+}
+
+
+# Call the given rule locally in the given module. Use this for rules accepting
+# rule names as arguments, so that the passed rule may be invoked in the context
+# of the rule's caller (for example, if the rule accesses module globals or is a
+# local rule). Note that rules called this way may accept at most 18 parameters.
+#
+rule call-in ( module-name ? : rule-name args * : * )
+{
+ module $(module-name)
+ {
+ return [ $(2) : $(3) : $(4) : $(5) : $(6) : $(7) : $(8) : $(9) : $(10) :
+ $(11) : $(12) : $(13) : $(14) : $(15) : $(16) : $(17) : $(18) :
+ $(19) ] ;
+ }
+}
+
+
+# Given a possibly qualified rule name and arguments, remove any initial module
+# qualification from the rule and invoke it in that module. If there is no
+# module qualification, the rule is invoked in the global module. Note that
+# rules called this way may accept at most 18 parameters.
+#
+rule call-locally ( qualified-rule-name args * : * )
+{
+ local module-rule = [ MATCH (.*)\\.(.*) : $(qualified-rule-name) ] ;
+ local rule-name = $(module-rule[2]) ;
+ rule-name ?= $(qualified-rule-name) ;
+ # We pass only 18 parameters here since Boost Jam allows at most 19 rule
+ # parameter positions and the call-in rule already uses up the initial
+ # position for the module name.
+ return [ call-in $(module-rule[1]) : $(rule-name) $(args) : $(2) : $(3) :
+ $(4) : $(5) : $(6) : $(7) : $(8) : $(9) : $(10) : $(11) : $(12) : $(13)
+ $(14) : $(15) : $(16) : $(17) : $(18) : $(19) ] ;
+}
+
+
+# Load the indicated module if it is not already loaded.
+#
+rule load (
+ module-name # Name of module to load. Rules will be defined in this
+ # module.
+ : filename ? # (partial) path to file; Defaults to $(module-name).jam.
+ : search * # Directories in which to search for filename. Defaults to
+ # $(BOOST_BUILD_PATH).
+)
+{
+ # Avoid loading modules twice.
+ if ! ( $(module-name) in $(.loaded) )
+ {
+ filename ?= $(module-name).jam ;
+
+ # Mark the module loaded so we do not try to load it recursively.
+ .loaded += $(module-name) ;
+
+ # Suppress tests if any module loads are already in progress.
+ local suppress-test = $(.loading[1]) ;
+
+ # Push this module on the loading stack.
+ .loading += $(module-name) ;
+
+ # Remember that it is untested.
+ .untested += $(module-name) ;
+
+ # Insert the new module's __name__ and __file__ globals.
+ poke $(module-name) : __name__ : $(module-name) ;
+ poke $(module-name) : __file__ : $(filename) ;
+
+ module $(module-name)
+ {
+ # Add some grist so that the module will have a unique target name.
+ local module-target = $(__file__:G=module@) ;
+
+ local search = $(3) ;
+ search ?= [ modules.peek : BOOST_BUILD_PATH ] ;
+ SEARCH on $(module-target) = $(search) ;
+ BINDRULE on $(module-target) = modules.record-binding ;
+
+ include $(module-target) ;
+
+ # Allow the module to see its own names with full qualification.
+ local rules = [ RULENAMES $(__name__) ] ;
+ IMPORT $(__name__) : $(rules) : $(__name__) : $(__name__).$(rules) ;
+ }
+
+ if $(module-name) != modules && ! [ binding $(module-name) ]
+ {
+ import errors ;
+ errors.error "Could not find module" $(module-name) in $(search) ;
+ }
+
+ # Pop the loading stack. Must happen before testing or we will run into
+ # a circular loading dependency.
+ .loading = $(.loading[1--2]) ;
+
+ # Run any pending tests if this is an outer load.
+ if ! $(suppress-test)
+ {
+ local argv = [ peek : ARGV ] ;
+ for local m in $(.untested)
+ {
+ run-module-test $(m) ;
+ }
+ .untested = ;
+ }
+ }
+ else if $(module-name) in $(.loading)
+ {
+ import errors ;
+ errors.error loading \"$(module-name)\"
+ : circular module loading dependency:
+ : $(.loading)" ->" $(module-name) ;
+ }
+}
+
+
+# This helper is used by load (above) to record the binding (path) of each
+# loaded module.
+#
+rule record-binding ( module-target : binding )
+{
+ $(.loading[-1]).__binding__ = $(binding) ;
+}
+
+
+# Transform each path in the list, with all backslashes converted to forward
+# slashes and all detectable redundancy removed. Something like this is probably
+# needed in path.jam, but I am not sure of that, I do not understand it, and I
+# am not ready to move all of path.jam into the kernel.
+#
+local rule normalize-raw-paths ( paths * )
+{
+ local result ;
+ for p in $(paths:T)
+ {
+ result += [ NORMALIZE_PATH $(p) ] ;
+ }
+ return $(result) ;
+}
+
+
+.cwd = [ PWD ] ;
+
+
+# Load the indicated module and import rule names into the current module. Any
+# members of rules-opt will be available without qualification in the caller's
+# module. Any members of rename-opt will be taken as the names of the rules in
+# the caller's module, in place of the names they have in the imported module.
+# If rules-opt = '*', all rules from the indicated module are imported into the
+# caller's module. If rename-opt is supplied, it must have the same number of
+# elements as rules-opt.
+#
+rule import ( module-names + : rules-opt * : rename-opt * )
+{
+ if ( $(rules-opt) = * || ! $(rules-opt) ) && $(rename-opt)
+ {
+ import errors ;
+ errors.error "Rule aliasing is only available for explicit imports." ;
+ }
+
+ if $(module-names[2]) && ( $(rules-opt) || $(rename-opt) )
+ {
+ import errors ;
+ errors.error "When loading multiple modules, no specific rules or"
+ "renaming is allowed" ;
+ }
+
+ local caller = [ CALLER_MODULE ] ;
+
+ # Import each specified module
+ for local m in $(module-names)
+ {
+ if ! $(m) in $(.loaded)
+ {
+ # If the importing module is not already in the BOOST_BUILD_PATH,
+ # prepend it to the path. We do not want to invert the search order
+ # of modules that are already there.
+
+ local caller-location ;
+ if $(caller)
+ {
+ caller-location = [ binding $(caller) ] ;
+ caller-location = $(caller-location:D) ;
+ caller-location = [ normalize-raw-paths
+ $(caller-location:R=$(.cwd)) ] ;
+ }
+
+ local search = [ peek : BOOST_BUILD_PATH ] ;
+ search = [ normalize-raw-paths $(search:R=$(.cwd)) ] ;
+
+ if $(caller-location) && ! $(caller-location) in $(search)
+ {
+ search = $(caller-location) $(search) ;
+ }
+
+ load $(m) : : $(search) ;
+ }
+
+ IMPORT_MODULE $(m) : $(caller) ;
+
+ if $(rules-opt)
+ {
+ local source-names ;
+ if $(rules-opt) = *
+ {
+ local all-rules = [ RULENAMES $(m) ] ;
+ source-names = $(all-rules) ;
+ }
+ else
+ {
+ source-names = $(rules-opt) ;
+ }
+ local target-names = $(rename-opt) ;
+ target-names ?= $(source-names) ;
+ IMPORT $(m) : $(source-names) : $(caller) : $(target-names) ;
+ }
+ }
+}
+
+
+# Define exported copies in $(target-module) of all rules exported from
+# $(source-module). Also make them available in the global module with
+# qualification, so that it is just as though the rules were defined originally
+# in $(target-module).
+#
+rule clone-rules ( source-module target-module )
+{
+ local r = [ RULENAMES $(source-module) ] ;
+ IMPORT $(source-module) : $(r) : $(target-module) : $(r) : LOCALIZE ;
+ EXPORT $(target-module) : $(r) ;
+ IMPORT $(target-module) : $(r) : : $(target-module).$(r) ;
+}
+
+
+# These rules need to be available in all modules to implement module loading
+# itself and other fundamental operations.
+local globalize = peek poke record-binding ;
+IMPORT modules : $(globalize) : : modules.$(globalize) ;
+
+
+rule __test__ ( )
+{
+ import assert ;
+ import modules : normalize-raw-paths ;
+
+ module modules.__test__
+ {
+ foo = bar ;
+ }
+
+ assert.result bar : peek modules.__test__ : foo ;
+
+ poke modules.__test__ : foo : bar baz ;
+ assert.result bar baz : peek modules.__test__ : foo ;
+
+ assert.result c:/foo/bar : normalize-raw-paths c:/x/../foo/./xx/yy/../../bar ;
+ assert.result . : normalize-raw-paths . ;
+ assert.result .. : normalize-raw-paths .. ;
+ assert.result ../.. : normalize-raw-paths ../.. ;
+ assert.result .. : normalize-raw-paths ./.. ;
+ assert.result / / : normalize-raw-paths / \\ ;
+ assert.result a : normalize-raw-paths a ;
+ assert.result a : normalize-raw-paths a/ ;
+ assert.result /a : normalize-raw-paths /a/ ;
+ assert.result / : normalize-raw-paths /a/.. ;
+}
diff --git a/src/kenlm/jam-files/boost-build/options/help.jam b/src/kenlm/jam-files/boost-build/options/help.jam
new file mode 100644
index 0000000..b507e1e
--- /dev/null
+++ b/src/kenlm/jam-files/boost-build/options/help.jam
@@ -0,0 +1,212 @@
+# Copyright 2003 Dave Abrahams
+# Copyright 2003, 2006 Rene Rivera
+# Copyright 2003, 2006 Vladimir Prus
+# Distributed under the Boost Software License, Version 1.0.
+# (See accompanying file LICENSE_1_0.txt or http://www.boost.org/LICENSE_1_0.txt)
+
+# This module is the plug-in handler for the --help and --help-.*
+# command-line options
+import modules ;
+import assert ;
+import doc : do-scan set-option set-output set-output-file print-help-usage print-help-top ;
+import sequence ;
+import set ;
+import project ;
+import print ;
+import os ;
+import version ;
+import path ;
+
+# List of possible modules, but which really aren't.
+#
+.not-modules =
+ boost-build bootstrap site-config test user-config
+ -tools allyourbase boost-base features python stlport testing unit-tests ;
+
+# The help system options are parsed here and handed off to the doc
+# module to translate into documentation requests and actions. The
+# understood options are:
+#
+# --help-disable-<option>
+# --help-doc-options
+# --help-enable-<option>
+# --help-internal
+# --help-options
+# --help-usage
+# --help-output <type>
+# --help-output-file <file>
+# --help [<module-or-class>]
+#
+rule process (
+ command # The option.
+ : values * # The values, starting after the "=".
+ )
+{
+ assert.result --help : MATCH ^(--help).* : $(command) ;
+ local did-help = ;
+ switch $(command)
+ {
+ case --help-internal :
+ local path-to-modules = [ modules.peek : BOOST_BUILD_PATH ] ;
+ path-to-modules ?= . ;
+ local possible-modules = [ GLOB $(path-to-modules) : *\\.jam ] ;
+ local not-modules = [ GLOB $(path-to-modules) : *$(.not-modules)\\.jam ] ;
+ local modules-to-list =
+ [ sequence.insertion-sort
+ [ set.difference $(possible-modules:D=:S=) : $(not-modules:D=:S=) ] ] ;
+ local modules-to-scan ;
+ for local m in $(modules-to-list)
+ {
+ local module-files = [ GLOB $(path-to-modules) : $(m)\\.jam ] ;
+ modules-to-scan += $(module-files[1]) ;
+ }
+ do-scan $(modules-to-scan) : print-help-all ;
+ did-help = true ;
+
+ case --help-enable-* :
+ local option = [ MATCH --help-enable-(.*) : $(command) ] ; option = $(option:L) ;
+ set-option $(option) : enabled ;
+ did-help = true ;
+
+ case --help-disable-* :
+ local option = [ MATCH --help-disable-(.*) : $(command) ] ; option = $(option:L) ;
+ set-option $(option) ;
+ did-help = true ;
+
+ case --help-output :
+ set-output $(values[1]) ;
+ did-help = true ;
+
+ case --help-output-file :
+ set-output-file $(values[1]) ;
+ did-help = true ;
+
+ case --help-doc-options :
+ local doc-module-spec = [ split-symbol doc ] ;
+ do-scan $(doc-module-spec[1]) : print-help-options ;
+ did-help = true ;
+
+ case --help-options :
+ print-help-usage ;
+ did-help = true ;
+
+ case --help :
+ local spec = $(values[1]) ;
+ if $(spec)
+ {
+ local spec-parts = [ split-symbol $(spec) ] ;
+ if $(spec-parts)
+ {
+ if $(spec-parts[2])
+ {
+ do-scan $(spec-parts[1]) : print-help-classes $(spec-parts[2]) ;
+ do-scan $(spec-parts[1]) : print-help-rules $(spec-parts[2]) ;
+ do-scan $(spec-parts[1]) : print-help-variables $(spec-parts[2]) ;
+ }
+ else
+ {
+ do-scan $(spec-parts[1]) : print-help-module ;
+ }
+ }
+ else
+ {
+ EXIT "Unrecognized help option '"$(command)" "$(spec)"'." ;
+ }
+ }
+ else
+ {
+ version.print ;
+ ECHO ;
+ # First print documentation from the current Jamfile, if any.
+ # FIXME: Generally, this duplication of project.jam logic is bad.
+ local names = [ modules.peek project : JAMROOT ]
+ [ modules.peek project : JAMFILE ] ;
+ local project-file = [ path.glob . : $(names) ] ;
+ if ! $(project-file)
+ {
+ project-file = [ path.glob-in-parents . : $(names) ] ;
+ }
+
+ for local p in $(project-file)
+ {
+ do-scan $(p) : print-help-project $(p) ;
+ }
+
+ # Next any user-config help.
+ local user-path = [ os.home-directories ] [ os.environ BOOST_BUILD_PATH ] ;
+ local user-config = [ GLOB $(user-path) : user-config.jam ] ;
+ if $(user-config)
+ {
+ do-scan $(user-config[1]) : print-help-config user $(user-config[1]) ;
+ }
+
+ # Next any site-config help.
+ local site-config = [ GLOB $(user-path) : site-config.jam ] ;
+ if $(site-config)
+ {
+ do-scan $(site-config[1]) : print-help-config site $(site-config[1]) ;
+ }
+
+ # Then the overall help.
+ print-help-top ;
+ }
+ did-help = true ;
+ }
+ if $(did-help)
+ {
+ UPDATE all ;
+ NOCARE all ;
+ }
+ return $(did-help) ;
+}
+
+# Split a reference to a symbol into module and symbol parts.
+#
+local rule split-symbol (
+ symbol # The symbol to split.
+ )
+{
+ local path-to-modules = [ modules.peek : BOOST_BUILD_PATH ] ;
+ path-to-modules ?= . ;
+ local module-name = $(symbol) ;
+ local symbol-name = ;
+ local result = ;
+ while ! $(result)
+ {
+ local module-path = [ GLOB $(path-to-modules) : $(module-name)\\.jam ] ;
+ if $(module-path)
+ {
+ # The 'module-name' in fact refers to module. Return the full
+ # module path and a symbol within it. If 'symbol' passed to this
+ # rule is already module, 'symbol-name' will be empty. Otherwise,
+ # it's initialized on the previous loop iteration.
+ # In case there are several modules by this name,
+ # use the first one.
+ result = $(module-path[1]) $(symbol-name) ;
+ }
+ else
+ {
+ if ! $(module-name:S)
+ {
+ result = - ;
+ }
+ else
+ {
+ local next-symbol-part = [ MATCH ^.(.*) : $(module-name:S) ] ;
+ if $(symbol-name)
+ {
+ symbol-name = $(next-symbol-part).$(symbol-name) ;
+ }
+ else
+ {
+ symbol-name = $(next-symbol-part) ;
+ }
+ module-name = $(module-name:B) ;
+ }
+ }
+ }
+ if $(result) != -
+ {
+ return $(result) ;
+ }
+}
diff --git a/src/kenlm/jam-files/boost-build/site-config.jam b/src/kenlm/jam-files/boost-build/site-config.jam
new file mode 100644
index 0000000..6afe526
--- /dev/null
+++ b/src/kenlm/jam-files/boost-build/site-config.jam
@@ -0,0 +1,11 @@
+# Copyright 1999-2012 Gentoo Foundation
+# Distributed under the Boost Software License, Version 1.0.
+# (See accompanying file LICENSE_1_0.txt or http://www.boost.org/LICENSE_1_0.txt)
+
+# Define two new variants to be used when building boost (or separate boost-libs)
+# on Gentoo. The two variants make use of Gentoo-specific optimization and debug-symbols
+# values "none" which are not part of the official boost distribution.
+# DO NOT RELY ON THE FOLLOWING VARIANTS TO BE PRESENT ON OTHER OS!
+# gentoorelease: based on 'release', but with no optimization or debug info.
+variant gentoorelease : release : <optimization>none <debug-symbols>none <runtime-link>shared ;
+# gentoodebug: based on 'debug', keeping debug symbols, no optimization.
+variant gentoodebug : debug : <optimization>none <debug-symbols>on <runtime-link>shared ;
+
diff --git a/src/kenlm/jam-files/boost-build/tools/acc.jam b/src/kenlm/jam-files/boost-build/tools/acc.jam
new file mode 100644
index 0000000..f04c9dc
--- /dev/null
+++ b/src/kenlm/jam-files/boost-build/tools/acc.jam
@@ -0,0 +1,118 @@
+# Copyright Vladimir Prus 2004.
+# Copyright Toon Knapen 2004.
+# Copyright Boris Gubenko 2007.
+# Distributed under the Boost Software License, Version 1.0.
+# (See accompanying file LICENSE_1_0.txt
+# or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#
+# Boost.Build V2 toolset for the HP aC++ compiler.
+#
+
+import toolset : flags ;
+import feature ;
+import generators ;
+import common ;
+
+# Register 'acc' as a toolset value and derive it from the generic 'unix'
+# toolset so it inherits the standard unix linking/archiving behaviour.
+feature.extend toolset : acc ;
+toolset.inherit acc : unix ;
+generators.override builtin.lib-generator : acc.prebuilt ;
+generators.override acc.searched-lib-generator : searched-lib-generator ;
+
+# Configures the acc toolset.
+#
+# version ?              - optional version used to distinguish configurations.
+# user-provided-command * - explicit compiler command; defaults to 'aCC'.
+# options *              - extra options forwarded to common.handle-options.
+rule init ( version ? : user-provided-command * : options * )
+{
+ local condition = [ common.check-init-parameters acc
+ : version $(version) ] ;
+
+ local command = [ common.get-invocation-command acc : aCC
+ : $(user-provided-command) ] ;
+
+ common.handle-options acc : $(condition) : $(command) : $(options) ;
+}
+
+
+# Declare generators
+generators.register-c-compiler acc.compile.c : C : OBJ : <toolset>acc ;
+generators.register-c-compiler acc.compile.c++ : CPP : OBJ : <toolset>acc ;
+
+# Declare flags.
+# Map abstract Boost.Build features to aC++ command-line options.
+flags acc CFLAGS <optimization>off : ;
+flags acc CFLAGS <optimization>speed : -O3 ;
+flags acc CFLAGS <optimization>space : -O2 ;
+
+flags acc CFLAGS <inlining>off : +d ;
+flags acc CFLAGS <inlining>on : ;
+flags acc CFLAGS <inlining>full : ;
+
+flags acc C++FLAGS <exception-handling>off : ;
+flags acc C++FLAGS <exception-handling>on : ;
+
+flags acc C++FLAGS <rtti>off : ;
+flags acc C++FLAGS <rtti>on : ;
+
+# We want the full path to the sources in the debug symbols because otherwise
+# the debugger won't find the sources when we use boost.build.
+flags acc CFLAGS <debug-symbols>on : -g ;
+flags acc LINKFLAGS <debug-symbols>on : -g ;
+flags acc LINKFLAGS <debug-symbols>off : -s ;
+
+# V2 does not have <shared-linkable>, not sure what this meant in V1.
+# flags acc CFLAGS <shared-linkable>true : +Z ;
+
+flags acc CFLAGS <profiling>on : -pg ;
+flags acc LINKFLAGS <profiling>on : -pg ;
+
+flags acc CFLAGS <address-model>64 : +DD64 ;
+flags acc LINKFLAGS <address-model>64 : +DD64 ;
+
+# It is unknown if there's separate option for rpath used only
+# at link time, similar to -rpath-link in GNU. We'll use -L.
+flags acc RPATH_LINK : <xdll-path> ;
+
+# Pass through user-specified free features verbatim.
+flags acc CFLAGS <cflags> ;
+flags acc C++FLAGS <cxxflags> ;
+flags acc DEFINES <define> ;
+flags acc UNDEFS <undef> ;
+flags acc HDRS <include> ;
+flags acc STDHDRS <sysinclude> ;
+flags acc LINKFLAGS <linkflags> ;
+flags acc ARFLAGS <arflags> ;
+
+flags acc LIBPATH <library-path> ;
+flags acc NEEDLIBS <library-file> ;
+flags acc FINDLIBS <find-shared-library> ;
+flags acc FINDLIBS <find-static-library> ;
+
+# Select the compiler name according to the threading model.
+flags acc CFLAGS <threading>multi : -mt ;
+flags acc LINKFLAGS <threading>multi : -mt ;
+
+flags acc.compile.c++ TEMPLATE_DEPTH <c++-template-depth> ;
+
+
+# Link an executable. NEEDLIBS is bound so the library files participate in
+# dependency analysis; it is listed twice to resolve circular references.
+actions acc.link bind NEEDLIBS
+{
+ $(CONFIG_COMMAND) -AA $(LINKFLAGS) -o "$(<[1])" -L"$(RPATH_LINK)" -L$(LIBPATH) -L$(STDLIBPATH) "$(>)" "$(NEEDLIBS)" "$(NEEDLIBS)" -l$(FINDLIBS) $(OPTIONS)
+}
+
+SPACE = " " ;
+# Link a shared library (-b); -Wl,+h embeds the internal (soname-like) name.
+actions acc.link.dll bind NEEDLIBS
+{
+ $(CONFIG_COMMAND) -AA -b $(LINKFLAGS) -o "$(<[1])" -L"$(RPATH_LINK)" -Wl,+h$(<[-1]:D=) -L$(LIBPATH) -L$(STDLIBPATH) "$(>)" "$(NEEDLIBS)" "$(NEEDLIBS)" -l$(FINDLIBS) $(OPTIONS)
+}
+
+# Compile a C source; note this invokes plain 'cc', not $(CONFIG_COMMAND).
+actions acc.compile.c
+{
+ cc -c -I$(BOOST_ROOT) -U$(UNDEFS) -D$(DEFINES) $(CFLAGS) -I"$(HDRS)" -I"$(STDHDRS)" -o "$(<)" "$(>)" $(OPTIONS)
+}
+
+# Compile a C++ source with aC++ in ANSI mode (-AA).
+actions acc.compile.c++
+{
+ $(CONFIG_COMMAND) -AA -c -Wc,--pending_instantiations=$(TEMPLATE_DEPTH) -I$(BOOST_ROOT) -U$(UNDEFS) -D$(DEFINES) $(CFLAGS) $(C++FLAGS) -I"$(HDRS)" -I"$(STDHDRS)" -o "$(<)" "$(>)" $(OPTIONS)
+}
+
+# Archive object files; 'updated together piecemeal' feeds only changed
+# sources, possibly in batches, to a single archive.
+actions updated together piecemeal acc.archive
+{
+ ar ru$(ARFLAGS:E="") "$(<)" "$(>)"
+}
diff --git a/src/kenlm/jam-files/boost-build/tools/auto-index.jam b/src/kenlm/jam-files/boost-build/tools/auto-index.jam
new file mode 100644
index 0000000..41d0482
--- /dev/null
+++ b/src/kenlm/jam-files/boost-build/tools/auto-index.jam
@@ -0,0 +1,204 @@
+
+import feature ;
+import generators ;
+import "class" ;
+import toolset ;
+import targets ;
+import "class" : new ;
+import project ;
+
+# User-visible features controlling AutoIndex behaviour per target.
+feature.feature auto-index : off "on" ;
+feature.feature auto-index-internal : off "on" ;
+feature.feature auto-index-verbose : off "on" ;
+feature.feature auto-index-no-duplicates : off "on" ;
+feature.feature auto-index-script : : free path ;
+feature.feature auto-index-prefix : : free path ;
+feature.feature auto-index-type : : free ;
+feature.feature auto-index-section-names : "on" off ;
+
+# Translate the features above into auto-index command-line options.
+toolset.flags auto-index.auto-index FLAGS <auto-index-internal>on : --internal-index ;
+toolset.flags auto-index.auto-index SCRIPT <auto-index-script> ;
+toolset.flags auto-index.auto-index PREFIX <auto-index-prefix> ;
+toolset.flags auto-index.auto-index INDEX_TYPE <auto-index-type> ;
+toolset.flags auto-index.auto-index FLAGS <auto-index-verbose>on : --verbose ;
+toolset.flags auto-index.auto-index FLAGS <auto-index-no-duplicates>on : --no-duplicates ;
+toolset.flags auto-index.auto-index FLAGS <auto-index-section-names>off : --no-section-names ;
+
+# <auto-index-binary> shell command to run AutoIndex
+# <auto-index-binary-dependencies> targets to build AutoIndex from sources.
+feature.feature <auto-index-binary> : : free ;
+feature.feature <auto-index-binary-dependencies> : : free dependency ;
+
+# Generator that locates (or builds) the AutoIndex executable and records it
+# in the property set before delegating to the base generator.
+class auto-index-generator : generator
+{
+ import common modules path targets build-system ;
+ rule run ( project name ? : property-set : sources * )
+ {
+ # AutoIndex invocation command and dependencies.
+ local auto-index-binary = [ modules.peek auto-index : .command ] ;
+ local auto-index-binary-dependencies ;
+
+ if $(auto-index-binary)
+ {
+ # Use user-supplied command.
+ auto-index-binary = [ common.get-invocation-command auto-index : auto-index : $(auto-index-binary) ] ;
+ }
+ else
+ {
+ # Search for AutoIndex sources in sensible places, like
+ # $(BOOST_ROOT)/tools/auto_index
+ # $(BOOST_BUILD_PATH)/../../auto_index
+
+ # And build auto-index executable from sources.
+
+ local boost-root = [ modules.peek : BOOST_ROOT ] ;
+ local boost-build-path = [ build-system.location ] ;
+ local boost-build-path2 = [ modules.peek : BOOST_BUILD_PATH ] ;
+
+ local auto-index-dir ;
+
+ if $(boost-root)
+ {
+ auto-index-dir += [ path.join $(boost-root) tools ] ;
+ }
+
+ if $(boost-build-path)
+ {
+ auto-index-dir += $(boost-build-path)/../.. ;
+ }
+ if $(boost-build-path2)
+ {
+ auto-index-dir += $(boost-build-path2)/.. ;
+ }
+
+ #ECHO $(auto-index-dir) ;
+ auto-index-dir = [ path.glob $(auto-index-dir) : auto_index ] ;
+ #ECHO $(auto-index-dir) ;
+
+ # If the AutoIndex source directory was found, mark its main target
+ # as a dependency for the current project. Otherwise, try to find
+ # 'auto-index' in user's PATH
+ if $(auto-index-dir)
+ {
+ # Use the first matching directory; its Jamfile lives in 'build'.
+ auto-index-dir = [ path.make $(auto-index-dir[1]) ] ;
+ auto-index-dir = $(auto-index-dir)/build ;
+
+ #ECHO $(auto-index-dir) ;
+
+ # Get the main-target in AutoIndex directory.
+ local auto-index-main-target = [ targets.resolve-reference $(auto-index-dir) : $(project) ] ;
+
+ #ECHO $(auto-index-main-target) ;
+
+ # The first element are actual targets, the second are
+ # properties found in target-id. We do not care about these
+ # since we have passed the id ourselves.
+ auto-index-main-target =
+ [ $(auto-index-main-target[1]).main-target auto_index ] ;
+
+ #ECHO $(auto-index-main-target) ;
+
+ auto-index-binary-dependencies =
+ [ $(auto-index-main-target).generate [ $(property-set).propagated ] ] ;
+
+ # Ignore usage-requirements returned as first element.
+ auto-index-binary-dependencies = $(auto-index-binary-dependencies[2-]) ;
+
+ # Some toolsets generate extra targets (e.g. RSP). We must mark
+ # all targets as dependencies for the project, but we will only
+ # use the EXE target for auto-index-to-boostbook translation.
+ for local target in $(auto-index-binary-dependencies)
+ {
+ if [ $(target).type ] = EXE
+ {
+ auto-index-binary =
+ [ path.native
+ [ path.join
+ [ $(target).path ]
+ [ $(target).name ]
+ ]
+ ] ;
+ }
+ }
+ }
+ else
+ {
+ ECHO "AutoIndex warning: The path to the auto-index executable was" ;
+ ECHO " not provided. Additionally, couldn't find AutoIndex" ;
+ ECHO " sources searching in" ;
+ ECHO " * BOOST_ROOT/tools/auto-index" ;
+ ECHO " * BOOST_BUILD_PATH/../../auto-index" ;
+ ECHO " Will now try to find a precompiled executable by searching" ;
+ ECHO " the PATH for 'auto-index'." ;
+ ECHO " To disable this warning in the future, or to completely" ;
+ ECHO " avoid compilation of auto-index, you can explicitly set the" ;
+ ECHO " path to a auto-index executable command in user-config.jam" ;
+ ECHO " or site-config.jam with the call" ;
+ ECHO " using auto-index : /path/to/auto-index ;" ;
+
+ # As a last resort, search for 'auto-index' command in path. Note
+ # that even if the 'auto-index' command is not found,
+ # get-invocation-command will still return 'auto-index' and might
+ # generate an error while generating the virtual-target.
+
+ auto-index-binary = [ common.get-invocation-command auto-index : auto-index ] ;
+ }
+ }
+
+ # Add $(auto-index-binary-dependencies) as a dependency of the current
+ # project and set it as the <auto-index-binary> feature for the
+ # auto-index-to-boostbook rule, below.
+ property-set = [ $(property-set).add-raw
+ <dependency>$(auto-index-binary-dependencies)
+ <auto-index-binary>$(auto-index-binary)
+ <auto-index-binary-dependencies>$(auto-index-binary-dependencies)
+ ] ;
+
+ #ECHO "binary = " $(auto-index-binary) ;
+ #ECHO "dependencies = " $(auto-index-binary-dependencies) ;
+
+ return [ generator.run $(project) $(name) : $(property-set) : $(sources) ] ;
+ }
+}
+
+# Initialization of toolset.
+#
+# Parameters:
+# command ? -> path to AutoIndex executable.
+#
+# When command is not supplied toolset will search for AutoIndex directory and
+# compile the executable from source. If that fails we still search the path for
+# 'auto_index'.
+#
+rule init (
+ command ? # path to the AutoIndex executable.
+ )
+{
+ # Only the first 'using auto-index : ...' invocation takes effect;
+ # subsequent calls are silently ignored.
+ if ! $(.initialized)
+ {
+ .initialized = true ;
+ .command = $(command) ;
+ }
+}
+
+# Expose the command and its build dependencies to the action below.
+toolset.flags auto-index.auto-index AI-COMMAND <auto-index-binary> ;
+toolset.flags auto-index.auto-index AI-DEPENDENCIES <auto-index-binary-dependencies> ;
+
+# Insert the AutoIndex pass between docbook generation and consumption.
+generators.register [ class.new auto-index-generator auto-index.auto-index : DOCBOOK : DOCBOOK(%.auto_index) : <auto-index>on ] ;
+generators.override auto-index.auto-index : boostbook.boostbook-to-docbook ;
+
+# Per-target setup run before the 'auto-index' action is invoked.
+rule auto-index ( target : source : properties * )
+{
+ # Signal dependency of auto-index sources on <auto-index-binary-dependencies>
+ # upon invocation of auto-index-to-boostbook.
+ #ECHO "AI-COMMAND= " $(AI-COMMAND) ;
+ DEPENDS $(target) : [ on $(target) return $(AI-DEPENDENCIES) ] ;
+ #DEPENDS $(target) : [ on $(target) return $(SCRIPT) ] ;
+}
+
+# Run AutoIndex: read $(>), write the indexed docbook to $(<).
+actions auto-index
+{
+ $(AI-COMMAND) $(FLAGS) "--prefix="$(PREFIX) "--script="$(SCRIPT) "--index-type="$(INDEX_TYPE) "--in="$(>) "--out="$(<)
+}
+
+
diff --git a/src/kenlm/jam-files/boost-build/tools/bison.jam b/src/kenlm/jam-files/boost-build/tools/bison.jam
new file mode 100644
index 0000000..0689d4b
--- /dev/null
+++ b/src/kenlm/jam-files/boost-build/tools/bison.jam
@@ -0,0 +1,32 @@
+# Copyright 2003 Vladimir Prus
+# Distributed under the Boost Software License, Version 1.0.
+# (See accompanying file LICENSE_1_0.txt or http://www.boost.org/LICENSE_1_0.txt)
+
+import generators ;
+import feature ;
+import type ;
+import property ;
+
+# <bison.prefix> maps to bison's -p option (symbol name prefix).
+feature.feature bison.prefix : : free ;
+type.register Y : y ;
+type.register YY : yy ;
+# .y produces C sources, .yy produces C++ sources (each with a header).
+generators.register-standard bison.bison : Y : C H ;
+generators.register-standard bison.bison : YY : CPP HPP ;
+
+# No configuration needed; present so 'using bison ;' works.
+rule init ( )
+{
+}
+
+# Per-target setup: propagate <bison.prefix> to the action as PREFIX_OPT.
+rule bison ( dst dst_header : src : properties * )
+{
+ local r = [ property.select bison.prefix : $(properties) ] ;
+ if $(r)
+ {
+ PREFIX_OPT on $(<) = -p $(r:G=) ;
+ }
+}
+
+# -d also emits the header; -o names the first (source) output.
+actions bison
+{
+ bison $(PREFIX_OPT) -d -o $(<[1]) $(>)
+}
diff --git a/src/kenlm/jam-files/boost-build/tools/boostbook-config.jam b/src/kenlm/jam-files/boost-build/tools/boostbook-config.jam
new file mode 100644
index 0000000..6e3f3dd
--- /dev/null
+++ b/src/kenlm/jam-files/boost-build/tools/boostbook-config.jam
@@ -0,0 +1,13 @@
+#~ Copyright 2005 Rene Rivera.
+#~ Distributed under the Boost Software License, Version 1.0.
+#~ (See accompanying file LICENSE_1_0.txt or http://www.boost.org/LICENSE_1_0.txt)
+
+# Automatic configuration for BoostBook tools. To use, just import this module.
+#
+# This module is deprecated.
+# using boostbook ;
+# with no arguments now suffices.
+
+import toolset : using ;
+
+# Configure BoostBook with auto-detected tool locations (no arguments).
+using boostbook ;
diff --git a/src/kenlm/jam-files/boost-build/tools/boostbook.jam b/src/kenlm/jam-files/boost-build/tools/boostbook.jam
new file mode 100644
index 0000000..de83dec
--- /dev/null
+++ b/src/kenlm/jam-files/boost-build/tools/boostbook.jam
@@ -0,0 +1,773 @@
+# Copyright 2003, 2004, 2005 Dave Abrahams
+# Copyright 2003, 2004, 2005 Douglas Gregor
+# Copyright 2005, 2006, 2007 Rene Rivera
+# Copyright 2003, 2004, 2005 Vladimir Prus
+# Distributed under the Boost Software License, Version 1.0.
+# (See accompanying file LICENSE_1_0.txt or copy at
+# http://www.boost.org/LICENSE_1_0.txt)
+
+# This module defines rules to handle generation of documentation from BoostBook
+# sources.
+#
+# The type of output is controlled by the <format> feature which can have the
+# following values:
+# * html: Generates html documentation. This is the default.
+# * xhtml: Generates xhtml documentation.
+# * htmlhelp: Generates html help output.
+# * onehtml: Generates a single html page.
+# * man: Generates man pages.
+# * pdf: Generates pdf documentation.
+# * ps: Generates postscript output.
+# * docbook: Generates docbook XML.
+# * fo: Generates XSL formating objects.
+# * tests: Extracts test cases from the boostbook XML.
+#
+# <format> is an implicit feature, so for example, typing pdf on the command
+# line is a short-cut for format=pdf.
+
+import build-system ;
+import "class" : new ;
+import common ;
+import feature ;
+import generators ;
+import make ;
+import modules ;
+import os ;
+import path ;
+import print ;
+import project ;
+import property ;
+import property-set ;
+import regex ;
+import scanner ;
+import sequence ;
+import targets ;
+import type ;
+import virtual-target ;
+import xsltproc ;
+
+# Make this module into a project.
+project.initialize $(__name__) ;
+project boostbook ;
+
+# Remember whether --debug-configuration was passed on the command line.
+.debug-configuration = [ MATCH ^(--debug-configuration)$ : [ modules.peek : ARGV
+ ] ] ;
+
+# The output format selector; implicit, so plain 'pdf' means format=pdf.
+feature.feature format
+ : html xhtml htmlhelp onehtml man pdf ps docbook fo tests
+ : incidental implicit composite propagated ;
+
+# File types participating in the documentation tool chain.
+type.register DTDXML : dtdxml ;
+type.register XML : xml ;
+type.register BOOSTBOOK : boostbook : XML ;
+type.register DOCBOOK : docbook : XML ;
+type.register FO : fo : XML ;
+type.register PDF : pdf ;
+type.register PS : ps ;
+type.register XSLT : xsl : XML ;
+type.register HTMLDIR ;
+type.register XHTMLDIR ;
+type.register HTMLHELP ;
+type.register MANPAGES ;
+type.register TESTS : tests ;
+
+
+# Initialize BoostBook support.
+#
+# First call performs tool discovery and registers the generators; later
+# calls may only override individual directories (before the config is used).
+rule init (
+ docbook-xsl-dir ? # The DocBook XSL stylesheet directory. If not provided,
+ # we use DOCBOOK_XSL_DIR from the environment (if
+ # available) or look in standard locations. Otherwise,
+ # we let the XML processor load the stylesheets
+ # remotely.
+
+ : docbook-dtd-dir ? # The DocBook DTD directory. If not provided, we use
+ # DOCBOOK_DTD_DIR From the environment (if available) or
+ # look in standard locations. Otherwise, we let the XML
+ # processor load the DTD remotely.
+
+ : boostbook-dir ? # The BoostBook directory with the DTD and XSL subdirs.
+)
+{
+ if ! $(.initialized)
+ {
+ .initialized = true ;
+
+ check-boostbook-dir $(boostbook-dir) ;
+ find-tools $(docbook-xsl-dir) : $(docbook-dtd-dir) : $(boostbook-dir) ;
+
+ # Register generators only if we were called via "using boostbook ;"
+ local reg-gen = generators.register-standard ;
+ $(reg-gen) boostbook.dtdxml-to-boostbook : DTDXML : XML ;
+ $(reg-gen) boostbook.boostbook-to-docbook : XML : DOCBOOK ;
+ $(reg-gen) boostbook.boostbook-to-tests : XML : TESTS ;
+ $(reg-gen) boostbook.docbook-to-onehtml : DOCBOOK : HTML ;
+ $(reg-gen) boostbook.docbook-to-htmldir : DOCBOOK : HTMLDIR ;
+ $(reg-gen) boostbook.docbook-to-xhtmldir : DOCBOOK : XHTMLDIR ;
+ $(reg-gen) boostbook.docbook-to-htmlhelp : DOCBOOK : HTMLHELP ;
+ $(reg-gen) boostbook.docbook-to-manpages : DOCBOOK : MANPAGES ;
+ $(reg-gen) boostbook.docbook-to-fo : DOCBOOK : FO ;
+
+ # The same about Jamfile main target rules.
+ IMPORT $(__name__) : boostbook : : boostbook ;
+ }
+ else
+ {
+ # Re-configuration path: each override is rejected by modify-config
+ # once the configuration has been locked by first use.
+ if $(docbook-xsl-dir)
+ {
+ modify-config ;
+ .docbook-xsl-dir = [ path.make $(docbook-xsl-dir) ] ;
+ check-docbook-xsl-dir ;
+ }
+ if $(docbook-dtd-dir)
+ {
+ modify-config ;
+ .docbook-dtd-dir = [ path.make $(docbook-dtd-dir) ] ;
+ check-docbook-dtd-dir ;
+ }
+ if $(boostbook-dir)
+ {
+ modify-config ;
+ check-boostbook-dir $(boostbook-dir) ;
+ local boostbook-xsl-dir = [ path.glob $(boostbook-dir) : xsl ] ;
+ local boostbook-dtd-dir = [ path.glob $(boostbook-dir) : dtd ] ;
+ .boostbook-xsl-dir = $(boostbook-xsl-dir[1]) ;
+ .boostbook-dtd-dir = $(boostbook-dtd-dir[1]) ;
+ check-boostbook-xsl-dir ;
+ check-boostbook-dtd-dir ;
+ }
+ }
+}
+
+
+# Mark the configuration as in-use; any error recorded during discovery is
+# reported at this point (i.e. on first actual use, not at configure time).
+local rule lock-config ( )
+{
+ if ! $(.initialized)
+ {
+ import errors ;
+ errors.user-error BoostBook has not been configured. ;
+ }
+ if ! $(.config-locked)
+ {
+ .config-locked = true ;
+
+ if $(.error-message)
+ {
+ print-error $(.error-message) ;
+ }
+ }
+}
+
+
+# Guard called before any configuration change; fails once locked.
+local rule modify-config ( )
+{
+ if $(.config-locked)
+ {
+ import errors ;
+ errors.user-error BoostBook configuration cannot be changed after it has
+ been used. ;
+ }
+}
+
+# Report a stored configuration error and abort the build.
+rule print-error ( location message * )
+{
+ ECHO error: at $(location) ;
+ ECHO error: $(message) ;
+ EXIT ;
+}
+
+# Build an error message prefixed with the nearest user code location.
+# NOTE(review): relies on the 'errors' module being importable here; unlike
+# its callers this rule has no local 'import errors' — confirm resolution.
+rule make-error ( message * )
+{
+ return [ errors.nearest-user-location ] $(message) ;
+}
+
+
+# Query the Windows registry (HKLM\SOFTWARE\<key>\InstallRoot) for Boost
+# installation roots; returns the roots found for the given keys, if any.
+rule find-boost-in-registry ( keys * )
+{
+ local boost-root ;
+ for local R in $(keys)
+ {
+ local installed-boost = [ W32_GETREG
+ "HKEY_LOCAL_MACHINE\\SOFTWARE\\$(R)" : "InstallRoot" ] ;
+ if $(installed-boost)
+ {
+ boost-root += [ path.make $(installed-boost) ] ;
+ }
+ }
+ return $(boost-root) ;
+}
+
+
+# Validate the configured DocBook XSL directory by probing for a known file.
+# On failure only records .error-message; it is reported by lock-config.
+rule check-docbook-xsl-dir ( )
+{
+ if $(.docbook-xsl-dir)
+ {
+ if ! [ path.glob $(.docbook-xsl-dir) : common/common.xsl ]
+ {
+ import errors ;
+ .error-message = [ make-error BoostBook: could not find docbook XSL stylesheets
+ in: [ path.native $(.docbook-xsl-dir) ] ] ;
+ }
+ else if $(.debug-configuration)
+ {
+ ECHO notice: BoostBook: found docbook XSL stylesheets in: [
+ path.native $(.docbook-xsl-dir) ] ;
+ }
+ }
+}
+
+
+# Validate the configured DocBook DTD directory (probe for docbookx.dtd).
+rule check-docbook-dtd-dir ( )
+{
+ if $(.docbook-dtd-dir)
+ {
+ if ! [ path.glob $(.docbook-dtd-dir) : docbookx.dtd ]
+ {
+ import errors ;
+ .error-message = [ make-error BoostBook: could not find docbook DTD in: [
+ path.native $(.docbook-dtd-dir) ] ] ;
+ }
+ else if $(.debug-configuration)
+ {
+ ECHO notice: BoostBook: found docbook DTD in: [ path.native
+ $(.docbook-dtd-dir) ] ;
+ }
+ }
+}
+
+
+# Validate the BoostBook XSL directory (must exist and contain docbook.xsl).
+rule check-boostbook-xsl-dir ( )
+{
+ if ! $(.boostbook-xsl-dir)
+ {
+ .error-message = [ make-error BoostBook: could not find boostbook XSL stylesheets. ] ;
+ }
+ else if ! [ path.glob $(.boostbook-xsl-dir) : docbook.xsl ]
+ {
+ .error-message = [ make-error BoostBook: could not find docbook XSL stylesheets in:
+ [ path.native $(.boostbook-xsl-dir) ] ] ;
+ }
+ else if $(.debug-configuration)
+ {
+ ECHO notice: BoostBook: found boostbook XSL stylesheets in: [
+ path.native $(.boostbook-xsl-dir) ] ;
+ }
+}
+
+
+# Validate the BoostBook DTD directory (must exist and contain boostbook.dtd).
+rule check-boostbook-dtd-dir ( )
+{
+ if ! $(.boostbook-dtd-dir)
+ {
+ .error-message = [ make-error BoostBook: could not find boostbook DTD. ] ;
+ }
+ else if ! [ path.glob $(.boostbook-dtd-dir) : boostbook.dtd ]
+ {
+ .error-message = [ make-error BoostBook: could not find boostbook DTD in: [
+ path.native $(.boostbook-dtd-dir) ] ] ;
+ }
+ else if $(.debug-configuration)
+ {
+ ECHO notice: BoostBook: found boostbook DTD in: [ path.native
+ $(.boostbook-dtd-dir) ] ;
+ }
+}
+
+
+# Validate a user-supplied boostbook directory (must contain an 'xsl' subdir).
+rule check-boostbook-dir ( boostbook-dir ? )
+{
+ if $(boostbook-dir) && ! [ path.glob $(boostbook-dir) : xsl ]
+ {
+ import errors ;
+ .error-message = [ make-error BoostBook: could not find boostbook in: [ path.native
+ $(boostbook-dir) ] ] ;
+ }
+}
+
+
+# Locate the DocBook XSL/DTD and BoostBook directories. Explicit arguments
+# win; then environment variables; then a cascade of platform-specific
+# locations. The '?=' chains below are order-dependent: the first match wins.
+rule find-tools ( docbook-xsl-dir ? : docbook-dtd-dir ? : boostbook-dir ? )
+{
+ docbook-xsl-dir ?= [ modules.peek : DOCBOOK_XSL_DIR ] ;
+ docbook-dtd-dir ?= [ modules.peek : DOCBOOK_DTD_DIR ] ;
+ boostbook-dir ?= [ modules.peek : BOOSTBOOK_DIR ] ;
+
+ # Look for the boostbook stylesheets relative to BOOST_ROOT and Boost.Build.
+ local boost-build-root = [ path.make [ build-system.location ] ] ;
+ local boostbook-search-dirs = [ path.join $(boost-build-root) .. .. ] ;
+
+ local boost-root = [ modules.peek : BOOST_ROOT ] ;
+ if $(boost-root)
+ {
+ boostbook-search-dirs += [ path.join [ path.make $(boost-root) ] tools ]
+ ;
+ }
+ boostbook-dir ?= [ path.glob $(boostbook-search-dirs) : boostbook* ] ;
+
+ # Try to find the tools in platform specific locations.
+ if [ os.name ] = NT
+ {
+ # If installed by the Boost installer.
+ local boost-root = ;
+
+ local boost-installer-versions = snapshot cvs 1.33.0 ;
+ local boost-consulting-installer-versions = 1.33.1 1.34.0 1.34.1 ;
+ local boostpro-installer-versions =
+ 1.35.0 1.36.0 1.37.0 1.38.0 1.39.0 1.40.0 1.41.0 1.42.0
+ 1.43.0 1.44.0 1.45.0 1.46.0 1.47.0 1.48.0 1.49.0 1.50.0 ;
+
+ local old-installer-root = [ find-boost-in-registry
+ Boost.org\\$(boost-installer-versions) ] ;
+
+ # Make sure that the most recent version is searched for first.
+ boost-root += [ sequence.reverse [ find-boost-in-registry
+ Boost-Consulting.com\\$(boost-consulting-installer-versions)
+ boostpro.com\\$(boostpro-installer-versions) ] ] ;
+
+ # Plausible locations.
+ # Walk up from the current directory to the drive root.
+ local root = [ PWD ] ;
+ while $(root) != $(root:D) { root = $(root:D) ; }
+ root = [ path.make $(root) ] ;
+ local search-dirs ;
+ local docbook-search-dirs ;
+ for local p in $(boost-root)
+ {
+ search-dirs += [ path.join $(p) tools ] ;
+ }
+ for local p in $(old-installer-root)
+ {
+ search-dirs += [ path.join $(p) share ] ;
+ docbook-search-dirs += [ path.join $(p) share ] ;
+ }
+ search-dirs += [ path.join $(root) Boost tools ] ;
+ search-dirs += [ path.join $(root) Boost share ] ;
+ docbook-search-dirs += [ path.join $(root) Boost share ] ;
+
+ docbook-xsl-dir ?= [ path.glob $(docbook-search-dirs) : docbook-xsl* ] ;
+ docbook-dtd-dir ?= [ path.glob $(docbook-search-dirs) : docbook-xml* ] ;
+ boostbook-dir ?= [ path.glob $(search-dirs) : boostbook* ] ;
+ }
+ else
+ {
+ # Plausible locations.
+
+ local share = /usr/local/share /usr/share /opt/share /opt/local/share ;
+ local dtd-versions = 4.2 ;
+
+ docbook-xsl-dir ?= [ path.glob $(share) : docbook-xsl* ] ;
+ docbook-xsl-dir ?= [ path.glob $(share)/sgml/docbook : xsl-stylesheets ]
+ ;
+ docbook-xsl-dir ?= [ path.glob $(share)/xsl : docbook* ] ;
+
+ docbook-dtd-dir ?= [ path.glob $(share) : docbook-xml* ] ;
+ docbook-dtd-dir ?= [ path.glob $(share)/sgml/docbook :
+ xml-dtd-$(dtd-versions)* ] ;
+ docbook-dtd-dir ?= [ path.glob $(share)/xml/docbook : $(dtd-versions) ]
+ ;
+
+ boostbook-dir ?= [ path.glob $(share) : boostbook* ] ;
+
+ # Ubuntu Linux.
+ docbook-xsl-dir ?= [ path.glob /usr/share/xml/docbook/stylesheet :
+ nwalsh ] ;
+ docbook-dtd-dir ?= [ path.glob /usr/share/xml/docbook/schema/dtd :
+ $(dtd-versions) ] ;
+
+ # SUSE.
+ docbook-xsl-dir ?= [ path.glob /usr/share/xml/docbook/stylesheet/nwalsh
+ : current ] ;
+ }
+
+ # Globs may return several matches; keep only the first of each.
+ if $(docbook-xsl-dir)
+ {
+ .docbook-xsl-dir = [ path.make $(docbook-xsl-dir[1]) ] ;
+ }
+ if $(docbook-dtd-dir)
+ {
+ .docbook-dtd-dir = [ path.make $(docbook-dtd-dir[1]) ] ;
+ }
+
+ if $(.debug-configuration)
+ {
+ ECHO notice: Boost.Book: searching XSL/DTD "in" ;
+ ECHO notice: [ sequence.transform path.native : $(boostbook-dir) ] ;
+ }
+ local boostbook-xsl-dir ;
+ for local dir in $(boostbook-dir)
+ {
+ boostbook-xsl-dir += [ path.glob $(dir) : xsl ] ;
+ }
+ local boostbook-dtd-dir ;
+ for local dir in $(boostbook-dir)
+ {
+ boostbook-dtd-dir += [ path.glob $(dir) : dtd ] ;
+ }
+ .boostbook-xsl-dir = $(boostbook-xsl-dir[1]) ;
+ .boostbook-dtd-dir = $(boostbook-dtd-dir[1]) ;
+
+ # Validate everything found; failures are stored for lock-config.
+ check-docbook-xsl-dir ;
+ check-docbook-dtd-dir ;
+ check-boostbook-xsl-dir ;
+ check-boostbook-dtd-dir ;
+}
+
+
+# Accessor: the BoostBook XSL directory (locks the configuration).
+rule xsl-dir
+{
+ lock-config ;
+ return $(.boostbook-xsl-dir) ;
+}
+
+
+# Accessor: the BoostBook DTD directory (locks the configuration).
+rule dtd-dir
+{
+ lock-config ;
+ return $(.boostbook-dtd-dir) ;
+}
+
+
+# Accessor: the DocBook XSL stylesheet directory (locks the configuration).
+rule docbook-xsl-dir
+{
+ lock-config ;
+ return $(.docbook-xsl-dir) ;
+}
+
+
+# Accessor: the DocBook DTD directory (locks the configuration).
+rule docbook-dtd-dir
+{
+ lock-config ;
+ return $(.docbook-dtd-dir) ;
+}
+
+
+# The rules below are the per-target actions behind the generators registered
+# in init: each applies one BoostBook/DocBook stylesheet via xsltproc.
+
+# DTD XML reference -> BoostBook XML.
+rule dtdxml-to-boostbook ( target : source : properties * )
+{
+ lock-config ;
+ xsltproc.xslt $(target) : $(source)
+ "$(.boostbook-xsl-dir)/dtd/dtd2boostbook.xsl" : $(properties) ;
+}
+
+
+# BoostBook XML -> DocBook XML.
+rule boostbook-to-docbook ( target : source : properties * )
+{
+ lock-config ;
+ local stylesheet = [ path.native $(.boostbook-xsl-dir)/docbook.xsl ] ;
+ xsltproc.xslt $(target) : $(source) $(stylesheet) : $(properties) ;
+}
+
+
+# DocBook -> single HTML page.
+rule docbook-to-onehtml ( target : source : properties * )
+{
+ lock-config ;
+ local stylesheet = [ path.native $(.boostbook-xsl-dir)/html-single.xsl ] ;
+ xsltproc.xslt $(target) : $(source) $(stylesheet) : $(properties) ;
+}
+
+
+# DocBook -> directory of chunked HTML pages.
+rule docbook-to-htmldir ( target : source : properties * )
+{
+ lock-config ;
+ local stylesheet = [ path.native $(.boostbook-xsl-dir)/html.xsl ] ;
+ xsltproc.xslt-dir $(target) : $(source) $(stylesheet) : $(properties) : html
+ ;
+}
+
+
+# DocBook -> directory of chunked XHTML pages.
+rule docbook-to-xhtmldir ( target : source : properties * )
+{
+ lock-config ;
+ local stylesheet = [ path.native $(.boostbook-xsl-dir)/xhtml.xsl ] ;
+ xsltproc.xslt-dir $(target) : $(source) $(stylesheet) : $(properties) :
+ xhtml ;
+}
+
+
+# DocBook -> HTML Help project output.
+rule docbook-to-htmlhelp ( target : source : properties * )
+{
+ lock-config ;
+ local stylesheet = [ path.native $(.boostbook-xsl-dir)/html-help.xsl ] ;
+ xsltproc.xslt-dir $(target) : $(source) $(stylesheet) : $(properties) :
+ htmlhelp ;
+}
+
+
+# DocBook -> man pages.
+rule docbook-to-manpages ( target : source : properties * )
+{
+ lock-config ;
+ local stylesheet = [ path.native $(.boostbook-xsl-dir)/manpages.xsl ] ;
+ xsltproc.xslt-dir $(target) : $(source) $(stylesheet) : $(properties) : man
+ ;
+}
+
+
+# DocBook -> XSL formatting objects (input for PDF/PS production).
+rule docbook-to-fo ( target : source : properties * )
+{
+ lock-config ;
+ local stylesheet = [ path.native $(.boostbook-xsl-dir)/fo.xsl ] ;
+ xsltproc.xslt $(target) : $(source) $(stylesheet) : $(properties) ;
+}
+
+
+# Convert a filesystem path into the form expected by the XML catalog,
+# bridging cygwin/native path conventions, and URL-encode embedded spaces.
+#
+# path - the path to convert; returned unchanged when no translation applies.
+rule format-catalog-path ( path )
+{
+ local result = $(path) ;
+ if [ xsltproc.is-cygwin ]
+ {
+ if [ os.name ] = NT
+ {
+ # Cygwin xsltproc on a native-NT build: /X:rest -> /cygdrive/X/rest.
+ # 'local' added: this was previously assigned as a global variable.
+ local drive = [ MATCH ^/(.):(.*)$ : $(path) ] ;
+ result = /cygdrive/$(drive[1])$(drive[2]) ;
+ }
+ }
+ else
+ {
+ if [ os.name ] = CYGWIN
+ {
+ # Native xsltproc under cygwin: use the Windows form of the path.
+ local native-path = [ path.native $(path) ] ;
+ result = [ path.make $(native-path:W) ] ;
+ }
+ }
+ # Spaces are not allowed in catalog URIs; percent-encode them.
+ return [ regex.replace $(result) " " "%20" ] ;
+}
+
+
+# Write an OASIS XML catalog that rewrites the remote BoostBook/DocBook URIs
+# to the locally configured directories, so xsltproc works offline. Missing
+# DocBook components produce a warning and fall back to remote (slow) access.
+rule generate-xml-catalog ( target : sources * : properties * )
+{
+ print.output $(target) ;
+
+ # BoostBook DTD catalog entry.
+ local boostbook-dtd-dir = [ boostbook.dtd-dir ] ;
+ if $(boostbook-dtd-dir)
+ {
+ boostbook-dtd-dir = [ format-catalog-path $(boostbook-dtd-dir) ] ;
+ }
+
+ print.text
+ "<?xml version=\"1.0\"?>"
+ "<!DOCTYPE catalog "
+ " PUBLIC \"-//OASIS/DTD Entity Resolution XML Catalog V1.0//EN\""
+ " \"http://www.oasis-open.org/committees/entity/release/1.0/catalog.dtd\">"
+ "<catalog xmlns=\"urn:oasis:names:tc:entity:xmlns:xml:catalog\">"
+ " <rewriteURI uriStartString=\"http://www.boost.org/tools/boostbook/dtd/\" rewritePrefix=\"file://$(boostbook-dtd-dir)/\"/>"
+ : true ;
+
+ # DocBook XSL stylesheets entry (warn and skip if unconfigured).
+ local docbook-xsl-dir = [ boostbook.docbook-xsl-dir ] ;
+ if ! $(docbook-xsl-dir)
+ {
+ ECHO "BoostBook warning: no DocBook XSL directory specified." ;
+ ECHO " If you have the DocBook XSL stylesheets installed, please " ;
+ ECHO " set DOCBOOK_XSL_DIR to the stylesheet directory on either " ;
+ ECHO " the command line (via -sDOCBOOK_XSL_DIR=...) or in a " ;
+ ECHO " Boost.Jam configuration file. The DocBook XSL stylesheets " ;
+ ECHO " are available here: http://docbook.sourceforge.net/ " ;
+ ECHO " Stylesheets will be downloaded on-the-fly (very slow!) " ;
+ }
+ else
+ {
+ docbook-xsl-dir = [ format-catalog-path $(docbook-xsl-dir) ] ;
+ print.text " <rewriteURI uriStartString=\"http://docbook.sourceforge.net/release/xsl/current/\" rewritePrefix=\"file://$(docbook-xsl-dir)/\"/>" ;
+ }
+
+ # DocBook DTD entry (warn and skip if unconfigured).
+ local docbook-dtd-dir = [ boostbook.docbook-dtd-dir ] ;
+ if ! $(docbook-dtd-dir)
+ {
+ ECHO "BoostBook warning: no DocBook DTD directory specified." ;
+ ECHO " If you have the DocBook DTD installed, please set " ;
+ ECHO " DOCBOOK_DTD_DIR to the DTD directory on either " ;
+ ECHO " the command line (via -sDOCBOOK_DTD_DIR=...) or in a " ;
+ ECHO " Boost.Jam configuration file. The DocBook DTD is available " ;
+ ECHO " here: http://www.oasis-open.org/docbook/xml/4.2/index.shtml" ;
+ ECHO " The DTD will be downloaded on-the-fly (very slow!) " ;
+ }
+ else
+ {
+ docbook-dtd-dir = [ format-catalog-path $(docbook-dtd-dir) ] ;
+ print.text " <rewriteURI uriStartString=\"http://www.oasis-open.org/docbook/xml/4.2/\" rewritePrefix=\"file://$(docbook-dtd-dir)/\"/>" ;
+ }
+
+ print.text "</catalog>" ;
+}
+
+
+# Returns information about the global XML catalog target, creating it lazily if
+# needed. To get the global catalog generated only once we do not create it in
+# every project that requests it but instead only create it based on the first
+# project requesting it and then reuse it from there for any later requests.
+#
+# To get 'as close as possible' to having the global catalog stored in the same
+# location independent of which folder our build was run from, we assign its
+# target to the given project's base Jamroot project. This works correctly as
+# long as we know the passed project is not standalone or one of Boost Build's
+# configuration module projects, as those do not have a Jamroot project in their
+# parent chain. Note also that we can still get our targets generated in
+# different folders in case when one build project references a target from
+# another build project with its own separate Jamroot.
+#
+# FIXME: Ideally the catalog target should be created as part of the boostbook
+# project and stored in some central location for all used standalone projects,
+# shared between all builds made on that system. This however would require much
+# more thought to add the necessary changes to Boost Build's internal design.
+#
+# Lazily create the global XML catalog target on the given project's Jamroot
+# and cache it; returns the virtual target followed by the catalog file path.
+# See the long comment above for rationale and caveats.
+local rule xml-catalog ( project )
+{
+ if ! $(.xml-catalog)
+ {
+ # Anchor the catalog at the project's Jamroot; standalone and
+ # config-module projects have no Jamroot and are rejected.
+ local project-module = [ $(project).project-module ] ;
+ local root-module = [ project.get-jamroot-module $(project-module) ] ;
+ if ! $(root-module)
+ {
+ import errors ;
+ if [ project.is-config-module $(project-module) ]
+ {
+ errors.user-error boostbook targets can not be declared in Boost
+ Build's configuration modules. ;
+ }
+ else
+ {
+ errors.user-error boostbook targets can not be declared in
+ standalone projects. : use a Jamfile/Jamroot project
+ instead. ;
+ }
+ }
+ local root-project = [ project.target $(root-module) ] ;
+
+ .xml-catalog = [ virtual-target.register [ new file-target
+ boostbook_catalog : XML : $(root-project) : [ new action :
+ boostbook.generate-xml-catalog ] ] ] ;
+ # Cache the catalog's full path (dir + name joined with '/').
+ .xml-catalog-file = [ $(.xml-catalog).path ] [ $(.xml-catalog).name ] ;
+ .xml-catalog-file = $(.xml-catalog-file:J=/) ;
+ }
+ return $(.xml-catalog) $(.xml-catalog-file) ;
+}
+
+
+class boostbook-target-class : basic-target
+{
+ import generators ;
+ import property-set ;
+ import virtual-target ;
+
+ rule construct ( name : sources * : property-set )
+ {
+ # Generate the catalog, but only once.
+ IMPORT boostbook : xml-catalog : $(__name__) : boostbook.xml-catalog ;
+ local global-catalog = [ boostbook.xml-catalog [ project ] ] ;
+ local catalog = $(global-catalog[1]) ;
+ local catalog-file = $(global-catalog[2]) ;
+ local targets ;
+
+ # Add the catalog to the property set.
+ property-set = [ $(property-set).add-raw <catalog>$(catalog-file) ] ;
+
+ local type = none ;
+ local manifest ;
+ local format = [ $(property-set).get <format> ] ;
+ switch $(format)
+ {
+ case html : type = HTMLDIR ; manifest = HTML.manifest ;
+ case xhtml : type = XHTMLDIR ; manifest = HTML.manifest ;
+ case htmlhelp : type = HTMLHELP ; manifest = HTML.manifest ;
+ case onehtml : type = HTML ;
+ case man : type = MANPAGES ; manifest = man.manifest ;
+ case docbook : type = DOCBOOK ;
+ case fo : type = FO ;
+ case pdf : type = PDF ;
+ case ps : type = PS ;
+ case tests : type = TESTS ;
+ }
+
+ local target ;
+ if $(manifest)
+ {
+ # Sources --> DOCBOOK.
+ local docbook-target = [ generators.construct [ project ] : DOCBOOK
+ : $(property-set) : $(sources) ] ;
+ docbook-target = $(docbook-target[2]) ;
+ $(docbook-target).depends $(catalog) ;
+
+ # DOCBOOK --> type.
+ target = [ generators.construct [ project ] $(name)_$(manifest) :
+ $(type) : [ $(property-set).add-raw
+ <xsl:param>manifest=$(name)_$(manifest) ] : $(docbook-target) ]
+ ;
+ target = $(target[2]) ;
+ local name = [ $(property-set).get <name> ] ;
+ name ?= $(format) ;
+ $(target).set-path $(name) ;
+ }
+ else
+ {
+ # Sources --> type.
+ target = [ generators.construct [ project ] : $(type) :
+ $(property-set) : $(sources) ] ;
+ target = $(target[2]) ;
+ if ! $(target)
+ {
+ import errors ;
+ errors.error Cannot build documentation type '$(format)'. ;
+ }
+ }
+ $(target).depends $(catalog) ;
+
+ return [ property-set.empty ] $(target) ;
+ }
+}
+
+
+# Declare a boostbook target.
+#
+rule boostbook ( target-name : sources * : requirements * : default-build * )
+{
+ return [ targets.create-metatarget boostbook-target-class :
+ [ project.current ] : $(target-name) : $(sources) : $(requirements) :
+ $(default-build) ] ;
+}
+
+
+rule boostbook-to-tests ( target : source : properties * )
+{
+ lock-config ;
+ local boost_root = [ modules.peek : BOOST_ROOT ] ;
+ local native-path = [ path.native [ path.join $(.boostbook-xsl-dir) testing
+ Jamfile ] ] ;
+ local stylesheet = $(native-path:S=.xsl) ;
+ xsltproc.xslt $(target) : $(source) $(stylesheet) : $(properties)
+ <xsl:param>boost.root=$(boost_root) ;
+}
+
+
+#############################################################################
+# Dependency scanners
+#############################################################################
+# XInclude scanner. Mostly stolen from c-scanner. :)
+# Note that this assumes an "xi" prefix for XIncludes. This is not always the
+# case for XML documents, but we assume it is true for anything we encounter.
+#
+class xinclude-scanner : scanner
+{
+ import scanner ;
+
+ rule __init__ ( includes * )
+ {
+ scanner.__init__ ;
+ self.includes = $(includes) ;
+ }
+
+ rule pattern ( )
+ {
+ return "xi:include[ ]*href=\"([^\"]*)\"" ;
+ }
+
+ rule process ( target : matches * : binding )
+ {
+ local target_path = [ NORMALIZE_PATH $(binding:D) ] ;
+
+ NOCARE $(matches) ;
+ INCLUDES $(target) : $(matches) ;
+ SEARCH on $(matches) = $(target_path) $(self.includes:G=) ;
+
+ scanner.propagate $(__name__) : $(matches) : $(target) ;
+ }
+}
+
+scanner.register xinclude-scanner : xsl:path ;
+type.set-scanner XML : xinclude-scanner ;
diff --git a/src/kenlm/jam-files/boost-build/tools/borland.jam b/src/kenlm/jam-files/boost-build/tools/borland.jam
new file mode 100644
index 0000000..6e43ca9
--- /dev/null
+++ b/src/kenlm/jam-files/boost-build/tools/borland.jam
@@ -0,0 +1,220 @@
+# Copyright 2005 Dave Abrahams
+# Copyright 2003 Rene Rivera
+# Copyright 2003, 2004, 2005 Vladimir Prus
+# Distributed under the Boost Software License, Version 1.0.
+# (See accompanying file LICENSE_1_0.txt or http://www.boost.org/LICENSE_1_0.txt)
+
+# Support for the Borland's command line compiler
+
+import property ;
+import generators ;
+import os ;
+import toolset : flags ;
+import feature : get-values ;
+import type ;
+import common ;
+
+feature.extend toolset : borland ;
+
+rule init ( version ? : command * : options * )
+{
+ local condition = [ common.check-init-parameters borland :
+ version $(version) ] ;
+
+ local command = [ common.get-invocation-command borland : bcc32.exe
+ : $(command) ] ;
+
+ common.handle-options borland : $(condition) : $(command) : $(options) ;
+
+ if $(command)
+ {
+ command = [ common.get-absolute-tool-path $(command[-1]) ] ;
+ }
+ root = $(command:D) ;
+
+ flags borland.compile STDHDRS $(condition) : $(root)/include/ ;
+ flags borland.link STDLIBPATH $(condition) : $(root)/lib ;
+ flags borland.link RUN_PATH $(condition) : $(root)/bin ;
+ flags borland .root $(condition) : $(root)/bin/ ;
+}
+
+
+# A borland-specific target type
+type.register BORLAND.TDS : tds ;
+
+# Declare generators
+
+generators.register-linker borland.link : OBJ SEARCHED_LIB STATIC_LIB IMPORT_LIB : EXE : <toolset>borland ;
+generators.register-linker borland.link.dll : OBJ SEARCHED_LIB STATIC_LIB IMPORT_LIB : SHARED_LIB IMPORT_LIB : <toolset>borland ;
+
+generators.register-archiver borland.archive : OBJ : STATIC_LIB : <toolset>borland ;
+generators.register-c-compiler borland.compile.c++ : CPP : OBJ : <toolset>borland ;
+generators.register-c-compiler borland.compile.c : C : OBJ : <toolset>borland ;
+generators.register-standard borland.asm : ASM : OBJ : <toolset>borland ;
+
+# Declare flags
+
+flags borland.compile OPTIONS <debug-symbols>on : -v ;
+flags borland.link OPTIONS <debug-symbols>on : -v ;
+
+flags borland.compile OPTIONS <optimization>off : -Od ;
+flags borland.compile OPTIONS <optimization>speed : -O2 ;
+flags borland.compile OPTIONS <optimization>space : -O1 ;
+
+if $(.BORLAND_HAS_FIXED_INLINING_BUGS)
+{
+ flags borland CFLAGS <inlining>off : -vi- ;
+ flags borland CFLAGS <inlining>on : -vi -w-inl ;
+ flags borland CFLAGS <inlining>full : -vi -w-inl ;
+}
+else
+{
+ flags borland CFLAGS : -vi- ;
+}
+
+flags borland.compile OPTIONS <warnings>off : -w- ;
+flags borland.compile OPTIONS <warnings>all : -w ;
+flags borland.compile OPTIONS <warnings-as-errors>on : -w! ;
+
+
+# Deal with various runtime configs...
+
+# This should be not for DLL
+flags borland OPTIONS <user-interface>console : -tWC ;
+
+# -tWR sets -tW as well, so we turn it off here and then turn it
+# on again later if we need it:
+flags borland OPTIONS <runtime-link>shared : -tWR -tWC ;
+flags borland OPTIONS <user-interface>gui : -tW ;
+
+flags borland OPTIONS <main-target-type>LIB/<link>shared : -tWD ;
+# Hmm.. not sure what's going on here.
+flags borland OPTIONS : -WM- ;
+flags borland OPTIONS <threading>multi : -tWM ;
+
+
+
+flags borland.compile OPTIONS <cxxflags> ;
+flags borland.compile DEFINES <define> ;
+flags borland.compile INCLUDES <include> ;
+
+flags borland NEED_IMPLIB <main-target-type>LIB/<link>shared : "" ;
+
+#
+# for C++ compiles the following options are turned on by default:
+#
+# -j5 stops after 5 errors
+# -g255 allow an unlimited number of warnings
+# -q no banner
+# -c compile to object
+# -P C++ code regardless of file extension
+# -a8 8 byte alignment, this option is on in the IDE by default
+# and affects binary compatibility.
+#
+
+# -U$(UNDEFS) -D$(DEFINES) $(CFLAGS) $(C++FLAGS) -I"$(HDRS)" -I"$(STDHDRS)" -o"$(<)" "$(>)"
+
+
+actions compile.c++
+{
+ "$(CONFIG_COMMAND)" -j5 -g255 -q -c -P -a8 -Vx- -Ve- -b- $(OPTIONS) -D$(DEFINES) -I"$(INCLUDES)" -I"$(STDHDRS)" -o"$(<)" "$(>)"
+}
+
+# For C, we don't pass -P flag
+actions compile.c
+{
+ "$(CONFIG_COMMAND)" -j5 -g255 -q -c -a8 -Vx- -Ve- -b- $(OPTIONS) -D$(DEFINES) -I"$(INCLUDES)" -I"$(STDHDRS)" -o"$(<)" "$(>)"
+}
+
+
+# Declare flags and action for linking
+toolset.flags borland.link OPTIONS <debug-symbols>on : -v ;
+toolset.flags borland.link LIBRARY_PATH <library-path> ;
+toolset.flags borland.link FINDLIBS_ST <find-static-library> ;
+toolset.flags borland.link FINDLIBS_SA <find-shared-library> ;
+toolset.flags borland.link LIBRARIES <library-file> ;
+
+flags borland.link OPTIONS <linkflags> ;
+flags borland.link OPTIONS <link>shared : -tWD ;
+
+flags borland.link LIBRARY_PATH_OPTION <toolset>borland : -L : unchecked ;
+flags borland.link LIBRARY_OPTION <toolset>borland : "" : unchecked ;
+
+
+
+# bcc32 needs to have ilink32 in the path in order to invoke it, so explicitly
+# specifying $(BCC_TOOL_PATH)bcc32 doesn't help. You need to add
+# $(BCC_TOOL_PATH) to the path
+# The NEED_IMPLIB variable controls whether we need to invoke implib.
+
+flags borland.archive AROPTIONS <archiveflags> ;
+
+# Declare action for archives. We don't use response file
+# since it's hard to get "+-" there.
+# The /P256 increases 'page' size -- with too low
+# values tlib fails when building large applications.
+# CONSIDER: don't know what 'together' is for...
+actions updated together piecemeal archive
+{
+ $(.set-path)$(.root:W)$(.old-path)
+ tlib $(AROPTIONS) /P256 /u /a /C "$(<:W)" +-"$(>:W)"
+}
+
+
+if [ os.name ] = CYGWIN
+{
+ .set-path = "cmd /S /C set \"PATH=" ;
+ .old-path = ";%PATH%\" \"&&\"" ;
+
+
+ # Couldn't get TLIB to stop being confused about pathnames
+ # containing dashes (it seemed to treat them as option separators
+ # when passed through from bash), so we explicitly write the
+ # command into a .bat file and execute that. TLIB is also finicky
+ # about pathname style! Forward slashes, too, are treated as
+ # options.
+ actions updated together piecemeal archive
+ {
+ chdir $(<:D)
+ echo +-$(>:BS) > $(<:BS).rsp
+ $(.set-path)$(.root)$(.old-path) "tlib.exe" $(AROPTIONS) /P256 /C $(<:BS) @$(<:BS).rsp && $(RM) $(<:BS).rsp
+ }
+}
+else if [ os.name ] = NT
+{
+ .set-path = "set \"PATH=" ;
+ .old-path = ";%PATH%\"
+ " ;
+}
+else
+{
+ .set-path = "PATH=\"" ;
+ .old-path = "\":$PATH
+ export PATH
+ " ;
+}
+
+RM = [ common.rm-command ] ;
+
+nl = "
+" ;
+
+actions link
+{
+ $(.set-path)$(.root:W)$(.old-path) "$(CONFIG_COMMAND)" -v -q $(OPTIONS) -L"$(LIBRARY_PATH:W)" -L"$(STDLIBPATH:W)" -e"$(<[1]:W)" @"@($(<[1]:W).rsp:E=$(nl)"$(>)" $(nl)$(LIBRARIES) $(nl)"$(LIBRARY_OPTION)$(FINDLIBS_ST:S=.lib)" $(nl)"$(LIBRARY_OPTION)$(FINDLIBS_SA:S=.lib)")"
+}
+
+
+actions link.dll bind LIBRARIES RSP
+{
+ $(.set-path)$(.root:W)$(.old-path) "$(CONFIG_COMMAND)" -v -q $(OPTIONS) -L"$(LIBRARY_PATH:W)" -L"$(STDLIBPATH:W)" -e"$(<[1]:W)" @"@($(<[1]:W).rsp:E=$(nl)"$(>)" $(nl)$(LIBRARIES) $(nl)"$(LIBRARY_OPTION)$(FINDLIBS_ST:S=.lib)" $(nl)"$(LIBRARY_OPTION)$(FINDLIBS_SA:S=.lib)")" && "$(.root)implib" "$(<[2]:W)" "$(<[1]:W)"
+}
+
+# It seems impossible to specify output file with directory when compiling
+# asm files using bcc32, so use tasm32 directly.
+# /ml makes all symbol names case-sensitive
+actions asm
+{
+ $(.set-path)$(.root:W)$(.old-path) tasm32.exe /ml "$(>)" "$(<)"
+}
+
diff --git a/src/kenlm/jam-files/boost-build/tools/builtin.jam b/src/kenlm/jam-files/boost-build/tools/builtin.jam
new file mode 100644
index 0000000..01c82f1
--- /dev/null
+++ b/src/kenlm/jam-files/boost-build/tools/builtin.jam
@@ -0,0 +1,974 @@
+# Copyright 2002, 2003, 2004, 2005 Dave Abrahams
+# Copyright 2002, 2005, 2006, 2007, 2010 Rene Rivera
+# Copyright 2006 Juergen Hunold
+# Copyright 2005 Toon Knapen
+# Copyright 2002, 2003, 2004, 2005, 2006 Vladimir Prus
+# Distributed under the Boost Software License, Version 1.0.
+# (See accompanying file LICENSE_1_0.txt or copy at
+# http://www.boost.org/LICENSE_1_0.txt)
+
+# Defines standard features and rules.
+
+import alias ;
+import "class" : new ;
+import errors ;
+import feature ;
+import generators ;
+import numbers ;
+import os ;
+import path ;
+import print ;
+import project ;
+import property ;
+import regex ;
+import scanner ;
+import sequence ;
+import stage ;
+import symlink ;
+import toolset ;
+import type ;
+import targets ;
+import types/register ;
+import utility ;
+import virtual-target ;
+import message ;
+import convert ;
+
+# FIXME: the following generate module import is not needed here but removing it
+# too hastily will break using code (e.g. the main Boost library Jamroot file)
+# that forgot to import the generate module before calling the generate rule.
+import generate ;
+
+
+.os-names = aix bsd cygwin darwin freebsd hpux iphone linux netbsd openbsd osf
+ qnx qnxnto sgi solaris unix unixware windows
+ elf # Not actually an OS -- used for targeting bare metal where object
+ # format is ELF. This catches both -elf and -eabi gcc targets and well
+ # as other compilers targeting ELF. It is not clear how often we need
+ # the 'elf' key as opposed to other bare metal targets, but let us
+ # stick with gcc naming.
+ ;
+
+# Feature used to determine which OS we're on. New <target-os> and <host-os>
+# features should be used instead.
+local os = [ modules.peek : OS ] ;
+feature.feature os : $(os) : propagated link-incompatible ;
+
+
+# Translates from bjam current OS to the os tags used in host-os and target-os,
+# i.e. returns the running host-os.
+#
+local rule default-host-os ( )
+{
+ local host-os ;
+ if [ os.name ] in $(.os-names:U)
+ {
+ host-os = [ os.name ] ;
+ }
+ else
+ {
+ switch [ os.name ]
+ {
+ case NT : host-os = windows ;
+ case AS400 : host-os = unix ;
+ case MINGW : host-os = windows ;
+ case BSDI : host-os = bsd ;
+ case COHERENT : host-os = unix ;
+ case DRAGONFLYBSD : host-os = bsd ;
+ case IRIX : host-os = sgi ;
+ case MACOSX : host-os = darwin ;
+ case KFREEBSD : host-os = freebsd ;
+ case LINUX : host-os = linux ;
+ case SUNOS :
+ ECHO "SunOS is not a supported operating system." ;
+ ECHO "We believe last version of SunOS was released in 1992, " ;
+ ECHO "so if you get this message, something is very wrong with configuration logic. " ;
+ ECHO "Please report this as a bug. " ;
+ EXIT ;
+ case * : host-os = unix ;
+ }
+ }
+ return $(host-os:L) ;
+}
+
+
+# The two OS features define a known set of abstract OS names. The host-os is
+# the OS under which bjam is running. Even though this should really be a fixed
+# property we need to list all the values to prevent unknown value errors. Both
+# set the default value to the current OS to account for the default use case of
+# building on the target OS.
+feature.feature host-os : $(.os-names) ;
+feature.set-default host-os : [ default-host-os ] ;
+
+feature.feature target-os : $(.os-names) : propagated link-incompatible ;
+feature.set-default target-os : [ default-host-os ] ;
+
+
+feature.feature toolset : : implicit propagated symmetric ;
+feature.feature stdlib : native : propagated composite ;
+feature.feature link : shared static : propagated ;
+feature.feature runtime-link : shared static : propagated ;
+feature.feature runtime-debugging : on off : propagated ;
+feature.feature optimization : off speed space none : propagated ;
+feature.feature profiling : off on : propagated ;
+feature.feature inlining : off on full : propagated ;
+feature.feature threading : single multi : propagated ;
+feature.feature rtti : on off : propagated ;
+feature.feature exception-handling : on off : propagated ;
+
+# Whether there is support for asynchronous EH (e.g. catching SEGVs).
+feature.feature asynch-exceptions : off on : propagated ;
+
+# Whether all extern "C" functions are considered nothrow by default.
+feature.feature extern-c-nothrow : off on : propagated ;
+
+feature.feature debug-symbols : on off none : propagated ;
+# Controls whether the binary should be stripped -- that is have
+# everything not necessary to running removed. This option should
+# not be very often needed. Also, this feature will show up in
+# target paths of everything, not just binaries. Should fix that
+# when implementing feature relevance.
+feature.feature strip : off on : propagated ;
+feature.feature define : : free ;
+feature.feature undef : : free ;
+feature.feature "include" : : free path ; #order-sensitive ;
+feature.feature cflags : : free ;
+feature.feature cxxflags : : free ;
+feature.feature fflags : : free ;
+feature.feature asmflags : : free ;
+feature.feature linkflags : : free ;
+feature.feature archiveflags : : free ;
+feature.feature version : : free ;
+
+# Generic, i.e. non-language specific, flags for tools.
+feature.feature flags : : free ;
+feature.feature location-prefix : : free ;
+
+
+# The following features are incidental since they have no effect on built
+# products. Not making them incidental will result in problems in corner cases,
+# e.g.:
+#
+# unit-test a : a.cpp : <use>b ;
+# lib b : a.cpp b ;
+#
+# Here, if <use> is not incidental, we would decide we have two targets for
+# a.obj with different properties and complain about it.
+#
+# Note that making a feature incidental does not mean it is ignored. It may be
+# ignored when creating a virtual target, but the rest of build process will use
+# them.
+feature.feature use : : free dependency incidental ;
+feature.feature dependency : : free dependency incidental ;
+feature.feature implicit-dependency : : free dependency incidental ;
+
+feature.feature warnings :
+ on # Enable default/"reasonable" warning level for the tool.
+ all # Enable all possible warnings issued by the tool.
+ off # Disable all warnings issued by the tool.
+ : incidental propagated ;
+
+feature.feature warnings-as-errors :
+ off # Do not fail the compilation if there are warnings.
+ on # Fail the compilation if there are warnings.
+ : incidental propagated ;
+
+# Feature that allows us to configure the maximal template instantiation depth
+# level allowed by a C++ compiler. Applies only to C++ toolsets whose compilers
+# actually support this configuration setting.
+#
+# Note that Boost Build currently does not allow defining features that take any
+# positive integral value as a parameter, which is what we need here, so we just
+# define some of the values here and leave it up to the user to extend this set
+# as he needs using the feature.extend rule.
+#
+# TODO: This should be upgraded as soon as Boost Build adds support for custom
+# validated feature values or at least features allowing any positive integral
+# value. See related Boost Build related trac ticket #194.
+#
+feature.feature c++-template-depth
+ :
+ [ numbers.range 64 1024 : 64 ]
+ [ numbers.range 20 1000 : 10 ]
+ # Maximum template instantiation depth guaranteed for ANSI/ISO C++
+ # conforming programs.
+ 17
+ :
+ incidental optional propagated ;
+
+feature.feature source : : free dependency incidental ;
+feature.feature library : : free dependency incidental ;
+feature.feature file : : free dependency incidental ;
+feature.feature find-shared-library : : free ; #order-sensitive ;
+feature.feature find-static-library : : free ; #order-sensitive ;
+feature.feature library-path : : free path ; #order-sensitive ;
+
+# Internal feature.
+feature.feature library-file : : free dependency ;
+
+feature.feature name : : free ;
+feature.feature tag : : free ;
+feature.feature search : : free path ; #order-sensitive ;
+feature.feature location : : free path ;
+feature.feature dll-path : : free path ;
+feature.feature hardcode-dll-paths : true false : incidental ;
+
+
+# An internal feature that holds the paths of all dependency shared libraries.
+# On Windows, it is needed so that we can add all those paths to PATH when
+# running applications. On Linux, it is needed to add proper -rpath-link command
+# line options.
+feature.feature xdll-path : : free path ;
+
+# Provides means to specify def-file for windows DLLs.
+feature.feature def-file : : free dependency ;
+
+feature.feature suppress-import-lib : false true : incidental ;
+
+# Internal feature used to store the name of a bjam action to call when building
+# a target.
+feature.feature action : : free ;
+
+# This feature is used to allow specific generators to run. For example, QT
+# tools can only be invoked when QT library is used. In that case, <allow>qt
+# will be in usage requirement of the library.
+feature.feature allow : : free ;
+
+# The addressing model to generate code for. Currently a limited set only
+# specifying the bit size of pointers.
+feature.feature address-model : 16 32 64 32_64 : propagated optional ;
+
+# Type of CPU architecture to compile for.
+feature.feature architecture :
+ # x86 and x86-64
+ x86
+
+ # ia64
+ ia64
+
+ # Sparc
+ sparc
+
+ # RS/6000 & PowerPC
+ power
+
+ # MIPS/SGI
+ mips1 mips2 mips3 mips4 mips32 mips32r2 mips64
+
+ # HP/PA-RISC
+ parisc
+
+ # Advanced RISC Machines
+ arm
+
+ # Combined architectures for platforms/toolsets that support building for
+ # multiple architectures at once. "combined" would be the default multi-arch
+ # for the toolset.
+ combined
+ combined-x86-power
+
+ : propagated optional ;
+
+# The specific instruction set in an architecture to compile.
+feature.feature instruction-set :
+ # x86 and x86-64
+ native i486 i586 i686 pentium pentium-mmx pentiumpro pentium2 pentium3
+ pentium3m pentium-m pentium4 pentium4m prescott nocona core2 corei7 corei7-avx core-avx-i
+ conroe conroe-xe conroe-l allendale merom merom-xe kentsfield kentsfield-xe penryn wolfdale
+ yorksfield nehalem sandy-bridge ivy-bridge haswell k6 k6-2 k6-3 athlon athlon-tbird athlon-4 athlon-xp
+ athlon-mp k8 opteron athlon64 athlon-fx k8-sse3 opteron-sse3 athlon64-sse3 amdfam10 barcelona
+ bdver1 bdver2 bdver3 btver1 btver2 winchip-c6 winchip2 c3 c3-2 atom
+
+ # ia64
+ itanium itanium1 merced itanium2 mckinley
+
+ # Sparc
+ v7 cypress v8 supersparc sparclite hypersparc sparclite86x f930 f934
+ sparclet tsc701 v9 ultrasparc ultrasparc3
+
+ # RS/6000 & PowerPC
+ 401 403 405 405fp 440 440fp 505 601 602 603 603e 604 604e 620 630 740 7400
+ 7450 750 801 821 823 860 970 8540 power-common ec603e g3 g4 g5 power power2
+ power3 power4 power5 powerpc powerpc64 rios rios1 rsc rios2 rs64a
+
+ # MIPS
+ 4kc 4kp 5kc 20kc m4k r2000 r3000 r3900 r4000 r4100 r4300 r4400 r4600 r4650
+ r6000 r8000 rm7000 rm9000 orion sb1 vr4100 vr4111 vr4120 vr4130 vr4300
+ vr5000 vr5400 vr5500
+
+ # HP/PA-RISC
+ 700 7100 7100lc 7200 7300 8000
+
+ # Advanced RISC Machines
+ armv2 armv2a armv3 armv3m armv4 armv4t armv5 armv5t armv5te armv6 armv6j iwmmxt ep9312
+ armv7 armv7s
+
+ : propagated optional ;
+
+# Used to select a specific variant of C++ ABI if the compiler supports several.
+feature.feature c++abi : : propagated optional ;
+
+feature.feature conditional : : incidental free ;
+
+# The value of 'no' prevents building of a target.
+feature.feature build : yes no : optional ;
+
+# Windows-specific features
+
+feature.feature user-interface : console gui wince native auto ;
+
+feature.feature variant : : implicit composite propagated symmetric ;
+
+
+# Declares a new variant.
+#
+# First determines explicit properties for this variant, by refining parents'
+# explicit properties with the passed explicit properties. The result is
+# remembered and will be used if this variant is used as parent.
+#
+# Second, determines the full property set for this variant by adding to the
+# explicit properties default values for all missing non-symmetric properties.
+#
+# Lastly, makes appropriate value of 'variant' property expand to the full
+# property set.
+#
+rule variant ( name # Name of the variant
+ : parents-or-properties * # Specifies parent variants, if
+ # 'explicit-properties' are given, and
+ # explicit-properties or parents otherwise.
+ : explicit-properties * # Explicit properties.
+ )
+{
+ local parents ;
+ if ! $(explicit-properties)
+ {
+ if $(parents-or-properties[1]:G)
+ {
+ explicit-properties = $(parents-or-properties) ;
+ }
+ else
+ {
+ parents = $(parents-or-properties) ;
+ }
+ }
+ else
+ {
+ parents = $(parents-or-properties) ;
+ }
+
+ # The problem is that we have to check for conflicts between base variants.
+ if $(parents[2])
+ {
+ errors.error "multiple base variants are not yet supported" ;
+ }
+
+ local inherited ;
+ # Add explicitly specified properties for parents.
+ for local p in $(parents)
+ {
+ # TODO: This check may be made stricter.
+ if ! [ feature.is-implicit-value $(p) ]
+ {
+ errors.error "Invalid base variant" $(p) ;
+ }
+
+ inherited += $(.explicit-properties.$(p)) ;
+ }
+ property.validate $(explicit-properties) ;
+ explicit-properties = [ property.refine $(inherited)
+ : $(explicit-properties) ] ;
+
+ # Record explicitly specified properties for this variant. We do this after
+ # inheriting parents' properties so they affect other variants derived from
+ # this one.
+ .explicit-properties.$(name) = $(explicit-properties) ;
+
+ feature.extend variant : $(name) ;
+ feature.compose <variant>$(name) : $(explicit-properties) ;
+}
+IMPORT $(__name__) : variant : : variant ;
+
+
+variant debug : <optimization>off <debug-symbols>on <inlining>off
+ <runtime-debugging>on ;
+variant release : <optimization>speed <debug-symbols>off <inlining>full
+ <runtime-debugging>off <define>NDEBUG ;
+variant profile : release : <profiling>on <debug-symbols>on ;
+
+
+class searched-lib-target : abstract-file-target
+{
+ rule __init__ ( name
+ : project
+ : shared ?
+ : search *
+ : action
+ )
+ {
+ abstract-file-target.__init__ $(name) : SEARCHED_LIB : $(project)
+ : $(action) : ;
+
+ self.shared = $(shared) ;
+ self.search = $(search) ;
+ }
+
+ rule shared ( )
+ {
+ return $(self.shared) ;
+ }
+
+ rule search ( )
+ {
+ return $(self.search) ;
+ }
+
+ rule actualize-location ( target )
+ {
+ NOTFILE $(target) ;
+ }
+
+ rule path ( )
+ {
+ }
+}
+
+
+# The generator class for libraries (target type LIB). Depending on properties
+# it will request building of the appropriate specific library type --
+# -- SHARED_LIB, STATIC_LIB or SEARCHED_LIB.
+#
+class lib-generator : generator
+{
+ rule __init__ ( * : * )
+ {
+ generator.__init__ $(1) : $(2) : $(3) : $(4) : $(5) : $(6) : $(7) : $(8)
+ : $(9) : $(10) : $(11) : $(12) : $(13) : $(14) : $(15) : $(16) :
+ $(17) : $(18) : $(19) ;
+ }
+
+ rule run ( project name ? : property-set : sources * )
+ {
+ # The lib generator is composing, and can be only invoked with an
+ # explicit name. This check is present in generator.run (and so in
+ # builtin.linking-generator) but duplicated here to avoid doing extra
+ # work.
+ if $(name)
+ {
+ local properties = [ $(property-set).raw ] ;
+ # Determine the needed target type.
+ local actual-type ;
+ # <source>files can be generated by <conditional>@rule feature
+ # in which case we do not consider it a SEARCHED_LIB type.
+ if ! <source> in $(properties:G) &&
+ ( <search> in $(properties:G) || <name> in $(properties:G) )
+ {
+ actual-type = SEARCHED_LIB ;
+ }
+ else if <file> in $(properties:G)
+ {
+ actual-type = LIB ;
+ }
+ else if <link>shared in $(properties)
+ {
+ actual-type = SHARED_LIB ;
+ }
+ else
+ {
+ actual-type = STATIC_LIB ;
+ }
+ property-set = [ $(property-set).add-raw <main-target-type>LIB ] ;
+ # Construct the target.
+ return [ generators.construct $(project) $(name) : $(actual-type)
+ : $(property-set) : $(sources) ] ;
+ }
+ }
+
+ rule viable-source-types ( )
+ {
+ return * ;
+ }
+}
+
+
+generators.register [ new lib-generator builtin.lib-generator : : LIB ] ;
+
+
+# The implementation of the 'lib' rule. Beyond standard syntax that rule allows
+# simplified: "lib a b c ;".
+#
+rule lib ( names + : sources * : requirements * : default-build * :
+ usage-requirements * )
+{
+ if $(names[2])
+ {
+ if <name> in $(requirements:G)
+ {
+ errors.user-error "When several names are given to the 'lib' rule" :
+ "it is not allowed to specify the <name> feature." ;
+ }
+ if $(sources)
+ {
+ errors.user-error "When several names are given to the 'lib' rule" :
+ "it is not allowed to specify sources." ;
+ }
+ }
+
+ # This is a circular module dependency so it must be imported here.
+ import targets ;
+
+ local project = [ project.current ] ;
+ local result ;
+
+ for local name in $(names)
+ {
+ local r = $(requirements) ;
+ # Support " lib a ; " and " lib a b c ; " syntax.
+ if ! $(sources) && ! <name> in $(requirements:G)
+ && ! <file> in $(requirements:G)
+ {
+ r += <name>$(name) ;
+ }
+ result += [ targets.main-target-alternative
+ [ new typed-target $(name) : $(project) : LIB
+ : [ targets.main-target-sources $(sources) : $(name) ]
+ : [ targets.main-target-requirements $(r) : $(project) ]
+ : [ targets.main-target-default-build $(default-build) : $(project) ]
+ : [ targets.main-target-usage-requirements $(usage-requirements) : $(project) ]
+ ] ] ;
+ }
+ return $(result) ;
+}
+IMPORT $(__name__) : lib : : lib ;
+
+
+class searched-lib-generator : generator
+{
+ import property-set ;
+
+ rule __init__ ( )
+ {
+ # The requirements cause the generators to be tried *only* when we are
+ # building a lib target with a 'search' feature. This seems ugly --- all
+ # we want is to make sure searched-lib-generator is not invoked deep
+ # inside transformation search to produce intermediate targets.
+ generator.__init__ searched-lib-generator : : SEARCHED_LIB ;
+ }
+
+ rule run ( project name ? : property-set : sources * )
+ {
+ if $(name)
+ {
+ # If 'name' is empty, it means we have not been called to build a
+ # top-level target. In this case, we just fail immediately, because
+ # searched-lib-generator cannot be used to produce intermediate
+ # targets.
+
+ local properties = [ $(property-set).raw ] ;
+ local shared ;
+ if <link>shared in $(properties)
+ {
+ shared = true ;
+ }
+
+ local search = [ feature.get-values <search> : $(properties) ] ;
+
+ local a = [ new null-action $(property-set) ] ;
+ local lib-name = [ feature.get-values <name> : $(properties) ] ;
+ lib-name ?= $(name) ;
+ local t = [ new searched-lib-target $(lib-name) : $(project)
+ : $(shared) : $(search) : $(a) ] ;
+ # We return sources for a simple reason. If there is
+ # lib png : z : <name>png ;
+ # the 'z' target should be returned, so that apps linking to 'png'
+ # will link to 'z', too.
+ return [ property-set.create <xdll-path>$(search) ]
+ [ virtual-target.register $(t) ] $(sources) ;
+ }
+ }
+}
+
+generators.register [ new searched-lib-generator ] ;
+
+
+class prebuilt-lib-generator : generator
+{
+ rule __init__ ( * : * )
+ {
+ generator.__init__ $(1) : $(2) : $(3) : $(4) : $(5) : $(6) : $(7) : $(8)
+ : $(9) : $(10) : $(11) : $(12) : $(13) : $(14) : $(15) : $(16) :
+ $(17) : $(18) : $(19) ;
+ }
+
+ rule run ( project name ? : property-set : sources * )
+ {
+ local f = [ $(property-set).get <file> ] ;
+ return $(f) $(sources) ;
+ }
+}
+
+generators.register
+ [ new prebuilt-lib-generator builtin.prebuilt : : LIB : <file> ] ;
+
+generators.override builtin.prebuilt : builtin.lib-generator ;
+
+class preprocessed-target-class : basic-target
+{
+ import generators ;
+ rule construct ( name : sources * : property-set )
+ {
+ local result = [ generators.construct [ project ]
+ $(name) : PREPROCESSED_CPP : $(property-set) : $(sources) ] ;
+ if ! $(result)
+ {
+ result = [ generators.construct [ project ]
+ $(name) : PREPROCESSED_C : $(property-set) : $(sources) ] ;
+ }
+ if ! $(result)
+ {
+ local s ;
+ for x in $(sources)
+ {
+ s += [ $(x).name ] ;
+ }
+ local p = [ project ] ;
+ errors.user-error
+ "In project" [ $(p).name ] :
+ "Could not construct preprocessed file \"$(name)\" from $(s:J=, )." ;
+ }
+ return $(result) ;
+ }
+}
+
+rule preprocessed ( name : sources * : requirements * : default-build * :
+ usage-requirements * )
+{
+ local project = [ project.current ] ;
+ return [ targets.main-target-alternative
+ [ new preprocessed-target-class $(name) : $(project)
+ : [ targets.main-target-sources $(sources) : $(name) ]
+ : [ targets.main-target-requirements $(requirements) : $(project) ]
+ : [ targets.main-target-default-build $(default-build) : $(project) ]
+ : [ targets.main-target-usage-requirements $(usage-requirements) : $(project) ]
+ ] ] ;
+}
+
+IMPORT $(__name__) : preprocessed : : preprocessed ;
+
+class compile-action : action
+{
+ import sequence ;
+
+ rule __init__ ( targets * : sources * : action-name : properties * )
+ {
+ action.__init__ $(targets) : $(sources) : $(action-name) : $(properties) ;
+ }
+
+ # For all virtual targets for the same dependency graph as self, i.e. which
+ # belong to the same main target, add their directories to the include path.
+ #
+ rule adjust-properties ( property-set )
+ {
+ local s = [ $(self.targets[1]).creating-subvariant ] ;
+ if $(s)
+ {
+ return [ $(property-set).add-raw
+ [ $(s).implicit-includes "include" : H ] ] ;
+ }
+ else
+ {
+ return $(property-set) ;
+ }
+ }
+}
+
+
+# Declare a special compiler generator. The only thing it does is changing the
+# type used to represent 'action' in the constructed dependency graph to
+# 'compile-action'. That class in turn adds additional include paths to handle
+# cases when a source file includes headers which are generated themselves.
+#
+class C-compiling-generator : generator
+{
+ rule __init__ ( id : source-types + : target-types + : requirements *
+ : optional-properties * )
+ {
+ generator.__init__ $(id) : $(source-types) : $(target-types) :
+ $(requirements) : $(optional-properties) ;
+ }
+
+ rule action-class ( )
+ {
+ return compile-action ;
+ }
+}
+
+
+rule register-c-compiler ( id : source-types + : target-types + : requirements *
+ : optional-properties * )
+{
+ generators.register [ new C-compiling-generator $(id) : $(source-types) :
+ $(target-types) : $(requirements) : $(optional-properties) ] ;
+}
+
+# FIXME: this is ugly, should find a better way (we would like client code to
+# register all generators as "generators.some-rule" instead of
+# "some-module.some-rule".)
+#
+IMPORT $(__name__) : register-c-compiler : : generators.register-c-compiler ;
+
+
+# The generator class for handling EXE and SHARED_LIB creation.
+#
+class linking-generator : generator
+{
+ import path ;
+ import project ;
+ import property-set ;
+ import type ;
+
+ rule __init__ ( id
+ composing ? : # The generator will be composing if a non-empty
+ # string is passed or the parameter is not given. To
+ # make the generator non-composing, pass an empty
+ # string ("").
+ source-types + :
+ target-types + :
+ requirements * )
+ {
+ composing ?= true ;
+ generator.__init__ $(id) $(composing) : $(source-types)
+ : $(target-types) : $(requirements) ;
+ }
+
+ rule run ( project name ? : property-set : sources + )
+ {
+ sources += [ $(property-set).get <library> ] ;
+
+ # Add <library-path> properties for all searched libraries.
+ local extra ;
+ for local s in $(sources)
+ {
+ if [ $(s).type ] = SEARCHED_LIB
+ {
+ local search = [ $(s).search ] ;
+ extra += <library-path>$(search) ;
+ }
+ }
+
+        # It is possible that sources include shared libraries that did not come
+ # from 'lib' targets, e.g. .so files specified as sources. In this case
+ # we have to add extra dll-path properties and propagate extra xdll-path
+ # properties so that application linking to us will get xdll-path to
+ # those libraries.
+ local extra-xdll-paths ;
+ for local s in $(sources)
+ {
+ if [ type.is-derived [ $(s).type ] SHARED_LIB ] && ! [ $(s).action ]
+ {
+ # Unfortunately, we do not have a good way to find the path to a
+ # file, so use this nasty approach.
+ #
+ # TODO: This needs to be done better. One thing that is really
+ # broken with this is that it does not work correctly with
+ # projects having multiple source locations.
+ local p = [ $(s).project ] ;
+ local location = [ path.root [ $(s).name ]
+ [ $(p).get source-location ] ] ;
+ extra-xdll-paths += [ path.parent $(location) ] ;
+ }
+ }
+
+ # Hardcode DLL paths only when linking executables.
+ # Pros: do not need to relink libraries when installing.
+ # Cons: "standalone" libraries (plugins, python extensions) can not
+ # hardcode paths to dependent libraries.
+ if [ $(property-set).get <hardcode-dll-paths> ] = true
+ && [ type.is-derived $(self.target-types[1]) EXE ]
+ {
+ local xdll-path = [ $(property-set).get <xdll-path> ] ;
+ extra += <dll-path>$(xdll-path) <dll-path>$(extra-xdll-paths) ;
+ }
+
+ if $(extra)
+ {
+ property-set = [ $(property-set).add-raw $(extra) ] ;
+ }
+
+ local result = [ generator.run $(project) $(name) : $(property-set)
+ : $(sources) ] ;
+
+ local ur ;
+ if $(result)
+ {
+ ur = [ extra-usage-requirements $(result) : $(property-set) ] ;
+ ur = [ $(ur).add
+ [ property-set.create <xdll-path>$(extra-xdll-paths) ] ] ;
+ }
+ return $(ur) $(result) ;
+ }
+
+ rule extra-usage-requirements ( created-targets * : property-set )
+ {
+ local result = [ property-set.empty ] ;
+ local extra ;
+
+        # Add appropriate <xdll-path> usage requirements.
+ local raw = [ $(property-set).raw ] ;
+ if <link>shared in $(raw)
+ {
+ local paths ;
+ local pwd = [ path.pwd ] ;
+ for local t in $(created-targets)
+ {
+ if [ type.is-derived [ $(t).type ] SHARED_LIB ]
+ {
+ paths += [ path.root [ path.make [ $(t).path ] ] $(pwd) ] ;
+ }
+ }
+ extra += $(paths:G=<xdll-path>) ;
+ }
+
+ # We need to pass <xdll-path> features that we've got from sources,
+ # because if a shared library is built, exe using it needs to know paths
+ # to other shared libraries this one depends on in order to be able to
+ # find them all at runtime.
+
+        # Just pass all features in property-set, it is theoretically possible
+        # that we will propagate <xdll-path> features explicitly specified by
+        # the user, but then the user is to blame for using an internal feature.
+ local values = [ $(property-set).get <xdll-path> ] ;
+ extra += $(values:G=<xdll-path>) ;
+
+ if $(extra)
+ {
+ result = [ property-set.create $(extra) ] ;
+ }
+ return $(result) ;
+ }
+
+ rule generated-targets ( sources + : property-set : project name ? )
+ {
+ local sources2 ; # Sources to pass to inherited rule.
+ local properties2 ; # Properties to pass to inherited rule.
+ local libraries ; # Library sources.
+
+ # Searched libraries are not passed as arguments to the linker but via
+ # some option. So, we pass them to the action using a property.
+ properties2 = [ $(property-set).raw ] ;
+ local fsa ;
+ local fst ;
+ for local s in $(sources)
+ {
+ if [ type.is-derived [ $(s).type ] SEARCHED_LIB ]
+ {
+ local name = [ $(s).name ] ;
+ if [ $(s).shared ]
+ {
+ fsa += $(name) ;
+ }
+ else
+ {
+ fst += $(name) ;
+ }
+ }
+ else
+ {
+ sources2 += $(s) ;
+ }
+ }
+ properties2 += <find-shared-library>$(fsa:J=&&)
+ <find-static-library>$(fst:J=&&) ;
+
+ return [ generator.generated-targets $(sources2)
+ : [ property-set.create $(properties2) ] : $(project) $(name) ] ;
+ }
+}
+
+
+rule register-linker ( id composing ? : source-types + : target-types +
+ : requirements * )
+{
+ generators.register [ new linking-generator $(id) $(composing)
+ : $(source-types) : $(target-types) : $(requirements) ] ;
+}
+
+
+# The generator class for handling STATIC_LIB creation.
+#
+class archive-generator : generator
+{
+ import property-set ;
+
+ rule __init__ ( id composing ? : source-types + : target-types +
+ : requirements * )
+ {
+ composing ?= true ;
+ generator.__init__ $(id) $(composing) : $(source-types)
+ : $(target-types) : $(requirements) ;
+ }
+
+ rule run ( project name ? : property-set : sources + )
+ {
+ sources += [ $(property-set).get <library> ] ;
+
+ local result = [ generator.run $(project) $(name) : $(property-set)
+ : $(sources) ] ;
+
+ # For static linking, if we get a library in source, we can not directly
+ # link to it so we need to cause our dependencies to link to that
+ # library. There are two approaches:
+ # - adding the library to the list of returned targets.
+ # - using the <library> usage requirements.
+ # The problem with the first is:
+ #
+ # lib a1 : : <file>liba1.a ;
+ # lib a2 : a2.cpp a1 : <link>static ;
+ # install dist : a2 ;
+ #
+ # here we will try to install 'a1', even though it is not necessary in
+ # the general case. With the second approach, even indirect dependants
+ # will link to the library, but it should not cause any harm. So, return
+ # all LIB sources together with created targets, so that dependants link
+ # to them.
+ local usage-requirements ;
+ if [ $(property-set).get <link> ] = static
+ {
+ for local t in $(sources)
+ {
+ if [ type.is-derived [ $(t).type ] LIB ]
+ {
+ usage-requirements += <library>$(t) ;
+ }
+ }
+ }
+
+ usage-requirements = [ property-set.create $(usage-requirements) ] ;
+
+ return $(usage-requirements) $(result) ;
+ }
+}
+
+
+rule register-archiver ( id composing ? : source-types + : target-types +
+ : requirements * )
+{
+ generators.register [ new archive-generator $(id) $(composing)
+ : $(source-types) : $(target-types) : $(requirements) ] ;
+}
+
+
+# Generator that accepts everything and produces nothing. Useful as a general
+# fallback for toolset-specific actions like PCH generation.
+#
+class dummy-generator : generator
+{
+ import property-set ;
+
+ rule run ( project name ? : property-set : sources + )
+ {
+ return [ property-set.empty ] ;
+ }
+}
+
+IMPORT $(__name__) : register-linker register-archiver
+ : : generators.register-linker generators.register-archiver ;
diff --git a/src/kenlm/jam-files/boost-build/tools/cast.jam b/src/kenlm/jam-files/boost-build/tools/cast.jam
new file mode 100644
index 0000000..41b0ac3
--- /dev/null
+++ b/src/kenlm/jam-files/boost-build/tools/cast.jam
@@ -0,0 +1,91 @@
+# Copyright 2005 Vladimir Prus.
+# Distributed under the Boost Software License, Version 1.0. (See
+# accompanying file LICENSE_1_0.txt or copy at
+# http://www.boost.org/LICENSE_1_0.txt)
+
+# Defines main target 'cast', used to change type for target. For example, in Qt
+# library one wants two kinds of CPP files -- those that just compiled and those
+# that are passed via the MOC tool.
+#
+# This is done with:
+#
+# exe main : main.cpp [ cast _ moccable-cpp : widget.cpp ] ;
+#
+# Boost.Build will assign target type CPP to both main.cpp and widget.cpp. Then,
+# the cast rule will change target type of widget.cpp to MOCCABLE-CPP, and Qt
+# support will run the MOC tool as part of the build process.
+#
+# At the moment, the 'cast' rule only works for non-derived (source) targets.
+#
+# TODO: The following comment is unclear or incorrect. Clean it up.
+# > Another solution would be to add a separate main target 'moc-them' that
+# > would moc all the passed sources, no matter what their type is, but I prefer
+# > cast, as defining a new target type + generator for that type is somewhat
+# > simpler than defining a main target rule.
+
+import "class" : new ;
+import project ;
+import property-set ;
+import targets ;
+import type ;
+
+
+class cast-target-class : typed-target
+{
+ import type ;
+
+ rule __init__ ( name : project : type : sources * : requirements * :
+ default-build * : usage-requirements * )
+ {
+ typed-target.__init__ $(name) : $(project) : $(type) : $(sources) :
+ $(requirements) : $(default-build) : $(usage-requirements) ;
+ }
+
+ rule construct ( name : source-targets * : property-set )
+ {
+ local result ;
+ for local s in $(source-targets)
+ {
+ if ! [ class.is-a $(s) : file-target ]
+ {
+ import errors : user-error : errors.user-error ;
+ errors.user-error Source to the 'cast' rule is not a file! ;
+ }
+ if [ $(s).action ]
+ {
+ import errors : user-error : errors.user-error ;
+ errors.user-error Only non-derived target are allowed for
+ 'cast'. : when building [ full-name ] ;
+ }
+ local r = [ $(s).clone-with-different-type $(self.type) ] ;
+ result += [ virtual-target.register $(r) ] ;
+ }
+ return [ property-set.empty ] $(result) ;
+ }
+}
+
+
+rule cast ( name type : sources * : requirements * : default-build * :
+ usage-requirements * )
+{
+ local project = [ project.current ] ;
+
+ local real-type = [ type.type-from-rule-name $(type) ] ;
+ if ! $(real-type)
+ {
+ import errors ;
+ errors.user-error No type corresponds to the main target rule name
+ '$(type)' : "Hint: try a lowercase name" ;
+ }
+
+ targets.main-target-alternative [ new cast-target-class $(name) : $(project)
+ : $(real-type)
+ : [ targets.main-target-sources $(sources) : $(name) ]
+ : [ targets.main-target-requirements $(requirements) : $(project) ]
+ : [ targets.main-target-default-build $(default-build) : $(project) ]
+ : [ targets.main-target-usage-requirements $(usage-requirements) :
+ $(project) ] ] ;
+}
+
+
+IMPORT $(__name__) : cast : : cast ;
diff --git a/src/kenlm/jam-files/boost-build/tools/clang-darwin.jam b/src/kenlm/jam-files/boost-build/tools/clang-darwin.jam
new file mode 100644
index 0000000..a8abc7d
--- /dev/null
+++ b/src/kenlm/jam-files/boost-build/tools/clang-darwin.jam
@@ -0,0 +1,170 @@
+# Copyright Vladimir Prus 2004.
+# Copyright Noel Belcourt 2007.
+# Distributed under the Boost Software License, Version 1.0.
+# (See accompanying file LICENSE_1_0.txt
+# or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+import clang ;
+import feature : feature ;
+import os ;
+import toolset ;
+import toolset : flags ;
+import gcc ;
+import common ;
+import errors ;
+import generators ;
+
+feature.extend-subfeature toolset clang : platform : darwin ;
+
+toolset.inherit-generators clang-darwin
+ <toolset>clang <toolset-clang:platform>darwin
+ : gcc
+ # Don't inherit PCH generators. They were not tested, and probably
+ # don't work for this compiler.
+ : gcc.mingw.link gcc.mingw.link.dll gcc.compile.c.pch gcc.compile.c++.pch
+ ;
+
+generators.override clang-darwin.prebuilt : builtin.lib-generator ;
+generators.override clang-darwin.prebuilt : builtin.prebuilt ;
+generators.override clang-darwin.searched-lib-generator : searched-lib-generator ;
+
+toolset.inherit-rules clang-darwin : gcc ;
+toolset.inherit-flags clang-darwin : gcc
+ : <inlining>off <inlining>on <inlining>full <optimization>space
+ <warnings>off <warnings>all <warnings>on
+ <architecture>x86/<address-model>32
+ <architecture>x86/<address-model>64
+ ;
+
+if [ MATCH (--debug-configuration) : [ modules.peek : ARGV ] ]
+{
+ .debug-configuration = true ;
+}
+
+# vectorization diagnostics
+feature vectorize : off on full ;
+
+# Initializes the clang-darwin toolset
+# version is optional
+# name (default clang++) is used to invoke the specified clang compiler
+# compile and link options allow you to specify additional command line options for each version
+rule init ( version ? : command * : options * )
+{
+ command = [ common.get-invocation-command clang-darwin : clang++
+ : $(command) ] ;
+
+ # Determine the version
+ local command-string = $(command:J=" ") ;
+ if $(command)
+ {
+ version ?= [ MATCH "^([0-9.]+)"
+ : [ SHELL "$(command-string) -dumpversion" ] ] ;
+ }
+
+ local condition = [ common.check-init-parameters clang-darwin
+ : version $(version) ] ;
+
+ common.handle-options clang-darwin : $(condition) : $(command) : $(options) ;
+
+ gcc.init-link-flags clang-darwin darwin $(condition) ;
+
+}
+
+SPACE = " " ;
+
+flags clang-darwin.compile OPTIONS <cflags> ;
+flags clang-darwin.compile OPTIONS <cxxflags> ;
+# flags clang-darwin.compile INCLUDES <include> ;
+
+# Declare flags and action for compilation.
+toolset.flags clang-darwin.compile OPTIONS <optimization>off : -O0 ;
+toolset.flags clang-darwin.compile OPTIONS <optimization>speed : -O3 ;
+toolset.flags clang-darwin.compile OPTIONS <optimization>space : -Os ;
+
+toolset.flags clang-darwin.compile OPTIONS <inlining>off : -fno-inline ;
+toolset.flags clang-darwin.compile OPTIONS <inlining>on : -Wno-inline ;
+toolset.flags clang-darwin.compile OPTIONS <inlining>full : -finline-functions -Wno-inline ;
+
+toolset.flags clang-darwin.compile OPTIONS <warnings>off : -w ;
+toolset.flags clang-darwin.compile OPTIONS <warnings>on : -Wall ;
+toolset.flags clang-darwin.compile OPTIONS <warnings>all : -Wall -pedantic ;
+toolset.flags clang-darwin.compile OPTIONS <warnings-as-errors>on : -Werror ;
+
+toolset.flags clang-darwin.compile OPTIONS <debug-symbols>on : -g ;
+toolset.flags clang-darwin.compile OPTIONS <profiling>on : -pg ;
+toolset.flags clang-darwin.compile OPTIONS <rtti>off : -fno-rtti ;
+
+actions compile.c
+{
+ "$(CONFIG_COMMAND)" -x c $(OPTIONS) -D$(DEFINES) -I"$(INCLUDES)" -c -o "$(<)" "$(>)"
+}
+
+actions compile.c++
+{
+ "$(CONFIG_COMMAND)" -x c++ $(OPTIONS) -D$(DEFINES) -I"$(INCLUDES)" -c -o "$(<)" "$(>)"
+}
+
+flags clang-darwin ARFLAGS <archiveflags> ;
+
+# Default value. Mostly for the sake of clang-linux
+# that inherits from gcc, but does not have the same
+# logic to set the .AR variable. We can put the same
+# logic in clang-linux, but that's hardly worth the trouble
+# as on Linux, 'ar' is always available.
+.AR = ar ;
+
+rule archive ( targets * : sources * : properties * )
+{
+ # Always remove archive and start again. Here's rationale from
+ # Andre Hentz:
+ #
+ # I had a file, say a1.c, that was included into liba.a.
+ # I moved a1.c to a2.c, updated my Jamfiles and rebuilt.
+ # My program was crashing with absurd errors.
+ # After some debugging I traced it back to the fact that a1.o was *still*
+ # in liba.a
+ #
+ # Rene Rivera:
+ #
+ # Originally removing the archive was done by splicing an RM
+ # onto the archive action. That makes archives fail to build on NT
+ # when they have many files because it will no longer execute the
+ # action directly and blow the line length limit. Instead we
+ # remove the file in a different action, just before the building
+ # of the archive.
+ #
+ local clean.a = $(targets[1])(clean) ;
+ TEMPORARY $(clean.a) ;
+ NOCARE $(clean.a) ;
+ LOCATE on $(clean.a) = [ on $(targets[1]) return $(LOCATE) ] ;
+ DEPENDS $(clean.a) : $(sources) ;
+ DEPENDS $(targets) : $(clean.a) ;
+ common.RmTemps $(clean.a) : $(targets) ;
+}
+
+actions piecemeal archive
+{
+ "$(.AR)" $(AROPTIONS) rc "$(<)" "$(>)"
+ "ranlib" -cs "$(<)"
+}
+
+flags clang-darwin.link USER_OPTIONS <linkflags> ;
+
+# Declare actions for linking
+rule link ( targets * : sources * : properties * )
+{
+ SPACE on $(targets) = " " ;
+ # Serialize execution of the 'link' action, since
+ # running N links in parallel is just slower.
+ JAM_SEMAPHORE on $(targets) = <s>clang-darwin-link-semaphore ;
+}
+
+actions link bind LIBRARIES
+{
+ "$(CONFIG_COMMAND)" $(USER_OPTIONS) -L"$(LINKPATH)" -o "$(<)" "$(>)" "$(LIBRARIES)" -l$(FINDLIBS-SA) -l$(FINDLIBS-ST) $(OPTIONS)
+}
+
+actions link.dll bind LIBRARIES
+{
+ "$(CONFIG_COMMAND)" $(USER_OPTIONS) -L"$(LINKPATH)" -o "$(<)" -single_module -dynamiclib -install_name "$(<[1]:D=)" "$(>)" "$(LIBRARIES)" -l$(FINDLIBS-SA) -l$(FINDLIBS-ST) $(OPTIONS)
+}
diff --git a/src/kenlm/jam-files/boost-build/tools/clang-linux.jam b/src/kenlm/jam-files/boost-build/tools/clang-linux.jam
new file mode 100644
index 0000000..2999c2d
--- /dev/null
+++ b/src/kenlm/jam-files/boost-build/tools/clang-linux.jam
@@ -0,0 +1,196 @@
+# Copyright (c) 2003 Michael Stevens
+# Copyright (c) 2010-2011 Bryce Lelbach (blelbach@cct.lsu.edu, maintainer)
+#
+# Use, modification and distribution is subject to the Boost Software
+# License Version 1.0. (See accompanying file LICENSE_1_0.txt or
+# http://www.boost.org/LICENSE_1_0.txt)
+
+import toolset ;
+import feature ;
+import toolset : flags ;
+
+import clang ;
+import gcc ;
+import common ;
+import errors ;
+import generators ;
+import type ;
+import numbers ;
+
+feature.extend-subfeature toolset clang : platform : linux ;
+
+toolset.inherit-generators clang-linux
+ <toolset>clang <toolset-clang:platform>linux : gcc
+ : gcc.mingw.link gcc.mingw.link.dll gcc.cygwin.link gcc.cygwin.link.dll ;
+generators.override clang-linux.prebuilt : builtin.lib-generator ;
+generators.override clang-linux.prebuilt : builtin.prebuilt ;
+generators.override clang-linux.searched-lib-generator : searched-lib-generator ;
+
+# Override default do-nothing generators.
+generators.override clang-linux.compile.c.pch : pch.default-c-pch-generator ;
+generators.override clang-linux.compile.c++.pch : pch.default-cpp-pch-generator ;
+
+type.set-generated-target-suffix PCH
+ : <toolset>clang <toolset-clang:platform>linux : pth ;
+
+toolset.inherit-rules clang-linux : gcc ;
+toolset.inherit-flags clang-linux : gcc
+ : <inlining>off <inlining>on <inlining>full
+ <optimization>space <optimization>speed
+ <warnings>off <warnings>all <warnings>on ;
+
+if [ MATCH (--debug-configuration) : [ modules.peek : ARGV ] ] {
+ .debug-configuration = true ;
+}
+
+rule init ( version ? : command * : options * ) {
+ command = [ common.get-invocation-command clang-linux : clang++
+ : $(command) ] ;
+
+ # Determine the version
+ local command-string = $(command:J=" ") ;
+
+ if $(command) {
+ version ?= [ MATCH "version ([0-9.]+)"
+ : [ SHELL "$(command-string) --version" ] ] ;
+ }
+
+ local condition = [ common.check-init-parameters clang-linux
+ : version $(version) ] ;
+
+ common.handle-options clang-linux : $(condition) : $(command) : $(options) ;
+
+ gcc.init-link-flags clang-linux gnu $(condition) ;
+}
+
+###############################################################################
+# Flags
+
+toolset.flags clang-linux.compile OPTIONS <cflags> ;
+toolset.flags clang-linux.compile.c++ OPTIONS <cxxflags> ;
+
+toolset.flags clang-linux.compile OPTIONS <optimization>off : ;
+toolset.flags clang-linux.compile OPTIONS <optimization>speed : -O3 ;
+toolset.flags clang-linux.compile OPTIONS <optimization>space : -Os ;
+
+# note: clang silently ignores some of these inlining options
+toolset.flags clang-linux.compile OPTIONS <inlining>off : -fno-inline ;
+toolset.flags clang-linux.compile OPTIONS <inlining>on : -Wno-inline ;
+toolset.flags clang-linux.compile OPTIONS <inlining>full : -finline-functions -Wno-inline ;
+
+toolset.flags clang-linux.compile OPTIONS <warnings>off : -w ;
+toolset.flags clang-linux.compile OPTIONS <warnings>on : -Wall ;
+toolset.flags clang-linux.compile OPTIONS <warnings>all : -Wall -pedantic ;
+toolset.flags clang-linux.compile OPTIONS <warnings-as-errors>on : -Werror ;
+
+toolset.flags clang-linux.compile OPTIONS <debug-symbols>on : -g ;
+toolset.flags clang-linux.compile OPTIONS <profiling>on : -pg ;
+toolset.flags clang-linux.compile OPTIONS <rtti>off : -fno-rtti ;
+
+###############################################################################
+# C and C++ compilation
+
+rule compile.c++ ( targets * : sources * : properties * ) {
+ gcc.setup-threading $(targets) : $(sources) : $(properties) ;
+ gcc.setup-fpic $(targets) : $(sources) : $(properties) ;
+ gcc.setup-address-model $(targets) : $(sources) : $(properties) ;
+
+ local pth-file = [ on $(<) return $(PCH_FILE) ] ;
+
+ if $(pth-file) {
+ DEPENDS $(<) : $(pth-file) ;
+ clang-linux.compile.c++.with-pch $(targets) : $(sources) ;
+ }
+ else {
+ clang-linux.compile.c++.without-pth $(targets) : $(sources) ;
+ }
+}
+
+actions compile.c++.without-pth {
+ "$(CONFIG_COMMAND)" -c -x c++ $(OPTIONS) $(USER_OPTIONS) -D$(DEFINES) -I"$(INCLUDES)" -o "$(<)" "$(>)"
+}
+
+actions compile.c++.with-pch bind PCH_FILE
+{
+ "$(CONFIG_COMMAND)" -c -x c++ $(OPTIONS) $(USER_OPTIONS) -D$(DEFINES) -I"$(INCLUDES)" -Xclang -include-pth -Xclang "$(PCH_FILE)" -o "$(<)" "$(>)"
+}
+
+rule compile.c ( targets * : sources * : properties * )
+{
+ gcc.setup-threading $(targets) : $(sources) : $(properties) ;
+ gcc.setup-fpic $(targets) : $(sources) : $(properties) ;
+ gcc.setup-address-model $(targets) : $(sources) : $(properties) ;
+
+ local pth-file = [ on $(<) return $(PCH_FILE) ] ;
+
+ if $(pth-file) {
+ DEPENDS $(<) : $(pth-file) ;
+ clang-linux.compile.c.with-pch $(targets) : $(sources) ;
+ }
+ else {
+ clang-linux.compile.c.without-pth $(targets) : $(sources) ;
+ }
+}
+
+actions compile.c.without-pth
+{
+ "$(CONFIG_COMMAND)" -c -x c $(OPTIONS) $(USER_OPTIONS) -D$(DEFINES) -I"$(INCLUDES)" -c -o "$(<)" "$(>)"
+}
+
+actions compile.c.with-pch bind PCH_FILE
+{
+ "$(CONFIG_COMMAND)" -c -x c $(OPTIONS) $(USER_OPTIONS) -D$(DEFINES) -I"$(INCLUDES)" -Xclang -include-pth -Xclang "$(PCH_FILE)" -c -o "$(<)" "$(>)"
+}
+
+###############################################################################
+# PCH emission
+
+rule compile.c++.pch ( targets * : sources * : properties * ) {
+ gcc.setup-threading $(targets) : $(sources) : $(properties) ;
+ gcc.setup-fpic $(targets) : $(sources) : $(properties) ;
+ gcc.setup-address-model $(targets) : $(sources) : $(properties) ;
+}
+
+actions compile.c++.pch {
+ rm -f "$(<)" && "$(CONFIG_COMMAND)" -x c++-header $(OPTIONS) $(USER_OPTIONS) -D$(DEFINES) -I"$(INCLUDES)" -Xclang -emit-pth -o "$(<)" "$(>)"
+}
+
+rule compile.c.pch ( targets * : sources * : properties * ) {
+ gcc.setup-threading $(targets) : $(sources) : $(properties) ;
+ gcc.setup-fpic $(targets) : $(sources) : $(properties) ;
+ gcc.setup-address-model $(targets) : $(sources) : $(properties) ;
+}
+
+actions compile.c.pch
+{
+ rm -f "$(<)" && "$(CONFIG_COMMAND)" -x c-header $(OPTIONS) $(USER_OPTIONS) -D$(DEFINES) -I"$(INCLUDES)" -Xclang -emit-pth -o "$(<)" "$(>)"
+}
+
+###############################################################################
+# Linking
+
+SPACE = " " ;
+
+rule link ( targets * : sources * : properties * ) {
+ gcc.setup-threading $(targets) : $(sources) : $(properties) ;
+ gcc.setup-address-model $(targets) : $(sources) : $(properties) ;
+ SPACE on $(targets) = " " ;
+ JAM_SEMAPHORE on $(targets) = <s>clang-linux-link-semaphore ;
+}
+
+actions link bind LIBRARIES {
+ "$(CONFIG_COMMAND)" -L"$(LINKPATH)" -Wl,-R$(SPACE)-Wl,"$(RPATH)" -Wl,-rpath-link$(SPACE)-Wl,"$(RPATH_LINK)" -o "$(<)" "$(>)" "$(LIBRARIES)" -l$(FINDLIBS-SA) -l$(FINDLIBS-ST) $(OPTIONS) $(USER_OPTIONS)
+}
+
+rule link.dll ( targets * : sources * : properties * ) {
+ gcc.setup-threading $(targets) : $(sources) : $(properties) ;
+ gcc.setup-address-model $(targets) : $(sources) : $(properties) ;
+ SPACE on $(targets) = " " ;
+ JAM_SEMAPHORE on $(targets) = <s>clang-linux-link-semaphore ;
+}
+
+# Differ from 'link' above only by -shared.
+actions link.dll bind LIBRARIES {
+ "$(CONFIG_COMMAND)" -L"$(LINKPATH)" -Wl,-R$(SPACE)-Wl,"$(RPATH)" -o "$(<)" -Wl,-soname$(SPACE)-Wl,$(<[1]:D=) -shared "$(>)" "$(LIBRARIES)" -l$(FINDLIBS-SA) -l$(FINDLIBS-ST) $(OPTIONS) $(USER_OPTIONS)
+}
+
diff --git a/src/kenlm/jam-files/boost-build/tools/clang.jam b/src/kenlm/jam-files/boost-build/tools/clang.jam
new file mode 100644
index 0000000..e0ac9a5
--- /dev/null
+++ b/src/kenlm/jam-files/boost-build/tools/clang.jam
@@ -0,0 +1,27 @@
+# Distributed under the Boost Software License, Version 1.0.
+# (See accompanying file LICENSE_1_0.txt
+# or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+# This is a generic 'clang' toolset. Depending on the current system, it
+# forwards either to 'clang-unix' or 'clang-darwin' modules.
+
+import feature ;
+import os ;
+import toolset ;
+
+feature.extend toolset : clang ;
+feature.subfeature toolset clang : platform : : propagated link-incompatible ;
+
+rule init ( * : * )
+{
+ if [ os.name ] = MACOSX
+ {
+ toolset.using clang-darwin :
+ $(1) : $(2) : $(3) : $(4) : $(5) : $(6) : $(7) : $(8) : $(9) ;
+ }
+ else
+ {
+ toolset.using clang-linux :
+ $(1) : $(2) : $(3) : $(4) : $(5) : $(6) : $(7) : $(8) : $(9) ;
+ }
+}
diff --git a/src/kenlm/jam-files/boost-build/tools/common.jam b/src/kenlm/jam-files/boost-build/tools/common.jam
new file mode 100644
index 0000000..08df2d9
--- /dev/null
+++ b/src/kenlm/jam-files/boost-build/tools/common.jam
@@ -0,0 +1,988 @@
+# Copyright 2003, 2005 Dave Abrahams
+# Copyright 2005, 2006 Rene Rivera
+# Copyright 2005 Toon Knapen
+# Copyright 2002, 2003, 2004, 2005, 2006 Vladimir Prus
+# Distributed under the Boost Software License, Version 1.0.
+# (See accompanying file LICENSE_1_0.txt or http://www.boost.org/LICENSE_1_0.txt)
+
+# Provides actions common to all toolsets, such as creating directories and
+# removing files.
+
+import os ;
+import modules ;
+import utility ;
+import print ;
+import type ;
+import feature ;
+import errors ;
+import path ;
+import sequence ;
+import toolset ;
+import virtual-target ;
+
+if [ MATCH (--debug-configuration) : [ modules.peek : ARGV ] ]
+{
+ .debug-configuration = true ;
+}
+if [ MATCH (--show-configuration) : [ modules.peek : ARGV ] ]
+{
+ .show-configuration = true ;
+}
+
+# Configurations
+#
+# The following class helps to manage toolset configurations. Each configuration
+# has a unique ID and one or more parameters. A typical example of a unique ID
+# is a condition generated by 'common.check-init-parameters' rule. Other kinds
+# of IDs can be used. Parameters may include any details about the configuration
+# like 'command', 'path', etc.
+#
+# A toolset configuration may be in one of the following states:
+#
+# - registered
+# Configuration has been registered (e.g. explicitly or by auto-detection
+# code) but has not yet been marked as used, i.e. 'toolset.using' rule has
+# not yet been called for it.
+# - used
+# Once called 'toolset.using' rule marks the configuration as 'used'.
+#
+# The main difference between the states above is that while a configuration is
+# 'registered' its options can be freely changed. This is useful in particular
+# for autodetection code - all detected configurations may be safely overwritten
+# by user code.
+
+class configurations
+{
+ import errors ;
+
+ rule __init__ ( )
+ {
+ }
+
+ # Registers a configuration.
+ #
+ # Returns 'true' if the configuration has been added and an empty value if
+ # it already exists. Reports an error if the configuration is 'used'.
+ #
+ rule register ( id )
+ {
+ if $(id) in $(self.used)
+ {
+ errors.error "common: the configuration '$(id)' is in use" ;
+ }
+
+ local retval ;
+
+ if ! $(id) in $(self.all)
+ {
+ self.all += $(id) ;
+
+ # Indicate that a new configuration has been added.
+ retval = true ;
+ }
+
+ return $(retval) ;
+ }
+
+ # Mark a configuration as 'used'.
+ #
+ # Returns 'true' if the state of the configuration has been changed to
+ # 'used' and an empty value if it the state has not been changed. Reports an
+ # error if the configuration is not known.
+ #
+ rule use ( id )
+ {
+ if ! $(id) in $(self.all)
+ {
+ errors.error "common: the configuration '$(id)' is not known" ;
+ }
+
+ local retval ;
+
+ if ! $(id) in $(self.used)
+ {
+ self.used += $(id) ;
+
+ # Indicate that the configuration has been marked as 'used'.
+ retval = true ;
+ }
+
+ return $(retval) ;
+ }
+
+ # Return all registered configurations.
+ #
+ rule all ( )
+ {
+ return $(self.all) ;
+ }
+
+ # Return all used configurations.
+ #
+ rule used ( )
+ {
+ return $(self.used) ;
+ }
+
+ # Returns the value of a configuration parameter.
+ #
+ rule get ( id : param )
+ {
+ return $(self.$(param).$(id)) ;
+ }
+
+ # Sets the value of a configuration parameter.
+ #
+ rule set ( id : param : value * )
+ {
+ self.$(param).$(id) = $(value) ;
+ }
+}
+
+
+# The rule for checking toolset parameters. Trailing parameters should all be
+# parameter name/value pairs. The rule will check that each parameter either has
+# a value in each invocation or has no value in each invocation. Also, the rule
+# will check that the combination of all parameter values is unique in all
+# invocations.
+#
+# Each parameter name corresponds to a subfeature. This rule will declare a
+# subfeature the first time a non-empty parameter value is passed and will
+# extend it with all the values.
+#
+# The return value from this rule is a condition to be used for flags settings.
+#
+rule check-init-parameters ( toolset requirement * : * )
+{
+ local sig = $(toolset) ;
+ local condition = <toolset>$(toolset) ;
+ local subcondition ;
+ for local index in 2 3 4 5 6 7 8 9
+ {
+ local name = $($(index)[1]) ;
+ local value = $($(index)[2]) ;
+
+ if $(value)-is-not-empty
+ {
+ condition = $(condition)-$(value) ;
+ if $(.had-unspecified-value.$(toolset).$(name))
+ {
+ errors.user-error
+ "$(toolset) initialization: parameter '$(name)'"
+ "inconsistent" : "no value was specified in earlier"
+ "initialization" : "an explicit value is specified now" ;
+ }
+ # The below logic is for intel compiler. It calls this rule with
+ # 'intel-linux' and 'intel-win' as toolset, so we need to get the
+ # base part of toolset name. We can not pass 'intel' as toolset
+ # because in that case it will be impossible to register versionless
+ # intel-linux and intel-win toolsets of a specific version.
+ local t = $(toolset) ;
+ local m = [ MATCH ([^-]*)- : $(toolset) ] ;
+ if $(m)
+ {
+ t = $(m[1]) ;
+ }
+ if ! $(.had-value.$(toolset).$(name))
+ {
+ if ! $(.declared-subfeature.$(t).$(name))
+ {
+ feature.subfeature toolset $(t) : $(name) : : propagated ;
+ .declared-subfeature.$(t).$(name) = true ;
+ }
+ .had-value.$(toolset).$(name) = true ;
+ }
+ feature.extend-subfeature toolset $(t) : $(name) : $(value) ;
+ subcondition += <toolset-$(t):$(name)>$(value) ;
+ }
+ else
+ {
+ if $(.had-value.$(toolset).$(name))
+ {
+ errors.user-error
+ "$(toolset) initialization: parameter '$(name)'"
+ "inconsistent" : "an explicit value was specified in an"
+ "earlier initialization" : "no value is specified now" ;
+ }
+ .had-unspecified-value.$(toolset).$(name) = true ;
+ }
+ sig = $(sig)$(value:E="")- ;
+ }
+ if $(sig) in $(.all-signatures)
+ {
+ local message =
+ "duplicate initialization of $(toolset) with the following parameters: " ;
+ for local index in 2 3 4 5 6 7 8 9
+ {
+ local p = $($(index)) ;
+ if $(p)
+ {
+ message += "$(p[1]) = $(p[2]:E=<unspecified>)" ;
+ }
+ }
+ message += "previous initialization at $(.init-loc.$(sig))" ;
+ errors.user-error
+ $(message[1]) : $(message[2]) : $(message[3]) : $(message[4]) :
+ $(message[5]) : $(message[6]) : $(message[7]) : $(message[8]) ;
+ }
+ .all-signatures += $(sig) ;
+ .init-loc.$(sig) = [ errors.nearest-user-location ] ;
+
+ # If we have a requirement, this version should only be applied under that
+ # condition. To accomplish this we add a toolset requirement that imposes
+ # the toolset subcondition, which encodes the version.
+ if $(requirement)
+ {
+ local r = <toolset>$(toolset) $(requirement) ;
+ r = $(r:J=,) ;
+ toolset.add-requirements $(r):$(subcondition) ;
+ }
+
+ # We add the requirements, if any, to the condition to scope the toolset
+ # variables and options to this specific version.
+ condition += $(requirement) ;
+
+ if $(.show-configuration)
+ {
+ ECHO notice: $(condition) ;
+ }
+ return $(condition:J=/) ;
+}
+
+
+# A helper rule to get the command to invoke some tool. If
+# 'user-provided-command' is not given, tries to find binary named 'tool' in
+# PATH and in the passed 'additional-path'. Otherwise, verifies that the first
+# element of 'user-provided-command' is an existing program.
+#
+# This rule returns the command to be used when invoking the tool. If we can not
+# find the tool, a warning is issued. If 'path-last' is specified, PATH is
+# checked after 'additional-paths' when searching for 'tool'.
+#
+rule get-invocation-command-nodefault ( toolset : tool :
+ user-provided-command * : additional-paths * : path-last ? )
+{
+ local command ;
+ if ! $(user-provided-command)
+ {
+ command = [ find-tool $(tool) : $(additional-paths) : $(path-last) ] ;
+ if ! $(command) && $(.debug-configuration)
+ {
+ ECHO warning: toolset $(toolset) initialization: can not find tool
+ $(tool) ;
+ ECHO warning: initialized from [ errors.nearest-user-location ] ;
+ }
+ }
+ else
+ {
+ command = [ check-tool $(user-provided-command) ] ;
+ if ! $(command) && $(.debug-configuration)
+ {
+ ECHO warning: toolset $(toolset) initialization: ;
+ ECHO warning: can not find user-provided command
+ '$(user-provided-command)' ;
+ ECHO warning: initialized from [ errors.nearest-user-location ] ;
+ }
+ }
+
+ return $(command) ;
+}
+
+
+# Same as get-invocation-command-nodefault, except that if no tool is found,
+# returns either the user-provided-command, if present, or the 'tool' parameter.
+#
+rule get-invocation-command ( toolset : tool : user-provided-command * :
+ additional-paths * : path-last ? )
+{
+ local result = [ get-invocation-command-nodefault $(toolset) : $(tool) :
+ $(user-provided-command) : $(additional-paths) : $(path-last) ] ;
+
+ if ! $(result)
+ {
+ if $(user-provided-command)
+ {
+ result = $(user-provided-command) ;
+ }
+ else
+ {
+ result = $(tool) ;
+ }
+ }
+ return $(result) ;
+}
+
+
+# Given an invocation command return the absolute path to the command. This
+# works even if command has no path element and was found on the PATH.
+#
+rule get-absolute-tool-path ( command )
+{
+ if $(command:D)
+ {
+ return $(command:D) ;
+ }
+ else
+ {
+ local m = [ GLOB [ modules.peek : PATH Path path ] : $(command)
+ $(command).exe ] ;
+ return $(m[1]:D) ;
+ }
+}
+
+
+# Attempts to find tool (binary) named 'name' in PATH and in 'additional-paths'.
+# If found in PATH, returns 'name' and if found in additional paths, returns
+# absolute name. If the tool is found in several directories, returns the first
+# path found. Otherwise, returns an empty string. If 'path-last' is specified,
+# PATH is searched after 'additional-paths'.
+#
+rule find-tool ( name : additional-paths * : path-last ? )
+{
+ local path = [ path.programs-path ] ;
+ local match = [ path.glob $(path) : $(name) $(name).exe ] ;
+ local additional-match = [ path.glob $(additional-paths) : $(name)
+ $(name).exe ] ;
+
+ local result ;
+ if $(path-last)
+ {
+ result = $(additional-match) ;
+ if ! $(result) && $(match)
+ {
+ result = $(name) ;
+ }
+ }
+ else
+ {
+ if $(match)
+ {
+ result = $(name) ;
+ }
+ else
+ {
+ result = $(additional-match) ;
+ }
+ }
+ if $(result)
+ {
+ return [ path.native $(result[1]) ] ;
+ }
+}
+
+# Checks if 'command' can be found either in path or is a full name to an
+# existing file.
+#
+local rule check-tool-aux ( command )
+{
+ if $(command:D)
+ {
+ if [ path.exists $(command) ]
+ # Both NT and Cygwin will run .exe files by their unqualified names.
+ || ( [ os.on-windows ] && [ path.exists $(command).exe ] )
+ # Only NT will run .bat & .cmd files by their unqualified names.
+ || ( ( [ os.name ] = NT ) && ( [ path.exists $(command).bat ] ||
+ [ path.exists $(command).cmd ] ) )
+ {
+ return $(command) ;
+ }
+ }
+ else
+ {
+ if [ GLOB [ modules.peek : PATH Path path ] : $(command) ]
+ {
+ return $(command) ;
+ }
+ }
+}
+
+
+# Checks that a tool can be invoked by 'command'. If command is not an absolute
+# path, checks if it can be found in 'path'. If comand is an absolute path,
+# check that it exists. Returns 'command' if ok or empty string otherwise.
+#
+local rule check-tool ( xcommand + )
+{
+ if [ check-tool-aux $(xcommand[1]) ] ||
+ [ check-tool-aux $(xcommand[-1]) ]
+ {
+ return $(xcommand) ;
+ }
+}
+
+
+# Handle common options for toolset, specifically sets the following flag
+# variables:
+# - CONFIG_COMMAND to $(command)
+# - OPTIONS for compile to the value of <compileflags> in $(options)
+# - OPTIONS for compile.c to the value of <cflags> in $(options)
+# - OPTIONS for compile.c++ to the value of <cxxflags> in $(options)
+# - OPTIONS for compile.fortran to the value of <fflags> in $(options)
+# - OPTIONS for link to the value of <linkflags> in $(options)
+#
+rule handle-options ( toolset : condition * : command * : options * )
+{
+ if $(.debug-configuration)
+ {
+ ECHO notice: will use '$(command)' for $(toolset), condition
+ $(condition:E=(empty)) ;
+ }
+
+ # The last parameter ('unchecked') says it is OK to set flags for another
+ # module.
+ toolset.flags $(toolset) CONFIG_COMMAND $(condition) : $(command)
+ : unchecked ;
+
+ toolset.flags $(toolset).compile OPTIONS $(condition) :
+ [ feature.get-values <compileflags> : $(options) ] : unchecked ;
+
+ toolset.flags $(toolset).compile.c OPTIONS $(condition) :
+ [ feature.get-values <cflags> : $(options) ] : unchecked ;
+
+ toolset.flags $(toolset).compile.c++ OPTIONS $(condition) :
+ [ feature.get-values <cxxflags> : $(options) ] : unchecked ;
+
+ toolset.flags $(toolset).compile.fortran OPTIONS $(condition) :
+ [ feature.get-values <fflags> : $(options) ] : unchecked ;
+
+ toolset.flags $(toolset).link OPTIONS $(condition) :
+ [ feature.get-values <linkflags> : $(options) ] : unchecked ;
+}
+
+
+# Returns the location of the "program files" directory on a Windows platform.
+#
+rule get-program-files-dir ( )
+{
+ local ProgramFiles = [ modules.peek : ProgramFiles ] ;
+ if $(ProgramFiles)
+ {
+ ProgramFiles = "$(ProgramFiles:J= )" ;
+ }
+ else
+ {
+ ProgramFiles = "c:\\Program Files" ;
+ }
+ return $(ProgramFiles) ;
+}
+
+
+if [ os.name ] = NT
+{
+ RM = del /f /q ;
+ CP = copy /b ;
+ IGNORE = "2>nul >nul & setlocal" ;
+ LN ?= $(CP) ;
+ # Ugly hack to convince copy to set the timestamp of the destination to the
+ # current time by concatenating the source with a nonexistent file. Note
+ # that this requires /b (binary) as the default when concatenating files is
+ # /a (ascii).
+ WINDOWS-CP-HACK = "+ this-file-does-not-exist-A698EE7806899E69" ;
+}
+else
+{
+ RM = rm -f ;
+ CP = cp ;
+ LN = ln ;
+}
+
+
+rule rm-command ( )
+{
+ return $(RM) ;
+}
+
+
+rule copy-command ( )
+{
+ return $(CP) ;
+}
+
+
+if "\n" = "n"
+{
+ # Escape characters not supported so use ugly hacks. Will not work on Cygwin
+ # - see below.
+ nl = "
+" ;
+ q = "" ;
+}
+else
+{
+ nl = "\n" ;
+ q = "\"" ;
+}
+
+# Returns the command needed to set an environment variable on the current
+# platform. The variable setting persists through all following commands and is
+# visible in the environment seen by subsequently executed commands. In other
+# words, on Unix systems, the variable is exported, which is consistent with the
+# only possible behavior on Windows systems.
+#
+rule variable-setting-command ( variable : value )
+{
+ if [ os.name ] = NT
+ {
+ return "set $(variable)=$(value)$(nl)" ;
+ }
+ else
+ {
+ # If we do not have escape character support in bjam, the cod below
+ # blows up on CYGWIN, since the $(nl) variable holds a Windows new-line
+ # \r\n sequence that messes up the executed export command which then
+ # reports that the passed variable name is incorrect.
+ # But we have a check for cygwin in kernel/bootstrap.jam already.
+ return "$(variable)=$(q)$(value)$(q)$(nl)export $(variable)$(nl)" ;
+ }
+}
+
+
+# Returns a command to sets a named shell path variable to the given NATIVE
+# paths on the current platform.
+#
+rule path-variable-setting-command ( variable : paths * )
+{
+ local sep = [ os.path-separator ] ;
+ return [ variable-setting-command $(variable) : $(paths:J=$(sep)) ] ;
+}
+
+
+# Returns a command that prepends the given paths to the named path variable on
+# the current platform.
+#
+rule prepend-path-variable-command ( variable : paths * )
+{
+ return [ path-variable-setting-command $(variable)
+ : $(paths) [ os.expand-variable $(variable) ] ] ;
+}
+
+
+# Return a command which can create a file. If 'r' is result of invocation, then
+# 'r foobar' will create foobar with unspecified content. What happens if file
+# already exists is unspecified.
+#
+rule file-creation-command ( )
+{
+ if [ os.name ] = NT
+ {
+ # A few alternative implementations on Windows:
+ #
+ # 'type NUL >> '
+ # That would construct an empty file instead of a file containing
+ # a space and an end-of-line marker but it would also not change
+ # the target's timestamp in case the file already exists.
+ #
+ # 'type NUL > '
+ # That would construct an empty file instead of a file containing
+ # a space and an end-of-line marker but it would also destroy an
+ # already existing file by overwriting it with an empty one.
+ #
+ # I guess the best solution would be to allow Boost Jam to define
+ # built-in functions such as 'create a file', 'touch a file' or 'copy a
+ # file' which could be used from inside action code. That would allow
+ # completely portable operations without this kind of kludge.
+ # (22.02.2009.) (Jurko)
+ return "echo. > " ;
+ }
+ else
+ {
+ return "touch " ;
+ }
+}
+
+
+# Returns a command that may be used for 'touching' files. It is not a real
+# 'touch' command on NT because it adds an empty line at the end of file but it
+# works with source files.
+#
+rule file-touch-command ( )
+{
+ if [ os.name ] = NT
+ {
+ return "echo. >> " ;
+ }
+ else
+ {
+ return "touch " ;
+ }
+}
+
+
+rule MkDir
+{
+ # If dir exists, do not update it. Do this even for $(DOT).
+ NOUPDATE $(<) ;
+
+ if $(<) != $(DOT) && ! $($(<)-mkdir)
+ {
+ # Cheesy gate to prevent multiple invocations on same dir.
+ $(<)-mkdir = true ;
+
+ # Schedule the mkdir build action.
+ common.mkdir $(<) ;
+
+ # Prepare a Jam 'dirs' target that can be used to make the build only
+ # construct all the target directories.
+ DEPENDS dirs : $(<) ;
+
+ # Recursively create parent directories. $(<:P) = $(<)'s parent & we
+ # recurse until root.
+
+ local s = $(<:P) ;
+ if [ os.name ] = NT
+ {
+ switch $(s)
+ {
+ case *: : s = ;
+ case *:\\ : s = ;
+ }
+ }
+
+ if $(s)
+ {
+ if $(s) != $(<)
+ {
+ DEPENDS $(<) : $(s) ;
+ MkDir $(s) ;
+ }
+ else
+ {
+ NOTFILE $(s) ;
+ }
+ }
+ }
+}
+
+
+#actions MkDir1
+#{
+# mkdir "$(<)"
+#}
+
+# The following quick-fix actions should be replaced using the original MkDir1
+# action once Boost Jam gets updated to correctly detect different paths leading
+# up to the same filesystem target and triggers their build action only once.
+# (todo) (04.07.2008.) (Jurko)
+
+if [ os.name ] = NT
+{
+ actions mkdir
+ {
+ if not exist "$(<)\\" mkdir "$(<)"
+ }
+}
+else
+{
+ actions mkdir
+ {
+ mkdir -p "$(<)"
+ }
+}
+
+actions piecemeal together existing Clean
+{
+ $(RM) "$(>)"
+}
+
+
+rule copy
+{
+}
+
+
+actions copy
+{
+ $(CP) "$(>)" $(WINDOWS-CP-HACK) "$(<)"
+}
+
+
+rule RmTemps
+{
+}
+
+
+actions quietly updated piecemeal together RmTemps
+{
+ $(RM) "$(>)" $(IGNORE)
+}
+
+
+actions hard-link
+{
+ $(RM) "$(<)" 2$(NULL_OUT) $(NULL_OUT)
+ $(LN) "$(>)" "$(<)" $(NULL_OUT)
+}
+
+
+# Given a target, as given to a custom tag rule, returns a string formatted
+# according to the passed format. Format is a list of properties that is
+# represented in the result. For each element of format the corresponding target
+# information is obtained and added to the result string. For all, but the
+# literal, the format value is taken as the as string to prepend to the output
+# to join the item to the rest of the result. If not given "-" is used as a
+# joiner.
+#
+# The format options can be:
+#
+# <base>[joiner]
+# :: The basename of the target name.
+# <toolset>[joiner]
+# :: The abbreviated toolset tag being used to build the target.
+# <threading>[joiner]
+# :: Indication of a multi-threaded build.
+# <runtime>[joiner]
+# :: Collective tag of the build runtime.
+# <version:/version-feature | X.Y[.Z]/>[joiner]
+# :: Short version tag taken from the given "version-feature" in the
+# build properties. Or if not present, the literal value as the
+# version number.
+# <property:/property-name/>[joiner]
+# :: Direct lookup of the given property-name value in the build
+# properties. /property-name/ is a regular expression. E.g.
+# <property:toolset-.*:flavor> will match every toolset.
+# /otherwise/
+# :: The literal value of the format argument.
+#
+# For example this format:
+#
+# boost_ <base> <toolset> <threading> <runtime> <version:boost-version>
+#
+# Might return:
+#
+# boost_thread-vc80-mt-gd-1_33.dll, or
+# boost_regex-vc80-gd-1_33.dll
+#
+# The returned name also has the target type specific prefix and suffix which
+# puts it in a ready form to use as the value from a custom tag rule.
+#
+rule format-name ( format * : name : type ? : property-set )
+{
+ local result = "" ;
+ for local f in $(format)
+ {
+ switch $(f:G)
+ {
+ case <base> :
+ local matched = [ MATCH "^(boost.*python)-.*" : $(name) ] ;
+ if $(matched) = boost_python || $(matched) = boost_mpi_python
+ {
+ result += $(name) ;
+ }
+ else
+ {
+ result += $(name:B) ;
+ }
+
+ case <toolset> :
+ result += [ join-tag $(f:G=) : [ toolset-tag $(name) : $(type) :
+ $(property-set) ] ] ;
+
+ case <threading> :
+ result += [ join-tag $(f:G=) : [ threading-tag $(name) : $(type)
+ : $(property-set) ] ] ;
+
+ case <runtime> :
+ result += [ join-tag $(f:G=) : [ runtime-tag $(name) : $(type) :
+ $(property-set) ] ] ;
+
+ case <qt> :
+ result += [ join-tag $(f:G=) : [ qt-tag $(name) : $(type) :
+ $(property-set) ] ] ;
+
+ case <address-model> :
+ result += [ join-tag $(f:G=) : [ address-model-tag $(name) :
+ $(type) : $(property-set) ] ] ;
+
+ case <version:*> :
+ local key = [ MATCH <version:(.*)> : $(f:G) ] ;
+ local version = [ $(property-set).get <$(key)> ] ;
+ version ?= $(key) ;
+ version = [ MATCH "^([^.]+)[.]([^.]+)[.]?([^.]*)" : $(version) ] ;
+ result += [ join-tag $(f:G=) : $(version[1])_$(version[2]) ] ;
+
+ case <property:*> :
+ local key = [ MATCH <property:(.*)> : $(f:G) ] ;
+ local p0 = [ MATCH <($(key))> : [ $(property-set).raw ] ] ;
+ if $(p0)
+ {
+ local p = [ $(property-set).get <$(p0)> ] ;
+ if $(p)
+ {
+ result += [ join-tag $(f:G=) : $(p) ] ;
+ }
+ }
+
+ case * :
+ result += $(f:G=) ;
+ }
+ }
+ return [ virtual-target.add-prefix-and-suffix $(result:J=) : $(type) :
+ $(property-set) ] ;
+}
+
+
+local rule join-tag ( joiner ? : tag ? )
+{
+ if ! $(joiner) { joiner = - ; }
+ return $(joiner)$(tag) ;
+}
+
+
+local rule toolset-tag ( name : type ? : property-set )
+{
+ local tag = ;
+
+ local properties = [ $(property-set).raw ] ;
+ switch [ $(property-set).get <toolset> ]
+ {
+ case borland* : tag += bcb ;
+ case clang* :
+ {
+ switch [ $(property-set).get <toolset-clang:platform> ]
+ {
+ case darwin : tag += clang-darwin ;
+ case linux : tag += clang ;
+ }
+ }
+ case como* : tag += como ;
+ case cw : tag += cw ;
+ case darwin* : tag += xgcc ;
+ case edg* : tag += edg ;
+ case gcc* :
+ {
+ switch [ $(property-set).get <toolset-gcc:flavor> ]
+ {
+ case *mingw* : tag += mgw ;
+ case * : tag += gcc ;
+ }
+ }
+ case intel :
+ if [ $(property-set).get <toolset-intel:platform> ] = win
+ {
+ tag += iw ;
+ }
+ else
+ {
+ tag += il ;
+ }
+ case kcc* : tag += kcc ;
+ case kylix* : tag += bck ;
+ #case metrowerks* : tag += cw ;
+ #case mingw* : tag += mgw ;
+ case mipspro* : tag += mp ;
+ case msvc* : tag += vc ;
+ case qcc* : tag += qcc ;
+ case sun* : tag += sw ;
+ case tru64cxx* : tag += tru ;
+ case vacpp* : tag += xlc ;
+ }
+ local version = [ MATCH <toolset.*version>([0123456789]+)[.]([0123456789]*)
+ : $(properties) ] ;
+ # For historical reasons, vc6.0 and vc7.0 use different naming.
+ if $(tag) = vc
+ {
+ if $(version[1]) = 6
+ {
+ # Cancel minor version.
+ version = 6 ;
+ }
+ else if $(version[1]) = 7 && $(version[2]) = 0
+ {
+ version = 7 ;
+ }
+ }
+ # On intel, version is not added, because it does not matter and it is the
+ # version of vc used as backend that matters. Ideally, we should encode the
+ # backend version but that would break compatibility with V1.
+ if $(tag) = iw
+ {
+ version = ;
+ }
+
+ # On borland, version is not added for compatibility with V1.
+ if $(tag) = bcb
+ {
+ version = ;
+ }
+
+ tag += $(version) ;
+
+ return $(tag:J=) ;
+}
+
+
+local rule threading-tag ( name : type ? : property-set )
+{
+ if <threading>multi in [ $(property-set).raw ]
+ {
+ return mt ;
+ }
+}
+
+
+local rule runtime-tag ( name : type ? : property-set )
+{
+ local tag = ;
+
+ local properties = [ $(property-set).raw ] ;
+ if <runtime-link>static in $(properties) { tag += s ; }
+
+ # This is an ugly thing. In V1, there is code to automatically detect which
+ # properties affect a target. So, if <runtime-debugging> does not affect gcc
+ # toolset, the tag rules will not even see <runtime-debugging>. Similar
+ # functionality in V2 is not implemented yet, so we just check for toolsets
+ # known to care about runtime debugging.
+ if ( <toolset>msvc in $(properties) ) ||
+ ( <stdlib>stlport in $(properties) ) ||
+ ( <toolset-intel:platform>win in $(properties) )
+ {
+ if <runtime-debugging>on in $(properties) { tag += g ; }
+ }
+
+ if <python-debugging>on in $(properties) { tag += y ; }
+ if <variant>debug in $(properties) { tag += d ; }
+ if <stdlib>stlport in $(properties) { tag += p ; }
+ if <stdlib-stlport:iostream>hostios in $(properties) { tag += n ; }
+
+ return $(tag:J=) ;
+}
+
+
+# Create a tag for the Qt library version
+# "<qt>4.6.0" will result in tag "qt460"
+local rule qt-tag ( name : type ? : property-set )
+{
+ local v = [ MATCH ([0123456789]+)[.]?([0123456789]*)[.]?([0123456789]*) :
+ [ $(property-set).get <qt> ] ] ;
+ return qt$(v:J=) ;
+}
+
+
+# Create a tag for the address-model
+# <address-model>64 will simply generate "64"
+local rule address-model-tag ( name : type ? : property-set )
+{
+ return [ $(property-set).get <address-model> ] ;
+}
+
+
+rule __test__ ( )
+{
+ import assert ;
+
+ local save-os = [ modules.peek os : .name ] ;
+
+ modules.poke os : .name : LINUX ;
+ assert.result "PATH=\"foo:bar:baz\"\nexport PATH\n"
+ : path-variable-setting-command PATH : foo bar baz ;
+ assert.result "PATH=\"foo:bar:$PATH\"\nexport PATH\n"
+ : prepend-path-variable-command PATH : foo bar ;
+
+ modules.poke os : .name : NT ;
+ assert.result "set PATH=foo;bar;baz\n"
+ : path-variable-setting-command PATH : foo bar baz ;
+ assert.result "set PATH=foo;bar;%PATH%\n"
+ : prepend-path-variable-command PATH : foo bar ;
+
+ modules.poke os : .name : $(save-os) ;
+}
diff --git a/src/kenlm/jam-files/boost-build/tools/como-linux.jam b/src/kenlm/jam-files/boost-build/tools/como-linux.jam
new file mode 100644
index 0000000..5c554c8
--- /dev/null
+++ b/src/kenlm/jam-files/boost-build/tools/como-linux.jam
@@ -0,0 +1,103 @@
+# Copyright 2004, 2005, 2006 Vladimir Prus
+# Distributed under the Boost Software License, Version 1.0.
+# (See accompanying file LICENSE_1_0.txt or http://www.boost.org/LICENSE_1_0.txt)
+
+# The following #// line will be used by the regression test table generation
+# program as the column heading for HTML tables. Must not include a version
+# number.
+#//<a href="http://www.comeaucomputing.com/">Comeau<br>C++</a>
+
+import toolset ;
+import feature ;
+import toolset : flags ;
+import common ;
+import generators ;
+
+import unix ;
+import como ;
+
+feature.extend-subfeature toolset como : platform : linux ;
+
+toolset.inherit-generators como-linux
+ <toolset>como <toolset-como:platform>linux : unix ;
+generators.override como-linux.prebuilt : builtin.lib-generator ;
+generators.override como-linux.searched-lib-generator : searched-lib-generator ;
+toolset.inherit-flags como-linux : unix ;
+toolset.inherit-rules como-linux : gcc ;
+
+generators.register-c-compiler como-linux.compile.c++ : CPP : OBJ
+ : <toolset>como <toolset-como:platform>linux ;
+generators.register-c-compiler como-linux.compile.c : C : OBJ
+ : <toolset>como <toolset-como:platform>linux ;
+
+
+rule init ( version ? : command * : options * )
+{
+ local condition = [ common.check-init-parameters como-linux
+ : version $(version) ] ;
+
+ command = [ common.get-invocation-command como-linux : como
+ : $(command) ] ;
+
+ common.handle-options como-linux : $(condition) : $(command) : $(options) ;
+}
+
+
+flags como-linux C++FLAGS <exception-handling>off : --no_exceptions ;
+flags como-linux C++FLAGS <exception-handling>on : --exceptions ;
+
+flags como-linux CFLAGS <inlining>off : --no_inlining ;
+flags como-linux CFLAGS <inlining>on <inlining>full : --inlining ;
+
+flags como-linux CFLAGS <optimization>off : -O0 ;
+flags como-linux CFLAGS <optimization>speed : -O3 ;
+flags como-linux CFLAGS <optimization>space : -Os ;
+
+flags como-linux CFLAGS <debug-symbols>on : -g ;
+flags como-linux LINKFLAGS <debug-symbols>on : -g ;
+
+flags como-linux FINDLIBS : m ;
+flags como-linux FINDLIBS : rt ;
+
+flags como-linux CFLAGS <cflags> ;
+flags como-linux C++FLAGS <cxxflags> ;
+flags como-linux DEFINES <define> ;
+flags como-linux UNDEFS <undef> ;
+flags como-linux HDRS <include> ;
+flags como-linux STDHDRS <sysinclude> ;
+flags como-linux LINKFLAGS <linkflags> ;
+flags como-linux ARFLAGS <arflags> ;
+
+flags como-linux.link LIBRARIES <library-file> ;
+flags como-linux.link LINKPATH <library-path> ;
+flags como-linux.link FINDLIBS-ST <find-static-library> ;
+flags como-linux.link FINDLIBS-SA <find-shared-library> ;
+
+flags como-linux.link RPATH <dll-path> ;
+flags como-linux.link RPATH_LINK <xdll-path> ;
+
+
+actions link bind LIBRARIES
+{
+ $(CONFIG_COMMAND) $(LINKFLAGS) -o "$(<[1])" "$(>)" -L"$(LINKPATH)" -Wl,-R$(SPACE)-Wl,"$(RPATH)" -Wl,-rpath-link$(SPACE)-Wl,"$(RPATH_LINK)" "$(LIBRARIES)" "$(LIBRARIES)" -l$(FINDLIBS-SA) -l$(FINDLIBS-ST) 2>&1
+}
+
+actions link.dll bind LIBRARIES
+{
+ $(CONFIG_COMMAND) $(LINKFLAGS) -shared -o "$(<[1])" "$(>)" -L"$(LINKPATH)" -Wl,-R$(SPACE)-Wl,"$(RPATH)" -Wl,-rpath-link$(SPACE)-Wl,"$(RPATH_LINK)" "$(LIBRARIES)" "$(LIBRARIES)" -l$(FINDLIBS-SA) -l$(FINDLIBS-ST) 2>&1
+}
+
+actions compile.c
+{
+ $(CONFIG_COMMAND) -c --c99 --long_long -U$(UNDEFS) -D$(DEFINES) $(CFLAGS) -I"$(HDRS)" -I"$(STDHDRS)" -o "$(<)" "$(>)" 2>&1
+}
+
+actions compile.c++
+{
+ $(CONFIG_COMMAND) -tused -c --long_long -U$(UNDEFS) -D$(DEFINES) $(CFLAGS) $(C++FLAGS) -I"$(HDRS)" -I"$(STDHDRS)" -o "$(<)" "$(>)" 2>&1
+}
+
+actions archive
+{
+ ar rcu $(<) $(>)
+}
diff --git a/src/kenlm/jam-files/boost-build/tools/como-win.jam b/src/kenlm/jam-files/boost-build/tools/como-win.jam
new file mode 100644
index 0000000..d21a70d
--- /dev/null
+++ b/src/kenlm/jam-files/boost-build/tools/como-win.jam
@@ -0,0 +1,117 @@
+# (C) Copyright David Abrahams 2001.
+# (C) Copyright MetaCommunications, Inc. 2004.
+
+# Distributed under the Boost Software License, Version 1.0. (See
+# accompanying file LICENSE_1_0.txt or copy at
+# http://www.boost.org/LICENSE_1_0.txt)
+
+# The following #// line will be used by the regression test table generation
+# program as the column heading for HTML tables. Must not include a version
+# number.
+#//<a href="http://www.comeaucomputing.com/">Comeau<br>C++</a>
+
+import common ;
+import como ;
+import feature ;
+import generators ;
+import toolset : flags ;
+
+feature.extend-subfeature toolset como : platform : win ;
+
+
+# Initializes the Comeau toolset for windows. The command is the command which
+# invokes the compiler. You should either set environment variable
+# COMO_XXX_INCLUDE where XXX is the used backend (as described in the
+# documentation), or pass that as part of command, e.g:
+#
+# using como-win : 4.3 : "set COMO_BCC_INCLUDE=C:/include &&" como.exe ;
+#
+rule init ( version ? : command * : options * )
+{
+ local condition = [ common.check-init-parameters como-win
+ : version $(version) ] ;
+
+ command = [ common.get-invocation-command como-win : como.exe :
+ $(command) ] ;
+
+ common.handle-options como-win : $(condition) : $(command) : $(options) ;
+}
+
+generators.register-c-compiler como-win.compile.c++ : CPP : OBJ
+ : <toolset>como <toolset-como:platform>win ;
+generators.register-c-compiler como-win.compile.c : C : OBJ
+ : <toolset>como <toolset-como:platform>win ;
+
+
+generators.register-linker como-win.link
+ : OBJ SEARCHED_LIB STATIC_LIB IMPORT_LIB
+ : EXE
+ : <toolset>como <toolset-como:platform>win ;
+
+# Note that status of shared libraries support is not clear, so we do not define
+# the link.dll generator.
+generators.register-archiver como-win.archive
+ : OBJ : STATIC_LIB
+ : <toolset>como <toolset-como:platform>win ;
+
+
+flags como-win C++FLAGS <exception-handling>off : --no_exceptions ;
+flags como-win C++FLAGS <exception-handling>on : --exceptions ;
+
+flags como-win CFLAGS <inlining>off : --no_inlining ;
+flags como-win CFLAGS <inlining>on <inlining>full : --inlining ;
+
+
+# The following seems to be VC-specific options. At least, when I uncomment
+# then, Comeau with bcc as backend reports that bcc32 invocation failed.
+#
+#flags como-win CFLAGS <debug-symbols>on : /Zi ;
+#flags como-win CFLAGS <optimization>off : /Od ;
+
+
+flags como-win CFLAGS <cflags> ;
+flags como-win CFLAGS : -D_WIN32 ; # Make sure that we get the Boost Win32 platform config header.
+flags como-win CFLAGS <threading>multi : -D_MT ; # Make sure that our config knows that threading is on.
+flags como-win C++FLAGS <cxxflags> ;
+flags como-win DEFINES <define> ;
+flags como-win UNDEFS <undef> ;
+flags como-win HDRS <include> ;
+flags como-win SYSHDRS <sysinclude> ;
+flags como-win LINKFLAGS <linkflags> ;
+flags como-win ARFLAGS <arflags> ;
+flags como-win NO_WARN <no-warn> ;
+
+#flags como-win STDHDRS : $(COMO_INCLUDE_PATH) ;
+#flags como-win STDLIB_PATH : $(COMO_STDLIB_PATH)$(SLASH) ;
+
+flags como-win LIBPATH <library-path> ;
+flags como-win LIBRARIES <library-file> ;
+flags como-win FINDLIBS <find-shared-library> ;
+flags como-win FINDLIBS <find-static-library> ;
+
+nl = "
+" ;
+
+
+# For como, we repeat all libraries so that dependencies are always resolved.
+#
+actions link bind LIBRARIES
+{
+ $(CONFIG_COMMAND) --no_version --no_prelink_verbose $(LINKFLAGS) -o "$(<[1]:S=)" @"@($(<[1]:W).rsp:E=$(nl)"$(>)")" "$(LIBRARIES)" "$(FINDLIBS:S=.lib)"
+}
+
+actions compile.c
+{
+ $(CONFIG_COMMAND) -c --c99 -e5 --no_version --display_error_number --diag_suppress=9,21,161,748,940,962 -U$(UNDEFS) -D$(DEFINES) $(WARN) $(CFLAGS) -I"$(HDRS)" -I"$(STDHDRS)" -I"$(SYSHDRS)" -o "$(<:D=)" "$(>)"
+}
+
+actions compile.c++
+{
+ $(CONFIG_COMMAND) -c -e5 --no_version --no_prelink_verbose --display_error_number --long_long --diag_suppress=9,21,161,748,940,962 --diag_error=461 -D__STL_LONG_LONG -U$(UNDEFS) -D$(DEFINES) $(WARN) $(CFLAGS) $(C++FLAGS) -I"$(HDRS)" -I"$(STDHDRS)" -I"$(SYSHDRS)" -o "$(<)" "$(>)"
+}
+
+actions archive
+{
+ $(CONFIG_COMMAND) --no_version --no_prelink_verbose --prelink_object @"@($(<[1]:W).rsp:E=$(nl)"$(>)")"
+ lib $(ARFLAGS) /nologo /out:"$(<:S=.lib)" @"@($(<[1]:W).rsp:E=$(nl)"$(>)")"
+}
diff --git a/src/kenlm/jam-files/boost-build/tools/como.jam b/src/kenlm/jam-files/boost-build/tools/como.jam
new file mode 100644
index 0000000..04a05a9
--- /dev/null
+++ b/src/kenlm/jam-files/boost-build/tools/como.jam
@@ -0,0 +1,29 @@
+# Copyright Vladimir Prus 2004.
+# Distributed under the Boost Software License, Version 1.0.
+# (See accompanying file LICENSE_1_0.txt
+# or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+# This is a generic 'como' toolset. Depending on the current system, it
+# forwards either to 'como-linux' or 'como-win' modules.
+
+import feature ;
+import os ;
+import toolset ;
+
+feature.extend toolset : como ;
+feature.subfeature toolset como : platform : : propagated link-incompatible ;
+
+rule init ( * : * )
+{
+ if [ os.name ] = LINUX
+ {
+ toolset.using como-linux :
+ $(1) : $(2) : $(3) : $(4) : $(5) : $(6) : $(7) : $(8) : $(9) ;
+ }
+ else
+ {
+ toolset.using como-win :
+ $(1) : $(2) : $(3) : $(4) : $(5) : $(6) : $(7) : $(8) : $(9) ;
+
+ }
+}
diff --git a/src/kenlm/jam-files/boost-build/tools/convert.jam b/src/kenlm/jam-files/boost-build/tools/convert.jam
new file mode 100644
index 0000000..ac1d701
--- /dev/null
+++ b/src/kenlm/jam-files/boost-build/tools/convert.jam
@@ -0,0 +1,62 @@
+# Copyright (c) 2009 Vladimir Prus
+#
+# Use, modification and distribution is subject to the Boost Software
+# License Version 1.0. (See accompanying file LICENSE_1_0.txt or
+# http://www.boost.org/LICENSE_1_0.txt)
+
+# Implements 'convert' target that takes a bunch of source and
+# tries to convert each one to the specified type.
+#
+# For example:
+#
+# convert objects obj : a.cpp b.cpp ;
+#
+
+import targets ;
+import generators ;
+import project ;
+import type ;
+import "class" : new ;
+
+class convert-target-class : typed-target
+{
+ rule __init__ ( name : project : type
+ : sources * : requirements * : default-build * : usage-requirements * )
+ {
+ typed-target.__init__ $(name) : $(project) : $(type)
+ : $(sources) : $(requirements) : $(default-build) : $(usage-requirements) ;
+ }
+
+ rule construct ( name : source-targets * : property-set )
+ {
+ local r = [ generators.construct $(self.project) : $(self.type)
+ : [ property-set.create [ $(property-set).raw ] # [ feature.expand
+ <main-target-type>$(self.type) ]
+ # ]
+ : $(source-targets) ] ;
+ if ! $(r)
+ {
+ errors.error "unable to construct" [ full-name ] ;
+ }
+
+ return $(r) ;
+ }
+
+}
+
+rule convert ( name type : sources * : requirements * : default-build *
+ : usage-requirements * )
+{
+ local project = [ project.current ] ;
+
+ # This is a circular module dependency, so it must be imported here
+ modules.import targets ;
+ targets.main-target-alternative
+ [ new convert-target-class $(name) : $(project) : [ type.type-from-rule-name $(type) ]
+ : [ targets.main-target-sources $(sources) : $(name) ]
+ : [ targets.main-target-requirements $(requirements) : $(project) ]
+ : [ targets.main-target-default-build $(default-build) : $(project) ]
+ : [ targets.main-target-usage-requirements $(usage-requirements) : $(project) ]
+ ] ;
+}
+IMPORT $(__name__) : convert : : convert ;
diff --git a/src/kenlm/jam-files/boost-build/tools/cray.jam b/src/kenlm/jam-files/boost-build/tools/cray.jam
new file mode 100644
index 0000000..a37fe23
--- /dev/null
+++ b/src/kenlm/jam-files/boost-build/tools/cray.jam
@@ -0,0 +1,115 @@
+# Copyright 2001 David Abrahams.
+# Copyright 2004, 2005 Markus Schoepflin.
+# Copyright 2011, John Maddock
+# Distributed under the Boost Software License, Version 1.0.
+# (See accompanying file LICENSE_1_0.txt or http://www.boost.org/LICENSE_1_0.txt)
+
+#
+# Cray C++ Compiler
+# See http://docs.cray.com/books/S-2179-50/html-S-2179-50/S-2179-50-toc.html
+#
+
+import feature generators common ;
+import toolset : flags ;
+
+feature.extend toolset : cray ;
+
+# Inherit from Unix toolset to get library ordering magic.
+toolset.inherit cray : unix ;
+
+generators.override cray.prebuilt : builtin.lib-generator ;
+generators.override cray.prebuilt : builtin.prebuilt ;
+generators.override cray.searched-lib-generator : searched-lib-generator ;
+
+
+rule init ( version ? : command * : options * )
+{
+ local condition = [ common.check-init-parameters cray : version $(version) ] ;
+
+ local command = [ common.get-invocation-command cray : CC : $(command) ] ;
+
+ if $(command)
+ {
+ local root = [ common.get-absolute-tool-path $(command[-1]) ] ;
+
+ if $(root)
+ {
+ flags cray .root $(condition) : "\"$(root)\"/" ;
+ }
+ }
+ # If we can't find 'CC' anyway, at least show 'CC' in the commands
+ command ?= CC ;
+
+ common.handle-options cray : $(condition) : $(command) : $(options) ;
+}
+
+generators.register-c-compiler cray.compile.c++ : CPP : OBJ : <toolset>cray ;
+generators.register-c-compiler cray.compile.c : C : OBJ : <toolset>cray ;
+
+
+
+# No static linking as far as I can tell.
+# flags cxx LINKFLAGS <runtime-link>static : -bstatic ;
+flags cray.compile OPTIONS <debug-symbols>on : -Gn ;
+flags cray.link OPTIONS <debug-symbols>on : -Gn ;
+
+flags cray.compile OPTIONS <optimization>off : -O0 ;
+flags cray.compile OPTIONS <optimization>speed : -O3 ;
+flags cray.compile OPTIONS <optimization>space : -O1 ;
+
+flags cray.compile OPTIONS <cflags> ;
+flags cray.compile.c++ OPTIONS <cxxflags> ;
+flags cray.compile DEFINES <define> ;
+flags cray.compile INCLUDES <include> ;
+flags cray.link OPTIONS <linkflags> ;
+
+flags cray.compile OPTIONS <link>shared : -fPIC ;
+flags cray.link OPTIONS <link>shared : -fPIC ;
+
+flags cray.link LIBPATH <library-path> ;
+flags cray.link LIBRARIES <library-file> ;
+flags cray.link FINDLIBS-ST <find-static-library> ;
+flags cray.link FINDLIBS-SA <find-shared-library> ;
+
+actions link bind LIBRARIES
+{
+ $(CONFIG_COMMAND) $(OPTIONS) -o "$(<)" -L$(LIBPATH) "$(>)" "$(LIBRARIES)" -l$(FINDLIBS-ST) -l$(FINDLIBS-SA)
+}
+
+# When creating dynamic libraries, we don't want to be warned about unresolved
+# symbols, therefore all unresolved symbols are marked as expected by
+# '-expect_unresolved *'. This also mirrors the behaviour of the GNU tool
+# chain.
+
+actions link.dll bind LIBRARIES
+{
+ $(CONFIG_COMMAND) -shared $(OPTIONS) -o "$(<[1])" -L$(LIBPATH) "$(>)" "$(LIBRARIES)" -l$(FINDLIBS-ST) -l$(FINDLIBS-SA)
+}
+
+
+# Note: Relaxed ANSI mode (-std) is used for compilation because in strict ANSI
+# C89 mode (-std1) the compiler doesn't accept C++ comments in C files. As -std
+# is the default, no special flag is needed.
+actions compile.c
+{
+ $(.root:E=)cc -c $(OPTIONS) -D$(DEFINES) -I"$(INCLUDES)" -o "$(<)" "$(>)"
+}
+
+# Note: The compiler is forced to compile the files as C++ (-x cxx) because
+# otherwise it will silently ignore files with no file extension.
+#
+# Note: We deliberately don't suppress any warnings on the compiler command
+# line, the user can always do this in a customized toolset later on.
+
+actions compile.c++
+{
+ $(CONFIG_COMMAND) -c -h gnu $(OPTIONS) -D$(DEFINES) -I"$(INCLUDES)" -o "$(<)" "$(>)"
+}
+
+# Always create archive from scratch. See the gcc toolet for rationale.
+RM = [ common.rm-command ] ;
+actions together piecemeal archive
+{
+ $(RM) "$(<)"
+ ar rc $(<) $(>)
+}
diff --git a/src/kenlm/jam-files/boost-build/tools/cw-config.jam b/src/kenlm/jam-files/boost-build/tools/cw-config.jam
new file mode 100644
index 0000000..1211b7c
--- /dev/null
+++ b/src/kenlm/jam-files/boost-build/tools/cw-config.jam
@@ -0,0 +1,34 @@
+#~ Copyright 2005 Rene Rivera.
+#~ Distributed under the Boost Software License, Version 1.0.
+#~ (See accompanying file LICENSE_1_0.txt or http://www.boost.org/LICENSE_1_0.txt)
+
+# Automatic configuration for CodeWarrior toolset. To use, just import this module.
+
+import os ;
+import toolset : using ;
+
+if [ os.name ] = NT
+{
+ for local R in 9 8 7
+ {
+ local cw-path = [ W32_GETREG
+ "HKEY_LOCAL_MACHINE\\SOFTWARE\\Metrowerks\\CodeWarrior\\Product Versions\\CodeWarrior for Windows R$(R)"
+ : "PATH" ] ;
+ local cw-version = [ W32_GETREG
+ "HKEY_LOCAL_MACHINE\\SOFTWARE\\Metrowerks\\CodeWarrior\\Product Versions\\CodeWarrior for Windows R$(R)"
+ : "VERSION" ] ;
+ cw-path ?= [ W32_GETREG
+ "HKEY_LOCAL_MACHINE\\SOFTWARE\\Metrowerks\\CodeWarrior for Windows\\$(R).0"
+ : "PATH" ] ;
+ cw-version ?= $(R).0 ;
+
+ if $(cw-path)
+ {
+ if --debug-configuration in [ modules.peek : ARGV ]
+ {
+ ECHO "notice:" using cw ":" $(cw-version) ":" "$(cw-path)\\Other Metrowerks Tools\\Command Line Tools\\mwcc.exe" ;
+ }
+ using cw : $(cw-version) : "$(cw-path)\\Other Metrowerks Tools\\Command Line Tools\\mwcc.exe" ;
+ }
+ }
+}
diff --git a/src/kenlm/jam-files/boost-build/tools/cw.jam b/src/kenlm/jam-files/boost-build/tools/cw.jam
new file mode 100644
index 0000000..ddcbfeb
--- /dev/null
+++ b/src/kenlm/jam-files/boost-build/tools/cw.jam
@@ -0,0 +1,246 @@
+# Copyright (C) Reece H Dunn 2004
+# Distributed under the Boost Software License, Version 1.0.
+# (See accompanying file LICENSE_1_0.txt or copy at
+# http://www.boost.org/LICENSE_1_0.txt)
+
+# based on the msvc.jam toolset
+
+import property ;
+import generators ;
+import os ;
+import type ;
+import toolset : flags ;
+import errors : error ;
+import feature : feature get-values ;
+import path ;
+import sequence : unique ;
+import common ;
+
+if [ MATCH (--debug-configuration) : [ modules.peek : ARGV ] ]
+{
+ .debug-configuration = true ;
+}
+
+feature.extend toolset : cw ;
+
+toolset.add-requirements <toolset>cw,<runtime-link>shared:<threading>multi ;
+
+nl = "
+" ;
+
+rule init ( version ? : command * : options * )
+{
+ # TODO: fix the $(command[1]) = $(compiler) issue
+
+ setup = [ get-values <setup> : $(options) ] ;
+ setup ?= cwenv.bat ;
+ compiler = [ get-values <compiler> : $(options) ] ;
+ compiler ?= mwcc ;
+ linker = [ get-values <linker> : $(options) ] ;
+ linker ?= mwld ;
+
+ local condition = [ common.check-init-parameters cw :
+ version $(version) ] ;
+
+ command = [ common.get-invocation-command cw : mwcc.exe : $(command) :
+ [ default-paths $(version) ] ] ;
+
+ common.handle-options cw : $(condition) : $(command) : $(options) ;
+
+ local root = [ feature.get-values <root> : $(options) ] ;
+ if $(command)
+ {
+ command = [ common.get-absolute-tool-path $(command[-1]) ] ;
+ }
+ local tool-root = $(command) ;
+
+ setup = $(tool-root)\\$(setup) ;
+
+ # map the batch file in setup so it can be executed
+
+ other-tools = $(tool-root:D) ;
+ root ?= $(other-tools:D) ;
+
+ flags cw.link RUN_PATH $(condition) :
+ "$(root)\\Win32-x86 Support\\Libraries\\Runtime"
+ "$(root)\\Win32-x86 Support\\Libraries\\Runtime\\Libs\\MSL_All-DLLs" ;
+
+ setup = "set \"CWFOLDER="$(root)"\" && call \""$(setup)"\" > nul " ;
+
+ if [ os.name ] = NT
+ {
+ setup = $(setup)"
+" ;
+ }
+ else
+ {
+ setup = "cmd /S /C "$(setup)" \"&&\" " ;
+ }
+
+ # bind the setup command to the tool so it can be executed before the
+ # command
+
+ local prefix = $(setup) ;
+
+ flags cw.compile .CC $(condition) : $(prefix)$(compiler) ;
+ flags cw.link .LD $(condition) : $(prefix)$(linker) ;
+ flags cw.archive .LD $(condition) : $(prefix)$(linker) ;
+
+ if [ MATCH ^([89]\\.) : $(version) ]
+ {
+ if [ os.name ] = NT
+ {
+ # The runtime libraries
+ flags cw.compile CFLAGS <runtime-link>static/<threading>single/<runtime-debugging>off : -runtime ss ;
+ flags cw.compile CFLAGS <runtime-link>static/<threading>single/<runtime-debugging>on : -runtime ssd ;
+
+ flags cw.compile CFLAGS <runtime-link>static/<threading>multi/<runtime-debugging>off : -runtime sm ;
+ flags cw.compile CFLAGS <runtime-link>static/<threading>multi/<runtime-debugging>on : -runtime smd ;
+
+ flags cw.compile CFLAGS <runtime-link>shared/<runtime-debugging>off : -runtime dm ;
+ flags cw.compile CFLAGS <runtime-link>shared/<runtime-debugging>on : -runtime dmd ;
+ }
+ }
+}
+
+
+local rule default-paths ( version ? ) # FIXME
+{
+ local possible-paths ;
+ local ProgramFiles = [ common.get-program-files-dir ] ;
+
+ # TODO: add support for cw8 and cw9 detection
+
+ local version-6-path = $(ProgramFiles)"\\Metrowerks\\CodeWarrior" ;
+ possible-paths += $(version-6-path) ;
+
+ # perform post-processing
+
+ possible-paths
+ = $(possible-paths)"\\Other Metrowerks Tools\\Command Line Tools" ;
+
+ possible-paths += [ modules.peek : PATH Path path ] ;
+
+ return $(possible-paths) ;
+}
+
+
+
+
+## declare generators
+
+generators.register-c-compiler cw.compile.c++ : CPP : OBJ : <toolset>cw ;
+generators.register-c-compiler cw.compile.c : C : OBJ : <toolset>cw ;
+
+generators.register-linker cw.link
+ : OBJ SEARCHED_LIB STATIC_LIB IMPORT_LIB
+ : EXE
+ : <toolset>cw
+ ;
+generators.register-linker cw.link.dll
+ : OBJ SEARCHED_LIB STATIC_LIB IMPORT_LIB
+ : SHARED_LIB IMPORT_LIB
+ : <toolset>cw
+ ;
+
+generators.register-archiver cw.archive
+ : OBJ
+ : STATIC_LIB
+ : <toolset>cw
+ ;
+
+## compilation phase
+
+flags cw WHATEVER <toolset-cw:version> ;
+
+flags cw.compile CFLAGS <debug-symbols>on : -g ;
+flags cw.compile CFLAGS <optimization>off : -O0 ;
+flags cw.compile CFLAGS <optimization>speed : -O4,p ;
+flags cw.compile CFLAGS <optimization>space : -O4,s ;
+flags cw.compile CFLAGS <inlining>off : -inline off ;
+flags cw.compile CFLAGS <inlining>on : -inline on ;
+flags cw.compile CFLAGS <inlining>full : -inline all ;
+flags cw.compile CFLAGS <exception-handling>off : -Cpp_exceptions off ;
+
+
+flags cw.compile CFLAGS <rtti>on : -RTTI on ;
+flags cw.compile CFLAGS <rtti>off : -RTTI off ;
+
+flags cw.compile CFLAGS <warnings>on : -w on ;
+flags cw.compile CFLAGS <warnings>off : -w off ;
+flags cw.compile CFLAGS <warnings>all : -w all ;
+flags cw.compile CFLAGS <warnings-as-errors>on : -w error ;
+
+flags cw.compile USER_CFLAGS <cflags> : ;
+flags cw.compile.c++ USER_CFLAGS <cxxflags> : ;
+
+flags cw.compile DEFINES <define> ;
+flags cw.compile UNDEFS <undef> ;
+flags cw.compile INCLUDES <include> ;
+
+actions compile.c
+{
+ $(.CC) -c -cwd include -lang c -U$(UNDEFS) $(CFLAGS) $(USER_CFLAGS) -I- -o "$(<)" @"@($(<[1]:W).rsp:E=$(nl)"$(>)" $(nl)-D$(DEFINES) $(nl)"-I$(INCLUDES)")"
+}
+actions compile.c++
+{
+ $(.CC) -c -cwd include -lang c++ -U$(UNDEFS) $(CFLAGS) $(USER_CFLAGS) -I- -o "$(<)" @"@($(<[1]:W).rsp:E=$(nl)"$(>)" $(nl)-D$(DEFINES) $(nl)"-I$(INCLUDES)")"
+}
+
+## linking phase
+
+flags cw.link DEF_FILE <def-file> ;
+
+flags cw LINKFLAGS : -search ;
+flags cw LINKFLAGS <debug-symbols>on : -g ;
+flags cw LINKFLAGS <user-interface>console : -subsystem console ;
+flags cw LINKFLAGS <user-interface>gui : -subsystem windows ;
+flags cw LINKFLAGS <user-interface>wince : -subsystem wince ;
+flags cw LINKFLAGS <user-interface>native : -subsystem native ;
+flags cw LINKFLAGS <user-interface>auto : -subsystem auto ;
+
+flags cw LINKFLAGS <main-target-type>LIB/<link>static : -library ;
+
+flags cw.link USER_LINKFLAGS <linkflags> ;
+flags cw.link LINKPATH <library-path> ;
+
+flags cw.link FINDLIBS_ST <find-static-library> ;
+flags cw.link FINDLIBS_SA <find-shared-library> ;
+flags cw.link LIBRARY_OPTION <toolset>cw : "" : unchecked ;
+flags cw.link LIBRARIES_MENTIONED_BY_FILE : <library-file> ;
+
+rule link.dll ( targets + : sources * : properties * )
+{
+ DEPENDS $(<) : [ on $(<) return $(DEF_FILE) ] ;
+}
+
+if [ os.name ] in NT
+{
+ actions archive
+ {
+ if exist "$(<[1])" DEL "$(<[1])"
+ $(.LD) -library -o "$(<[1])" @"@($(<[1]:W).rsp:E=$(nl)"$(>)" $(nl)$(LIBRARIES_MENTIONED_BY_FILE) $(nl)"$(LIBRARY_OPTION)$(FINDLIBS_ST:S=.lib)" $(nl)"$(LIBRARY_OPTION)$(FINDLIBS_SA:S=.lib)")"
+ }
+}
+else # cygwin
+{
+ actions archive
+ {
+ _bbv2_out_="$(<)"
+ if test -f "$_bbv2_out_" ; then
+ _bbv2_existing_="$(<:W)"
+ fi
+ $(.LD) -library -o "$(<:W)" $_bbv2_existing_ @"@($(<[1]:W).rsp:E=$(nl)"$(>)" $(nl)$(LIBRARIES_MENTIONED_BY_FILE) $(nl)"$(LIBRARY_OPTION)$(FINDLIBS_ST:S=.lib)" $(nl)"$(LIBRARY_OPTION)$(FINDLIBS_SA:S=.lib)")"
+ }
+}
+
+actions link bind DEF_FILE
+{
+ $(.LD) -o "$(<[1]:W)" -L"$(LINKPATH)" $(LINKFLAGS) $(USER_LINKFLAGS) @"@($(<[1]:W).rsp:E=$(nl)"$(>)" $(nl)$(LIBRARIES_MENTIONED_BY_FILE) $(nl)"$(LIBRARY_OPTION)$(FINDLIBS_ST:S=.lib)" $(nl)"$(LIBRARY_OPTION)$(FINDLIBS_SA:S=.lib)")"
+}
+
+actions link.dll bind DEF_FILE
+{
+ $(.LD) -shared -o "$(<[1]:W)" -implib "$(<[2]:W)" -L"$(LINKPATH)" $(LINKFLAGS) -f"$(DEF_FILE)" $(USER_LINKFLAGS) @"@($(<[1]:W).rsp:E=$(nl)"$(>)" $(nl)$(LIBRARIES_MENTIONED_BY_FILE) $(nl)"$(LIBRARY_OPTION)$(FINDLIBS_ST:S=.lib)" $(nl)"$(LIBRARY_OPTION)$(FINDLIBS_SA:S=.lib)")"
+}
+
diff --git a/src/kenlm/jam-files/boost-build/tools/darwin.jam b/src/kenlm/jam-files/boost-build/tools/darwin.jam
new file mode 100644
index 0000000..bd77ff7
--- /dev/null
+++ b/src/kenlm/jam-files/boost-build/tools/darwin.jam
@@ -0,0 +1,592 @@
+# Copyright 2003 Christopher Currie
+# Copyright 2006 Dave Abrahams
+# Copyright 2003, 2004, 2005, 2006 Vladimir Prus
+# Copyright 2005-2007 Mat Marcus
+# Copyright 2005-2007 Adobe Systems Incorporated
+# Copyright 2007-2010 Rene Rivera
+# Distributed under the Boost Software License, Version 1.0.
+# (See accompanying file LICENSE_1_0.txt or http://www.boost.org/LICENSE_1_0.txt)
+
+# Please see http://article.gmane.org/gmane.comp.lib.boost.build/3389/
+# for explanation why it's a separate toolset.
+
+import feature : feature ;
+import toolset : flags ;
+import type ;
+import common ;
+import generators ;
+import path : basename ;
+import version ;
+import property-set ;
+import regex ;
+import errors ;
+
+## Use a framework.
+feature framework : : free ;
+
+## The MacOSX version to compile for, which maps to the SDK to use (sysroot).
+feature macosx-version : : propagated link-incompatible symmetric optional ;
+
+## The minimal MacOSX version to target.
+feature macosx-version-min : : propagated optional ;
+
+## A dependency, that is forced to be included in the link.
+feature force-load : : free dependency incidental ;
+
+#############################################################################
+
+_ = " " ;
+
+if [ MATCH (--debug-configuration) : [ modules.peek : ARGV ] ]
+{
+ .debug-configuration = true ;
+}
+
+feature.extend toolset : darwin ;
+import gcc ;
+toolset.inherit-generators darwin : gcc : gcc.mingw.link gcc.mingw.link.dll ;
+
+generators.override darwin.prebuilt : builtin.prebuilt ;
+generators.override darwin.searched-lib-generator : searched-lib-generator ;
+
+# Override default do-nothing generators.
+generators.override darwin.compile.c.pch : pch.default-c-pch-generator ;
+generators.override darwin.compile.c++.pch : pch.default-cpp-pch-generator ;
+
+type.set-generated-target-suffix PCH : <toolset>darwin : gch ;
+
+toolset.inherit-rules darwin : gcc : localize ;
+toolset.inherit-flags darwin : gcc
+ : <runtime-link>static
+ <architecture>arm/<address-model>32
+ <architecture>arm/<address-model>64
+ <architecture>arm/<instruction-set>
+ <architecture>x86/<address-model>32
+ <architecture>x86/<address-model>64
+ <architecture>x86/<instruction-set>
+ <architecture>power/<address-model>32
+ <architecture>power/<address-model>64
+ <architecture>power/<instruction-set> ;
+
+# Options:
+#
+# <root>PATH
+# Platform root path. The common autodetection will set this to
+# "/Developer". And when a command is given it will be set to
+# the corresponding "*.platform/Developer" directory.
+#
+rule init ( version ? : command * : options * : requirement * )
+{
+ # First time around, figure what is host OSX version
+ if ! $(.host-osx-version)
+ {
+ .host-osx-version = [ MATCH "^([0-9.]+)"
+ : [ SHELL "/usr/bin/sw_vers -productVersion" ] ] ;
+ if $(.debug-configuration)
+ {
+ ECHO notice: OSX version on this machine is $(.host-osx-version) ;
+ }
+ }
+
+ # - The root directory of the tool install.
+ local root = [ feature.get-values <root> : $(options) ] ;
+
+ # - The bin directory where to find the commands to execute.
+ local bin ;
+
+ # - The configured compile driver command.
+ local command = [ common.get-invocation-command darwin : g++ : $(command) ] ;
+
+ # The version as reported by the compiler
+ local real-version ;
+
+ # - Autodetect the root and bin dir if not given.
+ if $(command)
+ {
+ bin ?= [ common.get-absolute-tool-path $(command[1]) ] ;
+ if $(bin) = "/usr/bin"
+ {
+ root ?= /Developer ;
+ }
+ else
+ {
+ local r = $(bin:D) ;
+ r = $(r:D) ;
+ root ?= $(r) ;
+ }
+ }
+
+ # - Autodetect the version if not given.
+ if $(command)
+ {
+ # - The 'command' variable can have multiple elements. When calling
+ # the SHELL builtin we need a single string.
+ local command-string = $(command:J=" ") ;
+ real-version = [ MATCH "^([0-9.]+)"
+ : [ SHELL "$(command-string) -dumpversion" ] ] ;
+ version ?= $(real-version) ;
+ }
+
+ .real-version.$(version) = $(real-version) ;
+
+ # - Define the condition for this toolset instance.
+ local condition =
+ [ common.check-init-parameters darwin $(requirement) : version $(version) ] ;
+
+ # - Set the toolset generic common options.
+ common.handle-options darwin : $(condition) : $(command) : $(options) ;
+
+ # - GCC 4.0 and higher in Darwin does not have -fcoalesce-templates.
+ if $(real-version) < "4.0.0"
+ {
+ flags darwin.compile.c++ OPTIONS $(condition) : -fcoalesce-templates ;
+ }
+ # - GCC 4.2 and higher in Darwin does not have -Wno-long-double.
+ if $(real-version) < "4.2.0"
+ {
+ flags darwin.compile OPTIONS $(condition) : -Wno-long-double ;
+ }
+
+ # - Set the link flags common with the GCC toolset.
+ gcc.init-link-flags darwin darwin $(condition) ;
+
+ # - The symbol strip program.
+ local strip ;
+ if <striper> in $(options)
+ {
+ # We can turn off strip by specifying it as empty. In which
+ # case we switch to using the linker to do the strip.
+ flags darwin.link.dll OPTIONS
+ $(condition)/<main-target-type>LIB/<link>shared/<address-model>32/<strip>on : -Wl,-x ;
+ flags darwin.link.dll OPTIONS
+ $(condition)/<main-target-type>LIB/<link>shared/<address-model>/<strip>on : -Wl,-x ;
+ flags darwin.link OPTIONS
+ $(condition)/<main-target-type>EXE/<address-model>32/<strip>on : -s ;
+ flags darwin.link OPTIONS
+ $(condition)/<main-target-type>EXE/<address-model>/<strip>on : -s ;
+ }
+ else
+ {
+ # Otherwise we need to find a strip program to use. And hence
+ # also tell the link action that we need to use a strip
+ # post-process.
+ flags darwin.link NEED_STRIP $(condition)/<strip>on : "" ;
+ strip =
+ [ common.get-invocation-command darwin
+ : strip : [ feature.get-values <striper> : $(options) ] : $(bin) : search-path ] ;
+ flags darwin.link .STRIP $(condition) : $(strip[1]) ;
+ if $(.debug-configuration)
+ {
+ ECHO notice: using strip for $(condition) at $(strip[1]) ;
+ }
+ }
+
+ # - The archive builder (libtool is the default as creating
+ # archives in darwin is complicated.
+ local archiver =
+ [ common.get-invocation-command darwin
+ : libtool : [ feature.get-values <archiver> : $(options) ] : $(bin) : search-path ] ;
+ flags darwin.archive .LIBTOOL $(condition) : $(archiver[1]) ;
+ if $(.debug-configuration)
+ {
+ ECHO notice: using archiver for $(condition) at $(archiver[1]) ;
+ }
+
+ # - Initialize the SDKs available in the root for this tool.
+ local sdks = [ init-available-sdk-versions $(condition) : $(root) ] ;
+
+ #~ ECHO --- ;
+ #~ ECHO --- bin :: $(bin) ;
+ #~ ECHO --- root :: $(root) ;
+ #~ ECHO --- version :: $(version) ;
+ #~ ECHO --- condition :: $(condition) ;
+ #~ ECHO --- strip :: $(strip) ;
+ #~ ECHO --- archiver :: $(archiver) ;
+ #~ ECHO --- sdks :: $(sdks) ;
+ #~ ECHO --- ;
+ #~ EXIT ;
+}
+
+# Add and set options for a discovered SDK version.
+local rule init-sdk ( condition * : root ? : version + : version-feature ? )
+{
+ local rule version-to-feature ( version + )
+ {
+ switch $(version[1])
+ {
+ case iphone* :
+ {
+ return $(version[1])-$(version[2-]:J=.) ;
+ }
+ case mac* :
+ {
+ return $(version[2-]:J=.) ;
+ }
+ case * :
+ {
+ return $(version:J=.) ;
+ }
+ }
+ }
+
+ # leave compiler flags etc. up to the toolchain
+ return $(version-feature) ;
+
+ if $(version-feature)
+ {
+ if $(.debug-configuration)
+ {
+ ECHO notice: available sdk for $(condition)/<macosx-version>$(version-feature) at $(root) ;
+ }
+
+ # Add the version to the features for specifying them.
+ if ! $(version-feature) in [ feature.values macosx-version ]
+ {
+ feature.extend macosx-version : $(version-feature) ;
+ }
+ if ! $(version-feature) in [ feature.values macosx-version-min ]
+ {
+ feature.extend macosx-version-min : $(version-feature) ;
+ }
+
+ # Set the flags the version needs to compile with, first
+ # generic options.
+ flags darwin.compile OPTIONS $(condition)/<macosx-version>$(version-feature)
+ : -isysroot $(root) ;
+ flags darwin.link OPTIONS $(condition)/<macosx-version>$(version-feature)
+ : -isysroot $(root) ;
+
+ # Then device variation options.
+ switch $(version[1])
+ {
+ case iphonesim* :
+ {
+ local N = $(version[2]) ;
+ if ! $(version[3]) { N += 00 ; }
+ else if [ regex.match (..) : $(version[3]) ] { N += $(version[3]) ; }
+ else { N += 0$(version[3]) ; }
+ if ! $(version[4]) { N += 00 ; }
+ else if [ regex.match (..) : $(version[4]) ] { N += $(version[4]) ; }
+ else { N += 0$(version[4]) ; }
+ N = $(N:J=) ;
+ flags darwin.compile OPTIONS <macosx-version-min>$(version-feature)
+ : -D__IPHONE_OS_VERSION_MIN_REQUIRED=$(N) ;
+ flags darwin.link OPTIONS <macosx-version-min>$(version-feature)
+ : -D__IPHONE_OS_VERSION_MIN_REQUIRED=$(N) ;
+ }
+
+ case iphone* :
+ {
+ flags darwin.compile OPTIONS <macosx-version-min>$(version-feature)
+ : -miphoneos-version-min=$(version[2-]:J=.) ;
+ flags darwin.link OPTIONS <macosx-version-min>$(version-feature)
+ : -miphoneos-version-min=$(version[2-]:J=.) ;
+ }
+
+ case mac* :
+ {
+ flags darwin.compile OPTIONS <macosx-version-min>$(version-feature)
+ : -mmacosx-version-min=$(version[2-]:J=.) ;
+ flags darwin.link OPTIONS <macosx-version-min>$(version-feature)
+ : -mmacosx-version-min=$(version[2-]:J=.) ;
+ }
+ }
+
+ if $(version[3]) > 0
+ {
+ # We have a minor version of an SDK. We want to set up
+ # previous minor versions, plus the current minor version.
+ # So we recurse to set up the previous minor versions, up to
+ # the current version.
+ local minor-minus-1 = [ CALC $(version[3]) - 1 ] ;
+ return
+ [ init-sdk $(condition) : $(root)
+ : $(version[1-2]) $(minor-minus-1) : [ version-to-feature $(version[1-2]) $(minor-minus-1) ] ]
+ $(version-feature) ;
+ }
+ else
+ {
+ return $(version-feature) ;
+ }
+ }
+ else if $(version[4])
+ {
+ # We have a patch version of an SDK. We want to set up
+ # both the specific patch version, and the minor version.
+ # So we recurse to set up the patch version. Plus the minor version.
+ return
+ [ init-sdk $(condition) : $(root)
+ : $(version[1-3]) : [ version-to-feature $(version[1-3]) ] ]
+ [ init-sdk $(condition) : $(root)
+ : $(version) : [ version-to-feature $(version) ] ] ;
+ }
+ else
+ {
+ # Yes, this is intentionally recursive.
+ return
+ [ init-sdk $(condition) : $(root)
+ : $(version) : [ version-to-feature $(version) ] ] ;
+ }
+}
+
+# Determine the MacOSX SDK versions installed and their locations.
+local rule init-available-sdk-versions ( condition * : root ? )
+{
+ root ?= /Developer ;
+ local sdks-root = $(root)/SDKs ;
+ local sdks = [ GLOB $(sdks-root) : MacOSX*.sdk iPhoneOS*.sdk iPhoneSimulator*.sdk ] ;
+ local result ;
+ for local sdk in $(sdks)
+ {
+ local sdk-match = [ MATCH ([^0-9]+)([0-9]+)[.]([0-9x]+)[.]?([0-9x]+)? : $(sdk:D=) ] ;
+ local sdk-platform = $(sdk-match[1]:L) ;
+ local sdk-version = $(sdk-match[2-]) ;
+ if $(sdk-version)
+ {
+ switch $(sdk-platform)
+ {
+ case macosx :
+ {
+ sdk-version = mac $(sdk-version) ;
+ }
+ case iphoneos :
+ {
+ sdk-version = iphone $(sdk-version) ;
+ }
+ case iphonesimulator :
+ {
+ sdk-version = iphonesim $(sdk-version) ;
+ }
+ case * :
+ {
+ sdk-version = $(sdk-version:J=-) ;
+ }
+ }
+ result += [ init-sdk $(condition) : $(sdk) : $(sdk-version) ] ;
+ }
+ }
+ return $(result) ;
+}
+
+# Generic options.
+flags darwin.compile OPTIONS <flags> ;
+
+# The following adds objective-c support to darwin.
+# Thanks to http://thread.gmane.org/gmane.comp.lib.boost.build/13759
+
+generators.register-c-compiler darwin.compile.m : OBJECTIVE_C : OBJ : <toolset>darwin ;
+generators.register-c-compiler darwin.compile.mm : OBJECTIVE_CPP : OBJ : <toolset>darwin ;
+
+rule setup-address-model ( targets * : sources * : properties * )
+{
+ local ps = [ property-set.create $(properties) ] ;
+ local arch = [ $(ps).get <architecture> ] ;
+ local instruction-set = [ $(ps).get <instruction-set> ] ;
+ local address-model = [ $(ps).get <address-model> ] ;
+ local osx-version = [ $(ps).get <macosx-version> ] ;
+ local gcc-version = [ $(ps).get <toolset-darwin:version> ] ;
+ gcc-version = $(.real-version.$(gcc-version)) ;
+ local options ;
+
+ local support-ppc64 = 1 ;
+
+ osx-version ?= $(.host-osx-version) ;
+
+ switch $(osx-version)
+ {
+ case iphone* :
+ {
+ support-ppc64 = ;
+ }
+
+ case * :
+ if $(osx-version) && ! [ version.version-less [ regex.split $(osx-version) \\. ] : 10 6 ]
+ {
+ # When targeting 10.6:
+ # - gcc 4.2 will give a compiler errir if ppc64 compilation is requested
+ # - gcc 4.0 will compile fine, somehow, but then fail at link time
+ support-ppc64 = ;
+ }
+ }
+ # Gentoo Prefix toolchain doesn't do multi-arch, so don't try either
+ switch $(donotaddarchpleaseXXXarch)
+ {
+ case combined :
+ {
+ if $(address-model) = 32_64 {
+ if $(support-ppc64) {
+ options = -arch i386 -arch ppc -arch x86_64 -arch ppc64 ;
+ } else {
+ # Build 3-way binary
+ options = -arch i386 -arch ppc -arch x86_64 ;
+ }
+ } else if $(address-model) = 64 {
+ if $(support-ppc64) {
+ options = -arch x86_64 -arch ppc64 ;
+ } else {
+ errors.user-error "64-bit PPC compilation is not supported when targeting OSX 10.6 or later" ;
+ }
+ } else {
+ options = -arch i386 -arch ppc ;
+ }
+ }
+
+ case x86 :
+ {
+ if $(address-model) = 32_64 {
+ options = -arch i386 -arch x86_64 ;
+ } else if $(address-model) = 64 {
+ options = -arch x86_64 ;
+ } else {
+ options = -arch i386 ;
+ }
+ }
+
+ case power :
+ {
+ if ! $(support-ppc64)
+ && ( $(address-model) = 32_64 || $(address-model) = 64 )
+ {
+ errors.user-error "64-bit PPC compilation is not supported when targeting OSX 10.6 or later" ;
+ }
+
+ if $(address-model) = 32_64 {
+ options = -arch ppc -arch ppc64 ;
+ } else if $(address-model) = 64 {
+ options = -arch ppc64 ;
+ } else {
+ options = -arch ppc ;
+ }
+ }
+
+ case arm :
+ {
+ if $(instruction-set) {
+ options = -arch$(_)$(instruction-set) ;
+ } else {
+ options = -arch arm ;
+ }
+ }
+ }
+
+ if $(options)
+ {
+ OPTIONS on $(targets) += $(options) ;
+ }
+}
+
+rule setup-threading ( targets * : sources * : properties * )
+{
+ gcc.setup-threading $(targets) : $(sources) : $(properties) ;
+}
+
+rule setup-fpic ( targets * : sources * : properties * )
+{
+ gcc.setup-fpic $(targets) : $(sources) : $(properties) ;
+}
+
+rule compile.m ( targets * : sources * : properties * )
+{
+ LANG on $(<) = "-x objective-c" ;
+ gcc.setup-fpic $(targets) : $(sources) : $(properties) ;
+ setup-address-model $(targets) : $(sources) : $(properties) ;
+}
+
+actions compile.m
+{
+ "$(CONFIG_COMMAND)" $(LANG) $(OPTIONS) $(USER_OPTIONS) -D$(DEFINES) -I"$(INCLUDES)" -c -o "$(<)" "$(>)"
+}
+
+rule compile.mm ( targets * : sources * : properties * )
+{
+ LANG on $(<) = "-x objective-c++" ;
+ gcc.setup-fpic $(targets) : $(sources) : $(properties) ;
+ setup-address-model $(targets) : $(sources) : $(properties) ;
+}
+
+actions compile.mm
+{
+ "$(CONFIG_COMMAND)" $(LANG) $(OPTIONS) $(USER_OPTIONS) -D$(DEFINES) -I"$(INCLUDES)" -c -o "$(<)" "$(>)"
+}
+
+# Set the max header padding to allow renaming of libs for installation.
+flags darwin.link.dll OPTIONS : -headerpad_max_install_names ;
+
+# To link the static runtime we need to link to all the core runtime libraries.
+flags darwin.link OPTIONS <runtime-link>static
+ : -nodefaultlibs -shared-libgcc -lstdc++-static -lgcc_eh -lgcc -lSystem ;
+
+# Strip as much as possible when optimizing.
+flags darwin.link OPTIONS <optimization>speed : -Wl,-dead_strip -no_dead_strip_inits_and_terms ;
+flags darwin.link OPTIONS <optimization>space : -Wl,-dead_strip -no_dead_strip_inits_and_terms ;
+
+# Dynamic/shared linking.
+flags darwin.compile OPTIONS <link>shared : -dynamic ;
+
+# Misc options.
+flags darwin.compile OPTIONS : -gdwarf-2 -fexceptions ;
+#~ flags darwin.link OPTIONS : -fexceptions ;
+
+# Add the framework names to use.
+flags darwin.link FRAMEWORK <framework> ;
+
+#
+flags darwin.link FORCE_LOAD <force-load> ;
+
+# This is flag is useful for debugging the link step
+# uncomment to see what libtool is doing under the hood
+#~ flags darwin.link.dll OPTIONS : -Wl,-v ;
+
+# set up the -F option to include the paths to any frameworks used.
+local rule prepare-framework-path ( target + )
+{
+ # The -framework option only takes basename of the framework.
+ # The -F option specifies the directories where a framework
+ # is searched for. So, if we find <framework> feature
+ # with some path, we need to generate property -F option.
+ local framework-paths = [ on $(target) return $(FRAMEWORK:D) ] ;
+
+ # Be sure to generate no -F if there's no path.
+ for local framework-path in $(framework-paths)
+ {
+ if $(framework-path) != ""
+ {
+ FRAMEWORK_PATH on $(target) += -F$(framework-path) ;
+ }
+ }
+}
+
+rule link ( targets * : sources * : properties * )
+{
+ DEPENDS $(targets) : [ on $(targets) return $(FORCE_LOAD) ] ;
+ setup-address-model $(targets) : $(sources) : $(properties) ;
+ prepare-framework-path $(<) ;
+}
+
+# Note that using strip without any options was reported to result in broken
+# binaries, at least on OS X 10.5.5, see:
+# http://svn.boost.org/trac/boost/ticket/2347
+# So we pass -S -x.
+actions link bind LIBRARIES FORCE_LOAD
+{
+ "$(CONFIG_COMMAND)" -L"$(LINKPATH)" -o "$(<)" "$(>)" -Wl,-force_load$(_)"$(FORCE_LOAD)" "$(LIBRARIES)" -l$(FINDLIBS-SA) -l$(FINDLIBS-ST) $(FRAMEWORK_PATH) -framework$(_)$(FRAMEWORK:D=:S=) $(OPTIONS) $(USER_OPTIONS)
+ $(NEED_STRIP)"$(.STRIP)" $(NEED_STRIP)-S $(NEED_STRIP)-x $(NEED_STRIP)"$(<)"
+}
+
+rule link.dll ( targets * : sources * : properties * )
+{
+ setup-address-model $(targets) : $(sources) : $(properties) ;
+ prepare-framework-path $(<) ;
+}
+
+actions link.dll bind LIBRARIES
+{
+ "$(CONFIG_COMMAND)" -dynamiclib -Wl,-single_module -install_name "$(<:B)$(<:S)" -L"$(LINKPATH)" -o "$(<)" "$(>)" "$(LIBRARIES)" -l$(FINDLIBS-SA) -l$(FINDLIBS-ST) $(FRAMEWORK_PATH) -framework$(_)$(FRAMEWORK:D=:S=) $(OPTIONS) $(USER_OPTIONS)
+}
+
+# We use libtool instead of ar to support universal binary linking
+# TODO: Find a way to use the underlying tools, i.e. lipo, to do this.
+actions piecemeal archive
+{
+ "$(.LIBTOOL)" -static -o "$(<:T)" $(ARFLAGS) "$(>:T)"
+}
diff --git a/src/kenlm/jam-files/boost-build/tools/dmc.jam b/src/kenlm/jam-files/boost-build/tools/dmc.jam
new file mode 100644
index 0000000..8af8725
--- /dev/null
+++ b/src/kenlm/jam-files/boost-build/tools/dmc.jam
@@ -0,0 +1,134 @@
+# Digital Mars C++
+
+# (C) Copyright Christof Meerwald 2003.
+# (C) Copyright Aleksey Gurtovoy 2004.
+# (C) Copyright Arjan Knepper 2006.
+#
+# Distributed under the Boost Software License, Version 1.0. (See
+# accompanying file LICENSE_1_0.txt or copy at
+# http://www.boost.org/LICENSE_1_0.txt)
+
+# The following #// line will be used by the regression test table generation
+# program as the column heading for HTML tables. Must not include version number.
+#//<a href="http://www.digitalmars.com/">Digital<br>Mars C++</a>
+
+import feature generators common ;
+import toolset : flags ;
+import sequence regex ;
+
+feature.extend toolset : dmc ;
+
+rule init ( version ? : command * : options * )
+{
+ local condition = [ common.check-init-parameters dmc : version $(version) ] ;
+
+ local command = [ common.get-invocation-command dmc : dmc : $(command) ] ;
+ command ?= dmc ;
+
+ common.handle-options dmc : $(condition) : $(command) : $(options) ;
+
+ if $(command)
+ {
+ command = [ common.get-absolute-tool-path $(command[-1]) ] ;
+ }
+ root = $(command:D) ;
+
+ if $(root)
+ {
+ # DMC linker is sensitive to the direction of slashes, and
+ # won't link if forward slashes are used in command.
+ root = [ sequence.join [ regex.split $(root) "/" ] : "\\" ] ;
+ flags dmc .root $(condition) : $(root)\\bin\\ ;
+ }
+ else
+ {
+ flags dmc .root $(condition) : "" ;
+ }
+}
+
+
+# Declare generators
+generators.register-linker dmc.link : OBJ SEARCHED_LIB STATIC_LIB IMPORT_LIB : EXE : <toolset>dmc ;
+generators.register-linker dmc.link.dll : OBJ SEARCHED_LIB STATIC_LIB IMPORT_LIB : SHARED_LIB IMPORT_LIB : <toolset>dmc ;
+
+generators.register-archiver dmc.archive : OBJ : STATIC_LIB : <toolset>dmc ;
+generators.register-c-compiler dmc.compile.c++ : CPP : OBJ : <toolset>dmc ;
+generators.register-c-compiler dmc.compile.c : C : OBJ : <toolset>dmc ;
+
+
+# Declare flags
+# dmc optlink has some limitation on the amount of debug-info included. Therefore only linenumbers are enabled in debug builds.
+# flags dmc.compile OPTIONS <debug-symbols>on : -g ;
+flags dmc.compile OPTIONS <debug-symbols>on : -gl ;
+flags dmc.link OPTIONS <debug-symbols>on : /CO /NOPACKF /DEBUGLI ;
+flags dmc.link OPTIONS <debug-symbols>off : /PACKF ;
+
+flags dmc.compile OPTIONS <optimization>off : -S -o+none ;
+flags dmc.compile OPTIONS <optimization>speed : -o+time ;
+flags dmc.compile OPTIONS <optimization>space : -o+space ;
+flags dmc.compile OPTIONS <exception-handling>on : -Ae ;
+flags dmc.compile OPTIONS <rtti>on : -Ar ;
+# FIXME:
+# Compiling sources to be linked into a shared lib (dll) the -WD cflag should be used
+# Compiling sources to be linked into a static lib (lib) or executable the -WA cflag should be used
+# But for some reason the -WD cflag is always in use.
+# flags dmc.compile OPTIONS <link>shared : -WD ;
+# flags dmc.compile OPTIONS <link>static : -WA ;
+
+# Note that these two options actually imply multithreading support on DMC
+# because there is no single-threaded dynamic runtime library. Specifying
+# <threading>multi would be a bad idea, though, because no option would be
+# matched when the build uses the default settings of <runtime-link>dynamic
+# and <threading>single.
+flags dmc.compile OPTIONS <runtime-debugging>off/<runtime-link>shared : -ND ;
+flags dmc.compile OPTIONS <runtime-debugging>on/<runtime-link>shared : -ND ;
+
+flags dmc.compile OPTIONS <runtime-debugging>off/<runtime-link>static/<threading>single : ;
+flags dmc.compile OPTIONS <runtime-debugging>on/<runtime-link>static/<threading>single : ;
+flags dmc.compile OPTIONS <runtime-debugging>off/<runtime-link>static/<threading>multi : -D_MT ;
+flags dmc.compile OPTIONS <runtime-debugging>on/<runtime-link>static/<threading>multi : -D_MT ;
+
+flags dmc.compile OPTIONS : <cflags> ;
+flags dmc.compile.c++ OPTIONS : <cxxflags> ;
+
+flags dmc.compile DEFINES : <define> ;
+flags dmc.compile INCLUDES : <include> ;
+
+flags dmc.link <linkflags> ;
+flags dmc.archive OPTIONS <arflags> ;
+
+flags dmc LIBPATH <library-path> ;
+flags dmc LIBRARIES <library-file> ;
+flags dmc FINDLIBS <find-library-sa> ;
+flags dmc FINDLIBS <find-library-st> ;
+
+actions together link bind LIBRARIES
+{
+ "$(.root)link" $(OPTIONS) /NOI /DE /XN "$(>)" , "$(<[1])" ,, $(LIBRARIES) user32.lib kernel32.lib "$(FINDLIBS:S=.lib)" , "$(<[2]:B).def"
+}
+
+actions together link.dll bind LIBRARIES
+{
+ echo LIBRARY "$(<[1])" > $(<[2]:B).def
+ echo DESCRIPTION 'A Library' >> $(<[2]:B).def
+ echo EXETYPE NT >> $(<[2]:B).def
+ echo SUBSYSTEM WINDOWS >> $(<[2]:B).def
+ echo CODE EXECUTE READ >> $(<[2]:B).def
+ echo DATA READ WRITE >> $(<[2]:B).def
+ "$(.root)link" $(OPTIONS) /NOI /DE /XN /ENTRY:_DllMainCRTStartup /IMPLIB:"$(<[2])" "$(>)" $(LIBRARIES) , "$(<[1])" ,, user32.lib kernel32.lib "$(FINDLIBS:S=.lib)" , "$(<[2]:B).def"
+}
+
+actions compile.c
+{
+ "$(.root)dmc" -c $(OPTIONS) -D$(DEFINES) -I"$(INCLUDES)" -o"$(<)" "$(>)"
+}
+
+actions compile.c++
+{
+ "$(.root)dmc" -cpp -c -Ab $(OPTIONS) -D$(DEFINES) -I"$(INCLUDES)" -o"$(<)" "$(>)"
+}
+
+actions together piecemeal archive
+{
+ "$(.root)lib" $(OPTIONS) -c -n -p256 "$(<)" "$(>)"
+}
diff --git a/src/kenlm/jam-files/boost-build/tools/docutils.jam b/src/kenlm/jam-files/boost-build/tools/docutils.jam
new file mode 100644
index 0000000..fc775b6
--- /dev/null
+++ b/src/kenlm/jam-files/boost-build/tools/docutils.jam
@@ -0,0 +1,85 @@
+# Copyright David Abrahams 2004. Distributed under the Boost
+# Software License, Version 1.0. (See accompanying
+# file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+# Support for docutils ReStructuredText processing.
+
+import type ;
+import scanner ;
+import generators ;
+import os ;
+import common ;
+import toolset ;
+import path ;
+import feature : feature ;
+import property ;
+
+.initialized = ;
+
+type.register ReST : rst ;
+
+class rst-scanner : common-scanner
+{
+ rule __init__ ( paths * )
+ {
+ common-scanner.__init__ . $(paths) ;
+ }
+
+ rule pattern ( )
+ {
+ return "^[ ]*\\.\\.[ ]+include::[ ]+([^
+]+)"
+ "^[ ]*\\.\\.[ ]+image::[ ]+([^
+]+)"
+ "^[ ]*\\.\\.[ ]+figure::[ ]+([^
+]+)"
+ ;
+ }
+}
+
+scanner.register rst-scanner : include ;
+type.set-scanner ReST : rst-scanner ;
+
+generators.register-standard docutils.html : ReST : HTML ;
+
+rule init ( docutils-dir ? : tools-dir ? )
+{
+ docutils-dir ?= [ modules.peek : DOCUTILS_DIR ] ;
+ tools-dir ?= $(docutils-dir)/tools ;
+
+ if ! $(.initialized)
+ {
+ .initialized = true ;
+ .docutils-dir = $(docutils-dir) ;
+ .tools-dir = $(tools-dir:R="") ;
+
+ .setup = [
+ common.prepend-path-variable-command PYTHONPATH
+ : $(.docutils-dir) $(.docutils-dir)/extras ] ;
+ RST2XXX = [ common.find-tool rst2html ] ;
+ }
+}
+
+rule html ( target : source : properties * )
+{
+ if ! [ on $(target) return $(RST2XXX) ]
+ {
+ local python-cmd = [ property.select <python.interpreter> : $(properties) ] ;
+ RST2XXX on $(target) = $(python-cmd:G=:E="python") $(.tools-dir)/rst2html.py ;
+ }
+}
+
+
+feature docutils : : free ;
+feature docutils-html : : free ;
+feature docutils-cmd : : free ;
+toolset.flags docutils COMMON-FLAGS : <docutils> ;
+toolset.flags docutils HTML-FLAGS : <docutils-html> ;
+toolset.flags docutils RST2XXX : <docutils-cmd> ;
+
+actions html
+{
+ $(.setup)
+ "$(RST2XXX)" $(COMMON-FLAGS) $(HTML-FLAGS) $(>) $(<)
+}
+
diff --git a/src/kenlm/jam-files/boost-build/tools/doxygen-config.jam b/src/kenlm/jam-files/boost-build/tools/doxygen-config.jam
new file mode 100644
index 0000000..2cd2cca
--- /dev/null
+++ b/src/kenlm/jam-files/boost-build/tools/doxygen-config.jam
@@ -0,0 +1,11 @@
+#~ Copyright 2005, 2006 Rene Rivera.
+#~ Distributed under the Boost Software License, Version 1.0.
+#~ (See accompanying file LICENSE_1_0.txt or http://www.boost.org/LICENSE_1_0.txt)
+
+# Automatic configuration for Doxygen tools. To use, just import this module.
+
+import toolset : using ;
+
+ECHO "warning: doxygen-config.jam is deprecated. Use 'using doxygen ;' instead." ;
+
+using doxygen ;
diff --git a/src/kenlm/jam-files/boost-build/tools/doxygen.jam b/src/kenlm/jam-files/boost-build/tools/doxygen.jam
new file mode 100644
index 0000000..6a56ccd
--- /dev/null
+++ b/src/kenlm/jam-files/boost-build/tools/doxygen.jam
@@ -0,0 +1,775 @@
+# Copyright 2003, 2004 Douglas Gregor
+# Copyright 2003, 2004, 2005 Vladimir Prus
+# Copyright 2006 Rene Rivera
+# Distributed under the Boost Software License, Version 1.0.
+# (See accompanying file LICENSE_1_0.txt or copy at
+# http://www.boost.org/LICENSE_1_0.txt)
+
+# This module defines rules to handle generation of various outputs from source
+# files documented with doxygen comments. The supported transformations are:
+#
+# * Source -> Doxygen XML -> BoostBook XML
+# * Source -> Doxygen HTML
+#
+# The type of transformation is selected based on the target requested. For
+# BoostBook XML, the default, specifying a target with an ".xml" suffix, or an
+# empty suffix, will produce a <target>.xml and <target>.boostbook. For Doxygen
+# HTML specifying a target with an ".html" suffix will produce a directory
+# <target> with the Doxygen html files, and a <target>.html file redirecting to
+# that directory.
+
+import alias ;
+import boostbook ;
+import "class" : new ;
+import common ;
+import feature ;
+import make ;
+import modules ;
+import generators ;
+import os ;
+import path ;
+import print ;
+import project ;
+import property ;
+import stage ;
+import targets ;
+import toolset ;
+import type ;
+import utility ;
+import xsltproc ;
+import virtual-target ;
+
+
+# Used to specify extra configuration parameters. These get translated into a
+# doxyfile which configures the building of the docs.
+feature.feature doxygen:param : : free ;
+
+# Specify the "<xsl:param>boost.doxygen.header.prefix" XSLT option.
+feature.feature prefix : : free ;
+
+# Specify the "<xsl:param>boost.doxygen.reftitle" XSLT option.
+feature.feature reftitle : : free ;
+
+# Which processor to use for various translations from Doxygen.
+feature.feature doxygen.processor : xsltproc doxproc : propagated implicit ;
+
+# To generate, or not, index sections.
+feature.feature doxygen.doxproc.index : no yes : propagated incidental ;
+
+# The ID for the resulting BoostBook reference section.
+feature.feature doxygen.doxproc.id : : free ;
+
+# The title for the resulting BoostBook reference section.
+feature.feature doxygen.doxproc.title : : free ;
+
+# Location for images when generating XML
+feature.feature doxygen:xml-imagedir : : free ;
+
+# Indicates whether the entire directory should be deleted
+feature.feature doxygen.rmdir : off on : optional incidental ;
+
+# Doxygen configuration input file.
+type.register DOXYFILE : doxyfile ;
+
+# Doxygen XML multi-file output.
+type.register DOXYGEN_XML_MULTIFILE : xml-dir : XML ;
+
+# Doxygen XML coalesced output.
+type.register DOXYGEN_XML : doxygen : XML ;
+
+# Doxygen HTML multifile directory.
+type.register DOXYGEN_HTML_MULTIFILE : html-dir : HTML ;
+
+# Redirection HTML file to HTML multifile directory.
+type.register DOXYGEN_HTML : : HTML ;
+
+type.register DOXYGEN_XML_IMAGES : doxygen-xml-images ;
+
+
+# Initialize the Doxygen module. Parameters are:
+# name: the name of the 'doxygen' executable. If not specified, the name
+# 'doxygen' will be used
+#
+rule init ( name ? )
+{
+ if ! $(.initialized)
+ {
+ .initialized = true ;
+
+ .doxproc = [ modules.binding $(__name__) ] ;
+ .doxproc = $(.doxproc:D)/doxproc.py ;
+
+ generators.register-composing doxygen.headers-to-doxyfile
+ : H HPP CPP : DOXYFILE ;
+ generators.register-standard doxygen.run
+ : DOXYFILE : DOXYGEN_XML_MULTIFILE ;
+ generators.register-standard doxygen.xml-dir-to-boostbook
+ : DOXYGEN_XML_MULTIFILE : BOOSTBOOK : <doxygen.processor>doxproc ;
+ generators.register-standard doxygen.xml-to-boostbook
+ : DOXYGEN_XML : BOOSTBOOK : <doxygen.processor>xsltproc ;
+ generators.register-standard doxygen.collect
+ : DOXYGEN_XML_MULTIFILE : DOXYGEN_XML ;
+ generators.register-standard doxygen.run
+ : DOXYFILE : DOXYGEN_HTML_MULTIFILE ;
+ generators.register-standard doxygen.html-redirect
+ : DOXYGEN_HTML_MULTIFILE : DOXYGEN_HTML ;
+ generators.register-standard doxygen.copy-latex-pngs
+ : DOXYGEN_HTML : DOXYGEN_XML_IMAGES ;
+
+ IMPORT $(__name__) : doxygen : : doxygen ;
+ }
+
+ if $(name)
+ {
+ modify-config ;
+ .doxygen = $(name) ;
+ check-doxygen ;
+ }
+
+ if ! $(.doxygen)
+ {
+ check-doxygen ;
+ }
+}
+
+
+local rule freeze-config ( )
+{
+ if ! $(.initialized)
+ {
+ import errors ;
+ errors.user-error doxygen must be initialized before it can be used. ;
+ }
+ if ! $(.config-frozen)
+ {
+ .config-frozen = true ;
+ if [ .is-cygwin ]
+ {
+ .is-cygwin = true ;
+ }
+ }
+}
+
+
+local rule modify-config ( )
+{
+ if $(.config-frozen)
+ {
+ import errors ;
+ errors.user-error "Cannot change doxygen after it has been used." ;
+ }
+}
+
+
+local rule check-doxygen ( )
+{
+ if --debug-configuration in [ modules.peek : ARGV ]
+ {
+ ECHO "notice:" using doxygen ":" $(.doxygen) ;
+ }
+ local extra-paths ;
+ if [ os.name ] = NT
+ {
+ local ProgramFiles = [ modules.peek : ProgramFiles ] ;
+ if $(ProgramFiles)
+ {
+ extra-paths = "$(ProgramFiles:J= )" ;
+ }
+ else
+ {
+ extra-paths = "C:\\Program Files" ;
+ }
+ }
+ .doxygen = [ common.get-invocation-command doxygen : doxygen : $(.doxygen) :
+ $(extra-paths) ] ;
+}
+
+
+rule name ( )
+{
+ freeze-config ;
+ return $(.doxygen) ;
+}
+
+
+local rule .is-cygwin ( )
+{
+ if [ os.on-windows ]
+ {
+ local file = [ path.make [ modules.binding $(__name__) ] ] ;
+ local dir = [ path.native [ path.join [ path.parent $(file) ] doxygen ]
+ ] ;
+ local command = cd \"$(dir)\" "&&" \"$(.doxygen)\"
+ windows-paths-check.doxyfile 2>&1 ;
+ command = $(command:J=" ") ;
+ result = [ SHELL $(command) ] ;
+ if [ MATCH "(Parsing file /)" : $(result) ]
+ {
+ return true ;
+ }
+ }
+}
+
+
+# Runs Doxygen on the given Doxygen configuration file (the source) to generate
+# the Doxygen files. The output is dumped according to the settings in the
+# Doxygen configuration file, not according to the target! Because of this, we
+# essentially "touch" the target file, in effect making it look like we have
+# really written something useful to it. Anyone that uses this action must deal
+# with this behavior.
+#
+actions doxygen-action
+{
+ $(RM) "$(*.XML)" & "$(NAME:E=doxygen)" "$(>)" && echo "Stamped" > "$(<)"
+}
+
+
+# Runs the Python doxproc XML processor.
+#
+actions doxproc
+{
+ python "$(DOXPROC)" "--xmldir=$(>)" "--output=$(<)" "$(OPTIONS)" "--id=$(ID)" "--title=$(TITLE)"
+}
+
+
+rule translate-path ( path )
+{
+ freeze-config ;
+ if [ os.on-windows ]
+ {
+ if [ os.name ] = CYGWIN
+ {
+ if $(.is-cygwin)
+ {
+ return $(path) ;
+ }
+ else
+ {
+ return $(path:W) ;
+ }
+ }
+ else
+ {
+ if $(.is-cygwin)
+ {
+ match = [ MATCH ^(.):(.*) : $(path) ] ;
+ if $(match)
+ {
+ return /cygdrive/$(match[1])$(match[2]:T) ;
+ }
+ else
+ {
+ return $(path:T) ;
+ }
+ }
+ else
+ {
+ return $(path) ;
+ }
+ }
+ }
+ else
+ {
+ return $(path) ;
+ }
+}
+
+
+# Generates a doxygen configuration file (doxyfile) given a set of C++ sources
+# and a property list that may contain <doxygen:param> features.
+#
+rule headers-to-doxyfile ( target : sources * : properties * )
+{
+ local text = "# Generated by Boost.Build version 2" ;
+
+ local output-dir ;
+
+ # Translate <doxygen:param> into command line flags.
+ for local param in [ feature.get-values <doxygen:param> : $(properties) ]
+ {
+ local namevalue = [ MATCH ([^=]*)=(.*) : $(param) ] ;
+ if $(namevalue[1]) = OUTPUT_DIRECTORY
+ {
+ output-dir = [ translate-path [ utility.unquote $(namevalue[2]) ] ]
+ ;
+ text += "OUTPUT_DIRECTORY = \"$(output-dir)\"" ;
+ }
+ else
+ {
+ text += "$(namevalue[1]) = $(namevalue[2])" ;
+ }
+ }
+
+ if ! $(output-dir)
+ {
+ output-dir = [ translate-path [ on $(target) return $(LOCATE) ] ] ;
+ text += "OUTPUT_DIRECTORY = \"$(output-dir)\"" ;
+ }
+
+ local headers ;
+ for local header in $(sources:G=)
+ {
+ header = [ translate-path $(header) ] ;
+ headers += \"$(header)\" ;
+ }
+
+ # Doxygen generates LaTeX by default. So disable it unconditionally, or at
+ # least until someone needs, and hence writes support for, LaTeX output.
+ text += "GENERATE_LATEX = NO" ;
+ text += "INPUT = $(headers:J= )" ;
+ print.output $(target) plain ;
+ print.text $(text) : true ;
+}
+
+
+# Run Doxygen. See doxygen-action for a description of the strange properties of
+# this rule.
+#
+rule run ( target : source : properties * )
+{
+ freeze-config ;
+ if <doxygen.rmdir>on in $(properties)
+ {
+ local output-dir = [ path.make [ MATCH
+ <doxygen:param>OUTPUT_DIRECTORY=\"?([^\"]*) : $(properties) ] ] ;
+ local html-dir = [ path.make [ MATCH <doxygen:param>HTML_OUTPUT=(.*) :
+ $(properties) ] ] ;
+ if $(output-dir) && $(html-dir) &&
+ [ path.glob $(output-dir) : $(html-dir) ]
+ {
+ HTMLDIR on $(target) = [ path.native [ path.join $(output-dir)
+ $(html-dir) ] ] ;
+ rm-htmldir $(target) ;
+ }
+ }
+ doxygen-action $(target) : $(source) ;
+ NAME on $(target) = $(.doxygen) ;
+ RM on $(target) = [ modules.peek common : RM ] ;
+ *.XML on $(target) = [ path.native [ path.join [ path.make [ on $(target)
+ return $(LOCATE) ] ] $(target:B:S=) *.xml ] ] ;
+}
+
+
+if [ os.name ] = NT
+{
+ RMDIR = rmdir /s /q ;
+}
+else
+{
+ RMDIR = rm -rf ;
+}
+
+actions quietly rm-htmldir
+{
+ $(RMDIR) $(HTMLDIR)
+}
+
+
+# The rules below require BoostBook stylesheets, so we need some code to check
+# that the boostbook module has actually been initialized.
+#
+rule check-boostbook ( )
+{
+ if ! [ modules.peek boostbook : .initialized ]
+ {
+ import errors ;
+ errors.user-error
+ : The boostbook module is not initialized you have attempted to use
+ : the 'doxygen' toolset, which requires BoostBook, but never
+ : initialized BoostBook.
+ : Hint: add 'using boostbook ;' to your user-config.jam. ;
+ }
+}
+
+
+# Collect the set of Doxygen XML files into a single XML source file that can be
+# handled by an XSLT processor. The source is completely ignored (see
+# doxygen-action), because this action picks up the Doxygen XML index file xml/
+# index.xml. This is because we can not teach Doxygen to act like a NORMAL
+# program and take a "-o output.xml" argument (grrrr). The target of the
+# collection will be a single Doxygen XML file.
+#
+rule collect ( target : source : properties * )
+{
+ check-boostbook ;
+ local collect-xsl-dir
+ = [ path.native [ path.join [ boostbook.xsl-dir ] doxygen collect ] ] ;
+ local source-path
+ = [ path.make [ on $(source) return $(LOCATE) ] ] ;
+ local collect-path
+ = [ path.root [ path.join $(source-path) $(source:B) ] [ path.pwd ] ] ;
+ local native-path
+ = [ path.native $(collect-path) ] ;
+ local real-source
+ = [ path.native [ path.join $(collect-path) index.xml ] ] ;
+ xsltproc.xslt $(target) : $(real-source) $(collect-xsl-dir:S=.xsl)
+ : <xsl:param>doxygen.xml.path=$(native-path) ;
+}
+
+
+# Translate Doxygen XML into BoostBook.
+#
+rule xml-to-boostbook ( target : source : properties * )
+{
+ check-boostbook ;
+ local xsl-dir = [ boostbook.xsl-dir ] ;
+ local d2b-xsl = [ path.native [ path.join [ boostbook.xsl-dir ] doxygen
+ doxygen2boostbook.xsl ] ] ;
+
+ local xslt-properties = $(properties) ;
+ for local prefix in [ feature.get-values <prefix> : $(properties) ]
+ {
+ xslt-properties += "<xsl:param>boost.doxygen.header.prefix=$(prefix)" ;
+ }
+ for local title in [ feature.get-values <reftitle> : $(properties) ]
+ {
+ xslt-properties += "<xsl:param>boost.doxygen.reftitle=$(title)" ;
+ }
+
+ xsltproc.xslt $(target) : $(source) $(d2b-xsl) : $(xslt-properties) ;
+}
+
+
+toolset.flags doxygen.xml-dir-to-boostbook OPTIONS <doxygen.doxproc.index>yes :
+ --enable-index ;
+toolset.flags doxygen.xml-dir-to-boostbook ID <doxygen.doxproc.id> ;
+toolset.flags doxygen.xml-dir-to-boostbook TITLE <doxygen.doxproc.title> ;
+
+
+rule xml-dir-to-boostbook ( target : source : properties * )
+{
+ DOXPROC on $(target) = $(.doxproc) ;
+ LOCATE on $(source:S=) = [ on $(source) return $(LOCATE) ] ;
+ doxygen.doxproc $(target) : $(source:S=) ;
+}
+
+
+# Generate the HTML redirect to HTML dir index.html file.
+#
+rule html-redirect ( target : source : properties * )
+{
+ local uri = "$(target:B)/index.html" ;
+ print.output $(target) plain ;
+ print.text
+"<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.0 Strict//EN\"
+ \"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd\">
+<html xmlns=\"http://www.w3.org/1999/xhtml\">
+<head>
+ <meta http-equiv=\"refresh\" content=\"0; URL=$(uri)\" />
+
+ <title></title>
+</head>
+
+<body>
+ Automatic redirection failed, please go to <a href=
+ \"$(uri)\">$(uri)</a>.
+</body>
+</html>
+"
+ : true ;
+}
+
+rule copy-latex-pngs ( target : source : requirements * )
+{
+ local directory = [ path.native [ feature.get-values <doxygen:xml-imagedir>
+ : $(requirements) ] ] ;
+ local location = [ on $(target) return $(LOCATE) ] ;
+
+ local pdf-location = [ path.native [ path.join [ path.make $(location) ]
+ [ path.make $(directory) ] ] ] ;
+ local html-location = [ path.native [ path.join . html [ path.make
+ $(directory) ] ] ] ;
+
+ common.MkDir $(pdf-location) ;
+ common.MkDir $(html-location) ;
+
+ DEPENDS $(target) : $(pdf-location) $(html-location) ;
+
+ if [ os.name ] = NT
+ {
+ CP on $(target) = copy /y ;
+ FROM on $(target) = \\*.png ;
+ TOHTML on $(target) = .\\html\\$(directory) ;
+ TOPDF on $(target) = \\$(directory) ;
+ }
+ else
+ {
+ CP on $(target) = cp ;
+ FROM on $(target) = /*.png ;
+ TOHTML on $(target) = ./html/$(directory) ;
+ TOPDF on $(target) = $(target:D)/$(directory) ;
+ }
+}
+
+actions copy-latex-pngs
+{
+ $(CP) $(>:S=)$(FROM) $(TOHTML)
+ $(CP) $(>:S=)$(FROM) $(<:D)$(TOPDF)
+ echo "Stamped" > "$(<)"
+}
+
+
+# Building latex images for doxygen XML depends on latex, dvips, and gs being in
+# your PATH. This is true for most Unix installs, but not on Win32, where you
+# will need to install MkTex and Ghostscript and add these tools to your path.
+
+actions check-latex
+{
+ latex -version >$(<)
+}
+
+actions check-dvips
+{
+ dvips -version >$(<)
+}
+
+if [ os.name ] = "NT"
+{
+ actions check-gs
+ {
+ gswin32c -version >$(<)
+ }
+}
+else
+{
+ actions check-gs
+ {
+ gs -version >$(<)
+ }
+}
+
+
+local rule check-tools-targets ( project )
+{
+ if ! $(.check-tools-targets)
+ {
+ # Find the root project.
+ #
+ # This is a best effort attempt to avoid using different locations for
+ # storing *.check files depending on which project imported the doxygen
+ # toolset first. The files are stored in a location related to the
+ # project's root project. Note that this location may change depending
+ # on the folder the build was run from in case the build uses multiple
+ # related projects with their own Jamroot separate modules.
+ local project-module = [ $(project).project-module ] ;
+ local root-module = [ project.get-jamroot-module $(project-module) ] ;
+ if ! $(root-module)
+ {
+ import errors ;
+ if [ project.is-config-module $(project-module) ]
+ {
+ errors.user-error doxygen targets can not be declared in Boost
+ Build's configuration modules. ;
+ }
+ else
+ {
+ errors.user-error doxygen targets can not be declared in
+ standalone projects. : use a Jamfile/Jamroot project
+ instead. ;
+ }
+ }
+ local root-project = [ project.target $(root-module) ] ;
+
+ local targets =
+ [ new file-target latex.check : : $(root-project) : [ new action :
+ doxygen.check-latex ] ]
+ [ new file-target dvips.check : : $(root-project) : [ new action :
+ doxygen.check-dvips ] ]
+ [ new file-target gs.check : : $(root-project) : [ new action :
+ doxygen.check-gs ] ] ;
+
+ for local target in $(targets)
+ {
+ .check-tools-targets += [ virtual-target.register $(target) ] ;
+ }
+ }
+ return $(.check-tools-targets) ;
+}
+
+
+project.initialize $(__name__) ;
+project doxygen ;
+
+class doxygen-check-tools-target-class : basic-target
+{
+ rule construct ( name : sources * : property-set )
+ {
+ IMPORT doxygen : check-tools-targets : $(__name__) :
+ doxygen.check-tools-targets ;
+ return [ property-set.empty ] [ doxygen.check-tools-targets [ project ]
+ ] ;
+ }
+}
+
+
+# Declares a metatarget for collecting version information on different external
+# tools used in this module.
+#
+rule check-tools ( target )
+{
+ freeze-config ;
+ targets.create-metatarget doxygen-check-tools-target-class :
+ [ project.current ] : $(target) ;
+}
+
+
+# User-level rule to generate HTML files or BoostBook XML from a set of headers
+# via Doxygen.
+#
+rule doxygen ( target : sources + : requirements * : default-build * :
+ usage-requirements * )
+{
+ freeze-config ;
+ local project = [ project.current ] ;
+
+ if $(target:S) = .html
+ {
+ # Build an HTML directory from the sources.
+ local html-location = [ feature.get-values <location> : $(requirements)
+ ] ;
+ local output-dir ;
+ if [ $(project).get build-dir ]
+ {
+ # Explicitly specified build dir. Add html at the end.
+ output-dir = [ path.join [ $(project).build-dir ]
+ $(html-location:E=html) ] ;
+ }
+ else
+ {
+ # Trim 'bin' from implicit build dir, for no other reason than
+ # backward compatibility.
+ output-dir = [ path.join [ path.parent [ $(project).build-dir ] ]
+ $(html-location:E=html) ] ;
+ }
+ output-dir = [ path.root $(output-dir) [ path.pwd ] ] ;
+ local output-dir-native = [ path.native $(output-dir) ] ;
+ requirements = [ property.change $(requirements) : <location> ] ;
+
+ # The doxygen configuration file.
+ targets.create-typed-target DOXYFILE : $(project) : $(target:S=.tag)
+ : $(sources)
+ : $(requirements)
+ <doxygen:param>GENERATE_HTML=YES
+ <doxygen:param>GENERATE_XML=NO
+ <doxygen:param>"OUTPUT_DIRECTORY=\"$(output-dir-native)\""
+ <doxygen:param>HTML_OUTPUT=$(target:B)
+ : $(default-build) ;
+ $(project).mark-target-as-explicit $(target:S=.tag) ;
+
+ # The html directory to generate by running doxygen.
+ targets.create-typed-target DOXYGEN_HTML_MULTIFILE : $(project)
+ : $(target:S=.dir) # Name.
+ : $(target:S=.tag) # Sources.
+ : $(requirements)
+ <doxygen:param>"OUTPUT_DIRECTORY=\"$(output-dir-native)\""
+ <doxygen:param>HTML_OUTPUT=$(target:B)
+ : $(default-build) ;
+ $(project).mark-target-as-explicit $(target:S=.dir) ;
+
+ # The redirect html file into the generated html.
+ targets.create-typed-target DOXYGEN_HTML : $(project) : $(target)
+ : $(target:S=.dir) # Sources.
+ : $(requirements) <location>$(output-dir)
+ : $(default-build) ;
+ }
+ else
+ {
+ # Build a BoostBook XML file from the sources.
+ local location-xml = [ feature.get-values <location> : $(requirements) ]
+ ;
+ requirements = [ property.change $(requirements) : <location> ] ;
+ local target-xml = $(target:B=$(target:B)-xml) ;
+
+ # Check whether we need to build images.
+ local images-location = [ feature.get-values <doxygen:xml-imagedir> :
+ $(requirements) ] ;
+ if $(images-location)
+ {
+ # Prepare a metatarget for collecting used external tool version
+ # information. We use only one such metatarget as they always
+ # produce the same files and we do not want to deal with multiple
+ # metatargets having matching names, causing 'ambiguous variants'
+ # errors.
+ if ! $(.check-tools)
+ {
+ # FIXME: Since we have the check-tools target object reference,
+ # see how we can use that instead of having to construct a valid
+ # target reference string for use in <dependency> property
+ # values.
+ local project-id = --doxygen.check-tools-project-- ;
+ local target-id = --doxygen.check-tools-- ;
+ local pm = [ $(project).project-module ] ;
+ project.register-id $(project-id) : $(pm) ;
+ check-tools $(target-id) ;
+ .check-tools = /$(project-id)//$(target-id) ;
+ }
+
+ doxygen $(target).doxygen-xml-images.html : $(sources) :
+ $(requirements)
+ <doxygen.rmdir>on
+ <doxygen:param>QUIET=YES
+ <doxygen:param>WARNINGS=NO
+ <doxygen:param>WARN_IF_UNDOCUMENTED=NO
+ <dependency>$(.check-tools) ;
+ $(project).mark-target-as-explicit $(target).doxygen-xml-images.html
+ ;
+
+ targets.create-typed-target DOXYGEN_XML_IMAGES : $(project)
+ : $(target).doxygen-xml-images # Name.
+ : $(target).doxygen-xml-images.html # Sources.
+ : $(requirements)
+ : $(default-build) ;
+ $(project).mark-target-as-explicit $(target).doxygen-xml-images ;
+
+ if ! [ MATCH (/)$ : $(images-location) ]
+ {
+ images-location = $(images-location)/ ;
+ }
+
+ requirements +=
+ <dependency>$(target).doxygen-xml-images
+ <xsl:param>boost.doxygen.formuladir=$(images-location) ;
+ }
+
+ # The doxygen configuration file.
+ targets.create-typed-target DOXYFILE : $(project) : $(target-xml:S=.tag)
+ : $(sources)
+ : $(requirements)
+ <doxygen:param>GENERATE_HTML=NO
+ <doxygen:param>GENERATE_XML=YES
+ <doxygen:param>XML_OUTPUT=$(target-xml)
+ : $(default-build) ;
+ $(project).mark-target-as-explicit $(target-xml:S=.tag) ;
+
+ # The Doxygen XML directory for the processed source files.
+ targets.create-typed-target DOXYGEN_XML_MULTIFILE : $(project)
+ : $(target-xml:S=.dir) # Name.
+ : $(target-xml:S=.tag) # Sources.
+ : $(requirements)
+ : $(default-build) ;
+ $(project).mark-target-as-explicit $(target-xml:S=.dir) ;
+
+ # The resulting BoostBook file is generated by the processor tool. The
+ # tool can be either the xsltproc plus accompanying XSL scripts. Or it
+ # can be the python doxproc.py script.
+ targets.create-typed-target BOOSTBOOK : $(project) : $(target-xml)
+ : $(target-xml:S=.dir) # Sources.
+ : $(requirements)
+ : $(default-build) ;
+ $(project).mark-target-as-explicit $(target-xml) ;
+
+ stage $(target:S=.xml) # Name.
+ : $(target-xml) # Sources.
+ : $(requirements)
+ <location>$(location-xml:E=.)
+ <name>$(target:S=.xml)
+ : $(default-build) ;
+ $(project).mark-target-as-explicit $(target:S=.xml) ;
+
+ # TODO: See why this alias target is used here instead of simply naming
+ # the previous stage target $(target) and having it specify the alias
+ # target's usage requirements directly.
+ alias $(target) : : $(requirements) : $(default-build) :
+ $(usage-requirements) <dependency>$(target:S=.xml) ;
+ }
+}
diff --git a/src/kenlm/jam-files/boost-build/tools/doxygen/windows-paths-check.doxyfile b/src/kenlm/jam-files/boost-build/tools/doxygen/windows-paths-check.doxyfile
new file mode 100644
index 0000000..9b969df
--- /dev/null
+++ b/src/kenlm/jam-files/boost-build/tools/doxygen/windows-paths-check.doxyfile
@@ -0,0 +1,3 @@
+INPUT = windows-paths-check.hpp
+GENERATE_HTML = NO
+GENERATE_LATEX = NO
diff --git a/scripts/training/MGIZA/.tm_project2.cache b/src/kenlm/jam-files/boost-build/tools/doxygen/windows-paths-check.hpp
similarity index 100%
rename from scripts/training/MGIZA/.tm_project2.cache
rename to src/kenlm/jam-files/boost-build/tools/doxygen/windows-paths-check.hpp
diff --git a/src/kenlm/jam-files/boost-build/tools/fop.jam b/src/kenlm/jam-files/boost-build/tools/fop.jam
new file mode 100644
index 0000000..c24b872
--- /dev/null
+++ b/src/kenlm/jam-files/boost-build/tools/fop.jam
@@ -0,0 +1,69 @@
+# Copyright (C) 2003-2004 Doug Gregor and Dave Abrahams. Distributed
+# under the Boost Software License, Version 1.0. (See accompanying
+# file LICENSE_1_0.txt or copy at
+# http://www.boost.org/LICENSE_1_0.txt)
+#
+# This module defines rules to handle generation of PDF and
+# PostScript files from XSL Formatting Objects via Apache FOP
+
+import generators ;
+import common ;
+import boostbook ;
+
+generators.register-standard fop.render.pdf : FO : PDF ;
+generators.register-standard fop.render.ps : FO : PS ;
+
+# Initializes the fop toolset.
+#
+rule init ( fop-command ? : java-home ? : java ? )
+{
+ local has-command = $(.has-command) ;
+
+ if $(fop-command)
+ {
+ .has-command = true ;
+ }
+
+ if $(fop-command) || ! $(has-command)
+ {
+ fop-command = [ common.get-invocation-command fop : fop : $(fop-command)
+ : [ modules.peek : FOP_DIR ] ] ;
+ }
+
+ if $(fop-command)
+ {
+ .FOP_COMMAND = $(fop-command) ;
+ }
+
+ if $(java-home) || $(java)
+ {
+ .FOP_SETUP = ;
+
+
+ # JAVA_HOME is the location that java was installed to.
+
+ if $(java-home)
+ {
+ .FOP_SETUP += [ common.variable-setting-command JAVA_HOME : $(java-home) ] ;
+ }
+
+ # JAVACMD is the location of the java executable, useful for a
+ # non-standard java installation, where the executable isn't at
+ # $JAVA_HOME/bin/java.
+
+ if $(java)
+ {
+ .FOP_SETUP += [ common.variable-setting-command JAVACMD : $(java) ] ;
+ }
+ }
+}
+
+actions render.pdf
+{
+ $(.FOP_SETUP) $(.FOP_COMMAND:E=fop) $(>) $(<)
+}
+
+actions render.ps
+{
+ $(.FOP_SETUP) $(.FOP_COMMAND:E=fop) $(>) -ps $(<)
+}
diff --git a/src/kenlm/jam-files/boost-build/tools/fortran.jam b/src/kenlm/jam-files/boost-build/tools/fortran.jam
new file mode 100644
index 0000000..3766582
--- /dev/null
+++ b/src/kenlm/jam-files/boost-build/tools/fortran.jam
@@ -0,0 +1,55 @@
+# Copyright (C) 2004 Toon Knapen
+#
+# Use, modification and distribution is subject to the Boost Software
+# License Version 1.0. (See accompanying file LICENSE_1_0.txt or
+# http://www.boost.org/LICENSE_1_0.txt)
+
+#
+# This file contains common settings for all fortran tools
+#
+
+import "class" : new ;
+import feature : feature ;
+
+import type ;
+import generators ;
+import common ;
+
+type.register FORTRAN : f F for f77 ;
+type.register FORTRAN90 : f90 F90 ;
+
+feature fortran : : free ;
+feature fortran90 : : free ;
+
+class fortran-compiling-generator : generator
+{
+ rule __init__ ( id : source-types + : target-types + : requirements * : optional-properties * )
+ {
+ generator.__init__ $(id) : $(source-types) : $(target-types) : $(requirements) : $(optional-properties) ;
+ }
+}
+
+rule register-fortran-compiler ( id : source-types + : target-types + : requirements * : optional-properties * )
+{
+ local g = [ new fortran-compiling-generator $(id) : $(source-types) : $(target-types) : $(requirements) : $(optional-properties) ] ;
+ generators.register $(g) ;
+}
+
+class fortran90-compiling-generator : generator
+{
+ rule __init__ ( id : source-types + : target-types + : requirements * : optional-properties * )
+ {
+ generator.__init__ $(id) : $(source-types) : $(target-types) : $(requirements) : $(optional-properties) ;
+ }
+}
+
+rule register-fortran90-compiler ( id : source-types + : target-types + : requirements * : optional-properties * )
+{
+ local g = [ new fortran90-compiling-generator $(id) : $(source-types) : $(target-types) : $(requirements) : $(optional-properties) ] ;
+ generators.register $(g) ;
+}
+
+# FIXME: this is ugly, should find a better way (we'd want client code to
+# register all generators as "generator.some-rule", not with "some-module.some-rule".)
+IMPORT $(__name__) : register-fortran-compiler : : generators.register-fortran-compiler ;
+IMPORT $(__name__) : register-fortran90-compiler : : generators.register-fortran90-compiler ;
diff --git a/src/kenlm/jam-files/boost-build/tools/gcc.jam b/src/kenlm/jam-files/boost-build/tools/gcc.jam
new file mode 100644
index 0000000..ef90f05
--- /dev/null
+++ b/src/kenlm/jam-files/boost-build/tools/gcc.jam
@@ -0,0 +1,1184 @@
+# Copyright 2001 David Abrahams
+# Copyright 2002-2006 Rene Rivera
+# Copyright 2002-2003 Vladimir Prus
+# Copyright 2005 Reece H. Dunn
+# Copyright 2006 Ilya Sokolov
+# Copyright 2007 Roland Schwarz
+# Copyright 2007 Boris Gubenko
+#
+# Distributed under the Boost Software License, Version 1.0.
+# (See accompanying file LICENSE_1_0.txt or copy at
+# http://www.boost.org/LICENSE_1_0.txt)
+
+import "class" : new ;
+import common ;
+import feature ;
+import fortran ;
+import generators ;
+import os ;
+import pch ;
+import property ;
+import property-set ;
+import rc ;
+import regex ;
+import set ;
+import toolset ;
+import type ;
+import unix ;
+
+
+if [ MATCH (--debug-configuration) : [ modules.peek : ARGV ] ]
+{
+ .debug-configuration = true ;
+}
+
+
+feature.extend toolset : gcc ;
+# feature.subfeature toolset gcc : flavor : : optional ;
+
+toolset.inherit-generators gcc : unix : unix.link unix.link.dll ;
+toolset.inherit-flags gcc : unix ;
+toolset.inherit-rules gcc : unix ;
+
+generators.override gcc.prebuilt : builtin.prebuilt ;
+generators.override gcc.searched-lib-generator : searched-lib-generator ;
+
+# Make gcc toolset object files use the "o" suffix on all platforms.
+type.set-generated-target-suffix OBJ : <toolset>gcc : o ;
+type.set-generated-target-suffix OBJ : <toolset>gcc <target-os>windows : o ;
+type.set-generated-target-suffix OBJ : <toolset>gcc <target-os>cygwin : o ;
+
+
+# Initializes the gcc toolset for the given version. If necessary, command may
+# be used to specify where the compiler is located. The parameter 'options' is a
+# space-delimited list of options, each one specified as
+# <option-name>option-value. Valid option names are: cxxflags, linkflags and
+# linker-type. Accepted linker-type values are aix, darwin, gnu, hpux, osf or
+# sun and the default value will be selected based on the current OS.
+# Example:
+# using gcc : 3.4 : : <cxxflags>foo <linkflags>bar <linker-type>sun ;
+#
+# The compiler command to use is detected in three steps:
+# 1) If an explicit command is specified by the user, it will be used and must
+# be available.
+# 2) If only a certain version is specified, it is enforced:
+# - either the 'g++-VERSION' command must be available
+# - or the default command 'g++' must be available and match the exact
+# version.
+# 3) Without user-provided restrictions use default 'g++'.
+#
+rule init ( version ? : command * : options * )
+{
+ #1): use user-provided command
+ local tool-command = ;
+ if $(command)
+ {
+ tool-command = [ common.get-invocation-command-nodefault gcc : g++ :
+ $(command) ] ;
+ if ! $(tool-command)
+ {
+ import errors ;
+ errors.error toolset gcc initialization:
+ : provided command '$(command)' not found
+ : initialized from [ errors.nearest-user-location ] ;
+ }
+ }
+ #2): enforce user-provided version
+ else if $(version)
+ {
+ tool-command = [ common.get-invocation-command-nodefault gcc :
+ "g++-$(version[1])" ] ;
+
+ #2.1) fallback: check whether "g++" reports the requested version
+ if ! $(tool-command)
+ {
+ tool-command = [ common.get-invocation-command-nodefault gcc : g++ ]
+ ;
+ if $(tool-command)
+ {
+ local tool-command-string = $(tool-command:J=" ") ;
+ local tool-version = [ MATCH "^([0-9.]+)" :
+ [ SHELL "$(tool-command-string) -dumpversion" ] ] ;
+ if $(tool-version) != $(version)
+ {
+                # Permit a match between a two-digit version specified by the
+ # user (e.g. 4.4) and a 3-digit version reported by gcc.
+ # Since only two digits are present in the binary name
+ # anyway, insisting that user specify the 3-digit version
+ # when configuring Boost.Build, while it is not required on
+ # the command line, would be strange.
+ local stripped = [ MATCH "^([0-9]+\.[0-9]+).*" :
+ $(tool-version) ] ;
+ if $(stripped) != $(version)
+ {
+ import errors ;
+ errors.error toolset gcc initialization:
+ : version '$(version)' requested but
+ 'g++-$(version)' not found and version
+ '$(tool-version)' of default '$(tool-command)'
+ does not match
+ : initialized from [ errors.nearest-user-location ]
+ ;
+ tool-command = ;
+ }
+ # Use full 3-digit version to be compatible with the
+ # 'using gcc ;' case
+ version = $(tool-version) ;
+ }
+ }
+ else
+ {
+ import errors ;
+ errors.error toolset gcc initialization:
+ : version '$(version)' requested but neither
+ 'g++-$(version)' nor default 'g++' found
+ : initialized from [ errors.nearest-user-location ] ;
+ }
+ }
+ }
+ #3) default: no command and no version specified, try using "g++"
+ else
+ {
+ tool-command = [ common.get-invocation-command-nodefault gcc : g++ ] ;
+ if ! $(tool-command)
+ {
+ import errors ;
+ errors.error toolset gcc initialization:
+ : no command provided, default command 'g++' not found
+ : initialized from [ errors.nearest-user-location ] ;
+ }
+ }
+
+
+ # Information about the gcc command...
+ # The command.
+ local command = $(tool-command) ;
+ # The 'command' variable can have multiple elements but when calling the
+ # SHELL builtin we need a single string.
+ local command-string = $(command:J=" ") ;
+ # The root directory of the tool install.
+ local root = [ feature.get-values <root> : $(options) ] ;
+ # The bin directory where to find the command to execute.
+ local bin ;
+ # The compiler flavor.
+ local flavor = [ feature.get-values <flavor> : $(options) ] ;
+ # Autodetect the root and bin dir if not given.
+ if $(command)
+ {
+ bin ?= [ common.get-absolute-tool-path $(command[-1]) ] ;
+ root ?= $(bin:D) ;
+ }
+ # Autodetect the version and flavor if not given.
+ if $(command)
+ {
+ local machine = [ MATCH "^([^ ]+)" :
+ [ SHELL "$(command-string) -dumpmachine" ] ] ;
+ version ?= [ MATCH "^([0-9.]+)" :
+ [ SHELL "$(command-string) -dumpversion" ] ] ;
+ switch $(machine:L)
+ {
+ case *mingw* : flavor ?= mingw ;
+ }
+ }
+
+ local condition ;
+ if $(flavor)
+ {
+ condition = flavor $(flavor) ;
+ }
+ condition = [ common.check-init-parameters gcc : version $(version)
+ : $(condition) ] ;
+
+ common.handle-options gcc : $(condition) : $(command) : $(options) ;
+
+ local linker = [ feature.get-values <linker-type> : $(options) ] ;
+ # TODO: The logic below should actually be keyed on <target-os>.
+ if ! $(linker)
+ {
+ switch [ os.name ]
+ {
+ case OSF : linker = osf ;
+ case HPUX : linker = hpux ;
+ case AIX : linker = aix ;
+ case SOLARIS : linker = sun ;
+ case * : linker = gnu ;
+ }
+ }
+ init-link-flags gcc $(linker) $(condition) ;
+
+ # If gcc is installed in a non-standard location, we would need to add
+ # LD_LIBRARY_PATH when running programs created with it (for unit-test/run
+ # rules).
+ if $(command)
+ {
+ # On multilib 64-bit boxes, there are both 32-bit and 64-bit libraries
+ # and all must be added to LD_LIBRARY_PATH. The linker will pick the
+        # right ones. Note that we do not provide a clean way to build a 32-bit
+ # binary using a 64-bit compiler, but user can always pass -m32
+ # manually.
+ local lib_path = $(root)/bin $(root)/lib $(root)/lib32 $(root)/lib64 ;
+ if $(.debug-configuration)
+ {
+ ECHO notice: using gcc libraries :: $(condition) :: $(lib_path) ;
+ }
+ toolset.flags gcc.link RUN_PATH $(condition) : $(lib_path) ;
+ }
+
+ # If we are not using a system gcc installation we should adjust the various
+ # programs as needed to prefer using their installation specific versions.
+ # This is essential for correct use of MinGW and for cross-compiling.
+
+ local nl = "
+" ;
+
+ # - Archive builder.
+ local archiver = [ common.get-invocation-command gcc
+ : [ NORMALIZE_PATH [ MATCH "(.*)[$(nl)]+" :
+ [ SHELL "$(command-string) -print-prog-name=ar" ] ] ]
+ : [ feature.get-values <archiver> : $(options) ]
+ : $(bin)
+ : search-path ] ;
+ toolset.flags gcc.archive .AR $(condition) : $(archiver[1]) ;
+ if $(.debug-configuration)
+ {
+ ECHO notice: using gcc archiver :: $(condition) :: $(archiver[1]) ;
+ }
+
+ # - Ranlib.
+ local ranlib = [ common.get-invocation-command gcc
+ : [ NORMALIZE_PATH [ MATCH "(.*)[$(nl)]+" :
+ [ SHELL "$(command-string) -print-prog-name=ranlib" ] ] ]
+ : [ feature.get-values <ranlib> : $(options) ]
+ : $(bin)
+ : search-path ] ;
+ toolset.flags gcc.archive .RANLIB $(condition) : $(ranlib[1]) ;
+ if $(.debug-configuration)
+ {
+ ECHO notice: using gcc ranlib :: $(condition) :: $(ranlib[1]) ;
+ }
+
+ # - Resource compiler.
+ local rc = [ common.get-invocation-command-nodefault gcc : windres :
+ [ feature.get-values <rc> : $(options) ] : $(bin) : search-path ] ;
+ local rc-type = [ feature.get-values <rc-type> : $(options) ] ;
+ rc-type ?= windres ;
+ if ! $(rc)
+ {
+ # If we can not find an RC compiler we fallback to a null one that
+ # creates empty object files. This allows the same Jamfiles to work
+ # across the board. The null RC uses assembler to create the empty
+ # objects, so configure that.
+ rc = [ common.get-invocation-command gcc : as : : $(bin) : search-path ]
+ ;
+ rc-type = null ;
+ }
+ rc.configure $(rc) : $(condition) : <rc-type>$(rc-type) ;
+}
+
+if [ os.name ] = NT
+{
+ # This causes single-line command invocation to not go through .bat files,
+ # thus avoiding command-line length limitations.
+ # TODO: Set JAMSHELL on specific targets instead of globally.
+ JAMSHELL = % ;
+}
+
+generators.register-c-compiler gcc.compile.c++.preprocess : CPP : PREPROCESSED_CPP : <toolset>gcc ;
+generators.register-c-compiler gcc.compile.c.preprocess : C : PREPROCESSED_C : <toolset>gcc ;
+generators.register-c-compiler gcc.compile.c++ : CPP : OBJ : <toolset>gcc ;
+generators.register-c-compiler gcc.compile.c : C : OBJ : <toolset>gcc ;
+generators.register-c-compiler gcc.compile.asm : ASM : OBJ : <toolset>gcc ;
+generators.register-fortran-compiler gcc.compile.fortran : FORTRAN FORTRAN90 : OBJ : <toolset>gcc ;
+
+# pch support
+
+# The compiler looks for a precompiled header in each directory just before it
+# looks for the include file in that directory. The name searched for is the
+# name specified in the #include directive with ".gch" suffix appended. The
+# logic in gcc-pch-generator will make sure that the BASE_PCH suffix is appended
+# to the full header name.
+
+type.set-generated-target-suffix PCH : <toolset>gcc : gch ;
+
+# GCC-specific pch generator.
+class gcc-pch-generator : pch-generator
+{
+ import project ;
+ import property-set ;
+ import type ;
+
+ rule run-pch ( project name ? : property-set : sources + )
+ {
+ # Find the header in sources. Ignore any CPP sources.
+ local header ;
+ for local s in $(sources)
+ {
+ if [ type.is-derived [ $(s).type ] H ]
+ {
+ header = $(s) ;
+ }
+ }
+
+ # Error handling: base header file name should be the same as the base
+ # precompiled header name.
+ local header-name = [ $(header).name ] ;
+ local header-basename = $(header-name:B) ;
+ if $(header-basename) != $(name)
+ {
+ local location = [ $(project).project-module ] ;
+ import errors : user-error : errors.user-error ;
+ errors.user-error "in" $(location): pch target name '$(name)' should
+ be the same as the base name of header file '$(header-name)' ;
+ }
+
+ local pch-file = [ generator.run $(project) $(name) : $(property-set)
+ : $(header) ] ;
+
+ # Return result of base class and pch-file property as
+ # usage-requirements.
+ return
+ [ property-set.create <pch-file>$(pch-file) <cflags>-Winvalid-pch ]
+ $(pch-file)
+ ;
+ }
+
+ # Calls the base version specifying source's name as the name of the created
+ # target. As a result, the PCH will be named whatever.hpp.gch, and not
+ # whatever.gch.
+ rule generated-targets ( sources + : property-set : project name ? )
+ {
+ name = [ $(sources[1]).name ] ;
+ return [ generator.generated-targets $(sources)
+ : $(property-set) : $(project) $(name) ] ;
+ }
+}
+
+# Note: the 'H' source type will catch both '.h' header and '.hpp' header. The
+# latter have HPP type, but HPP type is derived from H. The type of compilation
+# is determined entirely by the destination type.
+generators.register [ new gcc-pch-generator gcc.compile.c.pch : H : C_PCH : <pch>on <toolset>gcc ] ;
+generators.register [ new gcc-pch-generator gcc.compile.c++.pch : H : CPP_PCH : <pch>on <toolset>gcc ] ;
+
+# Override default do-nothing generators.
+generators.override gcc.compile.c.pch : pch.default-c-pch-generator ;
+generators.override gcc.compile.c++.pch : pch.default-cpp-pch-generator ;
+
+toolset.flags gcc.compile PCH_FILE <pch>on : <pch-file> ;
+
+# Declare flags and action for compilation.
+toolset.flags gcc.compile OPTIONS <optimization>off : -O0 ;
+toolset.flags gcc.compile OPTIONS <optimization>speed : -O3 ;
+toolset.flags gcc.compile OPTIONS <optimization>space : -Os ;
+
+toolset.flags gcc.compile OPTIONS <inlining>off : -fno-inline ;
+toolset.flags gcc.compile OPTIONS <inlining>on : -Wno-inline ;
+toolset.flags gcc.compile OPTIONS <inlining>full : -finline-functions -Wno-inline ;
+
+toolset.flags gcc.compile OPTIONS <warnings>off : -w ;
+toolset.flags gcc.compile OPTIONS <warnings>on : -Wall ;
+toolset.flags gcc.compile OPTIONS <warnings>all : -Wall -pedantic ;
+toolset.flags gcc.compile OPTIONS <warnings-as-errors>on : -Werror ;
+
+toolset.flags gcc.compile OPTIONS <debug-symbols>on : -g ;
+toolset.flags gcc.compile OPTIONS <profiling>on : -pg ;
+
+toolset.flags gcc.compile.c++ OPTIONS <rtti>off : -fno-rtti ;
+toolset.flags gcc.compile.c++ OPTIONS <exception-handling>off : -fno-exceptions ;
+
+rule setup-fpic ( targets * : sources * : properties * )
+{
+ local link = [ feature.get-values link : $(properties) ] ;
+ if $(link) = shared
+ {
+ local target = [ feature.get-values target-os : $(properties) ] ;
+
+ # This logic will add -fPIC for all compilations:
+ #
+ # lib a : a.cpp b ;
+ # obj b : b.cpp ;
+ # exe c : c.cpp a d ;
+ # obj d : d.cpp ;
+ #
+ # This all is fine, except that 'd' will be compiled with -fPIC even
+ # though it is not needed, as 'd' is used only in exe. However, it is
+ # hard to detect where a target is going to be used. Alternatively, we
+        # can set -fPIC only when main target type is LIB but then 'b' would be
+ # compiled without -fPIC which would lead to link errors on x86-64. So,
+ # compile everything with -fPIC.
+ #
+ # Yet another alternative would be to create a propagated <sharedable>
+ # feature and set it when building shared libraries, but that would be
+ # hard to implement and would increase the target path length even more.
+
+ # On Windows, fPIC is the default, and specifying -fPIC explicitly leads
+ # to a warning.
+ if ! $(target) in cygwin windows
+ {
+ OPTIONS on $(targets) += -fPIC ;
+ }
+ }
+}
+
+rule setup-address-model ( targets * : sources * : properties * )
+{
+ local model = [ feature.get-values address-model : $(properties) ] ;
+ if $(model)
+ {
+ local option ;
+ local os = [ feature.get-values target-os : $(properties) ] ;
+ if $(os) = aix
+ {
+ if $(model) = 32
+ {
+ option = -maix32 ;
+ }
+ else
+ {
+ option = -maix64 ;
+ }
+ }
+ else if $(os) = hpux
+ {
+ if $(model) = 32
+ {
+ option = -milp32 ;
+ }
+ else
+ {
+ option = -mlp64 ;
+ }
+ }
+ else
+ {
+ local arch = [ feature.get-values architecture : $(properties) ] ;
+ if $(arch) != arm
+ {
+ if $(model) = 32
+ {
+ option = -m32 ;
+ }
+ else if $(model) = 64
+ {
+ option = -m64 ;
+ }
+ }
+ # For darwin, the model can be 32_64. darwin.jam will handle that
+ # on its own.
+ }
+ OPTIONS on $(targets) += $(option) ;
+ }
+}
+
+
+# FIXME: this should not use os.name.
+if ! [ os.name ] in NT OSF HPUX AIX
+{
+ # OSF does have an option called -soname but it does not seem to work as
+ # expected, therefore it has been disabled.
+ HAVE_SONAME = "" ;
+ SONAME_OPTION = -h ;
+}
+
+# HPUX, for some reason, seems to use '+h' instead of '-h'.
+if [ os.name ] = HPUX
+{
+ HAVE_SONAME = "" ;
+ SONAME_OPTION = +h ;
+}
+
+toolset.flags gcc.compile USER_OPTIONS <cflags> ;
+toolset.flags gcc.compile.c++ USER_OPTIONS <cxxflags> ;
+toolset.flags gcc.compile DEFINES <define> ;
+toolset.flags gcc.compile INCLUDES <include> ;
+toolset.flags gcc.compile.c++ TEMPLATE_DEPTH <c++-template-depth> ;
+toolset.flags gcc.compile.fortran USER_OPTIONS <fflags> ;
+
+rule compile.c++.pch ( targets * : sources * : properties * )
+{
+ setup-threading $(targets) : $(sources) : $(properties) ;
+ setup-fpic $(targets) : $(sources) : $(properties) ;
+ setup-address-model $(targets) : $(sources) : $(properties) ;
+}
+
+actions compile.c++.pch
+{
+ "$(CONFIG_COMMAND)" -x c++-header $(OPTIONS) $(USER_OPTIONS) -D$(DEFINES) -I"$(INCLUDES)" -c -o "$(<)" "$(>)"
+}
+
+rule compile.c.pch ( targets * : sources * : properties * )
+{
+ setup-threading $(targets) : $(sources) : $(properties) ;
+ setup-fpic $(targets) : $(sources) : $(properties) ;
+ setup-address-model $(targets) : $(sources) : $(properties) ;
+}
+
+actions compile.c.pch
+{
+ "$(CONFIG_COMMAND)" -x c-header $(OPTIONS) $(USER_OPTIONS) -D$(DEFINES) -I"$(INCLUDES)" -c -o "$(<)" "$(>)"
+}
+
+rule compile.c++.preprocess ( targets * : sources * : properties * )
+{
+ setup-threading $(targets) : $(sources) : $(properties) ;
+ setup-fpic $(targets) : $(sources) : $(properties) ;
+ setup-address-model $(targets) : $(sources) : $(properties) ;
+
+ # Some extensions are compiled as C++ by default. For others, we need to
+ # pass -x c++. We could always pass -x c++ but distcc does not work with it.
+ if ! $(>:S) in .cc .cp .cxx .cpp .c++ .C
+ {
+ LANG on $(<) = "-x c++" ;
+ }
+ DEPENDS $(<) : [ on $(<) return $(PCH_FILE) ] ;
+}
+
+rule compile.c.preprocess ( targets * : sources * : properties * )
+{
+ setup-threading $(targets) : $(sources) : $(properties) ;
+ setup-fpic $(targets) : $(sources) : $(properties) ;
+ setup-address-model $(targets) : $(sources) : $(properties) ;
+
+ # If we use the name g++ then default file suffix -> language mapping does
+ # not work. So have to pass -x option. Maybe, we can work around this by
+ # allowing the user to specify both C and C++ compiler names.
+ #if $(>:S) != .c
+ #{
+ LANG on $(<) = "-x c" ;
+ #}
+ DEPENDS $(<) : [ on $(<) return $(PCH_FILE) ] ;
+}
+
+rule compile.c++ ( targets * : sources * : properties * )
+{
+ setup-threading $(targets) : $(sources) : $(properties) ;
+ setup-fpic $(targets) : $(sources) : $(properties) ;
+ setup-address-model $(targets) : $(sources) : $(properties) ;
+
+ # Some extensions are compiled as C++ by default. For others, we need to
+ # pass -x c++. We could always pass -x c++ but distcc does not work with it.
+ if ! $(>:S) in .cc .cp .cxx .cpp .c++ .C
+ {
+ LANG on $(<) = "-x c++" ;
+ }
+ DEPENDS $(<) : [ on $(<) return $(PCH_FILE) ] ;
+
+ # Here we want to raise the template-depth parameter value to something
+ # higher than the default value of 17. Note that we could do this using the
+ # feature.set-default rule but we do not want to set the default value for
+ # all toolsets as well.
+ #
+ # TODO: This 'modified default' has been inherited from some 'older Boost
+ # Build implementation' and has most likely been added to make some Boost
+ # library parts compile correctly. We should see what exactly prompted this
+ # and whether we can get around the problem more locally.
+ local template-depth = [ on $(<) return $(TEMPLATE_DEPTH) ] ;
+ if ! $(template-depth)
+ {
+ TEMPLATE_DEPTH on $(<) = 128 ;
+ }
+}
+
+rule compile.c ( targets * : sources * : properties * )
+{
+ setup-threading $(targets) : $(sources) : $(properties) ;
+ setup-fpic $(targets) : $(sources) : $(properties) ;
+ setup-address-model $(targets) : $(sources) : $(properties) ;
+
+ # If we use the name g++ then default file suffix -> language mapping does
+ # not work. So have to pass -x option. Maybe, we can work around this by
+ # allowing the user to specify both C and C++ compiler names.
+ #if $(>:S) != .c
+ #{
+ LANG on $(<) = "-x c" ;
+ #}
+ DEPENDS $(<) : [ on $(<) return $(PCH_FILE) ] ;
+}
+
+rule compile.fortran ( targets * : sources * : properties * )
+{
+ setup-threading $(targets) : $(sources) : $(properties) ;
+ setup-fpic $(targets) : $(sources) : $(properties) ;
+ setup-address-model $(targets) : $(sources) : $(properties) ;
+}
+
+actions compile.c++ bind PCH_FILE
+{
+ "$(CONFIG_COMMAND)" $(LANG) -ftemplate-depth-$(TEMPLATE_DEPTH) $(OPTIONS) $(USER_OPTIONS) -D$(DEFINES) -I"$(PCH_FILE:D)" -I"$(INCLUDES)" -c -o "$(<:W)" "$(>:W)"
+}
+
+actions compile.c bind PCH_FILE
+{
+ "$(CONFIG_COMMAND)" $(LANG) $(OPTIONS) $(USER_OPTIONS) -D$(DEFINES) -I"$(PCH_FILE:D)" -I"$(INCLUDES)" -c -o "$(<)" "$(>)"
+}
+
+actions compile.c++.preprocess bind PCH_FILE
+{
+ "$(CONFIG_COMMAND)" $(LANG) -ftemplate-depth-$(TEMPLATE_DEPTH) $(OPTIONS) $(USER_OPTIONS) -D$(DEFINES) -I"$(PCH_FILE:D)" -I"$(INCLUDES)" "$(>:W)" -E >"$(<:W)"
+}
+
+actions compile.c.preprocess bind PCH_FILE
+{
+ "$(CONFIG_COMMAND)" $(LANG) $(OPTIONS) $(USER_OPTIONS) -D$(DEFINES) -I"$(PCH_FILE:D)" -I"$(INCLUDES)" "$(>)" -E >$(<)
+}
+
+actions compile.fortran
+{
+ "$(CONFIG_COMMAND)" $(LANG) $(OPTIONS) $(USER_OPTIONS) -D$(DEFINES) -I"$(PCH_FILE:D)" -I"$(INCLUDES)" -c -o "$(<)" "$(>)"
+}
+
+rule compile.asm ( targets * : sources * : properties * )
+{
+ setup-fpic $(targets) : $(sources) : $(properties) ;
+ setup-address-model $(targets) : $(sources) : $(properties) ;
+ LANG on $(<) = "-x assembler-with-cpp" ;
+}
+
+actions compile.asm
+{
+ "$(CONFIG_COMMAND)" $(LANG) $(OPTIONS) -D$(DEFINES) -I"$(INCLUDES)" -c -o "$(<)" "$(>)"
+}
+
+# Class checking that we do not try to use the <runtime-link>static property
+# while creating or using a shared library, since it is not supported by
+# gcc/libc.
+class gcc-linking-generator : unix-linking-generator
+{
+ rule run ( project name ? : property-set : sources + )
+ {
+ # TODO: Replace this with the use of a target-os property.
+ local no-static-link = ;
+ if [ modules.peek : UNIX ]
+ {
+ switch [ modules.peek : JAMUNAME ]
+ {
+ case * : no-static-link = true ;
+ }
+ }
+
+ local properties = [ $(property-set).raw ] ;
+ local reason ;
+ if $(no-static-link) && <runtime-link>static in $(properties)
+ {
+ if <link>shared in $(properties)
+ {
+ reason = On gcc, DLLs can not be built with
+ '<runtime-link>static'. ;
+ }
+ else if [ type.is-derived $(self.target-types[1]) EXE ]
+ {
+ for local s in $(sources)
+ {
+ local type = [ $(s).type ] ;
+ if $(type) && [ type.is-derived $(type) SHARED_LIB ]
+ {
+ reason = On gcc, using DLLs together with the
+ '<runtime-link>static' option is not possible. ;
+ }
+ }
+ }
+ }
+ if $(reason)
+ {
+ ECHO warning: $(reason) ;
+ ECHO warning: It is suggested to use '<runtime-link>static' together
+ with '<link>static'. ;
+ }
+ else
+ {
+ return [ unix-linking-generator.run $(project) $(name) :
+ $(property-set) : $(sources) ] ;
+ }
+ }
+}
+
+# The set of permissible input types is different on mingw. So, define two sets
+# of generators, with mingw generators selected when target-os=windows.
+
+local g ;
+g = [ new gcc-linking-generator gcc.mingw.link
+ : OBJ SEARCHED_LIB STATIC_LIB IMPORT_LIB
+ : EXE
+ : <toolset>gcc <target-os>windows ] ;
+$(g).set-rule-name gcc.link ;
+generators.register $(g) ;
+
+g = [ new gcc-linking-generator gcc.mingw.link.dll
+ : OBJ SEARCHED_LIB STATIC_LIB IMPORT_LIB
+ : IMPORT_LIB SHARED_LIB
+ : <toolset>gcc <target-os>windows ] ;
+$(g).set-rule-name gcc.link.dll ;
+generators.register $(g) ;
+
+generators.register
+ [ new gcc-linking-generator gcc.link
+ : LIB OBJ
+ : EXE
+ : <toolset>gcc ] ;
+generators.register
+ [ new gcc-linking-generator gcc.link.dll
+ : LIB OBJ
+ : SHARED_LIB
+ : <toolset>gcc ] ;
+
+generators.override gcc.mingw.link : gcc.link ;
+generators.override gcc.mingw.link.dll : gcc.link.dll ;
+
+# Cygwin is similar to msvc and mingw in that it uses import libraries. While in
+# simple cases, it can directly link to a shared library, it is believed to be
+# slower, and not always possible. Define cygwin-specific generators here.
+
+g = [ new gcc-linking-generator gcc.cygwin.link
+ : OBJ SEARCHED_LIB STATIC_LIB IMPORT_LIB
+ : EXE
+ : <toolset>gcc <target-os>cygwin ] ;
+$(g).set-rule-name gcc.link ;
+generators.register $(g) ;
+
+g = [ new gcc-linking-generator gcc.cygwin.link.dll
+ : OBJ SEARCHED_LIB STATIC_LIB IMPORT_LIB
+ : IMPORT_LIB SHARED_LIB
+ : <toolset>gcc <target-os>cygwin ] ;
+$(g).set-rule-name gcc.link.dll ;
+generators.register $(g) ;
+
+generators.override gcc.cygwin.link : gcc.link ;
+generators.override gcc.cygwin.link.dll : gcc.link.dll ;
+
+# Declare flags for linking.
+# First, the common flags.
+toolset.flags gcc.link OPTIONS <debug-symbols>on : -g ;
+toolset.flags gcc.link OPTIONS <profiling>on : -pg ;
+toolset.flags gcc.link USER_OPTIONS <linkflags> ;
+toolset.flags gcc.link LINKPATH <library-path> ;
+toolset.flags gcc.link FINDLIBS-ST <find-static-library> ;
+toolset.flags gcc.link FINDLIBS-SA <find-shared-library> ;
+toolset.flags gcc.link LIBRARIES <library-file> ;
+
+toolset.flags gcc.link.dll .IMPLIB-COMMAND <target-os>windows : "-Wl,--out-implib," ;
+toolset.flags gcc.link.dll .IMPLIB-COMMAND <target-os>cygwin : "-Wl,--out-implib," ;
+
+# For <runtime-link>static we made sure there are no dynamic libraries in the
+# link. On HP-UX not all system libraries exist as archived libraries (for
+# example, there is no libunwind.a), so, on this platform, the -static option
+# cannot be specified.
+if [ os.name ] != HPUX
+{
+ toolset.flags gcc.link OPTIONS <runtime-link>static : -static ;
+}
+
+# Now, the vendor specific flags.
+# The parameter linker can be either aix, darwin, gnu, hpux, osf or sun.
+rule init-link-flags ( toolset linker condition )
+{
+ switch $(linker)
+ {
+ case aix :
+ # On AIX we *have* to use the native linker.
+ #
+ # Using -brtl, the AIX linker will look for libraries with both the .a
+ # and .so extensions, such as libfoo.a and libfoo.so. Without -brtl, the
+ # AIX linker looks only for libfoo.a. Note that libfoo.a is an archived
+ # file that may contain shared objects and is different from static libs
+ # as on Linux.
+ #
+ # The -bnoipath strips the prepending (relative) path of libraries from
+ # the loader section in the target library or executable. Hence, during
+ # load-time LIBPATH (identical to LD_LIBRARY_PATH) or a hard-coded
+ # -blibpath (*similar* to -lrpath/-lrpath-link) is searched. Without
+ # this option, the prepending (relative) path + library name is
+ # hard-coded in the loader section, causing *only* this path to be
+ # searched during load-time. Note that the AIX linker does not have an
+ # -soname equivalent, this is as close as it gets.
+ #
+        # The above options are definitely for AIX 5.x, and most likely also for
+ # AIX 4.x and AIX 6.x. For details about the AIX linker see:
+ # http://download.boulder.ibm.com/ibmdl/pub/software/dw/aix/es-aix_ll.pdf
+ #
+
+ toolset.flags $(toolset).link OPTIONS : -Wl,-brtl -Wl,-bnoipath
+ : unchecked ;
+
+ case darwin :
+ # On Darwin, the -s option to ld does not work unless we pass -static,
+ # and passing -static unconditionally is a bad idea. So, do not pass -s
+ # at all and darwin.jam will use a separate 'strip' invocation.
+ toolset.flags $(toolset).link RPATH $(condition) : <dll-path> :
+ unchecked ;
+ toolset.flags $(toolset).link RPATH_LINK $(condition) : <xdll-path> :
+ unchecked ;
+
+ case gnu :
+ # Strip the binary when no debugging is needed. We use --strip-all flag
+ # as opposed to -s since icc (intel's compiler) is generally
+ # option-compatible with and inherits from the gcc toolset, but does not
+ # support -s.
+ toolset.flags $(toolset).link OPTIONS $(condition)/<strip>on : -Wl,--strip-all : unchecked ;
+ toolset.flags $(toolset).link RPATH $(condition) : <dll-path> : unchecked ;
+ toolset.flags $(toolset).link RPATH_LINK $(condition) : <xdll-path> : unchecked ;
+ toolset.flags $(toolset).link START-GROUP $(condition) : -Wl,--start-group : unchecked ;
+ toolset.flags $(toolset).link END-GROUP $(condition) : -Wl,--end-group : unchecked ;
+
+ # gnu ld has the ability to change the search behaviour for libraries
+ # referenced by the -l switch. These modifiers are -Bstatic and
+ # -Bdynamic and change search for -l switches that follow them. The
+ # following list shows the tried variants. Search stops at the first
+ # variant that has a match.
+ #
+ # *nix: -Bstatic -lxxx
+ # libxxx.a
+ #
+ # *nix: -Bdynamic -lxxx
+ # libxxx.so
+ # libxxx.a
+ #
+ # windows (mingw, cygwin) -Bstatic -lxxx
+ # libxxx.a
+ # xxx.lib
+ #
+ # windows (mingw, cygwin) -Bdynamic -lxxx
+ # libxxx.dll.a
+ # xxx.dll.a
+ # libxxx.a
+ # xxx.lib
+ # cygxxx.dll (*)
+ # libxxx.dll
+ # xxx.dll
+ # libxxx.a
+ #
+ # (*) This is for cygwin
+ # Please note that -Bstatic and -Bdynamic are not a guarantee that a
+ # static or dynamic lib indeed gets linked in. The switches only change
+ # search patterns!
+
+ # On *nix mixing shared libs with static runtime is not a good idea.
+ toolset.flags $(toolset).link FINDLIBS-ST-PFX
+ $(condition)/<runtime-link>shared : -Wl,-Bstatic : unchecked ;
+ toolset.flags $(toolset).link FINDLIBS-SA-PFX
+ $(condition)/<runtime-link>shared : -Wl,-Bdynamic : unchecked ;
+
+ # On windows allow mixing of static and dynamic libs with static
+ # runtime is not a good idea.
+ toolset.flags $(toolset).link FINDLIBS-ST-PFX
+ $(condition)/<runtime-link>static/<target-os>windows : -Wl,-Bstatic
+ : unchecked ;
+ toolset.flags $(toolset).link FINDLIBS-SA-PFX
+ $(condition)/<runtime-link>static/<target-os>windows : -Wl,-Bdynamic
+ : unchecked ;
+ toolset.flags $(toolset).link OPTIONS
+ $(condition)/<runtime-link>static/<target-os>windows : -Wl,-Bstatic
+ : unchecked ;
+
+ case hpux :
+ toolset.flags $(toolset).link OPTIONS $(condition)/<strip>on : -Wl,-s :
+ unchecked ;
+ toolset.flags $(toolset).link OPTIONS $(condition)/<link>shared : -fPIC
+ : unchecked ;
+
+ case osf :
+ # No --strip-all, just -s.
+ toolset.flags $(toolset).link OPTIONS $(condition)/<strip>on : -Wl,-s :
+ unchecked ;
+ toolset.flags $(toolset).link RPATH $(condition) : <dll-path> :
+ unchecked ;
+ # This does not support -R.
+ toolset.flags $(toolset).link RPATH_OPTION $(condition) : -rpath :
+ unchecked ;
+ # -rpath-link is not supported at all.
+
+ case sun :
+ toolset.flags $(toolset).link OPTIONS $(condition)/<strip>on : -Wl,-s :
+ unchecked ;
+ toolset.flags $(toolset).link RPATH $(condition) : <dll-path> :
+ unchecked ;
+ # Solaris linker does not have a separate -rpath-link, but allows using
+ # -L for the same purpose.
+ toolset.flags $(toolset).link LINKPATH $(condition) : <xdll-path> :
+ unchecked ;
+
+ # This permits shared libraries with non-PIC code on Solaris.
+ # VP, 2004/09/07: Now that we have -fPIC hardcode in link.dll, the
+ # following is not needed. Whether -fPIC should be hardcoded, is a
+ # separate question.
+ # AH, 2004/10/16: it is still necessary because some tests link against
+ # static libraries that were compiled without PIC.
+ toolset.flags $(toolset).link OPTIONS $(condition)/<link>shared :
+ -mimpure-text : unchecked ;
+
+ case * :
+ import errors ;
+ errors.user-error $(toolset) initialization: invalid linker '$(linker)'
+ : The value '$(linker)' specified for <linker> is not recognized.
+ : Possible values are 'aix', 'darwin', 'gnu', 'hpux', 'osf' or 'sun'
+ ;
+ }
+}
+
+
+# Enclose the RPATH variable on 'targets' in double quotes, unless it is already
+# enclosed in single quotes. This special casing is done because it is common to
+# pass '$ORIGIN' to linker -- and it has to have single quotes to prevent shell
+# expansion -- and if we add double quotes then the preventing properties of
+# single quotes disappear.
+#
+rule quote-rpath ( targets * )
+{
+ local r = [ on $(targets[1]) return $(RPATH) ] ;
+ if ! [ MATCH ('.*') : $(r) ]
+ {
+ r = \"$(r)\" ;
+ }
+ RPATH on $(targets) = $(r) ;
+}
+
+# Declare actions for linking.
+rule link ( targets * : sources * : properties * )
+{
+ setup-threading $(targets) : $(sources) : $(properties) ;
+ setup-address-model $(targets) : $(sources) : $(properties) ;
+ SPACE on $(targets) = " " ;
+ # Serialize execution of the 'link' action, since running N links in
+ # parallel is just slower. For now, serialize only gcc links, it might be a
+ # good idea to serialize all links.
+ JAM_SEMAPHORE on $(targets) = <s>gcc-link-semaphore ;
+ quote-rpath $(targets) ;
+}
+
+actions link bind LIBRARIES
+{
+ "$(CONFIG_COMMAND)" -L"$(LINKPATH)" -Wl,$(RPATH_OPTION:E=-R)$(SPACE)-Wl,$(RPATH) -Wl,-rpath-link$(SPACE)-Wl,"$(RPATH_LINK)" -o "$(<)" $(START-GROUP) "$(>)" "$(LIBRARIES)" $(FINDLIBS-ST-PFX) -l$(FINDLIBS-ST) $(FINDLIBS-SA-PFX) -l$(FINDLIBS-SA) $(END-GROUP) $(OPTIONS) $(USER_OPTIONS)
+}
+
+
+# Default value. Mostly for the sake of intel-linux that inherits from gcc, but
+# does not have the same logic to set the .AR variable. We can put the same
+# logic in intel-linux, but that is hardly worth the trouble as on Linux, 'ar'
+# is always available.
+.AR = ar ;
+.RANLIB = ranlib ;
+
+toolset.flags gcc.archive AROPTIONS <archiveflags> ;
+
+rule archive ( targets * : sources * : properties * )
+{
+ # Always remove archive and start again. Here is the rationale from
+ #
+ # Andre Hentz:
+ #
+ # I had a file, say a1.c, that was included into liba.a. I moved a1.c to
+ # a2.c, updated my Jamfiles and rebuilt. My program was crashing with absurd
+ # errors. After some debugging I traced it back to the fact that a1.o was
+ # *still* in liba.a
+ #
+ # Rene Rivera:
+ #
+ # Originally removing the archive was done by splicing an RM onto the
+ # archive action. That makes archives fail to build on NT when they have
+ # many files because it will no longer execute the action directly and blow
+ # the line length limit. Instead we remove the file in a different action,
+ # just before building the archive.
+ #
+ local clean.a = $(targets[1])(clean) ;
+ TEMPORARY $(clean.a) ;
+ NOCARE $(clean.a) ;
+ LOCATE on $(clean.a) = [ on $(targets[1]) return $(LOCATE) ] ;
+ DEPENDS $(clean.a) : $(sources) ;
+ DEPENDS $(targets) : $(clean.a) ;
+ common.RmTemps $(clean.a) : $(targets) ;
+}
+
+# Declare action for creating static libraries.
+# The letter 'r' means to add files to the archive with replacement. Since we
+# remove archive, we do not care about replacement, but there is no option "add
+# without replacement".
+# The letter 'c' suppresses the warning in case the archive does not exists yet.
+# That warning is produced only on some platforms, for whatever reasons.
+#
+actions piecemeal archive
+{
+ "$(.AR)" $(AROPTIONS) rc "$(<)" "$(>)"
+ "$(.RANLIB)" "$(<)"
+}
+
+rule link.dll ( targets * : sources * : properties * )
+{
+ setup-threading $(targets) : $(sources) : $(properties) ;
+ setup-address-model $(targets) : $(sources) : $(properties) ;
+ SPACE on $(targets) = " " ;
+ JAM_SEMAPHORE on $(targets) = <s>gcc-link-semaphore ;
+ quote-rpath $(targets) ;
+}
+
+# Differs from 'link' above only by -shared.
+actions link.dll bind LIBRARIES
+{
+ "$(CONFIG_COMMAND)" -L"$(LINKPATH)" -Wl,$(RPATH_OPTION:E=-R)$(SPACE)-Wl,$(RPATH) "$(.IMPLIB-COMMAND)$(<[1])" -o "$(<[-1])" $(HAVE_SONAME)-Wl,$(SONAME_OPTION)$(SPACE)-Wl,$(<[-1]:D=) -shared $(START-GROUP) "$(>)" "$(LIBRARIES)" $(FINDLIBS-ST-PFX) -l$(FINDLIBS-ST) $(FINDLIBS-SA-PFX) -l$(FINDLIBS-SA) $(END-GROUP) $(OPTIONS) $(USER_OPTIONS)
+}
+
+rule setup-threading ( targets * : sources * : properties * )
+{
+ local threading = [ feature.get-values threading : $(properties) ] ;
+ if $(threading) = multi
+ {
+ local target = [ feature.get-values target-os : $(properties) ] ;
+ local option ;
+ local libs ;
+
+ switch $(target)
+ {
+ case windows : option = -mthreads ;
+ case cygwin : option = -mthreads ;
+ case solaris : option = -pthreads ; libs = rt ;
+ case beos : # No threading options.
+ case *bsd : option = -pthread ; # There is no -lrt on BSD.
+ case sgi : # gcc on IRIX does not support multi-threading.
+ case darwin : # No threading options.
+ case * : option = -pthread ; libs = rt ;
+ }
+
+ if $(option)
+ {
+ OPTIONS on $(targets) += $(option) ;
+ }
+ if $(libs)
+ {
+ FINDLIBS-SA on $(targets) += $(libs) ;
+ }
+ }
+}
+
+
+local rule cpu-flags ( toolset variable : architecture : instruction-set + :
+ values + : default ? )
+{
+ if $(default)
+ {
+ toolset.flags $(toolset) $(variable)
+ <architecture>$(architecture)/<instruction-set> : $(values) ;
+ }
+ toolset.flags $(toolset) $(variable)
+ <architecture>/<instruction-set>$(instruction-set)
+ <architecture>$(architecture)/<instruction-set>$(instruction-set)
+ : $(values) ;
+}
+
+
+# Set architecture/instruction-set options.
+#
+# x86 and compatible
+# The 'native' option appeared in gcc 4.2 so we cannot safely use it as default.
+# Use i686 instead for 32-bit.
+toolset.flags gcc OPTIONS <architecture>x86/<address-model>32/<instruction-set> : -march=i686 ;
+cpu-flags gcc OPTIONS : x86 : native : -march=native ;
+cpu-flags gcc OPTIONS : x86 : i486 : -march=i486 ;
+cpu-flags gcc OPTIONS : x86 : i586 : -march=i586 ;
+cpu-flags gcc OPTIONS : x86 : i686 : -march=i686 ;
+cpu-flags gcc OPTIONS : x86 : pentium : -march=pentium ;
+cpu-flags gcc OPTIONS : x86 : pentium-mmx : -march=pentium-mmx ;
+cpu-flags gcc OPTIONS : x86 : pentiumpro : -march=pentiumpro ;
+cpu-flags gcc OPTIONS : x86 : pentium2 : -march=pentium2 ;
+cpu-flags gcc OPTIONS : x86 : pentium3 : -march=pentium3 ;
+cpu-flags gcc OPTIONS : x86 : pentium3m : -march=pentium3m ;
+cpu-flags gcc OPTIONS : x86 : pentium-m : -march=pentium-m ;
+cpu-flags gcc OPTIONS : x86 : pentium4 : -march=pentium4 ;
+cpu-flags gcc OPTIONS : x86 : pentium4m : -march=pentium4m ;
+cpu-flags gcc OPTIONS : x86 : prescott : -march=prescott ;
+cpu-flags gcc OPTIONS : x86 : nocona : -march=nocona ;
+cpu-flags gcc OPTIONS : x86 : core2 : -march=core2 ;
+cpu-flags gcc OPTIONS : x86 : conroe : -march=core2 ;
+cpu-flags gcc OPTIONS : x86 : conroe-xe : -march=core2 ;
+cpu-flags gcc OPTIONS : x86 : conroe-l : -march=core2 ;
+cpu-flags gcc OPTIONS : x86 : allendale : -march=core2 ;
+cpu-flags gcc OPTIONS : x86 : wolfdale : -march=core2 -msse4.1 ;
+cpu-flags gcc OPTIONS : x86 : merom : -march=core2 ;
+cpu-flags gcc OPTIONS : x86 : merom-xe : -march=core2 ;
+cpu-flags gcc OPTIONS : x86 : kentsfield : -march=core2 ;
+cpu-flags gcc OPTIONS : x86 : kentsfield-xe : -march=core2 ;
+cpu-flags gcc OPTIONS : x86 : yorksfield : -march=core2 ;
+cpu-flags gcc OPTIONS : x86 : penryn : -march=core2 ;
+cpu-flags gcc OPTIONS : x86 : corei7 : -march=corei7 ;
+cpu-flags gcc OPTIONS : x86 : nehalem : -march=corei7 ;
+cpu-flags gcc OPTIONS : x86 : corei7-avx : -march=corei7-avx ;
+cpu-flags gcc OPTIONS : x86 : sandy-bridge : -march=corei7-avx ;
+cpu-flags gcc OPTIONS : x86 : core-avx-i : -march=core-avx-i ;
+cpu-flags gcc OPTIONS : x86 : ivy-bridge : -march=core-avx-i ;
+cpu-flags gcc OPTIONS : x86 : haswell : -march=core-avx-i -mavx2 -mfma -mbmi -mbmi2 -mlzcnt ;
+cpu-flags gcc OPTIONS : x86 : k6 : -march=k6 ;
+cpu-flags gcc OPTIONS : x86 : k6-2 : -march=k6-2 ;
+cpu-flags gcc OPTIONS : x86 : k6-3 : -march=k6-3 ;
+cpu-flags gcc OPTIONS : x86 : athlon : -march=athlon ;
+cpu-flags gcc OPTIONS : x86 : athlon-tbird : -march=athlon-tbird ;
+cpu-flags gcc OPTIONS : x86 : athlon-4 : -march=athlon-4 ;
+cpu-flags gcc OPTIONS : x86 : athlon-xp : -march=athlon-xp ;
+cpu-flags gcc OPTIONS : x86 : athlon-mp : -march=athlon-mp ;
+##
+cpu-flags gcc OPTIONS : x86 : k8 : -march=k8 ;
+cpu-flags gcc OPTIONS : x86 : opteron : -march=opteron ;
+cpu-flags gcc OPTIONS : x86 : athlon64 : -march=athlon64 ;
+cpu-flags gcc OPTIONS : x86 : athlon-fx : -march=athlon-fx ;
+cpu-flags gcc OPTIONS : x86 : k8-sse3 : -march=k8-sse3 ;
+cpu-flags gcc OPTIONS : x86 : opteron-sse3 : -march=opteron-sse3 ;
+cpu-flags gcc OPTIONS : x86 : athlon64-sse3 : -march=athlon64-sse3 ;
+cpu-flags gcc OPTIONS : x86 : amdfam10 : -march=amdfam10 ;
+cpu-flags gcc OPTIONS : x86 : barcelona : -march=barcelona ;
+cpu-flags gcc OPTIONS : x86 : bdver1 : -march=bdver1 ;
+cpu-flags gcc OPTIONS : x86 : bdver2 : -march=bdver2 ;
+cpu-flags gcc OPTIONS : x86 : bdver3 : -march=bdver3 ;
+cpu-flags gcc OPTIONS : x86 : btver1 : -march=btver1 ;
+cpu-flags gcc OPTIONS : x86 : btver2 : -march=btver2 ;
+cpu-flags gcc OPTIONS : x86 : winchip-c6 : -march=winchip-c6 ;
+cpu-flags gcc OPTIONS : x86 : winchip2 : -march=winchip2 ;
+cpu-flags gcc OPTIONS : x86 : c3 : -march=c3 ;
+cpu-flags gcc OPTIONS : x86 : c3-2 : -march=c3-2 ;
+##
+cpu-flags gcc OPTIONS : x86 : atom : -march=atom ;
+# Sparc
+cpu-flags gcc OPTIONS : sparc : c3 : -mcpu=c3 : default ;
+cpu-flags gcc OPTIONS : sparc : v7 : -mcpu=v7 ;
+cpu-flags gcc OPTIONS : sparc : cypress : -mcpu=cypress ;
+cpu-flags gcc OPTIONS : sparc : v8 : -mcpu=v8 ;
+cpu-flags gcc OPTIONS : sparc : supersparc : -mcpu=supersparc ;
+cpu-flags gcc OPTIONS : sparc : sparclite : -mcpu=sparclite ;
+cpu-flags gcc OPTIONS : sparc : hypersparc : -mcpu=hypersparc ;
+cpu-flags gcc OPTIONS : sparc : sparclite86x : -mcpu=sparclite86x ;
+cpu-flags gcc OPTIONS : sparc : f930 : -mcpu=f930 ;
+cpu-flags gcc OPTIONS : sparc : f934 : -mcpu=f934 ;
+cpu-flags gcc OPTIONS : sparc : sparclet : -mcpu=sparclet ;
+cpu-flags gcc OPTIONS : sparc : tsc701 : -mcpu=tsc701 ;
+cpu-flags gcc OPTIONS : sparc : v9 : -mcpu=v9 ;
+cpu-flags gcc OPTIONS : sparc : ultrasparc : -mcpu=ultrasparc ;
+cpu-flags gcc OPTIONS : sparc : ultrasparc3 : -mcpu=ultrasparc3 ;
+# RS/6000 & PowerPC
+cpu-flags gcc OPTIONS : power : 403 : -mcpu=403 ;
+cpu-flags gcc OPTIONS : power : 505 : -mcpu=505 ;
+cpu-flags gcc OPTIONS : power : 601 : -mcpu=601 ;
+cpu-flags gcc OPTIONS : power : 602 : -mcpu=602 ;
+cpu-flags gcc OPTIONS : power : 603 : -mcpu=603 ;
+cpu-flags gcc OPTIONS : power : 603e : -mcpu=603e ;
+cpu-flags gcc OPTIONS : power : 604 : -mcpu=604 ;
+cpu-flags gcc OPTIONS : power : 604e : -mcpu=604e ;
+cpu-flags gcc OPTIONS : power : 620 : -mcpu=620 ;
+cpu-flags gcc OPTIONS : power : 630 : -mcpu=630 ;
+cpu-flags gcc OPTIONS : power : 740 : -mcpu=740 ;
+cpu-flags gcc OPTIONS : power : 7400 : -mcpu=7400 ;
+cpu-flags gcc OPTIONS : power : 7450 : -mcpu=7450 ;
+cpu-flags gcc OPTIONS : power : 750 : -mcpu=750 ;
+cpu-flags gcc OPTIONS : power : 801 : -mcpu=801 ;
+cpu-flags gcc OPTIONS : power : 821 : -mcpu=821 ;
+cpu-flags gcc OPTIONS : power : 823 : -mcpu=823 ;
+cpu-flags gcc OPTIONS : power : 860 : -mcpu=860 ;
+cpu-flags gcc OPTIONS : power : 970 : -mcpu=970 ;
+cpu-flags gcc OPTIONS : power : 8540 : -mcpu=8540 ;
+cpu-flags gcc OPTIONS : power : power : -mcpu=power ;
+cpu-flags gcc OPTIONS : power : power2 : -mcpu=power2 ;
+cpu-flags gcc OPTIONS : power : power3 : -mcpu=power3 ;
+cpu-flags gcc OPTIONS : power : power4 : -mcpu=power4 ;
+cpu-flags gcc OPTIONS : power : power5 : -mcpu=power5 ;
+cpu-flags gcc OPTIONS : power : powerpc : -mcpu=powerpc ;
+cpu-flags gcc OPTIONS : power : powerpc64 : -mcpu=powerpc64 ;
+cpu-flags gcc OPTIONS : power : rios : -mcpu=rios ;
+cpu-flags gcc OPTIONS : power : rios1 : -mcpu=rios1 ;
+cpu-flags gcc OPTIONS : power : rios2 : -mcpu=rios2 ;
+cpu-flags gcc OPTIONS : power : rsc : -mcpu=rsc ;
+cpu-flags gcc OPTIONS : power : rs64a : -mcpu=rs64 ;
+# AIX variant of RS/6000 & PowerPC
+toolset.flags gcc AROPTIONS <address-model>64/<target-os>aix : "-X 64" ;
diff --git a/src/kenlm/jam-files/boost-build/tools/generate.jam b/src/kenlm/jam-files/boost-build/tools/generate.jam
new file mode 100644
index 0000000..6732fa3
--- /dev/null
+++ b/src/kenlm/jam-files/boost-build/tools/generate.jam
@@ -0,0 +1,108 @@
+# Copyright 2006 Vladimir Prus
+# Distributed under the Boost Software License, Version 1.0.
+# (See accompanying file LICENSE_1_0.txt or http://www.boost.org/LICENSE_1_0.txt)
+
+# Declares main target 'generate' used to produce targets by calling a
+# user-provided rule that takes and produces virtual targets.
+
+import "class" : new ;
+import errors ;
+import feature ;
+import project ;
+import property ;
+import property-set ;
+import targets ;
+import regex ;
+
+
+feature.feature generating-rule : : free ;
+
+
+class generated-target-class : basic-target
+{
+ import errors ;
+ import indirect ;
+ import virtual-target ;
+
+ rule __init__ ( name : project : sources * : requirements *
+ : default-build * : usage-requirements * )
+ {
+ basic-target.__init__ $(name) : $(project) : $(sources)
+ : $(requirements) : $(default-build) : $(usage-requirements) ;
+
+ if ! [ $(self.requirements).get <generating-rule> ]
+ {
+ errors.user-error "The generate rule requires the <generating-rule>"
+ "property to be set" ;
+ }
+ }
+
+ rule construct ( name : sources * : property-set )
+ {
+ local result ;
+ local gr = [ $(property-set).get <generating-rule> ] ;
+
+ # FIXME: this is a copy-paste from virtual-target.jam. We should add a
+ # utility rule to call a rule like this.
+ local rule-name = [ MATCH ^@(.*) : $(gr) ] ;
+ if $(rule-name)
+ {
+ if $(gr[2])
+ {
+ local target-name = [ full-name ] ;
+ errors.user-error "Multiple <generating-rule> properties"
+ "encountered for target $(target-name)." ;
+ }
+
+ result = [ indirect.call $(rule-name) $(self.project) $(name)
+ : $(property-set) : $(sources) ] ;
+
+ if ! $(result)
+ {
+ ECHO "warning: Unable to construct" [ full-name ] ;
+ }
+ }
+
+ local ur ;
+ local targets ;
+
+ if $(result)
+ {
+ if [ class.is-a $(result[1]) : property-set ]
+ {
+ ur = $(result[1]) ;
+ targets = $(result[2-]) ;
+ }
+ else
+ {
+ ur = [ property-set.empty ] ;
+ targets = $(result) ;
+ }
+ }
+ # FIXME: the following loop should be doable using sequence.transform or
+ # some similar utility rule.
+ local rt ;
+ for local t in $(targets)
+ {
+ rt += [ virtual-target.register $(t) ] ;
+ }
+ return $(ur) $(rt) ;
+ }
+}
+
+
+rule generate ( name : sources * : requirements * : default-build *
+ : usage-requirements * )
+{
+ local project = [ project.current ] ;
+
+ targets.main-target-alternative
+ [ new generated-target-class $(name) : $(project)
+ : [ targets.main-target-sources $(sources) : $(name) ]
+ : [ targets.main-target-requirements $(requirements) : $(project) ]
+ : [ targets.main-target-default-build $(default-build) : $(project) ]
+ : [ targets.main-target-usage-requirements $(usage-requirements) : $(project) ]
+ ] ;
+}
+
+IMPORT $(__name__) : generate : : generate ;
diff --git a/src/kenlm/jam-files/boost-build/tools/gettext.jam b/src/kenlm/jam-files/boost-build/tools/gettext.jam
new file mode 100644
index 0000000..99a43ff
--- /dev/null
+++ b/src/kenlm/jam-files/boost-build/tools/gettext.jam
@@ -0,0 +1,230 @@
+# Copyright 2003, 2004, 2005, 2006 Vladimir Prus
+# Distributed under the Boost Software License, Version 1.0.
+# (See accompanying file LICENSE_1_0.txt or http://www.boost.org/LICENSE_1_0.txt)
+
+# This module supports GNU gettext internationalization utilities.
+#
+# It provides two main target rules: 'gettext.catalog', used for
+# creating machine-readable catalogs from translations files, and
+# 'gettext.update', used for update translation files from modified
+# sources.
+#
+# To add i18n support to your application you should follow these
+# steps.
+#
+# - Decide on a file name which will contain translations and
+# what main target name will be used to update it. For example::
+#
+# gettext.update update-russian : russian.po a.cpp my_app ;
+#
+# - Create the initial translation file by running::
+#
+# bjam update-russian
+#
+# - Edit russian.po. For example, you might change fields like LastTranslator.
+#
+# - Create a main target for final message catalog::
+#
+# gettext.catalog russian : russian.po ;
+#
+# The machine-readable catalog will be updated whenever you update
+# "russian.po". The "russian.po" file will be updated only on explicit
+# request. When you're ready to update translations, you should
+#
+# - Run::
+#
+# bjam update-russian
+#
+# - Edit "russian.po" in appropriate editor.
+#
+# The next bjam run will convert "russian.po" into machine-readable form.
+#
+# By default, translations are marked by 'i18n' call. The 'gettext.keyword'
+# feature can be used to alter this.
+
+
+import targets ;
+import property-set ;
+import virtual-target ;
+import "class" : new ;
+import project ;
+import type ;
+import generators ;
+import errors ;
+import feature : feature ;
+import toolset : flags ;
+import regex ;
+
+.path = "" ;
+
+# Initializes the gettext module.
+rule init ( path ? # Path where all tools are located. If not specified,
+ # they should be in PATH.
+ )
+{
+ if $(.initialized) && $(.path) != $(path)
+ {
+ errors.error "Attempt to reconfigure with different path" ;
+ }
+ .initialized = true ;
+ if $(path)
+ {
+ .path = $(path)/ ;
+ }
+}
+
+# Creates a main target 'name', which, when updated, will cause
+# file 'existing-translation' to be updated with translations
+# extracted from 'sources'. It's possible to specify main target
+# in sources --- in which case all targets from the dependency graph
+# of those main targets will be scanned, provided they are of
+# appropriate type. The 'gettext.types' feature can be used to
+# control the types.
+#
+# The target will be updated only if explicitly requested on the
+# command line.
+rule update ( name : existing-translation sources + : requirements * )
+{
+ local project = [ project.current ] ;
+
+ targets.main-target-alternative
+ [ new typed-target $(name) : $(project) : gettext.UPDATE :
+ $(existing-translation) $(sources)
+ : [ targets.main-target-requirements $(requirements) : $(project) ]
+ ] ;
+ $(project).mark-target-as-explicit $(name) ;
+}
+
+
+# The human editable source, containing translation.
+type.register gettext.PO : po ;
+# The machine readable message catalog.
+type.register gettext.catalog : mo ;
+# Intermediate type produce by extracting translations from
+# sources.
+type.register gettext.POT : pot ;
+# Pseudo type used to invoke update-translations generator
+type.register gettext.UPDATE ;
+
+# Identifies the keyword that should be used when scanning sources.
+# Default: i18n
+feature gettext.keyword : : free ;
+# Contains space-separated list of sources types which should be scanned.
+# Default: "C CPP"
+feature gettext.types : : free ;
+
+generators.register-standard gettext.compile : gettext.PO : gettext.catalog ;
+
+class update-translations-generator : generator
+{
+ import regex : split ;
+ import property-set ;
+
+ rule __init__ ( * : * )
+ {
+ generator.__init__ $(1) : $(2) : $(3) : $(4) : $(5) : $(6) : $(7) : $(8) : $(9) ;
+ }
+
+ # The rule should be called with at least two sources. The first source
+ # is the translation (.po) file to update. The remaining sources are targets
+ # which should be scanned for new messages. All sources files for those targets
+ # will be found and passed to the 'xgettext' utility, which extracts the
+ # messages for localization. Those messages will be merged to the .po file.
+ rule run ( project name ? : property-set : sources * : multiple ? )
+ {
+ local types = [ $(property-set).get <gettext.types> ] ;
+ types ?= "C CPP" ;
+ types = [ regex.split $(types) " " ] ;
+
+ local keywords = [ $(property-set).get <gettext.keyword> ] ;
+ property-set = [ property-set.create $(keywords:G=<gettext.keyword>) ] ;
+
+        # First determine the list of sources that must be scanned for
+ # messages.
+ local all-sources ;
+ # CONSIDER: I'm not sure if the logic should be the same as for 'stage':
+ # i.e. following dependency properties as well.
+ for local s in $(sources[2-])
+ {
+ all-sources += [ virtual-target.traverse $(s) : : include-sources ] ;
+ }
+ local right-sources ;
+ for local s in $(all-sources)
+ {
+ if [ $(s).type ] in $(types)
+ {
+ right-sources += $(s) ;
+ }
+ }
+
+ local .constructed ;
+ if $(right-sources)
+ {
+ # Create the POT file, which will contain list of messages extracted
+ # from the sources.
+ local extract =
+ [ new action $(right-sources) : gettext.extract : $(property-set) ] ;
+ local new-messages = [ new file-target $(name) : gettext.POT
+ : $(project) : $(extract) ] ;
+
+ # Create a notfile target which will update the existing translation file
+ # with new messages.
+ local a = [ new action $(sources[1]) $(new-messages)
+ : gettext.update-po-dispatch ] ;
+ local r = [ new notfile-target $(name) : $(project) : $(a) ] ;
+ .constructed = [ virtual-target.register $(r) ] ;
+ }
+ else
+ {
+ errors.error "No source could be scanned by gettext tools" ;
+ }
+ return $(.constructed) ;
+ }
+}
+generators.register [ new update-translations-generator gettext.update : : gettext.UPDATE ] ;
+
+flags gettext.extract KEYWORD <gettext.keyword> ;
+actions extract
+{
+ $(.path)xgettext -k$(KEYWORD:E=i18n) -o $(<) $(>)
+}
+
+# Does the real updating of the po file. The tricky part is that
+# we're actually updating one of the sources:
+# $(<) is the NOTFILE target we're updating
+# $(>[1]) is the PO file to be really updated.
+# $(>[2]) is the PO file created from sources.
+#
+# When file to be updated does not exist (during the
+# first run), we need to copy the file created from sources.
+# In all other cases, we need to update the file.
+rule update-po-dispatch
+{
+ NOCARE $(>[1]) ;
+ gettext.create-po $(<) : $(>) ;
+ gettext.update-po $(<) : $(>) ;
+ _ on $(<) = " " ;
+ ok on $(<) = "" ;
+ EXISTING_PO on $(<) = $(>[1]) ;
+}
+
+# Due to fancy interaction of existing and updated, this rule can be called with
+# one source, in which case we copy the lonely source into EXISTING_PO, or with
+# two sources, in which case the action body expands to nothing. I'd really like
+# to have a "missing" action modifier.
+actions quietly existing updated create-po bind EXISTING_PO
+{
+ cp$(_)"$(>[1])"$(_)"$(EXISTING_PO)"$($(>[2]:E=ok))
+}
+
+actions updated update-po bind EXISTING_PO
+{
+ $(.path)msgmerge$(_)-U$(_)"$(EXISTING_PO)"$(_)"$(>[1])"
+}
+
+actions gettext.compile
+{
+ $(.path)msgfmt -o $(<) $(>)
+}
+
+IMPORT $(__name__) : update : : gettext.update ;
diff --git a/src/kenlm/jam-files/boost-build/tools/gfortran.jam b/src/kenlm/jam-files/boost-build/tools/gfortran.jam
new file mode 100644
index 0000000..0aa69b8
--- /dev/null
+++ b/src/kenlm/jam-files/boost-build/tools/gfortran.jam
@@ -0,0 +1,39 @@
+# Copyright (C) 2004 Toon Knapen
+#
+# Use, modification and distribution is subject to the Boost Software
+# License Version 1.0. (See accompanying file LICENSE_1_0.txt or
+# http://www.boost.org/LICENSE_1_0.txt)
+
+import toolset : flags ;
+import feature ;
+import fortran ;
+
+rule init ( version ? : command * : options * )
+{
+}
+
+# Declare flags and action for compilation
+flags gfortran OPTIONS <fflags> ;
+
+flags gfortran OPTIONS <optimization>off : -O0 ;
+flags gfortran OPTIONS <optimization>speed : -O3 ;
+flags gfortran OPTIONS <optimization>space : -Os ;
+
+flags gfortran OPTIONS <debug-symbols>on : -g ;
+flags gfortran OPTIONS <profiling>on : -pg ;
+
+flags gfortran OPTIONS <link>shared/<main-target-type>LIB : -fPIC ;
+
+flags gfortran DEFINES <define> ;
+flags gfortran INCLUDES <include> ;
+
+rule compile.fortran
+{
+}
+
+actions compile.fortran
+{
+ gcc -Wall $(OPTIONS) -D$(DEFINES) -I$(INCLUDES) -c -o "$(<)" "$(>)"
+}
+
+generators.register-fortran-compiler gfortran.compile.fortran : FORTRAN FORTRAN90 : OBJ ;
diff --git a/src/kenlm/jam-files/boost-build/tools/hp_cxx.jam b/src/kenlm/jam-files/boost-build/tools/hp_cxx.jam
new file mode 100644
index 0000000..86cd783
--- /dev/null
+++ b/src/kenlm/jam-files/boost-build/tools/hp_cxx.jam
@@ -0,0 +1,181 @@
+# Copyright 2001 David Abrahams.
+# Copyright 2004, 2005 Markus Schoepflin.
+# Distributed under the Boost Software License, Version 1.0.
+# (See accompanying file LICENSE_1_0.txt or http://www.boost.org/LICENSE_1_0.txt)
+
+#
+# HP CXX compiler
+# See http://h30097.www3.hp.com/cplus/?jumpid=reg_R1002_USEN
+#
+#
+# Notes on this toolset:
+#
+# - Because of very subtle issues with the default ansi mode, strict_ansi mode
+# is used for compilation. One example of things that don't work correctly in
+# the default ansi mode is overload resolution of function templates when
+# mixed with non-template functions.
+#
+# - For template instantiation "-timplicit_local" is used. Previously,
+# "-tlocal" has been tried to avoid the need for a template repository
+# but this doesn't work with manually instantiated templates. "-tweak"
+# has not been used to avoid the stream of warning messages issued by
+# ar or ld when creating a library or linking an application.
+#
+# - Debug symbols are generated with "-g3", as this works both in debug and
+# release mode. When compiling C++ code without optimization, we additionally
+# use "-gall", which generates full symbol table information for all classes,
+# structs, and unions. As this turns off optimization, it can't be used when
+# optimization is needed.
+#
+
+import feature generators common ;
+import toolset : flags ;
+
+feature.extend toolset : hp_cxx ;
+feature.extend c++abi : cxxarm ;
+
+# Inherit from Unix toolset to get library ordering magic.
+toolset.inherit hp_cxx : unix ;
+
+generators.override hp_cxx.prebuilt : builtin.lib-generator ;
+generators.override hp_cxx.prebuilt : builtin.prebuilt ;
+generators.override hp_cxx.searched-lib-generator : searched-lib-generator ;
+
+
+rule init ( version ? : command * : options * )
+{
+ local condition = [ common.check-init-parameters hp_cxx : version $(version) ] ;
+
+ local command = [ common.get-invocation-command hp_cxx : cxx : $(command) ] ;
+
+ if $(command)
+ {
+ local root = [ common.get-absolute-tool-path $(command[-1]) ] ;
+
+ if $(root)
+ {
+ flags hp_cxx .root $(condition) : "\"$(root)\"/" ;
+ }
+ }
+ # If we can't find 'cxx' anyway, at least show 'cxx' in the commands
+ command ?= cxx ;
+
+ common.handle-options hp_cxx : $(condition) : $(command) : $(options) ;
+}
+
+generators.register-c-compiler hp_cxx.compile.c++ : CPP : OBJ : <toolset>hp_cxx ;
+generators.register-c-compiler hp_cxx.compile.c : C : OBJ : <toolset>hp_cxx ;
+
+
+
+# No static linking as far as I can tell.
+# flags cxx LINKFLAGS <runtime-link>static : -bstatic ;
+flags hp_cxx.compile OPTIONS <debug-symbols>on : -g3 ;
+flags hp_cxx.compile OPTIONS <optimization>off/<debug-symbols>on : -gall ;
+flags hp_cxx.link OPTIONS <debug-symbols>on : -g ;
+flags hp_cxx.link OPTIONS <debug-symbols>off : -s ;
+
+flags hp_cxx.compile OPTIONS <optimization>off : -O0 ;
+flags hp_cxx.compile OPTIONS <optimization>speed/<inlining>on : -O2 ;
+flags hp_cxx.compile OPTIONS <optimization>speed : -O2 ;
+
+# This (undocumented) macro needs to be defined to get all C function
+# overloads required by the C++ standard.
+flags hp_cxx.compile.c++ OPTIONS : -D__CNAME_OVERLOADS ;
+
+# Added for threading support
+flags hp_cxx.compile OPTIONS <threading>multi : -pthread ;
+flags hp_cxx.link OPTIONS <threading>multi : -pthread ;
+
+flags hp_cxx.compile OPTIONS <optimization>space/<inlining>on : <inlining>size ;
+flags hp_cxx.compile OPTIONS <optimization>space : -O1 ;
+flags hp_cxx.compile OPTIONS <inlining>off : -inline none ;
+
+# The compiler versions tried (up to V6.5-040) hang when compiling Boost code
+# with full inlining enabled. So leave it at the default level for now.
+#
+# flags hp_cxx.compile OPTIONS <inlining>full : -inline all ;
+
+flags hp_cxx.compile OPTIONS <profiling>on : -pg ;
+flags hp_cxx.link OPTIONS <profiling>on : -pg ;
+
+# Selection of the object model. This flag is needed on both the C++ compiler
+# and linker command line.
+
+# Unspecified ABI translates to '-model ansi' as most
+# standard-conforming.
+flags hp_cxx.compile.c++ OPTIONS <c++abi> : -model ansi : : hack-hack ;
+flags hp_cxx.compile.c++ OPTIONS <c++abi>cxxarm : -model arm ;
+flags hp_cxx.link OPTIONS <c++abi> : -model ansi : : hack-hack ;
+flags hp_cxx.link OPTIONS <c++abi>cxxarm : -model arm ;
+
+# Display a descriptive tag together with each compiler message. This tag can
+# be used by the user to explicitly suppress the compiler message.
+flags hp_cxx.compile OPTIONS : -msg_display_tag ;
+
+flags hp_cxx.compile OPTIONS <cflags> ;
+flags hp_cxx.compile.c++ OPTIONS <cxxflags> ;
+flags hp_cxx.compile DEFINES <define> ;
+flags hp_cxx.compile INCLUDES <include> ;
+flags hp_cxx.link OPTIONS <linkflags> ;
+
+flags hp_cxx.link LIBPATH <library-path> ;
+flags hp_cxx.link LIBRARIES <library-file> ;
+flags hp_cxx.link FINDLIBS-ST <find-static-library> ;
+flags hp_cxx.link FINDLIBS-SA <find-shared-library> ;
+
+flags hp_cxx.compile.c++ TEMPLATE_DEPTH <c++-template-depth> ;
+
+actions link bind LIBRARIES
+{
+ $(CONFIG_COMMAND) -noimplicit_include $(OPTIONS) -o "$(<)" -L$(LIBPATH) "$(>)" "$(LIBRARIES)" -l$(FINDLIBS-ST) -l$(FINDLIBS-SA) -lrt -lm
+}
+
+# When creating dynamic libraries, we don't want to be warned about unresolved
+# symbols, therefore all unresolved symbols are marked as expected by
+# '-expect_unresolved *'. This also mirrors the behaviour of the GNU tool
+# chain.
+
+actions link.dll bind LIBRARIES
+{
+ $(CONFIG_COMMAND) -shared -expect_unresolved \* -noimplicit_include $(OPTIONS) -o "$(<[1])" -L$(LIBPATH) "$(>)" "$(LIBRARIES)" -l$(FINDLIBS-ST) -l$(FINDLIBS-SA) -lm
+}
+
+
+# Note: Relaxed ANSI mode (-std) is used for compilation because in strict ANSI
+# C89 mode (-std1) the compiler doesn't accept C++ comments in C files. As -std
+# is the default, no special flag is needed.
+actions compile.c
+{
+ $(.root:E=)cc -c $(OPTIONS) -D$(DEFINES) -I"$(INCLUDES)" -o "$(<)" "$(>)"
+}
+
+# Note: The compiler is forced to compile the files as C++ (-x cxx) because
+# otherwise it will silently ignore files with no file extension.
+#
+# Note: We deliberately don't suppress any warnings on the compiler command
+# line, the user can always do this in a customized toolset later on.
+
+rule compile.c++
+{
+ # We preprocess the TEMPLATE_DEPTH command line option here because we found
+ # no way to do it correctly in the actual action code. There we either get
+ # the -pending_instantiations parameter when no c++-template-depth property
+ # has been specified or we get additional quotes around
+ # "-pending_instantiations ".
+ local template-depth = [ on $(1) return $(TEMPLATE_DEPTH) ] ;
+ TEMPLATE_DEPTH on $(1) = "-pending_instantiations "$(template-depth) ;
+}
+
+actions compile.c++
+{
+ $(CONFIG_COMMAND) -x cxx -c -std strict_ansi -nopure_cname -noimplicit_include -timplicit_local -ptr "$(<[1]:D)/cxx_repository" $(OPTIONS) $(TEMPLATE_DEPTH) -D$(DEFINES) -I"$(INCLUDES)" -o "$(<)" "$(>)"
+}
+
+# Always create archive from scratch. See the gcc toolset for rationale.
+RM = [ common.rm-command ] ;
+actions together piecemeal archive
+{
+ $(RM) "$(<)"
+ ar rc $(<) $(>)
+}
diff --git a/src/kenlm/jam-files/boost-build/tools/hpfortran.jam b/src/kenlm/jam-files/boost-build/tools/hpfortran.jam
new file mode 100644
index 0000000..96e8d18
--- /dev/null
+++ b/src/kenlm/jam-files/boost-build/tools/hpfortran.jam
@@ -0,0 +1,35 @@
+# Copyright (C) 2004 Toon Knapen
+#
+# Use, modification and distribution is subject to the Boost Software
+# License Version 1.0. (See accompanying file LICENSE_1_0.txt or
+# http://www.boost.org/LICENSE_1_0.txt)
+
+import toolset : flags ;
+import feature ;
+import fortran ;
+
+rule init ( version ? : command * : options * )
+{
+}
+
+# Declare flags and action for compilation
+flags hpfortran OPTIONS <optimization>off : -O0 ;
+flags hpfortran OPTIONS <optimization>speed : -O3 ;
+flags hpfortran OPTIONS <optimization>space : -O1 ;
+
+flags hpfortran OPTIONS <debug-symbols>on : -g ;
+flags hpfortran OPTIONS <profiling>on : -pg ;
+
+flags hpfortran DEFINES <define> ;
+flags hpfortran INCLUDES <include> ;
+
+rule compile.fortran
+{
+}
+
+actions compile.fortran
+{
+ f77 +DD64 $(OPTIONS) -D$(DEFINES) -I$(INCLUDES) -c -o "$(<)" "$(>)"
+}
+
+generators.register-fortran-compiler hpfortran.compile.fortran : FORTRAN : OBJ ;
diff --git a/src/kenlm/jam-files/boost-build/tools/ifort.jam b/src/kenlm/jam-files/boost-build/tools/ifort.jam
new file mode 100644
index 0000000..eb7c198
--- /dev/null
+++ b/src/kenlm/jam-files/boost-build/tools/ifort.jam
@@ -0,0 +1,44 @@
+# Copyright (C) 2004 Toon Knapen
+#
+# Use, modification and distribution is subject to the Boost Software
+# License Version 1.0. (See accompanying file LICENSE_1_0.txt or
+# http://www.boost.org/LICENSE_1_0.txt)
+
+import toolset : flags ;
+import feature ;
+import fortran ;
+
+rule init ( version ? : command * : options * )
+{
+}
+
+# Declare flags and action for compilation
+flags ifort OPTIONS <fflags> ;
+
+flags ifort OPTIONS <optimization>off : /Od ;
+flags ifort OPTIONS <optimization>speed : /O3 ;
+flags ifort OPTIONS <optimization>space : /O1 ;
+
+flags ifort OPTIONS <debug-symbols>on : /debug:full ;
+flags ifort OPTIONS <profiling>on : /Qprof_gen ;
+
+flags ifort.compile FFLAGS <runtime-debugging>off/<runtime-link>shared : /MD ;
+flags ifort.compile FFLAGS <runtime-debugging>on/<runtime-link>shared : /MDd ;
+flags ifort.compile FFLAGS <runtime-debugging>off/<runtime-link>static/<threading>single : /ML ;
+flags ifort.compile FFLAGS <runtime-debugging>on/<runtime-link>static/<threading>single : /MLd ;
+flags ifort.compile FFLAGS <runtime-debugging>off/<runtime-link>static/<threading>multi : /MT ;
+flags ifort.compile FFLAGS <runtime-debugging>on/<runtime-link>static/<threading>multi : /MTd ;
+
+flags ifort DEFINES <define> ;
+flags ifort INCLUDES <include> ;
+
+rule compile.fortran
+{
+}
+
+actions compile.fortran
+{
+ ifort $(FFLAGS) $(OPTIONS) /names:lowercase /D$(DEFINES) /I"$(INCLUDES)" /c /object:"$(<)" "$(>)"
+}
+
+generators.register-fortran-compiler ifort.compile.fortran : FORTRAN : OBJ ;
diff --git a/src/kenlm/jam-files/boost-build/tools/intel-darwin.jam b/src/kenlm/jam-files/boost-build/tools/intel-darwin.jam
new file mode 100644
index 0000000..c682a7f
--- /dev/null
+++ b/src/kenlm/jam-files/boost-build/tools/intel-darwin.jam
@@ -0,0 +1,227 @@
+# Copyright Vladimir Prus 2004.
+# Copyright Noel Belcourt 2007.
+# Distributed under the Boost Software License, Version 1.0.
+# (See accompanying file LICENSE_1_0.txt
+# or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+import intel ;
+import feature : feature ;
+import os ;
+import toolset ;
+import toolset : flags ;
+import gcc ;
+import common ;
+import errors ;
+import generators ;
+
+feature.extend-subfeature toolset intel : platform : darwin ;
+
+toolset.inherit-generators intel-darwin
+ <toolset>intel <toolset-intel:platform>darwin
+ : gcc
+ # Don't inherit PCH generators. They were not tested, and probably
+ # don't work for this compiler.
+ : gcc.mingw.link gcc.mingw.link.dll gcc.compile.c.pch gcc.compile.c++.pch
+ ;
+
+generators.override intel-darwin.prebuilt : builtin.lib-generator ;
+generators.override intel-darwin.prebuilt : builtin.prebuilt ;
+generators.override intel-darwin.searched-lib-generator : searched-lib-generator ;
+
+toolset.inherit-rules intel-darwin : gcc ;
+toolset.inherit-flags intel-darwin : gcc
+ : <inlining>off <inlining>on <inlining>full <optimization>space
+ <warnings>off <warnings>all <warnings>on
+ <architecture>x86/<address-model>32
+ <architecture>x86/<address-model>64
+ ;
+
+if [ MATCH (--debug-configuration) : [ modules.peek : ARGV ] ]
+{
+ .debug-configuration = true ;
+}
+
+# vectorization diagnostics
+feature vectorize : off on full ;
+
+# Initializes the intel-darwin toolset
+# version in mandatory
+# name (default icc) is used to invoke the specified intel complier
+# compile and link options allow you to specify addition command line options for each version
+rule init ( version ? : command * : options * )
+{
+ local condition = [ common.check-init-parameters intel-darwin
+ : version $(version) ] ;
+
+ command = [ common.get-invocation-command intel-darwin : icc
+ : $(command) : /opt/intel_cc_80/bin ] ;
+
+ common.handle-options intel-darwin : $(condition) : $(command) : $(options) ;
+
+ gcc.init-link-flags intel-darwin darwin $(condition) ;
+
+ # handle <library-path>
+ # local library-path = [ feature.get-values <library-path> : $(options) ] ;
+ # flags intel-darwin.link USER_OPTIONS $(condition) : [ feature.get-values <dll-path> : $(options) ] ;
+
+ local root = [ feature.get-values <root> : $(options) ] ;
+ local bin ;
+ if $(command) || $(root)
+ {
+ bin ?= [ common.get-absolute-tool-path $(command[-1]) ] ;
+ root ?= $(bin:D) ;
+
+ if $(root)
+ {
+ # Libraries required to run the executable may be in either
+ # $(root)/lib (10.1 and earlier)
+ # or
+ # $(root)/lib/architecture-name (11.0 and later:
+ local lib_path = $(root)/lib $(root:P)/lib/$(bin:B) ;
+ if $(.debug-configuration)
+ {
+ ECHO notice: using intel libraries :: $(condition) :: $(lib_path) ;
+ }
+ flags intel-darwin.link RUN_PATH $(condition) : $(lib_path) ;
+ }
+ }
+
+ local m = [ MATCH (..).* : $(version) ] ;
+ local n = [ MATCH (.)\\. : $(m) ] ;
+ if $(n) {
+ m = $(n) ;
+ }
+
+ local major = $(m) ;
+
+ if $(major) = "9" {
+ flags intel-darwin.compile OPTIONS $(condition)/<inlining>off : -Ob0 ;
+ flags intel-darwin.compile OPTIONS $(condition)/<inlining>on : -Ob1 ;
+ flags intel-darwin.compile OPTIONS $(condition)/<inlining>full : -Ob2 ;
+ flags intel-darwin.compile OPTIONS $(condition)/<vectorize>off : -vec-report0 ;
+ flags intel-darwin.compile OPTIONS $(condition)/<vectorize>on : -vec-report1 ;
+ flags intel-darwin.compile OPTIONS $(condition)/<vectorize>full : -vec-report5 ;
+ flags intel-darwin.link OPTIONS $(condition)/<runtime-link>static : -static -static-libcxa -lstdc++ -lpthread ;
+ flags intel-darwin.link OPTIONS $(condition)/<runtime-link>shared : -shared-libcxa -lstdc++ -lpthread ;
+ }
+ else {
+ flags intel-darwin.compile OPTIONS $(condition)/<inlining>off : -inline-level=0 ;
+ flags intel-darwin.compile OPTIONS $(condition)/<inlining>on : -inline-level=1 ;
+ flags intel-darwin.compile OPTIONS $(condition)/<inlining>full : -inline-level=2 ;
+ flags intel-darwin.compile OPTIONS $(condition)/<vectorize>off : -vec-report0 ;
+ flags intel-darwin.compile OPTIONS $(condition)/<vectorize>on : -vec-report1 ;
+ flags intel-darwin.compile OPTIONS $(condition)/<vectorize>full : -vec-report5 ;
+ flags intel-darwin.link OPTIONS $(condition)/<runtime-link>static : -static -static-intel -lstdc++ -lpthread ;
+ flags intel-darwin.link OPTIONS $(condition)/<runtime-link>shared : -shared-intel -lstdc++ -lpthread ;
+ }
+
+ local minor = [ MATCH ".*\\.(.).*" : $(version) ] ;
+
+ # wchar_t char_traits workaround for compilers older than 10.2
+ if $(major) = "9" || ( $(major) = "10" && ( $(minor) = "0" || $(minor) = "1" ) ) {
+ flags intel-darwin.compile DEFINES $(condition) : __WINT_TYPE__=int : unchecked ;
+ }
+}
+
+SPACE = " " ;
+
+flags intel-darwin.compile OPTIONS <cflags> ;
+flags intel-darwin.compile OPTIONS <cxxflags> ;
+# flags intel-darwin.compile INCLUDES <include> ;
+
+flags intel-darwin.compile OPTIONS <optimization>space : -O1 ; # no specific space optimization flag in icc
+
+#
+.cpu-type-em64t = prescott nocona core2 corei7 corei7-avx core-avx-i
+ conroe conroe-xe conroe-l allendale merom
+ merom-xe kentsfield kentsfield-xe penryn wolfdale
+ yorksfield nehalem sandy-bridge ivy-bridge haswell ;
+.cpu-type-amd64 = k8 opteron athlon64 athlon-fx k8-sse3 opteron-sse3
+ athlon64-sse3 amdfam10 barcelona bdver1 bdver2 bdver3 btver1 btver2 ;
+.cpu-type-x86-64 = $(.cpu-type-em64t) $(.cpu-type-amd64) ;
+
+flags intel-darwin.compile OPTIONS <instruction-set>$(.cpu-type-x86-64)/<address-model>32 : -m32 ; # -mcmodel=small ;
+flags intel-darwin.compile OPTIONS <instruction-set>$(.cpu-type-x86-64)/<address-model>64 : -m64 ; # -mcmodel=large ;
+
+flags intel-darwin.compile.c OPTIONS <warnings>off : -w0 ;
+flags intel-darwin.compile.c OPTIONS <warnings>on : -w1 ;
+flags intel-darwin.compile.c OPTIONS <warnings>all : -w2 ;
+
+flags intel-darwin.compile.c++ OPTIONS <warnings>off : -w0 ;
+flags intel-darwin.compile.c++ OPTIONS <warnings>on : -w1 ;
+flags intel-darwin.compile.c++ OPTIONS <warnings>all : -w2 ;
+
+actions compile.c
+{
+ "$(CONFIG_COMMAND)" -xc $(OPTIONS) -D$(DEFINES) -I"$(INCLUDES)" -c -o "$(<)" "$(>)"
+}
+
+actions compile.c++
+{
+ "$(CONFIG_COMMAND)" -xc++ $(OPTIONS) -D$(DEFINES) -I"$(INCLUDES)" -c -o "$(<)" "$(>)"
+}
+
+flags intel-darwin ARFLAGS <archiveflags> ;
+
+# Default value. Mostly for the sake of intel-linux
+# that inherits from gcc, but does not has the same
+# logic to set the .AR variable. We can put the same
+# logic in intel-linux, but that's hardly worth the trouble
+# as on Linux, 'ar' is always available.
+.AR = ar ;
+
+rule archive ( targets * : sources * : properties * )
+{
+ # Always remove archive and start again. Here's rationale from
+ # Andre Hentz:
+ #
+ # I had a file, say a1.c, that was included into liba.a.
+ # I moved a1.c to a2.c, updated my Jamfiles and rebuilt.
+ # My program was crashing with absurd errors.
+ # After some debugging I traced it back to the fact that a1.o was *still*
+ # in liba.a
+ #
+ # Rene Rivera:
+ #
+ # Originally removing the archive was done by splicing an RM
+ # onto the archive action. That makes archives fail to build on NT
+ # when they have many files because it will no longer execute the
+ # action directly and blow the line length limit. Instead we
+ # remove the file in a different action, just before the building
+ # of the archive.
+ #
+ local clean.a = $(targets[1])(clean) ;
+ TEMPORARY $(clean.a) ;
+ NOCARE $(clean.a) ;
+ LOCATE on $(clean.a) = [ on $(targets[1]) return $(LOCATE) ] ;
+ DEPENDS $(clean.a) : $(sources) ;
+ DEPENDS $(targets) : $(clean.a) ;
+ common.RmTemps $(clean.a) : $(targets) ;
+}
+
+actions piecemeal archive
+{
+ "$(.AR)" $(AROPTIONS) rc "$(<)" "$(>)"
+ "ranlib" -cs "$(<)"
+}
+
+flags intel-darwin.link USER_OPTIONS <linkflags> ;
+
+# Declare actions for linking
+rule link ( targets * : sources * : properties * )
+{
+ SPACE on $(targets) = " " ;
+ # Serialize execution of the 'link' action, since
+ # running N links in parallel is just slower.
+ JAM_SEMAPHORE on $(targets) = <s>intel-darwin-link-semaphore ;
+}
+
+actions link bind LIBRARIES
+{
+ "$(CONFIG_COMMAND)" $(USER_OPTIONS) -L"$(LINKPATH)" -o "$(<)" "$(>)" "$(LIBRARIES)" -l$(FINDLIBS-SA) -l$(FINDLIBS-ST) $(OPTIONS)
+}
+
+actions link.dll bind LIBRARIES
+{
+ "$(CONFIG_COMMAND)" $(USER_OPTIONS) -L"$(LINKPATH)" -o "$(<)" -single_module -dynamiclib -install_name "$(<[1]:D=)" "$(>)" "$(LIBRARIES)" -l$(FINDLIBS-SA) -l$(FINDLIBS-ST) $(OPTIONS)
+}
diff --git a/src/kenlm/jam-files/boost-build/tools/intel-linux.jam b/src/kenlm/jam-files/boost-build/tools/intel-linux.jam
new file mode 100644
index 0000000..d9164ad
--- /dev/null
+++ b/src/kenlm/jam-files/boost-build/tools/intel-linux.jam
@@ -0,0 +1,250 @@
+# Copyright (c) 2003 Michael Stevens
+# Copyright (c) 2011 Bryce Lelbach
+#
+# Use, modification and distribution is subject to the Boost Software
+# License Version 1.0. (See accompanying file LICENSE_1_0.txt or
+# http://www.boost.org/LICENSE_1_0.txt)
+
+import toolset ;
+import feature ;
+import toolset : flags ;
+
+import intel ;
+import gcc ;
+import common ;
+import errors ;
+import generators ;
+import type ;
+import numbers ;
+
+feature.extend-subfeature toolset intel : platform : linux ;
+
+toolset.inherit-generators intel-linux
+ <toolset>intel <toolset-intel:platform>linux : gcc : gcc.mingw.link gcc.mingw.link.dll ;
+generators.override intel-linux.prebuilt : builtin.lib-generator ;
+generators.override intel-linux.prebuilt : builtin.prebuilt ;
+generators.override intel-linux.searched-lib-generator : searched-lib-generator ;
+
+# Override default do-nothing generators.
+generators.override intel-linux.compile.c.pch : pch.default-c-pch-generator ;
+generators.override intel-linux.compile.c++.pch : pch.default-cpp-pch-generator ;
+
+type.set-generated-target-suffix PCH : <toolset>intel <toolset-intel:platform>linux : pchi ;
+
+toolset.inherit-rules intel-linux : gcc ;
+toolset.inherit-flags intel-linux : gcc
+ : <inlining>off <inlining>on <inlining>full
+ <optimization>space <optimization>speed
+ <warnings>off <warnings>all <warnings>on
+ ;
+
+if [ MATCH (--debug-configuration) : [ modules.peek : ARGV ] ]
+{
+ .debug-configuration = true ;
+}
+
+# Initializes the intel-linux toolset
+# version in mandatory
+# name (default icpc) is used to invoke the specified intel-linux complier
+# compile and link options allow you to specify addition command line options for each version
+rule init ( version ? : command * : options * )
+{
+ local condition = [ common.check-init-parameters intel-linux
+ : version $(version) ] ;
+
+ if $(.debug-configuration)
+ {
+ ECHO "notice: intel-linux version is" $(version) ;
+ }
+
+ local default_path ;
+
+ # Intel C++ Composer XE 2011 for Linux, aka Intel C++ Compiler XE 12.0,
+ # aka intel-linux-12.0. In this version, Intel thankfully decides to install
+ # to a sane 'intel' folder in /opt.
+ if [ MATCH "(12[.]0|12)" : $(version) ]
+ { default_path = /opt/intel/bin ; }
+ # Intel C++ Compiler 11.1.
+ else if [ MATCH "(11[.]1)" : $(version) ]
+ { default_path = /opt/intel_cce_11.1.064.x86_64/bin ; }
+ # Intel C++ Compiler 11.0.
+ else if [ MATCH "(11[.]0|11)" : $(version) ]
+ { default_path = /opt/intel_cce_11.0.074.x86_64/bin ; }
+ # Intel C++ Compiler 10.1.
+ else if [ MATCH "(10[.]1)" : $(version) ]
+ { default_path = /opt/intel_cce_10.1.013_x64/bin ; }
+ # Intel C++ Compiler 9.1.
+ else if [ MATCH "(9[.]1)" : $(version) ]
+ { default_path = /opt/intel_cc_91/bin ; }
+ # Intel C++ Compiler 9.0.
+ else if [ MATCH "(9[.]0|9)" : $(version) ]
+ { default_path = /opt/intel_cc_90/bin ; }
+ # Intel C++ Compiler 8.1.
+ else if [ MATCH "(8[.]1)" : $(version) ]
+ { default_path = /opt/intel_cc_81/bin ; }
+ # Intel C++ Compiler 8.0 - this used to be the default, so now it's the
+ # fallback.
+ else
+ { default_path = /opt/intel_cc_80/bin ; }
+
+ if $(.debug-configuration)
+ {
+ ECHO "notice: default search path for intel-linux is" $(default_path) ;
+ }
+
+ command = [ common.get-invocation-command intel-linux : icpc
+ : $(command) : $(default_path) ] ;
+
+ common.handle-options intel-linux : $(condition) : $(command) : $(options) ;
+
+ gcc.init-link-flags intel-linux gnu $(condition) ;
+
+ local root = [ feature.get-values <root> : $(options) ] ;
+ local bin ;
+ if $(command) || $(root)
+ {
+ bin ?= [ common.get-absolute-tool-path $(command[-1]) ] ;
+ root ?= $(bin:D) ;
+
+ local command-string = $(command:J=" ") ;
+ local version-output = [ SHELL "$(command-string) --version" ] ;
+ local real-version = [ MATCH "([0-9.]+)" : $(version-output) ] ;
+ local major = [ MATCH "([0-9]+).*" : $(real-version) ] ;
+
+ # If we failed to determine major version, use the behaviour for
+ # the current compiler.
+ if $(major) && [ numbers.less $(major) 10 ]
+ {
+ flags intel-linux.compile OPTIONS $(condition)/<inlining>off : "-Ob0" ;
+ flags intel-linux.compile OPTIONS $(condition)/<inlining>on : "-Ob1" ;
+ flags intel-linux.compile OPTIONS $(condition)/<inlining>full : "-Ob2" ;
+ flags intel-linux.compile OPTIONS $(condition)/<optimization>space : "-O1" ;
+ flags intel-linux.compile OPTIONS $(condition)/<optimization>speed : "-O3 -ip" ;
+ }
+ else if $(major) && [ numbers.less $(major) 11 ]
+ {
+ flags intel-linux.compile OPTIONS $(condition)/<inlining>off : "-inline-level=0" ;
+ flags intel-linux.compile OPTIONS $(condition)/<inlining>on : "-inline-level=1" ;
+ flags intel-linux.compile OPTIONS $(condition)/<inlining>full : "-inline-level=2" ;
+ flags intel-linux.compile OPTIONS $(condition)/<optimization>space : "-O1" ;
+ flags intel-linux.compile OPTIONS $(condition)/<optimization>speed : "-O3 -ip" ;
+ }
+ else # newer version of intel do have -Os (at least 11+, don't know about 10)
+ {
+ flags intel-linux.compile OPTIONS $(condition)/<inlining>off : "-inline-level=0" ;
+ flags intel-linux.compile OPTIONS $(condition)/<inlining>on : "-inline-level=1" ;
+ flags intel-linux.compile OPTIONS $(condition)/<inlining>full : "-inline-level=2" ;
+ flags intel-linux.compile OPTIONS $(condition)/<optimization>space : "-Os" ;
+ flags intel-linux.compile OPTIONS $(condition)/<optimization>speed : "-O3 -ip" ;
+ }
+
+ if $(root)
+ {
+ # Libraries required to run the executable may be in either
+ # $(root)/lib (10.1 and earlier)
+ # or
+ # $(root)/lib/architecture-name (11.0 and later:
+ local lib_path = $(root)/lib $(root:P)/lib/$(bin:B) ;
+ if $(.debug-configuration)
+ {
+ ECHO notice: using intel libraries :: $(condition) :: $(lib_path) ;
+ }
+ flags intel-linux.link RUN_PATH $(condition) : $(lib_path) ;
+ }
+ }
+}
+
+SPACE = " " ;
+
+flags intel-linux.compile OPTIONS <warnings>off : -w0 ;
+flags intel-linux.compile OPTIONS <warnings>on : -w1 ;
+flags intel-linux.compile OPTIONS <warnings>all : -w2 ;
+
+rule compile.c++ ( targets * : sources * : properties * )
+{
+ gcc.setup-threading $(targets) : $(sources) : $(properties) ;
+ gcc.setup-fpic $(targets) : $(sources) : $(properties) ;
+ gcc.setup-address-model $(targets) : $(sources) : $(properties) ;
+ DEPENDS $(<) : [ on $(<) return $(PCH_FILE) ] ;
+}
+
+actions compile.c++ bind PCH_FILE
+{
+ "$(CONFIG_COMMAND)" -c -xc++ $(OPTIONS) $(USER_OPTIONS) -D$(DEFINES) -I"$(INCLUDES)" -use-pch"$(PCH_FILE)" -c -o "$(<)" "$(>)"
+}
+
+rule compile.c ( targets * : sources * : properties * )
+{
+ gcc.setup-threading $(targets) : $(sources) : $(properties) ;
+ gcc.setup-fpic $(targets) : $(sources) : $(properties) ;
+ gcc.setup-address-model $(targets) : $(sources) : $(properties) ;
+ DEPENDS $(<) : [ on $(<) return $(PCH_FILE) ] ;
+}
+
+actions compile.c bind PCH_FILE
+{
+ "$(CONFIG_COMMAND)" -c -xc $(OPTIONS) $(USER_OPTIONS) -D$(DEFINES) -I"$(INCLUDES)" -use-pch"$(PCH_FILE)" -c -o "$(<)" "$(>)"
+}
+
+rule compile.c++.pch ( targets * : sources * : properties * )
+{
+ gcc.setup-threading $(targets) : $(sources) : $(properties) ;
+ gcc.setup-fpic $(targets) : $(sources) : $(properties) ;
+ gcc.setup-address-model $(targets) : $(sources) : $(properties) ;
+}
+#
+# Compiling a pch first deletes any existing *.pchi file, as Intel's compiler
+# won't over-write an existing pch: instead it creates filename$1.pchi, filename$2.pchi
+# etc - which appear not to do anything except take up disk space :-(
+#
+actions compile.c++.pch
+{
+ rm -f "$(<)" && "$(CONFIG_COMMAND)" -x c++-header $(OPTIONS) $(USER_OPTIONS) -D$(DEFINES) -I"$(INCLUDES)" -c -pch-create "$(<)" "$(>)"
+}
+
+actions compile.fortran
+{
+ "ifort" -c $(OPTIONS) $(USER_OPTIONS) -D$(DEFINES) -I"$(INCLUDES)" -c -o "$(<)" "$(>)"
+}
+
+rule compile.c.pch ( targets * : sources * : properties * )
+{
+ gcc.setup-threading $(targets) : $(sources) : $(properties) ;
+ gcc.setup-fpic $(targets) : $(sources) : $(properties) ;
+ gcc.setup-address-model $(targets) : $(sources) : $(properties) ;
+}
+
+actions compile.c.pch
+{
+ rm -f "$(<)" && "$(CONFIG_COMMAND)" -x c-header $(OPTIONS) $(USER_OPTIONS) -D$(DEFINES) -I"$(INCLUDES)" -c -pch-create "$(<)" "$(>)"
+}
+
+rule link ( targets * : sources * : properties * )
+{
+ gcc.setup-threading $(targets) : $(sources) : $(properties) ;
+ gcc.setup-address-model $(targets) : $(sources) : $(properties) ;
+ SPACE on $(targets) = " " ;
+ JAM_SEMAPHORE on $(targets) = <s>intel-linux-link-semaphore ;
+}
+
+actions link bind LIBRARIES
+{
+ "$(CONFIG_COMMAND)" -L"$(LINKPATH)" -Wl,-R$(SPACE)-Wl,"$(RPATH)" -Wl,-rpath-link$(SPACE)-Wl,"$(RPATH_LINK)" -o "$(<)" "$(>)" "$(LIBRARIES)" -l$(FINDLIBS-SA) -l$(FINDLIBS-ST) $(OPTIONS) $(USER_OPTIONS)
+}
+
+rule link.dll ( targets * : sources * : properties * )
+{
+ gcc.setup-threading $(targets) : $(sources) : $(properties) ;
+ gcc.setup-address-model $(targets) : $(sources) : $(properties) ;
+ SPACE on $(targets) = " " ;
+ JAM_SEMAPHORE on $(targets) = <s>intel-linux-link-semaphore ;
+}
+
+# Differ from 'link' above only by -shared.
+actions link.dll bind LIBRARIES
+{
+ "$(CONFIG_COMMAND)" -L"$(LINKPATH)" -Wl,-R$(SPACE)-Wl,"$(RPATH)" -o "$(<)" -Wl,-soname$(SPACE)-Wl,$(<[1]:D=) -shared "$(>)" "$(LIBRARIES)" -l$(FINDLIBS-SA) -l$(FINDLIBS-ST) $(OPTIONS) $(USER_OPTIONS)
+}
+
+
+
diff --git a/src/kenlm/jam-files/boost-build/tools/intel-win.jam b/src/kenlm/jam-files/boost-build/tools/intel-win.jam
new file mode 100644
index 0000000..c9adac0
--- /dev/null
+++ b/src/kenlm/jam-files/boost-build/tools/intel-win.jam
@@ -0,0 +1,184 @@
+# Copyright Vladimir Prus 2004.
+# Distributed under the Boost Software License, Version 1.0.
+# (See accompanying file LICENSE_1_0.txt
+# or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+# Importing common is needed because the rules we inherit here depend on it.
+# That is nasty.
+import common ;
+import errors ;
+import feature ;
+import intel ;
+import msvc ;
+import os ;
+import toolset ;
+import generators ;
+import type ;
+
+feature.extend-subfeature toolset intel : platform : win ;
+
+toolset.inherit-generators intel-win <toolset>intel <toolset-intel:platform>win : msvc ;
+toolset.inherit-flags intel-win : msvc : : YLOPTION ;
+toolset.inherit-rules intel-win : msvc ;
+
+# Override default do-nothing generators.
+generators.override intel-win.compile.c.pch : pch.default-c-pch-generator ;
+generators.override intel-win.compile.c++.pch : pch.default-cpp-pch-generator ;
+generators.override intel-win.compile.rc : rc.compile.resource ;
+generators.override intel-win.compile.mc : mc.compile ;
+
+toolset.flags intel-win.compile PCH_SOURCE <pch>on : <pch-source> ;
+
+toolset.add-requirements <toolset>intel-win,<runtime-link>shared:<threading>multi ;
+
+# Initializes the intel toolset for windows
+rule init ( version ? : # the compiler version
+ command * : # the command to invoke the compiler itself
+ options * # Additional option: <compatibility>
+ # either 'vc6', 'vc7', 'vc7.1'
+ # or 'native'(default).
+ )
+{
+ local compatibility =
+ [ feature.get-values <compatibility> : $(options) ] ;
+ local condition = [ common.check-init-parameters intel-win
+ : version $(version) : compatibility $(compatibility) ] ;
+
+ command = [ common.get-invocation-command intel-win : icl.exe :
+ $(command) ] ;
+
+ common.handle-options intel-win : $(condition) : $(command) : $(options) ;
+
+ local root ;
+ if $(command)
+ {
+ root = [ common.get-absolute-tool-path $(command[-1]) ] ;
+ root = $(root)/ ;
+ }
+
+ local setup ;
+ setup = [ GLOB $(root) : iclvars_*.bat ] ;
+ if ! $(setup)
+ {
+ setup = $(root)/iclvars.bat ;
+ }
+ setup = "call \""$(setup)"\" > nul " ;
+
+ if [ os.name ] = NT
+ {
+ setup = $(setup)"
+" ;
+ }
+ else
+ {
+ setup = "cmd /S /C "$(setup)" \"&&\" " ;
+ }
+
+ toolset.flags intel-win.compile .CC $(condition) : $(setup)icl ;
+ toolset.flags intel-win.link .LD $(condition) : $(setup)xilink ;
+ toolset.flags intel-win.archive .LD $(condition) : $(setup)xilink /lib ;
+ toolset.flags intel-win.link .MT $(condition) : $(setup)mt -nologo ;
+ toolset.flags intel-win.compile .MC $(condition) : $(setup)mc ;
+ toolset.flags intel-win.compile .RC $(condition) : $(setup)rc ;
+
+ local m = [ MATCH (.).* : $(version) ] ;
+ local major = $(m[1]) ;
+
+ local C++FLAGS ;
+
+ C++FLAGS += /nologo ;
+
+ # Reduce the number of spurious error messages
+ C++FLAGS += /Qwn5 /Qwd985 ;
+
+ # Enable ADL
+ C++FLAGS += -Qoption,c,--arg_dep_lookup ; #"c" works for C++, too
+
+ # Disable Microsoft "secure" overloads in Dinkumware libraries since they
+ # cause compile errors with Intel versions 9 and 10.
+ C++FLAGS += -D_SECURE_SCL=0 ;
+
+ if $(major) > 5
+ {
+ C++FLAGS += /Zc:forScope ; # Add support for correct for loop scoping.
+ }
+
+ # Add options recognized only by intel7 and above.
+ if $(major) >= 7
+ {
+ C++FLAGS += /Qansi_alias ;
+ }
+
+ if $(compatibility) = vc6
+ {
+ C++FLAGS +=
+ # Emulate VC6
+ /Qvc6
+
+ # No wchar_t support in vc6 dinkum library. Furthermore, in vc6
+ # compatibility-mode, wchar_t is not a distinct type from unsigned
+ # short.
+ -DBOOST_NO_INTRINSIC_WCHAR_T
+ ;
+ }
+ else
+ {
+ if $(major) > 5
+ {
+ # Add support for wchar_t
+ C++FLAGS += /Zc:wchar_t
+ # Tell the dinkumware library about it.
+ -D_NATIVE_WCHAR_T_DEFINED
+ ;
+ }
+ }
+
+ if $(compatibility) && $(compatibility) != native
+ {
+ C++FLAGS += /Q$(base-vc) ;
+ }
+ else
+ {
+ C++FLAGS +=
+ -Qoption,cpp,--arg_dep_lookup
+ # The following options were intended to disable the Intel compiler's
+ # 'bug-emulation' mode, but were later reported to be causing ICE with
+ # Intel-Win 9.0. It is not yet clear which options can be safely used.
+ # -Qoption,cpp,--const_string_literals
+ # -Qoption,cpp,--new_for_init
+ # -Qoption,cpp,--no_implicit_typename
+ # -Qoption,cpp,--no_friend_injection
+ # -Qoption,cpp,--no_microsoft_bugs
+ ;
+ }
+
+ toolset.flags intel-win CFLAGS $(condition) : $(C++FLAGS) ;
+ # By default, when creating PCH, intel adds 'i' to the explicitly
+ # specified name of the PCH file. Of course, Boost.Build is not
+ # happy when compiler produces not the file it was asked for.
+ # The option below stops this behaviour.
+ toolset.flags intel-win CFLAGS : -Qpchi- ;
+
+ if ! $(compatibility)
+ {
+ # If there's no backend version, assume 10.
+ compatibility = vc10 ;
+ }
+
+ local extract-version = [ MATCH ^vc(.*) : $(compatibility) ] ;
+ if ! $(extract-version)
+ {
+ errors.user-error "Invalid value for compatibility option:"
+ $(compatibility) ;
+ }
+
+ # Depending on the settings, running of tests require some runtime DLLs.
+ toolset.flags intel-win RUN_PATH $(condition) : $(root) ;
+
+ msvc.configure-version-specific intel-win : $(extract-version[1]) : $(condition) ;
+}
+
+toolset.flags intel-win.link LIBRARY_OPTION <toolset>intel : "" ;
+
+toolset.flags intel-win YLOPTION ;
+
diff --git a/src/kenlm/jam-files/boost-build/tools/intel.jam b/src/kenlm/jam-files/boost-build/tools/intel.jam
new file mode 100644
index 0000000..67038aa
--- /dev/null
+++ b/src/kenlm/jam-files/boost-build/tools/intel.jam
@@ -0,0 +1,34 @@
+# Copyright Vladimir Prus 2004.
+# Distributed under the Boost Software License, Version 1.0.
+# (See accompanying file LICENSE_1_0.txt
+# or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+# This is a generic 'intel' toolset. Depending on the current
+# system, it forwards either to 'intel-linux' or 'intel-win'
+# modules.
+
+import feature ;
+import os ;
+import toolset ;
+
+feature.extend toolset : intel ;
+feature.subfeature toolset intel : platform : : propagated link-incompatible ;
+
+rule init ( * : * )
+{
+ if [ os.name ] = LINUX
+ {
+ toolset.using intel-linux :
+ $(1) : $(2) : $(3) : $(4) : $(5) : $(6) : $(7) : $(8) : $(9) ;
+ }
+ else if [ os.name ] = MACOSX
+ {
+ toolset.using intel-darwin :
+ $(1) : $(2) : $(3) : $(4) : $(5) : $(6) : $(7) : $(8) : $(9) ;
+ }
+ else
+ {
+ toolset.using intel-win :
+ $(1) : $(2) : $(3) : $(4) : $(5) : $(6) : $(7) : $(8) : $(9) ;
+ }
+}
diff --git a/src/kenlm/jam-files/boost-build/tools/jpeg.jam b/src/kenlm/jam-files/boost-build/tools/jpeg.jam
new file mode 100644
index 0000000..128ab63
--- /dev/null
+++ b/src/kenlm/jam-files/boost-build/tools/jpeg.jam
@@ -0,0 +1,233 @@
+# Copyright (c) 2010 Vladimir Prus.
+# Copyright (c) 2013 Steven Watanabe
+#
+# Use, modification and distribution is subject to the Boost Software
+# License Version 1.0. (See accompanying file LICENSE_1_0.txt or
+# http://www.boost.org/LICENSE_1_0.txt)
+
+# Supports the libjpeg library
+#
+# After 'using libjpeg', the following targets are available:
+#
+# /libjpeg//libjpeg -- The libjpeg library
+
+import project ;
+import ac ;
+import errors ;
+import "class" : new ;
+import targets ;
+import path ;
+import modules ;
+import errors ;
+import indirect ;
+import property ;
+import property-set ;
+
+header = jconfig.h jdct.h jerror.h jinclude.h jmemsys.h jmorecfg.h jpegint.h jpeglib.h
+ jversion.h ;
+
+names = libjpeg ;
+
+sources = jaricom.c jcapimin.c jcapistd.c jcarith.c jccoefct.c jccolor.c
+ jcdctmgr.c jchuff.c jcinit.c jcmainct.c jcmarker.c jcmaster.c
+ jcomapi.c jcparam.c jcprepct.c jcsample.c jctrans.c jdapimin.c
+ jdapistd.c jdarith.c jdatadst.c jdatasrc.c jdcoefct.c jdcolor.c
+ jddctmgr.c jdhuff.c jdinput.c jdmainct.c jdmarker.c jdmaster.c
+ jdmerge.c jdpostct.c jdsample.c jdtrans.c jerror.c jfdctflt.c
+ jfdctfst.c jfdctint.c jidctflt.c jidctfst.c jidctint.c jquant1.c ;
+
+library-id = 0 ;
+
+if --debug-configuration in [ modules.peek : ARGV ]
+{
+ .debug = true ;
+}
+
+# Initializes the libjpeg library.
+#
+# libjpeg can be configured either to use pre-existing binaries
+# or to build the library from source.
+#
+# Options for configuring a prebuilt libjpeg::
+#
+# <search>
+# The directory containing the libjpeg binaries.
+# <name>
+# Overrides the default library name.
+# <include>
+# The directory containing the libjpeg headers.
+#
+# If none of these options is specified, then the environmental
+# variables LIBJPEG_LIBRARY_PATH, LIBJPEG_NAME, and LIBJPEG_INCLUDE will
+# be used instead.
+#
+# Options for building libjpeg from source::
+#
+# <source>
+# The libjpeg source directory. Defaults to the environmental variable
+# LIBJPEG_SOURCE.
+# <tag>
+# A rule which computes the actual name of the compiled
+# libraries based on the build properties. Ignored
+# when using precompiled binaries.
+# <build-name>
+# The base name to use for the compiled library. Ignored
+# when using precompiled binaries.
+#
+# Examples::
+#
+# # Find libjpeg in the default system location
+# using libjpeg ;
+# # Build libjpeg from source
+# using libjpeg : 8c : <source>/home/steven/libjpeg-8c ;
+# # Find libjpeg in /usr/local
+# using libjpeg : 8c
+# : <include>/usr/local/include <search>/usr/local/lib ;
+# # Build libjpeg from source for msvc and find
+# # prebuilt binaries for gcc.
+# using libjpeg : 8c : <source>C:/Devel/src/libjpeg-8c : <toolset>msvc ;
+# using libjpeg : 8c : : <toolset>gcc ;
+#
+rule init (
+ version ?
+ # The libjpeg version (currently ignored)
+
+ : options *
+ # A list of the options to use
+
+ : requirements *
+ # The requirements for the libjpeg target
+
+ : is-default ?
+ # Default configurations are only used when libjpeg
+ # has not yet been configured.
+ )
+{
+ local caller = [ project.current ] ;
+
+ if ! $(.initialized)
+ {
+ .initialized = true ;
+
+ project.initialize $(__name__) ;
+ .project = [ project.current ] ;
+ project libjpeg ;
+ }
+
+ local library-path = [ property.select <search> : $(options) ] ;
+ library-path = $(library-path:G=) ;
+ local include-path = [ property.select <include> : $(options) ] ;
+ include-path = $(include-path:G=) ;
+ local source-path = [ property.select <source> : $(options) ] ;
+ source-path = $(source-path:G=) ;
+ local library-name = [ property.select <name> : $(options) ] ;
+ library-name = $(library-name:G=) ;
+ local tag = [ property.select <tag> : $(options) ] ;
+ tag = $(tag:G=) ;
+ local build-name = [ property.select <build-name> : $(options) ] ;
+ build-name = $(build-name:G=) ;
+
+ condition = [ property-set.create $(requirements) ] ;
+ condition = [ property-set.create [ $(condition).base ] ] ;
+
+ local no-build-from-source ;
+ # Ignore environmental ZLIB_SOURCE if this initialization
+ # requested to search for a specific pre-built library.
+ if $(library-path) || $(include-path) || $(library-name)
+ {
+ if $(source-path) || $(tag) || $(build-name)
+ {
+ errors.user-error "incompatible options for libjpeg:"
+ [ property.select <search> <include> <name> : $(options) ] "and"
+ [ property.select <source> <tag> <build-name> : $(options) ] ;
+ }
+ else
+ {
+ no-build-from-source = true ;
+ }
+ }
+
+ source-path ?= [ modules.peek : ZLIB_SOURCE ] ;
+
+ if $(.configured.$(condition))
+ {
+ if $(is-default)
+ {
+ if $(.debug)
+ {
+ ECHO "notice: [libjpeg] libjpeg is already configured" ;
+ }
+ }
+ else
+ {
+ errors.user-error "libjpeg is already configured" ;
+ }
+ return ;
+ }
+ else if $(source-path) && ! $(no-build-from-source)
+ {
+ build-name ?= z ;
+ library-id = [ CALC $(library-id) + 1 ] ;
+ tag = [ MATCH ^@?(.*)$ : $(tag) ] ;
+ if $(tag) && ! [ MATCH ^([^%]*)%([^%]+)$ : $(tag) ]
+ {
+ tag = [ indirect.make $(tag) : [ $(caller).project-module ] ] ;
+ }
+ sources = [ path.glob $(source-path) : $(sources) ] ;
+ if $(.debug)
+ {
+ ECHO "notice: [libjpeg] Building libjpeg from source as $(build-name)" ;
+ if $(condition)
+ {
+ ECHO "notice: [libjpeg] Condition" [ $(condition).raw ] ;
+ }
+ if $(sources)
+ {
+ ECHO "notice: [libjpeg] found libjpeg source in $(source-path)" ;
+ }
+ else
+ {
+ ECHO "warning: [libjpeg] could not find libjpeg source in $(source-path)" ;
+ }
+ }
+ local target ;
+ if $(sources) {
+ target = [ targets.create-typed-target LIB : $(.project)
+ : $(build-name).$(library-id)
+ : $(sources)
+ : $(requirements)
+ <tag>@$(tag)
+ <include>$(source-path)
+ <toolset>msvc:<define>_CRT_SECURE_NO_DEPRECATE
+ <toolset>msvc:<define>_SCL_SECURE_NO_DEPRECATE
+ <link>shared:<define>ZLIB_DLL
+ :
+ : <include>$(source-path) ] ;
+ }
+
+ local mt = [ new ac-library libjpeg : $(.project) : $(condition) ] ;
+ $(mt).set-header $(header) ;
+ $(mt).set-default-names $(names) ;
+ if $(target)
+ {
+ $(mt).set-target $(target) ;
+ }
+ targets.main-target-alternative $(mt) ;
+ } else {
+ if $(.debug)
+ {
+ ECHO "notice: [libjpeg] Using pre-installed library" ;
+ if $(condition)
+ {
+ ECHO "notice: [libjpeg] Condition" [ $(condition).raw ] ;
+ }
+ }
+
+ local mt = [ new ac-library libjpeg : $(.project) : $(condition) :
+ $(include-path) : $(library-path) : $(library-name) : $(root) ] ;
+ $(mt).set-header $(header) ;
+ $(mt).set-default-names $(names) ;
+ targets.main-target-alternative $(mt) ;
+ }
+ .configured.$(condition) = true ;
+}
diff --git a/src/kenlm/jam-files/boost-build/tools/lex.jam b/src/kenlm/jam-files/boost-build/tools/lex.jam
new file mode 100644
index 0000000..75d6413
--- /dev/null
+++ b/src/kenlm/jam-files/boost-build/tools/lex.jam
@@ -0,0 +1,33 @@
+# Copyright 2003 Vladimir Prus
+# Distributed under the Boost Software License, Version 1.0.
+# (See accompanying file LICENSE_1_0.txt or http://www.boost.org/LICENSE_1_0.txt)
+
+import type ;
+import generators ;
+import feature ;
+import property ;
+
+
+feature.feature flex.prefix : : free ;
+type.register LEX : l ;
+type.register LEX++ : ll ;
+generators.register-standard lex.lex : LEX : C ;
+generators.register-standard lex.lex : LEX++ : CPP ;
+
+rule init ( )
+{
+}
+
+rule lex ( target : source : properties * )
+{
+ local r = [ property.select flex.prefix : $(properties) ] ;
+ if $(r)
+ {
+ PREFIX on $(<) = $(r:G=) ;
+ }
+}
+
+actions lex
+{
+ flex -P$(PREFIX) -o$(<) $(>)
+}
diff --git a/src/kenlm/jam-files/boost-build/tools/make.jam b/src/kenlm/jam-files/boost-build/tools/make.jam
new file mode 100644
index 0000000..40b59fa
--- /dev/null
+++ b/src/kenlm/jam-files/boost-build/tools/make.jam
@@ -0,0 +1,63 @@
+# Copyright 2003 Dave Abrahams
+# Copyright 2003 Douglas Gregor
+# Copyright 2006 Rene Rivera
+# Copyright 2002, 2003, 2004, 2005, 2006 Vladimir Prus
+# Distributed under the Boost Software License, Version 1.0.
+# (See accompanying file LICENSE_1_0.txt or copy at
+# http://www.boost.org/LICENSE_1_0.txt)
+
+# This module defines the 'make' main target rule.
+
+import "class" : new ;
+import project ;
+import property-set ;
+import targets ;
+
+
+class make-target-class : basic-target
+{
+ import "class" : new ;
+ import type ;
+ import virtual-target ;
+
+ rule __init__ ( name : project : sources * : requirements *
+ : default-build * : usage-requirements * )
+ {
+ basic-target.__init__ $(name) : $(project) : $(sources) :
+ $(requirements) : $(default-build) : $(usage-requirements) ;
+ }
+
+ rule construct ( name : source-targets * : property-set )
+ {
+ local action-name = [ $(property-set).get <action> ] ;
+ # 'm' will always be set -- we add '@' ourselves in the 'make' rule
+ # below.
+ local m = [ MATCH ^@(.*) : $(action-name) ] ;
+
+ local a = [ new action $(source-targets) : $(m[1]) : $(property-set) ] ;
+ local t = [ new file-target $(self.name) exact : [ type.type
+ $(self.name) ] : $(self.project) : $(a) ] ;
+ return [ property-set.empty ] [ virtual-target.register $(t) ] ;
+ }
+}
+
+
+# Declares the 'make' main target.
+#
+rule make ( target-name : sources * : generating-rule + : requirements * :
+ usage-requirements * )
+{
+ # The '@' sign causes the feature.jam module to qualify rule name with the
+ # module name of current project, if needed.
+ local m = [ MATCH ^(@).* : $(generating-rule) ] ;
+ if ! $(m)
+ {
+ generating-rule = @$(generating-rule) ;
+ }
+ targets.create-metatarget make-target-class : [ project.current ] :
+ $(target-name) : $(sources) : $(requirements) <action>$(generating-rule)
+ : : $(usage-requirements) ;
+}
+
+
+IMPORT $(__name__) : make : : make ;
diff --git a/src/kenlm/jam-files/boost-build/tools/mc.jam b/src/kenlm/jam-files/boost-build/tools/mc.jam
new file mode 100644
index 0000000..5783777
--- /dev/null
+++ b/src/kenlm/jam-files/boost-build/tools/mc.jam
@@ -0,0 +1,44 @@
+#~ Copyright 2005 Alexey Pakhunov.
+#~ Distributed under the Boost Software License, Version 1.0.
+#~ (See accompanying file LICENSE_1_0.txt or http://www.boost.org/LICENSE_1_0.txt)
+
+# Support for Microsoft message compiler tool.
+# Notes:
+# - there's just a message compiler tool; there's no tool for
+# extracting message strings from sources
+# - This file allows using the Microsoft message compiler
+# with any toolset. In msvc.jam, there's more specific
+# message compiling action.
+
+import common ;
+import generators ;
+import feature : feature get-values ;
+import toolset : flags ;
+import type ;
+import rc ;
+
+rule init ( )
+{
+}
+
+type.register MC : mc ;
+
+
+# Command line options
+feature mc-input-encoding : ansi unicode : free ;
+feature mc-output-encoding : unicode ansi : free ;
+feature mc-set-customer-bit : no yes : free ;
+
+flags mc.compile MCFLAGS <mc-input-encoding>ansi : -a ;
+flags mc.compile MCFLAGS <mc-input-encoding>unicode : -u ;
+flags mc.compile MCFLAGS <mc-output-encoding>ansi : -A ;
+flags mc.compile MCFLAGS <mc-output-encoding>unicode : -U ;
+flags mc.compile MCFLAGS <mc-set-customer-bit>no : ;
+flags mc.compile MCFLAGS <mc-set-customer-bit>yes : -c ;
+
+generators.register-standard mc.compile : MC : H RC ;
+
+actions compile
+{
+ mc $(MCFLAGS) -h "$(<[1]:DW)" -r "$(<[2]:DW)" "$(>:W)"
+}
diff --git a/src/kenlm/jam-files/boost-build/tools/message.jam b/src/kenlm/jam-files/boost-build/tools/message.jam
new file mode 100644
index 0000000..672b6e0
--- /dev/null
+++ b/src/kenlm/jam-files/boost-build/tools/message.jam
@@ -0,0 +1,62 @@
+# Copyright 2008 Vladimir Prus
+# Distributed under the Boost Software License, Version 1.0.
+# (See accompanying file LICENSE_1_0.txt or http://www.boost.org/LICENSE_1_0.txt)
+
+# Defines main target type 'message', that prints a message when built for the
+# first time.
+
+import project ;
+import "class" : new ;
+import targets ;
+import property-set ;
+
+class message-target-class : basic-target
+{
+ rule set-message ( * )
+ {
+ self.1 = $(1) ;
+ self.2 = $(2) ;
+ self.3 = $(3) ;
+ self.4 = $(4) ;
+ self.5 = $(5) ;
+ self.6 = $(6) ;
+ self.7 = $(7) ;
+ self.8 = $(8) ;
+ self.9 = $(9) ;
+ self.built = ;
+ }
+
+ rule construct ( name : source-targets * : property-set )
+ {
+ if ! $(self.built)
+ {
+ for i in 1 2 3 4 5 6 7 8 9
+ {
+ if $(self.$(i))
+ {
+ ECHO $(self.$(i)) ;
+ }
+ }
+ self.built = 1 ;
+ }
+
+ return [ property-set.empty ] ;
+ }
+}
+
+
+rule message ( name : * )
+{
+ local project = [ project.current ] ;
+
+ local result = [ targets.main-target-alternative
+ [ new message-target-class $(name) : $(project)
+ : [ targets.main-target-sources : $(name) ]
+ : [ targets.main-target-requirements : $(project) ]
+ : [ targets.main-target-default-build : $(project) ]
+ : [ targets.main-target-usage-requirements : $(project) ]
+ ] ] ;
+ $(result).set-message $(2) : $(3) : $(4) : $(5) : $(6) : $(7) : $(8) : $(9) ;
+ return $(result) ;
+}
+IMPORT $(__name__) : message : : message ;
diff --git a/src/kenlm/jam-files/boost-build/tools/midl.jam b/src/kenlm/jam-files/boost-build/tools/midl.jam
new file mode 100644
index 0000000..0aa5dda
--- /dev/null
+++ b/src/kenlm/jam-files/boost-build/tools/midl.jam
@@ -0,0 +1,142 @@
+# Copyright (c) 2005 Alexey Pakhunov.
+#
+# Use, modification and distribution is subject to the Boost Software
+# License Version 1.0. (See accompanying file LICENSE_1_0.txt or
+# http://www.boost.org/LICENSE_1_0.txt)
+
+# Microsoft Interface Definition Language (MIDL) related routines
+
+import common ;
+import generators ;
+import feature : feature get-values ;
+import os ;
+import scanner ;
+import toolset : flags ;
+import type ;
+
+rule init ( )
+{
+}
+
+type.register IDL : idl ;
+
+# A type library (.tlb) is generated by MIDL compiler and can be included
+# to resources of an application (.rc). In order to be found by a resource
+# compiler its target type should be derived from 'H' - otherwise
+# the property '<implicit-dependency>' will be ignored.
+type.register MSTYPELIB : tlb : H ;
+
+
+# Register scanner for MIDL files
+class midl-scanner : scanner
+{
+ import path property-set regex scanner type virtual-target ;
+
+ rule __init__ ( includes * )
+ {
+ scanner.__init__ ;
+
+ self.includes = $(includes) ;
+
+ # List of quoted strings
+ self.re-strings = "[ \t]*\"([^\"]*)\"([ \t]*,[ \t]*\"([^\"]*)\")*[ \t]*" ;
+
+ # 'import' and 'importlib' directives
+ self.re-import = "import"$(self.re-strings)"[ \t]*;" ;
+ self.re-importlib = "importlib[ \t]*[(]"$(self.re-strings)"[)][ \t]*;" ;
+
+ # C preprocessor 'include' directive
+ self.re-include-angle = "#[ \t]*include[ \t]*<(.*)>" ;
+ self.re-include-quoted = "#[ \t]*include[ \t]*\"(.*)\"" ;
+ }
+
+ rule pattern ( )
+ {
+ # Match '#include', 'import' and 'importlib' directives
+ return "((#[ \t]*include|import(lib)?).+(<(.*)>|\"(.*)\").+)" ;
+ }
+
+ rule process ( target : matches * : binding )
+ {
+ local included-angle = [ regex.transform $(matches) : $(self.re-include-angle) : 1 ] ;
+ local included-quoted = [ regex.transform $(matches) : $(self.re-include-quoted) : 1 ] ;
+ local imported = [ regex.transform $(matches) : $(self.re-import) : 1 3 ] ;
+ local imported_tlbs = [ regex.transform $(matches) : $(self.re-importlib) : 1 3 ] ;
+
+ # CONSIDER: the new scoping rule seem to defeat "on target" variables.
+ local g = [ on $(target) return $(HDRGRIST) ] ;
+ local b = [ NORMALIZE_PATH $(binding:D) ] ;
+
+ # Attach binding of including file to included targets.
+ # When target is directly created from virtual target
+ # this extra information is unnecessary. But in other
+    # cases, it allows distinguishing between two headers of the
+ # same name included from different places.
+ local g2 = $(g)"#"$(b) ;
+
+ included-angle = $(included-angle:G=$(g)) ;
+ included-quoted = $(included-quoted:G=$(g2)) ;
+ imported = $(imported:G=$(g2)) ;
+ imported_tlbs = $(imported_tlbs:G=$(g2)) ;
+
+ local all = $(included-angle) $(included-quoted) $(imported) ;
+
+ INCLUDES $(target) : $(all) ;
+ DEPENDS $(target) : $(imported_tlbs) ;
+ NOCARE $(all) $(imported_tlbs) ;
+ SEARCH on $(included-angle) = $(self.includes:G=) ;
+ SEARCH on $(included-quoted) = $(b) $(self.includes:G=) ;
+ SEARCH on $(imported) = $(b) $(self.includes:G=) ;
+ SEARCH on $(imported_tlbs) = $(b) $(self.includes:G=) ;
+
+ scanner.propagate
+ [ type.get-scanner CPP : [ property-set.create $(self.includes) ] ] :
+ $(included-angle) $(included-quoted) : $(target) ;
+
+ scanner.propagate $(__name__) : $(imported) : $(target) ;
+ }
+}
+
+scanner.register midl-scanner : include ;
+type.set-scanner IDL : midl-scanner ;
+
+
+# Command line options
+feature midl-stubless-proxy : yes no : propagated ;
+feature midl-robust : yes no : propagated ;
+
+flags midl.compile.idl MIDLFLAGS <midl-stubless-proxy>yes : /Oicf ;
+flags midl.compile.idl MIDLFLAGS <midl-stubless-proxy>no : /Oic ;
+flags midl.compile.idl MIDLFLAGS <midl-robust>yes : /robust ;
+flags midl.compile.idl MIDLFLAGS <midl-robust>no : /no_robust ;
+
+# Architecture-specific options
+architecture-x86 = <architecture> <architecture>x86 ;
+address-model-32 = <address-model> <address-model>32 ;
+address-model-64 = <address-model> <address-model>64 ;
+
+flags midl.compile.idl MIDLFLAGS $(architecture-x86)/$(address-model-32) : /win32 ;
+flags midl.compile.idl MIDLFLAGS $(architecture-x86)/<address-model>64 : /x64 ;
+flags midl.compile.idl MIDLFLAGS <architecture>ia64/$(address-model-64) : /ia64 ;
+
+
+flags midl.compile.idl DEFINES <define> ;
+flags midl.compile.idl UNDEFS <undef> ;
+flags midl.compile.idl INCLUDES <include> ;
+
+
+generators.register-c-compiler midl.compile.idl : IDL : MSTYPELIB H C(%_i) C(%_proxy) C(%_dlldata) ;
+
+
+# MIDL does not always generate '%_proxy.c' and '%_dlldata.c'. This behavior
+# depends on contents of the source IDL file. Calling TOUCH_FILE below ensures
+# that both files will be created so bjam will not try to recreate them
+# constantly.
+TOUCH_FILE = [ common.file-touch-command ] ;
+
+actions compile.idl
+{
+ midl /nologo @"@($(<[1]:W).rsp:E=$(nl)"$(>:W)" $(nl)-D$(DEFINES) $(nl)"-I$(INCLUDES)" $(nl)-U$(UNDEFS) $(nl)$(MIDLFLAGS) $(nl)/tlb "$(<[1]:W)" $(nl)/h "$(<[2]:W)" $(nl)/iid "$(<[3]:W)" $(nl)/proxy "$(<[4]:W)" $(nl)/dlldata "$(<[5]:W)")"
+ $(TOUCH_FILE) "$(<[4]:W)"
+ $(TOUCH_FILE) "$(<[5]:W)"
+}
diff --git a/src/kenlm/jam-files/boost-build/tools/mipspro.jam b/src/kenlm/jam-files/boost-build/tools/mipspro.jam
new file mode 100644
index 0000000..417eaef
--- /dev/null
+++ b/src/kenlm/jam-files/boost-build/tools/mipspro.jam
@@ -0,0 +1,145 @@
+# Copyright Noel Belcourt 2007.
+# Distributed under the Boost Software License, Version 1.0.
+# (See accompanying file LICENSE_1_0.txt or copy at
+# http://www.boost.org/LICENSE_1_0.txt)
+
+import property ;
+import generators ;
+import os ;
+import toolset : flags ;
+import feature ;
+import fortran ;
+import type ;
+import common ;
+
+feature.extend toolset : mipspro ;
+toolset.inherit mipspro : unix ;
+generators.override mipspro.prebuilt : builtin.lib-generator ;
+generators.override mipspro.searched-lib-generator : searched-lib-generator ;
+
+# Documentation and toolchain description located
+# http://www.sgi.com/products/software/irix/tools/
+
+rule init ( version ? : command * : options * )
+{
+ local condition = [
+ common.check-init-parameters mipspro : version $(version) ] ;
+
+ command = [ common.get-invocation-command mipspro : CC : $(command) ] ;
+
+ common.handle-options mipspro : $(condition) : $(command) : $(options) ;
+
+ command_c = $(command_c[1--2]) $(command[-1]:B=cc) ;
+
+ toolset.flags mipspro CONFIG_C_COMMAND $(condition) : $(command_c) ;
+
+ # fortran support
+ local command = [
+ common.get-invocation-command mipspro : f77 : $(command) : $(install_dir) ] ;
+
+ command_f = $(command_f[1--2]) $(command[-1]:B=f77) ;
+ toolset.flags mipspro CONFIG_F_COMMAND $(condition) : $(command_f) ;
+
+ # set link flags
+ flags mipspro.link FINDLIBS-ST : [
+ feature.get-values <find-static-library> : $(options) ] : unchecked ;
+
+ flags mipspro.link FINDLIBS-SA : [
+ feature.get-values <find-shared-library> : $(options) ] : unchecked ;
+}
+
+# Declare generators
+generators.register-c-compiler mipspro.compile.c : C : OBJ : <toolset>mipspro ;
+generators.register-c-compiler mipspro.compile.c++ : CPP : OBJ : <toolset>mipspro ;
+generators.register-fortran-compiler mipspro.compile.fortran : FORTRAN : OBJ : <toolset>mipspro ;
+
+cpu-arch-32 =
+ <architecture>/<address-model>
+ <architecture>/<address-model>32 ;
+
+cpu-arch-64 =
+ <architecture>/<address-model>64 ;
+
+flags mipspro.compile OPTIONS $(cpu-arch-32) : -n32 ;
+flags mipspro.compile OPTIONS $(cpu-arch-64) : -64 ;
+
+# Declare flags and actions for compilation
+flags mipspro.compile OPTIONS <debug-symbols>on : -g ;
+# flags mipspro.compile OPTIONS <profiling>on : -xprofile=tcov ;
+flags mipspro.compile OPTIONS <warnings>off : -w ;
+flags mipspro.compile OPTIONS <warnings>on : -ansiW -diag_suppress 1429 ; # suppress long long is nonstandard warning
+flags mipspro.compile OPTIONS <warnings>all : -fullwarn ;
+flags mipspro.compile OPTIONS <optimization>speed : -Ofast ;
+flags mipspro.compile OPTIONS <optimization>space : -O2 ;
+flags mipspro.compile OPTIONS <cflags> : -LANG:std ;
+flags mipspro.compile.c++ OPTIONS <inlining>off : -INLINE:none ;
+flags mipspro.compile.c++ OPTIONS <cxxflags> ;
+flags mipspro.compile DEFINES <define> ;
+flags mipspro.compile INCLUDES <include> ;
+
+
+flags mipspro.compile.fortran OPTIONS <fflags> ;
+
+actions compile.c
+{
+ "$(CONFIG_C_COMMAND)" $(OPTIONS) -D$(DEFINES) -I"$(INCLUDES)" -c -o "$(<)" "$(>)"
+}
+
+actions compile.c++
+{
+ "$(CONFIG_COMMAND)" -FE:template_in_elf_section -ptused $(OPTIONS) -D$(DEFINES) -I"$(INCLUDES)" -c -o "$(<)" "$(>)"
+}
+
+actions compile.fortran
+{
+ "$(CONFIG_F_COMMAND)" $(OPTIONS) -D$(DEFINES) -I"$(INCLUDES)" -c -o "$(<)" "$(>)"
+}
+
+# Declare flags and actions for linking
+flags mipspro.link OPTIONS <debug-symbols>on : -g ;
+# Strip the binary when no debugging is needed
+# flags mipspro.link OPTIONS <debug-symbols>off : -s ;
+# flags mipspro.link OPTIONS <profiling>on : -xprofile=tcov ;
+# flags mipspro.link OPTIONS <threading>multi : -mt ;
+
+flags mipspro.link OPTIONS $(cpu-arch-32) : -n32 ;
+flags mipspro.link OPTIONS $(cpu-arch-64) : -64 ;
+
+flags mipspro.link OPTIONS <optimization>speed : -Ofast ;
+flags mipspro.link OPTIONS <optimization>space : -O2 ;
+flags mipspro.link OPTIONS <linkflags> ;
+flags mipspro.link LINKPATH <library-path> ;
+flags mipspro.link FINDLIBS-ST <find-static-library> ;
+flags mipspro.link FINDLIBS-SA <find-shared-library> ;
+flags mipspro.link FINDLIBS-SA <threading>multi : pthread ;
+flags mipspro.link LIBRARIES <library-file> ;
+flags mipspro.link LINK-RUNTIME <runtime-link>static : static ;
+flags mipspro.link LINK-RUNTIME <runtime-link>shared : dynamic ;
+flags mipspro.link RPATH <dll-path> ;
+
+rule link ( targets * : sources * : properties * )
+{
+ SPACE on $(targets) = " " ;
+}
+
+actions link bind LIBRARIES
+{
+ "$(CONFIG_COMMAND)" -FE:template_in_elf_section -ptused $(OPTIONS) -L"$(LINKPATH)" -R"$(RPATH)" -o "$(<)" "$(>)" "$(LIBRARIES)" -Bdynamic -l$(FINDLIBS-SA) -Bstatic -l$(FINDLIBS-ST) -B$(LINK-RUNTIME) -lm
+}
+
+# Slight mods for dlls
+rule link.dll ( targets * : sources * : properties * )
+{
+ SPACE on $(targets) = " " ;
+}
+
+actions link.dll bind LIBRARIES
+{
+ "$(CONFIG_COMMAND)" $(OPTIONS) -L"$(LINKPATH)" -R"$(RPATH)" -o "$(<)" "$(>)" "$(LIBRARIES)" -Bdynamic -l$(FINDLIBS-SA) -Bstatic -l$(FINDLIBS-ST) -B$(LINK-RUNTIME)
+}
+
+# Declare action for creating static libraries
+actions piecemeal archive
+{
+ ar -cr "$(<)" "$(>)"
+}
diff --git a/src/kenlm/jam-files/boost-build/tools/mpi.jam b/src/kenlm/jam-files/boost-build/tools/mpi.jam
new file mode 100644
index 0000000..0fe490b
--- /dev/null
+++ b/src/kenlm/jam-files/boost-build/tools/mpi.jam
@@ -0,0 +1,583 @@
+# Support for the Message Passing Interface (MPI)
+#
+# (C) Copyright 2005, 2006 Trustees of Indiana University
+# (C) Copyright 2005 Douglas Gregor
+#
+# Distributed under the Boost Software License, Version 1.0. (See accompanying
+# file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt.)
+#
+# Authors: Douglas Gregor
+# Andrew Lumsdaine
+#
+# ==== MPI Configuration ====
+#
+# For many users, MPI support can be enabled simply by adding the following
+# line to your user-config.jam file:
+#
+# using mpi ;
+#
+# This should auto-detect MPI settings based on the MPI wrapper compiler in
+# your path, e.g., "mpic++". If the wrapper compiler is not in your path, or
+# has a different name, you can pass the name of the wrapper compiler as the
+# first argument to the mpi module:
+#
+# using mpi : /opt/mpich2-1.0.4/bin/mpiCC ;
+#
+# If your MPI implementation does not have a wrapper compiler, or the MPI
+# auto-detection code does not work with your MPI's wrapper compiler,
+# you can pass MPI-related options explicitly via the second parameter to the
+# mpi module:
+#
+# using mpi : : <find-shared-library>lammpio <find-shared-library>lammpi++
+# <find-shared-library>mpi <find-shared-library>lam
+# <find-shared-library>dl ;
+#
+# To see the results of MPI auto-detection, pass "--debug-configuration" on
+# the bjam command line.
+#
+# The (optional) fourth argument configures Boost.MPI for running
+# regression tests. These parameters specify the executable used to
+# launch jobs (default: "mpirun") followed by any necessary arguments
+# to this to run tests and tell the program to expect the number of
+# processors to follow (default: "-np"). With the default parameters,
+# for instance, the test harness will execute, e.g.,
+#
+# mpirun -np 4 all_gather_test
+#
+# ==== Linking Against the MPI Libraries ===
+#
+# To link against the MPI libraries, import the "mpi" module and add the
+# following requirement to your target:
+#
+# <library>/mpi//mpi
+#
+# Since MPI support is not always available, you should check
+# "mpi.configured" before trying to link against the MPI libraries.
+
+import "class" : new ;
+import common ;
+import feature : feature ;
+import generators ;
+import os ;
+import project ;
+import property ;
+import testing ;
+import toolset ;
+import type ;
+import path ;
+
+# Make this module a project
+project.initialize $(__name__) ;
+project mpi ;
+
+if [ MATCH (--debug-configuration) : [ modules.peek : ARGV ] ]
+{
+ .debug-configuration = true ;
+}
+
+# Assuming the first part of the command line is the given prefix
+# followed by some non-empty value, remove the first argument. Returns
+# either nothing (if there was no prefix or no value) or a pair
+#
+# <name>value rest-of-cmdline
+#
+# This is a subroutine of cmdline_to_features
+rule add_feature ( prefix name cmdline )
+{
+ local match = [ MATCH "^$(prefix)([^\" ]+|\"[^\"]+\") *(.*)$" : $(cmdline) ] ;
+
+ # If there was no value associated with the prefix, abort
+ if ! $(match) {
+ return ;
+ }
+
+ local value = $(match[1]) ;
+
+ if [ MATCH " +" : $(value) ] {
+ value = "\"$(value)\"" ;
+ }
+
+ return "<$(name)>$(value)" $(match[2]) ;
+}
+
+# Strip any end-of-line characters off the given string and return the
+# result.
+rule strip-eol ( string )
+{
+ local match = [ MATCH "^(([A-Za-z0-9~`\.!@#$%^&*()_+={};:'\",.<>/?\\| -]|[|])*).*$" : $(string) ] ;
+
+ if $(match)
+ {
+ return $(match[1]) ;
+ }
+ else
+ {
+ return $(string) ;
+ }
+}
+
+# Split a command-line into a set of features. Certain kinds of
+# compiler flags are recognized (e.g., -I, -D, -L, -l) and replaced
+# with their Boost.Build equivalents (e.g., <include>, <define>,
+# <library-path>, <find-library>). All other arguments are introduced
+# using the features in the unknown-features parameter, because we
+# don't know how to deal with them.
+# The incoming command line should be a string starting with
+# an executable (e.g., "g++ -I/include/path") and may contain any
+# number of command-line arguments thereafter. The result is a list of
+# features corresponding to the given command line, ignoring the
+# executable.
+rule cmdline_to_features ( cmdline : unknown-features ? )
+{
+ local executable ;
+ local features ;
+ local otherflags ;
+ local result ;
+
+ unknown-features ?= <cxxflags> <linkflags> ;
+
+ # Pull the executable out of the command line. At this point, the
+ # executable is just thrown away.
+ local match = [ MATCH "^([^\" ]+|\"[^\"]+\") *(.*)$" : $(cmdline) ] ;
+ executable = $(match[1]) ;
+ cmdline = $(match[2]) ;
+
+ # List the prefix/feature pairs that we will be able to transform.
+ # Every kind of parameter not mentioned here will be placed in both
+ # cxxflags and linkflags, because we don't know where they should go.
+ local feature_kinds-D = "define" ;
+ local feature_kinds-I = "include" ;
+ local feature_kinds-L = "library-path" ;
+ local feature_kinds-l = "find-shared-library" ;
+
+ while $(cmdline) {
+
+ # Check for one of the feature prefixes we know about. If we
+ # find one (and the associated value is nonempty), convert it
+ # into a feature.
+ local match = [ MATCH "^(-.)(.*)" : $(cmdline) ] ;
+ local matched ;
+ if $(match) && $(match[2]) {
+ local prefix = $(match[1]) ;
+ if $(feature_kinds$(prefix)) {
+ local name = $(feature_kinds$(prefix)) ;
+ local add = [ add_feature $(prefix) $(name) $(cmdline) ] ;
+
+ if $(add) {
+
+ if $(add[1]) = <find-shared-library>pthread
+ {
+ # Uhm. It's not really nice that this MPI implementation
+ # uses -lpthread as opposed to -pthread. We do want to
+ # set <threading>multi, instead of -lpthread.
+ result += "<threading>multi" ;
+ MPI_EXTRA_REQUIREMENTS += "<threading>multi" ;
+ }
+ else
+ {
+ result += $(add[1]) ;
+ }
+
+ cmdline = $(add[2]) ;
+ matched = yes ;
+ }
+ }
+ }
+
+ # If we haven't matched a feature prefix, just grab the command-line
+ # argument itself. If we can map this argument to a feature
+ # (e.g., -pthread -> <threading>multi), then do so; otherwise,
+# add it to the list of "other" flags that we don't
+ # understand.
+ if ! $(matched) {
+ match = [ MATCH "^([^\" ]+|\"[^\"]+\") *(.*)$" : $(cmdline) ] ;
+ local value = $(match[1]) ;
+ cmdline = $(match[2]) ;
+
+ # Check for multithreading support
+ if $(value) = "-pthread" || $(value) = "-pthreads"
+ {
+ result += "<threading>multi" ;
+
+ # DPG: This is a hack intended to work around a BBv2 bug where
+ # requirements propagated from libraries are not checked for
+ # conflicts when BBv2 determines which "common" properties to
+ # apply to a target. In our case, the <threading>single property
+ # gets propagated from the common properties to Boost.MPI
+ # targets, even though <threading>multi is in the usage
+ # requirements of <library>/mpi//mpi.
+ MPI_EXTRA_REQUIREMENTS += "<threading>multi" ;
+ }
+ else if [ MATCH "(.*[a-zA-Z0-9<>?-].*)" : $(value) ] {
+ otherflags += $(value) ;
+ }
+ }
+ }
+
+ # If there are other flags that we don't understand, add them to the
+ # result as both <cxxflags> and <linkflags>
+ if $(otherflags) {
+ for unknown in $(unknown-features)
+ {
+ result += "$(unknown)$(otherflags:J= )" ;
+ }
+ }
+
+ return $(result) ;
+}
+
+# Determine if it is safe to execute the given shell command by trying
+# to execute it and determining whether the exit code is zero or
+# not. Returns true for an exit code of zero, false otherwise.
+local rule safe-shell-command ( cmdline )
+{
+ local result = [ SHELL "$(cmdline) > /dev/null 2>/dev/null; if [ "$?" -eq "0" ]; then echo SSCOK; fi" ] ;
+ return [ MATCH ".*(SSCOK).*" : $(result) ] ;
+}
+
+# Initialize the MPI module.
+rule init ( mpicxx ? : options * : mpirun-with-options * )
+{
+ if ! $(options) && $(.debug-configuration)
+ {
+ ECHO "===============MPI Auto-configuration===============" ;
+ }
+
+ if ! $(mpicxx) && [ os.on-windows ]
+ {
+ # Try to auto-configure to the Microsoft Compute Cluster Pack
+ local cluster_pack_path_native = "C:\\Program Files\\Microsoft Compute Cluster Pack" ;
+ local cluster_pack_path = [ path.make $(cluster_pack_path_native) ] ;
+ if [ GLOB $(cluster_pack_path_native)\\Include : mpi.h ]
+ {
+ if $(.debug-configuration)
+ {
+ ECHO "Found Microsoft Compute Cluster Pack: $(cluster_pack_path_native)" ;
+ }
+
+ # Pick up either the 32-bit or 64-bit library, depending on which address
+ # model the user has selected. Default to 32-bit.
+ options = <include>$(cluster_pack_path)/Include
+ <address-model>64:<library-path>$(cluster_pack_path)/Lib/amd64
+ <library-path>$(cluster_pack_path)/Lib/i386
+ <find-static-library>msmpi
+ <toolset>msvc:<define>_SECURE_SCL=0
+ ;
+
+ # Setup the "mpirun" equivalent (mpiexec)
+ .mpirun = "\"$(cluster_pack_path_native)\\Bin\\mpiexec.exe"\" ;
+ .mpirun_flags = -n ;
+ }
+ else if $(.debug-configuration)
+ {
+ ECHO "Did not find Microsoft Compute Cluster Pack in $(cluster_pack_path_native)." ;
+ }
+ }
+
+ if ! $(options)
+ {
+ # Try to auto-detect options based on the wrapper compiler
+ local command = [ common.get-invocation-command mpi : mpic++ : $(mpicxx) ] ;
+
+ if ! $(mpicxx) && ! $(command)
+ {
+ # Try "mpiCC", which is used by MPICH
+ command = [ common.get-invocation-command mpi : mpiCC ] ;
+ }
+
+ if ! $(mpicxx) && ! $(command)
+ {
+ # Try "mpicxx", which is used by OpenMPI and MPICH2
+ command = [ common.get-invocation-command mpi : mpicxx ] ;
+ }
+
+ local result ;
+ local compile_flags ;
+ local link_flags ;
+
+ if ! $(command)
+ {
+ # Do nothing: we'll complain later
+ }
+ # OpenMPI and newer versions of LAM-MPI have -showme:compile and
+ # -showme:link.
+ else if [ safe-shell-command "$(command) -showme:compile" ] &&
+ [ safe-shell-command "$(command) -showme:link" ]
+ {
+ if $(.debug-configuration)
+ {
+ ECHO "Found recent LAM-MPI or Open MPI wrapper compiler: $(command)" ;
+ }
+
+ compile_flags = [ SHELL "$(command) -showme:compile" ] ;
+ link_flags = [ SHELL "$(command) -showme:link" ] ;
+
+ # Prepend COMPILER as the executable name, to match the format of
+ # other compilation commands.
+ compile_flags = "COMPILER $(compile_flags)" ;
+ link_flags = "COMPILER $(link_flags)" ;
+ }
+ # Look for LAM-MPI's -showme
+ else if [ safe-shell-command "$(command) -showme" ]
+ {
+ if $(.debug-configuration)
+ {
+ ECHO "Found older LAM-MPI wrapper compiler: $(command)" ;
+ }
+
+ result = [ SHELL "$(command) -showme" ] ;
+ }
+ # Look for MPICH
+ else if [ safe-shell-command "$(command) -show" ]
+ {
+ if $(.debug-configuration)
+ {
+ ECHO "Found MPICH wrapper compiler: $(command)" ;
+ }
+ compile_flags = [ SHELL "$(command) -compile_info" ] ;
+ link_flags = [ SHELL "$(command) -link_info" ] ;
+ }
+ # Sun HPC and Ibm POE
+ else if [ SHELL "$(command) -v 2>/dev/null" ]
+ {
+ compile_flags = [ SHELL "$(command) -c -v -xtarget=native64 2>/dev/null" ] ;
+
+ local back = [ MATCH "--------------------(.*)" : $(compile_flags) ] ;
+ if $(back)
+ {
+ # Sun HPC
+ if $(.debug-configuration)
+ {
+ ECHO "Found Sun MPI wrapper compiler: $(command)" ;
+ }
+
+ compile_flags = [ MATCH "(.*)--------------------" : $(back) ] ;
+ compile_flags = [ MATCH "(.*)-v" : $(compile_flags) ] ;
+ link_flags = [ SHELL "$(command) -v -xtarget=native64 2>/dev/null" ] ;
+ link_flags = [ MATCH "--------------------(.*)" : $(link_flags) ] ;
+ link_flags = [ MATCH "(.*)--------------------" : $(link_flags) ] ;
+
+ # strip out -v from compile options
+ local front = [ MATCH "(.*)-v" : $(link_flags) ] ;
+ local back = [ MATCH "-v(.*)" : $(link_flags) ] ;
+ link_flags = "$(front) $(back)" ;
+ front = [ MATCH "(.*)-xtarget=native64" : $(link_flags) ] ;
+ back = [ MATCH "-xtarget=native64(.*)" : $(link_flags) ] ;
+ link_flags = "$(front) $(back)" ;
+ }
+ else
+ {
+ # Ibm POE
+ if $(.debug-configuration)
+ {
+ ECHO "Found IBM MPI wrapper compiler: $(command)" ;
+ }
+
+ #
+ compile_flags = [ SHELL "$(command) -c -v 2>/dev/null" ] ;
+ compile_flags = [ MATCH "(.*)exec: export.*" : $(compile_flags) ] ;
+ local front = [ MATCH "(.*)-v" : $(compile_flags) ] ;
+ local back = [ MATCH "-v(.*)" : $(compile_flags) ] ;
+ compile_flags = "$(front) $(back)" ;
+ front = [ MATCH "(.*)-c" : $(compile_flags) ] ;
+ back = [ MATCH "-c(.*)" : $(compile_flags) ] ;
+ compile_flags = "$(front) $(back)" ;
+ link_flags = $(compile_flags) ;
+
+ # get location of mpif.h from mpxlf
+ local f_flags = [ SHELL "mpxlf -v 2>/dev/null" ] ;
+ f_flags = [ MATCH "(.*)exec: export.*" : $(f_flags) ] ;
+ front = [ MATCH "(.*)-v" : $(f_flags) ] ;
+ back = [ MATCH "-v(.*)" : $(f_flags) ] ;
+ f_flags = "$(front) $(back)" ;
+ f_flags = [ MATCH "xlf_r(.*)" : $(f_flags) ] ;
+ f_flags = [ MATCH "-F:mpxlf_r(.*)" : $(f_flags) ] ;
+ compile_flags = [ strip-eol $(compile_flags) ] ;
+ compile_flags = "$(compile_flags) $(f_flags)" ;
+ }
+ }
+
+ if $(result) || $(compile_flags) && $(link_flags)
+ {
+ if $(result)
+ {
+ result = [ strip-eol $(result) ] ;
+ options = [ cmdline_to_features $(result) ] ;
+ }
+ else
+ {
+ compile_flags = [ strip-eol $(compile_flags) ] ;
+ link_flags = [ strip-eol $(link_flags) ] ;
+
+ # Separately process compilation and link features, then combine
+ # them at the end.
+ local compile_features = [ cmdline_to_features $(compile_flags)
+ : "<cxxflags>" ] ;
+ local link_features = [ cmdline_to_features $(link_flags)
+ : "<linkflags>" ] ;
+ options = $(compile_features) $(link_features) ;
+ }
+
+ # If requested, display MPI configuration information.
+ if $(.debug-configuration)
+ {
+ if $(result)
+ {
+ ECHO " Wrapper compiler command line: $(result)" ;
+ }
+ else
+ {
+ local match = [ MATCH "^([^\" ]+|\"[^\"]+\") *(.*)$"
+ : $(compile_flags) ] ;
+ ECHO "MPI compilation flags: $(match[2])" ;
+ local match = [ MATCH "^([^\" ]+|\"[^\"]+\") *(.*)$"
+ : $(link_flags) ] ;
+ ECHO "MPI link flags: $(match[2])" ;
+ }
+ }
+ }
+ else
+ {
+ if $(command)
+ {
+ ECHO "MPI auto-detection failed: unknown wrapper compiler $(command)" ;
+ ECHO "Please report this error to the Boost mailing list: http://www.boost.org" ;
+ }
+ else if $(mpicxx)
+ {
+ ECHO "MPI auto-detection failed: unable to find wrapper compiler $(mpicxx)" ;
+ }
+ else
+ {
+ ECHO "MPI auto-detection failed: unable to find wrapper compiler `mpic++' or `mpiCC'" ;
+ }
+ ECHO "You will need to manually configure MPI support." ;
+ }
+
+ }
+
+ # Find mpirun (or its equivalent) and its flags
+ if ! $(.mpirun)
+ {
+ .mpirun =
+ [ common.get-invocation-command mpi : mpirun : $(mpirun-with-options[1]) ] ;
+ .mpirun_flags = $(mpirun-with-options[2-]) ;
+ .mpirun_flags ?= -np ;
+ }
+
+ if $(.debug-configuration)
+ {
+ if $(options)
+ {
+ echo "MPI build features: " ;
+ ECHO $(options) ;
+ }
+
+ if $(.mpirun)
+ {
+ echo "MPI launcher: $(.mpirun) $(.mpirun_flags)" ;
+ }
+
+ ECHO "====================================================" ;
+ }
+
+ if $(options)
+ {
+ .configured = true ;
+
+ # Set up the "mpi" alias
+ alias mpi : : : : $(options) ;
+ }
+}
+
+# States whether MPI has been configured
+rule configured ( )
+{
+ return $(.configured) ;
+}
+
+# Returns the "extra" requirements needed to build MPI. These requirements are
+# part of the /mpi//mpi library target, but they need to be added to anything
+# that uses MPI directly to work around bugs in BBv2's propagation of
+# requirements.
+rule extra-requirements ( )
+{
+ return $(MPI_EXTRA_REQUIREMENTS) ;
+}
+
+# Support for testing; borrowed from Python
+type.register RUN_MPI_OUTPUT ;
+type.register RUN_MPI : : TEST ;
+
+class mpi-test-generator : generator
+{
+ import property-set ;
+
+ rule __init__ ( * : * )
+ {
+ generator.__init__ $(1) : $(2) : $(3) : $(4) : $(5) : $(6) : $(7) : $(8) : $(9) ;
+ self.composing = true ;
+ }
+
+ rule run ( project name ? : property-set : sources * : multiple ? )
+ {
+ # Generate an executable from the sources. This is the executable we will run.
+ local executable =
+ [ generators.construct $(project) $(name) : EXE : $(property-set) : $(sources) ] ;
+
+ result =
+ [ construct-result $(executable[2-]) : $(project) $(name)-run : $(property-set) ] ;
+ }
+}
+
+# Use mpi-test-generator to generate MPI tests from sources
+generators.register
+ [ new mpi-test-generator mpi.capture-output : : RUN_MPI_OUTPUT ] ;
+
+generators.register-standard testing.expect-success
+ : RUN_MPI_OUTPUT : RUN_MPI ;
+
+# The number of processes to spawn when executing an MPI test.
+feature mpi:processes : : free incidental ;
+
+# The flag settings on testing.capture-output do not
+# apply to mpi.capture output at the moment.
+# Redo this explicitly.
+toolset.flags mpi.capture-output ARGS <testing.arg> ;
+rule capture-output ( target : sources * : properties * )
+{
+ # Use the standard capture-output rule to run the tests
+ testing.capture-output $(target) : $(sources[1]) : $(properties) ;
+
+ # Determine the number of processes we should run on.
+ local num_processes = [ property.select <mpi:processes> : $(properties) ] ;
+ num_processes = $(num_processes:G=) ;
+
+ # serialize the MPI tests to avoid overloading systems
+ JAM_SEMAPHORE on $(target) = <s>mpi-run-semaphore ;
+
+ # We launch MPI processes using the "mpirun" equivalent specified by the user.
+ LAUNCHER on $(target) =
+ [ on $(target) return $(.mpirun) $(.mpirun_flags) $(num_processes) ] ;
+}
+
+# Creates a set of test cases to be run through the MPI launcher. The name, sources,
+# and requirements are the same as for any other test generator. However, schedule is
+# a list of numbers, which indicates how many processes each test run will use. For
+# example, passing 1 2 7 will run the test with 1 process, then 2 processes, then
+# 7 processes. The name provided is just the base name: the actual tests will be
+# the name followed by a hyphen, then the number of processes.
+rule mpi-test ( name : sources * : requirements * : schedule * )
+{
+ sources ?= $(name).cpp ;
+ schedule ?= 1 2 3 4 7 8 13 17 ;
+
+ local result ;
+ for processes in $(schedule)
+ {
+ result += [ testing.make-test
+ run-mpi : $(sources) /boost/mpi//boost_mpi
+ : $(requirements) <toolset>msvc:<link>static <mpi:processes>$(processes) : $(name)-$(processes) ] ;
+ }
+ return $(result) ;
+}
diff --git a/src/kenlm/jam-files/boost-build/tools/msvc-config.jam b/src/kenlm/jam-files/boost-build/tools/msvc-config.jam
new file mode 100644
index 0000000..6c71e3b
--- /dev/null
+++ b/src/kenlm/jam-files/boost-build/tools/msvc-config.jam
@@ -0,0 +1,12 @@
+#~ Copyright 2005 Rene Rivera.
+#~ Distributed under the Boost Software License, Version 1.0.
+#~ (See accompanying file LICENSE_1_0.txt or http://www.boost.org/LICENSE_1_0.txt)
+
+# Automatic configuration for VisualStudio toolset. To use, just import this module.
+
+import toolset : using ;
+
+ECHO "warning: msvc-config.jam is deprecated. Use 'using msvc : all ;' instead." ;
+
+using msvc : all ;
+
diff --git a/src/kenlm/jam-files/boost-build/tools/msvc.jam b/src/kenlm/jam-files/boost-build/tools/msvc.jam
new file mode 100644
index 0000000..6ecd033
--- /dev/null
+++ b/src/kenlm/jam-files/boost-build/tools/msvc.jam
@@ -0,0 +1,1435 @@
+# Copyright (c) 2003 David Abrahams
+# Copyright (c) 2005 Vladimir Prus
+# Copyright (c) 2005 Alexey Pakhunov
+# Copyright (c) 2006 Bojan Resnik
+# Copyright (c) 2006 Ilya Sokolov
+# Copyright (c) 2007 Rene Rivera
+# Copyright (c) 2008 Jurko Gospodnetic
+#
+# Distributed under the Boost Software License, Version 1.0.
+# (See accompanying file LICENSE_1_0.txt or copy at
+# http://www.boost.org/LICENSE_1_0.txt)
+
+################################################################################
+#
+# MSVC Boost Build toolset module.
+# --------------------------------
+#
+# All toolset versions need to have their location either auto-detected or
+# explicitly specified except for the special 'default' version that expects the
+# environment to find the needed tools or report an error.
+#
+################################################################################
+
+import "class" : new ;
+import common ;
+import feature ;
+import generators ;
+import mc ;
+import midl ;
+import os ;
+import path ;
+import pch ;
+import property ;
+import rc ;
+import toolset ;
+import type ;
+
+
+type.register MANIFEST : manifest ;
+feature.feature embed-manifest : on off : incidental propagated ;
+
+type.register PDB : pdb ;
+
+
+################################################################################
+#
+# Public rules.
+#
+################################################################################
+
+# Initialize a specific toolset version configuration. As the result, path to
+# compiler and, possibly, program names are set up, and will be used when that
+# version of compiler is requested. For example, you might have:
+#
+# using msvc : 6.5 : cl.exe ;
+# using msvc : 7.0 : Y:/foo/bar/cl.exe ;
+#
+# The version parameter may be omitted:
+#
+# using msvc : : Z:/foo/bar/cl.exe ;
+#
+# The following keywords have special meanings when specified as versions:
+# - all - all detected but not yet used versions will be marked as used
+# with their default options.
+# - default - this is an equivalent to an empty version.
+#
+# Depending on a supplied version, detected configurations and the presence of
+# 'cl.exe' in the path, different results may be achieved. The following table describes
+# the possible scenarios:
+#
+# Nothing "x.y"
+# Passed Nothing "x.y" detected, detected,
+# version detected detected cl.exe in path cl.exe in path
+#
+# default Error Use "x.y" Create "default" Use "x.y"
+# all None Use all None Use all
+# x.y - Use "x.y" - Use "x.y"
+# a.b Error Error Create "a.b" Create "a.b"
+#
+# "x.y" - refers to a detected version;
+# "a.b" - refers to an undetected version.
+#
+# FIXME: Currently the command parameter and the <compiler> property parameter
+# seem to overlap in duties. Remove this duplication. This seems to be related
+# to why someone started preparing to replace init with configure rules.
+#
+rule init (
+ # The msvc version being configured. When omitted the tools invoked when no
+ # explicit version is given will be configured.
+ version ?
+
+ # The command used to invoke the compiler. If not specified:
+ # - if version is given, default location for that version will be
+ # searched
+ #
+ # - if version is not given, default locations for MSVC 9.0, 8.0, 7.1, 7.0
+ # and 6.* will be searched
+ #
+ # - if compiler is not found in the default locations, PATH will be
+ # searched.
+ : command *
+
+ # Options may include:
+ #
+ # All options shared by multiple toolset types as handled by the
+ # common.handle-options() rule, e.g. <cflags>, <compileflags>, <cxxflags>,
+ # <fflags> & <linkflags>.
+ #
+ # <assembler>
+ # <compiler>
+ # <idl-compiler>
+ # <linker>
+ # <mc-compiler>
+ # <resource-compiler>
+ # Exact tool names to be used by this msvc toolset configuration.
+ #
+ # <compiler-filter>
+ # Command through which to pipe the output of running the compiler.
+ # For example to pass the output to STLfilt.
+ #
+ # <setup>
+ # Global setup command to invoke before running any of the msvc tools.
+ # It will be passed additional option parameters depending on the actual
+ # target platform.
+ #
+ # <setup-amd64>
+ # <setup-i386>
+ # <setup-ia64>
+ # <setup-arm>
+ # Platform specific setup command to invoke before running any of the
+ # msvc tools used when building a target for a specific platform, e.g.
+ # when building a 32 or 64 bit executable.
+ : options *
+)
+{
+ if $(command)
+ {
+ options += <command>$(command) ;
+ }
+ configure $(version) : $(options) ;
+}
+
+
+# 'configure' is a newer version of 'init'. The parameter 'command' is passed as
+# a part of the 'options' list. See the 'init' rule comment for more detailed
+# information.
+#
+rule configure ( version ? : options * )
+{
+ switch $(version)
+ {
+ case "all" :
+ if $(options)
+ {
+ import errors ;
+ errors.error "MSVC toolset configuration: options should be"
+ "empty when '$(version)' is specified." ;
+ }
+
+ # Configure (i.e. mark as used) all registered versions.
+ local all-versions = [ $(.versions).all ] ;
+ if ! $(all-versions)
+ {
+ if $(.debug-configuration)
+ {
+ ECHO "notice: [msvc-cfg] Asked to configure all registered"
+ "msvc toolset versions when there are none currently"
+ "registered." ;
+ }
+ }
+ else
+ {
+ for local v in $(all-versions)
+ {
+ # Note that there is no need to skip already configured
+ # versions here as this will request configure-really rule
+ # to configure the version using default options which will
+ # in turn cause it to simply do nothing in case the version
+ # has already been configured.
+ configure-really $(v) ;
+ }
+ }
+
+ case "default" :
+ configure-really : $(options) ;
+
+ case * :
+ configure-really $(version) : $(options) ;
+ }
+}
+
+
+# Sets up flag definitions dependent on the compiler version used.
+# - 'version' is the version of compiler in N.M format.
+# - 'conditions' is the property set to be used as flag conditions.
+# - 'toolset' is the toolset for which flag settings are to be defined.
+# This makes the rule reusable for other msvc-option-compatible compilers.
+#
+rule configure-version-specific ( toolset : version : conditions )
+{
+ toolset.push-checking-for-flags-module unchecked ;
+ # Starting with version 7.0, the msvc compiler has the /Zc:forScope and
+ # /Zc:wchar_t options that improve C++ standard conformance, but those
+ # options are off by default. If we are sure that the msvc version is at
+ # 7.*, add those options explicitly. We can be sure either if user specified
+ # version 7.* explicitly or if we auto-detected the version ourselves.
+ if ! [ MATCH ^(6\\.) : $(version) ]
+ {
+ toolset.flags $(toolset).compile CFLAGS $(conditions) : /Zc:forScope /Zc:wchar_t ;
+ toolset.flags $(toolset).compile.c++ C++FLAGS $(conditions) : /wd4675 ;
+
+ # Explicitly disable the 'function is deprecated' warning. Some msvc
+ # versions have a bug, causing them to emit the deprecation warning even
+ # with /W0.
+ toolset.flags $(toolset).compile CFLAGS $(conditions)/<warnings>off : /wd4996 ;
+
+ if [ MATCH ^([78]\\.) : $(version) ]
+ {
+ # 64-bit compatibility warning deprecated since 9.0, see
+ # http://msdn.microsoft.com/en-us/library/yt4xw8fh.aspx
+ toolset.flags $(toolset).compile CFLAGS $(conditions)/<warnings>all : /Wp64 ;
+ }
+ }
+
+ #
+ # Processor-specific optimization.
+ #
+
+ if [ MATCH ^([67]) : $(version) ]
+ {
+ # 8.0 deprecates some of the options.
+ toolset.flags $(toolset).compile CFLAGS $(conditions)/<optimization>speed $(conditions)/<optimization>space : /Ogiy /Gs ;
+ toolset.flags $(toolset).compile CFLAGS $(conditions)/<optimization>speed : /Ot ;
+ toolset.flags $(toolset).compile CFLAGS $(conditions)/<optimization>space : /Os ;
+
+ toolset.flags $(toolset).compile CFLAGS $(conditions)/$(.cpu-arch-i386)/<instruction-set> : /GB ;
+ toolset.flags $(toolset).compile CFLAGS $(conditions)/$(.cpu-arch-i386)/<instruction-set>i486 : /G4 ;
+ toolset.flags $(toolset).compile CFLAGS $(conditions)/$(.cpu-arch-i386)/<instruction-set>$(.cpu-type-g5) : /G5 ;
+ toolset.flags $(toolset).compile CFLAGS $(conditions)/$(.cpu-arch-i386)/<instruction-set>$(.cpu-type-g6) : /G6 ;
+ toolset.flags $(toolset).compile CFLAGS $(conditions)/$(.cpu-arch-i386)/<instruction-set>$(.cpu-type-g7) : /G7 ;
+
+ # Improve floating-point accuracy. Otherwise, some of C++ Boost's "math"
+ # tests will fail.
+ toolset.flags $(toolset).compile CFLAGS $(conditions) : /Op ;
+
+ # 7.1 and below have single-threaded static RTL.
+ toolset.flags $(toolset).compile CFLAGS $(conditions)/<runtime-debugging>off/<runtime-link>static/<threading>single : /ML ;
+ toolset.flags $(toolset).compile CFLAGS $(conditions)/<runtime-debugging>on/<runtime-link>static/<threading>single : /MLd ;
+ }
+ else
+ {
+ # 8.0 and above adds some more options.
+ toolset.flags $(toolset).compile CFLAGS $(conditions)/$(.cpu-arch-amd64)/<instruction-set> : /favor:blend ;
+ toolset.flags $(toolset).compile CFLAGS $(conditions)/$(.cpu-arch-amd64)/<instruction-set>$(.cpu-type-em64t) : /favor:EM64T ;
+ toolset.flags $(toolset).compile CFLAGS $(conditions)/$(.cpu-arch-amd64)/<instruction-set>$(.cpu-type-amd64) : /favor:AMD64 ;
+
+ # 8.0 and above only has multi-threaded static RTL.
+ toolset.flags $(toolset).compile CFLAGS $(conditions)/<runtime-debugging>off/<runtime-link>static/<threading>single : /MT ;
+ toolset.flags $(toolset).compile CFLAGS $(conditions)/<runtime-debugging>on/<runtime-link>static/<threading>single : /MTd ;
+
+ # Specify target machine type so the linker will not need to guess.
+ toolset.flags $(toolset).link LINKFLAGS $(conditions)/$(.cpu-arch-amd64) : /MACHINE:X64 ;
+ toolset.flags $(toolset).link LINKFLAGS $(conditions)/$(.cpu-arch-i386) : /MACHINE:X86 ;
+ toolset.flags $(toolset).link LINKFLAGS $(conditions)/$(.cpu-arch-ia64) : /MACHINE:IA64 ;
+ toolset.flags $(toolset).link LINKFLAGS $(conditions)/$(.cpu-arch-arm) : /MACHINE:ARM ;
+
+ # Make sure that a manifest will be generated even if there are no
+ # dependencies to put there.
+ toolset.flags $(toolset).link LINKFLAGS $(conditions) : /MANIFEST ;
+ }
+ toolset.pop-checking-for-flags-module ;
+}
+
+
+# Registers this toolset including all of its flags, features & generators. Does
+# nothing on repeated calls.
+#
+rule register-toolset ( )
+{
+ if ! msvc in [ feature.values toolset ]
+ {
+ register-toolset-really ;
+ }
+}
+
+
+# Declare action for creating static libraries. If library exists, remove it
+# before adding files. See
+# http://article.gmane.org/gmane.comp.lib.boost.build/4241 for rationale.
+if [ os.name ] in NT
+{
+ # The 'DEL' command would issue a message to stdout if the file does not
+ # exist, so need a check.
+ actions archive
+ {
+ if exist "$(<[1])" DEL "$(<[1])"
+ $(.LD) $(AROPTIONS) /out:"$(<[1])" @"@($(<[1]:W).rsp:E=$(.nl)"$(>)" $(.nl)$(LIBRARIES_MENTIONED_BY_FILE) $(.nl)"$(LIBRARY_OPTION)$(FINDLIBS_ST).lib" $(.nl)"$(LIBRARY_OPTION)$(FINDLIBS_SA).lib")"
+ }
+}
+else
+{
+ actions archive
+ {
+ $(.RM) "$(<[1])"
+ $(.LD) $(AROPTIONS) /out:"$(<[1])" @"@($(<[1]:W).rsp:E=$(.nl)"$(>)" $(.nl)$(LIBRARIES_MENTIONED_BY_FILE) $(.nl)"$(LIBRARY_OPTION)$(FINDLIBS_ST).lib" $(.nl)"$(LIBRARY_OPTION)$(FINDLIBS_SA).lib")"
+ }
+}
+
+
+# For the assembler the following options are turned on by default:
+#
+# -Zp4 align structures to 4 bytes
+# -Cp preserve case of user identifiers
+# -Cx preserve case in publics, externs
+#
+actions compile.asm
+{
+ $(.ASM) -c -Zp4 -Cp -Cx -D$(DEFINES) $(ASMFLAGS) $(USER_ASMFLAGS) -Fo "$(<:W)" "$(>:W)"
+}
+
+
+rule compile.c ( targets + : sources * : properties * )
+{
+ C++FLAGS on $(targets[1]) = ;
+ get-rspline $(targets) : -TC ;
+ compile-c-c++ $(<) : $(>) [ on $(<) return $(PCH_FILE) ] [ on $(<) return $(PCH_HEADER) ] ;
+}
+
+
+rule compile.c.preprocess ( targets + : sources * : properties * )
+{
+ C++FLAGS on $(targets[1]) = ;
+ get-rspline $(targets) : -TC ;
+ preprocess-c-c++ $(<) : $(>) [ on $(<) return $(PCH_FILE) ] [ on $(<) return $(PCH_HEADER) ] ;
+}
+
+
+rule compile.c.pch ( targets + : sources * : properties * )
+{
+ C++FLAGS on $(targets[1]) = ;
+ get-rspline $(targets[1]) : -TC ;
+ get-rspline $(targets[2]) : -TC ;
+ local pch-source = [ on $(<) return $(PCH_SOURCE) ] ;
+ if $(pch-source)
+ {
+ DEPENDS $(<) : $(pch-source) ;
+ compile-c-c++-pch-s $(targets) : $(sources) $(pch-source) ;
+ }
+ else
+ {
+ compile-c-c++-pch $(targets) : $(sources) ;
+ }
+}
+
+toolset.flags msvc YLOPTION : "-Yl" ;
+
+# Action for running the C/C++ compiler without using precompiled headers.
+#
+# WARNING: Synchronize any changes in this action with intel-win
+#
+# Notes regarding PDB generation, for when we use
+# <debug-symbols>on/<debug-store>database:
+#
+# 1. PDB_CFLAG is only set for <debug-symbols>on/<debug-store>database, ensuring
+# that the /Fd flag is dropped if PDB_CFLAG is empty.
+#
+# 2. When compiling an executable's source files, PDB_NAME is set on a per-source
+# file basis by rule compile-c-c++. The linker will pull these into the
+# executable's PDB.
+#
+# 3. When compiling library's source files, PDB_NAME is updated to <libname>.pdb
+# for each source file by rule archive, as in this case compiler must be used
+# to create a single PDB for our library.
+#
+actions compile-c-c++ bind PDB_NAME
+{
+ $(.CC) @"@($(<[1]:W).rsp:E="$(>[1]:W)" -Fo"$(<[1]:W)" $(PDB_CFLAG)"$(PDB_NAME)" -Yu"$(>[3]:D=)" -Fp"$(>[2]:W)" $(CC_RSPLINE))" $(.CC.FILTER)
+}
+
+actions preprocess-c-c++ bind PDB_NAME
+{
+ $(.CC) @"@($(<[1]:W).rsp:E="$(>[1]:W)" -E $(PDB_CFLAG)"$(PDB_NAME)" -Yu"$(>[3]:D=)" -Fp"$(>[2]:W)" $(CC_RSPLINE))" >"$(<[1]:W)"
+}
+
+rule compile-c-c++ ( targets + : sources * )
+{
+ DEPENDS $(<[1]) : [ on $(<[1]) return $(PCH_HEADER) ] ;
+ DEPENDS $(<[1]) : [ on $(<[1]) return $(PCH_FILE) ] ;
+ PDB_NAME on $(<) = $(<[1]:S=.pdb) ;
+ LOCATE on $(<[1]:S=.pdb) = [ on $(<[1]) return $(LOCATE) ] ;
+}
+
+rule preprocess-c-c++ ( targets + : sources * )
+{
+ DEPENDS $(<[1]) : [ on $(<[1]) return $(PCH_HEADER) ] ;
+ DEPENDS $(<[1]) : [ on $(<[1]) return $(PCH_FILE) ] ;
+ PDB_NAME on $(<) = $(<:S=.pdb) ;
+ LOCATE on $(<[1]:S=.pdb) = [ on $(<[1]) return $(LOCATE) ] ;
+}
+
+# Action for running the C/C++ compiler using precompiled headers. In addition
+# to whatever else it needs to compile, this action also adds a temporary source
+# .cpp file used to compile the precompiled headers themselves.
+#
+# The global .escaped-double-quote variable is used to avoid messing up Emacs
+# syntax highlighting in the messy N-quoted code below.
+actions compile-c-c++-pch
+{
+ $(.CC) @"@($(<[1]:W).rsp:E="$(>[2]:W)" -Fo"$(<[2]:W)" -Yc"$(>[1]:D=)" $(YLOPTION)"__bjam_pch_symbol_$(>[1]:D=)" -Fp"$(<[1]:W)" $(CC_RSPLINE))" "@($(<[1]:W).cpp:E=#include $(.escaped-double-quote)$(>[1]:D=)$(.escaped-double-quote)$(.nl))" $(.CC.FILTER)
+}
+
+
+# Action for running the C/C++ compiler using precompiled headers. An already
+# built source file for compiling the precompiled headers is expected to be
+# given as one of the source parameters.
+actions compile-c-c++-pch-s
+{
+ $(.CC) @"@($(<[1]:W).rsp:E="$(>[2]:W)" -Fo"$(<[2]:W)" -Yc"$(>[1]:D=)" $(YLOPTION)"__bjam_pch_symbol_$(>[1]:D=)" -Fp"$(<[1]:W)" $(CC_RSPLINE))" $(.CC.FILTER)
+}
+
+
+rule compile.c++ ( targets + : sources * : properties * )
+{
+ get-rspline $(targets) : -TP ;
+ compile-c-c++ $(<) : $(>) [ on $(<) return $(PCH_FILE) ] [ on $(<) return $(PCH_HEADER) ] ;
+}
+
+rule compile.c++.preprocess ( targets + : sources * : properties * )
+{
+ get-rspline $(targets) : -TP ;
+ preprocess-c-c++ $(<) : $(>) [ on $(<) return $(PCH_FILE) ] [ on $(<) return $(PCH_HEADER) ] ;
+}
+
+
+rule compile.c++.pch ( targets + : sources * : properties * )
+{
+ get-rspline $(targets[1]) : -TP ;
+ get-rspline $(targets[2]) : -TP ;
+ local pch-source = [ on $(<) return $(PCH_SOURCE) ] ;
+ if $(pch-source)
+ {
+ DEPENDS $(<) : $(pch-source) ;
+ compile-c-c++-pch-s $(targets) : $(sources) $(pch-source) ;
+ }
+ else
+ {
+ compile-c-c++-pch $(targets) : $(sources) ;
+ }
+}
+
+
+# See midl.jam for details.
+#
+actions compile.idl
+{
+ $(.IDL) /nologo @"@($(<[1]:W).rsp:E=$(.nl)"$(>:W)" $(.nl)-D$(DEFINES) $(.nl)"-I$(INCLUDES:W)" $(.nl)-U$(UNDEFS) $(.nl)$(MIDLFLAGS) $(.nl)/tlb "$(<[1]:W)" $(.nl)/h "$(<[2]:W)" $(.nl)/iid "$(<[3]:W)" $(.nl)/proxy "$(<[4]:W)" $(.nl)/dlldata "$(<[5]:W)")"
+ $(.TOUCH_FILE) "$(<[4]:W)"
+ $(.TOUCH_FILE) "$(<[5]:W)"
+}
+
+
+actions compile.mc
+{
+ $(.MC) $(MCFLAGS) -h "$(<[1]:DW)" -r "$(<[2]:DW)" "$(>:W)"
+}
+
+
+actions compile.rc
+{
+ $(.RC) -l 0x409 -U$(UNDEFS) -D$(DEFINES) -I"$(INCLUDES:W)" -fo "$(<:W)" "$(>:W)"
+}
+
+
+rule link ( targets + : sources * : properties * )
+{
+ if <embed-manifest>on in $(properties)
+ {
+ msvc.manifest $(targets) : $(sources) : $(properties) ;
+ }
+}
+
+rule link.dll ( targets + : sources * : properties * )
+{
+ DEPENDS $(<) : [ on $(<) return $(DEF_FILE) ] ;
+ if <embed-manifest>on in $(properties)
+ {
+ msvc.manifest.dll $(targets) : $(sources) : $(properties) ;
+ }
+}
+
+# Incremental linking a DLL causes no end of problems: if the actual exports do
+# not change, the import .lib file is never updated. Therefore, the .lib is
+# always out-of-date and gets rebuilt every time. I am not sure that incremental
+# linking is such a great idea in general, but in this case I am sure we do not
+# want it.
+
+# Windows manifest is a new way to specify dependencies on managed DotNet
+# assemblies and Windows native DLLs. The manifests are embedded as resources
+# and are useful in any PE target (both DLL and EXE).
+
+if [ os.name ] in NT
+{
+ actions link bind DEF_FILE LIBRARIES_MENTIONED_BY_FILE
+ {
+ $(.LD) $(LINKFLAGS) /out:"$(<[1]:W)" /LIBPATH:"$(LINKPATH:W)" $(OPTIONS) @"@($(<[1]:W).rsp:E=$(.nl)"$(>)" $(.nl)$(LIBRARIES_MENTIONED_BY_FILE) $(.nl)$(LIBRARIES) $(.nl)"$(LIBRARY_OPTION)$(FINDLIBS_ST).lib" $(.nl)"$(LIBRARY_OPTION)$(FINDLIBS_SA).lib")"
+ if %ERRORLEVEL% NEQ 0 EXIT %ERRORLEVEL%
+ }
+
+ actions manifest
+ {
+ if exist "$(<[1]).manifest" (
+ $(.MT) -manifest "$(<[1]).manifest" "-outputresource:$(<[1]);1"
+ )
+ }
+
+ actions link.dll bind DEF_FILE LIBRARIES_MENTIONED_BY_FILE
+ {
+ $(.LD) /DLL $(LINKFLAGS) /out:"$(<[1]:W)" /IMPLIB:"$(<[2]:W)" /LIBPATH:"$(LINKPATH:W)" /def:"$(DEF_FILE)" $(OPTIONS) @"@($(<[1]:W).rsp:E=$(.nl)"$(>)" $(.nl)$(LIBRARIES_MENTIONED_BY_FILE) $(.nl)$(LIBRARIES) $(.nl)"$(LIBRARY_OPTION)$(FINDLIBS_ST).lib" $(.nl)"$(LIBRARY_OPTION)$(FINDLIBS_SA).lib")"
+ if %ERRORLEVEL% NEQ 0 EXIT %ERRORLEVEL%
+ }
+
+ actions manifest.dll
+ {
+ if exist "$(<[1]).manifest" (
+ $(.MT) -manifest "$(<[1]).manifest" "-outputresource:$(<[1]);2"
+ )
+ }
+}
+else
+{
+ actions link bind DEF_FILE LIBRARIES_MENTIONED_BY_FILE
+ {
+ $(.LD) $(LINKFLAGS) /out:"$(<[1]:W)" /LIBPATH:"$(LINKPATH:W)" $(OPTIONS) @"@($(<[1]:W).rsp:E=$(.nl)"$(>)" $(.nl)$(LIBRARIES_MENTIONED_BY_FILE) $(.nl)$(LIBRARIES) $(.nl)"$(LIBRARY_OPTION)$(FINDLIBS_ST).lib" $(.nl)"$(LIBRARY_OPTION)$(FINDLIBS_SA).lib")"
+ }
+
+ actions manifest
+ {
+ if test -e "$(<[1]).manifest"; then
+ $(.MT) -manifest "$(<[1]:W).manifest" "-outputresource:$(<[1]:W);1"
+ fi
+ }
+
+ actions link.dll bind DEF_FILE LIBRARIES_MENTIONED_BY_FILE
+ {
+ $(.LD) /DLL $(LINKFLAGS) /out:"$(<[1]:W)" /IMPLIB:"$(<[2]:W)" /LIBPATH:"$(LINKPATH:W)" /def:"$(DEF_FILE)" $(OPTIONS) @"@($(<[1]:W).rsp:E=$(.nl)"$(>)" $(.nl)$(LIBRARIES_MENTIONED_BY_FILE) $(.nl)$(LIBRARIES) $(.nl)"$(LIBRARY_OPTION)$(FINDLIBS_ST).lib" $(.nl)"$(LIBRARY_OPTION)$(FINDLIBS_SA).lib")"
+ }
+
+ actions manifest.dll
+ {
+ if test -e "$(<[1]).manifest"; then
+ $(.MT) -manifest "$(<[1]:W).manifest" "-outputresource:$(<[1]:W);2"
+ fi
+ }
+}
+
+# This rule sets up the pdb file that will be used when generating static
+# libraries and the debug-store option is database, so that the compiler puts
+# all the debug info into a single .pdb file named after the library.
+#
+# Poking at source targets this way is probably not clean, but it is the
+# easiest approach.
+#
+rule archive ( targets + : sources * : properties * )
+{
+ PDB_NAME on $(>) = $(<[1]:S=.pdb) ;
+ LOCATE on $(<[1]:S=.pdb) = [ on $(<[1]) return $(LOCATE) ] ;
+}
+
+
+################################################################################
+#
+# Classes.
+#
+################################################################################
+
+class msvc-pch-generator : pch-generator
+{
+ import property-set ;
+
+ rule run-pch ( project name ? : property-set : sources * )
+ {
+ # Searching for the header and source file in the sources.
+ local pch-header ;
+ local pch-source ;
+ for local s in $(sources)
+ {
+ if [ type.is-derived [ $(s).type ] H ]
+ {
+ pch-header = $(s) ;
+ }
+ else if
+ [ type.is-derived [ $(s).type ] CPP ] ||
+ [ type.is-derived [ $(s).type ] C ]
+ {
+ pch-source = $(s) ;
+ }
+ }
+
+ if ! $(pch-header)
+ {
+ import errors : user-error : errors.user-error ;
+ errors.user-error "can not build pch without pch-header" ;
+ }
+
+ # If we do not have the PCH source - that is fine. We will just create a
+ # temporary .cpp file in the action.
+
+ local generated = [ generator.run $(project) $(name)
+ : [ property-set.create
+ # Passing of <pch-source> is a dirty trick, needed because
+ # non-composing generators with multiple inputs are subtly
+ # broken. For more detailed information see:
+ # https://zigzag.cs.msu.su:7813/boost.build/ticket/111
+ <pch-source>$(pch-source)
+ [ $(property-set).raw ] ]
+ : $(pch-header) ] ;
+
+ local pch-file ;
+ for local g in $(generated)
+ {
+ if [ type.is-derived [ $(g).type ] PCH ]
+ {
+ pch-file = $(g) ;
+ }
+ }
+
+ return [ property-set.create <pch-header>$(pch-header)
+ <pch-file>$(pch-file) ] $(generated) ;
+ }
+}
+
+
+################################################################################
+#
+# Local rules.
+#
+################################################################################
+
+# Detects versions listed as '.known-versions' by checking registry information,
+# environment variables & default paths. Supports both native Windows and
+# Cygwin.
+#
+local rule auto-detect-toolset-versions ( )
+{
+ if [ os.name ] in NT CYGWIN
+ {
+ # Get installation paths from the registry.
+ for local i in $(.known-versions)
+ {
+ if $(.version-$(i)-reg)
+ {
+ local vc-path ;
+ for local x in "" "Wow6432Node\\"
+ {
+ vc-path += [ W32_GETREG
+ "HKEY_LOCAL_MACHINE\\SOFTWARE\\"$(x)"\\Microsoft\\"$(.version-$(i)-reg)
+ : "ProductDir" ] ;
+ }
+
+ if $(vc-path)
+ {
+ vc-path = [ path.join [ path.make-NT $(vc-path[1]) ] "bin" ] ;
+ register-configuration $(i) : [ path.native $(vc-path[1]) ] ;
+ }
+ }
+ }
+ }
+
+ # Check environment and default installation paths.
+ for local i in $(.known-versions)
+ {
+ if ! $(i) in [ $(.versions).all ]
+ {
+ register-configuration $(i) : [ default-path $(i) ] ;
+ }
+ }
+}
+
+
+# Worker rule for toolset version configuration. Takes an explicit version id or
+# nothing in case it should configure the default toolset version (the first
+# registered one or a new 'default' one in case no toolset versions have been
+# registered yet).
+#
+local rule configure-really ( version ? : options * )
+{
+ local v = $(version) ;
+
+ # Decide what the 'default' version is.
+ if ! $(v)
+ {
+ # Take the first registered (i.e. auto-detected) version.
+ version = [ $(.versions).all ] ;
+ version = $(version[1]) ;
+ v = $(version) ;
+
+ # Note: 'version' can still be empty at this point if no versions have
+ # been auto-detected.
+ version ?= "default" ;
+ }
+
+ # Version alias -> real version number.
+ if $(.version-alias-$(version))
+ {
+ version = $(.version-alias-$(version)) ;
+ }
+
+ # Check whether the selected configuration is already in use.
+ if $(version) in [ $(.versions).used ]
+ {
+ # Allow multiple 'toolset.using' calls for the same configuration if the
+ # identical sets of options are used.
+ if $(options) && ( $(options) != [ $(.versions).get $(version) : options ] )
+ {
+ import errors ;
+ errors.error "MSVC toolset configuration: Toolset version"
+ "'$(version)' already configured." ;
+ }
+ }
+ else
+ {
+ # Register a new configuration.
+ $(.versions).register $(version) ;
+
+ # Add user-supplied to auto-detected options.
+ options = [ $(.versions).get $(version) : options ] $(options) ;
+
+ # Mark the configuration as 'used'.
+ $(.versions).use $(version) ;
+
+ # Generate conditions and save them.
+ local conditions = [ common.check-init-parameters msvc : version $(v) ]
+ ;
+
+ $(.versions).set $(version) : conditions : $(conditions) ;
+
+ local command = [ feature.get-values <command> : $(options) ] ;
+
+ # If version is specified, we try to search first in default paths, and
+ # only then in PATH.
+ command = [ common.get-invocation-command msvc : cl.exe : $(command) :
+ [ default-paths $(version) ] : $(version) ] ;
+
+ common.handle-options msvc : $(conditions) : $(command) : $(options) ;
+
+ if ! $(version)
+ {
+ # Even if version is not explicitly specified, try to detect the
+ # version from the path.
+ # FIXME: We currently detect both Microsoft Visual Studio 9.0 and
+ # 9.0express as 9.0 here.
+ if [ MATCH "(Microsoft Visual Studio 12)" : $(command) ]
+ {
+ version = 12.0 ;
+ }
+ else if [ MATCH "(Microsoft Visual Studio 11)" : $(command) ]
+ {
+ version = 11.0 ;
+ }
+ else if [ MATCH "(Microsoft Visual Studio 10)" : $(command) ]
+ {
+ version = 10.0 ;
+ }
+ else if [ MATCH "(Microsoft Visual Studio 9)" : $(command) ]
+ {
+ version = 9.0 ;
+ }
+ else if [ MATCH "(Microsoft Visual Studio 8)" : $(command) ]
+ {
+ version = 8.0 ;
+ }
+ else if [ MATCH "(NET 2003[\/\\]VC7)" : $(command) ]
+ {
+ version = 7.1 ;
+ }
+ else if [ MATCH "(Microsoft Visual C\\+\\+ Toolkit 2003)" :
+ $(command) ]
+ {
+ version = 7.1toolkit ;
+ }
+ else if [ MATCH "(.NET[\/\\]VC7)" : $(command) ]
+ {
+ version = 7.0 ;
+ }
+ else
+ {
+ version = 6.0 ;
+ }
+ }
+
+ # Generate and register setup command.
+
+ local below-8.0 = [ MATCH ^([67]\\.) : $(version) ] ;
+
+ local cpu = i386 amd64 ia64 arm ;
+ if $(below-8.0)
+ {
+ cpu = i386 ;
+ }
+
+ local setup-amd64 ;
+ local setup-i386 ;
+ local setup-ia64 ;
+ local setup-arm ;
+
+ if $(command)
+ {
+ # TODO: Note that if we specify a non-existent toolset version then
+ # this rule may find and use a corresponding compiler executable
+ # belonging to an incorrect toolset version. For example, if you
+ # have only MSVC 7.1 installed, have its executable on the path and
+ # specify you want Boost Build to use MSVC 9.0, then you want Boost
+ # Build to report an error but this may cause it to silently use the
+ # MSVC 7.1 compiler even though it thinks it is using the msvc-9.0
+ # toolset version.
+ command = [ common.get-absolute-tool-path $(command[-1]) ] ;
+ }
+
+ if $(command)
+ {
+ local parent = [ path.make $(command) ] ;
+ parent = [ path.parent $(parent) ] ;
+ parent = [ path.native $(parent) ] ;
+
+ # Setup will be used if the command name has been specified. If
+ # setup is not specified explicitly then a default setup script will
+ # be used instead. Setup scripts may be global or architecture/
+ # /platform/cpu specific. Setup options are used only in case of
+ # global setup scripts.
+
+ # Default setup scripts provided with different VC distributions:
+ #
+ # VC 7.1 had only the vcvars32.bat script specific to 32 bit i386
+ # builds. It was located in the bin folder for the regular version
+ # and in the root folder for the free VC 7.1 tools.
+ #
+ # Later 8.0 & 9.0 versions introduce separate platform specific
+ # vcvars*.bat scripts (e.g. 32 bit, 64 bit AMD or 64 bit Itanium)
+ # located in or under the bin folder. Most also include a global
+ # vcvarsall.bat helper script located in the root folder which runs
+ # one of the aforementioned vcvars*.bat scripts based on the options
+ # passed to it. So far only the version coming with some PlatformSDK
+ # distributions does not include this top level script but to
+ # support those we need to fall back to using the worker scripts
+ # directly in case the top level script can not be found.
+
+ local global-setup = [ feature.get-values <setup> : $(options) ] ;
+ global-setup = $(global-setup[1]) ;
+ if ! $(below-8.0)
+ {
+ global-setup ?= [ locate-default-setup $(command) : $(parent) :
+ vcvarsall.bat ] ;
+ }
+
+ local default-setup-amd64 = vcvarsx86_amd64.bat ;
+ local default-setup-i386 = vcvars32.bat ;
+ local default-setup-ia64 = vcvarsx86_ia64.bat ;
+ local default-setup-arm = vcvarsx86_arm.bat ;
+
+ # http://msdn2.microsoft.com/en-us/library/x4d2c09s(VS.80).aspx and
+ # http://msdn2.microsoft.com/en-us/library/x4d2c09s(vs.90).aspx
+ # mention an x86_IPF option, that seems to be a documentation bug
+ # and x86_ia64 is the correct option.
+ local default-global-setup-options-amd64 = x86_amd64 ;
+ local default-global-setup-options-i386 = x86 ;
+ local default-global-setup-options-ia64 = x86_ia64 ;
+ local default-global-setup-options-arm = x86_arm ;
+
+ # When using 64-bit Windows, and targeting 64-bit, it is possible to
+ # use a native 64-bit compiler, selected by the "amd64" & "ia64"
+ # parameters to vcvarsall.bat. There are two variables we can use --
+ # PROCESSOR_ARCHITECTURE and PROCESSOR_IDENTIFIER. The first is
+ # 'x86' when running 32-bit Windows, no matter which processor is
+ # used, and 'AMD64' on 64-bit windows on x86 (either AMD64 or EM64T)
+ # Windows.
+ #
+ if [ MATCH ^(AMD64) : [ os.environ PROCESSOR_ARCHITECTURE ] ]
+ {
+ default-global-setup-options-amd64 = amd64 ;
+ }
+ # TODO: The same 'native compiler usage' should be implemented for
+ # the Itanium platform by using the "ia64" parameter. For this
+ # though we need someone with access to this platform who can find
+ # out how to correctly detect this case.
+ else if $(somehow-detect-the-itanium-platform)
+ {
+ default-global-setup-options-ia64 = ia64 ;
+ }
+
+ local setup-prefix = "call " ;
+ local setup-suffix = " >nul"$(.nl) ;
+ if ! [ os.name ] in NT
+ {
+ setup-prefix = "cmd.exe /S /C call " ;
+ setup-suffix = " \">nul\" \"&&\" " ;
+ }
+
+ for local c in $(cpu)
+ {
+ local setup-options ;
+
+ setup-$(c) = [ feature.get-values <setup-$(c)> : $(options) ] ;
+
+ if ! $(setup-$(c))-is-not-empty
+ {
+ if $(global-setup)-is-not-empty
+ {
+ setup-$(c) = $(global-setup) ;
+
+ # If needed we can easily add using configuration flags
+ # here for overriding which options get passed to the
+ # global setup command for which target platform:
+ # setup-options = [ feature.get-values <setup-options-$(c)> : $(options) ] ;
+
+ setup-options ?= $(default-global-setup-options-$(c)) ;
+ }
+ else
+ {
+ setup-$(c) = [ locate-default-setup $(command) : $(parent) : $(default-setup-$(c)) ] ;
+ }
+ }
+
+ # Cygwin to Windows path translation.
+ setup-$(c) = "\""$(setup-$(c):W)"\"" ;
+
+ # Append setup options to the setup name and add the final setup
+ # prefix & suffix.
+ setup-options ?= "" ;
+ setup-$(c) = $(setup-prefix)$(setup-$(c):J=" ")" "$(setup-options:J=" ")$(setup-suffix) ;
+ }
+ }
+
+ # Get tool names (if any) and finish setup.
+
+ compiler = [ feature.get-values <compiler> : $(options) ] ;
+ compiler ?= cl ;
+
+ linker = [ feature.get-values <linker> : $(options) ] ;
+ linker ?= link ;
+
+ resource-compiler = [ feature.get-values <resource-compiler> : $(options) ] ;
+ resource-compiler ?= rc ;
+
+ # Default assembler command per target CPU. For the i386 assembler,
+ # -coff makes ml generate COFF object files (compatible with cl.exe output).
+ local default-assembler-amd64 = ml64 ;
+ local default-assembler-i386 = "ml -coff" ;
+ local default-assembler-ia64 = ias ;
+ local default-assembler-arm = armasm ;
+
+ assembler = [ feature.get-values <assembler> : $(options) ] ;
+
+ idl-compiler = [ feature.get-values <idl-compiler> : $(options) ] ;
+ idl-compiler ?= midl ;
+
+ mc-compiler = [ feature.get-values <mc-compiler> : $(options) ] ;
+ mc-compiler ?= mc ;
+
+ manifest-tool = [ feature.get-values <manifest-tool> : $(options) ] ;
+ manifest-tool ?= mt ;
+
+ local cc-filter = [ feature.get-values <compiler-filter> : $(options) ]
+ ;
+
+ for local c in $(cpu)
+ {
+ # Setup script is not required in some configurations.
+ setup-$(c) ?= "" ;
+
+ local cpu-conditions = $(conditions)/$(.cpu-arch-$(c)) ;
+
+ if $(.debug-configuration)
+ {
+ for local cpu-condition in $(cpu-conditions)
+ {
+ ECHO "notice: [msvc-cfg] condition: '$(cpu-condition)', setup: '$(setup-$(c))'" ;
+ }
+ }
+
+ local cpu-assembler = $(assembler) ;
+ cpu-assembler ?= $(default-assembler-$(c)) ;
+
+ toolset.flags msvc.compile .CC $(cpu-conditions) : $(setup-$(c))$(compiler) /Zm800 -nologo ;
+ toolset.flags msvc.compile .RC $(cpu-conditions) : $(setup-$(c))$(resource-compiler) ;
+ toolset.flags msvc.compile .ASM $(cpu-conditions) : $(setup-$(c))$(cpu-assembler) -nologo ;
+ toolset.flags msvc.link .LD $(cpu-conditions) : $(setup-$(c))$(linker) /NOLOGO /INCREMENTAL:NO ;
+ toolset.flags msvc.archive .LD $(cpu-conditions) : $(setup-$(c))$(linker) /lib /NOLOGO ;
+ toolset.flags msvc.compile .IDL $(cpu-conditions) : $(setup-$(c))$(idl-compiler) ;
+ toolset.flags msvc.compile .MC $(cpu-conditions) : $(setup-$(c))$(mc-compiler) ;
+
+ toolset.flags msvc.link .MT $(cpu-conditions) : $(setup-$(c))$(manifest-tool) -nologo ;
+
+ if $(cc-filter)
+ {
+ toolset.flags msvc .CC.FILTER $(cpu-conditions) : "|" $(cc-filter) ;
+ }
+ }
+
+ # Set version-specific flags.
+ configure-version-specific msvc : $(version) : $(conditions) ;
+ }
+}
+
+
+# Returns the default installation path for the given version.
+#
+local rule default-path ( version )
+{
+ # Use auto-detected path if possible.
+ local path = [ feature.get-values <command> : [ $(.versions).get $(version)
+ : options ] ] ;
+
+ if $(path)
+ {
+ path = $(path:D) ;
+ }
+ else
+ {
+ # Check environment.
+ if $(.version-$(version)-env)
+ {
+ local vc-path = [ os.environ $(.version-$(version)-env) ] ;
+ if $(vc-path)
+ {
+ vc-path = [ path.make $(vc-path) ] ;
+ vc-path = [ path.join $(vc-path) $(.version-$(version)-envpath) ] ;
+ vc-path = [ path.native $(vc-path) ] ;
+
+ path = $(vc-path) ;
+ }
+ }
+
+ # Check default path.
+ if ! $(path) && $(.version-$(version)-path)
+ {
+ path = [ path.native [ path.join $(.ProgramFiles) $(.version-$(version)-path) ] ] ;
+ }
+ }
+
+ return $(path) ;
+}
+
+
+# Returns either the default installation path (if 'version' is not empty) or
+# list of all known default paths (if no version is given)
+#
+local rule default-paths ( version ? )
+{
+ local possible-paths ;
+
+ if $(version)
+ {
+ possible-paths += [ default-path $(version) ] ;
+ }
+ else
+ {
+ for local i in $(.known-versions)
+ {
+ possible-paths += [ default-path $(i) ] ;
+ }
+ }
+
+ return $(possible-paths) ;
+}
+
+
+rule get-rspline ( target : lang-opt )
+{
+ CC_RSPLINE on $(target) = [ on $(target) return $(lang-opt) -U$(UNDEFS)
+ $(CFLAGS) $(C++FLAGS) $(OPTIONS) -c $(.nl)-D$(DEFINES)
+ $(.nl)\"-I$(INCLUDES:W)\" ] ;
+}
+
+class msvc-linking-generator : linking-generator
+{
+    # Calls the base version. If necessary, also creates additional targets:
+    # a PDB file target (forced to the main target's name with a .pdb
+    # extension) when building with debug symbols, and a manifest file
+    # target (main target name plus .manifest) when the manifest is not embedded.
+ rule generated-targets ( sources + : property-set : project name ? )
+ {
+ local result = [ linking-generator.generated-targets $(sources)
+ : $(property-set) : $(project) $(name) ] ;
+
+ if $(result)
+ {
+ local name-main = [ $(result[0]).name ] ;
+ local action = [ $(result[0]).action ] ;
+
+ if [ $(property-set).get <debug-symbols> ] = "on"
+ {
+ # We force the exact name on PDB. The reason is tagging -- the
+ # tag rule may reasonably special case some target types, like
+ # SHARED_LIB. The tag rule will not catch PDBs, and it cannot
+ # even easily figure out if a PDB is paired with a SHARED_LIB,
+ # EXE or something else. Because PDBs always get the same name
+ # as the main target, with .pdb as extension, just force it.
+ local target = [ class.new file-target $(name-main:S=.pdb) exact
+ : PDB : $(project) : $(action) ] ;
+ local registered-target = [ virtual-target.register $(target) ]
+ ;
+ if $(target) != $(registered-target)
+ {
+ $(action).replace-targets $(target) : $(registered-target) ;
+ }
+ result += $(registered-target) ;
+ }
+
+ if [ $(property-set).get <embed-manifest> ] = "off"
+ {
+            # Manifest is an evil target. It has .manifest appended to the
+ # name of the main target, including extension, e.g.
+ # a.exe.manifest. We use the 'exact' name to achieve this
+ # effect.
+ local target = [ class.new file-target $(name-main).manifest
+ exact : MANIFEST : $(project) : $(action) ] ;
+ local registered-target = [ virtual-target.register $(target) ]
+ ;
+ if $(target) != $(registered-target)
+ {
+ $(action).replace-targets $(target) : $(registered-target) ;
+ }
+ result += $(registered-target) ;
+ }
+ }
+ return $(result) ;
+ }
+}
+
+
+# Unsafe worker rule for the register-toolset() rule. Must not be called
+# multiple times.
+#
+local rule register-toolset-really ( )
+{
+ feature.extend toolset : msvc ;
+
+ # Intel and msvc supposedly have link-compatible objects.
+ feature.subfeature toolset msvc : vendor : intel : propagated optional ;
+
+ # Inherit MIDL flags.
+ toolset.inherit-flags msvc : midl ;
+
+ # Inherit MC flags.
+ toolset.inherit-flags msvc : mc ;
+
+ # Dynamic runtime comes only in MT flavour.
+ toolset.add-requirements
+ <toolset>msvc,<runtime-link>shared:<threading>multi ;
+
+ # Declare msvc toolset specific features.
+ {
+ feature.feature debug-store : object database : propagated ;
+ feature.feature pch-source : : dependency free ;
+ }
+
+ # Declare generators.
+ {
+ # TODO: Is it possible to combine these? Make the generators
+ # non-composing so that they do not convert each source into a separate
+ # .rsp file.
+ generators.register [ new msvc-linking-generator msvc.link :
+ OBJ SEARCHED_LIB STATIC_LIB IMPORT_LIB : EXE : <toolset>msvc ] ;
+ generators.register [ new msvc-linking-generator msvc.link.dll :
+ OBJ SEARCHED_LIB STATIC_LIB IMPORT_LIB : SHARED_LIB IMPORT_LIB :
+ <toolset>msvc ] ;
+
+ generators.register-archiver msvc.archive : OBJ : STATIC_LIB : <toolset>msvc ;
+ generators.register-c-compiler msvc.compile.c++ : CPP : OBJ : <toolset>msvc ;
+ generators.register-c-compiler msvc.compile.c : C : OBJ : <toolset>msvc ;
+ generators.register-c-compiler msvc.compile.c++.preprocess : CPP : PREPROCESSED_CPP : <toolset>msvc ;
+ generators.register-c-compiler msvc.compile.c.preprocess : C : PREPROCESSED_C : <toolset>msvc ;
+
+ # Using 'register-c-compiler' adds the build directory to INCLUDES.
+ generators.register-c-compiler msvc.compile.rc : RC : OBJ(%_res) : <toolset>msvc ;
+ generators.override msvc.compile.rc : rc.compile.resource ;
+ generators.register-standard msvc.compile.asm : ASM : OBJ : <toolset>msvc ;
+
+ generators.register-c-compiler msvc.compile.idl : IDL : MSTYPELIB H C(%_i) C(%_proxy) C(%_dlldata) : <toolset>msvc ;
+ generators.override msvc.compile.idl : midl.compile.idl ;
+
+ generators.register-standard msvc.compile.mc : MC : H RC : <toolset>msvc ;
+ generators.override msvc.compile.mc : mc.compile ;
+
+ # Note: the 'H' source type will catch both '.h' and '.hpp' headers as
+ # the latter have their HPP type derived from H. The type of compilation
+ # is determined entirely by the destination type.
+ generators.register [ new msvc-pch-generator msvc.compile.c.pch : H : C_PCH OBJ : <pch>on <toolset>msvc ] ;
+ generators.register [ new msvc-pch-generator msvc.compile.c++.pch : H : CPP_PCH OBJ : <pch>on <toolset>msvc ] ;
+
+ generators.override msvc.compile.c.pch : pch.default-c-pch-generator ;
+ generators.override msvc.compile.c++.pch : pch.default-cpp-pch-generator ;
+ }
+
+ toolset.flags msvc.compile PCH_FILE <pch>on : <pch-file> ;
+ toolset.flags msvc.compile PCH_SOURCE <pch>on : <pch-source> ;
+ toolset.flags msvc.compile PCH_HEADER <pch>on : <pch-header> ;
+
+ #
+ # Declare flags for compilation.
+ #
+
+ toolset.flags msvc.compile CFLAGS <optimization>speed : /O2 ;
+ toolset.flags msvc.compile CFLAGS <optimization>space : /O1 ;
+
+ toolset.flags msvc.compile CFLAGS $(.cpu-arch-ia64)/<instruction-set>$(.cpu-type-itanium) : /G1 ;
+ toolset.flags msvc.compile CFLAGS $(.cpu-arch-ia64)/<instruction-set>$(.cpu-type-itanium2) : /G2 ;
+
+ toolset.flags msvc.compile CFLAGS <debug-symbols>on/<debug-store>object : /Z7 ;
+ toolset.flags msvc.compile CFLAGS <debug-symbols>on/<debug-store>database : /Zi ;
+ toolset.flags msvc.compile CFLAGS <optimization>off : /Od ;
+ toolset.flags msvc.compile CFLAGS <inlining>off : /Ob0 ;
+ toolset.flags msvc.compile CFLAGS <inlining>on : /Ob1 ;
+ toolset.flags msvc.compile CFLAGS <inlining>full : /Ob2 ;
+
+ toolset.flags msvc.compile CFLAGS <warnings>on : /W3 ;
+ toolset.flags msvc.compile CFLAGS <warnings>off : /W0 ;
+ toolset.flags msvc.compile CFLAGS <warnings>all : /W4 ;
+ toolset.flags msvc.compile CFLAGS <warnings-as-errors>on : /WX ;
+
+ toolset.flags msvc.compile C++FLAGS <exception-handling>on/<asynch-exceptions>off/<extern-c-nothrow>off : /EHs ;
+ toolset.flags msvc.compile C++FLAGS <exception-handling>on/<asynch-exceptions>off/<extern-c-nothrow>on : /EHsc ;
+ toolset.flags msvc.compile C++FLAGS <exception-handling>on/<asynch-exceptions>on/<extern-c-nothrow>off : /EHa ;
+ toolset.flags msvc.compile C++FLAGS <exception-handling>on/<asynch-exceptions>on/<extern-c-nothrow>on : /EHac ;
+
+ # By default 8.0 enables rtti support while prior versions disabled it. We
+ # simply enable or disable it explicitly so we do not have to depend on this
+ # default behaviour.
+ toolset.flags msvc.compile CFLAGS <rtti>on : /GR ;
+ toolset.flags msvc.compile CFLAGS <rtti>off : /GR- ;
+ toolset.flags msvc.compile CFLAGS <runtime-debugging>off/<runtime-link>shared : /MD ;
+ toolset.flags msvc.compile CFLAGS <runtime-debugging>on/<runtime-link>shared : /MDd ;
+
+ toolset.flags msvc.compile CFLAGS <runtime-debugging>off/<runtime-link>static/<threading>multi : /MT ;
+ toolset.flags msvc.compile CFLAGS <runtime-debugging>on/<runtime-link>static/<threading>multi : /MTd ;
+
+ toolset.flags msvc.compile OPTIONS <cflags> : ;
+ toolset.flags msvc.compile.c++ OPTIONS <cxxflags> : ;
+
+ toolset.flags msvc.compile PDB_CFLAG <debug-symbols>on/<debug-store>database : /Fd ;
+
+ toolset.flags msvc.compile DEFINES <define> ;
+ toolset.flags msvc.compile UNDEFS <undef> ;
+ toolset.flags msvc.compile INCLUDES <include> ;
+
+ # Declare flags for the assembler.
+ toolset.flags msvc.compile.asm USER_ASMFLAGS <asmflags> ;
+
+ toolset.flags msvc.compile.asm ASMFLAGS <debug-symbols>on : "/Zi /Zd" ;
+
+ toolset.flags msvc.compile.asm ASMFLAGS <warnings>on : /W3 ;
+ toolset.flags msvc.compile.asm ASMFLAGS <warnings>off : /W0 ;
+ toolset.flags msvc.compile.asm ASMFLAGS <warnings>all : /W4 ;
+ toolset.flags msvc.compile.asm ASMFLAGS <warnings-as-errors>on : /WX ;
+
+ toolset.flags msvc.compile.asm DEFINES <define> ;
+
+ # Declare flags for linking.
+ {
+ toolset.flags msvc.link PDB_LINKFLAG <debug-symbols>on/<debug-store>database : /PDB: ; # not used yet
+ toolset.flags msvc.link LINKFLAGS <debug-symbols>on : /DEBUG ;
+ toolset.flags msvc.link DEF_FILE <def-file> ;
+
+ # The linker disables the default optimizations when using /DEBUG so we
+ # have to enable them manually for release builds with debug symbols.
+ toolset.flags msvc LINKFLAGS <debug-symbols>on/<runtime-debugging>off : /OPT:REF,ICF ;
+
+ toolset.flags msvc LINKFLAGS <user-interface>console : /subsystem:console ;
+ toolset.flags msvc LINKFLAGS <user-interface>gui : /subsystem:windows ;
+ toolset.flags msvc LINKFLAGS <user-interface>wince : /subsystem:windowsce ;
+ toolset.flags msvc LINKFLAGS <user-interface>native : /subsystem:native ;
+ toolset.flags msvc LINKFLAGS <user-interface>auto : /subsystem:posix ;
+
+ toolset.flags msvc.link OPTIONS <linkflags> ;
+ toolset.flags msvc.link LINKPATH <library-path> ;
+
+ toolset.flags msvc.link FINDLIBS_ST <find-static-library> ;
+ toolset.flags msvc.link FINDLIBS_SA <find-shared-library> ;
+ toolset.flags msvc.link LIBRARY_OPTION <toolset>msvc : "" : unchecked ;
+ toolset.flags msvc.link LIBRARIES_MENTIONED_BY_FILE : <library-file> ;
+ }
+
+ toolset.flags msvc.archive AROPTIONS <archiveflags> ;
+}
+
+
+# Locates the requested setup script under the given folder and returns its full
+# path or nothing in case the script can not be found. In case multiple scripts
+# are found only the first one is returned.
+#
+# TODO: There used to exist a code comment for the msvc.init rule stating that
+# we do not correctly detect the location of the vcvars32.bat setup script for
+# the free VC7.1 tools in case user explicitly provides a path. This should be
+# tested or simply remove this whole comment in case this toolset version is no
+# longer important.
+#
+local rule locate-default-setup ( command : parent : setup-name )
+{
+ local result = [ GLOB $(command) $(parent) : $(setup-name) ] ;
+ if $(result[1])
+ {
+ return $(result[1]) ;
+ }
+}
+
+
+# Validates given path, registers found configuration and prints debug
+# information about it.
+#
+local rule register-configuration ( version : path ? )
+{
+ if $(path)
+ {
+ local command = [ GLOB $(path) : cl.exe ] ;
+
+ if $(command)
+ {
+ if $(.debug-configuration)
+ {
+ ECHO notice: [msvc-cfg] msvc-$(version) detected, command:
+ '$(command)' ;
+ }
+
+ $(.versions).register $(version) ;
+ $(.versions).set $(version) : options : <command>$(command) ;
+ }
+ }
+}
+
+
+################################################################################
+#
+# Startup code executed when loading this module.
+#
+################################################################################
+
+if [ MATCH (--debug-configuration) : [ modules.peek : ARGV ] ]
+{
+ .debug-configuration = true ;
+}
+
+# Miscellaneous constants.
+.RM = [ common.rm-command ] ;
+.nl = "
+" ;
+.ProgramFiles = [ path.make [ common.get-program-files-dir ] ] ;
+.escaped-double-quote = "\"" ;
+.TOUCH_FILE = [ common.file-touch-command ] ;
+
+# List of all registered configurations.
+.versions = [ new configurations ] ;
+
+# Supported CPU architectures.
+.cpu-arch-i386 =
+ <architecture>/<address-model>
+ <architecture>/<address-model>32
+ <architecture>x86/<address-model>
+ <architecture>x86/<address-model>32 ;
+
+.cpu-arch-amd64 =
+ <architecture>/<address-model>64
+ <architecture>x86/<address-model>64 ;
+
+.cpu-arch-ia64 =
+ <architecture>ia64/<address-model>
+ <architecture>ia64/<address-model>64 ;
+
+.cpu-arch-arm =
+ <architecture>arm/<address-model>32 ;
+
+
+# Supported CPU types (only Itanium optimization options are supported from
+# VC++ 2005 on). See
+# http://msdn2.microsoft.com/en-us/library/h66s5s0e(vs.90).aspx for more
+# detailed information.
+.cpu-type-g5 = i586 pentium pentium-mmx ;
+.cpu-type-g6 = i686 pentiumpro pentium2 pentium3 pentium3m pentium-m k6
+ k6-2 k6-3 winchip-c6 winchip2 c3 c3-2 ;
+.cpu-type-em64t = prescott nocona core2 corei7 corei7-avx core-avx-i
+ conroe conroe-xe conroe-l allendale merom
+ merom-xe kentsfield kentsfield-xe penryn wolfdale
+ yorksfield nehalem sandy-bridge ivy-bridge haswell ;
+.cpu-type-amd64 = k8 opteron athlon64 athlon-fx k8-sse3 opteron-sse3
+ athlon64-sse3 amdfam10 barcelona bdver1 bdver2 bdver3 btver1 btver2 ;
+.cpu-type-g7 = pentium4 pentium4m athlon athlon-tbird athlon-4 athlon-xp
+ athlon-mp $(.cpu-type-em64t) $(.cpu-type-amd64) ;
+.cpu-type-itanium = itanium itanium1 merced ;
+.cpu-type-itanium2 = itanium2 mckinley ;
+
+
+# Known toolset versions, in order of preference.
+.known-versions = 12.0 11.0 10.0 10.0express 9.0 9.0express 8.0 8.0express 7.1
+ 7.1toolkit 7.0 6.0 ;
+
+# Version aliases.
+.version-alias-6 = 6.0 ;
+.version-alias-6.5 = 6.0 ;
+.version-alias-7 = 7.0 ;
+.version-alias-8 = 8.0 ;
+.version-alias-9 = 9.0 ;
+.version-alias-10 = 10.0 ;
+.version-alias-11 = 11.0 ;
+.version-alias-12 = 12.0 ;
+
+# Names of registry keys containing the Visual C++ installation path (relative
+# to "HKEY_LOCAL_MACHINE\SOFTWARE\\Microsoft").
+.version-6.0-reg = "VisualStudio\\6.0\\Setup\\Microsoft Visual C++" ;
+.version-7.0-reg = "VisualStudio\\7.0\\Setup\\VC" ;
+.version-7.1-reg = "VisualStudio\\7.1\\Setup\\VC" ;
+.version-8.0-reg = "VisualStudio\\8.0\\Setup\\VC" ;
+.version-8.0express-reg = "VCExpress\\8.0\\Setup\\VC" ;
+.version-9.0-reg = "VisualStudio\\9.0\\Setup\\VC" ;
+.version-9.0express-reg = "VCExpress\\9.0\\Setup\\VC" ;
+.version-10.0-reg = "VisualStudio\\10.0\\Setup\\VC" ;
+.version-10.0express-reg = "VCExpress\\10.0\\Setup\\VC" ;
+.version-11.0-reg = "VisualStudio\\11.0\\Setup\\VC" ;
+.version-12.0-reg = "VisualStudio\\12.0\\Setup\\VC" ;
+
+# Visual C++ Toolkit 2003 does not store its installation path in the registry.
+# The environment variable 'VCToolkitInstallDir' and the default installation
+# path will be checked instead.
+.version-7.1toolkit-path = "Microsoft Visual C++ Toolkit 2003" "bin" ;
+.version-7.1toolkit-env = VCToolkitInstallDir ;
+
+# Path to the folder containing "cl.exe" relative to the value of the
+# corresponding environment variable.
+.version-7.1toolkit-envpath = "bin" ;
+
+
+# Auto-detect all the available msvc installations on the system.
+auto-detect-toolset-versions ;
+
+
+# And finally trigger the actual Boost Build toolset registration.
+register-toolset ;
diff --git a/src/kenlm/jam-files/boost-build/tools/notfile.jam b/src/kenlm/jam-files/boost-build/tools/notfile.jam
new file mode 100644
index 0000000..7d0985b
--- /dev/null
+++ b/src/kenlm/jam-files/boost-build/tools/notfile.jam
@@ -0,0 +1,65 @@
+# Copyright (c) 2005 Vladimir Prus.
+# Distributed under the Boost Software License, Version 1.0.
+# (See accompanying file LICENSE_1_0.txt or copy at
+# http://www.boost.org/LICENSE_1_0.txt)
+
+import "class" : new ;
+import generators ;
+import project ;
+import targets ;
+import toolset ;
+import type ;
+
+
+type.register NOTFILE_MAIN ;
+
+
+class notfile-generator : generator
+{
+ rule __init__ ( * : * )
+ {
+ generator.__init__ $(1) : $(2) : $(3) : $(4) : $(5) : $(6) : $(7) : $(8)
+ : $(9) : $(10) : $(11) : $(12) : $(13) : $(14) : $(15) : $(16) :
+ $(17) : $(18) : $(19) ;
+ }
+
+ rule run ( project name ? : property-set : sources * : multiple ? )
+ {
+ local action ;
+ local action-name = [ $(property-set).get <action> ] ;
+ local m = [ MATCH ^@(.*) : $(action-name) ] ;
+ if $(m)
+ {
+ action = [ new action $(sources) : $(m[1]) : $(property-set) ] ;
+ }
+ else
+ {
+ action = [ new action $(sources) : notfile.run : $(property-set) ] ;
+ }
+ local t = [ new notfile-target $(name) : $(project) : $(action) ] ;
+ return [ virtual-target.register $(t) ] ;
+ }
+}
+
+
+generators.register [ new notfile-generator notfile.main : : NOTFILE_MAIN ] ;
+
+
+toolset.flags notfile.run ACTION : <action> ;
+
+
+actions run
+{
+ $(ACTION)
+}
+
+
+rule notfile ( target-name : action + : sources * : requirements * :
+ default-build * )
+{
+ targets.create-typed-target NOTFILE_MAIN : [ project.current ] :
+ $(target-name) : $(sources) : $(requirements) <action>$(action) :
+ $(default-build) ;
+}
+
+IMPORT $(__name__) : notfile : : notfile ;
diff --git a/src/kenlm/jam-files/boost-build/tools/package.jam b/src/kenlm/jam-files/boost-build/tools/package.jam
new file mode 100644
index 0000000..198c223
--- /dev/null
+++ b/src/kenlm/jam-files/boost-build/tools/package.jam
@@ -0,0 +1,165 @@
+# Copyright (c) 2005 Vladimir Prus.
+# Copyright 2006 Rene Rivera.
+#
+# Use, modification and distribution is subject to the Boost Software
+# License Version 1.0. (See accompanying file LICENSE_1_0.txt or
+# http://www.boost.org/LICENSE_1_0.txt)
+
+# Provides mechanism for installing whole packages into a specific directory
+# structure. This is opposed to the 'install' rule, that installs a number of
+# targets to a single directory, and does not care about directory structure at
+# all.
+
+# Example usage:
+#
+# package.install boost : <properties>
+# : <binaries>
+# : <libraries>
+# : <headers>
+# ;
+#
+# This will install binaries, libraries and headers to the 'proper' location,
+# given by command line options --prefix, --exec-prefix, --bindir, --libdir and
+# --includedir.
+#
+# The rule is just a convenient wrapper, avoiding the need to define several
+# 'install' targets.
+#
+# The only install-related feature is <install-source-root>. It will apply to
+# headers only and if present, paths of headers relatively to source root will
+# be retained after installing. If it is not specified, then "." is assumed, so
+# relative paths in headers are always preserved.
+
+import "class" : new ;
+import option ;
+import project ;
+import feature ;
+import property ;
+import stage ;
+import targets ;
+import modules ;
+
+feature.feature install-default-prefix : : free incidental ;
+
+rule install ( name package-name ? : requirements * : binaries * : libraries * : headers * )
+{
+ package-name ?= $(name) ;
+ if [ MATCH --prefix=(.*) : [ modules.peek : ARGV ] ]
+ {
+        # If --prefix is explicitly specified on the command line,
+        # then we need to wipe away any settings of bindir/libdir/includedir
+        # that are specified via options in config files.
+ option.set bindir : ;
+ option.set libdir : ;
+ option.set includedir : ;
+ }
+
+ # If <install-source-root> is not specified, all headers are installed to
+ # prefix/include, no matter what their relative path is. Sometimes that is
+ # what is needed.
+ local install-source-root = [ property.select <install-source-root> :
+ $(requirements) ] ;
+ install-source-root = $(install-source-root:G=) ;
+ requirements = [ property.change $(requirements) : <install-source-root> ] ;
+
+ local install-header-subdir = [ property.select <install-header-subdir> :
+ $(requirements) ] ;
+ install-header-subdir = /$(install-header-subdir:G=) ;
+ install-header-subdir ?= "" ;
+ requirements = [ property.change $(requirements) : <install-header-subdir> ]
+ ;
+
+ # First, figure out all locations. Use the default if no prefix option
+ # given.
+ local prefix = [ get-prefix $(name) : $(requirements) ] ;
+
+ # Architecture dependent files.
+ local exec-locate = [ option.get exec-prefix : $(prefix) ] ;
+
+ # Binaries.
+ local bin-locate = [ option.get bindir : $(prefix)/bin ] ;
+
+ # Object code libraries.
+ local lib-locate = [ option.get libdir : $(prefix)/lib ] ;
+
+ # Source header files.
+ local include-locate = [ option.get includedir : $(prefix)/include ] ;
+
+ stage.install $(name)-bin : $(binaries) : $(requirements)
+ <location>$(bin-locate) ;
+ alias $(name)-lib : $(name)-lib-shared $(name)-lib-static ;
+
+ # Since the install location of shared libraries differs on universe
+ # and cygwin, use target alternatives to make different targets.
+    # We should have used indirect conditional requirements, but it is
+ # awkward to pass bin-locate and lib-locate from there to another rule.
+ alias $(name)-lib-shared : $(name)-lib-shared-universe ;
+ alias $(name)-lib-shared : $(name)-lib-shared-cygwin : <target-os>cygwin ;
+
+ # For shared libraries, we install both explicitly specified one and the
+ # shared libraries that the installed executables depend on.
+ stage.install $(name)-lib-shared-universe : $(binaries) $(libraries) : $(requirements)
+ <location>$(lib-locate) <install-dependencies>on <install-type>SHARED_LIB ;
+ stage.install $(name)-lib-shared-cygwin : $(binaries) $(libraries) : $(requirements)
+ <location>$(bin-locate) <install-dependencies>on <install-type>SHARED_LIB ;
+
+ # For static libraries, we do not care about executable dependencies, since
+ # static libraries are already incorporated into them.
+ stage.install $(name)-lib-static : $(libraries) : $(requirements)
+ <location>$(lib-locate) <install-dependencies>on <install-type>STATIC_LIB ;
+ stage.install $(name)-headers : $(headers) : $(requirements)
+ <location>$(include-locate)$(install-header-subdir)
+ <install-source-root>$(install-source-root) ;
+ alias $(name) : $(name)-bin $(name)-lib $(name)-headers ;
+
+ local c = [ project.current ] ;
+ local project-module = [ $(c).project-module ] ;
+ module $(project-module)
+ {
+ explicit $(1)-bin $(1)-lib $(1)-headers $(1) $(1)-lib-shared $(1)-lib-static
+ $(1)-lib-shared-universe $(1)-lib-shared-cygwin ;
+ }
+}
+
+rule install-data ( target-name : package-name : data * : requirements * )
+{
+ package-name ?= target-name ;
+ if [ MATCH --prefix=(.*) : [ modules.peek : ARGV ] ]
+ {
+ # If --prefix is explicitly specified on the command line,
+        # then we need to wipe away any settings of datarootdir.
+ option.set datarootdir : ;
+ }
+
+ local prefix = [ get-prefix $(package-name) : $(requirements) ] ;
+ local datadir = [ option.get datarootdir : $(prefix)/share ] ;
+
+ stage.install $(target-name)
+ : $(data)
+ : $(requirements) <location>$(datadir)/$(package-name)
+ ;
+
+ local c = [ project.current ] ;
+ local project-module = [ $(c).project-module ] ;
+ module $(project-module)
+ {
+ explicit $(1) ;
+ }
+}
+
+local rule get-prefix ( package-name : requirements * )
+{
+ local prefix = [ option.get prefix : [ property.select
+ <install-default-prefix> : $(requirements) ] ] ;
+ prefix = $(prefix:G=) ;
+ requirements = [ property.change $(requirements) : <install-default-prefix>
+ ] ;
+ # Or some likely defaults if neither is given.
+ if ! $(prefix)
+ {
+ if [ modules.peek : NT ] { prefix = C:\\$(package-name) ; }
+ else if [ modules.peek : UNIX ] { prefix = /usr/local ; }
+ }
+ return $(prefix) ;
+}
+
diff --git a/src/kenlm/jam-files/boost-build/tools/pathscale.jam b/src/kenlm/jam-files/boost-build/tools/pathscale.jam
new file mode 100644
index 0000000..94abcf1
--- /dev/null
+++ b/src/kenlm/jam-files/boost-build/tools/pathscale.jam
@@ -0,0 +1,178 @@
+# Copyright 2006 Noel Belcourt
+# Distributed under the Boost Software License, Version 1.0.
+# (See accompanying file LICENSE_1_0.txt or copy at
+# http://www.boost.org/LICENSE_1_0.txt)
+
+import property ;
+import generators ;
+import toolset : flags ;
+import feature ;
+import type ;
+import os ;
+import common ;
+import fortran ;
+
+feature.extend toolset : pathscale ;
+toolset.inherit pathscale : unix ;
+generators.override pathscale.prebuilt : builtin.prebuilt ;
+generators.override pathscale.searched-lib-generator : searched-lib-generator ;
+
+# Documentation and toolchain description located
+# http://www.pathscale.com/docs.html
+
+rule init ( version ? : command * : options * )
+{
+ command = [ common.get-invocation-command pathscale : pathCC : $(command)
+ : /opt/ekopath/bin ] ;
+
+ # Determine the version
+ local command-string = $(command:J=" ") ;
+ if $(command)
+ {
+ version ?= [ MATCH "^([0-9.]+)"
+ : [ SHELL "$(command-string) -dumpversion" ] ] ;
+ }
+
+ local condition = [ common.check-init-parameters pathscale
+ : version $(version) ] ;
+
+ common.handle-options pathscale : $(condition) : $(command) : $(options) ;
+
+ toolset.flags pathscale.compile.fortran90 OPTIONS $(condition) :
+ [ feature.get-values <fflags> : $(options) ] : unchecked ;
+
+ command_c = $(command_c[1--2]) $(command[-1]:B=pathcc) ;
+
+ toolset.flags pathscale CONFIG_C_COMMAND $(condition) : $(command_c) ;
+
+ # fortran support
+ local f-command = [ common.get-invocation-command pathscale : pathf90 : $(command) ] ;
+ local command_f = $(command_f[1--2]) $(f-command[-1]:B=pathf90) ;
+ local command_f90 = $(command_f[1--2]) $(f-command[-1]:B=pathf90) ;
+
+ toolset.flags pathscale CONFIG_F_COMMAND $(condition) : $(command_f) ;
+ toolset.flags pathscale CONFIG_F90_COMMAND $(condition) : $(command_f90) ;
+
+ # always link lib rt to resolve clock_gettime()
+ flags pathscale.link FINDLIBS-SA : rt : unchecked ;
+
+ switch [ os.name ]
+ {
+ case SOLARIS :
+ toolset.flags pathscale.link RPATH_OPTION $(condition) : -Wl,-R, -Wl, : unchecked ;
+
+ case * : # GNU
+ toolset.flags pathscale.link RPATH_OPTION $(condition) : -Wl,-rpath= : unchecked ;
+ }
+}
+
+# Declare generators
+generators.register-c-compiler pathscale.compile.c : C : OBJ : <toolset>pathscale ;
+generators.register-c-compiler pathscale.compile.c++ : CPP : OBJ : <toolset>pathscale ;
+generators.register-fortran-compiler pathscale.compile.fortran : FORTRAN : OBJ : <toolset>pathscale ;
+generators.register-fortran90-compiler pathscale.compile.fortran90 : FORTRAN90 : OBJ : <toolset>pathscale ;
+
+# Declare flags and actions for compilation
+flags pathscale.compile OPTIONS <optimization>off : -O0 ;
+flags pathscale.compile OPTIONS <optimization>speed : -O3 ;
+flags pathscale.compile OPTIONS <optimization>space : -Os ;
+
+flags pathscale.compile OPTIONS <inlining>off : -noinline ;
+flags pathscale.compile OPTIONS <inlining>on : -inline ;
+flags pathscale.compile OPTIONS <inlining>full : -inline ;
+
+flags pathscale.compile OPTIONS <warnings>off : -woffall ;
+flags pathscale.compile OPTIONS <warnings>on : -Wall ;
+flags pathscale.compile OPTIONS <warnings>all : -Wall -pedantic ;
+flags pathscale.compile OPTIONS <warnings-as-errors>on : -Werror ;
+
+flags pathscale.compile OPTIONS <debug-symbols>on : -ggdb ;
+flags pathscale.compile OPTIONS <profiling>on : -pg ;
+flags pathscale.compile OPTIONS <link>shared : -fPIC ;
+flags pathscale.compile OPTIONS <address-model>32 : -m32 ;
+flags pathscale.compile OPTIONS <address-model>64 : -m64 ;
+
+flags pathscale.compile USER_OPTIONS <cflags> ;
+flags pathscale.compile.c++ USER_OPTIONS <cxxflags> ;
+flags pathscale.compile DEFINES <define> ;
+flags pathscale.compile INCLUDES <include> ;
+
+flags pathscale.compile.fortran USER_OPTIONS <fflags> ;
+flags pathscale.compile.fortran90 USER_OPTIONS <fflags> ;
+
+actions compile.c
+{
+ "$(CONFIG_C_COMMAND)" $(OPTIONS) $(USER_OPTIONS) -D$(DEFINES) -I"$(INCLUDES)" -c -o "$(<)" "$(>)"
+}
+
+actions compile.c++
+{
+ "$(CONFIG_COMMAND)" $(OPTIONS) $(USER_OPTIONS) -D$(DEFINES) -I"$(INCLUDES)" -c -o "$(<)" "$(>)"
+}
+
+actions compile.fortran
+{
+ "$(CONFIG_F_COMMAND)" $(OPTIONS) $(USER_OPTIONS) -D$(DEFINES) -I"$(INCLUDES)" -c -o "$(<)" "$(>)"
+}
+
+rule compile.fortran90 ( targets * : sources * : properties * )
+{
+ # the space rule inserts spaces between targets and it's necessary
+ SPACE on $(targets) = " " ;
+ # Serialize execution of the compile.fortran90 action
+ # F90 source must be compiled in a particular order so we
+ # serialize the build as a parallel F90 compile might fail
+ JAM_SEMAPHORE on $(targets) = <s>pathscale-f90-semaphore ;
+}
+
+actions compile.fortran90
+{
+ "$(CONFIG_F90_COMMAND)" $(OPTIONS) $(USER_OPTIONS) -D$(DEFINES) -I"$(INCLUDES)" -module $(<[1]:D) -c -o "$(<)" "$(>)"
+}
+
+# Declare flags and actions for linking
+flags pathscale.link OPTIONS <debug-symbols>on : -ggdb -rdynamic ;
+# Strip the binary when no debugging is needed
+flags pathscale.link OPTIONS <debug-symbols>off : -g0 ;
+flags pathscale.link OPTIONS <profiling>on : -pg ;
+flags pathscale.link USER_OPTIONS <linkflags> ;
+flags pathscale.link LINKPATH <library-path> ;
+flags pathscale.link FINDLIBS-ST <find-static-library> ;
+flags pathscale.link FINDLIBS-SA <find-shared-library> ;
+flags pathscale.link FINDLIBS-SA <threading>multi : pthread ;
+flags pathscale.link LIBRARIES <library-file> ;
+flags pathscale.link LINK-RUNTIME <runtime-link>static : static ;
+flags pathscale.link LINK-RUNTIME <runtime-link>shared : dynamic ;
+flags pathscale.link RPATH <dll-path> ;
+# On gcc, there are separate options for dll path at runtime and
+# link time. On Solaris, there's only one: -R, so we have to use
+# it, even though it's bad idea.
+flags pathscale.link RPATH <xdll-path> ;
+
+rule link ( targets * : sources * : properties * )
+{
+ SPACE on $(targets) = " " ;
+}
+
+actions link bind LIBRARIES
+{
+ "$(CONFIG_COMMAND)" $(OPTIONS) $(USER_OPTIONS) -L"$(LINKPATH)" $(RPATH_OPTION:E=-Wl,-rpath=)"$(RPATH)" -o "$(<)" "$(>)" "$(LIBRARIES)" -l$(FINDLIBS-SA) -l$(FINDLIBS-ST)
+}
+
+# Slight mods for dlls
+rule link.dll ( targets * : sources * : properties * )
+{
+ SPACE on $(targets) = " " ;
+}
+
+actions link.dll bind LIBRARIES
+{
+ "$(CONFIG_COMMAND)" $(OPTIONS) $(USER_OPTIONS) -L"$(LINKPATH)" $(RPATH_OPTION:E=-Wl,-rpath=)"$(RPATH)" -o "$(<)" -Wl,-soname$(SPACE)-Wl,$(<[1]:D=) -shared "$(>)" "$(LIBRARIES)" -l$(FINDLIBS-SA) -l$(FINDLIBS-ST)
+}
+
+# Declare action for creating static libraries
+# "$(CONFIG_COMMAND)" -ar -o "$(<)" "$(>)"
+actions piecemeal archive
+{
+ ar $(ARFLAGS) ru "$(<)" "$(>)"
+}
diff --git a/src/kenlm/jam-files/boost-build/tools/pch.jam b/src/kenlm/jam-files/boost-build/tools/pch.jam
new file mode 100644
index 0000000..0c6e98f
--- /dev/null
+++ b/src/kenlm/jam-files/boost-build/tools/pch.jam
@@ -0,0 +1,95 @@
+# Copyright (c) 2005 Reece H. Dunn.
+# Copyright 2006 Ilya Sokolov
+#
+# Use, modification and distribution is subject to the Boost Software
+# License Version 1.0. (See accompanying file LICENSE_1_0.txt or
+# http://www.boost.org/LICENSE_1_0.txt)
+
+##### Using Precompiled Headers (Quick Guide) #####
+#
+# Make precompiled mypch.hpp:
+#
+# import pch ;
+#
+# cpp-pch mypch
+# : # sources
+# mypch.hpp
+# : # requirements
+# <toolset>msvc:<source>mypch.cpp
+# ;
+#
+# Add cpp-pch to sources:
+#
+# exe hello
+# : main.cpp hello.cpp mypch
+# ;
+
+import "class" : new ;
+import type ;
+import feature ;
+import generators ;
+
+type.register PCH : pch ;
+
+type.register C_PCH : : PCH ;
+type.register CPP_PCH : : PCH ;
+
+# Control precompiled header (PCH) generation.
+feature.feature pch :
+ on
+ off
+ : propagated ;
+
+
+feature.feature pch-header : : free dependency ;
+feature.feature pch-file : : free dependency ;
+
+# Base PCH generator. The 'run' method has the logic to prevent this generator
+# from being run unless it's being used for a top-level PCH target.
+class pch-generator : generator
+{
+ import property-set ;
+
+ rule action-class ( )
+ {
+ return compile-action ;
+ }
+
+ rule run ( project name ? : property-set : sources + )
+ {
+ if ! $(name)
+ {
+ # Unless this generator is invoked as the top-most generator for a
+ # main target, fail. This allows using 'H' type as input type for
+ # this generator, while preventing Boost.Build to try this generator
+ # when not explicitly asked for.
+ #
+ # One bad example is msvc, where pch generator produces both PCH
+ # target and OBJ target, so if there's any header generated (like by
+ # bison, or by msidl), we'd try to use pch generator to get OBJ from
+ # that H, which is completely wrong. By restricting this generator
+ # only to pch main target, such problem is solved.
+ }
+ else
+ {
+ local r = [ run-pch $(project) $(name)
+ : [ $(property-set).add-raw <define>BOOST_BUILD_PCH_ENABLED ]
+ : $(sources) ] ;
+ return [ generators.add-usage-requirements $(r)
+ : <define>BOOST_BUILD_PCH_ENABLED ] ;
+ }
+ }
+
+ # This rule must be overridden by the derived classes.
+ rule run-pch ( project name ? : property-set : sources + )
+ {
+ }
+}
+
+
+# NOTE: requirements are empty, default pch generator can be applied when
+# pch=off.
+generators.register
+ [ new dummy-generator pch.default-c-pch-generator : : C_PCH ] ;
+generators.register
+ [ new dummy-generator pch.default-cpp-pch-generator : : CPP_PCH ] ;
diff --git a/src/kenlm/jam-files/boost-build/tools/pgi.jam b/src/kenlm/jam-files/boost-build/tools/pgi.jam
new file mode 100644
index 0000000..90e827d
--- /dev/null
+++ b/src/kenlm/jam-files/boost-build/tools/pgi.jam
@@ -0,0 +1,147 @@
+# Copyright Noel Belcourt 2007.
+# Distributed under the Boost Software License, Version 1.0.
+# (See accompanying file LICENSE_1_0.txt or copy at
+# http://www.boost.org/LICENSE_1_0.txt)
+
+import property ;
+import generators ;
+import os ;
+import toolset : flags ;
+import feature ;
+import fortran ;
+import type ;
+import common ;
+import gcc ;
+
+feature.extend toolset : pgi ;
+toolset.inherit pgi : unix ;
+generators.override pgi.prebuilt : builtin.lib-generator ;
+generators.override pgi.searched-lib-generator : searched-lib-generator ;
+
+# Documentation and toolchain description located
+# http://www.pgroup.com/resources/docs.htm
+
+rule init ( version ? : command * : options * )
+{
+ local condition = [ common.check-init-parameters pgi : version $(version) ] ;
+
+ local l_command = [ common.get-invocation-command pgi : pgCC : $(command) ] ;
+
+ common.handle-options pgi : $(condition) : $(l_command) : $(options) ;
+
+ command_c = $(command_c[1--2]) $(l_command[-1]:B=pgcc) ;
+
+ toolset.flags pgi CONFIG_C_COMMAND $(condition) : $(command_c) ;
+
+ flags pgi.compile DEFINES $(condition) :
+ [ feature.get-values <define> : $(options) ] : unchecked ;
+
+ # IOV_MAX support
+ flags pgi.compile DEFINES $(condition) : __need_IOV_MAX : unchecked ;
+
+ # set link flags
+ flags pgi.link FINDLIBS-ST : [
+ feature.get-values <find-static-library> : $(options) ] : unchecked ;
+
+ # always link lib rt to resolve clock_gettime()
+ flags pgi.link FINDLIBS-SA : rt [
+ feature.get-values <find-shared-library> : $(options) ] : unchecked ;
+
+ gcc.init-link-flags pgi gnu $(condition) ;
+}
+
+# Declare generators
+generators.register-c-compiler pgi.compile.c : C : OBJ : <toolset>pgi ;
+generators.register-c-compiler pgi.compile.c++ : CPP : OBJ : <toolset>pgi ;
+generators.register-fortran-compiler pgi.compile.fortran : FORTRAN : OBJ : <toolset>pgi ;
+
+# Declare flags and actions for compilation
+flags pgi.compile OPTIONS : -Kieee ;
+flags pgi.compile OPTIONS <link>shared : -fpic -fPIC ;
+flags pgi.compile OPTIONS <debug-symbols>on : -gopt ;
+flags pgi.compile OPTIONS <profiling>on : -xprofile=tcov ;
+flags pgi.compile OPTIONS <optimization>speed : -fast -Mx,8,0x10000000 ;
+flags pgi.compile OPTIONS <optimization>space : -xO2 -xspace ;
+# flags pgi.compile OPTIONS <threading>multi : -mt ;
+
+flags pgi.compile OPTIONS <warnings>off : -Minform=severe ;
+flags pgi.compile OPTIONS <warnings>on : -Minform=warn ;
+
+flags pgi.compile.c++ OPTIONS <inlining>off : -INLINE:none ;
+
+flags pgi.compile OPTIONS <cflags> ;
+flags pgi.compile.c++ OPTIONS <cxxflags> ;
+flags pgi.compile DEFINES <define> ;
+flags pgi.compile INCLUDES <include> ;
+
+flags pgi.compile.fortran OPTIONS <fflags> ;
+
+actions compile.c
+{
+ "$(CONFIG_C_COMMAND)" $(OPTIONS) -D$(DEFINES) -I"$(INCLUDES)" -c -o "$(<)" "$(>)"
+}
+
+actions compile.c++
+{
+ "$(CONFIG_COMMAND)" $(OPTIONS) -D$(DEFINES) -I"$(INCLUDES)" -c -o "$(<)" "$(>)"
+}
+
+actions compile.fortran
+{
+ "$(CONFIG_F_COMMAND)" $(OPTIONS) -D$(DEFINES) -I"$(INCLUDES)" -c -o "$(<)" "$(>)"
+}
+
+# Declare flags and actions for linking
+flags pgi.link OPTIONS <debug-symbols>on : -gopt ;
+# Strip the binary when no debugging is needed
+flags pgi.link OPTIONS <debug-symbols>off : -s ;
+flags pgi.link OPTIONS <profiling>on : -xprofile=tcov ;
+flags pgi.link OPTIONS <linkflags> ;
+flags pgi.link OPTIONS <link>shared : -fpic -fPIC ;
+flags pgi.link LINKPATH <library-path> ;
+flags pgi.link FINDLIBS-ST <find-static-library> ;
+flags pgi.link FINDLIBS-SA <find-shared-library> ;
+flags pgi.link FINDLIBS-SA <threading>multi : pthread rt ;
+flags pgi.link LIBRARIES <library-file> ;
+flags pgi.link LINK-RUNTIME <runtime-link>static : static ;
+flags pgi.link LINK-RUNTIME <runtime-link>shared : dynamic ;
+flags pgi.link RPATH <dll-path> ;
+
+# On gcc, there are separate options for dll path at runtime and
+# link time. On Solaris, there's only one: -R, so we have to use
+# it, even though it's a bad idea.
+flags pgi.link RPATH <xdll-path> ;
+
+rule link ( targets * : sources * : properties * )
+{
+ SPACE on $(targets) = " " ;
+}
+
+# reddish can only link statically and, somehow, the presence of -Bdynamic on the link line
+# marks the executable as a dynamically linked exec even though no dynamic libraries are supplied.
+# Yod on redstorm refuses to load an executable that is dynamically linked.
+# removing the dynamic link options should get us where we need to be on redstorm.
+# "$(CONFIG_COMMAND)" $(OPTIONS) -L"$(LINKPATH)" -R"$(RPATH)" -o "$(<)" "$(>)" "$(LIBRARIES)" -Bdynamic -l$(FINDLIBS-SA) -Bstatic -l$(FINDLIBS-ST) -B$(LINK-RUNTIME)
+actions link bind LIBRARIES
+{
+ "$(CONFIG_COMMAND)" $(OPTIONS) -L"$(LINKPATH)" -R"$(RPATH)" -o "$(<)" "$(>)" "$(LIBRARIES)" -Bstatic -l$(FINDLIBS-ST) -Bdynamic -l$(FINDLIBS-SA) -B$(LINK-RUNTIME)
+}
+
+# Slight mods for dlls
+rule link.dll ( targets * : sources * : properties * )
+{
+ SPACE on $(targets) = " " ;
+}
+
+# "$(CONFIG_COMMAND)" $(OPTIONS) -L"$(LINKPATH)" -R"$(RPATH)" -o "$(<)" -h$(<[1]:D=) -G "$(>)" "$(LIBRARIES)" -Bdynamic -l$(FINDLIBS-SA) -Bstatic -l$(FINDLIBS-ST) -B$(LINK-RUNTIME)
+
+actions link.dll bind LIBRARIES
+{
+ "$(CONFIG_COMMAND)" $(OPTIONS) -shared -L"$(LINKPATH)" -R"$(RPATH)" -o "$(<)" "$(>)" -Wl,-h -Wl,$(<[1]:D=) "$(LIBRARIES)" -Bdynamic -l$(FINDLIBS-SA) -Bstatic -l$(FINDLIBS-ST) -B$(LINK-RUNTIME)
+}
+
+actions updated together piecemeal pgi.archive
+{
+ ar -rc$(ARFLAGS:E=) "$(<)" "$(>)"
+}
+
diff --git a/src/kenlm/jam-files/boost-build/tools/png.jam b/src/kenlm/jam-files/boost-build/tools/png.jam
new file mode 100644
index 0000000..0544fe9
--- /dev/null
+++ b/src/kenlm/jam-files/boost-build/tools/png.jam
@@ -0,0 +1,226 @@
+# Copyright (c) 2010 Vladimir Prus.
+# Copyright (c) 2013 Steven Watanabe
+#
+# Use, modification and distribution is subject to the Boost Software
+# License Version 1.0. (See accompanying file LICENSE_1_0.txt or
+# http://www.boost.org/LICENSE_1_0.txt)
+
+# Supports the libpng library
+#
+# After 'using libpng', the following targets are available:
+#
+# /libpng//libpng -- The libpng library
+
+import project ;
+import ac ;
+import errors ;
+import "class" : new ;
+import targets ;
+import path ;
+import modules ;
+import errors ;
+import indirect ;
+import property ;
+import property-set ;
+
+header = png.h ;
+names = libpng ;
+
+sources = png.c pngerror.c pngget.c pngmem.c pngpread.c pngread.c pngrio.c pngrtran.c pngrutil.c
+ pngset.c pngtrans.c pngwio.c pngwrite.c pngwtran.c pngwutil.c ;
+
+library-id = 0 ;
+
+if --debug-configuration in [ modules.peek : ARGV ]
+{
+ .debug = true ;
+}
+
+# Initializes the libpng library.
+#
+# libpng can be configured either to use pre-existing binaries
+# or to build the library from source.
+#
+# Options for configuring a prebuilt libpng::
+#
+# <search>
+# The directory containing the libpng binaries.
+# <name>
+# Overrides the default library name.
+# <include>
+# The directory containing the libpng headers.
+#
+# If none of these options is specified, then the environmental
+# variables LIBPNG_LIBRARY_PATH, LIBPNG_NAME, and LIBPNG_INCLUDE will
+# be used instead.
+#
+# Options for building libpng from source::
+#
+# <source>
+# The libpng source directory. Defaults to the environmental variable
+# LIBPNG_SOURCE.
+# <tag>
+# A rule which computes the actual name of the compiled
+# libraries based on the build properties. Ignored
+# when using precompiled binaries.
+# <build-name>
+# The base name to use for the compiled library. Ignored
+# when using precompiled binaries.
+#
+# Examples::
+#
+# # Find libpng in the default system location
+# using libpng ;
+# # Build libpng from source
+# using libpng : 1.5.4 : <source>/home/steven/libpng-1.5.4 ;
+# # Find libpng in /usr/local
+# using libpng : 1.5.4
+# : <include>/usr/local/include <search>/usr/local/lib ;
+# # Build libpng from source for msvc and find
+# # prebuilt binaries for gcc.
+# using libpng : 1.5.4 : <source>C:/Devel/src/libpng-1.5.4 : <toolset>msvc ;
+# using libpng : 1.5.4 : : <toolset>gcc ;
+#
+rule init (
+ version ?
+ # The libpng version (currently ignored)
+
+ : options *
+ # A list of the options to use
+
+ : requirements *
+ # The requirements for the libpng target
+
+ : is-default ?
+ # Default configurations are only used when libpng
+ # has not yet been configured.
+ )
+{
+ local caller = [ project.current ] ;
+
+ if ! $(.initialized)
+ {
+ .initialized = true ;
+
+ project.initialize $(__name__) ;
+ .project = [ project.current ] ;
+ project libpng ;
+ }
+
+ local library-path = [ property.select <search> : $(options) ] ;
+ library-path = $(library-path:G=) ;
+ local include-path = [ property.select <include> : $(options) ] ;
+ include-path = $(include-path:G=) ;
+ local source-path = [ property.select <source> : $(options) ] ;
+ source-path = $(source-path:G=) ;
+ local library-name = [ property.select <name> : $(options) ] ;
+ library-name = $(library-name:G=) ;
+ local tag = [ property.select <tag> : $(options) ] ;
+ tag = $(tag:G=) ;
+ local build-name = [ property.select <build-name> : $(options) ] ;
+ build-name = $(build-name:G=) ;
+
+ condition = [ property-set.create $(requirements) ] ;
+ condition = [ property-set.create [ $(condition).base ] ] ;
+
+ local no-build-from-source ;
+ # Ignore environmental ZLIB_SOURCE if this initialization
+ # requested to search for a specific pre-built library.
+ if $(library-path) || $(include-path) || $(library-name)
+ {
+ if $(source-path) || $(tag) || $(build-name)
+ {
+ errors.user-error "incompatible options for libpng:"
+ [ property.select <search> <include> <name> : $(options) ] "and"
+ [ property.select <source> <tag> <build-name> : $(options) ] ;
+ }
+ else
+ {
+ no-build-from-source = true ;
+ }
+ }
+
+ source-path ?= [ modules.peek : ZLIB_SOURCE ] ;
+
+ if $(.configured.$(condition))
+ {
+ if $(is-default)
+ {
+ if $(.debug)
+ {
+ ECHO "notice: [libpng] libpng is already configured" ;
+ }
+ }
+ else
+ {
+ errors.user-error "libpng is already configured" ;
+ }
+ return ;
+ }
+ else if $(source-path) && ! $(no-build-from-source)
+ {
+ build-name ?= z ;
+ library-id = [ CALC $(library-id) + 1 ] ;
+ tag = [ MATCH ^@?(.*)$ : $(tag) ] ;
+ if $(tag) && ! [ MATCH ^([^%]*)%([^%]+)$ : $(tag) ]
+ {
+ tag = [ indirect.make $(tag) : [ $(caller).project-module ] ] ;
+ }
+ sources = [ path.glob $(source-path) : $(sources) ] ;
+ if $(.debug)
+ {
+ ECHO "notice: [libpng] Building libpng from source as $(build-name)" ;
+ if $(condition)
+ {
+ ECHO "notice: [libpng] Condition" [ $(condition).raw ] ;
+ }
+ if $(sources)
+ {
+ ECHO "notice: [libpng] found libpng source in $(source-path)" ;
+ }
+ else
+ {
+ ECHO "warning: [libpng] could not find libpng source in $(source-path)" ;
+ }
+ }
+ local target ;
+ if $(sources) {
+ target = [ targets.create-typed-target LIB : $(.project)
+ : $(build-name).$(library-id)
+ : $(sources)
+ : $(requirements)
+ <tag>@$(tag)
+ <include>$(source-path)
+ <toolset>msvc:<define>_CRT_SECURE_NO_DEPRECATE
+ <toolset>msvc:<define>_SCL_SECURE_NO_DEPRECATE
+ <link>shared:<define>ZLIB_DLL
+ :
+ : <include>$(source-path) ] ;
+ }
+
+ local mt = [ new ac-library libpng : $(.project) : $(condition) ] ;
+ $(mt).set-header $(header) ;
+ $(mt).set-default-names $(names) ;
+ if $(target)
+ {
+ $(mt).set-target $(target) ;
+ }
+ targets.main-target-alternative $(mt) ;
+ } else {
+ if $(.debug)
+ {
+ ECHO "notice: [libpng] Using pre-installed library" ;
+ if $(condition)
+ {
+ ECHO "notice: [libpng] Condition" [ $(condition).raw ] ;
+ }
+ }
+
+ local mt = [ new ac-library libpng : $(.project) : $(condition) :
+ $(include-path) : $(library-path) : $(library-name) : $(root) ] ;
+ $(mt).set-header $(header) ;
+ $(mt).set-default-names $(names) ;
+ targets.main-target-alternative $(mt) ;
+ }
+ .configured.$(condition) = true ;
+}
diff --git a/src/kenlm/jam-files/boost-build/tools/python-config.jam b/src/kenlm/jam-files/boost-build/tools/python-config.jam
new file mode 100644
index 0000000..40aa825
--- /dev/null
+++ b/src/kenlm/jam-files/boost-build/tools/python-config.jam
@@ -0,0 +1,27 @@
+#~ Copyright 2005 Rene Rivera.
+#~ Distributed under the Boost Software License, Version 1.0.
+#~ (See accompanying file LICENSE_1_0.txt or http://www.boost.org/LICENSE_1_0.txt)
+
+# Automatic configuration for Python tools and libraries. To use, just import this module.
+
+import os ;
+import toolset : using ;
+
+if [ os.name ] = NT
+{
+ for local R in 2.4 2.3 2.2
+ {
+ local python-path = [ W32_GETREG
+ "HKEY_LOCAL_MACHINE\\SOFTWARE\\Python\\PythonCore\\$(R)\\InstallPath" ] ;
+ local python-version = $(R) ;
+
+ if $(python-path)
+ {
+ if --debug-configuration in [ modules.peek : ARGV ]
+ {
+ ECHO "notice:" using python ":" $(python-version) ":" $(python-path) ;
+ }
+ using python : $(python-version) : $(python-path) ;
+ }
+ }
+}
diff --git a/src/kenlm/jam-files/boost-build/tools/python.jam b/src/kenlm/jam-files/boost-build/tools/python.jam
new file mode 100644
index 0000000..a50a516
--- /dev/null
+++ b/src/kenlm/jam-files/boost-build/tools/python.jam
@@ -0,0 +1,1258 @@
+# Copyright 2004 Vladimir Prus.
+# Distributed under the Boost Software License, Version 1.0. (See
+# accompanying file LICENSE_1_0.txt or copy at
+# http://www.boost.org/LICENSE_1_0.txt)
+
+# Support for Python and the Boost.Python library.
+#
+# This module defines
+#
+# - a project 'python' with a target 'python' in it, that corresponds to the
+# python library
+#
+# - a main target rule 'python-extension' which can be used to build a python
+# extension.
+#
+# Extensions that use Boost.Python must explicitly link to it.
+
+import type ;
+import testing ;
+import generators ;
+import project ;
+import errors ;
+import targets ;
+import "class" : new ;
+import os ;
+import common ;
+import toolset ;
+import regex ;
+import numbers ;
+import string ;
+import property ;
+import sequence ;
+import path ;
+import feature ;
+import set ;
+import builtin ;
+
+
+# Make this module a project.
+project.initialize $(__name__) ;
+project python ;
+
+# Save the project so that if 'init' is called several times we define new
+# targets in the python project, not in whatever project we were called by.
+.project = [ project.current ] ;
+
+# Dynamic linker lib. Necessary to specify it explicitly on some platforms.
+lib dl ;
+# This contains the 'openpty' function needed by python. Again, on some systems
+# we need to pass this to the linker explicitly.
+lib util ;
+# Python uses pthread symbols.
+lib pthread ;
+# Extra library needed by pthread on some platforms.
+lib rt ;
+
+# The pythonpath feature specifies additional elements for the PYTHONPATH
+# environment variable, set by run-pyd. For example, pythonpath can be used to
+# access Python modules that are part of the product being built, but are not
+# installed in the development system's default paths.
+feature.feature pythonpath : : free optional path ;
+
+# Initializes the Python toolset. Note that all parameters are optional.
+#
+# - version -- the version of Python to use. Should be in Major.Minor format,
+# for example 2.3. Do not include the subminor version.
+#
+# - cmd-or-prefix: Preferably, a command that invokes a Python interpreter.
+# Alternatively, the installation prefix for Python libraries and includes. If
+# empty, will be guessed from the version, the platform's installation
+# patterns, and the python executables that can be found in PATH.
+#
+# - includes: the include path to Python headers. If empty, will be guessed.
+#
+# - libraries: the path to Python library binaries. If empty, will be guessed.
+# On MacOS/Darwin, you can also pass the path of the Python framework.
+#
+# - condition: if specified, should be a set of properties that are matched
+# against the build configuration when Boost.Build selects a Python
+# configuration to use.
+#
+# - extension-suffix: A string to append to the name of extension modules before
+# the true filename extension. Ordinarily we would just compute this based on
+# the value of the <python-debugging> feature. However ubuntu's python-dbg
+# package uses the windows convention of appending _d to debug-build extension
+# modules. We have no way of detecting ubuntu, or of probing python for the
+# "_d" requirement, and if you configure and build python using
+# --with-pydebug, you'll be using the standard *nix convention. Defaults to ""
+# (or "_d" when targeting windows and <python-debugging> is set).
+#
+# Example usage:
+#
+# using python : 2.3 ;
+# using python : 2.3 : /usr/local/bin/python ;
+#
+rule init ( version ? : cmd-or-prefix ? : includes * : libraries ?
+ : condition * : extension-suffix ? )
+{
+ project.push-current $(.project) ;
+
+ debug-message Configuring python... ;
+ for local v in version cmd-or-prefix includes libraries condition
+ {
+ if $($(v))
+ {
+ debug-message " user-specified "$(v): \"$($(v))\" ;
+ }
+ }
+
+ configure $(version) : $(cmd-or-prefix) : $(includes) : $(libraries) : $(condition) : $(extension-suffix) ;
+
+ project.pop-current ;
+}
+
+# A simpler version of SHELL that grabs stderr as well as stdout, but returns
+# nothing if there was an error.
+#
+local rule shell-cmd ( cmd )
+{
+ debug-message running command '$(cmd)" 2>&1"' ;
+ x = [ SHELL $(cmd)" 2>&1" : exit-status ] ;
+ if $(x[2]) = 0
+ {
+ return $(x[1]) ;
+ }
+ else
+ {
+ return ;
+ }
+}
+
+
+# Try to identify Cygwin symlinks. Invoking such a file directly as an NT
+# executable from a native Windows build of bjam would be fatal to the bjam
+# process. One /can/ invoke them through sh.exe or bash.exe, if you can prove
+# that those are not also symlinks. ;-)
+#
+# If a symlink is found returns non-empty; we try to extract the target of the
+# symlink from the file and return that.
+#
+# Note: 1. only works on NT 2. path is a native path.
+local rule is-cygwin-symlink ( path )
+{
+ local is-symlink = ;
+
+ # Look for a file with the given path having the S attribute set, as cygwin
+ # symlinks do. /-C means "do not use thousands separators in file sizes."
+ local dir-listing = [ shell-cmd "DIR /-C /A:S \""$(path)"\"" ] ;
+
+ if $(dir-listing)
+ {
+ # Escape any special regex characters in the base part of the path.
+ local base-pat = [ regex.escape $(path:D=) : ].[()*+?|\\$^ : \\ ] ;
+
+ # Extract the file's size from the directory listing.
+ local size-of-system-file = [ MATCH "([0-9]+) "$(base-pat) : $(dir-listing) : 1 ] ;
+
+ # If the file has a reasonably small size, look for the special symlink
+ # identification text.
+ if $(size-of-system-file) && [ numbers.less $(size-of-system-file) 1000 ]
+ {
+ local link = [ SHELL "FIND /OFF \"!<symlink>\" \""$(path)"\" 2>&1" ] ;
+ if $(link[2]) != 0
+ {
+ local nl = "
+
+" ;
+ is-symlink = [ MATCH ".*!<symlink>([^"$(nl)"]*)" : $(link[1]) : 1 ] ;
+ if $(is-symlink)
+ {
+ is-symlink = [ *nix-path-to-native $(is-symlink) ] ;
+ is-symlink = $(is-symlink:R=$(path:D)) ;
+ }
+
+ }
+ }
+ }
+ return $(is-symlink) ;
+}
+
+
+# Append ext to each member of names that does not contain '.'.
+#
+local rule default-extension ( names * : ext * )
+{
+ local result ;
+ for local n in $(names)
+ {
+ switch $(n)
+ {
+ case *.* : result += $(n) ;
+ case * : result += $(n)$(ext) ;
+ }
+ }
+ return $(result) ;
+}
+
+
+# Tries to determine whether invoking "cmd" would actually attempt to launch a
+# cygwin symlink.
+#
+# Note: only works on NT.
+#
+local rule invokes-cygwin-symlink ( cmd )
+{
+ local dirs = $(cmd:D) ;
+ if ! $(dirs)
+ {
+ dirs = . [ os.executable-path ] ;
+ }
+ local base = [ default-extension $(cmd:D=) : .exe .cmd .bat ] ;
+ local paths = [ GLOB $(dirs) : $(base) ] ;
+ if $(paths)
+ {
+ # Make sure we have not run into a Cygwin symlink. Invoking such a file
+ # as an NT executable would be fatal for the bjam process.
+ return [ is-cygwin-symlink $(paths[1]) ] ;
+ }
+}
+
+
+local rule debug-message ( message * )
+{
+ if --debug-configuration in [ modules.peek : ARGV ]
+ {
+ ECHO notice: [python-cfg] $(message) ;
+ }
+}
+
+
+# Like W32_GETREG, except prepend HKEY_CURRENT_USER\SOFTWARE and
+# HKEY_LOCAL_MACHINE\SOFTWARE to the first argument, returning the first result
+# found. Also accounts for the fact that on 64-bit machines, 32-bit software has
+# its own area, under SOFTWARE\Wow6432node.
+#
+local rule software-registry-value ( path : data ? )
+{
+ local result ;
+ for local root in HKEY_CURRENT_USER HKEY_LOCAL_MACHINE
+ {
+ for local x64elt in "" Wow6432node\\ # Account for 64-bit windows
+ {
+ if ! $(result)
+ {
+ result = [ W32_GETREG $(root)\\SOFTWARE\\$(x64elt)$(path) : $(data) ] ;
+ }
+ }
+
+ }
+ return $(result) ;
+}
+
+
+.windows-drive-letter-re = ^([A-Za-z]):[\\/](.*) ;
+.cygwin-drive-letter-re = ^/cygdrive/([a-z])/(.*) ;
+
+.working-directory = [ PWD ] ;
+.working-drive-letter = [ SUBST $(.working-directory) $(.windows-drive-letter-re) $1 ] ;
+.working-drive-letter ?= [ SUBST $(.working-directory) $(.cygwin-drive-letter-re) $1 ] ;
+
+
+local rule windows-to-cygwin-path ( path )
+{
+ # If path is rooted with a drive letter, rewrite it using the /cygdrive
+ # mountpoint.
+ local p = [ SUBST $(path:T) $(.windows-drive-letter-re) /cygdrive/$1/$2 ] ;
+
+ # Else if path is rooted without a drive letter, use the working directory.
+ p ?= [ SUBST $(path:T) ^/(.*) /cygdrive/$(.working-drive-letter:L)/$2 ] ;
+
+ # Else return the path unchanged.
+ return $(p:E=$(path:T)) ;
+}
+
+
+# :W only works in Cygwin builds of bjam. This one works on NT builds as well.
+#
+local rule cygwin-to-windows-path ( path )
+{
+ path = $(path:R="") ; # strip any trailing slash
+
+ local drive-letter = [ SUBST $(path) $(.cygwin-drive-letter-re) $1:/$2 ] ;
+ if $(drive-letter)
+ {
+ path = $(drive-letter) ;
+ }
+ else if $(path:R=/x) = $(path) # already rooted?
+ {
+ # Look for a cygwin mount that includes each head sequence in $(path).
+ local head = $(path) ;
+ local tail = "" ;
+
+ while $(head)
+ {
+ local root = [ software-registry-value
+ "Cygnus Solutions\\Cygwin\\mounts v2\\"$(head) : native ] ;
+
+ if $(root)
+ {
+ path = $(tail:R=$(root)) ;
+ head = ;
+ }
+ tail = $(tail:R=$(head:D=)) ;
+
+ if $(head) = /
+ {
+ head = ;
+ }
+ else
+ {
+ head = $(head:D) ;
+ }
+ }
+ }
+ return [ regex.replace $(path:R="") / \\ ] ;
+}
+
+
+# Convert a *nix path to native.
+#
+local rule *nix-path-to-native ( path )
+{
+ if [ os.name ] = NT
+ {
+ path = [ cygwin-to-windows-path $(path) ] ;
+ }
+ return $(path) ;
+}
+
+
+# Convert an NT path to native.
+#
+local rule windows-path-to-native ( path )
+{
+ if [ os.name ] = NT
+ {
+ return $(path) ;
+ }
+ else
+ {
+ return [ windows-to-cygwin-path $(path) ] ;
+ }
+}
+
+
+# Return nonempty if path looks like a windows path, i.e. it starts with a drive
+# letter or contains backslashes.
+#
+local rule guess-windows-path ( path )
+{
+ return [ SUBST $(path) ($(.windows-drive-letter-re)|.*([\\]).*) $1 ] ;
+}
+
+
+local rule path-to-native ( paths * )
+{
+ local result ;
+
+ for local p in $(paths)
+ {
+ if [ guess-windows-path $(p) ]
+ {
+ result += [ windows-path-to-native $(p) ] ;
+ }
+ else
+ {
+ result += [ *nix-path-to-native $(p:T) ] ;
+ }
+ }
+ return $(result) ;
+}
+
+
+# Validate the version string and extract the major/minor part we care about.
+#
+local rule split-version ( version )
+{
+ local major-minor = [ MATCH ^([0-9]+)\.([0-9]+)(.*)$ : $(version) : 1 2 3 ] ;
+ if ! $(major-minor[2]) || $(major-minor[3])
+ {
+ ECHO "Warning: \"using python\" expects a two part (major, minor) version number; got" $(version) instead ;
+
+ # Add a zero to account for the missing digit if necessary.
+ major-minor += 0 ;
+ }
+
+ return $(major-minor[1]) $(major-minor[2]) ;
+}
+
+
+# Build a list of versions from 3.0 down to 1.5. Because bjam can not enumerate
+# registry sub-keys, we have no way of finding a version with a 2-digit minor
+# version, e.g. 2.10 -- let us hope that never happens.
+#
+.version-countdown = ;
+for local v in [ numbers.range 15 30 ]
+{
+ .version-countdown = [ SUBST $(v) (.)(.*) $1.$2 ] $(.version-countdown) ;
+}
+
+
+local rule windows-installed-pythons ( version ? )
+{
+ version ?= $(.version-countdown) ;
+ local interpreters ;
+
+ for local v in $(version)
+ {
+ local install-path = [
+ software-registry-value "Python\\PythonCore\\"$(v)"\\InstallPath" ] ;
+
+ if $(install-path)
+ {
+ install-path = [ windows-path-to-native $(install-path) ] ;
+ debug-message Registry indicates Python $(v) installed at \"$(install-path)\" ;
+ }
+
+ interpreters += $(:E=python:R=$(install-path)) ;
+ }
+ return $(interpreters) ;
+}
+
+
+local rule darwin-installed-pythons ( version ? )
+{
+ version ?= $(.version-countdown) ;
+
+ local prefix
+ = [ GLOB /System/Library/Frameworks /Library/Frameworks
+ : Python.framework ] ;
+
+ return $(prefix)/Versions/$(version)/bin/python ;
+}
+
+
+# Assume "python-cmd" invokes a python interpreter and invoke it to extract all
+# the information we care about from its "sys" module. Returns void if
+# unsuccessful.
+#
+local rule probe ( python-cmd )
+{
+ # Avoid invoking a Cygwin symlink on NT.
+ local skip-symlink ;
+ if [ os.name ] = NT
+ {
+ skip-symlink = [ invokes-cygwin-symlink $(python-cmd) ] ;
+ }
+
+ if $(skip-symlink)
+ {
+ debug-message -------------------------------------------------------------------- ;
+ debug-message \"$(python-cmd)\" would attempt to invoke a Cygwin symlink, ;
+ debug-message causing a bjam built for Windows to hang. ;
+ debug-message ;
+ debug-message If you intend to target a Cygwin build of Python, please ;
+ debug-message replace the path to the link with the path to a real executable ;
+ debug-message (guessing: \"$(skip-symlink)\") "in" your 'using python' line ;
+ debug-message "in" user-config.jam or site-config.jam. Do not forget to escape ;
+ debug-message backslashes ;
+ debug-message -------------------------------------------------------------------- ;
+ }
+ else
+ {
+ # Prepare a List of Python format strings and expressions that can be
+ # used to print the constants we want from the sys module.
+
+ # We do not really want sys.version since that is a complicated string,
+ # so get the information from sys.version_info instead.
+ local format = "version=%d.%d" ;
+ local exprs = "version_info[0]" "version_info[1]" ;
+
+ for local s in $(sys-elements[2-])
+ {
+ format += $(s)=%s ;
+ exprs += $(s) ;
+ }
+
+ # Invoke Python and ask it for all those values.
+ local full-cmd =
+ $(python-cmd)" -c \"from sys import *; print('"$(format:J=\\n)"' % ("$(exprs:J=,)"))\"" ;
+
+ local output = [ shell-cmd $(full-cmd) ] ;
+ if $(output)
+ {
+ # Parse the output to get all the results.
+ local nl = "
+
+" ;
+ for s in $(sys-elements)
+ {
+ # These variables are expected to be declared local in the
+ # caller, so Jam's dynamic scoping will set their values there.
+ sys.$(s) = [ SUBST $(output) \\<$(s)=([^$(nl)]+) $1 ] ;
+ }
+ }
+ return $(output) ;
+ }
+}
+
+
+# Make sure the "libraries" and "includes" variables (in an enclosing scope)
+# have a value based on the information given.
+#
+local rule compute-default-paths ( target-os : version ? : prefix ? :
+ exec-prefix ? )
+{
+ exec-prefix ?= $(prefix) ;
+
+ if $(target-os) = windows
+ {
+ # The exec_prefix is where you're supposed to look for machine-specific
+ # libraries.
+ local default-library-path = $(exec-prefix)\\libs ;
+ local default-include-path = $(:E=Include:R=$(prefix)) ;
+
+ # If the interpreter was found in a directory called "PCBuild" or
+ # "PCBuild8," assume we're looking at a Python built from the source
+ # distro, and go up one additional level to the default root. Otherwise,
+ # the default root is the directory where the interpreter was found.
+
+ # We ask Python itself what the executable path is in case of
+ # intermediate symlinks or shell scripts.
+ local executable-dir = $(sys.executable:D) ;
+
+ if [ MATCH ^(PCBuild) : $(executable-dir:D=) ]
+ {
+ debug-message "This Python appears to reside in a source distribution;" ;
+ debug-message "prepending \""$(executable-dir)"\" to default library search path" ;
+
+ default-library-path = $(executable-dir) $(default-library-path) ;
+
+ default-include-path = $(:E=PC:R=$(executable-dir:D)) $(default-include-path) ;
+
+ debug-message "and \""$(default-include-path[1])"\" to default #include path" ;
+ }
+
+ libraries ?= $(default-library-path) ;
+ includes ?= $(default-include-path) ;
+ }
+ else
+ {
+ includes ?= $(prefix)/include/python$(version) ;
+
+ local lib = $(exec-prefix)/lib ;
+ libraries ?= $(lib)/python$(version)/config $(lib) ;
+ }
+}
+
+# The version of the python interpreter to use.
+feature.feature python : : propagated ;
+feature.feature python.interpreter : : free ;
+
+toolset.flags python.capture-output PYTHON : <python.interpreter> ;
+
+#
+# Support for Python configured --with-pydebug
+#
+feature.feature python-debugging : off on : propagated ;
+builtin.variant debug-python : debug : <python-debugging>on ;
+
+
+# Return a list of candidate commands to try when looking for a Python
+# interpreter. prefix is expected to be a native path.
+#
+local rule candidate-interpreters ( version ? : prefix ? : target-os )
+{
+ local bin-path = bin ;
+ if $(target-os) = windows
+ {
+ # On Windows, look in the root directory itself and, to work with the
+ # result of a build-from-source, the PCBuild directory.
+ bin-path = PCBuild8 PCBuild "" ;
+ }
+
+ bin-path = $(bin-path:R=$(prefix)) ;
+
+ if $(target-os) in windows darwin
+ {
+ return # Search:
+ $(:E=python:R=$(bin-path)) # Relative to the prefix, if any
+ python # In the PATH
+ [ $(target-os)-installed-pythons $(version) ] # Standard install locations
+ ;
+ }
+ else
+ {
+ # Search relative to the prefix, or if none supplied, in PATH.
+ local unversioned = $(:E=python:R=$(bin-path:E=)) ;
+
+ # If a version was specified, look for a python with that specific
+ # version appended before looking for one called, simply, "python"
+ return $(unversioned)$(version) $(unversioned) ;
+ }
+}
+
+
+# Compute system library dependencies for targets linking with static Python
+# libraries.
+#
+# On many systems, Python uses libraries such as pthreads or libdl. Since static
+# libraries carry no library dependency information of their own that the linker
+# can extract, these extra dependencies have to be given explicitly on the link
+# line of the client. The information about these dependencies is packaged into
+# the "python" target below.
+#
+# Even where Python itself uses pthreads, it never allows extension modules to
+# be entered concurrently (unless they explicitly give up the interpreter lock).
+# Therefore, extension modules do not need the efficiency overhead of threadsafe
+# code as produced by <threading>multi, and we handle libpthread along with
+# other libraries here. Note: this optimization is based on an assumption that
+# the compiler generates link-compatible code in both the single- and
+# multi-threaded cases, and that system libraries do not change their ABIs
+# either.
+#
+# Returns a list of usage-requirements that link to the necessary system
+# libraries.
+#
+local rule system-library-dependencies ( target-os )
+{
+ switch $(target-os)
+ {
+ case s[uo][nl]* : # solaris, sun, sunos
+ # Add a librt dependency for the gcc toolset on SunOS (the sun
+ # toolset adds -lrt unconditionally). While this appears to
+ # duplicate the logic already in gcc.jam, it does not as long as
+ # we are not forcing <threading>multi.
+
+ # On solaris 10, distutils.sysconfig.get_config_var('LIBS') yields
+ # '-lresolv -lsocket -lnsl -lrt -ldl'. However, that does not seem
+ # to be the right list for extension modules. For example, on my
+ # installation, adding -ldl causes at least one test to fail because
+ # the library can not be found and removing it causes no failures.
+
+ # Apparently, though, we need to add -lrt for gcc.
+ return <toolset>gcc:<library>rt ;
+
+ case osf : return <library>pthread <toolset>gcc:<library>rt ;
+
+ case qnx* : return ;
+ case darwin : return ;
+ case windows : return ;
+
+ case hpux : return <library>rt ;
+ case *bsd : return <library>pthread <toolset>gcc:<library>util ;
+
+ case aix : return <library>pthread <library>dl ;
+
+ case * : return <library>pthread <library>dl
+ <toolset>gcc:<library>util <toolset-intel:platform>linux:<library>util ;
+ }
+}
+
+
+# Declare a target to represent Python's library.
+#
+local rule declare-libpython-target ( version ? : requirements * )
+{
+ # Compute the representation of Python version in the name of Python's
+ # library file.
+ local lib-version = $(version) ;
+ if <target-os>windows in $(requirements)
+ {
+ local major-minor = [ split-version $(version) ] ;
+ lib-version = $(major-minor:J="") ;
+ if <python-debugging>on in $(requirements)
+ {
+ lib-version = $(lib-version)_d ;
+ }
+ }
+
+ if ! $(lib-version)
+ {
+ ECHO *** warning: could not determine Python version, which will ;
+ ECHO *** warning: probably prevent us from linking with the python ;
+ ECHO *** warning: library. Consider explicitly passing the version ;
+ ECHO *** warning: to 'using python'. ;
+ }
+
+ # Declare it.
+ lib python.lib : : <name>python$(lib-version) $(requirements) ;
+}
+
+
+# Implementation of init.
+local rule configure ( version ? : cmd-or-prefix ? : includes * : libraries ? :
+ condition * : extension-suffix ? )
+{
+ local prefix ;
+ local exec-prefix ;
+ local cmds-to-try ;
+ local interpreter-cmd ;
+
+ local target-os = [ feature.get-values target-os : $(condition) ] ;
+ target-os ?= [ feature.defaults target-os ] ;
+ target-os = $(target-os:G=) ;
+
+ if $(target-os) = windows && <python-debugging>on in $(condition)
+ {
+ extension-suffix ?= _d ;
+ }
+ extension-suffix ?= "" ;
+
+ # Normalize and dissect any version number.
+ local major-minor ;
+ if $(version)
+ {
+ major-minor = [ split-version $(version) ] ;
+ version = $(major-minor:J=.) ;
+ }
+
+ local cmds-to-try ;
+
+ if ! $(cmd-or-prefix) || [ GLOB $(cmd-or-prefix) : * ]
+ {
+ # If the user did not pass a command, whatever we got was a prefix.
+ prefix = $(cmd-or-prefix) ;
+ cmds-to-try = [ candidate-interpreters $(version) : $(prefix) : $(target-os) ] ;
+ }
+ else
+ {
+ # Work with the command the user gave us.
+ cmds-to-try = $(cmd-or-prefix) ;
+
+ # On Windows, do not nail down the interpreter command just yet in case
+ # the user specified something that turns out to be a cygwin symlink,
+ # which could bring down bjam if we invoke it.
+ if $(target-os) != windows
+ {
+ interpreter-cmd = $(cmd-or-prefix) ;
+ }
+ }
+
+ # Values to use in case we can not really find anything in the system.
+ local fallback-cmd = $(cmds-to-try[1]) ;
+ local fallback-version ;
+
+ # Anything left to find or check?
+ if ! ( $(interpreter-cmd) && $(includes) && $(libraries) )
+ {
+ # Values to be extracted from python's sys module. These will be set by
+ # the probe rule, above, using Jam's dynamic scoping.
+ local sys-elements = version platform prefix exec_prefix executable ;
+ local sys.$(sys-elements) ;
+
+ # Compute the string Python's sys.platform needs to match. If not
+ # targeting Windows or cygwin we will assume only native builds can
+ # possibly run, so we will not require a match and we leave sys.platform
+ # blank.
+ local platform ;
+ switch $(target-os)
+ {
+ case windows : platform = win32 ;
+ case cygwin : platform = cygwin ;
+ }
+
+ while $(cmds-to-try)
+ {
+ # Pop top command.
+ local cmd = $(cmds-to-try[1]) ;
+ cmds-to-try = $(cmds-to-try[2-]) ;
+
+ debug-message Checking interpreter command \"$(cmd)\"... ;
+ if [ probe $(cmd) ]
+ {
+ fallback-version ?= $(sys.version) ;
+
+ # Check for version/platform validity.
+ for local x in version platform
+ {
+ if $($(x)) && $($(x)) != $(sys.$(x))
+ {
+ debug-message ...$(x) "mismatch (looking for"
+ $($(x)) but found $(sys.$(x))")" ;
+ cmd = ;
+ }
+ }
+
+ if $(cmd)
+ {
+ debug-message ...requested configuration matched! ;
+
+ exec-prefix = $(sys.exec_prefix) ;
+
+ compute-default-paths $(target-os) : $(sys.version) :
+ $(sys.prefix) : $(sys.exec_prefix) ;
+
+ version = $(sys.version) ;
+ interpreter-cmd ?= $(cmd) ;
+ cmds-to-try = ; # All done.
+ }
+ }
+ else
+ {
+ debug-message ...does not invoke a working interpreter ;
+ }
+ }
+ }
+
+ # Anything left to compute?
+ if $(includes) && $(libraries)
+ {
+ .configured = true ;
+ }
+ else
+ {
+ version ?= $(fallback-version) ;
+ version ?= 2.5 ;
+ exec-prefix ?= $(prefix) ;
+ compute-default-paths $(target-os) : $(version) : $(prefix:E=) ;
+ }
+
+ if ! $(interpreter-cmd)
+ {
+ fallback-cmd ?= python ;
+ debug-message No working Python interpreter found. ;
+ if [ os.name ] != NT || ! [ invokes-cygwin-symlink $(fallback-cmd) ]
+ {
+ interpreter-cmd = $(fallback-cmd) ;
+ debug-message falling back to \"$(interpreter-cmd)\" ;
+ }
+ }
+
+ includes = [ path-to-native $(includes) ] ;
+ libraries = [ path-to-native $(libraries) ] ;
+
+ debug-message "Details of this Python configuration:" ;
+ debug-message " interpreter command:" \"$(interpreter-cmd:E=<empty>)\" ;
+ debug-message " include path:" \"$(includes:E=<empty>)\" ;
+ debug-message " library path:" \"$(libraries:E=<empty>)\" ;
+ if $(target-os) = windows
+ {
+ debug-message " DLL search path:" \"$(exec-prefix:E=<empty>)\" ;
+ }
+
+ #
+ # End autoconfiguration sequence.
+ #
+ local target-requirements = $(condition) ;
+
+ # Add the version, if any, to the target requirements.
+ if $(version)
+ {
+ if ! $(version) in [ feature.values python ]
+ {
+ feature.extend python : $(version) ;
+ }
+ target-requirements += <python>$(version:E=default) ;
+ }
+
+ target-requirements += <target-os>$(target-os) ;
+
+ # See if we can find a framework directory on darwin.
+ local framework-directory ;
+ if $(target-os) = darwin
+ {
+ # Search upward for the framework directory.
+ local framework-directory = $(libraries[-1]) ;
+ while $(framework-directory:D=) && $(framework-directory:D=) != Python.framework
+ {
+ framework-directory = $(framework-directory:D) ;
+ }
+
+ if $(framework-directory:D=) = Python.framework
+ {
+ debug-message framework directory is \"$(framework-directory)\" ;
+ }
+ else
+ {
+ debug-message "no framework directory found; using library path" ;
+ framework-directory = ;
+ }
+ }
+
+ local dll-path = $(libraries) ;
+
+ # Make sure that we can find the Python DLL on Windows.
+ if ( $(target-os) = windows ) && $(exec-prefix)
+ {
+ dll-path += $(exec-prefix) ;
+ }
+
+ #
+ # Prepare usage requirements.
+ #
+ local usage-requirements = [ system-library-dependencies $(target-os) ] ;
+ usage-requirements += <include>$(includes) <python.interpreter>$(interpreter-cmd) ;
+ if <python-debugging>on in $(condition)
+ {
+ if $(target-os) = windows
+ {
+ # In pyconfig.h, Py_DEBUG is set if _DEBUG is set. If we define
+ # Py_DEBUG we will get multiple definition warnings.
+ usage-requirements += <define>_DEBUG ;
+ }
+ else
+ {
+ usage-requirements += <define>Py_DEBUG ;
+ }
+ }
+
+ # Global, but conditional, requirements to give access to the interpreter
+ # for general utilities, like other toolsets, that run Python scripts.
+ toolset.add-requirements
+ $(target-requirements:J=,):<python.interpreter>$(interpreter-cmd) ;
+
+ # Register the right suffix for extensions.
+ register-extension-suffix $(extension-suffix) : $(target-requirements) ;
+
+ #
+ # Declare the "python" target. This should really be called
+ # python_for_embedding.
+ #
+
+ if $(framework-directory)
+ {
+ alias python
+ :
+ : $(target-requirements)
+ :
+ : $(usage-requirements) <framework>$(framework-directory)
+ ;
+ }
+ else
+ {
+ declare-libpython-target $(version) : $(target-requirements) ;
+
+        # This is an evil hack. On Windows, when Python is embedded, nothing
+ # seems to set up sys.path to include Python's standard library
+ # (http://article.gmane.org/gmane.comp.python.general/544986). The evil
+ # here, aside from the workaround necessitated by Python's bug, is that:
+ #
+ # a. we're guessing the location of the python standard library from the
+ # location of pythonXX.lib
+ #
+ # b. we're hijacking the <testing.launcher> property to get the
+ # environment variable set up, and the user may want to use it for
+ # something else (e.g. launch the debugger).
+ local set-PYTHONPATH ;
+ if $(target-os) = windows
+ {
+ set-PYTHONPATH = [ common.prepend-path-variable-command PYTHONPATH :
+ $(libraries:D)/Lib ] ;
+ }
+
+ alias python
+ :
+ : $(target-requirements)
+ :
+ # Why python.lib must be listed here instead of along with the
+ # system libs is a mystery, but if we do not do it, on cygwin,
+ # -lpythonX.Y never appears in the command line (although it does on
+ # linux).
+ : $(usage-requirements)
+ <testing.launcher>$(set-PYTHONPATH)
+ <library-path>$(libraries) <library>python.lib
+ ;
+ }
+
+ # On *nix, we do not want to link either Boost.Python or Python extensions
+ # to libpython, because the Python interpreter itself provides all those
+ # symbols. If we linked to libpython, we would get duplicate symbols. So
+ # declare two targets -- one for building extensions and another for
+ # embedding.
+ #
+ # Unlike most *nix systems, Mac OS X's linker does not permit undefined
+ # symbols when linking a shared library. So, we still need to link against
+ # the Python framework, even when building extensions. Note that framework
+ # builds of Python always use shared libraries, so we do not need to worry
+ # about duplicate Python symbols.
+ if $(target-os) in windows cygwin darwin
+ {
+ alias python_for_extensions : python : $(target-requirements) ;
+ }
+ # On AIX we need Python extensions and Boost.Python to import symbols from
+ # the Python interpreter. Dynamic libraries opened with dlopen() do not
+ # inherit the symbols from the Python interpreter.
+ else if $(target-os) = aix
+ {
+ alias python_for_extensions
+ :
+ : $(target-requirements)
+ :
+ : $(usage-requirements) <linkflags>-Wl,-bI:$(libraries[1])/python.exp
+ ;
+ }
+ else
+ {
+ alias python_for_extensions
+ :
+ : $(target-requirements)
+ :
+ : $(usage-requirements)
+ ;
+ }
+}
+
+
+rule configured ( )
+{
+ return $(.configured) ;
+}
+
+
+type.register PYTHON_EXTENSION : : SHARED_LIB ;
+
+
+local rule register-extension-suffix ( root : condition * )
+{
+ local suffix ;
+
+ switch [ feature.get-values target-os : $(condition) ]
+ {
+ case windows : suffix = pyd ;
+ case cygwin : suffix = dll ;
+ case hpux :
+ {
+ if [ feature.get-values python : $(condition) ] in 1.5 1.6 2.0 2.1 2.2 2.3 2.4
+ {
+ suffix = sl ;
+ }
+ else
+ {
+ suffix = so ;
+ }
+ }
+ case * : suffix = so ;
+ }
+
+ type.set-generated-target-suffix PYTHON_EXTENSION : $(condition) : <$(root).$(suffix)> ;
+}
+
+
+# Unset 'lib' prefix for PYTHON_EXTENSION
+type.set-generated-target-prefix PYTHON_EXTENSION : : "" ;
+
+
+rule python-extension ( name : sources * : requirements * : default-build * :
+ usage-requirements * )
+{
+ if [ configured ]
+ {
+ requirements += <use>/python//python_for_extensions ;
+ }
+ requirements += <suppress-import-lib>true ;
+
+ local project = [ project.current ] ;
+
+ targets.main-target-alternative
+ [ new typed-target $(name) : $(project) : PYTHON_EXTENSION
+ : [ targets.main-target-sources $(sources) : $(name) ]
+ : [ targets.main-target-requirements $(requirements) : $(project) ]
+ : [ targets.main-target-default-build $(default-build) : $(project) ]
+ ] ;
+}
+
+IMPORT python : python-extension : : python-extension ;
+
+rule py2to3
+{
+ common.copy $(>) $(<) ;
+ 2to3 $(<) ;
+}
+
+actions 2to3
+{
+ 2to3 -wn "$(<)"
+ 2to3 -dwn "$(<)"
+}
+
+
+# Support for testing.
+type.register PY : py ;
+type.register RUN_PYD_OUTPUT ;
+type.register RUN_PYD : : TEST ;
+
+
+class python-test-generator : generator
+{
+ import set ;
+
+ rule __init__ ( * : * )
+ {
+ generator.__init__ $(1) : $(2) : $(3) : $(4) : $(5) : $(6) : $(7) : $(8) : $(9) ;
+ self.composing = true ;
+ }
+
+ rule run ( project name ? : property-set : sources * : multiple ? )
+ {
+ local pyversion = [ $(property-set).get <python> ] ;
+ local python ;
+ local other-pythons ;
+
+        # Make a new target that converts Python source via 2to3 when running with Python 3.
+ local rule make-2to3-source ( source )
+ {
+ if $(pyversion) >= 3.0
+ {
+ local a = [ new action $(source) : python.py2to3 : $(property-set) ] ;
+ local t = [ utility.basename [ $(s).name ] ] ;
+ local p = [ new file-target $(t) : PY : $(project) : $(a) ] ;
+ return $(p) ;
+ }
+ else
+ {
+ return $(source) ;
+ }
+ }
+
+ for local s in $(sources)
+ {
+ if [ $(s).type ] = PY
+ {
+ if ! $(python)
+ {
+ # First Python source ends up on command line.
+ python = [ make-2to3-source $(s) ] ;
+
+ }
+ else
+ {
+ # Other Python sources become dependencies.
+ other-pythons += [ make-2to3-source $(s) ] ;
+ }
+ }
+ }
+
+ local extensions ;
+ for local s in $(sources)
+ {
+ if [ $(s).type ] = PYTHON_EXTENSION
+ {
+ extensions += $(s) ;
+ }
+ }
+
+ local libs ;
+ for local s in $(sources)
+ {
+ if [ type.is-derived [ $(s).type ] LIB ]
+ && ! $(s) in $(extensions)
+ {
+ libs += $(s) ;
+ }
+ }
+
+ local new-sources ;
+ for local s in $(sources)
+ {
+ if [ type.is-derived [ $(s).type ] CPP ]
+ {
+ local name = [ utility.basename [ $(s).name ] ] ;
+ if $(name) = [ utility.basename [ $(python).name ] ]
+ {
+ name = $(name)_ext ;
+ }
+ local extension = [ generators.construct $(project) $(name) :
+ PYTHON_EXTENSION : $(property-set) : $(s) $(libs) ] ;
+
+ # The important part of usage requirements returned from
+ # PYTHON_EXTENSION generator are xdll-path properties that will
+ # allow us to find the python extension at runtime.
+ property-set = [ $(property-set).add $(extension[1]) ] ;
+
+ # Ignore usage requirements. We're a top-level generator and
+ # nobody is going to use what we generate.
+ new-sources += $(extension[2-]) ;
+ }
+ }
+
+ property-set = [ $(property-set).add-raw <dependency>$(other-pythons) ] ;
+
+ return [ construct-result $(python) $(extensions) $(new-sources) :
+ $(project) $(name) : $(property-set) ] ;
+ }
+}
+
+
+generators.register
+ [ new python-test-generator python.capture-output : : RUN_PYD_OUTPUT ] ;
+
+generators.register-standard testing.expect-success
+ : RUN_PYD_OUTPUT : RUN_PYD ;
+
+
+# There are two different ways of spelling OS names. One is used for [ os.name ]
+# and the other is used for the <host-os> and <target-os> properties. Until that
+# is remedied, this sets up a crude mapping from the latter to the former, that
+# will work *for the purposes of cygwin/NT cross-builds only*. Could not think
+# of a better name than "translate".
+#
+.translate-os-windows = NT ;
+.translate-os-cygwin = CYGWIN ;
+local rule translate-os ( src-os )
+{
+ local x = $(.translate-os-$(src-os)) [ os.name ] ;
+ return $(x[1]) ;
+}
+
+
+# Extract the path to a single ".pyd" source. This is used to build the
+# PYTHONPATH for running bpl tests.
+#
+local rule pyd-pythonpath ( source )
+{
+ return [ on $(source) return $(LOCATE) $(SEARCH) ] ;
+}
+
+
+# The flag settings on testing.capture-output do not apply to
+# python.capture-output at the moment. Redo this explicitly.
+toolset.flags python.capture-output ARGS <testing.arg> ;
+
+
+rule capture-output ( target : sources * : properties * )
+{
+ # Setup up a proper DLL search path. Here, $(sources[1]) is a python module
+ # and $(sources[2]) is a DLL. Only $(sources[1]) is passed to
+ # testing.capture-output, so RUN_PATH variable on $(sources[2]) is not
+ # consulted. Move it over explicitly.
+ RUN_PATH on $(sources[1]) = [ on $(sources[2-]) return $(RUN_PATH) ] ;
+
+ PYTHONPATH = [ sequence.transform pyd-pythonpath : $(sources[2-]) ] ;
+ PYTHONPATH += [ feature.get-values pythonpath : $(properties) ] ;
+
+ # After test is run, we remove the Python module, but not the Python script.
+ testing.capture-output $(target) : $(sources[1]) : $(properties) :
+ $(sources[2-]) ;
+
+ # PYTHONPATH is different; it will be interpreted by whichever Python is
+ # invoked and so must follow path rules for the target os. The only OSes
+ # where we can run python for other OSes currently are NT and CYGWIN so we
+ # only need to handle those cases.
+ local target-os = [ feature.get-values target-os : $(properties) ] ;
+ # Oddly, host-os is not in properties, so grab the default value.
+ local host-os = [ feature.defaults host-os ] ;
+ host-os = $(host-os:G=) ;
+ if $(target-os) != $(host-os)
+ {
+ PYTHONPATH = [ sequence.transform $(host-os)-to-$(target-os)-path :
+ $(PYTHONPATH) ] ;
+ }
+ local path-separator = [ os.path-separator [ translate-os $(target-os) ] ] ;
+ local set-PYTHONPATH = [ common.variable-setting-command PYTHONPATH :
+ $(PYTHONPATH:J=$(path-separator)) ] ;
+ LAUNCHER on $(target) = $(set-PYTHONPATH) [ on $(target) return \"$(PYTHON)\" ] ;
+}
+
+
+rule bpl-test ( name : sources * : requirements * )
+{
+ local s ;
+ sources ?= $(name).py $(name).cpp ;
+ return [ testing.make-test run-pyd : $(sources) /boost/python//boost_python
+ : $(requirements) : $(name) ] ;
+}
+
+
+IMPORT $(__name__) : bpl-test : : bpl-test ;
diff --git a/src/kenlm/jam-files/boost-build/tools/qcc.jam b/src/kenlm/jam-files/boost-build/tools/qcc.jam
new file mode 100644
index 0000000..3b35578
--- /dev/null
+++ b/src/kenlm/jam-files/boost-build/tools/qcc.jam
@@ -0,0 +1,238 @@
+# Copyright (c) 2001 David Abrahams.
+# Copyright (c) 2002-2003 Rene Rivera.
+# Copyright (c) 2002-2003 Vladimir Prus.
+#
+# Use, modification and distribution is subject to the Boost Software
+# License Version 1.0. (See accompanying file LICENSE_1_0.txt or
+# http://www.boost.org/LICENSE_1_0.txt)
+
+import "class" : new ;
+import common ;
+import errors ;
+import feature ;
+import generators ;
+import os ;
+import property ;
+import set ;
+import toolset ;
+import type ;
+import unix ;
+
+feature.extend toolset : qcc ;
+
+toolset.inherit-generators qcc : unix : unix.link unix.link.dll ;
+generators.override builtin.lib-generator : qcc.prebuilt ;
+toolset.inherit-flags qcc : unix ;
+toolset.inherit-rules qcc : unix ;
+
+# Initializes the qcc toolset for the given version. If necessary, command may
+# be used to specify where the compiler is located. The parameter 'options' is a
+# space-delimited list of options, each one being specified as
+# <option-name>option-value. Valid option names are: cxxflags, linkflags and
+# linker-type. Accepted values for linker-type are gnu and sun, gnu being the
+# default.
+#
+# Example:
+# using qcc : 3.4 : : <cxxflags>foo <linkflags>bar <linker-type>sun ;
+#
+rule init ( version ? : command * : options * )
+{
+ local condition = [ common.check-init-parameters qcc : version $(version) ] ;
+ local command = [ common.get-invocation-command qcc : QCC : $(command) ] ;
+ common.handle-options qcc : $(condition) : $(command) : $(options) ;
+}
+
+
+generators.register-c-compiler qcc.compile.c++ : CPP : OBJ : <toolset>qcc ;
+generators.register-c-compiler qcc.compile.c : C : OBJ : <toolset>qcc ;
+generators.register-c-compiler qcc.compile.asm : ASM : OBJ : <toolset>qcc ;
+
+
+# Declare flags for compilation.
+toolset.flags qcc.compile OPTIONS <debug-symbols>on : -gstabs+ ;
+
+# Declare flags and action for compilation.
+toolset.flags qcc.compile OPTIONS <optimization>off : -O0 ;
+toolset.flags qcc.compile OPTIONS <optimization>speed : -O3 ;
+toolset.flags qcc.compile OPTIONS <optimization>space : -Os ;
+
+toolset.flags qcc.compile OPTIONS <inlining>off : -Wc,-fno-inline ;
+toolset.flags qcc.compile OPTIONS <inlining>on : -Wc,-Wno-inline ;
+toolset.flags qcc.compile OPTIONS <inlining>full : -Wc,-finline-functions -Wc,-Wno-inline ;
+
+toolset.flags qcc.compile OPTIONS <warnings>off : -w ;
+toolset.flags qcc.compile OPTIONS <warnings>all : -Wc,-Wall ;
+toolset.flags qcc.compile OPTIONS <warnings-as-errors>on : -Wc,-Werror ;
+
+toolset.flags qcc.compile OPTIONS <profiling>on : -p ;
+
+toolset.flags qcc.compile OPTIONS <cflags> ;
+toolset.flags qcc.compile.c++ OPTIONS <cxxflags> ;
+toolset.flags qcc.compile DEFINES <define> ;
+toolset.flags qcc.compile INCLUDES <include> ;
+
+toolset.flags qcc.compile OPTIONS <link>shared : -shared ;
+
+toolset.flags qcc.compile.c++ TEMPLATE_DEPTH <c++-template-depth> ;
+
+
+rule compile.c++
+{
+ # Here we want to raise the template-depth parameter value to something
+ # higher than the default value of 17. Note that we could do this using the
+ # feature.set-default rule but we do not want to set the default value for
+ # all toolsets as well.
+ #
+ # TODO: This 'modified default' has been inherited from some 'older Boost
+ # Build implementation' and has most likely been added to make some Boost
+ # library parts compile correctly. We should see what exactly prompted this
+ # and whether we can get around the problem more locally.
+ local template-depth = [ on $(1) return $(TEMPLATE_DEPTH) ] ;
+ if ! $(template-depth)
+ {
+ TEMPLATE_DEPTH on $(1) = 128 ;
+ }
+}
+
+actions compile.c++
+{
+ "$(CONFIG_COMMAND)" -Wc,-ftemplate-depth-$(TEMPLATE_DEPTH) $(OPTIONS) -D$(DEFINES) -I"$(INCLUDES)" -c -o "$(<)" "$(>)"
+}
+
+actions compile.c
+{
+ "$(CONFIG_COMMAND)" $(OPTIONS) -D$(DEFINES) -I"$(INCLUDES)" -c -o "$(<)" "$(>)"
+}
+
+actions compile.asm
+{
+ "$(CONFIG_COMMAND)" $(OPTIONS) -D$(DEFINES) -I"$(INCLUDES)" -c -o "$(<)" "$(>)"
+}
+
+
+# The class checking that we do not try to use the <runtime-link>static property
+# while creating or using a shared library, since it is not supported by
+# qcc/libc.
+#
+class qcc-linking-generator : unix-linking-generator
+{
+ rule generated-targets ( sources + : property-set : project name ? )
+ {
+ if <runtime-link>static in [ $(property-set).raw ]
+ {
+ local m ;
+ if [ id ] = "qcc.link.dll"
+ {
+ m = "on qcc, DLL can't be build with <runtime-link>static" ;
+ }
+ if ! $(m)
+ {
+ for local s in $(sources)
+ {
+ local type = [ $(s).type ] ;
+ if $(type) && [ type.is-derived $(type) SHARED_LIB ]
+ {
+ m = "on qcc, using DLLS together with the <runtime-link>static options is not possible " ;
+ }
+ }
+ }
+ if $(m)
+ {
+ errors.user-error $(m) : "It is suggested to use"
+ "<runtime-link>static together with <link>static." ;
+ }
+ }
+
+ return [ unix-linking-generator.generated-targets
+ $(sources) : $(property-set) : $(project) $(name) ] ;
+ }
+}
+
+generators.register [ new qcc-linking-generator qcc.link : LIB OBJ : EXE
+ : <toolset>qcc ] ;
+
+generators.register [ new qcc-linking-generator qcc.link.dll : LIB OBJ
+ : SHARED_LIB : <toolset>qcc ] ;
+
+generators.override qcc.prebuilt : builtin.prebuilt ;
+generators.override qcc.searched-lib-generator : searched-lib-generator ;
+
+
+# Declare flags for linking.
+# First, the common flags.
+toolset.flags qcc.link OPTIONS <debug-symbols>on : -gstabs+ ;
+toolset.flags qcc.link OPTIONS <profiling>on : -p ;
+toolset.flags qcc.link OPTIONS <linkflags> ;
+toolset.flags qcc.link LINKPATH <library-path> ;
+toolset.flags qcc.link FINDLIBS-ST <find-static-library> ;
+toolset.flags qcc.link FINDLIBS-SA <find-shared-library> ;
+toolset.flags qcc.link LIBRARIES <library-file> ;
+
+toolset.flags qcc.link FINDLIBS-SA : m ;
+
+# For <runtime-link>static we made sure there are no dynamic libraries in the
+# link.
+toolset.flags qcc.link OPTIONS <runtime-link>static : -static ;
+
+# Assuming this is just like with gcc.
+toolset.flags qcc.link RPATH : <dll-path> : unchecked ;
+toolset.flags qcc.link RPATH_LINK : <xdll-path> : unchecked ;
+
+
+# Declare actions for linking.
+#
+rule link ( targets * : sources * : properties * )
+{
+ SPACE on $(targets) = " " ;
+ # Serialize execution of the 'link' action, since running N links in
+ # parallel is just slower. For now, serialize only qcc links while it might
+ # be a good idea to serialize all links.
+ JAM_SEMAPHORE on $(targets) = <s>qcc-link-semaphore ;
+}
+
+actions link bind LIBRARIES
+{
+ "$(CONFIG_COMMAND)" -L"$(LINKPATH)" -Wl,-R$(SPACE)-Wl,"$(RPATH)" -Wl,-rpath-link$(SPACE)-Wl,"$(RPATH_LINK)" -o "$(<)" "$(>)" "$(LIBRARIES)" -l$(FINDLIBS-ST) -l$(FINDLIBS-SA) $(OPTIONS)
+}
+
+
+# Always remove archive and start again. Here is the rationale from Andre Hentz:
+# I had a file, say a1.c, that was included into liba.a. I moved a1.c to a2.c,
+# updated my Jamfiles and rebuilt. My program was crashing with absurd errors.
+# After some debugging I traced it back to the fact that a1.o was *still* in
+# liba.a
+RM = [ common.rm-command ] ;
+if [ os.name ] = NT
+{
+ RM = "if exist \"$(<[1])\" DEL \"$(<[1])\"" ;
+}
+
+
+# Declare action for creating static libraries. The 'r' letter means to add
+# files to the archive with replacement. Since we remove the archive, we do not
+# care about replacement, but there is no option to "add without replacement".
+# The 'c' letter suppresses warnings in case the archive does not exist yet.
+# That warning is produced only on some platforms, for whatever reasons.
+#
+# Use qcc driver to create archive, see
+# http://www.qnx.com/developers/docs/6.3.2/neutrino/utilities/q/qcc.html
+actions piecemeal archive
+{
+ $(RM) "$(<)"
+ "$(CONFIG_COMMAND)" -A "$(<)" "$(>)"
+}
+
+
+rule link.dll ( targets * : sources * : properties * )
+{
+ SPACE on $(targets) = " " ;
+ JAM_SEMAPHORE on $(targets) = <s>qcc-link-semaphore ;
+}
+
+
+# Differ from 'link' above only by -shared.
+#
+actions link.dll bind LIBRARIES
+{
+ "$(CONFIG_COMMAND)" -L"$(LINKPATH)" -Wl,-R$(SPACE)-Wl,"$(RPATH)" -o "$(<)" $(HAVE_SONAME)-Wl,-h$(SPACE)-Wl,$(<[1]:D=) -shared "$(>)" "$(LIBRARIES)" -l$(FINDLIBS-ST) -l$(FINDLIBS-SA) $(OPTIONS)
+}
diff --git a/src/kenlm/jam-files/boost-build/tools/qt.jam b/src/kenlm/jam-files/boost-build/tools/qt.jam
new file mode 100644
index 0000000..8aa7ca2
--- /dev/null
+++ b/src/kenlm/jam-files/boost-build/tools/qt.jam
@@ -0,0 +1,17 @@
+# Copyright (c) 2006 Vladimir Prus.
+#
+# Use, modification and distribution is subject to the Boost Software
+# License Version 1.0. (See accompanying file LICENSE_1_0.txt or
+# http://www.boost.org/LICENSE_1_0.txt)
+
+# Forwarding toolset file to Qt GUI library. Forwards to the toolset file
+# for the current version of Qt.
+
+import qt4 ;
+
+rule init ( prefix : full_bin ? : full_inc ? : full_lib ? : version ? : condition * )
+{
+ qt4.init $(prefix) : $(full_bin) : $(full_inc) : $(full_lib) : $(version) : $(condition) ;
+}
+
+
diff --git a/src/kenlm/jam-files/boost-build/tools/qt3.jam b/src/kenlm/jam-files/boost-build/tools/qt3.jam
new file mode 100644
index 0000000..f82cf0a
--- /dev/null
+++ b/src/kenlm/jam-files/boost-build/tools/qt3.jam
@@ -0,0 +1,209 @@
+# Copyright 2006 Vladimir Prus
+# Distributed under the Boost Software License, Version 1.0.
+# (See accompanying file LICENSE_1_0.txt or http://www.boost.org/LICENSE_1_0.txt)
+
+# Support for the Qt GUI library version 3
+# (http://www.trolltech.com/products/qt3/index.html).
+# For new developments, it is recommended to use Qt4 via the qt4 Boost.Build
+# module.
+
+import modules ;
+import feature ;
+import errors ;
+import type ;
+import "class" : new ;
+import generators ;
+import project ;
+import toolset : flags ;
+
+# Convert this module into a project, so that we can declare targets here.
+project.initialize $(__name__) ;
+project qt3 ;
+
+
+# Initializes the QT support module. The 'prefix' parameter tells where QT is
+# installed. When not given, environmental variable QTDIR should be set.
+#
+rule init ( prefix ? )
+{
+ if ! $(prefix)
+ {
+ prefix = [ modules.peek : QTDIR ] ;
+ if ! $(prefix)
+ {
+ errors.error
+ "QT installation prefix not given and QTDIR variable is empty" ;
+ }
+ }
+
+ if $(.initialized)
+ {
+ if $(prefix) != $(.prefix)
+ {
+ errors.error
+ "Attempt the reinitialize QT with different installation prefix" ;
+ }
+ }
+ else
+ {
+ .initialized = true ;
+ .prefix = $(prefix) ;
+
+ generators.register-standard qt3.moc : H : CPP(moc_%) : <allow>qt3 ;
+ # Note: the OBJ target type here is fake, take a look at
+ # qt4.jam/uic-h-generator for explanations that apply in this case as
+ # well.
+ generators.register [ new moc-h-generator-qt3
+ qt3.moc.cpp : MOCCABLE_CPP : OBJ : <allow>qt3 ] ;
+
+ # The UI type is defined in types/qt.jam, and UIC_H is only used in
+ # qt.jam, but not in qt4.jam, so define it here.
+ type.register UIC_H : : H ;
+
+ generators.register-standard qt3.uic-h : UI : UIC_H : <allow>qt3 ;
+
+ # The following generator is used to convert UI files to CPP. It creates
+ # UIC_H from UI, and constructs CPP from UI/UIC_H. In addition, it also
+ # returns UIC_H target, so that it can be mocced.
+ class qt::uic-cpp-generator : generator
+ {
+ rule __init__ ( )
+ {
+ generator.__init__ qt3.uic-cpp : UI UIC_H : CPP : <allow>qt3 ;
+ }
+
+ rule run ( project name ? : properties * : sources + )
+ {
+ # Consider this:
+ # obj test : test_a.cpp : <optimization>off ;
+ #
+ # This generator will somehow be called in this case, and,
+ # will fail -- which is okay. However, if there are <library>
+ # properties they will be converted to sources, so the size of
+ # 'sources' will be more than 1. In this case, the base generator
+ # will just crash -- and that's not good. Just use a quick test
+ # here.
+
+ local result ;
+ if ! $(sources[2])
+ {
+ # Construct CPP as usual
+ result = [ generator.run $(project) $(name)
+ : $(properties) : $(sources) ] ;
+
+ # If OK, process UIC_H with moc. It's pretty clear that
+ # the object generated with UIC will have Q_OBJECT macro.
+ if $(result)
+ {
+ local action = [ $(result[1]).action ] ;
+ local sources = [ $(action).sources ] ;
+ local mocced = [ generators.construct $(project) $(name)
+ : CPP : $(properties) : $(sources[2]) ] ;
+ result += $(mocced[2-]) ;
+ }
+ }
+
+ return $(result) ;
+ }
+ }
+
+ generators.register [ new qt::uic-cpp-generator ] ;
+
+ # Finally, declare prebuilt target for QT library.
+ local usage-requirements =
+ <include>$(.prefix)/include
+ <dll-path>$(.prefix)/lib
+ <library-path>$(.prefix)/lib
+ <allow>qt3
+ ;
+ lib qt : : <name>qt-mt <threading>multi : : $(usage-requirements) ;
+ lib qt : : <name>qt <threading>single : : $(usage-requirements) ;
+ }
+}
+
+class moc-h-generator-qt3 : generator
+{
+ rule __init__ ( * : * )
+ {
+ generator.__init__ $(1) : $(2) : $(3) : $(4) : $(5) : $(6) : $(7) : $(8) : $(9) ;
+ }
+
+ rule run ( project name ? : property-set : sources * )
+ {
+ if ! $(sources[2]) && [ $(sources[1]).type ] = MOCCABLE_CPP
+ {
+ name = [ $(sources[1]).name ] ;
+ name = $(name:B) ;
+
+ local a = [ new action $(sources[1]) : qt3.moc.cpp :
+ $(property-set) ] ;
+
+ local target = [
+ new file-target $(name) : MOC : $(project) : $(a) ] ;
+
+ local r = [ virtual-target.register $(target) ] ;
+
+ # Since this generator will return a H target, the linking generator
+ # won't use it at all, and won't set any dependency on it. However,
+ # we need the target to be seen by bjam, so that the dependency from
+ # sources to this generated header is detected -- if Jam does not
+ # know about this target, it won't do anything.
+ DEPENDS all : [ $(r).actualize ] ;
+
+ return $(r) ;
+ }
+ }
+}
+
+
+# Query the installation directory. This is needed in at least two scenarios.
+# First, when re-using sources from the Qt-Tree. Second, to "install" custom Qt
+# plugins to the Qt-Tree.
+#
+rule directory
+{
+ return $(.prefix) ;
+}
+
+# -f forces moc to include the processed source file. Without it, it would think
+# that .qpp is not a header and would not include it from the generated file.
+#
+actions moc
+{
+ $(.prefix)/bin/moc -f $(>) -o $(<)
+}
+
+# When moccing .cpp files, we don't need -f, otherwise generated code will
+# include .cpp and we'll get duplicated symbols.
+#
+actions moc.cpp
+{
+ $(.prefix)/bin/moc $(>) -o $(<)
+}
+
+
+space = " " ;
+
+# Sometimes it's required to make 'plugins' available during uic invocation. To
+# help with this we add paths to all dependency libraries to the uic command line.
+# The intention is that it's possible to write
+#
+# exe a : ... a.ui ... : <uses>some_plugin ;
+#
+# and have everything work. We'd add quite a bunch of unrelated paths but it
+# won't hurt.
+#
+flags qt3.uic-h LIBRARY_PATH <xdll-path> ;
+actions uic-h
+{
+ $(.prefix)/bin/uic $(>) -o $(<) -L$(space)$(LIBRARY_PATH)
+}
+
+
+flags qt3.uic-cpp LIBRARY_PATH <xdll-path> ;
+# The second target is uic-generated header name. It's placed in build dir, but
+# we want to include it using only basename.
+actions uic-cpp
+{
+ $(.prefix)/bin/uic $(>[1]) -i $(>[2]:D=) -o $(<) -L$(space)$(LIBRARY_PATH)
+}
diff --git a/src/kenlm/jam-files/boost-build/tools/qt4.jam b/src/kenlm/jam-files/boost-build/tools/qt4.jam
new file mode 100644
index 0000000..a3aac61
--- /dev/null
+++ b/src/kenlm/jam-files/boost-build/tools/qt4.jam
@@ -0,0 +1,755 @@
+# Copyright 2002-2006 Vladimir Prus
+# Copyright 2005 Alo Sarv
+# Copyright 2005-2009 Juergen Hunold
+#
+# Distributed under the Boost Software License, Version 1.0. (See
+# accompanying file LICENSE_1_0.txt or copy at
+# http://www.boost.org/LICENSE_1_0.txt)
+
+# Qt4 library support module
+#
+# The module attempts to auto-detect QT installation location from QTDIR
+# environment variable; failing that, installation location can be passed as
+# argument:
+#
+# toolset.using qt4 : /usr/local/Trolltech/Qt-4.0.0 ;
+#
+# The module supports code generation from .ui and .qrc files, as well as
+# running the moc preprocessor on headers. Note that you must list all your
+# moc-able headers in sources.
+#
+# Example:
+#
+# exe myapp : myapp.cpp myapp.h myapp.ui myapp.qrc
+# /qt4//QtGui /qt4//QtNetwork ;
+#
+# It's also possible to run moc on cpp sources:
+#
+# import cast ;
+#
+# exe myapp : myapp.cpp [ cast _ moccable-cpp : myapp.cpp ] /qt4//QtGui ;
+#
+# When moccing source file myapp.cpp you need to include "myapp.moc" from
+# myapp.cpp. When moccing .h files, the output of moc will be automatically
+# compiled and linked in, you don't need any includes.
+#
+# This is consistent with Qt guidelines:
+# http://qt-project.org/doc/qt-4.8/moc.html
+#
+# The .qrc processing utility supports various command line options (see
+# http://qt-project.org/doc/qt-4.8/rcc.html for a complete list). The
+# module provides default arguments for the "output file" and
+# "initialization function name" options. Other options can be set through
+# the <rccflags> build property. E.g. if you wish the compression settings
+# to be more aggressive than the defaults, you can apply them to all .qrc
+# files like this:
+#
+# project my-qt-project :
+# requirements
+# <rccflags>"-compress 9 -threshold 10"
+# ;
+#
+# Of course, this property can also be specified on individual targets.
+
+
+import modules ;
+import feature ;
+import errors ;
+import type ;
+import "class" : new ;
+import generators ;
+import project ;
+import toolset : flags ;
+import os ;
+import virtual-target ;
+import scanner ;
+
+# Qt3Support control feature
+#
+# Qt4 configure defaults to build Qt4 libraries with Qt3Support.
+# The autodetection is missing, so we default to disable Qt3Support.
+# This prevents the user from inadvertently using a deprecated API.
+#
+# The Qt3Support library can be activated by adding
+# "<qt3support>on" to requirements
+#
+# Use "<qt3support>on:<define>QT3_SUPPORT_WARNINGS"
+# to get warnings about deprecated Qt3 support functions and classes.
+# Files ported by the "qt3to4" conversion tool contain _tons_ of
+# warnings, so this define is not set as default.
+#
+# Todo: Detect Qt3Support from Qt's configure data.
+# Or add more auto-configuration (like python).
+feature.feature qt3support : off on : propagated link-incompatible ;
+
+# The Qt version used for requirements
+# Valid are <qt>4.4 or <qt>4.5.0
+# Auto-detection via qmake sets '<qt>major.minor.patch'
+feature.feature qt : : propagated ;
+
+# Extra flags for rcc
+feature.feature rccflags : : free ;
+
+project.initialize $(__name__) ;
+project qt ;
+
+# Save the project so that we tolerate 'import + using' combo.
+.project = [ project.current ] ;
+
+# Helper utils for easy debug output
+# .debug-configuration is set when bjam was invoked with --debug-configuration.
+if [ MATCH (--debug-configuration) : [ modules.peek : ARGV ] ]
+{
+ .debug-configuration = TRUE ;
+}
+
+# Echo 'message' prefixed with "notice: [qt4-cfg]", but only when
+# --debug-configuration was given on the command line.
+local rule debug-message ( message * )
+{
+ if $(.debug-configuration) = TRUE
+ {
+ ECHO notice: [qt4-cfg] $(message) ;
+ }
+}
+
+# Capture qmake output line by line
+#
+# Splits 'content' on newlines and returns the list of lines. '<<' is just a
+# variable name here; its regexp peels one leading line off the remainder on
+# each loop iteration.
+local rule read-output ( content )
+{
+ local lines ;
+ # Literal newline character, used inside the match expression below.
+ local nl = "
+" ;
+ local << = "([^$(nl)]*)[$(nl)](.*)" ;
+ local line+ = [ MATCH "$(<<)" : "$(content)" ] ;
+ while $(line+)
+ {
+ lines += $(line+[1]) ;
+ line+ = [ MATCH "$(<<)" : "$(line+[2])" ] ;
+ }
+ return $(lines) ;
+}
+
+# Capture Qt version from qmake
+#
+# Runs '$(bin_prefix)/qmake -v' via SHELL and returns the (major minor patch)
+# list matched from the last non-"QMake"-banner line of its output.
+# NOTE(review): 'full-cmd' and 'temp' are not declared 'local', so they leak
+# into the enclosing module and 'temp' can retain a value from a previous call
+# if no line matches -- confirm whether that is intended.
+local rule check-version ( bin_prefix )
+{
+ full-cmd = $(bin_prefix)"/qmake -v" ;
+ debug-message Running '$(full-cmd)' ;
+ local output = [ SHELL $(full-cmd) ] ;
+ for line in [ read-output $(output) ]
+ {
+ # Parse the output to get all the results.
+ if [ MATCH "QMake" : $(line) ]
+ {
+ # Skip first line of output
+ }
+ else
+ {
+ temp = [ MATCH "([0-9]*)\\.([0-9]*)\\.([0-9]*)" : $(line) ] ;
+ }
+ }
+ return $(temp) ;
+}
+
+# Validate the version string and extract the major/minor part we care about.
+#
+# Returns a two-element (major minor) list; warns and pads with 0 when the
+# string is not a plain two-part version number.
+local rule split-version ( version )
+{
+ local major-minor = [ MATCH ^([0-9]+)\.([0-9]+)(.*)$ : $(version) : 1 2 3 ] ;
+ if ! $(major-minor[2]) || $(major-minor[3])
+ {
+ ECHO "Warning: 'using qt' expects a two part (major, minor) version number; got" $(version) instead ;
+
+ # Add a zero to account for the missing digit if necessary.
+ major-minor += 0 ;
+ }
+
+ return $(major-minor[1]) $(major-minor[2]) ;
+}
+
+# Initialize the QT support module.
+# Parameters:
+# - 'prefix' parameter tells where Qt is installed.
+# - 'full_bin' optional full path to Qt binaries (qmake,moc,uic,rcc)
+# - 'full_inc' optional full path to Qt top-level include directory
+# - 'full_lib' optional full path to Qt library directory
+# - 'version' optional version of Qt, else autodetected via 'qmake -v'
+# - 'condition' optional requirements
+rule init ( prefix : full_bin ? : full_inc ? : full_lib ? : version ? : condition * )
+{
+ project.push-current $(.project) ;
+
+ debug-message "==== Configuring Qt ... ====" ;
+ # Report the user-specified parameters. BUGFIX: the loop originally
+ # iterated over 'version cmd-or-prefix includes libraries condition' --
+ # names that are not parameters of this rule -- so the bin/inc/lib
+ # overrides were never reported.
+ for local v in prefix full_bin full_inc full_lib version condition
+ {
+ if $($(v))
+ {
+ debug-message " user-specified "$(v): '$($(v))' ;
+ }
+ }
+
+ # Needed as default value
+ .prefix = $(prefix) ;
+
+ # pre-build paths to detect reinitializations changes
+ local inc_prefix lib_prefix bin_prefix ;
+ if $(full_inc)
+ {
+ inc_prefix = $(full_inc) ;
+ }
+ else
+ {
+ inc_prefix = $(prefix)/include ;
+ }
+ if $(full_lib)
+ {
+ lib_prefix = $(full_lib) ;
+ }
+ else
+ {
+ lib_prefix = $(prefix)/lib ;
+ }
+ if $(full_bin)
+ {
+ bin_prefix = $(full_bin) ;
+ }
+ else
+ {
+ bin_prefix = $(prefix)/bin ;
+ }
+
+ # Globally needed variables
+ .incprefix = $(inc_prefix) ;
+ .libprefix = $(lib_prefix) ;
+ .binprefix = $(bin_prefix) ;
+
+ if ! $(.initialized)
+ {
+ # Make sure this is initialised only once
+ .initialized = true ;
+
+ # Generates cpp files from header files using "moc" tool
+ generators.register-standard qt4.moc : H : CPP(moc_%) : <allow>qt4 ;
+
+ # The OBJ result type is a fake, 'H' will be really produced. See
+ # comments on the generator class, defined below the 'init' function.
+ generators.register [ new uic-generator qt4.uic : UI : OBJ :
+ <allow>qt4 ] ;
+
+ # The OBJ result type is a fake here too.
+ generators.register [ new moc-h-generator
+ qt4.moc.inc : MOCCABLE_CPP : OBJ : <allow>qt4 ] ;
+
+ generators.register [ new moc-inc-generator
+ qt4.moc.inc : MOCCABLE_H : OBJ : <allow>qt4 ] ;
+
+ # Generates .cpp files from .qrc files.
+ generators.register-standard qt4.rcc : QRC : CPP(qrc_%) : <allow>qt4 ;
+
+ # dependency scanner for wrapped files.
+ type.set-scanner QRC : qrc-scanner ;
+
+ # Save value of first occurring prefix
+ .PREFIX = $(prefix) ;
+ }
+
+ if $(version)
+ {
+ major-minor = [ split-version $(version) ] ;
+ version = $(major-minor:J=.) ;
+ }
+ else
+ {
+ version = [ check-version $(bin_prefix) ] ;
+ if $(version)
+ {
+ version = $(version:J=.) ;
+ }
+ debug-message Detected version '$(version)' ;
+ }
+
+ local target-requirements = $(condition) ;
+
+ # Add the version, if any, to the target requirements.
+ if $(version)
+ {
+ if ! $(version) in [ feature.values qt ]
+ {
+ feature.extend qt : $(version) ;
+ }
+ target-requirements += <qt>$(version:E=default) ;
+ }
+
+ local target-os = [ feature.get-values target-os : $(condition) ] ;
+ if ! $(target-os)
+ {
+ target-os ?= [ feature.defaults target-os ] ;
+ target-os = $(target-os:G=) ;
+ target-requirements += <target-os>$(target-os) ;
+ }
+
+ # Build exact requirements for the tools
+ local tools-requirements = $(target-requirements:J=/) ;
+
+ debug-message "Details of this Qt configuration:" ;
+ debug-message " prefix: " '$(prefix:E=<empty>)' ;
+ debug-message " binary path: " '$(bin_prefix:E=<empty>)' ;
+ debug-message " include path:" '$(inc_prefix:E=<empty>)' ;
+ debug-message " library path:" '$(lib_prefix:E=<empty>)' ;
+ debug-message " target requirements:" '$(target-requirements)' ;
+ debug-message " tool requirements: " '$(tools-requirements)' ;
+
+ # setup the paths for the tools
+ toolset.flags qt4.moc .BINPREFIX $(tools-requirements) : $(bin_prefix) ;
+ toolset.flags qt4.rcc .BINPREFIX $(tools-requirements) : $(bin_prefix) ;
+ toolset.flags qt4.uic .BINPREFIX $(tools-requirements) : $(bin_prefix) ;
+
+ # TODO: 2009-02-12: Better support for directories
+ # Most likely needed are separate getters for: include,libraries,binaries and sources.
+ toolset.flags qt4.directory .PREFIX $(tools-requirements) : $(prefix) ;
+
+ # Test for a buildable Qt.
+ if [ glob $(.prefix)/Jamroot ]
+ {
+ # BUGFIX: the assignment was missing its terminating ';', so jam
+ # parsed the 'add-shared-library QtCore' tokens below as part of the
+ # assigned value list and QtCore was never declared in this branch.
+ .bjam-qt = true ;
+
+ # this will declare QtCore (and qtmain on <target-os>windows)
+ add-shared-library QtCore ;
+ }
+ else
+ # Setup common pre-built Qt.
+ # Special setup for QtCore on which everything depends
+ {
+ local link = [ feature.get-values link : $(condition) ] ;
+
+ local usage-requirements =
+ <include>$(.incprefix)
+ <library-path>$(.libprefix)
+ <threading>multi
+ <allow>qt4 ;
+
+ if $(link) in shared
+ {
+ usage-requirements += <dll-path>$(.libprefix) ;
+ }
+
+ local suffix ;
+
+ # Since Qt-4.2, debug versions on unix have to be built
+ # separately and therefore have no suffix.
+ .suffix_version = "" ;
+ .suffix_debug = "" ;
+
+ # Control flag for auto-configuration of the debug libraries.
+ # This setup requires Qt 'configure -debug-and-release'.
+ # Only available on some platforms.
+ # ToDo: 2009-02-12: Maybe throw this away and
+ # require separate setup with <variant>debug as condition.
+ .have_separate_debug = FALSE ;
+
+ # Setup other platforms
+ if $(target-os) in windows cygwin
+ {
+ .have_separate_debug = TRUE ;
+
+ # On NT, the shared libs have "4" suffix, and "d" suffix in debug builds.
+ if $(link) in shared
+ {
+ .suffix_version = "4" ;
+ }
+ .suffix_debug = "d" ;
+
+ # On Windows we must link against the qtmain library
+ lib qtmain
+ : # sources
+ : # requirements
+ <name>qtmain$(.suffix_debug)
+ <variant>debug
+ $(target-requirements)
+ ;
+
+ lib qtmain
+ : # sources
+ : # requirements
+ <name>qtmain
+ $(target-requirements)
+ ;
+ }
+ else if $(target-os) = darwin
+ {
+ # On MacOS X, both debug and release libraries are available.
+ .suffix_debug = "_debug" ;
+
+ .have_separate_debug = TRUE ;
+
+ alias qtmain ;
+ }
+ else
+ {
+ alias qtmain : : $(target-requirements) ;
+ }
+
+ lib QtCore : qtmain
+ : # requirements
+ <name>QtCore$(.suffix_version)
+ $(target-requirements)
+ : # default-build
+ : # usage-requirements
+ <define>QT_CORE_LIB
+ <define>QT_NO_DEBUG
+ <include>$(.incprefix)/QtCore
+ $(usage-requirements)
+ ;
+
+ if $(.have_separate_debug) = TRUE
+ {
+ debug-message Configure debug libraries with suffix '$(.suffix_debug)' ;
+
+ # NOTE(review): '$(main)' is never set in this rule, so it expands
+ # to the empty list and the debug QtCore lacks the qtmain source
+ # the release variant has -- presumably 'qtmain' was intended;
+ # confirm before changing.
+ lib QtCore : $(main)
+ : # requirements
+ <name>QtCore$(.suffix_debug)$(.suffix_version)
+ <variant>debug
+ $(target-requirements)
+ : # default-build
+ : # usage-requirements
+ <define>QT_CORE_LIB
+ <include>$(.incprefix)/QtCore
+ $(usage-requirements)
+ ;
+ }
+ }
+
+ # Initialising the remaining libraries is canonical
+ # parameters 'module' : 'depends-on' : 'usage-define' : 'requirements' : 'include'
+ # 'include' only for non-canonical include paths.
+ add-shared-library QtGui : QtCore : QT_GUI_LIB : $(target-requirements) ;
+ add-shared-library QtNetwork : QtCore : QT_NETWORK_LIB : $(target-requirements) ;
+ add-shared-library QtSql : QtCore : QT_SQL_LIB : $(target-requirements) ;
+ add-shared-library QtXml : QtCore : QT_XML_LIB : $(target-requirements) ;
+
+ add-shared-library Qt3Support : QtGui QtNetwork QtXml QtSql
+ : QT_QT3SUPPORT_LIB QT3_SUPPORT
+ : <qt3support>on $(target-requirements) ;
+
+ # Dummy target to enable "<qt3support>off" and
+ # "<library>/qt//Qt3Support" at the same time. This enables quick
+ # switching from one to the other for test/porting purposes.
+ alias Qt3Support : : <qt3support>off $(target-requirements) ;
+
+ # OpenGl Support
+ add-shared-library QtOpenGL : QtGui : QT_OPENGL_LIB : $(target-requirements) ;
+
+ # SVG-Support (Qt 4.1)
+ add-shared-library QtSvg : QtXml QtOpenGL : QT_SVG_LIB : $(target-requirements) ;
+
+ # Test-Support (Qt 4.1)
+ add-shared-library QtTest : QtCore : : $(target-requirements) ;
+
+ # Qt designer library
+ add-shared-library QtDesigner : QtGui QtXml : : $(target-requirements) ;
+ add-shared-library QtDesignerComponents : QtGui QtXml : : $(target-requirements) ;
+
+ # Support for dynamic Widgets (Qt 4.1)
+ add-static-library QtUiTools : QtGui QtXml : $(target-requirements) ;
+
+ # DBus-Support (Qt 4.2)
+ add-shared-library QtDBus : QtXml : : $(target-requirements) ;
+
+ # Script-Engine (Qt 4.3)
+ add-shared-library QtScript : QtGui QtXml : QT_SCRIPT_LIB : $(target-requirements) ;
+
+ # Tools for the Script-Engine (Qt 4.5)
+ add-shared-library QtScriptTools : QtScript : QT_SCRIPTTOOLS_LIB : $(target-requirements) ;
+
+ # WebKit (Qt 4.4)
+ add-shared-library QtWebKit : QtGui : QT_WEBKIT_LIB : $(target-requirements) ;
+
+ # Phonon Multimedia (Qt 4.4)
+ add-shared-library phonon : QtGui QtXml : QT_PHONON_LIB : $(target-requirements) ;
+
+ # Multimedia engine (Qt 4.6)
+ add-shared-library QtMultimedia : QtGui : QT_MULTIMEDIA_LIB : $(target-requirements) ;
+
+ # XmlPatterns-Engine (Qt 4.4)
+ add-shared-library QtXmlPatterns : QtNetwork : QT_XMLPATTERNS_LIB : $(target-requirements) ;
+
+ # Help-Engine (Qt 4.4)
+ add-shared-library QtHelp : QtGui QtSql QtXml : : $(target-requirements) ;
+ # BUGFIX: 'QCore' was a typo for 'QtCore' (no 'QCore' target exists).
+ add-shared-library QtCLucene : QtCore QtSql QtXml : : $(target-requirements) ;
+
+ # QML-Engine (Qt 4.7)
+ add-shared-library QtDeclarative : QtGui QtXml : : $(target-requirements) ;
+
+ # AssistantClient Support
+ # Compat library removed in 4.7.0
+ # Pre-4.4 help system, use QtHelp for new programs
+ if $(version) < "4.7"
+ {
+ add-shared-library QtAssistantClient : QtGui : : $(target-requirements) : QtAssistant ;
+ }
+ debug-message "==== Configured Qt-$(version) ====" ;
+
+ project.pop-current ;
+}
+
+# Reports whether 'init' has been called: returns the non-empty value of the
+# .initialized flag that 'init' sets on its first run, else the empty list.
+rule initialized ( )
+{
+ return $(.initialized) ;
+}
+
+
+
+# This custom generator is needed because in QT4, UI files are translated only
+# into H files, and no C++ files are created. Further, the H files need not be
+# passed via MOC. The header is used only via inclusion. If we define a standard
+# UI -> H generator, Boost.Build will run MOC on H, and then compile the
+# resulting cpp. It will give a warning, since output from moc will be empty.
+#
+# This generator is declared with a UI -> OBJ signature, so it gets invoked when
+# linking generator tries to convert sources to OBJ, but it produces target of
+# type H. This is non-standard, but allowed. That header won't be mocced.
+#
+class uic-generator : generator
+{
+ rule __init__ ( * : * )
+ {
+ generator.__init__ $(1) : $(2) : $(3) : $(4) : $(5) : $(6) : $(7) : $(8) : $(9) ;
+ }
+
+ rule run ( project name ? : property-set : sources * )
+ {
+ if ! $(name)
+ {
+ # NOTE(review): jam list indices are 1-based; $(sources[0]) looks
+ # suspicious next to the $(sources[1]) used below -- confirm.
+ name = [ $(sources[0]).name ] ;
+ name = $(name:B) ;
+ }
+
+ local a = [ new action $(sources[1]) : qt4.uic : $(property-set) ] ;
+
+ # The 'ui_' prefix is to match qmake's default behavior.
+ local target = [ new file-target ui_$(name) : H : $(project) : $(a) ] ;
+
+ local r = [ virtual-target.register $(target) ] ;
+
+ # Since this generator will return a H target, the linking generator
+ # won't use it at all, and won't set any dependency on it. However, we
+ # need the target to be seen by bjam, so that dependency from sources to
+ # this generated header is detected -- if jam does not know about this
+ # target, it won't do anything.
+ DEPENDS all : [ $(r).actualize ] ;
+
+ return $(r) ;
+ }
+}
+
+
+# Runs qt4.moc.inc on a single MOCCABLE_CPP source. Declared with an OBJ
+# result type but really produces a MOC-typed file; see the uic-generator
+# comment above for why the fake result type is needed.
+class moc-h-generator : generator
+{
+ rule __init__ ( * : * )
+ {
+ generator.__init__ $(1) : $(2) : $(3) : $(4) : $(5) : $(6) : $(7) : $(8) : $(9) ;
+ }
+
+ # Only fires for exactly one source of type MOCCABLE_CPP; otherwise
+ # returns nothing and other generators are tried.
+ rule run ( project name ? : property-set : sources * )
+ {
+ if ! $(sources[2]) && [ $(sources[1]).type ] = MOCCABLE_CPP
+ {
+ # NOTE(review): jam list indices are 1-based; $(sources[0]) looks
+ # suspicious next to $(sources[1]) above -- confirm.
+ name = [ $(sources[0]).name ] ;
+ name = $(name:B) ;
+
+ local a = [ new action $(sources[1]) : qt4.moc.inc :
+ $(property-set) ] ;
+
+ local target = [ new file-target $(name) : MOC : $(project) : $(a)
+ ] ;
+
+ local r = [ virtual-target.register $(target) ] ;
+
+ # Since this generator will return a H target, the linking generator
+ # won't use it at all, and won't set any dependency on it. However,
+ # we need the target to be seen by bjam, so that dependency from
+ # sources to this generated header is detected -- if jam does not
+ # know about this target, it won't do anything.
+ DEPENDS all : [ $(r).actualize ] ;
+
+ return $(r) ;
+ }
+ }
+}
+
+
+# Runs qt4.moc.inc on a single MOCCABLE_H source, producing a CPP target
+# named moc_<basename>. Unlike moc-h-generator it registers the target only
+# on return and sets the DEPENDS before registration.
+class moc-inc-generator : generator
+{
+ rule __init__ ( * : * )
+ {
+ generator.__init__ $(1) : $(2) : $(3) : $(4) : $(5) : $(6) : $(7) : $(8) : $(9) ;
+ }
+
+ # Only fires for exactly one source of type MOCCABLE_H.
+ rule run ( project name ? : property-set : sources * )
+ {
+ if ! $(sources[2]) && [ $(sources[1]).type ] = MOCCABLE_H
+ {
+ # NOTE(review): jam list indices are 1-based; $(sources[0]) looks
+ # suspicious next to $(sources[1]) above -- confirm.
+ name = [ $(sources[0]).name ] ;
+ name = $(name:B) ;
+
+ local a = [ new action $(sources[1]) : qt4.moc.inc :
+ $(property-set) ] ;
+
+ local target = [ new file-target moc_$(name) : CPP : $(project) :
+ $(a) ] ;
+
+ # Since this generator will return a H target, the linking generator
+ # won't use it at all, and won't set any dependency on it. However,
+ # we need the target to be seen by bjam, so that dependency from
+ # sources to this generated header is detected -- if jam does not
+ # know about this target, it won't do anything.
+ DEPENDS all : [ $(target).actualize ] ;
+
+ return [ virtual-target.register $(target) ] ;
+ }
+ }
+}
+
+
+# Query the installation directory. This is needed in at least two scenarios.
+# First, when re-using sources from the Qt-Tree. Second, to "install" custom Qt
+# plugins to the Qt-Tree.
+#
+# Returns the first prefix passed to 'init' (saved there in .PREFIX).
+#
+rule directory
+{
+ return $(.PREFIX) ;
+}
+
+# Add a shared Qt library.
+# Forwards to add-library with the shared-library version suffix
+# ($(.suffix_version), set up in 'init').
+rule add-shared-library ( lib-name : depends-on * : usage-defines * : requirements * : include ? )
+{
+ add-library $(lib-name) : $(.suffix_version) : $(depends-on) : $(usage-defines) : $(requirements) : $(include) ;
+}
+
+# Add a static Qt library.
+# Forwards to add-library with an empty version, so the library name is
+# unversioned.
+rule add-static-library ( lib-name : depends-on * : usage-defines * : requirements * : include ? )
+{
+ add-library $(lib-name) : : $(depends-on) : $(usage-defines) : $(requirements) : $(include) ;
+}
+
+# Add a Qt library.
+# Static libs are unversioned, whereas shared libs have the major number as suffix.
+# Creates both release and debug versions on platforms where both are enabled by Qt configure.
+# Flags:
+# - lib-name Qt library Name
+# - version Qt major number used as shared library suffix (QtCore4.so)
+# - depends-on other Qt libraries
+# - usage-defines those are set by qmake, so set them when using this library
+# - requirements additional requirements
+# - include non-canonical include path. The canonical path is $(.incprefix)/$(lib-name).
+rule add-library ( lib-name : version ? : depends-on * : usage-defines * : requirements * : include ? )
+{
+ if $(.bjam-qt)
+ {
+ # Import Qt module
+ # Everything will be setup there
+ alias $(lib-name)
+ : $(.prefix)//$(lib-name)
+ :
+ :
+ : <allow>qt4 ;
+ }
+ else
+ {
+ local real_include ;
+ real_include ?= $(include) ;
+ real_include ?= $(lib-name) ;
+
+ lib $(lib-name)
+ : # sources
+ $(depends-on)
+ : # requirements
+ <name>$(lib-name)$(version)
+ $(requirements)
+ : # default-build
+ : # usage-requirements
+ <define>$(usage-defines)
+ <include>$(.incprefix)/$(real_include)
+ ;
+
+ if $(.have_separate_debug) = TRUE
+ {
+ lib $(lib-name)
+ : # sources
+ $(depends-on)
+ : # requirements
+ <name>$(lib-name)$(.suffix_debug)$(version)
+ $(requirements)
+ <variant>debug
+ : # default-build
+ : # usage-requirements
+ <define>$(usage-defines)
+ <include>$(.incprefix)/$(real_include)
+ ;
+ }
+ }
+
+ # Make library explicit so that a simple <use>qt4 will not bring in everything.
+ # And some components like QtDBus/Phonon may not be available on all platforms.
+ explicit $(lib-name) ;
+}
+
+# Use $(.BINPREFIX[-1]) for the paths as several tools-requirements can match.
+# The exact match is the last one.
+
+# Get <include> and <defines> from current toolset.
+flags qt4.moc INCLUDES <include> ;
+flags qt4.moc DEFINES <define> ;
+
+# need a newline for expansion of DEFINES and INCLUDES in the response file.
+.nl = "
+" ;
+
+# Processes headers to create Qt MetaObject information. Qt4-moc has its own
+# C++ parser, so pass INCLUDES and DEFINES.
+# We use a response file ($(<).rsp) with one INCLUDE/DEFINE per line.
+#
+actions moc
+{
+ $(.BINPREFIX[-1])/moc -f $(>) -o $(<) @"@($(<).rsp:E=-D$(DEFINES)$(.nl) -I$(INCLUDES:T)$(.nl))"
+}
+
+# When moccing files for include only, we don't need -f, otherwise the generated
+# code will include the .cpp and we'll get duplicated symbols.
+#
+actions moc.inc
+{
+ $(.BINPREFIX[-1])/moc $(>) -o $(<) @"@($(<).rsp:E=-D$(DEFINES)$(.nl) -I$(INCLUDES:T)$(.nl))"
+}
+
+
+# Get extra options for RCC
+flags qt4.rcc RCC_OPTIONS <rccflags> ;
+
+# Generates source files from resource files.
+# '-name $(>:B)' sets the initialization function name to the .qrc basename.
+#
+actions rcc
+{
+ $(.BINPREFIX[-1])/rcc $(>) -name $(>:B) $(RCC_OPTIONS) -o $(<)
+}
+
+
+# Generates user-interface source from .ui files.
+#
+actions uic
+{
+ $(.BINPREFIX[-1])/uic $(>) -o $(<)
+}
+
+
+# Scanner for .qrc files. Look for the CDATA section of the <file> tag. Ignore
+# the "alias" attribute. See http://doc.trolltech.com/qt/resources.html for
+# detailed documentation of the Qt Resource System.
+#
+class qrc-scanner : common-scanner
+{
+ # Regexp handed to common-scanner; group 1 is the referenced file path.
+ rule pattern ( )
+ {
+ return "<file.*>(.*)</file>" ;
+ }
+}
+
+
+# Wrapped files are "included".
+scanner.register qrc-scanner : include ;
diff --git a/src/kenlm/jam-files/boost-build/tools/qt5.jam b/src/kenlm/jam-files/boost-build/tools/qt5.jam
new file mode 100644
index 0000000..a5fdf71
--- /dev/null
+++ b/src/kenlm/jam-files/boost-build/tools/qt5.jam
@@ -0,0 +1,728 @@
+# Copyright 2002-2006 Vladimir Prus
+# Copyright 2005 Alo Sarv
+# Copyright 2005-2012 Juergen Hunold
+#
+# Distributed under the Boost Software License, Version 1.0. (See
+# accompanying file LICENSE_1_0.txt or copy at
+# http://www.boost.org/LICENSE_1_0.txt)
+
+# Qt5 library support module
+#
+# The module attempts to auto-detect QT installation location from QTDIR
+# environment variable; failing that, installation location can be passed as
+# argument:
+#
+# toolset.using qt5 : /usr/local/Trolltech/Qt-5.0.0 ;
+#
+# The module supports code generation from .ui and .qrc files, as well as
+# running the moc preprocessor on headers. Note that you must list all your
+# moc-able headers in sources.
+#
+# Example:
+#
+# exe myapp : myapp.cpp myapp.h myapp.ui myapp.qrc
+# /qt5//QtGui /qt5//QtNetwork ;
+#
+# It's also possible to run moc on cpp sources:
+#
+# import cast ;
+#
+# exe myapp : myapp.cpp [ cast _ moccable-cpp : myapp.cpp ] /qt5//QtGui ;
+#
+# When moccing source file myapp.cpp you need to include "myapp.moc" from
+# myapp.cpp. When moccing .h files, the output of moc will be automatically
+# compiled and linked in, you don't need any includes.
+#
+# This is consistent with Qt guidelines:
+# http://qt-project.org/doc/qt-5.0/moc.html
+
+# The .qrc processing utility supports various command line options (see
+# http://qt-project.org/doc/qt-5.0/rcc.html for a complete list). The
+# module provides default arguments for the "output file" and
+# "initialization function name" options. Other options can be set through
+# the <rccflags> build property. E.g. if you wish the compression settings
+# to be more aggressive than the defaults, you can apply them to all .qrc
+# files like this:
+#
+# project my-qt-project :
+# requirements
+# <rccflags>"-compress 9 -threshold 10"
+# ;
+#
+# Of course, this property can also be specified on individual targets.
+
+
+import modules ;
+import feature ;
+import errors ;
+import type ;
+import "class" : new ;
+import generators ;
+import project ;
+import toolset : flags ;
+import os ;
+import virtual-target ;
+import scanner ;
+
+# The Qt version used for requirements
+# Valid are <qt5>5.0 or <qt5>5.1.0
+# Auto-detection via qmake sets '<qt5>major.minor.patch'
+# (The feature is named 'qt5', not 'qt', so it can coexist with qt4.jam.)
+feature.feature qt5 : : propagated ;
+
+# Extra flags for rcc
+# $TODO: figure out how to declare this only once
+# feature.feature rccflags : : free ;
+
+project.initialize $(__name__) ;
+project qt5 ;
+
+# Save the project so that we tolerate 'import + using' combo.
+.project = [ project.current ] ;
+
+# Helper utils for easy debug output
+# .debug-configuration is set when bjam was invoked with --debug-configuration.
+if [ MATCH (--debug-configuration) : [ modules.peek : ARGV ] ]
+{
+ .debug-configuration = TRUE ;
+}
+
+# Echo 'message' prefixed with "notice: [qt5-cfg]", but only when
+# --debug-configuration was given on the command line.
+local rule debug-message ( message * )
+{
+ if $(.debug-configuration) = TRUE
+ {
+ ECHO notice: [qt5-cfg] $(message) ;
+ }
+}
+
+# Capture qmake output line by line
+#
+# Splits 'content' on newlines and returns the list of lines. '<<' is just a
+# variable name here; its regexp peels one leading line off the remainder on
+# each loop iteration.
+local rule read-output ( content )
+{
+ local lines ;
+ # Literal newline character, used inside the match expression below.
+ local nl = "
+" ;
+ local << = "([^$(nl)]*)[$(nl)](.*)" ;
+ local line+ = [ MATCH "$(<<)" : "$(content)" ] ;
+ while $(line+)
+ {
+ lines += $(line+[1]) ;
+ line+ = [ MATCH "$(<<)" : "$(line+[2])" ] ;
+ }
+ return $(lines) ;
+}
+
+# Capture Qt version from qmake
+#
+# Runs '$(bin_prefix)/qmake -v' via SHELL and returns the (major minor patch)
+# list matched from the last non-"QMake"-banner line of its output.
+# NOTE(review): 'full-cmd' and 'temp' are not declared 'local', so they leak
+# into the enclosing module and 'temp' can retain a value from a previous call
+# if no line matches -- confirm whether that is intended.
+local rule check-version ( bin_prefix )
+{
+ full-cmd = $(bin_prefix)"/qmake -v" ;
+ debug-message Running '$(full-cmd)' ;
+ local output = [ SHELL $(full-cmd) ] ;
+ for line in [ read-output $(output) ]
+ {
+ # Parse the output to get all the results.
+ if [ MATCH "QMake" : $(line) ]
+ {
+ # Skip first line of output
+ }
+ else
+ {
+ temp = [ MATCH "([0-9]*)\\.([0-9]*)\\.([0-9]*)" : $(line) ] ;
+ }
+ }
+ return $(temp) ;
+}
+
+# Validate the version string and extract the major/minor part we care about.
+#
+# Returns a two-element (major minor) list; warns and pads with 0 when the
+# string is not a plain two-part version number.
+local rule split-version ( version )
+{
+ local major-minor = [ MATCH ^([0-9]+)\.([0-9]+)(.*)$ : $(version) : 1 2 3 ] ;
+ if ! $(major-minor[2]) || $(major-minor[3])
+ {
+ ECHO "Warning: 'using qt' expects a two part (major, minor) version number; got" $(version) instead ;
+
+ # Add a zero to account for the missing digit if necessary.
+ major-minor += 0 ;
+ }
+
+ return $(major-minor[1]) $(major-minor[2]) ;
+}
+
+# Initialize the QT support module.
+# Parameters:
+# - 'prefix' parameter tells where Qt is installed.
+# - 'version' optional version of Qt, else autodetected via 'qmake -v'
+# - 'condition' optional requirements
+# - 'namespace' optional support for configure -qtnamespace
+# - 'infix' optional support for configure -qtlibinfix
+# - 'full_bin' optional full path to Qt binaries (qmake,moc,uic,rcc)
+# - 'full_inc' optional full path to Qt top-level include directory
+# - 'full_lib' optional full path to Qt library directory
+rule init ( prefix : version ? : condition * : namespace ? : infix ? : full_bin ? : full_inc ? : full_lib ? )
+{
+ project.push-current $(.project) ;
+
+ debug-message "==== Configuring Qt ... ====" ;
+ for local v in version prefix condition namespace infix full_bin full_inc full_lib
+ {
+ if $($(v))
+ {
+ debug-message " user-specified "$(v): '$($(v))' ;
+ }
+ }
+
+ # Needed as default value
+ .prefix = $(prefix) ;
+
+ # pre-build paths to detect reinitializations changes
+ local inc_prefix lib_prefix bin_prefix ;
+ if $(full_inc)
+ {
+ inc_prefix = $(full_inc) ;
+ }
+ else
+ {
+ inc_prefix = $(prefix)/include ;
+ }
+ if $(full_lib)
+ {
+ lib_prefix = $(full_lib) ;
+ }
+ else
+ {
+ lib_prefix = $(prefix)/lib ;
+ }
+ if $(full_bin)
+ {
+ bin_prefix = $(full_bin) ;
+ }
+ else
+ {
+ bin_prefix = $(prefix)/bin ;
+ }
+
+ # Globally needed variables
+ .incprefix = $(inc_prefix) ;
+ .libprefix = $(lib_prefix) ;
+ .binprefix = $(bin_prefix) ;
+
+ if ! $(.initialized)
+ {
+ # Make sure this is initialised only once
+ .initialized = true ;
+
+ # Generates cpp files from header files using "moc" tool
+ generators.register-standard qt5.moc : H : CPP(moc_%) : <allow>qt5 ;
+
+ # The OBJ result type is a fake, 'H' will be really produced. See
+ # comments on the generator class, defined below the 'init' function.
+ generators.register [ new uic-5-generator qt5.uic : UI : OBJ :
+ <allow>qt5 ] ;
+
+ # The OBJ result type is a fake here too.
+ generators.register [ new moc-h-5-generator
+ qt5.moc.inc : MOCCABLE5_CPP : OBJ : <allow>qt5 ] ;
+
+ generators.register [ new moc-inc-5-generator
+ qt5.moc.inc : MOCCABLE5_H : OBJ : <allow>qt5 ] ;
+
+ # Generates .cpp files from .qrc files.
+ generators.register-standard qt5.rcc : QRC : CPP(qrc_%) : <allow>qt5 ;
+
+ # dependency scanner for wrapped files.
+ type.set-scanner QRC : qrc-5-scanner ;
+
+ # Save value of first occuring prefix
+ .PREFIX = $(prefix) ;
+ }
+
+ if $(version)
+ {
+ major-minor = [ split-version $(version) ] ;
+ version = $(major-minor:J=.) ;
+ }
+ else
+ {
+ version = [ check-version $(bin_prefix) ] ;
+ if $(version)
+ {
+ version = $(version:J=.) ;
+ }
+ debug-message Detected version '$(version)' ;
+ }
+
+ local target-requirements = $(condition) ;
+
+ # Add the version, if any, to the target requirements.
+ if $(version)
+ {
+ if ! $(version) in [ feature.values qt5 ]
+ {
+ feature.extend qt5 : $(version) ;
+ }
+ target-requirements += <qt5>$(version:E=default) ;
+ }
+
+ local target-os = [ feature.get-values target-os : $(condition) ] ;
+ if ! $(target-os)
+ {
+ target-os ?= [ feature.defaults target-os ] ;
+ target-os = $(target-os:G=) ;
+ target-requirements += <target-os>$(target-os) ;
+ }
+
+ # Build exact requirements for the tools
+ local tools-requirements = $(target-requirements:J=/) ;
+
+ debug-message "Details of this Qt configuration:" ;
+ debug-message " prefix: " '$(prefix:E=<empty>)' ;
+ debug-message " binary path: " '$(bin_prefix:E=<empty>)' ;
+ debug-message " include path:" '$(inc_prefix:E=<empty>)' ;
+ debug-message " library path:" '$(lib_prefix:E=<empty>)' ;
+ debug-message " target requirements:" '$(target-requirements)' ;
+ debug-message " tool requirements: " '$(tools-requirements)' ;
+
+ # setup the paths for the tools
+ toolset.flags qt5.moc .BINPREFIX $(tools-requirements) : $(bin_prefix) ;
+ toolset.flags qt5.rcc .BINPREFIX $(tools-requirements) : $(bin_prefix) ;
+ toolset.flags qt5.uic .BINPREFIX $(tools-requirements) : $(bin_prefix) ;
+
+ # TODO: 2009-02-12: Better support for directories
+ # Most likely needed are separate getters for: include,libraries,binaries and sources.
+ toolset.flags qt5.directory .PREFIX $(tools-requirements) : $(prefix) ;
+
+ # Test for a buildable Qt.
+ if [ glob $(.prefix)/Jamroot ]
+ {
+ .bjam-qt = true
+
+ # this will declare QtCore (and qtmain on <target-os>windows)
+ add-shared-library QtCore ;
+ }
+ else
+ # Setup common pre-built Qt.
+ # Special setup for QtCore on which everything depends
+ {
+ local link = [ feature.get-values link : $(condition) ] ;
+
+ local usage-requirements =
+ <include>$(.incprefix)
+ <library-path>$(.libprefix)
+ <threading>multi
+ <allow>qt5 ;
+
+ if $(link) in shared
+ {
+ usage-requirements += <dll-path>$(.libprefix) ;
+ }
+
+ local suffix ;
+
+ # debug versions on unix have to be built
+ # separately and therefore have no suffix.
+ .infix_version = "" ;
+ .suffix_debug = "" ;
+
+ # Control flag for auto-configuration of the debug libraries.
+ # This setup requires Qt 'configure -debug-and-release'.
+ # Only available on some platforms.
+ # ToDo: 2009-02-12: Maybe throw this away and
+ # require separate setup with <variant>debug as condition.
+ .have_separate_debug = FALSE ;
+
+ # Setup other platforms
+ if $(target-os) in windows cygwin
+ {
+ .have_separate_debug = TRUE ;
+
+ # On NT, the libs have "d" suffix in debug builds.
+ .suffix_debug = "d" ;
+
+ .infix_version = "5" ;
+
+ # On Windows we must link against the qtmain library
+ lib qtmain
+ : # sources
+ : # requirements
+ <name>qtmain$(.suffix_debug)
+ <variant>debug
+ $(target-requirements)
+ ;
+
+ lib qtmain
+ : # sources
+ : # requirements
+ <name>qtmain
+ $(target-requirements)
+ ;
+ }
+ else if $(target-os) = darwin
+ {
+ # On MacOS X, both debug and release libraries are available.
+ .suffix_debug = "_debug" ;
+
+ .have_separate_debug = TRUE ;
+
+ alias qtmain ;
+ }
+ else
+ {
+ alias qtmain : : $(target-requirements) ;
+ .infix_version = "5" ;
+ }
+
+ lib QtCore : qtmain
+ : # requirements
+ <name>Qt$(.infix_version)Core
+ $(target-requirements)
+ : # default-build
+ : # usage-requirements
+ <define>QT_CORE_LIB
+ <define>QT_NO_DEBUG
+ <include>$(.incprefix)/QtCore
+ $(usage-requirements)
+ ;
+
+ if $(.have_separate_debug) = TRUE
+ {
+ debug-message Configure debug libraries with suffix '$(.suffix_debug)' ;
+
+            lib QtCore : qtmain
+ : # requirements
+ <name>Qt$(.infix_version)Core$(.suffix_debug)
+ <variant>debug
+ $(target-requirements)
+ : # default-build
+ : # usage-requirements
+ <define>QT_CORE_LIB
+ <include>$(.incprefix)/QtCore
+ $(usage-requirements)
+ ;
+ }
+ }
+
+ # Initialising the remaining libraries is canonical
+ # parameters 'module' : 'depends-on' : 'usage-define' : 'requirements' : 'include'
+ # 'include' only for non-canonical include paths.
+ add-shared-library QtGui : QtCore : QT_GUI_LIB : $(target-requirements) ;
+ add-shared-library QtWidgets : QtGui : QT_WIDGETS_LIB : $(target-requirements) ;
+ add-shared-library QtNetwork : QtCore : QT_NETWORK_LIB : $(target-requirements) ;
+ add-shared-library QtSql : QtCore : QT_SQL_LIB : $(target-requirements) ;
+ add-shared-library QtXml : QtCore : QT_XML_LIB : $(target-requirements) ;
+ add-shared-library QtPrintSupport : QtGui : QT_PRINTSUPPORT_LIB : $(target-requirements) ;
+ add-shared-library QtConcurrent : QtCore : QT_CONCURRENT_LIB : $(target-requirements) ;
+
+ add-shared-library QtOpenGL : QtGui : QT_OPENGL_LIB : $(target-requirements) ;
+ add-shared-library QtSvg : QtXml QtOpenGL : QT_SVG_LIB : $(target-requirements) ;
+
+ add-shared-library QtTest : QtCore : : $(target-requirements) ;
+
+ # Qt designer library et. al.
+ add-shared-library QtDesigner : QtGui QtXml : : $(target-requirements) ;
+ add-shared-library QtDesignerComponents : QtGui QtXml : : $(target-requirements) ;
+ add-static-library QtUiTools : QtGui QtXml : $(target-requirements) ;
+
+ # DBus-Support
+ add-shared-library QtDBus : QtXml : : $(target-requirements) ;
+
+ # Script-Engine and Tools
+ add-shared-library QtScript : QtGui QtXml : QT_SCRIPT_LIB : $(target-requirements) ;
+ add-shared-library QtScriptTools : QtScript : QT_SCRIPTTOOLS_LIB : $(target-requirements) ;
+
+ # WebKit
+ add-shared-library QtWebKit : QtGui : QT_WEBKIT_LIB : $(target-requirements) ;
+ add-shared-library QtWebKitWidgets : QtGui : QT_WEBKITWIDGETS_LIB : $(target-requirements) ;
+
+ # Multimedia engine
+ add-shared-library QtMultimedia : QtGui : QT_MULTIMEDIA_LIB : $(target-requirements) ;
+ add-shared-library QtMultimediaWidgets : QtMultimedia : QT_MULTIMEDIAWIDGETS_LIB : $(target-requirements) ;
+
+ #
+ add-shared-library QtXmlPatterns : QtNetwork : QT_XMLPATTERNS_LIB : $(target-requirements) ;
+
+ # Help-Engine
+ add-shared-library QtHelp : QtGui QtSql QtXml : : $(target-requirements) ;
+    add-shared-library QtCLucene : QtCore QtSql QtXml : : $(target-requirements) ;
+
+ # QtQuick
+ add-shared-library QtQml : QtCore QtNetwork QtGui : QT_QML_LIB : $(target-requirements) ;
+ add-shared-library QtQuick : QtQml : QT_QUICK_LIB : $(target-requirements) ;
+ add-shared-library QtQuickParticles : QtQml : : $(target-requirements) ;
+ add-shared-library QtQuickTest : QtQml : : $(target-requirements) ;
+
+    # V8 JavaScript engine support
+ add-shared-library QtV8 : QtCore : : $(target-requirements) ;
+
+ # QML-Engine version1
+ add-shared-library QtDeclarative : QtXml : : $(target-requirements) ;
+
+ debug-message "==== Configured Qt-$(version) ====" ;
+
+ project.pop-current ;
+}
+
+rule initialized ( )
+{
+ return $(.initialized) ;
+}
+
+
+
+# This custom generator is needed because in QT5, UI files are translated only
+# into H files, and no C++ files are created. Further, the H files need not be
+# passed via MOC. The header is used only via inclusion. If we define a standard
+# UI -> H generator, Boost.Build will run MOC on H, and then compile the
+# resulting cpp. It will give a warning, since output from moc will be empty.
+#
+# This generator is declared with a UI -> OBJ signature, so it gets invoked when
+# linking generator tries to convert sources to OBJ, but it produces target of
+# type H. This is non-standard, but allowed. That header won't be mocced.
+#
+class uic-5-generator : generator
+{
+ rule __init__ ( * : * )
+ {
+ generator.__init__ $(1) : $(2) : $(3) : $(4) : $(5) : $(6) : $(7) : $(8) : $(9) ;
+ }
+
+ rule run ( project name ? : property-set : sources * )
+ {
+ if ! $(name)
+ {
+ name = [ $(sources[0]).name ] ;
+ name = $(name:B) ;
+ }
+
+ local a = [ new action $(sources[1]) : qt5.uic : $(property-set) ] ;
+
+ # The 'ui_' prefix is to match qmake's default behavior.
+ local target = [ new file-target ui_$(name) : H : $(project) : $(a) ] ;
+
+ local r = [ virtual-target.register $(target) ] ;
+
+ # Since this generator will return a H target, the linking generator
+ # won't use it at all, and won't set any dependency on it. However, we
+ # need the target to be seen by bjam, so that dependency from sources to
+ # this generated header is detected -- if jam does not know about this
+ # target, it won't do anything.
+ DEPENDS all : [ $(r).actualize ] ;
+
+ return $(r) ;
+ }
+}
+
+
+class moc-h-5-generator : generator
+{
+ rule __init__ ( * : * )
+ {
+ generator.__init__ $(1) : $(2) : $(3) : $(4) : $(5) : $(6) : $(7) : $(8) : $(9) ;
+ }
+
+ rule run ( project name ? : property-set : sources * )
+ {
+ if ! $(sources[2]) && [ $(sources[1]).type ] = MOCCABLE5_CPP
+ {
+ name = [ $(sources[0]).name ] ;
+ name = $(name:B) ;
+
+ local a = [ new action $(sources[1]) : qt5.moc.inc :
+ $(property-set) ] ;
+
+ local target = [ new file-target $(name) : MOC : $(project) : $(a)
+ ] ;
+
+ local r = [ virtual-target.register $(target) ] ;
+
+ # Since this generator will return a H target, the linking generator
+ # won't use it at all, and won't set any dependency on it. However,
+ # we need the target to be seen by bjam, so that dependency from
+ # sources to this generated header is detected -- if jam does not
+ # know about this target, it won't do anything.
+ DEPENDS all : [ $(r).actualize ] ;
+
+ return $(r) ;
+ }
+ }
+}
+
+
+class moc-inc-5-generator : generator
+{
+ rule __init__ ( * : * )
+ {
+ generator.__init__ $(1) : $(2) : $(3) : $(4) : $(5) : $(6) : $(7) : $(8) : $(9) ;
+ }
+
+ rule run ( project name ? : property-set : sources * )
+ {
+ if ! $(sources[2]) && [ $(sources[1]).type ] = MOCCABLE5_H
+ {
+ name = [ $(sources[0]).name ] ;
+ name = $(name:B) ;
+
+ local a = [ new action $(sources[1]) : qt5.moc.inc :
+ $(property-set) ] ;
+
+ local target = [ new file-target moc_$(name) : CPP : $(project) :
+ $(a) ] ;
+
+ # Since this generator will return a H target, the linking generator
+ # won't use it at all, and won't set any dependency on it. However,
+ # we need the target to be seen by bjam, so that dependency from
+ # sources to this generated header is detected -- if jam does not
+ # know about this target, it won't do anything.
+ DEPENDS all : [ $(target).actualize ] ;
+
+ return [ virtual-target.register $(target) ] ;
+ }
+ }
+}
+
+
+# Query the installation directory. This is needed in at least two scenarios.
+# First, when re-using sources from the Qt-Tree. Second, to "install" custom Qt
+# plugins to the Qt-Tree.
+#
+rule directory
+{
+ return $(.PREFIX) ;
+}
+
+# Add a shared Qt library.
+rule add-shared-library ( lib-name : depends-on * : usage-defines * : requirements * : include ? )
+{
+ add-library $(lib-name) : $(.infix_version) : $(depends-on) : $(usage-defines) : $(requirements) : $(include) ;
+}
+
+# Add a static Qt library.
+rule add-static-library ( lib-name : depends-on * : usage-defines * : requirements * : include ? )
+{
+ add-library $(lib-name) : $(.infix_version) : $(depends-on) : $(usage-defines) : $(requirements) : $(include) ;
+}
+
+# Add a Qt library.
+# Static libs are unversioned, whereas shared libs have the major number as suffix.
+# Creates both release and debug versions on platforms where both are enabled by Qt configure.
+# Flags:
+# - lib-name Qt library Name
+# - version Qt major number used as shared library suffix (QtCore5.so)
+# - depends-on other Qt libraries
+# - usage-defines those are set by qmake, so set them when using this library
+# - requirements additional requirements
+# - include non-canonical include path. The canonical path is $(.incprefix)/$(lib-name).
+rule add-library ( lib-name : version ? : depends-on * : usage-defines * : requirements * : include ? )
+{
+ if $(.bjam-qt)
+ {
+ # Import Qt module
+        # Everything will be set up there
+ alias $(lib-name)
+ : $(.prefix)//$(lib-name)
+ :
+ :
+ : <allow>qt5 ;
+ }
+ else
+ {
+ local real_include ;
+ real_include ?= $(include) ;
+ real_include ?= $(lib-name) ;
+
+ local real_name = [ MATCH ^Qt(.*) : $(lib-name) ] ;
+
+ lib $(lib-name)
+ : # sources
+ $(depends-on)
+ : # requirements
+ <name>Qt$(version)$(real_name)
+ $(requirements)
+ : # default-build
+ : # usage-requirements
+ <define>$(usage-defines)
+ <include>$(.incprefix)/$(real_include)
+ ;
+
+ if $(.have_separate_debug) = TRUE
+ {
+ lib $(lib-name)
+ : # sources
+ $(depends-on)
+ : # requirements
+ <name>Qt$(version)$(real_name)$(.suffix_debug)
+ $(requirements)
+ <variant>debug
+ : # default-build
+ : # usage-requirements
+ <define>$(usage-defines)
+ <include>$(.incprefix)/$(real_include)
+ ;
+ }
+ }
+
+ # Make library explicit so that a simple <use>qt5 will not bring in everything.
+ # And some components like QtDBus/Phonon may not be available on all platforms.
+ explicit $(lib-name) ;
+}
+
+# Use $(.BINPREFIX[-1]) for the paths as several tools-requirements can match.
+# The exact match is the last one.
+
+# Get <include> and <defines> from current toolset.
+flags qt5.moc INCLUDES <include> ;
+flags qt5.moc DEFINES <define> ;
+
+# need a newline for expansion of DEFINES and INCLUDES in the response file.
+.nl = "
+" ;
+
+# Processes headers to create Qt MetaObject information. Qt5-moc has its
+# c++-parser, so pass INCLUDES and DEFINES.
+# We use response file with one INCLUDE/DEFINE per line
+#
+actions moc
+{
+ $(.BINPREFIX[-1])/moc -f $(>) -o $(<) @"@($(<).rsp:E=-D$(DEFINES)$(.nl) -I$(INCLUDES:T)$(.nl))"
+}
+
+# When moccing files for include only, we don't need -f, otherwise the generated
+# code will include the .cpp and we'll get duplicated symbols.
+#
+actions moc.inc
+{
+ $(.BINPREFIX[-1])/moc $(>) -o $(<) @"@($(<).rsp:E=-D$(DEFINES)$(.nl) -I$(INCLUDES:T)$(.nl))"
+}
+
+
+# Get extra options for RCC
+flags qt5.rcc RCC_OPTIONS <rccflags> ;
+
+# Generates source files from resource files.
+#
+actions rcc
+{
+ $(.BINPREFIX[-1])/rcc $(>) -name $(>:B) $(RCC_OPTIONS) -o $(<)
+}
+
+
+# Generates user-interface source from .ui files.
+#
+actions uic
+{
+ $(.BINPREFIX[-1])/uic $(>) -o $(<)
+}
+
+
+# Scanner for .qrc files. Look for the CDATA section of the <file> tag. Ignore
+# the "alias" attribute. See http://doc.trolltech.com/qt/resources.html for
+# detailed documentation of the Qt Resource System.
+#
+class qrc-5-scanner : common-scanner
+{
+ rule pattern ( )
+ {
+ return "<file.*>(.*)</file>" ;
+ }
+}
+
+
+# Wrapped files are "included".
+scanner.register qrc-5-scanner : include ;
diff --git a/src/kenlm/jam-files/boost-build/tools/quickbook-config.jam b/src/kenlm/jam-files/boost-build/tools/quickbook-config.jam
new file mode 100644
index 0000000..e983a78
--- /dev/null
+++ b/src/kenlm/jam-files/boost-build/tools/quickbook-config.jam
@@ -0,0 +1,44 @@
+#~ Copyright 2005 Rene Rivera.
+#~ Distributed under the Boost Software License, Version 1.0.
+#~ (See accompanying file LICENSE_1_0.txt or http://www.boost.org/LICENSE_1_0.txt)
+
+# Automatic configuration for BoostBook tools. To use, just import this module.
+
+import os ;
+import toolset : using ;
+
+if [ os.name ] = NT
+{
+ local boost-dir = ;
+ for local R in snapshot cvs 1.33.0
+ {
+ boost-dir += [ W32_GETREG
+ "HKEY_LOCAL_MACHINE\\SOFTWARE\\Boost.org\\$(R)"
+ : "InstallRoot" ] ;
+ }
+ local quickbook-path = [ GLOB "$(boost-dir)\\bin" "\\Boost\\bin" : quickbook.exe ] ;
+ quickbook-path = $(quickbook-path[1]) ;
+
+ if $(quickbook-path)
+ {
+ if --debug-configuration in [ modules.peek : ARGV ]
+ {
+ ECHO "notice:" using quickbook ":" $(quickbook-path) ;
+ }
+ using quickbook : $(quickbook-path) ;
+ }
+}
+else
+{
+ local quickbook-path = [ GLOB "/usr/local/bin" "/usr/bin" "/opt/bin" : quickbook ] ;
+ quickbook-path = $(quickbook-path[1]) ;
+
+ if $(quickbook-path)
+ {
+ if --debug-configuration in [ modules.peek : ARGV ]
+ {
+ ECHO "notice:" using quickbook ":" $(quickbook-path) ;
+ }
+ using quickbook : $(quickbook-path) ;
+ }
+}
diff --git a/src/kenlm/jam-files/boost-build/tools/quickbook.jam b/src/kenlm/jam-files/boost-build/tools/quickbook.jam
new file mode 100644
index 0000000..6de2d42
--- /dev/null
+++ b/src/kenlm/jam-files/boost-build/tools/quickbook.jam
@@ -0,0 +1,361 @@
+#
+# Copyright (c) 2005 João Abecasis
+# Copyright (c) 2005 Vladimir Prus
+# Copyright (c) 2006 Rene Rivera
+#
+# Distributed under the Boost Software License, Version 1.0. (See
+# accompanying file LICENSE_1_0.txt or copy at
+# http://www.boost.org/LICENSE_1_0.txt)
+#
+
+# This toolset defines a generator to translate QuickBook to BoostBook. It can
+# be used to generate nice (!) user documentation in different formats
+# (pdf/html/...), from a single text file with simple markup.
+#
+# The toolset defines the QUICKBOOK type (file extension 'qbk') and
+# a QUICKBOOK to XML (BOOSTBOOK) generator.
+#
+#
+# ===========================================================================
+# Q & A
+# ===========================================================================
+#
+# If you don't know what this is all about, some Q & A will hopefully get you
+# up to speed with QuickBook and this toolset.
+#
+#
+# What is QuickBook ?
+#
+# QuickBook is a WikiWiki style documentation tool geared towards C++
+# documentation using simple rules and markup for simple formatting tasks.
+# QuickBook extends the WikiWiki concept. Like the WikiWiki, QuickBook
+# documents are simple text files. A single QuickBook document can
+# generate a fully linked set of nice HTML and PostScript/PDF documents
+# complete with images and syntax-colorized source code.
+#
+#
+# Where can I get QuickBook ?
+#
+# Quickbook can be found in Boost's repository, under the tools/quickbook
+# directory it was added there on Jan 2005, some time after the release of
+# Boost v1.32.0 and has been an integral part of the Boost distribution
+# since v1.33.
+#
+# Here's a link to the SVN repository:
+# https://svn.boost.org/svn/boost/trunk/tools/quickbook
+#
+# And to QuickBook's QuickBook-generated docs:
+# http://www.boost.org/doc/libs/release/tools/quickbook/index.html
+#
+#
+# How do I use QuickBook and this toolset in my projects ?
+#
+# The minimal example is:
+#
+# using boostbook ;
+# import quickbook ;
+#
+# boostbook my_docs : my_docs_source.qbk ;
+#
+# where my_docs is a target name and my_docs_source.qbk is a QuickBook
+# file. The documentation format to be generated is determined by the
+# boostbook toolset. By default html documentation should be generated,
+# but you should check BoostBook's docs to be sure.
+#
+#
+# What do I need ?
+#
+# You should start by setting up the BoostBook toolset. Please refer to
+# boostbook.jam and the BoostBook documentation for information on how to
+# do this.
+#
+# A QuickBook executable is also needed. The toolset will generate this
+# executable if it can find the QuickBook sources. The following
+# directories will be searched:
+#
+# BOOST_ROOT/tools/quickbook/
+# BOOST_BUILD_PATH/../../quickbook/
+#
+# (BOOST_ROOT and BOOST_BUILD_PATH are environment variables)
+#
+# If QuickBook sources are not found the toolset will then try to use
+# the shell command 'quickbook'.
+#
+#
+# How do I provide a custom QuickBook executable ?
+#
+# You may put the following in your user-config.jam or site-config.jam:
+#
+# using quickbook : /path/to/quickbook ;
+#
+# or, if 'quickbook' can be found in your PATH,
+#
+# using quickbook : quickbook ;
+#
+#
+# For convenience three alternatives are tried to get a QuickBook executable:
+#
+# 1. If the user points us to a QuickBook executable, that is used.
+#
+# 2. Otherwise, we search for the QuickBook sources and compile QuickBook
+# using the default toolset.
+#
+# 3. As a last resort, we rely on the shell for finding 'quickbook'.
+#
+
+import boostbook ;
+import "class" : new ;
+import feature ;
+import generators ;
+import toolset ;
+import type ;
+import scanner ;
+import project ;
+import targets ;
+import build-system ;
+import path ;
+import common ;
+import errors ;
+
+# The one and only QUICKBOOK type!
+type.register QUICKBOOK : qbk ;
+
+# <quickbook-binary> shell command to run QuickBook
+# <quickbook-binary-dependencies> targets to build QuickBook from sources.
+feature.feature <quickbook-binary> : : free ;
+feature.feature <quickbook-binary-dependencies> : : free dependency ;
+feature.feature <quickbook-define> : : free ;
+feature.feature <quickbook-indent> : : free ;
+feature.feature <quickbook-line-width> : : free ;
+
+
+# quickbook-binary-generator handles generation of the QuickBook executable, by
+# marking it as a dependency for QuickBook docs.
+#
+# If the user supplied the QuickBook command that will be used.
+#
+# Otherwise we search some sensible places for the QuickBook sources and compile
+# from scratch using the default toolset.
+#
+# As a last resort we rely on the shell to find 'quickbook'.
+#
+class quickbook-binary-generator : generator
+{
+ import modules path targets quickbook ;
+
+ rule run ( project name ? : property-set : sources * : multiple ? )
+ {
+ quickbook.freeze-config ;
+ # QuickBook invocation command and dependencies.
+ local quickbook-binary = [ modules.peek quickbook : .quickbook-binary ] ;
+ local quickbook-binary-dependencies ;
+
+ if ! $(quickbook-binary)
+ {
+ # If the QuickBook source directory was found, mark its main target
+ # as a dependency for the current project. Otherwise, try to find
+ # 'quickbook' in user's PATH
+ local quickbook-dir = [ modules.peek quickbook : .quickbook-dir ] ;
+ if $(quickbook-dir)
+ {
+ # Get the main-target in QuickBook directory.
+ local quickbook-main-target = [ targets.resolve-reference $(quickbook-dir) : $(project) ] ;
+
+ # The first element are actual targets, the second are
+ # properties found in target-id. We do not care about these
+ # since we have passed the id ourselves.
+ quickbook-main-target =
+ [ $(quickbook-main-target[1]).main-target quickbook ] ;
+
+ quickbook-binary-dependencies =
+ [ $(quickbook-main-target).generate [ $(property-set).propagated ] ] ;
+
+ # Ignore usage-requirements returned as first element.
+ quickbook-binary-dependencies = $(quickbook-binary-dependencies[2-]) ;
+
+ # Some toolsets generate extra targets (e.g. RSP). We must mark
+ # all targets as dependencies for the project, but we will only
+ # use the EXE target for quickbook-to-boostbook translation.
+ for local target in $(quickbook-binary-dependencies)
+ {
+ if [ $(target).type ] = EXE
+ {
+ quickbook-binary =
+ [ path.native
+ [ path.join
+ [ $(target).path ]
+ [ $(target).name ]
+ ]
+ ] ;
+ }
+ }
+ }
+ }
+
+ # Add $(quickbook-binary-dependencies) as a dependency of the current
+ # project and set it as the <quickbook-binary> feature for the
+ # quickbook-to-boostbook rule, below.
+ property-set = [ $(property-set).add-raw
+ <dependency>$(quickbook-binary-dependencies)
+ <quickbook-binary>$(quickbook-binary)
+ <quickbook-binary-dependencies>$(quickbook-binary-dependencies)
+ ] ;
+
+ return [ generator.run $(project) $(name) : $(property-set) : $(sources) : $(multiple) ] ;
+ }
+}
+
+
+# Define a scanner for tracking QBK include dependencies.
+#
+class qbk-scanner : common-scanner
+{
+ rule pattern ( )
+ {
+ return "\\[[ ]*include[ ]+([^]]+)\\]"
+ "\\[[ ]*include:[a-zA-Z0-9_]+[ ]+([^]]+)\\]"
+ "\\[[ ]*import[ ]+([^]]+)\\]" ;
+ }
+}
+
+
+scanner.register qbk-scanner : include ;
+
+type.set-scanner QUICKBOOK : qbk-scanner ;
+
+
+# Initialization of toolset.
+#
+# Parameters:
+# command ? -> path to QuickBook executable.
+#
+# When command is not supplied toolset will search for QuickBook directory and
+# compile the executable from source. If that fails we still search the path for
+# 'quickbook'.
+#
+rule init (
+ command ? # path to the QuickBook executable.
+ )
+{
+ if $(command)
+ {
+ if $(.config-frozen)
+ {
+ errors.user-error "quickbook: configuration cannot be changed after it has been used." ;
+ }
+ .command = $(command) ;
+ }
+}
+
+rule freeze-config ( )
+{
+ if ! $(.config-frozen)
+ {
+ .config-frozen = true ;
+
+ # QuickBook invocation command and dependencies.
+
+ .quickbook-binary = $(.command) ;
+
+ if $(.quickbook-binary)
+ {
+ # Use user-supplied command.
+ .quickbook-binary = [ common.get-invocation-command quickbook : quickbook : $(.quickbook-binary) ] ;
+ }
+ else
+ {
+ # Search for QuickBook sources in sensible places, like
+ # $(BOOST_ROOT)/tools/quickbook
+ # $(BOOST_BUILD_PATH)/../../quickbook
+
+ # And build quickbook executable from sources.
+
+ local boost-root = [ modules.peek : BOOST_ROOT ] ;
+ local boost-build-path = [ build-system.location ] ;
+
+ if $(boost-root)
+ {
+ .quickbook-dir += [ path.join $(boost-root) tools ] ;
+ }
+
+ if $(boost-build-path)
+ {
+ .quickbook-dir += $(boost-build-path)/../.. ;
+ }
+
+ .quickbook-dir = [ path.glob $(.quickbook-dir) : quickbook ] ;
+
+ # If the QuickBook source directory was found, mark its main target
+ # as a dependency for the current project. Otherwise, try to find
+ # 'quickbook' in user's PATH
+ if $(.quickbook-dir)
+ {
+ .quickbook-dir = [ path.make $(.quickbook-dir[1]) ] ;
+ }
+ else
+ {
+ ECHO "QuickBook warning: The path to the quickbook executable was" ;
+ ECHO " not provided. Additionally, couldn't find QuickBook" ;
+ ECHO " sources searching in" ;
+ ECHO " * BOOST_ROOT/tools/quickbook" ;
+ ECHO " * BOOST_BUILD_PATH/../../quickbook" ;
+ ECHO " Will now try to find a precompiled executable by searching" ;
+ ECHO " the PATH for 'quickbook'." ;
+ ECHO " To disable this warning in the future, or to completely" ;
+ ECHO " avoid compilation of quickbook, you can explicitly set the" ;
+ ECHO " path to a quickbook executable command in user-config.jam" ;
+ ECHO " or site-config.jam with the call" ;
+ ECHO " using quickbook : /path/to/quickbook ;" ;
+
+ # As a last resort, search for 'quickbook' command in path. Note
+ # that even if the 'quickbook' command is not found,
+ # get-invocation-command will still return 'quickbook' and might
+ # generate an error while generating the virtual-target.
+
+ .quickbook-binary = [ common.get-invocation-command quickbook : quickbook ] ;
+ }
+ }
+ }
+}
+
+
+generators.register [ new quickbook-binary-generator quickbook.quickbook-to-boostbook : QUICKBOOK : XML ] ;
+
+
+# <quickbook-binary> shell command to run QuickBook
+# <quickbook-binary-dependencies> targets to build QuickBook from sources.
+toolset.flags quickbook.quickbook-to-boostbook QB-COMMAND <quickbook-binary> ;
+toolset.flags quickbook.quickbook-to-boostbook QB-DEPENDENCIES <quickbook-binary-dependencies> ;
+toolset.flags quickbook.quickbook-to-boostbook INCLUDES <include> ;
+toolset.flags quickbook.quickbook-to-boostbook QB-DEFINES <quickbook-define> ;
+toolset.flags quickbook.quickbook-to-boostbook QB-INDENT <quickbook-indent> ;
+toolset.flags quickbook.quickbook-to-boostbook QB-LINE-WIDTH <quickbook-line-width> ;
+
+
+rule quickbook-to-boostbook ( target : source : properties * )
+{
+ # Signal dependency of quickbook sources on <quickbook-binary-dependencies>
+ # upon invocation of quickbook-to-boostbook.
+ DEPENDS $(target) : [ on $(target) return $(QB-DEPENDENCIES) ] ;
+}
+
+
+actions quickbook-to-boostbook
+{
+ "$(QB-COMMAND)" -I"$(INCLUDES)" -D"$(QB-DEFINES)" --indent="$(QB-INDENT)" --linewidth="$(QB-LINE-WIDTH)" --output-file="$(1)" "$(2)"
+}
+
+
+# Declare a main target to convert a quickbook source into a boostbook XML file.
+#
+rule to-boostbook ( target-name : sources * : requirements * : default-build * )
+{
+ local project = [ project.current ] ;
+
+ targets.main-target-alternative
+ [ new typed-target $(target-name) : $(project) : XML
+ : [ targets.main-target-sources $(sources) : $(target-name) ]
+ : [ targets.main-target-requirements $(requirements) : $(project) ]
+ : [ targets.main-target-default-build $(default-build) : $(project) ]
+ ] ;
+}
diff --git a/src/kenlm/jam-files/boost-build/tools/rc.jam b/src/kenlm/jam-files/boost-build/tools/rc.jam
new file mode 100644
index 0000000..de4071f
--- /dev/null
+++ b/src/kenlm/jam-files/boost-build/tools/rc.jam
@@ -0,0 +1,155 @@
+# Copyright (C) Andre Hentz 2003. Permission to copy, use, modify, sell and
+# distribute this software is granted provided this copyright notice appears in
+# all copies. This software is provided "as is" without express or implied
+# warranty, and with no claim as to its suitability for any purpose.
+#
+# Copyright (c) 2006 Rene Rivera.
+#
+# Use, modification and distribution is subject to the Boost Software
+# License Version 1.0. (See accompanying file LICENSE_1_0.txt or
+# http://www.boost.org/LICENSE_1_0.txt)
+
+import generators ;
+import feature ;
+import scanner ;
+import toolset : flags ;
+import type ;
+
+if [ MATCH (--debug-configuration) : [ modules.peek : ARGV ] ]
+{
+ .debug-configuration = true ;
+}
+
+type.register RC : rc ;
+
+rule init ( )
+{
+}
+
+# Configures a new resource compilation command specific to a condition,
+# usually a toolset selection condition. The possible options are:
+#
+# * <rc-type>(rc|windres) - Indicates the type of options the command
+# accepts.
+#
+# Even though the arguments are all optional, only when a command, condition,
+# and at minimum the rc-type option are given will the command be configured.
+# This is so that callers don't have to check auto-configuration values before
+# calling this. And still get the functionality of build failures when the
+# resource compiler can not be found.
+#
+rule configure ( command ? : condition ? : options * )
+{
+ local rc-type = [ feature.get-values <rc-type> : $(options) ] ;
+
+ if $(command) && $(condition) && $(rc-type)
+ {
+ flags rc.compile.resource .RC $(condition) : $(command) ;
+ flags rc.compile.resource .RC_TYPE $(condition) : $(rc-type:L) ;
+ flags rc.compile.resource DEFINES <define> ;
+ flags rc.compile.resource INCLUDES <include> ;
+ if $(.debug-configuration)
+ {
+ ECHO notice: using rc compiler :: $(condition) :: $(command) ;
+ }
+ }
+}
+
+rule compile.resource ( target : sources * : properties * )
+{
+ local rc-type = [ on $(target) return $(.RC_TYPE) ] ;
+ rc-type ?= null ;
+ compile.resource.$(rc-type) $(target) : $(sources[1]) ;
+}
+
+actions compile.resource.rc
+{
+ "$(.RC)" -l 0x409 "-U$(UNDEFS)" "-D$(DEFINES)" -I"$(>:D)" -I"$(<:D)" -I"$(INCLUDES)" -fo "$(<)" "$(>)"
+}
+
+actions compile.resource.windres
+{
+ "$(.RC)" "-U$(UNDEFS)" "-D$(DEFINES)" -I"$(>:D)" -I"$(<:D)" -I"$(INCLUDES)" -o "$(<)" -i "$(>)"
+}
+
+actions quietly compile.resource.null
+{
+ as /dev/null -o "$(<)"
+}
+
+# Since it is common practice to write
+# exe hello : hello.cpp hello.rc
+# we change the name of object created from RC file, to avoid conflict with
+# hello.cpp. The reason we generate OBJ and not RES, is that gcc does not seem
+# to like RES files, but works OK with OBJ (see
+# http://article.gmane.org/gmane.comp.lib.boost.build/5643).
+#
+# Using 'register-c-compiler' adds the build directory to INCLUDES
+generators.register-c-compiler rc.compile.resource : RC : OBJ(%_res) ;
+
+# Register scanner for resources
+class res-scanner : scanner
+{
+ import path ;
+ import regex ;
+ import scanner ;
+ import virtual-target ;
+
+ rule __init__ ( includes * )
+ {
+ scanner.__init__ ;
+ self.includes = $(includes) ;
+ }
+
+ rule pattern ( )
+ {
+ return "(([^ ]+[ ]+(BITMAP|CURSOR|FONT|ICON|MESSAGETABLE|RT_MANIFEST)[ ]+([^ \"]+|\"[^\"]+\"))|(#include[ ]*(<[^<]+>|\"[^\"]+\")))" ;
+ }
+
+ rule process ( target : matches * : binding )
+ {
+ local angle = [ regex.transform $(matches) : "#include[ ]*<([^<]+)>" ] ;
+ local quoted = [ regex.transform $(matches) : "#include[ ]*\"([^\"]+)\"" ] ;
+ local res = [ regex.transform $(matches) : "[^ ]+[ ]+(BITMAP|CURSOR|FONT|ICON|MESSAGETABLE|RT_MANIFEST)[ ]+(([^ \"]+)|\"([^\"]+)\")" : 3 4 ] ;
+
+ # Icons and other includes may be referenced as
+ #
+ # IDR_MAINFRAME ICON "res\\icon.ico"
+ #
+ # so we have to replace double backslashes with single ones.
+ res = [ regex.replace-list $(res) : "\\\\\\\\" : "/" ] ;
+
+ # CONSIDER: the new scoping rules seem to defeat "on target" variables.
+ local g = [ on $(target) return $(HDRGRIST) ] ;
+ local b = [ NORMALIZE_PATH $(binding:D) ] ;
+
+ # Attach binding of including file to included targets. When a target is
+ # directly created from a virtual target this extra information is
+ # unnecessary. But in other cases, it allows us to distinguish between
+ # two headers of the same name included from different places. We do not
+ # need this extra information for angle includes, since they should not
+ # depend on the including file (we can not get literal "." in the
+ # include path).
+ local g2 = $(g)"#"$(b) ;
+
+ angle = $(angle:G=$(g)) ;
+ quoted = $(quoted:G=$(g2)) ;
+ res = $(res:G=$(g2)) ;
+
+ local all = $(angle) $(quoted) $(res) ;
+
+ INCLUDES $(target) : $(all) ;
+ NOCARE $(all) ;
+ SEARCH on $(angle) = $(self.includes:G=) ;
+ SEARCH on $(quoted) $(res) = $(b) $(self.includes:G=) ;
+
+ # Just propagate the current scanner to includes, in hope that includes
+ # do not change scanners.
+ scanner.propagate $(__name__) : $(angle) $(quoted) : $(target) ;
+
+ ISFILE $(all) ;
+ }
+}
+
+scanner.register res-scanner : include ;
+type.set-scanner RC : res-scanner ;
diff --git a/src/kenlm/jam-files/boost-build/tools/stage.jam b/src/kenlm/jam-files/boost-build/tools/stage.jam
new file mode 100644
index 0000000..8d005ae
--- /dev/null
+++ b/src/kenlm/jam-files/boost-build/tools/stage.jam
@@ -0,0 +1,519 @@
+# Copyright 2003 Dave Abrahams
+# Copyright 2005, 2006 Rene Rivera
+# Copyright 2002, 2003, 2004, 2005, 2006 Vladimir Prus
+# Distributed under the Boost Software License, Version 1.0.
+# (See accompanying file LICENSE_1_0.txt or copy at
+# http://www.boost.org/LICENSE_1_0.txt)
+
+# This module defines the 'install' rule, used to copy a set of targets to a
+# single location.
+
+import "class" : new ;
+import feature ;
+import generators ;
+import path ;
+import project ;
+import targets ;
+import type ;
+import types/register ;
+import virtual-target ;
+
+
+feature.feature <install-dependencies> : off on : incidental ;
+feature.feature <install-type> : : free incidental ;
+feature.feature <install-source-root> : : free path ;
+feature.feature <so-version> : : free incidental ;
+
+# If 'on', version symlinks for shared libraries will not be created. Affects
+# Unix builds only.
+feature.feature <install-no-version-symlinks> : on : optional incidental ;
+
+
+class install-target-class : basic-target
+{
+ import "class" : new ;
+ import feature ;
+ import generators ;
+ import path ;
+ import project ;
+ import property ;
+ import property-set ;
+ import stage ;
+ import type ;
+
+ rule __init__ ( name-and-dir : project : sources * : requirements * :
+ default-build * : usage-requirements * )
+ {
+ # The usage-requirements specified here are ignored but are taken as a
+ # parameter to have this metatarget class have the same standard
+ # instantiation interface as all the other Boost Build metatarget
+ # classes.
+ basic-target.__init__ $(name-and-dir) : $(project) : $(sources) :
+ $(requirements) : $(default-build) ;
+ }
+
+ # If <location> is not set, sets it based on the project data.
+ #
+ rule update-location ( property-set )
+ {
+ local loc = [ $(property-set).get <location> ] ;
+ if ! $(loc)
+ {
+ loc = [ path.root $(self.name) [ $(self.project).get location ] ] ;
+ property-set = [ $(property-set).add-raw $(loc:G=<location>) ] ;
+ }
+
+ return $(property-set) ;
+ }
+
+ # Takes a target that is installed and a property set which is used when
+ # installing.
+ #
+ rule adjust-properties ( target : build-property-set )
+ {
+ local ps-raw ;
+ local a = [ $(target).action ] ;
+ if $(a)
+ {
+ local ps = [ $(a).properties ] ;
+ ps-raw = [ $(ps).raw ] ;
+
+ # Unless <hardcode-dll-paths>true is in properties, which can happen
+ # only if the user has explicitly requested it, nuke all <dll-path>
+ # properties.
+ if [ $(build-property-set).get <hardcode-dll-paths> ] != true
+ {
+ ps-raw = [ property.change $(ps-raw) : <dll-path> ] ;
+ }
+
+ # If any <dll-path> properties were specified for installing, add
+ # them.
+ local l = [ $(build-property-set).get <dll-path> ] ;
+ ps-raw += $(l:G=<dll-path>) ;
+
+ # Also copy <linkflags> feature from current build set, to be used
+ # for relinking.
+ local l = [ $(build-property-set).get <linkflags> ] ;
+ ps-raw += $(l:G=<linkflags>) ;
+
+ # Remove the <tag> feature on original targets.
+ ps-raw = [ property.change $(ps-raw) : <tag> ] ;
+
+ # And <location>. If stage target has another stage target in
+ # sources, then we shall get virtual targets with the <location>
+ # property set.
+ ps-raw = [ property.change $(ps-raw) : <location> ] ;
+ }
+
+ local d = [ $(build-property-set).get <dependency> ] ;
+ ps-raw += $(d:G=<dependency>) ;
+
+ local d = [ $(build-property-set).get <location> ] ;
+ ps-raw += $(d:G=<location>) ;
+
+ local ns = [ $(build-property-set).get <install-no-version-symlinks> ] ;
+ ps-raw += $(ns:G=<install-no-version-symlinks>) ;
+
+ local d = [ $(build-property-set).get <install-source-root> ] ;
+ # Make the path absolute: we shall use it to compute relative paths and
+ # making the path absolute will help.
+ if $(d)
+ {
+ d = [ path.root $(d) [ path.pwd ] ] ;
+ ps-raw += $(d:G=<install-source-root>) ;
+ }
+
+ if $(ps-raw)
+ {
+ return [ property-set.create $(ps-raw) ] ;
+ }
+ else
+ {
+ return [ property-set.empty ] ;
+ }
+ }
+
+ rule construct ( name : source-targets * : property-set )
+ {
+ source-targets = [ targets-to-stage $(source-targets) :
+ $(property-set) ] ;
+
+ property-set = [ update-location $(property-set) ] ;
+
+ local ename = [ $(property-set).get <name> ] ;
+
+ if $(ename) && $(source-targets[2])
+ {
+ import errors : error : $(__name__) : errors.error ;
+ errors.error When <name> property is used "in" 'install', only one
+ source is allowed. ;
+ }
+
+ local result ;
+ for local i in $(source-targets)
+ {
+ local staged-targets ;
+
+ local new-properties = [ adjust-properties $(i) :
+ $(property-set) ] ;
+
+ # See if something special should be done when staging this type. It
+ # is indicated by the presence of a special "INSTALLED_" type.
+ local t = [ $(i).type ] ;
+ if $(t) && [ type.registered INSTALLED_$(t) ]
+ {
+ if $(ename)
+ {
+ import errors : error : $(__name__) : errors.error ;
+ errors.error In 'install': <name> property specified with
+ target that requires relinking. ;
+ }
+ else
+ {
+ local targets = [ generators.construct $(self.project)
+ $(name) : INSTALLED_$(t) : $(new-properties) : $(i) ] ;
+ staged-targets += $(targets[2-]) ;
+ }
+ }
+ else
+ {
+ staged-targets = [ stage.copy-file $(self.project) $(ename) :
+ $(i) : $(new-properties) ] ;
+ }
+
+ if ! $(staged-targets)
+ {
+ import errors : error : $(__name__) : errors.error ;
+ errors.error Unable to generate staged version of
+ [ $(i).str ] ; # fix: loop variable is 'i'; $(source) is unbound here
+ }
+
+ for t in $(staged-targets)
+ {
+ result += [ virtual-target.register $(t) ] ;
+ }
+ }
+
+ return [ property-set.empty ] $(result) ;
+ }
+
+ # Given the list of source targets explicitly passed to 'stage', returns the
+ # list of targets which must be staged.
+ #
+ rule targets-to-stage ( source-targets * : property-set )
+ {
+ local result ;
+
+ # Traverse the dependencies, if needed.
+ if [ $(property-set).get <install-dependencies> ] = "on"
+ {
+ source-targets = [ collect-targets $(source-targets) ] ;
+ }
+
+ # Filter the target types, if needed.
+ local included-types = [ $(property-set).get <install-type> ] ;
+ for local r in $(source-targets)
+ {
+ local ty = [ $(r).type ] ;
+ if $(ty)
+ {
+ # Do not stage searched libs.
+ if $(ty) != SEARCHED_LIB
+ {
+ if $(included-types)
+ {
+ if [ include-type $(ty) : $(included-types) ]
+ {
+ result += $(r) ;
+ }
+ }
+ else
+ {
+ result += $(r) ;
+ }
+ }
+ }
+ else if ! $(included-types)
+ {
+ # Do not install typeless targets if there is an explicit list
+ # of allowed types.
+ result += $(r) ;
+ }
+ }
+
+ return $(result) ;
+ }
+
+ # CONSIDER: figure out why we can not use virtual-target.traverse here.
+ #
+ rule collect-targets ( targets * )
+ {
+ # Find subvariants
+ local s ;
+ for local t in $(targets)
+ {
+ s += [ $(t).creating-subvariant ] ;
+ }
+ s = [ sequence.unique $(s) ] ;
+
+ local result = [ new set ] ;
+ $(result).add $(targets) ;
+
+ for local i in $(s)
+ {
+ $(i).all-referenced-targets $(result) ;
+ }
+ local result2 ;
+ for local r in [ $(result).list ]
+ {
+ if $(r:G) != <use>
+ {
+ result2 += $(r:G=) ;
+ }
+ }
+ DELETE_MODULE $(result) ;
+ return [ sequence.unique $(result2) ] ;
+ }
+
+ # Returns true iff 'type' is subtype of some element of 'types-to-include'.
+ #
+ local rule include-type ( type : types-to-include * )
+ {
+ local found ;
+ while $(types-to-include) && ! $(found)
+ {
+ if [ type.is-subtype $(type) $(types-to-include[1]) ]
+ {
+ found = true ;
+ }
+ types-to-include = $(types-to-include[2-]) ;
+ }
+
+ return $(found) ;
+ }
+}
+
+
+# Creates a copy of target 'source'. The 'properties' object should have a
+# <location> property which specifies where the target must be placed.
+#
+rule copy-file ( project name ? : source : properties )
+{
+ name ?= [ $(source).name ] ;
+ local relative ;
+
+ local new-a = [ new non-scanning-action $(source) : common.copy :
+ $(properties) ] ;
+ local source-root = [ $(properties).get <install-source-root> ] ;
+ if $(source-root)
+ {
+ # Get the real path of the target. We probably need to strip relative
+ # path from the target name at construction.
+ local path = [ $(source).path ] ;
+ path = [ path.root $(name:D) $(path) ] ;
+ # Make the path absolute. Otherwise, it would be hard to compute the
+ # relative path. The 'source-root' is already absolute, see the
+ # 'adjust-properties' method above.
+ path = [ path.root $(path) [ path.pwd ] ] ;
+
+ relative = [ path.relative-to $(source-root) $(path) ] ;
+ }
+
+ # Note: Using $(name:D=$(relative)) might be faster here, but then we would
+ # need to explicitly check that relative is not ".", otherwise we might get
+ # paths like '<prefix>/boost/.', try to create it and mkdir would obviously
+ # fail.
+ name = [ path.join $(relative) $(name:D=) ] ;
+
+ return [ new file-target $(name) exact : [ $(source).type ] : $(project) :
+ $(new-a) ] ;
+}
+
+
+rule symlink ( name : project : source : properties )
+{
+ local a = [ new action $(source) : symlink.ln : $(properties) ] ;
+ local t = [ new file-target $(name) exact : [ $(source).type ] : $(project)
+ : $(a) ] ;
+ return [ virtual-target.register $(t) ] ;
+}
+
+
+rule relink-file ( project : source : property-set )
+{
+ local action = [ $(source).action ] ;
+ local cloned-action = [ virtual-target.clone-action $(action) : $(project) :
+ "" : $(property-set) ] ;
+ return [ $(cloned-action).targets ] ;
+}
+
+
+# Declare installed version of the EXE type. Generator for this type will cause
+# relinking to the new location.
+type.register INSTALLED_EXE : : EXE ;
+
+
+class installed-exe-generator : generator
+{
+ import type ;
+ import property-set ;
+ import modules ;
+ import stage ;
+
+ rule __init__ ( )
+ {
+ generator.__init__ install-exe : EXE : INSTALLED_EXE ;
+ }
+
+ rule run ( project name ? : property-set : source : multiple ? )
+ {
+ local stage-rule = stage.copy-file ;
+
+ if ! [ $(property-set).get <os> ] in NT CYGWIN &&
+ ! [ $(property-set).get <target-os> ] in windows cygwin
+ {
+ # If dll-path properties have been changed for the stage target,
+ # relink instead of copying.
+ local a = [ $(source).action ] ;
+ local p = [ $(a).properties ] ;
+ local original = [ $(p).get <dll-path> ] ;
+ local current = [ $(property-set).get <dll-path> ] ;
+
+ if $(current) != $(original)
+ {
+ stage-rule = stage.relink-file ;
+ }
+ }
+
+ return [ $(stage-rule) $(project) : $(source) : $(property-set) ] ;
+ }
+}
+
+
+generators.register [ new installed-exe-generator ] ;
+
+
+# Installing a shared link on Unix might cause a creation of versioned symbolic
+# links.
+type.register INSTALLED_SHARED_LIB : : SHARED_LIB ;
+
+
+class installed-shared-lib-generator : generator
+{
+ import type ;
+ import property-set ;
+ import modules ;
+ import stage ;
+
+ rule __init__ ( )
+ {
+ generator.__init__ install-shared-lib : SHARED_LIB :
+ INSTALLED_SHARED_LIB ;
+ }
+
+ rule run ( project name ? : property-set : source : multiple ? )
+ {
+ if [ $(property-set).get <os> ] in NT CYGWIN ||
+ [ $(property-set).get <target-os> ] in windows cygwin
+ {
+ local copied = [ stage.copy-file $(project) : $(source) :
+ $(property-set) ] ;
+ return [ virtual-target.register $(copied) ] ;
+ }
+ else
+ {
+ local a = [ $(source).action ] ;
+ local copied ;
+ if ! $(a)
+ {
+ # Non-derived file, just copy.
+ copied = [ stage.copy-file $(project) : $(source) :
+ $(property-set) ] ;
+ }
+ else
+ {
+ local cp = [ $(a).properties ] ;
+ local current-dll-path = [ $(cp).get <dll-path> ] ;
+ local new-dll-path = [ $(property-set).get <dll-path> ] ;
+
+ if $(current-dll-path) != $(new-dll-path)
+ {
+ # Rpath changed, need to relink.
+ copied = [ stage.relink-file $(project) : $(source) :
+ $(property-set) ] ;
+ }
+ else
+ {
+ copied = [ stage.copy-file $(project) : $(source) :
+ $(property-set) ] ;
+ }
+ }
+
+ copied = [ virtual-target.register $(copied) ] ;
+
+ local result = $(copied) ;
+ # If the name is in the form NNN.XXX.YYY.ZZZ, where all 'X', 'Y' and
+ # 'Z' are numbers, we need to create NNN.XXX and NNN.XXX.YYY
+ # symbolic links.
+ local m = [ MATCH
+ (.*)\\.([0123456789]+)\\.([0123456789]+)\\.([0123456789]+)$ :
+ [ $(copied).name ] ] ;
+ if $(m)
+ {
+ # Symlink without version at all is used to make
+ # -lsome_library work.
+ result += [ stage.symlink $(m[1]) : $(project) : $(copied) :
+ $(property-set) ] ;
+
+ # Symlinks of some libfoo.N and libfoo.N.M are used so that
+ # library can be found at runtime, if libfoo.N.M.X has soname of
+ # libfoo.N. That happens when the library makes some binary
+ # compatibility guarantees. If not, it is possible to skip those
+ # symlinks.
+ local suppress = [ $(property-set).get
+ <install-no-version-symlinks> ] ;
+
+ if $(suppress) != "on"
+ {
+ result += [ stage.symlink $(m[1]).$(m[2]) : $(project) :
+ $(copied) : $(property-set) ] ;
+ result += [ stage.symlink $(m[1]).$(m[2]).$(m[3]) :
+ $(project) : $(copied) : $(property-set) ] ;
+ }
+ }
+
+ return $(result) ;
+ }
+ }
+}
+
+generators.register [ new installed-shared-lib-generator ] ;
+
+
+# Main target rule for 'install'.
+#
+rule install ( name : sources * : requirements * : default-build * )
+{
+ local project = [ project.current ] ;
+
+ # Unless the user has explicitly asked us to hardcode dll paths, add
+ # <hardcode-dll-paths>false in requirements, to override default value.
+ if ! <hardcode-dll-paths>true in $(requirements)
+ {
+ requirements += <hardcode-dll-paths>false ;
+ }
+
+ if <tag> in $(requirements:G)
+ {
+ import errors ;
+ errors.user-error The <tag> property is not allowed for the 'install'
+ rule. ;
+ }
+
+ targets.create-metatarget install-target-class : $(project) : $(name) :
+ $(sources) : $(requirements) : $(default-build) ;
+}
+
+
+IMPORT $(__name__) : install : : install ;
+IMPORT $(__name__) : install : : stage ;
diff --git a/src/kenlm/jam-files/boost-build/tools/stlport.jam b/src/kenlm/jam-files/boost-build/tools/stlport.jam
new file mode 100644
index 0000000..ed0947c
--- /dev/null
+++ b/src/kenlm/jam-files/boost-build/tools/stlport.jam
@@ -0,0 +1,309 @@
+# Copyright Gennadiy Rozental
+# Copyright 2006 Rene Rivera
+# Copyright 2003, 2004, 2006 Vladimir Prus
+# Distributed under the Boost Software License, Version 1.0.
+# (See accompanying file LICENSE_1_0.txt or http://www.boost.org/LICENSE_1_0.txt)
+
+# The STLPort is usable by means of 'stdlib' feature. When
+# stdlib=stlport is specified, default version of STLPort will be used,
+# while stdlib=stlport-4.5 will use specific version.
+# The subfeature value 'hostios' means to use host compiler's iostreams.
+#
+# The specific version of stlport is selected by features:
+# The <runtime-link> feature selects between static and shared library
+# The <runtime-debugging>on selects STLPort with debug symbols
+# and stl debugging.
+# There's no way to use STLPort with debug symbols but without
+# stl debugging.
+
+# TODO: must implement selection of different STLPort installations based
+# on used toolset.
+# Also, finish various flags:
+#
+# This is copied from V1 toolset, "+" means "implemented"
+#+flags $(CURR_TOOLSET) DEFINES <stlport-iostream>off : _STLP_NO_OWN_IOSTREAMS=1 _STLP_HAS_NO_NEW_IOSTREAMS=1 ;
+#+flags $(CURR_TOOLSET) DEFINES <stlport-extensions>off : _STLP_NO_EXTENSIONS=1 ;
+# flags $(CURR_TOOLSET) DEFINES <stlport-anachronisms>off : _STLP_NO_ANACHRONISMS=1 ;
+# flags $(CURR_TOOLSET) DEFINES <stlport-cstd-namespace>global : _STLP_VENDOR_GLOBAL_CSTD=1 ;
+# flags $(CURR_TOOLSET) DEFINES <exception-handling>off : _STLP_NO_EXCEPTIONS=1 ;
+# flags $(CURR_TOOLSET) DEFINES <stlport-debug-alloc>on : _STLP_DEBUG_ALLOC=1 ;
+#+flags $(CURR_TOOLSET) DEFINES <runtime-build>debug : _STLP_DEBUG=1 _STLP_DEBUG_UNINITIALIZED=1 ;
+#+flags $(CURR_TOOLSET) DEFINES <runtime-link>dynamic : _STLP_USE_DYNAMIC_LIB=1 ;
+
+
+import feature : feature subfeature ;
+import project ;
+import "class" : new ;
+import targets ;
+import property-set ;
+import common ;
+import type ;
+
+# Make this module into a project.
+project.initialize $(__name__) ;
+project stlport ;
+
+# The problem: how to request to use host compiler's iostreams?
+#
+# Solution 1: Global 'stlport-iostream' feature.
+# That's ugly. Subfeature make more sense for stlport-specific thing.
+# Solution 2: Use subfeature with two values, one of which ("use STLPort iostream")
+# is default.
+# The problem is that such subfeature will appear in target paths, and that's ugly
+# Solution 3: Use optional subfeature with only one value.
+
+feature.extend stdlib : stlport ;
+feature.compose <stdlib>stlport : <library>/stlport//stlport ;
+
+# STLport iostreams or native iostreams
+subfeature stdlib stlport : iostream : hostios : optional propagated ;
+
+# STLport extensions
+subfeature stdlib stlport : extensions : noext : optional propagated ;
+
+# STLport anachronisms -- NOT YET SUPPORTED
+# subfeature stdlib stlport : anachronisms : on off ;
+
+# STLport debug allocation -- NOT YET SUPPORTED
+#subfeature stdlib stlport : debug-alloc : off on ;
+
+# Declare a special target class to handle the creation of search-lib-target
+# instances for STLport. We need a special class, because otherwise we would have to:
+# - declare prebuilt targets for all possible toolsets. And by the time 'init'
+# is called we don't even know the list of toolsets that are registered
+# - when host iostreams are used, we really should produce nothing. It would
+# be hard/impossible to achieve this using prebuilt targets.
+
+class stlport-target-class : basic-target
+{
+ import feature project type errors generators ;
+ import set : difference ;
+
+ rule __init__ ( project : headers ? : libraries * : version ? )
+ {
+ basic-target.__init__ stlport : $(project) ;
+ self.headers = $(headers) ;
+ self.libraries = $(libraries) ;
+ self.version = $(version) ;
+ self.version.5 = [ MATCH "^(5[.][0123456789]+).*" : $(version) ] ;
+
+ local requirements ;
+ requirements += <stdlib-stlport:version>$(self.version) ;
+ self.requirements = [ property-set.create $(requirements) ] ;
+ }
+
+ rule generate ( property-set )
+ {
+ # Since this target is built with <stdlib>stlport, it will also
+ # have <library>/stlport//stlport in requirements, which will
+ # cause a loop in main target references. Remove that property
+ # manually.
+
+ property-set = [ property-set.create
+ [ difference
+ [ $(property-set).raw ] :
+ <library>/stlport//stlport
+ <stdlib>stlport
+ ]
+ ] ;
+ return [ basic-target.generate $(property-set) ] ;
+ }
+
+ rule construct ( name : source-targets * : property-set )
+ {
+ # Deduce the name of stlport library, based on toolset and
+ # debug setting.
+ local raw = [ $(property-set).raw ] ;
+ local hostios = [ feature.get-values <stdlib-stlport:iostream> : $(raw) ] ;
+ local toolset = [ feature.get-values <toolset> : $(raw) ] ;
+
+ if $(self.version.5)
+ {
+ # Version 5.x
+
+ # STLport host IO streams no longer supported. So we always
+ # need libraries.
+
+ # name: stlport(stl)?[dg]?(_static)?.M.R
+ local name = stlport ;
+ if [ feature.get-values <runtime-debugging> : $(raw) ] = "on"
+ {
+ name += stl ;
+ switch $(toolset)
+ {
+ case gcc* : name += g ;
+ case darwin* : name += g ;
+ case * : name += d ;
+ }
+ }
+
+ if [ feature.get-values <runtime-link> : $(raw) ] = "static"
+ {
+ name += _static ;
+ }
+
+ # Starting with version 5.2.0, the STLport static libraries no
+ # longer include a version number in their name
+ local version.pre.5.2 = [ MATCH "^(5[.][01]+).*" : $(self.version) ] ; # fix: $(version) is unbound in this rule
+ if $(version.pre.5.2) || [ feature.get-values <runtime-link> :
+ $(raw) ] != "static"
+ {
+ name += .$(self.version.5) ;
+ }
+
+ name = $(name:J=) ;
+
+ if [ feature.get-values <install-dependencies> : $(raw) ] = "on"
+ {
+ #~ Allow explicitly asking to install the STLport lib by
+ #~ referring to it directly:
+ #~ /stlport//stlport/<install-dependencies>on
+ #~ This allows for install packaging of all libs one might need
+ #~ for a standalone distribution.
+ import path : make : path-make ;
+ local runtime-link
+ = [ feature.get-values <runtime-link> : $(raw) ] ;
+ local lib-file.props
+ = [ property-set.create $(raw) <link>$(runtime-link) ] ;
+ local lib-file.prefix
+ = [ type.generated-target-prefix $(runtime-link:U)_LIB :
+ $(lib-file.props) ] ;
+ local lib-file.suffix
+ = [ type.generated-target-suffix $(runtime-link:U)_LIB :
+ $(lib-file.props) ] ;
+ lib-file.prefix
+ ?= "" "lib" ;
+ lib-file.suffix
+ ?= "" ;
+ local lib-file
+ = [ GLOB $(self.libraries) [ modules.peek : PATH ] :
+ $(lib-file.prefix)$(name).$(lib-file.suffix) ] ;
+ lib-file
+ = [ new file-reference [ path-make $(lib-file[1]) ] :
+ $(self.project) ] ;
+ lib-file
+ = [ $(lib-file).generate "" ] ;
+ local lib-file.requirements
+ = [ targets.main-target-requirements
+ [ $(lib-file.props).raw ] <file>$(lib-file[-1])
+ : $(self.project) ] ;
+ return [ generators.construct $(self.project) $(name) : LIB :
+ $(lib-file.requirements) ] ;
+ }
+ else
+ {
+ #~ Otherwise, it is just regular library usage.
+ return [ generators.construct
+ $(self.project) $(name) : SEARCHED_LIB : $(property-set) ] ;
+ }
+ }
+ else if ! $(hostios) && $(toolset) != msvc
+ {
+ # We don't need libraries if host iostreams are used. For
+ # msvc, automatic library selection will be used.
+
+ # name: stlport_<toolset>(_stldebug)?
+ local name = stlport ;
+ name = $(name)_$(toolset) ;
+ if [ feature.get-values <runtime-debugging> : $(raw) ] = "on"
+ {
+ name = $(name)_stldebug ;
+ }
+
+ return [ generators.construct
+ $(self.project) $(name) : SEARCHED_LIB : $(property-set) ] ;
+ }
+ else
+ {
+ return [ property-set.empty ] ;
+ }
+ }
+
+ rule compute-usage-requirements ( subvariant )
+ {
+ local usage-requirements =
+ <include>$(self.headers)
+ <dll-path>$(self.libraries)
+ <library-path>$(self.libraries)
+ ;
+
+ local rproperties = [ $(subvariant).build-properties ] ;
+ # CONSIDER: should this "if" sequence be replaced with
+ # some use of 'property-map' class?
+ if [ $(rproperties).get <runtime-debugging> ] = "on"
+ {
+ usage-requirements +=
+ <define>_STLP_DEBUG=1
+ <define>_STLP_DEBUG_UNINITIALIZED=1 ;
+ }
+ if [ $(rproperties).get <runtime-link> ] = "shared"
+ {
+ usage-requirements +=
+ <define>_STLP_USE_DYNAMIC_LIB=1 ;
+ }
+ if [ $(rproperties).get <stdlib-stlport:extensions> ] = noext
+ {
+ usage-requirements +=
+ <define>_STLP_NO_EXTENSIONS=1 ;
+ }
+ if [ $(rproperties).get <stdlib-stlport:iostream> ] = hostios
+ {
+ usage-requirements +=
+ <define>_STLP_NO_OWN_IOSTREAMS=1
+ <define>_STLP_HAS_NO_NEW_IOSTREAMS=1 ;
+ }
+ if $(self.version.5)
+ {
+ # Version 5.x
+ if [ $(rproperties).get <threading> ] = "single"
+ {
+ # Since STLport5 doesn't normally support single-thread
+ # we force STLport5 into the multi-thread mode. Hence
+ # getting what other libs provide of single-thread code
+ # linking against a multi-thread lib.
+ usage-requirements +=
+ <define>_STLP_THREADS=1 ;
+ }
+ }
+
+ return [ property-set.create $(usage-requirements) ] ;
+ }
+}
+
+rule stlport-target ( headers ? : libraries * : version ? )
+{
+ local project = [ project.current ] ;
+
+ targets.main-target-alternative
+ [ new stlport-target-class $(project) : $(headers) : $(libraries)
+ : $(version)
+ ] ;
+}
+
+local .version-subfeature-defined ;
+
+# Initialize stlport support.
+rule init (
+ version ? :
+ headers : # Location of header files
+ libraries * # Location of libraries, lib and bin subdirs of STLport.
+ )
+{
+ # FIXME: need to use common.check-init-parameters here.
+ # At the moment, that rule always tries to define subfeature
+ # of the 'toolset' feature, while we need to define subfeature
+ # of <stdlib>stlport, so tweaks to check-init-parameters are needed.
+ if $(version)
+ {
+ if ! $(.version-subfeature-defined)
+ {
+ feature.subfeature stdlib stlport : version : : propagated ;
+ .version-subfeature-defined = true ;
+ }
+ feature.extend-subfeature stdlib stlport : version : $(version) ;
+ }
+
+ # Declare the main target for this STLPort version.
+ stlport-target $(headers) : $(libraries) : $(version) ;
+}
+
diff --git a/src/kenlm/jam-files/boost-build/tools/sun.jam b/src/kenlm/jam-files/boost-build/tools/sun.jam
new file mode 100644
index 0000000..0ca927d
--- /dev/null
+++ b/src/kenlm/jam-files/boost-build/tools/sun.jam
@@ -0,0 +1,142 @@
+# Copyright (C) Christopher Currie 2003. Permission to copy, use,
+# modify, sell and distribute this software is granted provided this
+# copyright notice appears in all copies. This software is provided
+# "as is" without express or implied warranty, and with no claim as
+# to its suitability for any purpose.
+
+import property ;
+import generators ;
+import os ;
+import toolset : flags ;
+import feature ;
+import type ;
+import common ;
+
+feature.extend toolset : sun ;
+toolset.inherit sun : unix ;
+generators.override sun.prebuilt : builtin.lib-generator ;
+generators.override sun.prebuilt : builtin.prebuilt ;
+generators.override sun.searched-lib-generator : searched-lib-generator ;
+
+feature.extend stdlib : sun-stlport ;
+feature.compose <stdlib>sun-stlport
+ : <cxxflags>-library=stlport4 <linkflags>-library=stlport4
+ ;
+
+rule init ( version ? : command * : options * )
+{
+ local condition = [
+ common.check-init-parameters sun : version $(version) ] ;
+
+ command = [ common.get-invocation-command sun : CC
+ : $(command) : "/opt/SUNWspro/bin" ] ;
+
+ # Even if the real compiler is not found, put CC to
+ # command line so that the user sees the command line that would have been executed.
+ command ?= CC ;
+
+ common.handle-options sun : $(condition) : $(command) : $(options) ;
+
+ command_c = $(command[1--2]) $(command[-1]:B=cc) ;
+
+ toolset.flags sun CONFIG_C_COMMAND $(condition) : $(command_c) ;
+}
+
+# Declare generators
+generators.register-c-compiler sun.compile.c : C : OBJ : <toolset>sun ;
+generators.register-c-compiler sun.compile.c++ : CPP : OBJ : <toolset>sun ;
+
+# Declare flags and actions for compilation
+flags sun.compile OPTIONS <debug-symbols>on : -g ;
+flags sun.compile OPTIONS <profiling>on : -xprofile=tcov ;
+flags sun.compile OPTIONS <optimization>speed : -xO4 ;
+flags sun.compile OPTIONS <optimization>space : -xO2 -xspace ;
+flags sun.compile OPTIONS <threading>multi : -mt ;
+flags sun.compile OPTIONS <warnings>off : -erroff ;
+flags sun.compile OPTIONS <warnings>on : -erroff=%none ;
+flags sun.compile OPTIONS <warnings>all : -erroff=%none ;
+flags sun.compile OPTIONS <warnings-as-errors>on : -errwarn ;
+
+flags sun.compile.c++ OPTIONS <inlining>off : +d ;
+
+# The -m32 and -m64 options are supported starting
+# with Sun Studio 12. On earlier compilers, the
+# 'address-model' feature is not supported and should not
+# be used. Instead, use -xarch=generic64 command line
+# option.
+# See http://svn.boost.org/trac/boost/ticket/1186
+# for details.
+flags sun OPTIONS <address-model>32 : -m32 ;
+flags sun OPTIONS <address-model>64 : -m64 ;
+# On sparc, there's a difference between -Kpic
+# and -KPIC. The first is slightly more efficient,
+# but has the limits on the size of GOT table.
+# For minimal fuss on user side, we use -KPIC here.
+# See http://svn.boost.org/trac/boost/ticket/1186#comment:6
+# for detailed explanation.
+flags sun OPTIONS <link>shared : -KPIC ;
+
+flags sun.compile OPTIONS <cflags> ;
+flags sun.compile.c++ OPTIONS <cxxflags> ;
+flags sun.compile DEFINES <define> ;
+flags sun.compile INCLUDES <include> ;
+
+actions compile.c
+{
+ "$(CONFIG_C_COMMAND)" $(OPTIONS) -D$(DEFINES) -I"$(INCLUDES)" -c -o "$(<)" "$(>)"
+}
+
+actions compile.c++
+{
+ "$(CONFIG_COMMAND)" $(OPTIONS) -D$(DEFINES) -I"$(INCLUDES)" -c -o "$(<)" "$(>)"
+}
+
+# Declare flags and actions for linking
+flags sun.link OPTIONS <debug-symbols>on : -g ;
+# Strip the binary when no debugging is needed
+flags sun.link OPTIONS <debug-symbols>off : -s ;
+flags sun.link OPTIONS <profiling>on : -xprofile=tcov ;
+flags sun.link OPTIONS <threading>multi : -mt ;
+flags sun.link OPTIONS <linkflags> ;
+flags sun.link LINKPATH <library-path> ;
+flags sun.link FINDLIBS-ST <find-static-library> ;
+flags sun.link FINDLIBS-SA <find-shared-library> ;
+flags sun.link LIBRARIES <library-file> ;
+flags sun.link LINK-RUNTIME <runtime-link>static : static ;
+flags sun.link LINK-RUNTIME <runtime-link>shared : dynamic ;
+flags sun.link RPATH <dll-path> ;
+# On gcc, there are separate options for dll path at runtime and
+# link time. On Solaris, there's only one: -R, so we have to use
+# it, even though it's bad idea.
+flags sun.link RPATH <xdll-path> ;
+
+# The POSIX real-time library is always needed (nanosleep, clock_gettime etc.)
+flags sun.link FINDLIBS-SA : rt ;
+
+rule link ( targets * : sources * : properties * )
+{
+ SPACE on $(targets) = " " ;
+}
+
+actions link bind LIBRARIES
+{
+ "$(CONFIG_COMMAND)" $(OPTIONS) -L"$(LINKPATH)" -R"$(RPATH)" -o "$(<)" "$(>)" "$(LIBRARIES)" -Bdynamic -l$(FINDLIBS-SA) -Bstatic -l$(FINDLIBS-ST) -B$(LINK-RUNTIME)
+}
+
+# Slight mods for dlls
+rule link.dll ( targets * : sources * : properties * )
+{
+ SPACE on $(targets) = " " ;
+}
+
+actions link.dll bind LIBRARIES
+{
+ "$(CONFIG_COMMAND)" $(OPTIONS) -L"$(LINKPATH)" -R"$(RPATH)" -o "$(<)" -h$(<[1]:D=) -G "$(>)" "$(LIBRARIES)" -Bdynamic -l$(FINDLIBS-SA) -Bstatic -l$(FINDLIBS-ST) -B$(LINK-RUNTIME)
+}
+
+# Declare action for creating static libraries
+actions piecemeal archive
+{
+ "$(CONFIG_COMMAND)" -xar -o "$(<)" "$(>)"
+}
+
diff --git a/src/kenlm/jam-files/boost-build/tools/symlink.jam b/src/kenlm/jam-files/boost-build/tools/symlink.jam
new file mode 100644
index 0000000..b33e826
--- /dev/null
+++ b/src/kenlm/jam-files/boost-build/tools/symlink.jam
@@ -0,0 +1,140 @@
+# Copyright 2003 Dave Abrahams
+# Copyright 2002, 2003 Rene Rivera
+# Copyright 2002, 2003, 2004, 2005 Vladimir Prus
+# Distributed under the Boost Software License, Version 1.0.
+# (See accompanying file LICENSE_1_0.txt or http://www.boost.org/LICENSE_1_0.txt)
+
+# Defines the "symlink" special target. 'symlink' targets make symbolic links
+# to the sources.
+
+import targets modules path class os feature project property-set ;
+
+.count = 0 ;
+
+feature.feature symlink-location : project-relative build-relative : incidental ;
+
+# The class representing "symlink" targets.
+#
+class symlink-targets : basic-target
+{
+ import numbers modules class property project path ;
+
+ rule __init__ (
+ project
+ : targets *
+ : sources *
+ )
+ {
+ # Generate a fake name for now. Need unnamed targets eventually.
+ local c = [ modules.peek symlink : .count ] ;
+ modules.poke symlink : .count : [ numbers.increment $(c) ] ;
+ local fake-name = symlink#$(c) ;
+
+ basic-target.__init__ $(fake-name) : $(project) : $(sources) ;
+
+ # Remember the targets to map the sources onto. Pad or truncate
+ # to fit the sources given.
+ self.targets = ;
+ for local source in $(sources)
+ {
+ if $(targets)
+ {
+ self.targets += $(targets[1]) ;
+ targets = $(targets[2-]) ;
+ }
+ else
+ {
+ self.targets += $(source) ;
+ }
+ }
+
+ # The virtual targets corresponding to the given targets.
+ self.virtual-targets = ;
+ }
+
+ rule construct ( name : source-targets * : property-set )
+ {
+ local i = 1 ;
+ for local t in $(source-targets)
+ {
+ local s = $(self.targets[$(i)]) ;
+ local a = [ class.new action $(t) : symlink.ln : $(property-set) ] ;
+ local vt = [ class.new file-target $(s:D=)
+ : [ $(t).type ] : $(self.project) : $(a) ] ;
+
+ # Place the symlink in the directory relative to the project
+ # location, instead of placing it in the build directory.
+ if [ property.select <symlink-location> : [ $(property-set).raw ] ] = <symlink-location>project-relative
+ {
+ $(vt).set-path [ path.root $(s:D) [ $(self.project).get location ] ] ;
+ }
+
+ self.virtual-targets += $(vt) ;
+ i = [ numbers.increment $(i) ] ;
+ }
+ return [ property-set.empty ] $(self.virtual-targets) ;
+ }
+}
+
+# Creates a symbolic link from a set of targets to a set of sources.
+# The targets and sources map one to one. The symlinks generated are
+# limited to be the ones given as the sources. That is, the targets
+# are either padded or trimmed to equate to the sources. The padding
+# is done with the name of the corresponding source. For example::
+#
+# symlink : one two ;
+#
+# Is equal to::
+#
+# symlink one two : one two ;
+#
+# Names for symlink are relative to the project location. They cannot
+# include ".." path components.
+rule symlink (
+ targets *
+ : sources *
+ )
+{
+ local project = [ project.current ] ;
+
+ return [ targets.main-target-alternative
+ [ class.new symlink-targets $(project) : $(targets) :
+ # Note: inline targets are not supported for symlink, intentionally,
+ # since it's used to link existing non-local targets.
+ $(sources) ] ] ;
+}
+
+rule ln
+{
+ local os ;
+ if [ modules.peek : UNIX ] { os = UNIX ; }
+ else { os ?= [ os.name ] ; }
+ # Remember the path to make the link relative to where the symlink is located.
+ local path-to-source = [ path.relative-to
+ [ path.make [ on $(<) return $(LOCATE) ] ]
+ [ path.make [ on $(>) return $(LOCATE) ] ] ] ;
+ if $(path-to-source) = .
+ {
+ PATH_TO_SOURCE on $(<) = "" ;
+ }
+ else
+ {
+ PATH_TO_SOURCE on $(<) = [ path.native $(path-to-source) ] ;
+ }
+ ln-$(os) $(<) : $(>) ;
+}
+
+actions ln-UNIX
+{
+ ln -f -s '$(>:D=:R=$(PATH_TO_SOURCE))' '$(<)'
+}
+
+# there is a way to do this; we fall back to a copy for now
+actions ln-NT
+{
+ echo "NT symlinks not supported yet, making copy"
+ del /f /q "$(<)" 2>nul >nul
+ copy "$(>)" "$(<)" $(NULL_OUT)
+}
+
+IMPORT $(__name__) : symlink : : symlink ;
diff --git a/src/kenlm/jam-files/boost-build/tools/testing-aux.jam b/src/kenlm/jam-files/boost-build/tools/testing-aux.jam
new file mode 100644
index 0000000..64ba003
--- /dev/null
+++ b/src/kenlm/jam-files/boost-build/tools/testing-aux.jam
@@ -0,0 +1,220 @@
+# This module is imported by testing.py. The definitions here are
+# too tricky to do in Python
+
+# Causes the 'target' to exist after bjam invocation if and only if all the
+# dependencies were successfully built.
+#
+rule expect-success ( target : dependency + : requirements * )
+{
+ # Mark the test as passed once its dependencies are up to date. The
+ # original passed the unset variable $(sources) here (which expands to
+ # nothing in jam); pass the actual 'dependency' parameter instead.
+ **passed** $(target) : $(dependency) ;
+}
+IMPORT testing : expect-success : : testing.expect-success ;
+
+# Causes the 'target' to exist after bjam invocation if and only if some of
+# the dependencies were not successfully built.
+#
+rule expect-failure ( target : dependency + : properties * )
+{
+ # Build a marker target next to the dependency: same grist with "*fail"
+ # appended so its name stays unique.
+ local grist = [ MATCH ^<(.*)> : $(dependency:G) ] ;
+ local marker = $(dependency:G=$(grist)*fail) ;
+ (failed-as-expected) $(marker) ;
+ # Invert the meaning of a build failure for the dependency.
+ FAIL_EXPECTED $(dependency) ;
+ # Place the marker where the dependency lives, removing stale copies.
+ LOCATE on $(marker) = [ on $(dependency) return $(LOCATE) ] ;
+ RMOLD $(marker) ;
+ DEPENDS $(marker) : $(dependency) ;
+ DEPENDS $(target) : $(marker) ;
+ **passed** $(target) : $(marker) ;
+}
+IMPORT testing : expect-failure : : testing.expect-failure ;
+
+# The rule/action combination used to report successful passing of a test.
+#
+rule **passed**
+{
+ # Force deletion of the target, in case any dependencies failed to build.
+ RMOLD $(<) ;
+}
+
+
+# Used to create test files signifying passed tests.
+#
+actions **passed**
+{
+ echo passed > "$(<)"
+}
+
+
+# Used to create replacement object files that do not get created during tests
+# that are expected to fail.
+#
+actions (failed-as-expected)
+{
+ echo failed as expected > "$(<)"
+}
+
+# Runs executable 'sources' and stores stdout in file 'target'. Unless
+# --preserve-test-targets command line option has been specified, removes the
+# executable. The 'target-to-remove' parameter controls what should be removed:
+# - if 'none', does not remove anything, ever
+# - if empty, removes 'source'
+# - if non-empty and not 'none', contains a list of sources to remove.
+#
+rule capture-output ( target : source : properties * : targets-to-remove * )
+{
+ # Record the name of the intermediate .output file on the target so the
+ # capture-output action can refer to it.
+ output-file on $(target) = $(target:S=.output) ;
+ LOCATE on $(target:S=.output) = [ on $(target) return $(LOCATE) ] ;
+
+ # The INCLUDES kill a warning about independent target...
+ INCLUDES $(target) : $(target:S=.output) ;
+ # but it also puts .output into dependency graph, so we must tell jam it is
+ # OK if it cannot find the target or updating rule.
+ NOCARE $(target:S=.output) ;
+
+ # This has two-fold effect. First it adds input files to the dependency
+ # graph, preventing a warning. Second, it causes input files to be bound
+ # before target is created. Therefore, they are bound using SEARCH setting
+ # on them and not LOCATE setting of $(target), as in other case (due to jam
+ # bug).
+ DEPENDS $(target) : [ on $(target) return $(INPUT_FILES) ] ;
+
+ if $(targets-to-remove) = none
+ {
+ targets-to-remove = ;
+ }
+ else if ! $(targets-to-remove)
+ {
+ # Default: remove the test executable itself.
+ targets-to-remove = $(source) ;
+ }
+
+ if [ on $(target) return $(REMOVE_TEST_TARGETS) ]
+ {
+ TEMPORARY $(targets-to-remove) ;
+ # Set a second action on target that will be executed after capture
+ # output action. The 'RmTemps' rule has the 'ignore' modifier so it is
+ # always considered succeeded. This is needed for 'run-fail' test. For
+ # that test the target will be marked with FAIL_EXPECTED, and without
+ # 'ignore' successful execution will be negated and be reported as
+ # failure. With 'ignore' we do not detect a case where removing files
+ # fails, but it is not likely to happen.
+ RmTemps $(target) : $(targets-to-remove) ;
+ }
+}
+
+
+if [ os.name ] = NT
+{
+ .STATUS = %status% ;
+ .SET_STATUS = "set status=%ERRORLEVEL%" ;
+ .RUN_OUTPUT_NL = "echo." ;
+ .STATUS_0 = "%status% EQU 0 (" ;
+ .STATUS_NOT_0 = "%status% NEQ 0 (" ;
+ .VERBOSE = "%verbose% EQU 1 (" ;
+ .ENDIF = ")" ;
+ .SHELL_SET = "set " ;
+ .CATENATE = type ;
+ .CP = copy ;
+}
+else
+{
+ .STATUS = "$status" ;
+ .SET_STATUS = "status=$?" ;
+ .RUN_OUTPUT_NL = "echo" ;
+ .STATUS_0 = "test $status -eq 0 ; then" ;
+ .STATUS_NOT_0 = "test $status -ne 0 ; then" ;
+ .VERBOSE = "test $verbose -eq 1 ; then" ;
+ .ENDIF = "fi" ;
+ .SHELL_SET = "" ;
+ .CATENATE = cat ;
+ .CP = cp ;
+}
+
+
+.VERBOSE_TEST = 0 ;
+if --verbose-test in [ modules.peek : ARGV ]
+{
+ .VERBOSE_TEST = 1 ;
+}
+
+
+.RM = [ common.rm-command ] ;
+
+
+actions capture-output bind INPUT_FILES output-file
+{
+ $(PATH_SETUP)
+ $(LAUNCHER) "$(>)" $(ARGS) "$(INPUT_FILES)" > "$(output-file)" 2>&1
+ $(.SET_STATUS)
+ $(.RUN_OUTPUT_NL) >> "$(output-file)"
+ echo EXIT STATUS: $(.STATUS) >> "$(output-file)"
+ if $(.STATUS_0)
+ $(.CP) "$(output-file)" "$(<)"
+ $(.ENDIF)
+ $(.SHELL_SET)verbose=$(.VERBOSE_TEST)
+ if $(.STATUS_NOT_0)
+ $(.SHELL_SET)verbose=1
+ $(.ENDIF)
+ if $(.VERBOSE)
+ echo ====== BEGIN OUTPUT ======
+ $(.CATENATE) "$(output-file)"
+ echo ====== END OUTPUT ======
+ $(.ENDIF)
+ exit $(.STATUS)
+}
+
+IMPORT testing : capture-output : : testing.capture-output ;
+
+
+actions quietly updated ignore piecemeal together RmTemps
+{
+ $(.RM) "$(>)"
+}
+
+
+.MAKE_FILE = [ common.file-creation-command ] ;
+
+actions unit-test
+{
+ $(PATH_SETUP)
+ $(LAUNCHER) "$(>)" $(ARGS) && $(.MAKE_FILE) "$(<)"
+}
+
+# Note that this rule may be called multiple times for a single target in case
+# there are multiple actions operating on the same target in sequence. One such
+# example are msvc exe targets first created by a linker action and then updated
+# with an embedded manifest file by a separate action.
+rule record-time ( target : source : start end user system )
+{
+ local src-string = [$(source:G=:J=",")"] " ;
+ USER_TIME on $(target) += $(src-string)$(user) ;
+ SYSTEM_TIME on $(target) += $(src-string)$(system) ;
+
+ # We need the following variables because attempting to perform such
+ # variable expansion in actions would not work due to quotes getting treated
+ # as regular characters.
+ USER_TIME_SECONDS on $(target) += $(src-string)$(user)" seconds" ;
+ SYSTEM_TIME_SECONDS on $(target) += $(src-string)$(system)" seconds" ;
+}
+
+# Calling this rule requests that Boost Build time how long it takes to build
+# the 'source' target and display the results both on the standard output and in
+# the 'target' file.
+#
+rule time ( target : sources + : properties * )
+{
+ # Set up rule for recording timing information.
+ __TIMING_RULE__ on $(sources) = testing.record-time $(target) ;
+
+ # Make sure the sources get rebuilt any time we need to retrieve that
+ # information.
+ REBUILDS $(target) : $(sources) ;
+}
+
+
+actions time
+{
+ echo user: $(USER_TIME)
+ echo system: $(SYSTEM_TIME)
+
+ echo user: $(USER_TIME_SECONDS) > "$(<)"
+ echo system: $(SYSTEM_TIME_SECONDS) >> "$(<)"
+}
diff --git a/src/kenlm/jam-files/boost-build/tools/testing.jam b/src/kenlm/jam-files/boost-build/tools/testing.jam
new file mode 100644
index 0000000..21e1bd1
--- /dev/null
+++ b/src/kenlm/jam-files/boost-build/tools/testing.jam
@@ -0,0 +1,594 @@
+# Copyright 2005 Dave Abrahams
+# Copyright 2002, 2003, 2004, 2005, 2006 Vladimir Prus
+# Distributed under the Boost Software License, Version 1.0.
+# (See accompanying file LICENSE_1_0.txt or http://www.boost.org/LICENSE_1_0.txt)
+
+# This module implements regression testing framework. It declares a number of
+# main target rules which perform some action and, if the results are OK,
+# creates an output file.
+#
+# The exact list of rules is:
+# 'compile' -- creates .test file if compilation of sources was
+# successful.
+# 'compile-fail' -- creates .test file if compilation of sources failed.
+# 'run' -- creates .test file if running of executable produced from
+# sources was successful. Also leaves behind .output file
+# with the output from program run.
+# 'run-fail' -- same as above, but .test file is created if running fails.
+#
+# In all cases, presence of .test file is an indication that the test passed.
+# For more convenient reporting, you might want to use C++ Boost regression
+# testing utilities (see http://www.boost.org/more/regression.html).
+#
+# For historical reasons, a 'unit-test' rule is available which has the same
+# syntax as 'exe' and behaves just like 'run'.
+
+# Things to do:
+# - Teach compiler_status handle Jamfile.v2.
+# Notes:
+# - <no-warn> is not implemented, since it is Como-specific, and it is not
+# clear how to implement it
+# - std::locale-support is not implemented (it is used in one test).
+
+
+import alias ;
+import "class" ;
+import common ;
+import errors ;
+import feature ;
+import generators ;
+import os ;
+import path ;
+import project ;
+import property ;
+import property-set ;
+import regex ;
+import sequence ;
+import targets ;
+import toolset ;
+import type ;
+import virtual-target ;
+
+
+rule init ( )
+{
+}
+
+
+# Feature controlling the command used to launch test programs.
+feature.feature testing.launcher : : free optional ;
+
+feature.feature test-info : : free incidental ;
+feature.feature testing.arg : : free incidental ;
+feature.feature testing.input-file : : free dependency ;
+
+feature.feature preserve-test-targets : on off : incidental propagated ;
+
+# Register target types.
+type.register TEST : test ;
+type.register COMPILE : : TEST ;
+type.register COMPILE_FAIL : : TEST ;
+type.register RUN_OUTPUT : run ;
+type.register RUN : : TEST ;
+type.register RUN_FAIL : : TEST ;
+type.register LINK_FAIL : : TEST ;
+type.register LINK : : TEST ;
+type.register UNIT_TEST : passed : TEST ;
+
+
+# Declare the rules which create main targets. While the 'type' module already
+# creates rules with the same names for us, we need extra convenience: default
+# name of main target, so write our own versions.
+
+# Helper rule. Create a test target, using basename of first source if no target
+# name is explicitly passed. Remembers the created target in a global variable.
+#
+rule make-test ( target-type : sources + : requirements * : target-name ? )
+{
+ # Default the target name to the basename of the first source file.
+ target-name ?= $(sources[1]:D=:S=) ;
+
+ # Having periods (".") in the target name is problematic because the typed
+ # generator will strip the suffix and use the bare name for the file
+ # targets. Even though the location-prefix averts problems most times it
+ # does not prevent ambiguity issues when referring to the test targets. For
+ # example when using the XML log output. So we rename the target to remove
+ # the periods, and provide an alias for users.
+ local real-name = [ regex.replace $(target-name) "[.]" "~" ] ;
+
+ local project = [ project.current ] ;
+ # The <location-prefix> forces the build system to generate paths in the
+ # form '$build_dir/array1.test/gcc/debug'. This is necessary to allow
+ # post-processing tools to work.
+ local t = [ targets.create-typed-target [ type.type-from-rule-name
+ $(target-type) ] : $(project) : $(real-name) : $(sources) :
+ $(requirements) <location-prefix>$(real-name).test ] ;
+
+ # The alias to the real target, per period replacement above.
+ if $(real-name) != $(target-name)
+ {
+ alias $(target-name) : $(t) ;
+ }
+
+ # Remember the test (for --dump-tests). A good way would be to collect all
+ # given a project. This has some technical problems: e.g. we can not call
+ # this dump from a Jamfile since projects referred by 'build-project' are
+ # not available until the whole Jamfile has been loaded.
+ .all-tests += $(t) ;
+ return $(t) ;
+}
+
+
+# Note: passing more than one cpp file here is known to fail. Passing a cpp file
+# and a library target works.
+#
+rule compile ( sources + : requirements * : target-name ? )
+{
+ # Compile-only test: passes if the sources compile successfully.
+ local result = [ make-test compile : $(sources) : $(requirements)
+ : $(target-name) ] ;
+ return $(result) ;
+}
+
+
+rule compile-fail ( sources + : requirements * : target-name ? )
+{
+ # Negative compile test: passes if compilation of the sources fails.
+ local result = [ make-test compile-fail : $(sources) : $(requirements)
+ : $(target-name) ] ;
+ return $(result) ;
+}
+
+
+rule link ( sources + : requirements * : target-name ? )
+{
+ # Link test: passes if the sources build and link successfully.
+ local result = [ make-test link : $(sources) : $(requirements)
+ : $(target-name) ] ;
+ return $(result) ;
+}
+
+
+rule link-fail ( sources + : requirements * : target-name ? )
+{
+ # Negative link test: passes if linking of the sources fails.
+ local result = [ make-test link-fail : $(sources) : $(requirements)
+ : $(target-name) ] ;
+ return $(result) ;
+}
+
+
+rule handle-input-files ( input-files * )
+{
+ # Convert a list of input files into <testing.input-file> properties.
+ if $(input-files[2])
+ {
+ # Check that sorting made when creating property-set instance will not
+ # change the ordering.
+ if [ sequence.insertion-sort $(input-files) ] != $(input-files)
+ {
+ errors.user-error "Names of input files must be sorted alphabetically"
+ : "due to internal limitations" ;
+ }
+ }
+ return <testing.input-file>$(input-files) ;
+}
+
+
+rule run ( sources + : args * : input-files * : requirements * : target-name ? :
+ default-build * )
+{
+ # Record the command-line arguments and input files as free features.
+ local reqs = $(requirements) <testing.arg>$(args:J=" ")
+ [ handle-input-files $(input-files) ] ;
+ return [ make-test run : $(sources) : $(reqs) : $(target-name) ] ;
+}
+
+
+rule run-fail ( sources + : args * : input-files * : requirements * :
+ target-name ? : default-build * )
+{
+ # Same as 'run', but the test passes only if running the executable fails.
+ local reqs = $(requirements) <testing.arg>$(args:J=" ")
+ [ handle-input-files $(input-files) ] ;
+ return [ make-test run-fail : $(sources) : $(reqs) : $(target-name) ] ;
+}
+
+
+# Use 'test-suite' as a synonym for 'alias', for backward compatibility.
+IMPORT : alias : : test-suite ;
+
+
+# For all main targets in 'project-module', which are typed targets with type
+# derived from 'TEST', produce some interesting information.
+#
+rule dump-tests
+{
+ # Print information about every test target registered so far.
+ local all-tests = $(.all-tests) ;
+ for local test-target in $(all-tests)
+ {
+ dump-test $(test-target) ;
+ }
+}
+
+
+# Given a project location in normalized form (slashes are forward), compute the
+# name of the Boost library.
+#
+local rule get-library-name ( path )
+{
+ # Path is in normalized form, so all slashes are forward.
+ # Recognized layouts, in priority order:
+ # .../{tools,libs}/<name>/{test,example} -> <name>
+ # .../{tools,libs}/<name> -> <name>
+ # .../status -> "" (whole-Boost test run)
+ local match1 = [ MATCH /(tools|libs)/(.*)/(test|example) : $(path) ] ;
+ local match2 = [ MATCH /(tools|libs)/(.*)$ : $(path) ] ;
+ local match3 = [ MATCH (/status$) : $(path) ] ;
+
+ if $(match1) { return $(match1[2]) ; }
+ else if $(match2) { return $(match2[2]) ; }
+ else if $(match3) { return "" ; }
+ else if --dump-tests in [ modules.peek : ARGV ]
+ {
+ # The 'run' rule and others might be used outside boost. In that case,
+ # just return the path, since the 'library name' makes no sense.
+ return $(path) ;
+ }
+}
+
+
+# Was an XML dump requested?
+.out-xml = [ MATCH --out-xml=(.*) : [ modules.peek : ARGV ] ] ;
+
+
+# Takes a target (instance of 'basic-target') and prints
+# - its type
+# - its name
+# - comments specified via the <test-info> property
+# - relative location of all source from the project root.
+#
+rule dump-test ( target )
+{
+ local type = [ $(target).type ] ;
+ local name = [ $(target).name ] ;
+ local project = [ $(target).project ] ;
+
+ # Prefix the test name with the Boost library it belongs to, if any.
+ local project-root = [ $(project).get project-root ] ;
+ local library = [ get-library-name [ path.root [ $(project).get location ]
+ [ path.pwd ] ] ] ;
+ if $(library)
+ {
+ name = $(library)/$(name) ;
+ }
+
+ # Collect the locations of all file sources, relative to the project root.
+ local sources = [ $(target).sources ] ;
+ local source-files ;
+ for local s in $(sources)
+ {
+ if [ class.is-a $(s) : file-reference ]
+ {
+ local location = [ path.root [ path.root [ $(s).name ]
+ [ $(s).location ] ] [ path.pwd ] ] ;
+
+ source-files += [ path.relative-to [ path.root $(project-root)
+ [ path.pwd ] ] $(location) ] ;
+ }
+ }
+
+ # Join "<location>//<name>.test" into a single string.
+ local target-name = [ $(project).get location ] // [ $(target).name ] .test
+ ;
+ target-name = $(target-name:J=) ;
+
+ local r = [ $(target).requirements ] ;
+ # Extract values of the <test-info> feature.
+ local test-info = [ $(r).get <test-info> ] ;
+
+ # If the user requested XML output on the command-line, add the test info to
+ # that XML file rather than dumping them to stdout.
+ if $(.out-xml)
+ {
+ local nl = "
+" ;
+ .contents on $(.out-xml) +=
+ "$(nl) <test type=\"$(type)\" name=\"$(name)\">"
+ "$(nl) <target><![CDATA[$(target-name)]]></target>"
+ "$(nl) <info><![CDATA[$(test-info)]]></info>"
+ "$(nl) <source><![CDATA[$(source-files)]]></source>"
+ "$(nl) </test>"
+ ;
+ }
+ else
+ {
+ # Format them into a single string of quoted strings.
+ test-info = \"$(test-info:J=\"\ \")\" ;
+
+ ECHO boost-test($(type)) \"$(name)\" [$(test-info)] ":"
+ \"$(source-files)\" ;
+ }
+}
+
+
+# Register generators. Depending on target type, either 'expect-success' or
+# 'expect-failure' rule will be used.
+generators.register-standard testing.expect-success : OBJ : COMPILE ;
+generators.register-standard testing.expect-failure : OBJ : COMPILE_FAIL ;
+generators.register-standard testing.expect-success : RUN_OUTPUT : RUN ;
+generators.register-standard testing.expect-failure : RUN_OUTPUT : RUN_FAIL ;
+generators.register-standard testing.expect-failure : EXE : LINK_FAIL ;
+generators.register-standard testing.expect-success : EXE : LINK ;
+
+# Generator which runs an EXE and captures output.
+generators.register-standard testing.capture-output : EXE : RUN_OUTPUT ;
+
+# Generator which creates a target if sources run successfully. Differs from RUN
+# in that run output is not captured. The reason why it exists is that the 'run'
+# rule is much better for automated testing, but is not user-friendly (see
+# http://article.gmane.org/gmane.comp.lib.boost.build/6353).
+generators.register-standard testing.unit-test : EXE : UNIT_TEST ;
+
+
+# The action rules called by generators.
+
+# Causes the 'target' to exist after bjam invocation if and only if all the
+# dependencies were successfully built.
+#
+rule expect-success ( target : dependency + : requirements * )
+{
+ # Mark the test as passed once its dependencies are up to date. The
+ # original passed the unset variable $(sources) here (which expands to
+ # nothing in jam); pass the actual 'dependency' parameter instead.
+ **passed** $(target) : $(dependency) ;
+}
+
+
+# Causes the 'target' to exist after bjam invocation if and only if some of
+# the dependencies were not successfully built.
+#
+rule expect-failure ( target : dependency + : properties * )
+{
+ # Build a marker target next to the dependency: same grist with "*fail"
+ # appended so its name stays unique.
+ local grist = [ MATCH ^<(.*)> : $(dependency:G) ] ;
+ local marker = $(dependency:G=$(grist)*fail) ;
+ (failed-as-expected) $(marker) ;
+ # Invert the meaning of a build failure for the dependency.
+ FAIL_EXPECTED $(dependency) ;
+ # Place the marker where the dependency lives, removing stale copies.
+ LOCATE on $(marker) = [ on $(dependency) return $(LOCATE) ] ;
+ RMOLD $(marker) ;
+ DEPENDS $(marker) : $(dependency) ;
+ DEPENDS $(target) : $(marker) ;
+ **passed** $(target) : $(marker) ;
+}
+
+
+# The rule/action combination used to report successful passing of a test.
+#
+rule **passed**
+{
+ # Dump all the tests, if needed. We do it here, since dump should happen
+ # only after all Jamfiles have been read, and there is no such place
+ # currently defined (but there should be).
+ if ! $(.dumped-tests) && ( --dump-tests in [ modules.peek : ARGV ] )
+ {
+ .dumped-tests = true ;
+ dump-tests ;
+ }
+
+ # Force deletion of the target, in case any dependencies failed to build.
+ RMOLD $(<) ;
+}
+
+
+# Used to create test files signifying passed tests.
+#
+actions **passed**
+{
+ echo passed > "$(<)"
+}
+
+
+# Used to create replacement object files that do not get created during tests
+# that are expected to fail.
+#
+actions (failed-as-expected)
+{
+ echo failed as expected > "$(<)"
+}
+
+
+rule run-path-setup ( target : source : properties * )
+{
+ # For testing, we need to make sure that all dynamic libraries needed by the
+ # test are found. So, we collect all paths from dependency libraries (via
+ # xdll-path property) and add whatever explicit dll-path user has specified.
+ # The resulting paths are added to the environment on each test invocation.
+ local dll-paths = [ feature.get-values <dll-path> : $(properties) ] ;
+ dll-paths += [ feature.get-values <xdll-path> : $(properties) ] ;
+ dll-paths += [ on $(source) return $(RUN_PATH) ] ;
+ dll-paths = [ sequence.unique $(dll-paths) ] ;
+ if $(dll-paths)
+ {
+ dll-paths = [ sequence.transform path.native : $(dll-paths) ] ;
+ # PATH_SETUP is expanded by the capture-output and unit-test actions.
+ PATH_SETUP on $(target) = [ common.prepend-path-variable-command
+ [ os.shared-library-path-variable ] : $(dll-paths) ] ;
+ }
+}
+
+
+local argv = [ modules.peek : ARGV ] ;
+
+toolset.flags testing.capture-output ARGS <testing.arg> ;
+toolset.flags testing.capture-output INPUT_FILES <testing.input-file> ;
+toolset.flags testing.capture-output LAUNCHER <testing.launcher> ;
+
+
+# Runs executable 'sources' and stores stdout in file 'target'. Unless
+# --preserve-test-targets command line option has been specified, removes the
+# executable. The 'target-to-remove' parameter controls what should be removed:
+# - if 'none', does not remove anything, ever
+# - if empty, removes 'source'
+# - if non-empty and not 'none', contains a list of sources to remove.
+#
+rule capture-output ( target : source : properties * : targets-to-remove * )
+{
+ # Record the name of the intermediate .output file on the target so the
+ # capture-output action can refer to it.
+ output-file on $(target) = $(target:S=.output) ;
+ LOCATE on $(target:S=.output) = [ on $(target) return $(LOCATE) ] ;
+
+ # The INCLUDES kill a warning about independent target...
+ INCLUDES $(target) : $(target:S=.output) ;
+ # but it also puts .output into dependency graph, so we must tell jam it is
+ # OK if it cannot find the target or updating rule.
+ NOCARE $(target:S=.output) ;
+
+ # This has two-fold effect. First it adds input files to the dependency
+ # graph, preventing a warning. Second, it causes input files to be bound
+ # before target is created. Therefore, they are bound using SEARCH setting
+ # on them and not LOCATE setting of $(target), as in other case (due to jam
+ # bug).
+ DEPENDS $(target) : [ on $(target) return $(INPUT_FILES) ] ;
+
+ if $(targets-to-remove) = none
+ {
+ targets-to-remove = ;
+ }
+ else if ! $(targets-to-remove)
+ {
+ # Default: remove the test executable itself.
+ targets-to-remove = $(source) ;
+ }
+
+ run-path-setup $(target) : $(source) : $(properties) ;
+
+ if [ feature.get-values preserve-test-targets : $(properties) ] = off
+ {
+ TEMPORARY $(targets-to-remove) ;
+ # Set a second action on target that will be executed after capture
+ # output action. The 'RmTemps' rule has the 'ignore' modifier so it is
+ # always considered succeeded. This is needed for 'run-fail' test. For
+ # that test the target will be marked with FAIL_EXPECTED, and without
+ # 'ignore' successful execution will be negated and be reported as
+ # failure. With 'ignore' we do not detect a case where removing files
+ # fails, but it is not likely to happen.
+ RmTemps $(target) : $(targets-to-remove) ;
+ }
+}
+
+
+if [ os.name ] = NT
+{
+ .STATUS = %status% ;
+ .SET_STATUS = "set status=%ERRORLEVEL%" ;
+ .RUN_OUTPUT_NL = "echo." ;
+ .STATUS_0 = "%status% EQU 0 (" ;
+ .STATUS_NOT_0 = "%status% NEQ 0 (" ;
+ .VERBOSE = "%verbose% EQU 1 (" ;
+ .ENDIF = ")" ;
+ .SHELL_SET = "set " ;
+ .CATENATE = type ;
+ .CP = copy ;
+}
+else
+{
+ .STATUS = "$status" ;
+ .SET_STATUS = "status=$?" ;
+ .RUN_OUTPUT_NL = "echo" ;
+ .STATUS_0 = "test $status -eq 0 ; then" ;
+ .STATUS_NOT_0 = "test $status -ne 0 ; then" ;
+ .VERBOSE = "test $verbose -eq 1 ; then" ;
+ .ENDIF = "fi" ;
+ .SHELL_SET = "" ;
+ .CATENATE = cat ;
+ .CP = cp ;
+}
+
+
+.VERBOSE_TEST = 0 ;
+if --verbose-test in [ modules.peek : ARGV ]
+{
+ .VERBOSE_TEST = 1 ;
+}
+
+
+.RM = [ common.rm-command ] ;
+
+
+actions capture-output bind INPUT_FILES output-file
+{
+ $(PATH_SETUP)
+ $(LAUNCHER) "$(>)" $(ARGS) "$(INPUT_FILES)" > "$(output-file)" 2>&1
+ $(.SET_STATUS)
+ $(.RUN_OUTPUT_NL) >> "$(output-file)"
+ echo EXIT STATUS: $(.STATUS) >> "$(output-file)"
+ if $(.STATUS_0)
+ $(.CP) "$(output-file)" "$(<)"
+ $(.ENDIF)
+ $(.SHELL_SET)verbose=$(.VERBOSE_TEST)
+ if $(.STATUS_NOT_0)
+ $(.SHELL_SET)verbose=1
+ $(.ENDIF)
+ if $(.VERBOSE)
+ echo ====== BEGIN OUTPUT ======
+ $(.CATENATE) "$(output-file)"
+ echo ====== END OUTPUT ======
+ $(.ENDIF)
+ exit $(.STATUS)
+}
+
+
+actions quietly updated ignore piecemeal together RmTemps
+{
+ $(.RM) "$(>)"
+}
+
+
+.MAKE_FILE = [ common.file-creation-command ] ;
+
+toolset.flags testing.unit-test LAUNCHER <testing.launcher> ;
+toolset.flags testing.unit-test ARGS <testing.arg> ;
+
+
+rule unit-test ( target : source : properties * )
+{
+ # Make sure shared libraries the test depends on can be found at run time.
+ run-path-setup $(target) : $(source) : $(properties) ;
+}
+
+
+actions unit-test
+{
+ $(PATH_SETUP)
+ $(LAUNCHER) "$(>)" $(ARGS) && $(.MAKE_FILE) "$(<)"
+}
+
+
+IMPORT $(__name__) : compile compile-fail run run-fail link link-fail
+ : : compile compile-fail run run-fail link link-fail ;
+
+
+# This is a composing generator to support cases where a generator for the
+# specified target constructs other targets as well. One such example is msvc's
+# exe generator that constructs both EXE and PDB targets.
+type.register TIME : time ;
+generators.register-composing testing.time : : TIME ;
+
+
+# Note that this rule may be called multiple times for a single target in case
+# there are multiple actions operating on the same target in sequence. One such
+# example are msvc exe targets first created by a linker action and then updated
+# with an embedded manifest file by a separate action.
+rule record-time ( target : source : start end user system )
+{
+ local src-string = [$(source:G=:J=",")"] " ;
+ USER_TIME on $(target) += $(src-string)$(user) ;
+ SYSTEM_TIME on $(target) += $(src-string)$(system) ;
+
+ # We need the following variables because attempting to perform such
+ # variable expansion in actions would not work due to quotes getting treated
+ # as regular characters.
+ USER_TIME_SECONDS on $(target) += $(src-string)$(user)" seconds" ;
+ SYSTEM_TIME_SECONDS on $(target) += $(src-string)$(system)" seconds" ;
+}
+
+
+IMPORT testing : record-time : : testing.record-time ;
+
+
+# Calling this rule requests that Boost Build time how long it takes to build
+# the 'source' target and display the results both on the standard output and in
+# the 'target' file.
+#
+rule time ( target : sources + : properties * )
+{
+ # Set up rule for recording timing information.
+ __TIMING_RULE__ on $(sources) = testing.record-time $(target) ;
+
+ # Make sure the sources get rebuilt any time we need to retrieve that
+ # information.
+ REBUILDS $(target) : $(sources) ;
+}
+
+
+actions time
+{
+ echo user: $(USER_TIME)
+ echo system: $(SYSTEM_TIME)
+
+ echo user: $(USER_TIME_SECONDS) > "$(<)"
+ echo system: $(SYSTEM_TIME_SECONDS) >> "$(<)"
+}
diff --git a/src/kenlm/jam-files/boost-build/tools/tiff.jam b/src/kenlm/jam-files/boost-build/tools/tiff.jam
new file mode 100644
index 0000000..14f235e
--- /dev/null
+++ b/src/kenlm/jam-files/boost-build/tools/tiff.jam
@@ -0,0 +1,230 @@
+# Copyright (c) 2010 Vladimir Prus.
+# Copyright (c) 2013 Steven Watanabe
+#
+# Use, modification and distribution is subject to the Boost Software
+# License Version 1.0. (See accompanying file LICENSE_1_0.txt or
+# http://www.boost.org/LICENSE_1_0.txt)
+
+# Supports the libtiff library
+#
+# After 'using libtiff', the following targets are available:
+#
+# /libtiff//libtiff -- The libtiff library
+
+import project ;
+import ac ;
+import errors ;
+import "class" : new ;
+import targets ;
+import path ;
+import modules ;
+import errors ;
+import indirect ;
+import property ;
+import property-set ;
+
+# Header(s) used to detect a usable libtiff installation.
+header = tiff.h tiffio.hxx ;
+
+# Default library name(s) to search for.
+names = libtiff ;
+
+# Source files used when building libtiff from source.
+sources = tif_aux.c tif_close.c tif_codec.c tif_color.c tif_compress.c tif_dir.c tif_dirinfo.c
+ tif_dirread.c tif_dirwrite.c tif_dumpmode.c tif_error.c tif_extension.c tif_fax3.c tif_fax3sm.c
+ tif_getimage.c tif_jbig.c tif_jpeg.c tif_jpeg_12.c tif_ojpeg.c tif_flush.c tif_luv.c tif_lzw.c
+ tif_next.c tif_open.c tif_packbits.c tif_pixarlog.c tif_predict.c tif_print.c tif_read.c tif_stream.cxx
+ tif_swab.c tif_strip.c tif_thunder.c tif_tile.c tif_version.c tif_warning.c tif_write.c tif_zip.c ;
+
+# Counter used to give each from-source build alternative a unique name.
+library-id = 0 ;
+
+if --debug-configuration in [ modules.peek : ARGV ]
+{
+ .debug = true ;
+}
+
+# Initializes the libtiff library.
+#
+# libtiff can be configured either to use pre-existing binaries
+# or to build the library from source.
+#
+# Options for configuring a prebuilt libtiff::
+#
+# <search>
+# The directory containing the libtiff binaries.
+# <name>
+# Overrides the default library name.
+# <include>
+# The directory containing the libtiff headers.
+#
+# If none of these options is specified, then the environmental
+# variables LIBTIFF_LIBRARY_PATH, LIBTIFF_NAME, and LIBTIFF_INCLUDE will
+# be used instead.
+#
+# Options for building libtiff from source::
+#
+# <source>
+# The libtiff source directory. Defaults to the environmental variable
+# LIBTIFF_SOURCE.
+# <tag>
+# A rule which computes the actual name of the compiled
+# libraries based on the build properties. Ignored
+# when using precompiled binaries.
+# <build-name>
+# The base name to use for the compiled library. Ignored
+# when using precompiled binaries.
+#
+# Examples::
+#
+# # Find libtiff in the default system location
+# using libtiff ;
+# # Build libtiff from source
+# using libtiff : 4.0.1 : <source>/home/steven/libtiff-4.0.1 ;
+# # Find libtiff in /usr/local
+# using libtiff : 4.0.1
+# : <include>/usr/local/include <search>/usr/local/lib ;
+# # Build libtiff from source for msvc and find
+# # prebuilt binaries for gcc.
+# using libtiff : 4.0.1 : <source>C:/Devel/src/libtiff-4.0.1 : <toolset>msvc ;
+# using libtiff : 4.0.1 : : <toolset>gcc ;
+#
+rule init (
+ version ?
+ # The libtiff version (currently ignored)
+
+ : options *
+ # A list of the options to use
+
+ : requirements *
+ # The requirements for the libtiff target
+
+ : is-default ?
+ # Default configurations are only used when libtiff
+ # has not yet been configured.
+ )
+{
+ local caller = [ project.current ] ;
+
+ # Set up the 'libtiff' project the first time we are called.
+ if ! $(.initialized)
+ {
+ .initialized = true ;
+
+ project.initialize $(__name__) ;
+ .project = [ project.current ] ;
+ project libtiff ;
+ }
+
+ local library-path = [ property.select <search> : $(options) ] ;
+ library-path = $(library-path:G=) ;
+ local include-path = [ property.select <include> : $(options) ] ;
+ include-path = $(include-path:G=) ;
+ local source-path = [ property.select <source> : $(options) ] ;
+ source-path = $(source-path:G=) ;
+ local library-name = [ property.select <name> : $(options) ] ;
+ library-name = $(library-name:G=) ;
+ local tag = [ property.select <tag> : $(options) ] ;
+ tag = $(tag:G=) ;
+ local build-name = [ property.select <build-name> : $(options) ] ;
+ build-name = $(build-name:G=) ;
+
+ # The condition under which this particular configuration applies.
+ condition = [ property-set.create $(requirements) ] ;
+ condition = [ property-set.create [ $(condition).base ] ] ;
+
+ local no-build-from-source ;
+ # Ignore environmental LIBTIFF_SOURCE if this initialization
+ # requested to search for a specific pre-built library.
+ if $(library-path) || $(include-path) || $(library-name)
+ {
+ if $(source-path) || $(tag) || $(build-name)
+ {
+ errors.user-error "incompatible options for libtiff:"
+ [ property.select <search> <include> <name> : $(options) ] "and"
+ [ property.select <source> <tag> <build-name> : $(options) ] ;
+ }
+ else
+ {
+ no-build-from-source = true ;
+ }
+ }
+
+ # Fall back to the documented LIBTIFF_SOURCE environment variable. The
+ # original peeked ZLIB_SOURCE, a leftover from the zlib module this file
+ # was copied from.
+ source-path ?= [ modules.peek : LIBTIFF_SOURCE ] ;
+
+ if $(.configured.$(condition))
+ {
+ if $(is-default)
+ {
+ if $(.debug)
+ {
+ ECHO "notice: [libtiff] libtiff is already configured" ;
+ }
+ }
+ else
+ {
+ errors.user-error "libtiff is already configured" ;
+ }
+ return ;
+ }
+ else if $(source-path) && ! $(no-build-from-source)
+ {
+ # Default base name for the compiled library. (The original default,
+ # "z", was another zlib leftover.)
+ build-name ?= tiff ;
+ library-id = [ CALC $(library-id) + 1 ] ;
+ tag = [ MATCH ^@?(.*)$ : $(tag) ] ;
+ if $(tag) && ! [ MATCH ^([^%]*)%([^%]+)$ : $(tag) ]
+ {
+ tag = [ indirect.make $(tag) : [ $(caller).project-module ] ] ;
+ }
+ sources = [ path.glob $(source-path) : $(sources) ] ;
+ if $(.debug)
+ {
+ ECHO "notice: [libtiff] Building libtiff from source as $(build-name)" ;
+ if $(condition)
+ {
+ ECHO "notice: [libtiff] Condition" [ $(condition).raw ] ;
+ }
+ if $(sources)
+ {
+ ECHO "notice: [libtiff] found libtiff source in $(source-path)" ;
+ }
+ else
+ {
+ ECHO "warning: [libtiff] could not find libtiff source in $(source-path)" ;
+ }
+ }
+ local target ;
+ if $(sources) {
+ # Note: the original also added <link>shared:<define>ZLIB_DLL, a
+ # zlib copy/paste leftover with no meaning for libtiff; dropped.
+ target = [ targets.create-typed-target LIB : $(.project)
+ : $(build-name).$(library-id)
+ : $(sources)
+ : $(requirements)
+ <tag>@$(tag)
+ <include>$(source-path)
+ <toolset>msvc:<define>_CRT_SECURE_NO_DEPRECATE
+ <toolset>msvc:<define>_SCL_SECURE_NO_DEPRECATE
+ :
+ : <include>$(source-path) ] ;
+ }
+
+ local mt = [ new ac-library libtiff : $(.project) : $(condition) ] ;
+ $(mt).set-header $(header) ;
+ $(mt).set-default-names $(names) ;
+ if $(target)
+ {
+ $(mt).set-target $(target) ;
+ }
+ targets.main-target-alternative $(mt) ;
+ } else {
+ if $(.debug)
+ {
+ ECHO "notice: [libtiff] Using pre-installed library" ;
+ if $(condition)
+ {
+ ECHO "notice: [libtiff] Condition" [ $(condition).raw ] ;
+ }
+ }
+
+ # Note: the stray trailing $(root) argument (never set, always empty)
+ # was removed from this ac-library invocation.
+ local mt = [ new ac-library libtiff : $(.project) : $(condition) :
+ $(include-path) : $(library-path) : $(library-name) ] ;
+ $(mt).set-header $(header) ;
+ $(mt).set-default-names $(names) ;
+ targets.main-target-alternative $(mt) ;
+ }
+ .configured.$(condition) = true ;
+}
diff --git a/src/kenlm/jam-files/boost-build/tools/types/asm.jam b/src/kenlm/jam-files/boost-build/tools/types/asm.jam
new file mode 100644
index 0000000..a340db3
--- /dev/null
+++ b/src/kenlm/jam-files/boost-build/tools/types/asm.jam
@@ -0,0 +1,4 @@
+# Copyright Craig Rodrigues 2005. Distributed under the Boost
+# Software License, Version 1.0. (See accompanying
+# file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+type ASM : s S asm ;
diff --git a/src/kenlm/jam-files/boost-build/tools/types/cpp.jam b/src/kenlm/jam-files/boost-build/tools/types/cpp.jam
new file mode 100644
index 0000000..29d8faa
--- /dev/null
+++ b/src/kenlm/jam-files/boost-build/tools/types/cpp.jam
@@ -0,0 +1,90 @@
+# Copyright 2004 David Abrahams
+# Copyright 2002, 2003, 2004, 2005, 2006 Vladimir Prus
+# Copyright 2010 Rene Rivera
+# Distributed under the Boost Software License, Version 1.0.
+# (See accompanying file LICENSE_1_0.txt or copy at
+# http://www.boost.org/LICENSE_1_0.txt)
+
+import scanner ;
+import type ;
+
+
+class c-scanner : scanner
+{
+ import path ;
+ import regex ;
+ import scanner ;
+ import sequence ;
+ import virtual-target ;
+
+ rule __init__ ( includes * )
+ {
+ scanner.__init__ ;
+
+ for local i in $(includes)
+ {
+ self.includes += [ sequence.transform path.native : [ regex.split
+ $(i:G=) "&&" ] ] ;
+ }
+ }
+
+ rule pattern ( )
+ {
+ return "#[ \t]*include[ ]*(<(.*)>|\"(.*)\")" ;
+ }
+
+ rule process ( target : matches * : binding )
+ {
+ local angle = [ regex.transform $(matches) : "<(.*)>" ] ;
+ angle = [ sequence.transform path.native : $(angle) ] ;
+ local quoted = [ regex.transform $(matches) : "\"(.*)\"" ] ;
+ quoted = [ sequence.transform path.native : $(quoted) ] ;
+
+ # CONSIDER: the new scoping rules seem to defeat "on target" variables.
+ local g = [ on $(target) return $(HDRGRIST) ] ;
+ local b = [ NORMALIZE_PATH $(binding:D) ] ;
+
+ # Attach binding of including file to included targets. When a target is
+ # directly created from a virtual target this extra information is
+ # unnecessary. But in other cases, it allows us to distinguish between
+ # two headers of the same name included from different places. We do not
+ # need this extra information for angle includes, since they should not
+ # depend on the including file (we can not get literal "." in the
+ # include path).
+ local g2 = $(g)"#"$(b) ;
+
+ angle = $(angle:G=$(g)) ;
+ quoted = $(quoted:G=$(g2)) ;
+
+ local all = $(angle) $(quoted) ;
+
+ INCLUDES $(target) : $(all) ;
+ NOCARE $(all) ;
+ SEARCH on $(angle) = $(self.includes:G=) ;
+ SEARCH on $(quoted) = $(b) $(self.includes:G=) ;
+
+ # Just propagate the current scanner to includes, in hope that includes
+ # do not change scanners.
+ scanner.propagate $(__name__) : $(all) : $(target) ;
+
+ ISFILE $(all) ;
+ }
+}
+
+scanner.register c-scanner : include ;
+
+type.register CPP : cpp cxx cc ;
+type.register H : h ;
+type.register HPP : hpp : H ;
+type.register C : c ;
+
+# It most cases where a CPP file or a H file is a source of some action, we
+# should rebuild the result if any of files included by CPP/H are changed. One
+# case when this is not needed is installation, which is handled specifically.
+type.set-scanner CPP : c-scanner ;
+type.set-scanner C : c-scanner ;
+# One case where scanning of H/HPP files is necessary is PCH generation -- if
+# any header included by HPP being precompiled changes, we need to recompile the
+# header.
+type.set-scanner H : c-scanner ;
+type.set-scanner HPP : c-scanner ;
diff --git a/src/kenlm/jam-files/boost-build/tools/types/exe.jam b/src/kenlm/jam-files/boost-build/tools/types/exe.jam
new file mode 100644
index 0000000..4710951
--- /dev/null
+++ b/src/kenlm/jam-files/boost-build/tools/types/exe.jam
@@ -0,0 +1,9 @@
+# Copyright David Abrahams 2004. Distributed under the Boost
+# Software License, Version 1.0. (See accompanying
+# file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+import type ;
+
+type.register EXE ;
+type.set-generated-target-suffix EXE : <target-os>windows : "exe" ;
+type.set-generated-target-suffix EXE : <target-os>cygwin : "exe" ;
diff --git a/src/kenlm/jam-files/boost-build/tools/types/html.jam b/src/kenlm/jam-files/boost-build/tools/types/html.jam
new file mode 100644
index 0000000..5cd337d
--- /dev/null
+++ b/src/kenlm/jam-files/boost-build/tools/types/html.jam
@@ -0,0 +1,4 @@
+# Copyright David Abrahams 2004. Distributed under the Boost
+# Software License, Version 1.0. (See accompanying
+# file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+type HTML : html ;
diff --git a/src/kenlm/jam-files/boost-build/tools/types/lib.jam b/src/kenlm/jam-files/boost-build/tools/types/lib.jam
new file mode 100644
index 0000000..854ab8f
--- /dev/null
+++ b/src/kenlm/jam-files/boost-build/tools/types/lib.jam
@@ -0,0 +1,74 @@
+# Copyright David Abrahams 2004. Distributed under the Boost
+# Software License, Version 1.0. (See accompanying
+# file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+import type ; # for set-generated-target-suffix
+import os ;
+
+# The following naming scheme is used for libraries.
+#
+# On *nix:
+# libxxx.a static library
+# libxxx.so shared library
+#
+# On windows (msvc)
+# libxxx.lib static library
+# xxx.dll DLL
+# xxx.lib import library
+#
+# On windows (mingw):
+# libxxx.a static library
+# libxxx.dll DLL
+# libxxx.dll.a import library
+#
+# On cygwin i.e. <target-os>cygwin
+# libxxx.a static library
+# cygxxx.dll DLL
+# libxxx.dll.a import library
+#
+
+type.register LIB ;
+
+# FIXME: should not register both extensions on both platforms.
+type.register STATIC_LIB : a lib : LIB ;
+
+# The 'lib' prefix is used everywhere
+type.set-generated-target-prefix STATIC_LIB : : lib ;
+
+# Use '.lib' suffix for windows
+type.set-generated-target-suffix STATIC_LIB : <target-os>windows : lib ;
+
+# Except with gcc.
+type.set-generated-target-suffix STATIC_LIB : <toolset>gcc <target-os>windows : a ;
+
+# Use xxx.lib for import libs
+type IMPORT_LIB : : STATIC_LIB ;
+type.set-generated-target-prefix IMPORT_LIB : : "" ;
+type.set-generated-target-suffix IMPORT_LIB : : lib ;
+
+# Except with gcc (mingw or cygwin), where use libxxx.dll.a
+type.set-generated-target-prefix IMPORT_LIB : <toolset>gcc : lib ;
+type.set-generated-target-suffix IMPORT_LIB : <toolset>gcc : dll.a ;
+
+type.register SHARED_LIB : so dll dylib : LIB ;
+
+# Both mingw and cygwin use libxxx.dll naming scheme.
+# On Linux, use "lib" prefix
+type.set-generated-target-prefix SHARED_LIB : : lib ;
+# But don't use it on windows
+type.set-generated-target-prefix SHARED_LIB : <target-os>windows : "" ;
+# But use it again on mingw
+type.set-generated-target-prefix SHARED_LIB : <toolset>gcc <target-os>windows : lib ;
+# And use 'cyg' on cygwin
+type.set-generated-target-prefix SHARED_LIB : <target-os>cygwin : cyg ;
+
+
+type.set-generated-target-suffix SHARED_LIB : <target-os>windows : dll ;
+type.set-generated-target-suffix SHARED_LIB : <target-os>cygwin : dll ;
+type.set-generated-target-suffix SHARED_LIB : <target-os>darwin : dylib ;
+
+type SEARCHED_LIB : : LIB ;
+# This is needed so that when we create a target of SEARCHED_LIB
+# type, there's no prefix or suffix automatically added.
+type.set-generated-target-prefix SEARCHED_LIB : : "" ;
+type.set-generated-target-suffix SEARCHED_LIB : : "" ;
diff --git a/src/kenlm/jam-files/boost-build/tools/types/obj.jam b/src/kenlm/jam-files/boost-build/tools/types/obj.jam
new file mode 100644
index 0000000..6afbcaa
--- /dev/null
+++ b/src/kenlm/jam-files/boost-build/tools/types/obj.jam
@@ -0,0 +1,9 @@
+# Copyright David Abrahams 2004. Distributed under the Boost
+# Software License, Version 1.0. (See accompanying
+# file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+import type ;
+
+type.register OBJ : o obj ;
+type.set-generated-target-suffix OBJ : <target-os>windows : obj ;
+type.set-generated-target-suffix OBJ : <target-os>cygwin : obj ;
diff --git a/src/kenlm/jam-files/boost-build/tools/types/objc.jam b/src/kenlm/jam-files/boost-build/tools/types/objc.jam
new file mode 100644
index 0000000..709cbd0
--- /dev/null
+++ b/src/kenlm/jam-files/boost-build/tools/types/objc.jam
@@ -0,0 +1,26 @@
+# Copyright Rene Rivera 2008, 2010.
+# Distributed under the Boost Software License, Version 1.0. (See accompanying
+# file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+import type ;
+import scanner ;
+import types/cpp ;
+
+class objc-scanner : c-scanner
+{
+ rule __init__ ( includes * )
+ {
+ c-scanner.__init__ $(includes) ;
+ }
+
+ rule pattern ( )
+ {
+ return "#[ \t]*include|import[ ]*(<(.*)>|\"(.*)\")" ;
+ }
+}
+
+scanner.register objc-scanner : include ;
+
+type.register OBJECTIVE_C : m ;
+type.register OBJECTIVE_CPP : mm ;
+type.set-scanner OBJECTIVE_C : objc-scanner ;
+type.set-scanner OBJECTIVE_CPP : objc-scanner ;
diff --git a/src/kenlm/jam-files/boost-build/tools/types/preprocessed.jam b/src/kenlm/jam-files/boost-build/tools/types/preprocessed.jam
new file mode 100644
index 0000000..c9187ba
--- /dev/null
+++ b/src/kenlm/jam-files/boost-build/tools/types/preprocessed.jam
@@ -0,0 +1,9 @@
+# Copyright Steven Watanabe 2011
+# Distributed under the Boost Software License Version 1.0. (See
+# accompanying file LICENSE_1_0.txt or copy at
+# http://www.boost.org/LICENSE_1_0.txt)
+
+import type ;
+
+type.register PREPROCESSED_C : i : C ;
+type.register PREPROCESSED_CPP : ii : CPP ;
diff --git a/src/kenlm/jam-files/boost-build/tools/types/qt.jam b/src/kenlm/jam-files/boost-build/tools/types/qt.jam
new file mode 100644
index 0000000..4951063
--- /dev/null
+++ b/src/kenlm/jam-files/boost-build/tools/types/qt.jam
@@ -0,0 +1,12 @@
+# Copyright Vladimir Prus 2005. Distributed under the Boost
+# Software License, Version 1.0. (See accompanying
+# file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+type UI : ui ;
+type QRC : qrc ;
+type MOCCABLE_CPP ;
+type MOCCABLE_H ;
+type MOCCABLE5_CPP ;
+type MOCCABLE5_H ;
+# Result of running moc.
+type MOC : moc : H ;
diff --git a/src/kenlm/jam-files/boost-build/tools/types/register.jam b/src/kenlm/jam-files/boost-build/tools/types/register.jam
new file mode 100644
index 0000000..203992c
--- /dev/null
+++ b/src/kenlm/jam-files/boost-build/tools/types/register.jam
@@ -0,0 +1,39 @@
+# Copyright David Abrahams 2004. Distributed under the Boost
+# Software License, Version 1.0. (See accompanying
+# file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+# This module's job is to automatically import all the type
+# registration modules in its directory.
+import type os path modules ;
+
+# Register the given type on the specified OSes, or on remaining OSes
+# if os is not specified. This rule is injected into each of the type
+# modules for the sake of convenience.
+local rule type ( type : suffixes * : base-type ? : os * )
+{
+ if ! [ type.registered $(type) ]
+ {
+ if ( ! $(os) ) || [ os.name ] in $(os)
+ {
+ type.register $(type) : $(suffixes) : $(base-type) ;
+ }
+ }
+}
+
+.this-module's-file = [ modules.binding $(__name__) ] ;
+.this-module's-dir = [ path.parent $(.this-module's-file) ] ;
+.sibling-jamfiles = [ path.glob $(.this-module's-dir) : *.jam ] ;
+.sibling-modules = [ MATCH ^(.*)\.jam$ : $(.sibling-jamfiles) ] ;
+
+# A loop over all modules in this directory
+for m in $(.sibling-modules)
+{
+ m = [ path.basename $(m) ] ;
+ m = types/$(m) ;
+
+ # Inject the type rule into the new module
+ IMPORT $(__name__) : type : $(m) : type ;
+ import $(m) ;
+}
+
+
diff --git a/src/kenlm/jam-files/boost-build/tools/types/rsp.jam b/src/kenlm/jam-files/boost-build/tools/types/rsp.jam
new file mode 100644
index 0000000..bdf8a7c
--- /dev/null
+++ b/src/kenlm/jam-files/boost-build/tools/types/rsp.jam
@@ -0,0 +1,4 @@
+# Copyright David Abrahams 2004. Distributed under the Boost
+# Software License, Version 1.0. (See accompanying
+# file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+type RSP : rsp ;
diff --git a/src/kenlm/jam-files/boost-build/tools/unix.jam b/src/kenlm/jam-files/boost-build/tools/unix.jam
new file mode 100644
index 0000000..7594985
--- /dev/null
+++ b/src/kenlm/jam-files/boost-build/tools/unix.jam
@@ -0,0 +1,224 @@
+# Copyright (c) 2004 Vladimir Prus.
+#
+# Use, modification and distribution is subject to the Boost Software
+# License Version 1.0. (See accompanying file LICENSE_1_0.txt or
+# http://www.boost.org/LICENSE_1_0.txt)
+
+# This file implements linking semantic common to all unixes. On unix, static
+# libraries must be specified in a fixed order on the linker command line. Generators
+# declared there store information about the order and use it property.
+
+import feature ;
+import "class" : new ;
+import generators ;
+import type ;
+import set ;
+import order ;
+import builtin ;
+
+class unix-linking-generator : linking-generator
+{
+ import property-set ;
+ import type ;
+ import unix ;
+
+ rule __init__ ( id
+ composing ? : # Specify if generator is composing. The generator will be
+ # composing if non-empty string is passed, or parameter is
+ # not given. To make generator non-composing, pass empty
+ # string ("")
+ source-types + : target-types + :
+ requirements * )
+ {
+ composing ?= true ;
+ generator.__init__ $(id) $(composing) : $(source-types) : $(target-types) :
+ $(requirements) ;
+ }
+
+ rule run ( project name ? : property-set : sources + )
+ {
+ local result = [ linking-generator.run $(project) $(name) : $(property-set)
+ : $(sources) ] ;
+
+ unix.set-library-order $(sources) : $(property-set) : $(result[2-]) ;
+
+ return $(result) ;
+ }
+
+ rule generated-targets ( sources + : property-set : project name ? )
+ {
+ local sources2 ;
+ local libraries ;
+ for local l in $(sources)
+ {
+ if [ type.is-derived [ $(l).type ] LIB ]
+ {
+ libraries += $(l) ;
+ }
+ else
+ {
+ sources2 += $(l) ;
+ }
+ }
+
+ sources = $(sources2) [ unix.order-libraries $(libraries) ] ;
+
+ return [ linking-generator.generated-targets $(sources) : $(property-set)
+ : $(project) $(name) ] ;
+ }
+
+}
+
+class unix-archive-generator : archive-generator
+{
+ import unix ;
+
+ rule __init__ ( id composing ? : source-types + : target-types + :
+ requirements * )
+ {
+ composing ?= true ;
+ archive-generator.__init__ $(id) $(composing) : $(source-types) : $(target-types) :
+ $(requirements) ;
+ }
+
+ rule run ( project name ? : property-set : sources + )
+ {
+ local result = [ archive-generator.run $(project) $(name) : $(property-set)
+ : $(sources) ] ;
+
+ unix.set-library-order $(sources) : $(property-set) : $(result[2-]) ;
+
+ return $(result) ;
+
+ }
+}
+
+class unix-searched-lib-generator : searched-lib-generator
+{
+ import unix ;
+ rule __init__ ( * : * )
+ {
+ generator.__init__
+ $(1) : $(2) : $(3) : $(4) : $(5) : $(6) : $(7) : $(8) : $(9) ;
+ }
+
+ rule optional-properties ( )
+ {
+ return $(self.requirements) ;
+ }
+
+ rule run ( project name ? : property-set : sources * )
+ {
+ local result = [ searched-lib-generator.run $(project) $(name)
+ : $(property-set) : $(sources) ] ;
+
+ unix.set-library-order $(sources) : $(property-set) : $(result[2-]) ;
+
+ return $(result) ;
+ }
+}
+
+class unix-prebuilt-lib-generator : generator
+{
+ import unix ;
+ rule __init__ ( * : * )
+ {
+ generator.__init__ $(1) : $(2) : $(3) : $(4) : $(5) : $(6) : $(7) : $(8) : $(9) ;
+ }
+
+ rule run ( project name ? : property-set : sources * )
+ {
+ local f = [ $(property-set).get <file> ] ;
+ unix.set-library-order-aux $(f) : $(sources) ;
+ return $(f) $(sources) ;
+ }
+}
+
+generators.register
+ [ new unix-prebuilt-lib-generator unix.prebuilt : : LIB
+ : <file> <toolset>unix ] ;
+
+generators.override unix.prebuilt : builtin.lib-generator ;
+
+
+# Declare generators
+generators.register [ new unix-linking-generator unix.link : LIB OBJ : EXE
+ : <toolset>unix ] ;
+
+generators.register [ new unix-archive-generator unix.archive : OBJ : STATIC_LIB
+ : <toolset>unix ] ;
+
+generators.register [ new unix-linking-generator unix.link.dll : LIB OBJ : SHARED_LIB
+ : <toolset>unix ] ;
+
+generators.register [ new unix-searched-lib-generator
+ unix.searched-lib-generator : : SEARCHED_LIB : <toolset>unix ] ;
+
+
+# The derived toolset must specify their own actions.
+actions link {
+}
+
+actions link.dll {
+}
+
+actions archive {
+}
+
+actions searched-lib-generator {
+}
+
+actions prebuilt {
+}
+
+
+
+
+
+.order = [ new order ] ;
+
+rule set-library-order-aux ( from * : to * )
+{
+ for local f in $(from)
+ {
+ for local t in $(to)
+ {
+ if $(f) != $(t)
+ {
+ $(.order).add-pair $(f) $(t) ;
+ }
+ }
+ }
+}
+
+rule set-library-order ( sources * : property-set : result * )
+{
+ local used-libraries ;
+ local deps = [ $(property-set).dependency ] ;
+ for local l in $(sources) $(deps:G=)
+ {
+ if [ $(l).type ] && [ type.is-derived [ $(l).type ] LIB ]
+ {
+ used-libraries += $(l) ;
+ }
+ }
+
+ local created-libraries ;
+ for local l in $(result)
+ {
+ if [ $(l).type ] && [ type.is-derived [ $(l).type ] LIB ]
+ {
+ created-libraries += $(l) ;
+ }
+ }
+
+ created-libraries = [ set.difference $(created-libraries) : $(used-libraries) ] ;
+ set-library-order-aux $(created-libraries) : $(used-libraries) ;
+}
+
+rule order-libraries ( libraries * )
+{
+ local r = [ $(.order).order $(libraries) ] ;
+ return $(r) ;
+}
+
\ No newline at end of file
diff --git a/src/kenlm/jam-files/boost-build/tools/vacpp.jam b/src/kenlm/jam-files/boost-build/tools/vacpp.jam
new file mode 100644
index 0000000..f4080fc
--- /dev/null
+++ b/src/kenlm/jam-files/boost-build/tools/vacpp.jam
@@ -0,0 +1,150 @@
+# Copyright Vladimir Prus 2004.
+# Copyright Toon Knapen 2004.
+# Distributed under the Boost Software License, Version 1.0.
+# (See accompanying file LICENSE_1_0.txt
+# or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#
+# Boost.Build V2 toolset for the IBM XL C++ compiler
+#
+
+import toolset : flags ;
+import feature ;
+import common ;
+import generators ;
+import os ;
+
+feature.extend toolset : vacpp ;
+toolset.inherit vacpp : unix ;
+generators.override vacpp.prebuilt : builtin.prebuilt ;
+generators.override vacpp.searched-lib-generator : searched-lib-generator ;
+
+# Configure the vacpp toolset
+rule init ( version ? : command * : options * )
+{
+ local condition = [
+ common.check-init-parameters vacpp : version $(version) ] ;
+
+ command = [ common.get-invocation-command vacpp : xlC
+ : $(command) : "/usr/vacpp/bin/xlC" ] ;
+
+ common.handle-options vacpp : $(condition) : $(command) : $(options) ;
+}
+
+# Declare generators
+generators.register-c-compiler vacpp.compile.c : C : OBJ : <toolset>vacpp ;
+generators.register-c-compiler vacpp.compile.c++ : CPP : OBJ : <toolset>vacpp ;
+
+# Allow C++ style comments in C files
+flags vacpp CFLAGS : -qcpluscmt ;
+
+# Declare flags
+flags vacpp CFLAGS <optimization>off : -qNOOPTimize ;
+flags vacpp CFLAGS <optimization>speed : -O3 -qstrict ;
+flags vacpp CFLAGS <optimization>space : -O2 -qcompact ;
+
+# Discretionary inlining (not recommended)
+flags vacpp CFLAGS <inlining>off : -qnoinline ;
+flags vacpp CFLAGS <inlining>on : -qinline ;
+#flags vacpp CFLAGS <inlining>full : -qinline ;
+flags vacpp CFLAGS <inlining>full : ;
+
+# Exception handling
+flags vacpp C++FLAGS <exception-handling>off : -qnoeh ;
+flags vacpp C++FLAGS <exception-handling>on : -qeh ;
+
+# Run-time Type Identification
+flags vacpp C++FLAGS <rtti>off : -qnortti ;
+flags vacpp C++FLAGS <rtti>on : -qrtti ;
+
+# Enable 64-bit memory addressing model
+flags vacpp CFLAGS <address-model>64 : -q64 ;
+flags vacpp LINKFLAGS <address-model>64 : -q64 ;
+flags vacpp ARFLAGS <target-os>aix/<address-model>64 : -X 64 ;
+
+# Use absolute path when generating debug information
+flags vacpp CFLAGS <debug-symbols>on : -g -qfullpath ;
+flags vacpp LINKFLAGS <debug-symbols>on : -g -qfullpath ;
+flags vacpp LINKFLAGS <debug-symbols>off : -s ;
+
+if [ os.name ] = AIX
+{
+ flags vacpp.compile C++FLAGS : -qfuncsect ;
+
+ # The -bnoipath strips the prepending (relative) path of libraries from
+ # the loader section in the target library or executable. Hence, during
+ # load-time LIBPATH (identical to LD_LIBRARY_PATH) or a hard-coded
+ # -blibpath (*similar* to -lrpath/-lrpath-link) is searched. Without
+ # this option, the prepending (relative) path + library name is
+ # hard-coded in the loader section, causing *only* this path to be
+ # searched during load-time. Note that the AIX linker does not have an
+ # -soname equivalent, this is as close as it gets.
+ #
+ # The above options are definately for AIX 5.x, and most likely also for
+ # AIX 4.x and AIX 6.x. For details about the AIX linker see:
+ # http://download.boulder.ibm.com/ibmdl/pub/software/dw/aix/es-aix_ll.pdf
+ #
+ flags vacpp.link LINKFLAGS <link>shared : -bnoipath ;
+
+ # Run-time linking
+ flags vacpp.link EXE-LINKFLAGS <link>shared : -brtl ;
+}
+else
+{
+ # Linux PPC
+ flags vacpp.compile CFLAGS <link>shared : -qpic=large ;
+ flags vacpp FINDLIBS : rt ;
+}
+
+# Profiling
+flags vacpp CFLAGS <profiling>on : -pg ;
+flags vacpp LINKFLAGS <profiling>on : -pg ;
+
+flags vacpp.compile OPTIONS <cflags> ;
+flags vacpp.compile.c++ OPTIONS <cxxflags> ;
+flags vacpp DEFINES <define> ;
+flags vacpp UNDEFS <undef> ;
+flags vacpp HDRS <include> ;
+flags vacpp STDHDRS <sysinclude> ;
+flags vacpp.link OPTIONS <linkflags> ;
+flags vacpp ARFLAGS <arflags> ;
+
+flags vacpp LIBPATH <library-path> ;
+flags vacpp NEEDLIBS <library-file> ;
+flags vacpp FINDLIBS <find-shared-library> ;
+flags vacpp FINDLIBS <find-static-library> ;
+
+# Select the compiler name according to the threading model.
+flags vacpp VA_C_COMPILER <threading>single : xlc ;
+flags vacpp VA_C_COMPILER <threading>multi : xlc_r ;
+flags vacpp VA_CXX_COMPILER <threading>single : xlC ;
+flags vacpp VA_CXX_COMPILER <threading>multi : xlC_r ;
+
+SPACE = " " ;
+
+flags vacpp.link.dll HAVE_SONAME <target-os>linux : "" ;
+
+actions vacpp.link bind NEEDLIBS
+{
+ $(VA_CXX_COMPILER) $(EXE-LINKFLAGS) $(LINKFLAGS) -o "$(<[1])" -L$(LIBPATH) -L$(STDLIBPATH) "$(>)" "$(NEEDLIBS)" "$(NEEDLIBS)" -l$(FINDLIBS) $(OPTIONS) $(USER_OPTIONS)
+}
+
+actions vacpp.link.dll bind NEEDLIBS
+{
+ xlC_r -G $(LINKFLAGS) -o "$(<[1])" $(HAVE_SONAME)-Wl,-soname$(SPACE)-Wl,$(<[-1]:D=) -L$(LIBPATH) -L$(STDLIBPATH) "$(>)" "$(NEEDLIBS)" "$(NEEDLIBS)" -l$(FINDLIBS) $(OPTIONS) $(USER_OPTIONS)
+}
+
+actions vacpp.compile.c
+{
+ $(VA_C_COMPILER) -c $(OPTIONS) $(USER_OPTIONS) -I$(BOOST_ROOT) -U$(UNDEFS) -D$(DEFINES) $(CFLAGS) -I"$(HDRS)" -I"$(STDHDRS)" -o "$(<)" "$(>)"
+}
+
+actions vacpp.compile.c++
+{
+ $(VA_CXX_COMPILER) -c $(OPTIONS) $(USER_OPTIONS) -I$(BOOST_ROOT) -U$(UNDEFS) -D$(DEFINES) $(CFLAGS) $(C++FLAGS) -I"$(HDRS)" -I"$(STDHDRS)" -o "$(<)" "$(>)"
+}
+
+actions updated together piecemeal vacpp.archive
+{
+ ar $(ARFLAGS) ru "$(<)" "$(>)"
+}
diff --git a/src/kenlm/jam-files/boost-build/tools/whale.jam b/src/kenlm/jam-files/boost-build/tools/whale.jam
new file mode 100644
index 0000000..9335ff0
--- /dev/null
+++ b/src/kenlm/jam-files/boost-build/tools/whale.jam
@@ -0,0 +1,116 @@
+# Copyright (C) Vladimir Prus 2002-2005.
+
+# Use, modification and distribution is subject to the Boost Software
+# License Version 1.0. (See accompanying file LICENSE_1_0.txt or
+# http://www.boost.org/LICENSE_1_0.txt)
+
+# This module implements support for Whale/Dolphin/WD parser/lexer tools.
+# See http://www.cs.queensu.ca/home/okhotin/whale/ for details.
+#
+# There are three interesting target types:
+# - WHL (the parser sources), that are converted to CPP and H
+# - DLP (the lexer sources), that are converted to CPP and H
+# - WD (combined parser/lexer sources), that are converted to WHL + DLP
+
+import type ;
+import generators ;
+import path ;
+import "class" : new ;
+import errors ;
+
+rule init ( path # path the Whale/Dolphin/WD binaries
+ )
+{
+ if $(.configured) && $(.path) != $(path)
+ {
+ errors.user-error "Attempt to reconfigure Whale support" :
+ "Previously configured with path \"$(.path:E=<empty>)\"" :
+ "Now configuring with path \"$(path:E=<empty>)\"" ;
+
+ }
+ .configured = true ;
+ .path = $(path) ;
+
+ .whale = [ path.join $(path) whale ] ;
+ .dolphin = [ path.join $(path) dolphin ] ;
+ .wd = [ path.join $(path) wd ] ;
+}
+
+
+# Declare the types.
+type.register WHL : whl ;
+type.register DLP : dlp ;
+type.register WHL_LR0 : lr0 ;
+type.register WD : wd ;
+
+# Declare standard generators.
+generators.register-standard whale.whale : WHL : CPP H H(%_symbols) ;
+generators.register-standard whale.dolphin : DLP : CPP H ;
+generators.register-standard whale.wd : WD : WHL(%_parser) DLP(%_lexer) ;
+
+# The conversions defines above a ambiguious when we generated CPP from WD.
+# We can either go via WHL type, or via DLP type.
+# The following custom generator handles this by running both conversions.
+
+class wd-to-cpp : generator
+{
+ rule __init__ ( * : * : * )
+ {
+ generator.__init__ $(1) : $(2) : $(3) ;
+ }
+
+ rule run ( project name ? : property-set : source * )
+ {
+ if ! $(source[2])
+ {
+ local new-sources ;
+ if ! [ $(source).type ] in WHL DLP
+ {
+ local r1 = [ generators.construct $(project) $(name)
+ : WHL : $(property-set) : $(source) ] ;
+ local r2 = [ generators.construct $(project) $(name)
+ : DLP : $(property-set) : $(source) ] ;
+
+ new-sources = [ sequence.unique $(r1[2-]) $(r2[2-]) ] ;
+ }
+ else
+ {
+ new-sources = $(source) ;
+ }
+
+ local result ;
+ for local i in $(new-sources)
+ {
+ local t = [ generators.construct $(project) $(name) : CPP
+ : $(property-set) : $(i) ] ;
+ result += $(t[2-]) ;
+ }
+ return $(result) ;
+ }
+ }
+
+}
+
+
+generators.override whale.wd-to-cpp : whale.whale ;
+generators.override whale.wd-to-cpp : whale.dolphin ;
+
+
+generators.register [ new wd-to-cpp whale.wd-to-cpp : : CPP ] ;
+
+
+actions whale
+{
+ $(.whale) -d $(<[1]:D) $(>)
+}
+
+actions dolphin
+{
+ $(.dolphin) -d $(<[1]:D) $(>)
+}
+
+actions wd
+{
+ $(.wd) -d $(<[1]:D) -g $(>)
+}
+
diff --git a/src/kenlm/jam-files/boost-build/tools/xlf.jam b/src/kenlm/jam-files/boost-build/tools/xlf.jam
new file mode 100644
index 0000000..e7fcc60
--- /dev/null
+++ b/src/kenlm/jam-files/boost-build/tools/xlf.jam
@@ -0,0 +1,39 @@
+# Copyright (C) 2004 Toon Knapen
+#
+# Use, modification and distribution is subject to the Boost Software
+# License Version 1.0. (See accompanying file LICENSE_1_0.txt or
+# http://www.boost.org/LICENSE_1_0.txt)
+
+#
+# toolset configuration for the IBM Fortran compiler (xlf)
+#
+
+import toolset : flags ;
+import feature ;
+import fortran ;
+
+rule init ( version ? : command * : options * )
+{
+}
+
+# Declare flags and action for compilation
+flags xlf OPTIONS <optimization>off : -O0 ;
+flags xlf OPTIONS <optimization>speed : -O3 ;
+flags xlf OPTIONS <optimization>space : -Os ;
+
+flags xlf OPTIONS <debug-symbols>on : -g ;
+flags xlf OPTIONS <profiling>on : -pg ;
+
+flags xlf DEFINES <define> ;
+flags xlf INCLUDES <include> ;
+
+rule compile-fortran
+{
+}
+
+actions compile-fortran
+{
+ xlf $(OPTIONS) -I$(INCLUDES) -c -o "$(<)" "$(>)"
+}
+
+generators.register-fortran-compiler xlf.compile-fortran : FORTRAN : OBJ ;
diff --git a/src/kenlm/jam-files/boost-build/tools/xsltproc-config.jam b/src/kenlm/jam-files/boost-build/tools/xsltproc-config.jam
new file mode 100644
index 0000000..d1be25f
--- /dev/null
+++ b/src/kenlm/jam-files/boost-build/tools/xsltproc-config.jam
@@ -0,0 +1,36 @@
+#~ Copyright 2005 Rene Rivera.
+#~ Distributed under the Boost Software License, Version 1.0.
+#~ (See accompanying file LICENSE_1_0.txt or http://www.boost.org/LICENSE_1_0.txt)
+
+# Automatic configuration for the xsltproc toolset. To use, just import this
+# module.
+
+import os ;
+import toolset : using ;
+
+
+local rule locate-executable ( name )
+{
+ local path = [ modules.peek : PATH ] ;
+ local exe ;
+ if [ os.name ] = NT
+ {
+ exe = [ GLOB $(path) "C:\\Boost\\bin" : $(name)\.exe ] ;
+ }
+ else
+ {
+ exe = [ GLOB $(path) : $(name) ] ;
+ }
+ return $(exe[1]) ;
+}
+
+
+local xsltproc-exe = [ locate-executable xsltproc ] ;
+if $(xsltproc-exe)
+{
+ if --debug-configuration in [ modules.peek : ARGV ]
+ {
+ ECHO notice: using xsltproc ":" $(xsltproc-exe) ;
+ }
+ using xsltproc : $(xsltproc-exe) ;
+}
diff --git a/src/kenlm/jam-files/boost-build/tools/xsltproc.jam b/src/kenlm/jam-files/boost-build/tools/xsltproc.jam
new file mode 100644
index 0000000..d847646
--- /dev/null
+++ b/src/kenlm/jam-files/boost-build/tools/xsltproc.jam
@@ -0,0 +1,205 @@
+# Copyright (C) 2003 Doug Gregor. Permission to copy, use, modify, sell and
+# distribute this software is granted provided this copyright notice appears in
+# all copies. This software is provided "as is" without express or implied
+# warranty, and with no claim as to its suitability for any purpose.
+
+# This module defines rules to apply an XSLT stylesheet to an XML file using the
+# xsltproc driver, part of libxslt.
+
+import common ;
+import feature ;
+import modules ;
+import os ;
+import path ;
+import regex ;
+import sequence ;
+
+
+feature.feature xsl:param : : free ;
+feature.feature xsl:path : : free ;
+feature.feature catalog : : free ;
+
+
+# Initialize xsltproc support. The parameters are:
+# xsltproc: The xsltproc executable
+#
+rule init ( xsltproc ? )
+{
+ if $(xsltproc)
+ {
+ modify-config ;
+ .xsltproc = $(xsltproc) ;
+ check-xsltproc ;
+ }
+}
+
+
+rule freeze-config ( )
+{
+ if ! $(.config-frozen)
+ {
+ .config-frozen = true ;
+ .xsltproc ?= [ modules.peek : XSLTPROC ] ;
+ .xsltproc ?= xsltproc ;
+ check-xsltproc ;
+ .is-cygwin = [ .is-cygwin $(.xsltproc) ] ;
+ }
+}
+
+
+rule modify-config ( )
+{
+ if $(.config-frozen)
+ {
+ import errors ;
+ errors.user-error
+ "xsltproc: Cannot change xsltproc command after it has been used." ;
+ }
+}
+
+
+rule check-xsltproc ( )
+{
+ if $(.xsltproc)
+ {
+ local status = [ SHELL "\"$(.xsltproc)\" -V" : no-output : exit-status ]
+ ;
+ if $(status[2]) != 0
+ {
+ import errors ;
+ errors.user-error "xsltproc: Could not run \"$(.xsltproc)\" -V." ;
+ }
+ }
+}
+
+
+# Returns a non-empty string if a cygwin xsltproc binary was specified.
+#
+rule is-cygwin ( )
+{
+ freeze-config ;
+ return $(.is-cygwin) ;
+}
+
+
+rule .is-cygwin ( xsltproc )
+{
+ if [ os.on-windows ]
+ {
+ local file = [ path.make [ modules.binding $(__name__) ] ] ;
+ local dir = [ path.native [ path.join [ path.parent $(file) ] xsltproc ]
+ ] ;
+ if [ os.name ] = CYGWIN
+ {
+ dir = $(dir:W) ;
+ }
+ local command =
+ "\"$(xsltproc)\" \"$(dir)\\test.xsl\" \"$(dir)\\test.xml\" 2>&1" ;
+ local status = [ SHELL $(command) : no-output : exit-status ] ;
+ if $(status[2]) != "0"
+ {
+ return true ;
+ }
+ }
+}
+
+
+rule compute-xslt-flags ( target : properties * )
+{
+ # Raw flags.
+ local flags = [ feature.get-values <flags> : $(properties) ] ;
+
+ # Translate <xsl:param> into command line flags.
+ for local param in [ feature.get-values <xsl:param> : $(properties) ]
+ {
+ local namevalue = [ regex.split $(param) "=" ] ;
+ flags += --stringparam $(namevalue[1]) \"$(namevalue[2])\" ;
+ }
+
+ # Translate <xsl:path>.
+ for local path in [ feature.get-values <xsl:path> : $(properties) ]
+ {
+ flags += --path \"$(path:G=)\" ;
+ }
+
+ # Take care of implicit dependencies.
+ local other-deps ;
+ for local dep in [ feature.get-values <implicit-dependency> : $(properties)
+ ]
+ {
+ other-deps += [ $(dep:G=).creating-subvariant ] ;
+ }
+
+ local implicit-target-directories ;
+ for local dep in [ sequence.unique $(other-deps) ]
+ {
+ implicit-target-directories += [ $(dep).all-target-directories ] ;
+ }
+
+ for local dir in $(implicit-target-directories)
+ {
+ flags += --path \"$(dir:T)\" ;
+ }
+
+ return $(flags) ;
+}
+
+
+local rule .xsltproc ( target : source stylesheet : properties * : dirname ? :
+ action )
+{
+ freeze-config ;
+ STYLESHEET on $(target) = $(stylesheet) ;
+ FLAGS on $(target) += [ compute-xslt-flags $(target) : $(properties) ] ;
+ NAME on $(target) = $(.xsltproc) ;
+
+ for local catalog in [ feature.get-values <catalog> : $(properties) ]
+ {
+ CATALOG = [ common.variable-setting-command XML_CATALOG_FILES :
+ $(catalog:T) ] ;
+ }
+
+ if [ os.on-windows ] && ! [ is-cygwin ]
+ {
+ action = $(action).windows ;
+ }
+
+ $(action) $(target) : $(source) ;
+}
+
+
+rule xslt ( target : source stylesheet : properties * )
+{
+ return [ .xsltproc $(target) : $(source) $(stylesheet) : $(properties) : :
+ xslt-xsltproc ] ;
+}
+
+
+rule xslt-dir ( target : source stylesheet : properties * : dirname )
+{
+ return [ .xsltproc $(target) : $(source) $(stylesheet) : $(properties) :
+ $(dirname) : xslt-xsltproc-dir ] ;
+}
+
+actions xslt-xsltproc.windows
+{
+ $(CATALOG) "$(NAME:E=xsltproc)" $(FLAGS) --xinclude -o "$(<)" "$(STYLESHEET:W)" "$(>:W)"
+}
+
+
+actions xslt-xsltproc bind STYLESHEET
+{
+ $(CATALOG) "$(NAME:E=xsltproc)" $(FLAGS) --xinclude -o "$(<)" "$(STYLESHEET:T)" "$(>:T)"
+}
+
+
+actions xslt-xsltproc-dir.windows bind STYLESHEET
+{
+ $(CATALOG) "$(NAME:E=xsltproc)" $(FLAGS) --xinclude -o "$(<:D)/" "$(STYLESHEET:W)" "$(>:W)"
+}
+
+
+actions xslt-xsltproc-dir bind STYLESHEET
+{
+ $(CATALOG) "$(NAME:E=xsltproc)" $(FLAGS) --xinclude -o "$(<:D)/" "$(STYLESHEET:T)" "$(>:T)"
+}
diff --git a/src/kenlm/jam-files/boost-build/tools/xsltproc/included.xsl b/src/kenlm/jam-files/boost-build/tools/xsltproc/included.xsl
new file mode 100644
index 0000000..ef86394
--- /dev/null
+++ b/src/kenlm/jam-files/boost-build/tools/xsltproc/included.xsl
@@ -0,0 +1,11 @@
+<?xml version="1.0" encoding="utf-8"?>
+<!--
+ Copyright (c) 2010 Steven Watanabe
+
+ Distributed under the Boost Software License, Version 1.0.
+ (See accompanying file LICENSE_1_0.txt or copy at
+ http://www.boost.org/LICENSE_1_0.txt)
+ -->
+<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
+ version="1.0">
+</xsl:stylesheet>
diff --git a/src/kenlm/jam-files/boost-build/tools/xsltproc/test.xml b/src/kenlm/jam-files/boost-build/tools/xsltproc/test.xml
new file mode 100644
index 0000000..57c8ba1
--- /dev/null
+++ b/src/kenlm/jam-files/boost-build/tools/xsltproc/test.xml
@@ -0,0 +1,2 @@
+<?xml version="1.0" encoding="utf-8"?>
+<root/>
diff --git a/src/kenlm/jam-files/boost-build/tools/xsltproc/test.xsl b/src/kenlm/jam-files/boost-build/tools/xsltproc/test.xsl
new file mode 100644
index 0000000..a142c91
--- /dev/null
+++ b/src/kenlm/jam-files/boost-build/tools/xsltproc/test.xsl
@@ -0,0 +1,12 @@
+<?xml version="1.0" encoding="utf-8"?>
+<!--
+ Copyright (c) 2010 Steven Watanabe
+
+ Distributed under the Boost Software License, Version 1.0.
+ (See accompanying file LICENSE_1_0.txt or copy at
+ http://www.boost.org/LICENSE_1_0.txt)
+ -->
+<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
+ version="1.0">
+ <xsl:include href="included.xsl"/>
+</xsl:stylesheet>
diff --git a/src/kenlm/jam-files/boost-build/tools/zlib.jam b/src/kenlm/jam-files/boost-build/tools/zlib.jam
new file mode 100644
index 0000000..8095eee
--- /dev/null
+++ b/src/kenlm/jam-files/boost-build/tools/zlib.jam
@@ -0,0 +1,227 @@
+# Copyright (c) 2010 Vladimir Prus.
+# Copyright (c) 2013 Steven Watanabe
+#
+# Use, modification and distribution is subject to the Boost Software
+# License Version 1.0. (See accompanying file LICENSE_1_0.txt or
+# http://www.boost.org/LICENSE_1_0.txt)
+
+# Supports the zlib library
+#
+# After 'using zlib', the following targets are available:
+#
+# /zlib//zlib -- The zlib library
+
+import project ;
+import ac ;
+import errors ;
+import "class" : new ;
+import targets ;
+import path ;
+import modules ;
+import errors ;
+import indirect ;
+import property ;
+import property-set ;
+
+header = zlib.h ;
+names = z zlib zll zdll ;
+
+sources = adler32.c compress.c
+ crc32.c deflate.c gzclose.c gzio.c gzlib.c gzread.c gzwrite.c
+ infback.c inffast.c inflate.c inftrees.c trees.c uncompr.c zutil.c ;
+
+library-id = 0 ;
+
+if --debug-configuration in [ modules.peek : ARGV ]
+{
+ .debug = true ;
+}
+
+# Initializes the zlib library.
+#
+# zlib can be configured either to use pre-existing binaries
+# or to build the library from source.
+#
+# Options for configuring a prebuilt zlib::
+#
+# <search>
+# The directory containing the zlib binaries.
+# <name>
+# Overrides the default library name.
+# <include>
+# The directory containing the zlib headers.
+#
+# If none of these options is specified, then the environmental
+# variables ZLIB_LIBRARY_PATH, ZLIB_NAME, and ZLIB_INCLUDE will
+# be used instead.
+#
+# Options for building zlib from source::
+#
+# <source>
+# The zlib source directory. Defaults to the environmental variable
+# ZLIB_SOURCE.
+# <tag>
+# A rule which computes the actual name of the compiled
+# libraries based on the build properties. Ignored
+# when using precompiled binaries.
+# <build-name>
+# The base name to use for the compiled library. Ignored
+# when using precompiled binaries.
+#
+# Examples::
+#
+# # Find zlib in the default system location
+# using zlib ;
+# # Build zlib from source
+# using zlib : 1.2.7 : <source>/home/steven/zlib-1.2.7 ;
+# # Find zlib in /usr/local
+# using zlib : 1.2.7
+# : <include>/usr/local/include <search>/usr/local/lib ;
+# # Build zlib from source for msvc and find
+# # prebuilt binaries for gcc.
+# using zlib : 1.2.7 : <source>C:/Devel/src/zlib-1.2.7 : <toolset>msvc ;
+# using zlib : 1.2.7 : : <toolset>gcc ;
+#
+rule init (
+ version ?
+ # The zlib version (currently ignored)
+
+ : options *
+ # A list of the options to use
+
+ : requirements *
+ # The requirements for the zlib target
+
+ : is-default ?
+ # Default configurations are only used when zlib
+ # has not yet been configured.
+ )
+{
+ local caller = [ project.current ] ;
+
+ if ! $(.initialized)
+ {
+ .initialized = true ;
+
+ project.initialize $(__name__) ;
+ .project = [ project.current ] ;
+ project zlib ;
+ }
+
+ local library-path = [ property.select <search> : $(options) ] ;
+ library-path = $(library-path:G=) ;
+ local include-path = [ property.select <include> : $(options) ] ;
+ include-path = $(include-path:G=) ;
+ local source-path = [ property.select <source> : $(options) ] ;
+ source-path = $(source-path:G=) ;
+ local library-name = [ property.select <name> : $(options) ] ;
+ library-name = $(library-name:G=) ;
+ local tag = [ property.select <tag> : $(options) ] ;
+ tag = $(tag:G=) ;
+ local build-name = [ property.select <build-name> : $(options) ] ;
+ build-name = $(build-name:G=) ;
+
+ condition = [ property-set.create $(requirements) ] ;
+ condition = [ property-set.create [ $(condition).base ] ] ;
+
+ local no-build-from-source ;
+ # Ignore environmental ZLIB_SOURCE if this initialization
+ # requested to search for a specific pre-built library.
+ if $(library-path) || $(include-path) || $(library-name)
+ {
+ if $(source-path) || $(tag) || $(build-name)
+ {
+ errors.user-error "incompatible options for zlib:"
+ [ property.select <search> <include> <name> : $(options) ] "and"
+ [ property.select <source> <tag> <build-name> : $(options) ] ;
+ }
+ else
+ {
+ no-build-from-source = true ;
+ }
+ }
+
+ source-path ?= [ modules.peek : ZLIB_SOURCE ] ;
+
+ if $(.configured.$(condition))
+ {
+ if $(is-default)
+ {
+ if $(.debug)
+ {
+ ECHO "notice: [zlib] zlib is already configured" ;
+ }
+ }
+ else
+ {
+ errors.user-error "zlib is already configured" ;
+ }
+ return ;
+ }
+ else if $(source-path) && ! $(no-build-from-source)
+ {
+ build-name ?= z ;
+ library-id = [ CALC $(library-id) + 1 ] ;
+ tag = [ MATCH ^@?(.*)$ : $(tag) ] ;
+ if $(tag) && ! [ MATCH ^([^%]*)%([^%]+)$ : $(tag) ]
+ {
+ tag = [ indirect.make $(tag) : [ $(caller).project-module ] ] ;
+ }
+ sources = [ path.glob $(source-path) : $(sources) ] ;
+ if $(.debug)
+ {
+ ECHO "notice: [zlib] Building zlib from source as $(build-name)" ;
+ if $(condition)
+ {
+ ECHO "notice: [zlib] Condition" [ $(condition).raw ] ;
+ }
+ if $(sources)
+ {
+ ECHO "notice: [zlib] found zlib source in $(source-path)" ;
+ }
+ else
+ {
+ ECHO "warning: [zlib] could not find zlib source in $(source-path)" ;
+ }
+ }
+ local target ;
+ if $(sources) {
+ target = [ targets.create-typed-target LIB : $(.project)
+ : $(build-name).$(library-id)
+ : $(sources)
+ : $(requirements)
+ <tag>@$(tag)
+ <include>$(source-path)
+ <toolset>msvc:<define>_CRT_SECURE_NO_DEPRECATE
+ <toolset>msvc:<define>_SCL_SECURE_NO_DEPRECATE
+ <link>shared:<define>ZLIB_DLL
+ :
+ : <include>$(source-path) ] ;
+ }
+
+ local mt = [ new ac-library zlib : $(.project) : $(condition) ] ;
+ $(mt).set-header $(header) ;
+ $(mt).set-default-names $(names) ;
+ if $(target)
+ {
+ $(mt).set-target $(target) ;
+ }
+ targets.main-target-alternative $(mt) ;
+ } else {
+ if $(.debug)
+ {
+ ECHO "notice: [zlib] Using pre-installed library" ;
+ if $(condition)
+ {
+ ECHO "notice: [zlib] Condition" [ $(condition).raw ] ;
+ }
+ }
+
+ local mt = [ new ac-library zlib : $(.project) : $(condition) :
+ $(include-path) : $(library-path) : $(library-name) : $(root) ] ;
+ $(mt).set-header $(header) ;
+ $(mt).set-default-names $(names) ;
+ targets.main-target-alternative $(mt) ;
+ }
+ .configured.$(condition) = true ;
+}
diff --git a/src/kenlm/jam-files/boost-build/user-config.jam b/src/kenlm/jam-files/boost-build/user-config.jam
new file mode 100644
index 0000000..fbbf13f
--- /dev/null
+++ b/src/kenlm/jam-files/boost-build/user-config.jam
@@ -0,0 +1,92 @@
+# Copyright 2003, 2005 Douglas Gregor
+# Copyright 2004 John Maddock
+# Copyright 2002, 2003, 2004, 2007 Vladimir Prus
+# Distributed under the Boost Software License, Version 1.0.
+# (See accompanying file LICENSE_1_0.txt or http://www.boost.org/LICENSE_1_0.txt)
+
+# This file is used to configure your Boost.Build installation. You can modify
+# this file in place, or you can place it in a permanent location so that it
+# does not get overwritten should you get a new version of Boost.Build. See:
+#
+# http://www.boost.org/boost-build2/doc/html/bbv2/overview/configuration.html
+#
+# for documentation about possible permanent locations.
+
+# This file specifies which toolsets (C++ compilers), libraries, and other
+# tools are available. Often, you should be able to just uncomment existing
+# example lines and adjust them to taste. The complete list of supported tools,
+# and configuration instructions can be found at:
+#
+# http://boost.org/boost-build2/doc/html/bbv2/reference/tools.html
+#
+
+# This file uses Jam language syntax to describe available tools. Mostly,
+# there are 'using' lines, that contain the name of the used tools, and
+# parameters to pass to those tools -- where paremeters are separated by
+# semicolons. Important syntax notes:
+#
+# - Both ':' and ';' must be separated from other tokens by whitespace
+# - The '\' symbol is a quote character, so when specifying Windows paths you
+# should use '/' or '\\' instead.
+#
+# More details about the syntax can be found at:
+#
+# http://boost.org/boost-build2/doc/html/bbv2/advanced.html#bbv2.advanced.jam_language
+#
+
+# ------------------
+# GCC configuration.
+# ------------------
+
+# Configure gcc (default version).
+# using gcc ;
+
+# Configure specific gcc version, giving alternative name to use.
+# using gcc : 3.2 : g++-3.2 ;
+
+
+# -------------------
+# MSVC configuration.
+# -------------------
+
+# Configure msvc (default version, searched for in standard locations and PATH).
+# using msvc ;
+
+# Configure specific msvc version (searched for in standard locations and PATH).
+# using msvc : 8.0 ;
+
+
+# ----------------------
+# Borland configuration.
+# ----------------------
+# using borland ;
+
+
+# ----------------------
+# STLPort configuration.
+# ----------------------
+
+# Configure specifying location of STLPort headers. Libraries must be either
+# not needed or available to the compiler by default.
+# using stlport : : /usr/include/stlport ;
+
+# Configure specifying location of both headers and libraries explicitly.
+# using stlport : : /usr/include/stlport /usr/lib ;
+
+
+# -----------------
+# QT configuration.
+# -----------------
+
+# Configure assuming QTDIR gives the installation prefix.
+# using qt ;
+
+# Configure with an explicit installation prefix.
+# using qt : /usr/opt/qt ;
+
+# ---------------------
+# Python configuration.
+# ---------------------
+
+# Configure specific Python version.
+# using python : 3.1 : /usr/bin/python3 : /usr/include/python3.1 : /usr/lib ;
diff --git a/src/kenlm/jam-files/boost-build/util/assert.jam b/src/kenlm/jam-files/boost-build/util/assert.jam
new file mode 100644
index 0000000..65e880f
--- /dev/null
+++ b/src/kenlm/jam-files/boost-build/util/assert.jam
@@ -0,0 +1,346 @@
+# Copyright 2001, 2002, 2003 Dave Abrahams
+# Copyright 2006 Rene Rivera
+# Copyright 2002, 2003 Vladimir Prus
+# Distributed under the Boost Software License, Version 1.0.
+# (See accompanying file LICENSE_1_0.txt or copy at
+# http://www.boost.org/LICENSE_1_0.txt)
+
+import errors ;
+import modules ;
+
+
+################################################################################
+#
+# Private implementation details.
+#
+################################################################################
+
+# Rule added as a replacement for the regular Jam = operator but which does not
+# ignore trailing empty string elements.
+#
+local rule exact-equal-test ( lhs * : rhs * )
+{
+ local lhs_extended = $(lhs) xxx ;
+ local rhs_extended = $(rhs) xxx ;
+ if $(lhs_extended) = $(rhs_extended)
+ {
+ return true ;
+ }
+}
+
+
+# Two lists are considered set-equal if they contain the same elements, ignoring
+# duplicates and ordering.
+#
+local rule set-equal-test ( set1 * : set2 * )
+{
+ if ( $(set1) in $(set2) ) && ( $(set2) in $(set1) )
+ {
+ return true ;
+ }
+}
+
+
+################################################################################
+#
+# Public interface.
+#
+################################################################################
+
+# Assert the equality of A and B, ignoring trailing empty string elements.
+#
+rule equal ( a * : b * )
+{
+ if $(a) != $(b)
+ {
+ errors.error-skip-frames 3 assertion failure: \"$(a)\" "==" \"$(b)\"
+ (ignoring trailing empty strings) ;
+ }
+}
+
+
+# Assert that the result of calling RULE-NAME on the given arguments has a false
+# logical value (is either an empty list or all empty strings).
+#
+rule false ( rule-name args * : * )
+{
+ local result ;
+ module [ CALLER_MODULE ]
+ {
+ modules.poke assert : result : [ $(1) : $(2) : $(3) : $(4) : $(5) : $(6)
+ : $(7) : $(8) : $(9) : $(10) : $(11) : $(12) : $(13) : $(14) : $(15)
+ : $(16) : $(17) : $(18) : $(19) ] ;
+ }
+
+ if $(result)
+ {
+ errors.error-skip-frames 3 assertion failure: Expected false result from
+ "[" $(rule-name) [ errors.lol->list $(args) : $(2) : $(3) : $(4) :
+ $(5) : $(6) : $(7) : $(8) : $(9) : $(10) : $(11) : $(12) : $(13) :
+ $(14) : $(15) : $(16) : $(17) : $(18) : $(19) ] "]" : Got: "["
+ \"$(result)\" "]" ;
+ }
+}
+
+
+# Assert that ELEMENT is present in LIST.
+#
+rule "in" ( element : list * )
+{
+ if ! $(element) in $(list)
+ {
+ errors.error-skip-frames 3 assertion failure: Expected \"$(element)\" in
+ "[" \"$(list)\" "]" ;
+ }
+}
+
+
+# Assert the inequality of A and B, ignoring trailing empty string elements.
+#
+rule not-equal ( a * : b * )
+{
+ if $(a) = $(b)
+ {
+ errors.error-skip-frames 3 assertion failure: \"$(a)\" "!=" \"$(b)\"
+ (ignoring trailing empty strings) ;
+ }
+}
+
+
+# Assert that ELEMENT is not present in LIST.
+#
+rule not-in ( element : list * )
+{
+ if $(element) in $(list)
+ {
+ errors.error-skip-frames 3 assertion failure: Did not expect
+ \"$(element)\" in "[" \"$(list)\" "]" ;
+ }
+}
+
+
+# Assert the inequality of A and B as sets.
+#
+rule not-set-equal ( a * : b * )
+{
+ if [ set-equal-test $(a) : $(b) ]
+ {
+ errors.error-skip-frames 3 assertion failure: Expected "[" \"$(a)\" "]"
+ and "[" \"$(b)\" "]" to not be equal as sets ;
+ }
+}
+
+
+# Assert that A and B are not exactly equal, not ignoring trailing empty string
+# elements.
+#
+rule not-exact-equal ( a * : b * )
+{
+ if [ exact-equal-test $(a) : $(b) ]
+ {
+ errors.error-skip-frames 3 assertion failure: \"$(a)\" "!=" \"$(b)\" ;
+ }
+}
+
+
+# Assert that EXPECTED is the result of calling RULE-NAME with the given
+# arguments.
+#
+rule result ( expected * : rule-name args * : * )
+{
+ local result ;
+ module [ CALLER_MODULE ]
+ {
+ modules.poke assert : result : [ $(2) : $(3) : $(4) : $(5) : $(6) : $(7)
+ : $(8) : $(9) : $(10) : $(11) : $(12) : $(13) : $(14) : $(15) :
+ $(16) : $(17) : $(18) : $(19) ] ;
+ }
+
+ if ! [ exact-equal-test $(result) : $(expected) ]
+ {
+ errors.error-skip-frames 3 assertion failure: "[" $(rule-name) [
+ errors.lol->list $(args) : $(3) : $(4) : $(5) : $(6) : $(7) : $(8) :
+ $(9) : $(10) : $(11) : $(12) : $(13) : $(14) : $(15) : $(16) : $(17)
+ : $(18) : $(19) ] "]" : Expected: "[" \"$(expected)\" "]" : Got: "["
+ \"$(result)\" "]" ;
+ }
+}
+
+
+# Assert that EXPECTED is set-equal (i.e. duplicates and ordering are ignored)
+# to the result of calling RULE-NAME with the given arguments. Note that rules
+# called this way may accept at most 18 parameters.
+#
+rule result-set-equal ( expected * : rule-name args * : * )
+{
+ local result ;
+ module [ CALLER_MODULE ]
+ {
+ modules.poke assert : result : [ $(2) : $(3) : $(4) : $(5) : $(6) : $(7)
+ : $(8) : $(9) : $(10) : $(11) : $(12) : $(13) : $(14) : $(15) :
+ $(16) : $(17) : $(18) : $(19) ] ;
+ }
+
+ if ! [ set-equal-test $(result) : $(expected) ]
+ {
+ errors.error-skip-frames 3 assertion failure: "[" $(rule-name) [
+ errors.lol->list $(args) : $(3) : $(4) : $(5) : $(6) : $(7) : $(8) :
+ $(9) : $(10) : $(11) : $(12) : $(13) : $(14) : $(15) : $(16) : $(17)
+ : $(18) : $(19) ] "]" : Expected: "[" \"$(expected)\" "]" : Got: "["
+ \"$(result)\" "]" ;
+ }
+}
+
+
+# Assert the equality of A and B as sets.
+#
+rule set-equal ( a * : b * )
+{
+ if ! [ set-equal-test $(a) : $(b) ]
+ {
+ errors.error-skip-frames 3 assertion failure: Expected "[" \"$(a)\" "]"
+ and "[" \"$(b)\" "]" to be equal as sets ;
+ }
+}
+
+
+# Assert that the result of calling RULE-NAME on the given arguments has a true
+# logical value (is neither an empty list nor all empty strings).
+#
+rule true ( rule-name args * : * )
+{
+ local result ;
+ module [ CALLER_MODULE ]
+ {
+ modules.poke assert : result : [ $(1) : $(2) : $(3) : $(4) : $(5) : $(6)
+ : $(7) : $(8) : $(9) : $(10) : $(11) : $(12) : $(13) : $(14) : $(15)
+ : $(16) : $(17) : $(18) : $(19) ] ;
+ }
+
+ if ! $(result)
+ {
+ errors.error-skip-frames 3 assertion failure: Expected true result from
+ "[" $(rule-name) [ errors.lol->list $(args) : $(2) : $(3) : $(4) :
+ $(5) : $(6) : $(7) : $(8) : $(9) : $(10) : $(11) : $(12) : $(13) :
+ $(14) : $(15) : $(16) : $(17) : $(18) : $(19) ] "]" ;
+ }
+}
+
+
+# Assert the exact equality of A and B, not ignoring trailing empty string
+# elements.
+#
+rule exact-equal ( a * : b * )
+{
+ if ! [ exact-equal-test $(a) : $(b) ]
+ {
+ errors.error-skip-frames 3 assertion failure: \"$(a)\" "==" \"$(b)\" ;
+ }
+}
+
+
+# Assert that the given variable is not an empty list.
+#
+rule variable-not-empty ( name )
+{
+ local value = [ modules.peek [ CALLER_MODULE ] : $(name) ] ;
+ if ! $(value)-is-not-empty
+ {
+ errors.error-skip-frames 3 assertion failure: Expected variable
+ \"$(name)\" not to be an empty list ;
+ }
+}
+
+
+rule __test__ ( )
+{
+ # Helper rule used to avoid test duplication related to different list
+ # equality test rules.
+ #
+ local rule run-equality-test ( equality-assert : ignore-trailing-empty-strings ? )
+ {
+ local not-equality-assert = not-$(equality-assert) ;
+
+ # When the given equality test is expected to ignore trailing empty
+ # strings some of the test results should be inverted.
+ local not-equality-assert-i = not-$(equality-assert) ;
+ if $(ignore-trailing-empty-strings)
+ {
+ not-equality-assert-i = $(equality-assert) ;
+ }
+
+ $(equality-assert) : ;
+ $(equality-assert) "" "" : "" "" ;
+ $(not-equality-assert-i) : "" "" ;
+ $(equality-assert) x : x ;
+ $(not-equality-assert) : x ;
+ $(not-equality-assert) "" : x ;
+ $(not-equality-assert) "" "" : x ;
+ $(not-equality-assert-i) x : x "" ;
+ $(equality-assert) x "" : x "" ;
+ $(not-equality-assert) x : "" x ;
+ $(equality-assert) "" x : "" x ;
+
+ $(equality-assert) 1 2 3 : 1 2 3 ;
+ $(not-equality-assert) 1 2 3 : 3 2 1 ;
+ $(not-equality-assert) 1 2 3 : 1 5 3 ;
+ $(not-equality-assert) 1 2 3 : 1 "" 3 ;
+ $(not-equality-assert) 1 2 3 : 1 1 2 3 ;
+ $(not-equality-assert) 1 2 3 : 1 2 2 3 ;
+ $(not-equality-assert) 1 2 3 : 5 6 7 ;
+
+ # Extra variables used here just to make sure Boost Jam or Boost Build
+ # do not handle lists with empty strings differently depending on
+ # whether they are literals or stored in variables.
+
+ local empty = ;
+ local empty-strings = "" "" ;
+ local x-empty-strings = x "" "" ;
+ local empty-strings-x = "" "" x ;
+
+ $(equality-assert) : $(empty) ;
+ $(not-equality-assert-i) "" : $(empty) ;
+ $(not-equality-assert-i) "" "" : $(empty) ;
+ $(not-equality-assert-i) : $(empty-strings) ;
+ $(not-equality-assert-i) "" : $(empty-strings) ;
+ $(equality-assert) "" "" : $(empty-strings) ;
+ $(equality-assert) $(empty) : $(empty) ;
+ $(equality-assert) $(empty-strings) : $(empty-strings) ;
+ $(not-equality-assert-i) $(empty) : $(empty-strings) ;
+ $(equality-assert) $(x-empty-strings) : $(x-empty-strings) ;
+ $(equality-assert) $(empty-strings-x) : $(empty-strings-x) ;
+ $(not-equality-assert) $(empty-strings-x) : $(x-empty-strings) ;
+ $(not-equality-assert-i) x : $(x-empty-strings) ;
+ $(not-equality-assert) x : $(empty-strings-x) ;
+ $(not-equality-assert-i) x : $(x-empty-strings) ;
+ $(not-equality-assert-i) x "" : $(x-empty-strings) ;
+ $(equality-assert) x "" "" : $(x-empty-strings) ;
+ $(not-equality-assert) x : $(empty-strings-x) ;
+ $(not-equality-assert) "" x : $(empty-strings-x) ;
+ $(equality-assert) "" "" x : $(empty-strings-x) ;
+ }
+
+
+ # ---------------
+ # Equality tests.
+ # ---------------
+
+ run-equality-test equal : ignore-trailing-empty-strings ;
+ run-equality-test exact-equal ;
+
+
+ # -------------------------
+ # assert.set-equal() tests.
+ # -------------------------
+
+ set-equal : ;
+ not-set-equal "" "" : ;
+ set-equal "" "" : "" ;
+ set-equal "" "" : "" "" ;
+ set-equal a b c : a b c ;
+ set-equal a b c : b c a ;
+ set-equal a b c a : a b c ;
+ set-equal a b c : a b c a ;
+ not-set-equal a b c : a b c d ;
+ not-set-equal a b c d : a b c ;
+}
diff --git a/src/kenlm/jam-files/boost-build/util/container.jam b/src/kenlm/jam-files/boost-build/util/container.jam
new file mode 100644
index 0000000..dd49639
--- /dev/null
+++ b/src/kenlm/jam-files/boost-build/util/container.jam
@@ -0,0 +1,339 @@
+# Copyright 2003 Dave Abrahams
+# Copyright 2002, 2003 Rene Rivera
+# Copyright 2002, 2003, 2004 Vladimir Prus
+# Distributed under the Boost Software License, Version 1.0.
+# (See accompanying file LICENSE_1_0.txt or http://www.boost.org/LICENSE_1_0.txt)
+
+# Various container classes.
+
+# Base for container objects. This lets us construct recursive structures. That
+# is containers with containers in them, specifically so we can tell literal
+# values from node values.
+#
+class node
+{
+ rule __init__ (
+ value ? # Optional value to set node to initially.
+ )
+ {
+ self.value = $(value) ;
+ }
+
+ # Set the value of this node, passing nothing will clear it.
+ #
+ rule set ( value * )
+ {
+ self.value = $(value) ;
+ }
+
+ # Get the value of this node.
+ #
+ rule get ( )
+ {
+ return $(self.value) ;
+ }
+}
+
+
+# A simple vector. Interface mimics the C++ std::vector and std::list, with the
+# exception that indices are one (1) based to follow Jam standard.
+#
+# TODO: Possibly add assertion checks.
+#
+class vector : node
+{
+ import numbers ;
+ import utility ;
+ import sequence ;
+
+ rule __init__ (
+ values * # Initial contents of vector.
+ )
+ {
+ node.__init__ ;
+ self.value = $(values) ;
+ }
+
+ # Get the value of the first element.
+ #
+ rule front ( )
+ {
+ return $(self.value[1]) ;
+ }
+
+ # Get the value of the last element.
+ #
+ rule back ( )
+ {
+ return $(self.value[-1]) ;
+ }
+
+ # Get the value of the element at the given index, one based. Access to
+ # elements of recursive structures is supported directly. Specifying
+ # additional index values recursively accesses the elements as containers.
+ # For example: [ $(v).at 1 : 2 ] would retrieve the second element of our
+ # first element, assuming the first element is a container.
+ #
+ rule at (
+ index # The element index, one based.
+ : * # Additional indices to access recursively.
+ )
+ {
+ local r = $(self.value[$(index)]) ;
+ if $(2)
+ {
+ r = [ $(r).at $(2) : $(3) : $(4) : $(5) : $(6) : $(7) : $(8) : $(9) ] ;
+ }
+ return $(r) ;
+ }
+
+ # Get the value contained in the given element. This has the same
+ # functionality and interface as "at" but in addition gets the value of the
+ # referenced element, assuming it is a "node".
+ #
+ rule get-at (
+ index # The element index, one based.
+ : * # Additional indices to access recursively.
+ )
+ {
+ local r = $(self.value[$(index)]) ;
+ if $(2)
+ {
+ r = [ $(r).at $(2) : $(3) : $(4) : $(5) : $(6) : $(7) : $(8) : $(9) ] ;
+ }
+ return [ $(r).get ] ;
+ }
+
+ # Insert the given value into the front of the vector pushing the rest of
+ # the elements back.
+ #
+ rule push-front (
+ value # Value to become first element.
+ )
+ {
+ self.value = $(value) $(self.value) ;
+ }
+
+ # Remove the front element from the vector. Does not return the value. No
+ # effect if vector is empty.
+ #
+ rule pop-front ( )
+ {
+ self.value = $(self.value[2-]) ;
+ }
+
+ # Add the given value at the end of the vector.
+ #
+ rule push-back (
+ value # Value to become back element.
+ )
+ {
+ self.value += $(value) ;
+ }
+
+ # Remove the back element from the vector. Does not return the value. No
+ # effect if vector is empty.
+ #
+ rule pop-back ( )
+ {
+ self.value = $(self.value[1--2]) ;
+ }
+
+ # Insert the given value at the given index, one based. The values at and to
+ # the right of the index are pushed back to make room for the new value.
+ # If the index is passed the end of the vector the element is added to the
+ # end.
+ #
+ rule insert (
+ index # The index to insert at, one based.
+ : value # The value to insert.
+ )
+ {
+ local left = $(self.value[1-$(index)]) ;
+ local right = $(self.value[$(index)-]) ;
+ if $(right)-is-not-empty
+ {
+ left = $(left[1--2]) ;
+ }
+ self.value = $(left) $(value) $(right) ;
+ }
+
+ # Remove one or more elements from the vector. The range is inclusive, and
+ # not specifying an end is equivalent to the [start, start] range.
+ #
+ rule erase (
+ start # Index of first element to remove.
+ end ? # Optional, index of last element to remove.
+ )
+ {
+ end ?= $(start) ;
+ local left = $(self.value[1-$(start)]) ;
+ left = $(left[1--2]) ;
+ local right = $(self.value[$(end)-]) ;
+ right = $(right[2-]) ;
+ self.value = $(left) $(right) ;
+ }
+
+ # Remove all elements from the vector.
+ #
+ rule clear ( )
+ {
+ self.value = ;
+ }
+
+ # The number of elements in the vector.
+ #
+ rule size ( )
+ {
+ return [ sequence.length $(self.value) ] ;
+ }
+
+ # Returns "true" if there are NO elements in the vector, empty otherwise.
+ #
+ rule empty ( )
+ {
+ if ! $(self.value)-is-not-empty
+ {
+ return true ;
+ }
+ }
+
+ # Returns the textual representation of content.
+ #
+ rule str ( )
+ {
+ return "[" [ sequence.transform utility.str : $(self.value) ] "]" ;
+ }
+
+ # Sorts the vector inplace, calling 'utility.less' for comparisons.
+ #
+ rule sort ( )
+ {
+ self.value = [ sequence.insertion-sort $(self.value) : utility.less ] ;
+ }
+
+ # Returns true if content is equal to the content of other vector. Uses
+ # 'utility.equal' for comparison.
+ #
+ rule equal ( another )
+ {
+ local mismatch ;
+ local size = [ size ] ;
+ if $(size) = [ $(another).size ]
+ {
+ for local i in [ numbers.range 1 $(size) ]
+ {
+ if ! [ utility.equal [ at $(i) ] [ $(another).at $(i) ] ]
+ {
+ mismatch = true ;
+ }
+ }
+ }
+ else
+ {
+ mismatch = true ;
+ }
+
+ if ! $(mismatch)
+ {
+ return true ;
+ }
+ }
+}
+
+
+rule __test__ ( )
+{
+ import assert ;
+ import "class" : new ;
+
+ local v1 = [ new vector ] ;
+ assert.true $(v1).equal $(v1) ;
+ assert.true $(v1).empty ;
+ assert.result 0 : $(v1).size ;
+ assert.result "[" "]" : $(v1).str ;
+ $(v1).push-back b ;
+ $(v1).push-front a ;
+ assert.result "[" a b "]" : $(v1).str ;
+ assert.result a : $(v1).front ;
+ assert.result b : $(v1).back ;
+ $(v1).insert 2 : d ;
+ $(v1).insert 2 : c ;
+ $(v1).insert 4 : f ;
+ $(v1).insert 4 : e ;
+ $(v1).pop-back ;
+ assert.result 5 : $(v1).size ;
+ assert.result d : $(v1).at 3 ;
+ $(v1).pop-front ;
+ assert.result c : $(v1).front ;
+ assert.false $(v1).empty ;
+ $(v1).erase 3 4 ;
+ assert.result 2 : $(v1).size ;
+
+ local v2 = [ new vector q w e r t y ] ;
+ assert.result 6 : $(v2).size ;
+ $(v1).push-back $(v2) ;
+ assert.result 3 : $(v1).size ;
+ local v2-alias = [ $(v1).back ] ;
+ assert.result e : $(v2-alias).at 3 ;
+ $(v1).clear ;
+ assert.true $(v1).empty ;
+ assert.false $(v2-alias).empty ;
+ $(v2).pop-back ;
+ assert.result t : $(v2-alias).back ;
+
+ local v3 = [ new vector ] ;
+ $(v3).push-back [ new vector 1 2 3 4 5 ] ;
+ $(v3).push-back [ new vector a b c ] ;
+ assert.result "[" "[" 1 2 3 4 5 "]" "[" a b c "]" "]" : $(v3).str ;
+ $(v3).push-back [ new vector [ new vector x y z ] [ new vector 7 8 9 ] ] ;
+ assert.result 1 : $(v3).at 1 : 1 ;
+ assert.result b : $(v3).at 2 : 2 ;
+ assert.result a b c : $(v3).get-at 2 ;
+ assert.result 7 8 9 : $(v3).get-at 3 : 2 ;
+
+ local v4 = [ new vector 4 3 6 ] ;
+ $(v4).sort ;
+ assert.result 3 4 6 : $(v4).get ;
+ assert.false $(v4).equal $(v3) ;
+
+ local v5 = [ new vector 3 4 6 ] ;
+ assert.true $(v4).equal $(v5) ;
+ # Check that vectors of different sizes are considered non-equal.
+ $(v5).pop-back ;
+ assert.false $(v4).equal $(v5) ;
+
+ local v6 = [ new vector [ new vector 1 2 3 ] ] ;
+ assert.true $(v6).equal [ new vector [ new vector 1 2 3 ] ] ;
+
+ local v7 = [ new vector 111 222 333 ] ;
+ assert.true $(v7).equal $(v7) ;
+ $(v7).insert 4 : 444 ;
+ assert.result 111 222 333 444 : $(v7).get ;
+ $(v7).insert 999 : xxx ;
+ assert.result 111 222 333 444 xxx : $(v7).get ;
+
+ local v8 = [ new vector "" "" "" ] ;
+ assert.true $(v8).equal $(v8) ;
+ assert.false $(v8).empty ;
+ assert.result 3 : $(v8).size ;
+ assert.result "" : $(v8).at 1 ;
+ assert.result "" : $(v8).at 2 ;
+ assert.result "" : $(v8).at 3 ;
+ assert.result : $(v8).at 4 ;
+ $(v8).insert 2 : 222 ;
+ assert.result 4 : $(v8).size ;
+ assert.result "" 222 "" "" : $(v8).get ;
+ $(v8).insert 999 : "" ;
+ assert.result 5 : $(v8).size ;
+ assert.result "" 222 "" "" "" : $(v8).get ;
+ $(v8).insert 999 : xxx ;
+ assert.result 6 : $(v8).size ;
+ assert.result "" 222 "" "" "" xxx : $(v8).get ;
+
+ # Regression test for a bug causing vector.equal to compare only the first
+ # and the last element in the given vectors.
+ local v9 = [ new vector 111 xxx 222 ] ;
+ local v10 = [ new vector 111 yyy 222 ] ;
+ assert.false $(v9).equal $(v10) ;
+}
diff --git a/src/kenlm/jam-files/boost-build/util/doc.jam b/src/kenlm/jam-files/boost-build/util/doc.jam
new file mode 100644
index 0000000..7ff2df4
--- /dev/null
+++ b/src/kenlm/jam-files/boost-build/util/doc.jam
@@ -0,0 +1,1002 @@
+# Copyright 2002, 2005 Dave Abrahams
+# Copyright 2002, 2003, 2006 Rene Rivera
+# Copyright 2003 Vladimir Prus
+# Distributed under the Boost Software License, Version 1.0.
+# (See accompanying file LICENSE_1_0.txt or http://www.boost.org/LICENSE_1_0.txt)
+
+# Documentation system, handles --help requests.
+# It defines rules that attach documentation to modules, rules, and variables.
+# Collects and generates documentation for the various parts of the build
+# system. The documentation is collected from comments integrated into the code.
+
+import modules ;
+import print ;
+import set ;
+import container ;
+import "class" ;
+import sequence ;
+import path ;
+
+
+# The type of output to generate.
+# "console" is formated text echoed to the console (the default);
+# "text" is formated text appended to the output file;
+# "html" is HTML output to the file.
+#
+help-output = console ;
+
+
+# The file to output documentation to when generating "text" or "html" help.
+# This is without extension as the extension is determined by the type of
+# output.
+#
+help-output-file = help ;
+
+# Whether to include local rules in help output.
+#
+.option.show-locals ?= ;
+
+# When showing documentation for a module, whether to also generate
+# automatically the detailed docs for each item in the module.
+#
+.option.detailed ?= ;
+
+# Generate debug output as the help is generated and modules are parsed.
+#
+.option.debug ?= ;
+
+# Enable or disable a documentation option.
+#
+local rule set-option (
+ option # The option name.
+ : value ? # Enabled (non-empty), or disabled (empty)
+)
+{
+ .option.$(option) = $(value) ;
+}
+
+
+# Set the type of output.
+#
+local rule set-output ( type )
+{
+ help-output = $(type) ;
+}
+
+
+# Set the output to a file.
+#
+local rule set-output-file ( file )
+{
+ help-output-file = $(file) ;
+}
+
+
+# Extracts the brief comment from a complete comment. The brief comment is the
+# first sentence.
+#
+local rule brief-comment (
+ docs * # The comment documentation.
+)
+{
+ local d = $(docs:J=" ") ;
+ local p = [ MATCH ".*([.])$" : $(d) ] ;
+ if ! $(p) { d = $(d)"." ; }
+ d = $(d)" " ;
+ local m = [ MATCH "^([^.]+[.])(.*)" : $(d) ] ;
+ local brief = $(m[1]) ;
+ while $(m[2]) && [ MATCH "^([^ ])" : $(m[2]) ]
+ {
+ m = [ MATCH "^([^.]+[.])(.*)" : $(m[2]) ] ;
+ brief += $(m[1]) ;
+ }
+ return $(brief:J="") ;
+}
+
+
+# Specifies the documentation for the current module.
+#
+local rule set-module-doc (
+ module-name ? # The name of the module to document.
+ : docs * # The documentation for the module.
+)
+{
+ module-name ?= * ;
+
+ $(module-name).brief = [ brief-comment $(docs) ] ;
+ $(module-name).docs = $(docs) ;
+
+ if ! $(module-name) in $(documented-modules)
+ {
+ documented-modules += $(module-name) ;
+ }
+}
+
+
+# Specifies the copyright notice for the current module.
+#
+local rule set-module-copyright (
+ module-name ? # The name of the module to document.
+ : copyright * # The copyright for the module.
+)
+{
+ module-name ?= * ;
+
+ $(module-name).copy-brief = [ brief-comment $(copyright) ] ;
+ $(module-name).copy-docs = $(copyright) ;
+
+ if ! $(module-name) in $(documented-modules)
+ {
+ documented-modules += $(module-name) ;
+ }
+}
+
+
+# Specifies the documentation for a rule in the current module. If called in the
+# global module, this documents a global rule.
+#
+local rule set-rule-doc (
+ name # The name of the rule.
+ module-name ? # The name of the module to document.
+ is-local ? # Whether the rule is local to the module.
+ : docs * # The documentation for the rule.
+)
+{
+ module-name ?= * ;
+
+ $(module-name).$(name).brief = [ brief-comment $(docs) ] ;
+ $(module-name).$(name).docs = $(docs) ;
+ $(module-name).$(name).is-local = $(is-local) ;
+
+ if ! $(name) in $($(module-name).rules)
+ {
+ $(module-name).rules += $(name) ;
+ }
+}
+
+
+# Specify a class, will turn a rule into a class.
+#
+local rule set-class-doc (
+ name # The name of the class.
+ module-name ? # The name of the module to document.
+ : super-name ? # The super class name.
+)
+{
+ module-name ?= * ;
+
+ $(module-name).$(name).is-class = true ;
+ $(module-name).$(name).super-name = $(super-name) ;
+ $(module-name).$(name).class-rules =
+ [ MATCH "^($(name)[.].*)" : $($(module-name).rules) ] ;
+ $(module-name).$($(module-name).$(name).class-rules).is-class-rule = true ;
+
+ $(module-name).classes += $(name) ;
+ $(module-name).class-rules += $($(module-name).$(name).class-rules) ;
+ $(module-name).rules =
+ [ set.difference $($(module-name).rules) :
+ $(name) $($(module-name).$(name).class-rules) ] ;
+}
+
+
+# Set the argument call signature of a rule.
+#
+local rule set-rule-arguments-signature (
+ name # The name of the rule.
+ module-name ? # The name of the module to document.
+ : signature * # The arguments signature.
+)
+{
+ module-name ?= * ;
+
+ $(module-name).$(name).signature = $(signature) ;
+}
+
+
+# Specifies the documentation for an argument of a rule.
+#
+local rule set-argument-doc (
+ name # The name of the argument.
+ qualifier # Argument syntax qualifier, "*", "+", etc.
+ rule-name # The name of the rule.
+ module-name ? # The optional name of the module.
+ : docs * # The documentation.
+)
+{
+ module-name ?= * ;
+
+ $(module-name).$(rule-name).args.$(name).qualifier = $(qualifier) ;
+ $(module-name).$(rule-name).args.$(name).docs = $(docs) ;
+
+ if ! $(name) in $($(module-name).$(rule-name).args)
+ {
+ $(module-name).$(rule-name).args += $(name) ;
+ }
+}
+
+
+# Specifies the documentation for a variable in the current module. If called in
+# the global module, the global variable is documented.
+#
+local rule set-variable-doc (
+ name # The name of the variable.
+ default # The default value.
+ initial # The initial value.
+ module-name ? # The name of the module to document.
+ : docs * # The documentation for the variable.
+)
+{
+ module-name ?= * ;
+
+ $(module-name).$(name).brief = [ brief-comment $(docs) ] ;
+ $(module-name).$(name).default = $(default) ;
+ $(module-name).$(name).initial = $(initial) ;
+ $(module-name).$(name).docs = $(docs) ;
+
+ if ! $(name) in $($(module-name).variables)
+ {
+ $(module-name).variables += $(name) ;
+ }
+}
+
+
+# Generates a general description of the documentation and help system.
+#
+local rule print-help-top ( )
+{
+ print.section "General command line usage" ;
+
+ print.text " b2 [options] [properties] [targets]
+
+ Options, properties and targets can be specified in any order.
+ " ;
+
+ print.section "Important Options" ;
+
+ print.list-start ;
+ print.list-item "--clean Remove targets instead of building" ;
+ print.list-item "-a Rebuild everything" ;
+ print.list-item "-n Don't execute the commands, only print them" ;
+ print.list-item "-d+2 Show commands as they are executed" ;
+ print.list-item "-d0 Supress all informational messages" ;
+ print.list-item "-q Stop at first error" ;
+ print.list-item "--reconfigure Rerun all configuration checks" ;
+ print.list-item "--debug-configuration Diagnose configuration" ;
+ print.list-item "--debug-building Report which targets are built with what properties" ;
+ print.list-item "--debug-generator Diagnose generator search/execution" ;
+ print.list-end ;
+
+ print.section "Further Help"
+ The following options can be used to obtain additional documentation.
+ ;
+
+ print.list-start ;
+ print.list-item "--help-options Print more obscure command line options." ;
+ print.list-item "--help-internal Boost.Build implementation details." ;
+ print.list-item "--help-doc-options Implementation details doc formatting." ;
+ print.list-end ;
+}
+
+
+# Generate Jam/Boost.Jam command usage information.
+#
+local rule print-help-usage ( )
+{
+ print.section "Boost.Build Usage"
+ "b2 [ options... ] targets..."
+ ;
+ print.list-start ;
+ print.list-item -a;
+ Build all targets, even if they are current. ;
+ print.list-item -fx;
+ Read '"x"' as the Jamfile for building instead of searching for the
+ Boost.Build system. ;
+ print.list-item -jx;
+ Run up to '"x"' commands concurrently. ;
+ print.list-item -n;
+ Do not execute build commands. Instead print out the commands as they
+ would be executed if building. ;
+ print.list-item -ox;
+ Output the used build commands to file '"x"'. ;
+ print.list-item -q;
+ Quit as soon as a build failure is encountered. Without this option
+ Boost.Jam will continue building as many targets as it can. ;
+ print.list-item -sx=y;
+ Sets a Jam variable '"x"' to the value '"y"', overriding any value that
+ variable would have from the environment. ;
+ print.list-item -tx;
+ Rebuild the target '"x"', even if it is up-to-date. ;
+ print.list-item -v;
+ Display the version of b2. ;
+ print.list-item --x;
+ Any option not explicitly handled by Boost.Build remains available to
+ build scripts using the '"ARGV"' variable. ;
+ print.list-item --abbreviate-paths;
+ Use abbreviated paths for targets. ;
+ print.list-item --hash;
+ Shorten target paths by using an MD5 hash. ;
+ print.list-item -dn;
+ Enables output of diagnostic messages. The debug level '"n"' and all
+ below it are enabled by this option. ;
+ print.list-item -d+n;
+ Enables output of diagnostic messages. Only the output for debug level
+ '"n"' is enabled. ;
+ print.list-end ;
+ print.section "Debug Levels"
+ Each debug level shows a different set of information. Usually with
+ higher levels producing more verbose information. The following levels
+ are supported: ;
+ print.list-start ;
+ print.list-item 0;
+ Turn off all diagnostic output. Only errors are reported. ;
+ print.list-item 1;
+ Show the actions taken for building targets, as they are executed. ;
+ print.list-item 2;
+ Show "quiet" actions and display all action text, as they are executed. ;
+ print.list-item 3;
+ Show dependency analysis, and target/source timestamps/paths. ;
+ print.list-item 4;
+ Show arguments of shell invocations. ;
+ print.list-item 5;
+ Show rule invocations and variable expansions. ;
+ print.list-item 6;
+ Show directory/header file/archive scans, and attempts at binding to targets. ;
+ print.list-item 7;
+ Show variable settings. ;
+ print.list-item 8;
+ Show variable fetches, variable expansions, and evaluation of '"if"' expressions. ;
+ print.list-item 9;
+ Show variable manipulation, scanner tokens, and memory usage. ;
+ print.list-item 10;
+ Show execution times for rules. ;
+ print.list-item 11;
+ Show parsing progress of Jamfiles. ;
+ print.list-item 12;
+ Show graph for target dependencies. ;
+ print.list-item 13;
+ Show changes in target status (fate). ;
+ print.list-end ;
+}
+
+
+# Generates description of options controlling the help system. This
+# automatically reads the options as all variables in the doc module of the form
+# ".option.*".
+#
+local rule print-help-options (
+ module-name # The doc module.
+)
+{
+ print.section "Help Options"
+ These are all the options available for enabling or disabling to control
+ the help system in various ways. Options can be enabled or disabled with
+ '"--help-enable-<option>"', and "'--help-disable-<option>'"
+ respectively.
+ ;
+ local options-to-list = [ MATCH ^[.]option[.](.*) : $($(module-name).variables) ] ;
+ if $(options-to-list)
+ {
+ print.list-start ;
+ for local option in [ sequence.insertion-sort $(options-to-list) ]
+ {
+ local def = disabled ;
+ if $($(module-name)..option.$(option).default) != "(empty)"
+ {
+ def = enabled ;
+ }
+ print.list-item $(option): $($(module-name)..option.$(option).docs)
+ Default is $(def). ;
+ }
+ print.list-end ;
+ }
+}
+
+
+# Generate brief documentation for all the known items in the section for a
+# module. Possible sections are: "rules", and "variables".
+#
+local rule print-help-module-section (
+ module # The module name.
+ section # rules or variables.
+ : section-head # The title of the section.
+ section-description * # The detailed description of the section.
+)
+{
+ if $($(module).$(section))
+ {
+ print.section $(section-head) $(section-description) ;
+ print.list-start ;
+ for local item in [ sequence.insertion-sort $($(module).$(section)) ]
+ {
+ local show = ;
+ if ! $($(module).$(item).is-local)
+ {
+ show = yes ;
+ }
+ if $(.option.show-locals)
+ {
+ show = yes ;
+ }
+ if $(show)
+ {
+ print.list-item $(item): $($(module).$(item).brief) ;
+ }
+ }
+ print.list-end ;
+ }
+}
+
+
+# Generate documentation for all possible modules. We attempt to list all known
+# modules together with a brief description of each.
+#
+local rule print-help-all (
+ ignored # Usually the module name, but is ignored here.
+)
+{
+ print.section "Modules"
+ "These are all the known modules. Use --help <module> to get more"
+ "detailed information."
+ ;
+ if $(documented-modules)
+ {
+ print.list-start ;
+ for local module-name in [ sequence.insertion-sort $(documented-modules) ]
+ {
+ # The brief docs for each module.
+ print.list-item $(module-name): $($(module-name).brief) ;
+ }
+ print.list-end ;
+ }
+ # The documentation for each module when details are requested.
+ if $(documented-modules) && $(.option.detailed)
+ {
+ for local module-name in [ sequence.insertion-sort $(documented-modules) ]
+ {
+ # The brief docs for each module.
+ print-help-module $(module-name) ;
+ }
+ }
+}
+
+
+# Generate documentation for a module. Basic information about the module is
+# generated.
+#
+local rule print-help-module (
+ module-name # The module to generate docs for.
+)
+{
+ # Print the docs.
+ print.section "Module '$(module-name)'" $($(module-name).docs) ;
+
+ # Print out the documented classes.
+ print-help-module-section $(module-name) classes : "Module '$(module-name)' classes"
+ Use --help $(module-name).<class-name> to get more information. ;
+
+ # Print out the documented rules.
+ print-help-module-section $(module-name) rules : "Module '$(module-name)' rules"
+ Use --help $(module-name).<rule-name> to get more information. ;
+
+ # Print out the documented variables.
+ print-help-module-section $(module-name) variables : "Module '$(module-name)' variables"
+ Use --help $(module-name).<variable-name> to get more information. ;
+
+ # Print out all the same information but in detailed form.
+ if $(.option.detailed)
+ {
+ print-help-classes $(module-name) ;
+ print-help-rules $(module-name) ;
+ print-help-variables $(module-name) ;
+ }
+}
+
+
+# Generate documentation for a set of rules in a module.
+#
+local rule print-help-rules (
+ module-name # Module of the rules.
+ : name * # Optional list of rules to describe.
+)
+{
+ name ?= $($(module-name).rules) ;
+ if [ set.intersection $(name) : $($(module-name).rules) $($(module-name).class-rules) ]
+ {
+ # Print out the given rules.
+ for local rule-name in [ sequence.insertion-sort $(name) ]
+ {
+ if $(.option.show-locals) || ! $($(module-name).$(rule-name).is-local)
+ {
+ local signature = $($(module-name).$(rule-name).signature:J=" ") ;
+ signature ?= "" ;
+ print.section "Rule '$(module-name).$(rule-name) ( $(signature) )'"
+ $($(module-name).$(rule-name).docs) ;
+ if $($(module-name).$(rule-name).args)
+ {
+ print.list-start ;
+ for local arg-name in $($(module-name).$(rule-name).args)
+ {
+ print.list-item $(arg-name): $($(module-name).$(rule-name).args.$(arg-name).docs) ;
+ }
+ print.list-end ;
+ }
+ }
+ }
+ }
+}
+
+
+# Generate documentation for a set of classes in a module.
+#
+local rule print-help-classes (
+ module-name # Module of the classes.
+ : name * # Optional list of classes to describe.
+)
+{
+ name ?= $($(module-name).classes) ;
+ if [ set.intersection $(name) : $($(module-name).classes) ]
+ {
+ # Print out the given classes.
+ for local class-name in [ sequence.insertion-sort $(name) ]
+ {
+ if $(.option.show-locals) || ! $($(module-name).$(class-name).is-local)
+ {
+ local signature = $($(module-name).$(class-name).signature:J=" ") ;
+ signature ?= "" ;
+ print.section "Class '$(module-name).$(class-name) ( $(signature) )'"
+ $($(module-name).$(class-name).docs)
+ "Inherits from '"$($(module-name).$(class-name).super-name)"'." ;
+ if $($(module-name).$(class-name).args)
+ {
+ print.list-start ;
+ for local arg-name in $($(module-name).$(class-name).args)
+ {
+ print.list-item $(arg-name): $($(module-name).$(class-name).args.$(arg-name).docs) ;
+ }
+ print.list-end ;
+ }
+ }
+
+ # Print out the documented rules of the class.
+ print-help-module-section $(module-name) $(class-name).class-rules : "Class '$(module-name).$(class-name)' rules"
+ Use --help $(module-name).<rule-name> to get more information. ;
+
+ # Print out all the rules if details are requested.
+ if $(.option.detailed)
+ {
+ print-help-rules $(module-name) : $($(module-name).$(class-name).class-rules) ;
+ }
+ }
+ }
+}
+
+
+# Generate documentation for a set of variables in a module.
+#
+local rule print-help-variables (
+ module-name ? # Module of the variables.
+ : name * # Optional list of variables to describe.
+)
+{
+ name ?= $($(module-name).variables) ;
+ if [ set.intersection $(name) : $($(module-name).variables) ]
+ {
+ # Print out the given variables.
+ for local variable-name in [ sequence.insertion-sort $(name) ]
+ {
+ print.section "Variable '$(module-name).$(variable-name)'" $($(module-name).$(variable-name).docs) ;
+ if $($(module-name).$(variable-name).default) ||
+ $($(module-name).$(variable-name).initial)
+ {
+ print.list-start ;
+ if $($(module-name).$(variable-name).default)
+ {
+ print.list-item "default value:" '$($(module-name).$(variable-name).default:J=" ")' ;
+ }
+ if $($(module-name).$(variable-name).initial)
+ {
+ print.list-item "initial value:" '$($(module-name).$(variable-name).initial:J=" ")' ;
+ }
+ print.list-end ;
+ }
+ }
+ }
+}
+
+
+# Generate documentation for a project.
+#
+local rule print-help-project (
+ unused ?
+ : jamfile * # The project Jamfile.
+)
+{
+ if $(jamfile<$(jamfile)>.docs)
+ {
+ # Print the docs.
+ print.section "Project-specific help"
+ Project has jamfile at $(jamfile) ;
+
+ print.lines $(jamfile<$(jamfile)>.docs) "" ;
+ }
+}
+
+
+# Generate documentation for a config file.
+#
+local rule print-help-config (
+ unused ?
+ : type # The type of configuration file user or site.
+ config-file # The configuration Jamfile.
+)
+{
+ if $(jamfile<$(config-file)>.docs)
+ {
+ # Print the docs.
+ print.section "Configuration help"
+ Configuration file at $(config-file) ;
+
+ print.lines $(jamfile<$(config-file)>.docs) "" ;
+ }
+}
+
+
+ws = " " ;
+
+# Extract the text from a block of comments.
+#
+local rule extract-comment (
+ var # The name of the variable to extract from.
+)
+{
+ local comment = ;
+ local line = $($(var)[1]) ;
+ local l = [ MATCH "^[$(ws)]*(#)(.*)$" : $(line) ] ;
+ while $(l[1]) && $($(var))
+ {
+ if $(l[2]) { comment += [ MATCH "^[$(ws)]?(.*)$" : $(l[2]) ] ; }
+ else { comment += "" ; }
+ $(var) = $($(var)[2-]) ;
+ line = $($(var)[1]) ;
+ l = [ MATCH "^[$(ws)]*(#)(.*)$" : $(line) ] ;
+ }
+ return $(comment) ;
+}
+
+
+# Extract a single line of Jam syntax, ignoring any comments.
+#
+local rule extract-syntax (
+ var # The name of the variable to extract from.
+)
+{
+ local syntax = ;
+ local line = $($(var)[1]) ;
+ while ! $(syntax) && ! [ MATCH "^[$(ws)]*(#)" : $(line) ] && $($(var))
+ {
+ local m = [ MATCH "^[$(ws)]*(.*)$" : $(line) ] ;
+ if $(m)
+ {
+ syntax = $(m) ;
+ }
+ $(var) = $($(var)[2-]) ;
+ line = $($(var)[1]) ;
+ }
+ return $(syntax) ;
+}
+
+
+# Extract the next token, this is either a single Jam construct or a comment as
+# a single token.
+#
+local rule extract-token (
+ var # The name of the variable to extract from.
+)
+{
+ local parts = ;
+ while ! $(parts)
+ {
+ parts = [ MATCH "^[$(ws)]*([^$(ws)]+)[$(ws)]*(.*)" : $($(var)[1]) ] ;
+ if ! $(parts)
+ {
+ $(var) = $($(var)[2-]) ;
+ }
+ }
+ local token = ;
+ if [ MATCH "^(#)" : $(parts[1]) ]
+ {
+ token = $(parts:J=" ") ;
+ $(var) = $($(var)[2-]) ;
+ }
+ else
+ {
+ token = $(parts[1]) ;
+ $(var) = $(parts[2-]:J=" ") $($(var)[2-]) ;
+ }
+ return $(token) ;
+}
+
+
+# Scan for a rule declaration as the next item in the variable.
+#
+local rule scan-rule (
+ syntax ? # The first part of the text which contains the rule declaration.
+ : var # The name of the variable to extract from.
+)
+{
+ local rule-parts =
+ [ MATCH "^[$(ws)]*(rule|local[$(ws)]*rule)[$(ws)]+([^$(ws)]+)[$(ws)]*(.*)" : $(syntax:J=" ") ] ;
+ if $(rule-parts[1])
+ {
+ # Mark as doc for rule.
+ local rule-name = $(rule-parts[2]) ;
+ if $(scope-name)
+ {
+ rule-name = $(scope-name).$(rule-name) ;
+ }
+ local is-local = [ MATCH "^(local).*" : $(rule-parts[1]) ] ;
+ if $(comment-block)
+ {
+ set-rule-doc $(rule-name) $(module-name) $(is-local) : $(comment-block) ;
+ }
+ # Parse args of rule.
+ $(var) = $(rule-parts[3-]) $($(var)) ;
+ set-rule-arguments-signature $(rule-name) $(module-name) : [ scan-rule-arguments $(var) ] ;
+ # Scan within this rules scope.
+ local scope-level = [ extract-token $(var) ] ;
+ local scope-name = $(rule-name) ;
+ while $(scope-level)
+ {
+ local comment-block = [ extract-comment $(var) ] ;
+ local syntax-block = [ extract-syntax $(var) ] ;
+ if [ scan-rule $(syntax-block) : $(var) ]
+ {
+ }
+ else if [ MATCH "^(\\{)" : $(syntax-block) ]
+ {
+ scope-level += "{" ;
+ }
+ else if [ MATCH "^[^\\}]*([\\}])[$(ws)]*$" : $(syntax-block) ]
+ {
+ scope-level = $(scope-level[2-]) ;
+ }
+ }
+
+ return true ;
+ }
+}
+
+
+# Scan the arguments of a rule.
+#
+local rule scan-rule-arguments (
+ var # The name of the variable to extract from.
+)
+{
+ local arg-syntax = ;
+ local token = [ extract-token $(var) ] ;
+ while $(token) != "(" && $(token) != "{"
+ {
+ token = [ extract-token $(var) ] ;
+ }
+ if $(token) != "{"
+ {
+ token = [ extract-token $(var) ] ;
+ }
+ local arg-signature = ;
+ while $(token) != ")" && $(token) != "{"
+ {
+ local arg-name = ;
+ local arg-qualifier = " " ;
+ local arg-doc = ;
+ if $(token) = ":"
+ {
+ arg-signature += $(token) ;
+ token = [ extract-token $(var) ] ;
+ }
+ arg-name = $(token) ;
+ arg-signature += $(token) ;
+ token = [ extract-token $(var) ] ;
+ if [ MATCH "^([\\*\\+\\?])" : $(token) ]
+ {
+ arg-qualifier = $(token) ;
+ arg-signature += $(token) ;
+ token = [ extract-token $(var) ] ;
+ }
+ if $(token) = ":"
+ {
+ arg-signature += $(token) ;
+ token = [ extract-token $(var) ] ;
+ }
+ if [ MATCH "^(#)" : $(token) ]
+ {
+ $(var) = $(token) $($(var)) ;
+ arg-doc = [ extract-comment $(var) ] ;
+ token = [ extract-token $(var) ] ;
+ }
+ set-argument-doc $(arg-name) $(arg-qualifier) $(rule-name) $(module-name) : $(arg-doc) ;
+ }
+ while $(token) != "{"
+ {
+ token = [ extract-token $(var) ] ;
+ }
+ $(var) = "{" $($(var)) ;
+ arg-signature ?= "" ;
+ return $(arg-signature) ;
+}
+
+
+# Scan for a variable declaration.
+#
+local rule scan-variable (
+ syntax ? # The first part of the text which contains the variable declaration.
+ : var # The name of the variable to extract from.
+)
+{
+ # [1] = name, [2] = assignment operator, [3] = value(s)
+ local var-parts =
+ [ MATCH "^[$(ws)]*([^$(ws)]+)[$(ws)]+([\\?\\=]*)[$(ws)]+([^\\;]*)\\;" : $(syntax) ] ;
+ if $(var-parts)
+ {
+ local value = [ MATCH "^(.*)[ ]$" : $(var-parts[3-]:J=" ") ] ;
+ local default-value = "" ;
+ local initial-value = "" ;
+ if $(var-parts[2]) = "?="
+ {
+ default-value = $(value) ;
+ default-value ?= "(empty)" ;
+ }
+ else
+ {
+ initial-value = $(value) ;
+ initial-value ?= "(empty)" ;
+ }
+ if $(comment-block)
+ {
+ set-variable-doc $(var-parts[1]) $(default-value) $(initial-value) $(module-name) : $(comment-block) ;
+ }
+ return true ;
+ }
+}
+
+
+# Scan a class declaration.
+#
+local rule scan-class (
+ syntax ? # The syntax text for the class declaration.
+)
+{
+ # [1] = class?, [2] = name, [3] = superclass
+ local class-parts =
+ [ MATCH "^[$(ws)]*([^$(ws)]+)[$(ws)]+([^$(ws)]+)[$(ws)]+:*[$(ws)]*([^$(ws);]*)" : $(syntax) ] ;
+ if $(class-parts[1]) = "class" || $(class-parts[1]) = "class.class"
+ {
+ set-class-doc $(class-parts[2]) $(module-name) : $(class-parts[3]) ;
+ }
+}
+
+
+# Scan a module file for documentation comments. This also invokes any actions
+# assigned to the module. The actions are the rules that do the actual output of
+# the documentation. This rule is invoked as the header scan rule for the module
+# file.
+#
+rule scan-module (
+ target # The module file.
+ : text * # The text in the file, one item per line.
+ : action * # Rule to call to output docs for the module.
+)
+{
+ if $(.option.debug) { ECHO "HELP:" scanning module target '$(target)' ; }
+ local module-name = $(target:B) ;
+ local module-documented = ;
+ local comment-block = ;
+ local syntax-block = ;
+ # This is a hack because we can not get the line of a file if it happens to
+ # not have a new-line termination.
+ text += "}" ;
+ while $(text)
+ {
+ comment-block = [ extract-comment text ] ;
+ syntax-block = [ extract-syntax text ] ;
+ if $(.option.debug)
+ {
+ ECHO "HELP:" comment block; '$(comment-block)' ;
+ ECHO "HELP:" syntax block; '$(syntax-block)' ;
+ }
+ if [ scan-rule $(syntax-block) : text ] { }
+ else if [ scan-variable $(syntax-block) : text ] { }
+ else if [ scan-class $(syntax-block) ] { }
+ else if [ MATCH .*([cC]opyright).* : $(comment-block:J=" ") ]
+ {
+ # mark as the copy for the module.
+ set-module-copyright $(module-name) : $(comment-block) ;
+ }
+ else if $(action[1]) in "print-help-project" "print-help-config"
+ && ! $(jamfile<$(target)>.docs)
+ {
+ # special module docs for the project jamfile.
+ jamfile<$(target)>.docs = $(comment-block) ;
+ }
+ else if ! $(module-documented)
+ {
+ # document the module.
+ set-module-doc $(module-name) : $(comment-block) ;
+ module-documented = true ;
+ }
+ }
+ if $(action)
+ {
+ $(action[1]) $(module-name) : $(action[2-]) ;
+ }
+}
+
+
+# Import scan-module to global scope, so that it is available during header
+# scanning phase.
+#
+IMPORT $(__name__) : scan-module : : doc.scan-module ;
+
+
+# Read in a file using the SHELL builtin and return the individual lines as
+# would be done for header scanning.
+#
+local rule read-file (
+ file # The file to read in.
+)
+{
+ file = [ path.native [ path.root [ path.make $(file) ] [ path.pwd ] ] ] ;
+ if ! $(.file<$(file)>.lines)
+ {
+ local content ;
+ switch [ modules.peek : OS ]
+ {
+ case NT :
+ content = [ SHELL "TYPE \"$(file)\"" ] ;
+
+ case * :
+ content = [ SHELL "cat \"$(file)\"" ] ;
+ }
+ local lines ;
+ local nl = "
+" ;
+ local << = "([^$(nl)]*)[$(nl)](.*)" ;
+ local line+ = [ MATCH "$(<<)" : "$(content)" ] ;
+ while $(line+)
+ {
+ lines += $(line+[1]) ;
+ line+ = [ MATCH "$(<<)" : "$(line+[2])" ] ;
+ }
+ .file<$(file)>.lines = $(lines) ;
+ }
+ return $(.file<$(file)>.lines) ;
+}
+
+
+# Add a scan action to perform to generate the help documentation. The action
+# rule is passed the name of the module as the first argument. The second
+# argument(s) are optional and passed directly as specified here.
+#
+local rule do-scan (
+ modules + # The modules to scan and perform the action on.
+ : action * # The action rule, plus the secondary arguments to pass to the action rule.
+)
+{
+ if $(help-output) = text
+ {
+ print.output $(help-output-file).txt plain ;
+ ALWAYS $(help-output-file).txt ;
+ DEPENDS all : $(help-output-file).txt ;
+ }
+ if $(help-output) = html
+ {
+ print.output $(help-output-file).html html ;
+ ALWAYS $(help-output-file).html ;
+ DEPENDS all : $(help-output-file).html ;
+ }
+ for local module-file in $(modules[1--2])
+ {
+ scan-module $(module-file) : [ read-file $(module-file) ] ;
+ }
+ scan-module $(modules[-1]) : [ read-file $(modules[-1]) ] : $(action) ;
+}
diff --git a/src/kenlm/jam-files/boost-build/util/indirect.jam b/src/kenlm/jam-files/boost-build/util/indirect.jam
new file mode 100644
index 0000000..40884da
--- /dev/null
+++ b/src/kenlm/jam-files/boost-build/util/indirect.jam
@@ -0,0 +1,117 @@
+# Copyright 2003 Dave Abrahams
+# Copyright 2003 Vladimir Prus
+# Distributed under the Boost Software License, Version 1.0.
+# (See accompanying file LICENSE_1_0.txt or copy at
+# http://www.boost.org/LICENSE_1_0.txt)
+
+import modules ;
+import numbers ;
+
+
+# The pattern that indirect rules must match: module%rule
+.pattern = ^([^%]*)%([^%]+)$ ;
+
+
+#
+# Type checking rules.
+#
+local rule indirect-rule ( x )
+{
+ if ! [ MATCH $(.pattern) : $(x) ]
+ {
+ return "expected a string of the form module%rule, but got \""$(x)"\" for argument" ;
+ }
+}
+
+
+# Make an indirect rule which calls the given rule. If context is supplied it is
+# expected to be the module in which to invoke the rule by the 'call' rule
+# below. Otherwise, the rule will be invoked in the module of this rule's
+# caller.
+#
+rule make ( rulename bound-args * : context ? )
+{
+ context ?= [ CALLER_MODULE ] ;
+ context ?= "" ;
+ return $(context)%$(rulename) $(bound-args) ;
+}
+
+
+# Make an indirect rule which calls the given rule. 'rulename' may be a
+# qualified rule; if so it is returned unchanged. Otherwise, if frames is not
+# supplied, the result will be invoked (by 'call', below) in the module of the
+# caller. Otherwise, frames > 1 specifies additional call frames to back up in
+# order to find the module context.
+#
+rule make-qualified ( rulename bound-args * : frames ? )
+{
+ if [ MATCH $(.pattern) : $(rulename) ]
+ {
+ return $(rulename) $(bound-args) ;
+ }
+ else
+ {
+ frames ?= 1 ;
+ # If the rule name includes a Jamfile module, grab it.
+ local module-context = [ MATCH ^(Jamfile<[^>]*>)\\..* : $(rulename) ] ;
+
+ if ! $(module-context)
+ {
+ # Take the first dot-separated element as module name. This disallows
+ # module names with dots, but allows rule names with dots.
+ module-context = [ MATCH ^([^.]*)\\..* : $(rulename) ] ;
+ }
+ module-context ?= [ CALLER_MODULE $(frames) ] ;
+ return [ make $(rulename) $(bound-args) : $(module-context) ] ;
+ }
+}
+
+
+# Returns the module name in which the given indirect rule will be invoked.
+#
+rule get-module ( [indirect-rule] x )
+{
+ local m = [ MATCH $(.pattern) : $(x) ] ;
+ if ! $(m[1])
+ {
+ m = ;
+ }
+ return $(m[1]) ;
+}
+
+
+# Returns the rulename that will be called when x is invoked.
+#
+rule get-rule ( [indirect-rule] x )
+{
+ local m = [ MATCH $(.pattern) : $(x) ] ;
+ return $(m[2]) ;
+}
+
+
+# Invoke the given indirect-rule.
+#
+rule call ( [indirect-rule] r args * : * )
+{
+ return [ modules.call-in [ get-module $(r) ] : [ get-rule $(r) ] $(args) :
+ $(2) : $(3) : $(4) : $(5) : $(6) : $(7) : $(8) : $(9) : $(10) : $(11) :
+ $(12) : $(13) : $(14) : $(15) : $(16) : $(17) : $(18) : $(19) ] ;
+}
+
+
+rule __test__
+{
+ import assert ;
+
+ rule foo-barr! ( x )
+ {
+ assert.equal $(x) : x ;
+ }
+
+ assert.equal [ get-rule [ make foo-barr! ] ] : foo-barr! ;
+ assert.equal [ get-module [ make foo-barr! ] ] : [ CALLER_MODULE ] ;
+
+ call [ make foo-barr! ] x ;
+ call [ make foo-barr! x ] ;
+ call [ make foo-barr! : [ CALLER_MODULE ] ] x ;
+}
diff --git a/src/kenlm/jam-files/boost-build/util/numbers.jam b/src/kenlm/jam-files/boost-build/util/numbers.jam
new file mode 100644
index 0000000..665347d
--- /dev/null
+++ b/src/kenlm/jam-files/boost-build/util/numbers.jam
@@ -0,0 +1,218 @@
+# Copyright 2001, 2002 Dave Abrahams
+# Copyright 2002, 2003 Vladimir Prus
+# Distributed under the Boost Software License, Version 1.0.
+# (See accompanying file LICENSE_1_0.txt or http://www.boost.org/LICENSE_1_0.txt)
+
+import errors ;
+
+
+rule trim-leading-zeroes ( value )
+{
+ return [ CALC $(value) + 0 ] ;
+}
+
+
+rule check ( numbers * )
+{
+ for local n in $(numbers)
+ {
+ switch $(n)
+ {
+ case *[^0-9]* :
+ errors.error $(n) "in" $(numbers) : is not a number ;
+ }
+ }
+}
+
+
+rule increment ( number )
+{
+ return [ CALC $(number) + 1 ] ;
+}
+
+
+rule decrement ( number )
+{
+ return [ CALC $(number) - 1 ] ;
+}
+
+
+rule range ( start finish ? : step ? )
+{
+ if ! $(finish)
+ {
+ finish = $(start) ;
+ start = 1 ;
+ }
+ step ?= 1 ;
+
+ check $(start) $(finish) $(step) ;
+
+ if $(finish) != 0
+ {
+ local result ;
+ while [ less $(start) $(finish) ] || $(start) = $(finish)
+ {
+ result += $(start) ;
+ start = [ CALC $(start) + $(step) ] ;
+ }
+ return $(result) ;
+ }
+}
+
+
+rule less ( n1 n2 )
+{
+ switch [ CALC $(n2) - $(n1) ]
+ {
+ case [1-9]* : return true ;
+ }
+}
+
+
+rule log10 ( number )
+{
+ switch $(number)
+ {
+ case *[^0-9]* : errors.error $(number) is not a number ;
+ case 0 : errors.error can't take log of zero ;
+ case [1-9] : return 0 ;
+ case [1-9]? : return 1 ;
+ case [1-9]?? : return 2 ;
+ case [1-9]??? : return 3 ;
+ case [1-9]???? : return 4 ;
+ case [1-9]????? : return 5 ;
+ case [1-9]?????? : return 6 ;
+ case [1-9]??????? : return 7 ;
+ case [1-9]???????? : return 8 ;
+ case [1-9]????????? : return 9 ;
+ case * :
+ {
+ import sequence ;
+ import string ;
+ local chars = [ string.chars $(number) ] ;
+ while $(chars[1]) = 0
+ {
+ chars = $(chars[2-]) ;
+ }
+ if ! $(chars)
+ {
+ errors.error can't take log of zero ;
+ }
+ else
+ {
+ return [ decrement [ sequence.length $(chars) ] ] ;
+ }
+ }
+ }
+}
+
+
+rule __test__ ( )
+{
+ import assert ;
+
+ assert.result 1 : increment 0 ;
+ assert.result 2 : increment 1 ;
+ assert.result 1 : decrement 2 ;
+ assert.result 0 : decrement 1 ;
+ assert.result 50 : increment 49 ;
+ assert.result 49 : decrement 50 ;
+ assert.result 99 : increment 98 ;
+ assert.result 99 : decrement 100 ;
+ assert.result 100 : increment 99 ;
+ assert.result 999 : decrement 1000 ;
+ assert.result 1000 : increment 999 ;
+
+ assert.result 1 2 3 : range 3 ;
+ assert.result 1 2 3 4 5 6 7 8 9 10 11 12 : range 12 ;
+ assert.result 3 4 5 6 7 8 9 10 11 : range 3 11 ;
+ assert.result : range 0 ;
+ assert.result 1 4 7 10 : range 10 : 3 ;
+ assert.result 2 4 6 8 10 : range 2 10 : 2 ;
+ assert.result 25 50 75 100 : range 25 100 : 25 ;
+
+ assert.result 0 : trim-leading-zeroes 0 ;
+ assert.result 1234 : trim-leading-zeroes 1234 ;
+ assert.result 123456 : trim-leading-zeroes 0000123456 ;
+ assert.result 1000123456 : trim-leading-zeroes 1000123456 ;
+ assert.result 10000 : trim-leading-zeroes 10000 ;
+ assert.result 10000 : trim-leading-zeroes 00010000 ;
+
+ assert.true less 1 2 ;
+ assert.true less 1 12 ;
+ assert.true less 1 21 ;
+ assert.true less 005 217 ;
+ assert.false less 0 0 ;
+ assert.false less 03 3 ;
+ assert.false less 3 03 ;
+ assert.true less 005 217 ;
+ assert.true less 0005 217 ;
+ assert.true less 5 00217 ;
+
+    # TEMPORARY disabled, because nested "try"/"catch" do not work and I do not
+ # have the time to fix that right now.
+ if $(0)
+ {
+ try ;
+ {
+ decrement 0 ;
+ }
+ catch can't decrement zero! ;
+
+ try ;
+ {
+ check foo ;
+ }
+ catch : not a number ;
+
+ try ;
+ {
+ increment foo ;
+ }
+ catch : not a number ;
+
+ try ;
+ {
+ log10 0 ;
+ }
+ catch can't take log of zero ;
+
+ try ;
+ {
+ log10 000 ;
+ }
+ catch can't take log of zero ;
+
+ }
+
+ assert.result 0 : log10 1 ;
+ assert.result 0 : log10 9 ;
+ assert.result 1 : log10 10 ;
+ assert.result 1 : log10 99 ;
+ assert.result 2 : log10 100 ;
+ assert.result 2 : log10 101 ;
+ assert.result 2 : log10 125 ;
+ assert.result 2 : log10 999 ;
+ assert.result 3 : log10 1000 ;
+ assert.result 10 : log10 12345678901 ;
+
+ for local x in [ range 75 110 : 5 ]
+ {
+ for local y in [ range $(x) 111 : 3 ]
+ {
+ if $(x) != $(y)
+ {
+ assert.true less $(x) $(y) ;
+ }
+ }
+ }
+
+ for local x in [ range 90 110 : 2 ]
+ {
+ for local y in [ range 80 $(x) : 4 ]
+ {
+ assert.false less $(x) $(y) ;
+ }
+ }
+}
diff --git a/src/kenlm/jam-files/boost-build/util/option.jam b/src/kenlm/jam-files/boost-build/util/option.jam
new file mode 100644
index 0000000..f6dc375
--- /dev/null
+++ b/src/kenlm/jam-files/boost-build/util/option.jam
@@ -0,0 +1,109 @@
+# Copyright (c) 2005 Vladimir Prus.
+#
+# Use, modification and distribution is subject to the Boost Software
+# License Version 1.0. (See accompanying file LICENSE_1_0.txt or
+# http://www.boost.org/LICENSE_1_0.txt)
+
+import modules ;
+
+# Set a value for a named option, to be used when not overridden on the command
+# line.
+rule set ( name : value ? )
+{
+ .option.$(name) = $(value) ;
+}
+
+rule get ( name : default-value ? : implied-value ? )
+{
+ local m = [ MATCH --$(name)=(.*) : [ modules.peek : ARGV ] ] ;
+ if $(m)
+ {
+ return $(m[1]) ;
+ }
+ else
+ {
+ m = [ MATCH (--$(name)) : [ modules.peek : ARGV ] ] ;
+ if $(m) && $(implied-value)
+ {
+ return $(implied-value) ;
+ }
+ else if $(.option.$(name))
+ {
+ return $(.option.$(name)) ;
+ }
+ else
+ {
+ return $(default-value) ;
+ }
+ }
+}
+
+
+# Check command-line args as soon as possible. For each option try to load
+# module named after option. If that succeeds, invoke 'process' rule in the
+# module. The rule may return "true" to indicate that the regular build process
+# should not be attempted.
+#
+# Options take the general form of: --<name>[=<value>] [<value>]
+#
+rule process ( )
+{
+ local ARGV = [ modules.peek : ARGV ] ;
+ local BOOST_BUILD_PATH = [ modules.peek : BOOST_BUILD_PATH ] ;
+
+ local dont-build ;
+ local args = $(ARGV) ;
+ while $(args)
+ {
+ local arg = [ MATCH ^--(.*) : $(args[1]) ] ;
+ while $(args[2-]) && ! $(arg)
+ {
+ args = $(args[2-]) ;
+ arg = [ MATCH ^--(.*) : $(args[1]) ] ;
+ }
+ args = $(args[2-]) ;
+
+ if $(arg)
+ {
+ local split = [ MATCH ^(([^-=]+)[^=]*)(=?)(.*)$ : $(arg) ] ;
+ local full-name = $(split[1]) ;
+ local prefix = $(split[2]) ;
+ local values ;
+
+ if $(split[3])
+ {
+ values = $(split[4]) ;
+ }
+ if $(args) && ! [ MATCH ^(--).* : $(args[1]) ]
+ {
+ values += $(args[1]) ;
+ args = $(args[2-]) ;
+ }
+
+            # Look in options subdirectories of BOOST_BUILD_PATH for modules
+ # matching the full option name and then its prefix.
+ local plugin-dir = options ;
+ local option-files = [ GLOB $(plugin-dir:D=$(BOOST_BUILD_PATH)) :
+ $(full-name).jam $(prefix).jam ] ;
+
+ if $(option-files)
+ {
+ # Load the file into a module named for the option.
+ local f = $(option-files[1]) ;
+ local module-name = --$(f:D=:S=) ;
+ modules.load $(module-name) : $(f:D=) : $(f:D) ;
+
+ # If there is a process rule, call it with the full option name
+ # and its value (if any). If there was no "=" in the option, the
+ # value will be empty.
+ if process in [ RULENAMES $(module-name) ]
+ {
+ dont-build += [ modules.call-in $(module-name) : process
+ --$(full-name) : $(values) ] ;
+ }
+ }
+ }
+ }
+
+ return $(dont-build) ;
+}
diff --git a/src/kenlm/jam-files/boost-build/util/order.jam b/src/kenlm/jam-files/boost-build/util/order.jam
new file mode 100644
index 0000000..a74fc8c
--- /dev/null
+++ b/src/kenlm/jam-files/boost-build/util/order.jam
@@ -0,0 +1,169 @@
+# Copyright (C) 2003 Vladimir Prus
+# Use, modification, and distribution is subject to the Boost Software
+# License, Version 1.0. (See accompanying file LICENSE_1_0.txt or copy
+# at http://www.boost.org/LICENSE_1_0.txt)
+
+# This module defines a class which allows ordering arbitrary objects with
+# regard to arbitrary binary relation.
+#
+# The primary use case is the gcc toolset, which is sensitive to library order:
+# if library 'a' uses symbols from library 'b', then 'a' must be present before
+# 'b' on the linker's command line.
+#
+# This requirement can be lifted for gcc with GNU ld, but for gcc with Solaris
+# LD (and for Solaris toolset as well), the order always matters.
+#
+# So, we need to store order requirements and then order libraries according to
+# them. It is not possible to use the dependency graph as order requirements.
+# What we need is a "use symbols" relationship while dependency graph provides
+# the "needs to be updated" relationship.
+#
+# For example::
+# lib a : a.cpp b;
+# lib b ;
+#
+# For static linking, library 'a' need not depend on 'b'. However, it should
+# still come before 'b' on the command line.
+
+class order
+{
+ rule __init__ ( )
+ {
+ }
+
+    # Adds the constraint that 'first' should precede 'second'. Constraints
+    # are stored as "first--second" tokens and consumed by the 'order' rule.
+    rule add-pair ( first second )
+    {
+        .constraints += $(first)--$(second) ;
+    }
+ NATIVE_RULE class@order : add-pair ;
+
+ # Given a list of objects, reorder them so that the constraints specified by
+ # 'add-pair' are satisfied.
+ #
+ # The algorithm was adopted from an awk script by Nikita Youshchenko
+ # (yoush at cs dot msu dot su)
+ rule order ( objects * )
+ {
+        # The algorithm used is the same as standard transitive closure, except
+ # that we're not keeping in-degree for all vertices, but rather removing
+ # edges.
+ local result ;
+ if $(objects)
+ {
+ local constraints = [ eliminate-unused-constraits $(objects) ] ;
+
+ # Find some library that nobody depends upon and add it to the
+ # 'result' array.
+ local obj ;
+ while $(objects)
+ {
+ local new_objects ;
+ while $(objects)
+ {
+ obj = $(objects[1]) ;
+ if [ has-no-dependents $(obj) : $(constraints) ]
+ {
+ # Emulate break ;
+ new_objects += $(objects[2-]) ;
+ objects = ;
+ }
+ else
+ {
+ new_objects += $(obj) ;
+ obj = ;
+ objects = $(objects[2-]) ;
+ }
+ }
+
+ if ! $(obj)
+ {
+ errors.error "Circular order dependencies" ;
+ }
+ # No problem with placing first.
+ result += $(obj) ;
+                # Remove all constraints where 'obj' comes first, since they are
+ # already satisfied.
+ constraints = [ remove-satisfied $(constraints) : $(obj) ] ;
+
+ # Add the remaining objects for further processing on the next
+ # iteration
+ objects = $(new_objects) ;
+ }
+
+ }
+ return $(result) ;
+ }
+ NATIVE_RULE class@order : order ;
+
+ # Eliminate constraints which mention objects not in 'objects'. In
+ # graph-theory terms, this is finding a subgraph induced by ordered
+ # vertices.
+ rule eliminate-unused-constraits ( objects * )
+ {
+ local result ;
+ for local c in $(.constraints)
+ {
+ local m = [ MATCH (.*)--(.*) : $(c) ] ;
+ if $(m[1]) in $(objects) && $(m[2]) in $(objects)
+ {
+ result += $(c) ;
+ }
+ }
+ return $(result) ;
+ }
+
+    # Returns true if there's no constraint in 'constraints' where 'obj' comes
+ # second.
+ rule has-no-dependents ( obj : constraints * )
+ {
+ local failed ;
+ while $(constraints) && ! $(failed)
+ {
+ local c = $(constraints[1]) ;
+ local m = [ MATCH (.*)--(.*) : $(c) ] ;
+ if $(m[2]) = $(obj)
+ {
+ failed = true ;
+ }
+ constraints = $(constraints[2-]) ;
+ }
+ if ! $(failed)
+ {
+ return true ;
+ }
+ }
+
+ rule remove-satisfied ( constraints * : obj )
+ {
+ local result ;
+ for local c in $(constraints)
+ {
+ local m = [ MATCH (.*)--(.*) : $(c) ] ;
+ if $(m[1]) != $(obj)
+ {
+ result += $(c) ;
+ }
+ }
+ return $(result) ;
+ }
+}
+
+
+rule __test__ ( )
+{
+ import "class" : new ;
+ import assert ;
+
+ c1 = [ new order ] ;
+ $(c1).add-pair l1 l2 ;
+
+ assert.result l1 l2 : $(c1).order l1 l2 ;
+ assert.result l1 l2 : $(c1).order l2 l1 ;
+
+ $(c1).add-pair l2 l3 ;
+ assert.result l1 l2 : $(c1).order l2 l1 ;
+ $(c1).add-pair x l2 ;
+ assert.result l1 l2 : $(c1).order l2 l1 ;
+ assert.result l1 l2 l3 : $(c1).order l2 l3 l1 ;
+}
diff --git a/src/kenlm/jam-files/boost-build/util/os.jam b/src/kenlm/jam-files/boost-build/util/os.jam
new file mode 100644
index 0000000..daef27f
--- /dev/null
+++ b/src/kenlm/jam-files/boost-build/util/os.jam
@@ -0,0 +1,171 @@
+# Copyright 2001, 2002, 2003, 2005 Dave Abrahams
+# Copyright 2006 Rene Rivera
+# Copyright 2003, 2005 Vladimir Prus
+# Distributed under the Boost Software License, Version 1.0.
+# (See accompanying file LICENSE_1_0.txt or http://www.boost.org/LICENSE_1_0.txt)
+
+import modules ;
+import string ;
+
+
+# Return the value(s) of the given environment variable(s) at the time bjam was
+# invoked.
+rule environ ( variable-names + )
+{
+ return [ modules.peek .ENVIRON : $(variable-names) ] ;
+}
+
+.name = [ modules.peek : OS ] ;
+.platform = [ modules.peek : OSPLAT ] ;
+.version = [ modules.peek : OSVER ] ;
+
+
+local rule constant ( c : os ? )
+{
+ os ?= $(.name) ;
+ # First look for a platform-specific name, then the general value.
+ local variables = .$(c)-$(os) .$(c) ;
+ local result = $($(variables)) ;
+ return $(result[1]) ;
+}
+
+rule get-constant ( os ? )
+{
+ # Find the name of the constant being accessed, which is equal to the name
+ # used to invoke us.
+ local bt = [ BACKTRACE 1 ] ;
+ local rulename = [ MATCH ([^.]*)$ : $(bt[4]) ] ;
+ return [ constant $(rulename) : $(os) ] ;
+}
+
+
+# export all the common constants
+.constants = name platform version shared-library-path-variable path-separator executable-path-variable executable-suffix ;
+for local constant in $(.constants)
+{
+ IMPORT $(__name__) : get-constant : $(__name__) : $(constant) ;
+}
+EXPORT $(__name__) : $(.constants) ;
+
+.executable-path-variable-NT = PATH ;
+# On Windows the case and capitalization of PATH is not always predictable, so
+# let's find out what variable name was really set.
+if $(.name) = NT
+{
+ for local n in [ VARNAMES .ENVIRON ]
+ {
+ if $(n:L) = path
+ {
+ .executable-path-variable-NT = $(n) ;
+ }
+ }
+}
+
+# Specific constants for various platforms. There's no need to define any
+# constant whose value would be the same as the default, below.
+.shared-library-path-variable-NT = $(.executable-path-variable-NT) ;
+.path-separator-NT = ";" ;
+.expand-variable-prefix-NT = % ;
+.expand-variable-suffix-NT = % ;
+.executable-suffix-NT = .exe ;
+
+.shared-library-path-variable-CYGWIN = PATH ;
+
+.shared-library-path-variable-MACOSX = DYLD_LIBRARY_PATH ;
+
+.shared-library-path-variable-AIX = LIBPATH ;
+
+# Default constants
+.shared-library-path-variable = LD_LIBRARY_PATH ;
+.path-separator = ":" ;
+.expand-variable-prefix = $ ;
+.expand-variable-suffix = "" ;
+.executable-path-variable = PATH ;
+.executable-suffix = "" ;
+
+
+# Return a list of the directories in the PATH. Yes, that information is (sort
+# of) available in the global module, but jam code can change those values, and
+# it isn't always clear what case/capitalization to use when looking. This rule
+# is a more reliable way to get there.
+rule executable-path ( )
+{
+ return [ string.words [ environ [ constant executable-path-variable ] ]
+ : [ constant path-separator ] ] ;
+}
+
+
+# Initialize the list of home directories for the current user depending on the
+# OS.
+if $(.name) = NT
+{
+ local home = [ environ HOMEDRIVE HOMEPATH ] ;
+ .home-directories = $(home[1])$(home[2]) [ environ HOME ] [ environ USERPROFILE ] ;
+}
+else
+{
+ .home-directories = [ environ HOME ] ;
+}
+
+
+# Can't use 'constant' mechanism because it only returns 1-element values.
+rule home-directories ( )
+{
+ return $(.home-directories) ;
+}
+
+
+# Return the string needed to represent the expansion of the named shell
+# variable.
+rule expand-variable ( variable )
+{
+ local prefix = [ constant expand-variable-prefix ] ;
+ local suffix = [ constant expand-variable-suffix ] ;
+ return $(prefix)$(variable)$(suffix) ;
+}
+
+
+# Returns true if running on windows, whether in cygwin or not.
+rule on-windows ( )
+{
+ local result ;
+ if [ modules.peek : NT ]
+ {
+ result = true ;
+ }
+ else if [ modules.peek : UNIX ]
+ {
+ switch [ modules.peek : JAMUNAME ]
+ {
+ case CYGWIN* :
+ {
+ result = true ;
+ }
+ }
+ }
+ return $(result) ;
+}
+
+
+if ! [ on-windows ]
+{
+ .on-unix = 1 ;
+}
+
+
+rule on-unix
+{
+ return $(.on-unix) ;
+}
+
+
+rule __test__
+{
+ import assert ;
+ if ! ( --quiet in [ modules.peek : ARGV ] )
+ {
+ ECHO os: name= [ name ] ;
+ ECHO os: version= [ version ] ;
+ }
+ assert.true name ;
+}
diff --git a/src/kenlm/jam-files/boost-build/util/path.jam b/src/kenlm/jam-files/boost-build/util/path.jam
new file mode 100644
index 0000000..545d83c
--- /dev/null
+++ b/src/kenlm/jam-files/boost-build/util/path.jam
@@ -0,0 +1,910 @@
+# Copyright 2002-2006. Vladimir Prus
+# Copyright 2003-2004. Dave Abrahams
+# Copyright 2003-2006. Rene Rivera
+# Distributed under the Boost Software License, Version 1.0.
+# (See accompanying file LICENSE_1_0.txt or copy at
+# http://www.boost.org/LICENSE_1_0.txt)
+
+# Performs various path manipulations. Paths are always in a 'normalized'
+# representation. In it, a path may be either:
+#
+# - '.', or
+#
+# - ['/'] [ ( '..' '/' )* (token '/')* token ]
+#
+# In plain English, path can be rooted, '..' elements are allowed only at the
+# beginning, and it never ends in slash, except for path consisting of slash
+# only.
+
+import modules ;
+import regex ;
+import sequence ;
+import set ;
+
+
+os = [ modules.peek : OS ] ;
+if [ modules.peek : UNIX ]
+{
+ local uname = [ modules.peek : JAMUNAME ] ;
+ switch $(uname)
+ {
+ case CYGWIN* : os = CYGWIN ;
+ case * : os = UNIX ;
+ }
+}
+
+
+# Converts the native path into normalized form.
+#
+rule make ( native )
+{
+ return [ make-$(os) $(native) ] ;
+}
+
+
+# Builds native representation of the path.
+#
+rule native ( path )
+{
+ return [ native-$(os) $(path) ] ;
+}
+
+
+# Tests if a path is rooted.
+#
+rule is-rooted ( path )
+{
+ return [ MATCH "^(/)" : $(path) ] ;
+}
+
+
+# Tests if a path has a parent.
+#
+rule has-parent ( path )
+{
+ if $(path) != /
+ {
+ return 1 ;
+ }
+ else
+ {
+ return ;
+ }
+}
+
+
+# Returns the path without any directory components.
+#
+rule basename ( path )
+{
+ return [ MATCH "([^/]+)$" : $(path) ] ;
+}
+
+
+# Returns parent directory of the path. If no parent exists, error is issued.
+#
+rule parent ( path )
+{
+ if [ has-parent $(path) ]
+ {
+ if $(path) = .
+ {
+ return .. ;
+ }
+ else
+ {
+ # Strip everything at the end of path up to and including the last
+ # slash.
+ local result = [ regex.match "((.*)/)?([^/]+)" : $(path) : 2 3 ] ;
+
+ # Did we strip what we shouldn't?
+ if $(result[2]) = ".."
+ {
+ return $(path)/.. ;
+ }
+ else
+ {
+ if ! $(result[1])
+ {
+ if [ is-rooted $(path) ]
+ {
+ result = / ;
+ }
+ else
+ {
+ result = . ;
+ }
+ }
+ return $(result[1]) ;
+ }
+ }
+ }
+ else
+ {
+ import errors ;
+ errors.error "Path '$(path)' has no parent" ;
+ }
+}
+
+
+# Returns path2 such that "[ join path path2 ] = .". The path may not contain
+# ".." element or be rooted.
+#
+rule reverse ( path )
+{
+ if $(path) = .
+ {
+ return $(path) ;
+ }
+ else
+ {
+ local tokens = [ regex.split $(path) / ] ;
+ local tokens2 ;
+ for local i in $(tokens)
+ {
+ tokens2 += .. ;
+ }
+ return [ sequence.join $(tokens2) : / ] ;
+ }
+}
+
+
+# Concatenates the passed path elements. Generates an error if any element other
+# than the first one is rooted. Skips any empty or undefined path elements.
+#
+rule join ( elements + )
+{
+ if ! $(elements[2-])
+ {
+ return $(elements[1]) ;
+ }
+ else
+ {
+ for local e in $(elements[2-])
+ {
+ if [ is-rooted $(e) ]
+ {
+ import errors ;
+ errors.error only the first element may be rooted ;
+ }
+ }
+ return [ NORMALIZE_PATH "$(elements)" ] ;
+ }
+}
+
+
+# If 'path' is relative, it is rooted at 'root'. Otherwise, it is unchanged.
+#
+rule root ( path root )
+{
+ if [ is-rooted $(path) ]
+ {
+ return $(path) ;
+ }
+ else
+ {
+ return [ join $(root) $(path) ] ;
+ }
+}
+
+
+# Returns the current working directory.
+#
+rule pwd ( )
+{
+ if ! $(.pwd)
+ {
+ .pwd = [ make [ PWD ] ] ;
+ }
+ return $(.pwd) ;
+}
+
+
+# Returns the list of files matching the given pattern in the specified
+# directory. Both directories and patterns are supplied as portable paths. Each
+# pattern should be non-absolute path, and can't contain "." or ".." elements.
+# Each slash separated element of pattern can contain the following special
+# characters:
+# - '?', which matches any character
+# - '*', which matches arbitrary number of characters.
+# A file $(d)/e1/e2/e3 (where 'd' is in $(dirs)) matches pattern p1/p2/p3 if and
+# only if e1 matches p1, e2 matches p2 and so on.
+#
+# For example:
+# [ glob . : *.cpp ]
+# [ glob . : */build/Jamfile ]
+#
+rule glob ( dirs * : patterns + : exclude-patterns * )
+{
+ local result ;
+ local real-patterns ;
+ local real-exclude-patterns ;
+ for local d in $(dirs)
+ {
+ for local p in $(patterns)
+ {
+ local pattern = [ path.root $(p) $(d) ] ;
+ real-patterns += [ path.native $(pattern) ] ;
+ }
+
+ for local p in $(exclude-patterns)
+ {
+ local pattern = [ path.root $(p) $(d) ] ;
+ real-exclude-patterns += [ path.native $(pattern) ] ;
+ }
+ }
+
+ local inc = [ GLOB-RECURSIVELY $(real-patterns) ] ;
+ inc = [ sequence.transform NORMALIZE_PATH : $(inc) ] ;
+ local exc = [ GLOB-RECURSIVELY $(real-exclude-patterns) ] ;
+ exc = [ sequence.transform NORMALIZE_PATH : $(exc) ] ;
+
+ return [ sequence.transform path.make : [ set.difference $(inc) : $(exc) ] ]
+ ;
+}
+
+
+# Recursive version of GLOB. Builds the glob of files while also searching in
+# the subdirectories of the given roots. An optional set of exclusion patterns
+# will filter out the matching entries from the result. The exclusions also
+# apply to the subdirectory scanning, such that directories that match the
+# exclusion patterns will not be searched.
+#
+rule glob-tree ( roots * : patterns + : exclude-patterns * )
+{
+ return [ sequence.transform path.make : [ .glob-tree [ sequence.transform
+ path.native : $(roots) ] : $(patterns) : $(exclude-patterns) ] ] ;
+}
+
+
+local rule .glob-tree ( roots * : patterns * : exclude-patterns * )
+{
+ local excluded ;
+ if $(exclude-patterns)
+ {
+ excluded = [ GLOB $(roots) : $(exclude-patterns) ] ;
+ }
+ local result = [ set.difference [ GLOB $(roots) : $(patterns) ] :
+ $(excluded) ] ;
+ local subdirs ;
+ for local d in [ set.difference [ GLOB $(roots) : * ] : $(excluded) ]
+ {
+ if ! ( $(d:D=) in . .. ) && ! [ CHECK_IF_FILE $(d) ]
+ {
+ subdirs += $(d) ;
+ }
+ }
+ if $(subdirs)
+ {
+ result += [ .glob-tree $(subdirs) : $(patterns) : $(exclude-patterns) ]
+ ;
+ }
+ return $(result) ;
+}
+
+
+# Returns true if the specified file exists.
+#
+rule exists ( file )
+{
+ return [ path.glob $(file:D) : $(file:D=) ] ;
+}
+NATIVE_RULE path : exists ;
+
+
+# Find out the absolute name of path and returns the list of all the parents,
+# starting with the immediate one. Parents are returned as relative names. If
+# 'upper_limit' is specified, directories above it will be pruned.
+#
+rule all-parents ( path : upper_limit ? : cwd ? )
+{
+ cwd ?= [ pwd ] ;
+ local path_ele = [ regex.split [ root $(path) $(cwd) ] / ] ;
+
+ if ! $(upper_limit)
+ {
+ upper_limit = / ;
+ }
+ local upper_ele = [ regex.split [ root $(upper_limit) $(cwd) ] / ] ;
+
+ # Leave only elements in 'path_ele' below 'upper_ele'.
+ while $(path_ele) && ( $(upper_ele[1]) = $(path_ele[1]) )
+ {
+ upper_ele = $(upper_ele[2-]) ;
+ path_ele = $(path_ele[2-]) ;
+ }
+
+ # Have all upper elements been removed ?
+ if $(upper_ele)
+ {
+ import errors ;
+ errors.error "$(upper_limit) is not prefix of $(path)" ;
+ }
+
+    # Create the relative paths to parents, one for each element in 'path_ele'.
+ local result ;
+ for local i in $(path_ele)
+ {
+ path = [ parent $(path) ] ;
+ result += $(path) ;
+ }
+ return $(result) ;
+}
+
+
+# Search for 'pattern' in parent directories of 'dir', up to and including
+# 'upper_limit', if it is specified, or up to the filesystem root otherwise.
+#
+rule glob-in-parents ( dir : patterns + : upper-limit ? )
+{
+ local result ;
+ local parent-dirs = [ all-parents $(dir) : $(upper-limit) ] ;
+
+ while $(parent-dirs) && ! $(result)
+ {
+ result = [ glob $(parent-dirs[1]) : $(patterns) ] ;
+ parent-dirs = $(parent-dirs[2-]) ;
+ }
+ return $(result) ;
+}
+
+
+# Assuming 'child' is a subdirectory of 'parent', return the relative path from
+# 'parent' to 'child'.
+#
+rule relative ( child parent : no-error ? )
+{
+ local not-a-child ;
+ if $(parent) = "."
+ {
+ return $(child) ;
+ }
+ else
+ {
+ local split1 = [ regex.split $(parent) / ] ;
+ local split2 = [ regex.split $(child) / ] ;
+
+ while $(split1)
+ {
+ if $(split1[1]) = $(split2[1])
+ {
+ split1 = $(split1[2-]) ;
+ split2 = $(split2[2-]) ;
+ }
+ else
+ {
+ not-a-child = true ;
+ split1 = ;
+ }
+ }
+ if $(split2)
+ {
+ if $(not-a-child)
+ {
+ if $(no-error)
+ {
+ return not-a-child ;
+ }
+ else
+ {
+ import errors ;
+ errors.error $(child) is not a subdir of $(parent) ;
+ }
+ }
+ else
+ {
+ return [ join $(split2) ] ;
+ }
+ }
+ else
+ {
+ return "." ;
+ }
+ }
+}
+
+
+# Returns the minimal path to path2 that is relative to path1.
+#
+rule relative-to ( path1 path2 )
+{
+ local root_1 = [ regex.split [ reverse $(path1) ] / ] ;
+ local split1 = [ regex.split $(path1) / ] ;
+ local split2 = [ regex.split $(path2) / ] ;
+
+ while $(split1) && $(root_1)
+ {
+ if $(split1[1]) = $(split2[1])
+ {
+ root_1 = $(root_1[2-]) ;
+ split1 = $(split1[2-]) ;
+ split2 = $(split2[2-]) ;
+ }
+ else
+ {
+ split1 = ;
+ }
+ }
+ return [ join . $(root_1) $(split2) ] ;
+}
+
+
+# Returns the list of paths used by the operating system for looking up
+# programs.
+#
+rule programs-path ( )
+{
+ local result ;
+ local raw = [ modules.peek : PATH Path path ] ;
+ for local p in $(raw)
+ {
+ if $(p)
+ {
+ result += [ path.make $(p) ] ;
+ }
+ }
+ return $(result) ;
+}
+
+
+rule makedirs ( path )
+{
+ local result = true ;
+ local native = [ native $(path) ] ;
+ if ! [ exists $(native) ]
+ {
+ if [ makedirs [ parent $(path) ] ]
+ {
+ if ! [ MAKEDIR $(native) ]
+ {
+ import errors ;
+ errors.error "Could not create directory '$(path)'" ;
+ result = ;
+ }
+ }
+ }
+ return $(result) ;
+}
+
+
+# Converts native Windows paths into our internal canonical path representation.
+# Supports 'invalid' paths containing multiple successive path separator
+# characters.
+#
+# TODO: Check and if needed add support for Windows 'X:file' path format where
+# the file is located in the current folder on drive X.
+#
+rule make-NT ( native )
+{
+ local result = [ NORMALIZE_PATH $(native) ] ;
+
+ # We need to add an extra '/' in front in case this is a rooted Windows path
+ # starting with a drive letter and not a path separator character since the
+ # builtin NORMALIZE_PATH rule has no knowledge of this leading drive letter
+ # and treats it as a regular folder name.
+ if [ regex.match "(^.:)" : $(native) ]
+ {
+ result = /$(result) ;
+ }
+
+ return $(result) ;
+}
+
+
+rule native-NT ( path )
+{
+ local remove-slash = [ MATCH "^/(.:.*)" : $(path) ] ;
+ if $(remove-slash)
+ {
+ path = $(remove-slash) ;
+ }
+ return [ regex.replace $(path) / \\ ] ;
+}
+
+
+rule make-UNIX ( native )
+{
+    # VP: I have no idea how 'native' can be empty here! But it can!
+ if ! $(native)
+ {
+ import errors ;
+ errors.error "Empty path passed to 'make-UNIX'" ;
+ }
+ else
+ {
+ return [ NORMALIZE_PATH $(native:T) ] ;
+ }
+}
+
+
+rule native-UNIX ( path )
+{
+ return $(path) ;
+}
+
+
+rule make-CYGWIN ( path )
+{
+ return [ make-NT $(path) ] ;
+}
+
+
+rule native-CYGWIN ( path )
+{
+ local result = $(path) ;
+ if [ regex.match "(^/.:)" : $(path) ] # Windows absolute path.
+ {
+ result = [ MATCH "^/?(.*)" : $(path) ] ; # Remove leading '/'.
+ }
+ return [ native-UNIX $(result) ] ;
+}
+
+
+# split-path-VMS: splits input native path into device dir file (each part is
+# optional).
+#
+# example:
+#
+# dev:[dir]file.c => dev: [dir] file.c
+#
+rule split-path-VMS ( native )
+{
+ local matches = [ MATCH ([a-zA-Z0-9_-]+:)?(\\[[^\]]*\\])?(.*)?$ : $(native)
+ ] ;
+ local device = $(matches[1]) ;
+ local dir = $(matches[2]) ;
+ local file = $(matches[3]) ;
+
+ return $(device) $(dir) $(file) ;
+}
+
+
+# Converts a native VMS path into a portable path spec.
+#
+# Does not handle current-device absolute paths such as "[dir]File.c" as it is
+# not clear how to represent them in the portable path notation.
+#
+# Adds a trailing dot (".") to the file part if no extension is present (helps
+# when converting it back into native path).
+#
+rule make-VMS ( native )
+{
+ if [ MATCH ^(\\[[a-zA-Z0-9]) : $(native) ]
+ {
+ import errors ;
+ errors.error "Can't handle default-device absolute paths: " $(native) ;
+ }
+
+ local parts = [ split-path-VMS $(native) ] ;
+ local device = $(parts[1]) ;
+ local dir = $(parts[2]) ;
+ local file = $(parts[3]) ;
+ local elems ;
+
+ if $(device)
+ {
+ #
+ # rooted
+ #
+ elems = /$(device) ;
+ }
+
+ if $(dir) = "[]"
+ {
+ #
+ # Special case: current directory
+ #
+ elems = $(elems) "." ;
+ }
+ else if $(dir)
+ {
+ dir = [ regex.replace $(dir) "\\[|\\]" "" ] ;
+ local dir_parts = [ regex.split $(dir) \\. ] ;
+
+ if $(dir_parts[1]) = ""
+ {
+ #
+ # Relative path
+ #
+ dir_parts = $(dir_parts[2--1]) ;
+ }
+
+ #
+ # replace "parent-directory" parts (- => ..)
+ #
+ dir_parts = [ regex.replace-list $(dir_parts) : - : .. ] ;
+
+ elems = $(elems) $(dir_parts) ;
+ }
+
+ if $(file)
+ {
+ if ! [ MATCH (\\.) : $(file) ]
+ {
+ #
+ # Always add "." to end of non-extension file.
+ #
+ file = $(file). ;
+ }
+ elems = $(elems) $(file) ;
+ }
+
+ local portable = [ path.join $(elems) ] ;
+
+ return $(portable) ;
+}
+
+
+# Converts a portable path spec into a native VMS path.
+#
+# Relies on having at least one dot (".") included in the file name to be able
+# to differentiate it from the directory part.
+#
+rule native-VMS ( path )
+{
+ local device = "" ;
+ local dir = $(path) ;
+ local file = "" ;
+ local native ;
+ local split ;
+
+ #
+ # Has device ?
+ #
+ if [ is-rooted $(dir) ]
+ {
+ split = [ MATCH ^/([^:]+:)/?(.*) : $(dir) ] ;
+ device = $(split[1]) ;
+ dir = $(split[2]) ;
+ }
+
+ #
+ # Has file ?
+ #
+ # This is no exact science, just guess work:
+ #
+ # If the last part of the current path spec includes some chars, followed by
+ # a dot, optionally followed by more chars - then it is a file (keep your
+ # fingers crossed).
+ #
+ split = [ regex.split $(dir) / ] ;
+ local maybe_file = $(split[-1]) ;
+
+ if [ MATCH ^([^.]+\\..*) : $(maybe_file) ]
+ {
+ file = $(maybe_file) ;
+ dir = [ sequence.join $(split[1--2]) : / ] ;
+ }
+
+ #
+ # Has dir spec ?
+ #
+ if $(dir) = "."
+ {
+ dir = "[]" ;
+ }
+ else if $(dir)
+ {
+ dir = [ regex.replace $(dir) \\.\\. - ] ;
+ dir = [ regex.replace $(dir) / . ] ;
+
+ if $(device) = ""
+ {
+ #
+ # Relative directory
+ #
+ dir = "."$(dir) ;
+ }
+ dir = "["$(dir)"]" ;
+ }
+
+ native = [ sequence.join $(device) $(dir) $(file) ] ;
+
+ return $(native) ;
+}
+
+# Remove one level of indirection
+IMPORT $(__name__) : make-$(os) native-$(os) : $(__name__) : make native ;
+EXPORT $(__name__) : make native ;
+
+rule __test__ ( )
+{
+ import assert ;
+ import errors : try catch ;
+
+ assert.true is-rooted "/" ;
+ assert.true is-rooted "/foo" ;
+ assert.true is-rooted "/foo/bar" ;
+ assert.result : is-rooted "." ;
+ assert.result : is-rooted "foo" ;
+ assert.result : is-rooted "foo/bar" ;
+
+ assert.true has-parent "foo" ;
+ assert.true has-parent "foo/bar" ;
+ assert.true has-parent "." ;
+ assert.result : has-parent "/" ;
+
+ assert.result "." : basename "." ;
+ assert.result ".." : basename ".." ;
+ assert.result "foo" : basename "foo" ;
+ assert.result "foo" : basename "bar/foo" ;
+ assert.result "foo" : basename "gaz/bar/foo" ;
+ assert.result "foo" : basename "/gaz/bar/foo" ;
+
+ assert.result "." : parent "foo" ;
+ assert.result "/" : parent "/foo" ;
+ assert.result "foo/bar" : parent "foo/bar/giz" ;
+ assert.result ".." : parent "." ;
+ assert.result ".." : parent "../foo" ;
+ assert.result "../../foo" : parent "../../foo/bar" ;
+
+ assert.result "." : reverse "." ;
+ assert.result ".." : reverse "foo" ;
+ assert.result "../../.." : reverse "foo/bar/giz" ;
+
+ assert.result "foo" : join "foo" ;
+ assert.result "/foo" : join "/" "foo" ;
+ assert.result "foo/bar" : join "foo" "bar" ;
+ assert.result "foo/bar" : join "foo/giz" "../bar" ;
+ assert.result "foo/giz" : join "foo/bar/baz" "../../giz" ;
+ assert.result ".." : join "." ".." ;
+ assert.result ".." : join "foo" "../.." ;
+ assert.result "../.." : join "../foo" "../.." ;
+ assert.result "/foo" : join "/bar" "../foo" ;
+ assert.result "foo/giz" : join "foo/giz" "." ;
+ assert.result "." : join lib2 ".." ;
+ assert.result "/" : join "/a" ".." ;
+
+ assert.result /a/b : join /a/b/c .. ;
+
+ assert.result "foo/bar/giz" : join "foo" "bar" "giz" ;
+ assert.result "giz" : join "foo" ".." "giz" ;
+ assert.result "foo/giz" : join "foo" "." "giz" ;
+
+ try ;
+ {
+ join "a" "/b" ;
+ }
+ catch only first element may be rooted ;
+
+ local CWD = "/home/ghost/build" ;
+ assert.result : all-parents . : . : $(CWD) ;
+ assert.result . .. ../.. ../../.. : all-parents "Jamfile" : "" : $(CWD) ;
+ assert.result foo . .. ../.. ../../.. : all-parents "foo/Jamfile" : "" :
+ $(CWD) ;
+ assert.result ../Work .. ../.. ../../.. : all-parents "../Work/Jamfile" : ""
+ : $(CWD) ;
+
+ local CWD = "/home/ghost" ;
+ assert.result . .. : all-parents "Jamfile" : "/home" : $(CWD) ;
+ assert.result . : all-parents "Jamfile" : "/home/ghost" : $(CWD) ;
+
+ assert.result "c/d" : relative "a/b/c/d" "a/b" ;
+ assert.result "foo" : relative "foo" "." ;
+
+ local save-os = [ modules.peek path : os ] ;
+ modules.poke path : os : NT ;
+
+ assert.result "foo/bar/giz" : make-NT "foo/bar/giz" ;
+ assert.result "foo/bar/giz" : make-NT "foo\\bar\\giz" ;
+ assert.result "foo" : make-NT "foo/" ;
+ assert.result "foo" : make-NT "foo\\" ;
+ assert.result "foo" : make-NT "foo/." ;
+ assert.result "foo" : make-NT "foo/bar/.." ;
+ assert.result "foo" : make-NT "foo/bar/../" ;
+ assert.result "foo" : make-NT "foo/bar/..\\" ;
+ assert.result "foo/bar" : make-NT "foo/././././bar" ;
+ assert.result "/foo" : make-NT "\\foo" ;
+ assert.result "/D:/My Documents" : make-NT "D:\\My Documents" ;
+ assert.result "/c:/boost/tools/build/new/project.jam" : make-NT
+ "c:\\boost\\tools\\build\\test\\..\\new\\project.jam" ;
+
+ # Test processing 'invalid' paths containing multiple successive path
+ # separators.
+ assert.result "foo" : make-NT "foo//" ;
+ assert.result "foo" : make-NT "foo///" ;
+ assert.result "foo" : make-NT "foo\\\\" ;
+ assert.result "foo" : make-NT "foo\\\\\\" ;
+ assert.result "/foo" : make-NT "//foo" ;
+ assert.result "/foo" : make-NT "///foo" ;
+ assert.result "/foo" : make-NT "\\\\foo" ;
+ assert.result "/foo" : make-NT "\\\\\\foo" ;
+ assert.result "/foo" : make-NT "\\/\\/foo" ;
+ assert.result "foo/bar" : make-NT "foo//\\//\\\\bar//\\//\\\\\\//\\//\\\\" ;
+ assert.result "foo" : make-NT "foo/bar//.." ;
+ assert.result "foo/bar" : make-NT "foo/bar/giz//.." ;
+ assert.result "foo/giz" : make-NT
+ "foo//\\//\\\\bar///\\\\//\\\\////\\/..///giz\\//\\\\\\//\\//\\\\" ;
+ assert.result "../../../foo" : make-NT "..///.//..///.//..////foo///" ;
+
+ # Test processing 'invalid' rooted paths with too many '..' path elements
+ # that would place them before the root.
+ assert.result : make-NT "/.." ;
+ assert.result : make-NT "/../" ;
+ assert.result : make-NT "/../." ;
+ assert.result : make-NT "/.././" ;
+ assert.result : make-NT "/foo/../bar/giz/.././././../../." ;
+ assert.result : make-NT "/foo/../bar/giz/.././././../.././" ;
+ assert.result : make-NT "//foo/../bar/giz/.././././../../." ;
+ assert.result : make-NT "//foo/../bar/giz/.././././../.././" ;
+ assert.result : make-NT "\\\\foo/../bar/giz/.././././../../." ;
+ assert.result : make-NT "\\\\foo/../bar/giz/.././././../.././" ;
+ assert.result : make-NT "/..///.//..///.//..////foo///" ;
+
+ assert.result "foo\\bar\\giz" : native-NT "foo/bar/giz" ;
+ assert.result "foo" : native-NT "foo" ;
+ assert.result "\\foo" : native-NT "/foo" ;
+ assert.result "D:\\My Documents\\Work" : native-NT "/D:/My Documents/Work" ;
+
+ modules.poke path : os : UNIX ;
+
+ assert.result "foo/bar/giz" : make-UNIX "foo/bar/giz" ;
+ assert.result "/sub1" : make-UNIX "/sub1/." ;
+ assert.result "/sub1" : make-UNIX "/sub1/sub2/.." ;
+ assert.result "sub1" : make-UNIX "sub1/." ;
+ assert.result "sub1" : make-UNIX "sub1/sub2/.." ;
+ assert.result "/foo/bar" : native-UNIX "/foo/bar" ;
+
+ modules.poke path : os : VMS ;
+
+ #
+ # Do not really need to poke os before these
+ #
+ assert.result "disk:" "[dir]" "file" : split-path-VMS "disk:[dir]file" ;
+ assert.result "disk:" "[dir]" "" : split-path-VMS "disk:[dir]" ;
+ assert.result "disk:" "" "" : split-path-VMS "disk:" ;
+ assert.result "disk:" "" "file" : split-path-VMS "disk:file" ;
+ assert.result "" "[dir]" "file" : split-path-VMS "[dir]file" ;
+ assert.result "" "[dir]" "" : split-path-VMS "[dir]" ;
+ assert.result "" "" "file" : split-path-VMS "file" ;
+ assert.result "" "" "" : split-path-VMS "" ;
+
+ #
+ # Special case: current directory
+ #
+ assert.result "" "[]" "" : split-path-VMS "[]" ;
+ assert.result "disk:" "[]" "" : split-path-VMS "disk:[]" ;
+ assert.result "" "[]" "file" : split-path-VMS "[]file" ;
+ assert.result "disk:" "[]" "file" : split-path-VMS "disk:[]file" ;
+
+ #
+ # Make portable paths
+ #
+ assert.result "/disk:" : make-VMS "disk:" ;
+ assert.result "foo/bar/giz" : make-VMS "[.foo.bar.giz]" ;
+ assert.result "foo" : make-VMS "[.foo]" ;
+ assert.result "foo" : make-VMS "[.foo.bar.-]" ;
+ assert.result ".." : make-VMS "[.-]" ;
+ assert.result ".." : make-VMS "[-]" ;
+ assert.result "." : make-VMS "[]" ;
+ assert.result "giz.h" : make-VMS "giz.h" ;
+ assert.result "foo/bar/giz.h" : make-VMS "[.foo.bar]giz.h" ;
+ assert.result "/disk:/my_docs" : make-VMS "disk:[my_docs]" ;
+ assert.result "/disk:/boost/tools/build/new/project.jam" : make-VMS
+ "disk:[boost.tools.build.test.-.new]project.jam" ;
+
+ #
+ # Special case (adds '.' to end of file w/o extension to disambiguate from
+ # directory in portable path spec)
+ #
+ assert.result "Jamfile." : make-VMS "Jamfile" ;
+ assert.result "dir/Jamfile." : make-VMS "[.dir]Jamfile" ;
+ assert.result "/disk:/dir/Jamfile." : make-VMS "disk:[dir]Jamfile" ;
+
+ #
+ # Make native paths
+ #
+ assert.result "disk:" : native-VMS "/disk:" ;
+ assert.result "[.foo.bar.giz]" : native-VMS "foo/bar/giz" ;
+ assert.result "[.foo]" : native-VMS "foo" ;
+ assert.result "[.-]" : native-VMS ".." ;
+ assert.result "[.foo.-]" : native-VMS "foo/.." ;
+ assert.result "[]" : native-VMS "." ;
+ assert.result "disk:[my_docs.work]" : native-VMS "/disk:/my_docs/work" ;
+ assert.result "giz.h" : native-VMS "giz.h" ;
+ assert.result "disk:Jamfile." : native-VMS "/disk:Jamfile." ;
+ assert.result "disk:[my_docs.work]Jamfile." : native-VMS
+ "/disk:/my_docs/work/Jamfile." ;
+
+ modules.poke path : os : $(save-os) ;
+}
diff --git a/src/kenlm/jam-files/boost-build/util/print.jam b/src/kenlm/jam-files/boost-build/util/print.jam
new file mode 100644
index 0000000..c867e4e
--- /dev/null
+++ b/src/kenlm/jam-files/boost-build/util/print.jam
@@ -0,0 +1,488 @@
+# Copyright 2003 Douglas Gregor
+# Copyright 2002, 2003, 2005 Rene Rivera
+# Copyright 2002, 2003, 2004, 2005 Vladimir Prus
+# Distributed under the Boost Software License, Version 1.0.
+# (See accompanying file LICENSE_1_0.txt or http://www.boost.org/LICENSE_1_0.txt)
+
+# Utilities for generating format independent output. Using these
+# will help in generation of documentation in at minimum plain/console
+# and html.
+
+import modules ;
+import numbers ;
+import string ;
+import regex ;
+import "class" ;
+import scanner ;
+import path ;
+
+# The current output target. Defaults to console.
+output-target = console ;
+
+# The current output type. Defaults to plain. Other possible values are "html".
+output-type = plain ;
+
+# Whitespace.
+.whitespace = [ string.whitespace ] ;
+
+
+# Set the target and type of output to generate. This sets both the destination
+# output and the type of docs to generate to that output. The target can be
+# either a file or "console" for echoing to the console. If the type of output
+# is not specified it defaults to plain text.
+#
+rule output (
+ target # The target file or device; file or "console".
+ type ? # The type of output; "plain" or "html".
+)
+{
+ type ?= plain ;
+ if $(output-target) != $(target)
+ {
+ output-target = $(target) ;
+ output-type = $(type) ;
+ if $(output-type) = html
+ {
+ text
+ "<!DOCTYPE html PUBLIC \"-//W3C//DTD HTML 4.01 Transitional//EN\">"
+ "<html>"
+ "<head>"
+ "</head>"
+ "<body link=\"#0000ff\" vlink=\"#800080\">"
+ : true
+ : prefix ;
+ text
+ "</body>"
+ "</html>"
+ :
+ : suffix ;
+ }
+ }
+}
+
+
+# Generate a section with a description. The type of output can be controlled by
+# the value of the 'output-type' variable.
+#
+rule section (
+ name # The name of the section.
+ description * # A number of description lines.
+)
+{
+ if $(output-type) = plain
+ {
+ lines [ split-at-words $(name): ] ;
+ lines ;
+ }
+ else if $(output-type) = html
+ {
+ name = [ escape-html $(name) ] ;
+ text <h3>$(name)</h3> <p> ;
+ }
+ local pre = ;
+ while $(description)
+ {
+ local paragraph = ;
+ while $(description) && [ string.is-whitespace $(description[1]) ] { description = $(description[2-]) ; }
+ if $(pre)
+ {
+ while $(description) && (
+ $(pre) = " $(description[1])" ||
+ ( $(pre) < [ string.chars [ MATCH "^([$(.whitespace)]*)" : " $(description[1])" ] ] )
+ )
+ { paragraph += $(description[1]) ; description = $(description[2-]) ; }
+ while [ string.is-whitespace $(paragraph[-1]) ] { paragraph = $(paragraph[1--2]) ; }
+ pre = ;
+ if $(output-type) = plain
+ {
+ lines $(paragraph) "" : " " " " ;
+ }
+ else if $(output-type) = html
+ {
+ text <blockquote> ;
+ lines $(paragraph) ;
+ text </blockquote> ;
+ }
+ }
+ else
+ {
+ while $(description) && ! [ string.is-whitespace $(description[1]) ]
+ { paragraph += $(description[1]) ; description = $(description[2-]) ; }
+ if $(paragraph[1]) = :: && ! $(paragraph[2])
+ {
+ pre = " " ;
+ }
+ if $(paragraph[1]) = ::
+ {
+ if $(output-type) = plain
+ {
+ lines $(paragraph[2-]) "" : " " " " ;
+ lines ;
+ }
+ else if $(output-type) = html
+ {
+ text <blockquote> ;
+ lines $(paragraph[2-]) ;
+ text </blockquote> ;
+ }
+ }
+ else
+ {
+ local p = [ MATCH "(.*)(::)$" : $(paragraph[-1]) ] ;
+ local pws = [ MATCH "([ ]*)$" : $(p[1]) ] ;
+ p = [ MATCH "(.*)($(pws))($(p[2]))$" : $(paragraph[-1]) ] ;
+ if $(p[3]) = ::
+ {
+ pre = [ string.chars [ MATCH "^([$(.whitespace)]*)" : " $(p[1])" ] ] ;
+ if ! $(p[2]) || $(p[2]) = "" { paragraph = $(paragraph[1--2]) $(p[1]): ; }
+ else { paragraph = $(paragraph[1--2]) $(p[1]) ; }
+ if $(output-type) = plain
+ {
+ lines [ split-at-words " " $(paragraph) ] : " " " " ;
+ lines ;
+ }
+ else if $(output-type) = html
+ {
+ text </p> <p> [ escape-html $(paragraph) ] ;
+ }
+ }
+ else
+ {
+ if $(output-type) = plain
+ {
+ lines [ split-at-words " " $(paragraph) ] : " " " " ;
+ lines ;
+ }
+ else if $(output-type) = html
+ {
+ text </p> <p> [ escape-html $(paragraph) ] ;
+ }
+ }
+ }
+ }
+ }
+ if $(output-type) = html
+ {
+ text </p> ;
+ }
+}
+
+
+# Generate the start of a list of items. The type of output can be controlled by
+# the value of the 'output-type' variable.
+#
+rule list-start ( )
+{
+ if $(output-type) = plain
+ {
+ }
+ else if $(output-type) = html
+ {
+ text <ul> ;
+ }
+}
+
+
+# Generate an item in a list. The type of output can be controlled by the value
+# of the 'output-type' variable.
+#
+rule list-item (
+ item + # The item to list.
+)
+{
+ if $(output-type) = plain
+ {
+ lines [ split-at-words "*" $(item) ] : " " " " ;
+ }
+ else if $(output-type) = html
+ {
+ text <li> [ escape-html $(item) ] </li> ;
+ }
+}
+
+
+# Generate the end of a list of items. The type of output can be controlled by
+# the value of the 'output-type' variable.
+#
+rule list-end ( )
+{
+ if $(output-type) = plain
+ {
+ lines ;
+ }
+ else if $(output-type) = html
+ {
+ text </ul> ;
+ }
+}
+
+
+# Split the given text into separate lines, word-wrapping to a margin. The
+# default margin is 78 characters.
+#
+rule split-at-words (
+ text + # The text to split.
+ : margin ? # An optional margin, default is 78.
+)
+{
+ local lines = ;
+ text = [ string.words $(text:J=" ") ] ;
+ text = $(text:J=" ") ;
+ margin ?= 78 ;
+ local char-match-1 = ".?" ;
+ local char-match = "" ;
+ while $(margin) != 0
+ {
+ char-match = $(char-match)$(char-match-1) ;
+ margin = [ numbers.decrement $(margin) ] ;
+ }
+ while $(text)
+ {
+ local s = "" ;
+ local t = "" ;
+ # divide s into the first X characters and the rest
+ s = [ MATCH "^($(char-match))(.*)" : $(text) ] ;
+
+ if $(s[2])
+ {
+ # split the first half at a space
+ t = [ MATCH "^(.*)[\\ ]([^\\ ]*)$" : $(s[1]) ] ;
+ }
+ else
+ {
+ t = $(s) ;
+ }
+
+ if ! $(t[2])
+ {
+ t += "" ;
+ }
+
+ text = $(t[2])$(s[2]) ;
+ lines += $(t[1]) ;
+ }
+ return $(lines) ;
+}
+
+
+# Generate a set of fixed lines. Each single item passed in is output on a
+# separate line. For console this just echos each line, but for html this will
+# split them with <br>.
+#
+rule lines (
+ text * # The lines of text.
+ : indent ? # Optional indentation prepended to each line after the first.
+ outdent ? # Optional indentation to prepend to the first line.
+)
+{
+ text ?= "" ;
+ indent ?= "" ;
+ outdent ?= "" ;
+ if $(output-type) = plain
+ {
+ text $(outdent)$(text[1]) $(indent)$(text[2-]) ;
+ }
+ else if $(output-type) = html
+ {
+ local indent-chars = [ string.chars $(indent) ] ;
+ indent = "" ;
+ for local c in $(indent-chars)
+ {
+ if $(c) = " " { c = " " ; }
+ else if $(c) = " " { c = " " ; }
+ indent = $(indent)$(c) ;
+ }
+ local html-text = [ escape-html $(text) : " " ] ;
+ text $(html-text[1])<br> $(indent)$(html-text[2-])<br> ;
+ }
+}
+
+
+# Output text directly to the current target. When doing output to a file, one
+# can indicate if the text should be output to "prefix" it, as the "body"
+# (default), or "suffix" of the file. This is independent of the actual
+# execution order of the text rule. This rule invokes a singular action, one
+# action only once, which does the build of the file. Therefore actions on the
+# target outside of this rule will happen entirely before and/or after all
+# output using this rule.
+#
+rule text (
+ strings * # The strings of text to output.
+ : overwrite ? # True to overwrite the output (if it is a file).
+ : prefix-body-suffix ? # Indication to output prefix, body, or suffix (for
+ # a file).
+)
+{
+ prefix-body-suffix ?= body ;
+ if $(output-target) = console
+ {
+ if ! $(strings)
+ {
+ ECHO ;
+ }
+ else
+ {
+ for local s in $(strings)
+ {
+ ECHO $(s) ;
+ }
+ }
+ }
+ if ! $($(output-target).did-action)
+ {
+ $(output-target).did-action = yes ;
+ $(output-target).text-prefix = ;
+ $(output-target).text-body = ;
+ $(output-target).text-suffix = ;
+
+ nl on $(output-target) = "
+" ;
+ text-redirect on $(output-target) = ">>" ;
+ if $(overwrite)
+ {
+ text-redirect on $(output-target) = ">" ;
+ }
+ text-content on $(output-target) = ;
+
+ text-action $(output-target) ;
+
+ if $(overwrite) && $(output-target) != console
+ {
+ check-for-update $(output-target) ;
+ }
+ }
+ $(output-target).text-$(prefix-body-suffix) += $(strings) ;
+ text-content on $(output-target) =
+ $($(output-target).text-prefix)
+ $($(output-target).text-body)
+ $($(output-target).text-suffix) ;
+}
+
+
+# Outputs the text to the current targets, after word-wrapping it.
+#
+rule wrapped-text ( text + )
+{
+ local lines = [ split-at-words $(text) ] ;
+ text $(lines) ;
+}
+
+
+# Escapes text into html/xml printable equivalents. Does not know about tags and
+# therefore tags fed into this will also be escaped. Currently escapes space,
+# "<", ">", and "&".
+#
+rule escape-html (
+ text + # The text to escape.
+ : space ? # What to replace spaces with, defaults to " ".
+)
+{
+ local html-text = ;
+ while $(text)
+ {
+ local html = $(text[1]) ;
+ text = $(text[2-]) ;
+ html = [ regex.replace $(html) "&" "&" ] ;
+ html = [ regex.replace $(html) "<" "<" ] ;
+ html = [ regex.replace $(html) ">" ">" ] ;
+ if $(space)
+ {
+ html = [ regex.replace $(html) " " "$(space)" ] ;
+ }
+ html-text += $(html) ;
+ }
+ return $(html-text) ;
+}
+
+
+# Outputs the text strings collected by the text rule to the output file.
+#
+actions quietly text-action
+{
+ @($(STDOUT):E=$(text-content:J=$(nl))) $(text-redirect) "$(<)"
+}
+
+
+rule get-scanner ( )
+{
+ if ! $(.scanner)
+ {
+ .scanner = [ class.new print-scanner ] ;
+ }
+ return $(.scanner) ;
+}
+
+
+# The following code to update print targets when their contents change is a
+# horrible hack. It basically creates a target which binds to this file
+# (print.jam) and installs a scanner on it which reads the target and compares
+# its contents to the new contents that we are writing.
+#
+rule check-for-update ( target )
+{
+ local scanner = [ get-scanner ] ;
+ local file = [ path.native [ modules.binding $(__name__) ] ] ;
+ local g = [ MATCH <(.*)> : $(target:G) ] ;
+ local dependency-target = $(__file__:G=$(g:E=)-$(target:G=)-$(scanner)) ;
+ DEPENDS $(target) : $(dependency-target) ;
+ SEARCH on $(dependency-target) = $(file:D) ;
+ ISFILE $(dependency-target) ;
+ NOUPDATE $(dependency-target) ;
+ base on $(dependency-target) = $(target) ;
+ scanner.install $(scanner) : $(dependency-target) ;
+ return $(dependency-target) ;
+}
+
+
+class print-scanner : scanner
+{
+ import path ;
+ import os ;
+
+ rule pattern ( )
+ {
+ return "(One match...)" ;
+ }
+
+ rule process ( target : matches * : binding )
+ {
+ local base = [ on $(target) return $(base) ] ;
+ local nl = [ on $(base) return $(nl) ] ;
+ local text-content = [ on $(base) return $(text-content) ] ;
+ local dir = [ on $(base) return $(LOCATE) ] ;
+ if $(dir)
+ {
+ dir = [ path.make $(dir) ] ;
+ }
+ local file = [ path.native [ path.join $(dir) $(base:G=) ] ] ;
+ local actual-content ;
+ if [ os.name ] = NT
+ {
+ actual-content = [ SHELL "type \"$(file)\" 2>nul" ] ;
+ }
+ else
+ {
+ actual-content = [ SHELL "cat \"$(file)\" 2>/dev/null" ] ;
+ }
+ if $(text-content:J=$(nl)) != $(actual-content)
+ {
+ ALWAYS $(base) ;
+ }
+ }
+}
+
+
+rule __test__ ( )
+{
+ import assert ;
+
+ assert.result one two three : split-at-words one two three : 5 ;
+ assert.result "one two" three : split-at-words one two three : 8 ;
+ assert.result "one two" three : split-at-words one two three : 9 ;
+ assert.result "one two three" : split-at-words one two three ;
+
+ # VP, 2004-12-03 The following test fails for some reason, so commenting it
+ # out.
+ #assert.result "one two three" "&<>" :
+ # escape-html "one two three" "&<>" ;
+}
diff --git a/src/kenlm/jam-files/boost-build/util/regex.jam b/src/kenlm/jam-files/boost-build/util/regex.jam
new file mode 100644
index 0000000..be8b3cf
--- /dev/null
+++ b/src/kenlm/jam-files/boost-build/util/regex.jam
@@ -0,0 +1,203 @@
+# Copyright 2001, 2002 Dave Abrahams
+# Copyright 2003 Douglas Gregor
+# Copyright 2003 Rene Rivera
+# Copyright 2002, 2003, 2004, 2005 Vladimir Prus
+# Distributed under the Boost Software License, Version 1.0.
+# (See accompanying file LICENSE_1_0.txt or copy at
+# http://www.boost.org/LICENSE_1_0.txt)
+
+#
+# Returns a list of the following substrings:
+# 1) from beginning till the first occurrence of 'separator' or till the end,
+# 2) between each occurrence of 'separator' and the next occurrence,
+# 3) from the last occurrence of 'separator' till the end.
+# If no separator is present, the result will contain only one element.
+#
+
+rule split ( string separator )
+{
+ local result ;
+ local s = $(string) ;
+
+ # Break pieces off 's' until it has no separators left.
+ local match = 1 ;
+ while $(match)
+ {
+ match = [ MATCH ^(.*)($(separator))(.*) : $(s) ] ;
+ if $(match)
+ {
+ match += "" ; # in case 3rd item was empty - works around MATCH bug
+ result = $(match[3]) $(result) ;
+ s = $(match[1]) ;
+ }
+ }
+ # Combine the remaining part at the beginning, which does not have
+ # separators, with the pieces broken off. Note that the rule's signature
+ # does not allow the initial s to be empty.
+ return $(s) $(result) ;
+}
+
+if [ HAS_NATIVE_RULE regex : split : 1 ]
+{
+ NATIVE_RULE regex : split ;
+}
+
+# Returns the concatenated results of applying regex.split to every element of
+# the list using the separator pattern.
+#
+rule split-list ( list * : separator )
+{
+ local result ;
+ for s in $(list)
+ {
+ result += [ split $(s) $(separator) ] ;
+ }
+ return $(result) ;
+}
+
+
+# Match string against pattern, and return the elements indicated by indices.
+#
+rule match ( pattern : string : indices * )
+{
+ indices ?= 1 2 3 4 5 6 7 8 9 ;
+ local x = [ MATCH $(pattern) : $(string) ] ;
+ return $(x[$(indices)]) ;
+}
+
+
+# Matches all elements of 'list' against the 'pattern' and returns a list of
+# elements indicated by indices of all successful matches. If 'indices' is
+# omitted returns a list of first parenthesised groups of all successful
+# matches.
+#
+rule transform ( list * : pattern : indices * )
+{
+ indices ?= 1 ;
+ local result ;
+ for local e in $(list)
+ {
+ local m = [ MATCH $(pattern) : $(e) ] ;
+ if $(m)
+ {
+ result += $(m[$(indices)]) ;
+ }
+ }
+ return $(result) ;
+}
+
+NATIVE_RULE regex : transform ;
+
+
+# Escapes all of the characters in symbols using the escape symbol escape-symbol
+# for the given string, and returns the escaped string.
+#
+rule escape ( string : symbols : escape-symbol )
+{
+ local result = "" ;
+ local m = 1 ;
+ while $(m)
+ {
+ m = [ MATCH ^([^$(symbols)]*)([$(symbols)])(.*) : $(string) ] ;
+ if $(m)
+ {
+ m += "" ; # Supposedly a bug fix; borrowed from regex.split
+ result = "$(result)$(m[1])$(escape-symbol)$(m[2])" ;
+ string = $(m[3]) ;
+ }
+ }
+ string ?= "" ;
+ result = "$(result)$(string)" ;
+ return $(result) ;
+}
+
+
+# Replaces occurrences of a match string in a given string and returns the new
+# string. The match string can be a regex expression.
+#
+rule replace (
+ string # The string to modify.
+ match # The characters to replace.
+ replacement # The string to replace with.
+ )
+{
+ local result = "" ;
+ local parts = 1 ;
+ while $(parts)
+ {
+ parts = [ MATCH ^(.*)($(match))(.*) : $(string) ] ;
+ if $(parts)
+ {
+ parts += "" ;
+ result = "$(replacement)$(parts[3])$(result)" ;
+ string = $(parts[1]) ;
+ }
+ }
+ string ?= "" ;
+ result = "$(string)$(result)" ;
+ return $(result) ;
+}
+
+if [ HAS_NATIVE_RULE regex : replace : 1 ]
+{
+ NATIVE_RULE regex : replace ;
+}
+
+
+# Replaces occurrences of a match string in a given list of strings and returns
+# a list of new strings. The match string can be a regex expression.
+#
+# list - the list of strings to modify.
+# match - the search expression.
+# replacement - the string to replace with.
+#
+rule replace-list ( list * : match : replacement )
+{
+ local result ;
+ for local e in $(list)
+ {
+ result += [ replace $(e) $(match) $(replacement) ] ;
+ }
+ return $(result) ;
+}
+
+
+rule __test__ ( )
+{
+ import assert ;
+
+ assert.result a b c : split "a/b/c" / ;
+ assert.result "" a b c : split "/a/b/c" / ;
+ assert.result "" "" a b c : split "//a/b/c" / ;
+ assert.result "" a "" b c : split "/a//b/c" / ;
+ assert.result "" a "" b c "" : split "/a//b/c/" / ;
+ assert.result "" a "" b c "" "" : split "/a//b/c//" / ;
+
+ assert.result a c b d
+ : match (.)(.)(.)(.) : abcd : 1 3 2 4 ;
+
+ assert.result a b c d
+ : match (.)(.)(.)(.) : abcd ;
+
+ assert.result ababab cddc
+ : match ((ab)*)([cd]+) : abababcddc : 1 3 ;
+
+ assert.result a.h c.h
+ : transform <a.h> \"b.h\" <c.h> : <(.*)> ;
+
+ assert.result a.h b.h c.h
+ : transform <a.h> \"b.h\" <c.h> : <([^>]*)>|\"([^\"]*)\" : 1 2 ;
+
+ assert.result "^<?xml version=\"1.0\"^>"
+ : escape "<?xml version=\"1.0\">" : "&|()<>^" : "^" ;
+
+ assert.result "<?xml version=\\\"1.0\\\">"
+ : escape "<?xml version=\"1.0\">" : "\\\"" : "\\" ;
+
+ assert.result "string string " : replace "string string " " " " " ;
+ assert.result " string string" : replace " string string" " " " " ;
+ assert.result "string string" : replace "string string" " " " " ;
+ assert.result "-" : replace "&" "&" "-" ;
+
+ assert.result "-" "a-b" : replace-list "&" "a&b" : "&" : "-" ;
+}
diff --git a/src/kenlm/jam-files/boost-build/util/sequence.jam b/src/kenlm/jam-files/boost-build/util/sequence.jam
new file mode 100644
index 0000000..a87eb4d
--- /dev/null
+++ b/src/kenlm/jam-files/boost-build/util/sequence.jam
@@ -0,0 +1,339 @@
+# Copyright 2001, 2002, 2003 Dave Abrahams
+# Copyright 2006 Rene Rivera
+# Copyright 2002, 2003 Vladimir Prus
+# Distributed under the Boost Software License, Version 1.0.
+# (See accompanying file LICENSE_1_0.txt or http://www.boost.org/LICENSE_1_0.txt)
+
+import assert ;
+import numbers ;
+import modules ;
+
+
+# Note that algorithms in this module execute largely in the caller's module
+# namespace, so that local rules can be used as function objects. Also note that
+# most predicates can be multi-element lists. In that case, all but the first
+# element are prepended to the first argument which is passed to the rule named
+# by the first element.
+
+
+# Return the elements e of $(sequence) for which [ $(predicate) e ] has a
+# non-null value.
+#
+rule filter ( predicate + : sequence * )
+{
+ local caller = [ CALLER_MODULE ] ;
+ local result ;
+
+ for local e in $(sequence)
+ {
+ if [ modules.call-in $(caller) : $(predicate) $(e) ]
+ {
+ result += $(e) ;
+ }
+ }
+ return $(result) ;
+}
+
+
+# Return a new sequence consisting of [ $(function) $(e) ] for each element e of
+# $(sequence).
+#
+rule transform ( function + : sequence * )
+{
+ local caller = [ CALLER_MODULE ] ;
+ local result ;
+
+ for local e in $(sequence)
+ {
+ result += [ modules.call-in $(caller) : $(function) $(e) ] ;
+ }
+ return $(result) ;
+}
+
+if [ HAS_NATIVE_RULE sequence : transform : 1 ]
+{
+ NATIVE_RULE sequence : transform ;
+}
+
+rule reverse ( s * )
+{
+ local r ;
+ for local x in $(s)
+ {
+ r = $(x) $(r) ;
+ }
+ return $(r) ;
+}
+
+
+rule less ( a b )
+{
+ if $(a) < $(b)
+ {
+ return true ;
+ }
+}
+
+
+# Insertion-sort s using the BinaryPredicate ordered.
+#
+rule insertion-sort ( s * : ordered * )
+{
+ if ! $(ordered)
+ {
+ return [ SORT $(s) ] ;
+ }
+ else
+ {
+ local caller = [ CALLER_MODULE ] ;
+ ordered ?= sequence.less ;
+ local result = $(s[1]) ;
+ if $(ordered) = sequence.less
+ {
+ local head tail ;
+ for local x in $(s[2-])
+ {
+ head = ;
+ tail = $(result) ;
+ while $(tail) && ( $(tail[1]) < $(x) )
+ {
+ head += $(tail[1]) ;
+ tail = $(tail[2-]) ;
+ }
+ result = $(head) $(x) $(tail) ;
+ }
+ }
+ else
+ {
+ for local x in $(s[2-])
+ {
+ local head tail ;
+ tail = $(result) ;
+ while $(tail) && [ modules.call-in $(caller) : $(ordered) $(tail[1]) $(x) ]
+ {
+ head += $(tail[1]) ;
+ tail = $(tail[2-]) ;
+ }
+ result = $(head) $(x) $(tail) ;
+ }
+ }
+
+ return $(result) ;
+ }
+}
+
+
+# Merge two ordered sequences using the BinaryPredicate ordered.
+#
+rule merge ( s1 * : s2 * : ordered * )
+{
+ ordered ?= sequence.less ;
+ local result__ ;
+ local caller = [ CALLER_MODULE ] ;
+
+ while $(s1) && $(s2)
+ {
+ if [ modules.call-in $(caller) : $(ordered) $(s1[1]) $(s2[1]) ]
+ {
+ result__ += $(s1[1]) ;
+ s1 = $(s1[2-]) ;
+ }
+ else if [ modules.call-in $(caller) : $(ordered) $(s2[1]) $(s1[1]) ]
+ {
+ result__ += $(s2[1]) ;
+ s2 = $(s2[2-]) ;
+ }
+ else
+ {
+ s2 = $(s2[2-]) ;
+ }
+
+ }
+ result__ += $(s1) ;
+ result__ += $(s2) ;
+
+ return $(result__) ;
+}
+
+
+# Join the elements of s into one long string. If joint is supplied, it is used
+# as a separator.
+#
+rule join ( s * : joint ? )
+{
+ joint ?= "" ;
+ return $(s:J=$(joint)) ;
+}
+
+
+# Find the length of any sequence.
+#
+rule length ( s * )
+{
+ local result = 0 ;
+ for local i in $(s)
+ {
+ result = [ CALC $(result) + 1 ] ;
+ }
+ return $(result) ;
+}
+
+
+rule unique ( list * : stable ? )
+{
+ local result ;
+ local prev ;
+ if $(stable)
+ {
+ for local f in $(list)
+ {
+ if ! $(f) in $(result)
+ {
+ result += $(f) ;
+ }
+ }
+ }
+ else
+ {
+ for local i in [ SORT $(list) ]
+ {
+ if $(i) != $(prev)
+ {
+ result += $(i) ;
+ }
+ prev = $(i) ;
+ }
+ }
+ return $(result) ;
+}
+
+
+# Returns the maximum number in 'elements'. Uses 'ordered' for comparisons or
+# 'numbers.less' if none is provided.
+#
+rule max-element ( elements + : ordered ? )
+{
+ ordered ?= numbers.less ;
+
+ local max = $(elements[1]) ;
+ for local e in $(elements[2-])
+ {
+ if [ $(ordered) $(max) $(e) ]
+ {
+ max = $(e) ;
+ }
+ }
+ return $(max) ;
+}
+
+
+# Returns all of 'elements' for which corresponding element in parallel list
+# 'ranks' is equal to the maximum value in 'ranks'.
+#
+rule select-highest-ranked ( elements * : ranks * )
+{
+ if $(elements)
+ {
+ local max-rank = [ max-element $(ranks) ] ;
+ local result ;
+ while $(elements)
+ {
+ if $(ranks[1]) = $(max-rank)
+ {
+ result += $(elements[1]) ;
+ }
+ elements = $(elements[2-]) ;
+ ranks = $(ranks[2-]) ;
+ }
+ return $(result) ;
+ }
+}
+NATIVE_RULE sequence : select-highest-ranked ;
+
+
+rule __test__ ( )
+{
+ # Use a unique module so we can test the use of local rules.
+ module sequence.__test__
+ {
+ import assert ;
+ import sequence ;
+
+ local rule is-even ( n )
+ {
+ if $(n) in 0 2 4 6 8
+ {
+ return true ;
+ }
+ }
+
+ assert.result 4 6 4 2 8 : sequence.filter is-even : 1 4 6 3 4 7 2 3 8 ;
+
+ # Test that argument binding works.
+ local rule is-equal-test ( x y )
+ {
+ if $(x) = $(y)
+ {
+ return true ;
+ }
+ }
+
+ assert.result 3 3 3 : sequence.filter is-equal-test 3 : 1 2 3 4 3 5 3 5 7 ;
+
+ local rule append-x ( n )
+ {
+ return $(n)x ;
+ }
+
+ assert.result 1x 2x 3x : sequence.transform append-x : 1 2 3 ;
+
+ local rule repeat2 ( x )
+ {
+ return $(x) $(x) ;
+ }
+
+ assert.result 1 1 2 2 3 3 : sequence.transform repeat2 : 1 2 3 ;
+
+ local rule test-greater ( a b )
+ {
+ if $(a) > $(b)
+ {
+ return true ;
+ }
+ }
+ assert.result 1 2 3 4 5 6 7 8 9 : sequence.insertion-sort 9 6 5 3 8 7 1 2 4 ;
+ assert.result 9 8 7 6 5 4 3 2 1 : sequence.insertion-sort 9 6 5 3 8 7 1 2 4 : test-greater ;
+ assert.result 1 2 3 4 5 6 : sequence.merge 1 3 5 : 2 4 6 ;
+ assert.result 6 5 4 3 2 1 : sequence.merge 5 3 1 : 6 4 2 : test-greater ;
+ assert.result 1 2 3 : sequence.merge 1 2 3 : ;
+ assert.result 1 : sequence.merge 1 : 1 ;
+
+ assert.result foo-bar-baz : sequence.join foo bar baz : - ;
+ assert.result substandard : sequence.join sub stan dard ;
+ assert.result 3.0.1 : sequence.join 3.0.1 : - ;
+
+ assert.result 0 : sequence.length ;
+ assert.result 3 : sequence.length a b c ;
+ assert.result 17 : sequence.length 17 16 15 14 13 12 11 10 9 8 7 6 5 4 3 2 1 ;
+
+ assert.result 1 : sequence.length a ;
+ assert.result 10 : sequence.length a b c d e f g h i j ;
+ assert.result 11 : sequence.length a b c d e f g h i j k ;
+ assert.result 12 : sequence.length a b c d e f g h i j k l ;
+
+ local p2 = x ;
+ for local i in 1 2 3 4 5 6 7 8
+ {
+ p2 = $(p2) $(p2) ;
+ }
+ assert.result 256 : sequence.length $(p2) ;
+
+ assert.result 1 2 3 4 5 : sequence.unique 1 2 3 2 4 3 3 5 5 5 ;
+
+ assert.result 5 : sequence.max-element 1 3 5 0 4 ;
+
+ assert.result e-3 h-3 : sequence.select-highest-ranked e-1 e-3 h-3 m-2 : 1 3 3 2 ;
+
+ assert.result 7 6 5 4 3 2 1 : sequence.reverse 1 2 3 4 5 6 7 ;
+ }
+}
diff --git a/src/kenlm/jam-files/boost-build/util/set.jam b/src/kenlm/jam-files/boost-build/util/set.jam
new file mode 100644
index 0000000..fc17913
--- /dev/null
+++ b/src/kenlm/jam-files/boost-build/util/set.jam
@@ -0,0 +1,93 @@
+# Copyright 2001, 2002 Dave Abrahams
+# Copyright 2003 Vladimir Prus
+# Distributed under the Boost Software License, Version 1.0.
+# (See accompanying file LICENSE_1_0.txt or http://www.boost.org/LICENSE_1_0.txt)
+
+class set
+{
+ rule __init__ ( )
+ {
+ }
+
+ rule add ( elements * )
+ {
+ for local e in $(elements)
+ {
+ if ! $($(e))
+ {
+ $(e) = 1 ;
+ self.result += $(e) ;
+ }
+ }
+ }
+
+ rule contains ( element )
+ {
+ return $($(element)) ;
+ }
+
+ rule list ( )
+ {
+ return $(self.result) ;
+ }
+}
+
+
+
+# Returns the elements of set1 that are not in set2.
+#
+rule difference ( set1 * : set2 * )
+{
+ local result = ;
+ for local element in $(set1)
+ {
+ if ! ( $(element) in $(set2) )
+ {
+ result += $(element) ;
+ }
+ }
+ return $(result) ;
+}
+
+NATIVE_RULE set : difference ;
+
+
+# Removes all the items appearing in both set1 & set2.
+#
+rule intersection ( set1 * : set2 * )
+{
+ local result ;
+ for local v in $(set1)
+ {
+ if $(v) in $(set2)
+ {
+ result += $(v) ;
+ }
+ }
+ return $(result) ;
+}
+
+
+# Returns whether set1 & set2 contain the same elements. Note that this ignores
+# any element ordering differences as well as any element duplication.
+#
+rule equal ( set1 * : set2 * )
+{
+ if $(set1) in $(set2) && ( $(set2) in $(set1) )
+ {
+ return true ;
+ }
+}
+
+
+rule __test__ ( )
+{
+ import assert ;
+
+ assert.result 0 1 4 6 8 9 : difference 0 1 2 3 4 5 6 7 8 9 : 2 3 5 7 ;
+ assert.result 2 5 7 : intersection 0 1 2 4 5 6 7 8 9 : 2 3 5 7 ;
+
+ assert.true equal : ;
+ assert.true equal 1 1 2 3 : 3 2 2 1 ;
+ assert.false equal 2 3 : 3 2 2 1 ;
+}
diff --git a/src/kenlm/jam-files/boost-build/util/string.jam b/src/kenlm/jam-files/boost-build/util/string.jam
new file mode 100644
index 0000000..a39ed11
--- /dev/null
+++ b/src/kenlm/jam-files/boost-build/util/string.jam
@@ -0,0 +1,189 @@
+# Copyright 2002 Dave Abrahams
+# Copyright 2002, 2003 Rene Rivera
+# Distributed under the Boost Software License, Version 1.0.
+# (See accompanying file LICENSE_1_0.txt or http://www.boost.org/LICENSE_1_0.txt)
+
+import regex ;
+
+
+# Characters considered whitespace, as a list.
+.whitespace-chars = " " " " "
+" ;
+
+# Characters considered whitespace, as a single string.
+.whitespace = $(.whitespace-chars:J="") ;
+
+
+# Returns the canonical set of whitespace characters, as a list.
+#
+rule whitespace-chars ( )
+{
+ return $(.whitespace-chars) ;
+}
+
+
+# Returns the canonical set of whitespace characters, as a single string.
+#
+rule whitespace ( )
+{
+ return $(.whitespace) ;
+}
+
+
+# Splits the given string into a list of strings composed of each character of
+# the string in sequence.
+#
+rule chars (
+ string # The string to split.
+ )
+{
+ local result ;
+ while $(string)
+ {
+ local s = [ MATCH (.?)(.?)(.?)(.?)(.?)(.?)(.?)(.?)(.*) : $(string) ] ;
+ string = $(s[9]) ;
+ result += $(s[1-8]) ;
+ }
+
+ # Trim off empty strings.
+ while $(result[1]) && ! $(result[-1])
+ {
+ result = $(result[1--2]) ;
+ }
+
+ return $(result) ;
+}
+
+
+# Apply a set of standard transformations to string to produce an abbreviation
+# no more than 5 characters long.
+#
+rule abbreviate ( string )
+{
+ local r = $(.abbreviated-$(string)) ;
+ if $(r)
+ {
+ return $(r) ;
+ }
+ # Anything less than 4 characters gets no abbreviation.
+ else if ! [ MATCH (....) : $(string) ]
+ {
+ .abbreviated-$(string) = $(string) ;
+ return $(string) ;
+ }
+ else
+ {
+ # Separate the initial letter in case it's a vowel.
+ local s1 = [ MATCH ^(.)(.*) : $(string) ] ;
+
+ # Drop trailing "ing".
+ local s2 = [ MATCH ^(.*)ing$ : $(s1[2]) ] ;
+ s2 ?= $(s1[2]) ;
+
+ # Reduce all doubled characters to one.
+ local last = "" ;
+ for local c in [ chars $(s2) ]
+ {
+ if $(c) != $(last)
+ {
+ r += $(c) ;
+ last = $(c) ;
+ }
+ }
+ s2 = $(r:J="") ;
+
+ # Chop all vowels out of the remainder.
+ s2 = [ regex.replace $(s2) [AEIOUaeiou] "" ] ;
+
+ # Shorten remaining consonants to 4 characters.
+ s2 = [ MATCH ^(.?.?.?.?) : $(s2) ] ;
+
+ # Glue the initial character back on to the front.
+ s2 = $(s1[1])$(s2) ;
+
+ .abbreviated-$(string) = $(s2) ;
+ return $(s2) ;
+ }
+}
+
+
+# Concatenates the given strings, inserting the given separator between each
+# string.
+#
+rule join (
+ strings * # The strings to join.
+ : separator ? # The optional separator.
+ )
+{
+ separator ?= "" ;
+ return $(strings:J=$(separator)) ;
+}
+
+
+# Split a string into whitespace separated words.
+#
+rule words (
+ string # The string to split.
+ : whitespace * # Optional, characters to consider as whitespace.
+ )
+{
+ whitespace = $(whitespace:J="") ;
+ whitespace ?= $(.whitespace) ;
+ local w = ;
+ while $(string)
+ {
+ string = [ MATCH "^[$(whitespace)]*([^$(whitespace)]*)(.*)" : $(string) ] ;
+ if $(string[1]) && $(string[1]) != ""
+ {
+ w += $(string[1]) ;
+ }
+ string = $(string[2]) ;
+ }
+ return $(w) ;
+}
+
+
+# Check that the given string is composed entirely of whitespace.
+#
+rule is-whitespace (
+ string ? # The string to test.
+ )
+{
+ if ! $(string) { return true ; }
+ else if $(string) = "" { return true ; }
+ else if [ MATCH "^([$(.whitespace)]+)$" : $(string) ] { return true ; }
+ else { return ; }
+}
+
+rule __test__ ( )
+{
+ import assert ;
+ assert.result a b c : chars abc ;
+
+ assert.result rntm : abbreviate runtime ;
+ assert.result ovrld : abbreviate overload ;
+ assert.result dbg : abbreviate debugging ;
+ assert.result async : abbreviate asynchronous ;
+ assert.result pop : abbreviate pop ;
+ assert.result aaa : abbreviate aaa ;
+ assert.result qck : abbreviate quack ;
+ assert.result sttc : abbreviate static ;
+
+ # Check boundary cases.
+ assert.result a : chars a ;
+ assert.result : chars "" ;
+ assert.result a b c d e f g h : chars abcdefgh ;
+ assert.result a b c d e f g h i : chars abcdefghi ;
+ assert.result a b c d e f g h i j : chars abcdefghij ;
+ assert.result a b c d e f g h i j k : chars abcdefghijk ;
+
+ assert.result a//b/c/d : join a "" b c d : / ;
+ assert.result abcd : join a "" b c d ;
+
+ assert.result a b c : words "a b c" ;
+
+ assert.true is-whitespace " " ;
+ assert.false is-whitespace " a b c " ;
+ assert.true is-whitespace "" ;
+ assert.true is-whitespace ;
+}
diff --git a/src/kenlm/jam-files/boost-build/util/utility.jam b/src/kenlm/jam-files/boost-build/util/utility.jam
new file mode 100644
index 0000000..26981c5
--- /dev/null
+++ b/src/kenlm/jam-files/boost-build/util/utility.jam
@@ -0,0 +1,235 @@
+# Copyright 2001, 2002 Dave Abrahams
+# Copyright 2002, 2003, 2004, 2005 Vladimir Prus
+# Copyright 2008 Jurko Gospodnetic
+# Distributed under the Boost Software License, Version 1.0.
+# (See accompanying file LICENSE_1_0.txt or http://www.boost.org/LICENSE_1_0.txt)
+
+import "class" : is-instance ;
+
+
+# For all elements of 'list' which do not already have 'suffix', add 'suffix'.
+#
+rule apply-default-suffix ( suffix : list * )
+{
+ local result ;
+ for local i in $(list)
+ {
+ if $(i:S) = $(suffix)
+ {
+ result += $(i) ;
+ }
+ else
+ {
+ result += $(i)$(suffix) ;
+ }
+ }
+ return $(result) ;
+}
+
+
+# If 'name' contains a dot, returns the part before the last dot. If 'name'
+# contains no dot, returns it unmodified.
+#
+rule basename ( name )
+{
+ if $(name:S)
+ {
+ name = $(name:B) ;
+ }
+ return $(name) ;
+}
+
+
+# Return the file of the caller of the rule that called caller-file.
+#
+rule caller-file ( )
+{
+ local bt = [ BACKTRACE ] ;
+ return $(bt[9]) ;
+}
+
+
+# Tests if 'a' is equal to 'b'. If 'a' is a class instance, calls its 'equal'
+# method. Uses ordinary jam's comparison otherwise.
+#
+rule equal ( a b )
+{
+ if [ is-instance $(a) ]
+ {
+ return [ $(a).equal $(b) ] ;
+ }
+ else
+ {
+ if $(a) = $(b)
+ {
+ return true ;
+ }
+ }
+}
+
+
+# Tests if 'a' is less than 'b'. If 'a' is a class instance, calls its 'less'
+# method. Uses ordinary jam's comparison otherwise.
+#
+rule less ( a b )
+{
+ if [ is-instance $(a) ]
+ {
+ return [ $(a).less $(b) ] ;
+ }
+ else
+ {
+ if $(a) < $(b)
+ {
+ return true ;
+ }
+ }
+}
+
+
+# Returns the textual representation of argument. If it is a class instance,
+# class its 'str' method. Otherwise, returns the argument.
+#
+rule str ( value )
+{
+ if [ is-instance $(value) ]
+ {
+ return [ $(value).str ] ;
+ }
+ else
+ {
+ return $(value) ;
+ }
+}
+
+
+# Accepts a list of gristed values and returns them ungristed. Reports an error
+# in case any of the passed parameters is not gristed, i.e. surrounded in angle
+# brackets < and >.
+#
+rule ungrist ( names * )
+{
+ local result ;
+ for local name in $(names)
+ {
+ local stripped = [ MATCH ^<(.*)>$ : $(name) ] ;
+ if ! $(stripped)-defined
+ {
+ import errors ;
+ local quoted-names = \"$(names)\" ;
+ errors.error "in" ungrist $(quoted-names:J=" "): \"$(name)\" is not
+ of the form <.*> ;
+ }
+ result += $(stripped) ;
+ }
+ return $(result) ;
+}
+
+
+# If the passed value is quoted, unquotes it. Otherwise returns the value
+# unchanged.
+#
+rule unquote ( value ? )
+{
+ local match-result = [ MATCH ^(\")(.*)(\")$ : $(value) ] ;
+ if $(match-result)
+ {
+ return $(match-result[2]) ;
+ }
+ else
+ {
+ return $(value) ;
+ }
+}
+
+
+rule __test__ ( )
+{
+ import assert ;
+ import "class" : new ;
+ import errors : try catch ;
+
+ assert.result 123 : str 123 ;
+
+ class test-class__
+ {
+ rule __init__ ( ) { }
+ rule str ( ) { return "str-test-class" ; }
+ rule less ( a ) { return "yes, of course!" ; }
+ rule equal ( a ) { return "not sure" ; }
+ }
+
+ assert.result "str-test-class" : str [ new test-class__ ] ;
+ assert.true less 1 2 ;
+ assert.false less 2 1 ;
+ assert.result "yes, of course!" : less [ new test-class__ ] 1 ;
+ assert.true equal 1 1 ;
+ assert.false equal 1 2 ;
+ assert.result "not sure" : equal [ new test-class__ ] 1 ;
+
+ assert.result foo.lib foo.lib : apply-default-suffix .lib : foo.lib foo.lib
+ ;
+
+ assert.result foo : basename foo ;
+ assert.result foo : basename foo.so ;
+ assert.result foo.so : basename foo.so.1 ;
+
+ assert.result : unquote ;
+ assert.result "" : unquote "" ;
+ assert.result "" : unquote \"\" ;
+ assert.result \" : unquote \"\"\" ;
+ assert.result \"\" : unquote \"\"\"\" ;
+ assert.result foo : unquote foo ;
+ assert.result \"foo : unquote \"foo ;
+ assert.result foo\" : unquote foo\" ;
+ assert.result foo : unquote \"foo\" ;
+ assert.result \"foo\" : unquote \"\"foo\"\" ;
+
+ assert.result : ungrist ;
+ assert.result "" : ungrist <> ;
+ assert.result foo : ungrist <foo> ;
+ assert.result <foo> : ungrist <<foo>> ;
+ assert.result foo bar : ungrist <foo> <bar> ;
+
+ try ;
+ {
+ ungrist "" ;
+ }
+ catch "in" ungrist \"\": \"\" is not of the form <.*> ;
+
+ try ;
+ {
+ ungrist foo ;
+ }
+ catch "in" ungrist \"foo\": \"foo\" is not of the form <.*> ;
+
+ try ;
+ {
+ ungrist <foo ;
+ }
+ catch "in" ungrist \"<foo\": \"<foo\" is not of the form <.*> ;
+
+ try ;
+ {
+ ungrist foo> ;
+ }
+ catch "in" ungrist \"foo>\": \"foo>\" is not of the form <.*> ;
+
+ try ;
+ {
+ ungrist foo bar ;
+ }
+ catch "in" ungrist "\"foo\" \"bar\"": \"foo\" is not of the form <.*> ;
+
+ try ;
+ {
+ ungrist foo <bar> ;
+ }
+ catch "in" ungrist "\"foo\" \"<bar>\"": \"foo\" is not of the form <.*> ;
+
+ try ;
+ {
+ ungrist <foo> bar ;
+ }
+ catch "in" ungrist "\"<foo>\" \"bar\"": \"bar\" is not of the form <.*> ;
+}
diff --git a/src/kenlm/jam-files/empty_test_main.cc b/src/kenlm/jam-files/empty_test_main.cc
new file mode 100644
index 0000000..a3971f8
--- /dev/null
+++ b/src/kenlm/jam-files/empty_test_main.cc
@@ -0,0 +1,8 @@
+/* Program to verify compilation against the unit test framework */
+
+#define BOOST_TEST_MODULE EmptyTest
+#include <boost/test/unit_test.hpp>
+
+namespace {
+BOOST_AUTO_TEST_CASE(Empty) {}
+} // namespace
diff --git a/src/kenlm/jam-files/engine/Jambase b/src/kenlm/jam-files/engine/Jambase
new file mode 100644
index 0000000..94f8fbd
--- /dev/null
+++ b/src/kenlm/jam-files/engine/Jambase
@@ -0,0 +1,2473 @@
+#
+# /+\
+# +\ Copyright 1993, 2000 Christopher Seiwald.
+# \+/
+#
+# This file is part of Jam - see jam.c for Copyright information.
+#
+
+# This file is ALSO:
+# Copyright 2001-2004 David Abrahams.
+# Copyright 2002-2004 Rene Rivera.
+# Distributed under the Boost Software License, Version 1.0.
+# (See accompanying file LICENSE_1_0.txt or http://www.boost.org/LICENSE_1_0.txt)
+
+if $(NT)
+{
+ SLASH ?= \\ ;
+}
+SLASH ?= / ;
+
+
+# Glob for patterns in the directories starting from the given start directory,
+# up to and including the root of the file-system. We stop globbing as soon as
+# we find at least one match.
+#
+rule find-to-root ( dir : patterns + )
+{
+ local globs = [ GLOB $(dir) : $(patterns) ] ;
+ while ! $(globs) && $(dir:P) != $(dir)
+ {
+ dir = $(dir:P) ;
+ globs = [ GLOB $(dir) : $(patterns) ] ;
+ }
+ return $(globs) ;
+}
+
+
+# This global will hold the location of the user's boost-build.jam file.
+.boost-build-file = ;
+
+# This global will hold the location of the build system bootstrap file.
+.bootstrap-file = ;
+
+# Remember the value of $(BOOST_BUILD_PATH) supplied to us by the user.
+BOOST_BUILD_PATH.user-value = $(BOOST_BUILD_PATH) ;
+
+# On Unix only, when BOOST_BUILD_PATH is not supplied by the user, set it to a
+# sensible default value. This allows Boost.Build to work without any
+# environment variables, which is good in itself and also required by the Debian
+# Policy.
+if ! $(BOOST_BUILD_PATH) && $(UNIX)
+{
+ BOOST_BUILD_PATH = /usr/share/boost-build ;
+}
+
+
+rule _poke ( module-name ? : variables + : value * )
+{
+ module $(<)
+ {
+ $(>) = $(3) ;
+ }
+}
+
+
+# This rule can be invoked from an optional user's boost-build.jam file to both
+# indicate where to find the build system files, and to load them. The path
+# indicated is relative to the location of the boost-build.jam file.
+#
+rule boost-build ( dir ? )
+{
+ if $(.bootstrap-file)
+ {
+ ECHO "Error: Illegal attempt to re-bootstrap the build system by invoking" ;
+ ECHO ;
+ ECHO " 'boost-build" $(dir) ";'" ;
+ ECHO ;
+ EXIT "Please consult the documentation at 'http://www.boost.org'." ;
+ }
+
+ # Add the given directory to the path so we can find the build system. If
+ # dir is empty, has no effect.
+ BOOST_BUILD_PATH = $(dir:R=$(.boost-build-file:D)) $(BOOST_BUILD_PATH) ;
+
+ # We might have just modified the *global* value of BOOST_BUILD_PATH. The
+ # code that loads the rest of Boost.Build, in particular the site-config.jam
+ # and user-config.jam configuration files uses os.environ, so we need to
+ # update the value there.
+ _poke .ENVIRON : BOOST_BUILD_PATH : $(BOOST_BUILD_PATH) ;
+
+ # Try to find the build system bootstrap file 'bootstrap.jam'.
+ local bootstrap-file = [ GLOB $(BOOST_BUILD_PATH) : bootstrap.jam ] ;
+ .bootstrap-file = $(bootstrap-file[1]) ;
+
+ # There is no bootstrap.jam we can find, exit with an error.
+ if ! $(.bootstrap-file)
+ {
+ ECHO "Unable to load Boost.Build: could not find build system." ;
+ ECHO --------------------------------------------------------- ;
+ ECHO "$(.boost-build-file) attempted to load the build system by invoking" ;
+ ECHO ;
+ ECHO " 'boost-build" $(dir) ";'" ;
+ ECHO ;
+ ECHO "but we were unable to find \"bootstrap.jam\" in the specified directory" ;
+ ECHO "or in BOOST_BUILD_PATH (searching "$(BOOST_BUILD_PATH:J=", ")")." ;
+ ECHO ;
+ EXIT "Please consult the documentation at 'http://www.boost.org'." ;
+ }
+
+ if [ MATCH .*(--debug-configuration).* : $(ARGV) ]
+ {
+ ECHO "notice: loading Boost.Build from"
+ [ NORMALIZE_PATH $(.bootstrap-file:D) ] ;
+ }
+
+ # Load the build system, now that we know where to start from.
+ include $(.bootstrap-file) ;
+}
+
+
+if [ MATCH .*(b2).* : $(ARGV[1]:BL) ]
+ || [ MATCH .*(bjam).* : $(ARGV[1]:BL) ]
+ || $(BOOST_ROOT) # A temporary measure so Jam works with Boost.Build v1.
+{
+ # We attempt to load "boost-build.jam" by searching from the current
+ # invocation directory up to the root of the file-system.
+ #
+ # boost-build.jam is expected to invoke the "boost-build" rule to load the
+ # Boost.Build files.
+
+ local search-path = $(BOOST_BUILD_PATH) $(BOOST_ROOT) ;
+ local self = [ SELF_PATH ] ;
+ local boost-build-relative = ../../share/boost-build ;
+ local self-based-path = [ NORMALIZE_PATH $(boost-build-relative:R=$(self)) ] ;
+
+ local boost-build-files =
+ [ find-to-root [ PWD ] : boost-build.jam ]
+ [ GLOB $(self-based-path) : boost-build.jam ]
+ # Another temporary measure so Jam works with Boost.Build v1.
+ [ GLOB $(search-path) : boost-build.jam ] ;
+
+ .boost-build-file = $(boost-build-files[1]) ;
+
+ # There is no boost-build.jam we can find, exit with an error, and
+ # information.
+ if ! $(.boost-build-file)
+ {
+ ECHO "Unable to load Boost.Build: could not find \"boost-build.jam\"" ;
+ ECHO --------------------------------------------------------------- ;
+
+ if ! [ MATCH .*(bjam).* : $(ARGV[1]:BL) ]
+ {
+ ECHO "BOOST_ROOT must be set, either in the environment, or " ;
+ ECHO "on the command-line with -sBOOST_ROOT=..., to the root" ;
+ ECHO "of the boost installation." ;
+ ECHO ;
+ }
+
+ ECHO "Attempted search from" [ PWD ] "up to the root" ;
+ ECHO "at" $(self-based-path) ;
+ ECHO "and in these directories from BOOST_BUILD_PATH and BOOST_ROOT: "$(search-path:J=", ")"." ;
+ EXIT "Please consult the documentation at 'http://www.boost.org'." ;
+ }
+
+ if [ MATCH .*(--debug-configuration).* : $(ARGV) ]
+ {
+ ECHO "notice: found boost-build.jam at"
+ [ NORMALIZE_PATH $(.boost-build-file) ] ;
+ }
+
+ # Now load the boost-build.jam to get the build system loaded. This
+ # incidentaly loads the users jamfile and attempts to build targets.
+ #
+ # We also set it up so we can tell whether we are loading the new V2 system
+ # or the the old V1 system.
+ include $(.boost-build-file) ;
+
+ # Check that, at minimum, the bootstrap file was found.
+ if ! $(.bootstrap-file)
+ {
+ ECHO "Unable to load Boost.Build" ;
+ ECHO -------------------------- ;
+ ECHO "\"$(.boost-build-file)\" was found by searching from" [ PWD ] "up to the root" ;
+ ECHO "and in these directories from BOOST_BUILD_PATH and BOOST_ROOT: "$(search-path:J=", ")"." ;
+ ECHO ;
+ ECHO "However, it failed to call the \"boost-build\" rule to indicate" ;
+ ECHO "the location of the build system." ;
+ ECHO ;
+ EXIT "Please consult the documentation at 'http://www.boost.org'." ;
+ }
+}
+else
+{
+
+#
+# JAMBASE - jam 2.3 ruleset providing make(1)-like functionality
+#
+# Supports UNIX, NT, and VMS.
+#
+# 12/27/93 (seiwald) - purturb library sources with SOURCE_GRIST
+# 04/18/94 (seiwald) - use '?=' when setting OS specific vars
+# 04/21/94 (seiwald) - do RmTemps together
+# 05/05/94 (seiwald) - all supported C compilers support -o: relegate
+# RELOCATE as an option; set Ranlib to "" to disable it
+# 06/01/94 (seiwald) - new 'actions existing' to do existing sources
+# 08/25/94 (seiwald) - new ObjectCcFlags rule to append to per-target CCFLAGS
+# 08/29/94 (seiwald) - new ObjectHdrs rule to append to per-target HDRS
+# 09/19/94 (seiwald) - LinkLibraries and Undefs now append
+# - Rule names downshifted.
+# 10/06/94 (seiwald) - Dumb yyacc stuff moved into Jamfile.
+# 10/14/94 (seiwald) - (Crude) support for .s, .C, .cc, .cpp, and .f files.
+# 01/08/95 (seiwald) - Shell now handled with awk, not sed
+# 01/09/95 (seiwald) - Install* now take dest directory as target
+# 01/10/95 (seiwald) - All entries sorted.
+# 01/10/95 (seiwald) - NT support moved in, with LauraW's help.
+# 01/10/95 (seiwald) - VMS support moved in.
+# 02/06/95 (seiwald) - ObjectC++Flags and SubDirC++Flags added.
+# 02/07/95 (seiwald) - Iron out when HDRSEARCH uses "" or SEARCH_SOURCE.
+# 02/08/95 (seiwald) - SubDir works on VMS.
+# 02/14/95 (seiwald) - MkDir and entourage.
+# 04/30/95 (seiwald) - Use install -c flag so that it copies, not moves.
+# 07/10/95 (taylor) - Support for Microsoft C++.
+# 11/21/96 (peterk) - Support for BeOS
+# 07/19/99 (sickel) - Support for Mac OS X Server (and maybe client)
+# 02/18/00 (belmonte)- Support for Cygwin.
+
+# Special targets defined in this file:
+#
+# all - parent of first, shell, files, lib, exe
+# first - first dependency of 'all', for potential initialization
+# shell - parent of all Shell targets
+# files - parent of all File targets
+# lib - parent of all Library targets
+# exe - parent of all Main targets
+# dirs - parent of all MkDir targets
+# clean - removes all Shell, File, Library, and Main targets
+# uninstall - removes all Install targets
+#
+
+# Rules defined by this file:
+#
+# as obj.o : source.s ; .s -> .o
+# Bulk dir : files ; populate directory with many files
+# Cc obj.o : source.c ; .c -> .o
+# C++ obj.o : source.cc ; .cc -> .o
+# Clean clean : sources ; remove sources with 'jam clean'
+# File dest : source ; copy file
+# Fortran obj.o : source.f ; .f -> .o
+# GenFile source.c : program args ; make custom file
+# Hardlink target : source ; make link from source to target
+# HdrRule source : headers ; handle #includes
+# InstallInto dir : sources ; install any files
+# InstallBin dir : sources ; install binaries
+# InstallLib dir : sources ; install files
+# InstallFile dir : sources ; install files
+# InstallMan dir : sources ; install man pages
+# InstallShell dir : sources ; install shell scripts
+# Lex source.c : source.l ; .l -> .c
+# Library lib : source ; archive library from compiled sources
+# LibraryFromObjects lib : objects ; archive library from objects
+# LinkLibraries images : libraries ; bag libraries onto Mains
+# Main image : source ; link executable from compiled sources
+# MainFromObjects image : objects ; link executable from objects
+# MkDir dir ; make a directory, if not there
+# Object object : source ; compile object from source
+# ObjectCcFlags source : flags ; add compiler flags for object
+# ObjectC++Flags source : flags ; add compiler flags for object
+# ObjectHdrs source : dirs ; add include directories for object
+# Objects sources ; compile sources
+# RmTemps target : sources ; remove temp sources after target made
+# Setuid images ; mark executables Setuid
+# SubDir TOP d1 d2 ... ; start a subdirectory Jamfile
+# SubDirCcFlags flags ; add compiler flags until next SubDir
+# SubDirC++Flags flags ; add compiler flags until next SubDir
+# SubDirHdrs dirs ; add include dirs until next SubDir
+# SubInclude TOP d1 d2 ... ; include a subdirectory Jamfile
+# Shell exe : source ; make a shell executable
+# Undefines images : symbols ; save undef's for linking
+# UserObject object : source ; handle unknown suffixes for Object
+# Yacc source.c : source.y ; .y -> .c
+#
+# Utility rules that have no side effects (not supported):
+#
+# FAppendSuffix f1 f2 ... : $(SUF) ; return $(<) with suffixes
+# FConcat value ... ; return contatenated values
+# FDirName d1 d2 ... ; return path from root to dir
+# FGrist d1 d2 ... ; return d1!d2!...
+# FGristFiles value ; return $(value:G=$(SOURCE_GRIST))
+# FGristSourceFiles value ; return $(value:G=$(SOURCE_GRIST))
+# FRelPath d1 : d2 ; return rel path from d1 to d2
+# FSubDir d1 d2 ... ; return path to root
+#
+
+
+# Brief review of the jam language:
+#
+# Statements:
+# rule RULE - statements to process a rule
+# actions RULE - system commands to carry out target update
+#
+# Modifiers on actions:
+# together - multiple instances of same rule on target get executed
+# once with their sources ($(>)) concatenated
+# updated - refers to updated sources ($(>)) only
+# ignore - ignore return status of command
+# quietly - don't trace its execution unless verbose
+# piecemeal - iterate command each time with a small subset of $(>)
+# existing - refers to currently existing sources ($(>)) only
+# bind vars - subject to binding before expanding in actions
+#
+# Special rules:
+# ALWAYS - always build a target
+# DEPENDS - builds the dependency graph
+# ECHO - blurt out targets on stdout
+# EXIT - blurt out targets and exit
+# INCLUDES - marks sources as headers for target (a codependency)
+# NOCARE - don't panic if the target can't be built
+# NOUPDATE - create the target if needed but never update it
+# NOTFILE - ignore the timestamp of the target (it's not a file)
+# TEMPORARY - target need not be present if sources haven't changed
+#
+# Special variables set by jam:
+# $(<) - targets of a rule (to the left of the :)
+# $(>) - sources of a rule (to the right of the :)
+# $(xxx) - true on xxx (UNIX, VMS, NT, OS2, MAC)
+# $(OS) - name of OS - varies wildly
+# $(JAMVERSION) - version number (2.3)
+#
+# Special variables used by jam:
+# SEARCH - where to find something (used during binding and actions)
+# LOCATE - where to plop something not found with SEARCH
+# HDRRULE - rule to call to handle include files
+# HDRSCAN - egrep regex to extract include files
+#
+# Special targets:
+# all - default if none given on command line
+#
+
+# Initialize variables
+#
+
+#
+# OS specific variable settings
+#
+if $(NT)
+{
+ # the list of supported toolsets on Windows NT and Windows 95/98
+ #
+ local SUPPORTED_TOOLSETS = "BORLANDC" "VC7" "VISUALC" "VISUALC16" "INTELC" "WATCOM"
+ "MINGW" "LCC" ;
+
+ # this variable holds the current toolset
+ #
+ TOOLSET = "" ;
+
+ # if the JAM_TOOLSET environment variable is defined, check that it is
+ # one of our supported values
+ #
+ if $(JAM_TOOLSET)
+ {
+ local t ;
+
+ for t in $(SUPPORTED_TOOLSETS)
+ {
+ $(t) = $($(t):J=" ") ; # reconstitute paths with spaces in them
+ if $(t) = $(JAM_TOOLSET) { TOOLSET = $(t) ; }
+ }
+
+ if ! $(TOOLSET)
+ {
+ ECHO "The JAM_TOOLSET environment variable is defined but its value" ;
+ ECHO "is invalid, please use one of the following:" ;
+ ECHO ;
+
+ for t in $(SUPPORTED_TOOLSETS) { ECHO " " $(t) ; }
+ EXIT ;
+ }
+ }
+
+ # if TOOLSET is empty, we'll try to detect the toolset from other
+ # environment variables to remain backwards compatible with Jam 2.3
+ #
+ if ! $(TOOLSET)
+ {
+ if $(BCCROOT)
+ {
+ TOOLSET = BORLANDC ;
+ BORLANDC = $(BCCROOT:J=" ") ;
+ }
+ else if $(MSVC)
+ {
+ TOOLSET = VISUALC16 ;
+ VISUALC16 = $(MSVC:J=" ") ;
+ }
+ else if $(MSVCNT)
+ {
+ TOOLSET = VISUALC ;
+ VISUALC = $(MSVCNT:J=" ") ;
+ }
+ else if $(MSVCDir)
+ {
+ TOOLSET = VISUALC ;
+ VISUALC = $(MSVCDir:J=" ") ;
+ }
+ else if $(MINGW)
+ {
+ TOOLSET = MINGW ;
+ }
+ else
+ {
+ ECHO "Jam cannot be run because, either:" ;
+ ECHO " a. You didn't set BOOST_ROOT to indicate the root of your" ;
+ ECHO " Boost installation." ;
+ ECHO " b. You are trying to use stock Jam but didn't indicate which" ;
+ ECHO " compilation toolset to use. To do so, follow these simple" ;
+ ECHO " instructions:" ;
+ ECHO ;
+ ECHO " - define one of the following environment variable, with the" ;
+ ECHO " appropriate value according to this list:" ;
+ ECHO ;
+ ECHO " Variable Toolset Description" ;
+ ECHO ;
+ ECHO " BORLANDC Borland C++ BC++ install path" ;
+ ECHO " VISUALC Microsoft Visual C++ VC++ install path" ;
+ ECHO " VISUALC16 Microsoft Visual C++ 16 bit VC++ 16 bit install" ;
+ ECHO " INTELC Intel C/C++ IC++ install path" ;
+ ECHO " WATCOM Watcom C/C++ Watcom install path" ;
+ ECHO " MINGW MinGW (gcc) MinGW install path" ;
+ ECHO " LCC Win32-LCC LCC-Win32 install path" ;
+ ECHO ;
+ ECHO " - define the JAM_TOOLSET environment variable with the *name*" ;
+ ECHO " of the toolset variable you want to use." ;
+ ECHO ;
+ ECHO " e.g.: set VISUALC=C:\\Visual6" ;
+ ECHO " set JAM_TOOLSET=VISUALC" ;
+ EXIT ;
+ }
+ }
+
+ CP ?= copy ;
+ RM ?= del /f/q ;
+ SLASH ?= \\ ;
+ SUFLIB ?= .lib ;
+ SUFOBJ ?= .obj ;
+ SUFEXE ?= .exe ;
+
+ if $(TOOLSET) = BORLANDC
+ {
+ ECHO "Compiler is Borland C++" ;
+
+ AR ?= tlib /C /P64 ;
+ CC ?= bcc32 ;
+ CCFLAGS ?= -q -y -d -v -w-par -w-ccc -w-rch -w-pro -w-aus ;
+ C++ ?= bcc32 ;
+ C++FLAGS ?= -q -y -d -v -w-par -w-ccc -w-rch -w-pro -w-aus -P ;
+ LINK ?= $(CC) ;
+ LINKFLAGS ?= $(CCFLAGS) ;
+ STDLIBPATH ?= $(BORLANDC)\\lib ;
+ STDHDRS ?= $(BORLANDC)\\include ;
+ NOARSCAN ?= true ;
+ }
+ else if $(TOOLSET) = VISUALC16
+ {
+ ECHO "Compiler is Microsoft Visual C++ 16 bit" ;
+
+ AR ?= lib /nologo ;
+ CC ?= cl /nologo ;
+ CCFLAGS ?= /D \"WIN\" ;
+ C++ ?= $(CC) ;
+ C++FLAGS ?= $(CCFLAGS) ;
+ LINK ?= $(CC) ;
+ LINKFLAGS ?= $(CCFLAGS) ;
+ LINKLIBS ?=
+ \"$(VISUALC16)\\lib\\mlibce.lib\"
+ \"$(VISUALC16)\\lib\\oldnames.lib\"
+ ;
+ LINKLIBS ?= ;
+ NOARSCAN ?= true ;
+ OPTIM ?= "" ;
+ STDHDRS ?= $(VISUALC16)\\include ;
+ UNDEFFLAG ?= "/u _" ;
+ }
+ else if $(TOOLSET) = VISUALC
+ {
+ ECHO "Compiler is Microsoft Visual C++" ;
+
+ AR ?= lib ;
+ AS ?= masm386 ;
+ CC ?= cl /nologo ;
+ CCFLAGS ?= "" ;
+ C++ ?= $(CC) ;
+ C++FLAGS ?= $(CCFLAGS) ;
+ LINK ?= link /nologo ;
+ LINKFLAGS ?= "" ;
+ LINKLIBS ?= \"$(VISUALC)\\lib\\advapi32.lib\"
+ # $(VISUALC)\\lib\\libc.lib
+ # $(VISUALC)\\lib\\oldnames.lib
+ \"$(VISUALC)\\lib\\gdi32.lib\"
+ \"$(VISUALC)\\lib\\user32.lib\"
+ \"$(VISUALC)\\lib\\kernel32.lib\" ;
+ OPTIM ?= "" ;
+ STDHDRS ?= $(VISUALC)\\include ;
+ UNDEFFLAG ?= "/u _" ;
+ }
+ else if $(TOOLSET) = VC7
+ {
+ ECHO "Compiler is Microsoft Visual C++ .NET" ;
+
+ AR ?= lib ;
+ AS ?= masm386 ;
+ CC ?= cl /nologo ;
+ CCFLAGS ?= "" ;
+ C++ ?= $(CC) ;
+ C++FLAGS ?= $(CCFLAGS) ;
+ LINK ?= link /nologo ;
+ LINKFLAGS ?= "" ;
+ LINKLIBS ?= \"$(VISUALC)\\PlatformSDK\\lib\\advapi32.lib\"
+ # $(VISUALC)\\lib\\libc.lib
+ # $(VISUALC)\\lib\\oldnames.lib
+ \"$(VISUALC)\\PlatformSDK\\lib\\gdi32.lib\"
+ \"$(VISUALC)\\PlatformSDK\\lib\\user32.lib\"
+ \"$(VISUALC)\\PlatformSDK\\lib\\kernel32.lib\" ;
+ OPTIM ?= "" ;
+ STDHDRS ?= \"$(VISUALC)\\include\"
+ \"$(VISUALC)\\PlatformSDK\\include\" ;
+ UNDEFFLAG ?= "/u _" ;
+ }
+ else if $(TOOLSET) = INTELC
+ {
+ ECHO "Compiler is Intel C/C++" ;
+
+ if ! $(VISUALC)
+ {
+ ECHO "As a special exception, when using the Intel C++ compiler, you need" ;
+ ECHO "to define the VISUALC environment variable to indicate the location" ;
+ ECHO "of your Visual C++ installation. Aborting.." ;
+ EXIT ;
+ }
+
+ AR ?= lib ;
+ AS ?= masm386 ;
+ CC ?= icl /nologo ;
+ CCFLAGS ?= "" ;
+ C++ ?= $(CC) ;
+ C++FLAGS ?= $(CCFLAGS) ;
+ LINK ?= link /nologo ;
+ LINKFLAGS ?= "" ;
+ LINKLIBS ?= $(VISUALC)\\lib\\advapi32.lib
+ # $(VISUALC)\\lib\\libc.lib
+ # $(VISUALC)\\lib\\oldnames.lib
+ $(VISUALC)\\lib\\kernel32.lib
+ ;
+ OPTIM ?= "" ;
+ STDHDRS ?= $(INTELC)\include $(VISUALC)\\include ;
+ UNDEFFLAG ?= "/u _" ;
+ }
+ else if $(TOOLSET) = WATCOM
+ {
+ ECHO "Compiler is Watcom C/C++" ;
+
+ AR ?= wlib ;
+ CC ?= wcc386 ;
+ CCFLAGS ?= /zq /DWIN32 /I$(WATCOM)\\h ; # zq=quiet
+ C++ ?= wpp386 ;
+ C++FLAGS ?= $(CCFLAGS) ;
+ CP ?= copy ;
+ DOT ?= . ;
+ DOTDOT ?= .. ;
+ LINK ?= wcl386 ;
+ LINKFLAGS ?= /zq ; # zq=quiet
+ LINKLIBS ?= ;
+ MV ?= move ;
+ NOARSCAN ?= true ;
+ OPTIM ?= ;
+ RM ?= del /f ;
+ SLASH ?= \\ ;
+ STDHDRS ?= $(WATCOM)\\h $(WATCOM)\\h\\nt ;
+ SUFEXE ?= .exe ;
+ SUFLIB ?= .lib ;
+ SUFOBJ ?= .obj ;
+ UNDEFFLAG ?= "/u _" ;
+ }
+ else if $(TOOLSET) = MINGW
+ {
+ ECHO "Compiler is GCC with Mingw" ;
+
+ AR ?= ar -ru ;
+ CC ?= gcc ;
+ CCFLAGS ?= "" ;
+ C++ ?= $(CC) ;
+ C++FLAGS ?= $(CCFLAGS) ;
+ LINK ?= $(CC) ;
+ LINKFLAGS ?= "" ;
+ LINKLIBS ?= "" ;
+ OPTIM ?= ;
+ SUFOBJ = .o ;
+ SUFLIB = .a ;
+ SLASH = / ;
+# NOARSCAN ?= true ;
+ }
+ else if $(TOOLSET) = LCC
+ {
+ ECHO "Compiler is Win32-LCC" ;
+
+ AR ?= lcclib ;
+ CC ?= lcc ;
+ CCFLAGS ?= "" ;
+ C++ ?= $(CC) ;
+ C++FLAGS ?= $(CCFLAGS) ;
+ LINK ?= lcclnk ;
+ LINKFLAGS ?= "" ;
+ LINKLIBS ?= "" ;
+ OPTIM ?= ;
+ NOARSCAN = true ;
+ }
+ else
+ {
+#
+# XXX: We need better comments here !!
+#
+ EXIT On NT, set BCCROOT, MSVCNT, MINGW or MSVC to the root of the
+ Borland or Microsoft directories. ;
+ }
+
+}
+else if $(OS2)
+{
+ # the list of supported toolsets on Windows NT and Windows 95/98
+ #
+ local SUPPORTED_TOOLSETS = "EMX" "WATCOM" ;
+
+ # this variable holds the current toolset
+ #
+ TOOLSET = "" ;
+
+ # if the JAM_TOOLSET environment variable is defined, check that it is
+ # one of our supported values
+ #
+ if $(JAM_TOOLSET)
+ {
+ local t ;
+
+ for t in $(SUPPORTED_TOOLSETS)
+ {
+ $(t) = $($(t):J=" ") ; # reconstitute paths with spaces in them
+ if $(t) = $(JAM_TOOLSET) { TOOLSET = $(t) ; }
+ }
+
+ if ! $(TOOLSET)
+ {
+ ECHO "The JAM_TOOLSET environment variable is defined but its value" ;
+ ECHO "is invalid, please use one of the following:" ;
+ ECHO ;
+
+ for t in $(SUPPORTED_TOOLSETS) { ECHO " " $(t) ; }
+ EXIT ;
+ }
+ }
+
+ # if TOOLSET is empty, we'll try to detect the toolset from other
+ # environment variables to remain backwards compatible with Jam 2.3
+ #
+ if ! $(TOOLSET)
+ {
+ if $(watcom)
+ {
+ WATCOM = $(watcom:J=" ") ;
+ TOOLSET = WATCOM ;
+ }
+ else
+ {
+ ECHO "Jam cannot be run because you didn't indicate which compilation toolset" ;
+ ECHO "to use. To do so, follow these simple instructions:" ;
+ ECHO ;
+ ECHO " - define one of the following environment variable, with the" ;
+ ECHO " appropriate value according to this list:" ;
+ ECHO ;
+ ECHO " Variable Toolset Description" ;
+ ECHO ;
+ ECHO " WATCOM Watcom C/C++ Watcom install path" ;
+ ECHO " EMX EMX (gcc) EMX install path" ;
+ ECHO " VISUALAGE IBM Visual Age C/C++ VisualAge install path" ;
+ ECHO ;
+ ECHO " - define the JAM_TOOLSET environment variable with the *name*" ;
+ ECHO " of the toolset variable you want to use." ;
+ ECHO ;
+ ECHO " e.g.: set WATCOM=C:\WATCOM" ;
+ ECHO " set JAM_TOOLSET=WATCOM" ;
+ ECHO ;
+ EXIT ;
+ }
+ }
+
+ RM = del /f ;
+ CP = copy ;
+ MV ?= move ;
+ DOT ?= . ;
+ DOTDOT ?= .. ;
+ SUFLIB ?= .lib ;
+ SUFOBJ ?= .obj ;
+ SUFEXE ?= .exe ;
+
+ if $(TOOLSET) = WATCOM
+ {
+ AR ?= wlib ;
+ BINDIR ?= \\os2\\apps ;
+ CC ?= wcc386 ;
+ CCFLAGS ?= /zq /DOS2 /I$(WATCOM)\\h ; # zq=quiet
+ C++ ?= wpp386 ;
+ C++FLAGS ?= $(CCFLAGS) ;
+ LINK ?= wcl386 ;
+ LINKFLAGS ?= /zq ; # zq=quiet
+ LINKLIBS ?= ;
+ NOARSCAN ?= true ;
+ OPTIM ?= ;
+ SLASH ?= \\ ;
+ STDHDRS ?= $(WATCOM)\\h ;
+ UNDEFFLAG ?= "/u _" ;
+ }
+ else if $(TOOLSET) = EMX
+ {
+ ECHO "Compiler is GCC-EMX" ;
+ AR ?= ar -ru ;
+ CC ?= gcc ;
+ CCFLAGS ?= "" ;
+ C++ ?= $(CC) ;
+ C++FLAGS ?= $(CCFLAGS) ;
+ LINK ?= $(CC) ;
+ LINKFLAGS ?= "" ;
+ LINKLIBS ?= "" ;
+ OPTIM ?= ;
+ SUFOBJ = .o ;
+ SUFLIB = .a ;
+ UNDEFFLAG ?= "-U" ;
+ SLASH = / ;
+# NOARSCAN ?= true ;
+ }
+ else
+ {
+ # should never happen
+ EXIT "Sorry, but the $(JAM_TOOLSET) toolset isn't supported for now" ;
+ }
+}
+else if $(VMS)
+{
+ C++ ?= cxx ;
+ C++FLAGS ?= ;
+ CC ?= cc ;
+ CCFLAGS ?= ;
+ CHMOD ?= set file/prot= ;
+ CP ?= copy/replace ;
+ CRELIB ?= true ;
+ DOT ?= [] ;
+ DOTDOT ?= [-] ;
+ EXEMODE ?= (w:e) ;
+ FILEMODE ?= (w:r) ;
+ HDRS ?= ;
+ LINK ?= link ;
+ LINKFLAGS ?= "" ;
+ LINKLIBS ?= ;
+ MKDIR ?= create/dir ;
+ MV ?= rename ;
+ OPTIM ?= "" ;
+ RM ?= delete ;
+ RUNVMS ?= mcr ;
+ SHELLMODE ?= (w:er) ;
+ SLASH ?= . ;
+ STDHDRS ?= decc$library_include ;
+ SUFEXE ?= .exe ;
+ SUFLIB ?= .olb ;
+ SUFOBJ ?= .obj ;
+
+ switch $(OS)
+ {
+ case OPENVMS : CCFLAGS ?= /stand=vaxc ;
+ case VMS : LINKLIBS ?= sys$library:vaxcrtl.olb/lib ;
+ }
+}
+else if $(MAC)
+{
+ local OPT ;
+
+ CW ?= "{CW}" ;
+
+ MACHDRS ?=
+ "$(UMACHDRS):Universal:Interfaces:CIncludes"
+ "$(CW):MSL:MSL_C:MSL_Common:Include"
+ "$(CW):MSL:MSL_C:MSL_MacOS:Include" ;
+
+ MACLIBS ?=
+ "$(CW):MacOS Support:Universal:Libraries:StubLibraries:Interfacelib"
+ "$(CW):MacOS Support:Universal:Libraries:StubLibraries:Mathlib" ;
+
+ MPWLIBS ?=
+ "$(CW):MacOS Support:Libraries:Runtime:Runtime PPC:MSL MPWCRuntime.lib"
+ "$(CW):MSL:MSL_C:MSL_MacOS:Lib:PPC:MSL C.PPC MPW.Lib" ;
+
+ MPWNLLIBS ?=
+ "$(CW):MacOS Support:Libraries:Runtime:Runtime PPC:MSL MPWCRuntime.lib"
+ "$(CW):MSL:MSL_C:MSL_MacOS:Lib:PPC:MSL C.PPC MPW(NL).Lib" ;
+
+ SIOUXHDRS ?= ;
+
+ SIOUXLIBS ?=
+ "$(CW):MacOS Support:Libraries:Runtime:Runtime PPC:MSL RuntimePPC.lib"
+ "$(CW):MSL:MSL_C:MSL_MacOS:Lib:PPC:MSL SIOUX.PPC.Lib"
+ "$(CW):MSL:MSL_C:MSL_MacOS:Lib:PPC:MSL C.PPC.Lib" ;
+
+ C++ ?= mwcppc ;
+ C++FLAGS ?= -w off -nomapcr ;
+ CC ?= mwcppc ;
+ CCFLAGS ?= -w off -nomapcr ;
+ CP ?= duplicate -y ;
+ DOT ?= ":" ;
+ DOTDOT ?= "::" ;
+ HDRS ?= $(MACHDRS) $(MPWHDRS) ;
+ LINK ?= mwlinkppc ;
+ LINKFLAGS ?= -mpwtool -warn ;
+ LINKLIBS ?= $(MACLIBS) $(MPWLIBS) ;
+ MKDIR ?= newfolder ;
+ MV ?= rename -y ;
+ NOARSCAN ?= true ;
+ OPTIM ?= ;
+ RM ?= delete -y ;
+ SLASH ?= ":" ;
+ STDHDRS ?= ;
+ SUFLIB ?= .lib ;
+ SUFOBJ ?= .o ;
+}
+else if $(OS) = BEOS && $(METROWERKS)
+{
+ AR ?= mwld -xml -o ;
+ BINDIR ?= /boot/apps ;
+ CC ?= mwcc ;
+ CCFLAGS ?= -nosyspath ;
+ C++ ?= $(CC) ;
+ C++FLAGS ?= -nosyspath ;
+ FORTRAN ?= "" ;
+ LIBDIR ?= /boot/develop/libraries ;
+ LINK ?= mwld ;
+ LINKFLAGS ?= "" ;
+ MANDIR ?= /boot/documentation/"Shell Tools"/HTML ;
+ NOARSCAN ?= true ;
+ STDHDRS ?= /boot/develop/headers/posix ;
+}
+else if $(OS) = BEOS
+{
+ BINDIR ?= /boot/apps ;
+ CC ?= gcc ;
+ C++ ?= $(CC) ;
+ FORTRAN ?= "" ;
+ LIBDIR ?= /boot/develop/libraries ;
+ LINK ?= gcc ;
+ LINKLIBS ?= -lnet ;
+ NOARSCAN ?= true ;
+ STDHDRS ?= /boot/develop/headers/posix ;
+}
+else if $(UNIX)
+{
+ switch $(OS)
+ {
+ case AIX :
+ LINKLIBS ?= -lbsd ;
+
+ case AMIGA :
+ CC ?= gcc ;
+ YACC ?= "bison -y" ;
+
+ case CYGWIN :
+ CC ?= gcc ;
+ CCFLAGS += -D__cygwin__ ;
+ LEX ?= flex ;
+ RANLIB ?= "" ;
+ SUFEXE ?= .exe ;
+ YACC ?= "bison -y" ;
+
+ case DGUX :
+ RANLIB ?= "" ;
+ RELOCATE ?= true ;
+
+ case HPUX :
+ YACC = ;
+ CFLAGS += -Ae ;
+ CCFLAGS += -Ae ;
+ RANLIB ?= "" ;
+
+ case INTERIX :
+ CC ?= gcc ;
+ RANLIB ?= "" ;
+
+ case IRIX :
+ RANLIB ?= "" ;
+
+ case MPEIX :
+ CC ?= gcc ;
+ C++ ?= gcc ;
+ CCFLAGS += -D_POSIX_SOURCE ;
+ HDRS += /usr/include ;
+ RANLIB ?= "" ;
+ NOARSCAN ?= true ;
+ NOARUPDATE ?= true ;
+
+ case MVS :
+ RANLIB ?= "" ;
+
+ case NEXT :
+ AR ?= libtool -o ;
+ RANLIB ?= "" ;
+
+ case MACOSX :
+ AR ?= libtool -o ;
+ C++ ?= c++ ;
+ MANDIR ?= /usr/local/share/man ;
+ RANLIB ?= "" ;
+
+ case NCR :
+ RANLIB ?= "" ;
+
+ case PTX :
+ RANLIB ?= "" ;
+
+ case QNX :
+ AR ?= wlib ;
+ CC ?= cc ;
+ CCFLAGS ?= -Q ; # quiet
+ C++ ?= $(CC) ;
+ C++FLAGS ?= -Q ; # quiet
+ LINK ?= $(CC) ;
+ LINKFLAGS ?= -Q ; # quiet
+ NOARSCAN ?= true ;
+ RANLIB ?= "" ;
+
+ case SCO :
+ RANLIB ?= "" ;
+ RELOCATE ?= true ;
+
+ case SINIX :
+ RANLIB ?= "" ;
+
+ case SOLARIS :
+ RANLIB ?= "" ;
+ AR ?= "/usr/ccs/bin/ar ru" ;
+
+ case UNICOS :
+ NOARSCAN ?= true ;
+ OPTIM ?= -O0 ;
+
+ case UNIXWARE :
+ RANLIB ?= "" ;
+ RELOCATE ?= true ;
+ }
+
+ # UNIX defaults
+
+ CCFLAGS ?= ;
+ C++FLAGS ?= $(CCFLAGS) ;
+ CHMOD ?= chmod ;
+ CHGRP ?= chgrp ;
+ CHOWN ?= chown ;
+ LEX ?= lex ;
+ LINKFLAGS ?= $(CCFLAGS) ;
+ LINKLIBS ?= ;
+ OPTIM ?= -O ;
+ RANLIB ?= ranlib ;
+ YACC ?= yacc ;
+ YACCFILES ?= y.tab ;
+ YACCFLAGS ?= -d ;
+}
+
+#
+# General defaults; a lot like UNIX
+#
+
+ AR ?= ar ru ;
+ AS ?= as ;
+ ASFLAGS ?= ;
+ AWK ?= awk ;
+ BINDIR ?= /usr/local/bin ;
+ C++ ?= cc ;
+ C++FLAGS ?= ;
+ CC ?= cc ;
+ CCFLAGS ?= ;
+ CP ?= cp -f ;
+ CRELIB ?= ;
+ DOT ?= . ;
+ DOTDOT ?= .. ;
+ EXEMODE ?= 711 ;
+ FILEMODE ?= 644 ;
+ FORTRAN ?= f77 ;
+ FORTRANFLAGS ?= ;
+ HDRS ?= ;
+ INSTALLGRIST ?= installed ;
+ JAMFILE ?= Jamfile ;
+ JAMRULES ?= Jamrules ;
+ LEX ?= ;
+ LIBDIR ?= /usr/local/lib ;
+ LINK ?= $(CC) ;
+ LINKFLAGS ?= ;
+ LINKLIBS ?= ;
+ LN ?= ln ;
+ MANDIR ?= /usr/local/man ;
+ MKDIR ?= mkdir ;
+ MV ?= mv -f ;
+ OPTIM ?= ;
+ RCP ?= rcp ;
+ RM ?= rm -f ;
+ RSH ?= rsh ;
+ SED ?= sed ;
+ SHELLHEADER ?= "#!/bin/sh" ;
+ SHELLMODE ?= 755 ;
+ SLASH ?= / ;
+ STDHDRS ?= /usr/include ;
+ SUFEXE ?= "" ;
+ SUFLIB ?= .a ;
+ SUFOBJ ?= .o ;
+ UNDEFFLAG ?= "-u _" ;
+ YACC ?= ;
+ YACCFILES ?= ;
+ YACCFLAGS ?= ;
+
+ HDRPATTERN =
+ "^[ ]*#[ ]*include[ ]*[<\"]([^\">]*)[\">].*$" ;
+
+ OSFULL = $(OS)$(OSVER)$(OSPLAT) $(OS)$(OSPLAT) $(OS)$(OSVER) $(OS) ;
+
+
+#
+# Base dependencies - first for "bootstrap" kinds of rules
+#
+
+DEPENDS all : shell files lib exe obj ;
+DEPENDS all shell files lib exe obj : first ;
+NOTFILE all first shell files lib exe obj dirs clean uninstall ;
+ALWAYS clean uninstall ;
+
+#
+# Rules
+#
+
+rule As
+{
+ DEPENDS $(<) : $(>) ;
+ ASFLAGS on $(<) += $(ASFLAGS) $(SUBDIRASFLAGS) ;
+}
+
+rule Bulk
+{
+ local i ;
+
+ for i in $(>)
+ {
+ File $(i:D=$(<)) : $(i) ;
+ }
+}
+
+rule Cc
+{
+ local _h ;
+
+ DEPENDS $(<) : $(>) ;
+
+ # Just to clarify here: this sets the per-target CCFLAGS to
+ # be the current value of (global) CCFLAGS and SUBDIRCCFLAGS.
+
+ CCFLAGS on $(<) += $(CCFLAGS) $(SUBDIRCCFLAGS) ;
+
+ # If the compiler's -o flag doesn't work, relocate the .o
+
+ if $(RELOCATE)
+ {
+ CcMv $(<) : $(>) ;
+ }
+
+ _h = $(SEARCH_SOURCE) $(HDRS) $(SUBDIRHDRS) ;
+
+ if $(VMS) && $(_h)
+ {
+ SLASHINC on $(<) = "/inc=(" $(_h[1]) ,$(_h[2-]) ")" ;
+ }
+ else if $(MAC) && $(_h)
+ {
+ local _i _j ;
+ _j = $(_h[1]) ;
+ for _i in $(_h[2-])
+ {
+ _j = $(_j),$(_i) ;
+ }
+ MACINC on $(<) = \"$(_j)\" ;
+ }
+}
+
+rule C++
+{
+ local _h ;
+
+ DEPENDS $(<) : $(>) ;
+ C++FLAGS on $(<) += $(C++FLAGS) $(SUBDIRC++FLAGS) ;
+
+ if $(RELOCATE)
+ {
+ CcMv $(<) : $(>) ;
+ }
+
+ _h = $(SEARCH_SOURCE) $(HDRS) $(SUBDIRHDRS) ;
+
+ if $(VMS) && $(_h)
+ {
+ SLASHINC on $(<) = "/inc=(" $(_h[1]) ,$(_h[2-]) ")" ;
+ }
+ else if $(MAC) && $(_h)
+ {
+ local _i _j ;
+ _j = $(_h[1]) ;
+ for _i in $(_h[2-])
+ {
+ _j = $(_j),$(_i) ;
+ }
+ MACINC on $(<) = \"$(_j)\" ;
+ }
+}
+
+rule Chmod
+{
+ if $(CHMOD) { Chmod1 $(<) ; }
+}
+
+rule File
+{
+ DEPENDS files : $(<) ;
+ DEPENDS $(<) : $(>) ;
+ SEARCH on $(>) = $(SEARCH_SOURCE) ;
+ MODE on $(<) = $(FILEMODE) ;
+ Chmod $(<) ;
+}
+
+rule Fortran
+{
+ DEPENDS $(<) : $(>) ;
+}
+
+rule GenFile
+{
+ local _t = [ FGristSourceFiles $(<) ] ;
+ local _s = [ FAppendSuffix $(>[1]) : $(SUFEXE) ] ;
+ Depends $(_t) : $(_s) $(>[2-]) ;
+ GenFile1 $(_t) : $(_s) $(>[2-]) ;
+ Clean clean : $(_t) ;
+}
+
+rule GenFile1
+{
+ MakeLocate $(<) : $(LOCATE_SOURCE) ;
+ SEARCH on $(>) = $(SEARCH_SOURCE) ;
+}
+
+rule HardLink
+{
+ DEPENDS files : $(<) ;
+ DEPENDS $(<) : $(>) ;
+ SEARCH on $(>) = $(SEARCH_SOURCE) ;
+}
+
+rule HdrMacroFile
+{
+ # HdrMacroFile file ;
+ #
+ # this rule is used to indicate that a given file contains definitions
+ # for filename macros (e.g. "#define MYFILE_H <myfile.h>") that can
+ # later be used in #include statements in the rest of the source
+ #
+	# these files must be parsed before any make is tried..
+ #
+ HDRMACRO $(<) ;
+}
+
+rule HdrRule
+{
+ # HdrRule source : headers ;
+
+ # N.B. This rule is called during binding, potentially after
+ # the fate of many targets has been determined, and must be
+ # used with caution: don't add dependencies to unrelated
+ # targets, and don't set variables on $(<).
+
+ # Tell Jam that anything depending on $(<) also depends on $(>),
+ # set SEARCH so Jam can find the headers, but then say we don't
+ # care if we can't actually find the headers (they may have been
+ # within ifdefs),
+
+ local s ;
+
+ if $(HDRGRIST)
+ {
+ s = $(>:G=$(HDRGRIST)) ;
+ } else {
+ s = $(>) ;
+ }
+
+ INCLUDES $(<) : $(s) ;
+ SEARCH on $(s) = $(HDRSEARCH) ;
+ NOCARE $(s) ;
+
+ # Propagate on $(<) to $(>)
+
+ HDRSEARCH on $(s) = $(HDRSEARCH) ;
+ HDRSCAN on $(s) = $(HDRSCAN) ;
+ HDRRULE on $(s) = $(HDRRULE) ;
+ HDRGRIST on $(s) = $(HDRGRIST) ;
+}
+
+rule InstallInto
+{
+ # InstallInto dir : sources ;
+
+ local i t ;
+
+ t = $(>:G=$(INSTALLGRIST)) ;
+
+ # Arrange for jam install
+ # Arrange for jam uninstall
+ # sources are in SEARCH_SOURCE
+ # targets are in dir
+
+ Depends install : $(t) ;
+ Clean uninstall : $(t) ;
+ SEARCH on $(>) = $(SEARCH_SOURCE) ;
+ MakeLocate $(t) : $(<) ;
+
+ # For each source, make gristed target name
+ # and Install, Chmod, Chown, and Chgrp
+
+ for i in $(>)
+ {
+ local tt = $(i:G=$(INSTALLGRIST)) ;
+
+ Depends $(tt) : $(i) ;
+ Install $(tt) : $(i) ;
+ Chmod $(tt) ;
+
+ if $(OWNER) && $(CHOWN)
+ {
+ Chown $(tt) ;
+ OWNER on $(tt) = $(OWNER) ;
+ }
+
+ if $(GROUP) && $(CHGRP)
+ {
+ Chgrp $(tt) ;
+ GROUP on $(tt) = $(GROUP) ;
+ }
+ }
+}
+
+rule InstallBin
+{
+ local _t = [ FAppendSuffix $(>) : $(SUFEXE) ] ;
+
+ InstallInto $(<) : $(_t) ;
+ MODE on $(_t:G=installed) = $(EXEMODE) ;
+}
+
+rule InstallFile
+{
+ InstallInto $(<) : $(>) ;
+ MODE on $(>:G=installed) = $(FILEMODE) ;
+}
+
+rule InstallLib
+{
+ InstallInto $(<) : $(>) ;
+ MODE on $(>:G=installed) = $(FILEMODE) ;
+}
+
+rule InstallMan
+{
+ # Really this just strips the . from the suffix
+
+ local i s d ;
+
+ for i in $(>)
+ {
+ switch $(i:S)
+ {
+ case .1 : s = 1 ; case .2 : s = 2 ; case .3 : s = 3 ;
+ case .4 : s = 4 ; case .5 : s = 5 ; case .6 : s = 6 ;
+ case .7 : s = 7 ; case .8 : s = 8 ; case .l : s = l ;
+ case .n : s = n ; case .man : s = 1 ;
+ }
+
+ d = man$(s) ;
+
+ InstallInto $(d:R=$(<)) : $(i) ;
+ }
+
+ MODE on $(>:G=installed) = $(FILEMODE) ;
+}
+
+rule InstallShell
+{
+ InstallInto $(<) : $(>) ;
+ MODE on $(>:G=installed) = $(SHELLMODE) ;
+}
+
+rule Lex
+{
+ LexMv $(<) : $(>) ;
+ DEPENDS $(<) : $(>) ;
+ MakeLocate $(<) : $(LOCATE_SOURCE) ;
+ Clean clean : $(<) ;
+}
+
+rule Library
+{
+ LibraryFromObjects $(<) : $(>:S=$(SUFOBJ)) ;
+ Objects $(>) ;
+}
+
+rule LibraryFromObjects
+{
+ local _i _l _s ;
+
+ # Add grist to file names
+
+ _s = [ FGristFiles $(>) ] ;
+ _l = $(<:S=$(SUFLIB)) ;
+
+ # library depends on its member objects
+
+ if $(KEEPOBJS)
+ {
+ DEPENDS obj : $(_s) ;
+ }
+ else
+ {
+ DEPENDS lib : $(_l) ;
+ }
+
+ # Set LOCATE for the library and its contents. The bound
+ # value shows up as $(NEEDLIBS) on the Link actions.
+ # For compatibility, we only do this if the library doesn't
+ # already have a path.
+
+ if ! $(_l:D)
+ {
+ MakeLocate $(_l) $(_l)($(_s:BS)) : $(LOCATE_TARGET) ;
+ }
+
+ if $(NOARSCAN)
+ {
+ # If we can't scan the library to timestamp its contents,
+ # we have to just make the library depend directly on the
+ # on-disk object files.
+
+ DEPENDS $(_l) : $(_s) ;
+ }
+ else
+ {
+ # If we can scan the library, we make the library depend
+ # on its members and each member depend on the on-disk
+ # object file.
+
+ DEPENDS $(_l) : $(_l)($(_s:BS)) ;
+
+ for _i in $(_s)
+ {
+ DEPENDS $(_l)($(_i:BS)) : $(_i) ;
+ }
+ }
+
+ Clean clean : $(_l) ;
+
+ if $(CRELIB) { CreLib $(_l) : $(_s[1]) ; }
+
+ Archive $(_l) : $(_s) ;
+
+ if $(RANLIB) { Ranlib $(_l) ; }
+
+ # If we can't scan the library, we have to leave the .o's around.
+
+ if ! ( $(NOARSCAN) || $(KEEPOBJS) ) { RmTemps $(_l) : $(_s) ; }
+}
+
+rule Link
+{
+ MODE on $(<) = $(EXEMODE) ;
+ Chmod $(<) ;
+}
+
+rule LinkLibraries
+{
+ # make library dependencies of target
+ # set NEEDLIBS variable used by 'actions Main'
+
+ local _t = [ FAppendSuffix $(<) : $(SUFEXE) ] ;
+
+ DEPENDS $(_t) : $(>:S=$(SUFLIB)) ;
+ NEEDLIBS on $(_t) += $(>:S=$(SUFLIB)) ;
+}
+
+rule Main
+{
+ MainFromObjects $(<) : $(>:S=$(SUFOBJ)) ;
+ Objects $(>) ;
+}
+
+rule MainFromObjects
+{
+ local _s _t ;
+
+ # Add grist to file names
+ # Add suffix to exe
+
+ _s = [ FGristFiles $(>) ] ;
+ _t = [ FAppendSuffix $(<) : $(SUFEXE) ] ;
+
+ if $(_t) != $(<)
+ {
+ DEPENDS $(<) : $(_t) ;
+ NOTFILE $(<) ;
+ }
+
+ # make compiled sources a dependency of target
+
+ DEPENDS exe : $(_t) ;
+ DEPENDS $(_t) : $(_s) ;
+ MakeLocate $(_t) : $(LOCATE_TARGET) ;
+
+ Clean clean : $(_t) ;
+
+ Link $(_t) : $(_s) ;
+}
+
+rule MakeLocate
+{
+ if $(>)
+ {
+ LOCATE on $(<) = $(>) ;
+ Depends $(<) : $(>[1]) ;
+ MkDir $(>[1]) ;
+ }
+}
+
+rule MkDir
+{
+ # If dir exists, don't update it
+ # Do this even for $(DOT).
+
+ NOUPDATE $(<) ;
+
+ if $(<) != $(DOT) && ! $($(<)-mkdir)
+ {
+ local s ;
+
+ # Cheesy gate to prevent multiple invocations on same dir
+ # MkDir1 has the actions
+ # Arrange for jam dirs
+
+ $(<)-mkdir = true ;
+ MkDir1 $(<) ;
+ Depends dirs : $(<) ;
+
+ # Recursively make parent directories.
+ # $(<:P) = $(<)'s parent, & we recurse until root
+
+ s = $(<:P) ;
+
+ if $(NT)
+ {
+ switch $(s)
+ {
+ case *: : s = ;
+ case *:\\ : s = ;
+ }
+ }
+
+ if $(s) && $(s) != $(<)
+ {
+ Depends $(<) : $(s) ;
+ MkDir $(s) ;
+ }
+ else if $(s)
+ {
+ NOTFILE $(s) ;
+ }
+
+ }
+}
+
+rule Object
+{
+ local h ;
+
+ # locate object and search for source, if wanted
+
+ Clean clean : $(<) ;
+
+ MakeLocate $(<) : $(LOCATE_TARGET) ;
+ SEARCH on $(>) = $(SEARCH_SOURCE) ;
+
+ # Save HDRS for -I$(HDRS) on compile.
+ # We shouldn't need -I$(SEARCH_SOURCE) as cc can find headers
+ # in the .c file's directory, but generated .c files (from
+ # yacc, lex, etc) are located in $(LOCATE_TARGET), possibly
+ # different from $(SEARCH_SOURCE).
+
+ HDRS on $(<) = $(SEARCH_SOURCE) $(HDRS) $(SUBDIRHDRS) ;
+
+ # handle #includes for source: Jam scans for headers with
+ # the regexp pattern $(HDRSCAN) and then invokes $(HDRRULE)
+ # with the scanned file as the target and the found headers
+ # as the sources. HDRSEARCH is the value of SEARCH used for
+ # the found header files. Finally, if jam must deal with
+ # header files of the same name in different directories,
+ # they can be distinguished with HDRGRIST.
+
+ # $(h) is where cc first looks for #include "foo.h" files.
+ # If the source file is in a distant directory, look there.
+ # Else, look in "" (the current directory).
+
+ if $(SEARCH_SOURCE)
+ {
+ h = $(SEARCH_SOURCE) ;
+ }
+ else
+ {
+ h = "" ;
+ }
+
+ HDRRULE on $(>) = HdrRule ;
+ HDRSCAN on $(>) = $(HDRPATTERN) ;
+ HDRSEARCH on $(>) = $(HDRS) $(SUBDIRHDRS) $(h) $(STDHDRS) ;
+ HDRGRIST on $(>) = $(HDRGRIST) ;
+
+ # if source is not .c, generate .c with specific rule
+
+ switch $(>:S)
+ {
+ case .asm : As $(<) : $(>) ;
+ case .c : Cc $(<) : $(>) ;
+ case .C : C++ $(<) : $(>) ;
+ case .cc : C++ $(<) : $(>) ;
+ case .cpp : C++ $(<) : $(>) ;
+ case .f : Fortran $(<) : $(>) ;
+ case .l : Cc $(<) : $(<:S=.c) ;
+ Lex $(<:S=.c) : $(>) ;
+ case .s : As $(<) : $(>) ;
+ case .y : Cc $(<) : $(<:S=.c) ;
+ Yacc $(<:S=.c) : $(>) ;
+ case * : UserObject $(<) : $(>) ;
+ }
+}
+
+
+rule ObjectCcFlags
+{
+ CCFLAGS on [ FGristFiles $(<:S=$(SUFOBJ)) ] += $(>) ;
+}
+
+rule ObjectC++Flags
+{
+ C++FLAGS on [ FGristFiles $(<:S=$(SUFOBJ)) ] += $(>) ;
+}
+
+rule ObjectHdrs
+{
+ HDRS on [ FGristFiles $(<:S=$(SUFOBJ)) ] += $(>) ;
+}
+
+rule Objects
+{
+ local _i ;
+
+ for _i in [ FGristFiles $(<) ]
+ {
+ Object $(_i:S=$(SUFOBJ)) : $(_i) ;
+ DEPENDS obj : $(_i:S=$(SUFOBJ)) ;
+ }
+}
+
+rule RmTemps
+{
+ TEMPORARY $(>) ;
+}
+
+rule Setuid
+{
+ MODE on [ FAppendSuffix $(<) : $(SUFEXE) ] = 4711 ;
+}
+
+rule Shell
+{
+ DEPENDS shell : $(<) ;
+ DEPENDS $(<) : $(>) ;
+ SEARCH on $(>) = $(SEARCH_SOURCE) ;
+ MODE on $(<) = $(SHELLMODE) ;
+ Clean clean : $(<) ;
+ Chmod $(<) ;
+}
+
+rule SubDir
+{
+ local _r _s ;
+
+ #
+ # SubDir TOP d1 [ ... ]
+ #
+ # This introduces a Jamfile that is part of a project tree
+ # rooted at $(TOP). It (only once) includes the project-specific
+ # rules file $(TOP)/Jamrules and then sets search & locate stuff.
+ #
+ # If the variable $(TOPRULES) is set (where TOP is the first arg
+ # to SubDir), that file is included instead of $(TOP)/Jamrules.
+ #
+ # d1 ... are the directory elements that lead to this directory
+ # from $(TOP). We construct the system dependent path from these
+ # directory elements in order to set search & locate stuff.
+ #
+
+ if ! $($(<[1]))
+ {
+ if ! $(<[1])
+ {
+ EXIT SubDir syntax error ;
+ }
+
+ $(<[1]) = [ FSubDir $(<[2-]) ] ;
+ }
+
+ #
+ # If $(TOP)/Jamrules hasn't been included, do so.
+ #
+
+ if ! $($(<[1])-included)
+ {
+ # Gated entry.
+
+ $(<[1])-included = TRUE ;
+
+ # File is $(TOPRULES) or $(TOP)/Jamrules.
+
+ _r = $($(<[1])RULES) ;
+
+ if ! $(_r)
+ {
+ _r = $(JAMRULES:R=$($(<[1]))) ;
+ }
+
+ # Include it.
+
+ include $(_r) ;
+ }
+
+ # Get path to current directory from root using SubDir.
+ # Save dir tokens for other potential uses.
+
+ _s = [ FDirName $(<[2-]) ] ;
+ SUBDIR = $(_s:R=$($(<[1]))) ;
+ SUBDIR_TOKENS = $(<[2-]) ;
+
+ # Now set up SEARCH_SOURCE, LOCATE_TARGET, SOURCE_GRIST
+ # These can be reset if needed. For example, if the source
+ # directory should not hold object files, LOCATE_TARGET can
+ # subsequently be redefined.
+
+ SEARCH_SOURCE = $(SUBDIR) ;
+ LOCATE_SOURCE = $(ALL_LOCATE_TARGET) $(SUBDIR) ;
+ LOCATE_TARGET = $(ALL_LOCATE_TARGET) $(SUBDIR) ;
+ SOURCE_GRIST = [ FGrist $(<[2-]) ] ;
+
+ # Reset per-directory ccflags, hdrs
+
+ SUBDIRCCFLAGS = ;
+ SUBDIRC++FLAGS = ;
+ SUBDIRHDRS = ;
+}
+
+rule SubDirCcFlags
+{
+ SUBDIRCCFLAGS += $(<) ;
+}
+
+rule SubDirC++Flags
+{
+ SUBDIRC++FLAGS += $(<) ;
+}
+
+rule SubDirHdrs
+{
+ SUBDIRHDRS += $(<) ;
+}
+
+rule SubInclude
+{
+ local _s ;
+
+ # That's
+ # SubInclude TOP d1 [ d2 [ d3 [ d4 ] ] ]
+ #
+ # to include a subdirectory's Jamfile.
+
+ if ! $($(<[1]))
+ {
+ EXIT Top level of source tree has not been set with $(<[1]) ;
+ }
+
+ _s = [ FDirName $(<[2-]) ] ;
+
+ include $(JAMFILE:D=$(_s):R=$($(<[1]))) ;
+}
+
+rule Undefines
+{
+ UNDEFS on [ FAppendSuffix $(<) : $(SUFEXE) ] += $(UNDEFFLAG)$(>) ;
+}
+
+rule UserObject
+{
+ EXIT "Unknown suffix on" $(>) "- see UserObject rule in Jamfile(5)." ;
+}
+
+rule Yacc
+{
+ local _h ;
+
+ _h = $(<:BS=.h) ;
+
+ # Some places don't have a yacc.
+
+ MakeLocate $(<) $(_h) : $(LOCATE_SOURCE) ;
+
+ if $(YACC)
+ {
+ DEPENDS $(<) $(_h) : $(>) ;
+ Yacc1 $(<) $(_h) : $(>) ;
+ YaccMv $(<) $(_h) : $(>) ;
+ Clean clean : $(<) $(_h) ;
+ }
+
+ # Make sure someone includes $(_h) else it will be a deadly independent
+ # target.
+ INCLUDES $(<) : $(_h) ;
+}
+
+#
+# Utility rules; no side effects on these.
+#
+
+rule FGrist
+{
+ # Turn individual elements in $(<) into grist.
+
+ local _g _i ;
+
+ _g = $(<[1]) ;
+
+ for _i in $(<[2-])
+ {
+ _g = $(_g)!$(_i) ;
+ }
+
+ return $(_g) ;
+}
+
+rule FGristFiles
+{
+ if ! $(SOURCE_GRIST)
+ {
+ return $(<) ;
+ }
+ else
+ {
+ return $(<:G=$(SOURCE_GRIST)) ;
+ }
+}
+
+rule FGristSourceFiles
+{
+	# Produce source file name with grist in it,
+ # if SOURCE_GRIST is set.
+
+ # Leave header files alone, because they have a global
+ # visibility.
+
+ if ! $(SOURCE_GRIST)
+ {
+ return $(<) ;
+ }
+ else
+ {
+ local _i _o ;
+
+ for _i in $(<)
+ {
+ switch $(_i)
+ {
+ case *.h : _o += $(_i) ;
+ case * : _o += $(_i:G=$(SOURCE_GRIST)) ;
+ }
+ }
+
+ return $(_o) ;
+ }
+}
+
+rule FConcat
+{
+ # Puts the variables together, removing spaces.
+
+ local _t _r ;
+
+ $(_r) = $(<[1]) ;
+
+ for _t in $(<[2-])
+ {
+ $(_r) = $(_r)$(_t) ;
+ }
+
+ return $(_r) ;
+}
+
+rule FSubDir
+{
+ local _i _d ;
+
+ # If $(>) is the path to the current directory, compute the
+ # path (using ../../ etc) back to that root directory.
+ # Sets result in $(<)
+
+ if ! $(<[1])
+ {
+ _d = $(DOT) ;
+ }
+ else
+ {
+ _d = $(DOTDOT) ;
+
+ for _i in $(<[2-])
+ {
+ _d = $(_d:R=$(DOTDOT)) ;
+ }
+ }
+
+ return $(_d) ;
+}
+
+rule FDirName
+{
+ local _s _i ;
+
+ # Turn individual elements in $(<) into a usable path.
+
+ if ! $(<)
+ {
+ _s = $(DOT) ;
+ }
+ else if $(VMS)
+ {
+ # This handles the following cases:
+ # a -> [.a]
+ # a b c -> [.a.b.c]
+ # x: -> x:
+ # x: a -> x:[a]
+ # x:[a] b -> x:[a.b]
+
+ switch $(<[1])
+ {
+ case *:* : _s = $(<[1]) ;
+ case \\[*\\] : _s = $(<[1]) ;
+ case * : _s = [.$(<[1])] ;
+ }
+
+ for _i in [.$(<[2-])]
+ {
+ _s = $(_i:R=$(_s)) ;
+ }
+ }
+ else if $(MAC)
+ {
+ _s = $(DOT) ;
+
+ for _i in $(<)
+ {
+ _s = $(_i:R=$(_s)) ;
+ }
+ }
+ else
+ {
+ _s = $(<[1]) ;
+
+ for _i in $(<[2-])
+ {
+ _s = $(_i:R=$(_s)) ;
+ }
+ }
+
+ return $(_s) ;
+}
+
+
+rule _makeCommon
+{
+ # strip common initial elements
+
+ if $($(<)[1]) && $($(<)[1]) = $($(>)[1])
+ {
+ $(<) = $($(<)[2-]) ;
+ $(>) = $($(>)[2-]) ;
+ _makeCommon $(<) : $(>) ;
+ }
+}
+
+
+rule FRelPath
+{
+ local _l _r ;
+
+ # first strip off common parts
+
+ _l = $(<) ;
+ _r = $(>) ;
+
+ _makeCommon _l : _r ;
+
+ # now make path to root and path down
+
+ _l = [ FSubDir $(_l) ] ;
+ _r = [ FDirName $(_r) ] ;
+
+ # Concatenate and save
+
+ # XXX This should be better
+
+ if $(_r) = $(DOT) {
+ return $(_l) ;
+ } else {
+ return $(_r:R=$(_l)) ;
+ }
+}
+
+rule FAppendSuffix
+{
+ # E.g., "FAppendSuffix yacc lex foo.bat : $(SUFEXE) ;"
+ # returns (yacc,lex,foo.bat) on Unix and
+ # (yacc.exe,lex.exe,foo.bat) on NT.
+
+ if $(>)
+ {
+ local _i _o ;
+
+ for _i in $(<)
+ {
+ if $(_i:S)
+ {
+ _o += $(_i) ;
+ }
+ else
+ {
+ _o += $(_i:S=$(>)) ;
+ }
+ }
+ return $(_o) ;
+ }
+ else
+ {
+ return $(<) ;
+ }
+}
+
+rule unmakeDir
+{
+ if $(>[1]:D) && $(>[1]:D) != $(>[1]) && $(>[1]:D) != \\\\
+ {
+ unmakeDir $(<) : $(>[1]:D) $(>[1]:BS) $(>[2-]) ;
+ }
+ else
+ {
+ $(<) = $(>) ;
+ }
+}
+
+
+rule FConvertToSlashes
+{
+ local _d, _s, _i ;
+
+ unmakeDir _d : $(<) ;
+
+ _s = $(_d[1]) ;
+ for _i in $(_d[2-])
+ {
+ _s = $(_s)/$(_i) ;
+ }
+ return $(_s) ;
+}
+
+
+#
+# Actions
+#
+
+#
+# First the defaults
+#
+
+actions updated together piecemeal Archive
+{
+ $(AR) $(<) $(>)
+}
+
+actions As
+{
+ $(AS) $(ASFLAGS) -I$(HDRS) -o $(<) $(>)
+}
+
+actions C++
+{
+ $(C++) -c $(C++FLAGS) $(OPTIM) -I$(HDRS) -o $(<) $(>)
+}
+
+actions Cc
+{
+ $(CC) -c $(CCFLAGS) $(OPTIM) -I$(HDRS) -o $(<) $(>)
+}
+
+actions Chgrp
+{
+ $(CHGRP) $(GROUP) $(<)
+}
+
+actions Chmod1
+{
+ $(CHMOD) $(MODE) $(<)
+}
+
+actions Chown
+{
+ $(CHOWN) $(OWNER) $(<)
+}
+
+actions piecemeal together existing Clean
+{
+ $(RM) $(>)
+}
+
+actions File
+{
+ $(CP) $(>) $(<)
+}
+
+actions GenFile1
+{
+ $(>[1]) $(<) $(>[2-])
+}
+
+actions Fortran
+{
+ $(FORTRAN) $(FORTRANFLAGS) -o $(<) $(>)
+}
+
+actions HardLink
+{
+ $(RM) $(<) && $(LN) $(>) $(<)
+}
+
+actions Install
+{
+ $(CP) $(>) $(<)
+}
+
+actions Lex
+{
+ $(LEX) $(>)
+}
+
+actions LexMv
+{
+ $(MV) lex.yy.c $(<)
+}
+
+actions Link bind NEEDLIBS
+{
+ $(LINK) $(LINKFLAGS) -o $(<) $(UNDEFS) $(>) $(NEEDLIBS) $(LINKLIBS)
+}
+
+actions MkDir1
+{
+ $(MKDIR) $(<)
+}
+
+actions together Ranlib
+{
+ $(RANLIB) $(<)
+}
+
+actions quietly updated piecemeal together RmTemps
+{
+ $(RM) $(>)
+}
+
+actions Shell
+{
+ $(AWK) '
+ NR == 1 { print "$(SHELLHEADER)" }
+ NR == 1 && /^[#:]/ { next }
+ /^##/ { next }
+ { print }
+ ' < $(>) > $(<)
+}
+
+actions Yacc1
+{
+ $(YACC) $(YACCFLAGS) $(>)
+}
+
+actions YaccMv
+{
+ $(MV) $(YACCFILES).c $(<[1])
+ $(MV) $(YACCFILES).h $(<[2])
+}
+
+#
+# RELOCATE - for compilers with broken -o flags
+#
+
+if $(RELOCATE)
+{
+ actions C++
+ {
+ $(C++) -c $(C++FLAGS) $(OPTIM) -I$(HDRS) $(>)
+ }
+
+ actions Cc
+ {
+ $(CC) -c $(CCFLAGS) $(OPTIM) -I$(HDRS) $(>)
+ }
+
+ actions ignore CcMv
+ {
+ [ $(<) != $(>:BS=$(SUFOBJ)) ] && $(MV) $(>:BS=$(SUFOBJ)) $(<)
+ }
+}
+
+#
+# NOARUPDATE - can't update an archive
+#
+
+if $(NOARUPDATE)
+{
+ actions Archive
+ {
+ $(AR) $(<) $(>)
+ }
+}
+
+#
+# NT specific actions
+#
+
+if $(NT)
+{
+ if $(TOOLSET) = VISUALC || $(TOOLSET) = VC7 || $(TOOLSET) = INTELC
+ {
+ actions updated together piecemeal Archive
+ {
+ if exist $(<) set _$(<:B)_=$(<)
+ $(AR) /out:$(<) %_$(<:B)_% $(>)
+ }
+
+ actions As
+ {
+ $(AS) /Ml /p /v /w2 $(>) $(<) ,nul,nul;
+ }
+
+ actions Cc
+ {
+ $(CC) /c $(CCFLAGS) $(OPTIM) /Fo$(<) /I$(HDRS) /I$(STDHDRS) $(>)
+ }
+
+ actions C++
+ {
+ $(C++) /c $(C++FLAGS) $(OPTIM) /Fo$(<) /I$(HDRS) /I$(STDHDRS) /Tp$(>)
+ }
+
+ actions Link bind NEEDLIBS
+ {
+ $(LINK) $(LINKFLAGS) /out:$(<) $(UNDEFS) $(>) $(NEEDLIBS) $(LINKLIBS)
+ }
+ }
+ else if $(TOOLSET) = VISUALC16
+ {
+ actions updated together piecemeal Archive
+ {
+ $(AR) $(<) -+$(>)
+ }
+
+ actions Cc
+ {
+ $(CC) /c $(CCFLAGS) $(OPTIM) /Fo$(<) /I$(HDRS) $(>)
+ }
+
+ actions C++
+ {
+ $(C++) /c $(C++FLAGS) $(OPTIM) /Fo$(<) /I$(HDRS) /Tp$(>)
+ }
+
+ actions Link bind NEEDLIBS
+ {
+ $(LINK) $(LINKFLAGS) /out:$(<) $(UNDEFS) $(>) $(NEEDLIBS) $(LINKLIBS)
+ }
+ }
+ else if $(TOOLSET) = BORLANDC
+ {
+ actions updated together piecemeal Archive
+ {
+ $(AR) $(<) -+$(>)
+ }
+
+ actions Link bind NEEDLIBS
+ {
+ $(LINK) -e$(<) $(LINKFLAGS) $(UNDEFS) -L$(LINKLIBS) $(NEEDLIBS) $(>)
+ }
+
+ actions C++
+ {
+ $(C++) -c $(C++FLAGS) $(OPTIM) -I$(HDRS) -o$(<) $(>)
+ }
+
+ actions Cc
+ {
+ $(CC) -c $(CCFLAGS) $(OPTIM) -I$(HDRS) -o$(<) $(>)
+ }
+
+ }
+ else if $(TOOLSET) = MINGW
+ {
+ actions together piecemeal Archive
+ {
+ $(AR) $(<) $(>:T)
+ }
+
+ actions Cc
+ {
+ $(CC) -c $(CCFLAGS) $(OPTIM) -I$(HDRS) -o$(<) $(>)
+ }
+
+ actions C++
+ {
+ $(C++) -c $(C++FLAGS) $(OPTIM) -I$(HDRS) -o$(<) $(>)
+ }
+ }
+ else if $(TOOLSET) = WATCOM
+ {
+ actions together piecemeal Archive
+ {
+ $(AR) $(<) +-$(>)
+ }
+
+ actions Cc
+ {
+ $(CC) $(CCFLAGS) $(OPTIM) /Fo=$(<) /I$(HDRS) $(>)
+ }
+
+ actions C++
+ {
+ $(C++) $(C++FLAGS) $(OPTIM) /Fo=$(<) /I$(HDRS) $(>)
+ }
+
+ actions Link bind NEEDLIBS
+ {
+ $(LINK) $(LINKFLAGS) /Fe=$(<) $(UNDEFS) $(>) $(NEEDLIBS) $(LINKLIBS)
+ }
+
+ actions Shell
+ {
+ $(CP) $(>) $(<)
+ }
+ }
+ else if $(TOOLSET) = LCC
+ {
+ actions together piecemeal Archive
+ {
+ $(AR) /out:$(<) $(>)
+ }
+
+ actions Cc
+ {
+ $(CC) $(CCFLAGS) $(OPTIM) -Fo$(<) -I$(HDRS) $(>)
+ }
+
+ actions Link bind NEEDLIBS
+ {
+ $(LINK) $(LINKFLAGS) -o $(<) $(UNDEFS) $(>) $(NEEDLIBS) $(LINKLIBS)
+ }
+
+ actions Shell
+ {
+ $(CP) $(>) $(<)
+ }
+ }
+}
+
+#
+# OS2 specific actions
+#
+
+else if $(OS2)
+{
+ if $(TOOLSET) = WATCOM
+ {
+ actions together piecemeal Archive
+ {
+ $(AR) $(<) +-$(>)
+ }
+
+ actions Cc
+ {
+ $(CC) $(CCFLAGS) $(OPTIM) /Fo=$(<) /I$(HDRS) $(>)
+ }
+
+ actions C++
+ {
+ $(C++) $(C++FLAGS) $(OPTIM) /Fo=$(<) /I$(HDRS) $(>)
+ }
+
+ actions Link bind NEEDLIBS
+ {
+ $(LINK) $(LINKFLAGS) /Fe=$(<) $(UNDEFS) $(>) $(NEEDLIBS) $(LINKLIBS)
+ }
+
+ actions Shell
+ {
+ $(CP) $(>) $(<)
+ }
+ }
+ else if $(TOOLSET) = EMX
+ {
+ actions together piecemeal Archive
+ {
+ $(AR) $(<) $(>:T)
+ }
+
+ actions Cc
+ {
+ $(CC) -c $(CCFLAGS) $(OPTIM) -I$(HDRS) -o$(<) $(>)
+ }
+
+ actions C++
+ {
+ $(C++) -c $(C++FLAGS) $(OPTIM) -I$(HDRS) -o$(<) $(>)
+ }
+ }
+}
+
+#
+# VMS specific actions
+#
+
+else if $(VMS)
+{
+ actions updated together piecemeal Archive
+ {
+ lib/replace $(<) $(>[1]) ,$(>[2-])
+ }
+
+ actions Cc
+ {
+ $(CC)/obj=$(<) $(CCFLAGS) $(OPTIM) $(SLASHINC) $(>)
+ }
+
+ actions C++
+ {
+ $(C++)/obj=$(<) $(C++FLAGS) $(OPTIM) $(SLASHINC) $(>)
+ }
+
+ actions piecemeal together existing Clean
+ {
+ $(RM) $(>[1]);* ,$(>[2-]);*
+ }
+
+ actions together quietly CreLib
+ {
+ if f$search("$(<)") .eqs. "" then lib/create $(<)
+ }
+
+ actions GenFile1
+ {
+ mcr $(>[1]) $(<) $(>[2-])
+ }
+
+ actions Link bind NEEDLIBS
+ {
+ $(LINK)/exe=$(<) $(LINKFLAGS) $(>[1]) ,$(>[2-]) ,$(NEEDLIBS)/lib ,$(LINKLIBS)
+ }
+
+ actions quietly updated piecemeal together RmTemps
+ {
+ $(RM) $(>[1]);* ,$(>[2-]);*
+ }
+
+ actions Shell
+ {
+ $(CP) $(>) $(<)
+ }
+}
+
+#
+# Mac specific actions
+#
+
+else if $(MAC)
+{
+ actions together Archive
+ {
+ $(LINK) -library -o $(<) $(>)
+ }
+
+ actions Cc
+ {
+ set -e MWCincludes $(MACINC)
+ $(CC) -o $(<) $(CCFLAGS) $(OPTIM) $(>)
+ }
+
+ actions C++
+ {
+ set -e MWCincludes $(MACINC)
+ $(CC) -o $(<) $(C++FLAGS) $(OPTIM) $(>)
+ }
+
+ actions Link bind NEEDLIBS
+ {
+ $(LINK) -o $(<) $(LINKFLAGS) $(>) $(NEEDLIBS) "$(LINKLIBS)"
+ }
+}
+
+#
+# Backwards compatibility with jam 1, where rules were uppercased.
+#
+
+rule BULK { Bulk $(<) : $(>) ; }
+rule FILE { File $(<) : $(>) ; }
+rule HDRRULE { HdrRule $(<) : $(>) ; }
+rule INSTALL { Install $(<) : $(>) ; }
+rule LIBRARY { Library $(<) : $(>) ; }
+rule LIBS { LinkLibraries $(<) : $(>) ; }
+rule LINK { Link $(<) : $(>) ; }
+rule MAIN { Main $(<) : $(>) ; }
+rule SETUID { Setuid $(<) ; }
+rule SHELL { Shell $(<) : $(>) ; }
+rule UNDEFINES { Undefines $(<) : $(>) ; }
+
+# Old INSTALL* didn't take dest directory.
+
+rule INSTALLBIN { InstallBin $(BINDIR) : $(<) ; }
+rule INSTALLLIB { InstallLib $(LIBDIR) : $(<) ; }
+rule INSTALLMAN { InstallMan $(MANDIR) : $(<) ; }
+
+# Compatibility with jam 2.2.
+
+rule addDirName { $(<) += [ FDirName $(>) ] ; }
+rule makeDirName { $(<) = [ FDirName $(>) ] ; }
+rule makeGristedName { $(<) = [ FGristSourceFiles $(>) ] ; }
+rule makeRelPath { $(<[1]) = [ FRelPath $(<[2-]) : $(>) ] ; }
+rule makeSuffixed { $(<[1]) = [ FAppendSuffix $(>) : $(<[2]) ] ; }
+
+#
+# Now include the user's Jamfile.
+#
+
+{
+ if $(JAMFILE) { include $(JAMFILE) ; }
+}
+
+}
diff --git a/src/kenlm/jam-files/engine/boost-jam.spec b/src/kenlm/jam-files/engine/boost-jam.spec
new file mode 100644
index 0000000..bc572fc
--- /dev/null
+++ b/src/kenlm/jam-files/engine/boost-jam.spec
@@ -0,0 +1,64 @@
+Name: boost-jam
+Version: 3.1.19
+Summary: Build tool
+Release: 1
+Source: %{name}-%{version}.tgz
+
+License: Boost Software License, Version 1.0
+Group: Development/Tools
+URL: http://www.boost.org
+Packager: Rene Rivera <grafik@redshift-software.com>
+BuildRoot: /var/tmp/%{name}-%{version}.root
+
+%description
+Boost Jam is a build tool based on FTJam, which in turn is based on
+Perforce Jam. It contains significant improvements made to facilitate
+its use in the Boost Build System, but should be backward compatible
+with Perforce Jam.
+
+Authors:
+    Perforce Jam : Christopher Seiwald
+ FT Jam : David Turner
+ Boost Jam : David Abrahams
+
+Copyright:
+ /+\
+ +\ Copyright 1993-2002 Christopher Seiwald and Perforce Software, Inc.
+ \+/
+ License is hereby granted to use this software and distribute it
+ freely, as long as this copyright notice is retained and modifications
+ are clearly marked.
+ ALL WARRANTIES ARE HEREBY DISCLAIMED.
+
+Also:
+ Copyright 2001-2006 David Abrahams.
+ Copyright 2002-2006 Rene Rivera.
+ Copyright 2003-2006 Vladimir Prus.
+
+ Distributed under the Boost Software License, Version 1.0.
+ (See accompanying file LICENSE_1_0.txt or http://www.boost.org/LICENSE_1_0.txt)
+
+%prep
+%setup -n %{name}-%{version}
+
+%build
+LOCATE_TARGET=bin ./build.sh $BOOST_JAM_TOOLSET
+
+%install
+rm -rf $RPM_BUILD_ROOT
+mkdir -p $RPM_BUILD_ROOT%{_bindir}
+mkdir -p $RPM_BUILD_ROOT%{_docdir}/%{name}-%{version}
+install -m 755 bin/bjam $RPM_BUILD_ROOT%{_bindir}/bjam-%{version}
+ln -sf bjam-%{version} $RPM_BUILD_ROOT%{_bindir}/bjam
+cp -R *.html *.png *.css LICENSE*.txt images jam $RPM_BUILD_ROOT%{_docdir}/%{name}-%{version}
+
+find $RPM_BUILD_ROOT -name CVS -type d -exec rm -r {} \;
+
+%files
+%defattr(-,root,root)
+%attr(755,root,root) /usr/bin/*
+%doc %{_docdir}/%{name}-%{version}
+
+
+%clean
+rm -rf $RPM_BUILD_ROOT
diff --git a/src/kenlm/jam-files/engine/boost-no-inspect b/src/kenlm/jam-files/engine/boost-no-inspect
new file mode 100644
index 0000000..8a06f3a
--- /dev/null
+++ b/src/kenlm/jam-files/engine/boost-no-inspect
@@ -0,0 +1 @@
+this really out of our hands, so tell inspect to ignore directory
\ No newline at end of file
diff --git a/src/kenlm/jam-files/engine/build.bat b/src/kenlm/jam-files/engine/build.bat
new file mode 100644
index 0000000..d435c4e
--- /dev/null
+++ b/src/kenlm/jam-files/engine/build.bat
@@ -0,0 +1,590 @@
+@ECHO OFF
+
+REM ~ Copyright 2002-2007 Rene Rivera.
+REM ~ Distributed under the Boost Software License, Version 1.0.
+REM ~ (See accompanying file LICENSE_1_0.txt or http://www.boost.org/LICENSE_1_0.txt)
+
+setlocal
+goto Start
+
+
+:Set_Error
+color 00
+goto :eof
+
+
+:Clear_Error
+ver >nul
+goto :eof
+
+
+:Error_Print
+REM Output an error message and set the errorlevel to indicate failure.
+setlocal
+ECHO ###
+ECHO ### %1
+ECHO ###
+ECHO ### You can specify the toolset as the argument, i.e.:
+ECHO ### .\build.bat msvc
+ECHO ###
+ECHO ### Toolsets supported by this script are: borland, como, gcc, gcc-nocygwin,
+ECHO ### intel-win32, metrowerks, mingw, msvc, vc7, vc8, vc9, vc10, vc11, vc12
+ECHO ###
+call :Set_Error
+endlocal
+goto :eof
+
+
+:Test_Path
+REM Tests for the given file (executable) presence in the directories in the PATH
+REM environment variable. Additionally sets FOUND_PATH to the path of the
+REM found file.
+call :Clear_Error
+setlocal
+set test=%~$PATH:1
+endlocal
+if not errorlevel 1 set FOUND_PATH=%~dp$PATH:1
+goto :eof
+
+
+:Test_Option
+REM Tests whether the given string is in the form of an option: "--*"
+call :Clear_Error
+setlocal
+set test=%1
+if not defined test (
+ call :Set_Error
+ goto Test_Option_End
+)
+set test=###%test%###
+set test=%test:"###=%
+set test=%test:###"=%
+set test=%test:###=%
+if not "-" == "%test:~1,1%" call :Set_Error
+:Test_Option_End
+endlocal
+goto :eof
+
+
+:Test_Empty
+REM Tests whether the given string is not empty
+call :Clear_Error
+setlocal
+set test=%1
+if not defined test (
+ call :Clear_Error
+ goto Test_Empty_End
+)
+set test=###%test%###
+set test=%test:"###=%
+set test=%test:###"=%
+set test=%test:###=%
+if not "" == "%test%" call :Set_Error
+:Test_Empty_End
+endlocal
+goto :eof
+
+
+:Call_If_Exists
+if EXIST %1 call %*
+goto :eof
+
+
+:Guess_Toolset
+REM Try and guess the toolset to bootstrap the build with...
+REM Sets BOOST_JAM_TOOLSET to the first found toolset.
+REM May also set BOOST_JAM_TOOLSET_ROOT to the
+REM location of the found toolset.
+
+call :Clear_Error
+call :Test_Empty %ProgramFiles%
+if not errorlevel 1 set ProgramFiles=C:\Program Files
+
+call :Clear_Error
+if NOT "_%VS120COMNTOOLS%_" == "__" (
+ set "BOOST_JAM_TOOLSET=vc12"
+ set "BOOST_JAM_TOOLSET_ROOT=%VS120COMNTOOLS%..\..\VC\"
+ goto :eof)
+call :Clear_Error
+if EXIST "%ProgramFiles%\Microsoft Visual Studio 12.0\VC\VCVARSALL.BAT" (
+ set "BOOST_JAM_TOOLSET=vc12"
+ set "BOOST_JAM_TOOLSET_ROOT=%ProgramFiles%\Microsoft Visual Studio 12.0\VC\"
+ goto :eof)
+call :Clear_Error
+if NOT "_%VS110COMNTOOLS%_" == "__" (
+ set "BOOST_JAM_TOOLSET=vc11"
+ set "BOOST_JAM_TOOLSET_ROOT=%VS110COMNTOOLS%..\..\VC\"
+ goto :eof)
+call :Clear_Error
+if EXIST "%ProgramFiles%\Microsoft Visual Studio 11.0\VC\VCVARSALL.BAT" (
+ set "BOOST_JAM_TOOLSET=vc11"
+ set "BOOST_JAM_TOOLSET_ROOT=%ProgramFiles%\Microsoft Visual Studio 11.0\VC\"
+ goto :eof)
+call :Clear_Error
+if NOT "_%VS100COMNTOOLS%_" == "__" (
+ set "BOOST_JAM_TOOLSET=vc10"
+ set "BOOST_JAM_TOOLSET_ROOT=%VS100COMNTOOLS%..\..\VC\"
+ goto :eof)
+call :Clear_Error
+if EXIST "%ProgramFiles%\Microsoft Visual Studio 10.0\VC\VCVARSALL.BAT" (
+ set "BOOST_JAM_TOOLSET=vc10"
+ set "BOOST_JAM_TOOLSET_ROOT=%ProgramFiles%\Microsoft Visual Studio 10.0\VC\"
+ goto :eof)
+call :Clear_Error
+if NOT "_%VS90COMNTOOLS%_" == "__" (
+ set "BOOST_JAM_TOOLSET=vc9"
+ set "BOOST_JAM_TOOLSET_ROOT=%VS90COMNTOOLS%..\..\VC\"
+ goto :eof)
+call :Clear_Error
+if EXIST "%ProgramFiles%\Microsoft Visual Studio 9.0\VC\VCVARSALL.BAT" (
+ set "BOOST_JAM_TOOLSET=vc9"
+ set "BOOST_JAM_TOOLSET_ROOT=%ProgramFiles%\Microsoft Visual Studio 9.0\VC\"
+ goto :eof)
+call :Clear_Error
+if NOT "_%VS80COMNTOOLS%_" == "__" (
+ set "BOOST_JAM_TOOLSET=vc8"
+ set "BOOST_JAM_TOOLSET_ROOT=%VS80COMNTOOLS%..\..\VC\"
+ goto :eof)
+call :Clear_Error
+if EXIST "%ProgramFiles%\Microsoft Visual Studio 8\VC\VCVARSALL.BAT" (
+ set "BOOST_JAM_TOOLSET=vc8"
+ set "BOOST_JAM_TOOLSET_ROOT=%ProgramFiles%\Microsoft Visual Studio 8\VC\"
+ goto :eof)
+call :Clear_Error
+if NOT "_%VS71COMNTOOLS%_" == "__" (
+ set "BOOST_JAM_TOOLSET=vc7"
+ set "BOOST_JAM_TOOLSET_ROOT=%VS71COMNTOOLS%\..\..\VC7\"
+ goto :eof)
+call :Clear_Error
+if NOT "_%VCINSTALLDIR%_" == "__" (
+ REM %VCINSTALLDIR% is also set for VC9 (and probably VC8)
+ set "BOOST_JAM_TOOLSET=vc7"
+ set "BOOST_JAM_TOOLSET_ROOT=%VCINSTALLDIR%\VC7\"
+ goto :eof)
+call :Clear_Error
+if EXIST "%ProgramFiles%\Microsoft Visual Studio .NET 2003\VC7\bin\VCVARS32.BAT" (
+ set "BOOST_JAM_TOOLSET=vc7"
+ set "BOOST_JAM_TOOLSET_ROOT=%ProgramFiles%\Microsoft Visual Studio .NET 2003\VC7\"
+ goto :eof)
+call :Clear_Error
+if EXIST "%ProgramFiles%\Microsoft Visual Studio .NET\VC7\bin\VCVARS32.BAT" (
+ set "BOOST_JAM_TOOLSET=vc7"
+ set "BOOST_JAM_TOOLSET_ROOT=%ProgramFiles%\Microsoft Visual Studio .NET\VC7\"
+ goto :eof)
+call :Clear_Error
+if NOT "_%MSVCDir%_" == "__" (
+ set "BOOST_JAM_TOOLSET=msvc"
+ set "BOOST_JAM_TOOLSET_ROOT=%MSVCDir%\"
+ goto :eof)
+call :Clear_Error
+if EXIST "%ProgramFiles%\Microsoft Visual Studio\VC98\bin\VCVARS32.BAT" (
+ set "BOOST_JAM_TOOLSET=msvc"
+ set "BOOST_JAM_TOOLSET_ROOT=%ProgramFiles%\Microsoft Visual Studio\VC98\"
+ goto :eof)
+call :Clear_Error
+if EXIST "%ProgramFiles%\Microsoft Visual C++\VC98\bin\VCVARS32.BAT" (
+ set "BOOST_JAM_TOOLSET=msvc"
+ set "BOOST_JAM_TOOLSET_ROOT=%ProgramFiles%\Microsoft Visual C++\VC98\"
+ goto :eof)
+call :Clear_Error
+call :Test_Path cl.exe
+if not errorlevel 1 (
+ set "BOOST_JAM_TOOLSET=msvc"
+ set "BOOST_JAM_TOOLSET_ROOT=%FOUND_PATH%..\"
+ goto :eof)
+call :Clear_Error
+call :Test_Path vcvars32.bat
+if not errorlevel 1 (
+ set "BOOST_JAM_TOOLSET=msvc"
+ call "%FOUND_PATH%VCVARS32.BAT"
+ set "BOOST_JAM_TOOLSET_ROOT=%MSVCDir%\"
+ goto :eof)
+call :Clear_Error
+if EXIST "C:\Borland\BCC55\Bin\bcc32.exe" (
+ set "BOOST_JAM_TOOLSET=borland"
+ set "BOOST_JAM_TOOLSET_ROOT=C:\Borland\BCC55\"
+ goto :eof)
+call :Clear_Error
+call :Test_Path bcc32.exe
+if not errorlevel 1 (
+ set "BOOST_JAM_TOOLSET=borland"
+ set "BOOST_JAM_TOOLSET_ROOT=%FOUND_PATH%..\"
+ goto :eof)
+call :Clear_Error
+call :Test_Path icl.exe
+if not errorlevel 1 (
+ set "BOOST_JAM_TOOLSET=intel-win32"
+ set "BOOST_JAM_TOOLSET_ROOT=%FOUND_PATH%..\"
+ goto :eof)
+call :Clear_Error
+if EXIST "C:\MinGW\bin\gcc.exe" (
+ set "BOOST_JAM_TOOLSET=mingw"
+ set "BOOST_JAM_TOOLSET_ROOT=C:\MinGW\"
+ goto :eof)
+call :Clear_Error
+if NOT "_%CWFolder%_" == "__" (
+ set "BOOST_JAM_TOOLSET=metrowerks"
+ set "BOOST_JAM_TOOLSET_ROOT=%CWFolder%\"
+ goto :eof )
+call :Clear_Error
+call :Test_Path mwcc.exe
+if not errorlevel 1 (
+ set "BOOST_JAM_TOOLSET=metrowerks"
+ set "BOOST_JAM_TOOLSET_ROOT=%FOUND_PATH%..\..\"
+ goto :eof)
+call :Clear_Error
+call :Error_Print "Could not find a suitable toolset."
+goto :eof
+
+
+:Guess_Yacc
+REM Tries to find bison or yacc in common places so we can build the grammar.
+call :Clear_Error
+call :Test_Path yacc.exe
+if not errorlevel 1 (
+ set "YACC=yacc -d"
+ goto :eof)
+call :Clear_Error
+call :Test_Path bison.exe
+if not errorlevel 1 (
+ set "YACC=bison -d --yacc"
+ goto :eof)
+call :Clear_Error
+if EXIST "C:\Program Files\GnuWin32\bin\bison.exe" (
+ set "YACC=C:\Program Files\GnuWin32\bin\bison.exe" -d --yacc
+ goto :eof)
+call :Clear_Error
+call :Error_Print "Could not find Yacc to build the Jam grammar."
+goto :eof
+
+
+:Start
+set BOOST_JAM_TOOLSET=
+set BOOST_JAM_ARGS=
+
+REM If no arguments guess the toolset;
+REM or if first argument is an option guess the toolset;
+REM otherwise the argument is the toolset to use.
+call :Clear_Error
+call :Test_Empty %1
+if not errorlevel 1 (
+ call :Guess_Toolset
+ if not errorlevel 1 ( goto Setup_Toolset ) else ( goto Finish )
+)
+
+call :Clear_Error
+call :Test_Option %1
+if not errorlevel 1 (
+ call :Guess_Toolset
+ if not errorlevel 1 ( goto Setup_Toolset ) else ( goto Finish )
+)
+
+call :Clear_Error
+set BOOST_JAM_TOOLSET=%1
+shift
+goto Setup_Toolset
+
+
+:Setup_Toolset
+REM Setup the toolset command and options. This bit of code
+REM needs to be flexible enough to handle both when
+REM the toolset was guessed at and found, or when the toolset
+REM was indicated in the command arguments.
+REM NOTE: The strange multiple "if ?? == _toolset_" tests are that way
+REM because in BAT variables are substituted only once during a single
+REM command. A complete "if ... ( commands ) else ( commands )"
+REM is a single command, even though it's in multiple lines here.
+:Setup_Args
+call :Clear_Error
+call :Test_Empty %1
+if not errorlevel 1 goto Config_Toolset
+call :Clear_Error
+call :Test_Option %1
+if errorlevel 1 (
+ set BOOST_JAM_ARGS=%BOOST_JAM_ARGS% %1
+ shift
+ goto Setup_Args
+)
+:Config_Toolset
+if NOT "_%BOOST_JAM_TOOLSET%_" == "_metrowerks_" goto Skip_METROWERKS
+if NOT "_%CWFolder%_" == "__" (
+ set "BOOST_JAM_TOOLSET_ROOT=%CWFolder%\"
+ )
+set "PATH=%BOOST_JAM_TOOLSET_ROOT%Other Metrowerks Tools\Command Line Tools;%PATH%"
+set "BOOST_JAM_CC=mwcc -runtime ss -cwd include -DNT -lkernel32.lib -ladvapi32.lib -luser32.lib"
+set "BOOST_JAM_OPT_JAM=-o bootstrap\jam0.exe"
+set "BOOST_JAM_OPT_MKJAMBASE=-o bootstrap\mkjambase0.exe"
+set "BOOST_JAM_OPT_YYACC=-o bootstrap\yyacc0.exe"
+set "_known_=1"
+:Skip_METROWERKS
+if NOT "_%BOOST_JAM_TOOLSET%_" == "_msvc_" goto Skip_MSVC
+if NOT "_%MSVCDir%_" == "__" (
+ set "BOOST_JAM_TOOLSET_ROOT=%MSVCDir%\"
+ )
+call :Call_If_Exists "%BOOST_JAM_TOOLSET_ROOT%bin\VCVARS32.BAT"
+if not "_%BOOST_JAM_TOOLSET_ROOT%_" == "__" (
+ set "PATH=%BOOST_JAM_TOOLSET_ROOT%bin;%PATH%"
+ )
+set "BOOST_JAM_CC=cl /nologo /GZ /Zi /MLd /Fobootstrap/ /Fdbootstrap/ -DNT -DYYDEBUG kernel32.lib advapi32.lib user32.lib"
+set "BOOST_JAM_OPT_JAM=/Febootstrap\jam0"
+set "BOOST_JAM_OPT_MKJAMBASE=/Febootstrap\mkjambase0"
+set "BOOST_JAM_OPT_YYACC=/Febootstrap\yyacc0"
+set "_known_=1"
+:Skip_MSVC
+if NOT "_%BOOST_JAM_TOOLSET%_" == "_vc7_" goto Skip_VC7
+if NOT "_%VS71COMNTOOLS%_" == "__" (
+ set "BOOST_JAM_TOOLSET_ROOT=%VS71COMNTOOLS%..\..\VC7\"
+ )
+if "_%VCINSTALLDIR%_" == "__" call :Call_If_Exists "%BOOST_JAM_TOOLSET_ROOT%bin\VCVARS32.BAT"
+if NOT "_%BOOST_JAM_TOOLSET_ROOT%_" == "__" (
+ if "_%VCINSTALLDIR%_" == "__" (
+ set "PATH=%BOOST_JAM_TOOLSET_ROOT%bin;%PATH%"
+ ) )
+set "BOOST_JAM_CC=cl /nologo /GZ /Zi /MLd /Fobootstrap/ /Fdbootstrap/ -DNT -DYYDEBUG kernel32.lib advapi32.lib user32.lib"
+set "BOOST_JAM_OPT_JAM=/Febootstrap\jam0"
+set "BOOST_JAM_OPT_MKJAMBASE=/Febootstrap\mkjambase0"
+set "BOOST_JAM_OPT_YYACC=/Febootstrap\yyacc0"
+set "_known_=1"
+:Skip_VC7
+if NOT "_%BOOST_JAM_TOOLSET%_" == "_vc8_" goto Skip_VC8
+if NOT "_%VS80COMNTOOLS%_" == "__" (
+ set "BOOST_JAM_TOOLSET_ROOT=%VS80COMNTOOLS%..\..\VC\"
+ )
+if "_%VCINSTALLDIR%_" == "__" call :Call_If_Exists "%BOOST_JAM_TOOLSET_ROOT%VCVARSALL.BAT" %BOOST_JAM_ARGS%
+if NOT "_%BOOST_JAM_TOOLSET_ROOT%_" == "__" (
+ if "_%VCINSTALLDIR%_" == "__" (
+ set "PATH=%BOOST_JAM_TOOLSET_ROOT%bin;%PATH%"
+ ) )
+set "BOOST_JAM_CC=cl /nologo /RTC1 /Zi /MTd /Fobootstrap/ /Fdbootstrap/ -DNT -DYYDEBUG -wd4996 kernel32.lib advapi32.lib user32.lib"
+set "BOOST_JAM_OPT_JAM=/Febootstrap\jam0"
+set "BOOST_JAM_OPT_MKJAMBASE=/Febootstrap\mkjambase0"
+set "BOOST_JAM_OPT_YYACC=/Febootstrap\yyacc0"
+set "_known_=1"
+:Skip_VC8
+if NOT "_%BOOST_JAM_TOOLSET%_" == "_vc9_" goto Skip_VC9
+if NOT "_%VS90COMNTOOLS%_" == "__" (
+ set "BOOST_JAM_TOOLSET_ROOT=%VS90COMNTOOLS%..\..\VC\"
+ )
+if "_%VCINSTALLDIR%_" == "__" call :Call_If_Exists "%BOOST_JAM_TOOLSET_ROOT%VCVARSALL.BAT" %BOOST_JAM_ARGS%
+if NOT "_%BOOST_JAM_TOOLSET_ROOT%_" == "__" (
+ if "_%VCINSTALLDIR%_" == "__" (
+ set "PATH=%BOOST_JAM_TOOLSET_ROOT%bin;%PATH%"
+ ) )
+set "BOOST_JAM_CC=cl /nologo /RTC1 /Zi /MTd /Fobootstrap/ /Fdbootstrap/ -DNT -DYYDEBUG -wd4996 kernel32.lib advapi32.lib user32.lib"
+set "BOOST_JAM_OPT_JAM=/Febootstrap\jam0"
+set "BOOST_JAM_OPT_MKJAMBASE=/Febootstrap\mkjambase0"
+set "BOOST_JAM_OPT_YYACC=/Febootstrap\yyacc0"
+set "_known_=1"
+:Skip_VC9
+if NOT "_%BOOST_JAM_TOOLSET%_" == "_vc10_" goto Skip_VC10
+if NOT "_%VS100COMNTOOLS%_" == "__" (
+ set "BOOST_JAM_TOOLSET_ROOT=%VS100COMNTOOLS%..\..\VC\"
+ )
+if "_%VCINSTALLDIR%_" == "__" call :Call_If_Exists "%BOOST_JAM_TOOLSET_ROOT%VCVARSALL.BAT" %BOOST_JAM_ARGS%
+if NOT "_%BOOST_JAM_TOOLSET_ROOT%_" == "__" (
+ if "_%VCINSTALLDIR%_" == "__" (
+ set "PATH=%BOOST_JAM_TOOLSET_ROOT%bin;%PATH%"
+ ) )
+set "BOOST_JAM_CC=cl /nologo /RTC1 /Zi /MTd /Fobootstrap/ /Fdbootstrap/ -DNT -DYYDEBUG -wd4996 kernel32.lib advapi32.lib user32.lib"
+set "BOOST_JAM_OPT_JAM=/Febootstrap\jam0"
+set "BOOST_JAM_OPT_MKJAMBASE=/Febootstrap\mkjambase0"
+set "BOOST_JAM_OPT_YYACC=/Febootstrap\yyacc0"
+set "_known_=1"
+:Skip_VC10
+if NOT "_%BOOST_JAM_TOOLSET%_" == "_vc11_" goto Skip_VC11
+if NOT "_%VS110COMNTOOLS%_" == "__" (
+ set "BOOST_JAM_TOOLSET_ROOT=%VS110COMNTOOLS%..\..\VC\"
+ )
+if "_%VCINSTALLDIR%_" == "__" call :Call_If_Exists "%BOOST_JAM_TOOLSET_ROOT%VCVARSALL.BAT" %BOOST_JAM_ARGS%
+if NOT "_%BOOST_JAM_TOOLSET_ROOT%_" == "__" (
+ if "_%VCINSTALLDIR%_" == "__" (
+ set "PATH=%BOOST_JAM_TOOLSET_ROOT%bin;%PATH%"
+ ) )
+set "BOOST_JAM_CC=cl /nologo /RTC1 /Zi /MTd /Fobootstrap/ /Fdbootstrap/ -DNT -DYYDEBUG -wd4996 kernel32.lib advapi32.lib user32.lib"
+set "BOOST_JAM_OPT_JAM=/Febootstrap\jam0"
+set "BOOST_JAM_OPT_MKJAMBASE=/Febootstrap\mkjambase0"
+set "BOOST_JAM_OPT_YYACC=/Febootstrap\yyacc0"
+set "_known_=1"
+:Skip_VC11
+if NOT "_%BOOST_JAM_TOOLSET%_" == "_vc12_" goto Skip_VC12
+if NOT "_%VS120COMNTOOLS%_" == "__" (
+ set "BOOST_JAM_TOOLSET_ROOT=%VS120COMNTOOLS%..\..\VC\"
+ )
+if "_%VCINSTALLDIR%_" == "__" call :Call_If_Exists "%BOOST_JAM_TOOLSET_ROOT%VCVARSALL.BAT" %BOOST_JAM_ARGS%
+if NOT "_%BOOST_JAM_TOOLSET_ROOT%_" == "__" (
+ if "_%VCINSTALLDIR%_" == "__" (
+ set "PATH=%BOOST_JAM_TOOLSET_ROOT%bin;%PATH%"
+ ) )
+set "BOOST_JAM_CC=cl /nologo /RTC1 /Zi /MTd /Fobootstrap/ /Fdbootstrap/ -DNT -DYYDEBUG -wd4996 kernel32.lib advapi32.lib user32.lib"
+set "BOOST_JAM_OPT_JAM=/Febootstrap\jam0"
+set "BOOST_JAM_OPT_MKJAMBASE=/Febootstrap\mkjambase0"
+set "BOOST_JAM_OPT_YYACC=/Febootstrap\yyacc0"
+set "_known_=1"
+:Skip_VC12
+if NOT "_%BOOST_JAM_TOOLSET%_" == "_borland_" goto Skip_BORLAND
+if "_%BOOST_JAM_TOOLSET_ROOT%_" == "__" (
+ call :Test_Path bcc32.exe )
+if "_%BOOST_JAM_TOOLSET_ROOT%_" == "__" (
+ if not errorlevel 1 (
+ set "BOOST_JAM_TOOLSET_ROOT=%FOUND_PATH%..\"
+ ) )
+if not "_%BOOST_JAM_TOOLSET_ROOT%_" == "__" (
+ set "PATH=%BOOST_JAM_TOOLSET_ROOT%Bin;%PATH%"
+ )
+set "BOOST_JAM_CC=bcc32 -WC -w- -q -I%BOOST_JAM_TOOLSET_ROOT%Include -L%BOOST_JAM_TOOLSET_ROOT%Lib /DNT -nbootstrap"
+set "BOOST_JAM_OPT_JAM=-ejam0"
+set "BOOST_JAM_OPT_MKJAMBASE=-emkjambasejam0"
+set "BOOST_JAM_OPT_YYACC=-eyyacc0"
+set "_known_=1"
+:Skip_BORLAND
+if NOT "_%BOOST_JAM_TOOLSET%_" == "_como_" goto Skip_COMO
+set "BOOST_JAM_CC=como -DNT"
+set "BOOST_JAM_OPT_JAM=-o bootstrap\jam0.exe"
+set "BOOST_JAM_OPT_MKJAMBASE=-o bootstrap\mkjambase0.exe"
+set "BOOST_JAM_OPT_YYACC=-o bootstrap\yyacc0.exe"
+set "_known_=1"
+:Skip_COMO
+if NOT "_%BOOST_JAM_TOOLSET%_" == "_gcc_" goto Skip_GCC
+set "BOOST_JAM_CC=gcc -DNT"
+set "BOOST_JAM_OPT_JAM=-o bootstrap\jam0.exe"
+set "BOOST_JAM_OPT_MKJAMBASE=-o bootstrap\mkjambase0.exe"
+set "BOOST_JAM_OPT_YYACC=-o bootstrap\yyacc0.exe"
+set "_known_=1"
+:Skip_GCC
+if NOT "_%BOOST_JAM_TOOLSET%_" == "_gcc-nocygwin_" goto Skip_GCC_NOCYGWIN
+set "BOOST_JAM_CC=gcc -DNT -mno-cygwin"
+set "BOOST_JAM_OPT_JAM=-o bootstrap\jam0.exe"
+set "BOOST_JAM_OPT_MKJAMBASE=-o bootstrap\mkjambase0.exe"
+set "BOOST_JAM_OPT_YYACC=-o bootstrap\yyacc0.exe"
+set "_known_=1"
+:Skip_GCC_NOCYGWIN
+if NOT "_%BOOST_JAM_TOOLSET%_" == "_intel-win32_" goto Skip_INTEL_WIN32
+set "BOOST_JAM_CC=icl -DNT /nologo kernel32.lib advapi32.lib user32.lib"
+set "BOOST_JAM_OPT_JAM=/Febootstrap\jam0"
+set "BOOST_JAM_OPT_MKJAMBASE=/Febootstrap\mkjambase0"
+set "BOOST_JAM_OPT_YYACC=/Febootstrap\yyacc0"
+set "_known_=1"
+:Skip_INTEL_WIN32
+if NOT "_%BOOST_JAM_TOOLSET%_" == "_mingw_" goto Skip_MINGW
+if not "_%BOOST_JAM_TOOLSET_ROOT%_" == "__" (
+ set "PATH=%BOOST_JAM_TOOLSET_ROOT%bin;%PATH%"
+ )
+set "BOOST_JAM_CC=gcc -DNT"
+set "BOOST_JAM_OPT_JAM=-o bootstrap\jam0.exe"
+set "BOOST_JAM_OPT_MKJAMBASE=-o bootstrap\mkjambase0.exe"
+set "BOOST_JAM_OPT_YYACC=-o bootstrap\yyacc0.exe"
+set "_known_=1"
+:Skip_MINGW
+call :Clear_Error
+if "_%_known_%_" == "__" (
+ call :Error_Print "Unknown toolset: %BOOST_JAM_TOOLSET%"
+)
+if errorlevel 1 goto Finish
+
+echo ###
+echo ### Using '%BOOST_JAM_TOOLSET%' toolset.
+echo ###
+
+set YYACC_SOURCES=yyacc.c
+set MKJAMBASE_SOURCES=mkjambase.c
+set BJAM_SOURCES=
+set BJAM_SOURCES=%BJAM_SOURCES% command.c compile.c constants.c debug.c
+set BJAM_SOURCES=%BJAM_SOURCES% execcmd.c execnt.c filent.c frames.c function.c
+set BJAM_SOURCES=%BJAM_SOURCES% glob.c hash.c hdrmacro.c headers.c jam.c
+set BJAM_SOURCES=%BJAM_SOURCES% jambase.c jamgram.c lists.c make.c make1.c
+set BJAM_SOURCES=%BJAM_SOURCES% object.c option.c output.c parse.c pathnt.c
+set BJAM_SOURCES=%BJAM_SOURCES% pathsys.c regexp.c rules.c scan.c search.c
+set BJAM_SOURCES=%BJAM_SOURCES% subst.c timestamp.c variable.c modules.c
+set BJAM_SOURCES=%BJAM_SOURCES% strings.c filesys.c builtins.c md5.c class.c
+set BJAM_SOURCES=%BJAM_SOURCES% cwd.c w32_getreg.c native.c modules/set.c
+set BJAM_SOURCES=%BJAM_SOURCES% modules/path.c modules/regex.c
+set BJAM_SOURCES=%BJAM_SOURCES% modules/property-set.c modules/sequence.c
+set BJAM_SOURCES=%BJAM_SOURCES% modules/order.c
+
+set BJAM_UPDATE=
+:Check_Update
+call :Test_Empty %1
+if not errorlevel 1 goto Check_Update_End
+call :Clear_Error
+setlocal
+set test=%1
+set test=###%test%###
+set test=%test:"###=%
+set test=%test:###"=%
+set test=%test:###=%
+if "%test%" == "--update" goto Found_Update
+endlocal
+shift
+if not "_%BJAM_UPDATE%_" == "_update_" goto Check_Update
+:Found_Update
+endlocal
+set BJAM_UPDATE=update
+:Check_Update_End
+if "_%BJAM_UPDATE%_" == "_update_" (
+ if not exist ".\bootstrap\jam0.exe" (
+ set BJAM_UPDATE=
+ )
+)
+
+@echo ON
+@if "_%BJAM_UPDATE%_" == "_update_" goto Skip_Bootstrap
+if exist bootstrap rd /S /Q bootstrap
+md bootstrap
+@if not exist jamgram.y goto Bootstrap_GrammarPrep
+@if not exist jamgramtab.h goto Bootstrap_GrammarPrep
+@goto Skip_GrammarPrep
+:Bootstrap_GrammarPrep
+%BOOST_JAM_CC% %BOOST_JAM_OPT_YYACC% %YYACC_SOURCES%
+@if not exist ".\bootstrap\yyacc0.exe" goto Skip_GrammarPrep
+.\bootstrap\yyacc0 jamgram.y jamgramtab.h jamgram.yy
+:Skip_GrammarPrep
+@if not exist jamgram.c goto Bootstrap_GrammarBuild
+@if not exist jamgram.h goto Bootstrap_GrammarBuild
+@goto Skip_GrammarBuild
+:Bootstrap_GrammarBuild
+@echo OFF
+if "_%YACC%_" == "__" (
+ call :Guess_Yacc
+)
+if errorlevel 1 goto Finish
+@echo ON
+%YACC% jamgram.y
+@if errorlevel 1 goto Finish
+del /f jamgram.c
+rename y.tab.c jamgram.c
+del /f jamgram.h
+rename y.tab.h jamgram.h
+:Skip_GrammarBuild
+@echo ON
+@if exist jambase.c goto Skip_Jambase
+%BOOST_JAM_CC% %BOOST_JAM_OPT_MKJAMBASE% %MKJAMBASE_SOURCES%
+@if not exist ".\bootstrap\mkjambase0.exe" goto Skip_Jambase
+.\bootstrap\mkjambase0 jambase.c Jambase
+:Skip_Jambase
+%BOOST_JAM_CC% %BOOST_JAM_OPT_JAM% %BJAM_SOURCES%
+:Skip_Bootstrap
+@if not exist ".\bootstrap\jam0.exe" goto Skip_Jam
+@set args=%*
+@echo OFF
+:Set_Args
+setlocal
+call :Test_Empty %args%
+if not errorlevel 1 goto Set_Args_End
+set test=###%args:~0,2%###
+set test=%test:"###=%
+set test=%test:###"=%
+set test=%test:###=%
+set test=%test:~0,1%
+if "-" == "%test%" goto Set_Args_End
+endlocal
+set args=%args:~1%
+goto Set_Args
+:Set_Args_End
+@echo ON
+@if "_%BJAM_UPDATE%_" == "_update_" goto Skip_Clean
+.\bootstrap\jam0 -f build.jam --toolset=%BOOST_JAM_TOOLSET% "--toolset-root=%BOOST_JAM_TOOLSET_ROOT% " %args% clean
+:Skip_Clean
+.\bootstrap\jam0 -f build.jam --toolset=%BOOST_JAM_TOOLSET% "--toolset-root=%BOOST_JAM_TOOLSET_ROOT% " %args%
+:Skip_Jam
+
+:Finish
diff --git a/src/kenlm/jam-files/engine/build.jam b/src/kenlm/jam-files/engine/build.jam
new file mode 100644
index 0000000..f13d950
--- /dev/null
+++ b/src/kenlm/jam-files/engine/build.jam
@@ -0,0 +1,1022 @@
+#~ Copyright 2002-2007 Rene Rivera.
+#~ Distributed under the Boost Software License, Version 1.0.
+#~ (See accompanying file LICENSE_1_0.txt or copy at
+#~ http://www.boost.org/LICENSE_1_0.txt)
+
+# Clean env vars of any "extra" empty values.
+for local v in ARGV CC CFLAGS LIBS
+{
+ local values ;
+ for local x in $($(v))
+ {
+ if $(x) != ""
+ {
+ values += $(x) ;
+ }
+ }
+ $(v) = $(values) ;
+}
+
+# Platform related specifics.
+if $(OS) = NT { rule .path { return "$(<:J=\\)" ; } ./ = "/" ; }
+else { rule .path { return "$(<:J=/)" ; } }
+
+. = "." ;
+./ ?= "" ;
+
+# Info about what we are building.
+_VERSION_ = 3 1 19 ;
+NAME = boost-jam ;
+VERSION = $(_VERSION_:J=$(.)) ;
+RELEASE = 1 ;
+LICENSE = LICENSE_1_0 ;
+
+# Generate development debug binaries?
+if --debug in $(ARGV)
+{
+ debug = true ;
+}
+
+if --profile in $(ARGV)
+{
+ profile = true ;
+}
+
+# Attempt to generate and/or build the grammar?
+if --grammar in $(ARGV)
+{
+ grammar = true ;
+}
+
+# Do we need to add a default build type argument?
+if ! ( --release in $(ARGV) ) &&
+ ! ( --debug in $(ARGV) ) &&
+ ! ( --profile in $(ARGV) )
+{
+ ARGV += --release ;
+}
+
+# Enable, and configure, Python hooks.
+with-python = ;
+python-location = [ MATCH --with-python=(.*) : $(ARGV) ] ;
+if $(python-location)
+{
+ with-python = true ;
+}
+if $(with-python)
+{
+ if $(OS) = NT
+ {
+ --python-include = [ .path $(python-location) include ] ;
+ --python-lib = ;
+ for local v in 27 26 25 24 23 22
+ {
+ --python-lib ?=
+ [ GLOB [ .path $(python-location) libs ] : "python$(v).lib" ]
+ [ GLOB $(python-location) [ .path $(python-location) libs ]
+ $(Path) $(PATH) $(path) : "python$(v).dll" ]
+ ;
+ if ! $(--python-lib[2])
+ {
+ --python-lib = ;
+ }
+ }
+ --python-lib = $(--python-lib[1]) ;
+ }
+ else if $(OS) = MACOSX
+ {
+ --python-include = [ .path $(python-location) Headers ] ;
+ --python-lib = $(python-location) Python ;
+ }
+ else
+ {
+ --python-include = ;
+ --python-lib = ;
+ for local v in 2.7 2.6 2.5 2.4 2.3 2.2
+ {
+ local inc = [ GLOB [ .path $(python-location) include ] : python$(v) ] ;
+ local lib = [ GLOB [ .path $(python-location) lib ] : libpython$(v)* ] ;
+ if $(inc) && $(lib)
+ {
+ --python-include ?= $(inc) ;
+ --python-lib ?= $(lib[1]:D) python$(v) ;
+ }
+ }
+ }
+}
+
+# Boehm GC?
+if --gc in $(ARGV)
+{
+ --boehm-gc = true ;
+}
+if $(--boehm-gc)
+{
+ --extra-include += [ .path [ PWD ] "boehm_gc" "include" ] ;
+}
+
+# Duma?
+if --duma in $(ARGV)
+{
+ --duma = true ;
+}
+if $(--duma)
+{
+ --extra-include += [ .path [ PWD ] "duma" ] ;
+}
+
+# An explicit root for the toolset? (trim spaces)
+toolset-root = [ MATCH --toolset-root=(.*) : $(ARGV) ] ;
+{
+ local t = [ MATCH "[ ]*(.*)" : $(toolset-root:J=" ") ] ;
+ toolset-root = ;
+ while $(t)
+ {
+ t = [ MATCH "([^ ]+)([ ]*)(.*)" : $(t) ] ;
+ toolset-root += $(t[1]) ;
+ if $(t[3]) { toolset-root += $(t[2]) ; }
+ t = $(t[3]) ;
+ }
+ toolset-root = $(toolset-root:J="") ;
+}
+
+# Configure the implemented toolsets. These are minimal commands and options to
+# compile the full Jam. When adding new toolsets make sure to add them to the
+# "known" list also.
+
+rule toolset ( name command .type ? : opt.out + : opt.define * : flags * : linklibs * )
+{
+ .type ?= "" ;
+ tool.$(name)$(.type).cc ?= $(command) ;
+ tool.$(name)$(.type).opt.out ?= $(opt.out) ;
+ tool.$(name)$(.type).opt.define ?= $(opt.define) ;
+ tool.$(name)$(.type).flags ?= $(flags) ;
+ tool.$(name)$(.type).linklibs ?= $(linklibs) ;
+ if ! $(name) in $(toolsets) { toolsets += $(name) ; }
+}
+
+rule if-os ( os + : yes-opt * : no-opt * )
+ { if $(os) in $(OS) { return $(yes-opt) ; } else { return $(no-opt) ; } }
+
+rule opt ( type : yes-opt * : no-opt * )
+ { if $(type) in $(ARGV) { return $(yes-opt) ; } else { return $(no-opt) ; } }
+
+## HP-UX aCC compiler
+toolset acc cc : "-o " : -D
+ : -Ae
+ [ opt --release : -s -O3 ]
+ [ opt --debug : -g -pg ]
+ -I$(--python-include) -I$(--extra-include)
+ : -L$(--python-lib[1]) -l$(--python-lib[2]) ;
+## Borland C++ 5.5.x
+toolset borland bcc32 : -e -n : /D
+ : -WC -w- -q "-I$(toolset-root)Include" "-L$(toolset-root)Lib"
+ [ opt --release : -O2 -vi -w-inl ]
+ [ opt --debug : -v -Od -vi- ]
+ -I$(--python-include) -I$(--extra-include)
+ : $(--python-lib[1]) ;
+## Generic Unix cc
+if ! $(CC) { CC = cc ; }
+toolset cc $(CC) : "-o " : -D
+ : $(CFLAGS)
+ [ opt --release : -s -O ]
+ [ opt --debug : -g ]
+ -I$(--python-include) -I$(--extra-include)
+ : $(LIBS) -L$(--python-lib[1]) -l$(--python-lib[2]) ;
+## Comeau C/C++ 4.x
+toolset como como : "-o " : -D
+ : --c
+ [ opt --release : --inlining ]
+ [ opt --debug : --no_inlining ]
+ -I$(--python-include) -I$(--extra-include)
+ : -L$(--python-lib[1]) -l$(--python-lib[2]) ;
+## Clang Linux 2.8+
+toolset clang clang : "-o " : -D
+ : -Wno-unused -Wno-format
+ [ opt --release : -Os ]
+ [ opt --debug : -g -O0 -fno-inline ]
+ [ opt --profile : -finline-functions -g ]
+ -I$(--python-include) -I$(--extra-include)
+ : -L$(--python-lib[1]) -l$(--python-lib[2]) ;
+## MacOSX Darwin, using GCC 2.9.x, 3.x
+toolset darwin cc : "-o " : -D
+ :
+ [ opt --release : -Wl,-x -O3 -finline-functions ]
+ [ opt --debug : -g -O0 -fno-inline -pg ]
+ [ opt --profile : -Wl,-x -O3 -finline-functions -g -pg ]
+ -I$(--python-include) -I$(--extra-include)
+ : -L$(--python-lib[1]) -l$(--python-lib[2]) ;
+## GCC 2.x, 3.x, 4.x
+toolset gcc gcc : "-o " : -D
+ : -pedantic -fno-strict-aliasing
+ [ opt --release : [ opt --symbols : -g : -s ] -O3 ]
+ [ opt --debug : -g -O0 -fno-inline ]
+ [ opt --profile : -O3 -g -pg ]
+ -I$(--python-include) -I$(--extra-include) -Wno-long-long
+ : -L$(--python-lib[1]) -l$(--python-lib[2]) ;
+## GCC 2.x, 3.x on CYGWIN but without cygwin1.dll
+toolset gcc-nocygwin gcc : "-o " : -D
+ : -s -O3 -mno-cygwin
+ [ opt --release : -finline-functions ]
+ [ opt --debug : -s -O3 -fno-inline -pg ]
+ -I$(--python-include) -I$(--extra-include)
+ : -L$(--python-lib[1]) -l$(--python-lib[2]) ;
+## Intel C/C++ for Darwin
+toolset intel-darwin icc : "-o " : -D
+ :
+ [ opt --release : -O3 ]
+ [ opt --debug : -g -O0 -p ]
+ -I$(--python-include) -I$(--extra-include)
+ : -L$(--python-lib[1]) -l$(--python-lib[2]) ;
+## Intel C/C++ for Linux
+toolset intel-linux icc : "-o " : -D
+ :
+ [ opt --release : -Xlinker -s -O3 ]
+ [ opt --debug : -g -O0 -p ]
+ -I$(--python-include) -I$(--extra-include)
+ : -L$(--python-lib[1]) -l$(--python-lib[2]) ;
+## Intel C/C++ for Win32
+toolset intel-win32 icl : /Fe : -D
+ : /nologo
+ [ opt --release : /MT /O2 /Ob2 /Gy /GF /GA /GB ]
+ [ opt --debug : /MTd /DEBUG /Z7 /Od /Ob0 ]
+ -I$(--python-include) -I$(--extra-include)
+ : kernel32.lib advapi32.lib user32.lib $(--python-lib[1]) ;
+## KCC ?
+toolset kcc KCC : "-o " : -D
+ :
+ [ opt --release : -s +K2 ]
+ [ opt --debug : -g +K0 ]
+ -I$(--python-include) -I$(--extra-include)
+ : -L$(--python-lib[1]) -l$(--python-lib[2]) ;
+## Borland Kylix
+toolset kylix bc++ : -o : -D
+ : -tC -q
+ [ opt --release : -O2 -vi -w-inl ]
+ [ opt --debug : -v -Od -vi- ]
+ -I$(--python-include) -I$(--extra-include)
+ : -L$(--python-lib[1]) -l$(--python-lib[2]) ;
+## Metrowerks CodeWarrior 8.x
+{
+ # Even though CW can compile all files at once, it crashes if it tries in
+ # the bjam case.
+ local mwcc ; if $(OS) != NT { mwcc = mwc$(OSPLAT:L) ; }
+ mwcc ?= mwcc ;
+ toolset metrowerks $(mwcc) : "-o " : -D
+ : -c -lang c -subsystem console -cwd include
+ [ opt --release : -runtime ss -opt full -inline all ]
+ [ opt --debug : -runtime ssd -opt none -inline off ]
+ -I$(--python-include) -I$(--extra-include) ;
+ toolset metrowerks $(mwcc) .link : "-o " :
+ : -subsystem console -lkernel32.lib -ladvapi32.lib -luser32.lib
+ [ opt --release : -runtime ss ]
+ [ opt --debug : -runtime ssd ]
+ : $(--python-lib[1]) ;
+}
+## MINGW GCC
+toolset mingw gcc : "-o " : -D
+ :
+ [ opt --release : -s -O3 -finline-functions ]
+ [ opt --debug : -g -O0 -fno-inline -pg ]
+ -I$(--python-include) -I$(--extra-include)
+ : $(--python-lib[2]) ;
+## MIPS Pro
+toolset mipspro cc : "-o " : -D
+ :
+ [ opt --release : -s -O3 -g0 -INLINE:none ]
+ [ opt --debug : -g -O0 -INLINE ]
+ -I$(--python-include) -I$(--extra-include)
+ : -L$(--python-lib[1]) -l$(--python-lib[2]) ;
+## Microsoft Visual Studio C++ 6.x
+toolset msvc cl : /Fe /Fe /Fd /Fo : -D
+ : /nologo
+ [ opt --release : /ML /O2 /Ob2 /Gy /GF /GA /GB ]
+ [ opt --debug : /MLd /DEBUG /Z7 /Od /Ob0 ]
+ -I$(--python-include) -I$(--extra-include)
+ : kernel32.lib advapi32.lib user32.lib $(--python-lib[1]) ;
+## QNX 6.x GCC 3.x/2.95.3
+toolset qcc qcc : "-o " : -D
+ : -Wc,-pedantic -Wc,-fno-strict-aliasing
+ [ opt --release : [ opt --symbols : -g ] -O3 -Wc,-finline-functions ]
+ [ opt --debug : -g -O0 -Wc,-fno-inline ]
+ -I$(--python-include) -I$(--extra-include)
+ : -L$(--python-lib[1]) -l$(--python-lib[2]) ;
+## Qlogic Pathscale 2.4
+toolset pathscale pathcc : "-o " : -D
+ :
+ [ opt --release : -s -Ofast -O3 ]
+ [ opt --debug : -g ]
+ -I$(--python-include) -I$(--extra-include)
+ : -L$(--python-lib[1]) -l$(--python-lib[2]) ;
+## Portland Group Pgi 6.2
+toolset pgi pgcc : "-o " : -D
+ :
+ [ opt --release : -s -O3 ]
+ [ opt --debug : -g ]
+ -I$(--python-include) -I$(--extra-include)
+ : -L$(--python-lib[1]) -l$(--python-lib[2]) ;
+## Sun Workshop 6 C++
+toolset sun cc : "-o " : -D
+ :
+ [ opt --release : -s -xO3 ]
+ [ opt --debug : -g ]
+ -I$(--python-include) -I$(--extra-include)
+ : -L$(--python-lib[1]) -l$(--python-lib[2]) ;
+## Sun Workshop 6 C++ (old alias)
+toolset sunpro cc : "-o " : -D
+ :
+ [ opt --release : -s -xO3 ]
+ [ opt --debug : -g ]
+ -I$(--python-include) -I$(--extra-include)
+ : -L$(--python-lib[1]) -l$(--python-lib[2]) ;
+## Compaq Alpha CXX
+toolset tru64cxx cc : "-o " : -D
+ :
+ [ opt --release : -s -O5 -inline speed ]
+ [ opt --debug : -g -O0 -pg ]
+ -I$(--python-include) -I$(--extra-include)
+ : -L$(--python-lib[1]) -l$(--python-lib[2]) ;
+## IBM VisualAge C++
+toolset vacpp xlc : "-o " : -D
+ :
+ [ opt --release : -s -O3 -qstrict -qinline ]
+ [ opt --debug : -g -qNOOPTimize -qnoinline -pg ]
+ -I$(--python-include) -I$(--extra-include)
+ : -L$(--python-lib[1]) -l$(--python-lib[2]) [ if-os AIX : -bmaxdata:0x40000000 ] ;
+## Microsoft Visual C++ .NET 7.x
+toolset vc7 cl : /Fe /Fe /Fd /Fo : -D
+ : /nologo
+ [ opt --release : /ML /O2 /Ob2 /Gy /GF /GA /GB ]
+ [ opt --debug : /MLd /DEBUG /Z7 /Od /Ob0 ]
+ -I$(--python-include) -I$(--extra-include)
+ : kernel32.lib advapi32.lib user32.lib $(--python-lib[1]) ;
+## Microsoft Visual C++ 2005
+toolset vc8 cl : /Fe /Fe /Fd /Fo : -D
+ : /nologo
+ [ opt --release : /MT /O2 /Ob2 /Gy /GF /GA /wd4996 ]
+ [ opt --debug : /MTd /DEBUG /Z7 /Od /Ob0 /wd4996 ]
+ -I$(--python-include) -I$(--extra-include)
+ : kernel32.lib advapi32.lib user32.lib $(--python-lib[1]) ;
+## Microsoft Visual C++ 2008
+toolset vc9 cl : /Fe /Fe /Fd /Fo : -D
+ : /nologo
+ [ opt --release : /MT /O2 /Ob2 /Gy /GF /GA /wd4996 ]
+ [ opt --debug : /MTd /DEBUG /Z7 /Od /Ob0 /wd4996 ]
+ -I$(--python-include) -I$(--extra-include)
+ : kernel32.lib advapi32.lib user32.lib $(--python-lib[1]) ;
+## Microsoft Visual C++ 2010
+toolset vc10 cl : /Fe /Fe /Fd /Fo : -D
+ : /nologo
+ [ opt --release : /MT /O2 /Ob2 /Gy /GF /GA /wd4996 ]
+ [ opt --debug : /MTd /DEBUG /Z7 /Od /Ob0 /wd4996 ]
+ -I$(--python-include) -I$(--extra-include)
+ : kernel32.lib advapi32.lib user32.lib $(--python-lib[1]) ;
+toolset vc11 cl : /Fe /Fe /Fd /Fo : -D
+ : /nologo
+ [ opt --release : /GL /MT /O2 /Ob2 /Gy /GF /GA /wd4996 ]
+ [ opt --debug : /MTd /DEBUG /Z7 /Od /Ob0 /wd4996 ]
+ -I$(--python-include) -I$(--extra-include)
+ : kernel32.lib advapi32.lib user32.lib $(--python-lib[1]) ;
+toolset vc12 cl : /Fe /Fe /Fd /Fo : -D
+ : /nologo
+ [ opt --release : /GL /MT /O2 /Ob2 /Gy /GF /GA /wd4996 ]
+ [ opt --debug : /MTd /DEBUG /Z7 /Od /Ob0 /wd4996 ]
+ -I$(--python-include) -I$(--extra-include)
+ : kernel32.lib advapi32.lib user32.lib $(--python-lib[1]) ;
+
+# First set the build commands and options according to the
+# preset toolset.
+toolset = [ MATCH --toolset=(.*) : $(ARGV) ] ;
+if ! $(toolset)
+{
+ # For some reason, the following test does not catch empty toolset.
+ ECHO "###" ;
+ ECHO "###" No toolset specified. Please use --toolset option. ;
+ ECHO "###" ;
+ ECHO "###" Known toolsets are: $(toolsets:J=", ") ;
+ EXIT "###" ;
+}
+if ! $(toolset) in $(toolsets)
+{
+ ECHO "###" ;
+ ECHO "###" Unknown toolset: $(toolset) ;
+ ECHO "###" ;
+ ECHO "###" Known toolsets are: $(toolsets:J=", ") ;
+ EXIT "###" ;
+}
+--cc = $(tool.$(toolset).cc) ;
+if $(tool.$(toolset).opt.out[2])
+{
+ if $(tool.$(toolset).opt.out[1]) = $(tool.$(toolset).opt.out[2])
+ {
+ --out = $(tool.$(toolset).opt.out[1]) ;
+ --dir = $(tool.$(toolset).opt.out[3-]) ;
+ }
+ else
+ {
+ --bin = $(tool.$(toolset).opt.out[1]) ;
+ --dir = $(tool.$(toolset).opt.out[2-]) ;
+ }
+}
+else
+{
+ --out = $(tool.$(toolset).opt.out) ;
+}
+--def = $(tool.$(toolset).opt.define) ;
+--flags = $(tool.$(toolset).flags) ;
+--defs = $(tool.$(toolset).defines) ;
+--libs = $(tool.$(toolset).linklibs) ;
+if $(tool.$(toolset).link.cc)
+{
+ --link = $(tool.$(toolset).link.cc) ;
+ if $(tool.$(toolset).link.opt.out[2])
+ {
+ if $(tool.$(toolset).link.opt.out[1]) = $(tool.$(toolset).link.opt.out[2])
+ {
+ --link-out = $(tool.$(toolset).link.opt.out[1]) ;
+ --link-dir = $(tool.$(toolset).link.opt.out[3-]) ;
+ }
+ else
+ {
+ --link-bin = $(tool.$(toolset).link.opt.out[1]) ;
+ --link-dir = $(tool.$(toolset).link.opt.out[2-]) ;
+ }
+ }
+ else
+ {
+ --link-out = $(tool.$(toolset).link.opt.out) ;
+ }
+ --link-def = $(tool.$(toolset).link.opt.define) ;
+ --link-flags = $(tool.$(toolset).link.flags) ;
+ --link-defs = $(tool.$(toolset).link.defines) ;
+ --link-libs = $(tool.$(toolset).link.linklibs) ;
+}
+
+# Put executables in platform-specific subdirectory.
+locate-target = $(LOCATE_TARGET) ;
+if $(OSPLAT)
+{
+ locate-target ?= bin$(.)$(OS:L)$(OSPLAT:L) ;
+ platform = $(OS:L)$(OSPLAT:L) ;
+}
+else
+{
+ locate-target ?= bin$(.)$(OS:L) ;
+ platform = $(OS:L) ;
+}
+if $(debug)
+{
+ locate-target = [ .path $(locate-target)$(.)debug ] ;
+}
+if $(profile)
+{
+ locate-target = [ .path $(locate-target)$(.)profile ] ;
+}
+else
+{
+ locate-target = [ .path $(locate-target) ] ;
+}
+
+if --show-locate-target in $(ARGV)
+{
+ ECHO $(locate-target) ;
+}
+
+# We have some different files for UNIX, and NT.
+jam.source =
+ command.c compile.c constants.c debug.c execcmd.c frames.c function.c glob.c
+ hash.c hcache.c headers.c hdrmacro.c jam.c jambase.c jamgram.c lists.c
+ make.c make1.c mem.c object.c option.c output.c parse.c pathsys.c regexp.c
+ rules.c scan.c search.c subst.c w32_getreg.c timestamp.c variable.c
+ modules.c strings.c filesys.c builtins.c class.c cwd.c native.c md5.c
+ [ .path modules set.c ] [ .path modules path.c ] [ .path modules regex.c ]
+ [ .path modules property-set.c ] [ .path modules sequence.c ] [ .path modules order.c ] ;
+if $(OS) = NT
+{
+ jam.source += execnt.c filent.c pathnt.c ;
+}
+else
+{
+ jam.source += execunix.c fileunix.c pathunix.c ;
+}
+
+# Debug assertions, or not.
+if ! $(debug) || --noassert in $(ARGV)
+{
+ --defs += NDEBUG ;
+}
+
+# Enable some optional features.
+--defs += OPT_HEADER_CACHE_EXT ;
+--defs += OPT_GRAPH_DEBUG_EXT ;
+--defs += OPT_SEMAPHORE ;
+--defs += OPT_AT_FILES ;
+--defs += OPT_DEBUG_PROFILE ;
+
+# Bug fixes
+--defs += OPT_FIX_TARGET_VARIABLES_EXT ;
+#~ --defs += OPT_NO_EXTERNAL_VARIABLE_SPLIT ;
+
+# Improvements
+--defs += OPT_IMPROVED_PATIENCE_EXT ;
+
+# Use Boehm GC memory allocator?
+if $(--boehm-gc)
+{
+ --defs += OPT_BOEHM_GC ;
+ if $(debug)
+ {
+ --defs += GC_DEBUG ;
+ }
+}
+
+if $(--duma)
+{
+ --defs += OPT_DUMA ;
+}
+
+if ( $(OS) = NT ) && ! NT in $(--defs)
+{
+ --defs += NT ;
+}
+--defs += YYSTACKSIZE=5000 ;
+
+if $(with-python)
+{
+ --defs += HAVE_PYTHON ;
+}
+
+if $(debug)
+{
+ --defs += BJAM_NEWSTR_NO_ALLOCATE ;
+}
+
+
+# The basic symbolic targets...
+NOTFILE all clean dist ;
+ALWAYS clean ;
+
+# Utility rules and actions...
+rule .clean
+{
+ [DELETE] clean : $(<) ;
+}
+if $(OS) = NT { actions piecemeal together existing [DELETE] {
+ del /F /Q "$(>)"
+} }
+if $(UNIX) = true { actions piecemeal together existing [DELETE] {
+ rm -f "$(>)"
+} }
+if $(OS) = NT {
+ --chmod+w = "attrib -r " ;
+}
+if $(UNIX) = true {
+ --chmod+w = "chmod +w " ;
+}
+
+rule .mkdir
+{
+ NOUPDATE $(<) ;
+ if $(<:P) { DEPENDS $(<) : $(<:P) ; .mkdir $(<:P) ; }
+ if ! $(md<$(<)>) { [MKDIR] $(<) ; md<$(<)> = - ; }
+}
+if $(OS) = NT { actions [MKDIR] {
+ md "$(<)"
+} }
+if $(UNIX) = true { actions [MKDIR] {
+ mkdir "$(<)"
+} }
+
+rule .exe
+{
+ local exe = $(<) ;
+ if $(OS) = NT || ( $(UNIX) = true && $(OS) = CYGWIN ) { exe = $(exe:S=.exe) ; }
+ LOCATE on $(exe) = $(locate-target) ;
+ DEPENDS all : $(exe) ;
+ .mkdir $(locate-target) ;
+ if $(--link)
+ {
+ local objs ;
+ for local s in $(>)
+ {
+ # Translate any subdir elements into a simple file name.
+ local o = [ MATCH "([^/]+)[/]?(.+)" : $(s) ] ;
+ o = $(o:J=_) ;
+ o = $(o:S=.o) ;
+ objs += $(o) ;
+ LOCATE on $(o) = $(locate-target) ;
+ DEPENDS $(exe) : $(o) ;
+ DEPENDS $(o) : $(s) ;
+ DEPENDS $(o) : $(locate-target) ;
+ [COMPILE] $(o) : $(s) ;
+ .clean $(o) ;
+ }
+ DEPENDS $(exe) : $(objs) ;
+ DEPENDS $(exe) : $(locate-target) ;
+ [COMPILE.LINK] $(exe) : $(objs) ;
+ .clean $(exe) ;
+ }
+ else
+ {
+ DEPENDS $(exe) : $(>) ;
+ DEPENDS $(exe) : $(locate-target) ;
+ [COMPILE] $(exe) : $(>) ;
+ .clean $(exe) ;
+ }
+ return $(exe) ;
+}
+if ! $(--def[2]) { actions [COMPILE] {
+ "$(--cc)" "$(--bin)$(<:D=)" "$(--dir)$(<:D)$(./)" $(--out)$(<) "$(--def)$(--defs)" "$(--flags)" "$(>)" "$(--libs)"
+} }
+else { actions [COMPILE] {
+ "$(--cc)" "$(--bin)$(<:D=)" "$(--dir)$(<:D)$(./)" $(--out)$(<) "$(--def[1])$(--defs:J=$(--def[2]))$(--def[3])" "$(--flags)" "$(>)" "$(--libs)"
+} }
+
+actions [COMPILE.LINK] {
+ "$(--link)" "$(--link-bin)$(<:D=)" "$(--link-dir)$(<:D)$(./)" "$(--link-out)$(<)" "$(--link-def)$(--link-defs)" "$(--link-flags)" "$(>)" "$(--link-libs)"
+}
+
+rule .link
+{
+ DEPENDS all : $(<) ;
+ DEPENDS $(<) : $(>) ;
+ [LINK] $(<) : $(>) ;
+ .clean $(<) ;
+}
+if $(OS) = NT { actions [LINK] {
+ copy "$(>)" "$(<)"
+} }
+if $(UNIX) = true { actions [LINK] {
+ ln -fs "$(>)" "$(<)"
+} }
+
+rule .copy
+{
+ DEPENDS all : $(<) ;
+ DEPENDS $(<) : $(>) ;
+ [COPY] $(<) : $(>) ;
+ .clean $(<) ;
+}
+
+# Will be redefined later.
+actions [COPY]
+{
+}
+
+
+rule .move
+{
+ DEPENDS $(<) : $(>) ;
+ [MOVE] $(<) : $(>) ;
+}
+if $(OS) = NT { actions [MOVE] {
+ del /f "$(<)"
+ rename "$(>)" "$(<)"
+} }
+if $(UNIX) = true { actions [MOVE] {
+ mv -f "$(>)" "$(<)"
+} }
+
+# Generate the grammar tokens table, and the real yacc grammar.
+rule .yyacc
+{
+ local exe = [ .exe yyacc : yyacc.c ] ;
+ NOUPDATE $(exe) ;
+ DEPENDS $(<) : $(exe) $(>) ;
+ LEAVES $(<) ;
+ yyacc.exe on $(<) = $(exe:R=$(locate-target)) ;
+ [YYACC] $(<) : $(>) ;
+}
+actions [YYACC] {
+ $(--chmod+w)$(<[1])
+ $(--chmod+w)$(<[2])
+ "$(yyacc.exe)" "$(<)" "$(>)"
+}
+if $(grammar)
+{
+ .yyacc jamgram.y jamgramtab.h : jamgram.yy ;
+}
+else if $(debug)
+{
+ .exe yyacc : yyacc.c ;
+}
+
+# How to build the grammar.
+if $(OS) = NT
+{
+ SUFEXE = .exe ;
+ # try some other likely spellings...
+ PATH ?= $(Path) ;
+ PATH ?= $(path) ;
+}
+SUFEXE ?= "" ;
+
+yacc ?= [ GLOB $(PATH) : yacc$(SUFEXE) ] ;
+yacc ?= [ GLOB $(PATH) : bison$(SUFEXE) ] ;
+yacc ?= [ GLOB "$(ProgramFiles:J= )\\GnuWin32\\bin"
+ "C:\\Program Files\\GnuWin32\\bin" : bison$(SUFEXE) ] ;
+yacc = $(yacc[1]) ;
+switch $(yacc:D=:S=)
+{
+ case bison : yacc += -d --yacc ;
+ case yacc : yacc += -d ;
+}
+if $(debug) && $(yacc)
+{
+ yacc += -t -v ;
+}
+yacc += $(YACCFLAGS) ;
+
+rule .yacc
+{
+ DEPENDS $(<) : $(>) ;
+ LEAVES $(<) ;
+ [YACC] $(<) : $(>) ;
+}
+if $(OS) = NT { actions [YACC] {
+ "$(yacc)" "$(>)"
+ if not errorlevel 1 (
+ del /f "$(<[1])"
+ rename y.tab$(<[1]:S) "$(<[1])"
+ del /f $(<[2])
+ rename y.tab$(<[2]:S) "$(<[2])"
+ ) else set _error_ =
+} }
+if $(UNIX) = true { actions [YACC] {
+ if ` "$(yacc)" "$(>)" ` ; then
+ mv -f y.tab$(<[1]:S) "$(<[1])"
+ mv -f y.tab$(<[2]:S) "$(<[2])"
+ else
+ exit 1
+ fi
+} }
+if $(grammar) && ! $(yacc)
+{
+ EXIT Could not find the 'yacc' tool, and therefore can not build the
+ grammar. ;
+}
+if $(grammar) && $(yacc)
+{
+ .yacc jamgram.c jamgram.h : jamgram.y ;
+}
+
+# How to build the compiled in jambase.
+rule .mkjambase
+{
+ local exe = [ .exe mkjambase : mkjambase.c ] ;
+ DEPENDS $(<) : $(exe) $(>) ;
+ LEAVES $(<) ;
+ mkjambase.exe on $(<) = $(exe:R=$(locate-target)) ;
+ [MKJAMBASE] $(<) : $(>) ;
+}
+actions [MKJAMBASE] {
+ $(--chmod+w)$(<)
+ $(mkjambase.exe) "$(<)" "$(>)"
+}
+if $(debug)
+{
+ .mkjambase jambase.c : Jambase ;
+}
+
+# How to build Jam.
+rule .jam
+{
+ $(>).exe = [ .exe $(>) : $(jam.source) ] ;
+ DEPENDS all : $($(>).exe) ;
+
+ # Make a copy under the old name.
+ $(<).exe = $(<:S=$($(>).exe:S)) ;
+ LOCATE on $($(<).exe) = $(locate-target) ;
+ .copy $($(<).exe) : $($(>).exe) ;
+ DEPENDS all : $($(<).exe) ;
+}
+.jam bjam : b2 ;
+
+
+# Scan sources for header dependencies.
+#
+# In order to keep things simple, we made a slight compromise here - we only
+# detect changes in headers included relative to the current folder as opposed
+# to those included from somewhere on the include path.
+rule .scan ( targets + )
+{
+ HDRRULE on $(targets) = .hdr.scan ;
+ HDRSCAN on $(targets) = "^[ \t]*#[ \t]*include[ \t]*\"([^\"]*)\".*$" ;
+}
+rule .hdr.scan ( target : includes * : binding )
+{
+ local target-path = [ NORMALIZE_PATH $(binding:D) ] ;
+ # Extra grist provides target name uniqueness when referencing same name
+ # header files from different folders.
+ local include-targets = <$(target-path)>$(includes) ;
+ NOCARE $(include-targets) ;
+ INCLUDES $(target) : $(include-targets) ;
+ SEARCH on $(include-targets) = $(target-path) ;
+ ISFILE $(include-targets) ;
+ .scan $(include-targets) ;
+}
+.scan $(jam.source) ;
+
+
+# Distribution making from here on out. Assumes that the docs are already built
+# as HTML at ../doc/html. Otherwise they will not be included in the built
+# distribution archive.
+dist.license =
+ [ GLOB . : $(LICENSE).txt ]
+ ;
+dist.license = $(dist.license:D=)
+ [ GLOB [ .path .. .. .. ] : $(LICENSE).txt ]
+ [ GLOB [ .path .. boost ] : $(LICENSE).txt ] ;
+dist.docs =
+ [ GLOB . : *.png *.css *.html ]
+ ;
+dist.docs = $(dist.docs:D=)
+ [ GLOB [ .path images ] : *.png ]
+ [ GLOB [ .path jam ] : *.html ]
+ ;
+dist.source =
+ [ GLOB . : *.c *.h ]
+ ;
+dist.source = $(dist.source:D=)
+ $(dist.license[1])
+ $(dist.docs)
+ build.jam build.bat build.sh
+ Jambase
+ jamgram.y jamgram.yy
+ [ .path modules set.c ]
+ [ .path modules path.c ]
+ [ .path modules regex.c ]
+ [ .path modules property-set.c ]
+ [ .path modules sequence.c ]
+ [ .path modules order.c ]
+ [ GLOB [ .path boehm_gc ] : * ]
+ [ GLOB [ .path boehm_gc include ] : * ]
+ [ GLOB [ .path boehm_gc include private ] : * ]
+ [ GLOB [ .path boehm_gc cord ] : * ]
+ [ GLOB [ .path boehm_gc Mac_files ] : * ]
+ [ GLOB [ .path boehm_gc tests ] : * ]
+ [ GLOB [ .path boehm_gc doc ] : * ]
+ ;
+dist.bin =
+ bjam
+ ;
+dist.bin =
+ $(dist.license[1])
+ $(dist.bin:S=$(bjam.exe:S))
+ ;
+
+if $(OS) = NT
+{
+ zip ?= [ GLOB "$(ProgramFiles:J= )\\7-ZIP" "C:\\Program Files\\7-ZIP" : "7z.exe" ] ;
+ zip ?= [ GLOB "$(ProgramFiles:J= )\\7-ZIP" "C:\\Program Files\\7-ZIP" : "7zn.exe" ] ;
+ zip ?= [ GLOB $(PATH) : zip.exe ] ;
+ zip ?= zip ;
+ zip = $(zip[1]) ;
+ switch $(zip:D=:S=)
+ {
+ case 7z* : zip += a -r -tzip -mx=9 ;
+ case zip : zip += -9r ;
+ }
+ actions piecemeal [PACK] {
+ "$(zip)" "$(<)" "$(>)"
+ }
+ actions piecemeal [ZIP] {
+ "$(zip)" "$(<)" "$(>)"
+ }
+ actions piecemeal [COPY] {
+ copy /Y "$(>)" "$(<)" >NUL:
+ }
+}
+if $(UNIX) = true
+{
+ tar ?= [ GLOB $(PATH) : star bsdtar tar ] ;
+ tar = $(tar[1]) ;
+ switch $(tar:D=:S=)
+ {
+ case star : tar += -c artype=pax -D -d -to-stdout ;
+ case * : tar += -c -f - ;
+ }
+ actions [PACK] {
+ "$(tar)" "$(>)" | gzip -c9 > "$(<)"
+ }
+ #~ actions [PACK] {
+ #~ tar cf "$(<:S=.tar)" "$(>)"
+ #~ }
+ actions [ZIP] {
+ gzip -c9 "$(>)" > "$(<)"
+ }
+ actions [COPY] {
+ cp -Rpf "$(>)" "$(<)"
+ }
+}
+
+# The single binary, compressed.
+rule .binary
+{
+ local zip ;
+ if $(OS) = NT { zip = $($(<).exe:S=.zip) ; }
+ if $(UNIX) = true { zip = $($(<).exe:S=.tgz) ; }
+ zip = $(zip:S=)-$(VERSION)-$(RELEASE)-$(platform)$(zip:S) ;
+ DEPENDS $(zip) : $($(<).exe) ;
+ DEPENDS dist : $(zip) ;
+ #~ LOCATE on $(zip) = $(locate-target) ;
+ if $(OS) = NT { [ZIP] $(zip) : $($(<).exe) ; }
+ if $(UNIX) = true { [PACK] $(zip) : $($(<).exe) ; }
+ .clean $(zip) ;
+}
+
+# Package some file.
+rule .package ( dst-dir : src-files + )
+{
+ local dst-files ;
+ local src-files-actual ;
+ for local src-path in $(src-files)
+ {
+ if ! [ GLOB $(src-path:P) : $(src-path:B) ] || [ CHECK_IF_FILE $(src-path) ]
+ {
+ local src-subdir = $(src-path:D) ;
+ local src-file = $(src-path) ;
+ while $(src-subdir:D) { src-subdir = $(src-subdir:D) ; }
+ if $(src-subdir) = ".."
+ {
+ src-file = $(src-file:D=) ;
+ }
+ dst-files += $(src-file:R=$(dst-dir)) ;
+ src-files-actual += $(src-path) ;
+ }
+ }
+
+ local pack ;
+ if $(OS) = NT { pack = $(dst-dir).zip ; }
+ if $(UNIX) = true { pack = $(dst-dir).tgz ; }
+
+ DEPENDS dist : $(pack) ;
+ DEPENDS $(pack) : $(dst-files) ;
+
+ local dst-files-queue = $(dst-files) ;
+ for local src-path in $(src-files-actual)
+ {
+ local dst-file = $(dst-files-queue[1]) ;
+ dst-files-queue = $(dst-files-queue[2-]) ;
+ DEPENDS $(dst-file) : $(src-path) $(dst-file:D) ;
+ .mkdir $(dst-file:D) ;
+
+ [COPY] $(dst-file) : $(src-path) ;
+ .clean $(dst-file) ;
+ }
+
+ [PACK] $(pack) : $(dst-files) ;
+ .clean $(pack) ;
+}
+
+# RPM distro file.
+rpm-tool = [ GLOB $(PATH) : "rpmbuild" ] ;
+rpm-tool ?= [ GLOB $(PATH) : "rpm" ] ;
+rpm-tool = $(rpm-tool[1]) ;
+rule .rpm ( name : source )
+{
+ local rpm-arch ;
+ switch $(OSPLAT)
+ {
+ case X86 : rpm-arch ?= i386 ;
+ case PPC : rpm-arch ?= ppc ;
+ case AXP : rpm-arch ?= alpha ;
+ # no guaranty for these:
+ case IA64 : rpm-arch ?= ia64 ;
+ case ARM : rpm-arch ?= arm ;
+ case SPARC : rpm-arch ?= sparc ;
+ case * : rpm-arch ?= other ;
+ }
+ local target = $(name)-rpm ;
+ NOTFILE $(target) ;
+ DEPENDS dist : $(target) ;
+ DEPENDS $(target) : $(name).$(rpm-arch).rpm $(name).src.rpm ;
+ DEPENDS $(name).$(rpm-arch).rpm : $(source) ;
+ DEPENDS $(name).src.rpm : $(name).$(rpm-arch).rpm ;
+ docs on $(target) = $(dist.docs:J=" ") ;
+ arch on $(target) = $(rpm-arch) ;
+ if $(rpm-arch) = ppc { target-opt on $(target) = --target= ; }
+ else { target-opt on $(target) = "--target " ; }
+ [RPM] $(target) : $(source) ;
+ .clean $(name).$(rpm-arch).rpm $(name).src.rpm ;
+}
+actions [RPM] {
+ set -e
+ export BOOST_JAM_TOOLSET="$(toolset)"
+ $(rpm-tool) -ta $(target-opt)$(arch) $(>) | tee rpm.out
+ cp `grep -e '^Wrote:' rpm.out | sed 's/^Wrote: //'` .
+ rm -f rpm.out
+}
+
+# The distribution targets. Do not bother with them unless this is a
+# distribution build.
+if dist in $(ARGV)
+{
+ #~ .binary bjam ;
+ .package $(NAME)-$(VERSION) : $(dist.source) ;
+ .package $(NAME)-$(VERSION)-$(RELEASE)-$(platform) : $(dist.bin) ;
+ if $(rpm-tool)
+ {
+ #~ .rpm $(NAME)-$(VERSION)-$(RELEASE) : $(NAME)-$(VERSION).tgz ;
+ }
+}
diff --git a/src/kenlm/jam-files/engine/build.sh b/src/kenlm/jam-files/engine/build.sh
new file mode 100755
index 0000000..470ea3c
--- /dev/null
+++ b/src/kenlm/jam-files/engine/build.sh
@@ -0,0 +1,303 @@
+#!/bin/sh
+
+#~ Copyright 2002-2005 Rene Rivera.
+#~ Distributed under the Boost Software License, Version 1.0.
+#~ (See accompanying file LICENSE_1_0.txt or copy at
+#~ http://www.boost.org/LICENSE_1_0.txt)
+
+# Reset the toolset.
+BOOST_JAM_TOOLSET=
+
+# Run a command, and echo before doing so. Also checks the exit status and quits
+# if there was an error.
+echo_run ()
+{
+ echo "$@"
+ $@
+ r=$?
+ if test $r -ne 0 ; then
+ exit $r
+ fi
+}
+
+# Print an error message, and exit with a status of 1.
+error_exit ()
+{
+ echo "###"
+ echo "###" "$@"
+ echo "###"
+ echo "### You can specify the toolset as the argument, i.e.:"
+ echo "### ./build.sh gcc"
+ echo "###"
+ echo "### Toolsets supported by this script are:"
+ echo "### acc, como, darwin, gcc, intel-darwin, intel-linux, kcc, kylix,"
+ echo "### mipspro, mingw(msys), pathscale, pgi, qcc, sun, sunpro, tru64cxx, vacpp"
+ echo "###"
+ echo "### A special toolset; cc, is available which is used as a fallback"
+ echo "### when a more specific toolset is not found and the cc command is"
+ echo "### detected. The 'cc' toolset will use the CC, CFLAGS, and LIBS"
+ echo "### environment variables, if present."
+ echo "###"
+ exit 1
+}
+
+# Check that a command is in the PATH.
+test_path ()
+{
+ if `command -v command 1>/dev/null 2>/dev/null`; then
+ command -v $1 1>/dev/null 2>/dev/null
+ else
+ hash $1 1>/dev/null 2>/dev/null
+ fi
+}
+
+# Check that the OS name, as returned by "uname", is as given.
+test_uname ()
+{
+ if test_path uname; then
+ test `uname` = $*
+ fi
+}
+
+# Try and guess the toolset to bootstrap the build with...
+Guess_Toolset ()
+{
+ if test -r /mingw/bin/gcc ; then
+ BOOST_JAM_TOOLSET=mingw
+ BOOST_JAM_TOOLSET_ROOT=/mingw/
+ elif test_uname Darwin ; then BOOST_JAM_TOOLSET=darwin
+ elif test_uname IRIX ; then BOOST_JAM_TOOLSET=mipspro
+ elif test_uname IRIX64 ; then BOOST_JAM_TOOLSET=mipspro
+ elif test_uname OSF1 ; then BOOST_JAM_TOOLSET=tru64cxx
+ elif test_uname QNX && test_path qcc ; then BOOST_JAM_TOOLSET=qcc
+ elif test_path gcc ; then BOOST_JAM_TOOLSET=gcc
+ elif test_path icc ; then BOOST_JAM_TOOLSET=intel-linux
+ elif test -r /opt/intel/cc/9.0/bin/iccvars.sh ; then
+ BOOST_JAM_TOOLSET=intel-linux
+ BOOST_JAM_TOOLSET_ROOT=/opt/intel/cc/9.0
+ elif test -r /opt/intel_cc_80/bin/iccvars.sh ; then
+ BOOST_JAM_TOOLSET=intel-linux
+ BOOST_JAM_TOOLSET_ROOT=/opt/intel_cc_80
+ elif test -r /opt/intel/compiler70/ia32/bin/iccvars.sh ; then
+ BOOST_JAM_TOOLSET=intel-linux
+ BOOST_JAM_TOOLSET_ROOT=/opt/intel/compiler70/ia32/
+ elif test -r /opt/intel/compiler60/ia32/bin/iccvars.sh ; then
+ BOOST_JAM_TOOLSET=intel-linux
+ BOOST_JAM_TOOLSET_ROOT=/opt/intel/compiler60/ia32/
+ elif test -r /opt/intel/compiler50/ia32/bin/iccvars.sh ; then
+ BOOST_JAM_TOOLSET=intel-linux
+ BOOST_JAM_TOOLSET_ROOT=/opt/intel/compiler50/ia32/
+ elif test_path pgcc ; then BOOST_JAM_TOOLSET=pgi
+ elif test_path pathcc ; then BOOST_JAM_TOOLSET=pathscale
+ elif test_path xlc ; then BOOST_JAM_TOOLSET=vacpp
+ elif test_path como ; then BOOST_JAM_TOOLSET=como
+ elif test_path KCC ; then BOOST_JAM_TOOLSET=kcc
+ elif test_path bc++ ; then BOOST_JAM_TOOLSET=kylix
+ elif test_path aCC ; then BOOST_JAM_TOOLSET=acc
+ elif test_uname HP-UX ; then BOOST_JAM_TOOLSET=acc
+ elif test -r /opt/SUNWspro/bin/cc ; then
+ BOOST_JAM_TOOLSET=sunpro
+ BOOST_JAM_TOOLSET_ROOT=/opt/SUNWspro/
+ # Test for "cc" as the default fallback.
+ elif test_path $CC ; then BOOST_JAM_TOOLSET=cc
+ elif test_path cc ; then
+ BOOST_JAM_TOOLSET=cc
+ CC=cc
+ fi
+ if test "$BOOST_JAM_TOOLSET" = "" ; then
+ error_exit "Could not find a suitable toolset."
+ fi
+}
+
+# The one option we support in the invocation
+# is the name of the toolset to force building
+# with.
+case "$1" in
+ --guess-toolset) Guess_Toolset ; echo "$BOOST_JAM_TOOLSET" ; exit 1 ;;
+ -*) Guess_Toolset ;;
+ ?*) BOOST_JAM_TOOLSET=$1 ; shift ;;
+ *) Guess_Toolset ;;
+esac
+BOOST_JAM_OPT_JAM="-o bootstrap/jam0"
+BOOST_JAM_OPT_MKJAMBASE="-o bootstrap/mkjambase0"
+BOOST_JAM_OPT_YYACC="-o bootstrap/yyacc0"
+case $BOOST_JAM_TOOLSET in
+ mingw)
+ if test -r ${BOOST_JAM_TOOLSET_ROOT}bin/gcc ; then
+ export PATH=${BOOST_JAM_TOOLSET_ROOT}bin:$PATH
+ fi
+ BOOST_JAM_CC="gcc -DNT"
+ ;;
+
+ gcc)
+ BOOST_JAM_CC=gcc
+ ;;
+
+ darwin)
+ BOOST_JAM_CC=cc
+ ;;
+
+ intel-darwin)
+ BOOST_JAM_CC=icc
+ ;;
+
+ intel-linux)
+ if test -r /opt/intel/cc/9.0/bin/iccvars.sh ; then
+ BOOST_JAM_TOOLSET_ROOT=/opt/intel/cc/9.0/
+ elif test -r /opt/intel_cc_80/bin/iccvars.sh ; then
+ BOOST_JAM_TOOLSET_ROOT=/opt/intel_cc_80/
+ elif test -r /opt/intel/compiler70/ia32/bin/iccvars.sh ; then
+ BOOST_JAM_TOOLSET_ROOT=/opt/intel/compiler70/ia32/
+ elif test -r /opt/intel/compiler60/ia32/bin/iccvars.sh ; then
+ BOOST_JAM_TOOLSET_ROOT=/opt/intel/compiler60/ia32/
+ elif test -r /opt/intel/compiler50/ia32/bin/iccvars.sh ; then
+ BOOST_JAM_TOOLSET_ROOT=/opt/intel/compiler50/ia32/
+ fi
+ if test -r ${BOOST_JAM_TOOLSET_ROOT}bin/iccvars.sh ; then
+ # iccvars does not change LD_RUN_PATH. We adjust LD_RUN_PATH here in
+ # order not to have to rely on ld.so.conf knowing the icc library
+ # directory. We do this before running iccvars.sh in order to allow a
+ # user to add modifications to LD_RUN_PATH in iccvars.sh.
+ if test -z "${LD_RUN_PATH}"; then
+ LD_RUN_PATH="${BOOST_JAM_TOOLSET_ROOT}lib"
+ else
+ LD_RUN_PATH="${BOOST_JAM_TOOLSET_ROOT}lib:${LD_RUN_PATH}"
+ fi
+ export LD_RUN_PATH
+ . ${BOOST_JAM_TOOLSET_ROOT}bin/iccvars.sh
+ fi
+ BOOST_JAM_CC=icc
+ ;;
+
+ vacpp)
+ BOOST_JAM_CC=xlc
+ ;;
+
+ como)
+ BOOST_JAM_CC="como --c"
+ ;;
+
+ kcc)
+ BOOST_JAM_CC=KCC
+ ;;
+
+ kylix)
+ BOOST_JAM_CC=bc++
+ ;;
+
+ mipspro)
+ BOOST_JAM_CC=cc
+ ;;
+
+ pathscale)
+ BOOST_JAM_CC=pathcc
+ ;;
+
+ pgi)
+ BOOST_JAM_CC=pgcc
+ ;;
+
+ sun*)
+ if test -z "${BOOST_JAM_TOOLSET_ROOT}" -a -r /opt/SUNWspro/bin/cc ; then
+ BOOST_JAM_TOOLSET_ROOT=/opt/SUNWspro/
+ fi
+ if test -r "${BOOST_JAM_TOOLSET_ROOT}bin/cc" ; then
+ PATH=${BOOST_JAM_TOOLSET_ROOT}bin:${PATH}
+ export PATH
+ fi
+ BOOST_JAM_CC=cc
+ ;;
+
+ clang*)
+ BOOST_JAM_CC="clang -Wno-unused -Wno-format"
+ BOOST_JAM_TOOLSET=clang
+ ;;
+
+ tru64cxx)
+ BOOST_JAM_CC=cc
+ ;;
+
+ acc)
+ BOOST_JAM_CC="cc -Ae"
+ ;;
+
+ cc)
+ if test -z "$CC" ; then CC=cc ; fi
+ BOOST_JAM_CC=$CC
+ BOOST_JAM_OPT_JAM="$BOOST_JAM_OPT_JAM $CFLAGS $LIBS"
+ BOOST_JAM_OPT_MKJAMBASE="$BOOST_JAM_OPT_MKJAMBASE $CFLAGS $LIBS"
+ BOOST_JAM_OPT_YYACC="$BOOST_JAM_OPT_YYACC $CFLAGS $LIBS"
+ ;;
+
+ qcc)
+ BOOST_JAM_CC=qcc
+ ;;
+
+ *)
+ error_exit "Unknown toolset: $BOOST_JAM_TOOLSET"
+ ;;
+esac
+
+echo "###"
+echo "### Using '$BOOST_JAM_TOOLSET' toolset."
+echo "###"
+
+YYACC_SOURCES="yyacc.c"
+MKJAMBASE_SOURCES="mkjambase.c"
+BJAM_SOURCES="\
+ command.c compile.c constants.c debug.c execcmd.c frames.c function.c glob.c\
+ hash.c hdrmacro.c headers.c jam.c jambase.c jamgram.c lists.c make.c make1.c\
+ object.c option.c output.c parse.c pathsys.c regexp.c rules.c\
+ scan.c search.c subst.c timestamp.c variable.c modules.c strings.c filesys.c\
+ builtins.c class.c cwd.c native.c md5.c w32_getreg.c modules/set.c\
+ modules/path.c modules/regex.c modules/property-set.c modules/sequence.c\
+ modules/order.c"
+case $BOOST_JAM_TOOLSET in
+ mingw)
+ BJAM_SOURCES="${BJAM_SOURCES} execnt.c filent.c pathnt.c"
+ ;;
+
+ *)
+ BJAM_SOURCES="${BJAM_SOURCES} execunix.c fileunix.c pathunix.c"
+ ;;
+esac
+
+BJAM_UPDATE=
+if test "$1" = "--update" -o "$2" = "--update" -o "$3" = "--update" -o "$4" = "--update" ; then
+ BJAM_UPDATE="update"
+fi
+if test "${BJAM_UPDATE}" = "update" -a ! -x "./bootstrap/jam0" ; then
+ BJAM_UPDATE=
+fi
+
+if test "${BJAM_UPDATE}" != "update" ; then
+ echo_run rm -rf bootstrap
+ echo_run mkdir bootstrap
+ if test ! -r jamgram.y -o ! -r jamgramtab.h ; then
+ echo_run ${BOOST_JAM_CC} ${BOOST_JAM_OPT_YYACC} ${YYACC_SOURCES}
+ if test -x "./bootstrap/yyacc0" ; then
+ echo_run ./bootstrap/yyacc0 jamgram.y jamgramtab.h jamgram.yy
+ fi
+ fi
+ if test ! -r jamgram.c -o ! -r jamgram.h ; then
+ if test_path yacc ; then YACC="yacc -d"
+ elif test_path bison ; then YACC="bison -y -d --yacc"
+ fi
+ echo_run $YACC jamgram.y
+ mv -f y.tab.c jamgram.c
+ mv -f y.tab.h jamgram.h
+ fi
+ if test ! -r jambase.c ; then
+ echo_run ${BOOST_JAM_CC} ${BOOST_JAM_OPT_MKJAMBASE} ${MKJAMBASE_SOURCES}
+ if test -x "./bootstrap/mkjambase0" ; then
+ echo_run ./bootstrap/mkjambase0 jambase.c Jambase
+ fi
+ fi
+ echo_run ${BOOST_JAM_CC} ${BOOST_JAM_OPT_JAM} ${BJAM_SOURCES}
+fi
+if test -x "./bootstrap/jam0" ; then
+ if test "${BJAM_UPDATE}" != "update" ; then
+ echo_run ./bootstrap/jam0 -f build.jam --toolset=$BOOST_JAM_TOOLSET "--toolset-root=$BOOST_JAM_TOOLSET_ROOT" "$@" clean
+ fi
+ echo_run ./bootstrap/jam0 -f build.jam --toolset=$BOOST_JAM_TOOLSET "--toolset-root=$BOOST_JAM_TOOLSET_ROOT" "$@"
+fi
diff --git a/src/kenlm/jam-files/engine/builtins.c b/src/kenlm/jam-files/engine/builtins.c
new file mode 100644
index 0000000..e4130bb
--- /dev/null
+++ b/src/kenlm/jam-files/engine/builtins.c
@@ -0,0 +1,2354 @@
+/*
+ * Copyright 1993-2002 Christopher Seiwald and Perforce Software, Inc.
+ *
+ * This file is part of Jam - see jam.c for Copyright information.
+ */
+
+#include "jam.h"
+#include "builtins.h"
+
+#include "compile.h"
+#include "constants.h"
+#include "cwd.h"
+#include "filesys.h"
+#include "frames.h"
+#include "hash.h"
+#include "hdrmacro.h"
+#include "lists.h"
+#include "make.h"
+#include "md5.h"
+#include "native.h"
+#include "object.h"
+#include "parse.h"
+#include "pathsys.h"
+#include "rules.h"
+#include "strings.h"
+#include "subst.h"
+#include "timestamp.h"
+#include "variable.h"
+
+#include <ctype.h>
+
+#if defined(USE_EXECUNIX)
+# include <sys/types.h>
+# include <sys/wait.h>
+#else
+/*
+ * NT does not have wait() and associated macros and uses the system() return
+ * value instead. Status code group are documented at:
+ * http://msdn.microsoft.com/en-gb/library/ff565436.aspx
+ */
+# define WIFEXITED(w) (((w) & 0XFFFFFF00) == 0)
+# define WEXITSTATUS(w)(w)
+#endif
+
+/*
+ * builtins.c - builtin jam rules
+ *
+ * External routines:
+ * load_builtins() - define builtin rules
+ * unknown_rule() - reports an unknown rule occurrence to the
+ * user and exits
+ *
+ * Internal routines:
+ * append_if_exists() - if file exists, append it to the list
+ * builtin_calc() - CALC rule
+ * builtin_delete_module() - DELETE_MODULE ( MODULE ? )
+ * builtin_depends() - DEPENDS/INCLUDES rule
+ * builtin_echo() - ECHO rule
+ * builtin_exit() - EXIT rule
+ * builtin_export() - EXPORT ( MODULE ? : RULES * )
+ * builtin_flags() - NOCARE, NOTFILE, TEMPORARY rule
+ * builtin_glob() - GLOB rule
+ * builtin_glob_recursive() - ???
+ * builtin_hdrmacro() - ???
+ * builtin_import() - IMPORT rule
+ * builtin_match() - MATCH rule, regexp matching
+ * builtin_rebuilds() - REBUILDS rule
+ * builtin_rulenames() - RULENAMES ( MODULE ? )
+ * builtin_split_by_characters() - splits the given string into tokens
+ * builtin_varnames() - VARNAMES ( MODULE ? )
+ * get_source_line() - get a frame's file and line number
+ * information
+ */
+
+
+/*
+ * compile_builtin() - define builtin rules
+ */
+
+#define P0 (PARSE *)0
+#define C0 (OBJECT *)0
+
+#if defined( OS_NT ) || defined( OS_CYGWIN )
+ LIST * builtin_system_registry ( FRAME *, int );
+ LIST * builtin_system_registry_names( FRAME *, int );
+#endif
+
+int glob( char const * s, char const * c );
+
+void backtrace ( FRAME * );
+void backtrace_line ( FRAME * );
+void print_source_line( FRAME * );
+
+
+RULE * bind_builtin( char const * name_, LIST * (* f)( FRAME *, int flags ),
+ int flags, char const * * args )
+{
+ FUNCTION * func;
+ RULE * result;
+ OBJECT * name = object_new( name_ );
+
+ func = function_builtin( f, flags, args );
+
+ result = new_rule_body( root_module(), name, func, 1 );
+
+ function_free( func );
+
+ object_free( name );
+
+ return result;
+}
+
+
+RULE * duplicate_rule( char const * name_, RULE * other )
+{
+ OBJECT * name = object_new( name_ );
+ RULE * result = import_rule( other, root_module(), name );
+ object_free( name );
+ return result;
+}
+
+
+/*
+ * load_builtins() - define builtin rules
+ */
+
+void load_builtins()
+{
+ duplicate_rule( "Always",
+ bind_builtin( "ALWAYS",
+ builtin_flags, T_FLAG_TOUCHED, 0 ) );
+
+ duplicate_rule( "Depends",
+ bind_builtin( "DEPENDS",
+ builtin_depends, 0, 0 ) );
+
+ duplicate_rule( "echo",
+ duplicate_rule( "Echo",
+ bind_builtin( "ECHO",
+ builtin_echo, 0, 0 ) ) );
+
+ {
+ char const * args[] = { "message", "*", ":", "result-value", "?", 0 };
+ duplicate_rule( "exit",
+ duplicate_rule( "Exit",
+ bind_builtin( "EXIT",
+ builtin_exit, 0, args ) ) );
+ }
+
+ {
+ char const * args[] = { "directories", "*", ":", "patterns", "*", ":",
+ "case-insensitive", "?", 0 };
+ duplicate_rule( "Glob",
+ bind_builtin( "GLOB", builtin_glob, 0, args ) );
+ }
+
+ {
+ char const * args[] = { "patterns", "*", 0 };
+ bind_builtin( "GLOB-RECURSIVELY",
+ builtin_glob_recursive, 0, args );
+ }
+
+ duplicate_rule( "Includes",
+ bind_builtin( "INCLUDES",
+ builtin_depends, 1, 0 ) );
+
+ {
+ char const * args[] = { "targets", "*", ":", "targets-to-rebuild", "*",
+ 0 };
+ bind_builtin( "REBUILDS",
+ builtin_rebuilds, 0, args );
+ }
+
+ duplicate_rule( "Leaves",
+ bind_builtin( "LEAVES",
+ builtin_flags, T_FLAG_LEAVES, 0 ) );
+
+ duplicate_rule( "Match",
+ bind_builtin( "MATCH",
+ builtin_match, 0, 0 ) );
+
+ {
+ char const * args[] = { "string", ":", "delimiters", 0 };
+ bind_builtin( "SPLIT_BY_CHARACTERS",
+ builtin_split_by_characters, 0, args );
+ }
+
+ duplicate_rule( "NoCare",
+ bind_builtin( "NOCARE",
+ builtin_flags, T_FLAG_NOCARE, 0 ) );
+
+ duplicate_rule( "NOTIME",
+ duplicate_rule( "NotFile",
+ bind_builtin( "NOTFILE",
+ builtin_flags, T_FLAG_NOTFILE, 0 ) ) );
+
+ duplicate_rule( "NoUpdate",
+ bind_builtin( "NOUPDATE",
+ builtin_flags, T_FLAG_NOUPDATE, 0 ) );
+
+ duplicate_rule( "Temporary",
+ bind_builtin( "TEMPORARY",
+ builtin_flags, T_FLAG_TEMP, 0 ) );
+
+ bind_builtin( "ISFILE",
+ builtin_flags, T_FLAG_ISFILE, 0 );
+
+ duplicate_rule( "HdrMacro",
+ bind_builtin( "HDRMACRO",
+ builtin_hdrmacro, 0, 0 ) );
+
+ /* FAIL_EXPECTED is used to indicate that the result of a target build
+ * action should be inverted (ok <=> fail) this can be useful when
+ * performing test runs from Jamfiles.
+ */
+ bind_builtin( "FAIL_EXPECTED",
+ builtin_flags, T_FLAG_FAIL_EXPECTED, 0 );
+
+ bind_builtin( "RMOLD",
+ builtin_flags, T_FLAG_RMOLD, 0 );
+
+ {
+ char const * args[] = { "targets", "*", 0 };
+ bind_builtin( "UPDATE",
+ builtin_update, 0, args );
+ }
+
+ {
+ char const * args[] = { "targets", "*",
+ ":", "log", "?",
+ ":", "ignore-minus-n", "?",
+ ":", "ignore-minus-q", "?", 0 };
+ bind_builtin( "UPDATE_NOW",
+ builtin_update_now, 0, args );
+ }
+
+ {
+ char const * args[] = { "string", "pattern", "replacements", "+", 0 };
+ duplicate_rule( "subst",
+ bind_builtin( "SUBST",
+ builtin_subst, 0, args ) );
+ }
+
+ {
+ char const * args[] = { "module", "?", 0 };
+ bind_builtin( "RULENAMES",
+ builtin_rulenames, 0, args );
+ }
+
+ {
+ char const * args[] = { "module", "?", 0 };
+ bind_builtin( "VARNAMES",
+ builtin_varnames, 0, args );
+ }
+
+ {
+ char const * args[] = { "module", "?", 0 };
+ bind_builtin( "DELETE_MODULE",
+ builtin_delete_module, 0, args );
+ }
+
+ {
+ char const * args[] = { "source_module", "?",
+ ":", "source_rules", "*",
+ ":", "target_module", "?",
+ ":", "target_rules", "*",
+ ":", "localize", "?", 0 };
+ bind_builtin( "IMPORT",
+ builtin_import, 0, args );
+ }
+
+ {
+ char const * args[] = { "module", "?", ":", "rules", "*", 0 };
+ bind_builtin( "EXPORT",
+ builtin_export, 0, args );
+ }
+
+ {
+ char const * args[] = { "levels", "?", 0 };
+ bind_builtin( "CALLER_MODULE",
+ builtin_caller_module, 0, args );
+ }
+
+ {
+ char const * args[] = { "levels", "?", 0 };
+ bind_builtin( "BACKTRACE",
+ builtin_backtrace, 0, args );
+ }
+
+ {
+ char const * args[] = { 0 };
+ bind_builtin( "PWD",
+ builtin_pwd, 0, args );
+ }
+
+ {
+ char const * args[] = { "modules_to_import", "+",
+ ":", "target_module", "?", 0 };
+ bind_builtin( "IMPORT_MODULE",
+ builtin_import_module, 0, args );
+ }
+
+ {
+ char const * args[] = { "module", "?", 0 };
+ bind_builtin( "IMPORTED_MODULES",
+ builtin_imported_modules, 0, args );
+ }
+
+ {
+ char const * args[] = { "instance_module", ":", "class_module", 0 };
+ bind_builtin( "INSTANCE",
+ builtin_instance, 0, args );
+ }
+
+ {
+ char const * args[] = { "sequence", "*", 0 };
+ bind_builtin( "SORT",
+ builtin_sort, 0, args );
+ }
+
+ {
+ char const * args[] = { "path_parts", "*", 0 };
+ bind_builtin( "NORMALIZE_PATH",
+ builtin_normalize_path, 0, args );
+ }
+
+ {
+ char const * args[] = { "args", "*", 0 };
+ bind_builtin( "CALC",
+ builtin_calc, 0, args );
+ }
+
+ {
+ char const * args[] = { "module", ":", "rule", 0 };
+ bind_builtin( "NATIVE_RULE",
+ builtin_native_rule, 0, args );
+ }
+
+ {
+ char const * args[] = { "module", ":", "rule", ":", "version", 0 };
+ bind_builtin( "HAS_NATIVE_RULE",
+ builtin_has_native_rule, 0, args );
+ }
+
+ {
+ char const * args[] = { "module", "*", 0 };
+ bind_builtin( "USER_MODULE",
+ builtin_user_module, 0, args );
+ }
+
+ {
+ char const * args[] = { 0 };
+ bind_builtin( "NEAREST_USER_LOCATION",
+ builtin_nearest_user_location, 0, args );
+ }
+
+ {
+ char const * args[] = { "file", 0 };
+ bind_builtin( "CHECK_IF_FILE",
+ builtin_check_if_file, 0, args );
+ }
+
+#ifdef HAVE_PYTHON
+ {
+ char const * args[] = { "python-module",
+ ":", "function",
+ ":", "jam-module",
+ ":", "rule-name", 0 };
+ bind_builtin( "PYTHON_IMPORT_RULE",
+ builtin_python_import_rule, 0, args );
+ }
+#endif
+
+# if defined( OS_NT ) || defined( OS_CYGWIN )
+ {
+ char const * args[] = { "key_path", ":", "data", "?", 0 };
+ bind_builtin( "W32_GETREG",
+ builtin_system_registry, 0, args );
+ }
+
+ {
+ char const * args[] = { "key_path", ":", "result-type", 0 };
+ bind_builtin( "W32_GETREGNAMES",
+ builtin_system_registry_names, 0, args );
+ }
+# endif
+
+ {
+ char const * args[] = { "command", ":", "*", 0 };
+ duplicate_rule( "SHELL",
+ bind_builtin( "COMMAND",
+ builtin_shell, 0, args ) );
+ }
+
+ {
+ char const * args[] = { "string", 0 };
+ bind_builtin( "MD5",
+ builtin_md5, 0, args );
+ }
+
+ {
+ char const * args[] = { "name", ":", "mode", 0 };
+ bind_builtin( "FILE_OPEN",
+ builtin_file_open, 0, args );
+ }
+
+ {
+ char const * args[] = { "string", ":", "width", 0 };
+ bind_builtin( "PAD",
+ builtin_pad, 0, args );
+ }
+
+ {
+ char const * args[] = { "targets", "*", 0 };
+ bind_builtin( "PRECIOUS",
+ builtin_precious, 0, args );
+ }
+
+ {
+ char const * args [] = { 0 };
+ bind_builtin( "SELF_PATH", builtin_self_path, 0, args );
+ }
+
+ {
+ char const * args [] = { "path", 0 };
+ bind_builtin( "MAKEDIR", builtin_makedir, 0, args );
+ }
+
+ /* Initialize builtin modules. */
+ init_set();
+ init_path();
+ init_regex();
+ init_property_set();
+ init_sequence();
+ init_order();
+}
+
+
+/*
+ * builtin_calc() - CALC rule
+ *
+ * Performs simple mathematical operations on two arguments.
+ */
+
+LIST * builtin_calc( FRAME * frame, int flags )
+{
+ LIST * arg = lol_get( frame->args, 0 );
+
+ LIST * result = L0;
+ long lhs_value;
+ long rhs_value;
+ long result_value;
+ char buffer[ 16 ];
+ char const * lhs;
+ char const * op;
+ char const * rhs;
+ LISTITER iter = list_begin( arg );
+ LISTITER const end = list_end( arg );
+
+ if ( iter == end ) return L0;
+ lhs = object_str( list_item( iter ) );
+
+ iter = list_next( iter );
+ if ( iter == end ) return L0;
+ op = object_str( list_item( iter ) );
+
+ iter = list_next( iter );
+ if ( iter == end ) return L0;
+ rhs = object_str( list_item( iter ) );
+
+ lhs_value = atoi( lhs );
+ rhs_value = atoi( rhs );
+
+ if ( !strcmp( "+", op ) )
+ result_value = lhs_value + rhs_value;
+ else if ( !strcmp( "-", op ) )
+ result_value = lhs_value - rhs_value;
+ else
+ return L0;
+
+ sprintf( buffer, "%ld", result_value );
+ result = list_push_back( result, object_new( buffer ) );
+ return result;
+}
+
+
+/*
+ * builtin_depends() - DEPENDS/INCLUDES rule
+ *
+ * The DEPENDS/INCLUDES builtin rule appends each of the listed sources on the
+ * dependency/includes list of each of the listed targets. It binds both the
+ * targets and sources as TARGETs.
+ */
+
+LIST * builtin_depends( FRAME * frame, int flags )
+{
+ LIST * const targets = lol_get( frame->args, 0 );
+ LIST * const sources = lol_get( frame->args, 1 );
+
+ LISTITER iter = list_begin( targets );
+ LISTITER end = list_end( targets );
+ for ( ; iter != end; iter = list_next( iter ) )
+ {
+ TARGET * const t = bindtarget( list_item( iter ) );
+
+ if ( flags )
+ target_include_many( t, sources );
+ else
+ t->depends = targetlist( t->depends, sources );
+ }
+
+ /* Enter reverse links */
+ iter = list_begin( sources );
+ end = list_end( sources );
+ for ( ; iter != end; iter = list_next( iter ) )
+ {
+ TARGET * const s = bindtarget( list_item( iter ) );
+ if ( flags )
+ {
+ LISTITER t_iter = list_begin( targets );
+ LISTITER const t_end = list_end( targets );
+ for ( ; t_iter != t_end; t_iter = list_next( t_iter ) )
+ s->dependants = targetentry( s->dependants, bindtarget(
+ list_item( t_iter ) )->includes );
+ }
+ else
+ s->dependants = targetlist( s->dependants, targets );
+ }
+
+ return L0;
+}
+
+
+/*
+ * builtin_rebuilds() - REBUILDS rule
+ *
+ * Appends each of the rebuild-targets listed in its second argument to the
+ * rebuilds list for each of the targets listed in its first argument.
+ */
+
+LIST * builtin_rebuilds( FRAME * frame, int flags )
+{
+ LIST * targets = lol_get( frame->args, 0 );
+ LIST * rebuilds = lol_get( frame->args, 1 );
+ LISTITER iter = list_begin( targets );
+ LISTITER const end = list_end( targets );
+ for ( ; iter != end; iter = list_next( iter ) )
+ {
+ TARGET * const t = bindtarget( list_item( iter ) );
+ t->rebuilds = targetlist( t->rebuilds, rebuilds );
+ }
+ return L0;
+}
+
+
+/*
+ * builtin_echo() - ECHO rule
+ *
+ * Echoes the targets to the user. No other actions are taken.
+ */
+
+LIST * builtin_echo( FRAME * frame, int flags )
+{
+ list_print( lol_get( frame->args, 0 ) );
+ printf( "\n" );
+ fflush( stdout );
+ return L0;
+}
+
+
+/*
+ * builtin_exit() - EXIT rule
+ *
+ * Echoes the targets to the user and exits the program with a failure status.
+ */
+
+LIST * builtin_exit( FRAME * frame, int flags )
+{
+ LIST * const code = lol_get( frame->args, 1 );
+ list_print( lol_get( frame->args, 0 ) );
+ printf( "\n" );
+ if ( !list_empty( code ) )
+ exit( atoi( object_str( list_front( code ) ) ) );
+ else
+ exit( EXITBAD ); /* yeech */
+ return L0;
+}
+
+
+/*
+ * builtin_flags() - NOCARE, NOTFILE, TEMPORARY rule
+ *
+ * Marks the target with the appropriate flag, for use by make0(). It binds each
+ * target as a TARGET.
+ */
+
+LIST * builtin_flags( FRAME * frame, int flags )
+{
+ LIST * const targets = lol_get( frame->args, 0 );
+ LISTITER iter = list_begin( targets );
+ LISTITER const end = list_end( targets );
+ for ( ; iter != end; iter = list_next( iter ) )
+ bindtarget( list_item( iter ) )->flags |= flags;
+ return L0;
+}
+
+
+/*
+ * builtin_glob() - GLOB rule
+ */
+
+struct globbing
+{
+ LIST * patterns;
+ LIST * results;
+ LIST * case_insensitive;
+};
+
+
+static void downcase_inplace( char * p )
+{
+ for ( ; *p; ++p )
+ *p = tolower( *p );
+}
+
+
+static void builtin_glob_back( void * closure, OBJECT * file, int status,
+ timestamp const * const time )
+{
+ PROFILE_ENTER( BUILTIN_GLOB_BACK );
+
+ struct globbing * const globbing = (struct globbing *)closure;
+ PATHNAME f;
+ string buf[ 1 ];
+ LISTITER iter;
+ LISTITER end;
+
+ /* Null out directory for matching. We wish we had file_dirscan() pass up a
+ * PATHNAME.
+ */
+ path_parse( object_str( file ), &f );
+ f.f_dir.len = 0;
+
+ /* For globbing, we unconditionally ignore current and parent directory
+ * items. Since these items always exist, there is no reason why caller of
+ * GLOB would want to see them. We could also change file_dirscan(), but
+ * then paths with embedded "." and ".." would not work anywhere.
+ */
+ if ( !strcmp( f.f_base.ptr, "." ) || !strcmp( f.f_base.ptr, ".." ) )
+ {
+ PROFILE_EXIT( BUILTIN_GLOB_BACK );
+ return;
+ }
+
+ string_new( buf );
+ path_build( &f, buf );
+
+ if ( globbing->case_insensitive )
+ downcase_inplace( buf->value );
+
+ iter = list_begin( globbing->patterns );
+ end = list_end( globbing->patterns );
+ for ( ; iter != end; iter = list_next( iter ) )
+ {
+ if ( !glob( object_str( list_item( iter ) ), buf->value ) )
+ {
+ globbing->results = list_push_back( globbing->results, object_copy(
+ file ) );
+ break;
+ }
+ }
+
+ string_free( buf );
+
+ PROFILE_EXIT( BUILTIN_GLOB_BACK );
+}
+
+
+static LIST * downcase_list( LIST * in )
+{
+ LIST * result = L0;
+ LISTITER iter = list_begin( in );
+ LISTITER const end = list_end( in );
+
+ string s[ 1 ];
+ string_new( s );
+
+ for ( ; iter != end; iter = list_next( iter ) )
+ {
+ string_append( s, object_str( list_item( iter ) ) );
+ downcase_inplace( s->value );
+ result = list_push_back( result, object_new( s->value ) );
+ string_truncate( s, 0 );
+ }
+
+ string_free( s );
+ return result;
+}
+
+
+LIST * builtin_glob( FRAME * frame, int flags )
+{
+ LIST * const l = lol_get( frame->args, 0 );
+ LIST * const r = lol_get( frame->args, 1 );
+
+ LISTITER iter;
+ LISTITER end;
+ struct globbing globbing;
+
+ globbing.results = L0;
+ globbing.patterns = r;
+
+ globbing.case_insensitive =
+# if defined( OS_NT ) || defined( OS_CYGWIN )
+ l; /* Always case-insensitive if any files can be found. */
+# else
+ lol_get( frame->args, 2 );
+# endif
+
+ if ( globbing.case_insensitive )
+ globbing.patterns = downcase_list( r );
+
+ iter = list_begin( l );
+ end = list_end( l );
+ for ( ; iter != end; iter = list_next( iter ) )
+ file_dirscan( list_item( iter ), builtin_glob_back, &globbing );
+
+ if ( globbing.case_insensitive )
+ list_free( globbing.patterns );
+
+ return globbing.results;
+}
+
+
+static int has_wildcards( char const * const str )
+{
+ return str[ strcspn( str, "[]*?" ) ] ? 1 : 0;
+}
+
+
+/*
+ * append_if_exists() - if file exists, append it to the list
+ */
+
+static LIST * append_if_exists( LIST * list, OBJECT * file )
+{
+ return file_query( file )
+ ? list_push_back( list, object_copy( file ) )
+ : list ;
+}
+
+
+LIST * glob1( OBJECT * dirname, OBJECT * pattern )
+{
+ LIST * const plist = list_new( object_copy( pattern ) );
+ struct globbing globbing;
+
+ globbing.results = L0;
+ globbing.patterns = plist;
+
+ globbing.case_insensitive
+# if defined( OS_NT ) || defined( OS_CYGWIN )
+ = plist; /* always case-insensitive if any files can be found */
+# else
+ = L0;
+# endif
+
+ if ( globbing.case_insensitive )
+ globbing.patterns = downcase_list( plist );
+
+ file_dirscan( dirname, builtin_glob_back, &globbing );
+
+ if ( globbing.case_insensitive )
+ list_free( globbing.patterns );
+
+ list_free( plist );
+
+ return globbing.results;
+}
+
+
+LIST * glob_recursive( char const * pattern )
+{
+ LIST * result = L0;
+
+ /* Check if there's metacharacters in pattern */
+ if ( !has_wildcards( pattern ) )
+ {
+ /* No metacharacters. Check if the path exists. */
+ OBJECT * const p = object_new( pattern );
+ result = append_if_exists( result, p );
+ object_free( p );
+ }
+ else
+ {
+ /* Have metacharacters in the pattern. Split into dir/name. */
+ PATHNAME path[ 1 ];
+ path_parse( pattern, path );
+
+ if ( path->f_dir.ptr )
+ {
+ LIST * dirs = L0;
+ string dirname[ 1 ];
+ string basename[ 1 ];
+ string_new( dirname );
+ string_new( basename );
+
+ string_append_range( dirname, path->f_dir.ptr,
+ path->f_dir.ptr + path->f_dir.len );
+
+ path->f_grist.ptr = 0;
+ path->f_grist.len = 0;
+ path->f_dir.ptr = 0;
+ path->f_dir.len = 0;
+ path_build( path, basename );
+
+ dirs = has_wildcards( dirname->value )
+ ? glob_recursive( dirname->value )
+ : list_push_back( dirs, object_new( dirname->value ) );
+
+ if ( has_wildcards( basename->value ) )
+ {
+ OBJECT * const b = object_new( basename->value );
+ LISTITER iter = list_begin( dirs );
+ LISTITER const end = list_end( dirs );
+ for ( ; iter != end; iter = list_next( iter ) )
+ result = list_append( result, glob1( list_item( iter ), b )
+ );
+ object_free( b );
+ }
+ else
+ {
+ LISTITER iter = list_begin( dirs );
+ LISTITER const end = list_end( dirs );
+ string file_string[ 1 ];
+ string_new( file_string );
+
+ /* No wildcard in basename. */
+ for ( ; iter != end; iter = list_next( iter ) )
+ {
+ OBJECT * p;
+ path->f_dir.ptr = object_str( list_item( iter ) );
+ path->f_dir.len = strlen( object_str( list_item( iter ) ) );
+ path_build( path, file_string );
+
+ p = object_new( file_string->value );
+
+ result = append_if_exists( result, p );
+
+ object_free( p );
+
+ string_truncate( file_string, 0 );
+ }
+
+ string_free( file_string );
+ }
+
+ string_free( dirname );
+ string_free( basename );
+
+ list_free( dirs );
+ }
+ else
+ {
+ /* No directory, just a pattern. */
+ OBJECT * const p = object_new( pattern );
+ result = list_append( result, glob1( constant_dot, p ) );
+ object_free( p );
+ }
+ }
+
+ return result;
+}
+
+
+/*
+ * builtin_glob_recursive() - ???
+ */
+
+LIST * builtin_glob_recursive( FRAME * frame, int flags )
+{
+ LIST * result = L0;
+ LIST * const l = lol_get( frame->args, 0 );
+ LISTITER iter = list_begin( l );
+ LISTITER const end = list_end( l );
+ for ( ; iter != end; iter = list_next( iter ) )
+ result = list_append( result, glob_recursive( object_str( list_item(
+ iter ) ) ) );
+ return result;
+}
+
+
+/*
+ * builtin_match() - MATCH rule, regexp matching
+ */
+
+LIST * builtin_match( FRAME * frame, int flags )
+{
+ LIST * l;
+ LIST * r;
+ LIST * result = L0;
+ LISTITER l_iter;
+ LISTITER l_end;
+ LISTITER r_iter;
+ LISTITER r_end;
+
+ string buf[ 1 ];
+ string_new( buf );
+
+ /* For each pattern */
+
+ l = lol_get( frame->args, 0 );
+ l_iter = list_begin( l );
+ l_end = list_end( l );
+ for ( ; l_iter != l_end; l_iter = list_next( l_iter ) )
+ {
+ /* Result is cached and intentionally never freed. */
+ regexp * re = regex_compile( list_item( l_iter ) );
+
+ /* For each string to match against. */
+ r = lol_get( frame->args, 1 );
+ r_iter = list_begin( r );
+ r_end = list_end( r );
+ for ( ; r_iter != r_end; r_iter = list_next( r_iter ) )
+ {
+ if ( regexec( re, object_str( list_item( r_iter ) ) ) )
+ {
+ int i;
+ int top;
+
+ /* Find highest parameter */
+
+ for ( top = NSUBEXP; top-- > 1; )
+ if ( re->startp[ top ] )
+ break;
+
+ /* And add all parameters up to highest onto list. */
+ /* Must have parameters to have results! */
+ for ( i = 1; i <= top; ++i )
+ {
+ string_append_range( buf, re->startp[ i ], re->endp[ i ] );
+ result = list_push_back( result, object_new( buf->value ) );
+ string_truncate( buf, 0 );
+ }
+ }
+ }
+ }
+
+ string_free( buf );
+ return result;
+}
+
+
+/*
+ * builtin_split_by_characters() - splits the given string into tokens
+ */
+
+LIST * builtin_split_by_characters( FRAME * frame, int flags )
+{
+ LIST * l1 = lol_get( frame->args, 0 );
+ LIST * l2 = lol_get( frame->args, 1 );
+
+ LIST * result = L0;
+
+ string buf[ 1 ];
+
+ char const * delimiters = object_str( list_front( l2 ) );
+ char * t;
+
+ string_copy( buf, object_str( list_front( l1 ) ) );
+
+ t = strtok( buf->value, delimiters );
+ while ( t )
+ {
+ result = list_push_back( result, object_new( t ) );
+ t = strtok( NULL, delimiters );
+ }
+
+ string_free( buf );
+
+ return result;
+}
+
+
+/*
+ * builtin_hdrmacro() - ???
+ */
+
+LIST * builtin_hdrmacro( FRAME * frame, int flags )
+{
+ LIST * const l = lol_get( frame->args, 0 );
+ LISTITER iter = list_begin( l );
+ LISTITER const end = list_end( l );
+
+ for ( ; iter != end; iter = list_next( iter ) )
+ {
+ TARGET * const t = bindtarget( list_item( iter ) );
+
+ /* Scan file for header filename macro definitions. */
+ if ( DEBUG_HEADER )
+ printf( "scanning '%s' for header file macro definitions\n",
+ object_str( list_item( iter ) ) );
+
+ macro_headers( t );
+ }
+
+ return L0;
+}
+
+
+/*
+ * builtin_rulenames() - RULENAMES ( MODULE ? )
+ *
+ * Returns a list of the non-local rule names in the given MODULE. If MODULE is
+ * not supplied, returns the list of rule names in the global module.
+ */
+
+static void add_rule_name( void * r_, void * result_ )
+{
+ RULE * const r = (RULE *)r_;
+ LIST * * const result = (LIST * *)result_;
+ if ( r->exported )
+ *result = list_push_back( *result, object_copy( r->name ) );
+}
+
+
+LIST * builtin_rulenames( FRAME * frame, int flags )
+{
+ LIST * arg0 = lol_get( frame->args, 0 );
+ LIST * result = L0;
+ module_t * const source_module = bindmodule( list_empty( arg0 )
+ ? 0
+ : list_front( arg0 ) );
+
+ if ( source_module->rules )
+ hashenumerate( source_module->rules, add_rule_name, &result );
+ return result;
+}
+
+
+/*
+ * builtin_varnames() - VARNAMES ( MODULE ? )
+ *
+ * Returns a list of the variable names in the given MODULE. If MODULE is not
+ * supplied, returns the list of variable names in the global module.
+ */
+
+/* helper function for builtin_varnames(), below. Used with hashenumerate, will
+ * prepend the key of each element to the list
+ */
+static void add_hash_key( void * np, void * result_ )
+{
+ LIST * * result = (LIST * *)result_;
+ *result = list_push_back( *result, object_copy( *(OBJECT * *)np ) );
+}
+
+
+LIST * builtin_varnames( FRAME * frame, int flags )
+{
+ LIST * arg0 = lol_get( frame->args, 0 );
+ LIST * result = L0;
+ module_t * source_module = bindmodule( list_empty( arg0 )
+ ? 0
+ : list_front( arg0 ) );
+
+ struct hash * const vars = source_module->variables;
+ if ( vars )
+ hashenumerate( vars, add_hash_key, &result );
+ return result;
+}
+
+
+/*
+ * builtin_delete_module() - DELETE_MODULE ( MODULE ? )
+ *
+ * Clears all rules and variables from the given module.
+ */
+
+LIST * builtin_delete_module( FRAME * frame, int flags )
+{
+ LIST * const arg0 = lol_get( frame->args, 0 );
+ module_t * const source_module = bindmodule( list_empty( arg0 ) ? 0 :
+ list_front( arg0 ) );
+ delete_module( source_module );
+ return L0;
+}
+
+
+/*
+ * unknown_rule() - reports an unknown rule occurrence to the user and exits
+ */
+
+void unknown_rule( FRAME * frame, char const * key, module_t * module,
+ OBJECT * rule_name )
+{
+ backtrace_line( frame->prev );
+ if ( key )
+ printf("%s error", key);
+ else
+ printf("ERROR");
+ printf( ": rule \"%s\" unknown in ", object_str( rule_name ) );
+ if ( module->name )
+ printf( "module \"%s\".\n", object_str( module->name ) );
+ else
+ printf( "root module.\n" );
+ backtrace( frame->prev );
+ exit( 1 );
+}
+
+
+/*
+ * builtin_import() - IMPORT rule
+ *
+ * IMPORT
+ * (
+ * SOURCE_MODULE ? :
+ * SOURCE_RULES * :
+ * TARGET_MODULE ? :
+ * TARGET_RULES * :
+ * LOCALIZE ?
+ * )
+ *
+ * Imports rules from the SOURCE_MODULE into the TARGET_MODULE as local rules.
+ * If either SOURCE_MODULE or TARGET_MODULE is not supplied, it refers to the
+ * global module. SOURCE_RULES specifies which rules from the SOURCE_MODULE to
+ * import; TARGET_RULES specifies the names to give those rules in
+ * TARGET_MODULE. If SOURCE_RULES contains a name that does not correspond to
+ * a rule in SOURCE_MODULE, or if it contains a different number of items than
+ * TARGET_RULES, an error is issued. If LOCALIZE is specified, the rules will be
+ * executed in TARGET_MODULE, with corresponding access to its module local
+ * variables.
+ */
+
+LIST * builtin_import( FRAME * frame, int flags )
+{
+ LIST * source_module_list = lol_get( frame->args, 0 );
+ LIST * source_rules = lol_get( frame->args, 1 );
+ LIST * target_module_list = lol_get( frame->args, 2 );
+ LIST * target_rules = lol_get( frame->args, 3 );
+ LIST * localize = lol_get( frame->args, 4 );
+
+ module_t * target_module = bindmodule( list_empty( target_module_list )
+ ? 0
+ : list_front( target_module_list ) );
+ module_t * source_module = bindmodule( list_empty( source_module_list )
+ ? 0
+ : list_front( source_module_list ) );
+
+ LISTITER source_iter = list_begin( source_rules );
+ LISTITER const source_end = list_end( source_rules );
+ LISTITER target_iter = list_begin( target_rules );
+ LISTITER const target_end = list_end( target_rules );
+
+ for ( ;
+ source_iter != source_end && target_iter != target_end;
+ source_iter = list_next( source_iter ),
+ target_iter = list_next( target_iter ) )
+ {
+ RULE * r;
+ RULE * imported;
+
+ if ( !source_module->rules || !(r = (RULE *)hash_find(
+ source_module->rules, list_item( source_iter ) ) ) )
+ unknown_rule( frame, "IMPORT", source_module, list_item( source_iter
+ ) );
+
+ imported = import_rule( r, target_module, list_item( target_iter ) );
+ if ( !list_empty( localize ) )
+ rule_localize( imported, target_module );
+ /* This rule is really part of some other module. Just refer to it here,
+ * but do not let it out.
+ */
+ imported->exported = 0;
+ }
+
+ if ( source_iter != source_end || target_iter != target_end )
+ {
+ backtrace_line( frame->prev );
+ printf( "import error: length of source and target rule name lists "
+ "don't match!\n" );
+ printf( " source: " );
+ list_print( source_rules );
+ printf( "\n target: " );
+ list_print( target_rules );
+ printf( "\n" );
+ backtrace( frame->prev );
+ exit( 1 );
+ }
+
+ return L0;
+}
+
+
+/*
+ * builtin_export() - EXPORT ( MODULE ? : RULES * )
+ *
+ * The EXPORT rule marks RULES from the SOURCE_MODULE as non-local (and thus
+ * exportable). If an element of RULES does not name a rule in MODULE, an error
+ * is issued.
+ */
+
+LIST * builtin_export( FRAME * frame, int flags )
+{
+ LIST * const module_list = lol_get( frame->args, 0 );
+ LIST * const rules = lol_get( frame->args, 1 );
+ module_t * const m = bindmodule( list_empty( module_list ) ? 0 : list_front(
+ module_list ) );
+
+ LISTITER iter = list_begin( rules );
+ LISTITER const end = list_end( rules );
+ for ( ; iter != end; iter = list_next( iter ) )
+ {
+ RULE * r;
+ if ( !m->rules || !( r = (RULE *)hash_find( m->rules, list_item( iter )
+ ) ) )
+ unknown_rule( frame, "EXPORT", m, list_item( iter ) );
+ r->exported = 1;
+ }
+ return L0;
+}
+
+
+/*
+ * get_source_line() - get a frame's file and line number information
+ *
+ * This is the execution traceback information to be indicated for in debug
+ * output or an error backtrace.
+ */
+
+static void get_source_line( FRAME * frame, char const * * file, int * line )
+{
+ if ( frame->file )
+ {
+ char const * f = object_str( frame->file );
+ int l = frame->line;
+ if ( !strcmp( f, "+" ) )
+ {
+ f = "jambase.c";
+ l += 3;
+ }
+ *file = f;
+ *line = l;
+ }
+ else
+ {
+ *file = "(builtin)";
+ *line = -1;
+ }
+}
+
+
+void print_source_line( FRAME * frame )
+{
+ char const * file;
+ int line;
+ get_source_line( frame, &file, &line );
+ if ( line < 0 )
+ printf( "(builtin):" );
+ else
+ printf( "%s:%d:", file, line );
+}
+
+
+/*
+ * backtrace_line() - print a single line of error backtrace for the given
+ * frame.
+ */
+
+void backtrace_line( FRAME * frame )
+{
+ if ( frame == 0 )
+ {
+ printf( "(no frame):" );
+ }
+ else
+ {
+ print_source_line( frame );
+ printf( " in %s\n", frame->rulename );
+ }
+}
+
+
+/*
+ * backtrace() - Print the entire backtrace from the given frame to the Jambase
+ * which invoked it.
+ */
+
+void backtrace( FRAME * frame )
+{
+ if ( !frame ) return;
+ while ( ( frame = frame->prev ) )
+ backtrace_line( frame );
+}
+
+
+/*
+ * builtin_backtrace() - A Jam version of the backtrace function, taking no
+ * arguments and returning a list of quadruples: FILENAME LINE MODULE. RULENAME
+ * describing each frame. Note that the module-name is always followed by a
+ * period.
+ */
+
+LIST * builtin_backtrace( FRAME * frame, int flags )
+{
+ LIST * const levels_arg = lol_get( frame->args, 0 );
+ int levels = list_empty( levels_arg )
+ ? (int)( (unsigned int)(-1) >> 1 )
+ : atoi( object_str( list_front( levels_arg ) ) );
+
+ LIST * result = L0;
+ for ( ; ( frame = frame->prev ) && levels; --levels )
+ {
+ char const * file;
+ int line;
+ char buf[ 32 ];
+ string module_name[ 1 ];
+ get_source_line( frame, &file, &line );
+ sprintf( buf, "%d", line );
+ string_new( module_name );
+ if ( frame->module->name )
+ {
+ string_append( module_name, object_str( frame->module->name ) );
+ string_append( module_name, "." );
+ }
+ result = list_push_back( result, object_new( file ) );
+ result = list_push_back( result, object_new( buf ) );
+ result = list_push_back( result, object_new( module_name->value ) );
+ result = list_push_back( result, object_new( frame->rulename ) );
+ string_free( module_name );
+ }
+ return result;
+}
+
+
+/*
+ * builtin_caller_module() - CALLER_MODULE ( levels ? )
+ *
+ * If levels is not supplied, returns the name of the module of the rule which
+ * called the one calling this one. If levels is supplied, it is interpreted as
+ * an integer specifying a number of additional levels of call stack to traverse
+ * in order to locate the module in question. If no such module exists, returns
+ * the empty list. Also returns the empty list when the module in question is
+ * the global module. This rule is needed for implementing module import
+ * behavior.
+ */
+
+LIST * builtin_caller_module( FRAME * frame, int flags )
+{
+ LIST * const levels_arg = lol_get( frame->args, 0 );
+ int const levels = list_empty( levels_arg )
+ ? 0
+ : atoi( object_str( list_front( levels_arg ) ) );
+
+ int i;
+ for ( i = 0; ( i < levels + 2 ) && frame->prev; ++i )
+ frame = frame->prev;
+
+ return frame->module == root_module()
+ ? L0
+ : list_new( object_copy( frame->module->name ) );
+}
+
+
+/*
+ * Return the current working directory.
+ *
+ * Usage: pwd = [ PWD ] ;
+ */
+
+LIST * builtin_pwd( FRAME * frame, int flags )
+{
+ return list_new( object_copy( cwd() ) );
+}
+
+
+/*
+ * Adds targets to the list of target that jam will attempt to update.
+ */
+
+LIST * builtin_update( FRAME * frame, int flags )
+{
+ LIST * result = list_copy( targets_to_update() );
+ LIST * arg1 = lol_get( frame->args, 0 );
+ LISTITER iter = list_begin( arg1 ), end = list_end( arg1 );
+ clear_targets_to_update();
+ for ( ; iter != end; iter = list_next( iter ) )
+ mark_target_for_updating( object_copy( list_item( iter ) ) );
+ return result;
+}
+
+extern int anyhow;
+int last_update_now_status;
+
+/* Takes a list of target names and immediately updates them.
+ *
+ * Parameters:
+ * 1. Target list.
+ * 2. Optional file descriptor (converted to a string) for a log file where all
+ * the related build output should be redirected.
+ * 3. If specified, makes the build temporarily disable the -n option, i.e.
+ * forces all needed out-of-date targets to be rebuilt.
+ * 4. If specified, makes the build temporarily disable the -q option, i.e.
+ * forces the build to continue even if one of the targets fails to build.
+ */
+LIST * builtin_update_now( FRAME * frame, int flags )
+{
+ LIST * targets = lol_get( frame->args, 0 );
+ LIST * log = lol_get( frame->args, 1 );
+ LIST * force = lol_get( frame->args, 2 );
+ LIST * continue_ = lol_get( frame->args, 3 );
+ int status;
+ int original_stdout = 0;
+ int original_stderr = 0;
+ int original_noexec = 0;
+ int original_quitquick = 0;
+
+ if ( !list_empty( log ) )
+ {
+ /* Temporarily redirect stdout and stderr to the given log file. */
+ int const fd = atoi( object_str( list_front( log ) ) );
+ original_stdout = dup( 0 );
+ original_stderr = dup( 1 );
+ dup2( fd, 0 );
+ dup2( fd, 1 );
+ }
+
+ if ( !list_empty( force ) )
+ {
+ original_noexec = globs.noexec;
+ globs.noexec = 0;
+ }
+
+ if ( !list_empty( continue_ ) )
+ {
+ original_quitquick = globs.quitquick;
+ globs.quitquick = 0;
+ }
+
+ status = make( targets, anyhow );
+
+ if ( !list_empty( force ) )
+ {
+ globs.noexec = original_noexec;
+ }
+
+ if ( !list_empty( continue_ ) )
+ {
+ globs.quitquick = original_quitquick;
+ }
+
+ if ( !list_empty( log ) )
+ {
+ /* Flush whatever stdio might have buffered, while descriptions 0 and 1
+ * still refer to the log file.
+ */
+ fflush( stdout );
+ fflush( stderr );
+ dup2( original_stdout, 0 );
+ dup2( original_stderr, 1 );
+ close( original_stdout );
+ close( original_stderr );
+ }
+
+ last_update_now_status = status;
+
+ return status ? L0 : list_new( object_copy( constant_ok ) );
+}
+
+
+LIST * builtin_import_module( FRAME * frame, int flags )
+{
+ LIST * const arg1 = lol_get( frame->args, 0 );
+ LIST * const arg2 = lol_get( frame->args, 1 );
+ module_t * const m = list_empty( arg2 )
+ ? root_module()
+ : bindmodule( list_front( arg2 ) );
+ import_module( arg1, m );
+ return L0;
+}
+
+
+LIST * builtin_imported_modules( FRAME * frame, int flags )
+{
+ LIST * const arg0 = lol_get( frame->args, 0 );
+ OBJECT * const module = list_empty( arg0 ) ? 0 : list_front( arg0 );
+ return imported_modules( bindmodule( module ) );
+}
+
+
+LIST * builtin_instance( FRAME * frame, int flags )
+{
+ LIST * arg1 = lol_get( frame->args, 0 );
+ LIST * arg2 = lol_get( frame->args, 1 );
+ module_t * const instance = bindmodule( list_front( arg1 ) );
+ module_t * const class_module = bindmodule( list_front( arg2 ) );
+ instance->class_module = class_module;
+ module_set_fixed_variables( instance, class_module->num_fixed_variables );
+ return L0;
+}
+
+
+LIST * builtin_sort( FRAME * frame, int flags )
+{
+ return list_sort( lol_get( frame->args, 0 ) );
+}
+
+
+LIST * builtin_normalize_path( FRAME * frame, int flags )
+{
+ LIST * arg = lol_get( frame->args, 0 );
+
+ /* First, we iterate over all '/'-separated elements, starting from the end
+ * of string. If we see a '..', we remove a preceeding path element. If we
+ * see '.', we remove it. Removal is done by overwriting data using '\1'
+ * characters. After the whole string has been processed, we do a second
+ * pass, removing any entered '\1' characters.
+ */
+
+ string in[ 1 ];
+ string out[ 1 ];
+ /* Last character of the part of string still to be processed. */
+ char * end;
+ /* Working pointer. */
+ char * current;
+ /* Number of '..' elements seen and not processed yet. */
+ int dotdots = 0;
+ int rooted = 0;
+ OBJECT * result = 0;
+ LISTITER arg_iter = list_begin( arg );
+ LISTITER arg_end = list_end( arg );
+
+ /* Make a copy of input: we should not change it. Prepend a '/' before it as
+ * a guard for the algorithm later on and remember whether it was originally
+ * rooted or not.
+ */
+ string_new( in );
+ string_push_back( in, '/' );
+ for ( ; arg_iter != arg_end; arg_iter = list_next( arg_iter ) )
+ {
+ if ( object_str( list_item( arg_iter ) )[ 0 ] != '\0' )
+ {
+ if ( in->size == 1 )
+ rooted = ( object_str( list_item( arg_iter ) )[ 0 ] == '/' ) ||
+ ( object_str( list_item( arg_iter ) )[ 0 ] == '\\' );
+ else
+ string_append( in, "/" );
+ string_append( in, object_str( list_item( arg_iter ) ) );
+ }
+ }
+
+ /* Convert \ into /. On Windows, paths using / and \ are equivalent, and we
+ * want this function to obtain a canonic representation.
+ */
+ for ( current = in->value, end = in->value + in->size;
+ current < end; ++current )
+ if ( *current == '\\' )
+ *current = '/';
+
+ /* Now we remove any extra path elements by overwriting them with '\1'
+ * characters and cound how many more unused '..' path elements there are
+ * remaining. Note that each remaining path element with always starts with
+ * a '/' character.
+ */
+ for ( end = in->value + in->size - 1; end >= in->value; )
+ {
+ /* Set 'current' to the next occurence of '/', which always exists. */
+ for ( current = end; *current != '/'; --current );
+
+ if ( current == end )
+ {
+ /* Found a trailing or duplicate '/'. Remove it. */
+ *current = '\1';
+ }
+ else if ( ( end - current == 1 ) && ( *( current + 1 ) == '.' ) )
+ {
+ /* Found '/.'. Remove them all. */
+ *current = '\1';
+ *(current + 1) = '\1';
+ }
+ else if ( ( end - current == 2 ) && ( *( current + 1 ) == '.' ) &&
+ ( *( current + 2 ) == '.' ) )
+ {
+ /* Found '/..'. Remove them all. */
+ *current = '\1';
+ *(current + 1) = '\1';
+ *(current + 2) = '\1';
+ ++dotdots;
+ }
+ else if ( dotdots )
+ {
+ memset( current, '\1', end - current + 1 );
+ --dotdots;
+ }
+ end = current - 1;
+ }
+
+ string_new( out );
+
+ /* Now we know that we need to add exactly dotdots '..' path elements to the
+ * front and that our string is either empty or has a '/' as its first
+ * significant character. If we have any dotdots remaining then the passed
+ * path must not have been rooted or else it is invalid we return an empty
+ * list.
+ */
+ if ( dotdots )
+ {
+ if ( rooted )
+ {
+ string_free( out );
+ string_free( in );
+ return L0;
+ }
+ do
+ string_append( out, "/.." );
+ while ( --dotdots );
+ }
+
+ /* Now we actually remove all the path characters marked for removal. */
+ for ( current = in->value; *current; ++current )
+ if ( *current != '\1' )
+ string_push_back( out, *current );
+
+ /* Here we know that our string contains no '\1' characters and is either
+ * empty or has a '/' as its initial character. If the original path was not
+ * rooted and we have a non-empty path we need to drop the initial '/'. If
+ * the original path was rooted and we have an empty path we need to add
+ * back the '/'.
+ */
+ result = object_new( out->size
+ ? out->value + !rooted
+ : ( rooted ? "/" : "." ) );
+
+ string_free( out );
+ string_free( in );
+
+ return list_new( result );
+}
+
+
+LIST * builtin_native_rule( FRAME * frame, int flags )
+{
+ LIST * module_name = lol_get( frame->args, 0 );
+ LIST * rule_name = lol_get( frame->args, 1 );
+
+ module_t * module = bindmodule( list_front( module_name ) );
+
+ native_rule_t * np;
+ if ( module->native_rules && (np = (native_rule_t *)hash_find(
+ module->native_rules, list_front( rule_name ) ) ) )
+ {
+ new_rule_body( module, np->name, np->procedure, 1 );
+ }
+ else
+ {
+ backtrace_line( frame->prev );
+ printf( "error: no native rule \"%s\" defined in module \"%s.\"\n",
+ object_str( list_front( rule_name ) ), object_str( module->name ) );
+ backtrace( frame->prev );
+ exit( 1 );
+ }
+ return L0;
+}
+
+
+LIST * builtin_has_native_rule( FRAME * frame, int flags )
+{
+ LIST * module_name = lol_get( frame->args, 0 );
+ LIST * rule_name = lol_get( frame->args, 1 );
+ LIST * version = lol_get( frame->args, 2 );
+
+ module_t * module = bindmodule( list_front( module_name ) );
+
+ native_rule_t * np;
+ if ( module->native_rules && (np = (native_rule_t *)hash_find(
+ module->native_rules, list_front( rule_name ) ) ) )
+ {
+ int expected_version = atoi( object_str( list_front( version ) ) );
+ if ( np->version == expected_version )
+ return list_new( object_copy( constant_true ) );
+ }
+ return L0;
+}
+
+
+LIST * builtin_user_module( FRAME * frame, int flags )
+{
+ LIST * const module_name = lol_get( frame->args, 0 );
+ LISTITER iter = list_begin( module_name );
+ LISTITER const end = list_end( module_name );
+ for ( ; iter != end; iter = list_next( iter ) )
+ bindmodule( list_item( iter ) )->user_module = 1;
+ return L0;
+}
+
+
+LIST * builtin_nearest_user_location( FRAME * frame, int flags )
+{
+ FRAME * const nearest_user_frame = frame->module->user_module
+ ? frame
+ : frame->prev_user;
+ if ( !nearest_user_frame )
+ return L0;
+
+ {
+ LIST * result = L0;
+ char const * file;
+ int line;
+ char buf[ 32 ];
+
+ get_source_line( nearest_user_frame, &file, &line );
+ sprintf( buf, "%d", line );
+ result = list_push_back( result, object_new( file ) );
+ result = list_push_back( result, object_new( buf ) );
+ return result;
+ }
+}
+
+
+LIST * builtin_check_if_file( FRAME * frame, int flags )
+{
+ LIST * const name = lol_get( frame->args, 0 );
+ return file_is_file( list_front( name ) ) == 1
+ ? list_new( object_copy( constant_true ) )
+ : L0;
+}
+
+
+LIST * builtin_md5( FRAME * frame, int flags )
+{
+ LIST * l = lol_get( frame->args, 0 );
+ char const * s = object_str( list_front( l ) );
+
+ md5_state_t state;
+ md5_byte_t digest[ 16 ];
+ char hex_output[ 16 * 2 + 1 ];
+
+ int di;
+
+ md5_init( &state );
+ md5_append( &state, (md5_byte_t const *)s, strlen( s ) );
+ md5_finish( &state, digest );
+
+ for ( di = 0; di < 16; ++di )
+ sprintf( hex_output + di * 2, "%02x", digest[ di ] );
+
+ return list_new( object_new( hex_output ) );
+}
+
+
+LIST * builtin_file_open( FRAME * frame, int flags )
+{
+ char const * name = object_str( list_front( lol_get( frame->args, 0 ) ) );
+ char const * mode = object_str( list_front( lol_get( frame->args, 1 ) ) );
+ int fd;
+ char buffer[ sizeof( "4294967295" ) ];
+
+ if ( strcmp(mode, "w") == 0 )
+ fd = open( name, O_WRONLY|O_CREAT|O_TRUNC, 0666 );
+ else
+ fd = open( name, O_RDONLY );
+
+ if ( fd != -1 )
+ {
+ sprintf( buffer, "%d", fd );
+ return list_new( object_new( buffer ) );
+ }
+ return L0;
+}
+
+
+LIST * builtin_pad( FRAME * frame, int flags )
+{
+ OBJECT * string = list_front( lol_get( frame->args, 0 ) );
+ char const * width_s = object_str( list_front( lol_get( frame->args, 1 ) ) );
+
+ int current = strlen( object_str( string ) );
+ int desired = atoi( width_s );
+ if ( current >= desired )
+ return list_new( object_copy( string ) );
+ else
+ {
+ char * buffer = BJAM_MALLOC( desired + 1 );
+ int i;
+ LIST * result;
+
+ strcpy( buffer, object_str( string ) );
+ for ( i = current; i < desired; ++i )
+ buffer[ i ] = ' ';
+ buffer[ desired ] = '\0';
+ result = list_new( object_new( buffer ) );
+ BJAM_FREE( buffer );
+ return result;
+ }
+}
+
+
+LIST * builtin_precious( FRAME * frame, int flags )
+{
+ LIST * targets = lol_get( frame->args, 0 );
+ LISTITER iter = list_begin( targets );
+ LISTITER const end = list_end( targets );
+ for ( ; iter != end; iter = list_next( iter ) )
+ bindtarget( list_item( iter ) )->flags |= T_FLAG_PRECIOUS;
+ return L0;
+}
+
+
+LIST * builtin_self_path( FRAME * frame, int flags )
+{
+ extern char const * saved_argv0;
+ char * p = executable_path( saved_argv0 );
+ if ( p )
+ {
+ LIST * const result = list_new( object_new( p ) );
+ free( p );
+ return result;
+ }
+ return L0;
+}
+
+
+LIST * builtin_makedir( FRAME * frame, int flags )
+{
+ LIST * const path = lol_get( frame->args, 0 );
+ return file_mkdir( object_str( list_front( path ) ) )
+ ? L0
+ : list_new( object_copy( list_front( path ) ) );
+}
+
+
+#ifdef HAVE_PYTHON
+
+LIST * builtin_python_import_rule( FRAME * frame, int flags )
+{
+ static int first_time = 1;
+ char const * python_module = object_str( list_front( lol_get( frame->args,
+ 0 ) ) );
+ char const * python_function = object_str( list_front( lol_get( frame->args,
+ 1 ) ) );
+ OBJECT * jam_module = list_front( lol_get( frame->args, 2 ) );
+ OBJECT * jam_rule = list_front( lol_get( frame->args, 3 ) );
+
+ PyObject * pName;
+ PyObject * pModule;
+ PyObject * pDict;
+ PyObject * pFunc;
+
+ if ( first_time )
+ {
+ /* At the first invocation, we add the value of the global
+ * EXTRA_PYTHONPATH to the sys.path Python variable.
+ */
+ LIST * extra = 0;
+ module_t * outer_module = frame->module;
+ LISTITER iter, end;
+
+ first_time = 0;
+
+ extra = var_get( root_module(), constant_extra_pythonpath );
+
+ iter = list_begin( extra ), end = list_end( extra );
+ for ( ; iter != end; iter = list_next( iter ) )
+ {
+ string buf[ 1 ];
+ string_new( buf );
+ string_append( buf, "import sys\nsys.path.append(\"" );
+ string_append( buf, object_str( list_item( iter ) ) );
+ string_append( buf, "\")\n" );
+ PyRun_SimpleString( buf->value );
+ string_free( buf );
+ }
+ }
+
+ pName = PyString_FromString( python_module );
+ pModule = PyImport_Import( pName );
+ Py_DECREF( pName );
+
+ if ( pModule != NULL )
+ {
+ pDict = PyModule_GetDict( pModule );
+ pFunc = PyDict_GetItemString( pDict, python_function );
+
+ if ( pFunc && PyCallable_Check( pFunc ) )
+ {
+ module_t * m = bindmodule( jam_module );
+ new_rule_body( m, jam_rule, function_python( pFunc, 0 ), 0 );
+ }
+ else
+ {
+ if ( PyErr_Occurred() )
+ PyErr_Print();
+ fprintf( stderr, "Cannot find function \"%s\"\n", python_function );
+ }
+ Py_DECREF( pModule );
+ }
+ else
+ {
+ PyErr_Print();
+ fprintf( stderr, "Failed to load \"%s\"\n", python_module );
+ }
+ return L0;
+
+}
+
+#endif /* #ifdef HAVE_PYTHON */
+
+
+void lol_build( LOL * lol, char const * * elements )
+{
+ LIST * l = L0;
+ lol_init( lol );
+
+ while ( elements && *elements )
+ {
+ if ( !strcmp( *elements, ":" ) )
+ {
+ lol_add( lol, l );
+ l = L0;
+ }
+ else
+ {
+ l = list_push_back( l, object_new( *elements ) );
+ }
+ ++elements;
+ }
+
+ if ( l != L0 )
+ lol_add( lol, l );
+}
+
+
+#ifdef HAVE_PYTHON
+
+/*
+ * Calls the bjam rule specified by name passed in 'args'. The name is looked up
+ * in the context of bjam's 'python_interface' module. Returns the list of
+ * strings returned by the rule.
+ */
+
+PyObject * bjam_call( PyObject * self, PyObject * args )
+{
+ FRAME inner[ 1 ];
+ LIST * result;
+ PARSE * p;
+ OBJECT * rulename;
+
+ /* Build up the list of arg lists. */
+ frame_init( inner );
+ inner->prev = 0;
+ inner->prev_user = 0;
+ inner->module = bindmodule( constant_python_interface );
+
+ /* Extract the rule name and arguments from 'args'. */
+
+ /* PyTuple_GetItem returns borrowed reference. */
+ rulename = object_new( PyString_AsString( PyTuple_GetItem( args, 0 ) ) );
+ {
+ int i = 1;
+ int size = PyTuple_Size( args );
+ for ( ; i < size; ++i )
+ {
+ PyObject * a = PyTuple_GetItem( args, i );
+ if ( PyString_Check( a ) )
+ {
+ lol_add( inner->args, list_new( object_new(
+ PyString_AsString( a ) ) ) );
+ }
+ else if ( PySequence_Check( a ) )
+ {
+ LIST * l = 0;
+ int s = PySequence_Size( a );
+ int i = 0;
+ for ( ; i < s; ++i )
+ {
+ /* PySequence_GetItem returns new reference. */
+ PyObject * e = PySequence_GetItem( a, i );
+ char * s = PyString_AsString( e );
+ if ( !s )
+ {
+ printf( "Invalid parameter type passed from Python\n" );
+ exit( 1 );
+ }
+ l = list_push_back( l, object_new( s ) );
+ Py_DECREF( e );
+ }
+ lol_add( inner->args, l );
+ }
+ }
+ }
+
+ result = evaluate_rule( bindrule( rulename, inner->module), rulename, inner );
+ object_free( rulename );
+
+ frame_free( inner );
+
+ /* Convert the bjam list into a Python list result. */
+ {
+ PyObject * const pyResult = PyList_New( list_length( result ) );
+ int i = 0;
+ LISTITER iter = list_begin( result );
+ LISTITER const end = list_end( result );
+ for ( ; iter != end; iter = list_next( iter ) )
+ {
+ PyList_SetItem( pyResult, i, PyString_FromString( object_str(
+ list_item( iter ) ) ) );
+ i += 1;
+ }
+ list_free( result );
+ return pyResult;
+ }
+}
+
+
+/*
+ * Accepts four arguments:
+ * - module name
+ * - rule name,
+ * - Python callable.
+ * - (optional) bjam language function signature.
+ * Creates a bjam rule with the specified name in the specified module, which
+ * will invoke the Python callable.
+ */
+
+PyObject * bjam_import_rule( PyObject * self, PyObject * args )
+{
+ char * module;
+ char * rule;
+ PyObject * func;
+ PyObject * bjam_signature = NULL;
+ module_t * m;
+ RULE * r;
+ OBJECT * module_name;
+ OBJECT * rule_name;
+
+ if ( !PyArg_ParseTuple( args, "ssO|O:import_rule",
+ &module, &rule, &func, &bjam_signature ) )
+ return NULL;
+
+ if ( !PyCallable_Check( func ) )
+ {
+ PyErr_SetString( PyExc_RuntimeError, "Non-callable object passed to "
+ "bjam.import_rule" );
+ return NULL;
+ }
+
+ module_name = *module ? object_new( module ) : 0;
+ m = bindmodule( module_name );
+ if ( module_name )
+ object_free( module_name );
+ rule_name = object_new( rule );
+ new_rule_body( m, rule_name, function_python( func, bjam_signature ), 0 );
+ object_free( rule_name );
+
+ Py_INCREF( Py_None );
+ return Py_None;
+}
+
+
+/*
+ * Accepts four arguments:
+ * - an action name
+ * - an action body
+ * - a list of variable that will be bound inside the action
+ * - integer flags.
+ * Defines an action on bjam side.
+ */
+
+PyObject * bjam_define_action( PyObject * self, PyObject * args )
+{
+ char * name;
+ char * body;
+ module_t * m;
+ PyObject * bindlist_python;
+ int flags;
+ LIST * bindlist = L0;
+ int n;
+ int i;
+ OBJECT * name_str;
+ FUNCTION * body_func;
+
+ if ( !PyArg_ParseTuple( args, "ssO!i:define_action", &name, &body,
+ &PyList_Type, &bindlist_python, &flags ) )
+ return NULL;
+
+ n = PyList_Size( bindlist_python );
+ for ( i = 0; i < n; ++i )
+ {
+ PyObject * next = PyList_GetItem( bindlist_python, i );
+ if ( !PyString_Check( next ) )
+ {
+ PyErr_SetString( PyExc_RuntimeError, "bind list has non-string "
+ "type" );
+ return NULL;
+ }
+ bindlist = list_push_back( bindlist, object_new( PyString_AsString( next
+ ) ) );
+ }
+
+ name_str = object_new( name );
+ body_func = function_compile_actions( body, constant_builtin, -1 );
+ new_rule_actions( root_module(), name_str, body_func, bindlist, flags );
+ function_free( body_func );
+ object_free( name_str );
+
+ Py_INCREF( Py_None );
+ return Py_None;
+}
+
+
+/*
+ * Returns the value of a variable in root Jam module.
+ */
+
+PyObject * bjam_variable( PyObject * self, PyObject * args )
+{
+ char * name;
+ LIST * value;
+ PyObject * result;
+ int i;
+ OBJECT * varname;
+ LISTITER iter;
+ LISTITER end;
+
+ if ( !PyArg_ParseTuple( args, "s", &name ) )
+ return NULL;
+
+ varname = object_new( name );
+ value = var_get( root_module(), varname );
+ object_free( varname );
+ iter = list_begin( value );
+ end = list_end( value );
+
+ result = PyList_New( list_length( value ) );
+ for ( i = 0; iter != end; iter = list_next( iter ), ++i )
+ PyList_SetItem( result, i, PyString_FromString( object_str( list_item(
+ iter ) ) ) );
+
+ return result;
+}
+
+
+PyObject * bjam_backtrace( PyObject * self, PyObject * args )
+{
+ PyObject * result = PyList_New( 0 );
+ struct frame * f = frame_before_python_call;
+
+ for ( ; f = f->prev; )
+ {
+ PyObject * tuple = PyTuple_New( 4 );
+ char const * file;
+ int line;
+ char buf[ 32 ];
+ string module_name[ 1 ];
+
+ get_source_line( f, &file, &line );
+ sprintf( buf, "%d", line );
+ string_new( module_name );
+ if ( f->module->name )
+ {
+ string_append( module_name, object_str( f->module->name ) );
+ string_append( module_name, "." );
+ }
+
+ /* PyTuple_SetItem steals reference. */
+ PyTuple_SetItem( tuple, 0, PyString_FromString( file ) );
+ PyTuple_SetItem( tuple, 1, PyString_FromString( buf ) );
+ PyTuple_SetItem( tuple, 2, PyString_FromString( module_name->value ) );
+ PyTuple_SetItem( tuple, 3, PyString_FromString( f->rulename ) );
+
+ string_free( module_name );
+
+ PyList_Append( result, tuple );
+ Py_DECREF( tuple );
+ }
+ return result;
+}
+
+PyObject * bjam_caller( PyObject * self, PyObject * args )
+{
+ return PyString_FromString( frame_before_python_call->prev->module->name ?
+ object_str( frame_before_python_call->prev->module->name ) : "" );
+}
+
+#endif /* #ifdef HAVE_PYTHON */
+
+
+#ifdef HAVE_POPEN
+
+#if defined(_MSC_VER) || defined(__BORLANDC__)
+ #define popen windows_popen_wrapper
+ #define pclose _pclose
+
+ /*
+ * This wrapper is a workaround for a funny _popen() feature on Windows
+ * where it eats external quotes in some cases. The bug seems to be related
+ * to the quote stripping functionality used by the Windows cmd.exe
+ * interpreter when its /S is not specified.
+ *
+ * Cleaned up quote from the cmd.exe help screen as displayed on Windows XP
+ * SP3:
+ *
+ * 1. If all of the following conditions are met, then quote characters on
+ * the command line are preserved:
+ *
+ * - no /S switch
+ * - exactly two quote characters
+ * - no special characters between the two quote characters, where
+ * special is one of: &<>()@^|
+ * - there are one or more whitespace characters between the two quote
+ * characters
+ * - the string between the two quote characters is the name of an
+ * executable file.
+ *
+ * 2. Otherwise, old behavior is to see if the first character is a quote
+ * character and if so, strip the leading character and remove the last
+ * quote character on the command line, preserving any text after the
+ * last quote character.
+ *
+ * This causes some commands containing quotes not to be executed correctly.
+ * For example:
+ *
+ * "\Long folder name\aaa.exe" --name="Jurko" --no-surname
+ *
+ * would get its outermost quotes stripped and would be executed as:
+ *
+ * \Long folder name\aaa.exe" --name="Jurko --no-surname
+ *
+ * which would report an error about '\Long' not being a valid command.
+ *
+ * cmd.exe help seems to indicate it would be enough to add an extra space
+ * character in front of the command to avoid this but this does not work,
+ * most likely due to the shell first stripping all leading whitespace
+ * characters from the command.
+ *
+ * Solution implemented here is to quote the whole command in case it
+     * contains any quote characters. Note though this will not work correctly
+ * should Windows ever 'fix' this feature.
+ * (03.06.2008.) (Jurko)
+ */
+ static FILE * windows_popen_wrapper( char const * command,
+ char const * mode )
+ {
+ int const extra_command_quotes_needed = !!strchr( command, '"' );
+ string quoted_command;
+ FILE * result;
+
+ if ( extra_command_quotes_needed )
+ {
+            string_new( &quoted_command );
+            string_append( &quoted_command, "\"" );
+            string_append( &quoted_command, command );
+            string_append( &quoted_command, "\"" );
+ command = quoted_command.value;
+ }
+
+ result = _popen( command, "r" );
+
+ if ( extra_command_quotes_needed )
+            string_free( &quoted_command );
+
+ return result;
+ }
+#endif /* defined(_MSC_VER) || defined(__BORLANDC__) */
+
+
+static char * rtrim( char * const s )
+{
+ char * p = s;
+ while ( *p ) ++p;
+ for ( --p; p >= s && isspace( *p ); *p-- = 0 );
+ return s;
+}
+
+
+LIST * builtin_shell( FRAME * frame, int flags )
+{
+ LIST * command = lol_get( frame->args, 0 );
+ LIST * result = L0;
+ string s;
+ int ret;
+ char buffer[ 1024 ];
+ FILE * p = NULL;
+ int exit_status = -1;
+ int exit_status_opt = 0;
+ int no_output_opt = 0;
+ int strip_eol_opt = 0;
+
+ /* Process the variable args options. */
+ {
+ int a = 1;
+ LIST * arg = lol_get( frame->args, a );
+ for ( ; !list_empty( arg ); arg = lol_get( frame->args, ++a ) )
+ {
+ if ( !strcmp( "exit-status", object_str( list_front( arg ) ) ) )
+ exit_status_opt = 1;
+ else if ( !strcmp( "no-output", object_str( list_front( arg ) ) ) )
+ no_output_opt = 1;
+ else if ( !strcmp("strip-eol", object_str( list_front( arg ) ) ) )
+ strip_eol_opt = 1;
+ }
+ }
+
+ /* The following fflush() call seems to be indicated as a workaround for a
+     * popen() bug on POSIX implementations related to synchronizing input
+ * stream positions for the called and the calling process.
+ */
+ fflush( NULL );
+
+ p = popen( object_str( list_front( command ) ), "r" );
+ if ( p == NULL )
+ return L0;
+
+ string_new( &s );
+
+ while ( ( ret = fread( buffer, sizeof( char ), sizeof( buffer ) - 1, p ) ) >
+ 0 )
+ {
+ buffer[ ret ] = 0;
+ if ( !no_output_opt )
+ {
+ if ( strip_eol_opt )
+ rtrim( buffer );
+ string_append( &s, buffer );
+ }
+ }
+
+ exit_status = pclose( p );
+
+ /* The command output is returned first. */
+ result = list_new( object_new( s.value ) );
+ string_free( &s );
+
+ /* The command exit result next. */
+ if ( exit_status_opt )
+ {
+ if ( WIFEXITED( exit_status ) )
+ exit_status = WEXITSTATUS( exit_status );
+ else
+ exit_status = -1;
+ sprintf( buffer, "%d", exit_status );
+ result = list_push_back( result, object_new( buffer ) );
+ }
+
+ return result;
+}
+
+#else /* #ifdef HAVE_POPEN */
+
+LIST * builtin_shell( FRAME * frame, int flags )
+{
+ return L0;
+}
+
+#endif /* #ifdef HAVE_POPEN */
diff --git a/src/kenlm/jam-files/engine/builtins.h b/src/kenlm/jam-files/engine/builtins.h
new file mode 100644
index 0000000..b7a967c
--- /dev/null
+++ b/src/kenlm/jam-files/engine/builtins.h
@@ -0,0 +1,70 @@
+/*
+ * Copyright 1993-2002 Christopher Seiwald and Perforce Software, Inc.
+ *
+ * This file is part of Jam - see jam.c for Copyright information.
+ */
+
+#ifndef JAM_BUILTINS_H
+# define JAM_BUILTINS_H
+
+# include "frames.h"
+
+/*
+ * builtins.h - compile parsed jam statements
+ */
+
+void load_builtins();
+void init_set();
+void init_path();
+void init_regex();
+void init_property_set();
+void init_sequence();
+void init_order();
+
+void property_set_done();
+
+LIST *builtin_calc( FRAME * frame, int flags );
+LIST *builtin_depends( FRAME * frame, int flags );
+LIST *builtin_rebuilds( FRAME * frame, int flags );
+LIST *builtin_echo( FRAME * frame, int flags );
+LIST *builtin_exit( FRAME * frame, int flags );
+LIST *builtin_flags( FRAME * frame, int flags );
+LIST *builtin_glob( FRAME * frame, int flags );
+LIST *builtin_glob_recursive( FRAME * frame, int flags );
+LIST *builtin_subst( FRAME * frame, int flags );
+LIST *builtin_match( FRAME * frame, int flags );
+LIST *builtin_split_by_characters( FRAME * frame, int flags );
+LIST *builtin_hdrmacro( FRAME * frame, int flags );
+LIST *builtin_rulenames( FRAME * frame, int flags );
+LIST *builtin_varnames( FRAME * frame, int flags );
+LIST *builtin_delete_module( FRAME * frame, int flags );
+LIST *builtin_import( FRAME * frame, int flags );
+LIST *builtin_export( FRAME * frame, int flags );
+LIST *builtin_caller_module( FRAME * frame, int flags );
+LIST *builtin_backtrace( FRAME * frame, int flags );
+LIST *builtin_pwd( FRAME * frame, int flags );
+LIST *builtin_update( FRAME * frame, int flags );
+LIST *builtin_update_now( FRAME * frame, int flags );
+LIST *builtin_import_module( FRAME * frame, int flags );
+LIST *builtin_imported_modules( FRAME * frame, int flags );
+LIST *builtin_instance( FRAME * frame, int flags );
+LIST *builtin_sort( FRAME * frame, int flags );
+LIST *builtin_normalize_path( FRAME * frame, int flags );
+LIST *builtin_native_rule( FRAME * frame, int flags );
+LIST *builtin_has_native_rule( FRAME * frame, int flags );
+LIST *builtin_user_module( FRAME * frame, int flags );
+LIST *builtin_nearest_user_location( FRAME * frame, int flags );
+LIST *builtin_check_if_file( FRAME * frame, int flags );
+LIST *builtin_python_import_rule( FRAME * frame, int flags );
+LIST *builtin_shell( FRAME * frame, int flags );
+LIST *builtin_md5( FRAME * frame, int flags );
+LIST *builtin_file_open( FRAME * frame, int flags );
+LIST *builtin_pad( FRAME * frame, int flags );
+LIST *builtin_precious( FRAME * frame, int flags );
+LIST *builtin_self_path( FRAME * frame, int flags );
+LIST *builtin_makedir( FRAME * frame, int flags );
+
+void backtrace( FRAME *frame );
+extern int last_update_now_status;
+
+#endif
diff --git a/src/kenlm/jam-files/engine/bump_version.py b/src/kenlm/jam-files/engine/bump_version.py
new file mode 100644
index 0000000..1771422
--- /dev/null
+++ b/src/kenlm/jam-files/engine/bump_version.py
@@ -0,0 +1,98 @@
+#!/usr/bin/python
+
+# This script is used to bump the bjam version. It takes a single argument, e.g
+#
+# ./bump_version.py 3.1.9
+#
+# and updates all the necessary files.
+#
+# Copyright 2006 Rene Rivera.
+# Copyright 2005-2006 Vladimir Prus.
+# Distributed under the Boost Software License, Version 1.0.
+# (See accompanying file LICENSE_1_0.txt or http://www.boost.org/LICENSE_1_0.txt)
+
+
+import os
+import os.path
+import re
+import string
+import sys
+
+srcdir = os.path.abspath(os.path.dirname(__file__))
+docdir = os.path.abspath(os.path.join(srcdir, "..", "doc"))
+
+
+def edit(file, *replacements):
+ print(" '%s'..." % file)
+ f = open(file, 'r')
+ text = f.read()
+ f.close()
+ for (source, target) in replacements:
+ text, n = re.compile(source, re.MULTILINE).subn(target, text)
+ assert n > 0
+ f = open(file, 'w')
+ f.write(text)
+ f.close()
+
+
+def make_edits(ver):
+ ver03 = (list(ver) + [0] * 3)[0:3]
+ ver02 = ver03[0:2]
+
+ join = lambda v, s : s.join(str(x) for x in v)
+ dotJoin = lambda v : join(v, ".")
+
+ print("Setting version to %s" % str(ver03))
+
+ edit(os.path.join(srcdir, "boost-jam.spec"),
+ ('^(Version:) .*$', '\\1 %s' % dotJoin(ver03)))
+
+ edit(os.path.join(srcdir, "build.jam"),
+ ('^(_VERSION_ =).* ;$', '\\1 %s ;' % join(ver03, " ")))
+
+ edit(os.path.join(docdir, "bjam.qbk"),
+ ('(\[version).*(\])', '\\1: %s\\2' % dotJoin(ver03)),
+ ('(\[def :version:).*(\])', '\\1 %s\\2' % dotJoin(ver03)))
+
+ edit(os.path.join(srcdir, "patchlevel.h"),
+ ('^(#define VERSION_MAJOR) .*$', '\\1 %s' % ver03[0]),
+ ('^(#define VERSION_MINOR) .*$', '\\1 %s' % ver03[1]),
+ ('^(#define VERSION_PATCH) .*$', '\\1 %s' % ver03[2]),
+ ('^(#define VERSION_MAJOR_SYM) .*$', '\\1 "%02d"' % ver03[0]),
+ ('^(#define VERSION_MINOR_SYM) .*$', '\\1 "%02d"' % ver03[1]),
+ ('^(#define VERSION_PATCH_SYM) .*$', '\\1 "%02d"' % ver03[2]),
+ ('^(#define VERSION) .*$', '\\1 "%s"' % dotJoin(ver)),
+ ('^(#define JAMVERSYM) .*$', '\\1 "JAMVERSION=%s"' % dotJoin(ver02)))
+
+
+def main():
+ if len(sys.argv) < 2:
+ print("Expect new version as argument.")
+ sys.exit(1)
+ if len(sys.argv) > 3:
+ print("Too many arguments.")
+ sys.exit(1)
+
+ version = sys.argv[1].split(".")
+ if len(version) > 3:
+ print("Expect version argument in the format: <MAJOR>.<MINOR>.<PATCH>")
+ sys.exit(1)
+
+ try:
+ version = list(int(x) for x in version)
+ except ValueError:
+ print("Version values must be valid integers.")
+ sys.exit(1)
+
+ while version and version[-1] == 0:
+ version.pop()
+
+ if not version:
+ print("At least one of the version values must be positive.")
+ sys.exit()
+
+ make_edits(version)
+
+
+if __name__ == '__main__':
+ main()
diff --git a/src/kenlm/jam-files/engine/class.c b/src/kenlm/jam-files/engine/class.c
new file mode 100644
index 0000000..a4abfaa
--- /dev/null
+++ b/src/kenlm/jam-files/engine/class.c
@@ -0,0 +1,191 @@
+/*
+ * Copyright Vladimir Prus 2003.
+ * Distributed under the Boost Software License, Version 1.0.
+ * (See accompanying file LICENSE_1_0.txt or copy at
+ * http://www.boost.org/LICENSE_1_0.txt)
+ */
+
+#include "class.h"
+
+#include "constants.h"
+#include "frames.h"
+#include "hash.h"
+#include "lists.h"
+#include "object.h"
+#include "rules.h"
+#include "strings.h"
+#include "variable.h"
+
+#include <stdio.h>
+#include <stdlib.h>
+
+
+static struct hash * classes = 0;
+
+
+static void check_defined( LIST * class_names )
+{
+ LISTITER iter = list_begin( class_names );
+ LISTITER const end = list_end( class_names );
+ for ( ; iter != end; iter = list_next( iter ) )
+ {
+ if ( !hash_find( classes, list_item( iter ) ) )
+ {
+ printf( "Class %s is not defined\n", object_str( list_item( iter ) )
+ );
+ abort();
+ }
+ }
+}
+
+
+static OBJECT * class_module_name( OBJECT * declared_name )
+{
+ string name[ 1 ];
+ OBJECT * result;
+
+ string_new( name );
+ string_append( name, "class@" );
+ string_append( name, object_str( declared_name ) );
+
+ result = object_new( name->value );
+ string_free( name );
+
+ return result;
+}
+
+
+struct import_base_data
+{
+ OBJECT * base_name;
+ module_t * base_module;
+ module_t * class_module;
+};
+
+
+static void import_base_rule( void * r_, void * d_ )
+{
+ RULE * r = (RULE *)r_;
+ RULE * ir1;
+ RULE * ir2;
+ struct import_base_data * d = (struct import_base_data *)d_;
+ OBJECT * qname;
+
+ string qualified_name[ 1 ];
+ string_new ( qualified_name );
+ string_append ( qualified_name, object_str( d->base_name ) );
+ string_push_back( qualified_name, '.' );
+ string_append ( qualified_name, object_str( r->name ) );
+ qname = object_new( qualified_name->value );
+ string_free( qualified_name );
+
+ ir1 = import_rule( r, d->class_module, r->name );
+ ir2 = import_rule( r, d->class_module, qname );
+
+ object_free( qname );
+
+ /* Copy 'exported' flag. */
+ ir1->exported = ir2->exported = r->exported;
+
+ /* If we are importing a class method, localize it. */
+ if ( ( r->module == d->base_module ) || ( r->module->class_module &&
+ ( r->module->class_module == d->base_module ) ) )
+ {
+ rule_localize( ir1, d->class_module );
+ rule_localize( ir2, d->class_module );
+ }
+}
+
+
+/*
+ * For each exported rule 'n', declared in class module for base, imports that
+ * rule in 'class' as 'n' and as 'base.n'. Imported rules are localized and
+ * marked as exported.
+ */
+
+static void import_base_rules( module_t * class_, OBJECT * base )
+{
+ OBJECT * module_name = class_module_name( base );
+ module_t * base_module = bindmodule( module_name );
+ LIST * imported;
+ struct import_base_data d;
+ d.base_name = base;
+ d.base_module = base_module;
+ d.class_module = class_;
+ object_free( module_name );
+
+ if ( base_module->rules )
+ hashenumerate( base_module->rules, import_base_rule, &d );
+
+ imported = imported_modules( base_module );
+ import_module( imported, class_ );
+ list_free( imported );
+}
+
+
+OBJECT * make_class_module( LIST * xname, LIST * bases, FRAME * frame )
+{
+ OBJECT * name = class_module_name( list_front( xname ) );
+ OBJECT * * pp;
+ module_t * class_module = 0;
+ module_t * outer_module = frame->module;
+ int found;
+
+ if ( !classes )
+ classes = hashinit( sizeof( OBJECT * ), "classes" );
+
+ pp = (OBJECT * *)hash_insert( classes, list_front( xname ), &found );
+ if ( !found )
+ {
+ *pp = object_copy( list_front( xname ) );
+ }
+ else
+ {
+ printf( "Class %s already defined\n", object_str( list_front( xname ) )
+ );
+ abort();
+ }
+ check_defined( bases );
+
+ class_module = bindmodule( name );
+
+ {
+ /*
+ Initialize variables that Boost.Build inserts in every object.
+ We want to avoid creating the object's hash if it isn't needed.
+ */
+ int num = class_module->num_fixed_variables;
+ module_add_fixed_var( class_module, constant_name, &num );
+ module_add_fixed_var( class_module, constant_class, &num );
+ module_set_fixed_variables( class_module, num );
+ }
+
+ var_set( class_module, constant_name, xname, VAR_SET );
+ var_set( class_module, constant_bases, bases, VAR_SET );
+
+ {
+ LISTITER iter = list_begin( bases );
+ LISTITER const end = list_end( bases );
+ for ( ; iter != end; iter = list_next( iter ) )
+ import_base_rules( class_module, list_item( iter ) );
+ }
+
+ return name;
+}
+
+
+static void free_class( void * xclass, void * data )
+{
+ object_free( *(OBJECT * *)xclass );
+}
+
+
+void class_done( void )
+{
+ if ( classes )
+ {
+ hashenumerate( classes, free_class, (void *)0 );
+ hashdone( classes );
+ classes = 0;
+ }
+}
diff --git a/src/kenlm/jam-files/engine/class.h b/src/kenlm/jam-files/engine/class.h
new file mode 100644
index 0000000..256d298
--- /dev/null
+++ b/src/kenlm/jam-files/engine/class.h
@@ -0,0 +1,14 @@
+/* Copyright Vladimir Prus 2003. Distributed under the Boost */
+/* Software License, Version 1.0. (See accompanying */
+/* file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) */
+
+#ifndef CLASS_H_VP_2003_08_01
+#define CLASS_H_VP_2003_08_01
+
+#include "lists.h"
+#include "frames.h"
+
+OBJECT * make_class_module( LIST * xname, LIST * bases, FRAME * frame );
+void class_done( void );
+
+#endif
diff --git a/src/kenlm/jam-files/engine/command.c b/src/kenlm/jam-files/engine/command.c
new file mode 100644
index 0000000..571b8b6
--- /dev/null
+++ b/src/kenlm/jam-files/engine/command.c
@@ -0,0 +1,84 @@
+/*
+ * Copyright 1993, 1995 Christopher Seiwald.
+ *
+ * This file is part of Jam - see jam.c for Copyright information.
+ */
+
+/* This file is ALSO:
+ * Copyright 2001-2004 David Abrahams.
+ * Distributed under the Boost Software License, Version 1.0.
+ * (See accompanying file LICENSE_1_0.txt or http://www.boost.org/LICENSE_1_0.txt)
+ */
+
+/*
+ * command.c - maintain lists of commands
+ */
+
+#include "jam.h"
+#include "command.h"
+
+#include "lists.h"
+#include "rules.h"
+
+#include <assert.h>
+
+
+/*
+ * cmd_new() - return a new CMD.
+ */
+
+CMD * cmd_new( RULE * rule, LIST * targets, LIST * sources, LIST * shell )
+{
+ CMD * cmd = (CMD *)BJAM_MALLOC( sizeof( CMD ) );
+ FRAME frame[ 1 ];
+
+ assert( cmd );
+ cmd->rule = rule;
+ cmd->shell = shell;
+ cmd->next = 0;
+ cmd->noop = 0;
+
+ lol_init( &cmd->args );
+ lol_add( &cmd->args, targets );
+ lol_add( &cmd->args, sources );
+ string_new( cmd->buf );
+
+ frame_init( frame );
+ frame->module = rule->module;
+ lol_init( frame->args );
+ lol_add( frame->args, list_copy( targets ) );
+ lol_add( frame->args, list_copy( sources ) );
+ function_run_actions( rule->actions->command, frame, stack_global(),
+ cmd->buf );
+ frame_free( frame );
+
+ return cmd;
+}
+
+
+/*
+ * cmd_free() - free a CMD
+ */
+
+void cmd_free( CMD * cmd )
+{
+ lol_free( &cmd->args );
+ list_free( cmd->shell );
+ string_free( cmd->buf );
+ BJAM_FREE( (void *)cmd );
+}
+
+
+/*
+ * cmd_release_targets_and_shell()
+ *
+ * Makes the CMD release its hold on its targets & shell lists and forget
+ * about them. Useful in case caller still has references to those lists and
+ * wants to reuse them after freeing the CMD object.
+ */
+
+void cmd_release_targets_and_shell( CMD * cmd )
+{
+ cmd->args.list[ 0 ] = L0; /* targets */
+ cmd->shell = L0; /* shell */
+}
diff --git a/src/kenlm/jam-files/engine/command.h b/src/kenlm/jam-files/engine/command.h
new file mode 100644
index 0000000..ba65577
--- /dev/null
+++ b/src/kenlm/jam-files/engine/command.h
@@ -0,0 +1,73 @@
+/*
+ * Copyright 1994 Christopher Seiwald.
+ *
+ * This file is part of Jam - see jam.c for Copyright information.
+ */
+
+/*
+ * command.h - the CMD structure and routines to manipulate them
+ *
+ * Both ACTION and CMD contain a rule, targets, and sources. An
+ * ACTION describes a rule to be applied to the given targets and
+ * sources; a CMD is what actually gets executed by the shell. The
+ * differences are due to:
+ *
+ * ACTIONS must be combined if 'actions together' is given.
+ * ACTIONS must be split if 'actions piecemeal' is given.
+ * ACTIONS must have current sources omitted for 'actions updated'.
+ *
+ * The CMD datatype holds a single command that is to be executed
+ * against a target, and they can chain together to represent the
+ * full collection of commands used to update a target.
+ *
+ * Structures:
+ *
+ * CMD - an action, ready to be formatted into a buffer and executed.
+ *
+ * External routines:
+ *
+ * cmd_new() - return a new CMD or 0 if too many args.
+ * cmd_free() - delete CMD and its parts.
+ * cmd_next() - walk the CMD chain.
+ * cmd_release_targets_and_shell() - CMD forgets about its targets & shell.
+ */
+
+
+/*
+ * CMD - an action, ready to be formatted into a buffer and executed.
+ */
+
+#ifndef COMMAND_SW20111118_H
+#define COMMAND_SW20111118_H
+
+#include "lists.h"
+#include "rules.h"
+#include "strings.h"
+
+
+typedef struct _cmd CMD;
+struct _cmd
+{
+ CMD * next;
+ RULE * rule; /* rule->actions contains shell script */
+ LIST * shell; /* $(JAMSHELL) value */
+ LOL args; /* LISTs for $(<), $(>) */
+ string buf[ 1 ]; /* actual commands */
+ int noop; /* no-op commands should be faked instead of executed */
+};
+
+CMD * cmd_new
+(
+ RULE * rule, /* rule (referenced) */
+ LIST * targets, /* $(<) (ownership transferred) */
+ LIST * sources, /* $(>) (ownership transferred) */
+ LIST * shell /* $(JAMSHELL) (ownership transferred) */
+);
+
+void cmd_release_targets_and_shell( CMD * );
+
+void cmd_free( CMD * );
+
+#define cmd_next( c ) ((c)->next)
+
+#endif
diff --git a/src/kenlm/jam-files/engine/compile.c b/src/kenlm/jam-files/engine/compile.c
new file mode 100644
index 0000000..db46937
--- /dev/null
+++ b/src/kenlm/jam-files/engine/compile.c
@@ -0,0 +1,271 @@
+/*
+ * Copyright 1993, 2000 Christopher Seiwald.
+ *
+ * This file is part of Jam - see jam.c for Copyright information.
+ */
+
+/* This file is ALSO:
+ * Copyright 2001-2004 David Abrahams.
+ * Distributed under the Boost Software License, Version 1.0.
+ * (See accompanying file LICENSE_1_0.txt or http://www.boost.org/LICENSE_1_0.txt)
+ */
+
+/*
+ * compile.c - compile parsed jam statements
+ *
+ * External routines:
+ * evaluate_rule() - execute a rule invocation
+ *
+ * Internal routines:
+ * debug_compile() - printf with indent to show rule expansion
+ */
+
+#include "jam.h"
+#include "compile.h"
+
+#include "builtins.h"
+#include "class.h"
+#include "constants.h"
+#include "hash.h"
+#include "hdrmacro.h"
+#include "make.h"
+#include "modules.h"
+#include "parse.h"
+#include "rules.h"
+#include "search.h"
+#include "strings.h"
+#include "variable.h"
+
+#include <assert.h>
+#include <stdarg.h>
+#include <string.h>
+
+
+static void debug_compile( int which, char const * s, FRAME * );
+
+/* Internal functions from builtins.c */
+void backtrace( FRAME * );
+void backtrace_line( FRAME * );
+void print_source_line( FRAME * );
+void unknown_rule( FRAME *, char const * key, module_t *, OBJECT * rule_name );
+
+
+/*
+ * evaluate_rule() - execute a rule invocation
+ */
+
+LIST * evaluate_rule( RULE * rule, OBJECT * rulename, FRAME * frame )
+{
+ LIST * result = L0;
+ profile_frame prof[ 1 ];
+ module_t * prev_module = frame->module;
+
+ if ( DEBUG_COMPILE )
+ {
+ /* Try hard to indicate in which module the rule is going to execute. */
+ char buf[ 256 ] = "";
+ if ( rule->module->name )
+ {
+ strncat( buf, object_str( rule->module->name ), sizeof( buf ) -
+ 1 );
+ strncat( buf, ".", sizeof( buf ) - 1 );
+ if ( strncmp( buf, object_str( rule->name ), strlen( buf ) ) == 0 )
+ {
+ buf[ 0 ] = 0;
+ }
+ }
+ strncat( buf, object_str( rule->name ), sizeof( buf ) - 1 );
+ debug_compile( 1, buf, frame );
+
+ lol_print( frame->args );
+ printf( "\n" );
+ }
+
+ if ( rule->procedure && rule->module != prev_module )
+ {
+ /* Propagate current module to nested rule invocations. */
+ frame->module = rule->module;
+ }
+
+ /* Record current rule name in frame. */
+ if ( rule->procedure )
+ {
+ frame->rulename = object_str( rulename );
+ /* And enter record profile info. */
+ if ( DEBUG_PROFILE )
+ profile_enter( function_rulename( rule->procedure ), prof );
+ }
+
+ /* Check traditional targets $(<) and sources $(>). */
+ if ( !rule->actions && !rule->procedure )
+ unknown_rule( frame, NULL, frame->module, rule->name );
+
+ /* If this rule will be executed for updating the targets then construct the
+ * action for make().
+ */
+ if ( rule->actions )
+ {
+ TARGETS * t;
+
+ /* The action is associated with this instance of this rule. */
+ ACTION * const action = (ACTION *)BJAM_MALLOC( sizeof( ACTION ) );
+ memset( (char *)action, '\0', sizeof( *action ) );
+
+ action->rule = rule;
+ action->targets = targetlist( (TARGETS *)0, lol_get( frame->args, 0 ) );
+ action->sources = targetlist( (TARGETS *)0, lol_get( frame->args, 1 ) );
+ action->refs = 1;
+
+ /* If we have a group of targets all being built using the same action
+ * then we must not allow any of them to be used as sources unless they
+ * are all up to date and their action does not need to be run or their
+ * action has had a chance to finish its work and build all of them
+ * anew.
+ *
+ * Without this it might be possible, in case of a multi-process build,
+ * for their action, triggered to building one of the targets, to still
+ * be running when another target in the group reports as done in order
+ * to avoid triggering the same action again and gets used prematurely.
+ *
+ * As a quick-fix to achieve this effect we make all the targets list
+ * each other as 'included targets'. More precisely, we mark the first
+ * listed target as including all the other targets in the list and vice
+ * versa. This makes anyone depending on any of those targets implicitly
+ * depend on all of them, thus making sure none of those targets can be
+ * used as sources until all of them have been built. Note that direct
+ * dependencies could not have been used due to the 'circular
+ * dependency' issue.
+ *
+ * TODO: Although the current implementation solves the problem of one
+ * of the targets getting used before its action completes its work, it
+ * also forces the action to run whenever any of the targets in the
+ * group is not up to date even though some of them might not actually
+ * be used by the targets being built. We should see how we can
+ * correctly recognize such cases and use that to avoid running the
+ * action if possible and not rebuild targets not actually depending on
+ * targets that are not up to date.
+ *
+ * TODO: Current solution using fake INCLUDES relations may cause
+ * actions to be run when the affected targets are built by multiple
+ * actions. E.g. if we have the following actions registered in the
+ * order specified:
+ * (I) builds targets A & B
+ * (II) builds target B
+ * and we want to build a target depending on target A, then both
+ * actions (I) & (II) will be run, even though the second one does not
+ * have any direct relationship to target A. Consider whether this is
+ * desired behaviour or not. It could be that Boost Build should (or
+ * possibly already does) run all actions registered for a given target
+ * if any of them needs to be run in which case our INCLUDES relations
+ * are not actually causing any actions to be run that would not have
+ * been run without them.
+ */
+ if ( action->targets )
+ {
+ TARGET * const t0 = action->targets->target;
+ for ( t = action->targets->next; t; t = t->next )
+ {
+ target_include( t->target, t0 );
+ target_include( t0, t->target );
+ }
+ }
+
+ /* Append this action to the actions of each target. */
+ for ( t = action->targets; t; t = t->next )
+ t->target->actions = actionlist( t->target->actions, action );
+
+ action_free( action );
+ }
+
+ /* Now recursively compile any parse tree associated with this rule.
+ * function_refer()/function_free() call pair added to ensure the rule does
+ * not get freed while in use.
+ */
+ if ( rule->procedure )
+ {
+ FUNCTION * const function = rule->procedure;
+ function_refer( function );
+ result = function_run( function, frame, stack_global() );
+ function_free( function );
+ }
+
+ if ( DEBUG_PROFILE && rule->procedure )
+ profile_exit( prof );
+
+ if ( DEBUG_COMPILE )
+ debug_compile( -1, 0, frame );
+
+ return result;
+}
+
+
+/*
+ * Call the given rule with the specified parameters. The parameters should be
+ * of type LIST* and end with a NULL pointer. This differs from 'evaluate_rule'
+ * in that frame for the called rule is prepared inside 'call_rule'.
+ *
+ * This function is useful when a builtin rule (in C) wants to call another rule
+ * which might be implemented in Jam.
+ */
+
+LIST * call_rule( OBJECT * rulename, FRAME * caller_frame, ... )
+{
+ va_list va;
+ LIST * result;
+
+ FRAME inner[ 1 ];
+ frame_init( inner );
+ inner->prev = caller_frame;
+ inner->prev_user = caller_frame->module->user_module
+ ? caller_frame
+ : caller_frame->prev_user;
+ inner->module = caller_frame->module;
+
+ va_start( va, caller_frame );
+ for ( ; ; )
+ {
+ LIST * const l = va_arg( va, LIST * );
+ if ( !l )
+ break;
+ lol_add( inner->args, l );
+ }
+ va_end( va );
+
+ result = evaluate_rule( bindrule( rulename, inner->module ), rulename, inner );
+
+ frame_free( inner );
+
+ return result;
+}
+
+
+/*
+ * debug_compile() - printf with indent to show rule expansion
+ */
+
+static void debug_compile( int which, char const * s, FRAME * frame )
+{
+ static int level = 0;
+ static char indent[ 36 ] = ">>>>|>>>>|>>>>|>>>>|>>>>|>>>>|>>>>|";
+
+ if ( which >= 0 )
+ {
+ int i;
+
+ print_source_line( frame );
+
+ i = ( level + 1 ) * 2;
+ while ( i > 35 )
+ {
+ fputs( indent, stdout );
+ i -= 35;
+ }
+
+ printf( "%*.*s ", i, i, indent );
+ }
+
+ if ( s )
+ printf( "%s ", s );
+
+ level += which;
+}
diff --git a/src/kenlm/jam-files/engine/compile.h b/src/kenlm/jam-files/engine/compile.h
new file mode 100644
index 0000000..c70f98b
--- /dev/null
+++ b/src/kenlm/jam-files/engine/compile.h
@@ -0,0 +1,59 @@
+/*
+ * Copyright 1993, 2000 Christopher Seiwald.
+ *
+ * This file is part of Jam - see jam.c for Copyright information.
+ */
+
+/* This file is ALSO:
+ * Copyright 2001-2004 David Abrahams.
+ * Distributed under the Boost Software License, Version 1.0.
+ * (See accompanying file LICENSE_1_0.txt or http://www.boost.org/LICENSE_1_0.txt)
+ */
+
+/*
+ * compile.h - compile parsed jam statements
+ */
+
+#ifndef COMPILE_DWA20011022_H
+#define COMPILE_DWA20011022_H
+
+#include "frames.h"
+#include "lists.h"
+#include "object.h"
+#include "rules.h"
+
+void compile_builtins();
+
+LIST * evaluate_rule( RULE * rule, OBJECT * rulename, FRAME * );
+LIST * call_rule( OBJECT * rulename, FRAME * caller_frame, ... );
+
+/* Flags for compile_set(), etc */
+
+#define ASSIGN_SET 0x00 /* = assign variable */
+#define ASSIGN_APPEND 0x01 /* += append variable */
+#define ASSIGN_DEFAULT 0x02 /* set only if unset */
+
+/* Flags for compile_setexec() */
+
+#define EXEC_UPDATED 0x01 /* executes updated */
+#define EXEC_TOGETHER 0x02 /* executes together */
+#define EXEC_IGNORE 0x04 /* executes ignore */
+#define EXEC_QUIETLY 0x08 /* executes quietly */
+#define EXEC_PIECEMEAL 0x10 /* executes piecemeal */
+#define EXEC_EXISTING 0x20 /* executes existing */
+
+/* Conditions for compile_if() */
+
+#define EXPR_NOT 0 /* ! cond */
+#define EXPR_AND 1 /* cond && cond */
+#define EXPR_OR 2 /* cond || cond */
+#define EXPR_EXISTS 3 /* arg */
+#define EXPR_EQUALS 4 /* arg = arg */
+#define EXPR_NOTEQ 5 /* arg != arg */
+#define EXPR_LESS 6 /* arg < arg */
+#define EXPR_LESSEQ 7 /* arg <= arg */
+#define EXPR_MORE 8 /* arg > arg */
+#define EXPR_MOREEQ 9 /* arg >= arg */
+#define EXPR_IN 10 /* arg in arg */
+
+#endif
diff --git a/src/kenlm/jam-files/engine/constants.c b/src/kenlm/jam-files/engine/constants.c
new file mode 100644
index 0000000..891d322
--- /dev/null
+++ b/src/kenlm/jam-files/engine/constants.c
@@ -0,0 +1,186 @@
+/*
+ * Copyright 2011 Steven Watanabe
+ *
+ * This file is part of Jam - see jam.c for Copyright information.
+ */
+
+/*
+ * constants.c - constant objects
+ *
+ * External functions:
+ *
+ * constants_init() - initialize constants
+ * constants_done() - free constants
+ *
+ */
+
+#include "constants.h"
+
+
+void constants_init( void )
+{
+ constant_empty = object_new( "" );
+ constant_dot = object_new( "." );
+ constant_plus = object_new( "+" );
+ constant_star = object_new( "*" );
+ constant_question_mark = object_new( "?" );
+ constant_ok = object_new( "ok" );
+ constant_true = object_new( "true" );
+ constant_name = object_new( "__name__" );
+ constant_bases = object_new( "__bases__" );
+ constant_class = object_new( "__class__" );
+ constant_typecheck = object_new( ".typecheck" );
+ constant_builtin = object_new( "(builtin)" );
+ constant_HCACHEFILE = object_new( "HCACHEFILE" );
+ constant_HCACHEMAXAGE = object_new( "HCACHEMAXAGE" );
+ constant_HDRSCAN = object_new( "HDRSCAN" );
+ constant_HDRRULE = object_new( "HDRRULE" );
+ constant_BINDRULE = object_new( "BINDRULE" );
+ constant_LOCATE = object_new( "LOCATE" );
+ constant_SEARCH = object_new( "SEARCH" );
+ constant_JAM_SEMAPHORE = object_new( "JAM_SEMAPHORE" );
+ constant_TIMING_RULE = object_new( "__TIMING_RULE__" );
+ constant_ACTION_RULE = object_new( "__ACTION_RULE__" );
+ constant_JAMSHELL = object_new( "JAMSHELL" );
+ constant_TMPDIR = object_new( "TMPDIR" );
+ constant_TMPNAME = object_new( "TMPNAME" );
+ constant_TMPFILE = object_new( "TMPFILE" );
+ constant_STDOUT = object_new( "STDOUT" );
+ constant_STDERR = object_new( "STDERR" );
+ constant_JAMDATE = object_new( "JAMDATE" );
+ constant_JAM_TIMESTAMP_RESOLUTION = object_new( "JAM_TIMESTAMP_RESOLUTION" );
+ constant_JAM_VERSION = object_new( "JAM_VERSION" );
+ constant_JAMUNAME = object_new( "JAMUNAME" );
+ constant_ENVIRON = object_new( ".ENVIRON" );
+ constant_ARGV = object_new( "ARGV" );
+ constant_all = object_new( "all" );
+ constant_PARALLELISM = object_new( "PARALLELISM" );
+ constant_KEEP_GOING = object_new( "KEEP_GOING" );
+ constant_other = object_new( "[OTHER]" );
+ constant_total = object_new( "[TOTAL]" );
+ constant_FILE_DIRSCAN = object_new( "FILE_DIRSCAN" );
+ constant_MAIN = object_new( "MAIN" );
+ constant_MAIN_MAKE = object_new( "MAIN_MAKE" );
+ constant_MAKE_MAKE0 = object_new( "MAKE_MAKE0" );
+ constant_MAKE_MAKE1 = object_new( "MAKE_MAKE1" );
+ constant_MAKE_MAKE0SORT = object_new( "MAKE_MAKE0SORT" );
+ constant_BINDMODULE = object_new( "BINDMODULE" );
+ constant_IMPORT_MODULE = object_new( "IMPORT_MODULE" );
+ constant_BUILTIN_GLOB_BACK = object_new( "BUILTIN_GLOB_BACK" );
+ constant_timestamp = object_new( "timestamp" );
+ constant_python = object_new("__python__");
+ constant_python_interface = object_new( "python_interface" );
+ constant_extra_pythonpath = object_new( "EXTRA_PYTHONPATH" );
+ constant_MAIN_PYTHON = object_new( "MAIN_PYTHON" );
+}
+
+void constants_done( void )
+{
+ object_free( constant_empty );
+ object_free( constant_dot );
+ object_free( constant_plus );
+ object_free( constant_star );
+ object_free( constant_question_mark );
+ object_free( constant_ok );
+ object_free( constant_true );
+ object_free( constant_name );
+ object_free( constant_bases );
+ object_free( constant_class );
+ object_free( constant_typecheck );
+ object_free( constant_builtin );
+ object_free( constant_HCACHEFILE );
+ object_free( constant_HCACHEMAXAGE );
+ object_free( constant_HDRSCAN );
+ object_free( constant_HDRRULE );
+ object_free( constant_BINDRULE );
+ object_free( constant_LOCATE );
+ object_free( constant_SEARCH );
+ object_free( constant_JAM_SEMAPHORE );
+ object_free( constant_TIMING_RULE );
+ object_free( constant_ACTION_RULE );
+ object_free( constant_JAMSHELL );
+ object_free( constant_TMPDIR );
+ object_free( constant_TMPNAME );
+ object_free( constant_TMPFILE );
+ object_free( constant_STDOUT );
+ object_free( constant_STDERR );
+ object_free( constant_JAMDATE );
+ object_free( constant_JAM_TIMESTAMP_RESOLUTION );
+ object_free( constant_JAM_VERSION );
+ object_free( constant_JAMUNAME );
+ object_free( constant_ENVIRON );
+ object_free( constant_ARGV );
+ object_free( constant_all );
+ object_free( constant_PARALLELISM );
+ object_free( constant_KEEP_GOING );
+ object_free( constant_other );
+ object_free( constant_total );
+ object_free( constant_FILE_DIRSCAN );
+ object_free( constant_MAIN );
+ object_free( constant_MAIN_MAKE );
+ object_free( constant_MAKE_MAKE0 );
+ object_free( constant_MAKE_MAKE1 );
+ object_free( constant_MAKE_MAKE0SORT );
+ object_free( constant_BINDMODULE );
+ object_free( constant_IMPORT_MODULE );
+ object_free( constant_BUILTIN_GLOB_BACK );
+ object_free( constant_timestamp );
+ object_free( constant_python );
+ object_free( constant_python_interface );
+ object_free( constant_extra_pythonpath );
+ object_free( constant_MAIN_PYTHON );
+}
+
+OBJECT * constant_empty;
+OBJECT * constant_dot;
+OBJECT * constant_plus;
+OBJECT * constant_star;
+OBJECT * constant_question_mark;
+OBJECT * constant_ok;
+OBJECT * constant_true;
+OBJECT * constant_name;
+OBJECT * constant_bases;
+OBJECT * constant_class;
+OBJECT * constant_typecheck;
+OBJECT * constant_builtin;
+OBJECT * constant_HCACHEFILE;
+OBJECT * constant_HCACHEMAXAGE;
+OBJECT * constant_HDRSCAN;
+OBJECT * constant_HDRRULE;
+OBJECT * constant_BINDRULE;
+OBJECT * constant_LOCATE;
+OBJECT * constant_SEARCH;
+OBJECT * constant_JAM_SEMAPHORE;
+OBJECT * constant_TIMING_RULE;
+OBJECT * constant_ACTION_RULE;
+OBJECT * constant_JAMSHELL;
+OBJECT * constant_TMPDIR;
+OBJECT * constant_TMPNAME;
+OBJECT * constant_TMPFILE;
+OBJECT * constant_STDOUT;
+OBJECT * constant_STDERR;
+OBJECT * constant_JAMDATE;
+OBJECT * constant_JAM_VERSION;
+OBJECT * constant_JAMUNAME;
+OBJECT * constant_ENVIRON;
+OBJECT * constant_ARGV;
+OBJECT * constant_all;
+OBJECT * constant_PARALLELISM;
+OBJECT * constant_KEEP_GOING;
+OBJECT * constant_other;
+OBJECT * constant_total;
+OBJECT * constant_FILE_DIRSCAN;
+OBJECT * constant_MAIN;
+OBJECT * constant_MAIN_MAKE;
+OBJECT * constant_MAKE_MAKE0;
+OBJECT * constant_MAKE_MAKE1;
+OBJECT * constant_MAKE_MAKE0SORT;
+OBJECT * constant_BINDMODULE;
+OBJECT * constant_IMPORT_MODULE;
+OBJECT * constant_BUILTIN_GLOB_BACK;
+OBJECT * constant_timestamp;
+OBJECT * constant_JAM_TIMESTAMP_RESOLUTION;
+OBJECT * constant_python;
+OBJECT * constant_python_interface;
+OBJECT * constant_extra_pythonpath;
+OBJECT * constant_MAIN_PYTHON;
diff --git a/src/kenlm/jam-files/engine/constants.h b/src/kenlm/jam-files/engine/constants.h
new file mode 100644
index 0000000..60d7073
--- /dev/null
+++ b/src/kenlm/jam-files/engine/constants.h
@@ -0,0 +1,73 @@
+/*
+ * Copyright 2011 Steven Watanabe
+ *
+ * This file is part of Jam - see jam.c for Copyright information.
+ */
+
+/*
+ * constants.h - constant objects
+ */
+
+#ifndef BOOST_JAM_CONSTANTS_H
+#define BOOST_JAM_CONSTANTS_H
+
+#include "object.h"
+
+void constants_init( void );
+void constants_done( void );
+
+extern OBJECT * constant_empty; /* "" */
+extern OBJECT * constant_dot; /* "." */
+extern OBJECT * constant_plus; /* "+" */
+extern OBJECT * constant_star; /* "*" */
+extern OBJECT * constant_question_mark; /* "?" */
+extern OBJECT * constant_ok; /* "ok" */
+extern OBJECT * constant_true; /* "true" */
+extern OBJECT * constant_name; /* "__name__" */
+extern OBJECT * constant_bases; /* "__bases__" */
+extern OBJECT * constant_class; /* "__class__" */
+extern OBJECT * constant_typecheck; /* ".typecheck" */
+extern OBJECT * constant_builtin; /* "(builtin)" */
+extern OBJECT * constant_HCACHEFILE; /* "HCACHEFILE" */
+extern OBJECT * constant_HCACHEMAXAGE; /* "HCACHEMAXAGE" */
+extern OBJECT * constant_HDRSCAN; /* "HDRSCAN" */
+extern OBJECT * constant_HDRRULE; /* "HDRRULE" */
+extern OBJECT * constant_BINDRULE; /* "BINDRULE" */
+extern OBJECT * constant_LOCATE; /* "LOCATE" */
+extern OBJECT * constant_SEARCH; /* "SEARCH" */
+extern OBJECT * constant_JAM_SEMAPHORE; /* "JAM_SEMAPHORE" */
+extern OBJECT * constant_TIMING_RULE; /* "__TIMING_RULE__" */
+extern OBJECT * constant_ACTION_RULE; /* "__ACTION_RULE__" */
+extern OBJECT * constant_JAMSHELL; /* "JAMSHELL" */
+extern OBJECT * constant_TMPDIR; /* "TMPDIR" */
+extern OBJECT * constant_TMPNAME; /* "TMPNAME" */
+extern OBJECT * constant_TMPFILE; /* "TMPFILE" */
+extern OBJECT * constant_STDOUT; /* "STDOUT" */
+extern OBJECT * constant_STDERR; /* "STDERR" */
+extern OBJECT * constant_JAMDATE; /* "JAMDATE" */
+extern OBJECT * constant_JAM_TIMESTAMP_RESOLUTION; /* "JAM_TIMESTAMP_RESOLUTION" */
+extern OBJECT * constant_JAM_VERSION; /* "JAM_VERSION" */
+extern OBJECT * constant_JAMUNAME; /* "JAMUNAME" */
+extern OBJECT * constant_ENVIRON; /* ".ENVIRON" */
+extern OBJECT * constant_ARGV; /* "ARGV" */
+extern OBJECT * constant_all; /* "all" */
+extern OBJECT * constant_PARALLELISM; /* "PARALLELISM" */
+extern OBJECT * constant_KEEP_GOING; /* "KEEP_GOING" */
+extern OBJECT * constant_other; /* "[OTHER]" */
+extern OBJECT * constant_total; /* "[TOTAL]" */
+extern OBJECT * constant_FILE_DIRSCAN; /* "FILE_DIRSCAN" */
+extern OBJECT * constant_MAIN; /* "MAIN" */
+extern OBJECT * constant_MAIN_MAKE; /* "MAIN_MAKE" */
+extern OBJECT * constant_MAKE_MAKE0; /* "MAKE_MAKE0" */
+extern OBJECT * constant_MAKE_MAKE1; /* "MAKE_MAKE1" */
+extern OBJECT * constant_MAKE_MAKE0SORT; /* "MAKE_MAKE0SORT" */
+extern OBJECT * constant_BINDMODULE; /* "BINDMODULE" */
+extern OBJECT * constant_IMPORT_MODULE; /* "IMPORT_MODULE" */
+extern OBJECT * constant_BUILTIN_GLOB_BACK; /* "BUILTIN_GLOB_BACK" */
+extern OBJECT * constant_timestamp; /* "timestamp" */
+extern OBJECT * constant_python; /* "__python__" */
+extern OBJECT * constant_python_interface; /* "python_interface" */
+extern OBJECT * constant_extra_pythonpath; /* "EXTRA_PYTHONPATH" */
+extern OBJECT * constant_MAIN_PYTHON; /* "MAIN_PYTHON" */
+
+#endif
diff --git a/src/kenlm/jam-files/engine/cwd.c b/src/kenlm/jam-files/engine/cwd.c
new file mode 100644
index 0000000..7ebe970
--- /dev/null
+++ b/src/kenlm/jam-files/engine/cwd.c
@@ -0,0 +1,83 @@
+/*
+ * Copyright 2002. Vladimir Prus
+ * Copyright 2005. Rene Rivera
+ * Distributed under the Boost Software License, Version 1.0.
+ * (See accompanying file LICENSE_1_0.txt or copy at
+ * http://www.boost.org/LICENSE_1_0.txt)
+ */
+
+#include "cwd.h"
+
+#include "jam.h"
+#include "mem.h"
+#include "pathsys.h"
+
+#include <assert.h>
+#include <errno.h>
+#include <limits.h>
+
+/* MinGW on Windows declares PATH_MAX in limits.h */
+#if defined( NT ) && !defined( __GNUC__ )
+# include <direct.h>
+# define PATH_MAX _MAX_PATH
+#else
+# include <unistd.h>
+# if defined( __COMO__ )
+# include <linux/limits.h>
+# endif
+#endif
+
+#ifndef PATH_MAX
+# define PATH_MAX 1024
+#endif
+
+
+static OBJECT * cwd_;
+
+
+void cwd_init( void )
+{
+ int buffer_size = PATH_MAX;
+ char * cwd_buffer = 0;
+ int error;
+
+ assert( !cwd_ );
+
+ do
+ {
+ char * const buffer = BJAM_MALLOC_RAW( buffer_size );
+ cwd_buffer = getcwd( buffer, buffer_size );
+ error = errno;
+ if ( cwd_buffer )
+ {
+ /* We store the path using its canonical/long/key format. */
+ OBJECT * const cwd = object_new( cwd_buffer );
+ cwd_ = path_as_key( cwd );
+ object_free( cwd );
+ }
+ buffer_size *= 2;
+ BJAM_FREE_RAW( buffer );
+ }
+ while ( !cwd_ && error == ERANGE );
+
+ if ( !cwd_ )
+ {
+ perror( "can not get current working directory" );
+ exit( EXITBAD );
+ }
+}
+
+
+OBJECT * cwd( void )
+{
+ assert( cwd_ );
+ return cwd_;
+}
+
+
+void cwd_done( void )
+{
+ assert( cwd_ );
+ object_free( cwd_ );
+ cwd_ = NULL;
+}
diff --git a/src/kenlm/jam-files/engine/cwd.h b/src/kenlm/jam-files/engine/cwd.h
new file mode 100644
index 0000000..886714a
--- /dev/null
+++ b/src/kenlm/jam-files/engine/cwd.h
@@ -0,0 +1,35 @@
+/*
+ * Copyright 2002. Vladimir Prus
+ * Distributed under the Boost Software License, Version 1.0.
+ * (See accompanying file LICENSE_1_0.txt or copy at
+ * http://www.boost.org/LICENSE_1_0.txt)
+ */
+
+/*
+ * cwd.h - manages the current working folder information
+ */
+
+#ifndef CWD_H
+#define CWD_H
+
+#include "object.h"
+
+
+/* cwd() - returns the current working folder */
+OBJECT * cwd( void );
+
+/* cwd_init() - initialize the cwd module functionality
+ *
+ * The current working folder can not change in Boost Jam so this function
+ * gets the current working folder information from the OS and stores it
+ * internally.
+ *
+ * Expected to be called at program startup before the program's current
+ * working folder has been changed
+ */
+void cwd_init( void );
+
+/* cwd_done() - cleans up the cwd module functionality */
+void cwd_done( void );
+
+#endif
diff --git a/src/kenlm/jam-files/engine/debian/changelog b/src/kenlm/jam-files/engine/debian/changelog
new file mode 100644
index 0000000..2908428
--- /dev/null
+++ b/src/kenlm/jam-files/engine/debian/changelog
@@ -0,0 +1,72 @@
+bjam (3.1.12-1) unstable; urgency=low
+
+ * New upstream release.
+
+ -- Rene Rivera <grafik@redshift-software.com> Sat, 01 Oct 2005 00:00:00 +0000
+
+bjam (3.1.11-1) unstable; urgency=low
+
+ * New upstream release.
+
+ -- Rene Rivera <grafik@redshift-software.com> Sat, 30 Apr 2005 00:00:00 +0000
+
+bjam (3.1.10-1) unstable; urgency=low
+
+ * New upstream release.
+
+ -- Rene Rivera <grafik@redshift-software.com> Tue, 1 Jun 2004 05:42:35 +0000
+
+bjam (3.1.9-2) unstable; urgency=low
+
+ * Use default value of BOOST_BUILD_PATH is not is set in environment.
+
+ -- Vladimir Prus <ghost@zigzag.lvk.cs.msu.su> Wed, 17 Dec 2003 16:44:35 +0300
+
+bjam (3.1.9-1) unstable; urgency=low
+
+ * Implement NATIVE_FILE builtin and several native rules.
+
+ -- Vladimir Prus <ghost@zigzag.lvk.cs.msu.su> Thu, 11 Dec 2003 13:15:26 +0300
+
+bjam (3.1.8-1) unstable; urgency=low
+
+ * New upstream release.
+
+ -- Vladimir Prus <ghost@zigzag.lvk.cs.msu.su> Tue, 4 Nov 2003 20:50:43 +0300
+
+bjam (3.1.7-1) unstable; urgency=low
+
+ * New upstream release.
+
+ -- Vladimir Prus <ghost@zigzag.lvk.cs.msu.su> Thu, 11 Sep 2003 10:45:44 +0400
+
+bjam (3.1.6-1) unstable; urgency=low
+
+ * New upstream release.
+
+ -- Vladimir Prus <ghost@zigzag.lvk.cs.msu.su> Tue, 1 Jul 2003 09:12:18 +0400
+
+bjam (3.1.5-1) unstable; urgency=low
+
+ * New upstream release.
+
+ -- Vladimir Prus <ghost@zigzag.lvk.cs.msu.su> Mon, 19 May 2003 14:05:13 +0400
+
+bjam (3.1.3-2) unstable; urgency=low
+
+ * Changed Debian package to be similar to Jam's package.
+
+ -- Vladimir Prus <ghost@cs.msu.su> Thu, 10 Oct 2002 18:43:26 +0400
+
+bjam (3.1.3-1) unstable; urgency=low
+
+ * New upstream release.
+
+ -- Vladimir Prus <ghost@zigzag.lvk.cs.msu.su> Fri, 4 Oct 2002 18:16:54 +0400
+
+bjam (3.1.2-1) unstable; urgency=low
+
+ * Initial Release.
+
+ -- Vladimir Prus <ghost@cs.msu.su> Wed, 14 Aug 2002 14:08:00 +0400
+
diff --git a/src/kenlm/jam-files/engine/debian/control b/src/kenlm/jam-files/engine/debian/control
new file mode 100644
index 0000000..c7f1519
--- /dev/null
+++ b/src/kenlm/jam-files/engine/debian/control
@@ -0,0 +1,16 @@
+Source: bjam
+Section: devel
+Priority: optional
+Maintainer: Vladimir Prus <ghost@cs.msu.su>
+Build-Depends: debhelper (>> 3.0.0), docbook-to-man, bison
+Standards-Version: 3.5.2
+
+Package: bjam
+Architecture: any
+Depends: ${shlibs:Depends}
+Description: Build tool
+ Boost.Jam is a portable build tool with its own interpreted language, which
+ allows to implement rather complex logic in a readable way and without
+ resorting to external programs. It is a descendant of Jam/MR tool modified to
+ suit the needs of Boost.Build. In particular, modules and rule parameters
+ were added, as well as several new builtins.
diff --git a/src/kenlm/jam-files/engine/debian/copyright b/src/kenlm/jam-files/engine/debian/copyright
new file mode 100644
index 0000000..f72e4e3
--- /dev/null
+++ b/src/kenlm/jam-files/engine/debian/copyright
@@ -0,0 +1,25 @@
+This package was debianized by Vladimir Prus <ghost@cs.msu.su> on
+Wed, 17 July 2002, 19:27:00 +0400.
+
+Copyright:
+
+ /+\
+ +\ Copyright 1993-2002 Christopher Seiwald and Perforce Software, Inc.
+ \+/
+
+ This is Release 2.4 of Jam/MR, a make-like program.
+
+ License is hereby granted to use this software and distribute it
+ freely, as long as this copyright notice is retained and modifications
+ are clearly marked.
+
+ ALL WARRANTIES ARE HEREBY DISCLAIMED.
+
+Some portions are also:
+
+ Copyright 2001-2006 David Abrahams.
+ Copyright 2002-2006 Rene Rivera.
+ Copyright 2003-2006 Vladimir Prus.
+
+ Distributed under the Boost Software License, Version 1.0.
+ (See accompanying file LICENSE_1_0.txt or http://www.boost.org/LICENSE_1_0.txt)
diff --git a/src/kenlm/jam-files/engine/debian/jam.man.sgml b/src/kenlm/jam-files/engine/debian/jam.man.sgml
new file mode 100644
index 0000000..ee21d4d
--- /dev/null
+++ b/src/kenlm/jam-files/engine/debian/jam.man.sgml
@@ -0,0 +1,236 @@
+<!doctype refentry PUBLIC "-//OASIS//DTD DocBook V4.1//EN" [
+
+<!-- Process this file with docbook-to-man to generate an nroff manual
+ page: `docbook-to-man manpage.sgml > manpage.1'. You may view
+ the manual page with: `docbook-to-man manpage.sgml | nroff -man |
+ less'. A typical entry in a Makefile or Makefile.am is:
+
+manpage.1: manpage.sgml
+ docbook-to-man $< > $@
+ -->
+
+ <!ENTITY dhfirstname "<firstname>Yann</firstname>">
+ <!ENTITY dhsurname "<surname>Dirson</surname>">
+ <!-- Please adjust the date whenever revising the manpage. -->
+ <!ENTITY dhdate "<date>mai 23, 2001</date>">
+ <!ENTITY dhemail "<email>dirson@debian.org</email>">
+ <!ENTITY dhusername "Yann Dirson">
+ <!ENTITY dhpackage "jam">
+
+ <!ENTITY debian "<productname>Debian GNU/Linux</productname>">
+ <!ENTITY gnu "<acronym>GNU</acronym>">
+]>
+
+<refentry>
+ <refentryinfo>
+ <address>
+ &dhemail;
+ </address>
+ <author>
+ &dhfirstname;
+ &dhsurname;
+ </author>
+ <copyright>
+ <year>2001</year>
+ <holder>&dhusername;</holder>
+ </copyright>
+ &dhdate;
+ </refentryinfo>
+
+ <refmeta>
+ <refentrytitle>JAM</refentrytitle>
+ <manvolnum>1</manvolnum>
+ </refmeta>
+
+ <refnamediv>
+ <refname>Jam/MR</refname>
+ <refpurpose>Make(1) Redux</refpurpose>
+ </refnamediv>
+
+ <refsynopsisdiv>
+ <cmdsynopsis>
+ <command>jam</command>
+
+ <arg><option>-a</option></arg>
+ <arg><option>-n</option></arg>
+ <arg><option>-v</option></arg>
+
+ <arg><option>-d <replaceable/debug/</option></arg>
+ <arg><option>-f <replaceable/jambase/</option></arg>
+ <arg><option>-j <replaceable/jobs/</option></arg>
+ <arg><option>-o <replaceable/actionsfile/</option></arg>
+ <arg><option>-s <replaceable/var/=<replaceable/value/</option></arg>
+ <arg><option>-t <replaceable/target/</option></arg>
+
+ <arg repeat><option><replaceable/target/</option></arg>
+ </cmdsynopsis>
+ </refsynopsisdiv>
+
+ <refsect1>
+ <title>DESCRIPTION</title>
+
+ <para>Jam is a program construction tool, like make(1).</para>
+
+ <para>Jam recursively builds target files from source files, using
+ dependency information and updating actions expressed in the
+ Jambase file, which is written in jam's own interpreted language.
+ The default Jambase is compiled into jam and provides a
+    boilerplate for common use, relying on a user-provided file
+ "Jamfile" to enumerate actual targets and sources.</para>
+ </refsect1>
+
+ <refsect1>
+ <title>OPTIONS</title>
+
+ <variablelist>
+ <varlistentry>
+ <term><option/-a/</term>
+ <listitem>
+ <para>Build all targets anyway, even if they are up-to-date.</para>
+ </listitem>
+ </varlistentry>
+
+ <varlistentry>
+ <term><option>-d <replaceable/n/</option></term>
+ <listitem>
+          <para>Enable cumulative debugging levels from 1 to
+ <replaceable/n/. Interesting values are:
+
+ <glosslist>
+ <glossentry><glossterm/1/ <glossdef><simpara/Show
+ actions (the default)/</glossdef></glossentry>
+
+ <glossentry><glossterm/2/ <glossdef><simpara/Show
+ "quiet" actions and display all action
+ text/</glossdef></glossentry>
+
+ <glossentry><glossterm/3/ <glossdef><simpara>Show
+ dependency analysis, and target/source
+ timestamps/paths</simpara></glossdef></glossentry>
+
+ <glossentry><glossterm/4/ <glossdef><simpara/Show shell
+ arguments/</glossdef></glossentry>
+
+ <glossentry><glossterm/5/ <glossdef><simpara/Show rule
+ invocations and variable
+ expansions/</glossdef></glossentry>
+
+ <glossentry><glossterm/6/ <glossdef><simpara>Show
+ directory/header file/archive
+ scans</simpara></glossdef></glossentry>
+
+ <glossentry><glossterm/7/ <glossdef><simpara/Show
+ variable settings/</glossdef></glossentry>
+
+ <glossentry><glossterm/8/ <glossdef><simpara/Show
+ variable fetches/</glossdef></glossentry>
+
+ <glossentry><glossterm/9/ <glossdef><simpara/Show
+ variable manipulation, scanner
+ tokens/</glossdef></glossentry>
+ </glosslist>
+ </para>
+ </listitem>
+ </varlistentry>
+
+ <varlistentry>
+ <term><option>-d +<replaceable/n/</option></term>
+ <listitem>
+ <para>Enable debugging level <replaceable/n/.</para>
+ </listitem>
+ </varlistentry>
+
+ <varlistentry>
+ <term><option/-d 0/</term>
+ <listitem>
+ <para>Turn off all debugging levels. Only errors are not
+ suppressed.</para>
+ </listitem>
+ </varlistentry>
+
+ <varlistentry>
+ <term><option>-f <replaceable/jambase/</option></term>
+ <listitem>
+ <para>Read <replaceable/jambase/ instead of using the
+ built-in Jambase. Only one <option/-f/ flag is permitted,
+ but the <replaceable/jambase/ may explicitly include other
+ files.</para>
+ </listitem>
+ </varlistentry>
+
+ <varlistentry>
+ <term><option>-j <replaceable/n/</option></term>
+ <listitem>
+ <para>Run up to <replaceable/n/ shell commands concurrently
+ (UNIX and NT only). The default is 1.</para>
+ </listitem>
+ </varlistentry>
+
+ <varlistentry>
+ <term><option/-n/</term>
+ <listitem>
+ <para>Don't actually execute the updating actions, but do
+ everything else. This changes the debug level default to
+ <option/-d2/.</para>
+ </listitem>
+ </varlistentry>
+
+ <varlistentry>
+ <term><option>-o <replaceable/file/</option></term>
+ <listitem>
+ <para>Write the updating actions to the specified file
+ instead of running them (or outputting them, as on the
+ Mac).</para>
+ </listitem>
+ </varlistentry>
+
+ <varlistentry>
+ <term><option>-s <replaceable/var/=<replaceable/value/</option></term>
+ <listitem>
+ <para>Set the variable <replaceable/var/ to
+ <replaceable/value/, overriding both internal variables and
+ variables imported from the environment. </para>
+ </listitem>
+ </varlistentry>
+
+ <varlistentry>
+ <term><option>-t <replaceable/target/</option></term>
+ <listitem>
+ <para>Rebuild <replaceable/target/ and everything that
+ depends on it, even if it is up-to-date.</para>
+ </listitem>
+ </varlistentry>
+
+ <varlistentry>
+ <term><option/-v/</term>
+ <listitem>
+ <para>Print the version of jam and exit.</para>
+ </listitem>
+ </varlistentry>
+
+ </variablelist>
+ </refsect1>
+
+ <refsect1>
+ <title>SEE ALSO</title>
+
+ <para>Jam is documented fully in HTML pages available on Debian
+ systems from
+ <filename>/usr/share/doc/jam/Jam.html</filename>.</para>
+ </refsect1>
+
+ <refsect1>
+ <title>AUTHOR</title>
+
+ <para>This manual page was created by &dhusername; &dhemail; from
+ the <filename/Jam.html/ documentation, for the &debian; system
+ (but may be used by others).</para>
+ </refsect1>
+</refentry>
+
+<!-- Keep this comment at the end of the file
+Local variables:
+sgml-omittag:t
+sgml-shorttag:t
+End:
+-->
diff --git a/src/kenlm/jam-files/engine/debian/rules b/src/kenlm/jam-files/engine/debian/rules
new file mode 100755
index 0000000..756052a
--- /dev/null
+++ b/src/kenlm/jam-files/engine/debian/rules
@@ -0,0 +1,73 @@
+#!/usr/bin/make -f
+# Sample debian/rules that uses debhelper.
+# GNU copyright 1997 to 1999 by Joey Hess.
+# GNU copyright 2001 by Yann Dirson.
+
+# This is the debian/rules file for packages jam and ftjam
+# It should be usable with both packages without any change
+
+# Uncomment this to turn on verbose mode.
+#export DH_VERBOSE=1
+
+# This is the debhelper compatibility version to use.
+export DH_COMPAT=3
+
+topdir=$(shell pwd)
+
+jam=bjam
+binname=bjam
+
+build: build-stamp
+build-stamp: debian/jam.1
+ dh_testdir
+
+ ./build.sh
+
+ touch build-stamp
+
+%.1: %.man.sgml
+ /usr/bin/docbook-to-man $< > $@
+
+clean:
+ dh_testdir
+ dh_testroot
+ rm -f build-stamp
+ rm -rf bin.*
+ rm -f jam0 debian/jam.1
+ dh_clean
+
+install: build
+ dh_testdir
+ dh_testroot
+ dh_clean -k
+ dh_installdirs
+
+ install -d ${topdir}/debian/${jam}/usr/bin
+ install -m755 bin.linuxx86/bjam ${topdir}/debian/${jam}/usr/bin/
+ install -d ${topdir}/debian/${jam}/usr/share/man/man1/
+ install -m644 debian/jam.1 ${topdir}/debian/${jam}/usr/share/man/man1/${binname}.1
+
+
+# Build architecture-independent files here.
+binary-indep: build install
+# We have nothing to do by default.
+
+# Build architecture-dependent files here.
+binary-arch: build install
+ dh_testdir
+ dh_testroot
+ dh_installdocs README RELNOTES Jambase *.html
+# dh_installemacsen
+# dh_undocumented
+ dh_installchangelogs
+ dh_strip
+ dh_compress
+ dh_fixperms
+ dh_installdeb
+ dh_shlibdeps
+ dh_gencontrol
+ dh_md5sums
+ dh_builddeb
+
+binary: binary-indep binary-arch
+.PHONY: build clean binary-indep binary-arch binary install configure
diff --git a/src/kenlm/jam-files/engine/debug.c b/src/kenlm/jam-files/engine/debug.c
new file mode 100644
index 0000000..2a65655
--- /dev/null
+++ b/src/kenlm/jam-files/engine/debug.c
@@ -0,0 +1,145 @@
+/*
+ * Copyright 2005. Rene Rivera
+ * Distributed under the Boost Software License, Version 1.0.
+ * (See accompanying file LICENSE_1_0.txt or copy at
+ * http://www.boost.org/LICENSE_1_0.txt)
+ */
+
+#include "jam.h"
+#include "debug.h"
+
+#include "hash.h"
+
+
+static profile_frame * profile_stack = 0;
+static struct hash * profile_hash = 0;
+static profile_info profile_other = { 0 };
+static profile_info profile_total = { 0 };
+
+
+profile_frame * profile_init( OBJECT * rulename, profile_frame * frame )
+{
+ if ( DEBUG_PROFILE ) profile_enter( rulename, frame );
+ return frame;
+}
+
+
+void profile_enter( OBJECT * rulename, profile_frame * frame )
+{
+ if ( DEBUG_PROFILE )
+ {
+ clock_t start = clock();
+ profile_info * p;
+
+ if ( !profile_hash && rulename )
+ profile_hash = hashinit( sizeof( profile_info ), "profile" );
+
+ if ( rulename )
+ {
+ int found;
+ p = (profile_info *)hash_insert( profile_hash, rulename, &found );
+ if ( !found )
+ {
+ p->name = rulename;
+ p->cumulative = 0;
+ p->net = 0;
+ p->num_entries = 0;
+ p->stack_count = 0;
+ p->memory = 0;
+ }
+ }
+ else
+ {
+ p = &profile_other;
+ }
+
+ ++p->num_entries;
+ ++p->stack_count;
+
+ frame->info = p;
+
+ frame->caller = profile_stack;
+ profile_stack = frame;
+
+ frame->entry_time = clock();
+ frame->overhead = 0;
+ frame->subrules = 0;
+
+ /* caller pays for the time it takes to play with the hash table */
+ if ( frame->caller )
+ frame->caller->overhead += frame->entry_time - start;
+ }
+}
+
+
+void profile_memory( long mem )
+{
+ if ( DEBUG_PROFILE )
+ if ( profile_stack && profile_stack->info )
+ profile_stack->info->memory += mem;
+}
+
+
+void profile_exit( profile_frame * frame )
+{
+ if ( DEBUG_PROFILE )
+ {
+ /* Cumulative time for this call. */
+ clock_t const t = clock() - frame->entry_time - frame->overhead;
+ /* If this rule is already present on the stack, do not add the time for
+ * this instance.
+ */
+ if ( frame->info->stack_count == 1 )
+ frame->info->cumulative += t;
+        /* Net time does not depend on presence of the same rule in call stack.
+ */
+ frame->info->net += t - frame->subrules;
+
+ if ( frame->caller )
+ {
+ /* Caller's cumulative time must account for this overhead. */
+ frame->caller->overhead += frame->overhead;
+ frame->caller->subrules += t;
+ }
+ /* Pop this stack frame. */
+ --frame->info->stack_count;
+ profile_stack = frame->caller;
+ }
+}
+
+
+static void dump_profile_entry( void * p_, void * ignored )
+{
+ profile_info * p = (profile_info *)p_;
+ unsigned long mem_each = ( p->memory / ( p->num_entries ? p->num_entries : 1
+ ) );
+ double cumulative = p->cumulative;
+ double net = p->net;
+ double q = p->net;
+ q /= ( p->num_entries ? p->num_entries : 1 );
+ cumulative /= CLOCKS_PER_SEC;
+ net /= CLOCKS_PER_SEC;
+ q /= CLOCKS_PER_SEC;
+ if ( !ignored )
+ {
+ profile_total.cumulative += p->net;
+ profile_total.memory += p->memory;
+ }
+ printf( "%10ld %12.6f %12.6f %12.8f %10ld %10ld %s\n", p->num_entries,
+ cumulative, net, q, p->memory, mem_each, object_str( p->name ) );
+}
+
+
+void profile_dump()
+{
+ if ( profile_hash )
+ {
+ printf( "%10s %12s %12s %12s %10s %10s %s\n", "--count--", "--gross--",
+ "--net--", "--each--", "--mem--", "--each--", "--name--" );
+ hashenumerate( profile_hash, dump_profile_entry, 0 );
+ profile_other.name = constant_other;
+ dump_profile_entry( &profile_other, 0 );
+ profile_total.name = constant_total;
+ dump_profile_entry( &profile_total, (void *)1 );
+ }
+}
diff --git a/src/kenlm/jam-files/engine/debug.h b/src/kenlm/jam-files/engine/debug.h
new file mode 100644
index 0000000..4151d27
--- /dev/null
+++ b/src/kenlm/jam-files/engine/debug.h
@@ -0,0 +1,55 @@
+/*
+ * Copyright 2005. Rene Rivera
+ * Distributed under the Boost Software License, Version 1.0.
+ * (See accompanying file LICENSE_1_0.txt or copy at
+ * http://www.boost.org/LICENSE_1_0.txt)
+ */
+
+#ifndef BJAM_DEBUG_H
+#define BJAM_DEBUG_H
+
+#include "constants.h"
+#include "object.h"
+#include <time.h>
+
+
+typedef struct profile_info
+{
+ /* name of rule being called */
+ OBJECT * name;
+ /* cumulative time spent in rule */
+ clock_t cumulative;
+ /* time spent in rule proper */
+ clock_t net;
+ /* number of time rule was entered */
+ unsigned long num_entries;
+ /* number of the times this function is present in stack */
+ unsigned long stack_count;
+ /* bytes of memory allocated by the call */
+ unsigned long memory;
+} profile_info;
+
+typedef struct profile_frame
+{
+ /* permanent storage where data accumulates */
+ profile_info * info;
+ /* overhead for profiling in this call */
+ clock_t overhead;
+ /* time of last entry to rule */
+ clock_t entry_time;
+ /* stack frame of caller */
+ struct profile_frame * caller;
+ /* time spent in subrules */
+ clock_t subrules;
+} profile_frame;
+
+profile_frame * profile_init( OBJECT * rulename, profile_frame * );
+void profile_enter( OBJECT * rulename, profile_frame * );
+void profile_memory( long mem );
+void profile_exit( profile_frame * );
+void profile_dump();
+
+#define PROFILE_ENTER( scope ) profile_frame PROF_ ## scope, *PROF_ ## scope ## _p = profile_init( constant_ ## scope, &PROF_ ## scope )
+#define PROFILE_EXIT( scope ) profile_exit( PROF_ ## scope ## _p )
+
+#endif
diff --git a/src/kenlm/jam-files/engine/execcmd.c b/src/kenlm/jam-files/engine/execcmd.c
new file mode 100644
index 0000000..f751cbf
--- /dev/null
+++ b/src/kenlm/jam-files/engine/execcmd.c
@@ -0,0 +1,121 @@
+/*
+ * Copyright 1993, 1995 Christopher Seiwald.
+ * Copyright 2007 Noel Belcourt.
+ *
+ * Utility functions shared between different exec*.c platform specific
+ * implementation modules.
+ *
+ * This file is part of Jam - see jam.c for Copyright information.
+ */
+
+#include "jam.h"
+#include "execcmd.h"
+
+#include <assert.h>
+#include <stdio.h>
+#include <string.h>
+
+
+/* Internal interrupt counter. */
+static int intr;
+
+
+/* Constructs a list of command-line elements using the format specified by the
+ * given shell list.
+ *
+ * Given argv array should have at least MAXARGC + 1 elements.
+ * Slot numbers may be between 0 and 998 (inclusive).
+ *
+ * Constructed argv list will be zero terminated. Character arrays referenced by
+ * the argv structure elements will be either elements from the given shell list,
+ * internal static buffers or the given command string and should thus not be
+ * considered owned by or released via the argv structure and should be
+ * considered invalidated by the next argv_from_shell() call.
+ *
+ * Shell list elements:
+ * - Starting with '%' - represent the command string.
+ * - Starting with '!' - represent the slot number (increased by one).
+ * - Anything else - used as a literal.
+ * - If no '%' element is found, the command string is appended as an extra.
+ */
+
+void argv_from_shell( char const * * argv, LIST * shell, char const * command,
+ int const slot )
+{
+ static char jobno[ 4 ];
+
+ int i;
+ int gotpercent = 0;
+ LISTITER iter = list_begin( shell );
+ LISTITER end = list_end( shell );
+
+ assert( 0 <= slot );
+ assert( slot < 999 );
+ sprintf( jobno, "%d", slot + 1 );
+
+ for ( i = 0; iter != end && i < MAXARGC; ++i, iter = list_next( iter ) )
+ {
+ switch ( object_str( list_item( iter ) )[ 0 ] )
+ {
+ case '%': argv[ i ] = command; ++gotpercent; break;
+ case '!': argv[ i ] = jobno; break;
+ default : argv[ i ] = object_str( list_item( iter ) );
+ }
+ }
+
+ if ( !gotpercent )
+ argv[ i++ ] = command;
+
+ argv[ i ] = NULL;
+}
+
+
+/* Returns whether the given command string contains lines longer than the given
+ * maximum.
+ */
+int check_cmd_for_too_long_lines( char const * command, int const max,
+ int * const error_length, int * const error_max_length )
+{
+ while ( *command )
+ {
+ size_t const l = strcspn( command, "\n" );
+ if ( l > max )
+ {
+ *error_length = l;
+ *error_max_length = max;
+ return EXEC_CHECK_LINE_TOO_LONG;
+ }
+ command += l;
+ if ( *command )
+ ++command;
+ }
+ return EXEC_CHECK_OK;
+}
+
+
+/* Checks whether the given shell list is actually a request to execute raw
+ * commands without an external shell.
+ */
+int is_raw_command_request( LIST * shell )
+{
+ return !list_empty( shell ) &&
+ !strcmp( object_str( list_front( shell ) ), "%" ) &&
+ list_next( list_begin( shell ) ) == list_end( shell );
+}
+
+
+/* Returns whether an interrupt has been detected so far. */
+
+int interrupted( void )
+{
+ return intr != 0;
+}
+
+
+/* Internal interrupt handler. */
+
+void onintr( int disp )
+{
+ ++intr;
+ printf( "...interrupted\n" );
+}
diff --git a/src/kenlm/jam-files/engine/execcmd.h b/src/kenlm/jam-files/engine/execcmd.h
new file mode 100644
index 0000000..ab145aa
--- /dev/null
+++ b/src/kenlm/jam-files/engine/execcmd.h
@@ -0,0 +1,102 @@
+/*
+ * Copyright 1993, 1995 Christopher Seiwald.
+ *
+ * This file is part of Jam - see jam.c for Copyright information.
+ */
+
+/*
+ * execcmd.h - execute a shell script.
+ *
+ * Defines the interface to be implemented in platform specific implementation
+ * modules as well as different shared utility functions prepared in the
+ * execcmd.c module.
+ */
+
+#ifndef EXECCMD_H
+#define EXECCMD_H
+
+#include "lists.h"
+#include "strings.h"
+#include "timestamp.h"
+
+
+typedef struct timing_info
+{
+ double system;
+ double user;
+ timestamp start;
+ timestamp end;
+} timing_info;
+
+typedef void (* ExecCmdCallback)
+(
+ void * const closure,
+ int const status,
+ timing_info const * const,
+ char const * const cmd_stdout,
+ char const * const cmd_stderr,
+ int const cmd_exit_reason
+);
+
+/* Status codes passed to ExecCmdCallback routines. */
+#define EXEC_CMD_OK 0
+#define EXEC_CMD_FAIL 1
+#define EXEC_CMD_INTR 2
+
+int exec_check
+(
+ string const * command,
+ LIST * * pShell,
+ int * error_length,
+ int * error_max_length
+);
+
+/* exec_check() return codes. */
+#define EXEC_CHECK_OK 101
+#define EXEC_CHECK_NOOP 102
+#define EXEC_CHECK_LINE_TOO_LONG 103
+#define EXEC_CHECK_TOO_LONG 104
+
+void exec_cmd
+(
+ string const * command,
+ ExecCmdCallback func,
+ void * closure,
+ LIST * shell
+);
+
+void exec_wait();
+
+
+/******************************************************************************
+ * *
+ * Utility functions defined in the execcmd.c module. *
+ * *
+ ******************************************************************************/
+
+/* Constructs a list of command-line elements using the format specified by the
+ * given shell list.
+ */
+void argv_from_shell( char const * * argv, LIST * shell, char const * command,
+ int const slot );
+
+/* Interrupt routine bumping the internal interrupt counter. Needs to be
+ * registered by platform specific exec*.c modules.
+ */
+void onintr( int disp );
+
+/* Returns whether an interrupt has been detected so far. */
+int interrupted( void );
+
+/* Checks whether the given shell list is actually a request to execute raw
+ * commands without an external shell.
+ */
+int is_raw_command_request( LIST * shell );
+
+/* Utility worker for exec_check() checking whether all the given command lines
+ * are under the specified length limit.
+ */
+int check_cmd_for_too_long_lines( char const * command, int const max,
+ int * const error_length, int * const error_max_length );
+
+#endif
diff --git a/src/kenlm/jam-files/engine/execnt.c b/src/kenlm/jam-files/engine/execnt.c
new file mode 100644
index 0000000..ca89bd0
--- /dev/null
+++ b/src/kenlm/jam-files/engine/execnt.c
@@ -0,0 +1,1232 @@
+/*
+ * Copyright 1993, 1995 Christopher Seiwald.
+ *
+ * This file is part of Jam - see jam.c for Copyright information.
+ */
+
+/* This file is ALSO:
+ * Copyright 2001-2004 David Abrahams.
+ * Copyright 2007 Rene Rivera.
+ * Distributed under the Boost Software License, Version 1.0.
+ * (See accompanying file LICENSE_1_0.txt or copy at
+ * http://www.boost.org/LICENSE_1_0.txt)
+ */
+
+/*
+ * execnt.c - execute a shell command on Windows NT
+ *
+ * If $(JAMSHELL) is defined, uses that to formulate the actual command. The
+ * default is: cmd.exe /Q/C
+ *
+ * In $(JAMSHELL), % expands to the command string and ! expands to the slot
+ * number (starting at 1) for multiprocess (-j) invocations. If $(JAMSHELL) does
+ * not include a %, it is tacked on as the last argument.
+ *
+ * Each $(JAMSHELL) placeholder must be specified as a separate individual
+ * element in a jam variable value.
+ *
+ * Do not just set JAMSHELL to cmd.exe - it will not work!
+ *
+ * External routines:
+ * exec_check() - preprocess and validate the command
+ * exec_cmd() - launch an async command execution
+ * exec_wait() - wait for any of the async command processes to terminate
+ *
+ * Internal routines:
+ * filetime_to_seconds() - Windows FILETIME --> number of seconds conversion
+ */
+
+#include "jam.h"
+#ifdef USE_EXECNT
+#include "execcmd.h"
+
+#include "lists.h"
+#include "output.h"
+#include "pathsys.h"
+#include "string.h"
+
+#include <assert.h>
+#include <ctype.h>
+#include <errno.h>
+#include <time.h>
+
+#define WIN32_LEAN_AND_MEAN
+#include <windows.h>
+#include <process.h>
+#include <tlhelp32.h>
+
+
+/* get the maximum shell command line length according to the OS */
+static int maxline();
+/* valid raw command string length */
+static long raw_command_length( char const * command );
+/* add two 64-bit unsigned numbers, h1l1 and h2l2 */
+static FILETIME add_64(
+ unsigned long h1, unsigned long l1,
+ unsigned long h2, unsigned long l2 );
+/* */
+static FILETIME add_FILETIME( FILETIME t1, FILETIME t2 );
+/* */
+static FILETIME negate_FILETIME( FILETIME t );
+/* record the timing info for the process */
+static void record_times( HANDLE const, timing_info * const );
+/* calc the current running time of an *active* process */
+static double running_time( HANDLE const );
+/* terminate the given process, after terminating all its children first */
+static void kill_process_tree( DWORD const procesdId, HANDLE const );
+/* waits for a command to complete or time out */
+static int try_wait( int const timeoutMillis );
+/* reads any pending output for running commands */
+static void read_output();
+/* checks if a command ran out of time, and kills it */
+static int try_kill_one();
+/* is the first process a parent (direct or indirect) to the second one */
+static int is_parent_child( DWORD const parent, DWORD const child );
+/* */
+static void close_alert( PROCESS_INFORMATION const * const );
+/* close any alerts hanging around */
+static void close_alerts();
+/* prepare a command file to be executed using an external shell */
+static char const * prepare_command_file( string const * command, int slot );
+/* invoke the actual external process using the given command line */
+static void invoke_cmd( char const * const command, int const slot );
+/* find a free slot in the running commands table */
+static int get_free_cmdtab_slot();
+/* put together the final command string we are to run */
+static void string_new_from_argv( string * result, char const * const * argv );
+/* frees and renews the given string */
+static void string_renew( string * const );
+/* reports the last failed Windows API related error message */
+static void reportWindowsError( char const * const apiName );
+/* closes a Windows HANDLE and resets its variable to 0. */
+static void closeWinHandle( HANDLE * const handle );
+
+/* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */
+
+/* CreateProcessA() Windows API places a limit of 32768 characters (bytes) on
+ * the allowed command-line length, including a trailing Unicode (2-byte)
+ * nul-terminator character.
+ */
+#define MAX_RAW_COMMAND_LENGTH 32766
+
+/* We hold handles for pipes used to communicate with child processes in two
+ * element arrays indexed as follows.
+ */
+#define EXECCMD_PIPE_READ 0
+#define EXECCMD_PIPE_WRITE 1
+
+static int intr_installed;
+
+
+/* The list of commands we run. */
+static struct
+{
+ /* Temporary command file used to execute the action when needed. */
+ string command_file[ 1 ];
+
+ /* Pipes for communicating with the child process. Parent reads from (0),
+ * child writes to (1).
+ */
+ HANDLE pipe_out[ 2 ];
+ HANDLE pipe_err[ 2 ];
+
+ string buffer_out[ 1 ]; /* buffer to hold stdout, if any */
+ string buffer_err[ 1 ]; /* buffer to hold stderr, if any */
+
+ PROCESS_INFORMATION pi; /* running process information */
+
+ /* Function called when the command completes. */
+ ExecCmdCallback func;
+
+ /* Opaque data passed back to the 'func' callback. */
+ void * closure;
+}
+cmdtab[ MAXJOBS ] = { { 0 } };
+
+
+/*
+ * Execution unit tests.
+ */
+
+void execnt_unit_test()
+{
+#if !defined( NDEBUG )
+ /* vc6 preprocessor is broken, so assert with these strings gets confused.
+ * Use a table instead.
+ */
+ {
+ typedef struct test { char * command; int result; } test;
+ test tests[] = {
+ { "", 0 },
+ { " ", 0 },
+ { "x", 1 },
+ { "\nx", 1 },
+ { "x\n", 1 },
+ { "\nx\n", 1 },
+ { "\nx \n", 2 },
+ { "\nx \n ", 2 },
+ { " \n\t\t\v\r\r\n \t x \v \t\t\r\n\n\n \n\n\v\t", 8 },
+ { "x\ny", -1 },
+ { "x\n\n y", -1 },
+ { "echo x > foo.bar", -1 },
+ { "echo x < foo.bar", -1 },
+ { "echo x | foo.bar", -1 },
+ { "echo x \">\" foo.bar", 18 },
+ { "echo x '<' foo.bar", 18 },
+ { "echo x \"|\" foo.bar", 18 },
+ { "echo x \\\">\\\" foo.bar", -1 },
+ { "echo x \\\"<\\\" foo.bar", -1 },
+ { "echo x \\\"|\\\" foo.bar", -1 },
+ { "\"echo x > foo.bar\"", 18 },
+ { "echo x \"'\"<' foo.bar", -1 },
+ { "echo x \\\\\"<\\\\\" foo.bar", 22 },
+ { "echo x \\x\\\"<\\\\\" foo.bar", -1 },
+ { 0 } };
+ test const * t;
+ for ( t = tests; t->command; ++t )
+ assert( raw_command_length( t->command ) == t->result );
+ }
+
+ {
+ int const length = maxline() + 9;
+ char * const cmd = (char *)BJAM_MALLOC_ATOMIC( length + 1 );
+ memset( cmd, 'x', length );
+ cmd[ length ] = 0;
+ assert( raw_command_length( cmd ) == length );
+ BJAM_FREE( cmd );
+ }
+#endif
+}
+
+
+/*
+ * exec_check() - preprocess and validate the command
+ */
+
+int exec_check
+(
+ string const * command,
+ LIST * * pShell,
+ int * error_length,
+ int * error_max_length
+)
+{
+ /* Default shell does nothing when triggered with an empty or a
+ * whitespace-only command so we simply skip running it in that case. We
+ * still pass them on to non-default shells as we do not really know what
+ * they are going to do with such commands.
+ */
+ if ( list_empty( *pShell ) )
+ {
+ char const * s = command->value;
+ while ( isspace( *s ) ) ++s;
+ if ( !*s )
+ return EXEC_CHECK_NOOP;
+ }
+
+ /* Check prerequisites for executing raw commands. */
+ if ( is_raw_command_request( *pShell ) )
+ {
+ int const raw_cmd_length = raw_command_length( command->value );
+ if ( raw_cmd_length < 0 )
+ {
+ /* Invalid characters detected - fallback to default shell. */
+ list_free( *pShell );
+ *pShell = L0;
+ }
+ else if ( raw_cmd_length > MAX_RAW_COMMAND_LENGTH )
+ {
+ *error_length = raw_cmd_length;
+ *error_max_length = MAX_RAW_COMMAND_LENGTH;
+ return EXEC_CHECK_TOO_LONG;
+ }
+ else
+ return raw_cmd_length ? EXEC_CHECK_OK : EXEC_CHECK_NOOP;
+ }
+
+ /* Now we know we are using an external shell. Note that there is no need to
+ * check for too long command strings when using an external shell since we
+ * use a command file and assume no one is going to set up a JAMSHELL format
+ * string longer than a few hundred bytes at most which should be well under
+ * the total command string limit. Should someone actually construct such a
+ * JAMSHELL value it will get reported as an 'invalid parameter'
+ * CreateProcessA() Windows API failure which seems like a good enough
+ * result for such intentional mischief.
+ */
+
+ /* Check for too long command lines. */
+ return check_cmd_for_too_long_lines( command->value, maxline(),
+ error_length, error_max_length );
+}
+
+
+/*
+ * exec_cmd() - launch an async command execution
+ *
+ * We assume exec_check() already verified that the given command can have its
+ * command string constructed as requested.
+ */
+
+void exec_cmd
+(
+ string const * cmd_orig,
+ ExecCmdCallback func,
+ void * closure,
+ LIST * shell
+)
+{
+ int const slot = get_free_cmdtab_slot();
+ int const is_raw_cmd = is_raw_command_request( shell );
+ string cmd_local[ 1 ];
+
+ /* Initialize default shell - anything more than /Q/C is non-portable. */
+ static LIST * default_shell;
+ if ( !default_shell )
+ default_shell = list_new( object_new( "cmd.exe /Q/C" ) );
+
+ /* Specifying no shell means requesting the default shell. */
+ if ( list_empty( shell ) )
+ shell = default_shell;
+
+ if ( DEBUG_EXECCMD )
+ if ( is_raw_cmd )
+ printf( "Executing raw command directly\n" );
+ else
+ {
+ printf( "Executing using a command file and the shell: " );
+ list_print( shell );
+ printf( "\n" );
+ }
+
+ /* If we are running a raw command directly - trim its leading whitespaces
+ * as well as any trailing all-whitespace lines but keep any trailing
+ * whitespace in the final/only line containing something other than
+ * whitespace).
+ */
+ if ( is_raw_cmd )
+ {
+ char const * start = cmd_orig->value;
+ char const * p = cmd_orig->value + cmd_orig->size;
+ char const * end = p;
+ while ( isspace( *start ) ) ++start;
+ while ( p > start && isspace( p[ -1 ] ) )
+ if ( *--p == '\n' )
+ end = p;
+ string_new( cmd_local );
+ string_append_range( cmd_local, start, end );
+ assert( cmd_local->size == raw_command_length( cmd_orig->value ) );
+ }
+ /* If we are not running a raw command directly, prepare a command file to
+ * be executed using an external shell and the actual command string using
+ * that command file.
+ */
+ else
+ {
+ char const * const cmd_file = prepare_command_file( cmd_orig, slot );
+ char const * argv[ MAXARGC + 1 ]; /* +1 for NULL */
+ argv_from_shell( argv, shell, cmd_file, slot );
+ string_new_from_argv( cmd_local, argv );
+ }
+
+ /* Catch interrupts whenever commands are running. */
+ if ( !intr_installed )
+ {
+ intr_installed = 1;
+ signal( SIGINT, onintr );
+ }
+
+ /* Save input data into the selected running commands table slot. */
+ cmdtab[ slot ].func = func;
+ cmdtab[ slot ].closure = closure;
+
+ /* Invoke the actual external process using the constructed command line. */
+ invoke_cmd( cmd_local->value, slot );
+
+ /* Free our local command string copy. */
+ string_free( cmd_local );
+}
+
+
+/*
+ * exec_wait() - wait for any of the async command processes to terminate
+ *
+ * Wait and drive at most one execution completion, while processing the I/O for
+ * all ongoing commands.
+ */
+
+void exec_wait()
+{
+ int i = -1;
+ int exit_reason; /* reason why a command completed */
+
+ /* Wait for a command to complete, while snarfing up any output. */
+ while ( 1 )
+ {
+ /* Check for a complete command, briefly. */
+ i = try_wait( 500 );
+ /* Read in the output of all running commands. */
+ read_output();
+ /* Close out pending debug style dialogs. */
+ close_alerts();
+ /* Process the completed command we found. */
+ if ( i >= 0 ) { exit_reason = EXIT_OK; break; }
+ /* Check if a command ran out of time. */
+ i = try_kill_one();
+ if ( i >= 0 ) { exit_reason = EXIT_TIMEOUT; break; }
+ }
+
+ /* We have a command... process it. */
+ {
+ DWORD exit_code;
+ timing_info time;
+ int rstat;
+
+ /* The time data for the command. */
+ record_times( cmdtab[ i ].pi.hProcess, &time );
+
+        /* Remove the used temporary command file. */
+ if ( cmdtab[ i ].command_file->size )
+ unlink( cmdtab[ i ].command_file->value );
+
+ /* Find out the process exit code. */
+ GetExitCodeProcess( cmdtab[ i ].pi.hProcess, &exit_code );
+
+        /* The disposition of the command. */
+ if ( interrupted() )
+ rstat = EXEC_CMD_INTR;
+ else if ( exit_code )
+ rstat = EXEC_CMD_FAIL;
+ else
+ rstat = EXEC_CMD_OK;
+
+ /* Call the callback, may call back to jam rule land. */
+ (*cmdtab[ i ].func)( cmdtab[ i ].closure, rstat, &time,
+ cmdtab[ i ].buffer_out->value, cmdtab[ i ].buffer_err->value,
+ exit_reason );
+
+ /* Clean up our child process tracking data. No need to clear the
+ * temporary command file name as it gets reused.
+ */
+ closeWinHandle( &cmdtab[ i ].pi.hProcess );
+ closeWinHandle( &cmdtab[ i ].pi.hThread );
+ closeWinHandle( &cmdtab[ i ].pipe_out[ EXECCMD_PIPE_READ ] );
+ closeWinHandle( &cmdtab[ i ].pipe_out[ EXECCMD_PIPE_WRITE ] );
+ closeWinHandle( &cmdtab[ i ].pipe_err[ EXECCMD_PIPE_READ ] );
+ closeWinHandle( &cmdtab[ i ].pipe_err[ EXECCMD_PIPE_WRITE ] );
+ string_renew( cmdtab[ i ].buffer_out );
+ string_renew( cmdtab[ i ].buffer_err );
+ }
+}
+
+
+/* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */
+
+/*
+ * Invoke the actual external process using the given command line. Track the
+ * process in our running commands table.
+ */
+
+static void invoke_cmd( char const * const command, int const slot )
+{
+ SECURITY_ATTRIBUTES sa = { sizeof( SECURITY_ATTRIBUTES ), 0, 0 };
+ SECURITY_DESCRIPTOR sd;
+ STARTUPINFO si = { sizeof( STARTUPINFO ), 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0 };
+
+ /* Init the security data. */
+ InitializeSecurityDescriptor( &sd, SECURITY_DESCRIPTOR_REVISION );
+ SetSecurityDescriptorDacl( &sd, TRUE, NULL, FALSE );
+ sa.lpSecurityDescriptor = &sd;
+ sa.bInheritHandle = TRUE;
+
+ /* Create pipes for communicating with the child process. */
+ if ( !CreatePipe( &cmdtab[ slot ].pipe_out[ EXECCMD_PIPE_READ ],
+ &cmdtab[ slot ].pipe_out[ EXECCMD_PIPE_WRITE ], &sa, 0 ) )
+ {
+ reportWindowsError( "CreatePipe" );
+ exit( EXITBAD );
+ }
+ if ( globs.pipe_action && !CreatePipe( &cmdtab[ slot ].pipe_err[
+ EXECCMD_PIPE_READ ], &cmdtab[ slot ].pipe_err[ EXECCMD_PIPE_WRITE ],
+ &sa, 0 ) )
+ {
+ reportWindowsError( "CreatePipe" );
+ exit( EXITBAD );
+ }
+
+ /* Set handle inheritance off for the pipe ends the parent reads from. */
+ SetHandleInformation( cmdtab[ slot ].pipe_out[ EXECCMD_PIPE_READ ],
+ HANDLE_FLAG_INHERIT, 0 );
+ if ( globs.pipe_action )
+ SetHandleInformation( cmdtab[ slot ].pipe_err[ EXECCMD_PIPE_READ ],
+ HANDLE_FLAG_INHERIT, 0 );
+
+ /* Hide the child window, if any. */
+ si.dwFlags |= STARTF_USESHOWWINDOW;
+ si.wShowWindow = SW_HIDE;
+
+ /* Redirect the child's output streams to our pipes. */
+ si.dwFlags |= STARTF_USESTDHANDLES;
+ si.hStdOutput = cmdtab[ slot ].pipe_out[ EXECCMD_PIPE_WRITE ];
+ si.hStdError = globs.pipe_action
+ ? cmdtab[ slot ].pipe_err[ EXECCMD_PIPE_WRITE ]
+ : cmdtab[ slot ].pipe_out[ EXECCMD_PIPE_WRITE ];
+
+ /* Let the child inherit stdin, as some commands assume it is available. */
+ si.hStdInput = GetStdHandle( STD_INPUT_HANDLE );
+
+ /* Create output buffers. */
+ string_new( cmdtab[ slot ].buffer_out );
+ string_new( cmdtab[ slot ].buffer_err );
+
+ if ( DEBUG_EXECCMD )
+ printf( "Command string for CreateProcessA(): '%s'\n", command );
+
+ /* Run the command by creating a sub-process for it. */
+ if ( !CreateProcessA(
+ NULL , /* application name */
+ (char *)command , /* command line */
+ NULL , /* process attributes */
+ NULL , /* thread attributes */
+ TRUE , /* inherit handles */
+ CREATE_NEW_PROCESS_GROUP, /* create flags */
+ NULL , /* env vars, null inherits env */
+ NULL , /* current dir, null is our current dir */
+ &si , /* startup info */
+ &cmdtab[ slot ].pi ) ) /* child process info, if created */
+ {
+ reportWindowsError( "CreateProcessA" );
+ exit( EXITBAD );
+ }
+}
+
+
+/*
+ * For more details on Windows cmd.exe shell command-line length limitations see
+ * the following MSDN article:
+ * http://support.microsoft.com/default.aspx?scid=kb;en-us;830473
+ */
+
+static int raw_maxline()
+{
+ OSVERSIONINFO os_info;
+ os_info.dwOSVersionInfoSize = sizeof( os_info );
+ GetVersionEx( &os_info );
+
+ if ( os_info.dwMajorVersion >= 5 ) return 8191; /* XP */
+ if ( os_info.dwMajorVersion == 4 ) return 2047; /* NT 4.x */
+ return 996; /* NT 3.5.1 */
+}
+
+static int maxline()
+{
+    static int result;
+    if ( !result ) result = raw_maxline();
+    return result;
+}
+
+
+/*
+ * Closes a Windows HANDLE and resets its variable to 0.
+ */
+
+static void closeWinHandle( HANDLE * const handle )
+{
+ if ( *handle )
+ {
+ CloseHandle( *handle );
+ *handle = 0;
+ }
+}
+
+
+/*
+ * Frees and renews the given string.
+ */
+
+static void string_renew( string * const s )
+{
+ string_free( s );
+ string_new( s );
+}
+
+
+/*
+ * raw_command_length() - valid raw command string length
+ *
+ * Checks whether the given command may be executed as a raw command. If yes,
+ * returns the corresponding command string length. If not, returns -1.
+ *
+ * Rules for constructing raw command strings:
+ * - Command may not contain unquoted shell I/O redirection characters.
+ * - May have at most one command line with non-whitespace content.
+ * - Leading whitespace trimmed.
+ * - Trailing all-whitespace lines trimmed.
+ * - Trailing whitespace on the sole command line kept (may theoretically
+ * affect the executed command).
+ */
+
+static long raw_command_length( char const * command )
+{
+ char const * p;
+ char const * escape = 0;
+ char inquote = 0;
+ char const * newline = 0;
+
+ /* Skip leading whitespace. */
+ while ( isspace( *command ) )
+ ++command;
+
+ p = command;
+
+ /* Look for newlines and unquoted I/O redirection. */
+ do
+ {
+ p += strcspn( p, "\n\"'<>|\\" );
+ switch ( *p )
+ {
+ case '\n':
+ /* If our command contains non-whitespace content split over
+ * multiple lines we can not execute it directly.
+ */
+ newline = p;
+ while ( isspace( *++p ) );
+ if ( *p ) return -1;
+ break;
+
+ case '\\':
+ escape = escape && escape == p - 1 ? 0 : p;
+ ++p;
+ break;
+
+ case '"':
+ case '\'':
+ if ( escape && escape == p - 1 )
+ escape = 0;
+ else if ( inquote == *p )
+ inquote = 0;
+ else if ( !inquote )
+ inquote = *p;
+ ++p;
+ break;
+
+ case '<':
+ case '>':
+ case '|':
+ if ( !inquote )
+ return -1;
+ ++p;
+ break;
+ }
+ }
+ while ( *p );
+
+ /* Return the number of characters the command will occupy. */
+ return ( newline ? newline : p ) - command;
+}
+
+
+/* 64-bit arithmetic helpers. */
+
+/* Compute the carry bit from the addition of two 32-bit unsigned numbers. */
+#define add_carry_bit( a, b ) ((((a) | (b)) >> 31) & (~((a) + (b)) >> 31) & 0x1)
+
+/* Compute the high 32 bits of the addition of two 64-bit unsigned numbers, h1l1
+ * and h2l2.
+ */
+#define add_64_hi( h1, l1, h2, l2 ) ((h1) + (h2) + add_carry_bit(l1, l2))
+
+
+/*
+ * Add two 64-bit unsigned numbers, h1l1 and h2l2.
+ */
+
+static FILETIME add_64
+(
+ unsigned long h1, unsigned long l1,
+ unsigned long h2, unsigned long l2
+)
+{
+ FILETIME result;
+ result.dwLowDateTime = l1 + l2;
+ result.dwHighDateTime = add_64_hi( h1, l1, h2, l2 );
+ return result;
+}
+
+
+static FILETIME add_FILETIME( FILETIME t1, FILETIME t2 )
+{
+ return add_64( t1.dwHighDateTime, t1.dwLowDateTime, t2.dwHighDateTime,
+ t2.dwLowDateTime );
+}
+
+
+static FILETIME negate_FILETIME( FILETIME t )
+{
+ /* 2s complement negation */
+ return add_64( ~t.dwHighDateTime, ~t.dwLowDateTime, 0, 1 );
+}
+
+
+/*
+ * filetime_to_seconds() - Windows FILETIME --> number of seconds conversion
+ */
+
+static double filetime_to_seconds( FILETIME const ft )
+{
+ return ft.dwHighDateTime * ( (double)( 1UL << 31 ) * 2.0 * 1.0e-7 ) +
+ ft.dwLowDateTime * 1.0e-7;
+}
+
+
+static void record_times( HANDLE const process, timing_info * const time )
+{
+ FILETIME creation;
+ FILETIME exit;
+ FILETIME kernel;
+ FILETIME user;
+ if ( GetProcessTimes( process, &creation, &exit, &kernel, &user ) )
+ {
+ time->system = filetime_to_seconds( kernel );
+ time->user = filetime_to_seconds( user );
+ timestamp_from_filetime( &time->start, &creation );
+ timestamp_from_filetime( &time->end, &exit );
+ }
+}
+
+
+#define IO_BUFFER_SIZE ( 16 * 1024 )
+
+static char ioBuffer[ IO_BUFFER_SIZE + 1 ];
+
+
+static void read_pipe
+(
+ HANDLE in, /* the pipe to read from */
+ string * out
+)
+{
+ DWORD bytesInBuffer = 0;
+ DWORD bytesAvailable = 0;
+
+ do
+ {
+ /* check if we have any data to read */
+ if ( !PeekNamedPipe( in, ioBuffer, IO_BUFFER_SIZE, &bytesInBuffer,
+ &bytesAvailable, NULL ) )
+ bytesAvailable = 0;
+
+ /* read in the available data */
+ if ( bytesAvailable > 0 )
+ {
+ /* we only read in the available bytes, to avoid blocking */
+ if ( ReadFile( in, ioBuffer, bytesAvailable <= IO_BUFFER_SIZE ?
+ bytesAvailable : IO_BUFFER_SIZE, &bytesInBuffer, NULL ) )
+ {
+ if ( bytesInBuffer > 0 )
+ {
+ /* Clean up some illegal chars. */
+ int i;
+ for ( i = 0; i < bytesInBuffer; ++i )
+ {
+ if ( ( (unsigned char)ioBuffer[ i ] < 1 ) )
+ ioBuffer[ i ] = '?';
+ }
+                    /* NUL-terminate. */
+ ioBuffer[ bytesInBuffer ] = '\0';
+ /* Append to the output. */
+ string_append( out, ioBuffer );
+ /* Subtract what we read in. */
+ bytesAvailable -= bytesInBuffer;
+ }
+ else
+ {
+                    /* Likely a read error, bail out. */
+ bytesAvailable = 0;
+ }
+ }
+ else
+ {
+                /* Definitely a read error, bail out. */
+ bytesAvailable = 0;
+ }
+ }
+ }
+ while ( bytesAvailable > 0 );
+}
+
+
+static void read_output()
+{
+ int i;
+ for ( i = 0; i < globs.jobs; ++i )
+ if ( cmdtab[ i ].pi.hProcess )
+ {
+ /* Read stdout data. */
+ if ( cmdtab[ i ].pipe_out[ EXECCMD_PIPE_READ ] )
+ read_pipe( cmdtab[ i ].pipe_out[ EXECCMD_PIPE_READ ],
+ cmdtab[ i ].buffer_out );
+ /* Read stderr data. */
+ if ( cmdtab[ i ].pipe_err[ EXECCMD_PIPE_READ ] )
+ read_pipe( cmdtab[ i ].pipe_err[ EXECCMD_PIPE_READ ],
+ cmdtab[ i ].buffer_err );
+ }
+}
+
+
+/*
+ * Waits for a single child process command to complete, or the timeout,
+ * whichever comes first. Returns the index of the completed command in the
+ * cmdtab array, or -1.
+ */
+
+static int try_wait( int const timeoutMillis )
+{
+ int i;
+ int num_active;
+ int wait_api_result;
+ HANDLE active_handles[ MAXJOBS ];
+ int active_procs[ MAXJOBS ];
+
+ /* Prepare a list of all active processes to wait for. */
+ for ( num_active = 0, i = 0; i < globs.jobs; ++i )
+ if ( cmdtab[ i ].pi.hProcess )
+ {
+ active_handles[ num_active ] = cmdtab[ i ].pi.hProcess;
+ active_procs[ num_active ] = i;
+ ++num_active;
+ }
+
+ /* Wait for a child to complete, or for our timeout window to expire. */
+ wait_api_result = WaitForMultipleObjects( num_active, active_handles,
+ FALSE, timeoutMillis );
+ if ( ( WAIT_OBJECT_0 <= wait_api_result ) &&
+ ( wait_api_result < WAIT_OBJECT_0 + num_active ) )
+ {
+ /* Terminated process detected - return its index. */
+ return active_procs[ wait_api_result - WAIT_OBJECT_0 ];
+ }
+
+ /* Timeout. */
+ return -1;
+}
+
+
+static int try_kill_one()
+{
+ /* Only need to check if a timeout was specified with the -l option. */
+ if ( globs.timeout > 0 )
+ {
+ int i;
+ for ( i = 0; i < globs.jobs; ++i )
+ if ( cmdtab[ i ].pi.hProcess )
+ {
+ double const t = running_time( cmdtab[ i ].pi.hProcess );
+ if ( t > (double)globs.timeout )
+ {
+ /* The job may have left an alert dialog around, try and get
+ * rid of it before killing the job itself.
+ */
+ close_alert( &cmdtab[ i ].pi );
+ /* We have a "runaway" job, kill it. */
+ kill_process_tree( cmdtab[ i ].pi.dwProcessId,
+ cmdtab[ i ].pi.hProcess );
+ /* And return its running commands table slot. */
+ return i;
+ }
+ }
+ }
+ return -1;
+}
+
+
+static void close_alerts()
+{
+ /* We only attempt this every 5 seconds or so, because it is not a cheap
+ * operation, and we will catch the alerts eventually. This check uses
+ * floats as some compilers define CLOCKS_PER_SEC as a float or double.
+ */
+ if ( ( (float)clock() / (float)( CLOCKS_PER_SEC * 5 ) ) < ( 1.0 / 5.0 ) )
+ {
+ int i;
+ for ( i = 0; i < globs.jobs; ++i )
+ if ( cmdtab[ i ].pi.hProcess )
+ close_alert( &cmdtab[ i ].pi );
+ }
+}
+
+
+/*
+ * Calc the current running time of an *active* process.
+ */
+
+static double running_time( HANDLE const process )
+{
+    FILETIME creation;
+    FILETIME exit;
+    FILETIME kernel;
+    FILETIME user;
+    if ( GetProcessTimes( process, &creation, &exit, &kernel, &user ) )
+    {
+        /* Compute the elapsed time. */
+        FILETIME current;
+        GetSystemTimeAsFileTime( &current );
+        return filetime_to_seconds( add_FILETIME( current,
+            negate_FILETIME( creation ) ) );
+    }
+    return 0.0;
+}
+
+
+/*
+ * Not really optimal, or efficient, but it is easier this way, and it is not
+ * like we are going to be killing thousands, or even tens of processes.
+ */
+
+static void kill_process_tree( DWORD const pid, HANDLE const process )
+{
+ HANDLE const process_snapshot_h = CreateToolhelp32Snapshot(
+ TH32CS_SNAPPROCESS, 0 );
+ if ( INVALID_HANDLE_VALUE != process_snapshot_h )
+ {
+ BOOL ok = TRUE;
+ PROCESSENTRY32 pinfo;
+ pinfo.dwSize = sizeof( PROCESSENTRY32 );
+ for (
+ ok = Process32First( process_snapshot_h, &pinfo );
+ ok == TRUE;
+ ok = Process32Next( process_snapshot_h, &pinfo ) )
+ {
+ if ( pinfo.th32ParentProcessID == pid )
+ {
+ /* Found a child, recurse to kill it and anything else below it.
+ */
+ HANDLE const ph = OpenProcess( PROCESS_ALL_ACCESS, FALSE,
+ pinfo.th32ProcessID );
+ if ( ph )
+ {
+ kill_process_tree( pinfo.th32ProcessID, ph );
+ CloseHandle( ph );
+ }
+ }
+ }
+ CloseHandle( process_snapshot_h );
+ }
+ /* Now that the children are all dead, kill the root. */
+ TerminateProcess( process, -2 );
+}
+
+
+static double creation_time( HANDLE const process )
+{
+ FILETIME creation;
+ FILETIME exit;
+ FILETIME kernel;
+ FILETIME user;
+ return GetProcessTimes( process, &creation, &exit, &kernel, &user )
+ ? filetime_to_seconds( creation )
+ : 0.0;
+}
+
+
+/*
+ * Recursive check if first process is parent (directly or indirectly) of the
+ * second one. Both processes are passed as process ids, not handles. Special
+ * return value 2 means that the second process is smss.exe and its parent
+ * process is System (first argument is ignored).
+ */
+
+static int is_parent_child( DWORD const parent, DWORD const child )
+{
+ HANDLE process_snapshot_h = INVALID_HANDLE_VALUE;
+
+ if ( !child )
+ return 0;
+ if ( parent == child )
+ return 1;
+
+ process_snapshot_h = CreateToolhelp32Snapshot( TH32CS_SNAPPROCESS, 0 );
+ if ( INVALID_HANDLE_VALUE != process_snapshot_h )
+ {
+ BOOL ok = TRUE;
+ PROCESSENTRY32 pinfo;
+ pinfo.dwSize = sizeof( PROCESSENTRY32 );
+ for (
+ ok = Process32First( process_snapshot_h, &pinfo );
+ ok == TRUE;
+ ok = Process32Next( process_snapshot_h, &pinfo ) )
+ {
+ if ( pinfo.th32ProcessID == child )
+ {
+ /* Unfortunately, process ids are not really unique. There might
+ * be spurious "parent and child" relationship match between two
+ * non-related processes if real parent process of a given
+ * process has exited (while child process kept running as an
+ * "orphan") and the process id of such parent process has been
+ * reused by internals of the operating system when creating
+ * another process.
+ *
+ * Thus an additional check is needed - process creation time.
+ * This check may fail (i.e. return 0) for system processes due
+ * to insufficient privileges, and that is OK.
+ */
+ double tchild = 0.0;
+ double tparent = 0.0;
+ HANDLE const hchild = OpenProcess( PROCESS_QUERY_INFORMATION,
+ FALSE, pinfo.th32ProcessID );
+ CloseHandle( process_snapshot_h );
+
+ /* csrss.exe may display message box like following:
+ * xyz.exe - Unable To Locate Component
+ * This application has failed to start because
+ * boost_foo-bar.dll was not found. Re-installing the
+ * application may fix the problem
+ * This actually happens when starting a test process that
+ * depends on a dynamic library which failed to build. We want
+ * to automatically close these message boxes even though
+ * csrss.exe is not our child process. We may depend on the fact
+ * that (in all current versions of Windows) csrss.exe is a
+ * direct child of the smss.exe process, which in turn is a
+ * direct child of the System process, which always has process
+ * id == 4. This check must be performed before comparing
+ * process creation times.
+ */
+ if ( !stricmp( pinfo.szExeFile, "csrss.exe" ) &&
+ is_parent_child( parent, pinfo.th32ParentProcessID ) == 2 )
+ return 1;
+ if ( !stricmp( pinfo.szExeFile, "smss.exe" ) &&
+ ( pinfo.th32ParentProcessID == 4 ) )
+ return 2;
+
+ if ( hchild )
+ {
+ HANDLE hparent = OpenProcess( PROCESS_QUERY_INFORMATION,
+ FALSE, pinfo.th32ParentProcessID );
+ if ( hparent )
+ {
+ tchild = creation_time( hchild );
+ tparent = creation_time( hparent );
+ CloseHandle( hparent );
+ }
+ CloseHandle( hchild );
+ }
+
+ /* Return 0 if one of the following is true:
+ * 1. we failed to read process creation time
+ * 2. child was created before alleged parent
+ */
+ if ( ( tchild == 0.0 ) || ( tparent == 0.0 ) ||
+ ( tchild < tparent ) )
+ return 0;
+
+ return is_parent_child( parent, pinfo.th32ParentProcessID ) & 1;
+ }
+ }
+
+ CloseHandle( process_snapshot_h );
+ }
+
+ return 0;
+}
+
+
+/*
+ * Called by the OS for each topmost window.
+ */
+
+BOOL CALLBACK close_alert_window_enum( HWND hwnd, LPARAM lParam )
+{
+ char buf[ 7 ] = { 0 };
+ PROCESS_INFORMATION const * const pi = (PROCESS_INFORMATION *)lParam;
+ DWORD pid;
+ DWORD tid;
+
+ /* We want to find and close any window that:
+ * 1. is visible and
+ * 2. is a dialog and
+ * 3. is displayed by any of our child processes
+ */
+ if (
+ /* We assume hidden windows do not require user interaction. */
+ !IsWindowVisible( hwnd )
+ /* Failed to read class name; presume it is not a dialog. */
+ || !GetClassNameA( hwnd, buf, sizeof( buf ) )
+ /* All Windows system dialogs use the same Window class name. */
+ || strcmp( buf, "#32770" ) )
+ return TRUE;
+
+ /* GetWindowThreadProcessId() returns 0 on error, otherwise thread id of
+ * the window's message pump thread.
+ */
+ tid = GetWindowThreadProcessId( hwnd, &pid );
+ if ( !tid || !is_parent_child( pi->dwProcessId, pid ) )
+ return TRUE;
+
+ /* Ask real nice. */
+ PostMessageA( hwnd, WM_CLOSE, 0, 0 );
+
+ /* Wait and see if it worked. If not, insist. */
+ if ( WaitForSingleObject( pi->hProcess, 200 ) == WAIT_TIMEOUT )
+ {
+ PostThreadMessageA( tid, WM_QUIT, 0, 0 );
+ WaitForSingleObject( pi->hProcess, 300 );
+ }
+
+ /* Done, we do not want to check any other windows now. */
+ return FALSE;
+}
+
+
+static void close_alert( PROCESS_INFORMATION const * const pi )
+{
+ EnumWindows( &close_alert_window_enum, (LPARAM)pi );
+}
+
+
+/*
+ * Open a command file to store the command into for executing using an external
+ * shell. Returns a pointer to a FILE open for writing or 0 in case such a file
+ * could not be opened. The file name used is stored back in the corresponding
+ * running commands table slot.
+ *
+ * Expects the running commands table slot's command_file attribute to contain
+ * either a zeroed out string object or one prepared previously by this same
+ * function.
+ */
+
+static FILE * open_command_file( int const slot )
+{
+ string * const command_file = cmdtab[ slot ].command_file;
+
+ /* If the temporary command file name has not already been prepared for this
+ * slot number, prepare a new one containing a '##' place holder that will
+ * be changed later and needs to be located at a fixed distance from the
+ * end.
+ */
+ if ( !command_file->value )
+ {
+ DWORD const procID = GetCurrentProcessId();
+ string const * const tmpdir = path_tmpdir();
+ string_new( command_file );
+ string_reserve( command_file, tmpdir->size + 64 );
+ command_file->size = sprintf( command_file->value,
+ "%s\\jam%d-%02d-##.bat", tmpdir->value, procID, slot );
+ }
+
+ /* For some reason opening a command file can fail intermittently. But doing
+ * some retries works. Most likely this is due to a previously existing file
+ * of the same name that happens to still be opened by an active virus
+ * scanner. Originally pointed out and fixed by Bronek Kozicki.
+ *
+ * We first try to open several differently named files to avoid having to
+ * wait idly if not absolutely necessary. Our temporary command file names
+ * contain a fixed position place holder we use for generating different
+ * file names.
+ */
+ {
+ char * const index1 = command_file->value + command_file->size - 6;
+ char * const index2 = index1 + 1;
+ int waits_remaining;
+ assert( command_file->value < index1 );
+ assert( index2 + 1 < command_file->value + command_file->size );
+ assert( index2[ 1 ] == '.' );
+ for ( waits_remaining = 3; ; --waits_remaining )
+ {
+ int index;
+ for ( index = 0; index != 20; ++index )
+ {
+ FILE * f;
+ *index1 = '0' + index / 10;
+ *index2 = '0' + index % 10;
+ f = fopen( command_file->value, "w" );
+ if ( f ) return f;
+ }
+ if ( !waits_remaining ) break;
+ Sleep( 250 );
+ }
+ }
+
+ return 0;
+}
+
+
+/*
+ * Prepare a command file to be executed using an external shell.
+ */
+
+static char const * prepare_command_file( string const * command, int slot )
+{
+ FILE * const f = open_command_file( slot );
+ if ( !f )
+ {
+ printf( "failed to write command file!\n" );
+ exit( EXITBAD );
+ }
+ fputs( command->value, f );
+ fclose( f );
+ return cmdtab[ slot ].command_file->value;
+}
+
+
+/*
+ * Find a free slot in the running commands table.
+ */
+
+static int get_free_cmdtab_slot()
+{
+ int slot;
+ for ( slot = 0; slot < MAXJOBS; ++slot )
+ if ( !cmdtab[ slot ].pi.hProcess )
+ return slot;
+ printf( "no slots for child!\n" );
+ exit( EXITBAD );
+}
+
+
+/*
+ * Put together the final command string we are to run.
+ */
+
+static void string_new_from_argv( string * result, char const * const * argv )
+{
+ assert( argv );
+ assert( argv[ 0 ] );
+ string_copy( result, *(argv++) );
+ while ( *argv )
+ {
+ string_push_back( result, ' ' );
+ string_append( result, *(argv++) );
+ }
+}
+
+
+/*
+ * Reports the last failed Windows API related error message.
+ */
+
+static void reportWindowsError( char const * const apiName )
+{
+ char * errorMessage;
+ DWORD const errorCode = GetLastError();
+ DWORD apiResult = FormatMessageA(
+ FORMAT_MESSAGE_ALLOCATE_BUFFER | /* __in DWORD dwFlags */
+ FORMAT_MESSAGE_FROM_SYSTEM |
+ FORMAT_MESSAGE_IGNORE_INSERTS,
+ NULL, /* __in_opt LPCVOID lpSource */
+ errorCode, /* __in DWORD dwMessageId */
+ 0, /* __in DWORD dwLanguageId */
+ (LPSTR)&errorMessage, /* __out LPTSTR lpBuffer */
+ 0, /* __in DWORD nSize */
+ 0 ); /* __in_opt va_list * Arguments */
+ if ( !apiResult )
+ printf( "%s() Windows API failed: %d.\n", apiName, errorCode );
+ else
+ {
+ printf( "%s() Windows API failed: %d - %s\n", apiName, errorCode,
+ errorMessage );
+ LocalFree( errorMessage );
+ }
+}
+
+
+#endif /* USE_EXECNT */
diff --git a/src/kenlm/jam-files/engine/execunix.c b/src/kenlm/jam-files/engine/execunix.c
new file mode 100644
index 0000000..965e580
--- /dev/null
+++ b/src/kenlm/jam-files/engine/execunix.c
@@ -0,0 +1,559 @@
+/*
+ * Copyright 1993, 1995 Christopher Seiwald.
+ * Copyright 2007 Noel Belcourt.
+ *
+ * This file is part of Jam - see jam.c for Copyright information.
+ */
+
+#include "jam.h"
+#include "execcmd.h"
+
+#include "lists.h"
+#include "output.h"
+#include "strings.h"
+
+#include <errno.h>
+#include <signal.h>
+#include <stdio.h>
+#include <time.h>
+#include <unistd.h> /* vfork(), _exit(), STDOUT_FILENO and such */
+#include <sys/resource.h>
+#include <sys/times.h>
+#include <sys/wait.h>
+
+#if defined(sun) || defined(__sun)
+ #include <wait.h>
+#endif
+
+#ifdef USE_EXECUNIX
+
+#include <sys/times.h>
+
+#if defined(__APPLE__)
+ #define NO_VFORK
+#endif
+
+#ifdef NO_VFORK
+ #define vfork() fork()
+#endif
+
+
+/*
+ * execunix.c - execute a shell script on UNIX/OS2/AmigaOS
+ *
+ * If $(JAMSHELL) is defined, uses that to formulate execvp()/spawnvp(). The
+ * default is: /bin/sh -c
+ *
+ * In $(JAMSHELL), % expands to the command string and ! expands to the slot
+ * number (starting at 1) for multiprocess (-j) invocations. If $(JAMSHELL) does
+ * not include a %, it is tacked on as the last argument.
+ *
+ * Each word must be an individual element in a jam variable value.
+ *
+ * Do not just set JAMSHELL to /bin/sh - it will not work!
+ *
+ * External routines:
+ * exec_check() - preprocess and validate the command.
+ * exec_cmd() - launch an async command execution.
+ * exec_wait() - wait for any of the async command processes to terminate.
+ */
+
+/* find a free slot in the running commands table */
+static int get_free_cmdtab_slot();
+
+/* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */
+
+static clock_t tps;
+static int old_time_initialized;
+static struct tms old_time;
+
+/* We hold stdout & stderr child process information in two element arrays
+ * indexed as follows.
+ */
+#define OUT 0
+#define ERR 1
+
+static struct
+{
+ int pid; /* on win32, a real process handle */
+ int fd[ 2 ]; /* file descriptors for stdout and stderr */
+ FILE * stream[ 2 ]; /* child's stdout and stderr file streams */
+ clock_t start_time; /* start time of child process */
+ int exit_reason; /* termination status */
+ char * buffer[ 2 ]; /* buffers to hold stdout and stderr, if any */
+ int buf_size[ 2 ]; /* buffer sizes in bytes */
+ timestamp start_dt; /* start of command timestamp */
+
+ /* Function called when the command completes. */
+ ExecCmdCallback func;
+
+ /* Opaque data passed back to the 'func' callback. */
+ void * closure;
+} cmdtab[ MAXJOBS ] = { { 0 } };
+
+
+/*
+ * exec_check() - preprocess and validate the command.
+ */
+
+int exec_check
+(
+ string const * command,
+ LIST * * pShell,
+ int * error_length,
+ int * error_max_length
+)
+{
+ int const is_raw_cmd = is_raw_command_request( *pShell );
+
+ /* We allow empty commands for non-default shells since we do not really
+ * know what they are going to do with such commands.
+ */
+ if ( !command->size && ( is_raw_cmd || list_empty( *pShell ) ) )
+ return EXEC_CHECK_NOOP;
+
+ return is_raw_cmd
+ ? EXEC_CHECK_OK
+ : check_cmd_for_too_long_lines( command->value, MAXLINE, error_length,
+ error_max_length );
+}
+
+
+/*
+ * exec_cmd() - launch an async command execution.
+ */
+
+/* We hold file descriptors for pipes used to communicate with child processes
+ * in two element arrays indexed as follows.
+ */
+#define EXECCMD_PIPE_READ 0
+#define EXECCMD_PIPE_WRITE 1
+
+void exec_cmd
+(
+ string const * command,
+ ExecCmdCallback func,
+ void * closure,
+ LIST * shell
+)
+{
+ int const slot = get_free_cmdtab_slot();
+ int out[ 2 ];
+ int err[ 2 ];
+ int len;
+ char const * argv[ MAXARGC + 1 ]; /* +1 for NULL */
+
+ /* Initialize default shell. */
+ static LIST * default_shell;
+ if ( !default_shell )
+ default_shell = list_push_back( list_new(
+ object_new( "/bin/sh" ) ),
+ object_new( "-c" ) );
+
+ if ( list_empty( shell ) )
+ shell = default_shell;
+
+    /* Formulate argv. If shell was defined, be prepared for % and ! subs.
+ * Otherwise, use stock /bin/sh.
+ */
+ argv_from_shell( argv, shell, command->value, slot );
+
+ if ( DEBUG_EXECCMD )
+ {
+ int i;
+ printf( "Using shell: " );
+ list_print( shell );
+ printf( "\n" );
+ for ( i = 0; argv[ i ]; ++i )
+ printf( " argv[%d] = '%s'\n", i, argv[ i ] );
+ }
+
+ /* Create pipes for collecting child output. */
+ if ( pipe( out ) < 0 || ( globs.pipe_action && pipe( err ) < 0 ) )
+ {
+ perror( "pipe" );
+ exit( EXITBAD );
+ }
+
+ /* Initialize old_time only once. */
+ if ( !old_time_initialized )
+ {
+ times( &old_time );
+ old_time_initialized = 1;
+ }
+
+ /* Start the command */
+
+ timestamp_current( &cmdtab[ slot ].start_dt );
+
+ if ( 0 < globs.timeout )
+ {
+ /* Handle hung processes by manually tracking elapsed time and signal
+ * process when time limit expires.
+ */
+ struct tms buf;
+ cmdtab[ slot ].start_time = times( &buf );
+
+ /* Make a global, only do this once. */
+ if ( !tps ) tps = sysconf( _SC_CLK_TCK );
+ }
+
+ /* Child does not need the read pipe ends used by the parent. */
+ fcntl( out[ EXECCMD_PIPE_READ ], F_SETFD, FD_CLOEXEC );
+ if ( globs.pipe_action )
+ fcntl( err[ EXECCMD_PIPE_READ ], F_SETFD, FD_CLOEXEC );
+
+ if ( ( cmdtab[ slot ].pid = vfork() ) == -1 )
+ {
+ perror( "vfork" );
+ exit( EXITBAD );
+ }
+
+ if ( cmdtab[ slot ].pid == 0 )
+ {
+ /*****************/
+ /* Child process */
+ /*****************/
+ int const pid = getpid();
+
+ /* Redirect stdout and stderr to pipes inherited from the parent. */
+ dup2( out[ EXECCMD_PIPE_WRITE ], STDOUT_FILENO );
+ dup2( globs.pipe_action ? err[ EXECCMD_PIPE_WRITE ] :
+ out[ EXECCMD_PIPE_WRITE ], STDERR_FILENO );
+ close( out[ EXECCMD_PIPE_WRITE ] );
+ if ( globs.pipe_action )
+ close( err[ EXECCMD_PIPE_WRITE ] );
+
+ /* Make this process a process group leader so that when we kill it, all
+ * child processes of this process are terminated as well. We use
+ * killpg( pid, SIGKILL ) to kill the process group leader and all its
+ * children.
+ */
+ if ( 0 < globs.timeout )
+ {
+ struct rlimit r_limit;
+ r_limit.rlim_cur = globs.timeout;
+ r_limit.rlim_max = globs.timeout;
+ setrlimit( RLIMIT_CPU, &r_limit );
+ }
+ setpgid( pid, pid );
+ execvp( argv[ 0 ], (char * *)argv );
+ perror( "execvp" );
+ _exit( 127 );
+ }
+
+ /******************/
+ /* Parent process */
+ /******************/
+ setpgid( cmdtab[ slot ].pid, cmdtab[ slot ].pid );
+
+    /* The parent does not need the write pipe ends used by the child. */
+ close( out[ EXECCMD_PIPE_WRITE ] );
+ if ( globs.pipe_action )
+ close( err[ EXECCMD_PIPE_WRITE ] );
+
+ /* Set both pipe read file descriptors to non-blocking. */
+ fcntl( out[ EXECCMD_PIPE_READ ], F_SETFL, O_NONBLOCK );
+ if ( globs.pipe_action )
+ fcntl( err[ EXECCMD_PIPE_READ ], F_SETFL, O_NONBLOCK );
+
+ /* Parent reads from out[ EXECCMD_PIPE_READ ]. */
+ cmdtab[ slot ].fd[ OUT ] = out[ EXECCMD_PIPE_READ ];
+ cmdtab[ slot ].stream[ OUT ] = fdopen( cmdtab[ slot ].fd[ OUT ], "rb" );
+ if ( !cmdtab[ slot ].stream[ OUT ] )
+ {
+ perror( "fdopen" );
+ exit( EXITBAD );
+ }
+
+ /* Parent reads from err[ EXECCMD_PIPE_READ ]. */
+ if ( globs.pipe_action )
+ {
+ cmdtab[ slot ].fd[ ERR ] = err[ EXECCMD_PIPE_READ ];
+ cmdtab[ slot ].stream[ ERR ] = fdopen( cmdtab[ slot ].fd[ ERR ], "rb" );
+ if ( !cmdtab[ slot ].stream[ ERR ] )
+ {
+ perror( "fdopen" );
+ exit( EXITBAD );
+ }
+ }
+
+ /* Save input data into the selected running commands table slot. */
+ cmdtab[ slot ].func = func;
+ cmdtab[ slot ].closure = closure;
+}
+
+#undef EXECCMD_PIPE_READ
+#undef EXECCMD_PIPE_WRITE
+
+
+/* Returns 1 if file descriptor is closed, or 0 if it is still alive.
+ *
+ * i is index into cmdtab
+ *
+ * s (stream) indexes:
+ * - cmdtab[ i ].stream[ s ]
+ * - cmdtab[ i ].buffer[ s ]
+ * - cmdtab[ i ].fd [ s ]
+ */
+
+static int read_descriptor( int i, int s )
+{
+ int ret;
+ char buffer[ BUFSIZ ];
+
+ while ( 0 < ( ret = fread( buffer, sizeof( char ), BUFSIZ - 1,
+ cmdtab[ i ].stream[ s ] ) ) )
+ {
+ buffer[ ret ] = 0;
+ if ( !cmdtab[ i ].buffer[ s ] )
+ {
+ /* Never been allocated. */
+ if ( globs.max_buf && ret > globs.max_buf )
+ {
+ ret = globs.max_buf;
+ buffer[ ret ] = 0;
+ }
+ cmdtab[ i ].buf_size[ s ] = ret + 1;
+ cmdtab[ i ].buffer[ s ] = (char*)BJAM_MALLOC_ATOMIC( ret + 1 );
+ memcpy( cmdtab[ i ].buffer[ s ], buffer, ret + 1 );
+ }
+ else
+ {
+ /* Previously allocated. */
+ if ( cmdtab[ i ].buf_size[ s ] < globs.max_buf || !globs.max_buf )
+ {
+ char * tmp = cmdtab[ i ].buffer[ s ];
+ int const old_len = cmdtab[ i ].buf_size[ s ] - 1;
+ int const new_len = old_len + ret + 1;
+ cmdtab[ i ].buf_size[ s ] = new_len;
+ cmdtab[ i ].buffer[ s ] = (char*)BJAM_MALLOC_ATOMIC( new_len );
+ memcpy( cmdtab[ i ].buffer[ s ], tmp, old_len );
+ memcpy( cmdtab[ i ].buffer[ s ] + old_len, buffer, ret + 1 );
+ BJAM_FREE( tmp );
+ }
+ }
+ }
+
+ /* If buffer full, ensure last buffer char is newline so that jam log
+     * contains the command status at the beginning of its own line instead of
+ * appended to end of the previous output.
+ */
+ if ( globs.max_buf && globs.max_buf <= cmdtab[ i ].buf_size[ s ] )
+ cmdtab[ i ].buffer[ s ][ cmdtab[ i ].buf_size[ s ] - 2 ] = '\n';
+
+ return feof( cmdtab[ i ].stream[ s ] );
+}
+
+
+/*
+ * close_streams() - Close the stream and pipe descriptor.
+ */
+
+static void close_streams( int const i, int const s )
+{
+ fclose( cmdtab[ i ].stream[ s ] );
+ cmdtab[ i ].stream[ s ] = 0;
+
+ close( cmdtab[ i ].fd[ s ] );
+ cmdtab[ i ].fd[ s ] = 0;
+}
+
+
+/*
+ * Populate the file descriptors collection for use in select() and return the
+ * maximal included file descriptor value.
+ */
+
+static int populate_file_descriptors( fd_set * const fds )
+{
+ int i;
+ int fd_max = 0;
+
+ FD_ZERO( fds );
+ for ( i = 0; i < globs.jobs; ++i )
+ {
+ int fd;
+ if ( ( fd = cmdtab[ i ].fd[ OUT ] ) > 0 )
+ {
+ if ( fd > fd_max ) fd_max = fd;
+ FD_SET( fd, fds );
+ }
+ if ( globs.pipe_action )
+ {
+ if ( ( fd = cmdtab[ i ].fd[ ERR ] ) > 0 )
+ {
+ if ( fd > fd_max ) fd_max = fd;
+ FD_SET( fd, fds );
+ }
+ }
+ }
+ return fd_max;
+}
+
+
+/*
+ * exec_wait() - wait for any of the async command processes to terminate.
+ *
+ * May register more than one terminated child process but will exit as soon as
+ * at least one has been registered.
+ */
+
+void exec_wait()
+{
+ int finished = 0;
+
+ /* Process children that signaled. */
+ while ( !finished )
+ {
+ int i;
+ struct timeval tv;
+ struct timeval * ptv = NULL;
+ int select_timeout = globs.timeout;
+
+ /* Prepare file descriptor information for use in select(). */
+ fd_set fds;
+ int const fd_max = populate_file_descriptors( &fds );
+
+ /* Check for timeouts:
+ * - kill children that already timed out
+ * - decide how long until the next one times out
+ */
+ if ( globs.timeout > 0 )
+ {
+ struct tms buf;
+ clock_t const current = times( &buf );
+ for ( i = 0; i < globs.jobs; ++i )
+ if ( cmdtab[ i ].pid )
+ {
+ clock_t const consumed =
+ ( current - cmdtab[ i ].start_time ) / tps;
+ if ( consumed >= globs.timeout )
+ {
+ killpg( cmdtab[ i ].pid, SIGKILL );
+ cmdtab[ i ].exit_reason = EXIT_TIMEOUT;
+ }
+ else if ( globs.timeout - consumed < select_timeout )
+ select_timeout = globs.timeout - consumed;
+ }
+
+ /* If nothing else causes our select() call to exit, force it after
+ * however long it takes for the next one of our child processes to
+             * cross its allotted processing time so we can terminate it.
+ */
+ tv.tv_sec = select_timeout;
+ tv.tv_usec = 0;
+ ptv = &tv;
+ }
+
+ /* select() will wait for I/O on a descriptor, a signal, or timeout. */
+ {
+ int ret;
+ while ( ( ret = select( fd_max + 1, &fds, 0, 0, ptv ) ) == -1 )
+ if ( errno != EINTR )
+ break;
+ if ( ret <= 0 )
+ continue;
+ }
+
+ for ( i = 0; i < globs.jobs; ++i )
+ {
+ int out_done = 0;
+ int err_done = 0;
+ if ( FD_ISSET( cmdtab[ i ].fd[ OUT ], &fds ) )
+ out_done = read_descriptor( i, OUT );
+
+ if ( globs.pipe_action && FD_ISSET( cmdtab[ i ].fd[ ERR ], &fds ) )
+ err_done = read_descriptor( i, ERR );
+
+ /* If feof on either descriptor, we are done. */
+ if ( out_done || err_done )
+ {
+ int pid;
+ int status;
+ int rstat;
+ timing_info time_info;
+
+ /* We found a terminated child process - our search is done. */
+ finished = 1;
+
+ /* Close the stream and pipe descriptors. */
+ close_streams( i, OUT );
+ if ( globs.pipe_action )
+ close_streams( i, ERR );
+
+ /* Reap the child and release resources. */
+ while ( ( pid = waitpid( cmdtab[ i ].pid, &status, 0 ) ) == -1 )
+ if ( errno != EINTR )
+ break;
+ if ( pid != cmdtab[ i ].pid )
+ {
+ printf( "unknown pid %d with errno = %d\n", pid, errno );
+ exit( EXITBAD );
+ }
+
+ /* Set reason for exit if not timed out. */
+ if ( WIFEXITED( status ) )
+ cmdtab[ i ].exit_reason = WEXITSTATUS( status )
+ ? EXIT_FAIL
+ : EXIT_OK;
+
+ {
+ struct tms new_time;
+ times( &new_time );
+ time_info.system = (double)( new_time.tms_cstime -
+ old_time.tms_cstime ) / CLOCKS_PER_SEC;
+ time_info.user = (double)( new_time.tms_cutime -
+ old_time.tms_cutime ) / CLOCKS_PER_SEC;
+ timestamp_copy( &time_info.start, &cmdtab[ i ].start_dt );
+ timestamp_current( &time_info.end );
+ old_time = new_time;
+ }
+
+ /* Drive the completion. */
+ if ( interrupted() )
+ rstat = EXEC_CMD_INTR;
+ else if ( status )
+ rstat = EXEC_CMD_FAIL;
+ else
+ rstat = EXEC_CMD_OK;
+
+ /* Call the callback, may call back to jam rule land. */
+ (*cmdtab[ i ].func)( cmdtab[ i ].closure, rstat, &time_info,
+ cmdtab[ i ].buffer[ OUT ], cmdtab[ i ].buffer[ ERR ],
+ cmdtab[ i ].exit_reason );
+
+ /* Clean up the command's running commands table slot. */
+ BJAM_FREE( cmdtab[ i ].buffer[ OUT ] );
+ cmdtab[ i ].buffer[ OUT ] = 0;
+ cmdtab[ i ].buf_size[ OUT ] = 0;
+
+ BJAM_FREE( cmdtab[ i ].buffer[ ERR ] );
+ cmdtab[ i ].buffer[ ERR ] = 0;
+ cmdtab[ i ].buf_size[ ERR ] = 0;
+
+ cmdtab[ i ].pid = 0;
+ cmdtab[ i ].func = 0;
+ cmdtab[ i ].closure = 0;
+ cmdtab[ i ].start_time = 0;
+ }
+ }
+ }
+}
+
+
+/*
+ * Find a free slot in the running commands table.
+ */
+
+static int get_free_cmdtab_slot()
+{
+ int slot;
+ for ( slot = 0; slot < MAXJOBS; ++slot )
+ if ( !cmdtab[ slot ].pid )
+ return slot;
+ printf( "no slots for child!\n" );
+ exit( EXITBAD );
+}
+
+# endif /* USE_EXECUNIX */
diff --git a/src/kenlm/jam-files/engine/filent.c b/src/kenlm/jam-files/engine/filent.c
new file mode 100644
index 0000000..e4ac331
--- /dev/null
+++ b/src/kenlm/jam-files/engine/filent.c
@@ -0,0 +1,435 @@
+/*
+ * Copyright 1993, 1995 Christopher Seiwald.
+ *
+ * This file is part of Jam - see jam.c for Copyright information.
+ */
+
+/* This file is ALSO:
+ * Copyright 2001-2004 David Abrahams.
+ * Copyright 2005 Rene Rivera.
+ * Distributed under the Boost Software License, Version 1.0.
+ * (See accompanying file LICENSE_1_0.txt or copy at
+ * http://www.boost.org/LICENSE_1_0.txt)
+ */
+
+/*
+ * filent.c - scan directories and archives on NT
+ *
+ * External routines:
+ * file_archscan() - scan an archive for files
+ * file_mkdir() - create a directory
+ * file_supported_fmt_resolution() - file modification timestamp resolution
+ *
+ * External routines called only via routines in filesys.c:
+ * file_collect_dir_content_() - collects directory content information
+ * file_dirscan_() - OS specific file_dirscan() implementation
+ * file_query_() - query information about a path from the OS
+ */
+
+#include "jam.h"
+#ifdef OS_NT
+#include "filesys.h"
+
+#include "object.h"
+#include "pathsys.h"
+#include "strings.h"
+
+#ifdef __BORLANDC__
+# undef FILENAME /* cpp namespace collision */
+#endif
+
+#define WIN32_LEAN_AND_MEAN
+#include <windows.h>
+
+#include <assert.h>
+#include <ctype.h>
+#include <direct.h>
+#include <io.h>
+
+
+/*
+ * file_collect_dir_content_() - collects directory content information
+ */
+
+int file_collect_dir_content_( file_info_t * const d )
+{
+ PATHNAME f;
+ string pathspec[ 1 ];
+ string pathname[ 1 ];
+ LIST * files = L0;
+ int d_length;
+
+ assert( d );
+ assert( d->is_dir );
+ assert( list_empty( d->files ) );
+
+ d_length = strlen( object_str( d->name ) );
+
+ memset( (char *)&f, '\0', sizeof( f ) );
+ f.f_dir.ptr = object_str( d->name );
+ f.f_dir.len = d_length;
+
+ /* Prepare file search specification for the FindXXX() Windows API. */
+ if ( !d_length )
+ string_copy( pathspec, ".\\*" );
+ else
+ {
+ /* We can not simply assume the given folder name will never include its
+ * trailing path separator or otherwise we would not support the Windows
+ * root folder specified without its drive letter, i.e. '\'.
+ */
+ char const trailingChar = object_str( d->name )[ d_length - 1 ] ;
+ string_copy( pathspec, object_str( d->name ) );
+ if ( ( trailingChar != '\\' ) && ( trailingChar != '/' ) )
+ string_append( pathspec, "\\" );
+ string_append( pathspec, "*" );
+ }
+
+ /* The following code for collecting information about all files in a folder
+ * needs to be kept synchronized with how the file_query() operation is
+ * implemented (collects information about a single file).
+ */
+ {
+ /* FIXME: Avoid duplicate FindXXX Windows API calls here and in the code
+ * determining a normalized path.
+ */
+ WIN32_FIND_DATA finfo;
+ HANDLE const findHandle = FindFirstFileA( pathspec->value, &finfo );
+ if ( findHandle == INVALID_HANDLE_VALUE )
+ {
+ string_free( pathspec );
+ return -1;
+ }
+
+ string_new( pathname );
+ do
+ {
+ OBJECT * pathname_obj;
+
+ f.f_base.ptr = finfo.cFileName;
+ f.f_base.len = strlen( finfo.cFileName );
+
+ string_truncate( pathname, 0 );
+ path_build( &f, pathname );
+
+ pathname_obj = object_new( pathname->value );
+ path_register_key( pathname_obj );
+ files = list_push_back( files, pathname_obj );
+ {
+ int found;
+ file_info_t * const ff = file_info( pathname_obj, &found );
+ ff->is_dir = finfo.dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY;
+ ff->is_file = !ff->is_dir;
+ ff->exists = 1;
+ timestamp_from_filetime( &ff->time, &finfo.ftLastWriteTime );
+ }
+ }
+ while ( FindNextFile( findHandle, &finfo ) );
+
+ FindClose( findHandle );
+ }
+
+ string_free( pathname );
+ string_free( pathspec );
+
+ d->files = files;
+ return 0;
+}
+
+
+/*
+ * file_dirscan_() - OS specific file_dirscan() implementation
+ */
+
+void file_dirscan_( file_info_t * const d, scanback func, void * closure )
+{
+ assert( d );
+ assert( d->is_dir );
+
+ /* Special case \ or d:\ : enter it */
+ {
+ char const * const name = object_str( d->name );
+ if ( name[ 0 ] == '\\' && !name[ 1 ] )
+ {
+ (*func)( closure, d->name, 1 /* stat()'ed */, &d->time );
+ }
+ else if ( name[ 0 ] && name[ 1 ] == ':' && name[ 2 ] && !name[ 3 ] )
+ {
+ /* We have just entered a 3-letter drive name spelling (with a
+ * trailing slash), into the hash table. Now enter its two-letter
+ * variant, without the trailing slash, so that if we try to check
+ * whether "c:" exists, we hit it.
+ *
+ * Jam core has workarounds for that. Given:
+ * x = c:\whatever\foo ;
+ * p = $(x:D) ;
+ * p2 = $(p:D) ;
+ * There will be no trailing slash in $(p), but there will be one in
+ * $(p2). But, that seems rather fragile.
+ */
+ OBJECT * const dir_no_slash = object_new_range( name, 2 );
+ (*func)( closure, d->name, 1 /* stat()'ed */, &d->time );
+ (*func)( closure, dir_no_slash, 1 /* stat()'ed */, &d->time );
+ object_free( dir_no_slash );
+ }
+ }
+}
+
+
+/*
+ * file_mkdir() - create a directory
+ */
+
+int file_mkdir( char const * const path )
+{
+ return _mkdir( path );
+}
+
+
+/*
+ * file_query_() - query information about a path from the OS
+ *
+ * The following code for collecting information about a single file needs to be
+ * kept synchronized with how the file_collect_dir_content_() operation is
+ * implemented (collects information about all files in a folder).
+ */
+
+int try_file_query_root( file_info_t * const info )
+{
+ WIN32_FILE_ATTRIBUTE_DATA fileData;
+ char buf[ 4 ];
+ char const * const pathstr = object_str( info->name );
+ if ( !pathstr[ 0 ] )
+ {
+ buf[ 0 ] = '.';
+ buf[ 1 ] = 0;
+ }
+ else if ( pathstr[ 0 ] == '\\' && ! pathstr[ 1 ] )
+ {
+ buf[ 0 ] = '\\';
+ buf[ 1 ] = '\0';
+ }
+ else if ( pathstr[ 1 ] == ':' )
+ {
+ if ( !pathstr[ 2 ] )
+ {
+ }
+ else if ( !pathstr[ 2 ] || ( pathstr[ 2 ] == '\\' && !pathstr[ 3 ] ) )
+ {
+ buf[ 0 ] = pathstr[ 0 ];
+ buf[ 1 ] = ':';
+ buf[ 2 ] = '\\';
+ buf[ 3 ] = '\0';
+ }
+ else
+ {
+ return 0;
+ }
+ }
+ else
+ {
+ return 0;
+ }
+
+ /* We have a root path */
+ if ( !GetFileAttributesExA( buf, GetFileExInfoStandard, &fileData ) )
+ {
+ info->is_dir = 0;
+ info->is_file = 0;
+ info->exists = 0;
+ timestamp_clear( &info->time );
+ }
+ else
+ {
+ info->is_dir = fileData.dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY;
+ info->is_file = !info->is_dir;
+ info->exists = 1;
+ timestamp_from_filetime( &info->time, &fileData.ftLastWriteTime );
+ }
+ return 1;
+}
+
+void file_query_( file_info_t * const info )
+{
+ char const * const pathstr = object_str( info->name );
+ const char * dir;
+ OBJECT * parent;
+ file_info_t * parent_info;
+
+ if ( try_file_query_root( info ) )
+ return;
+
+ if ( ( dir = strrchr( pathstr, '\\' ) ) )
+ {
+ parent = object_new_range( pathstr, dir - pathstr );
+ }
+ else
+ {
+ parent = object_copy( constant_empty );
+ }
+ parent_info = file_query( parent );
+ object_free( parent );
+ if ( !parent_info || !parent_info->is_dir )
+ {
+ info->is_dir = 0;
+ info->is_file = 0;
+ info->exists = 0;
+ timestamp_clear( &info->time );
+ }
+ else
+ {
+ info->is_dir = 0;
+ info->is_file = 0;
+ info->exists = 0;
+ timestamp_clear( &info->time );
+ if ( list_empty( parent_info->files ) )
+ file_collect_dir_content_( parent_info );
+ }
+}
+
+
+/*
+ * file_supported_fmt_resolution() - file modification timestamp resolution
+ *
+ * Returns the minimum file modification timestamp resolution supported by this
+ * Boost Jam implementation. File modification timestamp changes of less than
+ * the returned value might not be recognized.
+ *
+ * Does not take into consideration any OS or file system related restrictions.
+ *
+ * Return value 0 indicates that any value supported by the OS is also supported
+ * here.
+ */
+
+void file_supported_fmt_resolution( timestamp * const t )
+{
+ /* On Windows we support nano-second file modification timestamp resolution,
+ * just the same as the Windows OS itself.
+ */
+ timestamp_init( t, 0, 0 );
+}
+
+
+/*
+ * file_archscan() - scan an archive for files
+ */
+
+/* Straight from SunOS */
+
+#define ARMAG "!<arch>\n"
+#define SARMAG 8
+
+#define ARFMAG "`\n"
+
+struct ar_hdr
+{
+ char ar_name[ 16 ];
+ char ar_date[ 12 ];
+ char ar_uid[ 6 ];
+ char ar_gid[ 6 ];
+ char ar_mode[ 8 ];
+ char ar_size[ 10 ];
+ char ar_fmag[ 2 ];
+};
+
+#define SARFMAG 2
+#define SARHDR sizeof( struct ar_hdr )
+
+void file_archscan( char const * archive, scanback func, void * closure )
+{
+ struct ar_hdr ar_hdr;
+ char * string_table = 0;
+ char buf[ MAXJPATH ];
+ long offset;
+ int const fd = open( archive, O_RDONLY | O_BINARY, 0 );
+
+ if ( fd < 0 )
+ return;
+
+ if ( read( fd, buf, SARMAG ) != SARMAG || strncmp( ARMAG, buf, SARMAG ) )
+ {
+ close( fd );
+ return;
+ }
+
+ offset = SARMAG;
+
+ if ( DEBUG_BINDSCAN )
+ printf( "scan archive %s\n", archive );
+
+ while ( ( read( fd, &ar_hdr, SARHDR ) == SARHDR ) &&
+ !memcmp( ar_hdr.ar_fmag, ARFMAG, SARFMAG ) )
+ {
+ long lar_date;
+ long lar_size;
+ char * name = 0;
+ char * endname;
+
+ sscanf( ar_hdr.ar_date, "%ld", &lar_date );
+ sscanf( ar_hdr.ar_size, "%ld", &lar_size );
+
+ lar_size = ( lar_size + 1 ) & ~1;
+
+ if ( ar_hdr.ar_name[ 0 ] == '/' && ar_hdr.ar_name[ 1 ] == '/' )
+ {
+ /* This is the "string table" entry of the symbol table, holding
+ * filename strings longer than 15 characters, i.e. those that do
+ * not fit into ar_name.
+ */
+ string_table = BJAM_MALLOC_ATOMIC( lar_size + 1 );
+ if ( read( fd, string_table, lar_size ) != lar_size )
+ printf( "error reading string table\n" );
+ string_table[ lar_size ] = '\0';
+ offset += SARHDR + lar_size;
+ continue;
+ }
+ else if ( ar_hdr.ar_name[ 0 ] == '/' && ar_hdr.ar_name[ 1 ] != ' ' )
+ {
+ /* Long filenames are recognized by "/nnnn" where nnnn is the
+ * string's offset in the string table represented in ASCII
+ * decimals.
+ */
+ name = string_table + atoi( ar_hdr.ar_name + 1 );
+ for ( endname = name; *endname && *endname != '\n'; ++endname );
+ }
+ else
+ {
+ /* normal name */
+ name = ar_hdr.ar_name;
+ endname = name + sizeof( ar_hdr.ar_name );
+ }
+
+ /* strip trailing white-space, slashes, and backslashes */
+
+ while ( endname-- > name )
+ if ( !isspace( *endname ) && ( *endname != '\\' ) && ( *endname !=
+ '/' ) )
+ break;
+ *++endname = 0;
+
+ /* strip leading directory names, an NT specialty */
+ {
+ char * c;
+ if ( c = strrchr( name, '/' ) )
+ name = c + 1;
+ if ( c = strrchr( name, '\\' ) )
+ name = c + 1;
+ }
+
+ sprintf( buf, "%s(%.*s)", archive, endname - name, name );
+ {
+ OBJECT * const member = object_new( buf );
+ timestamp time;
+ timestamp_init( &time, (time_t)lar_date, 0 );
+ (*func)( closure, member, 1 /* time valid */, &time );
+ object_free( member );
+ }
+
+ offset += SARHDR + lar_size;
+ lseek( fd, offset, 0 );
+ }
+
+ close( fd );
+}
+
+#endif /* OS_NT */
diff --git a/src/kenlm/jam-files/engine/filesys.c b/src/kenlm/jam-files/engine/filesys.c
new file mode 100644
index 0000000..dadaef8
--- /dev/null
+++ b/src/kenlm/jam-files/engine/filesys.c
@@ -0,0 +1,326 @@
+/*
+ * Copyright 2001-2004 David Abrahams.
+ * Copyright 2005 Rene Rivera.
+ * Distributed under the Boost Software License, Version 1.0.
+ * (See accompanying file LICENSE_1_0.txt or http://www.boost.org/LICENSE_1_0.txt)
+ */
+
+/*
+ * filesys.c - OS independent file system manipulation support
+ *
+ * External routines:
+ * file_build1() - construct a path string based on PATHNAME information
+ * file_dirscan() - scan a directory for files
+ * file_done() - module cleanup called on shutdown
+ * file_info() - return cached information about a path
+ * file_is_file() - return whether a path identifies an existing file
+ * file_query() - get cached information about a path, query the OS if
+ * needed
+ * file_remove_atexit() - schedule a path to be removed on program exit
+ * file_time() - get a file timestamp
+ *
+ * External routines - utilities for OS specific module implementations:
+ * file_query_posix_() - query information about a path using POSIX stat()
+ *
+ * Internal routines:
+ * file_dirscan_impl() - no-profiling worker for file_dirscan()
+ */
+
+
+#include "jam.h"
+#include "filesys.h"
+
+#include "lists.h"
+#include "object.h"
+#include "pathsys.h"
+#include "strings.h"
+
+#include <assert.h>
+#include <sys/stat.h>
+
+
+/* Internal OS specific implementation details - have names ending with an
+ * underscore and are expected to be implemented in an OS specific fileXXX.c
+ * module.
+ */
+void file_dirscan_( file_info_t * const dir, scanback func, void * closure );
+int file_collect_dir_content_( file_info_t * const dir );
+void file_query_( file_info_t * const );
+
+static void file_dirscan_impl( OBJECT * dir, scanback func, void * closure );
+static void free_file_info( void * xfile, void * data );
+static void remove_files_atexit( void );
+
+
+static struct hash * filecache_hash;
+
+
+/*
+ * file_build1() - construct a path string based on PATHNAME information
+ */
+
+void file_build1( PATHNAME * const f, string * file )
+{
+ if ( DEBUG_SEARCH )
+ {
+ printf( "build file: " );
+ if ( f->f_root.len )
+ printf( "root = '%.*s' ", f->f_root.len, f->f_root.ptr );
+ if ( f->f_dir.len )
+ printf( "dir = '%.*s' ", f->f_dir.len, f->f_dir.ptr );
+ if ( f->f_base.len )
+ printf( "base = '%.*s' ", f->f_base.len, f->f_base.ptr );
+ printf( "\n" );
+ }
+
+ /* Start with the grist. If the current grist is not surrounded by <>'s, add
+ * them.
+ */
+ if ( f->f_grist.len )
+ {
+ if ( f->f_grist.ptr[ 0 ] != '<' )
+ string_push_back( file, '<' );
+ string_append_range(
+ file, f->f_grist.ptr, f->f_grist.ptr + f->f_grist.len );
+ if ( file->value[ file->size - 1 ] != '>' )
+ string_push_back( file, '>' );
+ }
+}
+
+
+/*
+ * file_dirscan() - scan a directory for files
+ */
+
+void file_dirscan( OBJECT * dir, scanback func, void * closure )
+{
+ PROFILE_ENTER( FILE_DIRSCAN );
+ file_dirscan_impl( dir, func, closure );
+ PROFILE_EXIT( FILE_DIRSCAN );
+}
+
+
+/*
+ * file_done() - module cleanup called on shutdown
+ */
+
+void file_done()
+{
+ remove_files_atexit();
+ if ( filecache_hash )
+ {
+ hashenumerate( filecache_hash, free_file_info, (void *)0 );
+ hashdone( filecache_hash );
+ }
+}
+
+
+/*
+ * file_info() - return cached information about a path
+ *
+ * Returns a default initialized structure containing only the path's normalized
+ * name in case this is the first time this file system entity has been
+ * referenced.
+ */
+
+file_info_t * file_info( OBJECT * const path, int * found )
+{
+ OBJECT * const path_key = path_as_key( path );
+ file_info_t * finfo;
+
+ if ( !filecache_hash )
+ filecache_hash = hashinit( sizeof( file_info_t ), "file_info" );
+
+ finfo = (file_info_t *)hash_insert( filecache_hash, path_key, found );
+ if ( !*found )
+ {
+ finfo->name = path_key;
+ finfo->files = L0;
+ }
+ else
+ object_free( path_key );
+
+ return finfo;
+}
+
+
+/*
+ * file_is_file() - return whether a path identifies an existing file
+ */
+
+int file_is_file( OBJECT * const path )
+{
+ file_info_t const * const ff = file_query( path );
+ return ff ? ff->is_file : -1;
+}
+
+
+/*
+ * file_time() - get a file timestamp
+ */
+
+int file_time( OBJECT * const path, timestamp * const time )
+{
+ file_info_t const * const ff = file_query( path );
+ if ( !ff ) return -1;
+ timestamp_copy( time, &ff->time );
+ return 0;
+}
+
+
+/*
+ * file_query() - get cached information about a path, query the OS if needed
+ *
+ * Returns 0 in case querying the OS about the given path fails, e.g. because
+ * the path does not reference an existing file system object.
+ */
+
+file_info_t * file_query( OBJECT * const path )
+{
+ /* FIXME: Add tracking for disappearing files (i.e. those that can not be
+ * detected by stat() even though they had been detected successfully
+ * before) and see how they should be handled in the rest of Boost Jam code.
+ * Possibly allow Jamfiles to specify some files as 'volatile' which would
+ * make Boost Jam avoid caching information about those files and instead
+ * ask the OS about them every time.
+ */
+ int found;
+ file_info_t * const ff = file_info( path, &found );
+ if ( !found )
+ {
+ file_query_( ff );
+ if ( ff->exists )
+ {
+ /* Set the path's timestamp to 1 in case it is 0 or undetected to avoid
+ * confusion with non-existing paths.
+ */
+ if ( timestamp_empty( &ff->time ) )
+ timestamp_init( &ff->time, 1, 0 );
+ }
+ }
+ if ( !ff->exists )
+ {
+ return 0;
+ }
+ return ff;
+}
+
+
+/*
+ * file_query_posix_() - query information about a path using POSIX stat()
+ *
+ * Fallback file_query_() implementation for OS specific modules.
+ *
+ * Note that the Windows POSIX stat() function implementation suffers from
+ * several issues:
+ * * Does not support file timestamps with resolution finer than 1 second,
+ * meaning it can not be used to detect file timestamp changes of less than
+ * 1 second. One possible consequence is that some fast-paced touch commands
+ * (such as those done by Boost Build's internal testing system if it does
+ * not do some extra waiting) will not be detected correctly by the build
+ * system.
+ * * Returns file modification times automatically adjusted for daylight
+ * savings time even though daylight savings time should have nothing to do
+ * with internal time representation.
+ */
+
+void file_query_posix_( file_info_t * const info )
+{
+ struct stat statbuf;
+ char const * const pathstr = object_str( info->name );
+ char const * const pathspec = *pathstr ? pathstr : ".";
+
+ if ( stat( pathspec, &statbuf ) < 0 )
+ {
+ info->is_file = 0;
+ info->is_dir = 0;
+ info->exists = 0;
+ timestamp_clear( &info->time );
+ }
+ else
+ {
+ info->is_file = statbuf.st_mode & S_IFREG ? 1 : 0;
+ info->is_dir = statbuf.st_mode & S_IFDIR ? 1 : 0;
+ info->exists = 1;
+ timestamp_init( &info->time, statbuf.st_mtime, 0 );
+ }
+}
+
+
+/*
+ * file_remove_atexit() - schedule a path to be removed on program exit
+ */
+
+static LIST * files_to_remove = L0;
+
+void file_remove_atexit( OBJECT * const path )
+{
+ files_to_remove = list_push_back( files_to_remove, object_copy( path ) );
+}
+
+
+/*
+ * file_dirscan_impl() - no-profiling worker for file_dirscan()
+ */
+
+static void file_dirscan_impl( OBJECT * dir, scanback func, void * closure )
+{
+ file_info_t * const d = file_query( dir );
+ if ( !d || !d->is_dir )
+ return;
+
+ /* Lazy collect the directory content information. */
+ if ( list_empty( d->files ) )
+ {
+ if ( DEBUG_BINDSCAN )
+ printf( "scan directory %s\n", object_str( d->name ) );
+ if ( file_collect_dir_content_( d ) < 0 )
+ return;
+ }
+
+ /* OS specific part of the file_dirscan operation. */
+ file_dirscan_( d, func, closure );
+
+ /* Report the collected directory content. */
+ {
+ LISTITER iter = list_begin( d->files );
+ LISTITER const end = list_end( d->files );
+ for ( ; iter != end; iter = list_next( iter ) )
+ {
+ OBJECT * const path = list_item( iter );
+ file_info_t const * const ffq = file_query( path );
+ /* Using a file name read from a file_info_t structure allows OS
+ * specific implementations to store some kind of a normalized file
+ * name there. Using such a normalized file name then allows us to
+ * correctly recognize different file paths actually identifying the
+ * same file. For instance, an implementation may:
+ * - convert all file names internally to lower case on a case
+ * insensitive file system
+             *  - convert the NTFS paths to their long path variants as on that
+ * file system each file system entity may have a long and a
+ * short path variant thus allowing for many different path
+ * strings identifying the same file.
+ */
+ (*func)( closure, ffq->name, 1 /* stat()'ed */, &ffq->time );
+ }
+ }
+}
+
+
+static void free_file_info( void * xfile, void * data )
+{
+ file_info_t * const file = (file_info_t *)xfile;
+ object_free( file->name );
+ list_free( file->files );
+}
+
+
+static void remove_files_atexit( void )
+{
+ LISTITER iter = list_begin( files_to_remove );
+ LISTITER const end = list_end( files_to_remove );
+ for ( ; iter != end; iter = list_next( iter ) )
+ remove( object_str( list_item( iter ) ) );
+ list_free( files_to_remove );
+ files_to_remove = L0;
+}
diff --git a/src/kenlm/jam-files/engine/filesys.h b/src/kenlm/jam-files/engine/filesys.h
new file mode 100644
index 0000000..74fa395
--- /dev/null
+++ b/src/kenlm/jam-files/engine/filesys.h
@@ -0,0 +1,57 @@
+/*
+ * Copyright 1993-2002 Christopher Seiwald and Perforce Software, Inc.
+ *
+ * This file is part of Jam - see jam.c for Copyright information.
+ */
+
+/* This file is ALSO:
+ * Copyright 2001-2004 David Abrahams.
+ * Distributed under the Boost Software License, Version 1.0.
+ * (See accompanying file LICENSE_1_0.txt or http://www.boost.org/LICENSE_1_0.txt)
+ */
+
+/*
+ * filesys.h - OS specific file routines
+ */
+
+#ifndef FILESYS_DWA20011025_H
+#define FILESYS_DWA20011025_H
+
+#include "hash.h"
+#include "lists.h"
+#include "object.h"
+#include "pathsys.h"
+#include "timestamp.h"
+
+
+typedef struct file_info_t
+{
+ OBJECT * name;
+ char is_file;
+ char is_dir;
+ char exists;
+ timestamp time;
+ LIST * files;
+} file_info_t;
+
+typedef void (*scanback)( void * closure, OBJECT * path, int found,
+ timestamp const * const );
+
+
+void file_archscan( char const * arch, scanback func, void * closure );
+void file_build1( PATHNAME * const f, string * file ) ;
+void file_dirscan( OBJECT * dir, scanback func, void * closure );
+file_info_t * file_info( OBJECT * const path, int * found );
+int file_is_file( OBJECT * const path );
+int file_mkdir( char const * const path );
+file_info_t * file_query( OBJECT * const path );
+void file_remove_atexit( OBJECT * const path );
+void file_supported_fmt_resolution( timestamp * const );
+int file_time( OBJECT * const path, timestamp * const );
+
+/* Internal utility worker functions. */
+void file_query_posix_( file_info_t * const );
+
+void file_done();
+
+#endif
diff --git a/src/kenlm/jam-files/engine/fileunix.c b/src/kenlm/jam-files/engine/fileunix.c
new file mode 100644
index 0000000..bff3a42
--- /dev/null
+++ b/src/kenlm/jam-files/engine/fileunix.c
@@ -0,0 +1,464 @@
+/*
+ * Copyright 1993-2002 Christopher Seiwald and Perforce Software, Inc.
+ *
+ * This file is part of Jam - see jam.c for Copyright information.
+ */
+
+/* This file is ALSO:
+ * Copyright 2001-2004 David Abrahams.
+ * Copyright 2005 Rene Rivera.
+ * Distributed under the Boost Software License, Version 1.0.
+ * (See accompanying file LICENSE_1_0.txt or http://www.boost.org/LICENSE_1_0.txt)
+ */
+
+/*
+ * fileunix.c - manipulate file names and scan directories on UNIX/AmigaOS
+ *
+ * External routines:
+ * file_archscan() - scan an archive for files
+ * file_mkdir() - create a directory
+ * file_supported_fmt_resolution() - file modification timestamp resolution
+ *
+ * External routines called only via routines in filesys.c:
+ * file_collect_dir_content_() - collects directory content information
+ * file_dirscan_() - OS specific file_dirscan() implementation
+ * file_query_() - query information about a path from the OS
+ */
+
+#include "jam.h"
+#ifdef USE_FILEUNIX
+#include "filesys.h"
+
+#include "object.h"
+#include "pathsys.h"
+#include "strings.h"
+
+#include <assert.h>
+#include <stdio.h>
+#include <sys/stat.h> /* needed for mkdir() */
+
+#if defined( sun ) || defined( __sun ) || defined( linux )
+# include <unistd.h> /* needed for read and close prototype */
+#endif
+
+#if defined( OS_SEQUENT ) || \
+ defined( OS_DGUX ) || \
+ defined( OS_SCO ) || \
+ defined( OS_ISC )
+# define PORTAR 1
+#endif
+
+#if defined( OS_RHAPSODY ) || defined( OS_MACOSX ) || defined( OS_NEXT )
+# include <sys/dir.h>
+# include <unistd.h> /* need unistd for rhapsody's proper lseek */
+# define STRUCT_DIRENT struct direct
+#else
+# include <dirent.h>
+# define STRUCT_DIRENT struct dirent
+#endif
+
+#ifdef OS_COHERENT
+# include <arcoff.h>
+# define HAVE_AR
+#endif
+
+#if defined( OS_MVS ) || defined( OS_INTERIX )
+#define ARMAG "!<arch>\n"
+#define SARMAG 8
+#define ARFMAG "`\n"
+#define HAVE_AR
+
+struct ar_hdr /* archive file member header - printable ascii */
+{
+ char ar_name[ 16 ]; /* file member name - `/' terminated */
+ char ar_date[ 12 ]; /* file member date - decimal */
+ char ar_uid[ 6 ]; /* file member user id - decimal */
+ char ar_gid[ 6 ]; /* file member group id - decimal */
+ char ar_mode[ 8 ]; /* file member mode - octal */
+ char ar_size[ 10 ]; /* file member size - decimal */
+ char ar_fmag[ 2 ]; /* ARFMAG - string to end header */
+};
+#endif
+
+#if defined( OS_QNX ) || defined( OS_BEOS ) || defined( OS_MPEIX )
+# define NO_AR
+# define HAVE_AR
+#endif
+
+#ifndef HAVE_AR
+# ifdef OS_AIX
+/* Define these for AIX to get the definitions for both small and big archive
+ * file format variants.
+ */
+# define __AR_SMALL__
+# define __AR_BIG__
+# endif
+# include <ar.h>
+#endif
+
+
+/*
+ * file_collect_dir_content_() - collects directory content information
+ */
+
+int file_collect_dir_content_( file_info_t * const d )
+{
+ LIST * files = L0;
+ PATHNAME f;
+ DIR * dd;
+ STRUCT_DIRENT * dirent;
+ string path[ 1 ];
+ char const * dirstr;
+
+ assert( d );
+ assert( d->is_dir );
+ assert( list_empty( d->files ) );
+
+ dirstr = object_str( d->name );
+
+ memset( (char *)&f, '\0', sizeof( f ) );
+ f.f_dir.ptr = dirstr;
+ f.f_dir.len = strlen( dirstr );
+
+ if ( !*dirstr ) dirstr = ".";
+
+ if ( !( dd = opendir( dirstr ) ) )
+ return -1;
+
+ string_new( path );
+ while ( ( dirent = readdir( dd ) ) )
+ {
+ OBJECT * name;
+ f.f_base.ptr = dirent->d_name
+ #ifdef old_sinix
+ - 2 /* Broken structure definition on sinix. */
+ #endif
+ ;
+ f.f_base.len = strlen( f.f_base.ptr );
+
+ string_truncate( path, 0 );
+ path_build( &f, path );
+ name = object_new( path->value );
+ /* Immediately stat the file to preserve invariants. */
+ if ( file_query( name ) )
+ files = list_push_back( files, name );
+ else
+ object_free( name );
+ }
+ string_free( path );
+
+ closedir( dd );
+
+ d->files = files;
+ return 0;
+}
+
+
+/*
+ * file_dirscan_() - OS specific file_dirscan() implementation
+ */
+
+void file_dirscan_( file_info_t * const d, scanback func, void * closure )
+{
+ assert( d );
+ assert( d->is_dir );
+
+ /* Special case / : enter it */
+ if ( !strcmp( object_str( d->name ), "/" ) )
+ (*func)( closure, d->name, 1 /* stat()'ed */, &d->time );
+}
+
+
+/*
+ * file_mkdir() - create a directory
+ */
+
+int file_mkdir( char const * const path )
+{
+ /* Explicit cast to remove const modifiers and avoid related compiler
+ * warnings displayed when using the intel compiler.
+ */
+ return mkdir( (char *)path, 0777 );
+}
+
+
+/*
+ * file_query_() - query information about a path from the OS
+ */
+
+void file_query_( file_info_t * const info )
+{
+ file_query_posix_( info );
+}
+
+
+/*
+ * file_supported_fmt_resolution() - file modification timestamp resolution
+ *
+ * Returns the minimum file modification timestamp resolution supported by this
+ * Boost Jam implementation. File modification timestamp changes of less than
+ * the returned value might not be recognized.
+ *
+ * Does not take into consideration any OS or file system related restrictions.
+ *
+ * Return value 0 indicates that any value supported by the OS is also supported
+ * here.
+ */
+
+void file_supported_fmt_resolution( timestamp * const t )
+{
+ /* The current implementation does not support file modification timestamp
+ * resolution of less than one second.
+ */
+ timestamp_init( t, 1, 0 );
+}
+
+
+/*
+ * file_archscan() - scan an archive for files
+ */
+
+#ifndef AIAMAG /* God-fearing UNIX */
+
+#define SARFMAG 2
+#define SARHDR sizeof( struct ar_hdr )
+
+void file_archscan( char const * archive, scanback func, void * closure )
+{
+#ifndef NO_AR
+ struct ar_hdr ar_hdr;
+ char * string_table = 0;
+ char buf[ MAXJPATH ];
+ long offset;
+ int fd;
+
+ if ( ( fd = open( archive, O_RDONLY, 0 ) ) < 0 )
+ return;
+
+ if ( read( fd, buf, SARMAG ) != SARMAG ||
+ strncmp( ARMAG, buf, SARMAG ) )
+ {
+ close( fd );
+ return;
+ }
+
+ offset = SARMAG;
+
+ if ( DEBUG_BINDSCAN )
+ printf( "scan archive %s\n", archive );
+
+ while ( ( read( fd, &ar_hdr, SARHDR ) == SARHDR ) &&
+ !( memcmp( ar_hdr.ar_fmag, ARFMAG, SARFMAG )
+#ifdef ARFZMAG
+ /* OSF also has a compressed format */
+ && memcmp( ar_hdr.ar_fmag, ARFZMAG, SARFMAG )
+#endif
+ ) )
+ {
+ char lar_name_[ 257 ];
+ char * lar_name = lar_name_ + 1;
+ long lar_date;
+ long lar_size;
+ long lar_offset;
+ char * c;
+ char * src;
+ char * dest;
+
+ strncpy( lar_name, ar_hdr.ar_name, sizeof( ar_hdr.ar_name ) );
+
+ sscanf( ar_hdr.ar_date, "%ld", &lar_date );
+ sscanf( ar_hdr.ar_size, "%ld", &lar_size );
+
+ if ( ar_hdr.ar_name[ 0 ] == '/' )
+ {
+ if ( ar_hdr.ar_name[ 1 ] == '/' )
+ {
+ /* This is the "string table" entry of the symbol table, holding
+ * filename strings longer than 15 characters, i.e. those that
+ * do not fit into ar_name.
+ */
+ string_table = (char *)BJAM_MALLOC_ATOMIC( lar_size );
+ lseek( fd, offset + SARHDR, 0 );
+ if ( read( fd, string_table, lar_size ) != lar_size )
+ printf("error reading string table\n");
+ }
+ else if ( string_table && ar_hdr.ar_name[ 1 ] != ' ' )
+ {
+ /* Long filenames are recognized by "/nnnn" where nnnn is the
+ * offset of the string in the string table represented in ASCII
+ * decimals.
+ */
+ dest = lar_name;
+ lar_offset = atoi( lar_name + 1 );
+ src = &string_table[ lar_offset ];
+ while ( *src != '/' )
+ *dest++ = *src++;
+ *dest = '/';
+ }
+ }
+
+ c = lar_name - 1;
+ while ( ( *++c != ' ' ) && ( *c != '/' ) );
+ *c = '\0';
+
+ if ( DEBUG_BINDSCAN )
+ printf( "archive name %s found\n", lar_name );
+
+ sprintf( buf, "%s(%s)", archive, lar_name );
+
+ {
+ OBJECT * const member = object_new( buf );
+ timestamp time;
+ timestamp_init( &time, (time_t)lar_date, 0 );
+ (*func)( closure, member, 1 /* time valid */, &time );
+ object_free( member );
+ }
+
+ offset += SARHDR + ( ( lar_size + 1 ) & ~1 );
+ lseek( fd, offset, 0 );
+ }
+
+ if ( string_table )
+ BJAM_FREE( string_table );
+
+ close( fd );
+#endif /* NO_AR */
+}
+
+#else /* AIAMAG - RS6000 AIX */
+
+static void file_archscan_small( int fd, char const * archive, scanback func,
+ void * closure )
+{
+ struct fl_hdr fl_hdr;
+
+ struct {
+ struct ar_hdr hdr;
+ char pad[ 256 ];
+ } ar_hdr ;
+
+ char buf[ MAXJPATH ];
+ long offset;
+
+ if ( read( fd, (char *)&fl_hdr, FL_HSZ ) != FL_HSZ )
+ return;
+
+ sscanf( fl_hdr.fl_fstmoff, "%ld", &offset );
+
+ if ( DEBUG_BINDSCAN )
+ printf( "scan archive %s\n", archive );
+
+ while ( offset > 0 && lseek( fd, offset, 0 ) >= 0 &&
+ read( fd, &ar_hdr, sizeof( ar_hdr ) ) >= (int)sizeof( ar_hdr.hdr ) )
+ {
+ long lar_date;
+ int lar_namlen;
+
+ sscanf( ar_hdr.hdr.ar_namlen, "%d" , &lar_namlen );
+ sscanf( ar_hdr.hdr.ar_date , "%ld", &lar_date );
+ sscanf( ar_hdr.hdr.ar_nxtmem, "%ld", &offset );
+
+ if ( !lar_namlen )
+ continue;
+
+ ar_hdr.hdr._ar_name.ar_name[ lar_namlen ] = '\0';
+
+ sprintf( buf, "%s(%s)", archive, ar_hdr.hdr._ar_name.ar_name );
+
+ {
+ OBJECT * const member = object_new( buf );
+ timestamp time;
+ timestamp_init( &time, (time_t)lar_date, 0 );
+ (*func)( closure, member, 1 /* time valid */, &time );
+ object_free( member );
+ }
+ }
+}
+
+/* Check for OS versions supporting the big variant. */
+#ifdef AR_HSZ_BIG
+
+static void file_archscan_big( int fd, char const * archive, scanback func,
+ void * closure )
+{
+ struct fl_hdr_big fl_hdr;
+
+ struct {
+ struct ar_hdr_big hdr;
+ char pad[ 256 ];
+ } ar_hdr ;
+
+ char buf[ MAXJPATH ];
+ long long offset;
+
+ if ( read( fd, (char *)&fl_hdr, FL_HSZ_BIG ) != FL_HSZ_BIG )
+ return;
+
+ sscanf( fl_hdr.fl_fstmoff, "%lld", &offset );
+
+ if ( DEBUG_BINDSCAN )
+ printf( "scan archive %s\n", archive );
+
+ while ( offset > 0 && lseek( fd, offset, 0 ) >= 0 &&
+ read( fd, &ar_hdr, sizeof( ar_hdr ) ) >= sizeof( ar_hdr.hdr ) )
+ {
+ long lar_date;
+ int lar_namlen;
+
+ sscanf( ar_hdr.hdr.ar_namlen, "%d" , &lar_namlen );
+ sscanf( ar_hdr.hdr.ar_date , "%ld" , &lar_date );
+ sscanf( ar_hdr.hdr.ar_nxtmem, "%lld", &offset );
+
+ if ( !lar_namlen )
+ continue;
+
+ ar_hdr.hdr._ar_name.ar_name[ lar_namlen ] = '\0';
+
+ sprintf( buf, "%s(%s)", archive, ar_hdr.hdr._ar_name.ar_name );
+
+ {
+ OBJECT * const member = object_new( buf );
+ timestamp time;
+ timestamp_init( &time, (time_t)lar_date, 0 );
+ (*func)( closure, member, 1 /* time valid */, &time );
+ object_free( member );
+ }
+ }
+}
+
+#endif /* AR_HSZ_BIG */
+
+void file_archscan( char const * archive, scanback func, void * closure )
+{
+ int fd;
+ char fl_magic[ SAIAMAG ];
+
+ if ( ( fd = open( archive, O_RDONLY, 0 ) ) < 0 )
+ return;
+
+ if ( read( fd, fl_magic, SAIAMAG ) != SAIAMAG ||
+ lseek( fd, 0, SEEK_SET ) == -1 )
+ {
+ close( fd );
+ return;
+ }
+
+ if ( !strncmp( AIAMAG, fl_magic, SAIAMAG ) )
+ {
+ /* read small variant */
+ file_archscan_small( fd, archive, func, closure );
+ }
+#ifdef AR_HSZ_BIG
+ else if ( !strncmp( AIAMAGBIG, fl_magic, SAIAMAG ) )
+ {
+ /* read big variant */
+ file_archscan_big( fd, archive, func, closure );
+ }
+#endif
+
+ close( fd );
+}
+
+#endif /* AIAMAG - RS6000 AIX */
+
+#endif /* USE_FILEUNIX */
diff --git a/src/kenlm/jam-files/engine/frames.c b/src/kenlm/jam-files/engine/frames.c
new file mode 100644
index 0000000..0491c5c
--- /dev/null
+++ b/src/kenlm/jam-files/engine/frames.c
@@ -0,0 +1,29 @@
+/*
+ * Copyright 2001-2004 David Abrahams.
+ * Distributed under the Boost Software License, Version 1.0.
+ * (See accompanying file LICENSE_1_0.txt or http://www.boost.org/LICENSE_1_0.txt)
+ */
+
+#include "jam.h"
+#include "frames.h"
+
+
+FRAME * frame_before_python_call;
+
+
+void frame_init( FRAME * frame )
+{
+ frame->prev = 0;
+ frame->prev_user = 0;
+ lol_init( frame->args );
+ frame->module = root_module();
+ frame->rulename = "module scope";
+ frame->file = 0;
+ frame->line = -1;
+}
+
+
+void frame_free( FRAME * frame )
+{
+ lol_free( frame->args );
+}
diff --git a/src/kenlm/jam-files/engine/frames.h b/src/kenlm/jam-files/engine/frames.h
new file mode 100644
index 0000000..2e99f17
--- /dev/null
+++ b/src/kenlm/jam-files/engine/frames.h
@@ -0,0 +1,42 @@
+/*
+ * Copyright 2001-2004 David Abrahams.
+ * Distributed under the Boost Software License, Version 1.0.
+ * (See accompanying file LICENSE_1_0.txt or copy at
+ * http://www.boost.org/LICENSE_1_0.txt)
+ */
+
+#ifndef FRAMES_DWA20011021_H
+#define FRAMES_DWA20011021_H
+
+#include "lists.h"
+#include "modules.h"
+#include "object.h"
+
+
+typedef struct frame FRAME;
+
+struct frame
+{
+ FRAME * prev;
+ FRAME * prev_user; /* The nearest enclosing frame for which
+ module->user_module is true. */
+ LOL args[ 1 ];
+ module_t * module;
+ OBJECT * file;
+ int line;
+ char const * rulename;
+};
+
+
+/* When a call into Python is in progress, this variable points to the bjam
+ * frame that was current at the moment of the call. When the call completes,
+ * the variable is not defined. Furthermore, if Jam calls Python which calls Jam
+ * and so on, this variable only keeps the most recent Jam frame.
+ */
+extern FRAME * frame_before_python_call;
+
+
+void frame_init( FRAME * );
+void frame_free( FRAME * );
+
+#endif
diff --git a/src/kenlm/jam-files/engine/function.c b/src/kenlm/jam-files/engine/function.c
new file mode 100644
index 0000000..690855e
--- /dev/null
+++ b/src/kenlm/jam-files/engine/function.c
@@ -0,0 +1,4870 @@
+/*
+ * Copyright 2011 Steven Watanabe
+ * Distributed under the Boost Software License, Version 1.0.
+ * (See accompanying file LICENSE_1_0.txt or copy at
+ * http://www.boost.org/LICENSE_1_0.txt)
+ */
+
+#include "jam.h"
+#include "function.h"
+
+#include "class.h"
+#include "compile.h"
+#include "constants.h"
+#include "filesys.h"
+#include "frames.h"
+#include "lists.h"
+#include "mem.h"
+#include "pathsys.h"
+#include "rules.h"
+#include "search.h"
+#include "variable.h"
+
+#include <assert.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#ifdef OS_CYGWIN
+# include <cygwin/version.h>
+# include <sys/cygwin.h>
+# ifdef CYGWIN_VERSION_CYGWIN_CONV
+# include <errno.h>
+# endif
+# include <windows.h>
+#endif
+
+int glob( char const * s, char const * c );
+void backtrace( FRAME * );
+void backtrace_line( FRAME * );
+
+#define INSTR_PUSH_EMPTY 0
+#define INSTR_PUSH_CONSTANT 1
+#define INSTR_PUSH_ARG 2
+#define INSTR_PUSH_VAR 3
+#define INSTR_PUSH_VAR_FIXED 57
+#define INSTR_PUSH_GROUP 4
+#define INSTR_PUSH_RESULT 5
+#define INSTR_PUSH_APPEND 6
+#define INSTR_SWAP 7
+
+#define INSTR_JUMP_EMPTY 8
+#define INSTR_JUMP_NOT_EMPTY 9
+
+#define INSTR_JUMP 10
+#define INSTR_JUMP_LT 11
+#define INSTR_JUMP_LE 12
+#define INSTR_JUMP_GT 13
+#define INSTR_JUMP_GE 14
+#define INSTR_JUMP_EQ 15
+#define INSTR_JUMP_NE 16
+#define INSTR_JUMP_IN 17
+#define INSTR_JUMP_NOT_IN 18
+
+#define INSTR_JUMP_NOT_GLOB 19
+
+#define INSTR_FOR_INIT 56
+#define INSTR_FOR_LOOP 20
+
+#define INSTR_SET_RESULT 21
+#define INSTR_RETURN 22
+#define INSTR_POP 23
+
+#define INSTR_PUSH_LOCAL 24
+#define INSTR_POP_LOCAL 25
+#define INSTR_SET 26
+#define INSTR_APPEND 27
+#define INSTR_DEFAULT 28
+
+#define INSTR_PUSH_LOCAL_FIXED 58
+#define INSTR_POP_LOCAL_FIXED 59
+#define INSTR_SET_FIXED 60
+#define INSTR_APPEND_FIXED 61
+#define INSTR_DEFAULT_FIXED 62
+
+#define INSTR_PUSH_LOCAL_GROUP 29
+#define INSTR_POP_LOCAL_GROUP 30
+#define INSTR_SET_GROUP 31
+#define INSTR_APPEND_GROUP 32
+#define INSTR_DEFAULT_GROUP 33
+
+#define INSTR_PUSH_ON 34
+#define INSTR_POP_ON 35
+#define INSTR_SET_ON 36
+#define INSTR_APPEND_ON 37
+#define INSTR_DEFAULT_ON 38
+#define INSTR_GET_ON 65
+
+#define INSTR_CALL_RULE 39
+#define INSTR_CALL_MEMBER_RULE 66
+
+#define INSTR_APPLY_MODIFIERS 40
+#define INSTR_APPLY_INDEX 41
+#define INSTR_APPLY_INDEX_MODIFIERS 42
+#define INSTR_APPLY_MODIFIERS_GROUP 43
+#define INSTR_APPLY_INDEX_GROUP 44
+#define INSTR_APPLY_INDEX_MODIFIERS_GROUP 45
+#define INSTR_COMBINE_STRINGS 46
+#define INSTR_GET_GRIST 64
+
+#define INSTR_INCLUDE 47
+#define INSTR_RULE 48
+#define INSTR_ACTIONS 49
+#define INSTR_PUSH_MODULE 50
+#define INSTR_POP_MODULE 51
+#define INSTR_CLASS 52
+#define INSTR_BIND_MODULE_VARIABLES 63
+
+#define INSTR_APPEND_STRINGS 53
+#define INSTR_WRITE_FILE 54
+#define INSTR_OUTPUT_STRINGS 55
+
+typedef struct instruction
+{
+ unsigned int op_code;
+ int arg;
+} instruction;
+
+typedef struct _subfunction
+{
+ OBJECT * name;
+ FUNCTION * code;
+ int local;
+} SUBFUNCTION;
+
+typedef struct _subaction
+{
+ OBJECT * name;
+ FUNCTION * command;
+ int flags;
+} SUBACTION;
+
+#define FUNCTION_BUILTIN 0
+#define FUNCTION_JAM 1
+
+struct argument
+{
+ int flags;
+#define ARG_ONE 0
+#define ARG_OPTIONAL 1
+#define ARG_PLUS 2
+#define ARG_STAR 3
+#define ARG_VARIADIC 4
+ OBJECT * type_name;
+ OBJECT * arg_name;
+ int index;
+};
+
+struct arg_list
+{
+ int size;
+ struct argument * args;
+};
+
+struct _function
+{
+ int type;
+ int reference_count;
+ OBJECT * rulename;
+ struct arg_list * formal_arguments;
+ int num_formal_arguments;
+};
+
+typedef struct _builtin_function
+{
+ FUNCTION base;
+ LIST * ( * func )( FRAME *, int flags );
+ int flags;
+} BUILTIN_FUNCTION;
+
+typedef struct _jam_function
+{
+ FUNCTION base;
+ int code_size;
+ instruction * code;
+ int num_constants;
+ OBJECT * * constants;
+ int num_subfunctions;
+ SUBFUNCTION * functions;
+ int num_subactions;
+ SUBACTION * actions;
+ FUNCTION * generic;
+ OBJECT * file;
+ int line;
+} JAM_FUNCTION;
+
+
+#ifdef HAVE_PYTHON
+
+#define FUNCTION_PYTHON 2
+
+typedef struct _python_function
+{
+ FUNCTION base;
+ PyObject * python_function;
+} PYTHON_FUNCTION;
+
+static LIST * call_python_function( PYTHON_FUNCTION *, FRAME * );
+
+#endif
+
+
+struct _stack
+{
+ void * data;
+};
+
+static void * stack;
+
+STACK * stack_global()
+{
+ static STACK result;
+ if ( !stack )
+ {
+ int const size = 1 << 21;
+ stack = BJAM_MALLOC( size );
+ result.data = (char *)stack + size;
+ }
+ return &result;
+}
+
+static void check_alignment( STACK * s )
+{
+ assert( (size_t)s->data % sizeof( LIST * ) == 0 );
+}
+
+void * stack_allocate( STACK * s, int size )
+{
+ check_alignment( s );
+ s->data = (char *)s->data - size;
+ check_alignment( s );
+ return s->data;
+}
+
+void stack_deallocate( STACK * s, int size )
+{
+ check_alignment( s );
+ s->data = (char *)s->data + size;
+ check_alignment( s );
+}
+
+void stack_push( STACK * s, LIST * l )
+{
+ *(LIST * *)stack_allocate( s, sizeof( LIST * ) ) = l;
+}
+
+LIST * stack_pop( STACK * s )
+{
+ LIST * const result = *(LIST * *)s->data;
+ stack_deallocate( s, sizeof( LIST * ) );
+ return result;
+}
+
+LIST * stack_top( STACK * s )
+{
+ check_alignment( s );
+ return *(LIST * *)s->data;
+}
+
+LIST * stack_at( STACK * s, int n )
+{
+ check_alignment( s );
+ return *( (LIST * *)s->data + n );
+}
+
+void stack_set( STACK * s, int n, LIST * value )
+{
+ check_alignment( s );
+ *((LIST * *)s->data + n) = value;
+}
+
+void * stack_get( STACK * s )
+{
+ check_alignment( s );
+ return s->data;
+}
+
+LIST * frame_get_local( FRAME * frame, int idx )
+{
+ /* The only local variables are the arguments. */
+ return list_copy( lol_get( frame->args, idx ) );
+}
+
+static OBJECT * function_get_constant( JAM_FUNCTION * function, int idx )
+{
+ return function->constants[ idx ];
+}
+
+static LIST * function_get_variable( JAM_FUNCTION * function, FRAME * frame,
+ int idx )
+{
+ return list_copy( var_get( frame->module, function->constants[ idx ] ) );
+}
+
+static void function_set_variable( JAM_FUNCTION * function, FRAME * frame,
+ int idx, LIST * value )
+{
+ var_set( frame->module, function->constants[ idx ], value, VAR_SET );
+}
+
+static LIST * function_swap_variable( JAM_FUNCTION * function, FRAME * frame,
+ int idx, LIST * value )
+{
+ return var_swap( frame->module, function->constants[ idx ], value );
+}
+
+static void function_append_variable( JAM_FUNCTION * function, FRAME * frame,
+ int idx, LIST * value )
+{
+ var_set( frame->module, function->constants[ idx ], value, VAR_APPEND );
+}
+
+static void function_default_variable( JAM_FUNCTION * function, FRAME * frame,
+ int idx, LIST * value )
+{
+ var_set( frame->module, function->constants[ idx ], value, VAR_DEFAULT );
+}
+
+static void function_set_rule( JAM_FUNCTION * function, FRAME * frame,
+ STACK * s, int idx )
+{
+ SUBFUNCTION * sub = function->functions + idx;
+ new_rule_body( frame->module, sub->name, sub->code, !sub->local );
+}
+
+static void function_set_actions( JAM_FUNCTION * function, FRAME * frame,
+ STACK * s, int idx )
+{
+ SUBACTION * sub = function->actions + idx;
+ LIST * bindlist = stack_pop( s );
+ new_rule_actions( frame->module, sub->name, sub->command, bindlist,
+ sub->flags );
+}
+
+
+/*
+ * Returns the index if name is "<", ">", "1", "2", ... or "19" otherwise
+ * returns -1.
+ */
+
+static int get_argument_index( char const * s )
+{
+ if ( s[ 0 ] != '\0')
+ {
+ if ( s[ 1 ] == '\0' )
+ {
+ switch ( s[ 0 ] )
+ {
+ case '<': return 0;
+ case '>': return 1;
+
+ case '1':
+ case '2':
+ case '3':
+ case '4':
+ case '5':
+ case '6':
+ case '7':
+ case '8':
+ case '9':
+ return s[ 0 ] - '1';
+ }
+ }
+ else if ( s[ 0 ] == '1' && s[ 2 ] == '\0' )
+ {
+ switch( s[ 1 ] )
+ {
+ case '0':
+ case '1':
+ case '2':
+ case '3':
+ case '4':
+ case '5':
+ case '6':
+ case '7':
+ case '8':
+ case '9':
+ return s[ 1 ] - '0' + 10 - 1;
+ }
+ }
+ }
+ return -1;
+}
+
+static LIST * function_get_named_variable( JAM_FUNCTION * function,
+ FRAME * frame, OBJECT * name )
+{
+ int const idx = get_argument_index( object_str( name ) );
+ return idx == -1
+ ? list_copy( var_get( frame->module, name ) )
+ : list_copy( lol_get( frame->args, idx ) );
+}
+
+static void function_set_named_variable( JAM_FUNCTION * function, FRAME * frame,
+ OBJECT * name, LIST * value)
+{
+ var_set( frame->module, name, value, VAR_SET );
+}
+
+static LIST * function_swap_named_variable( JAM_FUNCTION * function,
+ FRAME * frame, OBJECT * name, LIST * value )
+{
+ return var_swap( frame->module, name, value );
+}
+
+static void function_append_named_variable( JAM_FUNCTION * function,
+ FRAME * frame, OBJECT * name, LIST * value)
+{
+ var_set( frame->module, name, value, VAR_APPEND );
+}
+
+static void function_default_named_variable( JAM_FUNCTION * function,
+ FRAME * frame, OBJECT * name, LIST * value )
+{
+ var_set( frame->module, name, value, VAR_DEFAULT );
+}
+
+static LIST * function_call_rule( JAM_FUNCTION * function, FRAME * frame,
+ STACK * s, int n_args, char const * unexpanded, OBJECT * file, int line )
+{
+ FRAME inner[ 1 ];
+ int i;
+ LIST * first = stack_pop( s );
+ LIST * result = L0;
+ OBJECT * rulename;
+ LIST * trailing;
+
+ frame->file = file;
+ frame->line = line;
+
+ if ( list_empty( first ) )
+ {
+ backtrace_line( frame );
+ printf( "warning: rulename %s expands to empty string\n", unexpanded );
+ backtrace( frame );
+ list_free( first );
+ for ( i = 0; i < n_args; ++i )
+ list_free( stack_pop( s ) );
+ return result;
+ }
+
+ rulename = object_copy( list_front( first ) );
+
+ frame_init( inner );
+ inner->prev = frame;
+ inner->prev_user = frame->module->user_module ? frame : frame->prev_user;
+ inner->module = frame->module; /* This gets fixed up in evaluate_rule(). */
+
+ for ( i = 0; i < n_args; ++i )
+ lol_add( inner->args, stack_at( s, n_args - i - 1 ) );
+
+ for ( i = 0; i < n_args; ++i )
+ stack_pop( s );
+
+ trailing = list_pop_front( first );
+ if ( trailing )
+ {
+ if ( inner->args->count == 0 )
+ lol_add( inner->args, trailing );
+ else
+ {
+ LIST * * const l = &inner->args->list[ 0 ];
+ *l = list_append( trailing, *l );
+ }
+ }
+
+ result = evaluate_rule( bindrule( rulename, inner->module ), rulename, inner );
+ frame_free( inner );
+ object_free( rulename );
+ return result;
+}
+
+static LIST * function_call_member_rule( JAM_FUNCTION * function, FRAME * frame, STACK * s, int n_args, OBJECT * rulename, OBJECT * file, int line )
+{
+ FRAME inner[ 1 ];
+ int i;
+ LIST * first = stack_pop( s );
+ LIST * result = L0;
+ LIST * trailing;
+ RULE * rule;
+ module_t * module;
+ OBJECT * real_rulename = 0;
+
+ frame->file = file;
+ frame->line = line;
+
+ if ( list_empty( first ) )
+ {
+ backtrace_line( frame );
+ printf( "warning: object is empty\n" );
+ backtrace( frame );
+
+ list_free( first );
+
+ for( i = 0; i < n_args; ++i )
+ {
+ list_free( stack_pop( s ) );
+ }
+
+ return result;
+ }
+
+ /* FIXME: handle generic case */
+ assert( list_length( first ) == 1 );
+
+ module = bindmodule( list_front( first ) );
+ if ( module->class_module )
+ {
+ rule = bindrule( rulename, module );
+ real_rulename = object_copy( function_rulename( rule->procedure ) );
+ }
+ else
+ {
+ string buf[ 1 ];
+ string_new( buf );
+ string_append( buf, object_str( list_front( first ) ) );
+ string_push_back( buf, '.' );
+ string_append( buf, object_str( rulename ) );
+ real_rulename = object_new( buf->value );
+ string_free( buf );
+ rule = bindrule( real_rulename, frame->module );
+ }
+
+ frame_init( inner );
+
+ inner->prev = frame;
+ inner->prev_user = frame->module->user_module ? frame : frame->prev_user;
+ inner->module = frame->module; /* This gets fixed up in evaluate_rule(), below. */
+
+ for( i = 0; i < n_args; ++i )
+ {
+ lol_add( inner->args, stack_at( s, n_args - i - 1 ) );
+ }
+
+ for( i = 0; i < n_args; ++i )
+ {
+ stack_pop( s );
+ }
+
+ if ( list_length( first ) > 1 )
+ {
+ string buf[ 1 ];
+ LIST * trailing = L0;
+ LISTITER iter = list_begin( first ), end = list_end( first );
+ iter = list_next( iter );
+ string_new( buf );
+ for ( ; iter != end; iter = list_next( iter ) )
+ {
+ string_append( buf, object_str( list_item( iter ) ) );
+ string_push_back( buf, '.' );
+ string_append( buf, object_str( rulename ) );
+ trailing = list_push_back( trailing, object_new( buf->value ) );
+ string_truncate( buf, 0 );
+ }
+ string_free( buf );
+ if ( inner->args->count == 0 )
+ lol_add( inner->args, trailing );
+ else
+ {
+ LIST * * const l = &inner->args->list[ 0 ];
+ *l = list_append( trailing, *l );
+ }
+ }
+
+ result = evaluate_rule( rule, real_rulename, inner );
+ frame_free( inner );
+ object_free( rulename );
+ object_free( real_rulename );
+ return result;
+}
+
+
+/* Variable expansion */
+
+typedef struct
+{
+ int sub1;
+ int sub2;
+} subscript_t;
+
+typedef struct
+{
+ PATHNAME f; /* :GDBSMR -- pieces */
+ char parent; /* :P -- go to parent directory */
+ char filemods; /* one of the above applied */
+ char downshift; /* :L -- downshift result */
+ char upshift; /* :U -- upshift result */
+ char to_slashes; /* :T -- convert "\" to "/" */
+ char to_windows; /* :W -- convert cygwin to native paths */
+ PATHPART empty; /* :E -- default for empties */
+ PATHPART join; /* :J -- join list with char */
+} VAR_EDITS;
+
+static LIST * apply_modifiers_impl( LIST * result, string * buf,
+ VAR_EDITS * edits, int n, LISTITER iter, LISTITER end );
+static void get_iters( subscript_t const subscript, LISTITER * const first,
+ LISTITER * const last, int const length );
+
+
+/*
+ * var_edit_parse() - parse : modifiers into PATHNAME structure
+ *
+ * The : modifiers in a $(varname:modifier) currently support replacing or
+ * omitting elements of a filename, and so they are parsed into a PATHNAME
+ * structure (which contains pointers into the original string).
+ *
+ * Modifiers of the form "X=value" replace the component X with the given value.
+ * Modifiers without the "=value" cause everything but the component X to be
+ * omitted. X is one of:
+ *
+ * G <grist>
+ * D directory name
+ * B base name
+ * S .suffix
+ * M (member)
+ * R root directory - prepended to whole path
+ *
+ * This routine sets:
+ *
+ * f->f_xxx.ptr = 0
+ * f->f_xxx.len = 0
+ * -> leave the original component xxx
+ *
+ * f->f_xxx.ptr = string
+ * f->f_xxx.len = strlen( string )
+ * -> replace component xxx with string
+ *
+ * f->f_xxx.ptr = ""
+ * f->f_xxx.len = 0
+ * -> omit component xxx
+ *
+ * var_edit_file() below and path_build() obligingly follow this convention.
+ */
+
+static int var_edit_parse( char const * mods, VAR_EDITS * edits, int havezeroed
+ )
+{
+ while ( *mods )
+ {
+ PATHPART * fp;
+
+ switch ( *mods++ )
+ {
+ case 'L': edits->downshift = 1; continue;
+ case 'U': edits->upshift = 1; continue;
+ case 'P': edits->parent = edits->filemods = 1; continue;
+ case 'E': fp = &edits->empty; goto strval;
+ case 'J': fp = &edits->join; goto strval;
+ case 'G': fp = &edits->f.f_grist; goto fileval;
+ case 'R': fp = &edits->f.f_root; goto fileval;
+ case 'D': fp = &edits->f.f_dir; goto fileval;
+ case 'B': fp = &edits->f.f_base; goto fileval;
+ case 'S': fp = &edits->f.f_suffix; goto fileval;
+ case 'M': fp = &edits->f.f_member; goto fileval;
+ case 'T': edits->to_slashes = 1; continue;
+ case 'W': edits->to_windows = 1; continue;
+ default:
+ continue; /* Should complain, but so what... */
+ }
+
+ fileval:
+ /* Handle :CHARS, where each char (without a following =) selects a
+ * particular file path element. On the first such char, we deselect all
+ * others (by setting ptr = "", len = 0) and for each char we select
+ * that element (by setting ptr = 0).
+ */
+ edits->filemods = 1;
+
+ if ( *mods != '=' )
+ {
+ if ( !havezeroed++ )
+ {
+ int i;
+ for ( i = 0; i < 6; ++i )
+ {
+ edits->f.part[ i ].len = 0;
+ edits->f.part[ i ].ptr = "";
+ }
+ }
+
+ fp->ptr = 0;
+ continue;
+ }
+
+ strval:
+ /* Handle :X=value, or :X */
+ if ( *mods != '=' )
+ {
+ fp->ptr = "";
+ fp->len = 0;
+ }
+ else
+ {
+ fp->ptr = ++mods;
+ fp->len = strlen( mods );
+ mods += fp->len;
+ }
+ }
+
+ return havezeroed;
+}
+
+
+/*
+ * var_edit_file() - copy input target name to output, modifying filename.
+ */
+
+static void var_edit_file( char const * in, string * out, VAR_EDITS * edits )
+{
+ if ( edits->filemods )
+ {
+ PATHNAME pathname;
+
+ /* Parse apart original filename, putting parts into "pathname". */
+ path_parse( in, &pathname );
+
+ /* Replace any pathname with edits->f */
+ if ( edits->f.f_grist .ptr ) pathname.f_grist = edits->f.f_grist;
+ if ( edits->f.f_root .ptr ) pathname.f_root = edits->f.f_root;
+ if ( edits->f.f_dir .ptr ) pathname.f_dir = edits->f.f_dir;
+ if ( edits->f.f_base .ptr ) pathname.f_base = edits->f.f_base;
+ if ( edits->f.f_suffix.ptr ) pathname.f_suffix = edits->f.f_suffix;
+ if ( edits->f.f_member.ptr ) pathname.f_member = edits->f.f_member;
+
+ /* If requested, modify pathname to point to parent. */
+ if ( edits->parent )
+ path_parent( &pathname );
+
+ /* Put filename back together. */
+ path_build( &pathname, out );
+ }
+ else
+ string_append( out, in );
+}
+
+/*
+ * var_edit_cyg2win() - conversion of a cygwin to a Windows path.
+ *
+ * FIXME: skip grist
+ */
+
+#ifdef OS_CYGWIN
+static void var_edit_cyg2win( string * out, size_t pos, VAR_EDITS * edits )
+{
+ if ( edits->to_windows )
+ {
+ #ifdef CYGWIN_VERSION_CYGWIN_CONV
+ /* Use new Cygwin API added with Cygwin 1.7. Old one had no error
+ * handling and has been deprecated.
+ */
+ char * dynamicBuffer = 0;
+ char buffer[ MAX_PATH + 1001 ];
+ char const * result = buffer;
+ cygwin_conv_path_t const conv_type = CCP_POSIX_TO_WIN_A | CCP_RELATIVE;
+ ssize_t const apiResult = cygwin_conv_path( conv_type, out->value + pos,
+ buffer, sizeof( buffer ) / sizeof( *buffer ) );
+ assert( apiResult == 0 || apiResult == -1 );
+ assert( apiResult || strlen( result ) < sizeof( buffer ) / sizeof(
+ *buffer ) );
+ if ( apiResult )
+ {
+ result = 0;
+ if ( errno == ENOSPC )
+ {
+ ssize_t const size = cygwin_conv_path( conv_type, out->value +
+ pos, NULL, 0 );
+ assert( size >= -1 );
+ if ( size > 0 )
+ {
+ dynamicBuffer = (char *)BJAM_MALLOC_ATOMIC( size );
+ if ( dynamicBuffer )
+ {
+ ssize_t const apiResult = cygwin_conv_path( conv_type,
+ out->value + pos, dynamicBuffer, size );
+ assert( apiResult == 0 || apiResult == -1 );
+ if ( !apiResult )
+ {
+ result = dynamicBuffer;
+ assert( strlen( result ) < size );
+ }
+ }
+ }
+ }
+ }
+ #else /* CYGWIN_VERSION_CYGWIN_CONV */
+ /* Use old Cygwin API deprecated with Cygwin 1.7. */
+ char result[ MAX_PATH + 1 ];
+ cygwin_conv_to_win32_path( out->value + pos, result );
+ assert( strlen( result ) <= MAX_PATH );
+ #endif /* CYGWIN_VERSION_CYGWIN_CONV */
+ if ( result )
+ {
+ string_truncate( out, pos );
+ string_append( out, result );
+ edits->to_slashes = 0;
+ }
+ #ifdef CYGWIN_VERSION_CYGWIN_CONV
+ if ( dynamicBuffer )
+ BJAM_FREE( dynamicBuffer );
+ #endif
+ }
+}
+#endif /* OS_CYGWIN */
+
+
+/*
+ * var_edit_shift() - do upshift/downshift & other mods.
+ */
+
+static void var_edit_shift( string * out, size_t pos, VAR_EDITS * edits )
+{
+#ifdef OS_CYGWIN
+ var_edit_cyg2win( out, pos, edits );
+#endif
+
+ if ( edits->upshift || edits->downshift || edits->to_slashes )
+ {
+ /* Handle upshifting, downshifting and slash translation now. */
+ char * p;
+ for ( p = out->value + pos; *p; ++p )
+ {
+ if ( edits->upshift )
+ *p = toupper( *p );
+ else if ( edits->downshift )
+ *p = tolower( *p );
+ if ( edits->to_slashes && ( *p == '\\' ) )
+ *p = '/';
+ }
+ }
+}
+
+
+/*
+ * Reads n LISTs from the top of the STACK and combines them to form VAR_EDITS.
+ * Returns the number of VAR_EDITS pushed onto the STACK.
+ */
+
+static int expand_modifiers( STACK * s, int n )
+{
+ int i;
+ int total = 1;
+ LIST * * args = stack_get( s );
+ for ( i = 0; i < n; ++i )
+ total *= list_length( args[ i ] );
+
+ if ( total != 0 )
+ {
+ VAR_EDITS * out = stack_allocate( s, total * sizeof( VAR_EDITS ) );
+ LISTITER * iter = stack_allocate( s, n * sizeof( LIST * ) );
+ for ( i = 0; i < n; ++i )
+ iter[ i ] = list_begin( args[ i ] );
+ i = 0;
+ {
+ int havezeroed;
+ loop:
+ memset( out, 0, sizeof( *out ) );
+ havezeroed = 0;
+ for ( i = 0; i < n; ++i )
+ havezeroed = var_edit_parse( object_str( list_item( iter[ i ] )
+ ), out, havezeroed );
+ ++out;
+ while ( --i >= 0 )
+ {
+ if ( list_next( iter[ i ] ) != list_end( args[ i ] ) )
+ {
+ iter[ i ] = list_next( iter[ i ] );
+ goto loop;
+ }
+ iter[ i ] = list_begin( args[ i ] );
+ }
+ }
+ stack_deallocate( s, n * sizeof( LIST * ) );
+ }
+ return total;
+}
+
+static LIST * apply_modifiers( STACK * s, int n )
+{
+ LIST * value = stack_top( s );
+ LIST * result = L0;
+ VAR_EDITS * const edits = (VAR_EDITS *)( (LIST * *)stack_get( s ) + 1 );
+ string buf[ 1 ];
+ string_new( buf );
+ result = apply_modifiers_impl( result, buf, edits, n, list_begin( value ),
+ list_end( value ) );
+ string_free( buf );
+ return result;
+}
+
+
+/*
+ * Parse a string of the form "1-2", "-2--1", "2-" and return the two
+ * subscripts.
+ */
+
+subscript_t parse_subscript( char const * s )
+{
+ subscript_t result;
+ result.sub1 = 0;
+ result.sub2 = 0;
+ do /* so we can use "break" */
+ {
+ /* Allow negative subscripts. */
+ if ( !isdigit( *s ) && ( *s != '-' ) )
+ {
+ result.sub2 = 0;
+ break;
+ }
+ result.sub1 = atoi( s );
+
+ /* Skip over the first symbol, which is either a digit or dash. */
+ ++s;
+ while ( isdigit( *s ) ) ++s;
+
+ if ( *s == '\0' )
+ {
+ result.sub2 = result.sub1;
+ break;
+ }
+
+ if ( *s != '-' )
+ {
+ result.sub2 = 0;
+ break;
+ }
+
+ ++s;
+
+ if ( *s == '\0' )
+ {
+ result.sub2 = -1;
+ break;
+ }
+
+ if ( !isdigit( *s ) && ( *s != '-' ) )
+ {
+ result.sub2 = 0;
+ break;
+ }
+
+ /* First, compute the index of the last element. */
+ result.sub2 = atoi( s );
+ while ( isdigit( *++s ) );
+
+ if ( *s != '\0' )
+ result.sub2 = 0;
+
+ } while ( 0 );
+ return result;
+}
+
+static LIST * apply_subscript( STACK * s )
+{
+ LIST * value = stack_top( s );
+ LIST * indices = stack_at( s, 1 );
+ LIST * result = L0;
+ int length = list_length( value );
+ string buf[ 1 ];
+ LISTITER indices_iter = list_begin( indices );
+ LISTITER const indices_end = list_end( indices );
+ string_new( buf );
+ for ( ; indices_iter != indices_end; indices_iter = list_next( indices_iter
+ ) )
+ {
+ LISTITER iter = list_begin( value );
+ LISTITER end = list_end( value );
+ subscript_t const subscript = parse_subscript( object_str( list_item(
+ indices_iter ) ) );
+ get_iters( subscript, &iter, &end, length );
+ for ( ; iter != end; iter = list_next( iter ) )
+ result = list_push_back( result, object_copy( list_item( iter ) ) );
+ }
+ string_free( buf );
+ return result;
+}
+
+
+/*
+ * Reads the LIST from first and applies subscript to it. The results are
+ * written to *first and *last.
+ */
+
+static void get_iters( subscript_t const subscript, LISTITER * const first,
+ LISTITER * const last, int const length )
+{
+ int start;
+ int size;
+ LISTITER iter;
+ LISTITER end;
+ {
+
+ if ( subscript.sub1 < 0 )
+ start = length + subscript.sub1;
+ else if ( subscript.sub1 > length )
+ start = length;
+ else
+ start = subscript.sub1 - 1;
+
+ size = subscript.sub2 < 0
+ ? length + 1 + subscript.sub2 - start
+ : subscript.sub2 - start;
+
+ /*
+ * HACK: When the first subscript is before the start of the list, it
+ * magically becomes the beginning of the list. This is inconsistent,
+ * but needed for backwards compatibility.
+ */
+ if ( start < 0 )
+ start = 0;
+
+ /* The "sub2 < 0" test handles the semantic error of sub2 < sub1. */
+ if ( size < 0 )
+ size = 0;
+
+ if ( start + size > length )
+ size = length - start;
+ }
+
+ iter = *first;
+ while ( start-- > 0 )
+ iter = list_next( iter );
+
+ end = iter;
+ while ( size-- > 0 )
+ end = list_next( end );
+
+ *first = iter;
+ *last = end;
+}
+
+/* Runs every edit that carries an "empty" replacement value over that value
+ * and appends each produced string to result; edits without one are skipped.
+ * buf is scratch space: empty on entry and restored to empty on exit. */
+static LIST * apply_modifiers_empty( LIST * result, string * buf,
+    VAR_EDITS * edits, int n )
+{
+    VAR_EDITS * edit = edits;
+    VAR_EDITS * const edits_end = edits + n;
+    for ( ; edit != edits_end; ++edit )
+    {
+        if ( !edit->empty.ptr )
+            continue;
+        /** FIXME: is empty.ptr always null-terminated? */
+        var_edit_file( edit->empty.ptr, buf, edit );
+        var_edit_shift( buf, 0, edit );
+        result = list_push_back( result, object_new( buf->value ) );
+        string_truncate( buf, 0 );
+    }
+    return result;
+}
+
+/* Applies the given n variable edits to each value in [ begin, end ) and
+ * appends the results to 'result'. An edit with a join string (presumably the
+ * :J modifier - confirm) yields one combined value; otherwise each element
+ * yields one edited value. buf is scratch, empty on entry and on exit. */
+static LIST * apply_modifiers_non_empty( LIST * result, string * buf,
+    VAR_EDITS * edits, int n, LISTITER begin, LISTITER end )
+{
+    int i;
+    LISTITER iter;
+    for ( i = 0; i < n; ++i )
+    {
+        if ( edits[ i ].join.ptr )
+        {
+            /* Joined: edit the first element, then append the join string and
+             * the edited element for each remaining one. */
+            var_edit_file( object_str( list_item( begin ) ), buf, edits + i );
+            var_edit_shift( buf, 0, edits + i );
+            for ( iter = list_next( begin ); iter != end; iter = list_next( iter
+                ) )
+            {
+                size_t size;
+                string_append( buf, edits[ i ].join.ptr );
+                size = buf->size;
+                var_edit_file( object_str( list_item( iter ) ), buf, edits + i
+                    );
+                var_edit_shift( buf, size, edits + i );
+            }
+            result = list_push_back( result, object_new( buf->value ) );
+            string_truncate( buf, 0 );
+        }
+        else
+        {
+            /* Not joined: one edited output value per input element. */
+            for ( iter = begin; iter != end; iter = list_next( iter ) )
+            {
+                var_edit_file( object_str( list_item( iter ) ), buf, edits + i );
+                var_edit_shift( buf, 0, edits + i );
+                result = list_push_back( result, object_new( buf->value ) );
+                string_truncate( buf, 0 );
+            }
+        }
+    }
+    return result;
+}
+
+/* Dispatches modifier application: an empty value range is handled by the
+ * empty-value logic, a non-empty one by the regular per-element logic. */
+static LIST * apply_modifiers_impl( LIST * result, string * buf,
+    VAR_EDITS * edits, int n, LISTITER iter, LISTITER end )
+{
+    if ( iter == end )
+        return apply_modifiers_empty( result, buf, edits, n );
+    return apply_modifiers_non_empty( result, buf, edits, n, iter, end );
+}
+
+/* Applies both a subscript and modifiers: value list on top of the stack,
+ * subscript expressions below it, then n VAR_EDITS. For each subscript the
+ * matching slice of the value list is selected and the modifiers are run over
+ * it. Does not pop the stack. */
+static LIST * apply_subscript_and_modifiers( STACK * s, int n )
+{
+    LIST * const value = stack_top( s );
+    LIST * const indices = stack_at( s, 1 );
+    LIST * result = L0;
+    /* The n VAR_EDITS are stored on the stack right after the two lists. */
+    VAR_EDITS * const edits = (VAR_EDITS *)((LIST * *)stack_get( s ) + 2);
+    int const length = list_length( value );
+    string buf[ 1 ];
+    LISTITER indices_iter = list_begin( indices );
+    LISTITER const indices_end = list_end( indices );
+    string_new( buf );
+    for ( ; indices_iter != indices_end; indices_iter = list_next( indices_iter
+        ) )
+    {
+        LISTITER iter = list_begin( value );
+        LISTITER end = list_end( value );
+        subscript_t const sub = parse_subscript( object_str( list_item(
+            indices_iter ) ) );
+        get_iters( sub, &iter, &end, length );
+        result = apply_modifiers_impl( result, buf, edits, n, iter, end );
+    }
+    string_free( buf );
+    return result;
+}
+
+
+/*
+ * expand() - expands a list of concatenated strings and variable references
+ *
+ * Takes a list of expansion items - each representing one element to be
+ * concatenated and each containing a list of its values. Returns a list of all
+ * possible values constructed by selecting a single value from each of the
+ * elements and concatenating them together.
+ *
+ * For example, in the following code:
+ *
+ * local a = one two three four ;
+ * local b = foo bar ;
+ * ECHO /$(a)/$(b)/$(a)/ ;
+ *
+ * When constructing the result of /$(a)/$(b)/ this function would get called
+ * with the following 7 expansion items:
+ * 1. /
+ * 2. one two three four
+ * 3. /
+ * 4. foo bar
+ * 5. /
+ * 6. one two three four
+ * 7. /
+ *
+ * And would result in a list containing 32 values:
+ * 1. /one/foo/one/
+ * 2. /one/foo/two/
+ * 3. /one/foo/three/
+ * 4. /one/foo/four/
+ * 5. /one/bar/one/
+ * ...
+ *
+ */
+
+typedef struct expansion_item
+{
+    /* Item's value list initialized prior to calling expand(). */
+    LIST * values;
+
+    /* Internal data initialized and used inside expand(). */
+    LISTITER current; /* Currently used value. */
+    int size; /* Concatenated string length prior to concatenating the
+               * item's current value.
+               */
+} expansion_item;
+
+static LIST * expand( expansion_item * items, int const length )
+{
+    LIST * result = L0;
+    string buf[ 1 ];
+    int size = 0;
+    int i;
+
+    assert( length > 0 );
+    for ( i = 0; i < length; ++i )
+    {
+        LISTITER iter = list_begin( items[ i ].values );
+        LISTITER const end = list_end( items[ i ].values );
+
+        /* If any of the items has no values - the result is an empty list. */
+        if ( iter == end ) return L0;
+
+        /* Set each item's 'current' to its first listed value. This indicates
+         * each item's next value to be used when constructing the list of all
+         * possible concatenated values.
+         */
+        items[ i ].current = iter;
+
+        /* Calculate the longest concatenated string length - to know how much
+         * memory we need to allocate as a buffer for holding the concatenated
+         * strings.
+         */
+        {
+            int max = 0;
+            for ( ; iter != end; iter = list_next( iter ) )
+            {
+                int const len = strlen( object_str( list_item( iter ) ) );
+                if ( len > max ) max = len;
+            }
+            size += max;
+        }
+    }
+
+    string_new( buf );
+    string_reserve( buf, size );
+
+    /* Odometer-style enumeration: build one concatenation, then advance the
+     * rightmost item that still has values left (truncating buf back to that
+     * item's recorded prefix length) while rewinding the items after it.
+     * i drops below 0 once every combination has been produced. */
+    i = 0;
+    while ( i >= 0 )
+    {
+        for ( ; i < length; ++i )
+        {
+            items[ i ].size = buf->size;
+            string_append( buf, object_str( list_item( items[ i ].current ) ) );
+        }
+        result = list_push_back( result, object_new( buf->value ) );
+        while ( --i >= 0 )
+        {
+            if ( list_next( items[ i ].current ) != list_end( items[ i ].values
+                ) )
+            {
+                items[ i ].current = list_next( items[ i ].current );
+                string_truncate( buf, items[ i ].size );
+                break;
+            }
+            else
+                items[ i ].current = list_begin( items[ i ].values );
+        }
+    }
+
+    string_free( buf );
+    return result;
+}
+
+/* Pops n value lists off the stack and appends their elements to 'out'.
+ * Elements within one list are separated by single spaces; consecutive lists
+ * are concatenated directly. Every popped list is freed. */
+static void combine_strings( STACK * s, int n, string * out )
+{
+    int i;
+    for ( i = 0; i < n; ++i )
+    {
+        LIST * const values = stack_pop( s );
+        LISTITER iter = list_begin( values );
+        LISTITER const end = list_end( values );
+        if ( iter != end )
+        {
+            string_append( out, object_str( list_item( iter ) ) );
+            for ( iter = list_next( iter ); iter != end; iter = list_next( iter
+                ) )
+            {
+                string_push_back( out, ' ' );
+                string_append( out, object_str( list_item( iter ) ) );
+            }
+        }
+        /* Free the popped list unconditionally - previously an empty list was
+         * skipped here and would never get released. */
+        list_free( values );
+    }
+}
+
+/* A simple geometrically growing array of fixed-size elements. The element
+ * size is passed to each operation (see the macros below), so one struct
+ * serves any payload type. */
+struct dynamic_array
+{
+    int size;      /* number of elements currently stored */
+    int capacity;  /* number of elements that fit in 'data' */
+    void * data;
+};
+
+/* Initializes an empty array; storage is allocated lazily on first push. */
+static void dynamic_array_init( struct dynamic_array * array )
+{
+    array->size = 0;
+    array->capacity = 0;
+    array->data = 0;
+}
+
+/* Releases the array's storage. Stored elements are not destroyed. */
+static void dynamic_array_free( struct dynamic_array * array )
+{
+    BJAM_FREE( array->data );
+}
+
+/* Appends a copy of *value (unit_size bytes) to the array, doubling the
+ * capacity whenever it is exhausted. */
+static void dynamic_array_push_impl( struct dynamic_array * const array,
+    void const * const value, int const unit_size )
+{
+    if ( array->capacity == 0 )
+    {
+        array->capacity = 2;
+        array->data = BJAM_MALLOC( array->capacity * unit_size );
+    }
+    else if ( array->capacity == array->size )
+    {
+        void * new_data;
+        array->capacity *= 2;
+        new_data = BJAM_MALLOC( array->capacity * unit_size );
+        memcpy( new_data, array->data, array->size * unit_size );
+        BJAM_FREE( array->data );
+        array->data = new_data;
+    }
+    memcpy( (char *)array->data + array->size * unit_size, value, unit_size );
+    ++array->size;
+}
+
+/* Typed helpers: push an lvalue / access element idx as 'type'. */
+#define dynamic_array_push( array, value ) (dynamic_array_push_impl(array, &value, sizeof(value)))
+#define dynamic_array_at( type, array, idx ) (((type *)(array)->data)[idx])
+
+
+/*
+ * struct compiler
+ */
+
+/* A jump target inside the code being compiled. */
+struct label_info
+{
+    int absolute_position;         /* code index, -1 until bound */
+    struct dynamic_array uses[ 1 ]; /* int indices of branches to backpatch */
+};
+
+/* A nested rule definition collected during compilation; its body is
+ * compiled later, in compile_to_function(). */
+struct stored_rule
+{
+    OBJECT * name;
+    PARSE * parse;
+    int num_arguments;
+    struct arg_list * arguments;
+    int local;
+};
+
+/* Accumulates everything produced while compiling one function. */
+typedef struct compiler
+{
+    struct dynamic_array code[ 1 ];      /* instruction elements */
+    struct dynamic_array constants[ 1 ]; /* OBJECT * elements */
+    struct dynamic_array labels[ 1 ];    /* struct label_info elements */
+    struct dynamic_array rules[ 1 ];     /* struct stored_rule elements */
+    struct dynamic_array actions[ 1 ];   /* SUBACTION elements */
+} compiler;
+
+/* Initializes all of the compiler's arrays to empty. */
+static void compiler_init( compiler * c )
+{
+    dynamic_array_init( c->code );
+    dynamic_array_init( c->constants );
+    dynamic_array_init( c->labels );
+    dynamic_array_init( c->rules );
+    dynamic_array_init( c->actions );
+}
+
+/* Frees the compiler's arrays, including each label's uses array. Element
+ * payloads (constants, rule names) are not freed here - they transfer to the
+ * JAM_FUNCTION built by compile_to_function(). */
+static void compiler_free( compiler * c )
+{
+    int i;
+    dynamic_array_free( c->actions );
+    dynamic_array_free( c->rules );
+    for ( i = 0; i < c->labels->size; ++i )
+        dynamic_array_free( dynamic_array_at( struct label_info, c->labels, i
+            ).uses );
+    dynamic_array_free( c->labels );
+    dynamic_array_free( c->constants );
+    dynamic_array_free( c->code );
+}
+
+/* Appends one instruction to the code being compiled. */
+static void compile_emit_instruction( compiler * c, instruction instr )
+{
+    dynamic_array_push( c->code, instr );
+}
+
+/* Allocates a new label. Its position stays unset (-1) until
+ * compile_set_label(); branches emitted before that are backpatched then. */
+static int compile_new_label( compiler * c )
+{
+    int result = c->labels->size;
+    struct label_info info;
+    info.absolute_position = -1;
+    dynamic_array_init( info.uses );
+    dynamic_array_push( c->labels, info );
+    return result;
+}
+
+/* Binds 'label' to the current code position and backpatches every branch
+ * instruction that referenced it before it was bound. */
+static void compile_set_label( compiler * c, int label )
+{
+    struct label_info * const l = &dynamic_array_at( struct label_info,
+        c->labels, label );
+    int const pos = c->code->size;
+    int i;
+    assert( l->absolute_position == -1 );
+    l->absolute_position = pos;
+    for ( i = 0; i < l->uses->size; ++i )
+    {
+        int id = dynamic_array_at( int, l->uses, i );
+        /* Branch offsets are relative to the instruction after the branch. */
+        int offset = (int)( pos - id - 1 );
+        dynamic_array_at( instruction, c->code, id ).arg = offset;
+    }
+}
+
+/* Emits a single instruction with the given opcode and argument. */
+static void compile_emit( compiler * c, unsigned int op_code, int arg )
+{
+    instruction instr;
+    instr.op_code = op_code;
+    instr.arg = arg;
+    compile_emit_instruction( c, instr );
+}
+
+/* Emits a branch to 'label'. If the label is not bound yet, a placeholder
+ * offset of 0 is emitted and this position is recorded for backpatching. */
+static void compile_emit_branch( compiler * c, unsigned int op_code, int label )
+{
+    struct label_info * const l = &dynamic_array_at( struct label_info,
+        c->labels, label );
+    int const pos = c->code->size;
+    instruction instr;
+    instr.op_code = op_code;
+    if ( l->absolute_position == -1 )
+    {
+        instr.arg = 0;
+        dynamic_array_push( l->uses, pos );
+    }
+    else
+        instr.arg = (int)( l->absolute_position - pos - 1 );
+    compile_emit_instruction( c, instr );
+}
+
+/* Stores a copy of 'value' in the constant pool and returns its index. */
+static int compile_emit_constant( compiler * c, OBJECT * value )
+{
+    OBJECT * copy = object_copy( value );
+    dynamic_array_push( c->constants, copy );
+    return c->constants->size - 1;
+}
+
+/* Registers a nested rule definition and returns its index. Copies 'name';
+ * stores 'parse' and 'arguments' as-is (presumably taking ownership of
+ * 'arguments' - confirm against callers). */
+static int compile_emit_rule( compiler * c, OBJECT * name, PARSE * parse,
+    int num_arguments, struct arg_list * arguments, int local )
+{
+    struct stored_rule rule;
+    rule.name = object_copy( name );
+    rule.parse = parse;
+    rule.num_arguments = num_arguments;
+    rule.arguments = arguments;
+    rule.local = local;
+    dynamic_array_push( c->rules, rule );
+    return (int)( c->rules->size - 1 );
+}
+
+/* Compiles an actions block and returns its index in the subaction table. */
+static int compile_emit_actions( compiler * c, PARSE * parse )
+{
+    SUBACTION a;
+    a.name = object_copy( parse->string );
+    a.command = function_compile_actions( object_str( parse->string1 ),
+        parse->file, parse->line );
+    a.flags = parse->num;
+    dynamic_array_push( c->actions, a );
+    return (int)( c->actions->size - 1 );
+}
+
+/* Packages everything collected by the compiler into a heap-allocated
+ * JAM_FUNCTION. Constant, rule-name and action entries transfer to the
+ * result; nested rules are compiled recursively via function_compile(). */
+static JAM_FUNCTION * compile_to_function( compiler * c )
+{
+    JAM_FUNCTION * const result = BJAM_MALLOC( sizeof( JAM_FUNCTION ) );
+    int i;
+    result->base.type = FUNCTION_JAM;
+    result->base.reference_count = 1;
+    result->base.formal_arguments = 0;
+    result->base.num_formal_arguments = 0;
+
+    result->base.rulename = 0;
+
+    result->code_size = c->code->size;
+    result->code = BJAM_MALLOC( c->code->size * sizeof( instruction ) );
+    memcpy( result->code, c->code->data, c->code->size * sizeof( instruction ) );
+
+    result->constants = BJAM_MALLOC( c->constants->size * sizeof( OBJECT * ) );
+    memcpy( result->constants, c->constants->data, c->constants->size * sizeof(
+        OBJECT * ) );
+    result->num_constants = c->constants->size;
+
+    result->num_subfunctions = c->rules->size;
+    result->functions = BJAM_MALLOC( c->rules->size * sizeof( SUBFUNCTION ) );
+    for ( i = 0; i < c->rules->size; ++i )
+    {
+        struct stored_rule * const rule = &dynamic_array_at( struct stored_rule,
+            c->rules, i );
+        result->functions[ i ].name = rule->name;
+        result->functions[ i ].code = function_compile( rule->parse );
+        result->functions[ i ].code->num_formal_arguments = rule->num_arguments;
+        result->functions[ i ].code->formal_arguments = rule->arguments;
+        result->functions[ i ].local = rule->local;
+    }
+
+    result->actions = BJAM_MALLOC( c->actions->size * sizeof( SUBACTION ) );
+    memcpy( result->actions, c->actions->data, c->actions->size * sizeof(
+        SUBACTION ) );
+    result->num_subactions = c->actions->size;
+
+    result->generic = 0;
+
+    /* Source location is filled in by the caller, if at all. */
+    result->file = 0;
+    result->line = -1;
+
+    return result;
+}
+
+
+/*
+ * Parsing of variable expansions
+ */
+
+/* A sequence of VAR_PARSE elements whose expansions get concatenated. */
+typedef struct VAR_PARSE_GROUP
+{
+    struct dynamic_array elems[ 1 ]; /* VAR_PARSE * elements */
+} VAR_PARSE_GROUP;
+
+/* Parsed form of an actions body: one VAR_PARSE_GROUP per word. */
+typedef struct VAR_PARSE_ACTIONS
+{
+    struct dynamic_array elems[ 1 ]; /* VAR_PARSE_GROUP * elements */
+} VAR_PARSE_ACTIONS;
+
+#define VAR_PARSE_TYPE_VAR 0
+#define VAR_PARSE_TYPE_STRING 1
+#define VAR_PARSE_TYPE_FILE 2
+
+/* Common header of all parsed expansion elements. */
+typedef struct _var_parse
+{
+    int type; /* string, variable or file */
+} VAR_PARSE;
+
+/* A $(name[subscript]:modifiers) variable expansion. */
+typedef struct
+{
+    VAR_PARSE base;
+    VAR_PARSE_GROUP * name;
+    VAR_PARSE_GROUP * subscript;         /* 0 when no subscript was given */
+    struct dynamic_array modifiers[ 1 ]; /* VAR_PARSE_GROUP * elements */
+} VAR_PARSE_VAR;
+
+/* A literal piece of text. */
+typedef struct
+{
+    VAR_PARSE base;
+    OBJECT * s;
+} VAR_PARSE_STRING;
+
+/* An @( filename :E= contents ) response file section. */
+typedef struct
+{
+    VAR_PARSE base;
+    struct dynamic_array filename[ 1 ]; /* VAR_PARSE_GROUP * elements */
+    struct dynamic_array contents[ 1 ]; /* VAR_PARSE_GROUP * elements */
+} VAR_PARSE_FILE;
+
+static void var_parse_free( VAR_PARSE * );
+
+
+/*
+ * VAR_PARSE_GROUP
+ */
+
+/* Allocates an empty group. '( void )' and the allocation cast added for
+ * consistency with the other constructors in this file. */
+static VAR_PARSE_GROUP * var_parse_group_new( void )
+{
+    VAR_PARSE_GROUP * const result = (VAR_PARSE_GROUP *)BJAM_MALLOC( sizeof(
+        VAR_PARSE_GROUP ) );
+    dynamic_array_init( result->elems );
+    return result;
+}
+
+/* Frees a group together with all of its elements. */
+static void var_parse_group_free( VAR_PARSE_GROUP * group )
+{
+    int i;
+    for ( i = 0; i < group->elems->size; ++i )
+        var_parse_free( dynamic_array_at( VAR_PARSE *, group->elems, i ) );
+    dynamic_array_free( group->elems );
+    BJAM_FREE( group );
+}
+
+/* Appends an element to the group, which takes ownership of it. */
+static void var_parse_group_add( VAR_PARSE_GROUP * group, VAR_PARSE * elem )
+{
+    dynamic_array_push( group->elems, elem );
+}
+
+/* Appends the literal text [ start, end ) to the group as a string element,
+ * doing nothing for an empty range. */
+static void var_parse_group_maybe_add_constant( VAR_PARSE_GROUP * group,
+    char const * start, char const * end )
+{
+    if ( start != end )
+    {
+        string buf[ 1 ];
+        VAR_PARSE_STRING * const value = (VAR_PARSE_STRING *)BJAM_MALLOC(
+            sizeof(VAR_PARSE_STRING) );
+        value->base.type = VAR_PARSE_TYPE_STRING;
+        string_new( buf );
+        string_append_range( buf, start, end );
+        value->s = object_new( buf->value );
+        string_free( buf );
+        var_parse_group_add( group, (VAR_PARSE *)value );
+    }
+}
+
+/* Returns the group's single literal-string element, or 0 if the group does
+ * not consist of exactly one literal. */
+VAR_PARSE_STRING * var_parse_group_as_literal( VAR_PARSE_GROUP * group )
+{
+    if ( group->elems->size == 1 )
+    {
+        VAR_PARSE * result = dynamic_array_at( VAR_PARSE *, group->elems, 0 );
+        if ( result->type == VAR_PARSE_TYPE_STRING )
+            return (VAR_PARSE_STRING *)result;
+    }
+    return 0;
+}
+
+
+/*
+ * VAR_PARSE_ACTIONS
+ */
+
+/* Allocates an empty actions parse. '( void )' added: an empty C parameter
+ * list declares an unspecified-arguments function, unlike C++. */
+static VAR_PARSE_ACTIONS * var_parse_actions_new( void )
+{
+    VAR_PARSE_ACTIONS * const result = (VAR_PARSE_ACTIONS *)BJAM_MALLOC(
+        sizeof(VAR_PARSE_ACTIONS) );
+    dynamic_array_init( result->elems );
+    return result;
+}
+
+/* Frees the actions parse together with all of its word groups. */
+static void var_parse_actions_free( VAR_PARSE_ACTIONS * actions )
+{
+    int i;
+    for ( i = 0; i < actions->elems->size; ++i )
+        var_parse_group_free( dynamic_array_at( VAR_PARSE_GROUP *,
+            actions->elems, i ) );
+    dynamic_array_free( actions->elems );
+    BJAM_FREE( actions );
+}
+
+
+/*
+ * VAR_PARSE_VAR
+ */
+
+/* Allocates a variable parse with an empty name group, no subscript and no
+ * modifiers. '( void )' and the allocation cast added for consistency with
+ * the other constructors in this file. */
+static VAR_PARSE_VAR * var_parse_var_new( void )
+{
+    VAR_PARSE_VAR * result = (VAR_PARSE_VAR *)BJAM_MALLOC( sizeof(
+        VAR_PARSE_VAR ) );
+    result->base.type = VAR_PARSE_TYPE_VAR;
+    result->name = var_parse_group_new();
+    result->subscript = 0;
+    dynamic_array_init( result->modifiers );
+    return result;
+}
+
+/* Frees a variable parse: its name, optional subscript and all modifiers. */
+static void var_parse_var_free( VAR_PARSE_VAR * var )
+{
+    int i;
+    var_parse_group_free( var->name );
+    if ( var->subscript )
+        var_parse_group_free( var->subscript );
+    for ( i = 0; i < var->modifiers->size; ++i )
+        var_parse_group_free( dynamic_array_at( VAR_PARSE_GROUP *,
+            var->modifiers, i ) );
+    dynamic_array_free( var->modifiers );
+    BJAM_FREE( var );
+}
+
+/* Appends a fresh, empty modifier group to 'var' and returns it. */
+static VAR_PARSE_GROUP * var_parse_var_new_modifier( VAR_PARSE_VAR * var )
+{
+    VAR_PARSE_GROUP * result = var_parse_group_new();
+    dynamic_array_push( var->modifiers, result );
+    return result;
+}
+
+
+/*
+ * VAR_PARSE_STRING
+ */
+
+/* Frees a literal-string element together with its OBJECT payload. The
+ * parameter is named 'parse' rather than 'string' so it does not shadow the
+ * 'string' type used throughout this file. */
+static void var_parse_string_free( VAR_PARSE_STRING * parse )
+{
+    object_free( parse->s );
+    BJAM_FREE( parse );
+}
+
+
+/*
+ * VAR_PARSE_FILE
+ */
+
+/* Allocates an empty @() response-file parse. */
+static VAR_PARSE_FILE * var_parse_file_new( void )
+{
+    VAR_PARSE_FILE * const result = (VAR_PARSE_FILE *)BJAM_MALLOC( sizeof(
+        VAR_PARSE_FILE ) );
+    result->base.type = VAR_PARSE_TYPE_FILE;
+    dynamic_array_init( result->filename );
+    dynamic_array_init( result->contents );
+    return result;
+}
+
+/* Frees a file parse: both the filename and the contents word groups. */
+static void var_parse_file_free( VAR_PARSE_FILE * file )
+{
+    int i;
+    for ( i = 0; i < file->filename->size; ++i )
+        var_parse_group_free( dynamic_array_at( VAR_PARSE_GROUP *,
+            file->filename, i ) );
+    dynamic_array_free( file->filename );
+    for ( i = 0; i < file->contents->size; ++i )
+        var_parse_group_free( dynamic_array_at( VAR_PARSE_GROUP *,
+            file->contents, i ) );
+    dynamic_array_free( file->contents );
+    BJAM_FREE( file );
+}
+
+
+/*
+ * VAR_PARSE
+ */
+
+/* Virtual destructor: dispatches on the element's type tag. */
+static void var_parse_free( VAR_PARSE * parse )
+{
+    switch ( parse->type )
+    {
+        case VAR_PARSE_TYPE_VAR:
+            var_parse_var_free( (VAR_PARSE_VAR *)parse );
+            break;
+
+        case VAR_PARSE_TYPE_STRING:
+            var_parse_string_free( (VAR_PARSE_STRING *)parse );
+            break;
+
+        case VAR_PARSE_TYPE_FILE:
+            var_parse_file_free( (VAR_PARSE_FILE *)parse );
+            break;
+
+        default:
+            assert( !"Invalid type" );
+    }
+}
+
+
+/*
+ * Compile VAR_PARSE
+ */
+
+static void var_parse_group_compile( VAR_PARSE_GROUP const * parse,
+ compiler * c );
+
+/* Emits code for a single variable expansion: modifier groups in reverse
+ * order, then the optional subscript, then the variable name (or a direct
+ * variable/argument lookup when the name is a plain literal), and finally the
+ * INSTR_* variant matching the modifier/subscript/grouping combination. */
+static void var_parse_var_compile( VAR_PARSE_VAR const * parse, compiler * c )
+{
+    int expand_name = 0;
+    int is_get_grist = 0;
+    int has_modifiers = 0;
+    /* Special case common modifiers: a lone literal ":G" modifier becomes a
+     * dedicated INSTR_GET_GRIST instruction instead of the generic path. */
+    if ( parse->modifiers->size == 1 )
+    {
+        VAR_PARSE_GROUP * mod = dynamic_array_at( VAR_PARSE_GROUP *, parse->modifiers, 0 );
+        if ( mod->elems->size == 1 )
+        {
+            VAR_PARSE * mod1 = dynamic_array_at( VAR_PARSE *, mod->elems, 0 );
+            if ( mod1->type == VAR_PARSE_TYPE_STRING )
+            {
+                OBJECT * s = ( (VAR_PARSE_STRING *)mod1 )->s;
+                if ( ! strcmp ( object_str( s ), "G" ) )
+                {
+                    is_get_grist = 1;
+                }
+            }
+        }
+    }
+    /* If there are modifiers, emit them in reverse order. */
+    if ( parse->modifiers->size > 0 && !is_get_grist )
+    {
+        int i;
+        has_modifiers = 1;
+        for ( i = 0; i < parse->modifiers->size; ++i )
+            var_parse_group_compile( dynamic_array_at( VAR_PARSE_GROUP *,
+                parse->modifiers, parse->modifiers->size - i - 1 ), c );
+    }
+
+    /* If there is a subscript, emit it. */
+    if ( parse->subscript )
+        var_parse_group_compile( parse->subscript, c );
+
+    /* If the variable name is empty, look it up. */
+    if ( parse->name->elems->size == 0 )
+        compile_emit( c, INSTR_PUSH_VAR, compile_emit_constant( c,
+            constant_empty ) );
+    /* If the variable name does not need to be expanded, look it up. */
+    else if ( parse->name->elems->size == 1 && dynamic_array_at( VAR_PARSE *,
+        parse->name->elems, 0 )->type == VAR_PARSE_TYPE_STRING )
+    {
+        OBJECT * const name = ( (VAR_PARSE_STRING *)dynamic_array_at(
+            VAR_PARSE *, parse->name->elems, 0 ) )->s;
+        int const idx = get_argument_index( object_str( name ) );
+        if ( idx != -1 )
+            compile_emit( c, INSTR_PUSH_ARG, idx );
+        else
+            compile_emit( c, INSTR_PUSH_VAR, compile_emit_constant( c, name ) );
+    }
+    /* Otherwise, push the var names and use the group instruction. */
+    else
+    {
+        var_parse_group_compile( parse->name, c );
+        expand_name = 1;
+    }
+
+    /** Select the instruction for expanding the variable. */
+    if ( !has_modifiers && !parse->subscript && !expand_name )
+        ;
+    else if ( !has_modifiers && !parse->subscript && expand_name )
+        compile_emit( c, INSTR_PUSH_GROUP, 0 );
+    else if ( !has_modifiers && parse->subscript && !expand_name )
+        compile_emit( c, INSTR_APPLY_INDEX, 0 );
+    else if ( !has_modifiers && parse->subscript && expand_name )
+        compile_emit( c, INSTR_APPLY_INDEX_GROUP, 0 );
+    else if ( has_modifiers && !parse->subscript && !expand_name )
+        compile_emit( c, INSTR_APPLY_MODIFIERS, parse->modifiers->size );
+    else if ( has_modifiers && !parse->subscript && expand_name )
+        compile_emit( c, INSTR_APPLY_MODIFIERS_GROUP, parse->modifiers->size );
+    else if ( has_modifiers && parse->subscript && !expand_name )
+        compile_emit( c, INSTR_APPLY_INDEX_MODIFIERS, parse->modifiers->size );
+    else if ( has_modifiers && parse->subscript && expand_name )
+        compile_emit( c, INSTR_APPLY_INDEX_MODIFIERS_GROUP,
+            parse->modifiers->size );
+
+    /* Now apply any special modifiers */
+    if ( is_get_grist )
+    {
+        compile_emit( c, INSTR_GET_GRIST, 0 );
+    }
+}
+
+/* Emits a literal string: just a constant push. */
+static void var_parse_string_compile( VAR_PARSE_STRING const * parse,
+    compiler * c )
+{
+    compile_emit( c, INSTR_PUSH_CONSTANT, compile_emit_constant( c, parse->s )
+        );
+}
+
+/* Emits an @() section: the filename groups, then the contents groups, each
+ * followed by the instruction consuming that many strings. */
+static void var_parse_file_compile( VAR_PARSE_FILE const * parse, compiler * c )
+{
+    int i;
+    for ( i = 0; i < parse->filename->size; ++i )
+        var_parse_group_compile( dynamic_array_at( VAR_PARSE_GROUP *,
+            parse->filename, parse->filename->size - i - 1 ), c );
+    compile_emit( c, INSTR_APPEND_STRINGS, parse->filename->size );
+    for ( i = 0; i < parse->contents->size; ++i )
+        var_parse_group_compile( dynamic_array_at( VAR_PARSE_GROUP *,
+            parse->contents, parse->contents->size - i - 1 ), c );
+    compile_emit( c, INSTR_WRITE_FILE, parse->contents->size );
+}
+
+/* Dispatches compilation on the element's type tag. */
+static void var_parse_compile( VAR_PARSE const * parse, compiler * c )
+{
+    switch ( parse->type )
+    {
+        case VAR_PARSE_TYPE_VAR:
+            var_parse_var_compile( (VAR_PARSE_VAR const *)parse, c );
+            break;
+
+        case VAR_PARSE_TYPE_STRING:
+            var_parse_string_compile( (VAR_PARSE_STRING const *)parse, c );
+            break;
+
+        case VAR_PARSE_TYPE_FILE:
+            var_parse_file_compile( (VAR_PARSE_FILE const *)parse, c );
+            break;
+
+        default:
+            assert( !"Unknown var parse type." );
+    }
+}
+
+/* Emits code producing the concatenation of all of the group's elements. */
+static void var_parse_group_compile( VAR_PARSE_GROUP const * parse, compiler * c
+    )
+{
+    /* Emit the elements in reverse order. */
+    int i;
+    for ( i = 0; i < parse->elems->size; ++i )
+        var_parse_compile( dynamic_array_at( VAR_PARSE *, parse->elems,
+            parse->elems->size - i - 1 ), c );
+    /* If there are no elements, emit an empty string. */
+    if ( parse->elems->size == 0 )
+        compile_emit( c, INSTR_PUSH_CONSTANT, compile_emit_constant( c,
+            constant_empty ) );
+    /* If there is more than one element, combine them. */
+    if ( parse->elems->size > 1 )
+        compile_emit( c, INSTR_COMBINE_STRINGS, parse->elems->size );
+}
+
+/* Emits all of an actions block's word groups (in reverse order) followed by
+ * the output instruction consuming them. */
+static void var_parse_actions_compile( VAR_PARSE_ACTIONS const * actions,
+    compiler * c )
+{
+    int i;
+    for ( i = 0; i < actions->elems->size; ++i )
+        var_parse_group_compile( dynamic_array_at( VAR_PARSE_GROUP *,
+            actions->elems, actions->elems->size - i - 1 ), c );
+    compile_emit( c, INSTR_OUTPUT_STRINGS, actions->elems->size );
+}
+
+
+/*
+ * Parse VAR_PARSE_VAR
+ */
+
+static VAR_PARSE * parse_at_file( char const * start, char const * mid,
+ char const * end );
+static VAR_PARSE * parse_variable( char const * * string );
+static int try_parse_variable( char const * * s_, char const * * string,
+ VAR_PARSE_GROUP * out );
+static void balance_parentheses( char const * * s_, char const * * string,
+ VAR_PARSE_GROUP * out );
+static void parse_var_string( char const * first, char const * last,
+ struct dynamic_array * out );
+
+
+/*
+ * Parses a string that can contain variables to expand.
+ */
+
+/* Parses a NUL-terminated string that may contain $( ) and @( ) expansions
+ * into a group; plain text between expansions is stored as literals. */
+static VAR_PARSE_GROUP * parse_expansion( char const * * string )
+{
+    VAR_PARSE_GROUP * result = var_parse_group_new();
+    char const * s = *string;
+    for ( ; ; )
+    {
+        if ( try_parse_variable( &s, string, result ) ) {}
+        else if ( s[ 0 ] == '\0' )
+        {
+            /* Flush any trailing literal text and finish. */
+            var_parse_group_maybe_add_constant( result, *string, s );
+            return result;
+        }
+        else
+            ++s;
+    }
+}
+
+/* Parses a whole actions body into whitespace-separated word groups. */
+static VAR_PARSE_ACTIONS * parse_actions( char const * string )
+{
+    VAR_PARSE_ACTIONS * const result = var_parse_actions_new();
+    parse_var_string( string, string + strlen( string ), result->elems );
+    return result;
+}
+
+/*
+ * Checks whether the string at *s_ starts with a variable expansion "$(".
+ * *string should point to the first unemitted character before *s. If *s_
+ * starts with variable expansion, appends elements to out up to the closing
+ * ")", and adjusts *s_ and *string to point to next character. Returns 1 if s_
+ * starts with a variable, 0 otherwise.
+ */
+
+static int try_parse_variable( char const * * s_, char const * * string,
+    VAR_PARSE_GROUP * out )
+{
+    char const * s = *s_;
+    if ( s[ 0 ] == '$' && s[ 1 ] == '(' )
+    {
+        /* Flush pending literal text, then parse the variable reference. */
+        var_parse_group_maybe_add_constant( out, *string, s );
+        s += 2;
+        var_parse_group_add( out, parse_variable( &s ) );
+        *string = s;
+        *s_ = s;
+        return 1;
+    }
+    if ( s[ 0 ] == '@' && s[ 1 ] == '(' )
+    {
+        int depth = 1;
+        char const * ine;
+        char const * split = 0;
+        var_parse_group_maybe_add_constant( out, *string, s );
+        s += 2;
+        ine = s;
+
+        /* Scan the content of the response file @() section. */
+        while ( *ine && ( depth > 0 ) )
+        {
+            switch ( *ine )
+            {
+                case '(': ++depth; break;
+                case ')': --depth; break;
+                case ':':
+                    if ( ( depth == 1 ) && ( ine[ 1 ] == 'E' ) && ( ine[ 2 ] == '='
+                        ) )
+                        split = ine;
+                    break;
+            }
+            ++ine;
+        }
+
+        /* NOTE(review): on a malformed @() (missing ":E=" or unbalanced
+         * parens) we return 0 even though the literal text before "@(" was
+         * already appended to out without advancing *string - confirm callers
+         * tolerate the resulting duplication. */
+        if ( !split || depth )
+            return 0;
+
+        var_parse_group_add( out, parse_at_file( s, split, ine - 1 ) );
+        *string = ine;
+        *s_ = ine;
+        return 1;
+    }
+    return 0;
+}
+
+
+/* Location used by parse_error() diagnostics; presumably set by the caller of
+ * the parsing functions before parsing starts - not visible in this file
+ * section. */
+static char const * current_file = "";
+static int current_line;
+
+/* Reports a variable-parsing diagnostic. Non-fatal: parsing continues. */
+static void parse_error( char const * message )
+{
+    printf( "%s:%d: %s\n", current_file, current_line, message );
+}
+
+
+/*
+ * Parses a single variable up to the closing ")" and adjusts *string to point
+ * to the next character. *string should point to the character immediately
+ * after the initial "$(".
+ */
+
+static VAR_PARSE * parse_variable( char const * * string )
+{
+    VAR_PARSE_VAR * const result = var_parse_var_new();
+    VAR_PARSE_GROUP * const name = result->name;
+    char const * s = *string;
+    for ( ; ; )
+    {
+        if ( try_parse_variable( &s, string, name ) ) {}
+        else if ( s[ 0 ] == ':' )
+        {
+            /* ':' ends the name and starts the modifier list. */
+            VAR_PARSE_GROUP * mod;
+            var_parse_group_maybe_add_constant( name, *string, s );
+            ++s;
+            *string = s;
+            mod = var_parse_var_new_modifier( result );
+            for ( ; ; )
+            {
+                if ( try_parse_variable( &s, string, mod ) ) {}
+                else if ( s[ 0 ] == ')' )
+                {
+                    var_parse_group_maybe_add_constant( mod, *string, s );
+                    *string = ++s;
+                    return (VAR_PARSE *)result;
+                }
+                else if ( s[ 0 ] == '(' )
+                {
+                    ++s;
+                    balance_parentheses( &s, string, mod );
+                }
+                else if ( s[ 0 ] == ':' )
+                {
+                    /* Another ':' starts the next modifier. */
+                    var_parse_group_maybe_add_constant( mod, *string, s );
+                    *string = ++s;
+                    mod = var_parse_var_new_modifier( result );
+                }
+                else if ( s[ 0 ] == '[' )
+                {
+                    /* Subscripts are only valid before the modifiers. */
+                    parse_error("unexpected subscript");
+                    ++s;
+                }
+                else if ( s[ 0 ] == '\0' )
+                {
+                    /* Input ended inside the expansion: report it and return
+                     * what was parsed so far. */
+                    parse_error( "unbalanced parentheses" );
+                    var_parse_group_maybe_add_constant( mod, *string, s );
+                    *string = s;
+                    return (VAR_PARSE *)result;
+                }
+                else
+                    ++s;
+            }
+        }
+        else if ( s[ 0 ] == '[' )
+        {
+            /* Subscript: digits, '-' and nested expansions up to ']'. */
+            VAR_PARSE_GROUP * subscript = var_parse_group_new();
+            result->subscript = subscript;
+            var_parse_group_maybe_add_constant( name, *string, s );
+            *string = ++s;
+            for ( ; ; )
+            {
+                if ( try_parse_variable( &s, string, subscript ) ) {}
+                else if ( s[ 0 ] == ']' )
+                {
+                    var_parse_group_maybe_add_constant( subscript, *string, s );
+                    *string = ++s;
+                    if ( s[ 0 ] != ')' && s[ 0 ] != ':' && s[ 0 ] != '\0' )
+                        parse_error( "unexpected text following []" );
+                    break;
+                }
+                else if ( isdigit( s[ 0 ] ) || s[ 0 ] == '-' )
+                {
+                    ++s;
+                }
+                else if ( s[ 0 ] == '\0' )
+                {
+                    parse_error( "malformed subscript" );
+                    break;
+                }
+                else
+                {
+                    parse_error( "malformed subscript" );
+                    ++s;
+                }
+            }
+        }
+        else if ( s[ 0 ] == ')' )
+        {
+            /* End of a plain $(name) expansion. */
+            var_parse_group_maybe_add_constant( name, *string, s );
+            *string = ++s;
+            return (VAR_PARSE *)result;
+        }
+        else if ( s[ 0 ] == '(' )
+        {
+            ++s;
+            balance_parentheses( &s, string, name );
+        }
+        else if ( s[ 0 ] == '\0' )
+        {
+            parse_error( "unbalanced parentheses" );
+            var_parse_group_maybe_add_constant( name, *string, s );
+            *string = s;
+            return (VAR_PARSE *)result;
+        }
+        else
+            ++s;
+    }
+}
+
+/*
+ * Splits [ first, last ) into runs of whitespace and non-whitespace; each run
+ * is parsed into a VAR_PARSE_GROUP appended to out (whitespace runs become
+ * literal groups, preserving the original spacing).
+ */
+static void parse_var_string( char const * first, char const * last,
+    struct dynamic_array * out )
+{
+    char const * saved = first;
+    while ( first != last )
+    {
+        /* Handle whitespace. The unsigned char cast avoids undefined behavior
+         * when isspace() receives a negative plain char. */
+        while ( first != last && isspace( (unsigned char)*first ) ) ++first;
+        if ( saved != first )
+        {
+            VAR_PARSE_GROUP * const group = var_parse_group_new();
+            var_parse_group_maybe_add_constant( group, saved, first );
+            saved = first;
+            dynamic_array_push( out, group );
+        }
+        if ( first == last ) break;
+
+        /* Handle non-whitespace */
+        {
+            VAR_PARSE_GROUP * group = var_parse_group_new();
+            for ( ; ; )
+            {
+                if ( first == last || isspace( (unsigned char)*first ) )
+                {
+                    var_parse_group_maybe_add_constant( group, saved, first );
+                    saved = first;
+                    break;
+                }
+                if ( try_parse_variable( &first, &saved, group ) )
+                    assert( first <= last );
+                else
+                    ++first;
+            }
+            dynamic_array_push( out, group );
+        }
+    }
+}
+
+/*
+ * start should point to the character immediately following the opening "@(",
+ * mid should point to the ":E=", and end should point to the closing ")".
+ */
+
+static VAR_PARSE * parse_at_file( char const * start, char const * mid,
+    char const * end )
+{
+    VAR_PARSE_FILE * result = var_parse_file_new();
+    parse_var_string( start, mid, result->filename );
+    /* mid + 3 skips over the ":E=" separator. */
+    parse_var_string( mid + 3, end, result->contents );
+    return (VAR_PARSE *)result;
+}
+
+/*
+ * Given that *s_ points to the character after a "(", parses up to the matching
+ * ")". *string should point to the first unemitted character before *s_.
+ *
+ * When the function returns, *s_ will point to the character after the ")", and
+ * *string will point to the first unemitted character before *s_. The range
+ * from *string to *s_ does not contain any variables that need to be expanded.
+ */
+
+/* Defined 'static' to match the forward declaration above and make the
+ * internal linkage explicit at the definition. */
+static void balance_parentheses( char const * * s_, char const * * string,
+    VAR_PARSE_GROUP * out)
+{
+    int depth = 1;
+    char const * s = *s_;
+    for ( ; ; )
+    {
+        if ( try_parse_variable( &s, string, out ) ) { }
+        else if ( s[ 0 ] == ':' || s[ 0 ] == '[' )
+        {
+            /* ':' and '[' may not appear inside nested parentheses. */
+            parse_error( "unbalanced parentheses" );
+            ++s;
+        }
+        else if ( s[ 0 ] == '\0' )
+        {
+            parse_error( "unbalanced parentheses" );
+            break;
+        }
+        else if ( s[ 0 ] == ')' )
+        {
+            ++s;
+            if ( --depth == 0 ) break;
+        }
+        else if ( s[ 0 ] == '(' )
+        {
+            ++depth;
+            ++s;
+        }
+        else
+            ++s;
+    }
+    *s_ = s;
+}
+
+
+/*
+ * Main compile.
+ */
+
+/* Where a compiled construct is expected to leave its result. */
+#define RESULT_STACK 0   /* pushed onto the value stack */
+#define RESULT_RETURN 1  /* stored via INSTR_SET_RESULT */
+#define RESULT_NONE 2    /* discarded */
+
+static void compile_parse( PARSE * parse, compiler * c, int result_location );
+static struct arg_list * arg_list_compile( PARSE * parse, int * num_arguments );
+
+/* Emits code for a boolean condition. If branch_true is nonzero, the emitted
+ * code jumps to 'label' when the condition holds; otherwise it jumps there
+ * when the condition fails. && and || short-circuit via extra labels. */
+static void compile_condition( PARSE * parse, compiler * c, int branch_true, int label )
+{
+    assert( parse->type == PARSE_EVAL );
+    switch ( parse->num )
+    {
+        case EXPR_EXISTS:
+            compile_parse( parse->left, c, RESULT_STACK );
+            if ( branch_true )
+                compile_emit_branch( c, INSTR_JUMP_NOT_EMPTY, label );
+            else
+                compile_emit_branch( c, INSTR_JUMP_EMPTY, label );
+            break;
+
+        case EXPR_EQUALS:
+            compile_parse( parse->left, c, RESULT_STACK );
+            compile_parse( parse->right, c, RESULT_STACK );
+            if ( branch_true )
+                compile_emit_branch( c, INSTR_JUMP_EQ, label );
+            else
+                compile_emit_branch( c, INSTR_JUMP_NE, label );
+            break;
+
+        case EXPR_NOTEQ:
+            compile_parse( parse->left, c, RESULT_STACK );
+            compile_parse( parse->right, c, RESULT_STACK );
+            if ( branch_true )
+                compile_emit_branch( c, INSTR_JUMP_NE, label );
+            else
+                compile_emit_branch( c, INSTR_JUMP_EQ, label );
+            break;
+
+        case EXPR_LESS:
+            compile_parse( parse->left, c, RESULT_STACK );
+            compile_parse( parse->right, c, RESULT_STACK );
+            if ( branch_true )
+                compile_emit_branch( c, INSTR_JUMP_LT, label );
+            else
+                compile_emit_branch( c, INSTR_JUMP_GE, label );
+            break;
+
+        case EXPR_LESSEQ:
+            compile_parse( parse->left, c, RESULT_STACK );
+            compile_parse( parse->right, c, RESULT_STACK );
+            if ( branch_true )
+                compile_emit_branch( c, INSTR_JUMP_LE, label );
+            else
+                compile_emit_branch( c, INSTR_JUMP_GT, label );
+            break;
+
+        case EXPR_MORE:
+            compile_parse( parse->left, c, RESULT_STACK );
+            compile_parse( parse->right, c, RESULT_STACK );
+            if ( branch_true )
+                compile_emit_branch( c, INSTR_JUMP_GT, label );
+            else
+                compile_emit_branch( c, INSTR_JUMP_LE, label );
+            break;
+
+        case EXPR_MOREEQ:
+            compile_parse( parse->left, c, RESULT_STACK );
+            compile_parse( parse->right, c, RESULT_STACK );
+            if ( branch_true )
+                compile_emit_branch( c, INSTR_JUMP_GE, label );
+            else
+                compile_emit_branch( c, INSTR_JUMP_LT, label );
+            break;
+
+        case EXPR_IN:
+            compile_parse( parse->left, c, RESULT_STACK );
+            compile_parse( parse->right, c, RESULT_STACK );
+            if ( branch_true )
+                compile_emit_branch( c, INSTR_JUMP_IN, label );
+            else
+                compile_emit_branch( c, INSTR_JUMP_NOT_IN, label );
+            break;
+
+        case EXPR_AND:
+            /* Short-circuit: when branching on true, a false left operand
+             * skips the right operand via a local label. */
+            if ( branch_true )
+            {
+                int f = compile_new_label( c );
+                compile_condition( parse->left, c, 0, f );
+                compile_condition( parse->right, c, 1, label );
+                compile_set_label( c, f );
+            }
+            else
+            {
+                compile_condition( parse->left, c, 0, label );
+                compile_condition( parse->right, c, 0, label );
+            }
+            break;
+
+        case EXPR_OR:
+            /* Mirror image of EXPR_AND. */
+            if ( branch_true )
+            {
+                compile_condition( parse->left, c, 1, label );
+                compile_condition( parse->right, c, 1, label );
+            }
+            else
+            {
+                int t = compile_new_label( c );
+                compile_condition( parse->left, c, 1, t );
+                compile_condition( parse->right, c, 0, label );
+                compile_set_label( c, t );
+            }
+            break;
+
+        case EXPR_NOT:
+            /* Negation just flips the branch sense. */
+            compile_condition( parse->left, c, !branch_true, label );
+            break;
+    }
+}
+
+/*
+ * adjust_result() - emit the instructions needed to move a value from the
+ * location where the last compiled expression left it (actual_location) to
+ * the location the caller requested (desired_location).  Locations are
+ * RESULT_NONE, RESULT_STACK and RESULT_RETURN; any other combination is a
+ * compiler bug and trips the assert.
+ */
+static void adjust_result( compiler * c, int actual_location,
+    int desired_location )
+{
+    /* Already where the caller wants it: nothing to emit. */
+    if ( actual_location == desired_location )
+        return;
+
+    switch ( actual_location )
+    {
+    case RESULT_STACK:
+        if ( desired_location == RESULT_RETURN )
+            compile_emit( c, INSTR_SET_RESULT, 0 );
+        else if ( desired_location == RESULT_NONE )
+            compile_emit( c, INSTR_POP, 0 );
+        else
+            assert( !"invalid result location" );
+        break;
+
+    case RESULT_RETURN:
+        if ( desired_location == RESULT_STACK )
+            compile_emit( c, INSTR_PUSH_RESULT, 0 );
+        else if ( desired_location != RESULT_NONE )
+            assert( !"invalid result location" );
+        /* RETURN -> NONE: discarding the return slot needs no instruction. */
+        break;
+
+    case RESULT_NONE:
+        if ( desired_location == RESULT_STACK )
+            compile_emit( c, INSTR_PUSH_EMPTY, 0 );
+        else if ( desired_location == RESULT_RETURN )
+        {
+            compile_emit( c, INSTR_PUSH_EMPTY, 0 );
+            compile_emit( c, INSTR_SET_RESULT, 0 );
+        }
+        else
+            assert( !"invalid result location" );
+        break;
+
+    default:
+        assert( !"invalid result location" );
+    }
+}
+
+/*
+ * parse_type() - human-readable name of a PARSE node's type.  Only the node
+ * kinds of interest are spelled out; everything else reads "unknown".
+ */
+static char const * parse_type( PARSE * parse )
+{
+    if ( parse->type == PARSE_APPEND )
+        return "append";
+    if ( parse->type == PARSE_EVAL )
+        return "eval";
+    if ( parse->type == PARSE_RULES )
+        return "rules";
+    return "unknown";
+}
+
+/*
+ * compile_append_chain() - compile a left-recursive chain of PARSE_APPEND
+ * nodes, leaving the concatenated list on the stack.  Recursing only on the
+ * left child keeps evaluation order left-to-right and emits exactly one
+ * INSTR_PUSH_APPEND per real concatenation.
+ */
+static void compile_append_chain( PARSE * parse, compiler * c )
+{
+    assert( parse->type == PARSE_APPEND );
+    /* A PARSE_NULL left child marks the start of the chain. */
+    if ( parse->left->type == PARSE_NULL )
+        compile_parse( parse->right, c, RESULT_STACK );
+    else
+    {
+        if ( parse->left->type == PARSE_APPEND )
+            compile_append_chain( parse->left, c );
+        else
+            compile_parse( parse->left, c, RESULT_STACK );
+        compile_parse( parse->right, c, RESULT_STACK );
+        compile_emit( c, INSTR_PUSH_APPEND, 0 );
+    }
+}
+
+/*
+ * compile_parse() - main code generator.  Compiles one parse-tree node into
+ * bytecode appended to compiler c, arranging for the node's value to end up
+ * in result_location (RESULT_NONE, RESULT_STACK or RESULT_RETURN).  Each
+ * branch handles one PARSE_* node kind; the order in which instructions are
+ * emitted is significant throughout.
+ */
+static void compile_parse( PARSE * parse, compiler * c, int result_location )
+{
+    if ( parse->type == PARSE_APPEND )
+    {
+        /* List concatenation: the chain leaves its value on the stack. */
+        compile_append_chain( parse, c );
+        adjust_result( c, RESULT_STACK, result_location );
+    }
+    else if ( parse->type == PARSE_EVAL )
+    {
+        /* FIXME: This is only needed because of the bizarre parsing of
+         * conditions.
+         */
+        if ( parse->num == EXPR_EXISTS )
+            compile_parse( parse->left, c, result_location );
+        else
+        {
+            int f = compile_new_label( c );
+            int end = compile_new_label( c );
+
+            printf( "%s:%d: Conditional used as list (check operator "
+                "precedence).\n", object_str( parse->file ), parse->line );
+
+            /* Emit the condition */
+            compile_condition( parse, c, 0, f );
+            compile_emit( c, INSTR_PUSH_CONSTANT, compile_emit_constant( c,
+                constant_true ) );
+            compile_emit_branch( c, INSTR_JUMP, end );
+            compile_set_label( c, f );
+            compile_emit( c, INSTR_PUSH_EMPTY, 0 );
+            compile_set_label( c, end );
+            adjust_result( c, RESULT_STACK, result_location );
+        }
+    }
+    else if ( parse->type == PARSE_FOREACH )
+    {
+        /* "for [local] var in list { ... }"; parse->num is the 'local' flag. */
+        int var = compile_emit_constant( c, parse->string );
+        int top = compile_new_label( c );
+        int end = compile_new_label( c );
+
+        /*
+         * Evaluate the list.
+         */
+        compile_parse( parse->left, c, RESULT_STACK );
+
+        /* Localize the loop variable */
+        if ( parse->num )
+        {
+            compile_emit( c, INSTR_PUSH_EMPTY, 0 );
+            compile_emit( c, INSTR_PUSH_LOCAL, var );
+            compile_emit( c, INSTR_SWAP, 1 );
+        }
+
+        compile_emit( c, INSTR_FOR_INIT, 0 );
+        compile_set_label( c, top );
+        compile_emit_branch( c, INSTR_FOR_LOOP, end );
+        compile_emit( c, INSTR_SET, var );
+
+        /* Run the loop body */
+        compile_parse( parse->right, c, RESULT_NONE );
+
+        compile_emit_branch( c, INSTR_JUMP, top );
+        compile_set_label( c, end );
+
+        if ( parse->num )
+            compile_emit( c, INSTR_POP_LOCAL, var );
+
+        adjust_result( c, RESULT_NONE, result_location);
+    }
+    else if ( parse->type == PARSE_IF )
+    {
+        int f = compile_new_label( c );
+        /* Emit the condition */
+        compile_condition( parse->left, c, 0, f );
+        /* Emit the if block */
+        compile_parse( parse->right, c, result_location );
+        if ( parse->third->type != PARSE_NULL || result_location != RESULT_NONE )
+        {
+            /* Emit the else block */
+            int end = compile_new_label( c );
+            compile_emit_branch( c, INSTR_JUMP, end );
+            compile_set_label( c, f );
+            compile_parse( parse->third, c, result_location );
+            compile_set_label( c, end );
+        }
+        else
+            compile_set_label( c, f );
+
+    }
+    else if ( parse->type == PARSE_WHILE )
+    {
+        /* The loop body's value is kept in the return slot so each iteration
+         * overwrites the previous one. */
+        int nested_result = result_location == RESULT_NONE
+            ? RESULT_NONE
+            : RESULT_RETURN;
+        int test = compile_new_label( c );
+        int top = compile_new_label( c );
+        /* Make sure that we return an empty list if the loop runs zero times.
+         */
+        adjust_result( c, RESULT_NONE, nested_result );
+        /* Jump to the loop test. */
+        compile_emit_branch( c, INSTR_JUMP, test );
+        compile_set_label( c, top );
+        /* Emit the loop body. */
+        compile_parse( parse->right, c, nested_result );
+        /* Emit the condition. */
+        compile_set_label( c, test );
+        compile_condition( parse->left, c, 1, top );
+
+        adjust_result( c, nested_result, result_location );
+    }
+    else if ( parse->type == PARSE_INCLUDE )
+    {
+        /* "include file" - evaluate the name, then load and bind it. */
+        compile_parse( parse->left, c, RESULT_STACK );
+        compile_emit( c, INSTR_INCLUDE, 0 );
+        compile_emit( c, INSTR_BIND_MODULE_VARIABLES, 0 );
+        adjust_result( c, RESULT_NONE, result_location );
+    }
+    else if ( parse->type == PARSE_MODULE )
+    {
+        /* "module name { ... }": push the module, run the body, pop it. */
+        int const nested_result = result_location == RESULT_NONE
+            ? RESULT_NONE
+            : RESULT_RETURN;
+        compile_parse( parse->left, c, RESULT_STACK );
+        compile_emit( c, INSTR_PUSH_MODULE, 0 );
+        compile_parse( parse->right, c, nested_result );
+        compile_emit( c, INSTR_POP_MODULE, 0 );
+        adjust_result( c, nested_result, result_location );
+    }
+    else if ( parse->type == PARSE_CLASS )
+    {
+        /* Evaluate the class name. */
+        compile_parse( parse->left->right, c, RESULT_STACK );
+        /* Evaluate the base classes. */
+        if ( parse->left->left )
+            compile_parse( parse->left->left->right, c, RESULT_STACK );
+        else
+            compile_emit( c, INSTR_PUSH_EMPTY, 0 );
+        compile_emit( c, INSTR_CLASS, 0 );
+        compile_parse( parse->right, c, RESULT_NONE );
+        compile_emit( c, INSTR_BIND_MODULE_VARIABLES, 0 );
+        compile_emit( c, INSTR_POP_MODULE, 0 );
+
+        adjust_result( c, RESULT_NONE, result_location );
+    }
+    else if ( parse->type == PARSE_LIST )
+    {
+        /* A literal list token: expand variable references in it. */
+        OBJECT * const o = parse->string;
+        char const * s = object_str( o );
+        VAR_PARSE_GROUP * group;
+        current_file = object_str( parse->file );
+        current_line = parse->line;
+        group = parse_expansion( &s );
+        var_parse_group_compile( group, c );
+        var_parse_group_free( group );
+        adjust_result( c, RESULT_STACK, result_location );
+    }
+    else if ( parse->type == PARSE_LOCAL )
+    {
+        int nested_result = result_location == RESULT_NONE
+            ? RESULT_NONE
+            : RESULT_RETURN;
+        /* This should be left recursive group of compile_appends. */
+        PARSE * vars = parse->left;
+
+        /* Special case an empty list of vars */
+        if ( vars->type == PARSE_NULL )
+        {
+            compile_parse( parse->right, c, RESULT_NONE );
+            compile_parse( parse->third, c, result_location );
+            nested_result = result_location;
+        }
+        /* Check whether there is exactly one variable with a constant name. */
+        else if ( vars->left->type == PARSE_NULL &&
+            vars->right->type == PARSE_LIST )
+        {
+            char const * s = object_str( vars->right->string );
+            VAR_PARSE_GROUP * group;
+            current_file = object_str( parse->file );
+            current_line = parse->line;
+            group = parse_expansion( &s );
+            if ( group->elems->size == 1 && dynamic_array_at( VAR_PARSE *,
+                group->elems, 0 )->type == VAR_PARSE_TYPE_STRING )
+            {
+                /* Fast path: a single constant name gets a dedicated
+                 * PUSH_LOCAL/POP_LOCAL pair. */
+                int const name = compile_emit_constant( c, (
+                    (VAR_PARSE_STRING *)dynamic_array_at( VAR_PARSE *,
+                    group->elems, 0 ) )->s );
+                var_parse_group_free( group );
+                compile_parse( parse->right, c, RESULT_STACK );
+                compile_emit( c, INSTR_PUSH_LOCAL, name );
+                compile_parse( parse->third, c, nested_result );
+                compile_emit( c, INSTR_POP_LOCAL, name );
+            }
+            else
+            {
+                var_parse_group_compile( group, c );
+                var_parse_group_free( group );
+                compile_parse( parse->right, c, RESULT_STACK );
+                compile_emit( c, INSTR_PUSH_LOCAL_GROUP, 0 );
+                compile_parse( parse->third, c, nested_result );
+                compile_emit( c, INSTR_POP_LOCAL_GROUP, 0 );
+            }
+        }
+        else
+        {
+            compile_parse( parse->left, c, RESULT_STACK );
+            compile_parse( parse->right, c, RESULT_STACK );
+            compile_emit( c, INSTR_PUSH_LOCAL_GROUP, 0 );
+            compile_parse( parse->third, c, nested_result );
+            compile_emit( c, INSTR_POP_LOCAL_GROUP, 0 );
+        }
+        adjust_result( c, nested_result, result_location );
+    }
+    else if ( parse->type == PARSE_ON )
+    {
+        if ( parse->right->type == PARSE_APPEND &&
+            parse->right->left->type == PARSE_NULL &&
+            parse->right->right->type == PARSE_LIST )
+        {
+            /* [ on $(target) return $(variable) ] */
+            PARSE * value = parse->right->right;
+            OBJECT * const o = value->string;
+            char const * s = object_str( o );
+            VAR_PARSE_GROUP * group;
+            OBJECT * varname = 0;
+            current_file = object_str( value->file );
+            current_line = value->line;
+            group = parse_expansion( &s );
+            /* Detect the common case: exactly one plain variable reference
+             * with no modifiers or subscript. */
+            if ( group->elems->size == 1 )
+            {
+                VAR_PARSE * one = dynamic_array_at( VAR_PARSE *, group->elems, 0 );
+                if ( one->type == VAR_PARSE_TYPE_VAR )
+                {
+                    VAR_PARSE_VAR * var = ( VAR_PARSE_VAR * )one;
+                    if ( var->modifiers->size == 0 && !var->subscript && var->name->elems->size == 1 )
+                    {
+                        VAR_PARSE * name = dynamic_array_at( VAR_PARSE *, var->name->elems, 0 );
+                        if ( name->type == VAR_PARSE_TYPE_STRING )
+                        {
+                            varname = ( ( VAR_PARSE_STRING * )name )->s;
+                        }
+                    }
+                }
+            }
+            if ( varname )
+            {
+                /* We have one variable with a fixed name and no modifiers. */
+                compile_parse( parse->left, c, RESULT_STACK );
+                compile_emit( c, INSTR_GET_ON, compile_emit_constant( c, varname ) );
+            }
+            else
+            {
+                /* Too complex. Fall back on push/pop. */
+                int end = compile_new_label( c );
+                compile_parse( parse->left, c, RESULT_STACK );
+                compile_emit_branch( c, INSTR_PUSH_ON, end );
+                var_parse_group_compile( group, c );
+                compile_emit( c, INSTR_POP_ON, 0 );
+                compile_set_label( c, end );
+            }
+            var_parse_group_free( group );
+        }
+        else
+        {
+            /* General "on target statement" form. */
+            int end = compile_new_label( c );
+            compile_parse( parse->left, c, RESULT_STACK );
+            compile_emit_branch( c, INSTR_PUSH_ON, end );
+            compile_parse( parse->right, c, RESULT_STACK );
+            compile_emit( c, INSTR_POP_ON, 0 );
+            compile_set_label( c, end );
+        }
+        adjust_result( c, RESULT_STACK, result_location );
+    }
+    else if ( parse->type == PARSE_RULE )
+    {
+        /* Rule invocation: push each argument list, then call. */
+        PARSE * p;
+        int n = 0;
+        VAR_PARSE_GROUP * group;
+        char const * s = object_str( parse->string );
+
+        if ( parse->left->left || parse->left->right->type != PARSE_NULL )
+            for ( p = parse->left; p; p = p->left )
+            {
+                compile_parse( p->right, c, RESULT_STACK );
+                ++n;
+            }
+
+        current_file = object_str( parse->file );
+        current_line = parse->line;
+        group = parse_expansion( &s );
+
+        /* "$(object).member" syntax compiles to a member-rule call. */
+        if ( group->elems->size == 2 &&
+            dynamic_array_at( VAR_PARSE *, group->elems, 0 )->type == VAR_PARSE_TYPE_VAR &&
+            dynamic_array_at( VAR_PARSE *, group->elems, 1 )->type == VAR_PARSE_TYPE_STRING &&
+            ( object_str( ( (VAR_PARSE_STRING *)dynamic_array_at( VAR_PARSE *, group->elems, 1 ) )->s )[ 0 ] == '.' ) )
+        {
+            VAR_PARSE_STRING * access = (VAR_PARSE_STRING *)dynamic_array_at( VAR_PARSE *, group->elems, 1 );
+            OBJECT * member = object_new( object_str( access->s ) + 1 );
+            /* Emit the object */
+            var_parse_var_compile( (VAR_PARSE_VAR *)dynamic_array_at( VAR_PARSE *, group->elems, 0 ), c );
+            var_parse_group_free( group );
+            compile_emit( c, INSTR_CALL_MEMBER_RULE, n );
+            compile_emit( c, compile_emit_constant( c, member ), parse->line );
+            object_free( member );
+        }
+        else
+        {
+            var_parse_group_compile( group, c );
+            var_parse_group_free( group );
+            compile_emit( c, INSTR_CALL_RULE, n );
+            compile_emit( c, compile_emit_constant( c, parse->string ), parse->line );
+        }
+
+        adjust_result( c, RESULT_STACK, result_location );
+    }
+    else if ( parse->type == PARSE_RULES )
+    {
+        /* A statement sequence: all but the last discard their result. */
+        do compile_parse( parse->left, c, RESULT_NONE );
+        while ( ( parse = parse->right )->type == PARSE_RULES );
+        compile_parse( parse, c, result_location );
+    }
+    else if ( parse->type == PARSE_SET )
+    {
+        /* Variable assignment; parse->num selects =, += or ?= semantics. */
+        PARSE * vars = parse->left;
+        unsigned int op_code;
+        unsigned int op_code_group;
+
+        switch ( parse->num )
+        {
+        case ASSIGN_APPEND: op_code = INSTR_APPEND; op_code_group = INSTR_APPEND_GROUP; break;
+        case ASSIGN_DEFAULT: op_code = INSTR_DEFAULT; op_code_group = INSTR_DEFAULT_GROUP; break;
+        default: op_code = INSTR_SET; op_code_group = INSTR_SET_GROUP; break;
+        }
+
+        /* Check whether there is exactly one variable with a constant name. */
+        if ( vars->type == PARSE_LIST )
+        {
+            char const * s = object_str( vars->string );
+            VAR_PARSE_GROUP * group;
+            current_file = object_str( parse->file );
+            current_line = parse->line;
+            group = parse_expansion( &s );
+            if ( group->elems->size == 1 && dynamic_array_at( VAR_PARSE *,
+                group->elems, 0 )->type == VAR_PARSE_TYPE_STRING )
+            {
+                int const name = compile_emit_constant( c, (
+                    (VAR_PARSE_STRING *)dynamic_array_at( VAR_PARSE *,
+                    group->elems, 0 ) )->s );
+                var_parse_group_free( group );
+                compile_parse( parse->right, c, RESULT_STACK );
+                if ( result_location != RESULT_NONE )
+                {
+                    compile_emit( c, INSTR_SET_RESULT, 1 );
+                }
+                compile_emit( c, op_code, name );
+            }
+            else
+            {
+                var_parse_group_compile( group, c );
+                var_parse_group_free( group );
+                compile_parse( parse->right, c, RESULT_STACK );
+                if ( result_location != RESULT_NONE )
+                {
+                    compile_emit( c, INSTR_SET_RESULT, 1 );
+                }
+                compile_emit( c, op_code_group, 0 );
+            }
+        }
+        else
+        {
+            compile_parse( parse->left, c, RESULT_STACK );
+            compile_parse( parse->right, c, RESULT_STACK );
+            if ( result_location != RESULT_NONE )
+            {
+                compile_emit( c, INSTR_SET_RESULT, 1 );
+            }
+            compile_emit( c, op_code_group, 0 );
+        }
+        if ( result_location != RESULT_NONE )
+        {
+            adjust_result( c, RESULT_RETURN, result_location );
+        }
+    }
+    else if ( parse->type == PARSE_SETCOMP )
+    {
+        /* "rule name ( args ) { body }" - register a new rule. */
+        int n_args;
+        struct arg_list * args = arg_list_compile( parse->right, &n_args );
+        int const rule_id = compile_emit_rule( c, parse->string, parse->left,
+            n_args, args, parse->num );
+        compile_emit( c, INSTR_RULE, rule_id );
+        adjust_result( c, RESULT_NONE, result_location );
+    }
+    else if ( parse->type == PARSE_SETEXEC )
+    {
+        /* "actions name { ... }" - register an actions block. */
+        int const actions_id = compile_emit_actions( c, parse );
+        compile_parse( parse->left, c, RESULT_STACK );
+        compile_emit( c, INSTR_ACTIONS, actions_id );
+        adjust_result( c, RESULT_NONE, result_location );
+    }
+    else if ( parse->type == PARSE_SETTINGS )
+    {
+        /* "var on targets = value" - set a target-specific variable. */
+        compile_parse( parse->left, c, RESULT_STACK );
+        compile_parse( parse->third, c, RESULT_STACK );
+        compile_parse( parse->right, c, RESULT_STACK );
+
+        switch ( parse->num )
+        {
+        case ASSIGN_APPEND: compile_emit( c, INSTR_APPEND_ON, 0 ); break;
+        case ASSIGN_DEFAULT: compile_emit( c, INSTR_DEFAULT_ON, 0 ); break;
+        default: compile_emit( c, INSTR_SET_ON, 0 ); break;
+        }
+
+        adjust_result( c, RESULT_STACK, result_location );
+    }
+    else if ( parse->type == PARSE_SWITCH )
+    {
+        /* Each case glob-matches the value left on the stack; the final POP
+         * discards it when no case matched. */
+        int const switch_end = compile_new_label( c );
+        compile_parse( parse->left, c, RESULT_STACK );
+
+        for ( parse = parse->right; parse; parse = parse->right )
+        {
+            int const id = compile_emit_constant( c, parse->left->string );
+            int const next_case = compile_new_label( c );
+            compile_emit( c, INSTR_PUSH_CONSTANT, id );
+            compile_emit_branch( c, INSTR_JUMP_NOT_GLOB, next_case );
+            compile_parse( parse->left->left, c, result_location );
+            compile_emit_branch( c, INSTR_JUMP, switch_end );
+            compile_set_label( c, next_case );
+        }
+        compile_emit( c, INSTR_POP, 0 );
+        adjust_result( c, RESULT_NONE, result_location );
+        compile_set_label( c, switch_end );
+    }
+    else if ( parse->type == PARSE_NULL )
+        adjust_result( c, RESULT_NONE, result_location );
+    else
+        assert( !"unknown PARSE type." );
+}
+
+/* Return the rule name attached to this function, or 0 if none was set. */
+OBJECT * function_rulename( FUNCTION * function )
+{
+    return function->rulename;
+}
+
+/* Attach a rule name to this function.  NOTE(review): the pointer is stored
+ * as-is; ownership/copying conventions are not visible here - confirm callers
+ * pass an object they relinquish. */
+void function_set_rulename( FUNCTION * function, OBJECT * rulename )
+{
+    function->rulename = rulename;
+}
+
+/*
+ * function_location() - report where a function was defined.  Built-in (and,
+ * when enabled, Python) functions have no Jam source location and report the
+ * "builtin" pseudo-file with line -1; Jam functions report their recorded
+ * file and line.
+ */
+void function_location( FUNCTION * function_, OBJECT * * file, int * line )
+{
+    if ( function_->type == FUNCTION_BUILTIN )
+    {
+        *file = constant_builtin;
+        *line = -1;
+    }
+#ifdef HAVE_PYTHON
+    /* Must be 'else if': with a plain 'if' here the trailing 'else' below
+     * would bind to this statement, so a FUNCTION_BUILTIN would also fall
+     * into the Jam branch and trip the FUNCTION_JAM assert.
+     */
+    else if ( function_->type == FUNCTION_PYTHON )
+    {
+        *file = constant_builtin;
+        *line = -1;
+    }
+#endif
+    else
+    {
+        JAM_FUNCTION * function = (JAM_FUNCTION *)function_;
+        assert( function_->type == FUNCTION_JAM );
+        *file = function->file;
+        *line = function->line;
+    }
+}
+
+static struct arg_list * arg_list_compile_builtin( char const * * args,
+ int * num_arguments );
+
+/*
+ * function_builtin() - wrap a C callback in a FUNCTION object.  args is a
+ * 0-terminated array of argument-list tokens (may be 0 for no checking),
+ * compiled into formal_arguments.  The result starts with a reference count
+ * of 1 and no rule name.
+ */
+FUNCTION * function_builtin( LIST * ( * func )( FRAME * frame, int flags ),
+    int flags, char const * * args )
+{
+    BUILTIN_FUNCTION * result = BJAM_MALLOC( sizeof( BUILTIN_FUNCTION ) );
+    result->base.type = FUNCTION_BUILTIN;
+    result->base.reference_count = 1;
+    result->base.rulename = 0;
+    result->base.formal_arguments = arg_list_compile_builtin( args,
+        &result->base.num_formal_arguments );
+    result->func = func;
+    result->flags = flags;
+    return (FUNCTION *)result;
+}
+
+/*
+ * function_compile() - compile a parse tree into an executable JAM_FUNCTION,
+ * appending a final INSTR_RETURN and recording the source file/line for
+ * diagnostics.
+ */
+FUNCTION * function_compile( PARSE * parse )
+{
+    compiler c[ 1 ];
+    JAM_FUNCTION * result;
+    compiler_init( c );
+    /* The body's value becomes the function's return value. */
+    compile_parse( parse, c, RESULT_RETURN );
+    compile_emit( c, INSTR_RETURN, 0 );
+    result = compile_to_function( c );
+    compiler_free( c );
+    result->file = object_copy( parse->file );
+    result->line = parse->line;
+    return (FUNCTION *)result;
+}
+
+/*
+ * function_compile_actions() - compile the text of an actions block (shell
+ * command template with variable expansions) into a JAM_FUNCTION.  file and
+ * line locate the block for error messages.
+ */
+FUNCTION * function_compile_actions( char const * actions, OBJECT * file,
+    int line )
+{
+    compiler c[ 1 ];
+    JAM_FUNCTION * result;
+    VAR_PARSE_ACTIONS * parse;
+    /* parse_expansion/parse_actions report errors against these globals. */
+    current_file = object_str( file );
+    current_line = line;
+    parse = parse_actions( actions );
+    compiler_init( c );
+    var_parse_actions_compile( parse, c );
+    var_parse_actions_free( parse );
+    compile_emit( c, INSTR_RETURN, 0 );
+    result = compile_to_function( c );
+    compiler_free( c );
+    result->file = object_copy( file );
+    result->line = line;
+    return (FUNCTION *)result;
+}
+
+static void argument_list_print( struct arg_list * args, int num_args );
+
+
+/* Define delimiters for type check elements in argument lists (and return type
+ * specifications, eventually).
+ */
+# define TYPE_OPEN_DELIM '['
+# define TYPE_CLOSE_DELIM ']'
+
+/*
+ * is_type_name() - true iff the given string represents a type check
+ * specification, i.e. a non-empty token that both starts with
+ * TYPE_OPEN_DELIM and ends with TYPE_CLOSE_DELIM (e.g. "[path]").
+ */
+
+int is_type_name( char const * s )
+{
+    size_t const len = strlen( s );
+    return len != 0
+        && s[ 0 ] == TYPE_OPEN_DELIM
+        && s[ len - 1 ] == TYPE_CLOSE_DELIM;
+}
+
+/*
+ * argument_error() - print a detailed argument-mismatch diagnostic (expected
+ * signature, actual arguments, backtrace, rule definition site) and abort
+ * the program.  Does not return.
+ */
+static void argument_error( char const * message, FUNCTION * procedure,
+    FRAME * frame, OBJECT * arg )
+{
+    extern void print_source_line( FRAME * );
+    LOL * actual = frame->args;
+    backtrace_line( frame->prev );
+    printf( "*** argument error\n* rule %s ( ", frame->rulename );
+    argument_list_print( procedure->formal_arguments,
+        procedure->num_formal_arguments );
+    printf( " )\n* called with: ( " );
+    lol_print( actual );
+    printf( " )\n* %s %s\n", message, arg ? object_str ( arg ) : "" );
+    /* Point frame->file/line at the rule definition for the source print. */
+    function_location( procedure, &frame->file, &frame->line );
+    print_source_line( frame );
+    printf( "see definition of rule '%s' being called\n", frame->rulename );
+    backtrace( frame->prev );
+    exit( 1 );
+}
+
+/*
+ * type_check_range() - run the user-defined checking rule named type_name
+ * (looked up in the "typecheck" module) on each value in [iter, end).  If
+ * the rule returns a non-empty list it is treated as an error message and
+ * argument_error() aborts.  Silently does nothing when there is no type
+ * name, no values, or no matching checking rule.
+ */
+static void type_check_range( OBJECT * type_name, LISTITER iter, LISTITER end,
+    FRAME * caller, FUNCTION * called, OBJECT * arg_name )
+{
+    /* Cached across calls; bound on first use. */
+    static module_t * typecheck = 0;
+
+    /* If nothing to check, bail now. */
+    if ( iter == end || !type_name )
+        return;
+
+    if ( !typecheck )
+        typecheck = bindmodule( constant_typecheck );
+
+    /* If the checking rule can not be found, also bail. */
+    if ( !typecheck->rules || !hash_find( typecheck->rules, type_name ) )
+        return;
+
+    for ( ; iter != end; iter = list_next( iter ) )
+    {
+        LIST * error;
+        FRAME frame[ 1 ];
+        frame_init( frame );
+        frame->module = typecheck;
+        frame->prev = caller;
+        frame->prev_user = caller->module->user_module
+            ? caller
+            : caller->prev_user;
+
+        /* Prepare the argument list */
+        lol_add( frame->args, list_new( object_copy( list_item( iter ) ) ) );
+        error = evaluate_rule( bindrule( type_name, frame->module ), type_name, frame );
+
+        if ( !list_empty( error ) )
+            argument_error( object_str( list_front( error ) ), called, caller,
+                arg_name );
+
+        frame_free( frame );
+    }
+}
+
+/* Convenience wrapper: type-check every element of a whole list. */
+static void type_check( OBJECT * type_name, LIST * values, FRAME * caller,
+    FUNCTION * called, OBJECT * arg_name )
+{
+    type_check_range( type_name, list_begin( values ), list_end( values ),
+        caller, called, arg_name );
+}
+
+/*
+ * argument_list_check() - verify the actual arguments in frame->args against
+ * a rule's formal argument lists: report missing/extra arguments and run
+ * type checks, without binding any variables (see argument_list_push for
+ * the binding variant).  Errors abort via argument_error().
+ */
+void argument_list_check( struct arg_list * formal, int formal_count,
+    FUNCTION * function, FRAME * frame )
+{
+    LOL * all_actual = frame->args;
+    int i;
+
+    for ( i = 0; i < formal_count; ++i )
+    {
+        LIST * actual = lol_get( all_actual, i );
+        LISTITER actual_iter = list_begin( actual );
+        LISTITER const actual_end = list_end( actual );
+        int j;
+        for ( j = 0; j < formal[ i ].size; ++j )
+        {
+            struct argument * formal_arg = &formal[ i ].args[ j ];
+
+            switch ( formal_arg->flags )
+            {
+            case ARG_ONE:
+                if ( actual_iter == actual_end )
+                    argument_error( "missing argument", function, frame,
+                        formal_arg->arg_name );
+                type_check_range( formal_arg->type_name, actual_iter,
+                    list_next( actual_iter ), frame, function,
+                    formal_arg->arg_name );
+                actual_iter = list_next( actual_iter );
+                break;
+            case ARG_OPTIONAL:
+                /* An optional argument may simply be absent; check it only
+                 * when a value is present.  (The old dead 'value = L0'
+                 * assignment has been dropped - nothing here reads it.)
+                 */
+                if ( actual_iter != actual_end )
+                {
+                    type_check_range( formal_arg->type_name, actual_iter,
+                        list_next( actual_iter ), frame, function,
+                        formal_arg->arg_name );
+                    actual_iter = list_next( actual_iter );
+                }
+                break;
+            case ARG_PLUS:
+                if ( actual_iter == actual_end )
+                    argument_error( "missing argument", function, frame,
+                        formal_arg->arg_name );
+                /* fallthrough */
+            case ARG_STAR:
+                /* '+' / '*' consume the rest of this argument list. */
+                type_check_range( formal_arg->type_name, actual_iter,
+                    actual_end, frame, function, formal_arg->arg_name );
+                actual_iter = actual_end;
+                break;
+            case ARG_VARIADIC:
+                /* A variadic signature accepts anything further. */
+                return;
+            }
+        }
+
+        if ( actual_iter != actual_end )
+            argument_error( "extra argument", function, frame, list_item(
+                actual_iter ) );
+    }
+
+    /* Any non-empty argument lists beyond the formals are errors too. */
+    for ( ; i < all_actual->count; ++i )
+    {
+        LIST * actual = lol_get( all_actual, i );
+        if ( !list_empty( actual ) )
+            argument_error( "extra argument", function, frame, list_front(
+                actual ) );
+    }
+}
+
+/*
+ * argument_list_push() - bind the actual arguments in frame->args to the
+ * rule's formal parameter variables, type-checking each value.  The previous
+ * value of every bound variable is saved on stack s so argument_list_pop()
+ * can restore it when the rule returns.  Errors abort via argument_error().
+ */
+void argument_list_push( struct arg_list * formal, int formal_count,
+    FUNCTION * function, FRAME * frame, STACK * s )
+{
+    LOL * all_actual = frame->args;
+    int i;
+
+    for ( i = 0; i < formal_count; ++i )
+    {
+        LIST * actual = lol_get( all_actual, i );
+        LISTITER actual_iter = list_begin( actual );
+        LISTITER const actual_end = list_end( actual );
+        int j;
+        for ( j = 0; j < formal[ i ].size; ++j )
+        {
+            struct argument * formal_arg = &formal[ i ].args[ j ];
+            /* The list bound to this formal parameter. */
+            LIST * value;
+
+            switch ( formal_arg->flags )
+            {
+            case ARG_ONE:
+                if ( actual_iter == actual_end )
+                    argument_error( "missing argument", function, frame,
+                        formal_arg->arg_name );
+                value = list_new( object_copy( list_item( actual_iter ) ) );
+                actual_iter = list_next( actual_iter );
+                break;
+            case ARG_OPTIONAL:
+                /* Absent optional argument binds to the empty list. */
+                if ( actual_iter == actual_end )
+                    value = L0;
+                else
+                {
+                    value = list_new( object_copy( list_item( actual_iter ) ) );
+                    actual_iter = list_next( actual_iter );
+                }
+                break;
+            case ARG_PLUS:
+                if ( actual_iter == actual_end )
+                    argument_error( "missing argument", function, frame,
+                        formal_arg->arg_name );
+                /* fallthrough */
+            case ARG_STAR:
+                /* '+' / '*' bind the rest of this argument list. */
+                value = list_copy_range( actual, actual_iter, actual_end );
+                actual_iter = actual_end;
+                break;
+            case ARG_VARIADIC:
+                return;
+            }
+
+            type_check( formal_arg->type_name, value, frame, function,
+                formal_arg->arg_name );
+
+            /* index != -1 means the variable lives in the module's
+             * fixed-variable slots; otherwise bind it by name. */
+            if ( formal_arg->index != -1 )
+            {
+                LIST * * const old = &frame->module->fixed_variables[
+                    formal_arg->index ];
+                stack_push( s, *old );
+                *old = value;
+            }
+            else
+                stack_push( s, var_swap( frame->module, formal_arg->arg_name,
+                    value ) );
+        }
+
+        if ( actual_iter != actual_end )
+            argument_error( "extra argument", function, frame, list_item(
+                actual_iter ) );
+    }
+
+    /* Any non-empty argument lists beyond the formals are errors too. */
+    for ( ; i < all_actual->count; ++i )
+    {
+        LIST * const actual = lol_get( all_actual, i );
+        if ( !list_empty( actual ) )
+            argument_error( "extra argument", function, frame, list_front(
+                actual ) );
+    }
+}
+
+/*
+ * argument_list_pop() - undo argument_list_push(): restore the saved values
+ * of all bound parameter variables.  Iterates in exact reverse order of the
+ * push so the stack pops line up with the pushes.
+ */
+void argument_list_pop( struct arg_list * formal, int formal_count,
+    FRAME * frame, STACK * s )
+{
+    int i;
+    for ( i = formal_count - 1; i >= 0; --i )
+    {
+        int j;
+        for ( j = formal[ i ].size - 1; j >= 0 ; --j )
+        {
+            struct argument * formal_arg = &formal[ i ].args[ j ];
+
+            /* Variadic formals never bound anything. */
+            if ( formal_arg->flags == ARG_VARIADIC )
+                continue;
+            if ( formal_arg->index != -1 )
+            {
+                LIST * const old = stack_pop( s );
+                LIST * * const pos = &frame->module->fixed_variables[
+                    formal_arg->index ];
+                list_free( *pos );
+                *pos = old;
+            }
+            else
+                var_set( frame->module, formal_arg->arg_name, stack_pop( s ),
+                    VAR_SET );
+        }
+    }
+}
+
+
+/* State machine for compiling one formal argument list, e.g.
+ * "[ path ] src * : dst ?".  Tokens are fed in one at a time via
+ * argument_compiler_add(); completed arguments accumulate in args.
+ */
+struct argument_compiler
+{
+    struct dynamic_array args[ 1 ];
+    /* The argument currently being assembled. */
+    struct argument arg;
+    int state;
+/* Expecting a type name or an argument name. */
+#define ARGUMENT_COMPILER_START 0
+/* Saw a "[type]" token; an argument name must follow. */
+#define ARGUMENT_COMPILER_FOUND_TYPE 1
+/* Saw an argument name; an optional ?/+/* modifier may follow. */
+#define ARGUMENT_COMPILER_FOUND_OBJECT 2
+/* Saw a bare '*' (variadic); all further tokens are ignored. */
+#define ARGUMENT_COMPILER_DONE 3
+};
+
+
+/* Prepare an argument compiler for a fresh argument list. */
+static void argument_compiler_init( struct argument_compiler * c )
+{
+    dynamic_array_init( c->args );
+    c->state = ARGUMENT_COMPILER_START;
+}
+
+/* Release the compiler's scratch array (not the compiled arguments, which
+ * arg_compile_impl() has already copied out by the time this is called). */
+static void argument_compiler_free( struct argument_compiler * c )
+{
+    dynamic_array_free( c->args );
+}
+
+/*
+ * argument_compiler_add() - feed one token of a formal argument list into
+ * the state machine.  file/line locate the token for error messages.  Note
+ * the deliberate fall-throughs: a token that finishes the previous argument
+ * (FOUND_OBJECT without a ?/+/* modifier) is immediately re-interpreted as
+ * the start of the next one (START), and a non-type token in START falls
+ * through to be treated as the argument name (FOUND_TYPE).
+ */
+static void argument_compiler_add( struct argument_compiler * c, OBJECT * arg,
+    OBJECT * file, int line )
+{
+    switch ( c->state )
+    {
+    case ARGUMENT_COMPILER_FOUND_OBJECT:
+
+        /* A ?/+/* modifier applies to the argument name just seen. */
+        if ( object_equal( arg, constant_question_mark ) )
+        {
+            c->arg.flags = ARG_OPTIONAL;
+        }
+        else if ( object_equal( arg, constant_plus ) )
+        {
+            c->arg.flags = ARG_PLUS;
+        }
+        else if ( object_equal( arg, constant_star ) )
+        {
+            c->arg.flags = ARG_STAR;
+        }
+
+        dynamic_array_push( c->args, c->arg );
+        c->state = ARGUMENT_COMPILER_START;
+
+        /* If the token was a modifier, it is consumed here; otherwise it
+         * starts the next argument, so fall through with it. */
+        if ( c->arg.flags != ARG_ONE )
+            break;
+        /* fall-through */
+
+    case ARGUMENT_COMPILER_START:
+
+        c->arg.type_name = 0;
+        c->arg.index = -1;
+        c->arg.flags = ARG_ONE;
+
+        if ( is_type_name( object_str( arg ) ) )
+        {
+            c->arg.type_name = object_copy( arg );
+            c->state = ARGUMENT_COMPILER_FOUND_TYPE;
+            break;
+        }
+        /* fall-through */
+
+    case ARGUMENT_COMPILER_FOUND_TYPE:
+
+        /* Two type names in a row means the argument name was omitted. */
+        if ( is_type_name( object_str( arg ) ) )
+        {
+            printf( "%s:%d: missing argument name before type name: %s\n",
+                object_str( file ), line, object_str( arg ) );
+            exit( 1 );
+        }
+
+        c->arg.arg_name = object_copy( arg );
+        if ( object_equal( arg, constant_star ) )
+        {
+            /* A bare '*' makes the whole rule variadic. */
+            c->arg.flags = ARG_VARIADIC;
+            dynamic_array_push( c->args, c->arg );
+            c->state = ARGUMENT_COMPILER_DONE;
+        }
+        else
+        {
+            c->state = ARGUMENT_COMPILER_FOUND_OBJECT;
+        }
+        break;
+
+    case ARGUMENT_COMPILER_DONE:
+        break;
+    }
+}
+
+/* Walk the parse tree of one argument list (a chain of PARSE_APPEND nodes
+ * over PARSE_LIST leaves) left-to-right, feeding each token to the state
+ * machine. */
+static void argument_compiler_recurse( struct argument_compiler * c,
+    PARSE * parse )
+{
+    if ( parse->type == PARSE_APPEND )
+    {
+        argument_compiler_recurse( c, parse->left );
+        argument_compiler_recurse( c, parse->right );
+    }
+    else if ( parse->type != PARSE_NULL )
+    {
+        assert( parse->type == PARSE_LIST );
+        argument_compiler_add( c, parse->string, parse->file, parse->line );
+    }
+}
+
+/*
+ * arg_compile_impl() - finish the state machine and copy the accumulated
+ * arguments into a freshly allocated arg_list.  A dangling type name (no
+ * argument name followed it) is a fatal error; a pending FOUND_OBJECT
+ * argument is flushed with its default ARG_ONE flag.
+ */
+static struct arg_list arg_compile_impl( struct argument_compiler * c,
+    OBJECT * file, int line )
+{
+    struct arg_list result;
+    switch ( c->state )
+    {
+    case ARGUMENT_COMPILER_START:
+    case ARGUMENT_COMPILER_DONE:
+        break;
+    case ARGUMENT_COMPILER_FOUND_TYPE:
+        printf( "%s:%d: missing argument name after type name: %s\n",
+            object_str( file ), line, object_str( c->arg.type_name ) );
+        exit( 1 );
+    case ARGUMENT_COMPILER_FOUND_OBJECT:
+        dynamic_array_push( c->args, c->arg );
+        break;
+    }
+    result.size = c->args->size;
+    result.args = BJAM_MALLOC( c->args->size * sizeof( struct argument ) );
+    memcpy( result.args, c->args->data, c->args->size * sizeof( struct argument
+        ) );
+    return result;
+}
+
+/* Compile one argument list from its parse tree into an arg_list. */
+static struct arg_list arg_compile( PARSE * parse )
+{
+    struct argument_compiler c[ 1 ];
+    struct arg_list result;
+    argument_compiler_init( c );
+    argument_compiler_recurse( c, parse );
+    result = arg_compile_impl( c, parse->file, parse->line );
+    argument_compiler_free( c );
+    return result;
+}
+
+/* Accumulates the compiled arg_lists of a rule's ':'-separated argument
+ * lists. */
+struct argument_list_compiler
+{
+    struct dynamic_array args[ 1 ];
+};
+
+/* Prepare an empty list-of-argument-lists accumulator. */
+static void argument_list_compiler_init( struct argument_list_compiler * c )
+{
+    dynamic_array_init( c->args );
+}
+
+/* Release the accumulator's scratch array (compiled lists have been copied
+ * out by the caller before this runs). */
+static void argument_list_compiler_free( struct argument_list_compiler * c )
+{
+    dynamic_array_free( c->args );
+}
+
+/* Compile one argument list subtree and append it to the accumulator. */
+static void argument_list_compiler_add( struct argument_list_compiler * c,
+    PARSE * parse )
+{
+    struct arg_list args = arg_compile( parse );
+    dynamic_array_push( c->args, args );
+}
+
+/* Walk the chain of argument-list nodes, adding each node's right subtree.
+ * NOTE(review): the right subtree is added before recursing into the left,
+ * so accumulation order depends on how the parser chains these nodes - the
+ * grammar is not visible here; confirm against jamgram.y that this yields
+ * the lists in declaration order. */
+static void argument_list_compiler_recurse( struct argument_list_compiler * c,
+    PARSE * parse )
+{
+    if ( parse )
+    {
+        argument_list_compiler_add( c, parse->right );
+        argument_list_compiler_recurse( c, parse->left );
+    }
+}
+
+/*
+ * arg_list_compile() - compile a rule's formal argument lists from their
+ * parse tree.  Returns a malloc'ed array of arg_list and stores its length
+ * in *num_arguments; returns 0 with *num_arguments == 0 when the rule
+ * declares no argument lists.
+ */
+static struct arg_list * arg_list_compile( PARSE * parse, int * num_arguments )
+{
+    if ( parse )
+    {
+        struct argument_list_compiler c[ 1 ];
+        struct arg_list * result;
+        argument_list_compiler_init( c );
+        argument_list_compiler_recurse( c, parse );
+        *num_arguments = c->args->size;
+        result = BJAM_MALLOC( c->args->size * sizeof( struct arg_list ) );
+        memcpy( result, c->args->data, c->args->size * sizeof( struct arg_list )
+            );
+        argument_list_compiler_free( c );
+        return result;
+    }
+    *num_arguments = 0;
+    return 0;
+}
+
+/*
+ * arg_list_compile_builtin() - compile a built-in rule's argument
+ * specification given as a 0-terminated array of C string tokens, where a
+ * ":" token separates consecutive argument lists.  Returns a malloc'ed
+ * array of arg_list and stores its length in *num_arguments; returns 0 with
+ * *num_arguments == 0 when args is 0.
+ */
+static struct arg_list * arg_list_compile_builtin( char const * * args,
+    int * num_arguments )
+{
+    if ( args )
+    {
+        struct argument_list_compiler c[ 1 ];
+        struct arg_list * result;
+        argument_list_compiler_init( c );
+        while ( *args )
+        {
+            struct argument_compiler arg_comp[ 1 ];
+            struct arg_list arg;
+            argument_compiler_init( arg_comp );
+            /* Consume tokens up to the next ":" separator (or the end). */
+            for ( ; *args; ++args )
+            {
+                OBJECT * token;
+                if ( strcmp( *args, ":" ) == 0 )
+                {
+                    ++args;
+                    break;
+                }
+                token = object_new( *args );
+                argument_compiler_add( arg_comp, token, constant_builtin, -1 );
+                object_free( token );
+            }
+            arg = arg_compile_impl( arg_comp, constant_builtin, -1 );
+            dynamic_array_push( c->args, arg );
+            argument_compiler_free( arg_comp );
+        }
+        *num_arguments = c->args->size;
+        result = BJAM_MALLOC( c->args->size * sizeof( struct arg_list ) );
+        memcpy( result, c->args->data, c->args->size * sizeof( struct arg_list )
+            );
+        argument_list_compiler_free( c );
+        return result;
+    }
+    *num_arguments = 0;
+    return 0;
+}
+
+/*
+ * argument_list_print() - print a rule's formal argument lists in source
+ * form (" : " between lists, type names and ?/+/* modifiers included), as
+ * used by argument_error() diagnostics.
+ */
+static void argument_list_print( struct arg_list * args, int num_args )
+{
+    if ( args )
+    {
+        int i;
+        for ( i = 0; i < num_args; ++i )
+        {
+            int j;
+            if ( i ) printf( " : " );
+            for ( j = 0; j < args[ i ].size; ++j )
+            {
+                struct argument * formal_arg = &args[ i ].args[ j ];
+                if ( j ) printf( " " );
+                if ( formal_arg->type_name )
+                    printf( "%s ", object_str( formal_arg->type_name ) );
+                printf( "%s", object_str( formal_arg->arg_name ) );
+                switch ( formal_arg->flags )
+                {
+                case ARG_OPTIONAL: printf( " ?" ); break;
+                case ARG_PLUS: printf( " +" ); break;
+                case ARG_STAR: printf( " *" ); break;
+                }
+            }
+        }
+    }
+}
+
+
+/*
+ * argument_list_bind_variables() - deep-copy a formal argument list,
+ * registering each non-variadic parameter as a fixed variable of the given
+ * module (module_add_fixed_var assigns the slot index via counter).
+ * Returns 0 when formal is 0.
+ */
+struct arg_list * argument_list_bind_variables( struct arg_list * formal,
+    int formal_count, module_t * module, int * counter )
+{
+    if ( formal )
+    {
+        struct arg_list * result = (struct arg_list *)BJAM_MALLOC( sizeof(
+            struct arg_list ) * formal_count );
+        int i;
+
+        for ( i = 0; i < formal_count; ++i )
+        {
+            int j;
+            struct argument * args = (struct argument *)BJAM_MALLOC( sizeof(
+                struct argument ) * formal[ i ].size );
+            for ( j = 0; j < formal[ i ].size; ++j )
+            {
+                args[ j ] = formal[ i ].args[ j ];
+                /* Copy the name objects so the clone owns its references. */
+                if ( args[ j ].type_name )
+                    args[ j ].type_name = object_copy( args[ j ].type_name );
+                args[ j ].arg_name = object_copy( args[ j ].arg_name );
+                if ( args[ j ].flags != ARG_VARIADIC )
+                    args[ j ].index = module_add_fixed_var( module,
+                        args[ j ].arg_name, counter );
+            }
+            result[ i ].args = args;
+            result[ i ].size = formal[ i ].size;
+        }
+
+        return result;
+    }
+    return 0;
+}
+
+
+void argument_list_free( struct arg_list * args, int args_count )
+{
+ int i;
+ for ( i = 0; i < args_count; ++i )
+ {
+ int j;
+ for ( j = 0; j < args[ i ].size; ++j )
+ {
+ if ( args[ i ].args[ j ].type_name )
+ object_free( args[ i ].args[ j ].type_name );
+ object_free( args[ i ].args[ j ].arg_name );
+ }
+ BJAM_FREE( args[ i ].args );
+ }
+ BJAM_FREE( args );
+}
+
+
+/* Returns the generic (pre-binding) variant of 'f'.  For a Jam function that
+ * was produced by function_bind_variables the 'generic' field points back to
+ * the original; builtins and Python functions are never bound, so they are
+ * returned unchanged.
+ */
+FUNCTION * function_unbind_variables( FUNCTION * f )
+{
+    if ( f->type == FUNCTION_JAM )
+    {
+        JAM_FUNCTION * const func = (JAM_FUNCTION *)f;
+        return func->generic ? func->generic : f;
+    }
+#ifdef HAVE_PYTHON
+    if ( f->type == FUNCTION_PYTHON )
+        return f;
+#endif
+    assert( f->type == FUNCTION_BUILTIN );
+    return f;
+}
+
+/* Creates a copy of Jam function 'f' whose variable-access instructions are
+ * rewritten to their *_FIXED variants addressing fixed slots of 'module'
+ * (allocated through module_add_fixed_var with the shared slot 'counter').
+ * Builtins and Python functions need no binding and are returned as is.
+ * Variables named TMPDIR/TMPNAME/TMPFILE/STDOUT/STDERR keep dynamic lookup.
+ * The copy keeps a 'generic' pointer back to the original for unbinding.
+ */
+FUNCTION * function_bind_variables( FUNCTION * f, module_t * module,
+    int * counter )
+{
+    if ( f->type == FUNCTION_BUILTIN )
+        return f;
+#ifdef HAVE_PYTHON
+    if ( f->type == FUNCTION_PYTHON )
+        return f;
+#endif
+    {
+        JAM_FUNCTION * func = (JAM_FUNCTION *)f;
+        JAM_FUNCTION * new_func = BJAM_MALLOC( sizeof( JAM_FUNCTION ) );
+        instruction * code;
+        int i;
+        assert( f->type == FUNCTION_JAM );
+        memcpy( new_func, func, sizeof( JAM_FUNCTION ) );
+        new_func->base.reference_count = 1;
+        new_func->base.formal_arguments = argument_list_bind_variables(
+            f->formal_arguments, f->num_formal_arguments, module, counter );
+        /* The instruction stream is copied so it can be patched in place. */
+        new_func->code = BJAM_MALLOC( func->code_size * sizeof( instruction ) );
+        memcpy( new_func->code, func->code, func->code_size * sizeof(
+            instruction ) );
+        new_func->generic = (FUNCTION *)func;
+        func = new_func;
+        for ( i = 0; ; ++i )
+        {
+            OBJECT * key;
+            int op_code;
+            code = func->code + i;
+            switch ( code->op_code )
+            {
+            case INSTR_PUSH_VAR: op_code = INSTR_PUSH_VAR_FIXED; break;
+            case INSTR_PUSH_LOCAL: op_code = INSTR_PUSH_LOCAL_FIXED; break;
+            case INSTR_POP_LOCAL: op_code = INSTR_POP_LOCAL_FIXED; break;
+            case INSTR_SET: op_code = INSTR_SET_FIXED; break;
+            case INSTR_APPEND: op_code = INSTR_APPEND_FIXED; break;
+            case INSTR_DEFAULT: op_code = INSTR_DEFAULT_FIXED; break;
+            case INSTR_RETURN: return (FUNCTION *)new_func;
+            /* Rule-call instructions occupy two slots; skip the extra one. */
+            case INSTR_CALL_MEMBER_RULE:
+            case INSTR_CALL_RULE: ++i; continue;
+            case INSTR_PUSH_MODULE:
+                {
+                    /* Skip over code executing inside a nested module or
+                     * class scope: its variables belong to that module, not
+                     * to the one being bound here.
+                     */
+                    int depth = 1;
+                    ++i;
+                    while ( depth > 0 )
+                    {
+                        code = func->code + i;
+                        switch ( code->op_code )
+                        {
+                        case INSTR_PUSH_MODULE:
+                        case INSTR_CLASS:
+                            ++depth;
+                            break;
+                        case INSTR_POP_MODULE:
+                            --depth;
+                            break;
+                        case INSTR_CALL_RULE:
+                            ++i;
+                            break;
+                        }
+                        ++i;
+                    }
+                    --i;
+                }
+                /* deliberate fall through to 'default' */
+            default: continue;
+            }
+            key = func->constants[ code->arg ];
+            if ( !( object_equal( key, constant_TMPDIR ) ||
+                    object_equal( key, constant_TMPNAME ) ||
+                    object_equal( key, constant_TMPFILE ) ||
+                    object_equal( key, constant_STDOUT ) ||
+                    object_equal( key, constant_STDERR ) ) )
+            {
+                code->op_code = op_code;
+                code->arg = module_add_fixed_var( module, key, counter );
+            }
+        }
+    }
+}
+
+/* Takes an additional reference to 'func' (released by function_free). */
+void function_refer( FUNCTION * func )
+{
+    ++func->reference_count;
+}
+
+/* Drops one reference to 'function_' and destroys it when the count reaches
+ * zero.  For bound Jam functions only the private code copy is freed and the
+ * teardown recurses into the shared 'generic' original; otherwise constants,
+ * subfunctions, actions and the file name object are released as well.
+ */
+void function_free( FUNCTION * function_ )
+{
+    int i;
+
+    if ( --function_->reference_count != 0 )
+        return;
+
+    if ( function_->formal_arguments )
+        argument_list_free( function_->formal_arguments,
+            function_->num_formal_arguments );
+
+    if ( function_->type == FUNCTION_JAM )
+    {
+        JAM_FUNCTION * func = (JAM_FUNCTION *)function_;
+
+        BJAM_FREE( func->code );
+
+        if ( func->generic )
+            /* Bound copy: everything else is owned by the original. */
+            function_free( func->generic );
+        else
+        {
+            if ( function_->rulename ) object_free( function_->rulename );
+
+            for ( i = 0; i < func->num_constants; ++i )
+                object_free( func->constants[ i ] );
+            BJAM_FREE( func->constants );
+
+            for ( i = 0; i < func->num_subfunctions; ++i )
+            {
+                object_free( func->functions[ i ].name );
+                function_free( func->functions[ i ].code );
+            }
+            BJAM_FREE( func->functions );
+
+            for ( i = 0; i < func->num_subactions; ++i )
+            {
+                object_free( func->actions[ i ].name );
+                function_free( func->actions[ i ].command );
+            }
+            BJAM_FREE( func->actions );
+
+            object_free( func->file );
+        }
+    }
+#ifdef HAVE_PYTHON
+    else if ( function_->type == FUNCTION_PYTHON )
+    {
+        PYTHON_FUNCTION * func = (PYTHON_FUNCTION *)function_;
+        Py_DECREF( func->python_function );
+        if ( function_->rulename ) object_free( function_->rulename );
+    }
+#endif
+    else
+    {
+        assert( function_->type == FUNCTION_BUILTIN );
+        if ( function_->rulename ) object_free( function_->rulename );
+    }
+
+    BJAM_FREE( function_ );
+}
+
+
+/* Alignment check for stack */
+
+/* Compile-time checks: each 'check_*' array gets a negative size — a compile
+ * error — if the assumption it encodes fails.  The interpreter allocates
+ * VAR_EDITS and expansion_item records directly on its value stack, so their
+ * padded size must not exceed the raw size plus one pointer, and LIST* /
+ * char* values stored in stack slots must fit in a void*.
+ */
+struct align_var_edits
+{
+    char ch;
+    VAR_EDITS e;
+};
+
+struct align_expansion_item
+{
+    char ch;
+    expansion_item e;
+};
+
+static char check_align_var_edits[ sizeof(struct align_var_edits) <= sizeof(VAR_EDITS) + sizeof(void *) ? 1 : -1 ];
+static char check_align_expansion_item[ sizeof(struct align_expansion_item) <= sizeof(expansion_item) + sizeof(void *) ? 1 : -1 ];
+
+static char check_ptr_size1[ sizeof(LIST *) <= sizeof(void *) ? 1 : -1 ];
+static char check_ptr_size2[ sizeof(char *) <= sizeof(void *) ? 1 : -1 ];
+
+/* Runs 'function' with the output string pointer 'out' pushed on stack 's'
+ * (INSTR_OUTPUT_STRINGS inside function_run reads it back from there).  The
+ * function's result list is discarded; the stack slot is released afterwards.
+ */
+void function_run_actions( FUNCTION * function, FRAME * frame, STACK * s,
+    string * out )
+{
+    *(string * *)stack_allocate( s, sizeof( string * ) ) = out;
+    list_free( function_run( function, frame, s ) );
+    stack_deallocate( s, sizeof( string * ) );
+}
+
+/*
+ * WARNING: The instruction set is tuned for Jam and is not really generic. Be
+ * especially careful about stack push/pop.
+ */
+
+/* Executes 'function_' in call frame 'frame' using value stack 's' and
+ * returns its result list (ownership passes to the caller).  Builtin and
+ * Python functions are dispatched directly after argument checking; compiled
+ * Jam functions are interpreted by the bytecode dispatch loop below, one
+ * switch arm per instruction.
+ */
+LIST * function_run( FUNCTION * function_, FRAME * frame, STACK * s )
+{
+    JAM_FUNCTION * function;
+    instruction * code;
+    LIST * l;
+    LIST * r;
+    LIST * result = L0;
+    /* Remembered so INSTR_RETURN can verify the stack is balanced. */
+    void * saved_stack = s->data;
+
+    if ( function_->type == FUNCTION_BUILTIN )
+    {
+        BUILTIN_FUNCTION const * const f = (BUILTIN_FUNCTION *)function_;
+        if ( function_->formal_arguments )
+            argument_list_check( function_->formal_arguments,
+                function_->num_formal_arguments, function_, frame );
+        return f->func( frame, f->flags );
+    }
+
+#ifdef HAVE_PYTHON
+    else if ( function_->type == FUNCTION_PYTHON )
+    {
+        PYTHON_FUNCTION * f = (PYTHON_FUNCTION *)function_;
+        return call_python_function( f, frame );
+    }
+#endif
+
+    assert( function_->type == FUNCTION_JAM );
+
+    if ( function_->formal_arguments )
+        argument_list_push( function_->formal_arguments,
+            function_->num_formal_arguments, function_, frame, s );
+
+    function = (JAM_FUNCTION *)function_;
+    code = function->code;
+    /* Bytecode dispatch loop: one iteration per instruction. */
+    for ( ; ; )
+    {
+        switch ( code->op_code )
+        {
+
+        /*
+         * Basic stack manipulation
+         */
+
+        case INSTR_PUSH_EMPTY:
+            stack_push( s, L0 );
+            break;
+
+        case INSTR_PUSH_CONSTANT:
+        {
+            OBJECT * value = function_get_constant( function, code->arg );
+            stack_push( s, list_new( object_copy( value ) ) );
+            break;
+        }
+
+        case INSTR_PUSH_ARG:
+            stack_push( s, frame_get_local( frame, code->arg ) );
+            break;
+
+        case INSTR_PUSH_VAR:
+            stack_push( s, function_get_variable( function, frame, code->arg ) );
+            break;
+
+        case INSTR_PUSH_VAR_FIXED:
+            stack_push( s, list_copy( frame->module->fixed_variables[ code->arg
+                ] ) );
+            break;
+
+        case INSTR_PUSH_GROUP:
+        {
+            LIST * value = L0;
+            LISTITER iter;
+            LISTITER end;
+            l = stack_pop( s );
+            for ( iter = list_begin( l ), end = list_end( l ); iter != end;
+                iter = list_next( iter ) )
+                value = list_append( value, function_get_named_variable(
+                    function, frame, list_item( iter ) ) );
+            list_free( l );
+            stack_push( s, value );
+            break;
+        }
+
+        case INSTR_PUSH_APPEND:
+            r = stack_pop( s );
+            l = stack_pop( s );
+            stack_push( s, list_append( l, r ) );
+            break;
+
+        case INSTR_SWAP:
+            l = stack_top( s );
+            stack_set( s, 0, stack_at( s, code->arg ) );
+            stack_set( s, code->arg, l );
+            break;
+
+        case INSTR_POP:
+            list_free( stack_pop( s ) );
+            break;
+
+        /*
+         * Branch instructions
+         */
+
+        case INSTR_JUMP:
+            code += code->arg;
+            break;
+
+        case INSTR_JUMP_EMPTY:
+            l = stack_pop( s );
+            if ( !list_cmp( l, L0 ) ) code += code->arg;
+            list_free( l );
+            break;
+
+        case INSTR_JUMP_NOT_EMPTY:
+            l = stack_pop( s );
+            if ( list_cmp( l, L0 ) ) code += code->arg;
+            list_free( l );
+            break;
+
+        case INSTR_JUMP_LT:
+            r = stack_pop( s );
+            l = stack_pop( s );
+            if ( list_cmp( l, r ) < 0 ) code += code->arg;
+            list_free( l );
+            list_free( r );
+            break;
+
+        case INSTR_JUMP_LE:
+            r = stack_pop( s );
+            l = stack_pop( s );
+            if ( list_cmp( l, r ) <= 0 ) code += code->arg;
+            list_free( l );
+            list_free( r );
+            break;
+
+        case INSTR_JUMP_GT:
+            r = stack_pop( s );
+            l = stack_pop( s );
+            if ( list_cmp( l, r ) > 0 ) code += code->arg;
+            list_free( l );
+            list_free( r );
+            break;
+
+        case INSTR_JUMP_GE:
+            r = stack_pop( s );
+            l = stack_pop( s );
+            if ( list_cmp( l, r ) >= 0 ) code += code->arg;
+            list_free( l );
+            list_free( r );
+            break;
+
+        case INSTR_JUMP_EQ:
+            r = stack_pop( s );
+            l = stack_pop( s );
+            if ( list_cmp( l, r ) == 0 ) code += code->arg;
+            list_free( l );
+            list_free( r );
+            break;
+
+        case INSTR_JUMP_NE:
+            r = stack_pop(s);
+            l = stack_pop(s);
+            if ( list_cmp(l, r) != 0 ) code += code->arg;
+            list_free(l);
+            list_free(r);
+            break;
+
+        case INSTR_JUMP_IN:
+            r = stack_pop(s);
+            l = stack_pop(s);
+            if ( list_is_sublist( l, r ) ) code += code->arg;
+            list_free(l);
+            list_free(r);
+            break;
+
+        case INSTR_JUMP_NOT_IN:
+            r = stack_pop( s );
+            l = stack_pop( s );
+            if ( !list_is_sublist( l, r ) ) code += code->arg;
+            list_free( l );
+            list_free( r );
+            break;
+
+        /*
+         * For
+         */
+
+        case INSTR_FOR_INIT:
+            l = stack_top( s );
+            *(LISTITER *)stack_allocate( s, sizeof( LISTITER ) ) =
+                list_begin( l );
+            break;
+
+        case INSTR_FOR_LOOP:
+        {
+            LISTITER iter = *(LISTITER *)stack_get( s );
+            stack_deallocate( s, sizeof( LISTITER ) );
+            l = stack_top( s );
+            if ( iter == list_end( l ) )
+            {
+                /* Loop exhausted: drop the list and jump past the body. */
+                list_free( stack_pop( s ) );
+                code += code->arg;
+            }
+            else
+            {
+                r = list_new( object_copy( list_item( iter ) ) );
+                iter = list_next( iter );
+                *(LISTITER *)stack_allocate( s, sizeof( LISTITER ) ) = iter;
+                stack_push( s, r );
+            }
+            break;
+        }
+
+        /*
+         * Switch
+         */
+
+        case INSTR_JUMP_NOT_GLOB:
+        {
+            char const * pattern;
+            char const * match;
+            l = stack_pop( s );
+            r = stack_top( s );
+            pattern = list_empty( l ) ? "" : object_str( list_front( l ) );
+            match = list_empty( r ) ? "" : object_str( list_front( r ) );
+            if ( glob( pattern, match ) )
+                code += code->arg;
+            else
+                list_free( stack_pop( s ) );
+            list_free( l );
+            break;
+        }
+
+        /*
+         * Return
+         */
+
+        case INSTR_SET_RESULT:
+            list_free( result );
+            if ( !code->arg )
+                result = stack_pop( s );
+            else
+                result = list_copy( stack_top( s ) );
+            break;
+
+        case INSTR_PUSH_RESULT:
+            stack_push( s, result );
+            result = L0;
+            break;
+
+        case INSTR_RETURN:
+        {
+            if ( function_->formal_arguments )
+                argument_list_pop( function_->formal_arguments,
+                    function_->num_formal_arguments, frame, s );
+#ifndef NDEBUG
+            if ( !( saved_stack == s->data ) )
+            {
+                frame->file = function->file;
+                frame->line = function->line;
+                backtrace_line( frame );
+                printf( "error: stack check failed.\n" );
+                backtrace( frame );
+                assert( saved_stack == s->data );
+            }
+#endif
+            assert( saved_stack == s->data );
+            return result;
+        }
+
+        /*
+         * Local variables
+         */
+
+        case INSTR_PUSH_LOCAL:
+        {
+            LIST * value = stack_pop( s );
+            stack_push( s, function_swap_variable( function, frame, code->arg,
+                value ) );
+            break;
+        }
+
+        case INSTR_POP_LOCAL:
+            function_set_variable( function, frame, code->arg, stack_pop( s ) );
+            break;
+
+        case INSTR_PUSH_LOCAL_FIXED:
+        {
+            LIST * value = stack_pop( s );
+            LIST * * ptr = &frame->module->fixed_variables[ code->arg ];
+            assert( code->arg < frame->module->num_fixed_variables );
+            stack_push( s, *ptr );
+            *ptr = value;
+            break;
+        }
+
+        case INSTR_POP_LOCAL_FIXED:
+        {
+            LIST * value = stack_pop( s );
+            LIST * * ptr = &frame->module->fixed_variables[ code->arg ];
+            assert( code->arg < frame->module->num_fixed_variables );
+            list_free( *ptr );
+            *ptr = value;
+            break;
+        }
+
+        case INSTR_PUSH_LOCAL_GROUP:
+        {
+            LIST * const value = stack_pop( s );
+            LISTITER iter;
+            LISTITER end;
+            l = stack_pop( s );
+            for ( iter = list_begin( l ), end = list_end( l ); iter != end;
+                iter = list_next( iter ) )
+                stack_push( s, function_swap_named_variable( function, frame,
+                    list_item( iter ), list_copy( value ) ) );
+            list_free( value );
+            stack_push( s, l );
+            break;
+        }
+
+        case INSTR_POP_LOCAL_GROUP:
+        {
+            LISTITER iter;
+            LISTITER end;
+            r = stack_pop( s );
+            /* Saved values were pushed in forward order, so restore them in
+             * reverse.
+             */
+            l = list_reverse( r );
+            list_free( r );
+            for ( iter = list_begin( l ), end = list_end( l ); iter != end;
+                iter = list_next( iter ) )
+                function_set_named_variable( function, frame, list_item( iter ),
+                    stack_pop( s ) );
+            list_free( l );
+            break;
+        }
+
+        /*
+         * on $(TARGET) variables
+         */
+
+        case INSTR_PUSH_ON:
+        {
+            LIST * targets = stack_top( s );
+            if ( !list_empty( targets ) )
+            {
+                /* FIXME: push the state onto the stack instead of using
+                 * pushsettings.
+                 */
+                TARGET * t = bindtarget( list_front( targets ) );
+                pushsettings( frame->module, t->settings );
+            }
+            else
+            {
+                /* [ on $(TARGET) ... ] is ignored if $(TARGET) is empty. */
+                list_free( stack_pop( s ) );
+                stack_push( s, L0 );
+                code += code->arg;
+            }
+            break;
+        }
+
+        case INSTR_POP_ON:
+        {
+            LIST * result = stack_pop( s );
+            LIST * targets = stack_pop( s );
+            if ( !list_empty( targets ) )
+            {
+                TARGET * t = bindtarget( list_front( targets ) );
+                popsettings( frame->module, t->settings );
+            }
+            list_free( targets );
+            stack_push( s, result );
+            break;
+        }
+
+        case INSTR_SET_ON:
+        {
+            LIST * targets = stack_pop( s );
+            LIST * value = stack_pop( s );
+            LIST * vars = stack_pop( s );
+            LISTITER iter = list_begin( targets );
+            LISTITER const end = list_end( targets );
+            for ( ; iter != end; iter = list_next( iter ) )
+            {
+                TARGET * t = bindtarget( list_item( iter ) );
+                LISTITER vars_iter = list_begin( vars );
+                LISTITER const vars_end = list_end( vars );
+                for ( ; vars_iter != vars_end; vars_iter = list_next( vars_iter
+                    ) )
+                    t->settings = addsettings( t->settings, VAR_SET, list_item(
+                        vars_iter ), list_copy( value ) );
+            }
+            list_free( vars );
+            list_free( targets );
+            stack_push( s, value );
+            break;
+        }
+
+        case INSTR_APPEND_ON:
+        {
+            LIST * targets = stack_pop( s );
+            LIST * value = stack_pop( s );
+            LIST * vars = stack_pop( s );
+            LISTITER iter = list_begin( targets );
+            LISTITER const end = list_end( targets );
+            for ( ; iter != end; iter = list_next( iter ) )
+            {
+                TARGET * const t = bindtarget( list_item( iter ) );
+                LISTITER vars_iter = list_begin( vars );
+                LISTITER const vars_end = list_end( vars );
+                for ( ; vars_iter != vars_end; vars_iter = list_next( vars_iter
+                    ) )
+                    t->settings = addsettings( t->settings, VAR_APPEND,
+                        list_item( vars_iter ), list_copy( value ) );
+            }
+            list_free( vars );
+            list_free( targets );
+            stack_push( s, value );
+            break;
+        }
+
+        case INSTR_DEFAULT_ON:
+        {
+            LIST * targets = stack_pop( s );
+            LIST * value = stack_pop( s );
+            LIST * vars = stack_pop( s );
+            LISTITER iter = list_begin( targets );
+            LISTITER const end = list_end( targets );
+            for ( ; iter != end; iter = list_next( iter ) )
+            {
+                TARGET * t = bindtarget( list_item( iter ) );
+                LISTITER vars_iter = list_begin( vars );
+                LISTITER const vars_end = list_end( vars );
+                for ( ; vars_iter != vars_end; vars_iter = list_next( vars_iter
+                    ) )
+                    t->settings = addsettings( t->settings, VAR_DEFAULT,
+                        list_item( vars_iter ), list_copy( value ) );
+            }
+            list_free( vars );
+            list_free( targets );
+            stack_push( s, value );
+            break;
+        }
+
+        /* [ on $(target) return $(variable) ] */
+        case INSTR_GET_ON:
+        {
+            LIST * targets = stack_pop( s );
+            LIST * result = L0;
+            if ( !list_empty( targets ) )
+            {
+                OBJECT * varname = function->constants[ code->arg ];
+                TARGET * t = bindtarget( list_front( targets ) );
+                /* NOTE(review): this 's' shadows the STACK 's' until the end
+                 * of the enclosing if-block.
+                 */
+                SETTINGS * s = t->settings;
+                int found = 0;
+                for ( ; s != 0; s = s->next )
+                {
+                    if ( object_equal( s->symbol, varname ) )
+                    {
+                        result = s->value;
+                        found = 1;
+                        break;
+                    }
+                }
+                if ( !found )
+                {
+                    result = var_get( frame->module, varname ) ;
+                }
+            }
+            stack_push( s, list_copy( result ) );
+            break;
+        }
+
+        /*
+         * Variable setting
+         */
+
+        case INSTR_SET:
+            function_set_variable( function, frame, code->arg,
+                stack_pop( s ) );
+            break;
+
+        case INSTR_APPEND:
+            function_append_variable( function, frame, code->arg,
+                stack_pop( s ) );
+            break;
+
+        case INSTR_DEFAULT:
+            function_default_variable( function, frame, code->arg,
+                stack_pop( s ) );
+            break;
+
+        case INSTR_SET_FIXED:
+        {
+            LIST * * ptr = &frame->module->fixed_variables[ code->arg ];
+            assert( code->arg < frame->module->num_fixed_variables );
+            list_free( *ptr );
+            *ptr = stack_pop( s );
+            break;
+        }
+
+        case INSTR_APPEND_FIXED:
+        {
+            LIST * * ptr = &frame->module->fixed_variables[ code->arg ];
+            assert( code->arg < frame->module->num_fixed_variables );
+            *ptr = list_append( *ptr, stack_pop( s ) );
+            break;
+        }
+
+        case INSTR_DEFAULT_FIXED:
+        {
+            LIST * * ptr = &frame->module->fixed_variables[ code->arg ];
+            LIST * value = stack_pop( s );
+            assert( code->arg < frame->module->num_fixed_variables );
+            if ( list_empty( *ptr ) )
+                *ptr = value;
+            else
+                list_free( value );
+            break;
+        }
+
+        case INSTR_SET_GROUP:
+        {
+            LIST * value = stack_pop( s );
+            LIST * vars = stack_pop( s );
+            LISTITER iter = list_begin( vars );
+            LISTITER const end = list_end( vars );
+            for ( ; iter != end; iter = list_next( iter ) )
+                function_set_named_variable( function, frame, list_item( iter ),
+                    list_copy( value ) );
+            list_free( vars );
+            list_free( value );
+            break;
+        }
+
+        case INSTR_APPEND_GROUP:
+        {
+            LIST * value = stack_pop( s );
+            LIST * vars = stack_pop( s );
+            LISTITER iter = list_begin( vars );
+            LISTITER const end = list_end( vars );
+            for ( ; iter != end; iter = list_next( iter ) )
+                function_append_named_variable( function, frame, list_item( iter
+                    ), list_copy( value ) );
+            list_free( vars );
+            list_free( value );
+            break;
+        }
+
+        case INSTR_DEFAULT_GROUP:
+        {
+            LIST * value = stack_pop( s );
+            LIST * vars = stack_pop( s );
+            LISTITER iter = list_begin( vars );
+            LISTITER const end = list_end( vars );
+            for ( ; iter != end; iter = list_next( iter ) )
+                function_default_named_variable( function, frame, list_item(
+                    iter ), list_copy( value ) );
+            list_free( vars );
+            list_free( value );
+            break;
+        }
+
+        /*
+         * Rules
+         */
+
+        case INSTR_CALL_RULE:
+        {
+            /* Two-slot instruction: code[ 1 ] holds the unexpanded rule name
+             * constant and the line number.
+             */
+            char const * unexpanded = object_str( function_get_constant(
+                function, code[ 1 ].op_code ) );
+            LIST * result = function_call_rule( function, frame, s, code->arg,
+                unexpanded, function->file, code[ 1 ].arg );
+            stack_push( s, result );
+            ++code;
+            break;
+        }
+
+        case INSTR_CALL_MEMBER_RULE:
+        {
+            OBJECT * rule_name = function_get_constant( function, code[1].op_code );
+            LIST * result = function_call_member_rule( function, frame, s, code->arg, rule_name, function->file, code[1].arg );
+            stack_push( s, result );
+            ++code;
+            break;
+        }
+
+        case INSTR_RULE:
+            function_set_rule( function, frame, s, code->arg );
+            break;
+
+        case INSTR_ACTIONS:
+            function_set_actions( function, frame, s, code->arg );
+            break;
+
+        /*
+         * Variable expansion
+         */
+
+        case INSTR_APPLY_MODIFIERS:
+        {
+            int n;
+            int i;
+            l = stack_pop( s );
+            n = expand_modifiers( s, code->arg );
+            stack_push( s, l );
+            l = apply_modifiers( s, n );
+            list_free( stack_pop( s ) );
+            stack_deallocate( s, n * sizeof( VAR_EDITS ) );
+            for ( i = 0; i < code->arg; ++i )
+                list_free( stack_pop( s ) );  /* pop modifiers */
+            stack_push( s, l );
+            break;
+        }
+
+        case INSTR_APPLY_INDEX:
+            l = apply_subscript( s );
+            list_free( stack_pop( s ) );
+            list_free( stack_pop( s ) );
+            stack_push( s, l );
+            break;
+
+        case INSTR_APPLY_INDEX_MODIFIERS:
+        {
+            int i;
+            int n;
+            l = stack_pop( s );
+            r = stack_pop( s );
+            n = expand_modifiers( s, code->arg );
+            stack_push( s, r );
+            stack_push( s, l );
+            l = apply_subscript_and_modifiers( s, n );
+            list_free( stack_pop( s ) );
+            list_free( stack_pop( s ) );
+            stack_deallocate( s, n * sizeof( VAR_EDITS ) );
+            for ( i = 0; i < code->arg; ++i )
+                list_free( stack_pop( s ) );  /* pop modifiers */
+            stack_push( s, l );
+            break;
+        }
+
+        case INSTR_APPLY_MODIFIERS_GROUP:
+        {
+            int i;
+            LIST * const vars = stack_pop( s );
+            int const n = expand_modifiers( s, code->arg );
+            LIST * result = L0;
+            LISTITER iter = list_begin( vars );
+            LISTITER const end = list_end( vars );
+            for ( ; iter != end; iter = list_next( iter ) )
+            {
+                stack_push( s, function_get_named_variable( function, frame,
+                    list_item( iter ) ) );
+                result = list_append( result, apply_modifiers( s, n ) );
+                list_free( stack_pop( s ) );
+            }
+            list_free( vars );
+            stack_deallocate( s, n * sizeof( VAR_EDITS ) );
+            for ( i = 0; i < code->arg; ++i )
+                list_free( stack_pop( s ) );  /* pop modifiers */
+            stack_push( s, result );
+            break;
+        }
+
+        case INSTR_APPLY_INDEX_GROUP:
+        {
+            LIST * vars = stack_pop( s );
+            LIST * result = L0;
+            LISTITER iter = list_begin( vars );
+            LISTITER const end = list_end( vars );
+            for ( ; iter != end; iter = list_next( iter ) )
+            {
+                stack_push( s, function_get_named_variable( function, frame,
+                    list_item( iter ) ) );
+                result = list_append( result, apply_subscript( s ) );
+                list_free( stack_pop( s ) );
+            }
+            list_free( vars );
+            list_free( stack_pop( s ) );
+            stack_push( s, result );
+            break;
+        }
+
+        case INSTR_APPLY_INDEX_MODIFIERS_GROUP:
+        {
+            int i;
+            LIST * const vars = stack_pop( s );
+            LIST * const r = stack_pop( s );
+            int const n = expand_modifiers( s, code->arg );
+            LIST * result = L0;
+            LISTITER iter = list_begin( vars );
+            LISTITER const end = list_end( vars );
+            stack_push( s, r );
+            for ( ; iter != end; iter = list_next( iter ) )
+            {
+                stack_push( s, function_get_named_variable( function, frame,
+                    list_item( iter ) ) );
+                result = list_append( result, apply_subscript_and_modifiers( s,
+                    n ) );
+                list_free( stack_pop( s ) );
+            }
+            list_free( stack_pop( s ) );
+            list_free( vars );
+            stack_deallocate( s, n * sizeof( VAR_EDITS ) );
+            for ( i = 0; i < code->arg; ++i )
+                list_free( stack_pop( s ) );  /* pop modifiers */
+            stack_push( s, result );
+            break;
+        }
+
+        case INSTR_COMBINE_STRINGS:
+        {
+            size_t const buffer_size = code->arg * sizeof( expansion_item );
+            LIST * * const stack_pos = stack_get( s );
+            expansion_item * items = stack_allocate( s, buffer_size );
+            LIST * result;
+            int i;
+            for ( i = 0; i < code->arg; ++i )
+                items[ i ].values = stack_pos[ i ];
+            result = expand( items, code->arg );
+            stack_deallocate( s, buffer_size );
+            for ( i = 0; i < code->arg; ++i )
+                list_free( stack_pop( s ) );
+            stack_push( s, result );
+            break;
+        }
+
+        case INSTR_GET_GRIST:
+        {
+            LIST * vals = stack_pop( s );
+            LIST * result = L0;
+            LISTITER iter, end;
+
+            for ( iter = list_begin( vals ), end = list_end( vals ); iter != end; ++iter )
+            {
+                OBJECT * new_object;
+                const char * value = object_str( list_item( iter ) );
+                const char * p;
+                if ( value[ 0 ] == '<' && ( p = strchr( value, '>' ) ) )
+                {
+                    if( p[ 1 ] )
+                        new_object = object_new_range( value, p - value + 1 );
+                    else
+                        new_object = object_copy( list_item( iter ) );
+                }
+                else
+                {
+                    new_object = object_copy( constant_empty );
+                }
+                result = list_push_back( result, new_object );
+            }
+
+            list_free( vals );
+            stack_push( s, result );
+            break;
+        }
+
+        case INSTR_INCLUDE:
+        {
+            LIST * nt = stack_pop( s );
+            if ( !list_empty( nt ) )
+            {
+                TARGET * const t = bindtarget( list_front( nt ) );
+                list_free( nt );
+
+                /* DWA 2001/10/22 - Perforce Jam cleared the arguments here,
+                 * which prevented an included file from being treated as part
+                 * of the body of a rule. I did not see any reason to do that,
+                 * so I lifted the restriction.
+                 */
+
+                /* Bind the include file under the influence of "on-target"
+                 * variables. Though they are targets, include files are not
+                 * built with make().
+                 */
+
+                pushsettings( root_module(), t->settings );
+                /* We do not expect that a file to be included is generated by
+                 * some action. Therefore, pass 0 as third argument. If the name
+                 * resolves to a directory, let it error out.
+                 */
+                object_free( t->boundname );
+                t->boundname = search( t->name, &t->time, 0, 0 );
+                popsettings( root_module(), t->settings );
+
+                parse_file( t->boundname, frame );
+            }
+            break;
+        }
+
+        /*
+         * Classes and modules
+         */
+
+        case INSTR_PUSH_MODULE:
+        {
+            LIST * const module_name = stack_pop( s );
+            module_t * const outer_module = frame->module;
+            frame->module = !list_empty( module_name )
+                ? bindmodule( list_front( module_name ) )
+                : root_module();
+            list_free( module_name );
+            *(module_t * *)stack_allocate( s, sizeof( module_t * ) ) =
+                outer_module;
+            break;
+        }
+
+        case INSTR_POP_MODULE:
+        {
+            module_t * const outer_module = *(module_t * *)stack_get( s );
+            stack_deallocate( s, sizeof( module_t * ) );
+            frame->module = outer_module;
+            break;
+        }
+
+        case INSTR_CLASS:
+        {
+            LIST * bases = stack_pop( s );
+            LIST * name = stack_pop( s );
+            OBJECT * class_module = make_class_module( name, bases, frame );
+
+            module_t * const outer_module = frame->module;
+            frame->module = bindmodule( class_module );
+            object_free( class_module );
+
+            *(module_t * *)stack_allocate( s, sizeof( module_t * ) ) =
+                outer_module;
+            break;
+        }
+
+        case INSTR_BIND_MODULE_VARIABLES:
+            module_bind_variables( frame->module );
+            break;
+
+        case INSTR_APPEND_STRINGS:
+        {
+            string buf[ 1 ];
+            string_new( buf );
+            combine_strings( s, code->arg, buf );
+            stack_push( s, list_new( object_new( buf->value ) ) );
+            string_free( buf );
+            break;
+        }
+
+        case INSTR_WRITE_FILE:
+        {
+            string buf[ 1 ];
+            char const * out;
+            OBJECT * tmp_filename = 0;
+            int out_debug = DEBUG_EXEC ? 1 : 0;
+            FILE * out_file = 0;
+            string_new( buf );
+            combine_strings( s, code->arg, buf );
+            out = object_str( list_front( stack_top( s ) ) );
+
+            /* For stdout/stderr we will create a temp file and generate a
+             * command that outputs the content as needed.
+             */
+            if ( ( strcmp( "STDOUT", out ) == 0 ) ||
+                ( strcmp( "STDERR", out ) == 0 ) )
+            {
+                int err_redir = strcmp( "STDERR", out ) == 0;
+                string result[ 1 ];
+                tmp_filename = path_tmpfile();
+                string_new( result );
+            #ifdef OS_NT
+                string_append( result, "type \"" );
+            #else
+                string_append( result, "cat \"" );
+            #endif
+                string_append( result, object_str( tmp_filename ) );
+                string_push_back( result, '\"' );
+                if ( err_redir )
+                    string_append( result, " 1>&2" );
+
+                /* Replace STDXXX with the temporary file. */
+                list_free( stack_pop( s ) );
+                stack_push( s, list_new( object_new( result->value ) ) );
+                out = object_str( tmp_filename );
+
+                string_free( result );
+
+                /* Make sure temp files created by this get nuked eventually. */
+                file_remove_atexit( tmp_filename );
+            }
+
+            if ( !globs.noexec )
+            {
+                string out_name[ 1 ];
+                /* Handle "path to file" filenames. */
+                if ( ( out[ 0 ] == '"' ) && ( out[ strlen( out ) - 1 ] == '"' )
+                    )
+                {
+                    string_copy( out_name, out + 1 );
+                    string_truncate( out_name, out_name->size - 1 );
+                }
+                else
+                    string_copy( out_name, out );
+                out_file = fopen( out_name->value, "w" );
+
+                if ( !out_file )
+                {
+                    printf( "failed to write output file '%s'!\n",
+                        out_name->value );
+                    exit( EXITBAD );
+                }
+                string_free( out_name );
+            }
+
+            if ( out_debug ) printf( "\nfile %s\n", out );
+            if ( out_file ) fputs( buf->value, out_file );
+            if ( out_debug ) fputs( buf->value, stdout );
+            if ( out_file )
+            {
+                fflush( out_file );
+                fclose( out_file );
+            }
+            string_free( buf );
+            if ( tmp_filename )
+                object_free( tmp_filename );
+
+            if ( out_debug ) fputc( '\n', stdout );
+            break;
+        }
+
+        case INSTR_OUTPUT_STRINGS:
+        {
+            /* The output string pointer was placed below the top code->arg
+             * list slots by function_run_actions.
+             */
+            string * const buf = *(string * *)( (char *)stack_get( s ) + (
+                code->arg * sizeof( LIST * ) ) );
+            combine_strings( s, code->arg, buf );
+            break;
+        }
+
+        }
+        ++code;
+    }
+}
+
+
+#ifdef HAVE_PYTHON
+
+/* Compiles a Python-supplied rule signature (a sequence of sequences of
+ * strings, one inner sequence per argument group) into an arg_list array.
+ * Stores the group count in '*num_arguments'; returns 0 (and a zero count)
+ * when 'bjam_signature' is null.  Caller owns the returned array.
+ */
+static struct arg_list * arg_list_compile_python( PyObject * bjam_signature,
+    int * num_arguments )
+{
+    if ( bjam_signature )
+    {
+        struct argument_list_compiler c[ 1 ];
+        struct arg_list * result;
+        Py_ssize_t s;
+        Py_ssize_t i;
+        argument_list_compiler_init( c );
+
+        s = PySequence_Size( bjam_signature );
+        for ( i = 0; i < s; ++i )
+        {
+            struct argument_compiler arg_comp[ 1 ];
+            struct arg_list arg;
+            PyObject * v = PySequence_GetItem( bjam_signature, i );
+            Py_ssize_t j;
+            Py_ssize_t inner;
+            argument_compiler_init( arg_comp );
+
+            inner = PySequence_Size( v );
+            for ( j = 0; j < inner; ++j )
+                argument_compiler_add( arg_comp, object_new( PyString_AsString(
+                    PySequence_GetItem( v, j ) ) ), constant_builtin, -1 );
+
+            arg = arg_compile_impl( arg_comp, constant_builtin, -1 );
+            dynamic_array_push( c->args, arg );
+            argument_compiler_free( arg_comp );
+            Py_DECREF( v );
+        }
+
+        *num_arguments = c->args->size;
+        result = BJAM_MALLOC( c->args->size * sizeof( struct arg_list ) );
+        memcpy( result, c->args->data, c->args->size * sizeof( struct arg_list )
+            );
+        argument_list_compiler_free( c );
+        return result;
+    }
+    *num_arguments = 0;
+    return 0;
+}
+
+/* Wraps Python callable 'function' (plus an optional bjam signature used for
+ * argument checking) in a FUNCTION of type FUNCTION_PYTHON with an initial
+ * reference count of one.  Takes its own reference to the Python object;
+ * function_free releases it.
+ */
+FUNCTION * function_python( PyObject * function, PyObject * bjam_signature )
+{
+    PYTHON_FUNCTION * result = BJAM_MALLOC( sizeof( PYTHON_FUNCTION ) );
+
+    result->base.type = FUNCTION_PYTHON;
+    result->base.reference_count = 1;
+    result->base.rulename = 0;
+    result->base.formal_arguments = arg_list_compile_python( bjam_signature,
+        &result->base.num_formal_arguments );
+    Py_INCREF( function );
+    result->python_function = function;
+
+    return (FUNCTION *)result;
+}
+
+
+/* Converts the actual arguments in 'frame' to Python keyword arguments in
+ * dict 'kw', matching them against the 'formal_count' formal argument lists
+ * in 'formal'.  Raises a bjam argument_error (which does not return) on a
+ * missing or extra argument; a variadic formal stops all further checking.
+ */
+static void argument_list_to_python( struct arg_list * formal, int formal_count,
+    FUNCTION * function, FRAME * frame, PyObject * kw )
+{
+    LOL * all_actual = frame->args;
+    int i;
+
+    for ( i = 0; i < formal_count; ++i )
+    {
+        LIST * actual = lol_get( all_actual, i );
+        LISTITER actual_iter = list_begin( actual );
+        LISTITER const actual_end = list_end( actual );
+        int j;
+        for ( j = 0; j < formal[ i ].size; ++j )
+        {
+            struct argument * formal_arg = &formal[ i ].args[ j ];
+            PyObject * value;
+            LIST * l;
+
+            switch ( formal_arg->flags )
+            {
+            case ARG_ONE:
+                if ( actual_iter == actual_end )
+                    argument_error( "missing argument", function, frame,
+                        formal_arg->arg_name );
+                type_check_range( formal_arg->type_name, actual_iter, list_next(
+                    actual_iter ), frame, function, formal_arg->arg_name );
+                value = PyString_FromString( object_str( list_item( actual_iter
+                    ) ) );
+                actual_iter = list_next( actual_iter );
+                break;
+            case ARG_OPTIONAL:
+                if ( actual_iter == actual_end )
+                    value = 0;
+                else
+                {
+                    type_check_range( formal_arg->type_name, actual_iter,
+                        list_next( actual_iter ), frame, function,
+                        formal_arg->arg_name );
+                    value = PyString_FromString( object_str( list_item(
+                        actual_iter ) ) );
+                    actual_iter = list_next( actual_iter );
+                }
+                break;
+            case ARG_PLUS:
+                if ( actual_iter == actual_end )
+                    argument_error( "missing argument", function, frame,
+                        formal_arg->arg_name );
+                /* fallthrough */
+            case ARG_STAR:
+                /* '+' and '*' consume all remaining actuals in this group. */
+                type_check_range( formal_arg->type_name, actual_iter,
+                    actual_end, frame, function, formal_arg->arg_name );
+                l = list_copy_range( actual, actual_iter, actual_end );
+                value = list_to_python( l );
+                list_free( l );
+                actual_iter = actual_end;
+                break;
+            case ARG_VARIADIC:
+                return;
+            }
+
+            if ( value )
+            {
+                PyObject * key = PyString_FromString( object_str(
+                    formal_arg->arg_name ) );
+                PyDict_SetItem( kw, key, value );
+                Py_DECREF( key );
+                Py_DECREF( value );
+            }
+        }
+
+        if ( actual_iter != actual_end )
+            argument_error( "extra argument", function, frame, list_item(
+                actual_iter ) );
+    }
+
+    /* Any non-empty argument groups beyond the formals are an error too. */
+    for ( ; i < all_actual->count; ++i )
+    {
+        LIST * const actual = lol_get( all_actual, i );
+        if ( !list_empty( actual ) )
+            argument_error( "extra argument", function, frame, list_front(
+                actual ) );
+    }
+}
+
+
+/* Given a Python object, return a string to use in Jam code instead of the said
+ * object.
+ *
+ * If the object is a string, use the string value.
+ * If the object implements __jam_repr__ method, use that.
+ * Otherwise return 0.
+ */
+
+OBJECT * python_to_string( PyObject * value )
+{
+    if ( PyString_Check( value ) )
+        return object_new( PyString_AS_STRING( value ) );
+
+    /* See if this instance defines the special __jam_repr__ method. */
+    if ( PyInstance_Check( value )
+        && PyObject_HasAttrString( value, "__jam_repr__" ) )
+    {
+        PyObject * repr = PyObject_GetAttrString( value, "__jam_repr__" );
+        if ( repr )
+        {
+            PyObject * arguments2 = PyTuple_New( 0 );
+            PyObject * value2 = PyObject_Call( repr, arguments2, 0 );
+            Py_DECREF( repr );
+            Py_DECREF( arguments2 );
+            /* NOTE(review): when value2 is a string we return without
+             * releasing the value2 reference — looks like a refcount leak;
+             * confirm against the Python/C API ownership rules.
+             */
+            if ( PyString_Check( value2 ) )
+                return object_new( PyString_AS_STRING( value2 ) );
+            Py_DECREF( value2 );
+        }
+    }
+    return 0;
+}
+
+
+/* Returns the module used for all Python function calls, binding it lazily on
+ * first use and caching it for the rest of the run.
+ */
+static module_t * python_module()
+{
+    static module_t * python = 0;
+    if ( !python )
+        python = bindmodule( constant_python );
+    return python;
+}
+
+
+/* Invokes a Python-backed bjam function.  With a declared signature the
+ * actuals are passed as keyword arguments; otherwise each argument group is
+ * converted to a positional Python list.  The Python result is converted back
+ * to a LIST via python_to_string (None and unconvertible values yield L0);
+ * a raised Python exception is printed and an empty result returned.
+ */
+static LIST * call_python_function( PYTHON_FUNCTION * function, FRAME * frame )
+{
+    LIST * result = 0;
+    PyObject * arguments = 0;
+    PyObject * kw = NULL;
+    int i;
+    PyObject * py_result;
+    FRAME * prev_frame_before_python_call;
+
+    if ( function->base.formal_arguments )
+    {
+        arguments = PyTuple_New( 0 );
+        kw = PyDict_New();
+        argument_list_to_python( function->base.formal_arguments,
+            function->base.num_formal_arguments, &function->base, frame, kw );
+    }
+    else
+    {
+        arguments = PyTuple_New( frame->args->count );
+        for ( i = 0; i < frame->args->count; ++i )
+            PyTuple_SetItem( arguments, i, list_to_python( lol_get( frame->args,
+                i ) ) );
+    }
+
+    frame->module = python_module();
+
+    /* Save/restore so nested Python->Jam->Python calls see the right frame. */
+    prev_frame_before_python_call = frame_before_python_call;
+    frame_before_python_call = frame;
+    py_result = PyObject_Call( function->python_function, arguments, kw );
+    frame_before_python_call = prev_frame_before_python_call;
+    Py_DECREF( arguments );
+    Py_XDECREF( kw );
+    if ( py_result != NULL )
+    {
+        if ( PyList_Check( py_result ) )
+        {
+            int size = PyList_Size( py_result );
+            int i;
+            for ( i = 0; i < size; ++i )
+            {
+                OBJECT * s = python_to_string( PyList_GetItem( py_result, i ) );
+                if ( !s )
+                    fprintf( stderr,
+                        "Non-string object returned by Python call.\n" );
+                else
+                    result = list_push_back( result, s );
+            }
+        }
+        else if ( py_result == Py_None )
+        {
+            result = L0;
+        }
+        else
+        {
+            OBJECT * const s = python_to_string( py_result );
+            if ( s )
+                result = list_new( s );
+            else
+                /* We have tried all we could. Return empty list. There are
+                 * cases, e.g. feature.feature function that should return a
+                 * value for the benefit of Python code and which also can be
+                 * called by Jam code, where no sensible value can be returned.
+                 * We cannot even emit a warning, since there would be a pile of
+                 * them.
+                 */
+                result = L0;
+        }
+
+        Py_DECREF( py_result );
+    }
+    else
+    {
+        PyErr_Print();
+        fprintf( stderr, "Call failed\n" );
+    }
+
+    return result;
+}
+
+#endif
+
+
+/* Module shutdown: releases the interpreter's global value-stack storage
+ * ('stack' is a file-scope variable defined earlier in this file).
+ */
+void function_done( void )
+{
+    BJAM_FREE( stack );
+}
diff --git a/src/kenlm/jam-files/engine/function.h b/src/kenlm/jam-files/engine/function.h
new file mode 100644
index 0000000..64f26b3
--- /dev/null
+++ b/src/kenlm/jam-files/engine/function.h
@@ -0,0 +1,46 @@
+/*
+ * Copyright 2011 Steven Watanabe
+ * Distributed under the Boost Software License, Version 1.0.
+ * (See accompanying file LICENSE_1_0.txt or http://www.boost.org/LICENSE_1_0.txt)
+ */
+
+#ifndef FUNCTION_SW20111123_H
+#define FUNCTION_SW20111123_H
+
+#include "object.h"
+#include "frames.h"
+#include "lists.h"
+#include "parse.h"
+#include "strings.h"
+
+typedef struct _function FUNCTION;
+typedef struct _stack STACK;
+
+STACK * stack_global( void );
+void stack_push( STACK * s, LIST * l );
+LIST * stack_pop( STACK * s );
+
+FUNCTION * function_compile( PARSE * parse );
+FUNCTION * function_builtin( LIST * ( * func )( FRAME * frame, int flags ), int flags, const char * * args );
+void function_refer( FUNCTION * );
+void function_free( FUNCTION * );
+OBJECT * function_rulename( FUNCTION * );
+void function_set_rulename( FUNCTION *, OBJECT * );
+void function_location( FUNCTION *, OBJECT * *, int * );
+LIST * function_run( FUNCTION * function, FRAME * frame, STACK * s );
+
+FUNCTION * function_compile_actions( const char * actions, OBJECT * file, int line );
+void function_run_actions( FUNCTION * function, FRAME * frame, STACK * s, string * out );
+
+FUNCTION * function_bind_variables( FUNCTION * f, module_t * module, int * counter );
+FUNCTION * function_unbind_variables( FUNCTION * f );
+
+void function_done( void );
+
+#ifdef HAVE_PYTHON
+
+FUNCTION * function_python( PyObject * function, PyObject * bjam_signature );
+
+#endif
+
+#endif
diff --git a/src/kenlm/jam-files/engine/glob.c b/src/kenlm/jam-files/engine/glob.c
new file mode 100644
index 0000000..bec00ee
--- /dev/null
+++ b/src/kenlm/jam-files/engine/glob.c
@@ -0,0 +1,152 @@
+/*
+ * Copyright 1994 Christopher Seiwald. All rights reserved.
+ *
+ * This file is part of Jam - see jam.c for Copyright information.
+ */
+
+/*
+ * glob.c - match a string against a simple pattern
+ *
+ * Understands the following patterns:
+ *
+ * * any number of characters
+ * ? any single character
+ * [a-z] any single character in the range a-z
+ * [^a-z] any single character not in the range a-z
+ * \x match x
+ *
+ * External functions:
+ *
+ * glob() - match a string against a simple pattern
+ *
+ * Internal functions:
+ *
+ * globchars() - build a bitlist to check for character group match
+ */
+
+# include "jam.h"
+
+# define CHECK_BIT( tab, bit ) ( tab[ (bit)/8 ] & (1<<( (bit)%8 )) )
+# define BITLISTSIZE 16 /* bytes used for [chars] in compiled expr */
+
+static void globchars( const char * s, const char * e, char * b );
+
+
+/*
+ * glob() - match a string against a simple pattern.
+ */
+
+int glob( const char * c, const char * s )
+{
+ char bitlist[ BITLISTSIZE ];
+ const char * here;
+
+ for ( ; ; )
+ switch ( *c++ )
+ {
+ case '\0':
+ return *s ? -1 : 0;
+
+ case '?':
+ if ( !*s++ )
+ return 1;
+ break;
+
+ case '[':
+ /* Scan for matching ]. */
+
+ here = c;
+ do if ( !*c++ ) return 1;
+ while ( ( here == c ) || ( *c != ']' ) );
+ ++c;
+
+ /* Build character class bitlist. */
+
+ globchars( here, c, bitlist );
+
+ if ( !CHECK_BIT( bitlist, *(const unsigned char *)s ) )
+ return 1;
+ ++s;
+ break;
+
+ case '*':
+ here = s;
+
+ while ( *s )
+ ++s;
+
+ /* Try to match the rest of the pattern in a recursive */
+ /* call. If the match fails we'll back up chars, retrying. */
+
+ while ( s != here )
+ {
+ int r;
+
+ /* A fast path for the last token in a pattern. */
+ r = *c ? glob( c, s ) : *s ? -1 : 0;
+
+ if ( !r )
+ return 0;
+ if ( r < 0 )
+ return 1;
+ --s;
+ }
+ break;
+
+ case '\\':
+ /* Force literal match of next char. */
+ if ( !*c || ( *s++ != *c++ ) )
+ return 1;
+ break;
+
+ default:
+ if ( *s++ != c[ -1 ] )
+ return 1;
+ break;
+ }
+}
+
+
+/*
+ * globchars() - build a bitlist to check for character group match.
+ */
+
+static void globchars( const char * s, const char * e, char * b )
+{
+ int neg = 0;
+
+ memset( b, '\0', BITLISTSIZE );
+
+ if ( *s == '^' )
+ {
+ ++neg;
+ ++s;
+ }
+
+ while ( s < e )
+ {
+ int c;
+
+ if ( ( s + 2 < e ) && ( s[1] == '-' ) )
+ {
+ for ( c = s[0]; c <= s[2]; ++c )
+ b[ c/8 ] |= ( 1 << ( c % 8 ) );
+ s += 3;
+ }
+ else
+ {
+ c = *s++;
+ b[ c/8 ] |= ( 1 << ( c % 8 ) );
+ }
+ }
+
+ if ( neg )
+ {
+ int i;
+ for ( i = 0; i < BITLISTSIZE; ++i )
+ b[ i ] ^= 0377;
+ }
+
+ /* Do not include \0 in either $[chars] or $[^chars]. */
+ b[0] &= 0376;
+}
diff --git a/src/kenlm/jam-files/engine/hash.c b/src/kenlm/jam-files/engine/hash.c
new file mode 100644
index 0000000..36f8366
--- /dev/null
+++ b/src/kenlm/jam-files/engine/hash.c
@@ -0,0 +1,387 @@
+/*
+ * Copyright 1993, 1995 Christopher Seiwald.
+ *
+ * This file is part of Jam - see jam.c for Copyright information.
+ */
+
+/*
+ * hash.c - simple in-memory hashing routines
+ *
+ * External routines:
+ * hashinit() - initialize a hash table, returning a handle
+ * hashitem() - find a record in the table, and optionally enter a new one
+ * hashdone() - free a hash table, given its handle
+ *
+ * Internal routines:
+ * hashrehash() - resize and rebuild hp->tab, the hash table
+ */
+
+#include "jam.h"
+#include "hash.h"
+
+#include "compile.h"
+
+#include <assert.h>
+
+/* */
+#define HASH_DEBUG_PROFILE 1
+/* */
+
+/* Header attached to all hash table data items. */
+
+typedef struct item ITEM;
+struct item
+{
+ ITEM * next;
+};
+
+#define MAX_LISTS 32
+
+struct hash
+{
+ /*
+ * the hash table, just an array of item pointers
+ */
+ struct
+ {
+ int nel;
+ ITEM * * base;
+ } tab;
+
+ int bloat; /* tab.nel / items.nel */
+ int inel; /* initial number of elements */
+
+ /*
+ * the array of records, maintained by these routines - essentially a
+ * microallocator
+ */
+ struct
+ {
+ int more; /* how many more ITEMs fit in lists[ list ] */
+ ITEM * free; /* free list of items */
+ char * next; /* where to put more ITEMs in lists[ list ] */
+ int size; /* sizeof( ITEM ) + aligned datalen */
+ int nel; /* total ITEMs held by all lists[] */
+ int list; /* index into lists[] */
+
+ struct
+ {
+ int nel; /* total ITEMs held by this list */
+ char * base; /* base of ITEMs array */
+ } lists[ MAX_LISTS ];
+ } items;
+
+ char const * name; /* just for hashstats() */
+};
+
+static void hashrehash( struct hash * );
+static void hashstat( struct hash * );
+
+static unsigned int hash_keyval( OBJECT * key )
+{
+ return object_hash( key );
+}
+
+#define hash_bucket(hp, keyval) ((hp)->tab.base + ((keyval) % (hp)->tab.nel))
+
+#define hash_data_key(data) (*(OBJECT * *)(data))
+#define hash_item_data(item) ((HASHDATA *)((char *)item + sizeof(ITEM)))
+#define hash_item_key(item) (hash_data_key(hash_item_data(item)))
+
+
+#define ALIGNED(x) ((x + sizeof(ITEM) - 1) & ~(sizeof(ITEM) - 1))
+
+/*
+ * hashinit() - initialize a hash table, returning a handle
+ */
+
+struct hash * hashinit( int datalen, char const * name )
+{
+ struct hash * hp = (struct hash *)BJAM_MALLOC( sizeof( *hp ) );
+
+ hp->bloat = 3;
+ hp->tab.nel = 0;
+ hp->tab.base = 0;
+ hp->items.more = 0;
+ hp->items.free = 0;
+ hp->items.size = sizeof( ITEM ) + ALIGNED( datalen );
+ hp->items.list = -1;
+ hp->items.nel = 0;
+ hp->inel = 11; /* 47 */
+ hp->name = name;
+
+ return hp;
+}
+
+
+/*
+ * hash_search() - Find the hash item for the given data.
+ *
+ * Returns a pointer to a hashed item with the given key. If given a 'previous'
+ * pointer, makes it point to the item prior to the found item in the same
+ * bucket or to 0 if our item is the first item in its bucket.
+ */
+
+static ITEM * hash_search( struct hash * hp, unsigned int keyval,
+ OBJECT * keydata, ITEM * * previous )
+{
+ ITEM * i = *hash_bucket( hp, keyval );
+ ITEM * p = 0;
+ for ( ; i; i = i->next )
+ {
+ if ( object_equal( hash_item_key( i ), keydata ) )
+ {
+ if ( previous )
+ *previous = p;
+ return i;
+ }
+ p = i;
+ }
+ return 0;
+}
+
+
+/*
+ * hash_insert() - insert a record in the table or return the existing one
+ */
+
+HASHDATA * hash_insert( struct hash * hp, OBJECT * key, int * found )
+{
+ ITEM * i;
+ unsigned int keyval = hash_keyval( key );
+
+ #ifdef HASH_DEBUG_PROFILE
+ profile_frame prof[ 1 ];
+ if ( DEBUG_PROFILE )
+ profile_enter( 0, prof );
+ #endif
+
+ if ( !hp->items.more )
+ hashrehash( hp );
+
+ i = hash_search( hp, keyval, key, 0 );
+ if ( i )
+ *found = 1;
+ else
+ {
+ ITEM * * base = hash_bucket( hp, keyval );
+
+ /* Try to grab one from the free list. */
+ if ( hp->items.free )
+ {
+ i = hp->items.free;
+ hp->items.free = i->next;
+ assert( !hash_item_key( i ) );
+ }
+ else
+ {
+ i = (ITEM *)hp->items.next;
+ hp->items.next += hp->items.size;
+ }
+ --hp->items.more;
+ i->next = *base;
+ *base = i;
+ *found = 0;
+ }
+
+ #ifdef HASH_DEBUG_PROFILE
+ if ( DEBUG_PROFILE )
+ profile_exit( prof );
+ #endif
+
+ return hash_item_data( i );
+}
+
+
+/*
+ * hash_find() - find a record in the table or NULL if none exists
+ */
+
+HASHDATA * hash_find( struct hash * hp, OBJECT * key )
+{
+ ITEM * i;
+ unsigned int keyval = hash_keyval( key );
+
+ #ifdef HASH_DEBUG_PROFILE
+ profile_frame prof[ 1 ];
+ if ( DEBUG_PROFILE )
+ profile_enter( 0, prof );
+ #endif
+
+ if ( !hp->items.nel )
+ {
+ #ifdef HASH_DEBUG_PROFILE
+ if ( DEBUG_PROFILE )
+ profile_exit( prof );
+ #endif
+ return 0;
+ }
+
+ i = hash_search( hp, keyval, key, 0 );
+
+ #ifdef HASH_DEBUG_PROFILE
+ if ( DEBUG_PROFILE )
+ profile_exit( prof );
+ #endif
+
+ return i ? hash_item_data( i ) : 0;
+}
+
+
+/*
+ * hashrehash() - resize and rebuild hp->tab, the hash table
+ */
+
+static void hashrehash( struct hash * hp )
+{
+ int i = ++hp->items.list;
+ hp->items.more = i ? 2 * hp->items.nel : hp->inel;
+ hp->items.next = (char *)BJAM_MALLOC( hp->items.more * hp->items.size );
+ hp->items.free = 0;
+
+ hp->items.lists[ i ].nel = hp->items.more;
+ hp->items.lists[ i ].base = hp->items.next;
+ hp->items.nel += hp->items.more;
+
+ if ( hp->tab.base )
+ BJAM_FREE( (char *)hp->tab.base );
+
+ hp->tab.nel = hp->items.nel * hp->bloat;
+ hp->tab.base = (ITEM * *)BJAM_MALLOC( hp->tab.nel * sizeof( ITEM * * ) );
+
+ memset( (char *)hp->tab.base, '\0', hp->tab.nel * sizeof( ITEM * ) );
+
+ for ( i = 0; i < hp->items.list; ++i )
+ {
+ int nel = hp->items.lists[ i ].nel;
+ char * next = hp->items.lists[ i ].base;
+
+ for ( ; nel--; next += hp->items.size )
+ {
+ ITEM * i = (ITEM *)next;
+ ITEM * * ip = hp->tab.base + object_hash( hash_item_key( i ) ) %
+ hp->tab.nel;
+ /* code currently assumes rehashing only when there are no free
+ * items
+ */
+ assert( hash_item_key( i ) );
+
+ i->next = *ip;
+ *ip = i;
+ }
+ }
+}
+
+
+void hashenumerate( struct hash * hp, void (* f)( void *, void * ), void * data
+ )
+{
+ int i;
+ for ( i = 0; i <= hp->items.list; ++i )
+ {
+ char * next = hp->items.lists[ i ].base;
+ int nel = hp->items.lists[ i ].nel;
+ if ( i == hp->items.list )
+ nel -= hp->items.more;
+
+ for ( ; nel--; next += hp->items.size )
+ {
+ ITEM * const i = (ITEM *)next;
+ if ( hash_item_key( i ) != 0 ) /* Do not enumerate freed items. */
+ f( hash_item_data( i ), data );
+ }
+ }
+}
+
+
+/*
+ * hash_free() - free a hash table, given its handle
+ */
+
+void hash_free( struct hash * hp )
+{
+ int i;
+ if ( !hp )
+ return;
+ if ( hp->tab.base )
+ BJAM_FREE( (char *)hp->tab.base );
+ for ( i = 0; i <= hp->items.list; ++i )
+ BJAM_FREE( hp->items.lists[ i ].base );
+ BJAM_FREE( (char *)hp );
+}
+
+
+static void hashstat( struct hash * hp )
+{
+ struct hashstats stats[ 1 ];
+ hashstats_init( stats );
+ hashstats_add( stats, hp );
+ hashstats_print( stats, hp->name );
+}
+
+
+void hashstats_init( struct hashstats * stats )
+{
+ stats->count = 0;
+ stats->num_items = 0;
+ stats->tab_size = 0;
+ stats->item_size = 0;
+ stats->sets = 0;
+ stats->num_hashes = 0;
+}
+
+
+void hashstats_add( struct hashstats * stats, struct hash * hp )
+{
+ if ( hp )
+ {
+ ITEM * * tab = hp->tab.base;
+ int nel = hp->tab.nel;
+ int count = 0;
+ int sets = 0;
+ int i;
+
+ for ( i = 0; i < nel; ++i )
+ {
+ ITEM * item;
+ int here = 0;
+ for ( item = tab[ i ]; item; item = item->next )
+ ++here;
+
+ count += here;
+ if ( here > 0 )
+ ++sets;
+ }
+
+ stats->count += count;
+ stats->sets += sets;
+ stats->num_items += hp->items.nel;
+ stats->tab_size += hp->tab.nel;
+ stats->item_size = hp->items.size;
+ ++stats->num_hashes;
+ }
+}
+
+
+void hashstats_print( struct hashstats * stats, char const * name )
+{
+ printf( "%s table: %d+%d+%d (%dK+%luK+%luK) items+table+hash, %f density\n",
+ name,
+ stats->count,
+ stats->num_items,
+ stats->tab_size,
+ stats->num_items * stats->item_size / 1024,
+ (long unsigned)stats->tab_size * sizeof( ITEM * * ) / 1024,
+ (long unsigned)stats->num_hashes * sizeof( struct hash ) / 1024,
+ (float)stats->count / (float)stats->sets );
+}
+
+
+void hashdone( struct hash * hp )
+{
+ if ( !hp )
+ return;
+ if ( DEBUG_MEM || DEBUG_PROFILE )
+ hashstat( hp );
+ hash_free( hp );
+}
diff --git a/src/kenlm/jam-files/engine/hash.h b/src/kenlm/jam-files/engine/hash.h
new file mode 100644
index 0000000..7c40e8c
--- /dev/null
+++ b/src/kenlm/jam-files/engine/hash.h
@@ -0,0 +1,79 @@
+/*
+ * Copyright 1993, 1995 Christopher Seiwald.
+ *
+ * This file is part of Jam - see jam.c for Copyright information.
+ */
+
+/*
+ * hash.h - simple in-memory hashing routines
+ */
+
+#ifndef BOOST_JAM_HASH_H
+#define BOOST_JAM_HASH_H
+
+#include "object.h"
+
+/*
+ * An opaque struct representing an item in the hash table. The first element of
+ * every struct stored in the table must be an OBJECT * which is treated as the
+ * key.
+ */
+typedef struct hashdata HASHDATA;
+
+/*
+ * hashinit() - initialize a hash table, returning a handle.
+ *
+ * Parameters:
+ * datalen - item size
+ * name - used for debugging
+ */
+struct hash * hashinit( int datalen, char const * name );
+
+/*
+ * hash_free() - free a hash table, given its handle
+ */
+void hash_free( struct hash * );
+void hashdone( struct hash * );
+
+/*
+ * hashenumerate() - call f(i, data) on each item, i in the hash table. The
+ * enumeration order is unspecified.
+ */
+void hashenumerate( struct hash *, void (* f)( void *, void * ), void * data );
+
+/*
+ * hash_insert() - insert a new item in a hash table, or return an existing one.
+ *
+ * Preconditions:
+ * - hp must be a hash table created by hashinit()
+ * - key must be an object created by object_new()
+ *
+ * Postconditions:
+ * - if the key does not already exist in the hash table, *found == 0 and the
+ * result will be a pointer to an uninitialized item. The key of the new
+ * item must be set to a value equal to key before any further operations on
+ * the hash table except hashdone().
+ * - if the key is present then *found == 1 and the result is a pointer to the
+ * existing record.
+ */
+HASHDATA * hash_insert( struct hash *, OBJECT * key, int * found );
+
+/*
+ * hash_find() - find a record in the table or NULL if none exists
+ */
+HASHDATA * hash_find( struct hash *, OBJECT * key );
+
+struct hashstats {
+ int count;
+ int num_items;
+ int tab_size;
+ int item_size;
+ int sets;
+ int num_hashes;
+};
+
+void hashstats_init( struct hashstats * stats );
+void hashstats_add( struct hashstats * stats, struct hash * );
+void hashstats_print( struct hashstats * stats, char const * name );
+
+#endif
diff --git a/src/kenlm/jam-files/engine/hcache.c b/src/kenlm/jam-files/engine/hcache.c
new file mode 100644
index 0000000..3cf15f7
--- /dev/null
+++ b/src/kenlm/jam-files/engine/hcache.c
@@ -0,0 +1,519 @@
+/*
+ * This file has been donated to Jam.
+ */
+
+/*
+ * Craig W. McPheeters, Alias|Wavefront.
+ *
+ * hcache.c hcache.h - handle cacheing of #includes in source files.
+ *
+ * Create a cache of files scanned for headers. When starting jam, look for the
+ * cache file and load it if present. When finished the binding phase, create a
+ * new header cache. The cache contains files, their timestamps and the header
+ * files found in their scan. During the binding phase of jam, look in the
+ * header cache first for the headers contained in a file. If the cache is
+ * present and valid, use its contents. This results in dramatic speedups with
+ * large projects (e.g. 3min -> 1min startup for one project.)
+ *
+ * External routines:
+ * hcache_init() - read and parse the local .jamdeps file.
+ * hcache_done() - write a new .jamdeps file.
+ * hcache() - return list of headers on target. Use cache or do a scan.
+ *
+ * The dependency file format is an ASCII file with 1 line per target. Each line
+ * has the following fields:
+ * @boundname@ timestamp_sec timestamp_nsec @file@ @file@ @file@ ...
+ */
+
+#ifdef OPT_HEADER_CACHE_EXT
+
+#include "jam.h"
+#include "hcache.h"
+
+#include "hash.h"
+#include "headers.h"
+#include "lists.h"
+#include "modules.h"
+#include "object.h"
+#include "parse.h"
+#include "regexp.h"
+#include "rules.h"
+#include "search.h"
+#include "timestamp.h"
+#include "variable.h"
+
+typedef struct hcachedata HCACHEDATA ;
+
+struct hcachedata
+{
+ OBJECT * boundname;
+ timestamp time;
+ LIST * includes;
+ LIST * hdrscan; /* the HDRSCAN value for this target */
+ int age; /* if too old, we will remove it from cache */
+ HCACHEDATA * next;
+};
+
+
+static struct hash * hcachehash = 0;
+static HCACHEDATA * hcachelist = 0;
+
+static int queries = 0;
+static int hits = 0;
+
+#define CACHE_FILE_VERSION "version 5"
+#define CACHE_RECORD_HEADER "header"
+#define CACHE_RECORD_END "end"
+
+
+/*
+ * Return the name of the header cache file. May return NULL.
+ *
+ * The user sets this by setting the HCACHEFILE variable in a Jamfile. We cache
+ * the result so the user can not change the cache file during header scanning.
+ */
+
+static const char * cache_name( void )
+{
+ static OBJECT * name = 0;
+ if ( !name )
+ {
+ LIST * const hcachevar = var_get( root_module(), constant_HCACHEFILE );
+
+ if ( !list_empty( hcachevar ) )
+ {
+ TARGET * const t = bindtarget( list_front( hcachevar ) );
+
+ pushsettings( root_module(), t->settings );
+ /* Do not expect the cache file to be generated, so pass 0 as the
+ * third argument to search. Expect the location to be specified via
+ * LOCATE, so pass 0 as the fourth arugment.
+ */
+ object_free( t->boundname );
+ t->boundname = search( t->name, &t->time, 0, 0 );
+ popsettings( root_module(), t->settings );
+
+ name = object_copy( t->boundname );
+ }
+ }
+ return name ? object_str( name ) : 0;
+}
+
+
+/*
+ * Return the maximum age a cache entry can have before it is purged from the
+ * cache.
+ */
+
+static int cache_maxage( void )
+{
+ int age = 100;
+ LIST * const var = var_get( root_module(), constant_HCACHEMAXAGE );
+ if ( !list_empty( var ) )
+ {
+ age = atoi( object_str( list_front( var ) ) );
+ if ( age < 0 )
+ age = 0;
+ }
+ return age;
+}
+
+
+/*
+ * Read a netstring. The caveat is that the string can not contain ASCII 0. The
+ * returned value is as returned by object_new().
+ */
+
+OBJECT * read_netstring( FILE * f )
+{
+ unsigned long len;
+ static char * buf = NULL;
+ static unsigned long buf_len = 0;
+
+ if ( fscanf( f, " %9lu", &len ) != 1 )
+ return NULL;
+ if ( fgetc( f ) != (int)'\t' )
+ return NULL;
+
+ if ( len > 1024 * 64 )
+ return NULL; /* sanity check */
+
+ if ( len > buf_len )
+ {
+ unsigned long new_len = buf_len * 2;
+ if ( new_len < len )
+ new_len = len;
+ buf = (char *)BJAM_REALLOC( buf, new_len + 1 );
+ if ( buf )
+ buf_len = new_len;
+ }
+
+ if ( !buf )
+ return NULL;
+
+ if ( fread( buf, 1, len, f ) != len )
+ return NULL;
+ if ( fgetc( f ) != (int)'\n' )
+ return NULL;
+
+ buf[ len ] = 0;
+ return object_new( buf );
+}
+
+
+/*
+ * Write a netstring.
+ */
+
+void write_netstring( FILE * f, char const * s )
+{
+ if ( !s )
+ s = "";
+ fprintf( f, "%lu\t%s\n", (long unsigned)strlen( s ), s );
+}
+
+
+void hcache_init()
+{
+ FILE * f;
+ OBJECT * version = 0;
+ int header_count = 0;
+ const char * hcachename;
+
+ if ( hcachehash )
+ return;
+
+ hcachehash = hashinit( sizeof( HCACHEDATA ), "hcache" );
+
+ if ( !( hcachename = cache_name() ) )
+ return;
+
+ if ( !( f = fopen( hcachename, "rb" ) ) )
+ return;
+
+ version = read_netstring( f );
+
+ if ( !version || strcmp( object_str( version ), CACHE_FILE_VERSION ) )
+ goto bail;
+
+ while ( 1 )
+ {
+ HCACHEDATA cachedata;
+ HCACHEDATA * c;
+ OBJECT * record_type = 0;
+ OBJECT * time_secs_str = 0;
+ OBJECT * time_nsecs_str = 0;
+ OBJECT * age_str = 0;
+ OBJECT * includes_count_str = 0;
+ OBJECT * hdrscan_count_str = 0;
+ int i;
+ int count;
+ LIST * l;
+ int found;
+
+ cachedata.boundname = 0;
+ cachedata.includes = 0;
+ cachedata.hdrscan = 0;
+
+ record_type = read_netstring( f );
+ if ( !record_type )
+ {
+ fprintf( stderr, "invalid %s\n", hcachename );
+ goto cleanup;
+ }
+ if ( !strcmp( object_str( record_type ), CACHE_RECORD_END ) )
+ {
+ object_free( record_type );
+ break;
+ }
+ if ( strcmp( object_str( record_type ), CACHE_RECORD_HEADER ) )
+ {
+ fprintf( stderr, "invalid %s with record separator <%s>\n",
+ hcachename, record_type ? object_str( record_type ) : "<null>" );
+ goto cleanup;
+ }
+
+ cachedata.boundname = read_netstring( f );
+ time_secs_str = read_netstring( f );
+ time_nsecs_str = read_netstring( f );
+ age_str = read_netstring( f );
+ includes_count_str = read_netstring( f );
+
+ if ( !cachedata.boundname || !time_secs_str || !time_nsecs_str ||
+ !age_str || !includes_count_str )
+ {
+ fprintf( stderr, "invalid %s\n", hcachename );
+ goto cleanup;
+ }
+
+ timestamp_init( &cachedata.time, atoi( object_str( time_secs_str ) ),
+ atoi( object_str( time_nsecs_str ) ) );
+ cachedata.age = atoi( object_str( age_str ) ) + 1;
+
+ count = atoi( object_str( includes_count_str ) );
+ for ( l = L0, i = 0; i < count; ++i )
+ {
+ OBJECT * const s = read_netstring( f );
+ if ( !s )
+ {
+ fprintf( stderr, "invalid %s\n", hcachename );
+ list_free( l );
+ goto cleanup;
+ }
+ l = list_push_back( l, s );
+ }
+ cachedata.includes = l;
+
+ hdrscan_count_str = read_netstring( f );
+ if ( !hdrscan_count_str )
+ {
+ fprintf( stderr, "invalid %s\n", hcachename );
+ goto cleanup;
+ }
+
+ count = atoi( object_str( hdrscan_count_str ) );
+ for ( l = L0, i = 0; i < count; ++i )
+ {
+ OBJECT * const s = read_netstring( f );
+ if ( !s )
+ {
+ fprintf( stderr, "invalid %s\n", hcachename );
+ list_free( l );
+ goto cleanup;
+ }
+ l = list_push_back( l, s );
+ }
+ cachedata.hdrscan = l;
+
+ c = (HCACHEDATA *)hash_insert( hcachehash, cachedata.boundname, &found )
+ ;
+ if ( !found )
+ {
+ c->boundname = cachedata.boundname;
+ c->includes = cachedata.includes;
+ c->hdrscan = cachedata.hdrscan;
+ c->age = cachedata.age;
+ timestamp_copy( &c->time, &cachedata.time );
+ }
+ else
+ {
+ fprintf( stderr, "can not insert header cache item, bailing on %s"
+ "\n", hcachename );
+ goto cleanup;
+ }
+
+ c->next = hcachelist;
+ hcachelist = c;
+
+ ++header_count;
+
+ object_free( record_type );
+ object_free( time_secs_str );
+ object_free( time_nsecs_str );
+ object_free( age_str );
+ object_free( includes_count_str );
+ object_free( hdrscan_count_str );
+ continue;
+
+cleanup:
+
+ if ( record_type ) object_free( record_type );
+ if ( time_secs_str ) object_free( time_secs_str );
+ if ( time_nsecs_str ) object_free( time_nsecs_str );
+ if ( age_str ) object_free( age_str );
+ if ( includes_count_str ) object_free( includes_count_str );
+ if ( hdrscan_count_str ) object_free( hdrscan_count_str );
+
+ if ( cachedata.boundname ) object_free( cachedata.boundname );
+ if ( cachedata.includes ) list_free( cachedata.includes );
+ if ( cachedata.hdrscan ) list_free( cachedata.hdrscan );
+
+ goto bail;
+ }
+
+ if ( DEBUG_HEADER )
+ printf( "hcache read from file %s\n", hcachename );
+
+bail:
+ if ( version )
+ object_free( version );
+ fclose( f );
+}
+
+
+void hcache_done()
+{
+ FILE * f;
+ HCACHEDATA * c;
+ int header_count = 0;
+ const char * hcachename;
+ int maxage;
+
+ if ( !hcachehash )
+ return;
+
+ if ( !( hcachename = cache_name() ) )
+ goto cleanup;
+
+ if ( !( f = fopen( hcachename, "wb" ) ) )
+ goto cleanup;
+
+ maxage = cache_maxage();
+
+ /* Print out the version. */
+ write_netstring( f, CACHE_FILE_VERSION );
+
+ c = hcachelist;
+ for ( c = hcachelist; c; c = c->next )
+ {
+ LISTITER iter;
+ LISTITER end;
+ char time_secs_str[ 30 ];
+ char time_nsecs_str[ 30 ];
+ char age_str[ 30 ];
+ char includes_count_str[ 30 ];
+ char hdrscan_count_str[ 30 ];
+
+ if ( maxage == 0 )
+ c->age = 0;
+ else if ( c->age > maxage )
+ continue;
+
+ sprintf( includes_count_str, "%lu", (long unsigned)list_length(
+ c->includes ) );
+ sprintf( hdrscan_count_str, "%lu", (long unsigned)list_length(
+ c->hdrscan ) );
+ sprintf( time_secs_str, "%lu", (long unsigned)c->time.secs );
+ sprintf( time_nsecs_str, "%lu", (long unsigned)c->time.nsecs );
+ sprintf( age_str, "%lu", (long unsigned)c->age );
+
+ write_netstring( f, CACHE_RECORD_HEADER );
+ write_netstring( f, object_str( c->boundname ) );
+ write_netstring( f, time_secs_str );
+ write_netstring( f, time_nsecs_str );
+ write_netstring( f, age_str );
+ write_netstring( f, includes_count_str );
+ for ( iter = list_begin( c->includes ), end = list_end( c->includes );
+ iter != end; iter = list_next( iter ) )
+ write_netstring( f, object_str( list_item( iter ) ) );
+ write_netstring( f, hdrscan_count_str );
+ for ( iter = list_begin( c->hdrscan ), end = list_end( c->hdrscan );
+ iter != end; iter = list_next( iter ) )
+ write_netstring( f, object_str( list_item( iter ) ) );
+ fputs( "\n", f );
+ ++header_count;
+ }
+ write_netstring( f, CACHE_RECORD_END );
+
+ if ( DEBUG_HEADER )
+ printf( "hcache written to %s. %d dependencies, %.0f%% hit rate\n",
+ hcachename, header_count, queries ? 100.0 * hits / queries : 0 );
+
+ fclose ( f );
+
+cleanup:
+ for ( c = hcachelist; c; c = c->next )
+ {
+ list_free( c->includes );
+ list_free( c->hdrscan );
+ object_free( c->boundname );
+ }
+
+ hcachelist = 0;
+ if ( hcachehash )
+ hashdone( hcachehash );
+ hcachehash = 0;
+}
+
+
+LIST * hcache( TARGET * t, int rec, regexp * re[], LIST * hdrscan )
+{
+ HCACHEDATA * c;
+
+ ++queries;
+
+ if ( ( c = (HCACHEDATA *)hash_find( hcachehash, t->boundname ) ) )
+ {
+ if ( !timestamp_cmp( &c->time, &t->time ) )
+ {
+ LIST * const l1 = hdrscan;
+ LIST * const l2 = c->hdrscan;
+ LISTITER iter1 = list_begin( l1 );
+ LISTITER const end1 = list_end( l1 );
+ LISTITER iter2 = list_begin( l2 );
+ LISTITER const end2 = list_end( l2 );
+ while ( iter1 != end1 && iter2 != end2 )
+ {
+ if ( !object_equal( list_item( iter1 ), list_item( iter2 ) ) )
+ iter1 = end1;
+ else
+ {
+ iter1 = list_next( iter1 );
+ iter2 = list_next( iter2 );
+ }
+ }
+ if ( iter1 != end1 || iter2 != end2 )
+ {
+ if ( DEBUG_HEADER )
+ {
+ printf( "HDRSCAN out of date in cache for %s\n",
+ object_str( t->boundname ) );
+ printf(" real : ");
+ list_print( hdrscan );
+ printf( "\n cached: " );
+ list_print( c->hdrscan );
+ printf( "\n" );
+ }
+
+ list_free( c->includes );
+ list_free( c->hdrscan );
+ c->includes = L0;
+ c->hdrscan = L0;
+ }
+ else
+ {
+ if ( DEBUG_HEADER )
+ printf( "using header cache for %s\n", object_str(
+ t->boundname ) );
+ c->age = 0;
+ ++hits;
+ return list_copy( c->includes );
+ }
+ }
+ else
+ {
+ if ( DEBUG_HEADER )
+ printf ("header cache out of date for %s\n", object_str(
+ t->boundname ) );
+ list_free( c->includes );
+ list_free( c->hdrscan );
+ c->includes = L0;
+ c->hdrscan = L0;
+ }
+ }
+ else
+ {
+ int found;
+ c = (HCACHEDATA *)hash_insert( hcachehash, t->boundname, &found );
+ if ( !found )
+ {
+ c->boundname = object_copy( t->boundname );
+ c->next = hcachelist;
+ hcachelist = c;
+ }
+ }
+
+ /* 'c' points at the cache entry. Its out of date. */
+ {
+ LIST * const l = headers1( L0, t->boundname, rec, re );
+
+ timestamp_copy( &c->time, &t->time );
+ c->age = 0;
+ c->includes = list_copy( l );
+ c->hdrscan = list_copy( hdrscan );
+
+ return l;
+ }
+}
+
+#endif /* OPT_HEADER_CACHE_EXT */
diff --git a/src/kenlm/jam-files/engine/hcache.h b/src/kenlm/jam-files/engine/hcache.h
new file mode 100644
index 0000000..a9d929d
--- /dev/null
+++ b/src/kenlm/jam-files/engine/hcache.h
@@ -0,0 +1,19 @@
+/*
+ * This file is not part of Jam
+ */
+
+/*
+ * hcache.h - handle #includes in source files
+ */
+#ifndef HCACHE_H
+#define HCACHE_H
+
+#include "lists.h"
+#include "regexp.h"
+#include "rules.h"
+
+void hcache_init( void );
+void hcache_done( void );
+LIST * hcache( TARGET * t, int rec, regexp * re[], LIST * hdrscan );
+
+#endif
diff --git a/src/kenlm/jam-files/engine/hdrmacro.c b/src/kenlm/jam-files/engine/hdrmacro.c
new file mode 100644
index 0000000..eb4fe90
--- /dev/null
+++ b/src/kenlm/jam-files/engine/hdrmacro.c
@@ -0,0 +1,139 @@
+/*
+ * Copyright 1993, 2000 Christopher Seiwald.
+ *
+ * This file is part of Jam - see jam.c for Copyright information.
+ */
+
+/* This file is ALSO:
+ * Copyright 2001-2004 David Abrahams.
+ * Distributed under the Boost Software License, Version 1.0.
+ * (See accompanying file LICENSE_1_0.txt or http://www.boost.org/LICENSE_1_0.txt)
+ */
+
+/*
+ * hdrmacro.c - handle header files that define macros used in #include
+ * statements.
+ *
+ * we look for lines like "#define MACRO <....>" or '#define MACRO " "' in
+ * the target file. When found, we then phony up a rule invocation like:
+ *
+ * $(HDRRULE) <target> : <resolved included files> ;
+ *
+ * External routines:
+ * headers1() - scan a target for "#include MACRO" lines and try to resolve
+ * them when needed
+ *
+ * Internal routines:
+ * headers1() - using regexp, scan a file and build include LIST
+ */
+
+#include "jam.h"
+#include "hdrmacro.h"
+
+#include "compile.h"
+#include "hash.h"
+#include "lists.h"
+#include "object.h"
+#include "parse.h"
+#include "rules.h"
+#include "strings.h"
+#include "subst.h"
+#include "variable.h"
+
+
+/* this type is used to store a dictionary of file header macros */
+typedef struct header_macro
+{
+ OBJECT * symbol;
+ OBJECT * filename; /* we could maybe use a LIST here ?? */
+} HEADER_MACRO;
+
+static struct hash * header_macros_hash = 0;
+
+
+/*
+ * headers() - scan a target for include files and call HDRRULE
+ */
+
+#define MAXINC 10
+
+void macro_headers( TARGET * t )
+{
+ static regexp * re = 0;
+ FILE * f;
+ char buf[ 1024 ];
+
+ if ( DEBUG_HEADER )
+ printf( "macro header scan for %s\n", object_str( t->name ) );
+
+ /* This regexp is used to detect lines of the form
+ * "#define MACRO <....>" or "#define MACRO "....."
+ * in the header macro files.
+ */
+ if ( !re )
+ {
+ OBJECT * const re_str = object_new(
+ "^[ ]*#[ ]*define[ ]*([A-Za-z][A-Za-z0-9_]*)[ ]*"
+ "[<\"]([^\">]*)[\">].*$" );
+ re = regex_compile( re_str );
+ object_free( re_str );
+ }
+
+ if ( !( f = fopen( object_str( t->boundname ), "r" ) ) )
+ return;
+
+ while ( fgets( buf, sizeof( buf ), f ) )
+ {
+ HEADER_MACRO var;
+ HEADER_MACRO * v = &var;
+
+ if ( regexec( re, buf ) && re->startp[ 1 ] )
+ {
+ OBJECT * symbol;
+ int found;
+ /* we detected a line that looks like "#define MACRO filename */
+ ( (char *)re->endp[ 1 ] )[ 0 ] = '\0';
+ ( (char *)re->endp[ 2 ] )[ 0 ] = '\0';
+
+ if ( DEBUG_HEADER )
+ printf( "macro '%s' used to define filename '%s' in '%s'\n",
+ re->startp[ 1 ], re->startp[ 2 ], object_str( t->boundname )
+ );
+
+ /* add macro definition to hash table */
+ if ( !header_macros_hash )
+ header_macros_hash = hashinit( sizeof( HEADER_MACRO ),
+ "hdrmacros" );
+
+ symbol = object_new( re->startp[ 1 ] );
+ v = (HEADER_MACRO *)hash_insert( header_macros_hash, symbol, &found
+ );
+ if ( !found )
+ {
+ v->symbol = symbol;
+ v->filename = object_new( re->startp[ 2 ] ); /* never freed */
+ }
+ else
+ object_free( symbol );
+ /* XXXX: FOR NOW, WE IGNORE MULTIPLE MACRO DEFINITIONS !! */
+ /* WE MIGHT AS WELL USE A LIST TO STORE THEM.. */
+ }
+ }
+
+ fclose( f );
+}
+
+
+OBJECT * macro_header_get( OBJECT * macro_name )
+{
+ HEADER_MACRO * v;
+ if ( header_macros_hash && ( v = (HEADER_MACRO *)hash_find(
+ header_macros_hash, macro_name ) ) )
+ {
+ if ( DEBUG_HEADER )
+ printf( "### macro '%s' evaluated to '%s'\n", object_str( macro_name
+ ), object_str( v->filename ) );
+ return v->filename;
+ }
+ return 0;
+}
diff --git a/src/kenlm/jam-files/engine/hdrmacro.h b/src/kenlm/jam-files/engine/hdrmacro.h
new file mode 100644
index 0000000..7595ede
--- /dev/null
+++ b/src/kenlm/jam-files/engine/hdrmacro.h
@@ -0,0 +1,21 @@
+/*
+ * Copyright 1993, 1995 Christopher Seiwald.
+ *
+ * This file is part of Jam - see jam.c for Copyright information.
+ */
+
+/*
+ * hdrmacro.h - parses header files for #define MACRO <filename> or
+ * #define MACRO "filename" definitions
+ */
+
+#ifndef HDRMACRO_SW20111118_H
+#define HDRMACRO_SW20111118_H
+
+#include "object.h"
+#include "rules.h"
+
+void macro_headers( TARGET * );
+OBJECT * macro_header_get( OBJECT * macro_name );
+
+#endif
diff --git a/src/kenlm/jam-files/engine/headers.c b/src/kenlm/jam-files/engine/headers.c
new file mode 100644
index 0000000..0d9558d
--- /dev/null
+++ b/src/kenlm/jam-files/engine/headers.c
@@ -0,0 +1,197 @@
+/*
+ * Copyright 1993, 2000 Christopher Seiwald.
+ *
+ * This file is part of Jam - see jam.c for Copyright information.
+ */
+/* This file is ALSO:
+ * Copyright 2001-2004 David Abrahams.
+ * Distributed under the Boost Software License, Version 1.0.
+ * (See accompanying file LICENSE_1_0.txt or http://www.boost.org/LICENSE_1_0.txt)
+ */
+
+/*
+ * headers.c - handle #includes in source files
+ *
+ * Using regular expressions provided as the variable $(HDRSCAN), headers()
+ * searches a file for #include files and phonies up a rule invocation:
+ * $(HDRRULE) <target> : <include files> ;
+ *
+ * External routines:
+ * headers() - scan a target for include files and call HDRRULE
+ *
+ * Internal routines:
+ * headers1() - using regexp, scan a file and build include LIST
+ */
+
+#include "jam.h"
+#include "headers.h"
+
+#include "compile.h"
+#include "hdrmacro.h"
+#include "lists.h"
+#include "modules.h"
+#include "object.h"
+#include "parse.h"
+#include "rules.h"
+#include "subst.h"
+#include "variable.h"
+
+#ifdef OPT_HEADER_CACHE_EXT
+# include "hcache.h"
+#endif
+
+#ifndef OPT_HEADER_CACHE_EXT
+static LIST * headers1( LIST *, OBJECT * file, int rec, regexp * re[] );
+#endif
+
+
+/*
+ * headers() - scan a target for include files and call HDRRULE
+ */
+
+#define MAXINC 10
+
+void headers( TARGET * t )
+{
+ LIST * hdrscan;
+ LIST * hdrrule;
+ #ifndef OPT_HEADER_CACHE_EXT
+ LIST * headlist = L0;
+ #endif
+ regexp * re[ MAXINC ];
+ int rec = 0;
+ LISTITER iter;
+ LISTITER end;
+
+ hdrscan = var_get( root_module(), constant_HDRSCAN );
+ if ( list_empty( hdrscan ) )
+ return;
+
+ hdrrule = var_get( root_module(), constant_HDRRULE );
+ if ( list_empty( hdrrule ) )
+ return;
+
+ if ( DEBUG_HEADER )
+ printf( "header scan %s\n", object_str( t->name ) );
+
+ /* Compile all regular expressions in HDRSCAN */
+ iter = list_begin( hdrscan );
+ end = list_end( hdrscan );
+ for ( ; ( rec < MAXINC ) && iter != end; iter = list_next( iter ) )
+ {
+ re[ rec++ ] = regex_compile( list_item( iter ) );
+ }
+
+ /* Doctor up call to HDRRULE rule */
+ /* Call headers1() to get LIST of included files. */
+ {
+ FRAME frame[ 1 ];
+ frame_init( frame );
+ lol_add( frame->args, list_new( object_copy( t->name ) ) );
+#ifdef OPT_HEADER_CACHE_EXT
+ lol_add( frame->args, hcache( t, rec, re, hdrscan ) );
+#else
+ lol_add( frame->args, headers1( headlist, t->boundname, rec, re ) );
+#endif
+
+ if ( lol_get( frame->args, 1 ) )
+ {
+ OBJECT * rulename = list_front( hdrrule );
+ /* The third argument to HDRRULE is the bound name of $(<). */
+ lol_add( frame->args, list_new( object_copy( t->boundname ) ) );
+ list_free( evaluate_rule( bindrule( rulename, frame->module ), rulename, frame ) );
+ }
+
+ /* Clean up. */
+ frame_free( frame );
+ }
+}
+
+
+/*
+ * headers1() - using regexp, scan a file and build include LIST.
+ */
+
+#ifndef OPT_HEADER_CACHE_EXT
+static
+#endif
+LIST * headers1( LIST * l, OBJECT * file, int rec, regexp * re[] )
+{
+ FILE * f;
+ char buf[ 1024 ];
+ int i;
+ static regexp * re_macros = 0;
+
+#ifdef OPT_IMPROVED_PATIENCE_EXT
+ static int count = 0;
+ ++count;
+ if ( ( ( count == 100 ) || !( count % 1000 ) ) && DEBUG_MAKE )
+ {
+ printf( "...patience...\n" );
+ fflush( stdout );
+ }
+#endif
+
+ /* The following regexp is used to detect cases where a file is included
+ * through a line like "#include MACRO".
+ */
+ if ( re_macros == 0 )
+ {
+ OBJECT * const re_str = object_new(
+ "#[ \t]*include[ \t]*([A-Za-z][A-Za-z0-9_]*).*$" );
+ re_macros = regex_compile( re_str );
+ object_free( re_str );
+ }
+
+ if ( !( f = fopen( object_str( file ), "r" ) ) )
+ return l;
+
+ while ( fgets( buf, sizeof( buf ), f ) )
+ {
+ for ( i = 0; i < rec; ++i )
+ if ( regexec( re[ i ], buf ) && re[ i ]->startp[ 1 ] )
+ {
+ ( (char *)re[ i ]->endp[ 1 ] )[ 0 ] = '\0';
+ if ( DEBUG_HEADER )
+ printf( "header found: %s\n", re[ i ]->startp[ 1 ] );
+ l = list_push_back( l, object_new( re[ i ]->startp[ 1 ] ) );
+ }
+
+ /* Special treatment for #include MACRO. */
+ if ( regexec( re_macros, buf ) && re_macros->startp[ 1 ] )
+ {
+ OBJECT * header_filename;
+ OBJECT * macro_name;
+
+ ( (char *)re_macros->endp[ 1 ] )[ 0 ] = '\0';
+
+ if ( DEBUG_HEADER )
+ printf( "macro header found: %s", re_macros->startp[ 1 ] );
+
+ macro_name = object_new( re_macros->startp[ 1 ] );
+ header_filename = macro_header_get( macro_name );
+ object_free( macro_name );
+ if ( header_filename )
+ {
+ if ( DEBUG_HEADER )
+ printf( " resolved to '%s'\n", object_str( header_filename )
+ );
+ l = list_push_back( l, object_copy( header_filename ) );
+ }
+ else
+ {
+ if ( DEBUG_HEADER )
+ printf( " ignored !!\n" );
+ }
+ }
+ }
+
+ fclose( f );
+ return l;
+}
+
+
+void regerror( char const * s )
+{
+ printf( "re error %s\n", s );
+}
diff --git a/src/kenlm/jam-files/engine/headers.h b/src/kenlm/jam-files/engine/headers.h
new file mode 100644
index 0000000..1c0a642
--- /dev/null
+++ b/src/kenlm/jam-files/engine/headers.h
@@ -0,0 +1,25 @@
+/*
+ * Copyright 1993, 1995 Christopher Seiwald.
+ *
+ * This file is part of Jam - see jam.c for Copyright information.
+ */
+
+/*
+ * headers.h - handle #includes in source files
+ */
+
+#ifndef HEADERS_SW20111118_H
+#define HEADERS_SW20111118_H
+
+#include "object.h"
+#include "rules.h"
+#include "regexp.h"
+
+void headers( TARGET * t );
+
+#ifdef OPT_HEADER_CACHE_EXT
+struct regexp;
+LIST * headers1( LIST *l, OBJECT * file, int rec, struct regexp *re[] );
+#endif
+
+#endif
diff --git a/src/kenlm/jam-files/engine/jam.c b/src/kenlm/jam-files/engine/jam.c
new file mode 100644
index 0000000..1c80eec
--- /dev/null
+++ b/src/kenlm/jam-files/engine/jam.c
@@ -0,0 +1,656 @@
+/*
+ * /+\
+ * +\ Copyright 1993-2002 Christopher Seiwald and Perforce Software, Inc.
+ * \+/
+ *
+ * This file is part of jam.
+ *
+ * License is hereby granted to use this software and distribute it freely, as
+ * long as this copyright notice is retained and modifications are clearly
+ * marked.
+ *
+ * ALL WARRANTIES ARE HEREBY DISCLAIMED.
+ */
+
+/* This file is ALSO:
+ * Copyright 2001-2004 David Abrahams.
+ * Distributed under the Boost Software License, Version 1.0.
+ * (See accompanying file LICENSE_1_0.txt or copy at
+ * http://www.boost.org/LICENSE_1_0.txt)
+ */
+
+/*
+ * jam.c - make redux
+ *
+ * See Jam.html for usage information.
+ *
+ * These comments document the code.
+ *
+ * The top half of the code is structured such:
+ *
+ * jam
+ * / | \
+ * +---+ | \
+ * / | \
+ * jamgram option \
+ * / | \ \
+ * / | \ \
+ * / | \ |
+ * scan | compile make
+ * | | / | \ / | \
+ * | | / | \ / | \
+ * | | / | \ / | \
+ * jambase parse | rules search make1
+ * | | | \
+ * | | | \
+ * | | | \
+ * builtins timestamp command execute
+ * |
+ * |
+ * |
+ * filesys
+ *
+ *
+ * The support routines are called by all of the above, but themselves are
+ * layered thus:
+ *
+ * variable|expand
+ * / | |
+ * / | |
+ * / | |
+ * lists | pathsys
+ * \ |
+ * \ hash
+ * \ |
+ * \ |
+ * \ |
+ * \ |
+ * \ |
+ * object
+ *
+ * Roughly, the modules are:
+ *
+ * builtins.c - jam's built-in rules
+ * command.c - maintain lists of commands
+ * compile.c - compile parsed jam statements
+ * exec*.c - execute a shell script on a specific OS
+ * file*.c - scan directories and archives on a specific OS
+ * hash.c - simple in-memory hashing routines
+ * hdrmacro.c - handle header file parsing for filename macro definitions
+ * headers.c - handle #includes in source files
+ * jambase.c - compilable copy of Jambase
+ * jamgram.y - jam grammar
+ * lists.c - maintain lists of strings
+ * make.c - bring a target up to date, once rules are in place
+ * make1.c - execute command to bring targets up to date
+ * object.c - string manipulation routines
+ * option.c - command line option processing
+ * parse.c - make and destroy parse trees as driven by the parser
+ * path*.c - manipulate file names on a specific OS
+ * hash.c - simple in-memory hashing routines
+ * regexp.c - Henry Spencer's regexp
+ * rules.c - access to RULEs, TARGETs, and ACTIONs
+ * scan.c - the jam yacc scanner
+ * search.c - find a target along $(SEARCH) or $(LOCATE)
+ * timestamp.c - get the timestamp of a file or archive member
+ * variable.c - handle jam multi-element variables
+ */
+
+
+#include "jam.h"
+#include "patchlevel.h"
+
+#include "builtins.h"
+#include "class.h"
+#include "compile.h"
+#include "constants.h"
+#include "filesys.h"
+#include "function.h"
+#include "hcache.h"
+#include "lists.h"
+#include "make.h"
+#include "object.h"
+#include "option.h"
+#include "output.h"
+#include "parse.h"
+#include "cwd.h"
+#include "rules.h"
+#include "scan.h"
+#include "search.h"
+#include "strings.h"
+#include "timestamp.h"
+#include "variable.h"
+
+/* Macintosh is "special" */
+#ifdef OS_MAC
+# include <QuickDraw.h>
+#endif
+
+/* And UNIX for this. */
+#ifdef unix
+# include <sys/utsname.h>
+# include <signal.h>
+#endif
+
+struct globs globs =
+{
+ 0, /* noexec */
+ 1, /* jobs */
+ 0, /* quitquick */
+ 0, /* newestfirst */
+ 0, /* pipes action stdout and stderr merged to action output */
+#ifdef OS_MAC
+ { 0, 0 }, /* debug - suppress tracing output */
+#else
+ { 0, 1 }, /* debug ... */
+#endif
+ 0, /* output commands, not run them */
+ 0, /* action timeout */
+ 0 /* maximum buffer size zero is all output */
+};
+
+/* Symbols to be defined as true for use in Jambase. */
+static char * othersyms[] = { OSMAJOR, OSMINOR, OSPLAT, JAMVERSYM, 0 };
+
+
+/* Known for sure:
+ * mac needs arg_enviro
+ * OS2 needs extern environ
+ */
+
+#ifdef OS_MAC
+# define use_environ arg_environ
+# ifdef MPW
+ QDGlobals qd;
+# endif
+#endif
+
+/* on Win32-LCC */
+#if defined( OS_NT ) && defined( __LCC__ )
+# define use_environ _environ
+#endif
+
+#if defined( __MWERKS__)
+# define use_environ _environ
+ extern char * * _environ;
+#endif
+
+#ifndef use_environ
+# define use_environ environ
+# if !defined( __WATCOM__ ) && !defined( OS_OS2 ) && !defined( OS_NT )
+ extern char **environ;
+# endif
+#endif
+
+#if YYDEBUG != 0
+ extern int yydebug;
+#endif
+
+#ifndef NDEBUG
+static void run_unit_tests()
+{
+# if defined( USE_EXECNT )
+ extern void execnt_unit_test();
+ execnt_unit_test();
+# endif
+ string_unit_test();
+}
+#endif
+
+int anyhow = 0;
+
+#ifdef HAVE_PYTHON
+ extern PyObject * bjam_call ( PyObject * self, PyObject * args );
+ extern PyObject * bjam_import_rule ( PyObject * self, PyObject * args );
+ extern PyObject * bjam_define_action( PyObject * self, PyObject * args );
+ extern PyObject * bjam_variable ( PyObject * self, PyObject * args );
+ extern PyObject * bjam_backtrace ( PyObject * self, PyObject * args );
+ extern PyObject * bjam_caller ( PyObject * self, PyObject * args );
+#endif
+
+void regex_done();
+
+char const * saved_argv0;
+
+int main( int argc, char * * argv, char * * arg_environ )
+{
+ int n;
+ char * s;
+ struct bjam_option optv[ N_OPTS ];
+ char const * all = "all";
+ int status;
+ int arg_c = argc;
+ char * * arg_v = argv;
+ char const * progname = argv[ 0 ];
+ module_t * environ_module;
+
+ saved_argv0 = argv[ 0 ];
+
+ BJAM_MEM_INIT();
+
+#ifdef OS_MAC
+ InitGraf( &qd.thePort );
+#endif
+
+ --argc;
+ ++argv;
+
+ if ( getoptions( argc, argv, "-:l:m:d:j:p:f:gs:t:ano:qv", optv ) < 0 )
+ {
+ printf( "\nusage: %s [ options ] targets...\n\n", progname );
+
+ printf( "-a Build all targets, even if they are current.\n" );
+ printf( "-dx Set the debug level to x (0-9).\n" );
+ printf( "-fx Read x instead of Jambase.\n" );
+ /* printf( "-g Build from newest sources first.\n" ); */
+ printf( "-jx Run up to x shell commands concurrently.\n" );
+ printf( "-lx Limit actions to x number of seconds after which they are stopped.\n" );
+ printf( "-mx Maximum target output saved (kb), default is to save all output.\n" );
+ printf( "-n Don't actually execute the updating actions.\n" );
+ printf( "-ox Write the updating actions to file x.\n" );
+ printf( "-px x=0, pipes action stdout and stderr merged into action output.\n" );
+ printf( "-q Quit quickly as soon as a target fails.\n" );
+ printf( "-sx=y Set variable x=y, overriding environment.\n" );
+ printf( "-tx Rebuild x, even if it is up-to-date.\n" );
+ printf( "-v Print the version of jam and exit.\n" );
+ printf( "--x Option is ignored.\n\n" );
+
+ exit( EXITBAD );
+ }
+
+ /* Version info. */
+ if ( ( s = getoptval( optv, 'v', 0 ) ) )
+ {
+ printf( "Boost.Jam Version %s. %s.\n", VERSION, OSMINOR );
+ printf( " Copyright 1993-2002 Christopher Seiwald and Perforce "
+ "Software, Inc.\n" );
+ printf( " Copyright 2001 David Turner.\n" );
+ printf( " Copyright 2001-2004 David Abrahams.\n" );
+ printf( " Copyright 2002-2008 Rene Rivera.\n" );
+ printf( " Copyright 2003-2008 Vladimir Prus.\n" );
+ return EXITOK;
+ }
+
+ /* Pick up interesting options. */
+ if ( ( s = getoptval( optv, 'n', 0 ) ) )
+ {
+ ++globs.noexec;
+ globs.debug[ 2 ] = 1;
+ }
+
+ if ( ( s = getoptval( optv, 'p', 0 ) ) )
+ {
+ /* Undocumented -p3 (acts like both -p1 -p2) means separate pipe action
+ * stdout and stderr.
+ */
+ globs.pipe_action = atoi( s );
+ if ( globs.pipe_action < 0 || 3 < globs.pipe_action )
+ {
+ printf( "Invalid pipe descriptor '%d', valid values are -p[0..3]."
+ "\n", globs.pipe_action );
+ exit( EXITBAD );
+ }
+ }
+
+ if ( ( s = getoptval( optv, 'q', 0 ) ) )
+ globs.quitquick = 1;
+
+ if ( ( s = getoptval( optv, 'a', 0 ) ) )
+ anyhow++;
+
+ if ( ( s = getoptval( optv, 'j', 0 ) ) )
+ {
+ globs.jobs = atoi( s );
+ if ( globs.jobs < 1 || globs.jobs > MAXJOBS )
+ {
+ printf( "Invalid value for the '-j' option, valid values are 1 "
+ "through %d.\n", MAXJOBS );
+ exit( EXITBAD );
+ }
+ }
+
+ if ( ( s = getoptval( optv, 'g', 0 ) ) )
+ globs.newestfirst = 1;
+
+ if ( ( s = getoptval( optv, 'l', 0 ) ) )
+ globs.timeout = atoi( s );
+
+ if ( ( s = getoptval( optv, 'm', 0 ) ) )
+ globs.max_buf = atoi( s ) * 1024; /* convert to kb */
+
+ /* Turn on/off debugging */
+ for ( n = 0; ( s = getoptval( optv, 'd', n ) ); ++n )
+ {
+ int i;
+
+ /* First -d, turn off defaults. */
+ if ( !n )
+ for ( i = 0; i < DEBUG_MAX; ++i )
+ globs.debug[i] = 0;
+
+ i = atoi( s );
+
+ if ( ( i < 0 ) || ( i >= DEBUG_MAX ) )
+ {
+ printf( "Invalid debug level '%s'.\n", s );
+ continue;
+ }
+
+ /* n turns on levels 1-n. */
+ /* +n turns on level n. */
+ if ( *s == '+' )
+ globs.debug[ i ] = 1;
+ else while ( i )
+ globs.debug[ i-- ] = 1;
+ }
+
+ constants_init();
+ cwd_init();
+
+ {
+ PROFILE_ENTER( MAIN );
+
+#ifdef HAVE_PYTHON
+ {
+ PROFILE_ENTER( MAIN_PYTHON );
+ Py_Initialize();
+ {
+ static PyMethodDef BjamMethods[] = {
+ {"call", bjam_call, METH_VARARGS,
+ "Call the specified bjam rule."},
+ {"import_rule", bjam_import_rule, METH_VARARGS,
+ "Imports Python callable to bjam."},
+ {"define_action", bjam_define_action, METH_VARARGS,
+ "Defines a command line action."},
+ {"variable", bjam_variable, METH_VARARGS,
+ "Obtains a variable from bjam's global module."},
+ {"backtrace", bjam_backtrace, METH_VARARGS,
+ "Returns bjam backtrace from the last call into Python."},
+ {"caller", bjam_caller, METH_VARARGS,
+ "Returns the module from which the last call into Python is made."},
+ {NULL, NULL, 0, NULL}
+ };
+
+ Py_InitModule( "bjam", BjamMethods );
+ }
+ PROFILE_EXIT( MAIN_PYTHON );
+ }
+#endif
+
+#ifndef NDEBUG
+ run_unit_tests();
+#endif
+#if YYDEBUG != 0
+ if ( DEBUG_PARSE )
+ yydebug = 1;
+#endif
+
+ /* Set JAMDATE. */
+ {
+ timestamp current;
+ timestamp_current( &current );
+ var_set( root_module(), constant_JAMDATE, list_new( outf_time(
+ &current ) ), VAR_SET );
+ }
+
+ /* Set JAM_VERSION. */
+ var_set( root_module(), constant_JAM_VERSION,
+ list_push_back( list_push_back( list_new(
+ object_new( VERSION_MAJOR_SYM ) ),
+ object_new( VERSION_MINOR_SYM ) ),
+ object_new( VERSION_PATCH_SYM ) ),
+ VAR_SET );
+
+ /* Set JAMUNAME. */
+#ifdef unix
+ {
+ struct utsname u;
+
+ if ( uname( &u ) >= 0 )
+ {
+ var_set( root_module(), constant_JAMUNAME,
+ list_push_back(
+ list_push_back(
+ list_push_back(
+ list_push_back(
+ list_new(
+ object_new( u.sysname ) ),
+ object_new( u.nodename ) ),
+ object_new( u.release ) ),
+ object_new( u.version ) ),
+ object_new( u.machine ) ), VAR_SET );
+ }
+ }
+#endif /* unix */
+
+ /* Set JAM_TIMESTAMP_RESOLUTION. */
+ {
+ timestamp fmt_resolution[ 1 ];
+ file_supported_fmt_resolution( fmt_resolution );
+ var_set( root_module(), constant_JAM_TIMESTAMP_RESOLUTION, list_new(
+ object_new( timestamp_timestr( fmt_resolution ) ) ), VAR_SET );
+ }
+
+ /* Load up environment variables. */
+
+ /* First into the global module, with splitting, for backward
+ * compatibility.
+ */
+ var_defines( root_module(), use_environ, 1 );
+
+ environ_module = bindmodule( constant_ENVIRON );
+ /* Then into .ENVIRON, without splitting. */
+ var_defines( environ_module, use_environ, 0 );
+
+ /*
+ * Jam defined variables OS & OSPLAT. We load them after environment, so
+ * that setting OS in environment does not change Jam's notion of the
+ * current platform.
+ */
+ var_defines( root_module(), othersyms, 1 );
+
+ /* Load up variables set on command line. */
+ for ( n = 0; ( s = getoptval( optv, 's', n ) ); ++n )
+ {
+ char * symv[ 2 ];
+ symv[ 0 ] = s;
+ symv[ 1 ] = 0;
+ var_defines( root_module(), symv, 1 );
+ var_defines( environ_module, symv, 0 );
+ }
+
+ /* Set the ARGV to reflect the complete list of arguments of invocation.
+ */
+ for ( n = 0; n < arg_c; ++n )
+ var_set( root_module(), constant_ARGV, list_new( object_new(
+ arg_v[ n ] ) ), VAR_APPEND );
+
+ /* Initialize built-in rules. */
+ load_builtins();
+
+ /* Add the targets in the command line to the update list. */
+ for ( n = 1; n < arg_c; ++n )
+ {
+ if ( arg_v[ n ][ 0 ] == '-' )
+ {
+ char * f = "-:l:d:j:f:gs:t:ano:qv";
+ for ( ; *f; ++f ) if ( *f == arg_v[ n ][ 1 ] ) break;
+ if ( ( f[ 1 ] == ':' ) && ( arg_v[ n ][ 2 ] == '\0' ) ) ++n;
+ }
+ else
+ {
+ OBJECT * const target = object_new( arg_v[ n ] );
+ mark_target_for_updating( target );
+ object_free( target );
+ }
+ }
+
+ if ( list_empty( targets_to_update() ) )
+ mark_target_for_updating( constant_all );
+
+ /* Parse ruleset. */
+ {
+ FRAME frame[ 1 ];
+ frame_init( frame );
+ for ( n = 0; ( s = getoptval( optv, 'f', n ) ); ++n )
+ {
+ OBJECT * const filename = object_new( s );
+ parse_file( filename, frame );
+ object_free( filename );
+ }
+
+ if ( !n )
+ parse_file( constant_plus, frame );
+ }
+
+ status = yyanyerrors();
+
+ /* Manually touch -t targets. */
+ for ( n = 0; ( s = getoptval( optv, 't', n ) ); ++n )
+ {
+ OBJECT * const target = object_new( s );
+ touch_target( target );
+ object_free( target );
+ }
+
+ /* If an output file is specified, set globs.cmdout to that. */
+ if ( ( s = getoptval( optv, 'o', 0 ) ) )
+ {
+ if ( !( globs.cmdout = fopen( s, "w" ) ) )
+ {
+ printf( "Failed to write to '%s'\n", s );
+ exit( EXITBAD );
+ }
+ ++globs.noexec;
+ }
+
+ /* The build system may set the PARALLELISM variable to override -j
+ * options.
+ */
+ {
+ LIST * const p = var_get( root_module(), constant_PARALLELISM );
+ if ( !list_empty( p ) )
+ {
+ int const j = atoi( object_str( list_front( p ) ) );
+ if ( j < 1 || j > MAXJOBS )
+ printf( "Invalid value of PARALLELISM: %s. Valid values "
+ "are 1 through %d.\n", object_str( list_front( p ) ),
+ MAXJOBS );
+ else
+ globs.jobs = j;
+ }
+ }
+
+ /* KEEP_GOING overrides -q option. */
+ {
+ LIST * const p = var_get( root_module(), constant_KEEP_GOING );
+ if ( !list_empty( p ) )
+ globs.quitquick = atoi( object_str( list_front( p ) ) ) ? 0 : 1;
+ }
+
+ /* Now make target. */
+ {
+ PROFILE_ENTER( MAIN_MAKE );
+ LIST * const targets = targets_to_update();
+ if ( !list_empty( targets ) )
+ status |= make( targets, anyhow );
+ else
+ status = last_update_now_status;
+ PROFILE_EXIT( MAIN_MAKE );
+ }
+
+ PROFILE_EXIT( MAIN );
+ }
+
+ if ( DEBUG_PROFILE )
+ profile_dump();
+
+
+#ifdef OPT_HEADER_CACHE_EXT
+ hcache_done();
+#endif
+
+ clear_targets_to_update();
+
+ /* Widely scattered cleanup. */
+ property_set_done();
+ file_done();
+ rules_done();
+ timestamp_done();
+ search_done();
+ class_done();
+ modules_done();
+ regex_done();
+ cwd_done();
+ path_done();
+ function_done();
+ list_done();
+ constants_done();
+ object_done();
+
+ /* Close cmdout. */
+ if ( globs.cmdout )
+ fclose( globs.cmdout );
+
+#ifdef HAVE_PYTHON
+ Py_Finalize();
+#endif
+
+ BJAM_MEM_CLOSE();
+
+ return status ? EXITBAD : EXITOK;
+}
+
+
+/*
+ * executable_path()
+ */
+
+#if defined(_WIN32)
+# define WIN32_LEAN_AND_MEAN
+# include <windows.h>
+char * executable_path( char const * argv0 )
+{
+ char buf[ 1024 ];
+ DWORD const ret = GetModuleFileName( NULL, buf, sizeof( buf ) );
+ return ( !ret || ret == sizeof( buf ) ) ? NULL : strdup( buf );
+}
+#elif defined(__APPLE__) /* Not tested */
+# include <mach-o/dyld.h>
+char *executable_path( char const * argv0 )
+{
+ char buf[ 1024 ];
+ uint32_t size = sizeof( buf );
+ return _NSGetExecutablePath( buf, &size ) ? NULL : strdup( buf );
+}
+#elif defined(sun) || defined(__sun) /* Not tested */
+# include <stdlib.h>
+char * executable_path( char const * argv0 )
+{
+ return strdup( getexecname() );
+}
+#elif defined(__FreeBSD__)
+# include <sys/sysctl.h>
+char * executable_path( char const * argv0 )
+{
+ int mib[ 4 ] = { CTL_KERN, KERN_PROC, KERN_PROC_PATHNAME, -1 };
+ char buf[ 1024 ];
+ size_t size = sizeof( buf );
+ sysctl( mib, 4, buf, &size, NULL, 0 );
+ return ( !size || size == sizeof( buf ) ) ? NULL : strndup( buf, size );
+}
+#elif defined(__linux__)
+# include <unistd.h>
+char * executable_path( char const * argv0 )
+{
+ char buf[ 1024 ];
+ ssize_t const ret = readlink( "/proc/self/exe", buf, sizeof( buf ) );
+ return ( !ret || ret == sizeof( buf ) ) ? NULL : strndup( buf, ret );
+}
+#else
+char * executable_path( char const * argv0 )
+{
+ /* If argv0 is an absolute path, assume it is the right absolute path. */
+ return argv0[ 0 ] == '/' ? strdup( argv0 ) : NULL;
+}
+#endif
diff --git a/src/kenlm/jam-files/engine/jam.h b/src/kenlm/jam-files/engine/jam.h
new file mode 100644
index 0000000..86ad0e8
--- /dev/null
+++ b/src/kenlm/jam-files/engine/jam.h
@@ -0,0 +1,475 @@
+/*
+ * Copyright 1993, 1995 Christopher Seiwald.
+ *
+ * This file is part of Jam - see jam.c for Copyright information.
+ */
+
+/* This file is ALSO:
+ * Copyright 2001-2004 David Abrahams.
+ * Distributed under the Boost Software License, Version 1.0.
+ * (See accompanying file LICENSE_1_0.txt or copy at
+ * http://www.boost.org/LICENSE_1_0.txt)
+ */
+
+/*
+ * jam.h - includes and globals for jam
+ */
+
+#ifndef JAM_H_VP_2003_08_01
+#define JAM_H_VP_2003_08_01
+
+#ifdef HAVE_PYTHON
+#include <Python.h>
+#endif
+
+/* Assume popen support is available unless known otherwise. */
+#define HAVE_POPEN 1
+
+/*
+ * Windows NT
+ */
+
+#ifdef NT
+
+#include <ctype.h>
+#include <fcntl.h>
+#include <malloc.h>
+#ifndef __MWERKS__
+ #include <memory.h>
+#endif
+#include <stdio.h>
+#include <stdlib.h>
+#include <signal.h>
+#include <string.h>
+#include <time.h>
+
+#define OSMAJOR "NT=true"
+#define OSMINOR "OS=NT"
+#define OS_NT
+#define SPLITPATH ';'
+#define MAXLINE (undefined__see_execnt_c) /* max chars per command line */
+#define USE_EXECNT
+#define PATH_DELIM '\\'
+
+/* AS400 cross-compile from NT. */
+
+#ifdef AS400
+ #undef OSMINOR
+ #undef OSMAJOR
+ #define OSMAJOR "AS400=true"
+ #define OSMINOR "OS=AS400"
+ #define OS_AS400
+#endif
+
+/* Metrowerks Standard Library on Windows. */
+
+#ifdef __MSL__
+ #undef HAVE_POPEN
+#endif
+
+#endif /* #ifdef NT */
+
+
+/*
+ * Windows MingW32
+ */
+
+#ifdef MINGW
+
+#include <fcntl.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <ctype.h>
+#include <malloc.h>
+#include <memory.h>
+#include <signal.h>
+#include <string.h>
+#include <time.h>
+
+#define OSMAJOR "MINGW=true"
+#define OSMINOR "OS=MINGW"
+#define OS_NT
+#define SPLITPATH ';'
+#define MAXLINE 996 /* max chars per command line */
+#define USE_EXECUNIX
+#define PATH_DELIM '\\'
+
+#endif /* #ifdef MINGW */
+
+
+/*
+ * God fearing UNIX.
+ */
+
+#ifndef OSMINOR
+
+#define OSMAJOR "UNIX=true"
+#define USE_EXECUNIX
+#define USE_FILEUNIX
+#define PATH_DELIM '/'
+
+#ifdef _AIX
+ #define unix
+ #define MAXLINE 23552 /* 24k - 1k, max chars per command line */
+ #define OSMINOR "OS=AIX"
+ #define OS_AIX
+ #define NO_VFORK
+#endif
+#ifdef AMIGA
+ #define OSMINOR "OS=AMIGA"
+ #define OS_AMIGA
+#endif
+#ifdef __BEOS__
+ #define unix
+ #define OSMINOR "OS=BEOS"
+ #define OS_BEOS
+ #define NO_VFORK
+#endif
+#ifdef __bsdi__
+ #define OSMINOR "OS=BSDI"
+ #define OS_BSDI
+#endif
+#if defined (COHERENT) && defined (_I386)
+ #define OSMINOR "OS=COHERENT"
+ #define OS_COHERENT
+ #define NO_VFORK
+#endif
+#if defined(__cygwin__) || defined(__CYGWIN__)
+ #define OSMINOR "OS=CYGWIN"
+ #define OS_CYGWIN
+#endif
+#if defined(__FreeBSD__) && !defined(__DragonFly__)
+ #define OSMINOR "OS=FREEBSD"
+ #define OS_FREEBSD
+#endif
+#ifdef __DragonFly__
+ #define OSMINOR "OS=DRAGONFLYBSD"
+ #define OS_DRAGONFLYBSD
+#endif
+#ifdef __DGUX__
+ #define OSMINOR "OS=DGUX"
+ #define OS_DGUX
+#endif
+#ifdef __hpux
+ #define OSMINOR "OS=HPUX"
+ #define OS_HPUX
+#endif
+#ifdef __OPENNT
+ #define unix
+ #define OSMINOR "OS=INTERIX"
+ #define OS_INTERIX
+ #define NO_VFORK
+#endif
+#ifdef __sgi
+ #define OSMINOR "OS=IRIX"
+ #define OS_IRIX
+ #define NO_VFORK
+#endif
+#ifdef __ISC
+ #define OSMINOR "OS=ISC"
+ #define OS_ISC
+ #define NO_VFORK
+#endif
+#ifdef linux
+ #define OSMINOR "OS=LINUX"
+ #define OS_LINUX
+#endif
+#ifdef __Lynx__
+ #define OSMINOR "OS=LYNX"
+ #define OS_LYNX
+ #define NO_VFORK
+ #define unix
+#endif
+#ifdef __MACHTEN__
+ #define OSMINOR "OS=MACHTEN"
+ #define OS_MACHTEN
+#endif
+#ifdef mpeix
+ #define unix
+ #define OSMINOR "OS=MPEIX"
+ #define OS_MPEIX
+ #define NO_VFORK
+#endif
+#ifdef __MVS__
+ #define unix
+ #define OSMINOR "OS=MVS"
+ #define OS_MVS
+#endif
+#ifdef _ATT4
+ #define OSMINOR "OS=NCR"
+ #define OS_NCR
+#endif
+#ifdef __NetBSD__
+ #define unix
+ #define OSMINOR "OS=NETBSD"
+ #define OS_NETBSD
+ #define NO_VFORK
+#endif
+#ifdef __QNX__
+ #define unix
+ #ifdef __QNXNTO__
+ #define OSMINOR "OS=QNXNTO"
+ #define OS_QNXNTO
+ #else
+ #define OSMINOR "OS=QNX"
+ #define OS_QNX
+ #define NO_VFORK
+ #define MAXLINE 996 /* max chars per command line */
+ #endif
+#endif
+#ifdef NeXT
+ #ifdef __APPLE__
+ #define OSMINOR "OS=RHAPSODY"
+ #define OS_RHAPSODY
+ #else
+ #define OSMINOR "OS=NEXT"
+ #define OS_NEXT
+ #endif
+#endif
+#ifdef __APPLE__
+ #define unix
+ #define OSMINOR "OS=MACOSX"
+ #define OS_MACOSX
+#endif
+#ifdef __osf__
+ #ifndef unix
+ #define unix
+ #endif
+ #define OSMINOR "OS=OSF"
+ #define OS_OSF
+#endif
+#ifdef _SEQUENT_
+ #define OSMINOR "OS=PTX"
+ #define OS_PTX
+#endif
+#ifdef M_XENIX
+ #define OSMINOR "OS=SCO"
+ #define OS_SCO
+ #define NO_VFORK
+#endif
+#ifdef sinix
+ #define unix
+ #define OSMINOR "OS=SINIX"
+ #define OS_SINIX
+#endif
+#ifdef sun
+ #if defined(__svr4__) || defined(__SVR4)
+ #define OSMINOR "OS=SOLARIS"
+ #define OS_SOLARIS
+ #else
+ #define OSMINOR "OS=SUNOS"
+ #define OS_SUNOS
+ #endif
+#endif
+#ifdef ultrix
+ #define OSMINOR "OS=ULTRIX"
+ #define OS_ULTRIX
+#endif
+#ifdef _UNICOS
+ #define OSMINOR "OS=UNICOS"
+ #define OS_UNICOS
+#endif
+#if defined(__USLC__) && !defined(M_XENIX)
+ #define OSMINOR "OS=UNIXWARE"
+ #define OS_UNIXWARE
+#endif
+#ifdef __OpenBSD__
+ #define OSMINOR "OS=OPENBSD"
+ #define OS_OPENBSD
+ #define unix
+#endif
+#if defined (__FreeBSD_kernel__) && !defined(__FreeBSD__)
+ #define OSMINOR "OS=KFREEBSD"
+ #define OS_KFREEBSD
+#endif
+#ifndef OSMINOR
+ #define OSMINOR "OS=UNKNOWN"
+#endif
+
+/* All the UNIX includes */
+
+#include <sys/types.h>
+
+#ifndef OS_MPEIX
+ #include <sys/file.h>
+#endif
+
+#include <fcntl.h>
+#include <stdio.h>
+#include <ctype.h>
+#include <signal.h>
+#include <string.h>
+#include <time.h>
+#include <unistd.h>
+
+#ifndef OS_QNX
+ #include <memory.h>
+#endif
+
+#ifndef OS_ULTRIX
+ #include <stdlib.h>
+#endif
+
+#if !defined( OS_BSDI ) && \
+ !defined( OS_FREEBSD ) && \
+ !defined( OS_DRAGONFLYBSD ) && \
+ !defined( OS_NEXT ) && \
+ !defined( OS_MACHTEN ) && \
+ !defined( OS_MACOSX ) && \
+ !defined( OS_RHAPSODY ) && \
+ !defined( OS_MVS ) && \
+ !defined( OS_OPENBSD )
+ #include <malloc.h>
+#endif
+
+#endif /* #ifndef OSMINOR */
+
+
+/*
+ * OSPLAT definitions - suppressed when it is a one-of-a-kind.
+ */
+
+#if defined( _M_PPC ) || \
+ defined( PPC ) || \
+ defined( ppc ) || \
+ defined( __powerpc__ ) || \
+ defined( __ppc__ )
+ #define OSPLAT "OSPLAT=PPC"
+#endif
+
+#if defined( _ALPHA_ ) || \
+ defined( __alpha__ )
+ #define OSPLAT "OSPLAT=AXP"
+#endif
+
+#if defined( _i386_ ) || \
+ defined( __i386__ ) || \
+ defined( __i386 ) || \
+ defined( _M_IX86 )
+ #define OSPLAT "OSPLAT=X86"
+#endif
+
+#if defined( __ia64__ ) || \
+ defined( __IA64__ ) || \
+ defined( __ia64 )
+ #define OSPLAT "OSPLAT=IA64"
+#endif
+
+#if defined( __x86_64__ ) || \
+ defined( __amd64__ ) || \
+ defined( _M_AMD64 )
+ #define OSPLAT "OSPLAT=X86_64"
+#endif
+
+#if defined( __sparc__ ) || \
+ defined( __sparc )
+ #define OSPLAT "OSPLAT=SPARC"
+#endif
+
+#ifdef __mips__
+ #define OSPLAT "OSPLAT=MIPS"
+#endif
+
+#ifdef __arm__
+ #define OSPLAT "OSPLAT=ARM"
+#endif
+
+#ifdef __s390__
+ #define OSPLAT "OSPLAT=390"
+#endif
+
+#ifdef __hppa
+ #define OSPLAT "OSPLAT=PARISC"
+#endif
+
+#ifndef OSPLAT
+ #define OSPLAT ""
+#endif
+
+
+/*
+ * Jam implementation misc.
+ */
+
+#ifndef MAXLINE
+ #define MAXLINE 102400 /* max chars per command line */
+#endif
+
+#ifndef EXITOK
+ #define EXITOK 0
+ #define EXITBAD 1
+#endif
+
+#ifndef SPLITPATH
+ #define SPLITPATH ':'
+#endif
+
+/* You probably do not need to muck with these. */
+
+#define MAXSYM 1024 /* longest symbol in the environment */
+#define MAXJPATH 1024 /* longest filename */
+
+#define MAXJOBS 64 /* internally enforced -j limit */
+#define MAXARGC 32 /* words in $(JAMSHELL) */
+
+/* Jam private definitions below. */
+
+#define DEBUG_MAX 14
+
+
+struct globs
+{
+ int noexec;
+ int jobs;
+ int quitquick;
+ int newestfirst; /* build newest sources first */
+ int pipe_action;
+ char debug[ DEBUG_MAX ];
+ FILE * cmdout; /* print cmds, not run them */
+ long timeout; /* number of seconds to limit actions to,
+ * default 0 for no limit.
+ */
+ int dart; /* output build and test results formatted for
+ * Dart
+ */
+ int max_buf; /* maximum amount of output saved from target
+ * (kb)
+ */
+};
+
+extern struct globs globs;
+
+#define DEBUG_MAKE ( globs.debug[ 1 ] ) /* show actions when executed */
+#define DEBUG_MAKEQ ( globs.debug[ 2 ] ) /* show even quiet actions */
+#define DEBUG_EXEC ( globs.debug[ 2 ] ) /* show text of actions */
+#define DEBUG_MAKEPROG ( globs.debug[ 3 ] ) /* show make0 progress */
+#define DEBUG_BIND ( globs.debug[ 3 ] ) /* show when files bound */
+
+#define DEBUG_EXECCMD ( globs.debug[ 4 ] ) /* show execcmds()'s work */
+
+#define DEBUG_COMPILE ( globs.debug[ 5 ] ) /* show rule invocations */
+
+#define DEBUG_HEADER ( globs.debug[ 6 ] ) /* show result of header scan */
+#define DEBUG_BINDSCAN ( globs.debug[ 6 ] ) /* show result of dir scan */
+#define DEBUG_SEARCH ( globs.debug[ 6 ] ) /* show binding attempts */
+
+#define DEBUG_VARSET ( globs.debug[ 7 ] ) /* show variable settings */
+#define DEBUG_VARGET ( globs.debug[ 8 ] ) /* show variable fetches */
+#define DEBUG_VAREXP ( globs.debug[ 8 ] ) /* show variable expansions */
+#define DEBUG_IF ( globs.debug[ 8 ] ) /* show 'if' calculations */
+#define DEBUG_LISTS ( globs.debug[ 9 ] ) /* show list manipulation */
+#define DEBUG_SCAN ( globs.debug[ 9 ] ) /* show scanner tokens */
+#define DEBUG_MEM ( globs.debug[ 9 ] ) /* show memory use */
+
+#define DEBUG_PROFILE ( globs.debug[ 10 ] ) /* dump rule execution times */
+#define DEBUG_PARSE ( globs.debug[ 11 ] ) /* debug parsing */
+#define DEBUG_GRAPH ( globs.debug[ 12 ] ) /* debug dependencies */
+#define DEBUG_FATE ( globs.debug[ 13 ] ) /* show fate changes in make0() */
+
+/* Everyone gets the memory definitions. */
+#include "mem.h"
+
+/* They also get the profile functions. */
+#include "debug.h"
+
+#endif
diff --git a/src/kenlm/jam-files/engine/jambase.c b/src/kenlm/jam-files/engine/jambase.c
new file mode 100644
index 0000000..b15282b
--- /dev/null
+++ b/src/kenlm/jam-files/engine/jambase.c
@@ -0,0 +1,1691 @@
+/* Generated by mkjambase from Jambase */
+char *jambase[] = {
+/* Jambase */
+"if $(NT)\n",
+"{\n",
+"SLASH ?= \\\\ ;\n",
+"}\n",
+"SLASH ?= / ;\n",
+"rule find-to-root ( dir : patterns + )\n",
+"{\n",
+"local globs = [ GLOB $(dir) : $(patterns) ] ;\n",
+"while ! $(globs) && $(dir:P) != $(dir)\n",
+"{\n",
+"dir = $(dir:P) ;\n",
+"globs = [ GLOB $(dir) : $(patterns) ] ;\n",
+"}\n",
+"return $(globs) ;\n",
+"}\n",
+".boost-build-file = ;\n",
+".bootstrap-file = ;\n",
+"BOOST_BUILD_PATH.user-value = $(BOOST_BUILD_PATH) ;\n",
+"if ! $(BOOST_BUILD_PATH) && $(UNIX)\n",
+"{\n",
+"BOOST_BUILD_PATH = /usr/share/boost-build ;\n",
+"}\n",
+"rule _poke ( module-name ? : variables + : value * )\n",
+"{\n",
+"module $(<)\n",
+"{\n",
+"$(>) = $(3) ;\n",
+"}\n",
+"}\n",
+"rule boost-build ( dir ? )\n",
+"{\n",
+"if $(.bootstrap-file)\n",
+"{\n",
+"ECHO \"Error: Illegal attempt to re-bootstrap the build system by invoking\" ;\n",
+"ECHO ;\n",
+"ECHO \" 'boost-build\" $(dir) \";'\" ;\n",
+"ECHO ;\n",
+"EXIT \"Please consult the documentation at 'http://www.boost.org'.\" ;\n",
+"}\n",
+"BOOST_BUILD_PATH = $(dir:R=$(.boost-build-file:D)) $(BOOST_BUILD_PATH) ;\n",
+"_poke .ENVIRON : BOOST_BUILD_PATH : $(BOOST_BUILD_PATH) ;\n",
+"local bootstrap-file = [ GLOB $(BOOST_BUILD_PATH) : bootstrap.jam ] ;\n",
+".bootstrap-file = $(bootstrap-file[1]) ;\n",
+"if ! $(.bootstrap-file)\n",
+"{\n",
+"ECHO \"Unable to load Boost.Build: could not find build system.\" ;\n",
+"ECHO --------------------------------------------------------- ;\n",
+"ECHO \"$(.boost-build-file) attempted to load the build system by invoking\" ;\n",
+"ECHO ;\n",
+"ECHO \" 'boost-build\" $(dir) \";'\" ;\n",
+"ECHO ;\n",
+"ECHO \"but we were unable to find \\\"bootstrap.jam\\\" in the specified directory\" ;\n",
+"ECHO \"or in BOOST_BUILD_PATH (searching \"$(BOOST_BUILD_PATH:J=\", \")\").\" ;\n",
+"ECHO ;\n",
+"EXIT \"Please consult the documentation at 'http://www.boost.org'.\" ;\n",
+"}\n",
+"if [ MATCH .*(--debug-configuration).* : $(ARGV) ]\n",
+"{\n",
+"ECHO \"notice: loading Boost.Build from\"\n",
+"[ NORMALIZE_PATH $(.bootstrap-file:D) ] ;\n",
+"}\n",
+"include $(.bootstrap-file) ;\n",
+"}\n",
+"if [ MATCH .*(b2).* : $(ARGV[1]:BL) ] \n",
+"|| [ MATCH .*(bjam).* : $(ARGV[1]:BL) ]\n",
+"|| $(BOOST_ROOT) # A temporary measure so Jam works with Boost.Build v1.\n",
+"{\n",
+"local search-path = $(BOOST_BUILD_PATH) $(BOOST_ROOT) ;\n",
+"local self = [ SELF_PATH ] ;\n",
+"local boost-build-relative = ../../share/boost-build ;\n",
+"local self-based-path = [ NORMALIZE_PATH $(boost-build-relative:R=$(self)) ] ;\n",
+"local boost-build-files =\n",
+"[ find-to-root [ PWD ] : boost-build.jam ]\n",
+"[ GLOB $(self-based-path) : boost-build.jam ]\n",
+"[ GLOB $(search-path) : boost-build.jam ] ;\n",
+".boost-build-file = $(boost-build-files[1]) ;\n",
+"if ! $(.boost-build-file)\n",
+"{\n",
+"ECHO \"Unable to load Boost.Build: could not find \\\"boost-build.jam\\\"\" ;\n",
+"ECHO --------------------------------------------------------------- ;\n",
+"if ! [ MATCH .*(bjam).* : $(ARGV[1]:BL) ]\n",
+"{\n",
+"ECHO \"BOOST_ROOT must be set, either in the environment, or \" ;\n",
+"ECHO \"on the command-line with -sBOOST_ROOT=..., to the root\" ;\n",
+"ECHO \"of the boost installation.\" ;\n",
+"ECHO ;\n",
+"}\n",
+"ECHO \"Attempted search from\" [ PWD ] \"up to the root\" ;\n",
+"ECHO \"at\" $(self-based-path) ;\n",
+"ECHO \"and in these directories from BOOST_BUILD_PATH and BOOST_ROOT: \"$(search-path:J=\", \")\".\" ;\n",
+"EXIT \"Please consult the documentation at 'http://www.boost.org'.\" ;\n",
+"}\n",
+"if [ MATCH .*(--debug-configuration).* : $(ARGV) ]\n",
+"{\n",
+"ECHO \"notice: found boost-build.jam at\"\n",
+"[ NORMALIZE_PATH $(.boost-build-file) ] ;\n",
+"}\n",
+"include $(.boost-build-file) ;\n",
+"if ! $(.bootstrap-file)\n",
+"{\n",
+"ECHO \"Unable to load Boost.Build\" ;\n",
+"ECHO -------------------------- ;\n",
+"ECHO \"\\\"$(.boost-build-file)\\\" was found by searching from\" [ PWD ] \"up to the root\" ;\n",
+"ECHO \"and in these directories from BOOST_BUILD_PATH and BOOST_ROOT: \"$(search-path:J=\", \")\".\" ;\n",
+"ECHO ;\n",
+"ECHO \"However, it failed to call the \\\"boost-build\\\" rule to indicate\" ;\n",
+"ECHO \"the location of the build system.\" ;\n",
+"ECHO ;\n",
+"EXIT \"Please consult the documentation at 'http://www.boost.org'.\" ;\n",
+"}\n",
+"}\n",
+"else\n",
+"{\n",
+"if $(NT)\n",
+"{\n",
+"local SUPPORTED_TOOLSETS = \"BORLANDC\" \"VC7\" \"VISUALC\" \"VISUALC16\" \"INTELC\" \"WATCOM\"\n",
+"\"MINGW\" \"LCC\" ;\n",
+"TOOLSET = \"\" ;\n",
+"if $(JAM_TOOLSET)\n",
+"{\n",
+"local t ;\n",
+"for t in $(SUPPORTED_TOOLSETS)\n",
+"{\n",
+"$(t) = $($(t):J=\" \") ; # reconstitute paths with spaces in them\n",
+"if $(t) = $(JAM_TOOLSET) { TOOLSET = $(t) ; }\n",
+"}\n",
+"if ! $(TOOLSET)\n",
+"{\n",
+"ECHO \"The JAM_TOOLSET environment variable is defined but its value\" ;\n",
+"ECHO \"is invalid, please use one of the following:\" ;\n",
+"ECHO ;\n",
+"for t in $(SUPPORTED_TOOLSETS) { ECHO \" \" $(t) ; }\n",
+"EXIT ;\n",
+"}\n",
+"}\n",
+"if ! $(TOOLSET)\n",
+"{\n",
+"if $(BCCROOT)\n",
+"{\n",
+"TOOLSET = BORLANDC ;\n",
+"BORLANDC = $(BCCROOT:J=\" \") ;\n",
+"}\n",
+"else if $(MSVC)\n",
+"{\n",
+"TOOLSET = VISUALC16 ;\n",
+"VISUALC16 = $(MSVC:J=\" \") ;\n",
+"}\n",
+"else if $(MSVCNT)\n",
+"{\n",
+"TOOLSET = VISUALC ;\n",
+"VISUALC = $(MSVCNT:J=\" \") ;\n",
+"}\n",
+"else if $(MSVCDir)\n",
+"{\n",
+"TOOLSET = VISUALC ;\n",
+"VISUALC = $(MSVCDir:J=\" \") ;\n",
+"}\n",
+"else if $(MINGW)\n",
+"{\n",
+"TOOLSET = MINGW ;\n",
+"}\n",
+"else\n",
+"{\n",
+"ECHO \"Jam cannot be run because, either:\" ;\n",
+"ECHO \" a. You didn't set BOOST_ROOT to indicate the root of your\" ;\n",
+"ECHO \" Boost installation.\" ;\n",
+"ECHO \" b. You are trying to use stock Jam but didn't indicate which\" ;\n",
+"ECHO \" compilation toolset to use. To do so, follow these simple\" ;\n",
+"ECHO \" instructions:\" ;\n",
+"ECHO ;\n",
+"ECHO \" - define one of the following environment variable, with the\" ;\n",
+"ECHO \" appropriate value according to this list:\" ;\n",
+"ECHO ;\n",
+"ECHO \" Variable Toolset Description\" ;\n",
+"ECHO ;\n",
+"ECHO \" BORLANDC Borland C++ BC++ install path\" ;\n",
+"ECHO \" VISUALC Microsoft Visual C++ VC++ install path\" ;\n",
+"ECHO \" VISUALC16 Microsoft Visual C++ 16 bit VC++ 16 bit install\" ;\n",
+"ECHO \" INTELC Intel C/C++ IC++ install path\" ;\n",
+"ECHO \" WATCOM Watcom C/C++ Watcom install path\" ;\n",
+"ECHO \" MINGW MinGW (gcc) MinGW install path\" ;\n",
+"ECHO \" LCC Win32-LCC LCC-Win32 install path\" ;\n",
+"ECHO ;\n",
+"ECHO \" - define the JAM_TOOLSET environment variable with the *name*\" ;\n",
+"ECHO \" of the toolset variable you want to use.\" ;\n",
+"ECHO ;\n",
+"ECHO \" e.g.: set VISUALC=C:\\\\Visual6\" ;\n",
+"ECHO \" set JAM_TOOLSET=VISUALC\" ;\n",
+"EXIT ;\n",
+"}\n",
+"}\n",
+"CP ?= copy ;\n",
+"RM ?= del /f/q ;\n",
+"SLASH ?= \\\\ ;\n",
+"SUFLIB ?= .lib ;\n",
+"SUFOBJ ?= .obj ;\n",
+"SUFEXE ?= .exe ;\n",
+"if $(TOOLSET) = BORLANDC\n",
+"{\n",
+"ECHO \"Compiler is Borland C++\" ;\n",
+"AR ?= tlib /C /P64 ;\n",
+"CC ?= bcc32 ;\n",
+"CCFLAGS ?= -q -y -d -v -w-par -w-ccc -w-rch -w-pro -w-aus ;\n",
+"C++ ?= bcc32 ;\n",
+"C++FLAGS ?= -q -y -d -v -w-par -w-ccc -w-rch -w-pro -w-aus -P ;\n",
+"LINK ?= $(CC) ;\n",
+"LINKFLAGS ?= $(CCFLAGS) ;\n",
+"STDLIBPATH ?= $(BORLANDC)\\\\lib ;\n",
+"STDHDRS ?= $(BORLANDC)\\\\include ;\n",
+"NOARSCAN ?= true ;\n",
+"}\n",
+"else if $(TOOLSET) = VISUALC16\n",
+"{\n",
+"ECHO \"Compiler is Microsoft Visual C++ 16 bit\" ;\n",
+"AR ?= lib /nologo ;\n",
+"CC ?= cl /nologo ;\n",
+"CCFLAGS ?= /D \\\"WIN\\\" ;\n",
+"C++ ?= $(CC) ;\n",
+"C++FLAGS ?= $(CCFLAGS) ;\n",
+"LINK ?= $(CC) ;\n",
+"LINKFLAGS ?= $(CCFLAGS) ;\n",
+"LINKLIBS ?=\n",
+"\\\"$(VISUALC16)\\\\lib\\\\mlibce.lib\\\"\n",
+"\\\"$(VISUALC16)\\\\lib\\\\oldnames.lib\\\"\n",
+";\n",
+"LINKLIBS ?= ;\n",
+"NOARSCAN ?= true ;\n",
+"OPTIM ?= \"\" ;\n",
+"STDHDRS ?= $(VISUALC16)\\\\include ;\n",
+"UNDEFFLAG ?= \"/u _\" ;\n",
+"}\n",
+"else if $(TOOLSET) = VISUALC\n",
+"{\n",
+"ECHO \"Compiler is Microsoft Visual C++\" ;\n",
+"AR ?= lib ;\n",
+"AS ?= masm386 ;\n",
+"CC ?= cl /nologo ;\n",
+"CCFLAGS ?= \"\" ;\n",
+"C++ ?= $(CC) ;\n",
+"C++FLAGS ?= $(CCFLAGS) ;\n",
+"LINK ?= link /nologo ;\n",
+"LINKFLAGS ?= \"\" ;\n",
+"LINKLIBS ?= \\\"$(VISUALC)\\\\lib\\\\advapi32.lib\\\"\n",
+"\\\"$(VISUALC)\\\\lib\\\\gdi32.lib\\\"\n",
+"\\\"$(VISUALC)\\\\lib\\\\user32.lib\\\"\n",
+"\\\"$(VISUALC)\\\\lib\\\\kernel32.lib\\\" ;\n",
+"OPTIM ?= \"\" ;\n",
+"STDHDRS ?= $(VISUALC)\\\\include ;\n",
+"UNDEFFLAG ?= \"/u _\" ;\n",
+"}\n",
+"else if $(TOOLSET) = VC7\n",
+"{\n",
+"ECHO \"Compiler is Microsoft Visual C++ .NET\" ;\n",
+"AR ?= lib ;\n",
+"AS ?= masm386 ;\n",
+"CC ?= cl /nologo ;\n",
+"CCFLAGS ?= \"\" ;\n",
+"C++ ?= $(CC) ;\n",
+"C++FLAGS ?= $(CCFLAGS) ;\n",
+"LINK ?= link /nologo ;\n",
+"LINKFLAGS ?= \"\" ;\n",
+"LINKLIBS ?= \\\"$(VISUALC)\\\\PlatformSDK\\\\lib\\\\advapi32.lib\\\"\n",
+"\\\"$(VISUALC)\\\\PlatformSDK\\\\lib\\\\gdi32.lib\\\"\n",
+"\\\"$(VISUALC)\\\\PlatformSDK\\\\lib\\\\user32.lib\\\"\n",
+"\\\"$(VISUALC)\\\\PlatformSDK\\\\lib\\\\kernel32.lib\\\" ;\n",
+"OPTIM ?= \"\" ;\n",
+"STDHDRS ?= \\\"$(VISUALC)\\\\include\\\"\n",
+"\\\"$(VISUALC)\\\\PlatformSDK\\\\include\\\" ;\n",
+"UNDEFFLAG ?= \"/u _\" ;\n",
+"}\n",
+"else if $(TOOLSET) = INTELC\n",
+"{\n",
+"ECHO \"Compiler is Intel C/C++\" ;\n",
+"if ! $(VISUALC)\n",
+"{\n",
+"ECHO \"As a special exception, when using the Intel C++ compiler, you need\" ;\n",
+"ECHO \"to define the VISUALC environment variable to indicate the location\" ;\n",
+"ECHO \"of your Visual C++ installation. Aborting..\" ;\n",
+"EXIT ;\n",
+"}\n",
+"AR ?= lib ;\n",
+"AS ?= masm386 ;\n",
+"CC ?= icl /nologo ;\n",
+"CCFLAGS ?= \"\" ;\n",
+"C++ ?= $(CC) ;\n",
+"C++FLAGS ?= $(CCFLAGS) ;\n",
+"LINK ?= link /nologo ;\n",
+"LINKFLAGS ?= \"\" ;\n",
+"LINKLIBS ?= $(VISUALC)\\\\lib\\\\advapi32.lib\n",
+"$(VISUALC)\\\\lib\\\\kernel32.lib\n",
+";\n",
+"OPTIM ?= \"\" ;\n",
+"STDHDRS ?= $(INTELC)\\include $(VISUALC)\\\\include ;\n",
+"UNDEFFLAG ?= \"/u _\" ;\n",
+"}\n",
+"else if $(TOOLSET) = WATCOM\n",
+"{\n",
+"ECHO \"Compiler is Watcom C/C++\" ;\n",
+"AR ?= wlib ;\n",
+"CC ?= wcc386 ;\n",
+"CCFLAGS ?= /zq /DWIN32 /I$(WATCOM)\\\\h ; # zq=quiet\n",
+"C++ ?= wpp386 ;\n",
+"C++FLAGS ?= $(CCFLAGS) ;\n",
+"CP ?= copy ;\n",
+"DOT ?= . ;\n",
+"DOTDOT ?= .. ;\n",
+"LINK ?= wcl386 ;\n",
+"LINKFLAGS ?= /zq ; # zq=quiet\n",
+"LINKLIBS ?= ;\n",
+"MV ?= move ;\n",
+"NOARSCAN ?= true ;\n",
+"OPTIM ?= ;\n",
+"RM ?= del /f ;\n",
+"SLASH ?= \\\\ ;\n",
+"STDHDRS ?= $(WATCOM)\\\\h $(WATCOM)\\\\h\\\\nt ;\n",
+"SUFEXE ?= .exe ;\n",
+"SUFLIB ?= .lib ;\n",
+"SUFOBJ ?= .obj ;\n",
+"UNDEFFLAG ?= \"/u _\" ;\n",
+"}\n",
+"else if $(TOOLSET) = MINGW\n",
+"{\n",
+"ECHO \"Compiler is GCC with Mingw\" ;\n",
+"AR ?= ar -ru ;\n",
+"CC ?= gcc ;\n",
+"CCFLAGS ?= \"\" ;\n",
+"C++ ?= $(CC) ;\n",
+"C++FLAGS ?= $(CCFLAGS) ;\n",
+"LINK ?= $(CC) ;\n",
+"LINKFLAGS ?= \"\" ;\n",
+"LINKLIBS ?= \"\" ;\n",
+"OPTIM ?= ;\n",
+"SUFOBJ = .o ;\n",
+"SUFLIB = .a ;\n",
+"SLASH = / ;\n",
+"}\n",
+"else if $(TOOLSET) = LCC\n",
+"{\n",
+"ECHO \"Compiler is Win32-LCC\" ;\n",
+"AR ?= lcclib ;\n",
+"CC ?= lcc ;\n",
+"CCFLAGS ?= \"\" ;\n",
+"C++ ?= $(CC) ;\n",
+"C++FLAGS ?= $(CCFLAGS) ;\n",
+"LINK ?= lcclnk ;\n",
+"LINKFLAGS ?= \"\" ;\n",
+"LINKLIBS ?= \"\" ;\n",
+"OPTIM ?= ;\n",
+"NOARSCAN = true ;\n",
+"}\n",
+"else\n",
+"{\n",
+"EXIT On NT, set BCCROOT, MSVCNT, MINGW or MSVC to the root of the\n",
+"Borland or Microsoft directories. ;\n",
+"}\n",
+"}\n",
+"else if $(OS2)\n",
+"{\n",
+"local SUPPORTED_TOOLSETS = \"EMX\" \"WATCOM\" ;\n",
+"TOOLSET = \"\" ;\n",
+"if $(JAM_TOOLSET)\n",
+"{\n",
+"local t ;\n",
+"for t in $(SUPPORTED_TOOLSETS)\n",
+"{\n",
+"$(t) = $($(t):J=\" \") ; # reconstitute paths with spaces in them\n",
+"if $(t) = $(JAM_TOOLSET) { TOOLSET = $(t) ; }\n",
+"}\n",
+"if ! $(TOOLSET)\n",
+"{\n",
+"ECHO \"The JAM_TOOLSET environment variable is defined but its value\" ;\n",
+"ECHO \"is invalid, please use one of the following:\" ;\n",
+"ECHO ;\n",
+"for t in $(SUPPORTED_TOOLSETS) { ECHO \" \" $(t) ; }\n",
+"EXIT ;\n",
+"}\n",
+"}\n",
+"if ! $(TOOLSET)\n",
+"{\n",
+"if $(watcom)\n",
+"{\n",
+"WATCOM = $(watcom:J=\" \") ;\n",
+"TOOLSET = WATCOM ;\n",
+"}\n",
+"else\n",
+"{\n",
+"ECHO \"Jam cannot be run because you didn't indicate which compilation toolset\" ;\n",
+"ECHO \"to use. To do so, follow these simple instructions:\" ;\n",
+"ECHO ;\n",
+"ECHO \" - define one of the following environment variable, with the\" ;\n",
+"ECHO \" appropriate value according to this list:\" ;\n",
+"ECHO ;\n",
+"ECHO \" Variable Toolset Description\" ;\n",
+"ECHO ;\n",
+"ECHO \" WATCOM Watcom C/C++ Watcom install path\" ;\n",
+"ECHO \" EMX EMX (gcc) EMX install path\" ;\n",
+"ECHO \" VISUALAGE IBM Visual Age C/C++ VisualAge install path\" ;\n",
+"ECHO ;\n",
+"ECHO \" - define the JAM_TOOLSET environment variable with the *name*\" ;\n",
+"ECHO \" of the toolset variable you want to use.\" ;\n",
+"ECHO ;\n",
+"ECHO \" e.g.: set WATCOM=C:\\WATCOM\" ;\n",
+"ECHO \" set JAM_TOOLSET=WATCOM\" ;\n",
+"ECHO ;\n",
+"EXIT ;\n",
+"}\n",
+"}\n",
+"RM = del /f ;\n",
+"CP = copy ;\n",
+"MV ?= move ;\n",
+"DOT ?= . ;\n",
+"DOTDOT ?= .. ;\n",
+"SUFLIB ?= .lib ;\n",
+"SUFOBJ ?= .obj ;\n",
+"SUFEXE ?= .exe ;\n",
+"if $(TOOLSET) = WATCOM\n",
+"{\n",
+"AR ?= wlib ;\n",
+"BINDIR ?= \\\\os2\\\\apps ;\n",
+"CC ?= wcc386 ;\n",
+"CCFLAGS ?= /zq /DOS2 /I$(WATCOM)\\\\h ; # zq=quiet\n",
+"C++ ?= wpp386 ;\n",
+"C++FLAGS ?= $(CCFLAGS) ;\n",
+"LINK ?= wcl386 ;\n",
+"LINKFLAGS ?= /zq ; # zq=quiet\n",
+"LINKLIBS ?= ;\n",
+"NOARSCAN ?= true ;\n",
+"OPTIM ?= ;\n",
+"SLASH ?= \\\\ ;\n",
+"STDHDRS ?= $(WATCOM)\\\\h ;\n",
+"UNDEFFLAG ?= \"/u _\" ;\n",
+"}\n",
+"else if $(TOOLSET) = EMX\n",
+"{\n",
+"ECHO \"Compiler is GCC-EMX\" ;\n",
+"AR ?= ar -ru ;\n",
+"CC ?= gcc ;\n",
+"CCFLAGS ?= \"\" ;\n",
+"C++ ?= $(CC) ;\n",
+"C++FLAGS ?= $(CCFLAGS) ;\n",
+"LINK ?= $(CC) ;\n",
+"LINKFLAGS ?= \"\" ;\n",
+"LINKLIBS ?= \"\" ;\n",
+"OPTIM ?= ;\n",
+"SUFOBJ = .o ;\n",
+"SUFLIB = .a ;\n",
+"UNDEFFLAG ?= \"-U\" ;\n",
+"SLASH = / ;\n",
+"}\n",
+"else\n",
+"{\n",
+"EXIT \"Sorry, but the $(JAM_TOOLSET) toolset isn't supported for now\" ;\n",
+"}\n",
+"}\n",
+"else if $(VMS)\n",
+"{\n",
+"C++ ?= cxx ;\n",
+"C++FLAGS ?= ;\n",
+"CC ?= cc ;\n",
+"CCFLAGS ?= ;\n",
+"CHMOD ?= set file/prot= ;\n",
+"CP ?= copy/replace ;\n",
+"CRELIB ?= true ;\n",
+"DOT ?= [] ;\n",
+"DOTDOT ?= [-] ;\n",
+"EXEMODE ?= (w:e) ;\n",
+"FILEMODE ?= (w:r) ;\n",
+"HDRS ?= ;\n",
+"LINK ?= link ;\n",
+"LINKFLAGS ?= \"\" ;\n",
+"LINKLIBS ?= ;\n",
+"MKDIR ?= create/dir ;\n",
+"MV ?= rename ;\n",
+"OPTIM ?= \"\" ;\n",
+"RM ?= delete ;\n",
+"RUNVMS ?= mcr ;\n",
+"SHELLMODE ?= (w:er) ;\n",
+"SLASH ?= . ;\n",
+"STDHDRS ?= decc$library_include ;\n",
+"SUFEXE ?= .exe ;\n",
+"SUFLIB ?= .olb ;\n",
+"SUFOBJ ?= .obj ;\n",
+"switch $(OS)\n",
+"{\n",
+"case OPENVMS : CCFLAGS ?= /stand=vaxc ;\n",
+"case VMS : LINKLIBS ?= sys$library:vaxcrtl.olb/lib ;\n",
+"}\n",
+"}\n",
+"else if $(MAC)\n",
+"{\n",
+"local OPT ;\n",
+"CW ?= \"{CW}\" ;\n",
+"MACHDRS ?=\n",
+"\"$(UMACHDRS):Universal:Interfaces:CIncludes\"\n",
+"\"$(CW):MSL:MSL_C:MSL_Common:Include\"\n",
+"\"$(CW):MSL:MSL_C:MSL_MacOS:Include\" ;\n",
+"MACLIBS ?=\n",
+"\"$(CW):MacOS Support:Universal:Libraries:StubLibraries:Interfacelib\"\n",
+"\"$(CW):MacOS Support:Universal:Libraries:StubLibraries:Mathlib\" ;\n",
+"MPWLIBS ?=\n",
+"\"$(CW):MacOS Support:Libraries:Runtime:Runtime PPC:MSL MPWCRuntime.lib\"\n",
+"\"$(CW):MSL:MSL_C:MSL_MacOS:Lib:PPC:MSL C.PPC MPW.Lib\" ;\n",
+"MPWNLLIBS ?=\n",
+"\"$(CW):MacOS Support:Libraries:Runtime:Runtime PPC:MSL MPWCRuntime.lib\"\n",
+"\"$(CW):MSL:MSL_C:MSL_MacOS:Lib:PPC:MSL C.PPC MPW(NL).Lib\" ;\n",
+"SIOUXHDRS ?= ;\n",
+"SIOUXLIBS ?=\n",
+"\"$(CW):MacOS Support:Libraries:Runtime:Runtime PPC:MSL RuntimePPC.lib\"\n",
+"\"$(CW):MSL:MSL_C:MSL_MacOS:Lib:PPC:MSL SIOUX.PPC.Lib\"\n",
+"\"$(CW):MSL:MSL_C:MSL_MacOS:Lib:PPC:MSL C.PPC.Lib\" ;\n",
+"C++ ?= mwcppc ;\n",
+"C++FLAGS ?= -w off -nomapcr ;\n",
+"CC ?= mwcppc ;\n",
+"CCFLAGS ?= -w off -nomapcr ;\n",
+"CP ?= duplicate -y ;\n",
+"DOT ?= \":\" ;\n",
+"DOTDOT ?= \"::\" ;\n",
+"HDRS ?= $(MACHDRS) $(MPWHDRS) ;\n",
+"LINK ?= mwlinkppc ;\n",
+"LINKFLAGS ?= -mpwtool -warn ;\n",
+"LINKLIBS ?= $(MACLIBS) $(MPWLIBS) ;\n",
+"MKDIR ?= newfolder ;\n",
+"MV ?= rename -y ;\n",
+"NOARSCAN ?= true ;\n",
+"OPTIM ?= ;\n",
+"RM ?= delete -y ;\n",
+"SLASH ?= \":\" ;\n",
+"STDHDRS ?= ;\n",
+"SUFLIB ?= .lib ;\n",
+"SUFOBJ ?= .o ;\n",
+"}\n",
+"else if $(OS) = BEOS && $(METROWERKS)\n",
+"{\n",
+"AR ?= mwld -xml -o ;\n",
+"BINDIR ?= /boot/apps ;\n",
+"CC ?= mwcc ;\n",
+"CCFLAGS ?= -nosyspath ;\n",
+"C++ ?= $(CC) ;\n",
+"C++FLAGS ?= -nosyspath ;\n",
+"FORTRAN ?= \"\" ;\n",
+"LIBDIR ?= /boot/develop/libraries ;\n",
+"LINK ?= mwld ;\n",
+"LINKFLAGS ?= \"\" ;\n",
+"MANDIR ?= /boot/documentation/\"Shell Tools\"/HTML ;\n",
+"NOARSCAN ?= true ;\n",
+"STDHDRS ?= /boot/develop/headers/posix ;\n",
+"}\n",
+"else if $(OS) = BEOS\n",
+"{\n",
+"BINDIR ?= /boot/apps ;\n",
+"CC ?= gcc ;\n",
+"C++ ?= $(CC) ;\n",
+"FORTRAN ?= \"\" ;\n",
+"LIBDIR ?= /boot/develop/libraries ;\n",
+"LINK ?= gcc ;\n",
+"LINKLIBS ?= -lnet ;\n",
+"NOARSCAN ?= true ;\n",
+"STDHDRS ?= /boot/develop/headers/posix ;\n",
+"}\n",
+"else if $(UNIX)\n",
+"{\n",
+"switch $(OS)\n",
+"{\n",
+"case AIX :\n",
+"LINKLIBS ?= -lbsd ;\n",
+"case AMIGA :\n",
+"CC ?= gcc ;\n",
+"YACC ?= \"bison -y\" ;\n",
+"case CYGWIN :\n",
+"CC ?= gcc ;\n",
+"CCFLAGS += -D__cygwin__ ;\n",
+"LEX ?= flex ;\n",
+"RANLIB ?= \"\" ;\n",
+"SUFEXE ?= .exe ;\n",
+"YACC ?= \"bison -y\" ;\n",
+"case DGUX :\n",
+"RANLIB ?= \"\" ;\n",
+"RELOCATE ?= true ;\n",
+"case HPUX :\n",
+"YACC = ;\n",
+"CFLAGS += -Ae ;\n",
+"CCFLAGS += -Ae ;\n",
+"RANLIB ?= \"\" ;\n",
+"case INTERIX :\n",
+"CC ?= gcc ;\n",
+"RANLIB ?= \"\" ;\n",
+"case IRIX :\n",
+"RANLIB ?= \"\" ;\n",
+"case MPEIX :\n",
+"CC ?= gcc ;\n",
+"C++ ?= gcc ;\n",
+"CCFLAGS += -D_POSIX_SOURCE ;\n",
+"HDRS += /usr/include ;\n",
+"RANLIB ?= \"\" ;\n",
+"NOARSCAN ?= true ;\n",
+"NOARUPDATE ?= true ;\n",
+"case MVS :\n",
+"RANLIB ?= \"\" ;\n",
+"case NEXT :\n",
+"AR ?= libtool -o ;\n",
+"RANLIB ?= \"\" ;\n",
+"case MACOSX :\n",
+"AR ?= libtool -o ;\n",
+"C++ ?= c++ ;\n",
+"MANDIR ?= /usr/local/share/man ;\n",
+"RANLIB ?= \"\" ;\n",
+"case NCR :\n",
+"RANLIB ?= \"\" ;\n",
+"case PTX :\n",
+"RANLIB ?= \"\" ;\n",
+"case QNX :\n",
+"AR ?= wlib ;\n",
+"CC ?= cc ;\n",
+"CCFLAGS ?= -Q ; # quiet\n",
+"C++ ?= $(CC) ;\n",
+"C++FLAGS ?= -Q ; # quiet\n",
+"LINK ?= $(CC) ;\n",
+"LINKFLAGS ?= -Q ; # quiet\n",
+"NOARSCAN ?= true ;\n",
+"RANLIB ?= \"\" ;\n",
+"case SCO :\n",
+"RANLIB ?= \"\" ;\n",
+"RELOCATE ?= true ;\n",
+"case SINIX :\n",
+"RANLIB ?= \"\" ;\n",
+"case SOLARIS :\n",
+"RANLIB ?= \"\" ;\n",
+"AR ?= \"/usr/ccs/bin/ar ru\" ;\n",
+"case UNICOS :\n",
+"NOARSCAN ?= true ;\n",
+"OPTIM ?= -O0 ;\n",
+"case UNIXWARE :\n",
+"RANLIB ?= \"\" ;\n",
+"RELOCATE ?= true ;\n",
+"}\n",
+"CCFLAGS ?= ;\n",
+"C++FLAGS ?= $(CCFLAGS) ;\n",
+"CHMOD ?= chmod ;\n",
+"CHGRP ?= chgrp ;\n",
+"CHOWN ?= chown ;\n",
+"LEX ?= lex ;\n",
+"LINKFLAGS ?= $(CCFLAGS) ;\n",
+"LINKLIBS ?= ;\n",
+"OPTIM ?= -O ;\n",
+"RANLIB ?= ranlib ;\n",
+"YACC ?= yacc ;\n",
+"YACCFILES ?= y.tab ;\n",
+"YACCFLAGS ?= -d ;\n",
+"}\n",
+"AR ?= ar ru ;\n",
+"AS ?= as ;\n",
+"ASFLAGS ?= ;\n",
+"AWK ?= awk ;\n",
+"BINDIR ?= /usr/local/bin ;\n",
+"C++ ?= cc ;\n",
+"C++FLAGS ?= ;\n",
+"CC ?= cc ;\n",
+"CCFLAGS ?= ;\n",
+"CP ?= cp -f ;\n",
+"CRELIB ?= ;\n",
+"DOT ?= . ;\n",
+"DOTDOT ?= .. ;\n",
+"EXEMODE ?= 711 ;\n",
+"FILEMODE ?= 644 ;\n",
+"FORTRAN ?= f77 ;\n",
+"FORTRANFLAGS ?= ;\n",
+"HDRS ?= ;\n",
+"INSTALLGRIST ?= installed ;\n",
+"JAMFILE ?= Jamfile ;\n",
+"JAMRULES ?= Jamrules ;\n",
+"LEX ?= ;\n",
+"LIBDIR ?= /usr/local/lib ;\n",
+"LINK ?= $(CC) ;\n",
+"LINKFLAGS ?= ;\n",
+"LINKLIBS ?= ;\n",
+"LN ?= ln ;\n",
+"MANDIR ?= /usr/local/man ;\n",
+"MKDIR ?= mkdir ;\n",
+"MV ?= mv -f ;\n",
+"OPTIM ?= ;\n",
+"RCP ?= rcp ;\n",
+"RM ?= rm -f ;\n",
+"RSH ?= rsh ;\n",
+"SED ?= sed ;\n",
+"SHELLHEADER ?= \"#!/bin/sh\" ;\n",
+"SHELLMODE ?= 755 ;\n",
+"SLASH ?= / ;\n",
+"STDHDRS ?= /usr/include ;\n",
+"SUFEXE ?= \"\" ;\n",
+"SUFLIB ?= .a ;\n",
+"SUFOBJ ?= .o ;\n",
+"UNDEFFLAG ?= \"-u _\" ;\n",
+"YACC ?= ;\n",
+"YACCFILES ?= ;\n",
+"YACCFLAGS ?= ;\n",
+"HDRPATTERN =\n",
+"\"^[ ]*#[ ]*include[ ]*[<\\\"]([^\\\">]*)[\\\">].*$\" ;\n",
+"OSFULL = $(OS)$(OSVER)$(OSPLAT) $(OS)$(OSPLAT) $(OS)$(OSVER) $(OS) ;\n",
+"DEPENDS all : shell files lib exe obj ;\n",
+"DEPENDS all shell files lib exe obj : first ;\n",
+"NOTFILE all first shell files lib exe obj dirs clean uninstall ;\n",
+"ALWAYS clean uninstall ;\n",
+"rule As\n",
+"{\n",
+"DEPENDS $(<) : $(>) ;\n",
+"ASFLAGS on $(<) += $(ASFLAGS) $(SUBDIRASFLAGS) ;\n",
+"}\n",
+"rule Bulk\n",
+"{\n",
+"local i ;\n",
+"for i in $(>)\n",
+"{\n",
+"File $(i:D=$(<)) : $(i) ;\n",
+"}\n",
+"}\n",
+"rule Cc\n",
+"{\n",
+"local _h ;\n",
+"DEPENDS $(<) : $(>) ;\n",
+"CCFLAGS on $(<) += $(CCFLAGS) $(SUBDIRCCFLAGS) ;\n",
+"if $(RELOCATE)\n",
+"{\n",
+"CcMv $(<) : $(>) ;\n",
+"}\n",
+"_h = $(SEARCH_SOURCE) $(HDRS) $(SUBDIRHDRS) ;\n",
+"if $(VMS) && $(_h)\n",
+"{\n",
+"SLASHINC on $(<) = \"/inc=(\" $(_h[1]) ,$(_h[2-]) \")\" ;\n",
+"}\n",
+"else if $(MAC) && $(_h)\n",
+"{\n",
+"local _i _j ;\n",
+"_j = $(_h[1]) ;\n",
+"for _i in $(_h[2-])\n",
+"{\n",
+"_j = $(_j),$(_i) ;\n",
+"}\n",
+"MACINC on $(<) = \\\"$(_j)\\\" ;\n",
+"}\n",
+"}\n",
+"rule C++\n",
+"{\n",
+"local _h ;\n",
+"DEPENDS $(<) : $(>) ;\n",
+"C++FLAGS on $(<) += $(C++FLAGS) $(SUBDIRC++FLAGS) ;\n",
+"if $(RELOCATE)\n",
+"{\n",
+"CcMv $(<) : $(>) ;\n",
+"}\n",
+"_h = $(SEARCH_SOURCE) $(HDRS) $(SUBDIRHDRS) ;\n",
+"if $(VMS) && $(_h)\n",
+"{\n",
+"SLASHINC on $(<) = \"/inc=(\" $(_h[1]) ,$(_h[2-]) \")\" ;\n",
+"}\n",
+"else if $(MAC) && $(_h)\n",
+"{\n",
+"local _i _j ;\n",
+"_j = $(_h[1]) ;\n",
+"for _i in $(_h[2-])\n",
+"{\n",
+"_j = $(_j),$(_i) ;\n",
+"}\n",
+"MACINC on $(<) = \\\"$(_j)\\\" ;\n",
+"}\n",
+"}\n",
+"rule Chmod\n",
+"{\n",
+"if $(CHMOD) { Chmod1 $(<) ; }\n",
+"}\n",
+"rule File\n",
+"{\n",
+"DEPENDS files : $(<) ;\n",
+"DEPENDS $(<) : $(>) ;\n",
+"SEARCH on $(>) = $(SEARCH_SOURCE) ;\n",
+"MODE on $(<) = $(FILEMODE) ;\n",
+"Chmod $(<) ;\n",
+"}\n",
+"rule Fortran\n",
+"{\n",
+"DEPENDS $(<) : $(>) ;\n",
+"}\n",
+"rule GenFile\n",
+"{\n",
+"local _t = [ FGristSourceFiles $(<) ] ;\n",
+"local _s = [ FAppendSuffix $(>[1]) : $(SUFEXE) ] ;\n",
+"Depends $(_t) : $(_s) $(>[2-]) ;\n",
+"GenFile1 $(_t) : $(_s) $(>[2-]) ;\n",
+"Clean clean : $(_t) ;\n",
+"}\n",
+"rule GenFile1\n",
+"{\n",
+"MakeLocate $(<) : $(LOCATE_SOURCE) ;\n",
+"SEARCH on $(>) = $(SEARCH_SOURCE) ;\n",
+"}\n",
+"rule HardLink\n",
+"{\n",
+"DEPENDS files : $(<) ;\n",
+"DEPENDS $(<) : $(>) ;\n",
+"SEARCH on $(>) = $(SEARCH_SOURCE) ;\n",
+"}\n",
+"rule HdrMacroFile\n",
+"{\n",
+"HDRMACRO $(<) ;\n",
+"}\n",
+"rule HdrRule\n",
+"{\n",
+"local s ;\n",
+"if $(HDRGRIST)\n",
+"{\n",
+"s = $(>:G=$(HDRGRIST)) ;\n",
+"} else {\n",
+"s = $(>) ;\n",
+"}\n",
+"INCLUDES $(<) : $(s) ;\n",
+"SEARCH on $(s) = $(HDRSEARCH) ;\n",
+"NOCARE $(s) ;\n",
+"HDRSEARCH on $(s) = $(HDRSEARCH) ;\n",
+"HDRSCAN on $(s) = $(HDRSCAN) ;\n",
+"HDRRULE on $(s) = $(HDRRULE) ;\n",
+"HDRGRIST on $(s) = $(HDRGRIST) ;\n",
+"}\n",
+"rule InstallInto\n",
+"{\n",
+"local i t ;\n",
+"t = $(>:G=$(INSTALLGRIST)) ;\n",
+"Depends install : $(t) ;\n",
+"Clean uninstall : $(t) ;\n",
+"SEARCH on $(>) = $(SEARCH_SOURCE) ;\n",
+"MakeLocate $(t) : $(<) ;\n",
+"for i in $(>)\n",
+"{\n",
+"local tt = $(i:G=$(INSTALLGRIST)) ;\n",
+"Depends $(tt) : $(i) ;\n",
+"Install $(tt) : $(i) ;\n",
+"Chmod $(tt) ;\n",
+"if $(OWNER) && $(CHOWN)\n",
+"{\n",
+"Chown $(tt) ;\n",
+"OWNER on $(tt) = $(OWNER) ;\n",
+"}\n",
+"if $(GROUP) && $(CHGRP)\n",
+"{\n",
+"Chgrp $(tt) ;\n",
+"GROUP on $(tt) = $(GROUP) ;\n",
+"}\n",
+"}\n",
+"}\n",
+"rule InstallBin\n",
+"{\n",
+"local _t = [ FAppendSuffix $(>) : $(SUFEXE) ] ;\n",
+"InstallInto $(<) : $(_t) ;\n",
+"MODE on $(_t:G=installed) = $(EXEMODE) ;\n",
+"}\n",
+"rule InstallFile\n",
+"{\n",
+"InstallInto $(<) : $(>) ;\n",
+"MODE on $(>:G=installed) = $(FILEMODE) ;\n",
+"}\n",
+"rule InstallLib\n",
+"{\n",
+"InstallInto $(<) : $(>) ;\n",
+"MODE on $(>:G=installed) = $(FILEMODE) ;\n",
+"}\n",
+"rule InstallMan\n",
+"{\n",
+"local i s d ;\n",
+"for i in $(>)\n",
+"{\n",
+"switch $(i:S)\n",
+"{\n",
+"case .1 : s = 1 ; case .2 : s = 2 ; case .3 : s = 3 ;\n",
+"case .4 : s = 4 ; case .5 : s = 5 ; case .6 : s = 6 ;\n",
+"case .7 : s = 7 ; case .8 : s = 8 ; case .l : s = l ;\n",
+"case .n : s = n ; case .man : s = 1 ;\n",
+"}\n",
+"d = man$(s) ;\n",
+"InstallInto $(d:R=$(<)) : $(i) ;\n",
+"}\n",
+"MODE on $(>:G=installed) = $(FILEMODE) ;\n",
+"}\n",
+"rule InstallShell\n",
+"{\n",
+"InstallInto $(<) : $(>) ;\n",
+"MODE on $(>:G=installed) = $(SHELLMODE) ;\n",
+"}\n",
+"rule Lex\n",
+"{\n",
+"LexMv $(<) : $(>) ;\n",
+"DEPENDS $(<) : $(>) ;\n",
+"MakeLocate $(<) : $(LOCATE_SOURCE) ;\n",
+"Clean clean : $(<) ;\n",
+"}\n",
+"rule Library\n",
+"{\n",
+"LibraryFromObjects $(<) : $(>:S=$(SUFOBJ)) ;\n",
+"Objects $(>) ;\n",
+"}\n",
+"rule LibraryFromObjects\n",
+"{\n",
+"local _i _l _s ;\n",
+"_s = [ FGristFiles $(>) ] ;\n",
+"_l = $(<:S=$(SUFLIB)) ;\n",
+"if $(KEEPOBJS)\n",
+"{\n",
+"DEPENDS obj : $(_s) ;\n",
+"}\n",
+"else\n",
+"{\n",
+"DEPENDS lib : $(_l) ;\n",
+"}\n",
+"if ! $(_l:D)\n",
+"{\n",
+"MakeLocate $(_l) $(_l)($(_s:BS)) : $(LOCATE_TARGET) ;\n",
+"}\n",
+"if $(NOARSCAN)\n",
+"{\n",
+"DEPENDS $(_l) : $(_s) ;\n",
+"}\n",
+"else\n",
+"{\n",
+"DEPENDS $(_l) : $(_l)($(_s:BS)) ;\n",
+"for _i in $(_s)\n",
+"{\n",
+"DEPENDS $(_l)($(_i:BS)) : $(_i) ;\n",
+"}\n",
+"}\n",
+"Clean clean : $(_l) ;\n",
+"if $(CRELIB) { CreLib $(_l) : $(_s[1]) ; }\n",
+"Archive $(_l) : $(_s) ;\n",
+"if $(RANLIB) { Ranlib $(_l) ; }\n",
+"if ! ( $(NOARSCAN) || $(KEEPOBJS) ) { RmTemps $(_l) : $(_s) ; }\n",
+"}\n",
+"rule Link\n",
+"{\n",
+"MODE on $(<) = $(EXEMODE) ;\n",
+"Chmod $(<) ;\n",
+"}\n",
+"rule LinkLibraries\n",
+"{\n",
+"local _t = [ FAppendSuffix $(<) : $(SUFEXE) ] ;\n",
+"DEPENDS $(_t) : $(>:S=$(SUFLIB)) ;\n",
+"NEEDLIBS on $(_t) += $(>:S=$(SUFLIB)) ;\n",
+"}\n",
+"rule Main\n",
+"{\n",
+"MainFromObjects $(<) : $(>:S=$(SUFOBJ)) ;\n",
+"Objects $(>) ;\n",
+"}\n",
+"rule MainFromObjects\n",
+"{\n",
+"local _s _t ;\n",
+"_s = [ FGristFiles $(>) ] ;\n",
+"_t = [ FAppendSuffix $(<) : $(SUFEXE) ] ;\n",
+"if $(_t) != $(<)\n",
+"{\n",
+"DEPENDS $(<) : $(_t) ;\n",
+"NOTFILE $(<) ;\n",
+"}\n",
+"DEPENDS exe : $(_t) ;\n",
+"DEPENDS $(_t) : $(_s) ;\n",
+"MakeLocate $(_t) : $(LOCATE_TARGET) ;\n",
+"Clean clean : $(_t) ;\n",
+"Link $(_t) : $(_s) ;\n",
+"}\n",
+"rule MakeLocate\n",
+"{\n",
+"if $(>)\n",
+"{\n",
+"LOCATE on $(<) = $(>) ;\n",
+"Depends $(<) : $(>[1]) ;\n",
+"MkDir $(>[1]) ;\n",
+"}\n",
+"}\n",
+"rule MkDir\n",
+"{\n",
+"NOUPDATE $(<) ;\n",
+"if $(<) != $(DOT) && ! $($(<)-mkdir)\n",
+"{\n",
+"local s ;\n",
+"$(<)-mkdir = true ;\n",
+"MkDir1 $(<) ;\n",
+"Depends dirs : $(<) ;\n",
+"s = $(<:P) ;\n",
+"if $(NT)\n",
+"{\n",
+"switch $(s)\n",
+"{\n",
+"case *: : s = ;\n",
+"case *:\\\\ : s = ;\n",
+"}\n",
+"}\n",
+"if $(s) && $(s) != $(<)\n",
+"{\n",
+"Depends $(<) : $(s) ;\n",
+"MkDir $(s) ;\n",
+"}\n",
+"else if $(s)\n",
+"{\n",
+"NOTFILE $(s) ;\n",
+"}\n",
+"}\n",
+"}\n",
+"rule Object\n",
+"{\n",
+"local h ;\n",
+"Clean clean : $(<) ;\n",
+"MakeLocate $(<) : $(LOCATE_TARGET) ;\n",
+"SEARCH on $(>) = $(SEARCH_SOURCE) ;\n",
+"HDRS on $(<) = $(SEARCH_SOURCE) $(HDRS) $(SUBDIRHDRS) ;\n",
+"if $(SEARCH_SOURCE)\n",
+"{\n",
+"h = $(SEARCH_SOURCE) ;\n",
+"}\n",
+"else\n",
+"{\n",
+"h = \"\" ;\n",
+"}\n",
+"HDRRULE on $(>) = HdrRule ;\n",
+"HDRSCAN on $(>) = $(HDRPATTERN) ;\n",
+"HDRSEARCH on $(>) = $(HDRS) $(SUBDIRHDRS) $(h) $(STDHDRS) ;\n",
+"HDRGRIST on $(>) = $(HDRGRIST) ;\n",
+"switch $(>:S)\n",
+"{\n",
+"case .asm : As $(<) : $(>) ;\n",
+"case .c : Cc $(<) : $(>) ;\n",
+"case .C : C++ $(<) : $(>) ;\n",
+"case .cc : C++ $(<) : $(>) ;\n",
+"case .cpp : C++ $(<) : $(>) ;\n",
+"case .f : Fortran $(<) : $(>) ;\n",
+"case .l : Cc $(<) : $(<:S=.c) ;\n",
+"Lex $(<:S=.c) : $(>) ;\n",
+"case .s : As $(<) : $(>) ;\n",
+"case .y : Cc $(<) : $(<:S=.c) ;\n",
+"Yacc $(<:S=.c) : $(>) ;\n",
+"case * : UserObject $(<) : $(>) ;\n",
+"}\n",
+"}\n",
+"rule ObjectCcFlags\n",
+"{\n",
+"CCFLAGS on [ FGristFiles $(<:S=$(SUFOBJ)) ] += $(>) ;\n",
+"}\n",
+"rule ObjectC++Flags\n",
+"{\n",
+"C++FLAGS on [ FGristFiles $(<:S=$(SUFOBJ)) ] += $(>) ;\n",
+"}\n",
+"rule ObjectHdrs\n",
+"{\n",
+"HDRS on [ FGristFiles $(<:S=$(SUFOBJ)) ] += $(>) ;\n",
+"}\n",
+"rule Objects\n",
+"{\n",
+"local _i ;\n",
+"for _i in [ FGristFiles $(<) ]\n",
+"{\n",
+"Object $(_i:S=$(SUFOBJ)) : $(_i) ;\n",
+"DEPENDS obj : $(_i:S=$(SUFOBJ)) ;\n",
+"}\n",
+"}\n",
+"rule RmTemps\n",
+"{\n",
+"TEMPORARY $(>) ;\n",
+"}\n",
+"rule Setuid\n",
+"{\n",
+"MODE on [ FAppendSuffix $(<) : $(SUFEXE) ] = 4711 ;\n",
+"}\n",
+"rule Shell\n",
+"{\n",
+"DEPENDS shell : $(<) ;\n",
+"DEPENDS $(<) : $(>) ;\n",
+"SEARCH on $(>) = $(SEARCH_SOURCE) ;\n",
+"MODE on $(<) = $(SHELLMODE) ;\n",
+"Clean clean : $(<) ;\n",
+"Chmod $(<) ;\n",
+"}\n",
+"rule SubDir\n",
+"{\n",
+"local _r _s ;\n",
+"if ! $($(<[1]))\n",
+"{\n",
+"if ! $(<[1])\n",
+"{\n",
+"EXIT SubDir syntax error ;\n",
+"}\n",
+"$(<[1]) = [ FSubDir $(<[2-]) ] ;\n",
+"}\n",
+"if ! $($(<[1])-included)\n",
+"{\n",
+"$(<[1])-included = TRUE ;\n",
+"_r = $($(<[1])RULES) ;\n",
+"if ! $(_r)\n",
+"{\n",
+"_r = $(JAMRULES:R=$($(<[1]))) ;\n",
+"}\n",
+"include $(_r) ;\n",
+"}\n",
+"_s = [ FDirName $(<[2-]) ] ;\n",
+"SUBDIR = $(_s:R=$($(<[1]))) ;\n",
+"SUBDIR_TOKENS = $(<[2-]) ;\n",
+"SEARCH_SOURCE = $(SUBDIR) ;\n",
+"LOCATE_SOURCE = $(ALL_LOCATE_TARGET) $(SUBDIR) ;\n",
+"LOCATE_TARGET = $(ALL_LOCATE_TARGET) $(SUBDIR) ;\n",
+"SOURCE_GRIST = [ FGrist $(<[2-]) ] ;\n",
+"SUBDIRCCFLAGS = ;\n",
+"SUBDIRC++FLAGS = ;\n",
+"SUBDIRHDRS = ;\n",
+"}\n",
+"rule SubDirCcFlags\n",
+"{\n",
+"SUBDIRCCFLAGS += $(<) ;\n",
+"}\n",
+"rule SubDirC++Flags\n",
+"{\n",
+"SUBDIRC++FLAGS += $(<) ;\n",
+"}\n",
+"rule SubDirHdrs\n",
+"{\n",
+"SUBDIRHDRS += $(<) ;\n",
+"}\n",
+"rule SubInclude\n",
+"{\n",
+"local _s ;\n",
+"if ! $($(<[1]))\n",
+"{\n",
+"EXIT Top level of source tree has not been set with $(<[1]) ;\n",
+"}\n",
+"_s = [ FDirName $(<[2-]) ] ;\n",
+"include $(JAMFILE:D=$(_s):R=$($(<[1]))) ;\n",
+"}\n",
+"rule Undefines\n",
+"{\n",
+"UNDEFS on [ FAppendSuffix $(<) : $(SUFEXE) ] += $(UNDEFFLAG)$(>) ;\n",
+"}\n",
+"rule UserObject\n",
+"{\n",
+"EXIT \"Unknown suffix on\" $(>) \"- see UserObject rule in Jamfile(5).\" ;\n",
+"}\n",
+"rule Yacc\n",
+"{\n",
+"local _h ;\n",
+"_h = $(<:BS=.h) ;\n",
+"MakeLocate $(<) $(_h) : $(LOCATE_SOURCE) ;\n",
+"if $(YACC)\n",
+"{\n",
+"DEPENDS $(<) $(_h) : $(>) ;\n",
+"Yacc1 $(<) $(_h) : $(>) ;\n",
+"YaccMv $(<) $(_h) : $(>) ;\n",
+"Clean clean : $(<) $(_h) ;\n",
+"}\n",
+"INCLUDES $(<) : $(_h) ;\n",
+"}\n",
+"rule FGrist\n",
+"{\n",
+"local _g _i ;\n",
+"_g = $(<[1]) ;\n",
+"for _i in $(<[2-])\n",
+"{\n",
+"_g = $(_g)!$(_i) ;\n",
+"}\n",
+"return $(_g) ;\n",
+"}\n",
+"rule FGristFiles\n",
+"{\n",
+"if ! $(SOURCE_GRIST)\n",
+"{\n",
+"return $(<) ;\n",
+"}\n",
+"else\n",
+"{\n",
+"return $(<:G=$(SOURCE_GRIST)) ;\n",
+"}\n",
+"}\n",
+"rule FGristSourceFiles\n",
+"{\n",
+"if ! $(SOURCE_GRIST)\n",
+"{\n",
+"return $(<) ;\n",
+"}\n",
+"else\n",
+"{\n",
+"local _i _o ;\n",
+"for _i in $(<)\n",
+"{\n",
+"switch $(_i)\n",
+"{\n",
+"case *.h : _o += $(_i) ;\n",
+"case * : _o += $(_i:G=$(SOURCE_GRIST)) ;\n",
+"}\n",
+"}\n",
+"return $(_o) ;\n",
+"}\n",
+"}\n",
+"rule FConcat\n",
+"{\n",
+"local _t _r ;\n",
+"$(_r) = $(<[1]) ;\n",
+"for _t in $(<[2-])\n",
+"{\n",
+"$(_r) = $(_r)$(_t) ;\n",
+"}\n",
+"return $(_r) ;\n",
+"}\n",
+"rule FSubDir\n",
+"{\n",
+"local _i _d ;\n",
+"if ! $(<[1])\n",
+"{\n",
+"_d = $(DOT) ;\n",
+"}\n",
+"else\n",
+"{\n",
+"_d = $(DOTDOT) ;\n",
+"for _i in $(<[2-])\n",
+"{\n",
+"_d = $(_d:R=$(DOTDOT)) ;\n",
+"}\n",
+"}\n",
+"return $(_d) ;\n",
+"}\n",
+"rule FDirName\n",
+"{\n",
+"local _s _i ;\n",
+"if ! $(<)\n",
+"{\n",
+"_s = $(DOT) ;\n",
+"}\n",
+"else if $(VMS)\n",
+"{\n",
+"switch $(<[1])\n",
+"{\n",
+"case *:* : _s = $(<[1]) ;\n",
+"case \\\\[*\\\\] : _s = $(<[1]) ;\n",
+"case * : _s = [.$(<[1])] ;\n",
+"}\n",
+"for _i in [.$(<[2-])]\n",
+"{\n",
+"_s = $(_i:R=$(_s)) ;\n",
+"}\n",
+"}\n",
+"else if $(MAC)\n",
+"{\n",
+"_s = $(DOT) ;\n",
+"for _i in $(<)\n",
+"{\n",
+"_s = $(_i:R=$(_s)) ;\n",
+"}\n",
+"}\n",
+"else\n",
+"{\n",
+"_s = $(<[1]) ;\n",
+"for _i in $(<[2-])\n",
+"{\n",
+"_s = $(_i:R=$(_s)) ;\n",
+"}\n",
+"}\n",
+"return $(_s) ;\n",
+"}\n",
+"rule _makeCommon\n",
+"{\n",
+"if $($(<)[1]) && $($(<)[1]) = $($(>)[1])\n",
+"{\n",
+"$(<) = $($(<)[2-]) ;\n",
+"$(>) = $($(>)[2-]) ;\n",
+"_makeCommon $(<) : $(>) ;\n",
+"}\n",
+"}\n",
+"rule FRelPath\n",
+"{\n",
+"local _l _r ;\n",
+"_l = $(<) ;\n",
+"_r = $(>) ;\n",
+"_makeCommon _l : _r ;\n",
+"_l = [ FSubDir $(_l) ] ;\n",
+"_r = [ FDirName $(_r) ] ;\n",
+"if $(_r) = $(DOT) {\n",
+"return $(_l) ;\n",
+"} else {\n",
+"return $(_r:R=$(_l)) ;\n",
+"}\n",
+"}\n",
+"rule FAppendSuffix\n",
+"{\n",
+"if $(>)\n",
+"{\n",
+"local _i _o ;\n",
+"for _i in $(<)\n",
+"{\n",
+"if $(_i:S)\n",
+"{\n",
+"_o += $(_i) ;\n",
+"}\n",
+"else\n",
+"{\n",
+"_o += $(_i:S=$(>)) ;\n",
+"}\n",
+"}\n",
+"return $(_o) ;\n",
+"}\n",
+"else\n",
+"{\n",
+"return $(<) ;\n",
+"}\n",
+"}\n",
+"rule unmakeDir\n",
+"{\n",
+"if $(>[1]:D) && $(>[1]:D) != $(>[1]) && $(>[1]:D) != \\\\\\\\\n",
+"{\n",
+"unmakeDir $(<) : $(>[1]:D) $(>[1]:BS) $(>[2-]) ;\n",
+"}\n",
+"else\n",
+"{\n",
+"$(<) = $(>) ;\n",
+"}\n",
+"}\n",
+"rule FConvertToSlashes\n",
+"{\n",
+"local _d, _s, _i ;\n",
+"unmakeDir _d : $(<) ;\n",
+"_s = $(_d[1]) ;\n",
+"for _i in $(_d[2-])\n",
+"{\n",
+"_s = $(_s)/$(_i) ;\n",
+"}\n",
+"return $(_s) ;\n",
+"}\n",
+"actions updated together piecemeal Archive\n",
+"{\n",
+"$(AR) $(<) $(>)\n",
+"}\n",
+"actions As\n",
+"{\n",
+"$(AS) $(ASFLAGS) -I$(HDRS) -o $(<) $(>)\n",
+"}\n",
+"actions C++\n",
+"{\n",
+"$(C++) -c $(C++FLAGS) $(OPTIM) -I$(HDRS) -o $(<) $(>)\n",
+"}\n",
+"actions Cc\n",
+"{\n",
+"$(CC) -c $(CCFLAGS) $(OPTIM) -I$(HDRS) -o $(<) $(>)\n",
+"}\n",
+"actions Chgrp\n",
+"{\n",
+"$(CHGRP) $(GROUP) $(<)\n",
+"}\n",
+"actions Chmod1\n",
+"{\n",
+"$(CHMOD) $(MODE) $(<)\n",
+"}\n",
+"actions Chown\n",
+"{\n",
+"$(CHOWN) $(OWNER) $(<)\n",
+"}\n",
+"actions piecemeal together existing Clean\n",
+"{\n",
+"$(RM) $(>)\n",
+"}\n",
+"actions File\n",
+"{\n",
+"$(CP) $(>) $(<)\n",
+"}\n",
+"actions GenFile1\n",
+"{\n",
+"$(>[1]) $(<) $(>[2-])\n",
+"}\n",
+"actions Fortran\n",
+"{\n",
+"$(FORTRAN) $(FORTRANFLAGS) -o $(<) $(>)\n",
+"}\n",
+"actions HardLink\n",
+"{\n",
+"$(RM) $(<) && $(LN) $(>) $(<)\n",
+"}\n",
+"actions Install\n",
+"{\n",
+"$(CP) $(>) $(<)\n",
+"}\n",
+"actions Lex\n",
+"{\n",
+"$(LEX) $(>)\n",
+"}\n",
+"actions LexMv\n",
+"{\n",
+"$(MV) lex.yy.c $(<)\n",
+"}\n",
+"actions Link bind NEEDLIBS\n",
+"{\n",
+"$(LINK) $(LINKFLAGS) -o $(<) $(UNDEFS) $(>) $(NEEDLIBS) $(LINKLIBS)\n",
+"}\n",
+"actions MkDir1\n",
+"{\n",
+"$(MKDIR) $(<)\n",
+"}\n",
+"actions together Ranlib\n",
+"{\n",
+"$(RANLIB) $(<)\n",
+"}\n",
+"actions quietly updated piecemeal together RmTemps\n",
+"{\n",
+"$(RM) $(>)\n",
+"}\n",
+"actions Shell\n",
+"{\n",
+"$(AWK) '\n",
+"NR == 1 { print \"$(SHELLHEADER)\" }\n",
+"NR == 1 && /^[#:]/ { next }\n",
+"/^##/ { next }\n",
+"{ print }\n",
+"' < $(>) > $(<)\n",
+"}\n",
+"actions Yacc1\n",
+"{\n",
+"$(YACC) $(YACCFLAGS) $(>)\n",
+"}\n",
+"actions YaccMv\n",
+"{\n",
+"$(MV) $(YACCFILES).c $(<[1])\n",
+"$(MV) $(YACCFILES).h $(<[2])\n",
+"}\n",
+"if $(RELOCATE)\n",
+"{\n",
+"actions C++\n",
+"{\n",
+"$(C++) -c $(C++FLAGS) $(OPTIM) -I$(HDRS) $(>)\n",
+"}\n",
+"actions Cc\n",
+"{\n",
+"$(CC) -c $(CCFLAGS) $(OPTIM) -I$(HDRS) $(>)\n",
+"}\n",
+"actions ignore CcMv\n",
+"{\n",
+"[ $(<) != $(>:BS=$(SUFOBJ)) ] && $(MV) $(>:BS=$(SUFOBJ)) $(<)\n",
+"}\n",
+"}\n",
+"if $(NOARUPDATE)\n",
+"{\n",
+"actions Archive\n",
+"{\n",
+"$(AR) $(<) $(>)\n",
+"}\n",
+"}\n",
+"if $(NT)\n",
+"{\n",
+"if $(TOOLSET) = VISUALC || $(TOOLSET) = VC7 || $(TOOLSET) = INTELC\n",
+"{\n",
+"actions updated together piecemeal Archive\n",
+"{\n",
+"if exist $(<) set _$(<:B)_=$(<)\n",
+"$(AR) /out:$(<) %_$(<:B)_% $(>)\n",
+"}\n",
+"actions As\n",
+"{\n",
+"$(AS) /Ml /p /v /w2 $(>) $(<) ,nul,nul;\n",
+"}\n",
+"actions Cc\n",
+"{\n",
+"$(CC) /c $(CCFLAGS) $(OPTIM) /Fo$(<) /I$(HDRS) /I$(STDHDRS) $(>)\n",
+"}\n",
+"actions C++\n",
+"{\n",
+"$(C++) /c $(C++FLAGS) $(OPTIM) /Fo$(<) /I$(HDRS) /I$(STDHDRS) /Tp$(>)\n",
+"}\n",
+"actions Link bind NEEDLIBS\n",
+"{\n",
+"$(LINK) $(LINKFLAGS) /out:$(<) $(UNDEFS) $(>) $(NEEDLIBS) $(LINKLIBS)\n",
+"}\n",
+"}\n",
+"else if $(TOOLSET) = VISUALC16\n",
+"{\n",
+"actions updated together piecemeal Archive\n",
+"{\n",
+"$(AR) $(<) -+$(>)\n",
+"}\n",
+"actions Cc\n",
+"{\n",
+"$(CC) /c $(CCFLAGS) $(OPTIM) /Fo$(<) /I$(HDRS) $(>)\n",
+"}\n",
+"actions C++\n",
+"{\n",
+"$(C++) /c $(C++FLAGS) $(OPTIM) /Fo$(<) /I$(HDRS) /Tp$(>)\n",
+"}\n",
+"actions Link bind NEEDLIBS\n",
+"{\n",
+"$(LINK) $(LINKFLAGS) /out:$(<) $(UNDEFS) $(>) $(NEEDLIBS) $(LINKLIBS)\n",
+"}\n",
+"}\n",
+"else if $(TOOLSET) = BORLANDC\n",
+"{\n",
+"actions updated together piecemeal Archive\n",
+"{\n",
+"$(AR) $(<) -+$(>)\n",
+"}\n",
+"actions Link bind NEEDLIBS\n",
+"{\n",
+"$(LINK) -e$(<) $(LINKFLAGS) $(UNDEFS) -L$(LINKLIBS) $(NEEDLIBS) $(>)\n",
+"}\n",
+"actions C++\n",
+"{\n",
+"$(C++) -c $(C++FLAGS) $(OPTIM) -I$(HDRS) -o$(<) $(>)\n",
+"}\n",
+"actions Cc\n",
+"{\n",
+"$(CC) -c $(CCFLAGS) $(OPTIM) -I$(HDRS) -o$(<) $(>)\n",
+"}\n",
+"}\n",
+"else if $(TOOLSET) = MINGW\n",
+"{\n",
+"actions together piecemeal Archive\n",
+"{\n",
+"$(AR) $(<) $(>:T)\n",
+"}\n",
+"actions Cc\n",
+"{\n",
+"$(CC) -c $(CCFLAGS) $(OPTIM) -I$(HDRS) -o$(<) $(>)\n",
+"}\n",
+"actions C++\n",
+"{\n",
+"$(C++) -c $(C++FLAGS) $(OPTIM) -I$(HDRS) -o$(<) $(>)\n",
+"}\n",
+"}\n",
+"else if $(TOOLSET) = WATCOM\n",
+"{\n",
+"actions together piecemeal Archive\n",
+"{\n",
+"$(AR) $(<) +-$(>)\n",
+"}\n",
+"actions Cc\n",
+"{\n",
+"$(CC) $(CCFLAGS) $(OPTIM) /Fo=$(<) /I$(HDRS) $(>)\n",
+"}\n",
+"actions C++\n",
+"{\n",
+"$(C++) $(C++FLAGS) $(OPTIM) /Fo=$(<) /I$(HDRS) $(>)\n",
+"}\n",
+"actions Link bind NEEDLIBS\n",
+"{\n",
+"$(LINK) $(LINKFLAGS) /Fe=$(<) $(UNDEFS) $(>) $(NEEDLIBS) $(LINKLIBS)\n",
+"}\n",
+"actions Shell\n",
+"{\n",
+"$(CP) $(>) $(<)\n",
+"}\n",
+"}\n",
+"else if $(TOOLSET) = LCC\n",
+"{\n",
+"actions together piecemeal Archive\n",
+"{\n",
+"$(AR) /out:$(<) $(>)\n",
+"}\n",
+"actions Cc\n",
+"{\n",
+"$(CC) $(CCFLAGS) $(OPTIM) -Fo$(<) -I$(HDRS) $(>)\n",
+"}\n",
+"actions Link bind NEEDLIBS\n",
+"{\n",
+"$(LINK) $(LINKFLAGS) -o $(<) $(UNDEFS) $(>) $(NEEDLIBS) $(LINKLIBS)\n",
+"}\n",
+"actions Shell\n",
+"{\n",
+"$(CP) $(>) $(<)\n",
+"}\n",
+"}\n",
+"}\n",
+"else if $(OS2)\n",
+"{\n",
+"if $(TOOLSET) = WATCOM\n",
+"{\n",
+"actions together piecemeal Archive\n",
+"{\n",
+"$(AR) $(<) +-$(>)\n",
+"}\n",
+"actions Cc\n",
+"{\n",
+"$(CC) $(CCFLAGS) $(OPTIM) /Fo=$(<) /I$(HDRS) $(>)\n",
+"}\n",
+"actions C++\n",
+"{\n",
+"$(C++) $(C++FLAGS) $(OPTIM) /Fo=$(<) /I$(HDRS) $(>)\n",
+"}\n",
+"actions Link bind NEEDLIBS\n",
+"{\n",
+"$(LINK) $(LINKFLAGS) /Fe=$(<) $(UNDEFS) $(>) $(NEEDLIBS) $(LINKLIBS)\n",
+"}\n",
+"actions Shell\n",
+"{\n",
+"$(CP) $(>) $(<)\n",
+"}\n",
+"}\n",
+"else if $(TOOLSET) = EMX\n",
+"{\n",
+"actions together piecemeal Archive\n",
+"{\n",
+"$(AR) $(<) $(>:T)\n",
+"}\n",
+"actions Cc\n",
+"{\n",
+"$(CC) -c $(CCFLAGS) $(OPTIM) -I$(HDRS) -o$(<) $(>)\n",
+"}\n",
+"actions C++\n",
+"{\n",
+"$(C++) -c $(C++FLAGS) $(OPTIM) -I$(HDRS) -o$(<) $(>)\n",
+"}\n",
+"}\n",
+"}\n",
+"else if $(VMS)\n",
+"{\n",
+"actions updated together piecemeal Archive\n",
+"{\n",
+"lib/replace $(<) $(>[1]) ,$(>[2-])\n",
+"}\n",
+"actions Cc\n",
+"{\n",
+"$(CC)/obj=$(<) $(CCFLAGS) $(OPTIM) $(SLASHINC) $(>)\n",
+"}\n",
+"actions C++\n",
+"{\n",
+"$(C++)/obj=$(<) $(C++FLAGS) $(OPTIM) $(SLASHINC) $(>)\n",
+"}\n",
+"actions piecemeal together existing Clean\n",
+"{\n",
+"$(RM) $(>[1]);* ,$(>[2-]);*\n",
+"}\n",
+"actions together quietly CreLib\n",
+"{\n",
+"if f$search(\"$(<)\") .eqs. \"\" then lib/create $(<)\n",
+"}\n",
+"actions GenFile1\n",
+"{\n",
+"mcr $(>[1]) $(<) $(>[2-])\n",
+"}\n",
+"actions Link bind NEEDLIBS\n",
+"{\n",
+"$(LINK)/exe=$(<) $(LINKFLAGS) $(>[1]) ,$(>[2-]) ,$(NEEDLIBS)/lib ,$(LINKLIBS)\n",
+"}\n",
+"actions quietly updated piecemeal together RmTemps\n",
+"{\n",
+"$(RM) $(>[1]);* ,$(>[2-]);*\n",
+"}\n",
+"actions Shell\n",
+"{\n",
+"$(CP) $(>) $(<)\n",
+"}\n",
+"}\n",
+"else if $(MAC)\n",
+"{\n",
+"actions together Archive\n",
+"{\n",
+"$(LINK) -library -o $(<) $(>)\n",
+"}\n",
+"actions Cc\n",
+"{\n",
+"set -e MWCincludes $(MACINC)\n",
+"$(CC) -o $(<) $(CCFLAGS) $(OPTIM) $(>)\n",
+"}\n",
+"actions C++\n",
+"{\n",
+"set -e MWCincludes $(MACINC)\n",
+"$(CC) -o $(<) $(C++FLAGS) $(OPTIM) $(>)\n",
+"}\n",
+"actions Link bind NEEDLIBS\n",
+"{\n",
+"$(LINK) -o $(<) $(LINKFLAGS) $(>) $(NEEDLIBS) \"$(LINKLIBS)\"\n",
+"}\n",
+"}\n",
+"rule BULK { Bulk $(<) : $(>) ; }\n",
+"rule FILE { File $(<) : $(>) ; }\n",
+"rule HDRRULE { HdrRule $(<) : $(>) ; }\n",
+"rule INSTALL { Install $(<) : $(>) ; }\n",
+"rule LIBRARY { Library $(<) : $(>) ; }\n",
+"rule LIBS { LinkLibraries $(<) : $(>) ; }\n",
+"rule LINK { Link $(<) : $(>) ; }\n",
+"rule MAIN { Main $(<) : $(>) ; }\n",
+"rule SETUID { Setuid $(<) ; }\n",
+"rule SHELL { Shell $(<) : $(>) ; }\n",
+"rule UNDEFINES { Undefines $(<) : $(>) ; }\n",
+"rule INSTALLBIN { InstallBin $(BINDIR) : $(<) ; }\n",
+"rule INSTALLLIB { InstallLib $(LIBDIR) : $(<) ; }\n",
+"rule INSTALLMAN { InstallMan $(MANDIR) : $(<) ; }\n",
+"rule addDirName { $(<) += [ FDirName $(>) ] ; }\n",
+"rule makeDirName { $(<) = [ FDirName $(>) ] ; }\n",
+"rule makeGristedName { $(<) = [ FGristSourceFiles $(>) ] ; }\n",
+"rule makeRelPath { $(<[1]) = [ FRelPath $(<[2-]) : $(>) ] ; }\n",
+"rule makeSuffixed { $(<[1]) = [ FAppendSuffix $(>) : $(<[2]) ] ; }\n",
+"{\n",
+"if $(JAMFILE) { include $(JAMFILE) ; }\n",
+"}\n",
+"}\n",
+0 };
diff --git a/src/kenlm/jam-files/engine/jambase.h b/src/kenlm/jam-files/engine/jambase.h
new file mode 100644
index 0000000..c05ec79
--- /dev/null
+++ b/src/kenlm/jam-files/engine/jambase.h
@@ -0,0 +1,15 @@
+/*
+ * Copyright 1993, 1995 Christopher Seiwald.
+ *
+ * This file is part of Jam - see jam.c for Copyright information.
+ */
+
+/*
+ * jambase.h - declaration for the internal jambase
+ *
+ * The file Jambase is turned into a C array of strings in jambase.c
+ * so that it can be built in to the executable. This is the
+ * declaration for that array.
+ */
+
+extern char *jambase[];
diff --git a/src/kenlm/jam-files/engine/jamgram.c b/src/kenlm/jam-files/engine/jamgram.c
new file mode 100644
index 0000000..48c8522
--- /dev/null
+++ b/src/kenlm/jam-files/engine/jamgram.c
@@ -0,0 +1,2327 @@
+/* A Bison parser, made by GNU Bison 2.4.3. */
+
+/* Skeleton implementation for Bison's Yacc-like parsers in C
+
+ Copyright (C) 1984, 1989, 1990, 2000, 2001, 2002, 2003, 2004, 2005, 2006,
+ 2009, 2010 Free Software Foundation, Inc.
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>. */
+
+/* As a special exception, you may create a larger work that contains
+ part or all of the Bison parser skeleton and distribute that work
+ under terms of your choice, so long as that work isn't itself a
+ parser generator using the skeleton or a modified version thereof
+ as a parser skeleton. Alternatively, if you modify or redistribute
+ the parser skeleton itself, you may (at your option) remove this
+ special exception, which will cause the skeleton and the resulting
+ Bison output files to be licensed under the GNU General Public
+ License without this special exception.
+
+ This special exception was added by the Free Software Foundation in
+ version 2.2 of Bison. */
+
+/* C LALR(1) parser skeleton written by Richard Stallman, by
+ simplifying the original so-called "semantic" parser. */
+
+/* All symbols defined below should begin with yy or YY, to avoid
+ infringing on user name space. This should be done even for local
+ variables, as they might otherwise be expanded by user macros.
+ There are some unavoidable exceptions within include files to
+ define necessary library symbols; they are noted "INFRINGES ON
+ USER NAME SPACE" below. */
+
+/* Identify Bison output. */
+#define YYBISON 1
+
+/* Bison version. */
+#define YYBISON_VERSION "2.4.3"
+
+/* Skeleton name. */
+#define YYSKELETON_NAME "yacc.c"
+
+/* Pure parsers. */
+#define YYPURE 0
+
+/* Push parsers. */
+#define YYPUSH 0
+
+/* Pull parsers. */
+#define YYPULL 1
+
+/* Using locations. */
+#define YYLSP_NEEDED 0
+
+
+
+/* Copy the first part of user declarations. */
+
+/* Line 189 of yacc.c */
+#line 96 "jamgram.y"
+
+#include "jam.h"
+
+#include "lists.h"
+#include "parse.h"
+#include "scan.h"
+#include "compile.h"
+#include "object.h"
+#include "rules.h"
+
+# define YYMAXDEPTH 10000 /* for OSF and other less endowed yaccs */
+
+# define F0 -1
+# define P0 (PARSE *)0
+# define S0 (OBJECT *)0
+
+# define pappend( l,r ) parse_make( PARSE_APPEND,l,r,P0,S0,S0,0 )
+# define peval( c,l,r ) parse_make( PARSE_EVAL,l,r,P0,S0,S0,c )
+# define pfor( s,l,r,x ) parse_make( PARSE_FOREACH,l,r,P0,s,S0,x )
+# define pif( l,r,t ) parse_make( PARSE_IF,l,r,t,S0,S0,0 )
+# define pincl( l ) parse_make( PARSE_INCLUDE,l,P0,P0,S0,S0,0 )
+# define plist( s ) parse_make( PARSE_LIST,P0,P0,P0,s,S0,0 )
+# define plocal( l,r,t ) parse_make( PARSE_LOCAL,l,r,t,S0,S0,0 )
+# define pmodule( l,r ) parse_make( PARSE_MODULE,l,r,P0,S0,S0,0 )
+# define pclass( l,r ) parse_make( PARSE_CLASS,l,r,P0,S0,S0,0 )
+# define pnull() parse_make( PARSE_NULL,P0,P0,P0,S0,S0,0 )
+# define pon( l,r ) parse_make( PARSE_ON,l,r,P0,S0,S0,0 )
+# define prule( s,p ) parse_make( PARSE_RULE,p,P0,P0,s,S0,0 )
+# define prules( l,r ) parse_make( PARSE_RULES,l,r,P0,S0,S0,0 )
+# define pset( l,r,a ) parse_make( PARSE_SET,l,r,P0,S0,S0,a )
+# define pset1( l,r,t,a ) parse_make( PARSE_SETTINGS,l,r,t,S0,S0,a )
+# define psetc( s,p,a,l ) parse_make( PARSE_SETCOMP,p,a,P0,s,S0,l )
+# define psete( s,l,s1,f ) parse_make( PARSE_SETEXEC,l,P0,P0,s,s1,f )
+# define pswitch( l,r ) parse_make( PARSE_SWITCH,l,r,P0,S0,S0,0 )
+# define pwhile( l,r ) parse_make( PARSE_WHILE,l,r,P0,S0,S0,0 )
+
+# define pnode( l,r ) parse_make( F0,l,r,P0,S0,S0,0 )
+# define psnode( s,l ) parse_make( F0,l,P0,P0,s,S0,0 )
+
+
+
+/* Line 189 of yacc.c */
+#line 114 "y.tab.c"
+
+/* Enabling traces. */
+#ifndef YYDEBUG
+# define YYDEBUG 0
+#endif
+
+/* Enabling verbose error messages. */
+#ifdef YYERROR_VERBOSE
+# undef YYERROR_VERBOSE
+# define YYERROR_VERBOSE 1
+#else
+# define YYERROR_VERBOSE 0
+#endif
+
+/* Enabling the token table. */
+#ifndef YYTOKEN_TABLE
+# define YYTOKEN_TABLE 0
+#endif
+
+
+/* Tokens. */
+#ifndef YYTOKENTYPE
+# define YYTOKENTYPE
+ /* Put the tokens into the symbol table, so that GDB and other debuggers
+ know about them. */
+ enum yytokentype {
+ _BANG_t = 258,
+ _BANG_EQUALS_t = 259,
+ _AMPER_t = 260,
+ _AMPERAMPER_t = 261,
+ _LPAREN_t = 262,
+ _RPAREN_t = 263,
+ _PLUS_EQUALS_t = 264,
+ _COLON_t = 265,
+ _SEMIC_t = 266,
+ _LANGLE_t = 267,
+ _LANGLE_EQUALS_t = 268,
+ _EQUALS_t = 269,
+ _RANGLE_t = 270,
+ _RANGLE_EQUALS_t = 271,
+ _QUESTION_EQUALS_t = 272,
+ _LBRACKET_t = 273,
+ _RBRACKET_t = 274,
+ ACTIONS_t = 275,
+ BIND_t = 276,
+ CASE_t = 277,
+ CLASS_t = 278,
+ DEFAULT_t = 279,
+ ELSE_t = 280,
+ EXISTING_t = 281,
+ FOR_t = 282,
+ IF_t = 283,
+ IGNORE_t = 284,
+ IN_t = 285,
+ INCLUDE_t = 286,
+ LOCAL_t = 287,
+ MODULE_t = 288,
+ ON_t = 289,
+ PIECEMEAL_t = 290,
+ QUIETLY_t = 291,
+ RETURN_t = 292,
+ RULE_t = 293,
+ SWITCH_t = 294,
+ TOGETHER_t = 295,
+ UPDATED_t = 296,
+ WHILE_t = 297,
+ _LBRACE_t = 298,
+ _BAR_t = 299,
+ _BARBAR_t = 300,
+ _RBRACE_t = 301,
+ ARG = 302,
+ STRING = 303
+ };
+#endif
+/* Tokens. */
+#define _BANG_t 258
+#define _BANG_EQUALS_t 259
+#define _AMPER_t 260
+#define _AMPERAMPER_t 261
+#define _LPAREN_t 262
+#define _RPAREN_t 263
+#define _PLUS_EQUALS_t 264
+#define _COLON_t 265
+#define _SEMIC_t 266
+#define _LANGLE_t 267
+#define _LANGLE_EQUALS_t 268
+#define _EQUALS_t 269
+#define _RANGLE_t 270
+#define _RANGLE_EQUALS_t 271
+#define _QUESTION_EQUALS_t 272
+#define _LBRACKET_t 273
+#define _RBRACKET_t 274
+#define ACTIONS_t 275
+#define BIND_t 276
+#define CASE_t 277
+#define CLASS_t 278
+#define DEFAULT_t 279
+#define ELSE_t 280
+#define EXISTING_t 281
+#define FOR_t 282
+#define IF_t 283
+#define IGNORE_t 284
+#define IN_t 285
+#define INCLUDE_t 286
+#define LOCAL_t 287
+#define MODULE_t 288
+#define ON_t 289
+#define PIECEMEAL_t 290
+#define QUIETLY_t 291
+#define RETURN_t 292
+#define RULE_t 293
+#define SWITCH_t 294
+#define TOGETHER_t 295
+#define UPDATED_t 296
+#define WHILE_t 297
+#define _LBRACE_t 298
+#define _BAR_t 299
+#define _BARBAR_t 300
+#define _RBRACE_t 301
+#define ARG 302
+#define STRING 303
+
+
+
+
+#if ! defined YYSTYPE && ! defined YYSTYPE_IS_DECLARED
+typedef int YYSTYPE;
+# define YYSTYPE_IS_TRIVIAL 1
+# define yystype YYSTYPE /* obsolescent; will be withdrawn */
+# define YYSTYPE_IS_DECLARED 1
+#endif
+
+
+/* Copy the second part of user declarations. */
+
+
+/* Line 264 of yacc.c */
+#line 252 "y.tab.c"
+
+#ifdef short
+# undef short
+#endif
+
+#ifdef YYTYPE_UINT8
+typedef YYTYPE_UINT8 yytype_uint8;
+#else
+typedef unsigned char yytype_uint8;
+#endif
+
+#ifdef YYTYPE_INT8
+typedef YYTYPE_INT8 yytype_int8;
+#elif (defined __STDC__ || defined __C99__FUNC__ \
+ || defined __cplusplus || defined _MSC_VER)
+typedef signed char yytype_int8;
+#else
+typedef short int yytype_int8;
+#endif
+
+#ifdef YYTYPE_UINT16
+typedef YYTYPE_UINT16 yytype_uint16;
+#else
+typedef unsigned short int yytype_uint16;
+#endif
+
+#ifdef YYTYPE_INT16
+typedef YYTYPE_INT16 yytype_int16;
+#else
+typedef short int yytype_int16;
+#endif
+
+#ifndef YYSIZE_T
+# ifdef __SIZE_TYPE__
+# define YYSIZE_T __SIZE_TYPE__
+# elif defined size_t
+# define YYSIZE_T size_t
+# elif ! defined YYSIZE_T && (defined __STDC__ || defined __C99__FUNC__ \
+ || defined __cplusplus || defined _MSC_VER)
+# include <stddef.h> /* INFRINGES ON USER NAME SPACE */
+# define YYSIZE_T size_t
+# else
+# define YYSIZE_T unsigned int
+# endif
+#endif
+
+#define YYSIZE_MAXIMUM ((YYSIZE_T) -1)
+
+#ifndef YY_
+# if defined YYENABLE_NLS && YYENABLE_NLS
+# if ENABLE_NLS
+# include <libintl.h> /* INFRINGES ON USER NAME SPACE */
+# define YY_(msgid) dgettext ("bison-runtime", msgid)
+# endif
+# endif
+# ifndef YY_
+# define YY_(msgid) msgid
+# endif
+#endif
+
+/* Suppress unused-variable warnings by "using" E. */
+#if ! defined lint || defined __GNUC__
+# define YYUSE(e) ((void) (e))
+#else
+# define YYUSE(e) /* empty */
+#endif
+
+/* Identity function, used to suppress warnings about constant conditions. */
+#ifndef lint
+# define YYID(n) (n)
+#else
+#if (defined __STDC__ || defined __C99__FUNC__ \
+ || defined __cplusplus || defined _MSC_VER)
+static int
+YYID (int yyi)
+#else
+static int
+YYID (yyi)
+ int yyi;
+#endif
+{
+ return yyi;
+}
+#endif
+
+#if ! defined yyoverflow || YYERROR_VERBOSE
+
+/* The parser invokes alloca or malloc; define the necessary symbols. */
+
+# ifdef YYSTACK_USE_ALLOCA
+# if YYSTACK_USE_ALLOCA
+# ifdef __GNUC__
+# define YYSTACK_ALLOC __builtin_alloca
+# elif defined __BUILTIN_VA_ARG_INCR
+# include <alloca.h> /* INFRINGES ON USER NAME SPACE */
+# elif defined _AIX
+# define YYSTACK_ALLOC __alloca
+# elif defined _MSC_VER
+# include <malloc.h> /* INFRINGES ON USER NAME SPACE */
+# define alloca _alloca
+# else
+# define YYSTACK_ALLOC alloca
+# if ! defined _ALLOCA_H && ! defined _STDLIB_H && (defined __STDC__ || defined __C99__FUNC__ \
+ || defined __cplusplus || defined _MSC_VER)
+# include <stdlib.h> /* INFRINGES ON USER NAME SPACE */
+# ifndef _STDLIB_H
+# define _STDLIB_H 1
+# endif
+# endif
+# endif
+# endif
+# endif
+
+# ifdef YYSTACK_ALLOC
+ /* Pacify GCC's `empty if-body' warning. */
+# define YYSTACK_FREE(Ptr) do { /* empty */; } while (YYID (0))
+# ifndef YYSTACK_ALLOC_MAXIMUM
+ /* The OS might guarantee only one guard page at the bottom of the stack,
+ and a page size can be as small as 4096 bytes. So we cannot safely
+ invoke alloca (N) if N exceeds 4096. Use a slightly smaller number
+ to allow for a few compiler-allocated temporary stack slots. */
+# define YYSTACK_ALLOC_MAXIMUM 4032 /* reasonable circa 2006 */
+# endif
+# else
+# define YYSTACK_ALLOC YYMALLOC
+# define YYSTACK_FREE YYFREE
+# ifndef YYSTACK_ALLOC_MAXIMUM
+# define YYSTACK_ALLOC_MAXIMUM YYSIZE_MAXIMUM
+# endif
+# if (defined __cplusplus && ! defined _STDLIB_H \
+ && ! ((defined YYMALLOC || defined malloc) \
+ && (defined YYFREE || defined free)))
+# include <stdlib.h> /* INFRINGES ON USER NAME SPACE */
+# ifndef _STDLIB_H
+# define _STDLIB_H 1
+# endif
+# endif
+# ifndef YYMALLOC
+# define YYMALLOC malloc
+# if ! defined malloc && ! defined _STDLIB_H && (defined __STDC__ || defined __C99__FUNC__ \
+ || defined __cplusplus || defined _MSC_VER)
+void *malloc (YYSIZE_T); /* INFRINGES ON USER NAME SPACE */
+# endif
+# endif
+# ifndef YYFREE
+# define YYFREE free
+# if ! defined free && ! defined _STDLIB_H && (defined __STDC__ || defined __C99__FUNC__ \
+ || defined __cplusplus || defined _MSC_VER)
+void free (void *); /* INFRINGES ON USER NAME SPACE */
+# endif
+# endif
+# endif
+#endif /* ! defined yyoverflow || YYERROR_VERBOSE */
+
+
+#if (! defined yyoverflow \
+ && (! defined __cplusplus \
+ || (defined YYSTYPE_IS_TRIVIAL && YYSTYPE_IS_TRIVIAL)))
+
+/* A type that is properly aligned for any stack member. */
+union yyalloc
+{
+ yytype_int16 yyss_alloc;
+ YYSTYPE yyvs_alloc;
+};
+
+/* The size of the maximum gap between one aligned stack and the next. */
+# define YYSTACK_GAP_MAXIMUM (sizeof (union yyalloc) - 1)
+
+/* The size of an array large to enough to hold all stacks, each with
+ N elements. */
+# define YYSTACK_BYTES(N) \
+ ((N) * (sizeof (yytype_int16) + sizeof (YYSTYPE)) \
+ + YYSTACK_GAP_MAXIMUM)
+
+/* Copy COUNT objects from FROM to TO. The source and destination do
+ not overlap. */
+# ifndef YYCOPY
+# if defined __GNUC__ && 1 < __GNUC__
+# define YYCOPY(To, From, Count) \
+ __builtin_memcpy (To, From, (Count) * sizeof (*(From)))
+# else
+# define YYCOPY(To, From, Count) \
+ do \
+ { \
+ YYSIZE_T yyi; \
+ for (yyi = 0; yyi < (Count); yyi++) \
+ (To)[yyi] = (From)[yyi]; \
+ } \
+ while (YYID (0))
+# endif
+# endif
+
+/* Relocate STACK from its old location to the new one. The
+ local variables YYSIZE and YYSTACKSIZE give the old and new number of
+ elements in the stack, and YYPTR gives the new location of the
+ stack. Advance YYPTR to a properly aligned location for the next
+ stack. */
+# define YYSTACK_RELOCATE(Stack_alloc, Stack) \
+ do \
+ { \
+ YYSIZE_T yynewbytes; \
+ YYCOPY (&yyptr->Stack_alloc, Stack, yysize); \
+ Stack = &yyptr->Stack_alloc; \
+ yynewbytes = yystacksize * sizeof (*Stack) + YYSTACK_GAP_MAXIMUM; \
+ yyptr += yynewbytes / sizeof (*yyptr); \
+ } \
+ while (YYID (0))
+
+#endif
+
+/* YYFINAL -- State number of the termination state. */
+#define YYFINAL 43
+/* YYLAST -- Last index in YYTABLE. */
+#define YYLAST 243
+
+/* YYNTOKENS -- Number of terminals. */
+#define YYNTOKENS 49
+/* YYNNTS -- Number of nonterminals. */
+#define YYNNTS 24
+/* YYNRULES -- Number of rules. */
+#define YYNRULES 75
+/* YYNRULES -- Number of states. */
+#define YYNSTATES 159
+
+/* YYTRANSLATE(YYLEX) -- Bison symbol number corresponding to YYLEX. */
+#define YYUNDEFTOK 2
+#define YYMAXUTOK 303
+
+#define YYTRANSLATE(YYX) \
+ ((unsigned int) (YYX) <= YYMAXUTOK ? yytranslate[YYX] : YYUNDEFTOK)
+
+/* YYTRANSLATE[YYLEX] -- Bison symbol number corresponding to YYLEX. */
+static const yytype_uint8 yytranslate[] =
+{
+ 0, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 1, 2, 3, 4,
+ 5, 6, 7, 8, 9, 10, 11, 12, 13, 14,
+ 15, 16, 17, 18, 19, 20, 21, 22, 23, 24,
+ 25, 26, 27, 28, 29, 30, 31, 32, 33, 34,
+ 35, 36, 37, 38, 39, 40, 41, 42, 43, 44,
+ 45, 46, 47, 48
+};
+
+#if YYDEBUG
+/* YYPRHS[YYN] -- Index of the first RHS symbol of rule number YYN in
+ YYRHS. */
+static const yytype_uint8 yyprhs[] =
+{
+ 0, 0, 3, 4, 6, 8, 10, 12, 15, 21,
+ 22, 25, 27, 31, 32, 34, 35, 39, 43, 47,
+ 52, 59, 63, 72, 78, 84, 90, 96, 102, 110,
+ 116, 120, 121, 122, 132, 134, 136, 138, 141, 143,
+ 147, 151, 155, 159, 163, 167, 171, 175, 179, 183,
+ 187, 190, 194, 195, 198, 203, 205, 209, 211, 212,
+ 215, 217, 218, 223, 226, 231, 236, 237, 240, 242,
+ 244, 246, 248, 250, 252, 253
+};
+
+/* YYRHS -- A `-1'-separated list of the rules' RHS. */
+static const yytype_int8 yyrhs[] =
+{
+ 50, 0, -1, -1, 52, -1, 53, -1, 52, -1,
+ 57, -1, 57, 52, -1, 32, 65, 54, 11, 51,
+ -1, -1, 14, 65, -1, 53, -1, 7, 64, 8,
+ -1, -1, 32, -1, -1, 43, 51, 46, -1, 31,
+ 65, 11, -1, 47, 64, 11, -1, 67, 60, 65,
+ 11, -1, 67, 34, 65, 60, 65, 11, -1, 37,
+ 65, 11, -1, 27, 56, 47, 30, 65, 43, 51,
+ 46, -1, 39, 65, 43, 62, 46, -1, 28, 61,
+ 43, 51, 46, -1, 33, 65, 43, 51, 46, -1,
+ 23, 64, 43, 51, 46, -1, 42, 61, 43, 51,
+ 46, -1, 28, 61, 43, 51, 46, 25, 57, -1,
+ 56, 38, 47, 55, 57, -1, 34, 67, 57, -1,
+ -1, -1, 20, 70, 47, 72, 43, 58, 48, 59,
+ 46, -1, 14, -1, 9, -1, 17, -1, 24, 14,
+ -1, 67, -1, 61, 14, 61, -1, 61, 4, 61,
+ -1, 61, 12, 61, -1, 61, 13, 61, -1, 61,
+ 15, 61, -1, 61, 16, 61, -1, 61, 5, 61,
+ -1, 61, 6, 61, -1, 61, 44, 61, -1, 61,
+ 45, 61, -1, 67, 30, 65, -1, 3, 61, -1,
+ 7, 61, 8, -1, -1, 63, 62, -1, 22, 47,
+ 10, 51, -1, 65, -1, 65, 10, 64, -1, 66,
+ -1, -1, 66, 67, -1, 47, -1, -1, 18, 68,
+ 69, 19, -1, 47, 64, -1, 34, 67, 47, 64,
+ -1, 34, 67, 37, 65, -1, -1, 70, 71, -1,
+ 41, -1, 40, -1, 29, -1, 36, -1, 35, -1,
+ 26, -1, -1, 21, 65, -1
+};
+
+/* YYRLINE[YYN] -- source line where rule number YYN was defined. */
+static const yytype_uint16 yyrline[] =
+{
+ 0, 139, 139, 141, 152, 154, 158, 160, 162, 167,
+ 170, 172, 176, 179, 182, 185, 188, 190, 192, 194,
+ 196, 198, 200, 202, 204, 206, 208, 210, 212, 214,
+ 216, 219, 221, 218, 230, 232, 234, 236, 243, 245,
+ 247, 249, 251, 253, 255, 257, 259, 261, 263, 265,
+ 267, 269, 281, 282, 286, 295, 297, 307, 312, 313,
+ 317, 319, 319, 328, 330, 332, 343, 344, 348, 350,
+ 352, 354, 356, 358, 368, 369
+};
+#endif
+
+#if YYDEBUG || YYERROR_VERBOSE || YYTOKEN_TABLE
+/* YYTNAME[SYMBOL-NUM] -- String name of the symbol SYMBOL-NUM.
+ First, the terminals, then, starting at YYNTOKENS, nonterminals. */
+static const char *const yytname[] =
+{
+ "$end", "error", "$undefined", "_BANG_t", "_BANG_EQUALS_t", "_AMPER_t",
+ "_AMPERAMPER_t", "_LPAREN_t", "_RPAREN_t", "_PLUS_EQUALS_t", "_COLON_t",
+ "_SEMIC_t", "_LANGLE_t", "_LANGLE_EQUALS_t", "_EQUALS_t", "_RANGLE_t",
+ "_RANGLE_EQUALS_t", "_QUESTION_EQUALS_t", "_LBRACKET_t", "_RBRACKET_t",
+ "ACTIONS_t", "BIND_t", "CASE_t", "CLASS_t", "DEFAULT_t", "ELSE_t",
+ "EXISTING_t", "FOR_t", "IF_t", "IGNORE_t", "IN_t", "INCLUDE_t",
+ "LOCAL_t", "MODULE_t", "ON_t", "PIECEMEAL_t", "QUIETLY_t", "RETURN_t",
+ "RULE_t", "SWITCH_t", "TOGETHER_t", "UPDATED_t", "WHILE_t", "_LBRACE_t",
+ "_BAR_t", "_BARBAR_t", "_RBRACE_t", "ARG", "STRING", "$accept", "run",
+ "block", "rules", "null", "assign_list_opt", "arglist_opt", "local_opt",
+ "rule", "$@1", "$@2", "assign", "expr", "cases", "case", "lol", "list",
+ "listp", "arg", "$@3", "func", "eflags", "eflag", "bindlist", 0
+};
+#endif
+
+# ifdef YYPRINT
+/* YYTOKNUM[YYLEX-NUM] -- Internal token number corresponding to
+ token YYLEX-NUM. */
+static const yytype_uint16 yytoknum[] =
+{
+ 0, 256, 257, 258, 259, 260, 261, 262, 263, 264,
+ 265, 266, 267, 268, 269, 270, 271, 272, 273, 274,
+ 275, 276, 277, 278, 279, 280, 281, 282, 283, 284,
+ 285, 286, 287, 288, 289, 290, 291, 292, 293, 294,
+ 295, 296, 297, 298, 299, 300, 301, 302, 303
+};
+# endif
+
+/* YYR1[YYN] -- Symbol number of symbol that rule YYN derives. */
+static const yytype_uint8 yyr1[] =
+{
+ 0, 49, 50, 50, 51, 51, 52, 52, 52, 53,
+ 54, 54, 55, 55, 56, 56, 57, 57, 57, 57,
+ 57, 57, 57, 57, 57, 57, 57, 57, 57, 57,
+ 57, 58, 59, 57, 60, 60, 60, 60, 61, 61,
+ 61, 61, 61, 61, 61, 61, 61, 61, 61, 61,
+ 61, 61, 62, 62, 63, 64, 64, 65, 66, 66,
+ 67, 68, 67, 69, 69, 69, 70, 70, 71, 71,
+ 71, 71, 71, 71, 72, 72
+};
+
+/* YYR2[YYN] -- Number of symbols composing right hand side of rule YYN. */
+static const yytype_uint8 yyr2[] =
+{
+ 0, 2, 0, 1, 1, 1, 1, 2, 5, 0,
+ 2, 1, 3, 0, 1, 0, 3, 3, 3, 4,
+ 6, 3, 8, 5, 5, 5, 5, 5, 7, 5,
+ 3, 0, 0, 9, 1, 1, 1, 2, 1, 3,
+ 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
+ 2, 3, 0, 2, 4, 1, 3, 1, 0, 2,
+ 1, 0, 4, 2, 4, 4, 0, 2, 1, 1,
+ 1, 1, 1, 1, 0, 2
+};
+
+/* YYDEFACT[STATE-NAME] -- Default rule to reduce with in state
+ STATE-NUM when YYTABLE doesn't specify something else to do. Zero
+ means the default is an error. */
+static const yytype_uint8 yydefact[] =
+{
+ 2, 61, 66, 58, 15, 0, 58, 58, 58, 0,
+ 58, 58, 0, 9, 60, 0, 3, 0, 6, 0,
+ 0, 0, 0, 55, 57, 14, 0, 0, 0, 60,
+ 0, 38, 0, 9, 0, 15, 0, 0, 0, 0,
+ 5, 4, 0, 1, 0, 7, 35, 34, 36, 0,
+ 58, 58, 0, 58, 0, 73, 70, 72, 71, 69,
+ 68, 74, 67, 9, 58, 59, 0, 50, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 9, 0, 0,
+ 58, 17, 58, 11, 0, 9, 30, 21, 52, 9,
+ 16, 18, 13, 37, 0, 0, 0, 63, 62, 58,
+ 0, 0, 56, 58, 51, 40, 45, 46, 41, 42,
+ 39, 43, 44, 0, 47, 48, 49, 10, 9, 0,
+ 0, 0, 52, 0, 58, 15, 58, 19, 58, 58,
+ 75, 31, 26, 0, 24, 8, 25, 0, 23, 53,
+ 27, 0, 29, 0, 65, 64, 0, 9, 15, 9,
+ 12, 20, 32, 0, 28, 54, 0, 22, 33
+};
+
+/* YYDEFGOTO[NTERM-NUM]. */
+static const yytype_int16 yydefgoto[] =
+{
+ -1, 15, 39, 40, 41, 84, 125, 17, 18, 146,
+ 156, 51, 30, 121, 122, 22, 23, 24, 31, 20,
+ 54, 21, 62, 100
+};
+
+/* YYPACT[STATE-NUM] -- Index in YYTABLE of the portion describing
+ STATE-NUM. */
+#define YYPACT_NINF -48
+static const yytype_int16 yypact[] =
+{
+ 170, -48, -48, -48, -12, 7, -48, -17, -48, -3,
+ -48, -48, 7, 170, 1, 22, -48, -9, 170, 19,
+ -2, 79, -6, 29, -3, -48, 2, 7, 7, -48,
+ 138, 20, 44, 45, 18, 196, 51, 26, 151, 24,
+ -48, -48, 62, -48, 27, -48, -48, -48, -48, 61,
+ -48, -48, -3, -48, 67, -48, -48, -48, -48, -48,
+ -48, 58, -48, 170, -48, -48, 50, -48, 52, 7,
+ 7, 7, 7, 7, 7, 7, 7, 170, 7, 7,
+ -48, -48, -48, -48, 70, 170, -48, -48, 87, 170,
+ -48, -48, 94, -48, 17, 99, -20, -48, -48, -48,
+ 69, 71, -48, -48, -48, 91, 156, 156, -48, -48,
+ 91, -48, -48, 77, 78, 78, -48, -48, 170, 81,
+ 66, 82, 87, 95, -48, 196, -48, -48, -48, -48,
+ -48, -48, -48, 97, 112, -48, -48, 135, -48, -48,
+ -48, 150, -48, 148, -48, -48, 98, 170, 196, 170,
+ -48, -48, -48, 115, -48, -48, 116, -48, -48
+};
+
+/* YYPGOTO[NTERM-NUM]. */
+static const yytype_int16 yypgoto[] =
+{
+ -48, -48, -47, 5, 140, -48, -48, 171, -27, -48,
+ -48, 80, 60, 54, -48, -13, -4, -48, 0, -48,
+ -48, -48, -48, -48
+};
+
+/* YYTABLE[YYPACT[STATE-NUM]]. What to do in state STATE-NUM. If
+ positive, shift that token. If negative, reduce the rule which
+ number is the opposite. If zero, do what YYDEFACT says.
+ If YYTABLE_NINF, syntax error. */
+#define YYTABLE_NINF -59
+static const yytype_int16 yytable[] =
+{
+ 19, 42, 32, 33, 34, 16, 36, 37, 86, 35,
+ 27, -58, -58, 19, 28, 1, 101, 128, 19, -58,
+ 25, -14, 43, 45, 65, 1, 46, 129, 46, 44,
+ 113, 47, 52, 47, 48, 19, 48, 63, 119, 64,
+ 97, 49, 123, 49, 29, 53, 94, 95, -58, 66,
+ 80, 102, 96, 50, 29, 81, 69, 70, 71, 82,
+ 104, 85, 87, 19, 72, 73, 74, 75, 76, 88,
+ 90, 135, 38, 91, 92, 93, 116, 19, 117, 99,
+ 103, 118, 69, 70, 71, 19, 98, 67, 68, 19,
+ 72, 73, 74, 75, 76, 130, 78, 79, 142, 133,
+ 153, 124, 155, 72, 73, 55, 75, 76, 56, 120,
+ 127, 141, 131, 137, 57, 58, 145, 132, 19, 59,
+ 60, 154, 143, 134, 144, 19, 61, 136, 138, 105,
+ 106, 107, 108, 109, 110, 111, 112, 148, 114, 115,
+ 147, 140, 69, 70, 71, 149, 152, 19, 19, 19,
+ 72, 73, 74, 75, 76, 69, 70, 71, 150, 151,
+ 69, 157, 158, 72, 73, 74, 75, 76, 72, 73,
+ 74, 75, 76, 83, 126, 26, 139, 0, 0, 0,
+ 0, 77, 78, 79, 0, 0, 0, 0, 1, 0,
+ 2, 0, 0, 3, 89, 78, 79, 4, 5, 0,
+ 0, 6, 7, 8, 9, 0, 0, 10, -15, 11,
+ 0, 0, 12, 13, 1, 0, 2, 14, 0, 3,
+ 0, 0, 0, 4, 5, 0, 0, 6, 25, 8,
+ 9, 0, 0, 10, 0, 11, 0, 0, 12, 13,
+ 0, 0, 0, 14
+};
+
+static const yytype_int16 yycheck[] =
+{
+ 0, 14, 6, 7, 8, 0, 10, 11, 35, 9,
+ 3, 10, 11, 13, 7, 18, 63, 37, 18, 18,
+ 32, 38, 0, 18, 24, 18, 9, 47, 9, 38,
+ 77, 14, 34, 14, 17, 35, 17, 43, 85, 10,
+ 53, 24, 89, 24, 47, 47, 50, 51, 47, 47,
+ 30, 64, 52, 34, 47, 11, 4, 5, 6, 14,
+ 8, 43, 11, 63, 12, 13, 14, 15, 16, 43,
+ 46, 118, 12, 11, 47, 14, 80, 77, 82, 21,
+ 30, 11, 4, 5, 6, 85, 19, 27, 28, 89,
+ 12, 13, 14, 15, 16, 99, 44, 45, 125, 103,
+ 147, 7, 149, 12, 13, 26, 15, 16, 29, 22,
+ 11, 124, 43, 47, 35, 36, 129, 46, 118, 40,
+ 41, 148, 126, 46, 128, 125, 47, 46, 46, 69,
+ 70, 71, 72, 73, 74, 75, 76, 25, 78, 79,
+ 43, 46, 4, 5, 6, 10, 48, 147, 148, 149,
+ 12, 13, 14, 15, 16, 4, 5, 6, 8, 11,
+ 4, 46, 46, 12, 13, 14, 15, 16, 12, 13,
+ 14, 15, 16, 33, 94, 4, 122, -1, -1, -1,
+ -1, 43, 44, 45, -1, -1, -1, -1, 18, -1,
+ 20, -1, -1, 23, 43, 44, 45, 27, 28, -1,
+ -1, 31, 32, 33, 34, -1, -1, 37, 38, 39,
+ -1, -1, 42, 43, 18, -1, 20, 47, -1, 23,
+ -1, -1, -1, 27, 28, -1, -1, 31, 32, 33,
+ 34, -1, -1, 37, -1, 39, -1, -1, 42, 43,
+ -1, -1, -1, 47
+};
+
+/* YYSTOS[STATE-NUM] -- The (internal number of the) accessing
+ symbol of state STATE-NUM. */
+static const yytype_uint8 yystos[] =
+{
+ 0, 18, 20, 23, 27, 28, 31, 32, 33, 34,
+ 37, 39, 42, 43, 47, 50, 52, 56, 57, 67,
+ 68, 70, 64, 65, 66, 32, 56, 3, 7, 47,
+ 61, 67, 65, 65, 65, 67, 65, 65, 61, 51,
+ 52, 53, 64, 0, 38, 52, 9, 14, 17, 24,
+ 34, 60, 34, 47, 69, 26, 29, 35, 36, 40,
+ 41, 47, 71, 43, 10, 67, 47, 61, 61, 4,
+ 5, 6, 12, 13, 14, 15, 16, 43, 44, 45,
+ 30, 11, 14, 53, 54, 43, 57, 11, 43, 43,
+ 46, 11, 47, 14, 65, 65, 67, 64, 19, 21,
+ 72, 51, 64, 30, 8, 61, 61, 61, 61, 61,
+ 61, 61, 61, 51, 61, 61, 65, 65, 11, 51,
+ 22, 62, 63, 51, 7, 55, 60, 11, 37, 47,
+ 65, 43, 46, 65, 46, 51, 46, 47, 46, 62,
+ 46, 64, 57, 65, 65, 64, 58, 43, 25, 10,
+ 8, 11, 48, 51, 57, 51, 59, 46, 46
+};
+
+#define yyerrok (yyerrstatus = 0)
+#define yyclearin (yychar = YYEMPTY)
+#define YYEMPTY (-2)
+#define YYEOF 0
+
+#define YYACCEPT goto yyacceptlab
+#define YYABORT goto yyabortlab
+#define YYERROR goto yyerrorlab
+
+
+/* Like YYERROR except do call yyerror. This remains here temporarily
+ to ease the transition to the new meaning of YYERROR, for GCC.
+ Once GCC version 2 has supplanted version 1, this can go. However,
+ YYFAIL appears to be in use. Nevertheless, it is formally deprecated
+ in Bison 2.4.2's NEWS entry, where a plan to phase it out is
+ discussed. */
+
+#define YYFAIL goto yyerrlab
+#if defined YYFAIL
+ /* This is here to suppress warnings from the GCC cpp's
+ -Wunused-macros. Normally we don't worry about that warning, but
+ some users do, and we want to make it easy for users to remove
+ YYFAIL uses, which will produce warnings from Bison 2.5. */
+#endif
+
+#define YYRECOVERING() (!!yyerrstatus)
+
+#define YYBACKUP(Token, Value) \
+do \
+ if (yychar == YYEMPTY && yylen == 1) \
+ { \
+ yychar = (Token); \
+ yylval = (Value); \
+ yytoken = YYTRANSLATE (yychar); \
+ YYPOPSTACK (1); \
+ goto yybackup; \
+ } \
+ else \
+ { \
+ yyerror (YY_("syntax error: cannot back up")); \
+ YYERROR; \
+ } \
+while (YYID (0))
+
+
+#define YYTERROR 1
+#define YYERRCODE 256
+
+
+/* YYLLOC_DEFAULT -- Set CURRENT to span from RHS[1] to RHS[N].
+ If N is 0, then set CURRENT to the empty location which ends
+ the previous symbol: RHS[0] (always defined). */
+
+#define YYRHSLOC(Rhs, K) ((Rhs)[K])
+#ifndef YYLLOC_DEFAULT
+# define YYLLOC_DEFAULT(Current, Rhs, N) \
+ do \
+ if (YYID (N)) \
+ { \
+ (Current).first_line = YYRHSLOC (Rhs, 1).first_line; \
+ (Current).first_column = YYRHSLOC (Rhs, 1).first_column; \
+ (Current).last_line = YYRHSLOC (Rhs, N).last_line; \
+ (Current).last_column = YYRHSLOC (Rhs, N).last_column; \
+ } \
+ else \
+ { \
+ (Current).first_line = (Current).last_line = \
+ YYRHSLOC (Rhs, 0).last_line; \
+ (Current).first_column = (Current).last_column = \
+ YYRHSLOC (Rhs, 0).last_column; \
+ } \
+ while (YYID (0))
+#endif
+
+
+/* YY_LOCATION_PRINT -- Print the location on the stream.
+ This macro was not mandated originally: define only if we know
+ we won't break user code: when these are the locations we know. */
+
+#ifndef YY_LOCATION_PRINT
+# if defined YYLTYPE_IS_TRIVIAL && YYLTYPE_IS_TRIVIAL
+# define YY_LOCATION_PRINT(File, Loc) \
+ fprintf (File, "%d.%d-%d.%d", \
+ (Loc).first_line, (Loc).first_column, \
+ (Loc).last_line, (Loc).last_column)
+# else
+# define YY_LOCATION_PRINT(File, Loc) ((void) 0)
+# endif
+#endif
+
+
+/* YYLEX -- calling `yylex' with the right arguments. */
+
+#ifdef YYLEX_PARAM
+# define YYLEX yylex (YYLEX_PARAM)
+#else
+# define YYLEX yylex ()
+#endif
+
+/* Enable debugging if requested. */
+#if YYDEBUG
+
+# ifndef YYFPRINTF
+# include <stdio.h> /* INFRINGES ON USER NAME SPACE */
+# define YYFPRINTF fprintf
+# endif
+
+# define YYDPRINTF(Args) \
+do { \
+ if (yydebug) \
+ YYFPRINTF Args; \
+} while (YYID (0))
+
+# define YY_SYMBOL_PRINT(Title, Type, Value, Location) \
+do { \
+ if (yydebug) \
+ { \
+ YYFPRINTF (stderr, "%s ", Title); \
+ yy_symbol_print (stderr, \
+ Type, Value); \
+ YYFPRINTF (stderr, "\n"); \
+ } \
+} while (YYID (0))
+
+
+/*--------------------------------.
+| Print this symbol on YYOUTPUT. |
+`--------------------------------*/
+
+/*ARGSUSED*/
+#if (defined __STDC__ || defined __C99__FUNC__ \
+ || defined __cplusplus || defined _MSC_VER)
+static void
+yy_symbol_value_print (FILE *yyoutput, int yytype, YYSTYPE const * const yyvaluep)
+#else
+static void
+yy_symbol_value_print (yyoutput, yytype, yyvaluep)
+ FILE *yyoutput;
+ int yytype;
+ YYSTYPE const * const yyvaluep;
+#endif
+{
+ if (!yyvaluep)
+ return;
+# ifdef YYPRINT
+ if (yytype < YYNTOKENS)
+ YYPRINT (yyoutput, yytoknum[yytype], *yyvaluep);
+# else
+ YYUSE (yyoutput);
+# endif
+ switch (yytype)
+ {
+ default:
+ break;
+ }
+}
+
+
+/*--------------------------------.
+| Print this symbol on YYOUTPUT. |
+`--------------------------------*/
+
+#if (defined __STDC__ || defined __C99__FUNC__ \
+ || defined __cplusplus || defined _MSC_VER)
+static void
+yy_symbol_print (FILE *yyoutput, int yytype, YYSTYPE const * const yyvaluep)
+#else
+static void
+yy_symbol_print (yyoutput, yytype, yyvaluep)
+ FILE *yyoutput;
+ int yytype;
+ YYSTYPE const * const yyvaluep;
+#endif
+{
+ if (yytype < YYNTOKENS)
+ YYFPRINTF (yyoutput, "token %s (", yytname[yytype]);
+ else
+ YYFPRINTF (yyoutput, "nterm %s (", yytname[yytype]);
+
+ yy_symbol_value_print (yyoutput, yytype, yyvaluep);
+ YYFPRINTF (yyoutput, ")");
+}
+
+/*------------------------------------------------------------------.
+| yy_stack_print -- Print the state stack from its BOTTOM up to its |
+| TOP (included). |
+`------------------------------------------------------------------*/
+
+#if (defined __STDC__ || defined __C99__FUNC__ \
+ || defined __cplusplus || defined _MSC_VER)
+static void
+yy_stack_print (yytype_int16 *yybottom, yytype_int16 *yytop)
+#else
+static void
+yy_stack_print (yybottom, yytop)
+ yytype_int16 *yybottom;
+ yytype_int16 *yytop;
+#endif
+{
+ YYFPRINTF (stderr, "Stack now");
+ for (; yybottom <= yytop; yybottom++)
+ {
+ int yybot = *yybottom;
+ YYFPRINTF (stderr, " %d", yybot);
+ }
+ YYFPRINTF (stderr, "\n");
+}
+
+# define YY_STACK_PRINT(Bottom, Top) \
+do { \
+ if (yydebug) \
+ yy_stack_print ((Bottom), (Top)); \
+} while (YYID (0))
+
+
+/*------------------------------------------------.
+| Report that the YYRULE is going to be reduced. |
+`------------------------------------------------*/
+
+#if (defined __STDC__ || defined __C99__FUNC__ \
+ || defined __cplusplus || defined _MSC_VER)
+static void
+yy_reduce_print (YYSTYPE *yyvsp, int yyrule)
+#else
+static void
+yy_reduce_print (yyvsp, yyrule)
+ YYSTYPE *yyvsp;
+ int yyrule;
+#endif
+{
+ int yynrhs = yyr2[yyrule];
+ int yyi;
+ unsigned long int yylno = yyrline[yyrule];
+ YYFPRINTF (stderr, "Reducing stack by rule %d (line %lu):\n",
+ yyrule - 1, yylno);
+ /* The symbols being reduced. */
+ for (yyi = 0; yyi < yynrhs; yyi++)
+ {
+ YYFPRINTF (stderr, " $%d = ", yyi + 1);
+ yy_symbol_print (stderr, yyrhs[yyprhs[yyrule] + yyi],
+ &(yyvsp[(yyi + 1) - (yynrhs)])
+ );
+ YYFPRINTF (stderr, "\n");
+ }
+}
+
+# define YY_REDUCE_PRINT(Rule) \
+do { \
+ if (yydebug) \
+ yy_reduce_print (yyvsp, Rule); \
+} while (YYID (0))
+
+/* Nonzero means print parse trace. It is left uninitialized so that
+ multiple parsers can coexist. */
+int yydebug;
+#else /* !YYDEBUG */
+# define YYDPRINTF(Args)
+# define YY_SYMBOL_PRINT(Title, Type, Value, Location)
+# define YY_STACK_PRINT(Bottom, Top)
+# define YY_REDUCE_PRINT(Rule)
+#endif /* !YYDEBUG */
+
+
+/* YYINITDEPTH -- initial size of the parser's stacks. */
+#ifndef YYINITDEPTH
+# define YYINITDEPTH 200
+#endif
+
+/* YYMAXDEPTH -- maximum size the stacks can grow to (effective only
+ if the built-in stack extension method is used).
+
+ Do not make this value too large; the results are undefined if
+ YYSTACK_ALLOC_MAXIMUM < YYSTACK_BYTES (YYMAXDEPTH)
+ evaluated with infinite-precision integer arithmetic. */
+
+#ifndef YYMAXDEPTH
+# define YYMAXDEPTH 10000
+#endif
+
+
+
+#if YYERROR_VERBOSE
+
+# ifndef yystrlen
+# if defined __GLIBC__ && defined _STRING_H
+# define yystrlen strlen
+# else
+/* Return the length of YYSTR. */
+#if (defined __STDC__ || defined __C99__FUNC__ \
+ || defined __cplusplus || defined _MSC_VER)
+static YYSIZE_T
+yystrlen (const char *yystr)
+#else
+static YYSIZE_T
+yystrlen (yystr)
+ const char *yystr;
+#endif
+{
+ YYSIZE_T yylen;
+ for (yylen = 0; yystr[yylen]; yylen++)
+ continue;
+ return yylen;
+}
+# endif
+# endif
+
+# ifndef yystpcpy
+# if defined __GLIBC__ && defined _STRING_H && defined _GNU_SOURCE
+# define yystpcpy stpcpy
+# else
+/* Copy YYSRC to YYDEST, returning the address of the terminating '\0' in
+ YYDEST. */
+#if (defined __STDC__ || defined __C99__FUNC__ \
+ || defined __cplusplus || defined _MSC_VER)
+static char *
+yystpcpy (char *yydest, const char *yysrc)
+#else
+static char *
+yystpcpy (yydest, yysrc)
+ char *yydest;
+ const char *yysrc;
+#endif
+{
+ char *yyd = yydest;
+ const char *yys = yysrc;
+
+ while ((*yyd++ = *yys++) != '\0')
+ continue;
+
+ return yyd - 1;
+}
+# endif
+# endif
+
+# ifndef yytnamerr
+/* Copy to YYRES the contents of YYSTR after stripping away unnecessary
+ quotes and backslashes, so that it's suitable for yyerror. The
+ heuristic is that double-quoting is unnecessary unless the string
+ contains an apostrophe, a comma, or backslash (other than
+ backslash-backslash). YYSTR is taken from yytname. If YYRES is
+ null, do not copy; instead, return the length of what the result
+ would have been. */
+static YYSIZE_T
+yytnamerr (char *yyres, const char *yystr)
+{
+ if (*yystr == '"')
+ {
+ YYSIZE_T yyn = 0;
+ char const *yyp = yystr;
+
+ for (;;)
+ switch (*++yyp)
+ {
+ case '\'':
+ case ',':
+ goto do_not_strip_quotes;
+
+ case '\\':
+ if (*++yyp != '\\')
+ goto do_not_strip_quotes;
+ /* Fall through. */
+ default:
+ if (yyres)
+ yyres[yyn] = *yyp;
+ yyn++;
+ break;
+
+ case '"':
+ if (yyres)
+ yyres[yyn] = '\0';
+ return yyn;
+ }
+ do_not_strip_quotes: ;
+ }
+
+ if (! yyres)
+ return yystrlen (yystr);
+
+ return yystpcpy (yyres, yystr) - yyres;
+}
+# endif
+
+/* Copy into YYRESULT an error message about the unexpected token
+ YYCHAR while in state YYSTATE. Return the number of bytes copied,
+ including the terminating null byte. If YYRESULT is null, do not
+ copy anything; just return the number of bytes that would be
+ copied. As a special case, return 0 if an ordinary "syntax error"
+ message will do. Return YYSIZE_MAXIMUM if overflow occurs during
+ size calculation. */
+static YYSIZE_T
+yysyntax_error (char *yyresult, int yystate, int yychar)
+{
+ int yyn = yypact[yystate];
+
+ if (! (YYPACT_NINF < yyn && yyn <= YYLAST))
+ return 0;
+ else
+ {
+ int yytype = YYTRANSLATE (yychar);
+ YYSIZE_T yysize0 = yytnamerr (0, yytname[yytype]);
+ YYSIZE_T yysize = yysize0;
+ YYSIZE_T yysize1;
+ int yysize_overflow = 0;
+ enum { YYERROR_VERBOSE_ARGS_MAXIMUM = 5 };
+ char const *yyarg[YYERROR_VERBOSE_ARGS_MAXIMUM];
+ int yyx;
+
+# if 0
+ /* This is so xgettext sees the translatable formats that are
+ constructed on the fly. */
+ YY_("syntax error, unexpected %s");
+ YY_("syntax error, unexpected %s, expecting %s");
+ YY_("syntax error, unexpected %s, expecting %s or %s");
+ YY_("syntax error, unexpected %s, expecting %s or %s or %s");
+ YY_("syntax error, unexpected %s, expecting %s or %s or %s or %s");
+# endif
+ char *yyfmt;
+ char const *yyf;
+ static char const yyunexpected[] = "syntax error, unexpected %s";
+ static char const yyexpecting[] = ", expecting %s";
+ static char const yyor[] = " or %s";
+ char yyformat[sizeof yyunexpected
+ + sizeof yyexpecting - 1
+ + ((YYERROR_VERBOSE_ARGS_MAXIMUM - 2)
+ * (sizeof yyor - 1))];
+ char const *yyprefix = yyexpecting;
+
+ /* Start YYX at -YYN if negative to avoid negative indexes in
+ YYCHECK. */
+ int yyxbegin = yyn < 0 ? -yyn : 0;
+
+ /* Stay within bounds of both yycheck and yytname. */
+ int yychecklim = YYLAST - yyn + 1;
+ int yyxend = yychecklim < YYNTOKENS ? yychecklim : YYNTOKENS;
+ int yycount = 1;
+
+ yyarg[0] = yytname[yytype];
+ yyfmt = yystpcpy (yyformat, yyunexpected);
+
+ for (yyx = yyxbegin; yyx < yyxend; ++yyx)
+ if (yycheck[yyx + yyn] == yyx && yyx != YYTERROR)
+ {
+ if (yycount == YYERROR_VERBOSE_ARGS_MAXIMUM)
+ {
+ yycount = 1;
+ yysize = yysize0;
+ yyformat[sizeof yyunexpected - 1] = '\0';
+ break;
+ }
+ yyarg[yycount++] = yytname[yyx];
+ yysize1 = yysize + yytnamerr (0, yytname[yyx]);
+ yysize_overflow |= (yysize1 < yysize);
+ yysize = yysize1;
+ yyfmt = yystpcpy (yyfmt, yyprefix);
+ yyprefix = yyor;
+ }
+
+ yyf = YY_(yyformat);
+ yysize1 = yysize + yystrlen (yyf);
+ yysize_overflow |= (yysize1 < yysize);
+ yysize = yysize1;
+
+ if (yysize_overflow)
+ return YYSIZE_MAXIMUM;
+
+ if (yyresult)
+ {
+ /* Avoid sprintf, as that infringes on the user's name space.
+ Don't have undefined behavior even if the translation
+ produced a string with the wrong number of "%s"s. */
+ char *yyp = yyresult;
+ int yyi = 0;
+ while ((*yyp = *yyf) != '\0')
+ {
+ if (*yyp == '%' && yyf[1] == 's' && yyi < yycount)
+ {
+ yyp += yytnamerr (yyp, yyarg[yyi++]);
+ yyf += 2;
+ }
+ else
+ {
+ yyp++;
+ yyf++;
+ }
+ }
+ }
+ return yysize;
+ }
+}
+#endif /* YYERROR_VERBOSE */
+
+
+/*-----------------------------------------------.
+| Release the memory associated to this symbol. |
+`-----------------------------------------------*/
+
+/*ARGSUSED*/
+#if (defined __STDC__ || defined __C99__FUNC__ \
+ || defined __cplusplus || defined _MSC_VER)
+static void
+yydestruct (const char *yymsg, int yytype, YYSTYPE *yyvaluep)
+#else
+static void
+yydestruct (yymsg, yytype, yyvaluep)
+ const char *yymsg;
+ int yytype;
+ YYSTYPE *yyvaluep;
+#endif
+{
+ YYUSE (yyvaluep);
+
+ if (!yymsg)
+ yymsg = "Deleting";
+ YY_SYMBOL_PRINT (yymsg, yytype, yyvaluep, yylocationp);
+
+ switch (yytype)
+ {
+
+ default:
+ break;
+ }
+}
+
+/* Prevent warnings from -Wmissing-prototypes. */
+#ifdef YYPARSE_PARAM
+#if defined __STDC__ || defined __cplusplus
+int yyparse (void *YYPARSE_PARAM);
+#else
+int yyparse ();
+#endif
+#else /* ! YYPARSE_PARAM */
+#if defined __STDC__ || defined __cplusplus
+int yyparse (void);
+#else
+int yyparse ();
+#endif
+#endif /* ! YYPARSE_PARAM */
+
+
+/* The lookahead symbol. */
+int yychar;
+
+/* The semantic value of the lookahead symbol. */
+YYSTYPE yylval;
+
+/* Number of syntax errors so far. */
+int yynerrs;
+
+
+
+/*-------------------------.
+| yyparse or yypush_parse. |
+`-------------------------*/
+
+#ifdef YYPARSE_PARAM
+#if (defined __STDC__ || defined __C99__FUNC__ \
+ || defined __cplusplus || defined _MSC_VER)
+int
+yyparse (void *YYPARSE_PARAM)
+#else
+int
+yyparse (YYPARSE_PARAM)
+ void *YYPARSE_PARAM;
+#endif
+#else /* ! YYPARSE_PARAM */
+#if (defined __STDC__ || defined __C99__FUNC__ \
+ || defined __cplusplus || defined _MSC_VER)
+int
+yyparse (void)
+#else
+int
+yyparse ()
+
+#endif
+#endif
+{
+
+
+ int yystate;
+ /* Number of tokens to shift before error messages enabled. */
+ int yyerrstatus;
+
+ /* The stacks and their tools:
+ `yyss': related to states.
+ `yyvs': related to semantic values.
+
+ Refer to the stacks thru separate pointers, to allow yyoverflow
+ to reallocate them elsewhere. */
+
+ /* The state stack. */
+ yytype_int16 yyssa[YYINITDEPTH];
+ yytype_int16 *yyss;
+ yytype_int16 *yyssp;
+
+ /* The semantic value stack. */
+ YYSTYPE yyvsa[YYINITDEPTH];
+ YYSTYPE *yyvs;
+ YYSTYPE *yyvsp;
+
+ YYSIZE_T yystacksize;
+
+ int yyn;
+ int yyresult;
+ /* Lookahead token as an internal (translated) token number. */
+ int yytoken;
+ /* The variables used to return semantic value and location from the
+ action routines. */
+ YYSTYPE yyval;
+
+#if YYERROR_VERBOSE
+ /* Buffer for error messages, and its allocated size. */
+ char yymsgbuf[128];
+ char *yymsg = yymsgbuf;
+ YYSIZE_T yymsg_alloc = sizeof yymsgbuf;
+#endif
+
+#define YYPOPSTACK(N) (yyvsp -= (N), yyssp -= (N))
+
+ /* The number of symbols on the RHS of the reduced rule.
+ Keep to zero when no symbol should be popped. */
+ int yylen = 0;
+
+ yytoken = 0;
+ yyss = yyssa;
+ yyvs = yyvsa;
+ yystacksize = YYINITDEPTH;
+
+ YYDPRINTF ((stderr, "Starting parse\n"));
+
+ yystate = 0;
+ yyerrstatus = 0;
+ yynerrs = 0;
+ yychar = YYEMPTY; /* Cause a token to be read. */
+
+ /* Initialize stack pointers.
+ Waste one element of value and location stack
+ so that they stay on the same level as the state stack.
+ The wasted elements are never initialized. */
+ yyssp = yyss;
+ yyvsp = yyvs;
+
+ goto yysetstate;
+
+/*------------------------------------------------------------.
+| yynewstate -- Push a new state, which is found in yystate. |
+`------------------------------------------------------------*/
+ yynewstate:
+ /* In all cases, when you get here, the value and location stacks
+ have just been pushed. So pushing a state here evens the stacks. */
+ yyssp++;
+
+ yysetstate:
+ *yyssp = yystate;
+
+ if (yyss + yystacksize - 1 <= yyssp)
+ {
+ /* Get the current used size of the three stacks, in elements. */
+ YYSIZE_T yysize = yyssp - yyss + 1;
+
+#ifdef yyoverflow
+ {
+ /* Give user a chance to reallocate the stack. Use copies of
+ these so that the &'s don't force the real ones into
+ memory. */
+ YYSTYPE *yyvs1 = yyvs;
+ yytype_int16 *yyss1 = yyss;
+
+ /* Each stack pointer address is followed by the size of the
+ data in use in that stack, in bytes. This used to be a
+ conditional around just the two extra args, but that might
+ be undefined if yyoverflow is a macro. */
+ yyoverflow (YY_("memory exhausted"),
+ &yyss1, yysize * sizeof (*yyssp),
+ &yyvs1, yysize * sizeof (*yyvsp),
+ &yystacksize);
+
+ yyss = yyss1;
+ yyvs = yyvs1;
+ }
+#else /* no yyoverflow */
+# ifndef YYSTACK_RELOCATE
+ goto yyexhaustedlab;
+# else
+ /* Extend the stack our own way. */
+ if (YYMAXDEPTH <= yystacksize)
+ goto yyexhaustedlab;
+ yystacksize *= 2;
+ if (YYMAXDEPTH < yystacksize)
+ yystacksize = YYMAXDEPTH;
+
+ {
+ yytype_int16 *yyss1 = yyss;
+ union yyalloc *yyptr =
+ (union yyalloc *) YYSTACK_ALLOC (YYSTACK_BYTES (yystacksize));
+ if (! yyptr)
+ goto yyexhaustedlab;
+ YYSTACK_RELOCATE (yyss_alloc, yyss);
+ YYSTACK_RELOCATE (yyvs_alloc, yyvs);
+# undef YYSTACK_RELOCATE
+ if (yyss1 != yyssa)
+ YYSTACK_FREE (yyss1);
+ }
+# endif
+#endif /* no yyoverflow */
+
+ yyssp = yyss + yysize - 1;
+ yyvsp = yyvs + yysize - 1;
+
+ YYDPRINTF ((stderr, "Stack size increased to %lu\n",
+ (unsigned long int) yystacksize));
+
+ if (yyss + yystacksize - 1 <= yyssp)
+ YYABORT;
+ }
+
+ YYDPRINTF ((stderr, "Entering state %d\n", yystate));
+
+ if (yystate == YYFINAL)
+ YYACCEPT;
+
+ goto yybackup;
+
+/*-----------.
+| yybackup. |
+`-----------*/
+yybackup:
+
+ /* Do appropriate processing given the current state. Read a
+ lookahead token if we need one and don't already have one. */
+
+ /* First try to decide what to do without reference to lookahead token. */
+ yyn = yypact[yystate];
+ if (yyn == YYPACT_NINF)
+ goto yydefault;
+
+ /* Not known => get a lookahead token if don't already have one. */
+
+ /* YYCHAR is either YYEMPTY or YYEOF or a valid lookahead symbol. */
+ if (yychar == YYEMPTY)
+ {
+ YYDPRINTF ((stderr, "Reading a token: "));
+ yychar = YYLEX;
+ }
+
+ if (yychar <= YYEOF)
+ {
+ yychar = yytoken = YYEOF;
+ YYDPRINTF ((stderr, "Now at end of input.\n"));
+ }
+ else
+ {
+ yytoken = YYTRANSLATE (yychar);
+ YY_SYMBOL_PRINT ("Next token is", yytoken, &yylval, &yylloc);
+ }
+
+ /* If the proper action on seeing token YYTOKEN is to reduce or to
+ detect an error, take that action. */
+ yyn += yytoken;
+ if (yyn < 0 || YYLAST < yyn || yycheck[yyn] != yytoken)
+ goto yydefault;
+ yyn = yytable[yyn];
+ if (yyn <= 0)
+ {
+ if (yyn == 0 || yyn == YYTABLE_NINF)
+ goto yyerrlab;
+ yyn = -yyn;
+ goto yyreduce;
+ }
+
+ /* Count tokens shifted since error; after three, turn off error
+ status. */
+ if (yyerrstatus)
+ yyerrstatus--;
+
+ /* Shift the lookahead token. */
+ YY_SYMBOL_PRINT ("Shifting", yytoken, &yylval, &yylloc);
+
+ /* Discard the shifted token. */
+ yychar = YYEMPTY;
+
+ yystate = yyn;
+ *++yyvsp = yylval;
+
+ goto yynewstate;
+
+
+/*-----------------------------------------------------------.
+| yydefault -- do the default action for the current state. |
+`-----------------------------------------------------------*/
+yydefault:
+ yyn = yydefact[yystate];
+ if (yyn == 0)
+ goto yyerrlab;
+ goto yyreduce;
+
+
+/*-----------------------------.
+| yyreduce -- Do a reduction. |
+`-----------------------------*/
+yyreduce:
+ /* yyn is the number of a rule to reduce with. */
+ yylen = yyr2[yyn];
+
+ /* If YYLEN is nonzero, implement the default value of the action:
+ `$$ = $1'.
+
+ Otherwise, the following line sets YYVAL to garbage.
+ This behavior is undocumented and Bison
+ users should not rely upon it. Assigning to YYVAL
+ unconditionally makes the parser a bit smaller, and it avoids a
+ GCC warning that YYVAL may be used uninitialized. */
+ yyval = yyvsp[1-yylen];
+
+
+ YY_REDUCE_PRINT (yyn);
+ switch (yyn)
+ {
+ case 3:
+
+/* Line 1464 of yacc.c */
+#line 142 "jamgram.y"
+ { parse_save( (yyvsp[(1) - (1)]).parse ); }
+ break;
+
+ case 4:
+
+/* Line 1464 of yacc.c */
+#line 153 "jamgram.y"
+ { (yyval).parse = (yyvsp[(1) - (1)]).parse; }
+ break;
+
+ case 5:
+
+/* Line 1464 of yacc.c */
+#line 155 "jamgram.y"
+ { (yyval).parse = (yyvsp[(1) - (1)]).parse; }
+ break;
+
+ case 6:
+
+/* Line 1464 of yacc.c */
+#line 159 "jamgram.y"
+ { (yyval).parse = (yyvsp[(1) - (1)]).parse; }
+ break;
+
+ case 7:
+
+/* Line 1464 of yacc.c */
+#line 161 "jamgram.y"
+ { (yyval).parse = prules( (yyvsp[(1) - (2)]).parse, (yyvsp[(2) - (2)]).parse ); }
+ break;
+
+ case 8:
+
+/* Line 1464 of yacc.c */
+#line 163 "jamgram.y"
+ { (yyval).parse = plocal( (yyvsp[(2) - (5)]).parse, (yyvsp[(3) - (5)]).parse, (yyvsp[(5) - (5)]).parse ); }
+ break;
+
+ case 9:
+
+/* Line 1464 of yacc.c */
+#line 167 "jamgram.y"
+ { (yyval).parse = pnull(); }
+ break;
+
+ case 10:
+
+/* Line 1464 of yacc.c */
+#line 171 "jamgram.y"
+ { (yyval).parse = (yyvsp[(2) - (2)]).parse; (yyval).number = ASSIGN_SET; }
+ break;
+
+ case 11:
+
+/* Line 1464 of yacc.c */
+#line 173 "jamgram.y"
+ { (yyval).parse = (yyvsp[(1) - (1)]).parse; (yyval).number = ASSIGN_APPEND; }
+ break;
+
+ case 12:
+
+/* Line 1464 of yacc.c */
+#line 177 "jamgram.y"
+ { (yyval).parse = (yyvsp[(2) - (3)]).parse; }
+ break;
+
+ case 13:
+
+/* Line 1464 of yacc.c */
+#line 179 "jamgram.y"
+ { (yyval).parse = P0; }
+ break;
+
+ case 14:
+
+/* Line 1464 of yacc.c */
+#line 183 "jamgram.y"
+ { (yyval).number = 1; }
+ break;
+
+ case 15:
+
+/* Line 1464 of yacc.c */
+#line 185 "jamgram.y"
+ { (yyval).number = 0; }
+ break;
+
+ case 16:
+
+/* Line 1464 of yacc.c */
+#line 189 "jamgram.y"
+ { (yyval).parse = (yyvsp[(2) - (3)]).parse; }
+ break;
+
+ case 17:
+
+/* Line 1464 of yacc.c */
+#line 191 "jamgram.y"
+ { (yyval).parse = pincl( (yyvsp[(2) - (3)]).parse ); }
+ break;
+
+ case 18:
+
+/* Line 1464 of yacc.c */
+#line 193 "jamgram.y"
+ { (yyval).parse = prule( (yyvsp[(1) - (3)]).string, (yyvsp[(2) - (3)]).parse ); }
+ break;
+
+ case 19:
+
+/* Line 1464 of yacc.c */
+#line 195 "jamgram.y"
+ { (yyval).parse = pset( (yyvsp[(1) - (4)]).parse, (yyvsp[(3) - (4)]).parse, (yyvsp[(2) - (4)]).number ); }
+ break;
+
+ case 20:
+
+/* Line 1464 of yacc.c */
+#line 197 "jamgram.y"
+ { (yyval).parse = pset1( (yyvsp[(1) - (6)]).parse, (yyvsp[(3) - (6)]).parse, (yyvsp[(5) - (6)]).parse, (yyvsp[(4) - (6)]).number ); }
+ break;
+
+ case 21:
+
+/* Line 1464 of yacc.c */
+#line 199 "jamgram.y"
+ { (yyval).parse = (yyvsp[(2) - (3)]).parse; }
+ break;
+
+ case 22:
+
+/* Line 1464 of yacc.c */
+#line 201 "jamgram.y"
+ { (yyval).parse = pfor( (yyvsp[(3) - (8)]).string, (yyvsp[(5) - (8)]).parse, (yyvsp[(7) - (8)]).parse, (yyvsp[(2) - (8)]).number ); }
+ break;
+
+ case 23:
+
+/* Line 1464 of yacc.c */
+#line 203 "jamgram.y"
+ { (yyval).parse = pswitch( (yyvsp[(2) - (5)]).parse, (yyvsp[(4) - (5)]).parse ); }
+ break;
+
+ case 24:
+
+/* Line 1464 of yacc.c */
+#line 205 "jamgram.y"
+ { (yyval).parse = pif( (yyvsp[(2) - (5)]).parse, (yyvsp[(4) - (5)]).parse, pnull() ); }
+ break;
+
+ case 25:
+
+/* Line 1464 of yacc.c */
+#line 207 "jamgram.y"
+ { (yyval).parse = pmodule( (yyvsp[(2) - (5)]).parse, (yyvsp[(4) - (5)]).parse ); }
+ break;
+
+ case 26:
+
+/* Line 1464 of yacc.c */
+#line 209 "jamgram.y"
+ { (yyval).parse = pclass( (yyvsp[(2) - (5)]).parse, (yyvsp[(4) - (5)]).parse ); }
+ break;
+
+ case 27:
+
+/* Line 1464 of yacc.c */
+#line 211 "jamgram.y"
+ { (yyval).parse = pwhile( (yyvsp[(2) - (5)]).parse, (yyvsp[(4) - (5)]).parse ); }
+ break;
+
+ case 28:
+
+/* Line 1464 of yacc.c */
+#line 213 "jamgram.y"
+ { (yyval).parse = pif( (yyvsp[(2) - (7)]).parse, (yyvsp[(4) - (7)]).parse, (yyvsp[(7) - (7)]).parse ); }
+ break;
+
+ case 29:
+
+/* Line 1464 of yacc.c */
+#line 215 "jamgram.y"
+ { (yyval).parse = psetc( (yyvsp[(3) - (5)]).string, (yyvsp[(5) - (5)]).parse, (yyvsp[(4) - (5)]).parse, (yyvsp[(1) - (5)]).number ); }
+ break;
+
+ case 30:
+
+/* Line 1464 of yacc.c */
+#line 217 "jamgram.y"
+ { (yyval).parse = pon( (yyvsp[(2) - (3)]).parse, (yyvsp[(3) - (3)]).parse ); }
+ break;
+
+ case 31:
+
+/* Line 1464 of yacc.c */
+#line 219 "jamgram.y"
+ { yymode( SCAN_STRING ); }
+ break;
+
+ case 32:
+
+/* Line 1464 of yacc.c */
+#line 221 "jamgram.y"
+ { yymode( SCAN_NORMAL ); }
+ break;
+
+ case 33:
+
+/* Line 1464 of yacc.c */
+#line 223 "jamgram.y"
+ { (yyval).parse = psete( (yyvsp[(3) - (9)]).string,(yyvsp[(4) - (9)]).parse,(yyvsp[(7) - (9)]).string,(yyvsp[(2) - (9)]).number ); }
+ break;
+
+ case 34:
+
+/* Line 1464 of yacc.c */
+#line 231 "jamgram.y"
+ { (yyval).number = ASSIGN_SET; }
+ break;
+
+ case 35:
+
+/* Line 1464 of yacc.c */
+#line 233 "jamgram.y"
+ { (yyval).number = ASSIGN_APPEND; }
+ break;
+
+ case 36:
+
+/* Line 1464 of yacc.c */
+#line 235 "jamgram.y"
+ { (yyval).number = ASSIGN_DEFAULT; }
+ break;
+
+ case 37:
+
+/* Line 1464 of yacc.c */
+#line 237 "jamgram.y"
+ { (yyval).number = ASSIGN_DEFAULT; }
+ break;
+
+ case 38:
+
+/* Line 1464 of yacc.c */
+#line 244 "jamgram.y"
+ { (yyval).parse = peval( EXPR_EXISTS, (yyvsp[(1) - (1)]).parse, pnull() ); }
+ break;
+
+ case 39:
+
+/* Line 1464 of yacc.c */
+#line 246 "jamgram.y"
+ { (yyval).parse = peval( EXPR_EQUALS, (yyvsp[(1) - (3)]).parse, (yyvsp[(3) - (3)]).parse ); }
+ break;
+
+ case 40:
+
+/* Line 1464 of yacc.c */
+#line 248 "jamgram.y"
+ { (yyval).parse = peval( EXPR_NOTEQ, (yyvsp[(1) - (3)]).parse, (yyvsp[(3) - (3)]).parse ); }
+ break;
+
+ case 41:
+
+/* Line 1464 of yacc.c */
+#line 250 "jamgram.y"
+ { (yyval).parse = peval( EXPR_LESS, (yyvsp[(1) - (3)]).parse, (yyvsp[(3) - (3)]).parse ); }
+ break;
+
+ case 42:
+
+/* Line 1464 of yacc.c */
+#line 252 "jamgram.y"
+ { (yyval).parse = peval( EXPR_LESSEQ, (yyvsp[(1) - (3)]).parse, (yyvsp[(3) - (3)]).parse ); }
+ break;
+
+ case 43:
+
+/* Line 1464 of yacc.c */
+#line 254 "jamgram.y"
+ { (yyval).parse = peval( EXPR_MORE, (yyvsp[(1) - (3)]).parse, (yyvsp[(3) - (3)]).parse ); }
+ break;
+
+ case 44:
+
+/* Line 1464 of yacc.c */
+#line 256 "jamgram.y"
+ { (yyval).parse = peval( EXPR_MOREEQ, (yyvsp[(1) - (3)]).parse, (yyvsp[(3) - (3)]).parse ); }
+ break;
+
+ case 45:
+
+/* Line 1464 of yacc.c */
+#line 258 "jamgram.y"
+ { (yyval).parse = peval( EXPR_AND, (yyvsp[(1) - (3)]).parse, (yyvsp[(3) - (3)]).parse ); }
+ break;
+
+ case 46:
+
+/* Line 1464 of yacc.c */
+#line 260 "jamgram.y"
+ { (yyval).parse = peval( EXPR_AND, (yyvsp[(1) - (3)]).parse, (yyvsp[(3) - (3)]).parse ); }
+ break;
+
+ case 47:
+
+/* Line 1464 of yacc.c */
+#line 262 "jamgram.y"
+ { (yyval).parse = peval( EXPR_OR, (yyvsp[(1) - (3)]).parse, (yyvsp[(3) - (3)]).parse ); }
+ break;
+
+ case 48:
+
+/* Line 1464 of yacc.c */
+#line 264 "jamgram.y"
+ { (yyval).parse = peval( EXPR_OR, (yyvsp[(1) - (3)]).parse, (yyvsp[(3) - (3)]).parse ); }
+ break;
+
+ case 49:
+
+/* Line 1464 of yacc.c */
+#line 266 "jamgram.y"
+ { (yyval).parse = peval( EXPR_IN, (yyvsp[(1) - (3)]).parse, (yyvsp[(3) - (3)]).parse ); }
+ break;
+
+ case 50:
+
+/* Line 1464 of yacc.c */
+#line 268 "jamgram.y"
+ { (yyval).parse = peval( EXPR_NOT, (yyvsp[(2) - (2)]).parse, pnull() ); }
+ break;
+
+ case 51:
+
+/* Line 1464 of yacc.c */
+#line 270 "jamgram.y"
+ { (yyval).parse = (yyvsp[(2) - (3)]).parse; }
+ break;
+
+ case 52:
+
+/* Line 1464 of yacc.c */
+#line 281 "jamgram.y"
+ { (yyval).parse = P0; }
+ break;
+
+ case 53:
+
+/* Line 1464 of yacc.c */
+#line 283 "jamgram.y"
+ { (yyval).parse = pnode( (yyvsp[(1) - (2)]).parse, (yyvsp[(2) - (2)]).parse ); }
+ break;
+
+ case 54:
+
+/* Line 1464 of yacc.c */
+#line 287 "jamgram.y"
+ { (yyval).parse = psnode( (yyvsp[(2) - (4)]).string, (yyvsp[(4) - (4)]).parse ); }
+ break;
+
+ case 55:
+
+/* Line 1464 of yacc.c */
+#line 296 "jamgram.y"
+ { (yyval).parse = pnode( P0, (yyvsp[(1) - (1)]).parse ); }
+ break;
+
+ case 56:
+
+/* Line 1464 of yacc.c */
+#line 298 "jamgram.y"
+ { (yyval).parse = pnode( (yyvsp[(3) - (3)]).parse, (yyvsp[(1) - (3)]).parse ); }
+ break;
+
+ case 57:
+
+/* Line 1464 of yacc.c */
+#line 308 "jamgram.y"
+ { (yyval).parse = (yyvsp[(1) - (1)]).parse; yymode( SCAN_NORMAL ); }
+ break;
+
+ case 58:
+
+/* Line 1464 of yacc.c */
+#line 312 "jamgram.y"
+ { (yyval).parse = pnull(); yymode( SCAN_PUNCT ); }
+ break;
+
+ case 59:
+
+/* Line 1464 of yacc.c */
+#line 314 "jamgram.y"
+ { (yyval).parse = pappend( (yyvsp[(1) - (2)]).parse, (yyvsp[(2) - (2)]).parse ); }
+ break;
+
+ case 60:
+
+/* Line 1464 of yacc.c */
+#line 318 "jamgram.y"
+ { (yyval).parse = plist( (yyvsp[(1) - (1)]).string ); }
+ break;
+
+ case 61:
+
+/* Line 1464 of yacc.c */
+#line 319 "jamgram.y"
+ { yymode( SCAN_NORMAL ); }
+ break;
+
+ case 62:
+
+/* Line 1464 of yacc.c */
+#line 320 "jamgram.y"
+ { (yyval).parse = (yyvsp[(3) - (4)]).parse; }
+ break;
+
+ case 63:
+
+/* Line 1464 of yacc.c */
+#line 329 "jamgram.y"
+ { (yyval).parse = prule( (yyvsp[(1) - (2)]).string, (yyvsp[(2) - (2)]).parse ); }
+ break;
+
+ case 64:
+
+/* Line 1464 of yacc.c */
+#line 331 "jamgram.y"
+ { (yyval).parse = pon( (yyvsp[(2) - (4)]).parse, prule( (yyvsp[(3) - (4)]).string, (yyvsp[(4) - (4)]).parse ) ); }
+ break;
+
+ case 65:
+
+/* Line 1464 of yacc.c */
+#line 333 "jamgram.y"
+ { (yyval).parse = pon( (yyvsp[(2) - (4)]).parse, (yyvsp[(4) - (4)]).parse ); }
+ break;
+
+ case 66:
+
+/* Line 1464 of yacc.c */
+#line 343 "jamgram.y"
+ { (yyval).number = 0; }
+ break;
+
+ case 67:
+
+/* Line 1464 of yacc.c */
+#line 345 "jamgram.y"
+ { (yyval).number = (yyvsp[(1) - (2)]).number | (yyvsp[(2) - (2)]).number; }
+ break;
+
+ case 68:
+
+/* Line 1464 of yacc.c */
+#line 349 "jamgram.y"
+ { (yyval).number = EXEC_UPDATED; }
+ break;
+
+ case 69:
+
+/* Line 1464 of yacc.c */
+#line 351 "jamgram.y"
+ { (yyval).number = EXEC_TOGETHER; }
+ break;
+
+ case 70:
+
+/* Line 1464 of yacc.c */
+#line 353 "jamgram.y"
+ { (yyval).number = EXEC_IGNORE; }
+ break;
+
+ case 71:
+
+/* Line 1464 of yacc.c */
+#line 355 "jamgram.y"
+ { (yyval).number = EXEC_QUIETLY; }
+ break;
+
+ case 72:
+
+/* Line 1464 of yacc.c */
+#line 357 "jamgram.y"
+ { (yyval).number = EXEC_PIECEMEAL; }
+ break;
+
+ case 73:
+
+/* Line 1464 of yacc.c */
+#line 359 "jamgram.y"
+ { (yyval).number = EXEC_EXISTING; }
+ break;
+
+ case 74:
+
+/* Line 1464 of yacc.c */
+#line 368 "jamgram.y"
+ { (yyval).parse = pnull(); }
+ break;
+
+ case 75:
+
+/* Line 1464 of yacc.c */
+#line 370 "jamgram.y"
+ { (yyval).parse = (yyvsp[(2) - (2)]).parse; }
+ break;
+
+
+
+/* Line 1464 of yacc.c */
+#line 2118 "y.tab.c"
+ default: break;
+ }
+ YY_SYMBOL_PRINT ("-> $$ =", yyr1[yyn], &yyval, &yyloc);
+
+ YYPOPSTACK (yylen);
+ yylen = 0;
+ YY_STACK_PRINT (yyss, yyssp);
+
+ *++yyvsp = yyval;
+
+ /* Now `shift' the result of the reduction. Determine what state
+ that goes to, based on the state we popped back to and the rule
+ number reduced by. */
+
+ yyn = yyr1[yyn];
+
+ yystate = yypgoto[yyn - YYNTOKENS] + *yyssp;
+ if (0 <= yystate && yystate <= YYLAST && yycheck[yystate] == *yyssp)
+ yystate = yytable[yystate];
+ else
+ yystate = yydefgoto[yyn - YYNTOKENS];
+
+ goto yynewstate;
+
+
+/*------------------------------------.
+| yyerrlab -- here on detecting error |
+`------------------------------------*/
+yyerrlab:
+ /* If not already recovering from an error, report this error. */
+ if (!yyerrstatus)
+ {
+ ++yynerrs;
+#if ! YYERROR_VERBOSE
+ yyerror (YY_("syntax error"));
+#else
+ {
+ YYSIZE_T yysize = yysyntax_error (0, yystate, yychar);
+ if (yymsg_alloc < yysize && yymsg_alloc < YYSTACK_ALLOC_MAXIMUM)
+ {
+ YYSIZE_T yyalloc = 2 * yysize;
+ if (! (yysize <= yyalloc && yyalloc <= YYSTACK_ALLOC_MAXIMUM))
+ yyalloc = YYSTACK_ALLOC_MAXIMUM;
+ if (yymsg != yymsgbuf)
+ YYSTACK_FREE (yymsg);
+ yymsg = (char *) YYSTACK_ALLOC (yyalloc);
+ if (yymsg)
+ yymsg_alloc = yyalloc;
+ else
+ {
+ yymsg = yymsgbuf;
+ yymsg_alloc = sizeof yymsgbuf;
+ }
+ }
+
+ if (0 < yysize && yysize <= yymsg_alloc)
+ {
+ (void) yysyntax_error (yymsg, yystate, yychar);
+ yyerror (yymsg);
+ }
+ else
+ {
+ yyerror (YY_("syntax error"));
+ if (yysize != 0)
+ goto yyexhaustedlab;
+ }
+ }
+#endif
+ }
+
+
+
+ if (yyerrstatus == 3)
+ {
+ /* If just tried and failed to reuse lookahead token after an
+ error, discard it. */
+
+ if (yychar <= YYEOF)
+ {
+ /* Return failure if at end of input. */
+ if (yychar == YYEOF)
+ YYABORT;
+ }
+ else
+ {
+ yydestruct ("Error: discarding",
+ yytoken, &yylval);
+ yychar = YYEMPTY;
+ }
+ }
+
+ /* Else will try to reuse lookahead token after shifting the error
+ token. */
+ goto yyerrlab1;
+
+
+/*---------------------------------------------------.
+| yyerrorlab -- error raised explicitly by YYERROR. |
+`---------------------------------------------------*/
+yyerrorlab:
+
+ /* Pacify compilers like GCC when the user code never invokes
+ YYERROR and the label yyerrorlab therefore never appears in user
+ code. */
+ if (/*CONSTCOND*/ 0)
+ goto yyerrorlab;
+
+ /* Do not reclaim the symbols of the rule which action triggered
+ this YYERROR. */
+ YYPOPSTACK (yylen);
+ yylen = 0;
+ YY_STACK_PRINT (yyss, yyssp);
+ yystate = *yyssp;
+ goto yyerrlab1;
+
+
+/*-------------------------------------------------------------.
+| yyerrlab1 -- common code for both syntax error and YYERROR. |
+`-------------------------------------------------------------*/
+yyerrlab1:
+ yyerrstatus = 3; /* Each real token shifted decrements this. */
+
+ for (;;)
+ {
+ yyn = yypact[yystate];
+ if (yyn != YYPACT_NINF)
+ {
+ yyn += YYTERROR;
+ if (0 <= yyn && yyn <= YYLAST && yycheck[yyn] == YYTERROR)
+ {
+ yyn = yytable[yyn];
+ if (0 < yyn)
+ break;
+ }
+ }
+
+ /* Pop the current state because it cannot handle the error token. */
+ if (yyssp == yyss)
+ YYABORT;
+
+
+ yydestruct ("Error: popping",
+ yystos[yystate], yyvsp);
+ YYPOPSTACK (1);
+ yystate = *yyssp;
+ YY_STACK_PRINT (yyss, yyssp);
+ }
+
+ *++yyvsp = yylval;
+
+
+ /* Shift the error token. */
+ YY_SYMBOL_PRINT ("Shifting", yystos[yyn], yyvsp, yylsp);
+
+ yystate = yyn;
+ goto yynewstate;
+
+
+/*-------------------------------------.
+| yyacceptlab -- YYACCEPT comes here. |
+`-------------------------------------*/
+yyacceptlab:
+ yyresult = 0;
+ goto yyreturn;
+
+/*-----------------------------------.
+| yyabortlab -- YYABORT comes here. |
+`-----------------------------------*/
+yyabortlab:
+ yyresult = 1;
+ goto yyreturn;
+
+#if !defined(yyoverflow) || YYERROR_VERBOSE
+/*-------------------------------------------------.
+| yyexhaustedlab -- memory exhaustion comes here. |
+`-------------------------------------------------*/
+yyexhaustedlab:
+ yyerror (YY_("memory exhausted"));
+ yyresult = 2;
+ /* Fall through. */
+#endif
+
+yyreturn:
+ if (yychar != YYEMPTY)
+ yydestruct ("Cleanup: discarding lookahead",
+ yytoken, &yylval);
+ /* Do not reclaim the symbols of the rule which action triggered
+ this YYABORT or YYACCEPT. */
+ YYPOPSTACK (yylen);
+ YY_STACK_PRINT (yyss, yyssp);
+ while (yyssp != yyss)
+ {
+ yydestruct ("Cleanup: popping",
+ yystos[*yyssp], yyvsp);
+ YYPOPSTACK (1);
+ }
+#ifndef yyoverflow
+ if (yyss != yyssa)
+ YYSTACK_FREE (yyss);
+#endif
+#if YYERROR_VERBOSE
+ if (yymsg != yymsgbuf)
+ YYSTACK_FREE (yymsg);
+#endif
+ /* Make sure YYID is used. */
+ return YYID (yyresult);
+}
+
+
+
diff --git a/src/kenlm/jam-files/engine/jamgram.h b/src/kenlm/jam-files/engine/jamgram.h
new file mode 100644
index 0000000..97f1175
--- /dev/null
+++ b/src/kenlm/jam-files/engine/jamgram.h
@@ -0,0 +1,149 @@
+/* A Bison parser, made by GNU Bison 2.4.3. */
+
+/* Skeleton interface for Bison's Yacc-like parsers in C
+
+ Copyright (C) 1984, 1989, 1990, 2000, 2001, 2002, 2003, 2004, 2005, 2006,
+ 2009, 2010 Free Software Foundation, Inc.
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>. */
+
+/* As a special exception, you may create a larger work that contains
+ part or all of the Bison parser skeleton and distribute that work
+ under terms of your choice, so long as that work isn't itself a
+ parser generator using the skeleton or a modified version thereof
+ as a parser skeleton. Alternatively, if you modify or redistribute
+ the parser skeleton itself, you may (at your option) remove this
+ special exception, which will cause the skeleton and the resulting
+ Bison output files to be licensed under the GNU General Public
+ License without this special exception.
+
+ This special exception was added by the Free Software Foundation in
+ version 2.2 of Bison. */
+
+
+/* Tokens. */
+#ifndef YYTOKENTYPE
+# define YYTOKENTYPE
+ /* Put the tokens into the symbol table, so that GDB and other debuggers
+ know about them. */
+ enum yytokentype {
+ _BANG_t = 258,
+ _BANG_EQUALS_t = 259,
+ _AMPER_t = 260,
+ _AMPERAMPER_t = 261,
+ _LPAREN_t = 262,
+ _RPAREN_t = 263,
+ _PLUS_EQUALS_t = 264,
+ _COLON_t = 265,
+ _SEMIC_t = 266,
+ _LANGLE_t = 267,
+ _LANGLE_EQUALS_t = 268,
+ _EQUALS_t = 269,
+ _RANGLE_t = 270,
+ _RANGLE_EQUALS_t = 271,
+ _QUESTION_EQUALS_t = 272,
+ _LBRACKET_t = 273,
+ _RBRACKET_t = 274,
+ ACTIONS_t = 275,
+ BIND_t = 276,
+ CASE_t = 277,
+ CLASS_t = 278,
+ DEFAULT_t = 279,
+ ELSE_t = 280,
+ EXISTING_t = 281,
+ FOR_t = 282,
+ IF_t = 283,
+ IGNORE_t = 284,
+ IN_t = 285,
+ INCLUDE_t = 286,
+ LOCAL_t = 287,
+ MODULE_t = 288,
+ ON_t = 289,
+ PIECEMEAL_t = 290,
+ QUIETLY_t = 291,
+ RETURN_t = 292,
+ RULE_t = 293,
+ SWITCH_t = 294,
+ TOGETHER_t = 295,
+ UPDATED_t = 296,
+ WHILE_t = 297,
+ _LBRACE_t = 298,
+ _BAR_t = 299,
+ _BARBAR_t = 300,
+ _RBRACE_t = 301,
+ ARG = 302,
+ STRING = 303
+ };
+#endif
+/* Tokens. */
+#define _BANG_t 258
+#define _BANG_EQUALS_t 259
+#define _AMPER_t 260
+#define _AMPERAMPER_t 261
+#define _LPAREN_t 262
+#define _RPAREN_t 263
+#define _PLUS_EQUALS_t 264
+#define _COLON_t 265
+#define _SEMIC_t 266
+#define _LANGLE_t 267
+#define _LANGLE_EQUALS_t 268
+#define _EQUALS_t 269
+#define _RANGLE_t 270
+#define _RANGLE_EQUALS_t 271
+#define _QUESTION_EQUALS_t 272
+#define _LBRACKET_t 273
+#define _RBRACKET_t 274
+#define ACTIONS_t 275
+#define BIND_t 276
+#define CASE_t 277
+#define CLASS_t 278
+#define DEFAULT_t 279
+#define ELSE_t 280
+#define EXISTING_t 281
+#define FOR_t 282
+#define IF_t 283
+#define IGNORE_t 284
+#define IN_t 285
+#define INCLUDE_t 286
+#define LOCAL_t 287
+#define MODULE_t 288
+#define ON_t 289
+#define PIECEMEAL_t 290
+#define QUIETLY_t 291
+#define RETURN_t 292
+#define RULE_t 293
+#define SWITCH_t 294
+#define TOGETHER_t 295
+#define UPDATED_t 296
+#define WHILE_t 297
+#define _LBRACE_t 298
+#define _BAR_t 299
+#define _BARBAR_t 300
+#define _RBRACE_t 301
+#define ARG 302
+#define STRING 303
+
+
+
+
+#if ! defined YYSTYPE && ! defined YYSTYPE_IS_DECLARED
+typedef int YYSTYPE;
+# define YYSTYPE_IS_TRIVIAL 1
+# define yystype YYSTYPE /* obsolescent; will be withdrawn */
+# define YYSTYPE_IS_DECLARED 1
+#endif
+
+extern YYSTYPE yylval;
+
+
diff --git a/src/kenlm/jam-files/engine/jamgram.y b/src/kenlm/jam-files/engine/jamgram.y
new file mode 100644
index 0000000..543f156
--- /dev/null
+++ b/src/kenlm/jam-files/engine/jamgram.y
@@ -0,0 +1,373 @@
+%token _BANG_t
+%token _BANG_EQUALS_t
+%token _AMPER_t
+%token _AMPERAMPER_t
+%token _LPAREN_t
+%token _RPAREN_t
+%token _PLUS_EQUALS_t
+%token _COLON_t
+%token _SEMIC_t
+%token _LANGLE_t
+%token _LANGLE_EQUALS_t
+%token _EQUALS_t
+%token _RANGLE_t
+%token _RANGLE_EQUALS_t
+%token _QUESTION_EQUALS_t
+%token _LBRACKET_t
+%token _RBRACKET_t
+%token ACTIONS_t
+%token BIND_t
+%token CASE_t
+%token CLASS_t
+%token DEFAULT_t
+%token ELSE_t
+%token EXISTING_t
+%token FOR_t
+%token IF_t
+%token IGNORE_t
+%token IN_t
+%token INCLUDE_t
+%token LOCAL_t
+%token MODULE_t
+%token ON_t
+%token PIECEMEAL_t
+%token QUIETLY_t
+%token RETURN_t
+%token RULE_t
+%token SWITCH_t
+%token TOGETHER_t
+%token UPDATED_t
+%token WHILE_t
+%token _LBRACE_t
+%token _BAR_t
+%token _BARBAR_t
+%token _RBRACE_t
+/*
+ * Copyright 1993, 2000 Christopher Seiwald.
+ *
+ * This file is part of Jam - see jam.c for Copyright information.
+ */
+
+/* This file is ALSO:
+ * Copyright 2001-2004 David Abrahams.
+ * Distributed under the Boost Software License, Version 1.0.
+ * (See accompanying file LICENSE_1_0.txt or http://www.boost.org/LICENSE_1_0.txt)
+ */
+
+/*
+ * jamgram.yy - jam grammar
+ *
+ * 04/13/94 (seiwald) - added shorthand L0 for null list pointer
+ * 06/01/94 (seiwald) - new 'actions existing' does existing sources
+ * 08/23/94 (seiwald) - Support for '+=' (append to variable)
+ * 08/31/94 (seiwald) - Allow ?= as alias for "default =".
+ * 09/15/94 (seiwald) - if conditionals take only single arguments, so
+ * that 'if foo == bar' gives syntax error (use =).
+ * 02/11/95 (seiwald) - when scanning arguments to rules, only treat
+ * punctuation keywords as keywords. All arg lists
+ * are terminated with punctuation keywords.
+ *
+ * 09/11/00 (seiwald) - Support for function calls:
+ *
+ * Rules now return lists (LIST *), rather than void.
+ *
+ * New "[ rule ]" syntax evals rule into a LIST.
+ *
+ * Lists are now generated by compile_list() and
+ * compile_append(), and any other rule that indirectly
+ * makes a list, rather than being built directly here,
+ * so that lists values can contain rule evaluations.
+ *
+ * New 'return' rule sets the return value, though
+ * other statements also may have return values.
+ *
+ * 'run' production split from 'block' production so
+ * that empty blocks can be handled separately.
+ */
+
+%token ARG STRING
+
+%left _BARBAR_t _BAR_t
+%left _AMPERAMPER_t _AMPER_t
+%left _EQUALS_t _BANG_EQUALS_t IN_t
+%left _LANGLE_t _LANGLE_EQUALS_t _RANGLE_t _RANGLE_EQUALS_t
+%left _BANG_t
+
+%{
+#include "jam.h"
+
+#include "lists.h"
+#include "parse.h"
+#include "scan.h"
+#include "compile.h"
+#include "object.h"
+#include "rules.h"
+
+# define YYMAXDEPTH 10000 /* for OSF and other less endowed yaccs */
+
+# define F0 -1
+# define P0 (PARSE *)0
+# define S0 (OBJECT *)0
+
+# define pappend( l,r ) parse_make( PARSE_APPEND,l,r,P0,S0,S0,0 )
+# define peval( c,l,r ) parse_make( PARSE_EVAL,l,r,P0,S0,S0,c )
+# define pfor( s,l,r,x ) parse_make( PARSE_FOREACH,l,r,P0,s,S0,x )
+# define pif( l,r,t ) parse_make( PARSE_IF,l,r,t,S0,S0,0 )
+# define pincl( l ) parse_make( PARSE_INCLUDE,l,P0,P0,S0,S0,0 )
+# define plist( s ) parse_make( PARSE_LIST,P0,P0,P0,s,S0,0 )
+# define plocal( l,r,t ) parse_make( PARSE_LOCAL,l,r,t,S0,S0,0 )
+# define pmodule( l,r ) parse_make( PARSE_MODULE,l,r,P0,S0,S0,0 )
+# define pclass( l,r ) parse_make( PARSE_CLASS,l,r,P0,S0,S0,0 )
+# define pnull() parse_make( PARSE_NULL,P0,P0,P0,S0,S0,0 )
+# define pon( l,r ) parse_make( PARSE_ON,l,r,P0,S0,S0,0 )
+# define prule( s,p ) parse_make( PARSE_RULE,p,P0,P0,s,S0,0 )
+# define prules( l,r ) parse_make( PARSE_RULES,l,r,P0,S0,S0,0 )
+# define pset( l,r,a ) parse_make( PARSE_SET,l,r,P0,S0,S0,a )
+# define pset1( l,r,t,a ) parse_make( PARSE_SETTINGS,l,r,t,S0,S0,a )
+# define psetc( s,p,a,l ) parse_make( PARSE_SETCOMP,p,a,P0,s,S0,l )
+# define psete( s,l,s1,f ) parse_make( PARSE_SETEXEC,l,P0,P0,s,s1,f )
+# define pswitch( l,r ) parse_make( PARSE_SWITCH,l,r,P0,S0,S0,0 )
+# define pwhile( l,r ) parse_make( PARSE_WHILE,l,r,P0,S0,S0,0 )
+
+# define pnode( l,r ) parse_make( F0,l,r,P0,S0,S0,0 )
+# define psnode( s,l ) parse_make( F0,l,P0,P0,s,S0,0 )
+
+%}
+
+%%
+
+run : /* empty */
+ /* do nothing */
+ | rules
+ { parse_save( $1.parse ); }
+ ;
+
+/*
+ * block - zero or more rules
+ * rules - one or more rules
+ * rule - any one of jam's rules
+ * right-recursive so rules execute in order.
+ */
+
+block : null
+ { $$.parse = $1.parse; }
+ | rules
+ { $$.parse = $1.parse; }
+ ;
+
+rules : rule
+ { $$.parse = $1.parse; }
+ | rule rules
+ { $$.parse = prules( $1.parse, $2.parse ); }
+ | LOCAL_t list assign_list_opt _SEMIC_t block
+ { $$.parse = plocal( $2.parse, $3.parse, $5.parse ); }
+ ;
+
+null : /* empty */
+ { $$.parse = pnull(); }
+ ;
+
+assign_list_opt : _EQUALS_t list
+ { $$.parse = $2.parse; $$.number = ASSIGN_SET; }
+ | null
+ { $$.parse = $1.parse; $$.number = ASSIGN_APPEND; }
+ ;
+
+arglist_opt : _LPAREN_t lol _RPAREN_t
+ { $$.parse = $2.parse; }
+ |
+ { $$.parse = P0; }
+ ;
+
+local_opt : LOCAL_t
+ { $$.number = 1; }
+ | /* empty */
+ { $$.number = 0; }
+ ;
+
+rule : _LBRACE_t block _RBRACE_t
+ { $$.parse = $2.parse; }
+ | INCLUDE_t list _SEMIC_t
+ { $$.parse = pincl( $2.parse ); }
+ | ARG lol _SEMIC_t
+ { $$.parse = prule( $1.string, $2.parse ); }
+ | arg assign list _SEMIC_t
+ { $$.parse = pset( $1.parse, $3.parse, $2.number ); }
+ | arg ON_t list assign list _SEMIC_t
+ { $$.parse = pset1( $1.parse, $3.parse, $5.parse, $4.number ); }
+ | RETURN_t list _SEMIC_t
+ { $$.parse = $2.parse; }
+ | FOR_t local_opt ARG IN_t list _LBRACE_t block _RBRACE_t
+ { $$.parse = pfor( $3.string, $5.parse, $7.parse, $2.number ); }
+ | SWITCH_t list _LBRACE_t cases _RBRACE_t
+ { $$.parse = pswitch( $2.parse, $4.parse ); }
+ | IF_t expr _LBRACE_t block _RBRACE_t
+ { $$.parse = pif( $2.parse, $4.parse, pnull() ); }
+ | MODULE_t list _LBRACE_t block _RBRACE_t
+ { $$.parse = pmodule( $2.parse, $4.parse ); }
+ | CLASS_t lol _LBRACE_t block _RBRACE_t
+ { $$.parse = pclass( $2.parse, $4.parse ); }
+ | WHILE_t expr _LBRACE_t block _RBRACE_t
+ { $$.parse = pwhile( $2.parse, $4.parse ); }
+ | IF_t expr _LBRACE_t block _RBRACE_t ELSE_t rule
+ { $$.parse = pif( $2.parse, $4.parse, $7.parse ); }
+ | local_opt RULE_t ARG arglist_opt rule
+ { $$.parse = psetc( $3.string, $5.parse, $4.parse, $1.number ); }
+ | ON_t arg rule
+ { $$.parse = pon( $2.parse, $3.parse ); }
+ | ACTIONS_t eflags ARG bindlist _LBRACE_t
+ { yymode( SCAN_STRING ); }
+ STRING
+ { yymode( SCAN_NORMAL ); }
+ _RBRACE_t
+ { $$.parse = psete( $3.string,$4.parse,$7.string,$2.number ); }
+ ;
+
+/*
+ * assign - = or +=
+ */
+
+assign : _EQUALS_t
+ { $$.number = ASSIGN_SET; }
+ | _PLUS_EQUALS_t
+ { $$.number = ASSIGN_APPEND; }
+ | _QUESTION_EQUALS_t
+ { $$.number = ASSIGN_DEFAULT; }
+ | DEFAULT_t _EQUALS_t
+ { $$.number = ASSIGN_DEFAULT; }
+ ;
+
+/*
+ * expr - an expression for if
+ */
+expr : arg
+ { $$.parse = peval( EXPR_EXISTS, $1.parse, pnull() ); }
+ | expr _EQUALS_t expr
+ { $$.parse = peval( EXPR_EQUALS, $1.parse, $3.parse ); }
+ | expr _BANG_EQUALS_t expr
+ { $$.parse = peval( EXPR_NOTEQ, $1.parse, $3.parse ); }
+ | expr _LANGLE_t expr
+ { $$.parse = peval( EXPR_LESS, $1.parse, $3.parse ); }
+ | expr _LANGLE_EQUALS_t expr
+ { $$.parse = peval( EXPR_LESSEQ, $1.parse, $3.parse ); }
+ | expr _RANGLE_t expr
+ { $$.parse = peval( EXPR_MORE, $1.parse, $3.parse ); }
+ | expr _RANGLE_EQUALS_t expr
+ { $$.parse = peval( EXPR_MOREEQ, $1.parse, $3.parse ); }
+ | expr _AMPER_t expr
+ { $$.parse = peval( EXPR_AND, $1.parse, $3.parse ); }
+ | expr _AMPERAMPER_t expr
+ { $$.parse = peval( EXPR_AND, $1.parse, $3.parse ); }
+ | expr _BAR_t expr
+ { $$.parse = peval( EXPR_OR, $1.parse, $3.parse ); }
+ | expr _BARBAR_t expr
+ { $$.parse = peval( EXPR_OR, $1.parse, $3.parse ); }
+ | arg IN_t list
+ { $$.parse = peval( EXPR_IN, $1.parse, $3.parse ); }
+ | _BANG_t expr
+ { $$.parse = peval( EXPR_NOT, $2.parse, pnull() ); }
+ | _LPAREN_t expr _RPAREN_t
+ { $$.parse = $2.parse; }
+ ;
+
+
+/*
+ * cases - action elements inside a 'switch'
+ * case - a single action element inside a 'switch'
+ * right-recursive rule so cases can be examined in order.
+ */
+
+cases : /* empty */
+ { $$.parse = P0; }
+ | case cases
+ { $$.parse = pnode( $1.parse, $2.parse ); }
+ ;
+
+case : CASE_t ARG _COLON_t block
+ { $$.parse = psnode( $2.string, $4.parse ); }
+ ;
+
+/*
+ * lol - list of lists
+ * right-recursive rule so that lists can be added in order.
+ */
+
+lol : list
+ { $$.parse = pnode( P0, $1.parse ); }
+ | list _COLON_t lol
+ { $$.parse = pnode( $3.parse, $1.parse ); }
+ ;
+
+/*
+ * list - zero or more args in a LIST
+ * listp - list (in puncutation only mode)
+ * arg - one ARG or function call
+ */
+
+list : listp
+ { $$.parse = $1.parse; yymode( SCAN_NORMAL ); }
+ ;
+
+listp : /* empty */
+ { $$.parse = pnull(); yymode( SCAN_PUNCT ); }
+ | listp arg
+ { $$.parse = pappend( $1.parse, $2.parse ); }
+ ;
+
+arg : ARG
+ { $$.parse = plist( $1.string ); }
+ | _LBRACKET_t { yymode( SCAN_NORMAL ); } func _RBRACKET_t
+ { $$.parse = $3.parse; }
+ ;
+
+/*
+ * func - a function call (inside [])
+ * This needs to be split cleanly out of 'rule'
+ */
+
+func : ARG lol
+ { $$.parse = prule( $1.string, $2.parse ); }
+ | ON_t arg ARG lol
+ { $$.parse = pon( $2.parse, prule( $3.string, $4.parse ) ); }
+ | ON_t arg RETURN_t list
+ { $$.parse = pon( $2.parse, $4.parse ); }
+ ;
+
+
+/*
+ * eflags - zero or more modifiers to 'executes'
+ * eflag - a single modifier to 'executes'
+ */
+
+eflags : /* empty */
+ { $$.number = 0; }
+ | eflags eflag
+ { $$.number = $1.number | $2.number; }
+ ;
+
+eflag : UPDATED_t
+ { $$.number = EXEC_UPDATED; }
+ | TOGETHER_t
+ { $$.number = EXEC_TOGETHER; }
+ | IGNORE_t
+ { $$.number = EXEC_IGNORE; }
+ | QUIETLY_t
+ { $$.number = EXEC_QUIETLY; }
+ | PIECEMEAL_t
+ { $$.number = EXEC_PIECEMEAL; }
+ | EXISTING_t
+ { $$.number = EXEC_EXISTING; }
+ ;
+
+
+/*
+ * bindlist - list of variable to bind for an action
+ */
+
+bindlist : /* empty */
+ { $$.parse = pnull(); }
+ | BIND_t list
+ { $$.parse = $2.parse; }
+ ;
+
+
diff --git a/src/kenlm/jam-files/engine/jamgram.yy b/src/kenlm/jam-files/engine/jamgram.yy
new file mode 100644
index 0000000..8d20e38
--- /dev/null
+++ b/src/kenlm/jam-files/engine/jamgram.yy
@@ -0,0 +1,329 @@
+/*
+ * Copyright 1993, 2000 Christopher Seiwald.
+ *
+ * This file is part of Jam - see jam.c for Copyright information.
+ */
+
+/* This file is ALSO:
+ * Copyright 2001-2004 David Abrahams.
+ * Distributed under the Boost Software License, Version 1.0.
+ * (See accompanying file LICENSE_1_0.txt or http://www.boost.org/LICENSE_1_0.txt)
+ */
+
+/*
+ * jamgram.yy - jam grammar
+ *
+ * 04/13/94 (seiwald) - added shorthand L0 for null list pointer
+ * 06/01/94 (seiwald) - new 'actions existing' does existing sources
+ * 08/23/94 (seiwald) - Support for '+=' (append to variable)
+ * 08/31/94 (seiwald) - Allow ?= as alias for "default =".
+ * 09/15/94 (seiwald) - if conditionals take only single arguments, so
+ * that 'if foo == bar' gives syntax error (use =).
+ * 02/11/95 (seiwald) - when scanning arguments to rules, only treat
+ * punctuation keywords as keywords. All arg lists
+ * are terminated with punctuation keywords.
+ *
+ * 09/11/00 (seiwald) - Support for function calls:
+ *
+ * Rules now return lists (LIST *), rather than void.
+ *
+ * New "[ rule ]" syntax evals rule into a LIST.
+ *
+ * Lists are now generated by compile_list() and
+ * compile_append(), and any other rule that indirectly
+ * makes a list, rather than being built directly here,
+ * so that lists values can contain rule evaluations.
+ *
+ * New 'return' rule sets the return value, though
+ * other statements also may have return values.
+ *
+ * 'run' production split from 'block' production so
+ * that empty blocks can be handled separately.
+ */
+
+%token ARG STRING
+
+%left `||` `|`
+%left `&&` `&`
+%left `=` `!=` `in`
+%left `<` `<=` `>` `>=`
+%left `!`
+
+%{
+#include "jam.h"
+
+#include "lists.h"
+#include "parse.h"
+#include "scan.h"
+#include "compile.h"
+#include "object.h"
+#include "rules.h"
+
+# define YYMAXDEPTH 10000 /* for OSF and other less endowed yaccs */
+
+# define F0 -1
+# define P0 (PARSE *)0
+# define S0 (OBJECT *)0
+
+# define pappend( l,r ) parse_make( PARSE_APPEND,l,r,P0,S0,S0,0 )
+# define peval( c,l,r ) parse_make( PARSE_EVAL,l,r,P0,S0,S0,c )
+# define pfor( s,l,r,x ) parse_make( PARSE_FOREACH,l,r,P0,s,S0,x )
+# define pif( l,r,t ) parse_make( PARSE_IF,l,r,t,S0,S0,0 )
+# define pincl( l ) parse_make( PARSE_INCLUDE,l,P0,P0,S0,S0,0 )
+# define plist( s ) parse_make( PARSE_LIST,P0,P0,P0,s,S0,0 )
+# define plocal( l,r,t ) parse_make( PARSE_LOCAL,l,r,t,S0,S0,0 )
+# define pmodule( l,r ) parse_make( PARSE_MODULE,l,r,P0,S0,S0,0 )
+# define pclass( l,r ) parse_make( PARSE_CLASS,l,r,P0,S0,S0,0 )
+# define pnull() parse_make( PARSE_NULL,P0,P0,P0,S0,S0,0 )
+# define pon( l,r ) parse_make( PARSE_ON,l,r,P0,S0,S0,0 )
+# define prule( s,p ) parse_make( PARSE_RULE,p,P0,P0,s,S0,0 )
+# define prules( l,r ) parse_make( PARSE_RULES,l,r,P0,S0,S0,0 )
+# define pset( l,r,a ) parse_make( PARSE_SET,l,r,P0,S0,S0,a )
+# define pset1( l,r,t,a ) parse_make( PARSE_SETTINGS,l,r,t,S0,S0,a )
+# define psetc( s,p,a,l ) parse_make( PARSE_SETCOMP,p,a,P0,s,S0,l )
+# define psete( s,l,s1,f ) parse_make( PARSE_SETEXEC,l,P0,P0,s,s1,f )
+# define pswitch( l,r ) parse_make( PARSE_SWITCH,l,r,P0,S0,S0,0 )
+# define pwhile( l,r ) parse_make( PARSE_WHILE,l,r,P0,S0,S0,0 )
+
+# define pnode( l,r ) parse_make( F0,l,r,P0,S0,S0,0 )
+# define psnode( s,l ) parse_make( F0,l,P0,P0,s,S0,0 )
+
+%}
+
+%%
+
+run : /* empty */
+ /* do nothing */
+ | rules
+ { parse_save( $1.parse ); }
+ ;
+
+/*
+ * block - zero or more rules
+ * rules - one or more rules
+ * rule - any one of jam's rules
+ * right-recursive so rules execute in order.
+ */
+
+block : null
+ { $$.parse = $1.parse; }
+ | rules
+ { $$.parse = $1.parse; }
+ ;
+
+rules : rule
+ { $$.parse = $1.parse; }
+ | rule rules
+ { $$.parse = prules( $1.parse, $2.parse ); }
+ | `local` list assign_list_opt `;` block
+ { $$.parse = plocal( $2.parse, $3.parse, $5.parse ); }
+ ;
+
+null : /* empty */
+ { $$.parse = pnull(); }
+ ;
+
+assign_list_opt : `=` list
+ { $$.parse = $2.parse; $$.number = ASSIGN_SET; }
+ | null
+ { $$.parse = $1.parse; $$.number = ASSIGN_APPEND; }
+ ;
+
+arglist_opt : `(` lol `)`
+ { $$.parse = $2.parse; }
+ |
+ { $$.parse = P0; }
+ ;
+
+local_opt : `local`
+ { $$.number = 1; }
+ | /* empty */
+ { $$.number = 0; }
+ ;
+
+rule : `{` block `}`
+ { $$.parse = $2.parse; }
+ | `include` list `;`
+ { $$.parse = pincl( $2.parse ); }
+ | ARG lol `;`
+ { $$.parse = prule( $1.string, $2.parse ); }
+ | arg assign list `;`
+ { $$.parse = pset( $1.parse, $3.parse, $2.number ); }
+ | arg `on` list assign list `;`
+ { $$.parse = pset1( $1.parse, $3.parse, $5.parse, $4.number ); }
+ | `return` list `;`
+ { $$.parse = $2.parse; }
+ | `for` local_opt ARG `in` list `{` block `}`
+ { $$.parse = pfor( $3.string, $5.parse, $7.parse, $2.number ); }
+ | `switch` list `{` cases `}`
+ { $$.parse = pswitch( $2.parse, $4.parse ); }
+ | `if` expr `{` block `}`
+ { $$.parse = pif( $2.parse, $4.parse, pnull() ); }
+ | `module` list `{` block `}`
+ { $$.parse = pmodule( $2.parse, $4.parse ); }
+ | `class` lol `{` block `}`
+ { $$.parse = pclass( $2.parse, $4.parse ); }
+ | `while` expr `{` block `}`
+ { $$.parse = pwhile( $2.parse, $4.parse ); }
+ | `if` expr `{` block `}` `else` rule
+ { $$.parse = pif( $2.parse, $4.parse, $7.parse ); }
+ | local_opt `rule` ARG arglist_opt rule
+ { $$.parse = psetc( $3.string, $5.parse, $4.parse, $1.number ); }
+ | `on` arg rule
+ { $$.parse = pon( $2.parse, $3.parse ); }
+ | `actions` eflags ARG bindlist `{`
+ { yymode( SCAN_STRING ); }
+ STRING
+ { yymode( SCAN_NORMAL ); }
+ `}`
+ { $$.parse = psete( $3.string,$4.parse,$7.string,$2.number ); }
+ ;
+
+/*
+ * assign - = or +=
+ */
+
+assign : `=`
+ { $$.number = ASSIGN_SET; }
+ | `+=`
+ { $$.number = ASSIGN_APPEND; }
+ | `?=`
+ { $$.number = ASSIGN_DEFAULT; }
+ | `default` `=`
+ { $$.number = ASSIGN_DEFAULT; }
+ ;
+
+/*
+ * expr - an expression for if
+ */
+expr : arg
+ { $$.parse = peval( EXPR_EXISTS, $1.parse, pnull() ); }
+ | expr `=` expr
+ { $$.parse = peval( EXPR_EQUALS, $1.parse, $3.parse ); }
+ | expr `!=` expr
+ { $$.parse = peval( EXPR_NOTEQ, $1.parse, $3.parse ); }
+ | expr `<` expr
+ { $$.parse = peval( EXPR_LESS, $1.parse, $3.parse ); }
+ | expr `<=` expr
+ { $$.parse = peval( EXPR_LESSEQ, $1.parse, $3.parse ); }
+ | expr `>` expr
+ { $$.parse = peval( EXPR_MORE, $1.parse, $3.parse ); }
+ | expr `>=` expr
+ { $$.parse = peval( EXPR_MOREEQ, $1.parse, $3.parse ); }
+ | expr `&` expr
+ { $$.parse = peval( EXPR_AND, $1.parse, $3.parse ); }
+ | expr `&&` expr
+ { $$.parse = peval( EXPR_AND, $1.parse, $3.parse ); }
+ | expr `|` expr
+ { $$.parse = peval( EXPR_OR, $1.parse, $3.parse ); }
+ | expr `||` expr
+ { $$.parse = peval( EXPR_OR, $1.parse, $3.parse ); }
+ | arg `in` list
+ { $$.parse = peval( EXPR_IN, $1.parse, $3.parse ); }
+ | `!` expr
+ { $$.parse = peval( EXPR_NOT, $2.parse, pnull() ); }
+ | `(` expr `)`
+ { $$.parse = $2.parse; }
+ ;
+
+
+/*
+ * cases - action elements inside a 'switch'
+ * case - a single action element inside a 'switch'
+ * right-recursive rule so cases can be examined in order.
+ */
+
+cases : /* empty */
+ { $$.parse = P0; }
+ | case cases
+ { $$.parse = pnode( $1.parse, $2.parse ); }
+ ;
+
+case : `case` ARG `:` block
+ { $$.parse = psnode( $2.string, $4.parse ); }
+ ;
+
+/*
+ * lol - list of lists
+ * right-recursive rule so that lists can be added in order.
+ */
+
+lol : list
+ { $$.parse = pnode( P0, $1.parse ); }
+ | list `:` lol
+ { $$.parse = pnode( $3.parse, $1.parse ); }
+ ;
+
+/*
+ * list - zero or more args in a LIST
+ * listp - list (in puncutation only mode)
+ * arg - one ARG or function call
+ */
+
+list : listp
+ { $$.parse = $1.parse; yymode( SCAN_NORMAL ); }
+ ;
+
+listp : /* empty */
+ { $$.parse = pnull(); yymode( SCAN_PUNCT ); }
+ | listp arg
+ { $$.parse = pappend( $1.parse, $2.parse ); }
+ ;
+
+arg : ARG
+ { $$.parse = plist( $1.string ); }
+ | `[` { yymode( SCAN_NORMAL ); } func `]`
+ { $$.parse = $3.parse; }
+ ;
+
+/*
+ * func - a function call (inside [])
+ * This needs to be split cleanly out of 'rule'
+ */
+
+func : ARG lol
+ { $$.parse = prule( $1.string, $2.parse ); }
+ | `on` arg ARG lol
+ { $$.parse = pon( $2.parse, prule( $3.string, $4.parse ) ); }
+ | `on` arg `return` list
+ { $$.parse = pon( $2.parse, $4.parse ); }
+ ;
+
+
+/*
+ * eflags - zero or more modifiers to 'executes'
+ * eflag - a single modifier to 'executes'
+ */
+
+eflags : /* empty */
+ { $$.number = 0; }
+ | eflags eflag
+ { $$.number = $1.number | $2.number; }
+ ;
+
+eflag : `updated`
+ { $$.number = EXEC_UPDATED; }
+ | `together`
+ { $$.number = EXEC_TOGETHER; }
+ | `ignore`
+ { $$.number = EXEC_IGNORE; }
+ | `quietly`
+ { $$.number = EXEC_QUIETLY; }
+ | `piecemeal`
+ { $$.number = EXEC_PIECEMEAL; }
+ | `existing`
+ { $$.number = EXEC_EXISTING; }
+ ;
+
+
+/*
+ * bindlist - list of variable to bind for an action
+ */
+
+bindlist : /* empty */
+ { $$.parse = pnull(); }
+ | `bind` list
+ { $$.parse = $2.parse; }
+ ;
+
+
diff --git a/src/kenlm/jam-files/engine/jamgramtab.h b/src/kenlm/jam-files/engine/jamgramtab.h
new file mode 100644
index 0000000..a0fd43f
--- /dev/null
+++ b/src/kenlm/jam-files/engine/jamgramtab.h
@@ -0,0 +1,44 @@
+ { "!", _BANG_t },
+ { "!=", _BANG_EQUALS_t },
+ { "&", _AMPER_t },
+ { "&&", _AMPERAMPER_t },
+ { "(", _LPAREN_t },
+ { ")", _RPAREN_t },
+ { "+=", _PLUS_EQUALS_t },
+ { ":", _COLON_t },
+ { ";", _SEMIC_t },
+ { "<", _LANGLE_t },
+ { "<=", _LANGLE_EQUALS_t },
+ { "=", _EQUALS_t },
+ { ">", _RANGLE_t },
+ { ">=", _RANGLE_EQUALS_t },
+ { "?=", _QUESTION_EQUALS_t },
+ { "[", _LBRACKET_t },
+ { "]", _RBRACKET_t },
+ { "actions", ACTIONS_t },
+ { "bind", BIND_t },
+ { "case", CASE_t },
+ { "class", CLASS_t },
+ { "default", DEFAULT_t },
+ { "else", ELSE_t },
+ { "existing", EXISTING_t },
+ { "for", FOR_t },
+ { "if", IF_t },
+ { "ignore", IGNORE_t },
+ { "in", IN_t },
+ { "include", INCLUDE_t },
+ { "local", LOCAL_t },
+ { "module", MODULE_t },
+ { "on", ON_t },
+ { "piecemeal", PIECEMEAL_t },
+ { "quietly", QUIETLY_t },
+ { "return", RETURN_t },
+ { "rule", RULE_t },
+ { "switch", SWITCH_t },
+ { "together", TOGETHER_t },
+ { "updated", UPDATED_t },
+ { "while", WHILE_t },
+ { "{", _LBRACE_t },
+ { "|", _BAR_t },
+ { "||", _BARBAR_t },
+ { "}", _RBRACE_t },
diff --git a/src/kenlm/jam-files/engine/lists.c b/src/kenlm/jam-files/engine/lists.c
new file mode 100644
index 0000000..3f2309b
--- /dev/null
+++ b/src/kenlm/jam-files/engine/lists.c
@@ -0,0 +1,475 @@
+/*
+ * Copyright 1993, 1995 Christopher Seiwald.
+ *
+ * This file is part of Jam - see jam.c for Copyright information.
+ */
+
+/*
+ * lists.c - maintain lists of objects
+ */
+
+#include "jam.h"
+#include "lists.h"
+
+#include <assert.h>
+
+static LIST * freelist[ 32 ]; /* junkpile for list_dealloc() */
+
+static unsigned get_bucket( unsigned size )
+{
+ unsigned bucket = 0;
+ while ( size > ( 1u << bucket ) ) ++bucket;
+ return bucket;
+}
+
+static LIST * list_alloc( unsigned const size )
+{
+ unsigned const bucket = get_bucket( size );
+ if ( freelist[ bucket ] )
+ {
+ LIST * result = freelist[ bucket ];
+ freelist[ bucket ] = result->impl.next;
+ return result;
+ }
+ return (LIST *)BJAM_MALLOC( sizeof( LIST ) + ( 1u << bucket ) *
+ sizeof( OBJECT * ) );
+}
+
+static void list_dealloc( LIST * l )
+{
+ unsigned size = list_length( l );
+ unsigned bucket;
+ LIST * node = l;
+
+ if ( size == 0 ) return;
+
+ bucket = get_bucket( size );;
+
+#ifdef BJAM_NO_MEM_CACHE
+ BJAM_FREE( node );
+#else
+ node->impl.next = freelist[ bucket ];
+ freelist[ bucket ] = node;
+#endif
+}
+
+/*
+ * list_append() - append a list onto another one, returning total
+ */
+
+LIST * list_append( LIST * l, LIST * nl )
+{
+ if ( list_empty( l ) )
+ return nl;
+ if ( !list_empty( nl ) )
+ {
+ int const l_size = list_length( l );
+ int const nl_size = list_length( nl );
+ int const size = l_size + nl_size;
+ unsigned const bucket = get_bucket( size );
+
+ /* Do we need to reallocate? */
+ if ( l_size <= ( 1u << ( bucket - 1 ) ) )
+ {
+ LIST * result = list_alloc( size );
+ memcpy( list_begin( result ), list_begin( l ), l_size * sizeof(
+ OBJECT * ) );
+ list_dealloc( l );
+ l = result;
+ }
+
+ l->impl.size = size;
+ memcpy( list_begin( l ) + l_size, list_begin( nl ), nl_size * sizeof(
+ OBJECT * ) );
+ list_dealloc( nl );
+ }
+ return l;
+}
+
+LISTITER list_begin( LIST * l )
+{
+ return l ? (LISTITER)( (char *)l + sizeof( LIST ) ) : 0;
+}
+
+LISTITER list_end( LIST * l )
+{
+ return l ? list_begin( l ) + l->impl.size : 0;
+}
+
+LIST * list_new( OBJECT * value )
+{
+ LIST * const head = list_alloc( 1 ) ;
+ head->impl.size = 1;
+ list_begin( head )[ 0 ] = value;
+ return head;
+}
+
+/*
+ * list_push_back() - tack a string onto the end of a list of strings
+ */
+
+LIST * list_push_back( LIST * head, OBJECT * value )
+{
+ unsigned int size = list_length( head );
+ unsigned int i;
+
+ if ( DEBUG_LISTS )
+ printf( "list > %s <\n", object_str( value ) );
+
+ /* If the size is a power of 2, reallocate. */
+ if ( size == 0 )
+ {
+ head = list_alloc( 1 );
+ }
+ else if ( ( ( size - 1 ) & size ) == 0 )
+ {
+ LIST * l = list_alloc( size + 1 );
+ memcpy( l, head, sizeof( LIST ) + size * sizeof( OBJECT * ) );
+ list_dealloc( head );
+ head = l;
+ }
+
+ list_begin( head )[ size ] = value;
+ head->impl.size = size + 1;
+
+ return head;
+}
+
+
+/*
+ * list_copy() - copy a whole list of strings (nl) onto end of another (l).
+ */
+
+LIST * list_copy( LIST * l )
+{
+ int size = list_length( l );
+ int i;
+ LIST * result;
+
+ if ( size == 0 ) return L0;
+
+ result = list_alloc( size );
+ result->impl.size = size;
+ for ( i = 0; i < size; ++i )
+ list_begin( result )[ i ] = object_copy( list_begin( l )[ i ] );
+ return result;
+}
+
+
+LIST * list_copy_range( LIST * l, LISTITER first, LISTITER last )
+{
+ if ( first == last )
+ return L0;
+ else
+ {
+ int size = last - first;
+ LIST * result = list_alloc( size );
+ LISTITER dest = list_begin( result );
+ result->impl.size = size;
+ for ( ; first != last; ++first, ++dest )
+ *dest = object_copy( *first );
+ return result;
+ }
+}
+
+
+/*
+ * list_sublist() - copy a subset of a list of strings.
+ */
+
+LIST * list_sublist( LIST * l, int start, int count )
+{
+ int end = start + count;
+ int size = list_length( l );
+ if ( start >= size ) return L0;
+ if ( end > size ) end = size;
+ return list_copy_range( l, list_begin( l ) + start, list_begin( l ) + end );
+}
+
+
+static int str_ptr_compare( void const * va, void const * vb )
+{
+ OBJECT * a = *( (OBJECT * *)va );
+ OBJECT * b = *( (OBJECT * *)vb );
+ return strcmp( object_str( a ), object_str( b ) );
+}
+
+
+LIST * list_sort( LIST * l )
+{
+ int len;
+ int ii;
+ LIST * result;
+
+ if ( !l )
+ return L0;
+
+ len = list_length( l );
+ result = list_copy( l );
+
+ qsort( list_begin( result ), len, sizeof( OBJECT * ), str_ptr_compare );
+
+ return result;
+}
+
+
+/*
+ * list_free() - free a list of strings
+ */
+
+void list_free( LIST * head )
+{
+ if ( !list_empty( head ) )
+ {
+ LISTITER iter = list_begin( head );
+ LISTITER const end = list_end( head );
+ for ( ; iter != end; iter = list_next( iter ) )
+ object_free( list_item( iter ) );
+ list_dealloc( head );
+ }
+}
+
+
+/*
+ * list_pop_front() - remove the front element from a list of strings
+ */
+
+LIST * list_pop_front( LIST * l )
+{
+ unsigned size = list_length( l );
+ assert( size );
+ --size;
+ object_free( list_front( l ) );
+
+ if ( size == 0 )
+ {
+ list_dealloc( l );
+ return L0;
+ }
+
+ if ( ( ( size - 1 ) & size ) == 0 )
+ {
+ LIST * const nl = list_alloc( size );
+ nl->impl.size = size;
+ memcpy( list_begin( nl ), list_begin( l ) + 1, size * sizeof( OBJECT * )
+ );
+ list_dealloc( l );
+ return nl;
+ }
+
+ l->impl.size = size;
+ memmove( list_begin( l ), list_begin( l ) + 1, size * sizeof( OBJECT * ) );
+ return l;
+}
+
+LIST * list_reverse( LIST * l )
+{
+ int size = list_length( l );
+ if ( size == 0 ) return L0;
+ {
+ LIST * const result = list_alloc( size );
+ int i;
+ result->impl.size = size;
+ for ( i = 0; i < size; ++i )
+ list_begin( result )[ i ] = object_copy( list_begin( l )[ size - i -
+ 1 ] );
+ return result;
+ }
+}
+
+int list_cmp( LIST * t, LIST * s )
+{
+ int status = 0;
+ LISTITER t_it = list_begin( t );
+ LISTITER const t_end = list_end( t );
+ LISTITER s_it = list_begin( s );
+ LISTITER const s_end = list_end( s );
+
+ while ( !status && ( t_it != t_end || s_it != s_end ) )
+ {
+ char const * st = t_it != t_end ? object_str( list_item( t_it ) ) : "";
+ char const * ss = s_it != s_end ? object_str( list_item( s_it ) ) : "";
+
+ status = strcmp( st, ss );
+
+ t_it = t_it != t_end ? list_next( t_it ) : t_it;
+ s_it = s_it != s_end ? list_next( s_it ) : s_it;
+ }
+
+ return status;
+}
+
+int list_is_sublist( LIST * sub, LIST * l )
+{
+ LISTITER iter = list_begin( sub );
+ LISTITER const end = list_end( sub );
+ for ( ; iter != end; iter = list_next( iter ) )
+ if ( !list_in( l, list_item( iter ) ) )
+ return 0;
+ return 1;
+}
+
+/*
+ * list_print() - print a list of strings to stdout
+ */
+
+void list_print( LIST * l )
+{
+ LISTITER iter = list_begin( l ), end = list_end( l );
+ if ( iter != end )
+ {
+ printf( "%s", object_str( list_item( iter ) ) );
+ iter = list_next( iter );
+ for ( ; iter != end; iter = list_next( iter ) )
+ printf( " %s", object_str( list_item( iter ) ) );
+ }
+}
+
+
+/*
+ * list_length() - return the number of items in the list
+ */
+
+int list_length( LIST * l )
+{
+ return l ? l->impl.size : 0;
+}
+
+
+int list_in( LIST * l, OBJECT * value )
+{
+ LISTITER iter = list_begin( l );
+ LISTITER end = list_end( l );
+ for ( ; iter != end; iter = list_next( iter ) )
+ if ( object_equal( list_item( iter ), value ) )
+ return 1;
+ return 0;
+}
+
+
+LIST * list_unique( LIST * sorted_list )
+{
+ LIST * result = L0;
+ OBJECT * last_added = 0;
+
+ LISTITER iter = list_begin( sorted_list ), end = list_end( sorted_list );
+ for ( ; iter != end; iter = list_next( iter ) )
+ {
+ if ( !last_added || !object_equal( list_item( iter ), last_added ) )
+ {
+ result = list_push_back( result, object_copy( list_item( iter ) ) );
+ last_added = list_item( iter );
+ }
+ }
+ return result;
+}
+
+void list_done()
+{
+ int i;
+ for ( i = 0; i < sizeof( freelist ) / sizeof( freelist[ 0 ] ); ++i )
+ {
+ LIST * l = freelist[ i ];
+ while ( l )
+ {
+ LIST * const tmp = l;
+ l = l->impl.next;
+ BJAM_FREE( tmp );
+ }
+ }
+}
+
+
+/*
+ * lol_init() - initialize a LOL (list of lists).
+ */
+
+void lol_init( LOL * lol )
+{
+ lol->count = 0;
+}
+
+
+/*
+ * lol_add() - append a LIST onto an LOL.
+ */
+
+void lol_add( LOL * lol, LIST * l )
+{
+ if ( lol->count < LOL_MAX )
+ lol->list[ lol->count++ ] = l;
+}
+
+
+/*
+ * lol_free() - free the LOL and its LISTs.
+ */
+
+void lol_free( LOL * lol )
+{
+ int i;
+ for ( i = 0; i < lol->count; ++i )
+ list_free( lol->list[ i ] );
+ lol->count = 0;
+}
+
+
+/*
+ * lol_get() - return one of the LISTs in the LOL.
+ */
+
+LIST * lol_get( LOL * lol, int i )
+{
+ return i < lol->count ? lol->list[ i ] : L0;
+}
+
+
+/*
+ * lol_print() - debug print LISTS separated by ":".
+ */
+
+void lol_print( LOL * lol )
+{
+ int i;
+ for ( i = 0; i < lol->count; ++i )
+ {
+ if ( i )
+ printf( " : " );
+ list_print( lol->list[ i ] );
+ }
+}
+
+#ifdef HAVE_PYTHON
+
+PyObject * list_to_python( LIST * l )
+{
+ PyObject * result = PyList_New( 0 );
+ LISTITER iter = list_begin( l );
+ LISTITER const end = list_end( l );
+ for ( ; iter != end; iter = list_next( iter ) )
+ {
+ PyObject * s = PyString_FromString( object_str( list_item( iter ) ) );
+ PyList_Append( result, s );
+ Py_DECREF( s );
+ }
+
+ return result;
+}
+
+LIST * list_from_python( PyObject * l )
+{
+ LIST * result = L0;
+
+ Py_ssize_t n = PySequence_Size( l );
+ Py_ssize_t i;
+ for ( i = 0; i < n; ++i )
+ {
+ PyObject * v = PySequence_GetItem( l, i );
+ result = list_push_back( result, object_new( PyString_AsString( v ) ) );
+ Py_DECREF( v );
+ }
+
+ return result;
+}
+
+#endif
diff --git a/src/kenlm/jam-files/engine/lists.h b/src/kenlm/jam-files/engine/lists.h
new file mode 100644
index 0000000..3dd8fe8
--- /dev/null
+++ b/src/kenlm/jam-files/engine/lists.h
@@ -0,0 +1,113 @@
+/*
+ * Copyright 1993, 1995 Christopher Seiwald.
+ *
+ * This file is part of Jam - see jam.c for Copyright information.
+ */
+
+/* This file is ALSO:
+ * Copyright 2001-2004 David Abrahams.
+ * Distributed under the Boost Software License, Version 1.0.
+ * (See accompanying file LICENSE_1_0.txt or http://www.boost.org/LICENSE_1_0.txt)
+ */
+
+/*
+ * lists.h - the LIST structure and routines to manipulate them
+ *
+ * The whole of jam relies on lists of objects as a datatype. This module, in
+ * conjunction with object.c, handles these relatively efficiently.
+ *
+ * Structures defined:
+ *
+ * LIST - list of OBJECTs
+ * LOL - list of LISTs
+ *
+ * External routines:
+ *
+ * list_append() - append a list onto another one, returning total
+ * list_new() - tack an object onto the end of a list of objects
+ * list_copy() - copy a whole list of objects
+ * list_sublist() - copy a subset of a list of objects
+ * list_free() - free a list of objects
+ * list_print() - print a list of objects to stdout
+ * list_length() - return the number of items in the list
+ *
+ * lol_init() - initialize a LOL (list of lists)
+ * lol_add() - append a LIST onto an LOL
+ * lol_free() - free the LOL and its LISTs
+ * lol_get() - return one of the LISTs in the LOL
+ * lol_print() - debug print LISTS separated by ":"
+ */
+
+#ifndef LISTS_DWA20011022_H
+#define LISTS_DWA20011022_H
+
+#include "object.h"
+
+#ifdef HAVE_PYTHON
+# include <Python.h>
+#endif
+
+/*
+ * LIST - list of strings
+ */
+
+typedef struct _list {
+ union {
+ int size;
+ struct _list * next;
+ OBJECT * align;
+ } impl;
+} LIST;
+
+typedef OBJECT * * LISTITER;
+
+/*
+ * LOL - list of LISTs
+ */
+
+#define LOL_MAX 19
+typedef struct _lol {
+ int count;
+ LIST * list[ LOL_MAX ];
+} LOL;
+
+LIST * list_new( OBJECT * value );
+LIST * list_append( LIST * destination, LIST * source );
+LIST * list_copy( LIST * );
+LIST * list_copy_range( LIST * destination, LISTITER first, LISTITER last );
+void list_free( LIST * head );
+LIST * list_push_back( LIST * head, OBJECT * value );
+void list_print( LIST * );
+int list_length( LIST * );
+LIST * list_sublist( LIST *, int start, int count );
+LIST * list_pop_front( LIST * );
+LIST * list_sort( LIST * );
+LIST * list_unique( LIST * sorted_list );
+int list_in( LIST *, OBJECT * value );
+LIST * list_reverse( LIST * );
+int list_cmp( LIST * lhs, LIST * rhs );
+int list_is_sublist( LIST * sub, LIST * l );
+void list_done();
+
+LISTITER list_begin( LIST * );
+LISTITER list_end( LIST * );
+#define list_next( it ) ((it) + 1)
+#define list_item( it ) (*(it))
+#define list_empty( l ) ((l) == L0)
+#define list_front( l ) list_item( list_begin( l ) )
+
+#define L0 ((LIST *)0)
+
+void lol_add( LOL *, LIST * );
+void lol_init( LOL * );
+void lol_free( LOL * );
+LIST * lol_get( LOL *, int i );
+void lol_print( LOL * );
+void lol_build( LOL *, char const * * elements );
+
+#ifdef HAVE_PYTHON
+PyObject * list_to_python( LIST * );
+LIST * list_from_python( PyObject * );
+#endif
+
+#endif
diff --git a/src/kenlm/jam-files/engine/make.c b/src/kenlm/jam-files/engine/make.c
new file mode 100644
index 0000000..afc8bb9
--- /dev/null
+++ b/src/kenlm/jam-files/engine/make.c
@@ -0,0 +1,908 @@
+/*
+ * Copyright 1993, 1995 Christopher Seiwald.
+ *
+ * This file is part of Jam - see jam.c for Copyright information.
+ */
+
+/* This file is ALSO:
+ * Copyright 2001-2004 David Abrahams.
+ * Distributed under the Boost Software License, Version 1.0.
+ * (See accompanying file LICENSE_1_0.txt or http://www.boost.org/LICENSE_1_0.txt)
+ */
+
+/*
+ * make.c - bring a target up to date, once rules are in place.
+ *
+ * This modules controls the execution of rules to bring a target and its
+ * dependencies up to date. It is invoked after the targets, rules, et. al.
+ * described in rules.h are created by the interpreting jam files.
+ *
+ * This file contains the main make() entry point and the first pass make0().
+ * The second pass, make1(), which actually does the command execution, is in
+ * make1.c.
+ *
+ * External routines:
+ * make() - make a target, given its name
+ *
+ * Internal routines:
+ * make0() - bind and scan everything to make a TARGET
+ * make0sort() - reorder TARGETS chain by their time (newest to oldest)
+ */
+
+#include "jam.h"
+#include "make.h"
+
+#include "command.h"
+#ifdef OPT_HEADER_CACHE_EXT
+# include "hcache.h"
+#endif
+#include "headers.h"
+#include "lists.h"
+#include "object.h"
+#include "parse.h"
+#include "rules.h"
+#include "search.h"
+#include "timestamp.h"
+#include "variable.h"
+
+#include <assert.h>
+
+#ifndef max
+# define max(a,b) ((a)>(b)?(a):(b))
+#endif
+
+static TARGETS * make0sort( TARGETS * c );
+
+#ifdef OPT_GRAPH_DEBUG_EXT
+ static void dependGraphOutput( TARGET * t, int depth );
+#endif
+
+static char const * target_fate[] =
+{
+ "init", /* T_FATE_INIT */
+ "making", /* T_FATE_MAKING */
+ "stable", /* T_FATE_STABLE */
+ "newer", /* T_FATE_NEWER */
+ "temp", /* T_FATE_ISTMP */
+ "touched", /* T_FATE_TOUCHED */
+ "rebuild", /* T_FATE_REBUILD */
+ "missing", /* T_FATE_MISSING */
+ "needtmp", /* T_FATE_NEEDTMP */
+ "old", /* T_FATE_OUTDATED */
+ "update", /* T_FATE_UPDATE */
+ "nofind", /* T_FATE_CANTFIND */
+ "nomake" /* T_FATE_CANTMAKE */
+};
+
+static char const * target_bind[] =
+{
+ "unbound",
+ "missing",
+ "parents",
+ "exists",
+};
+
+#define spaces(x) ( " " + ( x > 20 ? 0 : 20-x ) )
+
+
+/*
+ * make() - make a target, given its name.
+ */
+
+int make( LIST * targets, int anyhow )
+{
+ COUNTS counts[ 1 ];
+ int status = 0; /* 1 if anything fails */
+
+#ifdef OPT_HEADER_CACHE_EXT
+ hcache_init();
+#endif
+
+ memset( (char *)counts, 0, sizeof( *counts ) );
+
+ /* First bind all targets with LOCATE_TARGET setting. This is needed to
+ * correctly handle dependencies to generated headers.
+ */
+ bind_explicitly_located_targets();
+
+ {
+ LISTITER iter, end;
+ PROFILE_ENTER( MAKE_MAKE0 );
+ for ( iter = list_begin( targets ), end = list_end( targets ); iter != end; iter = list_next( iter ) )
+ {
+ TARGET * t = bindtarget( list_item( iter ) );
+ if ( t->fate == T_FATE_INIT )
+ make0( t, 0, 0, counts, anyhow, 0 );
+ }
+ PROFILE_EXIT( MAKE_MAKE0 );
+ }
+
+#ifdef OPT_GRAPH_DEBUG_EXT
+ if ( DEBUG_GRAPH )
+ {
+ LISTITER iter, end;
+ for ( iter = list_begin( targets ), end = list_end( targets ); iter != end; iter = list_next( iter ) )
+ dependGraphOutput( bindtarget( list_item( iter ) ), 0 );
+ }
+#endif
+
+ if ( DEBUG_MAKE )
+ {
+ if ( counts->targets )
+ printf( "...found %d target%s...\n", counts->targets,
+ counts->targets > 1 ? "s" : "" );
+ if ( counts->temp )
+ printf( "...using %d temp target%s...\n", counts->temp,
+ counts->temp > 1 ? "s" : "" );
+ if ( counts->updating )
+ printf( "...updating %d target%s...\n", counts->updating,
+ counts->updating > 1 ? "s" : "" );
+ if ( counts->cantfind )
+ printf( "...can't find %d target%s...\n", counts->cantfind,
+ counts->cantfind > 1 ? "s" : "" );
+ if ( counts->cantmake )
+ printf( "...can't make %d target%s...\n", counts->cantmake,
+ counts->cantmake > 1 ? "s" : "" );
+ }
+
+ status = counts->cantfind || counts->cantmake;
+
+ {
+ PROFILE_ENTER( MAKE_MAKE1 );
+ status |= make1( targets );
+ PROFILE_EXIT( MAKE_MAKE1 );
+ }
+
+ return status;
+}
+
+
+/* Force any dependants of t that have already at least begun being visited by
+ * make0() to be updated.
+ */
+
+static void update_dependants( TARGET * t )
+{
+ TARGETS * q;
+
+ for ( q = t->dependants; q; q = q->next )
+ {
+ TARGET * p = q->target;
+ char fate0 = p->fate;
+
+ /* If we have already at least begun visiting it and we are not already
+ * rebuilding it for other reasons.
+ */
+ if ( ( fate0 != T_FATE_INIT ) && ( fate0 < T_FATE_BUILD ) )
+ {
+ p->fate = T_FATE_UPDATE;
+
+ if ( DEBUG_FATE )
+ {
+ printf( "fate change %s from %s to %s (as dependant of %s)\n",
+ object_str( p->name ), target_fate[ (int) fate0 ], target_fate[ (int) p->fate ], object_str( t->name ) );
+ }
+
+ /* If we are done visiting it, go back and make sure its dependants
+ * get rebuilt.
+ */
+ if ( fate0 > T_FATE_MAKING )
+ update_dependants( p );
+ }
+ }
+}
+
+
+/*
+ * Make sure that all of t's rebuilds get rebuilt.
+ */
+
+static void force_rebuilds( TARGET * t )
+{
+ TARGETS * d;
+ for ( d = t->rebuilds; d; d = d->next )
+ {
+ TARGET * r = d->target;
+
+ /* If it is not already being rebuilt for other reasons. */
+ if ( r->fate < T_FATE_BUILD )
+ {
+ if ( DEBUG_FATE )
+ printf( "fate change %s from %s to %s (by rebuild)\n",
+ object_str( r->name ), target_fate[ (int) r->fate ], target_fate[ T_FATE_REBUILD ] );
+
+ /* Force rebuild it. */
+ r->fate = T_FATE_REBUILD;
+
+ /* And make sure its dependants are updated too. */
+ update_dependants( r );
+ }
+ }
+}
+
+
+int make0rescan( TARGET * t, TARGET * rescanning )
+{
+ int result = 0;
+ TARGETS * c;
+
+ /* Check whether we have already found a cycle. */
+ if ( target_scc( t ) == rescanning )
+ return 1;
+
+ /* If we have already visited this node, ignore it. */
+ if ( t->rescanning == rescanning )
+ return 0;
+
+ /* If t is already updated, ignore it. */
+ if ( t->scc_root == NULL && t->progress > T_MAKE_ACTIVE )
+ return 0;
+
+ t->rescanning = rescanning;
+ for ( c = t->depends; c; c = c->next )
+ {
+ TARGET * dependency = c->target;
+ /* Always start at the root of each new strongly connected component. */
+ if ( target_scc( dependency ) != target_scc( t ) )
+ dependency = target_scc( dependency );
+ result |= make0rescan( dependency, rescanning );
+
+ /* Make sure that we pick up the new include node. */
+ if ( c->target->includes == rescanning )
+ result = 1;
+ }
+ if ( result && t->scc_root == NULL )
+ {
+ t->scc_root = rescanning;
+ rescanning->depends = targetentry( rescanning->depends, t );
+ }
+ return result;
+}
+
+
+/*
+ * make0() - bind and scan everything to make a TARGET.
+ *
+ * Recursively binds a target, searches for #included headers, calls itself on
+ * those headers and any dependencies.
+ */
+
+void make0
+(
+ TARGET * t,
+ TARGET * p, /* parent */
+ int depth, /* for display purposes */
+ COUNTS * counts, /* for reporting */
+ int anyhow,
+ TARGET * rescanning
+) /* forcibly touch all (real) targets */
+{
+ TARGETS * c;
+ TARGET * ptime = t;
+ TARGET * located_target = 0;
+ timestamp last;
+ timestamp leaf;
+ timestamp hlast;
+ int fate;
+ char const * flag = "";
+ SETTINGS * s;
+
+#ifdef OPT_GRAPH_DEBUG_EXT
+ int savedFate;
+ int oldTimeStamp;
+#endif
+
+ if ( DEBUG_MAKEPROG )
+ printf( "make\t--\t%s%s\n", spaces( depth ), object_str( t->name ) );
+
+ /*
+ * Step 1: Initialize.
+ */
+
+ if ( DEBUG_MAKEPROG )
+ printf( "make\t--\t%s%s\n", spaces( depth ), object_str( t->name ) );
+
+ t->fate = T_FATE_MAKING;
+ t->depth = depth;
+
+ /*
+ * Step 2: Under the influence of "on target" variables, bind the target and
+ * search for headers.
+ */
+
+ /* Step 2a: Set "on target" variables. */
+ s = copysettings( t->settings );
+ pushsettings( root_module(), s );
+
+ /* Step 2b: Find and timestamp the target file (if it is a file). */
+ if ( ( t->binding == T_BIND_UNBOUND ) && !( t->flags & T_FLAG_NOTFILE ) )
+ {
+ OBJECT * another_target;
+ object_free( t->boundname );
+ t->boundname = search( t->name, &t->time, &another_target,
+ t->flags & T_FLAG_ISFILE );
+ /* If it was detected that this target refers to an already existing and
+ * bound target, we add a dependency so that every target depending on
+ * us will depend on that other target as well.
+ */
+ if ( another_target )
+ located_target = bindtarget( another_target );
+
+ t->binding = timestamp_empty( &t->time )
+ ? T_BIND_MISSING
+ : T_BIND_EXISTS;
+ }
+
+ /* INTERNAL, NOTFILE header nodes have the time of their parents. */
+ if ( p && ( t->flags & T_FLAG_INTERNAL ) )
+ ptime = p;
+
+ /* If temp file does not exist but parent does, use parent. */
+ if ( p && ( t->flags & T_FLAG_TEMP ) &&
+ ( t->binding == T_BIND_MISSING ) &&
+ ( p->binding != T_BIND_MISSING ) )
+ {
+ t->binding = T_BIND_PARENTS;
+ ptime = p;
+ }
+
+#ifdef OPT_SEMAPHORE
+ {
+ LIST * var = var_get( root_module(), constant_JAM_SEMAPHORE );
+ if ( !list_empty( var ) )
+ {
+ TARGET * const semaphore = bindtarget( list_front( var ) );
+ semaphore->progress = T_MAKE_SEMAPHORE;
+ t->semaphore = semaphore;
+ }
+ }
+#endif
+
+ /* Step 2c: If its a file, search for headers. */
+ if ( t->binding == T_BIND_EXISTS )
+ headers( t );
+
+ /* Step 2d: reset "on target" variables. */
+ popsettings( root_module(), s );
+ freesettings( s );
+
+ /*
+ * Pause for a little progress reporting.
+ */
+
+ if ( DEBUG_BIND )
+ {
+ if ( !object_equal( t->name, t->boundname ) )
+ printf( "bind\t--\t%s%s: %s\n", spaces( depth ),
+ object_str( t->name ), object_str( t->boundname ) );
+
+ switch ( t->binding )
+ {
+ case T_BIND_UNBOUND:
+ case T_BIND_MISSING:
+ case T_BIND_PARENTS:
+ printf( "time\t--\t%s%s: %s\n", spaces( depth ),
+ object_str( t->name ), target_bind[ (int)t->binding ] );
+ break;
+
+ case T_BIND_EXISTS:
+ printf( "time\t--\t%s%s: %s\n", spaces( depth ),
+ object_str( t->name ), timestamp_str( &t->time ) );
+ break;
+ }
+ }
+
+ /*
+ * Step 3: Recursively make0() dependencies & headers.
+ */
+
+ /* Step 3a: Recursively make0() dependencies. */
+ for ( c = t->depends; c; c = c->next )
+ {
+ int const internal = t->flags & T_FLAG_INTERNAL;
+
+ /* Warn about circular deps, except for includes, which include each
+ * other alot.
+ */
+ if ( c->target->fate == T_FATE_INIT )
+ make0( c->target, ptime, depth + 1, counts, anyhow, rescanning );
+ else if ( c->target->fate == T_FATE_MAKING && !internal )
+ printf( "warning: %s depends on itself\n", object_str(
+ c->target->name ) );
+ else if ( c->target->fate != T_FATE_MAKING && rescanning )
+ make0rescan( c->target, rescanning );
+ if ( rescanning && c->target->includes && c->target->includes->fate !=
+ T_FATE_MAKING )
+ make0rescan( target_scc( c->target->includes ), rescanning );
+ }
+
+ if ( located_target )
+ {
+ if ( located_target->fate == T_FATE_INIT )
+ make0( located_target, ptime, depth + 1, counts, anyhow, rescanning
+ );
+ else if ( located_target->fate != T_FATE_MAKING && rescanning )
+ make0rescan( located_target, rescanning );
+ }
+
+ /* Step 3b: Recursively make0() internal includes node. */
+ if ( t->includes )
+ make0( t->includes, p, depth + 1, counts, anyhow, rescanning );
+
+ /* Step 3c: Add dependencies' includes to our direct dependencies. */
+ {
+ TARGETS * incs = 0;
+ for ( c = t->depends; c; c = c->next )
+ if ( c->target->includes )
+ incs = targetentry( incs, c->target->includes );
+ t->depends = targetchain( t->depends, incs );
+ }
+
+ if ( located_target )
+ t->depends = targetentry( t->depends, located_target );
+
+ /* Step 3d: Detect cycles. */
+ {
+ int cycle_depth = depth;
+ for ( c = t->depends; c; c = c->next )
+ {
+ TARGET * scc_root = target_scc( c->target );
+ if ( scc_root->fate == T_FATE_MAKING &&
+ ( !scc_root->includes ||
+ scc_root->includes->fate != T_FATE_MAKING ) )
+ {
+ if ( scc_root->depth < cycle_depth )
+ {
+ cycle_depth = scc_root->depth;
+ t->scc_root = scc_root;
+ }
+ }
+ }
+ }
+
+ /*
+ * Step 4: Compute time & fate.
+ */
+
+ /* Step 4a: Pick up dependencies' time and fate. */
+ timestamp_clear( &last );
+ timestamp_clear( &leaf );
+ fate = T_FATE_STABLE;
+ for ( c = t->depends; c; c = c->next )
+ {
+ /* If we are in a different strongly connected component, pull
+ * timestamps from the root.
+ */
+ if ( c->target->scc_root )
+ {
+ TARGET * const scc_root = target_scc( c->target );
+ if ( scc_root != t->scc_root )
+ {
+ timestamp_max( &c->target->leaf, &c->target->leaf,
+ &scc_root->leaf );
+ timestamp_max( &c->target->time, &c->target->time,
+ &scc_root->time );
+ c->target->fate = max( c->target->fate, scc_root->fate );
+ }
+ }
+
+ /* If LEAVES has been applied, we only heed the timestamps of the leaf
+ * source nodes.
+ */
+ timestamp_max( &leaf, &leaf, &c->target->leaf );
+ if ( t->flags & T_FLAG_LEAVES )
+ {
+ timestamp_copy( &last, &leaf );
+ continue;
+ }
+ timestamp_max( &last, &last, &c->target->time );
+ fate = max( fate, c->target->fate );
+
+#ifdef OPT_GRAPH_DEBUG_EXT
+ if ( DEBUG_FATE )
+ if ( fate < c->target->fate )
+ printf( "fate change %s from %s to %s by dependency %s\n",
+ object_str( t->name ), target_fate[ (int)fate ],
+ target_fate[ (int)c->target->fate ], object_str(
+ c->target->name ) );
+#endif
+ }
+
+ /* Step 4b: Pick up included headers time. */
+
+ /*
+ * If a header is newer than a temp source that includes it, the temp source
+ * will need building.
+ */
+ if ( t->includes )
+ timestamp_copy( &hlast, &t->includes->time );
+ else
+ timestamp_clear( &hlast );
+
+ /* Step 4c: handle NOUPDATE oddity.
+ *
+ * If a NOUPDATE file exists, mark it as having eternally old dependencies.
+ * Do not inherit our fate from our dependencies. Decide fate based only on
+ * other flags and our binding (done later).
+ */
+ if ( t->flags & T_FLAG_NOUPDATE )
+ {
+#ifdef OPT_GRAPH_DEBUG_EXT
+ if ( DEBUG_FATE )
+ if ( fate != T_FATE_STABLE )
+ printf( "fate change %s back to stable, NOUPDATE.\n",
+ object_str( t->name ) );
+#endif
+
+ timestamp_clear( &last );
+ timestamp_clear( &t->time );
+
+ /* Do not inherit our fate from our dependencies. Decide fate based only
+ * upon other flags and our binding (done later).
+ */
+ fate = T_FATE_STABLE;
+ }
+
+ /* Step 4d: Determine fate: rebuild target or what? */
+
+ /*
+ In English:
+ If can not find or make child, can not make target.
+ If children changed, make target.
+ If target missing, make it.
+ If children newer, make target.
+ If temp's children newer than parent, make temp.
+ If temp's headers newer than parent, make temp.
+ If deliberately touched, make it.
+ If up-to-date temp file present, use it.
+ If target newer than non-notfile parent, mark target newer.
+ Otherwise, stable!
+
+ Note this block runs from least to most stable: as we make it further
+ down the list, the target's fate gets more stable.
+ */
+
+#ifdef OPT_GRAPH_DEBUG_EXT
+ savedFate = fate;
+ oldTimeStamp = 0;
+#endif
+
+ if ( fate >= T_FATE_BROKEN )
+ {
+ fate = T_FATE_CANTMAKE;
+ }
+ else if ( fate >= T_FATE_SPOIL )
+ {
+ fate = T_FATE_UPDATE;
+ }
+ else if ( t->binding == T_BIND_MISSING )
+ {
+ fate = T_FATE_MISSING;
+ }
+ else if ( t->binding == T_BIND_EXISTS && timestamp_cmp( &last, &t->time ) >
+ 0 )
+ {
+#ifdef OPT_GRAPH_DEBUG_EXT
+ oldTimeStamp = 1;
+#endif
+ fate = T_FATE_OUTDATED;
+ }
+ else if ( t->binding == T_BIND_PARENTS && timestamp_cmp( &last, &p->time ) >
+ 0 )
+ {
+#ifdef OPT_GRAPH_DEBUG_EXT
+ oldTimeStamp = 1;
+#endif
+ fate = T_FATE_NEEDTMP;
+ }
+ else if ( t->binding == T_BIND_PARENTS && timestamp_cmp( &hlast, &p->time )
+ > 0 )
+ {
+ fate = T_FATE_NEEDTMP;
+ }
+ else if ( t->flags & T_FLAG_TOUCHED )
+ {
+ fate = T_FATE_TOUCHED;
+ }
+ else if ( anyhow && !( t->flags & T_FLAG_NOUPDATE ) )
+ {
+ fate = T_FATE_TOUCHED;
+ }
+ else if ( t->binding == T_BIND_EXISTS && ( t->flags & T_FLAG_TEMP ) )
+ {
+ fate = T_FATE_ISTMP;
+ }
+ else if ( t->binding == T_BIND_EXISTS && p && p->binding != T_BIND_UNBOUND
+ && timestamp_cmp( &t->time, &p->time ) > 0 )
+ {
+#ifdef OPT_GRAPH_DEBUG_EXT
+ oldTimeStamp = 1;
+#endif
+ fate = T_FATE_NEWER;
+ }
+ else
+ {
+ fate = T_FATE_STABLE;
+ }
+#ifdef OPT_GRAPH_DEBUG_EXT
+ if ( DEBUG_FATE && ( fate != savedFate ) )
+ {
+ if ( savedFate == T_FATE_STABLE )
+ printf( "fate change %s set to %s%s\n", object_str( t->name ),
+ target_fate[ fate ], oldTimeStamp ? " (by timestamp)" : "" );
+ else
+ printf( "fate change %s from %s to %s%s\n", object_str( t->name ),
+ target_fate[ savedFate ], target_fate[ fate ], oldTimeStamp ?
+ " (by timestamp)" : "" );
+ }
+#endif
+
+ /* Step 4e: Handle missing files. */
+ /* If it is missing and there are no actions to create it, boom. */
+ /* If we can not make a target we do not care about it, okay. */
+ /* We could insist that there are updating actions for all missing */
+ /* files, but if they have dependencies we just pretend it is a NOTFILE. */
+
+ if ( ( fate == T_FATE_MISSING ) && !t->actions && !t->depends )
+ {
+ if ( t->flags & T_FLAG_NOCARE )
+ {
+#ifdef OPT_GRAPH_DEBUG_EXT
+ if ( DEBUG_FATE )
+ printf( "fate change %s to STABLE from %s, "
+ "no actions, no dependencies and do not care\n",
+ object_str( t->name ), target_fate[ fate ] );
+#endif
+ fate = T_FATE_STABLE;
+ }
+ else
+ {
+ printf( "don't know how to make %s\n", object_str( t->name ) );
+ fate = T_FATE_CANTFIND;
+ }
+ }
+
+ /* Step 4f: Propagate dependencies' time & fate. */
+ /* Set leaf time to be our time only if this is a leaf. */
+
+ timestamp_max( &t->time, &t->time, &last );
+ timestamp_copy( &t->leaf, timestamp_empty( &leaf ) ? &t->time : &leaf );
+ /* This target's fate may have been updated by virtue of following some
+ * target's rebuilds list, so only allow it to be increased to the fate we
+ * have calculated. Otherwise, grab its new fate.
+ */
+ if ( fate > t->fate )
+ t->fate = fate;
+ else
+ fate = t->fate;
+
+ /* Step 4g: If this target needs to be built, force rebuild everything in
+ * its rebuilds list.
+ */
+ if ( ( fate >= T_FATE_BUILD ) && ( fate < T_FATE_BROKEN ) )
+ force_rebuilds( t );
+
+ /*
+ * Step 5: Sort dependencies by their update time.
+ */
+
+ if ( globs.newestfirst )
+ t->depends = make0sort( t->depends );
+
+ /*
+ * Step 6: A little harmless tabulating for tracing purposes.
+ */
+
+ /* Do not count or report interal includes nodes. */
+ if ( t->flags & T_FLAG_INTERNAL )
+ return;
+
+ if ( counts )
+ {
+#ifdef OPT_IMPROVED_PATIENCE_EXT
+ ++counts->targets;
+#else
+ if ( !( ++counts->targets % 1000 ) && DEBUG_MAKE )
+ {
+ printf( "...patience...\n" );
+ fflush(stdout);
+ }
+#endif
+
+ if ( fate == T_FATE_ISTMP )
+ ++counts->temp;
+ else if ( fate == T_FATE_CANTFIND )
+ ++counts->cantfind;
+ else if ( ( fate == T_FATE_CANTMAKE ) && t->actions )
+ ++counts->cantmake;
+ else if ( ( fate >= T_FATE_BUILD ) && ( fate < T_FATE_BROKEN ) &&
+ t->actions )
+ ++counts->updating;
+ }
+
+ if ( !( t->flags & T_FLAG_NOTFILE ) && ( fate >= T_FATE_SPOIL ) )
+ flag = "+";
+ else if ( t->binding == T_BIND_EXISTS && p && timestamp_cmp( &t->time,
+ &p->time ) > 0 )
+ flag = "*";
+
+ if ( DEBUG_MAKEPROG )
+ printf( "made%s\t%s\t%s%s\n", flag, target_fate[ (int)t->fate ],
+ spaces( depth ), object_str( t->name ) );
+}
+
+
+#ifdef OPT_GRAPH_DEBUG_EXT
+
+/* Returns a printable name for target 't'. Internal header-scanning nodes
+ * (T_FLAG_INTERNAL) get an " (internal node)" suffix rendered into a static
+ * buffer, so the returned pointer may be invalidated by the next call and the
+ * function is not reentrant. NOTE(review): sprintf into a fixed 1000-byte
+ * buffer -- presumably target names never approach that length; confirm.
+ */
+static char const * target_name( TARGET * t )
+{
+    static char buf[ 1000 ];
+    if ( t->flags & T_FLAG_INTERNAL )
+    {
+        sprintf( buf, "%s (internal node)", object_str( t->name ) );
+        return buf;
+    }
+    return object_str( t->name );
+}
+
+
+/*
+ * dependGraphOutput() - output the DG after make0 has run.
+ */
+
+/* Recursively prints target 't' and its dependency subtree, indented by
+ * 'depth'. Marks each printed target with T_FLAG_VISITED so shared nodes are
+ * printed only once per dump.
+ */
+static void dependGraphOutput( TARGET * t, int depth )
+{
+    TARGETS * c;
+
+    /* Skip already-printed nodes and nodes with no name/binding. */
+    if ( ( t->flags & T_FLAG_VISITED ) || !t->name || !t->boundname )
+        return;
+
+    t->flags |= T_FLAG_VISITED;
+
+    /* Fates that will cause an update get an "->" marker prefix. */
+    switch ( t->fate )
+    {
+        case T_FATE_TOUCHED:
+        case T_FATE_MISSING:
+        case T_FATE_OUTDATED:
+        case T_FATE_UPDATE:
+            printf( "->%s%2d Name: %s\n", spaces( depth ), depth, target_name( t
+                ) );
+            break;
+        default:
+            printf( " %s%2d Name: %s\n", spaces( depth ), depth, target_name( t
+                ) );
+            break;
+    }
+
+    /* Show the bound file system location when it differs from the name. */
+    if ( !object_equal( t->name, t->boundname ) )
+        printf( " %s Loc: %s\n", spaces( depth ), object_str( t->boundname )
+            );
+
+    switch ( t->fate )
+    {
+        case T_FATE_STABLE:
+            printf( " %s : Stable\n", spaces( depth ) );
+            break;
+        case T_FATE_NEWER:
+            printf( " %s : Newer\n", spaces( depth ) );
+            break;
+        case T_FATE_ISTMP:
+            printf( " %s : Up to date temp file\n", spaces( depth ) );
+            break;
+        case T_FATE_NEEDTMP:
+            printf( " %s : Temporary file, to be updated\n", spaces( depth )
+                );
+            break;
+        case T_FATE_TOUCHED:
+            printf( " %s : Been touched, updating it\n", spaces( depth ) );
+            break;
+        case T_FATE_MISSING:
+            printf( " %s : Missing, creating it\n", spaces( depth ) );
+            break;
+        case T_FATE_OUTDATED:
+            printf( " %s : Outdated, updating it\n", spaces( depth ) );
+            break;
+        case T_FATE_REBUILD:
+            printf( " %s : Rebuild, updating it\n", spaces( depth ) );
+            break;
+        case T_FATE_UPDATE:
+            printf( " %s : Updating it\n", spaces( depth ) );
+            break;
+        case T_FATE_CANTFIND:
+            printf( " %s : Can not find it\n", spaces( depth ) );
+            break;
+        case T_FATE_CANTMAKE:
+            /* NOTE(review): message reads "Can make it" for T_FATE_CANTMAKE;
+             * presumably "Can not make it" was intended -- confirm against
+             * upstream b2 before changing this vendored copy.
+             */
+            printf( " %s : Can make it\n", spaces( depth ) );
+            break;
+    }
+
+    /* Dump any flags other than the VISITED marker we just set. */
+    if ( t->flags & ~T_FLAG_VISITED )
+    {
+        printf( " %s : ", spaces( depth ) );
+        if ( t->flags & T_FLAG_TEMP ) printf( "TEMPORARY " );
+        if ( t->flags & T_FLAG_NOCARE ) printf( "NOCARE " );
+        if ( t->flags & T_FLAG_NOTFILE ) printf( "NOTFILE " );
+        if ( t->flags & T_FLAG_TOUCHED ) printf( "TOUCHED " );
+        if ( t->flags & T_FLAG_LEAVES ) printf( "LEAVES " );
+        if ( t->flags & T_FLAG_NOUPDATE ) printf( "NOUPDATE " );
+        printf( "\n" );
+    }
+
+    /* First list the direct dependencies, flagging whichever one supplied this
+     * target's (maximum) timestamp, then recurse into each of them.
+     */
+    for ( c = t->depends; c; c = c->next )
+    {
+        printf( " %s : Depends on %s (%s)", spaces( depth ),
+            target_name( c->target ), target_fate[ (int)c->target->fate ] );
+        if ( !timestamp_cmp( &c->target->time, &t->time ) )
+            printf( " (max time)");
+        printf( "\n" );
+    }
+
+    for ( c = t->depends; c; c = c->next )
+        dependGraphOutput( c->target, depth + 1 );
+}
+#endif
+
+
+/*
+ * make0sort() - reorder TARGETS chain by their time (newest to oldest).
+ *
+ * We walk chain, taking each item and inserting it on the sorted result, with
+ * newest items at the front. This involves updating each of the TARGETS'
+ * c->next and c->tail. Note that we make c->tail a valid prev pointer for every
+ * entry. Normally, it is only valid at the head, where prev == tail. Note also
+ * that while tail is a loop, next ends at the end of the chain.
+ */
+
+/* Insertion sort over a doubly-linked TARGETS chain, newest timestamp first.
+ * O(n^2) in the chain length -- acceptable here since dependency lists are
+ * typically short. See the block comment above for the tail/prev invariants.
+ */
+static TARGETS * make0sort( TARGETS * chain )
+{
+    PROFILE_ENTER( MAKE_MAKE0SORT );
+
+    TARGETS * result = 0;
+
+    /* Walk the current target list. */
+    while ( chain )
+    {
+        TARGETS * c = chain;
+        TARGETS * s = result;
+
+        chain = chain->next;
+
+        /* Find point s in result for c. */
+        while ( s && timestamp_cmp( &s->target->time, &c->target->time ) > 0 )
+            s = s->next;
+
+        /* Insert c in front of s (might be 0). */
+        c->next = s; /* good even if s = 0 */
+        if ( result == s ) result = c; /* new head of chain? */
+        if ( !s ) s = result; /* wrap to ensure a next */
+        if ( result != c ) s->tail->next = c; /* not head? be prev's next */
+        c->tail = s->tail; /* take on next's prev */
+        s->tail = c; /* make next's prev us */
+    }
+
+    PROFILE_EXIT( MAKE_MAKE0SORT );
+    return result;
+}
+
+
+/* Module-level accumulator of target names explicitly marked for updating
+ * (e.g. via the UPDATE rule). Owned by this module; entries are copies.
+ */
+static LIST * targets_to_update_ = L0;
+
+
+/* Appends a copy of 'target' to the pending-update list. */
+void mark_target_for_updating( OBJECT * target )
+{
+    targets_to_update_ = list_push_back( targets_to_update_, object_copy(
+        target ) );
+}
+
+
+/* Returns the accumulated pending-update list. The caller must not free it --
+ * ownership stays with this module (see clear_targets_to_update()).
+ */
+LIST * targets_to_update()
+{
+    return targets_to_update_;
+}
+
+
+/* Frees the pending-update list and resets it to empty. */
+void clear_targets_to_update()
+{
+    list_free( targets_to_update_ );
+    targets_to_update_ = L0;
+}
diff --git a/src/kenlm/jam-files/engine/make.h b/src/kenlm/jam-files/engine/make.h
new file mode 100644
index 0000000..2c3ba16
--- /dev/null
+++ b/src/kenlm/jam-files/engine/make.h
@@ -0,0 +1,44 @@
+/*
+ * Copyright 1993, 1995 Christopher Seiwald.
+ *
+ * This file is part of Jam - see jam.c for Copyright information.
+ */
+
+/*
+ * make.h - bring a target up to date, once rules are in place
+ */
+
+#ifndef MAKE_SW20111118_H
+#define MAKE_SW20111118_H
+
+#include "lists.h"
+#include "object.h"
+#include "rules.h"
+
+/* make()  - dependency analysis (make0) plus update execution (make1).
+ * make1() - execute commands to bring the listed targets up to date.
+ */
+int make( LIST * targets, int anyhow );
+int make1( LIST * t );
+
+/* Tallies reported by the make0 dependency-analysis pass. */
+typedef struct {
+    int temp; /* targets that are up-to-date temp files */
+    int updating; /* targets with actions needing an update */
+    int cantfind; /* targets that could not be found */
+    int cantmake; /* targets that could not be made */
+    int targets; /* total targets visited */
+    int made; /* targets actually made */
+} COUNTS ;
+
+
+void make0( TARGET * t, TARGET * p, int depth, COUNTS * counts, int anyhow,
+    TARGET * rescanning );
+
+
+/* Specifies that the target should be updated. */
+void mark_target_for_updating( OBJECT * target );
+
+/* Returns targets previously passed to mark_target_for_updating(). */
+LIST * targets_to_update();
+
+/* Clears/unmarks all targets currently marked for update. */
+void clear_targets_to_update();
+
+#endif
diff --git a/src/kenlm/jam-files/engine/make1.c b/src/kenlm/jam-files/engine/make1.c
new file mode 100644
index 0000000..71eee12
--- /dev/null
+++ b/src/kenlm/jam-files/engine/make1.c
@@ -0,0 +1,1283 @@
+/*
+ * Copyright 1993-2002 Christopher Seiwald and Perforce Software, Inc.
+ *
+ * This file is part of Jam - see jam.c for Copyright information.
+ */
+
+/* This file is ALSO:
+ * Copyright 2001-2004 David Abrahams.
+ * Distributed under the Boost Software License, Version 1.0.
+ * (See accompanying file LICENSE_1_0.txt or http://www.boost.org/LICENSE_1_0.txt)
+ */
+
+/*
+ * make1.c - execute commands to bring targets up to date
+ *
+ * This module contains make1(), the entry point called by make() to recursively
+ * descend the dependency graph executing update actions as marked by make0().
+ *
+ * External routines:
+ * make1() - execute commands to update a TARGET and all of its dependencies
+ *
+ * Internal routines, the recursive/asynchronous command executors:
+ * make1a() - recursively schedules dependency builds and then goes to
+ * MAKE1B
+ * make1b() - if nothing is blocking this target's build, proceed to
+ * MAKE1C
+ * make1c() - launch target's next command, or go to parents' MAKE1B
+ * if none
+ * make1c_closure() - handle command execution completion and go to MAKE1C
+ *
+ * Internal support routines:
+ * make1cmds() - turn ACTIONS into CMDs, grouping, splitting, etc.
+ * make1list() - turn a list of targets into a LIST, for $(<) and $(>)
+ * make1settings() - for vars with bound values, build up replacement lists
+ * make1bind() - bind targets that weren't bound in dependency analysis
+ */
+
+#include "jam.h"
+#include "make.h"
+
+#include "command.h"
+#include "compile.h"
+#include "execcmd.h"
+#include "headers.h"
+#include "lists.h"
+#include "object.h"
+#include "output.h"
+#include "parse.h"
+#include "rules.h"
+#include "search.h"
+#include "variable.h"
+
+#include <assert.h>
+#include <stdlib.h>
+
+#if !defined( NT ) || defined( __GNUC__ )
+ #include <unistd.h> /* for unlink */
+#endif
+
+static CMD * make1cmds ( TARGET * );
+static LIST * make1list ( LIST *, TARGETS *, int flags );
+static SETTINGS * make1settings ( struct module_t *, LIST * vars );
+static void make1bind ( TARGET * );
+/* NOTE(review): make1findcycle()/make1breakcycle() are declared here but no
+ * definition is visible in this chunk -- confirm they are defined (or remove
+ * the declarations) before relying on them.
+ */
+static TARGET * make1findcycle ( TARGET * );
+static void make1breakcycle( TARGET *, TARGET * cycle_root );
+
+/* Ugly static - it is too hard to carry it through the callbacks. */
+
+/* Update-pass tallies, reported at the end of make1(). Declared as a
+ * one-element array so it can be passed around as a pointer.
+ */
+static struct
+{
+    int failed; /* targets whose update actions failed */
+    int skipped; /* targets skipped due to failed dependencies */
+    int total; /* targets for which commands were prepared */
+    int made; /* targets successfully updated */
+} counts[ 1 ];
+
+/* Target state. */
+#define T_STATE_MAKE1A 0 /* make1a() should be called */
+#define T_STATE_MAKE1B 1 /* make1b() should be called */
+#define T_STATE_MAKE1C 2 /* make1c() should be called */
+
+/* One frame of the explicit (heap-allocated) state-machine stack that
+ * replaces native recursion over the dependency graph.
+ */
+typedef struct _state state;
+struct _state
+{
+    state * prev; /* previous state on stack */
+    TARGET * t; /* current target */
+    TARGET * parent; /* parent argument necessary for MAKE1A */
+    int curstate; /* current state */
+};
+
+static void make1a( state * const );
+static void make1b( state * const );
+static void make1c( state const * const );
+
+static void make1c_closure( void * const closure, int status,
+    timing_info const * const, char const * const cmd_stdout,
+    char const * const cmd_stderr, int const cmd_exit_reason );
+
+typedef struct _stack
+{
+    state * stack;
+} stack;
+
+/* The single global state stack driving the MAKE1A/B/C machine. */
+static stack state_stack = { NULL };
+
+/* Freelist of recycled state frames (see alloc_state()/free_state()). */
+static state * state_freelist = NULL;
+
+/* Currently running command counter. */
+static int cmdsrunning;
+
+
+/* Returns a zeroed state frame, reusing one from the freelist when available
+ * to avoid malloc/free churn during the build.
+ */
+static state * alloc_state()
+{
+    if ( state_freelist )
+    {
+        state * const pState = state_freelist;
+        state_freelist = pState->prev;
+        memset( pState, 0, sizeof( state ) );
+        return pState;
+    }
+    /* Freshly malloc'ed frames are NOT zeroed here; push_state() assigns every
+     * field.
+     */
+    return (state *)BJAM_MALLOC( sizeof( state ) );
+}
+
+
+/* Returns a state frame to the freelist (memory is only released in
+ * clear_state_freelist()). The frame's contents become stale immediately.
+ */
+static void free_state( state * const pState )
+{
+    pState->prev = state_freelist;
+    state_freelist = pState;
+}
+
+
+/* Releases all recycled state frames back to the allocator. Called once at
+ * the end of make1().
+ */
+static void clear_state_freelist()
+{
+    while ( state_freelist )
+    {
+        state * const pState = state_freelist;
+        state_freelist = state_freelist->prev;
+        BJAM_FREE( pState );
+    }
+}
+
+
+/* Returns the top state frame without popping it, or NULL when empty. */
+static state * current_state( stack * const pStack )
+{
+    return pStack->stack;
+}
+
+
+/* Pops and recycles the top state frame; no-op on an empty stack. Any pointer
+ * to the popped frame is invalid afterwards (the frame may be reused by the
+ * very next push_state()).
+ */
+static void pop_state( stack * const pStack )
+{
+    if ( pStack->stack )
+    {
+        state * const pState = pStack->stack->prev;
+        free_state( pStack->stack );
+        pStack->stack = pState;
+    }
+}
+
+
+/* Pushes a new frame (target, parent, machine state) onto 'pStack' and
+ * returns it (now the top of the stack).
+ */
+static state * push_state( stack * const pStack, TARGET * const t,
+    TARGET * const parent, int const curstate )
+{
+    state * const pState = alloc_state();
+    pState->t = t;
+    pState->parent = parent;
+    pState->prev = pStack->stack;
+    pState->curstate = curstate;
+    return pStack->stack = pState;
+}
+
+
+/*
+ * Pushes a stack onto another stack, effectively reversing the order.
+ */
+
+/* Moves every frame from pSrc onto pDest, one at a time, which reverses their
+ * order. pSrc is left empty. Used so states pushed in natural order onto a
+ * temporary stack come off the main stack in that same natural order.
+ */
+static void push_stack_on_stack( stack * const pDest, stack * const pSrc )
+{
+    while ( pSrc->stack )
+    {
+        state * const pState = pSrc->stack;
+        pSrc->stack = pState->prev;
+        pState->prev = pDest->stack;
+        pDest->stack = pState;
+    }
+}
+
+
+/*
+ * make1() - execute commands to update a list of targets and all of their dependencies
+ */
+
+/* intr: set when a command was interrupted (build exits after children
+ * finish). quit: set to abort all remaining work (interrupt or -q failure).
+ */
+static int intr = 0;
+static int quit = 0;
+
+/* Drives the MAKE1A/B/C state machine over 'targets' and their dependencies,
+ * waiting on outstanding commands between drains of the state stack. Returns
+ * 0 when every listed target was brought up to date, 1 otherwise.
+ */
+int make1( LIST * targets )
+{
+    state * pState;
+    int status = 0;
+
+    memset( (char *)counts, 0, sizeof( *counts ) );
+
+    /* Seed the state stack with a MAKE1A frame per requested target, using a
+     * temporary stack so they execute in the given order.
+     */
+    {
+        LISTITER iter, end;
+        stack temp_stack = { NULL };
+        for ( iter = list_begin( targets ), end = list_end( targets );
+            iter != end; iter = list_next( iter ) )
+            push_state( &temp_stack, bindtarget( list_item( iter ) ), NULL, T_STATE_MAKE1A );
+        push_stack_on_stack( &state_stack, &temp_stack );
+    }
+
+    /* Clear any state left over from the past */
+    quit = 0;
+
+    /* Recursively make the target and its dependencies. */
+
+    while ( 1 )
+    {
+        while ( ( pState = current_state( &state_stack ) ) )
+        {
+            if ( quit )
+                pop_state( &state_stack );
+
+            /* NOTE(review): on the quit path above, pState still points at the
+             * just-popped (freelisted) frame when the switch below runs --
+             * the memory remains allocated so this "works", but it reads
+             * stale state. Kept as-is to match upstream; confirm there.
+             */
+            switch ( pState->curstate )
+            {
+                case T_STATE_MAKE1A: make1a( pState ); break;
+                case T_STATE_MAKE1B: make1b( pState ); break;
+                case T_STATE_MAKE1C: make1c( pState ); break;
+                default:
+                    assert( !"make1(): Invalid state detected." );
+            }
+        }
+        if ( !cmdsrunning )
+            break;
+        /* Wait for outstanding commands to finish running. */
+        exec_wait();
+    }
+
+    clear_state_freelist();
+
+    /* Talk about it. */
+    if ( counts->failed )
+        printf( "...failed updating %d target%s...\n", counts->failed,
+            counts->failed > 1 ? "s" : "" );
+    if ( DEBUG_MAKE && counts->skipped )
+        printf( "...skipped %d target%s...\n", counts->skipped,
+            counts->skipped > 1 ? "s" : "" );
+    if ( DEBUG_MAKE && counts->made )
+        printf( "...updated %d target%s...\n", counts->made,
+            counts->made > 1 ? "s" : "" );
+
+    /* If we were interrupted, exit now that all child processes
+       have finished. */
+    if ( intr )
+        exit( 1 );
+
+    {
+        LISTITER iter, end;
+        for ( iter = list_begin( targets ), end = list_end( targets );
+            iter != end; iter = list_next( iter ) )
+        {
+            /* Check that the target was updated and that the
+               update succeeded. */
+            TARGET * t = bindtarget( list_item( iter ) );
+            if (t->progress == T_MAKE_DONE)
+            {
+                if (t->status != EXEC_CMD_OK)
+                    status = 1;
+            }
+            else if ( ! ( t->progress == T_MAKE_NOEXEC_DONE && globs.noexec ) )
+            {
+                status = 1;
+            }
+        }
+    }
+    return status;
+}
+
+
+/*
+ * make1a() - recursively schedules dependency builds and then goes to MAKE1B
+ *
+ * Called to start processing a specified target. Does nothing if the target is
+ * already being processed or otherwise starts processing all of its
+ * dependencies.
+ */
+
+/* MAKE1A: begin processing target pState->t. Redirects to the target's
+ * strongly-connected-component root where appropriate, registers the parent
+ * for completion notification, schedules MAKE1A for every dependency, and
+ * morphs the current frame into MAKE1B.
+ */
+static void make1a( state * const pState )
+{
+    TARGET * t = pState->t;
+    TARGET * const scc_root = target_scc( t );
+
+    /* Build the SCC root in place of any member reached from outside its
+     * component.
+     */
+    if ( !pState->parent || target_scc( pState->parent ) != scc_root )
+        pState->t = t = scc_root;
+
+    /* If the parent is the first to try to build this target or this target is
+     * in the MAKE1C quagmire, arrange for the parent to be notified when this
+     * target has been built.
+     */
+    if ( pState->parent && t->progress <= T_MAKE_RUNNING )
+    {
+        TARGET * const parent_scc = target_scc( pState->parent );
+        if ( t != parent_scc )
+        {
+            t->parents = targetentry( t->parents, parent_scc );
+            ++parent_scc->asynccnt;
+        }
+    }
+
+    /* If the target has been previously updated with -n in effect, and we are
+     * now ignoring -n, update it for real. E.g. if the UPDATE_NOW rule was
+     * called for it twice - first with the -n option and then without.
+     */
+    if ( !globs.noexec && t->progress == T_MAKE_NOEXEC_DONE )
+        t->progress = T_MAKE_INIT;
+
+    /* If this target is already being processed then do nothing. There is no
+     * need to start processing the same target all over again.
+     */
+    if ( t->progress != T_MAKE_INIT )
+    {
+        pop_state( &state_stack );
+        return;
+    }
+
+    /* Guard against circular dependencies. */
+    t->progress = T_MAKE_ONSTACK;
+
+    /* 'asynccnt' counts the dependencies preventing this target from proceeding
+     * to MAKE1C for actual building. We start off with a count of 1 to prevent
+     * anything from happening until we can notify all dependencies that they
+     * are needed. This 1 is then accounted for when we enter MAKE1B ourselves,
+     * below. Without this if a dependency gets built before we finish
+     * processing all of our other dependencies our build might be triggered
+     * prematurely.
+     */
+    t->asynccnt = 1;
+
+    /* Push dependency build requests (to be executed in the natural order). */
+    {
+        stack temp_stack = { NULL };
+        TARGETS * c;
+        for ( c = t->depends; c && !quit; c = c->next )
+            push_state( &temp_stack, c->target, t, T_STATE_MAKE1A );
+        push_stack_on_stack( &state_stack, &temp_stack );
+    }
+
+    t->progress = T_MAKE_ACTIVE;
+
+    /* Once all of our dependencies have started getting processed we can move
+     * onto MAKE1B.
+     */
+    /* Implementation note:
+     *     In theory this would be done by popping this state before pushing
+     * dependency target build requests but as a slight optimization we simply
+     * modify our current state and leave it on the stack instead.
+     */
+    pState->curstate = T_STATE_MAKE1B;
+}
+
+
+/*
+ * make1b() - if nothing is blocking this target's build, proceed to MAKE1C
+ *
+ * Called after something stops blocking this target's build, e.g. that all of
+ * its dependencies have started being processed, one of its dependencies has
+ * been built or a semaphore this target has been waiting for is free again.
+ */
+
+/* MAKE1B: once the last outstanding dependency completes (asynccnt reaches
+ * zero), collects dependency build status, decides by fate whether commands
+ * must run, prepares them via make1cmds(), and morphs the frame into MAKE1C.
+ */
+static void make1b( state * const pState )
+{
+    TARGET * const t = pState->t;
+    TARGET * failed = 0;
+    char const * failed_name = "dependencies";
+
+    /* If any dependencies are still outstanding, wait until they signal their
+     * completion by pushing this same state for their parent targets.
+     */
+    if ( --t->asynccnt )
+    {
+        pop_state( &state_stack );
+        return;
+    }
+
+    /* Try to acquire a semaphore. If it is locked, wait until the target that
+     * locked it is built and signals completion.
+     */
+#ifdef OPT_SEMAPHORE
+    if ( t->semaphore && t->semaphore->asynccnt )
+    {
+        /* Append 't' to the list of targets waiting on semaphore. */
+        t->semaphore->parents = targetentry( t->semaphore->parents, t );
+        /* Re-arm our own count so the semaphore release re-enters MAKE1B. */
+        t->asynccnt++;
+
+        if ( DEBUG_EXECCMD )
+            printf( "SEM: %s is busy, delaying launch of %s\n",
+                object_str( t->semaphore->name ), object_str( t->name ) );
+        pop_state( &state_stack );
+        return;
+    }
+#endif
+
+    /* Now ready to build target 't', if dependencies built OK. */
+
+    /* Collect status from dependencies. If -n was passed then act as though all
+     * dependencies built correctly (the only way they can fail is if UPDATE_NOW
+     * was called). If the dependencies can not be found or we got an interrupt,
+     * we can not get here.
+     */
+    if ( !globs.noexec )
+    {
+        TARGETS * c;
+        for ( c = t->depends; c; c = c->next )
+            if ( c->target->status > t->status && !( c->target->flags &
+                T_FLAG_NOCARE ) )
+            {
+                failed = c->target;
+                t->status = c->target->status;
+            }
+    }
+
+    /* If an internal header node failed to build, we want to output the target
+     * that it failed on.
+     */
+    if ( failed )
+        failed_name = failed->flags & T_FLAG_INTERNAL
+            ? failed->failed
+            : object_str( failed->name );
+    t->failed = failed_name;
+
+    /* If actions for building any of the dependencies have failed, bail.
+     * Otherwise, execute all actions to make the current target.
+     */
+    if ( ( t->status == EXEC_CMD_FAIL ) && t->actions )
+    {
+        ++counts->skipped;
+        if ( ( t->flags & ( T_FLAG_RMOLD | T_FLAG_NOTFILE ) ) == T_FLAG_RMOLD )
+        {
+            if ( !unlink( object_str( t->boundname ) ) )
+                printf( "...removing outdated %s\n", object_str( t->boundname )
+                    );
+        }
+        else
+            printf( "...skipped %s for lack of %s...\n", object_str( t->name ),
+                failed_name );
+    }
+
+    if ( t->status == EXEC_CMD_OK )
+        switch ( t->fate )
+        {
+            case T_FATE_STABLE:
+            case T_FATE_NEWER:
+                break;
+
+            case T_FATE_CANTFIND:
+            case T_FATE_CANTMAKE:
+                t->status = EXEC_CMD_FAIL;
+                break;
+
+            case T_FATE_ISTMP:
+                if ( DEBUG_MAKE )
+                    printf( "...using %s...\n", object_str( t->name ) );
+                break;
+
+            case T_FATE_TOUCHED:
+            case T_FATE_MISSING:
+            case T_FATE_NEEDTMP:
+            case T_FATE_OUTDATED:
+            case T_FATE_UPDATE:
+            case T_FATE_REBUILD:
+                /* Prepare commands for executing actions scheduled for this target.
+                 * Commands have their embedded variables automatically expanded,
+                 * including making use of any "on target" variables.
+                 */
+                if ( t->actions )
+                {
+                    ++counts->total;
+                    if ( DEBUG_MAKE && !( counts->total % 100 ) )
+                        printf( "...on %dth target...\n", counts->total );
+
+                    t->cmds = (char *)make1cmds( t );
+                    /* Update the target's "progress" so MAKE1C processing counts it
+                     * among its successes/failures.
+                     */
+                    t->progress = T_MAKE_RUNNING;
+                }
+                break;
+
+            /* All valid fates should have been accounted for by now. */
+            default:
+                printf( "ERROR: %s has bad fate %d", object_str( t->name ),
+                    t->fate );
+                abort();
+        }
+
+#ifdef OPT_SEMAPHORE
+    /* If there is a semaphore, indicate that it is in use. */
+    if ( t->semaphore )
+    {
+        ++t->semaphore->asynccnt;
+        if ( DEBUG_EXECCMD )
+            printf( "SEM: %s now used by %s\n", object_str( t->semaphore->name
+                ), object_str( t->name ) );
+    }
+#endif
+
+    /* Proceed to MAKE1C to begin executing the chain of commands prepared for
+     * building the target. If we are not going to build the target (e.g. due to
+     * dependency failures or no commands needing to be run) the chain will be
+     * empty and MAKE1C processing will directly signal the target's completion.
+     */
+    /* Implementation note:
+     *     Morphing the current state on the stack instead of popping it and
+     * pushing a new one is a slight optimization with no side-effects since we
+     * pushed no other states while processing this one.
+     */
+    pState->curstate = T_STATE_MAKE1C;
+}
+
+
+/*
+ * make1c() - launch target's next command, or go to parents' MAKE1B if none
+ *
+ * If there are (more) commands to run to build this target (and we have not hit
+ * an error running earlier comands) we launch the command using exec_cmd().
+ * Command execution signals its completion in exec_wait() by calling our
+ * make1c_closure() callback.
+ *
+ * If there are no more commands to run, we collect the status from all the
+ * actions and report our completion to all the parents.
+ */
+
+/* MAKE1C: run the next queued command for the target (completion re-enters
+ * this state via make1c_closure()), or -- when the chain is exhausted or the
+ * target failed -- finalize status, optionally rescan headers, release any
+ * semaphore, and wake the parents' MAKE1B states.
+ */
+static void make1c( state const * const pState )
+{
+    TARGET * const t = pState->t;
+    CMD * const cmd = (CMD *)t->cmds;
+
+    if ( cmd && t->status == EXEC_CMD_OK )
+    {
+        /* Pop state first in case something below (e.g. exec_cmd(), exec_wait()
+         * or make1c_closure()) pushes a new state. Note that we must not access
+         * the popped state data after this as the same stack node might have
+         * been reused internally for some newly pushed state.
+         */
+        pop_state( &state_stack );
+
+        /* Increment the jobs running counter. */
+        ++cmdsrunning;
+
+        /* Execute the actual build command or fake it if no-op. */
+        if ( globs.noexec || cmd->noop )
+        {
+            timing_info time_info = { 0 };
+            timestamp_current( &time_info.start );
+            timestamp_copy( &time_info.end, &time_info.start );
+            make1c_closure( t, EXEC_CMD_OK, &time_info, "", "", EXIT_OK );
+        }
+        else
+        {
+            exec_cmd( cmd->buf, make1c_closure, t, cmd->shell );
+
+            /* Wait until under the concurrent command count limit. */
+            /* FIXME: This wait could be skipped here and moved to just before
+             * trying to execute a command that would cross the command count
+             * limit. Note though that this might affect the order in which
+             * unrelated targets get built and would thus require that all
+             * affected Boost Build tests be updated.
+             */
+            assert( 0 < globs.jobs );
+            assert( globs.jobs <= MAXJOBS );
+            while ( cmdsrunning >= globs.jobs )
+                exec_wait();
+        }
+    }
+    else
+    {
+        ACTIONS * actions;
+
+        /* Collect status from actions, and distribute it as well. */
+        for ( actions = t->actions; actions; actions = actions->next )
+            if ( actions->action->status > t->status )
+                t->status = actions->action->status;
+        for ( actions = t->actions; actions; actions = actions->next )
+            if ( t->status > actions->action->status )
+                actions->action->status = t->status;
+
+        /* Tally success/failure for those we tried to update. */
+        if ( t->progress == T_MAKE_RUNNING )
+            switch ( t->status )
+            {
+                case EXEC_CMD_OK: ++counts->made; break;
+                case EXEC_CMD_FAIL: ++counts->failed; break;
+            }
+
+        /* Tell parents their dependency has been built. */
+        {
+            TARGETS * c;
+            stack temp_stack = { NULL };
+            TARGET * additional_includes = NULL;
+
+            t->progress = globs.noexec ? T_MAKE_NOEXEC_DONE : T_MAKE_DONE;
+
+            /* Target has been updated so rescan it for dependencies. */
+            if ( t->fate >= T_FATE_MISSING && t->status == EXEC_CMD_OK &&
+                !( t->flags & T_FLAG_INTERNAL ) )
+            {
+                TARGET * saved_includes;
+                SETTINGS * s;
+
+                t->rescanned = 1;
+
+                /* Clean current includes. */
+                saved_includes = t->includes;
+                t->includes = 0;
+
+                s = copysettings( t->settings );
+                pushsettings( root_module(), s );
+                headers( t );
+                popsettings( root_module(), s );
+                freesettings( s );
+
+                if ( t->includes )
+                {
+                    /* Tricky. The parents have already been processed, but they
+                     * have not seen the internal node, because it was just
+                     * created. We need to:
+                     * - push MAKE1A states that would have been pushed by the
+                     * parents here
+                     * - make sure all unprocessed parents will pick up the
+                     * new includes
+                     * - make sure processing the additional MAKE1A states is
+                     * done before processing the MAKE1B state for our
+                     * current target (which would mean this target has
+                     * already been built), otherwise the parent would be
+                     * considered built before the additional MAKE1A state
+                     * processing even got a chance to start.
+                     */
+                    /* NOTE(review): dereferences t->parents->target -- this
+                     * path presumably only runs for targets that have at
+                     * least one registered parent; confirm.
+                     */
+                    make0( t->includes, t->parents->target, 0, 0, 0, t->includes
+                        );
+                    /* Link the old includes on to make sure that it gets
+                     * cleaned up correctly.
+                     */
+                    t->includes->includes = saved_includes;
+                    for ( c = t->dependants; c; c = c->next )
+                        c->target->depends = targetentry( c->target->depends,
+                            t->includes );
+                    /* Will be processed below. */
+                    additional_includes = t->includes;
+                }
+                else
+                {
+                    t->includes = saved_includes;
+                }
+            }
+
+            if ( additional_includes )
+                for ( c = t->parents; c; c = c->next )
+                    push_state( &temp_stack, additional_includes, c->target,
+                        T_STATE_MAKE1A );
+
+            /* SCC members notify fellow members directly; parents outside the
+             * component are transferred to the SCC root for later notification.
+             */
+            if ( t->scc_root )
+            {
+                TARGET * const scc_root = target_scc( t );
+                assert( scc_root->progress < T_MAKE_DONE );
+                for ( c = t->parents; c; c = c->next )
+                {
+                    if ( target_scc( c->target ) == scc_root )
+                        push_state( &temp_stack, c->target, NULL, T_STATE_MAKE1B
+                            );
+                    else
+                        scc_root->parents = targetentry( scc_root->parents,
+                            c->target );
+                }
+            }
+            else
+            {
+                for ( c = t->parents; c; c = c->next )
+                    push_state( &temp_stack, c->target, NULL, T_STATE_MAKE1B );
+            }
+
+#ifdef OPT_SEMAPHORE
+            /* If there is a semaphore, it is now free. */
+            if ( t->semaphore )
+            {
+                assert( t->semaphore->asynccnt == 1 );
+                --t->semaphore->asynccnt;
+
+                if ( DEBUG_EXECCMD )
+                    printf( "SEM: %s is now free\n", object_str(
+                        t->semaphore->name ) );
+
+                /* If anything is waiting, notify the next target. There is no
+                 * point in notifying all waiting targets, since they will be
+                 * notified again.
+                 */
+                if ( t->semaphore->parents )
+                {
+                    TARGETS * first = t->semaphore->parents;
+                    t->semaphore->parents = first->next;
+                    if ( first->next )
+                        first->next->tail = first->tail;
+
+                    if ( DEBUG_EXECCMD )
+                        printf( "SEM: placing %s on stack\n", object_str(
+                            first->target->name ) );
+                    push_state( &temp_stack, first->target, NULL, T_STATE_MAKE1B
+                        );
+                    BJAM_FREE( first );
+                }
+            }
+#endif
+
+            /* Must pop state before pushing any more. */
+            pop_state( &state_stack );
+
+            /* Using stacks reverses the order of execution. Reverse it back. */
+            push_stack_on_stack( &state_stack, &temp_stack );
+        }
+    }
+}
+
+
+/*
+ * call_timing_rule() - Look up the __TIMING_RULE__ variable on the given
+ * target, and if non-empty, invoke the rule it names, passing the given
+ * timing_info.
+ */
+
+/* Invokes the user rule named by __TIMING_RULE__ (looked up with the target's
+ * settings pushed), passing extra args, the target name and the action's
+ * start/end/user/system times. No-op when the variable is empty.
+ */
+static void call_timing_rule( TARGET * target, timing_info const * const time )
+{
+    LIST * timing_rule;
+
+    pushsettings( root_module(), target->settings );
+    timing_rule = var_get( root_module(), constant_TIMING_RULE );
+    popsettings( root_module(), target->settings );
+
+    if ( !list_empty( timing_rule ) )
+    {
+        /* rule timing-rule ( args * : target : start end user system ) */
+
+        /* Prepare the argument list. */
+        FRAME frame[ 1 ];
+        OBJECT * rulename = list_front( timing_rule );
+        frame_init( frame );
+
+        /* args * :: $(__TIMING_RULE__[2-]) */
+        lol_add( frame->args, list_copy_range( timing_rule, list_next(
+            list_begin( timing_rule ) ), list_end( timing_rule ) ) );
+
+        /* target :: the name of the target */
+        lol_add( frame->args, list_new( object_copy( target->name ) ) );
+
+        /* start end user system :: info about the action command */
+        lol_add( frame->args, list_push_back( list_push_back( list_push_back( list_new(
+            outf_time( &time->start ) ),
+            outf_time( &time->end ) ),
+            outf_double( time->user ) ),
+            outf_double( time->system ) ) );
+
+        /* Call the rule. */
+        evaluate_rule( bindrule( rulename , root_module() ), rulename, frame );
+
+        /* Clean up. */
+        frame_free( frame );
+    }
+}
+
+
+/*
+ * call_action_rule() - Look up the __ACTION_RULE__ variable on the given
+ * target, and if non-empty, invoke the rule it names, passing the given info,
+ * timing_info, executed command and command output.
+ */
+
+/* Invokes the user rule named by __ACTION_RULE__ (looked up with the target's
+ * settings pushed), passing extra args, the target name, the executed command
+ * with its status and timing, and the command output (or an empty list when
+ * none). No-op when the variable is empty.
+ */
+static void call_action_rule
+(
+    TARGET * target,
+    int status,
+    timing_info const * time,
+    char const * executed_command,
+    char const * command_output
+)
+{
+    LIST * action_rule;
+
+    pushsettings( root_module(), target->settings );
+    action_rule = var_get( root_module(), constant_ACTION_RULE );
+    popsettings( root_module(), target->settings );
+
+    if ( !list_empty( action_rule ) )
+    {
+        /* rule action-rule (
+            args * :
+            target :
+            command status start end user system :
+            output ? ) */
+
+        /* Prepare the argument list. */
+        FRAME frame[ 1 ];
+        OBJECT * rulename = list_front( action_rule );
+        frame_init( frame );
+
+        /* args * :: $(__ACTION_RULE__[2-]) */
+        lol_add( frame->args, list_copy_range( action_rule, list_next(
+            list_begin( action_rule ) ), list_end( action_rule ) ) );
+
+        /* target :: the name of the target */
+        lol_add( frame->args, list_new( object_copy( target->name ) ) );
+
+        /* command status start end user system :: info about the action command
+         */
+        lol_add( frame->args,
+            list_push_back( list_push_back( list_push_back( list_push_back( list_push_back( list_new(
+            object_new( executed_command ) ),
+            outf_int( status ) ),
+            outf_time( &time->start ) ),
+            outf_time( &time->end ) ),
+            outf_double( time->user ) ),
+            outf_double( time->system ) ) );
+
+        /* output ? :: the output of the action command */
+        if ( command_output )
+            lol_add( frame->args, list_new( object_new( command_output ) ) );
+        else
+            lol_add( frame->args, L0 );
+
+        /* Call the rule. */
+        evaluate_rule( bindrule( rulename, root_module() ), rulename, frame );
+
+        /* Clean up. */
+        frame_free( frame );
+    }
+}
+
+
+/*
+ * make1c_closure() - handle command execution completion and go to MAKE1C.
+ *
+ * Internal function passed as a notification callback for when a command
+ * finishes getting executed by the OS or called directly when faking that a
+ * command had been executed by the OS.
+ *
+ * Now all we need to do is fiddle with the command exit status and push a new
+ * MAKE1C state to execute the next command scheduled for building this target
+ * or close up the target's build process in case there are no more commands
+ * scheduled for it. On interrupts, we bail heavily.
+ */
+
+static void make1c_closure
+(
+    void * const closure,
+    int status_orig,
+    timing_info const * const time,
+    char const * const cmd_stdout,
+    char const * const cmd_stderr,
+    int const cmd_exit_reason
+)
+{
+    TARGET * const t = (TARGET *)closure;
+    CMD * const cmd = (CMD *)t->cmds;
+    char const * rule_name = 0;
+    char const * target_name = 0;
+
+    assert( cmd );
+
+    --cmdsrunning;
+
+    /* Calculate the target's status from the cmd execution result. */
+    {
+        /* Store the target's status. */
+        t->status = status_orig;
+
+        /* Invert OK/FAIL target status when FAIL_EXPECTED has been applied. */
+        if ( t->flags & T_FLAG_FAIL_EXPECTED && !globs.noexec )
+        {
+            switch ( t->status )
+            {
+                case EXEC_CMD_FAIL: t->status = EXEC_CMD_OK; break;
+                case EXEC_CMD_OK: t->status = EXEC_CMD_FAIL; break;
+            }
+        }
+
+        /* Ignore failures for actions marked as 'ignore'. */
+        if ( t->status == EXEC_CMD_FAIL && cmd->rule->actions->flags &
+            RULE_IGNORE )
+            t->status = EXEC_CMD_OK;
+    }
+
+    /* Only resolve the rule/target names when the action will be echoed. */
+    if ( DEBUG_MAKEQ ||
+        ( DEBUG_MAKE && !( cmd->rule->actions->flags & RULE_QUIETLY ) ) )
+    {
+        rule_name = object_str( cmd->rule->name );
+        target_name = object_str( list_front( lol_get( (LOL *)&cmd->args, 0 ) )
+            );
+    }
+
+    out_action( rule_name, target_name, cmd->buf->value, cmd_stdout, cmd_stderr,
+        cmd_exit_reason );
+
+    if ( !globs.noexec )
+    {
+        call_timing_rule( t, time );
+        if ( DEBUG_EXECCMD )
+            printf( "%f sec system; %f sec user\n", time->system, time->user );
+
+        /* Assume -p0 is in effect, i.e. cmd_stdout contains merged output. */
+        call_action_rule( t, status_orig, time, cmd->buf->value, cmd_stdout );
+    }
+
+    /* Print command text on failure. */
+    if ( t->status == EXEC_CMD_FAIL && DEBUG_MAKE )
+    {
+        if ( !DEBUG_EXEC )
+            printf( "%s\n", cmd->buf->value );
+
+        printf( "...failed %s ", object_str( cmd->rule->name ) );
+        list_print( lol_get( (LOL *)&cmd->args, 0 ) );
+        printf( "...\n" );
+    }
+
+    /* On interrupt, set quit so _everything_ fails. Do the same for failed
+     * commands if we were asked to stop the build in case of any errors.
+     */
+    if ( t->status == EXEC_CMD_INTR )
+    {
+        ++intr;
+        ++quit;
+    }
+    if ( t->status == EXEC_CMD_FAIL && globs.quitquick )
+        ++quit;
+
+    /* If the command was not successful remove all of its targets not marked as
+     * "precious".
+     */
+    if ( t->status != EXEC_CMD_OK )
+    {
+        LIST * const targets = lol_get( (LOL *)&cmd->args, 0 );
+        LISTITER iter = list_begin( targets );
+        LISTITER const end = list_end( targets );
+        for ( ; iter != end; iter = list_next( iter ) )
+        {
+            char const * const filename = object_str( list_item( iter ) );
+            /* Note: this inner 't' deliberately shadows the outer target
+             * variable for the remainder of this loop body.
+             */
+            TARGET const * const t = bindtarget( list_item( iter ) );
+            if ( !( t->flags & T_FLAG_PRECIOUS ) && !unlink( filename ) )
+                printf( "...removing %s\n", filename );
+        }
+    }
+
+    /* Free this command and push the MAKE1C state to execute the next one
+     * scheduled for building this same target.
+     */
+    t->cmds = (char *)cmd_next( cmd );
+    cmd_free( cmd );
+    push_state( &state_stack, t, NULL, T_STATE_MAKE1C );
+}
+
+
+/*
+ * swap_settings() - replace the settings from the current module and target
+ * with those from the new module and target
+ */
+
+static void swap_settings
+(
+ module_t * * current_module,
+ TARGET * * current_target,
+ module_t * new_module,
+ TARGET * new_target
+)
+{
+ if ( ( new_target == *current_target ) &&
+ ( new_module == *current_module ) )
+ return;
+
+ if ( *current_target )
+ popsettings( *current_module, (*current_target)->settings );
+
+ if ( new_target )
+ pushsettings( new_module, new_target->settings );
+
+ *current_module = new_module;
+ *current_target = new_target;
+}
+
+
+/*
+ * make1cmds() - turn ACTIONS into CMDs, grouping, splitting, etc.
+ *
+ * Essentially copies a chain of ACTIONs to a chain of CMDs, grouping
+ * RULE_TOGETHER actions, splitting RULE_PIECEMEAL actions, and handling
+ * RULE_NEWSRCS actions. The result is a chain of CMDs which has already had all
+ * of its embedded variable references expanded and can now be executed using
+ * exec_cmd().
+ */
+
+static CMD * make1cmds( TARGET * t )
+{
+ CMD * cmds = 0;
+ CMD * * cmds_next = &cmds;
+ LIST * shell = L0;
+ module_t * settings_module = 0;
+ TARGET * settings_target = 0;
+ ACTIONS * a0;
+ int const running_flag = globs.noexec ? A_RUNNING_NOEXEC : A_RUNNING;
+
+ /* Step through actions. Actions may be shared with other targets or grouped
+ * using RULE_TOGETHER, so actions already seen are skipped.
+ */
+ for ( a0 = t->actions; a0; a0 = a0->next )
+ {
+ RULE * rule = a0->action->rule;
+ rule_actions * actions = rule->actions;
+ SETTINGS * boundvars;
+ LIST * nt;
+ LIST * ns;
+ ACTIONS * a1;
+
+ /* Only do rules with commands to execute. If this action has already
+ * been executed, use saved status.
+ */
+ if ( !actions || a0->action->running >= running_flag )
+ continue;
+
+ a0->action->running = running_flag;
+
+ /* Make LISTS of targets and sources. If `execute together` has been
+ * specified for this rule, tack on sources from each instance of this
+ * rule for this target.
+ */
+ nt = make1list( L0, a0->action->targets, 0 );
+ ns = make1list( L0, a0->action->sources, actions->flags );
+ if ( actions->flags & RULE_TOGETHER )
+ for ( a1 = a0->next; a1; a1 = a1->next )
+ if ( a1->action->rule == rule &&
+ a1->action->running < running_flag )
+ {
+ ns = make1list( ns, a1->action->sources, actions->flags );
+ a1->action->running = running_flag;
+ }
+
+ /* If doing only updated (or existing) sources, but none have been
+ * updated (or exist), skip this action.
+ */
+ if ( list_empty( ns ) &&
+ ( actions->flags & ( RULE_NEWSRCS | RULE_EXISTING ) ) )
+ {
+ list_free( nt );
+ continue;
+ }
+
+ swap_settings( &settings_module, &settings_target, rule->module, t );
+ if ( list_empty( shell ) )
+ {
+ /* shell is per-target */
+ shell = var_get( rule->module, constant_JAMSHELL );
+ }
+
+ /* If we had 'actions xxx bind vars' we bind the vars now. */
+ boundvars = make1settings( rule->module, actions->bindlist );
+ pushsettings( rule->module, boundvars );
+
+ /*
+ * Build command, starting with all source args.
+ *
+ * For actions that allow PIECEMEAL commands, if the constructed command
+ * string is too long, we retry constructing it with a reduced number of
+ * source arguments presented.
+ *
+ * While reducing slowly takes a bit of compute time to get things just
+ * right, it is worth it to get as close to maximum allowed command
+ * string length as possible, because launching the commands we are
+ * executing is likely to be much more compute intensive.
+ *
+ * Note that we loop through at least once, for sourceless actions.
+ */
+ {
+ int const length = list_length( ns );
+ int start = 0;
+ int chunk = length;
+ LIST * cmd_targets = L0;
+ LIST * cmd_shell = L0;
+ do
+ {
+ CMD * cmd;
+ int cmd_check_result;
+ int cmd_error_length;
+ int cmd_error_max_length;
+ int retry = 0;
+ int accept_command = 0;
+
+ /* Build cmd: cmd_new() takes ownership of its lists. */
+ if ( list_empty( cmd_targets ) ) cmd_targets = list_copy( nt );
+ if ( list_empty( cmd_shell ) ) cmd_shell = list_copy( shell );
+ cmd = cmd_new( rule, cmd_targets, list_sublist( ns, start,
+ chunk ), cmd_shell );
+
+ cmd_check_result = exec_check( cmd->buf, &cmd->shell,
+ &cmd_error_length, &cmd_error_max_length );
+
+ if ( cmd_check_result == EXEC_CHECK_OK )
+ {
+ accept_command = 1;
+ }
+ else if ( cmd_check_result == EXEC_CHECK_NOOP )
+ {
+ accept_command = 1;
+ cmd->noop = 1;
+ }
+ else if ( ( actions->flags & RULE_PIECEMEAL ) && ( chunk > 1 ) )
+ {
+ /* Too long but splittable. Reduce chunk size slowly and
+ * retry.
+ */
+ assert( cmd_check_result == EXEC_CHECK_TOO_LONG ||
+ cmd_check_result == EXEC_CHECK_LINE_TOO_LONG );
+ chunk = chunk * 9 / 10;
+ retry = 1;
+ }
+ else
+ {
+ /* Too long and not splittable. */
+ char const * const error_message = cmd_check_result ==
+ EXEC_CHECK_TOO_LONG
+ ? "is too long"
+ : "contains a line that is too long";
+ assert( cmd_check_result == EXEC_CHECK_TOO_LONG ||
+ cmd_check_result == EXEC_CHECK_LINE_TOO_LONG );
+ printf( "%s action %s (%d, max %d):\n", object_str(
+ rule->name ), error_message, cmd_error_length,
+ cmd_error_max_length );
+
+ /* Tell the user what did not fit. */
+ fputs( cmd->buf->value, stdout );
+ exit( EXITBAD );
+ }
+
+ assert( !retry || !accept_command );
+
+ if ( accept_command )
+ {
+ /* Chain it up. */
+ *cmds_next = cmd;
+ cmds_next = &cmd->next;
+
+ /* Mark lists we need recreated for the next command since
+ * they got consumed by the cmd object.
+ */
+ cmd_targets = L0;
+ cmd_shell = L0;
+ }
+ else
+ {
+ /* We can reuse targets & shell lists for the next command
+ * if we do not let them die with this cmd object.
+ */
+ cmd_release_targets_and_shell( cmd );
+ cmd_free( cmd );
+ }
+
+ if ( !retry )
+ start += chunk;
+ }
+ while ( start < length );
+ }
+
+ /* These were always copied when used. */
+ list_free( nt );
+ list_free( ns );
+
+ /* Free variables with values bound by 'actions xxx bind vars'. */
+ popsettings( rule->module, boundvars );
+ freesettings( boundvars );
+ }
+
+ swap_settings( &settings_module, &settings_target, 0, 0 );
+ return cmds;
+}
+
+
+/*
+ * make1list() - turn a list of targets into a LIST, for $(<) and $(>)
+ */
+
+static LIST * make1list( LIST * l, TARGETS * targets, int flags )
+{
+ for ( ; targets; targets = targets->next )
+ {
+ TARGET * t = targets->target;
+
+ if ( t->binding == T_BIND_UNBOUND )
+ make1bind( t );
+
+ if ( ( flags & RULE_EXISTING ) && ( flags & RULE_NEWSRCS ) )
+ {
+ if ( ( t->binding != T_BIND_EXISTS ) &&
+ ( t->fate <= T_FATE_STABLE ) )
+ continue;
+ }
+ else if ( flags & RULE_EXISTING )
+ {
+ if ( t->binding != T_BIND_EXISTS )
+ continue;
+ }
+ else if ( flags & RULE_NEWSRCS )
+ {
+ if ( t->fate <= T_FATE_STABLE )
+ continue;
+ }
+
+ /* Prohibit duplicates for RULE_TOGETHER. */
+ if ( flags & RULE_TOGETHER )
+ {
+ LISTITER iter = list_begin( l );
+ LISTITER const end = list_end( l );
+ for ( ; iter != end; iter = list_next( iter ) )
+ if ( object_equal( list_item( iter ), t->boundname ) )
+ break;
+ if ( iter != end )
+ continue;
+ }
+
+ /* Build new list. */
+ l = list_push_back( l, object_copy( t->boundname ) );
+ }
+
+ return l;
+}
+
+
+/*
+ * make1settings() - for vars with bound values, build up replacement lists
+ */
+
+static SETTINGS * make1settings( struct module_t * module, LIST * vars )
+{
+ SETTINGS * settings = 0;
+
+ LISTITER vars_iter = list_begin( vars );
+ LISTITER const vars_end = list_end( vars );
+ for ( ; vars_iter != vars_end; vars_iter = list_next( vars_iter ) )
+ {
+ LIST * const l = var_get( module, list_item( vars_iter ) );
+ LIST * nl = L0;
+ LISTITER iter = list_begin( l );
+ LISTITER const end = list_end( l );
+
+ for ( ; iter != end; iter = list_next( iter ) )
+ {
+ TARGET * const t = bindtarget( list_item( iter ) );
+
+ /* Make sure the target is bound. */
+ if ( t->binding == T_BIND_UNBOUND )
+ make1bind( t );
+
+ /* Build a new list. */
+ nl = list_push_back( nl, object_copy( t->boundname ) );
+ }
+
+ /* Add to settings chain. */
+ settings = addsettings( settings, VAR_SET, list_item( vars_iter ), nl );
+ }
+
+ return settings;
+}
+
+
+/*
+ * make1bind() - bind targets that were not bound during dependency analysis
+ *
+ * Spot the kludge! If a target is not in the dependency tree, it did not get
+ * bound by make0(), so we have to do it here. Ugly.
+ */
+
+static void make1bind( TARGET * t )
+{
+ if ( t->flags & T_FLAG_NOTFILE )
+ return;
+
+ pushsettings( root_module(), t->settings );
+ object_free( t->boundname );
+ t->boundname = search( t->name, &t->time, 0, t->flags & T_FLAG_ISFILE );
+ t->binding = timestamp_empty( &t->time ) ? T_BIND_MISSING : T_BIND_EXISTS;
+ popsettings( root_module(), t->settings );
+}
diff --git a/src/kenlm/jam-files/engine/md5.c b/src/kenlm/jam-files/engine/md5.c
new file mode 100644
index 0000000..c35d96c
--- /dev/null
+++ b/src/kenlm/jam-files/engine/md5.c
@@ -0,0 +1,381 @@
+/*
+ Copyright (C) 1999, 2000, 2002 Aladdin Enterprises. All rights reserved.
+
+ This software is provided 'as-is', without any express or implied
+ warranty. In no event will the authors be held liable for any damages
+ arising from the use of this software.
+
+ Permission is granted to anyone to use this software for any purpose,
+ including commercial applications, and to alter it and redistribute it
+ freely, subject to the following restrictions:
+
+ 1. The origin of this software must not be misrepresented; you must not
+ claim that you wrote the original software. If you use this software
+ in a product, an acknowledgment in the product documentation would be
+ appreciated but is not required.
+ 2. Altered source versions must be plainly marked as such, and must not be
+ misrepresented as being the original software.
+ 3. This notice may not be removed or altered from any source distribution.
+
+ L. Peter Deutsch
+ ghost@aladdin.com
+
+ */
+/* $Id: md5.c,v 1.6 2002/04/13 19:20:28 lpd Exp $ */
+/*
+ Independent implementation of MD5 (RFC 1321).
+
+ This code implements the MD5 Algorithm defined in RFC 1321, whose
+ text is available at
+ http://www.ietf.org/rfc/rfc1321.txt
+ The code is derived from the text of the RFC, including the test suite
+ (section A.5) but excluding the rest of Appendix A. It does not include
+ any code or documentation that is identified in the RFC as being
+ copyrighted.
+
+ The original and principal author of md5.c is L. Peter Deutsch
+ <ghost@aladdin.com>. Other authors are noted in the change history
+ that follows (in reverse chronological order):
+
+ 2002-04-13 lpd Clarified derivation from RFC 1321; now handles byte order
+ either statically or dynamically; added missing #include <string.h>
+ in library.
+ 2002-03-11 lpd Corrected argument list for main(), and added int return
+ type, in test program and T value program.
+ 2002-02-21 lpd Added missing #include <stdio.h> in test program.
+ 2000-07-03 lpd Patched to eliminate warnings about "constant is
+ unsigned in ANSI C, signed in traditional"; made test program
+ self-checking.
+ 1999-11-04 lpd Edited comments slightly for automatic TOC extraction.
+ 1999-10-18 lpd Fixed typo in header comment (ansi2knr rather than md5).
+ 1999-05-03 lpd Original version.
+ */
+
+#include "md5.h"
+#include <string.h>
+
+#undef BYTE_ORDER /* 1 = big-endian, -1 = little-endian, 0 = unknown */
+#ifdef ARCH_IS_BIG_ENDIAN
+# define BYTE_ORDER (ARCH_IS_BIG_ENDIAN ? 1 : -1)
+#else
+# define BYTE_ORDER 0
+#endif
+
+#define T_MASK ((md5_word_t)~0)
+#define T1 /* 0xd76aa478 */ (T_MASK ^ 0x28955b87)
+#define T2 /* 0xe8c7b756 */ (T_MASK ^ 0x173848a9)
+#define T3 0x242070db
+#define T4 /* 0xc1bdceee */ (T_MASK ^ 0x3e423111)
+#define T5 /* 0xf57c0faf */ (T_MASK ^ 0x0a83f050)
+#define T6 0x4787c62a
+#define T7 /* 0xa8304613 */ (T_MASK ^ 0x57cfb9ec)
+#define T8 /* 0xfd469501 */ (T_MASK ^ 0x02b96afe)
+#define T9 0x698098d8
+#define T10 /* 0x8b44f7af */ (T_MASK ^ 0x74bb0850)
+#define T11 /* 0xffff5bb1 */ (T_MASK ^ 0x0000a44e)
+#define T12 /* 0x895cd7be */ (T_MASK ^ 0x76a32841)
+#define T13 0x6b901122
+#define T14 /* 0xfd987193 */ (T_MASK ^ 0x02678e6c)
+#define T15 /* 0xa679438e */ (T_MASK ^ 0x5986bc71)
+#define T16 0x49b40821
+#define T17 /* 0xf61e2562 */ (T_MASK ^ 0x09e1da9d)
+#define T18 /* 0xc040b340 */ (T_MASK ^ 0x3fbf4cbf)
+#define T19 0x265e5a51
+#define T20 /* 0xe9b6c7aa */ (T_MASK ^ 0x16493855)
+#define T21 /* 0xd62f105d */ (T_MASK ^ 0x29d0efa2)
+#define T22 0x02441453
+#define T23 /* 0xd8a1e681 */ (T_MASK ^ 0x275e197e)
+#define T24 /* 0xe7d3fbc8 */ (T_MASK ^ 0x182c0437)
+#define T25 0x21e1cde6
+#define T26 /* 0xc33707d6 */ (T_MASK ^ 0x3cc8f829)
+#define T27 /* 0xf4d50d87 */ (T_MASK ^ 0x0b2af278)
+#define T28 0x455a14ed
+#define T29 /* 0xa9e3e905 */ (T_MASK ^ 0x561c16fa)
+#define T30 /* 0xfcefa3f8 */ (T_MASK ^ 0x03105c07)
+#define T31 0x676f02d9
+#define T32 /* 0x8d2a4c8a */ (T_MASK ^ 0x72d5b375)
+#define T33 /* 0xfffa3942 */ (T_MASK ^ 0x0005c6bd)
+#define T34 /* 0x8771f681 */ (T_MASK ^ 0x788e097e)
+#define T35 0x6d9d6122
+#define T36 /* 0xfde5380c */ (T_MASK ^ 0x021ac7f3)
+#define T37 /* 0xa4beea44 */ (T_MASK ^ 0x5b4115bb)
+#define T38 0x4bdecfa9
+#define T39 /* 0xf6bb4b60 */ (T_MASK ^ 0x0944b49f)
+#define T40 /* 0xbebfbc70 */ (T_MASK ^ 0x4140438f)
+#define T41 0x289b7ec6
+#define T42 /* 0xeaa127fa */ (T_MASK ^ 0x155ed805)
+#define T43 /* 0xd4ef3085 */ (T_MASK ^ 0x2b10cf7a)
+#define T44 0x04881d05
+#define T45 /* 0xd9d4d039 */ (T_MASK ^ 0x262b2fc6)
+#define T46 /* 0xe6db99e5 */ (T_MASK ^ 0x1924661a)
+#define T47 0x1fa27cf8
+#define T48 /* 0xc4ac5665 */ (T_MASK ^ 0x3b53a99a)
+#define T49 /* 0xf4292244 */ (T_MASK ^ 0x0bd6ddbb)
+#define T50 0x432aff97
+#define T51 /* 0xab9423a7 */ (T_MASK ^ 0x546bdc58)
+#define T52 /* 0xfc93a039 */ (T_MASK ^ 0x036c5fc6)
+#define T53 0x655b59c3
+#define T54 /* 0x8f0ccc92 */ (T_MASK ^ 0x70f3336d)
+#define T55 /* 0xffeff47d */ (T_MASK ^ 0x00100b82)
+#define T56 /* 0x85845dd1 */ (T_MASK ^ 0x7a7ba22e)
+#define T57 0x6fa87e4f
+#define T58 /* 0xfe2ce6e0 */ (T_MASK ^ 0x01d3191f)
+#define T59 /* 0xa3014314 */ (T_MASK ^ 0x5cfebceb)
+#define T60 0x4e0811a1
+#define T61 /* 0xf7537e82 */ (T_MASK ^ 0x08ac817d)
+#define T62 /* 0xbd3af235 */ (T_MASK ^ 0x42c50dca)
+#define T63 0x2ad7d2bb
+#define T64 /* 0xeb86d391 */ (T_MASK ^ 0x14792c6e)
+
+
+static void
+md5_process(md5_state_t *pms, const md5_byte_t *data /*[64]*/)
+{
+ md5_word_t
+ a = pms->abcd[0], b = pms->abcd[1],
+ c = pms->abcd[2], d = pms->abcd[3];
+ md5_word_t t;
+#if BYTE_ORDER > 0
+ /* Define storage only for big-endian CPUs. */
+ md5_word_t X[16];
+#else
+ /* Define storage for little-endian or both types of CPUs. */
+ md5_word_t xbuf[16];
+ const md5_word_t *X;
+#endif
+
+ {
+#if BYTE_ORDER == 0
+ /*
+ * Determine dynamically whether this is a big-endian or
+ * little-endian machine, since we can use a more efficient
+ * algorithm on the latter.
+ */
+ static const int w = 1;
+
+ if (*((const md5_byte_t *)&w)) /* dynamic little-endian */
+#endif
+#if BYTE_ORDER <= 0 /* little-endian */
+ {
+ /*
+ * On little-endian machines, we can process properly aligned
+ * data without copying it.
+ */
+ if (!((data - (const md5_byte_t *)0) & 3)) {
+ /* data are properly aligned */
+ X = (const md5_word_t *)data;
+ } else {
+ /* not aligned */
+ memcpy(xbuf, data, 64);
+ X = xbuf;
+ }
+ }
+#endif
+#if BYTE_ORDER == 0
+ else /* dynamic big-endian */
+#endif
+#if BYTE_ORDER >= 0 /* big-endian */
+ {
+ /*
+ * On big-endian machines, we must arrange the bytes in the
+ * right order.
+ */
+ const md5_byte_t *xp = data;
+ int i;
+
+# if BYTE_ORDER == 0
+ X = xbuf; /* (dynamic only) */
+# else
+# define xbuf X /* (static only) */
+# endif
+ for (i = 0; i < 16; ++i, xp += 4)
+ xbuf[i] = xp[0] + (xp[1] << 8) + (xp[2] << 16) + (xp[3] << 24);
+ }
+#endif
+ }
+
+#define ROTATE_LEFT(x, n) (((x) << (n)) | ((x) >> (32 - (n))))
+
+ /* Round 1. */
+ /* Let [abcd k s i] denote the operation
+ a = b + ((a + F(b,c,d) + X[k] + T[i]) <<< s). */
+#define F(x, y, z) (((x) & (y)) | (~(x) & (z)))
+#define SET(a, b, c, d, k, s, Ti)\
+ t = a + F(b,c,d) + X[k] + Ti;\
+ a = ROTATE_LEFT(t, s) + b
+ /* Do the following 16 operations. */
+ SET(a, b, c, d, 0, 7, T1);
+ SET(d, a, b, c, 1, 12, T2);
+ SET(c, d, a, b, 2, 17, T3);
+ SET(b, c, d, a, 3, 22, T4);
+ SET(a, b, c, d, 4, 7, T5);
+ SET(d, a, b, c, 5, 12, T6);
+ SET(c, d, a, b, 6, 17, T7);
+ SET(b, c, d, a, 7, 22, T8);
+ SET(a, b, c, d, 8, 7, T9);
+ SET(d, a, b, c, 9, 12, T10);
+ SET(c, d, a, b, 10, 17, T11);
+ SET(b, c, d, a, 11, 22, T12);
+ SET(a, b, c, d, 12, 7, T13);
+ SET(d, a, b, c, 13, 12, T14);
+ SET(c, d, a, b, 14, 17, T15);
+ SET(b, c, d, a, 15, 22, T16);
+#undef SET
+
+ /* Round 2. */
+ /* Let [abcd k s i] denote the operation
+ a = b + ((a + G(b,c,d) + X[k] + T[i]) <<< s). */
+#define G(x, y, z) (((x) & (z)) | ((y) & ~(z)))
+#define SET(a, b, c, d, k, s, Ti)\
+ t = a + G(b,c,d) + X[k] + Ti;\
+ a = ROTATE_LEFT(t, s) + b
+ /* Do the following 16 operations. */
+ SET(a, b, c, d, 1, 5, T17);
+ SET(d, a, b, c, 6, 9, T18);
+ SET(c, d, a, b, 11, 14, T19);
+ SET(b, c, d, a, 0, 20, T20);
+ SET(a, b, c, d, 5, 5, T21);
+ SET(d, a, b, c, 10, 9, T22);
+ SET(c, d, a, b, 15, 14, T23);
+ SET(b, c, d, a, 4, 20, T24);
+ SET(a, b, c, d, 9, 5, T25);
+ SET(d, a, b, c, 14, 9, T26);
+ SET(c, d, a, b, 3, 14, T27);
+ SET(b, c, d, a, 8, 20, T28);
+ SET(a, b, c, d, 13, 5, T29);
+ SET(d, a, b, c, 2, 9, T30);
+ SET(c, d, a, b, 7, 14, T31);
+ SET(b, c, d, a, 12, 20, T32);
+#undef SET
+
+ /* Round 3. */
+ /* Let [abcd k s t] denote the operation
+ a = b + ((a + H(b,c,d) + X[k] + T[i]) <<< s). */
+#define H(x, y, z) ((x) ^ (y) ^ (z))
+#define SET(a, b, c, d, k, s, Ti)\
+ t = a + H(b,c,d) + X[k] + Ti;\
+ a = ROTATE_LEFT(t, s) + b
+ /* Do the following 16 operations. */
+ SET(a, b, c, d, 5, 4, T33);
+ SET(d, a, b, c, 8, 11, T34);
+ SET(c, d, a, b, 11, 16, T35);
+ SET(b, c, d, a, 14, 23, T36);
+ SET(a, b, c, d, 1, 4, T37);
+ SET(d, a, b, c, 4, 11, T38);
+ SET(c, d, a, b, 7, 16, T39);
+ SET(b, c, d, a, 10, 23, T40);
+ SET(a, b, c, d, 13, 4, T41);
+ SET(d, a, b, c, 0, 11, T42);
+ SET(c, d, a, b, 3, 16, T43);
+ SET(b, c, d, a, 6, 23, T44);
+ SET(a, b, c, d, 9, 4, T45);
+ SET(d, a, b, c, 12, 11, T46);
+ SET(c, d, a, b, 15, 16, T47);
+ SET(b, c, d, a, 2, 23, T48);
+#undef SET
+
+ /* Round 4. */
+ /* Let [abcd k s t] denote the operation
+ a = b + ((a + I(b,c,d) + X[k] + T[i]) <<< s). */
+#define I(x, y, z) ((y) ^ ((x) | ~(z)))
+#define SET(a, b, c, d, k, s, Ti)\
+ t = a + I(b,c,d) + X[k] + Ti;\
+ a = ROTATE_LEFT(t, s) + b
+ /* Do the following 16 operations. */
+ SET(a, b, c, d, 0, 6, T49);
+ SET(d, a, b, c, 7, 10, T50);
+ SET(c, d, a, b, 14, 15, T51);
+ SET(b, c, d, a, 5, 21, T52);
+ SET(a, b, c, d, 12, 6, T53);
+ SET(d, a, b, c, 3, 10, T54);
+ SET(c, d, a, b, 10, 15, T55);
+ SET(b, c, d, a, 1, 21, T56);
+ SET(a, b, c, d, 8, 6, T57);
+ SET(d, a, b, c, 15, 10, T58);
+ SET(c, d, a, b, 6, 15, T59);
+ SET(b, c, d, a, 13, 21, T60);
+ SET(a, b, c, d, 4, 6, T61);
+ SET(d, a, b, c, 11, 10, T62);
+ SET(c, d, a, b, 2, 15, T63);
+ SET(b, c, d, a, 9, 21, T64);
+#undef SET
+
+ /* Then perform the following additions. (That is increment each
+ of the four registers by the value it had before this block
+ was started.) */
+ pms->abcd[0] += a;
+ pms->abcd[1] += b;
+ pms->abcd[2] += c;
+ pms->abcd[3] += d;
+}
+
+void
+md5_init(md5_state_t *pms)
+{
+ pms->count[0] = pms->count[1] = 0;
+ pms->abcd[0] = 0x67452301;
+ pms->abcd[1] = /*0xefcdab89*/ T_MASK ^ 0x10325476;
+ pms->abcd[2] = /*0x98badcfe*/ T_MASK ^ 0x67452301;
+ pms->abcd[3] = 0x10325476;
+}
+
+void
+md5_append(md5_state_t *pms, const md5_byte_t *data, int nbytes)
+{
+ const md5_byte_t *p = data;
+ int left = nbytes;
+ int offset = (pms->count[0] >> 3) & 63;
+ md5_word_t nbits = (md5_word_t)(nbytes << 3);
+
+ if (nbytes <= 0)
+ return;
+
+ /* Update the message length. */
+ pms->count[1] += nbytes >> 29;
+ pms->count[0] += nbits;
+ if (pms->count[0] < nbits)
+ pms->count[1]++;
+
+ /* Process an initial partial block. */
+ if (offset) {
+ int copy = (offset + nbytes > 64 ? 64 - offset : nbytes);
+
+ memcpy(pms->buf + offset, p, copy);
+ if (offset + copy < 64)
+ return;
+ p += copy;
+ left -= copy;
+ md5_process(pms, pms->buf);
+ }
+
+ /* Process full blocks. */
+ for (; left >= 64; p += 64, left -= 64)
+ md5_process(pms, p);
+
+ /* Process a final partial block. */
+ if (left)
+ memcpy(pms->buf, p, left);
+}
+
+void
+md5_finish(md5_state_t *pms, md5_byte_t digest[16])
+{
+ static const md5_byte_t pad[64] = {
+ 0x80, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
+ };
+ md5_byte_t data[8];
+ int i;
+
+ /* Save the length before padding. */
+ for (i = 0; i < 8; ++i)
+ data[i] = (md5_byte_t)(pms->count[i >> 2] >> ((i & 3) << 3));
+ /* Pad to 56 bytes mod 64. */
+ md5_append(pms, pad, ((55 - (pms->count[0] >> 3)) & 63) + 1);
+ /* Append the length. */
+ md5_append(pms, data, 8);
+ for (i = 0; i < 16; ++i)
+ digest[i] = (md5_byte_t)(pms->abcd[i >> 2] >> ((i & 3) << 3));
+}
diff --git a/src/kenlm/jam-files/engine/md5.h b/src/kenlm/jam-files/engine/md5.h
new file mode 100644
index 0000000..698c995
--- /dev/null
+++ b/src/kenlm/jam-files/engine/md5.h
@@ -0,0 +1,91 @@
+/*
+ Copyright (C) 1999, 2002 Aladdin Enterprises. All rights reserved.
+
+ This software is provided 'as-is', without any express or implied
+ warranty. In no event will the authors be held liable for any damages
+ arising from the use of this software.
+
+ Permission is granted to anyone to use this software for any purpose,
+ including commercial applications, and to alter it and redistribute it
+ freely, subject to the following restrictions:
+
+ 1. The origin of this software must not be misrepresented; you must not
+ claim that you wrote the original software. If you use this software
+ in a product, an acknowledgment in the product documentation would be
+ appreciated but is not required.
+ 2. Altered source versions must be plainly marked as such, and must not be
+ misrepresented as being the original software.
+ 3. This notice may not be removed or altered from any source distribution.
+
+ L. Peter Deutsch
+ ghost@aladdin.com
+
+ */
+/* $Id: md5.h,v 1.4 2002/04/13 19:20:28 lpd Exp $ */
+/*
+ Independent implementation of MD5 (RFC 1321).
+
+ This code implements the MD5 Algorithm defined in RFC 1321, whose
+ text is available at
+ http://www.ietf.org/rfc/rfc1321.txt
+ The code is derived from the text of the RFC, including the test suite
+ (section A.5) but excluding the rest of Appendix A. It does not include
+ any code or documentation that is identified in the RFC as being
+ copyrighted.
+
+ The original and principal author of md5.h is L. Peter Deutsch
+ <ghost@aladdin.com>. Other authors are noted in the change history
+ that follows (in reverse chronological order):
+
+ 2002-04-13 lpd Removed support for non-ANSI compilers; removed
+ references to Ghostscript; clarified derivation from RFC 1321;
+ now handles byte order either statically or dynamically.
+ 1999-11-04 lpd Edited comments slightly for automatic TOC extraction.
+ 1999-10-18 lpd Fixed typo in header comment (ansi2knr rather than md5);
+ added conditionalization for C++ compilation from Martin
+ Purschke <purschke@bnl.gov>.
+ 1999-05-03 lpd Original version.
+ */
+
+#ifndef md5_INCLUDED
+# define md5_INCLUDED
+
+/*
+ * This package supports both compile-time and run-time determination of CPU
+ * byte order. If ARCH_IS_BIG_ENDIAN is defined as 0, the code will be
+ * compiled to run only on little-endian CPUs; if ARCH_IS_BIG_ENDIAN is
+ * defined as non-zero, the code will be compiled to run only on big-endian
+ * CPUs; if ARCH_IS_BIG_ENDIAN is not defined, the code will be compiled to
+ * run on either big- or little-endian CPUs, but will run slightly less
+ * efficiently on either one than if ARCH_IS_BIG_ENDIAN is defined.
+ */
+
+typedef unsigned char md5_byte_t; /* 8-bit byte */
+typedef unsigned int md5_word_t; /* 32-bit word */
+
+/* Define the state of the MD5 Algorithm. */
+typedef struct md5_state_s {
+ md5_word_t count[2]; /* message length in bits, lsw first */
+ md5_word_t abcd[4]; /* digest buffer */
+ md5_byte_t buf[64]; /* accumulate block */
+} md5_state_t;
+
+#ifdef __cplusplus
+extern "C"
+{
+#endif
+
+/* Initialize the algorithm. */
+void md5_init(md5_state_t *pms);
+
+/* Append a string to the message. */
+void md5_append(md5_state_t *pms, const md5_byte_t *data, int nbytes);
+
+/* Finish the message and return the digest. */
+void md5_finish(md5_state_t *pms, md5_byte_t digest[16]);
+
+#ifdef __cplusplus
+} /* end extern "C" */
+#endif
+
+#endif /* md5_INCLUDED */
diff --git a/src/kenlm/jam-files/engine/mem.c b/src/kenlm/jam-files/engine/mem.c
new file mode 100644
index 0000000..6a11fb3
--- /dev/null
+++ b/src/kenlm/jam-files/engine/mem.c
@@ -0,0 +1,75 @@
+/*
+Copyright Rene Rivera 2006.
+Distributed under the Boost Software License, Version 1.0.
+(See accompanying file LICENSE_1_0.txt or copy at
+http://www.boost.org/LICENSE_1_0.txt)
+*/
+
+#include "jam.h"
+
+#ifdef OPT_BOEHM_GC
+
+ /* Compile the Boehm GC as one big chunk of code. It's much easier
+ this way, than trying to make radical changes to the bjam build
+ scripts. */
+
+ #define ATOMIC_UNCOLLECTABLE
+ #define NO_EXECUTE_PERMISSION
+ #define ALL_INTERIOR_POINTERS
+
+ #define LARGE_CONFIG
+ /*
+ #define NO_SIGNALS
+ #define SILENT
+ */
+ #ifndef GC_DEBUG
+ #define NO_DEBUGGING
+ #endif
+
+ #ifdef __GLIBC__
+ #define __USE_GNU
+ #endif
+
+ #include "boehm_gc/reclaim.c"
+ #include "boehm_gc/allchblk.c"
+ #include "boehm_gc/misc.c"
+ #include "boehm_gc/alloc.c"
+ #include "boehm_gc/mach_dep.c"
+ #include "boehm_gc/os_dep.c"
+ #include "boehm_gc/mark_rts.c"
+ #include "boehm_gc/headers.c"
+ #include "boehm_gc/mark.c"
+ #include "boehm_gc/obj_map.c"
+ #include "boehm_gc/pcr_interface.c"
+ #include "boehm_gc/blacklst.c"
+ #include "boehm_gc/new_hblk.c"
+ #include "boehm_gc/real_malloc.c"
+ #include "boehm_gc/dyn_load.c"
+ #include "boehm_gc/dbg_mlc.c"
+ #include "boehm_gc/malloc.c"
+ #include "boehm_gc/stubborn.c"
+ #include "boehm_gc/checksums.c"
+ #include "boehm_gc/pthread_support.c"
+ #include "boehm_gc/pthread_stop_world.c"
+ #include "boehm_gc/darwin_stop_world.c"
+ #include "boehm_gc/typd_mlc.c"
+ #include "boehm_gc/ptr_chck.c"
+ #include "boehm_gc/mallocx.c"
+ #include "boehm_gc/gcj_mlc.c"
+ #include "boehm_gc/specific.c"
+ #include "boehm_gc/gc_dlopen.c"
+ #include "boehm_gc/backgraph.c"
+ #include "boehm_gc/win32_threads.c"
+
+ /* Needs to be last. */
+ #include "boehm_gc/finalize.c"
+
+#elif defined(OPT_DUMA)
+
+ #ifdef OS_NT
+ #define WIN32
+ #endif
+ #include "duma/duma.c"
+ #include "duma/print.c"
+
+#endif
diff --git a/src/kenlm/jam-files/engine/mem.h b/src/kenlm/jam-files/engine/mem.h
new file mode 100644
index 0000000..8718b07
--- /dev/null
+++ b/src/kenlm/jam-files/engine/mem.h
@@ -0,0 +1,133 @@
+/*
+ * Copyright 2006. Rene Rivera
+ * Distributed under the Boost Software License, Version 1.0.
+ * (See accompanying file LICENSE_1_0.txt or copy at
+ * http://www.boost.org/LICENSE_1_0.txt)
+ */
+
+#ifndef BJAM_MEM_H
+#define BJAM_MEM_H
+
+#ifdef OPT_BOEHM_GC
+
+ /* Use Boehm GC memory allocator. */
+ #include <gc.h>
+
+ #define bjam_malloc_x(s) memset(GC_malloc(s),0,s)
+ #define bjam_malloc_atomic_x(s) memset(GC_malloc_atomic(s),0,s)
+ #define bjam_calloc_x(n,s) memset(GC_malloc((n)*(s)),0,(n)*(s))
+ #define bjam_calloc_atomic_x(n,s) memset(GC_malloc_atomic((n)*(s)),0,(n)*(s))
+ #define bjam_realloc_x(p,s) GC_realloc(p,s)
+ #define bjam_free_x(p) GC_free(p)
+ #define bjam_mem_init_x() GC_init(); GC_enable_incremental()
+
+ #define bjam_malloc_raw_x(s) malloc(s)
+ #define bjam_calloc_raw_x(n,s) calloc(n,s)
+ #define bjam_realloc_raw_x(p,s) realloc(p,s)
+ #define bjam_free_raw_x(p) free(p)
+
+ #ifndef BJAM_NEWSTR_NO_ALLOCATE
+ # define BJAM_NEWSTR_NO_ALLOCATE
+ #endif
+
+#elif defined( OPT_DUMA )
+
+ /* Use Duma memory debugging library. */
+ #include <stdlib.h>
+
+ #define _DUMA_CONFIG_H_
+ #define DUMA_NO_GLOBAL_MALLOC_FREE
+ #define DUMA_EXPLICIT_INIT
+ #define DUMA_NO_THREAD_SAFETY
+ #define DUMA_NO_CPP_SUPPORT
+ /* #define DUMA_NO_LEAKDETECTION */
+ /* #define DUMA_USE_FRAMENO */
+ /* #define DUMA_PREFER_ATEXIT */
+ /* #define DUMA_OLD_DEL_MACRO */
+ /* #define DUMA_NO_HANG_MSG */
+ #define DUMA_PAGE_SIZE 4096
+ #define DUMA_MIN_ALIGNMENT 1
+ /* #define DUMA_GNU_INIT_ATTR 0 */
+ typedef unsigned int DUMA_ADDR;
+ typedef unsigned int DUMA_SIZE;
+ #include <duma.h>
+
+ #define bjam_malloc_x(s) malloc(s)
+ #define bjam_calloc_x(n,s) calloc(n,s)
+ #define bjam_realloc_x(p,s) realloc(p,s)
+ #define bjam_free_x(p) free(p)
+
+ #ifndef BJAM_NEWSTR_NO_ALLOCATE
+ # define BJAM_NEWSTR_NO_ALLOCATE
+ #endif
+
+#else
+
+ /* Standard C memory allocation. */
+ #include <stdlib.h>
+
+ #define bjam_malloc_x(s) malloc(s)
+ #define bjam_calloc_x(n,s) calloc(n,s)
+ #define bjam_realloc_x(p,s) realloc(p,s)
+ #define bjam_free_x(p) free(p)
+
+#endif
+
+#ifndef bjam_malloc_atomic_x
+ #define bjam_malloc_atomic_x(s) bjam_malloc_x(s)
+#endif
+#ifndef bjam_calloc_atomic_x
+ #define bjam_calloc_atomic_x(n,s) bjam_calloc_x(n,s)
+#endif
+#ifndef bjam_mem_init_x
+ #define bjam_mem_init_x()
+#endif
+#ifndef bjam_mem_close_x
+ #define bjam_mem_close_x()
+#endif
+#ifndef bjam_malloc_raw_x
+ #define bjam_malloc_raw_x(s) bjam_malloc_x(s)
+#endif
+#ifndef bjam_calloc_raw_x
+ #define bjam_calloc_raw_x(n,s) bjam_calloc_x(n,s)
+#endif
+#ifndef bjam_realloc_raw_x
+ #define bjam_realloc_raw_x(p,s) bjam_realloc_x(p,s)
+#endif
+#ifndef bjam_free_raw_x
+ #define bjam_free_raw_x(p) bjam_free_x(p)
+#endif
+
+#ifdef OPT_DEBUG_PROFILE
+ /* Profile tracing of memory allocations. */
+ #include "debug.h"
+
+ #define BJAM_MALLOC(s) (profile_memory(s), bjam_malloc_x(s))
+ #define BJAM_MALLOC_ATOMIC(s) (profile_memory(s), bjam_malloc_atomic_x(s))
+ #define BJAM_CALLOC(n,s) (profile_memory(n*s), bjam_calloc_x(n,s))
+ #define BJAM_CALLOC_ATOMIC(n,s) (profile_memory(n*s), bjam_calloc_atomic_x(n,s))
+ #define BJAM_REALLOC(p,s) (profile_memory(s), bjam_realloc_x(p,s))
+
+ #define BJAM_MALLOC_RAW(s) (profile_memory(s), bjam_malloc_raw_x(s))
+ #define BJAM_CALLOC_RAW(n,s) (profile_memory(n*s), bjam_calloc_raw_x(n,s))
+ #define BJAM_REALLOC_RAW(p,s) (profile_memory(s), bjam_realloc_raw_x(p,s))
+#else
+ /* No mem tracing. */
+ #define BJAM_MALLOC(s) bjam_malloc_x(s)
+ #define BJAM_MALLOC_ATOMIC(s) bjam_malloc_atomic_x(s)
+ #define BJAM_CALLOC(n,s) bjam_calloc_x(n,s)
+ #define BJAM_CALLOC_ATOMIC(n,s) bjam_calloc_atomic_x(n,s)
+ #define BJAM_REALLOC(p,s) bjam_realloc_x(p,s)
+
+ #define BJAM_MALLOC_RAW(s) bjam_malloc_raw_x(s)
+ #define BJAM_CALLOC_RAW(n,s) bjam_calloc_raw_x(n,s)
+ #define BJAM_REALLOC_RAW(p,s) bjam_realloc_raw_x(p,s)
+#endif
+
+#define BJAM_MEM_INIT() bjam_mem_init_x()
+#define BJAM_MEM_CLOSE() bjam_mem_close_x()
+
+#define BJAM_FREE(p) bjam_free_x(p)
+#define BJAM_FREE_RAW(p) bjam_free_raw_x(p)
+
+#endif
diff --git a/src/kenlm/jam-files/engine/mkjambase.c b/src/kenlm/jam-files/engine/mkjambase.c
new file mode 100644
index 0000000..cdf5998
--- /dev/null
+++ b/src/kenlm/jam-files/engine/mkjambase.c
@@ -0,0 +1,123 @@
+/*
+ * Copyright 1993, 1995 Christopher Seiwald.
+ *
+ * This file is part of Jam - see jam.c for Copyright information.
+ */
+
+/* This file is ALSO:
+ * Copyright 2001-2004 David Abrahams.
+ * Distributed under the Boost Software License, Version 1.0.
+ * (See accompanying file LICENSE_1_0.txt or http://www.boost.org/LICENSE_1_0.txt)
+ */
+
+/*
+ * mkjambase.c - turn Jambase into a big C structure
+ *
+ * Usage: mkjambase jambase.c Jambase ...
+ *
+ * Results look like this:
+ *
+ * char *jambase[] = {
+ * "...\n",
+ * ...
+ * 0 };
+ *
+ * Handles \'s and "'s specially; knows to delete blank and comment lines.
+ *
+ */
+
+#include <stdio.h>
+#include <string.h>
+
+
+int main( int argc, char * * argv, char * * envp )
+{
+ char buf[ 1024 ];
+ FILE * fin;
+ FILE * fout;
+ char * p;
+ int doDotC = 0;
+
+ if ( argc < 3 )
+ {
+ fprintf( stderr, "usage: %s jambase.c Jambase ...\n", argv[ 0 ] );
+ return -1;
+ }
+
+ if ( !( fout = fopen( argv[1], "w" ) ) )
+ {
+ perror( argv[ 1 ] );
+ return -1;
+ }
+
+ /* If the file ends in .c generate a C source file. */
+ if ( ( p = strrchr( argv[1], '.' ) ) && !strcmp( p, ".c" ) )
+ doDotC++;
+
+ /* Now process the files. */
+
+ argc -= 2;
+ argv += 2;
+
+ if ( doDotC )
+ {
+ fprintf( fout, "/* Generated by mkjambase from Jambase */\n" );
+ fprintf( fout, "char *jambase[] = {\n" );
+ }
+
+ for ( ; argc--; ++argv )
+ {
+ if ( !( fin = fopen( *argv, "r" ) ) )
+ {
+ perror( *argv );
+ return -1;
+ }
+
+ if ( doDotC )
+ fprintf( fout, "/* %s */\n", *argv );
+ else
+ fprintf( fout, "### %s ###\n", *argv );
+
+ while ( fgets( buf, sizeof( buf ), fin ) )
+ {
+ if ( doDotC )
+ {
+ char * p = buf;
+
+ /* Strip leading whitespace. */
+ while ( ( *p == ' ' ) || ( *p == '\t' ) || ( *p == '\n' ) )
+ ++p;
+
+ /* Drop comments and empty lines. */
+ if ( ( *p == '#' ) || !*p )
+ continue;
+
+ /* Copy. */
+ putc( '"', fout );
+ for ( ; *p && ( *p != '\n' ); ++p )
+ switch ( *p )
+ {
+ case '\\': putc( '\\', fout ); putc( '\\', fout ); break;
+ case '"' : putc( '\\', fout ); putc( '"' , fout ); break;
+ case '\r': break;
+ default: putc( *p, fout ); break;
+ }
+
+ fprintf( fout, "\\n\",\n" );
+ }
+ else
+ {
+ fprintf( fout, "%s", buf );
+ }
+ }
+
+ fclose( fin );
+ }
+
+ if ( doDotC )
+ fprintf( fout, "0 };\n" );
+
+ fclose( fout );
+
+ return 0;
+}
diff --git a/src/kenlm/jam-files/engine/modules.c b/src/kenlm/jam-files/engine/modules.c
new file mode 100644
index 0000000..6be82fe
--- /dev/null
+++ b/src/kenlm/jam-files/engine/modules.c
@@ -0,0 +1,431 @@
+/*
+ * Copyright 2001-2004 David Abrahams.
+ * Distributed under the Boost Software License, Version 1.0.
+ * (See accompanying file LICENSE_1_0.txt or copy at
+ * http://www.boost.org/LICENSE_1_0.txt)
+ */
+
+#include "jam.h"
+#include "modules.h"
+
+#include "hash.h"
+#include "lists.h"
+#include "native.h"
+#include "object.h"
+#include "parse.h"
+#include "rules.h"
+#include "strings.h"
+#include "variable.h"
+
+#include <assert.h>
+#include <string.h>
+
+static struct hash * module_hash = 0;
+static module_t root;
+
+
+module_t * bindmodule( OBJECT * name )
+{
+ if ( !name )
+ return &root;
+
+ {
+ PROFILE_ENTER( BINDMODULE );
+
+ module_t * m;
+ int found;
+
+ if ( !module_hash )
+ module_hash = hashinit( sizeof( module_t ), "modules" );
+
+ m = (module_t *)hash_insert( module_hash, name, &found );
+ if ( !found )
+ {
+ m->name = object_copy( name );
+ m->variables = 0;
+ m->variable_indices = 0;
+ m->num_fixed_variables = 0;
+ m->fixed_variables = 0;
+ m->rules = 0;
+ m->imported_modules = 0;
+ m->class_module = 0;
+ m->native_rules = 0;
+ m->user_module = 0;
+ }
+
+ PROFILE_EXIT( BINDMODULE );
+
+ return m;
+ }
+}
+
+
+/*
+ * demand_rules() - Get the module's "rules" hash on demand.
+ */
+struct hash * demand_rules( module_t * m )
+{
+ if ( !m->rules )
+ m->rules = hashinit( sizeof( RULE ), "rules" );
+ return m->rules;
+}
+
+
+/*
+ * delete_module() - wipe out the module's rules and variables.
+ */
+
+static void delete_rule_( void * xrule, void * data )
+{
+ rule_free( (RULE *)xrule );
+}
+
+
+static void delete_native_rule( void * xrule, void * data )
+{
+ native_rule_t * rule = (native_rule_t *)xrule;
+ object_free( rule->name );
+ if ( rule->procedure )
+ function_free( rule->procedure );
+}
+
+
+static void delete_imported_modules( void * xmodule_name, void * data )
+{
+ object_free( *(OBJECT * *)xmodule_name );
+}
+
+
+static void free_fixed_variable( void * xvar, void * data );
+
+void delete_module( module_t * m )
+{
+ /* Clear out all the rules. */
+ if ( m->rules )
+ {
+ hashenumerate( m->rules, delete_rule_, (void *)0 );
+ hash_free( m->rules );
+ m->rules = 0;
+ }
+
+ if ( m->native_rules )
+ {
+ hashenumerate( m->native_rules, delete_native_rule, (void *)0 );
+ hash_free( m->native_rules );
+ m->native_rules = 0;
+ }
+
+ if ( m->variables )
+ {
+ var_done( m );
+ m->variables = 0;
+ }
+
+ if ( m->fixed_variables )
+ {
+ int i;
+ for ( i = 0; i < m->num_fixed_variables; ++i )
+ {
+ list_free( m->fixed_variables[ i ] );
+ }
+ BJAM_FREE( m->fixed_variables );
+ m->fixed_variables = 0;
+ }
+
+ if ( m->variable_indices )
+ {
+ hashenumerate( m->variable_indices, &free_fixed_variable, (void *)0 );
+ hash_free( m->variable_indices );
+ m->variable_indices = 0;
+ }
+
+ if ( m->imported_modules )
+ {
+ hashenumerate( m->imported_modules, delete_imported_modules, (void *)0 );
+ hash_free( m->imported_modules );
+ m->imported_modules = 0;
+ }
+}
+
+
+struct module_stats
+{
+ OBJECT * module_name;
+ struct hashstats rules_stats[ 1 ];
+ struct hashstats variables_stats[ 1 ];
+ struct hashstats variable_indices_stats[ 1 ];
+ struct hashstats imported_modules_stats[ 1 ];
+};
+
+
+static void module_stat( struct hash * hp, OBJECT * module, const char * name )
+{
+ if ( hp )
+ {
+ struct hashstats stats[ 1 ];
+ string id[ 1 ];
+ hashstats_init( stats );
+ string_new( id );
+ string_append( id, object_str( module ) );
+ string_push_back( id, ' ' );
+ string_append( id, name );
+
+ hashstats_add( stats, hp );
+ hashstats_print( stats, id->value );
+
+ string_free( id );
+ }
+}
+
+
+static void class_module_stat( struct hashstats * stats, OBJECT * module, const char * name )
+{
+ if ( stats->item_size )
+ {
+ string id[ 1 ];
+ string_new( id );
+ string_append( id, object_str( module ) );
+ string_append( id, " object " );
+ string_append( id, name );
+
+ hashstats_print( stats, id->value );
+
+ string_free( id );
+ }
+}
+
+
+static void stat_module( void * xmodule, void * data )
+{
+ module_t *m = (module_t *)xmodule;
+
+ if ( DEBUG_MEM || DEBUG_PROFILE )
+ {
+ struct hash * class_info = (struct hash *)data;
+ if ( m->class_module )
+ {
+ int found;
+ struct module_stats * ms = (struct module_stats *)hash_insert( class_info, m->class_module->name, &found );
+ if ( !found )
+ {
+ ms->module_name = m->class_module->name;
+ hashstats_init( ms->rules_stats );
+ hashstats_init( ms->variables_stats );
+ hashstats_init( ms->variable_indices_stats );
+ hashstats_init( ms->imported_modules_stats );
+ }
+
+ hashstats_add( ms->rules_stats, m->rules );
+ hashstats_add( ms->variables_stats, m->variables );
+ hashstats_add( ms->variable_indices_stats, m->variable_indices );
+ hashstats_add( ms->imported_modules_stats, m->imported_modules );
+ }
+ else
+ {
+ module_stat( m->rules, m->name, "rules" );
+ module_stat( m->variables, m->name, "variables" );
+ module_stat( m->variable_indices, m->name, "fixed variables" );
+ module_stat( m->imported_modules, m->name, "imported modules" );
+ }
+ }
+
+ delete_module( m );
+ object_free( m->name );
+}
+
+static void print_class_stats( void * xstats, void * data )
+{
+ struct module_stats * stats = (struct module_stats *)xstats;
+ class_module_stat( stats->rules_stats, stats->module_name, "rules" );
+ class_module_stat( stats->variables_stats, stats->module_name, "variables" );
+ class_module_stat( stats->variable_indices_stats, stats->module_name, "fixed variables" );
+ class_module_stat( stats->imported_modules_stats, stats->module_name, "imported modules" );
+}
+
+
+static void delete_module_( void * xmodule, void * data )
+{
+ module_t *m = (module_t *)xmodule;
+
+ delete_module( m );
+ object_free( m->name );
+}
+
+
+void modules_done()
+{
+ if ( DEBUG_MEM || DEBUG_PROFILE )
+ {
+ struct hash * class_hash = hashinit( sizeof( struct module_stats ), "object info" );
+ hashenumerate( module_hash, stat_module, (void *)class_hash );
+ hashenumerate( class_hash, print_class_stats, (void *)0 );
+ hash_free( class_hash );
+ }
+ hashenumerate( module_hash, delete_module_, (void *)0 );
+ hashdone( module_hash );
+ module_hash = 0;
+ delete_module( &root );
+}
+
+module_t * root_module()
+{
+ return &root;
+}
+
+
+void import_module( LIST * module_names, module_t * target_module )
+{
+ PROFILE_ENTER( IMPORT_MODULE );
+
+ struct hash * h;
+ LISTITER iter;
+ LISTITER end;
+
+ if ( !target_module->imported_modules )
+ target_module->imported_modules = hashinit( sizeof( char * ), "imported"
+ );
+ h = target_module->imported_modules;
+
+ iter = list_begin( module_names );
+ end = list_end( module_names );
+ for ( ; iter != end; iter = list_next( iter ) )
+ {
+ int found;
+ OBJECT * const s = list_item( iter );
+ OBJECT * * const ss = (OBJECT * *)hash_insert( h, s, &found );
+ if ( !found )
+ *ss = object_copy( s );
+ }
+
+ PROFILE_EXIT( IMPORT_MODULE );
+}
+
+
+static void add_module_name( void * r_, void * result_ )
+{
+ OBJECT * * const r = (OBJECT * *)r_;
+ LIST * * const result = (LIST * *)result_;
+ *result = list_push_back( *result, object_copy( *r ) );
+}
+
+
+LIST * imported_modules( module_t * module )
+{
+ LIST * result = L0;
+ if ( module->imported_modules )
+ hashenumerate( module->imported_modules, add_module_name, &result );
+ return result;
+}
+
+
+FUNCTION * function_bind_variables( FUNCTION *, module_t *, int * counter );
+FUNCTION * function_unbind_variables( FUNCTION * );
+
+struct fixed_variable
+{
+ OBJECT * key;
+ int n;
+};
+
+struct bind_vars_t
+{
+ module_t * module;
+ int counter;
+};
+
+
+static void free_fixed_variable( void * xvar, void * data )
+{
+ object_free( ( (struct fixed_variable *)xvar )->key );
+}
+
+
+static void bind_variables_for_rule( void * xrule, void * xdata )
+{
+ RULE * rule = (RULE *)xrule;
+ struct bind_vars_t * data = (struct bind_vars_t *)xdata;
+ if ( rule->procedure && rule->module == data->module )
+ rule->procedure = function_bind_variables( rule->procedure,
+ data->module, &data->counter );
+}
+
+
+void module_bind_variables( struct module_t * m )
+{
+ if ( m != root_module() && m->rules )
+ {
+ struct bind_vars_t data;
+ data.module = m;
+ data.counter = m->num_fixed_variables;
+ hashenumerate( m->rules, &bind_variables_for_rule, &data );
+ module_set_fixed_variables( m, data.counter );
+ }
+}
+
+
+int module_add_fixed_var( struct module_t * m, OBJECT * name, int * counter )
+{
+ struct fixed_variable * v;
+ int found;
+
+ assert( !m->class_module );
+
+ if ( !m->variable_indices )
+ m->variable_indices = hashinit( sizeof( struct fixed_variable ), "variable index table" );
+
+ v = (struct fixed_variable *)hash_insert( m->variable_indices, name, &found );
+ if ( !found )
+ {
+ v->key = object_copy( name );
+ v->n = (*counter)++;
+ }
+
+ return v->n;
+}
+
+
+LIST * var_get_and_clear_raw( module_t * m, OBJECT * name );
+
+static void load_fixed_variable( void * xvar, void * data )
+{
+ struct fixed_variable * var = (struct fixed_variable *)xvar;
+ struct module_t * m = (struct module_t *)data;
+ if ( var->n >= m->num_fixed_variables )
+ m->fixed_variables[ var->n ] = var_get_and_clear_raw( m, var->key );
+}
+
+
+void module_set_fixed_variables( struct module_t * m, int n_variables )
+{
+ /* Reallocate */
+ struct hash * variable_indices;
+ LIST * * fixed_variables = BJAM_MALLOC( n_variables * sizeof( LIST * ) );
+ if ( m->fixed_variables )
+ {
+ memcpy( fixed_variables, m->fixed_variables, m->num_fixed_variables * sizeof( LIST * ) );
+ BJAM_FREE( m->fixed_variables );
+ }
+ m->fixed_variables = fixed_variables;
+ variable_indices = m->class_module
+ ? m->class_module->variable_indices
+ : m->variable_indices;
+ if ( variable_indices )
+ hashenumerate( variable_indices, &load_fixed_variable, m );
+ m->num_fixed_variables = n_variables;
+}
+
+
+int module_get_fixed_var( struct module_t * m_, OBJECT * name )
+{
+ struct fixed_variable * v;
+ struct module_t * m = m_;
+
+ if ( m->class_module )
+ m = m->class_module;
+
+ if ( !m->variable_indices )
+ return -1;
+
+ v = (struct fixed_variable *)hash_find( m->variable_indices, name );
+ return v && v->n < m_->num_fixed_variables ? v->n : -1;
+}
diff --git a/src/kenlm/jam-files/engine/modules.h b/src/kenlm/jam-files/engine/modules.h
new file mode 100644
index 0000000..1b161c6
--- /dev/null
+++ b/src/kenlm/jam-files/engine/modules.h
@@ -0,0 +1,52 @@
+/*
+ * Copyright 2001-2004 David Abrahams.
+ * Distributed under the Boost Software License, Version 1.0.
+ * (See accompanying file LICENSE_1_0.txt or http://www.boost.org/LICENSE_1_0.txt)
+ */
+#ifndef MODULES_DWA10182001_H
+#define MODULES_DWA10182001_H
+
+#include "lists.h"
+#include "object.h"
+
+typedef struct module_t module_t ;
+struct module_t
+{
+ OBJECT * name;
+ struct hash * rules;
+ struct hash * variables;
+ struct hash * variable_indices;
+ int num_fixed_variables;
+ LIST * * fixed_variables;
+ struct hash * imported_modules;
+ module_t * class_module;
+ struct hash * native_rules;
+ int user_module;
+};
+
+module_t * bindmodule( OBJECT * name );
+module_t * root_module();
+void delete_module( module_t * );
+
+void import_module( LIST * module_names, module_t * target_module );
+LIST * imported_modules( module_t * );
+
+struct hash * demand_rules( module_t * );
+
+void module_bind_variables( module_t * );
+
+/*
+ * After calling module_add_fixed_var, module_set_fixed_variables must be called
+ * before accessing any variables in the module.
+ */
+int module_add_fixed_var( module_t *, OBJECT * name, int * n );
+void module_set_fixed_variables( module_t *, int n );
+
+/*
+ * Returns the index of the variable or -1 if none exists.
+ */
+int module_get_fixed_var( module_t *, OBJECT * name );
+
+void modules_done();
+
+#endif
diff --git a/src/kenlm/jam-files/engine/modules/order.c b/src/kenlm/jam-files/engine/modules/order.c
new file mode 100644
index 0000000..3a83d38
--- /dev/null
+++ b/src/kenlm/jam-files/engine/modules/order.c
@@ -0,0 +1,160 @@
+/* Copyright 2004. Vladimir Prus
+ * Distributed under the Boost Software License, Version 1.0.
+ * (See accompanying file LICENSE_1_0.txt or copy at
+ * http://www.boost.org/LICENSE_1_0.txt)
+ */
+
+#include "../lists.h"
+#include "../mem.h"
+#include "../native.h"
+#include "../object.h"
+#include "../strings.h"
+#include "../variable.h"
+
+
+/* Use a quite kludgy approach: when we add an order dependency from 'a' to
+ * 'b', just append 'b' to the value of variable 'a'.
+ */
+LIST * add_pair( FRAME * frame, int flags )
+{
+ LIST * arg = lol_get( frame->args, 0 );
+ LISTITER iter = list_begin( arg );
+ LISTITER const end = list_end( arg );
+ var_set( frame->module, list_item( iter ), list_copy_range( arg, list_next(
+ iter ), end ), VAR_APPEND );
+ return L0;
+}
+
+
+/* Given a list and a value, returns position of that value in the list, or -1
+ * if not found.
+ */
+int list_index( LIST * list, OBJECT * value )
+{
+ int result = 0;
+ LISTITER iter = list_begin( list );
+ LISTITER const end = list_end( list );
+ for ( ; iter != end; iter = list_next( iter ), ++result )
+ if ( object_equal( list_item( iter ), value ) )
+ return result;
+ return -1;
+}
+
+enum colors { white, gray, black };
+
+
+/* Main routine for topological sort. Calls itself recursively on all adjacent
+ * vertices which were not yet visited. After that, 'current_vertex' is added to
+ * '*result_ptr'.
+ */
+void do_ts( int * * graph, int current_vertex, int * colors, int * * result_ptr
+ )
+{
+ int i;
+
+ colors[ current_vertex ] = gray;
+ for ( i = 0; graph[ current_vertex ][ i ] != -1; ++i )
+ {
+ int adjacent_vertex = graph[ current_vertex ][ i ];
+ if ( colors[ adjacent_vertex ] == white )
+ do_ts( graph, adjacent_vertex, colors, result_ptr );
+ /* The vertex is either black, in which case we do not have to do
+ * anything, or gray, in which case we have a loop. If we have a loop,
+ * it is not clear what useful diagnostic we can emit, so we emit
+ * nothing.
+ */
+ }
+ colors[ current_vertex ] = black;
+ **result_ptr = current_vertex;
+ ( *result_ptr )++;
+}
+
+
+void topological_sort( int * * graph, int num_vertices, int * result )
+{
+ int i;
+ int * colors = ( int * )BJAM_CALLOC( num_vertices, sizeof( int ) );
+ for ( i = 0; i < num_vertices; ++i )
+ colors[ i ] = white;
+
+ for ( i = 0; i < num_vertices; ++i )
+ if ( colors[ i ] == white )
+ do_ts( graph, i, colors, &result );
+
+ BJAM_FREE( colors );
+}
+
+
+LIST * order( FRAME * frame, int flags )
+{
+ LIST * arg = lol_get( frame->args, 0 );
+ LIST * result = L0;
+ int src;
+ LISTITER iter = list_begin( arg );
+ LISTITER const end = list_end( arg );
+
+ /* We need to create a graph of order dependencies between the passed
+ * objects. We assume there are no duplicates passed to 'add_pair'.
+ */
+ int length = list_length( arg );
+ int * * graph = ( int * * )BJAM_CALLOC( length, sizeof( int * ) );
+ int * order = ( int * )BJAM_MALLOC( ( length + 1 ) * sizeof( int ) );
+
+ for ( src = 0; iter != end; iter = list_next( iter ), ++src )
+ {
+ /* For all objects this one depends upon, add elements to 'graph'. */
+ LIST * dependencies = var_get( frame->module, list_item( iter ) );
+ int index = 0;
+ LISTITER dep_iter = list_begin( dependencies );
+ LISTITER const dep_end = list_end( dependencies );
+
+ graph[ src ] = ( int * )BJAM_CALLOC( list_length( dependencies ) + 1,
+ sizeof( int ) );
+ for ( ; dep_iter != dep_end; dep_iter = list_next( dep_iter ) )
+ {
+ int const dst = list_index( arg, list_item( dep_iter ) );
+ if ( dst != -1 )
+ graph[ src ][ index++ ] = dst;
+ }
+ graph[ src ][ index ] = -1;
+ }
+
+ topological_sort( graph, length, order );
+
+ {
+ int index = length - 1;
+ for ( ; index >= 0; --index )
+ {
+ int i;
+ LISTITER iter = list_begin( arg );
+ LISTITER const end = list_end( arg );
+ for ( i = 0; i < order[ index ]; ++i, iter = list_next( iter ) );
+ result = list_push_back( result, object_copy( list_item( iter ) ) );
+ }
+ }
+
+ /* Clean up */
+ {
+ int i;
+ for ( i = 0; i < length; ++i )
+ BJAM_FREE( graph[ i ] );
+ BJAM_FREE( graph );
+ BJAM_FREE( order );
+ }
+
+ return result;
+}
+
+
+void init_order()
+{
+ {
+ char const * args[] = { "first", "second", 0 };
+ declare_native_rule( "class@order", "add-pair", args, add_pair, 1 );
+ }
+
+ {
+ char const * args[] = { "objects", "*", 0 };
+ declare_native_rule( "class@order", "order", args, order, 1 );
+ }
+}
diff --git a/src/kenlm/jam-files/engine/modules/path.c b/src/kenlm/jam-files/engine/modules/path.c
new file mode 100644
index 0000000..f8dedac
--- /dev/null
+++ b/src/kenlm/jam-files/engine/modules/path.c
@@ -0,0 +1,25 @@
+/* Copyright Vladimir Prus 2003.
+ * Distributed under the Boost Software License, Version 1.0.
+ * (See accompanying file LICENSE_1_0.txt or copy at
+ * http://www.boost.org/LICENSE_1_0.txt)
+ */
+
+#include "../constants.h"
+#include "../frames.h"
+#include "../lists.h"
+#include "../native.h"
+#include "../timestamp.h"
+
+
+LIST * path_exists( FRAME * frame, int flags )
+{
+ return file_query( list_front( lol_get( frame->args, 0 ) ) ) ?
+ list_new( object_copy( constant_true ) ) : L0;
+}
+
+
+void init_path()
+{
+ char const * args[] = { "location", 0 };
+ declare_native_rule( "path", "exists", args, path_exists, 1 );
+}
diff --git a/src/kenlm/jam-files/engine/modules/property-set.c b/src/kenlm/jam-files/engine/modules/property-set.c
new file mode 100644
index 0000000..21e35d5
--- /dev/null
+++ b/src/kenlm/jam-files/engine/modules/property-set.c
@@ -0,0 +1,330 @@
+/*
+ * Copyright 2013 Steven Watanabe
+ * Distributed under the Boost Software License, Version 1.0.
+ * (See accompanying file LICENSE_1_0.txt or copy at
+ * http://www.boost.org/LICENSE_1_0.txt)
+ */
+
+#include "../object.h"
+#include "../lists.h"
+#include "../modules.h"
+#include "../rules.h"
+#include "../variable.h"
+#include "../native.h"
+#include "../compile.h"
+#include "../mem.h"
+#include "../constants.h"
+#include "string.h"
+
+struct ps_map_entry
+{
+ struct ps_map_entry * next;
+ LIST * key;
+ OBJECT * value;
+};
+
+struct ps_map
+{
+ struct ps_map_entry * * table;
+ size_t table_size;
+ size_t num_elems;
+};
+
+static unsigned list_hash(LIST * key)
+{
+ unsigned int hash = 0;
+ LISTITER iter = list_begin( key ), end = list_end( key );
+ for ( ; iter != end; ++iter )
+ {
+ hash = hash * 2147059363 + object_hash( list_item( iter ) );
+ }
+ return hash;
+}
+
+static int list_equal( LIST * lhs, LIST * rhs )
+{
+ LISTITER lhs_iter, lhs_end, rhs_iter;
+ if ( list_length( lhs ) != list_length( rhs ) )
+ {
+ return 0;
+ }
+ lhs_iter = list_begin( lhs );
+ lhs_end = list_end( lhs );
+ rhs_iter = list_begin( rhs );
+ for ( ; lhs_iter != lhs_end; ++lhs_iter, ++rhs_iter )
+ {
+ if ( ! object_equal( list_item( lhs_iter ), list_item( rhs_iter ) ) )
+ {
+ return 0;
+ }
+ }
+ return 1;
+}
+
+static void ps_map_init( struct ps_map * map )
+{
+ size_t i;
+ map->table_size = 2;
+ map->num_elems = 0;
+ map->table = BJAM_MALLOC( map->table_size * sizeof( struct ps_map_entry * ) );
+ for ( i = 0; i < map->table_size; ++i )
+ {
+ map->table[ i ] = NULL;
+ }
+}
+
+static void ps_map_destroy( struct ps_map * map )
+{
+ size_t i;
+ for ( i = 0; i < map->table_size; ++i )
+ {
+ struct ps_map_entry * pos;
+ for ( pos = map->table[ i ]; pos; )
+ {
+ struct ps_map_entry * tmp = pos->next;
+ BJAM_FREE( pos );
+ pos = tmp;
+ }
+ }
+ BJAM_FREE( map->table );
+}
+
+static void ps_map_rehash( struct ps_map * map )
+{
+ struct ps_map old = *map;
+ size_t i;
+ map->table = BJAM_MALLOC( map->table_size * 2 * sizeof( struct ps_map_entry * ) );
+ map->table_size *= 2;
+ for ( i = 0; i < map->table_size; ++i )
+ {
+ map->table[ i ] = NULL;
+ }
+ for ( i = 0; i < old.table_size; ++i )
+ {
+ struct ps_map_entry * pos;
+ for ( pos = old.table[ i ]; pos; )
+ {
+ struct ps_map_entry * tmp = pos->next;
+
+ unsigned hash_val = list_hash( pos->key );
+ unsigned bucket = hash_val % map->table_size;
+ pos->next = map->table[ bucket ];
+ map->table[ bucket ] = pos;
+
+ pos = tmp;
+ }
+ }
+ BJAM_FREE( old.table );
+}
+
+static struct ps_map_entry * ps_map_insert(struct ps_map * map, LIST * key)
+{
+ unsigned hash_val = list_hash( key );
+ unsigned bucket = hash_val % map->table_size;
+ struct ps_map_entry * pos;
+ for ( pos = map->table[bucket]; pos ; pos = pos->next )
+ {
+ if ( list_equal( pos->key, key ) )
+ return pos;
+ }
+
+ if ( map->num_elems >= map->table_size )
+ {
+ ps_map_rehash( map );
+ bucket = hash_val % map->table_size;
+ }
+ pos = BJAM_MALLOC( sizeof( struct ps_map_entry ) );
+ pos->next = map->table[bucket];
+ pos->key = key;
+ pos->value = 0;
+ map->table[bucket] = pos;
+ ++map->num_elems;
+ return pos;
+}
+
+static struct ps_map all_property_sets;
+
+LIST * property_set_create( FRAME * frame, int flags )
+{
+ LIST * properties = lol_get( frame->args, 0 );
+ LIST * sorted = list_sort( properties );
+ LIST * unique = list_unique( sorted );
+ struct ps_map_entry * pos = ps_map_insert( &all_property_sets, unique );
+ list_free( sorted );
+ if ( pos->value )
+ {
+ list_free( unique );
+ return list_new( object_copy( pos->value ) );
+ }
+ else
+ {
+ OBJECT * rulename = object_new( "new" );
+ OBJECT * varname = object_new( "self.raw" );
+ LIST * val = call_rule( rulename, frame,
+ list_new( object_new( "property-set" ) ), 0 );
+ LISTITER iter, end;
+ object_free( rulename );
+ pos->value = list_front( val );
+ var_set( bindmodule( pos->value ), varname, unique, VAR_SET );
+ object_free( varname );
+
+ for ( iter = list_begin( unique ), end = list_end( unique ); iter != end; ++iter )
+ {
+ const char * str = object_str( list_item( iter ) );
+ if ( str[ 0 ] != '<' || ! strchr( str, '>' ) )
+ {
+ string message[ 1 ];
+ string_new( message );
+ string_append( message, "Invalid property: '" );
+ string_append( message, str );
+ string_append( message, "'" );
+ rulename = object_new( "errors.error" );
+ call_rule( rulename, frame,
+ list_new( object_new( message->value ) ), 0 );
+ /* unreachable */
+ string_free( message );
+ object_free( rulename );
+ }
+ }
+
+ return val;
+ }
+}
+
+/* binary search for the property value */
+LIST * property_set_get( FRAME * frame, int flags )
+{
+ OBJECT * varname = object_new( "self.raw" );
+ LIST * props = var_get( frame->module, varname );
+ const char * name = object_str( list_front( lol_get( frame->args, 0 ) ) );
+ size_t name_len = strlen( name );
+ LISTITER begin, end;
+ LIST * result = L0;
+ object_free( varname );
+
+ /* Assumes random access */
+ begin = list_begin( props ), end = list_end( props );
+
+ while ( 1 )
+ {
+ ptrdiff_t diff = (end - begin);
+ LISTITER mid = begin + diff / 2;
+ int res;
+ if ( diff == 0 )
+ {
+ return L0;
+ }
+ res = strncmp( object_str( list_item( mid ) ), name, name_len );
+ if ( res < 0 )
+ {
+ begin = mid + 1;
+ }
+ else if ( res > 0 )
+ {
+ end = mid;
+ }
+ else /* We've found the property */
+ {
+ /* Find the beginning of the group */
+ LISTITER tmp = mid;
+ while ( tmp > begin )
+ {
+ --tmp;
+ res = strncmp( object_str( list_item( tmp ) ), name, name_len );
+ if ( res != 0 )
+ {
+ ++tmp;
+ break;
+ }
+ }
+ begin = tmp;
+ /* Find the end of the group */
+ tmp = mid + 1;
+ while ( tmp < end )
+ {
+ res = strncmp( object_str( list_item( tmp ) ), name, name_len );
+ if ( res != 0 ) break;
+ ++tmp;
+ }
+ end = tmp;
+ break;
+ }
+ }
+
+ for ( ; begin != end; ++begin )
+ {
+ result = list_push_back( result,
+ object_new( object_str( list_item( begin ) ) + name_len ) );
+ }
+
+ return result;
+}
+
+/* binary search for the property value */
+LIST * property_set_contains_features( FRAME * frame, int flags )
+{
+ OBJECT * varname = object_new( "self.raw" );
+ LIST * props = var_get( frame->module, varname );
+ LIST * features = lol_get( frame->args, 0 );
+ LIST * result = L0;
+ LISTITER features_iter = list_begin( features );
+ LISTITER features_end = list_end( features ) ;
+ object_free( varname );
+
+ for ( ; features_iter != features_end; ++features_iter )
+ {
+ const char * name = object_str( list_item( features_iter ) );
+ size_t name_len = strlen( name );
+ LISTITER begin, end;
+ /* Assumes random access */
+ begin = list_begin( props ), end = list_end( props );
+
+ while ( 1 )
+ {
+ ptrdiff_t diff = (end - begin);
+ LISTITER mid = begin + diff / 2;
+ int res;
+ if ( diff == 0 )
+ {
+ /* The feature is missing */
+ return L0;
+ }
+ res = strncmp( object_str( list_item( mid ) ), name, name_len );
+ if ( res < 0 )
+ {
+ begin = mid + 1;
+ }
+ else if ( res > 0 )
+ {
+ end = mid;
+ }
+ else /* We've found the property */
+ {
+ break;
+ }
+ }
+ }
+ return list_new( object_copy( constant_true ) );
+}
+
+void init_property_set()
+{
+ {
+ char const * args[] = { "raw-properties", "*", 0 };
+ declare_native_rule( "property-set", "create", args, property_set_create, 1 );
+ }
+ {
+ char const * args[] = { "feature", 0 };
+ declare_native_rule( "class@property-set", "get", args, property_set_get, 1 );
+ }
+ {
+ char const * args[] = { "features", "*", 0 };
+ declare_native_rule( "class@property-set", "contains-features", args, property_set_contains_features, 1 );
+ }
+ ps_map_init( &all_property_sets );
+}
+
+void property_set_done()
+{
+ ps_map_destroy( &all_property_sets );
+}
diff --git a/src/kenlm/jam-files/engine/modules/readme.txt b/src/kenlm/jam-files/engine/modules/readme.txt
new file mode 100644
index 0000000..2edf6e1
--- /dev/null
+++ b/src/kenlm/jam-files/engine/modules/readme.txt
@@ -0,0 +1,3 @@
+
+This directory contains sources which declare native
+rules for Boost.Build modules.
\ No newline at end of file
diff --git a/src/kenlm/jam-files/engine/modules/regex.c b/src/kenlm/jam-files/engine/modules/regex.c
new file mode 100644
index 0000000..d9f8177
--- /dev/null
+++ b/src/kenlm/jam-files/engine/modules/regex.c
@@ -0,0 +1,220 @@
+/*
+ * Copyright 2003. Vladimir Prus
+ * Distributed under the Boost Software License, Version 1.0.
+ * (See accompanying file LICENSE_1_0.txt or copy at
+ * http://www.boost.org/LICENSE_1_0.txt)
+ */
+
+#include "../mem.h"
+#include "../native.h"
+#include "../strings.h"
+#include "../subst.h"
+
+/*
+rule split ( string separator )
+{
+ local result ;
+ local s = $(string) ;
+
+ local match = 1 ;
+ while $(match)
+ {
+ match = [ MATCH ^(.*)($(separator))(.*) : $(s) ] ;
+ if $(match)
+ {
+ match += "" ; # in case 3rd item was empty - works around MATCH bug
+ result = $(match[3]) $(result) ;
+ s = $(match[1]) ;
+ }
+ }
+ return $(s) $(result) ;
+}
+*/
+
+LIST * regex_split( FRAME * frame, int flags )
+{
+ LIST * args = lol_get( frame->args, 0 );
+ OBJECT * s;
+ OBJECT * separator;
+ regexp * re;
+ const char * pos;
+ LIST * result = L0;
+ LISTITER iter = list_begin( args );
+ s = list_item( iter );
+ separator = list_item( list_next( iter ) );
+
+ re = regex_compile( separator );
+
+ pos = object_str( s );
+ while ( regexec( re, pos ) )
+ {
+ result = list_push_back( result, object_new_range( pos, re->startp[ 0 ] - pos ) );
+ pos = re->endp[ 0 ];
+ }
+
+ result = list_push_back( result, object_new( pos ) );
+
+ return result;
+}
+
+/*
+rule replace (
+ string # The string to modify.
+ match # The characters to replace.
+ replacement # The string to replace with.
+ )
+{
+ local result = "" ;
+ local parts = 1 ;
+ while $(parts)
+ {
+ parts = [ MATCH ^(.*)($(match))(.*) : $(string) ] ;
+ if $(parts)
+ {
+ parts += "" ;
+ result = "$(replacement)$(parts[3])$(result)" ;
+ string = $(parts[1]) ;
+ }
+ }
+ string ?= "" ;
+ result = "$(string)$(result)" ;
+ return $(result) ;
+}
+*/
+
+LIST * regex_replace( FRAME * frame, int flags )
+{
+ LIST * args = lol_get( frame->args, 0 );
+ OBJECT * s;
+ OBJECT * match;
+ OBJECT * replacement;
+ regexp * re;
+ const char * pos;
+ string buf[ 1 ];
+ LIST * result;
+ LISTITER iter = list_begin( args );
+ s = list_item( iter );
+ iter = list_next( iter );
+ match = list_item( iter );
+ iter = list_next( iter );
+ replacement = list_item(iter );
+
+ re = regex_compile( match );
+
+ string_new( buf );
+
+ pos = object_str( s );
+ while ( regexec( re, pos ) )
+ {
+ string_append_range( buf, pos, re->startp[ 0 ] );
+ string_append( buf, object_str( replacement ) );
+ pos = re->endp[ 0 ];
+ }
+ string_append( buf, pos );
+
+ result = list_new( object_new( buf->value ) );
+
+ string_free( buf );
+
+ return result;
+}
+
+/*
+rule transform ( list * : pattern : indices * )
+{
+ indices ?= 1 ;
+ local result ;
+ for local e in $(list)
+ {
+ local m = [ MATCH $(pattern) : $(e) ] ;
+ if $(m)
+ {
+ result += $(m[$(indices)]) ;
+ }
+ }
+ return $(result) ;
+}
+*/
+
+LIST * regex_transform( FRAME * frame, int flags )
+{
+ LIST * const l = lol_get( frame->args, 0 );
+ LIST * const pattern = lol_get( frame->args, 1 );
+ LIST * const indices_list = lol_get( frame->args, 2 );
+ int * indices = 0;
+ int size;
+ LIST * result = L0;
+
+ if ( !list_empty( indices_list ) )
+ {
+ int * p;
+ LISTITER iter = list_begin( indices_list );
+ LISTITER const end = list_end( indices_list );
+ size = list_length( indices_list );
+ indices = (int *)BJAM_MALLOC( size * sizeof( int ) );
+ for ( p = indices; iter != end; iter = list_next( iter ) )
+ *p++ = atoi( object_str( list_item( iter ) ) );
+ }
+ else
+ {
+ size = 1;
+ indices = (int *)BJAM_MALLOC( sizeof( int ) );
+ *indices = 1;
+ }
+
+ {
+ /* Result is cached and intentionally never freed */
+ regexp * const re = regex_compile( list_front( pattern ) );
+
+ LISTITER iter = list_begin( l );
+ LISTITER const end = list_end( l );
+
+ string buf[ 1 ];
+ string_new( buf );
+
+ for ( ; iter != end; iter = list_next( iter ) )
+ {
+ if ( regexec( re, object_str( list_item( iter ) ) ) )
+ {
+ int i = 0;
+ for ( ; i < size; ++i )
+ {
+ int const index = indices[ i ];
+ /* Skip empty submatches. Not sure it is right in all cases,
+ * but surely is right for the case for which this routine
+ * is optimized -- header scanning.
+ */
+ if ( re->startp[ index ] != re->endp[ index ] )
+ {
+ string_append_range( buf, re->startp[ index ],
+ re->endp[ index ] );
+ result = list_push_back( result, object_new( buf->value
+ ) );
+ string_truncate( buf, 0 );
+ }
+ }
+ }
+ }
+ string_free( buf );
+ }
+
+ BJAM_FREE( indices );
+ return result;
+}
+
+
+void init_regex()
+{
+ {
+ char const * args[] = { "string", "separator", 0 };
+ declare_native_rule( "regex", "split", args, regex_split, 1 );
+ }
+ {
+ char const * args[] = { "string", "match", "replacement", 0 };
+ declare_native_rule( "regex", "replace", args, regex_replace, 1 );
+ }
+ {
+ char const * args[] = { "list", "*", ":", "pattern", ":", "indices", "*", 0 };
+ declare_native_rule( "regex", "transform", args, regex_transform, 2 );
+ }
+}
diff --git a/src/kenlm/jam-files/engine/modules/sequence.c b/src/kenlm/jam-files/engine/modules/sequence.c
new file mode 100644
index 0000000..08ed305
--- /dev/null
+++ b/src/kenlm/jam-files/engine/modules/sequence.c
@@ -0,0 +1,97 @@
+/*
+ * Copyright Vladimir Prus 2003.
+ * Distributed under the Boost Software License, Version 1.0.
+ * (See accompanying file LICENSE_1_0.txt or copy at
+ * http://www.boost.org/LICENSE_1_0.txt)
+ */
+
+#include "../native.h"
+#include "../object.h"
+#include "../lists.h"
+#include "../compile.h"
+
+#include <stdlib.h>
+
+
+#ifndef max
+# define max(a,b) ((a)>(b)?(a):(b))
+#endif
+
+
+LIST * sequence_select_highest_ranked( FRAME * frame, int flags )
+{
+ /* Returns all of 'elements' for which corresponding element in parallel */
+ /* list 'rank' is equal to the maximum value in 'rank'. */
+
+ LIST * const elements = lol_get( frame->args, 0 );
+ LIST * const rank = lol_get( frame->args, 1 );
+
+ LIST * result = L0;
+ int highest_rank = -1;
+
+ {
+ LISTITER iter = list_begin( rank );
+ LISTITER const end = list_end( rank );
+ for ( ; iter != end; iter = list_next( iter ) )
+ {
+ int const current = atoi( object_str( list_item( iter ) ) );
+ highest_rank = max( highest_rank, current );
+ }
+ }
+
+ {
+ LISTITER iter = list_begin( rank );
+ LISTITER const end = list_end( rank );
+ LISTITER elements_iter = list_begin( elements );
+ LISTITER const elements_end = list_end( elements );
+ for ( ; iter != end; iter = list_next( iter ), elements_iter =
+ list_next( elements_iter ) )
+ if ( atoi( object_str( list_item( iter ) ) ) == highest_rank )
+ result = list_push_back( result, object_copy( list_item(
+ elements_iter ) ) );
+ }
+
+ return result;
+}
+
+LIST * sequence_transform( FRAME * frame, int flags )
+{
+ LIST * function = lol_get( frame->args, 0 );
+ LIST * sequence = lol_get( frame->args, 1 );
+ LIST * result = L0;
+ OBJECT * function_name = list_front( function );
+ LISTITER args_begin = list_next( list_begin( function ) ), args_end = list_end( function );
+ LISTITER iter = list_begin( sequence ), end = list_end( sequence );
+ RULE * rule = bindrule( function_name, frame->prev->module );
+
+ for ( ; iter != end; iter = list_next( iter ) )
+ {
+ FRAME inner[ 1 ];
+
+ frame_init( inner );
+ inner->prev = frame;
+ inner->prev_user = frame->prev_user;
+ inner->module = frame->prev->module;
+
+ lol_add( inner->args, list_push_back( list_copy_range( function, args_begin, args_end ), object_copy( list_item( iter ) ) ) );
+ result = list_append( result, evaluate_rule( rule, function_name, inner ) );
+
+ frame_free( inner );
+ }
+
+ return result;
+}
+
+void init_sequence()
+{
+ {
+ char const * args[] = { "elements", "*", ":", "rank", "*", 0 };
+ declare_native_rule( "sequence", "select-highest-ranked", args,
+ sequence_select_highest_ranked, 1 );
+ }
+ {
+ char const * args[] = { "function", "+", ":", "sequence", "*", 0 };
+ declare_native_rule( "sequence", "transform", args,
+ sequence_transform, 1 );
+ }
+}
diff --git a/src/kenlm/jam-files/engine/modules/set.c b/src/kenlm/jam-files/engine/modules/set.c
new file mode 100644
index 0000000..77a314d
--- /dev/null
+++ b/src/kenlm/jam-files/engine/modules/set.c
@@ -0,0 +1,43 @@
+/* Copyright Vladimir Prus 2003. Distributed under the Boost */
+/* Software License, Version 1.0. (See accompanying */
+/* file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) */
+
+#include "../native.h"
+#include "../object.h"
+
+/*
+ local result = ;
+ local element ;
+ for element in $(B)
+ {
+ if ! ( $(element) in $(A) )
+ {
+ result += $(element) ;
+ }
+ }
+ return $(result) ;
+*/
+LIST *set_difference( FRAME *frame, int flags )
+{
+
+ LIST* b = lol_get( frame->args, 0 );
+ LIST* a = lol_get( frame->args, 1 );
+
+ LIST* result = L0;
+ LISTITER iter = list_begin( b ), end = list_end( b );
+ for( ; iter != end; iter = list_next( iter ) )
+ {
+ if (!list_in(a, list_item(iter)))
+ result = list_push_back(result, object_copy(list_item(iter)));
+ }
+ return result;
+}
+
+void init_set()
+{
+ {
+ const char* args[] = { "B", "*", ":", "A", "*", 0 };
+ declare_native_rule("set", "difference", args, set_difference, 1);
+ }
+
+}
diff --git a/src/kenlm/jam-files/engine/native.c b/src/kenlm/jam-files/engine/native.c
new file mode 100644
index 0000000..68828aa
--- /dev/null
+++ b/src/kenlm/jam-files/engine/native.c
@@ -0,0 +1,34 @@
+/* Copyright 2003. Vladimir Prus
+ * Distributed under the Boost Software License, Version 1.0.
+ * (See accompanying file LICENSE_1_0.txt or copy at
+ * http://www.boost.org/LICENSE_1_0.txt)
+ */
+
+#include "native.h"
+
+#include "hash.h"
+
+#include <assert.h>
+
+
+void declare_native_rule( char const * module, char const * rule,
+ char const * * args, LIST * (*f)( FRAME *, int ), int version )
+{
+ OBJECT * const module_obj = module ? object_new( module ) : 0 ;
+ module_t * m = bindmodule( module_obj );
+ if ( module_obj )
+ object_free( module_obj );
+ if ( !m->native_rules )
+ m->native_rules = hashinit( sizeof( native_rule_t ), "native rules" );
+
+ {
+ OBJECT * const name = object_new( rule );
+ int found;
+ native_rule_t * const np = (native_rule_t *)hash_insert(
+ m->native_rules, name, &found );
+ np->name = name;
+ assert( !found );
+ np->procedure = function_builtin( f, 0, args );
+ np->version = version;
+ }
+}
diff --git a/src/kenlm/jam-files/engine/native.h b/src/kenlm/jam-files/engine/native.h
new file mode 100644
index 0000000..6d38d01
--- /dev/null
+++ b/src/kenlm/jam-files/engine/native.h
@@ -0,0 +1,34 @@
+/* Copyright 2003. David Abrahams
+ * Distributed under the Boost Software License, Version 1.0.
+ * (See accompanying file LICENSE_1_0.txt or copy at
+ * http://www.boost.org/LICENSE_1_0.txt)
+ */
+
+#ifndef NATIVE_H_VP_2003_12_09
+#define NATIVE_H_VP_2003_12_09
+
+#include "function.h"
+#include "frames.h"
+#include "lists.h"
+#include "object.h"
+
+typedef struct native_rule_t
+{
+ OBJECT * name;
+ FUNCTION * procedure;
+
+ /* Version of the interface that the native rule provides. It is possible
+ * that we want to change the set parameter for existing native rule. In
+ * that case, version number should be incremented so Boost.Build can check
+ * for the version it relies on.
+ *
+ * Versions are numbered from 1.
+ */
+ int version;
+} native_rule_t;
+/* MSVC debugger gets confused unless the native_rule_t typedef is provided. */
+
+void declare_native_rule( char const * module, char const * rule,
+ char const * * args, LIST * (*f)( FRAME *, int ), int version );
+
+#endif
diff --git a/src/kenlm/jam-files/engine/object.c b/src/kenlm/jam-files/engine/object.c
new file mode 100644
index 0000000..ef46e4a
--- /dev/null
+++ b/src/kenlm/jam-files/engine/object.c
@@ -0,0 +1,394 @@
+/*
+ * Copyright 1993, 1995 Christopher Seiwald.
+ * Copyright 2011 Steven Watanabe
+ *
+ * This file is part of Jam - see jam.c for Copyright information.
+ */
+
+/*
+ * object.c - object manipulation routines
+ *
+ * External functions:
+ * object_new() - create an object from a string
+ * object_new_range() - create an object from a string of given length
+ * object_copy() - return a copy of an object
+ * object_free() - free an object
+ * object_str() - get the string value of an object
+ * object_done() - free string tables
+ *
+ * This implementation builds a hash table of all strings, so that multiple
+ * calls of object_new() on the same string allocate memory for the string once.
+ * Strings are never actually freed.
+ */
+
+#include "jam.h"
+#include "object.h"
+
+#include <assert.h>
+#include <stddef.h>
+#include <stdlib.h>
+
+
+#define OBJECT_MAGIC 0xa762e0e3u
+
+#ifndef object_copy
+
+struct hash_header
+{
+#ifndef NDEBUG
+ unsigned int magic;
+#endif
+ unsigned int hash;
+ struct hash_item * next;
+};
+
+#endif
+
+struct hash_item
+{
+ struct hash_header header;
+ char data[ 1 ];
+};
+
+#define ALLOC_ALIGNMENT (sizeof(struct hash_item) - sizeof(struct hash_header))
+
+typedef struct string_set
+{
+ unsigned int num;
+ unsigned int size;
+ struct hash_item * * data;
+} string_set;
+
+static string_set strhash;
+static int strtotal = 0;
+static int strcount_in = 0;
+static int strcount_out = 0;
+
+
+/*
+ * Immortal string allocator implementation speeds string allocation and cuts
+ * down on internal fragmentation.
+ */
+
+#define STRING_BLOCK 4096
+typedef struct strblock
+{
+ struct strblock * next;
+ char data[ STRING_BLOCK ];
+} strblock;
+
+static strblock * strblock_chain = 0;
+
+/* Storage remaining in the current strblock */
+static char * storage_start = 0;
+static char * storage_finish = 0;
+
+
+/*
+ * allocate() - Allocate n bytes of immortal string storage.
+ */
+
+static char * allocate( size_t n )
+{
+#ifdef BJAM_NEWSTR_NO_ALLOCATE
+ return (char *)BJAM_MALLOC( n );
+#else
+ /* See if we can grab storage from an existing block. */
+ size_t remaining = storage_finish - storage_start;
+ n = ( ( n + ALLOC_ALIGNMENT - 1 ) / ALLOC_ALIGNMENT ) * ALLOC_ALIGNMENT;
+ if ( remaining >= n )
+ {
+ char * result = storage_start;
+ storage_start += n;
+ return result;
+ }
+ else /* Must allocate a new block. */
+ {
+ strblock * new_block;
+ size_t nalloc = n;
+ if ( nalloc < STRING_BLOCK )
+ nalloc = STRING_BLOCK;
+
+ /* Allocate a new block and link into the chain. */
+ new_block = (strblock *)BJAM_MALLOC( offsetof( strblock, data[ 0 ] ) +
+ nalloc * sizeof( new_block->data[ 0 ] ) );
+ if ( new_block == 0 )
+ return 0;
+ new_block->next = strblock_chain;
+ strblock_chain = new_block;
+
+ /* Take future allocations out of the larger remaining space. */
+ if ( remaining < nalloc - n )
+ {
+ storage_start = new_block->data + n;
+ storage_finish = new_block->data + nalloc;
+ }
+ return new_block->data;
+ }
+#endif
+}
+
+
+static unsigned int hash_keyval( char const * key, int const size )
+{
+ unsigned int const magic = 2147059363;
+ unsigned int hash = 0;
+
+ unsigned int i;
+ for ( i = 0; i < size / sizeof( unsigned int ); ++i )
+ {
+ unsigned int val;
+ memcpy( &val, key, sizeof( unsigned int ) );
+ hash = hash * magic + val;
+ key += sizeof( unsigned int );
+ }
+
+ {
+ unsigned int val = 0;
+ memcpy( &val, key, size % sizeof( unsigned int ) );
+ hash = hash * magic + val;
+ }
+
+ return hash + ( hash >> 17 );
+}
+
+
+static void string_set_init( string_set * set )
+{
+ set->size = 0;
+ set->num = 4;
+ set->data = (struct hash_item * *)BJAM_MALLOC( set->num * sizeof( struct hash_item * ) );
+ memset( set->data, 0, set->num * sizeof( struct hash_item * ) );
+}
+
+
+static void string_set_done( string_set * set )
+{
+ BJAM_FREE( set->data );
+}
+
+
+static void string_set_resize( string_set * set )
+{
+ unsigned i;
+ string_set new_set;
+ new_set.num = set->num * 2;
+ new_set.size = set->size;
+ new_set.data = (struct hash_item * *)BJAM_MALLOC( sizeof( struct hash_item *
+ ) * new_set.num );
+ memset( new_set.data, 0, sizeof( struct hash_item * ) * new_set.num );
+ for ( i = 0; i < set->num; ++i )
+ {
+ while ( set->data[ i ] )
+ {
+ struct hash_item * temp = set->data[ i ];
+ unsigned pos = temp->header.hash % new_set.num;
+ set->data[ i ] = temp->header.next;
+ temp->header.next = new_set.data[ pos ];
+ new_set.data[ pos ] = temp;
+ }
+ }
+ BJAM_FREE( set->data );
+ *set = new_set;
+}
+
+
+static char const * string_set_insert( string_set * set, char const * string,
+ int const size )
+{
+ unsigned hash = hash_keyval( string, size );
+ unsigned pos = hash % set->num;
+
+ struct hash_item * result;
+
+ for ( result = set->data[ pos ]; result; result = result->header.next )
+ if ( !strncmp( result->data, string, size ) && !result->data[ size ] )
+ return result->data;
+
+ if ( set->size >= set->num )
+ {
+ string_set_resize( set );
+ pos = hash % set->num;
+ }
+
+ result = (struct hash_item *)allocate( sizeof( struct hash_header ) + size +
+ 1 );
+ result->header.hash = hash;
+ result->header.next = set->data[ pos ];
+#ifndef NDEBUG
+ result->header.magic = OBJECT_MAGIC;
+#endif
+ memcpy( result->data, string, size );
+ result->data[ size ] = '\0';
+ assert( hash_keyval( result->data, size ) == result->header.hash );
+ set->data[ pos ] = result;
+ strtotal += size + 1;
+ ++set->size;
+
+ return result->data;
+}
+
+
+static struct hash_item * object_get_item( OBJECT * obj )
+{
+ return (struct hash_item *)( (char *)obj - offsetof( struct hash_item, data
+ ) );
+}
+
+
+static void object_validate( OBJECT * obj )
+{
+ assert( obj );
+ assert( object_get_item( obj )->header.magic == OBJECT_MAGIC );
+}
+
+
+/*
+ * object_new_range() - create an object from a string of given length
+ */
+
+OBJECT * object_new_range( char const * const string, int const size )
+{
+ ++strcount_in;
+
+#ifdef BJAM_NO_MEM_CACHE
+ {
+ struct hash_item * const m = (struct hash_item *)BJAM_MALLOC( sizeof(
+ struct hash_header ) + size + 1 );
+ strtotal += size + 1;
+ memcpy( m->data, string, size );
+ m->data[ size ] = '\0';
+ m->header.magic = OBJECT_MAGIC;
+ return (OBJECT *)m->data;
+ }
+#else
+ if ( !strhash.data )
+ string_set_init( &strhash );
+ return (OBJECT *)string_set_insert( &strhash, string, size );
+#endif
+}
+
+
+/*
+ * object_new() - create an object from a string
+ */
+
+OBJECT * object_new( char const * const string )
+{
+ return object_new_range( string, strlen( string ) );
+}
+
+
+#ifndef object_copy
+
+/*
+ * object_copy() - return a copy of an object
+ */
+
+OBJECT * object_copy( OBJECT * obj )
+{
+ object_validate( obj );
+#ifdef BJAM_NO_MEM_CACHE
+ return object_new( object_str( obj ) );
+#else
+ ++strcount_in;
+ return obj;
+#endif
+}
+
+
+/*
+ * object_free() - free an object
+ */
+
+void object_free( OBJECT * obj )
+{
+ object_validate( obj );
+#ifdef BJAM_NO_MEM_CACHE
+ BJAM_FREE( object_get_item( obj ) );
+#endif
+ ++strcount_out;
+}
+
+
+/*
+ * object_str() - return the OBJECT's internal C string
+ */
+
+char const * object_str( OBJECT * obj )
+{
+ object_validate( obj );
+ return (char const *)obj;
+}
+
+
+/*
+ * object_equal() - compare two objects
+ */
+
+int object_equal( OBJECT * lhs, OBJECT * rhs )
+{
+ object_validate( lhs );
+ object_validate( rhs );
+#ifdef BJAM_NO_MEM_CACHE
+ return !strcmp( object_str( lhs ), object_str( rhs ) );
+#else
+ assert( ( lhs == rhs ) == !strcmp( object_str( lhs ), object_str( rhs ) ) );
+ return lhs == rhs;
+#endif
+}
+
+
+/*
+ * object_hash() - returns the hash value of an object
+ */
+
+unsigned int object_hash( OBJECT * obj )
+{
+ object_validate( obj );
+#ifdef BJAM_NO_MEM_CACHE
+ return hash_keyval( object_str( obj ), strlen( object_str( obj ) ) );
+#else
+ return object_get_item( obj )->header.hash;
+#endif
+}
+
+#endif
+
+/*
+ * object_done() - free string tables.
+ */
+
+void object_done()
+{
+#ifdef BJAM_NEWSTR_NO_ALLOCATE
+ unsigned i;
+ for ( i = 0; i < strhash.num; ++i )
+ {
+ while ( strhash.data[ i ] )
+ {
+ struct hash_item * item = strhash.data[ i ];
+ strhash.data[ i ] = item->header.next;
+ BJAM_FREE( item );
+ }
+ }
+#else
+ /* Reclaim string blocks. */
+ while ( strblock_chain )
+ {
+ strblock * const n = strblock_chain->next;
+ BJAM_FREE( strblock_chain );
+ strblock_chain = n;
+ }
+#endif
+
+ string_set_done( &strhash );
+
+ if ( DEBUG_MEM )
+ {
+ printf( "%dK in strings\n", strtotal / 1024 );
+ if ( strcount_in != strcount_out )
+ printf( "--- %d strings of %d dangling\n", strcount_in -
+ strcount_out, strcount_in );
+ }
+}
diff --git a/src/kenlm/jam-files/engine/object.h b/src/kenlm/jam-files/engine/object.h
new file mode 100644
index 0000000..cabb9f6
--- /dev/null
+++ b/src/kenlm/jam-files/engine/object.h
@@ -0,0 +1,44 @@
+/*
+ * Copyright 2011 Steven Watanabe
+ *
+ * This file is part of Jam - see jam.c for Copyright information.
+ */
+
+/*
+ * object.h - object manipulation routines
+ */
+
+#ifndef BOOST_JAM_OBJECT_H
+#define BOOST_JAM_OBJECT_H
+
+typedef struct _object OBJECT;
+
+OBJECT * object_new( char const * const );
+OBJECT * object_new_range( char const * const, int const size );
+void object_done( void );
+
+#if defined(NDEBUG) && !defined(BJAM_NO_MEM_CACHE)
+
+struct hash_header
+{
+ unsigned int hash;
+ struct hash_item * next;
+};
+
+#define object_str( obj ) ((char const *)(obj))
+#define object_copy( obj ) (obj)
+#define object_free( obj ) ((void)0)
+#define object_equal( lhs, rhs ) ((lhs) == (rhs))
+#define object_hash( obj ) (((struct hash_header *)((char *)(obj) - sizeof(struct hash_header)))->hash)
+
+#else
+
+char const * object_str ( OBJECT * );
+OBJECT * object_copy ( OBJECT * );
+void object_free ( OBJECT * );
+int object_equal( OBJECT *, OBJECT * );
+unsigned int object_hash ( OBJECT * );
+
+#endif
+
+#endif
diff --git a/src/kenlm/jam-files/engine/option.c b/src/kenlm/jam-files/engine/option.c
new file mode 100644
index 0000000..d25e5e8
--- /dev/null
+++ b/src/kenlm/jam-files/engine/option.c
@@ -0,0 +1,94 @@
+/*
+ * Copyright 1993, 1995 Christopher Seiwald.
+ *
+ * This file is part of Jam - see jam.c for Copyright information.
+ */
+
+# include "jam.h"
+# include "option.h"
+
+/*
+ * option.c - command line option processing
+ *
+ * {o >o
+ * \<>) "Process command line options as defined in <option.h>.
+ * Return the number of argv[] elements used up by options,
+ * or -1 if an invalid option flag was given or an argument
+ * was supplied for an option that does not require one."
+ */
+
+int getoptions( int argc, char * * argv, char * opts, bjam_option * optv )
+{
+ int i;
+ int optc = N_OPTS;
+
+ memset( (char *)optv, '\0', sizeof( *optv ) * N_OPTS );
+
+ for ( i = 0; i < argc; ++i )
+ {
+ char *arg;
+
+ if ( ( argv[ i ][ 0 ] != '-' ) ||
+ ( ( argv[ i ][ 1 ] != '-' ) && !isalpha( argv[ i ][ 1 ] ) ) )
+ continue;
+
+ if ( !optc-- )
+ {
+ printf( "too many options (%d max)\n", N_OPTS );
+ return -1;
+ }
+
+ for ( arg = &argv[ i ][ 1 ]; *arg; ++arg )
+ {
+ char * f;
+
+ for ( f = opts; *f; ++f )
+ if ( *f == *arg )
+ break;
+
+ if ( !*f )
+ {
+ printf( "Invalid option: -%c\n", *arg );
+ return -1;
+ }
+
+ optv->flag = *f;
+
+ if ( f[ 1 ] != ':' )
+ {
+ optv++->val = "true";
+ }
+ else if ( arg[ 1 ] )
+ {
+ optv++->val = &arg[1];
+ break;
+ }
+ else if ( ++i < argc )
+ {
+ optv++->val = argv[ i ];
+ break;
+ }
+ else
+ {
+ printf( "option: -%c needs argument\n", *f );
+ return -1;
+ }
+ }
+ }
+
+ return i;
+}
+
+
+/*
+ * Name: getoptval() - find an option given its character.
+ */
+
+char * getoptval( bjam_option * optv, char opt, int subopt )
+{
+ int i;
+ for ( i = 0; i < N_OPTS; ++i, ++optv )
+ if ( ( optv->flag == opt ) && !subopt-- )
+ return optv->val;
+ return 0;
+}
diff --git a/src/kenlm/jam-files/engine/option.h b/src/kenlm/jam-files/engine/option.h
new file mode 100644
index 0000000..7c9c747
--- /dev/null
+++ b/src/kenlm/jam-files/engine/option.h
@@ -0,0 +1,23 @@
+/*
+ * Copyright 1993, 1995 Christopher Seiwald.
+ *
+ * This file is part of Jam - see jam.c for Copyright information.
+ */
+
+/*
+ * option.h - command line option processing
+ *
+ * {o >o
+ * \ -) "Command line option."
+ */
+
+typedef struct bjam_option
+{
+ char flag; /* filled in by getoption() */
+ char * val; /* set to random address if true */
+} bjam_option;
+
+#define N_OPTS 256
+
+int getoptions( int argc, char * * argv, char * opts, bjam_option * optv );
+char * getoptval( bjam_option * optv, char opt, int subopt );
diff --git a/src/kenlm/jam-files/engine/output.c b/src/kenlm/jam-files/engine/output.c
new file mode 100644
index 0000000..eaaee43
--- /dev/null
+++ b/src/kenlm/jam-files/engine/output.c
@@ -0,0 +1,98 @@
+/*
+ Copyright 2007 Rene Rivera
+ Distributed under the Boost Software License, Version 1.0.
+ (See accompanying file LICENSE_1_0.txt or http://www.boost.org/LICENSE_1_0.txt)
+*/
+
+#include "jam.h"
+#include "output.h"
+
+#include <stdio.h>
+
+
+#define bjam_out (stdout)
+#define bjam_err (stderr)
+
+static void out_( char const * data, FILE * const io )
+{
+ while ( *data )
+ {
+ size_t const len = strcspn( data, "\r" );
+ data += fwrite( data, 1, len, io );
+ if ( *data == '\r' ) ++data;
+ }
+}
+
+
+void out_action
+(
+ char const * const action,
+ char const * const target,
+ char const * const command,
+ char const * const out_data,
+ char const * const err_data,
+ int const exit_reason
+)
+{
+ /* Print out the action + target line, if the action is quiet the action
+ * should be null.
+ */
+ if ( action )
+ fprintf( bjam_out, "%s %s\n", action, target );
+
+ /* Print out the command executed if given -d+2. */
+ if ( DEBUG_EXEC )
+ {
+ fputs( command, bjam_out );
+ fputc( '\n', bjam_out );
+ }
+
+ /* Print out the command executed to the command stream. */
+ if ( globs.cmdout )
+ fputs( command, globs.cmdout );
+
+ /* If the process expired, make user aware with an explicit message, but do
+ * this only for non-quiet actions.
+ */
+ if ( exit_reason == EXIT_TIMEOUT && action )
+ fprintf( bjam_out, "%ld second time limit exceeded\n", globs.timeout );
+
+ /* Print out the command output, if requested, or if the program failed, but
+ * only output for non-quiet actions.
+ */
+ if ( action || exit_reason != EXIT_OK )
+ {
+ if ( out_data &&
+ ( ( globs.pipe_action & 1 /* STDOUT_FILENO */ ) ||
+ ( globs.pipe_action == 0 ) ) )
+ out_( out_data, bjam_out );
+ if ( err_data && ( globs.pipe_action & 2 /* STDERR_FILENO */ ) )
+ out_( err_data, bjam_err );
+ }
+
+ fflush( bjam_out );
+ fflush( bjam_err );
+ fflush( globs.cmdout );
+}
+
+
+OBJECT * outf_int( int const value )
+{
+ char buffer[ 50 ];
+ sprintf( buffer, "%i", value );
+ return object_new( buffer );
+}
+
+
+OBJECT * outf_double( double const value )
+{
+ char buffer[ 50 ];
+ sprintf( buffer, "%f", value );
+ return object_new( buffer );
+}
+
+
+OBJECT * outf_time( timestamp const * const time )
+{
+ return object_new( timestamp_str( time ) );
+}
diff --git a/src/kenlm/jam-files/engine/output.h b/src/kenlm/jam-files/engine/output.h
new file mode 100644
index 0000000..186e867
--- /dev/null
+++ b/src/kenlm/jam-files/engine/output.h
@@ -0,0 +1,30 @@
+/*
+ Copyright 2007 Rene Rivera
+ Distributed under the Boost Software License, Version 1.0.
+ (See accompanying file LICENSE_1_0.txt or http://www.boost.org/LICENSE_1_0.txt)
+*/
+
+#ifndef BJAM_OUTPUT_H
+#define BJAM_OUTPUT_H
+
+#include "object.h"
+#include "timestamp.h"
+
+#define EXIT_OK 0
+#define EXIT_FAIL 1
+#define EXIT_TIMEOUT 2
+
+void out_action(
+ char const * const action,
+ char const * const target,
+ char const * const command,
+ char const * const out_data,
+ char const * const err_data,
+ int const exit_reason
+);
+
+OBJECT * outf_int( int const value );
+OBJECT * outf_double( double const value );
+OBJECT * outf_time( timestamp const * const value );
+
+#endif
diff --git a/src/kenlm/jam-files/engine/parse.c b/src/kenlm/jam-files/engine/parse.c
new file mode 100644
index 0000000..02412e0
--- /dev/null
+++ b/src/kenlm/jam-files/engine/parse.c
@@ -0,0 +1,132 @@
+/*
+ * Copyright 1993, 2000 Christopher Seiwald.
+ *
+ * This file is part of Jam - see jam.c for Copyright information.
+ */
+
+/* This file is ALSO:
+ * Copyright 2001-2004 David Abrahams.
+ * Distributed under the Boost Software License, Version 1.0.
+ * (See accompanying file LICENSE_1_0.txt or http://www.boost.org/LICENSE_1_0.txt)
+ */
+
+#include "jam.h"
+#include "lists.h"
+#include "parse.h"
+#include "scan.h"
+#include "object.h"
+#include "modules.h"
+#include "frames.h"
+#include "function.h"
+
+/*
+ * parse.c - make and destroy parse trees as driven by the parser
+ *
+ * 09/07/00 (seiwald) - ref count on PARSE to avoid freeing when used,
+ * as per Matt Armstrong.
+ * 09/11/00 (seiwald) - structure reworked to reflect that (*func)()
+ * returns a LIST *.
+ */
+
+static PARSE * yypsave;
+
+void parse_file( OBJECT * f, FRAME * frame )
+{
+ /* Suspend scan of current file and push this new file in the stream. */
+ yyfparse( f );
+
+ /* Now parse each block of rules and execute it. Execute it outside of the
+ * parser so that recursive calls to yyrun() work (no recursive yyparse's).
+ */
+
+ for ( ; ; )
+ {
+ PARSE * p;
+ FUNCTION * func;
+
+ /* Filled by yyparse() calling parse_save(). */
+ yypsave = 0;
+
+ /* If parse error or empty parse, outta here. */
+ if ( yyparse() || !( p = yypsave ) )
+ break;
+
+ /* Run the parse tree. */
+ func = function_compile( p );
+ parse_free( p );
+ list_free( function_run( func, frame, stack_global() ) );
+ function_free( func );
+ }
+}
+
+
+void parse_save( PARSE * p )
+{
+ yypsave = p;
+}
+
+
+PARSE * parse_make(
+ int type,
+ PARSE * left,
+ PARSE * right,
+ PARSE * third,
+ OBJECT * string,
+ OBJECT * string1,
+ int num )
+{
+ PARSE * p = (PARSE *)BJAM_MALLOC( sizeof( PARSE ) );
+
+ p->type = type;
+ p->left = left;
+ p->right = right;
+ p->third = third;
+ p->string = string;
+ p->string1 = string1;
+ p->num = num;
+ p->refs = 1;
+ p->rulename = 0;
+
+ if ( left )
+ {
+ p->file = object_copy( left->file );
+ p->line = left->line;
+ }
+ else
+ {
+ yyinput_last_read_token( &p->file, &p->line );
+ p->file = object_copy( p->file );
+ }
+
+ return p;
+}
+
+
+void parse_refer( PARSE * p )
+{
+ ++p->refs;
+}
+
+
+void parse_free( PARSE * p )
+{
+ if ( --p->refs )
+ return;
+
+ if ( p->string )
+ object_free( p->string );
+ if ( p->string1 )
+ object_free( p->string1 );
+ if ( p->left )
+ parse_free( p->left );
+ if ( p->right )
+ parse_free( p->right );
+ if ( p->third )
+ parse_free( p->third );
+ if ( p->rulename )
+ object_free( p->rulename );
+ if ( p->file )
+ object_free( p->file );
+
+ BJAM_FREE( (char *)p );
+}
diff --git a/src/kenlm/jam-files/engine/parse.h b/src/kenlm/jam-files/engine/parse.h
new file mode 100644
index 0000000..bb47af6
--- /dev/null
+++ b/src/kenlm/jam-files/engine/parse.h
@@ -0,0 +1,76 @@
+/*
+ * Copyright 1993, 2000 Christopher Seiwald.
+ *
+ * This file is part of Jam - see jam.c for Copyright information.
+ */
+
+/* This file is ALSO:
+ * Copyright 2001-2004 David Abrahams.
+ * Distributed under the Boost Software License, Version 1.0.
+ * (See accompanying file LICENSE_1_0.txt or http://www.boost.org/LICENSE_1_0.txt)
+ */
+
+/*
+ * parse.h - make and destroy parse trees as driven by the parser.
+ */
+
+#ifndef PARSE_DWA20011020_H
+#define PARSE_DWA20011020_H
+
+#include "frames.h"
+#include "lists.h"
+#include "modules.h"
+
+
+#define PARSE_APPEND 0
+#define PARSE_FOREACH 1
+#define PARSE_IF 2
+#define PARSE_EVAL 3
+#define PARSE_INCLUDE 4
+#define PARSE_LIST 5
+#define PARSE_LOCAL 6
+#define PARSE_MODULE 7
+#define PARSE_CLASS 8
+#define PARSE_NULL 9
+#define PARSE_ON 10
+#define PARSE_RULE 11
+#define PARSE_RULES 12
+#define PARSE_SET 13
+#define PARSE_SETCOMP 14
+#define PARSE_SETEXEC 15
+#define PARSE_SETTINGS 16
+#define PARSE_SWITCH 17
+#define PARSE_WHILE 18
+
+
+/*
+ * Parse tree node.
+ */
+
+typedef struct _PARSE PARSE;
+
+struct _PARSE {
+ int type;
+ PARSE * left;
+ PARSE * right;
+ PARSE * third;
+ OBJECT * string;
+ OBJECT * string1;
+ int num;
+ int refs;
+ OBJECT * rulename;
+ OBJECT * file;
+ int line;
+};
+
+void parse_file( OBJECT *, FRAME * );
+void parse_save( PARSE * );
+
+PARSE * parse_make( int type, PARSE * left, PARSE * right, PARSE * third,
+ OBJECT * string, OBJECT * string1, int num );
+
+void parse_refer( PARSE * );
+void parse_free( PARSE * );
+LIST * parse_evaluate( PARSE *, FRAME * );
+
+#endif
diff --git a/src/kenlm/jam-files/engine/patchlevel.h b/src/kenlm/jam-files/engine/patchlevel.h
new file mode 100644
index 0000000..60b0d61
--- /dev/null
+++ b/src/kenlm/jam-files/engine/patchlevel.h
@@ -0,0 +1,17 @@
+/*
+ * Copyright 1993-2002 Christopher Seiwald and Perforce Software, Inc.
+ *
+ * This file is part of Jam - see jam.c for Copyright information.
+ */
+
+/* Keep JAMVERSYM in sync with VERSION. */
+/* It can be accessed as $(JAMVERSION) in the Jamfile. */
+
+#define VERSION_MAJOR 2011
+#define VERSION_MINOR 12
+#define VERSION_PATCH 1
+#define VERSION_MAJOR_SYM "2011"
+#define VERSION_MINOR_SYM "12"
+#define VERSION_PATCH_SYM "01"
+#define VERSION "2011.12.1"
+#define JAMVERSYM "JAMVERSION=2011.12"
diff --git a/src/kenlm/jam-files/engine/pathnt.c b/src/kenlm/jam-files/engine/pathnt.c
new file mode 100644
index 0000000..8abf8fe
--- /dev/null
+++ b/src/kenlm/jam-files/engine/pathnt.c
@@ -0,0 +1,301 @@
+/*
+ * Copyright 1993-2002 Christopher Seiwald and Perforce Software, Inc.
+ *
+ * This file is part of Jam - see jam.c for Copyright information.
+ */
+
+/* This file is ALSO:
+ * Copyright 2001-2004 David Abrahams.
+ * Copyright 2005 Rene Rivera.
+ * Distributed under the Boost Software License, Version 1.0.
+ * (See accompanying file LICENSE_1_0.txt or copy at
+ * http://www.boost.org/LICENSE_1_0.txt)
+ */
+
+/*
+ * pathnt.c - NT specific path manipulation support
+ */
+
+#include "pathsys.h"
+
+#include "hash.h"
+
+#define WIN32_LEAN_AND_MEAN
+#include <windows.h>
+
+#include <assert.h>
+#include <stdlib.h>
+
+
+/* The definition of this in winnt.h is not ANSI-C compatible. */
+#undef INVALID_FILE_ATTRIBUTES
+#define INVALID_FILE_ATTRIBUTES ((DWORD)-1)
+
+
+typedef struct path_key_entry
+{
+ OBJECT * path;
+ OBJECT * key;
+ int exists;
+} path_key_entry;
+
+static struct hash * path_key_cache;
+
+
+/*
+ * path_get_process_id_()
+ */
+
+unsigned long path_get_process_id_( void )
+{
+ return GetCurrentProcessId();
+}
+
+
+/*
+ * path_get_temp_path_()
+ */
+
+void path_get_temp_path_( string * buffer )
+{
+ DWORD pathLength = GetTempPathA( 0, NULL );
+ string_reserve( buffer, pathLength );
+ pathLength = GetTempPathA( pathLength, buffer->value );
+ buffer->value[ pathLength - 1 ] = '\0';
+ buffer->size = pathLength - 1;
+}
+
+
+/*
+ * canonicWindowsPath() - convert a given path into its canonic/long format
+ *
+ * Appends the canonic path to the end of the given 'string' object.
+ *
+ * FIXME: This function is still work-in-progress as it originally did not
+ * necessarily return the canonic path format (could return slightly different
+ * results for certain equivalent path strings) and could accept paths pointing
+ * to non-existing file system entities as well.
+ *
+ * Caches results internally, automatically caching any parent paths it has to
+ * convert to their canonic format in the process.
+ *
+ * Prerequisites:
+ * - path given in normalized form, i.e. all of its folder separators have
+ * already been converted into '\\'
+ * - path_key_cache path/key mapping cache object already initialized
+ */
+
+static int canonicWindowsPath( char const * const path, int const path_length,
+ string * const out )
+{
+ char const * last_element;
+ unsigned long saved_size;
+ char const * p;
+ int missing_parent;
+
+ /* This is only called via path_key(), which initializes the cache. */
+ assert( path_key_cache );
+
+ if ( !path_length )
+ return 1;
+
+ if ( path_length == 1 && path[ 0 ] == '\\' )
+ {
+ string_push_back( out, '\\' );
+ return 1;
+ }
+
+ if ( path[ 1 ] == ':' &&
+ ( path_length == 2 ||
+ ( path_length == 3 && path[ 2 ] == '\\' ) ) )
+ {
+ string_push_back( out, toupper( path[ 0 ] ) );
+ string_push_back( out, ':' );
+ string_push_back( out, '\\' );
+ return 1;
+ }
+
+ /* Find last '\\'. */
+ for ( p = path + path_length - 1; p >= path && *p != '\\'; --p );
+ last_element = p + 1;
+
+ /* Special case '\' && 'D:\' - include trailing '\'. */
+ if ( p == path ||
+ p == path + 2 && path[ 1 ] == ':' )
+ ++p;
+
+ missing_parent = 0;
+
+ if ( p >= path )
+ {
+ char const * const dir = path;
+ int const dir_length = p - path;
+ OBJECT * const dir_obj = object_new_range( dir, dir_length );
+ int found;
+ path_key_entry * const result = (path_key_entry *)hash_insert(
+ path_key_cache, dir_obj, &found );
+ if ( !found )
+ {
+ result->path = dir_obj;
+ if ( canonicWindowsPath( dir, dir_length, out ) )
+ result->exists = 1;
+ else
+ result->exists = 0;
+ result->key = object_new( out->value );
+ }
+ else
+ {
+ object_free( dir_obj );
+ string_append( out, object_str( result->key ) );
+ }
+ if ( !result->exists )
+ missing_parent = 1;
+ }
+
+ if ( out->size && out->value[ out->size - 1 ] != '\\' )
+ string_push_back( out, '\\' );
+
+ saved_size = out->size;
+ string_append_range( out, last_element, path + path_length );
+
+ if ( !missing_parent )
+ {
+ char const * const n = last_element;
+ int const n_length = path + path_length - n;
+ if ( !( n_length == 1 && n[ 0 ] == '.' )
+ && !( n_length == 2 && n[ 0 ] == '.' && n[ 1 ] == '.' ) )
+ {
+ WIN32_FIND_DATA fd;
+ HANDLE const hf = FindFirstFileA( out->value, &fd );
+ if ( hf != INVALID_HANDLE_VALUE )
+ {
+ string_truncate( out, saved_size );
+ string_append( out, fd.cFileName );
+ FindClose( hf );
+ return 1;
+ }
+ }
+ }
+ return 0;
+}
+
+
+/*
+ * normalize_path() - 'normalizes' the given path for the path-key mapping
+ *
+ * The resulting string has nothing to do with 'normalized paths' as used in
+ * Boost Jam build scripts and the built-in NORMALIZE_PATH rule. It is intended
+ * to be used solely as an intermediate step when mapping an arbitrary path to
+ * its canonical representation.
+ *
+ * When choosing the intermediate string the important things are for it to be
+ * inexpensive to calculate and any two paths having different canonical
+ * representations also need to have different calculated intermediate string
+ * representations. Any implemented additional rules serve only to simplify
+ * constructing the canonical path representation from the calculated
+ * intermediate string.
+ *
+ * Implemented returned path rules:
+ * - use backslashes as path separators
+ * - lowercase only (since all Windows file systems are case insensitive)
+ * - trim trailing path separator except in case of a root path, i.e. 'X:\'
+ */
+
+static void normalize_path( string * path )
+{
+ char * s;
+ for ( s = path->value; s < path->value + path->size; ++s )
+ *s = *s == '/' ? '\\' : tolower( *s );
+ /* Strip trailing "/". */
+ if ( path->size && path->size != 3 && path->value[ path->size - 1 ] == '\\'
+ )
+ string_pop_back( path );
+}
+
+
+static path_key_entry * path_key( OBJECT * const path,
+ int const known_to_be_canonic )
+{
+ path_key_entry * result;
+ int found;
+
+ if ( !path_key_cache )
+ path_key_cache = hashinit( sizeof( path_key_entry ), "path to key" );
+
+ result = (path_key_entry *)hash_insert( path_key_cache, path, &found );
+ if ( !found )
+ {
+ OBJECT * normalized;
+ int normalized_size;
+ path_key_entry * nresult;
+ result->path = path;
+ {
+ string buf[ 1 ];
+ string_copy( buf, object_str( path ) );
+ normalize_path( buf );
+ normalized = object_new( buf->value );
+ normalized_size = buf->size;
+ string_free( buf );
+ }
+ nresult = (path_key_entry *)hash_insert( path_key_cache, normalized,
+ &found );
+ if ( !found || nresult == result )
+ {
+ nresult->path = normalized;
+ if ( known_to_be_canonic )
+ nresult->key = object_copy( path );
+ else
+ {
+ string canonic_path[ 1 ];
+ string_new( canonic_path );
+ if ( canonicWindowsPath( object_str( normalized ), normalized_size,
+ canonic_path ) )
+ nresult->exists = 1;
+ else
+ nresult->exists = 0;
+ nresult->key = object_new( canonic_path->value );
+ string_free( canonic_path );
+ }
+ }
+ else
+ object_free( normalized );
+ if ( nresult != result )
+ {
+ result->path = object_copy( path );
+ result->key = object_copy( nresult->key );
+ result->exists = nresult->exists;
+ }
+ }
+
+ return result;
+}
+
+
+void path_register_key( OBJECT * canonic_path )
+{
+ path_key( canonic_path, 1 );
+}
+
+
+OBJECT * path_as_key( OBJECT * path )
+{
+ return object_copy( path_key( path, 0 )->key );
+}
+
+
+static void free_path_key_entry( void * xentry, void * const data )
+{
+ path_key_entry * const entry = (path_key_entry *)xentry;
+ object_free( entry->path );
+ object_free( entry->key );
+}
+
+
+void path_done( void )
+{
+ if ( path_key_cache )
+ {
+ hashenumerate( path_key_cache, &free_path_key_entry, 0 );
+ hashdone( path_key_cache );
+ }
+}
diff --git a/src/kenlm/jam-files/engine/pathsys.c b/src/kenlm/jam-files/engine/pathsys.c
new file mode 100644
index 0000000..ae4e6e0
--- /dev/null
+++ b/src/kenlm/jam-files/engine/pathsys.c
@@ -0,0 +1,285 @@
+/*
+ * Copyright 1993-2002 Christopher Seiwald and Perforce Software, Inc.
+ *
+ * This file is part of Jam - see jam.c for Copyright information.
+ */
+
+/* This file is ALSO:
+ * Copyright 2001-2004 David Abrahams.
+ * Copyright 2005 Rene Rivera.
+ * Distributed under the Boost Software License, Version 1.0.
+ * (See accompanying file LICENSE_1_0.txt or copy at
+ * http://www.boost.org/LICENSE_1_0.txt)
+ */
+
+/*
+ * pathsys.c - platform independent path manipulation support
+ *
+ * External routines:
+ * path_build() - build a filename given dir/base/suffix/member
+ * path_parent() - make a PATHNAME point to its parent dir
+ * path_parse() - split a file name into dir/base/suffix/member
+ * path_tmpdir() - returns the system dependent temporary folder path
+ * path_tmpfile() - returns a new temporary path
+ * path_tmpnam() - returns a new temporary name
+ *
+ * File_parse() and path_build() just manipulate a string and a structure;
+ * they do not make system calls.
+ */
+
+#include "jam.h"
+#include "pathsys.h"
+
+#include "filesys.h"
+
+#include <stdlib.h>
+#include <time.h>
+
+
+/* Internal OS specific implementation details - have names ending with an
+ * underscore and are expected to be implemented in an OS specific pathXXX.c
+ * module.
+ */
+unsigned long path_get_process_id_( void );
+void path_get_temp_path_( string * buffer );
+
+
+/*
+ * path_parse() - split a file name into dir/base/suffix/member
+ */
+
+void path_parse( char const * file, PATHNAME * f )
+{
+ char const * p;
+ char const * q;
+ char const * end;
+
+ memset( (char *)f, 0, sizeof( *f ) );
+
+ /* Look for '<grist>'. */
+
+ if ( ( file[ 0 ] == '<' ) && ( p = strchr( file, '>' ) ) )
+ {
+ f->f_grist.ptr = file;
+ f->f_grist.len = p - file;
+ file = p + 1;
+ }
+
+ /* Look for 'dir/'. */
+
+ p = strrchr( file, '/' );
+
+#if PATH_DELIM == '\\'
+ /* On NT, look for dir\ as well */
+ {
+ char * const p1 = strrchr( p ? p + 1 : file, '\\' );
+ if ( p1 ) p = p1;
+ }
+#endif
+
+ if ( p )
+ {
+ f->f_dir.ptr = file;
+ f->f_dir.len = p - file;
+
+ /* Special case for / - dirname is /, not "" */
+ if ( !f->f_dir.len )
+ ++f->f_dir.len;
+
+#if PATH_DELIM == '\\'
+ /* Special case for D:/ - dirname is D:/, not "D:" */
+ if ( f->f_dir.len == 2 && file[ 1 ] == ':' )
+ ++f->f_dir.len;
+#endif
+
+ file = p + 1;
+ }
+
+ end = file + strlen( file );
+
+ /* Look for '(member)'. */
+ if ( ( p = strchr( file, '(' ) ) && ( end[ -1 ] == ')' ) )
+ {
+ f->f_member.ptr = p + 1;
+ f->f_member.len = end - p - 2;
+ end = p;
+ }
+
+ /* Look for '.suffix'. This would be memrchr(). */
+ p = 0;
+ for ( q = file; ( q = (char *)memchr( q, '.', end - q ) ); ++q )
+ p = q;
+ if ( p )
+ {
+ f->f_suffix.ptr = p;
+ f->f_suffix.len = end - p;
+ end = p;
+ }
+
+ /* Leaves base. */
+ f->f_base.ptr = file;
+ f->f_base.len = end - file;
+}
+
+
+/*
+ * is_path_delim() - true iff c is a path delimiter
+ */
+
+static int is_path_delim( char const c )
+{
+ return c == PATH_DELIM
+#if PATH_DELIM == '\\'
+ || c == '/'
+#endif
+ ;
+}
+
+
+/*
+ * as_path_delim() - convert c to a path delimiter if it is not one already
+ */
+
+static char as_path_delim( char const c )
+{
+ return is_path_delim( c ) ? c : PATH_DELIM;
+}
+
+
+/*
+ * path_build() - build a filename given dir/base/suffix/member
+ *
+ * To avoid changing slash direction on NT when reconstituting paths, instead of
+ * unconditionally appending PATH_DELIM we check the past-the-end character of
+ * the previous path element. If it is a path delimiter, we append that, and
+ * only append PATH_DELIM as a last resort. This heuristic is based on the fact
+ * that PATHNAME objects are usually the result of calling path_parse, which
+ * leaves the original slashes in the past-the-end position. Correctness depends
+ * on the assumption that all strings are zero terminated, so a past-the-end
+ * character will always be available.
+ *
+ * As an attendant patch, we had to ensure that backslashes are used explicitly
+ * in 'timestamp.c'.
+ */
+
+void path_build( PATHNAME * f, string * file )
+{
+ file_build1( f, file );
+
+ /* Do not prepend root if it is '.' or the directory is rooted. */
+ if ( f->f_root.len
+ && !( f->f_root.len == 1 && f->f_root.ptr[ 0 ] == '.' )
+ && !( f->f_dir.len && f->f_dir.ptr[ 0 ] == '/' )
+#if PATH_DELIM == '\\'
+ && !( f->f_dir.len && f->f_dir.ptr[ 0 ] == '\\' )
+ && !( f->f_dir.len && f->f_dir.ptr[ 1 ] == ':' )
+#endif
+ )
+ {
+ string_append_range( file, f->f_root.ptr, f->f_root.ptr + f->f_root.len
+ );
+        /* If 'root' already ends with a path delimiter, do not add another one.
+ */
+ if ( !is_path_delim( f->f_root.ptr[ f->f_root.len - 1 ] ) )
+ string_push_back( file, as_path_delim( f->f_root.ptr[ f->f_root.len
+ ] ) );
+ }
+
+ if ( f->f_dir.len )
+ string_append_range( file, f->f_dir.ptr, f->f_dir.ptr + f->f_dir.len );
+
+ /* Put path separator between dir and file. */
+ /* Special case for root dir: do not add another path separator. */
+ if ( f->f_dir.len && ( f->f_base.len || f->f_suffix.len )
+#if PATH_DELIM == '\\'
+ && !( f->f_dir.len == 3 && f->f_dir.ptr[ 1 ] == ':' )
+#endif
+ && !( f->f_dir.len == 1 && is_path_delim( f->f_dir.ptr[ 0 ] ) ) )
+ string_push_back( file, as_path_delim( f->f_dir.ptr[ f->f_dir.len ] ) );
+
+ if ( f->f_base.len )
+ string_append_range( file, f->f_base.ptr, f->f_base.ptr + f->f_base.len
+ );
+
+ if ( f->f_suffix.len )
+ string_append_range( file, f->f_suffix.ptr, f->f_suffix.ptr +
+ f->f_suffix.len );
+
+ if ( f->f_member.len )
+ {
+ string_push_back( file, '(' );
+ string_append_range( file, f->f_member.ptr, f->f_member.ptr +
+ f->f_member.len );
+ string_push_back( file, ')' );
+ }
+}
+
+
+/*
+ * path_parent() - make a PATHNAME point to its parent dir
+ */
+
+void path_parent( PATHNAME * f )
+{
+ f->f_base.ptr = f->f_suffix.ptr = f->f_member.ptr = "";
+ f->f_base.len = f->f_suffix.len = f->f_member.len = 0;
+}
+
+
+/*
+ * path_tmpdir() - returns the system dependent temporary folder path
+ *
+ * Returned value is stored inside a static buffer and should not be modified.
+ * Returned value does *not* include a trailing path separator.
+ */
+
+string const * path_tmpdir()
+{
+ static string buffer[ 1 ];
+ static int have_result;
+ if ( !have_result )
+ {
+ string_new( buffer );
+ path_get_temp_path_( buffer );
+ have_result = 1;
+ }
+ return buffer;
+}
+
+
+/*
+ * path_tmpnam() - returns a new temporary name
+ */
+
+OBJECT * path_tmpnam( void )
+{
+ char name_buffer[ 64 ];
+ unsigned long const pid = path_get_process_id_();
+ static unsigned long t;
+ if ( !t ) t = time( 0 ) & 0xffff;
+ t += 1;
+ sprintf( name_buffer, "jam%lx%lx.000", pid, t );
+ return object_new( name_buffer );
+}
+
+
+/*
+ * path_tmpfile() - returns a new temporary path
+ */
+
+OBJECT * path_tmpfile( void )
+{
+ OBJECT * result;
+ OBJECT * tmpnam;
+
+ string file_path[ 1 ];
+ string_copy( file_path, path_tmpdir()->value );
+ string_push_back( file_path, PATH_DELIM );
+ tmpnam = path_tmpnam();
+ string_append( file_path, object_str( tmpnam ) );
+ object_free( tmpnam );
+ result = object_new( file_path->value );
+ string_free( file_path );
+
+ return result;
+}
diff --git a/src/kenlm/jam-files/engine/pathsys.h b/src/kenlm/jam-files/engine/pathsys.h
new file mode 100644
index 0000000..9b7a4ca
--- /dev/null
+++ b/src/kenlm/jam-files/engine/pathsys.h
@@ -0,0 +1,85 @@
+/*
+ * Copyright 1993-2002 Christopher Seiwald and Perforce Software, Inc.
+ *
+ * This file is part of Jam - see jam.c for Copyright information.
+ */
+
+/*
+ * pathsys.h - PATHNAME struct
+ */
+
+/*
+ * PATHNAME - a name of a file, broken into <grist>dir/base/suffix(member)
+ *
+ * <grist> - salt to distinguish between targets that would otherwise have the
+ * same name - it never appears in the bound name of a target.
+ *
+ * (member) - archive member name: the syntax is arbitrary, but must agree in
+ * path_parse(), path_build() and the Jambase.
+ */
+
+#ifndef PATHSYS_VP_20020211_H
+#define PATHSYS_VP_20020211_H
+
+#include "object.h"
+#include "strings.h"
+
+
+typedef struct _pathpart
+{
+ char const * ptr;
+ int len;
+} PATHPART;
+
+typedef struct _pathname
+{
+ PATHPART part[ 6 ];
+
+#define f_grist part[ 0 ]
+#define f_root part[ 1 ]
+#define f_dir part[ 2 ]
+#define f_base part[ 3 ]
+#define f_suffix part[ 4 ]
+#define f_member part[ 5 ]
+} PATHNAME;
+
+
+void path_build( PATHNAME *, string * file );
+void path_parse( char const * file, PATHNAME * );
+void path_parent( PATHNAME * );
+
+/* Given a path, returns an object containing an equivalent path in canonical
+ * format that can be used as a unique key for that path. Equivalent paths such
+ * as a/b, A\B, and a\B on NT all yield the same key.
+ */
+OBJECT * path_as_key( OBJECT * path );
+
+/* Called as an optimization when we know we have a path that is already in its
+ * canonical/long/key form. Avoids the need for some subsequent path_as_key()
+ * call to do a potentially expensive path conversion requiring access to the
+ * actual underlying file system.
+ */
+void path_register_key( OBJECT * canonic_path );
+
+/* Returns a static pointer to the system dependent path to the temporary
+ * directory. NOTE: Does *not* include a trailing path separator.
+ */
+string const * path_tmpdir( void );
+
+/* Returns a new temporary name. */
+OBJECT * path_tmpnam( void );
+
+/* Returns a new temporary path. */
+OBJECT * path_tmpfile( void );
+
+/* Given the first argument to 'main', returns a full path to our executable.
+ * Returns null in the unlikely case it cannot be determined. Caller is
+ * responsible for freeing the string.
+ *
+ * Implemented in jam.c
+ */
+char * executable_path( char const * argv0 );
+
+void path_done( void );
+
+#endif
diff --git a/src/kenlm/jam-files/engine/pathunix.c b/src/kenlm/jam-files/engine/pathunix.c
new file mode 100644
index 0000000..8ca0d18
--- /dev/null
+++ b/src/kenlm/jam-files/engine/pathunix.c
@@ -0,0 +1,71 @@
+/*
+ * Copyright 1993-2002 Christopher Seiwald and Perforce Software, Inc.
+ *
+ * This file is part of Jam - see jam.c for Copyright information.
+ */
+
+/* This file is ALSO:
+ * Copyright 2001-2004 David Abrahams.
+ * Copyright 2005 Rene Rivera.
+ * Distributed under the Boost Software License, Version 1.0.
+ * (See accompanying file LICENSE_1_0.txt or copy at
+ * http://www.boost.org/LICENSE_1_0.txt)
+ */
+
+/*
+ * pathunix.c - UNIX specific path manipulation support
+ */
+
+#include "pathsys.h"
+
+#include <stdlib.h>
+#include <unistd.h> /* needed for getpid() */
+
+
+/*
+ * path_get_process_id_()
+ */
+
+unsigned long path_get_process_id_( void )
+{
+ return getpid();
+}
+
+
+/*
+ * path_get_temp_path_()
+ */
+
+void path_get_temp_path_( string * buffer )
+{
+ char const * t = getenv( "TMPDIR" );
+ string_append( buffer, t ? t : "/tmp" );
+}
+
+
+/*
+ * path_register_key()
+ */
+
+void path_register_key( OBJECT * path )
+{
+}
+
+
+/*
+ * path_as_key()
+ */
+
+OBJECT * path_as_key( OBJECT * path )
+{
+ return object_copy( path );
+}
+
+
+/*
+ * path_done()
+ */
+
+void path_done( void )
+{
+}
diff --git a/src/kenlm/jam-files/engine/regexp.c b/src/kenlm/jam-files/engine/regexp.c
new file mode 100644
index 0000000..c64201b
--- /dev/null
+++ b/src/kenlm/jam-files/engine/regexp.c
@@ -0,0 +1,1329 @@
+/*
+ * regcomp and regexec -- regsub and regerror are elsewhere
+ *
+ * Copyright (c) 1986 by University of Toronto.
+ * Written by Henry Spencer. Not derived from licensed software.
+ *
+ * Permission is granted to anyone to use this software for any
+ * purpose on any computer system, and to redistribute it freely,
+ * subject to the following restrictions:
+ *
+ * 1. The author is not responsible for the consequences of use of
+ * this software, no matter how awful, even if they arise
+ * from defects in it.
+ *
+ * 2. The origin of this software must not be misrepresented, either
+ * by explicit claim or by omission.
+ *
+ * 3. Altered versions must be plainly marked as such, and must not
+ * be misrepresented as being the original software.
+ *** THIS IS AN ALTERED VERSION. It was altered by John Gilmore,
+ *** hoptoad!gnu, on 27 Dec 1986, to add \n as an alternative to |
+ *** to assist in implementing egrep.
+ *** THIS IS AN ALTERED VERSION. It was altered by John Gilmore,
+ *** hoptoad!gnu, on 27 Dec 1986, to add \< and \> for word-matching
+ *** as in BSD grep and ex.
+ *** THIS IS AN ALTERED VERSION. It was altered by John Gilmore,
+ *** hoptoad!gnu, on 28 Dec 1986, to optimize characters quoted with \.
+ *** THIS IS AN ALTERED VERSION. It was altered by James A. Woods,
+ *** ames!jaw, on 19 June 1987, to quash a regcomp() redundancy.
+ *** THIS IS AN ALTERED VERSION. It was altered by Christopher Seiwald
+ *** seiwald@vix.com, on 28 August 1993, for use in jam. Regmagic.h
+ *** was moved into regexp.h, and the include of regexp.h now uses "'s
+ *** to avoid conflicting with the system regexp.h. Const, bless its
+ *** soul, was removed so it can compile everywhere. The declaration
+ *** of strchr() was in conflict on AIX, so it was removed (as it is
+ *** happily defined in string.h).
+ *** THIS IS AN ALTERED VERSION. It was altered by Christopher Seiwald
+ *** seiwald@perforce.com, on 20 January 2000, to use function prototypes.
+ *
+ * Beware that some of this code is subtly aware of the way operator precedence
+ * is structured in regular expressions. Serious changes in regular-expression
+ * syntax might require a total rethink.
+ */
+
+
+#include "jam.h"
+#include "regexp.h"
+
+#include <stdio.h>
+#include <ctype.h>
+#ifndef ultrix
+# include <stdlib.h>
+#endif
+#include <string.h>
+
+
+/*
+ * The "internal use only" fields in regexp.h are present to pass info from
+ * compile to execute that permits the execute phase to run lots faster on
+ * simple cases. They are:
+ *
+ * regstart char that must begin a match; '\0' if none obvious.
+ * reganch is the match anchored (at beginning-of-line only)?
+ * regmust string (pointer into program) that match must include, or NULL.
+ * regmlen length of regmust string.
+ *
+ * Regstart and reganch permit very fast decisions on suitable starting points
+ * for a match, cutting down the work a lot. Regmust permits fast rejection of
+ * lines that cannot possibly match. The regmust tests are costly enough that
+ * regcomp() supplies a regmust only if the r.e. contains something potentially
+ * expensive (at present, the only such thing detected is * or + at the start of
+ * the r.e., which can involve a lot of backup). Regmlen is supplied because the
+ * test in regexec() needs it and regcomp() is computing it anyway.
+ */
+
+/*
+ * Structure for regexp "program". This is essentially a linear encoding of a
+ * nondeterministic finite-state machine (aka syntax charts or "railroad normal
+ * form" in parsing technology). Each node is an opcode plus a "next" pointer,
+ * possibly plus an operand. "Next" pointers of all nodes except BRANCH
+ * implement concatenation; a "next" pointer with a BRANCH on both ends of it is
+ * connecting two alternatives. [Here we have one of the subtle syntax
+ * dependencies: an individual BRANCH, as opposed to a collection of them, is
+ * never concatenated with anything because of operator precedence.] The operand
+ * of some types of node is a literal string; for others, it is a node leading
+ * into a sub-FSM. In particular, the operand of a BRANCH node is the first node
+ * of the branch. [NB this is *not* a tree structure: the tail of the branch
+ * connects to the thing following the set of BRANCHes.] The opcodes are:
+ */
+
+/* definition number opnd? meaning */
+#define END 0 /* no End of program. */
+#define BOL 1 /* no Match "" at beginning of line. */
+#define EOL 2 /* no Match "" at end of line. */
+#define ANY 3 /* no Match any one character. */
+#define ANYOF 4 /* str Match any character in this string. */
+#define ANYBUT 5 /* str Match any character not in this string. */
+#define BRANCH 6 /* node Match this alternative, or the next... */
+#define BACK 7 /* no Match "", "next" ptr points backward. */
+#define EXACTLY 8 /* str Match this string. */
+#define NOTHING 9 /* no Match empty string. */
+#define STAR 10 /* node Match this (simple) thing 0 or more times. */
+#define PLUS 11 /* node Match this (simple) thing 1 or more times. */
+#define WORDA 12 /* no Match "" at wordchar, where prev is nonword */
+#define WORDZ 13 /* no Match "" at nonwordchar, where prev is word */
+#define OPEN 20 /* no Mark this point in input as start of #n. */
+ /* OPEN+1 is number 1, etc. */
+#define CLOSE 30 /* no Analogous to OPEN. */
+
+
+/*
+ * Opcode notes:
+ *
+ * BRANCH The set of branches constituting a single choice are hooked
+ * together with their "next" pointers, since precedence prevents
+ * anything being concatenated to any individual branch. The
+ * "next" pointer of the last BRANCH in a choice points to the
+ * thing following the whole choice. This is also where the
+ * final "next" pointer of each individual branch points; each
+ * branch starts with the operand node of a BRANCH node.
+ *
+ * BACK Normal "next" pointers all implicitly point forward; BACK
+ * exists to make loop structures possible.
+ *
+ * STAR,PLUS '?', and complex '*' and '+', are implemented as circular
+ * BRANCH structures using BACK. Simple cases (one character
+ * per match) are implemented with STAR and PLUS for speed
+ * and to minimize recursive plunges.
+ *
+ * OPEN,CLOSE ...are numbered at compile time.
+ */
+
+/*
+ * A node is one char of opcode followed by two chars of "next" pointer.
+ * "Next" pointers are stored as two 8-bit pieces, high order first. The
+ * value is a positive offset from the opcode of the node containing it.
+ * An operand, if any, simply follows the node. (Note that much of the
+ * code generation knows about this implicit relationship.)
+ *
+ * Using two bytes for the "next" pointer is vast overkill for most things,
+ * but allows patterns to get big without disasters.
+ */
+#define OP(p) (*(p))
+#define NEXT(p) (((*((p)+1)&0377)<<8) + (*((p)+2)&0377))
+#define OPERAND(p) ((p) + 3)
+
+/*
+ * See regmagic.h for one further detail of program structure.
+ */
+
+
+/*
+ * Utility definitions.
+ */
+#ifndef CHARBITS
+#define UCHARAT(p) ((int)*(const unsigned char *)(p))
+#else
+#define UCHARAT(p) ((int)*(p)&CHARBITS)
+#endif
+
+#define FAIL(m) { regerror(m); return(NULL); }
+#define ISMULT(c) ((c) == '*' || (c) == '+' || (c) == '?')
+
+/*
+ * Flags to be passed up and down.
+ */
+#define HASWIDTH 01 /* Known never to match null string. */
+#define SIMPLE 02 /* Simple enough to be STAR/PLUS operand. */
+#define SPSTART 04 /* Starts with * or +. */
+#define WORST 0 /* Worst case. */
+
+/*
+ * Global work variables for regcomp().
+ */
+static char *regparse; /* Input-scan pointer. */
+static int regnpar; /* () count. */
+static char regdummy;
+static char *regcode;		/* Code-emit pointer; &regdummy = don't. */
+static long regsize; /* Code size. */
+
+/*
+ * Forward declarations for regcomp()'s friends.
+ */
+#ifndef STATIC
+#define STATIC static
+#endif
+STATIC char *reg( int paren, int *flagp );
+STATIC char *regbranch( int *flagp );
+STATIC char *regpiece( int *flagp );
+STATIC char *regatom( int *flagp );
+STATIC char *regnode( int op );
+STATIC char *regnext( register char *p );
+STATIC void regc( int b );
+STATIC void reginsert( char op, char *opnd );
+STATIC void regtail( char *p, char *val );
+STATIC void regoptail( char *p, char *val );
+#ifdef STRCSPN
+STATIC int strcspn();
+#endif
+
+/*
+ - regcomp - compile a regular expression into internal code
+ *
+ * We can't allocate space until we know how big the compiled form will be,
+ * but we can't compile it (and thus know how big it is) until we've got a
+ * place to put the code. So we cheat: we compile it twice, once with code
+ * generation turned off and size counting turned on, and once "for real".
+ * This also means that we don't allocate space until we are sure that the
+ * thing really will compile successfully, and we never have to move the
+ * code and thus invalidate pointers into it. (Note that it has to be in
+ * one piece because free() must be able to free it all.)
+ *
+ * Beware that the optimization-preparation code in here knows about some
+ * of the structure of the compiled regexp.
+ */
+regexp *
+regcomp( const char *exp )
+{
+ register regexp *r;
+ register char *scan;
+ register char *longest;
+ register unsigned len;
+ int flags;
+
+ if (exp == NULL)
+ FAIL("NULL argument");
+
+ /* First pass: determine size, legality. */
+#ifdef notdef
+ if (exp[0] == '.' && exp[1] == '*') exp += 2; /* aid grep */
+#endif
+ regparse = (char *)exp;
+ regnpar = 1;
+ regsize = 0L;
+	regcode = &regdummy;
+ regc(MAGIC);
+ if (reg(0, &flags) == NULL)
+ return(NULL);
+
+ /* Small enough for pointer-storage convention? */
+ if (regsize >= 32767L) /* Probably could be 65535L. */
+ FAIL("regexp too big");
+
+ /* Allocate space. */
+ r = (regexp *)BJAM_MALLOC(sizeof(regexp) + (unsigned)regsize);
+ if (r == NULL)
+ FAIL("out of space");
+
+ /* Second pass: emit code. */
+ regparse = (char *)exp;
+ regnpar = 1;
+ regcode = r->program;
+ regc(MAGIC);
+ if (reg(0, &flags) == NULL)
+ return(NULL);
+
+ /* Dig out information for optimizations. */
+ r->regstart = '\0'; /* Worst-case defaults. */
+ r->reganch = 0;
+ r->regmust = NULL;
+ r->regmlen = 0;
+ scan = r->program+1; /* First BRANCH. */
+ if (OP(regnext(scan)) == END) { /* Only one top-level choice. */
+ scan = OPERAND(scan);
+
+ /* Starting-point info. */
+ if (OP(scan) == EXACTLY)
+ r->regstart = *OPERAND(scan);
+ else if (OP(scan) == BOL)
+ r->reganch++;
+
+ /*
+ * If there's something expensive in the r.e., find the
+ * longest literal string that must appear and make it the
+ * regmust. Resolve ties in favor of later strings, since
+ * the regstart check works with the beginning of the r.e.
+ * and avoiding duplication strengthens checking. Not a
+ * strong reason, but sufficient in the absence of others.
+ */
+ if (flags&SPSTART) {
+ longest = NULL;
+ len = 0;
+ for (; scan != NULL; scan = regnext(scan))
+ if (OP(scan) == EXACTLY && strlen(OPERAND(scan)) >= len) {
+ longest = OPERAND(scan);
+ len = strlen(OPERAND(scan));
+ }
+ r->regmust = longest;
+ r->regmlen = len;
+ }
+ }
+
+ return(r);
+}
+
+/*
+ - reg - regular expression, i.e. main body or parenthesized thing
+ *
+ * Caller must absorb opening parenthesis.
+ *
+ * Combining parenthesis handling with the base level of regular expression
+ * is a trifle forced, but the need to tie the tails of the branches to what
+ * follows makes it hard to avoid.
+ */
+static char *
+reg(
+ int paren, /* Parenthesized? */
+ int *flagp )
+{
+ register char *ret;
+ register char *br;
+ register char *ender;
+ register int parno = 0;
+ int flags;
+
+ *flagp = HASWIDTH; /* Tentatively. */
+
+ /* Make an OPEN node, if parenthesized. */
+ if (paren) {
+ if (regnpar >= NSUBEXP)
+ FAIL("too many ()");
+ parno = regnpar;
+ regnpar++;
+ ret = regnode(OPEN+parno);
+ } else
+ ret = NULL;
+
+ /* Pick up the branches, linking them together. */
+ br = regbranch(&flags);
+ if (br == NULL)
+ return(NULL);
+ if (ret != NULL)
+ regtail(ret, br); /* OPEN -> first. */
+ else
+ ret = br;
+ if (!(flags&HASWIDTH))
+ *flagp &= ~HASWIDTH;
+ *flagp |= flags&SPSTART;
+ while (*regparse == '|' || *regparse == '\n') {
+ regparse++;
+ br = regbranch(&flags);
+ if (br == NULL)
+ return(NULL);
+ regtail(ret, br); /* BRANCH -> BRANCH. */
+ if (!(flags&HASWIDTH))
+ *flagp &= ~HASWIDTH;
+ *flagp |= flags&SPSTART;
+ }
+
+ /* Make a closing node, and hook it on the end. */
+ ender = regnode((paren) ? CLOSE+parno : END);
+ regtail(ret, ender);
+
+ /* Hook the tails of the branches to the closing node. */
+ for (br = ret; br != NULL; br = regnext(br))
+ regoptail(br, ender);
+
+ /* Check for proper termination. */
+ if (paren && *regparse++ != ')') {
+ FAIL("unmatched ()");
+ } else if (!paren && *regparse != '\0') {
+ if (*regparse == ')') {
+ FAIL("unmatched ()");
+ } else
+ FAIL("junk on end"); /* "Can't happen". */
+ /* NOTREACHED */
+ }
+
+ return(ret);
+}
+
+/*
+ - regbranch - one alternative of an | operator
+ *
+ * Implements the concatenation operator.
+ */
+static char *
+regbranch( int *flagp )
+{
+ register char *ret;
+ register char *chain;
+ register char *latest;
+ int flags;
+
+ *flagp = WORST; /* Tentatively. */
+
+ ret = regnode(BRANCH);
+ chain = NULL;
+ while (*regparse != '\0' && *regparse != ')' &&
+ *regparse != '\n' && *regparse != '|') {
+ latest = regpiece(&flags);
+ if (latest == NULL)
+ return(NULL);
+ *flagp |= flags&HASWIDTH;
+ if (chain == NULL) /* First piece. */
+ *flagp |= flags&SPSTART;
+ else
+ regtail(chain, latest);
+ chain = latest;
+ }
+ if (chain == NULL) /* Loop ran zero times. */
+ (void) regnode(NOTHING);
+
+ return(ret);
+}
+
+/*
+ - regpiece - something followed by possible [*+?]
+ *
+ * Note that the branching code sequences used for ? and the general cases
+ * of * and + are somewhat optimized: they use the same NOTHING node as
+ * both the endmarker for their branch list and the body of the last branch.
+ * It might seem that this node could be dispensed with entirely, but the
+ * endmarker role is not redundant.
+ */
+static char *
+regpiece( int *flagp )
+{
+ register char *ret;
+ register char op;
+ register char *next;
+ int flags;
+
+ ret = regatom(&flags);
+ if (ret == NULL)
+ return(NULL);
+
+ op = *regparse;
+ if (!ISMULT(op)) {
+ *flagp = flags;
+ return(ret);
+ }
+
+ if (!(flags&HASWIDTH) && op != '?')
+ FAIL("*+ operand could be empty");
+ *flagp = (op != '+') ? (WORST|SPSTART) : (WORST|HASWIDTH);
+
+ if (op == '*' && (flags&SIMPLE))
+ reginsert(STAR, ret);
+ else if (op == '*') {
+ /* Emit x* as (x&|), where & means "self". */
+ reginsert(BRANCH, ret); /* Either x */
+ regoptail(ret, regnode(BACK)); /* and loop */
+ regoptail(ret, ret); /* back */
+ regtail(ret, regnode(BRANCH)); /* or */
+ regtail(ret, regnode(NOTHING)); /* null. */
+ } else if (op == '+' && (flags&SIMPLE))
+ reginsert(PLUS, ret);
+ else if (op == '+') {
+ /* Emit x+ as x(&|), where & means "self". */
+ next = regnode(BRANCH); /* Either */
+ regtail(ret, next);
+ regtail(regnode(BACK), ret); /* loop back */
+ regtail(next, regnode(BRANCH)); /* or */
+ regtail(ret, regnode(NOTHING)); /* null. */
+ } else if (op == '?') {
+ /* Emit x? as (x|) */
+ reginsert(BRANCH, ret); /* Either x */
+ regtail(ret, regnode(BRANCH)); /* or */
+ next = regnode(NOTHING); /* null. */
+ regtail(ret, next);
+ regoptail(ret, next);
+ }
+ regparse++;
+ if (ISMULT(*regparse))
+ FAIL("nested *?+");
+
+ return(ret);
+}
+
+/*
+ - regatom - the lowest level
+ *
+ * Optimization: gobbles an entire sequence of ordinary characters so that
+ * it can turn them into a single node, which is smaller to store and
+ * faster to run. Backslashed characters are exceptions, each becoming a
+ * separate node; the code is simpler that way and it's not worth fixing.
+ */
+static char *
+regatom( int *flagp )
+{
+ register char *ret;
+ int flags;
+
+ *flagp = WORST; /* Tentatively. */
+
+ switch (*regparse++) {
+ /* FIXME: these chars only have meaning at beg/end of pat? */
+ case '^':
+ ret = regnode(BOL);
+ break;
+ case '$':
+ ret = regnode(EOL);
+ break;
+ case '.':
+ ret = regnode(ANY);
+ *flagp |= HASWIDTH|SIMPLE;
+ break;
+ case '[': {
+ register int classr;
+ register int classend;
+
+ if (*regparse == '^') { /* Complement of range. */
+ ret = regnode(ANYBUT);
+ regparse++;
+ } else
+ ret = regnode(ANYOF);
+ if (*regparse == ']' || *regparse == '-')
+ regc(*regparse++);
+ while (*regparse != '\0' && *regparse != ']') {
+ if (*regparse == '-') {
+ regparse++;
+ if (*regparse == ']' || *regparse == '\0')
+ regc('-');
+ else {
+ classr = UCHARAT(regparse-2)+1;
+ classend = UCHARAT(regparse);
+ if (classr > classend+1)
+ FAIL("invalid [] range");
+ for (; classr <= classend; classr++)
+ regc(classr);
+ regparse++;
+ }
+ } else
+ regc(*regparse++);
+ }
+ regc('\0');
+ if (*regparse != ']')
+ FAIL("unmatched []");
+ regparse++;
+ *flagp |= HASWIDTH|SIMPLE;
+ }
+ break;
+ case '(':
+ ret = reg(1, &flags);
+ if (ret == NULL)
+ return(NULL);
+ *flagp |= flags&(HASWIDTH|SPSTART);
+ break;
+ case '\0':
+ case '|':
+ case '\n':
+ case ')':
+ FAIL("internal urp"); /* Supposed to be caught earlier. */
+ break;
+ case '?':
+ case '+':
+ case '*':
+ FAIL("?+* follows nothing");
+ break;
+ case '\\':
+ switch (*regparse++) {
+ case '\0':
+ FAIL("trailing \\");
+ break;
+ case '<':
+ ret = regnode(WORDA);
+ break;
+ case '>':
+ ret = regnode(WORDZ);
+ break;
+ /* FIXME: Someday handle \1, \2, ... */
+ default:
+ /* Handle general quoted chars in exact-match routine */
+ goto de_fault;
+ }
+ break;
+ de_fault:
+ default:
+ /*
+ * Encode a string of characters to be matched exactly.
+ *
+ * This is a bit tricky due to quoted chars and due to
+ * '*', '+', and '?' taking the SINGLE char previous
+ * as their operand.
+ *
+ * On entry, the char at regparse[-1] is going to go
+ * into the string, no matter what it is. (It could be
+ * following a \ if we are entered from the '\' case.)
+ *
+ * Basic idea is to pick up a good char in ch and
+ * examine the next char. If it's *+? then we twiddle.
+ * If it's \ then we frozzle. If it's other magic char
+ * we push ch and terminate the string. If none of the
+ * above, we push ch on the string and go around again.
+ *
+ * regprev is used to remember where "the current char"
+ * starts in the string, if due to a *+? we need to back
+ * up and put the current char in a separate, 1-char, string.
+ * When regprev is NULL, ch is the only char in the
+ * string; this is used in *+? handling, and in setting
+ * flags |= SIMPLE at the end.
+ */
+ {
+ char *regprev;
+ register char ch;
+
+ regparse--; /* Look at cur char */
+ ret = regnode(EXACTLY);
+ for ( regprev = 0 ; ; ) {
+ ch = *regparse++; /* Get current char */
+ switch (*regparse) { /* look at next one */
+
+ default:
+ regc(ch); /* Add cur to string */
+ break;
+
+ case '.': case '[': case '(':
+ case ')': case '|': case '\n':
+ case '$': case '^':
+ case '\0':
+ /* FIXME, $ and ^ should not always be magic */
+ magic:
+ regc(ch); /* dump cur char */
+ goto done; /* and we are done */
+
+ case '?': case '+': case '*':
+ if (!regprev) /* If just ch in str, */
+ goto magic; /* use it */
+ /* End mult-char string one early */
+ regparse = regprev; /* Back up parse */
+ goto done;
+
+ case '\\':
+ regc(ch); /* Cur char OK */
+ switch (regparse[1]){ /* Look after \ */
+ case '\0':
+ case '<':
+ case '>':
+ /* FIXME: Someday handle \1, \2, ... */
+ goto done; /* Not quoted */
+ default:
+ /* Backup point is \, scan * point is after it. */
+ regprev = regparse;
+ regparse++;
+ continue; /* NOT break; */
+ }
+ }
+ regprev = regparse; /* Set backup point */
+ }
+ done:
+ regc('\0');
+ *flagp |= HASWIDTH;
+ if (!regprev) /* One char? */
+ *flagp |= SIMPLE;
+ }
+ break;
+ }
+
+ return(ret);
+}
+
+/*
+ - regnode - emit a node
+ */
+static char * /* Location. */
+regnode( int op )
+{
+ register char *ret;
+ register char *ptr;
+
+ ret = regcode;
+	if (ret == &regdummy) {
+ regsize += 3;
+ return(ret);
+ }
+
+ ptr = ret;
+ *ptr++ = op;
+ *ptr++ = '\0'; /* Null "next" pointer. */
+ *ptr++ = '\0';
+ regcode = ptr;
+
+ return(ret);
+}
+
+/*
+ - regc - emit (if appropriate) a byte of code
+ */
+static void
+regc( int b )
+{
+	if (regcode != &regdummy)
+ *regcode++ = b;
+ else
+ regsize++;
+}
+
+/*
+ - reginsert - insert an operator in front of already-emitted operand
+ *
+ * Means relocating the operand.
+ */
+static void
+reginsert(
+ char op,
+ char *opnd )
+{
+ register char *src;
+ register char *dst;
+ register char *place;
+
+	if (regcode == &regdummy) {
+ regsize += 3;
+ return;
+ }
+
+ src = regcode;
+ regcode += 3;
+ dst = regcode;
+ while (src > opnd)
+ *--dst = *--src;
+
+ place = opnd; /* Op node, where operand used to be. */
+ *place++ = op;
+ *place++ = '\0';
+ *place++ = '\0';
+}
+
+/*
+ - regtail - set the next-pointer at the end of a node chain
+ */
+static void
+regtail(
+ char *p,
+ char *val )
+{
+ register char *scan;
+ register char *temp;
+ register int offset;
+
+ if (p == ®dummy)
+ return;
+
+ /* Find last node. */
+ scan = p;
+ for (;;) {
+ temp = regnext(scan);
+ if (temp == NULL)
+ break;
+ scan = temp;
+ }
+
+ if (OP(scan) == BACK)
+ offset = scan - val;
+ else
+ offset = val - scan;
+ *(scan+1) = (offset>>8)&0377;
+ *(scan+2) = offset&0377;
+}
+
+/*
+ - regoptail - regtail on operand of first argument; nop if operandless
+ */
+
+static void
+regoptail(
+ char *p,
+ char *val )
+{
+ /* "Operandless" and "op != BRANCH" are synonymous in practice. */
+ if (p == NULL || p == ®dummy || OP(p) != BRANCH)
+ return;
+ regtail(OPERAND(p), val);
+}
+
+/*
+ * regexec and friends
+ */
+
+/*
+ * Global work variables for regexec().
+ */
+static const char *reginput; /* String-input pointer. */
+static const char *regbol; /* Beginning of input, for ^ check. */
+static const char **regstartp; /* Pointer to startp array. */
+static const char **regendp; /* Ditto for endp. */
+
+/*
+ * Forwards.
+ */
+STATIC int regtry( regexp *prog, const char *string );
+STATIC int regmatch( char *prog );
+STATIC int regrepeat( char *p );
+
+#ifdef DEBUG
+int regnarrate = 0;
+void regdump();
+STATIC char *regprop();
+#endif
+
+/*
+ - regexec - match a regexp against a string
+ */
+int
+regexec(
+ register regexp *prog,
+ register const char *string )
+{
+ register char *s;
+
+ /* Be paranoid... */
+ if (prog == NULL || string == NULL) {
+ regerror("NULL parameter");
+ return(0);
+ }
+
+ /* Check validity of program. */
+ if (UCHARAT(prog->program) != MAGIC) {
+ regerror("corrupted program");
+ return(0);
+ }
+
+ /* If there is a "must appear" string, look for it. */
+ if ( prog->regmust != NULL )
+ {
+ s = (char *)string;
+ while ( ( s = strchr( s, prog->regmust[ 0 ] ) ) != NULL )
+ {
+ if ( !strncmp( s, prog->regmust, prog->regmlen ) )
+ break; /* Found it. */
+ ++s;
+ }
+ if ( s == NULL ) /* Not present. */
+ return 0;
+ }
+
+ /* Mark beginning of line for ^ . */
+ regbol = (char *)string;
+
+ /* Simplest case: anchored match need be tried only once. */
+ if ( prog->reganch )
+ return regtry( prog, string );
+
+ /* Messy cases: unanchored match. */
+ s = (char *)string;
+ if (prog->regstart != '\0')
+ /* We know what char it must start with. */
+ while ((s = strchr(s, prog->regstart)) != NULL) {
+ if (regtry(prog, s))
+ return(1);
+ s++;
+ }
+ else
+ /* We do not -- general case. */
+ do {
+ if ( regtry( prog, s ) )
+ return( 1 );
+ } while ( *s++ != '\0' );
+
+ /* Failure. */
+ return 0;
+}
+
+
+/*
+ * regtry() - try match at specific point.
+ */
+
+static int /* 0 failure, 1 success */
+regtry(
+ regexp *prog,
+ const char *string )
+{
+ register int i;
+ register const char * * sp;
+ register const char * * ep;
+
+ reginput = string;
+ regstartp = prog->startp;
+ regendp = prog->endp;
+
+ sp = prog->startp;
+ ep = prog->endp;
+ for ( i = NSUBEXP; i > 0; --i )
+ {
+ *sp++ = NULL;
+ *ep++ = NULL;
+ }
+ if ( regmatch( prog->program + 1 ) )
+ {
+ prog->startp[ 0 ] = string;
+ prog->endp[ 0 ] = reginput;
+ return 1;
+ }
+ else
+ return 0;
+}
+
+
+/*
+ * regmatch() - main matching routine.
+ *
+ * Conceptually the strategy is simple: check to see whether the current node
+ * matches, call self recursively to see whether the rest matches, and then act
+ * accordingly. In practice we make some effort to avoid recursion, in
+ * particular by going through "ordinary" nodes (that do not need to know
+ * whether the rest of the match failed) by a loop instead of by recursion.
+ */
+
+static int /* 0 failure, 1 success */
+regmatch( char * prog )
+{
+ char * scan; /* Current node. */
+ char * next; /* Next node. */
+
+ scan = prog;
+#ifdef DEBUG
+ if (scan != NULL && regnarrate)
+ fprintf(stderr, "%s(\n", regprop(scan));
+#endif
+ while (scan != NULL) {
+#ifdef DEBUG
+ if (regnarrate)
+ fprintf(stderr, "%s...\n", regprop(scan));
+#endif
+ next = regnext(scan);
+
+ switch (OP(scan)) {
+ case BOL:
+ if (reginput != regbol)
+ return(0);
+ break;
+ case EOL:
+ if (*reginput != '\0')
+ return(0);
+ break;
+ case WORDA:
+ /* Must be looking at a letter, digit, or _ */
+ if ((!isalnum(*reginput)) && *reginput != '_')
+ return(0);
+ /* Prev must be BOL or nonword */
+ if (reginput > regbol &&
+ (isalnum(reginput[-1]) || reginput[-1] == '_'))
+ return(0);
+ break;
+ case WORDZ:
+ /* Must be looking at non letter, digit, or _ */
+ if (isalnum(*reginput) || *reginput == '_')
+ return(0);
+ /* We don't care what the previous char was */
+ break;
+ case ANY:
+ if (*reginput == '\0')
+ return(0);
+ reginput++;
+ break;
+ case EXACTLY: {
+ register int len;
+ register char *opnd;
+
+ opnd = OPERAND(scan);
+ /* Inline the first character, for speed. */
+ if (*opnd != *reginput)
+ return(0);
+ len = strlen(opnd);
+ if (len > 1 && strncmp(opnd, reginput, len) != 0)
+ return(0);
+ reginput += len;
+ }
+ break;
+ case ANYOF:
+ if (*reginput == '\0' || strchr(OPERAND(scan), *reginput) == NULL)
+ return(0);
+ reginput++;
+ break;
+ case ANYBUT:
+ if (*reginput == '\0' || strchr(OPERAND(scan), *reginput) != NULL)
+ return(0);
+ reginput++;
+ break;
+ case NOTHING:
+ break;
+ case BACK:
+ break;
+ case OPEN+1:
+ case OPEN+2:
+ case OPEN+3:
+ case OPEN+4:
+ case OPEN+5:
+ case OPEN+6:
+ case OPEN+7:
+ case OPEN+8:
+ case OPEN+9: {
+ register int no;
+ register const char *save;
+
+ no = OP(scan) - OPEN;
+ save = reginput;
+
+ if (regmatch(next)) {
+ /*
+ * Don't set startp if some later
+ * invocation of the same parentheses
+ * already has.
+ */
+ if (regstartp[no] == NULL)
+ regstartp[no] = save;
+ return(1);
+ } else
+ return(0);
+ }
+ break;
+ case CLOSE+1:
+ case CLOSE+2:
+ case CLOSE+3:
+ case CLOSE+4:
+ case CLOSE+5:
+ case CLOSE+6:
+ case CLOSE+7:
+ case CLOSE+8:
+ case CLOSE+9: {
+ register int no;
+ register const char *save;
+
+ no = OP(scan) - CLOSE;
+ save = reginput;
+
+ if (regmatch(next)) {
+ /*
+ * Don't set endp if some later
+ * invocation of the same parentheses
+ * already has.
+ */
+ if (regendp[no] == NULL)
+ regendp[no] = save;
+ return(1);
+ } else
+ return(0);
+ }
+ break;
+ case BRANCH: {
+ register const char *save;
+
+ if (OP(next) != BRANCH) /* No choice. */
+ next = OPERAND(scan); /* Avoid recursion. */
+ else {
+ do {
+ save = reginput;
+ if (regmatch(OPERAND(scan)))
+ return(1);
+ reginput = save;
+ scan = regnext(scan);
+ } while (scan != NULL && OP(scan) == BRANCH);
+ return(0);
+ /* NOTREACHED */
+ }
+ }
+ break;
+ case STAR:
+ case PLUS: {
+ register char nextch;
+ register int no;
+ register const char *save;
+ register int min;
+
+ /*
+ * Lookahead to avoid useless match attempts
+ * when we know what character comes next.
+ */
+ nextch = '\0';
+ if (OP(next) == EXACTLY)
+ nextch = *OPERAND(next);
+ min = (OP(scan) == STAR) ? 0 : 1;
+ save = reginput;
+ no = regrepeat(OPERAND(scan));
+ while (no >= min) {
+ /* If it could work, try it. */
+ if (nextch == '\0' || *reginput == nextch)
+ if (regmatch(next))
+ return(1);
+ /* Couldn't or didn't -- back up. */
+ no--;
+ reginput = save + no;
+ }
+ return(0);
+ }
+ break;
+ case END:
+ return(1); /* Success! */
+ break;
+ default:
+ regerror("memory corruption");
+ return(0);
+ break;
+ }
+
+ scan = next;
+ }
+
+ /*
+ * We get here only if there's trouble -- normally "case END" is
+ * the terminating point.
+ */
+ regerror("corrupted pointers");
+ return(0);
+}
+
+/*
+ - regrepeat - repeatedly match something simple, report how many
+ */
+static int
+regrepeat( char *p )
+{
+ register int count = 0;
+ register const char *scan;
+ register char *opnd;
+
+ scan = reginput;
+ opnd = OPERAND(p);
+ switch (OP(p)) {
+ case ANY:
+ count = strlen(scan);
+ scan += count;
+ break;
+ case EXACTLY:
+ while (*opnd == *scan) {
+ count++;
+ scan++;
+ }
+ break;
+ case ANYOF:
+ while (*scan != '\0' && strchr(opnd, *scan) != NULL) {
+ count++;
+ scan++;
+ }
+ break;
+ case ANYBUT:
+ while (*scan != '\0' && strchr(opnd, *scan) == NULL) {
+ count++;
+ scan++;
+ }
+ break;
+ default: /* Oh dear. Called inappropriately. */
+ regerror("internal foulup");
+ count = 0; /* Best compromise. */
+ break;
+ }
+ reginput = scan;
+
+ return(count);
+}
+
+/*
+ - regnext - dig the "next" pointer out of a node
+ */
+static char *
+regnext( register char *p )
+{
+ register int offset;
+
+ if (p == ®dummy)
+ return(NULL);
+
+ offset = NEXT(p);
+ if (offset == 0)
+ return(NULL);
+
+ if (OP(p) == BACK)
+ return(p-offset);
+ else
+ return(p+offset);
+}
+
+#ifdef DEBUG
+
+STATIC char *regprop();
+
+/*
+ - regdump - dump a regexp onto stdout in vaguely comprehensible form
+ */
+void
+regdump( regexp *r )
+{
+ register char *s;
+ register char op = EXACTLY; /* Arbitrary non-END op. */
+ register char *next;
+
+
+ s = r->program + 1;
+ while (op != END) { /* While that wasn't END last time... */
+ op = OP(s);
+ printf("%2d%s", s-r->program, regprop(s)); /* Where, what. */
+ next = regnext(s);
+ if (next == NULL) /* Next ptr. */
+ printf("(0)");
+ else
+ printf("(%d)", (s-r->program)+(next-s));
+ s += 3;
+ if (op == ANYOF || op == ANYBUT || op == EXACTLY) {
+ /* Literal string, where present. */
+ while (*s != '\0') {
+ putchar(*s);
+ s++;
+ }
+ s++;
+ }
+ putchar('\n');
+ }
+
+ /* Header fields of interest. */
+ if (r->regstart != '\0')
+ printf("start `%c' ", r->regstart);
+ if (r->reganch)
+ printf("anchored ");
+ if (r->regmust != NULL)
+ printf("must have \"%s\"", r->regmust);
+ printf("\n");
+}
+
+/*
+ - regprop - printable representation of opcode
+ */
+static char *
+regprop( char *op )
+{
+ register char *p;
+ static char buf[50];
+
+ (void) strcpy(buf, ":");
+
+ switch (OP(op)) {
+ case BOL:
+ p = "BOL";
+ break;
+ case EOL:
+ p = "EOL";
+ break;
+ case ANY:
+ p = "ANY";
+ break;
+ case ANYOF:
+ p = "ANYOF";
+ break;
+ case ANYBUT:
+ p = "ANYBUT";
+ break;
+ case BRANCH:
+ p = "BRANCH";
+ break;
+ case EXACTLY:
+ p = "EXACTLY";
+ break;
+ case NOTHING:
+ p = "NOTHING";
+ break;
+ case BACK:
+ p = "BACK";
+ break;
+ case END:
+ p = "END";
+ break;
+ case OPEN+1:
+ case OPEN+2:
+ case OPEN+3:
+ case OPEN+4:
+ case OPEN+5:
+ case OPEN+6:
+ case OPEN+7:
+ case OPEN+8:
+ case OPEN+9:
+ sprintf(buf+strlen(buf), "OPEN%d", OP(op)-OPEN);
+ p = NULL;
+ break;
+ case CLOSE+1:
+ case CLOSE+2:
+ case CLOSE+3:
+ case CLOSE+4:
+ case CLOSE+5:
+ case CLOSE+6:
+ case CLOSE+7:
+ case CLOSE+8:
+ case CLOSE+9:
+ sprintf(buf+strlen(buf), "CLOSE%d", OP(op)-CLOSE);
+ p = NULL;
+ break;
+ case STAR:
+ p = "STAR";
+ break;
+ case PLUS:
+ p = "PLUS";
+ break;
+ case WORDA:
+ p = "WORDA";
+ break;
+ case WORDZ:
+ p = "WORDZ";
+ break;
+ default:
+ regerror("corrupted opcode");
+ break;
+ }
+ if (p != NULL)
+ (void) strcat(buf, p);
+ return(buf);
+}
+#endif
+
+/*
+ * The following is provided for those people who do not have strcspn() in
+ * their C libraries. They should get off their butts and do something
+ * about it; at least one public-domain implementation of those (highly
+ * useful) string routines has been published on Usenet.
+ */
+#ifdef STRCSPN
+/*
+ * strcspn - find length of initial segment of s1 consisting entirely
+ * of characters not from s2
+ */
+
+static int
+strcspn(
+ char *s1,
+ char *s2 )
+{
+ register char *scan1;
+ register char *scan2;
+ register int count;
+
+ count = 0;
+ for (scan1 = s1; *scan1 != '\0'; scan1++) {
+ for (scan2 = s2; *scan2 != '\0';) /* ++ moved down. */
+ if (*scan1 == *scan2++)
+ return(count);
+ count++;
+ }
+ return(count);
+}
+#endif
diff --git a/src/kenlm/jam-files/engine/regexp.h b/src/kenlm/jam-files/engine/regexp.h
new file mode 100644
index 0000000..6898ccd
--- /dev/null
+++ b/src/kenlm/jam-files/engine/regexp.h
@@ -0,0 +1,34 @@
+/*
+ * Definitions etc. for regexp(3) routines.
+ *
+ * Caveat: this is V8 regexp(3) [actually, a reimplementation thereof],
+ * not the System V one.
+ */
+#ifndef REGEXP_DWA20011023_H
+#define REGEXP_DWA20011023_H
+
+#define NSUBEXP 10
+typedef struct regexp {
+ char const * startp[ NSUBEXP ];
+ char const * endp[ NSUBEXP ];
+ char regstart; /* Internal use only. */
+ char reganch; /* Internal use only. */
+ char * regmust; /* Internal use only. */
+ int regmlen; /* Internal use only. */
+ char program[ 1 ]; /* Unwarranted chumminess with compiler. */
+} regexp;
+
+
+regexp * regcomp( char const * exp );
+int regexec( regexp * prog, char const * string );
+void regerror( char const * s );
+
+
+/*
+ * The first byte of the regexp internal "program" is actually this magic
+ * number; the start node begins in the second byte.
+ */
+#define MAGIC 0234
+
+#endif
+
diff --git a/src/kenlm/jam-files/engine/rules.c b/src/kenlm/jam-files/engine/rules.c
new file mode 100644
index 0000000..7947c55
--- /dev/null
+++ b/src/kenlm/jam-files/engine/rules.c
@@ -0,0 +1,740 @@
+/*
+ * Copyright 1993, 1995 Christopher Seiwald.
+ *
+ * This file is part of Jam - see jam.c for Copyright information.
+ */
+
+/* This file is ALSO:
+ * Copyright 2001-2004 David Abrahams.
+ * Distributed under the Boost Software License, Version 1.0.
+ * (See accompanying file LICENSE_1_0.txt or http://www.boost.org/LICENSE_1_0.txt)
+ */
+
+/*
+ * rules.c - access to RULEs, TARGETs, and ACTIONs
+ *
+ * External routines:
+ * bindrule() - return pointer to RULE, creating it if necessary.
+ * bindtarget() - return pointer to TARGET, creating it if necessary.
+ * touch_target() - mark a target to simulate being new.
+ * targetlist() - turn list of target names into a TARGET chain.
+ * targetentry() - add a TARGET to a chain of TARGETS.
+ * actionlist() - append to an ACTION chain.
+ * addsettings() - add a deferred "set" command to a target.
+ * pushsettings() - set all target specific variables.
+ * popsettings() - reset target specific variables to their pre-push values.
+ * freesettings() - delete a settings list.
+ * rules_done() - free RULE and TARGET tables.
+ */
+
+#include "jam.h"
+#include "rules.h"
+
+#include "hash.h"
+#include "lists.h"
+#include "object.h"
+#include "parse.h"
+#include "pathsys.h"
+#include "search.h"
+#include "variable.h"
+
+
+static void set_rule_actions( RULE *, rule_actions * );
+static void set_rule_body ( RULE *, FUNCTION * );
+
+static struct hash * targethash = 0;
+
+
+/*
+ * get_target_includes() - lazy creates a target's internal includes node
+ *
+ * The newly created node is not entered into the hash table as there should
+ * never be a need to bind them directly from a target name. If you want to
+ * access an internal includes node by name, first access the actual target and
+ * then read the internal includes node from there.
+ */
+
+static TARGET * get_target_includes( TARGET * const t )
+{
+ if ( !t->includes )
+ {
+ TARGET * const i = (TARGET *)BJAM_MALLOC( sizeof( *t ) );
+ memset( (char *)i, '\0', sizeof( *i ) );
+ i->name = object_copy( t->name );
+ i->boundname = object_copy( i->name );
+ i->flags |= T_FLAG_NOTFILE | T_FLAG_INTERNAL;
+ i->original_target = t;
+ t->includes = i;
+ }
+ return t->includes;
+}
+
+
+/*
+ * target_include() - adds a target to the given target's 'included' list
+ * target_include_many() - adds targets to the given target's 'included' list
+ *
+ * Included targets are modeled as dependencies of the including target's
+ * internal include node.
+ */
+
+void target_include( TARGET * const including, TARGET * const included )
+{
+ TARGET * const internal = get_target_includes( including );
+ internal->depends = targetentry( internal->depends, included );
+}
+
+void target_include_many( TARGET * const including, LIST * const included_names
+ )
+{
+ TARGET * const internal = get_target_includes( including );
+ internal->depends = targetlist( internal->depends, included_names );
+}
+
+
+/*
+ * enter_rule() - return pointer to RULE, creating it if necessary in
+ * target_module.
+ */
+
+static RULE * enter_rule( OBJECT * rulename, module_t * target_module )
+{
+ int found;
+ RULE * const r = (RULE *)hash_insert( demand_rules( target_module ),
+ rulename, &found );
+ if ( !found )
+ {
+ r->name = object_copy( rulename );
+ r->procedure = 0;
+ r->module = 0;
+ r->actions = 0;
+ r->exported = 0;
+ r->module = target_module;
+ }
+ return r;
+}
+
+
+/*
+ * define_rule() - return pointer to RULE, creating it if necessary in
+ * target_module. Prepare it to accept a body or action originating in
+ * src_module.
+ */
+
+static RULE * define_rule( module_t * src_module, OBJECT * rulename,
+ module_t * target_module )
+{
+ RULE * const r = enter_rule( rulename, target_module );
+ if ( r->module != src_module )
+ {
+ /* If the rule was imported from elsewhere, clear it now. */
+ set_rule_body( r, 0 );
+ set_rule_actions( r, 0 );
+ /* r will be executed in the source module. */
+ r->module = src_module;
+ }
+ return r;
+}
+
+
+void rule_free( RULE * r )
+{
+ object_free( r->name );
+ r->name = 0;
+ if ( r->procedure )
+ function_free( r->procedure );
+ r->procedure = 0;
+ if ( r->actions )
+ actions_free( r->actions );
+ r->actions = 0;
+}
+
+
+/*
+ * bindtarget() - return pointer to TARGET, creating it if necessary.
+ */
+
+TARGET * bindtarget( OBJECT * const target_name )
+{
+ int found;
+ TARGET * t;
+
+ if ( !targethash )
+ targethash = hashinit( sizeof( TARGET ), "targets" );
+
+ t = (TARGET *)hash_insert( targethash, target_name, &found );
+ if ( !found )
+ {
+ memset( (char *)t, '\0', sizeof( *t ) );
+ t->name = object_copy( target_name );
+ t->boundname = object_copy( t->name ); /* default for T_FLAG_NOTFILE */
+ }
+
+ return t;
+}
+
+
+static void bind_explicitly_located_target( void * xtarget, void * data )
+{
+ TARGET * t = (TARGET *)xtarget;
+ if ( !( t->flags & T_FLAG_NOTFILE ) )
+ {
+ /* Check if there is a setting for LOCATE. */
+ SETTINGS * s = t->settings;
+ for ( ; s ; s = s->next )
+ {
+ if ( object_equal( s->symbol, constant_LOCATE ) && ! list_empty( s->value ) )
+ {
+ set_explicit_binding( t->name, list_front( s->value ) );
+ break;
+ }
+ }
+ }
+}
+
+
+void bind_explicitly_located_targets()
+{
+ if ( targethash )
+ hashenumerate( targethash, bind_explicitly_located_target, (void *)0 );
+}
+
+
+/*
+ * touch_target() - mark a target to simulate being new.
+ */
+
+void touch_target( OBJECT * const t )
+{
+ bindtarget( t )->flags |= T_FLAG_TOUCHED;
+}
+
+
+/*
+ * target_scc() - returns the root of a strongly connected component that this
+ * target is a part of.
+ */
+
+TARGET * target_scc( TARGET * t )
+{
+ TARGET * result = t;
+ while ( result->scc_root )
+ result = result->scc_root;
+ while ( t->scc_root )
+ {
+ TARGET * const tmp = t->scc_root;
+ t->scc_root = result;
+ t = tmp;
+ }
+ return result;
+}
+
+
+/*
+ * targetlist() - turn list of target names into a TARGET chain.
+ *
+ * Inputs:
+ * chain existing TARGETS to append to
+ * targets list of target names
+ */
+
+TARGETS * targetlist( TARGETS * chain, LIST * target_names )
+{
+ LISTITER iter = list_begin( target_names );
+ LISTITER const end = list_end( target_names );
+ for ( ; iter != end; iter = list_next( iter ) )
+ chain = targetentry( chain, bindtarget( list_item( iter ) ) );
+ return chain;
+}
+
+
+/*
+ * targetentry() - add a TARGET to a chain of TARGETS.
+ *
+ * Inputs:
+ * chain existing TARGETS to append to
+ * target new target to append
+ */
+
+TARGETS * targetentry( TARGETS * chain, TARGET * target )
+{
+ TARGETS * const c = (TARGETS *)BJAM_MALLOC( sizeof( TARGETS ) );
+ c->target = target;
+
+ if ( !chain ) chain = c;
+ else chain->tail->next = c;
+ chain->tail = c;
+ c->next = 0;
+
+ return chain;
+}
+
+
+/*
+ * targetchain() - append two TARGET chains.
+ *
+ * Inputs:
+ * chain existing TARGETS to append to
+ * target new target to append
+ */
+
+TARGETS * targetchain( TARGETS * chain, TARGETS * targets )
+{
+ if ( !targets ) return chain;
+ if ( !chain ) return targets;
+
+ chain->tail->next = targets;
+ chain->tail = targets->tail;
+ return chain;
+}
+
+/*
+ * action_free - decrement the ACTIONs reference count and (maybe) free it.
+ */
+
+void action_free( ACTION * action )
+{
+ if ( --action->refs == 0 )
+ {
+ freetargets( action->targets );
+ freetargets( action->sources );
+ BJAM_FREE( action );
+ }
+}
+
+
+/*
+ * actionlist() - append to an ACTION chain.
+ */
+
+ACTIONS * actionlist( ACTIONS * chain, ACTION * action )
+{
+ ACTIONS * const actions = (ACTIONS *)BJAM_MALLOC( sizeof( ACTIONS ) );
+ actions->action = action;
+ ++action->refs;
+ if ( !chain ) chain = actions;
+ else chain->tail->next = actions;
+ chain->tail = actions;
+ actions->next = 0;
+ return chain;
+}
+
+static SETTINGS * settings_freelist;
+
+
+/*
+ * addsettings() - add a deferred "set" command to a target.
+ *
+ * Adds a variable setting (varname=list) onto a chain of settings for a
+ * particular target. 'flag' controls the relationship between new and old
+ * values in the same way as in var_set() function (see variable.c). Returns the
+ * head of the settings chain.
+ */
+
+SETTINGS * addsettings( SETTINGS * head, int flag, OBJECT * symbol,
+ LIST * value )
+{
+ SETTINGS * v;
+
+ /* Look for previous settings. */
+ for ( v = head; v; v = v->next )
+ if ( object_equal( v->symbol, symbol ) )
+ break;
+
+ /* If not previously set, alloc a new. */
+ /* If appending, do so. */
+ /* Else free old and set new. */
+ if ( !v )
+ {
+ v = settings_freelist;
+ if ( v )
+ settings_freelist = v->next;
+ else
+ v = (SETTINGS *)BJAM_MALLOC( sizeof( *v ) );
+
+ v->symbol = object_copy( symbol );
+ v->value = value;
+ v->next = head;
+ head = v;
+ }
+ else if ( flag == VAR_APPEND )
+ {
+ v->value = list_append( v->value, value );
+ }
+ else if ( flag != VAR_DEFAULT )
+ {
+ list_free( v->value );
+ v->value = value;
+ }
+ else
+ list_free( value );
+
+ /* Return (new) head of list. */
+ return head;
+}
+
+
+/*
+ * pushsettings() - set all target specific variables.
+ */
+
+void pushsettings( struct module_t * module, SETTINGS * v )
+{
+ for ( ; v; v = v->next )
+ v->value = var_swap( module, v->symbol, v->value );
+}
+
+
+/*
+ * popsettings() - reset target specific variables to their pre-push values.
+ */
+
+void popsettings( struct module_t * module, SETTINGS * v )
+{
+ pushsettings( module, v ); /* just swap again */
+}
+
+
+/*
+ * copysettings() - duplicate a settings list, returning the new copy.
+ */
+
+SETTINGS * copysettings( SETTINGS * head )
+{
+ SETTINGS * copy = 0;
+ SETTINGS * v;
+ for ( v = head; v; v = v->next )
+ copy = addsettings( copy, VAR_SET, v->symbol, list_copy( v->value ) );
+ return copy;
+}
+
+
+/*
+ * freetargets() - delete a targets list.
+ */
+
+void freetargets( TARGETS * chain )
+{
+ while ( chain )
+ {
+ TARGETS * const n = chain->next;
+ BJAM_FREE( chain );
+ chain = n;
+ }
+}
+
+
+/*
+ * freeactions() - delete an action list.
+ */
+
+void freeactions( ACTIONS * chain )
+{
+ while ( chain )
+ {
+ ACTIONS * const n = chain->next;
+ action_free( chain->action );
+ BJAM_FREE( chain );
+ chain = n;
+ }
+}
+
+
+/*
+ * freesettings() - delete a settings list.
+ */
+
+void freesettings( SETTINGS * v )
+{
+ while ( v )
+ {
+ SETTINGS * const n = v->next;
+ object_free( v->symbol );
+ list_free( v->value );
+ v->next = settings_freelist;
+ settings_freelist = v;
+ v = n;
+ }
+}
+
+
+static void freetarget( void * xt, void * data )
+{
+ TARGET * const t = (TARGET *)xt;
+ if ( t->name ) object_free ( t->name );
+ if ( t->boundname ) object_free ( t->boundname );
+ if ( t->settings ) freesettings( t->settings );
+ if ( t->depends ) freetargets ( t->depends );
+ if ( t->dependants ) freetargets ( t->dependants );
+ if ( t->parents ) freetargets ( t->parents );
+ if ( t->actions ) freeactions ( t->actions );
+ if ( t->includes )
+ {
+ freetarget( t->includes, (void *)0 );
+ BJAM_FREE( t->includes );
+ }
+}
+
+
+/*
+ * rules_done() - free RULE and TARGET tables.
+ */
+
+void rules_done()
+{
+ if ( targethash )
+ {
+ hashenumerate( targethash, freetarget, 0 );
+ hashdone( targethash );
+ }
+ while ( settings_freelist )
+ {
+ SETTINGS * const n = settings_freelist->next;
+ BJAM_FREE( settings_freelist );
+ settings_freelist = n;
+ }
+}
+
+
+/*
+ * actions_refer() - add a new reference to the given actions.
+ */
+
+void actions_refer( rule_actions * a )
+{
+ ++a->reference_count;
+}
+
+
+/*
+ * actions_free() - release a reference to given actions.
+ */
+
+void actions_free( rule_actions * a )
+{
+ if ( --a->reference_count <= 0 )
+ {
+ function_free( a->command );
+ list_free( a->bindlist );
+ BJAM_FREE( a );
+ }
+}
+
+
+/*
+ * set_rule_body() - set the argument list and procedure of the given rule.
+ */
+
+static void set_rule_body( RULE * rule, FUNCTION * procedure )
+{
+ if ( procedure )
+ function_refer( procedure );
+ if ( rule->procedure )
+ function_free( rule->procedure );
+ rule->procedure = procedure;
+}
+
+
+/*
+ * global_name() - given a rule, return the name for a corresponding rule in the
+ * global module.
+ */
+
+static OBJECT * global_rule_name( RULE * r )
+{
+    if ( r->module == root_module() )
+        return object_copy( r->name );
+
+    {
+        char name[ 4096 ] = "";
+        if ( r->module->name )
+        {
+            strncat( name, object_str( r->module->name ), sizeof( name ) - strlen( name ) - 1 );
+            strncat( name, ".", sizeof( name ) - strlen( name ) - 1 );
+        }
+        strncat( name, object_str( r->name ), sizeof( name ) - strlen( name ) - 1 );
+        return object_new( name );
+    }
+}
+
+
+/*
+ * global_rule() - given a rule, produce a corresponding entry in the global
+ * module.
+ */
+
+static RULE * global_rule( RULE * r )
+{
+ if ( r->module == root_module() )
+ return r;
+
+ {
+ OBJECT * const name = global_rule_name( r );
+ RULE * const result = define_rule( r->module, name, root_module() );
+ object_free( name );
+ return result;
+ }
+}
+
+
+/*
+ * new_rule_body() - make a new rule named rulename in the given module, with
+ * the given argument list and procedure. If exported is true, the rule is
+ * exported to the global module as modulename.rulename.
+ */
+
+RULE * new_rule_body( module_t * m, OBJECT * rulename, FUNCTION * procedure,
+ int exported )
+{
+ RULE * const local = define_rule( m, rulename, m );
+ local->exported = exported;
+ set_rule_body( local, procedure );
+
+ /* Mark the procedure with the global rule name, regardless of whether the
+ * rule is exported. That gives us something reasonably identifiable that we
+ * can use, e.g. in profiling output. Only do this once, since this could be
+ * called multiple times with the same procedure.
+ */
+ if ( !function_rulename( procedure ) )
+ function_set_rulename( procedure, global_rule_name( local ) );
+
+ return local;
+}
+
+
+static void set_rule_actions( RULE * rule, rule_actions * actions )
+{
+ if ( actions )
+ actions_refer( actions );
+ if ( rule->actions )
+ actions_free( rule->actions );
+ rule->actions = actions;
+}
+
+
+static rule_actions * actions_new( FUNCTION * command, LIST * bindlist,
+ int flags )
+{
+ rule_actions * const result = (rule_actions *)BJAM_MALLOC( sizeof(
+ rule_actions ) );
+ function_refer( command );
+ result->command = command;
+ result->bindlist = bindlist;
+ result->flags = flags;
+ result->reference_count = 0;
+ return result;
+}
+
+
+RULE * new_rule_actions( module_t * m, OBJECT * rulename, FUNCTION * command,
+ LIST * bindlist, int flags )
+{
+ RULE * const local = define_rule( m, rulename, m );
+ RULE * const global = global_rule( local );
+ set_rule_actions( local, actions_new( command, bindlist, flags ) );
+ set_rule_actions( global, local->actions );
+ return local;
+}
+
+
+/*
+ * Looks for a rule in the specified module, and returns it, if found. First
+ * checks if the rule is present in the module's rule table. Second, if the
+ * rule's name is in the form name1.name2 and name1 is in the list of imported
+ * modules, look in module 'name1' for rule 'name2'.
+ */
+
+RULE * lookup_rule( OBJECT * rulename, module_t * m, int local_only )
+{
+ RULE * r;
+ RULE * result = 0;
+ module_t * original_module = m;
+
+ if ( m->class_module )
+ m = m->class_module;
+
+ if ( m->rules && ( r = (RULE *)hash_find( m->rules, rulename ) ) )
+ result = r;
+ else if ( !local_only && m->imported_modules )
+ {
+ /* Try splitting the name into module and rule. */
+ char * p = strchr( object_str( rulename ), '.' ) ;
+ if ( p )
+ {
+ /* Now, r->name keeps the module name, and p + 1 keeps the rule
+ * name.
+ */
+ OBJECT * rule_part = object_new( p + 1 );
+ OBJECT * module_part;
+ {
+ string buf[ 1 ];
+ string_new( buf );
+ string_append_range( buf, object_str( rulename ), p );
+ module_part = object_new( buf->value );
+ string_free( buf );
+ }
+ if ( hash_find( m->imported_modules, module_part ) )
+ result = lookup_rule( rule_part, bindmodule( module_part ), 1 );
+ object_free( module_part );
+ object_free( rule_part );
+ }
+ }
+
+ if ( result )
+ {
+ if ( local_only && !result->exported )
+ result = 0;
+ else if ( original_module != m )
+ {
+ /* Lookup started in class module. We have found a rule in class
+ * module, which is marked for execution in that module, or in some
+ * instance. Mark it for execution in the instance where we started
+ * the lookup.
+ */
+ int const execute_in_class = result->module == m;
+ int const execute_in_some_instance =
+ result->module->class_module == m;
+ if ( execute_in_class || execute_in_some_instance )
+ result->module = original_module;
+ }
+ }
+
+ return result;
+}
+
+
+RULE * bindrule( OBJECT * rulename, module_t * m )
+{
+ RULE * result = lookup_rule( rulename, m, 0 );
+ if ( !result )
+ result = lookup_rule( rulename, root_module(), 0 );
+ /* We have only one caller, 'evaluate_rule', which will complain about
+ * calling an undefined rule. We could issue the error here, but we do not
+ * have the necessary information, such as frame.
+ */
+ if ( !result )
+ result = enter_rule( rulename, m );
+ return result;
+}
+
+
+RULE * import_rule( RULE * source, module_t * m, OBJECT * name )
+{
+ RULE * const dest = define_rule( source->module, name, m );
+ set_rule_body( dest, source->procedure );
+ set_rule_actions( dest, source->actions );
+ return dest;
+}
+
+
+void rule_localize( RULE * rule, module_t * m )
+{
+ rule->module = m;
+ if ( rule->procedure )
+ {
+ FUNCTION * procedure = function_unbind_variables( rule->procedure );
+ function_refer( procedure );
+ function_free( rule->procedure );
+ rule->procedure = procedure;
+ }
+}
diff --git a/src/kenlm/jam-files/engine/rules.h b/src/kenlm/jam-files/engine/rules.h
new file mode 100644
index 0000000..fe2792f
--- /dev/null
+++ b/src/kenlm/jam-files/engine/rules.h
@@ -0,0 +1,270 @@
+/*
+ * Copyright 1993, 1995 Christopher Seiwald.
+ *
+ * This file is part of Jam - see jam.c for Copyright information.
+ */
+
+/* This file is ALSO:
+ * Copyright 2001-2004 David Abrahams.
+ * Distributed under the Boost Software License, Version 1.0.
+ * (See accompanying file LICENSE_1_0.txt or http://www.boost.org/LICENSE_1_0.txt)
+ */
+
+/*
+ * rules.h - targets, rules, and related information
+ *
+ * This file describes the structures holding the targets, rules, and related
+ * information accumulated by interpreting the statements of the jam files.
+ *
+ * The following are defined:
+ *
+ * RULE - a generic jam rule, the product of RULE and ACTIONS.
+ * ACTIONS - a chain of ACTIONs.
+ * ACTION - a RULE instance with targets and sources.
+ * SETTINGS - variables to set when executing a TARGET's ACTIONS.
+ * TARGETS - a chain of TARGETs.
+ * TARGET - an entity (e.g. a file) that can be built.
+ */
+
+#ifndef RULES_DWA_20011020_H
+#define RULES_DWA_20011020_H
+
+#include "function.h"
+#include "modules.h"
+#include "timestamp.h"
+
+
+typedef struct _rule RULE;
+typedef struct _target TARGET;
+typedef struct _targets TARGETS;
+typedef struct _action ACTION;
+typedef struct _actions ACTIONS;
+typedef struct _settings SETTINGS ;
+
+/* RULE - a generic jam rule, the product of RULE and ACTIONS. */
+
+/* Build actions corresponding to a rule. */
+struct rule_actions
+{
+ int reference_count;
+ FUNCTION * command; /* command string from ACTIONS */
+ LIST * bindlist;
+ int flags; /* modifiers on ACTIONS */
+
+#define RULE_NEWSRCS 0x01 /* $(>) is updated sources only */
+#define RULE_TOGETHER 0x02 /* combine actions on single target */
+#define RULE_IGNORE 0x04 /* ignore return status of executes */
+#define RULE_QUIETLY 0x08 /* do not mention it unless verbose */
+#define RULE_PIECEMEAL 0x10 /* split exec so each $(>) is small */
+#define RULE_EXISTING 0x20 /* $(>) is pre-existing sources only */
+};
+
+typedef struct rule_actions rule_actions;
+typedef struct argument_list argument_list;
+
+struct _rule
+{
+ OBJECT * name;
+ FUNCTION * procedure;
+ rule_actions * actions; /* build actions, or NULL for no actions */
+ module_t * module; /* module in which this rule is executed */
+ int exported; /* nonzero if this rule is supposed to appear in
+ * the global module and be automatically
+ * imported into other modules
+ */
+};
+
+/* ACTIONS - a chain of ACTIONs. */
+struct _actions
+{
+ ACTIONS * next;
+ ACTIONS * tail; /* valid only for head */
+ ACTION * action;
+};
+
+/* ACTION - a RULE instance with targets and sources. */
+struct _action
+{
+ RULE * rule;
+ TARGETS * targets;
+ TARGETS * sources; /* aka $(>) */
+ char running; /* has been started */
+#define A_INIT 0
+#define A_RUNNING_NOEXEC 1
+#define A_RUNNING 2
+ char status; /* see TARGET status */
+ int refs;
+};
+
+/* SETTINGS - variables to set when executing a TARGET's ACTIONS. */
+struct _settings
+{
+ SETTINGS * next;
+ OBJECT * symbol; /* symbol name for var_set() */
+ LIST * value; /* symbol value for var_set() */
+};
+
+/* TARGETS - a chain of TARGETs. */
+struct _targets
+{
+ TARGETS * next;
+ TARGETS * tail; /* valid only for head */
+ TARGET * target;
+};
+
+/* TARGET - an entity (e.g. a file) that can be built. */
+struct _target
+{
+ OBJECT * name;
+ OBJECT * boundname; /* if search() relocates target */
+ ACTIONS * actions; /* rules to execute, if any */
+ SETTINGS * settings; /* variables to define */
+
+ short flags; /* status info */
+
+#define T_FLAG_TEMP 0x0001 /* TEMPORARY applied */
+#define T_FLAG_NOCARE 0x0002 /* NOCARE applied */
+#define T_FLAG_NOTFILE 0x0004 /* NOTFILE applied */
+#define T_FLAG_TOUCHED 0x0008 /* ALWAYS applied or -t target */
+#define T_FLAG_LEAVES 0x0010 /* LEAVES applied */
+#define T_FLAG_NOUPDATE 0x0020 /* NOUPDATE applied */
+#define T_FLAG_VISITED 0x0040 /* CWM: Used in debugging */
+
+/* This flag has been added to support a new built-in rule named "RMBAD". It is
+ * used to force removal of outdated targets whose dependencies fail to build.
+ */
+#define T_FLAG_RMOLD 0x0080 /* RMBAD applied */
+
+/* This flag was added to support a new built-in rule named "FAIL_EXPECTED" used
+ * to indicate that the result of running a given action should be inverted,
+ * i.e. ok <=> fail. Useful for launching certain test runs from a Jamfile.
+ */
+#define T_FLAG_FAIL_EXPECTED 0x0100 /* FAIL_EXPECTED applied */
+
+#define T_FLAG_INTERNAL 0x0200 /* internal INCLUDES node */
+
+/* Indicates that the target must be a file. Prevents matching non-files, like
+ * directories, when a target is searched.
+ */
+#define T_FLAG_ISFILE 0x0400
+
+#define T_FLAG_PRECIOUS 0x0800
+
+ char binding; /* how target relates to a real file or
+ * folder
+ */
+
+#define T_BIND_UNBOUND 0 /* a disembodied name */
+#define T_BIND_MISSING 1 /* could not find real file */
+#define T_BIND_PARENTS 2 /* using parent's timestamp */
+#define T_BIND_EXISTS 3 /* real file, timestamp valid */
+
+ TARGETS * depends; /* dependencies */
+ TARGETS * dependants; /* the inverse of dependencies */
+ TARGETS * rebuilds; /* targets that should be force-rebuilt
+ * whenever this one is
+ */
+ TARGET * includes; /* internal includes node */
+ TARGET * original_target; /* original_target->includes = this */
+ char rescanned;
+
+ timestamp time; /* update time */
+ timestamp leaf; /* update time of leaf sources */
+
+ char fate; /* make0()'s diagnosis */
+
+#define T_FATE_INIT 0 /* nothing done to target */
+#define T_FATE_MAKING 1 /* make0(target) on stack */
+
+#define T_FATE_STABLE 2 /* target did not need updating */
+#define T_FATE_NEWER 3 /* target newer than parent */
+
+#define T_FATE_SPOIL 4 /* >= SPOIL rebuilds parents */
+#define T_FATE_ISTMP 4 /* unneeded temp target oddly present */
+
+#define T_FATE_BUILD 5 /* >= BUILD rebuilds target */
+#define T_FATE_TOUCHED 5 /* manually touched with -t */
+#define T_FATE_REBUILD 6
+#define T_FATE_MISSING 7 /* is missing, needs updating */
+#define T_FATE_NEEDTMP 8 /* missing temp that must be rebuilt */
+#define T_FATE_OUTDATED 9 /* is out of date, needs updating */
+#define T_FATE_UPDATE 10 /* deps updated, needs updating */
+
+#define T_FATE_BROKEN 11 /* >= BROKEN ruins parents */
+#define T_FATE_CANTFIND 11 /* no rules to make missing target */
+#define T_FATE_CANTMAKE 12 /* can not find dependencies */
+
+ char progress; /* tracks make1() progress */
+
+#define T_MAKE_INIT 0 /* make1(target) not yet called */
+#define T_MAKE_ONSTACK 1 /* make1(target) on stack */
+#define T_MAKE_ACTIVE 2 /* make1(target) in make1b() */
+#define T_MAKE_RUNNING 3 /* make1(target) running commands */
+#define T_MAKE_DONE 4 /* make1(target) done */
+#define T_MAKE_NOEXEC_DONE 5 /* make1(target) done with -n in effect */
+
+#ifdef OPT_SEMAPHORE
+ #define T_MAKE_SEMAPHORE 5 /* Special target type for semaphores */
+#endif
+
+#ifdef OPT_SEMAPHORE
+ TARGET * semaphore; /* used in serialization */
+#endif
+
+ char status; /* exec_cmd() result */
+
+ int asynccnt; /* child deps outstanding */
+ TARGETS * parents; /* used by make1() for completion */
+ TARGET * scc_root; /* used by make to resolve cyclic includes
+ */
+ TARGET * rescanning; /* used by make0 to mark visited targets
+ * when rescanning
+ */
+ int depth; /* The depth of the target in the make0
+ * stack.
+ */
+ char * cmds; /* type-punned command list */
+
+ char const * failed;
+};
+
+
+/* Action related functions. */
+void action_free ( ACTION * );
+ACTIONS * actionlist ( ACTIONS *, ACTION * );
+void freeactions ( ACTIONS * );
+SETTINGS * addsettings ( SETTINGS *, int flag, OBJECT * symbol, LIST * value );
+void pushsettings ( module_t *, SETTINGS * );
+void popsettings ( module_t *, SETTINGS * );
+SETTINGS * copysettings ( SETTINGS * );
+void freesettings ( SETTINGS * );
+void actions_refer( rule_actions * );
+void actions_free ( rule_actions * );
+
+/* Rule related functions. */
+RULE * bindrule ( OBJECT * rulename, module_t * );
+RULE * import_rule ( RULE * source, module_t *, OBJECT * name );
+void rule_localize ( RULE * rule, module_t * module );
+RULE * new_rule_body ( module_t *, OBJECT * rulename, FUNCTION * func, int exprt );
+RULE * new_rule_actions( module_t *, OBJECT * rulename, FUNCTION * command, LIST * bindlist, int flags );
+void rule_free ( RULE * );
+
+/* Target related functions. */
+void bind_explicitly_located_targets();
+TARGET * bindtarget ( OBJECT * const );
+void freetargets ( TARGETS * );
+TARGETS * targetchain ( TARGETS *, TARGETS * );
+TARGETS * targetentry ( TARGETS *, TARGET * );
+void target_include ( TARGET * const including,
+ TARGET * const included );
+void target_include_many ( TARGET * const including,
+ LIST * const included_names );
+TARGETS * targetlist ( TARGETS *, LIST * target_names );
+void touch_target ( OBJECT * const );
+void clear_includes ( TARGET * );
+TARGET * target_scc ( TARGET * );
+
+/* Final module cleanup. */
+void rules_done();
+
+#endif
diff --git a/src/kenlm/jam-files/engine/scan.c b/src/kenlm/jam-files/engine/scan.c
new file mode 100644
index 0000000..d92fdca
--- /dev/null
+++ b/src/kenlm/jam-files/engine/scan.c
@@ -0,0 +1,404 @@
+/*
+ * Copyright 1993-2002 Christopher Seiwald and Perforce Software, Inc.
+ *
+ * This file is part of Jam - see jam.c for Copyright information.
+ */
+
+/*
+ * scan.c - the jam yacc scanner
+ *
+ */
+
+#include "jam.h"
+#include "scan.h"
+
+#include "constants.h"
+#include "jambase.h"
+#include "jamgram.h"
+
+
+struct keyword
+{
+ char * word;
+ int type;
+} keywords[] =
+{
+#include "jamgramtab.h"
+ { 0, 0 }
+};
+
+typedef struct include include;
+struct include
+{
+ include * next; /* next serial include file */
+ char * string; /* pointer into current line */
+ char * * strings; /* for yyfparse() -- text to parse */
+ FILE * file; /* for yyfparse() -- file being read */
+ OBJECT * fname; /* for yyfparse() -- file name */
+ int line; /* line counter for error messages */
+ char buf[ 512 ]; /* for yyfparse() -- line buffer */
+};
+
+static include * incp = 0; /* current file; head of chain */
+
+static int scanmode = SCAN_NORMAL;
+static int anyerrors = 0;
+
+
+static char * symdump( YYSTYPE * );
+
+#define BIGGEST_TOKEN 10240 /* no single token can be larger */
+
+
+/*
+ * Set parser mode: normal, string, or keyword.
+ */
+
+void yymode( int n )
+{
+ scanmode = n;
+}
+
+
+void yyerror( char const * s )
+{
+ /* We use yylval instead of incp to access the error location information as
+ * the incp pointer will already be reset to 0 in case the error occurred at
+ * EOF.
+ *
+ * The two may differ only if we ran into an unexpected EOF or we get an error
+ * while reading a lexical token spanning multiple lines, e.g. a multi-line
+ * string literal or action body, in which case yylval location information
+ * will hold the information about where the token started while incp will
+ * hold the information about where reading it broke.
+ */
+ printf( "%s:%d: %s at %s\n", object_str( yylval.file ), yylval.line, s,
+ symdump( &yylval ) );
+ ++anyerrors;
+}
+
+
+int yyanyerrors()
+{
+ return anyerrors != 0;
+}
+
+
+void yyfparse( OBJECT * s )
+{
+ include * i = (include *)BJAM_MALLOC( sizeof( *i ) );
+
+ /* Push this onto the incp chain. */
+ i->string = "";
+ i->strings = 0;
+ i->file = 0;
+ i->fname = object_copy( s );
+ i->line = 0;
+ i->next = incp;
+ incp = i;
+
+ /* If the filename is "+", it means use the internal jambase. */
+ if ( !strcmp( object_str( s ), "+" ) )
+ i->strings = jambase;
+}
+
+
+/*
+ * yyline() - read new line and return first character.
+ *
+ * Fabricates a continuous stream of characters across include files, returning
+ * EOF at the bitter end.
+ */
+
+int yyline()
+{
+ include * const i = incp;
+
+ if ( !incp )
+ return EOF;
+
+ /* Once we start reading from the input stream, we reset the include
+ * insertion point so that the next include file becomes the head of the
+ * list.
+ */
+
+ /* If there is more data in this line, return it. */
+ if ( *i->string )
+ return *i->string++;
+
+ /* If we are reading from an internal string list, go to the next string. */
+ if ( i->strings )
+ {
+ if ( *i->strings )
+ {
+ ++i->line;
+ i->string = *(i->strings++);
+ return *i->string++;
+ }
+ }
+ else
+ {
+ /* If necessary, open the file. */
+ if ( !i->file )
+ {
+ FILE * f = stdin;
+ if ( strcmp( object_str( i->fname ), "-" ) && !( f = fopen( object_str( i->fname ), "r" ) ) )
+ perror( object_str( i->fname ) );
+ i->file = f;
+ }
+
+ /* If there is another line in this file, start it. */
+ if ( i->file && fgets( i->buf, sizeof( i->buf ), i->file ) )
+ {
+ ++i->line;
+ i->string = i->buf;
+ return *i->string++;
+ }
+ }
+
+ /* This include is done. Free it up and return EOF so yyparse() returns to
+ * parse_file().
+ */
+
+ incp = i->next;
+
+ /* Close file, free name. */
+ if ( i->file && ( i->file != stdin ) )
+ fclose( i->file );
+ object_free( i->fname );
+ BJAM_FREE( (char *)i );
+
+ return EOF;
+}
+
+
+/*
+ * yylex() - set yylval to current token; return its type.
+ *
+ * Macros to move things along:
+ *
+ * yychar() - return and advance character; invalid after EOF.
+ * yyprev() - back up one character; invalid before yychar().
+ *
+ * yychar() returns a continuous stream of characters, until it hits the EOF of
+ * the current include file.
+ */
+
+#define yychar() ( *incp->string ? *incp->string++ : yyline() )
+#define yyprev() ( incp->string-- )
+
+int yylex()
+{
+ int c;
+ char buf[ BIGGEST_TOKEN ];
+ char * b = buf;
+
+ if ( !incp )
+ goto eof;
+
+ /* Get first character (whitespace or of token). */
+ c = yychar();
+
+ if ( scanmode == SCAN_STRING )
+ {
+ /* If scanning for a string (action's {}'s), look for the closing brace.
+ * We handle matching braces, if they match.
+ */
+
+ int nest = 1;
+
+ while ( ( c != EOF ) && ( b < buf + sizeof( buf ) ) )
+ {
+ if ( c == '{' )
+ ++nest;
+
+ if ( ( c == '}' ) && !--nest )
+ break;
+
+ *b++ = c;
+
+ c = yychar();
+
+ /* Turn trailing "\r\n" sequences into plain "\n" for Cygwin. */
+ if ( ( c == '\n' ) && ( b[ -1 ] == '\r' ) )
+ --b;
+ }
+
+ /* We ate the ending brace -- regurgitate it. */
+ if ( c != EOF )
+ yyprev();
+
+ /* Check for obvious errors. */
+ if ( b == buf + sizeof( buf ) )
+ {
+ yyerror( "action block too big" );
+ goto eof;
+ }
+
+ if ( nest )
+ {
+ yyerror( "unmatched {} in action block" );
+ goto eof;
+ }
+
+ *b = 0;
+ yylval.type = STRING;
+ yylval.string = object_new( buf );
+ yylval.file = incp->fname;
+ yylval.line = incp->line;
+ }
+ else
+ {
+ char * b = buf;
+ struct keyword * k;
+ int inquote = 0;
+ int notkeyword;
+
+ /* Eat white space. */
+ for ( ; ; )
+ {
+ /* Skip past white space. */
+ while ( ( c != EOF ) && isspace( c ) )
+ c = yychar();
+
+ /* Not a comment? */
+ if ( c != '#' )
+ break;
+
+ /* Swallow up comment line. */
+ while ( ( ( c = yychar() ) != EOF ) && ( c != '\n' ) ) ;
+ }
+
+ /* c now points to the first character of a token. */
+ if ( c == EOF )
+ goto eof;
+
+ yylval.file = incp->fname;
+ yylval.line = incp->line;
+
+ /* While scanning the word, disqualify it for (expensive) keyword lookup
+ * when we can: $anything, "anything", \anything
+ */
+ notkeyword = c == '$';
+
+ /* Look for white space to delimit word. "'s get stripped but preserve
+ * white space. \ protects next character.
+ */
+ while
+ (
+ ( c != EOF ) &&
+ ( b < buf + sizeof( buf ) ) &&
+ ( inquote || !isspace( c ) )
+ )
+ {
+ if ( c == '"' )
+ {
+ /* begin or end " */
+ inquote = !inquote;
+ notkeyword = 1;
+ }
+ else if ( c != '\\' )
+ {
+ /* normal char */
+ *b++ = c;
+ }
+ else if ( ( c = yychar() ) != EOF )
+ {
+ /* \c */
+ if (c == 'n')
+ c = '\n';
+ else if (c == 'r')
+ c = '\r';
+ else if (c == 't')
+ c = '\t';
+ *b++ = c;
+ notkeyword = 1;
+ }
+ else
+ {
+ /* \EOF */
+ break;
+ }
+
+ c = yychar();
+ }
+
+ /* Check obvious errors. */
+ if ( b == buf + sizeof( buf ) )
+ {
+ yyerror( "string too big" );
+ goto eof;
+ }
+
+ if ( inquote )
+ {
+ yyerror( "unmatched \" in string" );
+ goto eof;
+ }
+
+ /* We looked ahead a character - back up. */
+ if ( c != EOF )
+ yyprev();
+
+ /* Scan token table. Do not scan if it is obviously not a keyword or if
+ * it is an alphabetic when we're looking for punctuation.
+ */
+
+ *b = 0;
+ yylval.type = ARG;
+
+ if ( !notkeyword && !( isalpha( *buf ) && ( scanmode == SCAN_PUNCT ) ) )
+ for ( k = keywords; k->word; ++k )
+ if ( ( *buf == *k->word ) && !strcmp( k->word, buf ) )
+ {
+ yylval.type = k->type;
+ yylval.keyword = k->word; /* used by symdump */
+ break;
+ }
+
+ if ( yylval.type == ARG )
+ yylval.string = object_new( buf );
+ }
+
+ if ( DEBUG_SCAN )
+ printf( "scan %s\n", symdump( &yylval ) );
+
+ return yylval.type;
+
+eof:
+ /* We do not reset yylval.file & yylval.line here so unexpected EOF error
+ * messages would include correct error location information.
+ */
+ yylval.type = EOF;
+ return yylval.type;
+}
+
+
+static char * symdump( YYSTYPE * s )
+{
+ static char buf[ BIGGEST_TOKEN + 20 ];
+ switch ( s->type )
+ {
+ case EOF : sprintf( buf, "EOF" ); break;
+ case 0 : sprintf( buf, "unknown symbol %s", object_str( s->string ) ); break;
+ case ARG : sprintf( buf, "argument %s" , object_str( s->string ) ); break;
+ case STRING: sprintf( buf, "string \"%s\"" , object_str( s->string ) ); break;
+ default : sprintf( buf, "keyword %s" , s->keyword ); break;
+ }
+ return buf;
+}
+
+
+/*
+ * Get information about the current file and line, for those epsilon
+ * transitions that produce a parse.
+ */
+
+void yyinput_last_read_token( OBJECT * * name, int * line )
+{
+ /* TODO: Consider whether and when we might want to report where the last
+ * read token ended, e.g. EOF errors inside string literals.
+ */
+ *name = yylval.file;
+ *line = yylval.line;
+}
diff --git a/src/kenlm/jam-files/engine/scan.h b/src/kenlm/jam-files/engine/scan.h
new file mode 100644
index 0000000..745477f
--- /dev/null
+++ b/src/kenlm/jam-files/engine/scan.h
@@ -0,0 +1,61 @@
+/*
+ * Copyright 1993, 1995 Christopher Seiwald.
+ *
+ * This file is part of Jam - see jam.c for Copyright information.
+ */
+
+/*
+ * scan.h - the jam yacc scanner
+ *
+ * External functions:
+ * yyerror( char *s ) - print a parsing error message.
+ * yyfparse( char *s ) - scan include file s.
+ * yylex() - parse the next token, returning its type.
+ * yymode() - adjust lexicon of scanner.
+ * yyparse() - declaration for yacc parser.
+ * yyanyerrors() - indicate if any parsing errors occurred.
+ *
+ * The yymode() function is for the parser to adjust the lexicon of the scanner.
+ * Aside from normal keyword scanning, there is a mode to handle action strings
+ * (look only for the closing }) and a mode to ignore most keywords when looking
+ * for a punctuation keyword. This allows non-punctuation keywords to be used in
+ * lists without quoting.
+ */
+
+#include "lists.h"
+#include "object.h"
+#include "parse.h"
+
+
+/*
+ * YYSTYPE - value of a lexical token
+ */
+
+#define YYSTYPE YYSYMBOL
+
+typedef struct _YYSTYPE
+{
+ int type;
+ OBJECT * string;
+ PARSE * parse;
+ LIST * list;
+ int number;
+ OBJECT * file;
+ int line;
+ char const * keyword;
+} YYSTYPE;
+
+extern YYSTYPE yylval;
+
+void yymode( int n );
+void yyerror( char const * s );
+int yyanyerrors();
+void yyfparse( OBJECT * s );
+int yyline();
+int yylex();
+int yyparse();
+void yyinput_last_read_token( OBJECT * * name, int * line );
+
+#define SCAN_NORMAL 0 /* normal parsing */
+#define SCAN_STRING 1 /* look only for matching } */
+#define SCAN_PUNCT 2 /* only punctuation keywords */
diff --git a/src/kenlm/jam-files/engine/search.c b/src/kenlm/jam-files/engine/search.c
new file mode 100644
index 0000000..b2beada
--- /dev/null
+++ b/src/kenlm/jam-files/engine/search.c
@@ -0,0 +1,274 @@
+/*
+ * Copyright 1993-2002 Christopher Seiwald and Perforce Software, Inc.
+ *
+ * This file is part of Jam - see jam.c for Copyright information.
+ */
+
+/* This file is ALSO:
+ * Copyright 2001-2004 David Abrahams.
+ * Distributed under the Boost Software License, Version 1.0.
+ * (See accompanying file LICENSE_1_0.txt or copy at
+ * http://www.boost.org/LICENSE_1_0.txt)
+ */
+
+#include "jam.h"
+#include "search.h"
+
+#include "compile.h"
+#include "filesys.h"
+#include "hash.h"
+#include "lists.h"
+#include "object.h"
+#include "pathsys.h"
+#include "strings.h"
+#include "timestamp.h"
+#include "variable.h"
+
+#include <string.h>
+
+
+typedef struct _binding
+{
+ OBJECT * binding;
+ OBJECT * target;
+} BINDING;
+
+static struct hash * explicit_bindings = 0;
+
+
+void call_bind_rule( OBJECT * target_, OBJECT * boundname_ )
+{
+ LIST * const bind_rule = var_get( root_module(), constant_BINDRULE );
+ if ( !list_empty( bind_rule ) )
+ {
+ OBJECT * target = object_copy( target_ );
+ OBJECT * boundname = object_copy( boundname_ );
+ if ( boundname && target )
+ {
+ /* Prepare the argument list. */
+ FRAME frame[ 1 ];
+ frame_init( frame );
+
+ /* First argument is the target name. */
+ lol_add( frame->args, list_new( target ) );
+
+ lol_add( frame->args, list_new( boundname ) );
+ if ( lol_get( frame->args, 1 ) )
+ {
+ OBJECT * rulename = list_front( bind_rule );
+ list_free( evaluate_rule( bindrule( rulename, root_module() ), rulename, frame ) );
+ }
+
+ /* Clean up */
+ frame_free( frame );
+ }
+ else
+ {
+ if ( boundname )
+ object_free( boundname );
+ if ( target )
+ object_free( target );
+ }
+ }
+}
+
+/* Records the binding of a target with an explicit LOCATE. */
+void set_explicit_binding( OBJECT * target, OBJECT * locate )
+{
+ OBJECT * boundname;
+ OBJECT * key;
+ PATHNAME f[ 1 ];
+ string buf[ 1 ];
+ int found;
+ BINDING * ba;
+
+ if ( !explicit_bindings )
+ explicit_bindings = hashinit( sizeof( BINDING ), "explicitly specified "
+ "locations" );
+
+ string_new( buf );
+
+ /* Parse the filename. */
+ path_parse( object_str( target ), f );
+
+ /* Ignore the grist. */
+ f->f_grist.ptr = 0;
+ f->f_grist.len = 0;
+
+ /* Root the target path at the given location. */
+ f->f_root.ptr = object_str( locate );
+ f->f_root.len = strlen( object_str( locate ) );
+
+ path_build( f, buf );
+ boundname = object_new( buf->value );
+ if ( DEBUG_SEARCH )
+ printf( "explicit locate %s: %s\n", object_str( target ), buf->value );
+ string_free( buf );
+ key = path_as_key( boundname );
+ object_free( boundname );
+
+ ba = (BINDING *)hash_insert( explicit_bindings, key, &found );
+ if ( !found )
+ {
+ ba->binding = key;
+ ba->target = target;
+ }
+ else
+ object_free( key );
+}
+
+/*
+ * search.c - find a target along $(SEARCH) or $(LOCATE).
+ *
+ * First, check if LOCATE is set. If so, use it to determine the location of
+ * target and return, regardless of whether anything exists at that location.
+ *
+ * Second, examine all directories in SEARCH. If the file exists there or there
+ * is another target with the same name already placed at this location via the
+ * LOCATE setting, stop and return the location. In case of a previous target,
+ * return its name via the 'another_target' argument.
+ *
+ * This behaviour allows handling dependencies on generated files.
+ *
+ * If caller does not expect that the target is generated, 0 can be passed as
+ * 'another_target'.
+ */
+
+OBJECT * search( OBJECT * target, timestamp * const time,
+ OBJECT * * another_target, int const file )
+{
+ PATHNAME f[ 1 ];
+ LIST * varlist;
+ string buf[ 1 ];
+ int found = 0;
+ OBJECT * boundname = 0;
+
+ if ( another_target )
+ *another_target = 0;
+
+ if ( !explicit_bindings )
+ explicit_bindings = hashinit( sizeof( BINDING ), "explicitly specified "
+ "locations" );
+
+ string_new( buf );
+
+ /* Parse the filename. */
+ path_parse( object_str( target ), f );
+
+ f->f_grist.ptr = 0;
+ f->f_grist.len = 0;
+
+ varlist = var_get( root_module(), constant_LOCATE );
+ if ( !list_empty( varlist ) )
+ {
+ OBJECT * key;
+ f->f_root.ptr = object_str( list_front( varlist ) );
+ f->f_root.len = strlen( object_str( list_front( varlist ) ) );
+
+ path_build( f, buf );
+
+ if ( DEBUG_SEARCH )
+ printf( "locate %s: %s\n", object_str( target ), buf->value );
+
+ key = object_new( buf->value );
+ timestamp_from_path( time, key );
+ object_free( key );
+ found = 1;
+ }
+ else if ( varlist = var_get( root_module(), constant_SEARCH ),
+ !list_empty( varlist ) )
+ {
+ LISTITER iter = list_begin( varlist );
+ LISTITER const end = list_end( varlist );
+ for ( ; iter != end; iter = list_next( iter ) )
+ {
+ BINDING * ba;
+ file_info_t * ff;
+ OBJECT * key;
+ OBJECT * test_path;
+
+ f->f_root.ptr = object_str( list_item( iter ) );
+ f->f_root.len = strlen( object_str( list_item( iter ) ) );
+
+ string_truncate( buf, 0 );
+ path_build( f, buf );
+
+ if ( DEBUG_SEARCH )
+ printf( "search %s: %s\n", object_str( target ), buf->value );
+
+ test_path = object_new( buf->value );
+ key = path_as_key( test_path );
+ object_free( test_path );
+ ff = file_query( key );
+ timestamp_from_path( time, key );
+
+ if ( ( ba = (BINDING *)hash_find( explicit_bindings, key ) ) )
+ {
+ if ( DEBUG_SEARCH )
+ printf(" search %s: found explicitly located target %s\n",
+ object_str( target ), object_str( ba->target ) );
+ if ( another_target )
+ *another_target = ba->target;
+ found = 1;
+ object_free( key );
+ break;
+ }
+ else if ( ff )
+ {
+ if ( !file || ff->is_file )
+ {
+ found = 1;
+ object_free( key );
+ break;
+ }
+ }
+ object_free( key );
+ }
+ }
+
+ if ( !found )
+ {
+ /* Look for the obvious. */
+ /* This is a questionable move. Should we look in the obvious place if
+ * SEARCH is set?
+ */
+ OBJECT * key;
+
+ f->f_root.ptr = 0;
+ f->f_root.len = 0;
+
+ string_truncate( buf, 0 );
+ path_build( f, buf );
+
+ if ( DEBUG_SEARCH )
+ printf( "search %s: %s\n", object_str( target ), buf->value );
+
+ key = object_new( buf->value );
+ timestamp_from_path( time, key );
+ object_free( key );
+ }
+
+ boundname = object_new( buf->value );
+ string_free( buf );
+
+ /* Prepare a call to BINDRULE if the variable is set. */
+ call_bind_rule( target, boundname );
+
+ return boundname;
+}
+
+
+static void free_binding( void * xbinding, void * data )
+{
+ object_free( ( (BINDING *)xbinding )->binding );
+}
+
+
+void search_done( void )
+{
+ if ( explicit_bindings )
+ {
+ hashenumerate( explicit_bindings, free_binding, 0 );
+ hashdone( explicit_bindings );
+ }
+}
diff --git a/src/kenlm/jam-files/engine/search.h b/src/kenlm/jam-files/engine/search.h
new file mode 100644
index 0000000..7e74f79
--- /dev/null
+++ b/src/kenlm/jam-files/engine/search.h
@@ -0,0 +1,22 @@
+/*
+ * Copyright 1993, 1995 Christopher Seiwald.
+ *
+ * This file is part of Jam - see jam.c for Copyright information.
+ */
+
+/*
+ * search.h - find a target along $(SEARCH) or $(LOCATE)
+ */
+
+#ifndef SEARCH_SW20111118_H
+#define SEARCH_SW20111118_H
+
+#include "object.h"
+#include "timestamp.h"
+
+void set_explicit_binding( OBJECT * target, OBJECT * locate );
+OBJECT * search( OBJECT * target, timestamp * const time,
+ OBJECT * * another_target, int const file );
+void search_done( void );
+
+#endif
diff --git a/src/kenlm/jam-files/engine/strings.c b/src/kenlm/jam-files/engine/strings.c
new file mode 100644
index 0000000..3d3e19b
--- /dev/null
+++ b/src/kenlm/jam-files/engine/strings.c
@@ -0,0 +1,223 @@
+/* Copyright David Abrahams 2004. Distributed under the Boost */
+/* Software License, Version 1.0. (See accompanying */
+/* file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) */
+
+#include "jam.h"
+#include "strings.h"
+
+#include <assert.h>
+#include <stdlib.h>
+#include <string.h>
+
+
+#ifndef NDEBUG
+# define JAM_STRING_MAGIC ((char)0xcf)
+# define JAM_STRING_MAGIC_SIZE 4
+static void assert_invariants( string * self )
+{
+ int i;
+
+ if ( self->value == 0 )
+ {
+ assert( self->size == 0 );
+ assert( self->capacity == 0 );
+ assert( self->opt[ 0 ] == 0 );
+ return;
+ }
+
+ assert( self->size < self->capacity );
+ assert( ( self->capacity <= sizeof( self->opt ) ) == ( self->value == self->opt ) );
+ assert( self->value[ self->size ] == 0 );
+ /* String objects modified manually after construction to contain embedded
+ * '\0' characters are considered structurally valid.
+ */
+ assert( strlen( self->value ) <= self->size );
+
+ for ( i = 0; i < 4; ++i )
+ {
+ assert( self->magic[ i ] == JAM_STRING_MAGIC );
+ assert( self->value[ self->capacity + i ] == JAM_STRING_MAGIC );
+ }
+}
+#else
+# define JAM_STRING_MAGIC_SIZE 0
+# define assert_invariants(x) do {} while (0)
+#endif
+
+
+void string_new( string * s )
+{
+ s->value = s->opt;
+ s->size = 0;
+ s->capacity = sizeof( s->opt );
+ s->opt[ 0 ] = 0;
+#ifndef NDEBUG
+ memset( s->magic, JAM_STRING_MAGIC, sizeof( s->magic ) );
+#endif
+ assert_invariants( s );
+}
+
+
+void string_free( string * s )
+{
+ assert_invariants( s );
+ if ( s->value != s->opt )
+ BJAM_FREE( s->value );
+ string_new( s );
+}
+
+
+static void string_reserve_internal( string * self, size_t capacity )
+{
+ if ( self->value == self->opt )
+ {
+ self->value = (char *)BJAM_MALLOC_ATOMIC( capacity +
+ JAM_STRING_MAGIC_SIZE );
+ self->value[ 0 ] = 0;
+ strncat( self->value, self->opt, sizeof(self->opt) );
+ assert( strlen( self->value ) <= self->capacity && "Regression test" );
+ }
+ else
+ {
+ self->value = (char *)BJAM_REALLOC( self->value, capacity +
+ JAM_STRING_MAGIC_SIZE );
+ }
+#ifndef NDEBUG
+ memcpy( self->value + capacity, self->magic, JAM_STRING_MAGIC_SIZE );
+#endif
+ self->capacity = capacity;
+}
+
+
+void string_reserve( string * self, size_t capacity )
+{
+ assert_invariants( self );
+ if ( capacity <= self->capacity )
+ return;
+ string_reserve_internal( self, capacity );
+ assert_invariants( self );
+}
+
+
+static void extend_full( string * self, char const * start, char const * finish )
+{
+ size_t new_size = self->capacity + ( finish - start );
+ size_t new_capacity = self->capacity;
+ size_t old_size = self->capacity;
+ while ( new_capacity < new_size + 1)
+ new_capacity <<= 1;
+ string_reserve_internal( self, new_capacity );
+ memcpy( self->value + old_size, start, new_size - old_size );
+ self->value[ new_size ] = 0;
+ self->size = new_size;
+}
+
+static void maybe_reserve( string * self, size_t new_size )
+{
+ size_t capacity = self->capacity;
+ if ( capacity <= new_size )
+ {
+ size_t new_capacity = capacity;
+ while ( new_capacity <= new_size )
+ new_capacity <<= 1;
+ string_reserve_internal( self, new_capacity );
+ }
+}
+
+
+void string_append( string * self, char const * rhs )
+{
+ size_t rhs_size = strlen( rhs );
+ size_t new_size = self->size + rhs_size;
+ assert_invariants( self );
+
+ maybe_reserve( self, new_size );
+
+ memcpy( self->value + self->size, rhs, rhs_size + 1 );
+ self->size = new_size;
+
+ assert_invariants( self );
+}
+
+
+void string_append_range( string * self, char const * start, char const * finish )
+{
+ size_t rhs_size = finish - start;
+ size_t new_size = self->size + rhs_size;
+ assert_invariants( self );
+
+ maybe_reserve( self, new_size );
+
+ memcpy( self->value + self->size, start, rhs_size );
+ self->size = new_size;
+ self->value[ new_size ] = 0;
+
+ assert_invariants( self );
+}
+
+
+void string_copy( string * s, char const * rhs )
+{
+ string_new( s );
+ string_append( s, rhs );
+}
+
+void string_truncate( string * self, size_t n )
+{
+ assert_invariants( self );
+ assert( n <= self->capacity );
+ self->value[ self->size = n ] = 0;
+ assert_invariants( self );
+}
+
+
+void string_pop_back( string * self )
+{
+ string_truncate( self, self->size - 1 );
+}
+
+
+void string_push_back( string * self, char x )
+{
+ string_append_range( self, &x, &x + 1 );
+}
+
+
+char string_back( string * self )
+{
+ assert_invariants( self );
+ return self->value[ self->size - 1 ];
+}
+
+
+#ifndef NDEBUG
+void string_unit_test()
+{
+ {
+ string s[ 1 ];
+ int i;
+ int const limit = sizeof( s->opt ) * 2 + 2;
+ string_new( s );
+ assert( s->value == s->opt );
+ for ( i = 0; i < limit; ++i )
+ {
+ string_push_back( s, (char)( i + 1 ) );
+ assert( s->size == i + 1 );
+ }
+ assert( s->size == limit );
+ assert( s->value != s->opt );
+ for ( i = 0; i < limit; ++i )
+ assert( s->value[ i ] == (char)( i + 1 ) );
+ string_free( s );
+ }
+
+ {
+ char * const original = " \n\t\v Foo \r\n\v \tBar\n\n\r\r\t\n\v\t \t";
+ string copy[ 1 ];
+ string_copy( copy, original );
+ assert( !strcmp( copy->value, original ) );
+ assert( copy->size == strlen( original ) );
+ string_free( copy );
+ }
+}
+#endif
diff --git a/src/kenlm/jam-files/engine/strings.h b/src/kenlm/jam-files/engine/strings.h
new file mode 100644
index 0000000..749f287
--- /dev/null
+++ b/src/kenlm/jam-files/engine/strings.h
@@ -0,0 +1,36 @@
+/*
+ * Copyright 2004. David Abrahams
+ * Distributed under the Boost Software License, Version 1.0.
+ * (See accompanying file LICENSE_1_0.txt or copy at
+ * http://www.boost.org/LICENSE_1_0.txt)
+ */
+
+#ifndef STRINGS_DWA20011024_H
+#define STRINGS_DWA20011024_H
+
+#include <stddef.h>
+
+typedef struct string
+{
+ char * value;
+ unsigned long size;
+ unsigned long capacity;
+ char opt[ 32 ];
+#ifndef NDEBUG
+ char magic[ 4 ];
+#endif
+} string;
+
+void string_new( string * );
+void string_copy( string *, char const * );
+void string_free( string * );
+void string_append( string *, char const * );
+void string_append_range( string *, char const *, char const * );
+void string_push_back( string * s, char x );
+void string_reserve( string *, size_t );
+void string_truncate( string *, size_t );
+void string_pop_back( string * );
+char string_back( string * );
+void string_unit_test();
+
+#endif
diff --git a/src/kenlm/jam-files/engine/subst.c b/src/kenlm/jam-files/engine/subst.c
new file mode 100644
index 0000000..a5fcee0
--- /dev/null
+++ b/src/kenlm/jam-files/engine/subst.c
@@ -0,0 +1,116 @@
+#include "jam.h"
+#include "subst.h"
+
+#include "builtins.h"
+#include "frames.h"
+#include "hash.h"
+#include "lists.h"
+
+#include <stddef.h>
+
+
+typedef struct regex_entry
+{
+ OBJECT * pattern;
+ regexp * regex;
+} regex_entry;
+
+static struct hash * regex_hash;
+
+
+regexp * regex_compile( OBJECT * pattern )
+{
+ int found;
+ regex_entry * e ;
+
+ if ( !regex_hash )
+ regex_hash = hashinit( sizeof( regex_entry ), "regex" );
+
+ e = (regex_entry *)hash_insert( regex_hash, pattern, &found );
+ if ( !found )
+ {
+ e->pattern = object_copy( pattern );
+ e->regex = regcomp( (char *)pattern );
+ }
+
+ return e->regex;
+}
+
+
+LIST * builtin_subst( FRAME * frame, int flags )
+{
+ LIST * result = L0;
+ LIST * const arg1 = lol_get( frame->args, 0 );
+ LISTITER iter = list_begin( arg1 );
+ LISTITER const end = list_end( arg1 );
+
+ if ( iter != end && list_next( iter ) != end && list_next( list_next( iter )
+ ) != end )
+ {
+ char const * const source = object_str( list_item( iter ) );
+ OBJECT * const pattern = list_item( list_next( iter ) );
+ regexp * const repat = regex_compile( pattern );
+
+ if ( regexec( repat, (char *)source) )
+ {
+ LISTITER subst = list_next( iter );
+
+ while ( ( subst = list_next( subst ) ) != end )
+ {
+#define BUFLEN 4096
+ char buf[ BUFLEN + 1 ];
+ char const * in = object_str( list_item( subst ) );
+ char * out = buf;
+
+ for ( ; *in && out < buf + BUFLEN; ++in )
+ {
+ if ( *in == '\\' || *in == '$' )
+ {
+ ++in;
+ if ( *in == 0 )
+ break;
+ if ( *in >= '0' && *in <= '9' )
+ {
+ unsigned int const n = *in - '0';
+ size_t const srclen = repat->endp[ n ] -
+ repat->startp[ n ];
+ size_t const remaining = buf + BUFLEN - out;
+ size_t const len = srclen < remaining
+ ? srclen
+ : remaining;
+ memcpy( out, repat->startp[ n ], len );
+ out += len;
+ continue;
+ }
+ /* fall through and copy the next character */
+ }
+ *out++ = *in;
+ }
+ *out = 0;
+
+ result = list_push_back( result, object_new( buf ) );
+#undef BUFLEN
+ }
+ }
+ }
+
+ return result;
+}
+
+
+static void free_regex( void * xregex, void * data )
+{
+ regex_entry * const regex = (regex_entry *)xregex;
+ object_free( regex->pattern );
+ BJAM_FREE( regex->regex );
+}
+
+
+void regex_done()
+{
+ if ( regex_hash )
+ {
+ hashenumerate( regex_hash, free_regex, (void *)0 );
+ hashdone( regex_hash );
+ }
+}
diff --git a/src/kenlm/jam-files/engine/subst.h b/src/kenlm/jam-files/engine/subst.h
new file mode 100644
index 0000000..7dc09a6
--- /dev/null
+++ b/src/kenlm/jam-files/engine/subst.h
@@ -0,0 +1,14 @@
+/* Copyright 2001-2004 David Abrahams.
+ * Distributed under the Boost Software License, Version 1.0.
+ * (See accompanying file LICENSE_1_0.txt or http://www.boost.org/LICENSE_1_0.txt)
+ */
+
+#ifndef SUBST_JG20120722_H
+#define SUBST_JG20120722_H
+
+#include "object.h"
+#include "regexp.h"
+
+regexp * regex_compile( OBJECT * pattern );
+
+#endif
diff --git a/src/kenlm/jam-files/engine/timestamp.c b/src/kenlm/jam-files/engine/timestamp.c
new file mode 100644
index 0000000..0d01698
--- /dev/null
+++ b/src/kenlm/jam-files/engine/timestamp.c
@@ -0,0 +1,262 @@
+/*
+ * Copyright 1993-2002 Christopher Seiwald and Perforce Software, Inc.
+ *
+ * This file is part of Jam - see jam.c for Copyright information.
+ */
+
+/* This file is ALSO:
+ * Copyright 2001-2004 David Abrahams.
+ * Distributed under the Boost Software License, Version 1.0.
+ * (See accompanying file LICENSE_1_0.txt or
+ * http://www.boost.org/LICENSE_1_0.txt)
+ */
+
+/*
+ * timestamp.c - get the timestamp of a file or archive member
+ *
+ * External routines:
+ * timestamp_from_path() - return timestamp for a path, if present
+ * timestamp_done() - free timestamp tables
+ *
+ * Internal routines:
+ * time_enter() - internal worker callback for scanning archives &
+ * directories
+ * free_timestamps() - worker function for freeing timestamp table contents
+ */
+
+#include "jam.h"
+#include "timestamp.h"
+
+#include "filesys.h"
+#include "hash.h"
+#include "object.h"
+#include "pathsys.h"
+#include "strings.h"
+
+
+/*
+ * BINDING - all known files
+ */
+
+typedef struct _binding
+{
+ OBJECT * name;
+ short flags;
+
+#define BIND_SCANNED 0x01 /* if directory or arch, has been scanned */
+
+ short progress;
+
+#define BIND_INIT 0 /* never seen */
+#define BIND_NOENTRY 1 /* timestamp requested but file never found */
+#define BIND_SPOTTED 2 /* file found but not timed yet */
+#define BIND_MISSING 3 /* file found but can not get timestamp */
+#define BIND_FOUND 4 /* file found and time stamped */
+
+ /* update time - cleared if there is nothing to bind */
+ timestamp time;
+} BINDING;
+
+static struct hash * bindhash = 0;
+
+static void time_enter( void *, OBJECT *, int const found,
+ timestamp const * const );
+
+static char * time_progress[] =
+{
+ "INIT",
+ "NOENTRY",
+ "SPOTTED",
+ "MISSING",
+ "FOUND"
+};
+
+
+#ifdef OS_NT
+/*
+ * timestamp_from_filetime() - Windows FILETIME --> timestamp conversion
+ *
+ * Lifted shamelessly from the CPython implementation.
+ */
+
+void timestamp_from_filetime( timestamp * const t, FILETIME const * const ft )
+{
+ /* Seconds between 1.1.1601 and 1.1.1970 */
+ static __int64 const secs_between_epochs = 11644473600;
+
+ /* We can not simply cast and dereference a FILETIME, since it might not be
+ * aligned properly. __int64 type variables are expected to be aligned to an
+ * 8 byte boundary while FILETIME structures may be aligned to any 4 byte
+ * boundary. Using an incorrectly aligned __int64 variable may cause a
+ * performance penalty on some platforms or even exceptions on others
+ * (documented on MSDN).
+ */
+ __int64 in;
+ memcpy( &in, ft, sizeof( in ) );
+
+ /* FILETIME resolution: 100ns. */
+ timestamp_init( t, (time_t)( ( in / 10000000 ) - secs_between_epochs ),
+ (int)( in % 10000000 ) * 100 );
+}
+#endif /* OS_NT */
+
+
+void timestamp_clear( timestamp * const time )
+{
+ time->secs = time->nsecs = 0;
+}
+
+
+int timestamp_cmp( timestamp const * const lhs, timestamp const * const rhs )
+{
+ return lhs->secs == rhs->secs
+ ? lhs->nsecs - rhs->nsecs
+ : lhs->secs - rhs->secs;
+}
+
+
+void timestamp_copy( timestamp * const target, timestamp const * const source )
+{
+ target->secs = source->secs;
+ target->nsecs = source->nsecs;
+}
+
+
+void timestamp_current( timestamp * const t )
+{
+#ifdef OS_NT
+ /* GetSystemTimeAsFileTime()'s resolution seems to be about 15 ms on Windows
+ * XP and under a millisecond on Windows 7.
+ */
+ FILETIME ft;
+ GetSystemTimeAsFileTime( &ft );
+ timestamp_from_filetime( t, &ft );
+#else /* OS_NT */
+ timestamp_init( t, time( 0 ), 0 );
+#endif /* OS_NT */
+}
+
+
+int timestamp_empty( timestamp const * const time )
+{
+ return !time->secs && !time->nsecs;
+}
+
+
+/*
+ * timestamp_from_path() - return timestamp for a path, if present
+ */
+
+void timestamp_from_path( timestamp * const time, OBJECT * const path )
+{
+ PROFILE_ENTER( timestamp );
+
+ PATHNAME f1;
+ PATHNAME f2;
+ int found;
+ BINDING * b;
+ string buf[ 1 ];
+
+
+ if ( file_time( path, time ) < 0 )
+ timestamp_clear( time );
+
+ PROFILE_EXIT( timestamp );
+}
+
+
+void timestamp_init( timestamp * const time, time_t const secs, int const nsecs
+ )
+{
+ time->secs = secs;
+ time->nsecs = nsecs;
+}
+
+
+void timestamp_max( timestamp * const max, timestamp const * const lhs,
+ timestamp const * const rhs )
+{
+ if ( timestamp_cmp( lhs, rhs ) > 0 )
+ timestamp_copy( max, lhs );
+ else
+ timestamp_copy( max, rhs );
+}
+
+
+static char const * timestamp_formatstr( timestamp const * const time,
+ char const * const format )
+{
+ static char result1[ 500 ];
+ static char result2[ 500 ];
+ strftime( result1, sizeof( result1 ) / sizeof( *result1 ), format, gmtime(
+ &time->secs ) );
+ sprintf( result2, result1, time->nsecs );
+ return result2;
+}
+
+
+char const * timestamp_str( timestamp const * const time )
+{
+ return timestamp_formatstr( time, "%Y-%m-%d %H:%M:%S.%%09d +0000" );
+}
+
+
+char const * timestamp_timestr( timestamp const * const time )
+{
+ return timestamp_formatstr( time, "%H:%M:%S.%%09d" );
+}
+
+
+/*
+ * time_enter() - internal worker callback for scanning archives & directories
+ */
+
+static void time_enter( void * closure, OBJECT * target, int const found,
+ timestamp const * const time )
+{
+ int item_found;
+ BINDING * b;
+ struct hash * const bindhash = (struct hash *)closure;
+
+ target = path_as_key( target );
+
+ b = (BINDING *)hash_insert( bindhash, target, &item_found );
+ if ( !item_found )
+ {
+ b->name = object_copy( target );
+ b->flags = 0;
+ }
+
+ timestamp_copy( &b->time, time );
+ b->progress = found ? BIND_FOUND : BIND_SPOTTED;
+
+ if ( DEBUG_BINDSCAN )
+ printf( "time ( %s ) : %s\n", object_str( target ), time_progress[
+ b->progress ] );
+
+ object_free( target );
+}
+
+
+/*
+ * free_timestamps() - worker function for freeing timestamp table contents
+ */
+
+static void free_timestamps( void * xbinding, void * data )
+{
+ object_free( ( (BINDING *)xbinding )->name );
+}
+
+
+/*
+ * timestamp_done() - free timestamp tables
+ */
+
+void timestamp_done()
+{
+ if ( bindhash )
+ {
+ hashenumerate( bindhash, free_timestamps, 0 );
+ hashdone( bindhash );
+ }
+}
diff --git a/src/kenlm/jam-files/engine/timestamp.h b/src/kenlm/jam-files/engine/timestamp.h
new file mode 100644
index 0000000..aaf1310
--- /dev/null
+++ b/src/kenlm/jam-files/engine/timestamp.h
@@ -0,0 +1,47 @@
+/*
+ * Copyright 1993, 1995 Christopher Seiwald.
+ *
+ * This file is part of Jam - see jam.c for Copyright information.
+ */
+
+/*
+ * timestamp.h - get the timestamp of a file or archive member
+ */
+
+#ifndef TIMESTAMP_H_SW_2011_11_18
+#define TIMESTAMP_H_SW_2011_11_18
+
+#include "object.h"
+
+#ifdef OS_NT
+# define WIN32_LEAN_AND_MEAN
+# include <windows.h>
+#endif
+
+#include <time.h>
+
+typedef struct timestamp
+{
+ time_t secs;
+ int nsecs;
+} timestamp;
+
+void timestamp_clear( timestamp * const );
+int timestamp_cmp( timestamp const * const lhs, timestamp const * const rhs );
+void timestamp_copy( timestamp * const target, timestamp const * const source );
+void timestamp_current( timestamp * const );
+int timestamp_empty( timestamp const * const );
+void timestamp_from_path( timestamp * const, OBJECT * const path );
+void timestamp_init( timestamp * const, time_t const secs, int const nsecs );
+void timestamp_max( timestamp * const max, timestamp const * const lhs,
+ timestamp const * const rhs );
+char const * timestamp_str( timestamp const * const );
+char const * timestamp_timestr( timestamp const * const );
+
+#ifdef OS_NT
+void timestamp_from_filetime( timestamp * const, FILETIME const * const );
+#endif
+
+void timestamp_done();
+
+#endif
diff --git a/src/kenlm/jam-files/engine/variable.c b/src/kenlm/jam-files/engine/variable.c
new file mode 100644
index 0000000..2c292fb
--- /dev/null
+++ b/src/kenlm/jam-files/engine/variable.c
@@ -0,0 +1,345 @@
+/*
+ * Copyright 1993, 2000 Christopher Seiwald.
+ *
+ * This file is part of Jam - see jam.c for Copyright information.
+ */
+
+/* This file is ALSO:
+ * Copyright 2001-2004 David Abrahams.
+ * Copyright 2005 Reece H. Dunn.
+ * Copyright 2005 Rene Rivera.
+ * Distributed under the Boost Software License, Version 1.0.
+ * (See accompanying file LICENSE_1_0.txt or copy at
+ * http://www.boost.org/LICENSE_1_0.txt)
+ */
+
+/*
+ * variable.c - handle Jam multi-element variables.
+ *
+ * External routines:
+ *
+ * var_defines() - load a bunch of variable=value settings
+ * var_get() - get value of a user defined symbol
+ * var_set() - set a variable in jam's user defined symbol table.
+ * var_swap() - swap a variable's value with the given one
+ * var_done() - free variable tables
+ *
+ * Internal routines:
+ *
+ * var_enter() - make new var symbol table entry, returning var ptr
+ * var_dump() - dump a variable to stdout
+ */
+
+#include "jam.h"
+#include "variable.h"
+
+#include "filesys.h"
+#include "hash.h"
+#include "modules.h"
+#include "parse.h"
+#include "pathsys.h"
+#include "strings.h"
+
+#include <stdio.h>
+#include <stdlib.h>
+
+
+/*
+ * VARIABLE - a user defined multi-value variable
+ */
+
+typedef struct _variable VARIABLE ;
+
+struct _variable
+{
+ OBJECT * symbol;
+ LIST * value;
+};
+
+static LIST * * var_enter( struct module_t *, OBJECT * symbol );
+static void var_dump( OBJECT * symbol, LIST * value, char * what );
+
+
+/*
+ * var_defines() - load a bunch of variable=value settings
+ *
+ * If preprocess is false, take the value verbatim.
+ *
+ * Otherwise, if the variable value is enclosed in quotes, strip the quotes.
+ * Otherwise, if variable name ends in PATH, split value at :'s.
+ * Otherwise, split the value at blanks.
+ */
+
+void var_defines( struct module_t * module, char * const * e, int preprocess )
+{
+ string buf[ 1 ];
+
+ string_new( buf );
+
+ for ( ; *e; ++e )
+ {
+ char * val;
+
+ if ( ( val = strchr( *e, '=' ) )
+#if defined( OS_MAC )
+ /* On the mac (MPW), the var=val is actually var\0val */
+ /* Think different. */
+ || ( val = *e + strlen( *e ) )
+#endif
+ )
+ {
+ LIST * l = L0;
+ size_t const len = strlen( val + 1 );
+ int const quoted = ( val[ 1 ] == '"' ) && ( val[ len ] == '"' ) &&
+ ( len > 1 );
+
+ if ( quoted && preprocess )
+ {
+ string_append_range( buf, val + 2, val + len );
+ l = list_push_back( l, object_new( buf->value ) );
+ string_truncate( buf, 0 );
+ }
+ else
+ {
+ char * p;
+ char * pp;
+ char split =
+#if defined( OPT_NO_EXTERNAL_VARIABLE_SPLIT )
+ '\0'
+#elif defined( OS_MAC )
+ ','
+#else
+ ' '
+#endif
+ ;
+
+ /* Split *PATH at :'s, not spaces. */
+ if ( val - 4 >= *e )
+ {
+ if ( !strncmp( val - 4, "PATH", 4 ) ||
+ !strncmp( val - 4, "Path", 4 ) ||
+ !strncmp( val - 4, "path", 4 ) )
+ split = SPLITPATH;
+ }
+
+ /* Do the split. */
+ for
+ (
+ pp = val + 1;
+ preprocess && ( ( p = strchr( pp, split ) ) != 0 );
+ pp = p + 1
+ )
+ {
+ string_append_range( buf, pp, p );
+ l = list_push_back( l, object_new( buf->value ) );
+ string_truncate( buf, 0 );
+ }
+
+ l = list_push_back( l, object_new( pp ) );
+ }
+
+ /* Get name. */
+ string_append_range( buf, *e, val );
+ {
+ OBJECT * const varname = object_new( buf->value );
+ var_set( module, varname, l, VAR_SET );
+ object_free( varname );
+ }
+ string_truncate( buf, 0 );
+ }
+ }
+ string_free( buf );
+}
+
+
+/* Last returned variable value saved so we may clear it in var_done(). */
+static LIST * saved_var = L0;
+
+
+/*
+ * var_get() - get value of a user defined symbol
+ *
+ * Returns NULL if symbol unset.
+ */
+
+LIST * var_get( struct module_t * module, OBJECT * symbol )
+{
+ LIST * result = L0;
+#ifdef OPT_AT_FILES
+ /* Some "fixed" variables... */
+ if ( object_equal( symbol, constant_TMPDIR ) )
+ {
+ list_free( saved_var );
+ result = saved_var = list_new( object_new( path_tmpdir()->value ) );
+ }
+ else if ( object_equal( symbol, constant_TMPNAME ) )
+ {
+ list_free( saved_var );
+ result = saved_var = list_new( path_tmpnam() );
+ }
+ else if ( object_equal( symbol, constant_TMPFILE ) )
+ {
+ list_free( saved_var );
+ result = saved_var = list_new( path_tmpfile() );
+ }
+ else if ( object_equal( symbol, constant_STDOUT ) )
+ {
+ list_free( saved_var );
+ result = saved_var = list_new( object_copy( constant_STDOUT ) );
+ }
+ else if ( object_equal( symbol, constant_STDERR ) )
+ {
+ list_free( saved_var );
+ result = saved_var = list_new( object_copy( constant_STDERR ) );
+ }
+ else
+#endif
+ {
+ VARIABLE * v;
+ int n;
+
+ if ( ( n = module_get_fixed_var( module, symbol ) ) != -1 )
+ {
+ if ( DEBUG_VARGET )
+ var_dump( symbol, module->fixed_variables[ n ], "get" );
+ result = module->fixed_variables[ n ];
+ }
+ else if ( module->variables && ( v = (VARIABLE *)hash_find(
+ module->variables, symbol ) ) )
+ {
+ if ( DEBUG_VARGET )
+ var_dump( v->symbol, v->value, "get" );
+ result = v->value;
+ }
+ }
+ return result;
+}
+
+
+LIST * var_get_and_clear_raw( module_t * module, OBJECT * symbol )
+{
+ LIST * result = L0;
+ VARIABLE * v;
+
+ if ( module->variables && ( v = (VARIABLE *)hash_find( module->variables,
+ symbol ) ) )
+ {
+ result = v->value;
+ v->value = L0;
+ }
+
+ return result;
+}
+
+
+/*
+ * var_set() - set a variable in Jam's user defined symbol table
+ *
+ * 'flag' controls the relationship between new and old values of the variable:
+ * SET replaces the old with the new; APPEND appends the new to the old; DEFAULT
+ * only uses the new if the variable was previously unset.
+ *
+ * Copies symbol. Takes ownership of value.
+ */
+
+void var_set( struct module_t * module, OBJECT * symbol, LIST * value, int flag
+ )
+{
+ LIST * * v = var_enter( module, symbol );
+
+ if ( DEBUG_VARSET )
+ var_dump( symbol, value, "set" );
+
+ switch ( flag )
+ {
+ case VAR_SET: /* Replace value */
+ list_free( *v );
+ *v = value;
+ break;
+
+ case VAR_APPEND: /* Append value */
+ *v = list_append( *v, value );
+ break;
+
+ case VAR_DEFAULT: /* Set only if unset */
+ if ( list_empty( *v ) )
+ *v = value;
+ else
+ list_free( value );
+ break;
+ }
+}
+
+
+/*
+ * var_swap() - swap a variable's value with the given one
+ */
+
+LIST * var_swap( struct module_t * module, OBJECT * symbol, LIST * value )
+{
+ LIST * * v = var_enter( module, symbol );
+ LIST * oldvalue = *v;
+ if ( DEBUG_VARSET )
+ var_dump( symbol, value, "set" );
+ *v = value;
+ return oldvalue;
+}
+
+
+/*
+ * var_enter() - make new var symbol table entry, returning var ptr
+ */
+
+static LIST * * var_enter( struct module_t * module, OBJECT * symbol )
+{
+ int found;
+ VARIABLE * v;
+ int n;
+
+ if ( ( n = module_get_fixed_var( module, symbol ) ) != -1 )
+ return &module->fixed_variables[ n ];
+
+ if ( !module->variables )
+ module->variables = hashinit( sizeof( VARIABLE ), "variables" );
+
+ v = (VARIABLE *)hash_insert( module->variables, symbol, &found );
+ if ( !found )
+ {
+ v->symbol = object_copy( symbol );
+ v->value = L0;
+ }
+
+ return &v->value;
+}
+
+
+/*
+ * var_dump() - dump a variable to stdout
+ */
+
+static void var_dump( OBJECT * symbol, LIST * value, char * what )
+{
+ printf( "%s %s = ", what, object_str( symbol ) );
+ list_print( value );
+ printf( "\n" );
+}
+
+
+/*
+ * var_done() - free variable tables
+ */
+
+static void delete_var_( void * xvar, void * data )
+{
+ VARIABLE * const v = (VARIABLE *)xvar;
+ object_free( v->symbol );
+ list_free( v->value );
+}
+
+void var_done( struct module_t * module )
+{
+ list_free( saved_var );
+ saved_var = L0;
+ hashenumerate( module->variables, delete_var_, 0 );
+ hash_free( module->variables );
+}
diff --git a/src/kenlm/jam-files/engine/variable.h b/src/kenlm/jam-files/engine/variable.h
new file mode 100644
index 0000000..ddb452b
--- /dev/null
+++ b/src/kenlm/jam-files/engine/variable.h
@@ -0,0 +1,34 @@
+/*
+ * Copyright 1993, 2000 Christopher Seiwald.
+ *
+ * This file is part of Jam - see jam.c for Copyright information.
+ */
+
+/*
+ * variable.h - handle jam multi-element variables
+ */
+
+#ifndef VARIABLE_SW20111119_H
+#define VARIABLE_SW20111119_H
+
+#include "lists.h"
+#include "object.h"
+
+
+struct module_t;
+
+void var_defines( struct module_t *, char * const * e, int preprocess );
+LIST * var_get( struct module_t *, OBJECT * symbol );
+void var_set( struct module_t *, OBJECT * symbol, LIST * value, int flag );
+LIST * var_swap( struct module_t *, OBJECT * symbol, LIST * value );
+void var_done( struct module_t * );
+
+/*
+ * Defines for var_set().
+ */
+
+#define VAR_SET 0 /* override previous value */
+#define VAR_APPEND 1 /* append to previous value */
+#define VAR_DEFAULT 2 /* set only if no previous value */
+
+#endif
diff --git a/src/kenlm/jam-files/engine/w32_getreg.c b/src/kenlm/jam-files/engine/w32_getreg.c
new file mode 100644
index 0000000..dd2d0fc
--- /dev/null
+++ b/src/kenlm/jam-files/engine/w32_getreg.c
@@ -0,0 +1,201 @@
+/*
+Copyright Paul Lin 2003. Copyright 2006 Bojan Resnik.
+Distributed under the Boost Software License, Version 1.0. (See accompanying
+file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+*/
+
+# include "jam.h"
+
+# if defined( OS_NT ) || defined( OS_CYGWIN )
+
+# include "lists.h"
+# include "object.h"
+# include "parse.h"
+# include "frames.h"
+# include "strings.h"
+
+# define WIN32_LEAN_AND_MEAN
+# include <windows.h>
+
+# define MAX_REGISTRY_DATA_LENGTH 4096
+# define MAX_REGISTRY_KEYNAME_LENGTH 256
+# define MAX_REGISTRY_VALUENAME_LENGTH 16384
+
+typedef struct
+{
+ LPCSTR name;
+ HKEY value;
+} KeyMap;
+
+static const KeyMap dlRootKeys[] = {
+ { "HKLM", HKEY_LOCAL_MACHINE },
+ { "HKCU", HKEY_CURRENT_USER },
+ { "HKCR", HKEY_CLASSES_ROOT },
+ { "HKEY_LOCAL_MACHINE", HKEY_LOCAL_MACHINE },
+ { "HKEY_CURRENT_USER", HKEY_CURRENT_USER },
+ { "HKEY_CLASSES_ROOT", HKEY_CLASSES_ROOT },
+ { 0, 0 }
+};
+
+static HKEY get_key(char const** path)
+{
+ const KeyMap *p;
+
+ for (p = dlRootKeys; p->name; ++p)
+ {
+ int n = strlen(p->name);
+ if (!strncmp(*path,p->name,n))
+ {
+ if ((*path)[n] == '\\' || (*path)[n] == 0)
+ {
+ *path += n + 1;
+ break;
+ }
+ }
+ }
+
+ return p->value;
+}
+
+LIST * builtin_system_registry( FRAME * frame, int flags )
+{
+ char const* path = object_str( list_front( lol_get(frame->args, 0) ) );
+ LIST* result = L0;
+ HKEY key = get_key(&path);
+
+ if (
+ key != 0
+ && ERROR_SUCCESS == RegOpenKeyEx(key, path, 0, KEY_QUERY_VALUE, &key)
+ )
+ {
+ DWORD type;
+ BYTE data[MAX_REGISTRY_DATA_LENGTH];
+ DWORD len = sizeof(data);
+ LIST * const field = lol_get(frame->args, 1);
+
+ if ( ERROR_SUCCESS ==
+ RegQueryValueEx(key, field ? object_str( list_front( field ) ) : 0, 0, &type, data, &len) )
+ {
+ switch (type)
+ {
+
+ case REG_EXPAND_SZ:
+ {
+ long len;
+ string expanded[1];
+ string_new(expanded);
+
+ while (
+ (len = ExpandEnvironmentStrings(
+ (LPCSTR)data, expanded->value, expanded->capacity))
+ > expanded->capacity
+ )
+ string_reserve(expanded, len);
+
+ expanded->size = len - 1;
+
+ result = list_push_back( result, object_new(expanded->value) );
+ string_free( expanded );
+ }
+ break;
+
+ case REG_MULTI_SZ:
+ {
+ char* s;
+
+ for (s = (char*)data; *s; s += strlen(s) + 1)
+ result = list_push_back( result, object_new(s) );
+
+ }
+ break;
+
+ case REG_DWORD:
+ {
+ char buf[100];
+ sprintf( buf, "%u", *(PDWORD)data );
+ result = list_push_back( result, object_new(buf) );
+ }
+ break;
+
+ case REG_SZ:
+ result = list_push_back( result, object_new( (const char *)data ) );
+ break;
+ }
+ }
+ RegCloseKey(key);
+ }
+ return result;
+}
+
+static LIST* get_subkey_names(HKEY key, char const* path)
+{
+ LIST* result = 0;
+
+ if ( ERROR_SUCCESS ==
+ RegOpenKeyEx(key, path, 0, KEY_ENUMERATE_SUB_KEYS, &key)
+ )
+ {
+ char name[MAX_REGISTRY_KEYNAME_LENGTH];
+ DWORD name_size = sizeof(name);
+ DWORD index;
+ FILETIME last_write_time;
+
+ for ( index = 0;
+ ERROR_SUCCESS == RegEnumKeyEx(
+ key, index, name, &name_size, 0, 0, 0, &last_write_time);
+ ++index,
+ name_size = sizeof(name)
+ )
+ {
+ name[name_size] = 0;
+ result = list_append(result, list_new(object_new(name)));
+ }
+
+ RegCloseKey(key);
+ }
+
+ return result;
+}
+
+static LIST* get_value_names(HKEY key, char const* path)
+{
+ LIST* result = 0;
+
+ if ( ERROR_SUCCESS == RegOpenKeyEx(key, path, 0, KEY_QUERY_VALUE, &key) )
+ {
+ char name[MAX_REGISTRY_VALUENAME_LENGTH];
+ DWORD name_size = sizeof(name);
+ DWORD index;
+
+ for ( index = 0;
+ ERROR_SUCCESS == RegEnumValue(
+ key, index, name, &name_size, 0, 0, 0, 0);
+ ++index,
+ name_size = sizeof(name)
+ )
+ {
+ name[name_size] = 0;
+ result = list_append(result, list_new(object_new(name)));
+ }
+
+ RegCloseKey(key);
+ }
+
+ return result;
+}
+
+LIST * builtin_system_registry_names( FRAME * frame, int flags )
+{
+ char const* path = object_str( list_front( lol_get(frame->args, 0) ) );
+ char const* result_type = object_str( list_front( lol_get(frame->args, 1) ) );
+
+ HKEY key = get_key(&path);
+
+ if ( !strcmp(result_type, "subkeys") )
+ return get_subkey_names(key, path);
+ if ( !strcmp(result_type, "values") )
+ return get_value_names(key, path);
+ return 0;
+}
+
+# endif
diff --git a/src/kenlm/jam-files/engine/yyacc.c b/src/kenlm/jam-files/engine/yyacc.c
new file mode 100644
index 0000000..b5efc96
--- /dev/null
+++ b/src/kenlm/jam-files/engine/yyacc.c
@@ -0,0 +1,268 @@
+/* Copyright 2002 Rene Rivera.
+** Distributed under the Boost Software License, Version 1.0.
+** (See accompanying file LICENSE_1_0.txt or http://www.boost.org/LICENSE_1_0.txt)
+*/
+
+#include <stdio.h>
+#include <string.h>
+#include <ctype.h>
+#include <stdlib.h>
+
+/*
+# yyacc - yacc wrapper
+#
+# Allows tokens to be written as `literal` and then automatically
+# substituted with #defined tokens.
+#
+# Usage:
+# yyacc file.y filetab.h file.yy
+#
+# inputs:
+# file.yy yacc grammar with ` literals
+#
+# outputs:
+# file.y yacc grammar
+# filetab.h array of string <-> token mappings
+#
+# 3-13-93
+# Documented and p moved in sed command (for some reason,
+# s/x/y/p doesn't work).
+# 10-12-93
+# Take basename as second argument.
+# 12-31-96
+# reversed order of args to be compatible with GenFile rule
+# 11-20-2002
+# Reimplemented as a C program for portability. (Rene Rivera)
+*/
+
+void print_usage();
+char * copy_string(char * s, int l);
+char * tokenize_string(char * s);
+int cmp_literal(const void * a, const void * b);
+
+typedef struct
+{
+ char * string;
+ char * token;
+} literal;
+
+int main(int argc, char ** argv)
+{
+ int result = 0;
+ if (argc != 4)
+ {
+ print_usage();
+ result = 1;
+ }
+ else
+ {
+ FILE * token_output_f = 0;
+ FILE * grammar_output_f = 0;
+ FILE * grammar_source_f = 0;
+
+ grammar_source_f = fopen(argv[3],"r");
+ if (grammar_source_f == 0) { result = 1; }
+ if (result == 0)
+ {
+ literal literals[1024];
+ int t = 0;
+ char l[2048];
+ while (1)
+ {
+ if (fgets(l,2048,grammar_source_f) != 0)
+ {
+ char * c = l;
+ while (1)
+ {
+ char * c1 = strchr(c,'`');
+ if (c1 != 0)
+ {
+ char * c2 = strchr(c1+1,'`');
+ if (c2 != 0)
+ {
+ literals[t].string = copy_string(c1+1,c2-c1-1);
+ literals[t].token = tokenize_string(literals[t].string);
+ t += 1;
+ c = c2+1;
+ }
+ else
+ break;
+ }
+ else
+ break;
+ }
+ }
+ else
+ {
+ break;
+ }
+ }
+ literals[t].string = 0;
+ literals[t].token = 0;
+ qsort(literals,t,sizeof(literal),cmp_literal);
+ {
+ int p = 1;
+ int i = 1;
+ while (literals[i].string != 0)
+ {
+ if (strcmp(literals[p-1].string,literals[i].string) != 0)
+ {
+ literals[p] = literals[i];
+ p += 1;
+ }
+ i += 1;
+ }
+ literals[p].string = 0;
+ literals[p].token = 0;
+ t = p;
+ }
+ token_output_f = fopen(argv[2],"w");
+ if (token_output_f != 0)
+ {
+ int i = 0;
+ while (literals[i].string != 0)
+ {
+ fprintf(token_output_f," { \"%s\", %s },\n",literals[i].string,literals[i].token);
+ i += 1;
+ }
+ fclose(token_output_f);
+ }
+ else
+ result = 1;
+ if (result == 0)
+ {
+ grammar_output_f = fopen(argv[1],"w");
+ if (grammar_output_f != 0)
+ {
+ int i = 0;
+ while (literals[i].string != 0)
+ {
+ fprintf(grammar_output_f,"%%token %s\n",literals[i].token);
+ i += 1;
+ }
+ rewind(grammar_source_f);
+ while (1)
+ {
+ if (fgets(l,2048,grammar_source_f) != 0)
+ {
+ char * c = l;
+ while (1)
+ {
+ char * c1 = strchr(c,'`');
+ if (c1 != 0)
+ {
+ char * c2 = strchr(c1+1,'`');
+ if (c2 != 0)
+ {
+ literal key;
+ literal * replacement = 0;
+ key.string = copy_string(c1+1,c2-c1-1);
+ key.token = 0;
+ replacement = (literal*)bsearch(
+ &key,literals,t,sizeof(literal),cmp_literal);
+ *c1 = 0;
+ fprintf(grammar_output_f,"%s%s",c,replacement->token);
+ c = c2+1;
+ }
+ else
+ {
+ fprintf(grammar_output_f,"%s",c);
+ break;
+ }
+ }
+ else
+ {
+ fprintf(grammar_output_f,"%s",c);
+ break;
+ }
+ }
+ }
+ else
+ {
+ break;
+ }
+ }
+ fclose(grammar_output_f);
+ }
+ else
+ result = 1;
+ }
+ }
+ if (result != 0)
+ {
+ perror("yyacc");
+ }
+ }
+ return result;
+}
+
+static char * usage[] = {
+ "yyacc <grammar output.y> <token table output.h> <grammar source.yy>",
+ 0 };
+
+void print_usage()
+{
+ char ** u;
+ for (u = usage; *u != 0; ++u)
+ {
+ fputs(*u,stderr); putc('\n',stderr);
+ }
+}
+
+char * copy_string(char * s, int l)
+{
+ char * result = (char*)malloc(l+1);
+ strncpy(result,s,l);
+ result[l] = 0;
+ return result;
+}
+
+char * tokenize_string(char * s)
+{
+ char * result;
+ char * literal = s;
+ int l;
+ int c;
+
+ if (strcmp(s,":") == 0) literal = "_colon";
+ else if (strcmp(s,"!") == 0) literal = "_bang";
+ else if (strcmp(s,"!=") == 0) literal = "_bang_equals";
+ else if (strcmp(s,"&&") == 0) literal = "_amperamper";
+ else if (strcmp(s,"&") == 0) literal = "_amper";
+ else if (strcmp(s,"+") == 0) literal = "_plus";
+ else if (strcmp(s,"+=") == 0) literal = "_plus_equals";
+ else if (strcmp(s,"||") == 0) literal = "_barbar";
+ else if (strcmp(s,"|") == 0) literal = "_bar";
+ else if (strcmp(s,";") == 0) literal = "_semic";
+ else if (strcmp(s,"-") == 0) literal = "_minus";
+ else if (strcmp(s,"<") == 0) literal = "_langle";
+ else if (strcmp(s,"<=") == 0) literal = "_langle_equals";
+ else if (strcmp(s,">") == 0) literal = "_rangle";
+ else if (strcmp(s,">=") == 0) literal = "_rangle_equals";
+ else if (strcmp(s,".") == 0) literal = "_period";
+ else if (strcmp(s,"?") == 0) literal = "_question";
+ else if (strcmp(s,"?=") == 0) literal = "_question_equals";
+ else if (strcmp(s,"=") == 0) literal = "_equals";
+ else if (strcmp(s,",") == 0) literal = "_comma";
+ else if (strcmp(s,"[") == 0) literal = "_lbracket";
+ else if (strcmp(s,"]") == 0) literal = "_rbracket";
+ else if (strcmp(s,"{") == 0) literal = "_lbrace";
+ else if (strcmp(s,"}") == 0) literal = "_rbrace";
+ else if (strcmp(s,"(") == 0) literal = "_lparen";
+ else if (strcmp(s,")") == 0) literal = "_rparen";
+ l = strlen(literal)+2;
+ result = (char*)malloc(l+1);
+ for (c = 0; literal[c] != 0; ++c)
+ {
+ result[c] = toupper(literal[c]);
+ }
+ result[l-2] = '_';
+ result[l-1] = 't';
+ result[l] = 0;
+ return result;
+}
+
+int cmp_literal(const void * a, const void * b)
+{
+ return strcmp(((const literal *)a)->string,((const literal *)b)->string);
+}
diff --git a/src/kenlm/jam-files/fail/Jamroot b/src/kenlm/jam-files/fail/Jamroot
new file mode 100644
index 0000000..c3584d8
--- /dev/null
+++ b/src/kenlm/jam-files/fail/Jamroot
@@ -0,0 +1,4 @@
+actions fail {
+ false
+}
+make fail : : fail ;
diff --git a/src/kenlm/jam-files/sanity.jam b/src/kenlm/jam-files/sanity.jam
new file mode 100644
index 0000000..1851ece
--- /dev/null
+++ b/src/kenlm/jam-files/sanity.jam
@@ -0,0 +1,344 @@
+import modules ;
+import option ;
+import os ;
+import path ;
+import project ;
+import build-system ;
+import version ;
+
+#Shell with the trailing newline removed http://lists.boost.org/boost-build/2007/08/17051.php
+rule trim-nl ( str extras * ) {
+return [ MATCH "([^
+]*)" : $(str) ] $(extras) ;
+}
+rule _shell ( cmd : extras * ) {
+ return [ trim-nl [ SHELL $(cmd) : $(extras) ] ] ;
+}
+
+rule shell_or_fail ( cmd ) {
+ local ret = [ SHELL $(cmd) : exit-status ] ;
+ if $(ret[2]) != 0 {
+ exit $(cmd) failed : 1 ;
+ }
+}
+
+rule shell_or_die ( cmd ) {
+ local ret = [ SHELL $(cmd) : exit-status ] ;
+ if $(ret[2]) != 0 {
+ exit $(cmd) failed : 1 ;
+ }
+ return [ trim-nl $(ret[1]) ] ;
+}
+
+cxxflags = [ os.environ "CXXFLAGS" ] ;
+cflags = [ os.environ "CFLAGS" ] ;
+ldflags = [ os.environ "LDFLAGS" ] ;
+
+#Run g++ with empty main and these arguments to see if it passes.
+rule test_flags ( flags * : main ? ) {
+ flags = $(cxxflags) $(ldflags) $(flags) ;
+ if ! $(main) {
+ main = "int main() {}" ;
+ }
+ local cmd = "bash -c \"g++ "$(flags:J=" ")" -x c++ - <<<'$(main)' -o $(TOP)/dummy >/dev/null 2>/dev/null && rm $(TOP)/dummy 2>/dev/null\"" ;
+ local ret = [ SHELL $(cmd) : exit-status ] ;
+ if --debug-configuration in [ modules.peek : ARGV ] {
+ echo $(cmd) ;
+ echo $(ret) ;
+ }
+ if $(ret[2]) = 0 {
+ return true ;
+ } else {
+ return ;
+ }
+}
+
+rule test_header ( name ) {
+ return [ test_flags "-include $(name)" ] ;
+}
+
+requirements = ;
+
+FORCE-STATIC = [ option.get "static" : : "yes" ] ;
+if $(FORCE-STATIC) {
+ requirements += <link>static <runtime-link>static ;
+}
+
+rule test_library ( name ) {
+ if $(FORCE-STATIC) {
+ return [ test_flags "-Wl,-Bstatic -l$(name) -Wl,-Bdynamic" ] ;
+ } else {
+ return [ test_flags "-l$(name)" ] ;
+ }
+}
+
+{
+ local cleaning = [ option.get "clean" : : yes ] ;
+ cleaning ?= [ option.get "clean-all" : no : yes ] ;
+ if "clean" in [ modules.peek : ARGV ] {
+ cleaning = yes ;
+ }
+ constant CLEANING : $(cleaning) ;
+}
+
+shared-command-line = ;
+local argv = [ modules.peek : ARGV ] ;
+while $(argv) {
+ if $(argv[1]) = "link=shared" {
+ shared-command-line = <link>shared ;
+ }
+ argv = $(argv[2-]) ;
+}
+
+#Determine whether lib $(name) links statically; return <link>shared as a fallback if it cannot.
+rule auto-shared ( name : additional * ) {
+
+ additional ?= "" ;
+ if $(shared-command-line) = "<link>shared" {
+ return "<link>shared" ;
+ } else {
+ if [ test_flags $(additional)" -Wl,-Bstatic -l"$(name)" -Wl,-Bdynamic" ] {
+ return ;
+ } else {
+ if $(FORCE-STATIC) {
+ echo "Could not statically link against lib $(name). Your build will probably fail." ;
+ return ;
+ } else {
+ return "<link>shared" ;
+ }
+ }
+ }
+}
+
+# MacPorts' default location is /opt/local -- use this if no path is given.
+with-macports = [ option.get "with-macports" : : "/opt/local" ] ;
+if $(with-macports) {
+ using darwin ;
+ ECHO "Using --with-macports=$(with-macports), implying use of darwin GCC" ;
+
+ L-boost-search = -L$(with-macports)/lib ;
+ boost-search = <search>$(with-macports)/lib ;
+ I-boost-include = -I$(with-macports)/include ;
+ boost-include = <include>$(with-macports)/include ;
+ requirements += $(boost-include) ;
+} else {
+ with-boost = [ option.get "with-boost" ] ;
+ with-boost ?= [ os.environ "BOOST_ROOT" ] ;
+ if $(with-boost) {
+ L-boost-search = -L$(with-boost)/lib" "-L$(with-boost)/lib64 ;
+ boost-search = <search>$(with-boost)/lib <search>$(with-boost)/lib64 ;
+ I-boost-include = -I$(with-boost)/include ;
+ boost-include = <include>$(with-boost)/include ;
+ requirements += $(boost-include) ;
+ } else {
+ L-boost-search = "" ;
+ boost-search = ;
+ I-boost-include = "" ;
+ boost-include = ;
+ }
+}
+
+#Convenience rule for boost libraries. Defines library boost_$(name).
+rule boost-lib ( name macro : deps * ) {
+ lib boost_$(name)_static : $(deps) : $(boost-search) <name>boost_$(name)$(boost-lib-version) <link>static ;
+ lib boost_$(name)_shared : $(deps) : $(boost-search) <name>boost_$(name)$(boost-lib-version) <link>shared : : <define>BOOST_$(macro) ;
+
+ alias boost_$(name)_default : $(deps) : <link>static:<source>boost_$(name)_static <link>shared:<source>boost_$(name)_shared ;
+
+ alias boost_$(name)_static_works : $(deps) : [ check-target-builds empty_test_shared "Shared Boost" : <source>boost_$(name)_default : <source>boost_$(name)_static ] ;
+ alias boost_$(name) : $(deps) : [ check-target-builds empty_test_static "Static Boost" : <source>boost_$(name)_static_works : <source>boost_$(name)_shared ] ;
+}
+
+#Argument is e.g. 103600
+rule boost ( min-version ) {
+ local cmd = "bash -c \"g++ "$(I-boost-include)" -dM -x c++ -E /dev/null -include boost/version.hpp 2>/dev/null |grep '#define BOOST_'\"" ;
+ local boost-shell = [ SHELL "$(cmd)" : exit-status ] ;
+ if $(boost-shell[2]) != 0 && $(CLEANING) = no {
+ echo Failed to run "$(cmd)" ;
+ exit Boost does not seem to be installed or g++ is confused. : 1 ;
+ }
+ constant BOOST-VERSION : [ MATCH "#define BOOST_VERSION ([0-9]*)" : $(boost-shell[1]) ] ;
+ if $(BOOST-VERSION) < $(min-version) && $(CLEANING) = no {
+ exit You have Boost $(BOOST-VERSION). This package requires Boost at least $(min-version) (and preferably newer). : 1 ;
+ }
+ # If matching version tags exist, use them.
+ boost-lib-version = [ MATCH "#define BOOST_LIB_VERSION \"([^\"]*)\"" : $(boost-shell[1]) ] ;
+ if [ test_flags $(L-boost-search)" -lboost_program_options-"$(boost-lib-version) ] {
+ boost-lib-version = "-"$(boost-lib-version) ;
+ } else {
+ boost-lib-version = "" ;
+ }
+
+ #Crazy amount of testing to make sure that BOOST_TEST_DYN_LINK is defined properly.
+ lib boost_unit_test_framework_static_test : : $(boost-search) <name>boost_unit_test_framework$(boost-lib-version) <link>static ;
+ obj empty_test_static.o : jam-files/empty_test_main.cc boost_unit_test_framework_static_test : $(boost-include) ;
+ exe empty_test_static : empty_test_static.o boost_unit_test_framework_static_test ;
+
+ lib boost_unit_test_framework_shared_test : : $(boost-search) <name>boost_unit_test_framework$(boost-lib-version) <link>shared : : <define>BOOST_TEST_DYN_LINK ;
+ obj empty_test_shared.o : jam-files/empty_test_main.cc boost_unit_test_framework_shared_test : $(boost-include) ;
+ exe empty_test_shared : empty_test_shared.o boost_unit_test_framework_shared_test ;
+
+ explicit empty_test_static.o empty_test_static empty_test_shared.o empty_test_shared ;
+
+
+ #See tools/build/v2/contrib/boost.jam in a boost distribution for a table of macros to define.
+ boost-lib system SYSTEM_DYN_LINK ;
+ boost-lib thread THREAD_DYN_DLL : boost_system ;
+ boost-lib program_options PROGRAM_OPTIONS_DYN_LINK ;
+ boost-lib iostreams IOSTREAMS_DYN_LINK ;
+ boost-lib filesystem FILE_SYSTEM_DYN_LINK ;
+ boost-lib unit_test_framework TEST_DYN_LINK ;
+# if $(BOOST-VERSION) >= 104800 {
+# boost-lib chrono CHRONO_DYN_LINK ;
+# boost-lib timer TIMER_DYN_LINK : boost_chrono ;
+# }
+}
+
+#Link normally to a library, but sometimes static isn't installed so fall back to dynamic.
+rule external-lib ( name : search-path * : deps * ) {
+ lib $(name) : : [ auto-shared $(name) : "-L"$(search-path) ] <search>$(search-path) <use>$(deps) ;
+}
+
+#Write the current command line to previous.sh. This does not do shell escaping.
+{
+ local build-log = $(TOP)/previous.sh ;
+ if ! [ path.exists $(build-log) ] {
+ SHELL "touch \"$(build-log)\" && chmod +x \"$(build-log)\"" ;
+ }
+ local script = [ modules.peek : ARGV ] ;
+ if $(script[1]) = "./jam-files/bjam" {
+ #The ./bjam shell script calls ./jam-files/bjam so that appears in argv but
+ #we want ./bjam to appear so the environment variables are set correctly.
+ script = "./bjam "$(script[2-]:J=" ") ;
+ } else {
+ script = $(script:J=" ") ;
+ }
+ script = "#!/bin/sh\n$(script)\n" ;
+ local ignored = @($(build-log):E=$(script)) ;
+}
+
+#Boost jam's static clang for Linux is buggy.
+requirements += <cxxflags>$(cxxflags) <cflags>$(cflags) <linkflags>$(ldflags) <os>LINUX,<toolset>clang:<link>shared ;
+
+if ! [ option.get "without-libsegfault" : : "yes" ] && ! $(FORCE-STATIC) {
+ #libSegFault prints a stack trace on segfault. Link against it if available.
+ if [ test_flags "-lSegFault" ] {
+ external-lib SegFault ;
+ requirements += <library>SegFault ;
+ }
+}
+
+if [ option.get "git" : : "yes" ] {
+ local revision = [ _shell "git rev-parse --verify HEAD |head -c 7" ] ;
+ constant GITTAG : "/"$(revision) ;
+} else {
+ constant GITTAG : "" ;
+}
+
+local prefix = [ option.get "prefix" ] ;
+if $(prefix) {
+ prefix = [ path.root $(prefix) [ path.pwd ] ] ;
+ prefix = $(prefix)$(GITTAG) ;
+} else {
+ prefix = $(TOP)$(GITTAG) ;
+}
+
+path-constant PREFIX : $(prefix) ;
+
+path-constant BINDIR : [ option.get "bindir" : $(PREFIX)/bin ] ;
+path-constant LIBDIR : [ option.get "libdir" : $(PREFIX)/lib ] ;
+rule install-bin-libs ( deps * ) {
+ install prefix-bin : $(deps) : <location>$(BINDIR) <install-dependencies>on <install-type>EXE <link>shared:<dll-path>$(LIBDIR) ;
+ install prefix-lib : $(deps) : <location>$(LIBDIR) <install-dependencies>on <install-type>LIB <link>shared:<dll-path>$(LIBDIR) ;
+}
+rule install-headers ( name : list * : source-root ? ) {
+ local includedir = [ option.get "includedir" : $(prefix)/include ] ;
+ source-root ?= "." ;
+ install $(name) : $(list) : <location>$(includedir) <install-source-root>$(source-root) ;
+}
+
+rule build-projects ( projects * ) {
+ for local p in $(projects) {
+ build-project $(p) ;
+ }
+}
+
+#Boost Build permits only a single post-build hook; these rules multiplex it so several hooks can be registered.
+post-hooks = ;
+rule post-build ( ok ? ) {
+ for local r in $(post-hooks) {
+ $(r) $(ok) ;
+ }
+}
+IMPORT $(__name__) : post-build : : $(__name__).post-build ;
+build-system.set-post-build-hook $(__name__).post-build ;
+rule add-post-hook ( names * ) {
+ post-hooks += $(names) ;
+}
+
+rule failure-message ( ok ? ) {
+ if $(ok) != "ok" {
+ local args = [ modules.peek : ARGV ] ;
+ local args = $(args:J=" ") ;
+ if --debug-configuration in [ modules.peek : ARGV ] {
+ echo "The build failed with command line: " ;
+ echo " $(args)" ;
+ echo "If you need support, attach the full output to your e-mail." ;
+ } else {
+ echo "The build failed. If you need support, run:" ;
+ echo " $(args) --debug-configuration -d2 |gzip >build.log.gz" ;
+ echo "then attach build.log.gz to your e-mail." ;
+ }
+ echo "ERROR" ;
+ } else {
+ echo "SUCCESS" ;
+ }
+}
+add-post-hook failure-message ;
+
+import feature : feature ;
+feature options-to-write : : free ;
+import toolset : flags ;
+flags write-options OPTIONS-TO-WRITE <options-to-write> ;
+actions write-options {
+ echo "$(OPTIONS-TO-WRITE)" > $(<) ;
+}
+
+#Compare contents of file with current. If they're different, write to the
+#file. This file can then be used with <dependency>$(file) to force
+#recompilation.
+rule update-if-changed ( file current ) {
+ if ( ! [ path.exists $(file) ] ) || ( [ _shell "cat $(file)" ] != $(current) ) {
+ make $(file) : : $(__name__).write-options : <options-to-write>$(current) ;
+ always $(file) ;
+ }
+}
+
+if [ option.get "sanity-test" : : "yes" ] {
+ local current_version = [ modules.peek : JAM_VERSION ] ;
+ if ( $(current_version[0]) < 2000 && [ version.check-jam-version 3 1 16 ] ) || [ version.check-jam-version 2011 0 0 ] {
+ EXIT "Sane" : 0 ;
+ } else {
+ EXIT "Bad" : 1 ;
+ }
+}
+
+#Hack that acts like alias (no library is actually built) while still compiling each cpp file only once.
+import type ;
+rule fakelib ( name : deps * : requirements * : default-build * : usage-requirements * ) {
+  local c-files = ;
+  local real-deps = ;
+  for local c in $(deps) {
+    if [ type.type $(c) ] = CPP {
+      c-files += $(c) ;
+    } else {
+      real-deps += $(c) ;
+    }
+  }
+  for local c in $(c-files) {
+    obj $(c:B).o : $(c) $(real-deps) : $(requirements) : $(default-build) : $(usage-requirements) ;
+  }
+  alias $(name) : $(c-files:B).o $(real-deps) : $(requirements) : $(default-build) : $(usage-requirements) ;
+}
+
+use-project /top : . ;
diff --git a/src/kenlm/lm/CMakeLists.txt b/src/kenlm/lm/CMakeLists.txt
new file mode 100644
index 0000000..e3ef06f
--- /dev/null
+++ b/src/kenlm/lm/CMakeLists.txt
@@ -0,0 +1,90 @@
+cmake_minimum_required(VERSION 2.8.8)
+#
+# The KenLM cmake files make use of add_library(... OBJECTS ...)
+#
+# This syntax allows grouping of source files when compiling
+# (effectively creating "fake" libraries based on source subdirs).
+#
+# This syntax was only added in cmake version 2.8.8
+#
+# see http://www.cmake.org/Wiki/CMake/Tutorials/Object_Library
+
+
+# This CMake file was created by Lane Schwartz <dowobeha@gmail.com>
+
+
+set(KENLM_MAX_ORDER 6 CACHE STRING "Maximum supported ngram order")
+
+add_definitions(-DKENLM_MAX_ORDER=${KENLM_MAX_ORDER})
+
+
+# Explicitly list the source files for this subdirectory
+#
+# If you add any source files to this subdirectory
+# that should be included in the kenlm library,
+# (this excludes any unit test files)
+# you should add them to the following list:
+set(KENLM_SOURCE
+ bhiksha.cc
+ binary_format.cc
+ config.cc
+ lm_exception.cc
+ model.cc
+ quantize.cc
+ read_arpa.cc
+ search_hashed.cc
+ search_trie.cc
+ sizes.cc
+ trie.cc
+ trie_sort.cc
+ value_build.cc
+ virtual_interface.cc
+ vocab.cc
+)
+
+
+# Group these objects together for later use.
+#
+# Given add_library(foo OBJECT ${my_foo_sources}),
+# refer to these objects as $<TARGET_OBJECTS:foo>
+#
+add_library(kenlm OBJECT ${KENLM_SOURCE})
+
+# This directory has children that need to be processed
+add_subdirectory(builder)
+add_subdirectory(common)
+add_subdirectory(filter)
+
+
+
+# Explicitly list the executable files to be compiled
+set(EXE_LIST
+ query
+ fragment
+ build_binary
+)
+
+AddExes(EXES ${EXE_LIST}
+ DEPENDS $<TARGET_OBJECTS:kenlm> $<TARGET_OBJECTS:kenlm_util>
+ LIBRARIES ${Boost_LIBRARIES} pthread)
+
+# Conditionally build the interpolation code
+if(BUILD_INTERPOLATE)
+ add_subdirectory(interpolate)
+endif()
+
+if(BUILD_TESTING)
+
+ set(KENLM_BOOST_TESTS_LIST left_test partial_test)
+ AddTests(TESTS ${KENLM_BOOST_TESTS_LIST}
+ DEPENDS $<TARGET_OBJECTS:kenlm> $<TARGET_OBJECTS:kenlm_util>
+ LIBRARIES ${Boost_LIBRARIES} pthread
+ TEST_ARGS ${CMAKE_CURRENT_SOURCE_DIR}/test.arpa)
+
+ # model_test requires an extra command line parameter
+ KenLMAddTest(TEST model_test
+ DEPENDS $<TARGET_OBJECTS:kenlm> $<TARGET_OBJECTS:kenlm_util>
+ LIBRARIES ${Boost_LIBRARIES} pthread
+ TEST_ARGS ${CMAKE_CURRENT_SOURCE_DIR}/test.arpa
+ ${CMAKE_CURRENT_SOURCE_DIR}/test_nounk.arpa)
+endif()
diff --git a/src/kenlm/lm/Jamfile b/src/kenlm/lm/Jamfile
new file mode 100644
index 0000000..a479e2d
--- /dev/null
+++ b/src/kenlm/lm/Jamfile
@@ -0,0 +1,40 @@
+# If you need higher order, change this option
+# Having this limit means that State can be
+# (KENLM_MAX_ORDER - 1) * sizeof(float) bytes instead of
+# sizeof(float*) + (KENLM_MAX_ORDER - 1) * sizeof(float) + malloc overhead
+max-order = [ option.get "max-kenlm-order" : 6 : 6 ] ;
+if ( $(max-order) != 6 ) {
+ echo "Setting KenLM maximum n-gram order to $(max-order)" ;
+}
+max-order = <define>KENLM_MAX_ORDER=$(max-order) ;
+
+path-constant ORDER-LOG : bin/order.log ;
+update-if-changed $(ORDER-LOG) $(max-order) ;
+
+max-order += <dependency>$(ORDER-LOG) ;
+
+wrappers = ;
+local with-nplm = [ option.get "with-nplm" ] ;
+if $(with-nplm) {
+ lib nplm : : <search>$(with-nplm)/src ;
+ obj nplm.o : wrappers/nplm.cc : <include>.. <include>$(with-nplm)/src <cxxflags>-fopenmp <include>$(with-nplm)/3rdparty/eigen <define>NPLM_DOUBLE_PRECISION=0 ;
+ alias nplm-all : nplm.o nplm ..//boost_thread : : : <cxxflags>-fopenmp <linkflags>-fopenmp <define>WITH_NPLM <library>..//boost_thread ;
+ wrappers += nplm-all ;
+}
+
+fakelib kenlm : $(wrappers) [ glob *.cc : *main.cc *test.cc ] ../util//kenutil : <include>.. $(max-order) : : <include>.. $(max-order) ;
+
+import testing ;
+
+run left_test.cc kenlm /top//boost_unit_test_framework : : test.arpa ;
+run model_test.cc kenlm /top//boost_unit_test_framework : : test.arpa test_nounk.arpa ;
+run partial_test.cc kenlm /top//boost_unit_test_framework : : test.arpa ;
+
+exes = ;
+for local p in [ glob *_main.cc ] {
+ local name = [ MATCH "(.*)\_main.cc" : $(p) ] ;
+ exe $(name) : $(p) kenlm ;
+ exes += $(name) ;
+}
+
+alias programs : $(exes) filter//filter builder//dump_counts : <threading>multi:<source>builder//lmplz ;
diff --git a/src/kenlm/lm/bhiksha.cc b/src/kenlm/lm/bhiksha.cc
new file mode 100644
index 0000000..4262b61
--- /dev/null
+++ b/src/kenlm/lm/bhiksha.cc
@@ -0,0 +1,94 @@
+#include "lm/bhiksha.hh"
+
+#include "lm/binary_format.hh"
+#include "lm/config.hh"
+#include "util/file.hh"
+#include "util/exception.hh"
+
+#include <limits>
+
+namespace lm {
+namespace ngram {
+namespace trie {
+
+DontBhiksha::DontBhiksha(const void * /*base*/, uint64_t /*max_offset*/, uint64_t max_next, const Config &/*config*/) :
+ next_(util::BitsMask::ByMax(max_next)) {}
+
+const uint8_t kArrayBhikshaVersion = 0;
+
+// TODO: put this in binary file header instead when I change the binary file format again.
+void ArrayBhiksha::UpdateConfigFromBinary(const BinaryFormat &file, uint64_t offset, Config &config) {
+ uint8_t buffer[2];
+ file.ReadForConfig(buffer, 2, offset);
+ uint8_t version = buffer[0];
+ uint8_t configured_bits = buffer[1];
+ if (version != kArrayBhikshaVersion) UTIL_THROW(FormatLoadException, "This file has sorted array compression version " << (unsigned) version << " but the code expects version " << (unsigned)kArrayBhikshaVersion);
+ config.pointer_bhiksha_bits = configured_bits;
+}
+
+namespace {
+
+// Find argmin_{chopped \in [0, RequiredBits(max_next)]} ChoppedDelta(max_offset)
+uint8_t ChopBits(uint64_t max_offset, uint64_t max_next, const Config &config) {
+ uint8_t required = util::RequiredBits(max_next);
+ uint8_t best_chop = 0;
+ int64_t lowest_change = std::numeric_limits<int64_t>::max();
+ // There are probably faster ways but I don't care because this is only done once per order at construction time.
+ for (uint8_t chop = 0; chop <= std::min(required, config.pointer_bhiksha_bits); ++chop) {
+ int64_t change = (max_next >> (required - chop)) * 64 /* table cost in bits */
+ - max_offset * static_cast<int64_t>(chop); /* savings in bits*/
+ if (change < lowest_change) {
+ lowest_change = change;
+ best_chop = chop;
+ }
+ }
+ return best_chop;
+}
+
+std::size_t ArrayCount(uint64_t max_offset, uint64_t max_next, const Config &config) {
+ uint8_t required = util::RequiredBits(max_next);
+ uint8_t chopping = ChopBits(max_offset, max_next, config);
+ return (max_next >> (required - chopping)) + 1 /* we store 0 too */;
+}
+} // namespace
+
+uint64_t ArrayBhiksha::Size(uint64_t max_offset, uint64_t max_next, const Config &config) {
+ return sizeof(uint64_t) * (1 /* header */ + ArrayCount(max_offset, max_next, config)) + 7 /* 8-byte alignment */;
+}
+
+uint8_t ArrayBhiksha::InlineBits(uint64_t max_offset, uint64_t max_next, const Config &config) {
+ return util::RequiredBits(max_next) - ChopBits(max_offset, max_next, config);
+}
+
+namespace {
+
+void *AlignTo8(void *from) {
+ uint8_t *val = reinterpret_cast<uint8_t*>(from);
+ std::size_t remainder = reinterpret_cast<std::size_t>(val) & 7;
+ if (!remainder) return val;
+ return val + 8 - remainder;
+}
+
+} // namespace
+
+ArrayBhiksha::ArrayBhiksha(void *base, uint64_t max_offset, uint64_t max_next, const Config &config)
+ : next_inline_(util::BitsMask::ByBits(InlineBits(max_offset, max_next, config))),
+ offset_begin_(reinterpret_cast<const uint64_t*>(AlignTo8(base)) + 1 /* 8-byte header */),
+ offset_end_(offset_begin_ + ArrayCount(max_offset, max_next, config)),
+ write_to_(reinterpret_cast<uint64_t*>(AlignTo8(base)) + 1 /* 8-byte header */ + 1 /* first entry is 0 */),
+ original_base_(base) {}
+
+void ArrayBhiksha::FinishedLoading(const Config &config) {
+ // *offset_begin_ = 0 but without a const_cast.
+ *(write_to_ - (write_to_ - offset_begin_)) = 0;
+
+ if (write_to_ != offset_end_) UTIL_THROW(util::Exception, "Did not get all the array entries that were expected.");
+
+ uint8_t *head_write = reinterpret_cast<uint8_t*>(original_base_);
+ *(head_write++) = kArrayBhikshaVersion;
+ *(head_write++) = config.pointer_bhiksha_bits;
+}
+
+} // namespace trie
+} // namespace ngram
+} // namespace lm
diff --git a/src/kenlm/lm/bhiksha.hh b/src/kenlm/lm/bhiksha.hh
new file mode 100644
index 0000000..36438f1
--- /dev/null
+++ b/src/kenlm/lm/bhiksha.hh
@@ -0,0 +1,122 @@
+/* Simple implementation of
+ * @inproceedings{bhikshacompression,
+ * author={Bhiksha Raj and Ed Whittaker},
+ * year={2003},
+ * title={Lossless Compression of Language Model Structure and Word Identifiers},
+ * booktitle={Proceedings of IEEE International Conference on Acoustics, Speech and Signal Processing},
+ * pages={388--391},
+ * }
+ *
+ * Currently only used for next pointers.
+ */
+
+#ifndef LM_BHIKSHA_H
+#define LM_BHIKSHA_H
+
+#include "lm/model_type.hh"
+#include "lm/trie.hh"
+#include "util/bit_packing.hh"
+#include "util/sorted_uniform.hh"
+
+#include <algorithm>
+#include <stdint.h>
+#include <cassert>
+
+namespace lm {
+namespace ngram {
+struct Config;
+class BinaryFormat;
+
+namespace trie {
+
+class DontBhiksha {
+ public:
+ static const ModelType kModelTypeAdd = static_cast<ModelType>(0);
+
+ static void UpdateConfigFromBinary(const BinaryFormat &, uint64_t, Config &/*config*/) {}
+
+ static uint64_t Size(uint64_t /*max_offset*/, uint64_t /*max_next*/, const Config &/*config*/) { return 0; }
+
+ static uint8_t InlineBits(uint64_t /*max_offset*/, uint64_t max_next, const Config &/*config*/) {
+ return util::RequiredBits(max_next);
+ }
+
+ DontBhiksha(const void *base, uint64_t max_offset, uint64_t max_next, const Config &config);
+
+ void ReadNext(const void *base, uint64_t bit_offset, uint64_t /*index*/, uint8_t total_bits, NodeRange &out) const {
+ out.begin = util::ReadInt57(base, bit_offset, next_.bits, next_.mask);
+ out.end = util::ReadInt57(base, bit_offset + total_bits, next_.bits, next_.mask);
+ //assert(out.end >= out.begin);
+ }
+
+ void WriteNext(void *base, uint64_t bit_offset, uint64_t /*index*/, uint64_t value) {
+ util::WriteInt57(base, bit_offset, next_.bits, value);
+ }
+
+ void FinishedLoading(const Config &/*config*/) {}
+
+ uint8_t InlineBits() const { return next_.bits; }
+
+ private:
+ util::BitsMask next_;
+};
+
+class ArrayBhiksha {
+ public:
+ static const ModelType kModelTypeAdd = kArrayAdd;
+
+ static void UpdateConfigFromBinary(const BinaryFormat &file, uint64_t offset, Config &config);
+
+ static uint64_t Size(uint64_t max_offset, uint64_t max_next, const Config &config);
+
+ static uint8_t InlineBits(uint64_t max_offset, uint64_t max_next, const Config &config);
+
+ ArrayBhiksha(void *base, uint64_t max_offset, uint64_t max_value, const Config &config);
+
+ void ReadNext(const void *base, uint64_t bit_offset, uint64_t index, uint8_t total_bits, NodeRange &out) const {
+ // Some assertions are commented out because they are expensive.
+ // assert(*offset_begin_ == 0);
+ // std::upper_bound returns the first element that is greater. Want the
+ // last element that is <= to the index.
+ const uint64_t *begin_it = std::upper_bound(offset_begin_, offset_end_, index) - 1;
+ // Since *offset_begin_ == 0, the position should be in range.
+ // assert(begin_it >= offset_begin_);
+ const uint64_t *end_it;
+ for (end_it = begin_it + 1; (end_it < offset_end_) && (*end_it <= index + 1); ++end_it) {}
+ // assert(end_it == std::upper_bound(offset_begin_, offset_end_, index + 1));
+ --end_it;
+ // assert(end_it >= begin_it);
+ out.begin = ((begin_it - offset_begin_) << next_inline_.bits) |
+ util::ReadInt57(base, bit_offset, next_inline_.bits, next_inline_.mask);
+ out.end = ((end_it - offset_begin_) << next_inline_.bits) |
+ util::ReadInt57(base, bit_offset + total_bits, next_inline_.bits, next_inline_.mask);
+ // If this fails, consider rebuilding your model using KenLM after 1e333d786b748555e8f368d2bbba29a016c98052
+ assert(out.end >= out.begin);
+ }
+
+ void WriteNext(void *base, uint64_t bit_offset, uint64_t index, uint64_t value) {
+ uint64_t encode = value >> next_inline_.bits;
+ for (; write_to_ <= offset_begin_ + encode; ++write_to_) *write_to_ = index;
+ util::WriteInt57(base, bit_offset, next_inline_.bits, value & next_inline_.mask);
+ }
+
+ void FinishedLoading(const Config &config);
+
+ uint8_t InlineBits() const { return next_inline_.bits; }
+
+ private:
+ const util::BitsMask next_inline_;
+
+ const uint64_t *const offset_begin_;
+ const uint64_t *const offset_end_;
+
+ uint64_t *write_to_;
+
+ void *original_base_;
+};
+
+} // namespace trie
+} // namespace ngram
+} // namespace lm
+
+#endif // LM_BHIKSHA_H
diff --git a/src/kenlm/lm/binary_format.cc b/src/kenlm/lm/binary_format.cc
new file mode 100644
index 0000000..802943f
--- /dev/null
+++ b/src/kenlm/lm/binary_format.cc
@@ -0,0 +1,302 @@
+#include "lm/binary_format.hh"
+
+#include "lm/lm_exception.hh"
+#include "util/file.hh"
+#include "util/file_piece.hh"
+
+#include <cstddef>
+#include <cstring>
+#include <limits>
+#include <string>
+#include <cstdlib>
+
+#include <stdint.h>
+
+namespace lm {
+namespace ngram {
+
+const char *kModelNames[6] = {"probing hash tables", "probing hash tables with rest costs", "trie", "trie with quantization", "trie with array-compressed pointers", "trie with quantization and array-compressed pointers"};
+
+namespace {
+const char kMagicBeforeVersion[] = "mmap lm http://kheafield.com/code format version";
+const char kMagicBytes[] = "mmap lm http://kheafield.com/code format version 5\n\0";
+// This must be shorter than kMagicBytes and indicates an incomplete binary file (i.e. build failed).
+const char kMagicIncomplete[] = "mmap lm http://kheafield.com/code incomplete\n";
+const long int kMagicVersion = 5;
+
+// Old binary files built on 32-bit machines have this header.
+// TODO: eliminate with next binary release.
+struct OldSanity {
+ char magic[sizeof(kMagicBytes)];
+ float zero_f, one_f, minus_half_f;
+ WordIndex one_word_index, max_word_index;
+ uint64_t one_uint64;
+
+ void SetToReference() {
+ std::memset(this, 0, sizeof(OldSanity));
+ std::memcpy(magic, kMagicBytes, sizeof(magic));
+ zero_f = 0.0; one_f = 1.0; minus_half_f = -0.5;
+ one_word_index = 1;
+ max_word_index = std::numeric_limits<WordIndex>::max();
+ one_uint64 = 1;
+ }
+};
+
+
+// Test values aligned to 8 bytes.
+struct Sanity {
+ char magic[ALIGN8(sizeof(kMagicBytes))];
+ float zero_f, one_f, minus_half_f;
+ WordIndex one_word_index, max_word_index, padding_to_8;
+ uint64_t one_uint64;
+
+ void SetToReference() {
+ std::memset(this, 0, sizeof(Sanity));
+ std::memcpy(magic, kMagicBytes, sizeof(kMagicBytes));
+ zero_f = 0.0; one_f = 1.0; minus_half_f = -0.5;
+ one_word_index = 1;
+ max_word_index = std::numeric_limits<WordIndex>::max();
+ padding_to_8 = 0;
+ one_uint64 = 1;
+ }
+};
+
+std::size_t TotalHeaderSize(unsigned char order) {
+ return ALIGN8(sizeof(Sanity) + sizeof(FixedWidthParameters) + sizeof(uint64_t) * order);
+}
+
+void WriteHeader(void *to, const Parameters ¶ms) {
+ Sanity header = Sanity();
+ header.SetToReference();
+ std::memcpy(to, &header, sizeof(Sanity));
+ char *out = reinterpret_cast<char*>(to) + sizeof(Sanity);
+
+ *reinterpret_cast<FixedWidthParameters*>(out) = params.fixed;
+ out += sizeof(FixedWidthParameters);
+
+ uint64_t *counts = reinterpret_cast<uint64_t*>(out);
+ for (std::size_t i = 0; i < params.counts.size(); ++i) {
+ counts[i] = params.counts[i];
+ }
+}
+
+} // namespace
+
+bool IsBinaryFormat(int fd) {
+ const uint64_t size = util::SizeFile(fd);
+ if (size == util::kBadSize || (size <= static_cast<uint64_t>(sizeof(Sanity)))) return false;
+ // Try reading the header.
+ util::scoped_memory memory;
+ try {
+ util::MapRead(util::LAZY, fd, 0, sizeof(Sanity), memory);
+ } catch (const util::Exception &e) {
+ return false;
+ }
+ Sanity reference_header = Sanity();
+ reference_header.SetToReference();
+ if (!std::memcmp(memory.get(), &reference_header, sizeof(Sanity))) return true;
+ if (!std::memcmp(memory.get(), kMagicIncomplete, strlen(kMagicIncomplete))) {
+ UTIL_THROW(FormatLoadException, "This binary file did not finish building");
+ }
+ if (!std::memcmp(memory.get(), kMagicBeforeVersion, strlen(kMagicBeforeVersion))) {
+ char *end_ptr;
+ const char *begin_version = static_cast<const char*>(memory.get()) + strlen(kMagicBeforeVersion);
+ long int version = std::strtol(begin_version, &end_ptr, 10);
+ if ((end_ptr != begin_version) && version != kMagicVersion) {
+ UTIL_THROW(FormatLoadException, "Binary file has version " << version << " but this implementation expects version " << kMagicVersion << " so you'll have to use the ARPA to rebuild your binary");
+ }
+
+ OldSanity old_sanity = OldSanity();
+ old_sanity.SetToReference();
+ UTIL_THROW_IF(!std::memcmp(memory.get(), &old_sanity, sizeof(OldSanity)), FormatLoadException, "Looks like this is an old 32-bit format. The old 32-bit format has been removed so that 64-bit and 32-bit files are exchangeable.");
+ UTIL_THROW(FormatLoadException, "File looks like it should be loaded with mmap, but the test values don't match. Try rebuilding the binary format LM using the same code revision, compiler, and architecture");
+ }
+ return false;
+}
+
+void ReadHeader(int fd, Parameters &out) {
+ util::SeekOrThrow(fd, sizeof(Sanity));
+ util::ReadOrThrow(fd, &out.fixed, sizeof(out.fixed));
+ if (out.fixed.probing_multiplier < 1.0)
+ UTIL_THROW(FormatLoadException, "Binary format claims to have a probing multiplier of " << out.fixed.probing_multiplier << " which is < 1.0.");
+
+ out.counts.resize(static_cast<std::size_t>(out.fixed.order));
+ if (out.fixed.order) util::ReadOrThrow(fd, &*out.counts.begin(), sizeof(uint64_t) * out.fixed.order);
+}
+
+void MatchCheck(ModelType model_type, unsigned int search_version, const Parameters &params) {
+  if (params.fixed.model_type != model_type) {
+    if (static_cast<unsigned int>(params.fixed.model_type) >= (sizeof(kModelNames) / sizeof(const char *)))
+      UTIL_THROW(FormatLoadException, "The binary file claims to be model type " << static_cast<unsigned int>(params.fixed.model_type) << " but this is not implemented in this inference code.");
+    UTIL_THROW(FormatLoadException, "The binary file was built for " << kModelNames[params.fixed.model_type] << " but the inference code is trying to load " << kModelNames[model_type]);
+  }
+  UTIL_THROW_IF(search_version != params.fixed.search_version, FormatLoadException, "The binary file has " << kModelNames[params.fixed.model_type] << " version " << params.fixed.search_version << " but this code expects " << kModelNames[params.fixed.model_type] << " version " << search_version);
+}
+
+const std::size_t kInvalidSize = static_cast<std::size_t>(-1);
+
+BinaryFormat::BinaryFormat(const Config &config)
+ : write_method_(config.write_method), write_mmap_(config.write_mmap), load_method_(config.load_method),
+ header_size_(kInvalidSize), vocab_size_(kInvalidSize), vocab_string_offset_(kInvalidOffset) {}
+
+void BinaryFormat::InitializeBinary(int fd, ModelType model_type, unsigned int search_version, Parameters ¶ms) {
+ file_.reset(fd);
+ write_mmap_ = NULL; // Ignore write requests; this is already in binary format.
+ ReadHeader(fd, params);
+ MatchCheck(model_type, search_version, params);
+ header_size_ = TotalHeaderSize(params.counts.size());
+}
+
+void BinaryFormat::ReadForConfig(void *to, std::size_t amount, uint64_t offset_excluding_header) const {
+ assert(header_size_ != kInvalidSize);
+ util::ErsatzPRead(file_.get(), to, amount, offset_excluding_header + header_size_);
+}
+
+void *BinaryFormat::LoadBinary(std::size_t size) {
+ assert(header_size_ != kInvalidSize);
+ const uint64_t file_size = util::SizeFile(file_.get());
+ // The header is smaller than a page, so we have to map the whole header as well.
+ uint64_t total_map = static_cast<uint64_t>(header_size_) + static_cast<uint64_t>(size);
+ UTIL_THROW_IF(file_size != util::kBadSize && file_size < total_map, FormatLoadException, "Binary file has size " << file_size << " but the headers say it should be at least " << total_map);
+
+ util::MapRead(load_method_, file_.get(), 0, util::CheckOverflow(total_map), mapping_);
+
+ vocab_string_offset_ = total_map;
+ return reinterpret_cast<uint8_t*>(mapping_.get()) + header_size_;
+}
+
+void *BinaryFormat::SetupJustVocab(std::size_t memory_size, uint8_t order) {
+  vocab_size_ = memory_size;
+  if (!write_mmap_) {
+    header_size_ = 0;
+    util::HugeMalloc(memory_size, true, memory_vocab_);
+    return reinterpret_cast<uint8_t*>(memory_vocab_.get());
+  }
+  header_size_ = TotalHeaderSize(order);
+  std::size_t total = util::CheckOverflow(static_cast<uint64_t>(header_size_) + static_cast<uint64_t>(memory_size));
+  file_.reset(util::CreateOrThrow(write_mmap_));
+  // some gccs complain about uninitialized variables even though all enum values are covered.
+  void *vocab_base = NULL;
+  switch (write_method_) {
+    case Config::WRITE_MMAP:
+      mapping_.reset(util::MapZeroedWrite(file_.get(), total), total, util::scoped_memory::MMAP_ALLOCATED);
+      vocab_base = mapping_.get(); // assign before advising; previously advised on NULL
+      util::AdviseHugePages(vocab_base, total);
+      break;
+    case Config::WRITE_AFTER:
+      util::ResizeOrThrow(file_.get(), 0);
+      util::HugeMalloc(total, true, memory_vocab_);
+      vocab_base = memory_vocab_.get();
+      break;
+  }
+  strncpy(reinterpret_cast<char*>(vocab_base), kMagicIncomplete, header_size_);
+  return reinterpret_cast<uint8_t*>(vocab_base) + header_size_;
+}
+
+void *BinaryFormat::GrowForSearch(std::size_t memory_size, std::size_t vocab_pad, void *&vocab_base) {
+ assert(vocab_size_ != kInvalidSize);
+ vocab_pad_ = vocab_pad;
+ std::size_t new_size = header_size_ + vocab_size_ + vocab_pad_ + memory_size;
+ vocab_string_offset_ = new_size;
+ if (!write_mmap_ || write_method_ == Config::WRITE_AFTER) {
+ util::HugeMalloc(memory_size, true, memory_search_);
+ assert(header_size_ == 0 || write_mmap_);
+ vocab_base = reinterpret_cast<uint8_t*>(memory_vocab_.get()) + header_size_;
+ util::AdviseHugePages(memory_search_.get(), memory_size);
+ return reinterpret_cast<uint8_t*>(memory_search_.get());
+ }
+
+ assert(write_method_ == Config::WRITE_MMAP);
+ // Also known as total size without vocab words.
+  // Grow the file to accommodate the search, using zeros.
+ // According to man mmap, behavior is undefined when the file is resized
+ // underneath a mmap that is not a multiple of the page size. So to be
+ // safe, we'll unmap it and map it again.
+ mapping_.reset();
+ util::ResizeOrThrow(file_.get(), new_size);
+ void *ret;
+ MapFile(vocab_base, ret);
+ util::AdviseHugePages(ret, new_size);
+ return ret;
+}
+
+void BinaryFormat::WriteVocabWords(const std::string &buffer, void *&vocab_base, void *&search_base) {
+ // Checking Config's include_vocab is the responsibility of the caller.
+ assert(header_size_ != kInvalidSize && vocab_size_ != kInvalidSize);
+ if (!write_mmap_) {
+ // Unchanged base.
+ vocab_base = reinterpret_cast<uint8_t*>(memory_vocab_.get());
+ search_base = reinterpret_cast<uint8_t*>(memory_search_.get());
+ return;
+ }
+ if (write_method_ == Config::WRITE_MMAP) {
+ mapping_.reset();
+ }
+ util::SeekOrThrow(file_.get(), VocabStringReadingOffset());
+ util::WriteOrThrow(file_.get(), &buffer[0], buffer.size());
+ if (write_method_ == Config::WRITE_MMAP) {
+ MapFile(vocab_base, search_base);
+ } else {
+ vocab_base = reinterpret_cast<uint8_t*>(memory_vocab_.get()) + header_size_;
+ search_base = reinterpret_cast<uint8_t*>(memory_search_.get());
+ }
+}
+
+void BinaryFormat::FinishFile(const Config &config, ModelType model_type, unsigned int search_version, const std::vector<uint64_t> &counts) {
+ if (!write_mmap_) return;
+ switch (write_method_) {
+ case Config::WRITE_MMAP:
+ util::SyncOrThrow(mapping_.get(), mapping_.size());
+ break;
+ case Config::WRITE_AFTER:
+ util::SeekOrThrow(file_.get(), 0);
+ util::WriteOrThrow(file_.get(), memory_vocab_.get(), memory_vocab_.size());
+ util::SeekOrThrow(file_.get(), header_size_ + vocab_size_ + vocab_pad_);
+ util::WriteOrThrow(file_.get(), memory_search_.get(), memory_search_.size());
+ util::FSyncOrThrow(file_.get());
+ break;
+ }
+ // header and vocab share the same mmap.
+ Parameters params = Parameters();
+  memset(&params, 0, sizeof(Parameters));
+ params.counts = counts;
+ params.fixed.order = counts.size();
+ params.fixed.probing_multiplier = config.probing_multiplier;
+ params.fixed.model_type = model_type;
+ params.fixed.has_vocabulary = config.include_vocab;
+ params.fixed.search_version = search_version;
+ switch (write_method_) {
+ case Config::WRITE_MMAP:
+ WriteHeader(mapping_.get(), params);
+ util::SyncOrThrow(mapping_.get(), mapping_.size());
+ break;
+ case Config::WRITE_AFTER:
+ {
+ std::vector<uint8_t> buffer(TotalHeaderSize(counts.size()));
+ WriteHeader(&buffer[0], params);
+ util::SeekOrThrow(file_.get(), 0);
+ util::WriteOrThrow(file_.get(), &buffer[0], buffer.size());
+ }
+ break;
+ }
+}
+
+void BinaryFormat::MapFile(void *&vocab_base, void *&search_base) {
+ mapping_.reset(util::MapOrThrow(vocab_string_offset_, true, util::kFileFlags, false, file_.get()), vocab_string_offset_, util::scoped_memory::MMAP_ALLOCATED);
+ vocab_base = reinterpret_cast<uint8_t*>(mapping_.get()) + header_size_;
+ search_base = reinterpret_cast<uint8_t*>(mapping_.get()) + header_size_ + vocab_size_ + vocab_pad_;
+}
+
+bool RecognizeBinary(const char *file, ModelType &recognized) {
+ util::scoped_fd fd(util::OpenReadOrThrow(file));
+ if (!IsBinaryFormat(fd.get())) {
+ return false;
+ }
+ Parameters params;
+ ReadHeader(fd.get(), params);
+ recognized = params.fixed.model_type;
+ return true;
+}
+
+} // namespace ngram
+} // namespace lm
diff --git a/src/kenlm/lm/binary_format.hh b/src/kenlm/lm/binary_format.hh
new file mode 100644
index 0000000..ff99b95
--- /dev/null
+++ b/src/kenlm/lm/binary_format.hh
@@ -0,0 +1,106 @@
+#ifndef LM_BINARY_FORMAT_H
+#define LM_BINARY_FORMAT_H
+
+#include "lm/config.hh"
+#include "lm/model_type.hh"
+#include "lm/read_arpa.hh"
+
+#include "util/file_piece.hh"
+#include "util/mmap.hh"
+#include "util/scoped.hh"
+
+#include <cstddef>
+#include <vector>
+
+#include <stdint.h>
+
+namespace lm {
+namespace ngram {
+
+extern const char *kModelNames[6];
+
+/*Inspect a file to determine if it is a binary lm. If not, return false.
+ * If so, return true and set recognized to the type. This is the only API in
+ * this header designed for use by decoder authors.
+ */
+bool RecognizeBinary(const char *file, ModelType &recognized);
+
+struct FixedWidthParameters {
+ unsigned char order;
+ float probing_multiplier;
+ // What type of model is this?
+ ModelType model_type;
+ // Does the end of the file have the actual strings in the vocabulary?
+ bool has_vocabulary;
+ unsigned int search_version;
+};
+
+// This is a macro instead of an inline function so constants can be assigned using it.
+#define ALIGN8(a) ((std::ptrdiff_t(((a)-1)/8)+1)*8)
+
+// Parameters stored in the header of a binary file.
+struct Parameters {
+ FixedWidthParameters fixed;
+ std::vector<uint64_t> counts;
+};
+
+class BinaryFormat {
+ public:
+ explicit BinaryFormat(const Config &config);
+
+ // Reading a binary file:
+ // Takes ownership of fd
+    void InitializeBinary(int fd, ModelType model_type, unsigned int search_version, Parameters &params);
+ // Used to read parts of the file to update the config object before figuring out full size.
+ void ReadForConfig(void *to, std::size_t amount, uint64_t offset_excluding_header) const;
+ // Actually load the binary file and return a pointer to the beginning of the search area.
+ void *LoadBinary(std::size_t size);
+
+ uint64_t VocabStringReadingOffset() const {
+ assert(vocab_string_offset_ != kInvalidOffset);
+ return vocab_string_offset_;
+ }
+
+ // Writing a binary file or initializing in RAM from ARPA:
+ // Size for vocabulary.
+ void *SetupJustVocab(std::size_t memory_size, uint8_t order);
+    // Warning: can change the vocabulary base pointer.
+ void *GrowForSearch(std::size_t memory_size, std::size_t vocab_pad, void *&vocab_base);
+ // Warning: can change vocabulary and search base addresses.
+ void WriteVocabWords(const std::string &buffer, void *&vocab_base, void *&search_base);
+ // Write the header at the beginning of the file.
+ void FinishFile(const Config &config, ModelType model_type, unsigned int search_version, const std::vector<uint64_t> &counts);
+
+ private:
+ void MapFile(void *&vocab_base, void *&search_base);
+
+ // Copied from configuration.
+ const Config::WriteMethod write_method_;
+ const char *write_mmap_;
+ util::LoadMethod load_method_;
+
+ // File behind memory, if any.
+ util::scoped_fd file_;
+
+ // If there is a file involved, a single mapping.
+ util::scoped_memory mapping_;
+
+ // If the data is only in memory, separately allocate each because the trie
+ // knows vocab's size before it knows search's size (because SRILM might
+ // have pruned).
+ util::scoped_memory memory_vocab_, memory_search_;
+
+ // Memory ranges. Note that these may not be contiguous and may not all
+ // exist.
+ std::size_t header_size_, vocab_size_, vocab_pad_;
+ // aka end of search.
+ uint64_t vocab_string_offset_;
+
+ static const uint64_t kInvalidOffset = (uint64_t)-1;
+};
+
+bool IsBinaryFormat(int fd);
+
+} // namespace ngram
+} // namespace lm
+#endif // LM_BINARY_FORMAT_H
diff --git a/src/kenlm/lm/blank.hh b/src/kenlm/lm/blank.hh
new file mode 100644
index 0000000..e09054c
--- /dev/null
+++ b/src/kenlm/lm/blank.hh
@@ -0,0 +1,42 @@
+#ifndef LM_BLANK_H
+#define LM_BLANK_H
+
+#include <limits>
+#include <stdint.h>
+#include <cmath>
+
+namespace lm {
+namespace ngram {
+
+/* Suppose "foo bar" appears with zero backoff but there is no trigram
+ * beginning with these words. Then, when scoring "foo bar", the model could
+ * return out_state containing "bar" or even null context if "bar" also has no
+ * backoff and is never followed by another word. Then the backoff is set to
+ * kNoExtensionBackoff. If the n-gram might be extended, then out_state must
+ * contain the full n-gram, in which case kExtensionBackoff is set. In any
+ * case, if an n-gram has non-zero backoff, the full state is returned so
+ * backoff can be properly charged.
+ * These differ only in sign bit because the backoff is in fact zero in either
+ * case.
+ */
+const float kNoExtensionBackoff = -0.0;
+const float kExtensionBackoff = 0.0;
+const uint64_t kNoExtensionQuant = 0;
+const uint64_t kExtensionQuant = 1;
+
+inline void SetExtension(float &backoff) {
+ if (backoff == kNoExtensionBackoff) backoff = kExtensionBackoff;
+}
+
+// This compiles down nicely.
+inline bool HasExtension(const float &backoff) {
+ typedef union { float f; uint32_t i; } UnionValue;
+ UnionValue compare, interpret;
+ compare.f = kNoExtensionBackoff;
+ interpret.f = backoff;
+ return compare.i != interpret.i;
+}
+
+} // namespace ngram
+} // namespace lm
+#endif // LM_BLANK_H
diff --git a/src/kenlm/lm/build_binary_main.cc b/src/kenlm/lm/build_binary_main.cc
new file mode 100644
index 0000000..35206e6
--- /dev/null
+++ b/src/kenlm/lm/build_binary_main.cc
@@ -0,0 +1,234 @@
+#include "lm/model.hh"
+#include "lm/sizes.hh"
+#include "util/file_piece.hh"
+#include "util/usage.hh"
+
+#include <algorithm>
+#include <cstdlib>
+#include <exception>
+#include <iostream>
+#include <iomanip>
+#include <limits>
+#include <cmath>
+#include <cstdlib>
+
+#ifdef WIN32
+#include "util/getopt.hh"
+#else
+#include <unistd.h>
+#endif
+
+namespace lm {
+namespace ngram {
+namespace {
+
+void Usage(const char *name, const char *default_mem) {
+ std::cerr << "Usage: " << name << " [-u log10_unknown_probability] [-s] [-i] [-w mmap|after] [-p probing_multiplier] [-T trie_temporary] [-S trie_building_mem] [-q bits] [-b bits] [-a bits] [type] input.arpa [output.mmap]\n\n"
+"-u sets the log10 probability for <unk> if the ARPA file does not have one.\n"
+" Default is -100. The ARPA file will always take precedence.\n"
+"-s allows models to be built even if they do not have <s> and </s>.\n"
+"-i allows buggy models from IRSTLM by mapping positive log probability to 0.\n"
+"-w mmap|after determines how writing is done.\n"
+" mmap maps the binary file and writes to it. Default for trie.\n"
+" after allocates anonymous memory, builds, and writes. Default for probing.\n"
+"-r \"order1.arpa order2 order3 order4\" adds lower-order rest costs from these\n"
+" model files. order1.arpa must be an ARPA file. All others may be ARPA or\n"
+" the same data structure as being built. All files must have the same\n"
+" vocabulary. For probing, the unigrams must be in the same order.\n\n"
+"type is either probing or trie. Default is probing.\n\n"
+"probing uses a probing hash table. It is the fastest but uses the most memory.\n"
+"-p sets the space multiplier and must be >1.0. The default is 1.5.\n\n"
+"trie is a straightforward trie with bit-level packing. It uses the least\n"
+"memory and is still faster than SRI or IRST. Building the trie format uses an\n"
+"on-disk sort to save memory.\n"
+"-T is the temporary directory prefix. Default is the output file name.\n"
+"-S determines memory use for sorting. Default is " << default_mem << ". This is compatible\n"
+" with GNU sort. The number is followed by a unit: \% for percent of physical\n"
+" memory, b for bytes, K for Kilobytes, M for megabytes, then G,T,P,E,Z,Y. \n"
+" Default unit is K for Kilobytes.\n"
+"-q turns quantization on and sets the number of bits (e.g. -q 8).\n"
+"-b sets backoff quantization bits. Requires -q and defaults to that value.\n"
+"-a compresses pointers using an array of offsets. The parameter is the\n"
+" maximum number of bits encoded by the array. Memory is minimized subject\n"
+" to the maximum, so pick 255 to minimize memory.\n\n"
+"-h print this help message.\n\n"
+"Get a memory estimate by passing an ARPA file without an output file name.\n";
+ exit(1);
+}
+
+// I could really use boost::lexical_cast right about now.
+float ParseFloat(const char *from) {
+ char *end;
+ float ret = strtod(from, &end);
+ if (*end) throw util::ParseNumberException(from);
+ return ret;
+}
+unsigned long int ParseUInt(const char *from) {
+ char *end;
+ unsigned long int ret = strtoul(from, &end, 10);
+ if (*end) throw util::ParseNumberException(from);
+ return ret;
+}
+
+uint8_t ParseBitCount(const char *from) {
+  unsigned long val = ParseUInt(from);
+  if (val > 25) {
+    util::ParseNumberException e(from);
+    e << " bit counts are limited to 25."; throw e; // exception was built but never thrown
+  }
+  return val;
+}
+
+void ParseFileList(const char *from, std::vector<std::string> &to) {
+ to.clear();
+ while (true) {
+ const char *i;
+ for (i = from; *i && *i != ' '; ++i) {}
+ to.push_back(std::string(from, i - from));
+ if (!*i) break;
+ from = i + 1;
+ }
+}
+
+void ProbingQuantizationUnsupported() {
+ std::cerr << "Quantization is only implemented in the trie data structure." << std::endl;
+ exit(1);
+}
+
+} // namespace
+} // namespace ngram
+} // namespace lm
+
+int main(int argc, char *argv[]) {
+ using namespace lm::ngram;
+
+ const char *default_mem = util::GuessPhysicalMemory() ? "80%" : "1G";
+
+ if (argc == 2 && !strcmp(argv[1], "--help"))
+ Usage(argv[0], default_mem);
+
+ try {
+ bool quantize = false, set_backoff_bits = false, bhiksha = false, set_write_method = false, rest = false;
+ lm::ngram::Config config;
+ config.building_memory = util::ParseSize(default_mem);
+ int opt;
+ while ((opt = getopt(argc, argv, "q:b:a:u:p:t:T:m:S:w:sir:h")) != -1) {
+ switch(opt) {
+ case 'q':
+ config.prob_bits = ParseBitCount(optarg);
+ if (!set_backoff_bits) config.backoff_bits = config.prob_bits;
+ quantize = true;
+ break;
+ case 'b':
+ config.backoff_bits = ParseBitCount(optarg);
+ set_backoff_bits = true;
+ break;
+ case 'a':
+ config.pointer_bhiksha_bits = ParseBitCount(optarg);
+ bhiksha = true;
+ break;
+ case 'u':
+ config.unknown_missing_logprob = ParseFloat(optarg);
+ break;
+ case 'p':
+ config.probing_multiplier = ParseFloat(optarg);
+ break;
+ case 't': // legacy
+ case 'T':
+ config.temporary_directory_prefix = optarg;
+ util::NormalizeTempPrefix(config.temporary_directory_prefix);
+ break;
+ case 'm': // legacy
+ config.building_memory = ParseUInt(optarg) * 1048576;
+ break;
+ case 'S':
+ config.building_memory = std::min(static_cast<uint64_t>(std::numeric_limits<std::size_t>::max()), util::ParseSize(optarg));
+ break;
+ case 'w':
+ set_write_method = true;
+ if (!strcmp(optarg, "mmap")) {
+ config.write_method = Config::WRITE_MMAP;
+ } else if (!strcmp(optarg, "after")) {
+ config.write_method = Config::WRITE_AFTER;
+ } else {
+ Usage(argv[0], default_mem);
+ }
+ break;
+ case 's':
+ config.sentence_marker_missing = lm::SILENT;
+ break;
+ case 'i':
+ config.positive_log_probability = lm::SILENT;
+ break;
+ case 'r':
+ rest = true;
+ ParseFileList(optarg, config.rest_lower_files);
+ config.rest_function = Config::REST_LOWER;
+ break;
+ case 'h': // help
+ default:
+ Usage(argv[0], default_mem);
+ }
+ }
+ if (!quantize && set_backoff_bits) {
+ std::cerr << "You specified backoff quantization (-b) but not probability quantization (-q)" << std::endl;
+ abort();
+ }
+ if (optind + 1 == argc) {
+ ShowSizes(argv[optind], config);
+ return 0;
+ }
+ const char *model_type;
+ const char *from_file;
+
+ if (optind + 2 == argc) {
+ model_type = "probing";
+ from_file = argv[optind];
+ config.write_mmap = argv[optind + 1];
+ } else if (optind + 3 == argc) {
+ model_type = argv[optind];
+ from_file = argv[optind + 1];
+ config.write_mmap = argv[optind + 2];
+ } else {
+ Usage(argv[0], default_mem);
+ return 1;
+ }
+ if (!strcmp(model_type, "probing")) {
+ if (!set_write_method) config.write_method = Config::WRITE_AFTER;
+ if (quantize || set_backoff_bits) ProbingQuantizationUnsupported();
+ if (rest) {
+ RestProbingModel(from_file, config);
+ } else {
+ ProbingModel(from_file, config);
+ }
+ } else if (!strcmp(model_type, "trie")) {
+ if (rest) {
+ std::cerr << "Rest + trie is not supported yet." << std::endl;
+ return 1;
+ }
+ if (!set_write_method) config.write_method = Config::WRITE_MMAP;
+ if (quantize) {
+ if (bhiksha) {
+ QuantArrayTrieModel(from_file, config);
+ } else {
+ QuantTrieModel(from_file, config);
+ }
+ } else {
+ if (bhiksha) {
+ ArrayTrieModel(from_file, config);
+ } else {
+ TrieModel(from_file, config);
+ }
+ }
+ } else {
+ Usage(argv[0], default_mem);
+ }
+ }
+ catch (const std::exception &e) {
+ std::cerr << e.what() << std::endl;
+ std::cerr << "ERROR" << std::endl;
+ return 1;
+ }
+ std::cerr << "SUCCESS" << std::endl;
+ return 0;
+}
diff --git a/src/kenlm/lm/builder/CMakeLists.txt b/src/kenlm/lm/builder/CMakeLists.txt
new file mode 100644
index 0000000..cc0d3ed
--- /dev/null
+++ b/src/kenlm/lm/builder/CMakeLists.txt
@@ -0,0 +1,67 @@
+cmake_minimum_required(VERSION 2.8.8)
+#
+# The KenLM cmake files make use of add_library(... OBJECTS ...)
+#
+# This syntax allows grouping of source files when compiling
+# (effectively creating "fake" libraries based on source subdirs).
+#
+# This syntax was only added in cmake version 2.8.8
+#
+# see http://www.cmake.org/Wiki/CMake/Tutorials/Object_Library
+
+
+# This CMake file was created by Lane Schwartz <dowobeha@gmail.com>
+
+# Explicitly list the source files for this subdirectory
+#
+# If you add any source files to this subdirectory
+# that should be included in the kenlm library,
+# (this excludes any unit test files)
+# you should add them to the following list:
+#
+# In order to set correct paths to these files
+# in case this variable is referenced by CMake files in the parent directory,
+# we prefix all files with ${CMAKE_CURRENT_SOURCE_DIR}.
+#
+set(KENLM_BUILDER_SOURCE
+ ${CMAKE_CURRENT_SOURCE_DIR}/adjust_counts.cc
+ ${CMAKE_CURRENT_SOURCE_DIR}/corpus_count.cc
+ ${CMAKE_CURRENT_SOURCE_DIR}/initial_probabilities.cc
+ ${CMAKE_CURRENT_SOURCE_DIR}/interpolate.cc
+ ${CMAKE_CURRENT_SOURCE_DIR}/output.cc
+ ${CMAKE_CURRENT_SOURCE_DIR}/pipeline.cc
+ )
+
+
+# Group these objects together for later use.
+#
+# Given add_library(foo OBJECT ${my_foo_sources}),
+# refer to these objects as $<TARGET_OBJECTS:foo>
+#
+add_library(kenlm_builder OBJECT ${KENLM_BUILDER_SOURCE})
+
+
+# Compile the executable, linking against the requisite dependent object files
+add_executable(lmplz lmplz_main.cc $<TARGET_OBJECTS:kenlm> $<TARGET_OBJECTS:kenlm_common> $<TARGET_OBJECTS:kenlm_builder> $<TARGET_OBJECTS:kenlm_util>)
+
+# Link the executable against boost
+target_link_libraries(lmplz ${Boost_LIBRARIES} pthread)
+
+# Group executables together
+set_target_properties(lmplz PROPERTIES FOLDER executables)
+
+if(BUILD_TESTING)
+
+ # Explicitly list the Boost test files to be compiled
+ set(KENLM_BOOST_TESTS_LIST
+ adjust_counts_test
+ corpus_count_test
+ )
+
+ AddTests(TESTS ${KENLM_BOOST_TESTS_LIST}
+ DEPENDS $<TARGET_OBJECTS:kenlm>
+ $<TARGET_OBJECTS:kenlm_common>
+ $<TARGET_OBJECTS:kenlm_util>
+ $<TARGET_OBJECTS:kenlm_builder>
+ LIBRARIES ${Boost_LIBRARIES} pthread)
+endif()
diff --git a/src/kenlm/lm/builder/Jamfile b/src/kenlm/lm/builder/Jamfile
new file mode 100644
index 0000000..329a8e0
--- /dev/null
+++ b/src/kenlm/lm/builder/Jamfile
@@ -0,0 +1,13 @@
+fakelib builder : [ glob *.cc : *test.cc *main.cc ]
+ ../../util//kenutil ../../util/stream//stream ../../util/double-conversion//double-conversion ..//kenlm ../common//common
+ : : : <library>/top//boost_thread $(timer-link) ;
+
+exe lmplz : lmplz_main.cc builder /top//boost_program_options ;
+
+exe dump_counts : dump_counts_main.cc builder ;
+
+alias programs : lmplz dump_counts ;
+
+import testing ;
+unit-test corpus_count_test : corpus_count_test.cc builder /top//boost_unit_test_framework ;
+unit-test adjust_counts_test : adjust_counts_test.cc builder /top//boost_unit_test_framework ;
diff --git a/src/joshua/decoder/ff/lm/kenlm/lm/builder/README.md b/src/kenlm/lm/builder/README.md
similarity index 100%
rename from src/joshua/decoder/ff/lm/kenlm/lm/builder/README.md
rename to src/kenlm/lm/builder/README.md
diff --git a/src/joshua/decoder/ff/lm/kenlm/lm/builder/TODO b/src/kenlm/lm/builder/TODO
similarity index 100%
rename from src/joshua/decoder/ff/lm/kenlm/lm/builder/TODO
rename to src/kenlm/lm/builder/TODO
diff --git a/src/kenlm/lm/builder/adjust_counts.cc b/src/kenlm/lm/builder/adjust_counts.cc
new file mode 100644
index 0000000..b4c5ba8
--- /dev/null
+++ b/src/kenlm/lm/builder/adjust_counts.cc
@@ -0,0 +1,353 @@
+#include "lm/builder/adjust_counts.hh"
+#include "lm/common/ngram_stream.hh"
+#include "lm/builder/payload.hh"
+#include "util/stream/timer.hh"
+
+#include <algorithm>
+#include <iostream>
+#include <limits>
+
+namespace lm { namespace builder {
+
+BadDiscountException::BadDiscountException() throw() {}
+BadDiscountException::~BadDiscountException() throw() {}
+
+namespace {
+// Return last word in full that is different.
+const WordIndex* FindDifference(const NGram<BuildingPayload> &full, const NGram<BuildingPayload> &lower_last) {
+ const WordIndex *cur_word = full.end() - 1;
+ const WordIndex *pre_word = lower_last.end() - 1;
+ // Find last difference.
+ for (; pre_word >= lower_last.begin() && *pre_word == *cur_word; --cur_word, --pre_word) {}
+ return cur_word;
+}
+
+class StatCollector {
+ public:
+ StatCollector(std::size_t order, std::vector<uint64_t> &counts, std::vector<uint64_t> &counts_pruned, std::vector<Discount> &discounts)
+ : orders_(order), full_(orders_.back()), counts_(counts), counts_pruned_(counts_pruned), discounts_(discounts) {
+ memset(&orders_[0], 0, sizeof(OrderStat) * order);
+ }
+
+ ~StatCollector() {}
+
+ void CalculateDiscounts(const DiscountConfig &config) {
+ counts_.resize(orders_.size());
+ counts_pruned_.resize(orders_.size());
+ for (std::size_t i = 0; i < orders_.size(); ++i) {
+ const OrderStat &s = orders_[i];
+ counts_[i] = s.count;
+ counts_pruned_[i] = s.count_pruned;
+ }
+
+ discounts_ = config.overwrite;
+ discounts_.resize(orders_.size());
+ for (std::size_t i = config.overwrite.size(); i < orders_.size(); ++i) {
+ const OrderStat &s = orders_[i];
+ try {
+ for (unsigned j = 1; j < 4; ++j) {
+ // TODO: Specialize error message for j == 3, meaning 3+
+ UTIL_THROW_IF(s.n[j] == 0, BadDiscountException, "Could not calculate Kneser-Ney discounts for "
+ << (i+1) << "-grams with adjusted count " << (j+1) << " because we didn't observe any "
+ << (i+1) << "-grams with adjusted count " << j << "; Is this small or artificial data?\n"
+ << "Try deduplicating the input. To override this error for e.g. a class-based model, rerun with --discount_fallback\n");
+ }
+
+ // See equation (26) in Chen and Goodman.
+ discounts_[i].amount[0] = 0.0;
+ float y = static_cast<float>(s.n[1]) / static_cast<float>(s.n[1] + 2.0 * s.n[2]);
+ for (unsigned j = 1; j < 4; ++j) {
+ discounts_[i].amount[j] = static_cast<float>(j) - static_cast<float>(j + 1) * y * static_cast<float>(s.n[j+1]) / static_cast<float>(s.n[j]);
+ UTIL_THROW_IF(discounts_[i].amount[j] < 0.0 || discounts_[i].amount[j] > j, BadDiscountException, "ERROR: " << (i+1) << "-gram discount out of range for adjusted count " << j << ": " << discounts_[i].amount[j]);
+ }
+ } catch (const BadDiscountException &e) {
+ switch (config.bad_action) {
+ case THROW_UP:
+ throw;
+ case COMPLAIN:
+ std::cerr << "Substituting fallback discounts for order " << i << ": D1=" << config.fallback.amount[1] << " D2=" << config.fallback.amount[2] << " D3+=" << config.fallback.amount[3] << std::endl;
+ case SILENT:
+ break;
+ }
+ discounts_[i] = config.fallback;
+ }
+ }
+ }
+
+ void Add(std::size_t order_minus_1, uint64_t count, bool pruned = false) {
+ OrderStat &stat = orders_[order_minus_1];
+ ++stat.count;
+ if (!pruned)
+ ++stat.count_pruned;
+ if (count < 5) ++stat.n[count];
+ }
+
+ void AddFull(uint64_t count, bool pruned = false) {
+ ++full_.count;
+ if (!pruned)
+ ++full_.count_pruned;
+ if (count < 5) ++full_.n[count];
+ }
+
+ private:
+ struct OrderStat {
+ // n_1 in equation 26 of Chen and Goodman etc
+ uint64_t n[5];
+ uint64_t count;
+ uint64_t count_pruned;
+ };
+
+ std::vector<OrderStat> orders_;
+ OrderStat &full_;
+
+ std::vector<uint64_t> &counts_;
+ std::vector<uint64_t> &counts_pruned_;
+ std::vector<Discount> &discounts_;
+};
+
+// Reads all entries in order like NGramStream does.
+// But deletes any entries that have <s> in the 1st (not 0th) position on the
+// way out by putting other entries in their place. This disrupts the sort
+// order but we don't care because the data is going to be sorted again.
+class CollapseStream {
+ public:
+ CollapseStream(const util::stream::ChainPosition &position, uint64_t prune_threshold, const std::vector<bool>& prune_words) :
+ current_(NULL, NGram<BuildingPayload>::OrderFromSize(position.GetChain().EntrySize())),
+ prune_threshold_(prune_threshold),
+ prune_words_(prune_words),
+ block_(position) {
+ StartBlock();
+ }
+
+ const NGram<BuildingPayload> &operator*() const { return current_; }
+    const NGram<BuildingPayload> *operator->() const { return &current_; }
+
+ operator bool() const { return block_; }
+
+ CollapseStream &operator++() {
+ assert(block_);
+
+ if (current_.begin()[1] == kBOS && current_.Base() < copy_from_) {
+ memcpy(current_.Base(), copy_from_, current_.TotalSize());
+ UpdateCopyFrom();
+
+ // Mark highest order n-grams for later pruning
+ if(current_.Value().count <= prune_threshold_) {
+ current_.Value().Mark();
+ }
+
+ if(!prune_words_.empty()) {
+ for(WordIndex* i = current_.begin(); i != current_.end(); i++) {
+ if(prune_words_[*i]) {
+ current_.Value().Mark();
+ break;
+ }
+ }
+ }
+
+ }
+
+ current_.NextInMemory();
+ uint8_t *block_base = static_cast<uint8_t*>(block_->Get());
+ if (current_.Base() == block_base + block_->ValidSize()) {
+ block_->SetValidSize(copy_from_ + current_.TotalSize() - block_base);
+ ++block_;
+ StartBlock();
+ }
+
+ // Mark highest order n-grams for later pruning
+ if(current_.Value().count <= prune_threshold_) {
+ current_.Value().Mark();
+ }
+
+ if(!prune_words_.empty()) {
+ for(WordIndex* i = current_.begin(); i != current_.end(); i++) {
+ if(prune_words_[*i]) {
+ current_.Value().Mark();
+ break;
+ }
+ }
+ }
+
+ return *this;
+ }
+
+ private:
+ void StartBlock() {
+ for (; ; ++block_) {
+ if (!block_) return;
+ if (block_->ValidSize()) break;
+ }
+ current_.ReBase(block_->Get());
+ copy_from_ = static_cast<uint8_t*>(block_->Get()) + block_->ValidSize();
+ UpdateCopyFrom();
+
+ // Mark highest order n-grams for later pruning
+ if(current_.Value().count <= prune_threshold_) {
+ current_.Value().Mark();
+ }
+
+ if(!prune_words_.empty()) {
+ for(WordIndex* i = current_.begin(); i != current_.end(); i++) {
+ if(prune_words_[*i]) {
+ current_.Value().Mark();
+ break;
+ }
+ }
+ }
+
+ }
+
+ // Find last without bos.
+ void UpdateCopyFrom() {
+ for (copy_from_ -= current_.TotalSize(); copy_from_ >= current_.Base(); copy_from_ -= current_.TotalSize()) {
+ if (NGram<BuildingPayload>(copy_from_, current_.Order()).begin()[1] != kBOS) break;
+ }
+ }
+
+ NGram<BuildingPayload> current_;
+
+ // Goes backwards in the block
+ uint8_t *copy_from_;
+ uint64_t prune_threshold_;
+ const std::vector<bool>& prune_words_;
+ util::stream::Link block_;
+};
+
+} // namespace
+
+void AdjustCounts::Run(const util::stream::ChainPositions &positions) {
+ UTIL_TIMER("(%w s) Adjusted counts\n");
+
+ const std::size_t order = positions.size();
+ StatCollector stats(order, counts_, counts_pruned_, discounts_);
+ if (order == 1) {
+
+ // Only unigrams. Just collect stats.
+ for (NGramStream<BuildingPayload> full(positions[0]); full; ++full) {
+
+ // Do not prune <s> </s> <unk>
+ if(*full->begin() > 2) {
+ if(full->Value().count <= prune_thresholds_[0])
+ full->Value().Mark();
+
+ if(!prune_words_.empty() && prune_words_[*full->begin()])
+ full->Value().Mark();
+ }
+
+ stats.AddFull(full->Value().UnmarkedCount(), full->Value().IsMarked());
+ }
+
+ stats.CalculateDiscounts(discount_config_);
+ return;
+ }
+
+ NGramStreams<BuildingPayload> streams;
+ streams.Init(positions, positions.size() - 1);
+
+ CollapseStream full(positions[positions.size() - 1], prune_thresholds_.back(), prune_words_);
+
+ // Initialization: <unk> has count 0 and so does <s>.
+ NGramStream<BuildingPayload> *lower_valid = streams.begin();
+ const NGramStream<BuildingPayload> *const streams_begin = streams.begin();
+ streams[0]->Value().count = 0;
+ *streams[0]->begin() = kUNK;
+ stats.Add(0, 0);
+ (++streams[0])->Value().count = 0;
+ *streams[0]->begin() = kBOS;
+ // <s> is not in stats yet because it will get put in later.
+
+ // This keeps track of actual counts for lower orders. It is not output
+ // (only adjusted counts are), but used to determine pruning.
+ std::vector<uint64_t> actual_counts(positions.size(), 0);
+ // Something of a hack: don't prune <s>.
+ actual_counts[0] = std::numeric_limits<uint64_t>::max();
+
+ // Iterate over full (the stream of the highest order ngrams)
+ for (; full; ++full) {
+ const WordIndex *different = FindDifference(*full, **lower_valid);
+ std::size_t same = full->end() - 1 - different;
+
+ // STEP 1: Output all the n-grams that changed.
+ for (; lower_valid >= streams.begin() + same; --lower_valid) {
+ uint64_t order_minus_1 = lower_valid - streams_begin;
+ if(actual_counts[order_minus_1] <= prune_thresholds_[order_minus_1])
+ (*lower_valid)->Value().Mark();
+
+ if(!prune_words_.empty()) {
+ for(WordIndex* i = (*lower_valid)->begin(); i != (*lower_valid)->end(); i++) {
+ if(prune_words_[*i]) {
+ (*lower_valid)->Value().Mark();
+ break;
+ }
+ }
+ }
+
+ stats.Add(order_minus_1, (*lower_valid)->Value().UnmarkedCount(), (*lower_valid)->Value().IsMarked());
+ ++*lower_valid;
+ }
+
+ // STEP 2: Update n-grams that still match.
+ // n-grams that match get count from the full entry.
+ for (std::size_t i = 0; i < same; ++i) {
+ actual_counts[i] += full->Value().UnmarkedCount();
+ }
+ // Increment the number of unique extensions for the longest match.
+ if (same) ++streams[same - 1]->Value().count;
+
+ // STEP 3: Initialize new n-grams.
+ // This is here because bos is also const WordIndex *, so copy gets
+ // consistent argument types.
+ const WordIndex *full_end = full->end();
+ // Initialize and mark as valid up to bos.
+ const WordIndex *bos;
+ for (bos = different; (bos > full->begin()) && (*bos != kBOS); --bos) {
+ NGramStream<BuildingPayload> &to = *++lower_valid;
+ std::copy(bos, full_end, to->begin());
+ to->Value().count = 1;
+ actual_counts[lower_valid - streams_begin] = full->Value().UnmarkedCount();
+ }
+ // Now bos indicates where <s> is or is the 0th word of full.
+ if (bos != full->begin()) {
+ // There is an <s> beyond the 0th word.
+ NGramStream<BuildingPayload> &to = *++lower_valid;
+ std::copy(bos, full_end, to->begin());
+
+ // Anything that begins with <s> has full non adjusted count.
+ to->Value().count = full->Value().UnmarkedCount();
+ actual_counts[lower_valid - streams_begin] = full->Value().UnmarkedCount();
+ } else {
+ stats.AddFull(full->Value().UnmarkedCount(), full->Value().IsMarked());
+ }
+ assert(lower_valid >= &streams[0]);
+ }
+
+ // The above loop outputs n-grams when it observes changes. This outputs
+ // the last n-grams.
+ for (NGramStream<BuildingPayload> *s = streams.begin(); s <= lower_valid; ++s) {
+ uint64_t lower_count = actual_counts[(*s)->Order() - 1];
+ if(lower_count <= prune_thresholds_[(*s)->Order() - 1])
+ (*s)->Value().Mark();
+
+ if(!prune_words_.empty()) {
+ for(WordIndex* i = (*s)->begin(); i != (*s)->end(); i++) {
+ if(prune_words_[*i]) {
+ (*s)->Value().Mark();
+ break;
+ }
+ }
+ }
+
+ stats.Add(s - streams.begin(), lower_count, (*s)->Value().IsMarked());
+ ++*s;
+ }
+ // Poison everyone! Except the N-grams which were already poisoned by the input.
+ for (NGramStream<BuildingPayload> *s = streams.begin(); s != streams.end(); ++s)
+ s->Poison();
+
+ stats.CalculateDiscounts(discount_config_);
+
+ // NOTE: See special early-return case for unigrams near the top of this function
+}
+
+}} // namespaces
diff --git a/src/kenlm/lm/builder/adjust_counts.hh b/src/kenlm/lm/builder/adjust_counts.hh
new file mode 100644
index 0000000..29319ba
--- /dev/null
+++ b/src/kenlm/lm/builder/adjust_counts.hh
@@ -0,0 +1,72 @@
+#ifndef LM_BUILDER_ADJUST_COUNTS_H
+#define LM_BUILDER_ADJUST_COUNTS_H
+
+#include "lm/builder/discount.hh"
+#include "lm/lm_exception.hh"
+#include "util/exception.hh"
+
+#include <vector>
+
+#include <stdint.h>
+
+namespace util { namespace stream { class ChainPositions; } }
+
+namespace lm {
+namespace builder {
+
+class BadDiscountException : public util::Exception {
+ public:
+ BadDiscountException() throw();
+ ~BadDiscountException() throw();
+};
+
+struct DiscountConfig {
+ // Overrides discounts for orders [1,overwrite.size()].
+ std::vector<Discount> overwrite;
+ // If discounting fails for an order, copy them from here.
+ Discount fallback;
+ // What to do when discounts are out of range or would trigger division by
+ // zero. If it does something other than THROW_UP, use fallback_discount.
+ WarningAction bad_action;
+};
+
+/* Compute adjusted counts.
+ * Input: unique suffix sorted N-grams (and just the N-grams) with raw counts.
+ * Output: [1,N]-grams with adjusted counts.
+ * [1,N)-grams are in suffix order
+ * N-grams are in undefined order (they're going to be sorted anyway).
+ */
+class AdjustCounts {
+ public:
+ // counts: output
+ // counts_pruned: output
+ // discounts: mostly output. If the input already has entries, they will be kept.
+ // prune_thresholds: input. n-grams with normal (not adjusted) count below this will be pruned.
+ AdjustCounts(
+ const std::vector<uint64_t> &prune_thresholds,
+ std::vector<uint64_t> &counts,
+ std::vector<uint64_t> &counts_pruned,
+ const std::vector<bool> &prune_words,
+ const DiscountConfig &discount_config,
+ std::vector<Discount> &discounts)
+ : prune_thresholds_(prune_thresholds), counts_(counts), counts_pruned_(counts_pruned),
+ prune_words_(prune_words), discount_config_(discount_config), discounts_(discounts)
+ {}
+
+ void Run(const util::stream::ChainPositions &positions);
+
+ private:
+ const std::vector<uint64_t> &prune_thresholds_;
+ std::vector<uint64_t> &counts_;
+ std::vector<uint64_t> &counts_pruned_;
+ const std::vector<bool> &prune_words_;
+
+ DiscountConfig discount_config_;
+ std::vector<Discount> &discounts_;
+};
+
+} // namespace builder
+} // namespace lm
+
+#endif // LM_BUILDER_ADJUST_COUNTS_H
+
diff --git a/src/kenlm/lm/builder/adjust_counts_test.cc b/src/kenlm/lm/builder/adjust_counts_test.cc
new file mode 100644
index 0000000..fff551f
--- /dev/null
+++ b/src/kenlm/lm/builder/adjust_counts_test.cc
@@ -0,0 +1,112 @@
+#include "lm/builder/adjust_counts.hh"
+
+#include "lm/common/ngram_stream.hh"
+#include "lm/builder/payload.hh"
+#include "util/scoped.hh"
+
+#include <boost/thread/thread.hpp>
+#define BOOST_TEST_MODULE AdjustCounts
+#include <boost/test/unit_test.hpp>
+
+namespace lm { namespace builder { namespace {
+
+class KeepCopy {
+ public:
+ KeepCopy() : size_(0) {}
+
+ void Run(const util::stream::ChainPosition &position) {
+ for (util::stream::Link link(position); link; ++link) {
+ mem_.call_realloc(size_ + link->ValidSize());
+ memcpy(static_cast<uint8_t*>(mem_.get()) + size_, link->Get(), link->ValidSize());
+ size_ += link->ValidSize();
+ }
+ }
+
+ uint8_t *Get() { return static_cast<uint8_t*>(mem_.get()); }
+ std::size_t Size() const { return size_; }
+
+ private:
+ util::scoped_malloc mem_;
+ std::size_t size_;
+};
+
+struct Gram4 {
+ WordIndex ids[4];
+ uint64_t count;
+};
+
+class WriteInput {
+ public:
+ void Run(const util::stream::ChainPosition &position) {
+ NGramStream<BuildingPayload> input(position);
+ Gram4 grams[] = {
+ {{0,0,0,0},10},
+ {{0,0,3,0},3},
+ // bos
+ {{1,1,1,2},5},
+ {{0,0,3,2},5},
+ };
+ for (size_t i = 0; i < sizeof(grams) / sizeof(Gram4); ++i, ++input) {
+ memcpy(input->begin(), grams[i].ids, sizeof(WordIndex) * 4);
+ input->Value().count = grams[i].count;
+ }
+ input.Poison();
+ }
+};
+
+BOOST_AUTO_TEST_CASE(Simple) {
+ KeepCopy outputs[4];
+ std::vector<uint64_t> counts;
+ std::vector<Discount> discount;
+ {
+ util::stream::ChainConfig config;
+ config.total_memory = 100;
+ config.block_count = 1;
+ util::stream::Chains chains(4);
+ for (unsigned i = 0; i < 4; ++i) {
+ config.entry_size = NGram<BuildingPayload>::TotalSize(i + 1);
+ chains.push_back(config);
+ }
+
+ chains[3] >> WriteInput();
+ util::stream::ChainPositions for_adjust(chains);
+ for (unsigned i = 0; i < 4; ++i) {
+ chains[i] >> boost::ref(outputs[i]);
+ }
+ chains >> util::stream::kRecycle;
+ std::vector<uint64_t> counts_pruned(4);
+ std::vector<uint64_t> prune_thresholds(4);
+ DiscountConfig discount_config;
+ discount_config.fallback = Discount();
+ discount_config.bad_action = THROW_UP;
+ BOOST_CHECK_THROW(AdjustCounts(prune_thresholds, counts, counts_pruned, std::vector<bool>(), discount_config, discount).Run(for_adjust), BadDiscountException);
+ }
+ BOOST_REQUIRE_EQUAL(4UL, counts.size());
+ BOOST_CHECK_EQUAL(4UL, counts[0]);
+ // These are no longer set because the discounts are bad.
+/* BOOST_CHECK_EQUAL(4UL, counts[1]);
+ BOOST_CHECK_EQUAL(3UL, counts[2]);
+ BOOST_CHECK_EQUAL(3UL, counts[3]);*/
+ BOOST_REQUIRE_EQUAL(NGram<BuildingPayload>::TotalSize(1) * 4, outputs[0].Size());
+ NGram<BuildingPayload> uni(outputs[0].Get(), 1);
+ BOOST_CHECK_EQUAL(kUNK, *uni.begin());
+ BOOST_CHECK_EQUAL(0ULL, uni.Value().count);
+ uni.NextInMemory();
+ BOOST_CHECK_EQUAL(kBOS, *uni.begin());
+ BOOST_CHECK_EQUAL(0ULL, uni.Value().count);
+ uni.NextInMemory();
+ BOOST_CHECK_EQUAL(0UL, *uni.begin());
+ BOOST_CHECK_EQUAL(2ULL, uni.Value().count);
+ uni.NextInMemory();
+ BOOST_CHECK_EQUAL(2ULL, uni.Value().count);
+ BOOST_CHECK_EQUAL(2UL, *uni.begin());
+
+ BOOST_REQUIRE_EQUAL(NGram<BuildingPayload>::TotalSize(2) * 4, outputs[1].Size());
+ NGram<BuildingPayload> bi(outputs[1].Get(), 2);
+ BOOST_CHECK_EQUAL(0UL, *bi.begin());
+ BOOST_CHECK_EQUAL(0UL, *(bi.begin() + 1));
+ BOOST_CHECK_EQUAL(1ULL, bi.Value().count);
+ bi.NextInMemory();
+}
+
+}}} // namespaces
diff --git a/src/kenlm/lm/builder/combine_counts.hh b/src/kenlm/lm/builder/combine_counts.hh
new file mode 100644
index 0000000..2eda517
--- /dev/null
+++ b/src/kenlm/lm/builder/combine_counts.hh
@@ -0,0 +1,31 @@
+#ifndef LM_BUILDER_COMBINE_COUNTS_H
+#define LM_BUILDER_COMBINE_COUNTS_H
+
+#include "lm/builder/payload.hh"
+#include "lm/common/ngram.hh"
+#include "lm/common/compare.hh"
+#include "lm/word_index.hh"
+#include "util/stream/sort.hh"
+
+#include <functional>
+#include <string>
+
+namespace lm {
+namespace builder {
+
+// Sum counts for the same n-gram.
+struct CombineCounts {
+ bool operator()(void *first_void, const void *second_void, const SuffixOrder &compare) const {
+ NGram<BuildingPayload> first(first_void, compare.Order());
+ // There isn't a const version of NGram.
+ NGram<BuildingPayload> second(const_cast<void*>(second_void), compare.Order());
+ if (memcmp(first.begin(), second.begin(), sizeof(WordIndex) * compare.Order())) return false;
+ first.Value().count += second.Value().count;
+ return true;
+ }
+};
+
+} // namespace builder
+} // namespace lm
+
+#endif // LM_BUILDER_COMBINE_COUNTS_H
diff --git a/src/kenlm/lm/builder/corpus_count.cc b/src/kenlm/lm/builder/corpus_count.cc
new file mode 100644
index 0000000..0414c22
--- /dev/null
+++ b/src/kenlm/lm/builder/corpus_count.cc
@@ -0,0 +1,239 @@
+#include "lm/builder/corpus_count.hh"
+
+#include "lm/builder/payload.hh"
+#include "lm/common/ngram.hh"
+#include "lm/lm_exception.hh"
+#include "lm/vocab.hh"
+#include "lm/word_index.hh"
+#include "util/file_stream.hh"
+#include "util/file.hh"
+#include "util/file_piece.hh"
+#include "util/murmur_hash.hh"
+#include "util/probing_hash_table.hh"
+#include "util/scoped.hh"
+#include "util/stream/chain.hh"
+#include "util/stream/timer.hh"
+#include "util/tokenize_piece.hh"
+
+#include <functional>
+
+#include <stdint.h>
+
+namespace lm {
+namespace builder {
+namespace {
+
+class DedupeHash : public std::unary_function<const WordIndex *, bool> {
+ public:
+ explicit DedupeHash(std::size_t order) : size_(order * sizeof(WordIndex)) {}
+
+ std::size_t operator()(const WordIndex *start) const {
+ return util::MurmurHashNative(start, size_);
+ }
+
+ private:
+ const std::size_t size_;
+};
+
+class DedupeEquals : public std::binary_function<const WordIndex *, const WordIndex *, bool> {
+ public:
+ explicit DedupeEquals(std::size_t order) : size_(order * sizeof(WordIndex)) {}
+
+ bool operator()(const WordIndex *first, const WordIndex *second) const {
+ return !memcmp(first, second, size_);
+ }
+
+ private:
+ const std::size_t size_;
+};
+
+struct DedupeEntry {
+ typedef WordIndex *Key;
+ Key GetKey() const { return key; }
+ void SetKey(WordIndex *to) { key = to; }
+ Key key;
+ static DedupeEntry Construct(WordIndex *at) {
+ DedupeEntry ret;
+ ret.key = at;
+ return ret;
+ }
+};
+
+
+// TODO: don't have this here, should be with probing hash table defaults?
+const float kProbingMultiplier = 1.5;
+
+typedef util::ProbingHashTable<DedupeEntry, DedupeHash, DedupeEquals> Dedupe;
+
+class Writer {
+ public:
+ Writer(std::size_t order, const util::stream::ChainPosition &position, void *dedupe_mem, std::size_t dedupe_mem_size)
+ : block_(position), gram_(block_->Get(), order),
+ dedupe_invalid_(order, std::numeric_limits<WordIndex>::max()),
+ dedupe_(dedupe_mem, dedupe_mem_size, &dedupe_invalid_[0], DedupeHash(order), DedupeEquals(order)),
+ buffer_(new WordIndex[order - 1]),
+ block_size_(position.GetChain().BlockSize()) {
+ dedupe_.Clear();
+ assert(Dedupe::Size(position.GetChain().BlockSize() / position.GetChain().EntrySize(), kProbingMultiplier) == dedupe_mem_size);
+ if (order == 1) {
+ // Add special words. AdjustCounts is responsible if order != 1.
+ AddUnigramWord(kUNK);
+ AddUnigramWord(kBOS);
+ }
+ }
+
+ ~Writer() {
+ block_->SetValidSize(reinterpret_cast<const uint8_t*>(gram_.begin()) - static_cast<const uint8_t*>(block_->Get()));
+ (++block_).Poison();
+ }
+
+ // Write context with a bunch of <s>
+ void StartSentence() {
+ for (WordIndex *i = gram_.begin(); i != gram_.end() - 1; ++i) {
+ *i = kBOS;
+ }
+ }
+
+ void Append(WordIndex word) {
+ *(gram_.end() - 1) = word;
+ Dedupe::MutableIterator at;
+ bool found = dedupe_.FindOrInsert(DedupeEntry::Construct(gram_.begin()), at);
+ if (found) {
+ // Already present.
+ NGram<BuildingPayload> already(at->key, gram_.Order());
+ ++(already.Value().count);
+ // Shift left by one.
+ memmove(gram_.begin(), gram_.begin() + 1, sizeof(WordIndex) * (gram_.Order() - 1));
+ return;
+ }
+ // Complete the write.
+ gram_.Value().count = 1;
+ // Prepare the next n-gram.
+ if (reinterpret_cast<uint8_t*>(gram_.begin()) + gram_.TotalSize() != static_cast<uint8_t*>(block_->Get()) + block_size_) {
+ NGram<BuildingPayload> last(gram_);
+ gram_.NextInMemory();
+ std::copy(last.begin() + 1, last.end(), gram_.begin());
+ return;
+ }
+ // Block end. Need to store the context in a temporary buffer.
+ std::copy(gram_.begin() + 1, gram_.end(), buffer_.get());
+ dedupe_.Clear();
+ block_->SetValidSize(block_size_);
+ gram_.ReBase((++block_)->Get());
+ std::copy(buffer_.get(), buffer_.get() + gram_.Order() - 1, gram_.begin());
+ }
+
+ private:
+ void AddUnigramWord(WordIndex index) {
+ *gram_.begin() = index;
+ gram_.Value().count = 0;
+ gram_.NextInMemory();
+ if (gram_.Base() == static_cast<uint8_t*>(block_->Get()) + block_size_) {
+ block_->SetValidSize(block_size_);
+ gram_.ReBase((++block_)->Get());
+ }
+ }
+
+ util::stream::Link block_;
+
+ NGram<BuildingPayload> gram_;
+
+ // This is the memory behind the invalid value in dedupe_.
+ std::vector<WordIndex> dedupe_invalid_;
+ // Hash table combiner implementation.
+ Dedupe dedupe_;
+
+ // Small buffer to hold existing ngrams when shifting across a block boundary.
+ boost::scoped_array<WordIndex> buffer_;
+
+ const std::size_t block_size_;
+};
+
+} // namespace
+
+float CorpusCount::DedupeMultiplier(std::size_t order) {
+ return kProbingMultiplier * static_cast<float>(sizeof(DedupeEntry)) / static_cast<float>(NGram<BuildingPayload>::TotalSize(order));
+}
+
+std::size_t CorpusCount::VocabUsage(std::size_t vocab_estimate) {
+ return ngram::GrowableVocab<ngram::WriteUniqueWords>::MemUsage(vocab_estimate);
+}
+
+CorpusCount::CorpusCount(util::FilePiece &from, int vocab_write, uint64_t &token_count, WordIndex &type_count, std::vector<bool> &prune_words, const std::string& prune_vocab_filename, std::size_t entries_per_block, WarningAction disallowed_symbol)
+ : from_(from), vocab_write_(vocab_write), token_count_(token_count), type_count_(type_count),
+ prune_words_(prune_words), prune_vocab_filename_(prune_vocab_filename),
+ dedupe_mem_size_(Dedupe::Size(entries_per_block, kProbingMultiplier)),
+ dedupe_mem_(util::MallocOrThrow(dedupe_mem_size_)),
+ disallowed_symbol_action_(disallowed_symbol) {
+}
+
+namespace {
+ void ComplainDisallowed(StringPiece word, WarningAction &action) {
+ switch (action) {
+ case SILENT:
+ return;
+ case COMPLAIN:
+ std::cerr << "Warning: " << word << " appears in the input. All instances of <s>, </s>, and <unk> will be interpreted as whitespace." << std::endl;
+ action = SILENT;
+ return;
+ case THROW_UP:
+ UTIL_THROW(FormatLoadException, "Special word " << word << " is not allowed in the corpus. I plan to support models containing <unk> in the future. Pass --skip_symbols to convert these symbols to whitespace.");
+ }
+ }
+} // namespace
+
+void CorpusCount::Run(const util::stream::ChainPosition &position) {
+ ngram::GrowableVocab<ngram::WriteUniqueWords> vocab(type_count_, vocab_write_);
+ token_count_ = 0;
+ type_count_ = 0;
+ const WordIndex end_sentence = vocab.FindOrInsert("</s>");
+ Writer writer(NGram<BuildingPayload>::OrderFromSize(position.GetChain().EntrySize()), position, dedupe_mem_.get(), dedupe_mem_size_);
+ uint64_t count = 0;
+ bool delimiters[256];
+ util::BoolCharacter::Build("\0\t\n\r ", delimiters);
+ try {
+ while(true) {
+ StringPiece line(from_.ReadLine());
+ writer.StartSentence();
+ for (util::TokenIter<util::BoolCharacter, true> w(line, delimiters); w; ++w) {
+ WordIndex word = vocab.FindOrInsert(*w);
+ if (word <= 2) {
+ ComplainDisallowed(*w, disallowed_symbol_action_);
+ continue;
+ }
+ writer.Append(word);
+ ++count;
+ }
+ writer.Append(end_sentence);
+ }
+ } catch (const util::EndOfFileException &e) {}
+ token_count_ = count;
+ type_count_ = vocab.Size();
+
+ // Create list of unigrams that are supposed to be pruned
+ if (!prune_vocab_filename_.empty()) {
+ try {
+ util::FilePiece prune_vocab_file(prune_vocab_filename_.c_str());
+
+ prune_words_.resize(vocab.Size(), true);
+ try {
+ while (true) {
+ StringPiece word(prune_vocab_file.ReadDelimited(delimiters));
+ prune_words_[vocab.Index(word)] = false;
+ }
+ } catch (const util::EndOfFileException &e) {}
+
+ // Never prune <unk>, <s>, </s>
+ prune_words_[kUNK] = false;
+ prune_words_[kBOS] = false;
+ prune_words_[kEOS] = false;
+
+ } catch (const util::Exception &e) {
+ std::cerr << e.what() << std::endl;
+ abort();
+ }
+ }
+}
+
+} // namespace builder
+} // namespace lm
diff --git a/src/kenlm/lm/builder/corpus_count.hh b/src/kenlm/lm/builder/corpus_count.hh
new file mode 100644
index 0000000..165505c
--- /dev/null
+++ b/src/kenlm/lm/builder/corpus_count.hh
@@ -0,0 +1,53 @@
+#ifndef LM_BUILDER_CORPUS_COUNT_H
+#define LM_BUILDER_CORPUS_COUNT_H
+
+#include "lm/lm_exception.hh"
+#include "lm/word_index.hh"
+#include "util/scoped.hh"
+
+#include <cstddef>
+#include <string>
+#include <stdint.h>
+#include <vector>
+
+namespace util {
+class FilePiece;
+namespace stream {
+class ChainPosition;
+} // namespace stream
+} // namespace util
+
+namespace lm {
+namespace builder {
+
+class CorpusCount {
+ public:
+ // Memory usage will be DedupeMultiplier(order) * block_size + total_chain_size + unknown vocab_hash_size
+ static float DedupeMultiplier(std::size_t order);
+
+ // How much memory vocabulary will use based on estimated size of the vocab.
+ static std::size_t VocabUsage(std::size_t vocab_estimate);
+
+ // token_count: out.
+ // type_count aka vocabulary size. Initialize to an estimate. It is set to the exact value.
+ CorpusCount(util::FilePiece &from, int vocab_write, uint64_t &token_count, WordIndex &type_count, std::vector<bool> &prune_words, const std::string& prune_vocab_filename, std::size_t entries_per_block, WarningAction disallowed_symbol);
+
+ void Run(const util::stream::ChainPosition &position);
+
+ private:
+ util::FilePiece &from_;
+ int vocab_write_;
+ uint64_t &token_count_;
+ WordIndex &type_count_;
+ std::vector<bool>& prune_words_;
+ const std::string& prune_vocab_filename_;
+
+ std::size_t dedupe_mem_size_;
+ util::scoped_malloc dedupe_mem_;
+
+ WarningAction disallowed_symbol_action_;
+};
+
+} // namespace builder
+} // namespace lm
+#endif // LM_BUILDER_CORPUS_COUNT_H
diff --git a/src/kenlm/lm/builder/corpus_count_test.cc b/src/kenlm/lm/builder/corpus_count_test.cc
new file mode 100644
index 0000000..88bcf96
--- /dev/null
+++ b/src/kenlm/lm/builder/corpus_count_test.cc
@@ -0,0 +1,79 @@
+#include "lm/builder/corpus_count.hh"
+
+#include "lm/builder/payload.hh"
+#include "lm/common/ngram_stream.hh"
+#include "lm/common/ngram.hh"
+
+#include "util/file.hh"
+#include "util/file_piece.hh"
+#include "util/tokenize_piece.hh"
+#include "util/stream/chain.hh"
+#include "util/stream/stream.hh"
+
+#define BOOST_TEST_MODULE CorpusCountTest
+#include <boost/test/unit_test.hpp>
+
+namespace lm { namespace builder { namespace {
+
+#define Check(str, cnt) { \
+ BOOST_REQUIRE(stream); \
+ w = stream->begin(); \
+ for (util::TokenIter<util::AnyCharacter, true> t(str, " "); t; ++t, ++w) { \
+ BOOST_CHECK_EQUAL(*t, v[*w]); \
+ } \
+ BOOST_CHECK_EQUAL((uint64_t)cnt, stream->Value().count); \
+ ++stream; \
+}
+
+BOOST_AUTO_TEST_CASE(Short) {
+ util::scoped_fd input_file(util::MakeTemp("corpus_count_test_temp"));
+ const char input[] = "looking on a little more loin\non a little more loin\non foo little more loin\nbar\n\n";
+ // Blocks of 10 are
+ // looking on a little more loin </s> on a little[duplicate] more[duplicate] loin[duplicate] </s>[duplicate] on[duplicate] foo
+ // little more loin </s> bar </s> </s>
+
+ util::WriteOrThrow(input_file.get(), input, sizeof(input) - 1);
+ util::FilePiece input_piece(input_file.release(), "temp file");
+
+ util::stream::ChainConfig config;
+ config.entry_size = NGram<BuildingPayload>::TotalSize(3);
+ config.total_memory = config.entry_size * 20;
+ config.block_count = 2;
+
+ util::scoped_fd vocab(util::MakeTemp("corpus_count_test_vocab"));
+
+ util::stream::Chain chain(config);
+ uint64_t token_count;
+ WordIndex type_count = 10;
+ std::vector<bool> prune_words;
+ CorpusCount counter(input_piece, vocab.get(), token_count, type_count, prune_words, "", chain.BlockSize() / chain.EntrySize(), SILENT);
+ chain >> boost::ref(counter);
+ NGramStream<BuildingPayload> stream(chain.Add());
+ chain >> util::stream::kRecycle;
+
+ const char *v[] = {"<unk>", "<s>", "</s>", "looking", "on", "a", "little", "more", "loin", "foo", "bar"};
+
+ WordIndex *w;
+
+ Check("<s> <s> looking", 1);
+ Check("<s> looking on", 1);
+ Check("looking on a", 1);
+ Check("on a little", 2);
+ Check("a little more", 2);
+ Check("little more loin", 2);
+ Check("more loin </s>", 2);
+ Check("<s> <s> on", 2);
+ Check("<s> on a", 1);
+ Check("<s> on foo", 1);
+ Check("on foo little", 1);
+ Check("foo little more", 1);
+ Check("little more loin", 1);
+ Check("more loin </s>", 1);
+ Check("<s> <s> bar", 1);
+ Check("<s> bar </s>", 1);
+ Check("<s> <s> </s>", 1);
+ BOOST_CHECK(!stream);
+ BOOST_CHECK_EQUAL(sizeof(v) / sizeof(const char*), type_count);
+}
+
+}}} // namespaces
diff --git a/src/kenlm/lm/builder/debug_print.hh b/src/kenlm/lm/builder/debug_print.hh
new file mode 100644
index 0000000..4b9f306
--- /dev/null
+++ b/src/kenlm/lm/builder/debug_print.hh
@@ -0,0 +1,70 @@
+#ifndef LM_BUILDER_DEBUG_PRINT_H
+#define LM_BUILDER_DEBUG_PRINT_H
+
+#include "lm/builder/payload.hh"
+#include "lm/common/print.hh"
+#include "lm/common/ngram_stream.hh"
+#include "util/file_stream.hh"
+#include "util/file.hh"
+
+#include <boost/lexical_cast.hpp>
+
+namespace lm { namespace builder {
+// Not defined, only specialized.
+template <class T> void PrintPayload(util::FileStream &to, const BuildingPayload &payload);
+template <> inline void PrintPayload<uint64_t>(util::FileStream &to, const BuildingPayload &payload) {
+ to << payload.count;
+}
+template <> inline void PrintPayload<Uninterpolated>(util::FileStream &to, const BuildingPayload &payload) {
+ to << log10(payload.uninterp.prob) << ' ' << log10(payload.uninterp.gamma);
+}
+template <> inline void PrintPayload<ProbBackoff>(util::FileStream &to, const BuildingPayload &payload) {
+ to << payload.complete.prob << ' ' << payload.complete.backoff;
+}
+
+// template parameter is the type stored.
+template <class V> class Print {
+ public:
+ static void DumpSeparateFiles(const VocabReconstitute &vocab, const std::string &file_base, util::stream::Chains &chains) {
+ for (unsigned int i = 0; i < chains.size(); ++i) {
+ std::string file(file_base + boost::lexical_cast<std::string>(i));
+ chains[i] >> Print(vocab, util::CreateOrThrow(file.c_str()));
+ }
+ }
+
+ explicit Print(const VocabReconstitute &vocab, int fd) : vocab_(vocab), to_(fd) {}
+
+ void Run(const util::stream::ChainPositions &chains) {
+ util::scoped_fd fd(to_);
+ util::FileStream out(to_);
+ NGramStreams<BuildingPayload> streams(chains);
+ for (NGramStream<BuildingPayload> *s = streams.begin(); s != streams.end(); ++s) {
+ DumpStream(*s, out);
+ }
+ }
+
+ void Run(const util::stream::ChainPosition &position) {
+ util::scoped_fd fd(to_);
+ util::FileStream out(to_);
+ NGramStream<BuildingPayload> stream(position);
+ DumpStream(stream, out);
+ }
+
+ private:
+ void DumpStream(NGramStream<BuildingPayload> &stream, util::FileStream &to) {
+ for (; stream; ++stream) {
+ PrintPayload<V>(to, stream->Value());
+ for (const WordIndex *w = stream->begin(); w != stream->end(); ++w) {
+ to << ' ' << vocab_.Lookup(*w) << '=' << *w;
+ }
+ to << '\n';
+ }
+ }
+
+ const VocabReconstitute &vocab_;
+ int to_;
+};
+
+}} // namespaces
+
+#endif // LM_BUILDER_DEBUG_PRINT_H
diff --git a/src/joshua/decoder/ff/lm/kenlm/lm/builder/discount.hh b/src/kenlm/lm/builder/discount.hh
similarity index 100%
rename from src/joshua/decoder/ff/lm/kenlm/lm/builder/discount.hh
rename to src/kenlm/lm/builder/discount.hh
diff --git a/src/kenlm/lm/builder/dump_counts_main.cc b/src/kenlm/lm/builder/dump_counts_main.cc
new file mode 100644
index 0000000..26078d0
--- /dev/null
+++ b/src/kenlm/lm/builder/dump_counts_main.cc
@@ -0,0 +1,36 @@
+#include "lm/common/print.hh"
+#include "lm/word_index.hh"
+#include "util/file.hh"
+#include "util/read_compressed.hh"
+
+#include <boost/lexical_cast.hpp>
+
+#include <iostream>
+#include <vector>
+
+int main(int argc, char *argv[]) {
+ if (argc != 4) {
+ std::cerr << "Usage: " << argv[0] << " counts vocabulary order\n"
+ "The counts file contains records with 4-byte vocabulary ids followed by 8-byte\n"
+ "counts. Each record has order many vocabulary ids.\n"
+ "The vocabulary file contains the words delimited by NULL in order of id.\n"
+ "The vocabulary file may not be compressed because it is mmapped but the counts\n"
+ "file can be compressed.\n";
+ return 1;
+ }
+ util::ReadCompressed counts(util::OpenReadOrThrow(argv[1]));
+ util::scoped_fd vocab_file(util::OpenReadOrThrow(argv[2]));
+ lm::VocabReconstitute vocab(vocab_file.get());
+ unsigned int order = boost::lexical_cast<unsigned int>(argv[3]);
+ std::vector<char> record(sizeof(uint32_t) * order + sizeof(uint64_t));
+ while (std::size_t got = counts.ReadOrEOF(&*record.begin(), record.size())) {
+ UTIL_THROW_IF(got != record.size(), util::Exception, "Read " << got << " bytes at the end of file, which is not a complete record of length " << record.size());
+ const lm::WordIndex *words = reinterpret_cast<const lm::WordIndex*>(&*record.begin());
+ for (const lm::WordIndex *i = words; i != words + order; ++i) {
+ UTIL_THROW_IF(*i >= vocab.Size(), util::Exception, "Vocab ID " << *i << " is larger than the vocab file's maximum of " << vocab.Size() << ". Are you sure you have the right order and vocab file for these counts?");
+ std::cout << vocab.Lookup(*i) << ' ';
+ }
+ // TODO don't use std::cout because it is slow. Add fast uint64_t printing support to FileStream.
+ std::cout << *reinterpret_cast<const uint64_t*>(words + order) << '\n';
+ }
+}
diff --git a/src/joshua/decoder/ff/lm/kenlm/lm/builder/hash_gamma.hh b/src/kenlm/lm/builder/hash_gamma.hh
similarity index 100%
rename from src/joshua/decoder/ff/lm/kenlm/lm/builder/hash_gamma.hh
rename to src/kenlm/lm/builder/hash_gamma.hh
diff --git a/src/kenlm/lm/builder/header_info.hh b/src/kenlm/lm/builder/header_info.hh
new file mode 100644
index 0000000..d01d049
--- /dev/null
+++ b/src/kenlm/lm/builder/header_info.hh
@@ -0,0 +1,28 @@
+#ifndef LM_BUILDER_HEADER_INFO_H
+#define LM_BUILDER_HEADER_INFO_H
+
+#include <string>
+#include <vector>
+#include <stdint.h>
+
+namespace lm { namespace builder {
+
+// Some configuration info that is used to add
+// comments to the beginning of an ARPA file
+struct HeaderInfo {
+ std::string input_file;
+ uint64_t token_count;
+ std::vector<uint64_t> counts_pruned;
+
+ HeaderInfo() {}
+
+ HeaderInfo(const std::string& input_file_in, uint64_t token_count_in, const std::vector<uint64_t> &counts_pruned_in)
+ : input_file(input_file_in), token_count(token_count_in), counts_pruned(counts_pruned_in) {}
+
+ // TODO: Add smoothing type
+ // TODO: More info if multiple models were interpolated
+};
+
+}} // namespaces
+
+#endif
diff --git a/src/kenlm/lm/builder/initial_probabilities.cc b/src/kenlm/lm/builder/initial_probabilities.cc
new file mode 100644
index 0000000..5b8d86d
--- /dev/null
+++ b/src/kenlm/lm/builder/initial_probabilities.cc
@@ -0,0 +1,306 @@
+#include "lm/builder/initial_probabilities.hh"
+
+#include "lm/builder/discount.hh"
+#include "lm/builder/hash_gamma.hh"
+#include "lm/builder/payload.hh"
+#include "lm/common/special.hh"
+#include "lm/common/ngram_stream.hh"
+#include "util/murmur_hash.hh"
+#include "util/file.hh"
+#include "util/stream/chain.hh"
+#include "util/stream/io.hh"
+#include "util/stream/stream.hh"
+
+#include <vector>
+
+namespace lm { namespace builder {
+
+namespace {
+struct BufferEntry {
+ // Gamma from page 20 of Chen and Goodman.
+ float gamma;
+ // \sum_w a(c w) for all w.
+ float denominator;
+};
+
+struct HashBufferEntry : public BufferEntry {
+ // Hash value of ngram. Used to join contexts with backoffs.
+ uint64_t hash_value;
+};
+
+// Reads all entries in order like NGramStream does.
+// But deletes any entries that have CutoffCount below or equal to pruning
+// threshold.
+class PruneNGramStream {
+ public:
+ PruneNGramStream(const util::stream::ChainPosition &position, const SpecialVocab &specials) :
+ current_(NULL, NGram<BuildingPayload>::OrderFromSize(position.GetChain().EntrySize())),
+ dest_(NULL, NGram<BuildingPayload>::OrderFromSize(position.GetChain().EntrySize())),
+ currentCount_(0),
+ block_(position),
+ specials_(specials)
+ {
+ StartBlock();
+ }
+
+ NGram<BuildingPayload> &operator*() { return current_; }
+ NGram<BuildingPayload> *operator->() { return ¤t_; }
+
+ operator bool() const {
+ return block_;
+ }
+
+ PruneNGramStream &operator++() {
+ assert(block_);
+ if(UTIL_UNLIKELY(current_.Order() == 1 && specials_.IsSpecial(*current_.begin())))
+ dest_.NextInMemory();
+ else if(currentCount_ > 0) {
+ if(dest_.Base() < current_.Base()) {
+ memcpy(dest_.Base(), current_.Base(), current_.TotalSize());
+ }
+ dest_.NextInMemory();
+ }
+
+ current_.NextInMemory();
+
+ uint8_t *block_base = static_cast<uint8_t*>(block_->Get());
+ if (current_.Base() == block_base + block_->ValidSize()) {
+ block_->SetValidSize(dest_.Base() - block_base);
+ ++block_;
+ StartBlock();
+ if (block_) {
+ currentCount_ = current_.Value().CutoffCount();
+ }
+ } else {
+ currentCount_ = current_.Value().CutoffCount();
+ }
+
+ return *this;
+ }
+
+ private:
+ void StartBlock() {
+ for (; ; ++block_) {
+ if (!block_) return;
+ if (block_->ValidSize()) break;
+ }
+ current_.ReBase(block_->Get());
+ currentCount_ = current_.Value().CutoffCount();
+
+ dest_.ReBase(block_->Get());
+ }
+
+ NGram<BuildingPayload> current_; // input iterator
+ NGram<BuildingPayload> dest_; // output iterator
+
+ uint64_t currentCount_;
+
+ util::stream::Link block_;
+
+ const SpecialVocab specials_;
+};
+
+// Extract an array of HashedGamma from an array of BufferEntry.
+class OnlyGamma {
+ public:
+ explicit OnlyGamma(bool pruning) : pruning_(pruning) {}
+
+ void Run(const util::stream::ChainPosition &position) {
+ for (util::stream::Link block_it(position); block_it; ++block_it) {
+ if(pruning_) {
+ const HashBufferEntry *in = static_cast<const HashBufferEntry*>(block_it->Get());
+ const HashBufferEntry *end = static_cast<const HashBufferEntry*>(block_it->ValidEnd());
+
+ // Just make it point to the beginning of the stream so it can be overwritten
+ // With HashGamma values. Do not attempt to interpret the values until set below.
+ HashGamma *out = static_cast<HashGamma*>(block_it->Get());
+ for (; in < end; out += 1, in += 1) {
+ // buffering, otherwise might overwrite values too early
+ float gamma_buf = in->gamma;
+ uint64_t hash_buf = in->hash_value;
+
+ out->gamma = gamma_buf;
+ out->hash_value = hash_buf;
+ }
+ block_it->SetValidSize((block_it->ValidSize() * sizeof(HashGamma)) / sizeof(HashBufferEntry));
+ }
+ else {
+ float *out = static_cast<float*>(block_it->Get());
+ const float *in = out;
+ const float *end = static_cast<const float*>(block_it->ValidEnd());
+ for (out += 1, in += 2; in < end; out += 1, in += 2) {
+ *out = *in;
+ }
+ block_it->SetValidSize(block_it->ValidSize() / 2);
+ }
+ }
+ }
+
+ private:
+ bool pruning_;
+};
+
+class AddRight {
+ public:
+ AddRight(const Discount &discount, const util::stream::ChainPosition &input, bool pruning)
+ : discount_(discount), input_(input), pruning_(pruning) {}
+
+ void Run(const util::stream::ChainPosition &output) {
+ NGramStream<BuildingPayload> in(input_);
+ util::stream::Stream out(output);
+
+ std::vector<WordIndex> previous(in->Order() - 1);
+ // Silly windows requires this workaround to just get an invalid pointer when empty.
+ void *const previous_raw = previous.empty() ? NULL : static_cast<void*>(&previous[0]);
+ const std::size_t size = sizeof(WordIndex) * previous.size();
+
+ for(; in; ++out) {
+ memcpy(previous_raw, in->begin(), size);
+ uint64_t denominator = 0;
+ uint64_t normalizer = 0;
+
+ uint64_t counts[4];
+ memset(counts, 0, sizeof(counts));
+ do {
+ denominator += in->Value().UnmarkedCount();
+
+ // Collect unused probability mass from pruning.
+ // Becomes 0 for unpruned ngrams.
+ normalizer += in->Value().UnmarkedCount() - in->Value().CutoffCount();
+
+ // Chen&Goodman do not mention counting based on cutoffs, but
+ // backoff becomes larger than 1 otherwise, so probably needs
+ // to count cutoffs. Counts normally without pruning.
+ if(in->Value().CutoffCount() > 0)
+ ++counts[std::min(in->Value().CutoffCount(), static_cast<uint64_t>(3))];
+
+ } while (++in && !memcmp(previous_raw, in->begin(), size));
+
+ BufferEntry &entry = *reinterpret_cast<BufferEntry*>(out.Get());
+ entry.denominator = static_cast<float>(denominator);
+ entry.gamma = 0.0;
+ for (unsigned i = 1; i <= 3; ++i) {
+ entry.gamma += discount_.Get(i) * static_cast<float>(counts[i]);
+ }
+
+ // Makes model sum to 1 with pruning (I hope).
+ entry.gamma += normalizer;
+
+ entry.gamma /= entry.denominator;
+
+ if(pruning_) {
+ // If pruning is enabled the stream actually contains HashBufferEntry, see InitialProbabilities(...),
+ // so add a hash value that identifies the current ngram.
+ static_cast<HashBufferEntry*>(&entry)->hash_value = util::MurmurHashNative(previous_raw, size);
+ }
+ }
+ out.Poison();
+ }
+
+ private:
+ const Discount &discount_;
+ const util::stream::ChainPosition input_;
+ bool pruning_;
+};
+
+class MergeRight {
+ public:
+ MergeRight(bool interpolate_unigrams, const util::stream::ChainPosition &from_adder, const Discount &discount, const SpecialVocab &specials)
+ : interpolate_unigrams_(interpolate_unigrams), from_adder_(from_adder), discount_(discount), specials_(specials) {}
+
+ // calculate the initial probability of each n-gram (before order-interpolation)
+ // Run() gets invoked once for each order
+ void Run(const util::stream::ChainPosition &primary) {
+ util::stream::Stream summed(from_adder_);
+
+ PruneNGramStream grams(primary, specials_);
+
+ // Without interpolation, the interpolation weight goes to <unk>.
+ if (grams->Order() == 1) {
+ BufferEntry sums(*static_cast<const BufferEntry*>(summed.Get()));
+ // Special case for <unk>
+ assert(*grams->begin() == kUNK);
+ float gamma_assign;
+ if (interpolate_unigrams_) {
+ // Default: treat <unk> like a zeroton.
+ gamma_assign = sums.gamma;
+ grams->Value().uninterp.prob = 0.0;
+ } else {
+ // SRI: give all the interpolation mass to <unk>
+ gamma_assign = 0.0;
+ grams->Value().uninterp.prob = sums.gamma;
+ }
+ grams->Value().uninterp.gamma = gamma_assign;
+
+ for (++grams; *grams->begin() != specials_.BOS(); ++grams) {
+ grams->Value().uninterp.prob = discount_.Apply(grams->Value().count) / sums.denominator;
+ grams->Value().uninterp.gamma = gamma_assign;
+ }
+
+ // Special case for <s>: probability 1.0. This allows <s> to be
+ // explicitly scored as part of the sentence without impacting
+ // probability and computes q correctly as b(<s>).
+ assert(*grams->begin() == specials_.BOS());
+ grams->Value().uninterp.prob = 1.0;
+ grams->Value().uninterp.gamma = 0.0;
+
+ while (++grams) {
+ grams->Value().uninterp.prob = discount_.Apply(grams->Value().count) / sums.denominator;
+ grams->Value().uninterp.gamma = gamma_assign;
+ }
+ ++summed;
+ return;
+ }
+
+ std::vector<WordIndex> previous(grams->Order() - 1);
+ const std::size_t size = sizeof(WordIndex) * previous.size();
+ for (; grams; ++summed) {
+ memcpy(&previous[0], grams->begin(), size);
+ const BufferEntry &sums = *static_cast<const BufferEntry*>(summed.Get());
+
+ do {
+ BuildingPayload &pay = grams->Value();
+ pay.uninterp.prob = discount_.Apply(grams->Value().UnmarkedCount()) / sums.denominator;
+ pay.uninterp.gamma = sums.gamma;
+ } while (++grams && !memcmp(&previous[0], grams->begin(), size));
+ }
+ }
+
+ private:
+ bool interpolate_unigrams_;
+ util::stream::ChainPosition from_adder_;
+ Discount discount_;
+ const SpecialVocab specials_;
+};
+
+} // namespace
+
+void InitialProbabilities(
+ const InitialProbabilitiesConfig &config,
+ const std::vector<Discount> &discounts,
+ util::stream::Chains &primary,
+ util::stream::Chains &second_in,
+ util::stream::Chains &gamma_out,
+ const std::vector<uint64_t> &prune_thresholds,
+ bool prune_vocab,
+ const SpecialVocab &specials) {
+ for (size_t i = 0; i < primary.size(); ++i) {
+ util::stream::ChainConfig gamma_config = config.adder_out;
+ if(prune_vocab || prune_thresholds[i] > 0)
+ gamma_config.entry_size = sizeof(HashBufferEntry);
+ else
+ gamma_config.entry_size = sizeof(BufferEntry);
+
+ util::stream::ChainPosition second(second_in[i].Add());
+ second_in[i] >> util::stream::kRecycle;
+ gamma_out.push_back(gamma_config);
+ gamma_out[i] >> AddRight(discounts[i], second, prune_vocab || prune_thresholds[i] > 0);
+
+ primary[i] >> MergeRight(config.interpolate_unigrams, gamma_out[i].Add(), discounts[i], specials);
+
+ // Don't bother with the OnlyGamma thread for something to discard.
+ if (i) gamma_out[i] >> OnlyGamma(prune_vocab || prune_thresholds[i] > 0);
+ }
+}
+
+}} // namespaces
diff --git a/src/kenlm/lm/builder/initial_probabilities.hh b/src/kenlm/lm/builder/initial_probabilities.hh
new file mode 100644
index 0000000..caeea58
--- /dev/null
+++ b/src/kenlm/lm/builder/initial_probabilities.hh
@@ -0,0 +1,45 @@
+#ifndef LM_BUILDER_INITIAL_PROBABILITIES_H
+#define LM_BUILDER_INITIAL_PROBABILITIES_H
+
+#include "lm/builder/discount.hh"
+#include "lm/word_index.hh"
+#include "util/stream/config.hh"
+
+#include <vector>
+
+namespace util { namespace stream { class Chains; } }
+
+namespace lm {
+class SpecialVocab;
+namespace builder {
+
+struct InitialProbabilitiesConfig {
+ // These should be small buffers to keep the adder from getting too far ahead
+ util::stream::ChainConfig adder_in;
+ util::stream::ChainConfig adder_out;
+ // SRILM doesn't normally interpolate unigrams.
+ bool interpolate_unigrams;
+};
+
+/* Compute initial (uninterpolated) probabilities
+ * primary: the normal chain of n-grams. Incoming is context sorted adjusted
+ * counts. Outgoing has uninterpolated probabilities for use by Interpolate.
+ * second_in: a second copy of the primary input. Discard the output.
+ * gamma_out: Computed gamma values are output on these chains in suffix order.
+ * The values are bare floats and should be buffered for interpolation to
+ * use.
+ */
+void InitialProbabilities(
+ const InitialProbabilitiesConfig &config,
+ const std::vector<Discount> &discounts,
+ util::stream::Chains &primary,
+ util::stream::Chains &second_in,
+ util::stream::Chains &gamma_out,
+ const std::vector<uint64_t> &prune_thresholds,
+ bool prune_vocab,
+ const SpecialVocab &vocab);
+
+} // namespace builder
+} // namespace lm
+
+#endif // LM_BUILDER_INITIAL_PROBABILITIES_H
diff --git a/src/kenlm/lm/builder/interpolate.cc b/src/kenlm/lm/builder/interpolate.cc
new file mode 100644
index 0000000..a62ef43
--- /dev/null
+++ b/src/kenlm/lm/builder/interpolate.cc
@@ -0,0 +1,166 @@
+#include "lm/builder/interpolate.hh"
+
+#include "lm/builder/hash_gamma.hh"
+#include "lm/builder/payload.hh"
+#include "lm/common/compare.hh"
+#include "lm/common/joint_order.hh"
+#include "lm/common/ngram_stream.hh"
+#include "lm/lm_exception.hh"
+#include "util/fixed_array.hh"
+#include "util/murmur_hash.hh"
+
+#include <iostream>
+#include <cassert>
+#include <cmath>
+
+namespace lm { namespace builder {
+namespace {
+
+/* Calculate q, the collapsed probability and backoff, as defined in
+ * @inproceedings{Heafield-rest,
+ * author = {Kenneth Heafield and Philipp Koehn and Alon Lavie},
+ * title = {Language Model Rest Costs and Space-Efficient Storage},
+ * year = {2012},
+ * month = {July},
+ * booktitle = {Proceedings of the Joint Conference on Empirical Methods in Natural Language Processing and Computational Natural Language Learning},
+ * address = {Jeju Island, Korea},
+ * pages = {1169--1178},
+ * url = {http://kheafield.com/professional/edinburgh/rest\_paper.pdf},
+ * }
+ * This is particularly convenient to calculate during interpolation because
+ * the needed backoff terms are already accessed at the same time.
+ */
+class OutputQ {
+ public:
+ explicit OutputQ(std::size_t order) : q_delta_(order) {}
+
+ void Gram(unsigned order_minus_1, float full_backoff, ProbBackoff &out) {
+ float &q_del = q_delta_[order_minus_1];
+ if (order_minus_1) {
+ // Divide by context's backoff (which comes in as out.backoff)
+ q_del = q_delta_[order_minus_1 - 1] / out.backoff * full_backoff;
+ } else {
+ q_del = full_backoff;
+ }
+ out.prob = log10f(out.prob * q_del);
+ // TODO: stop wastefully outputting this!
+ out.backoff = 0.0;
+ }
+
+ private:
+ // Product of backoffs in the numerator divided by backoffs in the
+ // denominator. Does not include
+ std::vector<float> q_delta_;
+};
+
+/* Default: output probability and backoff */
+class OutputProbBackoff {
+ public:
+ explicit OutputProbBackoff(std::size_t /*order*/) {}
+
+ void Gram(unsigned /*order_minus_1*/, float full_backoff, ProbBackoff &out) const {
+ // Correcting for numerical precision issues. Take that IRST.
+ out.prob = std::min(0.0f, log10f(out.prob));
+ out.backoff = log10f(full_backoff);
+ }
+};
+
+template <class Output> class Callback {
+ public:
+ Callback(float uniform_prob, const util::stream::ChainPositions &backoffs, const std::vector<uint64_t> &prune_thresholds, bool prune_vocab, const SpecialVocab &specials)
+ : backoffs_(backoffs.size()), probs_(backoffs.size() + 2),
+ prune_thresholds_(prune_thresholds),
+ prune_vocab_(prune_vocab),
+ output_(backoffs.size() + 1 /* order */),
+ specials_(specials) {
+ probs_[0] = uniform_prob;
+ for (std::size_t i = 0; i < backoffs.size(); ++i) {
+ backoffs_.push_back(backoffs[i]);
+ }
+ }
+
+ ~Callback() {
+ for (std::size_t i = 0; i < backoffs_.size(); ++i) {
+ if(prune_vocab_ || prune_thresholds_[i + 1] > 0)
+ while(backoffs_[i])
+ ++backoffs_[i];
+
+ if (backoffs_[i]) {
+ std::cerr << "Backoffs do not match for order " << (i + 1) << std::endl;
+ abort();
+ }
+ }
+ }
+
+ void Enter(unsigned order_minus_1, void *data) {
+ NGram<BuildingPayload> gram(data, order_minus_1 + 1);
+ BuildingPayload &pay = gram.Value();
+ pay.complete.prob = pay.uninterp.prob + pay.uninterp.gamma * probs_[order_minus_1];
+ probs_[order_minus_1 + 1] = pay.complete.prob;
+
+ float out_backoff;
+ if (order_minus_1 < backoffs_.size() && *(gram.end() - 1) != specials_.UNK() && *(gram.end() - 1) != specials_.EOS() && backoffs_[order_minus_1]) {
+ if(prune_vocab_ || prune_thresholds_[order_minus_1 + 1] > 0) {
+ //Compute hash value for current context
+ uint64_t current_hash = util::MurmurHashNative(gram.begin(), gram.Order() * sizeof(WordIndex));
+
+ const HashGamma *hashed_backoff = static_cast<const HashGamma*>(backoffs_[order_minus_1].Get());
+ while(current_hash != hashed_backoff->hash_value && ++backoffs_[order_minus_1])
+ hashed_backoff = static_cast<const HashGamma*>(backoffs_[order_minus_1].Get());
+
+ if(current_hash == hashed_backoff->hash_value) {
+ out_backoff = hashed_backoff->gamma;
+ ++backoffs_[order_minus_1];
+ } else {
+ // Has been pruned away so it is not a context anymore
+ out_backoff = 1.0;
+ }
+ } else {
+ out_backoff = *static_cast<const float*>(backoffs_[order_minus_1].Get());
+ ++backoffs_[order_minus_1];
+ }
+ } else {
+ // Not a context.
+ out_backoff = 1.0;
+ }
+
+ output_.Gram(order_minus_1, out_backoff, pay.complete);
+ }
+
+ void Exit(unsigned, void *) const {}
+
+ private:
+ util::FixedArray<util::stream::Stream> backoffs_;
+
+ std::vector<float> probs_;
+ const std::vector<uint64_t>& prune_thresholds_;
+ bool prune_vocab_;
+
+ Output output_;
+ const SpecialVocab specials_;
+};
+} // namespace
+
+Interpolate::Interpolate(uint64_t vocab_size, const util::stream::ChainPositions &backoffs, const std::vector<uint64_t>& prune_thresholds, bool prune_vocab, bool output_q, const SpecialVocab &specials)
+ : uniform_prob_(1.0 / static_cast<float>(vocab_size)), // Includes <unk> but excludes <s>.
+ backoffs_(backoffs),
+ prune_thresholds_(prune_thresholds),
+ prune_vocab_(prune_vocab),
+ output_q_(output_q),
+ specials_(specials) {}
+
+// perform order-wise interpolation
+void Interpolate::Run(const util::stream::ChainPositions &positions) {
+ assert(positions.size() == backoffs_.size() + 1);
+ if (output_q_) {
+ typedef Callback<OutputQ> C;
+ C callback(uniform_prob_, backoffs_, prune_thresholds_, prune_vocab_, specials_);
+ JointOrder<C, SuffixOrder>(positions, callback);
+ } else {
+ typedef Callback<OutputProbBackoff> C;
+ C callback(uniform_prob_, backoffs_, prune_thresholds_, prune_vocab_, specials_);
+ JointOrder<C, SuffixOrder>(positions, callback);
+ }
+}
+
+}} // namespaces
diff --git a/src/kenlm/lm/builder/interpolate.hh b/src/kenlm/lm/builder/interpolate.hh
new file mode 100644
index 0000000..d20cd54
--- /dev/null
+++ b/src/kenlm/lm/builder/interpolate.hh
@@ -0,0 +1,37 @@
+#ifndef LM_BUILDER_INTERPOLATE_H
+#define LM_BUILDER_INTERPOLATE_H
+
+#include "lm/common/special.hh"
+#include "lm/word_index.hh"
+#include "util/stream/multi_stream.hh"
+
+#include <vector>
+
+#include <stdint.h>
+
+namespace lm { namespace builder {
+
+/* Interpolate step.
+ * Input: suffix sorted n-grams with (p_uninterpolated, gamma) from
+ * InitialProbabilities.
+ * Output: suffix sorted n-grams with complete probability
+ */
+class Interpolate {
+ public:
+ // Normally vocab_size is the unigram count-1 (since p(<s>) = 0) but might
+ // be larger when the user specifies a consistent vocabulary size.
+ explicit Interpolate(uint64_t vocab_size, const util::stream::ChainPositions &backoffs, const std::vector<uint64_t> &prune_thresholds, bool prune_vocab, bool output_q, const SpecialVocab &specials);
+
+ void Run(const util::stream::ChainPositions &positions);
+
+ private:
+ float uniform_prob_;
+ util::stream::ChainPositions backoffs_;
+ const std::vector<uint64_t> prune_thresholds_;
+ bool prune_vocab_;
+ bool output_q_;
+ const SpecialVocab specials_;
+};
+
+}} // namespaces
+#endif // LM_BUILDER_INTERPOLATE_H
diff --git a/src/kenlm/lm/builder/lmplz_main.cc b/src/kenlm/lm/builder/lmplz_main.cc
new file mode 100644
index 0000000..cc3f381
--- /dev/null
+++ b/src/kenlm/lm/builder/lmplz_main.cc
@@ -0,0 +1,220 @@
+#include "lm/builder/output.hh"
+#include "lm/builder/pipeline.hh"
+#include "lm/common/size_option.hh"
+#include "lm/lm_exception.hh"
+#include "util/file.hh"
+#include "util/file_piece.hh"
+#include "util/usage.hh"
+
+#include <iostream>
+
+#include <boost/program_options.hpp>
+#include <boost/version.hpp>
+#include <vector>
+
+namespace {
+
+// Parse and validate pruning thresholds then return vector of threshold counts
+// for each n-gram order.
+std::vector<uint64_t> ParsePruning(const std::vector<std::string> &param, std::size_t order) {
+ // convert to vector of integers
+ std::vector<uint64_t> prune_thresholds;
+ prune_thresholds.reserve(order);
+ for (std::vector<std::string>::const_iterator it(param.begin()); it != param.end(); ++it) {
+ try {
+ prune_thresholds.push_back(boost::lexical_cast<uint64_t>(*it));
+ } catch(const boost::bad_lexical_cast &) {
+ UTIL_THROW(util::Exception, "Bad pruning threshold " << *it);
+ }
+ }
+
+ // Fill with zeros by default.
+ if (prune_thresholds.empty()) {
+ prune_thresholds.resize(order, 0);
+ return prune_thresholds;
+ }
+
+  // Validate pruning thresholds if specified:
+  // throw if more thresholds are given than the model has orders.
+ UTIL_THROW_IF(prune_thresholds.size() > order, util::Exception, "You specified pruning thresholds for orders 1 through " << prune_thresholds.size() << " but the model only has order " << order);
+ // threshold for unigram can only be 0 (no pruning)
+
+  // Check that thresholds are in non-decreasing order.
+ uint64_t lower_threshold = 0;
+ for (std::vector<uint64_t>::iterator it = prune_thresholds.begin(); it != prune_thresholds.end(); ++it) {
+ UTIL_THROW_IF(lower_threshold > *it, util::Exception, "Pruning thresholds should be in non-decreasing order. Otherwise substrings would be removed, which is bad for query-time data structures.");
+ lower_threshold = *it;
+ }
+
+ // Pad to all orders using the last value.
+ prune_thresholds.resize(order, prune_thresholds.back());
+ return prune_thresholds;
+}
+
+lm::builder::Discount ParseDiscountFallback(const std::vector<std::string> &param) {
+ lm::builder::Discount ret;
+ UTIL_THROW_IF(param.size() > 3, util::Exception, "Specify at most three fallback discounts: 1, 2, and 3+");
+ UTIL_THROW_IF(param.empty(), util::Exception, "Fallback discounting enabled, but no discount specified");
+ ret.amount[0] = 0.0;
+ for (unsigned i = 0; i < 3; ++i) {
+ float discount = boost::lexical_cast<float>(param[i < param.size() ? i : (param.size() - 1)]);
+ UTIL_THROW_IF(discount < 0.0 || discount > static_cast<float>(i+1), util::Exception, "The discount for count " << (i+1) << " was parsed as " << discount << " which is not in the range [0, " << (i+1) << "].");
+ ret.amount[i + 1] = discount;
+ }
+ return ret;
+}
+
+} // namespace
+
+int main(int argc, char *argv[]) {
+ try {
+ namespace po = boost::program_options;
+ po::options_description options("Language model building options");
+ lm::builder::PipelineConfig pipeline;
+
+ std::string text, intermediate, arpa;
+ std::vector<std::string> pruning;
+ std::vector<std::string> discount_fallback;
+ std::vector<std::string> discount_fallback_default;
+ discount_fallback_default.push_back("0.5");
+ discount_fallback_default.push_back("1");
+ discount_fallback_default.push_back("1.5");
+ bool verbose_header;
+
+ options.add_options()
+ ("help,h", po::bool_switch(), "Show this help message")
+ ("order,o", po::value<std::size_t>(&pipeline.order)
+#if BOOST_VERSION >= 104200
+ ->required()
+#endif
+ , "Order of the model")
+ ("interpolate_unigrams", po::value<bool>(&pipeline.initial_probs.interpolate_unigrams)->default_value(true)->implicit_value(true), "Interpolate the unigrams (default) as opposed to giving lots of mass to <unk> like SRI. If you want SRI's behavior with a large <unk> and the old lmplz default, use --interpolate_unigrams 0.")
+ ("skip_symbols", po::bool_switch(), "Treat <s>, </s>, and <unk> as whitespace instead of throwing an exception")
+ ("temp_prefix,T", po::value<std::string>(&pipeline.sort.temp_prefix)->default_value("/tmp/lm"), "Temporary file prefix")
+ ("memory,S", lm:: SizeOption(pipeline.sort.total_memory, util::GuessPhysicalMemory() ? "80%" : "1G"), "Sorting memory")
+ ("minimum_block", lm::SizeOption(pipeline.minimum_block, "8K"), "Minimum block size to allow")
+ ("sort_block", lm::SizeOption(pipeline.sort.buffer_size, "64M"), "Size of IO operations for sort (determines arity)")
+ ("block_count", po::value<std::size_t>(&pipeline.block_count)->default_value(2), "Block count (per order)")
+ ("vocab_estimate", po::value<lm::WordIndex>(&pipeline.vocab_estimate)->default_value(1000000), "Assume this vocabulary size for purposes of calculating memory in step 1 (corpus count) and pre-sizing the hash table")
+ ("vocab_pad", po::value<uint64_t>(&pipeline.vocab_size_for_unk)->default_value(0), "If the vocabulary is smaller than this value, pad with <unk> to reach this size. Requires --interpolate_unigrams")
+ ("verbose_header", po::bool_switch(&verbose_header), "Add a verbose header to the ARPA file that includes information such as token count, smoothing type, etc.")
+ ("text", po::value<std::string>(&text), "Read text from a file instead of stdin")
+ ("arpa", po::value<std::string>(&arpa), "Write ARPA to a file instead of stdout")
+ ("intermediate", po::value<std::string>(&intermediate), "Write ngrams to intermediate files. Turns off ARPA output (which can be reactivated by --arpa file). Forces --renumber on.")
+    ("renumber", po::bool_switch(&pipeline.renumber_vocabulary), "Renumber the vocabulary identifiers so that they are monotone with the hash of each string. This is consistent with the ordering used by the trie data structure.")
+ ("collapse_values", po::bool_switch(&pipeline.output_q), "Collapse probability and backoff into a single value, q that yields the same sentence-level probabilities. See http://kheafield.com/professional/edinburgh/rest_paper.pdf for more details, including a proof.")
+ ("prune", po::value<std::vector<std::string> >(&pruning)->multitoken(), "Prune n-grams with count less than or equal to the given threshold. Specify one value for each order i.e. 0 0 1 to prune singleton trigrams and above. The sequence of values must be non-decreasing and the last value applies to any remaining orders. Default is to not prune, which is equivalent to --prune 0.")
+ ("limit_vocab_file", po::value<std::string>(&pipeline.prune_vocab_file)->default_value(""), "Read allowed vocabulary separated by whitespace. N-grams that contain vocabulary items not in this list will be pruned. Can be combined with --prune arg")
+ ("discount_fallback", po::value<std::vector<std::string> >(&discount_fallback)->multitoken()->implicit_value(discount_fallback_default, "0.5 1 1.5"), "The closed-form estimate for Kneser-Ney discounts does not work without singletons or doubletons. It can also fail if these values are out of range. This option falls back to user-specified discounts when the closed-form estimate fails. Note that this option is generally a bad idea: you should deduplicate your corpus instead. However, class-based models need custom discounts because they lack singleton unigrams. Provide up to three discounts (for adjusted counts 1, 2, and 3+), which will be applied to all orders where the closed-form estimates fail.");
+ po::variables_map vm;
+ po::store(po::parse_command_line(argc, argv, options), vm);
+
+ if (argc == 1 || vm["help"].as<bool>()) {
+ std::cerr <<
+ "Builds unpruned language models with modified Kneser-Ney smoothing.\n\n"
+ "Please cite:\n"
+ "@inproceedings{Heafield-estimate,\n"
+ " author = {Kenneth Heafield and Ivan Pouzyrevsky and Jonathan H. Clark and Philipp Koehn},\n"
+ " title = {Scalable Modified {Kneser-Ney} Language Model Estimation},\n"
+ " year = {2013},\n"
+ " month = {8},\n"
+ " booktitle = {Proceedings of the 51st Annual Meeting of the Association for Computational Linguistics},\n"
+ " address = {Sofia, Bulgaria},\n"
+ " url = {http://kheafield.com/professional/edinburgh/estimate\\_paper.pdf},\n"
+ "}\n\n"
+ "Provide the corpus on stdin. The ARPA file will be written to stdout. Order of\n"
+ "the model (-o) is the only mandatory option. As this is an on-disk program,\n"
+ "setting the temporary file location (-T) and sorting memory (-S) is recommended.\n\n"
+ "Memory sizes are specified like GNU sort: a number followed by a unit character.\n"
+ "Valid units are \% for percentage of memory (supported platforms only) and (in\n"
+ "increasing powers of 1024): b, K, M, G, T, P, E, Z, Y. Default is K (*1024).\n";
+ uint64_t mem = util::GuessPhysicalMemory();
+ if (mem) {
+ std::cerr << "This machine has " << mem << " bytes of memory.\n\n";
+ } else {
+ std::cerr << "Unable to determine the amount of memory on this machine.\n\n";
+ }
+ std::cerr << options << std::endl;
+ return 1;
+ }
+
+ po::notify(vm);
+
+ // required() appeared in Boost 1.42.0.
+#if BOOST_VERSION < 104200
+ if (!vm.count("order")) {
+ std::cerr << "the option '--order' is required but missing" << std::endl;
+ return 1;
+ }
+#endif
+
+ if (pipeline.vocab_size_for_unk && !pipeline.initial_probs.interpolate_unigrams) {
+ std::cerr << "--vocab_pad requires --interpolate_unigrams be on" << std::endl;
+ return 1;
+ }
+
+ if (vm["skip_symbols"].as<bool>()) {
+ pipeline.disallowed_symbol_action = lm::COMPLAIN;
+ } else {
+ pipeline.disallowed_symbol_action = lm::THROW_UP;
+ }
+
+ if (vm.count("discount_fallback")) {
+ pipeline.discount.fallback = ParseDiscountFallback(discount_fallback);
+ pipeline.discount.bad_action = lm::COMPLAIN;
+ } else {
+ // Unused, just here to prevent the compiler from complaining about uninitialized.
+ pipeline.discount.fallback = lm::builder::Discount();
+ pipeline.discount.bad_action = lm::THROW_UP;
+ }
+
+ // parse pruning thresholds. These depend on order, so it is not done as a notifier.
+ pipeline.prune_thresholds = ParsePruning(pruning, pipeline.order);
+
+ if (!vm["limit_vocab_file"].as<std::string>().empty()) {
+ pipeline.prune_vocab = true;
+ }
+ else {
+ pipeline.prune_vocab = false;
+ }
+
+ util::NormalizeTempPrefix(pipeline.sort.temp_prefix);
+
+ lm::builder::InitialProbabilitiesConfig &initial = pipeline.initial_probs;
+ // TODO: evaluate options for these.
+ initial.adder_in.total_memory = 32768;
+ initial.adder_in.block_count = 2;
+ initial.adder_out.total_memory = 32768;
+ initial.adder_out.block_count = 2;
+ pipeline.read_backoffs = initial.adder_out;
+
+ // Read from stdin, write to stdout by default
+ util::scoped_fd in(0), out(1);
+ if (vm.count("text")) {
+ in.reset(util::OpenReadOrThrow(text.c_str()));
+ }
+ if (vm.count("arpa")) {
+ out.reset(util::CreateOrThrow(arpa.c_str()));
+ }
+
+ try {
+ bool writing_intermediate = vm.count("intermediate");
+ if (writing_intermediate) {
+ pipeline.renumber_vocabulary = true;
+ }
+ lm::builder::Output output(writing_intermediate ? intermediate : pipeline.sort.temp_prefix, writing_intermediate, pipeline.output_q);
+ if (!writing_intermediate || vm.count("arpa")) {
+ output.Add(new lm::builder::PrintHook(out.release(), verbose_header));
+ }
+ lm::builder::Pipeline(pipeline, in.release(), output);
+ } catch (const util::MallocException &e) {
+ std::cerr << e.what() << std::endl;
+ std::cerr << "Try rerunning with a more conservative -S setting than " << vm["memory"].as<std::string>() << std::endl;
+ return 1;
+ }
+ util::PrintUsage(std::cerr);
+ } catch (const std::exception &e) {
+ std::cerr << e.what() << std::endl;
+ return 1;
+ }
+}
diff --git a/src/kenlm/lm/builder/output.cc b/src/kenlm/lm/builder/output.cc
new file mode 100644
index 0000000..604fa22
--- /dev/null
+++ b/src/kenlm/lm/builder/output.cc
@@ -0,0 +1,52 @@
+#include "lm/builder/output.hh"
+
+#include "lm/common/model_buffer.hh"
+#include "lm/common/print.hh"
+#include "util/file_stream.hh"
+#include "util/stream/multi_stream.hh"
+
+#include <iostream>
+
+namespace lm { namespace builder {
+
+OutputHook::~OutputHook() {}
+
+Output::Output(StringPiece file_base, bool keep_buffer, bool output_q)
+ : buffer_(file_base, keep_buffer, output_q) {}
+
+void Output::SinkProbs(util::stream::Chains &chains) {
+ Apply(PROB_PARALLEL_HOOK, chains);
+ if (!buffer_.Keep() && !Have(PROB_SEQUENTIAL_HOOK)) {
+ chains >> util::stream::kRecycle;
+ chains.Wait(true);
+ return;
+ }
+ buffer_.Sink(chains, header_.counts_pruned);
+ chains >> util::stream::kRecycle;
+ chains.Wait(false);
+ if (Have(PROB_SEQUENTIAL_HOOK)) {
+ std::cerr << "=== 5/5 Writing ARPA model ===" << std::endl;
+ buffer_.Source(chains);
+ Apply(PROB_SEQUENTIAL_HOOK, chains);
+ chains >> util::stream::kRecycle;
+ chains.Wait(true);
+ }
+}
+
+void Output::Apply(HookType hook_type, util::stream::Chains &chains) {
+ for (boost::ptr_vector<OutputHook>::iterator entry = outputs_[hook_type].begin(); entry != outputs_[hook_type].end(); ++entry) {
+ entry->Sink(header_, VocabFile(), chains);
+ }
+}
+
+void PrintHook::Sink(const HeaderInfo &info, int vocab_file, util::stream::Chains &chains) {
+ if (verbose_header_) {
+ util::FileStream out(file_.get(), 50);
+ out << "# Input file: " << info.input_file << '\n';
+ out << "# Token count: " << info.token_count << '\n';
+ out << "# Smoothing: Modified Kneser-Ney" << '\n';
+ }
+ chains >> PrintARPA(vocab_file, file_.get(), info.counts_pruned);
+}
+
+}} // namespaces
diff --git a/src/kenlm/lm/builder/output.hh b/src/kenlm/lm/builder/output.hh
new file mode 100644
index 0000000..69d6c6d
--- /dev/null
+++ b/src/kenlm/lm/builder/output.hh
@@ -0,0 +1,85 @@
+#ifndef LM_BUILDER_OUTPUT_H
+#define LM_BUILDER_OUTPUT_H
+
+#include "lm/builder/header_info.hh"
+#include "lm/common/model_buffer.hh"
+#include "util/file.hh"
+
+#include <boost/ptr_container/ptr_vector.hpp>
+#include <boost/utility.hpp>
+
+namespace util { namespace stream { class Chains; class ChainPositions; } }
+
+/* Outputs from lmplz: ARPA, sharded files, etc */
+namespace lm { namespace builder {
+
+// These are different types of hooks. Values should be consecutive to enable a vector lookup.
+enum HookType {
+ // TODO: counts.
+ PROB_PARALLEL_HOOK, // Probability and backoff (or just q). Output must process the orders in parallel or there will be a deadlock.
+ PROB_SEQUENTIAL_HOOK, // Probability and backoff (or just q). Output can process orders any way it likes. This requires writing the data to disk then reading. Useful for ARPA files, which put unigrams first etc.
+ NUMBER_OF_HOOKS // Keep this last so we know how many values there are.
+};
+
+class OutputHook {
+ public:
+ explicit OutputHook(HookType hook_type) : type_(hook_type) {}
+
+ virtual ~OutputHook();
+
+ virtual void Sink(const HeaderInfo &info, int vocab_file, util::stream::Chains &chains) = 0;
+
+ HookType Type() const { return type_; }
+
+ private:
+ HookType type_;
+};
+
+class Output : boost::noncopyable {
+ public:
+ Output(StringPiece file_base, bool keep_buffer, bool output_q);
+
+ // Takes ownership.
+ void Add(OutputHook *hook) {
+ outputs_[hook->Type()].push_back(hook);
+ }
+
+ bool Have(HookType hook_type) const {
+ return !outputs_[hook_type].empty();
+ }
+
+ int VocabFile() const { return buffer_.VocabFile(); }
+
+ void SetHeader(const HeaderInfo &header) { header_ = header; }
+ const HeaderInfo &GetHeader() const { return header_; }
+
+ // This is called by the pipeline.
+ void SinkProbs(util::stream::Chains &chains);
+
+ unsigned int Steps() const { return Have(PROB_SEQUENTIAL_HOOK); }
+
+ private:
+ void Apply(HookType hook_type, util::stream::Chains &chains);
+
+ ModelBuffer buffer_;
+
+ boost::ptr_vector<OutputHook> outputs_[NUMBER_OF_HOOKS];
+ HeaderInfo header_;
+};
+
+class PrintHook : public OutputHook {
+ public:
+ // Takes ownership
+ PrintHook(int write_fd, bool verbose_header)
+ : OutputHook(PROB_SEQUENTIAL_HOOK), file_(write_fd), verbose_header_(verbose_header) {}
+
+ void Sink(const HeaderInfo &info, int vocab_file, util::stream::Chains &chains);
+
+ private:
+ util::scoped_fd file_;
+ bool verbose_header_;
+};
+
+}} // namespaces
+
+#endif // LM_BUILDER_OUTPUT_H
diff --git a/src/kenlm/lm/builder/payload.hh b/src/kenlm/lm/builder/payload.hh
new file mode 100644
index 0000000..ba12725
--- /dev/null
+++ b/src/kenlm/lm/builder/payload.hh
@@ -0,0 +1,48 @@
+#ifndef LM_BUILDER_PAYLOAD_H
+#define LM_BUILDER_PAYLOAD_H
+
+#include "lm/weights.hh"
+#include "lm/word_index.hh"
+#include <stdint.h>
+
+namespace lm { namespace builder {
+
+struct Uninterpolated {
+ float prob; // Uninterpolated probability.
+ float gamma; // Interpolation weight for lower order.
+};
+
+union BuildingPayload {
+ uint64_t count;
+ Uninterpolated uninterp;
+ ProbBackoff complete;
+
+ /*mjd**********************************************************************/
+ bool IsMarked() const {
+ return count >> (sizeof(count) * 8 - 1);
+ }
+
+ void Mark() {
+ count |= (1ul << (sizeof(count) * 8 - 1));
+ }
+
+ void Unmark() {
+ count &= ~(1ul << (sizeof(count) * 8 - 1));
+ }
+
+ uint64_t UnmarkedCount() const {
+ return count & ~(1ul << (sizeof(count) * 8 - 1));
+ }
+
+ uint64_t CutoffCount() const {
+ return IsMarked() ? 0 : UnmarkedCount();
+ }
+ /*mjd**********************************************************************/
+};
+
+const WordIndex kBOS = 1;
+const WordIndex kEOS = 2;
+
+}} // namespaces
+
+#endif // LM_BUILDER_PAYLOAD_H
diff --git a/src/kenlm/lm/builder/pipeline.cc b/src/kenlm/lm/builder/pipeline.cc
new file mode 100644
index 0000000..64e30f7
--- /dev/null
+++ b/src/kenlm/lm/builder/pipeline.cc
@@ -0,0 +1,385 @@
+#include "lm/builder/pipeline.hh"
+
+#include "lm/builder/adjust_counts.hh"
+#include "lm/builder/combine_counts.hh"
+#include "lm/builder/corpus_count.hh"
+#include "lm/builder/hash_gamma.hh"
+#include "lm/builder/initial_probabilities.hh"
+#include "lm/builder/interpolate.hh"
+#include "lm/builder/output.hh"
+#include "lm/common/compare.hh"
+#include "lm/common/renumber.hh"
+
+#include "lm/sizes.hh"
+#include "lm/vocab.hh"
+
+#include "util/exception.hh"
+#include "util/file.hh"
+#include "util/stream/io.hh"
+
+#include <algorithm>
+#include <iostream>
+#include <fstream>
+#include <vector>
+
+namespace lm { namespace builder {
+
+using util::stream::Sorts;
+
+namespace {
+
+void PrintStatistics(const std::vector<uint64_t> &counts, const std::vector<uint64_t> &counts_pruned, const std::vector<Discount> &discounts) {
+ std::cerr << "Statistics:\n";
+ for (size_t i = 0; i < counts.size(); ++i) {
+ std::cerr << (i + 1) << ' ' << counts_pruned[i];
+ if(counts[i] != counts_pruned[i])
+ std::cerr << "/" << counts[i];
+
+ for (size_t d = 1; d <= 3; ++d)
+ std::cerr << " D" << d << (d == 3 ? "+=" : "=") << discounts[i].amount[d];
+ std::cerr << '\n';
+ }
+}
+
+class Master {
+ public:
+ explicit Master(PipelineConfig &config, unsigned output_steps)
+ : config_(config), chains_(config.order), unigrams_(util::MakeTemp(config_.TempPrefix())), steps_(output_steps + 4) {
+ config_.minimum_block = std::max(NGram<BuildingPayload>::TotalSize(config_.order), config_.minimum_block);
+ }
+
+ const PipelineConfig &Config() const { return config_; }
+
+ util::stream::Chains &MutableChains() { return chains_; }
+
+ template <class T> Master &operator>>(const T &worker) {
+ chains_ >> worker;
+ return *this;
+ }
+
+ // This takes the (partially) sorted ngrams and sets up for adjusted counts.
+ void InitForAdjust(util::stream::Sort<SuffixOrder, CombineCounts> &ngrams, WordIndex types, std::size_t subtract_for_numbering) {
+ const std::size_t each_order_min = config_.minimum_block * config_.block_count;
+ // We know how many unigrams there are. Don't allocate more than needed to them.
+ const std::size_t min_chains = (config_.order - 1) * each_order_min +
+ std::min(types * NGram<BuildingPayload>::TotalSize(1), each_order_min);
+ // Prevent overflow in subtracting.
+ const std::size_t total = std::max<std::size_t>(config_.TotalMemory(), min_chains + subtract_for_numbering + config_.minimum_block);
+ // Do merge sort with calculated laziness.
+ const std::size_t merge_using = ngrams.Merge(std::min(total - min_chains - subtract_for_numbering, ngrams.DefaultLazy()));
+
+ std::vector<uint64_t> count_bounds(1, types);
+ CreateChains(total - merge_using - subtract_for_numbering, count_bounds);
+ ngrams.Output(chains_.back(), merge_using);
+ }
+
+ // For initial probabilities, but this is generic.
+ void SortAndReadTwice(const std::vector<uint64_t> &counts, Sorts<ContextOrder> &sorts, util::stream::Chains &second, util::stream::ChainConfig second_config) {
+ bool unigrams_are_sorted = !config_.renumber_vocabulary;
+ // Do merge first before allocating chain memory.
+ for (std::size_t i = 0; i < config_.order - unigrams_are_sorted; ++i) {
+ sorts[i].Merge(0);
+ }
+ // There's no lazy merge, so just divide memory amongst the chains.
+ CreateChains(config_.TotalMemory(), counts);
+ chains_.back().ActivateProgress();
+ if (unigrams_are_sorted) {
+ chains_[0] >> unigrams_.Source();
+ second_config.entry_size = NGram<BuildingPayload>::TotalSize(1);
+ second.push_back(second_config);
+ second.back() >> unigrams_.Source();
+ }
+ for (std::size_t i = unigrams_are_sorted; i < config_.order; ++i) {
+ util::scoped_fd fd(sorts[i - unigrams_are_sorted].StealCompleted());
+ chains_[i].SetProgressTarget(util::SizeOrThrow(fd.get()));
+ chains_[i] >> util::stream::PRead(util::DupOrThrow(fd.get()), true);
+ second_config.entry_size = NGram<BuildingPayload>::TotalSize(i + 1);
+ second.push_back(second_config);
+ second.back() >> util::stream::PRead(fd.release(), true);
+ }
+ }
+
+ // There is no sort after this, so go for broke on lazy merging.
+ template <class Compare> void MaximumLazyInput(const std::vector<uint64_t> &counts, Sorts<Compare> &sorts) {
+ // Determine the minimum we can use for all the chains.
+ std::size_t min_chains = 0;
+ for (std::size_t i = 0; i < config_.order; ++i) {
+ min_chains += std::min(counts[i] * NGram<BuildingPayload>::TotalSize(i + 1), static_cast<uint64_t>(config_.minimum_block));
+ }
+ std::size_t for_merge = min_chains > config_.TotalMemory() ? 0 : (config_.TotalMemory() - min_chains);
+ std::vector<std::size_t> laziness;
+ // Prioritize longer n-grams.
+ for (util::stream::Sort<SuffixOrder> *i = sorts.end() - 1; i >= sorts.begin(); --i) {
+ laziness.push_back(i->Merge(for_merge));
+ assert(for_merge >= laziness.back());
+ for_merge -= laziness.back();
+ }
+ std::reverse(laziness.begin(), laziness.end());
+
+ CreateChains(for_merge + min_chains, counts);
+ chains_.back().ActivateProgress();
+ chains_[0] >> unigrams_.Source();
+ for (std::size_t i = 1; i < config_.order; ++i) {
+ sorts[i - 1].Output(chains_[i], laziness[i - 1]);
+ }
+ }
+
+ template <class Compare> void SetupSorts(Sorts<Compare> &sorts, bool exclude_unigrams) {
+ sorts.Init(config_.order - exclude_unigrams);
+ // Unigrams don't get sorted because their order is always the same.
+ if (exclude_unigrams) chains_[0] >> unigrams_.Sink();
+ for (std::size_t i = exclude_unigrams; i < config_.order; ++i) {
+ sorts.push_back(chains_[i], config_.sort, Compare(i + 1));
+ }
+ chains_.Wait(true);
+ }
+
+ unsigned int Steps() const { return steps_; }
+
+ private:
+ // Create chains, allocating memory to them. Totally heuristic. Count
+ // bounds are upper bounds on the counts or not present.
+ void CreateChains(std::size_t remaining_mem, const std::vector<uint64_t> &count_bounds) {
+ std::vector<std::size_t> assignments;
+ assignments.reserve(config_.order);
+ // Start by assigning maximum memory usage (to be refined later).
+ for (std::size_t i = 0; i < count_bounds.size(); ++i) {
+ assignments.push_back(static_cast<std::size_t>(std::min(
+ static_cast<uint64_t>(remaining_mem),
+ count_bounds[i] * static_cast<uint64_t>(NGram<BuildingPayload>::TotalSize(i + 1)))));
+ }
+ assignments.resize(config_.order, remaining_mem);
+
+ // Now we know how much memory everybody wants. How much will they get?
+ // Proportional to this.
+ std::vector<float> portions;
+ // Indices of orders that have yet to be assigned.
+ std::vector<std::size_t> unassigned;
+ for (std::size_t i = 0; i < config_.order; ++i) {
+ portions.push_back(static_cast<float>((i+1) * NGram<BuildingPayload>::TotalSize(i+1)));
+ unassigned.push_back(i);
+ }
+ /* If somebody doesn't eat their full dinner, give it to the rest of the
+ * family. Then somebody else might not eat their full dinner etc. Ends
+ * when everybody unassigned is hungry.
+ */
+ float sum;
+ bool found_more;
+ std::vector<std::size_t> block_count(config_.order);
+ do {
+ sum = 0.0;
+ for (std::size_t i = 0; i < unassigned.size(); ++i) {
+ sum += portions[unassigned[i]];
+ }
+ found_more = false;
+ // If the proportional assignment is more than needed, give it just what it needs.
+ for (std::vector<std::size_t>::iterator i = unassigned.begin(); i != unassigned.end();) {
+ if (assignments[*i] <= remaining_mem * (portions[*i] / sum)) {
+ remaining_mem -= assignments[*i];
+ block_count[*i] = 1;
+ i = unassigned.erase(i);
+ found_more = true;
+ } else {
+ ++i;
+ }
+ }
+ } while (found_more);
+ for (std::vector<std::size_t>::iterator i = unassigned.begin(); i != unassigned.end(); ++i) {
+ assignments[*i] = remaining_mem * (portions[*i] / sum);
+ block_count[*i] = config_.block_count;
+ }
+ chains_.clear();
+ std::cerr << "Chain sizes:";
+ for (std::size_t i = 0; i < config_.order; ++i) {
+ // Always have enough for at least one record.
+ // This was crashing if e.g. there was no 5-gram.
+ assignments[i] = std::max(assignments[i], block_count[i] * NGram<BuildingPayload>::TotalSize(i + 1));
+ std::cerr << ' ' << (i+1) << ":" << assignments[i];
+ chains_.push_back(util::stream::ChainConfig(NGram<BuildingPayload>::TotalSize(i + 1), block_count[i], assignments[i]));
+ }
+ std::cerr << std::endl;
+ }
+
+ PipelineConfig &config_;
+
+ util::stream::Chains chains_;
+
+ util::stream::FileBuffer unigrams_;
+
+ const unsigned int steps_;
+};
+
+util::stream::Sort<SuffixOrder, CombineCounts> *CountText(int text_file /* input */, int vocab_file /* output */, Master &master, uint64_t &token_count, WordIndex &type_count, std::string &text_file_name, std::vector<bool> &prune_words) {
+ const PipelineConfig &config = master.Config();
+ std::cerr << "=== 1/" << master.Steps() << " Counting and sorting n-grams ===" << std::endl;
+
+ const std::size_t vocab_usage = CorpusCount::VocabUsage(config.vocab_estimate);
+ UTIL_THROW_IF(config.TotalMemory() < vocab_usage, util::Exception, "Vocab hash size estimate " << vocab_usage << " exceeds total memory " << config.TotalMemory());
+ std::size_t memory_for_chain =
+ // This much memory to work with after vocab hash table.
+ static_cast<float>(config.TotalMemory() - vocab_usage) /
+ // Solve for block size including the dedupe multiplier for one block.
+ (static_cast<float>(config.block_count) + CorpusCount::DedupeMultiplier(config.order)) *
+ // Chain likes memory expressed in terms of total memory.
+ static_cast<float>(config.block_count);
+ util::stream::Chain chain(util::stream::ChainConfig(NGram<BuildingPayload>::TotalSize(config.order), config.block_count, memory_for_chain));
+
+ type_count = config.vocab_estimate;
+ util::FilePiece text(text_file, NULL, &std::cerr);
+ text_file_name = text.FileName();
+ CorpusCount counter(text, vocab_file, token_count, type_count, prune_words, config.prune_vocab_file, chain.BlockSize() / chain.EntrySize(), config.disallowed_symbol_action);
+ chain >> boost::ref(counter);
+
+ util::scoped_ptr<util::stream::Sort<SuffixOrder, CombineCounts> > sorter(new util::stream::Sort<SuffixOrder, CombineCounts>(chain, config.sort, SuffixOrder(config.order), CombineCounts()));
+ chain.Wait(true);
+ return sorter.release();
+}
+
+void InitialProbabilities(const std::vector<uint64_t> &counts, const std::vector<uint64_t> &counts_pruned, const std::vector<Discount> &discounts, Master &master, Sorts<SuffixOrder> &primary, util::FixedArray<util::stream::FileBuffer> &gammas, const std::vector<uint64_t> &prune_thresholds, bool prune_vocab, const SpecialVocab &specials) {
+ const PipelineConfig &config = master.Config();
+ util::stream::Chains second(config.order);
+
+ {
+ Sorts<ContextOrder> sorts;
+ master.SetupSorts(sorts, !config.renumber_vocabulary);
+ PrintStatistics(counts, counts_pruned, discounts);
+ lm::ngram::ShowSizes(counts_pruned);
+ std::cerr << "=== 3/" << master.Steps() << " Calculating and sorting initial probabilities ===" << std::endl;
+ master.SortAndReadTwice(counts_pruned, sorts, second, config.initial_probs.adder_in);
+ }
+
+ util::stream::Chains gamma_chains(config.order);
+ InitialProbabilities(config.initial_probs, discounts, master.MutableChains(), second, gamma_chains, prune_thresholds, prune_vocab, specials);
+ // Don't care about gamma for unigrams (chain index 0).
+ gamma_chains[0] >> util::stream::kRecycle;
+ gammas.Init(config.order - 1);
+ for (std::size_t i = 1; i < config.order; ++i) {
+ gammas.push_back(util::MakeTemp(config.TempPrefix()));
+ gamma_chains[i] >> gammas[i - 1].Sink();
+ }
+ // Has to be done here due to gamma_chains scope.
+ master.SetupSorts(primary, true);
+}
+
+void InterpolateProbabilities(const std::vector<uint64_t> &counts, Master &master, Sorts<SuffixOrder> &primary, util::FixedArray<util::stream::FileBuffer> &gammas, Output &output, const SpecialVocab &specials) {
+ std::cerr << "=== 4/" << master.Steps() << " Calculating and writing order-interpolated probabilities ===" << std::endl;
+ const PipelineConfig &config = master.Config();
+ master.MaximumLazyInput(counts, primary);
+
+ util::stream::Chains gamma_chains(config.order - 1);
+ for (std::size_t i = 0; i < config.order - 1; ++i) {
+ util::stream::ChainConfig read_backoffs(config.read_backoffs);
+
+ if(config.prune_vocab || config.prune_thresholds[i + 1] > 0)
+ read_backoffs.entry_size = sizeof(HashGamma);
+ else
+ read_backoffs.entry_size = sizeof(float);
+
+ gamma_chains.push_back(read_backoffs);
+ gamma_chains.back() >> gammas[i].Source(true);
+ }
+ master >> Interpolate(std::max(master.Config().vocab_size_for_unk, counts[0] - 1 /* <s> is not included */), util::stream::ChainPositions(gamma_chains), config.prune_thresholds, config.prune_vocab, config.output_q, specials);
+ gamma_chains >> util::stream::kRecycle;
+ output.SinkProbs(master.MutableChains());
+}
+
+class VocabNumbering {
+ public:
+ VocabNumbering(int final_vocab, StringPiece temp_prefix, bool renumber)
+ : final_vocab_(final_vocab),
+ renumber_(renumber),
+ specials_(kBOS, kEOS) {
+ if (renumber) {
+ temporary_.reset(util::MakeTemp(temp_prefix));
+ }
+ }
+
+ int WriteOnTheFly() const { return renumber_ ? temporary_.get() : final_vocab_; }
+
+ // Compute the vocabulary mapping and return the memory used.
+ std::size_t ComputeMapping(WordIndex type_count) {
+ if (!renumber_) return 0;
+ ngram::SortedVocabulary::ComputeRenumbering(type_count, temporary_.get(), final_vocab_, vocab_mapping_);
+ temporary_.reset();
+ return sizeof(WordIndex) * vocab_mapping_.size();
+ }
+
+ void ApplyRenumber(util::stream::Chains &chains) {
+ if (!renumber_) return;
+ for (std::size_t i = 0; i < chains.size(); ++i) {
+ chains[i] >> Renumber(&*vocab_mapping_.begin(), i + 1);
+ }
+ specials_ = SpecialVocab(vocab_mapping_[specials_.BOS()], vocab_mapping_[specials_.EOS()]);
+ }
+
+ const SpecialVocab &Specials() const { return specials_; }
+
+ private:
+ int final_vocab_;
+ // Out of order vocab file created on the fly.
+ util::scoped_fd temporary_;
+
+ bool renumber_;
+
+ std::vector<WordIndex> vocab_mapping_;
+
+ SpecialVocab specials_;
+};
+
+} // namespace
+
+void Pipeline(PipelineConfig &config, int text_file, Output &output) {
+ // Some fail-fast sanity checks.
+ if (config.sort.buffer_size * 4 > config.TotalMemory()) {
+ config.sort.buffer_size = config.TotalMemory() / 4;
+ std::cerr << "Warning: changing sort block size to " << config.sort.buffer_size << " bytes due to low total memory." << std::endl;
+ }
+ if (config.minimum_block < NGram<BuildingPayload>::TotalSize(config.order)) {
+ config.minimum_block = NGram<BuildingPayload>::TotalSize(config.order);
+ std::cerr << "Warning: raising minimum block to " << config.minimum_block << " to fit an ngram in every block." << std::endl;
+ }
+ UTIL_THROW_IF(config.sort.buffer_size < config.minimum_block, util::Exception, "Sort block size " << config.sort.buffer_size << " is below the minimum block size " << config.minimum_block << ".");
+ UTIL_THROW_IF(config.TotalMemory() < config.minimum_block * config.order * config.block_count, util::Exception,
+ "Not enough memory to fit " << (config.order * config.block_count) << " blocks with minimum size " << config.minimum_block << ". Increase memory to " << (config.minimum_block * config.order * config.block_count) << " bytes or decrease the minimum block size.");
+
+ Master master(config, output.Steps());
+ // master's destructor will wait for chains. But they might be deadlocked if
+ // this thread dies because e.g. it ran out of memory.
+ try {
+ VocabNumbering numbering(output.VocabFile(), config.TempPrefix(), config.renumber_vocabulary);
+ uint64_t token_count;
+ WordIndex type_count;
+ std::string text_file_name;
+ std::vector<bool> prune_words;
+ util::scoped_ptr<util::stream::Sort<SuffixOrder, CombineCounts> > sorted_counts(
+ CountText(text_file, numbering.WriteOnTheFly(), master, token_count, type_count, text_file_name, prune_words));
+ std::cerr << "Unigram tokens " << token_count << " types " << type_count << std::endl;
+
+ // Create vocab mapping, which uses temporary memory, while nothing else is happening.
+ std::size_t subtract_for_numbering = numbering.ComputeMapping(type_count);
+
+ std::cerr << "=== 2/" << master.Steps() << " Calculating and sorting adjusted counts ===" << std::endl;
+ master.InitForAdjust(*sorted_counts, type_count, subtract_for_numbering);
+ sorted_counts.reset();
+
+ std::vector<uint64_t> counts;
+ std::vector<uint64_t> counts_pruned;
+ std::vector<Discount> discounts;
+ master >> AdjustCounts(config.prune_thresholds, counts, counts_pruned, prune_words, config.discount, discounts);
+ numbering.ApplyRenumber(master.MutableChains());
+
+ {
+ util::FixedArray<util::stream::FileBuffer> gammas;
+ Sorts<SuffixOrder> primary;
+ InitialProbabilities(counts, counts_pruned, discounts, master, primary, gammas, config.prune_thresholds, config.prune_vocab, numbering.Specials());
+ output.SetHeader(HeaderInfo(text_file_name, token_count, counts_pruned));
+ // Also does output.
+ InterpolateProbabilities(counts_pruned, master, primary, gammas, output, numbering.Specials());
+ }
+ } catch (const util::Exception &e) {
+ std::cerr << e.what() << std::endl;
+ abort();
+ }
+}
+
+}} // namespaces
diff --git a/src/kenlm/lm/builder/pipeline.hh b/src/kenlm/lm/builder/pipeline.hh
new file mode 100644
index 0000000..66f1fd9
--- /dev/null
+++ b/src/kenlm/lm/builder/pipeline.hh
@@ -0,0 +1,76 @@
+#ifndef LM_BUILDER_PIPELINE_H
+#define LM_BUILDER_PIPELINE_H
+
+#include "lm/builder/adjust_counts.hh"
+#include "lm/builder/initial_probabilities.hh"
+#include "lm/builder/header_info.hh"
+#include "lm/lm_exception.hh"
+#include "lm/word_index.hh"
+#include "util/stream/config.hh"
+#include "util/file_piece.hh"
+
+#include <string>
+#include <cstddef>
+
+namespace lm { namespace builder {
+
+class Output;
+
+struct PipelineConfig {
+ std::size_t order;
+ util::stream::SortConfig sort;
+ InitialProbabilitiesConfig initial_probs;
+ util::stream::ChainConfig read_backoffs;
+
+ // Estimated vocabulary size. Used for sizing CorpusCount memory and
+ // initial probing hash table sizing, also in CorpusCount.
+ lm::WordIndex vocab_estimate;
+
+ // Minimum block size to tolerate.
+ std::size_t minimum_block;
+
+ // Number of blocks to use. This will be overridden to 1 if everything fits.
+ std::size_t block_count;
+
+ // n-gram count thresholds for pruning. A value of 0 means no pruning for
+ // the corresponding n-gram order.
+ std::vector<uint64_t> prune_thresholds; //mjd
+ bool prune_vocab;
+ std::string prune_vocab_file;
+
+ /* Renumber the vocabulary the way the trie likes it? */
+ bool renumber_vocabulary;
+
+ // What to do with discount failures.
+ DiscountConfig discount;
+
+ // Compute collapsed q values instead of probability and backoff
+ bool output_q;
+
+ /* Computing the perplexity of LMs with different vocabularies is hard. For
+ * example, the lowest perplexity is attained by a unigram model that
+ * predicts p(<unk>) = 1 and has no other vocabulary. Also, linearly
+ * interpolated models will sum to more than 1 because <unk> is duplicated
+ * (SRI just pretends p(<unk>) = 0 for these purposes, which makes it sum to
+ * 1 but comes with its own problems). This option will make the vocabulary
+ * a particular size by replicating <unk> multiple times for purposes of
+ * computing vocabulary size. It has no effect if the actual vocabulary is
+ * larger. This parameter serves the same purpose as IRSTLM's "dub".
+ */
+ uint64_t vocab_size_for_unk;
+
+ /* What to do the first time <s>, </s>, or <unk> appears in the input. If
+ * this is anything but THROW_UP, then the symbol will always be treated as
+ * whitespace.
+ */
+ WarningAction disallowed_symbol_action;
+
+ const std::string &TempPrefix() const { return sort.temp_prefix; }
+ std::size_t TotalMemory() const { return sort.total_memory; }
+};
+
+// Takes ownership of text_file; results are written through the Output object.
+void Pipeline(PipelineConfig &config, int text_file, Output &output);
+
+}} // namespaces
+#endif // LM_BUILDER_PIPELINE_H
diff --git a/src/kenlm/lm/common/CMakeLists.txt b/src/kenlm/lm/common/CMakeLists.txt
new file mode 100644
index 0000000..942e24b
--- /dev/null
+++ b/src/kenlm/lm/common/CMakeLists.txt
@@ -0,0 +1,40 @@
+cmake_minimum_required(VERSION 2.8.8)
+#
+# The KenLM cmake files make use of add_library(... OBJECTS ...)
+#
+# This syntax allows grouping of source files when compiling
+# (effectively creating "fake" libraries based on source subdirs).
+#
+# This syntax was only added in cmake version 2.8.8
+#
+# see http://www.cmake.org/Wiki/CMake/Tutorials/Object_Library
+
+
+# This CMake file was created by Lane Schwartz <dowobeha@gmail.com>
+
+# Explicitly list the source files for this subdirectory
+#
+# If you add any source files to this subdirectory
+# that should be included in the kenlm library,
+# (this excludes any unit test files)
+# you should add them to the following list:
+#
+# In order to set correct paths to these files
+# in case this variable is referenced by CMake files in the parent directory,
+# we prefix all files with ${CMAKE_CURRENT_SOURCE_DIR}.
+#
+set(KENLM_COMMON_SOURCE
+ ${CMAKE_CURRENT_SOURCE_DIR}/model_buffer.cc
+ ${CMAKE_CURRENT_SOURCE_DIR}/print.cc
+ ${CMAKE_CURRENT_SOURCE_DIR}/renumber.cc
+ ${CMAKE_CURRENT_SOURCE_DIR}/size_option.cc
+ )
+
+
+# Group these objects together for later use.
+#
+# Given add_library(foo OBJECT ${my_foo_sources}),
+# refer to these objects as $<TARGET_OBJECTS:foo>
+#
+add_library(kenlm_common OBJECT ${KENLM_COMMON_SOURCE})
+
diff --git a/src/kenlm/lm/common/Jamfile b/src/kenlm/lm/common/Jamfile
new file mode 100644
index 0000000..c9bdfd0
--- /dev/null
+++ b/src/kenlm/lm/common/Jamfile
@@ -0,0 +1,2 @@
+fakelib common : [ glob *.cc : *test.cc *main.cc ]
+ ../../util//kenutil ../../util/stream//stream ../../util/double-conversion//double-conversion ..//kenlm /top//boost_program_options ;
diff --git a/src/kenlm/lm/common/compare.hh b/src/kenlm/lm/common/compare.hh
new file mode 100644
index 0000000..1c7cd24
--- /dev/null
+++ b/src/kenlm/lm/common/compare.hh
@@ -0,0 +1,174 @@
+#ifndef LM_COMMON_COMPARE_H
+#define LM_COMMON_COMPARE_H
+
+#include "lm/word_index.hh"
+
+#include <functional>
+#include <string>
+
+namespace lm {
+
+/**
+ * Abstract parent class for defining custom n-gram comparators.
+ */
+template <class Child> class Comparator : public std::binary_function<const void *, const void *, bool> {
+ public:
+
+ /**
+ * Constructs a comparator capable of comparing two n-grams.
+ *
+ * @param order Number of words in each n-gram
+ */
+ explicit Comparator(std::size_t order) : order_(order) {}
+
+ /**
+ * Applies the comparator using the Compare method that must be defined in any class that inherits from this class.
+ *
+ * @param lhs A pointer to the n-gram on the left-hand side of the comparison
+ * @param rhs A pointer to the n-gram on the right-hand side of the comparison
+ *
+ * @see ContextOrder::Compare
+ * @see PrefixOrder::Compare
+ * @see SuffixOrder::Compare
+ */
+ inline bool operator()(const void *lhs, const void *rhs) const {
+ return static_cast<const Child*>(this)->Compare(static_cast<const WordIndex*>(lhs), static_cast<const WordIndex*>(rhs));
+ }
+
+ /** Gets the n-gram order defined for this comparator. */
+ std::size_t Order() const { return order_; }
+
+ protected:
+ std::size_t order_;
+};
+
+/**
+ * N-gram comparator that compares n-grams according to their reverse (suffix) order.
+ *
+ * This comparator compares n-grams lexicographically, one word at a time,
+ * beginning with the last word of each n-gram and ending with the first word of each n-gram.
+ *
+ * Some examples of n-gram comparisons as defined by this comparator:
+ * - a b c == a b c
+ * - a b c < a b d
+ * - a b c > a d b
+ * - a b c > a b b
+ * - a b c > x a c
+ * - a b c < x y z
+ */
+class SuffixOrder : public Comparator<SuffixOrder> {
+ public:
+
+ /**
+ * Constructs a comparator capable of comparing two n-grams.
+ *
+ * @param order Number of words in each n-gram
+ */
+ explicit SuffixOrder(std::size_t order) : Comparator<SuffixOrder>(order) {}
+
+ /**
+ * Compares two n-grams lexicographically, one word at a time,
+ * beginning with the last word of each n-gram and ending with the first word of each n-gram.
+ *
+ * @param lhs A pointer to the n-gram on the left-hand side of the comparison
+ * @param rhs A pointer to the n-gram on the right-hand side of the comparison
+ */
+ inline bool Compare(const WordIndex *lhs, const WordIndex *rhs) const {
+ for (std::size_t i = order_ - 1; i != 0; --i) {
+ if (lhs[i] != rhs[i])
+ return lhs[i] < rhs[i];
+ }
+ return lhs[0] < rhs[0];
+ }
+
+ static const unsigned kMatchOffset = 1;
+};
+
+
+/**
+ * N-gram comparator that compares n-grams according to the reverse (suffix) order of the n-gram context.
+ *
+ * This comparator compares n-grams lexicographically, one word at a time,
+ * beginning with the penultimate word of each n-gram and ending with the first word of each n-gram;
+ * finally, this comparator compares the last word of each n-gram.
+ *
+ * Some examples of n-gram comparisons as defined by this comparator:
+ * - a b c == a b c
+ * - a b c < a b d
+ * - a b c < a d b
+ * - a b c > a b b
+ * - a b c > x a c
+ * - a b c < x y z
+ */
+class ContextOrder : public Comparator<ContextOrder> {
+ public:
+
+ /**
+ * Constructs a comparator capable of comparing two n-grams.
+ *
+ * @param order Number of words in each n-gram
+ */
+ explicit ContextOrder(std::size_t order) : Comparator<ContextOrder>(order) {}
+
+ /**
+ * Compares two n-grams lexicographically, one word at a time,
+ * beginning with the penultimate word of each n-gram and ending with the first word of each n-gram;
+ * finally, this comparator compares the last word of each n-gram.
+ *
+ * @param lhs A pointer to the n-gram on the left-hand side of the comparison
+ * @param rhs A pointer to the n-gram on the right-hand side of the comparison
+ */
+ inline bool Compare(const WordIndex *lhs, const WordIndex *rhs) const {
+ for (int i = order_ - 2; i >= 0; --i) {
+ if (lhs[i] != rhs[i])
+ return lhs[i] < rhs[i];
+ }
+ return lhs[order_ - 1] < rhs[order_ - 1];
+ }
+};
+
+/**
+ * N-gram comparator that compares n-grams according to their natural (prefix) order.
+ *
+ * This comparator compares n-grams lexicographically, one word at a time,
+ * beginning with the first word of each n-gram and ending with the last word of each n-gram.
+ *
+ * Some examples of n-gram comparisons as defined by this comparator:
+ * - a b c == a b c
+ * - a b c < a b d
+ * - a b c < a d b
+ * - a b c > a b b
+ * - a b c < x a c
+ * - a b c < x y z
+ */
+class PrefixOrder : public Comparator<PrefixOrder> {
+ public:
+
+ /**
+ * Constructs a comparator capable of comparing two n-grams.
+ *
+ * @param order Number of words in each n-gram
+ */
+ explicit PrefixOrder(std::size_t order) : Comparator<PrefixOrder>(order) {}
+
+ /**
+ * Compares two n-grams lexicographically, one word at a time,
+ * beginning with the first word of each n-gram and ending with the last word of each n-gram.
+ *
+ * @param lhs A pointer to the n-gram on the left-hand side of the comparison
+ * @param rhs A pointer to the n-gram on the right-hand side of the comparison
+ */
+ inline bool Compare(const WordIndex *lhs, const WordIndex *rhs) const {
+ for (std::size_t i = 0; i < order_; ++i) {
+ if (lhs[i] != rhs[i])
+ return lhs[i] < rhs[i];
+ }
+ return false;
+ }
+
+ static const unsigned kMatchOffset = 0;
+};
+
+} // namespace lm
+
+#endif // LM_COMMON_COMPARE_H
diff --git a/src/kenlm/lm/common/joint_order.hh b/src/kenlm/lm/common/joint_order.hh
new file mode 100644
index 0000000..6113bb8
--- /dev/null
+++ b/src/kenlm/lm/common/joint_order.hh
@@ -0,0 +1,71 @@
+#ifndef LM_COMMON_JOINT_ORDER_H
+#define LM_COMMON_JOINT_ORDER_H
+
+#include "lm/common/ngram_stream.hh"
+#include "lm/lm_exception.hh"
+
+#ifdef DEBUG
+#include "util/fixed_array.hh"
+#include <iostream>
+#endif
+
+#include <cstring>
+
+namespace lm {
+
+template <class Callback, class Compare> void JointOrder(const util::stream::ChainPositions &positions, Callback &callback) {
+ // Allow matching to reference streams[-1].
+ util::FixedArray<ProxyStream<NGramHeader> > streams_with_dummy(positions.size() + 1);
+ // A bogus stream for [-1].
+ streams_with_dummy.push_back();
+ for (std::size_t i = 0; i < positions.size(); ++i) {
+ streams_with_dummy.push_back(positions[i], NGramHeader(NULL, i + 1));
+ }
+ ProxyStream<NGramHeader> *streams = streams_with_dummy.begin() + 1;
+
+ std::size_t order;
+ for (order = 0; order < positions.size() && streams[order]; ++order) {}
+ assert(order); // should always have <unk>.
+
+ // Debugging only: call comparison function to sanity check order.
+#ifdef DEBUG
+ util::FixedArray<Compare> less_compare(order);
+ for (unsigned i = 0; i < order; ++i)
+ less_compare.push_back(i + 1);
+#endif // DEBUG
+
+ std::size_t current = 0;
+ while (true) {
+ // Does the context match the lower one?
+ if (!memcmp(streams[static_cast<int>(current) - 1]->begin(), streams[current]->begin() + Compare::kMatchOffset, sizeof(WordIndex) * current)) {
+ callback.Enter(current, streams[current].Get());
+ // Transition to looking for extensions.
+ if (++current < order) continue;
+ }
+#ifdef DEBUG
+ // match_check[current - 1] matches current-grams
+ // The lower-order stream (which skips fewer current-grams) should always be <= the higher order-stream (which can skip current-grams).
+ else if (!less_compare[current - 1](streams[static_cast<int>(current) - 1]->begin(), streams[current]->begin() + Compare::kMatchOffset)) {
+ std::cerr << "Stream out of order detected" << std::endl;
+ abort();
+ }
+#endif // DEBUG
+ // No extension left.
+ while(true) {
+ assert(current > 0);
+ --current;
+ callback.Exit(current, streams[current].Get());
+
+ if (++streams[current]) break;
+
+ UTIL_THROW_IF(order != current + 1, FormatLoadException, "Detected n-gram without matching suffix");
+
+ order = current;
+ if (!order) return;
+ }
+ }
+}
+
+} // namespace lm
+
+#endif // LM_COMMON_JOINT_ORDER_H
diff --git a/src/kenlm/lm/common/model_buffer.cc b/src/kenlm/lm/common/model_buffer.cc
new file mode 100644
index 0000000..ae9b08c
--- /dev/null
+++ b/src/kenlm/lm/common/model_buffer.cc
@@ -0,0 +1,91 @@
+#include "lm/common/model_buffer.hh"
+#include "util/exception.hh"
+#include "util/file_stream.hh"
+#include "util/file.hh"
+#include "util/file_piece.hh"
+#include "util/stream/io.hh"
+#include "util/stream/multi_stream.hh"
+
+#include <boost/lexical_cast.hpp>
+
+namespace lm {
+
+namespace {
+const char kMetadataHeader[] = "KenLM intermediate binary file";
+} // namespace
+
+ModelBuffer::ModelBuffer(StringPiece file_base, bool keep_buffer, bool output_q)
+ : file_base_(file_base.data(), file_base.size()), keep_buffer_(keep_buffer), output_q_(output_q),
+ vocab_file_(keep_buffer ? util::CreateOrThrow((file_base_ + ".vocab").c_str()) : util::MakeTemp(file_base_)) {}
+
+ModelBuffer::ModelBuffer(StringPiece file_base)
+ : file_base_(file_base.data(), file_base.size()), keep_buffer_(false) {
+ const std::string full_name = file_base_ + ".kenlm_intermediate";
+ util::FilePiece in(full_name.c_str());
+ StringPiece token = in.ReadLine();
+ UTIL_THROW_IF2(token != kMetadataHeader, "File " << full_name << " begins with \"" << token << "\" not " << kMetadataHeader);
+
+ token = in.ReadDelimited();
+ UTIL_THROW_IF2(token != "Counts", "Expected Counts, got \"" << token << "\" in " << full_name);
+ char got;
+ while ((got = in.get()) == ' ') {
+ counts_.push_back(in.ReadULong());
+ }
+ UTIL_THROW_IF2(got != '\n', "Expected newline at end of counts.");
+
+ token = in.ReadDelimited();
+ UTIL_THROW_IF2(token != "Payload", "Expected Payload, got \"" << token << "\" in " << full_name);
+ token = in.ReadDelimited();
+ if (token == "q") {
+ output_q_ = true;
+ } else if (token == "pb") {
+ output_q_ = false;
+ } else {
+ UTIL_THROW(util::Exception, "Unknown payload " << token);
+ }
+
+ vocab_file_.reset(util::OpenReadOrThrow((file_base_ + ".vocab").c_str()));
+
+ files_.Init(counts_.size());
+ for (unsigned long i = 0; i < counts_.size(); ++i) {
+ files_.push_back(util::OpenReadOrThrow((file_base_ + '.' + boost::lexical_cast<std::string>(i + 1)).c_str()));
+ }
+}
+
+void ModelBuffer::Sink(util::stream::Chains &chains, const std::vector<uint64_t> &counts) {
+ counts_ = counts;
+ // Open files.
+ files_.Init(chains.size());
+ for (std::size_t i = 0; i < chains.size(); ++i) {
+ if (keep_buffer_) {
+ files_.push_back(util::CreateOrThrow(
+ (file_base_ + '.' + boost::lexical_cast<std::string>(i + 1)).c_str()
+ ));
+ } else {
+ files_.push_back(util::MakeTemp(file_base_));
+ }
+ chains[i] >> util::stream::Write(files_.back().get());
+ }
+ if (keep_buffer_) {
+ util::scoped_fd metadata(util::CreateOrThrow((file_base_ + ".kenlm_intermediate").c_str()));
+ util::FileStream meta(metadata.get(), 200);
+ meta << kMetadataHeader << "\nCounts";
+ for (std::vector<uint64_t>::const_iterator i = counts_.begin(); i != counts_.end(); ++i) {
+ meta << ' ' << *i;
+ }
+ meta << "\nPayload " << (output_q_ ? "q" : "pb") << '\n';
+ }
+}
+
+void ModelBuffer::Source(util::stream::Chains &chains) {
+ assert(chains.size() <= files_.size());
+ for (unsigned int i = 0; i < chains.size(); ++i) {
+ chains[i] >> util::stream::PRead(files_[i].get());
+ }
+}
+
+void ModelBuffer::Source(std::size_t order_minus_1, util::stream::Chain &chain) {
+ chain >> util::stream::PRead(files_[order_minus_1].get());
+}
+
+} // namespace lm
diff --git a/src/kenlm/lm/common/model_buffer.hh b/src/kenlm/lm/common/model_buffer.hh
new file mode 100644
index 0000000..92662bb
--- /dev/null
+++ b/src/kenlm/lm/common/model_buffer.hh
@@ -0,0 +1,63 @@
+#ifndef LM_COMMON_MODEL_BUFFER_H
+#define LM_COMMON_MODEL_BUFFER_H
+
+/* Format with separate files in suffix order. Each file contains
+ * n-grams of the same order.
+ */
+
+#include "util/file.hh"
+#include "util/fixed_array.hh"
+
+#include <string>
+#include <vector>
+
+namespace util { namespace stream {
+class Chains;
+class Chain;
+}} // namespaces
+
+namespace lm {
+
+class ModelBuffer {
+ public:
+ // Construct for writing. Must call VocabFile() and fill it with null-delimited vocab words.
+ ModelBuffer(StringPiece file_base, bool keep_buffer, bool output_q);
+
+ // Load from file.
+ explicit ModelBuffer(StringPiece file_base);
+
+ // Must call VocabFile and populate before calling this function.
+ void Sink(util::stream::Chains &chains, const std::vector<uint64_t> &counts);
+
+ // Read files and write to the given chains. If fewer chains are provided,
+ // only do the lower orders.
+ void Source(util::stream::Chains &chains);
+
+ void Source(std::size_t order_minus_1, util::stream::Chain &chain);
+
+ // The order of the n-gram model that is associated with the model buffer.
+ std::size_t Order() const { return counts_.size(); }
+ // Requires Sink or load from file.
+ const std::vector<uint64_t> &Counts() const {
+ assert(!counts_.empty());
+ return counts_;
+ }
+
+ int VocabFile() const { return vocab_file_.get(); }
+ int StealVocabFile() { return vocab_file_.release(); }
+
+ bool Keep() const { return keep_buffer_; }
+
+ private:
+ const std::string file_base_;
+ const bool keep_buffer_;
+ bool output_q_;
+ std::vector<uint64_t> counts_;
+
+ util::scoped_fd vocab_file_;
+ util::FixedArray<util::scoped_fd> files_;
+};
+
+} // namespace lm
+
+#endif // LM_COMMON_MODEL_BUFFER_H
diff --git a/src/kenlm/lm/common/ngram.hh b/src/kenlm/lm/common/ngram.hh
new file mode 100644
index 0000000..7a6d1c3
--- /dev/null
+++ b/src/kenlm/lm/common/ngram.hh
@@ -0,0 +1,77 @@
+#ifndef LM_COMMON_NGRAM_H
+#define LM_COMMON_NGRAM_H
+
+#include "lm/weights.hh"
+#include "lm/word_index.hh"
+
+#include <cstddef>
+#include <cassert>
+#include <stdint.h>
+#include <cstring>
+
+namespace lm {
+
+class NGramHeader {
+ public:
+ NGramHeader(void *begin, std::size_t order)
+ : begin_(static_cast<WordIndex*>(begin)), end_(begin_ + order) {}
+
+ NGramHeader() : begin_(NULL), end_(NULL) {}
+
+ const uint8_t *Base() const { return reinterpret_cast<const uint8_t*>(begin_); }
+ uint8_t *Base() { return reinterpret_cast<uint8_t*>(begin_); }
+
+ void ReBase(void *to) {
+ std::size_t difference = end_ - begin_;
+ begin_ = reinterpret_cast<WordIndex*>(to);
+ end_ = begin_ + difference;
+ }
+
+ // These are for the vocab index.
+ // Lower-case in deference to STL.
+ const WordIndex *begin() const { return begin_; }
+ WordIndex *begin() { return begin_; }
+ const WordIndex *end() const { return end_; }
+ WordIndex *end() { return end_; }
+
+ std::size_t size() const { return end_ - begin_; }
+ std::size_t Order() const { return end_ - begin_; }
+
+ private:
+ WordIndex *begin_, *end_;
+};
+
+template <class PayloadT> class NGram : public NGramHeader {
+ public:
+ typedef PayloadT Payload;
+
+ NGram() : NGramHeader(NULL, 0) {}
+
+ NGram(void *begin, std::size_t order) : NGramHeader(begin, order) {}
+
+ // Would do operator++ but that can get confusing for a stream.
+ void NextInMemory() {
+ ReBase(&Value() + 1);
+ }
+
+ static std::size_t TotalSize(std::size_t order) {
+ return order * sizeof(WordIndex) + sizeof(Payload);
+ }
+ std::size_t TotalSize() const {
+ // Compiler should optimize this.
+ return TotalSize(Order());
+ }
+
+ static std::size_t OrderFromSize(std::size_t size) {
+ std::size_t ret = (size - sizeof(Payload)) / sizeof(WordIndex);
+ assert(size == TotalSize(ret));
+ return ret;
+ }
+
+ const Payload &Value() const { return *reinterpret_cast<const Payload *>(end()); }
+ Payload &Value() { return *reinterpret_cast<Payload *>(end()); }
+};
+
+} // namespace lm
+
+#endif // LM_COMMON_NGRAM_H
diff --git a/src/kenlm/lm/common/ngram_stream.hh b/src/kenlm/lm/common/ngram_stream.hh
new file mode 100644
index 0000000..8bdf36e
--- /dev/null
+++ b/src/kenlm/lm/common/ngram_stream.hh
@@ -0,0 +1,65 @@
+#ifndef LM_BUILDER_NGRAM_STREAM_H
+#define LM_BUILDER_NGRAM_STREAM_H
+
+#include "lm/common/ngram.hh"
+#include "util/stream/chain.hh"
+#include "util/stream/multi_stream.hh"
+#include "util/stream/stream.hh"
+
+#include <cstddef>
+
+namespace lm {
+
+template <class Proxy> class ProxyStream {
+ public:
+ // Make an invalid stream.
+ ProxyStream() {}
+
+ explicit ProxyStream(const util::stream::ChainPosition &position, const Proxy &proxy = Proxy())
+ : proxy_(proxy), stream_(position) {
+ proxy_.ReBase(stream_.Get());
+ }
+
+ Proxy &operator*() { return proxy_; }
+ const Proxy &operator*() const { return proxy_; }
+
+ Proxy *operator->() { return &proxy_; }
+ const Proxy *operator->() const { return &proxy_; }
+
+ void *Get() { return stream_.Get(); }
+ const void *Get() const { return stream_.Get(); }
+
+ operator bool() const { return stream_; }
+ bool operator!() const { return !stream_; }
+ void Poison() { stream_.Poison(); }
+
+ ProxyStream<Proxy> &operator++() {
+ ++stream_;
+ proxy_.ReBase(stream_.Get());
+ return *this;
+ }
+
+ private:
+ Proxy proxy_;
+ util::stream::Stream stream_;
+};
+
+template <class Payload> class NGramStream : public ProxyStream<NGram<Payload> > {
+ public:
+ // Make an invalid stream.
+ NGramStream() {}
+
+ explicit NGramStream(const util::stream::ChainPosition &position) :
+ ProxyStream<NGram<Payload> >(position, NGram<Payload>(NULL, NGram<Payload>::OrderFromSize(position.GetChain().EntrySize()))) {}
+};
+
+template <class Payload> class NGramStreams : public util::stream::GenericStreams<NGramStream<Payload> > {
+ private:
+ typedef util::stream::GenericStreams<NGramStream<Payload> > P;
+ public:
+ NGramStreams() : P() {}
+ NGramStreams(const util::stream::ChainPositions &positions) : P(positions) {}
+};
+
+} // namespace
+#endif // LM_BUILDER_NGRAM_STREAM_H
diff --git a/src/kenlm/lm/common/print.cc b/src/kenlm/lm/common/print.cc
new file mode 100644
index 0000000..518b62f
--- /dev/null
+++ b/src/kenlm/lm/common/print.cc
@@ -0,0 +1,62 @@
+#include "lm/common/print.hh"
+
+#include "lm/common/ngram_stream.hh"
+#include "util/file_stream.hh"
+#include "util/file.hh"
+#include "util/mmap.hh"
+#include "util/scoped.hh"
+
+#include <sstream>
+#include <cstring>
+
+namespace lm {
+
+VocabReconstitute::VocabReconstitute(int fd) {
+ uint64_t size = util::SizeOrThrow(fd);
+ util::MapRead(util::POPULATE_OR_READ, fd, 0, size, memory_);
+ const char *const start = static_cast<const char*>(memory_.get());
+ const char *i;
+ for (i = start; i != start + size; i += strlen(i) + 1) {
+ map_.push_back(i);
+ }
+ // Last one for LookupPiece.
+ map_.push_back(i);
+}
+
+namespace {
+template <class Payload> void PrintLead(const VocabReconstitute &vocab, ProxyStream<Payload> &stream, util::FileStream &out) {
+ out << stream->Value().prob << '\t' << vocab.Lookup(*stream->begin());
+ for (const WordIndex *i = stream->begin() + 1; i != stream->end(); ++i) {
+ out << ' ' << vocab.Lookup(*i);
+ }
+}
+} // namespace
+
+void PrintARPA::Run(const util::stream::ChainPositions &positions) {
+ VocabReconstitute vocab(vocab_fd_);
+ util::FileStream out(out_fd_);
+ out << "\\data\\\n";
+ for (size_t i = 0; i < positions.size(); ++i) {
+ out << "ngram " << (i+1) << '=' << counts_[i] << '\n';
+ }
+ out << '\n';
+
+ for (unsigned order = 1; order < positions.size(); ++order) {
+ out << "\\" << order << "-grams:" << '\n';
+ for (ProxyStream<NGram<ProbBackoff> > stream(positions[order - 1], NGram<ProbBackoff>(NULL, order)); stream; ++stream) {
+ PrintLead(vocab, stream, out);
+ out << '\t' << stream->Value().backoff << '\n';
+ }
+ out << '\n';
+ }
+
+ out << "\\" << positions.size() << "-grams:" << '\n';
+ for (ProxyStream<NGram<Prob> > stream(positions.back(), NGram<Prob>(NULL, positions.size())); stream; ++stream) {
+ PrintLead(vocab, stream, out);
+ out << '\n';
+ }
+ out << '\n';
+ out << "\\end\\\n";
+}
+
+} // namespace lm
diff --git a/src/kenlm/lm/common/print.hh b/src/kenlm/lm/common/print.hh
new file mode 100644
index 0000000..6aa08b3
--- /dev/null
+++ b/src/kenlm/lm/common/print.hh
@@ -0,0 +1,58 @@
+#ifndef LM_COMMON_PRINT_H
+#define LM_COMMON_PRINT_H
+
+#include "lm/word_index.hh"
+#include "util/mmap.hh"
+#include "util/string_piece.hh"
+
+#include <cassert>
+#include <vector>
+
+namespace util { namespace stream { class ChainPositions; }}
+
+// Warning: PrintARPA routines read all unigrams before all bigrams before all
+// trigrams etc. So if other parts of the chain move jointly, you'll have to
+// buffer.
+
+namespace lm {
+
+class VocabReconstitute {
+ public:
+ // fd must be alive for life of this object; does not take ownership.
+ explicit VocabReconstitute(int fd);
+
+ const char *Lookup(WordIndex index) const {
+ assert(index < map_.size() - 1);
+ return map_[index];
+ }
+
+ StringPiece LookupPiece(WordIndex index) const {
+ return StringPiece(map_[index], map_[index + 1] - 1 - map_[index]);
+ }
+
+ std::size_t Size() const {
+ // There's an extra entry to support StringPiece lengths.
+ return map_.size() - 1;
+ }
+
+ private:
+ util::scoped_memory memory_;
+ std::vector<const char*> map_;
+};
+
+class PrintARPA {
+ public:
+ // Does not take ownership of vocab_fd or out_fd.
+ explicit PrintARPA(int vocab_fd, int out_fd, const std::vector<uint64_t> &counts)
+ : vocab_fd_(vocab_fd), out_fd_(out_fd), counts_(counts) {}
+
+ void Run(const util::stream::ChainPositions &positions);
+
+ private:
+ int vocab_fd_;
+ int out_fd_;
+ std::vector<uint64_t> counts_;
+};
+
+} // namespace lm
+#endif // LM_COMMON_PRINT_H
diff --git a/src/kenlm/lm/common/renumber.cc b/src/kenlm/lm/common/renumber.cc
new file mode 100644
index 0000000..0632a14
--- /dev/null
+++ b/src/kenlm/lm/common/renumber.cc
@@ -0,0 +1,17 @@
+#include "lm/common/renumber.hh"
+#include "lm/common/ngram.hh"
+
+#include "util/stream/stream.hh"
+
+namespace lm {
+
+void Renumber::Run(const util::stream::ChainPosition &position) {
+ for (util::stream::Stream stream(position); stream; ++stream) {
+ NGramHeader gram(stream.Get(), order_);
+ for (WordIndex *w = gram.begin(); w != gram.end(); ++w) {
+ *w = new_numbers_[*w];
+ }
+ }
+}
+
+} // namespace lm
diff --git a/src/kenlm/lm/common/renumber.hh b/src/kenlm/lm/common/renumber.hh
new file mode 100644
index 0000000..ca25c4d
--- /dev/null
+++ b/src/kenlm/lm/common/renumber.hh
@@ -0,0 +1,30 @@
+/* Map vocab ids. This is useful to merge independently collected counts or
+ * change the vocab ids to the order used by the trie.
+ */
+#ifndef LM_COMMON_RENUMBER_H
+#define LM_COMMON_RENUMBER_H
+
+#include "lm/word_index.hh"
+
+#include <cstddef>
+
+namespace util { namespace stream { class ChainPosition; }}
+
+namespace lm {
+
+class Renumber {
+ public:
+ // Assumes the array is large enough to map all words and stays alive while
+ // the thread is active.
+ Renumber(const WordIndex *new_numbers, std::size_t order)
+ : new_numbers_(new_numbers), order_(order) {}
+
+ void Run(const util::stream::ChainPosition &position);
+
+ private:
+ const WordIndex *new_numbers_;
+ std::size_t order_;
+};
+
+} // namespace lm
+#endif // LM_COMMON_RENUMBER_H
diff --git a/src/kenlm/lm/common/size_option.cc b/src/kenlm/lm/common/size_option.cc
new file mode 100644
index 0000000..46a920e
--- /dev/null
+++ b/src/kenlm/lm/common/size_option.cc
@@ -0,0 +1,24 @@
+#include <boost/program_options.hpp>
+#include "util/usage.hh"
+
+namespace lm {
+
+namespace {
+class SizeNotify {
+ public:
+ explicit SizeNotify(std::size_t &out) : behind_(out) {}
+
+ void operator()(const std::string &from) {
+ behind_ = util::ParseSize(from);
+ }
+
+ private:
+ std::size_t &behind_;
+};
+}
+
+boost::program_options::typed_value<std::string> *SizeOption(std::size_t &to, const char *default_value) {
+ return boost::program_options::value<std::string>()->notifier(SizeNotify(to))->default_value(default_value);
+}
+
+} // namespace lm
diff --git a/src/kenlm/lm/common/size_option.hh b/src/kenlm/lm/common/size_option.hh
new file mode 100644
index 0000000..d3b8e33
--- /dev/null
+++ b/src/kenlm/lm/common/size_option.hh
@@ -0,0 +1,11 @@
+#include <boost/program_options.hpp>
+
+#include <cstddef>
+#include <string>
+
+namespace lm {
+
+// Create a boost program option for data sizes. This parses sizes like 1T and 10k.
+boost::program_options::typed_value<std::string> *SizeOption(std::size_t &to, const char *default_value);
+
+} // namespace lm
diff --git a/src/kenlm/lm/common/special.hh b/src/kenlm/lm/common/special.hh
new file mode 100644
index 0000000..0677cd7
--- /dev/null
+++ b/src/kenlm/lm/common/special.hh
@@ -0,0 +1,27 @@
+#ifndef LM_COMMON_SPECIAL_H
+#define LM_COMMON_SPECIAL_H
+
+#include "lm/word_index.hh"
+
+namespace lm {
+
+class SpecialVocab {
+ public:
+ SpecialVocab(WordIndex bos, WordIndex eos) : bos_(bos), eos_(eos) {}
+
+ bool IsSpecial(WordIndex word) const {
+ return word == kUNK || word == bos_ || word == eos_;
+ }
+
+ WordIndex UNK() const { return kUNK; }
+ WordIndex BOS() const { return bos_; }
+ WordIndex EOS() const { return eos_; }
+
+ private:
+ WordIndex bos_;
+ WordIndex eos_;
+};
+
+} // namespace lm
+
+#endif // LM_COMMON_SPECIAL_H
diff --git a/src/joshua/decoder/ff/lm/kenlm/lm/config.cc b/src/kenlm/lm/config.cc
similarity index 100%
rename from src/joshua/decoder/ff/lm/kenlm/lm/config.cc
rename to src/kenlm/lm/config.cc
diff --git a/src/kenlm/lm/config.hh b/src/kenlm/lm/config.hh
new file mode 100644
index 0000000..21b9e7e
--- /dev/null
+++ b/src/kenlm/lm/config.hh
@@ -0,0 +1,124 @@
+#ifndef LM_CONFIG_H
+#define LM_CONFIG_H
+
+#include "lm/lm_exception.hh"
+#include "util/mmap.hh"
+
+#include <iosfwd>
+#include <string>
+#include <vector>
+
+/* Configuration for ngram model. Separate header to reduce pollution. */
+
+namespace lm {
+
+class EnumerateVocab;
+
+namespace ngram {
+
+struct Config {
+ // EFFECTIVE FOR BOTH ARPA AND BINARY READS
+
+ // (default true) print progress bar to messages
+ bool show_progress;
+
+ // Where to log messages including the progress bar. Set to NULL for
+ // silence.
+ std::ostream *messages;
+
+ std::ostream *ProgressMessages() const {
+ return show_progress ? messages : 0;
+ }
+
+ // This will be called with every string in the vocabulary by the
+ // constructor; it need only exist for the lifetime of the constructor.
+ // See enumerate_vocab.hh for more detail. Config does not take ownership;
+ // just delete/let it go out of scope after the constructor exits.
+ EnumerateVocab *enumerate_vocab;
+
+
+ // ONLY EFFECTIVE WHEN READING ARPA
+
+ // What to do when <unk> isn't in the provided model.
+ WarningAction unknown_missing;
+ // What to do when <s> or </s> is missing from the model.
+ // If THROW_UP, the exception will be of type util::SpecialWordMissingException.
+ WarningAction sentence_marker_missing;
+
+ // What to do with a positive log probability. For COMPLAIN and SILENT, map
+ // to 0.
+ WarningAction positive_log_probability;
+
+ // The probability to substitute for <unk> if it's missing from the model.
+ // No effect if the model has <unk> or unknown_missing == THROW_UP.
+ float unknown_missing_logprob;
+
+ // Size multiplier for probing hash table. Must be > 1. Space is linear in
+ // this. Time is probing_multiplier / (probing_multiplier - 1). No effect
+ // for sorted variant.
+ // If you find yourself setting this to a low number, consider using the
+ // TrieModel which has lower memory consumption.
+ float probing_multiplier;
+
+ // Amount of memory to use for building. The actual memory usage will be
+ // higher since this just sets sort buffer size. Only applies to trie
+ // models.
+ std::size_t building_memory;
+
+ // Template for temporary directory appropriate for passing to mkdtemp.
+ // The characters XXXXXX are appended before passing to mkdtemp. Only
+ // applies to trie. If empty, defaults to write_mmap. If that's NULL,
+ // defaults to input file name.
+ std::string temporary_directory_prefix;
+
+ // Level of complaining to do when loading from ARPA instead of binary format.
+ enum ARPALoadComplain {ALL, EXPENSIVE, NONE};
+ ARPALoadComplain arpa_complain;
+
+ // While loading an ARPA file, also write out this binary format file. Set
+ // to NULL to disable.
+ const char *write_mmap;
+
+ enum WriteMethod {
+ WRITE_MMAP, // Map the file directly.
+ WRITE_AFTER // Write after we're done.
+ };
+ WriteMethod write_method;
+
+ // Include the vocab in the binary file? Only effective if write_mmap != NULL.
+ bool include_vocab;
+
+
+ // Left rest options. Only used when the model includes rest costs.
+ enum RestFunction {
+ REST_MAX, // Maximum of any score to the left
+ REST_LOWER, // Use lower-order files given below.
+ };
+ RestFunction rest_function;
+ // Only used for REST_LOWER.
+ std::vector<std::string> rest_lower_files;
+
+
+ // Quantization options. Only effective for QuantTrieModel. One value is
+ // reserved for each of prob and backoff, so 2^bits - 1 buckets will be used
+ // to quantize (and one of the remaining backoffs will be 0).
+ uint8_t prob_bits, backoff_bits;
+
+ // Bhiksha compression (simple form). Only works with trie.
+ uint8_t pointer_bhiksha_bits;
+
+
+ // ONLY EFFECTIVE WHEN READING BINARY
+
+ // How to get the giant array into memory: lazy mmap, populate, read etc.
+ // See util/mmap.hh for details of MapMethod.
+ util::LoadMethod load_method;
+
+
+ // Set defaults.
+ Config();
+};
+
+} /* namespace ngram */ } /* namespace lm */
+
+#endif // LM_CONFIG_H
diff --git a/src/kenlm/lm/enumerate_vocab.hh b/src/kenlm/lm/enumerate_vocab.hh
new file mode 100644
index 0000000..f4c94cd
--- /dev/null
+++ b/src/kenlm/lm/enumerate_vocab.hh
@@ -0,0 +1,28 @@
+#ifndef LM_ENUMERATE_VOCAB_H
+#define LM_ENUMERATE_VOCAB_H
+
+#include "lm/word_index.hh"
+#include "util/string_piece.hh"
+
+namespace lm {
+
+/* If you need the actual strings in the vocabulary, inherit from this class
+ * and implement Add. Then put a pointer in Config.enumerate_vocab; it does
+ * not take ownership. Add is called once per vocab word. index starts at 0
+ * and increases by 1 each time. This is only used by the Model constructor;
+ * the pointer is not retained by the class.
+ */
+class EnumerateVocab {
+ public:
+ virtual ~EnumerateVocab() {}
+
+ virtual void Add(WordIndex index, const StringPiece &str) = 0;
+
+ protected:
+ EnumerateVocab() {}
+};
+
+} // namespace lm
+
+#endif // LM_ENUMERATE_VOCAB_H
+
diff --git a/src/kenlm/lm/facade.hh b/src/kenlm/lm/facade.hh
new file mode 100644
index 0000000..325ef15
--- /dev/null
+++ b/src/kenlm/lm/facade.hh
@@ -0,0 +1,73 @@
+#ifndef LM_FACADE_H
+#define LM_FACADE_H
+
+#include "lm/virtual_interface.hh"
+#include "util/string_piece.hh"
+
+#include <string>
+
+namespace lm {
+namespace base {
+
+// Common model interface that depends on knowing the specific classes.
+// Curiously recurring template pattern.
+template <class Child, class StateT, class VocabularyT> class ModelFacade : public Model {
+ public:
+ typedef StateT State;
+ typedef VocabularyT Vocabulary;
+
+ /* Translate from void* to State */
+ FullScoreReturn BaseFullScore(const void *in_state, const WordIndex new_word, void *out_state) const {
+ return static_cast<const Child*>(this)->FullScore(
+ *reinterpret_cast<const State*>(in_state),
+ new_word,
+ *reinterpret_cast<State*>(out_state));
+ }
+
+ FullScoreReturn BaseFullScoreForgotState(const WordIndex *context_rbegin, const WordIndex *context_rend, const WordIndex new_word, void *out_state) const {
+ return static_cast<const Child*>(this)->FullScoreForgotState(
+ context_rbegin,
+ context_rend,
+ new_word,
+ *reinterpret_cast<State*>(out_state));
+ }
+
+ // Default Score function calls FullScore. Model can override this.
+ float Score(const State &in_state, const WordIndex new_word, State &out_state) const {
+ return static_cast<const Child*>(this)->FullScore(in_state, new_word, out_state).prob;
+ }
+
+ float BaseScore(const void *in_state, const WordIndex new_word, void *out_state) const {
+ return static_cast<const Child*>(this)->Score(
+ *reinterpret_cast<const State*>(in_state),
+ new_word,
+ *reinterpret_cast<State*>(out_state));
+ }
+
+ const State &BeginSentenceState() const { return begin_sentence_; }
+ const State &NullContextState() const { return null_context_; }
+ const Vocabulary &GetVocabulary() const { return *static_cast<const Vocabulary*>(&BaseVocabulary()); }
+
+ protected:
+ ModelFacade() : Model(sizeof(State)) {}
+
+ virtual ~ModelFacade() {}
+
+ // begin_sentence and null_context can disappear after. vocab should stay.
+ void Init(const State &begin_sentence, const State &null_context, const Vocabulary &vocab, unsigned char order) {
+ begin_sentence_ = begin_sentence;
+ null_context_ = null_context;
+ begin_sentence_memory_ = &begin_sentence_;
+ null_context_memory_ = &null_context_;
+ base_vocab_ = &vocab;
+ order_ = order;
+ }
+
+ private:
+ State begin_sentence_, null_context_;
+};
+
+} // namespace base
+} // namespace lm
+
+#endif // LM_FACADE_H
diff --git a/src/kenlm/lm/filter/CMakeLists.txt b/src/kenlm/lm/filter/CMakeLists.txt
new file mode 100644
index 0000000..d4616cc
--- /dev/null
+++ b/src/kenlm/lm/filter/CMakeLists.txt
@@ -0,0 +1,62 @@
+cmake_minimum_required(VERSION 2.8.8)
+#
+# The KenLM cmake files make use of add_library(... OBJECTS ...)
+#
+# This syntax allows grouping of source files when compiling
+# (effectively creating "fake" libraries based on source subdirs).
+#
+# This syntax was only added in cmake version 2.8.8
+#
+# see http://www.cmake.org/Wiki/CMake/Tutorials/Object_Library
+
+
+# This CMake file was created by Lane Schwartz <dowobeha@gmail.com>
+
+# Explicitly list the source files for this subdirectory
+#
+# If you add any source files to this subdirectory
+# that should be included in the kenlm library,
+# (this excludes any unit test files)
+# you should add them to the following list:
+#
+# In order to set correct paths to these files
+# in case this variable is referenced by CMake files in the parent directory,
+# we prefix all files with ${CMAKE_CURRENT_SOURCE_DIR}.
+#
+set(KENLM_FILTER_SOURCE
+ ${CMAKE_CURRENT_SOURCE_DIR}/arpa_io.cc
+ ${CMAKE_CURRENT_SOURCE_DIR}/phrase.cc
+ ${CMAKE_CURRENT_SOURCE_DIR}/vocab.cc
+ )
+
+
+# Group these objects together for later use.
+#
+# Given add_library(foo OBJECT ${my_foo_sources}),
+# refer to these objects as $<TARGET_OBJECTS:foo>
+#
+add_library(kenlm_filter OBJECT ${KENLM_FILTER_SOURCE})
+
+
+# Explicitly list the executable files to be compiled
+set(EXE_LIST
+ filter
+ phrase_table_vocab
+)
+
+
+# Iterate through the executable list
+foreach(exe ${EXE_LIST})
+
+ # Compile the executable, linking against the requisite dependent object files
+ add_executable(${exe} ${exe}_main.cc $<TARGET_OBJECTS:kenlm> $<TARGET_OBJECTS:kenlm_filter> $<TARGET_OBJECTS:kenlm_util>)
+
+ # Link the executable against boost
+ target_link_libraries(${exe} ${Boost_LIBRARIES} pthread)
+
+ # Group executables together
+ set_target_properties(${exe} PROPERTIES FOLDER executables)
+
+# End for loop
+endforeach(exe)
+
diff --git a/src/kenlm/lm/filter/Jamfile b/src/kenlm/lm/filter/Jamfile
new file mode 100644
index 0000000..bcf62da
--- /dev/null
+++ b/src/kenlm/lm/filter/Jamfile
@@ -0,0 +1,7 @@
+fakelib lm_filter : phrase.cc vocab.cc arpa_io.cc ../../util//kenutil : <threading>multi:<library>/top//boost_thread ;
+
+obj main : filter_main.cc : <threading>single:<define>NTHREAD <include>../.. ;
+
+exe filter : main lm_filter ../../util//kenutil ..//kenlm : <threading>multi:<library>/top//boost_thread ;
+
+exe phrase_table_vocab : phrase_table_vocab_main.cc ../../util//kenutil ;
diff --git a/src/kenlm/lm/filter/arpa_io.cc b/src/kenlm/lm/filter/arpa_io.cc
new file mode 100644
index 0000000..2cae60f
--- /dev/null
+++ b/src/kenlm/lm/filter/arpa_io.cc
@@ -0,0 +1,77 @@
+#include "lm/filter/arpa_io.hh"
+#include "util/file_piece.hh"
+#include "util/string_stream.hh"
+
+#include <iostream>
+#include <ostream>
+#include <string>
+#include <vector>
+
+#include <cctype>
+#include <cerrno>
+#include <cstring>
+
+namespace lm {
+
+ARPAInputException::ARPAInputException(const StringPiece &message) throw() {
+ *this << message;
+}
+
+ARPAInputException::ARPAInputException(const StringPiece &message, const StringPiece &line) throw() {
+ *this << message << " in line " << line;
+}
+
+ARPAInputException::~ARPAInputException() throw() {}
+
+// Seeking is the responsibility of the caller.
+template <class Stream> void WriteCounts(Stream &out, const std::vector<uint64_t> &number) {
+ out << "\n\\data\\\n";
+ for (unsigned int i = 0; i < number.size(); ++i) {
+ out << "ngram " << i+1 << "=" << number[i] << '\n';
+ }
+ out << '\n';
+}
+
+size_t SizeNeededForCounts(const std::vector<uint64_t> &number) {
+ std::string buf;
+ util::StringStream stream(buf);
+ WriteCounts(stream, number);
+ return buf.size();
+}
+
+bool IsEntirelyWhiteSpace(const StringPiece &line) {
+ for (size_t i = 0; i < static_cast<size_t>(line.size()); ++i) {
+ if (!isspace(line.data()[i])) return false;
+ }
+ return true;
+}
+
+ARPAOutput::ARPAOutput(const char *name, size_t buffer_size)
+ : file_backing_(util::CreateOrThrow(name)), file_(file_backing_.get(), buffer_size) {}
+
+void ARPAOutput::ReserveForCounts(std::streampos reserve) {
+ for (std::streampos i = 0; i < reserve; i += std::streampos(1)) {
+ file_ << '\n';
+ }
+}
+
+void ARPAOutput::BeginLength(unsigned int length) {
+ file_ << '\\' << length << "-grams:" << '\n';
+}
+
+void ARPAOutput::EndLength(unsigned int length) {
+ file_ << '\n';
+ if (length > counts_.size()) {
+ counts_.resize(length);
+ }
+ counts_[length - 1] = fast_counter_;
+}
+
+void ARPAOutput::Finish() {
+ file_ << "\\end\\\n";
+ file_.seekp(0);
+ WriteCounts(file_, counts_);
+ file_.flush();
+}
+
+} // namespace lm
diff --git a/src/kenlm/lm/filter/arpa_io.hh b/src/kenlm/lm/filter/arpa_io.hh
new file mode 100644
index 0000000..7489270
--- /dev/null
+++ b/src/kenlm/lm/filter/arpa_io.hh
@@ -0,0 +1,99 @@
+#ifndef LM_FILTER_ARPA_IO_H
+#define LM_FILTER_ARPA_IO_H
+/* Input and output for ARPA format language model files.
+ */
+#include "lm/read_arpa.hh"
+#include "util/exception.hh"
+#include "util/file_stream.hh"
+#include "util/string_piece.hh"
+#include "util/tokenize_piece.hh"
+
+#include <boost/noncopyable.hpp>
+#include <boost/scoped_array.hpp>
+
+#include <fstream>
+#include <string>
+#include <vector>
+
+#include <cstring>
+#include <stdint.h>
+
+namespace util { class FilePiece; }
+
+namespace lm {
+
+class ARPAInputException : public util::Exception {
+ public:
+ explicit ARPAInputException(const StringPiece &message) throw();
+ explicit ARPAInputException(const StringPiece &message, const StringPiece &line) throw();
+ virtual ~ARPAInputException() throw();
+};
+
+// Handling for the counts of n-grams at the beginning of ARPA files.
+size_t SizeNeededForCounts(const std::vector<uint64_t> &number);
+
+/* Writes an ARPA file. This has to be seekable so the counts can be written
+ * at the end. Hence, I just have it own a std::fstream instead of accepting
+ * a separately held std::ostream. TODO: use the fast one from estimation.
+ */
+class ARPAOutput : boost::noncopyable {
+ public:
+ explicit ARPAOutput(const char *name, size_t buffer_size = 65536);
+
+ void ReserveForCounts(std::streampos reserve);
+
+ void BeginLength(unsigned int length);
+
+ void AddNGram(const StringPiece &line) {
+ file_ << line << '\n';
+ ++fast_counter_;
+ }
+
+ void AddNGram(const StringPiece &ngram, const StringPiece &line) {
+ AddNGram(line);
+ }
+
+ template <class Iterator> void AddNGram(const Iterator &begin, const Iterator &end, const StringPiece &line) {
+ AddNGram(line);
+ }
+
+ void EndLength(unsigned int length);
+
+ void Finish();
+
+ private:
+ util::scoped_fd file_backing_;
+ util::FileStream file_;
+ size_t fast_counter_;
+ std::vector<uint64_t> counts_;
+};
+
+
+template <class Output> void ReadNGrams(util::FilePiece &in, unsigned int length, uint64_t number, Output &out) {
+ ReadNGramHeader(in, length);
+ out.BeginLength(length);
+ for (uint64_t i = 0; i < number; ++i) {
+ StringPiece line = in.ReadLine();
+ util::TokenIter<util::SingleCharacter> tabber(line, '\t');
+ if (!tabber) throw ARPAInputException("blank line", line);
+ if (!++tabber) throw ARPAInputException("no tab", line);
+
+ out.AddNGram(*tabber, line);
+ }
+ out.EndLength(length);
+}
+
+template <class Output> void ReadARPA(util::FilePiece &in_lm, Output &out) {
+ std::vector<uint64_t> number;
+ ReadARPACounts(in_lm, number);
+ out.ReserveForCounts(SizeNeededForCounts(number));
+ for (unsigned int i = 0; i < number.size(); ++i) {
+ ReadNGrams(in_lm, i + 1, number[i], out);
+ }
+ ReadEnd(in_lm);
+ out.Finish();
+}
+
+} // namespace lm
+
+#endif // LM_FILTER_ARPA_IO_H
diff --git a/src/kenlm/lm/filter/count_io.hh b/src/kenlm/lm/filter/count_io.hh
new file mode 100644
index 0000000..1af6676
--- /dev/null
+++ b/src/kenlm/lm/filter/count_io.hh
@@ -0,0 +1,89 @@
+#ifndef LM_FILTER_COUNT_IO_H
+#define LM_FILTER_COUNT_IO_H
+
+#include <fstream>
+#include <iostream>
+#include <string>
+
+#include "util/file_stream.hh"
+#include "util/file.hh"
+#include "util/file_piece.hh"
+
+namespace lm {
+
+class CountOutput : boost::noncopyable {
+ public:
+ explicit CountOutput(const char *name) : file_(util::CreateOrThrow(name)) {}
+
+ void AddNGram(const StringPiece &line) {
+ file_ << line << '\n';
+ }
+
+ template <class Iterator> void AddNGram(const Iterator &begin, const Iterator &end, const StringPiece &line) {
+ AddNGram(line);
+ }
+
+ void AddNGram(const StringPiece &ngram, const StringPiece &line) {
+ AddNGram(line);
+ }
+
+ private:
+ util::FileStream file_;
+};
+
+class CountBatch {
+ public:
+ explicit CountBatch(std::streamsize initial_read)
+ : initial_read_(initial_read) {
+ buffer_.reserve(initial_read);
+ }
+
+ void Read(std::istream &in) {
+ buffer_.resize(initial_read_);
+ in.read(&*buffer_.begin(), initial_read_);
+ buffer_.resize(in.gcount());
+ char got;
+ while (in.get(got) && got != '\n')
+ buffer_.push_back(got);
+ }
+
+ template <class Output> void Send(Output &out) {
+ for (util::TokenIter<util::SingleCharacter> line(StringPiece(&*buffer_.begin(), buffer_.size()), '\n'); line; ++line) {
+ util::TokenIter<util::SingleCharacter> tabber(*line, '\t');
+ if (!tabber) {
+ std::cerr << "Warning: empty n-gram count line being removed\n";
+ continue;
+ }
+ util::TokenIter<util::SingleCharacter, true> words(*tabber, ' ');
+ if (!words) {
+ std::cerr << "Line has a tab but no words.\n";
+ continue;
+ }
+ out.AddNGram(words, util::TokenIter<util::SingleCharacter, true>::end(), *line);
+ }
+ }
+
+ private:
+ std::streamsize initial_read_;
+
+ // This could have been a std::string but that's less happy with raw writes.
+ std::vector<char> buffer_;
+};
+
+template <class Output> void ReadCount(util::FilePiece &in_file, Output &out) {
+ try {
+ while (true) {
+ StringPiece line = in_file.ReadLine();
+ util::TokenIter<util::SingleCharacter> tabber(line, '\t');
+ if (!tabber) {
+ std::cerr << "Warning: empty n-gram count line being removed\n";
+ continue;
+ }
+ out.AddNGram(*tabber, line);
+ }
+ } catch (const util::EndOfFileException &e) {}
+}
+
+} // namespace lm
+
+#endif // LM_FILTER_COUNT_IO_H
diff --git a/src/kenlm/lm/filter/filter_main.cc b/src/kenlm/lm/filter/filter_main.cc
new file mode 100644
index 0000000..6e89d1f
--- /dev/null
+++ b/src/kenlm/lm/filter/filter_main.cc
@@ -0,0 +1,253 @@
+#include "lm/filter/arpa_io.hh"
+#include "lm/filter/format.hh"
+#include "lm/filter/phrase.hh"
+#ifndef NTHREAD
+#include "lm/filter/thread.hh"
+#endif
+#include "lm/filter/vocab.hh"
+#include "lm/filter/wrapper.hh"
+#include "util/exception.hh"
+#include "util/file_piece.hh"
+
+#include <boost/ptr_container/ptr_vector.hpp>
+
+#include <cstring>
+#include <fstream>
+#include <iostream>
+#include <memory>
+
+namespace lm {
+namespace {
+
+void DisplayHelp(const char *name) { // print usage text for all modes/options to stderr
+ std::cerr
+ << "Usage: " << name << " mode [context] [phrase] [raw|arpa] [threads:m] [batch_size:m] (vocab|model):input_file output_file\n\n"
+ "copy mode just copies, but makes the format nicer for e.g. irstlm's broken\n"
+ " parser.\n"
+ "single mode treats the entire input as a single sentence.\n"
+ "multiple mode filters to multiple sentences in parallel. Each sentence is on\n"
+ " a separate line. A separate file is created for each sentence by appending\n"
+ " the 0-indexed line number to the output file name.\n"
+ "union mode produces one filtered model that is the union of models created by\n"
+ " multiple mode.\n\n"
+ "context means only the context (all but last word) has to pass the filter, but\n"
+ " the entire n-gram is output.\n\n"
+ "phrase means that the vocabulary is actually tab-delimited phrases and that the\n"
+ " phrases can generate the n-gram when assembled in arbitrary order and\n"
+ " clipped. Currently works with multiple or union mode.\n\n"
+ "The file format is set by [raw|arpa] with default arpa:\n"
+ "raw means space-separated tokens, optionally followed by a tab and arbitrary\n"
+ " text. This is useful for ngram count files.\n"
+ "arpa means the ARPA file format for n-gram language models.\n\n"
+#ifndef NTHREAD
+ "threads:m sets m threads (default: concurrency detected by boost)\n" // fixed typo: "conccurrency" -> "concurrency"
+ "batch_size:m sets the batch size for threading. Expect memory usage from this\n"
+ " of 2*threads*batch_size n-grams.\n\n"
+#else
+ "This binary was compiled with -DNTHREAD, disabling threading. If you wanted\n"
+ " threading, compile without this flag against Boost >=1.42.0.\n\n"
+#endif
+ "There are two inputs: vocabulary and model. Either may be given as a file\n"
+ " while the other is on stdin. Specify the type given as a file using\n"
+ " vocab: or model: before the file name. \n\n"
+ "For ARPA format, the output must be seekable. For raw format, it can be a\n"
+ " stream i.e. /dev/stdout\n";
+}
+
+typedef enum {MODE_COPY, MODE_SINGLE, MODE_MULTIPLE, MODE_UNION, MODE_UNSET} FilterMode; // filtering strategy selected on the command line
+typedef enum {FORMAT_ARPA, FORMAT_COUNT} Format; // input representation: ARPA LM or raw count lines
+
+struct Config { // aggregated command-line options with defaults
+ Config() :
+#ifndef NTHREAD
+ batch_size(25000),
+ threads(boost::thread::hardware_concurrency()),
+#endif
+ phrase(false),
+ context(false),
+ format(FORMAT_ARPA)
+ {
+#ifndef NTHREAD
+ if (!threads) threads = 1; // hardware_concurrency() may return 0 when it cannot detect cores
+#endif
+ }
+
+#ifndef NTHREAD
+ size_t batch_size;
+ size_t threads;
+#endif
+ bool phrase;
+ bool context;
+ FilterMode mode; // NOTE(review): not set by this constructor; main() assigns MODE_UNSET before parsing — keep callers doing so
+ Format format;
+};
+
+template <class Format, class Filter, class OutputBuffer, class Output> void RunThreadedFilter(const Config &config, util::FilePiece &in_lm, Filter &filter, Output &output) { // run the filter either inline (1 thread) or through the threaded Controller pipeline
+#ifndef NTHREAD
+ if (config.threads == 1) {
+#endif
+ Format::RunFilter(in_lm, filter, output); // single-threaded path; also the only path when built with -DNTHREAD
+#ifndef NTHREAD
+ } else {
+ typedef Controller<Filter, OutputBuffer, Output> Threaded;
+ Threaded threading(config.batch_size, config.threads * 2, config.threads, filter, output); // threads * 2 in-flight batches
+ Format::RunFilter(in_lm, threading, output);
+ }
+#endif
+}
+
+template <class Format, class Filter, class OutputBuffer, class Output> void RunContextFilter(const Config &config, util::FilePiece &in_lm, Filter filter, Output &output) { // optionally wrap filter so only the n-gram context (all but last word) is tested; filter taken by value so the wrapper owns a copy
+ if (config.context) {
+ ContextFilter<Filter> context_filter(filter);
+ RunThreadedFilter<Format, ContextFilter<Filter>, OutputBuffer, Output>(config, in_lm, context_filter, output);
+ } else {
+ RunThreadedFilter<Format, Filter, OutputBuffer, Output>(config, in_lm, filter, output);
+ }
+}
+
+template <class Format, class Binary> void DispatchBinaryFilter(const Config &config, util::FilePiece &in_lm, const Binary &binary, typename Format::Output &out) { // adapt a yes/no (binary) vocab test to the shared context/threaded plumbing
+ typedef BinaryFilter<Binary> Filter;
+ RunContextFilter<Format, Filter, BinaryOutputBuffer, typename Format::Output>(config, in_lm, Filter(binary), out);
+}
+
+template <class Format> void DispatchFilterModes(const Config &config, std::istream &in_vocab, util::FilePiece &in_lm, const char *out_name) { // select the concrete filter from mode/phrase flags and run it
+ if (config.mode == MODE_MULTIPLE) { // multiple mode writes one output file per input sentence
+ if (config.phrase) {
+ typedef phrase::Multiple Filter;
+ phrase::Substrings substrings;
+ typename Format::Multiple out(out_name, phrase::ReadMultiple(in_vocab, substrings)); // ReadMultiple returns the sentence count
+ RunContextFilter<Format, Filter, MultipleOutputBuffer, typename Format::Multiple>(config, in_lm, Filter(substrings), out);
+ } else {
+ typedef vocab::Multiple Filter;
+ boost::unordered_map<std::string, std::vector<unsigned int> > words;
+ typename Format::Multiple out(out_name, vocab::ReadMultiple(in_vocab, words));
+ RunContextFilter<Format, Filter, MultipleOutputBuffer, typename Format::Multiple>(config, in_lm, Filter(words), out);
+ }
+ return;
+ }
+
+ typename Format::Output out(out_name); // remaining modes share a single output file
+
+ if (config.mode == MODE_COPY) { // copy performs no filtering at all
+ Format::Copy(in_lm, out);
+ return;
+ }
+
+ if (config.mode == MODE_SINGLE) {
+ vocab::Single::Words words;
+ vocab::ReadSingle(in_vocab, words);
+ DispatchBinaryFilter<Format, vocab::Single>(config, in_lm, vocab::Single(words), out);
+ return;
+ }
+
+ if (config.mode == MODE_UNION) {
+ if (config.phrase) {
+ phrase::Substrings substrings;
+ phrase::ReadMultiple(in_vocab, substrings);
+ DispatchBinaryFilter<Format, phrase::Union>(config, in_lm, phrase::Union(substrings), out);
+ } else {
+ vocab::Union::Words words;
+ vocab::ReadMultiple(in_vocab, words);
+ DispatchBinaryFilter<Format, vocab::Union>(config, in_lm, vocab::Union(words), out);
+ }
+ return;
+ }
+}
+
+} // namespace
+} // namespace lm
+
+int main(int argc, char *argv[]) { // CLI entry: parse options, open vocab/model inputs, dispatch by format and mode
+ try {
+ if (argc < 4) { // need at least mode, input, and output
+ lm::DisplayHelp(argv[0]);
+ return 1;
+ }
+
+ // I used to have boost::program_options, but some users didn't want to compile boost.
+ lm::Config config;
+ config.mode = lm::MODE_UNSET; // Config's constructor leaves mode unset
+ for (int i = 1; i < argc - 2; ++i) { // last two args are reserved for input and output files
+ const char *str = argv[i];
+ if (!std::strcmp(str, "copy")) {
+ config.mode = lm::MODE_COPY;
+ } else if (!std::strcmp(str, "single")) {
+ config.mode = lm::MODE_SINGLE;
+ } else if (!std::strcmp(str, "multiple")) {
+ config.mode = lm::MODE_MULTIPLE;
+ } else if (!std::strcmp(str, "union")) {
+ config.mode = lm::MODE_UNION;
+ } else if (!std::strcmp(str, "phrase")) {
+ config.phrase = true;
+ } else if (!std::strcmp(str, "context")) {
+ config.context = true;
+ } else if (!std::strcmp(str, "arpa")) {
+ config.format = lm::FORMAT_ARPA;
+ } else if (!std::strcmp(str, "raw")) {
+ config.format = lm::FORMAT_COUNT;
+#ifndef NTHREAD
+ } else if (!std::strncmp(str, "threads:", 8)) { // prefix options carry a numeric suffix
+ config.threads = boost::lexical_cast<size_t>(str + 8);
+ if (!config.threads) {
+ std::cerr << "Specify at least one thread." << std::endl;
+ return 1;
+ }
+ } else if (!std::strncmp(str, "batch_size:", 11)) {
+ config.batch_size = boost::lexical_cast<size_t>(str + 11);
+ if (config.batch_size < 5000) { // small sizes only warn; zero is fatal
+ std::cerr << "Batch size must be at least one and should probably be >= 5000" << std::endl;
+ if (!config.batch_size) return 1;
+ }
+#endif
+ } else {
+ lm::DisplayHelp(argv[0]);
+ return 1;
+ }
+ }
+
+ if (config.mode == lm::MODE_UNSET) { // a mode keyword is mandatory
+ lm::DisplayHelp(argv[0]);
+ return 1;
+ }
+
+ if (config.phrase && config.mode != lm::MODE_UNION && config.mode != lm::MODE_MULTIPLE) {
+ std::cerr << "Phrase constraint currently only works in multiple or union mode. If you really need it for single, put everything on one line and use union." << std::endl;
+ return 1;
+ }
+
+ bool cmd_is_model = true; // the named positional input defaults to being the model file
+ const char *cmd_input = argv[argc - 2];
+ if (!strncmp(cmd_input, "vocab:", 6)) { // explicit vocab: prefix means stdin carries the model instead
+ cmd_is_model = false;
+ cmd_input += 6;
+ } else if (!strncmp(cmd_input, "model:", 6)) {
+ cmd_input += 6;
+ } else if (strchr(cmd_input, ':')) { // any other prefix is rejected rather than guessed at
+ std::cerr << "Specify vocab: or model: before the input file name, not " << cmd_input << std::endl;
+ return 1;
+ } else {
+ std::cerr << "Assuming that " << cmd_input << " is a model file" << std::endl;
+ }
+ std::ifstream cmd_file;
+ std::istream *vocab; // whichever input is NOT the named file comes from stdin
+ if (cmd_is_model) {
+ vocab = &std::cin;
+ } else {
+ cmd_file.open(cmd_input, std::ios::in);
+ UTIL_THROW_IF(!cmd_file, util::ErrnoException, "Failed to open " << cmd_input);
+ vocab = &cmd_file;
+ }
+
+ util::FilePiece model(cmd_is_model ? util::OpenReadOrThrow(cmd_input) : 0, cmd_is_model ? cmd_input : NULL, &std::cerr); // fd 0 (stdin) when the named file is the vocabulary
+
+ if (config.format == lm::FORMAT_ARPA) {
+ lm::DispatchFilterModes<lm::ARPAFormat>(config, *vocab, model, argv[argc - 1]);
+ } else if (config.format == lm::FORMAT_COUNT) {
+ lm::DispatchFilterModes<lm::CountFormat>(config, *vocab, model, argv[argc - 1]);
+ }
+ return 0;
+ } catch (const std::exception &e) { // report any failure on stderr with a nonzero exit
+ std::cerr << e.what() << std::endl;
+ return 1;
+ }
+}
diff --git a/src/kenlm/lm/filter/format.hh b/src/kenlm/lm/filter/format.hh
new file mode 100644
index 0000000..d453f05
--- /dev/null
+++ b/src/kenlm/lm/filter/format.hh
@@ -0,0 +1,250 @@
+#ifndef LM_FILTER_FORMAT_H
+#define LM_FILTER_FORMAT_H
+
+#include "lm/filter/arpa_io.hh"
+#include "lm/filter/count_io.hh"
+
+#include <boost/lexical_cast.hpp>
+#include <boost/ptr_container/ptr_vector.hpp>
+
+#include <iosfwd>
+
+namespace lm {
+
+template <class Single> class MultipleOutput { // fans AddNGram calls out to one Single output file per sentence
+ private:
+ typedef boost::ptr_vector<Single> Singles;
+ typedef typename Singles::iterator SinglesIterator;
+
+ public:
+ MultipleOutput(const char *prefix, size_t number) { // opens number files named prefix0, prefix1, ...
+ files_.reserve(number);
+ std::string tmp;
+ for (unsigned int i = 0; i < number; ++i) {
+ tmp = prefix;
+ tmp += boost::lexical_cast<std::string>(i); // append the 0-indexed sentence number
+ files_.push_back(new Single(tmp.c_str()));
+ }
+ }
+
+ void AddNGram(const StringPiece &line) { // broadcast the line to every output
+ for (SinglesIterator i = files_.begin(); i != files_.end(); ++i)
+ i->AddNGram(line);
+ }
+
+ template <class Iterator> void AddNGram(const Iterator &begin, const Iterator &end, const StringPiece &line) {
+ for (SinglesIterator i = files_.begin(); i != files_.end(); ++i)
+ i->AddNGram(begin, end, line);
+ }
+
+ void SingleAddNGram(size_t offset, const StringPiece &line) { // route the line to exactly one per-sentence output
+ files_[offset].AddNGram(line);
+ }
+
+ template <class Iterator> void SingleAddNGram(size_t offset, const Iterator &begin, const Iterator &end, const StringPiece &line) {
+ files_[offset].AddNGram(begin, end, line);
+ }
+
+ protected:
+ Singles files_;
+};
+
+class MultipleARPAOutput : public MultipleOutput<ARPAOutput> { // adds the ARPA lifecycle calls (counts header, per-order sections, finish) broadcast to every file
+ public:
+ MultipleARPAOutput(const char *prefix, size_t number) : MultipleOutput<ARPAOutput>(prefix, number) {}
+
+ void ReserveForCounts(std::streampos reserve) {
+ for (boost::ptr_vector<ARPAOutput>::iterator i = files_.begin(); i != files_.end(); ++i)
+ i->ReserveForCounts(reserve);
+ }
+
+ void BeginLength(unsigned int length) {
+ for (boost::ptr_vector<ARPAOutput>::iterator i = files_.begin(); i != files_.end(); ++i)
+ i->BeginLength(length);
+ }
+
+ void EndLength(unsigned int length) {
+ for (boost::ptr_vector<ARPAOutput>::iterator i = files_.begin(); i != files_.end(); ++i)
+ i->EndLength(length);
+ }
+
+ void Finish() {
+ for (boost::ptr_vector<ARPAOutput>::iterator i = files_.begin(); i != files_.end(); ++i)
+ i->Finish();
+ }
+};
+
+template <class Filter, class Output> class DispatchInput { // glue: forwards parsed n-grams into filter_, which writes survivors to output_
+ public:
+ DispatchInput(Filter &filter, Output &output) : filter_(filter), output_(output) {}
+
+/* template <class Iterator> void AddNGram(const Iterator &begin, const Iterator &end, const StringPiece &line) {
+ filter_.AddNGram(begin, end, line, output_);
+ }*/
+
+ void AddNGram(const StringPiece &ngram, const StringPiece &line) {
+ filter_.AddNGram(ngram, line, output_);
+ }
+
+ protected:
+ Filter &filter_;
+ Output &output_;
+};
+
+template <class Filter, class Output> class DispatchARPAInput : public DispatchInput<Filter, Output> { // DispatchInput plus ARPA section callbacks forwarded to the output
+ private:
+ typedef DispatchInput<Filter, Output> B;
+
+ public:
+ DispatchARPAInput(Filter &filter, Output &output) : B(filter, output) {}
+
+ void ReserveForCounts(std::streampos reserve) { B::output_.ReserveForCounts(reserve); }
+ void BeginLength(unsigned int length) { B::output_.BeginLength(length); }
+
+ void EndLength(unsigned int length) {
+ B::filter_.Flush(); // let the filter emit anything it buffered before the section ends
+ B::output_.EndLength(length);
+ }
+ void Finish() { B::output_.Finish(); }
+};
+
+struct ARPAFormat { // static policy class binding the ARPA reader to its output and filter dispatchers
+ typedef ARPAOutput Output;
+ typedef MultipleARPAOutput Multiple;
+ static void Copy(util::FilePiece &in, Output &out) { // unfiltered passthrough
+ ReadARPA(in, out);
+ }
+ template <class Filter, class Out> static void RunFilter(util::FilePiece &in, Filter &filter, Out &output) {
+ DispatchARPAInput<Filter, Out> dispatcher(filter, output);
+ ReadARPA(in, dispatcher);
+ }
+};
+
+struct CountFormat { // static policy class for raw "ngram<TAB>count" files
+ typedef CountOutput Output;
+ typedef MultipleOutput<Output> Multiple;
+ static void Copy(util::FilePiece &in, Output &out) { // unfiltered passthrough
+ ReadCount(in, out);
+ }
+ template <class Filter, class Out> static void RunFilter(util::FilePiece &in, Filter &filter, Out &output) {
+ DispatchInput<Filter, Out> dispatcher(filter, output);
+ ReadCount(in, dispatcher);
+ }
+};
+
+/* For multithreading, the buffer classes hold batches of filter inputs and
+ * outputs in memory. The strings get reused a lot, so keep them around
+ * instead of clearing each time.
+ */
+class InputBuffer { // accumulates (ngram, line) pairs for a worker batch; Line slots are reused across batches
+ public:
+ InputBuffer() : actual_(0) {}
+
+ void Reserve(size_t size) { lines_.reserve(size); }
+
+ template <class Output> void AddNGram(const StringPiece &ngram, const StringPiece &line, Output &output) {
+ if (lines_.size() == actual_) lines_.resize(lines_.size() + 1); // grow only when no reusable slot remains
+ // TODO avoid this copy.
+ std::string &copied = lines_[actual_].line;
+ copied.assign(line.data(), line.size());
+ lines_[actual_].ngram.set(copied.data() + (ngram.data() - line.data()), ngram.size()); // re-point ngram into the copied line at the same offset
+ ++actual_;
+ }
+
+ template <class Filter, class Output> void CallFilter(Filter &filter, Output &output) const { // replay the buffered pairs through the filter
+ for (std::vector<Line>::const_iterator i = lines_.begin(); i != lines_.begin() + actual_; ++i) {
+ filter.AddNGram(i->ngram, i->line, output);
+ }
+ }
+
+ void Clear() { actual_ = 0; } // logical clear; keeps the strings for reuse
+ bool Empty() { return actual_ == 0; }
+ size_t Size() { return actual_; }
+
+ private:
+ struct Line {
+ std::string line;
+ StringPiece ngram; // view into line, not separately owned
+ };
+
+ size_t actual_;
+
+ std::vector<Line> lines_;
+};
+
+class BinaryOutputBuffer { // queues surviving lines from a worker until Flush sends them downstream
+ public:
+ BinaryOutputBuffer() {}
+
+ void Reserve(size_t size) {
+ lines_.reserve(size);
+ }
+
+ void AddNGram(const StringPiece &line) {
+ lines_.push_back(line); // NOTE(review): stores a view only; the referenced text must stay alive until Flush — confirm against InputBuffer's lifetime
+ }
+
+ template <class Output> void Flush(Output &output) {
+ for (std::vector<StringPiece>::const_iterator i = lines_.begin(); i != lines_.end(); ++i) {
+ output.AddNGram(*i);
+ }
+ lines_.clear();
+ }
+
+ private:
+ std::vector<StringPiece> lines_;
+};
+
+class MultipleOutputBuffer { // buffers per-sentence routing decisions; merges consecutive SingleAddNGram calls for the same line
+ public:
+ MultipleOutputBuffer() : last_(NULL) {}
+
+ void Reserve(size_t size) {
+ annotated_.reserve(size);
+ }
+
+ void AddNGram(const StringPiece &line) { // broadcast entry: empty systems vector means "send to all"
+ annotated_.resize(annotated_.size() + 1);
+ annotated_.back().line = line;
+ }
+
+ void SingleAddNGram(size_t offset, const StringPiece &line) {
+ if ((line.data() == last_.data()) && (line.length() == last_.length())) { // same line as the previous call: just add another destination
+ annotated_.back().systems.push_back(offset);
+ } else {
+ annotated_.resize(annotated_.size() + 1);
+ annotated_.back().systems.push_back(offset);
+ annotated_.back().line = line;
+ last_ = line;
+ }
+ }
+
+ template <class Output> void Flush(Output &output) { // replay buffered decisions against the real output
+ for (std::vector<Annotated>::const_iterator i = annotated_.begin(); i != annotated_.end(); ++i) {
+ if (i->systems.empty()) {
+ output.AddNGram(i->line);
+ } else {
+ for (std::vector<size_t>::const_iterator j = i->systems.begin(); j != i->systems.end(); ++j) {
+ output.SingleAddNGram(*j, i->line);
+ }
+ }
+ }
+ annotated_.clear();
+ }
+
+ private:
+ struct Annotated {
+ // If this is empty, send to all systems.
+ // A filter should never send to all systems and send to a single one.
+ std::vector<size_t> systems;
+ StringPiece line;
+ };
+
+ StringPiece last_; // identity (data pointer + length) of the most recent line, for run-length merging
+
+ std::vector<Annotated> annotated_;
+};
+
+} // namespace lm
+
+#endif // LM_FILTER_FORMAT_H
diff --git a/src/kenlm/lm/filter/phrase.cc b/src/kenlm/lm/filter/phrase.cc
new file mode 100644
index 0000000..d8260d5
--- /dev/null
+++ b/src/kenlm/lm/filter/phrase.cc
@@ -0,0 +1,292 @@
+#include "lm/filter/phrase.hh"
+
+#include "lm/filter/format.hh"
+
+#include <algorithm>
+#include <functional>
+#include <iostream>
+#include <queue>
+#include <string>
+#include <vector>
+
+#include <cctype>
+
+namespace lm {
+namespace phrase {
+
+unsigned int ReadMultiple(std::istream &in, Substrings &out) { // parse one sentence per line of tab-delimited, space-separated phrases; returns the sentence count
+ bool sentence_content = false;
+ unsigned int sentence_id = 0;
+ std::vector<Hash> phrase;
+ std::string word;
+ while (in) {
+ char c;
+ // Gather a word.
+ while (!isspace(c = in.get()) && in) word += c;
+ // Treat EOF like a newline.
+ if (!in) c = '\n';
+ // Add the word to the phrase.
+ if (!word.empty()) {
+ phrase.push_back(util::MurmurHashNative(word.data(), word.size())); // words are stored as hashes only
+ word.clear();
+ }
+ if (c == ' ') continue;
+ // It's more than just a space. Close out the phrase.
+ if (!phrase.empty()) {
+ sentence_content = true;
+ out.AddPhrase(sentence_id, phrase.begin(), phrase.end());
+ phrase.clear();
+ }
+ if (c == '\t' || c == '\v') continue;
+ // It's more than a space or tab: a newline.
+ if (sentence_content) {
+ ++sentence_id;
+ sentence_content = false;
+ }
+ }
+ if (!in.eof()) in.exceptions(std::istream::failbit | std::istream::badbit); // non-EOF failure: enable exceptions so the error is thrown
+ return sentence_id + sentence_content; // counts a trailing sentence that had no final newline
+}
+
+namespace {
+typedef unsigned int Sentence; // 0-indexed sentence id
+typedef std::vector<Sentence> Sentences; // sorted list of sentence ids
+} // namespace
+
+namespace detail {
+
+const StringPiece kEndSentence("</s>"); // sentence-end token; MakeHashes stops hashing at this word
+
+class Arc { // an edge in the phrase-match graph, iterating a sorted list of sentence ids
+ public:
+ Arc() {}
+
+ // For arcs from one vertex to another.
+ void SetPhrase(detail::Vertex &from, detail::Vertex &to, const Sentences &intersect) {
+ Set(to, intersect);
+ from_ = &from;
+ }
+
+ /* For arcs from before the n-gram begins to somewhere in the n-gram (right
+ * aligned). These have no from_ vertex; it implicitly matches every
+ * sentence. This also handles when the n-gram is a substring of a phrase.
+ */
+ void SetRight(detail::Vertex &to, const Sentences &complete) {
+ Set(to, complete);
+ from_ = NULL; // no source vertex: every sentence is a potential match
+ }
+
+ Sentence Current() const {
+ return *current_;
+ }
+
+ bool Empty() const {
+ return current_ == last_;
+ }
+
+ /* When this function returns:
+ * If Empty() then there's nothing left from this intersection.
+ *
+ * If Current() == to then to is part of the intersection.
+ *
+ * Otherwise, Current() > to. In this case, to is not part of the
+ * intersection and neither is anything < Current(). To determine if
+ * any value >= Current() is in the intersection, call LowerBound again
+ * with the value.
+ */
+ void LowerBound(const Sentence to);
+
+ private:
+ void Set(detail::Vertex &to, const Sentences &sentences);
+
+ const Sentence *current_; // cursor into the sorted sentence list
+ const Sentence *last_; // one past the end of the list
+ detail::Vertex *from_; // source vertex, or NULL for right-aligned arcs
+};
+
+struct ArcGreater : public std::binary_function<const Arc *, const Arc *, bool> { // greater-than comparator so priority_queue pops the smallest Current() first
+ bool operator()(const Arc *first, const Arc *second) const {
+ return first->Current() > second->Current();
+ }
+};
+
+class Vertex { // union node: merges its incoming arcs' sentence lists via a min-heap
+ public:
+ Vertex() : current_(0) {}
+
+ Sentence Current() const {
+ return current_;
+ }
+
+ bool Empty() const {
+ return incoming_.empty(); // exhausted arcs are removed, so empty queue means no matches remain
+ }
+
+ void LowerBound(const Sentence to);
+
+ private:
+ friend class Arc;
+
+ void AddIncoming(Arc *arc) {
+ if (!arc->Empty()) incoming_.push(arc); // never enqueue an already-exhausted arc
+ }
+
+ unsigned int current_;
+ std::priority_queue<Arc*, std::vector<Arc*>, ArcGreater> incoming_; // min-heap by Arc::Current()
+};
+
+void Arc::LowerBound(const Sentence to) { // advance to the first sentence >= to that the source vertex (if any) can also reach
+ current_ = std::lower_bound(current_, last_, to);
+ // If *current_ > to, don't advance from_. The intervening values of
+ // from_ may be useful for another one of its outgoing arcs.
+ if (!from_ || Empty() || (Current() > to)) return;
+ assert(Current() == to);
+ from_->LowerBound(to); // found to locally; require the source vertex to confirm it too
+ if (from_->Empty()) {
+ current_ = last_; // source exhausted: this arc can never match again
+ return;
+ }
+ assert(from_->Current() >= to);
+ if (from_->Current() > to) {
+ current_ = std::lower_bound(current_ + 1, last_, from_->Current()); // skip ahead to the source's next candidate
+ }
+}
+
+void Arc::Set(Vertex &to, const Sentences &sentences) { // initialize the iteration range and register this arc with its target vertex
+ current_ = &*sentences.begin();
+ last_ = &*sentences.end();
+ to.AddIncoming(this);
+}
+
+void Vertex::LowerBound(const Sentence to) { // advance current_ to the smallest confirmed element >= to in the union of incoming arcs
+ if (Empty()) return;
+ // Union lower bound.
+ while (true) {
+ Arc *top = incoming_.top(); // arc with the smallest Current()
+ if (top->Current() > to) {
+ current_ = top->Current(); // best available bound; not necessarily a confirmed match for to
+ return;
+ }
+ // If top->Current() == to, we still need to verify that's an actual
+ // element and not just a bound.
+ incoming_.pop();
+ top->LowerBound(to);
+ if (!top->Empty()) {
+ incoming_.push(top); // reinsert at its new position in the heap
+ if (top->Current() == to) {
+ current_ = to;
+ return;
+ }
+ } else if (Empty()) {
+ return; // last arc just ran out
+ }
+ }
+}
+
+} // namespace detail
+
+namespace {
+
+void BuildGraph(const Substrings &phrase, const std::vector<Hash> &hashes, detail::Vertex *const vertices, detail::Arc *free_arc) { // build vertices/arcs by matching every substring of the n-gram against the phrase table
+ using detail::Vertex;
+ using detail::Arc;
+ assert(!hashes.empty());
+
+ const Hash *const first_word = &*hashes.begin();
+ const Hash *const last_word = &*hashes.end() - 1;
+
+ Hash hash = 0;
+ const Sentences *found;
+ // Phrases starting at or before the first word in the n-gram.
+ {
+ Vertex *vertex = vertices;
+ for (const Hash *word = first_word; ; ++word, ++vertex) {
+ hash = util::MurmurHashNative(&hash, sizeof(uint64_t), *word); // incremental hash chaining, word by word
+ // Now hash is [hashes.begin(), word].
+ if (word == last_word) {
+ if (phrase.FindSubstring(hash, found)) // whole n-gram inside one phrase
+ (free_arc++)->SetRight(*vertex, *found);
+ break;
+ }
+ if (!phrase.FindRight(hash, found)) break; // no phrase ends with this prefix; longer prefixes can't either
+ (free_arc++)->SetRight(*vertex, *found);
+ }
+ }
+
+ // Phrases starting at the second or later word in the n-gram.
+ Vertex *vertex_from = vertices;
+ for (const Hash *word_from = first_word + 1; word_from != &*hashes.end(); ++word_from, ++vertex_from) {
+ hash = 0;
+ Vertex *vertex_to = vertex_from + 1;
+ for (const Hash *word_to = word_from; ; ++word_to, ++vertex_to) {
+ // Notice that word_to and vertex_to have the same index.
+ hash = util::MurmurHashNative(&hash, sizeof(uint64_t), *word_to);
+ // Now hash covers [word_from, word_to].
+ if (word_to == last_word) {
+ if (phrase.FindLeft(hash, found)) // phrase may continue past the n-gram's end
+ (free_arc++)->SetPhrase(*vertex_from, *vertex_to, *found);
+ break;
+ }
+ if (!phrase.FindPhrase(hash, found)) break; // interior span must be a complete phrase
+ (free_arc++)->SetPhrase(*vertex_from, *vertex_to, *found);
+ }
+ }
+}
+
+} // namespace
+
+namespace detail {
+
+// Here instead of header due to forward declaration.
+// Here instead of header due to forward declaration.
+ConditionCommon::ConditionCommon(const Substrings &substrings) : substrings_(substrings) {}
+
+// Rest of the variables are temporaries anyway
+ConditionCommon::ConditionCommon(const ConditionCommon &from) : substrings_(from.substrings_) {}
+
+ConditionCommon::~ConditionCommon() {}
+
+detail::Vertex &ConditionCommon::MakeGraph() { // rebuild the match graph for the current hashes_; returns the vertex for the last word
+ assert(!hashes_.empty());
+ vertices_.clear();
+ vertices_.resize(hashes_.size());
+ arcs_.clear();
+ // One for every substring.
+ arcs_.resize(((hashes_.size() + 1) * hashes_.size()) / 2); // n*(n+1)/2 possible substrings
+ BuildGraph(substrings_, hashes_, &*vertices_.begin(), &*arcs_.begin());
+ return vertices_[hashes_.size() - 1];
+}
+
+} // namespace detail
+
+bool Union::Evaluate() { // scan for any sentence id confirmed at the final vertex; true means the n-gram passes
+ detail::Vertex &last_vertex = MakeGraph();
+ unsigned int lower = 0;
+ while (true) {
+ last_vertex.LowerBound(lower);
+ if (last_vertex.Empty()) return false; // all candidate sentences exhausted
+ if (last_vertex.Current() == lower) return true; // confirmed match at lower
+ lower = last_vertex.Current(); // jump to the next candidate bound
+ }
+}
+
+template <class Output> void Multiple::Evaluate(const StringPiece &line, Output &output) { // send line to the output of every sentence confirmed at the final vertex
+ detail::Vertex &last_vertex = MakeGraph();
+ unsigned int lower = 0;
+ while (true) {
+ last_vertex.LowerBound(lower);
+ if (last_vertex.Empty()) return;
+ if (last_vertex.Current() == lower) {
+ output.SingleAddNGram(lower, line); // sentence `lower` matches
+ ++lower;
+ } else {
+ lower = last_vertex.Current(); // skip ahead to the next candidate
+ }
+ }
+}
+
+// Explicit instantiations for every output type used by the filter binaries.
+template void Multiple::Evaluate<CountFormat::Multiple>(const StringPiece &line, CountFormat::Multiple &output);
+template void Multiple::Evaluate<ARPAFormat::Multiple>(const StringPiece &line, ARPAFormat::Multiple &output);
+template void Multiple::Evaluate<MultipleOutputBuffer>(const StringPiece &line, MultipleOutputBuffer &output);
+
+} // namespace phrase
+} // namespace lm
diff --git a/src/kenlm/lm/filter/phrase.hh b/src/kenlm/lm/filter/phrase.hh
new file mode 100644
index 0000000..5227ab2
--- /dev/null
+++ b/src/kenlm/lm/filter/phrase.hh
@@ -0,0 +1,168 @@
+#ifndef LM_FILTER_PHRASE_H
+#define LM_FILTER_PHRASE_H
+
+#include "util/murmur_hash.hh"
+#include "util/string_piece.hh"
+#include "util/tokenize_piece.hh"
+
+#include <boost/unordered_map.hpp>
+
+#include <iosfwd>
+#include <vector>
+
+#define LM_FILTER_PHRASE_METHOD(caps, lower) \
+bool Find##caps(Hash key, const std::vector<unsigned int> *&out) const {\
+ Table::const_iterator i(table_.find(key));\
+ if (i==table_.end()) return false; \
+ out = &i->second.lower; \
+ return true; \
+} // expands to FindSubstring/FindLeft/FindRight/FindPhrase: look up key and expose the matching sentence-id vector
+
+namespace lm {
+namespace phrase {
+
+typedef uint64_t Hash;
+
+class Substrings { // hash table mapping every phrase substring to the sentence sets it appears in
+ private:
+ /* This is the value in a hash table where the key is a string. It indicates
+ * four sets of sentences:
+ * substring is sentences with a phrase containing the key as a substring.
+ * left is sentences with a phrase that begins with the key (left aligned).
+ * right is sentences with a phrase that ends with the key (right aligned).
+ * phrase is sentences where the key is a phrase.
+ * Each set is encoded as a vector of sentence ids in increasing order.
+ */
+ struct SentenceRelation {
+ std::vector<unsigned int> substring, left, right, phrase;
+ };
+ /* Most of the CPU is hash table lookups, so let's not complicate it with
+ * vector equality comparisons. If a collision happens, the SentenceRelation
+ * structure will contain the union of sentence ids over the colliding strings.
+ * In that case, the filter will be slightly more permissive.
+ * The key here is the same as boost's hash of std::vector<std::string>.
+ */
+ typedef boost::unordered_map<Hash, SentenceRelation> Table;
+
+ public:
+ Substrings() {}
+
+ /* If the string isn't a substring of any phrase, return NULL. Otherwise,
+ * return a pointer to std::vector<unsigned int> listing sentences with
+ * matching phrases. This set may be empty for Left, Right, or Phrase.
+ * Example: const std::vector<unsigned int> *FindSubstring(Hash key)
+ */
+ LM_FILTER_PHRASE_METHOD(Substring, substring)
+ LM_FILTER_PHRASE_METHOD(Left, left)
+ LM_FILTER_PHRASE_METHOD(Right, right)
+ LM_FILTER_PHRASE_METHOD(Phrase, phrase)
+
+#pragma GCC diagnostic ignored "-Wuninitialized" // end != finish so there's always an initialization
+ // sentence_id must be non-decreasing. Iterators are over words in the phrase.
+ template <class Iterator> void AddPhrase(unsigned int sentence_id, const Iterator &begin, const Iterator &end) {
+ // Iterate over all substrings.
+ for (Iterator start = begin; start != end; ++start) {
+ Hash hash = 0;
+ SentenceRelation *relation;
+ for (Iterator finish = start; finish != end; ++finish) {
+ hash = util::MurmurHashNative(&hash, sizeof(uint64_t), *finish); // incremental hash of [start, finish]
+ // Now hash is of [start, finish].
+ relation = &table_[hash];
+ AppendSentence(relation->substring, sentence_id);
+ if (start == begin) AppendSentence(relation->left, sentence_id);
+ }
+ AppendSentence(relation->right, sentence_id); // relation still points at the full-suffix entry
+ if (start == begin) AppendSentence(relation->phrase, sentence_id);
+ }
+ }
+
+ private:
+ void AppendSentence(std::vector<unsigned int> &vec, unsigned int sentence_id) {
+ if (vec.empty() || vec.back() != sentence_id) vec.push_back(sentence_id); // dedupe relies on non-decreasing sentence_id
+ }
+
+ Table table_;
+};
+
+// Read a file with one sentence per line containing tab-delimited phrases of
+// space-separated words.
+unsigned int ReadMultiple(std::istream &in, Substrings &out);
+
+namespace detail {
+extern const StringPiece kEndSentence;
+
+template <class Iterator> void MakeHashes(Iterator i, const Iterator &end, std::vector<Hash> &hashes) { // hash each word, skipping a leading <tag> and stopping at </s>
+ hashes.clear();
+ if (i == end) return;
+ // TODO: check strict phrase boundaries after <s> and before </s>. For now, just skip tags.
+ if ((i->data()[0] == '<') && (i->data()[i->size() - 1] == '>')) {
+ ++i; // skip a leading <...> token such as <s>
+ }
+ for (; i != end && (*i != kEndSentence); ++i) {
+ hashes.push_back(util::MurmurHashNative(i->data(), i->size()));
+ }
+}
+
+class Vertex;
+class Arc;
+
+class ConditionCommon { // shared state for phrase-filter conditions: word hashes plus graph scratch space
+ protected:
+ ConditionCommon(const Substrings &substrings);
+ ConditionCommon(const ConditionCommon &from);
+
+ ~ConditionCommon();
+
+ detail::Vertex &MakeGraph();
+
+ // Temporaries in PassNGram and Evaluate to avoid reallocation.
+ std::vector<Hash> hashes_;
+
+ private:
+ std::vector<detail::Vertex> vertices_; // one per word of the current n-gram
+ std::vector<detail::Arc> arcs_; // one per possible substring
+
+ const Substrings &substrings_;
+};
+
+} // namespace detail
+
+class Union : public detail::ConditionCommon { // binary filter: PassNGram reports whether the n-gram survives
+ public:
+ explicit Union(const Substrings &substrings) : detail::ConditionCommon(substrings) {}
+
+ template <class Iterator> bool PassNGram(const Iterator &begin, const Iterator &end) {
+ detail::MakeHashes(begin, end, hashes_);
+ return hashes_.empty() || Evaluate(); // an n-gram of only tags/</s> always passes
+ }
+
+ private:
+ bool Evaluate();
+};
+
+class Multiple : public detail::ConditionCommon { // per-sentence filter: routes each n-gram to the outputs of matching sentences
+ public:
+ explicit Multiple(const Substrings &substrings) : detail::ConditionCommon(substrings) {}
+
+ template <class Iterator, class Output> void AddNGram(const Iterator &begin, const Iterator &end, const StringPiece &line, Output &output) {
+ detail::MakeHashes(begin, end, hashes_);
+ if (hashes_.empty()) {
+ output.AddNGram(line); // nothing to test: broadcast to every output
+ } else {
+ Evaluate(line, output);
+ }
+ }
+
+ template <class Output> void AddNGram(const StringPiece &ngram, const StringPiece &line, Output &output) { // convenience overload that tokenizes the n-gram on spaces
+ AddNGram(util::TokenIter<util::SingleCharacter, true>(ngram, ' '), util::TokenIter<util::SingleCharacter, true>::end(), line, output);
+ }
+
+ void Flush() const {} // stateless: nothing buffered between n-grams
+
+ private:
+ template <class Output> void Evaluate(const StringPiece &line, Output &output);
+};
+
+} // namespace phrase
+} // namespace lm
+#endif // LM_FILTER_PHRASE_H
diff --git a/src/kenlm/lm/filter/phrase_table_vocab_main.cc b/src/kenlm/lm/filter/phrase_table_vocab_main.cc
new file mode 100644
index 0000000..9ffa35f
--- /dev/null
+++ b/src/kenlm/lm/filter/phrase_table_vocab_main.cc
@@ -0,0 +1,165 @@
+#include "util/file_stream.hh"
+#include "util/file_piece.hh"
+#include "util/murmur_hash.hh"
+#include "util/pool.hh"
+#include "util/string_piece.hh"
+#include "util/string_piece_hash.hh"
+#include "util/tokenize_piece.hh"
+
+#include <boost/unordered_map.hpp>
+#include <boost/unordered_set.hpp>
+
+#include <cstddef>
+#include <vector>
+
+namespace {
+
+struct MutablePiece {
+ mutable StringPiece behind;
+ bool operator==(const MutablePiece &other) const {
+ return behind == other.behind;
+ }
+};
+
+std::size_t hash_value(const MutablePiece &m) {
+ return hash_value(m.behind);
+}
+
+class InternString {
+ public:
+ const char *Add(StringPiece str) {
+ MutablePiece mut;
+ mut.behind = str;
+ std::pair<boost::unordered_set<MutablePiece>::iterator, bool> res(strs_.insert(mut));
+ if (res.second) {
+ void *mem = backing_.Allocate(str.size() + 1);
+ memcpy(mem, str.data(), str.size());
+ static_cast<char*>(mem)[str.size()] = 0;
+ res.first->behind = StringPiece(static_cast<char*>(mem), str.size());
+ }
+ return res.first->behind.data();
+ }
+
+ private:
+ util::Pool backing_;
+ boost::unordered_set<MutablePiece> strs_;
+};
+
+class TargetWords {
+ public:
+ void Introduce(StringPiece source) {
+ vocab_.resize(vocab_.size() + 1);
+ std::vector<unsigned int> temp(1, vocab_.size() - 1);
+ Add(temp, source);
+ }
+
+ void Add(const std::vector<unsigned int> &sentences, StringPiece target) {
+ if (sentences.empty()) return;
+ interns_.clear();
+ for (util::TokenIter<util::SingleCharacter, true> i(target, ' '); i; ++i) {
+ interns_.push_back(intern_.Add(*i));
+ }
+ for (std::vector<unsigned int>::const_iterator i(sentences.begin()); i != sentences.end(); ++i) {
+ boost::unordered_set<const char *> &vocab = vocab_[*i];
+ for (std::vector<const char *>::const_iterator j = interns_.begin(); j != interns_.end(); ++j) {
+ vocab.insert(*j);
+ }
+ }
+ }
+
+ void Print() const {
+ util::FileStream out(1);
+ for (std::vector<boost::unordered_set<const char *> >::const_iterator i = vocab_.begin(); i != vocab_.end(); ++i) {
+ for (boost::unordered_set<const char *>::const_iterator j = i->begin(); j != i->end(); ++j) {
+ out << *j << ' ';
+ }
+ out << '\n';
+ }
+ }
+
+ private:
+ InternString intern_;
+
+ std::vector<boost::unordered_set<const char *> > vocab_;
+
+ // Temporary in Add.
+ std::vector<const char *> interns_;
+};
+
+class Input {
+ public:
+ explicit Input(std::size_t max_length)
+ : max_length_(max_length), sentence_id_(0), empty_() {}
+
+ void AddSentence(StringPiece sentence, TargetWords &targets) {
+ canonical_.clear();
+ starts_.clear();
+ starts_.push_back(0);
+ for (util::TokenIter<util::AnyCharacter, true> i(sentence, StringPiece("\0 \t", 3)); i; ++i) {
+ canonical_.append(i->data(), i->size());
+ canonical_ += ' ';
+ starts_.push_back(canonical_.size());
+ }
+ targets.Introduce(canonical_);
+ for (std::size_t i = 0; i < starts_.size() - 1; ++i) {
+ std::size_t subtract = starts_[i];
+ const char *start = &canonical_[subtract];
+ for (std::size_t j = i + 1; j < std::min(starts_.size(), i + max_length_ + 1); ++j) {
+ map_[util::MurmurHash64A(start, &canonical_[starts_[j]] - start - 1)].push_back(sentence_id_);
+ }
+ }
+ ++sentence_id_;
+ }
+
+ // Assumes single space-delimited phrase with no space at the beginning or end.
+ const std::vector<unsigned int> &Matches(StringPiece phrase) const {
+ Map::const_iterator i = map_.find(util::MurmurHash64A(phrase.data(), phrase.size()));
+ return i == map_.end() ? empty_ : i->second;
+ }
+
+ private:
+ const std::size_t max_length_;
+
+ // hash of phrase is the key, array of sentences is the value.
+ typedef boost::unordered_map<uint64_t, std::vector<unsigned int> > Map;
+ Map map_;
+
+ std::size_t sentence_id_;
+
+ // Temporaries in AddSentence.
+ std::string canonical_;
+ std::vector<std::size_t> starts_;
+
+ const std::vector<unsigned int> empty_;
+};
+
+} // namespace
+
+int main(int argc, char *argv[]) {
+ if (argc != 2) {
+ std::cerr << "Expected source text on the command line" << std::endl;
+ return 1;
+ }
+ Input input(7);
+ TargetWords targets;
+ try {
+ util::FilePiece inputs(argv[1], &std::cerr);
+ while (true)
+ input.AddSentence(inputs.ReadLine(), targets);
+ } catch (const util::EndOfFileException &e) {}
+
+ util::FilePiece table(0, NULL, &std::cerr);
+ StringPiece line;
+ const StringPiece pipes("|||");
+ while (true) {
+ try {
+ line = table.ReadLine();
+ } catch (const util::EndOfFileException &e) { break; }
+ util::TokenIter<util::MultiCharacter> it(line, pipes);
+ StringPiece source(*it);
+ if (!source.empty() && source[source.size() - 1] == ' ')
+ source.remove_suffix(1);
+ targets.Add(input.Matches(source), *++it);
+ }
+ targets.Print();
+}
diff --git a/src/kenlm/lm/filter/thread.hh b/src/kenlm/lm/filter/thread.hh
new file mode 100644
index 0000000..88e069c
--- /dev/null
+++ b/src/kenlm/lm/filter/thread.hh
@@ -0,0 +1,167 @@
+#ifndef LM_FILTER_THREAD_H
+#define LM_FILTER_THREAD_H
+
+#include "util/thread_pool.hh"
+
+#include <boost/utility/in_place_factory.hpp>
+
+#include <deque>
+#include <stack>
+
+namespace lm {
+
+template <class OutputBuffer> class ThreadBatch {
+ public:
+ ThreadBatch() {}
+
+ void Reserve(size_t size) {
+ input_.Reserve(size);
+ output_.Reserve(size);
+ }
+
+ // File reading thread.
+ InputBuffer &Fill(uint64_t sequence) {
+ sequence_ = sequence;
+ // Why wait until now to clear instead of after output? free in the same
+ // thread as allocated.
+ input_.Clear();
+ return input_;
+ }
+
+ // Filter worker thread.
+ template <class Filter> void CallFilter(Filter &filter) {
+ input_.CallFilter(filter, output_);
+ }
+
+ uint64_t Sequence() const { return sequence_; }
+
+ // File writing thread.
+ template <class RealOutput> void Flush(RealOutput &output) {
+ output_.Flush(output);
+ }
+
+ private:
+ InputBuffer input_;
+ OutputBuffer output_;
+
+ uint64_t sequence_;
+};
+
+template <class Batch, class Filter> class FilterWorker {
+ public:
+ typedef Batch *Request;
+
+ FilterWorker(const Filter &filter, util::PCQueue<Request> &done) : filter_(filter), done_(done) {}
+
+ void operator()(Request request) {
+ request->CallFilter(filter_);
+ done_.Produce(request);
+ }
+
+ private:
+ Filter filter_;
+
+ util::PCQueue<Request> &done_;
+};
+
+// There should only be one OutputWorker.
+template <class Batch, class Output> class OutputWorker {
+ public:
+ typedef Batch *Request;
+
+ OutputWorker(Output &output, util::PCQueue<Request> &done) : output_(output), done_(done), base_sequence_(0) {}
+
+ void operator()(Request request) {
+ assert(request->Sequence() >= base_sequence_);
+ // Assemble the output in order.
+ uint64_t pos = request->Sequence() - base_sequence_;
+ if (pos >= ordering_.size()) {
+ ordering_.resize(pos + 1, NULL);
+ }
+ ordering_[pos] = request;
+ while (!ordering_.empty() && ordering_.front()) {
+ ordering_.front()->Flush(output_);
+ done_.Produce(ordering_.front());
+ ordering_.pop_front();
+ ++base_sequence_;
+ }
+ }
+
+ private:
+ Output &output_;
+
+ util::PCQueue<Request> &done_;
+
+ std::deque<Request> ordering_;
+
+ uint64_t base_sequence_;
+};
+
+template <class Filter, class OutputBuffer, class RealOutput> class Controller : boost::noncopyable {
+ private:
+ typedef ThreadBatch<OutputBuffer> Batch;
+
+ public:
+ Controller(size_t batch_size, size_t queue, size_t workers, const Filter &filter, RealOutput &output)
+ : batch_size_(batch_size), queue_size_(queue),
+ batches_(queue),
+ to_read_(queue),
+ output_(queue, 1, boost::in_place(boost::ref(output), boost::ref(to_read_)), NULL),
+ filter_(queue, workers, boost::in_place(boost::ref(filter), boost::ref(output_.In())), NULL),
+ sequence_(0) {
+ for (size_t i = 0; i < queue; ++i) {
+ batches_[i].Reserve(batch_size);
+ local_read_.push(&batches_[i]);
+ }
+ NewInput();
+ }
+
+ void AddNGram(const StringPiece &ngram, const StringPiece &line, RealOutput &output) {
+ input_->AddNGram(ngram, line, output);
+ if (input_->Size() == batch_size_) {
+ FlushInput();
+ NewInput();
+ }
+ }
+
+ void Flush() {
+ FlushInput();
+ while (local_read_.size() < queue_size_) {
+ MoveRead();
+ }
+ NewInput();
+ }
+
+ private:
+ void FlushInput() {
+ if (input_->Empty()) return;
+ filter_.Produce(local_read_.top());
+ local_read_.pop();
+ if (local_read_.empty()) MoveRead();
+ }
+
+ void NewInput() {
+ input_ = &local_read_.top()->Fill(sequence_++);
+ }
+
+ void MoveRead() {
+ local_read_.push(to_read_.Consume());
+ }
+
+ const size_t batch_size_;
+ const size_t queue_size_;
+
+ std::vector<Batch> batches_;
+
+ util::PCQueue<Batch*> to_read_;
+ std::stack<Batch*> local_read_;
+ util::ThreadPool<OutputWorker<Batch, RealOutput> > output_;
+ util::ThreadPool<FilterWorker<Batch, Filter> > filter_;
+
+ uint64_t sequence_;
+ InputBuffer *input_;
+};
+
+} // namespace lm
+
+#endif // LM_FILTER_THREAD_H
diff --git a/src/kenlm/lm/filter/vocab.cc b/src/kenlm/lm/filter/vocab.cc
new file mode 100644
index 0000000..2aca4fc
--- /dev/null
+++ b/src/kenlm/lm/filter/vocab.cc
@@ -0,0 +1,53 @@
+#include "lm/filter/vocab.hh"
+
+#include <istream>
+#include <iostream>
+
+#include <cctype>
+
+namespace lm {
+namespace vocab {
+
+void ReadSingle(std::istream &in, boost::unordered_set<std::string> &out) {
+ in.exceptions(std::istream::badbit);
+ std::string word;
+ while (in >> word) {
+ out.insert(word);
+ }
+}
+
+namespace {
+bool IsLineEnd(std::istream &in) {
+ int got;
+ do {
+ got = in.get();
+ if (!in) return true;
+ if (got == '\n') return true;
+ } while (isspace(got));
+ in.unget();
+ return false;
+}
+}// namespace
+
+// Read space separated words in enter separated lines. These lines can be
+// very long, so don't read an entire line at a time.
+unsigned int ReadMultiple(std::istream &in, boost::unordered_map<std::string, std::vector<unsigned int> > &out) {
+ in.exceptions(std::istream::badbit);
+ unsigned int sentence = 0;
+ bool used_id = false;
+ std::string word;
+ while (in >> word) {
+ used_id = true;
+ std::vector<unsigned int> &posting = out[word];
+ if (posting.empty() || (posting.back() != sentence))
+ posting.push_back(sentence);
+ if (IsLineEnd(in)) {
+ ++sentence;
+ used_id = false;
+ }
+ }
+ return sentence + used_id;
+}
+
+} // namespace vocab
+} // namespace lm
diff --git a/src/kenlm/lm/filter/vocab.hh b/src/kenlm/lm/filter/vocab.hh
new file mode 100644
index 0000000..397a932
--- /dev/null
+++ b/src/kenlm/lm/filter/vocab.hh
@@ -0,0 +1,133 @@
+#ifndef LM_FILTER_VOCAB_H
+#define LM_FILTER_VOCAB_H
+
+// Vocabulary-based filters for language models.
+
+#include "util/multi_intersection.hh"
+#include "util/string_piece.hh"
+#include "util/string_piece_hash.hh"
+#include "util/tokenize_piece.hh"
+
+#include <boost/noncopyable.hpp>
+#include <boost/range/iterator_range.hpp>
+#include <boost/unordered/unordered_map.hpp>
+#include <boost/unordered/unordered_set.hpp>
+
+#include <string>
+#include <vector>
+
+namespace lm {
+namespace vocab {
+
+void ReadSingle(std::istream &in, boost::unordered_set<std::string> &out);
+
+// Read one sentence vocabulary per line. Return the number of sentences.
+unsigned int ReadMultiple(std::istream &in, boost::unordered_map<std::string, std::vector<unsigned int> > &out);
+
+/* Is this a special tag like <s> or <UNK>? This actually includes anything
+ * surrounded with < and >, which most tokenizers separate for real words, so
+ * this should not catch real words as it looks at a single token.
+ */
+inline bool IsTag(const StringPiece &value) {
+ // The parser should never give an empty string.
+ assert(!value.empty());
+ return (value.data()[0] == '<' && value.data()[value.size() - 1] == '>');
+}
+
+class Single {
+ public:
+ typedef boost::unordered_set<std::string> Words;
+
+ explicit Single(const Words &vocab) : vocab_(vocab) {}
+
+ template <class Iterator> bool PassNGram(const Iterator &begin, const Iterator &end) {
+ for (Iterator i = begin; i != end; ++i) {
+ if (IsTag(*i)) continue;
+ if (FindStringPiece(vocab_, *i) == vocab_.end()) return false;
+ }
+ return true;
+ }
+
+ private:
+ const Words &vocab_;
+};
+
+class Union {
+ public:
+ typedef boost::unordered_map<std::string, std::vector<unsigned int> > Words;
+
+ explicit Union(const Words &vocabs) : vocabs_(vocabs) {}
+
+ template <class Iterator> bool PassNGram(const Iterator &begin, const Iterator &end) {
+ sets_.clear();
+
+ for (Iterator i(begin); i != end; ++i) {
+ if (IsTag(*i)) continue;
+ Words::const_iterator found(FindStringPiece(vocabs_, *i));
+ if (vocabs_.end() == found) return false;
+ sets_.push_back(boost::iterator_range<const unsigned int*>(&*found->second.begin(), &*found->second.end()));
+ }
+ return (sets_.empty() || util::FirstIntersection(sets_));
+ }
+
+ private:
+ const Words &vocabs_;
+
+ std::vector<boost::iterator_range<const unsigned int*> > sets_;
+};
+
+class Multiple {
+ public:
+ typedef boost::unordered_map<std::string, std::vector<unsigned int> > Words;
+
+ Multiple(const Words &vocabs) : vocabs_(vocabs) {}
+
+ private:
+ // Callback from AllIntersection that does AddNGram.
+ template <class Output> class Callback {
+ public:
+ Callback(Output &out, const StringPiece &line) : out_(out), line_(line) {}
+
+ void operator()(unsigned int index) {
+ out_.SingleAddNGram(index, line_);
+ }
+
+ private:
+ Output &out_;
+ const StringPiece &line_;
+ };
+
+ public:
+ template <class Iterator, class Output> void AddNGram(const Iterator &begin, const Iterator &end, const StringPiece &line, Output &output) {
+ sets_.clear();
+ for (Iterator i(begin); i != end; ++i) {
+ if (IsTag(*i)) continue;
+ Words::const_iterator found(FindStringPiece(vocabs_, *i));
+ if (vocabs_.end() == found) return;
+ sets_.push_back(boost::iterator_range<const unsigned int*>(&*found->second.begin(), &*found->second.end()));
+ }
+ if (sets_.empty()) {
+ output.AddNGram(line);
+ return;
+ }
+
+ Callback<Output> cb(output, line);
+ util::AllIntersection(sets_, cb);
+ }
+
+ template <class Output> void AddNGram(const StringPiece &ngram, const StringPiece &line, Output &output) {
+ AddNGram(util::TokenIter<util::SingleCharacter, true>(ngram, ' '), util::TokenIter<util::SingleCharacter, true>::end(), line, output);
+ }
+
+ void Flush() const {}
+
+ private:
+ const Words &vocabs_;
+
+ std::vector<boost::iterator_range<const unsigned int*> > sets_;
+};
+
+} // namespace vocab
+} // namespace lm
+
+#endif // LM_FILTER_VOCAB_H
diff --git a/src/kenlm/lm/filter/wrapper.hh b/src/kenlm/lm/filter/wrapper.hh
new file mode 100644
index 0000000..227ec8e
--- /dev/null
+++ b/src/kenlm/lm/filter/wrapper.hh
@@ -0,0 +1,56 @@
+#ifndef LM_FILTER_WRAPPER_H
+#define LM_FILTER_WRAPPER_H
+
+#include "util/string_piece.hh"
+
+#include <algorithm>
+#include <string>
+#include <vector>
+
+namespace lm {
+
+// Provide a single-output filter with the same interface as a
+// multiple-output filter so clients code against one interface.
+template <class Binary> class BinaryFilter {
+ public:
+ // Binary modes are just references (and a set) and it makes the API cleaner to copy them.
+ explicit BinaryFilter(Binary binary) : binary_(binary) {}
+
+ template <class Iterator, class Output> void AddNGram(const Iterator &begin, const Iterator &end, const StringPiece &line, Output &output) {
+ if (binary_.PassNGram(begin, end))
+ output.AddNGram(line);
+ }
+
+ template <class Output> void AddNGram(const StringPiece &ngram, const StringPiece &line, Output &output) {
+ AddNGram(util::TokenIter<util::SingleCharacter, true>(ngram, ' '), util::TokenIter<util::SingleCharacter, true>::end(), line, output);
+ }
+
+ void Flush() const {}
+
+ private:
+ Binary binary_;
+};
+
+// Wrap another filter to pay attention only to context words
+template <class FilterT> class ContextFilter {
+ public:
+ typedef FilterT Filter;
+
+ explicit ContextFilter(Filter &backend) : backend_(backend) {}
+
+ template <class Output> void AddNGram(const StringPiece &ngram, const StringPiece &line, Output &output) {
+ // Find beginning of string or last space.
+ const char *last_space;
+ for (last_space = ngram.data() + ngram.size() - 1; last_space > ngram.data() && *last_space != ' '; --last_space) {}
+ backend_.AddNGram(StringPiece(ngram.data(), last_space - ngram.data()), line, output);
+ }
+
+ void Flush() const {}
+
+ private:
+ Filter backend_;
+};
+
+} // namespace lm
+
+#endif // LM_FILTER_WRAPPER_H
diff --git a/src/joshua/decoder/ff/lm/kenlm/lm/fragment_main.cc b/src/kenlm/lm/fragment_main.cc
similarity index 100%
rename from src/joshua/decoder/ff/lm/kenlm/lm/fragment_main.cc
rename to src/kenlm/lm/fragment_main.cc
diff --git a/src/kenlm/lm/interpolate/CMakeLists.txt b/src/kenlm/lm/interpolate/CMakeLists.txt
new file mode 100644
index 0000000..c146ab6
--- /dev/null
+++ b/src/kenlm/lm/interpolate/CMakeLists.txt
@@ -0,0 +1,54 @@
+find_package(Eigen3 REQUIRED)
+include_directories(${EIGEN3_INCLUDE_DIR})
+
+set(KENLM_INTERPOLATE_SOURCE
+ backoff_reunification.cc
+ bounded_sequence_encoding.cc
+ enumerate_global_vocab.cc
+ merge_probabilities.cc
+ merge_vocab.cc
+ normalize.cc
+ pipeline.cc
+ split_worker.cc
+ tune_derivatives.cc
+ tune_instance.cc
+ universal_vocab.cc)
+
+add_library(kenlm_interpolate OBJECT ${KENLM_INTERPOLATE_SOURCE})
+
+set(KENLM_INTERPOLATE_EXES
+ interpolate
+ perf_enum_gv
+ streaming_example
+ train_params
+ tune)
+
+set(KENLM_INTERPOLATE_DEPENDS
+ $<TARGET_OBJECTS:kenlm>
+ $<TARGET_OBJECTS:kenlm_util>
+ $<TARGET_OBJECTS:kenlm_common>
+ $<TARGET_OBJECTS:kenlm_interpolate>)
+
+AddExes(EXES ${KENLM_INTERPOLATE_EXES}
+ DEPENDS ${KENLM_INTERPOLATE_DEPENDS}
+ LIBRARIES ${Boost_LIBRARIES} pthread)
+
+if(BUILD_TESTING)
+ set(KENLM_INTERPOLATE_TESTS
+ backoff_reunification_test
+ bounded_sequence_encoding_test
+ merge_vocab_test
+ normalize_test
+ tune_derivatives_test)
+
+ AddTests(TESTS ${KENLM_INTERPOLATE_TESTS}
+ DEPENDS ${KENLM_INTERPOLATE_DEPENDS}
+ LIBRARIES ${Boost_LIBRARIES} pthread)
+
+ # tune_instance_test needs an extra command line parameter
+ KenLMAddTest(TEST tune_instance_test
+ DEPENDS ${KENLM_INTERPOLATE_DEPENDS}
+ LIBRARIES ${Boost_LIBRARIES} pthread
+ TEST_ARGS
+ ${CMAKE_CURRENT_SOURCE_DIR}/tune_instance_data/toy0.1)
+endif()
diff --git a/src/kenlm/lm/interpolate/Jamfile b/src/kenlm/lm/interpolate/Jamfile
new file mode 100644
index 0000000..411a346
--- /dev/null
+++ b/src/kenlm/lm/interpolate/Jamfile
@@ -0,0 +1,22 @@
+fakelib interp : ../common//common [ glob *.cc : *_main.cc *_test.cc tune_*.cc ] : <cxxflags>-fopenmp ;
+
+import testing ;
+
+local with-eigen = [ option.get "with-eigen" ] ;
+if $(with-eigen) {
+ fakelib tuning : tune_instance.cc tune_derivatives.cc interp ..//kenlm : <include>$(with-eigen) ;
+ unit-test tune_derivatives_test : tune_derivatives_test.cc tuning /top//boost_unit_test_framework : <include>$(with-eigen) ;
+
+ obj tune_instance_test.o : tune_instance_test.cc /top//boost_unit_test_framework : <include>$(with-eigen) ;
+ run tune_instance_test.o tuning /top//boost_unit_test_framework : : tune_instance_data/toy0.1 ;
+
+ exe tune : tune_main.cc tuning /top//boost_program_options : <include>$(with-eigen) ;
+}
+
+exe interpolate : interpolate_main.cc interp /top//boost_program_options ;
+exe streaming_example : ../builder//builder interp streaming_example_main.cc /top//boost_program_options ;
+
+unit-test normalize_test : interp normalize_test.cc /top//boost_unit_test_framework ;
+unit-test backoff_reunification_test : interp backoff_reunification_test.cc /top//boost_unit_test_framework ;
+unit-test bounded_sequence_encoding_test : interp bounded_sequence_encoding_test.cc /top//boost_unit_test_framework ;
+run merge_vocab_test.cc interp /top//boost_unit_test_framework : : merge_test/test1 merge_test/test2 merge_test/test3 merge_test/test_bad_order merge_test/test_no_unk ;
diff --git a/src/kenlm/lm/interpolate/backoff_matrix.hh b/src/kenlm/lm/interpolate/backoff_matrix.hh
new file mode 100644
index 0000000..c7552df
--- /dev/null
+++ b/src/kenlm/lm/interpolate/backoff_matrix.hh
@@ -0,0 +1,29 @@
+#ifndef LM_INTERPOLATE_BACKOFF_MATRIX_H
+#define LM_INTERPOLATE_BACKOFF_MATRIX_H
+
+#include <cstddef>
+#include <vector>
+
+namespace lm { namespace interpolate {
+
+class BackoffMatrix {
+ public:
+ BackoffMatrix(std::size_t num_models, std::size_t max_order)
+ : max_order_(max_order), backing_(num_models * max_order) {}
+
+ float &Backoff(std::size_t model, std::size_t order_minus_1) {
+ return backing_[model * max_order_ + order_minus_1];
+ }
+
+ float Backoff(std::size_t model, std::size_t order_minus_1) const {
+ return backing_[model * max_order_ + order_minus_1];
+ }
+
+ private:
+ const std::size_t max_order_;
+ std::vector<float> backing_;
+};
+
+}} // namespaces
+
+#endif // LM_INTERPOLATE_BACKOFF_MATRIX_H
diff --git a/src/kenlm/lm/interpolate/backoff_reunification.cc b/src/kenlm/lm/interpolate/backoff_reunification.cc
new file mode 100644
index 0000000..edf6ddc
--- /dev/null
+++ b/src/kenlm/lm/interpolate/backoff_reunification.cc
@@ -0,0 +1,57 @@
+#include "lm/interpolate/backoff_reunification.hh"
+#include "lm/common/model_buffer.hh"
+#include "lm/common/ngram_stream.hh"
+#include "lm/common/ngram.hh"
+#include "lm/common/compare.hh"
+
+#include <cassert>
+
+namespace lm {
+namespace interpolate {
+
+namespace {
+class MergeWorker {
+public:
+ MergeWorker(std::size_t order, const util::stream::ChainPosition &prob_pos,
+ const util::stream::ChainPosition &boff_pos)
+ : order_(order), prob_pos_(prob_pos), boff_pos_(boff_pos) {
+ // nothing
+ }
+
+ void Run(const util::stream::ChainPosition &position) {
+ lm::NGramStream<ProbBackoff> stream(position);
+
+ lm::NGramStream<float> prob_input(prob_pos_);
+ util::stream::Stream boff_input(boff_pos_);
+ for (; prob_input && boff_input; ++prob_input, ++boff_input, ++stream) {
+ std::copy(prob_input->begin(), prob_input->end(), stream->begin());
+ stream->Value().prob = prob_input->Value();
+ stream->Value().backoff = *reinterpret_cast<float *>(boff_input.Get());
+ }
+ UTIL_THROW_IF2(prob_input || boff_input,
+ "Streams were not the same size during merging");
+ stream.Poison();
+ }
+
+private:
+ std::size_t order_;
+ util::stream::ChainPosition prob_pos_;
+ util::stream::ChainPosition boff_pos_;
+};
+}
+
+// Since we are *adding* something to the output chain here, we pass in the
+// chain itself so that we can safely add a new step to the chain without
+// creating a deadlock situation (since creating a new ChainPosition will
+// make a new input/output pair---we want that position to be created
+// *here*, not before).
+void ReunifyBackoff(util::stream::ChainPositions &prob_pos,
+ util::stream::ChainPositions &boff_pos,
+ util::stream::Chains &output_chains) {
+ assert(prob_pos.size() == boff_pos.size());
+
+ for (size_t i = 0; i < prob_pos.size(); ++i)
+ output_chains[i] >> MergeWorker(i + 1, prob_pos[i], boff_pos[i]);
+}
+}
+}
diff --git a/src/kenlm/lm/interpolate/backoff_reunification.hh b/src/kenlm/lm/interpolate/backoff_reunification.hh
new file mode 100644
index 0000000..327db65
--- /dev/null
+++ b/src/kenlm/lm/interpolate/backoff_reunification.hh
@@ -0,0 +1,27 @@
+#ifndef KENLM_INTERPOLATE_BACKOFF_REUNIFICATION_
+#define KENLM_INTERPOLATE_BACKOFF_REUNIFICATION_
+
+#include "util/stream/stream.hh"
+#include "util/stream/multi_stream.hh"
+
+namespace lm {
+namespace interpolate {
+
+/**
+ * The third pass for the offline log-linear interpolation algorithm. This
+ * reads **suffix-ordered** probability values (ngram-id, float) and
+ * **suffix-ordered** backoff values (float) and writes the merged contents
+ * to the output.
+ *
+ * @param prob_pos The chain position for each order from which to read
+ * the probability values
+ * @param boff_pos The chain position for each order from which to read
+ * the backoff values
+ * @param output_chains The output chains for each order
+ */
+void ReunifyBackoff(util::stream::ChainPositions &prob_pos,
+ util::stream::ChainPositions &boff_pos,
+ util::stream::Chains &output_chains);
+}
+}
+#endif
diff --git a/src/kenlm/lm/interpolate/backoff_reunification_test.cc b/src/kenlm/lm/interpolate/backoff_reunification_test.cc
new file mode 100644
index 0000000..d8faf94
--- /dev/null
+++ b/src/kenlm/lm/interpolate/backoff_reunification_test.cc
@@ -0,0 +1,159 @@
+#include "lm/interpolate/backoff_reunification.hh"
+#include "lm/common/ngram_stream.hh"
+
+#define BOOST_TEST_MODULE InterpolateBackoffReunificationTest
+#include <boost/test/unit_test.hpp>
+
+namespace lm {
+namespace interpolate {
+
+namespace {
+
+// none of this input actually makes sense, all we care about is making
+// sure the merging works
+template <uint8_t N>
+struct Gram {
+ WordIndex ids[N];
+ float prob;
+ float boff;
+};
+
+template <uint8_t N>
+struct Grams {
+ const static Gram<N> grams[];
+};
+
+template <>
+const Gram<1> Grams<1>::grams[]
+ = {{{0}, 0.1f, 0.1f}, {{1}, 0.4f, 0.2f}, {{2}, 0.5f, 0.1f}};
+
+template <>
+const Gram<2> Grams<2>::grams[] = {{{0, 0}, 0.05f, 0.05f},
+ {{1, 0}, 0.05f, 0.02f},
+ {{1, 1}, 0.2f, 0.04f},
+ {{2, 2}, 0.2f, 0.01f}};
+
+template <>
+const Gram<3> Grams<3>::grams[] = {{{0, 0, 0}, 0.001f, 0.005f},
+ {{1, 0, 0}, 0.001f, 0.002f},
+ {{2, 0, 0}, 0.001f, 0.003f},
+ {{0, 1, 0}, 0.1f, 0.008f},
+ {{1, 1, 0}, 0.1f, 0.09f},
+ {{1, 1, 1}, 0.2f, 0.08f}};
+
+template <uint8_t N>
+class WriteInput {
+public:
+ void Run(const util::stream::ChainPosition &position) {
+ lm::NGramStream<float> output(position);
+
+ for (std::size_t i = 0; i < sizeof(Grams<N>::grams) / sizeof(Gram<N>);
+ ++i, ++output) {
+ std::copy(Grams<N>::grams[i].ids, Grams<N>::grams[i].ids + N,
+ output->begin());
+ output->Value() = Grams<N>::grams[i].prob;
+ }
+ output.Poison();
+ }
+};
+
+template <uint8_t N>
+class WriteBackoffs {
+public:
+ void Run(const util::stream::ChainPosition &position) {
+ util::stream::Stream output(position);
+
+ for (std::size_t i = 0; i < sizeof(Grams<N>::grams) / sizeof(Gram<N>);
+ ++i, ++output) {
+ *reinterpret_cast<float *>(output.Get()) = Grams<N>::grams[i].boff;
+ }
+ output.Poison();
+ }
+};
+
+template <uint8_t N>
+class CheckOutput {
+public:
+ void Run(const util::stream::ChainPosition &position) {
+ lm::NGramStream<ProbBackoff> stream(position);
+
+ std::size_t i = 0;
+ for (; stream; ++stream, ++i) {
+ std::stringstream ss;
+ for (WordIndex *idx = stream->begin(); idx != stream->end(); ++idx)
+ ss << "(" << *idx << ")";
+
+ UTIL_THROW_IF2(
+ !std::equal(stream->begin(), stream->end(), Grams<N>::grams[i].ids),
+ "Mismatched id in CheckOutput<" << (int)N << ">: " << ss.str());
+
+ UTIL_THROW_IF2(stream->Value().prob != Grams<N>::grams[i].prob,
+ "Mismatched probability in CheckOutput<"
+ << (int)N << ">, got " << stream->Value().prob
+ << ", expected " << Grams<N>::grams[i].prob);
+
+ UTIL_THROW_IF2(stream->Value().backoff != Grams<N>::grams[i].boff,
+ "Mismatched backoff in CheckOutput<"
+ << (int)N << ">, got " << stream->Value().backoff
+ << ", expected " << Grams<N>::grams[i].boff);
+ }
+ UTIL_THROW_IF2(i != sizeof(Grams<N>::grams) / sizeof(Gram<N>),
+ "Did not get correct number of "
+ << (int)N << "-grams: expected "
+ << sizeof(Grams<N>::grams) / sizeof(Gram<N>)
+ << ", got " << i);
+ }
+};
+}
+
+BOOST_AUTO_TEST_CASE(BackoffReunificationTest) {
+ util::stream::ChainConfig config;
+ config.total_memory = 100;
+ config.block_count = 1;
+
+ util::stream::Chains prob_chains(3);
+ config.entry_size = NGram<float>::TotalSize(1);
+ prob_chains.push_back(config);
+ prob_chains.back() >> WriteInput<1>();
+
+ config.entry_size = NGram<float>::TotalSize(2);
+ prob_chains.push_back(config);
+ prob_chains.back() >> WriteInput<2>();
+
+ config.entry_size = NGram<float>::TotalSize(3);
+ prob_chains.push_back(config);
+ prob_chains.back() >> WriteInput<3>();
+
+ util::stream::Chains boff_chains(3);
+ config.entry_size = sizeof(float);
+ boff_chains.push_back(config);
+ boff_chains.back() >> WriteBackoffs<1>();
+
+ boff_chains.push_back(config);
+ boff_chains.back() >> WriteBackoffs<2>();
+
+ boff_chains.push_back(config);
+ boff_chains.back() >> WriteBackoffs<3>();
+
+ util::stream::ChainPositions prob_pos(prob_chains);
+ util::stream::ChainPositions boff_pos(boff_chains);
+
+ util::stream::Chains output_chains(3);
+ for (std::size_t i = 0; i < 3; ++i) {
+ config.entry_size = NGram<ProbBackoff>::TotalSize(i + 1);
+ output_chains.push_back(config);
+ }
+
+ ReunifyBackoff(prob_pos, boff_pos, output_chains);
+
+ output_chains[0] >> CheckOutput<1>();
+ output_chains[1] >> CheckOutput<2>();
+ output_chains[2] >> CheckOutput<3>();
+
+ prob_chains >> util::stream::kRecycle;
+ boff_chains >> util::stream::kRecycle;
+
+ output_chains.Wait();
+}
+}
+}
diff --git a/src/kenlm/lm/interpolate/bounded_sequence_encoding.cc b/src/kenlm/lm/interpolate/bounded_sequence_encoding.cc
new file mode 100644
index 0000000..aca8ed7
--- /dev/null
+++ b/src/kenlm/lm/interpolate/bounded_sequence_encoding.cc
@@ -0,0 +1,36 @@
+#include "lm/interpolate/bounded_sequence_encoding.hh"
+
+#include <algorithm>
+
+namespace lm { namespace interpolate {
+
+BoundedSequenceEncoding::BoundedSequenceEncoding(const unsigned char *bound_begin, const unsigned char *bound_end)
+ : entries_(bound_end - bound_begin) {
+ std::size_t full = 0;
+ Entry entry;
+ entry.shift = 0;
+ for (const unsigned char *i = bound_begin; i != bound_end; ++i) {
+ uint8_t length;
+ if (*i <= 1) {
+ length = 0;
+ } else {
+ length = sizeof(unsigned int) * 8 - __builtin_clz((unsigned int)*i);
+ }
+ entry.mask = (1ULL << length) - 1ULL;
+ if (entry.shift + length > 64) {
+ entry.shift = 0;
+ entry.next = true;
+ ++full;
+ } else {
+ entry.next = false;
+ }
+ entries_.push_back(entry);
+ entry.shift += length;
+ }
+ byte_length_ = full * sizeof(uint64_t) + (entry.shift + 7) / 8;
+ first_copy_ = std::min<std::size_t>(byte_length_, sizeof(uint64_t));
+ // Size of last uint64_t. Zero if empty, otherwise [1,8] depending on mod.
+ overhang_ = byte_length_ == 0 ? 0 : ((byte_length_ - 1) % 8 + 1);
+}
+
+}} // namespaces
diff --git a/src/kenlm/lm/interpolate/bounded_sequence_encoding.hh b/src/kenlm/lm/interpolate/bounded_sequence_encoding.hh
new file mode 100644
index 0000000..84dd63a
--- /dev/null
+++ b/src/kenlm/lm/interpolate/bounded_sequence_encoding.hh
@@ -0,0 +1,76 @@
+#ifndef LM_INTERPOLATE_BOUNDED_SEQUENCE_ENCODING_H
+#define LM_INTERPOLATE_BOUNDED_SEQUENCE_ENCODING_H
+
+/* Encodes fixed-length sequences of integers with known bounds on each entry.
+ * This is used to encode how far each model has backed off.
+ * TODO: make this class efficient. Bit-level packing or multiply by bound and
+ * add.
+ */
+
+#include "util/exception.hh"
+#include "util/fixed_array.hh"
+
+#if BYTE_ORDER != LITTLE_ENDIAN
+#warning The interpolation code assumes little endian for now.
+#endif
+
+#include <algorithm>
+#include <cstring>
+
+namespace lm {
+namespace interpolate {
+
+class BoundedSequenceEncoding {
+ public:
+ // Encode [0, bound_begin[0]) x [0, bound_begin[1]) x [0, bound_begin[2]) x ... x [0, *(bound_end - 1)) for entries in the sequence
+ BoundedSequenceEncoding(const unsigned char *bound_begin, const unsigned char *bound_end);
+
+ std::size_t Entries() const { return entries_.size(); }
+
+ std::size_t EncodedLength() const { return byte_length_; }
+
+ void Encode(const unsigned char *from, void *to_void) const {
+ uint8_t *to = static_cast<uint8_t*>(to_void);
+ uint64_t cur = 0;
+ for (const Entry *i = entries_.begin(); i != entries_.end(); ++i, ++from) {
+ if (UTIL_UNLIKELY(i->next)) {
+ std::memcpy(to, &cur, sizeof(uint64_t));
+ to += sizeof(uint64_t);
+ cur = 0;
+ }
+ cur |= static_cast<uint64_t>(*from) << i->shift;
+ }
+ memcpy(to, &cur, overhang_);
+ }
+
+ void Decode(const void *from_void, unsigned char *to) const {
+ const uint8_t *from = static_cast<const uint8_t*>(from_void);
+ uint64_t cur = 0;
+ memcpy(&cur, from, first_copy_);
+ for (const Entry *i = entries_.begin(); i != entries_.end(); ++i, ++to) {
+ if (UTIL_UNLIKELY(i->next)) {
+ from += sizeof(uint64_t);
+ cur = 0;
+ std::memcpy(&cur, from,
+ std::min<std::size_t>(sizeof(uint64_t), static_cast<const uint8_t*>(from_void) + byte_length_ - from));
+ }
+ *to = (cur >> i->shift) & i->mask;
+ }
+ }
+
+ private:
+ struct Entry {
+ bool next;
+ uint8_t shift;
+ uint64_t mask;
+ };
+ util::FixedArray<Entry> entries_;
+ std::size_t byte_length_;
+ std::size_t first_copy_;
+ std::size_t overhang_;
+};
+
+
+}} // namespaces
+
+#endif // LM_INTERPOLATE_BOUNDED_SEQUENCE_ENCODING_H
diff --git a/src/kenlm/lm/interpolate/bounded_sequence_encoding_test.cc b/src/kenlm/lm/interpolate/bounded_sequence_encoding_test.cc
new file mode 100644
index 0000000..2c4bbd9
--- /dev/null
+++ b/src/kenlm/lm/interpolate/bounded_sequence_encoding_test.cc
@@ -0,0 +1,86 @@
+#include "lm/interpolate/bounded_sequence_encoding.hh"
+
+#include "util/scoped.hh"
+
+#define BOOST_TEST_MODULE BoundedSequenceEncodingTest
+#include <boost/test/unit_test.hpp>
+
+namespace lm {
+namespace interpolate {
+namespace {
+
+void ExhaustiveTest(unsigned char *bound_begin, unsigned char *bound_end) {
+ BoundedSequenceEncoding enc(bound_begin, bound_end);
+ util::scoped_malloc backing(util::MallocOrThrow(enc.EncodedLength()));
+ std::vector<unsigned char> values(bound_end - bound_begin),
+ out(bound_end - bound_begin);
+ while (true) {
+ enc.Encode(&values[0], backing.get());
+ enc.Decode(backing.get(), &out[0]);
+ for (std::size_t i = 0; i != values.size(); ++i) {
+ BOOST_CHECK_EQUAL(values[i], out[i]);
+ }
+ for (std::size_t i = 0;; ++i) {
+ if (i == values.size()) return;
+ ++values[i];
+ if (values[i] < bound_begin[i]) break;
+ values[i] = 0;
+ }
+ }
+}
+
+void CheckEncodeDecode(unsigned char *bounds, unsigned char *input,
+ unsigned char *output, std::size_t len) {
+ BoundedSequenceEncoding encoder(bounds, bounds + len);
+ util::scoped_malloc backing(util::MallocOrThrow(encoder.EncodedLength()));
+
+ encoder.Encode(input, backing.get());
+ encoder.Decode(backing.get(), output);
+
+ for (std::size_t i = 0; i < len; ++i) {
+ BOOST_CHECK_EQUAL(input[i], output[i]);
+ }
+}
+
+BOOST_AUTO_TEST_CASE(Exhaustive) {
+ unsigned char bounds[] = {5, 2, 3, 9, 7, 20, 8};
+ ExhaustiveTest(bounds, bounds + sizeof(bounds) / sizeof(unsigned char));
+}
+
+BOOST_AUTO_TEST_CASE(LessThan64) {
+ unsigned char bounds[] = {255, 255, 255, 255, 255, 255, 255, 3};
+ unsigned char input[] = {172, 183, 254, 187, 96, 87, 65, 2};
+ unsigned char output[] = {0, 0, 0, 0, 0, 0, 0, 0};
+
+ std::size_t len = sizeof(bounds) / sizeof(unsigned char);
+ assert(sizeof(input) / sizeof(unsigned char) == len);
+ assert(sizeof(output) / sizeof(unsigned char) == len);
+
+ CheckEncodeDecode(bounds, input, output, len);
+}
+
+BOOST_AUTO_TEST_CASE(Exactly64) {
+ unsigned char bounds[] = {255, 255, 255, 255, 255, 255, 255, 255};
+ unsigned char input[] = {172, 183, 254, 187, 96, 87, 65, 16};
+ unsigned char output[] = {0, 0, 0, 0, 0, 0, 0, 0};
+
+ std::size_t len = sizeof(bounds) / sizeof(unsigned char);
+ assert(sizeof(input) / sizeof(unsigned char) == len);
+ assert(sizeof(output) / sizeof(unsigned char) == len);
+
+ CheckEncodeDecode(bounds, input, output, len);
+}
+
+BOOST_AUTO_TEST_CASE(MoreThan64) {
+ unsigned char bounds[] = {255, 255, 255, 255, 255, 255, 255, 255, 255};
+ unsigned char input[] = {172, 183, 254, 187, 96, 87, 65, 16, 137};
+ unsigned char output[] = {0, 0, 0, 0, 0, 0, 0, 0, 0};
+
+ std::size_t len = sizeof(bounds) / sizeof(unsigned char);
+ assert(sizeof(input) / sizeof(unsigned char) == len);
+ assert(sizeof(output) / sizeof(unsigned char) == len);
+
+ CheckEncodeDecode(bounds, input, output, len);
+}
+
+}}} // namespaces
diff --git a/src/kenlm/lm/interpolate/enumerate_global_vocab.cc b/src/kenlm/lm/interpolate/enumerate_global_vocab.cc
new file mode 100644
index 0000000..26f63c8
--- /dev/null
+++ b/src/kenlm/lm/interpolate/enumerate_global_vocab.cc
@@ -0,0 +1,48 @@
+
+#include "lm/interpolate/enumerate_global_vocab.hh"
+
+#include <iostream>
+#include <map>
+
+namespace lm {
+//constructor
+
+ EnumerateGlobalVocab::EnumerateGlobalVocab(std::map<std::string, int*> * vm, int nm) {
+
+ vmap = vm;
+ num_models = nm;
+ cur_model = 0; //blah
+ cnt = 0;
+ std::cerr << "Vocab Builder with models: " << nm << std::endl;
+ }
+
+ void EnumerateGlobalVocab::Add(WordIndex index, const StringPiece &str) {
+
+ std::string st = str.as_string();
+
+ //check for existence of key
+ std::map<std::string, int*>::iterator itr = vmap->find(st);
+
+ //put stuff
+ if(itr != vmap->end()) {
+ std::cerr << "Vocab exist: " << str << " M: " << cur_model << " I:" << index << std::endl;
+ itr->second[cur_model] = index;
+ }
+ //new key
+ else {
+
+ //create model index map for this vocab word
+ //init to 0, 0 is UNK
+ int * indices = new int[num_models];
+ memset(indices, 0, (sizeof(int)*num_models)); //this still legit?
+
+ indices[cur_model] = index;
+ (*vmap)[st] = indices;
+ std::cerr << cnt << ":Vocab add: " << str << " M: " << cur_model << " I:" << index << std::endl;
+ cnt++;
+ }
+
+
+ }
+
+}
diff --git a/src/kenlm/lm/interpolate/enumerate_global_vocab.hh b/src/kenlm/lm/interpolate/enumerate_global_vocab.hh
new file mode 100644
index 0000000..c37d649
--- /dev/null
+++ b/src/kenlm/lm/interpolate/enumerate_global_vocab.hh
@@ -0,0 +1,38 @@
+#ifndef LM_ENUMERATE_GLOBAL_VOCAB_H
+#define LM_ENUMERATE_GLOBAL_VOCAB_H
+
+#include "lm/enumerate_vocab.hh"
+#include <map>
+
+/* Use this to create a global vocab across models for use when
+ * calculating lambdas for interpolation, or for other tasks that need a shared vocabulary.
+ */
+namespace lm {
+
+ class EnumerateGlobalVocab : EnumerateVocab {
+
+ public:
+
+      // NOTE: data members are intentionally public; callers read/mutate them directly.
+ std::map<std::string, int*> * vmap;
+ int num_models;
+ int cur_model;
+ int cnt; //stupid
+
+ ~EnumerateGlobalVocab() {}
+
+ void Add(WordIndex index, const StringPiece & str);
+
+ EnumerateGlobalVocab(std::map<std::string, int*> *, int);
+
+ void SetCurModel(int i) { cur_model = i; }
+
+ protected:
+ EnumerateGlobalVocab() {}
+
+ };
+
+} //namespace lm
+
+#endif // LM_ENUMERATE_GLOBAL_VOCAB_H
+
diff --git a/src/kenlm/lm/interpolate/interpolate_info.hh b/src/kenlm/lm/interpolate/interpolate_info.hh
new file mode 100644
index 0000000..ebecd92
--- /dev/null
+++ b/src/kenlm/lm/interpolate/interpolate_info.hh
@@ -0,0 +1,35 @@
+#ifndef KENLM_INTERPOLATE_INTERPOLATE_INFO_H
+#define KENLM_INTERPOLATE_INTERPOLATE_INFO_H
+
+#include <cstddef>
+#include <vector>
+#include <stdint.h>
+
+namespace lm {
+namespace interpolate {
+
+/**
+ * Stores relevant info for interpolating several language models, for use
+ * during the three-pass offline log-linear interpolation algorithm.
+ */
+struct InterpolateInfo {
+ /**
+ * @return the number of models being interpolated
+ */
+ std::size_t Models() const {
+ return orders.size();
+ }
+
+ /**
+ * The lambda (interpolation weight) for each model.
+ */
+ std::vector<float> lambdas;
+
+ /**
+ * The maximum ngram order for each model.
+ */
+ std::vector<uint8_t> orders;
+};
+}
+}
+#endif
diff --git a/src/kenlm/lm/interpolate/interpolate_main.cc b/src/kenlm/lm/interpolate/interpolate_main.cc
new file mode 100644
index 0000000..a99b62d
--- /dev/null
+++ b/src/kenlm/lm/interpolate/interpolate_main.cc
@@ -0,0 +1,37 @@
+#include "lm/common/model_buffer.hh"
+#include "lm/common/size_option.hh"
+#include "lm/interpolate/pipeline.hh"
+#include "util/fixed_array.hh"
+#include "util/usage.hh"
+
+#include <boost/program_options.hpp>
+
+#include <iostream>
+#include <vector>
+
+int main(int argc, char *argv[]) {
+ lm::interpolate::Config config;
+ std::vector<std::string> input_models;
+ namespace po = boost::program_options;
+ po::options_description options("Log-linear interpolation options");
+ options.add_options()
+ ("lambda,w", po::value<std::vector<float> >(&config.lambdas)->multitoken()->required(), "Interpolation weights")
+ ("model,m", po::value<std::vector<std::string> >(&input_models)->multitoken()->required(), "Models to interpolate")
+ ("temp_prefix,T", po::value<std::string>(&config.sort.temp_prefix)->default_value("/tmp/lm"), "Temporary file prefix")
+ ("memory,S", lm::SizeOption(config.sort.total_memory, util::GuessPhysicalMemory() ? "50%" : "1G"), "Sorting memory")
+ ("sort_block", lm::SizeOption(config.sort.buffer_size, "64M"), "Block size");
+ po::variables_map vm;
+ po::store(po::parse_command_line(argc, argv, options), vm);
+ po::notify(vm);
+
+ if (config.lambdas.size() != input_models.size()) {
+ std::cerr << "Number of models " << input_models.size() << " should match the number of weights" << config.lambdas.size() << "." << std::endl;
+ return 1;
+ }
+
+ util::FixedArray<lm::ModelBuffer> models(input_models.size());
+ for (std::size_t i = 0; i < input_models.size(); ++i) {
+ models.push_back(input_models[i]);
+ }
+ lm::interpolate::Pipeline(models, config, 1);
+}
diff --git a/src/kenlm/lm/interpolate/merge_probabilities.cc b/src/kenlm/lm/interpolate/merge_probabilities.cc
new file mode 100644
index 0000000..b6c949f
--- /dev/null
+++ b/src/kenlm/lm/interpolate/merge_probabilities.cc
@@ -0,0 +1,285 @@
+#include "lm/interpolate/merge_probabilities.hh"
+#include "lm/common/ngram_stream.hh"
+#include "lm/interpolate/bounded_sequence_encoding.hh"
+#include "lm/interpolate/interpolate_info.hh"
+
+#include <algorithm>
+#include <limits>
+#include <numeric>
+
+namespace lm {
+namespace interpolate {
+
+/**
+ * Helper to generate the BoundedSequenceEncoding used for writing the
+ * from values.
+ */
+BoundedSequenceEncoding MakeEncoder(const InterpolateInfo &info, uint8_t order) {
+ util::FixedArray<uint8_t> max_orders(info.orders.size());
+ for (std::size_t i = 0; i < info.orders.size(); ++i) {
+ max_orders.push_back(std::min(order, info.orders[i]));
+ }
+ return BoundedSequenceEncoding(max_orders.begin(), max_orders.end());
+}
+
+namespace {
+
+/**
+ * A simple wrapper class that holds information needed to read and write
+ * the ngrams of a particular order. This class has the memory needed to
+ * buffer the data needed for the recursive process of computing the
+ * probabilities and "from" values for each component model.
+ *
+ * "From" values indicate, for each model, what order (as an index, so -1)
+ * was backed off to in order to arrive at a probability. For example, if a
+ * 5-gram model (order index 4) backed off twice, we would write a 2.
+ */
+class NGramHandler {
+public:
+ NGramHandler(uint8_t order, const InterpolateInfo &ifo,
+ util::FixedArray<util::stream::ChainPositions> &models_by_order)
+ : info(ifo),
+ encoder(MakeEncoder(info, order)),
+ out_record(order, encoder.EncodedLength()) {
+ std::size_t count_has_order = 0;
+ for (std::size_t i = 0; i < models_by_order.size(); ++i) {
+ count_has_order += (models_by_order[i].size() >= order);
+ }
+ inputs_.Init(count_has_order);
+ for (std::size_t i = 0; i < models_by_order.size(); ++i) {
+ if (models_by_order[i].size() < order)
+ continue;
+ inputs_.push_back(models_by_order[i][order - 1]);
+ if (inputs_.back()) {
+ active_.resize(active_.size() + 1);
+ active_.back().model = i;
+ active_.back().stream = &inputs_.back();
+ }
+ }
+
+ // have to init outside since NGramStreams doesn't forward to
+ // GenericStreams ctor given a ChainPositions
+
+ probs.Init(info.Models());
+ from.Init(info.Models());
+ for (std::size_t i = 0; i < info.Models(); ++i) {
+ probs.push_back(0.0);
+ from.push_back(0);
+ }
+ }
+
+ struct StreamIndex {
+ NGramStream<ProbBackoff> *stream;
+ NGramStream<ProbBackoff> &Stream() { return *stream; }
+ std::size_t model;
+ };
+
+ std::size_t ActiveSize() const {
+ return active_.size();
+ }
+
+ /**
+ * @return the input stream for a particular model that corresponds to
+ * this ngram order
+ */
+ StreamIndex &operator[](std::size_t idx) {
+ return active_[idx];
+ }
+
+ void erase(std::size_t idx) {
+ active_.erase(active_.begin() + idx);
+ }
+
+ const InterpolateInfo &info;
+ BoundedSequenceEncoding encoder;
+ PartialProbGamma out_record;
+ util::FixedArray<float> probs;
+ util::FixedArray<uint8_t> from;
+
+private:
+ std::vector<StreamIndex> active_;
+ NGramStreams<ProbBackoff> inputs_;
+};
+
+/**
+ * A collection of NGramHandlers.
+ */
+class NGramHandlers : public util::FixedArray<NGramHandler> {
+public:
+ explicit NGramHandlers(std::size_t num)
+ : util::FixedArray<NGramHandler>(num) {
+ }
+
+ void push_back(
+ std::size_t order, const InterpolateInfo &info,
+ util::FixedArray<util::stream::ChainPositions> &models_by_order) {
+ new (end()) NGramHandler(order, info, models_by_order);
+ Constructed();
+ }
+};
+
+/**
+ * The recursive helper function that computes probability and "from"
+ * values for all ngrams matching a particular suffix.
+ *
+ * The current order can be computed as the suffix length + 1. Note that
+ * the suffix could be empty (suffix_begin == suffix_end == NULL), in which
+ * case we are handling unigrams with the UNK token as the fallback
+ * probability.
+ *
+ * @param handlers The full collection of handlers
+ * @param suffix_begin A start iterator for the suffix
+ * @param suffix_end An end iterator for the suffix
+ * @param fallback_probs The probabilities of this ngram if we need to
+ * back off (that is, the probability of the suffix)
+ * @param fallback_from The order that the corresponding fallback
+ * probability in the fallback_probs is from
+ * @param combined_fallback interpolated fallback_probs
+ * @param outputs The output streams, one for each order
+ */
+void HandleSuffix(NGramHandlers &handlers, WordIndex *suffix_begin,
+ WordIndex *suffix_end,
+ const util::FixedArray<float> &fallback_probs,
+ const util::FixedArray<uint8_t> &fallback_from,
+ float combined_fallback,
+ util::stream::Streams &outputs) {
+ uint8_t order = std::distance(suffix_begin, suffix_end) + 1;
+ if (order > outputs.size()) return;
+
+ util::stream::Stream &output = outputs[order - 1];
+ NGramHandler &handler = handlers[order - 1];
+
+ while (true) {
+ // find the next smallest ngram which matches our suffix
+ // TODO: priority queue driven.
+ WordIndex *minimum = NULL;
+ for (std::size_t i = 0; i < handler.ActiveSize(); ++i) {
+ if (!std::equal(suffix_begin, suffix_end, handler[i].Stream()->begin() + 1))
+ continue;
+
+ // if we either haven't set a minimum yet or this one is smaller than
+ // the minimum we found before, replace it
+ WordIndex *last = handler[i].Stream()->begin();
+ if (!minimum || *last < *minimum) { minimum = handler[i].Stream()->begin(); }
+ }
+
+ // no more ngrams of this order match our suffix, so we're done
+ if (!minimum) return;
+
+ handler.out_record.ReBase(output.Get());
+ std::copy(minimum, minimum + order, handler.out_record.begin());
+
+ // Default case is having backed off.
+ std::copy(fallback_probs.begin(), fallback_probs.end(), handler.probs.begin());
+ std::copy(fallback_from.begin(), fallback_from.end(), handler.from.begin());
+
+ for (std::size_t i = 0; i < handler.ActiveSize();) {
+ if (std::equal(handler.out_record.begin(), handler.out_record.end(),
+ handler[i].Stream()->begin())) {
+ handler.probs[handler[i].model] = handler.info.lambdas[handler[i].model] * handler[i].Stream()->Value().prob;
+ handler.from[handler[i].model] = order - 1;
+ if (++handler[i].Stream()) {
+ ++i;
+ } else {
+ handler.erase(i);
+ }
+ } else {
+ ++i;
+ }
+ }
+ handler.out_record.Prob() = std::accumulate(handler.probs.begin(), handler.probs.end(), 0.0);
+ handler.out_record.LowerProb() = combined_fallback;
+ handler.encoder.Encode(handler.from.begin(),
+ handler.out_record.FromBegin());
+
+ // we've handled this particular ngram, so now recurse to the higher
+ // order using the current ngram as the suffix
+ HandleSuffix(handlers, handler.out_record.begin(), handler.out_record.end(),
+ handler.probs, handler.from, handler.out_record.Prob(), outputs);
+ // consume the output
+ ++output;
+ }
+}
+
+/**
+ * Kicks off the recursion for computing the probabilities and "from"
+ * values for each ngram order. We begin by handling the UNK token that
+ * should be at the front of each of the unigram input streams. This is
+ * then output to the stream and it is used as the fallback for handling
+ * our unigram case, the unigram used as the fallback for the bigram case,
+ * etc.
+ */
+void HandleNGrams(NGramHandlers &handlers, util::stream::Streams &outputs) {
+ PartialProbGamma unk_record(1, 0);
+ // First: populate the unk probabilities by reading the first unigram
+ // from each stream
+ util::FixedArray<float> unk_probs(handlers[0].info.Models());
+
+ // start by populating the ngram id from the first stream
+ lm::NGram<ProbBackoff> ngram = *handlers[0][0].Stream();
+ unk_record.ReBase(outputs[0].Get());
+ std::copy(ngram.begin(), ngram.end(), unk_record.begin());
+ unk_record.Prob() = 0;
+
+ // then populate the probabilities into unk_probs while "multiply" the
+ // model probabilities together into the unk record
+ //
+ // note that from doesn't need to be set for unigrams
+ assert(handlers[0].ActiveSize() == handlers[0].info.Models());
+ for (std::size_t i = 0; i < handlers[0].info.Models();) {
+ ngram = *handlers[0][i].Stream();
+ unk_probs.push_back(handlers[0].info.lambdas[i] * ngram.Value().prob);
+ unk_record.Prob() += unk_probs[i];
+ assert(*ngram.begin() == kUNK);
+ if (++handlers[0][i].Stream()) {
+ ++i;
+ } else {
+ handlers[0].erase(i);
+ }
+ }
+ float unk_combined = unk_record.Prob();
+ unk_record.LowerProb() = unk_combined;
+ // flush the unk output record
+ ++outputs[0];
+
+ // Then, begin outputting everything in lexicographic order: first we'll
+ // get the unigram then the first bigram with that context, then the
+ // first trigram with that bigram context, etc., until we exhaust all of
+ // the ngrams, then all of the (n-1)grams, etc.
+ //
+ // This function is the "root" of this recursive process.
+ util::FixedArray<uint8_t> unk_from(handlers[0].info.Models());
+ for (std::size_t i = 0; i < handlers[0].info.Models(); ++i) {
+ unk_from.push_back(0);
+ }
+
+ // the two nulls are to encode that our "fallback" word is the "0-gram"
+ // case, e.g. we "backed off" to UNK
+ // TODO: stop generating vocab ids and LowerProb for unigrams.
+ HandleSuffix(handlers, NULL, NULL, unk_probs, unk_from, unk_combined, outputs);
+
+ // Verify we reached the end. And poison!
+ for (std::size_t i = 0; i < handlers.size(); ++i) {
+ UTIL_THROW_IF2(handlers[i].ActiveSize(),
+ "MergeProbabilities did not exhaust all ngram streams");
+ outputs[i].Poison();
+ }
+}
+}
+
+void MergeProbabilities(
+ const InterpolateInfo &info,
+ util::FixedArray<util::stream::ChainPositions> &models_by_order,
+ util::stream::Chains &output_chains) {
+ NGramHandlers handlers(output_chains.size());
+ for (std::size_t i = 0; i < output_chains.size(); ++i) {
+ handlers.push_back(i + 1, info, models_by_order);
+ }
+
+ util::stream::ChainPositions output_pos(output_chains);
+ util::stream::Streams outputs(output_pos);
+
+ HandleNGrams(handlers, outputs);
+}
+}
+}
diff --git a/src/kenlm/lm/interpolate/merge_probabilities.hh b/src/kenlm/lm/interpolate/merge_probabilities.hh
new file mode 100644
index 0000000..59b8046
--- /dev/null
+++ b/src/kenlm/lm/interpolate/merge_probabilities.hh
@@ -0,0 +1,89 @@
+#ifndef LM_INTERPOLATE_MERGE_PROBABILITIES_H
+#define LM_INTERPOLATE_MERGE_PROBABILITIES_H
+
+#include "lm/common/ngram.hh"
+#include "lm/interpolate/bounded_sequence_encoding.hh"
+#include "util/fixed_array.hh"
+#include "util/stream/multi_stream.hh"
+
+#include <stdint.h>
+
+namespace lm {
+namespace interpolate {
+
+struct InterpolateInfo;
+
+/**
+ * Make the encoding of backoff values for a given order. This stores values
+ * in [PartialProbGamma::FromBegin(), PartialProbGamma::FromEnd())
+ */
+BoundedSequenceEncoding MakeEncoder(const InterpolateInfo &info, uint8_t order);
+
+/**
+ * The first pass for the offline log-linear interpolation algorithm. This
+ * reads K **suffix-ordered** streams for each model, for each order, of
+ * ngram records (ngram-id, prob, backoff). It further assumes that the
+ * ngram-ids have been unified over all of the stream inputs.
+ *
+ * Its output is records of (ngram-id, prob-prod, backoff-level,
+ * backoff-level, ...) where the backoff-levels (of which there are K) are
+ * the context length (0 for unigrams) that the corresponding model had to
+ * back off to in order to obtain a probability for that ngram-id. Each of
+ * these streams is terminated with a record whose ngram-id is all
+ * maximum-integers for simplicity in implementation here.
+ *
+ * @param models_by_order An array, indexed by order, containing the
+ * ChainPositions for the K model streams of order (i + 1).
+ * @param output_chains The output chains for each order (of length K)
+ */
+void MergeProbabilities(
+ const InterpolateInfo &info,
+ util::FixedArray<util::stream::ChainPositions> &models_by_order,
+ util::stream::Chains &output_chains);
+
+/**
+ * This class represents the output payload for this pass, which consists
+ * of an ngram-id, a probability, and then a vector of orders from which
+ * each of the component models backed off to for this ngram, encoded
+ * using the BoundedSequenceEncoding class.
+ */
+class PartialProbGamma : public lm::NGramHeader {
+public:
+ PartialProbGamma(std::size_t order, std::size_t backoff_bytes)
+ : lm::NGramHeader(NULL, order), backoff_bytes_(backoff_bytes) {
+ // nothing
+ }
+
+ std::size_t TotalSize() const {
+ return sizeof(WordIndex) * Order() + sizeof(After) + backoff_bytes_;
+ }
+
+ // TODO: cache bounded sequence encoding in the pipeline?
+ static std::size_t TotalSize(const InterpolateInfo &info, uint8_t order) {
+ return sizeof(WordIndex) * order + sizeof(After) + MakeEncoder(info, order).EncodedLength();
+ }
+
+ float &Prob() { return Pay().prob; }
+ float Prob() const { return Pay().prob; }
+
+ float &LowerProb() { return Pay().lower_prob; }
+ float LowerProb() const { return Pay().lower_prob; }
+
+ const uint8_t *FromBegin() const { return Pay().from; }
+ uint8_t *FromBegin() { return Pay().from; }
+
+private:
+ struct After {
+ // Note that backoff_and_normalize assumes this comes first.
+ float prob;
+ float lower_prob;
+ uint8_t from[];
+ };
+ const After &Pay() const { return *reinterpret_cast<const After *>(end()); }
+ After &Pay() { return *reinterpret_cast<After*>(end()); }
+
+ std::size_t backoff_bytes_;
+};
+
+}} // namespaces
+#endif // LM_INTERPOLATE_MERGE_PROBABILITIES_H
diff --git a/src/kenlm/lm/interpolate/merge_test/test1 b/src/kenlm/lm/interpolate/merge_test/test1
new file mode 100644
index 0000000..08d114d
--- /dev/null
+++ b/src/kenlm/lm/interpolate/merge_test/test1
Binary files differ
diff --git a/src/kenlm/lm/interpolate/merge_test/test2 b/src/kenlm/lm/interpolate/merge_test/test2
new file mode 100644
index 0000000..fd3a380
--- /dev/null
+++ b/src/kenlm/lm/interpolate/merge_test/test2
Binary files differ
diff --git a/src/kenlm/lm/interpolate/merge_test/test3 b/src/kenlm/lm/interpolate/merge_test/test3
new file mode 100644
index 0000000..6c89d7f
--- /dev/null
+++ b/src/kenlm/lm/interpolate/merge_test/test3
Binary files differ
diff --git a/src/kenlm/lm/interpolate/merge_test/test_bad_order b/src/kenlm/lm/interpolate/merge_test/test_bad_order
new file mode 100644
index 0000000..d50a5e8
--- /dev/null
+++ b/src/kenlm/lm/interpolate/merge_test/test_bad_order
Binary files differ
diff --git a/src/kenlm/lm/interpolate/merge_test/test_no_unk b/src/kenlm/lm/interpolate/merge_test/test_no_unk
new file mode 100644
index 0000000..fbcf12d
--- /dev/null
+++ b/src/kenlm/lm/interpolate/merge_test/test_no_unk
@@ -0,0 +1 @@
+toto
diff --git a/src/kenlm/lm/interpolate/merge_vocab.cc b/src/kenlm/lm/interpolate/merge_vocab.cc
new file mode 100644
index 0000000..3e84389
--- /dev/null
+++ b/src/kenlm/lm/interpolate/merge_vocab.cc
@@ -0,0 +1,131 @@
+#include "lm/interpolate/merge_vocab.hh"
+
+#include "lm/enumerate_vocab.hh"
+#include "lm/interpolate/universal_vocab.hh"
+#include "lm/lm_exception.hh"
+#include "lm/vocab.hh"
+#include "util/file_piece.hh"
+
+#include <queue>
+#include <string>
+#include <iostream>
+#include <vector>
+
+namespace lm {
+namespace interpolate {
+namespace {
+
+class VocabFileReader {
+ public:
+ explicit VocabFileReader(const int fd, size_t model_num, uint64_t offset = 0);
+
+ VocabFileReader &operator++();
+ operator bool() const { return !eof_; }
+ uint64_t operator*() const { return Value(); }
+
+ uint64_t Value() const { return hash_value_; }
+ size_t ModelNum() const { return model_num_; }
+ WordIndex CurrentIndex() const { return current_index_; }
+
+ StringPiece Word() const { return word_; }
+
+ private:
+ uint64_t hash_value_;
+ WordIndex current_index_;
+ bool eof_;
+ size_t model_num_;
+ StringPiece word_;
+ util::FilePiece file_piece_;
+};
+
+VocabFileReader::VocabFileReader(const int fd, const size_t model_num, uint64_t offset) :
+ hash_value_(0),
+ current_index_(0),
+ eof_(false),
+ model_num_(model_num),
+ file_piece_(fd) {
+ word_ = file_piece_.ReadLine('\0');
+ UTIL_THROW_IF(word_ != "<unk>",
+ FormatLoadException,
+ "Vocabulary words are in the wrong place.");
+ // setup to initial value
+ ++*this;
+}
+
+VocabFileReader &VocabFileReader::operator++() {
+ try {
+ word_ = file_piece_.ReadLine('\0');
+ } catch(util::EndOfFileException &e) {
+ eof_ = true;
+ return *this;
+ }
+ uint64_t prev_hash_value = hash_value_;
+ hash_value_ = ngram::detail::HashForVocab(word_.data(), word_.size());
+
+ // hash values should be monotonically increasing
+ UTIL_THROW_IF(hash_value_ < prev_hash_value, FormatLoadException,
+ ": word index not monotonically increasing."
+ << " model_num: " << model_num_
+ << " prev hash: " << prev_hash_value
+ << " new hash: " << hash_value_);
+
+ ++current_index_;
+ return *this;
+}
+
+class CompareFiles {
+public:
+ bool operator()(const VocabFileReader* x,
+ const VocabFileReader* y)
+ { return x->Value() > y->Value(); }
+};
+
+class Readers : public util::FixedArray<VocabFileReader> {
+ public:
+ Readers(std::size_t number) : util::FixedArray<VocabFileReader>(number) {}
+ void push_back(int fd, std::size_t i) {
+ new(end()) VocabFileReader(fd, i);
+ Constructed();
+ }
+};
+
+} // namespace
+
+WordIndex MergeVocab(util::FixedArray<util::scoped_fd> &files, UniversalVocab &vocab, EnumerateVocab &enumerate) {
+ typedef std::priority_queue<VocabFileReader*, std::vector<VocabFileReader*>, CompareFiles> HeapType;
+ HeapType heap;
+ Readers readers(files.size());
+ for (size_t i = 0; i < files.size(); ++i) {
+ readers.push_back(files[i].release(), i);
+ heap.push(&readers.back());
+ // initialize first index to 0 for <unk>
+ vocab.InsertUniversalIdx(i, 0, 0);
+ }
+
+ uint64_t prev_hash_value = 0;
+ // global_index starts with <unk> which is 0
+ WordIndex global_index = 0;
+
+ enumerate.Add(0, "<unk>");
+ while (!heap.empty()) {
+ VocabFileReader* top_vocab_file = heap.top();
+ if (top_vocab_file->Value() != prev_hash_value) {
+ enumerate.Add(++global_index, top_vocab_file->Word());
+ }
+ vocab.InsertUniversalIdx(top_vocab_file->ModelNum(),
+ top_vocab_file->CurrentIndex(),
+ global_index);
+
+ prev_hash_value = top_vocab_file->Value();
+
+ heap.pop();
+ if (++(*top_vocab_file)) {
+ heap.push(top_vocab_file);
+ }
+ }
+ return global_index + 1;
+}
+
+} // namespace interpolate
+} // namespace lm
+
diff --git a/src/kenlm/lm/interpolate/merge_vocab.hh b/src/kenlm/lm/interpolate/merge_vocab.hh
new file mode 100644
index 0000000..cc74d33
--- /dev/null
+++ b/src/kenlm/lm/interpolate/merge_vocab.hh
@@ -0,0 +1,23 @@
+#ifndef LM_INTERPOLATE_MERGE_VOCAB_H
+#define LM_INTERPOLATE_MERGE_VOCAB_H
+
+#include "lm/word_index.hh"
+#include "util/file.hh"
+#include "util/fixed_array.hh"
+
+namespace lm {
+
+class EnumerateVocab;
+
+namespace interpolate {
+
+class UniversalVocab;
+
+// Takes ownership of vocab_files.
+// The combined vocabulary is enumerated with enumerate.
+// Returns the size of the combined vocabulary.
+WordIndex MergeVocab(util::FixedArray<util::scoped_fd> &vocab_files, UniversalVocab &vocab, EnumerateVocab &enumerate);
+
+}} // namespaces
+
+#endif // LM_INTERPOLATE_MERGE_VOCAB_H
diff --git a/src/kenlm/lm/interpolate/merge_vocab_test.cc b/src/kenlm/lm/interpolate/merge_vocab_test.cc
new file mode 100644
index 0000000..6df25b2
--- /dev/null
+++ b/src/kenlm/lm/interpolate/merge_vocab_test.cc
@@ -0,0 +1,126 @@
+#define BOOST_TEST_MODULE InterpolateMergeVocabTest
+#include <boost/test/unit_test.hpp>
+
+#include "lm/enumerate_vocab.hh"
+#include "lm/interpolate/merge_vocab.hh"
+#include "lm/interpolate/universal_vocab.hh"
+#include "lm/lm_exception.hh"
+#include "lm/vocab.hh"
+#include "lm/word_index.hh"
+#include "util/file.hh"
+#include "util/file_piece.hh"
+
+#include <cstring>
+
+namespace lm {
+namespace interpolate {
+namespace {
+
+// Stupid bjam permutes the command line arguments randomly.
+class TestFiles {
+ public:
+ TestFiles() {
+ char **argv = boost::unit_test::framework::master_test_suite().argv;
+ int argc = boost::unit_test::framework::master_test_suite().argc;
+ BOOST_REQUIRE_EQUAL(6, argc);
+ for (int i = 1; i < argc; ++i) {
+ EndsWithAssign(argv[i], "test1", test[0]);
+ EndsWithAssign(argv[i], "test2", test[1]);
+ EndsWithAssign(argv[i], "test3", test[2]);
+ EndsWithAssign(argv[i], "no_unk", no_unk);
+ EndsWithAssign(argv[i], "bad_order", bad_order);
+ }
+ }
+
+ void EndsWithAssign(char *arg, StringPiece value, util::scoped_fd &to) {
+ StringPiece str(arg);
+ if (str.size() < value.size()) return;
+ if (std::memcmp(str.data() + str.size() - value.size(), value.data(), value.size())) return;
+ to.reset(util::OpenReadOrThrow(arg));
+ }
+
+ util::scoped_fd test[3], no_unk, bad_order;
+};
+
+class DoNothingEnumerate : public EnumerateVocab {
+ public:
+ void Add(WordIndex, const StringPiece &) {}
+};
+
+BOOST_AUTO_TEST_CASE(MergeVocabTest) {
+ TestFiles files;
+
+ util::FixedArray<util::scoped_fd> used_files(3);
+ used_files.push_back(files.test[0].release());
+ used_files.push_back(files.test[1].release());
+ used_files.push_back(files.test[2].release());
+
+ std::vector<lm::WordIndex> model_max_idx;
+ model_max_idx.push_back(10);
+ model_max_idx.push_back(10);
+ model_max_idx.push_back(10);
+
+ util::scoped_fd combined(util::MakeTemp("temporary"));
+
+ UniversalVocab universal_vocab(model_max_idx);
+ {
+ ngram::ImmediateWriteWordsWrapper writer(NULL, combined.get(), 0);
+ MergeVocab(used_files, universal_vocab, writer);
+ }
+
+ BOOST_CHECK_EQUAL(universal_vocab.GetUniversalIdx(0, 0), 0);
+ BOOST_CHECK_EQUAL(universal_vocab.GetUniversalIdx(1, 0), 0);
+ BOOST_CHECK_EQUAL(universal_vocab.GetUniversalIdx(2, 0), 0);
+ BOOST_CHECK_EQUAL(universal_vocab.GetUniversalIdx(0, 1), 1);
+ BOOST_CHECK_EQUAL(universal_vocab.GetUniversalIdx(1, 1), 2);
+ BOOST_CHECK_EQUAL(universal_vocab.GetUniversalIdx(2, 1), 8);
+ BOOST_CHECK_EQUAL(universal_vocab.GetUniversalIdx(0, 5), 11);
+ BOOST_CHECK_EQUAL(universal_vocab.GetUniversalIdx(1, 3), 4);
+ BOOST_CHECK_EQUAL(universal_vocab.GetUniversalIdx(2, 3), 10);
+
+ util::FilePiece f(combined.release());
+ BOOST_CHECK_EQUAL("<unk>", f.ReadLine('\0'));
+ BOOST_CHECK_EQUAL("a", f.ReadLine('\0'));
+ BOOST_CHECK_EQUAL("is this", f.ReadLine('\0'));
+ BOOST_CHECK_EQUAL("this a", f.ReadLine('\0'));
+ BOOST_CHECK_EQUAL("first cut", f.ReadLine('\0'));
+ BOOST_CHECK_EQUAL("this", f.ReadLine('\0'));
+ BOOST_CHECK_EQUAL("a first", f.ReadLine('\0'));
+ BOOST_CHECK_EQUAL("cut", f.ReadLine('\0'));
+ BOOST_CHECK_EQUAL("is", f.ReadLine('\0'));
+ BOOST_CHECK_EQUAL("i", f.ReadLine('\0'));
+ BOOST_CHECK_EQUAL("secd", f.ReadLine('\0'));
+ BOOST_CHECK_EQUAL("first", f.ReadLine('\0'));
+ BOOST_CHECK_THROW(f.ReadLine('\0'), util::EndOfFileException);
+}
+
+BOOST_AUTO_TEST_CASE(MergeVocabNoUnkTest) {
+ TestFiles files;
+ util::FixedArray<util::scoped_fd> used_files(1);
+ used_files.push_back(files.no_unk.release());
+
+ std::vector<lm::WordIndex> model_max_idx;
+ model_max_idx.push_back(10);
+
+ UniversalVocab universal_vocab(model_max_idx);
+ DoNothingEnumerate nothing;
+ BOOST_CHECK_THROW(MergeVocab(used_files, universal_vocab, nothing), FormatLoadException);
+}
+
+BOOST_AUTO_TEST_CASE(MergeVocabWrongOrderTest) {
+ TestFiles files;
+
+ util::FixedArray<util::scoped_fd> used_files(2);
+ used_files.push_back(files.test[0].release());
+ used_files.push_back(files.bad_order.release());
+
+ std::vector<lm::WordIndex> model_max_idx;
+ model_max_idx.push_back(10);
+ model_max_idx.push_back(10);
+
+ lm::interpolate::UniversalVocab universal_vocab(model_max_idx);
+ DoNothingEnumerate nothing;
+ BOOST_CHECK_THROW(MergeVocab(used_files, universal_vocab, nothing), FormatLoadException);
+}
+
+}}} // namespaces
diff --git a/src/kenlm/lm/interpolate/normalize.cc b/src/kenlm/lm/interpolate/normalize.cc
new file mode 100644
index 0000000..f683f10
--- /dev/null
+++ b/src/kenlm/lm/interpolate/normalize.cc
@@ -0,0 +1,384 @@
+#include "lm/interpolate/normalize.hh"
+
+#include "lm/common/compare.hh"
+#include "lm/common/ngram_stream.hh"
+#include "lm/interpolate/backoff_matrix.hh"
+#include "lm/interpolate/bounded_sequence_encoding.hh"
+#include "lm/interpolate/interpolate_info.hh"
+#include "lm/interpolate/merge_probabilities.hh"
+#include "lm/weights.hh"
+#include "lm/word_index.hh"
+#include "util/fixed_array.hh"
+#include "util/scoped.hh"
+#include "util/stream/stream.hh"
+#include "util/stream/rewindable_stream.hh"
+
+#include <functional>
+#include <queue>
+#include <vector>
+
+namespace lm { namespace interpolate {
+namespace {
+
+struct SuffixLexicographicLess : public std::binary_function<NGramHeader, NGramHeader, bool> {
+ bool operator()(const NGramHeader first, const NGramHeader second) const {
+ for (const WordIndex *f = first.end() - 1, *s = second.end() - 1; f >= first.begin() && s >= second.begin(); --f, --s) {
+ if (*f < *s) return true;
+ if (*f > *s) return false;
+ }
+ return first.size() < second.size();
+ }
+};
+
+class BackoffQueueEntry {
+ public:
+ BackoffQueueEntry(float &entry, const util::stream::ChainPosition &position)
+ : entry_(entry), stream_(position) {
+ entry_ = 0.0;
+ }
+
+ operator bool() const { return stream_; }
+
+ NGramHeader operator*() const { return *stream_; }
+ const NGramHeader *operator->() const { return &*stream_; }
+
+ void Enter() {
+ entry_ = stream_->Value().backoff;
+ }
+
+ BackoffQueueEntry &Next() {
+ entry_ = 0.0;
+ ++stream_;
+ return *this;
+ }
+
+ private:
+ float &entry_;
+ NGramStream<ProbBackoff> stream_;
+};
+
+struct PtrGreater : public std::binary_function<const BackoffQueueEntry *, const BackoffQueueEntry *, bool> {
+ bool operator()(const BackoffQueueEntry *first, const BackoffQueueEntry *second) const {
+ return SuffixLexicographicLess()(**second, **first);
+ }
+};
+
+class EntryOwner : public util::FixedArray<BackoffQueueEntry> {
+ public:
+ void push_back(float &entry, const util::stream::ChainPosition &position) {
+ new (end()) BackoffQueueEntry(entry, position);
+ Constructed();
+ }
+};
+
+std::size_t MaxOrder(const util::FixedArray<util::stream::ChainPositions> &model) {
+ std::size_t ret = 0;
+ for (const util::stream::ChainPositions *m = model.begin(); m != model.end(); ++m) {
+ ret = std::max(ret, m->size());
+ }
+ return ret;
+}
+
+class BackoffManager {
+ public:
+ explicit BackoffManager(const util::FixedArray<util::stream::ChainPositions> &models)
+ : entered_(MaxOrder(models)), matrix_(models.size(), MaxOrder(models)), skip_write_(MaxOrder(models)) {
+ std::size_t total = 0;
+ for (const util::stream::ChainPositions *m = models.begin(); m != models.end(); ++m) {
+ total += m->size();
+ }
+ for (std::size_t i = 0; i < MaxOrder(models); ++i) {
+ entered_.push_back(models.size());
+ }
+ owner_.Init(total);
+ for (const util::stream::ChainPositions *m = models.begin(); m != models.end(); ++m) {
+ for (const util::stream::ChainPosition *j = m->begin(); j != m->end(); ++j) {
+ owner_.push_back(matrix_.Backoff(m - models.begin(), j - m->begin()), *j);
+ if (owner_.back()) {
+ queue_.push(&owner_.back());
+ }
+ }
+ }
+ }
+
+ void SetupSkip(std::size_t order, util::stream::Stream &stream) {
+ skip_write_[order - 2] = &stream;
+ }
+
+ // Move up the backoffs for the given n-gram. The n-grams must be provided
+ // in suffix lexicographic order.
+ void Enter(const NGramHeader &to) {
+ // Check that we exited properly.
+ for (std::size_t i = to.Order() - 1; i < entered_.size(); ++i) {
+ assert(entered_[i].empty());
+ }
+ SuffixLexicographicLess less;
+ while (!queue_.empty() && less(**queue_.top(), to))
+ SkipRecord();
+ while (TopMatches(to)) {
+ BackoffQueueEntry *matches = queue_.top();
+ entered_[to.Order() - 1].push_back(matches);
+ matches->Enter();
+ queue_.pop();
+ }
+ }
+
+ void Exit(std::size_t order_minus_1) {
+ for (BackoffQueueEntry **i = entered_[order_minus_1].begin(); i != entered_[order_minus_1].end(); ++i) {
+ if ((*i)->Next())
+ queue_.push(*i);
+ }
+ entered_[order_minus_1].clear();
+ }
+
+ float Get(std::size_t model, std::size_t order_minus_1) const {
+ return matrix_.Backoff(model, order_minus_1);
+ }
+
+ void Finish() {
+ while (!queue_.empty())
+ SkipRecord();
+ }
+
+ private:
+ void SkipRecord() {
+ BackoffQueueEntry *top = queue_.top();
+ queue_.pop();
+ // Is this the last instance of the n-gram?
+ if (!TopMatches(**top)) {
+ // An n-gram is being skipped. Called once per skipped n-gram,
+ // regardless of how many models it comes from.
+ *reinterpret_cast<float*>(skip_write_[(*top)->Order() - 1]->Get()) = 0.0;
+ ++*skip_write_[(*top)->Order() - 1];
+ }
+ if (top->Next())
+ queue_.push(top);
+ }
+
+ bool TopMatches(const NGramHeader &header) const {
+ return !queue_.empty() && (*queue_.top())->Order() == header.Order() && std::equal(header.begin(), header.end(), (*queue_.top())->begin());
+ }
+
+ EntryOwner owner_;
+ std::priority_queue<BackoffQueueEntry*, std::vector<BackoffQueueEntry*>, PtrGreater> queue_;
+
+ // Indexed by order then just all the matching models.
+ util::FixedArray<util::FixedArray<BackoffQueueEntry*> > entered_;
+
+ std::size_t order_;
+
+ BackoffMatrix matrix_;
+
+ std::vector<util::stream::Stream*> skip_write_;
+};
+
+typedef long double Accum;
+
+// Handles n-grams of the same order, using recursion to call another instance
+// for higher orders.
+class Recurse {
+ public:
+ Recurse(
+ const InterpolateInfo &info, // Must stay alive the entire time.
+ std::size_t order,
+ const util::stream::ChainPosition &merged_probs,
+ const util::stream::ChainPosition &prob_out,
+ const util::stream::ChainPosition &backoff_out,
+ BackoffManager &backoffs,
+ Recurse *higher) // higher is null for the highest order.
+ : order_(order),
+ encoding_(MakeEncoder(info, order)),
+ input_(merged_probs, PartialProbGamma(order, encoding_.EncodedLength())),
+ prob_out_(prob_out),
+ backoff_out_(backoff_out),
+ backoffs_(backoffs),
+ lambdas_(&*info.lambdas.begin()),
+ higher_(higher),
+ decoded_backoffs_(info.Models()),
+ extended_context_(order - 1) {
+ // This is only for bigrams and above. Summing unigrams is a much easier case.
+ assert(order >= 2);
+ }
+
+ // context = w_1^{n-1}
+ // z_lower = Z(w_2^{n-1})
+ // Input:
+ // Merged probabilities without backoff applied in input_.
+ // Backoffs via backoffs_.
+ // Calculates:
+ // Z(w_1^{n-1}): intermediate only.
+ // p_I(x | w_1^{n-1}) for all x: w_1^{n-1}x exists: Written to prob_out_.
+ // b_I(w_1^{n-1}): Written to backoff_out_.
+ void SameContext(const NGramHeader &context, Accum z_lower) {
+ assert(context.size() == order_ - 1);
+ backoffs_.Enter(context);
+ prob_out_.Mark();
+
+ // This is the backoff term that applies when one assumes everything backs off:
+ // \prod_i b_i(w_1^{n-1})^{\lambda_i}.
+ Accum backoff_once = 0.0;
+ for (std::size_t m = 0; m < decoded_backoffs_.size(); ++m) {
+ backoff_once += lambdas_[m] * backoffs_.Get(m, order_ - 2);
+ }
+
+ Accum z_delta = 0.0;
+ std::size_t count = 0;
+ for (; input_ && std::equal(context.begin(), context.end(), input_->begin()); ++input_, ++prob_out_, ++count) {
+ // Apply backoffs to probabilities.
+ // TODO: change bounded sequence encoding to have an iterator for decoding instead of doing a copy here.
+ encoding_.Decode(input_->FromBegin(), &*decoded_backoffs_.begin());
+ for (std::size_t m = 0; m < NumModels(); ++m) {
+ // Apply the backoffs as instructed for model m.
+ float accumulated = 0.0;
+ // Change backoffs for [order it backed off to, order - 1) except
+ // with 0-indexing. There is still the potential to charge backoff
+ // for order - 1, which is done later. The backoffs charged here
+ // are b_m(w_{n-1}^{n-1}) ... b_m(w_2^{n-1})
+ for (unsigned char backed_to = decoded_backoffs_[m]; backed_to < order_ - 2; ++backed_to) {
+ accumulated += backoffs_.Get(m, backed_to);
+ }
+ float lambda = lambdas_[m];
+ // Lower p(x | w_2^{n-1}) gets all the backoffs except the highest.
+ input_->LowerProb() += accumulated * lambda;
+ // Charge the backoff b(w_1^{n-1}) if applicable, but only to attain p(x | w_1^{n-1})
+ if (decoded_backoffs_[m] < order_ - 1) {
+ accumulated += backoffs_.Get(m, order_ - 2);
+ }
+ input_->Prob() += accumulated * lambda;
+ }
+ // TODO: better precision/less operations here.
+ z_delta += pow(10.0, input_->Prob()) - pow(10.0, input_->LowerProb() + backoff_once);
+
+ // Write unnormalized probability record.
+ std::copy(input_->begin(), input_->end(), reinterpret_cast<WordIndex*>(prob_out_.Get()));
+ ProbWrite() = input_->Prob();
+ }
+ // TODO numerical precision.
+ Accum z = log10(pow(10.0, z_lower + backoff_once) + z_delta);
+
+ // Normalize.
+ prob_out_.Rewind();
+ for (std::size_t i = 0; i < count; ++i, ++prob_out_) {
+ ProbWrite() -= z;
+ }
+ // This allows the stream to release data.
+ prob_out_.Mark();
+
+ // Output backoff.
+ *reinterpret_cast<float*>(backoff_out_.Get()) = z_lower + backoff_once - z;
+ ++backoff_out_;
+
+ if (higher_.get())
+ higher_->ExtendContext(context, z);
+
+ backoffs_.Exit(order_ - 2);
+ }
+
+ // Call is given a context and z(context).
+ // Evaluates y context x for all y,x.
+ void ExtendContext(const NGramHeader &middle, Accum z_lower) {
+ assert(middle.size() == order_ - 2);
+ // Copy because the input will advance. TODO avoid this copy by sharing amongst classes.
+ std::copy(middle.begin(), middle.end(), extended_context_.begin() + 1);
+ while (input_ && std::equal(middle.begin(), middle.end(), input_->begin() + 1)) {
+ *extended_context_.begin() = *input_->begin();
+ SameContext(NGramHeader(&*extended_context_.begin(), order_ - 1), z_lower);
+ }
+ }
+
+ void Finish() {
+ assert(!input_);
+ prob_out_.Poison();
+ backoff_out_.Poison();
+ if (higher_.get())
+ higher_->Finish();
+ }
+
+ // The BackoffManager class also injects backoffs when it skips ahead e.g. b(</s>) = 1
+ util::stream::Stream &BackoffStream() { return backoff_out_; }
+
+ private:
+ // Write the probability to the correct place in prob_out_. Should use a proxy but currently incompatible with RewindableStream.
+ float &ProbWrite() {
+ return *reinterpret_cast<float*>(reinterpret_cast<uint8_t*>(prob_out_.Get()) + order_ * sizeof(WordIndex));
+ }
+
+ std::size_t NumModels() const { return decoded_backoffs_.size(); }
+
+ const std::size_t order_;
+
+ const BoundedSequenceEncoding encoding_;
+
+ ProxyStream<PartialProbGamma> input_;
+ util::stream::RewindableStream prob_out_;
+ util::stream::Stream backoff_out_;
+
+ BackoffManager &backoffs_;
+ const float *const lambdas_;
+
+ // Higher order instance of this same class.
+ util::scoped_ptr<Recurse> higher_;
+
+ // Temporary in SameContext.
+ std::vector<unsigned char> decoded_backoffs_;
+ // Temporary in ExtendContext.
+ std::vector<WordIndex> extended_context_;
+};
+
+class Thread {
+ public:
+ Thread(const InterpolateInfo &info, util::FixedArray<util::stream::ChainPositions> &models_by_order, util::stream::Chains &prob_out, util::stream::Chains &backoff_out)
+ : info_(info), models_by_order_(models_by_order), prob_out_(prob_out), backoff_out_(backoff_out) {}
+
+ void Run(const util::stream::ChainPositions &merged_probabilities) {
+    // Unigrams do not have encoded backoff info.
+ ProxyStream<PartialProbGamma> in(merged_probabilities[0], PartialProbGamma(1, 0));
+ util::stream::RewindableStream prob_write(prob_out_[0]);
+ Accum z = 0.0;
+ prob_write.Mark();
+ WordIndex count = 0;
+ for (; in; ++in, ++prob_write, ++count) {
+      // Note assumption that probability comes first
+ memcpy(prob_write.Get(), in.Get(), sizeof(WordIndex) + sizeof(float));
+ z += pow(10.0, in->Prob());
+ }
+ // TODO HACK TODO: lmplz outputs p(<s>) = 1 to get q to compute nicely. That will always result in 1.0 more than it should be.
+ z -= 1.0;
+ float log_z = log10(z);
+ prob_write.Rewind();
+ // Normalize unigram probabilities.
+ for (WordIndex i = 0; i < count; ++i, ++prob_write) {
+ *reinterpret_cast<float*>(reinterpret_cast<uint8_t*>(prob_write.Get()) + sizeof(WordIndex)) -= log_z;
+ }
+ prob_write.Poison();
+
+ // Now setup the higher orders.
+ util::scoped_ptr<Recurse> higher_order;
+ BackoffManager backoffs(models_by_order_);
+ std::size_t max_order = merged_probabilities.size();
+ for (std::size_t order = max_order; order >= 2; --order) {
+ higher_order.reset(new Recurse(info_, order, merged_probabilities[order - 1], prob_out_[order - 1], backoff_out_[order - 2], backoffs, higher_order.release()));
+ backoffs.SetupSkip(order, higher_order->BackoffStream());
+ }
+ if (max_order > 1) {
+ higher_order->ExtendContext(NGramHeader(NULL, 0), log_z);
+ higher_order->Finish();
+ }
+ }
+
+ private:
+ const InterpolateInfo info_;
+ util::FixedArray<util::stream::ChainPositions> &models_by_order_;
+ util::stream::ChainPositions prob_out_;
+ util::stream::ChainPositions backoff_out_;
+};
+
+} // namespace
+
+void Normalize(const InterpolateInfo &info, util::FixedArray<util::stream::ChainPositions> &models_by_order, util::stream::Chains &merged_probabilities, util::stream::Chains &prob_out, util::stream::Chains &backoff_out) {
+ assert(prob_out.size() == backoff_out.size() + 1);
+ // Arbitrarily put the thread on the merged_probabilities Chains.
+ merged_probabilities >> Thread(info, models_by_order, prob_out, backoff_out);
+}
+
+}} // namespaces
diff --git a/src/kenlm/lm/interpolate/normalize.hh b/src/kenlm/lm/interpolate/normalize.hh
new file mode 100644
index 0000000..dd26b25
--- /dev/null
+++ b/src/kenlm/lm/interpolate/normalize.hh
@@ -0,0 +1,35 @@
+#ifndef LM_INTERPOLATE_NORMALIZE_H
+#define LM_INTERPOLATE_NORMALIZE_H
+
+#include "util/fixed_array.hh"
+
+/* Pass 2:
+ * - Multiply backoff weights by the backed off probabilities from pass 1.
+ * - Compute the normalization factor Z.
+ * - Send Z to the next highest order.
+ * - Rewind and divide by Z.
+ */
+
+namespace util { namespace stream {
+class ChainPositions;
+class Chains;
+}} // namespaces
+
+namespace lm { namespace interpolate {
+
+struct InterpolateInfo;
+
+void Normalize(
+ const InterpolateInfo &info,
+ // Input full models for backoffs. Assumes that renumbering has been done. Suffix order.
+ util::FixedArray<util::stream::ChainPositions> &models_by_order,
+ // Input PartialProbGamma from MergeProbabilities. Context order.
+ util::stream::Chains &merged_probabilities,
+ // Output NGram<float> with normalized probabilities. Context order.
+ util::stream::Chains &probabilities_out,
+ // Output bare floats with backoffs. Note backoffs.size() == order - 1. Suffix order.
+ util::stream::Chains &backoffs_out);
+
+}} // namespaces
+
+#endif // LM_INTERPOLATE_NORMALIZE_H
diff --git a/src/kenlm/lm/interpolate/normalize_test.cc b/src/kenlm/lm/interpolate/normalize_test.cc
new file mode 100644
index 0000000..fe220f3
--- /dev/null
+++ b/src/kenlm/lm/interpolate/normalize_test.cc
@@ -0,0 +1,86 @@
+#include "lm/interpolate/normalize.hh"
+
+#include "lm/interpolate/interpolate_info.hh"
+#include "lm/interpolate/merge_probabilities.hh"
+#include "lm/common/ngram_stream.hh"
+#include "util/stream/chain.hh"
+#include "util/stream/multi_stream.hh"
+
+#define BOOST_TEST_MODULE NormalizeTest
+#include <boost/test/unit_test.hpp>
+
+namespace lm { namespace interpolate { namespace {
+
+// log without backoff
+const float kInputs[] = {-0.3, 1.2, -9.8, 4.0, -7.0, 0.0};
+
+class WriteInput {
+ public:
+ WriteInput() {}
+ void Run(const util::stream::ChainPosition &to) {
+ util::stream::Stream out(to);
+ for (WordIndex i = 0; i < sizeof(kInputs) / sizeof(float); ++i, ++out) {
+ memcpy(out.Get(), &i, sizeof(WordIndex));
+ memcpy((uint8_t*)out.Get() + sizeof(WordIndex), &kInputs[i], sizeof(float));
+ }
+ out.Poison();
+ }
+};
+
+void CheckOutput(const util::stream::ChainPosition &from) {
+ NGramStream<float> in(from);
+ float sum = 0.0;
+ for (WordIndex i = 0; i < sizeof(kInputs) / sizeof(float) - 1 /* <s> at the end */; ++i) {
+ sum += pow(10.0, kInputs[i]);
+ }
+ sum = log10(sum);
+ BOOST_REQUIRE(in);
+ BOOST_CHECK_CLOSE(kInputs[0] - sum, in->Value(), 0.0001);
+ BOOST_REQUIRE(++in);
+ BOOST_CHECK_CLOSE(kInputs[1] - sum, in->Value(), 0.0001);
+ BOOST_REQUIRE(++in);
+ BOOST_CHECK_CLOSE(kInputs[2] - sum, in->Value(), 0.0001);
+ BOOST_REQUIRE(++in);
+ BOOST_CHECK_CLOSE(kInputs[3] - sum, in->Value(), 0.0001);
+ BOOST_REQUIRE(++in);
+ BOOST_CHECK_CLOSE(kInputs[4] - sum, in->Value(), 0.0001);
+ BOOST_REQUIRE(++in);
+ BOOST_CHECK_CLOSE(kInputs[5] - sum, in->Value(), 0.0001);
+ BOOST_CHECK(!++in);
+}
+
+BOOST_AUTO_TEST_CASE(Unigrams) {
+ InterpolateInfo info;
+ info.lambdas.push_back(2.0);
+ info.lambdas.push_back(-0.1);
+ info.orders.push_back(1);
+ info.orders.push_back(1);
+
+ BOOST_CHECK_EQUAL(0, MakeEncoder(info, 1).EncodedLength());
+
+ // No backoffs.
+ util::stream::Chains blank(0);
+ util::FixedArray<util::stream::ChainPositions> models_by_order(2);
+ models_by_order.push_back(blank);
+ models_by_order.push_back(blank);
+
+ util::stream::Chains merged_probabilities(1);
+ util::stream::Chains probabilities_out(1);
+ util::stream::Chains backoffs_out(0);
+
+ merged_probabilities.push_back(util::stream::ChainConfig(sizeof(WordIndex) + sizeof(float) + sizeof(float), 2, 24));
+ probabilities_out.push_back(util::stream::ChainConfig(sizeof(WordIndex) + sizeof(float), 2, 100));
+
+ merged_probabilities[0] >> WriteInput();
+ Normalize(info, models_by_order, merged_probabilities, probabilities_out, backoffs_out);
+
+ util::stream::ChainPosition checker(probabilities_out[0].Add());
+
+ merged_probabilities >> util::stream::kRecycle;
+ probabilities_out >> util::stream::kRecycle;
+
+ CheckOutput(checker);
+ probabilities_out.Wait();
+}
+
+}}} // namespaces
diff --git a/src/kenlm/lm/interpolate/perf_enum_gv_main.cc b/src/kenlm/lm/interpolate/perf_enum_gv_main.cc
new file mode 100644
index 0000000..a68e9e9
--- /dev/null
+++ b/src/kenlm/lm/interpolate/perf_enum_gv_main.cc
@@ -0,0 +1,215 @@
+/*
+Usage example
+1) Download from http://www.gwinnup.org/lminterp/train-params-output.tar.bz2
+2) then run perf_enum_gv -t lm.en.dev -m model-a.3.srilm -m model-b.3.srilm -m model-c.3.srilm
+ */
+
+#include "lm/ngram_query.hh"
+#include "lm/model.hh"
+#include "lm/word_index.hh"
+#include "lm/interpolate/enumerate_global_vocab.hh"
+
+#include "util/fixed_array.hh"
+#include "util/usage.hh"
+
+#include <string>
+#include <vector>
+#include <iostream>
+#include <fstream>
+#include <map>
+
+#include <boost/program_options.hpp>
+#include <boost/version.hpp>
+#include <boost/foreach.hpp>
+
+#include <Eigen/Eigen>
+
+#include <iostream>
+#include <sys/time.h>
+
+inline double deltaTV(const timeval& s, const timeval& e)
+{
+ return (e.tv_sec - s.tv_sec)*1000.0 + (e.tv_usec - s.tv_usec)/1000.0;
+}
+
+typedef struct timeval Wall;
+Wall GetWall() {
+ struct timeval tv;
+ gettimeofday(&tv, NULL);
+ return tv;
+}
+
+typedef Eigen::MatrixXf FMatrix;
+typedef Eigen::VectorXf FVector;
+
+
+bool HAS_BIAS = true;
+
+using namespace lm::ngram;
+using namespace lm;
+
+inline void logProb(Model * model, const std::vector<std::string>& ctx, const std::string& word) {
+
+ // Horribly inefficient
+ const Vocabulary &vocab = model->GetVocabulary();
+
+ State nextState; //throwaway
+
+ WordIndex word_idx = vocab.Index(word);
+ WordIndex context_idx[ctx.size()];
+
+ //reverse context
+ for(unsigned int i = 0; i < ctx.size(); i++) {
+ context_idx[ctx.size() - 1 - i] = vocab.Index(ctx[i]);
+ }
+ FullScoreReturn score = model->FullScoreForgotState(context_idx, &(context_idx[ctx.size() -1]), word_idx, nextState);
+}
+
+void set_features(const std::vector<std::string>& ctx,
+ const std::string& word,
+ const std::vector<Model *>& models,
+ FVector& v) {
+
+ for (unsigned i=0; i < models.size(); ++i)
+ logProb(models[i], ctx, word);
+
+}
+
+//const util::FixedArray<Model *>& models)
+void train_params(
+ const std::vector<std::vector<std::string> >& corpus,
+ const std::vector<std::string>& vocab,
+ const std::vector<Model *>& models) {
+ using namespace std;
+
+ vector<string> context(5, "<s>");
+ const int ITERATIONS = 10;
+ const int nlambdas = models.size(); // #models
+ FVector params = FVector::Zero(nlambdas);
+ vector<FVector> feats(vocab.size(), params);
+ static Wall start,stop;
+
+ for (int iter = 0; iter < ITERATIONS; ++iter) { // iterations
+ std::cout << "iteration: " << iter
+ << " corpus size " << corpus.size()
+ << std::endl;
+ for (unsigned ci = 0; ci < corpus.size(); ++ci) { // sentences in tuning corpus
+ const vector<string>& sentence = corpus[ci];
+ context.resize(5);
+ for (unsigned t = 0; t < sentence.size(); ++t) { // words in sentence
+ std::cout << "sentence " << ci << " word " << t << std::endl;
+ start = GetWall();
+ const string& ref_word_string = sentence[t];
+ for (unsigned i = 0; i < vocab.size(); ++i) { // vocab
+ set_features(context, vocab[i], models, feats[i]);
+ }
+ stop = GetWall();
+ std::cout << " time elapsed = " << deltaTV(start,stop) << std::endl;
+ context.push_back(ref_word_string);
+ }
+ }
+ }
+}
+
+int main(int argc, char** argv) {
+
+ std::string tuning_data;
+ std::vector<std::string> lms;
+
+ try {
+ namespace po = boost::program_options;
+ po::options_description options("train-params");
+
+ options.add_options()
+ ("help,h", po::bool_switch(), "Show this help message")
+ ("no_bias_term,B", po::bool_switch(), "Do not include a 'bias' feature")
+ ("tuning_data,t", po::value<std::string>(&tuning_data), "File to tune perplexity on")
+ ("model,m", po::value<std::vector<std::string> >(&lms), "Language models in KenLM format to interpolate");
+ po::variables_map vm;
+ po::store(po::parse_command_line(argc, argv, options), vm);
+
+ // Display help
+ if(argc == 1 || vm["help"].as<bool>()) {
+ std::cerr << options << std::endl;
+ return 1;
+ }
+ if (vm["no_bias_term"].as<bool>())
+ HAS_BIAS = false;
+ lms = vm["model"].as<std::vector<std::string> >();
+ tuning_data = vm["tuning_data"].as<std::string>();
+ }
+ catch(const std::exception &e) {
+
+ std::cerr << e.what() << std::endl;
+ return 1;
+
+ }
+ if (lms.size() < 2) {
+ std::cerr << "Please specify at least two language model files with -m LM.KLM\n";
+ return 1;
+ }
+ if (tuning_data.empty()) {
+ std::cerr << "Please specify tuning set with -t FILE.TXT\n";
+ return 1;
+ }
+
+ std::map<std::string, int*> vmap;
+ util::FixedArray<WordIndex> vm(2);
+
+  //stuff each model's words into the shared global vocab map
+ EnumerateGlobalVocab * globalVocabBuilder = new EnumerateGlobalVocab(&vmap, lms.size());
+ // EnumerateGlobalVocab * globalVocabBuilder = new EnumerateGlobalVocab(vm);
+
+ Config cfg;
+ cfg.enumerate_vocab = (EnumerateVocab *) globalVocabBuilder;
+
+ //load models
+ //util::FixedArray<Model *> models(lms.size());
+ std::vector<Model *> models;
+ for(int i=0; i < lms.size(); i++) {
+ std::cerr << "Loading LM file: " << lms[i] << std::endl;
+
+ //haaaack
+ globalVocabBuilder->SetCurModel(i); //yes this is dumb
+
+ //models[i] = new Model(lms[i].c_str());
+ Model * this_model = new Model(lms[i].c_str(), cfg);
+ models.push_back( this_model );
+
+ }
+
+ //assemble vocabulary vector
+ std::vector<std::string> vocab;
+ std::cerr << "Global Vocab Map has size: " << vmap.size() << std::endl;
+
+ std::pair<StringPiece,int *> me;
+
+ for(std::map<std::string, int*>::iterator iter = vmap.begin(); iter != vmap.end(); ++iter) {
+ vocab.push_back(iter->first);
+ }
+ std::cerr << "Vocab vector has size: " << vocab.size() << std::endl;
+
+ //load context sorted ngrams into vector of vectors
+ std::vector<std::vector<std::string> > corpus;
+
+ std::cerr << "Loading context-sorted ngrams: " << tuning_data << std::endl;
+ std::ifstream infile(tuning_data);
+
+ for(std::string line; std::getline(infile, line); ) {
+
+ std::vector<std::string> words; {
+
+ std::stringstream stream(line);
+ std::string word;
+
+ while(stream >> word) {
+ words.push_back(word);
+ }
+ }
+ corpus.push_back(words);
+ }
+
+ train_params(corpus, vocab, models);
+
+ return 0;
+}
diff --git a/src/kenlm/lm/interpolate/pipeline.cc b/src/kenlm/lm/interpolate/pipeline.cc
new file mode 100644
index 0000000..47b8288
--- /dev/null
+++ b/src/kenlm/lm/interpolate/pipeline.cc
@@ -0,0 +1,159 @@
+#include "lm/interpolate/pipeline.hh"
+
+#include "lm/common/compare.hh"
+#include "lm/common/print.hh"
+#include "lm/common/renumber.hh"
+#include "lm/vocab.hh"
+#include "lm/interpolate/backoff_reunification.hh"
+#include "lm/interpolate/interpolate_info.hh"
+#include "lm/interpolate/merge_probabilities.hh"
+#include "lm/interpolate/merge_vocab.hh"
+#include "lm/interpolate/normalize.hh"
+#include "lm/interpolate/universal_vocab.hh"
+#include "util/stream/chain.hh"
+#include "util/stream/count_records.hh"
+#include "util/stream/io.hh"
+#include "util/stream/multi_stream.hh"
+#include "util/stream/sort.hh"
+#include "util/fixed_array.hh"
+
+namespace lm { namespace interpolate { namespace {
+
+/* Put the original input files on chains and renumber them */
+void SetupInputs(std::size_t buffer_size, const UniversalVocab &vocab, util::FixedArray<ModelBuffer> &models, bool exclude_highest, util::FixedArray<util::stream::Chains> &chains, util::FixedArray<util::stream::ChainPositions> &positions) {
+ chains.clear();
+ positions.clear();
+ // TODO: much better memory sizing heuristics e.g. not making the chain larger than it will use.
+ util::stream::ChainConfig config(0, 2, buffer_size);
+ for (std::size_t i = 0; i < models.size(); ++i) {
+ chains.push_back(models[i].Order() - exclude_highest);
+ for (std::size_t j = 0; j < models[i].Order() - exclude_highest; ++j) {
+ config.entry_size = sizeof(WordIndex) * (j + 1) + sizeof(float) * 2; // TODO do not include wasteful backoff for highest.
+ chains.back().push_back(config);
+ }
+ models[i].Source(chains.back());
+ for (std::size_t j = 0; j < models[i].Order() - exclude_highest; ++j) {
+ chains[i][j] >> Renumber(vocab.Mapping(i), j + 1);
+ }
+ }
+ for (std::size_t i = 0; i < chains.size(); ++i) {
+ positions.push_back(chains[i]);
+ }
+}
+
+template <class SortOrder> void ApplySort(const util::stream::SortConfig &config, util::stream::Chains &chains) {
+ util::stream::Sorts<SortOrder> sorts(chains.size());
+ for (std::size_t i = 0; i < chains.size(); ++i) {
+ sorts.push_back(chains[i], config, SortOrder(i + 1));
+ }
+ chains.Wait(true);
+ // TODO memory management
+ for (std::size_t i = 0; i < sorts.size(); ++i) {
+ sorts[i].Merge(sorts[i].DefaultLazy());
+ }
+ for (std::size_t i = 0; i < sorts.size(); ++i) {
+ sorts[i].Output(chains[i], sorts[i].DefaultLazy());
+ }
+};
+
+} // namespace
+
+void Pipeline(util::FixedArray<ModelBuffer> &models, const Config &config, int write_file) {
+ // Setup InterpolateInfo and UniversalVocab.
+ InterpolateInfo info;
+ info.lambdas = config.lambdas;
+ std::vector<WordIndex> vocab_sizes;
+
+ util::scoped_fd vocab_null(util::MakeTemp(config.sort.temp_prefix));
+ std::size_t max_order = 0;
+ util::FixedArray<util::scoped_fd> vocab_files(models.size());
+ for (ModelBuffer *i = models.begin(); i != models.end(); ++i) {
+ info.orders.push_back(i->Order());
+ vocab_sizes.push_back(i->Counts()[0]);
+ vocab_files.push_back(util::DupOrThrow(i->VocabFile()));
+ max_order = std::max(max_order, i->Order());
+ }
+ UniversalVocab vocab(vocab_sizes);
+ {
+ ngram::ImmediateWriteWordsWrapper writer(NULL, vocab_null.get(), 0);
+ MergeVocab(vocab_files, vocab, writer);
+ }
+ vocab_files.clear();
+
+ std::cerr << "Merging probabilities." << std::endl;
+ // Pass 1: merge probabilities
+ util::FixedArray<util::stream::Chains> input_chains(models.size());
+ util::FixedArray<util::stream::ChainPositions> models_by_order(models.size());
+ SetupInputs(config.BufferSize(), vocab, models, false, input_chains, models_by_order);
+
+ util::stream::Chains merged_probs(max_order);
+ for (std::size_t i = 0; i < max_order; ++i) {
+ merged_probs.push_back(util::stream::ChainConfig(PartialProbGamma::TotalSize(info, i + 1), 2, config.BufferSize())); // TODO: not buffer_size
+ }
+ MergeProbabilities(info, models_by_order, merged_probs);
+ std::vector<uint64_t> counts(max_order);
+ for (std::size_t i = 0; i < max_order; ++i) {
+ merged_probs[i] >> util::stream::CountRecords(&counts[i]);
+ }
+
+ // Pass 2: normalize.
+ ApplySort<ContextOrder>(config.sort, merged_probs);
+ std::cerr << "Normalizing" << std::endl;
+ SetupInputs(config.BufferSize(), vocab, models, true, input_chains, models_by_order);
+ util::stream::Chains probabilities(max_order), backoffs(max_order - 1);
+ std::size_t block_count = 2;
+ for (std::size_t i = 0; i < max_order; ++i) {
+ // Careful accounting to ensure RewindableStream can fit the entire vocabulary.
+ block_count = std::max<std::size_t>(block_count, 2);
+ // This much needs to fit in RewindableStream.
+ std::size_t fit = NGram<float>::TotalSize(i + 1) * counts[0];
+ // fit / (block_count - 1) rounded up
+ std::size_t min_block = (fit + block_count - 2) / (block_count - 1);
+ std::size_t specify = std::max(config.BufferSize(), min_block * block_count);
+ probabilities.push_back(util::stream::ChainConfig(NGram<float>::TotalSize(i + 1), block_count, specify));
+ }
+ for (std::size_t i = 0; i < max_order - 1; ++i) {
+ backoffs.push_back(util::stream::ChainConfig(sizeof(float), 2, config.BufferSize()));
+ }
+ Normalize(info, models_by_order, merged_probs, probabilities, backoffs);
+
+ util::FixedArray<util::stream::FileBuffer> backoff_buffers(backoffs.size());
+ for (std::size_t i = 0; i < max_order - 1; ++i) {
+ backoff_buffers.push_back(util::MakeTemp(config.sort.temp_prefix));
+ backoffs[i] >> backoff_buffers.back().Sink();
+ }
+
+ // Pass 3: backoffs in the right place.
+ ApplySort<SuffixOrder>(config.sort, probabilities);
+ // TODO destroy universal vocab to save RAM.
+ // TODO these should be freed before merge sort happens in the above function.
+ backoffs.Wait(true);
+ merged_probs.Wait(true);
+ std::cerr << "Reunifying backoffs" << std::endl;
+
+ util::stream::ChainPositions prob_pos(max_order - 1);
+ util::stream::Chains combined(max_order - 1);
+ for (std::size_t i = 0; i < max_order - 1; ++i) {
+ backoffs[i] >> backoff_buffers[i].Source(true);
+ prob_pos.push_back(probabilities[i].Add());
+ combined.push_back(util::stream::ChainConfig(NGram<ProbBackoff>::TotalSize(i + 1), 2, config.BufferSize()));
+ }
+ util::stream::ChainPositions backoff_pos(backoffs);
+
+ ReunifyBackoff(prob_pos, backoff_pos, combined);
+
+ util::stream::ChainPositions output_pos(max_order);
+ for (std::size_t i = 0; i < max_order - 1; ++i) {
+ output_pos.push_back(combined[i].Add());
+ }
+ output_pos.push_back(probabilities.back().Add());
+
+ probabilities >> util::stream::kRecycle;
+ backoffs >> util::stream::kRecycle;
+ combined >> util::stream::kRecycle;
+
+ // TODO genericize to ModelBuffer etc.
+ PrintARPA(vocab_null.get(), write_file, counts).Run(output_pos);
+}
+
+}} // namespaces
diff --git a/src/kenlm/lm/interpolate/pipeline.hh b/src/kenlm/lm/interpolate/pipeline.hh
new file mode 100644
index 0000000..b200248
--- /dev/null
+++ b/src/kenlm/lm/interpolate/pipeline.hh
@@ -0,0 +1,22 @@
+#ifndef LM_INTERPOLATE_PIPELINE_H
+#define LM_INTERPOLATE_PIPELINE_H
+
+#include "lm/common/model_buffer.hh"
+#include "util/fixed_array.hh"
+#include "util/stream/config.hh"
+
+#include <cstddef>
+#include <string>
+
+namespace lm { namespace interpolate {
+
+struct Config {
+ std::vector<float> lambdas;
+ util::stream::SortConfig sort;
+ std::size_t BufferSize() const { return sort.buffer_size; }
+};
+
+void Pipeline(util::FixedArray<ModelBuffer> &models, const Config &config, int write_file);
+
+}} // namespaces
+#endif // LM_INTERPOLATE_PIPELINE_H
diff --git a/src/kenlm/lm/interpolate/split_worker.cc b/src/kenlm/lm/interpolate/split_worker.cc
new file mode 100644
index 0000000..e777bf0
--- /dev/null
+++ b/src/kenlm/lm/interpolate/split_worker.cc
@@ -0,0 +1,40 @@
+#include "lm/interpolate/split_worker.hh"
+#include "lm/common/ngram.hh"
+
+namespace lm {
+namespace interpolate {
+
+SplitWorker::SplitWorker(std::size_t order, util::stream::Chain &backoff_chain,
+ util::stream::Chain &sort_chain)
+ : order_(order) {
+ backoff_chain >> backoff_input_;
+ sort_chain >> sort_input_;
+}
+
+void SplitWorker::Run(const util::stream::ChainPosition &position) {
+ // input: ngram record (id, prob, and backoff)
+ // output: a float to the backoff_input stream
+ // an ngram id and a float to the sort_input stream
+ for (util::stream::Stream stream(position); stream; ++stream) {
+ NGram<ProbBackoff> ngram(stream.Get(), order_);
+
+ // write id and prob to the sort stream
+ float prob = ngram.Value().prob;
+ lm::WordIndex *out = reinterpret_cast<lm::WordIndex *>(sort_input_.Get());
+ for (const lm::WordIndex *it = ngram.begin(); it != ngram.end(); ++it) {
+ *out++ = *it;
+ }
+ *reinterpret_cast<float *>(out) = prob;
+ ++sort_input_;
+
+ // write backoff to the backoff output stream
+ float boff = ngram.Value().backoff;
+ *reinterpret_cast<float *>(backoff_input_.Get()) = boff;
+ ++backoff_input_;
+ }
+ sort_input_.Poison();
+ backoff_input_.Poison();
+}
+
+}
+}
diff --git a/src/kenlm/lm/interpolate/split_worker.hh b/src/kenlm/lm/interpolate/split_worker.hh
new file mode 100644
index 0000000..15fae68
--- /dev/null
+++ b/src/kenlm/lm/interpolate/split_worker.hh
@@ -0,0 +1,44 @@
+#ifndef KENLM_INTERPOLATE_SPLIT_WORKER_H_
+#define KENLM_INTERPOLATE_SPLIT_WORKER_H_
+
+#include "util/stream/chain.hh"
+#include "util/stream/stream.hh"
+
+namespace lm {
+namespace interpolate {
+
+class SplitWorker {
+ public:
+ /**
+ * Constructs a split worker for a particular order. It writes the
+ * split-off backoff values to the backoff chain and the ngram id and
+ * probability to the sort chain for each ngram in the input.
+ */
+ SplitWorker(std::size_t order, util::stream::Chain &backoff_chain,
+ util::stream::Chain &sort_chain);
+
+ /**
+ * The callback invoked to handle the input from the ngram intermediate
+ * files.
+ */
+ void Run(const util::stream::ChainPosition& position);
+
+ private:
+ /**
+ * The ngram order we are reading/writing for.
+ */
+ std::size_t order_;
+
+ /**
+ * The stream to write to for the backoff values.
+ */
+ util::stream::Stream backoff_input_;
+
+ /**
+ * The stream to write to for the ngram id + probability values.
+ */
+ util::stream::Stream sort_input_;
+};
+}
+}
+#endif
diff --git a/src/kenlm/lm/interpolate/streaming_example_main.cc b/src/kenlm/lm/interpolate/streaming_example_main.cc
new file mode 100644
index 0000000..1f543cb
--- /dev/null
+++ b/src/kenlm/lm/interpolate/streaming_example_main.cc
@@ -0,0 +1,195 @@
+#include "lm/common/compare.hh"
+#include "lm/common/model_buffer.hh"
+#include "lm/common/ngram.hh"
+#include "util/stream/chain.hh"
+#include "util/stream/multi_stream.hh"
+#include "util/stream/sort.hh"
+#include "lm/interpolate/split_worker.hh"
+
+#include <boost/program_options.hpp>
+#include <boost/version.hpp>
+
+#if defined(_WIN32) || defined(_WIN64)
+
+// Windows doesn't define <unistd.h>
+//
+// So we define what we need here instead:
+//
+#define STDIN_FILENO 0
+#define STDOUT_FILENO 1
+#else // Huzzah for POSIX!
+#include <unistd.h>
+#endif
+
+/*
+ * This is a simple example program that takes in intermediate
+ * suffix-sorted ngram files and outputs two sets of files: one for backoff
+ * probability values (raw numbers, in suffix order) and one for
+ * probability values (ngram id and probability, in *context* order)
+ */
+int main(int argc, char *argv[]) {
+ using namespace lm::interpolate;
+
+ const std::size_t ONE_GB = 1 << 30;
+ const std::size_t SIXTY_FOUR_MB = 1 << 26;
+ const std::size_t NUMBER_OF_BLOCKS = 2;
+
+ std::string FILE_NAME = "ngrams";
+ std::string CONTEXT_SORTED_FILENAME = "csorted-ngrams";
+ std::string BACKOFF_FILENAME = "backoffs";
+ std::string TMP_DIR = "/tmp/";
+
+ try {
+ namespace po = boost::program_options;
+ po::options_description options("canhazinterp Pass-3 options");
+
+ options.add_options()
+ ("help,h", po::bool_switch(), "Show this help message")
+ ("ngrams,n", po::value<std::string>(&FILE_NAME), "ngrams file")
+ ("csortngrams,c", po::value<std::string>(&CONTEXT_SORTED_FILENAME), "context sorted ngrams file")
+ ("backoffs,b", po::value<std::string>(&BACKOFF_FILENAME), "backoffs file")
+ ("tmpdir,t", po::value<std::string>(&TMP_DIR), "tmp dir");
+ po::variables_map vm;
+ po::store(po::parse_command_line(argc, argv, options), vm);
+ po::notify(vm);
+ // Display help
+ if(vm["help"].as<bool>()) {
+ std::cerr << "Usage: " << options << std::endl;
+ return 1;
+ }
+ }
+ catch(const std::exception &e) {
+
+ std::cerr << e.what() << std::endl;
+ return 1;
+
+ }
+
+ // The basic strategy here is to have three chains:
+ // - The first reads the ngram order inputs using ModelBuffer. Those are
+ // then stripped of their backoff values and fed into the third chain;
+ // the backoff values *themselves* are written to the second chain.
+ //
+ // - The second chain takes the backoff values and writes them out to a
+ // file (one for each order).
+ //
+ // - The third chain takes just the probability values and ngrams and
+ // writes them out, sorted in context-order, to a file (one for each
+ // order).
+
+ // This will be used to read in the binary intermediate files. There is
+ // one file per order (e.g. ngrams.1, ngrams.2, ...)
+ lm::ModelBuffer buffer(FILE_NAME);
+
+ // Create a separate chains for each ngram order for:
+ // - Input from the intermediate files
+ // - Output to the backoff file
+ // - Output to the (context-sorted) probability file
+ util::stream::Chains ngram_inputs(buffer.Order());
+ util::stream::Chains backoff_chains(buffer.Order());
+ util::stream::Chains prob_chains(buffer.Order());
+ for (std::size_t i = 0; i < buffer.Order(); ++i) {
+ ngram_inputs.push_back(util::stream::ChainConfig(
+ lm::NGram<lm::ProbBackoff>::TotalSize(i + 1), NUMBER_OF_BLOCKS, ONE_GB));
+
+ backoff_chains.push_back(
+ util::stream::ChainConfig(sizeof(float), NUMBER_OF_BLOCKS, ONE_GB));
+
+ prob_chains.push_back(util::stream::ChainConfig(
+ sizeof(lm::WordIndex) * (i + 1) + sizeof(float), NUMBER_OF_BLOCKS,
+ ONE_GB));
+ }
+
+ // This sets the input for each of the ngram order chains to the
+ // appropriate file
+ buffer.Source(ngram_inputs);
+
+ util::FixedArray<util::scoped_ptr<SplitWorker> > workers(buffer.Order());
+ for (std::size_t i = 0; i < buffer.Order(); ++i) {
+ // Attach a SplitWorker to each of the ngram input chains, writing to the
+ // corresponding order's backoff and probability chains
+ workers.push_back(
+ new SplitWorker(i + 1, backoff_chains[i], prob_chains[i]));
+ ngram_inputs[i] >> boost::ref(*workers.back());
+ }
+
+ util::stream::SortConfig sort_cfg;
+ sort_cfg.temp_prefix = TMP_DIR;
+ sort_cfg.buffer_size = SIXTY_FOUR_MB;
+ sort_cfg.total_memory = ONE_GB;
+
+ // This will parallel merge sort the individual order files, putting
+ // them in context-order instead of suffix-order.
+ //
+ // Two new threads will be running, each owned by the chains[i] object.
+ // - The first executes BlockSorter.Run() to sort the n-gram entries
+ // - The second executes WriteAndRecycle.Run() to write each sorted
+ // block to disk as a temporary file
+ util::stream::Sorts<lm::ContextOrder> sorts(buffer.Order());
+ for (std::size_t i = 0; i < prob_chains.size(); ++i) {
+ sorts.push_back(prob_chains[i], sort_cfg, lm::ContextOrder(i + 1));
+ }
+
+ // Set the sort output to be on the same chain
+ for (std::size_t i = 0; i < prob_chains.size(); ++i) {
+ // The following call to Chain::Wait()
+ // joins the threads owned by chains[i].
+ //
+ // As such the following call won't return
+ // until all threads owned by chains[i] have completed.
+ //
+ // The following call also resets chain[i]
+ // so that it can be reused
+ // (including free'ing the memory previously used by the chain)
+ prob_chains[i].Wait();
+
+ // In an ideal world (without memory restrictions)
+ // we could merge all of the previously sorted blocks
+ // by reading them all completely into memory
+ // and then running merge sort over them.
+ //
+ // In the real world, we have memory restrictions;
+ // depending on how many blocks we have,
+ // and how much memory we can use to read from each block
+ // (sort_config.buffer_size)
+ // it may be the case that we have insufficient memory
+ // to read sort_config.buffer_size of data from each block from disk.
+ //
+ // If this occurs, then it will be necessary to perform one or more rounds
+ // of merge sort on disk;
+ // doing so will reduce the number of blocks that we will eventually
+ // need to read from
+ // when performing the final round of merge sort in memory.
+ //
+ // So, the following call determines whether it is necessary
+ // to perform one or more rounds of merge sort on disk;
+ // if such on-disk merge sorting is required, such sorting is performed.
+ //
+ // Finally, the following method launches a thread that calls
+ // OwningMergingReader.Run()
+ // to perform the final round of merge sort in memory.
+ //
+ // Merge sort could have be invoked directly
+ // so that merge sort memory doesn't coexist with Chain memory.
+ sorts[i].Output(prob_chains[i]);
+ }
+
+ // Create another model buffer for our output on e.g. csorted-ngrams.1,
+ // csorted-ngrams.2, ...
+ lm::ModelBuffer output_buf(CONTEXT_SORTED_FILENAME, true, false);
+ output_buf.Sink(prob_chains, buffer.Counts());
+
+ // Create a third model buffer for our backoff output on e.g. backoff.1,
+ // backoff.2, ...
+ lm::ModelBuffer boff_buf(BACKOFF_FILENAME, true, false);
+ boff_buf.Sink(backoff_chains, buffer.Counts());
+
+ // Joins all threads that chains owns,
+ // and does a for loop over each chain object in chains,
+ // calling chain.Wait() on each such chain object
+ ngram_inputs.Wait(true);
+ backoff_chains.Wait(true);
+ prob_chains.Wait(true);
+
+ return 0;
+}
diff --git a/src/kenlm/lm/interpolate/toy_data/toy.linear_interpolation.lambda1_0.4.lambda2_0.6.lm b/src/kenlm/lm/interpolate/toy_data/toy.linear_interpolation.lambda1_0.4.lambda2_0.6.lm
new file mode 100644
index 0000000..f4bac3d
--- /dev/null
+++ b/src/kenlm/lm/interpolate/toy_data/toy.linear_interpolation.lambda1_0.4.lambda2_0.6.lm
@@ -0,0 +1,23 @@
+
+\data\
+ngram 1=5
+ngram 2=8
+
+\1-grams:
+-0.5850267 </s>
+-99 <s> -7.004176
+-0.7447274 <unk>
+-0.4685211 a -7.272811
+-0.6575773 b -99
+
+\2-grams:
+-0.2839967 <s> a
+-0.3187588 <s> b
+-0.79588 a </s>
+-0.5058454 a a
+-0.2773661 a b
+-0.7447275 b </s>
+-0.1135093 b a
+-0.9586073 b b
+
+\end\
diff --git a/src/kenlm/lm/interpolate/toy_data/toy.loglinear_interpolation.lambda1_0.4.lambda2_0.6.lm b/src/kenlm/lm/interpolate/toy_data/toy.loglinear_interpolation.lambda1_0.4.lambda2_0.6.lm
new file mode 100644
index 0000000..9874ba9
--- /dev/null
+++ b/src/kenlm/lm/interpolate/toy_data/toy.loglinear_interpolation.lambda1_0.4.lambda2_0.6.lm
@@ -0,0 +1,23 @@
+
+\data\
+ngram 1=5
+ngram 2=8
+
+\1-grams:
+-0.6190513 </s>
+-99 <s> -6.885247
+-99 <unk>
+-0.3394633 a 0
+-0.5200813 b 0
+
+\2-grams:
+-0.2807584 <s> a
+-0.3222944 <s> b
+-0.70763 a </s>
+-0.6780174 a a
+-0.2261675 a b
+-0.7991036 b </s>
+-0.1237299 b a
+-1.050158 b b
+
+\end\
diff --git a/src/kenlm/lm/interpolate/toy_data/toy1.lm b/src/kenlm/lm/interpolate/toy_data/toy1.lm
new file mode 100644
index 0000000..fba4263
--- /dev/null
+++ b/src/kenlm/lm/interpolate/toy_data/toy1.lm
@@ -0,0 +1,23 @@
+
+\data\
+ngram 1=5
+ngram 2=8
+
+\1-grams:
+-1.3010299957 </s>
+-99 <s> -7.446389
+-99 <unk>
+-0.6020599913 a 0
+-0.6020599913 b 0
+
+\2-grams:
+-0.15490196 <s> a
+-0.5228787453 <s> b
+-1 a </s>
+-1.5228787453 a a
+-0.0604807474 a b
+-0.5228787453 b </s>
+-0.3010299957 b a
+-0.6989700043 b b
+
+\end\
diff --git a/src/kenlm/lm/interpolate/toy_data/toy2.lm b/src/kenlm/lm/interpolate/toy_data/toy2.lm
new file mode 100644
index 0000000..67c2ba1
--- /dev/null
+++ b/src/kenlm/lm/interpolate/toy_data/toy2.lm
@@ -0,0 +1,23 @@
+
+\data\
+ngram 1=5
+ngram 2=8
+
+\1-grams:
+-0.3979400087 </s>
+-99 <s> -7.446389
+-99 <unk>
+-0.3979400087 a 0
+-0.6989700043 b 0
+
+\2-grams:
+-0.3979400087 <s> a
+-0.2218487496 <s> b
+-0.6989700043 a </s>
+-0.3010299957 a a
+-0.5228787453 a b
+-1 b </s>
+-0.0222763947 b a
+-1.3010299957 b b
+
+\end\
diff --git a/src/kenlm/lm/interpolate/train_params_grant_main.cc b/src/kenlm/lm/interpolate/train_params_grant_main.cc
new file mode 100644
index 0000000..a106cac
--- /dev/null
+++ b/src/kenlm/lm/interpolate/train_params_grant_main.cc
@@ -0,0 +1,561 @@
+#include "lm/ngram_query.hh"
+#include "lm/model.hh"
+#include "lm/word_index.hh"
+#include "lm/interpolate/enumerate_global_vocab.hh"
+
+#include <string>
+#include <vector>
+#include <iostream>
+#include <fstream>
+#include <map>
+#include <iomanip>
+
+#include <boost/program_options.hpp>
+#include <boost/version.hpp>
+#include <boost/foreach.hpp>
+
+#include "util/fixed_array.hh"
+
+#include <Eigen/Eigen>
+#include <Eigen/Dense>
+
+// typedef Eigen::MatrixXf FMatrix;
+// typedef Eigen::VectorXf FVector;
+typedef Eigen::MatrixXd DMatrix;
+typedef Eigen::VectorXd DVector;
+
+bool HAS_BIAS = true;
+
+using namespace lm::ngram;
+using namespace lm;
+
+inline double logProb(Model *model, const std::vector<std::string> &ctx,
+ WordIndex word_idx) {
+ // Horribly inefficient
+ const Vocabulary &vocab = model->GetVocabulary();
+
+ State nextState; // throwaway
+
+ WordIndex context_idx[ctx.size()];
+
+ // reverse context
+ for (std::size_t i = 0; i < ctx.size(); i++) {
+ context_idx[ctx.size() - 1 - i] = vocab.Index(ctx[i]);
+ }
+
+ FullScoreReturn score = model->FullScoreForgotState(
+ context_idx, &(context_idx[ctx.size() - 1]), word_idx, nextState);
+
+ double ret = score.prob;
+ // std::cerr << "w: " << word << " p: " << ret << std::endl;
+ return ret;
+}
+
+inline double logProb(Model *model, double unkprob,
+ const std::vector<std::string> &ctx,
+ const std::string &word) {
+ // Horribly inefficient
+ const Vocabulary &vocab = model->GetVocabulary();
+
+ WordIndex word_idx = vocab.Index(word);
+ if (word_idx == lm::kUNK) return unkprob;
+
+ return logProb(model, ctx, word_idx);
+}
+
+void set_features(const std::vector<std::string> &ctx, const std::string &word,
+ const std::vector<Model *> &models,
+ const std::vector<double> &unkprobs, DVector &v) {
+ if (HAS_BIAS) {
+ v(0) = 1;
+ for (std::size_t i = 0; i < models.size(); ++i)
+ v(i + 1) = logProb(models[i], unkprobs[i], ctx, word);
+ } else {
+ for (std::size_t i = 0; i < models.size(); ++i)
+ v(i) = logProb(models[i], unkprobs[i], ctx, word);
+ }
+}
+
+void train_params(const std::vector<std::vector<std::string> > &corpus,
+ const std::vector<std::string> &vocab,
+ const std::vector<Model *> &models) {
+ using namespace std;
+
+ // A safeguarded Newton's method to find optimum parameters.
+ // Reverts to steepest-descent linesearch if Newton step does not improve
+ // objective.
+ //
+ // Two Boolean variables below are used to "AllowExtrapolation" and
+ // "AllowNegativeParams".
+
+ bool AllowExtrapolation = true; // if true, params need not sum to one
+ bool AllowNegativeParams = true; // if true, params can be negative
+ const int ITERATIONS = 20;
+ double minstepsize = 1.0e-9; // convergence criterion
+ int context_size = 5; // (context_size+1)-grams considered in perplexity
+ double stepdecreasefactor = 0.1; // if step unsuccessful
+ double initstepsize = 1.0; // Initial step size
+ std::size_t linesinstartercorpus = 12; // The first few lines are tuned
+ // first, to find basin of attraction
+ // for Newton
+ // bias + #models
+ const std::size_t nlambdas = models.size() + (HAS_BIAS ? 1 : 0);
+
+ // initialize to sum to 1
+ DVector params = DVector::Constant(nlambdas, 1.0 / nlambdas);
+ DMatrix N = DMatrix::Constant(
+ nlambdas, nlambdas - 1,
+ -1.0 / sqrt((nlambdas - 1) * (nlambdas - 1) + nlambdas - 1.0));
+ for (unsigned i = 0; i < nlambdas - 1; ++i)
+ N(i, i) = N(i, i) * (1.0 - nlambdas);
+ // N is nullspace matrix, each column sums to zero
+
+ cerr << setprecision(16) << "++ Parameter training ++" << endl;
+ if (AllowExtrapolation)
+ cerr << " Allowing extrapolation (sharpening and flattening of individual "
+ "LM distributions)" << endl;
+ else
+ cerr << " Interpolating only (not sharpening or flattening individual LM "
+ "distributions)" << endl;
+ if (AllowNegativeParams)
+ cerr << " Allowing negative parameters\n"
+ << " (more general but slow and rarely useful\n"
+ << " -LM with negative weight has probability rankings reversed and "
+ "is weighted more highly than all LMs with positive weights)"
+ << endl;
+ else
+ cerr << "Not allowing negative parameters (mild assumption, and faster)"
+ << endl;
+ cerr << " Maximum number of iterations: " << ITERATIONS << endl;
+ cerr << " Minimum step size: " << minstepsize << endl;
+ cerr << " Perplexity computed with " << context_size + 1 << "-grams" << endl;
+
+ if ((!AllowExtrapolation) && (nlambdas == 1)) {
+ // Nothing to optimize. One parameter, and it sums to one.
+ cerr << "Training complete. Best weights:" << endl;
+ cerr << setprecision(16) << 1.0 << endl;
+ return;
+ }
+
+ // Smart initialization of full tuning by tuning on smaller set first
+ vector<std::size_t> linestotune;
+ if (linesinstartercorpus < corpus.size())
+ linestotune.push_back(linesinstartercorpus);
+ linestotune.push_back(corpus.size());
+
+ for (std::size_t setiter = 0; setiter < linestotune.size(); ++setiter) {
+ cerr << " Now tuning the first " << linestotune[setiter] << " lines"
+ << endl;
+
+ vector<DVector> paramhistory;
+ double bestppl = 0.0; // best recorded ppl
+ DVector bestgrad = DVector::Zero(nlambdas); // corresp. gradient,
+ // feasible direction
+ DVector bestparams = DVector::Zero(nlambdas); // corresp. weights
+ double maxbestgradstep = 0.0; // max feasible step in grad. direction
+ double stepsize = initstepsize; // Initial step size
+
+ for (int iter = 0; iter < ITERATIONS; ++iter) { // iterations
+ cerr << "ITERATION " << iter + 1 << " (of max " << ITERATIONS
+ << "), step size " << stepsize << " (of min " << minstepsize
+ << "), weights: " << endl;
+ cerr << params << endl;
+
+ paramhistory.push_back(params);
+ // Hard-coded to be 6-gram perplexity
+ vector<string> context(context_size, "<s>");
+ double ppl = 0.0;
+ DVector grad = DVector::Zero(nlambdas);
+ DMatrix H = DMatrix::Zero(nlambdas, nlambdas);
+ cerr << "o";
+
+ std::vector<double> unkprobs(models.size());
+ // for each sentence in tuning corpus
+ for (std::size_t ci = 0; ci < linestotune[setiter]; ++ci) {
+ const vector<string> &sentence = corpus[ci];
+ // pad our beginning context
+ std::fill(context.begin(), context.end(), "<s>");
+
+ // for each word in sentence
+ for (std::size_t t = 0; t < sentence.size(); ++t) {
+ // fill in unk probabilities for this context, to avoid having to
+ // look them up redundantly later
+ for (std::size_t mi = 0; mi < models.size(); ++mi) {
+ unkprobs[mi] = logProb(models[mi], context, lm::kUNK);
+ }
+
+ DVector feats = DVector::Zero(nlambdas);
+ // probs for actual n-gram
+ set_features(context, sentence[t], models, unkprobs, feats);
+
+ double z = 0.0;
+ double maxlogprob = 0.0; // Allows us to avoid overflow with
+ // negative params
+ DVector expectfeats = DVector::Zero(nlambdas);
+ DMatrix expectfeatmatrix = DMatrix::Zero(nlambdas, nlambdas);
+ // Logically, this should be in the loop's scope
+ DVector iterfeats = DVector::Zero(nlambdas);
+
+ // probs over possible n-grams, for normalization
+ for (std::size_t i = 0; i < vocab.size(); ++i) {
+ set_features(context, vocab[i], models, unkprobs, iterfeats);
+ double logprob = params.dot(iterfeats);
+ if (i == 0) {
+ // maxlogprob=logprob;// more precise, less underflow
+ maxlogprob = 0.0; // reduces number of updates
+ } else if (logprob > maxlogprob) {
+ // Adjust all old values to new scaling
+ double adjust = exp(maxlogprob - logprob);
+ z *= adjust;
+ expectfeats *= adjust;
+ expectfeatmatrix *= adjust;
+ maxlogprob = logprob;
+ }
+ double us = exp(params.dot(iterfeats) - maxlogprob); // measure
+
+ z += us;
+ expectfeats += us * iterfeats;
+ expectfeatmatrix += us * (iterfeats * iterfeats.transpose());
+ }
+ expectfeats /= z; // Expectation
+ expectfeatmatrix /= z; // Expectation
+
+ // Add sentence[t] to the end of the context
+ context[0] = sentence[t];
+ std::rotate(context.begin(), context.begin() + 1, context.end());
+
+ // Perplexity (actually log(perplexity))
+ ppl += params.dot(feats) - log(z);
+ // Gradient
+ grad += feats - expectfeats;
+ // Hessian
+ H += -expectfeatmatrix + expectfeats * expectfeats.transpose();
+ }
+ cerr << ".";
+ }
+ ppl *= -1.0 / corpus.size();
+ // The gradient and Hessian coefficients cancel out, so don't really need
+ // to do this, but it's fast.
+ grad *= -1.0 / corpus.size();
+ H *= -1.0 / corpus.size();
+ cerr << " log(PPL)=" << ppl << " PPL=" << exp(ppl) << endl;
+
+ // Use results to determine next params to evaluate
+ if ((ppl < bestppl) || (iter == 0)) {
+ // Found a new best
+ bestppl = ppl;
+ bestparams = params;
+ double beststepsize = stepsize;
+ if (iter > 0)
+ cerr << " New best point found, step size " << beststepsize << endl;
+ else
+ cerr << " New best point found" << endl;
+
+ bestgrad = grad;
+ DVector deltaparams = DVector::Zero(nlambdas);
+
+ bool reverttograd = false;
+
+ {
+ double gradnorm = 0.0;
+ double solvenorm = 0.0;
+ double errnorm = 0.0;
+ // Find Newton step
+ if (AllowExtrapolation) {
+ deltaparams = -H.colPivHouseholderQr().solve(grad);
+ Eigen::SelfAdjointEigenSolver<DMatrix> eigensolver(H);
+ cerr << "Eigenvalues (negative values should be negligible):\n"
+ << eigensolver.eigenvalues() << endl;
+ gradnorm = grad.norm();
+ solvenorm = (H * deltaparams).norm();
+ errnorm = (grad + H * deltaparams).norm();
+ } else {
+ // Project gradient to interpolation space
+ bestgrad = N * N.transpose() * bestgrad;
+
+ // need to work in nullspace to maintain unit sum
+ DMatrix Hnull = DMatrix::Zero(nlambdas - 1, nlambdas - 1);
+
+ // Looks like we don't need the three lines below -- we can do it
+ // in-line (if we don't want eigenvalues)
+ Hnull = N.transpose() * H * N;
+ Eigen::SelfAdjointEigenSolver<DMatrix> eigensolver(Hnull);
+ cerr << "Eigenvalues (best if all positive):\n"
+ << eigensolver.eigenvalues() << endl;
+ deltaparams =
+ -N * Hnull.fullPivHouseholderQr().solve(N.transpose() * grad);
+ gradnorm = (N.transpose() * grad).norm();
+ solvenorm = (Hnull * deltaparams).norm();
+ errnorm = (N.transpose() * grad + Hnull * deltaparams).norm();
+ }
+ // eventually, params = bestparams + deltaparams;
+ cerr << " Error norm " << errnorm << ", gradient norm " << gradnorm
+ << ", solution norm " << solvenorm << endl;
+ // Check for numerical errors. Don't trust Newton step if they are too
+ // big.
+ if (errnorm < 1e-12 * std::max(1.0, std::min(gradnorm, solvenorm))) {
+ stepsize = 0.0;
+ for (std::size_t i = 0; i < nlambdas; i++)
+ stepsize += deltaparams(i) * deltaparams(i);
+ stepsize = sqrt(stepsize); // holds length of Newton step
+ cerr << "Newton step, length " << stepsize << ": " << endl;
+ cerr << deltaparams << endl;
+
+ // Don't let the Newton step get much bigger than last successful
+ // step (likely would have to shrink later, anyway)
+ if (stepsize > 2.0 * beststepsize) {
+ stepsize = 1.5 * beststepsize;
+ reverttograd = true;
+ cerr << "Reverting to gradient, because Newton step is too large."
+ << endl;
+ }
+ } else {
+ stepsize = 1.5 * beststepsize;
+ reverttograd = true;
+ cerr << "Reverting to gradient, because Newton step computation "
+ "unsuccessful." << endl;
+ }
+ // Make the gradient unit norm, in feasible search direction.
+ if (!AllowNegativeParams) {
+ // Project gradient to be a feasible search direction
+ vector<bool> active(nlambdas, false);
+ std::size_t numactive = 0;
+ for (std::size_t i = 0; i < nlambdas; i++) {
+ // Project gradient to inactive constraints
+ if ((bestparams(i) == 0) && (bestgrad(i) > 0)) {
+ active[i] = true;
+ bestgrad(i) = 0.0; // Do this now, in case extrapolation
+ // allowed.
+ ++numactive;
+ }
+ }
+ if (numactive > 0) {
+ if (!AllowExtrapolation) {
+ // Project gradient, for activity concerns
+ DMatrix tmpN = DMatrix::Constant(
+ nlambdas, nlambdas - 1,
+ -1.0 / sqrt((nlambdas - numactive - 1) *
+ (nlambdas - numactive - 1) +
+ nlambdas - numactive - 1.0));
+
+ for (std::size_t i = 0; i < nlambdas - 1; ++i)
+ tmpN(i, i) = tmpN(i, i) * (1.0 - (nlambdas - numactive));
+
+ for (std::size_t i = 0; i < nlambdas; ++i) {
+ if (active[i]) {
+ for (std::size_t j = 0; j < nlambdas - 1; ++j) {
+ tmpN(i, j) = 0;
+ }
+ }
+ }
+
+ // projected gradient onto unit sum and active set constraints
+ bestgrad = -tmpN * tmpN.transpose() * bestgrad;
+ }
+ }
+ }
+ }
+ double norm = 0.0;
+ for (std::size_t i = 0; i < nlambdas; i++)
+ norm += bestgrad(i) * bestgrad(i);
+ if (norm != 0) {
+ bestgrad /= sqrt(norm);
+ } else {
+ cerr << " Gradient is zero. Exiting.";
+ break;
+ }
+ cerr << "Gradient, unit length: " << endl;
+ cerr << bestgrad << endl;
+
+ // Find max step in gradient direction that remains feasible.
+ if (!AllowNegativeParams) {
+ double limitfraction = 0.5; // Not 1: If Newton step is bad, probably
+ // will need to reduce later anyway
+ for (std::size_t i = 0; i < nlambdas; i++) {
+ if (bestparams(i) - maxbestgradstep * bestgrad(i) < 0) {
+ double tmplimitfraction =
+ bestparams(i) / (bestgrad(i) * maxbestgradstep);
+ if (tmplimitfraction < limitfraction)
+ limitfraction = tmplimitfraction;
+ }
+ }
+ maxbestgradstep = stepsize * limitfraction;
+ cerr << " Max grad step: " << maxbestgradstep << endl;
+ } else {
+ maxbestgradstep = stepsize;
+ }
+
+ if (!reverttograd) {
+ if (!AllowNegativeParams) {
+ for (std::size_t i = 0; i < nlambdas; i++) {
+ if (bestparams(i) + deltaparams(i) < 0) {
+ // Can't do Newton step. Revert to descent.
+ reverttograd = true;
+ }
+ }
+ }
+ if (reverttograd) {
+ cerr << "Reverting to gradient, since Newton step infeasible:"
+ << endl;
+ }
+ }
+
+ if (reverttograd) {
+ stepsize = maxbestgradstep;
+ deltaparams = -bestgrad * stepsize;
+ }
+
+ params = bestparams + deltaparams;
+ cerr << "Change in weights from best, step size " << stepsize << ": "
+ << endl;
+ cerr << deltaparams << endl;
+ } else {
+ // Last attempt failed at being better, so move in gradient direction
+ // with reduced step.
+ // stepsize reduction factor is empirical
+ stepsize = std::min(stepdecreasefactor * stepsize, maxbestgradstep);
+ cerr << "Taking smaller step: " << stepsize << endl;
+ params = bestparams - bestgrad * stepsize;
+ }
+ // Clean the parameters up.
+ double sumparams = 0.0;
+ for (std::size_t i = 0; i < nlambdas; i++) {
+ if (!AllowNegativeParams) {
+ if (params(i) < 1e-12) {
+ // snap to zero, for active set and duplicate weights
+ params(i) = 0;
+ }
+ }
+ sumparams += params(i);
+ }
+ if (!AllowExtrapolation) params /= sumparams;
+
+ bool duplicateentry = false;
+ for (std::size_t i = 0; i < paramhistory.size(); ++i) {
+ if (params == paramhistory[i]) duplicateentry = true;
+ }
+ while ((duplicateentry) && (stepsize >= minstepsize)) {
+ cerr << "Duplicate weight found: " << endl;
+ cerr << params << endl;
+ stepsize *= 0.5; // Step in this direction is duplicate, so try again
+ // with smaller step
+ params = bestparams - stepsize * bestgrad;
+
+ sumparams = 0.0;
+ for (std::size_t i = 0; i < nlambdas; i++) {
+ if (!AllowNegativeParams) {
+ if (params(i) < 1e-12) params(i) = 0;
+ }
+ sumparams += params(i);
+ }
+ if (!AllowExtrapolation) params /= sumparams;
+
+ duplicateentry = false;
+ for (std::size_t i = 0; i < paramhistory.size(); ++i) {
+ if (params == paramhistory[i]) duplicateentry = true;
+ }
+ }
+ if (stepsize < minstepsize) break; // No need to make another step
+ }
+
+ params = bestparams; // So that next setiter is correct
+ cerr << "Training complete. Best weights:" << endl;
+ cerr << params << endl;
+ }
+}
+
+int main(int argc, char **argv) {
+ std::string tuning_data;
+ std::vector<std::string> lms;
+
+ try {
+ namespace po = boost::program_options;
+ po::options_description options("train-params");
+
+ options.add_options()("help,h", po::bool_switch(),
+ "Show this help message")(
+ "no_bias_term,B", po::bool_switch(), "Do not include a 'bias' feature")(
+ "tuning_data,t", po::value<std::string>(&tuning_data),
+ "File to tune perplexity on")(
+ "model,m", po::value<std::vector<std::string> >(&lms),
+ "Language models in KenLM format to interpolate");
+ po::variables_map vm;
+ po::store(po::parse_command_line(argc, argv, options), vm);
+
+ // Display help
+ if (argc == 1 || vm["help"].as<bool>()) {
+ std::cerr << options << std::endl;
+ return 1;
+ }
+ if (vm["no_bias_term"].as<bool>()) HAS_BIAS = false;
+ lms = vm["model"].as<std::vector<std::string> >();
+ tuning_data = vm["tuning_data"].as<std::string>();
+ } catch (const std::exception &e) {
+ std::cerr << e.what() << std::endl;
+ return 1;
+ }
+ if (lms.size() < 2) {
+ std::cerr
+ << "Please specify at least two language model files with -m LM.KLM\n";
+ return 1;
+ }
+ if (tuning_data.empty()) {
+ std::cerr << "Please specify tuning set with -t FILE.TXT\n";
+ return 1;
+ }
+
+ // Growable vocab here
+ // GrowableVocab gvoc(100000); //dummy default
+
+ // no comment
+ std::map<std::string, int *> vmap;
+
+ // stuff it into the
+ EnumerateGlobalVocab *globalVocabBuilder =
+ new EnumerateGlobalVocab(&vmap, lms.size());
+
+ Config cfg;
+ cfg.enumerate_vocab = (EnumerateVocab *)globalVocabBuilder;
+
+ // load models
+ std::vector<Model *> models;
+ for (std::size_t i = 0; i < lms.size(); i++) {
+ std::cerr << "Loading LM file: " << lms[i] << std::endl;
+
+ // haaaack
+ globalVocabBuilder->SetCurModel(i); // yes this is dumb
+
+ Model *this_model = new Model(lms[i].c_str(), cfg);
+ models.push_back(this_model);
+ }
+
+ // assemble vocabulary vector
+ std::vector<std::string> vocab;
+ std::cerr << "Global Vocab Map has size: " << vmap.size() << std::endl;
+
+ for (std::map<std::string, int *>::iterator iter = vmap.begin();
+ iter != vmap.end(); ++iter) {
+ vocab.push_back(iter->first);
+ }
+ std::cerr << "Vocab vector has size: " << vocab.size() << std::endl;
+
+ // load context sorted ngrams into vector of vectors
+ std::vector<std::vector<std::string> > corpus;
+
+ std::cerr << "Loading context-sorted ngrams: " << tuning_data << std::endl;
+ std::ifstream infile(tuning_data.c_str());
+
+ for (std::string line; std::getline(infile, line);) {
+ std::vector<std::string> words;
+ std::stringstream stream(line);
+ std::string word;
+
+ while (stream >> word)
+ words.push_back(word);
+ corpus.push_back(words);
+ }
+
+ train_params(corpus, vocab, models);
+
+ return 0;
+}
diff --git a/src/kenlm/lm/interpolate/train_params_main.cc b/src/kenlm/lm/interpolate/train_params_main.cc
new file mode 100644
index 0000000..39233e8
--- /dev/null
+++ b/src/kenlm/lm/interpolate/train_params_main.cc
@@ -0,0 +1,349 @@
+#include "lm/ngram_query.hh"
+#include "lm/model.hh"
+#include "lm/word_index.hh"
+#include "lm/interpolate/enumerate_global_vocab.hh"
+
+
+#include <string>
+#include <vector>
+#include <iostream>
+#include <fstream>
+#include <map>
+
+#include <boost/program_options.hpp>
+#include <boost/version.hpp>
+#include <boost/foreach.hpp>
+
+#include "util/fixed_array.hh"
+
+#include <Eigen/Eigen>
+
+typedef Eigen::MatrixXf FMatrix;
+typedef Eigen::VectorXf FVector;
+
+bool HAS_BIAS = true;
+
+using namespace lm::ngram;
+using namespace lm;
+
+// Score `word` under `model` given the left-to-right context `ctx`.
+// Returns the model's probability in KenLM's native log base (log10 —
+// other code in this tree multiplies by M_LN10 to convert to ln).
+inline float logProb(Model * model, const std::vector<std::string>& ctx, const std::string& word) {
+
+  // Horribly inefficient: re-looks up every context word on every call.
+  const Vocabulary &vocab = model->GetVocabulary();
+
+  State nextState; //throwaway
+
+  WordIndex word_idx = vocab.Index(word);
+  // Fix: use std::vector instead of a C99-style variable-length array,
+  // which is not standard C++.
+  std::vector<WordIndex> context_idx(ctx.size());
+
+  //reverse context: KenLM expects most-recent-first
+  for(unsigned int i = 0; i < ctx.size(); i++) {
+    context_idx[ctx.size() - 1 - i] = vocab.Index(ctx[i]);
+  }
+
+  // NOTE(review): the end pointer excludes the last (oldest) context word;
+  // the call in train_params_fast passes the full range — confirm whether
+  // this off-by-one is intentional before changing it.
+  FullScoreReturn score = model->FullScoreForgotState(&context_idx[0], &context_idx[0] + (ctx.size() - 1), word_idx, nextState);
+
+  float ret = score.prob;
+  //std::cerr << "w: " << word << " p: " << ret << std::endl;
+  return ret;
+}
+
+// Fill `v` with one log-probability per model for `word` given `ctx`,
+// optionally preceded by a constant bias feature in slot 0.
+void set_features(const std::vector<std::string>& ctx,
+                  const std::string& word,
+                  const std::vector<Model *>& models,
+                  FVector& v) {
+  const unsigned offset = HAS_BIAS ? 1 : 0;
+  if (HAS_BIAS) {
+    v(0) = 1;  // bias feature is always on
+  }
+  for (unsigned m = 0; m < models.size(); ++m) {
+    v(m + offset) = logProb(models[m], ctx, word);
+  }
+}
+
+// Pre-translate the tuning corpus and the global vocabulary into each
+// model's private word ids.  Sentences are stored in reverse order (KenLM
+// context order) and padded with five <s> entries at the end.
+void translate_input(
+    const std::vector<std::vector<std::string> >& corpus,
+    const std::vector<std::string>& gvocab,
+    const std::vector<Model *>& models,
+    std::vector<std::vector<std::vector<WordIndex> > >&translated_corpus,
+    std::vector<std::vector<WordIndex> >&translated_vocab
+    ) {
+  translated_corpus.resize(models.size());
+  translated_vocab.resize(models.size());
+
+  for (unsigned m = 0; m < models.size(); ++m) { // per model
+    const Vocabulary &voc = models[m]->GetVocabulary();
+
+    // Global vocabulary -> this model's ids.
+    for (unsigned w = 0; w < gvocab.size(); ++w)
+      translated_vocab[m].push_back(voc.Index(gvocab[w]));
+
+    // Sentences, reversed, then five <s> padding entries.
+    translated_corpus[m].resize(corpus.size());
+    for (unsigned s = 0; s < corpus.size(); ++s) {
+      const std::vector<std::string> &sent = corpus[s];
+      for (std::vector<std::string>::const_reverse_iterator it = sent.rbegin();
+           it != sent.rend(); ++it)
+        translated_corpus[m][s].push_back(voc.Index(*it));
+      const WordIndex bos = voc.Index("<s>");
+      for (int pad = 0; pad < 5; ++pad)
+        translated_corpus[m][s].push_back(bos);
+    }
+  }
+}
+
+
+// Tune interpolation weights with Newton iterations, using per-model
+// pre-translated word ids (see translate_input) instead of string lookups.
+void train_params_fast(
+    const std::vector<std::vector<std::string> >& corpus,
+    const std::vector<std::string>& vocab,
+    const std::vector<Model *>& models) {
+  using namespace std;
+
+  // model / sentence / words in sentence in reverse order with <s> padding
+  std::vector<std::vector<std::vector<WordIndex> > > t_corpus;
+  std::vector<std::vector<WordIndex> > t_vocab;
+  translate_input(corpus, vocab, models, t_corpus, t_vocab);
+
+
+
+  const int ITERATIONS = 10;
+  const int nlambdas = models.size() + (HAS_BIAS ? 1 : 0); // bias + #models
+  FVector params = FVector::Zero(nlambdas);
+  vector<FVector> feats(vocab.size(), params);  // per-vocab-word feature vectors
+  vector<float> us(vocab.size(), 0);  // unnormalized log scores
+  vector<float> ps(vocab.size(), 0);  // normalized probabilities
+  FVector grad = FVector::Zero(nlambdas);
+  FMatrix H = FMatrix::Zero(nlambdas, nlambdas);
+  FVector ef = FVector::Zero(nlambdas);
+  for (int iter = 0; iter < ITERATIONS; ++iter) { // iterations
+    grad.setZero();
+    H.setZero();
+    double loss = 0;
+    unsigned numchars = 0;
+    for (unsigned ci = 0; ci < corpus.size(); ++ci) { // sentences in tuning corpus
+      const vector<string>& sentence = corpus[ci];
+      // NOTE(review): z is declared once per sentence here but once per word
+      // in train_params, so it accumulates across words — confirm intended.
+      double z = 0;
+      for (int t = sentence.size() -1 ; t >=0; --t) { // words in sentence
+        ++numchars;
+        // NOTE(review): ref_word is always 0, so loss/gradient always treat
+        // vocab[0] as the correct word (cf. the reference comment below).
+        int ref_word = 0;
+        for (unsigned i = 0; i < vocab.size(); ++i) { // vocab
+          // set_features(context, vocab[i], models, feats[i]);
+          for (unsigned j=0; j < models.size(); ++j) {
+            // NOTE: reference ---- WordIndex word_idx = t_corpus[j][ci][t];
+            WordIndex word_idx = t_vocab[j][i];
+            State nextState; //throwaway
+            FullScoreReturn score = models[j]->FullScoreForgotState(&(t_corpus[j][ci][t]), &(t_corpus[j][ci][t+5]), word_idx, nextState);
+            feats[i](j) = score.prob;
+            // feats[i](j) = logProb(models[j], ctx, word);
+          }
+          // NOTE(review): when HAS_BIAS is true the bias slot of feats[i] is
+          // never written here (unlike set_features) — confirm.
+          us[i] = params.dot(feats[i]);
+          z += exp(double(us[i]));
+        }
+        //std::cerr << "there..." << std::endl;
+        const float logz = log(z);
+
+        // expected feature values
+        ef.setZero();
+        for (unsigned i = 0; i < vocab.size(); ++i) {
+          ps[i] = expf(us[i] - logz);
+          ef += ps[i] * feats[i];
+        }
+        loss -= log(ps[ref_word]);
+        const FVector& reffeats = feats[ref_word];
+        grad += ef - reffeats;
+
+        // Hessian
+        for (unsigned i = 0; i < vocab.size(); ++i)
+          H.noalias() += ps[i] * feats[i] * feats[i].transpose() -
+                         ps[i] * feats[i] * ef.transpose();
+
+        // this should just be the state for each model
+      }
+      cerr << ".";
+    }
+    cerr << "ITERATION " << (iter + 1) << ": PPL=" << exp(loss / numchars) << endl;
+    // Newton-style step: solve H * x = grad.  NOTE(review): params is
+    // replaced by the solution rather than updated incrementally — confirm.
+    params = H.colPivHouseholderQr().solve(grad);
+    cerr << params << endl;
+  }
+}
+
+
+
+
+//const util::FixedArray<Model *>& models)
+// Tune interpolation weights with Newton iterations, scoring every
+// vocabulary word by string lookup (slow reference implementation).
+void train_params(
+    const std::vector<std::vector<std::string> >& corpus,
+    const std::vector<std::string>& vocab,
+    const std::vector<Model *>& models) {
+  using namespace std;
+
+  vector<string> context(5, "<s>");
+  const int ITERATIONS = 10;
+  const int nlambdas = models.size() + (HAS_BIAS ? 1 : 0); // bias + #models
+  FVector params = FVector::Zero(nlambdas);
+  vector<FVector> feats(vocab.size(), params);  // per-vocab-word feature vectors
+  vector<float> us(vocab.size(), 0);  // unnormalized log scores
+  vector<float> ps(vocab.size(), 0);  // normalized probabilities
+  FVector grad = FVector::Zero(nlambdas);
+  FMatrix H = FMatrix::Zero(nlambdas, nlambdas);
+  FVector ef = FVector::Zero(nlambdas);
+  for (int iter = 0; iter < ITERATIONS; ++iter) { // iterations
+    grad.setZero();
+    H.setZero();
+    double loss = 0;
+    unsigned numchars = 0;
+    for (unsigned ci = 0; ci < corpus.size(); ++ci) { // sentences in tuning corpus
+      const vector<string>& sentence = corpus[ci];
+      // Reset every context slot to <s> at each sentence start.
+      std::fill(context.begin(), context.end(), "<s>");
+      for (unsigned t = 0; t < sentence.size(); ++t) { // words in sentence
+        ++numchars;
+        const string& ref_word_string = sentence[t];
+        // NOTE(review): ref_word is never looked up in vocab, so loss and
+        // gradient always use vocab[0] as the "correct" word.
+        int ref_word = 0; // TODO
+        double z = 0;
+        //std::cerr << "here..." << std::endl;
+        for (unsigned i = 0; i < vocab.size(); ++i) { // vocab
+          set_features(context, vocab[i], models, feats[i]);
+          us[i] = params.dot(feats[i]);
+          z += exp(double(us[i]));
+        }
+        //std::cerr << "there..." << std::endl;
+        // NOTE(review): context only ever grows (nothing is popped), so it is
+        // not a fixed 5-word window — confirm intended.
+        context.push_back(ref_word_string);
+        const float logz = log(z);
+
+        // expected feature values
+        ef.setZero();
+        for (unsigned i = 0; i < vocab.size(); ++i) {
+          ps[i] = expf(us[i] - logz);
+          ef += ps[i] * feats[i];
+        }
+        loss -= log(ps[ref_word]);
+        const FVector& reffeats = feats[ref_word];
+        grad += ef - reffeats;
+
+        // Hessian
+        for (unsigned i = 0; i < vocab.size(); ++i)
+          H.noalias() += ps[i] * feats[i] * feats[i].transpose() -
+                         ps[i] * feats[i] * ef.transpose();
+
+        // this should just be the state for each model
+      }
+      cerr << ".";
+    }
+    cerr << "ITERATION " << (iter + 1) << ": PPL=" << exp(loss / numchars) << endl;
+    params = H.colPivHouseholderQr().solve(grad);
+    cerr << params << endl;
+  }
+}
+
+// Entry point: parse options, load all models while building a shared
+// global vocabulary, read the tuning corpus, and run weight tuning.
+int main(int argc, char** argv) {
+
+  std::string tuning_data;
+  std::vector<std::string> lms;
+
+  try {
+    namespace po = boost::program_options;
+    po::options_description options("train-params");
+
+    options.add_options()
+      ("help,h", po::bool_switch(), "Show this help message")
+      ("no_bias_term,B", po::bool_switch(), "Do not include a 'bias' feature")
+      ("tuning_data,t", po::value<std::string>(&tuning_data), "File to tune perplexity on")
+      ("model,m", po::value<std::vector<std::string> >(&lms), "Language models in KenLM format to interpolate");
+    po::variables_map vm;
+    po::store(po::parse_command_line(argc, argv, options), vm);
+
+    // Display help
+    if(argc == 1 || vm["help"].as<bool>()) {
+      std::cerr << options << std::endl;
+      return 1;
+    }
+    if (vm["no_bias_term"].as<bool>())
+      HAS_BIAS = false;
+    // Fix: guard the reads so that a missing -m or -t falls through to the
+    // friendly messages below instead of throwing boost::bad_any_cast
+    // (po::notify is never called, so the bound variables are not filled).
+    if (vm.count("model"))
+      lms = vm["model"].as<std::vector<std::string> >();
+    if (vm.count("tuning_data"))
+      tuning_data = vm["tuning_data"].as<std::string>();
+  }
+  catch(const std::exception &e) {
+    std::cerr << e.what() << std::endl;
+    return 1;
+  }
+
+  if (lms.size() < 2) {
+    std::cerr << "Please specify at least two language model files with -m LM.KLM\n";
+    return 1;
+  }
+  if (tuning_data.empty()) {
+    std::cerr << "Please specify tuning set with -t FILE.TXT\n";
+    return 1;
+  }
+
+  // Surface word -> per-model entry, filled in by the enumerator while the
+  // models load (int* payload semantics defined by EnumerateGlobalVocab).
+  std::map<std::string, int*> vmap;
+
+  // Build the global vocabulary as a side effect of loading each model.
+  EnumerateGlobalVocab * globalVocabBuilder = new EnumerateGlobalVocab(&vmap, lms.size());
+
+  Config cfg;
+  cfg.enumerate_vocab = (EnumerateVocab *) globalVocabBuilder;
+
+  //load models
+  std::vector<Model *> models;
+  for(std::size_t i = 0; i < lms.size(); i++) {  // fix: std::size_t avoids signed/unsigned mismatch
+    std::cerr << "Loading LM file: " << lms[i] << std::endl;
+
+    //haaaack
+    globalVocabBuilder->SetCurModel(i); //yes this is dumb
+
+    Model * this_model = new Model(lms[i].c_str(), cfg);
+    models.push_back( this_model );
+  }
+
+  //assemble vocabulary vector
+  std::vector<std::string> vocab;
+  std::cerr << "Global Vocab Map has size: " << vmap.size() << std::endl;
+
+  for(std::map<std::string, int*>::iterator iter = vmap.begin(); iter != vmap.end(); ++iter) {
+    vocab.push_back(iter->first);
+  }
+  std::cerr << "Vocab vector has size: " << vocab.size() << std::endl;
+
+  //load context sorted ngrams into vector of vectors
+  std::vector<std::vector<std::string> > corpus;
+
+  std::cerr << "Loading context-sorted ngrams: " << tuning_data << std::endl;
+  // .c_str(): stay consistent with the pre-C++11 ifstream usage elsewhere.
+  std::ifstream infile(tuning_data.c_str());
+
+  for(std::string line; std::getline(infile, line); ) {
+    std::vector<std::string> words;
+    std::stringstream stream(line);
+    std::string word;
+
+    while(stream >> word) {
+      words.push_back(word);
+    }
+    corpus.push_back(words);
+  }
+
+  train_params_fast(corpus, vocab, models);
+
+  return 0;
+}
+
+
+
diff --git a/src/kenlm/lm/interpolate/tune_derivatives.cc b/src/kenlm/lm/interpolate/tune_derivatives.cc
new file mode 100644
index 0000000..c20e637
--- /dev/null
+++ b/src/kenlm/lm/interpolate/tune_derivatives.cc
@@ -0,0 +1,91 @@
+#include "lm/interpolate/tune_derivatives.hh"
+
+namespace lm { namespace interpolate {
+
+// Precompute the weight-independent gradient term:
+// neg_correct_summed_(i) = -\sum_n ln p_i(correct word of instance n).
+ComputeDerivative::ComputeDerivative(const util::FixedArray<Instance> &instances, const Matrix &ln_unigrams, WordIndex bos)
+  : instances_(instances), ln_unigrams_(ln_unigrams), bos_(bos) {
+  Vector accum = Vector::Zero(ln_unigrams.cols());
+  for (const Instance *inst = instances.begin(); inst != instances.end(); ++inst)
+    accum -= inst->ln_correct;
+  neg_correct_summed_ = accum;
+}
+
+// One derivative evaluation at `weights`: fills `gradient` and `hessian`
+// and returns the tuning-set perplexity at those weights.
+Accum ComputeDerivative::Iteration(const Vector &weights, Vector &gradient, Matrix &hessian) {
+  // Start from the weight-independent correct-word term.
+  gradient = neg_correct_summed_;
+  hessian = Matrix::Zero(weights.rows(), weights.rows());
+
+  // TODO: loop instead to force low-memory evaluation
+  // Compute p_I(x).
+  Vector interp_uni((ln_unigrams_ * weights).array().exp());
+  // Even -inf doesn't work for <s> because weights can be negative.  Manually set it to zero.
+  interp_uni(bos_) = 0.0;
+  Accum Z_epsilon = interp_uni.sum();
+  interp_uni /= Z_epsilon;
+  // unigram_cross(i) = \sum_{all x} p_I(x) ln p_i(x)
+  Vector unigram_cross(ln_unigrams_.transpose() * interp_uni);
+
+  Accum sum_B_I = 0.0;
+  Accum sum_ln_Z_context = 0.0;
+
+  // Temporaries reused across instances to avoid reallocation.
+  Vector weighted_extensions;
+  Matrix convolve;
+  Vector full_cross;
+
+  for (const Instance *n = instances_.begin(); n != instances_.end(); ++n) {
+    Accum ln_weighted_backoffs = n->ln_backoff.dot(weights);
+    Accum weighted_backoffs = exp(ln_weighted_backoffs);
+
+    // Compute \sum_{x: model does not backoff to unigram} p_I(x)
+    Accum sum_x_p_I = 0.0;
+    for (std::vector<WordIndex>::const_iterator x = n->extension_words.begin(); x != n->extension_words.end(); ++x) {
+      sum_x_p_I += interp_uni(*x);
+    }
+    weighted_extensions = (n->ln_extensions * weights).array().exp();
+    // Normalizer for this context: backed-off mass plus explicit extensions.
+    Accum Z_context = Z_epsilon * weighted_backoffs * (1.0 - sum_x_p_I) + weighted_extensions.sum();
+    sum_ln_Z_context += log(Z_context);
+
+    Accum B_I = Z_epsilon / Z_context * weighted_backoffs;
+    sum_B_I += B_I;
+
+    // This is the gradient term for this instance except for -log p_i(w_n | w_1^{n-1}) which was accounted for as part of neg_correct_sum_.
+    // full_cross(i) is \sum_{all x} p_I(x | context) log p_i(x | context)
+    full_cross =
+      // Uncorrected term
+      B_I * (n->ln_backoff + unigram_cross)
+      // Correction term: add correct values
+      + n->ln_extensions.transpose() * weighted_extensions / Z_context
+      // Subtract values that should not have been charged.
+      - sum_x_p_I * B_I * n->ln_backoff;
+    for (std::vector<WordIndex>::const_iterator x = n->extension_words.begin(); x != n->extension_words.end(); ++x) {
+      full_cross.noalias() -= interp_uni(*x) * B_I * ln_unigrams_.row(*x);
+    }
+
+    gradient += full_cross;
+
+    convolve = unigram_cross * n->ln_backoff.transpose();
+    // There's one missing term here, which is independent of context and done at the end.
+    hessian.noalias() +=
+      // First term of Hessian, assuming all models back off to unigram.
+      B_I * (convolve + convolve.transpose() + n->ln_backoff * n->ln_backoff.transpose())
+      // Second term of Hessian, with correct full probabilities.
+      - full_cross * full_cross.transpose();
+
+    // Adjust the first term of the Hessian to account for extension
+    for (std::size_t x = 0; x < n->extension_words.size(); ++x) {
+      WordIndex universal_x = n->extension_words[x];
+      hessian.noalias() +=
+        // Replacement terms.
+        weighted_extensions(x) / Z_context * n->ln_extensions.row(x).transpose() * n->ln_extensions.row(x)
+        // Presumed unigrams.  TODO: individual terms with backoffs pulled out?  Maybe faster?
+        - interp_uni(universal_x) * B_I * (ln_unigrams_.row(universal_x).transpose() + n->ln_backoff) * (ln_unigrams_.row(universal_x) + n->ln_backoff.transpose());
+    }
+  }
+
+  // Context-independent part of the first Hessian term, deferred from above.
+  for (Matrix::Index x = 0; x < interp_uni.rows(); ++x) {
+    // \sum_{contexts} B_I(context) \sum_x p_I(x) log p_i(x) log p_j(x)
+    hessian.noalias() += sum_B_I * interp_uni(x) * ln_unigrams_.row(x).transpose() * ln_unigrams_.row(x);
+  }
+  return exp((neg_correct_summed_.dot(weights) + sum_ln_Z_context) / static_cast<double>(instances_.size()));
+}
+
+}} // namespaces
diff --git a/src/kenlm/lm/interpolate/tune_derivatives.hh b/src/kenlm/lm/interpolate/tune_derivatives.hh
new file mode 100644
index 0000000..40c058e
--- /dev/null
+++ b/src/kenlm/lm/interpolate/tune_derivatives.hh
@@ -0,0 +1,30 @@
+#ifndef LM_INTERPOLATE_TUNE_DERIVATIVES_H
+#define LM_INTERPOLATE_TUNE_DERIVATIVES_H
+
+#include "lm/interpolate/tune_instance.hh"
+
+#include <Eigen/Core>
+#include <cmath>
+
+namespace lm { namespace interpolate {
+
+// Computes the gradient and Hessian of tuning-set perplexity with respect
+// to the interpolation weights, over a fixed set of tuning instances.
+class ComputeDerivative {
+  public:
+    explicit ComputeDerivative(const util::FixedArray<Instance> &instances, const Matrix &ln_unigrams, WordIndex bos);
+
+    // Fills gradient and hessian at the given weights; returns perplexity.
+    Accum Iteration(const Vector &weights, Vector &gradient, Matrix &hessian);
+
+  private:
+    const util::FixedArray<Instance> &instances_;
+    const Matrix &ln_unigrams_;
+
+    // <s> gets special treatment: its interpolated probability is zeroed.
+    const WordIndex bos_;
+
+    // neg_correct_summed_(i) = -\sum_n ln p_i(w_n | w_1^{n-1})
+    Vector neg_correct_summed_;
+};
+
+}} // namespaces
+
+#endif // LM_INTERPOLATE_TUNE_DERIVATIVES_H
+
diff --git a/src/kenlm/lm/interpolate/tune_derivatives_test.cc b/src/kenlm/lm/interpolate/tune_derivatives_test.cc
new file mode 100644
index 0000000..75c0d12
--- /dev/null
+++ b/src/kenlm/lm/interpolate/tune_derivatives_test.cc
@@ -0,0 +1,89 @@
+#include "lm/interpolate/tune_derivatives.hh"
+
+#include "lm/interpolate/tune_instance.hh"
+
+#define BOOST_TEST_MODULE DerivativeTest
+#include <boost/test/unit_test.hpp>
+
+namespace lm { namespace interpolate { namespace {
+
+// Worked example: three regular words plus <s>, two models, one tuning
+// instance.  The expected gradient and (off-diagonal) Hessian entries are
+// recomputed by hand below and compared against ComputeDerivative.
+BOOST_AUTO_TEST_CASE(Small) {
+  // Three vocabulary words plus <s>, two models.
+  Matrix unigrams(4, 2);
+  unigrams <<
+    0.1, 0.6,
+    0.4, 0.3,
+    0.5, 0.1,
+    // <s>
+    1.0, 1.0;
+  unigrams = unigrams.array().log();
+
+  // One instance
+  util::FixedArray<Instance> instances(1);
+  instances.push_back(2);
+  Instance &instance = instances.back();
+
+  instance.ln_backoff << 0.2, 0.4;
+  instance.ln_backoff = instance.ln_backoff.array().log();
+
+  // Sparse cases: model 0 word 2 and model 1 word 1.
+
+  // Assuming that model 1 only matches word 1, this is p_1(1 | context)
+  Accum model_1_word_1 = 1.0 - .6 * .4 - .1 * .4;
+
+  // We'll suppose correct has WordIndex 1, which backs off in model 0, and matches in model 1
+  instance.ln_correct << (0.4 * 0.2), model_1_word_1;
+  instance.ln_correct = instance.ln_correct.array().log();
+
+  Accum model_0_word_2 = 1.0 - .1 * .2 - .4 * .2;
+
+  instance.extension_words.push_back(1);
+  instance.extension_words.push_back(2);
+  instance.ln_extensions.resize(2, 2);
+  instance.ln_extensions <<
+    (0.4 * 0.2), model_1_word_1,
+    model_0_word_2, 0.1 * 0.4;
+  instance.ln_extensions = instance.ln_extensions.array().log();
+
+  // bos = 3: the <s> row of unigrams.
+  ComputeDerivative compute(instances, unigrams, 3);
+  Vector weights(2);
+  weights << 0.9, 1.2;
+
+  Vector gradient(2);
+  Matrix hessian(2,2);
+  compute.Iteration(weights, gradient, hessian);
+
+  // p_I(x | context): per-word interpolated probabilities at these weights.
+  Vector p_I(3);
+  p_I <<
+    pow(0.1 * 0.2, 0.9) * pow(0.6 * 0.4, 1.2),
+    pow(0.4 * 0.2, 0.9) * pow(model_1_word_1, 1.2),
+    pow(model_0_word_2, 0.9) * pow(0.1 * 0.4, 1.2);
+  p_I /= p_I.sum();
+
+  Vector expected_gradient = -instance.ln_correct;
+  expected_gradient(0) += p_I(0) * log(0.1 * 0.2);
+  expected_gradient(0) += p_I(1) * log(0.4 * 0.2);
+  expected_gradient(0) += p_I(2) * log(model_0_word_2);
+  BOOST_CHECK_CLOSE(expected_gradient(0), gradient(0), 0.01);
+
+  expected_gradient(1) += p_I(0) * log(0.6 * 0.4);
+  expected_gradient(1) += p_I(1) * log(model_1_word_1);
+  expected_gradient(1) += p_I(2) * log(0.1 * 0.4);
+  BOOST_CHECK_CLOSE(expected_gradient(1), gradient(1), 0.01);
+
+  // Only the symmetric off-diagonal Hessian entry is checked.
+  Matrix expected_hessian(2, 2);
+  expected_hessian(1, 0) =
+    // First term
+    p_I(0) * log(0.1 * 0.2) * log(0.6 * 0.4) +
+    p_I(1) * log(0.4 * 0.2) * log(model_1_word_1) +
+    p_I(2) * log(model_0_word_2) * log(0.1 * 0.4);
+  expected_hessian(1, 0) -=
+    (p_I(0) * log(0.1 * 0.2) + p_I(1) * log(0.4 * 0.2) + p_I(2) * log(model_0_word_2)) *
+    (p_I(0) * log(0.6 * 0.4) + p_I(1) * log(model_1_word_1) + p_I(2) * log(0.1 * 0.4));
+  expected_hessian(0, 1) = expected_hessian(1, 0);
+  BOOST_CHECK_CLOSE(expected_hessian(1, 0), hessian(1, 0), 0.01);
+  BOOST_CHECK_CLOSE(expected_hessian(0, 1), hessian(0, 1), 0.01);
+}
+
+}}} // namespaces
diff --git a/src/kenlm/lm/interpolate/tune_instance.cc b/src/kenlm/lm/interpolate/tune_instance.cc
new file mode 100644
index 0000000..f1c9924
--- /dev/null
+++ b/src/kenlm/lm/interpolate/tune_instance.cc
@@ -0,0 +1,354 @@
+#include "lm/interpolate/tune_instance.hh"
+
+#include "lm/common/model_buffer.hh"
+#include "lm/common/ngram_stream.hh"
+#include "lm/common/renumber.hh"
+#include "lm/enumerate_vocab.hh"
+#include "lm/interpolate/merge_vocab.hh"
+#include "lm/interpolate/universal_vocab.hh"
+#include "lm/lm_exception.hh"
+#include "util/file_piece.hh"
+#include "util/murmur_hash.hh"
+#include "util/stream/chain.hh"
+#include "util/tokenize_piece.hh"
+
+#include <boost/unordered_map.hpp>
+
+#include <cmath>
+#include <limits>
+#include <vector>
+
+namespace lm { namespace interpolate {
+
+// An extension without backoff weights applied yet.
+// Packed to one-byte alignment — presumably because records are written raw
+// into streams (see InstanceMatch::Exit); confirm before changing layout.
+#pragma pack(push)
+#pragma pack(1)
+struct InitialExtension {
+  Extension ext;
+  // Order from which it came.
+  uint8_t order;
+};
+#pragma pack(pop)
+
+// Intended use
+// For each model:
+// stream through orders jointly in suffix order:
+// Call MatchedBackoff for full matches.
+// Call Exit when the context matches.
+// Call FinishModel with the unigram probability of the correct word, get full
+// probability in return.
+// Use Backoffs to adjust records that were written to the stream.
+// Accumulates, for one tuning context, each model's backoff weights and the
+// probability of the correct word (see the usage protocol described above).
+class InstanceMatch {
+  public:
+    InstanceMatch(ModelIndex models, uint8_t max_order, const WordIndex correct)
+      : seen_(std::numeric_limits<WordIndex>::max()),
+        // Fix: Eigen's zero factory is Matrix::Zero; Matrix::Zeros does not exist.
+        backoffs_(Matrix::Zero(models, max_order)),
+        correct_(correct), correct_from_(1), correct_ln_prob_(std::numeric_limits<float>::quiet_NaN()) {}
+
+    // Record the backoff weight a model stores for a full match of this context.
+    void MatchedBackoff(ModelIndex model, uint8_t order, float ln_backoff) {
+      backoffs_(model, order - 1) = ln_backoff;
+    }
+
+    // We only want the highest-order matches, which are the first to be exited for a given word.
+    void Exit(const InitialExtension &from, util::stream::Stream &out) {
+      if (from.ext.word == seen_) return;
+      seen_ = from.ext.word;
+      *static_cast<InitialExtension*>(out.Get()) = from;
+      ++out;
+      if (UTIL_UNLIKELY(correct_ == from.ext.word)) {
+        correct_from_ = from.order;
+        correct_ln_prob_ = from.ext.ln_prob;
+      }
+    }
+
+    WordIndex Correct() const { return correct_; }
+
+    // Call this after each model has been passed through.  The backoffs are
+    // collapsed into cumulative penalties and the correct word's full
+    // probability under this model is returned.
+    float FinishModel(ModelIndex model, float correct_ln_unigram) {
+      seen_ = std::numeric_limits<WordIndex>::max();
+      // Turn backoffs into multiplied values (added in log space).
+      // So backoffs_(model, order - 1) is the penalty for matching order.
+      float accum = 0.0;
+      for (int order = backoffs_.cols() - 1; order >= 0; --order) {
+        accum += backoffs_(model, order);
+        backoffs_(model, order) = accum;
+      }
+      if (correct_from_ == 1) {
+        correct_ln_prob_ = correct_ln_unigram;
+      }
+      if (correct_from_ - 1 < backoffs_.cols()) {
+        correct_ln_prob_ += backoffs_(model, correct_from_ - 1);
+      }
+      correct_from_ = 1;
+      return correct_ln_prob_;
+    }
+
+    const Matrix &Backoffs() const {
+      return backoffs_;
+    }
+
+  private:
+    // What's the last word we've seen?  Used to act only on exiting the longest match.
+    WordIndex seen_;
+
+    // backoffs_(model, order - 1): ln backoff, later cumulative penalty, for
+    // matching that order (see FinishModel).
+    Matrix backoffs_;
+
+    const WordIndex correct_;
+
+    // These only apply to the most recent model.
+    uint8_t correct_from_;
+
+    float correct_ln_prob_;
+};
+
+namespace {
+
+// Forward information to multiple instances of a context.
+class DispatchContext {
+ public:
+ void Register(InstanceMatch &context) {
+ registered_.push_back(&context);
+ }
+
+ void MatchedBackoff(uint8_t order, float ln_backoff) {
+ for (std::vector<InstanceMatch*>::iterator i = registered_.begin(); i != registered_.end(); ++i)
+ (*i)->MatchedBackoff(order, ln_backoff);
+ }
+
+ void Exit(const InitialExtension &from, util::stream::Stream &out) {
+ for (std::vector<InstanceMatch*>::iterator i = registered_.begin(); i != registered_.end(); ++i) {
+ (*i)->Exit(from, out);
+ }
+ }
+
+ private:
+ std::vector<InstanceMatch*> registered_;
+};
+
+// Map from n-gram hash to contexts in the tuning data.
+typedef boost::unordered_map<uint64_t, DispatchContext> ContextMap;
+
+// Stream transform that adds each extension's backoff penalty to its raw
+// probability.
+class ApplyBackoffs {
+  public:
+    explicit ApplyBackoffs(const InstanceMatch *backoffs) : backoffs_(backoffs) {}
+
+    void Run(const util::stream::ChainPosition &position) {
+      for (util::stream::Stream stream(position); stream; ++stream) {
+        InitialExtension &ini = *reinterpret_cast<InitialExtension*>(stream.Get());
+        // Fix: the original line was missing its semicolon and added an
+        // InstanceMatch object to a float.  Look up the cumulative backoff
+        // for this (model, matched order) pair via the Backoffs() accessor.
+        // NOTE(review): assumes FinishModel has already collapsed the matrix
+        // to cumulative penalties for ini.ext.model — confirm pipeline order.
+        ini.ext.ln_prob += backoffs_[ini.ext.instance].Backoffs()(ini.ext.model, ini.order - 1);
+      }
+    }
+
+  private:
+    const InstanceMatch *backoffs_;
+};
+
+// Stream extension records into `on`.  The first call drains the lazy sort
+// and captures a sorted copy on disk; later calls replay that copy.
+// Fix: the definition was missing its `void` return type.
+void Instances::ReadExtensions(util::stream::Chain &on) {
+  if (extensions_first_.get()) {
+    // Lazy sort and save a sorted copy to disk.  TODO: cut down on record size by stripping out order information?
+    extensions_first_->Output(on);
+    // Fix: release the scoped_ptr itself (`.reset()`); `->reset()` would try
+    // to call a reset() member on the pointed-to Sort object.
+    extensions_first_.reset();
+    // TODO: apply backoff data!!!!
+
+    extensions_subsequent_.reset(new util::stream::FileBuffer(util::MakeTemp(sorting_config_.temp_prefix)));
+    on >> extensions_subsequent_->Sink();
+  } else {
+    on >> extensions_subsequent_->Source();
+  }
+}
+
+// Reads one model's renumbered unigram stream: fills column `model_number`
+// of ln_probs (words absent from this model get <unk>'s probability) and
+// reports unigram backoffs to any registered tuning contexts.
+class UnigramLoader {
+  public:
+    UnigramLoader(ContextMap &contexts_for_backoffs, Matrix &ln_probs, std::size_t model_number)
+      : map_(contexts_for_backoffs),
+        prob_(ln_probs.col(model_number)) {}
+
+    void Run(const util::stream::ChainPosition &position) {
+      // TODO handle the case of a unigram model?
+      NGramStream<ProbBackoff> input(position);
+      assert(input);
+      // <unk> comes first; stored probabilities are log10, scale to ln.
+      Accum unk = input->Value().prob * M_LN10;
+      WordIndex previous = 0;
+      for (; input; ++input) {
+        WordIndex word = *input->begin();
+        // Universal ids between records are words this model lacks: use <unk>.
+        prob_.segment(previous, word - previous) = Vector::Constant(word - previous, unk);
+        prob_(word) = input->Value().prob * M_LN10;
+        ContextMap::iterator i = map_.find(util::MurmurHashNative(input->begin(), sizeof(WordIndex)));
+        if (i != map_.end()) {
+          i->second.MatchedBackoff(1, input->Value().backoff * M_LN10);
+        }
+        previous = word + 1;
+      }
+      // Tail of the vocabulary also falls back to <unk>.
+      prob_.segment(previous, prob_.rows() - previous) = Vector::Constant(prob_.rows() - previous, unk);
+    }
+
+  private:
+    ContextMap &map_;
+    Matrix::ColXpr prob_;
+    // Fix: removed `std::size_t model_;` — it was never initialized or used.
+};
+
+// Scans one model's middle-order n-gram stream, reporting full-match
+// backoffs and context matches to registered tuning contexts.
+class MiddleLoader {
+  public:
+    explicit MiddleLoader(ContextMap &map)
+      : map_(map) {}
+
+    void Run(const util::stream::ChainPosition &position) {
+      NGramStream<ProbBackoff> input(position);
+      // Record sizes are fixed per order: the full n-gram vs its (n-1)-word context.
+      const std::size_t full_size = (uint8_t*)input->end() - (uint8_t*)input->begin();
+      const std::size_t context_size = full_size - sizeof(WordIndex);
+      ContextMap::iterator i;
+      for (; input; ++input) {
+        i = map_.find(util::MurmurHashNative(input->begin(), full_size));
+        if (i != map_.end()) {
+          i->second.MatchedBackoff(input->Order(), input->Value().backoff * M_LN10);
+        }
+        i = map_.find(util::MurmurHashNative(input->begin(), context_size));
+        if (i != map_.end()) {
+          // NOTE(review): DispatchContext declares no MatchedContext member;
+          // this call does not compile as written — confirm the missing method.
+          i->second.MatchedContext(input->Order(), *(input->end() - 1), input->Value().prob * M_LN10);
+        }
+      }
+    }
+
+  private:
+    ContextMap &map_;
+};
+
+// Scans one model's highest-order n-gram stream (probability payload only,
+// no backoff field) and reports context matches to tuning contexts.
+class HighestLoader {
+  public:
+    HighestLoader(ContextMap &map, uint8_t order)
+      : map_(map), order_(order) {}
+
+    void Run(const util::stream::ChainPosition &position) {
+      ContextMap::iterator i;
+      const std::size_t context_size = sizeof(WordIndex) * (order_ - 1);
+      for (ProxyStream<NGram<float> > input(position, NGram<float>(NULL, order_)); input; ++input) {
+        i = map_.find(util::MurmurHashNative(input->begin(), context_size));
+        if (i != map_.end()) {
+          // NOTE(review): DispatchContext declares no MatchedContext member;
+          // this call does not compile as written — confirm the missing method.
+          i->second.MatchedContext(order_, *(input->end() - 1), input->Value() * M_LN10);
+        }
+      }
+    }
+
+  private:
+    ContextMap &map_;
+    const uint8_t order_;
+};
+
+// EnumerateVocab callback that maps every token position of the tuning file
+// to its universal word id as the merged vocabulary is built.
+class IdentifyTuning : public EnumerateVocab {
+  public:
+    IdentifyTuning(int tuning_file, std::vector<WordIndex> &out) : indices_(out) {
+      indices_.clear();
+      StringPiece line;
+      std::size_t counter = 0;
+      // Every tuning sentence implicitly ends with </s>.
+      std::vector<std::size_t> &eos = words_[util::MurmurHashNative("</s>", 4)];
+      for (util::FilePiece f(tuning_file); f.ReadLineOrEOF(line);) {
+        for (util::TokenIter<util::BoolCharacter, true> word(line, util::kSpaces); word; ++word) {
+          UTIL_THROW_IF(*word == "<s>" || *word == "</s>", FormatLoadException, "Illegal word in tuning data: " << *word);
+          words_[util::MurmurHashNative(word->data(), word->size())].push_back(counter++);
+        }
+        eos.push_back(counter++);
+      }
+      // Also get <s>: stored in one extra trailing slot, retrieved (and
+      // removed) later by FinishGetBOS.
+      indices_.resize(counter + 1);
+      words_[util::MurmurHashNative("<s>", 3)].push_back(indices_.size() - 1);
+    }
+
+    // Called once per merged-vocabulary word: fills every tuning position
+    // occupied by that word with its universal id.
+    void Add(WordIndex id, const StringPiece &str) {
+      boost::unordered_map<uint64_t, std::vector<std::size_t> >::iterator i = words_.find(util::MurmurHashNative(str.data(), str.size()));
+      if (i != words_.end()) {
+        for (std::vector<std::size_t>::iterator j = i->second.begin(); j != i->second.end(); ++j) {
+          indices_[*j] = id;
+        }
+      }
+    }
+
+    // Pop and return the id recorded for <s>.
+    WordIndex FinishGetBOS() {
+      WordIndex ret = indices_.back();
+      indices_.pop_back();
+      return ret;
+    }
+
+  private:
+    // Universal ids of the tuning corpus, in corpus order.
+    std::vector<WordIndex> &indices_;
+
+    // murmur(word) -> positions in the tuning corpus where the word appears.
+    boost::unordered_map<uint64_t, std::vector<std::size_t> > words_;
+};
+
+} // namespace
+
+Instance::Instance(std::size_t num_models) : ln_backoff(num_models), ln_correct(num_models), ln_extensions(0, num_models) {}
+
+// Read the models and the tuning file, producing one Instance per tuning
+// word plus the matrix of per-model unigram ln probabilities.  Returns the
+// universal id of <s>.
+WordIndex LoadInstances(int tuning_file, const std::vector<StringPiece> &model_names, util::FixedArray<Instance> &instances, Matrix &ln_unigrams) {
+  util::FixedArray<ModelBuffer> models(model_names.size());
+  std::vector<WordIndex> vocab_sizes;
+  vocab_sizes.reserve(model_names.size());
+  util::FixedArray<util::scoped_fd> vocab_files(model_names.size());
+  std::size_t max_order = 0;
+  for (std::vector<StringPiece>::const_iterator i = model_names.begin(); i != model_names.end(); ++i) {
+    models.push_back(*i);
+    vocab_sizes.push_back(models.back().Counts()[0]);
+    vocab_files.push_back(models.back().StealVocabFile());
+    max_order = std::max(max_order, models.back().Order());
+  }
+  UniversalVocab vocab(vocab_sizes);
+  std::vector<WordIndex> tuning_words;
+  WordIndex bos;
+  WordIndex combined_vocab_size;
+  {
+    // Merge vocabularies while mapping tuning-file tokens to universal ids.
+    IdentifyTuning identify(tuning_file, tuning_words);
+    combined_vocab_size = MergeVocab(vocab_files, vocab, identify);
+    bos = identify.FinishGetBOS();
+  }
+
+  instances.Init(tuning_words.size());
+  // NOTE(review): InstanceBuilder is not defined in this file; InstanceMatch
+  // (above) looks like the intended type but has a different constructor and
+  // no Dump() member — confirm which class is meant here.
+  util::FixedArray<InstanceBuilder> builders(tuning_words.size());
+  std::vector<WordIndex> context;
+  context.push_back(bos);
+
+  // Populate the map from contexts to instance builders.
+  ContextMap cmap;
+  // The last tuning position is the final </s> of the corpus.
+  const WordIndex eos = tuning_words.back();
+  for (std::size_t i = 0; i < tuning_words.size(); ++i) {
+    instances.push_back(model_names.size());
+    builders.push_back(tuning_words[i], max_order);
+    // Register every suffix of the current context for this instance.
+    for (std::size_t j = 0; j < context.size(); ++j) {
+      cmap[util::MurmurHashNative(&context[j], sizeof(WordIndex) * (context.size() - j))].Register(builders.back());
+    }
+    // Prepare for next word.
+    if (tuning_words[i] == eos) {
+      // Sentence boundary: restart the context at <s>.
+      context.clear();
+      context.push_back(bos);
+    } else {
+      if (context.size() == max_order) {
+        context.erase(context.begin());
+      }
+      context.push_back(tuning_words[i]);
+    }
+  }
+
+  ln_unigrams.resize(combined_vocab_size, models.size());
+
+  // Scan through input files.  Sadly not parallel due to an underlying hash table.
+  for (std::size_t m = 0; m < models.size(); ++m) {
+    for (std::size_t order = 1; order <= models[m].Order(); ++order) {
+      util::stream::Chain chain(util::stream::ChainConfig(sizeof(ProbBackoff) + order * sizeof(WordIndex), 2, 64 * 1048576));
+      models[m].Source(order - 1, chain);
+      // Renumber this model's private ids into the universal vocabulary.
+      chain >> Renumber(vocab.Mapping(m), order);
+      if (order == 1) {
+        chain >> UnigramLoader(cmap, ln_unigrams, m);
+      } else if (order < models[m].Order()) {
+        chain >> MiddleLoader(cmap);
+      } else {
+        chain >> HighestLoader(cmap, order);
+      }
+    }
+    for (std::size_t instance = 0; instance < tuning_words.size(); ++instance) {
+      builders[instance].Dump(m, ln_unigrams, instances[instance]);
+    }
+    ln_unigrams(bos, m) = -99; // Does not matter as long as it does not produce nans since tune_derivatives sets this to zero.
+  }
+  return bos;
+}
+
+}} // namespaces
diff --git a/src/kenlm/lm/interpolate/tune_instance.hh b/src/kenlm/lm/interpolate/tune_instance.hh
new file mode 100644
index 0000000..c11eec9
--- /dev/null
+++ b/src/kenlm/lm/interpolate/tune_instance.hh
@@ -0,0 +1,86 @@
+#ifndef LM_INTERPOLATE_TUNE_INSTANCE_H
+#define LM_INTERPOLATE_TUNE_INSTANCE_H
+
+#include "lm/interpolate/tune_matrix.hh"
+#include "lm/word_index.hh"
+#include "util/scoped.hh"
+#include "util/stream/config.hh"
+#include "util/string_piece.hh"
+
+#include <boost/optional.hpp>
+
+#include <vector>
+
+namespace util { namespace stream {
+template <class S, class T> class Sort;
+class Chain;
+class FileBuffer;
+}} // namespaces
+
+namespace lm { namespace interpolate {
+
+typedef uint32_t InstanceIndex;
+typedef uint32_t ModelIndex;
+
+// One sparse probability record: model `model` assigns ln_prob to `word`
+// after the context of tuning instance `instance`.
+struct Extension {
+  // Which tuning instance does this belong to?
+  InstanceIndex instance;
+  WordIndex word;
+  ModelIndex model;
+  // ln p_{model} (word | context(instance))
+  float ln_prob;
+
+  // Lexicographic order on (instance, word, model); ln_prob is ignored.
+  bool operator<(const Extension &other) const {
+    if (instance != other.instance) return instance < other.instance;
+    if (word != other.word) return word < other.word;
+    return model < other.model;
+  }
+};
+
+// Owns the tuning-instance data consumed by the derivative computation:
+// per-instance backoffs, the correct-word gradient term, unigram matrix,
+// and the (re-streamable) extension records.
+class Instances {
+  public:
+    Instances(int tune_file, const std::vector<StringPiece> &model_names);
+
+    // ln backoff-to-unigram weights of one instance, one entry per model.
+    Eigen::ConstRowXpr Backoffs(InstanceIndex instance) const {
+      return ln_backoffs_.row(instance);
+    }
+
+    const Vector &CorrectGradientTerm() const { return neg_ln_correct_sum_; }
+
+    const Matrix &LNUnigrams() const { return ln_unigrams_; }
+
+    void ReadExtensions(util::stream::Chain &to);
+
+  private:
+    // backoffs_(instance, model) is the backoff all the way to unigrams.
+    typedef Eigen::Matrix<float, Eigen::Dynamic, Eigen::Dynamic, Eigen::RowMajor> BackoffMatrix;
+    BackoffMatrix ln_backoffs_;
+
+    // neg_correct_sum_(model) = -\sum_{instances} ln p_{model}(correct(instance) | context(instance)).
+    // This appears as a term in the gradient.
+    Vector neg_ln_correct_sum_;
+
+    // unigrams_(word, model) = ln p_{model}(word).
+    Matrix ln_unigrams_;
+
+    struct ExtensionCompare {
+      bool operator()(const void *f, const void *s) const {
+        // Fix: the arguments are pointers to serialized Extension records, so
+        // cast to pointer and dereference.  reinterpret_cast to a reference
+        // would have reinterpreted the pointer object itself as an Extension.
+        return *reinterpret_cast<const Extension*>(f) < *reinterpret_cast<const Extension*>(s);
+      }
+    };
+
+    // This is the source of data for the first iteration.
+    // NOTE(review): the forward declaration above declares Sort with two
+    // template parameters but only one is supplied here — confirm.
+    util::scoped_ptr<util::stream::Sort<ExtensionCompare> > extensions_first_;
+
+    // Source of data for subsequent iterations.  This contains already-sorted data.
+    util::scoped_ptr<util::stream::FileBuffer> extensions_subsequent_;
+
+    const util::stream::SortConfig sorting_config_;
+};
+
+}} // namespaces
+#endif // LM_INTERPOLATE_TUNE_INSTANCE_H
diff --git a/src/kenlm/lm/interpolate/tune_instance_data/generate.sh b/src/kenlm/lm/interpolate/tune_instance_data/generate.sh
new file mode 100755
index 0000000..d725572
--- /dev/null
+++ b/src/kenlm/lm/interpolate/tune_instance_data/generate.sh
@@ -0,0 +1,9 @@
+#!/bin/bash
+../../../bin/lmplz --discount_fallback -o 3 -S 100M --intermediate toy0 --arpa toy0.arpa <<EOF
+a a b a
+b a a b
+EOF
+../../../bin/lmplz --discount_fallback -o 3 -S 100M --intermediate toy1 --arpa toy1.arpa <<EOF
+a a b b b b b b b
+c
+EOF
diff --git a/src/kenlm/lm/interpolate/tune_instance_data/toy0.1 b/src/kenlm/lm/interpolate/tune_instance_data/toy0.1
new file mode 100644
index 0000000..1b66c51
--- /dev/null
+++ b/src/kenlm/lm/interpolate/tune_instance_data/toy0.1
Binary files differ
diff --git a/src/kenlm/lm/interpolate/tune_instance_data/toy0.2 b/src/kenlm/lm/interpolate/tune_instance_data/toy0.2
new file mode 100644
index 0000000..d735b1c
--- /dev/null
+++ b/src/kenlm/lm/interpolate/tune_instance_data/toy0.2
Binary files differ
diff --git a/src/kenlm/lm/interpolate/tune_instance_data/toy0.3 b/src/kenlm/lm/interpolate/tune_instance_data/toy0.3
new file mode 100644
index 0000000..2d97aa3
--- /dev/null
+++ b/src/kenlm/lm/interpolate/tune_instance_data/toy0.3
Binary files differ
diff --git a/src/kenlm/lm/interpolate/tune_instance_data/toy0.kenlm_intermediate b/src/kenlm/lm/interpolate/tune_instance_data/toy0.kenlm_intermediate
new file mode 100644
index 0000000..8513475
--- /dev/null
+++ b/src/kenlm/lm/interpolate/tune_instance_data/toy0.kenlm_intermediate
@@ -0,0 +1,3 @@
+KenLM intermediate binary file
+Counts 5 7 7
+Payload pb
diff --git a/src/kenlm/lm/interpolate/tune_instance_data/toy0.vocab b/src/kenlm/lm/interpolate/tune_instance_data/toy0.vocab
new file mode 100644
index 0000000..520c0f9
--- /dev/null
+++ b/src/kenlm/lm/interpolate/tune_instance_data/toy0.vocab
Binary files differ
diff --git a/src/kenlm/lm/interpolate/tune_instance_data/toy1.1 b/src/kenlm/lm/interpolate/tune_instance_data/toy1.1
new file mode 100644
index 0000000..a50cec6
--- /dev/null
+++ b/src/kenlm/lm/interpolate/tune_instance_data/toy1.1
Binary files differ
diff --git a/src/kenlm/lm/interpolate/tune_instance_data/toy1.2 b/src/kenlm/lm/interpolate/tune_instance_data/toy1.2
new file mode 100644
index 0000000..58d28a0
--- /dev/null
+++ b/src/kenlm/lm/interpolate/tune_instance_data/toy1.2
Binary files differ
diff --git a/src/kenlm/lm/interpolate/tune_instance_data/toy1.3 b/src/kenlm/lm/interpolate/tune_instance_data/toy1.3
new file mode 100644
index 0000000..1a63afe
--- /dev/null
+++ b/src/kenlm/lm/interpolate/tune_instance_data/toy1.3
Binary files differ
diff --git a/src/kenlm/lm/interpolate/tune_instance_data/toy1.kenlm_intermediate b/src/kenlm/lm/interpolate/tune_instance_data/toy1.kenlm_intermediate
new file mode 100644
index 0000000..fe82667
--- /dev/null
+++ b/src/kenlm/lm/interpolate/tune_instance_data/toy1.kenlm_intermediate
@@ -0,0 +1,3 @@
+KenLM intermediate binary file
+Counts 6 7 6
+Payload pb
diff --git a/src/kenlm/lm/interpolate/tune_instance_data/toy1.vocab b/src/kenlm/lm/interpolate/tune_instance_data/toy1.vocab
new file mode 100644
index 0000000..763b2af
--- /dev/null
+++ b/src/kenlm/lm/interpolate/tune_instance_data/toy1.vocab
Binary files differ
diff --git a/src/kenlm/lm/interpolate/tune_instance_test.cc b/src/kenlm/lm/interpolate/tune_instance_test.cc
new file mode 100644
index 0000000..a0db59c
--- /dev/null
+++ b/src/kenlm/lm/interpolate/tune_instance_test.cc
@@ -0,0 +1,93 @@
+#include "lm/interpolate/tune_instance.hh"
+
+#include "util/file_stream.hh"
+#include "util/file.hh"
+#include "util/string_piece.hh"
+
+#define BOOST_TEST_MODULE InstanceTest
+#include <boost/test/unit_test.hpp>
+
+#include <iostream>
+
+#include <vector>
+
+namespace lm { namespace interpolate { namespace {
+
+Matrix::Index FindRow(const std::vector<WordIndex> &words, WordIndex word) {
+ std::vector<WordIndex>::const_iterator it = std::find(words.begin(), words.end(), word);
+ BOOST_REQUIRE(it != words.end());
+ return it - words.begin();
+}
+
+BOOST_AUTO_TEST_CASE(Toy) {
+ util::scoped_fd test_input(util::MakeTemp("temporary"));
+ {
+ util::FileStream(test_input.get()) << "c\n";
+ }
+
+ StringPiece dir("tune_instance_data/");
+ if (boost::unit_test::framework::master_test_suite().argc == 2) {
+ StringPiece zero_file(boost::unit_test::framework::master_test_suite().argv[1]);
+ BOOST_REQUIRE(zero_file.size() > strlen("toy0.1"));
+ BOOST_REQUIRE_EQUAL("toy0.1", StringPiece(zero_file.data() + zero_file.size() - 6, 6));
+ dir = StringPiece(zero_file.data(), zero_file.size() - 6);
+ }
+
+ std::vector<StringPiece> model_names;
+ std::string full0 = std::string(dir.data(), dir.size()) + "toy0";
+ std::string full1 = std::string(dir.data(), dir.size()) + "toy1";
+ model_names.push_back(full0);
+ model_names.push_back(full1);
+
+ util::FixedArray<Instance> instances;
+ Matrix ln_unigrams;
+ // Returns vocab id of <s>
+ BOOST_CHECK_EQUAL(1, LoadInstances(test_input.release(), model_names, instances, ln_unigrams));
+ // <unk>
+ BOOST_CHECK_CLOSE(-0.90309 * M_LN10, ln_unigrams(0, 0), 0.001);
+ BOOST_CHECK_CLOSE(-1 * M_LN10, ln_unigrams(0, 1), 0.001);
+ // <s>
+ BOOST_CHECK_GT(-98.0, ln_unigrams(1, 0));
+ BOOST_CHECK_GT(-98.0, ln_unigrams(1, 1));
+ // a
+ BOOST_CHECK_CLOSE(-0.46943438 * M_LN10, ln_unigrams(2, 0), 0.001);
+ BOOST_CHECK_CLOSE(-0.6146491 * M_LN10, ln_unigrams(2, 1), 0.001);
+ // </s>
+ BOOST_CHECK_CLOSE(-0.5720968 * M_LN10, ln_unigrams(3, 0), 0.001);
+ BOOST_CHECK_CLOSE(-0.6146491 * M_LN10, ln_unigrams(3, 1), 0.001);
+ // c
+ BOOST_CHECK_CLOSE(-0.90309 * M_LN10, ln_unigrams(4, 0), 0.001); // <unk>
+ BOOST_CHECK_CLOSE(-0.7659168 * M_LN10, ln_unigrams(4, 1), 0.001);
+ // too lazy to do b.
+
+ // Two instances:
+ // <s> predicts c
+ // <s> c predicts </s>
+ BOOST_REQUIRE_EQUAL(2, instances.size());
+ BOOST_CHECK_CLOSE(-0.30103 * M_LN10, instances[0].ln_backoff(0), 0.001);
+ BOOST_CHECK_CLOSE(-0.30103 * M_LN10, instances[0].ln_backoff(1), 0.001);
+
+ // Backoffs of <s> c
+ BOOST_CHECK_CLOSE(0.0, instances[1].ln_backoff(0), 0.001);
+ BOOST_CHECK_CLOSE((-0.30103 - 0.30103) * M_LN10, instances[1].ln_backoff(1), 0.001);
+
+ // Three extensions: a, b, c
+ BOOST_REQUIRE_EQUAL(3, instances[0].ln_extensions.rows());
+ BOOST_REQUIRE_EQUAL(3, instances[0].extension_words.size());
+
+ // <s> a
+ BOOST_CHECK_CLOSE(-0.37712017 * M_LN10, instances[0].ln_extensions(FindRow(instances[0].extension_words, 2), 0), 0.001);
+ // <s> c
+ BOOST_CHECK_CLOSE((-0.90309 + -0.30103) * M_LN10, instances[0].ln_extensions(FindRow(instances[0].extension_words, 4), 0), 0.001);
+ BOOST_CHECK_CLOSE(-0.4740302 * M_LN10, instances[0].ln_extensions(FindRow(instances[0].extension_words, 4), 1), 0.001);
+
+ // <s> c </s>
+ BOOST_CHECK_CLOSE(-0.09113217 * M_LN10, instances[1].ln_extensions(FindRow(instances[1].extension_words, 3), 1), 0.001);
+
+ // p_0(c | <s>) = p_0(c)b_0(<s>) = 10^(-0.90309 + -0.30103)
+ BOOST_CHECK_CLOSE((-0.90309 + -0.30103) * M_LN10, instances[0].ln_correct(0), 0.001);
+ // p_1(c | <s>) = 10^-0.4740302
+ BOOST_CHECK_CLOSE(-0.4740302 * M_LN10, instances[0].ln_correct(1), 0.001);
+}
+
+}}} // namespaces
diff --git a/src/kenlm/lm/interpolate/tune_main.cc b/src/kenlm/lm/interpolate/tune_main.cc
new file mode 100644
index 0000000..8296af1
--- /dev/null
+++ b/src/kenlm/lm/interpolate/tune_main.cc
@@ -0,0 +1,59 @@
+#include "lm/interpolate/tune_derivatives.hh"
+#include "lm/interpolate/tune_instance.hh"
+#include "util/file.hh"
+
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wunused-local-typedefs"
+#include <Eigen/Dense>
+#pragma GCC diagnostic pop
+#include <boost/program_options.hpp>
+
+#include <cmath>
+#include <iostream>
+
+namespace lm { namespace interpolate {
+void TuneWeights(int tune_file, const std::vector<StringPiece> &model_names, Vector &weights) {
+ util::FixedArray<Instance> instances;
+ Matrix ln_unigrams;
+ WordIndex bos = LoadInstances(tune_file, model_names, instances, ln_unigrams);
+ ComputeDerivative derive(instances, ln_unigrams, bos);
+ weights = Vector::Constant(model_names.size(), 1.0 / model_names.size());
+ Vector gradient;
+ Matrix hessian;
+ for (std::size_t iteration = 0; iteration < 10 /*TODO fancy stopping criteria */; ++iteration) {
+ std::cerr << "Iteration " << iteration << ": weights =";
+ for (Vector::Index i = 0; i < weights.rows(); ++i) {
+ std::cerr << ' ' << weights(i);
+ }
+ std::cerr << std::endl;
+ std::cerr << "Perplexity = " <<
+ derive.Iteration(weights, gradient, hessian)
+ << std::endl;
+ // TODO: 1.0 step size was too big and it kept getting unstable. More math.
+ weights -= 0.7 * hessian.inverse() * gradient;
+ }
+}
+}} // namespaces
+
+int main(int argc, char *argv[]) {
+ Eigen::initParallel();
+ namespace po = boost::program_options;
+ // TODO help
+ po::options_description options("Tuning options");
+ std::string tuning_file;
+ std::vector<std::string> input_models;
+ options.add_options()
+ ("tuning,t", po::value<std::string>(&tuning_file)->required(), "File to tune on. This should be a text file with one sentence per line.")
+ ("model,m", po::value<std::vector<std::string> >(&input_models)->multitoken()->required(), "Models to interpolate");
+ po::variables_map vm;
+ po::store(po::parse_command_line(argc, argv, options), vm);
+ po::notify(vm);
+
+ std::vector<StringPiece> model_names;
+ for (std::vector<std::string>::const_iterator i = input_models.begin(); i != input_models.end(); ++i) {
+ model_names.push_back(*i);
+ }
+ lm::interpolate::Vector weights;
+ lm::interpolate::TuneWeights(util::OpenReadOrThrow(tuning_file.c_str()), model_names, weights);
+ std::cout << weights.transpose() << std::endl;
+}
diff --git a/src/kenlm/lm/interpolate/tune_matrix.hh b/src/kenlm/lm/interpolate/tune_matrix.hh
new file mode 100644
index 0000000..7f1a0c9
--- /dev/null
+++ b/src/kenlm/lm/interpolate/tune_matrix.hh
@@ -0,0 +1,17 @@
+#ifndef LM_INTERPOLATE_TUNE_MATRIX_H
+#define LM_INTERPOLATE_TUNE_MATRIX_H
+
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wunused-local-typedefs"
+#include <Eigen/Core>
+#pragma GCC diagnostic pop
+
+namespace lm { namespace interpolate {
+
+typedef Eigen::MatrixXd Matrix;
+typedef Eigen::VectorXd Vector;
+
+typedef Matrix::Scalar Accum;
+
+}} // namespaces
+#endif // LM_INTERPOLATE_TUNE_MATRIX_H
diff --git a/src/kenlm/lm/interpolate/universal_vocab.cc b/src/kenlm/lm/interpolate/universal_vocab.cc
new file mode 100644
index 0000000..5cdf41e
--- /dev/null
+++ b/src/kenlm/lm/interpolate/universal_vocab.cc
@@ -0,0 +1,13 @@
+#include "lm/interpolate/universal_vocab.hh"
+
+namespace lm {
+namespace interpolate {
+
+UniversalVocab::UniversalVocab(const std::vector<WordIndex>& model_vocab_sizes) {
+ model_index_map_.resize(model_vocab_sizes.size());
+ for (size_t i = 0; i < model_vocab_sizes.size(); ++i) {
+ model_index_map_[i].resize(model_vocab_sizes[i]);
+ }
+}
+
+}} // namespaces
diff --git a/src/kenlm/lm/interpolate/universal_vocab.hh b/src/kenlm/lm/interpolate/universal_vocab.hh
new file mode 100644
index 0000000..c720298
--- /dev/null
+++ b/src/kenlm/lm/interpolate/universal_vocab.hh
@@ -0,0 +1,38 @@
+#ifndef LM_INTERPOLATE_UNIVERSAL_VOCAB_H
+#define LM_INTERPOLATE_UNIVERSAL_VOCAB_H
+
+#include "lm/word_index.hh"
+
+#include <vector>
+#include <cstddef>
+
+namespace lm {
+namespace interpolate {
+
+class UniversalVocab {
+public:
+ explicit UniversalVocab(const std::vector<WordIndex>& model_vocab_sizes);
+
+ // GetUniversalIndex takes the model number and index for the specific
+ // model and returns the universal model number
+ WordIndex GetUniversalIdx(std::size_t model_num, WordIndex model_word_index) const {
+ return model_index_map_[model_num][model_word_index];
+ }
+
+ const WordIndex *Mapping(std::size_t model) const {
+ return &*model_index_map_[model].begin();
+ }
+
+ void InsertUniversalIdx(std::size_t model_num, WordIndex word_index,
+ WordIndex universal_word_index) {
+ model_index_map_[model_num][word_index] = universal_word_index;
+ }
+
+private:
+ std::vector<std::vector<WordIndex> > model_index_map_;
+};
+
+} // namespace interpolate
+} // namespace lm
+
+#endif // LM_INTERPOLATE_UNIVERSAL_VOCAB_H
diff --git a/src/kenlm/lm/kenlm_benchmark_main.cc b/src/kenlm/lm/kenlm_benchmark_main.cc
new file mode 100644
index 0000000..c9ee165
--- /dev/null
+++ b/src/kenlm/lm/kenlm_benchmark_main.cc
@@ -0,0 +1,142 @@
+#include "lm/model.hh"
+#include "util/file_stream.hh"
+#include "util/file.hh"
+#include "util/file_piece.hh"
+#include "util/usage.hh"
+
+#include <stdint.h>
+
+namespace {
+
+template <class Model, class Width> void ConvertToBytes(const Model &model, int fd_in) {
+ util::FilePiece in(fd_in);
+ util::FileStream out(1);
+ Width width;
+ StringPiece word;
+ const Width end_sentence = (Width)model.GetVocabulary().EndSentence();
+ while (true) {
+ while (in.ReadWordSameLine(word)) {
+ width = (Width)model.GetVocabulary().Index(word);
+ out.write(&width, sizeof(Width));
+ }
+ if (!in.ReadLineOrEOF(word)) break;
+ out.write(&end_sentence, sizeof(Width));
+ }
+}
+
+template <class Model, class Width> void QueryFromBytes(const Model &model, int fd_in) {
+ lm::ngram::State state[3];
+ const lm::ngram::State *const begin_state = &model.BeginSentenceState();
+ const lm::ngram::State *next_state = begin_state;
+ Width kEOS = model.GetVocabulary().EndSentence();
+ Width buf[4096];
+
+ uint64_t completed = 0;
+ double loaded = util::CPUTime();
+
+ std::cout << "CPU_to_load: " << loaded << std::endl;
+
+ // Numerical precision: batch sums.
+ double total = 0.0;
+ while (std::size_t got = util::ReadOrEOF(fd_in, buf, sizeof(buf))) {
+ float sum = 0.0;
+ UTIL_THROW_IF2(got % sizeof(Width), "File size not a multiple of vocab id size " << sizeof(Width));
+ got /= sizeof(Width);
+ completed += got;
+ // Do even stuff first.
+ const Width *even_end = buf + (got & ~1);
+ // Alternating states
+ const Width *i;
+ for (i = buf; i != even_end;) {
+ sum += model.FullScore(*next_state, *i, state[1]).prob;
+ next_state = (*i++ == kEOS) ? begin_state : &state[1];
+ sum += model.FullScore(*next_state, *i, state[0]).prob;
+ next_state = (*i++ == kEOS) ? begin_state : &state[0];
+ }
+ // Odd corner case.
+ if (got & 1) {
+ sum += model.FullScore(*next_state, *i, state[2]).prob;
+ next_state = (*i++ == kEOS) ? begin_state : &state[2];
+ }
+ total += sum;
+ }
+ double after = util::CPUTime();
+ std::cerr << "Probability sum is " << total << std::endl;
+ std::cout << "Queries: " << completed << std::endl;
+ std::cout << "CPU_excluding_load: " << (after - loaded) << "\nCPU_per_query: " << ((after - loaded) / static_cast<double>(completed)) << std::endl;
+ std::cout << "RSSMax: " << util::RSSMax() << std::endl;
+}
+
+template <class Model, class Width> void DispatchFunction(const Model &model, bool query) {
+ if (query) {
+ QueryFromBytes<Model, Width>(model, 0);
+ } else {
+ ConvertToBytes<Model, Width>(model, 0);
+ }
+}
+
+template <class Model> void DispatchWidth(const char *file, bool query) {
+ lm::ngram::Config config;
+ config.load_method = util::READ;
+ std::cerr << "Using load_method = READ." << std::endl;
+ Model model(file, config);
+ lm::WordIndex bound = model.GetVocabulary().Bound();
+ if (bound <= 256) {
+ DispatchFunction<Model, uint8_t>(model, query);
+ } else if (bound <= 65536) {
+ DispatchFunction<Model, uint16_t>(model, query);
+ } else if (bound <= (1ULL << 32)) {
+ DispatchFunction<Model, uint32_t>(model, query);
+ } else {
+ DispatchFunction<Model, uint64_t>(model, query);
+ }
+}
+
+void Dispatch(const char *file, bool query) {
+ using namespace lm::ngram;
+ lm::ngram::ModelType model_type;
+ if (lm::ngram::RecognizeBinary(file, model_type)) {
+ switch(model_type) {
+ case PROBING:
+ DispatchWidth<lm::ngram::ProbingModel>(file, query);
+ break;
+ case REST_PROBING:
+ DispatchWidth<lm::ngram::RestProbingModel>(file, query);
+ break;
+ case TRIE:
+ DispatchWidth<lm::ngram::TrieModel>(file, query);
+ break;
+ case QUANT_TRIE:
+ DispatchWidth<lm::ngram::QuantTrieModel>(file, query);
+ break;
+ case ARRAY_TRIE:
+ DispatchWidth<lm::ngram::ArrayTrieModel>(file, query);
+ break;
+ case QUANT_ARRAY_TRIE:
+ DispatchWidth<lm::ngram::QuantArrayTrieModel>(file, query);
+ break;
+ default:
+ UTIL_THROW(util::Exception, "Unrecognized kenlm model type " << model_type);
+ }
+ } else {
+ UTIL_THROW(util::Exception, "Binarize before running benchmarks.");
+ }
+}
+
+} // namespace
+
+int main(int argc, char *argv[]) {
+ if (argc != 3 || (strcmp(argv[1], "vocab") && strcmp(argv[1], "query"))) {
+ std::cerr
+ << "Benchmark program for KenLM. Intended usage:\n"
+ << "#Convert text to vocabulary ids offline. These ids are tied to a model.\n"
+ << argv[0] << " vocab $model <$text >$text.vocab\n"
+ << "#Ensure files are in RAM.\n"
+ << "cat $text.vocab $model >/dev/null\n"
+ << "#Timed query against the model.\n"
+ << argv[0] << " query $model <$text.vocab\n";
+ return 1;
+ }
+ Dispatch(argv[2], !strcmp(argv[1], "query"));
+ return 0;
+}
diff --git a/src/kenlm/lm/left.hh b/src/kenlm/lm/left.hh
new file mode 100644
index 0000000..4d49686
--- /dev/null
+++ b/src/kenlm/lm/left.hh
@@ -0,0 +1,216 @@
+/* Efficient left and right language model state for sentence fragments.
+ * Intended usage:
+ * Store ChartState with every chart entry.
+ * To do a rule application:
+ * 1. Make a ChartState object for your new entry.
+ * 2. Construct RuleScore.
+ * 3. Going from left to right, call Terminal or NonTerminal.
+ * For terminals, just pass the vocab id.
+ * For non-terminals, pass that non-terminal's ChartState.
+ * If your decoder expects scores inclusive of subtree scores (i.e. you
+ * label entries with the highest-scoring path), pass the non-terminal's
+ * score as prob.
+ * If your decoder expects relative scores and will walk the chart later,
+ * pass prob = 0.0.
+ * In other words, the only effect of prob is that it gets added to the
+ * returned log probability.
+ * 4. Call Finish. It returns the log probability.
+ *
+ * There's a couple more details:
+ * Do not pass <s> to Terminal as it is formally not a word in the sentence,
+ * only context. Instead, call BeginSentence. If called, it should be the
+ * first call after RuleScore is constructed (since <s> is always the
+ * leftmost).
+ *
+ * If the leftmost RHS is a non-terminal, it's faster to call BeginNonTerminal.
+ *
+ * Hashing and sorting comparison operators are provided. All state objects
+ * are POD. If you intend to use memcmp on raw state objects, you must call
+ * ZeroRemaining first, as the value of array entries beyond length is
+ * otherwise undefined.
+ *
+ * Usage is of course not limited to chart decoding. Anything that generates
+ * sentence fragments missing left context could benefit. For example, a
+ * phrase-based decoder could pre-score phrases, storing ChartState with each
+ * phrase, even if hypotheses are generated left-to-right.
+ */
+
+#ifndef LM_LEFT_H
+#define LM_LEFT_H
+
+#include "lm/max_order.hh"
+#include "lm/state.hh"
+#include "lm/return.hh"
+
+#include "util/murmur_hash.hh"
+
+#include <algorithm>
+
+namespace lm {
+namespace ngram {
+
+template <class M> class RuleScore {
+ public:
+ explicit RuleScore(const M &model, ChartState &out) : model_(model), out_(&out), left_done_(false), prob_(0.0) {
+ out.left.length = 0;
+ out.right.length = 0;
+ }
+
+ void BeginSentence() {
+ out_->right = model_.BeginSentenceState();
+ // out_->left is empty.
+ left_done_ = true;
+ }
+
+ void Terminal(WordIndex word) {
+ State copy(out_->right);
+ FullScoreReturn ret(model_.FullScore(copy, word, out_->right));
+ if (left_done_) { prob_ += ret.prob; return; }
+ if (ret.independent_left) {
+ prob_ += ret.prob;
+ left_done_ = true;
+ return;
+ }
+ out_->left.pointers[out_->left.length++] = ret.extend_left;
+ prob_ += ret.rest;
+ if (out_->right.length != copy.length + 1)
+ left_done_ = true;
+ }
+
+ // Faster version of NonTerminal for the case where the rule begins with a non-terminal.
+ void BeginNonTerminal(const ChartState &in, float prob = 0.0) {
+ prob_ = prob;
+ *out_ = in;
+ left_done_ = in.left.full;
+ }
+
+ void NonTerminal(const ChartState &in, float prob = 0.0) {
+ prob_ += prob;
+
+ if (!in.left.length) {
+ if (in.left.full) {
+ for (const float *i = out_->right.backoff; i < out_->right.backoff + out_->right.length; ++i) prob_ += *i;
+ left_done_ = true;
+ out_->right = in.right;
+ }
+ return;
+ }
+
+ if (!out_->right.length) {
+ out_->right = in.right;
+ if (left_done_) {
+ prob_ += model_.UnRest(in.left.pointers, in.left.pointers + in.left.length, 1);
+ return;
+ }
+ if (out_->left.length) {
+ left_done_ = true;
+ } else {
+ out_->left = in.left;
+ left_done_ = in.left.full;
+ }
+ return;
+ }
+
+ float backoffs[KENLM_MAX_ORDER - 1], backoffs2[KENLM_MAX_ORDER - 1];
+ float *back = backoffs, *back2 = backoffs2;
+ unsigned char next_use = out_->right.length;
+
+ // First word
+ if (ExtendLeft(in, next_use, 1, out_->right.backoff, back)) return;
+
+ // Words after the first, so extending a bigram to begin with
+ for (unsigned char extend_length = 2; extend_length <= in.left.length; ++extend_length) {
+ if (ExtendLeft(in, next_use, extend_length, back, back2)) return;
+ std::swap(back, back2);
+ }
+
+ if (in.left.full) {
+ for (const float *i = back; i != back + next_use; ++i) prob_ += *i;
+ left_done_ = true;
+ out_->right = in.right;
+ return;
+ }
+
+ // Right state was minimized, so it's already independent of the new words to the left.
+ if (in.right.length < in.left.length) {
+ out_->right = in.right;
+ return;
+ }
+
+ // Shift exisiting words down.
+ for (WordIndex *i = out_->right.words + next_use - 1; i >= out_->right.words; --i) {
+ *(i + in.right.length) = *i;
+ }
+ // Add words from in.right.
+ std::copy(in.right.words, in.right.words + in.right.length, out_->right.words);
+ // Assemble backoff composed on the existing state's backoff followed by the new state's backoff.
+ std::copy(in.right.backoff, in.right.backoff + in.right.length, out_->right.backoff);
+ std::copy(back, back + next_use, out_->right.backoff + in.right.length);
+ out_->right.length = in.right.length + next_use;
+ }
+
+ float Finish() {
+ // A N-1-gram might extend left and right but we should still set full to true because it's an N-1-gram.
+ out_->left.full = left_done_ || (out_->left.length == model_.Order() - 1);
+ return prob_;
+ }
+
+ void Reset() {
+ prob_ = 0.0;
+ left_done_ = false;
+ out_->left.length = 0;
+ out_->right.length = 0;
+ }
+ void Reset(ChartState &replacement) {
+ out_ = &replacement;
+ Reset();
+ }
+
+ private:
+ bool ExtendLeft(const ChartState &in, unsigned char &next_use, unsigned char extend_length, const float *back_in, float *back_out) {
+ ProcessRet(model_.ExtendLeft(
+ out_->right.words, out_->right.words + next_use, // Words to extend into
+ back_in, // Backoffs to use
+ in.left.pointers[extend_length - 1], extend_length, // Words to be extended
+ back_out, // Backoffs for the next score
+ next_use)); // Length of n-gram to use in next scoring.
+ if (next_use != out_->right.length) {
+ left_done_ = true;
+ if (!next_use) {
+ // Early exit.
+ out_->right = in.right;
+ prob_ += model_.UnRest(in.left.pointers + extend_length, in.left.pointers + in.left.length, extend_length + 1);
+ return true;
+ }
+ }
+ // Continue scoring.
+ return false;
+ }
+
+ void ProcessRet(const FullScoreReturn &ret) {
+ if (left_done_) {
+ prob_ += ret.prob;
+ return;
+ }
+ if (ret.independent_left) {
+ prob_ += ret.prob;
+ left_done_ = true;
+ return;
+ }
+ out_->left.pointers[out_->left.length++] = ret.extend_left;
+ prob_ += ret.rest;
+ }
+
+ const M &model_;
+
+ ChartState *out_;
+
+ bool left_done_;
+
+ float prob_;
+};
+
+} // namespace ngram
+} // namespace lm
+
+#endif // LM_LEFT_H
diff --git a/src/kenlm/lm/left_test.cc b/src/kenlm/lm/left_test.cc
new file mode 100644
index 0000000..fdb6416
--- /dev/null
+++ b/src/kenlm/lm/left_test.cc
@@ -0,0 +1,397 @@
+#include "lm/left.hh"
+#include "lm/model.hh"
+
+#include "util/tokenize_piece.hh"
+
+#include <vector>
+
+#define BOOST_TEST_MODULE LeftTest
+#include <boost/test/unit_test.hpp>
+#include <boost/test/floating_point_comparison.hpp>
+
+namespace lm {
+namespace ngram {
+namespace {
+
+#define Term(word) score.Terminal(m.GetVocabulary().Index(word));
+#define VCheck(word, value) BOOST_CHECK_EQUAL(m.GetVocabulary().Index(word), value);
+
+// Apparently some Boost versions use templates and are pretty strict about types matching.
+#define SLOPPY_CHECK_CLOSE(ref, value, tol) BOOST_CHECK_CLOSE(static_cast<double>(ref), static_cast<double>(value), static_cast<double>(tol));
+
+template <class M> void Short(const M &m) {
+ ChartState base;
+ {
+ RuleScore<M> score(m, base);
+ Term("more");
+ Term("loin");
+ SLOPPY_CHECK_CLOSE(-1.206319 - 0.3561665, score.Finish(), 0.001);
+ }
+ BOOST_CHECK(base.left.full);
+ BOOST_CHECK_EQUAL(2, base.left.length);
+ BOOST_CHECK_EQUAL(1, base.right.length);
+ VCheck("loin", base.right.words[0]);
+
+ ChartState more_left;
+ {
+ RuleScore<M> score(m, more_left);
+ Term("little");
+ score.NonTerminal(base, -1.206319 - 0.3561665);
+ // p(little more loin | null context)
+ SLOPPY_CHECK_CLOSE(-1.56538, score.Finish(), 0.001);
+ }
+ BOOST_CHECK_EQUAL(3, more_left.left.length);
+ BOOST_CHECK_EQUAL(1, more_left.right.length);
+ VCheck("loin", more_left.right.words[0]);
+ BOOST_CHECK(more_left.left.full);
+
+ ChartState shorter;
+ {
+ RuleScore<M> score(m, shorter);
+ Term("to");
+ score.NonTerminal(base, -1.206319 - 0.3561665);
+ SLOPPY_CHECK_CLOSE(-0.30103 - 1.687872 - 1.206319 - 0.3561665, score.Finish(), 0.01);
+ }
+ BOOST_CHECK_EQUAL(1, shorter.left.length);
+ BOOST_CHECK_EQUAL(1, shorter.right.length);
+ VCheck("loin", shorter.right.words[0]);
+ BOOST_CHECK(shorter.left.full);
+}
+
+template <class M> void Charge(const M &m) {
+ ChartState base;
+ {
+ RuleScore<M> score(m, base);
+ Term("on");
+ Term("more");
+ SLOPPY_CHECK_CLOSE(-1.509559 -0.4771212 -1.206319, score.Finish(), 0.001);
+ }
+ BOOST_CHECK_EQUAL(1, base.left.length);
+ BOOST_CHECK_EQUAL(1, base.right.length);
+ VCheck("more", base.right.words[0]);
+ BOOST_CHECK(base.left.full);
+
+ ChartState extend;
+ {
+ RuleScore<M> score(m, extend);
+ Term("looking");
+ score.NonTerminal(base, -1.509559 -0.4771212 -1.206319);
+ SLOPPY_CHECK_CLOSE(-3.91039, score.Finish(), 0.001);
+ }
+ BOOST_CHECK_EQUAL(2, extend.left.length);
+ BOOST_CHECK_EQUAL(1, extend.right.length);
+ VCheck("more", extend.right.words[0]);
+ BOOST_CHECK(extend.left.full);
+
+ ChartState tobos;
+ {
+ RuleScore<M> score(m, tobos);
+ score.BeginSentence();
+ score.NonTerminal(extend, -3.91039);
+ SLOPPY_CHECK_CLOSE(-3.471169, score.Finish(), 0.001);
+ }
+ BOOST_CHECK_EQUAL(0, tobos.left.length);
+ BOOST_CHECK_EQUAL(1, tobos.right.length);
+}
+
+template <class M> float LeftToRight(const M &m, const std::vector<WordIndex> &words, bool begin_sentence = false) {
+ float ret = 0.0;
+ State right = begin_sentence ? m.BeginSentenceState() : m.NullContextState();
+ for (std::vector<WordIndex>::const_iterator i = words.begin(); i != words.end(); ++i) {
+ State copy(right);
+ ret += m.Score(copy, *i, right);
+ }
+ return ret;
+}
+
+template <class M> float RightToLeft(const M &m, const std::vector<WordIndex> &words, bool begin_sentence = false) {
+ float ret = 0.0;
+ ChartState state;
+ state.left.length = 0;
+ state.right.length = 0;
+ state.left.full = false;
+ for (std::vector<WordIndex>::const_reverse_iterator i = words.rbegin(); i != words.rend(); ++i) {
+ ChartState copy(state);
+ RuleScore<M> score(m, state);
+ score.Terminal(*i);
+ score.NonTerminal(copy, ret);
+ ret = score.Finish();
+ }
+ if (begin_sentence) {
+ ChartState copy(state);
+ RuleScore<M> score(m, state);
+ score.BeginSentence();
+ score.NonTerminal(copy, ret);
+ ret = score.Finish();
+ }
+ return ret;
+}
+
+template <class M> float TreeMiddle(const M &m, const std::vector<WordIndex> &words, bool begin_sentence = false) {
+ std::vector<std::pair<ChartState, float> > states(words.size());
+ for (unsigned int i = 0; i < words.size(); ++i) {
+ RuleScore<M> score(m, states[i].first);
+ score.Terminal(words[i]);
+ states[i].second = score.Finish();
+ }
+ while (states.size() > 1) {
+ std::vector<std::pair<ChartState, float> > upper((states.size() + 1) / 2);
+ for (unsigned int i = 0; i < states.size() / 2; ++i) {
+ RuleScore<M> score(m, upper[i].first);
+ score.NonTerminal(states[i*2].first, states[i*2].second);
+ score.NonTerminal(states[i*2+1].first, states[i*2+1].second);
+ upper[i].second = score.Finish();
+ }
+ if (states.size() % 2) {
+ upper.back() = states.back();
+ }
+ std::swap(states, upper);
+ }
+
+ if (states.empty()) return 0.0;
+
+ if (begin_sentence) {
+ ChartState ignored;
+ RuleScore<M> score(m, ignored);
+ score.BeginSentence();
+ score.NonTerminal(states.front().first, states.front().second);
+ return score.Finish();
+ } else {
+ return states.front().second;
+ }
+
+}
+
+template <class M> void LookupVocab(const M &m, const StringPiece &str, std::vector<WordIndex> &out) {
+ out.clear();
+ for (util::TokenIter<util::SingleCharacter, true> i(str, ' '); i; ++i) {
+ out.push_back(m.GetVocabulary().Index(*i));
+ }
+}
+
+#define TEXT_TEST(str) \
+ LookupVocab(m, str, words); \
+ expect = LeftToRight(m, words, rest); \
+ SLOPPY_CHECK_CLOSE(expect, RightToLeft(m, words, rest), 0.001); \
+ SLOPPY_CHECK_CLOSE(expect, TreeMiddle(m, words, rest), 0.001); \
+
+// Build sentences, or parts thereof, from right to left.
+template <class M> void GrowBig(const M &m, bool rest = false) {
+ std::vector<WordIndex> words;
+ float expect;
+ TEXT_TEST("in biarritz watching considering looking . on a little more loin also would consider higher to look good unknown the screening foo bar , unknown however unknown </s>");
+ TEXT_TEST("on a little more loin also would consider higher to look good unknown the screening foo bar , unknown however unknown </s>");
+ TEXT_TEST("on a little more loin also would consider higher to look good");
+ TEXT_TEST("more loin also would consider higher to look good");
+ TEXT_TEST("more loin also would consider higher to look");
+ TEXT_TEST("also would consider higher to look");
+ TEXT_TEST("also would consider higher");
+ TEXT_TEST("would consider higher to look");
+ TEXT_TEST("consider higher to look");
+ TEXT_TEST("consider higher to");
+ TEXT_TEST("consider higher");
+}
+
+template <class M> void GrowSmall(const M &m, bool rest = false) {
+ std::vector<WordIndex> words;
+ float expect;
+ TEXT_TEST("in biarritz watching considering looking . </s>");
+ TEXT_TEST("in biarritz watching considering looking .");
+ TEXT_TEST("in biarritz");
+}
+
+template <class M> void AlsoWouldConsiderHigher(const M &m) {
+ ChartState also;
+ {
+ RuleScore<M> score(m, also);
+ score.Terminal(m.GetVocabulary().Index("also"));
+ SLOPPY_CHECK_CLOSE(-1.687872, score.Finish(), 0.001);
+ }
+ ChartState would;
+ {
+ RuleScore<M> score(m, would);
+ score.Terminal(m.GetVocabulary().Index("would"));
+ SLOPPY_CHECK_CLOSE(-1.687872, score.Finish(), 0.001);
+ }
+ ChartState combine_also_would;
+ {
+ RuleScore<M> score(m, combine_also_would);
+ score.NonTerminal(also, -1.687872);
+ score.NonTerminal(would, -1.687872);
+ SLOPPY_CHECK_CLOSE(-1.687872 - 2.0, score.Finish(), 0.001);
+ }
+ BOOST_CHECK_EQUAL(2, combine_also_would.right.length);
+
+ ChartState also_would;
+ {
+ RuleScore<M> score(m, also_would);
+ score.Terminal(m.GetVocabulary().Index("also"));
+ score.Terminal(m.GetVocabulary().Index("would"));
+ SLOPPY_CHECK_CLOSE(-1.687872 - 2.0, score.Finish(), 0.001);
+ }
+ BOOST_CHECK_EQUAL(2, also_would.right.length);
+
+ ChartState consider;
+ {
+ RuleScore<M> score(m, consider);
+ score.Terminal(m.GetVocabulary().Index("consider"));
+ SLOPPY_CHECK_CLOSE(-1.687872, score.Finish(), 0.001);
+ }
+ BOOST_CHECK_EQUAL(1, consider.left.length);
+ BOOST_CHECK_EQUAL(1, consider.right.length);
+ BOOST_CHECK(!consider.left.full);
+
+ ChartState higher;
+ float higher_score;
+ {
+ RuleScore<M> score(m, higher);
+ score.Terminal(m.GetVocabulary().Index("higher"));
+ higher_score = score.Finish();
+ }
+ SLOPPY_CHECK_CLOSE(-1.509559, higher_score, 0.001);
+ BOOST_CHECK_EQUAL(1, higher.left.length);
+ BOOST_CHECK_EQUAL(1, higher.right.length);
+ BOOST_CHECK(!higher.left.full);
+ VCheck("higher", higher.right.words[0]);
+ SLOPPY_CHECK_CLOSE(-0.30103, higher.right.backoff[0], 0.001);
+
+ ChartState consider_higher;
+ {
+ RuleScore<M> score(m, consider_higher);
+ score.NonTerminal(consider, -1.687872);
+ score.NonTerminal(higher, higher_score);
+ SLOPPY_CHECK_CLOSE(-1.509559 - 1.687872 - 0.30103, score.Finish(), 0.001);
+ }
+ BOOST_CHECK_EQUAL(2, consider_higher.left.length);
+ BOOST_CHECK(!consider_higher.left.full);
+
+ ChartState full;
+ {
+ RuleScore<M> score(m, full);
+ score.NonTerminal(combine_also_would, -1.687872 - 2.0);
+ score.NonTerminal(consider_higher, -1.509559 - 1.687872 - 0.30103);
+ SLOPPY_CHECK_CLOSE(-10.6879, score.Finish(), 0.001);
+ }
+ BOOST_CHECK_EQUAL(4, full.right.length);
+}
+
+#define CHECK_SCORE(str, val) \
+{ \
+ float got = val; \
+ std::vector<WordIndex> indices; \
+ LookupVocab(m, str, indices); \
+ SLOPPY_CHECK_CLOSE(LeftToRight(m, indices), got, 0.001); \
+}
+
+template <class M> void FullGrow(const M &m) {
+ std::vector<WordIndex> words;
+ LookupVocab(m, "in biarritz watching considering looking . </s>", words);
+
+ ChartState lexical[7];
+ float lexical_scores[7];
+ for (unsigned int i = 0; i < 7; ++i) {
+ RuleScore<M> score(m, lexical[i]);
+ score.Terminal(words[i]);
+ lexical_scores[i] = score.Finish();
+ }
+ CHECK_SCORE("in", lexical_scores[0]);
+ CHECK_SCORE("biarritz", lexical_scores[1]);
+ CHECK_SCORE("watching", lexical_scores[2]);
+ CHECK_SCORE("</s>", lexical_scores[6]);
+
+ ChartState l1[4];
+ float l1_scores[4];
+ {
+ RuleScore<M> score(m, l1[0]);
+ score.NonTerminal(lexical[0], lexical_scores[0]);
+ score.NonTerminal(lexical[1], lexical_scores[1]);
+ CHECK_SCORE("in biarritz", l1_scores[0] = score.Finish());
+ }
+ {
+ RuleScore<M> score(m, l1[1]);
+ score.NonTerminal(lexical[2], lexical_scores[2]);
+ score.NonTerminal(lexical[3], lexical_scores[3]);
+ CHECK_SCORE("watching considering", l1_scores[1] = score.Finish());
+ }
+ {
+ RuleScore<M> score(m, l1[2]);
+ score.NonTerminal(lexical[4], lexical_scores[4]);
+ score.NonTerminal(lexical[5], lexical_scores[5]);
+ CHECK_SCORE("looking .", l1_scores[2] = score.Finish());
+ }
+ BOOST_CHECK_EQUAL(l1[2].left.length, 1);
+ l1[3] = lexical[6];
+ l1_scores[3] = lexical_scores[6];
+
+ ChartState l2[2];
+ float l2_scores[2];
+ {
+ RuleScore<M> score(m, l2[0]);
+ score.NonTerminal(l1[0], l1_scores[0]);
+ score.NonTerminal(l1[1], l1_scores[1]);
+ CHECK_SCORE("in biarritz watching considering", l2_scores[0] = score.Finish());
+ }
+ {
+ RuleScore<M> score(m, l2[1]);
+ score.NonTerminal(l1[2], l1_scores[2]);
+ score.NonTerminal(l1[3], l1_scores[3]);
+ CHECK_SCORE("looking . </s>", l2_scores[1] = score.Finish());
+ }
+ BOOST_CHECK_EQUAL(l2[1].left.length, 1);
+ BOOST_CHECK(l2[1].left.full);
+
+ ChartState top;
+ {
+ RuleScore<M> score(m, top);
+ score.NonTerminal(l2[0], l2_scores[0]);
+ score.NonTerminal(l2[1], l2_scores[1]);
+ CHECK_SCORE("in biarritz watching considering looking . </s>", score.Finish());
+ }
+}
+
+const char *FileLocation() {
+ if (boost::unit_test::framework::master_test_suite().argc < 2) {
+ return "test.arpa";
+ }
+ return boost::unit_test::framework::master_test_suite().argv[1];
+}
+
+template <class M> void Everything() {
+ Config config;
+ config.messages = NULL;
+ M m(FileLocation(), config);
+
+ Short(m);
+ Charge(m);
+ GrowBig(m);
+ AlsoWouldConsiderHigher(m);
+ GrowSmall(m);
+ FullGrow(m);
+}
+
+BOOST_AUTO_TEST_CASE(ProbingAll) {
+ Everything<Model>();
+}
+BOOST_AUTO_TEST_CASE(TrieAll) {
+ Everything<TrieModel>();
+}
+BOOST_AUTO_TEST_CASE(QuantTrieAll) {
+ Everything<QuantTrieModel>();
+}
+BOOST_AUTO_TEST_CASE(ArrayQuantTrieAll) {
+ Everything<QuantArrayTrieModel>();
+}
+BOOST_AUTO_TEST_CASE(ArrayTrieAll) {
+ Everything<ArrayTrieModel>();
+}
+
+BOOST_AUTO_TEST_CASE(RestProbing) {
+ Config config;
+ config.messages = NULL;
+ RestProbingModel m(FileLocation(), config);
+ GrowBig(m, true);
+}
+
+} // namespace
+} // namespace ngram
+} // namespace lm
diff --git a/src/kenlm/lm/lm_exception.cc b/src/kenlm/lm/lm_exception.cc
new file mode 100644
index 0000000..58d468f
--- /dev/null
+++ b/src/kenlm/lm/lm_exception.cc
@@ -0,0 +1,23 @@
+#include "lm/lm_exception.hh"
+
+#include <cerrno>
+#include <cstdio>
+
+namespace lm {
+
+ConfigException::ConfigException() throw() {}
+ConfigException::~ConfigException() throw() {}
+
+LoadException::LoadException() throw() {}
+LoadException::~LoadException() throw() {}
+
+FormatLoadException::FormatLoadException() throw() {}
+FormatLoadException::~FormatLoadException() throw() {}
+
+VocabLoadException::VocabLoadException() throw() {}
+VocabLoadException::~VocabLoadException() throw() {}
+
+SpecialWordMissingException::SpecialWordMissingException() throw() {}
+SpecialWordMissingException::~SpecialWordMissingException() throw() {}
+
+} // namespace lm
diff --git a/src/kenlm/lm/lm_exception.hh b/src/kenlm/lm/lm_exception.hh
new file mode 100644
index 0000000..85a5738
--- /dev/null
+++ b/src/kenlm/lm/lm_exception.hh
@@ -0,0 +1,50 @@
+#ifndef LM_LM_EXCEPTION_H
+#define LM_LM_EXCEPTION_H
+
+// Named to avoid conflict with util/exception.hh.
+
+#include "util/exception.hh"
+#include "util/string_piece.hh"
+
+#include <exception>
+#include <string>
+
+namespace lm {
+
+typedef enum {THROW_UP, COMPLAIN, SILENT} WarningAction;
+
+class ConfigException : public util::Exception {
+ public:
+ ConfigException() throw();
+ ~ConfigException() throw();
+};
+
+class LoadException : public util::Exception {
+ public:
+ virtual ~LoadException() throw();
+
+ protected:
+ LoadException() throw();
+};
+
+class FormatLoadException : public LoadException {
+ public:
+ FormatLoadException() throw();
+ ~FormatLoadException() throw();
+};
+
+class VocabLoadException : public LoadException {
+ public:
+ virtual ~VocabLoadException() throw();
+ VocabLoadException() throw();
+};
+
+class SpecialWordMissingException : public VocabLoadException {
+ public:
+ explicit SpecialWordMissingException() throw();
+ ~SpecialWordMissingException() throw();
+};
+
+} // namespace lm
+
+#endif // LM_LM_EXCEPTION
diff --git a/src/kenlm/lm/max_order.hh b/src/kenlm/lm/max_order.hh
new file mode 100644
index 0000000..0ad1379
--- /dev/null
+++ b/src/kenlm/lm/max_order.hh
@@ -0,0 +1,13 @@
+#ifndef LM_MAX_ORDER_H
+#define LM_MAX_ORDER_H
+/* IF YOUR BUILD SYSTEM PASSES -DKENLM_MAX_ORDER, THEN CHANGE THE BUILD SYSTEM.
+ * If not, this is the default maximum order.
+ * Having this limit means that State can be
+ * (kMaxOrder - 1) * sizeof(float) bytes instead of
+ * sizeof(float*) + (kMaxOrder - 1) * sizeof(float) + malloc overhead
+ */
+#ifndef KENLM_ORDER_MESSAGE
+#define KENLM_ORDER_MESSAGE "If your build system supports changing KENLM_MAX_ORDER, change it there and recompile. In the KenLM tarball or Moses, use e.g. `bjam --max-kenlm-order=6 -a'. Otherwise, edit lm/max_order.hh."
+#endif
+
+#endif // LM_MAX_ORDER_H
diff --git a/src/joshua/decoder/ff/lm/kenlm/lm/model.cc b/src/kenlm/lm/model.cc
similarity index 100%
rename from src/joshua/decoder/ff/lm/kenlm/lm/model.cc
rename to src/kenlm/lm/model.cc
diff --git a/src/kenlm/lm/model.hh b/src/kenlm/lm/model.hh
new file mode 100644
index 0000000..b2bbe39
--- /dev/null
+++ b/src/kenlm/lm/model.hh
@@ -0,0 +1,155 @@
+#ifndef LM_MODEL_H
+#define LM_MODEL_H
+
+#include "lm/bhiksha.hh"
+#include "lm/binary_format.hh"
+#include "lm/config.hh"
+#include "lm/facade.hh"
+#include "lm/quantize.hh"
+#include "lm/search_hashed.hh"
+#include "lm/search_trie.hh"
+#include "lm/state.hh"
+#include "lm/value.hh"
+#include "lm/vocab.hh"
+#include "lm/weights.hh"
+
+#include "util/murmur_hash.hh"
+
+#include <algorithm>
+#include <vector>
+#include <cstring>
+
+namespace util { class FilePiece; }
+
+namespace lm {
+namespace ngram {
+namespace detail {
+
+// Should return the same results as SRI.
+// ModelFacade typedefs Vocabulary so we use VocabularyT to avoid naming conflicts.
+template <class Search, class VocabularyT> class GenericModel : public base::ModelFacade<GenericModel<Search, VocabularyT>, State, VocabularyT> {
+ private:
+ typedef base::ModelFacade<GenericModel<Search, VocabularyT>, State, VocabularyT> P;
+ public:
+ // This is the model type returned by RecognizeBinary.
+ static const ModelType kModelType;
+
+ static const unsigned int kVersion = Search::kVersion;
+
+ /* Get the size of memory that will be mapped given ngram counts. This
+ * does not include small non-mapped control structures, such as this class
+ * itself.
+ */
+ static uint64_t Size(const std::vector<uint64_t> &counts, const Config &config = Config());
+
+ /* Load the model from a file. It may be an ARPA or binary file. Binary
+ * files must have the format expected by this class or you'll get an
+ * exception. So TrieModel can only load ARPA or binary created by
+ * TrieModel. To classify binary files, call RecognizeBinary in
+ * lm/binary_format.hh.
+ */
+ explicit GenericModel(const char *file, const Config &config = Config());
+
+ /* Score p(new_word | in_state) and incorporate new_word into out_state.
+ * Note that in_state and out_state must be different references:
+ * &in_state != &out_state.
+ */
+ FullScoreReturn FullScore(const State &in_state, const WordIndex new_word, State &out_state) const;
+
+ /* Slower call without in_state. Try to remember state, but sometimes it
+ * would cost too much memory or your decoder isn't setup properly.
+ * To use this function, make an array of WordIndex containing the context
+ * vocabulary ids in reverse order. Then, pass the bounds of the array:
+ * [context_rbegin, context_rend). The new_word is not part of the context
+ * array unless you intend to repeat words.
+ */
+ FullScoreReturn FullScoreForgotState(const WordIndex *context_rbegin, const WordIndex *context_rend, const WordIndex new_word, State &out_state) const;
+
+ /* Get the state for a context. Don't use this if you can avoid it. Use
+ * BeginSentenceState or NullContextState and extend from those. If
+ * you're only going to use this state to call FullScore once, use
+ * FullScoreForgotState.
+ * To use this function, make an array of WordIndex containing the context
+ * vocabulary ids in reverse order. Then, pass the bounds of the array:
+ * [context_rbegin, context_rend).
+ */
+ void GetState(const WordIndex *context_rbegin, const WordIndex *context_rend, State &out_state) const;
+
+ /* More efficient version of FullScore where a partial n-gram has already
+ * been scored.
+ * NOTE: THE RETURNED .rest AND .prob ARE RELATIVE TO THE .rest RETURNED BEFORE.
+ */
+ FullScoreReturn ExtendLeft(
+ // Additional context in reverse order. This will update add_rend to
+ const WordIndex *add_rbegin, const WordIndex *add_rend,
+ // Backoff weights to use.
+ const float *backoff_in,
+ // extend_left returned by a previous query.
+ uint64_t extend_pointer,
+ // Length of n-gram that the pointer corresponds to.
+ unsigned char extend_length,
+ // Where to write additional backoffs for [extend_length + 1, min(Order() - 1, return.ngram_length)]
+ float *backoff_out,
+ // Amount of additional content that should be considered by the next call.
+ unsigned char &next_use) const;
+
+ /* Return probabilities minus rest costs for an array of pointers. The
+ * first length should be the length of the n-gram to which pointers_begin
+ * points.
+ */
+ float UnRest(const uint64_t *pointers_begin, const uint64_t *pointers_end, unsigned char first_length) const {
+ // Compiler should optimize this if away.
+ return Search::kDifferentRest ? InternalUnRest(pointers_begin, pointers_end, first_length) : 0.0;
+ }
+
+ private:
+ FullScoreReturn ScoreExceptBackoff(const WordIndex *const context_rbegin, const WordIndex *const context_rend, const WordIndex new_word, State &out_state) const;
+
+ // Score bigrams and above. Do not include backoff.
+ void ResumeScore(const WordIndex *context_rbegin, const WordIndex *const context_rend, unsigned char starting_order_minus_2, typename Search::Node &node, float *backoff_out, unsigned char &next_use, FullScoreReturn &ret) const;
+
+ // Appears after Size in the cc file.
+ void SetupMemory(void *start, const std::vector<uint64_t> &counts, const Config &config);
+
+ void InitializeFromARPA(int fd, const char *file, const Config &config);
+
+ float InternalUnRest(const uint64_t *pointers_begin, const uint64_t *pointers_end, unsigned char first_length) const;
+
+ BinaryFormat backing_;
+
+ VocabularyT vocab_;
+
+ Search search_;
+};
+
+} // namespace detail
+
+// Instead of typedef, inherit. This allows the Model etc to be forward declared.
+// Oh the joys of C and C++.
+#define LM_COMMA() ,
+#define LM_NAME_MODEL(name, from)\
+class name : public from {\
+ public:\
+ name(const char *file, const Config &config = Config()) : from(file, config) {}\
+};
+
+LM_NAME_MODEL(ProbingModel, detail::GenericModel<detail::HashedSearch<BackoffValue> LM_COMMA() ProbingVocabulary>);
+LM_NAME_MODEL(RestProbingModel, detail::GenericModel<detail::HashedSearch<RestValue> LM_COMMA() ProbingVocabulary>);
+LM_NAME_MODEL(TrieModel, detail::GenericModel<trie::TrieSearch<DontQuantize LM_COMMA() trie::DontBhiksha> LM_COMMA() SortedVocabulary>);
+LM_NAME_MODEL(ArrayTrieModel, detail::GenericModel<trie::TrieSearch<DontQuantize LM_COMMA() trie::ArrayBhiksha> LM_COMMA() SortedVocabulary>);
+LM_NAME_MODEL(QuantTrieModel, detail::GenericModel<trie::TrieSearch<SeparatelyQuantize LM_COMMA() trie::DontBhiksha> LM_COMMA() SortedVocabulary>);
+LM_NAME_MODEL(QuantArrayTrieModel, detail::GenericModel<trie::TrieSearch<SeparatelyQuantize LM_COMMA() trie::ArrayBhiksha> LM_COMMA() SortedVocabulary>);
+
+// Default implementation. No real reason for it to be the default.
+typedef ::lm::ngram::ProbingVocabulary Vocabulary;
+typedef ProbingModel Model;
+
+/* Autorecognize the file type, load, and return the virtual base class. Don't
+ * use the virtual base class if you can avoid it. Instead, use the above
+ * classes as template arguments to your own virtual feature function.*/
+base::Model *LoadVirtual(const char *file_name, const Config &config = Config(), ModelType if_arpa = PROBING);
+
+} // namespace ngram
+} // namespace lm
+
+#endif // LM_MODEL_H
diff --git a/src/kenlm/lm/model_test.cc b/src/kenlm/lm/model_test.cc
new file mode 100644
index 0000000..d408d6f
--- /dev/null
+++ b/src/kenlm/lm/model_test.cc
@@ -0,0 +1,448 @@
+#include "lm/model.hh"
+
+#include <cstdlib>
+#include <cstring>
+
+#define BOOST_TEST_MODULE ModelTest
+#include <boost/test/unit_test.hpp>
+#include <boost/test/floating_point_comparison.hpp>
+
+// Apparently some Boost versions use templates and are pretty strict about types matching.
+#define SLOPPY_CHECK_CLOSE(ref, value, tol) BOOST_CHECK_CLOSE(static_cast<double>(ref), static_cast<double>(value), static_cast<double>(tol));
+
+namespace lm {
+namespace ngram {
+
+std::ostream &operator<<(std::ostream &o, const State &state) {
+ o << "State length " << static_cast<unsigned int>(state.length) << ':';
+ for (const WordIndex *i = state.words; i < state.words + state.length; ++i) {
+ o << ' ' << *i;
+ }
+ return o;
+}
+
+namespace {
+
+// Stupid bjam reverses the command line arguments randomly.
+const char *TestLocation() {
+ if (boost::unit_test::framework::master_test_suite().argc < 3) {
+ return "test.arpa";
+ }
+ char **argv = boost::unit_test::framework::master_test_suite().argv;
+ return argv[strstr(argv[1], "nounk") ? 2 : 1];
+}
+const char *TestNoUnkLocation() {
+ if (boost::unit_test::framework::master_test_suite().argc < 3) {
+ return "test_nounk.arpa";
+ }
+ char **argv = boost::unit_test::framework::master_test_suite().argv;
+ return argv[strstr(argv[1], "nounk") ? 1 : 2];
+}
+
+template <class Model> State GetState(const Model &model, const char *word, const State &in) {
+ WordIndex context[in.length + 1];
+ context[0] = model.GetVocabulary().Index(word);
+ std::copy(in.words, in.words + in.length, context + 1);
+ State ret;
+ model.GetState(context, context + in.length + 1, ret);
+ return ret;
+}
+
+#define StartTest(word, ngram, score, indep_left) \
+ ret = model.FullScore( \
+ state, \
+ model.GetVocabulary().Index(word), \
+ out);\
+ SLOPPY_CHECK_CLOSE(score, ret.prob, 0.001); \
+ BOOST_CHECK_EQUAL(static_cast<unsigned int>(ngram), ret.ngram_length); \
+ BOOST_CHECK_GE(std::min<unsigned char>(ngram, 5 - 1), out.length); \
+ BOOST_CHECK_EQUAL(indep_left, ret.independent_left); \
+ BOOST_CHECK_EQUAL(out, GetState(model, word, state));
+
+#define AppendTest(word, ngram, score, indep_left) \
+ StartTest(word, ngram, score, indep_left) \
+ state = out;
+
+template <class M> void Starters(const M &model) {
+ FullScoreReturn ret;
+ Model::State state(model.BeginSentenceState());
+ Model::State out;
+
+ StartTest("looking", 2, -0.4846522, true);
+
+ // , probability plus <s> backoff
+ StartTest(",", 1, -1.383514 + -0.4149733, true);
+ // <unk> probability plus <s> backoff
+ StartTest("this_is_not_found", 1, -1.995635 + -0.4149733, true);
+}
+
+template <class M> void Continuation(const M &model) {
+ FullScoreReturn ret;
+ Model::State state(model.BeginSentenceState());
+ Model::State out;
+
+ AppendTest("looking", 2, -0.484652, true);
+ AppendTest("on", 3, -0.348837, true);
+ AppendTest("a", 4, -0.0155266, true);
+ AppendTest("little", 5, -0.00306122, true);
+ State preserve = state;
+ AppendTest("the", 1, -4.04005, true);
+ AppendTest("biarritz", 1, -1.9889, true);
+ AppendTest("not_found", 1, -2.29666, true);
+ AppendTest("more", 1, -1.20632 - 20.0, true);
+ AppendTest(".", 2, -0.51363, true);
+ AppendTest("</s>", 3, -0.0191651, true);
+ BOOST_CHECK_EQUAL(0, state.length);
+
+ state = preserve;
+ AppendTest("more", 5, -0.00181395, true);
+ BOOST_CHECK_EQUAL(4, state.length);
+ AppendTest("loin", 5, -0.0432557, true);
+ BOOST_CHECK_EQUAL(1, state.length);
+}
+
+template <class M> void Blanks(const M &model) {
+ FullScoreReturn ret;
+ State state(model.NullContextState());
+ State out;
+ AppendTest("also", 1, -1.687872, false);
+ AppendTest("would", 2, -2, true);
+ AppendTest("consider", 3, -3, true);
+ State preserve = state;
+ AppendTest("higher", 4, -4, true);
+ AppendTest("looking", 5, -5, true);
+ BOOST_CHECK_EQUAL(1, state.length);
+
+ state = preserve;
+ // also would consider not_found
+ AppendTest("not_found", 1, -1.995635 - 7.0 - 0.30103, true);
+
+ state = model.NullContextState();
+ // higher looking is a blank.
+ AppendTest("higher", 1, -1.509559, false);
+ AppendTest("looking", 2, -1.285941 - 0.30103, false);
+
+ State higher_looking = state;
+
+ BOOST_CHECK_EQUAL(1, state.length);
+ AppendTest("not_found", 1, -1.995635 - 0.4771212, true);
+
+ state = higher_looking;
+ // higher looking consider
+ AppendTest("consider", 1, -1.687872 - 0.4771212, true);
+
+ state = model.NullContextState();
+ AppendTest("would", 1, -1.687872, false);
+ BOOST_CHECK_EQUAL(1, state.length);
+ AppendTest("consider", 2, -1.687872 -0.30103, false);
+ BOOST_CHECK_EQUAL(2, state.length);
+ AppendTest("higher", 3, -1.509559 - 0.30103, false);
+ BOOST_CHECK_EQUAL(3, state.length);
+ AppendTest("looking", 4, -1.285941 - 0.30103, false);
+}
+
+template <class M> void Unknowns(const M &model) {
+ FullScoreReturn ret;
+ State state(model.NullContextState());
+ State out;
+
+ AppendTest("not_found", 1, -1.995635, false);
+ State preserve = state;
+ AppendTest("not_found2", 2, -15.0, true);
+ AppendTest("not_found3", 2, -15.0 - 2.0, true);
+
+ state = preserve;
+ AppendTest("however", 2, -4, true);
+ AppendTest("not_found3", 3, -6, true);
+}
+
+template <class M> void MinimalState(const M &model) {
+ FullScoreReturn ret;
+ State state(model.NullContextState());
+ State out;
+
+ AppendTest("baz", 1, -6.535897, true);
+ BOOST_CHECK_EQUAL(0, state.length);
+ state = model.NullContextState();
+ AppendTest("foo", 1, -3.141592, true);
+ BOOST_CHECK_EQUAL(1, state.length);
+ AppendTest("bar", 2, -6.0, true);
+ // Has to include the backoff weight.
+ BOOST_CHECK_EQUAL(1, state.length);
+ AppendTest("bar", 1, -2.718281 + 3.0, true);
+ BOOST_CHECK_EQUAL(1, state.length);
+
+ state = model.NullContextState();
+ AppendTest("to", 1, -1.687872, false);
+ AppendTest("look", 2, -0.2922095, true);
+ BOOST_CHECK_EQUAL(2, state.length);
+ AppendTest("a", 3, -7, true);
+}
+
+template <class M> void ExtendLeftTest(const M &model) {
+ State right;
+ FullScoreReturn little(model.FullScore(model.NullContextState(), model.GetVocabulary().Index("little"), right));
+ const float kLittleProb = -1.285941;
+ SLOPPY_CHECK_CLOSE(kLittleProb, little.prob, 0.001);
+ unsigned char next_use;
+ float backoff_out[4];
+
+ FullScoreReturn extend_none(model.ExtendLeft(NULL, NULL, NULL, little.extend_left, 1, NULL, next_use));
+ BOOST_CHECK_EQUAL(0, next_use);
+ BOOST_CHECK_EQUAL(little.extend_left, extend_none.extend_left);
+ SLOPPY_CHECK_CLOSE(little.prob - little.rest, extend_none.prob, 0.001);
+ BOOST_CHECK_EQUAL(1, extend_none.ngram_length);
+
+ const WordIndex a = model.GetVocabulary().Index("a");
+ float backoff_in = 3.14;
+ // a little
+ FullScoreReturn extend_a(model.ExtendLeft(&a, &a + 1, &backoff_in, little.extend_left, 1, backoff_out, next_use));
+ BOOST_CHECK_EQUAL(1, next_use);
+ SLOPPY_CHECK_CLOSE(-0.69897, backoff_out[0], 0.001);
+ SLOPPY_CHECK_CLOSE(-0.09132547 - little.rest, extend_a.prob, 0.001);
+ BOOST_CHECK_EQUAL(2, extend_a.ngram_length);
+ BOOST_CHECK(!extend_a.independent_left);
+
+ const WordIndex on = model.GetVocabulary().Index("on");
+ FullScoreReturn extend_on(model.ExtendLeft(&on, &on + 1, &backoff_in, extend_a.extend_left, 2, backoff_out, next_use));
+ BOOST_CHECK_EQUAL(1, next_use);
+ SLOPPY_CHECK_CLOSE(-0.4771212, backoff_out[0], 0.001);
+ SLOPPY_CHECK_CLOSE(-0.0283603 - (extend_a.rest + little.rest), extend_on.prob, 0.001);
+ BOOST_CHECK_EQUAL(3, extend_on.ngram_length);
+ BOOST_CHECK(!extend_on.independent_left);
+
+ const WordIndex both[2] = {a, on};
+ float backoff_in_arr[4];
+ FullScoreReturn extend_both(model.ExtendLeft(both, both + 2, backoff_in_arr, little.extend_left, 1, backoff_out, next_use));
+ BOOST_CHECK_EQUAL(2, next_use);
+ SLOPPY_CHECK_CLOSE(-0.69897, backoff_out[0], 0.001);
+ SLOPPY_CHECK_CLOSE(-0.4771212, backoff_out[1], 0.001);
+ SLOPPY_CHECK_CLOSE(-0.0283603 - little.rest, extend_both.prob, 0.001);
+ BOOST_CHECK_EQUAL(3, extend_both.ngram_length);
+ BOOST_CHECK(!extend_both.independent_left);
+ BOOST_CHECK_EQUAL(extend_on.extend_left, extend_both.extend_left);
+}
+
+#define StatelessTest(word, provide, ngram, score) \
+ ret = model.FullScoreForgotState(indices + num_words - word, indices + num_words - word + provide, indices[num_words - word - 1], state); \
+ SLOPPY_CHECK_CLOSE(score, ret.prob, 0.001); \
+ BOOST_CHECK_EQUAL(static_cast<unsigned int>(ngram), ret.ngram_length); \
+ model.GetState(indices + num_words - word, indices + num_words - word + provide, before); \
+ ret = model.FullScore(before, indices[num_words - word - 1], out); \
+ BOOST_CHECK(state == out); \
+ SLOPPY_CHECK_CLOSE(score, ret.prob, 0.001); \
+ BOOST_CHECK_EQUAL(static_cast<unsigned int>(ngram), ret.ngram_length);
+
+template <class M> void Stateless(const M &model) {
+ const char *words[] = {"<s>", "looking", "on", "a", "little", "the", "biarritz", "not_found", "more", ".", "</s>"};
+ const size_t num_words = sizeof(words) / sizeof(const char*);
+ // Silience "array subscript is above array bounds" when extracting end pointer.
+ WordIndex indices[num_words + 1];
+ for (unsigned int i = 0; i < num_words; ++i) {
+ indices[num_words - 1 - i] = model.GetVocabulary().Index(words[i]);
+ }
+ FullScoreReturn ret;
+ State state, out, before;
+
+ ret = model.FullScoreForgotState(indices + num_words - 1, indices + num_words, indices[num_words - 2], state);
+ SLOPPY_CHECK_CLOSE(-0.484652, ret.prob, 0.001);
+ StatelessTest(1, 1, 2, -0.484652);
+
+ // looking
+ StatelessTest(1, 2, 2, -0.484652);
+ // on
+ AppendTest("on", 3, -0.348837, true);
+ StatelessTest(2, 3, 3, -0.348837);
+ StatelessTest(2, 2, 3, -0.348837);
+ StatelessTest(2, 1, 2, -0.4638903);
+ // a
+ StatelessTest(3, 4, 4, -0.0155266);
+ // little
+ AppendTest("little", 5, -0.00306122, true);
+ StatelessTest(4, 5, 5, -0.00306122);
+ // the
+ AppendTest("the", 1, -4.04005, true);
+ StatelessTest(5, 5, 1, -4.04005);
+ // No context of the.
+ StatelessTest(5, 0, 1, -1.687872);
+ // biarritz
+ StatelessTest(6, 1, 1, -1.9889);
+ // not found
+ StatelessTest(7, 1, 1, -2.29666);
+ StatelessTest(7, 0, 1, -1.995635);
+
+ WordIndex unk[1];
+ unk[0] = 0;
+ model.GetState(unk, unk + 1, state);
+ BOOST_CHECK_EQUAL(1, state.length);
+ BOOST_CHECK_EQUAL(static_cast<WordIndex>(0), state.words[0]);
+}
+
+template <class M> void NoUnkCheck(const M &model) {
+ WordIndex unk_index = 0;
+ State state;
+
+ FullScoreReturn ret = model.FullScoreForgotState(&unk_index, &unk_index + 1, unk_index, state);
+ SLOPPY_CHECK_CLOSE(-100.0, ret.prob, 0.001);
+}
+
+template <class M> void Everything(const M &m) {
+ Starters(m);
+ Continuation(m);
+ Blanks(m);
+ Unknowns(m);
+ MinimalState(m);
+ ExtendLeftTest(m);
+ Stateless(m);
+}
+
+class ExpectEnumerateVocab : public EnumerateVocab {
+ public:
+ ExpectEnumerateVocab() {}
+
+ void Add(WordIndex index, const StringPiece &str) {
+ BOOST_CHECK_EQUAL(seen.size(), index);
+ seen.push_back(std::string(str.data(), str.length()));
+ }
+
+ void Check(const base::Vocabulary &vocab) {
+ BOOST_CHECK_EQUAL(37ULL, seen.size());
+ BOOST_REQUIRE(!seen.empty());
+ BOOST_CHECK_EQUAL("<unk>", seen[0]);
+ for (WordIndex i = 0; i < seen.size(); ++i) {
+ BOOST_CHECK_EQUAL(i, vocab.Index(seen[i]));
+ }
+ }
+
+ void Clear() {
+ seen.clear();
+ }
+
+ std::vector<std::string> seen;
+};
+
+template <class ModelT> void LoadingTest() {
+ Config config;
+ config.arpa_complain = Config::NONE;
+ config.messages = NULL;
+ config.probing_multiplier = 2.0;
+ {
+ ExpectEnumerateVocab enumerate;
+ config.enumerate_vocab = &enumerate;
+ ModelT m(TestLocation(), config);
+ enumerate.Check(m.GetVocabulary());
+ BOOST_CHECK_EQUAL((WordIndex)37, m.GetVocabulary().Bound());
+ Everything(m);
+ }
+ {
+ ExpectEnumerateVocab enumerate;
+ config.enumerate_vocab = &enumerate;
+ ModelT m(TestNoUnkLocation(), config);
+ enumerate.Check(m.GetVocabulary());
+ BOOST_CHECK_EQUAL((WordIndex)37, m.GetVocabulary().Bound());
+ NoUnkCheck(m);
+ }
+}
+
+BOOST_AUTO_TEST_CASE(probing) {
+ LoadingTest<Model>();
+}
+BOOST_AUTO_TEST_CASE(trie) {
+ LoadingTest<TrieModel>();
+}
+BOOST_AUTO_TEST_CASE(quant_trie) {
+ LoadingTest<QuantTrieModel>();
+}
+BOOST_AUTO_TEST_CASE(bhiksha_trie) {
+ LoadingTest<ArrayTrieModel>();
+}
+BOOST_AUTO_TEST_CASE(quant_bhiksha_trie) {
+ LoadingTest<QuantArrayTrieModel>();
+}
+
+template <class ModelT> void BinaryTest(Config::WriteMethod write_method) {
+ Config config;
+ config.write_mmap = "test.binary";
+ config.messages = NULL;
+ config.write_method = write_method;
+ ExpectEnumerateVocab enumerate;
+ config.enumerate_vocab = &enumerate;
+
+ {
+ ModelT copy_model(TestLocation(), config);
+ enumerate.Check(copy_model.GetVocabulary());
+ enumerate.Clear();
+ Everything(copy_model);
+ }
+
+ config.write_mmap = NULL;
+
+ ModelType type;
+ BOOST_REQUIRE(RecognizeBinary("test.binary", type));
+ BOOST_CHECK_EQUAL(ModelT::kModelType, type);
+
+ {
+ ModelT binary("test.binary", config);
+ enumerate.Check(binary.GetVocabulary());
+ Everything(binary);
+ }
+ unlink("test.binary");
+
+ // Now test without <unk>.
+ config.write_mmap = "test_nounk.binary";
+ config.messages = NULL;
+ enumerate.Clear();
+ {
+ ModelT copy_model(TestNoUnkLocation(), config);
+ enumerate.Check(copy_model.GetVocabulary());
+ enumerate.Clear();
+ NoUnkCheck(copy_model);
+ }
+ config.write_mmap = NULL;
+ {
+ ModelT binary(TestNoUnkLocation(), config);
+ enumerate.Check(binary.GetVocabulary());
+ NoUnkCheck(binary);
+ }
+ unlink("test_nounk.binary");
+}
+
+template <class ModelT> void BinaryTest() {
+ BinaryTest<ModelT>(Config::WRITE_MMAP);
+ BinaryTest<ModelT>(Config::WRITE_AFTER);
+}
+
+BOOST_AUTO_TEST_CASE(write_and_read_probing) {
+ BinaryTest<ProbingModel>();
+}
+BOOST_AUTO_TEST_CASE(write_and_read_rest_probing) {
+ BinaryTest<RestProbingModel>();
+}
+BOOST_AUTO_TEST_CASE(write_and_read_trie) {
+ BinaryTest<TrieModel>();
+}
+BOOST_AUTO_TEST_CASE(write_and_read_quant_trie) {
+ BinaryTest<QuantTrieModel>();
+}
+BOOST_AUTO_TEST_CASE(write_and_read_array_trie) {
+ BinaryTest<ArrayTrieModel>();
+}
+BOOST_AUTO_TEST_CASE(write_and_read_quant_array_trie) {
+ BinaryTest<QuantArrayTrieModel>();
+}
+
+BOOST_AUTO_TEST_CASE(rest_max) {
+ Config config;
+ config.arpa_complain = Config::NONE;
+ config.messages = NULL;
+
+ RestProbingModel model(TestLocation(), config);
+ State state, out;
+ FullScoreReturn ret(model.FullScore(model.NullContextState(), model.GetVocabulary().Index("."), state));
+ SLOPPY_CHECK_CLOSE(-0.2705918, ret.rest, 0.001);
+ SLOPPY_CHECK_CLOSE(-0.01916512, model.FullScore(state, model.GetVocabulary().EndSentence(), out).rest, 0.001);
+}
+
+} // namespace
+} // namespace ngram
+} // namespace lm
diff --git a/src/kenlm/lm/model_type.hh b/src/kenlm/lm/model_type.hh
new file mode 100644
index 0000000..dcdc6ac
--- /dev/null
+++ b/src/kenlm/lm/model_type.hh
@@ -0,0 +1,23 @@
+#ifndef LM_MODEL_TYPE_H
+#define LM_MODEL_TYPE_H
+
+namespace lm {
+namespace ngram {
+
+/* Not the best numbering system, but it grew this way for historical reasons
+ * and I want to preserve existing binary files. */
+typedef enum {PROBING=0, REST_PROBING=1, TRIE=2, QUANT_TRIE=3, ARRAY_TRIE=4, QUANT_ARRAY_TRIE=5} ModelType;
+
+// Historical names.
+const ModelType HASH_PROBING = PROBING;
+const ModelType TRIE_SORTED = TRIE;
+const ModelType QUANT_TRIE_SORTED = QUANT_TRIE;
+const ModelType ARRAY_TRIE_SORTED = ARRAY_TRIE;
+const ModelType QUANT_ARRAY_TRIE_SORTED = QUANT_ARRAY_TRIE;
+
+const static ModelType kQuantAdd = static_cast<ModelType>(QUANT_TRIE - TRIE);
+const static ModelType kArrayAdd = static_cast<ModelType>(ARRAY_TRIE - TRIE);
+
+} // namespace ngram
+} // namespace lm
+#endif // LM_MODEL_TYPE_H
diff --git a/src/kenlm/lm/neural/Jamfile b/src/kenlm/lm/neural/Jamfile
new file mode 100644
index 0000000..14cd8e3
--- /dev/null
+++ b/src/kenlm/lm/neural/Jamfile
@@ -0,0 +1,6 @@
+with-eigen = [ option.get "with-eigen" ] ;
+if ! $(with-eigen) && ! [ test_flags "" : "#include <Eigen/Dense>\nint main() {}" ] {
+ with-eigen = "/usr/include/eigen3" ;
+}
+with-eigen = <include>$(with-eigen) ;
+fakelib neural : ..//kenlm wordvecs.cc : $(with-eigen) : : <cxxflags>-fopenmp <linkflags>-fopenmp $(with-eigen) ;
diff --git a/src/kenlm/lm/neural/wordvecs.hh b/src/kenlm/lm/neural/wordvecs.hh
new file mode 100644
index 0000000..921a2b2
--- /dev/null
+++ b/src/kenlm/lm/neural/wordvecs.hh
@@ -0,0 +1,38 @@
+#ifndef LM_NEURAL_WORDVECS_H
+#define LM_NEURAL_WORDVECS_H
+
+#include "util/scoped.hh"
+#include "lm/vocab.hh"
+
+#include <Eigen/Dense>
+
+namespace util { class FilePiece; }
+
+namespace lm {
+namespace neural {
+
+class WordVecs {
+ public:
+ // Columns of the matrix are word vectors. The column index is the word.
+ typedef Eigen::Matrix<float, Eigen::Dynamic, Eigen::Dynamic, Eigen::ColMajor> Storage;
+
+ /* The file should begin with a line stating the number of word vectors and
+ * the length of the vectors. Then it's followed by lines containing a
+ * word followed by floating-point values.
+ */
+ explicit WordVecs(util::FilePiece &in);
+
+ const Storage &Vectors() const { return vecs_; }
+
+ WordIndex Index(StringPiece str) const { return vocab_.Index(str); }
+
+ private:
+ util::scoped_malloc vocab_backing_;
+ ngram::ProbingVocabulary vocab_;
+
+ Storage vecs_;
+};
+
+}} // namespaces
+
+#endif // LM_NEURAL_WORDVECS_H
diff --git a/src/kenlm/lm/ngram_query.hh b/src/kenlm/lm/ngram_query.hh
new file mode 100644
index 0000000..4430841
--- /dev/null
+++ b/src/kenlm/lm/ngram_query.hh
@@ -0,0 +1,113 @@
+#ifndef LM_NGRAM_QUERY_H
+#define LM_NGRAM_QUERY_H
+
+#include "lm/enumerate_vocab.hh"
+#include "lm/model.hh"
+#include "util/file_stream.hh"
+#include "util/file_piece.hh"
+#include "util/usage.hh"
+
+#include <cstdlib>
+#include <string>
+#include <cmath>
+
+namespace lm {
+namespace ngram {
+
+class QueryPrinter {
+ public:
+ QueryPrinter(int fd, bool print_word, bool print_line, bool print_summary, bool flush)
+ : out_(fd), print_word_(print_word), print_line_(print_line), print_summary_(print_summary), flush_(flush) {}
+
+ void Word(StringPiece surface, WordIndex vocab, const FullScoreReturn &ret) {
+ if (!print_word_) return;
+ out_ << surface << '=' << vocab << ' ' << static_cast<unsigned int>(ret.ngram_length) << ' ' << ret.prob << '\t';
+ if (flush_) out_.flush();
+ }
+
+ void Line(uint64_t oov, float total) {
+ if (!print_line_) return;
+ out_ << "Total: " << total << " OOV: " << oov << '\n';
+ if (flush_) out_.flush();
+ }
+
+ void Summary(double ppl_including_oov, double ppl_excluding_oov, uint64_t corpus_oov, uint64_t corpus_tokens) {
+ if (!print_summary_) return;
+ out_ <<
+ "Perplexity including OOVs:\t" << ppl_including_oov << "\n"
+ "Perplexity excluding OOVs:\t" << ppl_excluding_oov << "\n"
+ "OOVs:\t" << corpus_oov << "\n"
+ "Tokens:\t" << corpus_tokens << '\n';
+ out_.flush();
+ }
+
+ private:
+ util::FileStream out_;
+ bool print_word_;
+ bool print_line_;
+ bool print_summary_;
+ bool flush_;
+};
+
+template <class Model, class Printer> void Query(const Model &model, bool sentence_context, Printer &printer) {
+ typename Model::State state, out;
+ lm::FullScoreReturn ret;
+ StringPiece word;
+
+ util::FilePiece in(0);
+
+ double corpus_total = 0.0;
+ double corpus_total_oov_only = 0.0;
+ uint64_t corpus_oov = 0;
+ uint64_t corpus_tokens = 0;
+
+ while (true) {
+ state = sentence_context ? model.BeginSentenceState() : model.NullContextState();
+ float total = 0.0;
+ uint64_t oov = 0;
+
+ while (in.ReadWordSameLine(word)) {
+ lm::WordIndex vocab = model.GetVocabulary().Index(word);
+ ret = model.FullScore(state, vocab, out);
+ if (vocab == model.GetVocabulary().NotFound()) {
+ ++oov;
+ corpus_total_oov_only += ret.prob;
+ }
+ total += ret.prob;
+ printer.Word(word, vocab, ret);
+ ++corpus_tokens;
+ state = out;
+ }
+ // If people don't have a newline after their last query, this won't add a </s>.
+ // Sue me.
+ try {
+ UTIL_THROW_IF('\n' != in.get(), util::Exception, "FilePiece is confused.");
+ } catch (const util::EndOfFileException &e) { break; }
+ if (sentence_context) {
+ ret = model.FullScore(state, model.GetVocabulary().EndSentence(), out);
+ total += ret.prob;
+ ++corpus_tokens;
+ printer.Word("</s>", model.GetVocabulary().EndSentence(), ret);
+ }
+ printer.Line(oov, total);
+ corpus_total += total;
+ corpus_oov += oov;
+ }
+ printer.Summary(
+ pow(10.0, -(corpus_total / static_cast<double>(corpus_tokens))), // PPL including OOVs
+ pow(10.0, -((corpus_total - corpus_total_oov_only) / static_cast<double>(corpus_tokens - corpus_oov))), // PPL excluding OOVs
+ corpus_oov,
+ corpus_tokens);
+}
+
+template <class Model> void Query(const char *file, const Config &config, bool sentence_context, QueryPrinter &printer) {
+ Model model(file, config);
+ Query<Model, QueryPrinter>(model, sentence_context, printer);
+}
+
+} // namespace ngram
+} // namespace lm
+
+#endif // LM_NGRAM_QUERY_H
+
+
diff --git a/src/kenlm/lm/partial.hh b/src/kenlm/lm/partial.hh
new file mode 100644
index 0000000..9e4e352
--- /dev/null
+++ b/src/kenlm/lm/partial.hh
@@ -0,0 +1,166 @@
+#ifndef LM_PARTIAL_H
+#define LM_PARTIAL_H
+
+#include "lm/return.hh"
+#include "lm/state.hh"
+
+#include <algorithm>
+#include <cassert>
+
+namespace lm {
+namespace ngram {
+
+struct ExtendReturn {
+ float adjust;
+ bool make_full;
+ unsigned char next_use;
+};
+
+template <class Model> ExtendReturn ExtendLoop(
+ const Model &model,
+ unsigned char seen, const WordIndex *add_rbegin, const WordIndex *add_rend, const float *backoff_start,
+ const uint64_t *pointers, const uint64_t *pointers_end,
+ uint64_t *&pointers_write,
+ float *backoff_write) {
+ unsigned char add_length = add_rend - add_rbegin;
+
+ float backoff_buf[2][KENLM_MAX_ORDER - 1];
+ float *backoff_in = backoff_buf[0], *backoff_out = backoff_buf[1];
+ std::copy(backoff_start, backoff_start + add_length, backoff_in);
+
+ ExtendReturn value;
+ value.make_full = false;
+ value.adjust = 0.0;
+ value.next_use = add_length;
+
+ unsigned char i = 0;
+ unsigned char length = pointers_end - pointers;
+  // A NULL pointers_write means that the existing left state is full, so we should use completed probabilities.
+ if (pointers_write) {
+ // Using full context, writing to new left state.
+ for (; i < length; ++i) {
+ FullScoreReturn ret(model.ExtendLeft(
+ add_rbegin, add_rbegin + value.next_use,
+ backoff_in,
+ pointers[i], i + seen + 1,
+ backoff_out,
+ value.next_use));
+ std::swap(backoff_in, backoff_out);
+ if (ret.independent_left) {
+ value.adjust += ret.prob;
+ value.make_full = true;
+ ++i;
+ break;
+ }
+ value.adjust += ret.rest;
+ *pointers_write++ = ret.extend_left;
+ if (value.next_use != add_length) {
+ value.make_full = true;
+ ++i;
+ break;
+ }
+ }
+ }
+ // Using some of the new context.
+ for (; i < length && value.next_use; ++i) {
+ FullScoreReturn ret(model.ExtendLeft(
+ add_rbegin, add_rbegin + value.next_use,
+ backoff_in,
+ pointers[i], i + seen + 1,
+ backoff_out,
+ value.next_use));
+ std::swap(backoff_in, backoff_out);
+ value.adjust += ret.prob;
+ }
+ float unrest = model.UnRest(pointers + i, pointers_end, i + seen + 1);
+ // Using none of the new context.
+ value.adjust += unrest;
+
+ std::copy(backoff_in, backoff_in + value.next_use, backoff_write);
+ return value;
+}
+
+template <class Model> float RevealBefore(const Model &model, const Right &reveal, const unsigned char seen, bool reveal_full, Left &left, Right &right) {
+ assert(seen < reveal.length || reveal_full);
+ uint64_t *pointers_write = reveal_full ? NULL : left.pointers;
+ float backoff_buffer[KENLM_MAX_ORDER - 1];
+ ExtendReturn value(ExtendLoop(
+ model,
+ seen, reveal.words + seen, reveal.words + reveal.length, reveal.backoff + seen,
+ left.pointers, left.pointers + left.length,
+ pointers_write,
+ left.full ? backoff_buffer : (right.backoff + right.length)));
+ if (reveal_full) {
+ left.length = 0;
+ value.make_full = true;
+ } else {
+ left.length = pointers_write - left.pointers;
+ value.make_full |= (left.length == model.Order() - 1);
+ }
+ if (left.full) {
+ for (unsigned char i = 0; i < value.next_use; ++i) value.adjust += backoff_buffer[i];
+ } else {
+ // If left wasn't full when it came in, put words into right state.
+ std::copy(reveal.words + seen, reveal.words + seen + value.next_use, right.words + right.length);
+ right.length += value.next_use;
+ left.full = value.make_full || (right.length == model.Order() - 1);
+ }
+ return value.adjust;
+}
+
+template <class Model> float RevealAfter(const Model &model, Left &left, Right &right, const Left &reveal, unsigned char seen) {
+ assert(seen < reveal.length || reveal.full);
+ uint64_t *pointers_write = left.full ? NULL : (left.pointers + left.length);
+ ExtendReturn value(ExtendLoop(
+ model,
+ seen, right.words, right.words + right.length, right.backoff,
+ reveal.pointers + seen, reveal.pointers + reveal.length,
+ pointers_write,
+ right.backoff));
+ if (reveal.full) {
+ for (unsigned char i = 0; i < value.next_use; ++i) value.adjust += right.backoff[i];
+ right.length = 0;
+ value.make_full = true;
+ } else {
+ right.length = value.next_use;
+ value.make_full |= (right.length == model.Order() - 1);
+ }
+ if (!left.full) {
+ left.length = pointers_write - left.pointers;
+ left.full = value.make_full || (left.length == model.Order() - 1);
+ }
+ return value.adjust;
+}
+
+template <class Model> float Subsume(const Model &model, Left &first_left, const Right &first_right, const Left &second_left, Right &second_right, const unsigned int between_length) {
+ assert(first_right.length < KENLM_MAX_ORDER);
+ assert(second_left.length < KENLM_MAX_ORDER);
+ assert(between_length < KENLM_MAX_ORDER - 1);
+ uint64_t *pointers_write = first_left.full ? NULL : (first_left.pointers + first_left.length);
+ float backoff_buffer[KENLM_MAX_ORDER - 1];
+ ExtendReturn value(ExtendLoop(
+ model,
+ between_length, first_right.words, first_right.words + first_right.length, first_right.backoff,
+ second_left.pointers, second_left.pointers + second_left.length,
+ pointers_write,
+ second_left.full ? backoff_buffer : (second_right.backoff + second_right.length)));
+ if (second_left.full) {
+ for (unsigned char i = 0; i < value.next_use; ++i) value.adjust += backoff_buffer[i];
+ } else {
+ std::copy(first_right.words, first_right.words + value.next_use, second_right.words + second_right.length);
+ second_right.length += value.next_use;
+ value.make_full |= (second_right.length == model.Order() - 1);
+ }
+ if (!first_left.full) {
+ first_left.length = pointers_write - first_left.pointers;
+ first_left.full = value.make_full || second_left.full || (first_left.length == model.Order() - 1);
+ }
+ assert(first_left.length < KENLM_MAX_ORDER);
+ assert(second_right.length < KENLM_MAX_ORDER);
+ return value.adjust;
+}
+
+} // namespace ngram
+} // namespace lm
+
+#endif // LM_PARTIAL_H
diff --git a/src/kenlm/lm/partial_test.cc b/src/kenlm/lm/partial_test.cc
new file mode 100644
index 0000000..adb644f
--- /dev/null
+++ b/src/kenlm/lm/partial_test.cc
@@ -0,0 +1,199 @@
+#include "lm/partial.hh"
+
+#include "lm/left.hh"
+#include "lm/model.hh"
+#include "util/tokenize_piece.hh"
+
+#define BOOST_TEST_MODULE PartialTest
+#include <boost/test/unit_test.hpp>
+#include <boost/test/floating_point_comparison.hpp>
+
+namespace lm {
+namespace ngram {
+namespace {
+
+const char *TestLocation() {
+ if (boost::unit_test::framework::master_test_suite().argc < 2) {
+ return "test.arpa";
+ }
+ return boost::unit_test::framework::master_test_suite().argv[1];
+}
+
+Config SilentConfig() {
+ Config config;
+ config.arpa_complain = Config::NONE;
+ config.messages = NULL;
+ return config;
+}
+
+struct ModelFixture {
+ ModelFixture() : m(TestLocation(), SilentConfig()) {}
+
+ RestProbingModel m;
+};
+
+BOOST_FIXTURE_TEST_SUITE(suite, ModelFixture)
+
+BOOST_AUTO_TEST_CASE(SimpleBefore) {
+ Left left;
+ left.full = false;
+ left.length = 0;
+ Right right;
+ right.length = 0;
+
+ Right reveal;
+ reveal.length = 1;
+ WordIndex period = m.GetVocabulary().Index(".");
+ reveal.words[0] = period;
+ reveal.backoff[0] = -0.845098;
+
+ BOOST_CHECK_CLOSE(0.0, RevealBefore(m, reveal, 0, false, left, right), 0.001);
+ BOOST_CHECK_EQUAL(0, left.length);
+ BOOST_CHECK(!left.full);
+ BOOST_CHECK_EQUAL(1, right.length);
+ BOOST_CHECK_EQUAL(period, right.words[0]);
+ BOOST_CHECK_CLOSE(-0.845098, right.backoff[0], 0.001);
+
+ WordIndex more = m.GetVocabulary().Index("more");
+ reveal.words[1] = more;
+ reveal.backoff[1] = -0.4771212;
+ reveal.length = 2;
+ BOOST_CHECK_CLOSE(0.0, RevealBefore(m, reveal, 1, false, left, right), 0.001);
+ BOOST_CHECK_EQUAL(0, left.length);
+ BOOST_CHECK(!left.full);
+ BOOST_CHECK_EQUAL(2, right.length);
+ BOOST_CHECK_EQUAL(period, right.words[0]);
+ BOOST_CHECK_EQUAL(more, right.words[1]);
+ BOOST_CHECK_CLOSE(-0.845098, right.backoff[0], 0.001);
+ BOOST_CHECK_CLOSE(-0.4771212, right.backoff[1], 0.001);
+}
+
+BOOST_AUTO_TEST_CASE(AlsoWouldConsider) {
+ WordIndex would = m.GetVocabulary().Index("would");
+ WordIndex consider = m.GetVocabulary().Index("consider");
+
+ ChartState current;
+ current.left.length = 1;
+ current.left.pointers[0] = would;
+ current.left.full = false;
+ current.right.length = 1;
+ current.right.words[0] = would;
+ current.right.backoff[0] = -0.30103;
+
+ Left after;
+ after.full = false;
+ after.length = 1;
+ after.pointers[0] = consider;
+
+ // adjustment for would consider
+ BOOST_CHECK_CLOSE(-1.687872 - -0.2922095 - 0.30103, RevealAfter(m, current.left, current.right, after, 0), 0.001);
+
+ BOOST_CHECK_EQUAL(2, current.left.length);
+ BOOST_CHECK_EQUAL(would, current.left.pointers[0]);
+ BOOST_CHECK_EQUAL(false, current.left.full);
+
+ WordIndex also = m.GetVocabulary().Index("also");
+ Right before;
+ before.length = 1;
+ before.words[0] = also;
+ before.backoff[0] = -0.30103;
+ // r(would) = -0.2922095 [i would], r(would -> consider) = -1.988902 [b(would) + p(consider)]
+ // p(also -> would) = -2, p(also would -> consider) = -3
+ BOOST_CHECK_CLOSE(-2 + 0.2922095 -3 + 1.988902, RevealBefore(m, before, 0, false, current.left, current.right), 0.001);
+ BOOST_CHECK_EQUAL(0, current.left.length);
+ BOOST_CHECK(current.left.full);
+ BOOST_CHECK_EQUAL(2, current.right.length);
+ BOOST_CHECK_EQUAL(would, current.right.words[0]);
+ BOOST_CHECK_EQUAL(also, current.right.words[1]);
+}
+
+BOOST_AUTO_TEST_CASE(EndSentence) {
+ WordIndex loin = m.GetVocabulary().Index("loin");
+ WordIndex period = m.GetVocabulary().Index(".");
+ WordIndex eos = m.GetVocabulary().EndSentence();
+
+ ChartState between;
+ between.left.length = 1;
+ between.left.pointers[0] = eos;
+ between.left.full = true;
+ between.right.length = 0;
+
+ Right before;
+ before.words[0] = period;
+ before.words[1] = loin;
+ before.backoff[0] = -0.845098;
+ before.backoff[1] = 0.0;
+
+ before.length = 1;
+ BOOST_CHECK_CLOSE(-0.0410707, RevealBefore(m, before, 0, true, between.left, between.right), 0.001);
+ BOOST_CHECK_EQUAL(0, between.left.length);
+}
+
+float ScoreFragment(const RestProbingModel &model, unsigned int *begin, unsigned int *end, ChartState &out) {
+ RuleScore<RestProbingModel> scorer(model, out);
+ for (unsigned int *i = begin; i < end; ++i) {
+ scorer.Terminal(*i);
+ }
+ return scorer.Finish();
+}
+
+void CheckAdjustment(const RestProbingModel &model, float expect, const Right &before_in, bool before_full, ChartState between, const Left &after_in) {
+ Right before(before_in);
+ Left after(after_in);
+ after.full = false;
+ float got = 0.0;
+ for (unsigned int i = 1; i < 5; ++i) {
+ if (before_in.length >= i) {
+ before.length = i;
+ got += RevealBefore(model, before, i - 1, false, between.left, between.right);
+ }
+ if (after_in.length >= i) {
+ after.length = i;
+ got += RevealAfter(model, between.left, between.right, after, i - 1);
+ }
+ }
+ if (after_in.full) {
+ after.full = true;
+ got += RevealAfter(model, between.left, between.right, after, after.length);
+ }
+ if (before_full) {
+ got += RevealBefore(model, before, before.length, true, between.left, between.right);
+ }
+ // Sometimes they're zero and BOOST_CHECK_CLOSE fails for this.
+ BOOST_CHECK(fabs(expect - got) < 0.001);
+}
+
+void FullDivide(const RestProbingModel &model, StringPiece str) {
+ std::vector<WordIndex> indices;
+ for (util::TokenIter<util::SingleCharacter, true> i(str, ' '); i; ++i) {
+ indices.push_back(model.GetVocabulary().Index(*i));
+ }
+ ChartState full_state;
+ float full = ScoreFragment(model, &indices.front(), &indices.back() + 1, full_state);
+
+ ChartState before_state;
+ before_state.left.full = false;
+ RuleScore<RestProbingModel> before_scorer(model, before_state);
+ float before_score = 0.0;
+ for (unsigned int before = 0; before < indices.size(); ++before) {
+ for (unsigned int after = before; after <= indices.size(); ++after) {
+ ChartState after_state, between_state;
+ float after_score = ScoreFragment(model, &indices.front() + after, &indices.front() + indices.size(), after_state);
+ float between_score = ScoreFragment(model, &indices.front() + before, &indices.front() + after, between_state);
+ CheckAdjustment(model, full - before_score - after_score - between_score, before_state.right, before_state.left.full, between_state, after_state.left);
+ }
+ before_scorer.Terminal(indices[before]);
+ before_score = before_scorer.Finish();
+ }
+}
+
+BOOST_AUTO_TEST_CASE(Strings) {
+ FullDivide(m, "also would consider");
+ FullDivide(m, "looking on a little more loin . </s>");
+ FullDivide(m, "in biarritz watching considering looking . on a little more loin also would consider higher to look good unknown the screening foo bar , unknown however unknown </s>");
+}
+
+BOOST_AUTO_TEST_SUITE_END()
+} // namespace
+} // namespace ngram
+} // namespace lm
diff --git a/src/kenlm/lm/quantize.cc b/src/kenlm/lm/quantize.cc
new file mode 100644
index 0000000..02b5dbc
--- /dev/null
+++ b/src/kenlm/lm/quantize.cc
@@ -0,0 +1,93 @@
+/* Quantize into bins of equal size as described in
+ * M. Federico and N. Bertoldi. 2006. How many bits are needed
+ * to store probabilities for phrase-based translation? In Proc.
+ * of the Workshop on Statistical Machine Translation, pages
+ * 94–101, New York City, June. Association for Computa-
+ * tional Linguistics.
+ */
+
+#include "lm/quantize.hh"
+
+#include "lm/binary_format.hh"
+#include "lm/lm_exception.hh"
+#include "util/file.hh"
+
+#include <algorithm>
+#include <numeric>
+
+namespace lm {
+namespace ngram {
+
+namespace {
+
+void MakeBins(std::vector<float> &values, float *centers, uint32_t bins) {
+ std::sort(values.begin(), values.end());
+ std::vector<float>::const_iterator start = values.begin(), finish;
+ for (uint32_t i = 0; i < bins; ++i, ++centers, start = finish) {
+ finish = values.begin() + ((values.size() * static_cast<uint64_t>(i + 1)) / bins);
+ if (finish == start) {
+ // zero length bucket.
+ *centers = i ? *(centers - 1) : -std::numeric_limits<float>::infinity();
+ } else {
+ *centers = std::accumulate(start, finish, 0.0) / static_cast<float>(finish - start);
+ }
+ }
+}
+
+const char kSeparatelyQuantizeVersion = 2;
+
+} // namespace
+
+void SeparatelyQuantize::UpdateConfigFromBinary(const BinaryFormat &file, uint64_t offset, Config &config) {
+ unsigned char buffer[3];
+ file.ReadForConfig(buffer, 3, offset);
+ char version = buffer[0];
+ config.prob_bits = buffer[1];
+ config.backoff_bits = buffer[2];
+ if (version != kSeparatelyQuantizeVersion) UTIL_THROW(FormatLoadException, "This file has quantization version " << (unsigned)version << " but the code expects version " << (unsigned)kSeparatelyQuantizeVersion);
+}
+
+void SeparatelyQuantize::SetupMemory(void *base, unsigned char order, const Config &config) {
+ prob_bits_ = config.prob_bits;
+ backoff_bits_ = config.backoff_bits;
+ // We need the reserved values.
+ if (config.prob_bits == 0) UTIL_THROW(ConfigException, "You can't quantize probability to zero");
+ if (config.backoff_bits == 0) UTIL_THROW(ConfigException, "You can't quantize backoff to zero");
+ if (config.prob_bits > 25) UTIL_THROW(ConfigException, "For efficiency reasons, quantizing probability supports at most 25 bits. Currently you have requested " << static_cast<unsigned>(config.prob_bits) << " bits.");
+ if (config.backoff_bits > 25) UTIL_THROW(ConfigException, "For efficiency reasons, quantizing backoff supports at most 25 bits. Currently you have requested " << static_cast<unsigned>(config.backoff_bits) << " bits.");
+ // Reserve 8 byte header for bit counts.
+ actual_base_ = static_cast<uint8_t*>(base);
+ float *start = reinterpret_cast<float*>(actual_base_ + 8);
+ for (unsigned char i = 0; i < order - 2; ++i) {
+ tables_[i][0] = Bins(prob_bits_, start);
+ start += (1ULL << prob_bits_);
+ tables_[i][1] = Bins(backoff_bits_, start);
+ start += (1ULL << backoff_bits_);
+ }
+ longest_ = tables_[order - 2][0] = Bins(prob_bits_, start);
+}
+
+void SeparatelyQuantize::Train(uint8_t order, std::vector<float> &prob, std::vector<float> &backoff) {
+ TrainProb(order, prob);
+
+ // Backoff
+ float *centers = tables_[order - 2][1].Populate();
+ *(centers++) = kNoExtensionBackoff;
+ *(centers++) = kExtensionBackoff;
+ MakeBins(backoff, centers, (1ULL << backoff_bits_) - 2);
+}
+
+void SeparatelyQuantize::TrainProb(uint8_t order, std::vector<float> &prob) {
+ float *centers = tables_[order - 2][0].Populate();
+ MakeBins(prob, centers, (1ULL << prob_bits_));
+}
+
+void SeparatelyQuantize::FinishedLoading(const Config &config) {
+ uint8_t *actual_base = actual_base_;
+ *(actual_base++) = kSeparatelyQuantizeVersion; // version
+ *(actual_base++) = config.prob_bits;
+ *(actual_base++) = config.backoff_bits;
+}
+
+} // namespace ngram
+} // namespace lm
diff --git a/src/kenlm/lm/quantize.hh b/src/kenlm/lm/quantize.hh
new file mode 100644
index 0000000..8500ace
--- /dev/null
+++ b/src/kenlm/lm/quantize.hh
@@ -0,0 +1,233 @@
+#ifndef LM_QUANTIZE_H
+#define LM_QUANTIZE_H
+
+#include "lm/blank.hh"
+#include "lm/config.hh"
+#include "lm/max_order.hh"
+#include "lm/model_type.hh"
+#include "util/bit_packing.hh"
+
+#include <algorithm>
+#include <vector>
+
+#include <stdint.h>
+
+#include <iostream>
+
+namespace lm {
+namespace ngram {
+
+struct Config;
+class BinaryFormat;
+
+/* Store values directly and don't quantize. */
+class DontQuantize {
+ public:
+ static const ModelType kModelTypeAdd = static_cast<ModelType>(0);
+ static void UpdateConfigFromBinary(const BinaryFormat &, uint64_t, Config &) {}
+ static uint64_t Size(uint8_t /*order*/, const Config &/*config*/) { return 0; }
+ static uint8_t MiddleBits(const Config &/*config*/) { return 63; }
+ static uint8_t LongestBits(const Config &/*config*/) { return 31; }
+
+ class MiddlePointer {
+ public:
+ MiddlePointer(const DontQuantize & /*quant*/, unsigned char /*order_minus_2*/, util::BitAddress address) : address_(address) {}
+
+ MiddlePointer() : address_(NULL, 0) {}
+
+ bool Found() const {
+ return address_.base != NULL;
+ }
+
+ float Prob() const {
+ return util::ReadNonPositiveFloat31(address_.base, address_.offset);
+ }
+
+ float Backoff() const {
+ return util::ReadFloat32(address_.base, address_.offset + 31);
+ }
+
+ float Rest() const { return Prob(); }
+
+ void Write(float prob, float backoff) {
+ util::WriteNonPositiveFloat31(address_.base, address_.offset, prob);
+ util::WriteFloat32(address_.base, address_.offset + 31, backoff);
+ }
+
+ private:
+ util::BitAddress address_;
+ };
+
+ class LongestPointer {
+ public:
+ explicit LongestPointer(const DontQuantize &/*quant*/, util::BitAddress address) : address_(address) {}
+
+ LongestPointer() : address_(NULL, 0) {}
+
+ bool Found() const {
+ return address_.base != NULL;
+ }
+
+ float Prob() const {
+ return util::ReadNonPositiveFloat31(address_.base, address_.offset);
+ }
+
+ void Write(float prob) {
+ util::WriteNonPositiveFloat31(address_.base, address_.offset, prob);
+ }
+
+ private:
+ util::BitAddress address_;
+ };
+
+ DontQuantize() {}
+
+ void SetupMemory(void * /*start*/, unsigned char /*order*/, const Config & /*config*/) {}
+
+ static const bool kTrain = false;
+ // These should never be called because kTrain is false.
+ void Train(uint8_t /*order*/, std::vector<float> &/*prob*/, std::vector<float> &/*backoff*/) {}
+ void TrainProb(uint8_t, std::vector<float> &/*prob*/) {}
+
+ void FinishedLoading(const Config &) {}
+};
+
+class SeparatelyQuantize {
+ private:
+ class Bins {
+ public:
+ // Sigh C++ default constructor
+ Bins() {}
+
+ Bins(uint8_t bits, float *begin) : begin_(begin), end_(begin_ + (1ULL << bits)), bits_(bits), mask_((1ULL << bits) - 1) {}
+
+ float *Populate() { return begin_; }
+
+ uint64_t EncodeProb(float value) const {
+ return Encode(value, 0);
+ }
+
+ uint64_t EncodeBackoff(float value) const {
+ if (value == 0.0) {
+ return HasExtension(value) ? kExtensionQuant : kNoExtensionQuant;
+ }
+ return Encode(value, 2);
+ }
+
+ float Decode(std::size_t off) const { return begin_[off]; }
+
+ uint8_t Bits() const { return bits_; }
+
+ uint64_t Mask() const { return mask_; }
+
+ private:
+ uint64_t Encode(float value, size_t reserved) const {
+ const float *above = std::lower_bound(static_cast<const float*>(begin_) + reserved, end_, value);
+ if (above == begin_ + reserved) return reserved;
+ if (above == end_) return end_ - begin_ - 1;
+ return above - begin_ - (value - *(above - 1) < *above - value);
+ }
+
+ float *begin_;
+ const float *end_;
+ uint8_t bits_;
+ uint64_t mask_;
+ };
+
+ public:
+ static const ModelType kModelTypeAdd = kQuantAdd;
+
+ static void UpdateConfigFromBinary(const BinaryFormat &file, uint64_t offset, Config &config);
+
+ static uint64_t Size(uint8_t order, const Config &config) {
+ uint64_t longest_table = (static_cast<uint64_t>(1) << static_cast<uint64_t>(config.prob_bits)) * sizeof(float);
+ uint64_t middle_table = (static_cast<uint64_t>(1) << static_cast<uint64_t>(config.backoff_bits)) * sizeof(float) + longest_table;
+ // unigrams are currently not quantized so no need for a table.
+    return (order - 2) * middle_table + longest_table + /* for the bit counts and alignment padding */ 8;
+ }
+
+ static uint8_t MiddleBits(const Config &config) { return config.prob_bits + config.backoff_bits; }
+ static uint8_t LongestBits(const Config &config) { return config.prob_bits; }
+
+ class MiddlePointer {
+ public:
+ MiddlePointer(const SeparatelyQuantize &quant, unsigned char order_minus_2, const util::BitAddress &address) : bins_(quant.GetTables(order_minus_2)), address_(address) {}
+
+ MiddlePointer() : address_(NULL, 0) {}
+
+ bool Found() const { return address_.base != NULL; }
+
+ float Prob() const {
+ return ProbBins().Decode(util::ReadInt25(address_.base, address_.offset + BackoffBins().Bits(), ProbBins().Bits(), ProbBins().Mask()));
+ }
+
+ float Backoff() const {
+ return BackoffBins().Decode(util::ReadInt25(address_.base, address_.offset, BackoffBins().Bits(), BackoffBins().Mask()));
+ }
+
+ float Rest() const { return Prob(); }
+
+ void Write(float prob, float backoff) const {
+ util::WriteInt57(address_.base, address_.offset, ProbBins().Bits() + BackoffBins().Bits(),
+ (ProbBins().EncodeProb(prob) << BackoffBins().Bits()) | BackoffBins().EncodeBackoff(backoff));
+ }
+
+ private:
+ const Bins &ProbBins() const { return bins_[0]; }
+ const Bins &BackoffBins() const { return bins_[1]; }
+ const Bins *bins_;
+
+ util::BitAddress address_;
+ };
+
+ class LongestPointer {
+ public:
+ LongestPointer(const SeparatelyQuantize &quant, const util::BitAddress &address) : table_(&quant.LongestTable()), address_(address) {}
+
+ LongestPointer() : address_(NULL, 0) {}
+
+ bool Found() const { return address_.base != NULL; }
+
+ void Write(float prob) const {
+ util::WriteInt25(address_.base, address_.offset, table_->Bits(), table_->EncodeProb(prob));
+ }
+
+ float Prob() const {
+ return table_->Decode(util::ReadInt25(address_.base, address_.offset, table_->Bits(), table_->Mask()));
+ }
+
+ private:
+ const Bins *table_;
+ util::BitAddress address_;
+ };
+
+ SeparatelyQuantize() {}
+
+ void SetupMemory(void *start, unsigned char order, const Config &config);
+
+ static const bool kTrain = true;
+ // Assumes 0.0 is removed from backoff.
+ void Train(uint8_t order, std::vector<float> &prob, std::vector<float> &backoff);
+ // Train just probabilities (for longest order).
+ void TrainProb(uint8_t order, std::vector<float> &prob);
+
+ void FinishedLoading(const Config &config);
+
+ const Bins *GetTables(unsigned char order_minus_2) const { return tables_[order_minus_2]; }
+
+ const Bins &LongestTable() const { return longest_; }
+
+ private:
+ Bins tables_[KENLM_MAX_ORDER - 1][2];
+
+ Bins longest_;
+
+ uint8_t *actual_base_;
+
+ uint8_t prob_bits_, backoff_bits_;
+};
+
+} // namespace ngram
+} // namespace lm
+
+#endif // LM_QUANTIZE_H
diff --git a/src/kenlm/lm/query_main.cc b/src/kenlm/lm/query_main.cc
new file mode 100644
index 0000000..0bd28f7
--- /dev/null
+++ b/src/kenlm/lm/query_main.cc
@@ -0,0 +1,115 @@
+#include "lm/ngram_query.hh"
+#include "util/getopt.hh"
+
+#ifdef WITH_NPLM
+#include "lm/wrappers/nplm.hh"
+#endif
+
+#include <stdlib.h>
+
+void Usage(const char *name) {
+ std::cerr <<
+ "KenLM was compiled with maximum order " << KENLM_MAX_ORDER << ".\n"
+ "Usage: " << name << " [-b] [-n] [-w] [-s] lm_file\n"
+ "-b: Do not buffer output.\n"
+ "-n: Do not wrap the input in <s> and </s>.\n"
+ "-v summary|sentence|word: Level of verbosity\n"
+ "-l lazy|populate|read|parallel: Load lazily, with populate, or malloc+read\n"
+ "The default loading method is populate on Linux and read on others.\n";
+ exit(1);
+}
+
+int main(int argc, char *argv[]) {
+ if (argc == 1 || (argc == 2 && !strcmp(argv[1], "--help")))
+ Usage(argv[0]);
+
+ lm::ngram::Config config;
+ bool sentence_context = true;
+ unsigned int verbosity = 2;
+ bool flush = false;
+
+ int opt;
+ while ((opt = getopt(argc, argv, "bnv:l:")) != -1) {
+ switch (opt) {
+ case 'b':
+ flush = true;
+ break;
+ case 'n':
+ sentence_context = false;
+ break;
+ case 'v':
+ if (!strcmp(optarg, "word") || !strcmp(optarg, "2")) {
+ verbosity = 2;
+ } else if (!strcmp(optarg, "sentence") || !strcmp(optarg, "1")) {
+ verbosity = 1;
+ } else if (!strcmp(optarg, "summary") || !strcmp(optarg, "0")) {
+ verbosity = 0;
+ } else {
+ Usage(argv[0]);
+ }
+ break;
+ case 'l':
+ if (!strcmp(optarg, "lazy")) {
+ config.load_method = util::LAZY;
+ } else if (!strcmp(optarg, "populate")) {
+ config.load_method = util::POPULATE_OR_READ;
+ } else if (!strcmp(optarg, "read")) {
+ config.load_method = util::READ;
+ } else if (!strcmp(optarg, "parallel")) {
+ config.load_method = util::PARALLEL_READ;
+ } else {
+ Usage(argv[0]);
+ }
+ break;
+ case 'h':
+ default:
+ Usage(argv[0]);
+ }
+ }
+ if (optind + 1 != argc)
+ Usage(argv[0]);
+ lm::ngram::QueryPrinter printer(1, verbosity >= 2, verbosity >= 1, true, flush);
+ const char *file = argv[optind];
+ try {
+ using namespace lm::ngram;
+ ModelType model_type;
+ if (RecognizeBinary(file, model_type)) {
+ switch(model_type) {
+ case PROBING:
+ Query<lm::ngram::ProbingModel>(file, config, sentence_context, printer);
+ break;
+ case REST_PROBING:
+ Query<lm::ngram::RestProbingModel>(file, config, sentence_context, printer);
+ break;
+ case TRIE:
+ Query<TrieModel>(file, config, sentence_context, printer);
+ break;
+ case QUANT_TRIE:
+ Query<QuantTrieModel>(file, config, sentence_context, printer);
+ break;
+ case ARRAY_TRIE:
+ Query<ArrayTrieModel>(file, config, sentence_context, printer);
+ break;
+ case QUANT_ARRAY_TRIE:
+ Query<QuantArrayTrieModel>(file, config, sentence_context, printer);
+ break;
+ default:
+ std::cerr << "Unrecognized kenlm model type " << model_type << std::endl;
+ abort();
+ }
+#ifdef WITH_NPLM
+ } else if (lm::np::Model::Recognize(file)) {
+ lm::np::Model model(file);
+ Query<lm::np::Model, lm::ngram::QueryPrinter>(model, sentence_context, printer);
+ Query<lm::np::Model, lm::ngram::QueryPrinter>(model, sentence_context, printer);
+#endif
+ } else {
+ Query<ProbingModel>(file, config, sentence_context, printer);
+ }
+ util::PrintUsage(std::cerr);
+ } catch (const std::exception &e) {
+ std::cerr << e.what() << std::endl;
+ return 1;
+ }
+ return 0;
+}
diff --git a/src/kenlm/lm/read_arpa.cc b/src/kenlm/lm/read_arpa.cc
new file mode 100644
index 0000000..dc05a65
--- /dev/null
+++ b/src/kenlm/lm/read_arpa.cc
@@ -0,0 +1,161 @@
+#include "lm/read_arpa.hh"
+
+#include "lm/blank.hh"
+#include "util/file.hh"
+
+#include <cmath>
+#include <cstdlib>
+#include <iostream>
+#include <sstream>
+#include <vector>
+
+#include <cctype>
+#include <cstring>
+#include <stdint.h>
+
+#ifdef WIN32
+#include <float.h>
+#endif
+
+namespace lm {
+
+// 1 for '\t', '\n', and ' '. This is stricter than isspace.
+const bool kARPASpaces[256] = {0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0};
+
+namespace {
+
+bool IsEntirelyWhiteSpace(const StringPiece &line) {
+ for (size_t i = 0; i < static_cast<size_t>(line.size()); ++i) {
+ if (!isspace(line.data()[i])) return false;
+ }
+ return true;
+}
+
+const char kBinaryMagic[] = "mmap lm http://kheafield.com/code";
+
+// strtoull isn't portable enough :-(
+uint64_t ReadCount(const std::string &from) {
+ std::stringstream stream(from);
+ uint64_t ret;
+ stream >> ret;
+ UTIL_THROW_IF(!stream, FormatLoadException, "Bad count " << from);
+ return ret;
+}
+
+} // namespace
+
+void ReadARPACounts(util::FilePiece &in, std::vector<uint64_t> &number) {
+ number.clear();
+ StringPiece line = in.ReadLine();
+ // In general, ARPA files can have arbitrary text before "\data\"
+ // But in KenLM, we require such lines to start with "#", so that
+ // we can do stricter error checking
+ while (IsEntirelyWhiteSpace(line) || starts_with(line, "#")) {
+ line = in.ReadLine();
+ }
+
+ if (line != "\\data\\") {
+ if ((line.size() >= 2) && (line.data()[0] == 0x1f) && (static_cast<unsigned char>(line.data()[1]) == 0x8b)) {
+ UTIL_THROW(FormatLoadException, "Looks like a gzip file. If this is an ARPA file, pipe " << in.FileName() << " through zcat. If this already in binary format, you need to decompress it because mmap doesn't work on top of gzip.");
+ }
+ if (static_cast<size_t>(line.size()) >= strlen(kBinaryMagic) && StringPiece(line.data(), strlen(kBinaryMagic)) == kBinaryMagic)
+ UTIL_THROW(FormatLoadException, "This looks like a binary file but got sent to the ARPA parser. Did you compress the binary file or pass a binary file where only ARPA files are accepted?");
+ UTIL_THROW_IF(line.size() >= 4 && StringPiece(line.data(), 4) == "blmt", FormatLoadException, "This looks like an IRSTLM binary file. Did you forget to pass --text yes to compile-lm?");
+ UTIL_THROW_IF(line == "iARPA", FormatLoadException, "This looks like an IRSTLM iARPA file. You need an ARPA file. Run\n compile-lm --text yes " << in.FileName() << " " << in.FileName() << ".arpa\nfirst.");
+ UTIL_THROW(FormatLoadException, "first non-empty line was \"" << line << "\" not \\data\\.");
+ }
+ while (!IsEntirelyWhiteSpace(line = in.ReadLine())) {
+ if (line.size() < 6 || strncmp(line.data(), "ngram ", 6)) UTIL_THROW(FormatLoadException, "count line \"" << line << "\" doesn't begin with \"ngram \"");
+ // So strtol doesn't go off the end of line.
+ std::string remaining(line.data() + 6, line.size() - 6);
+ char *end_ptr;
+ unsigned int length = std::strtol(remaining.c_str(), &end_ptr, 10);
+ if ((end_ptr == remaining.c_str()) || (length - 1 != number.size())) UTIL_THROW(FormatLoadException, "ngram count lengths should be consecutive starting with 1: " << line);
+ if (*end_ptr != '=') UTIL_THROW(FormatLoadException, "Expected = immediately following the first number in the count line " << line);
+ ++end_ptr;
+ number.push_back(ReadCount(end_ptr));
+ }
+}
+
+void ReadNGramHeader(util::FilePiece &in, unsigned int length) {
+ StringPiece line;
+ while (IsEntirelyWhiteSpace(line = in.ReadLine())) {}
+ std::stringstream expected;
+ expected << '\\' << length << "-grams:";
+ if (line != expected.str()) UTIL_THROW(FormatLoadException, "Was expecting n-gram header " << expected.str() << " but got " << line << " instead");
+}
+
+void ReadBackoff(util::FilePiece &in, Prob &/*weights*/) {
+ switch (in.get()) {
+ case '\t':
+ {
+ float got = in.ReadFloat();
+ if (got != 0.0)
+ UTIL_THROW(FormatLoadException, "Non-zero backoff " << got << " provided for an n-gram that should have no backoff");
+ }
+ break;
+ case '\n':
+ break;
+ default:
+ UTIL_THROW(FormatLoadException, "Expected tab or newline for backoff");
+ }
+}
+
+void ReadBackoff(util::FilePiece &in, float &backoff) {
+ // Always make zero negative.
+ // Negative zero means that no (n+1)-gram has this n-gram as context.
+ // Therefore the hypothesis state can be shorter. Of course, many n-grams
+ // are context for (n+1)-grams. An algorithm in the data structure will go
+ // back and set the backoff to positive zero in these cases.
+ switch (in.get()) {
+ case '\t':
+ backoff = in.ReadFloat();
+ if (backoff == ngram::kExtensionBackoff) backoff = ngram::kNoExtensionBackoff;
+ {
+#if defined(WIN32) && !defined(__MINGW32__)
+ int float_class = _fpclass(backoff);
+ UTIL_THROW_IF(float_class == _FPCLASS_SNAN || float_class == _FPCLASS_QNAN || float_class == _FPCLASS_NINF || float_class == _FPCLASS_PINF, FormatLoadException, "Bad backoff " << backoff);
+#else
+ int float_class = std::fpclassify(backoff);
+ UTIL_THROW_IF(float_class == FP_NAN || float_class == FP_INFINITE, FormatLoadException, "Bad backoff " << backoff);
+#endif
+ }
+ UTIL_THROW_IF(in.get() != '\n', FormatLoadException, "Expected newline after backoff");
+ break;
+ case '\n':
+ backoff = ngram::kNoExtensionBackoff;
+ break;
+ default:
+ UTIL_THROW(FormatLoadException, "Expected tab or newline for backoff");
+ }
+}
+
+void ReadEnd(util::FilePiece &in) {
+ StringPiece line;
+ do {
+ line = in.ReadLine();
+ } while (IsEntirelyWhiteSpace(line));
+ if (line != "\\end\\") UTIL_THROW(FormatLoadException, "Expected \\end\\ but the ARPA file has " << line);
+
+ try {
+ while (true) {
+ line = in.ReadLine();
+ if (!IsEntirelyWhiteSpace(line)) UTIL_THROW(FormatLoadException, "Trailing line " << line);
+ }
+ } catch (const util::EndOfFileException &e) {}
+}
+
+void PositiveProbWarn::Warn(float prob) {
+ switch (action_) {
+ case THROW_UP:
+ UTIL_THROW(FormatLoadException, "Positive log probability " << prob << " in the model. This is a bug in IRSTLM; you can set config.positive_log_probability = SILENT or pass -i to build_binary to substitute 0.0 for the log probability. Error");
+ case COMPLAIN:
+ std::cerr << "There's a positive log probability " << prob << " in the ARPA file, probably because of a bug in IRSTLM. This and subsequent entries will be mapped to 0 log probability." << std::endl;
+ action_ = SILENT;
+ break;
+ case SILENT:
+ break;
+ }
+}
+
+} // namespace lm
diff --git a/src/joshua/decoder/ff/lm/kenlm/lm/read_arpa.hh b/src/kenlm/lm/read_arpa.hh
similarity index 100%
rename from src/joshua/decoder/ff/lm/kenlm/lm/read_arpa.hh
rename to src/kenlm/lm/read_arpa.hh
diff --git a/src/kenlm/lm/return.hh b/src/kenlm/lm/return.hh
new file mode 100644
index 0000000..ee1f25e
--- /dev/null
+++ b/src/kenlm/lm/return.hh
@@ -0,0 +1,42 @@
+#ifndef LM_RETURN_H
+#define LM_RETURN_H
+
+#include <stdint.h>
+
+namespace lm {
+/* Structure returned by scoring routines. */
+struct FullScoreReturn {
+ // log10 probability
+ float prob;
+
+ /* The length of n-gram matched. Do not use this for recombination.
+ * Consider a model containing only the following n-grams:
+ * -1 foo
+ * -3.14 bar
+ * -2.718 baz -5
+ * -6 foo bar
+ *
+ * If you score ``bar'' then ngram_length is 1 and recombination state is the
+ * empty string because bar has zero backoff and does not extend to the
+ * right.
+ * If you score ``foo'' then ngram_length is 1 and recombination state is
+ * ``foo''.
+ *
+ * Ideally, keep output states around and compare them. Failing that,
+ * get out_state.ValidLength() and use that length for recombination.
+ */
+ unsigned char ngram_length;
+
+ /* Left extension information. If independent_left is set, then prob is
+ * independent of words to the left (up to additional backoff). Otherwise,
+ * extend_left indicates how to efficiently extend further to the left.
+ */
+ bool independent_left;
+ uint64_t extend_left; // Defined only if independent_left
+
+ // Rest cost for extension to the left.
+ float rest;
+};
+
+} // namespace lm
+#endif // LM_RETURN_H
diff --git a/src/joshua/decoder/ff/lm/kenlm/lm/search_hashed.cc b/src/kenlm/lm/search_hashed.cc
similarity index 100%
rename from src/joshua/decoder/ff/lm/kenlm/lm/search_hashed.cc
rename to src/kenlm/lm/search_hashed.cc
diff --git a/src/joshua/decoder/ff/lm/kenlm/lm/search_hashed.hh b/src/kenlm/lm/search_hashed.hh
similarity index 100%
rename from src/joshua/decoder/ff/lm/kenlm/lm/search_hashed.hh
rename to src/kenlm/lm/search_hashed.hh
diff --git a/src/kenlm/lm/search_trie.cc b/src/kenlm/lm/search_trie.cc
new file mode 100644
index 0000000..a63985a
--- /dev/null
+++ b/src/kenlm/lm/search_trie.cc
@@ -0,0 +1,600 @@
+/* This is where the trie is built. It's on-disk. */
+#include "lm/search_trie.hh"
+
+#include "lm/bhiksha.hh"
+#include "lm/binary_format.hh"
+#include "lm/blank.hh"
+#include "lm/lm_exception.hh"
+#include "lm/max_order.hh"
+#include "lm/quantize.hh"
+#include "lm/trie.hh"
+#include "lm/trie_sort.hh"
+#include "lm/vocab.hh"
+#include "lm/weights.hh"
+#include "lm/word_index.hh"
+#include "util/ersatz_progress.hh"
+#include "util/mmap.hh"
+#include "util/proxy_iterator.hh"
+#include "util/scoped.hh"
+#include "util/sized_iterator.hh"
+
+#include <algorithm>
+#include <cstring>
+#include <cstdio>
+#include <cstdlib>
+#include <queue>
+#include <limits>
+#include <numeric>
+#include <vector>
+
+#if defined(_WIN32) || defined(_WIN64)
+#include <windows.h>
+#endif
+
+namespace lm {
+namespace ngram {
+namespace trie {
+namespace {
+
+void ReadOrThrow(FILE *from, void *data, size_t size) {
+ UTIL_THROW_IF(1 != std::fread(data, size, 1, from), util::ErrnoException, "Short read");
+}
+
+int Compare(unsigned char order, const void *first_void, const void *second_void) {
+ const WordIndex *first = reinterpret_cast<const WordIndex*>(first_void), *second = reinterpret_cast<const WordIndex*>(second_void);
+ const WordIndex *end = first + order;
+ for (; first != end; ++first, ++second) {
+ if (*first < *second) return -1;
+ if (*first > *second) return 1;
+ }
+ return 0;
+}
+
+struct ProbPointer {
+ unsigned char array;
+ uint64_t index;
+};
+
+// Array of n-grams and float indices.
+class BackoffMessages {
+ public:
+ void Init(std::size_t entry_size) {
+ current_ = NULL;
+ allocated_ = NULL;
+ entry_size_ = entry_size;
+ }
+
+ void Add(const WordIndex *to, ProbPointer index) {
+ while (current_ + entry_size_ > allocated_) {
+ std::size_t allocated_size = allocated_ - (uint8_t*)backing_.get();
+ Resize(std::max<std::size_t>(allocated_size * 2, entry_size_));
+ }
+ memcpy(current_, to, entry_size_ - sizeof(ProbPointer));
+ *reinterpret_cast<ProbPointer*>(current_ + entry_size_ - sizeof(ProbPointer)) = index;
+ current_ += entry_size_;
+ }
+
+ void Apply(float *const *const base, FILE *unigrams) {
+ FinishedAdding();
+ if (current_ == allocated_) return;
+ rewind(unigrams);
+ ProbBackoff weights;
+ WordIndex unigram = 0;
+ ReadOrThrow(unigrams, &weights, sizeof(weights));
+ for (; current_ != allocated_; current_ += entry_size_) {
+ const WordIndex &cur_word = *reinterpret_cast<const WordIndex*>(current_);
+ for (; unigram < cur_word; ++unigram) {
+ ReadOrThrow(unigrams, &weights, sizeof(weights));
+ }
+ if (!HasExtension(weights.backoff)) {
+ weights.backoff = kExtensionBackoff;
+ UTIL_THROW_IF(fseek(unigrams, -sizeof(weights), SEEK_CUR), util::ErrnoException, "Seeking backwards to denote unigram extension failed.");
+ util::WriteOrThrow(unigrams, &weights, sizeof(weights));
+ }
+ const ProbPointer &write_to = *reinterpret_cast<const ProbPointer*>(current_ + sizeof(WordIndex));
+ base[write_to.array][write_to.index] += weights.backoff;
+ }
+ backing_.reset();
+ }
+
+ void Apply(float *const *const base, RecordReader &reader) {
+ FinishedAdding();
+ if (current_ == allocated_) return;
+ // We'll also use the same buffer to record messages to blanks that they extend.
+ WordIndex *extend_out = reinterpret_cast<WordIndex*>(current_);
+ const unsigned char order = (entry_size_ - sizeof(ProbPointer)) / sizeof(WordIndex);
+ for (reader.Rewind(); reader && (current_ != allocated_); ) {
+ switch (Compare(order, reader.Data(), current_)) {
+ case -1:
+ ++reader;
+ break;
+ case 1:
+ // Message but nobody to receive it. Write it down at the beginning of the buffer so we can inform this blank that it extends.
+ for (const WordIndex *w = reinterpret_cast<const WordIndex *>(current_); w != reinterpret_cast<const WordIndex *>(current_) + order; ++w, ++extend_out) *extend_out = *w;
+ current_ += entry_size_;
+ break;
+ case 0:
+ float &backoff = reinterpret_cast<ProbBackoff*>((uint8_t*)reader.Data() + order * sizeof(WordIndex))->backoff;
+ if (!HasExtension(backoff)) {
+ backoff = kExtensionBackoff;
+ reader.Overwrite(&backoff, sizeof(float));
+ } else {
+ const ProbPointer &write_to = *reinterpret_cast<const ProbPointer*>(current_ + entry_size_ - sizeof(ProbPointer));
+ base[write_to.array][write_to.index] += backoff;
+ }
+ current_ += entry_size_;
+ break;
+ }
+ }
+ // Now this is a list of blanks that extend right.
+ entry_size_ = sizeof(WordIndex) * order;
+ Resize(sizeof(WordIndex) * (extend_out - (const WordIndex*)backing_.get()));
+ current_ = (uint8_t*)backing_.get();
+ }
+
+ // Call after Apply
+ bool Extends(unsigned char order, const WordIndex *words) {
+ if (current_ == allocated_) return false;
+ assert(order * sizeof(WordIndex) == entry_size_);
+ while (true) {
+ switch(Compare(order, words, current_)) {
+ case 1:
+ current_ += entry_size_;
+ if (current_ == allocated_) return false;
+ break;
+ case -1:
+ return false;
+ case 0:
+ return true;
+ }
+ }
+ }
+
+ private:
+ void FinishedAdding() {
+ Resize(current_ - (uint8_t*)backing_.get());
+ // Sort requests in same order as files.
+ std::sort(
+ util::SizedIterator(util::SizedProxy(backing_.get(), entry_size_)),
+ util::SizedIterator(util::SizedProxy(current_, entry_size_)),
+ util::SizedCompare<EntryCompare>(EntryCompare((entry_size_ - sizeof(ProbPointer)) / sizeof(WordIndex))));
+ current_ = (uint8_t*)backing_.get();
+ }
+
+ void Resize(std::size_t to) {
+ std::size_t current = current_ - (uint8_t*)backing_.get();
+ backing_.call_realloc(to);
+ current_ = (uint8_t*)backing_.get() + current;
+ allocated_ = (uint8_t*)backing_.get() + to;
+ }
+
+ util::scoped_malloc backing_;
+
+ uint8_t *current_, *allocated_;
+
+ std::size_t entry_size_;
+};
+
+const float kBadProb = std::numeric_limits<float>::infinity();
+
+class SRISucks {
+ public:
+ SRISucks() {
+ for (BackoffMessages *i = messages_; i != messages_ + KENLM_MAX_ORDER - 1; ++i)
+ i->Init(sizeof(ProbPointer) + sizeof(WordIndex) * (i - messages_ + 1));
+ }
+
+ void Send(unsigned char begin, unsigned char order, const WordIndex *to, float prob_basis) {
+ assert(prob_basis != kBadProb);
+ ProbPointer pointer;
+ pointer.array = order - 1;
+ pointer.index = values_[order - 1].size();
+ for (unsigned char i = begin; i < order; ++i) {
+ messages_[i - 1].Add(to, pointer);
+ }
+ values_[order - 1].push_back(prob_basis);
+ }
+
+ void ObtainBackoffs(unsigned char total_order, FILE *unigram_file, RecordReader *reader) {
+ for (unsigned char i = 0; i < KENLM_MAX_ORDER - 1; ++i) {
+ it_[i] = values_[i].empty() ? NULL : &*values_[i].begin();
+ }
+ messages_[0].Apply(it_, unigram_file);
+ BackoffMessages *messages = messages_ + 1;
+ const RecordReader *end = reader + total_order - 2 /* exclude unigrams and longest order */;
+ for (; reader != end; ++messages, ++reader) {
+ messages->Apply(it_, *reader);
+ }
+ }
+
+ ProbBackoff GetBlank(unsigned char total_order, unsigned char order, const WordIndex *indices) {
+ assert(order > 1);
+ ProbBackoff ret;
+ ret.prob = *(it_[order - 1]++);
+ ret.backoff = ((order != total_order - 1) && messages_[order - 1].Extends(order, indices)) ? kExtensionBackoff : kNoExtensionBackoff;
+ return ret;
+ }
+
+ const std::vector<float> &Values(unsigned char order) const {
+ return values_[order - 1];
+ }
+
+ private:
+ // This used to be one array. Then I needed to separate it by order for quantization to work.
+ std::vector<float> values_[KENLM_MAX_ORDER - 1];
+ BackoffMessages messages_[KENLM_MAX_ORDER - 1];
+
+ float *it_[KENLM_MAX_ORDER - 1];
+};
+
+class FindBlanks {
+ public:
+ FindBlanks(unsigned char order, const ProbBackoff *unigrams, SRISucks &messages)
+ : counts_(order), unigrams_(unigrams), sri_(messages) {}
+
+ float UnigramProb(WordIndex index) const {
+ return unigrams_[index].prob;
+ }
+
+ void Unigram(WordIndex /*index*/) {
+ ++counts_[0];
+ }
+
+ void MiddleBlank(const unsigned char order, const WordIndex *indices, unsigned char lower, float prob_basis) {
+ sri_.Send(lower, order, indices + 1, prob_basis);
+ ++counts_[order - 1];
+ }
+
+ void Middle(const unsigned char order, const void * /*data*/) {
+ ++counts_[order - 1];
+ }
+
+ void Longest(const void * /*data*/) {
+ ++counts_.back();
+ }
+
+ const std::vector<uint64_t> &Counts() const {
+ return counts_;
+ }
+
+ private:
+ std::vector<uint64_t> counts_;
+
+ const ProbBackoff *unigrams_;
+
+ SRISucks &sri_;
+};
+
+// Phase to actually write n-grams to the trie.
+template <class Quant, class Bhiksha> class WriteEntries {
+ public:
+ WriteEntries(RecordReader *contexts, const Quant &quant, UnigramValue *unigrams, BitPackedMiddle<Bhiksha> *middle, BitPackedLongest &longest, unsigned char order, SRISucks &sri) :
+ contexts_(contexts),
+ quant_(quant),
+ unigrams_(unigrams),
+ middle_(middle),
+ longest_(longest),
+ bigram_pack_((order == 2) ? static_cast<BitPacked&>(longest_) : static_cast<BitPacked&>(*middle_)),
+ order_(order),
+ sri_(sri) {}
+
+ float UnigramProb(WordIndex index) const { return unigrams_[index].weights.prob; }
+
+ void Unigram(WordIndex word) {
+ unigrams_[word].next = bigram_pack_.InsertIndex();
+ }
+
+ void MiddleBlank(const unsigned char order, const WordIndex *indices, unsigned char /*lower*/, float /*prob_base*/) {
+ ProbBackoff weights = sri_.GetBlank(order_, order, indices);
+ typename Quant::MiddlePointer(quant_, order - 2, middle_[order - 2].Insert(indices[order - 1])).Write(weights.prob, weights.backoff);
+ }
+
+ void Middle(const unsigned char order, const void *data) {
+ RecordReader &context = contexts_[order - 1];
+ const WordIndex *words = reinterpret_cast<const WordIndex*>(data);
+ ProbBackoff weights = *reinterpret_cast<const ProbBackoff*>(words + order);
+ if (context && !memcmp(data, context.Data(), sizeof(WordIndex) * order)) {
+ SetExtension(weights.backoff);
+ ++context;
+ }
+ typename Quant::MiddlePointer(quant_, order - 2, middle_[order - 2].Insert(words[order - 1])).Write(weights.prob, weights.backoff);
+ }
+
+ void Longest(const void *data) {
+ const WordIndex *words = reinterpret_cast<const WordIndex*>(data);
+ typename Quant::LongestPointer(quant_, longest_.Insert(words[order_ - 1])).Write(reinterpret_cast<const Prob*>(words + order_)->prob);
+ }
+
+ private:
+ RecordReader *contexts_;
+ const Quant &quant_;
+ UnigramValue *const unigrams_;
+ BitPackedMiddle<Bhiksha> *const middle_;
+ BitPackedLongest &longest_;
+ BitPacked &bigram_pack_;
+ const unsigned char order_;
+ SRISucks &sri_;
+};
+
+struct Gram {
+ Gram(const WordIndex *in_begin, unsigned char order) : begin(in_begin), end(in_begin + order) {}
+
+ const WordIndex *begin, *end;
+
+ // For queue, this is the direction we want.
+ bool operator<(const Gram &other) const {
+ return std::lexicographical_compare(other.begin, other.end, begin, end);
+ }
+};
+
+template <class Doing> class BlankManager {
+ public:
+ BlankManager(unsigned char total_order, Doing &doing) : total_order_(total_order), been_length_(0), doing_(doing) {
+ for (float *i = basis_; i != basis_ + KENLM_MAX_ORDER - 1; ++i) *i = kBadProb;
+ }
+
+ void Visit(const WordIndex *to, unsigned char length, float prob) {
+ basis_[length - 1] = prob;
+ unsigned char overlap = std::min<unsigned char>(length - 1, been_length_);
+ const WordIndex *cur;
+ WordIndex *pre;
+ for (cur = to, pre = been_; cur != to + overlap; ++cur, ++pre) {
+ if (*pre != *cur) break;
+ }
+ if (cur == to + length - 1) {
+ *pre = *cur;
+ been_length_ = length;
+ return;
+ }
+ // There are blanks to insert starting with order blank.
+ unsigned char blank = cur - to + 1;
+ UTIL_THROW_IF(blank == 1, FormatLoadException, "Missing a unigram that appears as context.");
+ const float *lower_basis;
+ for (lower_basis = basis_ + blank - 2; *lower_basis == kBadProb; --lower_basis) {}
+ unsigned char based_on = lower_basis - basis_ + 1;
+ for (; cur != to + length - 1; ++blank, ++cur, ++pre) {
+ assert(*lower_basis != kBadProb);
+ doing_.MiddleBlank(blank, to, based_on, *lower_basis);
+ *pre = *cur;
+ // Mark that the probability is a blank so it shouldn't be used as the basis for a later n-gram.
+ basis_[blank - 1] = kBadProb;
+ }
+ *pre = *cur;
+ been_length_ = length;
+ }
+
+ private:
+ const unsigned char total_order_;
+
+ WordIndex been_[KENLM_MAX_ORDER];
+ unsigned char been_length_;
+
+ float basis_[KENLM_MAX_ORDER];
+
+ Doing &doing_;
+};
+
+template <class Doing> void RecursiveInsert(const unsigned char total_order, const WordIndex unigram_count, RecordReader *input, std::ostream *progress_out, const char *message, Doing &doing) {
+ util::ErsatzProgress progress(unigram_count + 1, progress_out, message);
+ WordIndex unigram = 0;
+ std::priority_queue<Gram> grams;
+ if (unigram_count) grams.push(Gram(&unigram, 1));
+ for (unsigned char i = 2; i <= total_order; ++i) {
+ if (input[i-2]) grams.push(Gram(reinterpret_cast<const WordIndex*>(input[i-2].Data()), i));
+ }
+
+ BlankManager<Doing> blank(total_order, doing);
+
+ while (!grams.empty()) {
+ Gram top = grams.top();
+ grams.pop();
+ unsigned char order = top.end - top.begin;
+ if (order == 1) {
+ blank.Visit(&unigram, 1, doing.UnigramProb(unigram));
+ doing.Unigram(unigram);
+ progress.Set(unigram);
+ if (++unigram < unigram_count) grams.push(top);
+ } else {
+ if (order == total_order) {
+ blank.Visit(top.begin, order, reinterpret_cast<const Prob*>(top.end)->prob);
+ doing.Longest(top.begin);
+ } else {
+ blank.Visit(top.begin, order, reinterpret_cast<const ProbBackoff*>(top.end)->prob);
+ doing.Middle(order, top.begin);
+ }
+ RecordReader &reader = input[order - 2];
+ if (++reader) grams.push(top);
+ }
+ }
+}
+
+void SanityCheckCounts(const std::vector<uint64_t> &initial, const std::vector<uint64_t> &fixed) {
+ if (fixed[0] != initial[0]) UTIL_THROW(util::Exception, "Unigram count should be constant but initial is " << initial[0] << " and recounted is " << fixed[0]);
+ if (fixed.back() != initial.back()) UTIL_THROW(util::Exception, "Longest count should be constant but it changed from " << initial.back() << " to " << fixed.back());
+ for (unsigned char i = 0; i < initial.size(); ++i) {
+ if (fixed[i] < initial[i]) UTIL_THROW(util::Exception, "Counts came out lower than expected. This shouldn't happen");
+ }
+}
+
+template <class Quant> void TrainQuantizer(uint8_t order, uint64_t count, const std::vector<float> &additional, RecordReader &reader, util::ErsatzProgress &progress, Quant &quant) {
+ std::vector<float> probs(additional), backoffs;
+ probs.reserve(count + additional.size());
+ backoffs.reserve(count);
+ for (reader.Rewind(); reader; ++reader) {
+ const ProbBackoff &weights = *reinterpret_cast<const ProbBackoff*>(reinterpret_cast<const uint8_t*>(reader.Data()) + sizeof(WordIndex) * order);
+ probs.push_back(weights.prob);
+ if (weights.backoff != 0.0) backoffs.push_back(weights.backoff);
+ ++progress;
+ }
+ quant.Train(order, probs, backoffs);
+}
+
+template <class Quant> void TrainProbQuantizer(uint8_t order, uint64_t count, RecordReader &reader, util::ErsatzProgress &progress, Quant &quant) {
+ std::vector<float> probs, backoffs;
+ probs.reserve(count);
+ for (reader.Rewind(); reader; ++reader) {
+ const Prob &weights = *reinterpret_cast<const Prob*>(reinterpret_cast<const uint8_t*>(reader.Data()) + sizeof(WordIndex) * order);
+ probs.push_back(weights.prob);
+ ++progress;
+ }
+ quant.TrainProb(order, probs);
+}
+
+void PopulateUnigramWeights(FILE *file, WordIndex unigram_count, RecordReader &contexts, UnigramValue *unigrams) {
+ // Fill unigram probabilities.
+ try {
+ rewind(file);
+ for (WordIndex i = 0; i < unigram_count; ++i) {
+ ReadOrThrow(file, &unigrams[i].weights, sizeof(ProbBackoff));
+ if (contexts && *reinterpret_cast<const WordIndex*>(contexts.Data()) == i) {
+ SetExtension(unigrams[i].weights.backoff);
+ ++contexts;
+ }
+ }
+ } catch (util::Exception &e) {
+ e << " while re-reading unigram probabilities";
+ throw;
+ }
+}
+
+} // namespace
+
+template <class Quant, class Bhiksha> void BuildTrie(SortedFiles &files, std::vector<uint64_t> &counts, const Config &config, TrieSearch<Quant, Bhiksha> &out, Quant &quant, SortedVocabulary &vocab, BinaryFormat &backing) {
+ RecordReader inputs[KENLM_MAX_ORDER - 1];
+ RecordReader contexts[KENLM_MAX_ORDER - 1];
+
+ for (unsigned char i = 2; i <= counts.size(); ++i) {
+ inputs[i-2].Init(files.Full(i), i * sizeof(WordIndex) + (i == counts.size() ? sizeof(Prob) : sizeof(ProbBackoff)));
+ contexts[i-2].Init(files.Context(i), (i-1) * sizeof(WordIndex));
+ }
+
+ SRISucks sri;
+ std::vector<uint64_t> fixed_counts;
+ util::scoped_FILE unigram_file;
+ util::scoped_fd unigram_fd(files.StealUnigram());
+ {
+ util::scoped_memory unigrams;
+ MapRead(util::POPULATE_OR_READ, unigram_fd.get(), 0, counts[0] * sizeof(ProbBackoff), unigrams);
+ FindBlanks finder(counts.size(), reinterpret_cast<const ProbBackoff*>(unigrams.get()), sri);
+ RecursiveInsert(counts.size(), counts[0], inputs, config.ProgressMessages(), "Identifying n-grams omitted by SRI", finder);
+ fixed_counts = finder.Counts();
+ }
+ unigram_file.reset(util::FDOpenOrThrow(unigram_fd));
+ for (const RecordReader *i = inputs; i != inputs + counts.size() - 2; ++i) {
+ if (*i) UTIL_THROW(FormatLoadException, "There's a bug in the trie implementation: the " << (i - inputs + 2) << "-gram table did not complete reading");
+ }
+ SanityCheckCounts(counts, fixed_counts);
+ counts = fixed_counts;
+
+ sri.ObtainBackoffs(counts.size(), unigram_file.get(), inputs);
+
+ void *vocab_relocate;
+ void *search_base = backing.GrowForSearch(TrieSearch<Quant, Bhiksha>::Size(fixed_counts, config), vocab.UnkCountChangePadding(), vocab_relocate);
+ vocab.Relocate(vocab_relocate);
+ out.SetupMemory(reinterpret_cast<uint8_t*>(search_base), fixed_counts, config);
+
+ for (unsigned char i = 2; i <= counts.size(); ++i) {
+ inputs[i-2].Rewind();
+ }
+ if (Quant::kTrain) {
+ util::ErsatzProgress progress(std::accumulate(counts.begin() + 1, counts.end(), 0),
+ config.ProgressMessages(), "Quantizing");
+ for (unsigned char i = 2; i < counts.size(); ++i) {
+ TrainQuantizer(i, counts[i-1], sri.Values(i), inputs[i-2], progress, quant);
+ }
+ TrainProbQuantizer(counts.size(), counts.back(), inputs[counts.size() - 2], progress, quant);
+ quant.FinishedLoading(config);
+ }
+
+ UnigramValue *unigrams = out.unigram_.Raw();
+ PopulateUnigramWeights(unigram_file.get(), counts[0], contexts[0], unigrams);
+ unigram_file.reset();
+
+ for (unsigned char i = 2; i <= counts.size(); ++i) {
+ inputs[i-2].Rewind();
+ }
+ // Fill entries except unigram probabilities.
+ {
+ WriteEntries<Quant, Bhiksha> writer(contexts, quant, unigrams, out.middle_begin_, out.longest_, counts.size(), sri);
+ RecursiveInsert(counts.size(), counts[0], inputs, config.ProgressMessages(), "Writing trie", writer);
+ // Write the last unigram entry, which is the end pointer for the bigrams.
+ writer.Unigram(counts[0]);
+ }
+
+ // Do not disable this error message or else too little state will be returned. Both WriteEntries::Middle and returning state based on found n-grams will need to be fixed to handle this situation.
+ for (unsigned char order = 2; order <= counts.size(); ++order) {
+ const RecordReader &context = contexts[order - 2];
+ if (context) {
+ FormatLoadException e;
+ e << "A " << static_cast<unsigned int>(order) << "-gram has context";
+ const WordIndex *ctx = reinterpret_cast<const WordIndex*>(context.Data());
+ for (const WordIndex *i = ctx; i != ctx + order - 1; ++i) {
+ e << ' ' << *i;
+ }
+ e << " so this context must appear in the model as a " << static_cast<unsigned int>(order - 1) << "-gram but it does not";
+ throw e;
+ }
+ }
+
+ /* Set ending offsets so the last entry will be sized properly */
+ // Last entry for unigrams was already set.
+ if (out.middle_begin_ != out.middle_end_) {
+ for (typename TrieSearch<Quant, Bhiksha>::Middle *i = out.middle_begin_; i != out.middle_end_ - 1; ++i) {
+ i->FinishedLoading((i+1)->InsertIndex(), config);
+ }
+ (out.middle_end_ - 1)->FinishedLoading(out.longest_.InsertIndex(), config);
+ }
+}
+
+template <class Quant, class Bhiksha> uint8_t *TrieSearch<Quant, Bhiksha>::SetupMemory(uint8_t *start, const std::vector<uint64_t> &counts, const Config &config) {
+ quant_.SetupMemory(start, counts.size(), config);
+ start += Quant::Size(counts.size(), config);
+ unigram_.Init(start);
+ start += Unigram::Size(counts[0]);
+ FreeMiddles();
+ middle_begin_ = static_cast<Middle*>(malloc(sizeof(Middle) * (counts.size() - 2)));
+ middle_end_ = middle_begin_ + (counts.size() - 2);
+ std::vector<uint8_t*> middle_starts(counts.size() - 2);
+ for (unsigned char i = 2; i < counts.size(); ++i) {
+ middle_starts[i-2] = start;
+ start += Middle::Size(Quant::MiddleBits(config), counts[i-1], counts[0], counts[i], config);
+ }
+ // Crazy backwards thing so we initialize using pointers to ones that have already been initialized
+ for (unsigned char i = counts.size() - 1; i >= 2; --i) {
+ // use "placement new" syntax to initalize Middle in an already-allocated memory location
+ new (middle_begin_ + i - 2) Middle(
+ middle_starts[i-2],
+ quant_.MiddleBits(config),
+ counts[i-1],
+ counts[0],
+ counts[i],
+ (i == counts.size() - 1) ? static_cast<const BitPacked&>(longest_) : static_cast<const BitPacked &>(middle_begin_[i-1]),
+ config);
+ }
+ longest_.Init(start, quant_.LongestBits(config), counts[0]);
+ return start + Longest::Size(Quant::LongestBits(config), counts.back(), counts[0]);
+}
+
+template <class Quant, class Bhiksha> void TrieSearch<Quant, Bhiksha>::InitializeFromARPA(const char *file, util::FilePiece &f, std::vector<uint64_t> &counts, const Config &config, SortedVocabulary &vocab, BinaryFormat &backing) {
+ std::string temporary_prefix;
+ if (!config.temporary_directory_prefix.empty()) {
+ temporary_prefix = config.temporary_directory_prefix;
+ } else if (config.write_mmap) {
+ temporary_prefix = config.write_mmap;
+ } else {
+ temporary_prefix = file;
+ }
+ // At least 1MB sorting memory.
+ SortedFiles sorted(config, f, counts, std::max<size_t>(config.building_memory, 1048576), temporary_prefix, vocab);
+
+ BuildTrie(sorted, counts, config, *this, quant_, vocab, backing);
+}
+
+template class TrieSearch<DontQuantize, DontBhiksha>;
+template class TrieSearch<DontQuantize, ArrayBhiksha>;
+template class TrieSearch<SeparatelyQuantize, DontBhiksha>;
+template class TrieSearch<SeparatelyQuantize, ArrayBhiksha>;
+
+} // namespace trie
+} // namespace ngram
+} // namespace lm
diff --git a/src/kenlm/lm/search_trie.hh b/src/kenlm/lm/search_trie.hh
new file mode 100644
index 0000000..1adba6e
--- /dev/null
+++ b/src/kenlm/lm/search_trie.hh
@@ -0,0 +1,129 @@
+#ifndef LM_SEARCH_TRIE_H
+#define LM_SEARCH_TRIE_H
+
+#include "lm/config.hh"
+#include "lm/model_type.hh"
+#include "lm/return.hh"
+#include "lm/trie.hh"
+#include "lm/weights.hh"
+
+#include "util/file.hh"
+#include "util/file_piece.hh"
+
+#include <vector>
+#include <cstdlib>
+#include <cassert>
+
+namespace lm {
+namespace ngram {
+class BinaryFormat;
+class SortedVocabulary;
+namespace trie {
+
+template <class Quant, class Bhiksha> class TrieSearch;
+class SortedFiles;
+template <class Quant, class Bhiksha> void BuildTrie(SortedFiles &files, std::vector<uint64_t> &counts, const Config &config, TrieSearch<Quant, Bhiksha> &out, Quant &quant, SortedVocabulary &vocab, BinaryFormat &backing);
+
+// Trie-based search structure: one Unigram table, one bit-packed Middle array
+// per order in [2, Order()-1], and a Longest array for the highest order.
+// Quant controls value quantization; Bhiksha controls pointer compression.
+template <class Quant, class Bhiksha> class TrieSearch {
+  public:
+    typedef NodeRange Node;
+
+    typedef ::lm::ngram::trie::UnigramPointer UnigramPointer;
+    typedef typename Quant::MiddlePointer MiddlePointer;
+    typedef typename Quant::LongestPointer LongestPointer;
+
+    static const bool kDifferentRest = false;
+
+    static const ModelType kModelType = static_cast<ModelType>(TRIE_SORTED + Quant::kModelTypeAdd + Bhiksha::kModelTypeAdd);
+
+    static const unsigned int kVersion = 1;
+
+    // Recover quantization and Bhiksha settings from an existing binary file.
+    static void UpdateConfigFromBinary(const BinaryFormat &file, const std::vector<uint64_t> &counts, uint64_t offset, Config &config) {
+      Quant::UpdateConfigFromBinary(file, offset, config);
+      // Currently the unigram pointers are not compressed, so there will only be a header for order > 2.
+      if (counts.size() > 2)
+        Bhiksha::UpdateConfigFromBinary(file, offset + Quant::Size(counts.size(), config) + Unigram::Size(counts[0]), config);
+    }
+
+    // Total bytes required for this search structure given per-order counts.
+    static uint64_t Size(const std::vector<uint64_t> &counts, const Config &config) {
+      uint64_t ret = Quant::Size(counts.size(), config) + Unigram::Size(counts[0]);
+      for (unsigned char i = 1; i < counts.size() - 1; ++i) {
+        ret += Middle::Size(Quant::MiddleBits(config), counts[i], counts[0], counts[i+1], config);
+      }
+      return ret + Longest::Size(Quant::LongestBits(config), counts.back(), counts[0]);
+    }
+
+    TrieSearch() : middle_begin_(NULL), middle_end_(NULL) {}
+
+    ~TrieSearch() { FreeMiddles(); }
+
+    uint8_t *SetupMemory(uint8_t *start, const std::vector<uint64_t> &counts, const Config &config);
+
+    void InitializeFromARPA(const char *file, util::FilePiece &f, std::vector<uint64_t> &counts, const Config &config, SortedVocabulary &vocab, BinaryFormat &backing);
+
+    // Model order: one Middle per order 2..n-1, so order = middles + 2.
+    unsigned char Order() const {
+      return middle_end_ - middle_begin_ + 2;
+    }
+
+    ProbBackoff &UnknownUnigram() { return unigram_.Unknown(); }
+
+    // Look up a unigram; `next` receives the range of bigrams extending it and
+    // `extend_left` the pointer used for later left extension.
+    UnigramPointer LookupUnigram(WordIndex word, Node &next, bool &independent_left, uint64_t &extend_left) const {
+      extend_left = static_cast<uint64_t>(word);
+      UnigramPointer ret(unigram_.Find(word, next));
+      // An empty range means no higher-order n-grams continue this context.
+      independent_left = (next.begin == next.end);
+      return ret;
+    }
+
+    // Re-dereference a previously returned extension pointer of a given length.
+    MiddlePointer Unpack(uint64_t extend_pointer, unsigned char extend_length, Node &node) const {
+      return MiddlePointer(quant_, extend_length - 2, middle_begin_[extend_length - 2].ReadEntry(extend_pointer, node));
+    }
+
+    // Look up an n-gram of order (order_minus_2 + 2) continuing `node`.
+    MiddlePointer LookupMiddle(unsigned char order_minus_2, WordIndex word, Node &node, bool &independent_left, uint64_t &extend_left) const {
+      util::BitAddress address(middle_begin_[order_minus_2].Find(word, node, extend_left));
+      independent_left = (address.base == NULL) || (node.begin == node.end);
+      return MiddlePointer(quant_, order_minus_2, address);
+    }
+
+    LongestPointer LookupLongest(WordIndex word, const Node &node) const {
+      return LongestPointer(quant_, longest_.Find(word, node));
+    }
+
+    // Walk [begin, end) down the trie; returns false if the full context is
+    // not present (or became left-independent along the way).
+    bool FastMakeNode(const WordIndex *begin, const WordIndex *end, Node &node) const {
+      assert(begin != end);
+      bool independent_left;
+      uint64_t ignored;
+      LookupUnigram(*begin, node, independent_left, ignored);
+      for (const WordIndex *i = begin + 1; i < end; ++i) {
+        if (independent_left || !LookupMiddle(i - begin - 1, *i, node, independent_left, ignored).Found()) return false;
+      }
+      return true;
+    }
+
+  private:
+    friend void BuildTrie<Quant, Bhiksha>(SortedFiles &files, std::vector<uint64_t> &counts, const Config &config, TrieSearch<Quant, Bhiksha> &out, Quant &quant, SortedVocabulary &vocab, BinaryFormat &backing);
+
+    // Middles are managed manually so we can delay construction and they don't have to be copyable.
+    void FreeMiddles() {
+      for (const Middle *i = middle_begin_; i != middle_end_; ++i) {
+        i->~Middle();
+      }
+      std::free(middle_begin_);
+    }
+
+    typedef trie::BitPackedMiddle<Bhiksha> Middle;
+
+    typedef trie::BitPackedLongest Longest;
+    Longest longest_;
+
+    Middle *middle_begin_, *middle_end_;
+    Quant quant_;
+
+    typedef ::lm::ngram::trie::Unigram Unigram;
+    Unigram unigram_;
+};
+
+} // namespace trie
+} // namespace ngram
+} // namespace lm
+
+#endif // LM_SEARCH_TRIE_H
diff --git a/src/kenlm/lm/sizes.cc b/src/kenlm/lm/sizes.cc
new file mode 100644
index 0000000..dd831c5
--- /dev/null
+++ b/src/kenlm/lm/sizes.cc
@@ -0,0 +1,63 @@
+#include "lm/sizes.hh"
+#include "lm/model.hh"
+#include "util/file_piece.hh"
+
+#include <vector>
+#include <iomanip>
+
+namespace lm {
+namespace ngram {
+
+// Print (to stderr) memory estimates for each binary model type, choosing a
+// common unit prefix (B/kB/MB/GB) and right-aligning the numbers.
+void ShowSizes(const std::vector<uint64_t> &counts, const lm::ngram::Config &config) {
+  // One estimate per model variant, in bytes.
+  uint64_t sizes[6];
+  sizes[0] = ProbingModel::Size(counts, config);
+  sizes[1] = RestProbingModel::Size(counts, config);
+  sizes[2] = TrieModel::Size(counts, config);
+  sizes[3] = QuantTrieModel::Size(counts, config);
+  sizes[4] = ArrayTrieModel::Size(counts, config);
+  sizes[5] = QuantArrayTrieModel::Size(counts, config);
+  uint64_t max_length = *std::max_element(sizes, sizes + sizeof(sizes) / sizeof(uint64_t));
+  uint64_t min_length = *std::min_element(sizes, sizes + sizeof(sizes) / sizeof(uint64_t));
+  // Pick the largest prefix such that the smallest estimate still has at
+  // least two digits (thresholds are 10x the unit).
+  uint64_t divide;
+  char prefix;
+  if (min_length < (1 << 10) * 10) {
+    prefix = ' ';
+    divide = 1;
+  } else if (min_length < (1 << 20) * 10) {
+    prefix = 'k';
+    divide = 1 << 10;
+  } else if (min_length < (1ULL << 30) * 10) {
+    prefix = 'M';
+    divide = 1 << 20;
+  } else {
+    prefix = 'G';
+    divide = 1 << 30;
+  }
+  // Field width: enough digits for the largest value, minimum 2.
+  long int length = std::max<long int>(2, static_cast<long int>(ceil(log10((double) max_length / divide))));
+  std::cerr << "Memory estimate for binary LM:\ntype    ";
+
+  // right align bytes.
+  for (long int i = 0; i < length - 2; ++i) std::cerr << ' ';
+
+  std::cerr << prefix << "B\n"
+    "probing " << std::setw(length) << (sizes[0] / divide) << " assuming -p " << config.probing_multiplier << "\n"
+    "probing " << std::setw(length) << (sizes[1] / divide) << " assuming -r models -p " << config.probing_multiplier << "\n"
+    "trie    " << std::setw(length) << (sizes[2] / divide) << " without quantization\n"
+    "trie    " << std::setw(length) << (sizes[3] / divide) << " assuming -q " << (unsigned)config.prob_bits << " -b " << (unsigned)config.backoff_bits << " quantization \n"
+    "trie    " << std::setw(length) << (sizes[4] / divide) << " assuming -a " << (unsigned)config.pointer_bhiksha_bits << " array pointer compression\n"
+    "trie    " << std::setw(length) << (sizes[5] / divide) << " assuming -a " << (unsigned)config.pointer_bhiksha_bits << " -q " << (unsigned)config.prob_bits << " -b " << (unsigned)config.backoff_bits<< " array pointer compression and quantization\n";
+}
+
+// Convenience overload: estimate sizes using a default-constructed Config.
+void ShowSizes(const std::vector<uint64_t> &counts) {
+  lm::ngram::Config config;
+  ShowSizes(counts, config);
+}
+
+// Convenience overload: read per-order counts from an ARPA file's header,
+// then show the size estimates.
+void ShowSizes(const char *file, const lm::ngram::Config &config) {
+  std::vector<uint64_t> counts;
+  util::FilePiece f(file);
+  lm::ReadARPACounts(f, counts);
+  ShowSizes(counts, config);
+}
+
+}} //namespaces
diff --git a/src/joshua/decoder/ff/lm/kenlm/lm/sizes.hh b/src/kenlm/lm/sizes.hh
similarity index 100%
rename from src/joshua/decoder/ff/lm/kenlm/lm/sizes.hh
rename to src/kenlm/lm/sizes.hh
diff --git a/src/kenlm/lm/state.hh b/src/kenlm/lm/state.hh
new file mode 100644
index 0000000..2195dee
--- /dev/null
+++ b/src/kenlm/lm/state.hh
@@ -0,0 +1,125 @@
+#ifndef LM_STATE_H
+#define LM_STATE_H
+
+#include "lm/max_order.hh"
+#include "lm/word_index.hh"
+#include "util/murmur_hash.hh"
+
+#include <cstring>
+
+namespace lm {
+namespace ngram {
+
+// This is a POD but if you want memcmp to return the same as operator==, call
+// ZeroRemaining first.
+// This is a POD but if you want memcmp to return the same as operator==, call
+// ZeroRemaining first.
+class State {
+  public:
+    // Equal iff lengths match and the first `length` words are identical.
+    bool operator==(const State &other) const {
+      if (length != other.length) return false;
+      return !memcmp(words, other.words, length * sizeof(WordIndex));
+    }
+
+    // Three way comparison function.
+    int Compare(const State &other) const {
+      if (length != other.length) return length < other.length ? -1 : 1;
+      return memcmp(words, other.words, length * sizeof(WordIndex));
+    }
+
+    bool operator<(const State &other) const {
+      if (length != other.length) return length < other.length;
+      return memcmp(words, other.words, length * sizeof(WordIndex)) < 0;
+    }
+
+    // Call this before using raw memcmp.
+    // Zeroes the unused tail of words/backoff so byte comparison is defined.
+    void ZeroRemaining() {
+      for (unsigned char i = length; i < KENLM_MAX_ORDER - 1; ++i) {
+        words[i] = 0;
+        backoff[i] = 0.0;
+      }
+    }
+
+    unsigned char Length() const { return length; }
+
+    // You shouldn't need to touch anything below this line, but the members are public so FullState will qualify as a POD.
+    // This order minimizes total size of the struct if WordIndex is 64 bit, float is 32 bit, and alignment of 64 bit integers is 64 bit.
+    WordIndex words[KENLM_MAX_ORDER - 1];
+    float backoff[KENLM_MAX_ORDER - 1];
+    unsigned char length;
+};
+
+typedef State Right;
+
+// Hash only the `length` words in use; seed allows chaining of hashes.
+inline uint64_t hash_value(const State &state, uint64_t seed = 0) {
+  return util::MurmurHashNative(state.words, sizeof(WordIndex) * state.length, seed);
+}
+
+// Left-side chart state: extension pointers for prefixes of the sentence span.
+// Comparisons only examine the deepest pointer (pointers[length - 1]).
+struct Left {
+  bool operator==(const Left &other) const {
+    return
+      length == other.length &&
+      (!length || (pointers[length - 1] == other.pointers[length - 1] && full == other.full));
+  }
+
+  // Three-way comparison: length, then deepest pointer, then the full flag.
+  int Compare(const Left &other) const {
+    if (length < other.length) return -1;
+    if (length > other.length) return 1;
+    if (length == 0) return 0; // Must be full.
+    if (pointers[length - 1] > other.pointers[length - 1]) return 1;
+    if (pointers[length - 1] < other.pointers[length - 1]) return -1;
+    return (int)full - (int)other.full;
+  }
+
+  bool operator<(const Left &other) const {
+    return Compare(other) == -1;
+  }
+
+  // Zero unused pointer slots so raw byte comparison is well defined.
+  void ZeroRemaining() {
+    for (uint64_t * i = pointers + length; i < pointers + KENLM_MAX_ORDER - 1; ++i)
+      *i = 0;
+  }
+
+  uint64_t pointers[KENLM_MAX_ORDER - 1];
+  unsigned char length;
+  bool full;
+};
+
+// Hash length and full flag, seeded with the deepest pointer (0 when empty).
+inline uint64_t hash_value(const Left &left) {
+  unsigned char add[2];
+  add[0] = left.length;
+  add[1] = left.full;
+  return util::MurmurHashNative(add, 2, left.length ? left.pointers[left.length - 1] : 0);
+}
+
+// Combined state for chart decoding: a Left (extension pointers) plus a
+// Right (context words) side.
+struct ChartState {
+  bool operator==(const ChartState &other) const {
+    return (right == other.right) && (left == other.left);
+  }
+
+  // Three-way comparison: left side first, then right side.
+  int Compare(const ChartState &other) const {
+    int lres = left.Compare(other.left);
+    if (lres) return lres;
+    return right.Compare(other.right);
+  }
+
+  bool operator<(const ChartState &other) const {
+    return Compare(other) < 0;
+  }
+
+  // Zero unused slots of both sides for raw byte comparison.
+  void ZeroRemaining() {
+    left.ZeroRemaining();
+    right.ZeroRemaining();
+  }
+
+  Left left;
+  State right;
+};
+
+// Combine both sides: the left hash seeds the right-side hash.
+inline uint64_t hash_value(const ChartState &state) {
+  return hash_value(state.right, hash_value(state.left));
+}
+
+
+} // namespace ngram
+} // namespace lm
+
+#endif // LM_STATE_H
diff --git a/src/joshua/decoder/ff/lm/kenlm/lm/test.arpa b/src/kenlm/lm/test.arpa
similarity index 100%
rename from src/joshua/decoder/ff/lm/kenlm/lm/test.arpa
rename to src/kenlm/lm/test.arpa
diff --git a/src/joshua/decoder/ff/lm/kenlm/lm/test_nounk.arpa b/src/kenlm/lm/test_nounk.arpa
similarity index 100%
rename from src/joshua/decoder/ff/lm/kenlm/lm/test_nounk.arpa
rename to src/kenlm/lm/test_nounk.arpa
diff --git a/src/kenlm/lm/trie.cc b/src/kenlm/lm/trie.cc
new file mode 100644
index 0000000..72ad544
--- /dev/null
+++ b/src/kenlm/lm/trie.cc
@@ -0,0 +1,131 @@
+#include "lm/trie.hh"
+
+#include "lm/bhiksha.hh"
+#include "util/bit_packing.hh"
+#include "util/exception.hh"
+#include "util/sorted_uniform.hh"
+
+#include <cassert>
+
+namespace lm {
+namespace ngram {
+namespace trie {
+namespace {
+
+// Adapter exposing the word-index key of each fixed-width bit-packed record,
+// so generic sorted-search routines can index into the packed array.
+class KeyAccessor {
+  public:
+    KeyAccessor(const void *base, uint64_t key_mask, uint8_t key_bits, uint8_t total_bits)
+      : base_(reinterpret_cast<const uint8_t*>(base)), key_mask_(key_mask), key_bits_(key_bits), total_bits_(total_bits) {}
+
+    typedef uint64_t Key;
+
+    // Read the key of record `index`: records are total_bits_ wide and the
+    // key occupies the first key_bits_ of each.
+    Key operator()(uint64_t index) const {
+      return util::ReadInt57(base_, index * static_cast<uint64_t>(total_bits_), key_bits_, key_mask_);
+    }
+
+  private:
+    const uint8_t *const base_;
+    const WordIndex key_mask_;
+    const uint8_t key_bits_, total_bits_;
+};
+
+// Search the bit-packed records in [begin_index, end_index) for `key`;
+// on success, stores the matching record's index in at_index.
+bool FindBitPacked(const void *base, uint64_t key_mask, uint8_t key_bits, uint8_t total_bits, uint64_t begin_index, uint64_t end_index, const uint64_t max_vocab, const uint64_t key, uint64_t &at_index) {
+  const KeyAccessor reader(base, key_mask, key_bits, total_bits);
+  return util::BoundedSortedUniformFind<uint64_t, KeyAccessor, util::PivotSelect<sizeof(WordIndex)>::T>(reader, begin_index - 1, static_cast<uint64_t>(0), end_index, max_vocab, key, at_index);
+}
+} // namespace
+
+// Bytes needed for `entries` records, each holding a word index plus
+// `remaining_bits` of payload.
+uint64_t BitPacked::BaseSize(uint64_t entries, uint64_t max_vocab, uint8_t remaining_bits) {
+  uint8_t total_bits = util::RequiredBits(max_vocab) + remaining_bits;
+  // Extra entry for next pointer at the end.
+  // +7 then / 8 to round up bits and convert to bytes
+  // +sizeof(uint64_t) so that ReadInt57 etc don't go segfault.
+  // Note that this waste is O(order), not O(number of ngrams).
+  return ((1 + entries) * total_bits + 7) / 8 + sizeof(uint64_t);
+}
+
+// Initialize the packed-array layout over pre-allocated memory `base`.
+void BitPacked::BaseInit(void *base, uint64_t max_vocab, uint8_t remaining_bits) {
+  util::BitPackingSanity();
+  // Word indices occupy the minimum number of bits for the vocabulary size.
+  word_bits_ = util::RequiredBits(max_vocab);
+  word_mask_ = (1ULL << word_bits_) - 1ULL;
+  // ReadInt57/WriteInt57 support at most 57-bit fields.
+  if (word_bits_ > 57) UTIL_THROW(util::Exception, "Sorry, word indices more than " << (1ULL << 57) << " are not implemented.  Edit util/bit_packing.hh and fix the bit packing functions.");
+  total_bits_ = word_bits_ + remaining_bits;
+
+  base_ = static_cast<uint8_t*>(base);
+  insert_index_ = 0;
+  max_vocab_ = max_vocab;
+}
+
+// Bytes for a middle-order array: Bhiksha pointer storage plus the packed
+// records (quantized values + any inline next-pointer bits).
+template <class Bhiksha> uint64_t BitPackedMiddle<Bhiksha>::Size(uint8_t quant_bits, uint64_t entries, uint64_t max_vocab, uint64_t max_ptr, const Config &config) {
+  return Bhiksha::Size(entries + 1, max_ptr, config) + BaseSize(entries, max_vocab, quant_bits + Bhiksha::InlineBits(entries + 1, max_ptr, config));
+}
+
+// Construct over pre-allocated memory: Bhiksha pointer table first, then the
+// bit-packed records. next_source is the next-higher-order array whose insert
+// index supplies this level's next pointers.
+template <class Bhiksha> BitPackedMiddle<Bhiksha>::BitPackedMiddle(void *base, uint8_t quant_bits, uint64_t entries, uint64_t max_vocab, uint64_t max_next, const BitPacked &next_source, const Config &config) :
+  BitPacked(),
+  quant_bits_(quant_bits),
+  // If the offset of the method changes, also change TrieSearch::UpdateConfigFromBinary.
+  bhiksha_(base, entries + 1, max_next, config),
+  next_source_(&next_source) {
+  // 57-bit limit imposed by the ReadInt57/WriteInt57 bit-packing helpers.
+  if (entries + 1 >= (1ULL << 57) || (max_next >= (1ULL << 57))) UTIL_THROW(util::Exception, "Sorry, this does not support more than " << (1ULL << 57) << " n-grams of a particular order.  Edit util/bit_packing.hh and fix the bit packing functions.");
+  BaseInit(reinterpret_cast<uint8_t*>(base) + Bhiksha::Size(entries + 1, max_next, config), max_vocab, quant_bits_ + bhiksha_.InlineBits());
+}
+
+// Append a record for `word`; returns the bit address where the caller
+// writes the quantized values. The next pointer is taken from the current
+// insert index of the next-higher-order array.
+template <class Bhiksha> util::BitAddress BitPackedMiddle<Bhiksha>::Insert(WordIndex word) {
+  assert(word <= word_mask_);
+  uint64_t at_pointer = insert_index_ * total_bits_;
+
+  util::WriteInt57(base_, at_pointer, word_bits_, word);
+  at_pointer += word_bits_;
+  // Caller fills the quant_bits_ payload at this address.
+  util::BitAddress ret(base_, at_pointer);
+  at_pointer += quant_bits_;
+  uint64_t next = next_source_->InsertIndex();
+  bhiksha_.WriteNext(base_, at_pointer, insert_index_, next);
+  ++insert_index_;
+  return ret;
+}
+
+// Find `word` within `range`; on success, `pointer` receives the record
+// index (usable for later Unpack/ReadEntry), `range` is narrowed to the
+// children of the found record, and the returned address points at the
+// quantized values. Returns a NULL address on miss.
+template <class Bhiksha> util::BitAddress BitPackedMiddle<Bhiksha>::Find(WordIndex word, NodeRange &range, uint64_t &pointer) const {
+  uint64_t at_pointer;
+  if (!FindBitPacked(base_, word_mask_, word_bits_, total_bits_, range.begin, range.end, max_vocab_, word, at_pointer)) {
+    return util::BitAddress(NULL, 0);
+  }
+  pointer = at_pointer;
+  // Convert record index to a bit offset and skip past the word field.
+  at_pointer *= total_bits_;
+  at_pointer += word_bits_;
+  bhiksha_.ReadNext(base_, at_pointer + quant_bits_, pointer, total_bits_, range);
+
+  return util::BitAddress(base_, at_pointer);
+}
+
+// Write the sentinel next pointer after the last record so lookups of the
+// final entry get a valid child range, then finalize the Bhiksha table.
+template <class Bhiksha> void BitPackedMiddle<Bhiksha>::FinishedLoading(uint64_t next_end, const Config &config) {
+  // Write at insert_index. . .
+  uint64_t last_next_write = insert_index_ * total_bits_ +
+    // at the offset where the next pointers are stored.
+    (total_bits_ - bhiksha_.InlineBits());
+  bhiksha_.WriteNext(base_, last_next_write, insert_index_, next_end);
+  bhiksha_.FinishedLoading(config);
+}
+
+// Append a highest-order record; returns the address where the caller
+// writes the (quantized) probability. No next pointer at the top order.
+util::BitAddress BitPackedLongest::Insert(WordIndex index) {
+  assert(index <= word_mask_);
+  uint64_t at_pointer = insert_index_ * total_bits_;
+  util::WriteInt57(base_, at_pointer, word_bits_, index);
+  at_pointer += word_bits_;
+  ++insert_index_;
+  return util::BitAddress(base_, at_pointer);
+}
+
+// Find `word` within `range`; returns the address of its value payload, or
+// a NULL address on miss.
+util::BitAddress BitPackedLongest::Find(WordIndex word, const NodeRange &range) const {
+  uint64_t at_pointer;
+  if (!FindBitPacked(base_, word_mask_, word_bits_, total_bits_, range.begin, range.end, max_vocab_, word, at_pointer)) return util::BitAddress(NULL, 0);
+  at_pointer = at_pointer * total_bits_ + word_bits_;
+  return util::BitAddress(base_, at_pointer);
+}
+
+template class BitPackedMiddle<DontBhiksha>;
+template class BitPackedMiddle<ArrayBhiksha>;
+
+} // namespace trie
+} // namespace ngram
+} // namespace lm
diff --git a/src/kenlm/lm/trie.hh b/src/kenlm/lm/trie.hh
new file mode 100644
index 0000000..b7f0458
--- /dev/null
+++ b/src/kenlm/lm/trie.hh
@@ -0,0 +1,146 @@
+#ifndef LM_TRIE_H
+#define LM_TRIE_H
+
+#include "lm/weights.hh"
+#include "lm/word_index.hh"
+#include "util/bit_packing.hh"
+
+#include <cstddef>
+
+#include <stdint.h>
+
+namespace lm {
+namespace ngram {
+struct Config;
+namespace trie {
+
+struct NodeRange {
+ uint64_t begin, end;
+};
+
+// TODO: if the number of unigrams is a concern, also bit pack these records.
+struct UnigramValue {
+ ProbBackoff weights;
+ uint64_t next;
+ uint64_t Next() const { return next; }
+};
+
+class UnigramPointer {
+ public:
+ explicit UnigramPointer(const ProbBackoff &to) : to_(&to) {}
+
+ UnigramPointer() : to_(NULL) {}
+
+ bool Found() const { return to_ != NULL; }
+
+ float Prob() const { return to_->prob; }
+ float Backoff() const { return to_->backoff; }
+ float Rest() const { return Prob(); }
+
+ private:
+ const ProbBackoff *to_;
+};
+
+// Flat array of unigram records indexed directly by word id; each record
+// carries weights plus the begin offset of its bigram range.
+class Unigram {
+  public:
+    Unigram() {}
+
+    void Init(void *start) {
+      unigram_ = static_cast<UnigramValue*>(start);
+    }
+
+    static uint64_t Size(uint64_t count) {
+      // +1 in case unknown doesn't appear.  +1 for the final next.
+      return (count + 2) * sizeof(UnigramValue);
+    }
+
+    const ProbBackoff &Lookup(WordIndex index) const { return unigram_[index].weights; }
+
+    // <unk> is word id 0.
+    ProbBackoff &Unknown() { return unigram_[0].weights; }
+
+    UnigramValue *Raw() {
+      return unigram_;
+    }
+
+    // Direct index; the bigram range is [this record's next, the following
+    // record's next).
+    UnigramPointer Find(WordIndex word, NodeRange &next) const {
+      UnigramValue *val = unigram_ + word;
+      next.begin = val->next;
+      next.end = (val+1)->next;
+      return UnigramPointer(val->weights);
+    }
+
+  private:
+    UnigramValue *unigram_;
+};
+
+// Common state for fixed-width bit-packed record arrays: field widths,
+// backing memory, and the append position.
+class BitPacked {
+  public:
+    BitPacked() {}
+
+    // Number of records appended so far; used as the next pointer by the
+    // order below this one.
+    uint64_t InsertIndex() const {
+      return insert_index_;
+    }
+
+  protected:
+    static uint64_t BaseSize(uint64_t entries, uint64_t max_vocab, uint8_t remaining_bits);
+
+    void BaseInit(void *base, uint64_t max_vocab, uint8_t remaining_bits);
+
+    uint8_t word_bits_;
+    uint8_t total_bits_;
+    uint64_t word_mask_;
+
+    uint8_t *base_;
+
+    uint64_t insert_index_, max_vocab_;
+};
+
+template <class Bhiksha> class BitPackedMiddle : public BitPacked {
+ public:
+ static uint64_t Size(uint8_t quant_bits, uint64_t entries, uint64_t max_vocab, uint64_t max_next, const Config &config);
+
+ // next_source need not be initialized.
+ BitPackedMiddle(void *base, uint8_t quant_bits, uint64_t entries, uint64_t max_vocab, uint64_t max_next, const BitPacked &next_source, const Config &config);
+
+ util::BitAddress Insert(WordIndex word);
+
+ void FinishedLoading(uint64_t next_end, const Config &config);
+
+ util::BitAddress Find(WordIndex word, NodeRange &range, uint64_t &pointer) const;
+
+ util::BitAddress ReadEntry(uint64_t pointer, NodeRange &range) {
+ uint64_t addr = pointer * total_bits_;
+ addr += word_bits_;
+ bhiksha_.ReadNext(base_, addr + quant_bits_, pointer, total_bits_, range);
+ return util::BitAddress(base_, addr);
+ }
+
+ private:
+ uint8_t quant_bits_;
+ Bhiksha bhiksha_;
+
+ const BitPacked *next_source_;
+};
+
+class BitPackedLongest : public BitPacked {
+ public:
+ static uint64_t Size(uint8_t quant_bits, uint64_t entries, uint64_t max_vocab) {
+ return BaseSize(entries, max_vocab, quant_bits);
+ }
+
+ BitPackedLongest() {}
+
+ void Init(void *base, uint8_t quant_bits, uint64_t max_vocab) {
+ BaseInit(base, max_vocab, quant_bits);
+ }
+
+ util::BitAddress Insert(WordIndex word);
+
+ util::BitAddress Find(WordIndex word, const NodeRange &node) const;
+};
+
+} // namespace trie
+} // namespace ngram
+} // namespace lm
+
+#endif // LM_TRIE_H
diff --git a/src/kenlm/lm/trie_sort.cc b/src/kenlm/lm/trie_sort.cc
new file mode 100644
index 0000000..33a2f96
--- /dev/null
+++ b/src/kenlm/lm/trie_sort.cc
@@ -0,0 +1,304 @@
+#include "lm/trie_sort.hh"
+
+#include "lm/config.hh"
+#include "lm/lm_exception.hh"
+#include "lm/read_arpa.hh"
+#include "lm/vocab.hh"
+#include "lm/weights.hh"
+#include "lm/word_index.hh"
+#include "util/file_piece.hh"
+#include "util/mmap.hh"
+#include "util/proxy_iterator.hh"
+#include "util/sized_iterator.hh"
+
+#include <algorithm>
+#include <cstring>
+#include <cstdio>
+#include <cstdlib>
+#include <deque>
+#include <iterator>
+#include <limits>
+#include <vector>
+
+namespace lm {
+namespace ngram {
+namespace trie {
+namespace {
+
+typedef util::SizedIterator NGramIter;
+
+// Proxy for an entry except there is some extra cruft between the entries. This is used to sort (n-1)-grams using the same memory as the sorted n-grams.
+class PartialViewProxy {
+ public:
+ PartialViewProxy() : attention_size_(0), inner_() {}
+
+ PartialViewProxy(void *ptr, std::size_t block_size, std::size_t attention_size) : attention_size_(attention_size), inner_(ptr, block_size) {}
+
+ operator std::string() const {
+ return std::string(reinterpret_cast<const char*>(inner_.Data()), attention_size_);
+ }
+
+ PartialViewProxy &operator=(const PartialViewProxy &from) {
+ memcpy(inner_.Data(), from.inner_.Data(), attention_size_);
+ return *this;
+ }
+
+ PartialViewProxy &operator=(const std::string &from) {
+ memcpy(inner_.Data(), from.data(), attention_size_);
+ return *this;
+ }
+
+ const void *Data() const { return inner_.Data(); }
+ void *Data() { return inner_.Data(); }
+
+ friend void swap(PartialViewProxy first, PartialViewProxy second) {
+ std::swap_ranges(reinterpret_cast<char*>(first.Data()), reinterpret_cast<char*>(first.Data()) + first.attention_size_, reinterpret_cast<char*>(second.Data()));
+ }
+
+ private:
+ friend class util::ProxyIterator<PartialViewProxy>;
+
+ typedef std::string value_type;
+
+ const std::size_t attention_size_;
+
+ typedef util::SizedInnerIterator InnerIterator;
+ InnerIterator &Inner() { return inner_; }
+ const InnerIterator &Inner() const { return inner_; }
+ InnerIterator inner_;
+};
+
+typedef util::ProxyIterator<PartialViewProxy> PartialIter;
+
+FILE *DiskFlush(const void *mem_begin, const void *mem_end, const std::string &temp_prefix) {
+ util::scoped_fd file(util::MakeTemp(temp_prefix));
+ util::WriteOrThrow(file.get(), mem_begin, (uint8_t*)mem_end - (uint8_t*)mem_begin);
+ return util::FDOpenOrThrow(file);
+}
+
+// Sort the (order-1)-word contexts of already-loaded n-grams in place
+// (skipping the first word of each record) and write the unique contexts to
+// a temporary file, which is returned open.
+FILE *WriteContextFile(uint8_t *begin, uint8_t *end, const std::string &temp_prefix, std::size_t entry_size, unsigned char order) {
+  const size_t context_size = sizeof(WordIndex) * (order - 1);
+  // Sort just the contexts using the same memory.
+  PartialIter context_begin(PartialViewProxy(begin + sizeof(WordIndex), entry_size, context_size));
+  PartialIter context_end(PartialViewProxy(end + sizeof(WordIndex), entry_size, context_size));
+
+#if defined(_WIN32) || defined(_WIN64)
+  std::stable_sort
+#else
+  std::sort
+#endif
+    (context_begin, context_end, util::SizedCompare<EntryCompare, PartialViewProxy>(EntryCompare(order - 1)));
+
+  util::scoped_FILE out(util::FMakeTemp(temp_prefix));
+
+  // Write out to file and uniqueify at the same time.  Could have used unique_copy if there was an appropriate OutputIterator.
+  if (context_begin == context_end) return out.release();
+  PartialIter i(context_begin);
+  util::WriteOrThrow(out.get(), i->Data(), context_size);
+  const void *previous = i->Data();
+  ++i;
+  // Only write a context when it differs from the previous (sorted) one.
+  for (; i != context_end; ++i) {
+    if (memcmp(previous, i->Data(), context_size)) {
+      util::WriteOrThrow(out.get(), i->Data(), context_size);
+      previous = i->Data();
+    }
+  }
+  return out.release();
+}
+
+struct ThrowCombine {
+ void operator()(std::size_t entry_size, unsigned char order, const void *first, const void *second, FILE * /*out*/) const {
+ const WordIndex *base = reinterpret_cast<const WordIndex*>(first);
+ FormatLoadException e;
+ e << "Duplicate n-gram detected with vocab ids";
+ for (const WordIndex *i = base; i != base + order; ++i) {
+ e << ' ' << *i;
+ }
+ throw e;
+ }
+};
+
+// Useful for context files that just contain records with no value.
+struct FirstCombine {
+ void operator()(std::size_t entry_size, unsigned char /*order*/, const void *first, const void * /*second*/, FILE *out) const {
+ util::WriteOrThrow(out, first, entry_size);
+ }
+};
+
+// Standard two-way merge of sorted record files into a new temporary file.
+// Equal records are resolved by `combine` (throw for n-grams, keep-first for
+// context files).
+template <class Combine> FILE *MergeSortedFiles(FILE *first_file, FILE *second_file, const std::string &temp_prefix, std::size_t weights_size, unsigned char order, const Combine &combine) {
+  std::size_t entry_size = sizeof(WordIndex) * order + weights_size;
+  RecordReader first, second;
+  first.Init(first_file, entry_size);
+  second.Init(second_file, entry_size);
+  util::scoped_FILE out_file(util::FMakeTemp(temp_prefix));
+  EntryCompare less(order);
+  while (first && second) {
+    if (less(first.Data(), second.Data())) {
+      util::WriteOrThrow(out_file.get(), first.Data(), entry_size);
+      ++first;
+    } else if (less(second.Data(), first.Data())) {
+      util::WriteOrThrow(out_file.get(), second.Data(), entry_size);
+      ++second;
+    } else {
+      // Records compare equal: let the combine policy decide what to emit.
+      combine(entry_size, order, first.Data(), second.Data(), out_file.get());
+      ++first; ++second;
+    }
+  }
+  // Drain whichever input still has records.
+  for (RecordReader &remains = (first ? first : second); remains; ++remains) {
+    util::WriteOrThrow(out_file.get(), remains.Data(), entry_size);
+  }
+  return out_file.release();
+}
+
+} // namespace
+
+// Attach to a file of fixed-size records and read the first one; a NULL
+// file yields an immediately-exhausted reader.
+void RecordReader::Init(FILE *file, std::size_t entry_size) {
+  entry_size_ = entry_size;
+  data_.reset(malloc(entry_size));
+  UTIL_THROW_IF(!data_.get(), util::ErrnoException, "Failed to malloc read buffer");
+  file_ = file;
+  if (file) {
+    rewind(file);
+    remains_ = true;
+    // Prime the buffer with the first record.
+    ++*this;
+  } else {
+    remains_ = false;
+  }
+}
+
+// Rewrite `amount` bytes of the current (already-read) record in the
+// underlying file, then restore the file position to the record's end.
+void RecordReader::Overwrite(const void *start, std::size_t amount) {
+  // Offset of `start` within the buffered record.
+  long internal = (uint8_t*)start - (uint8_t*)data_.get();
+  UTIL_THROW_IF(fseek(file_, internal - entry_size_, SEEK_CUR), util::ErrnoException, "Couldn't seek backwards for revision");
+  util::WriteOrThrow(file_, start, amount);
+  long forward = entry_size_ - internal - amount;
+#if !defined(_WIN32) && !defined(_WIN64)
+  if (forward)
+#endif
+    UTIL_THROW_IF(fseek(file_, forward, SEEK_CUR), util::ErrnoException, "Couldn't seek forwards past revision");
+}
+
+// Restart from the beginning of the file and re-read the first record.
+void RecordReader::Rewind() {
+  if (file_) {
+    rewind(file_);
+    remains_ = true;
+    ++*this;
+  } else {
+    remains_ = false;
+  }
+}
+
+// Read the ARPA file order by order, producing one sorted temporary file of
+// n-grams (and one of contexts) per order.
+SortedFiles::SortedFiles(const Config &config, util::FilePiece &f, std::vector<uint64_t> &counts, size_t buffer, const std::string &file_prefix, SortedVocabulary &vocab) {
+  PositiveProbWarn warn(config.positive_log_probability);
+  unigram_.reset(util::MakeTemp(file_prefix));
+  {
+    // In case <unk> appears.
+    size_t size_out = (counts[0] + 1) * sizeof(ProbBackoff);
+    util::scoped_mmap unigram_mmap(util::MapZeroedWrite(unigram_.get(), size_out), size_out);
+    Read1Grams(f, counts[0], vocab, reinterpret_cast<ProbBackoff*>(unigram_mmap.get()), warn);
+    CheckSpecials(config, vocab);
+    if (!vocab.SawUnk()) ++counts[0];
+  }
+
+  // Only use as much buffer as we need.
+  size_t buffer_use = 0;
+  for (unsigned int order = 2; order < counts.size(); ++order) {
+    // Middle orders carry prob + backoff (two floats).
+    buffer_use = std::max<size_t>(buffer_use, static_cast<size_t>((sizeof(WordIndex) * order + 2 * sizeof(float)) * counts[order - 1]));
+  }
+  // The highest order carries only prob (one float).
+  buffer_use = std::max<size_t>(buffer_use, static_cast<size_t>((sizeof(WordIndex) * counts.size() + sizeof(float)) * counts.back()));
+  buffer = std::min<size_t>(buffer, buffer_use);
+
+  util::scoped_malloc mem;
+  mem.reset(malloc(buffer));
+  if (!mem.get()) UTIL_THROW(util::ErrnoException, "malloc failed for sort buffer size " << buffer);
+
+  for (unsigned char order = 2; order <= counts.size(); ++order) {
+    ConvertToSorted(f, vocab, counts, file_prefix, order, warn, mem.get(), buffer);
+  }
+  ReadEnd(f);
+}
+
+namespace {
+class Closer {
+ public:
+ explicit Closer(std::deque<FILE*> &files) : files_(files) {}
+
+ ~Closer() {
+ for (std::deque<FILE*>::iterator i = files_.begin(); i != files_.end(); ++i) {
+ util::scoped_FILE deleter(*i);
+ }
+ }
+
+ void PopFront() {
+ util::scoped_FILE deleter(files_.front());
+ files_.pop_front();
+ }
+ private:
+ std::deque<FILE*> &files_;
+};
+} // namespace
+
+// Read one order's n-grams in memory-sized batches, sort each batch, spill
+// it to disk, then merge the spilled files pairwise into one sorted n-gram
+// file and one sorted context file for this order.
+void SortedFiles::ConvertToSorted(util::FilePiece &f, const SortedVocabulary &vocab, const std::vector<uint64_t> &counts, const std::string &file_prefix, unsigned char order, PositiveProbWarn &warn, void *mem, std::size_t mem_size) {
+  ReadNGramHeader(f, order);
+  const size_t count = counts[order - 1];
+  // Size of weights.  Does it include backoff?
+  const size_t words_size = sizeof(WordIndex) * order;
+  // Highest order stores only prob; middle orders store prob + backoff.
+  const size_t weights_size = sizeof(float) + ((order == counts.size()) ? 0 : sizeof(float));
+  const size_t entry_size = words_size + weights_size;
+  const size_t batch_size = std::min(count, mem_size / entry_size);
+  uint8_t *const begin = reinterpret_cast<uint8_t*>(mem);
+
+  std::deque<FILE*> files, contexts;
+  Closer files_closer(files), contexts_closer(contexts);
+
+  for (std::size_t batch = 0, done = 0; done < count; ++batch) {
+    uint8_t *out = begin;
+    uint8_t *out_end = out + std::min(count - done, batch_size) * entry_size;
+    // Words are stored reversed (suffix order) via a reverse_iterator.
+    if (order == counts.size()) {
+      for (; out != out_end; out += entry_size) {
+        std::reverse_iterator<WordIndex*> it(reinterpret_cast<WordIndex*>(out) + order);
+        ReadNGram(f, order, vocab, it, *reinterpret_cast<Prob*>(out + words_size), warn);
+      }
+    } else {
+      for (; out != out_end; out += entry_size) {
+        std::reverse_iterator<WordIndex*> it(reinterpret_cast<WordIndex*>(out) + order);
+        ReadNGram(f, order, vocab, it, *reinterpret_cast<ProbBackoff*>(out + words_size), warn);
+      }
+    }
+    // Sort full records by full n-gram.
+    util::SizedProxy proxy_begin(begin, entry_size), proxy_end(out_end, entry_size);
+    // parallel_sort uses too much RAM.  TODO: figure out why windows sort doesn't like my proxies.
+#if defined(_WIN32) || defined(_WIN64)
+    std::stable_sort
+#else
+    std::sort
+#endif
+      (NGramIter(proxy_begin), NGramIter(proxy_end), util::SizedCompare<EntryCompare>(EntryCompare(order)));
+    files.push_back(DiskFlush(begin, out_end, file_prefix));
+    contexts.push_back(WriteContextFile(begin, out_end, file_prefix, entry_size, order));
+
+    done += (out_end - begin) / entry_size;
+  }
+
+  // All individual files created.  Merge them.
+
+  // Pairwise merge: merged output is appended, inputs popped and closed.
+  while (files.size() > 1) {
+    files.push_back(MergeSortedFiles(files[0], files[1], file_prefix, weights_size, order, ThrowCombine()));
+    files_closer.PopFront();
+    files_closer.PopFront();
+    contexts.push_back(MergeSortedFiles(contexts[0], contexts[1], file_prefix, 0, order - 1, FirstCombine()));
+    contexts_closer.PopFront();
+    contexts_closer.PopFront();
+  }
+
+  if (!files.empty()) {
+    // Steal from closers.
+    full_[order - 2].reset(files.front());
+    files.pop_front();
+    context_[order - 2].reset(contexts.front());
+    contexts.pop_front();
+  }
+}
+
+} // namespace trie
+} // namespace ngram
+} // namespace lm
diff --git a/src/kenlm/lm/trie_sort.hh b/src/kenlm/lm/trie_sort.hh
new file mode 100644
index 0000000..594efee
--- /dev/null
+++ b/src/kenlm/lm/trie_sort.hh
@@ -0,0 +1,114 @@
+// Step of trie builder: create sorted files.
+
+#ifndef LM_TRIE_SORT_H
+#define LM_TRIE_SORT_H
+
+#include "lm/max_order.hh"
+#include "lm/word_index.hh"
+
+#include "util/file.hh"
+#include "util/scoped.hh"
+
+#include <cstddef>
+#include <functional>
+#include <string>
+#include <vector>
+
+#include <stdint.h>
+
+namespace util {
+class FilePiece;
+} // namespace util
+
+namespace lm {
+class PositiveProbWarn;
+namespace ngram {
+class SortedVocabulary;
+struct Config;
+
+namespace trie {
+
+class EntryCompare : public std::binary_function<const void*, const void*, bool> {
+ public:
+ explicit EntryCompare(unsigned char order) : order_(order) {}
+
+ bool operator()(const void *first_void, const void *second_void) const {
+ const WordIndex *first = static_cast<const WordIndex*>(first_void);
+ const WordIndex *second = static_cast<const WordIndex*>(second_void);
+ const WordIndex *end = first + order_;
+ for (; first != end; ++first, ++second) {
+ if (*first < *second) return true;
+ if (*first > *second) return false;
+ }
+ return false;
+ }
+ private:
+ unsigned char order_;
+};
+
+class RecordReader {
+ public:
+ RecordReader() : remains_(true) {}
+
+ void Init(FILE *file, std::size_t entry_size);
+
+ void *Data() { return data_.get(); }
+ const void *Data() const { return data_.get(); }
+
+ RecordReader &operator++() {
+ std::size_t ret = fread(data_.get(), entry_size_, 1, file_);
+ if (!ret) {
+ UTIL_THROW_IF(!feof(file_), util::ErrnoException, "Error reading temporary file");
+ remains_ = false;
+ }
+ return *this;
+ }
+
+ operator bool() const { return remains_; }
+
+ void Rewind();
+
+ std::size_t EntrySize() const { return entry_size_; }
+
+ void Overwrite(const void *start, std::size_t amount);
+
+ private:
+ FILE *file_;
+
+ util::scoped_malloc data_;
+
+ bool remains_;
+
+ std::size_t entry_size_;
+};
+
+class SortedFiles {
+ public:
+ // Build from ARPA
+ SortedFiles(const Config &config, util::FilePiece &f, std::vector<uint64_t> &counts, std::size_t buffer, const std::string &file_prefix, SortedVocabulary &vocab);
+
+ int StealUnigram() {
+ return unigram_.release();
+ }
+
+ FILE *Full(unsigned char order) {
+ return full_[order - 2].get();
+ }
+
+ FILE *Context(unsigned char of_order) {
+ return context_[of_order - 2].get();
+ }
+
+ private:
+ void ConvertToSorted(util::FilePiece &f, const SortedVocabulary &vocab, const std::vector<uint64_t> &counts, const std::string &prefix, unsigned char order, PositiveProbWarn &warn, void *mem, std::size_t mem_size);
+
+ util::scoped_fd unigram_;
+
+ util::scoped_FILE full_[KENLM_MAX_ORDER - 1], context_[KENLM_MAX_ORDER - 1];
+};
+
+} // namespace trie
+} // namespace ngram
+} // namespace lm
+
+#endif // LM_TRIE_SORT_H
diff --git a/src/kenlm/lm/value.hh b/src/kenlm/lm/value.hh
new file mode 100644
index 0000000..d2425cc
--- /dev/null
+++ b/src/kenlm/lm/value.hh
@@ -0,0 +1,158 @@
+#ifndef LM_VALUE_H
+#define LM_VALUE_H
+
+#include "lm/config.hh"
+#include "lm/model_type.hh"
+#include "lm/value_build.hh"
+#include "lm/weights.hh"
+#include "util/bit_packing.hh"
+
+#include <stdint.h>
+
+namespace lm {
+namespace ngram {
+
+// Template proxy for probing unigrams and middle.
+template <class Weights> class GenericProbingProxy {
+ public:
+ explicit GenericProbingProxy(const Weights &to) : to_(&to) {}
+
+ GenericProbingProxy() : to_(0) {}
+
+ bool Found() const { return to_ != 0; }
+
+ float Prob() const {
+ util::FloatEnc enc;
+ enc.f = to_->prob;
+ enc.i |= util::kSignBit;
+ return enc.f;
+ }
+
+ float Backoff() const { return to_->backoff; }
+
+ bool IndependentLeft() const {
+ util::FloatEnc enc;
+ enc.f = to_->prob;
+ return enc.i & util::kSignBit;
+ }
+
+ protected:
+ const Weights *to_;
+};
+
+// Basic proxy for trie unigrams.
+template <class Weights> class GenericTrieUnigramProxy {
+ public:
+ explicit GenericTrieUnigramProxy(const Weights &to) : to_(&to) {}
+
+ GenericTrieUnigramProxy() : to_(0) {}
+
+ bool Found() const { return to_ != 0; }
+ float Prob() const { return to_->prob; }
+ float Backoff() const { return to_->backoff; }
+ float Rest() const { return Prob(); }
+
+ protected:
+ const Weights *to_;
+};
+
+struct BackoffValue {
+ typedef ProbBackoff Weights;
+ static const ModelType kProbingModelType = PROBING;
+
+ class ProbingProxy : public GenericProbingProxy<Weights> {
+ public:
+ explicit ProbingProxy(const Weights &to) : GenericProbingProxy<Weights>(to) {}
+ ProbingProxy() {}
+ float Rest() const { return Prob(); }
+ };
+
+ class TrieUnigramProxy : public GenericTrieUnigramProxy<Weights> {
+ public:
+ explicit TrieUnigramProxy(const Weights &to) : GenericTrieUnigramProxy<Weights>(to) {}
+ TrieUnigramProxy() {}
+ float Rest() const { return Prob(); }
+ };
+
+ struct ProbingEntry {
+ typedef uint64_t Key;
+ typedef Weights Value;
+ uint64_t key;
+ ProbBackoff value;
+ uint64_t GetKey() const { return key; }
+ };
+
+ struct TrieUnigramValue {
+ Weights weights;
+ uint64_t next;
+ uint64_t Next() const { return next; }
+ };
+
+ const static bool kDifferentRest = false;
+
+ template <class Model, class C> void Callback(const Config &, unsigned int, typename Model::Vocabulary &, C &callback) {
+ NoRestBuild build;
+ callback(build);
+ }
+};
+
+struct RestValue {
+ typedef RestWeights Weights;
+ static const ModelType kProbingModelType = REST_PROBING;
+
+ class ProbingProxy : public GenericProbingProxy<RestWeights> {
+ public:
+ explicit ProbingProxy(const Weights &to) : GenericProbingProxy<RestWeights>(to) {}
+ ProbingProxy() {}
+ float Rest() const { return to_->rest; }
+ };
+
+ class TrieUnigramProxy : public GenericTrieUnigramProxy<Weights> {
+ public:
+ explicit TrieUnigramProxy(const Weights &to) : GenericTrieUnigramProxy<Weights>(to) {}
+ TrieUnigramProxy() {}
+ float Rest() const { return to_->rest; }
+ };
+
+// gcc 4.1 doesn't properly pack dependent types :-(.
+#pragma pack(push)
+#pragma pack(4)
+ struct ProbingEntry {
+ typedef uint64_t Key;
+ typedef Weights Value;
+ Key key;
+ Value value;
+ Key GetKey() const { return key; }
+ };
+
+ struct TrieUnigramValue {
+ Weights weights;
+ uint64_t next;
+ uint64_t Next() const { return next; }
+ };
+#pragma pack(pop)
+
+ const static bool kDifferentRest = true;
+
+ template <class Model, class C> void Callback(const Config &config, unsigned int order, typename Model::Vocabulary &vocab, C &callback) {
+ switch (config.rest_function) {
+ case Config::REST_MAX:
+ {
+ MaxRestBuild build;
+ callback(build);
+ }
+ break;
+ case Config::REST_LOWER:
+ {
+ LowerRestBuild<Model> build(config, order, vocab);
+ callback(build);
+ }
+ break;
+ }
+ }
+};
+
+} // namespace ngram
+} // namespace lm
+
+#endif // LM_VALUE_H
diff --git a/src/kenlm/lm/value_build.cc b/src/kenlm/lm/value_build.cc
new file mode 100644
index 0000000..ac623a6
--- /dev/null
+++ b/src/kenlm/lm/value_build.cc
@@ -0,0 +1,59 @@
+#include "lm/value_build.hh"
+
+#include "lm/model.hh"
+#include "lm/read_arpa.hh"
+
+namespace lm {
+namespace ngram {
+
+template <class Model> LowerRestBuild<Model>::LowerRestBuild(const Config &config, unsigned int order, const typename Model::Vocabulary &vocab) {
+ UTIL_THROW_IF(config.rest_lower_files.size() != order - 1, ConfigException, "This model has order " << order << " so there should be " << (order - 1) << " lower-order models for rest cost purposes.");
+ Config for_lower = config;
+ for_lower.write_mmap = NULL;
+ for_lower.rest_lower_files.clear();
+
+ // Unigram models aren't supported, so this is a custom loader.
+ // TODO: optimize the unigram loading?
+ {
+ util::FilePiece uni(config.rest_lower_files[0].c_str());
+ std::vector<uint64_t> number;
+ ReadARPACounts(uni, number);
+ UTIL_THROW_IF(number.size() != 1, FormatLoadException, "Expected the unigram model to have order 1, not " << number.size());
+ ReadNGramHeader(uni, 1);
+ unigrams_.resize(number[0]);
+ unigrams_[0] = config.unknown_missing_logprob;
+ PositiveProbWarn warn;
+ for (uint64_t i = 0; i < number[0]; ++i) {
+ WordIndex w;
+ Prob entry;
+ ReadNGram(uni, 1, vocab, &w, entry, warn);
+ unigrams_[w] = entry.prob;
+ }
+ }
+
+ try {
+ for (unsigned int i = 2; i < order; ++i) {
+ models_.push_back(new Model(config.rest_lower_files[i - 1].c_str(), for_lower));
+ UTIL_THROW_IF(models_.back()->Order() != i, FormatLoadException, "Lower order file " << config.rest_lower_files[i-1] << " should have order " << i);
+ }
+ } catch (...) {
+ for (typename std::vector<const Model*>::const_iterator i = models_.begin(); i != models_.end(); ++i) {
+ delete *i;
+ }
+ models_.clear();
+ throw;
+ }
+
+ // TODO: force/check same vocab.
+}
+
+template <class Model> LowerRestBuild<Model>::~LowerRestBuild() {
+ for (typename std::vector<const Model*>::const_iterator i = models_.begin(); i != models_.end(); ++i) {
+ delete *i;
+ }
+}
+
+template class LowerRestBuild<ProbingModel>;
+
+} // namespace ngram
+} // namespace lm
diff --git a/src/kenlm/lm/value_build.hh b/src/kenlm/lm/value_build.hh
new file mode 100644
index 0000000..49989ab
--- /dev/null
+++ b/src/kenlm/lm/value_build.hh
@@ -0,0 +1,97 @@
+#ifndef LM_VALUE_BUILD_H
+#define LM_VALUE_BUILD_H
+
+#include "lm/weights.hh"
+#include "lm/word_index.hh"
+#include "util/bit_packing.hh"
+
+#include <vector>
+
+namespace lm {
+namespace ngram {
+
+struct Config;
+struct BackoffValue;
+struct RestValue;
+
+class NoRestBuild {
+ public:
+ typedef BackoffValue Value;
+
+ NoRestBuild() {}
+
+ void SetRest(const WordIndex *, unsigned int, const Prob &/*prob*/) const {}
+ void SetRest(const WordIndex *, unsigned int, const ProbBackoff &) const {}
+
+ template <class Second> bool MarkExtends(ProbBackoff &weights, const Second &) const {
+ util::UnsetSign(weights.prob);
+ return false;
+ }
+
+ // Probing doesn't need to go back to unigram.
+ const static bool kMarkEvenLower = false;
+};
+
+class MaxRestBuild {
+ public:
+ typedef RestValue Value;
+
+ MaxRestBuild() {}
+
+ void SetRest(const WordIndex *, unsigned int, const Prob &/*prob*/) const {}
+ void SetRest(const WordIndex *, unsigned int, RestWeights &weights) const {
+ weights.rest = weights.prob;
+ util::SetSign(weights.rest);
+ }
+
+ bool MarkExtends(RestWeights &weights, const RestWeights &to) const {
+ util::UnsetSign(weights.prob);
+ if (weights.rest >= to.rest) return false;
+ weights.rest = to.rest;
+ return true;
+ }
+ bool MarkExtends(RestWeights &weights, const Prob &to) const {
+ util::UnsetSign(weights.prob);
+ if (weights.rest >= to.prob) return false;
+ weights.rest = to.prob;
+ return true;
+ }
+
+ // Probing does need to go back to unigram.
+ const static bool kMarkEvenLower = true;
+};
+
+template <class Model> class LowerRestBuild {
+ public:
+ typedef RestValue Value;
+
+ LowerRestBuild(const Config &config, unsigned int order, const typename Model::Vocabulary &vocab);
+
+ ~LowerRestBuild();
+
+ void SetRest(const WordIndex *, unsigned int, const Prob &/*prob*/) const {}
+ void SetRest(const WordIndex *vocab_ids, unsigned int n, RestWeights &weights) const {
+ typename Model::State ignored;
+ if (n == 1) {
+ weights.rest = unigrams_[*vocab_ids];
+ } else {
+ weights.rest = models_[n-2]->FullScoreForgotState(vocab_ids + 1, vocab_ids + n, *vocab_ids, ignored).prob;
+ }
+ }
+
+ template <class Second> bool MarkExtends(RestWeights &weights, const Second &) const {
+ util::UnsetSign(weights.prob);
+ return false;
+ }
+
+ const static bool kMarkEvenLower = false;
+
+ std::vector<float> unigrams_;
+
+ std::vector<const Model*> models_;
+};
+
+} // namespace ngram
+} // namespace lm
+
+#endif // LM_VALUE_BUILD_H
diff --git a/src/joshua/decoder/ff/lm/kenlm/lm/virtual_interface.cc b/src/kenlm/lm/virtual_interface.cc
similarity index 100%
rename from src/joshua/decoder/ff/lm/kenlm/lm/virtual_interface.cc
rename to src/kenlm/lm/virtual_interface.cc
diff --git a/src/kenlm/lm/virtual_interface.hh b/src/kenlm/lm/virtual_interface.hh
new file mode 100644
index 0000000..ea491fb
--- /dev/null
+++ b/src/kenlm/lm/virtual_interface.hh
@@ -0,0 +1,160 @@
+#ifndef LM_VIRTUAL_INTERFACE_H
+#define LM_VIRTUAL_INTERFACE_H
+
+#include "lm/return.hh"
+#include "lm/word_index.hh"
+#include "util/string_piece.hh"
+
+#include <string>
+#include <cstring>
+
+namespace lm {
+namespace base {
+
+template <class T, class U, class V> class ModelFacade;
+
+/* Vocabulary interface. Call Index(string) and get a word index for use in
+ * calling Model. It provides faster convenience functions for <s>, </s>, and
+ * <unk> although you can also find these using Index.
+ *
+ * Some models do not load the mapping from index to string. If you need this,
+ * check if the model Vocabulary class implements such a function and access it
+ * directly.
+ *
+ * The Vocabulary object is always owned by the Model and can be retrieved from
+ * the Model using BaseVocabulary() for this abstract interface or
+ * GetVocabulary() for the actual implementation (in which case you'll need the
+ * actual implementation of the Model too).
+ */
+class Vocabulary {
+ public:
+ virtual ~Vocabulary();
+
+ WordIndex BeginSentence() const { return begin_sentence_; }
+ WordIndex EndSentence() const { return end_sentence_; }
+ WordIndex NotFound() const { return not_found_; }
+
+ /* Most implementations allow StringPiece lookups and need only override
+ * Index(StringPiece). SRI requires null termination and overrides all
+ * three methods.
+ */
+ virtual WordIndex Index(const StringPiece &str) const = 0;
+ virtual WordIndex Index(const std::string &str) const {
+ return Index(StringPiece(str));
+ }
+ virtual WordIndex Index(const char *str) const {
+ return Index(StringPiece(str));
+ }
+
+ protected:
+ // Call SetSpecial afterward.
+ Vocabulary() {}
+
+ Vocabulary(WordIndex begin_sentence, WordIndex end_sentence, WordIndex not_found) {
+ SetSpecial(begin_sentence, end_sentence, not_found);
+ }
+
+ void SetSpecial(WordIndex begin_sentence, WordIndex end_sentence, WordIndex not_found);
+
+ WordIndex begin_sentence_, end_sentence_, not_found_;
+
+ private:
+ // Disable copy constructors. They're private and undefined.
+ // Ersatz boost::noncopyable.
+ Vocabulary(const Vocabulary &);
+ Vocabulary &operator=(const Vocabulary &);
+};
+
+/* There are two ways to access a Model.
+ *
+ *
+ * OPTION 1: Access the Model directly (e.g. lm::ngram::Model in model.hh).
+ *
+ * Every Model implements the scoring function:
+ * float Score(
+ * const Model::State &in_state,
+ * const WordIndex new_word,
+ * Model::State &out_state) const;
+ *
+ * It can also return the length of n-gram matched by the model:
+ * FullScoreReturn FullScore(
+ * const Model::State &in_state,
+ * const WordIndex new_word,
+ * Model::State &out_state) const;
+ *
+ *
+ * There are also accessor functions:
+ * const State &BeginSentenceState() const;
+ * const State &NullContextState() const;
+ * const Vocabulary &GetVocabulary() const;
+ * unsigned int Order() const;
+ *
+ * NB: In case you're wondering why the model implementation looks like it's
+ * missing these methods, see facade.hh.
+ *
+ * This is the fastest way to use a model and presents a normal State class to
+ * be included in a hypothesis state structure.
+ *
+ *
+ * OPTION 2: Use the virtual interface below.
+ *
+ * The virtual interface allows you to decide which Model to use at runtime
+ * without templatizing everything on the Model type. However, each Model has
+ * its own State class, so a single State cannot be efficiently provided (it
+ * would require using the maximum memory of any Model's State or memory
+ * allocation with each lookup). This means you become responsible for
+ * allocating memory with size StateSize() and passing it to the Score or
+ * FullScore functions provided here.
+ *
+ * For example, cdec has a std::string containing the entire state of a
+ * hypothesis. It can reserve StateSize bytes in this string for the model
+ * state.
+ *
+ * All the State objects are POD, so it's ok to use raw memory for storing
+ * State.
+ * in_state and out_state must not have the same address.
+ */
+class Model {
+ public:
+ virtual ~Model();
+
+ size_t StateSize() const { return state_size_; }
+ const void *BeginSentenceMemory() const { return begin_sentence_memory_; }
+ void BeginSentenceWrite(void *to) const { memcpy(to, begin_sentence_memory_, StateSize()); }
+ const void *NullContextMemory() const { return null_context_memory_; }
+ void NullContextWrite(void *to) const { memcpy(to, null_context_memory_, StateSize()); }
+
+ // Requires in_state != out_state
+ virtual float BaseScore(const void *in_state, const WordIndex new_word, void *out_state) const = 0;
+
+ // Requires in_state != out_state
+ virtual FullScoreReturn BaseFullScore(const void *in_state, const WordIndex new_word, void *out_state) const = 0;
+
+ // Prefer to use FullScore. The context words should be provided in reverse order.
+ virtual FullScoreReturn BaseFullScoreForgotState(const WordIndex *context_rbegin, const WordIndex *context_rend, const WordIndex new_word, void *out_state) const = 0;
+
+ unsigned char Order() const { return order_; }
+
+ const Vocabulary &BaseVocabulary() const { return *base_vocab_; }
+
+ private:
+ template <class T, class U, class V> friend class ModelFacade;
+ explicit Model(size_t state_size) : state_size_(state_size) {}
+
+ const size_t state_size_;
+ const void *begin_sentence_memory_, *null_context_memory_;
+
+ const Vocabulary *base_vocab_;
+
+ unsigned char order_;
+
+ // Disable copy constructors. They're private and undefined.
+ // Ersatz boost::noncopyable.
+ Model(const Model &);
+ Model &operator=(const Model &);
+};
+
+} // namespace base
+} // namespace lm
+
+#endif // LM_VIRTUAL_INTERFACE_H
diff --git a/src/kenlm/lm/vocab.cc b/src/kenlm/lm/vocab.cc
new file mode 100644
index 0000000..3d83e04
--- /dev/null
+++ b/src/kenlm/lm/vocab.cc
@@ -0,0 +1,329 @@
+#include "lm/vocab.hh"
+
+#include "lm/binary_format.hh"
+#include "lm/enumerate_vocab.hh"
+#include "lm/lm_exception.hh"
+#include "lm/config.hh"
+#include "lm/weights.hh"
+#include "util/exception.hh"
+#include "util/file_stream.hh"
+#include "util/file.hh"
+#include "util/joint_sort.hh"
+#include "util/murmur_hash.hh"
+#include "util/probing_hash_table.hh"
+
+#include <cstring>
+#include <string>
+
+namespace lm {
+namespace ngram {
+
+namespace detail {
+uint64_t HashForVocab(const char *str, std::size_t len) {
+ // This proved faster than Boost's hash in speed trials: total load time Murmur 67090000, Boost 72210000
+ // Chose to use 64A instead of native so binary format will be portable across 64 and 32 bit.
+ return util::MurmurHash64A(str, len, 0);
+}
+} // namespace detail
+
+namespace {
+// Normally static initialization is a bad idea but MurmurHash is pure arithmetic, so this is ok.
+const uint64_t kUnknownHash = detail::HashForVocab("<unk>", 5);
+// Sadly some LMs have <UNK>.
+const uint64_t kUnknownCapHash = detail::HashForVocab("<UNK>", 5);
+
+// TODO: replace with FilePiece.
+void ReadWords(int fd, EnumerateVocab *enumerate, WordIndex expected_count, uint64_t offset) {
+ util::SeekOrThrow(fd, offset);
+ // Check that we're at the right place by reading <unk> which is always first.
+ char check_unk[6];
+ util::ReadOrThrow(fd, check_unk, 6);
+ UTIL_THROW_IF(
+ memcmp(check_unk, "<unk>", 6),
+ FormatLoadException,
+ "Vocabulary words are in the wrong place. This could be because the binary file was built with stale gcc and old kenlm. Stale gcc, including the gcc distributed with RedHat and OS X, has a bug that ignores pragma pack for template-dependent types. New kenlm works around this, so you'll save memory but have to rebuild any binary files using the probing data structure.");
+ if (!enumerate) return;
+ enumerate->Add(0, "<unk>");
+
+ // Read all the words after unk.
+ const std::size_t kInitialRead = 16384;
+ std::string buf;
+ buf.reserve(kInitialRead + 100);
+ buf.resize(kInitialRead);
+ WordIndex index = 1; // Read <unk> already.
+ while (true) {
+ std::size_t got = util::ReadOrEOF(fd, &buf[0], kInitialRead);
+ if (got == 0) break;
+ buf.resize(got);
+ while (buf[buf.size() - 1]) {
+ char next_char;
+ util::ReadOrThrow(fd, &next_char, 1);
+ buf.push_back(next_char);
+ }
+ // Ok now we have null terminated strings.
+ for (const char *i = buf.data(); i != buf.data() + buf.size();) {
+ std::size_t length = strlen(i);
+ enumerate->Add(index++, StringPiece(i, length));
+ i += length + 1 /* null byte */;
+ }
+ }
+
+ UTIL_THROW_IF(expected_count != index, FormatLoadException, "The binary file has the wrong number of words at the end. This could be caused by a truncated binary file.");
+}
+
+// Constructor ordering madness.
+int SeekAndReturn(int fd, uint64_t start) {
+ util::SeekOrThrow(fd, start);
+ return fd;
+}
+} // namespace
+
+ImmediateWriteWordsWrapper::ImmediateWriteWordsWrapper(EnumerateVocab *inner, int fd, uint64_t start)
+ : inner_(inner), stream_(SeekAndReturn(fd, start)) {}
+
+WriteWordsWrapper::WriteWordsWrapper(EnumerateVocab *inner) : inner_(inner) {}
+
+void WriteWordsWrapper::Add(WordIndex index, const StringPiece &str) {
+ if (inner_) inner_->Add(index, str);
+ buffer_.append(str.data(), str.size());
+ buffer_.push_back(0);
+}
+
+void WriteWordsWrapper::Write(int fd, uint64_t start) {
+ util::SeekOrThrow(fd, start);
+ util::WriteOrThrow(fd, buffer_.data(), buffer_.size());
+ // Free memory from the string.
+ std::string for_swap;
+ std::swap(buffer_, for_swap);
+}
+
+SortedVocabulary::SortedVocabulary() : begin_(NULL), end_(NULL), enumerate_(NULL) {}
+
+uint64_t SortedVocabulary::Size(uint64_t entries, const Config &/*config*/) {
+ // Lead with the number of entries.
+ return sizeof(uint64_t) + sizeof(uint64_t) * entries;
+}
+
+void SortedVocabulary::SetupMemory(void *start, std::size_t allocated, std::size_t entries, const Config &config) {
+ assert(allocated >= Size(entries, config));
+ // Leave space for number of entries.
+ begin_ = reinterpret_cast<uint64_t*>(start) + 1;
+ end_ = begin_;
+ saw_unk_ = false;
+}
+
+void SortedVocabulary::Relocate(void *new_start) {
+ std::size_t delta = end_ - begin_;
+ begin_ = reinterpret_cast<uint64_t*>(new_start) + 1;
+ end_ = begin_ + delta;
+}
+
+void SortedVocabulary::ConfigureEnumerate(EnumerateVocab *to, std::size_t max_entries) {
+ enumerate_ = to;
+ if (enumerate_) {
+ enumerate_->Add(0, "<unk>");
+ strings_to_enumerate_.resize(max_entries);
+ }
+}
+
+WordIndex SortedVocabulary::Insert(const StringPiece &str) {
+ uint64_t hashed = detail::HashForVocab(str);
+ if (hashed == kUnknownHash || hashed == kUnknownCapHash) {
+ saw_unk_ = true;
+ return 0;
+ }
+ *end_ = hashed;
+ if (enumerate_) {
+ void *copied = string_backing_.Allocate(str.size());
+ memcpy(copied, str.data(), str.size());
+ strings_to_enumerate_[end_ - begin_] = StringPiece(static_cast<const char*>(copied), str.size());
+ }
+ ++end_;
+ // This is 1 + the offset where it was inserted to make room for unk.
+ return end_ - begin_;
+}
+
+void SortedVocabulary::FinishedLoading(ProbBackoff *reorder) {
+ GenericFinished(reorder);
+}
+
+namespace {
+#pragma pack(push)
+#pragma pack(4)
+struct RenumberEntry {
+ uint64_t hash;
+ const char *str;
+ WordIndex old;
+ bool operator<(const RenumberEntry &other) const {
+ return hash < other.hash;
+ }
+};
+#pragma pack(pop)
+} // namespace
+
+void SortedVocabulary::ComputeRenumbering(WordIndex types, int from_words, int to_words, std::vector<WordIndex> &mapping) {
+ mapping.clear();
+ uint64_t file_size = util::SizeOrThrow(from_words);
+ util::scoped_memory strings;
+ util::MapRead(util::POPULATE_OR_READ, from_words, 0, file_size, strings);
+ const char *const start = static_cast<const char*>(strings.get());
+ UTIL_THROW_IF(memcmp(start, "<unk>", 6), FormatLoadException, "Vocab file does not begin with <unk> followed by null");
+ std::vector<RenumberEntry> entries;
+ entries.reserve(types - 1);
+ RenumberEntry entry;
+ entry.old = 1;
+ for (entry.str = start + 6 /* skip <unk>\0 */; entry.str < start + file_size; ++entry.old) {
+ StringPiece str(entry.str, strlen(entry.str));
+ entry.hash = detail::HashForVocab(str);
+ entries.push_back(entry);
+ entry.str += str.size() + 1;
+ }
+ UTIL_THROW_IF2(entries.size() != types - 1, "Wrong number of vocab ids. Got " << (entries.size() + 1) << " expected " << types);
+ std::sort(entries.begin(), entries.end());
+ // Write out new vocab file.
+ {
+ util::FileStream out(to_words);
+ out << "<unk>" << '\0';
+ for (std::vector<RenumberEntry>::const_iterator i = entries.begin(); i != entries.end(); ++i) {
+ out << i->str << '\0';
+ }
+ }
+ strings.reset();
+
+ mapping.resize(types);
+ mapping[0] = 0; // <unk>
+ for (std::vector<RenumberEntry>::const_iterator i = entries.begin(); i != entries.end(); ++i) {
+ mapping[i->old] = i + 1 - entries.begin();
+ }
+}
+
+void SortedVocabulary::Populated() {
+ saw_unk_ = true;
+ SetSpecial(Index("<s>"), Index("</s>"), 0);
+ bound_ = end_ - begin_ + 1;
+ *(reinterpret_cast<uint64_t*>(begin_) - 1) = end_ - begin_;
+}
+
+void SortedVocabulary::LoadedBinary(bool have_words, int fd, EnumerateVocab *to, uint64_t offset) {
+ end_ = begin_ + *(reinterpret_cast<const uint64_t*>(begin_) - 1);
+ SetSpecial(Index("<s>"), Index("</s>"), 0);
+ bound_ = end_ - begin_ + 1;
+ if (have_words) ReadWords(fd, to, bound_, offset);
+}
+
+template <class T> void SortedVocabulary::GenericFinished(T *reorder) {
+ if (enumerate_) {
+ if (!strings_to_enumerate_.empty()) {
+ util::PairedIterator<T*, StringPiece*> values(reorder + 1, &*strings_to_enumerate_.begin());
+ util::JointSort(begin_, end_, values);
+ }
+ for (WordIndex i = 0; i < static_cast<WordIndex>(end_ - begin_); ++i) {
+ // <unk> strikes again: +1 here.
+ enumerate_->Add(i + 1, strings_to_enumerate_[i]);
+ }
+ strings_to_enumerate_.clear();
+ string_backing_.FreeAll();
+ } else {
+ util::JointSort(begin_, end_, reorder + 1);
+ }
+ SetSpecial(Index("<s>"), Index("</s>"), 0);
+ // Save size. Excludes UNK.
+ *(reinterpret_cast<uint64_t*>(begin_) - 1) = end_ - begin_;
+ // Includes UNK.
+ bound_ = end_ - begin_ + 1;
+}
+
+namespace {
+const unsigned int kProbingVocabularyVersion = 0;
+} // namespace
+
+namespace detail {
+struct ProbingVocabularyHeader {
+ // Lowest unused vocab id. This is also the number of words, including <unk>.
+ unsigned int version;
+ WordIndex bound;
+};
+} // namespace detail
+
+ProbingVocabulary::ProbingVocabulary() : enumerate_(NULL) {}
+
+uint64_t ProbingVocabulary::Size(uint64_t entries, float probing_multiplier) {
+ return ALIGN8(sizeof(detail::ProbingVocabularyHeader)) + Lookup::Size(entries, probing_multiplier);
+}
+
+uint64_t ProbingVocabulary::Size(uint64_t entries, const Config &config) {
+ return Size(entries, config.probing_multiplier);
+}
+
+void ProbingVocabulary::SetupMemory(void *start, std::size_t allocated) {
+ header_ = static_cast<detail::ProbingVocabularyHeader*>(start);
+ lookup_ = Lookup(static_cast<uint8_t*>(start) + ALIGN8(sizeof(detail::ProbingVocabularyHeader)), allocated);
+ bound_ = 1;
+ saw_unk_ = false;
+}
+
+void ProbingVocabulary::Relocate(void *new_start) {
+ header_ = static_cast<detail::ProbingVocabularyHeader*>(new_start);
+ lookup_.Relocate(static_cast<uint8_t*>(new_start) + ALIGN8(sizeof(detail::ProbingVocabularyHeader)));
+}
+
+void ProbingVocabulary::ConfigureEnumerate(EnumerateVocab *to, std::size_t /*max_entries*/) {
+ enumerate_ = to;
+ if (enumerate_) {
+ enumerate_->Add(0, "<unk>");
+ }
+}
+
+WordIndex ProbingVocabulary::Insert(const StringPiece &str) {
+ uint64_t hashed = detail::HashForVocab(str);
+ // Prevent unknown from going into the table.
+ if (hashed == kUnknownHash || hashed == kUnknownCapHash) {
+ saw_unk_ = true;
+ return 0;
+ } else {
+ if (enumerate_) enumerate_->Add(bound_, str);
+ lookup_.Insert(ProbingVocabularyEntry::Make(hashed, bound_));
+ return bound_++;
+ }
+}
+
+void ProbingVocabulary::InternalFinishedLoading() {
+ lookup_.FinishedInserting();
+ header_->bound = bound_;
+ header_->version = kProbingVocabularyVersion;
+ SetSpecial(Index("<s>"), Index("</s>"), 0);
+}
+
+void ProbingVocabulary::LoadedBinary(bool have_words, int fd, EnumerateVocab *to, uint64_t offset) {
+ UTIL_THROW_IF(header_->version != kProbingVocabularyVersion, FormatLoadException, "The binary file has probing version " << header_->version << " but the code expects version " << kProbingVocabularyVersion << ". Please rerun build_binary using the same version of the code.");
+ bound_ = header_->bound;
+ SetSpecial(Index("<s>"), Index("</s>"), 0);
+ if (have_words) ReadWords(fd, to, bound_, offset);
+}
+
+void MissingUnknown(const Config &config) throw(SpecialWordMissingException) {
+ switch(config.unknown_missing) {
+ case SILENT:
+ return;
+ case COMPLAIN:
+ if (config.messages) *config.messages << "The ARPA file is missing <unk>. Substituting log10 probability " << config.unknown_missing_logprob << "." << std::endl;
+ break;
+ case THROW_UP:
+ UTIL_THROW(SpecialWordMissingException, "The ARPA file is missing <unk> and the model is configured to throw an exception.");
+ }
+}
+
+void MissingSentenceMarker(const Config &config, const char *str) throw(SpecialWordMissingException) {
+ switch (config.sentence_marker_missing) {
+ case SILENT:
+ return;
+ case COMPLAIN:
+ if (config.messages) *config.messages << "Missing special word " << str << "; will treat it as <unk>.";
+ break;
+ case THROW_UP:
+ UTIL_THROW(SpecialWordMissingException, "The ARPA file is missing " << str << " and the model is configured to reject these models. Run build_binary -s to disable this check.");
+ }
+}
+
+} // namespace ngram
+} // namespace lm
diff --git a/src/kenlm/lm/vocab.hh b/src/kenlm/lm/vocab.hh
new file mode 100644
index 0000000..59740e8
--- /dev/null
+++ b/src/kenlm/lm/vocab.hh
@@ -0,0 +1,279 @@
+#ifndef LM_VOCAB_H
+#define LM_VOCAB_H
+
+#include "lm/enumerate_vocab.hh"
+#include "lm/lm_exception.hh"
+#include "lm/virtual_interface.hh"
+#include "util/file_stream.hh"
+#include "util/murmur_hash.hh"
+#include "util/pool.hh"
+#include "util/probing_hash_table.hh"
+#include "util/sorted_uniform.hh"
+#include "util/string_piece.hh"
+
+#include <limits>
+#include <string>
+#include <vector>
+
+namespace lm {
+struct ProbBackoff;
+class EnumerateVocab;
+
+namespace ngram {
+struct Config;
+
+namespace detail {
+uint64_t HashForVocab(const char *str, std::size_t len);
+inline uint64_t HashForVocab(const StringPiece &str) {
+ return HashForVocab(str.data(), str.length());
+}
+struct ProbingVocabularyHeader;
+} // namespace detail
+
+// Writes words immediately to a file instead of buffering, because we know
+// where in the file to put them.
+class ImmediateWriteWordsWrapper : public EnumerateVocab {
+ public:
+ ImmediateWriteWordsWrapper(EnumerateVocab *inner, int fd, uint64_t start);
+
+ void Add(WordIndex index, const StringPiece &str) {
+ stream_ << str << '\0';
+ if (inner_) inner_->Add(index, str);
+ }
+
+ private:
+ EnumerateVocab *inner_;
+
+ util::FileStream stream_;
+};
+
+// When the binary size isn't known yet.
+class WriteWordsWrapper : public EnumerateVocab {
+ public:
+ WriteWordsWrapper(EnumerateVocab *inner);
+
+ void Add(WordIndex index, const StringPiece &str);
+
+ const std::string &Buffer() const { return buffer_; }
+ void Write(int fd, uint64_t start);
+
+ private:
+ EnumerateVocab *inner_;
+
+ std::string buffer_;
+};
+
+// Vocabulary based on sorted uniform find storing only uint64_t values and using their offsets as indices.
+class SortedVocabulary : public base::Vocabulary {
+ public:
+ SortedVocabulary();
+
+ WordIndex Index(const StringPiece &str) const {
+ const uint64_t *found;
+ if (util::BoundedSortedUniformFind<const uint64_t*, util::IdentityAccessor<uint64_t>, util::Pivot64>(
+ util::IdentityAccessor<uint64_t>(),
+ begin_ - 1, 0,
+ end_, std::numeric_limits<uint64_t>::max(),
+ detail::HashForVocab(str), found)) {
+ return found - begin_ + 1; // +1 because <unk> is 0 and does not appear in the lookup table.
+ } else {
+ return 0;
+ }
+ }
+
+ // Size for purposes of file writing
+ static uint64_t Size(uint64_t entries, const Config &config);
+
+ /* Read null-delimited words from file from_words, renumber according to
+ * hash order, write null-delimited words to to_words, and create a mapping
+ * from old id to new id. The 0th vocab word must be <unk>.
+ */
+ static void ComputeRenumbering(WordIndex types, int from_words, int to_words, std::vector<WordIndex> &mapping);
+
+ // Vocab words are [0, Bound()) Only valid after FinishedLoading/LoadedBinary.
+ WordIndex Bound() const { return bound_; }
+
+ // Everything else is for populating. I'm too lazy to hide and friend these, but you'll only get a const reference anyway.
+ void SetupMemory(void *start, std::size_t allocated, std::size_t entries, const Config &config);
+
+ void Relocate(void *new_start);
+
+ void ConfigureEnumerate(EnumerateVocab *to, std::size_t max_entries);
+
+ // Insert and FinishedLoading go together.
+ WordIndex Insert(const StringPiece &str);
+ // Reorders reorder_vocab so that the IDs are sorted.
+ void FinishedLoading(ProbBackoff *reorder_vocab);
+
+  // Trie stores the correct counts including <unk> in the header.  If this was previously sized based on a count excluding <unk>, padding with 8 bytes will make it the correct size based on a count including <unk>.
+ std::size_t UnkCountChangePadding() const { return SawUnk() ? 0 : sizeof(uint64_t); }
+
+ bool SawUnk() const { return saw_unk_; }
+
+ void LoadedBinary(bool have_words, int fd, EnumerateVocab *to, uint64_t offset);
+
+ uint64_t *&EndHack() { return end_; }
+
+ void Populated();
+
+ private:
+ template <class T> void GenericFinished(T *reorder);
+
+ uint64_t *begin_, *end_;
+
+ WordIndex bound_;
+
+ bool saw_unk_;
+
+ EnumerateVocab *enumerate_;
+
+ // Actual strings. Used only when loading from ARPA and enumerate_ != NULL
+ util::Pool string_backing_;
+
+ std::vector<StringPiece> strings_to_enumerate_;
+};
+
+#pragma pack(push)
+#pragma pack(4)
+struct ProbingVocabularyEntry {
+ uint64_t key;
+ WordIndex value;
+
+ typedef uint64_t Key;
+ uint64_t GetKey() const { return key; }
+ void SetKey(uint64_t to) { key = to; }
+
+ static ProbingVocabularyEntry Make(uint64_t key, WordIndex value) {
+ ProbingVocabularyEntry ret;
+ ret.key = key;
+ ret.value = value;
+ return ret;
+ }
+};
+#pragma pack(pop)
+
+// Vocabulary storing a map from uint64_t to WordIndex.
+class ProbingVocabulary : public base::Vocabulary {
+ public:
+ ProbingVocabulary();
+
+ WordIndex Index(const StringPiece &str) const {
+ Lookup::ConstIterator i;
+ return lookup_.Find(detail::HashForVocab(str), i) ? i->value : 0;
+ }
+
+ static uint64_t Size(uint64_t entries, float probing_multiplier);
+ // This just unwraps Config to get the probing_multiplier.
+ static uint64_t Size(uint64_t entries, const Config &config);
+
+ // Vocab words are [0, Bound()).
+ WordIndex Bound() const { return bound_; }
+
+ // Everything else is for populating. I'm too lazy to hide and friend these, but you'll only get a const reference anyway.
+ void SetupMemory(void *start, std::size_t allocated);
+ void SetupMemory(void *start, std::size_t allocated, std::size_t /*entries*/, const Config &/*config*/) {
+ SetupMemory(start, allocated);
+ }
+
+ void Relocate(void *new_start);
+
+ void ConfigureEnumerate(EnumerateVocab *to, std::size_t max_entries);
+
+ WordIndex Insert(const StringPiece &str);
+
+ template <class Weights> void FinishedLoading(Weights * /*reorder_vocab*/) {
+ InternalFinishedLoading();
+ }
+
+ std::size_t UnkCountChangePadding() const { return 0; }
+
+ bool SawUnk() const { return saw_unk_; }
+
+ void LoadedBinary(bool have_words, int fd, EnumerateVocab *to, uint64_t offset);
+
+ private:
+ void InternalFinishedLoading();
+
+ typedef util::ProbingHashTable<ProbingVocabularyEntry, util::IdentityHash> Lookup;
+
+ Lookup lookup_;
+
+ WordIndex bound_;
+
+ bool saw_unk_;
+
+ EnumerateVocab *enumerate_;
+
+ detail::ProbingVocabularyHeader *header_;
+};
+
+void MissingUnknown(const Config &config) throw(SpecialWordMissingException);
+void MissingSentenceMarker(const Config &config, const char *str) throw(SpecialWordMissingException);
+
+template <class Vocab> void CheckSpecials(const Config &config, const Vocab &vocab) throw(SpecialWordMissingException) {
+ if (!vocab.SawUnk()) MissingUnknown(config);
+ if (vocab.BeginSentence() == vocab.NotFound()) MissingSentenceMarker(config, "<s>");
+ if (vocab.EndSentence() == vocab.NotFound()) MissingSentenceMarker(config, "</s>");
+}
+
+class WriteUniqueWords {
+ public:
+ explicit WriteUniqueWords(int fd) : word_list_(fd) {}
+
+ void operator()(const StringPiece &word) {
+ word_list_ << word << '\0';
+ }
+
+ private:
+ util::FileStream word_list_;
+};
+
+class NoOpUniqueWords {
+ public:
+ NoOpUniqueWords() {}
+ void operator()(const StringPiece &word) {}
+};
+
+template <class NewWordAction = NoOpUniqueWords> class GrowableVocab {
+ public:
+ static std::size_t MemUsage(WordIndex content) {
+ return Lookup::MemUsage(content > 2 ? content : 2);
+ }
+
+  // Does not take ownership of new_word_construct.
+ template <class NewWordConstruct> GrowableVocab(WordIndex initial_size, const NewWordConstruct &new_word_construct = NewWordAction())
+ : lookup_(initial_size), new_word_(new_word_construct) {
+ FindOrInsert("<unk>"); // Force 0
+ FindOrInsert("<s>"); // Force 1
+ FindOrInsert("</s>"); // Force 2
+ }
+
+ WordIndex Index(const StringPiece &str) const {
+ Lookup::ConstIterator i;
+ return lookup_.Find(detail::HashForVocab(str), i) ? i->value : 0;
+ }
+
+ WordIndex FindOrInsert(const StringPiece &word) {
+ ProbingVocabularyEntry entry = ProbingVocabularyEntry::Make(util::MurmurHashNative(word.data(), word.size()), Size());
+ Lookup::MutableIterator it;
+ if (!lookup_.FindOrInsert(entry, it)) {
+ new_word_(word);
+ UTIL_THROW_IF(Size() >= std::numeric_limits<lm::WordIndex>::max(), VocabLoadException, "Too many vocabulary words. Change WordIndex to uint64_t in lm/word_index.hh");
+ }
+ return it->value;
+ }
+
+ WordIndex Size() const { return lookup_.Size(); }
+
+ private:
+ typedef util::AutoProbing<ProbingVocabularyEntry, util::IdentityHash> Lookup;
+
+ Lookup lookup_;
+
+ NewWordAction new_word_;
+};
+
+} // namespace ngram
+} // namespace lm
+
+#endif // LM_VOCAB_H
diff --git a/src/kenlm/lm/weights.hh b/src/kenlm/lm/weights.hh
new file mode 100644
index 0000000..f143127
--- /dev/null
+++ b/src/kenlm/lm/weights.hh
@@ -0,0 +1,22 @@
+#ifndef LM_WEIGHTS_H
+#define LM_WEIGHTS_H
+
+// Weights for n-grams. Probability and possibly a backoff.
+
+namespace lm {
+struct Prob {
+ float prob;
+};
+// No inheritance so this will be a POD.
+struct ProbBackoff {
+ float prob;
+ float backoff;
+};
+struct RestWeights {
+ float prob;
+ float backoff;
+ float rest;
+};
+
+} // namespace lm
+#endif // LM_WEIGHTS_H
diff --git a/src/kenlm/lm/word_index.hh b/src/kenlm/lm/word_index.hh
new file mode 100644
index 0000000..59b24d7
--- /dev/null
+++ b/src/kenlm/lm/word_index.hh
@@ -0,0 +1,15 @@
+// Separate header because this is used often.
+#ifndef LM_WORD_INDEX_H
+#define LM_WORD_INDEX_H
+
+#include <climits>
+
+namespace lm {
+typedef unsigned int WordIndex;
+const WordIndex kMaxWordIndex = UINT_MAX;
+const WordIndex kUNK = 0;
+} // namespace lm
+
+typedef lm::WordIndex LMWordIndex;
+
+#endif
diff --git a/src/kenlm/lm/wrappers/README b/src/kenlm/lm/wrappers/README
new file mode 100644
index 0000000..56c34c2
--- /dev/null
+++ b/src/kenlm/lm/wrappers/README
@@ -0,0 +1,3 @@
+This directory is for wrappers around other people's LMs, presenting an interface similar to KenLM's. You will need to have their LM installed.
+
+NPLM is a work in progress.
diff --git a/src/kenlm/lm/wrappers/nplm.cc b/src/kenlm/lm/wrappers/nplm.cc
new file mode 100644
index 0000000..9bd7c1e
--- /dev/null
+++ b/src/kenlm/lm/wrappers/nplm.cc
@@ -0,0 +1,116 @@
+#include "lm/wrappers/nplm.hh"
+#include "util/exception.hh"
+#include "util/file.hh"
+
+#include <algorithm>
+#include <cstring>
+
+#include "neuralLM.h"
+
+namespace lm {
+namespace np {
+
+Vocabulary::Vocabulary(const nplm::vocabulary &vocab)
+ : base::Vocabulary(vocab.lookup_word("<s>"), vocab.lookup_word("</s>"), vocab.lookup_word("<unk>")),
+ vocab_(vocab), null_word_(vocab.lookup_word("<null>")) {}
+
+Vocabulary::~Vocabulary() {}
+
+WordIndex Vocabulary::Index(const std::string &str) const {
+ return vocab_.lookup_word(str);
+}
+
+class Backend {
+ public:
+ Backend(const nplm::neuralLM &from, const std::size_t cache_size) : lm_(from), ngram_(from.get_order()) {
+ lm_.set_cache(cache_size);
+ }
+
+ nplm::neuralLM &LM() { return lm_; }
+ const nplm::neuralLM &LM() const { return lm_; }
+
+ Eigen::Matrix<int,Eigen::Dynamic,1> &staging_ngram() { return ngram_; }
+
+ double lookup_from_staging() { return lm_.lookup_ngram(ngram_); }
+
+ int order() const { return lm_.get_order(); }
+
+ private:
+ nplm::neuralLM lm_;
+ Eigen::Matrix<int,Eigen::Dynamic,1> ngram_;
+};
+
+bool Model::Recognize(const std::string &name) {
+ try {
+ util::scoped_fd file(util::OpenReadOrThrow(name.c_str()));
+ char magic_check[16];
+ util::ReadOrThrow(file.get(), magic_check, sizeof(magic_check));
+ const char nnlm_magic[] = "\\config\nversion ";
+ return !memcmp(magic_check, nnlm_magic, 16);
+ } catch (const util::Exception &) {
+ return false;
+ }
+}
+
+namespace {
+nplm::neuralLM *LoadNPLM(const std::string &file) {
+ util::scoped_ptr<nplm::neuralLM> ret(new nplm::neuralLM());
+ ret->read(file);
+ return ret.release();
+}
+} // namespace
+
+Model::Model(const std::string &file, std::size_t cache)
+ : base_instance_(LoadNPLM(file)), vocab_(base_instance_->get_vocabulary()), cache_size_(cache) {
+ UTIL_THROW_IF(base_instance_->get_order() > NPLM_MAX_ORDER, util::Exception, "This NPLM has order " << (unsigned int)base_instance_->get_order() << " but the KenLM wrapper was compiled with " << NPLM_MAX_ORDER << ". Change the defintion of NPLM_MAX_ORDER and recompile.");
+ // log10 compatible with backoff models.
+ base_instance_->set_log_base(10.0);
+ State begin_sentence, null_context;
+ std::fill(begin_sentence.words, begin_sentence.words + NPLM_MAX_ORDER - 1, base_instance_->lookup_word("<s>"));
+ null_word_ = base_instance_->lookup_word("<null>");
+ std::fill(null_context.words, null_context.words + NPLM_MAX_ORDER - 1, null_word_);
+
+ Init(begin_sentence, null_context, vocab_, base_instance_->get_order());
+}
+
+Model::~Model() {}
+
+FullScoreReturn Model::FullScore(const State &from, const WordIndex new_word, State &out_state) const {
+ Backend *backend = backend_.get();
+ if (!backend) {
+ backend = new Backend(*base_instance_, cache_size_);
+ backend_.reset(backend);
+ }
+ // State is in natural word order.
+ FullScoreReturn ret;
+ for (int i = 0; i < backend->order() - 1; ++i) {
+ backend->staging_ngram()(i) = from.words[i];
+ }
+ backend->staging_ngram()(backend->order() - 1) = new_word;
+ ret.prob = backend->lookup_from_staging();
+ // Always say full order.
+ ret.ngram_length = backend->order();
+ // Shift everything down by one.
+ memcpy(out_state.words, from.words + 1, sizeof(WordIndex) * (backend->order() - 2));
+ out_state.words[backend->order() - 2] = new_word;
+ // Fill in trailing words with zeros so state comparison works.
+ memset(out_state.words + backend->order() - 1, 0, sizeof(WordIndex) * (NPLM_MAX_ORDER - backend->order()));
+ return ret;
+}
+
+// TODO: optimize with direct call?
+FullScoreReturn Model::FullScoreForgotState(const WordIndex *context_rbegin, const WordIndex *context_rend, const WordIndex new_word, State &out_state) const {
+ // State is in natural word order. The API here specifies reverse order.
+ std::size_t state_length = std::min<std::size_t>(Order() - 1, context_rend - context_rbegin);
+ State state;
+ // Pad with null words.
+ for (lm::WordIndex *i = state.words; i < state.words + Order() - 1 - state_length; ++i) {
+ *i = null_word_;
+ }
+ // Put new words at the end.
+ std::reverse_copy(context_rbegin, context_rbegin + state_length, state.words + Order() - 1 - state_length);
+ return FullScore(state, new_word, out_state);
+}
+
+} // namespace np
+} // namespace lm
diff --git a/src/kenlm/lm/wrappers/nplm.hh b/src/kenlm/lm/wrappers/nplm.hh
new file mode 100644
index 0000000..82b38fd
--- /dev/null
+++ b/src/kenlm/lm/wrappers/nplm.hh
@@ -0,0 +1,85 @@
+#ifndef LM_WRAPPERS_NPLM_H
+#define LM_WRAPPERS_NPLM_H
+
+#include "lm/facade.hh"
+#include "lm/max_order.hh"
+#include "util/string_piece.hh"
+
+#include <boost/thread/tss.hpp>
+#include <boost/scoped_ptr.hpp>
+
+/* Wrapper to NPLM "by Ashish Vaswani, with contributions from David Chiang
+ * and Victoria Fossum."
+ * http://nlg.isi.edu/software/nplm/
+ */
+
+namespace nplm {
+class vocabulary;
+class neuralLM;
+} // namespace nplm
+
+namespace lm {
+namespace np {
+
+class Vocabulary : public base::Vocabulary {
+ public:
+ Vocabulary(const nplm::vocabulary &vocab);
+
+ ~Vocabulary();
+
+ WordIndex Index(const std::string &str) const;
+
+ // TODO: lobby them to support StringPiece
+ WordIndex Index(const StringPiece &str) const {
+ return Index(std::string(str.data(), str.size()));
+ }
+
+ lm::WordIndex NullWord() const { return null_word_; }
+
+ private:
+ const nplm::vocabulary &vocab_;
+
+ const lm::WordIndex null_word_;
+};
+
+// Sorry for imposing my limitations on your code.
+#define NPLM_MAX_ORDER 7
+
+struct State {
+ WordIndex words[NPLM_MAX_ORDER - 1];
+};
+
+class Backend;
+
+class Model : public lm::base::ModelFacade<Model, State, Vocabulary> {
+ private:
+ typedef lm::base::ModelFacade<Model, State, Vocabulary> P;
+
+ public:
+ // Does this look like an NPLM?
+ static bool Recognize(const std::string &file);
+
+ explicit Model(const std::string &file, std::size_t cache_size = 1 << 20);
+
+ ~Model();
+
+ FullScoreReturn FullScore(const State &from, const WordIndex new_word, State &out_state) const;
+
+ FullScoreReturn FullScoreForgotState(const WordIndex *context_rbegin, const WordIndex *context_rend, const WordIndex new_word, State &out_state) const;
+
+ private:
+ boost::scoped_ptr<nplm::neuralLM> base_instance_;
+
+ mutable boost::thread_specific_ptr<Backend> backend_;
+
+ Vocabulary vocab_;
+
+ lm::WordIndex null_word_;
+
+ const std::size_t cache_size_;
+};
+
+} // namespace np
+} // namespace lm
+
+#endif // LM_WRAPPERS_NPLM_H
diff --git a/src/kenlm/python/_kenlm.pxd b/src/kenlm/python/_kenlm.pxd
new file mode 100644
index 0000000..e0c0248
--- /dev/null
+++ b/src/kenlm/python/_kenlm.pxd
@@ -0,0 +1,33 @@
+cdef extern from "lm/word_index.hh" namespace "lm":
+ ctypedef unsigned WordIndex
+
+cdef extern from "lm/return.hh" namespace "lm":
+ cdef struct FullScoreReturn:
+ float prob
+ unsigned char ngram_length
+
+cdef extern from "lm/state.hh" namespace "lm::ngram":
+ cdef cppclass State :
+ int Compare(const State &other) const
+
+ int hash_value(const State &state)
+
+cdef extern from "lm/virtual_interface.hh" namespace "lm::base":
+ cdef cppclass Vocabulary:
+ WordIndex Index(char*)
+ WordIndex BeginSentence()
+ WordIndex EndSentence()
+ WordIndex NotFound()
+
+ ctypedef Vocabulary const_Vocabulary "const lm::base::Vocabulary"
+
+ cdef cppclass Model:
+ void BeginSentenceWrite(void *)
+ void NullContextWrite(void *)
+ unsigned int Order()
+ const_Vocabulary& BaseVocabulary()
+ float BaseScore(void *in_state, WordIndex new_word, void *out_state)
+ FullScoreReturn BaseFullScore(void *in_state, WordIndex new_word, void *out_state)
+
+cdef extern from "lm/model.hh" namespace "lm::ngram":
+ cdef Model *LoadVirtual(char *) except +
diff --git a/src/kenlm/python/example.py b/src/kenlm/python/example.py
new file mode 100644
index 0000000..8a18f3a
--- /dev/null
+++ b/src/kenlm/python/example.py
@@ -0,0 +1,28 @@
+import os
+import kenlm
+
+LM = os.path.join(os.path.dirname(__file__), '..', 'lm', 'test.arpa')
+model = kenlm.LanguageModel(LM)
+print('{0}-gram model'.format(model.order))
+
+sentence = 'language modeling is fun .'
+print(sentence)
+print(model.score(sentence))
+
+# Check that total full score = direct score
+def score(s):
+ return sum(prob for prob, _, _ in model.full_scores(s))
+
+assert (abs(score(sentence) - model.score(sentence)) < 1e-3)
+
+# Show scores and n-gram matches
+words = ['<s>'] + sentence.split() + ['</s>']
+for i, (prob, length, oov) in enumerate(model.full_scores(sentence)):
+ print('{0} {1}: {2}'.format(prob, length, ' '.join(words[i+2-length:i+2])))
+ if oov:
+ print('\t"{0}" is an OOV'.format(words[i+1]))
+
+# Find out-of-vocabulary words
+for w in words:
+ if not w in model:
+ print('"{0}" is an OOV'.format(w))
diff --git a/src/kenlm/python/kenlm.cpp b/src/kenlm/python/kenlm.cpp
new file mode 100644
index 0000000..5bf47ff
--- /dev/null
+++ b/src/kenlm/python/kenlm.cpp
@@ -0,0 +1,7379 @@
+/* Generated by Cython 0.22 */
+
+#define PY_SSIZE_T_CLEAN
+#ifndef CYTHON_USE_PYLONG_INTERNALS
+#ifdef PYLONG_BITS_IN_DIGIT
+#define CYTHON_USE_PYLONG_INTERNALS 0
+#else
+#include "pyconfig.h"
+#ifdef PYLONG_BITS_IN_DIGIT
+#define CYTHON_USE_PYLONG_INTERNALS 1
+#else
+#define CYTHON_USE_PYLONG_INTERNALS 0
+#endif
+#endif
+#endif
+#include "Python.h"
+#ifndef Py_PYTHON_H
+ #error Python headers needed to compile C extensions, please install development version of Python.
+#elif PY_VERSION_HEX < 0x02060000 || (0x03000000 <= PY_VERSION_HEX && PY_VERSION_HEX < 0x03020000)
+ #error Cython requires Python 2.6+ or Python 3.2+.
+#else
+#define CYTHON_ABI "0_22"
+#include <stddef.h>
+#ifndef offsetof
+#define offsetof(type, member) ( (size_t) & ((type*)0) -> member )
+#endif
+#if !defined(WIN32) && !defined(MS_WINDOWS)
+ #ifndef __stdcall
+ #define __stdcall
+ #endif
+ #ifndef __cdecl
+ #define __cdecl
+ #endif
+ #ifndef __fastcall
+ #define __fastcall
+ #endif
+#endif
+#ifndef DL_IMPORT
+ #define DL_IMPORT(t) t
+#endif
+#ifndef DL_EXPORT
+ #define DL_EXPORT(t) t
+#endif
+#ifndef PY_LONG_LONG
+ #define PY_LONG_LONG LONG_LONG
+#endif
+#ifndef Py_HUGE_VAL
+ #define Py_HUGE_VAL HUGE_VAL
+#endif
+#ifdef PYPY_VERSION
+#define CYTHON_COMPILING_IN_PYPY 1
+#define CYTHON_COMPILING_IN_CPYTHON 0
+#else
+#define CYTHON_COMPILING_IN_PYPY 0
+#define CYTHON_COMPILING_IN_CPYTHON 1
+#endif
+#if CYTHON_COMPILING_IN_PYPY && PY_VERSION_HEX < 0x02070600 && !defined(Py_OptimizeFlag)
+#define Py_OptimizeFlag 0
+#endif
+#define __PYX_BUILD_PY_SSIZE_T "n"
+#define CYTHON_FORMAT_SSIZE_T "z"
+#if PY_MAJOR_VERSION < 3
+ #define __Pyx_BUILTIN_MODULE_NAME "__builtin__"
+ #define __Pyx_PyCode_New(a, k, l, s, f, code, c, n, v, fv, cell, fn, name, fline, lnos) \
+ PyCode_New(a+k, l, s, f, code, c, n, v, fv, cell, fn, name, fline, lnos)
+ #define __Pyx_DefaultClassType PyClass_Type
+#else
+ #define __Pyx_BUILTIN_MODULE_NAME "builtins"
+ #define __Pyx_PyCode_New(a, k, l, s, f, code, c, n, v, fv, cell, fn, name, fline, lnos) \
+ PyCode_New(a, k, l, s, f, code, c, n, v, fv, cell, fn, name, fline, lnos)
+ #define __Pyx_DefaultClassType PyType_Type
+#endif
+#if PY_MAJOR_VERSION >= 3
+ #define Py_TPFLAGS_CHECKTYPES 0
+ #define Py_TPFLAGS_HAVE_INDEX 0
+ #define Py_TPFLAGS_HAVE_NEWBUFFER 0
+#endif
+#if PY_VERSION_HEX < 0x030400a1 && !defined(Py_TPFLAGS_HAVE_FINALIZE)
+ #define Py_TPFLAGS_HAVE_FINALIZE 0
+#endif
+#if PY_VERSION_HEX > 0x03030000 && defined(PyUnicode_KIND)
+ #define CYTHON_PEP393_ENABLED 1
+ #define __Pyx_PyUnicode_READY(op) (likely(PyUnicode_IS_READY(op)) ? \
+ 0 : _PyUnicode_Ready((PyObject *)(op)))
+ #define __Pyx_PyUnicode_GET_LENGTH(u) PyUnicode_GET_LENGTH(u)
+ #define __Pyx_PyUnicode_READ_CHAR(u, i) PyUnicode_READ_CHAR(u, i)
+ #define __Pyx_PyUnicode_KIND(u) PyUnicode_KIND(u)
+ #define __Pyx_PyUnicode_DATA(u) PyUnicode_DATA(u)
+ #define __Pyx_PyUnicode_READ(k, d, i) PyUnicode_READ(k, d, i)
+#else
+ #define CYTHON_PEP393_ENABLED 0
+ #define __Pyx_PyUnicode_READY(op) (0)
+ #define __Pyx_PyUnicode_GET_LENGTH(u) PyUnicode_GET_SIZE(u)
+ #define __Pyx_PyUnicode_READ_CHAR(u, i) ((Py_UCS4)(PyUnicode_AS_UNICODE(u)[i]))
+ #define __Pyx_PyUnicode_KIND(u) (sizeof(Py_UNICODE))
+ #define __Pyx_PyUnicode_DATA(u) ((void*)PyUnicode_AS_UNICODE(u))
+ #define __Pyx_PyUnicode_READ(k, d, i) ((void)(k), (Py_UCS4)(((Py_UNICODE*)d)[i]))
+#endif
+#if CYTHON_COMPILING_IN_PYPY
+ #define __Pyx_PyUnicode_Concat(a, b) PyNumber_Add(a, b)
+ #define __Pyx_PyUnicode_ConcatSafe(a, b) PyNumber_Add(a, b)
+ #define __Pyx_PyFrozenSet_Size(s) PyObject_Size(s)
+#else
+ #define __Pyx_PyUnicode_Concat(a, b) PyUnicode_Concat(a, b)
+ #define __Pyx_PyUnicode_ConcatSafe(a, b) ((unlikely((a) == Py_None) || unlikely((b) == Py_None)) ? \
+ PyNumber_Add(a, b) : __Pyx_PyUnicode_Concat(a, b))
+ #define __Pyx_PyFrozenSet_Size(s) PySet_Size(s)
+#endif
+#define __Pyx_PyString_FormatSafe(a, b) ((unlikely((a) == Py_None)) ? PyNumber_Remainder(a, b) : __Pyx_PyString_Format(a, b))
+#define __Pyx_PyUnicode_FormatSafe(a, b) ((unlikely((a) == Py_None)) ? PyNumber_Remainder(a, b) : PyUnicode_Format(a, b))
+#if PY_MAJOR_VERSION >= 3
+ #define __Pyx_PyString_Format(a, b) PyUnicode_Format(a, b)
+#else
+ #define __Pyx_PyString_Format(a, b) PyString_Format(a, b)
+#endif
+#if PY_MAJOR_VERSION >= 3
+ #define PyBaseString_Type PyUnicode_Type
+ #define PyStringObject PyUnicodeObject
+ #define PyString_Type PyUnicode_Type
+ #define PyString_Check PyUnicode_Check
+ #define PyString_CheckExact PyUnicode_CheckExact
+#endif
+#if PY_MAJOR_VERSION >= 3
+ #define __Pyx_PyBaseString_Check(obj) PyUnicode_Check(obj)
+ #define __Pyx_PyBaseString_CheckExact(obj) PyUnicode_CheckExact(obj)
+#else
+ #define __Pyx_PyBaseString_Check(obj) (PyString_Check(obj) || PyUnicode_Check(obj))
+ #define __Pyx_PyBaseString_CheckExact(obj) (PyString_CheckExact(obj) || PyUnicode_CheckExact(obj))
+#endif
+#ifndef PySet_CheckExact
+ #define PySet_CheckExact(obj) (Py_TYPE(obj) == &PySet_Type)
+#endif
+#define __Pyx_TypeCheck(obj, type) PyObject_TypeCheck(obj, (PyTypeObject *)type)
+#if PY_MAJOR_VERSION >= 3
+ #define PyIntObject PyLongObject
+ #define PyInt_Type PyLong_Type
+ #define PyInt_Check(op) PyLong_Check(op)
+ #define PyInt_CheckExact(op) PyLong_CheckExact(op)
+ #define PyInt_FromString PyLong_FromString
+ #define PyInt_FromUnicode PyLong_FromUnicode
+ #define PyInt_FromLong PyLong_FromLong
+ #define PyInt_FromSize_t PyLong_FromSize_t
+ #define PyInt_FromSsize_t PyLong_FromSsize_t
+ #define PyInt_AsLong PyLong_AsLong
+ #define PyInt_AS_LONG PyLong_AS_LONG
+ #define PyInt_AsSsize_t PyLong_AsSsize_t
+ #define PyInt_AsUnsignedLongMask PyLong_AsUnsignedLongMask
+ #define PyInt_AsUnsignedLongLongMask PyLong_AsUnsignedLongLongMask
+ #define PyNumber_Int PyNumber_Long
+#endif
+#if PY_MAJOR_VERSION >= 3
+ #define PyBoolObject PyLongObject
+#endif
+#if PY_MAJOR_VERSION >= 3 && CYTHON_COMPILING_IN_PYPY
+ #ifndef PyUnicode_InternFromString
+ #define PyUnicode_InternFromString(s) PyUnicode_FromString(s)
+ #endif
+#endif
+#if PY_VERSION_HEX < 0x030200A4
+ typedef long Py_hash_t;
+ #define __Pyx_PyInt_FromHash_t PyInt_FromLong
+ #define __Pyx_PyInt_AsHash_t PyInt_AsLong
+#else
+ #define __Pyx_PyInt_FromHash_t PyInt_FromSsize_t
+ #define __Pyx_PyInt_AsHash_t PyInt_AsSsize_t
+#endif
+#if PY_MAJOR_VERSION >= 3
+ #define __Pyx_PyMethod_New(func, self, klass) ((self) ? PyMethod_New(func, self) : PyInstanceMethod_New(func))
+#else
+ #define __Pyx_PyMethod_New(func, self, klass) PyMethod_New(func, self, klass)
+#endif
+#ifndef CYTHON_INLINE
+ #if defined(__GNUC__)
+ #define CYTHON_INLINE __inline__
+ #elif defined(_MSC_VER)
+ #define CYTHON_INLINE __inline
+ #elif defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L
+ #define CYTHON_INLINE inline
+ #else
+ #define CYTHON_INLINE
+ #endif
+#endif
+#ifndef CYTHON_RESTRICT
+ #if defined(__GNUC__)
+ #define CYTHON_RESTRICT __restrict__
+ #elif defined(_MSC_VER) && _MSC_VER >= 1400
+ #define CYTHON_RESTRICT __restrict
+ #elif defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L
+ #define CYTHON_RESTRICT restrict
+ #else
+ #define CYTHON_RESTRICT
+ #endif
+#endif
+#ifdef NAN
+#define __PYX_NAN() ((float) NAN)
+#else
+static CYTHON_INLINE float __PYX_NAN() {
+ /* Initialize NaN. The sign is irrelevant, an exponent with all bits 1 and
+ a nonzero mantissa means NaN. If the first bit in the mantissa is 1, it is
+ a quiet NaN. */
+ float value;
+ memset(&value, 0xFF, sizeof(value));
+ return value;
+}
+#endif
+#define __Pyx_void_to_None(void_result) (void_result, Py_INCREF(Py_None), Py_None)
+#ifdef __cplusplus
+template<typename T>
+void __Pyx_call_destructor(T* x) {
+ x->~T();
+}
+template<typename T>
+class __Pyx_FakeReference {
+ public:
+ __Pyx_FakeReference() : ptr(NULL) { }
+ __Pyx_FakeReference(T& ref) : ptr(&ref) { }
+ T *operator->() { return ptr; }
+ operator T&() { return *ptr; }
+ private:
+ T *ptr;
+};
+#endif
+
+
+#if PY_MAJOR_VERSION >= 3
+ #define __Pyx_PyNumber_Divide(x,y) PyNumber_TrueDivide(x,y)
+ #define __Pyx_PyNumber_InPlaceDivide(x,y) PyNumber_InPlaceTrueDivide(x,y)
+#else
+ #define __Pyx_PyNumber_Divide(x,y) PyNumber_Divide(x,y)
+ #define __Pyx_PyNumber_InPlaceDivide(x,y) PyNumber_InPlaceDivide(x,y)
+#endif
+
+#ifndef __PYX_EXTERN_C
+ #ifdef __cplusplus
+ #define __PYX_EXTERN_C extern "C"
+ #else
+ #define __PYX_EXTERN_C extern
+ #endif
+#endif
+
+#if defined(WIN32) || defined(MS_WINDOWS)
+#define _USE_MATH_DEFINES
+#endif
+#include <math.h>
+#define __PYX_HAVE__kenlm
+#define __PYX_HAVE_API__kenlm
+#include "lm/word_index.hh"
+#include "lm/return.hh"
+#include "lm/state.hh"
+#include "lm/virtual_interface.hh"
+#include "lm/model.hh"
+#include "ios"
+#include "new"
+#include "stdexcept"
+#include "typeinfo"
+#ifdef _OPENMP
+#include <omp.h>
+#endif /* _OPENMP */
+
+#ifdef PYREX_WITHOUT_ASSERTIONS
+#define CYTHON_WITHOUT_ASSERTIONS
+#endif
+
+#ifndef CYTHON_UNUSED
+# if defined(__GNUC__)
+# if !(defined(__cplusplus)) || (__GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 4))
+# define CYTHON_UNUSED __attribute__ ((__unused__))
+# else
+# define CYTHON_UNUSED
+# endif
+# elif defined(__ICC) || (defined(__INTEL_COMPILER) && !defined(_MSC_VER))
+# define CYTHON_UNUSED __attribute__ ((__unused__))
+# else
+# define CYTHON_UNUSED
+# endif
+#endif
+typedef struct {PyObject **p; char *s; const Py_ssize_t n; const char* encoding;
+ const char is_unicode; const char is_str; const char intern; } __Pyx_StringTabEntry;
+
+#define __PYX_DEFAULT_STRING_ENCODING_IS_ASCII 0
+#define __PYX_DEFAULT_STRING_ENCODING_IS_DEFAULT 0
+#define __PYX_DEFAULT_STRING_ENCODING ""
+#define __Pyx_PyObject_FromString __Pyx_PyBytes_FromString
+#define __Pyx_PyObject_FromStringAndSize __Pyx_PyBytes_FromStringAndSize
+#define __Pyx_fits_Py_ssize_t(v, type, is_signed) ( \
+ (sizeof(type) < sizeof(Py_ssize_t)) || \
+ (sizeof(type) > sizeof(Py_ssize_t) && \
+ likely(v < (type)PY_SSIZE_T_MAX || \
+ v == (type)PY_SSIZE_T_MAX) && \
+ (!is_signed || likely(v > (type)PY_SSIZE_T_MIN || \
+ v == (type)PY_SSIZE_T_MIN))) || \
+ (sizeof(type) == sizeof(Py_ssize_t) && \
+ (is_signed || likely(v < (type)PY_SSIZE_T_MAX || \
+ v == (type)PY_SSIZE_T_MAX))) )
+static CYTHON_INLINE char* __Pyx_PyObject_AsString(PyObject*);
+static CYTHON_INLINE char* __Pyx_PyObject_AsStringAndSize(PyObject*, Py_ssize_t* length);
+#define __Pyx_PyByteArray_FromString(s) PyByteArray_FromStringAndSize((const char*)s, strlen((const char*)s))
+#define __Pyx_PyByteArray_FromStringAndSize(s, l) PyByteArray_FromStringAndSize((const char*)s, l)
+#define __Pyx_PyBytes_FromString PyBytes_FromString
+#define __Pyx_PyBytes_FromStringAndSize PyBytes_FromStringAndSize
+static CYTHON_INLINE PyObject* __Pyx_PyUnicode_FromString(const char*);
+#if PY_MAJOR_VERSION < 3
+ #define __Pyx_PyStr_FromString __Pyx_PyBytes_FromString
+ #define __Pyx_PyStr_FromStringAndSize __Pyx_PyBytes_FromStringAndSize
+#else
+ #define __Pyx_PyStr_FromString __Pyx_PyUnicode_FromString
+ #define __Pyx_PyStr_FromStringAndSize __Pyx_PyUnicode_FromStringAndSize
+#endif
+#define __Pyx_PyObject_AsSString(s) ((signed char*) __Pyx_PyObject_AsString(s))
+#define __Pyx_PyObject_AsUString(s) ((unsigned char*) __Pyx_PyObject_AsString(s))
+#define __Pyx_PyObject_FromCString(s) __Pyx_PyObject_FromString((const char*)s)
+#define __Pyx_PyBytes_FromCString(s) __Pyx_PyBytes_FromString((const char*)s)
+#define __Pyx_PyByteArray_FromCString(s) __Pyx_PyByteArray_FromString((const char*)s)
+#define __Pyx_PyStr_FromCString(s) __Pyx_PyStr_FromString((const char*)s)
+#define __Pyx_PyUnicode_FromCString(s) __Pyx_PyUnicode_FromString((const char*)s)
+#if PY_MAJOR_VERSION < 3
+static CYTHON_INLINE size_t __Pyx_Py_UNICODE_strlen(const Py_UNICODE *u)
+{
+ const Py_UNICODE *u_end = u;
+ while (*u_end++) ;
+ return (size_t)(u_end - u - 1);
+}
+#else
+#define __Pyx_Py_UNICODE_strlen Py_UNICODE_strlen
+#endif
+#define __Pyx_PyUnicode_FromUnicode(u) PyUnicode_FromUnicode(u, __Pyx_Py_UNICODE_strlen(u))
+#define __Pyx_PyUnicode_FromUnicodeAndLength PyUnicode_FromUnicode
+#define __Pyx_PyUnicode_AsUnicode PyUnicode_AsUnicode
+#define __Pyx_Owned_Py_None(b) (Py_INCREF(Py_None), Py_None)
+#define __Pyx_PyBool_FromLong(b) ((b) ? (Py_INCREF(Py_True), Py_True) : (Py_INCREF(Py_False), Py_False))
+static CYTHON_INLINE int __Pyx_PyObject_IsTrue(PyObject*);
+static CYTHON_INLINE PyObject* __Pyx_PyNumber_Int(PyObject* x);
+static CYTHON_INLINE Py_ssize_t __Pyx_PyIndex_AsSsize_t(PyObject*);
+static CYTHON_INLINE PyObject * __Pyx_PyInt_FromSize_t(size_t);
+#if CYTHON_COMPILING_IN_CPYTHON
+#define __pyx_PyFloat_AsDouble(x) (PyFloat_CheckExact(x) ? PyFloat_AS_DOUBLE(x) : PyFloat_AsDouble(x))
+#else
+#define __pyx_PyFloat_AsDouble(x) PyFloat_AsDouble(x)
+#endif
+#define __pyx_PyFloat_AsFloat(x) ((float) __pyx_PyFloat_AsDouble(x))
+#if PY_MAJOR_VERSION < 3 && __PYX_DEFAULT_STRING_ENCODING_IS_ASCII
+static int __Pyx_sys_getdefaultencoding_not_ascii;
+/* Module-init helper for c_string_encoding=ascii builds.  Checks that the
+ * interpreter's sys.getdefaultencoding() is ASCII, or at least a superset of
+ * it (verified by round-tripping all 128 ASCII code points through the
+ * default encoding).  Sets __Pyx_sys_getdefaultencoding_not_ascii.
+ * Returns 0 on success, -1 with a Python exception set on failure. */
+static int __Pyx_init_sys_getdefaultencoding_params(void) {
+ PyObject* sys;
+ PyObject* default_encoding = NULL;
+ PyObject* ascii_chars_u = NULL;
+ PyObject* ascii_chars_b = NULL;
+ const char* default_encoding_c;
+ sys = PyImport_ImportModule("sys");
+ if (!sys) goto bad;
+ default_encoding = PyObject_CallMethod(sys, (char*) "getdefaultencoding", NULL);
+ Py_DECREF(sys);
+ if (!default_encoding) goto bad;
+ default_encoding_c = PyBytes_AsString(default_encoding);
+ if (!default_encoding_c) goto bad;
+ if (strcmp(default_encoding_c, "ascii") == 0) {
+ __Pyx_sys_getdefaultencoding_not_ascii = 0;
+ } else {
+ char ascii_chars[128];
+ int c;
+ for (c = 0; c < 128; c++) {
+ ascii_chars[c] = c;
+ }
+ __Pyx_sys_getdefaultencoding_not_ascii = 1;
+ /* Round-trip the full ASCII range through the default encoding; any
+  * mismatch means ASCII-encoded C strings would be mis-decoded. */
+ ascii_chars_u = PyUnicode_DecodeASCII(ascii_chars, 128, NULL);
+ if (!ascii_chars_u) goto bad;
+ ascii_chars_b = PyUnicode_AsEncodedString(ascii_chars_u, default_encoding_c, NULL);
+ if (!ascii_chars_b || !PyBytes_Check(ascii_chars_b) || memcmp(ascii_chars, PyBytes_AS_STRING(ascii_chars_b), 128) != 0) {
+ PyErr_Format(
+ PyExc_ValueError,
+ "This module compiled with c_string_encoding=ascii, but default encoding '%.200s' is not a superset of ascii.",
+ default_encoding_c);
+ goto bad;
+ }
+ Py_DECREF(ascii_chars_u);
+ Py_DECREF(ascii_chars_b);
+ }
+ Py_DECREF(default_encoding);
+ return 0;
+bad:
+ Py_XDECREF(default_encoding);
+ Py_XDECREF(ascii_chars_u);
+ Py_XDECREF(ascii_chars_b);
+ return -1;
+}
+#endif
+#if __PYX_DEFAULT_STRING_ENCODING_IS_DEFAULT && PY_MAJOR_VERSION >= 3
+#define __Pyx_PyUnicode_FromStringAndSize(c_str, size) PyUnicode_DecodeUTF8(c_str, size, NULL)
+#else
+#define __Pyx_PyUnicode_FromStringAndSize(c_str, size) PyUnicode_Decode(c_str, size, __PYX_DEFAULT_STRING_ENCODING, NULL)
+#if __PYX_DEFAULT_STRING_ENCODING_IS_DEFAULT
+static char* __PYX_DEFAULT_STRING_ENCODING;
+/* Module-init helper for c_string_encoding=default builds.  Caches a copy of
+ * sys.getdefaultencoding() in __PYX_DEFAULT_STRING_ENCODING (never freed; it
+ * lives for the lifetime of the module).  Returns 0 on success, -1 with a
+ * Python exception set on failure. */
+static int __Pyx_init_sys_getdefaultencoding_params(void) {
+ PyObject* sys;
+ PyObject* default_encoding = NULL;
+ char* default_encoding_c;
+ sys = PyImport_ImportModule("sys");
+ if (!sys) goto bad;
+ default_encoding = PyObject_CallMethod(sys, (char*) (const char*) "getdefaultencoding", NULL);
+ Py_DECREF(sys);
+ if (!default_encoding) goto bad;
+ default_encoding_c = PyBytes_AsString(default_encoding);
+ if (!default_encoding_c) goto bad;
+ /* BUG FIX: allocate room for the NUL terminator that strcpy() writes;
+  * malloc(strlen(...)) was one byte short, a heap buffer overflow. */
+ __PYX_DEFAULT_STRING_ENCODING = (char*) malloc(strlen(default_encoding_c) + 1);
+ if (!__PYX_DEFAULT_STRING_ENCODING) goto bad;
+ strcpy(__PYX_DEFAULT_STRING_ENCODING, default_encoding_c);
+ Py_DECREF(default_encoding);
+ return 0;
+bad:
+ Py_XDECREF(default_encoding);
+ return -1;
+}
+#endif
+#endif
+
+
+/* Test for GCC > 2.95 */
+#if defined(__GNUC__) && (__GNUC__ > 2 || (__GNUC__ == 2 && (__GNUC_MINOR__ > 95)))
+ #define likely(x) __builtin_expect(!!(x), 1)
+ #define unlikely(x) __builtin_expect(!!(x), 0)
+#else /* !__GNUC__ or GCC < 2.95 */
+ #define likely(x) (x)
+ #define unlikely(x) (x)
+#endif /* __GNUC__ */
+
+static PyObject *__pyx_m;
+static PyObject *__pyx_d;
+static PyObject *__pyx_b;
+static PyObject *__pyx_empty_tuple;
+static PyObject *__pyx_empty_bytes;
+static int __pyx_lineno;
+static int __pyx_clineno = 0;
+static const char * __pyx_cfilenm= __FILE__;
+static const char *__pyx_filename;
+
+
+static const char *__pyx_f[] = {
+ "kenlm.pyx",
+};
+
+/*--- Type declarations ---*/
+struct __pyx_obj_5kenlm_FullScoreReturn;
+struct __pyx_obj_5kenlm_State;
+struct __pyx_obj_5kenlm_LanguageModel;
+struct __pyx_obj_5kenlm_Model;
+struct __pyx_obj_5kenlm___pyx_scope_struct__full_scores;
+
+/* "kenlm.pyx":11
+ * raise TypeError('Cannot convert %s to string' % type(data))
+ *
+ * cdef class FullScoreReturn: # <<<<<<<<<<<<<<
+ * """
+ * Wrapper around FullScoreReturn.
+ */
+/* Instance layout for the Python-visible kenlm.FullScoreReturn extension
+ * type (wraps the values of a full-score query; see kenlm.pyx:11). */
+struct __pyx_obj_5kenlm_FullScoreReturn {
+  PyObject_HEAD
+  float log_prob;   /* log probability of the scored word */
+  int ngram_length; /* length of the matched n-gram */
+  int oov;          /* declared `cdef bint` in the pyx: nonzero if out-of-vocabulary */
+};
+
+
+/* "kenlm.pyx":44
+ * return self.oov
+ *
+ * cdef class State: # <<<<<<<<<<<<<<
+ * """
+ * Wrapper around lm::ngram::State so that python code can make incremental queries.
+ */
+/* Instance layout for kenlm.State: embeds a lm::ngram::State by value so
+ * Python code can make incremental queries (see kenlm.pyx:44). */
+struct __pyx_obj_5kenlm_State {
+  PyObject_HEAD
+  lm::ngram::State _c_state;
+};
+
+
+/* "kenlm.pyx":74
+ *
+ *
+ * cdef class LanguageModel: # <<<<<<<<<<<<<<
+ * """
+ * This is not a strict wrapper, the interface is more pythonic.
+ */
+/* Instance layout for kenlm.LanguageModel (see kenlm.pyx:74). */
+struct __pyx_obj_5kenlm_LanguageModel {
+  PyObject_HEAD
+  lm::base::Model *model;            /* owned C++ model (freed in __dealloc__) -- TODO confirm ownership against dealloc impl */
+  PyObject *path;                    /* model file path, used by __repr__/__reduce__ */
+  const lm::base::Vocabulary *vocab; /* vocabulary view borrowed from `model` -- presumably; verify */
+};
+
+
+/* "kenlm.pyx":159
+ * return (_kenlm.LanguageModel, (self.path,))
+ *
+ * cdef class Model: # <<<<<<<<<<<<<<
+ * """
+ * This is closer to a wrapper around lm::ngram::Model.
+ */
+/* Instance layout for kenlm.Model (see kenlm.pyx:159).  Field-for-field
+ * identical to __pyx_obj_5kenlm_LanguageModel above. */
+struct __pyx_obj_5kenlm_Model {
+  PyObject_HEAD
+  lm::base::Model *model;            /* wrapped lm::ngram model */
+  PyObject *path;                    /* model file path */
+  const lm::base::Vocabulary *vocab; /* vocabulary lookup for word -> WordIndex */
+};
+
+
+/* "kenlm.pyx":122
+ * return total
+ *
+ * def full_scores(self, sentence, bos = True, eos = True): # <<<<<<<<<<<<<<
+ * """
+ * full_scores(sentence, bos = True, eos = True) -> generate full scores (prob, ngram length, oov)
+ */
+/* Closure object for the LanguageModel.full_scores generator (kenlm.pyx:122):
+ * holds every local variable that must survive across `yield`s. */
+struct __pyx_obj_5kenlm___pyx_scope_struct__full_scores {
+  PyObject_HEAD
+  PyObject *__pyx_v_bos;                            /* `bos` argument */
+  PyObject *__pyx_v_eos;                            /* `eos` argument */
+  lm::ngram::State __pyx_v_out_state;
+  struct lm::FullScoreReturn __pyx_v_ret;
+  struct __pyx_obj_5kenlm_LanguageModel *__pyx_v_self;
+  PyObject *__pyx_v_sentence;
+  lm::ngram::State __pyx_v_state;
+  float __pyx_v_total;
+  lm::WordIndex __pyx_v_wid;
+  PyObject *__pyx_v_word;
+  PyObject *__pyx_v_words;
+  PyObject *__pyx_t_0;                              /* generator-internal temporaries */
+  Py_ssize_t __pyx_t_1;
+};
+
+
+/* --- Runtime support code (head) --- */
+#ifndef CYTHON_REFNANNY
+ #define CYTHON_REFNANNY 0
+#endif
+#if CYTHON_REFNANNY
+ typedef struct {
+ void (*INCREF)(void*, PyObject*, int);
+ void (*DECREF)(void*, PyObject*, int);
+ void (*GOTREF)(void*, PyObject*, int);
+ void (*GIVEREF)(void*, PyObject*, int);
+ void* (*SetupContext)(const char*, int, const char*);
+ void (*FinishContext)(void**);
+ } __Pyx_RefNannyAPIStruct;
+ static __Pyx_RefNannyAPIStruct *__Pyx_RefNanny = NULL;
+ static __Pyx_RefNannyAPIStruct *__Pyx_RefNannyImportAPI(const char *modname);
+ #define __Pyx_RefNannyDeclarations void *__pyx_refnanny = NULL;
+#ifdef WITH_THREAD
+ #define __Pyx_RefNannySetupContext(name, acquire_gil) \
+ if (acquire_gil) { \
+ PyGILState_STATE __pyx_gilstate_save = PyGILState_Ensure(); \
+ __pyx_refnanny = __Pyx_RefNanny->SetupContext((name), __LINE__, __FILE__); \
+ PyGILState_Release(__pyx_gilstate_save); \
+ } else { \
+ __pyx_refnanny = __Pyx_RefNanny->SetupContext((name), __LINE__, __FILE__); \
+ }
+#else
+ #define __Pyx_RefNannySetupContext(name, acquire_gil) \
+ __pyx_refnanny = __Pyx_RefNanny->SetupContext((name), __LINE__, __FILE__)
+#endif
+ #define __Pyx_RefNannyFinishContext() \
+ __Pyx_RefNanny->FinishContext(&__pyx_refnanny)
+ #define __Pyx_INCREF(r) __Pyx_RefNanny->INCREF(__pyx_refnanny, (PyObject *)(r), __LINE__)
+ #define __Pyx_DECREF(r) __Pyx_RefNanny->DECREF(__pyx_refnanny, (PyObject *)(r), __LINE__)
+ #define __Pyx_GOTREF(r) __Pyx_RefNanny->GOTREF(__pyx_refnanny, (PyObject *)(r), __LINE__)
+ #define __Pyx_GIVEREF(r) __Pyx_RefNanny->GIVEREF(__pyx_refnanny, (PyObject *)(r), __LINE__)
+ #define __Pyx_XINCREF(r) do { if((r) != NULL) {__Pyx_INCREF(r); }} while(0)
+ #define __Pyx_XDECREF(r) do { if((r) != NULL) {__Pyx_DECREF(r); }} while(0)
+ #define __Pyx_XGOTREF(r) do { if((r) != NULL) {__Pyx_GOTREF(r); }} while(0)
+ #define __Pyx_XGIVEREF(r) do { if((r) != NULL) {__Pyx_GIVEREF(r);}} while(0)
+#else
+ #define __Pyx_RefNannyDeclarations
+ #define __Pyx_RefNannySetupContext(name, acquire_gil)
+ #define __Pyx_RefNannyFinishContext()
+ #define __Pyx_INCREF(r) Py_INCREF(r)
+ #define __Pyx_DECREF(r) Py_DECREF(r)
+ #define __Pyx_GOTREF(r)
+ #define __Pyx_GIVEREF(r)
+ #define __Pyx_XINCREF(r) Py_XINCREF(r)
+ #define __Pyx_XDECREF(r) Py_XDECREF(r)
+ #define __Pyx_XGOTREF(r)
+ #define __Pyx_XGIVEREF(r)
+#endif
+#define __Pyx_XDECREF_SET(r, v) do { \
+ PyObject *tmp = (PyObject *) r; \
+ r = v; __Pyx_XDECREF(tmp); \
+ } while (0)
+#define __Pyx_DECREF_SET(r, v) do { \
+ PyObject *tmp = (PyObject *) r; \
+ r = v; __Pyx_DECREF(tmp); \
+ } while (0)
+#define __Pyx_CLEAR(r) do { PyObject* tmp = ((PyObject*)(r)); r = NULL; __Pyx_DECREF(tmp);} while(0)
+#define __Pyx_XCLEAR(r) do { if((r) != NULL) {PyObject* tmp = ((PyObject*)(r)); r = NULL; __Pyx_DECREF(tmp);}} while(0)
+
+#if CYTHON_COMPILING_IN_CPYTHON
+/* Fast attribute lookup for interned attribute names: call the type's
+ * tp_getattro / tp_getattr slot directly, falling back to the generic
+ * PyObject_GetAttr only when neither slot is set. */
+static CYTHON_INLINE PyObject* __Pyx_PyObject_GetAttrStr(PyObject* obj, PyObject* attr_name) {
+ PyTypeObject* tp = Py_TYPE(obj);
+ if (likely(tp->tp_getattro))
+ return tp->tp_getattro(obj, attr_name);
+#if PY_MAJOR_VERSION < 3
+ if (likely(tp->tp_getattr)) /* legacy char*-based slot exists only on Py2 */
+ return tp->tp_getattr(obj, PyString_AS_STRING(attr_name));
+#endif
+ return PyObject_GetAttr(obj, attr_name);
+}
+#else
+#define __Pyx_PyObject_GetAttrStr(o,n) PyObject_GetAttr(o,n)
+#endif
+
+static PyObject *__Pyx_GetBuiltinName(PyObject *name);
+
+#if CYTHON_COMPILING_IN_CPYTHON
+static CYTHON_INLINE PyObject* __Pyx_PyObject_Call(PyObject *func, PyObject *arg, PyObject *kw);
+#else
+#define __Pyx_PyObject_Call(func, arg, kw) PyObject_Call(func, arg, kw)
+#endif
+
+static CYTHON_INLINE void __Pyx_ErrRestore(PyObject *type, PyObject *value, PyObject *tb);
+static CYTHON_INLINE void __Pyx_ErrFetch(PyObject **type, PyObject **value, PyObject **tb);
+
+static void __Pyx_Raise(PyObject *type, PyObject *value, PyObject *tb, PyObject *cause);
+
+static void __Pyx_RaiseArgtupleInvalid(const char* func_name, int exact,
+ Py_ssize_t num_min, Py_ssize_t num_max, Py_ssize_t num_found);
+
+static void __Pyx_RaiseDoubleKeywordsError(const char* func_name, PyObject* kw_name);
+
+static int __Pyx_ParseOptionalKeywords(PyObject *kwds, PyObject **argnames[], \
+ PyObject *kwds2, PyObject *values[], Py_ssize_t num_pos_args, \
+ const char* function_name);
+
+static CYTHON_INLINE int __Pyx_ArgTypeTest(PyObject *obj, PyTypeObject *type, int none_allowed,
+ const char *name, int exact);
+
+static CYTHON_INLINE PyObject *__Pyx_GetModuleGlobalName(PyObject *name);
+
+#if CYTHON_COMPILING_IN_CPYTHON
+static CYTHON_INLINE PyObject* __Pyx_PyObject_CallMethO(PyObject *func, PyObject *arg);
+#endif
+
+static CYTHON_INLINE PyObject* __Pyx_PyObject_CallOneArg(PyObject *func, PyObject *arg);
+
+static CYTHON_INLINE void __Pyx_ExceptionSave(PyObject **type, PyObject **value, PyObject **tb);
+static void __Pyx_ExceptionReset(PyObject *type, PyObject *value, PyObject *tb);
+
+static int __Pyx_GetException(PyObject **type, PyObject **value, PyObject **tb);
+
+#if CYTHON_COMPILING_IN_CPYTHON
+static CYTHON_INLINE PyObject* __Pyx_PyObject_CallNoArg(PyObject *func);
+#else
+#define __Pyx_PyObject_CallNoArg(func) __Pyx_PyObject_Call(func, __pyx_empty_tuple, NULL)
+#endif
+
+#include <string.h>
+
+/* Cache mapping a source line number to its lazily created PyCodeObject,
+ * used when building tracebacks; entries are searched via
+ * __pyx_bisect_code_objects (declared below). */
+typedef struct {
+  int code_line;            /* key: line number the code object represents */
+  PyCodeObject* code_object;
+} __Pyx_CodeObjectCacheEntry;
+struct __Pyx_CodeObjectCache {
+  int count;     /* entries currently stored */
+  int max_count; /* allocated capacity of `entries` */
+  __Pyx_CodeObjectCacheEntry* entries;
+};
+static struct __Pyx_CodeObjectCache __pyx_code_cache = {0,0,NULL}; /* starts empty */
+static int __pyx_bisect_code_objects(__Pyx_CodeObjectCacheEntry* entries, int count, int code_line);
+static PyCodeObject *__pyx_find_code_object(int code_line);
+static void __pyx_insert_code_object(int code_line, PyCodeObject* code_object);
+
+static void __Pyx_AddTraceback(const char *funcname, int c_line,
+ int py_line, const char *filename);
+
+#include <new>
+
+static PyObject *__Pyx_Import(PyObject *name, PyObject *from_list, int level);
+
+static CYTHON_INLINE int __Pyx_PyInt_As_int(PyObject *);
+
+static CYTHON_INLINE PyObject* __Pyx_PyInt_From_int(int value);
+
+#ifndef __Pyx_CppExn2PyErr
+#include <new>
+#include <typeinfo>
+#include <stdexcept>
+#include <ios>
+/* Translate an in-flight C++ exception into a Python exception.  Must be
+ * called from inside a catch block (it rethrows with bare `throw;`).
+ * NOTE: catch clauses are ordered most-derived first; std::exception and
+ * catch(...) must stay last or they would shadow the specific mappings. */
+static void __Pyx_CppExn2PyErr() {
+  try {
+    if (PyErr_Occurred())
+      ; // let the latest Python exn pass through and ignore the current one
+    else
+      throw;
+  } catch (const std::bad_alloc& exn) {
+    PyErr_SetString(PyExc_MemoryError, exn.what());
+  } catch (const std::bad_cast& exn) {
+    PyErr_SetString(PyExc_TypeError, exn.what());
+  } catch (const std::domain_error& exn) {
+    PyErr_SetString(PyExc_ValueError, exn.what());
+  } catch (const std::invalid_argument& exn) {
+    PyErr_SetString(PyExc_ValueError, exn.what());
+  } catch (const std::ios_base::failure& exn) {
+    PyErr_SetString(PyExc_IOError, exn.what());
+  } catch (const std::out_of_range& exn) {
+    PyErr_SetString(PyExc_IndexError, exn.what());
+  } catch (const std::overflow_error& exn) {
+    PyErr_SetString(PyExc_OverflowError, exn.what());
+  } catch (const std::range_error& exn) {
+    PyErr_SetString(PyExc_ArithmeticError, exn.what());
+  } catch (const std::underflow_error& exn) {
+    PyErr_SetString(PyExc_ArithmeticError, exn.what());
+  } catch (const std::exception& exn) {
+    PyErr_SetString(PyExc_RuntimeError, exn.what());
+  }
+  catch (...)
+  {
+    PyErr_SetString(PyExc_RuntimeError, "Unknown exception");
+  }
+}
+#endif
+
+static CYTHON_INLINE PyObject* __Pyx_PyInt_From_unsigned_int(unsigned int value);
+
+static CYTHON_INLINE PyObject* __Pyx_PyInt_From_unsigned_char(unsigned char value);
+
+static CYTHON_INLINE PyObject* __Pyx_PyInt_From_long(long value);
+
+static CYTHON_INLINE long __Pyx_PyInt_As_long(PyObject *);
+
+static PyTypeObject* __Pyx_FetchCommonType(PyTypeObject* type);
+
+static CYTHON_INLINE void __Pyx_ExceptionSwap(PyObject **type, PyObject **value, PyObject **tb);
+
+static PyObject* __Pyx_PyObject_CallMethod1(PyObject* obj, PyObject* method_name, PyObject* arg);
+
+#define __Pyx_Generator_USED
+#include <structmember.h>
+#include <frameobject.h>
+typedef PyObject *(*__pyx_generator_body_t)(PyObject *, PyObject *);
+/* Object layout for Cython-implemented generators (used here by
+ * LanguageModel.full_scores). */
+typedef struct {
+  PyObject_HEAD
+  __pyx_generator_body_t body;  /* C function implementing the generator body */
+  PyObject *closure;            /* scope struct holding the saved locals */
+  PyObject *exc_type;           /* saved exception state across yields */
+  PyObject *exc_value;
+  PyObject *exc_traceback;
+  PyObject *gi_weakreflist;
+  PyObject *classobj;
+  PyObject *yieldfrom;
+  PyObject *gi_name;
+  PyObject *gi_qualname;
+  int resume_label;             /* presumably selects which yield to resume at -- confirm in body impl */
+  char is_running;              /* guards against reentrant resumption */
+} __pyx_GeneratorObject;
+static __pyx_GeneratorObject *__Pyx_Generator_New(__pyx_generator_body_t body,
+ PyObject *closure, PyObject *name, PyObject *qualname);
+static int __pyx_Generator_init(void);
+static int __Pyx_Generator_clear(PyObject* self);
+#if 1 || PY_VERSION_HEX < 0x030300B0
+static int __Pyx_PyGen_FetchStopIterationValue(PyObject **pvalue);
+#else
+#define __Pyx_PyGen_FetchStopIterationValue(pvalue) PyGen_FetchStopIterationValue(pvalue)
+#endif
+
+static int __Pyx_check_binary_version(void);
+
+static int __Pyx_InitStrings(__Pyx_StringTabEntry *t);
+
+
+/* Module declarations from '_kenlm' */
+
+/* Module declarations from 'kenlm' */
+static PyTypeObject *__pyx_ptype_5kenlm_FullScoreReturn = 0;
+static PyTypeObject *__pyx_ptype_5kenlm_State = 0;
+static PyTypeObject *__pyx_ptype_5kenlm_LanguageModel = 0;
+static PyTypeObject *__pyx_ptype_5kenlm_Model = 0;
+static PyTypeObject *__pyx_ptype_5kenlm___pyx_scope_struct__full_scores = 0;
+static PyObject *__pyx_f_5kenlm_as_str(PyObject *); /*proto*/
+#define __Pyx_MODULE_NAME "kenlm"
+int __pyx_module_is_main_kenlm = 0;
+
+/* Implementation of 'kenlm' */
+static PyObject *__pyx_builtin_TypeError;
+static PyObject *__pyx_builtin_RuntimeError;
+static PyObject *__pyx_builtin_IOError;
+static int __pyx_pf_5kenlm_15FullScoreReturn___cinit__(struct __pyx_obj_5kenlm_FullScoreReturn *__pyx_v_self, PyObject *__pyx_v_log_prob, PyObject *__pyx_v_ngram_length, PyObject *__pyx_v_oov); /* proto */
+static PyObject *__pyx_pf_5kenlm_15FullScoreReturn_2__repr__(struct __pyx_obj_5kenlm_FullScoreReturn *__pyx_v_self); /* proto */
+static PyObject *__pyx_pf_5kenlm_15FullScoreReturn_8log_prob___get__(struct __pyx_obj_5kenlm_FullScoreReturn *__pyx_v_self); /* proto */
+static PyObject *__pyx_pf_5kenlm_15FullScoreReturn_12ngram_length___get__(struct __pyx_obj_5kenlm_FullScoreReturn *__pyx_v_self); /* proto */
+static PyObject *__pyx_pf_5kenlm_15FullScoreReturn_3oov___get__(struct __pyx_obj_5kenlm_FullScoreReturn *__pyx_v_self); /* proto */
+static PyObject *__pyx_pf_5kenlm_5State___richcmp__(struct __pyx_obj_5kenlm_State *__pyx_v_qa, struct __pyx_obj_5kenlm_State *__pyx_v_qb, int __pyx_v_op); /* proto */
+static Py_hash_t __pyx_pf_5kenlm_5State_2__hash__(struct __pyx_obj_5kenlm_State *__pyx_v_self); /* proto */
+static int __pyx_pf_5kenlm_13LanguageModel___init__(struct __pyx_obj_5kenlm_LanguageModel *__pyx_v_self, PyObject *__pyx_v_path); /* proto */
+static void __pyx_pf_5kenlm_13LanguageModel_2__dealloc__(struct __pyx_obj_5kenlm_LanguageModel *__pyx_v_self); /* proto */
+static PyObject *__pyx_pf_5kenlm_13LanguageModel_5order___get__(struct __pyx_obj_5kenlm_LanguageModel *__pyx_v_self); /* proto */
+static PyObject *__pyx_pf_5kenlm_13LanguageModel_4score(struct __pyx_obj_5kenlm_LanguageModel *__pyx_v_self, PyObject *__pyx_v_sentence, PyObject *__pyx_v_bos, PyObject *__pyx_v_eos); /* proto */
+static PyObject *__pyx_pf_5kenlm_13LanguageModel_6full_scores(struct __pyx_obj_5kenlm_LanguageModel *__pyx_v_self, PyObject *__pyx_v_sentence, PyObject *__pyx_v_bos, PyObject *__pyx_v_eos); /* proto */
+static int __pyx_pf_5kenlm_13LanguageModel_9__contains__(struct __pyx_obj_5kenlm_LanguageModel *__pyx_v_self, PyObject *__pyx_v_word); /* proto */
+static PyObject *__pyx_pf_5kenlm_13LanguageModel_11__repr__(struct __pyx_obj_5kenlm_LanguageModel *__pyx_v_self); /* proto */
+static PyObject *__pyx_pf_5kenlm_13LanguageModel_13__reduce__(struct __pyx_obj_5kenlm_LanguageModel *__pyx_v_self); /* proto */
+static PyObject *__pyx_pf_5kenlm_13LanguageModel_4path___get__(struct __pyx_obj_5kenlm_LanguageModel *__pyx_v_self); /* proto */
+static int __pyx_pf_5kenlm_13LanguageModel_4path_2__set__(struct __pyx_obj_5kenlm_LanguageModel *__pyx_v_self, PyObject *__pyx_v_value); /* proto */
+static int __pyx_pf_5kenlm_13LanguageModel_4path_4__del__(struct __pyx_obj_5kenlm_LanguageModel *__pyx_v_self); /* proto */
+static int __pyx_pf_5kenlm_5Model___init__(struct __pyx_obj_5kenlm_Model *__pyx_v_self, PyObject *__pyx_v_path); /* proto */
+static void __pyx_pf_5kenlm_5Model_2__dealloc__(struct __pyx_obj_5kenlm_Model *__pyx_v_self); /* proto */
+static PyObject *__pyx_pf_5kenlm_5Model_5order___get__(struct __pyx_obj_5kenlm_Model *__pyx_v_self); /* proto */
+static PyObject *__pyx_pf_5kenlm_5Model_4BeginSentenceWrite(struct __pyx_obj_5kenlm_Model *__pyx_v_self, struct __pyx_obj_5kenlm_State *__pyx_v_state); /* proto */
+static PyObject *__pyx_pf_5kenlm_5Model_6NullContextWrite(struct __pyx_obj_5kenlm_Model *__pyx_v_self, struct __pyx_obj_5kenlm_State *__pyx_v_state); /* proto */
+static PyObject *__pyx_pf_5kenlm_5Model_8BaseScore(struct __pyx_obj_5kenlm_Model *__pyx_v_self, struct __pyx_obj_5kenlm_State *__pyx_v_in_state, PyObject *__pyx_v_word, struct __pyx_obj_5kenlm_State *__pyx_v_out_state); /* proto */
+static PyObject *__pyx_pf_5kenlm_5Model_10BaseFullScore(struct __pyx_obj_5kenlm_Model *__pyx_v_self, struct __pyx_obj_5kenlm_State *__pyx_v_in_state, PyObject *__pyx_v_word, struct __pyx_obj_5kenlm_State *__pyx_v_out_state); /* proto */
+static int __pyx_pf_5kenlm_5Model_12__contains__(struct __pyx_obj_5kenlm_Model *__pyx_v_self, PyObject *__pyx_v_word); /* proto */
+static PyObject *__pyx_pf_5kenlm_5Model_14__repr__(struct __pyx_obj_5kenlm_Model *__pyx_v_self); /* proto */
+static PyObject *__pyx_pf_5kenlm_5Model_16__reduce__(struct __pyx_obj_5kenlm_Model *__pyx_v_self); /* proto */
+static PyObject *__pyx_pf_5kenlm_5Model_4path___get__(struct __pyx_obj_5kenlm_Model *__pyx_v_self); /* proto */
+static int __pyx_pf_5kenlm_5Model_4path_2__set__(struct __pyx_obj_5kenlm_Model *__pyx_v_self, PyObject *__pyx_v_value); /* proto */
+static int __pyx_pf_5kenlm_5Model_4path_4__del__(struct __pyx_obj_5kenlm_Model *__pyx_v_self); /* proto */
+static PyObject *__pyx_tp_new_5kenlm_FullScoreReturn(PyTypeObject *t, PyObject *a, PyObject *k); /*proto*/
+static PyObject *__pyx_tp_new_5kenlm_State(PyTypeObject *t, PyObject *a, PyObject *k); /*proto*/
+static PyObject *__pyx_tp_new_5kenlm_LanguageModel(PyTypeObject *t, PyObject *a, PyObject *k); /*proto*/
+static PyObject *__pyx_tp_new_5kenlm_Model(PyTypeObject *t, PyObject *a, PyObject *k); /*proto*/
+static PyObject *__pyx_tp_new_5kenlm___pyx_scope_struct__full_scores(PyTypeObject *t, PyObject *a, PyObject *k); /*proto*/
+static char __pyx_k__2[] = "\n";
+static char __pyx_k__3[] = " ";
+static char __pyx_k_os[] = "os";
+static char __pyx_k_bos[] = "bos";
+static char __pyx_k_eos[] = "eos";
+static char __pyx_k_oov[] = "oov";
+static char __pyx_k_args[] = "args";
+static char __pyx_k_main[] = "__main__";
+static char __pyx_k_name[] = "__name__";
+static char __pyx_k_path[] = "path";
+static char __pyx_k_send[] = "send";
+static char __pyx_k_test[] = "__test__";
+static char __pyx_k_utf8[] = "utf8";
+static char __pyx_k_word[] = "word";
+static char __pyx_k_class[] = "__class__";
+static char __pyx_k_close[] = "close";
+static char __pyx_k_kenlm[] = "_kenlm";
+static char __pyx_k_split[] = "split";
+static char __pyx_k_throw[] = "throw";
+static char __pyx_k_encode[] = "encode";
+static char __pyx_k_format[] = "format";
+static char __pyx_k_import[] = "__import__";
+static char __pyx_k_0_1_2_3[] = "{0}({1}, {2}, {3})";
+static char __pyx_k_IOError[] = "IOError";
+static char __pyx_k_abspath[] = "abspath";
+static char __pyx_k_replace[] = "replace";
+static char __pyx_k_basename[] = "basename";
+static char __pyx_k_in_state[] = "in_state";
+static char __pyx_k_log_prob[] = "log_prob";
+static char __pyx_k_sentence[] = "sentence";
+static char __pyx_k_TypeError[] = "TypeError";
+static char __pyx_k_out_state[] = "out_state";
+static char __pyx_k_full_scores[] = "full_scores";
+static char __pyx_k_Model_from_0[] = "<Model from {0}>";
+static char __pyx_k_RuntimeError[] = "RuntimeError";
+static char __pyx_k_ngram_length[] = "ngram_length";
+static char __pyx_k_LanguageModel[] = "LanguageModel";
+static char __pyx_k_Cannot_read_model[] = "Cannot read model '{}' ({})";
+static char __pyx_k_LanguageModel_from_0[] = "<LanguageModel from {0}>";
+static char __pyx_k_LanguageModel_full_scores[] = "LanguageModel.full_scores";
+static char __pyx_k_Cannot_convert_s_to_string[] = "Cannot convert %s to string";
+static PyObject *__pyx_kp_s_0_1_2_3;
+static PyObject *__pyx_kp_s_Cannot_convert_s_to_string;
+static PyObject *__pyx_kp_s_Cannot_read_model;
+static PyObject *__pyx_n_s_IOError;
+static PyObject *__pyx_n_s_LanguageModel;
+static PyObject *__pyx_kp_s_LanguageModel_from_0;
+static PyObject *__pyx_n_s_LanguageModel_full_scores;
+static PyObject *__pyx_kp_s_Model_from_0;
+static PyObject *__pyx_n_s_RuntimeError;
+static PyObject *__pyx_n_s_TypeError;
+static PyObject *__pyx_kp_s__2;
+static PyObject *__pyx_kp_s__3;
+static PyObject *__pyx_n_s_abspath;
+static PyObject *__pyx_n_s_args;
+static PyObject *__pyx_n_s_basename;
+static PyObject *__pyx_n_s_bos;
+static PyObject *__pyx_n_s_class;
+static PyObject *__pyx_n_s_close;
+static PyObject *__pyx_n_s_encode;
+static PyObject *__pyx_n_s_eos;
+static PyObject *__pyx_n_s_format;
+static PyObject *__pyx_n_s_full_scores;
+static PyObject *__pyx_n_s_import;
+static PyObject *__pyx_n_s_in_state;
+static PyObject *__pyx_n_s_kenlm;
+static PyObject *__pyx_n_s_log_prob;
+static PyObject *__pyx_n_s_main;
+static PyObject *__pyx_n_s_name;
+static PyObject *__pyx_n_s_ngram_length;
+static PyObject *__pyx_n_s_oov;
+static PyObject *__pyx_n_s_os;
+static PyObject *__pyx_n_s_out_state;
+static PyObject *__pyx_n_s_path;
+static PyObject *__pyx_n_s_replace;
+static PyObject *__pyx_n_s_send;
+static PyObject *__pyx_n_s_sentence;
+static PyObject *__pyx_n_s_split;
+static PyObject *__pyx_n_s_test;
+static PyObject *__pyx_n_s_throw;
+static PyObject *__pyx_n_s_utf8;
+static PyObject *__pyx_n_s_word;
+static PyObject *__pyx_tuple_;
+static PyObject *__pyx_tuple__4;
+static PyObject *__pyx_tuple__5;
+
+/* "kenlm.pyx":4
+ * cimport _kenlm
+ *
+ * cdef bytes as_str(data): # <<<<<<<<<<<<<<
+ * if isinstance(data, bytes):
+ * return data
+ */
+
+/* C implementation of kenlm.as_str(data): return `data` unchanged when it is
+ * bytes, return data.encode('utf8') when it is unicode, otherwise raise
+ * TypeError.  Returns a new reference, or NULL with an exception set. */
+static PyObject *__pyx_f_5kenlm_as_str(PyObject *__pyx_v_data) {
+  PyObject *__pyx_r = NULL;
+  __Pyx_RefNannyDeclarations
+  int __pyx_t_1;
+  int __pyx_t_2;
+  PyObject *__pyx_t_3 = NULL;
+  PyObject *__pyx_t_4 = NULL;
+  int __pyx_lineno = 0;          /* locals shadow the module-level error-position globals */
+  const char *__pyx_filename = NULL;
+  int __pyx_clineno = 0;
+  __Pyx_RefNannySetupContext("as_str", 0);
+
+  /* "kenlm.pyx":5
+ *
+ * cdef bytes as_str(data):
+ *     if isinstance(data, bytes):             # <<<<<<<<<<<<<<
+ *         return data
+ *     elif isinstance(data, unicode):
+ */
+  __pyx_t_1 = PyBytes_Check(__pyx_v_data);
+  __pyx_t_2 = (__pyx_t_1 != 0);
+  if (__pyx_t_2) {
+
+  /* "kenlm.pyx":6
+ * cdef bytes as_str(data):
+ *     if isinstance(data, bytes):
+ *         return data             # <<<<<<<<<<<<<<
+ *     elif isinstance(data, unicode):
+ *         return data.encode('utf8')
+ */
+    __Pyx_XDECREF(__pyx_r);
+    /* Return-type check for the declared `bytes` return of the cdef function. */
+    if (!(likely(PyBytes_CheckExact(__pyx_v_data))||((__pyx_v_data) == Py_None)||(PyErr_Format(PyExc_TypeError, "Expected %.16s, got %.200s", "bytes", Py_TYPE(__pyx_v_data)->tp_name), 0))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 6; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+    __Pyx_INCREF(__pyx_v_data);
+    __pyx_r = ((PyObject*)__pyx_v_data);
+    goto __pyx_L0;
+  }
+
+  /* "kenlm.pyx":7
+ *     if isinstance(data, bytes):
+ *         return data
+ *     elif isinstance(data, unicode):             # <<<<<<<<<<<<<<
+ *         return data.encode('utf8')
+ *     raise TypeError('Cannot convert %s to string' % type(data))
+ */
+  __pyx_t_2 = PyUnicode_Check(__pyx_v_data);
+  __pyx_t_1 = (__pyx_t_2 != 0);
+  if (__pyx_t_1) {
+
+  /* "kenlm.pyx":8
+ *         return data
+ *     elif isinstance(data, unicode):
+ *         return data.encode('utf8')             # <<<<<<<<<<<<<<
+ *     raise TypeError('Cannot convert %s to string' % type(data))
+ *
+ */
+    __Pyx_XDECREF(__pyx_r);
+    __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_v_data, __pyx_n_s_encode); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 8; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+    __Pyx_GOTREF(__pyx_t_3);
+    /* __pyx_tuple_ is the pre-built ('utf8',) argument tuple. */
+    __pyx_t_4 = __Pyx_PyObject_Call(__pyx_t_3, __pyx_tuple_, NULL); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 8; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+    __Pyx_GOTREF(__pyx_t_4);
+    __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0;
+    if (!(likely(PyBytes_CheckExact(__pyx_t_4))||((__pyx_t_4) == Py_None)||(PyErr_Format(PyExc_TypeError, "Expected %.16s, got %.200s", "bytes", Py_TYPE(__pyx_t_4)->tp_name), 0))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 8; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+    __pyx_r = ((PyObject*)__pyx_t_4);
+    __pyx_t_4 = 0;
+    goto __pyx_L0;
+  }
+
+  /* "kenlm.pyx":9
+ *     elif isinstance(data, unicode):
+ *         return data.encode('utf8')
+ *     raise TypeError('Cannot convert %s to string' % type(data))             # <<<<<<<<<<<<<<
+ *
+ * cdef class FullScoreReturn:
+ */
+  __pyx_t_4 = __Pyx_PyString_Format(__pyx_kp_s_Cannot_convert_s_to_string, ((PyObject *)Py_TYPE(__pyx_v_data))); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 9; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+  __Pyx_GOTREF(__pyx_t_4);
+  __pyx_t_3 = PyTuple_New(1); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 9; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+  __Pyx_GOTREF(__pyx_t_3);
+  PyTuple_SET_ITEM(__pyx_t_3, 0, __pyx_t_4);
+  __Pyx_GIVEREF(__pyx_t_4);
+  __pyx_t_4 = 0;
+  __pyx_t_4 = __Pyx_PyObject_Call(__pyx_builtin_TypeError, __pyx_t_3, NULL); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 9; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+  __Pyx_GOTREF(__pyx_t_4);
+  __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0;
+  __Pyx_Raise(__pyx_t_4, 0, 0, 0);
+  __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0;
+  {__pyx_filename = __pyx_f[0]; __pyx_lineno = 9; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+
+  /* "kenlm.pyx":4
+ * cimport _kenlm
+ *
+ * cdef bytes as_str(data):             # <<<<<<<<<<<<<<
+ *     if isinstance(data, bytes):
+ *         return data
+ */
+
+  /* function exit code */
+  __pyx_L1_error:;
+  __Pyx_XDECREF(__pyx_t_3);
+  __Pyx_XDECREF(__pyx_t_4);
+  __Pyx_AddTraceback("kenlm.as_str", __pyx_clineno, __pyx_lineno, __pyx_filename);
+  __pyx_r = 0;
+  __pyx_L0:;
+  __Pyx_XGIVEREF(__pyx_r);
+  __Pyx_RefNannyFinishContext();
+  return __pyx_r;
+}
+
+/* "kenlm.pyx":24
+ * cdef bint oov
+ *
+ * def __cinit__(self, log_prob, ngram_length, oov): # <<<<<<<<<<<<<<
+ * self.log_prob = log_prob
+ * self.ngram_length = ngram_length
+ */
+
+/* Python wrapper */
+static int __pyx_pw_5kenlm_15FullScoreReturn_1__cinit__(PyObject *__pyx_v_self, PyObject *__pyx_args, PyObject *__pyx_kwds); /*proto*/
+/* Python-level wrapper for FullScoreReturn.__cinit__(log_prob, ngram_length,
+ * oov): unpacks exactly three positional/keyword arguments and forwards to
+ * the implementation below.  Returns 0 on success, -1 on error. */
+static int __pyx_pw_5kenlm_15FullScoreReturn_1__cinit__(PyObject *__pyx_v_self, PyObject *__pyx_args, PyObject *__pyx_kwds) {
+  PyObject *__pyx_v_log_prob = 0;
+  PyObject *__pyx_v_ngram_length = 0;
+  PyObject *__pyx_v_oov = 0;
+  int __pyx_lineno = 0;
+  const char *__pyx_filename = NULL;
+  int __pyx_clineno = 0;
+  int __pyx_r;
+  __Pyx_RefNannyDeclarations
+  __Pyx_RefNannySetupContext("__cinit__ (wrapper)", 0);
+  {
+    static PyObject **__pyx_pyargnames[] = {&__pyx_n_s_log_prob,&__pyx_n_s_ngram_length,&__pyx_n_s_oov,0};
+    PyObject* values[3] = {0,0,0};
+    if (unlikely(__pyx_kwds)) {
+      Py_ssize_t kw_args;
+      const Py_ssize_t pos_args = PyTuple_GET_SIZE(__pyx_args);
+      /* Collect positional arguments; the case fallthroughs are deliberate. */
+      switch (pos_args) {
+        case 3: values[2] = PyTuple_GET_ITEM(__pyx_args, 2);
+        case 2: values[1] = PyTuple_GET_ITEM(__pyx_args, 1);
+        case 1: values[0] = PyTuple_GET_ITEM(__pyx_args, 0);
+        case 0: break;
+        default: goto __pyx_L5_argtuple_error;
+      }
+      kw_args = PyDict_Size(__pyx_kwds);
+      /* Fill the remaining slots from keywords, again falling through. */
+      switch (pos_args) {
+        case 0:
+        if (likely((values[0] = PyDict_GetItem(__pyx_kwds, __pyx_n_s_log_prob)) != 0)) kw_args--;
+        else goto __pyx_L5_argtuple_error;
+        case 1:
+        if (likely((values[1] = PyDict_GetItem(__pyx_kwds, __pyx_n_s_ngram_length)) != 0)) kw_args--;
+        else {
+          __Pyx_RaiseArgtupleInvalid("__cinit__", 1, 3, 3, 1); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 24; __pyx_clineno = __LINE__; goto __pyx_L3_error;}
+        }
+        case 2:
+        if (likely((values[2] = PyDict_GetItem(__pyx_kwds, __pyx_n_s_oov)) != 0)) kw_args--;
+        else {
+          __Pyx_RaiseArgtupleInvalid("__cinit__", 1, 3, 3, 2); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 24; __pyx_clineno = __LINE__; goto __pyx_L3_error;}
+        }
+      }
+      if (unlikely(kw_args > 0)) {
+        /* Leftover keywords are either duplicates or unknown names. */
+        if (unlikely(__Pyx_ParseOptionalKeywords(__pyx_kwds, __pyx_pyargnames, 0, values, pos_args, "__cinit__") < 0)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 24; __pyx_clineno = __LINE__; goto __pyx_L3_error;}
+      }
+    } else if (PyTuple_GET_SIZE(__pyx_args) != 3) {
+      goto __pyx_L5_argtuple_error;
+    } else {
+      values[0] = PyTuple_GET_ITEM(__pyx_args, 0);
+      values[1] = PyTuple_GET_ITEM(__pyx_args, 1);
+      values[2] = PyTuple_GET_ITEM(__pyx_args, 2);
+    }
+    __pyx_v_log_prob = values[0];
+    __pyx_v_ngram_length = values[1];
+    __pyx_v_oov = values[2];
+  }
+  goto __pyx_L4_argument_unpacking_done;
+  __pyx_L5_argtuple_error:;
+  __Pyx_RaiseArgtupleInvalid("__cinit__", 1, 3, 3, PyTuple_GET_SIZE(__pyx_args)); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 24; __pyx_clineno = __LINE__; goto __pyx_L3_error;}
+  __pyx_L3_error:;
+  __Pyx_AddTraceback("kenlm.FullScoreReturn.__cinit__", __pyx_clineno, __pyx_lineno, __pyx_filename);
+  __Pyx_RefNannyFinishContext();
+  return -1;
+  __pyx_L4_argument_unpacking_done:;
+  __pyx_r = __pyx_pf_5kenlm_15FullScoreReturn___cinit__(((struct __pyx_obj_5kenlm_FullScoreReturn *)__pyx_v_self), __pyx_v_log_prob, __pyx_v_ngram_length, __pyx_v_oov);
+
+  /* function exit code */
+  __Pyx_RefNannyFinishContext();
+  return __pyx_r;
+}
+
+/* Implementation of FullScoreReturn.__cinit__: coerce the three Python
+ * arguments to float / int / bint and store them on the instance.
+ * Returns 0 on success, -1 with an exception set if any coercion fails. */
+static int __pyx_pf_5kenlm_15FullScoreReturn___cinit__(struct __pyx_obj_5kenlm_FullScoreReturn *__pyx_v_self, PyObject *__pyx_v_log_prob, PyObject *__pyx_v_ngram_length, PyObject *__pyx_v_oov) {
+  int __pyx_r;
+  __Pyx_RefNannyDeclarations
+  float __pyx_t_1;
+  int __pyx_t_2;
+  int __pyx_t_3;
+  int __pyx_lineno = 0;
+  const char *__pyx_filename = NULL;
+  int __pyx_clineno = 0;
+  __Pyx_RefNannySetupContext("__cinit__", 0);
+
+  /* "kenlm.pyx":25
+ *
+ *     def __cinit__(self, log_prob, ngram_length, oov):
+ *         self.log_prob = log_prob             # <<<<<<<<<<<<<<
+ *         self.ngram_length = ngram_length
+ *         self.oov = oov
+ */
+  __pyx_t_1 = __pyx_PyFloat_AsFloat(__pyx_v_log_prob); if (unlikely((__pyx_t_1 == (float)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 25; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+  __pyx_v_self->log_prob = __pyx_t_1;
+
+  /* "kenlm.pyx":26
+ *     def __cinit__(self, log_prob, ngram_length, oov):
+ *         self.log_prob = log_prob
+ *         self.ngram_length = ngram_length             # <<<<<<<<<<<<<<
+ *         self.oov = oov
+ *
+ */
+  __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_v_ngram_length); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 26; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+  __pyx_v_self->ngram_length = __pyx_t_2;
+
+  /* "kenlm.pyx":27
+ *         self.log_prob = log_prob
+ *         self.ngram_length = ngram_length
+ *         self.oov = oov             # <<<<<<<<<<<<<<
+ *
+ *     def __repr__(self):
+ */
+  __pyx_t_3 = __Pyx_PyObject_IsTrue(__pyx_v_oov); if (unlikely((__pyx_t_3 == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 27; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+  __pyx_v_self->oov = __pyx_t_3;
+
+  /* "kenlm.pyx":24
+ *     cdef bint oov
+ *
+ *     def __cinit__(self, log_prob, ngram_length, oov):             # <<<<<<<<<<<<<<
+ *         self.log_prob = log_prob
+ *         self.ngram_length = ngram_length
+ */
+
+  /* function exit code */
+  __pyx_r = 0;
+  goto __pyx_L0;
+  __pyx_L1_error:;
+  __Pyx_AddTraceback("kenlm.FullScoreReturn.__cinit__", __pyx_clineno, __pyx_lineno, __pyx_filename);
+  __pyx_r = -1;
+  __pyx_L0:;
+  __Pyx_RefNannyFinishContext();
+  return __pyx_r;
+}
+
+/* "kenlm.pyx":29
+ * self.oov = oov
+ *
+ * def __repr__(self): # <<<<<<<<<<<<<<
+ * return '{0}({1}, {2}, {3})'.format(self.__class__.__name__, repr(self.log_prob), repr(self.ngram_length), repr(self.oov))
+ *
+ */
+
+/* Python wrapper */
+static PyObject *__pyx_pw_5kenlm_15FullScoreReturn_3__repr__(PyObject *__pyx_v_self); /*proto*/
+static PyObject *__pyx_pw_5kenlm_15FullScoreReturn_3__repr__(PyObject *__pyx_v_self) {
+ PyObject *__pyx_r = 0;
+ __Pyx_RefNannyDeclarations
+ __Pyx_RefNannySetupContext("__repr__ (wrapper)", 0);
+ __pyx_r = __pyx_pf_5kenlm_15FullScoreReturn_2__repr__(((struct __pyx_obj_5kenlm_FullScoreReturn *)__pyx_v_self));
+
+ /* function exit code */
+ __Pyx_RefNannyFinishContext();
+ return __pyx_r;
+}
+
+static PyObject *__pyx_pf_5kenlm_15FullScoreReturn_2__repr__(struct __pyx_obj_5kenlm_FullScoreReturn *__pyx_v_self) {
+ PyObject *__pyx_r = NULL;
+ __Pyx_RefNannyDeclarations
+ PyObject *__pyx_t_1 = NULL;
+ PyObject *__pyx_t_2 = NULL;
+ PyObject *__pyx_t_3 = NULL;
+ PyObject *__pyx_t_4 = NULL;
+ PyObject *__pyx_t_5 = NULL;
+ PyObject *__pyx_t_6 = NULL;
+ PyObject *__pyx_t_7 = NULL;
+ Py_ssize_t __pyx_t_8;
+ PyObject *__pyx_t_9 = NULL;
+ int __pyx_lineno = 0;
+ const char *__pyx_filename = NULL;
+ int __pyx_clineno = 0;
+ __Pyx_RefNannySetupContext("__repr__", 0);
+
+ /* "kenlm.pyx":30
+ *
+ * def __repr__(self):
+ * return '{0}({1}, {2}, {3})'.format(self.__class__.__name__, repr(self.log_prob), repr(self.ngram_length), repr(self.oov)) # <<<<<<<<<<<<<<
+ *
+ * property log_prob:
+ */
+ __Pyx_XDECREF(__pyx_r);
+ __pyx_t_2 = __Pyx_PyObject_GetAttrStr(__pyx_kp_s_0_1_2_3, __pyx_n_s_format); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 30; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+ __Pyx_GOTREF(__pyx_t_2);
+ __pyx_t_3 = __Pyx_PyObject_GetAttrStr(((PyObject *)__pyx_v_self), __pyx_n_s_class); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 30; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+ __Pyx_GOTREF(__pyx_t_3);
+ __pyx_t_4 = __Pyx_PyObject_GetAttrStr(__pyx_t_3, __pyx_n_s_name); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 30; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+ __Pyx_GOTREF(__pyx_t_4);
+ __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0;
+ __pyx_t_3 = PyFloat_FromDouble(__pyx_v_self->log_prob); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 30; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+ __Pyx_GOTREF(__pyx_t_3);
+ __pyx_t_5 = PyObject_Repr(__pyx_t_3); if (unlikely(!__pyx_t_5)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 30; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+ __Pyx_GOTREF(__pyx_t_5);
+ __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0;
+ __pyx_t_3 = __Pyx_PyInt_From_int(__pyx_v_self->ngram_length); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 30; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+ __Pyx_GOTREF(__pyx_t_3);
+ __pyx_t_6 = PyObject_Repr(__pyx_t_3); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 30; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+ __Pyx_GOTREF(__pyx_t_6);
+ __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0;
+ __pyx_t_3 = __Pyx_PyBool_FromLong(__pyx_v_self->oov); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 30; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+ __Pyx_GOTREF(__pyx_t_3);
+ __pyx_t_7 = PyObject_Repr(__pyx_t_3); if (unlikely(!__pyx_t_7)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 30; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+ __Pyx_GOTREF(__pyx_t_7);
+ __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0;
+ __pyx_t_3 = NULL;
+ __pyx_t_8 = 0;
+ if (CYTHON_COMPILING_IN_CPYTHON && likely(PyMethod_Check(__pyx_t_2))) {
+ __pyx_t_3 = PyMethod_GET_SELF(__pyx_t_2);
+ if (likely(__pyx_t_3)) {
+ PyObject* function = PyMethod_GET_FUNCTION(__pyx_t_2);
+ __Pyx_INCREF(__pyx_t_3);
+ __Pyx_INCREF(function);
+ __Pyx_DECREF_SET(__pyx_t_2, function);
+ __pyx_t_8 = 1;
+ }
+ }
+ __pyx_t_9 = PyTuple_New(4+__pyx_t_8); if (unlikely(!__pyx_t_9)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 30; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+ __Pyx_GOTREF(__pyx_t_9);
+ if (__pyx_t_3) {
+ PyTuple_SET_ITEM(__pyx_t_9, 0, __pyx_t_3); __Pyx_GIVEREF(__pyx_t_3); __pyx_t_3 = NULL;
+ }
+ PyTuple_SET_ITEM(__pyx_t_9, 0+__pyx_t_8, __pyx_t_4);
+ __Pyx_GIVEREF(__pyx_t_4);
+ PyTuple_SET_ITEM(__pyx_t_9, 1+__pyx_t_8, __pyx_t_5);
+ __Pyx_GIVEREF(__pyx_t_5);
+ PyTuple_SET_ITEM(__pyx_t_9, 2+__pyx_t_8, __pyx_t_6);
+ __Pyx_GIVEREF(__pyx_t_6);
+ PyTuple_SET_ITEM(__pyx_t_9, 3+__pyx_t_8, __pyx_t_7);
+ __Pyx_GIVEREF(__pyx_t_7);
+ __pyx_t_4 = 0;
+ __pyx_t_5 = 0;
+ __pyx_t_6 = 0;
+ __pyx_t_7 = 0;
+ __pyx_t_1 = __Pyx_PyObject_Call(__pyx_t_2, __pyx_t_9, NULL); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 30; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+ __Pyx_GOTREF(__pyx_t_1);
+ __Pyx_DECREF(__pyx_t_9); __pyx_t_9 = 0;
+ __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0;
+ __pyx_r = __pyx_t_1;
+ __pyx_t_1 = 0;
+ goto __pyx_L0;
+
+ /* "kenlm.pyx":29
+ * self.oov = oov
+ *
+ * def __repr__(self): # <<<<<<<<<<<<<<
+ * return '{0}({1}, {2}, {3})'.format(self.__class__.__name__, repr(self.log_prob), repr(self.ngram_length), repr(self.oov))
+ *
+ */
+
+ /* function exit code */
+ __pyx_L1_error:;
+ __Pyx_XDECREF(__pyx_t_1);
+ __Pyx_XDECREF(__pyx_t_2);
+ __Pyx_XDECREF(__pyx_t_3);
+ __Pyx_XDECREF(__pyx_t_4);
+ __Pyx_XDECREF(__pyx_t_5);
+ __Pyx_XDECREF(__pyx_t_6);
+ __Pyx_XDECREF(__pyx_t_7);
+ __Pyx_XDECREF(__pyx_t_9);
+ __Pyx_AddTraceback("kenlm.FullScoreReturn.__repr__", __pyx_clineno, __pyx_lineno, __pyx_filename);
+ __pyx_r = NULL;
+ __pyx_L0:;
+ __Pyx_XGIVEREF(__pyx_r);
+ __Pyx_RefNannyFinishContext();
+ return __pyx_r;
+}
+
+/* "kenlm.pyx":33
+ *
+ * property log_prob:
+ * def __get__(self): # <<<<<<<<<<<<<<
+ * return self.log_prob
+ *
+ */
+
+/* Python wrapper */
+static PyObject *__pyx_pw_5kenlm_15FullScoreReturn_8log_prob_1__get__(PyObject *__pyx_v_self); /*proto*/
+static PyObject *__pyx_pw_5kenlm_15FullScoreReturn_8log_prob_1__get__(PyObject *__pyx_v_self) {
+ PyObject *__pyx_r = 0;
+ __Pyx_RefNannyDeclarations
+ __Pyx_RefNannySetupContext("__get__ (wrapper)", 0);
+ __pyx_r = __pyx_pf_5kenlm_15FullScoreReturn_8log_prob___get__(((struct __pyx_obj_5kenlm_FullScoreReturn *)__pyx_v_self));
+
+ /* function exit code */
+ __Pyx_RefNannyFinishContext();
+ return __pyx_r;
+}
+
+static PyObject *__pyx_pf_5kenlm_15FullScoreReturn_8log_prob___get__(struct __pyx_obj_5kenlm_FullScoreReturn *__pyx_v_self) {
+ PyObject *__pyx_r = NULL;
+ __Pyx_RefNannyDeclarations
+ PyObject *__pyx_t_1 = NULL;
+ int __pyx_lineno = 0;
+ const char *__pyx_filename = NULL;
+ int __pyx_clineno = 0;
+ __Pyx_RefNannySetupContext("__get__", 0);
+
+ /* "kenlm.pyx":34
+ * property log_prob:
+ * def __get__(self):
+ * return self.log_prob # <<<<<<<<<<<<<<
+ *
+ * property ngram_length:
+ */
+ __Pyx_XDECREF(__pyx_r);
+ __pyx_t_1 = PyFloat_FromDouble(__pyx_v_self->log_prob); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 34; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+ __Pyx_GOTREF(__pyx_t_1);
+ __pyx_r = __pyx_t_1;
+ __pyx_t_1 = 0;
+ goto __pyx_L0;
+
+ /* "kenlm.pyx":33
+ *
+ * property log_prob:
+ * def __get__(self): # <<<<<<<<<<<<<<
+ * return self.log_prob
+ *
+ */
+
+ /* function exit code */
+ __pyx_L1_error:;
+ __Pyx_XDECREF(__pyx_t_1);
+ __Pyx_AddTraceback("kenlm.FullScoreReturn.log_prob.__get__", __pyx_clineno, __pyx_lineno, __pyx_filename);
+ __pyx_r = NULL;
+ __pyx_L0:;
+ __Pyx_XGIVEREF(__pyx_r);
+ __Pyx_RefNannyFinishContext();
+ return __pyx_r;
+}
+
+/* "kenlm.pyx":37
+ *
+ * property ngram_length:
+ * def __get__(self): # <<<<<<<<<<<<<<
+ * return self.ngram_length
+ *
+ */
+
+/* Python wrapper */
+static PyObject *__pyx_pw_5kenlm_15FullScoreReturn_12ngram_length_1__get__(PyObject *__pyx_v_self); /*proto*/
+static PyObject *__pyx_pw_5kenlm_15FullScoreReturn_12ngram_length_1__get__(PyObject *__pyx_v_self) {
+ PyObject *__pyx_r = 0;
+ __Pyx_RefNannyDeclarations
+ __Pyx_RefNannySetupContext("__get__ (wrapper)", 0);
+ __pyx_r = __pyx_pf_5kenlm_15FullScoreReturn_12ngram_length___get__(((struct __pyx_obj_5kenlm_FullScoreReturn *)__pyx_v_self));
+
+ /* function exit code */
+ __Pyx_RefNannyFinishContext();
+ return __pyx_r;
+}
+
+static PyObject *__pyx_pf_5kenlm_15FullScoreReturn_12ngram_length___get__(struct __pyx_obj_5kenlm_FullScoreReturn *__pyx_v_self) {
+ PyObject *__pyx_r = NULL;
+ __Pyx_RefNannyDeclarations
+ PyObject *__pyx_t_1 = NULL;
+ int __pyx_lineno = 0;
+ const char *__pyx_filename = NULL;
+ int __pyx_clineno = 0;
+ __Pyx_RefNannySetupContext("__get__", 0);
+
+ /* "kenlm.pyx":38
+ * property ngram_length:
+ * def __get__(self):
+ * return self.ngram_length # <<<<<<<<<<<<<<
+ *
+ * property oov:
+ */
+ __Pyx_XDECREF(__pyx_r);
+ __pyx_t_1 = __Pyx_PyInt_From_int(__pyx_v_self->ngram_length); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 38; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+ __Pyx_GOTREF(__pyx_t_1);
+ __pyx_r = __pyx_t_1;
+ __pyx_t_1 = 0;
+ goto __pyx_L0;
+
+ /* "kenlm.pyx":37
+ *
+ * property ngram_length:
+ * def __get__(self): # <<<<<<<<<<<<<<
+ * return self.ngram_length
+ *
+ */
+
+ /* function exit code */
+ __pyx_L1_error:;
+ __Pyx_XDECREF(__pyx_t_1);
+ __Pyx_AddTraceback("kenlm.FullScoreReturn.ngram_length.__get__", __pyx_clineno, __pyx_lineno, __pyx_filename);
+ __pyx_r = NULL;
+ __pyx_L0:;
+ __Pyx_XGIVEREF(__pyx_r);
+ __Pyx_RefNannyFinishContext();
+ return __pyx_r;
+}
+
+/* "kenlm.pyx":41
+ *
+ * property oov:
+ * def __get__(self): # <<<<<<<<<<<<<<
+ * return self.oov
+ *
+ */
+
+/* Python wrapper */
+static PyObject *__pyx_pw_5kenlm_15FullScoreReturn_3oov_1__get__(PyObject *__pyx_v_self); /*proto*/
+static PyObject *__pyx_pw_5kenlm_15FullScoreReturn_3oov_1__get__(PyObject *__pyx_v_self) {
+ PyObject *__pyx_r = 0;
+ __Pyx_RefNannyDeclarations
+ __Pyx_RefNannySetupContext("__get__ (wrapper)", 0);
+ __pyx_r = __pyx_pf_5kenlm_15FullScoreReturn_3oov___get__(((struct __pyx_obj_5kenlm_FullScoreReturn *)__pyx_v_self));
+
+ /* function exit code */
+ __Pyx_RefNannyFinishContext();
+ return __pyx_r;
+}
+
+static PyObject *__pyx_pf_5kenlm_15FullScoreReturn_3oov___get__(struct __pyx_obj_5kenlm_FullScoreReturn *__pyx_v_self) {
+ PyObject *__pyx_r = NULL;
+ __Pyx_RefNannyDeclarations
+ PyObject *__pyx_t_1 = NULL;
+ int __pyx_lineno = 0;
+ const char *__pyx_filename = NULL;
+ int __pyx_clineno = 0;
+ __Pyx_RefNannySetupContext("__get__", 0);
+
+ /* "kenlm.pyx":42
+ * property oov:
+ * def __get__(self):
+ * return self.oov # <<<<<<<<<<<<<<
+ *
+ * cdef class State:
+ */
+ __Pyx_XDECREF(__pyx_r);
+ __pyx_t_1 = __Pyx_PyBool_FromLong(__pyx_v_self->oov); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 42; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+ __Pyx_GOTREF(__pyx_t_1);
+ __pyx_r = __pyx_t_1;
+ __pyx_t_1 = 0;
+ goto __pyx_L0;
+
+ /* "kenlm.pyx":41
+ *
+ * property oov:
+ * def __get__(self): # <<<<<<<<<<<<<<
+ * return self.oov
+ *
+ */
+
+ /* function exit code */
+ __pyx_L1_error:;
+ __Pyx_XDECREF(__pyx_t_1);
+ __Pyx_AddTraceback("kenlm.FullScoreReturn.oov.__get__", __pyx_clineno, __pyx_lineno, __pyx_filename);
+ __pyx_r = NULL;
+ __pyx_L0:;
+ __Pyx_XGIVEREF(__pyx_r);
+ __Pyx_RefNannyFinishContext();
+ return __pyx_r;
+}
+
+/* "kenlm.pyx":55
+ * cdef _kenlm.State _c_state
+ *
+ * def __richcmp__(State qa, State qb, int op): # <<<<<<<<<<<<<<
+ * r = qa._c_state.Compare(qb._c_state)
+ * if op == 0: # <
+ */
+
+/* Python wrapper */
+static PyObject *__pyx_pw_5kenlm_5State_1__richcmp__(PyObject *__pyx_v_qa, PyObject *__pyx_v_qb, int __pyx_v_op); /*proto*/
+static PyObject *__pyx_pw_5kenlm_5State_1__richcmp__(PyObject *__pyx_v_qa, PyObject *__pyx_v_qb, int __pyx_v_op) {
+ CYTHON_UNUSED int __pyx_lineno = 0;
+ CYTHON_UNUSED const char *__pyx_filename = NULL;
+ CYTHON_UNUSED int __pyx_clineno = 0;
+ PyObject *__pyx_r = 0;
+ __Pyx_RefNannyDeclarations
+ __Pyx_RefNannySetupContext("__richcmp__ (wrapper)", 0);
+ if (unlikely(!__Pyx_ArgTypeTest(((PyObject *)__pyx_v_qa), __pyx_ptype_5kenlm_State, 1, "qa", 0))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 55; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+ if (unlikely(!__Pyx_ArgTypeTest(((PyObject *)__pyx_v_qb), __pyx_ptype_5kenlm_State, 1, "qb", 0))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 55; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+ __pyx_r = __pyx_pf_5kenlm_5State___richcmp__(((struct __pyx_obj_5kenlm_State *)__pyx_v_qa), ((struct __pyx_obj_5kenlm_State *)__pyx_v_qb), ((int)__pyx_v_op));
+
+ /* function exit code */
+ goto __pyx_L0;
+ __pyx_L1_error:;
+ __pyx_r = NULL;
+ __pyx_L0:;
+ __Pyx_RefNannyFinishContext();
+ return __pyx_r;
+}
+
+static PyObject *__pyx_pf_5kenlm_5State___richcmp__(struct __pyx_obj_5kenlm_State *__pyx_v_qa, struct __pyx_obj_5kenlm_State *__pyx_v_qb, int __pyx_v_op) {
+ int __pyx_v_r;
+ PyObject *__pyx_r = NULL;
+ __Pyx_RefNannyDeclarations
+ PyObject *__pyx_t_1 = NULL;
+ int __pyx_lineno = 0;
+ const char *__pyx_filename = NULL;
+ int __pyx_clineno = 0;
+ __Pyx_RefNannySetupContext("__richcmp__", 0);
+
+ /* "kenlm.pyx":56
+ *
+ * def __richcmp__(State qa, State qb, int op):
+ * r = qa._c_state.Compare(qb._c_state) # <<<<<<<<<<<<<<
+ * if op == 0: # <
+ * return r < 0
+ */
+ __pyx_v_r = __pyx_v_qa->_c_state.Compare(__pyx_v_qb->_c_state);
+
+ /* "kenlm.pyx":65
+ * elif op == 3: # !=
+ * return r != 0
+ * elif op == 4: # > # <<<<<<<<<<<<<<
+ * return r > 0
+ * else: # >=
+ */
+ switch (__pyx_v_op) {
+
+ /* "kenlm.pyx":57
+ * def __richcmp__(State qa, State qb, int op):
+ * r = qa._c_state.Compare(qb._c_state)
+ * if op == 0: # < # <<<<<<<<<<<<<<
+ * return r < 0
+ * elif op == 1: # <=
+ */
+ case 0:
+
+ /* "kenlm.pyx":58
+ * r = qa._c_state.Compare(qb._c_state)
+ * if op == 0: # <
+ * return r < 0 # <<<<<<<<<<<<<<
+ * elif op == 1: # <=
+ * return r <= 0
+ */
+ __Pyx_XDECREF(__pyx_r);
+ __pyx_t_1 = __Pyx_PyBool_FromLong((__pyx_v_r < 0)); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 58; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+ __Pyx_GOTREF(__pyx_t_1);
+ __pyx_r = __pyx_t_1;
+ __pyx_t_1 = 0;
+ goto __pyx_L0;
+ break;
+
+ /* "kenlm.pyx":59
+ * if op == 0: # <
+ * return r < 0
+ * elif op == 1: # <= # <<<<<<<<<<<<<<
+ * return r <= 0
+ * elif op == 2: # ==
+ */
+ case 1:
+
+ /* "kenlm.pyx":60
+ * return r < 0
+ * elif op == 1: # <=
+ * return r <= 0 # <<<<<<<<<<<<<<
+ * elif op == 2: # ==
+ * return r == 0
+ */
+ __Pyx_XDECREF(__pyx_r);
+ __pyx_t_1 = __Pyx_PyBool_FromLong((__pyx_v_r <= 0)); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 60; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+ __Pyx_GOTREF(__pyx_t_1);
+ __pyx_r = __pyx_t_1;
+ __pyx_t_1 = 0;
+ goto __pyx_L0;
+ break;
+
+ /* "kenlm.pyx":61
+ * elif op == 1: # <=
+ * return r <= 0
+ * elif op == 2: # == # <<<<<<<<<<<<<<
+ * return r == 0
+ * elif op == 3: # !=
+ */
+ case 2:
+
+ /* "kenlm.pyx":62
+ * return r <= 0
+ * elif op == 2: # ==
+ * return r == 0 # <<<<<<<<<<<<<<
+ * elif op == 3: # !=
+ * return r != 0
+ */
+ __Pyx_XDECREF(__pyx_r);
+ __pyx_t_1 = __Pyx_PyBool_FromLong((__pyx_v_r == 0)); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 62; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+ __Pyx_GOTREF(__pyx_t_1);
+ __pyx_r = __pyx_t_1;
+ __pyx_t_1 = 0;
+ goto __pyx_L0;
+ break;
+
+ /* "kenlm.pyx":63
+ * elif op == 2: # ==
+ * return r == 0
+ * elif op == 3: # != # <<<<<<<<<<<<<<
+ * return r != 0
+ * elif op == 4: # >
+ */
+ case 3:
+
+ /* "kenlm.pyx":64
+ * return r == 0
+ * elif op == 3: # !=
+ * return r != 0 # <<<<<<<<<<<<<<
+ * elif op == 4: # >
+ * return r > 0
+ */
+ __Pyx_XDECREF(__pyx_r);
+ __pyx_t_1 = __Pyx_PyBool_FromLong((__pyx_v_r != 0)); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 64; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+ __Pyx_GOTREF(__pyx_t_1);
+ __pyx_r = __pyx_t_1;
+ __pyx_t_1 = 0;
+ goto __pyx_L0;
+ break;
+
+ /* "kenlm.pyx":65
+ * elif op == 3: # !=
+ * return r != 0
+ * elif op == 4: # > # <<<<<<<<<<<<<<
+ * return r > 0
+ * else: # >=
+ */
+ case 4:
+
+ /* "kenlm.pyx":66
+ * return r != 0
+ * elif op == 4: # >
+ * return r > 0 # <<<<<<<<<<<<<<
+ * else: # >=
+ * return r >= 0
+ */
+ __Pyx_XDECREF(__pyx_r);
+ __pyx_t_1 = __Pyx_PyBool_FromLong((__pyx_v_r > 0)); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 66; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+ __Pyx_GOTREF(__pyx_t_1);
+ __pyx_r = __pyx_t_1;
+ __pyx_t_1 = 0;
+ goto __pyx_L0;
+ break;
+ default:
+
+ /* "kenlm.pyx":68
+ * return r > 0
+ * else: # >=
+ * return r >= 0 # <<<<<<<<<<<<<<
+ *
+ * def __hash__(self):
+ */
+ __Pyx_XDECREF(__pyx_r);
+ __pyx_t_1 = __Pyx_PyBool_FromLong((__pyx_v_r >= 0)); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 68; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+ __Pyx_GOTREF(__pyx_t_1);
+ __pyx_r = __pyx_t_1;
+ __pyx_t_1 = 0;
+ goto __pyx_L0;
+ break;
+ }
+
+ /* "kenlm.pyx":55
+ * cdef _kenlm.State _c_state
+ *
+ * def __richcmp__(State qa, State qb, int op): # <<<<<<<<<<<<<<
+ * r = qa._c_state.Compare(qb._c_state)
+ * if op == 0: # <
+ */
+
+ /* function exit code */
+ __pyx_L1_error:;
+ __Pyx_XDECREF(__pyx_t_1);
+ __Pyx_AddTraceback("kenlm.State.__richcmp__", __pyx_clineno, __pyx_lineno, __pyx_filename);
+ __pyx_r = NULL;
+ __pyx_L0:;
+ __Pyx_XGIVEREF(__pyx_r);
+ __Pyx_RefNannyFinishContext();
+ return __pyx_r;
+}
+
+/* "kenlm.pyx":70
+ * return r >= 0
+ *
+ * def __hash__(self): # <<<<<<<<<<<<<<
+ * return _kenlm.hash_value(self._c_state)
+ *
+ */
+
+/* Python wrapper */
+static Py_hash_t __pyx_pw_5kenlm_5State_3__hash__(PyObject *__pyx_v_self); /*proto*/
+static Py_hash_t __pyx_pw_5kenlm_5State_3__hash__(PyObject *__pyx_v_self) {
+ Py_hash_t __pyx_r;
+ __Pyx_RefNannyDeclarations
+ __Pyx_RefNannySetupContext("__hash__ (wrapper)", 0);
+ __pyx_r = __pyx_pf_5kenlm_5State_2__hash__(((struct __pyx_obj_5kenlm_State *)__pyx_v_self));
+
+ /* function exit code */
+ __Pyx_RefNannyFinishContext();
+ return __pyx_r;
+}
+
+static Py_hash_t __pyx_pf_5kenlm_5State_2__hash__(struct __pyx_obj_5kenlm_State *__pyx_v_self) {
+ Py_hash_t __pyx_r;
+ __Pyx_RefNannyDeclarations
+ __Pyx_RefNannySetupContext("__hash__", 0);
+
+ /* "kenlm.pyx":71
+ *
+ * def __hash__(self):
+ * return _kenlm.hash_value(self._c_state) # <<<<<<<<<<<<<<
+ *
+ *
+ */
+ __pyx_r = lm::ngram::hash_value(__pyx_v_self->_c_state);
+ goto __pyx_L0;
+
+ /* "kenlm.pyx":70
+ * return r >= 0
+ *
+ * def __hash__(self): # <<<<<<<<<<<<<<
+ * return _kenlm.hash_value(self._c_state)
+ *
+ */
+
+ /* function exit code */
+ __pyx_L0:;
+ if (unlikely(__pyx_r == -1) && !PyErr_Occurred()) __pyx_r = -2;
+ __Pyx_RefNannyFinishContext();
+ return __pyx_r;
+}
+
+/* "kenlm.pyx":84
+ * cdef _kenlm.const_Vocabulary* vocab
+ *
+ * def __init__(self, path): # <<<<<<<<<<<<<<
+ * """
+ * Load the language model.
+ */
+
+/* Python wrapper */
+static int __pyx_pw_5kenlm_13LanguageModel_1__init__(PyObject *__pyx_v_self, PyObject *__pyx_args, PyObject *__pyx_kwds); /*proto*/
+static char __pyx_doc_5kenlm_13LanguageModel___init__[] = "\n Load the language model.\n\n :param path: path to an arpa file or a kenlm binary file.\n ";
+#if CYTHON_COMPILING_IN_CPYTHON
+struct wrapperbase __pyx_wrapperbase_5kenlm_13LanguageModel___init__;
+#endif
+static int __pyx_pw_5kenlm_13LanguageModel_1__init__(PyObject *__pyx_v_self, PyObject *__pyx_args, PyObject *__pyx_kwds) {
+ PyObject *__pyx_v_path = 0;
+ int __pyx_lineno = 0;
+ const char *__pyx_filename = NULL;
+ int __pyx_clineno = 0;
+ int __pyx_r;
+ __Pyx_RefNannyDeclarations
+ __Pyx_RefNannySetupContext("__init__ (wrapper)", 0);
+ {
+ static PyObject **__pyx_pyargnames[] = {&__pyx_n_s_path,0};
+ PyObject* values[1] = {0};
+ if (unlikely(__pyx_kwds)) {
+ Py_ssize_t kw_args;
+ const Py_ssize_t pos_args = PyTuple_GET_SIZE(__pyx_args);
+ switch (pos_args) {
+ case 1: values[0] = PyTuple_GET_ITEM(__pyx_args, 0);
+ case 0: break;
+ default: goto __pyx_L5_argtuple_error;
+ }
+ kw_args = PyDict_Size(__pyx_kwds);
+ switch (pos_args) {
+ case 0:
+ if (likely((values[0] = PyDict_GetItem(__pyx_kwds, __pyx_n_s_path)) != 0)) kw_args--;
+ else goto __pyx_L5_argtuple_error;
+ }
+ if (unlikely(kw_args > 0)) {
+ if (unlikely(__Pyx_ParseOptionalKeywords(__pyx_kwds, __pyx_pyargnames, 0, values, pos_args, "__init__") < 0)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 84; __pyx_clineno = __LINE__; goto __pyx_L3_error;}
+ }
+ } else if (PyTuple_GET_SIZE(__pyx_args) != 1) {
+ goto __pyx_L5_argtuple_error;
+ } else {
+ values[0] = PyTuple_GET_ITEM(__pyx_args, 0);
+ }
+ __pyx_v_path = values[0];
+ }
+ goto __pyx_L4_argument_unpacking_done;
+ __pyx_L5_argtuple_error:;
+ __Pyx_RaiseArgtupleInvalid("__init__", 1, 1, 1, PyTuple_GET_SIZE(__pyx_args)); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 84; __pyx_clineno = __LINE__; goto __pyx_L3_error;}
+ __pyx_L3_error:;
+ __Pyx_AddTraceback("kenlm.LanguageModel.__init__", __pyx_clineno, __pyx_lineno, __pyx_filename);
+ __Pyx_RefNannyFinishContext();
+ return -1;
+ __pyx_L4_argument_unpacking_done:;
+ __pyx_r = __pyx_pf_5kenlm_13LanguageModel___init__(((struct __pyx_obj_5kenlm_LanguageModel *)__pyx_v_self), __pyx_v_path);
+
+ /* function exit code */
+ __Pyx_RefNannyFinishContext();
+ return __pyx_r;
+}
+
+static int __pyx_pf_5kenlm_13LanguageModel___init__(struct __pyx_obj_5kenlm_LanguageModel *__pyx_v_self, PyObject *__pyx_v_path) {
+ PyObject *__pyx_v_exception = NULL;
+ PyObject *__pyx_v_exception_message = NULL;
+ int __pyx_r;
+ __Pyx_RefNannyDeclarations
+ PyObject *__pyx_t_1 = NULL;
+ PyObject *__pyx_t_2 = NULL;
+ PyObject *__pyx_t_3 = NULL;
+ PyObject *__pyx_t_4 = NULL;
+ PyObject *__pyx_t_5 = NULL;
+ PyObject *__pyx_t_6 = NULL;
+ PyObject *__pyx_t_7 = NULL;
+ PyObject *__pyx_t_8 = NULL;
+ char *__pyx_t_9;
+ lm::base::Model *__pyx_t_10;
+ int __pyx_t_11;
+ PyObject *__pyx_t_12 = NULL;
+ Py_ssize_t __pyx_t_13;
+ PyObject *__pyx_t_14 = NULL;
+ int __pyx_lineno = 0;
+ const char *__pyx_filename = NULL;
+ int __pyx_clineno = 0;
+ __Pyx_RefNannySetupContext("__init__", 0);
+
+ /* "kenlm.pyx":90
+ * :param path: path to an arpa file or a kenlm binary file.
+ * """
+ * self.path = os.path.abspath(as_str(path)) # <<<<<<<<<<<<<<
+ * try:
+ * self.model = _kenlm.LoadVirtual(self.path)
+ */
+ __pyx_t_2 = __Pyx_GetModuleGlobalName(__pyx_n_s_os); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 90; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+ __Pyx_GOTREF(__pyx_t_2);
+ __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_t_2, __pyx_n_s_path); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 90; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+ __Pyx_GOTREF(__pyx_t_3);
+ __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0;
+ __pyx_t_2 = __Pyx_PyObject_GetAttrStr(__pyx_t_3, __pyx_n_s_abspath); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 90; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+ __Pyx_GOTREF(__pyx_t_2);
+ __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0;
+ __pyx_t_3 = __pyx_f_5kenlm_as_str(__pyx_v_path); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 90; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+ __Pyx_GOTREF(__pyx_t_3);
+ __pyx_t_4 = NULL;
+ if (CYTHON_COMPILING_IN_CPYTHON && likely(PyMethod_Check(__pyx_t_2))) {
+ __pyx_t_4 = PyMethod_GET_SELF(__pyx_t_2);
+ if (likely(__pyx_t_4)) {
+ PyObject* function = PyMethod_GET_FUNCTION(__pyx_t_2);
+ __Pyx_INCREF(__pyx_t_4);
+ __Pyx_INCREF(function);
+ __Pyx_DECREF_SET(__pyx_t_2, function);
+ }
+ }
+ if (!__pyx_t_4) {
+ __pyx_t_1 = __Pyx_PyObject_CallOneArg(__pyx_t_2, __pyx_t_3); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 90; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+ __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0;
+ __Pyx_GOTREF(__pyx_t_1);
+ } else {
+ __pyx_t_5 = PyTuple_New(1+1); if (unlikely(!__pyx_t_5)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 90; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+ __Pyx_GOTREF(__pyx_t_5);
+ PyTuple_SET_ITEM(__pyx_t_5, 0, __pyx_t_4); __Pyx_GIVEREF(__pyx_t_4); __pyx_t_4 = NULL;
+ PyTuple_SET_ITEM(__pyx_t_5, 0+1, __pyx_t_3);
+ __Pyx_GIVEREF(__pyx_t_3);
+ __pyx_t_3 = 0;
+ __pyx_t_1 = __Pyx_PyObject_Call(__pyx_t_2, __pyx_t_5, NULL); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 90; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+ __Pyx_GOTREF(__pyx_t_1);
+ __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0;
+ }
+ __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0;
+ if (!(likely(PyBytes_CheckExact(__pyx_t_1))||((__pyx_t_1) == Py_None)||(PyErr_Format(PyExc_TypeError, "Expected %.16s, got %.200s", "bytes", Py_TYPE(__pyx_t_1)->tp_name), 0))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 90; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+ __Pyx_GIVEREF(__pyx_t_1);
+ __Pyx_GOTREF(__pyx_v_self->path);
+ __Pyx_DECREF(__pyx_v_self->path);
+ __pyx_v_self->path = ((PyObject*)__pyx_t_1);
+ __pyx_t_1 = 0;
+
+ /* "kenlm.pyx":91
+ * """
+ * self.path = os.path.abspath(as_str(path))
+ * try: # <<<<<<<<<<<<<<
+ * self.model = _kenlm.LoadVirtual(self.path)
+ * except RuntimeError as exception:
+ */
+ {
+ __Pyx_ExceptionSave(&__pyx_t_6, &__pyx_t_7, &__pyx_t_8);
+ __Pyx_XGOTREF(__pyx_t_6);
+ __Pyx_XGOTREF(__pyx_t_7);
+ __Pyx_XGOTREF(__pyx_t_8);
+ /*try:*/ {
+
+ /* "kenlm.pyx":92
+ * self.path = os.path.abspath(as_str(path))
+ * try:
+ * self.model = _kenlm.LoadVirtual(self.path) # <<<<<<<<<<<<<<
+ * except RuntimeError as exception:
+ * exception_message = str(exception).replace('\n', ' ')
+ */
+ __pyx_t_9 = __Pyx_PyObject_AsString(__pyx_v_self->path); if (unlikely((!__pyx_t_9) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 92; __pyx_clineno = __LINE__; goto __pyx_L3_error;}
+ try {
+ __pyx_t_10 = lm::ngram::LoadVirtual(__pyx_t_9);
+ } catch(...) {
+ __Pyx_CppExn2PyErr();
+ {__pyx_filename = __pyx_f[0]; __pyx_lineno = 92; __pyx_clineno = __LINE__; goto __pyx_L3_error;}
+ }
+ __pyx_v_self->model = __pyx_t_10;
+ }
+ __Pyx_XDECREF(__pyx_t_6); __pyx_t_6 = 0;
+ __Pyx_XDECREF(__pyx_t_7); __pyx_t_7 = 0;
+ __Pyx_XDECREF(__pyx_t_8); __pyx_t_8 = 0;
+ goto __pyx_L10_try_end;
+ __pyx_L3_error:;
+ __Pyx_XDECREF(__pyx_t_4); __pyx_t_4 = 0;
+ __Pyx_XDECREF(__pyx_t_3); __pyx_t_3 = 0;
+ __Pyx_XDECREF(__pyx_t_5); __pyx_t_5 = 0;
+ __Pyx_XDECREF(__pyx_t_2); __pyx_t_2 = 0;
+ __Pyx_XDECREF(__pyx_t_1); __pyx_t_1 = 0;
+
+ /* "kenlm.pyx":93
+ * try:
+ * self.model = _kenlm.LoadVirtual(self.path)
+ * except RuntimeError as exception: # <<<<<<<<<<<<<<
+ * exception_message = str(exception).replace('\n', ' ')
+ * raise IOError('Cannot read model \'{}\' ({})'.format(path, exception_message))\
+ */
+ __pyx_t_11 = PyErr_ExceptionMatches(__pyx_builtin_RuntimeError);
+ if (__pyx_t_11) {
+ __Pyx_AddTraceback("kenlm.LanguageModel.__init__", __pyx_clineno, __pyx_lineno, __pyx_filename);
+ if (__Pyx_GetException(&__pyx_t_1, &__pyx_t_2, &__pyx_t_5) < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 93; __pyx_clineno = __LINE__; goto __pyx_L5_except_error;}
+ __Pyx_GOTREF(__pyx_t_1);
+ __Pyx_GOTREF(__pyx_t_2);
+ __Pyx_GOTREF(__pyx_t_5);
+ __Pyx_INCREF(__pyx_t_2);
+ __pyx_v_exception = __pyx_t_2;
+
+ /* "kenlm.pyx":94
+ * self.model = _kenlm.LoadVirtual(self.path)
+ * except RuntimeError as exception:
+ * exception_message = str(exception).replace('\n', ' ') # <<<<<<<<<<<<<<
+ * raise IOError('Cannot read model \'{}\' ({})'.format(path, exception_message))\
+ * from exception
+ */
+ __pyx_t_3 = PyTuple_New(1); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 94; __pyx_clineno = __LINE__; goto __pyx_L5_except_error;}
+ __Pyx_GOTREF(__pyx_t_3);
+ __Pyx_INCREF(__pyx_v_exception);
+ PyTuple_SET_ITEM(__pyx_t_3, 0, __pyx_v_exception);
+ __Pyx_GIVEREF(__pyx_v_exception);
+ __pyx_t_4 = __Pyx_PyObject_Call(((PyObject *)((PyObject*)(&PyString_Type))), __pyx_t_3, NULL); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 94; __pyx_clineno = __LINE__; goto __pyx_L5_except_error;}
+ __Pyx_GOTREF(__pyx_t_4);
+ __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0;
+ __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_t_4, __pyx_n_s_replace); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 94; __pyx_clineno = __LINE__; goto __pyx_L5_except_error;}
+ __Pyx_GOTREF(__pyx_t_3);
+ __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0;
+ __pyx_t_4 = __Pyx_PyObject_Call(__pyx_t_3, __pyx_tuple__4, NULL); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 94; __pyx_clineno = __LINE__; goto __pyx_L5_except_error;}
+ __Pyx_GOTREF(__pyx_t_4);
+ __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0;
+ __pyx_v_exception_message = __pyx_t_4;
+ __pyx_t_4 = 0;
+
+ /* "kenlm.pyx":95
+ * except RuntimeError as exception:
+ * exception_message = str(exception).replace('\n', ' ')
+ * raise IOError('Cannot read model \'{}\' ({})'.format(path, exception_message))\ # <<<<<<<<<<<<<<
+ * from exception
+ * self.vocab = &self.model.BaseVocabulary()
+ */
+ __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_kp_s_Cannot_read_model, __pyx_n_s_format); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 95; __pyx_clineno = __LINE__; goto __pyx_L5_except_error;}
+ __Pyx_GOTREF(__pyx_t_3);
+ __pyx_t_12 = NULL;
+ __pyx_t_13 = 0;
+ if (CYTHON_COMPILING_IN_CPYTHON && likely(PyMethod_Check(__pyx_t_3))) {
+ __pyx_t_12 = PyMethod_GET_SELF(__pyx_t_3);
+ if (likely(__pyx_t_12)) {
+ PyObject* function = PyMethod_GET_FUNCTION(__pyx_t_3);
+ __Pyx_INCREF(__pyx_t_12);
+ __Pyx_INCREF(function);
+ __Pyx_DECREF_SET(__pyx_t_3, function);
+ __pyx_t_13 = 1;
+ }
+ }
+ __pyx_t_14 = PyTuple_New(2+__pyx_t_13); if (unlikely(!__pyx_t_14)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 95; __pyx_clineno = __LINE__; goto __pyx_L5_except_error;}
+ __Pyx_GOTREF(__pyx_t_14);
+ if (__pyx_t_12) {
+ PyTuple_SET_ITEM(__pyx_t_14, 0, __pyx_t_12); __Pyx_GIVEREF(__pyx_t_12); __pyx_t_12 = NULL;
+ }
+ __Pyx_INCREF(__pyx_v_path);
+ PyTuple_SET_ITEM(__pyx_t_14, 0+__pyx_t_13, __pyx_v_path);
+ __Pyx_GIVEREF(__pyx_v_path);
+ __Pyx_INCREF(__pyx_v_exception_message);
+ PyTuple_SET_ITEM(__pyx_t_14, 1+__pyx_t_13, __pyx_v_exception_message);
+ __Pyx_GIVEREF(__pyx_v_exception_message);
+ __pyx_t_4 = __Pyx_PyObject_Call(__pyx_t_3, __pyx_t_14, NULL); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 95; __pyx_clineno = __LINE__; goto __pyx_L5_except_error;}
+ __Pyx_GOTREF(__pyx_t_4);
+ __Pyx_DECREF(__pyx_t_14); __pyx_t_14 = 0;
+ __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0;
+ __pyx_t_3 = PyTuple_New(1); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 95; __pyx_clineno = __LINE__; goto __pyx_L5_except_error;}
+ __Pyx_GOTREF(__pyx_t_3);
+ PyTuple_SET_ITEM(__pyx_t_3, 0, __pyx_t_4);
+ __Pyx_GIVEREF(__pyx_t_4);
+ __pyx_t_4 = 0;
+ __pyx_t_4 = __Pyx_PyObject_Call(__pyx_builtin_IOError, __pyx_t_3, NULL); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 95; __pyx_clineno = __LINE__; goto __pyx_L5_except_error;}
+ __Pyx_GOTREF(__pyx_t_4);
+ __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0;
+
+ /* "kenlm.pyx":96
+ * exception_message = str(exception).replace('\n', ' ')
+ * raise IOError('Cannot read model \'{}\' ({})'.format(path, exception_message))\
+ * from exception # <<<<<<<<<<<<<<
+ * self.vocab = &self.model.BaseVocabulary()
+ *
+ */
+ __Pyx_Raise(__pyx_t_4, 0, 0, __pyx_v_exception);
+ __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0;
+ {__pyx_filename = __pyx_f[0]; __pyx_lineno = 95; __pyx_clineno = __LINE__; goto __pyx_L5_except_error;}
+ }
+ goto __pyx_L5_except_error;
+ __pyx_L5_except_error:;
+ __Pyx_XGIVEREF(__pyx_t_6);
+ __Pyx_XGIVEREF(__pyx_t_7);
+ __Pyx_XGIVEREF(__pyx_t_8);
+ __Pyx_ExceptionReset(__pyx_t_6, __pyx_t_7, __pyx_t_8);
+ goto __pyx_L1_error;
+ __pyx_L10_try_end:;
+ }
+
+ /* "kenlm.pyx":97
+ * raise IOError('Cannot read model \'{}\' ({})'.format(path, exception_message))\
+ * from exception
+ * self.vocab = &self.model.BaseVocabulary() # <<<<<<<<<<<<<<
+ *
+ * def __dealloc__(self):
+ */
+ __pyx_v_self->vocab = (&__pyx_v_self->model->BaseVocabulary());
+
+ /* "kenlm.pyx":84
+ * cdef _kenlm.const_Vocabulary* vocab
+ *
+ * def __init__(self, path): # <<<<<<<<<<<<<<
+ * """
+ * Load the language model.
+ */
+
+ /* function exit code */
+ __pyx_r = 0;
+ goto __pyx_L0;
+ __pyx_L1_error:;
+ __Pyx_XDECREF(__pyx_t_1);
+ __Pyx_XDECREF(__pyx_t_2);
+ __Pyx_XDECREF(__pyx_t_3);
+ __Pyx_XDECREF(__pyx_t_4);
+ __Pyx_XDECREF(__pyx_t_5);
+ __Pyx_XDECREF(__pyx_t_12);
+ __Pyx_XDECREF(__pyx_t_14);
+ __Pyx_AddTraceback("kenlm.LanguageModel.__init__", __pyx_clineno, __pyx_lineno, __pyx_filename);
+ __pyx_r = -1;
+ __pyx_L0:;
+ __Pyx_XDECREF(__pyx_v_exception);
+ __Pyx_XDECREF(__pyx_v_exception_message);
+ __Pyx_RefNannyFinishContext();
+ return __pyx_r;
+}
+
+/* "kenlm.pyx":99
+ * self.vocab = &self.model.BaseVocabulary()
+ *
+ * def __dealloc__(self): # <<<<<<<<<<<<<<
+ * del self.model
+ *
+ */
+
+/* Python wrapper */
+static void __pyx_pw_5kenlm_13LanguageModel_3__dealloc__(PyObject *__pyx_v_self); /*proto*/
+static void __pyx_pw_5kenlm_13LanguageModel_3__dealloc__(PyObject *__pyx_v_self) {
+ __Pyx_RefNannyDeclarations
+ __Pyx_RefNannySetupContext("__dealloc__ (wrapper)", 0);
+ __pyx_pf_5kenlm_13LanguageModel_2__dealloc__(((struct __pyx_obj_5kenlm_LanguageModel *)__pyx_v_self)); /* delegate to the C-level __dealloc__ impl below */
+
+ /* function exit code */
+ __Pyx_RefNannyFinishContext();
+}
+
+static void __pyx_pf_5kenlm_13LanguageModel_2__dealloc__(struct __pyx_obj_5kenlm_LanguageModel *__pyx_v_self) {
+ __Pyx_RefNannyDeclarations
+ __Pyx_RefNannySetupContext("__dealloc__", 0);
+
+ /* "kenlm.pyx":100
+ *
+ * def __dealloc__(self):
+ * del self.model # <<<<<<<<<<<<<<
+ *
+ * property order:
+ */
+ delete __pyx_v_self->model; /* C++ delete of the model this wrapper owns (del self.model) */
+
+ /* "kenlm.pyx":99
+ * self.vocab = &self.model.BaseVocabulary()
+ *
+ * def __dealloc__(self): # <<<<<<<<<<<<<<
+ * del self.model
+ *
+ */
+
+ /* function exit code */
+ __Pyx_RefNannyFinishContext();
+}
+
+/* "kenlm.pyx":103
+ *
+ * property order:
+ * def __get__(self): # <<<<<<<<<<<<<<
+ * return self.model.Order()
+ *
+ */
+
+/* Python wrapper */
+static PyObject *__pyx_pw_5kenlm_13LanguageModel_5order_1__get__(PyObject *__pyx_v_self); /*proto*/
+static PyObject *__pyx_pw_5kenlm_13LanguageModel_5order_1__get__(PyObject *__pyx_v_self) {
+ PyObject *__pyx_r = 0;
+ __Pyx_RefNannyDeclarations
+ __Pyx_RefNannySetupContext("__get__ (wrapper)", 0);
+ __pyx_r = __pyx_pf_5kenlm_13LanguageModel_5order___get__(((struct __pyx_obj_5kenlm_LanguageModel *)__pyx_v_self)); /* delegate to the C-level property getter */
+
+ /* function exit code */
+ __Pyx_RefNannyFinishContext();
+ return __pyx_r;
+}
+
+static PyObject *__pyx_pf_5kenlm_13LanguageModel_5order___get__(struct __pyx_obj_5kenlm_LanguageModel *__pyx_v_self) {
+ PyObject *__pyx_r = NULL;
+ __Pyx_RefNannyDeclarations
+ PyObject *__pyx_t_1 = NULL;
+ int __pyx_lineno = 0;
+ const char *__pyx_filename = NULL;
+ int __pyx_clineno = 0;
+ __Pyx_RefNannySetupContext("__get__", 0);
+
+ /* "kenlm.pyx":104
+ * property order:
+ * def __get__(self):
+ * return self.model.Order() # <<<<<<<<<<<<<<
+ *
+ * def score(self, sentence, bos = True, eos = True):
+ */
+ __Pyx_XDECREF(__pyx_r);
+ __pyx_t_1 = __Pyx_PyInt_From_unsigned_int(__pyx_v_self->model->Order()); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 104; __pyx_clineno = __LINE__; goto __pyx_L1_error;} /* box the unsigned Order() as a Python int */
+ __Pyx_GOTREF(__pyx_t_1);
+ __pyx_r = __pyx_t_1;
+ __pyx_t_1 = 0;
+ goto __pyx_L0;
+
+ /* "kenlm.pyx":103
+ *
+ * property order:
+ * def __get__(self): # <<<<<<<<<<<<<<
+ * return self.model.Order()
+ *
+ */
+
+ /* function exit code */
+ __pyx_L1_error:;
+ __Pyx_XDECREF(__pyx_t_1);
+ __Pyx_AddTraceback("kenlm.LanguageModel.order.__get__", __pyx_clineno, __pyx_lineno, __pyx_filename);
+ __pyx_r = NULL;
+ __pyx_L0:;
+ __Pyx_XGIVEREF(__pyx_r);
+ __Pyx_RefNannyFinishContext();
+ return __pyx_r;
+}
+
+/* "kenlm.pyx":106
+ * return self.model.Order()
+ *
+ * def score(self, sentence, bos = True, eos = True): # <<<<<<<<<<<<<<
+ * cdef list words = as_str(sentence).split()
+ * cdef _kenlm.State state
+ */
+
+/* Python wrapper */
+static PyObject *__pyx_pw_5kenlm_13LanguageModel_5score(PyObject *__pyx_v_self, PyObject *__pyx_args, PyObject *__pyx_kwds); /*proto*/
+static PyObject *__pyx_pw_5kenlm_13LanguageModel_5score(PyObject *__pyx_v_self, PyObject *__pyx_args, PyObject *__pyx_kwds) {
+ PyObject *__pyx_v_sentence = 0;
+ PyObject *__pyx_v_bos = 0;
+ PyObject *__pyx_v_eos = 0;
+ int __pyx_lineno = 0;
+ const char *__pyx_filename = NULL;
+ int __pyx_clineno = 0;
+ PyObject *__pyx_r = 0;
+ __Pyx_RefNannyDeclarations
+ __Pyx_RefNannySetupContext("score (wrapper)", 0);
+ { /* unpack args: sentence required; bos and eos default to Py_True */
+ static PyObject **__pyx_pyargnames[] = {&__pyx_n_s_sentence,&__pyx_n_s_bos,&__pyx_n_s_eos,0};
+ PyObject* values[3] = {0,0,0};
+ values[1] = ((PyObject *)Py_True);
+ values[2] = ((PyObject *)Py_True);
+ if (unlikely(__pyx_kwds)) {
+ Py_ssize_t kw_args;
+ const Py_ssize_t pos_args = PyTuple_GET_SIZE(__pyx_args);
+ switch (pos_args) { /* positional args fill values[]; fall-through is intentional */
+ case 3: values[2] = PyTuple_GET_ITEM(__pyx_args, 2);
+ case 2: values[1] = PyTuple_GET_ITEM(__pyx_args, 1);
+ case 1: values[0] = PyTuple_GET_ITEM(__pyx_args, 0);
+ case 0: break;
+ default: goto __pyx_L5_argtuple_error;
+ }
+ kw_args = PyDict_Size(__pyx_kwds);
+ switch (pos_args) { /* keywords may only supply slots not already filled positionally */
+ case 0:
+ if (likely((values[0] = PyDict_GetItem(__pyx_kwds, __pyx_n_s_sentence)) != 0)) kw_args--;
+ else goto __pyx_L5_argtuple_error;
+ case 1:
+ if (kw_args > 0) {
+ PyObject* value = PyDict_GetItem(__pyx_kwds, __pyx_n_s_bos);
+ if (value) { values[1] = value; kw_args--; }
+ }
+ case 2:
+ if (kw_args > 0) {
+ PyObject* value = PyDict_GetItem(__pyx_kwds, __pyx_n_s_eos);
+ if (value) { values[2] = value; kw_args--; }
+ }
+ }
+ if (unlikely(kw_args > 0)) {
+ if (unlikely(__Pyx_ParseOptionalKeywords(__pyx_kwds, __pyx_pyargnames, 0, values, pos_args, "score") < 0)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 106; __pyx_clineno = __LINE__; goto __pyx_L3_error;}
+ }
+ } else {
+ switch (PyTuple_GET_SIZE(__pyx_args)) {
+ case 3: values[2] = PyTuple_GET_ITEM(__pyx_args, 2);
+ case 2: values[1] = PyTuple_GET_ITEM(__pyx_args, 1);
+ case 1: values[0] = PyTuple_GET_ITEM(__pyx_args, 0);
+ break;
+ default: goto __pyx_L5_argtuple_error;
+ }
+ }
+ __pyx_v_sentence = values[0];
+ __pyx_v_bos = values[1];
+ __pyx_v_eos = values[2];
+ }
+ goto __pyx_L4_argument_unpacking_done;
+ __pyx_L5_argtuple_error:;
+ __Pyx_RaiseArgtupleInvalid("score", 0, 1, 3, PyTuple_GET_SIZE(__pyx_args)); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 106; __pyx_clineno = __LINE__; goto __pyx_L3_error;}
+ __pyx_L3_error:;
+ __Pyx_AddTraceback("kenlm.LanguageModel.score", __pyx_clineno, __pyx_lineno, __pyx_filename);
+ __Pyx_RefNannyFinishContext();
+ return NULL;
+ __pyx_L4_argument_unpacking_done:;
+ __pyx_r = __pyx_pf_5kenlm_13LanguageModel_4score(((struct __pyx_obj_5kenlm_LanguageModel *)__pyx_v_self), __pyx_v_sentence, __pyx_v_bos, __pyx_v_eos); /* delegate to the C-level score impl */
+
+ /* function exit code */
+ __Pyx_RefNannyFinishContext();
+ return __pyx_r;
+}
+
+static PyObject *__pyx_pf_5kenlm_13LanguageModel_4score(struct __pyx_obj_5kenlm_LanguageModel *__pyx_v_self, PyObject *__pyx_v_sentence, PyObject *__pyx_v_bos, PyObject *__pyx_v_eos) {
+ PyObject *__pyx_v_words = 0;
+ lm::ngram::State __pyx_v_state;
+ lm::ngram::State __pyx_v_out_state;
+ float __pyx_v_total;
+ PyObject *__pyx_v_word = NULL;
+ PyObject *__pyx_r = NULL;
+ __Pyx_RefNannyDeclarations
+ PyObject *__pyx_t_1 = NULL;
+ PyObject *__pyx_t_2 = NULL;
+ PyObject *__pyx_t_3 = NULL;
+ int __pyx_t_4;
+ Py_ssize_t __pyx_t_5;
+ char *__pyx_t_6;
+ int __pyx_lineno = 0;
+ const char *__pyx_filename = NULL;
+ int __pyx_clineno = 0;
+ __Pyx_RefNannySetupContext("score", 0);
+
+ /* "kenlm.pyx":107
+ *
+ * def score(self, sentence, bos = True, eos = True):
+ * cdef list words = as_str(sentence).split() # <<<<<<<<<<<<<<
+ * cdef _kenlm.State state
+ * if bos:
+ */
+ __pyx_t_2 = __pyx_f_5kenlm_as_str(__pyx_v_sentence); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 107; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+ __Pyx_GOTREF(__pyx_t_2);
+ __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_t_2, __pyx_n_s_split); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 107; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+ __Pyx_GOTREF(__pyx_t_3);
+ __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0;
+ __pyx_t_2 = NULL;
+ if (CYTHON_COMPILING_IN_CPYTHON && likely(PyMethod_Check(__pyx_t_3))) { /* unpack bound method to call the underlying function directly */
+ __pyx_t_2 = PyMethod_GET_SELF(__pyx_t_3);
+ if (likely(__pyx_t_2)) {
+ PyObject* function = PyMethod_GET_FUNCTION(__pyx_t_3);
+ __Pyx_INCREF(__pyx_t_2);
+ __Pyx_INCREF(function);
+ __Pyx_DECREF_SET(__pyx_t_3, function);
+ }
+ }
+ if (__pyx_t_2) {
+ __pyx_t_1 = __Pyx_PyObject_CallOneArg(__pyx_t_3, __pyx_t_2); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 107; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+ __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0;
+ } else {
+ __pyx_t_1 = __Pyx_PyObject_CallNoArg(__pyx_t_3); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 107; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+ }
+ __Pyx_GOTREF(__pyx_t_1);
+ __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0;
+ if (!(likely(PyList_CheckExact(__pyx_t_1))||((__pyx_t_1) == Py_None)||(PyErr_Format(PyExc_TypeError, "Expected %.16s, got %.200s", "list", Py_TYPE(__pyx_t_1)->tp_name), 0))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 107; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+ __pyx_v_words = ((PyObject*)__pyx_t_1);
+ __pyx_t_1 = 0;
+
+ /* "kenlm.pyx":109
+ * cdef list words = as_str(sentence).split()
+ * cdef _kenlm.State state
+ * if bos: # <<<<<<<<<<<<<<
+ * self.model.BeginSentenceWrite(&state)
+ * else:
+ */
+ __pyx_t_4 = __Pyx_PyObject_IsTrue(__pyx_v_bos); if (unlikely(__pyx_t_4 < 0)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 109; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+ if (__pyx_t_4) {
+
+ /* "kenlm.pyx":110
+ * cdef _kenlm.State state
+ * if bos:
+ * self.model.BeginSentenceWrite(&state) # <<<<<<<<<<<<<<
+ * else:
+ * self.model.NullContextWrite(&state)
+ */
+ __pyx_v_self->model->BeginSentenceWrite((&__pyx_v_state)); /* start from the begin-of-sentence state */
+ goto __pyx_L3;
+ }
+ /*else*/ {
+
+ /* "kenlm.pyx":112
+ * self.model.BeginSentenceWrite(&state)
+ * else:
+ * self.model.NullContextWrite(&state) # <<<<<<<<<<<<<<
+ * cdef _kenlm.State out_state
+ * cdef float total = 0
+ */
+ __pyx_v_self->model->NullContextWrite((&__pyx_v_state)); /* start from the null (context-free) state */
+ }
+ __pyx_L3:;
+
+ /* "kenlm.pyx":114
+ * self.model.NullContextWrite(&state)
+ * cdef _kenlm.State out_state
+ * cdef float total = 0 # <<<<<<<<<<<<<<
+ * for word in words:
+ * total += self.model.BaseScore(&state, self.vocab.Index(word), &out_state)
+ */
+ __pyx_v_total = 0.0; /* running score accumulator */
+
+ /* "kenlm.pyx":115
+ * cdef _kenlm.State out_state
+ * cdef float total = 0
+ * for word in words: # <<<<<<<<<<<<<<
+ * total += self.model.BaseScore(&state, self.vocab.Index(word), &out_state)
+ * state = out_state
+ */
+ if (unlikely(__pyx_v_words == Py_None)) {
+ PyErr_SetString(PyExc_TypeError, "'NoneType' object is not iterable");
+ {__pyx_filename = __pyx_f[0]; __pyx_lineno = 115; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+ }
+ __pyx_t_1 = __pyx_v_words; __Pyx_INCREF(__pyx_t_1); __pyx_t_5 = 0;
+ for (;;) { /* iterate over the words list by index */
+ if (__pyx_t_5 >= PyList_GET_SIZE(__pyx_t_1)) break;
+ #if CYTHON_COMPILING_IN_CPYTHON
+ __pyx_t_3 = PyList_GET_ITEM(__pyx_t_1, __pyx_t_5); __Pyx_INCREF(__pyx_t_3); __pyx_t_5++; if (unlikely(0 < 0)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 115; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+ #else
+ __pyx_t_3 = PySequence_ITEM(__pyx_t_1, __pyx_t_5); __pyx_t_5++; if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 115; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+ #endif
+ __Pyx_XDECREF_SET(__pyx_v_word, __pyx_t_3);
+ __pyx_t_3 = 0;
+
+ /* "kenlm.pyx":116
+ * cdef float total = 0
+ * for word in words:
+ * total += self.model.BaseScore(&state, self.vocab.Index(word), &out_state) # <<<<<<<<<<<<<<
+ * state = out_state
+ * if eos:
+ */
+ __pyx_t_6 = __Pyx_PyObject_AsString(__pyx_v_word); if (unlikely((!__pyx_t_6) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 116; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+ __pyx_v_total = (__pyx_v_total + __pyx_v_self->model->BaseScore((&__pyx_v_state), __pyx_v_self->vocab->Index(__pyx_t_6), (&__pyx_v_out_state)));
+
+ /* "kenlm.pyx":117
+ * for word in words:
+ * total += self.model.BaseScore(&state, self.vocab.Index(word), &out_state)
+ * state = out_state # <<<<<<<<<<<<<<
+ * if eos:
+ * total += self.model.BaseScore(&state, self.vocab.EndSentence(), &out_state)
+ */
+ __pyx_v_state = __pyx_v_out_state; /* advance the n-gram context */
+
+ /* "kenlm.pyx":115
+ * cdef _kenlm.State out_state
+ * cdef float total = 0
+ * for word in words: # <<<<<<<<<<<<<<
+ * total += self.model.BaseScore(&state, self.vocab.Index(word), &out_state)
+ * state = out_state
+ */
+ }
+ __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
+
+ /* "kenlm.pyx":118
+ * total += self.model.BaseScore(&state, self.vocab.Index(word), &out_state)
+ * state = out_state
+ * if eos: # <<<<<<<<<<<<<<
+ * total += self.model.BaseScore(&state, self.vocab.EndSentence(), &out_state)
+ * return total
+ */
+ __pyx_t_4 = __Pyx_PyObject_IsTrue(__pyx_v_eos); if (unlikely(__pyx_t_4 < 0)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 118; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+ if (__pyx_t_4) {
+
+ /* "kenlm.pyx":119
+ * state = out_state
+ * if eos:
+ * total += self.model.BaseScore(&state, self.vocab.EndSentence(), &out_state) # <<<<<<<<<<<<<<
+ * return total
+ *
+ */
+ __pyx_v_total = (__pyx_v_total + __pyx_v_self->model->BaseScore((&__pyx_v_state), __pyx_v_self->vocab->EndSentence(), (&__pyx_v_out_state))); /* score the end-of-sentence token */
+ goto __pyx_L6;
+ }
+ __pyx_L6:;
+
+ /* "kenlm.pyx":120
+ * if eos:
+ * total += self.model.BaseScore(&state, self.vocab.EndSentence(), &out_state)
+ * return total # <<<<<<<<<<<<<<
+ *
+ * def full_scores(self, sentence, bos = True, eos = True):
+ */
+ __Pyx_XDECREF(__pyx_r);
+ __pyx_t_1 = PyFloat_FromDouble(__pyx_v_total); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 120; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+ __Pyx_GOTREF(__pyx_t_1);
+ __pyx_r = __pyx_t_1;
+ __pyx_t_1 = 0;
+ goto __pyx_L0;
+
+ /* "kenlm.pyx":106
+ * return self.model.Order()
+ *
+ * def score(self, sentence, bos = True, eos = True): # <<<<<<<<<<<<<<
+ * cdef list words = as_str(sentence).split()
+ * cdef _kenlm.State state
+ */
+
+ /* function exit code */
+ __pyx_L1_error:;
+ __Pyx_XDECREF(__pyx_t_1);
+ __Pyx_XDECREF(__pyx_t_2);
+ __Pyx_XDECREF(__pyx_t_3);
+ __Pyx_AddTraceback("kenlm.LanguageModel.score", __pyx_clineno, __pyx_lineno, __pyx_filename);
+ __pyx_r = NULL;
+ __pyx_L0:;
+ __Pyx_XDECREF(__pyx_v_words);
+ __Pyx_XDECREF(__pyx_v_word);
+ __Pyx_XGIVEREF(__pyx_r);
+ __Pyx_RefNannyFinishContext();
+ return __pyx_r;
+}
+static PyObject *__pyx_gb_5kenlm_13LanguageModel_8generator(__pyx_GeneratorObject *__pyx_generator, PyObject *__pyx_sent_value); /* proto */
+
+/* "kenlm.pyx":122
+ * return total
+ *
+ * def full_scores(self, sentence, bos = True, eos = True): # <<<<<<<<<<<<<<
+ * """
+ * full_scores(sentence, bos = True, eos = Ture) -> generate full scores (prob, ngram length, oov)
+ */
+
+/* Python wrapper */
+static PyObject *__pyx_pw_5kenlm_13LanguageModel_7full_scores(PyObject *__pyx_v_self, PyObject *__pyx_args, PyObject *__pyx_kwds); /*proto*/
+static char __pyx_doc_5kenlm_13LanguageModel_6full_scores[] = "\n    full_scores(sentence, bos = True, eos = True) -> generate full scores (prob, ngram length, oov)\n    @param sentence is a string (do not use boundary symbols)\n    @param bos should kenlm add a bos state\n    @param eos should kenlm add an eos state\n    "; /* fixed docstring typo: "Ture" -> "True" (also fix upstream in kenlm.pyx before regenerating) */
+static PyObject *__pyx_pw_5kenlm_13LanguageModel_7full_scores(PyObject *__pyx_v_self, PyObject *__pyx_args, PyObject *__pyx_kwds) {
+ PyObject *__pyx_v_sentence = 0;
+ PyObject *__pyx_v_bos = 0;
+ PyObject *__pyx_v_eos = 0;
+ int __pyx_lineno = 0;
+ const char *__pyx_filename = NULL;
+ int __pyx_clineno = 0;
+ PyObject *__pyx_r = 0;
+ __Pyx_RefNannyDeclarations
+ __Pyx_RefNannySetupContext("full_scores (wrapper)", 0);
+ { /* unpack args: sentence required; bos and eos default to Py_True */
+ static PyObject **__pyx_pyargnames[] = {&__pyx_n_s_sentence,&__pyx_n_s_bos,&__pyx_n_s_eos,0};
+ PyObject* values[3] = {0,0,0};
+ values[1] = ((PyObject *)Py_True);
+ values[2] = ((PyObject *)Py_True);
+ if (unlikely(__pyx_kwds)) {
+ Py_ssize_t kw_args;
+ const Py_ssize_t pos_args = PyTuple_GET_SIZE(__pyx_args);
+ switch (pos_args) { /* positional args fill values[]; fall-through is intentional */
+ case 3: values[2] = PyTuple_GET_ITEM(__pyx_args, 2);
+ case 2: values[1] = PyTuple_GET_ITEM(__pyx_args, 1);
+ case 1: values[0] = PyTuple_GET_ITEM(__pyx_args, 0);
+ case 0: break;
+ default: goto __pyx_L5_argtuple_error;
+ }
+ kw_args = PyDict_Size(__pyx_kwds);
+ switch (pos_args) { /* keywords may only supply slots not already filled positionally */
+ case 0:
+ if (likely((values[0] = PyDict_GetItem(__pyx_kwds, __pyx_n_s_sentence)) != 0)) kw_args--;
+ else goto __pyx_L5_argtuple_error;
+ case 1:
+ if (kw_args > 0) {
+ PyObject* value = PyDict_GetItem(__pyx_kwds, __pyx_n_s_bos);
+ if (value) { values[1] = value; kw_args--; }
+ }
+ case 2:
+ if (kw_args > 0) {
+ PyObject* value = PyDict_GetItem(__pyx_kwds, __pyx_n_s_eos);
+ if (value) { values[2] = value; kw_args--; }
+ }
+ }
+ if (unlikely(kw_args > 0)) {
+ if (unlikely(__Pyx_ParseOptionalKeywords(__pyx_kwds, __pyx_pyargnames, 0, values, pos_args, "full_scores") < 0)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 122; __pyx_clineno = __LINE__; goto __pyx_L3_error;}
+ }
+ } else {
+ switch (PyTuple_GET_SIZE(__pyx_args)) {
+ case 3: values[2] = PyTuple_GET_ITEM(__pyx_args, 2);
+ case 2: values[1] = PyTuple_GET_ITEM(__pyx_args, 1);
+ case 1: values[0] = PyTuple_GET_ITEM(__pyx_args, 0);
+ break;
+ default: goto __pyx_L5_argtuple_error;
+ }
+ }
+ __pyx_v_sentence = values[0];
+ __pyx_v_bos = values[1];
+ __pyx_v_eos = values[2];
+ }
+ goto __pyx_L4_argument_unpacking_done;
+ __pyx_L5_argtuple_error:;
+ __Pyx_RaiseArgtupleInvalid("full_scores", 0, 1, 3, PyTuple_GET_SIZE(__pyx_args)); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 122; __pyx_clineno = __LINE__; goto __pyx_L3_error;}
+ __pyx_L3_error:;
+ __Pyx_AddTraceback("kenlm.LanguageModel.full_scores", __pyx_clineno, __pyx_lineno, __pyx_filename);
+ __Pyx_RefNannyFinishContext();
+ return NULL;
+ __pyx_L4_argument_unpacking_done:;
+ __pyx_r = __pyx_pf_5kenlm_13LanguageModel_6full_scores(((struct __pyx_obj_5kenlm_LanguageModel *)__pyx_v_self), __pyx_v_sentence, __pyx_v_bos, __pyx_v_eos); /* delegate to the generator-constructing impl */
+
+ /* function exit code */
+ __Pyx_RefNannyFinishContext();
+ return __pyx_r;
+}
+
+static PyObject *__pyx_pf_5kenlm_13LanguageModel_6full_scores(struct __pyx_obj_5kenlm_LanguageModel *__pyx_v_self, PyObject *__pyx_v_sentence, PyObject *__pyx_v_bos, PyObject *__pyx_v_eos) {
+ struct __pyx_obj_5kenlm___pyx_scope_struct__full_scores *__pyx_cur_scope;
+ PyObject *__pyx_r = NULL;
+ __Pyx_RefNannyDeclarations
+ int __pyx_lineno = 0;
+ const char *__pyx_filename = NULL;
+ int __pyx_clineno = 0;
+ __Pyx_RefNannySetupContext("full_scores", 0);
+ __pyx_cur_scope = (struct __pyx_obj_5kenlm___pyx_scope_struct__full_scores *)__pyx_tp_new_5kenlm___pyx_scope_struct__full_scores(__pyx_ptype_5kenlm___pyx_scope_struct__full_scores, __pyx_empty_tuple, NULL); /* closure holding self/sentence/bos/eos for the generator */
+ if (unlikely(!__pyx_cur_scope)) {
+ __Pyx_RefNannyFinishContext();
+ return NULL;
+ }
+ __Pyx_GOTREF(__pyx_cur_scope);
+ __pyx_cur_scope->__pyx_v_self = __pyx_v_self;
+ __Pyx_INCREF((PyObject *)__pyx_cur_scope->__pyx_v_self);
+ __Pyx_GIVEREF((PyObject *)__pyx_cur_scope->__pyx_v_self);
+ __pyx_cur_scope->__pyx_v_sentence = __pyx_v_sentence;
+ __Pyx_INCREF(__pyx_cur_scope->__pyx_v_sentence);
+ __Pyx_GIVEREF(__pyx_cur_scope->__pyx_v_sentence);
+ __pyx_cur_scope->__pyx_v_bos = __pyx_v_bos;
+ __Pyx_INCREF(__pyx_cur_scope->__pyx_v_bos);
+ __Pyx_GIVEREF(__pyx_cur_scope->__pyx_v_bos);
+ __pyx_cur_scope->__pyx_v_eos = __pyx_v_eos;
+ __Pyx_INCREF(__pyx_cur_scope->__pyx_v_eos);
+ __Pyx_GIVEREF(__pyx_cur_scope->__pyx_v_eos);
+ { /* create the generator object that lazily runs the full_scores body */
+ __pyx_GeneratorObject *gen = __Pyx_Generator_New((__pyx_generator_body_t) __pyx_gb_5kenlm_13LanguageModel_8generator, (PyObject *) __pyx_cur_scope, __pyx_n_s_full_scores, __pyx_n_s_LanguageModel_full_scores); if (unlikely(!gen)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 122; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+ __Pyx_DECREF(__pyx_cur_scope);
+ __Pyx_RefNannyFinishContext();
+ return (PyObject *) gen;
+ }
+
+ /* function exit code */
+ __pyx_L1_error:;
+ __Pyx_AddTraceback("kenlm.LanguageModel.full_scores", __pyx_clineno, __pyx_lineno, __pyx_filename);
+ __pyx_r = NULL;
+ __Pyx_DECREF(((PyObject *)__pyx_cur_scope));
+ __Pyx_XGIVEREF(__pyx_r);
+ __Pyx_RefNannyFinishContext();
+ return __pyx_r;
+}
+
+static PyObject *__pyx_gb_5kenlm_13LanguageModel_8generator(__pyx_GeneratorObject *__pyx_generator, PyObject *__pyx_sent_value) /* generator body */
+{
+ struct __pyx_obj_5kenlm___pyx_scope_struct__full_scores *__pyx_cur_scope = ((struct __pyx_obj_5kenlm___pyx_scope_struct__full_scores *)__pyx_generator->closure);
+ PyObject *__pyx_r = NULL;
+ PyObject *__pyx_t_1 = NULL;
+ PyObject *__pyx_t_2 = NULL;
+ PyObject *__pyx_t_3 = NULL;
+ int __pyx_t_4;
+ Py_ssize_t __pyx_t_5;
+ char *__pyx_t_6;
+ PyObject *__pyx_t_7 = NULL;
+ PyObject *__pyx_t_8 = NULL;
+ int __pyx_lineno = 0;
+ const char *__pyx_filename = NULL;
+ int __pyx_clineno = 0;
+ __Pyx_RefNannyDeclarations
+ __Pyx_RefNannySetupContext("None", 0);
+ switch (__pyx_generator->resume_label) { /* resume the state machine at the last yield point */
+ case 0: goto __pyx_L3_first_run;
+ case 1: goto __pyx_L7_resume_from_yield;
+ case 2: goto __pyx_L9_resume_from_yield;
+ default: /* CPython raises the right error here */
+ __Pyx_RefNannyFinishContext();
+ return NULL;
+ }
+ __pyx_L3_first_run:;
+ if (unlikely(!__pyx_sent_value)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 122; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+
+ /* "kenlm.pyx":129
+ * @param eos should kenlm add an eos state
+ * """
+ * cdef list words = as_str(sentence).split() # <<<<<<<<<<<<<<
+ * cdef _kenlm.State state
+ * if bos:
+ */
+ __pyx_t_2 = __pyx_f_5kenlm_as_str(__pyx_cur_scope->__pyx_v_sentence); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 129; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+ __Pyx_GOTREF(__pyx_t_2);
+ __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_t_2, __pyx_n_s_split); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 129; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+ __Pyx_GOTREF(__pyx_t_3);
+ __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0;
+ __pyx_t_2 = NULL;
+ if (CYTHON_COMPILING_IN_CPYTHON && likely(PyMethod_Check(__pyx_t_3))) { /* unpack bound method to call the underlying function directly */
+ __pyx_t_2 = PyMethod_GET_SELF(__pyx_t_3);
+ if (likely(__pyx_t_2)) {
+ PyObject* function = PyMethod_GET_FUNCTION(__pyx_t_3);
+ __Pyx_INCREF(__pyx_t_2);
+ __Pyx_INCREF(function);
+ __Pyx_DECREF_SET(__pyx_t_3, function);
+ }
+ }
+ if (__pyx_t_2) {
+ __pyx_t_1 = __Pyx_PyObject_CallOneArg(__pyx_t_3, __pyx_t_2); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 129; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+ __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0;
+ } else {
+ __pyx_t_1 = __Pyx_PyObject_CallNoArg(__pyx_t_3); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 129; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+ }
+ __Pyx_GOTREF(__pyx_t_1);
+ __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0;
+ if (!(likely(PyList_CheckExact(__pyx_t_1))||((__pyx_t_1) == Py_None)||(PyErr_Format(PyExc_TypeError, "Expected %.16s, got %.200s", "list", Py_TYPE(__pyx_t_1)->tp_name), 0))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 129; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+ __Pyx_GIVEREF(__pyx_t_1);
+ __pyx_cur_scope->__pyx_v_words = ((PyObject*)__pyx_t_1);
+ __pyx_t_1 = 0;
+
+ /* "kenlm.pyx":131
+ * cdef list words = as_str(sentence).split()
+ * cdef _kenlm.State state
+ * if bos: # <<<<<<<<<<<<<<
+ * self.model.BeginSentenceWrite(&state)
+ * else:
+ */
+ __pyx_t_4 = __Pyx_PyObject_IsTrue(__pyx_cur_scope->__pyx_v_bos); if (unlikely(__pyx_t_4 < 0)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 131; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+ if (__pyx_t_4) {
+
+ /* "kenlm.pyx":132
+ * cdef _kenlm.State state
+ * if bos:
+ * self.model.BeginSentenceWrite(&state) # <<<<<<<<<<<<<<
+ * else:
+ * self.model.NullContextWrite(&state)
+ */
+ __pyx_cur_scope->__pyx_v_self->model->BeginSentenceWrite((&__pyx_cur_scope->__pyx_v_state)); /* start from the begin-of-sentence state */
+ goto __pyx_L4;
+ }
+ /*else*/ {
+
+ /* "kenlm.pyx":134
+ * self.model.BeginSentenceWrite(&state)
+ * else:
+ * self.model.NullContextWrite(&state) # <<<<<<<<<<<<<<
+ * cdef _kenlm.State out_state
+ * cdef _kenlm.FullScoreReturn ret
+ */
+ __pyx_cur_scope->__pyx_v_self->model->NullContextWrite((&__pyx_cur_scope->__pyx_v_state)); /* start from the null (context-free) state */
+ }
+ __pyx_L4:;
+
+ /* "kenlm.pyx":137
+ * cdef _kenlm.State out_state
+ * cdef _kenlm.FullScoreReturn ret
+ * cdef float total = 0 # <<<<<<<<<<<<<<
+ * cdef _kenlm.WordIndex wid
+ * for word in words:
+ */
+ __pyx_cur_scope->__pyx_v_total = 0.0;
+
+ /* "kenlm.pyx":139
+ * cdef float total = 0
+ * cdef _kenlm.WordIndex wid
+ * for word in words: # <<<<<<<<<<<<<<
+ * wid = self.vocab.Index(word)
+ * ret = self.model.BaseFullScore(&state, wid, &out_state)
+ */
+ if (unlikely(__pyx_cur_scope->__pyx_v_words == Py_None)) {
+ PyErr_SetString(PyExc_TypeError, "'NoneType' object is not iterable");
+ {__pyx_filename = __pyx_f[0]; __pyx_lineno = 139; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+ }
+ __pyx_t_1 = __pyx_cur_scope->__pyx_v_words; __Pyx_INCREF(__pyx_t_1); __pyx_t_5 = 0;
+ for (;;) { /* iterate over the words list; loop state is saved across yields */
+ if (__pyx_t_5 >= PyList_GET_SIZE(__pyx_t_1)) break;
+ #if CYTHON_COMPILING_IN_CPYTHON
+ __pyx_t_3 = PyList_GET_ITEM(__pyx_t_1, __pyx_t_5); __Pyx_INCREF(__pyx_t_3); __pyx_t_5++; if (unlikely(0 < 0)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 139; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+ #else
+ __pyx_t_3 = PySequence_ITEM(__pyx_t_1, __pyx_t_5); __pyx_t_5++; if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 139; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+ #endif
+ __Pyx_XGOTREF(__pyx_cur_scope->__pyx_v_word);
+ __Pyx_XDECREF_SET(__pyx_cur_scope->__pyx_v_word, __pyx_t_3);
+ __Pyx_GIVEREF(__pyx_t_3);
+ __pyx_t_3 = 0;
+
+ /* "kenlm.pyx":140
+ * cdef _kenlm.WordIndex wid
+ * for word in words:
+ * wid = self.vocab.Index(word) # <<<<<<<<<<<<<<
+ * ret = self.model.BaseFullScore(&state, wid, &out_state)
+ * yield (ret.prob, ret.ngram_length, wid == 0)
+ */
+ __pyx_t_6 = __Pyx_PyObject_AsString(__pyx_cur_scope->__pyx_v_word); if (unlikely((!__pyx_t_6) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 140; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+ __pyx_cur_scope->__pyx_v_wid = __pyx_cur_scope->__pyx_v_self->vocab->Index(__pyx_t_6);
+
+ /* "kenlm.pyx":141
+ * for word in words:
+ * wid = self.vocab.Index(word)
+ * ret = self.model.BaseFullScore(&state, wid, &out_state) # <<<<<<<<<<<<<<
+ * yield (ret.prob, ret.ngram_length, wid == 0)
+ * state = out_state
+ */
+ __pyx_cur_scope->__pyx_v_ret = __pyx_cur_scope->__pyx_v_self->model->BaseFullScore((&__pyx_cur_scope->__pyx_v_state), __pyx_cur_scope->__pyx_v_wid, (&__pyx_cur_scope->__pyx_v_out_state));
+
+ /* "kenlm.pyx":142
+ * wid = self.vocab.Index(word)
+ * ret = self.model.BaseFullScore(&state, wid, &out_state)
+ * yield (ret.prob, ret.ngram_length, wid == 0) # <<<<<<<<<<<<<<
+ * state = out_state
+ * if eos:
+ */
+ __pyx_t_3 = PyFloat_FromDouble(__pyx_cur_scope->__pyx_v_ret.prob); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 142; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+ __Pyx_GOTREF(__pyx_t_3);
+ __pyx_t_2 = __Pyx_PyInt_From_unsigned_char(__pyx_cur_scope->__pyx_v_ret.ngram_length); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 142; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+ __Pyx_GOTREF(__pyx_t_2);
+ __pyx_t_7 = __Pyx_PyBool_FromLong((__pyx_cur_scope->__pyx_v_wid == 0)); if (unlikely(!__pyx_t_7)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 142; __pyx_clineno = __LINE__; goto __pyx_L1_error;} /* wid == 0 marks an out-of-vocabulary word */
+ __Pyx_GOTREF(__pyx_t_7);
+ __pyx_t_8 = PyTuple_New(3); if (unlikely(!__pyx_t_8)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 142; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+ __Pyx_GOTREF(__pyx_t_8);
+ PyTuple_SET_ITEM(__pyx_t_8, 0, __pyx_t_3);
+ __Pyx_GIVEREF(__pyx_t_3);
+ PyTuple_SET_ITEM(__pyx_t_8, 1, __pyx_t_2);
+ __Pyx_GIVEREF(__pyx_t_2);
+ PyTuple_SET_ITEM(__pyx_t_8, 2, __pyx_t_7);
+ __Pyx_GIVEREF(__pyx_t_7);
+ __pyx_t_3 = 0;
+ __pyx_t_2 = 0;
+ __pyx_t_7 = 0;
+ __pyx_r = __pyx_t_8;
+ __pyx_t_8 = 0;
+ __Pyx_XGIVEREF(__pyx_t_1);
+ __pyx_cur_scope->__pyx_t_0 = __pyx_t_1;
+ __pyx_cur_scope->__pyx_t_1 = __pyx_t_5;
+ __Pyx_XGIVEREF(__pyx_r);
+ __Pyx_RefNannyFinishContext();
+ /* return from generator, yielding value */
+ __pyx_generator->resume_label = 1;
+ return __pyx_r;
+ __pyx_L7_resume_from_yield:;
+ __pyx_t_1 = __pyx_cur_scope->__pyx_t_0;
+ __pyx_cur_scope->__pyx_t_0 = 0;
+ __Pyx_XGOTREF(__pyx_t_1);
+ __pyx_t_5 = __pyx_cur_scope->__pyx_t_1;
+ if (unlikely(!__pyx_sent_value)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 142; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+
+ /* "kenlm.pyx":143
+ * ret = self.model.BaseFullScore(&state, wid, &out_state)
+ * yield (ret.prob, ret.ngram_length, wid == 0)
+ * state = out_state # <<<<<<<<<<<<<<
+ * if eos:
+ * ret = self.model.BaseFullScore(&state,
+ */
+ __pyx_cur_scope->__pyx_v_state = __pyx_cur_scope->__pyx_v_out_state; /* advance the n-gram context */
+
+ /* "kenlm.pyx":139
+ * cdef float total = 0
+ * cdef _kenlm.WordIndex wid
+ * for word in words: # <<<<<<<<<<<<<<
+ * wid = self.vocab.Index(word)
+ * ret = self.model.BaseFullScore(&state, wid, &out_state)
+ */
+ }
+ __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
+
+ /* "kenlm.pyx":144
+ * yield (ret.prob, ret.ngram_length, wid == 0)
+ * state = out_state
+ * if eos: # <<<<<<<<<<<<<<
+ * ret = self.model.BaseFullScore(&state,
+ * self.vocab.EndSentence(), &out_state)
+ */
+ __pyx_t_4 = __Pyx_PyObject_IsTrue(__pyx_cur_scope->__pyx_v_eos); if (unlikely(__pyx_t_4 < 0)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 144; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+ if (__pyx_t_4) {
+
+ /* "kenlm.pyx":145
+ * state = out_state
+ * if eos:
+ * ret = self.model.BaseFullScore(&state, # <<<<<<<<<<<<<<
+ * self.vocab.EndSentence(), &out_state)
+ * yield (ret.prob, ret.ngram_length, False)
+ */
+ __pyx_cur_scope->__pyx_v_ret = __pyx_cur_scope->__pyx_v_self->model->BaseFullScore((&__pyx_cur_scope->__pyx_v_state), __pyx_cur_scope->__pyx_v_self->vocab->EndSentence(), (&__pyx_cur_scope->__pyx_v_out_state)); /* score the end-of-sentence token */
+
+ /* "kenlm.pyx":147
+ * ret = self.model.BaseFullScore(&state,
+ * self.vocab.EndSentence(), &out_state)
+ * yield (ret.prob, ret.ngram_length, False) # <<<<<<<<<<<<<<
+ *
+ * def __contains__(self, word):
+ */
+ __pyx_t_1 = PyFloat_FromDouble(__pyx_cur_scope->__pyx_v_ret.prob); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 147; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+ __Pyx_GOTREF(__pyx_t_1);
+ __pyx_t_8 = __Pyx_PyInt_From_unsigned_char(__pyx_cur_scope->__pyx_v_ret.ngram_length); if (unlikely(!__pyx_t_8)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 147; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+ __Pyx_GOTREF(__pyx_t_8);
+ __pyx_t_7 = PyTuple_New(3); if (unlikely(!__pyx_t_7)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 147; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+ __Pyx_GOTREF(__pyx_t_7);
+ PyTuple_SET_ITEM(__pyx_t_7, 0, __pyx_t_1);
+ __Pyx_GIVEREF(__pyx_t_1);
+ PyTuple_SET_ITEM(__pyx_t_7, 1, __pyx_t_8);
+ __Pyx_GIVEREF(__pyx_t_8);
+ __Pyx_INCREF(Py_False);
+ PyTuple_SET_ITEM(__pyx_t_7, 2, Py_False);
+ __Pyx_GIVEREF(Py_False);
+ __pyx_t_1 = 0;
+ __pyx_t_8 = 0;
+ __pyx_r = __pyx_t_7;
+ __pyx_t_7 = 0;
+ __Pyx_XGIVEREF(__pyx_r);
+ __Pyx_RefNannyFinishContext();
+ /* return from generator, yielding value */
+ __pyx_generator->resume_label = 2;
+ return __pyx_r;
+ __pyx_L9_resume_from_yield:;
+ if (unlikely(!__pyx_sent_value)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 147; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+ goto __pyx_L8;
+ }
+ __pyx_L8:;
+
+ /* "kenlm.pyx":122
+ * return total
+ *
+ * def full_scores(self, sentence, bos = True, eos = True): # <<<<<<<<<<<<<<
+ * """
+ * full_scores(sentence, bos = True, eos = Ture) -> generate full scores (prob, ngram length, oov)
+ */
+
+ /* function exit code */
+ PyErr_SetNone(PyExc_StopIteration); /* generator exhausted */
+ goto __pyx_L0;
+ __pyx_L1_error:;
+ __Pyx_XDECREF(__pyx_t_1);
+ __Pyx_XDECREF(__pyx_t_2);
+ __Pyx_XDECREF(__pyx_t_3);
+ __Pyx_XDECREF(__pyx_t_7);
+ __Pyx_XDECREF(__pyx_t_8);
+ __Pyx_AddTraceback("full_scores", __pyx_clineno, __pyx_lineno, __pyx_filename);
+ __pyx_L0:;
+ __Pyx_XDECREF(__pyx_r);
+ __pyx_generator->resume_label = -1;
+ __Pyx_Generator_clear((PyObject*)__pyx_generator);
+ __Pyx_RefNannyFinishContext();
+ return NULL;
+}
+
+/* "kenlm.pyx":149
+ * yield (ret.prob, ret.ngram_length, False)
+ *
+ * def __contains__(self, word): # <<<<<<<<<<<<<<
+ * cdef bytes w = as_str(word)
+ * return (self.vocab.Index(w) != 0)
+ */
+
+/* Python wrapper */
+static int __pyx_pw_5kenlm_13LanguageModel_10__contains__(PyObject *__pyx_v_self, PyObject *__pyx_v_word); /*proto*/
+static int __pyx_pw_5kenlm_13LanguageModel_10__contains__(PyObject *__pyx_v_self, PyObject *__pyx_v_word) {
+ int __pyx_r;
+ __Pyx_RefNannyDeclarations
+ __Pyx_RefNannySetupContext("__contains__ (wrapper)", 0);
+ __pyx_r = __pyx_pf_5kenlm_13LanguageModel_9__contains__(((struct __pyx_obj_5kenlm_LanguageModel *)__pyx_v_self), ((PyObject *)__pyx_v_word));
+
+ /* function exit code */
+ __Pyx_RefNannyFinishContext();
+ return __pyx_r;
+}
+
+static int __pyx_pf_5kenlm_13LanguageModel_9__contains__(struct __pyx_obj_5kenlm_LanguageModel *__pyx_v_self, PyObject *__pyx_v_word) {
+ PyObject *__pyx_v_w = 0;
+ int __pyx_r;
+ __Pyx_RefNannyDeclarations
+ PyObject *__pyx_t_1 = NULL;
+ char *__pyx_t_2;
+ int __pyx_lineno = 0;
+ const char *__pyx_filename = NULL;
+ int __pyx_clineno = 0;
+ __Pyx_RefNannySetupContext("__contains__", 0);
+
+ /* "kenlm.pyx":150
+ *
+ * def __contains__(self, word):
+ * cdef bytes w = as_str(word) # <<<<<<<<<<<<<<
+ * return (self.vocab.Index(w) != 0)
+ *
+ */
+ __pyx_t_1 = __pyx_f_5kenlm_as_str(__pyx_v_word); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 150; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+ __Pyx_GOTREF(__pyx_t_1);
+ __pyx_v_w = ((PyObject*)__pyx_t_1);
+ __pyx_t_1 = 0;
+
+ /* "kenlm.pyx":151
+ * def __contains__(self, word):
+ * cdef bytes w = as_str(word)
+ * return (self.vocab.Index(w) != 0) # <<<<<<<<<<<<<<
+ *
+ * def __repr__(self):
+ */
+ __pyx_t_2 = __Pyx_PyObject_AsString(__pyx_v_w); if (unlikely((!__pyx_t_2) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 151; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+ __pyx_r = (__pyx_v_self->vocab->Index(__pyx_t_2) != 0);
+ goto __pyx_L0;
+
+ /* "kenlm.pyx":149
+ * yield (ret.prob, ret.ngram_length, False)
+ *
+ * def __contains__(self, word): # <<<<<<<<<<<<<<
+ * cdef bytes w = as_str(word)
+ * return (self.vocab.Index(w) != 0)
+ */
+
+ /* function exit code */
+ __pyx_L1_error:;
+ __Pyx_XDECREF(__pyx_t_1);
+ __Pyx_AddTraceback("kenlm.LanguageModel.__contains__", __pyx_clineno, __pyx_lineno, __pyx_filename);
+ __pyx_r = -1;
+ __pyx_L0:;
+ __Pyx_XDECREF(__pyx_v_w);
+ __Pyx_RefNannyFinishContext();
+ return __pyx_r;
+}
+
+/* "kenlm.pyx":153
+ * return (self.vocab.Index(w) != 0)
+ *
+ * def __repr__(self): # <<<<<<<<<<<<<<
+ * return '<LanguageModel from {0}>'.format(os.path.basename(self.path))
+ *
+ */
+
+/* Python wrapper */
+static PyObject *__pyx_pw_5kenlm_13LanguageModel_12__repr__(PyObject *__pyx_v_self); /*proto*/
+static PyObject *__pyx_pw_5kenlm_13LanguageModel_12__repr__(PyObject *__pyx_v_self) {
+ PyObject *__pyx_r = 0;
+ __Pyx_RefNannyDeclarations
+ __Pyx_RefNannySetupContext("__repr__ (wrapper)", 0);
+ __pyx_r = __pyx_pf_5kenlm_13LanguageModel_11__repr__(((struct __pyx_obj_5kenlm_LanguageModel *)__pyx_v_self));
+
+ /* function exit code */
+ __Pyx_RefNannyFinishContext();
+ return __pyx_r;
+}
+
+static PyObject *__pyx_pf_5kenlm_13LanguageModel_11__repr__(struct __pyx_obj_5kenlm_LanguageModel *__pyx_v_self) {
+ PyObject *__pyx_r = NULL;
+ __Pyx_RefNannyDeclarations
+ PyObject *__pyx_t_1 = NULL;
+ PyObject *__pyx_t_2 = NULL;
+ PyObject *__pyx_t_3 = NULL;
+ PyObject *__pyx_t_4 = NULL;
+ PyObject *__pyx_t_5 = NULL;
+ PyObject *__pyx_t_6 = NULL;
+ int __pyx_lineno = 0;
+ const char *__pyx_filename = NULL;
+ int __pyx_clineno = 0;
+ __Pyx_RefNannySetupContext("__repr__", 0);
+
+ /* "kenlm.pyx":154
+ *
+ * def __repr__(self):
+ * return '<LanguageModel from {0}>'.format(os.path.basename(self.path)) # <<<<<<<<<<<<<<
+ *
+ * def __reduce__(self):
+ */
+ __Pyx_XDECREF(__pyx_r);
+ __pyx_t_2 = __Pyx_PyObject_GetAttrStr(__pyx_kp_s_LanguageModel_from_0, __pyx_n_s_format); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 154; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+ __Pyx_GOTREF(__pyx_t_2);
+ __pyx_t_4 = __Pyx_GetModuleGlobalName(__pyx_n_s_os); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 154; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+ __Pyx_GOTREF(__pyx_t_4);
+ __pyx_t_5 = __Pyx_PyObject_GetAttrStr(__pyx_t_4, __pyx_n_s_path); if (unlikely(!__pyx_t_5)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 154; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+ __Pyx_GOTREF(__pyx_t_5);
+ __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0;
+ __pyx_t_4 = __Pyx_PyObject_GetAttrStr(__pyx_t_5, __pyx_n_s_basename); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 154; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+ __Pyx_GOTREF(__pyx_t_4);
+ __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0;
+ __pyx_t_5 = NULL;
+ if (CYTHON_COMPILING_IN_CPYTHON && likely(PyMethod_Check(__pyx_t_4))) {
+ __pyx_t_5 = PyMethod_GET_SELF(__pyx_t_4);
+ if (likely(__pyx_t_5)) {
+ PyObject* function = PyMethod_GET_FUNCTION(__pyx_t_4);
+ __Pyx_INCREF(__pyx_t_5);
+ __Pyx_INCREF(function);
+ __Pyx_DECREF_SET(__pyx_t_4, function);
+ }
+ }
+ if (!__pyx_t_5) {
+ __pyx_t_3 = __Pyx_PyObject_CallOneArg(__pyx_t_4, __pyx_v_self->path); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 154; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+ __Pyx_GOTREF(__pyx_t_3);
+ } else {
+ __pyx_t_6 = PyTuple_New(1+1); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 154; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+ __Pyx_GOTREF(__pyx_t_6);
+ PyTuple_SET_ITEM(__pyx_t_6, 0, __pyx_t_5); __Pyx_GIVEREF(__pyx_t_5); __pyx_t_5 = NULL;
+ __Pyx_INCREF(__pyx_v_self->path);
+ PyTuple_SET_ITEM(__pyx_t_6, 0+1, __pyx_v_self->path);
+ __Pyx_GIVEREF(__pyx_v_self->path);
+ __pyx_t_3 = __Pyx_PyObject_Call(__pyx_t_4, __pyx_t_6, NULL); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 154; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+ __Pyx_GOTREF(__pyx_t_3);
+ __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0;
+ }
+ __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0;
+ __pyx_t_4 = NULL;
+ if (CYTHON_COMPILING_IN_CPYTHON && likely(PyMethod_Check(__pyx_t_2))) {
+ __pyx_t_4 = PyMethod_GET_SELF(__pyx_t_2);
+ if (likely(__pyx_t_4)) {
+ PyObject* function = PyMethod_GET_FUNCTION(__pyx_t_2);
+ __Pyx_INCREF(__pyx_t_4);
+ __Pyx_INCREF(function);
+ __Pyx_DECREF_SET(__pyx_t_2, function);
+ }
+ }
+ if (!__pyx_t_4) {
+ __pyx_t_1 = __Pyx_PyObject_CallOneArg(__pyx_t_2, __pyx_t_3); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 154; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+ __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0;
+ __Pyx_GOTREF(__pyx_t_1);
+ } else {
+ __pyx_t_6 = PyTuple_New(1+1); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 154; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+ __Pyx_GOTREF(__pyx_t_6);
+ PyTuple_SET_ITEM(__pyx_t_6, 0, __pyx_t_4); __Pyx_GIVEREF(__pyx_t_4); __pyx_t_4 = NULL;
+ PyTuple_SET_ITEM(__pyx_t_6, 0+1, __pyx_t_3);
+ __Pyx_GIVEREF(__pyx_t_3);
+ __pyx_t_3 = 0;
+ __pyx_t_1 = __Pyx_PyObject_Call(__pyx_t_2, __pyx_t_6, NULL); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 154; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+ __Pyx_GOTREF(__pyx_t_1);
+ __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0;
+ }
+ __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0;
+ __pyx_r = __pyx_t_1;
+ __pyx_t_1 = 0;
+ goto __pyx_L0;
+
+ /* "kenlm.pyx":153
+ * return (self.vocab.Index(w) != 0)
+ *
+ * def __repr__(self): # <<<<<<<<<<<<<<
+ * return '<LanguageModel from {0}>'.format(os.path.basename(self.path))
+ *
+ */
+
+ /* function exit code */
+ __pyx_L1_error:;
+ __Pyx_XDECREF(__pyx_t_1);
+ __Pyx_XDECREF(__pyx_t_2);
+ __Pyx_XDECREF(__pyx_t_3);
+ __Pyx_XDECREF(__pyx_t_4);
+ __Pyx_XDECREF(__pyx_t_5);
+ __Pyx_XDECREF(__pyx_t_6);
+ __Pyx_AddTraceback("kenlm.LanguageModel.__repr__", __pyx_clineno, __pyx_lineno, __pyx_filename);
+ __pyx_r = NULL;
+ __pyx_L0:;
+ __Pyx_XGIVEREF(__pyx_r);
+ __Pyx_RefNannyFinishContext();
+ return __pyx_r;
+}
+
+/* "kenlm.pyx":156
+ * return '<LanguageModel from {0}>'.format(os.path.basename(self.path))
+ *
+ * def __reduce__(self): # <<<<<<<<<<<<<<
+ * return (_kenlm.LanguageModel, (self.path,))
+ *
+ */
+
+/* Python wrapper */
+static PyObject *__pyx_pw_5kenlm_13LanguageModel_14__reduce__(PyObject *__pyx_v_self, CYTHON_UNUSED PyObject *unused); /*proto*/
+static PyObject *__pyx_pw_5kenlm_13LanguageModel_14__reduce__(PyObject *__pyx_v_self, CYTHON_UNUSED PyObject *unused) {
+ PyObject *__pyx_r = 0;
+ __Pyx_RefNannyDeclarations
+ __Pyx_RefNannySetupContext("__reduce__ (wrapper)", 0);
+ __pyx_r = __pyx_pf_5kenlm_13LanguageModel_13__reduce__(((struct __pyx_obj_5kenlm_LanguageModel *)__pyx_v_self));
+
+ /* function exit code */
+ __Pyx_RefNannyFinishContext();
+ return __pyx_r;
+}
+
+static PyObject *__pyx_pf_5kenlm_13LanguageModel_13__reduce__(struct __pyx_obj_5kenlm_LanguageModel *__pyx_v_self) {
+ PyObject *__pyx_r = NULL;
+ __Pyx_RefNannyDeclarations
+ PyObject *__pyx_t_1 = NULL;
+ PyObject *__pyx_t_2 = NULL;
+ PyObject *__pyx_t_3 = NULL;
+ int __pyx_lineno = 0;
+ const char *__pyx_filename = NULL;
+ int __pyx_clineno = 0;
+ __Pyx_RefNannySetupContext("__reduce__", 0);
+
+ /* "kenlm.pyx":157
+ *
+ * def __reduce__(self):
+ * return (_kenlm.LanguageModel, (self.path,)) # <<<<<<<<<<<<<<
+ *
+ * cdef class Model:
+ */
+ __Pyx_XDECREF(__pyx_r);
+ __pyx_t_1 = __Pyx_GetModuleGlobalName(__pyx_n_s_kenlm); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 157; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+ __Pyx_GOTREF(__pyx_t_1);
+ __pyx_t_2 = __Pyx_PyObject_GetAttrStr(__pyx_t_1, __pyx_n_s_LanguageModel); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 157; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+ __Pyx_GOTREF(__pyx_t_2);
+ __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
+ __pyx_t_1 = PyTuple_New(1); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 157; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+ __Pyx_GOTREF(__pyx_t_1);
+ __Pyx_INCREF(__pyx_v_self->path);
+ PyTuple_SET_ITEM(__pyx_t_1, 0, __pyx_v_self->path);
+ __Pyx_GIVEREF(__pyx_v_self->path);
+ __pyx_t_3 = PyTuple_New(2); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 157; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+ __Pyx_GOTREF(__pyx_t_3);
+ PyTuple_SET_ITEM(__pyx_t_3, 0, __pyx_t_2);
+ __Pyx_GIVEREF(__pyx_t_2);
+ PyTuple_SET_ITEM(__pyx_t_3, 1, __pyx_t_1);
+ __Pyx_GIVEREF(__pyx_t_1);
+ __pyx_t_2 = 0;
+ __pyx_t_1 = 0;
+ __pyx_r = __pyx_t_3;
+ __pyx_t_3 = 0;
+ goto __pyx_L0;
+
+ /* "kenlm.pyx":156
+ * return '<LanguageModel from {0}>'.format(os.path.basename(self.path))
+ *
+ * def __reduce__(self): # <<<<<<<<<<<<<<
+ * return (_kenlm.LanguageModel, (self.path,))
+ *
+ */
+
+ /* function exit code */
+ __pyx_L1_error:;
+ __Pyx_XDECREF(__pyx_t_1);
+ __Pyx_XDECREF(__pyx_t_2);
+ __Pyx_XDECREF(__pyx_t_3);
+ __Pyx_AddTraceback("kenlm.LanguageModel.__reduce__", __pyx_clineno, __pyx_lineno, __pyx_filename);
+ __pyx_r = NULL;
+ __pyx_L0:;
+ __Pyx_XGIVEREF(__pyx_r);
+ __Pyx_RefNannyFinishContext();
+ return __pyx_r;
+}
+
+/* "kenlm.pyx":81
+ *
+ * cdef _kenlm.Model* model
+ * cdef public bytes path # <<<<<<<<<<<<<<
+ * cdef _kenlm.const_Vocabulary* vocab
+ *
+ */
+
+/* Python wrapper */
+static PyObject *__pyx_pw_5kenlm_13LanguageModel_4path_1__get__(PyObject *__pyx_v_self); /*proto*/
+static PyObject *__pyx_pw_5kenlm_13LanguageModel_4path_1__get__(PyObject *__pyx_v_self) {
+ PyObject *__pyx_r = 0;
+ __Pyx_RefNannyDeclarations
+ __Pyx_RefNannySetupContext("__get__ (wrapper)", 0);
+ __pyx_r = __pyx_pf_5kenlm_13LanguageModel_4path___get__(((struct __pyx_obj_5kenlm_LanguageModel *)__pyx_v_self));
+
+ /* function exit code */
+ __Pyx_RefNannyFinishContext();
+ return __pyx_r;
+}
+
+static PyObject *__pyx_pf_5kenlm_13LanguageModel_4path___get__(struct __pyx_obj_5kenlm_LanguageModel *__pyx_v_self) {
+ PyObject *__pyx_r = NULL;
+ __Pyx_RefNannyDeclarations
+ __Pyx_RefNannySetupContext("__get__", 0);
+ __Pyx_XDECREF(__pyx_r);
+ __Pyx_INCREF(__pyx_v_self->path);
+ __pyx_r = __pyx_v_self->path;
+ goto __pyx_L0;
+
+ /* function exit code */
+ __pyx_L0:;
+ __Pyx_XGIVEREF(__pyx_r);
+ __Pyx_RefNannyFinishContext();
+ return __pyx_r;
+}
+
+/* Python wrapper */
+static int __pyx_pw_5kenlm_13LanguageModel_4path_3__set__(PyObject *__pyx_v_self, PyObject *__pyx_v_value); /*proto*/
+static int __pyx_pw_5kenlm_13LanguageModel_4path_3__set__(PyObject *__pyx_v_self, PyObject *__pyx_v_value) {
+ int __pyx_r;
+ __Pyx_RefNannyDeclarations
+ __Pyx_RefNannySetupContext("__set__ (wrapper)", 0);
+ __pyx_r = __pyx_pf_5kenlm_13LanguageModel_4path_2__set__(((struct __pyx_obj_5kenlm_LanguageModel *)__pyx_v_self), ((PyObject *)__pyx_v_value));
+
+ /* function exit code */
+ __Pyx_RefNannyFinishContext();
+ return __pyx_r;
+}
+
+static int __pyx_pf_5kenlm_13LanguageModel_4path_2__set__(struct __pyx_obj_5kenlm_LanguageModel *__pyx_v_self, PyObject *__pyx_v_value) {
+ int __pyx_r;
+ __Pyx_RefNannyDeclarations
+ PyObject *__pyx_t_1 = NULL;
+ int __pyx_lineno = 0;
+ const char *__pyx_filename = NULL;
+ int __pyx_clineno = 0;
+ __Pyx_RefNannySetupContext("__set__", 0);
+ if (!(likely(PyBytes_CheckExact(__pyx_v_value))||((__pyx_v_value) == Py_None)||(PyErr_Format(PyExc_TypeError, "Expected %.16s, got %.200s", "bytes", Py_TYPE(__pyx_v_value)->tp_name), 0))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 81; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+ __pyx_t_1 = __pyx_v_value;
+ __Pyx_INCREF(__pyx_t_1);
+ __Pyx_GIVEREF(__pyx_t_1);
+ __Pyx_GOTREF(__pyx_v_self->path);
+ __Pyx_DECREF(__pyx_v_self->path);
+ __pyx_v_self->path = ((PyObject*)__pyx_t_1);
+ __pyx_t_1 = 0;
+
+ /* function exit code */
+ __pyx_r = 0;
+ goto __pyx_L0;
+ __pyx_L1_error:;
+ __Pyx_XDECREF(__pyx_t_1);
+ __Pyx_AddTraceback("kenlm.LanguageModel.path.__set__", __pyx_clineno, __pyx_lineno, __pyx_filename);
+ __pyx_r = -1;
+ __pyx_L0:;
+ __Pyx_RefNannyFinishContext();
+ return __pyx_r;
+}
+
+/* Python wrapper */
+static int __pyx_pw_5kenlm_13LanguageModel_4path_5__del__(PyObject *__pyx_v_self); /*proto*/
+static int __pyx_pw_5kenlm_13LanguageModel_4path_5__del__(PyObject *__pyx_v_self) {
+ int __pyx_r;
+ __Pyx_RefNannyDeclarations
+ __Pyx_RefNannySetupContext("__del__ (wrapper)", 0);
+ __pyx_r = __pyx_pf_5kenlm_13LanguageModel_4path_4__del__(((struct __pyx_obj_5kenlm_LanguageModel *)__pyx_v_self));
+
+ /* function exit code */
+ __Pyx_RefNannyFinishContext();
+ return __pyx_r;
+}
+
+static int __pyx_pf_5kenlm_13LanguageModel_4path_4__del__(struct __pyx_obj_5kenlm_LanguageModel *__pyx_v_self) {
+ int __pyx_r;
+ __Pyx_RefNannyDeclarations
+ __Pyx_RefNannySetupContext("__del__", 0);
+ __Pyx_INCREF(Py_None);
+ __Pyx_GIVEREF(Py_None);
+ __Pyx_GOTREF(__pyx_v_self->path);
+ __Pyx_DECREF(__pyx_v_self->path);
+ __pyx_v_self->path = ((PyObject*)Py_None);
+
+ /* function exit code */
+ __pyx_r = 0;
+ __Pyx_RefNannyFinishContext();
+ return __pyx_r;
+}
+
+/* "kenlm.pyx":168
+ * cdef _kenlm.const_Vocabulary* vocab
+ *
+ * def __init__(self, path): # <<<<<<<<<<<<<<
+ * """
+ * Load the language model.
+ */
+
+/* Python wrapper */
+static int __pyx_pw_5kenlm_5Model_1__init__(PyObject *__pyx_v_self, PyObject *__pyx_args, PyObject *__pyx_kwds); /*proto*/
+static char __pyx_doc_5kenlm_5Model___init__[] = "\n Load the language model.\n\n :param path: path to an arpa file or a kenlm binary file.\n ";
+#if CYTHON_COMPILING_IN_CPYTHON
+struct wrapperbase __pyx_wrapperbase_5kenlm_5Model___init__;
+#endif
+static int __pyx_pw_5kenlm_5Model_1__init__(PyObject *__pyx_v_self, PyObject *__pyx_args, PyObject *__pyx_kwds) {
+ PyObject *__pyx_v_path = 0;
+ int __pyx_lineno = 0;
+ const char *__pyx_filename = NULL;
+ int __pyx_clineno = 0;
+ int __pyx_r;
+ __Pyx_RefNannyDeclarations
+ __Pyx_RefNannySetupContext("__init__ (wrapper)", 0);
+ {
+ static PyObject **__pyx_pyargnames[] = {&__pyx_n_s_path,0};
+ PyObject* values[1] = {0};
+ if (unlikely(__pyx_kwds)) {
+ Py_ssize_t kw_args;
+ const Py_ssize_t pos_args = PyTuple_GET_SIZE(__pyx_args);
+ switch (pos_args) {
+ case 1: values[0] = PyTuple_GET_ITEM(__pyx_args, 0);
+ case 0: break;
+ default: goto __pyx_L5_argtuple_error;
+ }
+ kw_args = PyDict_Size(__pyx_kwds);
+ switch (pos_args) {
+ case 0:
+ if (likely((values[0] = PyDict_GetItem(__pyx_kwds, __pyx_n_s_path)) != 0)) kw_args--;
+ else goto __pyx_L5_argtuple_error;
+ }
+ if (unlikely(kw_args > 0)) {
+ if (unlikely(__Pyx_ParseOptionalKeywords(__pyx_kwds, __pyx_pyargnames, 0, values, pos_args, "__init__") < 0)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 168; __pyx_clineno = __LINE__; goto __pyx_L3_error;}
+ }
+ } else if (PyTuple_GET_SIZE(__pyx_args) != 1) {
+ goto __pyx_L5_argtuple_error;
+ } else {
+ values[0] = PyTuple_GET_ITEM(__pyx_args, 0);
+ }
+ __pyx_v_path = values[0];
+ }
+ goto __pyx_L4_argument_unpacking_done;
+ __pyx_L5_argtuple_error:;
+ __Pyx_RaiseArgtupleInvalid("__init__", 1, 1, 1, PyTuple_GET_SIZE(__pyx_args)); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 168; __pyx_clineno = __LINE__; goto __pyx_L3_error;}
+ __pyx_L3_error:;
+ __Pyx_AddTraceback("kenlm.Model.__init__", __pyx_clineno, __pyx_lineno, __pyx_filename);
+ __Pyx_RefNannyFinishContext();
+ return -1;
+ __pyx_L4_argument_unpacking_done:;
+ __pyx_r = __pyx_pf_5kenlm_5Model___init__(((struct __pyx_obj_5kenlm_Model *)__pyx_v_self), __pyx_v_path);
+
+ /* function exit code */
+ __Pyx_RefNannyFinishContext();
+ return __pyx_r;
+}
+
+static int __pyx_pf_5kenlm_5Model___init__(struct __pyx_obj_5kenlm_Model *__pyx_v_self, PyObject *__pyx_v_path) {
+ PyObject *__pyx_v_exception = NULL;
+ PyObject *__pyx_v_exception_message = NULL;
+ int __pyx_r;
+ __Pyx_RefNannyDeclarations
+ PyObject *__pyx_t_1 = NULL;
+ PyObject *__pyx_t_2 = NULL;
+ PyObject *__pyx_t_3 = NULL;
+ PyObject *__pyx_t_4 = NULL;
+ PyObject *__pyx_t_5 = NULL;
+ PyObject *__pyx_t_6 = NULL;
+ PyObject *__pyx_t_7 = NULL;
+ PyObject *__pyx_t_8 = NULL;
+ char *__pyx_t_9;
+ lm::base::Model *__pyx_t_10;
+ int __pyx_t_11;
+ PyObject *__pyx_t_12 = NULL;
+ Py_ssize_t __pyx_t_13;
+ PyObject *__pyx_t_14 = NULL;
+ int __pyx_lineno = 0;
+ const char *__pyx_filename = NULL;
+ int __pyx_clineno = 0;
+ __Pyx_RefNannySetupContext("__init__", 0);
+
+ /* "kenlm.pyx":174
+ * :param path: path to an arpa file or a kenlm binary file.
+ * """
+ * self.path = os.path.abspath(as_str(path)) # <<<<<<<<<<<<<<
+ * try:
+ * self.model = _kenlm.LoadVirtual(self.path)
+ */
+ __pyx_t_2 = __Pyx_GetModuleGlobalName(__pyx_n_s_os); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 174; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+ __Pyx_GOTREF(__pyx_t_2);
+ __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_t_2, __pyx_n_s_path); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 174; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+ __Pyx_GOTREF(__pyx_t_3);
+ __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0;
+ __pyx_t_2 = __Pyx_PyObject_GetAttrStr(__pyx_t_3, __pyx_n_s_abspath); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 174; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+ __Pyx_GOTREF(__pyx_t_2);
+ __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0;
+ __pyx_t_3 = __pyx_f_5kenlm_as_str(__pyx_v_path); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 174; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+ __Pyx_GOTREF(__pyx_t_3);
+ __pyx_t_4 = NULL;
+ if (CYTHON_COMPILING_IN_CPYTHON && likely(PyMethod_Check(__pyx_t_2))) {
+ __pyx_t_4 = PyMethod_GET_SELF(__pyx_t_2);
+ if (likely(__pyx_t_4)) {
+ PyObject* function = PyMethod_GET_FUNCTION(__pyx_t_2);
+ __Pyx_INCREF(__pyx_t_4);
+ __Pyx_INCREF(function);
+ __Pyx_DECREF_SET(__pyx_t_2, function);
+ }
+ }
+ if (!__pyx_t_4) {
+ __pyx_t_1 = __Pyx_PyObject_CallOneArg(__pyx_t_2, __pyx_t_3); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 174; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+ __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0;
+ __Pyx_GOTREF(__pyx_t_1);
+ } else {
+ __pyx_t_5 = PyTuple_New(1+1); if (unlikely(!__pyx_t_5)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 174; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+ __Pyx_GOTREF(__pyx_t_5);
+ PyTuple_SET_ITEM(__pyx_t_5, 0, __pyx_t_4); __Pyx_GIVEREF(__pyx_t_4); __pyx_t_4 = NULL;
+ PyTuple_SET_ITEM(__pyx_t_5, 0+1, __pyx_t_3);
+ __Pyx_GIVEREF(__pyx_t_3);
+ __pyx_t_3 = 0;
+ __pyx_t_1 = __Pyx_PyObject_Call(__pyx_t_2, __pyx_t_5, NULL); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 174; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+ __Pyx_GOTREF(__pyx_t_1);
+ __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0;
+ }
+ __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0;
+ if (!(likely(PyBytes_CheckExact(__pyx_t_1))||((__pyx_t_1) == Py_None)||(PyErr_Format(PyExc_TypeError, "Expected %.16s, got %.200s", "bytes", Py_TYPE(__pyx_t_1)->tp_name), 0))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 174; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+ __Pyx_GIVEREF(__pyx_t_1);
+ __Pyx_GOTREF(__pyx_v_self->path);
+ __Pyx_DECREF(__pyx_v_self->path);
+ __pyx_v_self->path = ((PyObject*)__pyx_t_1);
+ __pyx_t_1 = 0;
+
+ /* "kenlm.pyx":175
+ * """
+ * self.path = os.path.abspath(as_str(path))
+ * try: # <<<<<<<<<<<<<<
+ * self.model = _kenlm.LoadVirtual(self.path)
+ * except RuntimeError as exception:
+ */
+ {
+ __Pyx_ExceptionSave(&__pyx_t_6, &__pyx_t_7, &__pyx_t_8);
+ __Pyx_XGOTREF(__pyx_t_6);
+ __Pyx_XGOTREF(__pyx_t_7);
+ __Pyx_XGOTREF(__pyx_t_8);
+ /*try:*/ {
+
+ /* "kenlm.pyx":176
+ * self.path = os.path.abspath(as_str(path))
+ * try:
+ * self.model = _kenlm.LoadVirtual(self.path) # <<<<<<<<<<<<<<
+ * except RuntimeError as exception:
+ * exception_message = str(exception).replace('\n', ' ')
+ */
+ __pyx_t_9 = __Pyx_PyObject_AsString(__pyx_v_self->path); if (unlikely((!__pyx_t_9) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 176; __pyx_clineno = __LINE__; goto __pyx_L3_error;}
+ try {
+ __pyx_t_10 = lm::ngram::LoadVirtual(__pyx_t_9);
+ } catch(...) {
+ __Pyx_CppExn2PyErr();
+ {__pyx_filename = __pyx_f[0]; __pyx_lineno = 176; __pyx_clineno = __LINE__; goto __pyx_L3_error;}
+ }
+ __pyx_v_self->model = __pyx_t_10;
+ }
+ __Pyx_XDECREF(__pyx_t_6); __pyx_t_6 = 0;
+ __Pyx_XDECREF(__pyx_t_7); __pyx_t_7 = 0;
+ __Pyx_XDECREF(__pyx_t_8); __pyx_t_8 = 0;
+ goto __pyx_L10_try_end;
+ __pyx_L3_error:;
+ __Pyx_XDECREF(__pyx_t_4); __pyx_t_4 = 0;
+ __Pyx_XDECREF(__pyx_t_3); __pyx_t_3 = 0;
+ __Pyx_XDECREF(__pyx_t_5); __pyx_t_5 = 0;
+ __Pyx_XDECREF(__pyx_t_2); __pyx_t_2 = 0;
+ __Pyx_XDECREF(__pyx_t_1); __pyx_t_1 = 0;
+
+ /* "kenlm.pyx":177
+ * try:
+ * self.model = _kenlm.LoadVirtual(self.path)
+ * except RuntimeError as exception: # <<<<<<<<<<<<<<
+ * exception_message = str(exception).replace('\n', ' ')
+ * raise IOError('Cannot read model \'{}\' ({})'.format(path, exception_message))\
+ */
+ __pyx_t_11 = PyErr_ExceptionMatches(__pyx_builtin_RuntimeError);
+ if (__pyx_t_11) {
+ __Pyx_AddTraceback("kenlm.Model.__init__", __pyx_clineno, __pyx_lineno, __pyx_filename);
+ if (__Pyx_GetException(&__pyx_t_1, &__pyx_t_2, &__pyx_t_5) < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 177; __pyx_clineno = __LINE__; goto __pyx_L5_except_error;}
+ __Pyx_GOTREF(__pyx_t_1);
+ __Pyx_GOTREF(__pyx_t_2);
+ __Pyx_GOTREF(__pyx_t_5);
+ __Pyx_INCREF(__pyx_t_2);
+ __pyx_v_exception = __pyx_t_2;
+
+ /* "kenlm.pyx":178
+ * self.model = _kenlm.LoadVirtual(self.path)
+ * except RuntimeError as exception:
+ * exception_message = str(exception).replace('\n', ' ') # <<<<<<<<<<<<<<
+ * raise IOError('Cannot read model \'{}\' ({})'.format(path, exception_message))\
+ * from exception
+ */
+ __pyx_t_3 = PyTuple_New(1); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 178; __pyx_clineno = __LINE__; goto __pyx_L5_except_error;}
+ __Pyx_GOTREF(__pyx_t_3);
+ __Pyx_INCREF(__pyx_v_exception);
+ PyTuple_SET_ITEM(__pyx_t_3, 0, __pyx_v_exception);
+ __Pyx_GIVEREF(__pyx_v_exception);
+ __pyx_t_4 = __Pyx_PyObject_Call(((PyObject *)((PyObject*)(&PyString_Type))), __pyx_t_3, NULL); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 178; __pyx_clineno = __LINE__; goto __pyx_L5_except_error;}
+ __Pyx_GOTREF(__pyx_t_4);
+ __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0;
+ __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_t_4, __pyx_n_s_replace); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 178; __pyx_clineno = __LINE__; goto __pyx_L5_except_error;}
+ __Pyx_GOTREF(__pyx_t_3);
+ __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0;
+ __pyx_t_4 = __Pyx_PyObject_Call(__pyx_t_3, __pyx_tuple__5, NULL); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 178; __pyx_clineno = __LINE__; goto __pyx_L5_except_error;}
+ __Pyx_GOTREF(__pyx_t_4);
+ __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0;
+ __pyx_v_exception_message = __pyx_t_4;
+ __pyx_t_4 = 0;
+
+ /* "kenlm.pyx":179
+ * except RuntimeError as exception:
+ * exception_message = str(exception).replace('\n', ' ')
+ * raise IOError('Cannot read model \'{}\' ({})'.format(path, exception_message))\ # <<<<<<<<<<<<<<
+ * from exception
+ * self.vocab = &self.model.BaseVocabulary()
+ */
+ __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_kp_s_Cannot_read_model, __pyx_n_s_format); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 179; __pyx_clineno = __LINE__; goto __pyx_L5_except_error;}
+ __Pyx_GOTREF(__pyx_t_3);
+ __pyx_t_12 = NULL;
+ __pyx_t_13 = 0;
+ if (CYTHON_COMPILING_IN_CPYTHON && likely(PyMethod_Check(__pyx_t_3))) {
+ __pyx_t_12 = PyMethod_GET_SELF(__pyx_t_3);
+ if (likely(__pyx_t_12)) {
+ PyObject* function = PyMethod_GET_FUNCTION(__pyx_t_3);
+ __Pyx_INCREF(__pyx_t_12);
+ __Pyx_INCREF(function);
+ __Pyx_DECREF_SET(__pyx_t_3, function);
+ __pyx_t_13 = 1;
+ }
+ }
+ __pyx_t_14 = PyTuple_New(2+__pyx_t_13); if (unlikely(!__pyx_t_14)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 179; __pyx_clineno = __LINE__; goto __pyx_L5_except_error;}
+ __Pyx_GOTREF(__pyx_t_14);
+ if (__pyx_t_12) {
+ PyTuple_SET_ITEM(__pyx_t_14, 0, __pyx_t_12); __Pyx_GIVEREF(__pyx_t_12); __pyx_t_12 = NULL;
+ }
+ __Pyx_INCREF(__pyx_v_path);
+ PyTuple_SET_ITEM(__pyx_t_14, 0+__pyx_t_13, __pyx_v_path);
+ __Pyx_GIVEREF(__pyx_v_path);
+ __Pyx_INCREF(__pyx_v_exception_message);
+ PyTuple_SET_ITEM(__pyx_t_14, 1+__pyx_t_13, __pyx_v_exception_message);
+ __Pyx_GIVEREF(__pyx_v_exception_message);
+ __pyx_t_4 = __Pyx_PyObject_Call(__pyx_t_3, __pyx_t_14, NULL); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 179; __pyx_clineno = __LINE__; goto __pyx_L5_except_error;}
+ __Pyx_GOTREF(__pyx_t_4);
+ __Pyx_DECREF(__pyx_t_14); __pyx_t_14 = 0;
+ __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0;
+ __pyx_t_3 = PyTuple_New(1); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 179; __pyx_clineno = __LINE__; goto __pyx_L5_except_error;}
+ __Pyx_GOTREF(__pyx_t_3);
+ PyTuple_SET_ITEM(__pyx_t_3, 0, __pyx_t_4);
+ __Pyx_GIVEREF(__pyx_t_4);
+ __pyx_t_4 = 0;
+ __pyx_t_4 = __Pyx_PyObject_Call(__pyx_builtin_IOError, __pyx_t_3, NULL); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 179; __pyx_clineno = __LINE__; goto __pyx_L5_except_error;}
+ __Pyx_GOTREF(__pyx_t_4);
+ __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0;
+
+ /* "kenlm.pyx":180
+ * exception_message = str(exception).replace('\n', ' ')
+ * raise IOError('Cannot read model \'{}\' ({})'.format(path, exception_message))\
+ * from exception # <<<<<<<<<<<<<<
+ * self.vocab = &self.model.BaseVocabulary()
+ *
+ */
+ __Pyx_Raise(__pyx_t_4, 0, 0, __pyx_v_exception);
+ __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0;
+ {__pyx_filename = __pyx_f[0]; __pyx_lineno = 179; __pyx_clineno = __LINE__; goto __pyx_L5_except_error;}
+ }
+ goto __pyx_L5_except_error;
+ __pyx_L5_except_error:;
+ __Pyx_XGIVEREF(__pyx_t_6);
+ __Pyx_XGIVEREF(__pyx_t_7);
+ __Pyx_XGIVEREF(__pyx_t_8);
+ __Pyx_ExceptionReset(__pyx_t_6, __pyx_t_7, __pyx_t_8);
+ goto __pyx_L1_error;
+ __pyx_L10_try_end:;
+ }
+
+ /* "kenlm.pyx":181
+ * raise IOError('Cannot read model \'{}\' ({})'.format(path, exception_message))\
+ * from exception
+ * self.vocab = &self.model.BaseVocabulary() # <<<<<<<<<<<<<<
+ *
+ * def __dealloc__(self):
+ */
+ __pyx_v_self->vocab = (&__pyx_v_self->model->BaseVocabulary());
+
+ /* "kenlm.pyx":168
+ * cdef _kenlm.const_Vocabulary* vocab
+ *
+ * def __init__(self, path): # <<<<<<<<<<<<<<
+ * """
+ * Load the language model.
+ */
+
+ /* function exit code */
+ __pyx_r = 0;
+ goto __pyx_L0;
+ __pyx_L1_error:;
+ __Pyx_XDECREF(__pyx_t_1);
+ __Pyx_XDECREF(__pyx_t_2);
+ __Pyx_XDECREF(__pyx_t_3);
+ __Pyx_XDECREF(__pyx_t_4);
+ __Pyx_XDECREF(__pyx_t_5);
+ __Pyx_XDECREF(__pyx_t_12);
+ __Pyx_XDECREF(__pyx_t_14);
+ __Pyx_AddTraceback("kenlm.Model.__init__", __pyx_clineno, __pyx_lineno, __pyx_filename);
+ __pyx_r = -1;
+ __pyx_L0:;
+ __Pyx_XDECREF(__pyx_v_exception);
+ __Pyx_XDECREF(__pyx_v_exception_message);
+ __Pyx_RefNannyFinishContext();
+ return __pyx_r;
+}
+
+/* "kenlm.pyx":183
+ * self.vocab = &self.model.BaseVocabulary()
+ *
+ * def __dealloc__(self): # <<<<<<<<<<<<<<
+ * del self.model
+ *
+ */
+
+/* Python wrapper */
+static void __pyx_pw_5kenlm_5Model_3__dealloc__(PyObject *__pyx_v_self); /*proto*/
+static void __pyx_pw_5kenlm_5Model_3__dealloc__(PyObject *__pyx_v_self) {
+ __Pyx_RefNannyDeclarations
+ __Pyx_RefNannySetupContext("__dealloc__ (wrapper)", 0);
+ __pyx_pf_5kenlm_5Model_2__dealloc__(((struct __pyx_obj_5kenlm_Model *)__pyx_v_self));
+
+ /* function exit code */
+ __Pyx_RefNannyFinishContext();
+}
+
+static void __pyx_pf_5kenlm_5Model_2__dealloc__(struct __pyx_obj_5kenlm_Model *__pyx_v_self) {
+ __Pyx_RefNannyDeclarations
+ __Pyx_RefNannySetupContext("__dealloc__", 0);
+
+ /* "kenlm.pyx":184
+ *
+ * def __dealloc__(self):
+ * del self.model # <<<<<<<<<<<<<<
+ *
+ * property order:
+ */
+ delete __pyx_v_self->model;
+
+ /* "kenlm.pyx":183
+ * self.vocab = &self.model.BaseVocabulary()
+ *
+ * def __dealloc__(self): # <<<<<<<<<<<<<<
+ * del self.model
+ *
+ */
+
+ /* function exit code */
+ __Pyx_RefNannyFinishContext();
+}
+
+/* "kenlm.pyx":187
+ *
+ * property order:
+ * def __get__(self): # <<<<<<<<<<<<<<
+ * return self.model.Order()
+ *
+ */
+
+/* Python wrapper */
+static PyObject *__pyx_pw_5kenlm_5Model_5order_1__get__(PyObject *__pyx_v_self); /*proto*/
+static PyObject *__pyx_pw_5kenlm_5Model_5order_1__get__(PyObject *__pyx_v_self) {
+ PyObject *__pyx_r = 0;
+ __Pyx_RefNannyDeclarations
+ __Pyx_RefNannySetupContext("__get__ (wrapper)", 0);
+ __pyx_r = __pyx_pf_5kenlm_5Model_5order___get__(((struct __pyx_obj_5kenlm_Model *)__pyx_v_self));
+
+ /* function exit code */
+ __Pyx_RefNannyFinishContext();
+ return __pyx_r;
+}
+
+static PyObject *__pyx_pf_5kenlm_5Model_5order___get__(struct __pyx_obj_5kenlm_Model *__pyx_v_self) {
+ PyObject *__pyx_r = NULL;
+ __Pyx_RefNannyDeclarations
+ PyObject *__pyx_t_1 = NULL;
+ int __pyx_lineno = 0;
+ const char *__pyx_filename = NULL;
+ int __pyx_clineno = 0;
+ __Pyx_RefNannySetupContext("__get__", 0);
+
+ /* "kenlm.pyx":188
+ * property order:
+ * def __get__(self):
+ * return self.model.Order() # <<<<<<<<<<<<<<
+ *
+ * def BeginSentenceWrite(self, State state):
+ */
+ __Pyx_XDECREF(__pyx_r);
+ __pyx_t_1 = __Pyx_PyInt_From_unsigned_int(__pyx_v_self->model->Order()); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 188; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+ __Pyx_GOTREF(__pyx_t_1);
+ __pyx_r = __pyx_t_1;
+ __pyx_t_1 = 0;
+ goto __pyx_L0;
+
+ /* "kenlm.pyx":187
+ *
+ * property order:
+ * def __get__(self): # <<<<<<<<<<<<<<
+ * return self.model.Order()
+ *
+ */
+
+ /* function exit code */
+ __pyx_L1_error:;
+ __Pyx_XDECREF(__pyx_t_1);
+ __Pyx_AddTraceback("kenlm.Model.order.__get__", __pyx_clineno, __pyx_lineno, __pyx_filename);
+ __pyx_r = NULL;
+ __pyx_L0:;
+ __Pyx_XGIVEREF(__pyx_r);
+ __Pyx_RefNannyFinishContext();
+ return __pyx_r;
+}
+
+/* "kenlm.pyx":190
+ * return self.model.Order()
+ *
+ * def BeginSentenceWrite(self, State state): # <<<<<<<<<<<<<<
+ * """Change the given state to a BOS state."""
+ * self.model.BeginSentenceWrite(&state._c_state)
+ */
+
+/* Python wrapper */
+static PyObject *__pyx_pw_5kenlm_5Model_5BeginSentenceWrite(PyObject *__pyx_v_self, PyObject *__pyx_v_state); /*proto*/
+static char __pyx_doc_5kenlm_5Model_4BeginSentenceWrite[] = "Change the given state to a BOS state.";
+static PyObject *__pyx_pw_5kenlm_5Model_5BeginSentenceWrite(PyObject *__pyx_v_self, PyObject *__pyx_v_state) {
+ CYTHON_UNUSED int __pyx_lineno = 0;
+ CYTHON_UNUSED const char *__pyx_filename = NULL;
+ CYTHON_UNUSED int __pyx_clineno = 0;
+ PyObject *__pyx_r = 0;
+ __Pyx_RefNannyDeclarations
+ __Pyx_RefNannySetupContext("BeginSentenceWrite (wrapper)", 0);
+ if (unlikely(!__Pyx_ArgTypeTest(((PyObject *)__pyx_v_state), __pyx_ptype_5kenlm_State, 1, "state", 0))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 190; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+ __pyx_r = __pyx_pf_5kenlm_5Model_4BeginSentenceWrite(((struct __pyx_obj_5kenlm_Model *)__pyx_v_self), ((struct __pyx_obj_5kenlm_State *)__pyx_v_state));
+
+ /* function exit code */
+ goto __pyx_L0;
+ __pyx_L1_error:;
+ __pyx_r = NULL;
+ __pyx_L0:;
+ __Pyx_RefNannyFinishContext();
+ return __pyx_r;
+}
+
+static PyObject *__pyx_pf_5kenlm_5Model_4BeginSentenceWrite(struct __pyx_obj_5kenlm_Model *__pyx_v_self, struct __pyx_obj_5kenlm_State *__pyx_v_state) {
+ PyObject *__pyx_r = NULL;
+ __Pyx_RefNannyDeclarations
+ __Pyx_RefNannySetupContext("BeginSentenceWrite", 0);
+
+ /* "kenlm.pyx":192
+ * def BeginSentenceWrite(self, State state):
+ * """Change the given state to a BOS state."""
+ * self.model.BeginSentenceWrite(&state._c_state) # <<<<<<<<<<<<<<
+ *
+ * def NullContextWrite(self, State state):
+ */
+ __pyx_v_self->model->BeginSentenceWrite((&__pyx_v_state->_c_state));
+
+ /* "kenlm.pyx":190
+ * return self.model.Order()
+ *
+ * def BeginSentenceWrite(self, State state): # <<<<<<<<<<<<<<
+ * """Change the given state to a BOS state."""
+ * self.model.BeginSentenceWrite(&state._c_state)
+ */
+
+ /* function exit code */
+ __pyx_r = Py_None; __Pyx_INCREF(Py_None);
+ __Pyx_XGIVEREF(__pyx_r);
+ __Pyx_RefNannyFinishContext();
+ return __pyx_r;
+}
+
+/* "kenlm.pyx":194
+ * self.model.BeginSentenceWrite(&state._c_state)
+ *
+ * def NullContextWrite(self, State state): # <<<<<<<<<<<<<<
+ * """Change the given state to a NULL state."""
+ * self.model.NullContextWrite(&state._c_state)
+ */
+
+/* Python wrapper */
+static PyObject *__pyx_pw_5kenlm_5Model_7NullContextWrite(PyObject *__pyx_v_self, PyObject *__pyx_v_state); /*proto*/
+static char __pyx_doc_5kenlm_5Model_6NullContextWrite[] = "Change the given state to a NULL state.";
+static PyObject *__pyx_pw_5kenlm_5Model_7NullContextWrite(PyObject *__pyx_v_self, PyObject *__pyx_v_state) {
+ CYTHON_UNUSED int __pyx_lineno = 0;
+ CYTHON_UNUSED const char *__pyx_filename = NULL;
+ CYTHON_UNUSED int __pyx_clineno = 0;
+ PyObject *__pyx_r = 0;
+ __Pyx_RefNannyDeclarations
+ __Pyx_RefNannySetupContext("NullContextWrite (wrapper)", 0);
+ if (unlikely(!__Pyx_ArgTypeTest(((PyObject *)__pyx_v_state), __pyx_ptype_5kenlm_State, 1, "state", 0))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 194; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+ __pyx_r = __pyx_pf_5kenlm_5Model_6NullContextWrite(((struct __pyx_obj_5kenlm_Model *)__pyx_v_self), ((struct __pyx_obj_5kenlm_State *)__pyx_v_state));
+
+ /* function exit code */
+ goto __pyx_L0;
+ __pyx_L1_error:;
+ __pyx_r = NULL;
+ __pyx_L0:;
+ __Pyx_RefNannyFinishContext();
+ return __pyx_r;
+}
+
+static PyObject *__pyx_pf_5kenlm_5Model_6NullContextWrite(struct __pyx_obj_5kenlm_Model *__pyx_v_self, struct __pyx_obj_5kenlm_State *__pyx_v_state) {
+ PyObject *__pyx_r = NULL;
+ __Pyx_RefNannyDeclarations
+ __Pyx_RefNannySetupContext("NullContextWrite", 0);
+
+ /* "kenlm.pyx":196
+ * def NullContextWrite(self, State state):
+ * """Change the given state to a NULL state."""
+ * self.model.NullContextWrite(&state._c_state) # <<<<<<<<<<<<<<
+ *
+ * def BaseScore(self, State in_state, str word, State out_state):
+ */
+ __pyx_v_self->model->NullContextWrite((&__pyx_v_state->_c_state));
+
+ /* "kenlm.pyx":194
+ * self.model.BeginSentenceWrite(&state._c_state)
+ *
+ * def NullContextWrite(self, State state): # <<<<<<<<<<<<<<
+ * """Change the given state to a NULL state."""
+ * self.model.NullContextWrite(&state._c_state)
+ */
+
+ /* function exit code */
+ __pyx_r = Py_None; __Pyx_INCREF(Py_None);
+ __Pyx_XGIVEREF(__pyx_r);
+ __Pyx_RefNannyFinishContext();
+ return __pyx_r;
+}
+
+/* "kenlm.pyx":198
+ * self.model.NullContextWrite(&state._c_state)
+ *
+ * def BaseScore(self, State in_state, str word, State out_state): # <<<<<<<<<<<<<<
+ * """
+ * Return p(word|in_state) and update the output state.
+ */
+
+/* Python wrapper */
+static PyObject *__pyx_pw_5kenlm_5Model_9BaseScore(PyObject *__pyx_v_self, PyObject *__pyx_args, PyObject *__pyx_kwds); /*proto*/
+static char __pyx_doc_5kenlm_5Model_8BaseScore[] = "\n Return p(word|in_state) and update the output state.\n Wrapper around model.BaseScore(in_state, Index(word), out_state)\n\n :param word: the suffix\n :param state: the context (defaults to NullContext)\n :returns: p(word|state)\n ";
+static PyObject *__pyx_pw_5kenlm_5Model_9BaseScore(PyObject *__pyx_v_self, PyObject *__pyx_args, PyObject *__pyx_kwds) {
+ struct __pyx_obj_5kenlm_State *__pyx_v_in_state = 0;
+ PyObject *__pyx_v_word = 0;
+ struct __pyx_obj_5kenlm_State *__pyx_v_out_state = 0;
+ int __pyx_lineno = 0;
+ const char *__pyx_filename = NULL;
+ int __pyx_clineno = 0;
+ PyObject *__pyx_r = 0;
+ __Pyx_RefNannyDeclarations
+ __Pyx_RefNannySetupContext("BaseScore (wrapper)", 0);
+ {
+ static PyObject **__pyx_pyargnames[] = {&__pyx_n_s_in_state,&__pyx_n_s_word,&__pyx_n_s_out_state,0};
+ PyObject* values[3] = {0,0,0};
+ if (unlikely(__pyx_kwds)) {
+ Py_ssize_t kw_args;
+ const Py_ssize_t pos_args = PyTuple_GET_SIZE(__pyx_args);
+ switch (pos_args) {
+ case 3: values[2] = PyTuple_GET_ITEM(__pyx_args, 2);
+ case 2: values[1] = PyTuple_GET_ITEM(__pyx_args, 1);
+ case 1: values[0] = PyTuple_GET_ITEM(__pyx_args, 0);
+ case 0: break;
+ default: goto __pyx_L5_argtuple_error;
+ }
+ kw_args = PyDict_Size(__pyx_kwds);
+ switch (pos_args) {
+ case 0:
+ if (likely((values[0] = PyDict_GetItem(__pyx_kwds, __pyx_n_s_in_state)) != 0)) kw_args--;
+ else goto __pyx_L5_argtuple_error;
+ case 1:
+ if (likely((values[1] = PyDict_GetItem(__pyx_kwds, __pyx_n_s_word)) != 0)) kw_args--;
+ else {
+ __Pyx_RaiseArgtupleInvalid("BaseScore", 1, 3, 3, 1); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 198; __pyx_clineno = __LINE__; goto __pyx_L3_error;}
+ }
+ case 2:
+ if (likely((values[2] = PyDict_GetItem(__pyx_kwds, __pyx_n_s_out_state)) != 0)) kw_args--;
+ else {
+ __Pyx_RaiseArgtupleInvalid("BaseScore", 1, 3, 3, 2); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 198; __pyx_clineno = __LINE__; goto __pyx_L3_error;}
+ }
+ }
+ if (unlikely(kw_args > 0)) {
+ if (unlikely(__Pyx_ParseOptionalKeywords(__pyx_kwds, __pyx_pyargnames, 0, values, pos_args, "BaseScore") < 0)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 198; __pyx_clineno = __LINE__; goto __pyx_L3_error;}
+ }
+ } else if (PyTuple_GET_SIZE(__pyx_args) != 3) {
+ goto __pyx_L5_argtuple_error;
+ } else {
+ values[0] = PyTuple_GET_ITEM(__pyx_args, 0);
+ values[1] = PyTuple_GET_ITEM(__pyx_args, 1);
+ values[2] = PyTuple_GET_ITEM(__pyx_args, 2);
+ }
+ __pyx_v_in_state = ((struct __pyx_obj_5kenlm_State *)values[0]);
+ __pyx_v_word = ((PyObject*)values[1]);
+ __pyx_v_out_state = ((struct __pyx_obj_5kenlm_State *)values[2]);
+ }
+ goto __pyx_L4_argument_unpacking_done;
+ __pyx_L5_argtuple_error:;
+ __Pyx_RaiseArgtupleInvalid("BaseScore", 1, 3, 3, PyTuple_GET_SIZE(__pyx_args)); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 198; __pyx_clineno = __LINE__; goto __pyx_L3_error;}
+ __pyx_L3_error:;
+ __Pyx_AddTraceback("kenlm.Model.BaseScore", __pyx_clineno, __pyx_lineno, __pyx_filename);
+ __Pyx_RefNannyFinishContext();
+ return NULL;
+ __pyx_L4_argument_unpacking_done:;
+ if (unlikely(!__Pyx_ArgTypeTest(((PyObject *)__pyx_v_in_state), __pyx_ptype_5kenlm_State, 1, "in_state", 0))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 198; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+ if (unlikely(!__Pyx_ArgTypeTest(((PyObject *)__pyx_v_word), (&PyString_Type), 1, "word", 1))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 198; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+ if (unlikely(!__Pyx_ArgTypeTest(((PyObject *)__pyx_v_out_state), __pyx_ptype_5kenlm_State, 1, "out_state", 0))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 198; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+ __pyx_r = __pyx_pf_5kenlm_5Model_8BaseScore(((struct __pyx_obj_5kenlm_Model *)__pyx_v_self), __pyx_v_in_state, __pyx_v_word, __pyx_v_out_state);
+
+ /* function exit code */
+ goto __pyx_L0;
+ __pyx_L1_error:;
+ __pyx_r = NULL;
+ __pyx_L0:;
+ __Pyx_RefNannyFinishContext();
+ return __pyx_r;
+}
+
+static PyObject *__pyx_pf_5kenlm_5Model_8BaseScore(struct __pyx_obj_5kenlm_Model *__pyx_v_self, struct __pyx_obj_5kenlm_State *__pyx_v_in_state, PyObject *__pyx_v_word, struct __pyx_obj_5kenlm_State *__pyx_v_out_state) {
+ float __pyx_v_total;
+ PyObject *__pyx_r = NULL;
+ __Pyx_RefNannyDeclarations
+ PyObject *__pyx_t_1 = NULL;
+ char *__pyx_t_2;
+ int __pyx_lineno = 0;
+ const char *__pyx_filename = NULL;
+ int __pyx_clineno = 0;
+ __Pyx_RefNannySetupContext("BaseScore", 0);
+
+ /* "kenlm.pyx":207
+ * :returns: p(word|state)
+ * """
+ * cdef float total = self.model.BaseScore(&in_state._c_state, self.vocab.Index(as_str(word)), &out_state._c_state) # <<<<<<<<<<<<<<
+ * return total
+ *
+ */
+ __pyx_t_1 = __pyx_f_5kenlm_as_str(__pyx_v_word); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 207; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+ __Pyx_GOTREF(__pyx_t_1);
+ __pyx_t_2 = __Pyx_PyObject_AsString(__pyx_t_1); if (unlikely((!__pyx_t_2) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 207; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+ __pyx_v_total = __pyx_v_self->model->BaseScore((&__pyx_v_in_state->_c_state), __pyx_v_self->vocab->Index(__pyx_t_2), (&__pyx_v_out_state->_c_state));
+ __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
+
+ /* "kenlm.pyx":208
+ * """
+ * cdef float total = self.model.BaseScore(&in_state._c_state, self.vocab.Index(as_str(word)), &out_state._c_state)
+ * return total # <<<<<<<<<<<<<<
+ *
+ * def BaseFullScore(self, State in_state, str word, State out_state):
+ */
+ __Pyx_XDECREF(__pyx_r);
+ __pyx_t_1 = PyFloat_FromDouble(__pyx_v_total); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 208; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+ __Pyx_GOTREF(__pyx_t_1);
+ __pyx_r = __pyx_t_1;
+ __pyx_t_1 = 0;
+ goto __pyx_L0;
+
+ /* "kenlm.pyx":198
+ * self.model.NullContextWrite(&state._c_state)
+ *
+ * def BaseScore(self, State in_state, str word, State out_state): # <<<<<<<<<<<<<<
+ * """
+ * Return p(word|in_state) and update the output state.
+ */
+
+ /* function exit code */
+ __pyx_L1_error:;
+ __Pyx_XDECREF(__pyx_t_1);
+ __Pyx_AddTraceback("kenlm.Model.BaseScore", __pyx_clineno, __pyx_lineno, __pyx_filename);
+ __pyx_r = NULL;
+ __pyx_L0:;
+ __Pyx_XGIVEREF(__pyx_r);
+ __Pyx_RefNannyFinishContext();
+ return __pyx_r;
+}
+
+/* "kenlm.pyx":210
+ * return total
+ *
+ * def BaseFullScore(self, State in_state, str word, State out_state): # <<<<<<<<<<<<<<
+ * """
+ * Wrapper around model.BaseScore(in_state, Index(word), out_state)
+ */
+
+/* Python wrapper */
+static PyObject *__pyx_pw_5kenlm_5Model_11BaseFullScore(PyObject *__pyx_v_self, PyObject *__pyx_args, PyObject *__pyx_kwds); /*proto*/
+static char __pyx_doc_5kenlm_5Model_10BaseFullScore[] = "\n Wrapper around model.BaseScore(in_state, Index(word), out_state)\n\n :param word: the suffix\n :param state: the context (defaults to NullContext)\n :returns: FullScoreReturn(word|state)\n ";
+static PyObject *__pyx_pw_5kenlm_5Model_11BaseFullScore(PyObject *__pyx_v_self, PyObject *__pyx_args, PyObject *__pyx_kwds) {
+ struct __pyx_obj_5kenlm_State *__pyx_v_in_state = 0;
+ PyObject *__pyx_v_word = 0;
+ struct __pyx_obj_5kenlm_State *__pyx_v_out_state = 0;
+ int __pyx_lineno = 0;
+ const char *__pyx_filename = NULL;
+ int __pyx_clineno = 0;
+ PyObject *__pyx_r = 0;
+ __Pyx_RefNannyDeclarations
+ __Pyx_RefNannySetupContext("BaseFullScore (wrapper)", 0);
+ {
+ static PyObject **__pyx_pyargnames[] = {&__pyx_n_s_in_state,&__pyx_n_s_word,&__pyx_n_s_out_state,0};
+ PyObject* values[3] = {0,0,0};
+ if (unlikely(__pyx_kwds)) {
+ Py_ssize_t kw_args;
+ const Py_ssize_t pos_args = PyTuple_GET_SIZE(__pyx_args);
+ switch (pos_args) {
+ case 3: values[2] = PyTuple_GET_ITEM(__pyx_args, 2);
+ case 2: values[1] = PyTuple_GET_ITEM(__pyx_args, 1);
+ case 1: values[0] = PyTuple_GET_ITEM(__pyx_args, 0);
+ case 0: break;
+ default: goto __pyx_L5_argtuple_error;
+ }
+ kw_args = PyDict_Size(__pyx_kwds);
+ switch (pos_args) {
+ case 0:
+ if (likely((values[0] = PyDict_GetItem(__pyx_kwds, __pyx_n_s_in_state)) != 0)) kw_args--;
+ else goto __pyx_L5_argtuple_error;
+ case 1:
+ if (likely((values[1] = PyDict_GetItem(__pyx_kwds, __pyx_n_s_word)) != 0)) kw_args--;
+ else {
+ __Pyx_RaiseArgtupleInvalid("BaseFullScore", 1, 3, 3, 1); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 210; __pyx_clineno = __LINE__; goto __pyx_L3_error;}
+ }
+ case 2:
+ if (likely((values[2] = PyDict_GetItem(__pyx_kwds, __pyx_n_s_out_state)) != 0)) kw_args--;
+ else {
+ __Pyx_RaiseArgtupleInvalid("BaseFullScore", 1, 3, 3, 2); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 210; __pyx_clineno = __LINE__; goto __pyx_L3_error;}
+ }
+ }
+ if (unlikely(kw_args > 0)) {
+ if (unlikely(__Pyx_ParseOptionalKeywords(__pyx_kwds, __pyx_pyargnames, 0, values, pos_args, "BaseFullScore") < 0)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 210; __pyx_clineno = __LINE__; goto __pyx_L3_error;}
+ }
+ } else if (PyTuple_GET_SIZE(__pyx_args) != 3) {
+ goto __pyx_L5_argtuple_error;
+ } else {
+ values[0] = PyTuple_GET_ITEM(__pyx_args, 0);
+ values[1] = PyTuple_GET_ITEM(__pyx_args, 1);
+ values[2] = PyTuple_GET_ITEM(__pyx_args, 2);
+ }
+ __pyx_v_in_state = ((struct __pyx_obj_5kenlm_State *)values[0]);
+ __pyx_v_word = ((PyObject*)values[1]);
+ __pyx_v_out_state = ((struct __pyx_obj_5kenlm_State *)values[2]);
+ }
+ goto __pyx_L4_argument_unpacking_done;
+ __pyx_L5_argtuple_error:;
+ __Pyx_RaiseArgtupleInvalid("BaseFullScore", 1, 3, 3, PyTuple_GET_SIZE(__pyx_args)); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 210; __pyx_clineno = __LINE__; goto __pyx_L3_error;}
+ __pyx_L3_error:;
+ __Pyx_AddTraceback("kenlm.Model.BaseFullScore", __pyx_clineno, __pyx_lineno, __pyx_filename);
+ __Pyx_RefNannyFinishContext();
+ return NULL;
+ __pyx_L4_argument_unpacking_done:;
+ if (unlikely(!__Pyx_ArgTypeTest(((PyObject *)__pyx_v_in_state), __pyx_ptype_5kenlm_State, 1, "in_state", 0))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 210; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+ if (unlikely(!__Pyx_ArgTypeTest(((PyObject *)__pyx_v_word), (&PyString_Type), 1, "word", 1))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 210; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+ if (unlikely(!__Pyx_ArgTypeTest(((PyObject *)__pyx_v_out_state), __pyx_ptype_5kenlm_State, 1, "out_state", 0))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 210; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+ __pyx_r = __pyx_pf_5kenlm_5Model_10BaseFullScore(((struct __pyx_obj_5kenlm_Model *)__pyx_v_self), __pyx_v_in_state, __pyx_v_word, __pyx_v_out_state);
+
+ /* function exit code */
+ goto __pyx_L0;
+ __pyx_L1_error:;
+ __pyx_r = NULL;
+ __pyx_L0:;
+ __Pyx_RefNannyFinishContext();
+ return __pyx_r;
+}
+
+static PyObject *__pyx_pf_5kenlm_5Model_10BaseFullScore(struct __pyx_obj_5kenlm_Model *__pyx_v_self, struct __pyx_obj_5kenlm_State *__pyx_v_in_state, PyObject *__pyx_v_word, struct __pyx_obj_5kenlm_State *__pyx_v_out_state) {
+ lm::WordIndex __pyx_v_wid;
+ struct lm::FullScoreReturn __pyx_v_ret;
+ PyObject *__pyx_r = NULL;
+ __Pyx_RefNannyDeclarations
+ PyObject *__pyx_t_1 = NULL;
+ char *__pyx_t_2;
+ PyObject *__pyx_t_3 = NULL;
+ PyObject *__pyx_t_4 = NULL;
+ PyObject *__pyx_t_5 = NULL;
+ int __pyx_lineno = 0;
+ const char *__pyx_filename = NULL;
+ int __pyx_clineno = 0;
+ __Pyx_RefNannySetupContext("BaseFullScore", 0);
+
+ /* "kenlm.pyx":218
+ * :returns: FullScoreReturn(word|state)
+ * """
+ * cdef _kenlm.WordIndex wid = self.vocab.Index(as_str(word)) # <<<<<<<<<<<<<<
+ * cdef _kenlm.FullScoreReturn ret = self.model.BaseFullScore(&in_state._c_state, wid, &out_state._c_state)
+ * return FullScoreReturn(ret.prob, ret.ngram_length, wid == 0)
+ */
+ __pyx_t_1 = __pyx_f_5kenlm_as_str(__pyx_v_word); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 218; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+ __Pyx_GOTREF(__pyx_t_1);
+ __pyx_t_2 = __Pyx_PyObject_AsString(__pyx_t_1); if (unlikely((!__pyx_t_2) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 218; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+ __pyx_v_wid = __pyx_v_self->vocab->Index(__pyx_t_2);
+ __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
+
+ /* "kenlm.pyx":219
+ * """
+ * cdef _kenlm.WordIndex wid = self.vocab.Index(as_str(word))
+ * cdef _kenlm.FullScoreReturn ret = self.model.BaseFullScore(&in_state._c_state, wid, &out_state._c_state) # <<<<<<<<<<<<<<
+ * return FullScoreReturn(ret.prob, ret.ngram_length, wid == 0)
+ *
+ */
+ __pyx_v_ret = __pyx_v_self->model->BaseFullScore((&__pyx_v_in_state->_c_state), __pyx_v_wid, (&__pyx_v_out_state->_c_state));
+
+ /* "kenlm.pyx":220
+ * cdef _kenlm.WordIndex wid = self.vocab.Index(as_str(word))
+ * cdef _kenlm.FullScoreReturn ret = self.model.BaseFullScore(&in_state._c_state, wid, &out_state._c_state)
+ * return FullScoreReturn(ret.prob, ret.ngram_length, wid == 0) # <<<<<<<<<<<<<<
+ *
+ * def __contains__(self, word):
+ */
+ __Pyx_XDECREF(__pyx_r);
+ __pyx_t_1 = PyFloat_FromDouble(__pyx_v_ret.prob); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 220; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+ __Pyx_GOTREF(__pyx_t_1);
+ __pyx_t_3 = __Pyx_PyInt_From_unsigned_char(__pyx_v_ret.ngram_length); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 220; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+ __Pyx_GOTREF(__pyx_t_3);
+ __pyx_t_4 = __Pyx_PyBool_FromLong((__pyx_v_wid == 0)); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 220; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+ __Pyx_GOTREF(__pyx_t_4);
+ __pyx_t_5 = PyTuple_New(3); if (unlikely(!__pyx_t_5)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 220; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+ __Pyx_GOTREF(__pyx_t_5);
+ PyTuple_SET_ITEM(__pyx_t_5, 0, __pyx_t_1);
+ __Pyx_GIVEREF(__pyx_t_1);
+ PyTuple_SET_ITEM(__pyx_t_5, 1, __pyx_t_3);
+ __Pyx_GIVEREF(__pyx_t_3);
+ PyTuple_SET_ITEM(__pyx_t_5, 2, __pyx_t_4);
+ __Pyx_GIVEREF(__pyx_t_4);
+ __pyx_t_1 = 0;
+ __pyx_t_3 = 0;
+ __pyx_t_4 = 0;
+ __pyx_t_4 = __Pyx_PyObject_Call(((PyObject *)((PyObject*)__pyx_ptype_5kenlm_FullScoreReturn)), __pyx_t_5, NULL); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 220; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+ __Pyx_GOTREF(__pyx_t_4);
+ __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0;
+ __pyx_r = __pyx_t_4;
+ __pyx_t_4 = 0;
+ goto __pyx_L0;
+
+ /* "kenlm.pyx":210
+ * return total
+ *
+ * def BaseFullScore(self, State in_state, str word, State out_state): # <<<<<<<<<<<<<<
+ * """
+ * Wrapper around model.BaseScore(in_state, Index(word), out_state)
+ */
+
+ /* function exit code */
+ __pyx_L1_error:;
+ __Pyx_XDECREF(__pyx_t_1);
+ __Pyx_XDECREF(__pyx_t_3);
+ __Pyx_XDECREF(__pyx_t_4);
+ __Pyx_XDECREF(__pyx_t_5);
+ __Pyx_AddTraceback("kenlm.Model.BaseFullScore", __pyx_clineno, __pyx_lineno, __pyx_filename);
+ __pyx_r = NULL;
+ __pyx_L0:;
+ __Pyx_XGIVEREF(__pyx_r);
+ __Pyx_RefNannyFinishContext();
+ return __pyx_r;
+}
+
+/* "kenlm.pyx":222
+ * return FullScoreReturn(ret.prob, ret.ngram_length, wid == 0)
+ *
+ * def __contains__(self, word): # <<<<<<<<<<<<<<
+ * cdef bytes w = as_str(word)
+ * return (self.vocab.Index(w) != 0)
+ */
+
+/* Python wrapper */
+static int __pyx_pw_5kenlm_5Model_13__contains__(PyObject *__pyx_v_self, PyObject *__pyx_v_word); /*proto*/
+static int __pyx_pw_5kenlm_5Model_13__contains__(PyObject *__pyx_v_self, PyObject *__pyx_v_word) {
+ int __pyx_r;
+ __Pyx_RefNannyDeclarations
+ __Pyx_RefNannySetupContext("__contains__ (wrapper)", 0);
+ __pyx_r = __pyx_pf_5kenlm_5Model_12__contains__(((struct __pyx_obj_5kenlm_Model *)__pyx_v_self), ((PyObject *)__pyx_v_word));
+
+ /* function exit code */
+ __Pyx_RefNannyFinishContext();
+ return __pyx_r;
+}
+
+static int __pyx_pf_5kenlm_5Model_12__contains__(struct __pyx_obj_5kenlm_Model *__pyx_v_self, PyObject *__pyx_v_word) {
+ PyObject *__pyx_v_w = 0;
+ int __pyx_r;
+ __Pyx_RefNannyDeclarations
+ PyObject *__pyx_t_1 = NULL;
+ char *__pyx_t_2;
+ int __pyx_lineno = 0;
+ const char *__pyx_filename = NULL;
+ int __pyx_clineno = 0;
+ __Pyx_RefNannySetupContext("__contains__", 0);
+
+ /* "kenlm.pyx":223
+ *
+ * def __contains__(self, word):
+ * cdef bytes w = as_str(word) # <<<<<<<<<<<<<<
+ * return (self.vocab.Index(w) != 0)
+ *
+ */
+ __pyx_t_1 = __pyx_f_5kenlm_as_str(__pyx_v_word); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 223; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+ __Pyx_GOTREF(__pyx_t_1);
+ __pyx_v_w = ((PyObject*)__pyx_t_1);
+ __pyx_t_1 = 0;
+
+ /* "kenlm.pyx":224
+ * def __contains__(self, word):
+ * cdef bytes w = as_str(word)
+ * return (self.vocab.Index(w) != 0) # <<<<<<<<<<<<<<
+ *
+ * def __repr__(self):
+ */
+ __pyx_t_2 = __Pyx_PyObject_AsString(__pyx_v_w); if (unlikely((!__pyx_t_2) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 224; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+ __pyx_r = (__pyx_v_self->vocab->Index(__pyx_t_2) != 0);
+ goto __pyx_L0;
+
+ /* "kenlm.pyx":222
+ * return FullScoreReturn(ret.prob, ret.ngram_length, wid == 0)
+ *
+ * def __contains__(self, word): # <<<<<<<<<<<<<<
+ * cdef bytes w = as_str(word)
+ * return (self.vocab.Index(w) != 0)
+ */
+
+ /* function exit code */
+ __pyx_L1_error:;
+ __Pyx_XDECREF(__pyx_t_1);
+ __Pyx_AddTraceback("kenlm.Model.__contains__", __pyx_clineno, __pyx_lineno, __pyx_filename);
+ __pyx_r = -1;
+ __pyx_L0:;
+ __Pyx_XDECREF(__pyx_v_w);
+ __Pyx_RefNannyFinishContext();
+ return __pyx_r;
+}
+
+/* "kenlm.pyx":226
+ * return (self.vocab.Index(w) != 0)
+ *
+ * def __repr__(self): # <<<<<<<<<<<<<<
+ * return '<Model from {0}>'.format(os.path.basename(self.path))
+ *
+ */
+
+/* Python wrapper */
+static PyObject *__pyx_pw_5kenlm_5Model_15__repr__(PyObject *__pyx_v_self); /*proto*/
+static PyObject *__pyx_pw_5kenlm_5Model_15__repr__(PyObject *__pyx_v_self) {
+ PyObject *__pyx_r = 0;
+ __Pyx_RefNannyDeclarations
+ __Pyx_RefNannySetupContext("__repr__ (wrapper)", 0);
+ __pyx_r = __pyx_pf_5kenlm_5Model_14__repr__(((struct __pyx_obj_5kenlm_Model *)__pyx_v_self));
+
+ /* function exit code */
+ __Pyx_RefNannyFinishContext();
+ return __pyx_r;
+}
+
+static PyObject *__pyx_pf_5kenlm_5Model_14__repr__(struct __pyx_obj_5kenlm_Model *__pyx_v_self) {
+ PyObject *__pyx_r = NULL;
+ __Pyx_RefNannyDeclarations
+ PyObject *__pyx_t_1 = NULL;
+ PyObject *__pyx_t_2 = NULL;
+ PyObject *__pyx_t_3 = NULL;
+ PyObject *__pyx_t_4 = NULL;
+ PyObject *__pyx_t_5 = NULL;
+ PyObject *__pyx_t_6 = NULL;
+ int __pyx_lineno = 0;
+ const char *__pyx_filename = NULL;
+ int __pyx_clineno = 0;
+ __Pyx_RefNannySetupContext("__repr__", 0);
+
+ /* "kenlm.pyx":227
+ *
+ * def __repr__(self):
+ * return '<Model from {0}>'.format(os.path.basename(self.path)) # <<<<<<<<<<<<<<
+ *
+ * def __reduce__(self):
+ */
+ __Pyx_XDECREF(__pyx_r);
+ __pyx_t_2 = __Pyx_PyObject_GetAttrStr(__pyx_kp_s_Model_from_0, __pyx_n_s_format); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 227; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+ __Pyx_GOTREF(__pyx_t_2);
+ __pyx_t_4 = __Pyx_GetModuleGlobalName(__pyx_n_s_os); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 227; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+ __Pyx_GOTREF(__pyx_t_4);
+ __pyx_t_5 = __Pyx_PyObject_GetAttrStr(__pyx_t_4, __pyx_n_s_path); if (unlikely(!__pyx_t_5)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 227; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+ __Pyx_GOTREF(__pyx_t_5);
+ __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0;
+ __pyx_t_4 = __Pyx_PyObject_GetAttrStr(__pyx_t_5, __pyx_n_s_basename); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 227; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+ __Pyx_GOTREF(__pyx_t_4);
+ __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0;
+ __pyx_t_5 = NULL;
+ if (CYTHON_COMPILING_IN_CPYTHON && likely(PyMethod_Check(__pyx_t_4))) {
+ __pyx_t_5 = PyMethod_GET_SELF(__pyx_t_4);
+ if (likely(__pyx_t_5)) {
+ PyObject* function = PyMethod_GET_FUNCTION(__pyx_t_4);
+ __Pyx_INCREF(__pyx_t_5);
+ __Pyx_INCREF(function);
+ __Pyx_DECREF_SET(__pyx_t_4, function);
+ }
+ }
+ if (!__pyx_t_5) {
+ __pyx_t_3 = __Pyx_PyObject_CallOneArg(__pyx_t_4, __pyx_v_self->path); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 227; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+ __Pyx_GOTREF(__pyx_t_3);
+ } else {
+ __pyx_t_6 = PyTuple_New(1+1); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 227; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+ __Pyx_GOTREF(__pyx_t_6);
+ PyTuple_SET_ITEM(__pyx_t_6, 0, __pyx_t_5); __Pyx_GIVEREF(__pyx_t_5); __pyx_t_5 = NULL;
+ __Pyx_INCREF(__pyx_v_self->path);
+ PyTuple_SET_ITEM(__pyx_t_6, 0+1, __pyx_v_self->path);
+ __Pyx_GIVEREF(__pyx_v_self->path);
+ __pyx_t_3 = __Pyx_PyObject_Call(__pyx_t_4, __pyx_t_6, NULL); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 227; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+ __Pyx_GOTREF(__pyx_t_3);
+ __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0;
+ }
+ __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0;
+ __pyx_t_4 = NULL;
+ if (CYTHON_COMPILING_IN_CPYTHON && likely(PyMethod_Check(__pyx_t_2))) {
+ __pyx_t_4 = PyMethod_GET_SELF(__pyx_t_2);
+ if (likely(__pyx_t_4)) {
+ PyObject* function = PyMethod_GET_FUNCTION(__pyx_t_2);
+ __Pyx_INCREF(__pyx_t_4);
+ __Pyx_INCREF(function);
+ __Pyx_DECREF_SET(__pyx_t_2, function);
+ }
+ }
+ if (!__pyx_t_4) {
+ __pyx_t_1 = __Pyx_PyObject_CallOneArg(__pyx_t_2, __pyx_t_3); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 227; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+ __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0;
+ __Pyx_GOTREF(__pyx_t_1);
+ } else {
+ __pyx_t_6 = PyTuple_New(1+1); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 227; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+ __Pyx_GOTREF(__pyx_t_6);
+ PyTuple_SET_ITEM(__pyx_t_6, 0, __pyx_t_4); __Pyx_GIVEREF(__pyx_t_4); __pyx_t_4 = NULL;
+ PyTuple_SET_ITEM(__pyx_t_6, 0+1, __pyx_t_3);
+ __Pyx_GIVEREF(__pyx_t_3);
+ __pyx_t_3 = 0;
+ __pyx_t_1 = __Pyx_PyObject_Call(__pyx_t_2, __pyx_t_6, NULL); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 227; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+ __Pyx_GOTREF(__pyx_t_1);
+ __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0;
+ }
+ __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0;
+ __pyx_r = __pyx_t_1;
+ __pyx_t_1 = 0;
+ goto __pyx_L0;
+
+ /* "kenlm.pyx":226
+ * return (self.vocab.Index(w) != 0)
+ *
+ * def __repr__(self): # <<<<<<<<<<<<<<
+ * return '<Model from {0}>'.format(os.path.basename(self.path))
+ *
+ */
+
+ /* function exit code */
+ __pyx_L1_error:;
+ __Pyx_XDECREF(__pyx_t_1);
+ __Pyx_XDECREF(__pyx_t_2);
+ __Pyx_XDECREF(__pyx_t_3);
+ __Pyx_XDECREF(__pyx_t_4);
+ __Pyx_XDECREF(__pyx_t_5);
+ __Pyx_XDECREF(__pyx_t_6);
+ __Pyx_AddTraceback("kenlm.Model.__repr__", __pyx_clineno, __pyx_lineno, __pyx_filename);
+ __pyx_r = NULL;
+ __pyx_L0:;
+ __Pyx_XGIVEREF(__pyx_r);
+ __Pyx_RefNannyFinishContext();
+ return __pyx_r;
+}
+
+/* "kenlm.pyx":229
+ * return '<Model from {0}>'.format(os.path.basename(self.path))
+ *
+ * def __reduce__(self): # <<<<<<<<<<<<<<
+ * return (_kenlm.LanguageModel, (self.path,))
+ *
+ */
+
+/* Python wrapper */
+static PyObject *__pyx_pw_5kenlm_5Model_17__reduce__(PyObject *__pyx_v_self, CYTHON_UNUSED PyObject *unused); /*proto*/
+static PyObject *__pyx_pw_5kenlm_5Model_17__reduce__(PyObject *__pyx_v_self, CYTHON_UNUSED PyObject *unused) {
+ PyObject *__pyx_r = 0;
+ __Pyx_RefNannyDeclarations
+ __Pyx_RefNannySetupContext("__reduce__ (wrapper)", 0);
+ __pyx_r = __pyx_pf_5kenlm_5Model_16__reduce__(((struct __pyx_obj_5kenlm_Model *)__pyx_v_self));
+
+ /* function exit code */
+ __Pyx_RefNannyFinishContext();
+ return __pyx_r;
+}
+
+static PyObject *__pyx_pf_5kenlm_5Model_16__reduce__(struct __pyx_obj_5kenlm_Model *__pyx_v_self) {
+ PyObject *__pyx_r = NULL;
+ __Pyx_RefNannyDeclarations
+ PyObject *__pyx_t_1 = NULL;
+ PyObject *__pyx_t_2 = NULL;
+ PyObject *__pyx_t_3 = NULL;
+ int __pyx_lineno = 0;
+ const char *__pyx_filename = NULL;
+ int __pyx_clineno = 0;
+ __Pyx_RefNannySetupContext("__reduce__", 0);
+
+ /* "kenlm.pyx":230
+ *
+ * def __reduce__(self):
+ * return (_kenlm.LanguageModel, (self.path,)) # <<<<<<<<<<<<<<
+ *
+ */
+ __Pyx_XDECREF(__pyx_r);
+ __pyx_t_1 = __Pyx_GetModuleGlobalName(__pyx_n_s_kenlm); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 230; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+ __Pyx_GOTREF(__pyx_t_1);
+ __pyx_t_2 = __Pyx_PyObject_GetAttrStr(__pyx_t_1, __pyx_n_s_LanguageModel); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 230; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+ __Pyx_GOTREF(__pyx_t_2);
+ __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
+ __pyx_t_1 = PyTuple_New(1); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 230; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+ __Pyx_GOTREF(__pyx_t_1);
+ __Pyx_INCREF(__pyx_v_self->path);
+ PyTuple_SET_ITEM(__pyx_t_1, 0, __pyx_v_self->path);
+ __Pyx_GIVEREF(__pyx_v_self->path);
+ __pyx_t_3 = PyTuple_New(2); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 230; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+ __Pyx_GOTREF(__pyx_t_3);
+ PyTuple_SET_ITEM(__pyx_t_3, 0, __pyx_t_2);
+ __Pyx_GIVEREF(__pyx_t_2);
+ PyTuple_SET_ITEM(__pyx_t_3, 1, __pyx_t_1);
+ __Pyx_GIVEREF(__pyx_t_1);
+ __pyx_t_2 = 0;
+ __pyx_t_1 = 0;
+ __pyx_r = __pyx_t_3;
+ __pyx_t_3 = 0;
+ goto __pyx_L0;
+
+ /* "kenlm.pyx":229
+ * return '<Model from {0}>'.format(os.path.basename(self.path))
+ *
+ * def __reduce__(self): # <<<<<<<<<<<<<<
+ * return (_kenlm.LanguageModel, (self.path,))
+ *
+ */
+
+ /* function exit code */
+ __pyx_L1_error:;
+ __Pyx_XDECREF(__pyx_t_1);
+ __Pyx_XDECREF(__pyx_t_2);
+ __Pyx_XDECREF(__pyx_t_3);
+ __Pyx_AddTraceback("kenlm.Model.__reduce__", __pyx_clineno, __pyx_lineno, __pyx_filename);
+ __pyx_r = NULL;
+ __pyx_L0:;
+ __Pyx_XGIVEREF(__pyx_r);
+ __Pyx_RefNannyFinishContext();
+ return __pyx_r;
+}
+
+/* "kenlm.pyx":165
+ *
+ * cdef _kenlm.Model* model
+ * cdef public bytes path # <<<<<<<<<<<<<<
+ * cdef _kenlm.const_Vocabulary* vocab
+ *
+ */
+
+/* Python wrapper */
+static PyObject *__pyx_pw_5kenlm_5Model_4path_1__get__(PyObject *__pyx_v_self); /*proto*/
+static PyObject *__pyx_pw_5kenlm_5Model_4path_1__get__(PyObject *__pyx_v_self) {
+ PyObject *__pyx_r = 0;
+ __Pyx_RefNannyDeclarations
+ __Pyx_RefNannySetupContext("__get__ (wrapper)", 0);
+ __pyx_r = __pyx_pf_5kenlm_5Model_4path___get__(((struct __pyx_obj_5kenlm_Model *)__pyx_v_self));
+
+ /* function exit code */
+ __Pyx_RefNannyFinishContext();
+ return __pyx_r;
+}
+
+static PyObject *__pyx_pf_5kenlm_5Model_4path___get__(struct __pyx_obj_5kenlm_Model *__pyx_v_self) {
+ PyObject *__pyx_r = NULL;
+ __Pyx_RefNannyDeclarations
+ __Pyx_RefNannySetupContext("__get__", 0);
+ __Pyx_XDECREF(__pyx_r);
+ __Pyx_INCREF(__pyx_v_self->path);
+ __pyx_r = __pyx_v_self->path;
+ goto __pyx_L0;
+
+ /* function exit code */
+ __pyx_L0:;
+ __Pyx_XGIVEREF(__pyx_r);
+ __Pyx_RefNannyFinishContext();
+ return __pyx_r;
+}
+
+/* Python wrapper */
+static int __pyx_pw_5kenlm_5Model_4path_3__set__(PyObject *__pyx_v_self, PyObject *__pyx_v_value); /*proto*/
+static int __pyx_pw_5kenlm_5Model_4path_3__set__(PyObject *__pyx_v_self, PyObject *__pyx_v_value) {
+ int __pyx_r;
+ __Pyx_RefNannyDeclarations
+ __Pyx_RefNannySetupContext("__set__ (wrapper)", 0);
+ __pyx_r = __pyx_pf_5kenlm_5Model_4path_2__set__(((struct __pyx_obj_5kenlm_Model *)__pyx_v_self), ((PyObject *)__pyx_v_value));
+
+ /* function exit code */
+ __Pyx_RefNannyFinishContext();
+ return __pyx_r;
+}
+
+static int __pyx_pf_5kenlm_5Model_4path_2__set__(struct __pyx_obj_5kenlm_Model *__pyx_v_self, PyObject *__pyx_v_value) {
+ int __pyx_r;
+ __Pyx_RefNannyDeclarations
+ PyObject *__pyx_t_1 = NULL;
+ int __pyx_lineno = 0;
+ const char *__pyx_filename = NULL;
+ int __pyx_clineno = 0;
+ __Pyx_RefNannySetupContext("__set__", 0);
+ if (!(likely(PyBytes_CheckExact(__pyx_v_value))||((__pyx_v_value) == Py_None)||(PyErr_Format(PyExc_TypeError, "Expected %.16s, got %.200s", "bytes", Py_TYPE(__pyx_v_value)->tp_name), 0))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 165; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+ __pyx_t_1 = __pyx_v_value;
+ __Pyx_INCREF(__pyx_t_1);
+ __Pyx_GIVEREF(__pyx_t_1);
+ __Pyx_GOTREF(__pyx_v_self->path);
+ __Pyx_DECREF(__pyx_v_self->path);
+ __pyx_v_self->path = ((PyObject*)__pyx_t_1);
+ __pyx_t_1 = 0;
+
+ /* function exit code */
+ __pyx_r = 0;
+ goto __pyx_L0;
+ __pyx_L1_error:;
+ __Pyx_XDECREF(__pyx_t_1);
+ __Pyx_AddTraceback("kenlm.Model.path.__set__", __pyx_clineno, __pyx_lineno, __pyx_filename);
+ __pyx_r = -1;
+ __pyx_L0:;
+ __Pyx_RefNannyFinishContext();
+ return __pyx_r;
+}
+
+/* Python wrapper */
+static int __pyx_pw_5kenlm_5Model_4path_5__del__(PyObject *__pyx_v_self); /*proto*/
+static int __pyx_pw_5kenlm_5Model_4path_5__del__(PyObject *__pyx_v_self) {
+ int __pyx_r;
+ __Pyx_RefNannyDeclarations
+ __Pyx_RefNannySetupContext("__del__ (wrapper)", 0);
+ __pyx_r = __pyx_pf_5kenlm_5Model_4path_4__del__(((struct __pyx_obj_5kenlm_Model *)__pyx_v_self));
+
+ /* function exit code */
+ __Pyx_RefNannyFinishContext();
+ return __pyx_r;
+}
+
+static int __pyx_pf_5kenlm_5Model_4path_4__del__(struct __pyx_obj_5kenlm_Model *__pyx_v_self) {
+ int __pyx_r;
+ __Pyx_RefNannyDeclarations
+ __Pyx_RefNannySetupContext("__del__", 0);
+ __Pyx_INCREF(Py_None);
+ __Pyx_GIVEREF(Py_None);
+ __Pyx_GOTREF(__pyx_v_self->path);
+ __Pyx_DECREF(__pyx_v_self->path);
+ __pyx_v_self->path = ((PyObject*)Py_None);
+
+ /* function exit code */
+ __pyx_r = 0;
+ __Pyx_RefNannyFinishContext();
+ return __pyx_r;
+}
+
+static PyObject *__pyx_tp_new_5kenlm_FullScoreReturn(PyTypeObject *t, PyObject *a, PyObject *k) {
+ PyObject *o;
+ if (likely((t->tp_flags & Py_TPFLAGS_IS_ABSTRACT) == 0)) {
+ o = (*t->tp_alloc)(t, 0);
+ } else {
+ o = (PyObject *) PyBaseObject_Type.tp_new(t, __pyx_empty_tuple, 0);
+ }
+ if (unlikely(!o)) return 0;
+ if (unlikely(__pyx_pw_5kenlm_15FullScoreReturn_1__cinit__(o, a, k) < 0)) {
+ Py_DECREF(o); o = 0;
+ }
+ return o;
+}
+
+static void __pyx_tp_dealloc_5kenlm_FullScoreReturn(PyObject *o) {
+ #if PY_VERSION_HEX >= 0x030400a1
+ if (unlikely(Py_TYPE(o)->tp_finalize) && (!PyType_IS_GC(Py_TYPE(o)) || !_PyGC_FINALIZED(o))) {
+ if (PyObject_CallFinalizerFromDealloc(o)) return;
+ }
+ #endif
+ (*Py_TYPE(o)->tp_free)(o);
+}
+
+static PyObject *__pyx_getprop_5kenlm_15FullScoreReturn_log_prob(PyObject *o, CYTHON_UNUSED void *x) {
+ return __pyx_pw_5kenlm_15FullScoreReturn_8log_prob_1__get__(o);
+}
+
+static PyObject *__pyx_getprop_5kenlm_15FullScoreReturn_ngram_length(PyObject *o, CYTHON_UNUSED void *x) {
+ return __pyx_pw_5kenlm_15FullScoreReturn_12ngram_length_1__get__(o);
+}
+
+static PyObject *__pyx_getprop_5kenlm_15FullScoreReturn_oov(PyObject *o, CYTHON_UNUSED void *x) {
+ return __pyx_pw_5kenlm_15FullScoreReturn_3oov_1__get__(o);
+}
+
+static PyMethodDef __pyx_methods_5kenlm_FullScoreReturn[] = {
+ {0, 0, 0, 0}
+};
+
+static struct PyGetSetDef __pyx_getsets_5kenlm_FullScoreReturn[] = {
+ {(char *)"log_prob", __pyx_getprop_5kenlm_15FullScoreReturn_log_prob, 0, 0, 0},
+ {(char *)"ngram_length", __pyx_getprop_5kenlm_15FullScoreReturn_ngram_length, 0, 0, 0},
+ {(char *)"oov", __pyx_getprop_5kenlm_15FullScoreReturn_oov, 0, 0, 0},
+ {0, 0, 0, 0, 0}
+};
+
+static PyTypeObject __pyx_type_5kenlm_FullScoreReturn = {
+ PyVarObject_HEAD_INIT(0, 0)
+ "kenlm.FullScoreReturn", /*tp_name*/
+ sizeof(struct __pyx_obj_5kenlm_FullScoreReturn), /*tp_basicsize*/
+ 0, /*tp_itemsize*/
+ __pyx_tp_dealloc_5kenlm_FullScoreReturn, /*tp_dealloc*/
+ 0, /*tp_print*/
+ 0, /*tp_getattr*/
+ 0, /*tp_setattr*/
+ #if PY_MAJOR_VERSION < 3
+ 0, /*tp_compare*/
+ #else
+ 0, /*reserved*/
+ #endif
+ __pyx_pw_5kenlm_15FullScoreReturn_3__repr__, /*tp_repr*/
+ 0, /*tp_as_number*/
+ 0, /*tp_as_sequence*/
+ 0, /*tp_as_mapping*/
+ 0, /*tp_hash*/
+ 0, /*tp_call*/
+ 0, /*tp_str*/
+ 0, /*tp_getattro*/
+ 0, /*tp_setattro*/
+ 0, /*tp_as_buffer*/
+ Py_TPFLAGS_DEFAULT|Py_TPFLAGS_HAVE_VERSION_TAG|Py_TPFLAGS_CHECKTYPES|Py_TPFLAGS_HAVE_NEWBUFFER|Py_TPFLAGS_BASETYPE, /*tp_flags*/
+ "\n Wrapper around FullScoreReturn.\n\n Notes:\n `prob` has been renamed to `log_prob`\n `oov` has been added to flag whether the word is OOV\n ", /*tp_doc*/
+ 0, /*tp_traverse*/
+ 0, /*tp_clear*/
+ 0, /*tp_richcompare*/
+ 0, /*tp_weaklistoffset*/
+ 0, /*tp_iter*/
+ 0, /*tp_iternext*/
+ __pyx_methods_5kenlm_FullScoreReturn, /*tp_methods*/
+ 0, /*tp_members*/
+ __pyx_getsets_5kenlm_FullScoreReturn, /*tp_getset*/
+ 0, /*tp_base*/
+ 0, /*tp_dict*/
+ 0, /*tp_descr_get*/
+ 0, /*tp_descr_set*/
+ 0, /*tp_dictoffset*/
+ 0, /*tp_init*/
+ 0, /*tp_alloc*/
+ __pyx_tp_new_5kenlm_FullScoreReturn, /*tp_new*/
+ 0, /*tp_free*/
+ 0, /*tp_is_gc*/
+ 0, /*tp_bases*/
+ 0, /*tp_mro*/
+ 0, /*tp_cache*/
+ 0, /*tp_subclasses*/
+ 0, /*tp_weaklist*/
+ 0, /*tp_del*/
+ 0, /*tp_version_tag*/
+ #if PY_VERSION_HEX >= 0x030400a1
+ 0, /*tp_finalize*/
+ #endif
+};
+
+static PyObject *__pyx_tp_new_5kenlm_State(PyTypeObject *t, CYTHON_UNUSED PyObject *a, CYTHON_UNUSED PyObject *k) {
+ struct __pyx_obj_5kenlm_State *p;
+ PyObject *o;
+ if (likely((t->tp_flags & Py_TPFLAGS_IS_ABSTRACT) == 0)) {
+ o = (*t->tp_alloc)(t, 0);
+ } else {
+ o = (PyObject *) PyBaseObject_Type.tp_new(t, __pyx_empty_tuple, 0);
+ }
+ if (unlikely(!o)) return 0;
+ p = ((struct __pyx_obj_5kenlm_State *)o);
+ new((void*)&(p->_c_state)) lm::ngram::State();
+ return o;
+}
+
+static void __pyx_tp_dealloc_5kenlm_State(PyObject *o) {
+ struct __pyx_obj_5kenlm_State *p = (struct __pyx_obj_5kenlm_State *)o;
+ #if PY_VERSION_HEX >= 0x030400a1
+ if (unlikely(Py_TYPE(o)->tp_finalize) && (!PyType_IS_GC(Py_TYPE(o)) || !_PyGC_FINALIZED(o))) {
+ if (PyObject_CallFinalizerFromDealloc(o)) return;
+ }
+ #endif
+ __Pyx_call_destructor(&p->_c_state);
+ (*Py_TYPE(o)->tp_free)(o);
+}
+
+static PyMethodDef __pyx_methods_5kenlm_State[] = {
+ {0, 0, 0, 0}
+};
+
+static PyTypeObject __pyx_type_5kenlm_State = {
+ PyVarObject_HEAD_INIT(0, 0)
+ "kenlm.State", /*tp_name*/
+ sizeof(struct __pyx_obj_5kenlm_State), /*tp_basicsize*/
+ 0, /*tp_itemsize*/
+ __pyx_tp_dealloc_5kenlm_State, /*tp_dealloc*/
+ 0, /*tp_print*/
+ 0, /*tp_getattr*/
+ 0, /*tp_setattr*/
+ #if PY_MAJOR_VERSION < 3
+ 0, /*tp_compare*/
+ #else
+ 0, /*reserved*/
+ #endif
+ 0, /*tp_repr*/
+ 0, /*tp_as_number*/
+ 0, /*tp_as_sequence*/
+ 0, /*tp_as_mapping*/
+ __pyx_pw_5kenlm_5State_3__hash__, /*tp_hash*/
+ 0, /*tp_call*/
+ 0, /*tp_str*/
+ 0, /*tp_getattro*/
+ 0, /*tp_setattro*/
+ 0, /*tp_as_buffer*/
+ Py_TPFLAGS_DEFAULT|Py_TPFLAGS_HAVE_VERSION_TAG|Py_TPFLAGS_CHECKTYPES|Py_TPFLAGS_HAVE_NEWBUFFER|Py_TPFLAGS_BASETYPE, /*tp_flags*/
+ "\n Wrapper around lm::ngram::State so that python code can make incremental queries.\n\n Notes:\n * rich comparisons \n * hashable\n ", /*tp_doc*/
+ 0, /*tp_traverse*/
+ 0, /*tp_clear*/
+ __pyx_pw_5kenlm_5State_1__richcmp__, /*tp_richcompare*/
+ 0, /*tp_weaklistoffset*/
+ 0, /*tp_iter*/
+ 0, /*tp_iternext*/
+ __pyx_methods_5kenlm_State, /*tp_methods*/
+ 0, /*tp_members*/
+ 0, /*tp_getset*/
+ 0, /*tp_base*/
+ 0, /*tp_dict*/
+ 0, /*tp_descr_get*/
+ 0, /*tp_descr_set*/
+ 0, /*tp_dictoffset*/
+ 0, /*tp_init*/
+ 0, /*tp_alloc*/
+ __pyx_tp_new_5kenlm_State, /*tp_new*/
+ 0, /*tp_free*/
+ 0, /*tp_is_gc*/
+ 0, /*tp_bases*/
+ 0, /*tp_mro*/
+ 0, /*tp_cache*/
+ 0, /*tp_subclasses*/
+ 0, /*tp_weaklist*/
+ 0, /*tp_del*/
+ 0, /*tp_version_tag*/
+ #if PY_VERSION_HEX >= 0x030400a1
+ 0, /*tp_finalize*/
+ #endif
+};
+
+static PyObject *__pyx_tp_new_5kenlm_LanguageModel(PyTypeObject *t, CYTHON_UNUSED PyObject *a, CYTHON_UNUSED PyObject *k) {
+ struct __pyx_obj_5kenlm_LanguageModel *p;
+ PyObject *o;
+ if (likely((t->tp_flags & Py_TPFLAGS_IS_ABSTRACT) == 0)) {
+ o = (*t->tp_alloc)(t, 0);
+ } else {
+ o = (PyObject *) PyBaseObject_Type.tp_new(t, __pyx_empty_tuple, 0);
+ }
+ if (unlikely(!o)) return 0;
+ p = ((struct __pyx_obj_5kenlm_LanguageModel *)o);
+ p->path = ((PyObject*)Py_None); Py_INCREF(Py_None);
+ return o;
+}
+
+static void __pyx_tp_dealloc_5kenlm_LanguageModel(PyObject *o) {
+ struct __pyx_obj_5kenlm_LanguageModel *p = (struct __pyx_obj_5kenlm_LanguageModel *)o;
+ #if PY_VERSION_HEX >= 0x030400a1
+ if (unlikely(Py_TYPE(o)->tp_finalize) && (!PyType_IS_GC(Py_TYPE(o)) || !_PyGC_FINALIZED(o))) {
+ if (PyObject_CallFinalizerFromDealloc(o)) return;
+ }
+ #endif
+ {
+ PyObject *etype, *eval, *etb;
+ PyErr_Fetch(&etype, &eval, &etb);
+ ++Py_REFCNT(o);
+ __pyx_pw_5kenlm_13LanguageModel_3__dealloc__(o);
+ --Py_REFCNT(o);
+ PyErr_Restore(etype, eval, etb);
+ }
+ Py_CLEAR(p->path);
+ (*Py_TYPE(o)->tp_free)(o);
+}
+
+static PyObject *__pyx_getprop_5kenlm_13LanguageModel_order(PyObject *o, CYTHON_UNUSED void *x) {
+ return __pyx_pw_5kenlm_13LanguageModel_5order_1__get__(o);
+}
+
+static PyObject *__pyx_getprop_5kenlm_13LanguageModel_path(PyObject *o, CYTHON_UNUSED void *x) {
+ return __pyx_pw_5kenlm_13LanguageModel_4path_1__get__(o);
+}
+
+static int __pyx_setprop_5kenlm_13LanguageModel_path(PyObject *o, PyObject *v, CYTHON_UNUSED void *x) {
+ if (v) {
+ return __pyx_pw_5kenlm_13LanguageModel_4path_3__set__(o, v);
+ }
+ else {
+ return __pyx_pw_5kenlm_13LanguageModel_4path_5__del__(o);
+ }
+}
+
+static PyMethodDef __pyx_methods_5kenlm_LanguageModel[] = {
+ {"score", (PyCFunction)__pyx_pw_5kenlm_13LanguageModel_5score, METH_VARARGS|METH_KEYWORDS, 0},
+ {"full_scores", (PyCFunction)__pyx_pw_5kenlm_13LanguageModel_7full_scores, METH_VARARGS|METH_KEYWORDS, __pyx_doc_5kenlm_13LanguageModel_6full_scores},
+ {"__reduce__", (PyCFunction)__pyx_pw_5kenlm_13LanguageModel_14__reduce__, METH_NOARGS, 0},
+ {0, 0, 0, 0}
+};
+
+static struct PyGetSetDef __pyx_getsets_5kenlm_LanguageModel[] = {
+ {(char *)"order", __pyx_getprop_5kenlm_13LanguageModel_order, 0, 0, 0},
+ {(char *)"path", __pyx_getprop_5kenlm_13LanguageModel_path, __pyx_setprop_5kenlm_13LanguageModel_path, 0, 0},
+ {0, 0, 0, 0, 0}
+};
+
+static PySequenceMethods __pyx_tp_as_sequence_LanguageModel = {
+ 0, /*sq_length*/
+ 0, /*sq_concat*/
+ 0, /*sq_repeat*/
+ 0, /*sq_item*/
+ 0, /*sq_slice*/
+ 0, /*sq_ass_item*/
+ 0, /*sq_ass_slice*/
+ __pyx_pw_5kenlm_13LanguageModel_10__contains__, /*sq_contains*/
+ 0, /*sq_inplace_concat*/
+ 0, /*sq_inplace_repeat*/
+};
+
+static PyTypeObject __pyx_type_5kenlm_LanguageModel = {
+ PyVarObject_HEAD_INIT(0, 0)
+ "kenlm.LanguageModel", /*tp_name*/
+ sizeof(struct __pyx_obj_5kenlm_LanguageModel), /*tp_basicsize*/
+ 0, /*tp_itemsize*/
+ __pyx_tp_dealloc_5kenlm_LanguageModel, /*tp_dealloc*/
+ 0, /*tp_print*/
+ 0, /*tp_getattr*/
+ 0, /*tp_setattr*/
+ #if PY_MAJOR_VERSION < 3
+ 0, /*tp_compare*/
+ #else
+ 0, /*reserved*/
+ #endif
+ __pyx_pw_5kenlm_13LanguageModel_12__repr__, /*tp_repr*/
+ 0, /*tp_as_number*/
+ &__pyx_tp_as_sequence_LanguageModel, /*tp_as_sequence*/
+ 0, /*tp_as_mapping*/
+ 0, /*tp_hash*/
+ 0, /*tp_call*/
+ 0, /*tp_str*/
+ 0, /*tp_getattro*/
+ 0, /*tp_setattro*/
+ 0, /*tp_as_buffer*/
+ Py_TPFLAGS_DEFAULT|Py_TPFLAGS_HAVE_VERSION_TAG|Py_TPFLAGS_CHECKTYPES|Py_TPFLAGS_HAVE_NEWBUFFER|Py_TPFLAGS_BASETYPE, /*tp_flags*/
+ "\n This is not a strict wrapper, the interface is more pythonic.\n It loads models and queries full sentences.\n ", /*tp_doc*/
+ 0, /*tp_traverse*/
+ 0, /*tp_clear*/
+ 0, /*tp_richcompare*/
+ 0, /*tp_weaklistoffset*/
+ 0, /*tp_iter*/
+ 0, /*tp_iternext*/
+ __pyx_methods_5kenlm_LanguageModel, /*tp_methods*/
+ 0, /*tp_members*/
+ __pyx_getsets_5kenlm_LanguageModel, /*tp_getset*/
+ 0, /*tp_base*/
+ 0, /*tp_dict*/
+ 0, /*tp_descr_get*/
+ 0, /*tp_descr_set*/
+ 0, /*tp_dictoffset*/
+ __pyx_pw_5kenlm_13LanguageModel_1__init__, /*tp_init*/
+ 0, /*tp_alloc*/
+ __pyx_tp_new_5kenlm_LanguageModel, /*tp_new*/
+ 0, /*tp_free*/
+ 0, /*tp_is_gc*/
+ 0, /*tp_bases*/
+ 0, /*tp_mro*/
+ 0, /*tp_cache*/
+ 0, /*tp_subclasses*/
+ 0, /*tp_weaklist*/
+ 0, /*tp_del*/
+ 0, /*tp_version_tag*/
+ #if PY_VERSION_HEX >= 0x030400a1
+ 0, /*tp_finalize*/
+ #endif
+};
+
+static PyObject *__pyx_tp_new_5kenlm_Model(PyTypeObject *t, CYTHON_UNUSED PyObject *a, CYTHON_UNUSED PyObject *k) {
+ struct __pyx_obj_5kenlm_Model *p;
+ PyObject *o;
+ if (likely((t->tp_flags & Py_TPFLAGS_IS_ABSTRACT) == 0)) {
+ o = (*t->tp_alloc)(t, 0);
+ } else {
+ o = (PyObject *) PyBaseObject_Type.tp_new(t, __pyx_empty_tuple, 0);
+ }
+ if (unlikely(!o)) return 0;
+ p = ((struct __pyx_obj_5kenlm_Model *)o);
+ p->path = ((PyObject*)Py_None); Py_INCREF(Py_None);
+ return o;
+}
+
+static void __pyx_tp_dealloc_5kenlm_Model(PyObject *o) {
+ struct __pyx_obj_5kenlm_Model *p = (struct __pyx_obj_5kenlm_Model *)o;
+ #if PY_VERSION_HEX >= 0x030400a1
+ if (unlikely(Py_TYPE(o)->tp_finalize) && (!PyType_IS_GC(Py_TYPE(o)) || !_PyGC_FINALIZED(o))) {
+ if (PyObject_CallFinalizerFromDealloc(o)) return;
+ }
+ #endif
+ {
+ PyObject *etype, *eval, *etb;
+ PyErr_Fetch(&etype, &eval, &etb);
+ ++Py_REFCNT(o);
+ __pyx_pw_5kenlm_5Model_3__dealloc__(o);
+ --Py_REFCNT(o);
+ PyErr_Restore(etype, eval, etb);
+ }
+ Py_CLEAR(p->path);
+ (*Py_TYPE(o)->tp_free)(o);
+}
+
+static PyObject *__pyx_getprop_5kenlm_5Model_order(PyObject *o, CYTHON_UNUSED void *x) {
+ return __pyx_pw_5kenlm_5Model_5order_1__get__(o);
+}
+
+static PyObject *__pyx_getprop_5kenlm_5Model_path(PyObject *o, CYTHON_UNUSED void *x) {
+ return __pyx_pw_5kenlm_5Model_4path_1__get__(o);
+}
+
+static int __pyx_setprop_5kenlm_5Model_path(PyObject *o, PyObject *v, CYTHON_UNUSED void *x) {
+ if (v) {
+ return __pyx_pw_5kenlm_5Model_4path_3__set__(o, v);
+ }
+ else {
+ return __pyx_pw_5kenlm_5Model_4path_5__del__(o);
+ }
+}
+
+static PyMethodDef __pyx_methods_5kenlm_Model[] = {
+ {"BeginSentenceWrite", (PyCFunction)__pyx_pw_5kenlm_5Model_5BeginSentenceWrite, METH_O, __pyx_doc_5kenlm_5Model_4BeginSentenceWrite},
+ {"NullContextWrite", (PyCFunction)__pyx_pw_5kenlm_5Model_7NullContextWrite, METH_O, __pyx_doc_5kenlm_5Model_6NullContextWrite},
+ {"BaseScore", (PyCFunction)__pyx_pw_5kenlm_5Model_9BaseScore, METH_VARARGS|METH_KEYWORDS, __pyx_doc_5kenlm_5Model_8BaseScore},
+ {"BaseFullScore", (PyCFunction)__pyx_pw_5kenlm_5Model_11BaseFullScore, METH_VARARGS|METH_KEYWORDS, __pyx_doc_5kenlm_5Model_10BaseFullScore},
+ {"__reduce__", (PyCFunction)__pyx_pw_5kenlm_5Model_17__reduce__, METH_NOARGS, 0},
+ {0, 0, 0, 0}
+};
+
+static struct PyGetSetDef __pyx_getsets_5kenlm_Model[] = {
+ {(char *)"order", __pyx_getprop_5kenlm_5Model_order, 0, 0, 0},
+ {(char *)"path", __pyx_getprop_5kenlm_5Model_path, __pyx_setprop_5kenlm_5Model_path, 0, 0},
+ {0, 0, 0, 0, 0}
+};
+
+static PySequenceMethods __pyx_tp_as_sequence_Model = {
+ 0, /*sq_length*/
+ 0, /*sq_concat*/
+ 0, /*sq_repeat*/
+ 0, /*sq_item*/
+ 0, /*sq_slice*/
+ 0, /*sq_ass_item*/
+ 0, /*sq_ass_slice*/
+ __pyx_pw_5kenlm_5Model_13__contains__, /*sq_contains*/
+ 0, /*sq_inplace_concat*/
+ 0, /*sq_inplace_repeat*/
+};
+
+static PyTypeObject __pyx_type_5kenlm_Model = {
+ PyVarObject_HEAD_INIT(0, 0)
+ "kenlm.Model", /*tp_name*/
+ sizeof(struct __pyx_obj_5kenlm_Model), /*tp_basicsize*/
+ 0, /*tp_itemsize*/
+ __pyx_tp_dealloc_5kenlm_Model, /*tp_dealloc*/
+ 0, /*tp_print*/
+ 0, /*tp_getattr*/
+ 0, /*tp_setattr*/
+ #if PY_MAJOR_VERSION < 3
+ 0, /*tp_compare*/
+ #else
+ 0, /*reserved*/
+ #endif
+ __pyx_pw_5kenlm_5Model_15__repr__, /*tp_repr*/
+ 0, /*tp_as_number*/
+ &__pyx_tp_as_sequence_Model, /*tp_as_sequence*/
+ 0, /*tp_as_mapping*/
+ 0, /*tp_hash*/
+ 0, /*tp_call*/
+ 0, /*tp_str*/
+ 0, /*tp_getattro*/
+ 0, /*tp_setattro*/
+ 0, /*tp_as_buffer*/
+ Py_TPFLAGS_DEFAULT|Py_TPFLAGS_HAVE_VERSION_TAG|Py_TPFLAGS_CHECKTYPES|Py_TPFLAGS_HAVE_NEWBUFFER|Py_TPFLAGS_BASETYPE, /*tp_flags*/
+ "\n This is closer to a wrapper around lm::ngram::Model.\n ", /*tp_doc*/
+ 0, /*tp_traverse*/
+ 0, /*tp_clear*/
+ 0, /*tp_richcompare*/
+ 0, /*tp_weaklistoffset*/
+ 0, /*tp_iter*/
+ 0, /*tp_iternext*/
+ __pyx_methods_5kenlm_Model, /*tp_methods*/
+ 0, /*tp_members*/
+ __pyx_getsets_5kenlm_Model, /*tp_getset*/
+ 0, /*tp_base*/
+ 0, /*tp_dict*/
+ 0, /*tp_descr_get*/
+ 0, /*tp_descr_set*/
+ 0, /*tp_dictoffset*/
+ __pyx_pw_5kenlm_5Model_1__init__, /*tp_init*/
+ 0, /*tp_alloc*/
+ __pyx_tp_new_5kenlm_Model, /*tp_new*/
+ 0, /*tp_free*/
+ 0, /*tp_is_gc*/
+ 0, /*tp_bases*/
+ 0, /*tp_mro*/
+ 0, /*tp_cache*/
+ 0, /*tp_subclasses*/
+ 0, /*tp_weaklist*/
+ 0, /*tp_del*/
+ 0, /*tp_version_tag*/
+ #if PY_VERSION_HEX >= 0x030400a1
+ 0, /*tp_finalize*/
+ #endif
+};
+
+static struct __pyx_obj_5kenlm___pyx_scope_struct__full_scores *__pyx_freelist_5kenlm___pyx_scope_struct__full_scores[8];
+static int __pyx_freecount_5kenlm___pyx_scope_struct__full_scores = 0;
+
+static PyObject *__pyx_tp_new_5kenlm___pyx_scope_struct__full_scores(PyTypeObject *t, CYTHON_UNUSED PyObject *a, CYTHON_UNUSED PyObject *k) {
+ struct __pyx_obj_5kenlm___pyx_scope_struct__full_scores *p;
+ PyObject *o;
+ if (CYTHON_COMPILING_IN_CPYTHON && likely((__pyx_freecount_5kenlm___pyx_scope_struct__full_scores > 0) & (t->tp_basicsize == sizeof(struct __pyx_obj_5kenlm___pyx_scope_struct__full_scores)))) {
+ o = (PyObject*)__pyx_freelist_5kenlm___pyx_scope_struct__full_scores[--__pyx_freecount_5kenlm___pyx_scope_struct__full_scores];
+ memset(o, 0, sizeof(struct __pyx_obj_5kenlm___pyx_scope_struct__full_scores));
+ (void) PyObject_INIT(o, t);
+ PyObject_GC_Track(o);
+ } else {
+ o = (*t->tp_alloc)(t, 0);
+ if (unlikely(!o)) return 0;
+ }
+ p = ((struct __pyx_obj_5kenlm___pyx_scope_struct__full_scores *)o);
+ new((void*)&(p->__pyx_v_out_state)) lm::ngram::State();
+ new((void*)&(p->__pyx_v_state)) lm::ngram::State();
+ return o;
+}
+
+static void __pyx_tp_dealloc_5kenlm___pyx_scope_struct__full_scores(PyObject *o) {
+ struct __pyx_obj_5kenlm___pyx_scope_struct__full_scores *p = (struct __pyx_obj_5kenlm___pyx_scope_struct__full_scores *)o;
+ PyObject_GC_UnTrack(o);
+ __Pyx_call_destructor(&p->__pyx_v_out_state);
+ __Pyx_call_destructor(&p->__pyx_v_state);
+ Py_CLEAR(p->__pyx_v_bos);
+ Py_CLEAR(p->__pyx_v_eos);
+ Py_CLEAR(p->__pyx_v_self);
+ Py_CLEAR(p->__pyx_v_sentence);
+ Py_CLEAR(p->__pyx_v_word);
+ Py_CLEAR(p->__pyx_v_words);
+ Py_CLEAR(p->__pyx_t_0);
+ if (CYTHON_COMPILING_IN_CPYTHON && ((__pyx_freecount_5kenlm___pyx_scope_struct__full_scores < 8) & (Py_TYPE(o)->tp_basicsize == sizeof(struct __pyx_obj_5kenlm___pyx_scope_struct__full_scores)))) {
+ __pyx_freelist_5kenlm___pyx_scope_struct__full_scores[__pyx_freecount_5kenlm___pyx_scope_struct__full_scores++] = ((struct __pyx_obj_5kenlm___pyx_scope_struct__full_scores *)o);
+ } else {
+ (*Py_TYPE(o)->tp_free)(o);
+ }
+}
+
+static int __pyx_tp_traverse_5kenlm___pyx_scope_struct__full_scores(PyObject *o, visitproc v, void *a) {
+ int e;
+ struct __pyx_obj_5kenlm___pyx_scope_struct__full_scores *p = (struct __pyx_obj_5kenlm___pyx_scope_struct__full_scores *)o;
+ if (p->__pyx_v_bos) {
+ e = (*v)(p->__pyx_v_bos, a); if (e) return e;
+ }
+ if (p->__pyx_v_eos) {
+ e = (*v)(p->__pyx_v_eos, a); if (e) return e;
+ }
+ if (p->__pyx_v_self) {
+ e = (*v)(((PyObject*)p->__pyx_v_self), a); if (e) return e;
+ }
+ if (p->__pyx_v_sentence) {
+ e = (*v)(p->__pyx_v_sentence, a); if (e) return e;
+ }
+ if (p->__pyx_v_word) {
+ e = (*v)(p->__pyx_v_word, a); if (e) return e;
+ }
+ if (p->__pyx_v_words) {
+ e = (*v)(p->__pyx_v_words, a); if (e) return e;
+ }
+ if (p->__pyx_t_0) {
+ e = (*v)(p->__pyx_t_0, a); if (e) return e;
+ }
+ return 0;
+}
+
+static int __pyx_tp_clear_5kenlm___pyx_scope_struct__full_scores(PyObject *o) {
+ PyObject* tmp;
+ struct __pyx_obj_5kenlm___pyx_scope_struct__full_scores *p = (struct __pyx_obj_5kenlm___pyx_scope_struct__full_scores *)o;
+ tmp = ((PyObject*)p->__pyx_v_bos);
+ p->__pyx_v_bos = Py_None; Py_INCREF(Py_None);
+ Py_XDECREF(tmp);
+ tmp = ((PyObject*)p->__pyx_v_eos);
+ p->__pyx_v_eos = Py_None; Py_INCREF(Py_None);
+ Py_XDECREF(tmp);
+ tmp = ((PyObject*)p->__pyx_v_self);
+ p->__pyx_v_self = ((struct __pyx_obj_5kenlm_LanguageModel *)Py_None); Py_INCREF(Py_None);
+ Py_XDECREF(tmp);
+ tmp = ((PyObject*)p->__pyx_v_sentence);
+ p->__pyx_v_sentence = Py_None; Py_INCREF(Py_None);
+ Py_XDECREF(tmp);
+ tmp = ((PyObject*)p->__pyx_v_word);
+ p->__pyx_v_word = Py_None; Py_INCREF(Py_None);
+ Py_XDECREF(tmp);
+ tmp = ((PyObject*)p->__pyx_v_words);
+ p->__pyx_v_words = ((PyObject*)Py_None); Py_INCREF(Py_None);
+ Py_XDECREF(tmp);
+ tmp = ((PyObject*)p->__pyx_t_0);
+ p->__pyx_t_0 = Py_None; Py_INCREF(Py_None);
+ Py_XDECREF(tmp);
+ return 0;
+}
+
+static PyTypeObject __pyx_type_5kenlm___pyx_scope_struct__full_scores = {
+ PyVarObject_HEAD_INIT(0, 0)
+ "kenlm.__pyx_scope_struct__full_scores", /*tp_name*/
+ sizeof(struct __pyx_obj_5kenlm___pyx_scope_struct__full_scores), /*tp_basicsize*/
+ 0, /*tp_itemsize*/
+ __pyx_tp_dealloc_5kenlm___pyx_scope_struct__full_scores, /*tp_dealloc*/
+ 0, /*tp_print*/
+ 0, /*tp_getattr*/
+ 0, /*tp_setattr*/
+ #if PY_MAJOR_VERSION < 3
+ 0, /*tp_compare*/
+ #else
+ 0, /*reserved*/
+ #endif
+ 0, /*tp_repr*/
+ 0, /*tp_as_number*/
+ 0, /*tp_as_sequence*/
+ 0, /*tp_as_mapping*/
+ 0, /*tp_hash*/
+ 0, /*tp_call*/
+ 0, /*tp_str*/
+ 0, /*tp_getattro*/
+ 0, /*tp_setattro*/
+ 0, /*tp_as_buffer*/
+ Py_TPFLAGS_DEFAULT|Py_TPFLAGS_HAVE_VERSION_TAG|Py_TPFLAGS_CHECKTYPES|Py_TPFLAGS_HAVE_NEWBUFFER|Py_TPFLAGS_HAVE_GC, /*tp_flags*/
+ 0, /*tp_doc*/
+ __pyx_tp_traverse_5kenlm___pyx_scope_struct__full_scores, /*tp_traverse*/
+ __pyx_tp_clear_5kenlm___pyx_scope_struct__full_scores, /*tp_clear*/
+ 0, /*tp_richcompare*/
+ 0, /*tp_weaklistoffset*/
+ 0, /*tp_iter*/
+ 0, /*tp_iternext*/
+ 0, /*tp_methods*/
+ 0, /*tp_members*/
+ 0, /*tp_getset*/
+ 0, /*tp_base*/
+ 0, /*tp_dict*/
+ 0, /*tp_descr_get*/
+ 0, /*tp_descr_set*/
+ 0, /*tp_dictoffset*/
+ 0, /*tp_init*/
+ 0, /*tp_alloc*/
+ __pyx_tp_new_5kenlm___pyx_scope_struct__full_scores, /*tp_new*/
+ 0, /*tp_free*/
+ 0, /*tp_is_gc*/
+ 0, /*tp_bases*/
+ 0, /*tp_mro*/
+ 0, /*tp_cache*/
+ 0, /*tp_subclasses*/
+ 0, /*tp_weaklist*/
+ 0, /*tp_del*/
+ 0, /*tp_version_tag*/
+ #if PY_VERSION_HEX >= 0x030400a1
+ 0, /*tp_finalize*/
+ #endif
+};
+
+static PyMethodDef __pyx_methods[] = {
+ {0, 0, 0, 0}
+};
+
+#if PY_MAJOR_VERSION >= 3
+static struct PyModuleDef __pyx_moduledef = {
+ #if PY_VERSION_HEX < 0x03020000
+ { PyObject_HEAD_INIT(NULL) NULL, 0, NULL },
+ #else
+ PyModuleDef_HEAD_INIT,
+ #endif
+ "kenlm",
+ 0, /* m_doc */
+ -1, /* m_size */
+ __pyx_methods /* m_methods */,
+ NULL, /* m_reload */
+ NULL, /* m_traverse */
+ NULL, /* m_clear */
+ NULL /* m_free */
+};
+#endif
+
+static __Pyx_StringTabEntry __pyx_string_tab[] = {
+ {&__pyx_kp_s_0_1_2_3, __pyx_k_0_1_2_3, sizeof(__pyx_k_0_1_2_3), 0, 0, 1, 0},
+ {&__pyx_kp_s_Cannot_convert_s_to_string, __pyx_k_Cannot_convert_s_to_string, sizeof(__pyx_k_Cannot_convert_s_to_string), 0, 0, 1, 0},
+ {&__pyx_kp_s_Cannot_read_model, __pyx_k_Cannot_read_model, sizeof(__pyx_k_Cannot_read_model), 0, 0, 1, 0},
+ {&__pyx_n_s_IOError, __pyx_k_IOError, sizeof(__pyx_k_IOError), 0, 0, 1, 1},
+ {&__pyx_n_s_LanguageModel, __pyx_k_LanguageModel, sizeof(__pyx_k_LanguageModel), 0, 0, 1, 1},
+ {&__pyx_kp_s_LanguageModel_from_0, __pyx_k_LanguageModel_from_0, sizeof(__pyx_k_LanguageModel_from_0), 0, 0, 1, 0},
+ {&__pyx_n_s_LanguageModel_full_scores, __pyx_k_LanguageModel_full_scores, sizeof(__pyx_k_LanguageModel_full_scores), 0, 0, 1, 1},
+ {&__pyx_kp_s_Model_from_0, __pyx_k_Model_from_0, sizeof(__pyx_k_Model_from_0), 0, 0, 1, 0},
+ {&__pyx_n_s_RuntimeError, __pyx_k_RuntimeError, sizeof(__pyx_k_RuntimeError), 0, 0, 1, 1},
+ {&__pyx_n_s_TypeError, __pyx_k_TypeError, sizeof(__pyx_k_TypeError), 0, 0, 1, 1},
+ {&__pyx_kp_s__2, __pyx_k__2, sizeof(__pyx_k__2), 0, 0, 1, 0},
+ {&__pyx_kp_s__3, __pyx_k__3, sizeof(__pyx_k__3), 0, 0, 1, 0},
+ {&__pyx_n_s_abspath, __pyx_k_abspath, sizeof(__pyx_k_abspath), 0, 0, 1, 1},
+ {&__pyx_n_s_args, __pyx_k_args, sizeof(__pyx_k_args), 0, 0, 1, 1},
+ {&__pyx_n_s_basename, __pyx_k_basename, sizeof(__pyx_k_basename), 0, 0, 1, 1},
+ {&__pyx_n_s_bos, __pyx_k_bos, sizeof(__pyx_k_bos), 0, 0, 1, 1},
+ {&__pyx_n_s_class, __pyx_k_class, sizeof(__pyx_k_class), 0, 0, 1, 1},
+ {&__pyx_n_s_close, __pyx_k_close, sizeof(__pyx_k_close), 0, 0, 1, 1},
+ {&__pyx_n_s_encode, __pyx_k_encode, sizeof(__pyx_k_encode), 0, 0, 1, 1},
+ {&__pyx_n_s_eos, __pyx_k_eos, sizeof(__pyx_k_eos), 0, 0, 1, 1},
+ {&__pyx_n_s_format, __pyx_k_format, sizeof(__pyx_k_format), 0, 0, 1, 1},
+ {&__pyx_n_s_full_scores, __pyx_k_full_scores, sizeof(__pyx_k_full_scores), 0, 0, 1, 1},
+ {&__pyx_n_s_import, __pyx_k_import, sizeof(__pyx_k_import), 0, 0, 1, 1},
+ {&__pyx_n_s_in_state, __pyx_k_in_state, sizeof(__pyx_k_in_state), 0, 0, 1, 1},
+ {&__pyx_n_s_kenlm, __pyx_k_kenlm, sizeof(__pyx_k_kenlm), 0, 0, 1, 1},
+ {&__pyx_n_s_log_prob, __pyx_k_log_prob, sizeof(__pyx_k_log_prob), 0, 0, 1, 1},
+ {&__pyx_n_s_main, __pyx_k_main, sizeof(__pyx_k_main), 0, 0, 1, 1},
+ {&__pyx_n_s_name, __pyx_k_name, sizeof(__pyx_k_name), 0, 0, 1, 1},
+ {&__pyx_n_s_ngram_length, __pyx_k_ngram_length, sizeof(__pyx_k_ngram_length), 0, 0, 1, 1},
+ {&__pyx_n_s_oov, __pyx_k_oov, sizeof(__pyx_k_oov), 0, 0, 1, 1},
+ {&__pyx_n_s_os, __pyx_k_os, sizeof(__pyx_k_os), 0, 0, 1, 1},
+ {&__pyx_n_s_out_state, __pyx_k_out_state, sizeof(__pyx_k_out_state), 0, 0, 1, 1},
+ {&__pyx_n_s_path, __pyx_k_path, sizeof(__pyx_k_path), 0, 0, 1, 1},
+ {&__pyx_n_s_replace, __pyx_k_replace, sizeof(__pyx_k_replace), 0, 0, 1, 1},
+ {&__pyx_n_s_send, __pyx_k_send, sizeof(__pyx_k_send), 0, 0, 1, 1},
+ {&__pyx_n_s_sentence, __pyx_k_sentence, sizeof(__pyx_k_sentence), 0, 0, 1, 1},
+ {&__pyx_n_s_split, __pyx_k_split, sizeof(__pyx_k_split), 0, 0, 1, 1},
+ {&__pyx_n_s_test, __pyx_k_test, sizeof(__pyx_k_test), 0, 0, 1, 1},
+ {&__pyx_n_s_throw, __pyx_k_throw, sizeof(__pyx_k_throw), 0, 0, 1, 1},
+ {&__pyx_n_s_utf8, __pyx_k_utf8, sizeof(__pyx_k_utf8), 0, 0, 1, 1},
+ {&__pyx_n_s_word, __pyx_k_word, sizeof(__pyx_k_word), 0, 0, 1, 1},
+ {0, 0, 0, 0, 0, 0, 0}
+};
+static int __Pyx_InitCachedBuiltins(void) {
+ __pyx_builtin_TypeError = __Pyx_GetBuiltinName(__pyx_n_s_TypeError); if (!__pyx_builtin_TypeError) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 9; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+ __pyx_builtin_RuntimeError = __Pyx_GetBuiltinName(__pyx_n_s_RuntimeError); if (!__pyx_builtin_RuntimeError) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 93; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+ __pyx_builtin_IOError = __Pyx_GetBuiltinName(__pyx_n_s_IOError); if (!__pyx_builtin_IOError) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 95; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+ return 0;
+ __pyx_L1_error:;
+ return -1;
+}
+
+static int __Pyx_InitCachedConstants(void) {
+ __Pyx_RefNannyDeclarations
+ __Pyx_RefNannySetupContext("__Pyx_InitCachedConstants", 0);
+
+ /* "kenlm.pyx":8
+ * return data
+ * elif isinstance(data, unicode):
+ * return data.encode('utf8') # <<<<<<<<<<<<<<
+ * raise TypeError('Cannot convert %s to string' % type(data))
+ *
+ */
+ __pyx_tuple_ = PyTuple_Pack(1, __pyx_n_s_utf8); if (unlikely(!__pyx_tuple_)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 8; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+ __Pyx_GOTREF(__pyx_tuple_);
+ __Pyx_GIVEREF(__pyx_tuple_);
+
+ /* "kenlm.pyx":94
+ * self.model = _kenlm.LoadVirtual(self.path)
+ * except RuntimeError as exception:
+ * exception_message = str(exception).replace('\n', ' ') # <<<<<<<<<<<<<<
+ * raise IOError('Cannot read model \'{}\' ({})'.format(path, exception_message))\
+ * from exception
+ */
+ __pyx_tuple__4 = PyTuple_Pack(2, __pyx_kp_s__2, __pyx_kp_s__3); if (unlikely(!__pyx_tuple__4)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 94; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+ __Pyx_GOTREF(__pyx_tuple__4);
+ __Pyx_GIVEREF(__pyx_tuple__4);
+
+ /* "kenlm.pyx":178
+ * self.model = _kenlm.LoadVirtual(self.path)
+ * except RuntimeError as exception:
+ * exception_message = str(exception).replace('\n', ' ') # <<<<<<<<<<<<<<
+ * raise IOError('Cannot read model \'{}\' ({})'.format(path, exception_message))\
+ * from exception
+ */
+ __pyx_tuple__5 = PyTuple_Pack(2, __pyx_kp_s__2, __pyx_kp_s__3); if (unlikely(!__pyx_tuple__5)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 178; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+ __Pyx_GOTREF(__pyx_tuple__5);
+ __Pyx_GIVEREF(__pyx_tuple__5);
+ __Pyx_RefNannyFinishContext();
+ return 0;
+ __pyx_L1_error:;
+ __Pyx_RefNannyFinishContext();
+ return -1;
+}
+
+static int __Pyx_InitGlobals(void) {
+ if (__Pyx_InitStrings(__pyx_string_tab) < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1; __pyx_clineno = __LINE__; goto __pyx_L1_error;};
+ return 0;
+ __pyx_L1_error:;
+ return -1;
+}
+
+#if PY_MAJOR_VERSION < 3
+PyMODINIT_FUNC initkenlm(void); /*proto*/
+PyMODINIT_FUNC initkenlm(void)
+#else
+PyMODINIT_FUNC PyInit_kenlm(void); /*proto*/
+PyMODINIT_FUNC PyInit_kenlm(void)
+#endif
+{
+ PyObject *__pyx_t_1 = NULL;
+ int __pyx_lineno = 0;
+ const char *__pyx_filename = NULL;
+ int __pyx_clineno = 0;
+ __Pyx_RefNannyDeclarations
+ #if CYTHON_REFNANNY
+ __Pyx_RefNanny = __Pyx_RefNannyImportAPI("refnanny");
+ if (!__Pyx_RefNanny) {
+ PyErr_Clear();
+ __Pyx_RefNanny = __Pyx_RefNannyImportAPI("Cython.Runtime.refnanny");
+ if (!__Pyx_RefNanny)
+ Py_FatalError("failed to import 'refnanny' module");
+ }
+ #endif
+ __Pyx_RefNannySetupContext("PyMODINIT_FUNC PyInit_kenlm(void)", 0);
+ if ( __Pyx_check_binary_version() < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+ __pyx_empty_tuple = PyTuple_New(0); if (unlikely(!__pyx_empty_tuple)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+ __pyx_empty_bytes = PyBytes_FromStringAndSize("", 0); if (unlikely(!__pyx_empty_bytes)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+ #ifdef __Pyx_CyFunction_USED
+ if (__Pyx_CyFunction_init() < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+ #endif
+ #ifdef __Pyx_FusedFunction_USED
+ if (__pyx_FusedFunction_init() < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+ #endif
+ #ifdef __Pyx_Generator_USED
+ if (__pyx_Generator_init() < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+ #endif
+ /*--- Library function declarations ---*/
+ /*--- Threads initialization code ---*/
+ #if defined(__PYX_FORCE_INIT_THREADS) && __PYX_FORCE_INIT_THREADS
+ #ifdef WITH_THREAD /* Python build with threading support? */
+ PyEval_InitThreads();
+ #endif
+ #endif
+ /*--- Module creation code ---*/
+ #if PY_MAJOR_VERSION < 3
+ __pyx_m = Py_InitModule4("kenlm", __pyx_methods, 0, 0, PYTHON_API_VERSION); Py_XINCREF(__pyx_m);
+ #else
+ __pyx_m = PyModule_Create(&__pyx_moduledef);
+ #endif
+ if (unlikely(!__pyx_m)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+ __pyx_d = PyModule_GetDict(__pyx_m); if (unlikely(!__pyx_d)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+ Py_INCREF(__pyx_d);
+ __pyx_b = PyImport_AddModule(__Pyx_BUILTIN_MODULE_NAME); if (unlikely(!__pyx_b)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+ #if CYTHON_COMPILING_IN_PYPY
+ Py_INCREF(__pyx_b);
+ #endif
+ if (PyObject_SetAttrString(__pyx_m, "__builtins__", __pyx_b) < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1; __pyx_clineno = __LINE__; goto __pyx_L1_error;};
+ /*--- Initialize various global constants etc. ---*/
+ if (unlikely(__Pyx_InitGlobals() < 0)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+ #if PY_MAJOR_VERSION < 3 && (__PYX_DEFAULT_STRING_ENCODING_IS_ASCII || __PYX_DEFAULT_STRING_ENCODING_IS_DEFAULT)
+ if (__Pyx_init_sys_getdefaultencoding_params() < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+ #endif
+ if (__pyx_module_is_main_kenlm) {
+ if (PyObject_SetAttrString(__pyx_m, "__name__", __pyx_n_s_main) < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1; __pyx_clineno = __LINE__; goto __pyx_L1_error;};
+ }
+ #if PY_MAJOR_VERSION >= 3
+ {
+ PyObject *modules = PyImport_GetModuleDict(); if (unlikely(!modules)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+ if (!PyDict_GetItemString(modules, "kenlm")) {
+ if (unlikely(PyDict_SetItemString(modules, "kenlm", __pyx_m) < 0)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+ }
+ }
+ #endif
+ /*--- Builtin init code ---*/
+ if (unlikely(__Pyx_InitCachedBuiltins() < 0)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+ /*--- Constants init code ---*/
+ if (unlikely(__Pyx_InitCachedConstants() < 0)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+ /*--- Global init code ---*/
+ /*--- Variable export code ---*/
+ /*--- Function export code ---*/
+ /*--- Type init code ---*/
+ if (PyType_Ready(&__pyx_type_5kenlm_FullScoreReturn) < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 11; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+ __pyx_type_5kenlm_FullScoreReturn.tp_print = 0;
+ if (PyObject_SetAttrString(__pyx_m, "FullScoreReturn", (PyObject *)&__pyx_type_5kenlm_FullScoreReturn) < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 11; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+ __pyx_ptype_5kenlm_FullScoreReturn = &__pyx_type_5kenlm_FullScoreReturn;
+ if (PyType_Ready(&__pyx_type_5kenlm_State) < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 44; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+ __pyx_type_5kenlm_State.tp_print = 0;
+ if (PyObject_SetAttrString(__pyx_m, "State", (PyObject *)&__pyx_type_5kenlm_State) < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 44; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+ __pyx_ptype_5kenlm_State = &__pyx_type_5kenlm_State;
+ if (PyType_Ready(&__pyx_type_5kenlm_LanguageModel) < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 74; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+ __pyx_type_5kenlm_LanguageModel.tp_print = 0;
+ #if CYTHON_COMPILING_IN_CPYTHON
+ {
+ PyObject *wrapper = PyObject_GetAttrString((PyObject *)&__pyx_type_5kenlm_LanguageModel, "__init__"); if (unlikely(!wrapper)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 74; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+ if (Py_TYPE(wrapper) == &PyWrapperDescr_Type) {
+ __pyx_wrapperbase_5kenlm_13LanguageModel___init__ = *((PyWrapperDescrObject *)wrapper)->d_base;
+ __pyx_wrapperbase_5kenlm_13LanguageModel___init__.doc = __pyx_doc_5kenlm_13LanguageModel___init__;
+ ((PyWrapperDescrObject *)wrapper)->d_base = &__pyx_wrapperbase_5kenlm_13LanguageModel___init__;
+ }
+ }
+ #endif
+ if (PyObject_SetAttrString(__pyx_m, "LanguageModel", (PyObject *)&__pyx_type_5kenlm_LanguageModel) < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 74; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+ __pyx_ptype_5kenlm_LanguageModel = &__pyx_type_5kenlm_LanguageModel;
+ if (PyType_Ready(&__pyx_type_5kenlm_Model) < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 159; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+ __pyx_type_5kenlm_Model.tp_print = 0;
+ #if CYTHON_COMPILING_IN_CPYTHON
+ {
+ PyObject *wrapper = PyObject_GetAttrString((PyObject *)&__pyx_type_5kenlm_Model, "__init__"); if (unlikely(!wrapper)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 159; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+ if (Py_TYPE(wrapper) == &PyWrapperDescr_Type) {
+ __pyx_wrapperbase_5kenlm_5Model___init__ = *((PyWrapperDescrObject *)wrapper)->d_base;
+ __pyx_wrapperbase_5kenlm_5Model___init__.doc = __pyx_doc_5kenlm_5Model___init__;
+ ((PyWrapperDescrObject *)wrapper)->d_base = &__pyx_wrapperbase_5kenlm_5Model___init__;
+ }
+ }
+ #endif
+ if (PyObject_SetAttrString(__pyx_m, "Model", (PyObject *)&__pyx_type_5kenlm_Model) < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 159; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+ __pyx_ptype_5kenlm_Model = &__pyx_type_5kenlm_Model;
+ if (PyType_Ready(&__pyx_type_5kenlm___pyx_scope_struct__full_scores) < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 122; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+ __pyx_type_5kenlm___pyx_scope_struct__full_scores.tp_print = 0;
+ __pyx_ptype_5kenlm___pyx_scope_struct__full_scores = &__pyx_type_5kenlm___pyx_scope_struct__full_scores;
+ /*--- Type import code ---*/
+ /*--- Variable import code ---*/
+ /*--- Function import code ---*/
+ /*--- Execution code ---*/
+
+ /* "kenlm.pyx":1
+ * import os # <<<<<<<<<<<<<<
+ * cimport _kenlm
+ *
+ */
+ __pyx_t_1 = __Pyx_Import(__pyx_n_s_os, 0, -1); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+ __Pyx_GOTREF(__pyx_t_1);
+ if (PyDict_SetItem(__pyx_d, __pyx_n_s_os, __pyx_t_1) < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+ __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
+
+ /* "kenlm.pyx":165
+ *
+ * cdef _kenlm.Model* model
+ * cdef public bytes path # <<<<<<<<<<<<<<
+ * cdef _kenlm.const_Vocabulary* vocab
+ *
+ */
+ __pyx_t_1 = PyDict_New(); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+ __Pyx_GOTREF(__pyx_t_1);
+ if (PyDict_SetItem(__pyx_d, __pyx_n_s_test, __pyx_t_1) < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+ __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
+
+ /*--- Wrapped vars code ---*/
+
+ goto __pyx_L0;
+ __pyx_L1_error:;
+ __Pyx_XDECREF(__pyx_t_1);
+ if (__pyx_m) {
+ if (__pyx_d) {
+ __Pyx_AddTraceback("init kenlm", __pyx_clineno, __pyx_lineno, __pyx_filename);
+ }
+ Py_DECREF(__pyx_m); __pyx_m = 0;
+ } else if (!PyErr_Occurred()) {
+ PyErr_SetString(PyExc_ImportError, "init kenlm");
+ }
+ __pyx_L0:;
+ __Pyx_RefNannyFinishContext();
+ #if PY_MAJOR_VERSION < 3
+ return;
+ #else
+ return __pyx_m;
+ #endif
+}
+
+/* --- Runtime support code --- */
+#if CYTHON_REFNANNY
+static __Pyx_RefNannyAPIStruct *__Pyx_RefNannyImportAPI(const char *modname) {
+ PyObject *m = NULL, *p = NULL;
+ void *r = NULL;
+ m = PyImport_ImportModule((char *)modname);
+ if (!m) goto end;
+ p = PyObject_GetAttrString(m, (char *)"RefNannyAPI");
+ if (!p) goto end;
+ r = PyLong_AsVoidPtr(p);
+end:
+ Py_XDECREF(p);
+ Py_XDECREF(m);
+ return (__Pyx_RefNannyAPIStruct *)r;
+}
+#endif
+
+static PyObject *__Pyx_GetBuiltinName(PyObject *name) {
+ PyObject* result = __Pyx_PyObject_GetAttrStr(__pyx_b, name);
+ if (unlikely(!result)) {
+ PyErr_Format(PyExc_NameError,
+#if PY_MAJOR_VERSION >= 3
+ "name '%U' is not defined", name);
+#else
+ "name '%.200s' is not defined", PyString_AS_STRING(name));
+#endif
+ }
+ return result;
+}
+
+#if CYTHON_COMPILING_IN_CPYTHON
+static CYTHON_INLINE PyObject* __Pyx_PyObject_Call(PyObject *func, PyObject *arg, PyObject *kw) {
+ PyObject *result;
+ ternaryfunc call = func->ob_type->tp_call;
+ if (unlikely(!call))
+ return PyObject_Call(func, arg, kw);
+ if (unlikely(Py_EnterRecursiveCall((char*)" while calling a Python object")))
+ return NULL;
+ result = (*call)(func, arg, kw);
+ Py_LeaveRecursiveCall();
+ if (unlikely(!result) && unlikely(!PyErr_Occurred())) {
+ PyErr_SetString(
+ PyExc_SystemError,
+ "NULL result without error in PyObject_Call");
+ }
+ return result;
+}
+#endif
+
+static CYTHON_INLINE void __Pyx_ErrRestore(PyObject *type, PyObject *value, PyObject *tb) {
+#if CYTHON_COMPILING_IN_CPYTHON
+ PyObject *tmp_type, *tmp_value, *tmp_tb;
+ PyThreadState *tstate = PyThreadState_GET();
+ tmp_type = tstate->curexc_type;
+ tmp_value = tstate->curexc_value;
+ tmp_tb = tstate->curexc_traceback;
+ tstate->curexc_type = type;
+ tstate->curexc_value = value;
+ tstate->curexc_traceback = tb;
+ Py_XDECREF(tmp_type);
+ Py_XDECREF(tmp_value);
+ Py_XDECREF(tmp_tb);
+#else
+ PyErr_Restore(type, value, tb);
+#endif
+}
+static CYTHON_INLINE void __Pyx_ErrFetch(PyObject **type, PyObject **value, PyObject **tb) {
+#if CYTHON_COMPILING_IN_CPYTHON
+ PyThreadState *tstate = PyThreadState_GET();
+ *type = tstate->curexc_type;
+ *value = tstate->curexc_value;
+ *tb = tstate->curexc_traceback;
+ tstate->curexc_type = 0;
+ tstate->curexc_value = 0;
+ tstate->curexc_traceback = 0;
+#else
+ PyErr_Fetch(type, value, tb);
+#endif
+}
+
+#if PY_MAJOR_VERSION < 3
+static void __Pyx_Raise(PyObject *type, PyObject *value, PyObject *tb,
+ CYTHON_UNUSED PyObject *cause) {
+ Py_XINCREF(type);
+ if (!value || value == Py_None)
+ value = NULL;
+ else
+ Py_INCREF(value);
+ if (!tb || tb == Py_None)
+ tb = NULL;
+ else {
+ Py_INCREF(tb);
+ if (!PyTraceBack_Check(tb)) {
+ PyErr_SetString(PyExc_TypeError,
+ "raise: arg 3 must be a traceback or None");
+ goto raise_error;
+ }
+ }
+ if (PyType_Check(type)) {
+#if CYTHON_COMPILING_IN_PYPY
+ if (!value) {
+ Py_INCREF(Py_None);
+ value = Py_None;
+ }
+#endif
+ PyErr_NormalizeException(&type, &value, &tb);
+ } else {
+ if (value) {
+ PyErr_SetString(PyExc_TypeError,
+ "instance exception may not have a separate value");
+ goto raise_error;
+ }
+ value = type;
+ type = (PyObject*) Py_TYPE(type);
+ Py_INCREF(type);
+ if (!PyType_IsSubtype((PyTypeObject *)type, (PyTypeObject *)PyExc_BaseException)) {
+ PyErr_SetString(PyExc_TypeError,
+ "raise: exception class must be a subclass of BaseException");
+ goto raise_error;
+ }
+ }
+ __Pyx_ErrRestore(type, value, tb);
+ return;
+raise_error:
+ Py_XDECREF(value);
+ Py_XDECREF(type);
+ Py_XDECREF(tb);
+ return;
+}
+#else
+static void __Pyx_Raise(PyObject *type, PyObject *value, PyObject *tb, PyObject *cause) {
+ PyObject* owned_instance = NULL;
+ if (tb == Py_None) {
+ tb = 0;
+ } else if (tb && !PyTraceBack_Check(tb)) {
+ PyErr_SetString(PyExc_TypeError,
+ "raise: arg 3 must be a traceback or None");
+ goto bad;
+ }
+ if (value == Py_None)
+ value = 0;
+ if (PyExceptionInstance_Check(type)) {
+ if (value) {
+ PyErr_SetString(PyExc_TypeError,
+ "instance exception may not have a separate value");
+ goto bad;
+ }
+ value = type;
+ type = (PyObject*) Py_TYPE(value);
+ } else if (PyExceptionClass_Check(type)) {
+ PyObject *instance_class = NULL;
+ if (value && PyExceptionInstance_Check(value)) {
+ instance_class = (PyObject*) Py_TYPE(value);
+ if (instance_class != type) {
+ if (PyObject_IsSubclass(instance_class, type)) {
+ type = instance_class;
+ } else {
+ instance_class = NULL;
+ }
+ }
+ }
+ if (!instance_class) {
+ PyObject *args;
+ if (!value)
+ args = PyTuple_New(0);
+ else if (PyTuple_Check(value)) {
+ Py_INCREF(value);
+ args = value;
+ } else
+ args = PyTuple_Pack(1, value);
+ if (!args)
+ goto bad;
+ owned_instance = PyObject_Call(type, args, NULL);
+ Py_DECREF(args);
+ if (!owned_instance)
+ goto bad;
+ value = owned_instance;
+ if (!PyExceptionInstance_Check(value)) {
+ PyErr_Format(PyExc_TypeError,
+ "calling %R should have returned an instance of "
+ "BaseException, not %R",
+ type, Py_TYPE(value));
+ goto bad;
+ }
+ }
+ } else {
+ PyErr_SetString(PyExc_TypeError,
+ "raise: exception class must be a subclass of BaseException");
+ goto bad;
+ }
+#if PY_VERSION_HEX >= 0x03030000
+ if (cause) {
+#else
+ if (cause && cause != Py_None) {
+#endif
+ PyObject *fixed_cause;
+ if (cause == Py_None) {
+ fixed_cause = NULL;
+ } else if (PyExceptionClass_Check(cause)) {
+ fixed_cause = PyObject_CallObject(cause, NULL);
+ if (fixed_cause == NULL)
+ goto bad;
+ } else if (PyExceptionInstance_Check(cause)) {
+ fixed_cause = cause;
+ Py_INCREF(fixed_cause);
+ } else {
+ PyErr_SetString(PyExc_TypeError,
+ "exception causes must derive from "
+ "BaseException");
+ goto bad;
+ }
+ PyException_SetCause(value, fixed_cause);
+ }
+ PyErr_SetObject(type, value);
+ if (tb) {
+#if CYTHON_COMPILING_IN_PYPY
+ PyObject *tmp_type, *tmp_value, *tmp_tb;
+ PyErr_Fetch(tmp_type, tmp_value, tmp_tb);
+ Py_INCREF(tb);
+ PyErr_Restore(tmp_type, tmp_value, tb);
+ Py_XDECREF(tmp_tb);
+#else
+ PyThreadState *tstate = PyThreadState_GET();
+ PyObject* tmp_tb = tstate->curexc_traceback;
+ if (tb != tmp_tb) {
+ Py_INCREF(tb);
+ tstate->curexc_traceback = tb;
+ Py_XDECREF(tmp_tb);
+ }
+#endif
+ }
+bad:
+ Py_XDECREF(owned_instance);
+ return;
+}
+#endif
+
+static void __Pyx_RaiseArgtupleInvalid(
+ const char* func_name,
+ int exact,
+ Py_ssize_t num_min,
+ Py_ssize_t num_max,
+ Py_ssize_t num_found)
+{
+ Py_ssize_t num_expected;
+ const char *more_or_less;
+ if (num_found < num_min) {
+ num_expected = num_min;
+ more_or_less = "at least";
+ } else {
+ num_expected = num_max;
+ more_or_less = "at most";
+ }
+ if (exact) {
+ more_or_less = "exactly";
+ }
+ PyErr_Format(PyExc_TypeError,
+ "%.200s() takes %.8s %" CYTHON_FORMAT_SSIZE_T "d positional argument%.1s (%" CYTHON_FORMAT_SSIZE_T "d given)",
+ func_name, more_or_less, num_expected,
+ (num_expected == 1) ? "" : "s", num_found);
+}
+
+static void __Pyx_RaiseDoubleKeywordsError(
+ const char* func_name,
+ PyObject* kw_name)
+{
+ PyErr_Format(PyExc_TypeError,
+ #if PY_MAJOR_VERSION >= 3
+ "%s() got multiple values for keyword argument '%U'", func_name, kw_name);
+ #else
+ "%s() got multiple values for keyword argument '%s'", func_name,
+ PyString_AsString(kw_name));
+ #endif
+}
+
+static int __Pyx_ParseOptionalKeywords(
+ PyObject *kwds,
+ PyObject **argnames[],
+ PyObject *kwds2,
+ PyObject *values[],
+ Py_ssize_t num_pos_args,
+ const char* function_name)
+{
+ PyObject *key = 0, *value = 0;
+ Py_ssize_t pos = 0;
+ PyObject*** name;
+ PyObject*** first_kw_arg = argnames + num_pos_args;
+ while (PyDict_Next(kwds, &pos, &key, &value)) {
+ name = first_kw_arg;
+ while (*name && (**name != key)) name++;
+ if (*name) {
+ values[name-argnames] = value;
+ continue;
+ }
+ name = first_kw_arg;
+ #if PY_MAJOR_VERSION < 3
+ if (likely(PyString_CheckExact(key)) || likely(PyString_Check(key))) {
+ while (*name) {
+ if ((CYTHON_COMPILING_IN_PYPY || PyString_GET_SIZE(**name) == PyString_GET_SIZE(key))
+ && _PyString_Eq(**name, key)) {
+ values[name-argnames] = value;
+ break;
+ }
+ name++;
+ }
+ if (*name) continue;
+ else {
+ PyObject*** argname = argnames;
+ while (argname != first_kw_arg) {
+ if ((**argname == key) || (
+ (CYTHON_COMPILING_IN_PYPY || PyString_GET_SIZE(**argname) == PyString_GET_SIZE(key))
+ && _PyString_Eq(**argname, key))) {
+ goto arg_passed_twice;
+ }
+ argname++;
+ }
+ }
+ } else
+ #endif
+ if (likely(PyUnicode_Check(key))) {
+ while (*name) {
+ int cmp = (**name == key) ? 0 :
+ #if !CYTHON_COMPILING_IN_PYPY && PY_MAJOR_VERSION >= 3
+ (PyUnicode_GET_SIZE(**name) != PyUnicode_GET_SIZE(key)) ? 1 :
+ #endif
+ PyUnicode_Compare(**name, key);
+ if (cmp < 0 && unlikely(PyErr_Occurred())) goto bad;
+ if (cmp == 0) {
+ values[name-argnames] = value;
+ break;
+ }
+ name++;
+ }
+ if (*name) continue;
+ else {
+ PyObject*** argname = argnames;
+ while (argname != first_kw_arg) {
+ int cmp = (**argname == key) ? 0 :
+ #if !CYTHON_COMPILING_IN_PYPY && PY_MAJOR_VERSION >= 3
+ (PyUnicode_GET_SIZE(**argname) != PyUnicode_GET_SIZE(key)) ? 1 :
+ #endif
+ PyUnicode_Compare(**argname, key);
+ if (cmp < 0 && unlikely(PyErr_Occurred())) goto bad;
+ if (cmp == 0) goto arg_passed_twice;
+ argname++;
+ }
+ }
+ } else
+ goto invalid_keyword_type;
+ if (kwds2) {
+ if (unlikely(PyDict_SetItem(kwds2, key, value))) goto bad;
+ } else {
+ goto invalid_keyword;
+ }
+ }
+ return 0;
+arg_passed_twice:
+ __Pyx_RaiseDoubleKeywordsError(function_name, key);
+ goto bad;
+invalid_keyword_type:
+ PyErr_Format(PyExc_TypeError,
+ "%.200s() keywords must be strings", function_name);
+ goto bad;
+invalid_keyword:
+ PyErr_Format(PyExc_TypeError,
+ #if PY_MAJOR_VERSION < 3
+ "%.200s() got an unexpected keyword argument '%.200s'",
+ function_name, PyString_AsString(key));
+ #else
+ "%s() got an unexpected keyword argument '%U'",
+ function_name, key);
+ #endif
+bad:
+ return -1;
+}
+
+static void __Pyx_RaiseArgumentTypeInvalid(const char* name, PyObject *obj, PyTypeObject *type) {
+ PyErr_Format(PyExc_TypeError,
+ "Argument '%.200s' has incorrect type (expected %.200s, got %.200s)",
+ name, type->tp_name, Py_TYPE(obj)->tp_name);
+}
+static CYTHON_INLINE int __Pyx_ArgTypeTest(PyObject *obj, PyTypeObject *type, int none_allowed,
+ const char *name, int exact)
+{
+ if (unlikely(!type)) {
+ PyErr_SetString(PyExc_SystemError, "Missing type object");
+ return 0;
+ }
+ if (none_allowed && obj == Py_None) return 1;
+ else if (exact) {
+ if (likely(Py_TYPE(obj) == type)) return 1;
+ #if PY_MAJOR_VERSION == 2
+ else if ((type == &PyBaseString_Type) && likely(__Pyx_PyBaseString_CheckExact(obj))) return 1;
+ #endif
+ }
+ else {
+ if (likely(PyObject_TypeCheck(obj, type))) return 1;
+ }
+ __Pyx_RaiseArgumentTypeInvalid(name, obj, type);
+ return 0;
+}
+
+static CYTHON_INLINE PyObject *__Pyx_GetModuleGlobalName(PyObject *name) {
+ PyObject *result;
+#if CYTHON_COMPILING_IN_CPYTHON
+ result = PyDict_GetItem(__pyx_d, name);
+ if (likely(result)) {
+ Py_INCREF(result);
+ } else {
+#else
+ result = PyObject_GetItem(__pyx_d, name);
+ if (!result) {
+ PyErr_Clear();
+#endif
+ result = __Pyx_GetBuiltinName(name);
+ }
+ return result;
+}
+
+#if CYTHON_COMPILING_IN_CPYTHON
+static CYTHON_INLINE PyObject* __Pyx_PyObject_CallMethO(PyObject *func, PyObject *arg) {
+ PyObject *self, *result;
+ PyCFunction cfunc;
+ cfunc = PyCFunction_GET_FUNCTION(func);
+ self = PyCFunction_GET_SELF(func);
+ if (unlikely(Py_EnterRecursiveCall((char*)" while calling a Python object")))
+ return NULL;
+ result = cfunc(self, arg);
+ Py_LeaveRecursiveCall();
+ if (unlikely(!result) && unlikely(!PyErr_Occurred())) {
+ PyErr_SetString(
+ PyExc_SystemError,
+ "NULL result without error in PyObject_Call");
+ }
+ return result;
+}
+#endif
+
+#if CYTHON_COMPILING_IN_CPYTHON
+static PyObject* __Pyx__PyObject_CallOneArg(PyObject *func, PyObject *arg) {
+ PyObject *result;
+ PyObject *args = PyTuple_New(1);
+ if (unlikely(!args)) return NULL;
+ Py_INCREF(arg);
+ PyTuple_SET_ITEM(args, 0, arg);
+ result = __Pyx_PyObject_Call(func, args, NULL);
+ Py_DECREF(args);
+ return result;
+}
+static CYTHON_INLINE PyObject* __Pyx_PyObject_CallOneArg(PyObject *func, PyObject *arg) {
+#ifdef __Pyx_CyFunction_USED
+ if (likely(PyCFunction_Check(func) || PyObject_TypeCheck(func, __pyx_CyFunctionType))) {
+#else
+ if (likely(PyCFunction_Check(func))) {
+#endif
+ if (likely(PyCFunction_GET_FLAGS(func) & METH_O)) {
+ return __Pyx_PyObject_CallMethO(func, arg);
+ }
+ }
+ return __Pyx__PyObject_CallOneArg(func, arg);
+}
+#else
+static CYTHON_INLINE PyObject* __Pyx_PyObject_CallOneArg(PyObject *func, PyObject *arg) {
+ PyObject* args = PyTuple_Pack(1, arg);
+ return (likely(args)) ? __Pyx_PyObject_Call(func, args, NULL) : NULL;
+}
+#endif
+
+static CYTHON_INLINE void __Pyx_ExceptionSave(PyObject **type, PyObject **value, PyObject **tb) {
+#if CYTHON_COMPILING_IN_CPYTHON
+ PyThreadState *tstate = PyThreadState_GET();
+ *type = tstate->exc_type;
+ *value = tstate->exc_value;
+ *tb = tstate->exc_traceback;
+ Py_XINCREF(*type);
+ Py_XINCREF(*value);
+ Py_XINCREF(*tb);
+#else
+ PyErr_GetExcInfo(type, value, tb);
+#endif
+}
+static void __Pyx_ExceptionReset(PyObject *type, PyObject *value, PyObject *tb) {
+#if CYTHON_COMPILING_IN_CPYTHON
+ PyObject *tmp_type, *tmp_value, *tmp_tb;
+ PyThreadState *tstate = PyThreadState_GET();
+ tmp_type = tstate->exc_type;
+ tmp_value = tstate->exc_value;
+ tmp_tb = tstate->exc_traceback;
+ tstate->exc_type = type;
+ tstate->exc_value = value;
+ tstate->exc_traceback = tb;
+ Py_XDECREF(tmp_type);
+ Py_XDECREF(tmp_value);
+ Py_XDECREF(tmp_tb);
+#else
+ PyErr_SetExcInfo(type, value, tb);
+#endif
+}
+
+static int __Pyx_GetException(PyObject **type, PyObject **value, PyObject **tb) {
+ PyObject *local_type, *local_value, *local_tb;
+#if CYTHON_COMPILING_IN_CPYTHON
+ PyObject *tmp_type, *tmp_value, *tmp_tb;
+ PyThreadState *tstate = PyThreadState_GET();
+ local_type = tstate->curexc_type;
+ local_value = tstate->curexc_value;
+ local_tb = tstate->curexc_traceback;
+ tstate->curexc_type = 0;
+ tstate->curexc_value = 0;
+ tstate->curexc_traceback = 0;
+#else
+ PyErr_Fetch(&local_type, &local_value, &local_tb);
+#endif
+ PyErr_NormalizeException(&local_type, &local_value, &local_tb);
+#if CYTHON_COMPILING_IN_CPYTHON
+ if (unlikely(tstate->curexc_type))
+#else
+ if (unlikely(PyErr_Occurred()))
+#endif
+ goto bad;
+ #if PY_MAJOR_VERSION >= 3
+ if (local_tb) {
+ if (unlikely(PyException_SetTraceback(local_value, local_tb) < 0))
+ goto bad;
+ }
+ #endif
+ Py_XINCREF(local_tb);
+ Py_XINCREF(local_type);
+ Py_XINCREF(local_value);
+ *type = local_type;
+ *value = local_value;
+ *tb = local_tb;
+#if CYTHON_COMPILING_IN_CPYTHON
+ tmp_type = tstate->exc_type;
+ tmp_value = tstate->exc_value;
+ tmp_tb = tstate->exc_traceback;
+ tstate->exc_type = local_type;
+ tstate->exc_value = local_value;
+ tstate->exc_traceback = local_tb;
+ Py_XDECREF(tmp_type);
+ Py_XDECREF(tmp_value);
+ Py_XDECREF(tmp_tb);
+#else
+ PyErr_SetExcInfo(local_type, local_value, local_tb);
+#endif
+ return 0;
+bad:
+ *type = 0;
+ *value = 0;
+ *tb = 0;
+ Py_XDECREF(local_type);
+ Py_XDECREF(local_value);
+ Py_XDECREF(local_tb);
+ return -1;
+}
+
+#if CYTHON_COMPILING_IN_CPYTHON
+static CYTHON_INLINE PyObject* __Pyx_PyObject_CallNoArg(PyObject *func) {
+#ifdef __Pyx_CyFunction_USED
+ if (likely(PyCFunction_Check(func) || PyObject_TypeCheck(func, __pyx_CyFunctionType))) {
+#else
+ if (likely(PyCFunction_Check(func))) {
+#endif
+ if (likely(PyCFunction_GET_FLAGS(func) & METH_NOARGS)) {
+ return __Pyx_PyObject_CallMethO(func, NULL);
+ }
+ }
+ return __Pyx_PyObject_Call(func, __pyx_empty_tuple, NULL);
+}
+#endif
+
+static int __pyx_bisect_code_objects(__Pyx_CodeObjectCacheEntry* entries, int count, int code_line) {
+ int start = 0, mid = 0, end = count - 1;
+ if (end >= 0 && code_line > entries[end].code_line) {
+ return count;
+ }
+ while (start < end) {
+ mid = (start + end) / 2;
+ if (code_line < entries[mid].code_line) {
+ end = mid;
+ } else if (code_line > entries[mid].code_line) {
+ start = mid + 1;
+ } else {
+ return mid;
+ }
+ }
+ if (code_line <= entries[mid].code_line) {
+ return mid;
+ } else {
+ return mid + 1;
+ }
+}
+static PyCodeObject *__pyx_find_code_object(int code_line) {
+ PyCodeObject* code_object;
+ int pos;
+ if (unlikely(!code_line) || unlikely(!__pyx_code_cache.entries)) {
+ return NULL;
+ }
+ pos = __pyx_bisect_code_objects(__pyx_code_cache.entries, __pyx_code_cache.count, code_line);
+ if (unlikely(pos >= __pyx_code_cache.count) || unlikely(__pyx_code_cache.entries[pos].code_line != code_line)) {
+ return NULL;
+ }
+ code_object = __pyx_code_cache.entries[pos].code_object;
+ Py_INCREF(code_object);
+ return code_object;
+}
+static void __pyx_insert_code_object(int code_line, PyCodeObject* code_object) {
+ int pos, i;
+ __Pyx_CodeObjectCacheEntry* entries = __pyx_code_cache.entries;
+ if (unlikely(!code_line)) {
+ return;
+ }
+ if (unlikely(!entries)) {
+ entries = (__Pyx_CodeObjectCacheEntry*)PyMem_Malloc(64*sizeof(__Pyx_CodeObjectCacheEntry));
+ if (likely(entries)) {
+ __pyx_code_cache.entries = entries;
+ __pyx_code_cache.max_count = 64;
+ __pyx_code_cache.count = 1;
+ entries[0].code_line = code_line;
+ entries[0].code_object = code_object;
+ Py_INCREF(code_object);
+ }
+ return;
+ }
+ pos = __pyx_bisect_code_objects(__pyx_code_cache.entries, __pyx_code_cache.count, code_line);
+ if ((pos < __pyx_code_cache.count) && unlikely(__pyx_code_cache.entries[pos].code_line == code_line)) {
+ PyCodeObject* tmp = entries[pos].code_object;
+ entries[pos].code_object = code_object;
+ Py_DECREF(tmp);
+ return;
+ }
+ if (__pyx_code_cache.count == __pyx_code_cache.max_count) {
+ int new_max = __pyx_code_cache.max_count + 64;
+ entries = (__Pyx_CodeObjectCacheEntry*)PyMem_Realloc(
+ __pyx_code_cache.entries, (size_t)new_max*sizeof(__Pyx_CodeObjectCacheEntry));
+ if (unlikely(!entries)) {
+ return;
+ }
+ __pyx_code_cache.entries = entries;
+ __pyx_code_cache.max_count = new_max;
+ }
+ for (i=__pyx_code_cache.count; i>pos; i--) {
+ entries[i] = entries[i-1];
+ }
+ entries[pos].code_line = code_line;
+ entries[pos].code_object = code_object;
+ __pyx_code_cache.count++;
+ Py_INCREF(code_object);
+}
+
+#include "compile.h"
+#include "frameobject.h"
+#include "traceback.h"
+static PyCodeObject* __Pyx_CreateCodeObjectForTraceback(
+ const char *funcname, int c_line,
+ int py_line, const char *filename) {
+ PyCodeObject *py_code = 0;
+ PyObject *py_srcfile = 0;
+ PyObject *py_funcname = 0;
+ #if PY_MAJOR_VERSION < 3
+ py_srcfile = PyString_FromString(filename);
+ #else
+ py_srcfile = PyUnicode_FromString(filename);
+ #endif
+ if (!py_srcfile) goto bad;
+ if (c_line) {
+ #if PY_MAJOR_VERSION < 3
+ py_funcname = PyString_FromFormat( "%s (%s:%d)", funcname, __pyx_cfilenm, c_line);
+ #else
+ py_funcname = PyUnicode_FromFormat( "%s (%s:%d)", funcname, __pyx_cfilenm, c_line);
+ #endif
+ }
+ else {
+ #if PY_MAJOR_VERSION < 3
+ py_funcname = PyString_FromString(funcname);
+ #else
+ py_funcname = PyUnicode_FromString(funcname);
+ #endif
+ }
+ if (!py_funcname) goto bad;
+ py_code = __Pyx_PyCode_New(
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ __pyx_empty_bytes, /*PyObject *code,*/
+ __pyx_empty_tuple, /*PyObject *consts,*/
+ __pyx_empty_tuple, /*PyObject *names,*/
+ __pyx_empty_tuple, /*PyObject *varnames,*/
+ __pyx_empty_tuple, /*PyObject *freevars,*/
+ __pyx_empty_tuple, /*PyObject *cellvars,*/
+ py_srcfile, /*PyObject *filename,*/
+ py_funcname, /*PyObject *name,*/
+ py_line,
+ __pyx_empty_bytes /*PyObject *lnotab*/
+ );
+ Py_DECREF(py_srcfile);
+ Py_DECREF(py_funcname);
+ return py_code;
+bad:
+ Py_XDECREF(py_srcfile);
+ Py_XDECREF(py_funcname);
+ return NULL;
+}
+static void __Pyx_AddTraceback(const char *funcname, int c_line,
+ int py_line, const char *filename) {
+ PyCodeObject *py_code = 0;
+ PyFrameObject *py_frame = 0;
+ py_code = __pyx_find_code_object(c_line ? c_line : py_line);
+ if (!py_code) {
+ py_code = __Pyx_CreateCodeObjectForTraceback(
+ funcname, c_line, py_line, filename);
+ if (!py_code) goto bad;
+ __pyx_insert_code_object(c_line ? c_line : py_line, py_code);
+ }
+ py_frame = PyFrame_New(
+ PyThreadState_GET(), /*PyThreadState *tstate,*/
+ py_code, /*PyCodeObject *code,*/
+ __pyx_d, /*PyObject *globals,*/
+ 0 /*PyObject *locals*/
+ );
+ if (!py_frame) goto bad;
+ py_frame->f_lineno = py_line;
+ PyTraceBack_Here(py_frame);
+bad:
+ Py_XDECREF(py_code);
+ Py_XDECREF(py_frame);
+}
+
+static PyObject *__Pyx_Import(PyObject *name, PyObject *from_list, int level) {
+ PyObject *empty_list = 0;
+ PyObject *module = 0;
+ PyObject *global_dict = 0;
+ PyObject *empty_dict = 0;
+ PyObject *list;
+ #if PY_VERSION_HEX < 0x03030000
+ PyObject *py_import;
+ py_import = __Pyx_PyObject_GetAttrStr(__pyx_b, __pyx_n_s_import);
+ if (!py_import)
+ goto bad;
+ #endif
+ if (from_list)
+ list = from_list;
+ else {
+ empty_list = PyList_New(0);
+ if (!empty_list)
+ goto bad;
+ list = empty_list;
+ }
+ global_dict = PyModule_GetDict(__pyx_m);
+ if (!global_dict)
+ goto bad;
+ empty_dict = PyDict_New();
+ if (!empty_dict)
+ goto bad;
+ {
+ #if PY_MAJOR_VERSION >= 3
+ if (level == -1) {
+ if (strchr(__Pyx_MODULE_NAME, '.')) {
+ #if PY_VERSION_HEX < 0x03030000
+ PyObject *py_level = PyInt_FromLong(1);
+ if (!py_level)
+ goto bad;
+ module = PyObject_CallFunctionObjArgs(py_import,
+ name, global_dict, empty_dict, list, py_level, NULL);
+ Py_DECREF(py_level);
+ #else
+ module = PyImport_ImportModuleLevelObject(
+ name, global_dict, empty_dict, list, 1);
+ #endif
+ if (!module) {
+ if (!PyErr_ExceptionMatches(PyExc_ImportError))
+ goto bad;
+ PyErr_Clear();
+ }
+ }
+ level = 0;
+ }
+ #endif
+ if (!module) {
+ #if PY_VERSION_HEX < 0x03030000
+ PyObject *py_level = PyInt_FromLong(level);
+ if (!py_level)
+ goto bad;
+ module = PyObject_CallFunctionObjArgs(py_import,
+ name, global_dict, empty_dict, list, py_level, NULL);
+ Py_DECREF(py_level);
+ #else
+ module = PyImport_ImportModuleLevelObject(
+ name, global_dict, empty_dict, list, level);
+ #endif
+ }
+ }
+bad:
+ #if PY_VERSION_HEX < 0x03030000
+ Py_XDECREF(py_import);
+ #endif
+ Py_XDECREF(empty_list);
+ Py_XDECREF(empty_dict);
+ return module;
+}
+
+#define __PYX_VERIFY_RETURN_INT(target_type, func_type, func_value) \
+ { \
+ func_type value = func_value; \
+ if (sizeof(target_type) < sizeof(func_type)) { \
+ if (unlikely(value != (func_type) (target_type) value)) { \
+ func_type zero = 0; \
+ if (is_unsigned && unlikely(value < zero)) \
+ goto raise_neg_overflow; \
+ else \
+ goto raise_overflow; \
+ } \
+ } \
+ return (target_type) value; \
+ }
+
+#if CYTHON_COMPILING_IN_CPYTHON && PY_MAJOR_VERSION >= 3
+ #if CYTHON_USE_PYLONG_INTERNALS
+ #include "longintrepr.h"
+ #endif
+#endif
+
+static CYTHON_INLINE int __Pyx_PyInt_As_int(PyObject *x) {
+ const int neg_one = (int) -1, const_zero = 0;
+ const int is_unsigned = neg_one > const_zero;
+#if PY_MAJOR_VERSION < 3
+ if (likely(PyInt_Check(x))) {
+ if (sizeof(int) < sizeof(long)) {
+ __PYX_VERIFY_RETURN_INT(int, long, PyInt_AS_LONG(x))
+ } else {
+ long val = PyInt_AS_LONG(x);
+ if (is_unsigned && unlikely(val < 0)) {
+ goto raise_neg_overflow;
+ }
+ return (int) val;
+ }
+ } else
+#endif
+ if (likely(PyLong_Check(x))) {
+ if (is_unsigned) {
+#if CYTHON_COMPILING_IN_CPYTHON && PY_MAJOR_VERSION >= 3
+ #if CYTHON_USE_PYLONG_INTERNALS
+ switch (Py_SIZE(x)) {
+ case 0: return 0;
+ case 1: __PYX_VERIFY_RETURN_INT(int, digit, ((PyLongObject*)x)->ob_digit[0]);
+ }
+ #endif
+#endif
+ if (unlikely(Py_SIZE(x) < 0)) {
+ goto raise_neg_overflow;
+ }
+ if (sizeof(int) <= sizeof(unsigned long)) {
+ __PYX_VERIFY_RETURN_INT(int, unsigned long, PyLong_AsUnsignedLong(x))
+ } else if (sizeof(int) <= sizeof(unsigned long long)) {
+ __PYX_VERIFY_RETURN_INT(int, unsigned long long, PyLong_AsUnsignedLongLong(x))
+ }
+ } else {
+#if CYTHON_COMPILING_IN_CPYTHON && PY_MAJOR_VERSION >= 3
+ #if CYTHON_USE_PYLONG_INTERNALS
+ switch (Py_SIZE(x)) {
+ case 0: return 0;
+ case 1: __PYX_VERIFY_RETURN_INT(int, digit, +(((PyLongObject*)x)->ob_digit[0]));
+ case -1: __PYX_VERIFY_RETURN_INT(int, sdigit, -(sdigit) ((PyLongObject*)x)->ob_digit[0]);
+ }
+ #endif
+#endif
+ if (sizeof(int) <= sizeof(long)) {
+ __PYX_VERIFY_RETURN_INT(int, long, PyLong_AsLong(x))
+ } else if (sizeof(int) <= sizeof(long long)) {
+ __PYX_VERIFY_RETURN_INT(int, long long, PyLong_AsLongLong(x))
+ }
+ }
+ {
+#if CYTHON_COMPILING_IN_PYPY && !defined(_PyLong_AsByteArray)
+ PyErr_SetString(PyExc_RuntimeError,
+ "_PyLong_AsByteArray() not available in PyPy, cannot convert large numbers");
+#else
+ int val;
+ PyObject *v = __Pyx_PyNumber_Int(x);
+ #if PY_MAJOR_VERSION < 3
+ if (likely(v) && !PyLong_Check(v)) {
+ PyObject *tmp = v;
+ v = PyNumber_Long(tmp);
+ Py_DECREF(tmp);
+ }
+ #endif
+ if (likely(v)) {
+ int one = 1; int is_little = (int)*(unsigned char *)&one;
+ unsigned char *bytes = (unsigned char *)&val;
+ int ret = _PyLong_AsByteArray((PyLongObject *)v,
+ bytes, sizeof(val),
+ is_little, !is_unsigned);
+ Py_DECREF(v);
+ if (likely(!ret))
+ return val;
+ }
+#endif
+ return (int) -1;
+ }
+ } else {
+ int val;
+ PyObject *tmp = __Pyx_PyNumber_Int(x);
+ if (!tmp) return (int) -1;
+ val = __Pyx_PyInt_As_int(tmp);
+ Py_DECREF(tmp);
+ return val;
+ }
+raise_overflow:
+ PyErr_SetString(PyExc_OverflowError,
+ "value too large to convert to int");
+ return (int) -1;
+raise_neg_overflow:
+ PyErr_SetString(PyExc_OverflowError,
+ "can't convert negative value to int");
+ return (int) -1;
+}
+
+static CYTHON_INLINE PyObject* __Pyx_PyInt_From_int(int value) {
+ const int neg_one = (int) -1, const_zero = 0;
+ const int is_unsigned = neg_one > const_zero;
+ if (is_unsigned) {
+ if (sizeof(int) < sizeof(long)) {
+ return PyInt_FromLong((long) value);
+ } else if (sizeof(int) <= sizeof(unsigned long)) {
+ return PyLong_FromUnsignedLong((unsigned long) value);
+ } else if (sizeof(int) <= sizeof(unsigned long long)) {
+ return PyLong_FromUnsignedLongLong((unsigned long long) value);
+ }
+ } else {
+ if (sizeof(int) <= sizeof(long)) {
+ return PyInt_FromLong((long) value);
+ } else if (sizeof(int) <= sizeof(long long)) {
+ return PyLong_FromLongLong((long long) value);
+ }
+ }
+ {
+ int one = 1; int little = (int)*(unsigned char *)&one;
+ unsigned char *bytes = (unsigned char *)&value;
+ return _PyLong_FromByteArray(bytes, sizeof(int),
+ little, !is_unsigned);
+ }
+}
+
+static CYTHON_INLINE PyObject* __Pyx_PyInt_From_unsigned_int(unsigned int value) {
+ const unsigned int neg_one = (unsigned int) -1, const_zero = 0;
+ const int is_unsigned = neg_one > const_zero;
+ if (is_unsigned) {
+ if (sizeof(unsigned int) < sizeof(long)) {
+ return PyInt_FromLong((long) value);
+ } else if (sizeof(unsigned int) <= sizeof(unsigned long)) {
+ return PyLong_FromUnsignedLong((unsigned long) value);
+ } else if (sizeof(unsigned int) <= sizeof(unsigned long long)) {
+ return PyLong_FromUnsignedLongLong((unsigned long long) value);
+ }
+ } else {
+ if (sizeof(unsigned int) <= sizeof(long)) {
+ return PyInt_FromLong((long) value);
+ } else if (sizeof(unsigned int) <= sizeof(long long)) {
+ return PyLong_FromLongLong((long long) value);
+ }
+ }
+ {
+ int one = 1; int little = (int)*(unsigned char *)&one;
+ unsigned char *bytes = (unsigned char *)&value;
+ return _PyLong_FromByteArray(bytes, sizeof(unsigned int),
+ little, !is_unsigned);
+ }
+}
+
+static CYTHON_INLINE PyObject* __Pyx_PyInt_From_unsigned_char(unsigned char value) {
+ const unsigned char neg_one = (unsigned char) -1, const_zero = 0;
+ const int is_unsigned = neg_one > const_zero;
+ if (is_unsigned) {
+ if (sizeof(unsigned char) < sizeof(long)) {
+ return PyInt_FromLong((long) value);
+ } else if (sizeof(unsigned char) <= sizeof(unsigned long)) {
+ return PyLong_FromUnsignedLong((unsigned long) value);
+ } else if (sizeof(unsigned char) <= sizeof(unsigned long long)) {
+ return PyLong_FromUnsignedLongLong((unsigned long long) value);
+ }
+ } else {
+ if (sizeof(unsigned char) <= sizeof(long)) {
+ return PyInt_FromLong((long) value);
+ } else if (sizeof(unsigned char) <= sizeof(long long)) {
+ return PyLong_FromLongLong((long long) value);
+ }
+ }
+ {
+ int one = 1; int little = (int)*(unsigned char *)&one;
+ unsigned char *bytes = (unsigned char *)&value;
+ return _PyLong_FromByteArray(bytes, sizeof(unsigned char),
+ little, !is_unsigned);
+ }
+}
+
+static CYTHON_INLINE PyObject* __Pyx_PyInt_From_long(long value) {
+ const long neg_one = (long) -1, const_zero = 0;
+ const int is_unsigned = neg_one > const_zero;
+ if (is_unsigned) {
+ if (sizeof(long) < sizeof(long)) {
+ return PyInt_FromLong((long) value);
+ } else if (sizeof(long) <= sizeof(unsigned long)) {
+ return PyLong_FromUnsignedLong((unsigned long) value);
+ } else if (sizeof(long) <= sizeof(unsigned long long)) {
+ return PyLong_FromUnsignedLongLong((unsigned long long) value);
+ }
+ } else {
+ if (sizeof(long) <= sizeof(long)) {
+ return PyInt_FromLong((long) value);
+ } else if (sizeof(long) <= sizeof(long long)) {
+ return PyLong_FromLongLong((long long) value);
+ }
+ }
+ {
+ int one = 1; int little = (int)*(unsigned char *)&one;
+ unsigned char *bytes = (unsigned char *)&value;
+ return _PyLong_FromByteArray(bytes, sizeof(long),
+ little, !is_unsigned);
+ }
+}
+
+static CYTHON_INLINE long __Pyx_PyInt_As_long(PyObject *x) {
+ const long neg_one = (long) -1, const_zero = 0;
+ const int is_unsigned = neg_one > const_zero;
+#if PY_MAJOR_VERSION < 3
+ if (likely(PyInt_Check(x))) {
+ if (sizeof(long) < sizeof(long)) {
+ __PYX_VERIFY_RETURN_INT(long, long, PyInt_AS_LONG(x))
+ } else {
+ long val = PyInt_AS_LONG(x);
+ if (is_unsigned && unlikely(val < 0)) {
+ goto raise_neg_overflow;
+ }
+ return (long) val;
+ }
+ } else
+#endif
+ if (likely(PyLong_Check(x))) {
+ if (is_unsigned) {
+#if CYTHON_COMPILING_IN_CPYTHON && PY_MAJOR_VERSION >= 3
+ #if CYTHON_USE_PYLONG_INTERNALS
+ switch (Py_SIZE(x)) {
+ case 0: return 0;
+ case 1: __PYX_VERIFY_RETURN_INT(long, digit, ((PyLongObject*)x)->ob_digit[0]);
+ }
+ #endif
+#endif
+ if (unlikely(Py_SIZE(x) < 0)) {
+ goto raise_neg_overflow;
+ }
+ if (sizeof(long) <= sizeof(unsigned long)) {
+ __PYX_VERIFY_RETURN_INT(long, unsigned long, PyLong_AsUnsignedLong(x))
+ } else if (sizeof(long) <= sizeof(unsigned long long)) {
+ __PYX_VERIFY_RETURN_INT(long, unsigned long long, PyLong_AsUnsignedLongLong(x))
+ }
+ } else {
+#if CYTHON_COMPILING_IN_CPYTHON && PY_MAJOR_VERSION >= 3
+ #if CYTHON_USE_PYLONG_INTERNALS
+ switch (Py_SIZE(x)) {
+ case 0: return 0;
+ case 1: __PYX_VERIFY_RETURN_INT(long, digit, +(((PyLongObject*)x)->ob_digit[0]));
+ case -1: __PYX_VERIFY_RETURN_INT(long, sdigit, -(sdigit) ((PyLongObject*)x)->ob_digit[0]);
+ }
+ #endif
+#endif
+ if (sizeof(long) <= sizeof(long)) {
+ __PYX_VERIFY_RETURN_INT(long, long, PyLong_AsLong(x))
+ } else if (sizeof(long) <= sizeof(long long)) {
+ __PYX_VERIFY_RETURN_INT(long, long long, PyLong_AsLongLong(x))
+ }
+ }
+ {
+#if CYTHON_COMPILING_IN_PYPY && !defined(_PyLong_AsByteArray)
+ PyErr_SetString(PyExc_RuntimeError,
+ "_PyLong_AsByteArray() not available in PyPy, cannot convert large numbers");
+#else
+ long val;
+ PyObject *v = __Pyx_PyNumber_Int(x);
+ #if PY_MAJOR_VERSION < 3
+ if (likely(v) && !PyLong_Check(v)) {
+ PyObject *tmp = v;
+ v = PyNumber_Long(tmp);
+ Py_DECREF(tmp);
+ }
+ #endif
+ if (likely(v)) {
+ int one = 1; int is_little = (int)*(unsigned char *)&one;
+ unsigned char *bytes = (unsigned char *)&val;
+ int ret = _PyLong_AsByteArray((PyLongObject *)v,
+ bytes, sizeof(val),
+ is_little, !is_unsigned);
+ Py_DECREF(v);
+ if (likely(!ret))
+ return val;
+ }
+#endif
+ return (long) -1;
+ }
+ } else {
+ long val;
+ PyObject *tmp = __Pyx_PyNumber_Int(x);
+ if (!tmp) return (long) -1;
+ val = __Pyx_PyInt_As_long(tmp);
+ Py_DECREF(tmp);
+ return val;
+ }
+raise_overflow:
+ PyErr_SetString(PyExc_OverflowError,
+ "value too large to convert to long");
+ return (long) -1;
+raise_neg_overflow:
+ PyErr_SetString(PyExc_OverflowError,
+ "can't convert negative value to long");
+ return (long) -1;
+}
+
+static PyTypeObject* __Pyx_FetchCommonType(PyTypeObject* type) {
+ PyObject* fake_module;
+ PyTypeObject* cached_type = NULL;
+ fake_module = PyImport_AddModule((char*) "_cython_" CYTHON_ABI);
+ if (!fake_module) return NULL;
+ Py_INCREF(fake_module);
+ cached_type = (PyTypeObject*) PyObject_GetAttrString(fake_module, type->tp_name);
+ if (cached_type) {
+ if (!PyType_Check((PyObject*)cached_type)) {
+ PyErr_Format(PyExc_TypeError,
+ "Shared Cython type %.200s is not a type object",
+ type->tp_name);
+ goto bad;
+ }
+ if (cached_type->tp_basicsize != type->tp_basicsize) {
+ PyErr_Format(PyExc_TypeError,
+ "Shared Cython type %.200s has the wrong size, try recompiling",
+ type->tp_name);
+ goto bad;
+ }
+ } else {
+ if (!PyErr_ExceptionMatches(PyExc_AttributeError)) goto bad;
+ PyErr_Clear();
+ if (PyType_Ready(type) < 0) goto bad;
+ if (PyObject_SetAttrString(fake_module, type->tp_name, (PyObject*) type) < 0)
+ goto bad;
+ Py_INCREF(type);
+ cached_type = type;
+ }
+done:
+ Py_DECREF(fake_module);
+ return cached_type;
+bad:
+ Py_XDECREF(cached_type);
+ cached_type = NULL;
+ goto done;
+}
+
+static CYTHON_INLINE void __Pyx_ExceptionSwap(PyObject **type, PyObject **value, PyObject **tb) {
+ PyObject *tmp_type, *tmp_value, *tmp_tb;
+#if CYTHON_COMPILING_IN_CPYTHON
+ PyThreadState *tstate = PyThreadState_GET();
+ tmp_type = tstate->exc_type;
+ tmp_value = tstate->exc_value;
+ tmp_tb = tstate->exc_traceback;
+ tstate->exc_type = *type;
+ tstate->exc_value = *value;
+ tstate->exc_traceback = *tb;
+#else
+ PyErr_GetExcInfo(&tmp_type, &tmp_value, &tmp_tb);
+ PyErr_SetExcInfo(*type, *value, *tb);
+#endif
+ *type = tmp_type;
+ *value = tmp_value;
+ *tb = tmp_tb;
+}
+
+static PyObject* __Pyx_PyObject_CallMethod1(PyObject* obj, PyObject* method_name, PyObject* arg) {
+ PyObject *method, *result = NULL;
+ method = __Pyx_PyObject_GetAttrStr(obj, method_name);
+ if (unlikely(!method)) goto bad;
+#if CYTHON_COMPILING_IN_CPYTHON
+ if (likely(PyMethod_Check(method))) {
+ PyObject *self = PyMethod_GET_SELF(method);
+ if (likely(self)) {
+ PyObject *args;
+ PyObject *function = PyMethod_GET_FUNCTION(method);
+ args = PyTuple_New(2);
+ if (unlikely(!args)) goto bad;
+ Py_INCREF(self);
+ PyTuple_SET_ITEM(args, 0, self);
+ Py_INCREF(arg);
+ PyTuple_SET_ITEM(args, 1, arg);
+ Py_INCREF(function);
+ Py_DECREF(method); method = NULL;
+ result = __Pyx_PyObject_Call(function, args, NULL);
+ Py_DECREF(args);
+ Py_DECREF(function);
+ return result;
+ }
+ }
+#endif
+ result = __Pyx_PyObject_CallOneArg(method, arg);
+bad:
+ Py_XDECREF(method);
+ return result;
+}
+
+static PyObject *__Pyx_Generator_Next(PyObject *self);
+static PyObject *__Pyx_Generator_Send(PyObject *self, PyObject *value);
+static PyObject *__Pyx_Generator_Close(PyObject *self);
+static PyObject *__Pyx_Generator_Throw(PyObject *gen, PyObject *args);
+static PyTypeObject *__pyx_GeneratorType = 0;
+#define __Pyx_Generator_CheckExact(obj) (Py_TYPE(obj) == __pyx_GeneratorType)
+#define __Pyx_Generator_Undelegate(gen) Py_CLEAR((gen)->yieldfrom)
+#if 1 || PY_VERSION_HEX < 0x030300B0
+static int __Pyx_PyGen_FetchStopIterationValue(PyObject **pvalue) {
+ PyObject *et, *ev, *tb;
+ PyObject *value = NULL;
+ __Pyx_ErrFetch(&et, &ev, &tb);
+ if (!et) {
+ Py_XDECREF(tb);
+ Py_XDECREF(ev);
+ Py_INCREF(Py_None);
+ *pvalue = Py_None;
+ return 0;
+ }
+ if (unlikely(et != PyExc_StopIteration) &&
+ unlikely(!PyErr_GivenExceptionMatches(et, PyExc_StopIteration))) {
+ __Pyx_ErrRestore(et, ev, tb);
+ return -1;
+ }
+ if (likely(et == PyExc_StopIteration)) {
+ if (likely(!ev) || !PyObject_IsInstance(ev, PyExc_StopIteration)) {
+ if (!ev) {
+ Py_INCREF(Py_None);
+ ev = Py_None;
+ }
+ Py_XDECREF(tb);
+ Py_DECREF(et);
+ *pvalue = ev;
+ return 0;
+ }
+ }
+ PyErr_NormalizeException(&et, &ev, &tb);
+ if (unlikely(!PyObject_IsInstance(ev, PyExc_StopIteration))) {
+ __Pyx_ErrRestore(et, ev, tb);
+ return -1;
+ }
+ Py_XDECREF(tb);
+ Py_DECREF(et);
+#if PY_VERSION_HEX >= 0x030300A0
+ value = ((PyStopIterationObject *)ev)->value;
+ Py_INCREF(value);
+ Py_DECREF(ev);
+#else
+ {
+ PyObject* args = PyObject_GetAttr(ev, __pyx_n_s_args);
+ Py_DECREF(ev);
+ if (likely(args)) {
+ value = PyObject_GetItem(args, 0);
+ Py_DECREF(args);
+ }
+ if (unlikely(!value)) {
+ __Pyx_ErrRestore(NULL, NULL, NULL);
+ Py_INCREF(Py_None);
+ value = Py_None;
+ }
+ }
+#endif
+ *pvalue = value;
+ return 0;
+}
+#endif
+static CYTHON_INLINE
+void __Pyx_Generator_ExceptionClear(__pyx_GeneratorObject *self) {
+ PyObject *exc_type = self->exc_type;
+ PyObject *exc_value = self->exc_value;
+ PyObject *exc_traceback = self->exc_traceback;
+ self->exc_type = NULL;
+ self->exc_value = NULL;
+ self->exc_traceback = NULL;
+ Py_XDECREF(exc_type);
+ Py_XDECREF(exc_value);
+ Py_XDECREF(exc_traceback);
+}
+static CYTHON_INLINE
+int __Pyx_Generator_CheckRunning(__pyx_GeneratorObject *gen) {
+ if (unlikely(gen->is_running)) {
+ PyErr_SetString(PyExc_ValueError,
+ "generator already executing");
+ return 1;
+ }
+ return 0;
+}
+static CYTHON_INLINE
+PyObject *__Pyx_Generator_SendEx(__pyx_GeneratorObject *self, PyObject *value) {
+ PyObject *retval;
+ assert(!self->is_running);
+ if (unlikely(self->resume_label == 0)) {
+ if (unlikely(value && value != Py_None)) {
+ PyErr_SetString(PyExc_TypeError,
+ "can't send non-None value to a "
+ "just-started generator");
+ return NULL;
+ }
+ }
+ if (unlikely(self->resume_label == -1)) {
+ PyErr_SetNone(PyExc_StopIteration);
+ return NULL;
+ }
+ if (value) {
+#if CYTHON_COMPILING_IN_PYPY
+#else
+ if (self->exc_traceback) {
+ PyThreadState *tstate = PyThreadState_GET();
+ PyTracebackObject *tb = (PyTracebackObject *) self->exc_traceback;
+ PyFrameObject *f = tb->tb_frame;
+ Py_XINCREF(tstate->frame);
+ assert(f->f_back == NULL);
+ f->f_back = tstate->frame;
+ }
+#endif
+ __Pyx_ExceptionSwap(&self->exc_type, &self->exc_value,
+ &self->exc_traceback);
+ } else {
+ __Pyx_Generator_ExceptionClear(self);
+ }
+ self->is_running = 1;
+ retval = self->body((PyObject *) self, value);
+ self->is_running = 0;
+ if (retval) {
+ __Pyx_ExceptionSwap(&self->exc_type, &self->exc_value,
+ &self->exc_traceback);
+#if CYTHON_COMPILING_IN_PYPY
+#else
+ if (self->exc_traceback) {
+ PyTracebackObject *tb = (PyTracebackObject *) self->exc_traceback;
+ PyFrameObject *f = tb->tb_frame;
+ Py_CLEAR(f->f_back);
+ }
+#endif
+ } else {
+ __Pyx_Generator_ExceptionClear(self);
+ }
+ return retval;
+}
+static CYTHON_INLINE
+PyObject *__Pyx_Generator_FinishDelegation(__pyx_GeneratorObject *gen) {
+ PyObject *ret;
+ PyObject *val = NULL;
+ __Pyx_Generator_Undelegate(gen);
+ __Pyx_PyGen_FetchStopIterationValue(&val);
+ ret = __Pyx_Generator_SendEx(gen, val);
+ Py_XDECREF(val);
+ return ret;
+}
+static PyObject *__Pyx_Generator_Next(PyObject *self) {
+ __pyx_GeneratorObject *gen = (__pyx_GeneratorObject*) self;
+ PyObject *yf = gen->yieldfrom;
+ if (unlikely(__Pyx_Generator_CheckRunning(gen)))
+ return NULL;
+ if (yf) {
+ PyObject *ret;
+ gen->is_running = 1;
+ ret = Py_TYPE(yf)->tp_iternext(yf);
+ gen->is_running = 0;
+ if (likely(ret)) {
+ return ret;
+ }
+ return __Pyx_Generator_FinishDelegation(gen);
+ }
+ return __Pyx_Generator_SendEx(gen, Py_None);
+}
+static PyObject *__Pyx_Generator_Send(PyObject *self, PyObject *value) {
+ __pyx_GeneratorObject *gen = (__pyx_GeneratorObject*) self;
+ PyObject *yf = gen->yieldfrom;
+ if (unlikely(__Pyx_Generator_CheckRunning(gen)))
+ return NULL;
+ if (yf) {
+ PyObject *ret;
+ gen->is_running = 1;
+ if (__Pyx_Generator_CheckExact(yf)) {
+ ret = __Pyx_Generator_Send(yf, value);
+ } else {
+ if (value == Py_None)
+ ret = PyIter_Next(yf);
+ else
+ ret = __Pyx_PyObject_CallMethod1(yf, __pyx_n_s_send, value);
+ }
+ gen->is_running = 0;
+ if (likely(ret)) {
+ return ret;
+ }
+ return __Pyx_Generator_FinishDelegation(gen);
+ }
+ return __Pyx_Generator_SendEx(gen, value);
+}
+static int __Pyx_Generator_CloseIter(__pyx_GeneratorObject *gen, PyObject *yf) {
+ PyObject *retval = NULL;
+ int err = 0;
+ if (__Pyx_Generator_CheckExact(yf)) {
+ retval = __Pyx_Generator_Close(yf);
+ if (!retval)
+ return -1;
+ } else {
+ PyObject *meth;
+ gen->is_running = 1;
+ meth = PyObject_GetAttr(yf, __pyx_n_s_close);
+ if (unlikely(!meth)) {
+ if (!PyErr_ExceptionMatches(PyExc_AttributeError)) {
+ PyErr_WriteUnraisable(yf);
+ }
+ PyErr_Clear();
+ } else {
+ retval = PyObject_CallFunction(meth, NULL);
+ Py_DECREF(meth);
+ if (!retval)
+ err = -1;
+ }
+ gen->is_running = 0;
+ }
+ Py_XDECREF(retval);
+ return err;
+}
+static PyObject *__Pyx_Generator_Close(PyObject *self) {
+ __pyx_GeneratorObject *gen = (__pyx_GeneratorObject *) self;
+ PyObject *retval, *raised_exception;
+ PyObject *yf = gen->yieldfrom;
+ int err = 0;
+ if (unlikely(__Pyx_Generator_CheckRunning(gen)))
+ return NULL;
+ if (yf) {
+ Py_INCREF(yf);
+ err = __Pyx_Generator_CloseIter(gen, yf);
+ __Pyx_Generator_Undelegate(gen);
+ Py_DECREF(yf);
+ }
+ if (err == 0)
+ PyErr_SetNone(PyExc_GeneratorExit);
+ retval = __Pyx_Generator_SendEx(gen, NULL);
+ if (retval) {
+ Py_DECREF(retval);
+ PyErr_SetString(PyExc_RuntimeError,
+ "generator ignored GeneratorExit");
+ return NULL;
+ }
+ raised_exception = PyErr_Occurred();
+ if (!raised_exception
+ || raised_exception == PyExc_StopIteration
+ || raised_exception == PyExc_GeneratorExit
+ || PyErr_GivenExceptionMatches(raised_exception, PyExc_GeneratorExit)
+ || PyErr_GivenExceptionMatches(raised_exception, PyExc_StopIteration))
+ {
+ if (raised_exception) PyErr_Clear();
+ Py_INCREF(Py_None);
+ return Py_None;
+ }
+ return NULL;
+}
+static PyObject *__Pyx_Generator_Throw(PyObject *self, PyObject *args) {
+ __pyx_GeneratorObject *gen = (__pyx_GeneratorObject *) self;
+ PyObject *typ;
+ PyObject *tb = NULL;
+ PyObject *val = NULL;
+ PyObject *yf = gen->yieldfrom;
+ if (!PyArg_UnpackTuple(args, (char *)"throw", 1, 3, &typ, &val, &tb))
+ return NULL;
+ if (unlikely(__Pyx_Generator_CheckRunning(gen)))
+ return NULL;
+ if (yf) {
+ PyObject *ret;
+ Py_INCREF(yf);
+ if (PyErr_GivenExceptionMatches(typ, PyExc_GeneratorExit)) {
+ int err = __Pyx_Generator_CloseIter(gen, yf);
+ Py_DECREF(yf);
+ __Pyx_Generator_Undelegate(gen);
+ if (err < 0)
+ return __Pyx_Generator_SendEx(gen, NULL);
+ goto throw_here;
+ }
+ gen->is_running = 1;
+ if (__Pyx_Generator_CheckExact(yf)) {
+ ret = __Pyx_Generator_Throw(yf, args);
+ } else {
+ PyObject *meth = PyObject_GetAttr(yf, __pyx_n_s_throw);
+ if (unlikely(!meth)) {
+ Py_DECREF(yf);
+ if (!PyErr_ExceptionMatches(PyExc_AttributeError)) {
+ gen->is_running = 0;
+ return NULL;
+ }
+ PyErr_Clear();
+ __Pyx_Generator_Undelegate(gen);
+ gen->is_running = 0;
+ goto throw_here;
+ }
+ ret = PyObject_CallObject(meth, args);
+ Py_DECREF(meth);
+ }
+ gen->is_running = 0;
+ Py_DECREF(yf);
+ if (!ret) {
+ ret = __Pyx_Generator_FinishDelegation(gen);
+ }
+ return ret;
+ }
+throw_here:
+ __Pyx_Raise(typ, val, tb, NULL);
+ return __Pyx_Generator_SendEx(gen, NULL);
+}
+static int __Pyx_Generator_traverse(PyObject *self, visitproc visit, void *arg) {
+ __pyx_GeneratorObject *gen = (__pyx_GeneratorObject *) self;
+ Py_VISIT(gen->closure);
+ Py_VISIT(gen->classobj);
+ Py_VISIT(gen->yieldfrom);
+ Py_VISIT(gen->exc_type);
+ Py_VISIT(gen->exc_value);
+ Py_VISIT(gen->exc_traceback);
+ return 0;
+}
+static int __Pyx_Generator_clear(PyObject *self) {
+ __pyx_GeneratorObject *gen = (__pyx_GeneratorObject *) self;
+ Py_CLEAR(gen->closure);
+ Py_CLEAR(gen->classobj);
+ Py_CLEAR(gen->yieldfrom);
+ Py_CLEAR(gen->exc_type);
+ Py_CLEAR(gen->exc_value);
+ Py_CLEAR(gen->exc_traceback);
+ Py_CLEAR(gen->gi_name);
+ Py_CLEAR(gen->gi_qualname);
+ return 0;
+}
+static void __Pyx_Generator_dealloc(PyObject *self) {
+ __pyx_GeneratorObject *gen = (__pyx_GeneratorObject *) self;
+ PyObject_GC_UnTrack(gen);
+ if (gen->gi_weakreflist != NULL)
+ PyObject_ClearWeakRefs(self);
+ if (gen->resume_label > 0) {
+ PyObject_GC_Track(self);
+#if PY_VERSION_HEX >= 0x030400a1
+ if (PyObject_CallFinalizerFromDealloc(self))
+#else
+ Py_TYPE(gen)->tp_del(self);
+ if (self->ob_refcnt > 0)
+#endif
+ {
+ return;
+ }
+ PyObject_GC_UnTrack(self);
+ }
+ __Pyx_Generator_clear(self);
+ PyObject_GC_Del(gen);
+}
+static void __Pyx_Generator_del(PyObject *self) {
+ PyObject *res;
+ PyObject *error_type, *error_value, *error_traceback;
+ __pyx_GeneratorObject *gen = (__pyx_GeneratorObject *) self;
+ if (gen->resume_label <= 0)
+ return ;
+#if PY_VERSION_HEX < 0x030400a1
+ assert(self->ob_refcnt == 0);
+ self->ob_refcnt = 1;
+#endif
+ __Pyx_ErrFetch(&error_type, &error_value, &error_traceback);
+ res = __Pyx_Generator_Close(self);
+ if (res == NULL)
+ PyErr_WriteUnraisable(self);
+ else
+ Py_DECREF(res);
+ __Pyx_ErrRestore(error_type, error_value, error_traceback);
+#if PY_VERSION_HEX < 0x030400a1
+ assert(self->ob_refcnt > 0);
+ if (--self->ob_refcnt == 0) {
+ return;
+ }
+ {
+ Py_ssize_t refcnt = self->ob_refcnt;
+ _Py_NewReference(self);
+ self->ob_refcnt = refcnt;
+ }
+#if CYTHON_COMPILING_IN_CPYTHON
+ assert(PyType_IS_GC(self->ob_type) &&
+ _Py_AS_GC(self)->gc.gc_refs != _PyGC_REFS_UNTRACKED);
+ _Py_DEC_REFTOTAL;
+#endif
+#ifdef COUNT_ALLOCS
+ --Py_TYPE(self)->tp_frees;
+ --Py_TYPE(self)->tp_allocs;
+#endif
+#endif
+}
+static PyObject *
+__Pyx_Generator_get_name(__pyx_GeneratorObject *self)
+{
+ Py_INCREF(self->gi_name);
+ return self->gi_name;
+}
+static int
+__Pyx_Generator_set_name(__pyx_GeneratorObject *self, PyObject *value)
+{
+ PyObject *tmp;
+#if PY_MAJOR_VERSION >= 3
+ if (unlikely(value == NULL || !PyUnicode_Check(value))) {
+#else
+ if (unlikely(value == NULL || !PyString_Check(value))) {
+#endif
+ PyErr_SetString(PyExc_TypeError,
+ "__name__ must be set to a string object");
+ return -1;
+ }
+ tmp = self->gi_name;
+ Py_INCREF(value);
+ self->gi_name = value;
+ Py_XDECREF(tmp);
+ return 0;
+}
+static PyObject *
+__Pyx_Generator_get_qualname(__pyx_GeneratorObject *self)
+{
+ Py_INCREF(self->gi_qualname);
+ return self->gi_qualname;
+}
+static int
+__Pyx_Generator_set_qualname(__pyx_GeneratorObject *self, PyObject *value)
+{
+ PyObject *tmp;
+#if PY_MAJOR_VERSION >= 3
+ if (unlikely(value == NULL || !PyUnicode_Check(value))) {
+#else
+ if (unlikely(value == NULL || !PyString_Check(value))) {
+#endif
+ PyErr_SetString(PyExc_TypeError,
+ "__qualname__ must be set to a string object");
+ return -1;
+ }
+ tmp = self->gi_qualname;
+ Py_INCREF(value);
+ self->gi_qualname = value;
+ Py_XDECREF(tmp);
+ return 0;
+}
+static PyGetSetDef __pyx_Generator_getsets[] = {
+ {(char *) "__name__", (getter)__Pyx_Generator_get_name, (setter)__Pyx_Generator_set_name,
+ (char*) PyDoc_STR("name of the generator"), 0},
+ {(char *) "__qualname__", (getter)__Pyx_Generator_get_qualname, (setter)__Pyx_Generator_set_qualname,
+ (char*) PyDoc_STR("qualified name of the generator"), 0},
+ {0, 0, 0, 0, 0}
+};
+static PyMemberDef __pyx_Generator_memberlist[] = {
+ {(char *) "gi_running", T_BOOL, offsetof(__pyx_GeneratorObject, is_running), READONLY, NULL},
+ {0, 0, 0, 0, 0}
+};
+static PyMethodDef __pyx_Generator_methods[] = {
+ {"send", (PyCFunction) __Pyx_Generator_Send, METH_O, 0},
+ {"throw", (PyCFunction) __Pyx_Generator_Throw, METH_VARARGS, 0},
+ {"close", (PyCFunction) __Pyx_Generator_Close, METH_NOARGS, 0},
+ {0, 0, 0, 0}
+};
+static PyTypeObject __pyx_GeneratorType_type = {
+ PyVarObject_HEAD_INIT(0, 0)
+ "generator",
+ sizeof(__pyx_GeneratorObject),
+ 0,
+ (destructor) __Pyx_Generator_dealloc,
+ 0,
+ 0,
+ 0,
+#if PY_MAJOR_VERSION < 3
+ 0,
+#else
+ 0,
+#endif
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC | Py_TPFLAGS_HAVE_FINALIZE,
+ 0,
+ (traverseproc) __Pyx_Generator_traverse,
+ 0,
+ 0,
+ offsetof(__pyx_GeneratorObject, gi_weakreflist),
+ 0,
+ (iternextfunc) __Pyx_Generator_Next,
+ __pyx_Generator_methods,
+ __pyx_Generator_memberlist,
+ __pyx_Generator_getsets,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+#if PY_VERSION_HEX >= 0x030400a1
+ 0,
+#else
+ __Pyx_Generator_del,
+#endif
+ 0,
+#if PY_VERSION_HEX >= 0x030400a1
+ __Pyx_Generator_del,
+#endif
+};
+static __pyx_GeneratorObject *__Pyx_Generator_New(__pyx_generator_body_t body,
+ PyObject *closure, PyObject *name, PyObject *qualname) {
+ __pyx_GeneratorObject *gen =
+ PyObject_GC_New(__pyx_GeneratorObject, &__pyx_GeneratorType_type);
+ if (gen == NULL)
+ return NULL;
+ gen->body = body;
+ gen->closure = closure;
+ Py_XINCREF(closure);
+ gen->is_running = 0;
+ gen->resume_label = 0;
+ gen->classobj = NULL;
+ gen->yieldfrom = NULL;
+ gen->exc_type = NULL;
+ gen->exc_value = NULL;
+ gen->exc_traceback = NULL;
+ gen->gi_weakreflist = NULL;
+ Py_XINCREF(qualname);
+ gen->gi_qualname = qualname;
+ Py_XINCREF(name);
+ gen->gi_name = name;
+ PyObject_GC_Track(gen);
+ return gen;
+}
+static int __pyx_Generator_init(void) {
+ __pyx_GeneratorType_type.tp_getattro = PyObject_GenericGetAttr;
+ __pyx_GeneratorType_type.tp_iter = PyObject_SelfIter;
+ __pyx_GeneratorType = __Pyx_FetchCommonType(&__pyx_GeneratorType_type);
+ if (__pyx_GeneratorType == NULL) {
+ return -1;
+ }
+ return 0;
+}
+
+static int __Pyx_check_binary_version(void) {
+ char ctversion[4], rtversion[4];
+ PyOS_snprintf(ctversion, 4, "%d.%d", PY_MAJOR_VERSION, PY_MINOR_VERSION);
+ PyOS_snprintf(rtversion, 4, "%s", Py_GetVersion());
+ if (ctversion[0] != rtversion[0] || ctversion[2] != rtversion[2]) {
+ char message[200];
+ PyOS_snprintf(message, sizeof(message),
+ "compiletime version %s of module '%.100s' "
+ "does not match runtime version %s",
+ ctversion, __Pyx_MODULE_NAME, rtversion);
+ return PyErr_WarnEx(NULL, message, 1);
+ }
+ return 0;
+}
+
+static int __Pyx_InitStrings(__Pyx_StringTabEntry *t) {
+ while (t->p) {
+ #if PY_MAJOR_VERSION < 3
+ if (t->is_unicode) {
+ *t->p = PyUnicode_DecodeUTF8(t->s, t->n - 1, NULL);
+ } else if (t->intern) {
+ *t->p = PyString_InternFromString(t->s);
+ } else {
+ *t->p = PyString_FromStringAndSize(t->s, t->n - 1);
+ }
+ #else
+ if (t->is_unicode | t->is_str) {
+ if (t->intern) {
+ *t->p = PyUnicode_InternFromString(t->s);
+ } else if (t->encoding) {
+ *t->p = PyUnicode_Decode(t->s, t->n - 1, t->encoding, NULL);
+ } else {
+ *t->p = PyUnicode_FromStringAndSize(t->s, t->n - 1);
+ }
+ } else {
+ *t->p = PyBytes_FromStringAndSize(t->s, t->n - 1);
+ }
+ #endif
+ if (!*t->p)
+ return -1;
+ ++t;
+ }
+ return 0;
+}
+
+static CYTHON_INLINE PyObject* __Pyx_PyUnicode_FromString(const char* c_str) {
+ return __Pyx_PyUnicode_FromStringAndSize(c_str, (Py_ssize_t)strlen(c_str));
+}
+static CYTHON_INLINE char* __Pyx_PyObject_AsString(PyObject* o) {
+ Py_ssize_t ignore;
+ return __Pyx_PyObject_AsStringAndSize(o, &ignore);
+}
+static CYTHON_INLINE char* __Pyx_PyObject_AsStringAndSize(PyObject* o, Py_ssize_t *length) {
+#if __PYX_DEFAULT_STRING_ENCODING_IS_ASCII || __PYX_DEFAULT_STRING_ENCODING_IS_DEFAULT
+ if (
+#if PY_MAJOR_VERSION < 3 && __PYX_DEFAULT_STRING_ENCODING_IS_ASCII
+ __Pyx_sys_getdefaultencoding_not_ascii &&
+#endif
+ PyUnicode_Check(o)) {
+#if PY_VERSION_HEX < 0x03030000
+ char* defenc_c;
+ PyObject* defenc = _PyUnicode_AsDefaultEncodedString(o, NULL);
+ if (!defenc) return NULL;
+ defenc_c = PyBytes_AS_STRING(defenc);
+#if __PYX_DEFAULT_STRING_ENCODING_IS_ASCII
+ {
+ char* end = defenc_c + PyBytes_GET_SIZE(defenc);
+ char* c;
+ for (c = defenc_c; c < end; c++) {
+ if ((unsigned char) (*c) >= 128) {
+ PyUnicode_AsASCIIString(o);
+ return NULL;
+ }
+ }
+ }
+#endif
+ *length = PyBytes_GET_SIZE(defenc);
+ return defenc_c;
+#else
+ if (__Pyx_PyUnicode_READY(o) == -1) return NULL;
+#if __PYX_DEFAULT_STRING_ENCODING_IS_ASCII
+ if (PyUnicode_IS_ASCII(o)) {
+ *length = PyUnicode_GET_LENGTH(o);
+ return PyUnicode_AsUTF8(o);
+ } else {
+ PyUnicode_AsASCIIString(o);
+ return NULL;
+ }
+#else
+ return PyUnicode_AsUTF8AndSize(o, length);
+#endif
+#endif
+ } else
+#endif
+#if !CYTHON_COMPILING_IN_PYPY
+ if (PyByteArray_Check(o)) {
+ *length = PyByteArray_GET_SIZE(o);
+ return PyByteArray_AS_STRING(o);
+ } else
+#endif
+ {
+ char* result;
+ int r = PyBytes_AsStringAndSize(o, &result, length);
+ if (unlikely(r < 0)) {
+ return NULL;
+ } else {
+ return result;
+ }
+ }
+}
+static CYTHON_INLINE int __Pyx_PyObject_IsTrue(PyObject* x) {
+ int is_true = x == Py_True;
+ if (is_true | (x == Py_False) | (x == Py_None)) return is_true;
+ else return PyObject_IsTrue(x);
+}
+static CYTHON_INLINE PyObject* __Pyx_PyNumber_Int(PyObject* x) {
+ PyNumberMethods *m;
+ const char *name = NULL;
+ PyObject *res = NULL;
+#if PY_MAJOR_VERSION < 3
+ if (PyInt_Check(x) || PyLong_Check(x))
+#else
+ if (PyLong_Check(x))
+#endif
+ return Py_INCREF(x), x;
+ m = Py_TYPE(x)->tp_as_number;
+#if PY_MAJOR_VERSION < 3
+ if (m && m->nb_int) {
+ name = "int";
+ res = PyNumber_Int(x);
+ }
+ else if (m && m->nb_long) {
+ name = "long";
+ res = PyNumber_Long(x);
+ }
+#else
+ if (m && m->nb_int) {
+ name = "int";
+ res = PyNumber_Long(x);
+ }
+#endif
+ if (res) {
+#if PY_MAJOR_VERSION < 3
+ if (!PyInt_Check(res) && !PyLong_Check(res)) {
+#else
+ if (!PyLong_Check(res)) {
+#endif
+ PyErr_Format(PyExc_TypeError,
+ "__%.4s__ returned non-%.4s (type %.200s)",
+ name, name, Py_TYPE(res)->tp_name);
+ Py_DECREF(res);
+ return NULL;
+ }
+ }
+ else if (!PyErr_Occurred()) {
+ PyErr_SetString(PyExc_TypeError,
+ "an integer is required");
+ }
+ return res;
+}
+static CYTHON_INLINE Py_ssize_t __Pyx_PyIndex_AsSsize_t(PyObject* b) {
+ Py_ssize_t ival;
+ PyObject *x;
+#if PY_MAJOR_VERSION < 3
+ if (likely(PyInt_CheckExact(b)))
+ return PyInt_AS_LONG(b);
+#endif
+ if (likely(PyLong_CheckExact(b))) {
+ #if CYTHON_COMPILING_IN_CPYTHON && PY_MAJOR_VERSION >= 3
+ #if CYTHON_USE_PYLONG_INTERNALS
+ switch (Py_SIZE(b)) {
+ case -1: return -(sdigit)((PyLongObject*)b)->ob_digit[0];
+ case 0: return 0;
+ case 1: return ((PyLongObject*)b)->ob_digit[0];
+ }
+ #endif
+ #endif
+ return PyLong_AsSsize_t(b);
+ }
+ x = PyNumber_Index(b);
+ if (!x) return -1;
+ ival = PyInt_AsSsize_t(x);
+ Py_DECREF(x);
+ return ival;
+}
+static CYTHON_INLINE PyObject * __Pyx_PyInt_FromSize_t(size_t ival) {
+ return PyInt_FromSize_t(ival);
+}
+
+
+#endif /* Py_PYTHON_H */
diff --git a/src/kenlm/python/kenlm.pyx b/src/kenlm/python/kenlm.pyx
new file mode 100644
index 0000000..f312c90
--- /dev/null
+++ b/src/kenlm/python/kenlm.pyx
@@ -0,0 +1,231 @@
+import os
+cimport _kenlm
+
+cdef bytes as_str(data):
+ if isinstance(data, bytes):
+ return data
+ elif isinstance(data, unicode):
+ return data.encode('utf8')
+ raise TypeError('Cannot convert %s to string' % type(data))
+
+cdef class FullScoreReturn:
+ """
+ Wrapper around FullScoreReturn.
+
+ Notes:
+ `prob` has been renamed to `log_prob`
+ `oov` has been added to flag whether the word is OOV
+ """
+
+ cdef float log_prob
+ cdef int ngram_length
+ cdef bint oov
+
+ def __cinit__(self, log_prob, ngram_length, oov):
+ self.log_prob = log_prob
+ self.ngram_length = ngram_length
+ self.oov = oov
+
+ def __repr__(self):
+ return '{0}({1}, {2}, {3})'.format(self.__class__.__name__, repr(self.log_prob), repr(self.ngram_length), repr(self.oov))
+
+ property log_prob:
+ def __get__(self):
+ return self.log_prob
+
+ property ngram_length:
+ def __get__(self):
+ return self.ngram_length
+
+ property oov:
+ def __get__(self):
+ return self.oov
+
+cdef class State:
+ """
+ Wrapper around lm::ngram::State so that python code can make incremental queries.
+
+ Notes:
+ * rich comparisons
+ * hashable
+ """
+
+ cdef _kenlm.State _c_state
+
+ def __richcmp__(State qa, State qb, int op):
+ r = qa._c_state.Compare(qb._c_state)
+ if op == 0: # <
+ return r < 0
+ elif op == 1: # <=
+ return r <= 0
+ elif op == 2: # ==
+ return r == 0
+ elif op == 3: # !=
+ return r != 0
+ elif op == 4: # >
+ return r > 0
+ else: # >=
+ return r >= 0
+
+ def __hash__(self):
+ return _kenlm.hash_value(self._c_state)
+
+
+cdef class LanguageModel:
+ """
+ This is not a strict wrapper, the interface is more pythonic.
+ It loads models and queries full sentences.
+ """
+
+ cdef _kenlm.Model* model
+ cdef public bytes path
+ cdef _kenlm.const_Vocabulary* vocab
+
+ def __init__(self, path):
+ """
+ Load the language model.
+
+ :param path: path to an arpa file or a kenlm binary file.
+ """
+ self.path = os.path.abspath(as_str(path))
+ try:
+ self.model = _kenlm.LoadVirtual(self.path)
+ except RuntimeError as exception:
+ exception_message = str(exception).replace('\n', ' ')
+ raise IOError('Cannot read model \'{}\' ({})'.format(path, exception_message))\
+ from exception
+ self.vocab = &self.model.BaseVocabulary()
+
+ def __dealloc__(self):
+ del self.model
+
+ property order:
+ def __get__(self):
+ return self.model.Order()
+
+ def score(self, sentence, bos = True, eos = True):
+ cdef list words = as_str(sentence).split()
+ cdef _kenlm.State state
+ if bos:
+ self.model.BeginSentenceWrite(&state)
+ else:
+ self.model.NullContextWrite(&state)
+ cdef _kenlm.State out_state
+ cdef float total = 0
+ for word in words:
+ total += self.model.BaseScore(&state, self.vocab.Index(word), &out_state)
+ state = out_state
+ if eos:
+ total += self.model.BaseScore(&state, self.vocab.EndSentence(), &out_state)
+ return total
+
+ def full_scores(self, sentence, bos = True, eos = True):
+ """
+        full_scores(sentence, bos = True, eos = True) -> generate full scores (prob, ngram length, oov)
+ @param sentence is a string (do not use boundary symbols)
+ @param bos should kenlm add a bos state
+ @param eos should kenlm add an eos state
+ """
+ cdef list words = as_str(sentence).split()
+ cdef _kenlm.State state
+ if bos:
+ self.model.BeginSentenceWrite(&state)
+ else:
+ self.model.NullContextWrite(&state)
+ cdef _kenlm.State out_state
+ cdef _kenlm.FullScoreReturn ret
+ cdef float total = 0
+ cdef _kenlm.WordIndex wid
+ for word in words:
+ wid = self.vocab.Index(word)
+ ret = self.model.BaseFullScore(&state, wid, &out_state)
+ yield (ret.prob, ret.ngram_length, wid == 0)
+ state = out_state
+ if eos:
+ ret = self.model.BaseFullScore(&state,
+ self.vocab.EndSentence(), &out_state)
+ yield (ret.prob, ret.ngram_length, False)
+
+ def __contains__(self, word):
+ cdef bytes w = as_str(word)
+ return (self.vocab.Index(w) != 0)
+
+ def __repr__(self):
+ return '<LanguageModel from {0}>'.format(os.path.basename(self.path))
+
+ def __reduce__(self):
+ return (_kenlm.LanguageModel, (self.path,))
+
+cdef class Model:
+ """
+ This is closer to a wrapper around lm::ngram::Model.
+ """
+
+ cdef _kenlm.Model* model
+ cdef public bytes path
+ cdef _kenlm.const_Vocabulary* vocab
+
+ def __init__(self, path):
+ """
+ Load the language model.
+
+ :param path: path to an arpa file or a kenlm binary file.
+ """
+ self.path = os.path.abspath(as_str(path))
+ try:
+ self.model = _kenlm.LoadVirtual(self.path)
+ except RuntimeError as exception:
+ exception_message = str(exception).replace('\n', ' ')
+ raise IOError('Cannot read model \'{}\' ({})'.format(path, exception_message))\
+ from exception
+ self.vocab = &self.model.BaseVocabulary()
+
+ def __dealloc__(self):
+ del self.model
+
+ property order:
+ def __get__(self):
+ return self.model.Order()
+
+ def BeginSentenceWrite(self, State state):
+ """Change the given state to a BOS state."""
+ self.model.BeginSentenceWrite(&state._c_state)
+
+ def NullContextWrite(self, State state):
+ """Change the given state to a NULL state."""
+ self.model.NullContextWrite(&state._c_state)
+
+ def BaseScore(self, State in_state, str word, State out_state):
+ """
+ Return p(word|in_state) and update the output state.
+ Wrapper around model.BaseScore(in_state, Index(word), out_state)
+
+ :param word: the suffix
+ :param state: the context (defaults to NullContext)
+ :returns: p(word|state)
+ """
+ cdef float total = self.model.BaseScore(&in_state._c_state, self.vocab.Index(as_str(word)), &out_state._c_state)
+ return total
+
+ def BaseFullScore(self, State in_state, str word, State out_state):
+ """
+ Wrapper around model.BaseScore(in_state, Index(word), out_state)
+
+ :param word: the suffix
+ :param state: the context (defaults to NullContext)
+ :returns: FullScoreReturn(word|state)
+ """
+ cdef _kenlm.WordIndex wid = self.vocab.Index(as_str(word))
+ cdef _kenlm.FullScoreReturn ret = self.model.BaseFullScore(&in_state._c_state, wid, &out_state._c_state)
+ return FullScoreReturn(ret.prob, ret.ngram_length, wid == 0)
+
+ def __contains__(self, word):
+ cdef bytes w = as_str(word)
+ return (self.vocab.Index(w) != 0)
+
+ def __repr__(self):
+ return '<Model from {0}>'.format(os.path.basename(self.path))
+
+ def __reduce__(self):
+ return (_kenlm.LanguageModel, (self.path,))
+
diff --git a/src/kenlm/setup.py b/src/kenlm/setup.py
new file mode 100644
index 0000000..ee061a8
--- /dev/null
+++ b/src/kenlm/setup.py
@@ -0,0 +1,48 @@
+from distutils.core import setup
+from distutils.extension import Extension
+import glob
+import platform
+import os
+
+#Does gcc compile with this header and library?
+def compile_test(header, library):
+ dummy_path = os.path.join(os.path.dirname(__file__), "dummy")
+ command = "bash -c \"g++ -include " + header + " -l" + library + " -x c++ - <<<'int main() {}' -o " + dummy_path + " >/dev/null 2>/dev/null && rm " + dummy_path + " 2>/dev/null\""
+ return os.system(command) == 0
+
+
+FILES = glob.glob('util/*.cc') + glob.glob('lm/*.cc') + glob.glob('util/double-conversion/*.cc')
+FILES = [fn for fn in FILES if not (fn.endswith('main.cc') or fn.endswith('test.cc'))]
+
+LIBS = ['stdc++']
+if platform.system() != 'Darwin':
+ LIBS.append('rt')
+
+
+ARGS = ['-O3', '-DNDEBUG', '-DKENLM_MAX_ORDER=6']
+
+if compile_test('zlib.h', 'z'):
+ ARGS.append('-DHAVE_ZLIB')
+ LIBS.append('z')
+
+if compile_test('bzlib.h', 'bz2'):
+ ARGS.append('-DHAVE_BZLIB')
+ LIBS.append('bz2')
+
+if compile_test('lzma.h', 'lzma'):
+ ARGS.append('-DHAVE_XZLIB')
+ LIBS.append('lzma')
+
+ext_modules = [
+ Extension(name='kenlm',
+ sources=FILES + ['python/kenlm.cpp'],
+ language='C++',
+ include_dirs=['.'],
+ libraries=LIBS,
+ extra_compile_args=ARGS)
+]
+
+setup(
+ name='kenlm',
+ ext_modules=ext_modules
+)
diff --git a/src/kenlm/util/CMakeLists.txt b/src/kenlm/util/CMakeLists.txt
new file mode 100644
index 0000000..8a544aa
--- /dev/null
+++ b/src/kenlm/util/CMakeLists.txt
@@ -0,0 +1,81 @@
+cmake_minimum_required(VERSION 2.8.8)
+#
+# The KenLM cmake files make use of add_library(... OBJECTS ...)
+#
+# This syntax allows grouping of source files when compiling
+# (effectively creating "fake" libraries based on source subdirs).
+#
+# This syntax was only added in cmake version 2.8.8
+#
+# see http://www.cmake.org/Wiki/CMake/Tutorials/Object_Library
+
+
+# This CMake file was created by Lane Schwartz <dowobeha@gmail.com>
+
+
+# Explicitly list the source files for this subdirectory
+#
+# If you add any source files to this subdirectory
+# that should be included in the kenlm library,
+# (this excludes any unit test files)
+# you should add them to the following list:
+#
+# Because we do not set PARENT_SCOPE in the following definition,
+# CMake files in the parent directory won't be able to access this variable.
+#
+set(KENLM_UTIL_SOURCE
+ bit_packing.cc
+ ersatz_progress.cc
+ exception.cc
+ file.cc
+ file_piece.cc
+ float_to_string.cc
+ integer_to_string.cc
+ mmap.cc
+ murmur_hash.cc
+ parallel_read.cc
+ pool.cc
+ read_compressed.cc
+ scoped.cc
+ string_piece.cc
+ usage.cc
+ )
+
+# This directory has children that need to be processed
+add_subdirectory(double-conversion)
+add_subdirectory(stream)
+
+
+# Group these objects together for later use.
+#
+# Given add_library(foo OBJECT ${my_foo_sources}),
+# refer to these objects as $<TARGET_OBJECTS:foo>
+#
+add_library(kenlm_util OBJECT ${KENLM_UTIL_DOUBLECONVERSION_SOURCE} ${KENLM_UTIL_STREAM_SOURCE} ${KENLM_UTIL_SOURCE})
+
+
+
+# Only compile and run unit tests if tests should be run
+if(BUILD_TESTING)
+
+ # Explicitly list the Boost test files to be compiled
+ set(KENLM_BOOST_TESTS_LIST
+ bit_packing_test
+ joint_sort_test
+ multi_intersection_test
+ probing_hash_table_test
+ read_compressed_test
+ sorted_uniform_test
+ tokenize_piece_test
+ )
+
+ AddTests(TESTS ${KENLM_BOOST_TESTS_LIST}
+ DEPENDS $<TARGET_OBJECTS:kenlm_util>
+ LIBRARIES ${Boost_LIBRARIES} pthread)
+
+ # file_piece_test requires an extra command line parameter
+ KenLMAddTest(TEST file_piece_test
+ DEPENDS $<TARGET_OBJECTS:kenlm_util>
+ LIBRARIES ${Boost_LIBRARIES} pthread
+ TEST_ARGS ${CMAKE_CURRENT_SOURCE_DIR}/file_piece.cc)
+endif()
diff --git a/src/kenlm/util/Jamfile b/src/kenlm/util/Jamfile
new file mode 100644
index 0000000..2eeccf4
--- /dev/null
+++ b/src/kenlm/util/Jamfile
@@ -0,0 +1,41 @@
+local compressed_flags = <include>.. <define>HAVE_ZLIB ;
+local compressed_deps = /top//z ;
+if [ test_library "bz2" ] && [ test_header "bzlib.h" ] {
+ external-lib bz2 ;
+ compressed_flags += <define>HAVE_BZLIB ;
+ compressed_deps += bz2 ;
+}
+if [ test_library "lzma" ] && [ test_header "lzma.h" ] {
+ external-lib lzma ;
+ compressed_flags += <define>HAVE_XZLIB ;
+ compressed_deps += lzma ;
+}
+
+#rt is needed for clock_gettime on linux. But it's already included with threading=multi
+lib rt ;
+
+obj read_compressed.o : read_compressed.cc : $(compressed_flags) ;
+alias read_compressed : read_compressed.o $(compressed_deps) ;
+obj read_compressed_test.o : read_compressed_test.cc /top//boost_unit_test_framework : $(compressed_flags) ;
+obj file_piece_test.o : file_piece_test.cc /top//boost_unit_test_framework : $(compressed_flags) ;
+
+fakelib parallel_read : parallel_read.cc : <threading>multi:<source>/top//boost_thread <threading>multi:<define>WITH_THREADS : : <include>.. ;
+
+fakelib kenutil : [ glob *.cc : parallel_read.cc read_compressed.cc *_main.cc *_test.cc ] read_compressed parallel_read double-conversion//double-conversion : <include>.. <os>LINUX,<threading>single:<source>rt : : <include>.. ;
+
+exe cat_compressed : cat_compressed_main.cc kenutil ;
+
+#Does not install this
+exe probing_hash_table_benchmark : probing_hash_table_benchmark_main.cc kenutil ;
+
+alias programs : cat_compressed ;
+
+import testing ;
+
+run file_piece_test.o kenutil /top//boost_unit_test_framework : : file_piece.cc ;
+for local t in [ glob *_test.cc : file_piece_test.cc read_compressed_test.cc ] {
+ local name = [ MATCH "(.*)\.cc" : $(t) ] ;
+ unit-test $(name) : $(t) kenutil /top//boost_unit_test_framework /top//boost_system ;
+}
+
+build-project stream ;
diff --git a/src/kenlm/util/bit_packing.cc b/src/kenlm/util/bit_packing.cc
new file mode 100644
index 0000000..cffd9cf
--- /dev/null
+++ b/src/kenlm/util/bit_packing.cc
@@ -0,0 +1,40 @@
+#include "util/bit_packing.hh"
+#include "util/exception.hh"
+
+#include <cstring>
+
+namespace util {
+
+namespace {
+template <bool> struct StaticCheck {};
+template <> struct StaticCheck<true> { typedef bool StaticAssertionPassed; };
+
+// If your float isn't 4 bytes, we're hosed.
+typedef StaticCheck<sizeof(float) == 4>::StaticAssertionPassed FloatSize;
+
+} // namespace
+
+uint8_t RequiredBits(uint64_t max_value) {
+ if (!max_value) return 0;
+ uint8_t ret = 1;
+ while (max_value >>= 1) ++ret;
+ return ret;
+}
+
+void BitPackingSanity() {
+ const FloatEnc neg1 = { -1.0 }, pos1 = { 1.0 };
+ if ((neg1.i ^ pos1.i) != 0x80000000) UTIL_THROW(Exception, "Sign bit is not 0x80000000");
+ char mem[57+8];
+ memset(mem, 0, sizeof(mem));
+ const uint64_t test57 = 0x123456789abcdefULL;
+ for (uint64_t b = 0; b < 57 * 8; b += 57) {
+ WriteInt57(mem, b, 57, test57);
+ }
+ for (uint64_t b = 0; b < 57 * 8; b += 57) {
+ if (test57 != ReadInt57(mem, b, 57, (1ULL << 57) - 1))
+ UTIL_THROW(Exception, "The bit packing routines are failing for your architecture. Please send a bug report with your architecture, operating system, and compiler.");
+ }
+ // TODO: more checks.
+}
+
+} // namespace util
diff --git a/src/kenlm/util/bit_packing.hh b/src/kenlm/util/bit_packing.hh
new file mode 100644
index 0000000..b24fd9c
--- /dev/null
+++ b/src/kenlm/util/bit_packing.hh
@@ -0,0 +1,185 @@
+#ifndef UTIL_BIT_PACKING_H
+#define UTIL_BIT_PACKING_H
+
+/* Bit-level packing routines
+ *
+ * WARNING WARNING WARNING:
+ * The write functions assume that memory is zero initially. This makes them
+ * faster and is the appropriate case for mmapped language model construction.
+ * These routines assume that unaligned access to uint64_t is fast. This is
+ * the case on x86_64. I'm not sure how fast unaligned 64-bit access is on
+ * x86 but my target audience is large language models for which 64-bit is
+ * necessary.
+ *
+ * Call the BitPackingSanity function to sanity check. Calling once suffices,
+ * but it may be called multiple times when that's inconvenient.
+ *
+ * ARM and MinGW ports contributed by Hideo Okuma and Tomoyuki Yoshimura at
+ * NICT.
+ */
+
+#include <cassert>
+#ifdef __APPLE__
+#include <architecture/byte_order.h>
+#elif __linux__
+#include <endian.h>
+#elif !defined(_WIN32) && !defined(_WIN64)
+#include <arpa/nameser_compat.h>
+#endif
+
+#include <stdint.h>
+#include <cstring>
+
+namespace util {
+
+// Fun fact: __BYTE_ORDER is wrong on Solaris Sparc, but the version without __ is correct.
+#if BYTE_ORDER == LITTLE_ENDIAN
+inline uint8_t BitPackShift(uint8_t bit, uint8_t /*length*/) {
+ return bit;
+}
+#elif BYTE_ORDER == BIG_ENDIAN
+inline uint8_t BitPackShift(uint8_t bit, uint8_t length) {
+ return 64 - length - bit;
+}
+#else
+#error "Bit packing code isn't written for your byte order."
+#endif
+
+inline uint64_t ReadOff(const void *base, uint64_t bit_off) {
+#if defined(__arm) || defined(__arm__)
+ const uint8_t *base_off = reinterpret_cast<const uint8_t*>(base) + (bit_off >> 3);
+ uint64_t value64;
+ memcpy(&value64, base_off, sizeof(value64));
+ return value64;
+#else
+ return *reinterpret_cast<const uint64_t*>(reinterpret_cast<const uint8_t*>(base) + (bit_off >> 3));
+#endif
+}
+
+/* Pack integers up to 57 bits using their least significant digits.
+ * The length is specified using mask:
+ * Assumes mask == (1 << length) - 1 where length <= 57.
+ */
+inline uint64_t ReadInt57(const void *base, uint64_t bit_off, uint8_t length, uint64_t mask) {
+ return (ReadOff(base, bit_off) >> BitPackShift(bit_off & 7, length)) & mask;
+}
+/* Assumes value < (1 << length) and length <= 57.
+ * Assumes the memory is zero initially.
+ */
+inline void WriteInt57(void *base, uint64_t bit_off, uint8_t length, uint64_t value) {
+#if defined(__arm) || defined(__arm__)
+ uint8_t *base_off = reinterpret_cast<uint8_t*>(base) + (bit_off >> 3);
+ uint64_t value64;
+ memcpy(&value64, base_off, sizeof(value64));
+ value64 |= (value << BitPackShift(bit_off & 7, length));
+ memcpy(base_off, &value64, sizeof(value64));
+#else
+ *reinterpret_cast<uint64_t*>(reinterpret_cast<uint8_t*>(base) + (bit_off >> 3)) |=
+ (value << BitPackShift(bit_off & 7, length));
+#endif
+}
+
+/* Same caveats as above, but for a 25 bit limit. */
+inline uint32_t ReadInt25(const void *base, uint64_t bit_off, uint8_t length, uint32_t mask) {
+#if defined(__arm) || defined(__arm__)
+ const uint8_t *base_off = reinterpret_cast<const uint8_t*>(base) + (bit_off >> 3);
+ uint32_t value32;
+ memcpy(&value32, base_off, sizeof(value32));
+ return (value32 >> BitPackShift(bit_off & 7, length)) & mask;
+#else
+ return (*reinterpret_cast<const uint32_t*>(reinterpret_cast<const uint8_t*>(base) + (bit_off >> 3)) >> BitPackShift(bit_off & 7, length)) & mask;
+#endif
+}
+
+inline void WriteInt25(void *base, uint64_t bit_off, uint8_t length, uint32_t value) {
+#if defined(__arm) || defined(__arm__)
+ uint8_t *base_off = reinterpret_cast<uint8_t*>(base) + (bit_off >> 3);
+ uint32_t value32;
+ memcpy(&value32, base_off, sizeof(value32));
+ value32 |= (value << BitPackShift(bit_off & 7, length));
+ memcpy(base_off, &value32, sizeof(value32));
+#else
+ *reinterpret_cast<uint32_t*>(reinterpret_cast<uint8_t*>(base) + (bit_off >> 3)) |=
+ (value << BitPackShift(bit_off & 7, length));
+#endif
+}
+
+typedef union { float f; uint32_t i; } FloatEnc;
+
+inline float ReadFloat32(const void *base, uint64_t bit_off) {
+ FloatEnc encoded;
+ encoded.i = ReadOff(base, bit_off) >> BitPackShift(bit_off & 7, 32);
+ return encoded.f;
+}
+inline void WriteFloat32(void *base, uint64_t bit_off, float value) {
+ FloatEnc encoded;
+ encoded.f = value;
+ WriteInt57(base, bit_off, 32, encoded.i);
+}
+
+const uint32_t kSignBit = 0x80000000;
+
+inline void SetSign(float &to) {
+ FloatEnc enc;
+ enc.f = to;
+ enc.i |= kSignBit;
+ to = enc.f;
+}
+
+inline void UnsetSign(float &to) {
+ FloatEnc enc;
+ enc.f = to;
+ enc.i &= ~kSignBit;
+ to = enc.f;
+}
+
+inline float ReadNonPositiveFloat31(const void *base, uint64_t bit_off) {
+ FloatEnc encoded;
+ encoded.i = ReadOff(base, bit_off) >> BitPackShift(bit_off & 7, 31);
+ // Sign bit set means negative.
+ encoded.i |= kSignBit;
+ return encoded.f;
+}
+inline void WriteNonPositiveFloat31(void *base, uint64_t bit_off, float value) {
+ FloatEnc encoded;
+ encoded.f = value;
+ encoded.i &= ~kSignBit;
+ WriteInt57(base, bit_off, 31, encoded.i);
+}
+
+void BitPackingSanity();
+
+// Return bits required to store integers up to max_value.  Not the most
+// efficient implementation, but this is only called a few times to size tries.
+uint8_t RequiredBits(uint64_t max_value);
+
+struct BitsMask {
+ static BitsMask ByMax(uint64_t max_value) {
+ BitsMask ret;
+ ret.FromMax(max_value);
+ return ret;
+ }
+ static BitsMask ByBits(uint8_t bits) {
+ BitsMask ret;
+ ret.bits = bits;
+ ret.mask = (1ULL << bits) - 1;
+ return ret;
+ }
+ void FromMax(uint64_t max_value) {
+ bits = RequiredBits(max_value);
+ mask = (1ULL << bits) - 1;
+ }
+ uint8_t bits;
+ uint64_t mask;
+};
+
+struct BitAddress {
+ BitAddress(void *in_base, uint64_t in_offset) : base(in_base), offset(in_offset) {}
+
+ void *base;
+ uint64_t offset;
+};
+
+} // namespace util
+
+#endif // UTIL_BIT_PACKING_H
diff --git a/src/kenlm/util/bit_packing_test.cc b/src/kenlm/util/bit_packing_test.cc
new file mode 100644
index 0000000..c4494b6
--- /dev/null
+++ b/src/kenlm/util/bit_packing_test.cc
@@ -0,0 +1,59 @@
+#include "util/bit_packing.hh"
+
+#define BOOST_TEST_MODULE BitPackingTest
+#include <boost/test/unit_test.hpp>
+
+#include <cstring>
+
+namespace util {
+namespace {
+
+const uint64_t test57 = 0x123456789abcdefULL;
+const uint32_t test25 = 0x1234567;
+
+BOOST_AUTO_TEST_CASE(ZeroBit57) {
+ char mem[16];
+ memset(mem, 0, sizeof(mem));
+ WriteInt57(mem, 0, 57, test57);
+ BOOST_CHECK_EQUAL(test57, ReadInt57(mem, 0, 57, (1ULL << 57) - 1));
+}
+
+BOOST_AUTO_TEST_CASE(EachBit57) {
+ char mem[16];
+ for (uint8_t b = 0; b < 8; ++b) {
+ memset(mem, 0, sizeof(mem));
+ WriteInt57(mem, b, 57, test57);
+ BOOST_CHECK_EQUAL(test57, ReadInt57(mem, b, 57, (1ULL << 57) - 1));
+ }
+}
+
+BOOST_AUTO_TEST_CASE(Consecutive57) {
+ char mem[57+8];
+ memset(mem, 0, sizeof(mem));
+ for (uint64_t b = 0; b < 57 * 8; b += 57) {
+ WriteInt57(mem, b, 57, test57);
+ BOOST_CHECK_EQUAL(test57, ReadInt57(mem, b, 57, (1ULL << 57) - 1));
+ }
+ for (uint64_t b = 0; b < 57 * 8; b += 57) {
+ BOOST_CHECK_EQUAL(test57, ReadInt57(mem, b, 57, (1ULL << 57) - 1));
+ }
+}
+
+BOOST_AUTO_TEST_CASE(Consecutive25) {
+ char mem[25+8];
+ memset(mem, 0, sizeof(mem));
+ for (uint64_t b = 0; b < 25 * 8; b += 25) {
+ WriteInt25(mem, b, 25, test25);
+ BOOST_CHECK_EQUAL(test25, ReadInt25(mem, b, 25, (1ULL << 25) - 1));
+ }
+ for (uint64_t b = 0; b < 25 * 8; b += 25) {
+ BOOST_CHECK_EQUAL(test25, ReadInt25(mem, b, 25, (1ULL << 25) - 1));
+ }
+}
+
+BOOST_AUTO_TEST_CASE(Sanity) {
+ BitPackingSanity();
+}
+
+} // namespace
+} // namespace util
diff --git a/src/kenlm/util/cat_compressed_main.cc b/src/kenlm/util/cat_compressed_main.cc
new file mode 100644
index 0000000..0c7cda9
--- /dev/null
+++ b/src/kenlm/util/cat_compressed_main.cc
@@ -0,0 +1,47 @@
+// Like cat but interprets compressed files.
+#include "util/file.hh"
+#include "util/read_compressed.hh"
+
+#include <cstring>
+#include <iostream>
+
+namespace {
+const std::size_t kBufSize = 16384;
+void Copy(util::ReadCompressed &from, int to) {
+ util::scoped_malloc buffer(util::MallocOrThrow(kBufSize));
+ while (std::size_t amount = from.Read(buffer.get(), kBufSize)) {
+ util::WriteOrThrow(to, buffer.get(), amount);
+ }
+}
+} // namespace
+
+int main(int argc, char *argv[]) {
+ // Lane Schwartz likes -h and --help
+ for (int i = 1; i < argc; ++i) {
+ char *arg = argv[i];
+ if (!strcmp(arg, "--")) break;
+ if (!strcmp(arg, "-h") || !strcmp(arg, "--help")) {
+ std::cerr <<
+ "A cat implementation that interprets compressed files.\n"
+ "Usage: " << argv[0] << " [file1] [file2] ...\n"
+ "If no file is provided, then stdin is read.\n";
+ return 1;
+ }
+ }
+
+ try {
+ if (argc == 1) {
+ util::ReadCompressed in(0);
+ Copy(in, 1);
+ } else {
+ for (int i = 1; i < argc; ++i) {
+ util::ReadCompressed in(util::OpenReadOrThrow(argv[i]));
+ Copy(in, 1);
+ }
+ }
+ } catch (const std::exception &e) {
+ std::cerr << e.what() << std::endl;
+ return 2;
+ }
+ return 0;
+}
diff --git a/src/kenlm/util/double-conversion/CMakeLists.txt b/src/kenlm/util/double-conversion/CMakeLists.txt
new file mode 100644
index 0000000..e2cf02a
--- /dev/null
+++ b/src/kenlm/util/double-conversion/CMakeLists.txt
@@ -0,0 +1,39 @@
+cmake_minimum_required(VERSION 2.8.8)
+#
+# The KenLM cmake files make use of add_library(... OBJECTS ...)
+#
+# This syntax allows grouping of source files when compiling
+# (effectively creating "fake" libraries based on source subdirs).
+#
+# This syntax was only added in cmake version 2.8.8
+#
+# see http://www.cmake.org/Wiki/CMake/Tutorials/Object_Library
+
+
+# This CMake file was created by Lane Schwartz <dowobeha@gmail.com>
+
+# Explicitly list the source files for this subdirectory
+#
+# If you add any source files to this subdirectory
+# that should be included in the kenlm library,
+# (this excludes any unit test files)
+# you should add them to the following list:
+#
+# In order to allow CMake files in the parent directory
+# to see this variable definition, we set PARENT_SCOPE.
+#
+# In order to set correct paths to these files
+# when this variable is referenced by CMake files in the parent directory,
+# we prefix all files with ${CMAKE_CURRENT_SOURCE_DIR}.
+#
+set(KENLM_UTIL_DOUBLECONVERSION_SOURCE
+ ${CMAKE_CURRENT_SOURCE_DIR}/bignum-dtoa.cc
+ ${CMAKE_CURRENT_SOURCE_DIR}/bignum.cc
+ ${CMAKE_CURRENT_SOURCE_DIR}/cached-powers.cc
+ ${CMAKE_CURRENT_SOURCE_DIR}/diy-fp.cc
+ ${CMAKE_CURRENT_SOURCE_DIR}/double-conversion.cc
+ ${CMAKE_CURRENT_SOURCE_DIR}/fast-dtoa.cc
+ ${CMAKE_CURRENT_SOURCE_DIR}/fixed-dtoa.cc
+ ${CMAKE_CURRENT_SOURCE_DIR}/strtod.cc
+ PARENT_SCOPE)
+
diff --git a/src/joshua/decoder/ff/lm/kenlm/util/double-conversion/Jamfile b/src/kenlm/util/double-conversion/Jamfile
similarity index 100%
rename from src/joshua/decoder/ff/lm/kenlm/util/double-conversion/Jamfile
rename to src/kenlm/util/double-conversion/Jamfile
diff --git a/src/joshua/decoder/ff/lm/kenlm/util/double-conversion/LICENSE b/src/kenlm/util/double-conversion/LICENSE
similarity index 100%
rename from src/joshua/decoder/ff/lm/kenlm/util/double-conversion/LICENSE
rename to src/kenlm/util/double-conversion/LICENSE
diff --git a/src/kenlm/util/double-conversion/bignum-dtoa.cc b/src/kenlm/util/double-conversion/bignum-dtoa.cc
new file mode 100644
index 0000000..3d217bf
--- /dev/null
+++ b/src/kenlm/util/double-conversion/bignum-dtoa.cc
@@ -0,0 +1,640 @@
+// Copyright 2010 the V8 project authors. All rights reserved.
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following
+// disclaimer in the documentation and/or other materials provided
+// with the distribution.
+// * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived
+// from this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#include <cmath>
+
+#include "bignum-dtoa.h"
+
+#include "bignum.h"
+#include "ieee.h"
+
+namespace double_conversion {
+
+static int NormalizedExponent(uint64_t significand, int exponent) {
+ ASSERT(significand != 0);
+ while ((significand & Double::kHiddenBit) == 0) {
+ significand = significand << 1;
+ exponent = exponent - 1;
+ }
+ return exponent;
+}
+
+
+// Forward declarations:
+// Returns an estimation of k such that 10^(k-1) <= v < 10^k.
+static int EstimatePower(int exponent);
+// Computes v / 10^estimated_power exactly, as a ratio of two bignums, numerator
+// and denominator.
+static void InitialScaledStartValues(uint64_t significand,
+ int exponent,
+ bool lower_boundary_is_closer,
+ int estimated_power,
+ bool need_boundary_deltas,
+ Bignum* numerator,
+ Bignum* denominator,
+ Bignum* delta_minus,
+ Bignum* delta_plus);
+// Multiplies numerator/denominator so that its value lies in the range 1-10.
+// Returns decimal_point s.t.
+// v = numerator'/denominator' * 10^(decimal_point-1)
+// where numerator' and denominator' are the values of numerator and
+// denominator after the call to this function.
+static void FixupMultiply10(int estimated_power, bool is_even,
+ int* decimal_point,
+ Bignum* numerator, Bignum* denominator,
+ Bignum* delta_minus, Bignum* delta_plus);
+// Generates digits from the left to the right and stops when the generated
+// digits yield the shortest decimal representation of v.
+static void GenerateShortestDigits(Bignum* numerator, Bignum* denominator,
+ Bignum* delta_minus, Bignum* delta_plus,
+ bool is_even,
+ Vector<char> buffer, int* length);
+// Generates 'requested_digits' after the decimal point.
+static void BignumToFixed(int requested_digits, int* decimal_point,
+ Bignum* numerator, Bignum* denominator,
+ Vector<char>(buffer), int* length);
+// Generates 'count' digits of numerator/denominator.
+// Once 'count' digits have been produced rounds the result depending on the
+// remainder (remainders of exactly .5 round upwards). Might update the
+// decimal_point when rounding up (for example for 0.9999).
+static void GenerateCountedDigits(int count, int* decimal_point,
+ Bignum* numerator, Bignum* denominator,
+ Vector<char>(buffer), int* length);
+
+
+void BignumDtoa(double v, BignumDtoaMode mode, int requested_digits,
+ Vector<char> buffer, int* length, int* decimal_point) {
+ ASSERT(v > 0);
+ ASSERT(!Double(v).IsSpecial());
+ uint64_t significand;
+ int exponent;
+ bool lower_boundary_is_closer;
+ if (mode == BIGNUM_DTOA_SHORTEST_SINGLE) {
+ float f = static_cast<float>(v);
+ ASSERT(f == v);
+ significand = Single(f).Significand();
+ exponent = Single(f).Exponent();
+ lower_boundary_is_closer = Single(f).LowerBoundaryIsCloser();
+ } else {
+ significand = Double(v).Significand();
+ exponent = Double(v).Exponent();
+ lower_boundary_is_closer = Double(v).LowerBoundaryIsCloser();
+ }
+ bool need_boundary_deltas =
+ (mode == BIGNUM_DTOA_SHORTEST || mode == BIGNUM_DTOA_SHORTEST_SINGLE);
+
+ bool is_even = (significand & 1) == 0;
+ int normalized_exponent = NormalizedExponent(significand, exponent);
+ // estimated_power might be too low by 1.
+ int estimated_power = EstimatePower(normalized_exponent);
+
+ // Shortcut for Fixed.
+ // The requested digits correspond to the digits after the point. If the
+ // number is much too small, then there is no need in trying to get any
+ // digits.
+ if (mode == BIGNUM_DTOA_FIXED && -estimated_power - 1 > requested_digits) {
+ buffer[0] = '\0';
+ *length = 0;
+ // Set decimal-point to -requested_digits. This is what Gay does.
+ // Note that it should not have any effect anyways since the string is
+ // empty.
+ *decimal_point = -requested_digits;
+ return;
+ }
+
+ Bignum numerator;
+ Bignum denominator;
+ Bignum delta_minus;
+ Bignum delta_plus;
+ // Make sure the bignum can grow large enough. The smallest double equals
+ // 4e-324. In this case the denominator needs fewer than 324*4 binary digits.
+ // The maximum double is 1.7976931348623157e308 which needs fewer than
+ // 308*4 binary digits.
+ ASSERT(Bignum::kMaxSignificantBits >= 324*4);
+ InitialScaledStartValues(significand, exponent, lower_boundary_is_closer,
+ estimated_power, need_boundary_deltas,
+ &numerator, &denominator,
+ &delta_minus, &delta_plus);
+ // We now have v = (numerator / denominator) * 10^estimated_power.
+ FixupMultiply10(estimated_power, is_even, decimal_point,
+ &numerator, &denominator,
+ &delta_minus, &delta_plus);
+ // We now have v = (numerator / denominator) * 10^(decimal_point-1), and
+ // 1 <= (numerator + delta_plus) / denominator < 10
+ switch (mode) {
+ case BIGNUM_DTOA_SHORTEST:
+ case BIGNUM_DTOA_SHORTEST_SINGLE:
+ GenerateShortestDigits(&numerator, &denominator,
+ &delta_minus, &delta_plus,
+ is_even, buffer, length);
+ break;
+ case BIGNUM_DTOA_FIXED:
+ BignumToFixed(requested_digits, decimal_point,
+ &numerator, &denominator,
+ buffer, length);
+ break;
+ case BIGNUM_DTOA_PRECISION:
+ GenerateCountedDigits(requested_digits, decimal_point,
+ &numerator, &denominator,
+ buffer, length);
+ break;
+ default:
+ UNREACHABLE();
+ }
+ buffer[*length] = '\0';
+}
+
+
+// The procedure starts generating digits from the left to the right and stops
+// when the generated digits yield the shortest decimal representation of v. A
+// decimal representation of v is a number lying closer to v than to any other
+// double, so it converts to v when read.
+//
+// This is true if d, the decimal representation, is between m- and m+, the
+// upper and lower boundaries. d must be strictly between them if !is_even.
+// m- := (numerator - delta_minus) / denominator
+// m+ := (numerator + delta_plus) / denominator
+//
+// Precondition: 0 <= (numerator+delta_plus) / denominator < 10.
+// If 1 <= (numerator+delta_plus) / denominator < 10 then no leading 0 digit
+// will be produced. This should be the standard precondition.
+static void GenerateShortestDigits(Bignum* numerator, Bignum* denominator,
+ Bignum* delta_minus, Bignum* delta_plus,
+ bool is_even,
+ Vector<char> buffer, int* length) {
+ // Small optimization: if delta_minus and delta_plus are the same just reuse
+ // one of the two bignums.
+ if (Bignum::Equal(*delta_minus, *delta_plus)) {
+ delta_plus = delta_minus;
+ }
+ *length = 0;
+ while (true) {
+ uint16_t digit;
+ digit = numerator->DivideModuloIntBignum(*denominator);
+ ASSERT(digit <= 9); // digit is a uint16_t and therefore always positive.
+ // digit = numerator / denominator (integer division).
+ // numerator = numerator % denominator.
+ buffer[(*length)++] = digit + '0';
+
+ // Can we stop already?
+ // If the remainder of the division is less than the distance to the lower
+ // boundary we can stop. In this case we simply round down (discarding the
+ // remainder).
+ // Similarly we test if we can round up (using the upper boundary).
+ bool in_delta_room_minus;
+ bool in_delta_room_plus;
+ if (is_even) {
+ in_delta_room_minus = Bignum::LessEqual(*numerator, *delta_minus);
+ } else {
+ in_delta_room_minus = Bignum::Less(*numerator, *delta_minus);
+ }
+ if (is_even) {
+ in_delta_room_plus =
+ Bignum::PlusCompare(*numerator, *delta_plus, *denominator) >= 0;
+ } else {
+ in_delta_room_plus =
+ Bignum::PlusCompare(*numerator, *delta_plus, *denominator) > 0;
+ }
+ if (!in_delta_room_minus && !in_delta_room_plus) {
+ // Prepare for next iteration.
+ numerator->Times10();
+ delta_minus->Times10();
+ // We optimized delta_plus to be equal to delta_minus (if they share the
+ // same value). So don't multiply delta_plus if they point to the same
+ // object.
+ if (delta_minus != delta_plus) {
+ delta_plus->Times10();
+ }
+ } else if (in_delta_room_minus && in_delta_room_plus) {
+ // Let's see if 2*numerator < denominator.
+ // If yes, then the next digit would be < 5 and we can round down.
+ int compare = Bignum::PlusCompare(*numerator, *numerator, *denominator);
+ if (compare < 0) {
+ // Remaining digits are less than .5. -> Round down (== do nothing).
+ } else if (compare > 0) {
+ // Remaining digits are more than .5 of denominator. -> Round up.
+ // Note that the last digit could not be a '9' as otherwise the whole
+ // loop would have stopped earlier.
+ // We still have an assert here in case the preconditions were not
+ // satisfied.
+ ASSERT(buffer[(*length) - 1] != '9');
+ buffer[(*length) - 1]++;
+ } else {
+ // Halfway case.
+ // TODO(floitsch): need a way to solve half-way cases.
+ // For now let's round towards even (since this is what Gay seems to
+ // do).
+
+ if ((buffer[(*length) - 1] - '0') % 2 == 0) {
+ // Round down => Do nothing.
+ } else {
+ ASSERT(buffer[(*length) - 1] != '9');
+ buffer[(*length) - 1]++;
+ }
+ }
+ return;
+ } else if (in_delta_room_minus) {
+ // Round down (== do nothing).
+ return;
+ } else { // in_delta_room_plus
+ // Round up.
+ // Note again that the last digit could not be '9' since this would have
+ // stopped the loop earlier.
+ // We still have an ASSERT here, in case the preconditions were not
+ // satisfied.
+ ASSERT(buffer[(*length) -1] != '9');
+ buffer[(*length) - 1]++;
+ return;
+ }
+ }
+}
+
+
+// Let v = numerator / denominator < 10.
+// Then we generate 'count' digits of d = x.xxxxx... (without the decimal point)
+// from left to right. Once 'count' digits have been produced we decide whether
+// to round up or down. Remainders of exactly .5 round upwards. Numbers such
+// as 9.999999 propagate a carry all the way, and change the
+// exponent (decimal_point), when rounding upwards.
+static void GenerateCountedDigits(int count, int* decimal_point,
+ Bignum* numerator, Bignum* denominator,
+ Vector<char>(buffer), int* length) {
+ ASSERT(count >= 0);
+ for (int i = 0; i < count - 1; ++i) {
+ uint16_t digit;
+ digit = numerator->DivideModuloIntBignum(*denominator);
+ ASSERT(digit <= 9); // digit is a uint16_t and therefore always positive.
+ // digit = numerator / denominator (integer division).
+ // numerator = numerator % denominator.
+ buffer[i] = digit + '0';
+ // Prepare for next iteration.
+ numerator->Times10();
+ }
+ // Generate the last digit.
+ uint16_t digit;
+ digit = numerator->DivideModuloIntBignum(*denominator);
+ if (Bignum::PlusCompare(*numerator, *numerator, *denominator) >= 0) {
+ digit++;
+ }
+ buffer[count - 1] = digit + '0';
+ // Correct bad digits (in case we had a sequence of '9's). Propagate the
+  // carry until we hit a non-'9' or until we reach the first digit.
+ for (int i = count - 1; i > 0; --i) {
+ if (buffer[i] != '0' + 10) break;
+ buffer[i] = '0';
+ buffer[i - 1]++;
+ }
+ if (buffer[0] == '0' + 10) {
+ // Propagate a carry past the top place.
+ buffer[0] = '1';
+ (*decimal_point)++;
+ }
+ *length = count;
+}
+
+
+// Generates 'requested_digits' after the decimal point. It might omit
+// trailing '0's. If the input number is too small then no digits at all are
+// generated (ex.: 2 fixed digits for 0.00001).
+//
+// Input verifies: 1 <= (numerator + delta) / denominator < 10.
+static void BignumToFixed(int requested_digits, int* decimal_point,
+ Bignum* numerator, Bignum* denominator,
+ Vector<char>(buffer), int* length) {
+ // Note that we have to look at more than just the requested_digits, since
+ // a number could be rounded up. Example: v=0.5 with requested_digits=0.
+ // Even though the power of v equals 0 we can't just stop here.
+ if (-(*decimal_point) > requested_digits) {
+ // The number is definitively too small.
+ // Ex: 0.001 with requested_digits == 1.
+ // Set decimal-point to -requested_digits. This is what Gay does.
+ // Note that it should not have any effect anyways since the string is
+ // empty.
+ *decimal_point = -requested_digits;
+ *length = 0;
+ return;
+ } else if (-(*decimal_point) == requested_digits) {
+ // We only need to verify if the number rounds down or up.
+ // Ex: 0.04 and 0.06 with requested_digits == 1.
+ ASSERT(*decimal_point == -requested_digits);
+ // Initially the fraction lies in range (1, 10]. Multiply the denominator
+ // by 10 so that we can compare more easily.
+ denominator->Times10();
+ if (Bignum::PlusCompare(*numerator, *numerator, *denominator) >= 0) {
+ // If the fraction is >= 0.5 then we have to include the rounded
+ // digit.
+ buffer[0] = '1';
+ *length = 1;
+ (*decimal_point)++;
+ } else {
+ // Note that we caught most of similar cases earlier.
+ *length = 0;
+ }
+ return;
+ } else {
+ // The requested digits correspond to the digits after the point.
+ // The variable 'needed_digits' includes the digits before the point.
+ int needed_digits = (*decimal_point) + requested_digits;
+ GenerateCountedDigits(needed_digits, decimal_point,
+ numerator, denominator,
+ buffer, length);
+ }
+}
+
+
+// Returns an estimation of k such that 10^(k-1) <= v < 10^k where
+// v = f * 2^exponent and 2^52 <= f < 2^53.
+// v is hence a normalized double with the given exponent. The output is an
+// approximation for the exponent of the decimal approximation .digits * 10^k.
+//
+// The result might undershoot by 1 in which case 10^k <= v < 10^k+1.
+// Note: this property holds for v's upper boundary m+ too.
+// 10^k <= m+ < 10^k+1.
+// (see explanation below).
+//
+// Examples:
+// EstimatePower(0) => 16
+// EstimatePower(-52) => 0
+//
+// Note: e >= 0 => EstimatedPower(e) > 0. No similar claim can be made for e<0.
+static int EstimatePower(int exponent) {
+ // This function estimates log10 of v where v = f*2^e (with e == exponent).
+ // Note that 10^floor(log10(v)) <= v, but v <= 10^ceil(log10(v)).
+ // Note that f is bounded by its container size. Let p = 53 (the double's
+ // significand size). Then 2^(p-1) <= f < 2^p.
+ //
+ // Given that log10(v) == log2(v)/log2(10) and e+(len(f)-1) is quite close
+ // to log2(v) the function is simplified to (e+(len(f)-1)/log2(10)).
+ // The computed number undershoots by less than 0.631 (when we compute log3
+ // and not log10).
+ //
+ // Optimization: since we only need an approximated result this computation
+ // can be performed on 64 bit integers. On x86/x64 architecture the speedup is
+ // not really measurable, though.
+ //
+ // Since we want to avoid overshooting we decrement by 1e10 so that
+ // floating-point imprecisions don't affect us.
+ //
+ // Explanation for v's boundary m+: the computation takes advantage of
+ // the fact that 2^(p-1) <= f < 2^p. Boundaries still satisfy this requirement
+ // (even for denormals where the delta can be much more important).
+
+ const double k1Log10 = 0.30102999566398114; // 1/lg(10)
+
+ // For doubles len(f) == 53 (don't forget the hidden bit).
+ const int kSignificandSize = Double::kSignificandSize;
+ double estimate = ceil((exponent + kSignificandSize - 1) * k1Log10 - 1e-10);
+ return static_cast<int>(estimate);
+}
+
+
+// See comments for InitialScaledStartValues.
+static void InitialScaledStartValuesPositiveExponent(
+ uint64_t significand, int exponent,
+ int estimated_power, bool need_boundary_deltas,
+ Bignum* numerator, Bignum* denominator,
+ Bignum* delta_minus, Bignum* delta_plus) {
+ // A positive exponent implies a positive power.
+ ASSERT(estimated_power >= 0);
+ // Since the estimated_power is positive we simply multiply the denominator
+ // by 10^estimated_power.
+
+ // numerator = v.
+ numerator->AssignUInt64(significand);
+ numerator->ShiftLeft(exponent);
+ // denominator = 10^estimated_power.
+ denominator->AssignPowerUInt16(10, estimated_power);
+
+ if (need_boundary_deltas) {
+ // Introduce a common denominator so that the deltas to the boundaries are
+ // integers.
+ denominator->ShiftLeft(1);
+ numerator->ShiftLeft(1);
+ // Let v = f * 2^e, then m+ - v = 1/2 * 2^e; With the common
+ // denominator (of 2) delta_plus equals 2^e.
+ delta_plus->AssignUInt16(1);
+ delta_plus->ShiftLeft(exponent);
+ // Same for delta_minus. The adjustments if f == 2^p-1 are done later.
+ delta_minus->AssignUInt16(1);
+ delta_minus->ShiftLeft(exponent);
+ }
+}
+
+
+// See comments for InitialScaledStartValues
+static void InitialScaledStartValuesNegativeExponentPositivePower(
+ uint64_t significand, int exponent,
+ int estimated_power, bool need_boundary_deltas,
+ Bignum* numerator, Bignum* denominator,
+ Bignum* delta_minus, Bignum* delta_plus) {
+ // v = f * 2^e with e < 0, and with estimated_power >= 0.
+ // This means that e is close to 0 (have a look at how estimated_power is
+ // computed).
+
+ // numerator = significand
+ // since v = significand * 2^exponent this is equivalent to
+ // numerator = v * / 2^-exponent
+ numerator->AssignUInt64(significand);
+ // denominator = 10^estimated_power * 2^-exponent (with exponent < 0)
+ denominator->AssignPowerUInt16(10, estimated_power);
+ denominator->ShiftLeft(-exponent);
+
+ if (need_boundary_deltas) {
+ // Introduce a common denominator so that the deltas to the boundaries are
+ // integers.
+ denominator->ShiftLeft(1);
+ numerator->ShiftLeft(1);
+ // Let v = f * 2^e, then m+ - v = 1/2 * 2^e; With the common
+ // denominator (of 2) delta_plus equals 2^e.
+ // Given that the denominator already includes v's exponent the distance
+ // to the boundaries is simply 1.
+ delta_plus->AssignUInt16(1);
+ // Same for delta_minus. The adjustments if f == 2^p-1 are done later.
+ delta_minus->AssignUInt16(1);
+ }
+}
+
+
+// See comments for InitialScaledStartValues
+static void InitialScaledStartValuesNegativeExponentNegativePower(
+ uint64_t significand, int exponent,
+ int estimated_power, bool need_boundary_deltas,
+ Bignum* numerator, Bignum* denominator,
+ Bignum* delta_minus, Bignum* delta_plus) {
+ // Instead of multiplying the denominator with 10^estimated_power we
+ // multiply all values (numerator and deltas) by 10^-estimated_power.
+
+ // Use numerator as temporary container for power_ten.
+ Bignum* power_ten = numerator;
+ power_ten->AssignPowerUInt16(10, -estimated_power);
+
+ if (need_boundary_deltas) {
+ // Since power_ten == numerator we must make a copy of 10^estimated_power
+ // before we complete the computation of the numerator.
+ // delta_plus = delta_minus = 10^estimated_power
+ delta_plus->AssignBignum(*power_ten);
+ delta_minus->AssignBignum(*power_ten);
+ }
+
+ // numerator = significand * 2 * 10^-estimated_power
+ // since v = significand * 2^exponent this is equivalent to
+ // numerator = v * 10^-estimated_power * 2 * 2^-exponent.
+ // Remember: numerator has been abused as power_ten. So no need to assign it
+ // to itself.
+ ASSERT(numerator == power_ten);
+ numerator->MultiplyByUInt64(significand);
+
+ // denominator = 2 * 2^-exponent with exponent < 0.
+ denominator->AssignUInt16(1);
+ denominator->ShiftLeft(-exponent);
+
+ if (need_boundary_deltas) {
+ // Introduce a common denominator so that the deltas to the boundaries are
+ // integers.
+ numerator->ShiftLeft(1);
+ denominator->ShiftLeft(1);
+ // With this shift the boundaries have their correct value, since
+ // delta_plus = 10^-estimated_power, and
+ // delta_minus = 10^-estimated_power.
+ // These assignments have been done earlier.
+ // The adjustments if f == 2^p-1 (lower boundary is closer) are done later.
+ }
+}
+
+
+// Let v = significand * 2^exponent.
+// Computes v / 10^estimated_power exactly, as a ratio of two bignums, numerator
+// and denominator. The functions GenerateShortestDigits and
+// GenerateCountedDigits will then convert this ratio to its decimal
+// representation d, with the required accuracy.
+// Then d * 10^estimated_power is the representation of v.
+// (Note: the fraction and the estimated_power might get adjusted before
+// generating the decimal representation.)
+//
+// The initial start values consist of:
+// - a scaled numerator: s.t. numerator/denominator == v / 10^estimated_power.
+// - a scaled (common) denominator.
+// optionally (used by GenerateShortestDigits to decide if it has the shortest
+// decimal converting back to v):
+// - v - m-: the distance to the lower boundary.
+// - m+ - v: the distance to the upper boundary.
+//
+// v, m+, m-, and therefore v - m- and m+ - v all share the same denominator.
+//
+// Let ep == estimated_power, then the returned values will satisfy:
+// v / 10^ep = numerator / denominator.
+// v's boundaries m- and m+:
+// m- / 10^ep == v / 10^ep - delta_minus / denominator
+// m+ / 10^ep == v / 10^ep + delta_plus / denominator
+// Or in other words:
+// m- == v - delta_minus * 10^ep / denominator;
+// m+ == v + delta_plus * 10^ep / denominator;
+//
+// Since 10^(k-1) <= v < 10^k (with k == estimated_power)
+// or 10^k <= v < 10^(k+1)
+// we then have 0.1 <= numerator/denominator < 1
+// or 1 <= numerator/denominator < 10
+//
+// It is then easy to kickstart the digit-generation routine.
+//
+// The boundary-deltas are only filled if the mode equals BIGNUM_DTOA_SHORTEST
+// or BIGNUM_DTOA_SHORTEST_SINGLE.
+
+static void InitialScaledStartValues(uint64_t significand,
+ int exponent,
+ bool lower_boundary_is_closer,
+ int estimated_power,
+ bool need_boundary_deltas,
+ Bignum* numerator,
+ Bignum* denominator,
+ Bignum* delta_minus,
+ Bignum* delta_plus) {
+ if (exponent >= 0) {
+ InitialScaledStartValuesPositiveExponent(
+ significand, exponent, estimated_power, need_boundary_deltas,
+ numerator, denominator, delta_minus, delta_plus);
+ } else if (estimated_power >= 0) {
+ InitialScaledStartValuesNegativeExponentPositivePower(
+ significand, exponent, estimated_power, need_boundary_deltas,
+ numerator, denominator, delta_minus, delta_plus);
+ } else {
+ InitialScaledStartValuesNegativeExponentNegativePower(
+ significand, exponent, estimated_power, need_boundary_deltas,
+ numerator, denominator, delta_minus, delta_plus);
+ }
+
+ if (need_boundary_deltas && lower_boundary_is_closer) {
+ // The lower boundary is closer at half the distance of "normal" numbers.
+ // Increase the common denominator and adapt all but the delta_minus.
+ denominator->ShiftLeft(1); // *2
+ numerator->ShiftLeft(1); // *2
+ delta_plus->ShiftLeft(1); // *2
+ }
+}
+
+
+// This routine multiplies numerator/denominator so that its value lies in the
+// range 1-10. That is after a call to this function we have:
+// 1 <= (numerator + delta_plus) /denominator < 10.
+// Let numerator the input before modification and numerator' the argument
+// after modification, then the output-parameter decimal_point is such that
+// numerator / denominator * 10^estimated_power ==
+// numerator' / denominator' * 10^(decimal_point - 1)
+// In some cases estimated_power was too low, and this is already the case. We
+// then simply adjust the power so that 10^(k-1) <= v < 10^k (with k ==
+// estimated_power) but do not touch the numerator or denominator.
+// Otherwise the routine multiplies the numerator and the deltas by 10.
+static void FixupMultiply10(int estimated_power, bool is_even,
+ int* decimal_point,
+ Bignum* numerator, Bignum* denominator,
+ Bignum* delta_minus, Bignum* delta_plus) {
+ bool in_range;
+ if (is_even) {
+ // For IEEE doubles half-way cases (in decimal system numbers ending with 5)
+ // are rounded to the closest floating-point number with even significand.
+ in_range = Bignum::PlusCompare(*numerator, *delta_plus, *denominator) >= 0;
+ } else {
+ in_range = Bignum::PlusCompare(*numerator, *delta_plus, *denominator) > 0;
+ }
+ if (in_range) {
+ // Since numerator + delta_plus >= denominator we already have
+ // 1 <= numerator/denominator < 10. Simply update the estimated_power.
+ *decimal_point = estimated_power + 1;
+ } else {
+ *decimal_point = estimated_power;
+ numerator->Times10();
+ if (Bignum::Equal(*delta_minus, *delta_plus)) {
+ delta_minus->Times10();
+ delta_plus->AssignBignum(*delta_minus);
+ } else {
+ delta_minus->Times10();
+ delta_plus->Times10();
+ }
+ }
+}
+
+} // namespace double_conversion
diff --git a/src/joshua/decoder/ff/lm/kenlm/util/double-conversion/bignum-dtoa.h b/src/kenlm/util/double-conversion/bignum-dtoa.h
similarity index 100%
rename from src/joshua/decoder/ff/lm/kenlm/util/double-conversion/bignum-dtoa.h
rename to src/kenlm/util/double-conversion/bignum-dtoa.h
diff --git a/src/joshua/decoder/ff/lm/kenlm/util/double-conversion/bignum.cc b/src/kenlm/util/double-conversion/bignum.cc
similarity index 100%
rename from src/joshua/decoder/ff/lm/kenlm/util/double-conversion/bignum.cc
rename to src/kenlm/util/double-conversion/bignum.cc
diff --git a/src/joshua/decoder/ff/lm/kenlm/util/double-conversion/bignum.h b/src/kenlm/util/double-conversion/bignum.h
similarity index 100%
rename from src/joshua/decoder/ff/lm/kenlm/util/double-conversion/bignum.h
rename to src/kenlm/util/double-conversion/bignum.h
diff --git a/src/kenlm/util/double-conversion/cached-powers.cc b/src/kenlm/util/double-conversion/cached-powers.cc
new file mode 100644
index 0000000..9dcfa36
--- /dev/null
+++ b/src/kenlm/util/double-conversion/cached-powers.cc
@@ -0,0 +1,175 @@
+// Copyright 2006-2008 the V8 project authors. All rights reserved.
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following
+// disclaimer in the documentation and/or other materials provided
+// with the distribution.
+// * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived
+// from this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#include <cstdarg>
+#include <climits>
+#include <cmath>
+
+#include "utils.h"
+
+#include "cached-powers.h"
+
+namespace double_conversion {
+
+struct CachedPower {
+ uint64_t significand;
+ int16_t binary_exponent;
+ int16_t decimal_exponent;
+};
+
+static const CachedPower kCachedPowers[] = {
+ {UINT64_2PART_C(0xfa8fd5a0, 081c0288), -1220, -348},
+ {UINT64_2PART_C(0xbaaee17f, a23ebf76), -1193, -340},
+ {UINT64_2PART_C(0x8b16fb20, 3055ac76), -1166, -332},
+ {UINT64_2PART_C(0xcf42894a, 5dce35ea), -1140, -324},
+ {UINT64_2PART_C(0x9a6bb0aa, 55653b2d), -1113, -316},
+ {UINT64_2PART_C(0xe61acf03, 3d1a45df), -1087, -308},
+ {UINT64_2PART_C(0xab70fe17, c79ac6ca), -1060, -300},
+ {UINT64_2PART_C(0xff77b1fc, bebcdc4f), -1034, -292},
+ {UINT64_2PART_C(0xbe5691ef, 416bd60c), -1007, -284},
+ {UINT64_2PART_C(0x8dd01fad, 907ffc3c), -980, -276},
+ {UINT64_2PART_C(0xd3515c28, 31559a83), -954, -268},
+ {UINT64_2PART_C(0x9d71ac8f, ada6c9b5), -927, -260},
+ {UINT64_2PART_C(0xea9c2277, 23ee8bcb), -901, -252},
+ {UINT64_2PART_C(0xaecc4991, 4078536d), -874, -244},
+ {UINT64_2PART_C(0x823c1279, 5db6ce57), -847, -236},
+ {UINT64_2PART_C(0xc2109436, 4dfb5637), -821, -228},
+ {UINT64_2PART_C(0x9096ea6f, 3848984f), -794, -220},
+ {UINT64_2PART_C(0xd77485cb, 25823ac7), -768, -212},
+ {UINT64_2PART_C(0xa086cfcd, 97bf97f4), -741, -204},
+ {UINT64_2PART_C(0xef340a98, 172aace5), -715, -196},
+ {UINT64_2PART_C(0xb23867fb, 2a35b28e), -688, -188},
+ {UINT64_2PART_C(0x84c8d4df, d2c63f3b), -661, -180},
+ {UINT64_2PART_C(0xc5dd4427, 1ad3cdba), -635, -172},
+ {UINT64_2PART_C(0x936b9fce, bb25c996), -608, -164},
+ {UINT64_2PART_C(0xdbac6c24, 7d62a584), -582, -156},
+ {UINT64_2PART_C(0xa3ab6658, 0d5fdaf6), -555, -148},
+ {UINT64_2PART_C(0xf3e2f893, dec3f126), -529, -140},
+ {UINT64_2PART_C(0xb5b5ada8, aaff80b8), -502, -132},
+ {UINT64_2PART_C(0x87625f05, 6c7c4a8b), -475, -124},
+ {UINT64_2PART_C(0xc9bcff60, 34c13053), -449, -116},
+ {UINT64_2PART_C(0x964e858c, 91ba2655), -422, -108},
+ {UINT64_2PART_C(0xdff97724, 70297ebd), -396, -100},
+ {UINT64_2PART_C(0xa6dfbd9f, b8e5b88f), -369, -92},
+ {UINT64_2PART_C(0xf8a95fcf, 88747d94), -343, -84},
+ {UINT64_2PART_C(0xb9447093, 8fa89bcf), -316, -76},
+ {UINT64_2PART_C(0x8a08f0f8, bf0f156b), -289, -68},
+ {UINT64_2PART_C(0xcdb02555, 653131b6), -263, -60},
+ {UINT64_2PART_C(0x993fe2c6, d07b7fac), -236, -52},
+ {UINT64_2PART_C(0xe45c10c4, 2a2b3b06), -210, -44},
+ {UINT64_2PART_C(0xaa242499, 697392d3), -183, -36},
+ {UINT64_2PART_C(0xfd87b5f2, 8300ca0e), -157, -28},
+ {UINT64_2PART_C(0xbce50864, 92111aeb), -130, -20},
+ {UINT64_2PART_C(0x8cbccc09, 6f5088cc), -103, -12},
+ {UINT64_2PART_C(0xd1b71758, e219652c), -77, -4},
+ {UINT64_2PART_C(0x9c400000, 00000000), -50, 4},
+ {UINT64_2PART_C(0xe8d4a510, 00000000), -24, 12},
+ {UINT64_2PART_C(0xad78ebc5, ac620000), 3, 20},
+ {UINT64_2PART_C(0x813f3978, f8940984), 30, 28},
+ {UINT64_2PART_C(0xc097ce7b, c90715b3), 56, 36},
+ {UINT64_2PART_C(0x8f7e32ce, 7bea5c70), 83, 44},
+ {UINT64_2PART_C(0xd5d238a4, abe98068), 109, 52},
+ {UINT64_2PART_C(0x9f4f2726, 179a2245), 136, 60},
+ {UINT64_2PART_C(0xed63a231, d4c4fb27), 162, 68},
+ {UINT64_2PART_C(0xb0de6538, 8cc8ada8), 189, 76},
+ {UINT64_2PART_C(0x83c7088e, 1aab65db), 216, 84},
+ {UINT64_2PART_C(0xc45d1df9, 42711d9a), 242, 92},
+ {UINT64_2PART_C(0x924d692c, a61be758), 269, 100},
+ {UINT64_2PART_C(0xda01ee64, 1a708dea), 295, 108},
+ {UINT64_2PART_C(0xa26da399, 9aef774a), 322, 116},
+ {UINT64_2PART_C(0xf209787b, b47d6b85), 348, 124},
+ {UINT64_2PART_C(0xb454e4a1, 79dd1877), 375, 132},
+ {UINT64_2PART_C(0x865b8692, 5b9bc5c2), 402, 140},
+ {UINT64_2PART_C(0xc83553c5, c8965d3d), 428, 148},
+ {UINT64_2PART_C(0x952ab45c, fa97a0b3), 455, 156},
+ {UINT64_2PART_C(0xde469fbd, 99a05fe3), 481, 164},
+ {UINT64_2PART_C(0xa59bc234, db398c25), 508, 172},
+ {UINT64_2PART_C(0xf6c69a72, a3989f5c), 534, 180},
+ {UINT64_2PART_C(0xb7dcbf53, 54e9bece), 561, 188},
+ {UINT64_2PART_C(0x88fcf317, f22241e2), 588, 196},
+ {UINT64_2PART_C(0xcc20ce9b, d35c78a5), 614, 204},
+ {UINT64_2PART_C(0x98165af3, 7b2153df), 641, 212},
+ {UINT64_2PART_C(0xe2a0b5dc, 971f303a), 667, 220},
+ {UINT64_2PART_C(0xa8d9d153, 5ce3b396), 694, 228},
+ {UINT64_2PART_C(0xfb9b7cd9, a4a7443c), 720, 236},
+ {UINT64_2PART_C(0xbb764c4c, a7a44410), 747, 244},
+ {UINT64_2PART_C(0x8bab8eef, b6409c1a), 774, 252},
+ {UINT64_2PART_C(0xd01fef10, a657842c), 800, 260},
+ {UINT64_2PART_C(0x9b10a4e5, e9913129), 827, 268},
+ {UINT64_2PART_C(0xe7109bfb, a19c0c9d), 853, 276},
+ {UINT64_2PART_C(0xac2820d9, 623bf429), 880, 284},
+ {UINT64_2PART_C(0x80444b5e, 7aa7cf85), 907, 292},
+ {UINT64_2PART_C(0xbf21e440, 03acdd2d), 933, 300},
+ {UINT64_2PART_C(0x8e679c2f, 5e44ff8f), 960, 308},
+ {UINT64_2PART_C(0xd433179d, 9c8cb841), 986, 316},
+ {UINT64_2PART_C(0x9e19db92, b4e31ba9), 1013, 324},
+ {UINT64_2PART_C(0xeb96bf6e, badf77d9), 1039, 332},
+ {UINT64_2PART_C(0xaf87023b, 9bf0ee6b), 1066, 340},
+};
+
+static const int kCachedPowersLength = ARRAY_SIZE(kCachedPowers);
+static const int kCachedPowersOffset = 348; // -1 * the first decimal_exponent.
+static const double kD_1_LOG2_10 = 0.30102999566398114; // 1 / lg(10)
+// Difference between the decimal exponents in the table above.
+const int PowersOfTenCache::kDecimalExponentDistance = 8;
+const int PowersOfTenCache::kMinDecimalExponent = -348;
+const int PowersOfTenCache::kMaxDecimalExponent = 340;
+
+void PowersOfTenCache::GetCachedPowerForBinaryExponentRange(
+ int min_exponent,
+ int max_exponent,
+ DiyFp* power,
+ int* decimal_exponent) {
+ int kQ = DiyFp::kSignificandSize;
+ double k = ceil((min_exponent + kQ - 1) * kD_1_LOG2_10);
+ int foo = kCachedPowersOffset;
+ int index =
+ (foo + static_cast<int>(k) - 1) / kDecimalExponentDistance + 1;
+ ASSERT(0 <= index && index < kCachedPowersLength);
+ CachedPower cached_power = kCachedPowers[index];
+ ASSERT(min_exponent <= cached_power.binary_exponent);
+ ASSERT(cached_power.binary_exponent <= max_exponent);
+ *decimal_exponent = cached_power.decimal_exponent;
+ *power = DiyFp(cached_power.significand, cached_power.binary_exponent);
+}
+
+
+void PowersOfTenCache::GetCachedPowerForDecimalExponent(int requested_exponent,
+ DiyFp* power,
+ int* found_exponent) {
+ ASSERT(kMinDecimalExponent <= requested_exponent);
+ ASSERT(requested_exponent < kMaxDecimalExponent + kDecimalExponentDistance);
+ int index =
+ (requested_exponent + kCachedPowersOffset) / kDecimalExponentDistance;
+ CachedPower cached_power = kCachedPowers[index];
+ *power = DiyFp(cached_power.significand, cached_power.binary_exponent);
+ *found_exponent = cached_power.decimal_exponent;
+ ASSERT(*found_exponent <= requested_exponent);
+ ASSERT(requested_exponent < *found_exponent + kDecimalExponentDistance);
+}
+
+} // namespace double_conversion
diff --git a/src/joshua/decoder/ff/lm/kenlm/util/double-conversion/cached-powers.h b/src/kenlm/util/double-conversion/cached-powers.h
similarity index 100%
rename from src/joshua/decoder/ff/lm/kenlm/util/double-conversion/cached-powers.h
rename to src/kenlm/util/double-conversion/cached-powers.h
diff --git a/src/joshua/decoder/ff/lm/kenlm/util/double-conversion/diy-fp.cc b/src/kenlm/util/double-conversion/diy-fp.cc
similarity index 100%
rename from src/joshua/decoder/ff/lm/kenlm/util/double-conversion/diy-fp.cc
rename to src/kenlm/util/double-conversion/diy-fp.cc
diff --git a/src/joshua/decoder/ff/lm/kenlm/util/double-conversion/diy-fp.h b/src/kenlm/util/double-conversion/diy-fp.h
similarity index 100%
rename from src/joshua/decoder/ff/lm/kenlm/util/double-conversion/diy-fp.h
rename to src/kenlm/util/double-conversion/diy-fp.h
diff --git a/src/kenlm/util/double-conversion/double-conversion.cc b/src/kenlm/util/double-conversion/double-conversion.cc
new file mode 100644
index 0000000..8a7923c
--- /dev/null
+++ b/src/kenlm/util/double-conversion/double-conversion.cc
@@ -0,0 +1,889 @@
+// Copyright 2010 the V8 project authors. All rights reserved.
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following
+// disclaimer in the documentation and/or other materials provided
+// with the distribution.
+// * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived
+// from this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#include <climits>
+#include <cmath>
+
+#include "double-conversion.h"
+
+#include "bignum-dtoa.h"
+#include "fast-dtoa.h"
+#include "fixed-dtoa.h"
+#include "ieee.h"
+#include "strtod.h"
+#include "utils.h"
+
+namespace double_conversion {
+
+const DoubleToStringConverter& DoubleToStringConverter::EcmaScriptConverter() {
+ int flags = UNIQUE_ZERO | EMIT_POSITIVE_EXPONENT_SIGN;
+ static DoubleToStringConverter converter(flags,
+ "Infinity",
+ "NaN",
+ 'e',
+ -6, 21,
+ 6, 0);
+ return converter;
+}
+
+
+bool DoubleToStringConverter::HandleSpecialValues(
+ double value,
+ StringBuilder* result_builder) const {
+ Double double_inspect(value);
+ if (double_inspect.IsInfinite()) {
+ if (infinity_symbol_ == NULL) return false;
+ if (value < 0) {
+ result_builder->AddCharacter('-');
+ }
+ result_builder->AddString(infinity_symbol_);
+ return true;
+ }
+ if (double_inspect.IsNan()) {
+ if (nan_symbol_ == NULL) return false;
+ result_builder->AddString(nan_symbol_);
+ return true;
+ }
+ return false;
+}
+
+
+void DoubleToStringConverter::CreateExponentialRepresentation(
+ const char* decimal_digits,
+ int length,
+ int exponent,
+ StringBuilder* result_builder) const {
+ ASSERT(length != 0);
+ result_builder->AddCharacter(decimal_digits[0]);
+ if (length != 1) {
+ result_builder->AddCharacter('.');
+ result_builder->AddSubstring(&decimal_digits[1], length-1);
+ }
+ result_builder->AddCharacter(exponent_character_);
+ if (exponent < 0) {
+ result_builder->AddCharacter('-');
+ exponent = -exponent;
+ } else {
+ if ((flags_ & EMIT_POSITIVE_EXPONENT_SIGN) != 0) {
+ result_builder->AddCharacter('+');
+ }
+ }
+ if (exponent == 0) {
+ result_builder->AddCharacter('0');
+ return;
+ }
+ ASSERT(exponent < 1e4);
+ const int kMaxExponentLength = 5;
+ char buffer[kMaxExponentLength + 1];
+ buffer[kMaxExponentLength] = '\0';
+ int first_char_pos = kMaxExponentLength;
+ while (exponent > 0) {
+ buffer[--first_char_pos] = '0' + (exponent % 10);
+ exponent /= 10;
+ }
+ result_builder->AddSubstring(&buffer[first_char_pos],
+ kMaxExponentLength - first_char_pos);
+}
+
+
+void DoubleToStringConverter::CreateDecimalRepresentation(
+ const char* decimal_digits,
+ int length,
+ int decimal_point,
+ int digits_after_point,
+ StringBuilder* result_builder) const {
+ // Create a representation that is padded with zeros if needed.
+ if (decimal_point <= 0) {
+ // "0.00000decimal_rep".
+ result_builder->AddCharacter('0');
+ if (digits_after_point > 0) {
+ result_builder->AddCharacter('.');
+ result_builder->AddPadding('0', -decimal_point);
+ ASSERT(length <= digits_after_point - (-decimal_point));
+ result_builder->AddSubstring(decimal_digits, length);
+ int remaining_digits = digits_after_point - (-decimal_point) - length;
+ result_builder->AddPadding('0', remaining_digits);
+ }
+ } else if (decimal_point >= length) {
+ // "decimal_rep0000.00000" or "decimal_rep.0000"
+ result_builder->AddSubstring(decimal_digits, length);
+ result_builder->AddPadding('0', decimal_point - length);
+ if (digits_after_point > 0) {
+ result_builder->AddCharacter('.');
+ result_builder->AddPadding('0', digits_after_point);
+ }
+ } else {
+ // "decima.l_rep000"
+ ASSERT(digits_after_point > 0);
+ result_builder->AddSubstring(decimal_digits, decimal_point);
+ result_builder->AddCharacter('.');
+ ASSERT(length - decimal_point <= digits_after_point);
+ result_builder->AddSubstring(&decimal_digits[decimal_point],
+ length - decimal_point);
+ int remaining_digits = digits_after_point - (length - decimal_point);
+ result_builder->AddPadding('0', remaining_digits);
+ }
+ if (digits_after_point == 0) {
+ if ((flags_ & EMIT_TRAILING_DECIMAL_POINT) != 0) {
+ result_builder->AddCharacter('.');
+ }
+ if ((flags_ & EMIT_TRAILING_ZERO_AFTER_POINT) != 0) {
+ result_builder->AddCharacter('0');
+ }
+ }
+}
+
+
+bool DoubleToStringConverter::ToShortestIeeeNumber(
+ double value,
+ StringBuilder* result_builder,
+ DoubleToStringConverter::DtoaMode mode) const {
+ ASSERT(mode == SHORTEST || mode == SHORTEST_SINGLE);
+ if (Double(value).IsSpecial()) {
+ return HandleSpecialValues(value, result_builder);
+ }
+
+ int decimal_point;
+ bool sign;
+ const int kDecimalRepCapacity = kBase10MaximalLength + 1;
+ char decimal_rep[kDecimalRepCapacity];
+ int decimal_rep_length;
+
+ DoubleToAscii(value, mode, 0, decimal_rep, kDecimalRepCapacity,
+ &sign, &decimal_rep_length, &decimal_point);
+
+ bool unique_zero = (flags_ & UNIQUE_ZERO) != 0;
+ if (sign && (value != 0.0 || !unique_zero)) {
+ result_builder->AddCharacter('-');
+ }
+
+ int exponent = decimal_point - 1;
+ if ((decimal_in_shortest_low_ <= exponent) &&
+ (exponent < decimal_in_shortest_high_)) {
+ CreateDecimalRepresentation(decimal_rep, decimal_rep_length,
+ decimal_point,
+ Max(0, decimal_rep_length - decimal_point),
+ result_builder);
+ } else {
+ CreateExponentialRepresentation(decimal_rep, decimal_rep_length, exponent,
+ result_builder);
+ }
+ return true;
+}
+
+
+bool DoubleToStringConverter::ToFixed(double value,
+ int requested_digits,
+ StringBuilder* result_builder) const {
+ ASSERT(kMaxFixedDigitsBeforePoint == 60);
+ const double kFirstNonFixed = 1e60;
+
+ if (Double(value).IsSpecial()) {
+ return HandleSpecialValues(value, result_builder);
+ }
+
+ if (requested_digits > kMaxFixedDigitsAfterPoint) return false;
+ if (value >= kFirstNonFixed || value <= -kFirstNonFixed) return false;
+
+ // Find a sufficiently precise decimal representation of n.
+ int decimal_point;
+ bool sign;
+ // Add space for the '\0' byte.
+ const int kDecimalRepCapacity =
+ kMaxFixedDigitsBeforePoint + kMaxFixedDigitsAfterPoint + 1;
+ char decimal_rep[kDecimalRepCapacity];
+ int decimal_rep_length;
+ DoubleToAscii(value, FIXED, requested_digits,
+ decimal_rep, kDecimalRepCapacity,
+ &sign, &decimal_rep_length, &decimal_point);
+
+ bool unique_zero = ((flags_ & UNIQUE_ZERO) != 0);
+ if (sign && (value != 0.0 || !unique_zero)) {
+ result_builder->AddCharacter('-');
+ }
+
+ CreateDecimalRepresentation(decimal_rep, decimal_rep_length, decimal_point,
+ requested_digits, result_builder);
+ return true;
+}
+
+
+bool DoubleToStringConverter::ToExponential(
+ double value,
+ int requested_digits,
+ StringBuilder* result_builder) const {
+ if (Double(value).IsSpecial()) {
+ return HandleSpecialValues(value, result_builder);
+ }
+
+ if (requested_digits < -1) return false;
+ if (requested_digits > kMaxExponentialDigits) return false;
+
+ int decimal_point;
+ bool sign;
+ // Add space for digit before the decimal point and the '\0' character.
+ const int kDecimalRepCapacity = kMaxExponentialDigits + 2;
+ ASSERT(kDecimalRepCapacity > kBase10MaximalLength);
+ char decimal_rep[kDecimalRepCapacity];
+ int decimal_rep_length;
+
+ if (requested_digits == -1) {
+ DoubleToAscii(value, SHORTEST, 0,
+ decimal_rep, kDecimalRepCapacity,
+ &sign, &decimal_rep_length, &decimal_point);
+ } else {
+ DoubleToAscii(value, PRECISION, requested_digits + 1,
+ decimal_rep, kDecimalRepCapacity,
+ &sign, &decimal_rep_length, &decimal_point);
+ ASSERT(decimal_rep_length <= requested_digits + 1);
+
+ for (int i = decimal_rep_length; i < requested_digits + 1; ++i) {
+ decimal_rep[i] = '0';
+ }
+ decimal_rep_length = requested_digits + 1;
+ }
+
+ bool unique_zero = ((flags_ & UNIQUE_ZERO) != 0);
+ if (sign && (value != 0.0 || !unique_zero)) {
+ result_builder->AddCharacter('-');
+ }
+
+ int exponent = decimal_point - 1;
+ CreateExponentialRepresentation(decimal_rep,
+ decimal_rep_length,
+ exponent,
+ result_builder);
+ return true;
+}
+
+
+bool DoubleToStringConverter::ToPrecision(double value,
+ int precision,
+ StringBuilder* result_builder) const {
+ if (Double(value).IsSpecial()) {
+ return HandleSpecialValues(value, result_builder);
+ }
+
+ if (precision < kMinPrecisionDigits || precision > kMaxPrecisionDigits) {
+ return false;
+ }
+
+ // Find a sufficiently precise decimal representation of n.
+ int decimal_point;
+ bool sign;
+ // Add one for the terminating null character.
+ const int kDecimalRepCapacity = kMaxPrecisionDigits + 1;
+ char decimal_rep[kDecimalRepCapacity];
+ int decimal_rep_length;
+
+ DoubleToAscii(value, PRECISION, precision,
+ decimal_rep, kDecimalRepCapacity,
+ &sign, &decimal_rep_length, &decimal_point);
+ ASSERT(decimal_rep_length <= precision);
+
+ bool unique_zero = ((flags_ & UNIQUE_ZERO) != 0);
+ if (sign && (value != 0.0 || !unique_zero)) {
+ result_builder->AddCharacter('-');
+ }
+
+ // The exponent if we print the number as x.xxeyyy. That is with the
+ // decimal point after the first digit.
+ int exponent = decimal_point - 1;
+
+ int extra_zero = ((flags_ & EMIT_TRAILING_ZERO_AFTER_POINT) != 0) ? 1 : 0;
+ if ((-decimal_point + 1 > max_leading_padding_zeroes_in_precision_mode_) ||
+ (decimal_point - precision + extra_zero >
+ max_trailing_padding_zeroes_in_precision_mode_)) {
+ // Fill buffer to contain 'precision' digits.
+ // Usually the buffer is already at the correct length, but 'DoubleToAscii'
+ // is allowed to return less characters.
+ for (int i = decimal_rep_length; i < precision; ++i) {
+ decimal_rep[i] = '0';
+ }
+
+ CreateExponentialRepresentation(decimal_rep,
+ precision,
+ exponent,
+ result_builder);
+ } else {
+ CreateDecimalRepresentation(decimal_rep, decimal_rep_length, decimal_point,
+ Max(0, precision - decimal_point),
+ result_builder);
+ }
+ return true;
+}
+
+
+static BignumDtoaMode DtoaToBignumDtoaMode(
+ DoubleToStringConverter::DtoaMode dtoa_mode) {
+ switch (dtoa_mode) {
+ case DoubleToStringConverter::SHORTEST: return BIGNUM_DTOA_SHORTEST;
+ case DoubleToStringConverter::SHORTEST_SINGLE:
+ return BIGNUM_DTOA_SHORTEST_SINGLE;
+ case DoubleToStringConverter::FIXED: return BIGNUM_DTOA_FIXED;
+ case DoubleToStringConverter::PRECISION: return BIGNUM_DTOA_PRECISION;
+ default:
+ UNREACHABLE();
+ return BIGNUM_DTOA_SHORTEST; // To silence compiler.
+ }
+}
+
+
+void DoubleToStringConverter::DoubleToAscii(double v,
+ DtoaMode mode,
+ int requested_digits,
+ char* buffer,
+ int buffer_length,
+ bool* sign,
+ int* length,
+ int* point) {
+ Vector<char> vector(buffer, buffer_length);
+ ASSERT(!Double(v).IsSpecial());
+ ASSERT(mode == SHORTEST || mode == SHORTEST_SINGLE || requested_digits >= 0);
+
+ if (Double(v).Sign() < 0) {
+ *sign = true;
+ v = -v;
+ } else {
+ *sign = false;
+ }
+
+ if (mode == PRECISION && requested_digits == 0) {
+ vector[0] = '\0';
+ *length = 0;
+ return;
+ }
+
+ if (v == 0) {
+ vector[0] = '0';
+ vector[1] = '\0';
+ *length = 1;
+ *point = 1;
+ return;
+ }
+
+ bool fast_worked;
+ switch (mode) {
+ case SHORTEST:
+ fast_worked = FastDtoa(v, FAST_DTOA_SHORTEST, 0, vector, length, point);
+ break;
+ case SHORTEST_SINGLE:
+ fast_worked = FastDtoa(v, FAST_DTOA_SHORTEST_SINGLE, 0,
+ vector, length, point);
+ break;
+ case FIXED:
+ fast_worked = FastFixedDtoa(v, requested_digits, vector, length, point);
+ break;
+ case PRECISION:
+ fast_worked = FastDtoa(v, FAST_DTOA_PRECISION, requested_digits,
+ vector, length, point);
+ break;
+ default:
+ UNREACHABLE();
+ fast_worked = false;
+ }
+ if (fast_worked) return;
+
+ // If the fast dtoa didn't succeed use the slower bignum version.
+ BignumDtoaMode bignum_mode = DtoaToBignumDtoaMode(mode);
+ BignumDtoa(v, bignum_mode, requested_digits, vector, length, point);
+ vector[*length] = '\0';
+}
+
+
+// Consumes the given substring from the iterator.
+// Returns false, if the substring does not match.
+static bool ConsumeSubString(const char** current,
+ const char* end,
+ const char* substring) {
+ ASSERT(**current == *substring);
+ for (substring++; *substring != '\0'; substring++) {
+ ++*current;
+ if (*current == end || **current != *substring) return false;
+ }
+ ++*current;
+ return true;
+}
+
+
+// Maximum number of significant digits in decimal representation.
+// The longest possible double in decimal representation is
+// (2^53 - 1) * 2 ^ -1074 that is (2 ^ 53 - 1) * 5 ^ 1074 / 10 ^ 1074
+// (768 digits). If we parse a number whose first digits are equal to a
+// mean of 2 adjacent doubles (that could have up to 769 digits) the result
+// must be rounded to the bigger one unless the tail consists of zeros, so
+// we don't need to preserve all the digits.
+const int kMaxSignificantDigits = 772;
+
+
+// Returns true if a nonspace found and false if the end has reached.
+static inline bool AdvanceToNonspace(const char** current, const char* end) {
+ while (*current != end) {
+ if (**current != ' ') return true;
+ ++*current;
+ }
+ return false;
+}
+
+
+static bool isDigit(int x, int radix) {
+ return (x >= '0' && x <= '9' && x < '0' + radix)
+ || (radix > 10 && x >= 'a' && x < 'a' + radix - 10)
+ || (radix > 10 && x >= 'A' && x < 'A' + radix - 10);
+}
+
+
+static double SignedZero(bool sign) {
+ return sign ? -0.0 : 0.0;
+}
+
+
+// Parsing integers with radix 2, 4, 8, 16, 32. Assumes current != end.
+template <int radix_log_2>
+static double RadixStringToIeee(const char* current,
+ const char* end,
+ bool sign,
+ bool allow_trailing_junk,
+ double junk_string_value,
+ bool read_as_double,
+ const char** trailing_pointer) {
+ ASSERT(current != end);
+
+ const int kDoubleSize = Double::kSignificandSize;
+ const int kSingleSize = Single::kSignificandSize;
+ const int kSignificandSize = read_as_double? kDoubleSize: kSingleSize;
+
+ // Skip leading 0s.
+ while (*current == '0') {
+ ++current;
+ if (current == end) {
+ *trailing_pointer = end;
+ return SignedZero(sign);
+ }
+ }
+
+ int64_t number = 0;
+ int exponent = 0;
+ const int radix = (1 << radix_log_2);
+
+ do {
+ int digit;
+ if (*current >= '0' && *current <= '9' && *current < '0' + radix) {
+ digit = static_cast<char>(*current) - '0';
+ } else if (radix > 10 && *current >= 'a' && *current < 'a' + radix - 10) {
+ digit = static_cast<char>(*current) - 'a' + 10;
+ } else if (radix > 10 && *current >= 'A' && *current < 'A' + radix - 10) {
+ digit = static_cast<char>(*current) - 'A' + 10;
+ } else {
+ if (allow_trailing_junk || !AdvanceToNonspace(&current, end)) {
+ break;
+ } else {
+ return junk_string_value;
+ }
+ }
+
+ number = number * radix + digit;
+ int overflow = static_cast<int>(number >> kSignificandSize);
+ if (overflow != 0) {
+ // Overflow occurred. Need to determine which direction to round the
+ // result.
+ int overflow_bits_count = 1;
+ while (overflow > 1) {
+ overflow_bits_count++;
+ overflow >>= 1;
+ }
+
+ int dropped_bits_mask = ((1 << overflow_bits_count) - 1);
+ int dropped_bits = static_cast<int>(number) & dropped_bits_mask;
+ number >>= overflow_bits_count;
+ exponent = overflow_bits_count;
+
+ bool zero_tail = true;
+ while (true) {
+ ++current;
+ if (current == end || !isDigit(*current, radix)) break;
+ zero_tail = zero_tail && *current == '0';
+ exponent += radix_log_2;
+ }
+
+ if (!allow_trailing_junk && AdvanceToNonspace(&current, end)) {
+ return junk_string_value;
+ }
+
+ int middle_value = (1 << (overflow_bits_count - 1));
+ if (dropped_bits > middle_value) {
+ number++; // Rounding up.
+ } else if (dropped_bits == middle_value) {
+ // Rounding to even to consistency with decimals: half-way case rounds
+ // up if significant part is odd and down otherwise.
+ if ((number & 1) != 0 || !zero_tail) {
+ number++; // Rounding up.
+ }
+ }
+
+ // Rounding up may cause overflow.
+ if ((number & ((int64_t)1 << kSignificandSize)) != 0) {
+ exponent++;
+ number >>= 1;
+ }
+ break;
+ }
+ ++current;
+ } while (current != end);
+
+ ASSERT(number < ((int64_t)1 << kSignificandSize));
+ ASSERT(static_cast<int64_t>(static_cast<double>(number)) == number);
+
+ *trailing_pointer = current;
+
+ if (exponent == 0) {
+ if (sign) {
+ if (number == 0) return -0.0;
+ number = -number;
+ }
+ return static_cast<double>(number);
+ }
+
+ ASSERT(number != 0);
+ return Double(DiyFp(number, exponent)).value();
+}
+
+
+double StringToDoubleConverter::StringToIeee(
+ const char* input,
+ int length,
+ int* processed_characters_count,
+ bool read_as_double) const {
+ const char* current = input;
+ const char* end = input + length;
+
+ *processed_characters_count = 0;
+
+ const bool allow_trailing_junk = (flags_ & ALLOW_TRAILING_JUNK) != 0;
+ const bool allow_leading_spaces = (flags_ & ALLOW_LEADING_SPACES) != 0;
+ const bool allow_trailing_spaces = (flags_ & ALLOW_TRAILING_SPACES) != 0;
+ const bool allow_spaces_after_sign = (flags_ & ALLOW_SPACES_AFTER_SIGN) != 0;
+
+ // To make sure that iterator dereferencing is valid the following
+ // convention is used:
+ // 1. Each '++current' statement is followed by check for equality to 'end'.
+ // 2. If AdvanceToNonspace returned false then current == end.
+ // 3. If 'current' becomes equal to 'end' the function returns or goes to
+ // 'parsing_done'.
+ // 4. 'current' is not dereferenced after the 'parsing_done' label.
+ // 5. Code before 'parsing_done' may rely on 'current != end'.
+ if (current == end) return empty_string_value_;
+
+ if (allow_leading_spaces || allow_trailing_spaces) {
+ if (!AdvanceToNonspace(&current, end)) {
+ *processed_characters_count = current - input;
+ return empty_string_value_;
+ }
+ if (!allow_leading_spaces && (input != current)) {
+ // No leading spaces allowed, but AdvanceToNonspace moved forward.
+ return junk_string_value_;
+ }
+ }
+
+ // The longest form of simplified number is: "-<significant digits>.1eXXX\0".
+ const int kBufferSize = kMaxSignificantDigits + 10;
+ char buffer[kBufferSize]; // NOLINT: size is known at compile time.
+ int buffer_pos = 0;
+
+ // Exponent will be adjusted if insignificant digits of the integer part
+ // or insignificant leading zeros of the fractional part are dropped.
+ int exponent = 0;
+ int significant_digits = 0;
+ int insignificant_digits = 0;
+ bool nonzero_digit_dropped = false;
+
+ bool sign = false;
+
+ if (*current == '+' || *current == '-') {
+ sign = (*current == '-');
+ ++current;
+ const char* next_non_space = current;
+ // Skip following spaces (if allowed).
+ if (!AdvanceToNonspace(&next_non_space, end)) return junk_string_value_;
+ if (!allow_spaces_after_sign && (current != next_non_space)) {
+ return junk_string_value_;
+ }
+ current = next_non_space;
+ }
+
+ if (infinity_symbol_ != NULL) {
+ if (*current == infinity_symbol_[0]) {
+ if (!ConsumeSubString(&current, end, infinity_symbol_)) {
+ return junk_string_value_;
+ }
+
+ if (!(allow_trailing_spaces || allow_trailing_junk) && (current != end)) {
+ return junk_string_value_;
+ }
+ if (!allow_trailing_junk && AdvanceToNonspace(&current, end)) {
+ return junk_string_value_;
+ }
+
+ ASSERT(buffer_pos == 0);
+ *processed_characters_count = current - input;
+ return sign ? -Double::Infinity() : Double::Infinity();
+ }
+ }
+
+ if (nan_symbol_ != NULL) {
+ if (*current == nan_symbol_[0]) {
+ if (!ConsumeSubString(&current, end, nan_symbol_)) {
+ return junk_string_value_;
+ }
+
+ if (!(allow_trailing_spaces || allow_trailing_junk) && (current != end)) {
+ return junk_string_value_;
+ }
+ if (!allow_trailing_junk && AdvanceToNonspace(&current, end)) {
+ return junk_string_value_;
+ }
+
+ ASSERT(buffer_pos == 0);
+ *processed_characters_count = current - input;
+ return sign ? -Double::NaN() : Double::NaN();
+ }
+ }
+
+ bool leading_zero = false;
+ if (*current == '0') {
+ ++current;
+ if (current == end) {
+ *processed_characters_count = current - input;
+ return SignedZero(sign);
+ }
+
+ leading_zero = true;
+
+ // It could be hexadecimal value.
+ if ((flags_ & ALLOW_HEX) && (*current == 'x' || *current == 'X')) {
+ ++current;
+ if (current == end || !isDigit(*current, 16)) {
+ return junk_string_value_; // "0x".
+ }
+
+ const char* tail_pointer = NULL;
+ double result = RadixStringToIeee<4>(current,
+ end,
+ sign,
+ allow_trailing_junk,
+ junk_string_value_,
+ read_as_double,
+ &tail_pointer);
+ if (tail_pointer != NULL) {
+ if (allow_trailing_spaces) AdvanceToNonspace(&tail_pointer, end);
+ *processed_characters_count = tail_pointer - input;
+ }
+ return result;
+ }
+
+ // Ignore leading zeros in the integer part.
+ while (*current == '0') {
+ ++current;
+ if (current == end) {
+ *processed_characters_count = current - input;
+ return SignedZero(sign);
+ }
+ }
+ }
+
+ bool octal = leading_zero && (flags_ & ALLOW_OCTALS) != 0;
+
+ // Copy significant digits of the integer part (if any) to the buffer.
+ while (*current >= '0' && *current <= '9') {
+ if (significant_digits < kMaxSignificantDigits) {
+ ASSERT(buffer_pos < kBufferSize);
+ buffer[buffer_pos++] = static_cast<char>(*current);
+ significant_digits++;
+ // Will later check if it's an octal in the buffer.
+ } else {
+ insignificant_digits++; // Move the digit into the exponential part.
+ nonzero_digit_dropped = nonzero_digit_dropped || *current != '0';
+ }
+ octal = octal && *current < '8';
+ ++current;
+ if (current == end) goto parsing_done;
+ }
+
+ if (significant_digits == 0) {
+ octal = false;
+ }
+
+ if (*current == '.') {
+ if (octal && !allow_trailing_junk) return junk_string_value_;
+ if (octal) goto parsing_done;
+
+ ++current;
+ if (current == end) {
+ if (significant_digits == 0 && !leading_zero) {
+ return junk_string_value_;
+ } else {
+ goto parsing_done;
+ }
+ }
+
+ if (significant_digits == 0) {
+ // octal = false;
+ // Integer part consists of 0 or is absent. Significant digits start after
+ // leading zeros (if any).
+ while (*current == '0') {
+ ++current;
+ if (current == end) {
+ *processed_characters_count = current - input;
+ return SignedZero(sign);
+ }
+ exponent--; // Move this 0 into the exponent.
+ }
+ }
+
+ // There is a fractional part.
+ // We don't emit a '.', but adjust the exponent instead.
+ while (*current >= '0' && *current <= '9') {
+ if (significant_digits < kMaxSignificantDigits) {
+ ASSERT(buffer_pos < kBufferSize);
+ buffer[buffer_pos++] = static_cast<char>(*current);
+ significant_digits++;
+ exponent--;
+ } else {
+ // Ignore insignificant digits in the fractional part.
+ nonzero_digit_dropped = nonzero_digit_dropped || *current != '0';
+ }
+ ++current;
+ if (current == end) goto parsing_done;
+ }
+ }
+
+ if (!leading_zero && exponent == 0 && significant_digits == 0) {
+ // If leading_zeros is true then the string contains zeros.
+ // If exponent < 0 then string was [+-]\.0*...
+ // If significant_digits != 0 the string is not equal to 0.
+ // Otherwise there are no digits in the string.
+ return junk_string_value_;
+ }
+
+ // Parse exponential part.
+ if (*current == 'e' || *current == 'E') {
+ if (octal && !allow_trailing_junk) return junk_string_value_;
+ if (octal) goto parsing_done;
+ ++current;
+ if (current == end) {
+ if (allow_trailing_junk) {
+ goto parsing_done;
+ } else {
+ return junk_string_value_;
+ }
+ }
+ char sign = '+';
+ if (*current == '+' || *current == '-') {
+ sign = static_cast<char>(*current);
+ ++current;
+ if (current == end) {
+ if (allow_trailing_junk) {
+ goto parsing_done;
+ } else {
+ return junk_string_value_;
+ }
+ }
+ }
+
+ if (current == end || *current < '0' || *current > '9') {
+ if (allow_trailing_junk) {
+ goto parsing_done;
+ } else {
+ return junk_string_value_;
+ }
+ }
+
+ const int max_exponent = INT_MAX / 2;
+ ASSERT(-max_exponent / 2 <= exponent && exponent <= max_exponent / 2);
+ int num = 0;
+ do {
+ // Check overflow.
+ int digit = *current - '0';
+ if (num >= max_exponent / 10
+ && !(num == max_exponent / 10 && digit <= max_exponent % 10)) {
+ num = max_exponent;
+ } else {
+ num = num * 10 + digit;
+ }
+ ++current;
+ } while (current != end && *current >= '0' && *current <= '9');
+
+ exponent += (sign == '-' ? -num : num);
+ }
+
+ if (!(allow_trailing_spaces || allow_trailing_junk) && (current != end)) {
+ return junk_string_value_;
+ }
+ if (!allow_trailing_junk && AdvanceToNonspace(&current, end)) {
+ return junk_string_value_;
+ }
+ if (allow_trailing_spaces) {
+ AdvanceToNonspace(&current, end);
+ }
+
+ parsing_done:
+ exponent += insignificant_digits;
+
+ if (octal) {
+ double result;
+ const char* tail_pointer = NULL;
+ result = RadixStringToIeee<3>(buffer,
+ buffer + buffer_pos,
+ sign,
+ allow_trailing_junk,
+ junk_string_value_,
+ read_as_double,
+ &tail_pointer);
+ ASSERT(tail_pointer != NULL);
+ *processed_characters_count = current - input;
+ return result;
+ }
+
+ if (nonzero_digit_dropped) {
+ buffer[buffer_pos++] = '1';
+ exponent--;
+ }
+
+ ASSERT(buffer_pos < kBufferSize);
+ buffer[buffer_pos] = '\0';
+
+ double converted;
+ if (read_as_double) {
+ converted = Strtod(Vector<const char>(buffer, buffer_pos), exponent);
+ } else {
+ converted = Strtof(Vector<const char>(buffer, buffer_pos), exponent);
+ }
+ *processed_characters_count = current - input;
+ return sign? -converted: converted;
+}
+
+} // namespace double_conversion
diff --git a/src/joshua/decoder/ff/lm/kenlm/util/double-conversion/double-conversion.h b/src/kenlm/util/double-conversion/double-conversion.h
similarity index 100%
rename from src/joshua/decoder/ff/lm/kenlm/util/double-conversion/double-conversion.h
rename to src/kenlm/util/double-conversion/double-conversion.h
diff --git a/src/joshua/decoder/ff/lm/kenlm/util/double-conversion/fast-dtoa.cc b/src/kenlm/util/double-conversion/fast-dtoa.cc
similarity index 100%
rename from src/joshua/decoder/ff/lm/kenlm/util/double-conversion/fast-dtoa.cc
rename to src/kenlm/util/double-conversion/fast-dtoa.cc
diff --git a/src/joshua/decoder/ff/lm/kenlm/util/double-conversion/fast-dtoa.h b/src/kenlm/util/double-conversion/fast-dtoa.h
similarity index 100%
rename from src/joshua/decoder/ff/lm/kenlm/util/double-conversion/fast-dtoa.h
rename to src/kenlm/util/double-conversion/fast-dtoa.h
diff --git a/src/kenlm/util/double-conversion/fixed-dtoa.cc b/src/kenlm/util/double-conversion/fixed-dtoa.cc
new file mode 100644
index 0000000..7c1a952
--- /dev/null
+++ b/src/kenlm/util/double-conversion/fixed-dtoa.cc
@@ -0,0 +1,402 @@
+// Copyright 2010 the V8 project authors. All rights reserved.
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following
+// disclaimer in the documentation and/or other materials provided
+// with the distribution.
+// * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived
+// from this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#include <cmath>
+
+#include "fixed-dtoa.h"
+#include "ieee.h"
+
+namespace double_conversion {
+
+// Represents a 128bit type. This class should be replaced by a native type on
+// platforms that support 128bit integers.
+class UInt128 {
+ public:
+ UInt128() : high_bits_(0), low_bits_(0) { }
+ UInt128(uint64_t high, uint64_t low) : high_bits_(high), low_bits_(low) { }
+
+ void Multiply(uint32_t multiplicand) {
+ uint64_t accumulator;
+
+ accumulator = (low_bits_ & kMask32) * multiplicand;
+ uint32_t part = static_cast<uint32_t>(accumulator & kMask32);
+ accumulator >>= 32;
+ accumulator = accumulator + (low_bits_ >> 32) * multiplicand;
+ low_bits_ = (accumulator << 32) + part;
+ accumulator >>= 32;
+ accumulator = accumulator + (high_bits_ & kMask32) * multiplicand;
+ part = static_cast<uint32_t>(accumulator & kMask32);
+ accumulator >>= 32;
+ accumulator = accumulator + (high_bits_ >> 32) * multiplicand;
+ high_bits_ = (accumulator << 32) + part;
+ ASSERT((accumulator >> 32) == 0);
+ }
+
+ void Shift(int shift_amount) {
+ ASSERT(-64 <= shift_amount && shift_amount <= 64);
+ if (shift_amount == 0) {
+ return;
+ } else if (shift_amount == -64) {
+ high_bits_ = low_bits_;
+ low_bits_ = 0;
+ } else if (shift_amount == 64) {
+ low_bits_ = high_bits_;
+ high_bits_ = 0;
+ } else if (shift_amount <= 0) {
+ high_bits_ <<= -shift_amount;
+ high_bits_ += low_bits_ >> (64 + shift_amount);
+ low_bits_ <<= -shift_amount;
+ } else {
+ low_bits_ >>= shift_amount;
+ low_bits_ += high_bits_ << (64 - shift_amount);
+ high_bits_ >>= shift_amount;
+ }
+ }
+
+ // Modifies *this to *this MOD (2^power).
+ // Returns *this DIV (2^power).
+ int DivModPowerOf2(int power) {
+ if (power >= 64) {
+ int result = static_cast<int>(high_bits_ >> (power - 64));
+ high_bits_ -= static_cast<uint64_t>(result) << (power - 64);
+ return result;
+ } else {
+ uint64_t part_low = low_bits_ >> power;
+ uint64_t part_high = high_bits_ << (64 - power);
+ int result = static_cast<int>(part_low + part_high);
+ high_bits_ = 0;
+ low_bits_ -= part_low << power;
+ return result;
+ }
+ }
+
+ bool IsZero() const {
+ return high_bits_ == 0 && low_bits_ == 0;
+ }
+
+ int BitAt(int position) {
+ if (position >= 64) {
+ return static_cast<int>(high_bits_ >> (position - 64)) & 1;
+ } else {
+ return static_cast<int>(low_bits_ >> position) & 1;
+ }
+ }
+
+ private:
+ static const uint64_t kMask32 = 0xFFFFFFFF;
+ // Value == (high_bits_ << 64) + low_bits_
+ uint64_t high_bits_;
+ uint64_t low_bits_;
+};
+
+
+static const int kDoubleSignificandSize = 53; // Includes the hidden bit.
+
+
+static void FillDigits32FixedLength(uint32_t number, int requested_length,
+ Vector<char> buffer, int* length) {
+ for (int i = requested_length - 1; i >= 0; --i) {
+ buffer[(*length) + i] = '0' + number % 10;
+ number /= 10;
+ }
+ *length += requested_length;
+}
+
+
+static void FillDigits32(uint32_t number, Vector<char> buffer, int* length) {
+ int number_length = 0;
+ // We fill the digits in reverse order and exchange them afterwards.
+ while (number != 0) {
+ int digit = number % 10;
+ number /= 10;
+ buffer[(*length) + number_length] = '0' + digit;
+ number_length++;
+ }
+ // Exchange the digits.
+ int i = *length;
+ int j = *length + number_length - 1;
+ while (i < j) {
+ char tmp = buffer[i];
+ buffer[i] = buffer[j];
+ buffer[j] = tmp;
+ i++;
+ j--;
+ }
+ *length += number_length;
+}
+
+
+static void FillDigits64FixedLength(uint64_t number, int requested_length,
+ Vector<char> buffer, int* length) {
+ const uint32_t kTen7 = 10000000;
+ // For efficiency cut the number into 3 uint32_t parts, and print those.
+ uint32_t part2 = static_cast<uint32_t>(number % kTen7);
+ number /= kTen7;
+ uint32_t part1 = static_cast<uint32_t>(number % kTen7);
+ uint32_t part0 = static_cast<uint32_t>(number / kTen7);
+
+ FillDigits32FixedLength(part0, 3, buffer, length);
+ FillDigits32FixedLength(part1, 7, buffer, length);
+ FillDigits32FixedLength(part2, 7, buffer, length);
+}
+
+
+static void FillDigits64(uint64_t number, Vector<char> buffer, int* length) {
+ const uint32_t kTen7 = 10000000;
+ // For efficiency cut the number into 3 uint32_t parts, and print those.
+ uint32_t part2 = static_cast<uint32_t>(number % kTen7);
+ number /= kTen7;
+ uint32_t part1 = static_cast<uint32_t>(number % kTen7);
+ uint32_t part0 = static_cast<uint32_t>(number / kTen7);
+
+ if (part0 != 0) {
+ FillDigits32(part0, buffer, length);
+ FillDigits32FixedLength(part1, 7, buffer, length);
+ FillDigits32FixedLength(part2, 7, buffer, length);
+ } else if (part1 != 0) {
+ FillDigits32(part1, buffer, length);
+ FillDigits32FixedLength(part2, 7, buffer, length);
+ } else {
+ FillDigits32(part2, buffer, length);
+ }
+}
+
+
+static void RoundUp(Vector<char> buffer, int* length, int* decimal_point) {
+ // An empty buffer represents 0.
+ if (*length == 0) {
+ buffer[0] = '1';
+ *decimal_point = 1;
+ *length = 1;
+ return;
+ }
+ // Round the last digit until we either have a digit that was not '9' or until
+ // we reached the first digit.
+ buffer[(*length) - 1]++;
+ for (int i = (*length) - 1; i > 0; --i) {
+ if (buffer[i] != '0' + 10) {
+ return;
+ }
+ buffer[i] = '0';
+ buffer[i - 1]++;
+ }
+ // If the first digit is now '0' + 10, we would need to set it to '0' and add
+ // a '1' in front. However we reach the first digit only if all following
+ // digits had been '9' before rounding up. Now all trailing digits are '0' and
+ // we simply switch the first digit to '1' and update the decimal-point
+ // (indicating that the point is now one digit to the right).
+ if (buffer[0] == '0' + 10) {
+ buffer[0] = '1';
+ (*decimal_point)++;
+ }
+}
+
+
+// The given fractionals number represents a fixed-point number with binary
+// point at bit (-exponent).
+// Preconditions:
+// -128 <= exponent <= 0.
+// 0 <= fractionals * 2^exponent < 1
+// The buffer holds the result.
+// The function will round its result. During the rounding-process digits not
+// generated by this function might be updated, and the decimal-point variable
+// might be updated. If this function generates the digits 99 and the buffer
+// already contained "199" (thus yielding a buffer of "19999") then a
+// rounding-up will change the contents of the buffer to "20000".
+static void FillFractionals(uint64_t fractionals, int exponent,
+ int fractional_count, Vector<char> buffer,
+ int* length, int* decimal_point) {
+ ASSERT(-128 <= exponent && exponent <= 0);
+ // 'fractionals' is a fixed-point number, with binary point at bit
+ // (-exponent). Inside the function the non-converted remainder of fractionals
+ // is a fixed-point number, with binary point at bit 'point'.
+ if (-exponent <= 64) {
+ // One 64 bit number is sufficient.
+ ASSERT(fractionals >> 56 == 0);
+ int point = -exponent;
+ for (int i = 0; i < fractional_count; ++i) {
+ if (fractionals == 0) break;
+ // Instead of multiplying by 10 we multiply by 5 and adjust the point
+ // location. This way the fractionals variable will not overflow.
+ // Invariant at the beginning of the loop: fractionals < 2^point.
+ // Initially we have: point <= 64 and fractionals < 2^56
+ // After each iteration the point is decremented by one.
+ // Note that 5^3 = 125 < 128 = 2^7.
+ // Therefore three iterations of this loop will not overflow fractionals
+ // (even without the subtraction at the end of the loop body). At this
+ // time point will satisfy point <= 61 and therefore fractionals < 2^point
+ // and any further multiplication of fractionals by 5 will not overflow.
+ fractionals *= 5;
+ point--;
+ int digit = static_cast<int>(fractionals >> point);
+ buffer[*length] = '0' + digit;
+ (*length)++;
+ fractionals -= static_cast<uint64_t>(digit) << point;
+ }
+ // If the first bit after the point is set we have to round up.
+ if (((fractionals >> (point - 1)) & 1) == 1) {
+ RoundUp(buffer, length, decimal_point);
+ }
+ } else { // We need 128 bits.
+ ASSERT(64 < -exponent && -exponent <= 128);
+ UInt128 fractionals128 = UInt128(fractionals, 0);
+ fractionals128.Shift(-exponent - 64);
+ int point = 128;
+ for (int i = 0; i < fractional_count; ++i) {
+ if (fractionals128.IsZero()) break;
+ // As before: instead of multiplying by 10 we multiply by 5 and adjust the
+ // point location.
+ // This multiplication will not overflow for the same reasons as before.
+ fractionals128.Multiply(5);
+ point--;
+ int digit = fractionals128.DivModPowerOf2(point);
+ buffer[*length] = '0' + digit;
+ (*length)++;
+ }
+ if (fractionals128.BitAt(point - 1) == 1) {
+ RoundUp(buffer, length, decimal_point);
+ }
+ }
+}
+
+
+// Removes leading and trailing zeros.
+// If leading zeros are removed then the decimal point position is adjusted.
+static void TrimZeros(Vector<char> buffer, int* length, int* decimal_point) {
+ while (*length > 0 && buffer[(*length) - 1] == '0') {
+ (*length)--;
+ }
+ int first_non_zero = 0;
+ while (first_non_zero < *length && buffer[first_non_zero] == '0') {
+ first_non_zero++;
+ }
+ if (first_non_zero != 0) {
+ for (int i = first_non_zero; i < *length; ++i) {
+ buffer[i - first_non_zero] = buffer[i];
+ }
+ *length -= first_non_zero;
+ *decimal_point -= first_non_zero;
+ }
+}
+
+
+bool FastFixedDtoa(double v,
+ int fractional_count,
+ Vector<char> buffer,
+ int* length,
+ int* decimal_point) {
+ const uint32_t kMaxUInt32 = 0xFFFFFFFF;
+ uint64_t significand = Double(v).Significand();
+ int exponent = Double(v).Exponent();
+ // v = significand * 2^exponent (with significand a 53bit integer).
+ // If the exponent is larger than 20 (i.e. we may have a 73bit number) then we
+ // don't know how to compute the representation. 2^73 ~= 9.5*10^21.
+ // If necessary this limit could probably be increased, but we don't need
+ // more.
+ if (exponent > 20) return false;
+ if (fractional_count > 20) return false;
+ *length = 0;
+ // At most kDoubleSignificandSize bits of the significand are non-zero.
+ // Given a 64 bit integer we have 11 0s followed by 53 potentially non-zero
+ // bits: 0..11*..0xxx..53*..xx
+ if (exponent + kDoubleSignificandSize > 64) {
+ // The exponent must be > 11.
+ //
+ // We know that v = significand * 2^exponent.
+ // And the exponent > 11.
+ // We simplify the task by dividing v by 10^17.
+ // The quotient delivers the first digits, and the remainder fits into a 64
+ // bit number.
+ // Dividing by 10^17 is equivalent to dividing by 5^17*2^17.
+ const uint64_t kFive17 = UINT64_2PART_C(0xB1, A2BC2EC5); // 5^17
+ uint64_t divisor = kFive17;
+ int divisor_power = 17;
+ uint64_t dividend = significand;
+ uint32_t quotient;
+ uint64_t remainder;
+ // Let v = f * 2^e with f == significand and e == exponent.
+ // Then need q (quotient) and r (remainder) as follows:
+ // v = q * 10^17 + r
+ // f * 2^e = q * 10^17 + r
+ // f * 2^e = q * 5^17 * 2^17 + r
+ // If e > 17 then
+ // f * 2^(e-17) = q * 5^17 + r/2^17
+ // else
+ // f = q * 5^17 * 2^(17-e) + r/2^e
+ if (exponent > divisor_power) {
+ // We only allow exponents of up to 20 and therefore (17 - e) <= 3
+ dividend <<= exponent - divisor_power;
+ quotient = static_cast<uint32_t>(dividend / divisor);
+ remainder = (dividend % divisor) << divisor_power;
+ } else {
+ divisor <<= divisor_power - exponent;
+ quotient = static_cast<uint32_t>(dividend / divisor);
+ remainder = (dividend % divisor) << exponent;
+ }
+ FillDigits32(quotient, buffer, length);
+ FillDigits64FixedLength(remainder, divisor_power, buffer, length);
+ *decimal_point = *length;
+ } else if (exponent >= 0) {
+ // 0 <= exponent <= 11
+ significand <<= exponent;
+ FillDigits64(significand, buffer, length);
+ *decimal_point = *length;
+ } else if (exponent > -kDoubleSignificandSize) {
+ // We have to cut the number.
+ uint64_t integrals = significand >> -exponent;
+ uint64_t fractionals = significand - (integrals << -exponent);
+ if (integrals > kMaxUInt32) {
+ FillDigits64(integrals, buffer, length);
+ } else {
+ FillDigits32(static_cast<uint32_t>(integrals), buffer, length);
+ }
+ *decimal_point = *length;
+ FillFractionals(fractionals, exponent, fractional_count,
+ buffer, length, decimal_point);
+ } else if (exponent < -128) {
+ // This configuration (with at most 20 digits) means that all digits must be
+ // 0.
+ ASSERT(fractional_count <= 20);
+ buffer[0] = '\0';
+ *length = 0;
+ *decimal_point = -fractional_count;
+ } else {
+ *decimal_point = 0;
+ FillFractionals(significand, exponent, fractional_count,
+ buffer, length, decimal_point);
+ }
+ TrimZeros(buffer, length, decimal_point);
+ buffer[*length] = '\0';
+ if ((*length) == 0) {
+ // The string is empty and the decimal_point thus has no importance. Mimic
+ // Gay's dtoa and set it to -fractional_count.
+ *decimal_point = -fractional_count;
+ }
+ return true;
+}
+
+} // namespace double_conversion
diff --git a/src/joshua/decoder/ff/lm/kenlm/util/double-conversion/fixed-dtoa.h b/src/kenlm/util/double-conversion/fixed-dtoa.h
similarity index 100%
rename from src/joshua/decoder/ff/lm/kenlm/util/double-conversion/fixed-dtoa.h
rename to src/kenlm/util/double-conversion/fixed-dtoa.h
diff --git a/src/joshua/decoder/ff/lm/kenlm/util/double-conversion/ieee.h b/src/kenlm/util/double-conversion/ieee.h
similarity index 100%
rename from src/joshua/decoder/ff/lm/kenlm/util/double-conversion/ieee.h
rename to src/kenlm/util/double-conversion/ieee.h
diff --git a/src/kenlm/util/double-conversion/strtod.cc b/src/kenlm/util/double-conversion/strtod.cc
new file mode 100644
index 0000000..55b4daa
--- /dev/null
+++ b/src/kenlm/util/double-conversion/strtod.cc
@@ -0,0 +1,558 @@
+// Copyright 2010 the V8 project authors. All rights reserved.
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following
+// disclaimer in the documentation and/or other materials provided
+// with the distribution.
+// * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived
+// from this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#include <cstdarg>
+#include <climits>
+
+#include "strtod.h"
+#include "bignum.h"
+#include "cached-powers.h"
+#include "ieee.h"
+
+namespace double_conversion {
+
+// 2^53 = 9007199254740992.
+// Any integer with at most 15 decimal digits will hence fit into a double
+// (which has a 53bit significand) without loss of precision.
+static const int kMaxExactDoubleIntegerDecimalDigits = 15;
+// 2^64 = 18446744073709551616 > 10^19
+static const int kMaxUint64DecimalDigits = 19;
+
+// Max double: 1.7976931348623157 x 10^308
+// Min non-zero double: 4.9406564584124654 x 10^-324
+// Any x >= 10^309 is interpreted as +infinity.
+// Any x <= 10^-324 is interpreted as 0.
+// Note that 2.5e-324 (despite being smaller than the min double) will be read
+// as non-zero (equal to the min non-zero double).
+static const int kMaxDecimalPower = 309;
+static const int kMinDecimalPower = -324;
+
+// 2^64 = 18446744073709551616
+static const uint64_t kMaxUint64 = UINT64_2PART_C(0xFFFFFFFF, FFFFFFFF);
+
+
+static const double exact_powers_of_ten[] = {
+ 1.0, // 10^0
+ 10.0,
+ 100.0,
+ 1000.0,
+ 10000.0,
+ 100000.0,
+ 1000000.0,
+ 10000000.0,
+ 100000000.0,
+ 1000000000.0,
+ 10000000000.0, // 10^10
+ 100000000000.0,
+ 1000000000000.0,
+ 10000000000000.0,
+ 100000000000000.0,
+ 1000000000000000.0,
+ 10000000000000000.0,
+ 100000000000000000.0,
+ 1000000000000000000.0,
+ 10000000000000000000.0,
+ 100000000000000000000.0, // 10^20
+ 1000000000000000000000.0,
+ // 10^22 = 0x21e19e0c9bab2400000 = 0x878678326eac9 * 2^22
+ 10000000000000000000000.0
+};
+static const int kExactPowersOfTenSize = ARRAY_SIZE(exact_powers_of_ten);
+
+// Maximum number of significant digits in the decimal representation.
+// In fact the value is 772 (see conversions.cc), but to give us some margin
+// we round up to 780.
+static const int kMaxSignificantDecimalDigits = 780;
+
+static Vector<const char> TrimLeadingZeros(Vector<const char> buffer) {
+ for (int i = 0; i < buffer.length(); i++) {
+ if (buffer[i] != '0') {
+ return buffer.SubVector(i, buffer.length());
+ }
+ }
+ return Vector<const char>(buffer.start(), 0);
+}
+
+
+static Vector<const char> TrimTrailingZeros(Vector<const char> buffer) {
+ for (int i = buffer.length() - 1; i >= 0; --i) {
+ if (buffer[i] != '0') {
+ return buffer.SubVector(0, i + 1);
+ }
+ }
+ return Vector<const char>(buffer.start(), 0);
+}
+
+
+static void CutToMaxSignificantDigits(Vector<const char> buffer,
+ int exponent,
+ char* significant_buffer,
+ int* significant_exponent) {
+ for (int i = 0; i < kMaxSignificantDecimalDigits - 1; ++i) {
+ significant_buffer[i] = buffer[i];
+ }
+ // The input buffer has been trimmed. Therefore the last digit must be
+ // different from '0'.
+ ASSERT(buffer[buffer.length() - 1] != '0');
+ // Set the last digit to be non-zero. This is sufficient to guarantee
+ // correct rounding.
+ significant_buffer[kMaxSignificantDecimalDigits - 1] = '1';
+ *significant_exponent =
+ exponent + (buffer.length() - kMaxSignificantDecimalDigits);
+}
+
+
+// Trims the buffer and cuts it to at most kMaxSignificantDecimalDigits.
+// If possible the input-buffer is reused, but if the buffer needs to be
+// modified (due to cutting), then the input needs to be copied into the
+// buffer_copy_space.
+static void TrimAndCut(Vector<const char> buffer, int exponent,
+ char* buffer_copy_space, int space_size,
+ Vector<const char>* trimmed, int* updated_exponent) {
+ Vector<const char> left_trimmed = TrimLeadingZeros(buffer);
+ Vector<const char> right_trimmed = TrimTrailingZeros(left_trimmed);
+ exponent += left_trimmed.length() - right_trimmed.length();
+ if (right_trimmed.length() > kMaxSignificantDecimalDigits) {
+ ASSERT(space_size >= kMaxSignificantDecimalDigits);
+ CutToMaxSignificantDigits(right_trimmed, exponent,
+ buffer_copy_space, updated_exponent);
+ *trimmed = Vector<const char>(buffer_copy_space,
+ kMaxSignificantDecimalDigits);
+ } else {
+ *trimmed = right_trimmed;
+ *updated_exponent = exponent;
+ }
+}
+
+
+// Reads digits from the buffer and converts them to a uint64.
+// Reads in as many digits as fit into a uint64.
+// When the string starts with "1844674407370955161" no further digit is read.
+// Since 2^64 = 18446744073709551616 it would still be possible to read another
+// digit if it were less than or equal to 6, but this would complicate the code.
+static uint64_t ReadUint64(Vector<const char> buffer,
+ int* number_of_read_digits) {
+ uint64_t result = 0;
+ int i = 0;
+ while (i < buffer.length() && result <= (kMaxUint64 / 10 - 1)) {
+ int digit = buffer[i++] - '0';
+ ASSERT(0 <= digit && digit <= 9);
+ result = 10 * result + digit;
+ }
+ *number_of_read_digits = i;
+ return result;
+}
+
+
+// Reads a DiyFp from the buffer.
+// The returned DiyFp is not necessarily normalized.
+// If remaining_decimals is zero then the returned DiyFp is accurate.
+// Otherwise it has been rounded and has error of at most 1/2 ulp.
+static void ReadDiyFp(Vector<const char> buffer,
+ DiyFp* result,
+ int* remaining_decimals) {
+ int read_digits;
+ uint64_t significand = ReadUint64(buffer, &read_digits);
+ if (buffer.length() == read_digits) {
+ *result = DiyFp(significand, 0);
+ *remaining_decimals = 0;
+ } else {
+ // Round the significand.
+ if (buffer[read_digits] >= '5') {
+ significand++;
+ }
+ // Compute the binary exponent.
+ int exponent = 0;
+ *result = DiyFp(significand, exponent);
+ *remaining_decimals = buffer.length() - read_digits;
+ }
+}
+
+
+static bool DoubleStrtod(Vector<const char> trimmed,
+ int exponent,
+ double* result) {
+#if !defined(DOUBLE_CONVERSION_CORRECT_DOUBLE_OPERATIONS)
+ // On x86 the floating-point stack can be 64 or 80 bits wide. If it is
+ // 80 bits wide (as is the case on Linux) then double-rounding occurs and the
+ // result is not accurate.
+ // We know that Windows32 uses 64 bits and is therefore accurate.
+ // Note that the ARM simulator is compiled for 32bits. It therefore exhibits
+ // the same problem.
+ return false;
+#endif
+ if (trimmed.length() <= kMaxExactDoubleIntegerDecimalDigits) {
+ int read_digits;
+ // The trimmed input fits into a double.
+ // If the 10^exponent (resp. 10^-exponent) fits into a double too then we
+ // can compute the result-double simply by multiplying (resp. dividing) the
+ // two numbers.
+ // This is possible because IEEE guarantees that floating-point operations
+ // return the best possible approximation.
+ if (exponent < 0 && -exponent < kExactPowersOfTenSize) {
+ // 10^-exponent fits into a double.
+ *result = static_cast<double>(ReadUint64(trimmed, &read_digits));
+ ASSERT(read_digits == trimmed.length());
+ *result /= exact_powers_of_ten[-exponent];
+ return true;
+ }
+ if (0 <= exponent && exponent < kExactPowersOfTenSize) {
+ // 10^exponent fits into a double.
+ *result = static_cast<double>(ReadUint64(trimmed, &read_digits));
+ ASSERT(read_digits == trimmed.length());
+ *result *= exact_powers_of_ten[exponent];
+ return true;
+ }
+ int remaining_digits =
+ kMaxExactDoubleIntegerDecimalDigits - trimmed.length();
+ if ((0 <= exponent) &&
+ (exponent - remaining_digits < kExactPowersOfTenSize)) {
+ // The trimmed string was short and we can multiply it with
+ // 10^remaining_digits. As a result the remaining exponent now fits
+ // into a double too.
+ *result = static_cast<double>(ReadUint64(trimmed, &read_digits));
+ ASSERT(read_digits == trimmed.length());
+ *result *= exact_powers_of_ten[remaining_digits];
+ *result *= exact_powers_of_ten[exponent - remaining_digits];
+ return true;
+ }
+ }
+ return false;
+}
+
+
+// Returns 10^exponent as an exact DiyFp.
+// The given exponent must be in the range [1; kDecimalExponentDistance[.
+static DiyFp AdjustmentPowerOfTen(int exponent) {
+ ASSERT(0 < exponent);
+ ASSERT(exponent < PowersOfTenCache::kDecimalExponentDistance);
+ // Simply hardcode the remaining powers for the given decimal exponent
+ // distance.
+ ASSERT(PowersOfTenCache::kDecimalExponentDistance == 8);
+ switch (exponent) {
+ case 1: return DiyFp(UINT64_2PART_C(0xa0000000, 00000000), -60);
+ case 2: return DiyFp(UINT64_2PART_C(0xc8000000, 00000000), -57);
+ case 3: return DiyFp(UINT64_2PART_C(0xfa000000, 00000000), -54);
+ case 4: return DiyFp(UINT64_2PART_C(0x9c400000, 00000000), -50);
+ case 5: return DiyFp(UINT64_2PART_C(0xc3500000, 00000000), -47);
+ case 6: return DiyFp(UINT64_2PART_C(0xf4240000, 00000000), -44);
+ case 7: return DiyFp(UINT64_2PART_C(0x98968000, 00000000), -40);
+ default:
+ UNREACHABLE();
+ return DiyFp(0, 0);
+ }
+}
+
+
+// If the function returns true then the result is the correct double.
+// Otherwise it is either the correct double or the double that is just below
+// the correct double.
+static bool DiyFpStrtod(Vector<const char> buffer,
+ int exponent,
+ double* result) {
+ DiyFp input;
+ int remaining_decimals;
+ ReadDiyFp(buffer, &input, &remaining_decimals);
+ // Since we may have dropped some digits the input is not accurate.
+ // If remaining_decimals is different from 0 then the error is at most
+ // .5 ulp (unit in the last place).
+ // We don't want to deal with fractions and therefore keep a common
+ // denominator.
+ const int kDenominatorLog = 3;
+ const int kDenominator = 1 << kDenominatorLog;
+ // Move the remaining decimals into the exponent.
+ exponent += remaining_decimals;
+ int error = (remaining_decimals == 0 ? 0 : kDenominator / 2);
+
+ int old_e = input.e();
+ input.Normalize();
+ error <<= old_e - input.e();
+
+ ASSERT(exponent <= PowersOfTenCache::kMaxDecimalExponent);
+ if (exponent < PowersOfTenCache::kMinDecimalExponent) {
+ *result = 0.0;
+ return true;
+ }
+ DiyFp cached_power;
+ int cached_decimal_exponent;
+ PowersOfTenCache::GetCachedPowerForDecimalExponent(exponent,
+ &cached_power,
+ &cached_decimal_exponent);
+
+ if (cached_decimal_exponent != exponent) {
+ int adjustment_exponent = exponent - cached_decimal_exponent;
+ DiyFp adjustment_power = AdjustmentPowerOfTen(adjustment_exponent);
+ input.Multiply(adjustment_power);
+ if (kMaxUint64DecimalDigits - buffer.length() >= adjustment_exponent) {
+ // The product of input with the adjustment power fits into a 64 bit
+ // integer.
+ ASSERT(DiyFp::kSignificandSize == 64);
+ } else {
+ // The adjustment power is exact. There is hence only an error of 0.5.
+ error += kDenominator / 2;
+ }
+ }
+
+ input.Multiply(cached_power);
+ // The error introduced by a multiplication of a*b equals
+ // error_a + error_b + error_a*error_b/2^64 + 0.5
+ // Substituting a with 'input' and b with 'cached_power' we have
+ // error_b = 0.5 (all cached powers have an error of less than 0.5 ulp),
+ // error_ab = 0 or 1 / kDenominator > error_a*error_b/ 2^64
+ int error_b = kDenominator / 2;
+ int error_ab = (error == 0 ? 0 : 1); // We round up to 1.
+ int fixed_error = kDenominator / 2;
+ error += error_b + error_ab + fixed_error;
+
+ old_e = input.e();
+ input.Normalize();
+ error <<= old_e - input.e();
+
+ // See if the double's significand changes if we add/subtract the error.
+ int order_of_magnitude = DiyFp::kSignificandSize + input.e();
+ int effective_significand_size =
+ Double::SignificandSizeForOrderOfMagnitude(order_of_magnitude);
+ int precision_digits_count =
+ DiyFp::kSignificandSize - effective_significand_size;
+ if (precision_digits_count + kDenominatorLog >= DiyFp::kSignificandSize) {
+ // This can only happen for very small denormals. In this case the
+ // half-way multiplied by the denominator exceeds the range of an uint64.
+ // Simply shift everything to the right.
+ int shift_amount = (precision_digits_count + kDenominatorLog) -
+ DiyFp::kSignificandSize + 1;
+ input.set_f(input.f() >> shift_amount);
+ input.set_e(input.e() + shift_amount);
+ // We add 1 for the lost precision of error, and kDenominator for
+ // the lost precision of input.f().
+ error = (error >> shift_amount) + 1 + kDenominator;
+ precision_digits_count -= shift_amount;
+ }
+ // We use uint64_ts now. This only works if the DiyFp uses uint64_ts too.
+ ASSERT(DiyFp::kSignificandSize == 64);
+ ASSERT(precision_digits_count < 64);
+ uint64_t one64 = 1;
+ uint64_t precision_bits_mask = (one64 << precision_digits_count) - 1;
+ uint64_t precision_bits = input.f() & precision_bits_mask;
+ uint64_t half_way = one64 << (precision_digits_count - 1);
+ precision_bits *= kDenominator;
+ half_way *= kDenominator;
+ DiyFp rounded_input(input.f() >> precision_digits_count,
+ input.e() + precision_digits_count);
+ if (precision_bits >= half_way + error) {
+ rounded_input.set_f(rounded_input.f() + 1);
+ }
+ // If the last_bits are too close to the half-way case then we are too
+ // inaccurate and round down. In this case we return false so that we can
+ // fall back to a more precise algorithm.
+
+ *result = Double(rounded_input).value();
+ if (half_way - error < precision_bits && precision_bits < half_way + error) {
+ // Too imprecise. The caller will have to fall back to a slower version.
+ // However the returned number is guaranteed to be either the correct
+ // double, or the next-lower double.
+ return false;
+ } else {
+ return true;
+ }
+}
+
+
+// Returns
+// - -1 if buffer*10^exponent < diy_fp.
+// - 0 if buffer*10^exponent == diy_fp.
+// - +1 if buffer*10^exponent > diy_fp.
+// Preconditions:
+// buffer.length() + exponent <= kMaxDecimalPower + 1
+// buffer.length() + exponent > kMinDecimalPower
+// buffer.length() <= kMaxDecimalSignificantDigits
+static int CompareBufferWithDiyFp(Vector<const char> buffer,
+ int exponent,
+ DiyFp diy_fp) {
+ ASSERT(buffer.length() + exponent <= kMaxDecimalPower + 1);
+ ASSERT(buffer.length() + exponent > kMinDecimalPower);
+ ASSERT(buffer.length() <= kMaxSignificantDecimalDigits);
+ // Make sure that the Bignum will be able to hold all our numbers.
+ // Our Bignum implementation has a separate field for exponents. Shifts will
+ // consume at most one bigit (< 64 bits).
+ // ln(10) == 3.3219...
+ ASSERT(((kMaxDecimalPower + 1) * 333 / 100) < Bignum::kMaxSignificantBits);
+ Bignum buffer_bignum;
+ Bignum diy_fp_bignum;
+ buffer_bignum.AssignDecimalString(buffer);
+ diy_fp_bignum.AssignUInt64(diy_fp.f());
+ if (exponent >= 0) {
+ buffer_bignum.MultiplyByPowerOfTen(exponent);
+ } else {
+ diy_fp_bignum.MultiplyByPowerOfTen(-exponent);
+ }
+ if (diy_fp.e() > 0) {
+ diy_fp_bignum.ShiftLeft(diy_fp.e());
+ } else {
+ buffer_bignum.ShiftLeft(-diy_fp.e());
+ }
+ return Bignum::Compare(buffer_bignum, diy_fp_bignum);
+}
+
+
+// Returns true if the guess is the correct double.
+// Returns false, when guess is either correct or the next-lower double.
+static bool ComputeGuess(Vector<const char> trimmed, int exponent,
+ double* guess) {
+ if (trimmed.length() == 0) {
+ *guess = 0.0;
+ return true;
+ }
+ if (exponent + trimmed.length() - 1 >= kMaxDecimalPower) {
+ *guess = Double::Infinity();
+ return true;
+ }
+ if (exponent + trimmed.length() <= kMinDecimalPower) {
+ *guess = 0.0;
+ return true;
+ }
+
+ if (DoubleStrtod(trimmed, exponent, guess) ||
+ DiyFpStrtod(trimmed, exponent, guess)) {
+ return true;
+ }
+ if (*guess == Double::Infinity()) {
+ return true;
+ }
+ return false;
+}
+
+double Strtod(Vector<const char> buffer, int exponent) {
+ char copy_buffer[kMaxSignificantDecimalDigits];
+ Vector<const char> trimmed;
+ int updated_exponent;
+ TrimAndCut(buffer, exponent, copy_buffer, kMaxSignificantDecimalDigits,
+ &trimmed, &updated_exponent);
+ exponent = updated_exponent;
+
+ double guess;
+ bool is_correct = ComputeGuess(trimmed, exponent, &guess);
+ if (is_correct) return guess;
+
+ DiyFp upper_boundary = Double(guess).UpperBoundary();
+ int comparison = CompareBufferWithDiyFp(trimmed, exponent, upper_boundary);
+ if (comparison < 0) {
+ return guess;
+ } else if (comparison > 0) {
+ return Double(guess).NextDouble();
+ } else if ((Double(guess).Significand() & 1) == 0) {
+ // Round towards even.
+ return guess;
+ } else {
+ return Double(guess).NextDouble();
+ }
+}
+
+float Strtof(Vector<const char> buffer, int exponent) {
+ char copy_buffer[kMaxSignificantDecimalDigits];
+ Vector<const char> trimmed;
+ int updated_exponent;
+ TrimAndCut(buffer, exponent, copy_buffer, kMaxSignificantDecimalDigits,
+ &trimmed, &updated_exponent);
+ exponent = updated_exponent;
+
+ double double_guess;
+ bool is_correct = ComputeGuess(trimmed, exponent, &double_guess);
+
+ float float_guess = static_cast<float>(double_guess);
+ if (float_guess == double_guess) {
+ // This shortcut triggers for integer values.
+ return float_guess;
+ }
+
+ // We must catch double-rounding. Say the double has been rounded up, and is
+ // now a boundary of a float, and rounds up again. This is why we have to
+ // look at previous too.
+ // Example (in decimal numbers):
+ // input: 12349
+ // high-precision (4 digits): 1235
+ // low-precision (3 digits):
+ // when read from input: 123
+ // when rounded from high precision: 124.
+ // To do this we simply look at the neighbors of the correct result and see
+ // if they would round to the same float. If the guess is not correct we have
+ // to look at four values (since two different doubles could be the correct
+ // double).
+
+ double double_next = Double(double_guess).NextDouble();
+ double double_previous = Double(double_guess).PreviousDouble();
+
+ float f1 = static_cast<float>(double_previous);
+#ifndef NDEBUG
+ float f2 = float_guess;
+#endif
+ float f3 = static_cast<float>(double_next);
+ float f4;
+ if (is_correct) {
+ f4 = f3;
+ } else {
+ double double_next2 = Double(double_next).NextDouble();
+ f4 = static_cast<float>(double_next2);
+ }
+#ifndef NDEBUG
+ ASSERT(f1 <= f2 && f2 <= f3 && f3 <= f4);
+#endif
+
+ // If the guess doesn't lie near a single-precision boundary we can simply
+ // return its float-value.
+ if (f1 == f4) {
+ return float_guess;
+ }
+
+ ASSERT((f1 != f2 && f2 == f3 && f3 == f4) ||
+ (f1 == f2 && f2 != f3 && f3 == f4) ||
+ (f1 == f2 && f2 == f3 && f3 != f4));
+
+ // guess and next are the two possible candidates (in the same way that
+ // double_guess was the lower candidate for a double-precision guess).
+ float guess = f1;
+ float next = f4;
+ DiyFp upper_boundary;
+ if (guess == 0.0f) {
+ float min_float = 1e-45f;
+ upper_boundary = Double(static_cast<double>(min_float) / 2).AsDiyFp();
+ } else {
+ upper_boundary = Single(guess).UpperBoundary();
+ }
+ int comparison = CompareBufferWithDiyFp(trimmed, exponent, upper_boundary);
+ if (comparison < 0) {
+ return guess;
+ } else if (comparison > 0) {
+ return next;
+ } else if ((Single(guess).Significand() & 1) == 0) {
+ // Round towards even.
+ return guess;
+ } else {
+ return next;
+ }
+}
+
+} // namespace double_conversion
diff --git a/src/joshua/decoder/ff/lm/kenlm/util/double-conversion/strtod.h b/src/kenlm/util/double-conversion/strtod.h
similarity index 100%
rename from src/joshua/decoder/ff/lm/kenlm/util/double-conversion/strtod.h
rename to src/kenlm/util/double-conversion/strtod.h
diff --git a/src/joshua/decoder/ff/lm/kenlm/util/double-conversion/utils.h b/src/kenlm/util/double-conversion/utils.h
similarity index 100%
rename from src/joshua/decoder/ff/lm/kenlm/util/double-conversion/utils.h
rename to src/kenlm/util/double-conversion/utils.h
diff --git a/src/kenlm/util/ersatz_progress.cc b/src/kenlm/util/ersatz_progress.cc
new file mode 100644
index 0000000..55c82e7
--- /dev/null
+++ b/src/kenlm/util/ersatz_progress.cc
@@ -0,0 +1,47 @@
+#include "util/ersatz_progress.hh"
+
+#include <algorithm>
+#include <ostream>
+#include <limits>
+#include <string>
+
+namespace util {
+
+namespace { const unsigned char kWidth = 100; }
+
+const char kProgressBanner[] = "----5---10---15---20---25---30---35---40---45---50---55---60---65---70---75---80---85---90---95--100\n";
+
+ErsatzProgress::ErsatzProgress() : current_(0), next_(std::numeric_limits<uint64_t>::max()), complete_(next_), out_(NULL) {}
+
+ErsatzProgress::~ErsatzProgress() {
+ if (out_) Finished();
+}
+
+ErsatzProgress::ErsatzProgress(uint64_t complete, std::ostream *to, const std::string &message)
+ : current_(0), next_(complete / kWidth), complete_(complete), stones_written_(0), out_(to) {
+ if (!out_) {
+ next_ = std::numeric_limits<uint64_t>::max();
+ return;
+ }
+ if (!message.empty()) *out_ << message << '\n';
+ *out_ << kProgressBanner;
+}
+
+void ErsatzProgress::Milestone() {
+ if (!out_) { current_ = 0; return; }
+ if (!complete_) return;
+ unsigned char stone = std::min(static_cast<uint64_t>(kWidth), (current_ * kWidth) / complete_);
+
+ for (; stones_written_ < stone; ++stones_written_) {
+ (*out_) << '*';
+ }
+ if (stone == kWidth) {
+ (*out_) << std::endl;
+ next_ = std::numeric_limits<uint64_t>::max();
+ out_ = NULL;
+ } else {
+ next_ = std::max(next_, ((stone + 1) * complete_ + kWidth - 1) / kWidth);
+ }
+}
+
+} // namespace util
diff --git a/src/kenlm/util/ersatz_progress.hh b/src/kenlm/util/ersatz_progress.hh
new file mode 100644
index 0000000..b47aded
--- /dev/null
+++ b/src/kenlm/util/ersatz_progress.hh
@@ -0,0 +1,57 @@
+#ifndef UTIL_ERSATZ_PROGRESS_H
+#define UTIL_ERSATZ_PROGRESS_H
+
+#include <iostream>
+#include <string>
+#include <stdint.h>
+
+// Ersatz version of boost::progress so core language model doesn't depend on
+// boost. Also adds option to print nothing.
+
+namespace util {
+
+extern const char kProgressBanner[];
+
+class ErsatzProgress {
+ public:
+ // No output.
+ ErsatzProgress();
+
+ // Null means no output. The null value is useful for passing along the ostream pointer from another caller.
+ explicit ErsatzProgress(uint64_t complete, std::ostream *to = &std::cerr, const std::string &message = "");
+
+ ~ErsatzProgress();
+
+ ErsatzProgress &operator++() {
+ if (++current_ >= next_) Milestone();
+ return *this;
+ }
+
+ ErsatzProgress &operator+=(uint64_t amount) {
+ if ((current_ += amount) >= next_) Milestone();
+ return *this;
+ }
+
+ void Set(uint64_t to) {
+ if ((current_ = to) >= next_) Milestone();
+ }
+
+ void Finished() {
+ Set(complete_);
+ }
+
+ private:
+ void Milestone();
+
+ uint64_t current_, next_, complete_;
+ unsigned char stones_written_;
+ std::ostream *out_;
+
+ // noncopyable
+ ErsatzProgress(const ErsatzProgress &other);
+ ErsatzProgress &operator=(const ErsatzProgress &other);
+};
+
+} // namespace util
+
+#endif // UTIL_ERSATZ_PROGRESS_H
diff --git a/src/kenlm/util/exception.cc b/src/kenlm/util/exception.cc
new file mode 100644
index 0000000..e644d2c
--- /dev/null
+++ b/src/kenlm/util/exception.cc
@@ -0,0 +1,105 @@
+#include "util/exception.hh"
+
+#ifdef __GXX_RTTI
+#include <typeinfo>
+#endif
+
+#include <cerrno>
+#include <cstring>
+
+#if defined(_WIN32) || defined(_WIN64)
+#include <windows.h>
+#include <io.h>
+#endif
+
+namespace util {
+
+Exception::Exception() throw() {}
+Exception::~Exception() throw() {}
+
+void Exception::SetLocation(const char *file, unsigned int line, const char *func, const char *child_name, const char *condition) {
+ /* The child class might have set some text, but we want this to come first.
+ * Another option would be passing this information to the constructor, but
+ * then child classes would have to accept constructor arguments and pass
+ * them down.
+ */
+ std::string old_text;
+ std::swap(old_text, what_);
+ StringStream stream(what_);
+ stream << file << ':' << line;
+ if (func) stream << " in " << func << " threw ";
+ if (child_name) {
+ stream << child_name;
+ } else {
+#ifdef __GXX_RTTI
+ stream << typeid(this).name();
+#else
+ stream << "an exception";
+#endif
+ }
+ if (condition) {
+ stream << " because `" << condition << '\'';
+ }
+ stream << ".\n";
+ stream << old_text;
+}
+
+namespace {
+
+#ifdef __GNUC__
+const char *HandleStrerror(int ret, const char *buf) __attribute__ ((unused));
+const char *HandleStrerror(const char *ret, const char * /*buf*/) __attribute__ ((unused));
+#endif
+// At least one of these functions will not be called.
+#ifdef __clang__
+#pragma clang diagnostic push
+#pragma clang diagnostic ignored "-Wunused-function"
+#endif
+// The XOPEN version.
+const char *HandleStrerror(int ret, const char *buf) {
+ if (!ret) return buf;
+ return NULL;
+}
+
+// The GNU version.
+const char *HandleStrerror(const char *ret, const char * /*buf*/) {
+ return ret;
+}
+#ifdef __clang__
+#pragma clang diagnostic pop
+#endif
+} // namespace
+
+ErrnoException::ErrnoException() throw() : errno_(errno) {
+ char buf[200];
+ buf[0] = 0;
+#if defined(sun) || defined(_WIN32) || defined(_WIN64)
+ const char *add = strerror(errno);
+#else
+ const char *add = HandleStrerror(strerror_r(errno, buf, 200), buf);
+#endif
+
+ if (add) {
+ *this << add << ' ';
+ }
+}
+
+ErrnoException::~ErrnoException() throw() {}
+
+OverflowException::OverflowException() throw() {}
+OverflowException::~OverflowException() throw() {}
+
+#if defined(_WIN32) || defined(_WIN64)
+WindowsException::WindowsException() throw() {
+ unsigned int last_error = GetLastError();
+ char error_msg[256] = "";
+ if (!FormatMessageA(FORMAT_MESSAGE_FROM_SYSTEM | FORMAT_MESSAGE_IGNORE_INSERTS, NULL, last_error, LANG_NEUTRAL, error_msg, sizeof(error_msg), NULL)) {
+ *this << "Windows error " << GetLastError() << " while formatting Windows error " << last_error << ". ";
+ } else {
+ *this << "Windows error " << last_error << ": " << error_msg;
+ }
+}
+WindowsException::~WindowsException() throw() {}
+#endif
+
+} // namespace util
diff --git a/src/kenlm/util/exception.hh b/src/kenlm/util/exception.hh
new file mode 100644
index 0000000..57d803d
--- /dev/null
+++ b/src/kenlm/util/exception.hh
@@ -0,0 +1,159 @@
+#ifndef UTIL_EXCEPTION_H
+#define UTIL_EXCEPTION_H
+
+#include "util/string_stream.hh"
+
+#include <exception>
+#include <limits>
+#include <string>
+#include <stdint.h>
+
+namespace util {
+
+template <class Except, class Data> typename Except::template ExceptionTag<Except&>::Identity operator<<(Except &e, const Data &data);
+
+class Exception : public std::exception {
+ public:
+ Exception() throw();
+ virtual ~Exception() throw();
+
+ const char *what() const throw() { return what_.c_str(); }
+
+ // For use by the UTIL_THROW macros.
+ void SetLocation(
+ const char *file,
+ unsigned int line,
+ const char *func,
+ const char *child_name,
+ const char *condition);
+
+ private:
+ template <class Except, class Data> friend typename Except::template ExceptionTag<Except&>::Identity operator<<(Except &e, const Data &data);
+
+ // This helps restrict operator<< defined below.
+ template <class T> struct ExceptionTag {
+ typedef T Identity;
+ };
+
+ std::string what_;
+};
+
+/* This implements the normal operator<< for Exception and all its children.
+ * SFINAE means it only applies to Exception. Think of this as an ersatz
+ * boost::enable_if.
+ */
+template <class Except, class Data> typename Except::template ExceptionTag<Except&>::Identity operator<<(Except &e, const Data &data) {
+ StringStream(e.what_) << data;
+ return e;
+}
+
+#ifdef __GNUC__
+#define UTIL_FUNC_NAME __PRETTY_FUNCTION__
+#else
+#ifdef _WIN32
+#define UTIL_FUNC_NAME __FUNCTION__
+#else
+#define UTIL_FUNC_NAME NULL
+#endif
+#endif
+
+/* Create an instance of Exception, add the message Modify, and throw it.
+ * Modify is appended to the what() message and can contain << for ostream
+ * operations.
+ *
+ * do .. while kludge to swallow trailing ; character
+ * http://gcc.gnu.org/onlinedocs/cpp/Swallowing-the-Semicolon.html .
+ * Arg can be a constructor argument to the exception.
+ */
+#define UTIL_THROW_BACKEND(Condition, Exception, Arg, Modify) do { \
+ Exception UTIL_e Arg; \
+ UTIL_e.SetLocation(__FILE__, __LINE__, UTIL_FUNC_NAME, #Exception, Condition); \
+ UTIL_e << Modify; \
+ throw UTIL_e; \
+} while (0)
+
+#define UTIL_THROW_ARG(Exception, Arg, Modify) \
+ UTIL_THROW_BACKEND(NULL, Exception, Arg, Modify)
+
+#define UTIL_THROW(Exception, Modify) \
+ UTIL_THROW_BACKEND(NULL, Exception, , Modify);
+
+#define UTIL_THROW2(Modify) \
+ UTIL_THROW_BACKEND(NULL, util::Exception, , Modify);
+
+#if __GNUC__ >= 3
+#define UTIL_UNLIKELY(x) __builtin_expect (!!(x), 0)
+#else
+#define UTIL_UNLIKELY(x) (x)
+#endif
+
+#if __GNUC__ >= 3
+#define UTIL_LIKELY(x) __builtin_expect (!!(x), 1)
+#else
+#define UTIL_LIKELY(x) (x)
+#endif
+
+#define UTIL_THROW_IF_ARG(Condition, Exception, Arg, Modify) do { \
+ if (UTIL_UNLIKELY(Condition)) { \
+ UTIL_THROW_BACKEND(#Condition, Exception, Arg, Modify); \
+ } \
+} while (0)
+
+#define UTIL_THROW_IF(Condition, Exception, Modify) \
+ UTIL_THROW_IF_ARG(Condition, Exception, , Modify)
+
+#define UTIL_THROW_IF2(Condition, Modify) \
+ UTIL_THROW_IF_ARG(Condition, util::Exception, , Modify)
+
+// Exception that records errno and adds it to the message.
+class ErrnoException : public Exception {
+ public:
+ ErrnoException() throw();
+
+ virtual ~ErrnoException() throw();
+
+ int Error() const throw() { return errno_; }
+
+ private:
+ int errno_;
+};
+
+// file wasn't there, or couldn't be open for some reason
+class FileOpenException : public Exception {
+ public:
+ FileOpenException() throw() {}
+ ~FileOpenException() throw() {}
+};
+
+// Utilities for overflow checking.
+class OverflowException : public Exception {
+ public:
+ OverflowException() throw();
+ ~OverflowException() throw();
+};
+
+template <unsigned len> inline std::size_t CheckOverflowInternal(uint64_t value) {
+ UTIL_THROW_IF(value > static_cast<uint64_t>(std::numeric_limits<std::size_t>::max()), OverflowException, "Integer overflow detected. This model is too big for 32-bit code.");
+ return value;
+}
+
+template <> inline std::size_t CheckOverflowInternal<8>(uint64_t value) {
+ return value;
+}
+
+inline std::size_t CheckOverflow(uint64_t value) {
+ return CheckOverflowInternal<sizeof(std::size_t)>(value);
+}
+
+#if defined(_WIN32) || defined(_WIN64)
+/* Thrown for Windows specific operations. */
+class WindowsException : public Exception {
+ public:
+ WindowsException() throw();
+ ~WindowsException() throw();
+};
+#endif
+
+} // namespace util
+
+#endif // UTIL_EXCEPTION_H
diff --git a/src/kenlm/util/fake_ostream.hh b/src/kenlm/util/fake_ostream.hh
new file mode 100644
index 0000000..2f76053
--- /dev/null
+++ b/src/kenlm/util/fake_ostream.hh
@@ -0,0 +1,111 @@
+#ifndef UTIL_FAKE_OSTREAM_H
+#define UTIL_FAKE_OSTREAM_H
+
+#include "util/float_to_string.hh"
+#include "util/integer_to_string.hh"
+#include "util/string_piece.hh"
+
+#include <cassert>
+#include <limits>
+
+#include <stdint.h>
+
+namespace util {
+
+/* Like std::ostream but without being incredibly slow.
+ * Supports most of the built-in types except for long double.
+ *
+ * The FakeOStream class is intended to be inherited from. The inherting class
+ * should provide:
+ * public:
+ * Derived &flush();
+ * Derived &write(const void *data, std::size_t length);
+ *
+ * private: or protected:
+ * friend class FakeOStream;
+ * char *Ensure(std::size_t amount);
+ * void AdvanceTo(char *to);
+ *
+ * The Ensure function makes enough space for an in-place write and returns
+ * where to write. The AdvanceTo function happens after the write, saying how
+ * much was actually written.
+ *
+ * Precondition:
+ * amount <= kToStringMaxBytes for in-place writes.
+ */
+template <class Derived> class FakeOStream {
+ public:
+ FakeOStream() {}
+
+ // This also covers std::string and char*
+ Derived &operator<<(StringPiece str) {
+ return C().write(str.data(), str.size());
+ }
+
+ // Handle integers by size and signedness.
+ private:
+ template <class Arg> struct EnableIfKludge {
+ typedef Derived type;
+ };
+ template <class From, unsigned Length = sizeof(From), bool Signed = std::numeric_limits<From>::is_signed, bool IsInteger = std::numeric_limits<From>::is_integer> struct Coerce {};
+
+ template <class From> struct Coerce<From, 2, false, true> { typedef uint16_t To; };
+ template <class From> struct Coerce<From, 4, false, true> { typedef uint32_t To; };
+ template <class From> struct Coerce<From, 8, false, true> { typedef uint64_t To; };
+
+ template <class From> struct Coerce<From, 2, true, true> { typedef int16_t To; };
+ template <class From> struct Coerce<From, 4, true, true> { typedef int32_t To; };
+ template <class From> struct Coerce<From, 8, true, true> { typedef int64_t To; };
+ public:
+ template <class From> typename EnableIfKludge<typename Coerce<From>::To>::type &operator<<(const From value) {
+ return CallToString(static_cast<typename Coerce<From>::To>(value));
+ }
+
+ // Character types that get copied as bytes instead of displayed as integers.
+ Derived &operator<<(char val) { return put(val); }
+ Derived &operator<<(signed char val) { return put(static_cast<char>(val)); }
+ Derived &operator<<(unsigned char val) { return put(static_cast<char>(val)); }
+
+ Derived &operator<<(bool val) { return put(val + '0'); }
+ // enums will fall back to int but are not caught by the template.
+ Derived &operator<<(int val) { return CallToString(static_cast<typename Coerce<int>::To>(val)); }
+
+ Derived &operator<<(float val) { return CallToString(val); }
+ Derived &operator<<(double val) { return CallToString(val); }
+
+ // This is here to catch all the other pointer types.
+ Derived &operator<<(const void *value) { return CallToString(value); }
+ // This is here because the above line also catches const char*.
+ Derived &operator<<(const char *value) { return *this << StringPiece(value); }
+ Derived &operator<<(char *value) { return *this << StringPiece(value); }
+
+ Derived &put(char val) {
+ char *c = C().Ensure(1);
+ *c = val;
+ C().AdvanceTo(++c);
+ return C();
+ }
+
+ char widen(char val) const { return val; }
+
+ private:
+ // References to derived class for convenience.
+ Derived &C() {
+ return *static_cast<Derived*>(this);
+ }
+
+ const Derived &C() const {
+ return *static_cast<const Derived*>(this);
+ }
+
+ // This is separate to prevent an infinite loop if the compiler considers
+ // types the same (i.e. gcc std::size_t and uint64_t or uint32_t).
+ template <class T> Derived &CallToString(const T value) {
+ C().AdvanceTo(ToString(value, C().Ensure(ToStringBuf<T>::kBytes)));
+ return C();
+ }
+};
+
+} // namespace
+
+#endif // UTIL_FAKE_OSTREAM_H
diff --git a/src/kenlm/util/file.cc b/src/kenlm/util/file.cc
new file mode 100644
index 0000000..e8976bc
--- /dev/null
+++ b/src/kenlm/util/file.cc
@@ -0,0 +1,574 @@
+#define _LARGEFILE64_SOURCE
+#define _FILE_OFFSET_BITS 64
+
+#include "util/file.hh"
+
+#include "util/exception.hh"
+
+#include <algorithm>
+#include <cstdlib>
+#include <cstdio>
+#include <iostream>
+#include <limits>
+#include <sstream>
+
+
+#include <cassert>
+#include <cerrno>
+#include <climits>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <stdint.h>
+
+#if defined(__MINGW32__)
+#include <windows.h>
+#include <unistd.h>
+#warning "The file functions on MinGW have not been tested for file sizes above 2^31 - 1. Please read https://stackoverflow.com/questions/12539488/determine-64-bit-file-size-in-c-on-mingw-32-bit and fix"
+#elif defined(_WIN32) || defined(_WIN64)
+#include <windows.h>
+#include <io.h>
+#else
+#include <unistd.h>
+#endif
+
+namespace util {
+
+scoped_fd::~scoped_fd() {
+ if (fd_ != -1 && close(fd_)) {
+ std::cerr << "Could not close file " << fd_ << std::endl;
+ std::abort();
+ }
+}
+
+void scoped_FILE_closer::Close(std::FILE *file) {
+ if (file && std::fclose(file)) {
+ std::cerr << "Could not close file " << file << std::endl;
+ std::abort();
+ }
+}
+
+// Note that ErrnoException records errno before NameFromFD is called.
+FDException::FDException(int fd) throw() : fd_(fd), name_guess_(NameFromFD(fd)) {
+ *this << "in " << name_guess_ << ' ';
+}
+
+FDException::~FDException() throw() {}
+
+EndOfFileException::EndOfFileException() throw() {
+ *this << "End of file";
+}
+EndOfFileException::~EndOfFileException() throw() {}
+
+bool InputFileIsStdin(StringPiece path) {
+ return path == "-" || path == "/dev/stdin";
+}
+
+bool OutputFileIsStdout(StringPiece path) {
+ return path == "-" || path == "/dev/stdout";
+}
+
+int OpenReadOrThrow(const char *name) {
+ int ret;
+#if defined(_WIN32) || defined(_WIN64)
+ UTIL_THROW_IF(-1 == (ret = _open(name, _O_BINARY | _O_RDONLY)), ErrnoException, "while opening " << name);
+#else
+ UTIL_THROW_IF(-1 == (ret = open(name, O_RDONLY)), ErrnoException, "while opening " << name);
+#endif
+ return ret;
+}
+
+int CreateOrThrow(const char *name) {
+ int ret;
+#if defined(_WIN32) || defined(_WIN64)
+ UTIL_THROW_IF(-1 == (ret = _open(name, _O_CREAT | _O_TRUNC | _O_RDWR | _O_BINARY, _S_IREAD | _S_IWRITE)), ErrnoException, "while creating " << name);
+#else
+ UTIL_THROW_IF(-1 == (ret = open(name, O_CREAT | O_TRUNC | O_RDWR, S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH)), ErrnoException, "while creating " << name);
+#endif
+ return ret;
+}
+
+uint64_t SizeFile(int fd) {
+#if defined __MINGW32__
+ struct stat sb;
+ // Does this handle 64-bit?
+ int ret = fstat(fd, &sb);
+ if (ret == -1 || (!sb.st_size && !S_ISREG(sb.st_mode))) return kBadSize;
+ return sb.st_size;
+#elif defined(_WIN32) || defined(_WIN64)
+ __int64 ret = _filelengthi64(fd);
+ return (ret == -1) ? kBadSize : ret;
+#else // Not windows.
+
+#ifdef OS_ANDROID
+ struct stat64 sb;
+ int ret = fstat64(fd, &sb);
+#else
+ struct stat sb;
+ int ret = fstat(fd, &sb);
+#endif
+ if (ret == -1 || (!sb.st_size && !S_ISREG(sb.st_mode))) return kBadSize;
+ return sb.st_size;
+#endif
+}
+
+uint64_t SizeOrThrow(int fd) {
+ uint64_t ret = SizeFile(fd);
+ UTIL_THROW_IF_ARG(ret == kBadSize, FDException, (fd), "Failed to size");
+ return ret;
+}
+
+void ResizeOrThrow(int fd, uint64_t to) {
+#if defined __MINGW32__
+ // Does this handle 64-bit?
+ int ret = ftruncate
+#elif defined(_WIN32) || defined(_WIN64)
+ errno_t ret = _chsize_s
+#elif defined(OS_ANDROID)
+ int ret = ftruncate64
+#else
+ int ret = ftruncate
+#endif
+ (fd, to);
+ UTIL_THROW_IF_ARG(ret, FDException, (fd), "while resizing to " << to << " bytes");
+}
+
+namespace {
+std::size_t GuardLarge(std::size_t size) {
+ // The following operating systems have broken read/write/pread/pwrite that
+ // only supports up to 2^31.
+ // OS X man pages claim to support 64-bit, but Kareem M. Darwish had problems
+ // building with larger files, so APPLE is also here.
+#if defined(_WIN32) || defined(_WIN64) || defined(__APPLE__) || defined(OS_ANDROID) || defined(__MINGW32__)
+ return size < INT_MAX ? size : INT_MAX;
+#else
+ return size;
+#endif
+}
+}
+
+#if defined(_WIN32) || defined(_WIN64)
+namespace {
+const std::size_t kMaxDWORD = static_cast<std::size_t>(4294967295UL);
+} // namespace
+#endif
+
+std::size_t PartialRead(int fd, void *to, std::size_t amount) {
+#if defined(_WIN32) || defined(_WIN64)
+ DWORD ret;
+ HANDLE file_handle = reinterpret_cast<HANDLE>(_get_osfhandle(fd));
+ DWORD larger_size = static_cast<DWORD>(std::min<std::size_t>(kMaxDWORD, amount));
+ DWORD smaller_size = 28672; // Received reports that 31346 worked but higher values did not. This rounds down to the nearest multiple of 4096, the page size.
+ if (!ReadFile(file_handle, to, larger_size, &ret, NULL))
+ {
+ DWORD last_error = GetLastError();
+ if (last_error != ERROR_NOT_ENOUGH_MEMORY || !ReadFile(file_handle, to, smaller_size, &ret, NULL)) {
+ UTIL_THROW(WindowsException, "Windows error in ReadFile.");
+ }
+ }
+#else
+ errno = 0;
+ ssize_t ret;
+ do {
+ ret = read(fd, to, GuardLarge(amount));
+ } while (ret == -1 && errno == EINTR);
+ UTIL_THROW_IF_ARG(ret < 0, FDException, (fd), "while reading " << amount << " bytes");
+#endif
+ return static_cast<std::size_t>(ret);
+}
+
+void ReadOrThrow(int fd, void *to_void, std::size_t amount) {
+ uint8_t *to = static_cast<uint8_t*>(to_void);
+ while (amount) {
+ std::size_t ret = PartialRead(fd, to, amount);
+ UTIL_THROW_IF(ret == 0, EndOfFileException, " in " << NameFromFD(fd) << " but there should be " << amount << " more bytes to read.");
+ amount -= ret;
+ to += ret;
+ }
+}
+
+std::size_t ReadOrEOF(int fd, void *to_void, std::size_t amount) {
+ uint8_t *to = static_cast<uint8_t*>(to_void);
+ std::size_t remaining = amount;
+ while (remaining) {
+ std::size_t ret = PartialRead(fd, to, remaining);
+ if (!ret) return amount - remaining;
+ remaining -= ret;
+ to += ret;
+ }
+ return amount;
+}
+
+void WriteOrThrow(int fd, const void *data_void, std::size_t size) {
+ const uint8_t *data = static_cast<const uint8_t*>(data_void);
+ while (size) {
+#if defined(_WIN32) || defined(_WIN64)
+ int ret;
+#else
+ ssize_t ret;
+#endif
+ errno = 0;
+ do {
+ ret =
+#if defined(_WIN32) || defined(_WIN64)
+ _write
+#else
+ write
+#endif
+ (fd, data, GuardLarge(size));
+ } while (ret == -1 && errno == EINTR);
+ UTIL_THROW_IF_ARG(ret < 1, FDException, (fd), "while writing " << size << " bytes");
+ data += ret;
+ size -= ret;
+ }
+}
+
+void WriteOrThrow(FILE *to, const void *data, std::size_t size) {
+ if (!size) return;
+ UTIL_THROW_IF(1 != std::fwrite(data, size, 1, to), ErrnoException, "Short write; requested size " << size);
+}
+
+void ErsatzPRead(int fd, void *to_void, std::size_t size, uint64_t off) {
+ uint8_t *to = static_cast<uint8_t*>(to_void);
+ while (size) {
+#if defined(_WIN32) || defined(_WIN64)
+ /* BROKEN: changes file pointer. Even if you save it and change it back, it won't be safe to use concurrently with write() or read() which lmplz does. */
+ // size_t might be 64-bit. DWORD is always 32.
+ DWORD reading = static_cast<DWORD>(std::min<std::size_t>(kMaxDWORD, size));
+ DWORD ret;
+ OVERLAPPED overlapped;
+ memset(&overlapped, 0, sizeof(OVERLAPPED));
+ overlapped.Offset = static_cast<DWORD>(off);
+ overlapped.OffsetHigh = static_cast<DWORD>(off >> 32);
+ UTIL_THROW_IF(!ReadFile((HANDLE)_get_osfhandle(fd), to, reading, &ret, &overlapped), WindowsException, "ReadFile failed for offset " << off);
+#else
+ ssize_t ret;
+ errno = 0;
+ ret =
+#ifdef OS_ANDROID
+ pread64
+#else
+ pread
+#endif
+ (fd, to, GuardLarge(size), off);
+ if (ret <= 0) {
+ if (ret == -1 && errno == EINTR) continue;
+ UTIL_THROW_IF(ret == 0, EndOfFileException, " for reading " << size << " bytes at " << off << " from " << NameFromFD(fd));
+ UTIL_THROW_ARG(FDException, (fd), "while reading " << size << " bytes at offset " << off);
+ }
+#endif
+ size -= ret;
+ off += ret;
+ to += ret;
+ }
+}
+
+void ErsatzPWrite(int fd, const void *from_void, std::size_t size, uint64_t off) {
+ const uint8_t *from = static_cast<const uint8_t*>(from_void);
+ while(size) {
+#if defined(_WIN32) || defined(_WIN64)
+ /* Changes file pointer. Even if you save it and change it back, it won't be safe to use concurrently with write() or read() */
+ // size_t might be 64-bit. DWORD is always 32.
+ DWORD writing = static_cast<DWORD>(std::min<std::size_t>(kMaxDWORD, size));
+ DWORD ret;
+ OVERLAPPED overlapped;
+ memset(&overlapped, 0, sizeof(OVERLAPPED));
+ overlapped.Offset = static_cast<DWORD>(off);
+ overlapped.OffsetHigh = static_cast<DWORD>(off >> 32);
+ UTIL_THROW_IF(!WriteFile((HANDLE)_get_osfhandle(fd), from, writing, &ret, &overlapped), Exception, "WriteFile failed for offset " << off);
+#else
+ ssize_t ret;
+ errno = 0;
+ ret =
+#ifdef OS_ANDROID
+ pwrite64
+#else
+ pwrite
+#endif
+ (fd, from, GuardLarge(size), off);
+ if (ret <= 0) {
+ if (ret == -1 && errno == EINTR) continue;
+ UTIL_THROW_IF(ret == 0, EndOfFileException, " for writing " << size << " bytes at " << off << " from " << NameFromFD(fd));
+ UTIL_THROW_ARG(FDException, (fd), "while writing " << size << " bytes at offset " << off);
+ }
+#endif
+ size -= ret;
+ off += ret;
+ from += ret;
+ }
+}
+
+
+void FSyncOrThrow(int fd) {
+// Apparently windows doesn't have fsync?
+#if !defined(_WIN32) && !defined(_WIN64)
+ UTIL_THROW_IF_ARG(-1 == fsync(fd), FDException, (fd), "while syncing");
+#endif
+}
+
+namespace {
+
+// Static assert for 64-bit off_t size.
+#if !defined(_WIN32) && !defined(_WIN64) && !defined(OS_ANDROID)
+template <unsigned> struct CheckOffT;
+template <> struct CheckOffT<8> {
+ struct True {};
+};
+// If there's a compiler error on the next line, then off_t isn't 64 bit. And
+// that makes me a sad panda.
+typedef CheckOffT<sizeof(off_t)>::True IgnoredType;
+#endif
+
+// Can't we all just get along?
+void InternalSeek(int fd, int64_t off, int whence) {
+ if (
+#if defined __MINGW32__
+ // Does this handle 64-bit?
+ (off_t)-1 == lseek(fd, off, whence)
+#elif defined(_WIN32) || defined(_WIN64)
+ (__int64)-1 == _lseeki64(fd, off, whence)
+#elif defined(OS_ANDROID)
+ (off64_t)-1 == lseek64(fd, off, whence)
+#else
+ (off_t)-1 == lseek(fd, off, whence)
+#endif
+ ) UTIL_THROW_ARG(FDException, (fd), "while seeking to " << off << " whence " << whence);
+}
+} // namespace
+
+void SeekOrThrow(int fd, uint64_t off) {
+ InternalSeek(fd, off, SEEK_SET);
+}
+
+void AdvanceOrThrow(int fd, int64_t off) {
+ InternalSeek(fd, off, SEEK_CUR);
+}
+
+void SeekEnd(int fd) {
+ InternalSeek(fd, 0, SEEK_END);
+}
+
+std::FILE *FDOpenOrThrow(scoped_fd &file) {
+ std::FILE *ret = fdopen(file.get(), "r+b");
+ UTIL_THROW_IF_ARG(!ret, FDException, (file.get()), "Could not fdopen for write");
+ file.release();
+ return ret;
+}
+
+std::FILE *FDOpenReadOrThrow(scoped_fd &file) {
+ std::FILE *ret = fdopen(file.get(), "rb");
+ UTIL_THROW_IF_ARG(!ret, FDException, (file.get()), "Could not fdopen for read");
+ file.release();
+ return ret;
+}
+
+// Sigh. Windows temporary file creation is full of race conditions.
+#if defined(_WIN32) || defined(_WIN64)
+/* mkstemp extracted from libc/sysdeps/posix/tempname.c. Copyright
+ (C) 1991-1999, 2000, 2001, 2006 Free Software Foundation, Inc.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version. */
+
+/* This has been modified from the original version to rename the function and
+ * set the Windows temporary flag. */
+
+static const char letters[] =
+"abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789";
+
+/* Generate a temporary file name based on TMPL. TMPL must match the
+ rules for mk[s]temp (i.e. end in "XXXXXX"). The name constructed
+ does not exist at the time of the call to mkstemp. TMPL is
+ overwritten with the result. */
+int
+mkstemp_and_unlink(char *tmpl)
+{
+ int len;
+ char *XXXXXX;
+ static unsigned long long value;
+ unsigned long long random_time_bits;
+ unsigned int count;
+ int fd = -1;
+ int save_errno = errno;
+
+ /* A lower bound on the number of temporary files to attempt to
+ generate. The maximum total number of temporary file names that
+ can exist for a given template is 62**6. It should never be
+ necessary to try all these combinations. Instead if a reasonable
+ number of names is tried (we define reasonable as 62**3) fail to
+ give the system administrator the chance to remove the problems. */
+#define ATTEMPTS_MIN (62 * 62 * 62)
+
+ /* The number of times to attempt to generate a temporary file. To
+ conform to POSIX, this must be no smaller than TMP_MAX. */
+#if ATTEMPTS_MIN < TMP_MAX
+ unsigned int attempts = TMP_MAX;
+#else
+ unsigned int attempts = ATTEMPTS_MIN;
+#endif
+
+ len = strlen (tmpl);
+ if (len < 6 || strcmp (&tmpl[len - 6], "XXXXXX"))
+ {
+ errno = EINVAL;
+ return -1;
+ }
+
+/* This is where the Xs start. */
+ XXXXXX = &tmpl[len - 6];
+
+ /* Get some more or less random data. */
+ {
+ SYSTEMTIME stNow;
+ FILETIME ftNow;
+
+ // get system time
+ GetSystemTime(&stNow);
+ stNow.wMilliseconds = 500;
+ if (!SystemTimeToFileTime(&stNow, &ftNow))
+ {
+ errno = -1;
+ return -1;
+ }
+
+ random_time_bits = (((unsigned long long)ftNow.dwHighDateTime << 32)
+ | (unsigned long long)ftNow.dwLowDateTime);
+ }
+ value += random_time_bits ^ (unsigned long long)GetCurrentThreadId ();
+
+ for (count = 0; count < attempts; value += 7777, ++count)
+ {
+ unsigned long long v = value;
+
+ /* Fill in the random bits. */
+ XXXXXX[0] = letters[v % 62];
+ v /= 62;
+ XXXXXX[1] = letters[v % 62];
+ v /= 62;
+ XXXXXX[2] = letters[v % 62];
+ v /= 62;
+ XXXXXX[3] = letters[v % 62];
+ v /= 62;
+ XXXXXX[4] = letters[v % 62];
+ v /= 62;
+ XXXXXX[5] = letters[v % 62];
+
+ /* Modified for windows and to unlink */
+ // fd = open (tmpl, O_RDWR | O_CREAT | O_EXCL, _S_IREAD | _S_IWRITE);
+ int flags = _O_RDWR | _O_CREAT | _O_EXCL | _O_BINARY;
+ flags |= _O_TEMPORARY;
+ fd = _open (tmpl, flags, _S_IREAD | _S_IWRITE);
+ if (fd >= 0)
+ {
+ errno = save_errno;
+ return fd;
+ }
+ else if (errno != EEXIST)
+ return -1;
+ }
+
+ /* We got out of the loop because we ran out of combinations to try. */
+ errno = EEXIST;
+ return -1;
+}
+#else
+int
+mkstemp_and_unlink(char *tmpl) {
+ int ret = mkstemp(tmpl);
+ if (ret != -1) {
+ UTIL_THROW_IF(unlink(tmpl), ErrnoException, "while deleting delete " << tmpl);
+ }
+ return ret;
+}
+#endif
+
+// If it's a directory, add a /. This lets users say -T /tmp without creating
+// /tmpAAAAAA
+void NormalizeTempPrefix(std::string &base) {
+ if (base.empty()) return;
+ if (base[base.size() - 1] == '/') return;
+ struct stat sb;
+ // It's fine for it to not exist.
+ if (-1 == stat(base.c_str(), &sb)) return;
+ if (
+#if defined(_WIN32) || defined(_WIN64)
+ sb.st_mode & _S_IFDIR
+#else
+ S_ISDIR(sb.st_mode)
+#endif
+ ) base += '/';
+}
+
+int MakeTemp(const StringPiece &base) {
+ std::string name(base.data(), base.size());
+ name += "XXXXXX";
+ name.push_back(0);
+ int ret;
+ UTIL_THROW_IF(-1 == (ret = mkstemp_and_unlink(&name[0])), ErrnoException, "while making a temporary based on " << base);
+ return ret;
+}
+
+std::FILE *FMakeTemp(const StringPiece &base) {
+ util::scoped_fd file(MakeTemp(base));
+ return FDOpenOrThrow(file);
+}
+
+int DupOrThrow(int fd) {
+ int ret = dup(fd);
+ UTIL_THROW_IF_ARG(ret == -1, FDException, (fd), "in duplicating the file descriptor");
+ return ret;
+}
+
+namespace {
+// Try to name things but be willing to fail too.
+bool TryName(int fd, std::string &out) {
+#if defined(_WIN32) || defined(_WIN64)
+ return false;
+#else
+ std::string name("/proc/self/fd/");
+ std::ostringstream convert;
+ convert << fd;
+ name += convert.str();
+
+ struct stat sb;
+ if (-1 == lstat(name.c_str(), &sb))
+ return false;
+ out.resize(sb.st_size + 1);
+ // lstat gave us a size, but I've seen it grow, possibly due to symlinks on top of symlinks.
+ while (true) {
+ ssize_t ret = readlink(name.c_str(), &out[0], out.size());
+ if (-1 == ret)
+ return false;
+ if ((size_t)ret < out.size()) {
+ out.resize(ret);
+ break;
+ }
+ // Exponential growth.
+ out.resize(out.size() * 2);
+ }
+ // Don't use the non-file names.
+ if (!out.empty() && out[0] != '/')
+ return false;
+ return true;
+#endif
+}
+} // namespace
+
+std::string NameFromFD(int fd) {
+ std::string ret;
+ if (TryName(fd, ret)) return ret;
+ switch (fd) {
+ case 0: return "stdin";
+ case 1: return "stdout";
+ case 2: return "stderr";
+ }
+ ret = "fd ";
+ std::ostringstream convert;
+ convert << fd;
+ ret += convert.str();
+ return ret;
+}
+
+} // namespace util
diff --git a/src/kenlm/util/file.hh b/src/kenlm/util/file.hh
new file mode 100644
index 0000000..f7cb4d6
--- /dev/null
+++ b/src/kenlm/util/file.hh
@@ -0,0 +1,154 @@
+#ifndef UTIL_FILE_H
+#define UTIL_FILE_H
+
+#include "util/exception.hh"
+#include "util/scoped.hh"
+#include "util/string_piece.hh"
+
+#include <cstddef>
+#include <cstdio>
+#include <string>
+#include <stdint.h>
+
+namespace util {
+
+class scoped_fd {
+ public:
+ scoped_fd() : fd_(-1) {}
+
+ explicit scoped_fd(int fd) : fd_(fd) {}
+
+ ~scoped_fd();
+
+ void reset(int to = -1) {
+ scoped_fd other(fd_);
+ fd_ = to;
+ }
+
+ int get() const { return fd_; }
+
+ int operator*() const { return fd_; }
+
+ int release() {
+ int ret = fd_;
+ fd_ = -1;
+ return ret;
+ }
+
+ private:
+ int fd_;
+
+ scoped_fd(const scoped_fd &);
+ scoped_fd &operator=(const scoped_fd &);
+};
+
+struct scoped_FILE_closer {
+ static void Close(std::FILE *file);
+};
+typedef scoped<std::FILE, scoped_FILE_closer> scoped_FILE;
+
+/* Thrown for any operation where the fd is known. */
+class FDException : public ErrnoException {
+ public:
+ explicit FDException(int fd) throw();
+
+ virtual ~FDException() throw();
+
+ // This may no longer be valid if the exception was thrown past open.
+ int FD() const { return fd_; }
+
+ // Guess from NameFromFD.
+ const std::string &NameGuess() const { return name_guess_; }
+
+ private:
+ int fd_;
+
+ std::string name_guess_;
+};
+
+// End of file reached.
+class EndOfFileException : public Exception {
+ public:
+ EndOfFileException() throw();
+ ~EndOfFileException() throw();
+};
+
+// Open for read only.
+int OpenReadOrThrow(const char *name);
+// Create file if it doesn't exist, truncate if it does. Opened for write.
+int CreateOrThrow(const char *name);
+
+/** Does the given input file path denote standard input?
+ *
+ * Returns true if, and only if, path is either "-" or "/dev/stdin".
+ *
+ * Opening standard input as a file may need some special treatment for
+ * portability. There's a convention that a dash ("-") in place of an input
+ * file path denotes standard input, but opening "/dev/stdin" may need to be
+ * special as well.
+ */
+bool InputPathIsStdin(StringPiece path);
+
+/** Does the given output file path denote standard output?
+ *
+ * Returns true if, and only if, path is either "-" or "/dev/stdout".
+ *
+ * Opening standard output as a file may need some special treatment for
+ * portability. There's a convention that a dash ("-") in place of an output
+ * file path denotes standard output, but opening "/dev/stdout" may need to be
+ * special as well.
+ */
+bool OutputPathIsStdout(StringPiece path);
+
+// Return value for SizeFile when it can't size properly.
+const uint64_t kBadSize = (uint64_t)-1;
+uint64_t SizeFile(int fd);
+uint64_t SizeOrThrow(int fd);
+
+void ResizeOrThrow(int fd, uint64_t to);
+
+std::size_t PartialRead(int fd, void *to, std::size_t size);
+void ReadOrThrow(int fd, void *to, std::size_t size);
+std::size_t ReadOrEOF(int fd, void *to_void, std::size_t size);
+
+void WriteOrThrow(int fd, const void *data_void, std::size_t size);
+void WriteOrThrow(FILE *to, const void *data, std::size_t size);
+
+/* These call pread/pwrite in a loop. However, on Windows they call ReadFile/
+ * WriteFile which changes the file pointer. So it's safe to call ErsatzPRead
+ * and ErsatzPWrite concurrently (or any combination thereof). But it changes
+ * the file pointer on windows, so it's not safe to call concurrently with
+ * anything that uses the implicit file pointer e.g. the Read/Write functions
+ * above.
+ */
+void ErsatzPRead(int fd, void *to, std::size_t size, uint64_t off);
+void ErsatzPWrite(int fd, const void *data_void, std::size_t size, uint64_t off);
+
+void FSyncOrThrow(int fd);
+
+// Seeking
+void SeekOrThrow(int fd, uint64_t off);
+void AdvanceOrThrow(int fd, int64_t off);
+void SeekEnd(int fd);
+
+std::FILE *FDOpenOrThrow(scoped_fd &file);
+std::FILE *FDOpenReadOrThrow(scoped_fd &file);
+
+// Temporary files
+// Append a / if base is a directory.
+void NormalizeTempPrefix(std::string &base);
+int MakeTemp(const StringPiece &prefix);
+std::FILE *FMakeTemp(const StringPiece &prefix);
+
+// dup an fd.
+int DupOrThrow(int fd);
+
+/* Attempt get file name from fd. This won't always work (i.e. on Windows or
+ * a pipe). The file might have been renamed. It's intended for diagnostics
+ * and logging only.
+ */
+std::string NameFromFD(int fd);
+
+} // namespace util
+
+#endif // UTIL_FILE_H
diff --git a/src/kenlm/util/file_piece.cc b/src/kenlm/util/file_piece.cc
new file mode 100644
index 0000000..0a4d3a9
--- /dev/null
+++ b/src/kenlm/util/file_piece.cc
@@ -0,0 +1,337 @@
+#include "util/file_piece.hh"
+
+#include "util/double-conversion/double-conversion.h"
+#include "util/exception.hh"
+#include "util/file.hh"
+#include "util/mmap.hh"
+
+#if defined(_WIN32) || defined(_WIN64)
+#include <io.h>
+#else
+#include <unistd.h>
+#endif
+
+#include <cassert>
+#include <cerrno>
+#include <cmath>
+#include <cstdlib>
+#include <iostream>
+#include <limits>
+#include <string>
+
+#include <fcntl.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+
+namespace util {
+
+ParseNumberException::ParseNumberException(StringPiece value) throw() {
+ *this << "Could not parse \"" << value << "\" into a ";
+}
+
+// Sigh this is the only way I could come up with to do a _const_ bool. It has ' ', '\f', '\n', '\r', '\t', and '\v' (same as isspace on C locale).
+const bool kSpaces[256] = {0,0,0,0,0,0,0,0,0,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0};
+
+FilePiece::FilePiece(const char *name, std::ostream *show_progress, std::size_t min_buffer) :
+ file_(OpenReadOrThrow(name)), total_size_(SizeFile(file_.get())), page_(SizePage()),
+ progress_(total_size_, total_size_ == kBadSize ? NULL : show_progress, std::string("Reading ") + name) {
+ Initialize(name, show_progress, min_buffer);
+}
+
+namespace {
+std::string NamePossiblyFind(int fd, const char *name) {
+ if (name) return name;
+ return NameFromFD(fd);
+}
+} // namespace
+
+FilePiece::FilePiece(int fd, const char *name, std::ostream *show_progress, std::size_t min_buffer) :
+ file_(fd), total_size_(SizeFile(file_.get())), page_(SizePage()),
+ progress_(total_size_, total_size_ == kBadSize ? NULL : show_progress, std::string("Reading ") + NamePossiblyFind(fd, name)) {
+ Initialize(NamePossiblyFind(fd, name).c_str(), show_progress, min_buffer);
+}
+
+FilePiece::FilePiece(std::istream &stream, const char *name, std::size_t min_buffer) :
+ total_size_(kBadSize), page_(SizePage()) {
+ InitializeNoRead("istream", min_buffer);
+
+ fallback_to_read_ = true;
+ HugeMalloc(default_map_size_, false, data_);
+ position_ = data_.begin();
+ position_end_ = position_;
+
+ fell_back_.Reset(stream);
+}
+
+FilePiece::~FilePiece() {}
+
+StringPiece FilePiece::ReadLine(char delim, bool strip_cr) {
+ std::size_t skip = 0;
+ while (true) {
+ for (const char *i = position_ + skip; i < position_end_; ++i) {
+ if (*i == delim) {
+ // End of line.
+ // Take 1 byte off the end if it's an unwanted carriage return.
+ const std::size_t subtract_cr = (
+ (strip_cr && i > position_ && *(i - 1) == '\r') ?
+ 1 : 0);
+ StringPiece ret(position_, i - position_ - subtract_cr);
+ position_ = i + 1;
+ return ret;
+ }
+ }
+ if (at_end_) {
+ if (position_ == position_end_) {
+ Shift();
+ }
+ return Consume(position_end_);
+ }
+ skip = position_end_ - position_;
+ Shift();
+ }
+}
+
+bool FilePiece::ReadLineOrEOF(StringPiece &to, char delim, bool strip_cr) {
+ try {
+ to = ReadLine(delim, strip_cr);
+ } catch (const util::EndOfFileException &e) { return false; }
+ return true;
+}
+
+float FilePiece::ReadFloat() {
+ return ReadNumber<float>();
+}
+double FilePiece::ReadDouble() {
+ return ReadNumber<double>();
+}
+long int FilePiece::ReadLong() {
+ return ReadNumber<long int>();
+}
+unsigned long int FilePiece::ReadULong() {
+ return ReadNumber<unsigned long int>();
+}
+
+// Factored out so that istream can call this.
+void FilePiece::InitializeNoRead(const char *name, std::size_t min_buffer) {
+ file_name_ = name;
+
+ default_map_size_ = page_ * std::max<std::size_t>((min_buffer / page_ + 1), 2);
+ position_ = NULL;
+ position_end_ = NULL;
+ mapped_offset_ = 0;
+ at_end_ = false;
+}
+
+void FilePiece::Initialize(const char *name, std::ostream *show_progress, std::size_t min_buffer) {
+ InitializeNoRead(name, min_buffer);
+
+ if (total_size_ == kBadSize) {
+ // So the assertion passes.
+ fallback_to_read_ = false;
+ if (show_progress)
+ *show_progress << "File " << name << " isn't normal. Using slower read() instead of mmap(). No progress bar." << std::endl;
+ TransitionToRead();
+ } else {
+ fallback_to_read_ = false;
+ }
+ Shift();
+ // gzip detect.
+ if ((position_end_ >= position_ + ReadCompressed::kMagicSize) && ReadCompressed::DetectCompressedMagic(position_)) {
+ if (!fallback_to_read_) {
+ at_end_ = false;
+ TransitionToRead();
+ }
+ }
+}
+
+namespace {
+
+static const double_conversion::StringToDoubleConverter kConverter(
+ double_conversion::StringToDoubleConverter::ALLOW_TRAILING_JUNK | double_conversion::StringToDoubleConverter::ALLOW_LEADING_SPACES,
+ std::numeric_limits<double>::quiet_NaN(),
+ std::numeric_limits<double>::quiet_NaN(),
+ "inf",
+ "NaN");
+
+StringPiece FirstToken(StringPiece str) {
+ const char *i;
+ for (i = str.data(); i != str.data() + str.size(); ++i) {
+ if (kSpaces[(unsigned char)*i]) break;
+ }
+ return StringPiece(str.data(), i - str.data());
+}
+
+const char *ParseNumber(StringPiece str, float &out) {
+ int count;
+ out = kConverter.StringToFloat(str.data(), str.size(), &count);
+ UTIL_THROW_IF_ARG(std::isnan(out) && str != "NaN" && str != "nan", ParseNumberException, (FirstToken(str)), "float");
+ return str.data() + count;
+}
+const char *ParseNumber(StringPiece str, double &out) {
+ int count;
+ out = kConverter.StringToDouble(str.data(), str.size(), &count);
+ UTIL_THROW_IF_ARG(std::isnan(out) && str != "NaN" && str != "nan", ParseNumberException, (FirstToken(str)), "double");
+ return str.data() + count;
+}
+const char *ParseNumber(StringPiece str, long int &out) {
+ char *end;
+ errno = 0;
+ out = strtol(str.data(), &end, 10);
+ UTIL_THROW_IF_ARG(errno || (end == str.data()), ParseNumberException, (FirstToken(str)), "long int");
+ return end;
+}
+const char *ParseNumber(StringPiece str, unsigned long int &out) {
+ char *end;
+ errno = 0;
+ out = strtoul(str.data(), &end, 10);
+ UTIL_THROW_IF_ARG(errno || (end == str.data()), ParseNumberException, (FirstToken(str)), "unsigned long int");
+ return end;
+}
+} // namespace
+
+template <class T> T FilePiece::ReadNumber() {
+ SkipSpaces();
+ while (last_space_ < position_) {
+ if (UTIL_UNLIKELY(at_end_)) {
+ // Hallucinate a null off the end of the file.
+ std::string buffer(position_, position_end_);
+ T ret;
+ // Has to be null-terminated.
+ const char *begin = buffer.c_str();
+ const char *end = ParseNumber(StringPiece(begin, buffer.size()), ret);
+ position_ += end - begin;
+ return ret;
+ }
+ Shift();
+ }
+ T ret;
+ position_ = ParseNumber(StringPiece(position_, last_space_ - position_), ret);
+ return ret;
+}
+
+const char *FilePiece::FindDelimiterOrEOF(const bool *delim) {
+ std::size_t skip = 0;
+ while (true) {
+ for (const char *i = position_ + skip; i < position_end_; ++i) {
+ if (delim[static_cast<unsigned char>(*i)]) return i;
+ }
+ if (at_end_) {
+ if (position_ == position_end_) Shift();
+ return position_end_;
+ }
+ skip = position_end_ - position_;
+ Shift();
+ }
+}
+
+void FilePiece::Shift() {
+ if (at_end_) {
+ progress_.Finished();
+ throw EndOfFileException();
+ }
+ uint64_t desired_begin = position_ - data_.begin() + mapped_offset_;
+
+ if (!fallback_to_read_) MMapShift(desired_begin);
+ // Notice an mmap failure might set the fallback.
+ if (fallback_to_read_) ReadShift();
+
+ for (last_space_ = position_end_ - 1; last_space_ >= position_; --last_space_) {
+ if (kSpaces[static_cast<unsigned char>(*last_space_)]) break;
+ }
+}
+
+void FilePiece::MMapShift(uint64_t desired_begin) {
+ // Use mmap.
+ uint64_t ignore = desired_begin % page_;
+ // Duplicate request for Shift means give more data.
+ if (position_ == data_.begin() + ignore && position_) {
+ default_map_size_ *= 2;
+ }
+ // Local version so that in case of failure it doesn't overwrite the class variable.
+ uint64_t mapped_offset = desired_begin - ignore;
+
+ uint64_t mapped_size;
+ if (default_map_size_ >= static_cast<std::size_t>(total_size_ - mapped_offset)) {
+ at_end_ = true;
+ mapped_size = total_size_ - mapped_offset;
+ } else {
+ mapped_size = default_map_size_;
+ }
+
+ // Forcibly clear the existing mmap first.
+ data_.reset();
+ try {
+ MapRead(POPULATE_OR_LAZY, *file_, mapped_offset, mapped_size, data_);
+ } catch (const util::ErrnoException &e) {
+ if (desired_begin) {
+ SeekOrThrow(*file_, desired_begin);
+ }
+ // The mmap was scheduled to end the file, but now we're going to read it.
+ at_end_ = false;
+ TransitionToRead();
+ return;
+ }
+ mapped_offset_ = mapped_offset;
+ position_ = data_.begin() + ignore;
+ position_end_ = data_.begin() + mapped_size;
+
+ progress_.Set(desired_begin);
+}
+
+void FilePiece::TransitionToRead() {
+ assert(!fallback_to_read_);
+ fallback_to_read_ = true;
+ data_.reset();
+ HugeMalloc(default_map_size_, false, data_);
+ position_ = data_.begin();
+ position_end_ = position_;
+
+ try {
+ fell_back_.Reset(file_.release());
+ } catch (util::Exception &e) {
+ e << " in file " << file_name_;
+ throw;
+ }
+}
+
+void FilePiece::ReadShift() {
+ assert(fallback_to_read_);
+ // Bytes [data_.begin(), position_) have been consumed.
+ // Bytes [position_, position_end_) have been read into the buffer.
+
+ // Start at the beginning of the buffer if there's nothing useful in it.
+ if (position_ == position_end_) {
+ mapped_offset_ += (position_end_ - data_.begin());
+ position_ = data_.begin();
+ position_end_ = position_;
+ }
+
+ std::size_t already_read = position_end_ - data_.begin();
+
+ if (already_read == default_map_size_) {
+ if (position_ == data_.begin()) {
+ // Buffer too small.
+ std::size_t valid_length = position_end_ - position_;
+ default_map_size_ *= 2;
+ HugeRealloc(default_map_size_, false, data_);
+ position_ = data_.begin();
+ position_end_ = position_ + valid_length;
+ } else {
+ std::size_t moving = position_end_ - position_;
+ memmove(data_.get(), position_, moving);
+ position_ = data_.begin();
+ position_end_ = position_ + moving;
+ already_read = moving;
+ }
+ }
+
+ std::size_t read_return = fell_back_.Read(static_cast<uint8_t*>(data_.get()) + already_read, default_map_size_ - already_read);
+ progress_.Set(fell_back_.RawAmount());
+
+ if (read_return == 0) {
+ at_end_ = true;
+ }
+ position_end_ += read_return;
+}
+
+} // namespace util
diff --git a/src/kenlm/util/file_piece.hh b/src/kenlm/util/file_piece.hh
new file mode 100644
index 0000000..d3d8305
--- /dev/null
+++ b/src/kenlm/util/file_piece.hh
@@ -0,0 +1,175 @@
+#ifndef UTIL_FILE_PIECE_H
+#define UTIL_FILE_PIECE_H
+
+#include "util/ersatz_progress.hh"
+#include "util/exception.hh"
+#include "util/file.hh"
+#include "util/mmap.hh"
+#include "util/read_compressed.hh"
+#include "util/string_piece.hh"
+
+#include <cstddef>
+#include <iosfwd>
+#include <string>
+#include <cassert>
+#include <stdint.h>
+
+namespace util {
+
+class ParseNumberException : public Exception {
+ public:
+ explicit ParseNumberException(StringPiece value) throw();
+ ~ParseNumberException() throw() {}
+};
+
+extern const bool kSpaces[256];
+
+// Memory backing the returned StringPiece may vanish on the next call.
+class FilePiece {
+ public:
+ // 1 MB default.
+ explicit FilePiece(const char *file, std::ostream *show_progress = NULL, std::size_t min_buffer = 1048576);
+ // Takes ownership of fd. name is used for messages.
+ explicit FilePiece(int fd, const char *name = NULL, std::ostream *show_progress = NULL, std::size_t min_buffer = 1048576);
+
+ /* Read from an istream. Don't use this if you can avoid it. Raw fd IO is
+ * much faster. But sometimes you just have an istream like Boost's HTTP
+ * server and want to parse it the same way.
+ * name is just used for messages and FileName().
+ */
+ explicit FilePiece(std::istream &stream, const char *name = NULL, std::size_t min_buffer = 1048576);
+
+ ~FilePiece();
+
+ char get() {
+ if (position_ == position_end_) {
+ Shift();
+ if (at_end_) throw EndOfFileException();
+ }
+ return *(position_++);
+ }
+
+ // Leaves the delimiter, if any, to be returned by get(). Delimiters defined by isspace().
+ StringPiece ReadDelimited(const bool *delim = kSpaces) {
+ SkipSpaces(delim);
+ return Consume(FindDelimiterOrEOF(delim));
+ }
+
+ /// Read word until the line or file ends.
+ bool ReadWordSameLine(StringPiece &to, const bool *delim = kSpaces) {
+ assert(delim[static_cast<unsigned char>('\n')]);
+ // Skip non-enter spaces.
+ for (; ; ++position_) {
+ if (position_ == position_end_) {
+ try {
+ Shift();
+ } catch (const util::EndOfFileException &e) { return false; }
+ // And break out at end of file.
+ if (position_ == position_end_) return false;
+ }
+ if (!delim[static_cast<unsigned char>(*position_)]) break;
+ if (*position_ == '\n') return false;
+ }
+ // We can't be at the end of file because there's at least one character open.
+ to = Consume(FindDelimiterOrEOF(delim));
+ return true;
+ }
+
+ /** Read a line of text from the file.
+ *
+ * Unlike ReadDelimited, this includes leading spaces and consumes the
+ * delimiter. It is similar to getline in that way.
+ *
+   * If strip_cr is true, any trailing carriage return (as would be found on
+ * a file written on Windows) will be left out of the returned line.
+ *
+ * Throws EndOfFileException if the end of the file is encountered. If the
+ * file does not end in a newline, this could mean that the last line is
+ * never read.
+ */
+ StringPiece ReadLine(char delim = '\n', bool strip_cr = true);
+
+ /** Read a line of text from the file, or return false on EOF.
+ *
+ * This is like ReadLine, except it returns false where ReadLine throws
+ * EndOfFileException. Like ReadLine it may not read the last line in the
+ * file if the file does not end in a newline.
+ *
+   * If strip_cr is true, any trailing carriage return (as would be found on
+ * a file written on Windows) will be left out of the returned line.
+ */
+ bool ReadLineOrEOF(StringPiece &to, char delim = '\n', bool strip_cr = true);
+
+ float ReadFloat();
+ double ReadDouble();
+ long int ReadLong();
+ unsigned long int ReadULong();
+
+ // Skip spaces defined by isspace.
+ void SkipSpaces(const bool *delim = kSpaces) {
+ assert(position_ <= position_end_);
+ for (; ; ++position_) {
+ if (position_ == position_end_) {
+ Shift();
+ // And break out at end of file.
+ if (position_ == position_end_) return;
+ }
+ assert(position_ < position_end_);
+ if (!delim[static_cast<unsigned char>(*position_)]) return;
+ }
+ }
+
+ uint64_t Offset() const {
+ return position_ - data_.begin() + mapped_offset_;
+ }
+
+ const std::string &FileName() const { return file_name_; }
+
+ private:
+ void InitializeNoRead(const char *name, std::size_t min_buffer);
+ // Calls InitializeNoRead, so don't call both.
+ void Initialize(const char *name, std::ostream *show_progress, std::size_t min_buffer);
+
+ template <class T> T ReadNumber();
+
+ StringPiece Consume(const char *to) {
+ assert(to >= position_);
+ StringPiece ret(position_, to - position_);
+ position_ = to;
+ return ret;
+ }
+
+ const char *FindDelimiterOrEOF(const bool *delim = kSpaces);
+
+ void Shift();
+ // Backends to Shift().
+ void MMapShift(uint64_t desired_begin);
+
+ void TransitionToRead();
+ void ReadShift();
+
+ const char *position_, *last_space_, *position_end_;
+
+ scoped_fd file_;
+ const uint64_t total_size_;
+ const uint64_t page_;
+
+ std::size_t default_map_size_;
+ uint64_t mapped_offset_;
+
+ // Order matters: file_ should always be destroyed after this.
+ scoped_memory data_;
+
+ bool at_end_;
+ bool fallback_to_read_;
+
+ ErsatzProgress progress_;
+
+ std::string file_name_;
+
+ ReadCompressed fell_back_;
+};
+
+} // namespace util
+
+#endif // UTIL_FILE_PIECE_H
diff --git a/src/kenlm/util/file_piece_test.cc b/src/kenlm/util/file_piece_test.cc
new file mode 100644
index 0000000..d03cd31
--- /dev/null
+++ b/src/kenlm/util/file_piece_test.cc
@@ -0,0 +1,154 @@
+// Tests might fail if you have creative characters in your path. Sue me.
+#include "util/file_piece.hh"
+
+#include "util/file_stream.hh"
+#include "util/file.hh"
+#include "util/scoped.hh"
+
+#define BOOST_TEST_MODULE FilePieceTest
+#include <boost/test/unit_test.hpp>
+#include <fstream>
+#include <iostream>
+#include <cstdio>
+#include <sys/types.h>
+#include <sys/stat.h>
+
+namespace util {
+namespace {
+
+std::string FileLocation() {
+ if (boost::unit_test::framework::master_test_suite().argc < 2) {
+ return "file_piece.cc";
+ }
+ std::string ret(boost::unit_test::framework::master_test_suite().argv[1]);
+ return ret;
+}
+
+/* istream */
+BOOST_AUTO_TEST_CASE(IStream) {
+ std::fstream ref(FileLocation().c_str(), std::ios::in);
+ std::fstream backing(FileLocation().c_str(), std::ios::in);
+ FilePiece test(backing);
+ std::string ref_line;
+ while (getline(ref, ref_line)) {
+ StringPiece test_line(test.ReadLine());
+ BOOST_CHECK_EQUAL(ref_line, test_line);
+ }
+ BOOST_CHECK_THROW(test.get(), EndOfFileException);
+ BOOST_CHECK_THROW(test.get(), EndOfFileException);
+}
+
+/* mmap implementation */
+BOOST_AUTO_TEST_CASE(MMapReadLine) {
+ std::fstream ref(FileLocation().c_str(), std::ios::in);
+ FilePiece test(FileLocation().c_str(), NULL, 1);
+ std::string ref_line;
+ while (getline(ref, ref_line)) {
+ StringPiece test_line(test.ReadLine());
+ // I submitted a bug report to ICU: http://bugs.icu-project.org/trac/ticket/7924
+ if (!test_line.empty() || !ref_line.empty()) {
+ BOOST_CHECK_EQUAL(ref_line, test_line);
+ }
+ }
+ BOOST_CHECK_THROW(test.get(), EndOfFileException);
+}
+
+#if !defined(_WIN32) && !defined(_WIN64) && !defined(__APPLE__)
+/* Apple isn't happy with the popen, fileno, dup. And I don't want to
+ * reimplement popen. This is an issue with the test.
+ */
+/* read() implementation */
+BOOST_AUTO_TEST_CASE(StreamReadLine) {
+ std::fstream ref(FileLocation().c_str(), std::ios::in);
+
+ std::string popen_args = "cat \"";
+ popen_args += FileLocation();
+ popen_args += '"';
+
+ FILE *catter = popen(popen_args.c_str(), "r");
+ BOOST_REQUIRE(catter);
+
+ FilePiece test(dup(fileno(catter)), "file_piece.cc", NULL, 1);
+ std::string ref_line;
+ while (getline(ref, ref_line)) {
+ StringPiece test_line(test.ReadLine());
+ // I submitted a bug report to ICU: http://bugs.icu-project.org/trac/ticket/7924
+ if (!test_line.empty() || !ref_line.empty()) {
+ BOOST_CHECK_EQUAL(ref_line, test_line);
+ }
+ }
+ BOOST_CHECK_THROW(test.get(), EndOfFileException);
+ BOOST_REQUIRE(!pclose(catter));
+}
+#endif
+
+#ifdef HAVE_ZLIB
+
+// gzip file
+BOOST_AUTO_TEST_CASE(PlainZipReadLine) {
+ std::string location(FileLocation());
+ std::fstream ref(location.c_str(), std::ios::in);
+
+ std::string command("gzip <\"");
+ command += location + "\" >\"" + location + "\".gz";
+
+ BOOST_REQUIRE_EQUAL(0, system(command.c_str()));
+ FilePiece test((location + ".gz").c_str(), NULL, 1);
+ unlink((location + ".gz").c_str());
+ std::string ref_line;
+ while (getline(ref, ref_line)) {
+ StringPiece test_line(test.ReadLine());
+ // I submitted a bug report to ICU: http://bugs.icu-project.org/trac/ticket/7924
+ if (!test_line.empty() || !ref_line.empty()) {
+ BOOST_CHECK_EQUAL(ref_line, test_line);
+ }
+ }
+ BOOST_CHECK_THROW(test.get(), EndOfFileException);
+}
+
+// gzip stream. Apple doesn't like popen, fileno, dup. This is an issue with
+// the test.
+#if !defined __APPLE__ && !defined __MINGW32__
+BOOST_AUTO_TEST_CASE(StreamZipReadLine) {
+ std::fstream ref(FileLocation().c_str(), std::ios::in);
+
+ std::string command("gzip <\"");
+ command += FileLocation() + "\"";
+
+ FILE * catter = popen(command.c_str(), "r");
+ BOOST_REQUIRE(catter);
+
+ FilePiece test(dup(fileno(catter)), "file_piece.cc.gz", NULL, 1);
+ std::string ref_line;
+ while (getline(ref, ref_line)) {
+ StringPiece test_line(test.ReadLine());
+ // I submitted a bug report to ICU: http://bugs.icu-project.org/trac/ticket/7924
+ if (!test_line.empty() || !ref_line.empty()) {
+ BOOST_CHECK_EQUAL(ref_line, test_line);
+ }
+ }
+ BOOST_CHECK_THROW(test.get(), EndOfFileException);
+ BOOST_REQUIRE(!pclose(catter));
+}
+#endif // !__APPLE__ && !__MINGW32__
+
+#endif // HAVE_ZLIB
+
+BOOST_AUTO_TEST_CASE(Numbers) {
+ scoped_fd file(MakeTemp(FileLocation()));
+ const float floating = 3.2;
+ {
+ util::FileStream writing(file.get());
+ writing << "94389483984398493890287 " << floating << " 5";
+ }
+ SeekOrThrow(file.get(), 0);
+ util::FilePiece f(file.release());
+ BOOST_CHECK_THROW(f.ReadULong(), ParseNumberException);
+ BOOST_CHECK_EQUAL("94389483984398493890287", f.ReadDelimited());
+ // Yes, exactly equal. Isn't double-conversion wonderful?
+ BOOST_CHECK_EQUAL(floating, f.ReadFloat());
+ BOOST_CHECK_EQUAL(5, f.ReadULong());
+}
+
+} // namespace
+} // namespace util
diff --git a/src/kenlm/util/file_stream.hh b/src/kenlm/util/file_stream.hh
new file mode 100644
index 0000000..ae9ad5a
--- /dev/null
+++ b/src/kenlm/util/file_stream.hh
@@ -0,0 +1,89 @@
+/* Like std::ofstream but without being incredibly slow. Backed by a raw fd.
+ * Supports most of the built-in types except for long double.
+ */
+#ifndef UTIL_FILE_STREAM_H
+#define UTIL_FILE_STREAM_H
+
+#include "util/fake_ostream.hh"
+#include "util/file.hh"
+#include "util/scoped.hh"
+
+#include <cassert>
+#include <cstring>
+
+#include <stdint.h>
+
+namespace util {
+
+class FileStream : public FakeOStream<FileStream> {
+ public:
+ FileStream(int out = -1, std::size_t buffer_size = 8192)
+ : buf_(util::MallocOrThrow(std::max<std::size_t>(buffer_size, kToStringMaxBytes))),
+ current_(static_cast<char*>(buf_.get())),
+ end_(current_ + std::max<std::size_t>(buffer_size, kToStringMaxBytes)),
+ fd_(out) {}
+
+ ~FileStream() {
+ flush();
+ }
+
+ void SetFD(int to) {
+ flush();
+ fd_ = to;
+ }
+
+ FileStream &flush() {
+ if (current_ != buf_.get()) {
+ util::WriteOrThrow(fd_, buf_.get(), current_ - (char*)buf_.get());
+ current_ = static_cast<char*>(buf_.get());
+ }
+ return *this;
+ }
+
+ // For writes of arbitrary size.
+ FileStream &write(const void *data, std::size_t length) {
+ if (UTIL_LIKELY(current_ + length <= end_)) {
+ std::memcpy(current_, data, length);
+ current_ += length;
+ return *this;
+ }
+ flush();
+ if (current_ + length <= end_) {
+ std::memcpy(current_, data, length);
+ current_ += length;
+ } else {
+ util::WriteOrThrow(fd_, data, length);
+ }
+ return *this;
+ }
+
+ FileStream &seekp(uint64_t to) {
+ util::SeekOrThrow(fd_, to);
+ return *this;
+ }
+
+ protected:
+ friend class FakeOStream<FileStream>;
+ // For writes directly to buffer guaranteed to have amount < buffer size.
+ char *Ensure(std::size_t amount) {
+ if (UTIL_UNLIKELY(current_ + amount > end_)) {
+ flush();
+ assert(current_ + amount <= end_);
+ }
+ return current_;
+ }
+
+ void AdvanceTo(char *to) {
+ current_ = to;
+ assert(current_ <= end_);
+ }
+
+ private:
+ util::scoped_malloc buf_;
+ char *current_, *end_;
+ int fd_;
+};
+
+} // namespace
+
+#endif
diff --git a/src/kenlm/util/fixed_array.hh b/src/kenlm/util/fixed_array.hh
new file mode 100644
index 0000000..c67e8ed
--- /dev/null
+++ b/src/kenlm/util/fixed_array.hh
@@ -0,0 +1,176 @@
+#ifndef UTIL_FIXED_ARRAY_H
+#define UTIL_FIXED_ARRAY_H
+
+#include "util/scoped.hh"
+
+#include <cstddef>
+
+#include <cassert>
+#include <cstdlib>
+
+namespace util {
+
+/**
+ * Defines an array with fixed maximum size.
+ *
+ * Ever want an array of things but they don't have a default constructor or
+ * are non-copyable? FixedArray allows constructing one at a time.
+ */
+template <class T> class FixedArray {
+ public:
+ /** Initialize with a given size bound but do not construct the objects. */
+ explicit FixedArray(std::size_t limit) {
+ Init(limit);
+ }
+
+ /**
+ * Constructs an instance, but does not initialize it.
+ *
+ * Any objects constructed in this manner must be subsequently @ref FixedArray::Init() "initialized" prior to use.
+ *
+ * @see FixedArray::Init()
+ */
+ FixedArray()
+ : newed_end_(NULL)
+#ifndef NDEBUG
+ , allocated_end_(NULL)
+#endif
+ {}
+
+ /**
+ * Initialize with a given size bound but do not construct the objects.
+ *
+ * This method is responsible for allocating memory.
+ * Objects stored in this array will be constructed in a location within this allocated memory.
+ */
+ void Init(std::size_t count) {
+ assert(!block_.get());
+ block_.reset(malloc(sizeof(T) * count));
+ if (!block_.get()) throw std::bad_alloc();
+ newed_end_ = begin();
+#ifndef NDEBUG
+ allocated_end_ = begin() + count;
+#endif
+ }
+
+ /**
+ * Constructs a copy of the provided array.
+ *
+ * @param from Array whose elements should be copied into this newly-constructed data structure.
+ */
+ FixedArray(const FixedArray &from) {
+ std::size_t size = from.newed_end_ - static_cast<const T*>(from.block_.get());
+ Init(size);
+ for (std::size_t i = 0; i < size; ++i) {
+ push_back(from[i]);
+ }
+ }
+
+ /**
+ * Frees the memory held by this object.
+ */
+ ~FixedArray() { clear(); }
+
+ /** Gets a pointer to the first object currently stored in this data structure. */
+ T *begin() { return static_cast<T*>(block_.get()); }
+
+ /** Gets a const pointer to the last object currently stored in this data structure. */
+ const T *begin() const { return static_cast<const T*>(block_.get()); }
+
+ /** Gets a pointer to the last object currently stored in this data structure. */
+ T *end() { return newed_end_; }
+
+ /** Gets a const pointer to the last object currently stored in this data structure. */
+ const T *end() const { return newed_end_; }
+
+ /** Gets a reference to the last object currently stored in this data structure. */
+ T &back() { return *(end() - 1); }
+
+ /** Gets a const reference to the last object currently stored in this data structure. */
+ const T &back() const { return *(end() - 1); }
+
+ /** Gets the number of objects currently stored in this data structure. */
+ std::size_t size() const { return end() - begin(); }
+
+ /** Returns true if there are no objects currently stored in this data structure. */
+ bool empty() const { return begin() == end(); }
+
+ /**
+ * Gets a reference to the object with index i currently stored in this data structure.
+ *
+ * @param i Index of the object to reference
+ */
+ T &operator[](std::size_t i) {
+ assert(i < size());
+ return begin()[i];
+ }
+
+ /**
+ * Gets a const reference to the object with index i currently stored in this data structure.
+ *
+ * @param i Index of the object to reference
+ */
+ const T &operator[](std::size_t i) const {
+ assert(i < size());
+ return begin()[i];
+ }
+
+ /**
+ * Constructs a new object using the provided parameter,
+ * and stores it in this data structure.
+ *
+ * The memory backing the constructed object is managed by this data structure.
+ * I miss C++11 variadic templates.
+ */
+ void push_back() {
+ new (end()) T();
+ Constructed();
+ }
+ template <class C> void push_back(const C &c) {
+ new (end()) T(c);
+ Constructed();
+ }
+ template <class C> void push_back(C &c) {
+ new (end()) T(c);
+ Constructed();
+ }
+ template <class C, class D> void push_back(const C &c, const D &d) {
+ new (end()) T(c, d);
+ Constructed();
+ }
+
+ void pop_back() {
+ back().~T();
+ --newed_end_;
+ }
+
+ /**
+ * Removes all elements from this array.
+ */
+ void clear() {
+ while (newed_end_ != begin())
+ pop_back();
+ }
+
+ protected:
+ // Always call Constructed after successful completion of new.
+ void Constructed() {
+ ++newed_end_;
+#ifndef NDEBUG
+ assert(newed_end_ <= allocated_end_);
+#endif
+ }
+
+ private:
+ util::scoped_malloc block_;
+
+ T *newed_end_;
+
+#ifndef NDEBUG
+ T *allocated_end_;
+#endif
+};
+
+} // namespace util
+
+#endif // UTIL_FIXED_ARRAY_H
diff --git a/src/kenlm/util/float_to_string.cc b/src/kenlm/util/float_to_string.cc
new file mode 100644
index 0000000..1e16d6f
--- /dev/null
+++ b/src/kenlm/util/float_to_string.cc
@@ -0,0 +1,23 @@
+#include "util/float_to_string.hh"
+
+#include "util/double-conversion/double-conversion.h"
+#include "util/double-conversion/utils.h"
+
+namespace util {
+namespace {
+const double_conversion::DoubleToStringConverter kConverter(double_conversion::DoubleToStringConverter::NO_FLAGS, "inf", "NaN", 'e', -6, 21, 6, 0);
+} // namespace
+
+char *ToString(double value, char *to) {
+ double_conversion::StringBuilder builder(to, ToStringBuf<double>::kBytes);
+ kConverter.ToShortest(value, &builder);
+ return &to[builder.position()];
+}
+
+char *ToString(float value, char *to) {
+ double_conversion::StringBuilder builder(to, ToStringBuf<float>::kBytes);
+ kConverter.ToShortestSingle(value, &builder);
+ return &to[builder.position()];
+}
+
+} // namespace util
diff --git a/src/kenlm/util/float_to_string.hh b/src/kenlm/util/float_to_string.hh
new file mode 100644
index 0000000..9305327
--- /dev/null
+++ b/src/kenlm/util/float_to_string.hh
@@ -0,0 +1,25 @@
+#ifndef UTIL_FLOAT_TO_STRING_H
+#define UTIL_FLOAT_TO_STRING_H
+
+// Just for ToStringBuf
+#include "util/integer_to_string.hh"
+
+namespace util {
+
+template <> struct ToStringBuf<double> {
+ // DoubleToStringConverter::kBase10MaximalLength + 1 for null paranoia.
+ static const unsigned kBytes = 19;
+};
+
+// Single wasn't documented in double conversion, so be conservative and
+// say the same as double.
+template <> struct ToStringBuf<float> {
+ static const unsigned kBytes = 19;
+};
+
+char *ToString(double value, char *to);
+char *ToString(float value, char *to);
+
+} // namespace util
+
+#endif // UTIL_FLOAT_TO_STRING_H
diff --git a/src/kenlm/util/getopt.c b/src/kenlm/util/getopt.c
new file mode 100644
index 0000000..50eef42
--- /dev/null
+++ b/src/kenlm/util/getopt.c
@@ -0,0 +1,78 @@
+/*
+POSIX getopt for Windows
+
+AT&T Public License
+
+Code given out at the 1985 UNIFORUM conference in Dallas.
+*/
+
+#ifndef __GNUC__
+
+#include "getopt.hh"
+#include <stdio.h>
+#include <string.h>
+
+#define NULL 0
+#define EOF (-1)
+#define ERR(s, c) if(opterr){\
+ char errbuf[2];\
+ errbuf[0] = c; errbuf[1] = '\n';\
+ fputs(argv[0], stderr);\
+ fputs(s, stderr);\
+ fputc(c, stderr);}
+ //(void) write(2, argv[0], (unsigned)strlen(argv[0]));\
+ //(void) write(2, s, (unsigned)strlen(s));\
+ //(void) write(2, errbuf, 2);}
+
+int opterr = 1;
+int optind = 1;
+int optopt;
+char *optarg;
+
+int
+getopt(argc, argv, opts)
+int argc;
+char **argv, *opts;
+{
+ static int sp = 1;
+ register int c;
+ register char *cp;
+
+ if(sp == 1)
+ if(optind >= argc ||
+ argv[optind][0] != '-' || argv[optind][1] == '\0')
+ return(EOF);
+ else if(strcmp(argv[optind], "--") == NULL) {
+ optind++;
+ return(EOF);
+ }
+ optopt = c = argv[optind][sp];
+ if(c == ':' || (cp=strchr(opts, c)) == NULL) {
+ ERR(": illegal option -- ", c);
+ if(argv[optind][++sp] == '\0') {
+ optind++;
+ sp = 1;
+ }
+ return('?');
+ }
+ if(*++cp == ':') {
+ if(argv[optind][sp+1] != '\0')
+ optarg = &argv[optind++][sp+1];
+ else if(++optind >= argc) {
+ ERR(": option requires an argument -- ", c);
+ sp = 1;
+ return('?');
+ } else
+ optarg = argv[optind++];
+ sp = 1;
+ } else {
+ if(argv[optind][++sp] == '\0') {
+ sp = 1;
+ optind++;
+ }
+ optarg = NULL;
+ }
+ return(c);
+}
+
+#endif /* __GNUC__ */
diff --git a/src/kenlm/util/getopt.hh b/src/kenlm/util/getopt.hh
new file mode 100644
index 0000000..9b0792b
--- /dev/null
+++ b/src/kenlm/util/getopt.hh
@@ -0,0 +1,33 @@
+/*
+POSIX getopt for Windows
+
+AT&T Public License
+
+Code given out at the 1985 UNIFORUM conference in Dallas.
+*/
+
+#ifdef __GNUC__
+#include <getopt.h>
+#endif
+#ifndef __GNUC__
+
+#ifndef UTIL_GETOPT_H
+#define UTIL_GETOPT_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+extern int opterr;
+extern int optind;
+extern int optopt;
+extern char *optarg;
+extern int getopt(int argc, char **argv, char *opts);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* UTIL_GETOPT_H */
+#endif /* __GNUC__ */
+
diff --git a/src/joshua/decoder/ff/lm/kenlm/util/have.hh b/src/kenlm/util/have.hh
similarity index 100%
rename from src/joshua/decoder/ff/lm/kenlm/util/have.hh
rename to src/kenlm/util/have.hh
diff --git a/src/kenlm/util/integer_to_string.cc b/src/kenlm/util/integer_to_string.cc
new file mode 100644
index 0000000..19fd794
--- /dev/null
+++ b/src/kenlm/util/integer_to_string.cc
@@ -0,0 +1,667 @@
+#include <iostream>
+/* Fast integer to string conversion.
+Source: https://github.com/miloyip/itoa-benchmark
+Local modifications:
+1. Return end of buffer instead of null terminating
+2. Collapse to single file
+3. Namespace
+4. Remove test hook
+5. Non-x86 support from the branch_lut code
+6. Rename functions
+7. Require __SSE2__ on i386
+
+Copyright (C) 2014 Milo Yip
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
+
+Which is based on: http://0x80.pl/snippets/asm/sse-utoa.c
+
+ SSE: conversion integers to decimal representation
+
+ Author: Wojciech Muła
+ e-mail: wojciech_mula@poczta.onet.pl
+ www: http://0x80.pl/
+
+ License: BSD
+
+ initial release 2011-10-21
+ $Id$
+*/
+
+#include "util/integer_to_string.hh"
+#include <cassert>
+#include <stdint.h>
+
+namespace util {
+
+namespace {
+const char gDigitsLut[200] = {
+ '0','0','0','1','0','2','0','3','0','4','0','5','0','6','0','7','0','8','0','9',
+ '1','0','1','1','1','2','1','3','1','4','1','5','1','6','1','7','1','8','1','9',
+ '2','0','2','1','2','2','2','3','2','4','2','5','2','6','2','7','2','8','2','9',
+ '3','0','3','1','3','2','3','3','3','4','3','5','3','6','3','7','3','8','3','9',
+ '4','0','4','1','4','2','4','3','4','4','4','5','4','6','4','7','4','8','4','9',
+ '5','0','5','1','5','2','5','3','5','4','5','5','5','6','5','7','5','8','5','9',
+ '6','0','6','1','6','2','6','3','6','4','6','5','6','6','6','7','6','8','6','9',
+ '7','0','7','1','7','2','7','3','7','4','7','5','7','6','7','7','7','8','7','9',
+ '8','0','8','1','8','2','8','3','8','4','8','5','8','6','8','7','8','8','8','9',
+ '9','0','9','1','9','2','9','3','9','4','9','5','9','6','9','7','9','8','9','9'
+};
+} // namespace
+
+// SSE2 implementation according to http://0x80.pl/articles/sse-itoa.html
+// Modifications: (1) fix incorrect digits (2) accept all ranges (3) write to user provided buffer.
+
+#if defined(__amd64) || defined(_M_X64) || (defined(__SSE2__) && (defined(_M_IX86) || defined(i386)))
+
+#include <emmintrin.h>
+
+#ifdef _MSC_VER
+#include "intrin.h"
+#endif
+
+#ifdef _MSC_VER
+#define ALIGN_PRE __declspec(align(16))
+#define ALIGN_SUF
+#else
+#define ALIGN_PRE
+#define ALIGN_SUF __attribute__ ((aligned(16)))
+#endif
+
+namespace {
+
+static const uint32_t kDiv10000 = 0xd1b71759;
+ALIGN_PRE static const uint32_t kDiv10000Vector[4] ALIGN_SUF = { kDiv10000, kDiv10000, kDiv10000, kDiv10000 };
+ALIGN_PRE static const uint32_t k10000Vector[4] ALIGN_SUF = { 10000, 10000, 10000, 10000 };
+ALIGN_PRE static const uint16_t kDivPowersVector[8] ALIGN_SUF = { 8389, 5243, 13108, 32768, 8389, 5243, 13108, 32768 }; // 10^3, 10^2, 10^1, 10^0
+ALIGN_PRE static const uint16_t kShiftPowersVector[8] ALIGN_SUF = {
+ 1 << (16 - (23 + 2 - 16)),
+ 1 << (16 - (19 + 2 - 16)),
+ 1 << (16 - 1 - 2),
+ 1 << (15),
+ 1 << (16 - (23 + 2 - 16)),
+ 1 << (16 - (19 + 2 - 16)),
+ 1 << (16 - 1 - 2),
+ 1 << (15)
+};
+ALIGN_PRE static const uint16_t k10Vector[8] ALIGN_SUF = { 10, 10, 10, 10, 10, 10, 10, 10 };
+ALIGN_PRE static const char kAsciiZero[16] ALIGN_SUF = { '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0' };
+
+inline __m128i Convert8DigitsSSE2(uint32_t value) {
+ assert(value <= 99999999);
+
+ // abcd, efgh = abcdefgh divmod 10000
+ const __m128i abcdefgh = _mm_cvtsi32_si128(value);
+ const __m128i abcd = _mm_srli_epi64(_mm_mul_epu32(abcdefgh, reinterpret_cast<const __m128i*>(kDiv10000Vector)[0]), 45);
+ const __m128i efgh = _mm_sub_epi32(abcdefgh, _mm_mul_epu32(abcd, reinterpret_cast<const __m128i*>(k10000Vector)[0]));
+
+ // v1 = [ abcd, efgh, 0, 0, 0, 0, 0, 0 ]
+ const __m128i v1 = _mm_unpacklo_epi16(abcd, efgh);
+
+ // v1a = v1 * 4 = [ abcd * 4, efgh * 4, 0, 0, 0, 0, 0, 0 ]
+ const __m128i v1a = _mm_slli_epi64(v1, 2);
+
+ // v2 = [ abcd * 4, abcd * 4, abcd * 4, abcd * 4, efgh * 4, efgh * 4, efgh * 4, efgh * 4 ]
+ const __m128i v2a = _mm_unpacklo_epi16(v1a, v1a);
+ const __m128i v2 = _mm_unpacklo_epi32(v2a, v2a);
+
+ // v4 = v2 div 10^3, 10^2, 10^1, 10^0 = [ a, ab, abc, abcd, e, ef, efg, efgh ]
+ const __m128i v3 = _mm_mulhi_epu16(v2, reinterpret_cast<const __m128i*>(kDivPowersVector)[0]);
+ const __m128i v4 = _mm_mulhi_epu16(v3, reinterpret_cast<const __m128i*>(kShiftPowersVector)[0]);
+
+ // v5 = v4 * 10 = [ a0, ab0, abc0, abcd0, e0, ef0, efg0, efgh0 ]
+ const __m128i v5 = _mm_mullo_epi16(v4, reinterpret_cast<const __m128i*>(k10Vector)[0]);
+
+ // v6 = v5 << 16 = [ 0, a0, ab0, abc0, 0, e0, ef0, efg0 ]
+ const __m128i v6 = _mm_slli_epi64(v5, 16);
+
+ // v7 = v4 - v6 = { a, b, c, d, e, f, g, h }
+ const __m128i v7 = _mm_sub_epi16(v4, v6);
+
+ return v7;
+}
+
+inline __m128i ShiftDigits_SSE2(__m128i a, unsigned digit) {
+ assert(digit <= 8);
+ switch (digit) {
+ case 0: return a;
+ case 1: return _mm_srli_si128(a, 1);
+ case 2: return _mm_srli_si128(a, 2);
+ case 3: return _mm_srli_si128(a, 3);
+ case 4: return _mm_srli_si128(a, 4);
+ case 5: return _mm_srli_si128(a, 5);
+ case 6: return _mm_srli_si128(a, 6);
+ case 7: return _mm_srli_si128(a, 7);
+ case 8: return _mm_srli_si128(a, 8);
+ }
+ return a; // should not execute here.
+}
+
+} // namespace
+
+// Original name: u32toa_sse2
+char *ToString(uint32_t value, char* buffer) {
+ if (value < 10000) {
+ const uint32_t d1 = (value / 100) << 1;
+ const uint32_t d2 = (value % 100) << 1;
+
+ if (value >= 1000)
+ *buffer++ = gDigitsLut[d1];
+ if (value >= 100)
+ *buffer++ = gDigitsLut[d1 + 1];
+ if (value >= 10)
+ *buffer++ = gDigitsLut[d2];
+ *buffer++ = gDigitsLut[d2 + 1];
+ //*buffer++ = '\0';
+ return buffer;
+ }
+ else if (value < 100000000) {
+ // Experiment shows that this case SSE2 is slower
+#if 0
+ const __m128i a = Convert8DigitsSSE2(value);
+
+ // Convert to bytes, add '0'
+ const __m128i va = _mm_add_epi8(_mm_packus_epi16(a, _mm_setzero_si128()), reinterpret_cast<const __m128i*>(kAsciiZero)[0]);
+
+ // Count number of digit
+ const unsigned mask = _mm_movemask_epi8(_mm_cmpeq_epi8(va, reinterpret_cast<const __m128i*>(kAsciiZero)[0]));
+ unsigned long digit;
+#ifdef _MSC_VER
+ _BitScanForward(&digit, ~mask | 0x8000);
+#else
+ digit = __builtin_ctz(~mask | 0x8000);
+#endif
+
+ // Shift digits to the beginning
+ __m128i result = ShiftDigits_SSE2(va, digit);
+ //__m128i result = _mm_srl_epi64(va, _mm_cvtsi32_si128(digit * 8));
+ _mm_storel_epi64(reinterpret_cast<__m128i*>(buffer), result);
+ buffer[8 - digit] = '\0';
+#else
+ // value = bbbbcccc
+ const uint32_t b = value / 10000;
+ const uint32_t c = value % 10000;
+
+ const uint32_t d1 = (b / 100) << 1;
+ const uint32_t d2 = (b % 100) << 1;
+
+ const uint32_t d3 = (c / 100) << 1;
+ const uint32_t d4 = (c % 100) << 1;
+
+ if (value >= 10000000)
+ *buffer++ = gDigitsLut[d1];
+ if (value >= 1000000)
+ *buffer++ = gDigitsLut[d1 + 1];
+ if (value >= 100000)
+ *buffer++ = gDigitsLut[d2];
+ *buffer++ = gDigitsLut[d2 + 1];
+
+ *buffer++ = gDigitsLut[d3];
+ *buffer++ = gDigitsLut[d3 + 1];
+ *buffer++ = gDigitsLut[d4];
+ *buffer++ = gDigitsLut[d4 + 1];
+// *buffer++ = '\0';
+ return buffer;
+#endif
+ }
+ else {
+ // value = aabbbbbbbb in decimal
+
+ const uint32_t a = value / 100000000; // 1 to 42
+ value %= 100000000;
+
+ if (a >= 10) {
+ const unsigned i = a << 1;
+ *buffer++ = gDigitsLut[i];
+ *buffer++ = gDigitsLut[i + 1];
+ }
+ else
+ *buffer++ = '0' + static_cast<char>(a);
+
+ const __m128i b = Convert8DigitsSSE2(value);
+ const __m128i ba = _mm_add_epi8(_mm_packus_epi16(_mm_setzero_si128(), b), reinterpret_cast<const __m128i*>(kAsciiZero)[0]);
+ const __m128i result = _mm_srli_si128(ba, 8);
+ _mm_storel_epi64(reinterpret_cast<__m128i*>(buffer), result);
+// buffer[8] = '\0';
+ return buffer + 8;
+ }
+}
+
+// Original name: u64toa_sse2
+char *ToString(uint64_t value, char* buffer) {
+ if (value < 100000000) {
+ uint32_t v = static_cast<uint32_t>(value);
+ if (v < 10000) {
+ const uint32_t d1 = (v / 100) << 1;
+ const uint32_t d2 = (v % 100) << 1;
+
+ if (v >= 1000)
+ *buffer++ = gDigitsLut[d1];
+ if (v >= 100)
+ *buffer++ = gDigitsLut[d1 + 1];
+ if (v >= 10)
+ *buffer++ = gDigitsLut[d2];
+ *buffer++ = gDigitsLut[d2 + 1];
+ //*buffer++ = '\0';
+ return buffer;
+ }
+ else {
+ // Experiment shows that this case SSE2 is slower
+#if 0
+ const __m128i a = Convert8DigitsSSE2(v);
+
+ // Convert to bytes, add '0'
+ const __m128i va = _mm_add_epi8(_mm_packus_epi16(a, _mm_setzero_si128()), reinterpret_cast<const __m128i*>(kAsciiZero)[0]);
+
+ // Count number of digit
+ const unsigned mask = _mm_movemask_epi8(_mm_cmpeq_epi8(va, reinterpret_cast<const __m128i*>(kAsciiZero)[0]));
+ unsigned long digit;
+#ifdef _MSC_VER
+ _BitScanForward(&digit, ~mask | 0x8000);
+#else
+ digit = __builtin_ctz(~mask | 0x8000);
+#endif
+
+ // Shift digits to the beginning
+ __m128i result = ShiftDigits_SSE2(va, digit);
+ _mm_storel_epi64(reinterpret_cast<__m128i*>(buffer), result);
+ buffer[8 - digit] = '\0';
+#else
+ // value = bbbbcccc
+ const uint32_t b = v / 10000;
+ const uint32_t c = v % 10000;
+
+ const uint32_t d1 = (b / 100) << 1;
+ const uint32_t d2 = (b % 100) << 1;
+
+ const uint32_t d3 = (c / 100) << 1;
+ const uint32_t d4 = (c % 100) << 1;
+
+ if (value >= 10000000)
+ *buffer++ = gDigitsLut[d1];
+ if (value >= 1000000)
+ *buffer++ = gDigitsLut[d1 + 1];
+ if (value >= 100000)
+ *buffer++ = gDigitsLut[d2];
+ *buffer++ = gDigitsLut[d2 + 1];
+
+ *buffer++ = gDigitsLut[d3];
+ *buffer++ = gDigitsLut[d3 + 1];
+ *buffer++ = gDigitsLut[d4];
+ *buffer++ = gDigitsLut[d4 + 1];
+ //*buffer++ = '\0';
+ return buffer;
+#endif
+ }
+ }
+ else if (value < 10000000000000000) {
+ const uint32_t v0 = static_cast<uint32_t>(value / 100000000);
+ const uint32_t v1 = static_cast<uint32_t>(value % 100000000);
+
+ const __m128i a0 = Convert8DigitsSSE2(v0);
+ const __m128i a1 = Convert8DigitsSSE2(v1);
+
+ // Convert to bytes, add '0'
+ const __m128i va = _mm_add_epi8(_mm_packus_epi16(a0, a1), reinterpret_cast<const __m128i*>(kAsciiZero)[0]);
+
+ // Count number of digit
+ const unsigned mask = _mm_movemask_epi8(_mm_cmpeq_epi8(va, reinterpret_cast<const __m128i*>(kAsciiZero)[0]));
+#ifdef _MSC_VER
+ unsigned long digit;
+ _BitScanForward(&digit, ~mask | 0x8000);
+#else
+ unsigned digit = __builtin_ctz(~mask | 0x8000);
+#endif
+
+ // Shift digits to the beginning
+ __m128i result = ShiftDigits_SSE2(va, digit);
+ _mm_storeu_si128(reinterpret_cast<__m128i*>(buffer), result);
+// buffer[16 - digit] = '\0';
+ return &buffer[16 - digit];
+ }
+ else {
+ const uint32_t a = static_cast<uint32_t>(value / 10000000000000000); // 1 to 1844
+ value %= 10000000000000000;
+
+ if (a < 10)
+ *buffer++ = '0' + static_cast<char>(a);
+ else if (a < 100) {
+ const uint32_t i = a << 1;
+ *buffer++ = gDigitsLut[i];
+ *buffer++ = gDigitsLut[i + 1];
+ }
+ else if (a < 1000) {
+ *buffer++ = '0' + static_cast<char>(a / 100);
+
+ const uint32_t i = (a % 100) << 1;
+ *buffer++ = gDigitsLut[i];
+ *buffer++ = gDigitsLut[i + 1];
+ }
+ else {
+ const uint32_t i = (a / 100) << 1;
+ const uint32_t j = (a % 100) << 1;
+ *buffer++ = gDigitsLut[i];
+ *buffer++ = gDigitsLut[i + 1];
+ *buffer++ = gDigitsLut[j];
+ *buffer++ = gDigitsLut[j + 1];
+ }
+
+ const uint32_t v0 = static_cast<uint32_t>(value / 100000000);
+ const uint32_t v1 = static_cast<uint32_t>(value % 100000000);
+
+ const __m128i a0 = Convert8DigitsSSE2(v0);
+ const __m128i a1 = Convert8DigitsSSE2(v1);
+
+ // Convert to bytes, add '0'
+ const __m128i va = _mm_add_epi8(_mm_packus_epi16(a0, a1), reinterpret_cast<const __m128i*>(kAsciiZero)[0]);
+ _mm_storeu_si128(reinterpret_cast<__m128i*>(buffer), va);
+// buffer[16] = '\0';
+ return &buffer[16];
+ }
+}
+
+#else // Generic Non-x86 case
+
+// Orignal name: u32toa_branchlut
+char *ToString(uint32_t value, char* buffer) {
+ if (value < 10000) {
+ const uint32_t d1 = (value / 100) << 1;
+ const uint32_t d2 = (value % 100) << 1;
+
+ if (value >= 1000)
+ *buffer++ = gDigitsLut[d1];
+ if (value >= 100)
+ *buffer++ = gDigitsLut[d1 + 1];
+ if (value >= 10)
+ *buffer++ = gDigitsLut[d2];
+ *buffer++ = gDigitsLut[d2 + 1];
+ }
+ else if (value < 100000000) {
+ // value = bbbbcccc
+ const uint32_t b = value / 10000;
+ const uint32_t c = value % 10000;
+
+ const uint32_t d1 = (b / 100) << 1;
+ const uint32_t d2 = (b % 100) << 1;
+
+ const uint32_t d3 = (c / 100) << 1;
+ const uint32_t d4 = (c % 100) << 1;
+
+ if (value >= 10000000)
+ *buffer++ = gDigitsLut[d1];
+ if (value >= 1000000)
+ *buffer++ = gDigitsLut[d1 + 1];
+ if (value >= 100000)
+ *buffer++ = gDigitsLut[d2];
+ *buffer++ = gDigitsLut[d2 + 1];
+
+ *buffer++ = gDigitsLut[d3];
+ *buffer++ = gDigitsLut[d3 + 1];
+ *buffer++ = gDigitsLut[d4];
+ *buffer++ = gDigitsLut[d4 + 1];
+ }
+ else {
+ // value = aabbbbcccc in decimal
+
+ const uint32_t a = value / 100000000; // 1 to 42
+ value %= 100000000;
+
+ if (a >= 10) {
+ const unsigned i = a << 1;
+ *buffer++ = gDigitsLut[i];
+ *buffer++ = gDigitsLut[i + 1];
+ }
+ else
+ *buffer++ = '0' + static_cast<char>(a);
+
+ const uint32_t b = value / 10000; // 0 to 9999
+ const uint32_t c = value % 10000; // 0 to 9999
+
+ const uint32_t d1 = (b / 100) << 1;
+ const uint32_t d2 = (b % 100) << 1;
+
+ const uint32_t d3 = (c / 100) << 1;
+ const uint32_t d4 = (c % 100) << 1;
+
+ *buffer++ = gDigitsLut[d1];
+ *buffer++ = gDigitsLut[d1 + 1];
+ *buffer++ = gDigitsLut[d2];
+ *buffer++ = gDigitsLut[d2 + 1];
+ *buffer++ = gDigitsLut[d3];
+ *buffer++ = gDigitsLut[d3 + 1];
+ *buffer++ = gDigitsLut[d4];
+ *buffer++ = gDigitsLut[d4 + 1];
+ }
+ return buffer; //*buffer++ = '\0';
+}
+
+// Original name: u64toa_branchlut
+char *ToString(uint64_t value, char* buffer) {
+ if (value < 100000000) {
+ uint32_t v = static_cast<uint32_t>(value);
+ if (v < 10000) {
+ const uint32_t d1 = (v / 100) << 1;
+ const uint32_t d2 = (v % 100) << 1;
+
+ if (v >= 1000)
+ *buffer++ = gDigitsLut[d1];
+ if (v >= 100)
+ *buffer++ = gDigitsLut[d1 + 1];
+ if (v >= 10)
+ *buffer++ = gDigitsLut[d2];
+ *buffer++ = gDigitsLut[d2 + 1];
+ }
+ else {
+ // value = bbbbcccc
+ const uint32_t b = v / 10000;
+ const uint32_t c = v % 10000;
+
+ const uint32_t d1 = (b / 100) << 1;
+ const uint32_t d2 = (b % 100) << 1;
+
+ const uint32_t d3 = (c / 100) << 1;
+ const uint32_t d4 = (c % 100) << 1;
+
+ if (value >= 10000000)
+ *buffer++ = gDigitsLut[d1];
+ if (value >= 1000000)
+ *buffer++ = gDigitsLut[d1 + 1];
+ if (value >= 100000)
+ *buffer++ = gDigitsLut[d2];
+ *buffer++ = gDigitsLut[d2 + 1];
+
+ *buffer++ = gDigitsLut[d3];
+ *buffer++ = gDigitsLut[d3 + 1];
+ *buffer++ = gDigitsLut[d4];
+ *buffer++ = gDigitsLut[d4 + 1];
+ }
+ }
+ else if (value < 10000000000000000) {
+ const uint32_t v0 = static_cast<uint32_t>(value / 100000000);
+ const uint32_t v1 = static_cast<uint32_t>(value % 100000000);
+
+ const uint32_t b0 = v0 / 10000;
+ const uint32_t c0 = v0 % 10000;
+
+ const uint32_t d1 = (b0 / 100) << 1;
+ const uint32_t d2 = (b0 % 100) << 1;
+
+ const uint32_t d3 = (c0 / 100) << 1;
+ const uint32_t d4 = (c0 % 100) << 1;
+
+ const uint32_t b1 = v1 / 10000;
+ const uint32_t c1 = v1 % 10000;
+
+ const uint32_t d5 = (b1 / 100) << 1;
+ const uint32_t d6 = (b1 % 100) << 1;
+
+ const uint32_t d7 = (c1 / 100) << 1;
+ const uint32_t d8 = (c1 % 100) << 1;
+
+ if (value >= 1000000000000000)
+ *buffer++ = gDigitsLut[d1];
+ if (value >= 100000000000000)
+ *buffer++ = gDigitsLut[d1 + 1];
+ if (value >= 10000000000000)
+ *buffer++ = gDigitsLut[d2];
+ if (value >= 1000000000000)
+ *buffer++ = gDigitsLut[d2 + 1];
+ if (value >= 100000000000)
+ *buffer++ = gDigitsLut[d3];
+ if (value >= 10000000000)
+ *buffer++ = gDigitsLut[d3 + 1];
+ if (value >= 1000000000)
+ *buffer++ = gDigitsLut[d4];
+ if (value >= 100000000)
+ *buffer++ = gDigitsLut[d4 + 1];
+
+ *buffer++ = gDigitsLut[d5];
+ *buffer++ = gDigitsLut[d5 + 1];
+ *buffer++ = gDigitsLut[d6];
+ *buffer++ = gDigitsLut[d6 + 1];
+ *buffer++ = gDigitsLut[d7];
+ *buffer++ = gDigitsLut[d7 + 1];
+ *buffer++ = gDigitsLut[d8];
+ *buffer++ = gDigitsLut[d8 + 1];
+ }
+ else {
+ const uint32_t a = static_cast<uint32_t>(value / 10000000000000000); // 1 to 1844
+ value %= 10000000000000000;
+
+ if (a < 10)
+ *buffer++ = '0' + static_cast<char>(a);
+ else if (a < 100) {
+ const uint32_t i = a << 1;
+ *buffer++ = gDigitsLut[i];
+ *buffer++ = gDigitsLut[i + 1];
+ }
+ else if (a < 1000) {
+ *buffer++ = '0' + static_cast<char>(a / 100);
+
+ const uint32_t i = (a % 100) << 1;
+ *buffer++ = gDigitsLut[i];
+ *buffer++ = gDigitsLut[i + 1];
+ }
+ else {
+ const uint32_t i = (a / 100) << 1;
+ const uint32_t j = (a % 100) << 1;
+ *buffer++ = gDigitsLut[i];
+ *buffer++ = gDigitsLut[i + 1];
+ *buffer++ = gDigitsLut[j];
+ *buffer++ = gDigitsLut[j + 1];
+ }
+
+ const uint32_t v0 = static_cast<uint32_t>(value / 100000000);
+ const uint32_t v1 = static_cast<uint32_t>(value % 100000000);
+
+ const uint32_t b0 = v0 / 10000;
+ const uint32_t c0 = v0 % 10000;
+
+ const uint32_t d1 = (b0 / 100) << 1;
+ const uint32_t d2 = (b0 % 100) << 1;
+
+ const uint32_t d3 = (c0 / 100) << 1;
+ const uint32_t d4 = (c0 % 100) << 1;
+
+ const uint32_t b1 = v1 / 10000;
+ const uint32_t c1 = v1 % 10000;
+
+ const uint32_t d5 = (b1 / 100) << 1;
+ const uint32_t d6 = (b1 % 100) << 1;
+
+ const uint32_t d7 = (c1 / 100) << 1;
+ const uint32_t d8 = (c1 % 100) << 1;
+
+ *buffer++ = gDigitsLut[d1];
+ *buffer++ = gDigitsLut[d1 + 1];
+ *buffer++ = gDigitsLut[d2];
+ *buffer++ = gDigitsLut[d2 + 1];
+ *buffer++ = gDigitsLut[d3];
+ *buffer++ = gDigitsLut[d3 + 1];
+ *buffer++ = gDigitsLut[d4];
+ *buffer++ = gDigitsLut[d4 + 1];
+ *buffer++ = gDigitsLut[d5];
+ *buffer++ = gDigitsLut[d5 + 1];
+ *buffer++ = gDigitsLut[d6];
+ *buffer++ = gDigitsLut[d6 + 1];
+ *buffer++ = gDigitsLut[d7];
+ *buffer++ = gDigitsLut[d7 + 1];
+ *buffer++ = gDigitsLut[d8];
+ *buffer++ = gDigitsLut[d8 + 1];
+ }
+ return buffer;
+}
+
+#endif // End of architecture if statement.
+
+// Signed wrappers. The negation is done on the unsigned version because
+// doing so has defined behavior for INT_MIN.
+char *ToString(int32_t value, char *to) {
+ uint32_t un = static_cast<uint32_t>(value);
+ if (value < 0) {
+ *to++ = '-';
+ un = -un;
+ }
+ return ToString(un, to);
+}
+
+char *ToString(int64_t value, char *to) {
+ uint64_t un = static_cast<uint64_t>(value);
+ if (value < 0) {
+ *to++ = '-';
+ un = -un;
+ }
+ return ToString(un, to);
+}
+
+// No optimization for this case yet.
+char *ToString(int16_t value, char *to) {
+ return ToString((int32_t)value, to);
+}
+char *ToString(uint16_t value, char *to) {
+ return ToString((uint32_t)value, to);
+}
+
+// void * to string. This hasn't been optimized at all really.
+namespace {
+const char kHexDigits[] = "0123456789abcdef";
+} // namespace
+
+char *ToString(const void *v, char *to) {
+ *to++ = '0';
+ *to++ = 'x';
+
+ // Fun fact: gcc/clang boost::lexical_cast on Linux do just "0" while clang on OS X does "0x0"
+ // I happen to prefer 0x0.
+ if (!v) {
+ *to++ = '0';
+ return to;
+ }
+
+ uintptr_t value = reinterpret_cast<uintptr_t>(v);
+ uint8_t shift = sizeof(void*) * 8 - 4;
+ for (; !(value >> shift); shift -= 4) {}
+ for (; ; shift -= 4) {
+ *to++ = kHexDigits[(value >> shift) & 0xf];
+ if (!shift) break;
+ }
+ return to;
+}
+
+} // namespace util
diff --git a/src/kenlm/util/integer_to_string.hh b/src/kenlm/util/integer_to_string.hh
new file mode 100644
index 0000000..9ac25bd
--- /dev/null
+++ b/src/kenlm/util/integer_to_string.hh
@@ -0,0 +1,66 @@
+#ifndef UTIL_INTEGER_TO_STRING_H
+#define UTIL_INTEGER_TO_STRING_H
+#include <cstddef>
+#include <stdint.h>
+
+namespace util {
+
+/* These functions convert integers to strings and return the end pointer.
+ */
+char *ToString(uint32_t value, char *to);
+char *ToString(uint64_t value, char *to);
+
+// Implemented as wrappers to above
+char *ToString(int32_t value, char *to);
+char *ToString(int64_t value, char *to);
+
+// Calls the 32-bit versions for now.
+char *ToString(uint16_t value, char *to);
+char *ToString(int16_t value, char *to);
+
+char *ToString(const void *value, char *to);
+
+inline char *ToString(bool value, char *to) {
+ *to++ = '0' + value;
+ return to;
+}
+
+// How many bytes to reserve in the buffer for these strings:
+// g++ 4.9.1 doesn't work with this:
+// static const std::size_t kBytes = 5;
+// So use enum.
+template <class T> struct ToStringBuf;
+template <> struct ToStringBuf<bool> {
+ enum { kBytes = 1 };
+};
+template <> struct ToStringBuf<uint16_t> {
+ enum { kBytes = 5 };
+};
+template <> struct ToStringBuf<int16_t> {
+ enum { kBytes = 6 };
+};
+template <> struct ToStringBuf<uint32_t> {
+ enum { kBytes = 10 };
+};
+template <> struct ToStringBuf<int32_t> {
+ enum { kBytes = 11 };
+};
+template <> struct ToStringBuf<uint64_t> {
+ enum { kBytes = 20 };
+};
+template <> struct ToStringBuf<int64_t> {
+ // Not a typo. 2^63 has 19 digits.
+ enum { kBytes = 20 };
+};
+
+template <> struct ToStringBuf<const void*> {
+ // Either 18 on 64-bit or 10 on 32-bit.
+ enum { kBytes = sizeof(const void*) * 2 + 2 };
+};
+
+// Maximum over this and float.
+enum { kToStringMaxBytes = 20 };
+
+} // namespace util
+
+#endif // UTIL_INTEGER_TO_STRING_H
diff --git a/src/kenlm/util/integer_to_string_test.cc b/src/kenlm/util/integer_to_string_test.cc
new file mode 100644
index 0000000..136c88f
--- /dev/null
+++ b/src/kenlm/util/integer_to_string_test.cc
@@ -0,0 +1,81 @@
+#define BOOST_LEXICAL_CAST_ASSUME_C_LOCALE
+#include "util/integer_to_string.hh"
+#include "util/string_piece.hh"
+
+#define BOOST_TEST_MODULE IntegerToStringTest
+#include <boost/test/unit_test.hpp>
+#include <boost/lexical_cast.hpp>
+
+#include <limits>
+
+namespace util {
+namespace {
+
+template <class T> void TestValue(const T value) {
+ char buf[ToStringBuf<T>::kBytes];
+ StringPiece result(buf, ToString(value, buf) - buf);
+ BOOST_REQUIRE_GE(static_cast<std::size_t>(ToStringBuf<T>::kBytes), result.size());
+ if (value) {
+ BOOST_CHECK_EQUAL(boost::lexical_cast<std::string>(value), result);
+ } else {
+ // Platforms can do void * as 0x0 or 0.
+ BOOST_CHECK(result == "0x0" || result == "0");
+ }
+}
+
+template <class T> void TestCorners() {
+ TestValue(std::numeric_limits<T>::min());
+ TestValue(std::numeric_limits<T>::max());
+ TestValue((T)0);
+ TestValue((T)-1);
+ TestValue((T)1);
+}
+
+BOOST_AUTO_TEST_CASE(Corners) {
+ TestCorners<uint16_t>();
+ TestCorners<uint32_t>();
+ TestCorners<uint64_t>();
+ TestCorners<int16_t>();
+ TestCorners<int32_t>();
+ TestCorners<int64_t>();
+ TestCorners<const void*>();
+}
+
+template <class T> void TestAll() {
+ for (T i = std::numeric_limits<T>::min(); i < std::numeric_limits<T>::max(); ++i) {
+ TestValue(i);
+ }
+ TestValue(std::numeric_limits<T>::max());
+}
+
+BOOST_AUTO_TEST_CASE(Short) {
+ TestAll<uint16_t>();
+ TestAll<int16_t>();
+}
+
+template <class T> void Test10s() {
+ for (T i = 1; i < std::numeric_limits<T>::max() / 10; i *= 10) {
+ TestValue(i);
+ TestValue(i - 1);
+ TestValue(i + 1);
+ }
+}
+
+BOOST_AUTO_TEST_CASE(Tens) {
+ Test10s<uint64_t>();
+ Test10s<int64_t>();
+ Test10s<uint32_t>();
+ Test10s<int32_t>();
+}
+
+BOOST_AUTO_TEST_CASE(Pointers) {
+ for (uintptr_t i = 1; i < std::numeric_limits<uintptr_t>::max() / 10; i *= 10) {
+ TestValue((const void*)i);
+ }
+ for (uintptr_t i = 0; i < 256; ++i) {
+ TestValue((const void*)i);
+ TestValue((const void*)(i + 0xf00));
+ }
+}
+
+}} // namespaces
diff --git a/src/joshua/decoder/ff/lm/kenlm/util/joint_sort.hh b/src/kenlm/util/joint_sort.hh
similarity index 100%
rename from src/joshua/decoder/ff/lm/kenlm/util/joint_sort.hh
rename to src/kenlm/util/joint_sort.hh
diff --git a/src/joshua/decoder/ff/lm/kenlm/util/joint_sort_test.cc b/src/kenlm/util/joint_sort_test.cc
similarity index 100%
rename from src/joshua/decoder/ff/lm/kenlm/util/joint_sort_test.cc
rename to src/kenlm/util/joint_sort_test.cc
diff --git a/src/kenlm/util/mmap.cc b/src/kenlm/util/mmap.cc
new file mode 100644
index 0000000..d89f3f6
--- /dev/null
+++ b/src/kenlm/util/mmap.cc
@@ -0,0 +1,402 @@
+/* Memory mapping wrappers.
+ * ARM and MinGW ports contributed by Hideo Okuma and Tomoyuki Yoshimura at
+ * NICT.
+ */
+#include "util/mmap.hh"
+
+#include "util/exception.hh"
+#include "util/file.hh"
+#include "util/parallel_read.hh"
+#include "util/scoped.hh"
+
+#include <iostream>
+
+#include <cassert>
+#include <cstdlib>
+#include <cstring>
+#include <fcntl.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+
+#if defined(_WIN32) || defined(_WIN64)
+#include <windows.h>
+#include <io.h>
+#else
+#include <sys/mman.h>
+#include <unistd.h>
+#endif
+
+namespace util {
+
+// Returns the granularity mapped regions must be aligned to: the allocation
+// granularity on Windows, the VM page size everywhere else.
+std::size_t SizePage() {
+#if defined(_WIN32) || defined(_WIN64)
+  SYSTEM_INFO si;
+  GetSystemInfo(&si);
+  return si.dwAllocationGranularity;
+#else
+  return sysconf(_SC_PAGE_SIZE);
+#endif
+}
+
+// Sync then unmap the held region.  (void*)-1 is the MAP_FAILED sentinel
+// this class uses for "empty".  Destructors must not throw, so any failure
+// is reported to stderr and the process aborted.
+scoped_mmap::~scoped_mmap() {
+  if (data_ != (void*)-1) {
+    try {
+      // Thanks Denis Filimonov for pointing out NFS likes msync first.
+      SyncOrThrow(data_, size_);
+      UnmapOrThrow(data_, size_);
+    } catch (const util::ErrnoException &e) {
+      std::cerr << e.what();
+      abort();
+    }
+  }
+}
+
+namespace {
+// Round value up to the next multiple of mult, which must be a power of two.
+// Equivalent to the classic ((value - 1) & ~(mult - 1)) + mult formula for
+// every input, including the wraparound cases (value == 0 yields 0).
+template <class T> T RoundUpPow2(T value, T mult) {
+  const T mask = mult - 1;
+  return (value + mask) & ~mask;
+}
+} // namespace
+
+// Convenience constructor: immediately allocate size bytes via HugeMalloc,
+// zero-filled when zeroed is true.
+scoped_memory::scoped_memory(std::size_t size, bool zeroed) : data_(NULL), size_(0), source_(NONE_ALLOCATED) {
+  HugeMalloc(size, zeroed, *this);
+}
+
+/* Take ownership of (data, size, source), first releasing the currently held
+ * memory according to how it was allocated.
+ */
+void scoped_memory::reset(void *data, std::size_t size, Alloc source) {
+  switch(source_) {
+    case MMAP_ROUND_UP_ALLOCATED:
+      // The temporary scoped_mmap unmaps in its destructor at the end of
+      // this statement.  Size is rounded back up to the page multiple that
+      // was actually mapped.
+      scoped_mmap(data_, RoundUpPow2(size_, (std::size_t)SizePage()));
+      break;
+    case MMAP_ALLOCATED:
+      // Same temporary-destructor trick, exact size this time.
+      scoped_mmap(data_, size_);
+      break;
+    case MALLOC_ALLOCATED:
+      free(data_);
+      break;
+    case NONE_ALLOCATED:
+      break;
+  }
+  data_ = data;
+  size_ = size;
+  source_ = source;
+}
+
+/*void scoped_memory::call_realloc(std::size_t size) {
+ assert(source_ == MALLOC_ALLOCATED || source_ == NONE_ALLOCATED);
+ void *new_data = realloc(data_, size);
+ if (!new_data) {
+ reset();
+ } else {
+ data_ = new_data;
+ size_ = size;
+ source_ = MALLOC_ALLOCATED;
+ }
+}*/
+
+const int kFileFlags =
+#if defined(_WIN32) || defined(_WIN64)
+ 0 // MapOrThrow ignores flags on windows
+#elif defined(MAP_FILE)
+ MAP_FILE | MAP_SHARED
+#else
+ MAP_SHARED
+#endif
+ ;
+
+/* Cross-platform mmap wrapper: map size bytes of fd at offset, read-only or
+ * read/write, throwing ErrnoException on failure.  prefault asks the kernel
+ * to populate pages up front where supported (MAP_POPULATE on Linux; always
+ * honored implicitly on Windows which ignores flags).
+ */
+void *MapOrThrow(std::size_t size, bool for_write, int flags, bool prefault, int fd, uint64_t offset) {
+#ifdef MAP_POPULATE // Linux specific
+  if (prefault) {
+    flags |= MAP_POPULATE;
+  }
+#endif
+#if defined(_WIN32) || defined(_WIN64)
+  int protectC = for_write ? PAGE_READWRITE : PAGE_READONLY;
+  int protectM = for_write ? FILE_MAP_WRITE : FILE_MAP_READ;
+  // The mapping object must extend through the end of the requested window.
+  uint64_t total_size = size + offset;
+  HANDLE hMapping = CreateFileMapping((HANDLE)_get_osfhandle(fd), NULL, protectC, total_size >> 32, static_cast<DWORD>(total_size), NULL);
+  UTIL_THROW_IF(!hMapping, ErrnoException, "CreateFileMapping failed");
+  LPVOID ret = MapViewOfFile(hMapping, protectM, offset >> 32, offset, size);
+  // The view keeps the section alive; the handle can be closed immediately.
+  CloseHandle(hMapping);
+  UTIL_THROW_IF(!ret, ErrnoException, "MapViewOfFile failed");
+#else
+  int protect = for_write ? (PROT_READ | PROT_WRITE) : PROT_READ;
+  void *ret;
+  UTIL_THROW_IF((ret = mmap(NULL, size, protect, flags, fd, offset)) == MAP_FAILED, ErrnoException, "mmap failed for size " << size << " at offset " << offset);
+# ifdef MADV_HUGEPAGE
+  /* We like huge pages but it's fine if we can't have them. Note that huge
+   * pages are not supported for file-backed mmap on linux.
+   */
+  madvise(ret, size, MADV_HUGEPAGE);
+# endif
+#endif
+  return ret;
+}
+
+// Flush a mapped region to its backing file; throws ErrnoException on error.
+void SyncOrThrow(void *start, size_t length) {
+#if defined(_WIN32) || defined(_WIN64)
+  UTIL_THROW_IF(!::FlushViewOfFile(start, length), ErrnoException, "Failed to sync mmap");
+#else
+  // Zero length skips the syscall entirely.
+  UTIL_THROW_IF(length && msync(start, length, MS_SYNC), ErrnoException, "Failed to sync mmap");
+#endif
+}
+
+// Unmap a region; throws ErrnoException on error.  Windows ignores length
+// because UnmapViewOfFile always removes the whole view.
+void UnmapOrThrow(void *start, size_t length) {
+#if defined(_WIN32) || defined(_WIN64)
+  UTIL_THROW_IF(!::UnmapViewOfFile(start), ErrnoException, "Failed to unmap a file");
+#else
+  UTIL_THROW_IF(munmap(start, length), ErrnoException, "munmap failed");
+#endif
+}
+
+// Linux huge pages.
+#ifdef __linux__
+
+namespace {
+
+// Anonymous private mmap of size bytes with extra flags.  On success hands
+// ownership to `to` and returns true; on failure returns false (errno is
+// left set by mmap) so the caller can try another strategy.
+bool AnonymousMap(std::size_t size, int flags, bool populate, util::scoped_memory &to) {
+  if (populate) flags |= MAP_POPULATE;
+  void *ret = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS | flags, -1, 0);
+  if (ret == MAP_FAILED) return false;
+  to.reset(ret, size, scoped_memory::MMAP_ALLOCATED);
+  return true;
+}
+
+/* Try to obtain size bytes backed by huge pages of 2^alignment_bits each,
+ * falling through three strategies; returns false if none worked so the
+ * caller can fall back to malloc.  Linux-only (see the __linux__ guard).
+ */
+bool TryHuge(std::size_t size, uint8_t alignment_bits, bool populate, util::scoped_memory &to) {
+  // Don't bother with these cases.
+  if (size < (1ULL << alignment_bits) || (1ULL << alignment_bits) < SizePage())
+    return false;
+
+  // First try: Linux >= 3.8 with manually configured hugetlb pages available.
+#ifdef MAP_HUGE_SHIFT
+  if (AnonymousMap(size, MAP_HUGETLB | (alignment_bits << MAP_HUGE_SHIFT), populate, to))
+    return true;
+#endif
+
+  // Second try: manually configured hugetlb pages exist, but kernel too old to
+  // pick size or not available. This might pick the wrong size huge pages,
+  // but the sysadmin must have made them available in the first place.
+  if (AnonymousMap(size, MAP_HUGETLB, populate, to))
+    return true;
+
+  // Third try: align to a multiple of the huge page size by overallocating.
+  // I feel bad about doing this, but it's also how posix_memalign is
+  // implemented. And the memory is virtual.
+
+  // Round up requested size to multiple of page size. This will allow the pages after to be munmapped.
+  std::size_t size_up = RoundUpPow2(size, SizePage());
+
+  std::size_t ask = size_up + (1 << alignment_bits) - SizePage();
+  // Don't populate because this is asking for more than we will use.
+  scoped_mmap larger(mmap(NULL, ask, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0), ask);
+  if (larger.get() == MAP_FAILED) return false;
+
+  // Throw out pages before the alignment point.
+  uintptr_t base = reinterpret_cast<uintptr_t>(larger.get());
+  // Round up to next multiple of alignment.
+  uintptr_t rounded_up = RoundUpPow2(base, static_cast<uintptr_t>(1) << alignment_bits);
+  if (base != rounded_up) {
+    // If this throws an exception (which it shouldn't) then we want to unmap the whole thing by keeping it in larger.
+    UnmapOrThrow(larger.get(), rounded_up - base);
+    larger.steal();
+    larger.reset(reinterpret_cast<void*>(rounded_up), ask - (rounded_up - base));
+  }
+
+  // Throw out pages after the requested size.
+  assert(larger.size() >= size_up);
+  if (larger.size() > size_up) {
+    // This is where we assume size_up is a multiple of page size.
+    UnmapOrThrow(static_cast<uint8_t*>(larger.get()) + size_up, larger.size() - size_up);
+    larger.reset(larger.steal(), size_up);
+  }
+#ifdef MADV_HUGEPAGE
+  madvise(larger.get(), size_up, MADV_HUGEPAGE);
+#endif
+  to.reset(larger.steal(), size, scoped_memory::MMAP_ROUND_UP_ALLOCATED);
+  return true;
+}
+
+} // namespace
+
+#endif
+
+/* Allocate size bytes into `to`, preferring huge pages (1 GB then 2 MB) on
+ * Linux and falling back to malloc/calloc elsewhere or for small sizes.
+ * zeroed requests zero-filled memory; huge-page attempts additionally treat
+ * it as a request to prepopulate (fresh anonymous mappings are zero-filled
+ * by the kernel regardless).  Throws ErrnoException if malloc/calloc fails.
+ */
+void HugeMalloc(std::size_t size, bool zeroed, scoped_memory &to) {
+  to.reset();
+#ifdef __linux__
+  // TODO: architectures/page sizes other than 2^21 and 2^30.
+  // Attempt 1 GB pages.
+  // If the user asked for zeroed memory, assume they want it populated.
+  if (size >= (1ULL << 30) && TryHuge(size, 30, zeroed, to))
+    return;
+  // Attempt 2 MB pages.
+  if (size >= (1ULL << 21) && TryHuge(size, 21, zeroed, to))
+    return;
+#endif // __linux__
+  // Non-linux will always do this, as will small allocations on Linux.
+  // NOTE(review): malloc(0)/calloc(1, 0) may legally return NULL, which
+  // would throw below — confirm callers never pass size 0.
+  to.reset(zeroed ? calloc(1, size) : malloc(size), size, scoped_memory::MALLOC_ALLOCATED);
+  UTIL_THROW_IF(!to.get(), ErrnoException, "Failed to allocate " << size << " bytes");
+}
+
+#ifdef __linux__
+const std::size_t kTransitionHuge = std::max<std::size_t>(1ULL << 21, SizePage());
+#endif // __linux__
+
+/* Resize mem to `to` bytes, preserving contents, optionally zeroing newly
+ * exposed bytes.  Handles every scoped_memory source: malloc memory is
+ * realloc'd (or upgraded to huge pages once it crosses kTransitionHuge),
+ * anonymous mmap memory is mremap'd on Linux, and shrinking an mmap below a
+ * page falls back to malloc.  to == 0 simply frees.
+ */
+void HugeRealloc(std::size_t to, bool zero_new, scoped_memory &mem) {
+  if (!to) {
+    mem.reset();
+    return;
+  }
+  std::size_t from_size = mem.size();
+  switch (mem.source()) {
+    case scoped_memory::NONE_ALLOCATED:
+      HugeMalloc(to, zero_new, mem);
+      return;
+#ifdef __linux__
+    case scoped_memory::MMAP_ROUND_UP_ALLOCATED:
+      // for mremap's benefit.
+      from_size = RoundUpPow2(from_size, SizePage());
+      // Intentional fall through: after adjusting the mapped size this is
+      // handled exactly like a plain anonymous mmap.
+    case scoped_memory::MMAP_ALLOCATED:
+      // Downsizing below barrier?
+      if (to <= SizePage()) {
+        scoped_malloc replacement(malloc(to));
+        memcpy(replacement.get(), mem.get(), std::min(to, mem.size()));
+        if (zero_new && to > mem.size())
+          memset(static_cast<uint8_t*>(replacement.get()) + mem.size(), 0, to - mem.size());
+        mem.reset(replacement.release(), to, scoped_memory::MALLOC_ALLOCATED);
+      } else {
+        // Growth of an anonymous mapping is zero-filled by the kernel, so
+        // zero_new needs no extra work here.
+        void *new_addr = mremap(mem.get(), from_size, to, MREMAP_MAYMOVE);
+        // mremap signals failure with MAP_FAILED ((void*)-1), not NULL;
+        // testing !new_addr would accept a failed remap and then corrupt mem.
+        UTIL_THROW_IF(new_addr == MAP_FAILED, ErrnoException, "Failed to mremap from " << from_size << " to " << to);
+        mem.steal();
+        mem.reset(new_addr, to, scoped_memory::MMAP_ALLOCATED);
+      }
+      return;
+#endif // __linux__
+    case scoped_memory::MALLOC_ALLOCATED:
+#ifdef __linux__
+      // Transition larger allocations to huge pages, but don't keep trying if we're still malloc allocated.
+      if (to >= kTransitionHuge && mem.size() < kTransitionHuge) {
+        scoped_memory replacement;
+        HugeMalloc(to, zero_new, replacement);
+        memcpy(replacement.get(), mem.get(), mem.size());
+        // This can't throw.
+        mem.reset(replacement.get(), replacement.size(), replacement.source());
+        replacement.steal();
+        return;
+      }
+#endif // __linux__
+      {
+        void *new_addr = std::realloc(mem.get(), to);
+        UTIL_THROW_IF(!new_addr, ErrnoException, "realloc to " << to << " bytes failed.");
+        if (zero_new && to > mem.size())
+          memset(static_cast<uint8_t*>(new_addr) + mem.size(), 0, to - mem.size());
+        // realloc already released the old block; steal so reset doesn't
+        // free it a second time.
+        mem.steal();
+        mem.reset(new_addr, to, scoped_memory::MALLOC_ALLOCATED);
+      }
+      return;
+    default:
+      UTIL_THROW(Exception, "HugeRealloc called with type " << mem.source());
+  }
+}
+
+/* Load [offset, offset + size) of fd into out according to method.  The
+ * mmap-based branches leave out MMAP_ALLOCATED; the read-based branches
+ * allocate via HugeMalloc and copy the content in.
+ */
+void MapRead(LoadMethod method, int fd, uint64_t offset, std::size_t size, scoped_memory &out) {
+  switch (method) {
+    case LAZY:
+      out.reset(MapOrThrow(size, false, kFileFlags, false, fd, offset), size, scoped_memory::MMAP_ALLOCATED);
+      break;
+    // POPULATE_OR_READ joins the prefaulting mmap branch only when
+    // MAP_POPULATE exists; otherwise it falls into the READ branch below.
+    case POPULATE_OR_LAZY:
+#ifdef MAP_POPULATE
+    case POPULATE_OR_READ:
+#endif
+      out.reset(MapOrThrow(size, false, kFileFlags, true, fd, offset), size, scoped_memory::MMAP_ALLOCATED);
+      break;
+#ifndef MAP_POPULATE
+    case POPULATE_OR_READ:
+#endif
+    case READ:
+      HugeMalloc(size, false, out);
+      SeekOrThrow(fd, offset);
+      ReadOrThrow(fd, out.get(), size);
+      break;
+    case PARALLEL_READ:
+      HugeMalloc(size, false, out);
+      ParallelRead(fd, out.get(), size, offset);
+      break;
+  }
+}
+
+// Truncate fd to zero then extend it to size and map it writable.  The
+// truncate/extend cycle guarantees the file content starts out all zeros
+// (POSIX zero-fills bytes added by ftruncate).
+void *MapZeroedWrite(int fd, std::size_t size) {
+  ResizeOrThrow(fd, 0);
+  ResizeOrThrow(fd, size);
+  return MapOrThrow(size, true, kFileFlags, false, fd, 0);
+}
+
+// Create (or truncate) the named file, keep its descriptor in `file`, and
+// map size zeroed bytes for writing.  Errors are re-thrown annotated with
+// the file name.
+void *MapZeroedWrite(const char *name, std::size_t size, scoped_fd &file) {
+  file.reset(CreateOrThrow(name));
+  try {
+    return MapZeroedWrite(file.get(), size);
+  } catch (ErrnoException &e) {
+    e << " in file " << name;
+    throw;
+  }
+}
+
+// Copy another Rolling, then advance the logical base by increase bytes
+// (see IncreaseBase).
+Rolling::Rolling(const Rolling &copy_from, uint64_t increase) {
+  *this = copy_from;
+  IncreaseBase(increase);
+}
+
+// Copy configuration but deliberately not mem_: a non-passthrough copy
+// starts unmapped (current_end_ = 0) so the first access remaps for itself.
+Rolling &Rolling::operator=(const Rolling &copy_from) {
+  fd_ = copy_from.fd_;
+  file_begin_ = copy_from.file_begin_;
+  file_end_ = copy_from.file_end_;
+  for_write_ = copy_from.for_write_;
+  block_ = copy_from.block_;
+  read_bound_ = copy_from.read_bound_;
+
+  current_begin_ = 0;
+  if (copy_from.IsPassthrough()) {
+    // Passthrough wraps a raw pointer, so it is safe to share directly.
+    current_end_ = copy_from.current_end_;
+    ptr_ = copy_from.ptr_;
+  } else {
+    // Force call on next mmap.
+    current_end_ = 0;
+    ptr_ = NULL;
+  }
+  return *this;
+}
+
+// Rolling map over [offset, offset + amount) of fd, mapping block bytes at a
+// time and keeping read_bound bytes of headroom before the window end.
+// current_end_ = 0 forces the first CheckedBase call to Roll (ptr_ is not
+// initialized here; it is set by Roll before any use).
+Rolling::Rolling(int fd, bool for_write, std::size_t block, std::size_t read_bound, uint64_t offset, uint64_t amount) {
+  current_begin_ = 0;
+  current_end_ = 0;
+  fd_ = fd;
+  file_begin_ = offset;
+  file_end_ = offset + amount;
+  for_write_ = for_write;
+  block_ = block;
+  read_bound_ = read_bound;
+}
+
+// Map size bytes at logical index outside the rolling window, owned by out.
+// The file offset is rounded down to a page multiple ("cruft" bytes) because
+// mmap offsets must be page-aligned; the returned pointer skips the cruft.
+void *Rolling::ExtractNonRolling(scoped_memory &out, uint64_t index, std::size_t size) {
+  out.reset();
+  if (IsPassthrough()) return static_cast<uint8_t*>(get()) + index;
+  uint64_t offset = index + file_begin_;
+  // Round down to multiple of page size.
+  uint64_t cruft = offset % static_cast<uint64_t>(SizePage());
+  std::size_t map_size = static_cast<std::size_t>(size + cruft);
+  out.reset(MapOrThrow(map_size, for_write_, kFileFlags, true, fd_, offset - cruft), map_size, scoped_memory::MMAP_ALLOCATED);
+  return static_cast<uint8_t*>(out.get()) + static_cast<std::size_t>(cruft);
+}
+
+// Advance the window so logical index is mapped.  A full block is mapped
+// when possible, with current_end_ pulled back by read_bound_ so a caller
+// reading up to read_bound_ bytes past an index never runs off the map; the
+// final partial block is exposed in full.  ptr_ is biased by -index so that
+// ptr_ + i addresses logical offset i within the window.
+void Rolling::Roll(uint64_t index) {
+  assert(!IsPassthrough());
+  std::size_t amount;
+  if (file_end_ - (index + file_begin_) > static_cast<uint64_t>(block_)) {
+    amount = block_;
+    current_end_ = index + amount - read_bound_;
+  } else {
+    amount = file_end_ - (index + file_begin_);
+    current_end_ = index + amount;
+  }
+  ptr_ = static_cast<uint8_t*>(ExtractNonRolling(mem_, index, amount)) - index;
+
+  current_begin_ = index;
+}
+
+} // namespace util
diff --git a/src/kenlm/util/mmap.hh b/src/kenlm/util/mmap.hh
new file mode 100644
index 0000000..b474dc7
--- /dev/null
+++ b/src/kenlm/util/mmap.hh
@@ -0,0 +1,225 @@
+#ifndef UTIL_MMAP_H
+#define UTIL_MMAP_H
+// Utilities for mmaped files.
+
+#include <cstddef>
+#include <limits>
+
+#include <stdint.h>
+#include <sys/types.h>
+
+namespace util {
+
+class scoped_fd;
+
+std::size_t SizePage();
+
+// (void*)-1 is MAP_FAILED; this is done to avoid including the mmap header here.
+class scoped_mmap {
+  public:
+    scoped_mmap() : data_((void*)-1), size_(0) {}
+    scoped_mmap(void *data, std::size_t size) : data_(data), size_(size) {}
+    // Syncs and unmaps; aborts the process if either fails (see mmap.cc).
+    ~scoped_mmap();
+
+    void *get() const { return data_; }
+
+    const uint8_t *begin() const { return reinterpret_cast<uint8_t*>(data_); }
+    const uint8_t *end() const { return reinterpret_cast<uint8_t*>(data_) + size_; }
+    std::size_t size() const { return size_; }
+
+    // Take ownership of (data, size).  The previously held region is
+    // released by the temporary's destructor at the end of the statement.
+    void reset(void *data, std::size_t size) {
+      scoped_mmap other(data_, size_);
+      data_ = data;
+      size_ = size;
+    }
+
+    void reset() {
+      reset((void*)-1, 0);
+    }
+
+    // Relinquish ownership without unmapping.
+    void *steal() {
+      void *ret = data_;
+      data_ = (void*)-1;
+      size_ = 0;
+      return ret;
+    }
+
+  private:
+    void *data_;
+    std::size_t size_;
+
+    // Not copyable: the destructor unmaps.
+    scoped_mmap(const scoped_mmap &);
+    scoped_mmap &operator=(const scoped_mmap &);
+};
+
+/* For when the memory might come from mmap, new char[], or malloc. Uses NULL
+ * and 0 for blanks even though mmap signals errors with (void*)-1. The reset
+ * function checks that blank for mmap.
+ */
+class scoped_memory {
+  public:
+    // How the held memory was obtained; determines how reset() releases it.
+    typedef enum {
+      MMAP_ROUND_UP_ALLOCATED, // The size was rounded up to a multiple of page size. Do the same before munmap.
+      MMAP_ALLOCATED, // munmap
+      MALLOC_ALLOCATED, // free
+      NONE_ALLOCATED // nothing here!
+    } Alloc;
+
+    // Adopt already-allocated memory.
+    scoped_memory(void *data, std::size_t size, Alloc source)
+      : data_(data), size_(size), source_(source) {}
+
+    scoped_memory() : data_(NULL), size_(0), source_(NONE_ALLOCATED) {}
+
+    // Calls HugeMalloc
+    scoped_memory(std::size_t to, bool zero_new);
+
+    ~scoped_memory() { reset(); }
+
+    void *get() const { return data_; }
+    const char *begin() const { return reinterpret_cast<char*>(data_); }
+    const char *end() const { return reinterpret_cast<char*>(data_) + size_; }
+    std::size_t size() const { return size_; }
+
+    Alloc source() const { return source_; }
+
+    void reset() { reset(NULL, 0, NONE_ALLOCATED); }
+
+    // Release current memory per source_, then adopt (data, size, from).
+    void reset(void *data, std::size_t size, Alloc from);
+
+    // Relinquish ownership without releasing.
+    void *steal() {
+      void *ret = data_;
+      data_ = NULL;
+      size_ = 0;
+      source_ = NONE_ALLOCATED;
+      return ret;
+    }
+
+  private:
+    void *data_;
+    std::size_t size_;
+
+    Alloc source_;
+
+    // Not copyable: the destructor releases.
+    scoped_memory(const scoped_memory &);
+    scoped_memory &operator=(const scoped_memory &);
+};
+
+extern const int kFileFlags;
+
+// Cross-platform, error-checking wrapper for mmap().
+void *MapOrThrow(std::size_t size, bool for_write, int flags, bool prefault, int fd, uint64_t offset = 0);
+
+// msync wrapper
+void SyncOrThrow(void *start, size_t length);
+
+// Cross-platform, error-checking wrapper for munmap().
+void UnmapOrThrow(void *start, size_t length);
+
+// Allocate memory, promising that all/vast majority of it will be used. Tries
+// hard to use huge pages on Linux.
+// If you want zeroed memory, pass zeroed = true.
+void HugeMalloc(std::size_t size, bool zeroed, scoped_memory &to);
+
+// Reallocates memory ala realloc but with option to zero the new memory.
+// On Linux, the memory can come from anonymous mmap or malloc/calloc.
+// On non-Linux, only malloc/calloc is supported.
+//
+// To summarize, any memory from HugeMalloc or HugeRealloc can be resized with
+// this.
+void HugeRealloc(std::size_t size, bool new_zeroed, scoped_memory &mem);
+
+// How MapRead should bring file content into memory.  Enumerator values are
+// unchanged; the trailing comma after the last enumerator was removed since
+// it is ill-formed in C++03 (and a -pedantic warning in C++98 mode).
+typedef enum {
+  // mmap with no prepopulate
+  LAZY,
+  // On linux, pass MAP_POPULATE to mmap.
+  POPULATE_OR_LAZY,
+  // Populate on Linux. malloc and read on non-Linux.
+  POPULATE_OR_READ,
+  // malloc and read.
+  READ,
+  // malloc and read in parallel (recommended for Lustre)
+  PARALLEL_READ
+} LoadMethod;
+
+void MapRead(LoadMethod method, int fd, uint64_t offset, std::size_t size, scoped_memory &out);
+
+// Open file name with mmap of size bytes, all of which are initially zero.
+void *MapZeroedWrite(int fd, std::size_t size);
+void *MapZeroedWrite(const char *name, std::size_t size, scoped_fd &file);
+
+// Forward rolling memory map with no overlap.
+class Rolling {
+  public:
+    // NOTE(review): leaves all scalar members uninitialized; presumably an
+    // instance is always assigned via operator= or Init before use — confirm.
+    Rolling() {}
+
+    // Passthrough over an in-memory buffer; never rolls.
+    explicit Rolling(void *data) { Init(data); }
+
+    Rolling(const Rolling &copy_from, uint64_t increase = 0);
+    Rolling &operator=(const Rolling &copy_from);
+
+    // For an actual rolling mmap.
+    explicit Rolling(int fd, bool for_write, std::size_t block, std::size_t read_bound, uint64_t offset, uint64_t amount);
+
+    // For a static mapping
+    void Init(void *data) {
+      ptr_ = data;
+      current_end_ = std::numeric_limits<uint64_t>::max();
+      current_begin_ = 0;
+      // Mark as a pass-through.
+      fd_ = -1;
+    }
+
+    // Shift the logical base forward; forces a remap on next access unless
+    // this is a passthrough.
+    void IncreaseBase(uint64_t by) {
+      file_begin_ += by;
+      ptr_ = static_cast<uint8_t*>(ptr_) + by;
+      if (!IsPassthrough()) current_end_ = 0;
+    }
+
+    // Shift the logical base backward; same remap-forcing behavior.
+    void DecreaseBase(uint64_t by) {
+      file_begin_ -= by;
+      ptr_ = static_cast<uint8_t*>(ptr_) - by;
+      if (!IsPassthrough()) current_end_ = 0;
+    }
+
+    void *ExtractNonRolling(scoped_memory &out, uint64_t index, std::size_t size);
+
+    // Returns base pointer
+    void *get() const { return ptr_; }
+
+    // Returns base pointer.
+    void *CheckedBase(uint64_t index) {
+      if (index >= current_end_ || index < current_begin_) {
+        Roll(index);
+      }
+      return ptr_;
+    }
+
+    // Returns indexed pointer.
+    void *CheckedIndex(uint64_t index) {
+      return static_cast<uint8_t*>(CheckedBase(index)) + index;
+    }
+
+  private:
+    void Roll(uint64_t index);
+
+    // True if this is just a thin wrapper on a pointer.
+    bool IsPassthrough() const { return fd_ == -1; }
+
+    // Base pointer biased so ptr_ + index addresses logical offset index.
+    void *ptr_;
+    // Logical index range currently addressable without rolling.
+    uint64_t current_begin_;
+    uint64_t current_end_;
+
+    // Owns the currently mapped block (unused in passthrough mode).
+    scoped_memory mem_;
+
+    int fd_;
+    uint64_t file_begin_;
+    uint64_t file_end_;
+
+    bool for_write_;
+    std::size_t block_;
+    std::size_t read_bound_;
+};
+
+} // namespace util
+
+#endif // UTIL_MMAP_H
diff --git a/src/kenlm/util/multi_intersection.hh b/src/kenlm/util/multi_intersection.hh
new file mode 100644
index 0000000..7395460
--- /dev/null
+++ b/src/kenlm/util/multi_intersection.hh
@@ -0,0 +1,80 @@
+#ifndef UTIL_MULTI_INTERSECTION_H
+#define UTIL_MULTI_INTERSECTION_H
+
+#include <boost/optional.hpp>
+#include <boost/range/iterator_range.hpp>
+
+#include <algorithm>
+#include <functional>
+#include <vector>
+
+namespace util {
+
+namespace detail {
+// Orders ranges by ascending size so the smallest set is scanned first.
+// std::binary_function (the former base) is deprecated in C++11 and removed
+// in C++17; its typedefs are kept explicitly for backward compatibility.
+template <class Range> struct RangeLessBySize {
+  typedef const Range &first_argument_type;
+  typedef const Range &second_argument_type;
+  typedef bool result_type;
+  bool operator()(const Range &left, const Range &right) const {
+    return left.size() < right.size();
+  }
+};
+
+/* Takes sets specified by their iterators and a boost::optional containing
+ * the lowest intersection if any. Each set must be sorted in increasing
+ * order. sets is changed to truncate the beginning of each sequence to the
+ * location of the match or an empty set. Precondition: sets is not empty
+ * since the intersection over null is the universe and this function does not
+ * know the universe.
+ */
+template <class Iterator, class Less> boost::optional<typename std::iterator_traits<Iterator>::value_type> FirstIntersectionSorted(std::vector<boost::iterator_range<Iterator> > &sets, const Less &less = std::less<typename std::iterator_traits<Iterator>::value_type>()) {
+  typedef std::vector<boost::iterator_range<Iterator> > Sets;
+  typedef typename std::iterator_traits<Iterator>::value_type Value;
+
+  assert(!sets.empty());
+
+  if (sets.front().empty()) return boost::optional<Value>();
+  // Possibly suboptimal to copy for general Value; makes unsigned int go slightly faster.
+  Value highest(sets.front().front());
+  for (typename Sets::iterator i(sets.begin()); i != sets.end(); ) {
+    // Skip ahead in this set to the candidate value.
+    i->advance_begin(std::lower_bound(i->begin(), i->end(), highest, less) - i->begin());
+    if (i->empty()) return boost::optional<Value>();
+    if (less(highest, i->front())) {
+      // This set has no `highest`; raise the candidate and rescan all sets.
+      highest = i->front();
+      // start over
+      i = sets.begin();
+    } else {
+      ++i;
+    }
+  }
+  // Every set contains `highest`, and it is the smallest such value.
+  return boost::optional<Value>(highest);
+}
+
+} // namespace detail
+
+// Find the smallest value common to all sets, or an empty optional if the
+// intersection is empty.  Sorts the sets smallest-first, then delegates.
+template <class Iterator, class Less> boost::optional<typename std::iterator_traits<Iterator>::value_type> FirstIntersection(std::vector<boost::iterator_range<Iterator> > &sets, const Less less) {
+  assert(!sets.empty());
+  typedef detail::RangeLessBySize<boost::iterator_range<Iterator> > SizeOrder;
+  std::sort(sets.begin(), sets.end(), SizeOrder());
+  return detail::FirstIntersectionSorted(sets, less);
+}
+
+// Convenience overload defaulting to std::less on the value type.
+template <class Iterator> boost::optional<typename std::iterator_traits<Iterator>::value_type> FirstIntersection(std::vector<boost::iterator_range<Iterator> > &sets) {
+  typedef typename std::iterator_traits<Iterator>::value_type Value;
+  return FirstIntersection(sets, std::less<Value>());
+}
+
+// Invoke out(value) for every value common to all sets, in increasing order.
+// sets is consumed (truncated) in the process.
+template <class Iterator, class Output, class Less> void AllIntersection(std::vector<boost::iterator_range<Iterator> > &sets, Output &out, const Less less) {
+  typedef typename std::iterator_traits<Iterator>::value_type Value;
+  assert(!sets.empty());
+
+  std::sort(sets.begin(), sets.end(), detail::RangeLessBySize<boost::iterator_range<Iterator> >());
+  // The loop-scoped ret suffices; the previous outer declaration was dead,
+  // shadowed by this one.  Advancing the front set past each match makes
+  // the next call find the next common value.
+  for (boost::optional<Value> ret; (ret = detail::FirstIntersectionSorted(sets, less)); sets.front().advance_begin(1)) {
+    out(*ret);
+  }
+}
+
+// Convenience overload defaulting to std::less on the value type.
+template <class Iterator, class Output> void AllIntersection(std::vector<boost::iterator_range<Iterator> > &sets, Output &out) {
+  typedef typename std::iterator_traits<Iterator>::value_type Value;
+  AllIntersection(sets, out, std::less<Value>());
+}
+
+} // namespace util
+
+#endif // UTIL_MULTI_INTERSECTION_H
diff --git a/src/kenlm/util/multi_intersection_test.cc b/src/kenlm/util/multi_intersection_test.cc
new file mode 100644
index 0000000..ee5af7d
--- /dev/null
+++ b/src/kenlm/util/multi_intersection_test.cc
@@ -0,0 +1,63 @@
+#include "util/multi_intersection.hh"
+
+#define BOOST_TEST_MODULE MultiIntersectionTest
+#include <boost/test/unit_test.hpp>
+
+namespace util {
+namespace {
+
+// A single empty set: intersection must be empty.
+BOOST_AUTO_TEST_CASE(Empty) {
+  std::vector<boost::iterator_range<const unsigned int*> > sets;
+
+  sets.push_back(boost::iterator_range<const unsigned int*>(static_cast<const unsigned int*>(NULL), static_cast<const unsigned int*>(NULL)));
+  BOOST_CHECK(!FirstIntersection(sets));
+}
+
+// One non-empty set: intersection is its smallest element.
+BOOST_AUTO_TEST_CASE(Single) {
+  std::vector<unsigned int> nums;
+  nums.push_back(1);
+  nums.push_back(4);
+  nums.push_back(100);
+  std::vector<boost::iterator_range<std::vector<unsigned int>::const_iterator> > sets;
+  sets.push_back(nums);
+
+  boost::optional<unsigned int> ret(FirstIntersection(sets));
+
+  BOOST_REQUIRE(ret);
+  BOOST_CHECK_EQUAL(static_cast<unsigned int>(1), *ret);
+}
+
+// Helper: wrap a C array in an iterator_range.
+template <class T, unsigned int len> boost::iterator_range<const T*> RangeFromArray(const T (&arr)[len]) {
+  return boost::iterator_range<const T*>(arr, arr + len);
+}
+
+// Three sets with no common element.
+BOOST_AUTO_TEST_CASE(MultiNone) {
+  unsigned int nums0[] = {1, 3, 4, 22};
+  unsigned int nums1[] = {2, 5, 12};
+  unsigned int nums2[] = {4, 17};
+
+  std::vector<boost::iterator_range<const unsigned int*> > sets;
+  sets.push_back(RangeFromArray(nums0));
+  sets.push_back(RangeFromArray(nums1));
+  sets.push_back(RangeFromArray(nums2));
+
+  BOOST_CHECK(!FirstIntersection(sets));
+}
+
+// Three sets sharing exactly one element (17).
+BOOST_AUTO_TEST_CASE(MultiOne) {
+  unsigned int nums0[] = {1, 3, 4, 17, 22};
+  unsigned int nums1[] = {2, 5, 12, 17};
+  unsigned int nums2[] = {4, 17};
+
+  std::vector<boost::iterator_range<const unsigned int*> > sets;
+  sets.push_back(RangeFromArray(nums0));
+  sets.push_back(RangeFromArray(nums1));
+  sets.push_back(RangeFromArray(nums2));
+
+  boost::optional<unsigned int> ret(FirstIntersection(sets));
+  BOOST_REQUIRE(ret);
+  BOOST_CHECK_EQUAL(static_cast<unsigned int>(17), *ret);
+}
+
+} // namespace
+} // namespace util
diff --git a/src/kenlm/util/murmur_hash.cc b/src/kenlm/util/murmur_hash.cc
new file mode 100644
index 0000000..bf32498
--- /dev/null
+++ b/src/kenlm/util/murmur_hash.cc
@@ -0,0 +1,175 @@
+/* Downloaded from http://sites.google.com/site/murmurhash/ which says "All
+ * code is released to the public domain. For business purposes, Murmurhash is
+ * under the MIT license."
+ * This is modified from the original:
+ * ULL tag on 0xc6a4a7935bd1e995 so this will compile on 32-bit.
+ * length changed to unsigned int.
+ * placed in namespace util
+ * add MurmurHashNative
+ * default option = 0 for seed
+ * ARM port from NICT
+ */
+
+#include "util/murmur_hash.hh"
+#include <cstring>
+
+namespace util {
+
+//-----------------------------------------------------------------------------
+// MurmurHash2, 64-bit versions, by Austin Appleby
+
+// The same caveats as 32-bit MurmurHash2 apply here - beware of alignment
+// and endian-ness issues if used across multiple platforms.
+
+// 64-bit hash for 64-bit platforms
+
+/* MurmurHash2 64A: hash len bytes of key with the given seed, consuming the
+ * input 8 bytes at a time.  The ARM path copies each word via memcpy to
+ * avoid unaligned 64-bit loads.  Do not modify the mixing constants or
+ * statement order: the output value is part of the on-disk format.
+ */
+uint64_t MurmurHash64A ( const void * key, std::size_t len, uint64_t seed )
+{
+  const uint64_t m = 0xc6a4a7935bd1e995ULL;
+  const int r = 47;
+
+  uint64_t h = seed ^ (len * m);
+
+#if defined(__arm) || defined(__arm__)
+  const size_t ksize = sizeof(uint64_t);
+  const unsigned char * data = (const unsigned char *)key;
+  const unsigned char * end = data + (std::size_t)(len/8) * ksize;
+#else
+  const uint64_t * data = (const uint64_t *)key;
+  const uint64_t * end = data + (len/8);
+#endif
+
+  while(data != end)
+  {
+#if defined(__arm) || defined(__arm__)
+    uint64_t k;
+    memcpy(&k, data, ksize);
+    data += ksize;
+#else
+    uint64_t k = *data++;
+#endif
+
+    k *= m;
+    k ^= k >> r;
+    k *= m;
+
+    h ^= k;
+    h *= m;
+  }
+
+  const unsigned char * data2 = (const unsigned char*)data;
+
+  // Mix in the 0-7 trailing bytes; every case deliberately falls through.
+  switch(len & 7)
+  {
+  case 7: h ^= uint64_t(data2[6]) << 48;
+  case 6: h ^= uint64_t(data2[5]) << 40;
+  case 5: h ^= uint64_t(data2[4]) << 32;
+  case 4: h ^= uint64_t(data2[3]) << 24;
+  case 3: h ^= uint64_t(data2[2]) << 16;
+  case 2: h ^= uint64_t(data2[1]) << 8;
+  case 1: h ^= uint64_t(data2[0]);
+          h *= m;
+  };
+
+  // Final avalanche.
+  h ^= h >> r;
+  h *= m;
+  h ^= h >> r;
+
+  return h;
+}
+
+
+// 64-bit hash for 32-bit platforms
+
+/* MurmurHash2 64B: 64-bit hash built from two 32-bit states, intended for
+ * 32-bit platforms.  Produces different values than MurmurHash64A for the
+ * same input.  Note h1 = seed ^ len truncates len to 32 bits (h1 is
+ * unsigned int).  Mixing constants and order are part of the output format.
+ */
+uint64_t MurmurHash64B ( const void * key, std::size_t len, uint64_t seed )
+{
+  const unsigned int m = 0x5bd1e995;
+  const int r = 24;
+
+  unsigned int h1 = seed ^ len;
+  unsigned int h2 = 0;
+
+#if defined(__arm) || defined(__arm__)
+  size_t ksize = sizeof(unsigned int);
+  const unsigned char * data = (const unsigned char *)key;
+#else
+  const unsigned int * data = (const unsigned int *)key;
+#endif
+
+  unsigned int k1, k2;
+  // Consume 8 bytes per iteration as two 32-bit words.
+  while(len >= 8)
+  {
+#if defined(__arm) || defined(__arm__)
+    memcpy(&k1, data, ksize);
+    data += ksize;
+    memcpy(&k2, data, ksize);
+    data += ksize;
+#else
+    k1 = *data++;
+    k2 = *data++;
+#endif
+
+    k1 *= m; k1 ^= k1 >> r; k1 *= m;
+    h1 *= m; h1 ^= k1;
+    len -= 4;
+
+    k2 *= m; k2 ^= k2 >> r; k2 *= m;
+    h2 *= m; h2 ^= k2;
+    len -= 4;
+  }
+
+  // One remaining full 32-bit word, if any.
+  if(len >= 4)
+  {
+#if defined(__arm) || defined(__arm__)
+    memcpy(&k1, data, ksize);
+    data += ksize;
+#else
+    k1 = *data++;
+#endif
+    k1 *= m; k1 ^= k1 >> r; k1 *= m;
+    h1 *= m; h1 ^= k1;
+    len -= 4;
+  }
+
+  // 0-3 trailing bytes; cases deliberately fall through.
+  switch(len)
+  {
+  case 3: h2 ^= ((unsigned char*)data)[2] << 16;
+  case 2: h2 ^= ((unsigned char*)data)[1] << 8;
+  case 1: h2 ^= ((unsigned char*)data)[0];
+      h2 *= m;
+  };
+
+  // Final avalanche across both halves.
+  h1 ^= h2 >> 18; h1 *= m;
+  h2 ^= h1 >> 22; h2 *= m;
+  h1 ^= h2 >> 17; h1 *= m;
+  h2 ^= h1 >> 19; h2 *= m;
+
+  uint64_t h = h1;
+
+  h = (h << 32) | h2;
+
+  return h;
+}
+
+// Trick to test for 64-bit architecture at compile time.
+namespace {
+#ifdef __clang__
+#pragma clang diagnostic push
+#pragma clang diagnostic ignored "-Wunused-function"
+#endif
+// Primary template: any pointer width uses the 64-bit algorithm; the L == 4
+// specialization below selects the 32-bit-friendly variant, so the choice is
+// made at compile time from sizeof(void*).
+template <unsigned L> inline uint64_t MurmurHashNativeBackend(const void * key, std::size_t len, uint64_t seed) {
+  return MurmurHash64A(key, len, seed);
+}
+template <> inline uint64_t MurmurHashNativeBackend<4>(const void * key, std::size_t len, uint64_t seed) {
+  return MurmurHash64B(key, len, seed);
+}
+#ifdef __clang__
+#pragma clang diagnostic pop
+#endif
+} // namespace
+
+// Hash with the variant suited to the native pointer width: 64A on 64-bit
+// platforms, 64B on 32-bit.  Since the two algorithms differ, the value is
+// not portable across architectures.
+uint64_t MurmurHashNative(const void * key, std::size_t len, uint64_t seed) {
+  return MurmurHashNativeBackend<sizeof(void*)>(key, len, seed);
+}
+
+} // namespace util
diff --git a/src/joshua/decoder/ff/lm/kenlm/util/murmur_hash.hh b/src/kenlm/util/murmur_hash.hh
similarity index 100%
rename from src/joshua/decoder/ff/lm/kenlm/util/murmur_hash.hh
rename to src/kenlm/util/murmur_hash.hh
diff --git a/src/joshua/decoder/ff/lm/kenlm/util/parallel_read.cc b/src/kenlm/util/parallel_read.cc
similarity index 100%
rename from src/joshua/decoder/ff/lm/kenlm/util/parallel_read.cc
rename to src/kenlm/util/parallel_read.cc
diff --git a/src/joshua/decoder/ff/lm/kenlm/util/parallel_read.hh b/src/kenlm/util/parallel_read.hh
similarity index 100%
rename from src/joshua/decoder/ff/lm/kenlm/util/parallel_read.hh
rename to src/kenlm/util/parallel_read.hh
diff --git a/src/kenlm/util/pcqueue.hh b/src/kenlm/util/pcqueue.hh
new file mode 100644
index 0000000..05c868f
--- /dev/null
+++ b/src/kenlm/util/pcqueue.hh
@@ -0,0 +1,156 @@
+#ifndef UTIL_PCQUEUE_H
+#define UTIL_PCQUEUE_H
+
+#include "util/exception.hh"
+
+#include <boost/interprocess/sync/interprocess_semaphore.hpp>
+#include <boost/scoped_array.hpp>
+#include <boost/thread/mutex.hpp>
+#include <boost/utility.hpp>
+
+#include <cerrno>
+
+#ifdef __APPLE__
+#include <mach/semaphore.h>
+#include <mach/task.h>
+#include <mach/mach_traps.h>
+#include <mach/mach.h>
+#endif // __APPLE__
+
+namespace util {
+
+/* OS X Maverick and Boost interprocess were doing "Function not implemented."
+ * So this is my own wrapper around the mach kernel APIs.
+ */
+#ifdef __APPLE__
+
+#define MACH_CALL(call) UTIL_THROW_IF(KERN_SUCCESS != (call), Exception, "Mach call failure")
+
+// Counting semaphore over the raw mach kernel API (see comment above).
+class Semaphore {
+  public:
+    explicit Semaphore(int value) : task_(mach_task_self()) {
+      MACH_CALL(semaphore_create(task_, &back_, SYNC_POLICY_FIFO, value));
+    }
+
+    // NOTE(review): MACH_CALL throws on failure, and throwing from a
+    // destructor is dangerous — confirm this is acceptable here.
+    ~Semaphore() {
+      MACH_CALL(semaphore_destroy(task_, back_));
+    }
+
+    void wait() {
+      MACH_CALL(semaphore_wait(back_));
+    }
+
+    void post() {
+      MACH_CALL(semaphore_signal(back_));
+    }
+
+  private:
+    semaphore_t back_;
+    task_t task_;
+};
+
+// Mach variant: a plain wait (errors surface as exceptions via MACH_CALL).
+inline void WaitSemaphore(Semaphore &semaphore) {
+  semaphore.wait();
+}
+
+#else
+typedef boost::interprocess::interprocess_semaphore Semaphore;
+
+// Wait, retrying when the wait is interrupted by a signal (EINTR); any other
+// interprocess_exception is propagated.
+inline void WaitSemaphore (Semaphore &on) {
+  while (1) {
+    try {
+      on.wait();
+      break;
+    }
+    catch (boost::interprocess::interprocess_exception &e) {
+      if (e.get_native_error() != EINTR) {
+        throw;
+      }
+    }
+  }
+}
+
+#endif // __APPLE__
+
+/**
+ * Producer consumer queue safe for multiple producers and multiple consumers.
+ * T must be default constructable and have operator=.
+ * The value is copied twice for Consume(T &out) or three times for Consume(),
+ * so larger objects should be passed via pointer.
+ * Strong exception guarantee if operator= throws. Undefined if semaphores throw.
+ */
+template <class T> class PCQueue : boost::noncopyable {
+  public:
+    explicit PCQueue(size_t size)
+     : empty_(size), used_(0),
+       storage_(new T[size]),
+       end_(storage_.get() + size),
+       produce_at_(storage_.get()),
+       consume_at_(storage_.get()) {}
+
+    // Add a value to the queue.
+    void Produce(const T &val) {
+      // Reserve a free slot before taking the producer lock.
+      WaitSemaphore(empty_);
+      {
+        boost::unique_lock<boost::mutex> produce_lock(produce_at_mutex_);
+        try {
+          *produce_at_ = val;
+        }
+        catch (...) {
+          // Assignment failed: hand the reserved slot back.
+          empty_.post();
+          throw;
+        }
+        // Circular advance of the write cursor.
+        if (++produce_at_ == end_) produce_at_ = storage_.get();
+      }
+      used_.post();
+    }
+
+    // Consume a value, assigning it to out.
+    T& Consume(T &out) {
+      // Wait for an occupied slot before taking the consumer lock.
+      WaitSemaphore(used_);
+      {
+        boost::unique_lock<boost::mutex> consume_lock(consume_at_mutex_);
+        try {
+          out = *consume_at_;
+        }
+        catch (...) {
+          // Assignment failed: the slot is still occupied.
+          used_.post();
+          throw;
+        }
+        // Circular advance of the read cursor.
+        if (++consume_at_ == end_) consume_at_ = storage_.get();
+      }
+      empty_.post();
+      return out;
+    }
+
+    // Convenience version of Consume that copies the value to return.
+    // The other version is faster.
+    T Consume() {
+      T ret;
+      Consume(ret);
+      return ret;
+    }
+
+  private:
+    // Number of empty spaces in storage_.
+    Semaphore empty_;
+    // Number of occupied spaces in storage_.
+    Semaphore used_;
+
+    boost::scoped_array<T> storage_;
+
+    T *const end_;
+
+    // Index for next write in storage_.
+    T *produce_at_;
+    boost::mutex produce_at_mutex_;
+
+    // Index for next read from storage_.
+    T *consume_at_;
+    boost::mutex consume_at_mutex_;
+
+};
+
+} // namespace util
+
+#endif // UTIL_PCQUEUE_H
diff --git a/src/joshua/decoder/ff/lm/kenlm/util/pcqueue_test.cc b/src/kenlm/util/pcqueue_test.cc
similarity index 100%
rename from src/joshua/decoder/ff/lm/kenlm/util/pcqueue_test.cc
rename to src/kenlm/util/pcqueue_test.cc
diff --git a/src/kenlm/util/pool.cc b/src/kenlm/util/pool.cc
new file mode 100644
index 0000000..246417c
--- /dev/null
+++ b/src/kenlm/util/pool.cc
@@ -0,0 +1,38 @@
+#include "util/pool.hh"
+
+#include "util/scoped.hh"
+
+#include <cstdlib>
+
+#include <algorithm>
+
+namespace util {
+
+Pool::Pool() {
+ current_ = NULL;
+ current_end_ = NULL;
+}
+
+Pool::~Pool() {
+ FreeAll();
+}
+
+void Pool::FreeAll() {
+ for (std::vector<void *>::const_iterator i(free_list_.begin()); i != free_list_.end(); ++i) {
+ free(*i);
+ }
+ free_list_.clear();
+ current_ = NULL;
+ current_end_ = NULL;
+}
+
+void *Pool::More(std::size_t size) {
+ std::size_t amount = std::max(static_cast<size_t>(32) << free_list_.size(), size);
+ uint8_t *ret = static_cast<uint8_t*>(MallocOrThrow(amount));
+ free_list_.push_back(ret);
+ current_ = ret + size;
+ current_end_ = ret + amount;
+ return ret;
+}
+
+} // namespace util
diff --git a/src/kenlm/util/pool.hh b/src/kenlm/util/pool.hh
new file mode 100644
index 0000000..511b6d9
--- /dev/null
+++ b/src/kenlm/util/pool.hh
@@ -0,0 +1,44 @@
+// Very simple pool. It can only allocate memory. And all of the memory it
+// allocates must be freed at the same time.
+
+#ifndef UTIL_POOL_H
+#define UTIL_POOL_H
+
+#include <vector>
+#include <stdint.h>
+
+namespace util {
+
+class Pool {
+ public:
+ Pool();
+
+ ~Pool();
+
+ void *Allocate(std::size_t size) {
+ void *ret = current_;
+ current_ += size;
+ if (current_ < current_end_) {
+ return ret;
+ } else {
+ return More(size);
+ }
+ }
+
+ void FreeAll();
+
+ private:
+ void *More(std::size_t size);
+
+ std::vector<void *> free_list_;
+
+ uint8_t *current_, *current_end_;
+
+ // no copying
+ Pool(const Pool &);
+ Pool &operator=(const Pool &);
+};
+
+} // namespace util
+
+#endif // UTIL_POOL_H
diff --git a/src/kenlm/util/probing_hash_table.hh b/src/kenlm/util/probing_hash_table.hh
new file mode 100644
index 0000000..438de92
--- /dev/null
+++ b/src/kenlm/util/probing_hash_table.hh
@@ -0,0 +1,421 @@
+#ifndef UTIL_PROBING_HASH_TABLE_H
+#define UTIL_PROBING_HASH_TABLE_H
+
+#include "util/exception.hh"
+#include "util/mmap.hh"
+
+#include <algorithm>
+#include <cstddef>
+#include <functional>
+#include <vector>
+
+#include <cassert>
+#include <stdint.h>
+
+namespace util {
+
+/* Thrown when table grows too large */
+class ProbingSizeException : public Exception {
+ public:
+ ProbingSizeException() throw() {}
+ ~ProbingSizeException() throw() {}
+};
+
+// std::identity is an SGI extension :-(
+struct IdentityHash {
+ template <class T> T operator()(T arg) const { return arg; }
+};
+
+class DivMod {
+ public:
+ explicit DivMod(std::size_t buckets) : buckets_(buckets) {}
+
+ static std::size_t RoundBuckets(std::size_t from) {
+ return from;
+ }
+
+ template <class It> It Ideal(It begin, uint64_t hash) const {
+ return begin + (hash % buckets_);
+ }
+
+ template <class BaseIt, class OutIt> void Next(BaseIt begin, BaseIt end, OutIt &it) const {
+ if (++it == end) it = begin;
+ }
+
+ void Double() {
+ buckets_ *= 2;
+ }
+
+ private:
+ std::size_t buckets_;
+};
+
+class Power2Mod {
+ public:
+ explicit Power2Mod(std::size_t buckets) {
+ UTIL_THROW_IF(!buckets || (((buckets - 1) & buckets)), ProbingSizeException, "Size " << buckets << " is not a power of 2.");
+ mask_ = buckets - 1;
+ }
+
+ // Round up to next power of 2.
+ static std::size_t RoundBuckets(std::size_t from) {
+ --from;
+ from |= from >> 1;
+ from |= from >> 2;
+ from |= from >> 4;
+ from |= from >> 8;
+ from |= from >> 16;
+ from |= from >> 32;
+ return from + 1;
+ }
+
+ template <class It> It Ideal(It begin, uint64_t hash) const {
+ return begin + (hash & mask_);
+ }
+
+ template <class BaseIt, class OutIt> void Next(BaseIt begin, BaseIt /*end*/, OutIt &it) const {
+ it = begin + ((it - begin + 1) & mask_);
+ }
+
+ void Double() {
+ mask_ = (mask_ << 1) | 1;
+ }
+
+ private:
+ std::size_t mask_;
+};
+
+template <class EntryT, class HashT, class EqualT> class AutoProbing;
+
+/* Non-standard hash table
+ * Buckets must be set at the beginning and must be greater than maximum number
+ * of elements, else it throws ProbingSizeException.
+ * Memory management and initialization is externalized to make it easier to
+ * serialize these to disk and load them quickly.
+ * Uses linear probing to find value.
+ * Only insert and lookup operations.
+ */
+template <class EntryT, class HashT, class EqualT = std::equal_to<typename EntryT::Key>, class ModT = DivMod> class ProbingHashTable {
+ public:
+ typedef EntryT Entry;
+ typedef typename Entry::Key Key;
+ typedef const Entry *ConstIterator;
+ typedef Entry *MutableIterator;
+ typedef HashT Hash;
+ typedef EqualT Equal;
+ typedef ModT Mod;
+
+ static uint64_t Size(uint64_t entries, float multiplier) {
+ uint64_t buckets = Mod::RoundBuckets(std::max(entries + 1, static_cast<uint64_t>(multiplier * static_cast<float>(entries))));
+ return buckets * sizeof(Entry);
+ }
+
+ // Must be assigned to later.
+ ProbingHashTable() : mod_(1), entries_(0)
+#ifdef DEBUG
+ , initialized_(false)
+#endif
+ {}
+
+ ProbingHashTable(void *start, std::size_t allocated, const Key &invalid = Key(), const Hash &hash_func = Hash(), const Equal &equal_func = Equal())
+ : begin_(reinterpret_cast<MutableIterator>(start)),
+ end_(begin_ + allocated / sizeof(Entry)),
+ buckets_(end_ - begin_),
+ invalid_(invalid),
+ hash_(hash_func),
+ equal_(equal_func),
+ mod_(end_ - begin_),
+ entries_(0)
+#ifdef DEBUG
+ , initialized_(true)
+#endif
+ {}
+
+ void Relocate(void *new_base) {
+ begin_ = reinterpret_cast<MutableIterator>(new_base);
+ end_ = begin_ + buckets_;
+ }
+
+ MutableIterator Ideal(const Key key) {
+ return mod_.Ideal(begin_, hash_(key));
+ }
+ ConstIterator Ideal(const Key key) const {
+ return mod_.Ideal(begin_, hash_(key));
+ }
+
+ template <class T> MutableIterator Insert(const T &t) {
+#ifdef DEBUG
+ assert(initialized_);
+#endif
+ UTIL_THROW_IF(++entries_ >= buckets_, ProbingSizeException, "Hash table with " << buckets_ << " buckets is full.");
+ return UncheckedInsert(t);
+ }
+
+ // Return true if the value was found (and not inserted). This is consistent with Find but the opposite of hash_map!
+ template <class T> bool FindOrInsert(const T &t, MutableIterator &out) {
+#ifdef DEBUG
+ assert(initialized_);
+#endif
+ for (MutableIterator i = Ideal(t.GetKey());;mod_.Next(begin_, end_, i)) {
+ Key got(i->GetKey());
+ if (equal_(got, t.GetKey())) { out = i; return true; }
+ if (equal_(got, invalid_)) {
+ UTIL_THROW_IF(++entries_ >= buckets_, ProbingSizeException, "Hash table with " << buckets_ << " buckets is full.");
+ *i = t;
+ out = i;
+ return false;
+ }
+ }
+ }
+
+ void FinishedInserting() {}
+
+ // Don't change anything related to GetKey,
+ template <class Key> bool UnsafeMutableFind(const Key key, MutableIterator &out) {
+#ifdef DEBUG
+ assert(initialized_);
+#endif
+ for (MutableIterator i(Ideal(key));; mod_.Next(begin_, end_, i)) {
+ Key got(i->GetKey());
+ if (equal_(got, key)) { out = i; return true; }
+ if (equal_(got, invalid_)) return false;
+ }
+ }
+
+ // Like UnsafeMutableFind, but the key must be there.
+ template <class Key> MutableIterator UnsafeMutableMustFind(const Key key) {
+ for (MutableIterator i(Ideal(key));; mod_.Next(begin_, end_, i)) {
+ Key got(i->GetKey());
+ if (equal_(got, key)) { return i; }
+ assert(!equal_(got, invalid_));
+ }
+ }
+
+ // Iterator is both input and output.
+ template <class Key> bool FindFromIdeal(const Key key, ConstIterator &i) const {
+#ifdef DEBUG
+ assert(initialized_);
+#endif
+ for (;; mod_.Next(begin_, end_, i)) {
+ Key got(i->GetKey());
+ if (equal_(got, key)) return true;
+ if (equal_(got, invalid_)) return false;
+ }
+ }
+
+ template <class Key> bool Find(const Key key, ConstIterator &out) const {
+ out = Ideal(key);
+ return FindFromIdeal(key, out);
+ }
+
+ // Like Find but we're sure it must be there.
+ template <class Key> ConstIterator MustFind(const Key key) const {
+ for (ConstIterator i(Ideal(key));; mod_.Next(begin_, end_, i)) {
+ Key got(i->GetKey());
+ if (equal_(got, key)) { return i; }
+ assert(!equal_(got, invalid_));
+ }
+ }
+
+ void Clear() {
+ Entry invalid;
+ invalid.SetKey(invalid_);
+ std::fill(begin_, end_, invalid);
+ entries_ = 0;
+ }
+
+ // Return number of entries assuming no serialization went on.
+ std::size_t SizeNoSerialization() const {
+ return entries_;
+ }
+
+ // Return memory size expected by Double.
+ std::size_t DoubleTo() const {
+ return buckets_ * 2 * sizeof(Entry);
+ }
+
+ // Inform the table that it has double the amount of memory.
+ // Pass clear_new = false if you are sure the new memory is initialized
+ // properly (to invalid_) i.e. by mremap.
+ void Double(void *new_base, bool clear_new = true) {
+ begin_ = static_cast<MutableIterator>(new_base);
+ MutableIterator old_end = begin_ + buckets_;
+ buckets_ *= 2;
+ end_ = begin_ + buckets_;
+ mod_.Double();
+ if (clear_new) {
+ Entry invalid;
+ invalid.SetKey(invalid_);
+ std::fill(old_end, end_, invalid);
+ }
+ std::vector<Entry> rolled_over;
+ // Move roll-over entries to a buffer because they might not roll over anymore. This should be small.
+ for (MutableIterator i = begin_; i != old_end && !equal_(i->GetKey(), invalid_); ++i) {
+ rolled_over.push_back(*i);
+ i->SetKey(invalid_);
+ }
+ /* Re-insert everything. Entries might go backwards to take over a
+ * recently opened gap, stay, move to new territory, or wrap around. If
+ * an entry wraps around, it might go to a pointer greater than i (which
+ * can happen at the beginning) and it will be revisited to possibly fill
+ * in a gap created later.
+ */
+ Entry temp;
+ for (MutableIterator i = begin_; i != old_end; ++i) {
+ if (!equal_(i->GetKey(), invalid_)) {
+ temp = *i;
+ i->SetKey(invalid_);
+ UncheckedInsert(temp);
+ }
+ }
+ // Put the roll-over entries back in.
+ for (typename std::vector<Entry>::const_iterator i(rolled_over.begin()); i != rolled_over.end(); ++i) {
+ UncheckedInsert(*i);
+ }
+ }
+
+ // Mostly for tests, check consistency of every entry.
+ void CheckConsistency() {
+ MutableIterator last;
+ for (last = end_ - 1; last >= begin_ && !equal_(last->GetKey(), invalid_); --last) {}
+ UTIL_THROW_IF(last == begin_, ProbingSizeException, "Completely full");
+ MutableIterator i;
+ // Beginning can be wrap-arounds.
+ for (i = begin_; !equal_(i->GetKey(), invalid_); ++i) {
+ MutableIterator ideal = Ideal(i->GetKey());
+ UTIL_THROW_IF(ideal > i && ideal <= last, Exception, "Inconsistency at position " << (i - begin_) << " should be at " << (ideal - begin_));
+ }
+ MutableIterator pre_gap = i;
+ for (; i != end_; ++i) {
+ if (equal_(i->GetKey(), invalid_)) {
+ pre_gap = i;
+ continue;
+ }
+ MutableIterator ideal = Ideal(i->GetKey());
+ UTIL_THROW_IF(ideal > i || ideal <= pre_gap, Exception, "Inconsistency at position " << (i - begin_) << " with ideal " << (ideal - begin_));
+ }
+ }
+
+ ConstIterator RawBegin() const {
+ return begin_;
+ }
+ ConstIterator RawEnd() const {
+ return end_;
+ }
+
+ private:
+ friend class AutoProbing<Entry, Hash, Equal>;
+
+ template <class T> MutableIterator UncheckedInsert(const T &t) {
+ for (MutableIterator i(Ideal(t.GetKey()));; mod_.Next(begin_, end_, i)) {
+ if (equal_(i->GetKey(), invalid_)) { *i = t; return i; }
+ }
+ }
+
+ MutableIterator begin_;
+ MutableIterator end_;
+ std::size_t buckets_;
+ Key invalid_;
+ Hash hash_;
+ Equal equal_;
+ Mod mod_;
+
+ std::size_t entries_;
+#ifdef DEBUG
+ bool initialized_;
+#endif
+};
+
+// Resizable linear probing hash table. This owns the memory.
+template <class EntryT, class HashT, class EqualT = std::equal_to<typename EntryT::Key> > class AutoProbing {
+ private:
+ typedef ProbingHashTable<EntryT, HashT, EqualT, Power2Mod> Backend;
+ public:
+ static std::size_t MemUsage(std::size_t size, float multiplier = 1.5) {
+ return Backend::Size(size, multiplier);
+ }
+
+ typedef EntryT Entry;
+ typedef typename Entry::Key Key;
+ typedef const Entry *ConstIterator;
+ typedef Entry *MutableIterator;
+ typedef HashT Hash;
+ typedef EqualT Equal;
+
+ AutoProbing(std::size_t initial_size = 5, const Key &invalid = Key(), const Hash &hash_func = Hash(), const Equal &equal_func = Equal()) :
+ allocated_(Backend::Size(initial_size, 1.2)), mem_(allocated_, KeyIsRawZero(invalid)), backend_(mem_.get(), allocated_, invalid, hash_func, equal_func) {
+ threshold_ = std::min<std::size_t>(backend_.buckets_ - 1, backend_.buckets_ * 0.9);
+ if (!KeyIsRawZero(invalid)) {
+ Clear();
+ }
+ }
+
+ // Assumes that the key is unique. Multiple insertions won't cause a failure, just inconsistent lookup.
+ template <class T> MutableIterator Insert(const T &t) {
+ ++backend_.entries_;
+ DoubleIfNeeded();
+ return backend_.UncheckedInsert(t);
+ }
+
+ template <class T> bool FindOrInsert(const T &t, MutableIterator &out) {
+ DoubleIfNeeded();
+ return backend_.FindOrInsert(t, out);
+ }
+
+ template <class Key> bool UnsafeMutableFind(const Key key, MutableIterator &out) {
+ return backend_.UnsafeMutableFind(key, out);
+ }
+
+ template <class Key> MutableIterator UnsafeMutableMustFind(const Key key) {
+ return backend_.UnsafeMutableMustFind(key);
+ }
+
+ template <class Key> bool Find(const Key key, ConstIterator &out) const {
+ return backend_.Find(key, out);
+ }
+
+ template <class Key> ConstIterator MustFind(const Key key) const {
+ return backend_.MustFind(key);
+ }
+
+ std::size_t Size() const {
+ return backend_.SizeNoSerialization();
+ }
+
+ void Clear() {
+ backend_.Clear();
+ }
+
+ ConstIterator RawBegin() const {
+ return backend_.RawBegin();
+ }
+ ConstIterator RawEnd() const {
+ return backend_.RawEnd();
+ }
+
+ private:
+ void DoubleIfNeeded() {
+ if (UTIL_LIKELY(Size() < threshold_))
+ return;
+ HugeRealloc(backend_.DoubleTo(), KeyIsRawZero(backend_.invalid_), mem_);
+ allocated_ = backend_.DoubleTo();
+ backend_.Double(mem_.get(), !KeyIsRawZero(backend_.invalid_));
+ threshold_ = std::min<std::size_t>(backend_.buckets_ - 1, backend_.buckets_ * 0.9);
+ }
+
+ bool KeyIsRawZero(const Key &key) {
+ for (const uint8_t *i = reinterpret_cast<const uint8_t*>(&key); i < reinterpret_cast<const uint8_t*>(&key) + sizeof(Key); ++i) {
+ if (*i) return false;
+ }
+ return true;
+ }
+
+ std::size_t allocated_;
+ util::scoped_memory mem_;
+ Backend backend_;
+ std::size_t threshold_;
+};
+
+} // namespace util
+
+#endif // UTIL_PROBING_HASH_TABLE_H
diff --git a/src/kenlm/util/probing_hash_table_benchmark_main.cc b/src/kenlm/util/probing_hash_table_benchmark_main.cc
new file mode 100644
index 0000000..583d21f
--- /dev/null
+++ b/src/kenlm/util/probing_hash_table_benchmark_main.cc
@@ -0,0 +1,181 @@
+#include "util/file.hh"
+#include "util/probing_hash_table.hh"
+#include "util/mmap.hh"
+#include "util/usage.hh"
+
+#include <iostream>
+
+namespace util {
+namespace {
+
+struct Entry {
+ typedef uint64_t Key;
+ Key key;
+ Key GetKey() const { return key; }
+};
+
+// I don't care if this doesn't run on Windows. Empirically /dev/urandom was faster than boost::random's Mersenne Twister.
+class URandom {
+ public:
+ URandom() :
+ it_(buf_ + 1024), end_(buf_ + 1024),
+ file_(util::OpenReadOrThrow("/dev/urandom")) {}
+
+ uint64_t Get() {
+ if (it_ == end_) {
+ it_ = buf_;
+ util::ReadOrThrow(file_.get(), buf_, sizeof(buf_));
+ it_ = buf_;
+ }
+ return *it_++;
+ }
+
+ void Batch(uint64_t *begin, uint64_t *end) {
+ util::ReadOrThrow(file_.get(), begin, (end - begin) * sizeof(uint64_t));
+ }
+
+ private:
+ uint64_t buf_[1024];
+ uint64_t *it_, *end_;
+
+ util::scoped_fd file_;
+};
+
+struct PrefetchEntry {
+ uint64_t key;
+ const Entry *pointer;
+};
+
+template <class TableT, unsigned PrefetchSize> class PrefetchQueue {
+ public:
+ typedef TableT Table;
+
+ explicit PrefetchQueue(Table &table) : table_(table), cur_(0), twiddle_(false) {
+ for (PrefetchEntry *i = entries_; i != entries_ + PrefetchSize; ++i)
+ i->pointer = NULL;
+ }
+
+ void Add(uint64_t key) {
+ if (Cur().pointer) {
+ twiddle_ ^= table_.FindFromIdeal(Cur().key, Cur().pointer);
+ }
+ Cur().key = key;
+ Cur().pointer = table_.Ideal(key);
+ __builtin_prefetch(Cur().pointer, 0, 0);
+ Next();
+ }
+
+ bool Drain() {
+ if (Cur().pointer) {
+ for (PrefetchEntry *i = &Cur(); i < entries_ + PrefetchSize; ++i) {
+ twiddle_ ^= table_.FindFromIdeal(i->key, i->pointer);
+ }
+ }
+ for (PrefetchEntry *i = entries_; i < &Cur(); ++i) {
+ twiddle_ ^= table_.FindFromIdeal(i->key, i->pointer);
+ }
+ return twiddle_;
+ }
+
+ private:
+ PrefetchEntry &Cur() { return entries_[cur_]; }
+ void Next() {
+ ++cur_;
+ cur_ = cur_ % PrefetchSize;
+ }
+
+ Table &table_;
+ PrefetchEntry entries_[PrefetchSize];
+ std::size_t cur_;
+
+ bool twiddle_;
+
+ PrefetchQueue(const PrefetchQueue&);
+ void operator=(const PrefetchQueue&);
+};
+
+template <class TableT> class Immediate {
+ public:
+ typedef TableT Table;
+
+ explicit Immediate(Table &table) : table_(table), twiddle_(false) {}
+
+ void Add(uint64_t key) {
+ typename Table::ConstIterator it;
+ twiddle_ ^= table_.Find(key, it);
+ }
+
+ bool Drain() const { return twiddle_; }
+
+ private:
+ Table &table_;
+ bool twiddle_;
+};
+
+std::size_t Size(uint64_t entries, float multiplier = 1.5) {
+ typedef util::ProbingHashTable<Entry, util::IdentityHash, std::equal_to<Entry::Key>, Power2Mod> Table;
+ // Always round up to power of 2 for fair comparison.
+ return Power2Mod::RoundBuckets(Table::Size(entries, multiplier) / sizeof(Entry)) * sizeof(Entry);
+}
+
+template <class Queue> bool Test(URandom &rn, uint64_t entries, const uint64_t *const queries_begin, const uint64_t *const queries_end, bool ordinary_malloc, float multiplier = 1.5) {
+ std::size_t size = Size(entries, multiplier);
+ scoped_memory backing;
+ if (ordinary_malloc) {
+ backing.reset(util::CallocOrThrow(size), size, scoped_memory::MALLOC_ALLOCATED);
+ } else {
+ util::HugeMalloc(size, true, backing);
+ }
+ typename Queue::Table table(backing.get(), size);
+
+ double start = CPUTime();
+ for (uint64_t i = 0; i < entries; ++i) {
+ Entry entry;
+ entry.key = rn.Get();
+ table.Insert(entry);
+ }
+ double inserted = CPUTime() - start;
+ double before_lookup = CPUTime();
+ Queue queue(table);
+ for (const uint64_t *i = queries_begin; i != queries_end; ++i) {
+ queue.Add(*i);
+ }
+ bool meaningless = queue.Drain();
+ std::cout << ' ' << (inserted / static_cast<double>(entries)) << ' ' << (CPUTime() - before_lookup) / static_cast<double>(queries_end - queries_begin) << std::flush;
+ return meaningless;
+}
+
+bool TestRun(uint64_t lookups = 20000000, float multiplier = 1.5) {
+ URandom rn;
+ util::scoped_memory queries;
+ HugeMalloc(lookups * sizeof(uint64_t), true, queries);
+ rn.Batch(static_cast<uint64_t*>(queries.get()), static_cast<uint64_t*>(queries.get()) + lookups);
+ uint64_t physical_mem_limit = util::GuessPhysicalMemory() / 2;
+ bool meaningless = true;
+ for (uint64_t i = 4; Size(i / multiplier) < physical_mem_limit; i *= 4) {
+ std::cout << static_cast<std::size_t>(i / multiplier) << ' ' << Size(i / multiplier);
+ typedef util::ProbingHashTable<Entry, util::IdentityHash, std::equal_to<Entry::Key>, Power2Mod> Table;
+ typedef util::ProbingHashTable<Entry, util::IdentityHash, std::equal_to<Entry::Key>, DivMod> TableDiv;
+ const uint64_t *const queries_begin = static_cast<const uint64_t*>(queries.get());
+ meaningless ^= util::Test<Immediate<TableDiv> >(rn, i / multiplier, queries_begin, queries_begin + lookups, true, multiplier);
+ meaningless ^= util::Test<Immediate<Table> >(rn, i / multiplier, queries_begin, queries_begin + lookups, true, multiplier);
+ meaningless ^= util::Test<PrefetchQueue<Table, 4> >(rn, i / multiplier, queries_begin, queries_begin + lookups, true, multiplier);
+ meaningless ^= util::Test<Immediate<Table> >(rn, i / multiplier, queries_begin, queries_begin + lookups, false, multiplier);
+ meaningless ^= util::Test<PrefetchQueue<Table, 2> >(rn, i / multiplier, queries_begin, queries_begin + lookups, false, multiplier);
+ meaningless ^= util::Test<PrefetchQueue<Table, 4> >(rn, i / multiplier, queries_begin, queries_begin + lookups, false, multiplier);
+ meaningless ^= util::Test<PrefetchQueue<Table, 8> >(rn, i / multiplier, queries_begin, queries_begin + lookups, false, multiplier);
+ meaningless ^= util::Test<PrefetchQueue<Table, 16> >(rn, i / multiplier, queries_begin, queries_begin + lookups, false, multiplier);
+ std::cout << std::endl;
+ }
+ return meaningless;
+}
+
+} // namespace
+} // namespace util
+
+int main() {
+ bool meaningless = false;
+ std::cout << "#CPU time\n";
+ meaningless ^= util::TestRun();
+ std::cerr << "Meaningless: " << meaningless << '\n';
+}
diff --git a/src/kenlm/util/probing_hash_table_test.cc b/src/kenlm/util/probing_hash_table_test.cc
new file mode 100644
index 0000000..6ed5414
--- /dev/null
+++ b/src/kenlm/util/probing_hash_table_test.cc
@@ -0,0 +1,102 @@
+#include "util/probing_hash_table.hh"
+
+#include "util/murmur_hash.hh"
+#include "util/scoped.hh"
+
+#define BOOST_TEST_MODULE ProbingHashTableTest
+#include <boost/test/unit_test.hpp>
+#include <boost/scoped_array.hpp>
+#include <boost/functional/hash.hpp>
+#include <cstdio>
+#include <cstdlib>
+#include <cstring>
+#include <stdint.h>
+
+namespace util {
+namespace {
+
+struct Entry {
+ unsigned char key;
+ typedef unsigned char Key;
+
+ unsigned char GetKey() const {
+ return key;
+ }
+
+ void SetKey(unsigned char to) {
+ key = to;
+ }
+
+ uint64_t GetValue() const {
+ return value;
+ }
+
+ uint64_t value;
+};
+
+typedef ProbingHashTable<Entry, boost::hash<unsigned char> > Table;
+
+BOOST_AUTO_TEST_CASE(simple) {
+ size_t size = Table::Size(10, 1.2);
+ boost::scoped_array<char> mem(new char[size]);
+ memset(mem.get(), 0, size);
+
+ Table table(mem.get(), size);
+ const Entry *i = NULL;
+ BOOST_CHECK(!table.Find(2, i));
+ Entry to_ins;
+ to_ins.key = 3;
+ to_ins.value = 328920;
+ table.Insert(to_ins);
+ BOOST_REQUIRE(table.Find(3, i));
+ BOOST_CHECK_EQUAL(3, i->GetKey());
+ BOOST_CHECK_EQUAL(static_cast<uint64_t>(328920), i->GetValue());
+ BOOST_CHECK(!table.Find(2, i));
+}
+
+struct Entry64 {
+ uint64_t key;
+ typedef uint64_t Key;
+
+ Entry64() {}
+
+ explicit Entry64(uint64_t key_in) {
+ key = key_in;
+ }
+
+ Key GetKey() const { return key; }
+ void SetKey(uint64_t to) { key = to; }
+};
+
+struct MurmurHashEntry64 {
+ std::size_t operator()(uint64_t value) const {
+ return util::MurmurHash64A(&value, 8);
+ }
+};
+
+typedef ProbingHashTable<Entry64, MurmurHashEntry64> Table64;
+
+BOOST_AUTO_TEST_CASE(Double) {
+ for (std::size_t initial = 19; initial < 30; ++initial) {
+ size_t size = Table64::Size(initial, 1.2);
+ scoped_malloc mem(MallocOrThrow(size));
+ Table64 table(mem.get(), size, std::numeric_limits<uint64_t>::max());
+ table.Clear();
+ for (uint64_t i = 0; i < 19; ++i) {
+ table.Insert(Entry64(i));
+ }
+ table.CheckConsistency();
+ mem.call_realloc(table.DoubleTo());
+ table.Double(mem.get());
+ table.CheckConsistency();
+ for (uint64_t i = 20; i < 40 ; ++i) {
+ table.Insert(Entry64(i));
+ }
+ mem.call_realloc(table.DoubleTo());
+ table.Double(mem.get());
+ table.CheckConsistency();
+ }
+}
+
+} // namespace
+} // namespace util
diff --git a/src/joshua/decoder/ff/lm/kenlm/util/proxy_iterator.hh b/src/kenlm/util/proxy_iterator.hh
similarity index 100%
rename from src/joshua/decoder/ff/lm/kenlm/util/proxy_iterator.hh
rename to src/kenlm/util/proxy_iterator.hh
diff --git a/src/kenlm/util/read_compressed.cc b/src/kenlm/util/read_compressed.cc
new file mode 100644
index 0000000..504c579
--- /dev/null
+++ b/src/kenlm/util/read_compressed.cc
@@ -0,0 +1,448 @@
+#include "util/read_compressed.hh"
+
+#include "util/file.hh"
+#include "util/have.hh"
+#include "util/scoped.hh"
+
+#include <algorithm>
+#include <iostream>
+
+#include <cassert>
+#include <climits>
+#include <cstdlib>
+#include <cstring>
+
+#ifdef HAVE_ZLIB
+#include <zlib.h>
+#endif
+
+#ifdef HAVE_BZLIB
+#include <bzlib.h>
+#endif
+
+#ifdef HAVE_XZLIB
+#include <lzma.h>
+#endif
+
+namespace util {
+
+CompressedException::CompressedException() throw() {}
+CompressedException::~CompressedException() throw() {}
+
+GZException::GZException() throw() {}
+GZException::~GZException() throw() {}
+
+BZException::BZException() throw() {}
+BZException::~BZException() throw() {}
+
+XZException::XZException() throw() {}
+XZException::~XZException() throw() {}
+
+class ReadBase {
+ public:
+ virtual ~ReadBase() {}
+
+ virtual std::size_t Read(void *to, std::size_t amount, ReadCompressed &thunk) = 0;
+
+ protected:
+ static void ReplaceThis(ReadBase *with, ReadCompressed &thunk) {
+ thunk.internal_.reset(with);
+ }
+
+ ReadBase *Current(ReadCompressed &thunk) { return thunk.internal_.get(); }
+
+ static uint64_t &ReadCount(ReadCompressed &thunk) {
+ return thunk.raw_amount_;
+ }
+};
+
+namespace {
+
+ReadBase *ReadFactory(int fd, uint64_t &raw_amount, const void *already_data, std::size_t already_size, bool require_compressed);
+
+// Completed file that other classes can thunk to.
+class Complete : public ReadBase {
+ public:
+ std::size_t Read(void *, std::size_t, ReadCompressed &) {
+ return 0;
+ }
+};
+
+class Uncompressed : public ReadBase {
+ public:
+ explicit Uncompressed(int fd) : fd_(fd) {}
+
+ std::size_t Read(void *to, std::size_t amount, ReadCompressed &thunk) {
+ std::size_t got = PartialRead(fd_.get(), to, amount);
+ ReadCount(thunk) += got;
+ return got;
+ }
+
+ private:
+ scoped_fd fd_;
+};
+
+class UncompressedWithHeader : public ReadBase {
+ public:
+ UncompressedWithHeader(int fd, const void *already_data, std::size_t already_size) : fd_(fd) {
+ assert(already_size);
+ buf_.reset(malloc(already_size));
+ if (!buf_.get()) throw std::bad_alloc();
+ memcpy(buf_.get(), already_data, already_size);
+ remain_ = static_cast<uint8_t*>(buf_.get());
+ end_ = remain_ + already_size;
+ }
+
+ std::size_t Read(void *to, std::size_t amount, ReadCompressed &thunk) {
+ assert(buf_.get());
+ assert(remain_ != end_);
+ std::size_t sending = std::min<std::size_t>(amount, end_ - remain_);
+ memcpy(to, remain_, sending);
+ remain_ += sending;
+ if (remain_ == end_) {
+ ReplaceThis(new Uncompressed(fd_.release()), thunk);
+ }
+ return sending;
+ }
+
+ private:
+ scoped_malloc buf_;
+ uint8_t *remain_;
+ uint8_t *end_;
+
+ scoped_fd fd_;
+};
+
+static const std::size_t kInputBuffer = 16384;
+
+template <class Compression> class StreamCompressed : public ReadBase {
+ public:
+ StreamCompressed(int fd, const void *already_data, std::size_t already_size)
+ : file_(fd),
+ in_buffer_(MallocOrThrow(kInputBuffer)),
+ back_(memcpy(in_buffer_.get(), already_data, already_size), already_size) {}
+
+ std::size_t Read(void *to, std::size_t amount, ReadCompressed &thunk) {
+ if (amount == 0) return 0;
+ back_.SetOutput(to, amount);
+ do {
+ if (!back_.Stream().avail_in) ReadInput(thunk);
+ if (!back_.Process()) {
+ // reached end, at least for the compressed portion.
+ std::size_t ret = static_cast<const uint8_t *>(static_cast<void*>(back_.Stream().next_out)) - static_cast<const uint8_t*>(to);
+ ReplaceThis(ReadFactory(file_.release(), ReadCount(thunk), back_.Stream().next_in, back_.Stream().avail_in, true), thunk);
+ if (ret) return ret;
+ // We did not read anything this round, so clients might think EOF. Transfer responsibility to the next reader.
+ return Current(thunk)->Read(to, amount, thunk);
+ }
+ } while (back_.Stream().next_out == to);
+ return static_cast<const uint8_t*>(static_cast<void*>(back_.Stream().next_out)) - static_cast<const uint8_t*>(to);
+ }
+
+ private:
+ void ReadInput(ReadCompressed &thunk) {
+ assert(!back_.Stream().avail_in);
+ std::size_t got = ReadOrEOF(file_.get(), in_buffer_.get(), kInputBuffer);
+ back_.SetInput(in_buffer_.get(), got);
+ ReadCount(thunk) += got;
+ }
+
+ scoped_fd file_;
+ scoped_malloc in_buffer_;
+
+ Compression back_;
+};
+
+#ifdef HAVE_ZLIB
+class GZip {
+ public:
+ GZip(const void *base, std::size_t amount) {
+ SetInput(base, amount);
+ stream_.zalloc = Z_NULL;
+ stream_.zfree = Z_NULL;
+ stream_.opaque = Z_NULL;
+ stream_.msg = NULL;
+ // 32 for zlib and gzip decoding with automatic header detection.
+ // 15 for maximum window size.
+ UTIL_THROW_IF(Z_OK != inflateInit2(&stream_, 32 + 15), GZException, "Failed to initialize zlib.");
+ }
+
+ ~GZip() {
+ if (Z_OK != inflateEnd(&stream_)) {
+ std::cerr << "zlib could not close properly." << std::endl;
+ abort();
+ }
+ }
+
+ void SetOutput(void *to, std::size_t amount) {
+ stream_.next_out = static_cast<Bytef*>(to);
+ stream_.avail_out = std::min<std::size_t>(std::numeric_limits<uInt>::max(), amount);
+ }
+
+ void SetInput(const void *base, std::size_t amount) {
+ assert(amount < static_cast<std::size_t>(std::numeric_limits<uInt>::max()));
+ stream_.next_in = const_cast<Bytef*>(static_cast<const Bytef*>(base));
+ stream_.avail_in = amount;
+ }
+
+ const z_stream &Stream() const { return stream_; }
+
+ bool Process() {
+ int result = inflate(&stream_, 0);
+ switch (result) {
+ case Z_OK:
+ return true;
+ case Z_STREAM_END:
+ return false;
+ case Z_ERRNO:
+ UTIL_THROW(ErrnoException, "zlib error");
+ default:
+ UTIL_THROW(GZException, "zlib encountered " << (stream_.msg ? stream_.msg : "an error ") << " code " << result);
+ }
+ }
+
+ private:
+ z_stream stream_;
+};
+#endif // HAVE_ZLIB
+
+#ifdef HAVE_BZLIB
+class BZip {
+ public:
+ BZip(const void *base, std::size_t amount) {
+ memset(&stream_, 0, sizeof(stream_));
+ SetInput(base, amount);
+ HandleError(BZ2_bzDecompressInit(&stream_, 0, 0));
+ }
+
+ ~BZip() {
+ try {
+ HandleError(BZ2_bzDecompressEnd(&stream_));
+ } catch (const std::exception &e) {
+ std::cerr << e.what() << std::endl;
+ abort();
+ }
+ }
+
+ bool Process() {
+ int ret = BZ2_bzDecompress(&stream_);
+ if (ret == BZ_STREAM_END) return false;
+ HandleError(ret);
+ return true;
+ }
+
+ void SetOutput(void *base, std::size_t amount) {
+ stream_.next_out = static_cast<char*>(base);
+ stream_.avail_out = std::min<std::size_t>(std::numeric_limits<unsigned int>::max(), amount);
+ }
+
+ void SetInput(const void *base, std::size_t amount) {
+ stream_.next_in = const_cast<char*>(static_cast<const char*>(base));
+ stream_.avail_in = amount;
+ }
+
+ const bz_stream &Stream() const { return stream_; }
+
+ private:
+ void HandleError(int value) {
+ switch(value) {
+ case BZ_OK:
+ return;
+ case BZ_CONFIG_ERROR:
+ UTIL_THROW(BZException, "bzip2 seems to be miscompiled.");
+ case BZ_PARAM_ERROR:
+ UTIL_THROW(BZException, "bzip2 Parameter error");
+ case BZ_DATA_ERROR:
+ UTIL_THROW(BZException, "bzip2 detected a corrupt file");
+ case BZ_DATA_ERROR_MAGIC:
+ UTIL_THROW(BZException, "bzip2 detected bad magic bytes. Perhaps this was not a bzip2 file after all?");
+ case BZ_MEM_ERROR:
+ throw std::bad_alloc();
+ default:
+ UTIL_THROW(BZException, "Unknown bzip2 error code " << value);
+ }
+ }
+
+ bz_stream stream_;
+};
+#endif // HAVE_BZLIB
+
+#ifdef HAVE_XZLIB
+class XZip {
+ public:
+ XZip(const void *base, std::size_t amount)
+ : stream_(), action_(LZMA_RUN) {
+ memset(&stream_, 0, sizeof(stream_));
+ SetInput(base, amount);
+ HandleError(lzma_stream_decoder(&stream_, UINT64_MAX, 0));
+ }
+
+ ~XZip() {
+ lzma_end(&stream_);
+ }
+
+ void SetOutput(void *base, std::size_t amount) {
+ stream_.next_out = static_cast<uint8_t*>(base);
+ stream_.avail_out = amount;
+ }
+
+ void SetInput(const void *base, std::size_t amount) {
+ stream_.next_in = static_cast<const uint8_t*>(base);
+ stream_.avail_in = amount;
+ if (!amount) action_ = LZMA_FINISH;
+ }
+
+ const lzma_stream &Stream() const { return stream_; }
+
+ bool Process() {
+ lzma_ret status = lzma_code(&stream_, action_);
+ if (status == LZMA_STREAM_END) return false;
+ HandleError(status);
+ return true;
+ }
+
+ private:
+ void HandleError(lzma_ret value) {
+ switch (value) {
+ case LZMA_OK:
+ return;
+ case LZMA_MEM_ERROR:
+ throw std::bad_alloc();
+ case LZMA_FORMAT_ERROR:
+ UTIL_THROW(XZException, "xzlib says file format not recognized");
+ case LZMA_OPTIONS_ERROR:
+ UTIL_THROW(XZException, "xzlib says unsupported compression options");
+ case LZMA_DATA_ERROR:
+ UTIL_THROW(XZException, "xzlib says this file is corrupt");
+ case LZMA_BUF_ERROR:
+ UTIL_THROW(XZException, "xzlib says unexpected end of input");
+ default:
+ UTIL_THROW(XZException, "unrecognized xzlib error " << value);
+ }
+ }
+
+ lzma_stream stream_;
+ lzma_action action_;
+};
+#endif // HAVE_XZLIB
+
+class IStreamReader : public ReadBase {
+ public:
+ explicit IStreamReader(std::istream &stream) : stream_(stream) {}
+
+ std::size_t Read(void *to, std::size_t amount, ReadCompressed &thunk) {
+ if (!stream_.read(static_cast<char*>(to), amount)) {
+ UTIL_THROW_IF(!stream_.eof(), ErrnoException, "istream error");
+ amount = stream_.gcount();
+ }
+ ReadCount(thunk) += amount;
+ return amount;
+ }
+
+ private:
+ std::istream &stream_;
+};
+
+enum MagicResult {
+ UTIL_UNKNOWN, UTIL_GZIP, UTIL_BZIP, UTIL_XZIP
+};
+
+MagicResult DetectMagic(const void *from_void, std::size_t length) {
+ const uint8_t *header = static_cast<const uint8_t*>(from_void);
+ if (length >= 2 && header[0] == 0x1f && header[1] == 0x8b) {
+ return UTIL_GZIP;
+ }
+ const uint8_t kBZMagic[3] = {'B', 'Z', 'h'};
+ if (length >= sizeof(kBZMagic) && !memcmp(header, kBZMagic, sizeof(kBZMagic))) {
+ return UTIL_BZIP;
+ }
+ const uint8_t kXZMagic[6] = { 0xFD, '7', 'z', 'X', 'Z', 0x00 };
+ if (length >= sizeof(kXZMagic) && !memcmp(header, kXZMagic, sizeof(kXZMagic))) {
+ return UTIL_XZIP;
+ }
+ return UTIL_UNKNOWN;
+}
+
+ReadBase *ReadFactory(int fd, uint64_t &raw_amount, const void *already_data, const std::size_t already_size, bool require_compressed) {
+ scoped_fd hold(fd);
+ std::string header(reinterpret_cast<const char*>(already_data), already_size);
+ if (header.size() < ReadCompressed::kMagicSize) {
+ std::size_t original = header.size();
+ header.resize(ReadCompressed::kMagicSize);
+ std::size_t got = ReadOrEOF(fd, &header[original], ReadCompressed::kMagicSize - original);
+ raw_amount += got;
+ header.resize(original + got);
+ }
+ if (header.empty()) {
+ return new Complete();
+ }
+ switch (DetectMagic(&header[0], header.size())) {
+ case UTIL_GZIP:
+#ifdef HAVE_ZLIB
+ return new StreamCompressed<GZip>(hold.release(), header.data(), header.size());
+#else
+ UTIL_THROW(CompressedException, "This looks like a gzip file but gzip support was not compiled in.");
+#endif
+ case UTIL_BZIP:
+#ifdef HAVE_BZLIB
+ return new StreamCompressed<BZip>(hold.release(), &header[0], header.size());
+#else
+ UTIL_THROW(CompressedException, "This looks like a bzip file (it begins with BZh), but bzip support was not compiled in.");
+#endif
+ case UTIL_XZIP:
+#ifdef HAVE_XZLIB
+ return new StreamCompressed<XZip>(hold.release(), header.data(), header.size());
+#else
+ UTIL_THROW(CompressedException, "This looks like an xz file, but xz support was not compiled in.");
+#endif
+ default:
+ UTIL_THROW_IF(require_compressed, CompressedException, "Uncompressed data detected after a compresssed file. This could be supported but usually indicates an error.");
+ return new UncompressedWithHeader(hold.release(), header.data(), header.size());
+ }
+}
+
+} // namespace
+
+bool ReadCompressed::DetectCompressedMagic(const void *from_void) {
+ return DetectMagic(from_void, kMagicSize) != UTIL_UNKNOWN;
+}
+
+ReadCompressed::ReadCompressed(int fd) {
+ Reset(fd);
+}
+
+ReadCompressed::ReadCompressed(std::istream &in) {
+ Reset(in);
+}
+
+ReadCompressed::ReadCompressed() {}
+
+ReadCompressed::~ReadCompressed() {}
+
+void ReadCompressed::Reset(int fd) {
+ raw_amount_ = 0;
+ internal_.reset();
+ internal_.reset(ReadFactory(fd, raw_amount_, NULL, 0, false));
+}
+
+void ReadCompressed::Reset(std::istream &in) {
+ internal_.reset();
+ internal_.reset(new IStreamReader(in));
+}
+
+std::size_t ReadCompressed::Read(void *to, std::size_t amount) {
+ return internal_->Read(to, amount, *this);
+}
+
+std::size_t ReadCompressed::ReadOrEOF(void *const to_in, std::size_t amount) {
+ uint8_t *to = reinterpret_cast<uint8_t*>(to_in);
+ while (amount) {
+ std::size_t got = Read(to, amount);
+ if (!got) break;
+ to += got;
+ amount -= got;
+ }
+ return to - reinterpret_cast<uint8_t*>(to_in);
+}
+
+} // namespace util
diff --git a/src/kenlm/util/read_compressed.hh b/src/kenlm/util/read_compressed.hh
new file mode 100644
index 0000000..935a495
--- /dev/null
+++ b/src/kenlm/util/read_compressed.hh
@@ -0,0 +1,84 @@
+#ifndef UTIL_READ_COMPRESSED_H
+#define UTIL_READ_COMPRESSED_H
+
+#include "util/exception.hh"
+#include "util/scoped.hh"
+
+#include <cstddef>
+#include <stdint.h>
+
+namespace util {
+
+class CompressedException : public Exception {
+ public:
+ CompressedException() throw();
+ virtual ~CompressedException() throw();
+};
+
+class GZException : public CompressedException {
+ public:
+ GZException() throw();
+ ~GZException() throw();
+};
+
+class BZException : public CompressedException {
+ public:
+ BZException() throw();
+ ~BZException() throw();
+};
+
+class XZException : public CompressedException {
+ public:
+ XZException() throw();
+ ~XZException() throw();
+};
+
+class ReadBase;
+
+class ReadCompressed {
+ public:
+ static const std::size_t kMagicSize = 6;
+ // Must have at least kMagicSize bytes.
+ static bool DetectCompressedMagic(const void *from);
+
+ // Takes ownership of fd.
+ explicit ReadCompressed(int fd);
+
+ // Try to avoid using this. Use the fd instead.
+ // There is no decompression support for istreams.
+ explicit ReadCompressed(std::istream &in);
+
+ // Must call Reset later.
+ ReadCompressed();
+
+ ~ReadCompressed();
+
+ // Takes ownership of fd.
+ void Reset(int fd);
+
+ // Same advice as the constructor.
+ void Reset(std::istream &in);
+
+ std::size_t Read(void *to, std::size_t amount);
+
+ // Repeatedly call read to fill a buffer unless EOF is hit.
+ // Return number of bytes read.
+ std::size_t ReadOrEOF(void *const to, std::size_t amount);
+
+ uint64_t RawAmount() const { return raw_amount_; }
+
+ private:
+ friend class ReadBase;
+
+ scoped_ptr<ReadBase> internal_;
+
+ uint64_t raw_amount_;
+
+ // No copying.
+ ReadCompressed(const ReadCompressed &);
+ void operator=(const ReadCompressed &);
+};
+
+} // namespace util
+
+#endif // UTIL_READ_COMPRESSED_H
diff --git a/src/kenlm/util/read_compressed_test.cc b/src/kenlm/util/read_compressed_test.cc
new file mode 100644
index 0000000..a983ca7
--- /dev/null
+++ b/src/kenlm/util/read_compressed_test.cc
@@ -0,0 +1,130 @@
+#include "util/read_compressed.hh"
+
+#include "util/file.hh"
+#include "util/have.hh"
+
+#define BOOST_TEST_MODULE ReadCompressedTest
+#include <boost/test/unit_test.hpp>
+#include <boost/scoped_ptr.hpp>
+
+#include <fstream>
+#include <string>
+#include <cstdlib>
+
+#if defined __MINGW32__
+#include <ctime>
+#include <fcntl.h>
+
+#if !defined mkstemp
+// TODO insecure
+int mkstemp(char * stemplate)
+{
+ char *filename = mktemp(stemplate);
+ if (filename == NULL)
+ return -1;
+ return open(filename, O_RDWR | O_CREAT, 0600);
+}
+#endif
+
+#endif // defined
+
+namespace util {
+namespace {
+
+void ReadLoop(ReadCompressed &reader, void *to_void, std::size_t amount) {
+ uint8_t *to = static_cast<uint8_t*>(to_void);
+ while (amount) {
+ std::size_t ret = reader.Read(to, amount);
+ BOOST_REQUIRE(ret);
+ to += ret;
+ amount -= ret;
+ }
+}
+
+const uint32_t kSize4 = 100000 / 4;
+
+std::string WriteRandom() {
+ char name[] = "tempXXXXXX";
+ scoped_fd original(mkstemp(name));
+ BOOST_REQUIRE(original.get() > 0);
+ for (uint32_t i = 0; i < kSize4; ++i) {
+ WriteOrThrow(original.get(), &i, sizeof(uint32_t));
+ }
+ return name;
+}
+
+void VerifyRead(ReadCompressed &reader) {
+ for (uint32_t i = 0; i < kSize4; ++i) {
+ uint32_t got;
+ ReadLoop(reader, &got, sizeof(uint32_t));
+ BOOST_CHECK_EQUAL(i, got);
+ }
+
+ char ignored;
+ BOOST_CHECK_EQUAL((std::size_t)0, reader.Read(&ignored, 1));
+ // Test double EOF call.
+ BOOST_CHECK_EQUAL((std::size_t)0, reader.Read(&ignored, 1));
+}
+
+void TestRandom(const char *compressor) {
+ std::string name(WriteRandom());
+
+ char gzname[] = "tempXXXXXX";
+ scoped_fd gzipped(mkstemp(gzname));
+
+ std::string command(compressor);
+#ifdef __CYGWIN__
+ command += ".exe";
+#endif
+ command += " <\"";
+ command += name;
+ command += "\" >\"";
+ command += gzname;
+ command += "\"";
+ BOOST_REQUIRE_EQUAL(0, system(command.c_str()));
+
+ BOOST_CHECK_EQUAL(0, unlink(name.c_str()));
+ BOOST_CHECK_EQUAL(0, unlink(gzname));
+
+ ReadCompressed reader(gzipped.release());
+ VerifyRead(reader);
+}
+
+BOOST_AUTO_TEST_CASE(Uncompressed) {
+ TestRandom("cat");
+}
+
+#ifdef HAVE_ZLIB
+BOOST_AUTO_TEST_CASE(ReadGZ) {
+ TestRandom("gzip");
+}
+#endif // HAVE_ZLIB
+
+#ifdef HAVE_BZLIB
+BOOST_AUTO_TEST_CASE(ReadBZ) {
+ TestRandom("bzip2");
+}
+#endif // HAVE_BZLIB
+
+#ifdef HAVE_XZLIB
+BOOST_AUTO_TEST_CASE(ReadXZ) {
+ TestRandom("xz");
+}
+#endif
+
+#ifdef HAVE_ZLIB
+BOOST_AUTO_TEST_CASE(AppendGZ) {
+}
+#endif
+
+BOOST_AUTO_TEST_CASE(IStream) {
+ std::string name(WriteRandom());
+ std::fstream stream(name.c_str(), std::ios::in);
+ BOOST_CHECK_EQUAL(0, unlink(name.c_str()));
+ ReadCompressed reader;
+ reader.Reset(stream);
+ VerifyRead(reader);
+}
+
+} // namespace
+} // namespace util
diff --git a/src/kenlm/util/scoped.cc b/src/kenlm/util/scoped.cc
new file mode 100644
index 0000000..817aa24
--- /dev/null
+++ b/src/kenlm/util/scoped.cc
@@ -0,0 +1,43 @@
+#include "util/scoped.hh"
+
+#include <cstdlib>
+#if !defined(_WIN32) && !defined(_WIN64)
+#include <sys/mman.h>
+#endif
+
+namespace util {
+
+// TODO: if we're really under memory pressure, don't allocate memory to
+// display the error.
+MallocException::MallocException(std::size_t requested) throw() {
+ *this << "for " << requested << " bytes ";
+}
+
+MallocException::~MallocException() throw() {}
+
+namespace {
+void *InspectAddr(void *addr, std::size_t requested, const char *func_name) {
+ UTIL_THROW_IF_ARG(!addr && requested, MallocException, (requested), "in " << func_name);
+ return addr;
+}
+} // namespace
+
+void *MallocOrThrow(std::size_t requested) {
+ return InspectAddr(std::malloc(requested), requested, "malloc");
+}
+
+void *CallocOrThrow(std::size_t requested) {
+ return InspectAddr(std::calloc(requested, 1), requested, "calloc");
+}
+
+void scoped_malloc::call_realloc(std::size_t requested) {
+ p_ = InspectAddr(std::realloc(p_, requested), requested, "realloc");
+}
+
+void AdviseHugePages(const void *addr, std::size_t size) {
+#if MADV_HUGEPAGE
+ madvise((void*)addr, size, MADV_HUGEPAGE);
+#endif
+}
+
+} // namespace util
diff --git a/src/kenlm/util/scoped.hh b/src/kenlm/util/scoped.hh
new file mode 100644
index 0000000..21e9a75
--- /dev/null
+++ b/src/kenlm/util/scoped.hh
@@ -0,0 +1,111 @@
+#ifndef UTIL_SCOPED_H
+#define UTIL_SCOPED_H
+/* Other scoped objects in the style of scoped_ptr. */
+
+#include "util/exception.hh"
+#include <cstddef>
+#include <cstdlib>
+
+namespace util {
+
+class MallocException : public ErrnoException {
+ public:
+ explicit MallocException(std::size_t requested) throw();
+ ~MallocException() throw();
+};
+
+void *MallocOrThrow(std::size_t requested);
+void *CallocOrThrow(std::size_t requested);
+
+/* Unfortunately, defining the operator* for void * makes the compiler complain.
+ * So scoped is specialized to void. This includes the functionality common to
+ * both, namely everything except reference.
+ */
+template <class T, class Closer> class scoped_base {
+ public:
+ explicit scoped_base(T *p = NULL) : p_(p) {}
+
+ ~scoped_base() { Closer::Close(p_); }
+
+ void reset(T *p = NULL) {
+ scoped_base other(p_);
+ p_ = p;
+ }
+
+ T *get() { return p_; }
+ const T *get() const { return p_; }
+
+ T *operator->() { return p_; }
+ const T *operator->() const { return p_; }
+
+ T *release() {
+ T *ret = p_;
+ p_ = NULL;
+ return ret;
+ }
+
+ protected:
+ T *p_;
+
+ private:
+ scoped_base(const scoped_base &);
+ scoped_base &operator=(const scoped_base &);
+};
+
+template <class T, class Closer> class scoped : public scoped_base<T, Closer> {
+ public:
+ explicit scoped(T *p = NULL) : scoped_base<T, Closer>(p) {}
+
+ T &operator*() { return *scoped_base<T, Closer>::p_; }
+ const T&operator*() const { return *scoped_base<T, Closer>::p_; }
+};
+
+template <class Closer> class scoped<void, Closer> : public scoped_base<void, Closer> {
+ public:
+ explicit scoped(void *p = NULL) : scoped_base<void, Closer>(p) {}
+};
+
+/* Closer for c functions like std::free and cmph cleanup functions */
+template <class T, void (*clean)(T*)> struct scoped_c_forward {
+ static void Close(T *p) { clean(p); }
+};
+// Call a C function to delete stuff
+template <class T, void (*clean)(T*)> class scoped_c : public scoped<T, scoped_c_forward<T, clean> > {
+ public:
+ explicit scoped_c(T *p = NULL) : scoped<T, scoped_c_forward<T, clean> >(p) {}
+};
+
+class scoped_malloc : public scoped_c<void, std::free> {
+ public:
+ explicit scoped_malloc(void *p = NULL) : scoped_c<void, std::free>(p) {}
+
+ void call_realloc(std::size_t to);
+};
+
+/* scoped_array using delete[] */
+struct scoped_delete_array_forward {
+ template <class T> static void Close(T *p) { delete [] p; }
+};
+// Hat tip to boost.
+template <class T> class scoped_array : public scoped<T, scoped_delete_array_forward> {
+ public:
+ explicit scoped_array(T *p = NULL) : scoped<T, scoped_delete_array_forward>(p) {}
+
+ T &operator[](std::size_t idx) { return scoped<T, scoped_delete_array_forward>::p_[idx]; }
+ const T &operator[](std::size_t idx) const { return scoped<T, scoped_delete_array_forward>::p_[idx]; }
+};
+
+/* scoped_ptr using delete. If only there were a template typedef. */
+struct scoped_delete_forward {
+ template <class T> static void Close(T *p) { delete p; }
+};
+template <class T> class scoped_ptr : public scoped<T, scoped_delete_forward> {
+ public:
+ explicit scoped_ptr(T *p = NULL) : scoped<T, scoped_delete_forward>(p) {}
+};
+
+void AdviseHugePages(const void *addr, std::size_t size);
+
+} // namespace util
+
+#endif // UTIL_SCOPED_H
diff --git a/src/kenlm/util/sized_iterator.hh b/src/kenlm/util/sized_iterator.hh
new file mode 100644
index 0000000..c28d8e0
--- /dev/null
+++ b/src/kenlm/util/sized_iterator.hh
@@ -0,0 +1,120 @@
+#ifndef UTIL_SIZED_ITERATOR_H
+#define UTIL_SIZED_ITERATOR_H
+
+#include "util/proxy_iterator.hh"
+
+#include <algorithm>
+#include <functional>
+#include <string>
+
+#include <stdint.h>
+#include <cstring>
+
+namespace util {
+
+class SizedInnerIterator {
+ public:
+ SizedInnerIterator() {}
+
+ SizedInnerIterator(void *ptr, std::size_t size) : ptr_(static_cast<uint8_t*>(ptr)), size_(size) {}
+
+ bool operator==(const SizedInnerIterator &other) const {
+ return ptr_ == other.ptr_;
+ }
+ bool operator<(const SizedInnerIterator &other) const {
+ return ptr_ < other.ptr_;
+ }
+ SizedInnerIterator &operator+=(std::ptrdiff_t amount) {
+ ptr_ += amount * size_;
+ return *this;
+ }
+ std::ptrdiff_t operator-(const SizedInnerIterator &other) const {
+ return (ptr_ - other.ptr_) / size_;
+ }
+
+ const void *Data() const { return ptr_; }
+ void *Data() { return ptr_; }
+ std::size_t EntrySize() const { return size_; }
+
+ friend void swap(SizedInnerIterator &first, SizedInnerIterator &second) {
+ std::swap(first.ptr_, second.ptr_);
+ std::swap(first.size_, second.size_);
+ }
+
+ private:
+ uint8_t *ptr_;
+ std::size_t size_;
+};
+
+class SizedProxy {
+ public:
+ SizedProxy() {}
+
+ SizedProxy(void *ptr, std::size_t size) : inner_(ptr, size) {}
+
+ operator std::string() const {
+ return std::string(reinterpret_cast<const char*>(inner_.Data()), inner_.EntrySize());
+ }
+
+ SizedProxy &operator=(const SizedProxy &from) {
+ memcpy(inner_.Data(), from.inner_.Data(), inner_.EntrySize());
+ return *this;
+ }
+
+ SizedProxy &operator=(const std::string &from) {
+ memcpy(inner_.Data(), from.data(), inner_.EntrySize());
+ return *this;
+ }
+
+ const void *Data() const { return inner_.Data(); }
+ void *Data() { return inner_.Data(); }
+
+ friend void swap(SizedProxy first, SizedProxy second) {
+ std::swap_ranges(
+ static_cast<char*>(first.inner_.Data()),
+ static_cast<char*>(first.inner_.Data()) + first.inner_.EntrySize(),
+ static_cast<char*>(second.inner_.Data()));
+ }
+
+ private:
+ friend class util::ProxyIterator<SizedProxy>;
+
+ typedef std::string value_type;
+
+ typedef SizedInnerIterator InnerIterator;
+
+ InnerIterator &Inner() { return inner_; }
+ const InnerIterator &Inner() const { return inner_; }
+ InnerIterator inner_;
+};
+
+typedef ProxyIterator<SizedProxy> SizedIterator;
+
+inline SizedIterator SizedIt(void *ptr, std::size_t size) { return SizedIterator(SizedProxy(ptr, size)); }
+
+// Useful wrapper for a comparison function i.e. sort.
+template <class Delegate, class Proxy = SizedProxy> class SizedCompare : public std::binary_function<const Proxy &, const Proxy &, bool> {
+ public:
+ explicit SizedCompare(const Delegate &delegate = Delegate()) : delegate_(delegate) {}
+
+ bool operator()(const Proxy &first, const Proxy &second) const {
+ return delegate_(first.Data(), second.Data());
+ }
+ bool operator()(const Proxy &first, const std::string &second) const {
+ return delegate_(first.Data(), second.data());
+ }
+ bool operator()(const std::string &first, const Proxy &second) const {
+ return delegate_(first.data(), second.Data());
+ }
+ bool operator()(const std::string &first, const std::string &second) const {
+ return delegate_(first.data(), second.data());
+ }
+
+ const Delegate &GetDelegate() const { return delegate_; }
+
+ private:
+ const Delegate delegate_;
+};
+
+} // namespace util
+#endif // UTIL_SIZED_ITERATOR_H
diff --git a/src/joshua/decoder/ff/lm/kenlm/util/sized_iterator_test.cc b/src/kenlm/util/sized_iterator_test.cc
similarity index 100%
rename from src/joshua/decoder/ff/lm/kenlm/util/sized_iterator_test.cc
rename to src/kenlm/util/sized_iterator_test.cc
diff --git a/src/kenlm/util/sorted_uniform.hh b/src/kenlm/util/sorted_uniform.hh
new file mode 100644
index 0000000..ddd2b3f
--- /dev/null
+++ b/src/kenlm/util/sorted_uniform.hh
@@ -0,0 +1,105 @@
+#ifndef UTIL_SORTED_UNIFORM_H
+#define UTIL_SORTED_UNIFORM_H
+
+#include <algorithm>
+#include <cstddef>
+#include <cassert>
+#include <stdint.h>
+
+namespace util {
+
+template <class T> class IdentityAccessor {
+ public:
+ typedef T Key;
+ T operator()(const T *in) const { return *in; }
+};
+
+struct Pivot64 {
+ static inline std::size_t Calc(uint64_t off, uint64_t range, std::size_t width) {
+ std::size_t ret = static_cast<std::size_t>(static_cast<float>(off) / static_cast<float>(range) * static_cast<float>(width));
+ // Cap for floating point rounding
+ return (ret < width) ? ret : width - 1;
+ }
+};
+
+// Use when off * width is <2^64. This is guaranteed when each of them is actually a 32-bit value.
+struct Pivot32 {
+ static inline std::size_t Calc(uint64_t off, uint64_t range, uint64_t width) {
+ return static_cast<std::size_t>((off * width) / (range + 1));
+ }
+};
+
+// Usage: PivotSelect<sizeof(DataType)>::T
+template <unsigned> struct PivotSelect;
+template <> struct PivotSelect<8> { typedef Pivot64 T; };
+template <> struct PivotSelect<4> { typedef Pivot32 T; };
+template <> struct PivotSelect<2> { typedef Pivot32 T; };
+
+/* Binary search. */
+template <class Iterator, class Accessor> bool BinaryFind(
+ const Accessor &accessor,
+ Iterator begin,
+ Iterator end,
+ const typename Accessor::Key key, Iterator &out) {
+ while (end > begin) {
+ Iterator pivot(begin + (end - begin) / 2);
+ typename Accessor::Key mid(accessor(pivot));
+ if (mid < key) {
+ begin = pivot + 1;
+ } else if (mid > key) {
+ end = pivot;
+ } else {
+ out = pivot;
+ return true;
+ }
+ }
+ return false;
+}
+
+// Search the range [before_it + 1, after_it - 1] for key.
+// Preconditions:
+// before_v <= key <= after_v
+// before_v <= all values in the range [before_it + 1, after_it - 1] <= after_v
+// range is sorted.
+template <class Iterator, class Accessor, class Pivot> bool BoundedSortedUniformFind(
+ const Accessor &accessor,
+ Iterator before_it, typename Accessor::Key before_v,
+ Iterator after_it, typename Accessor::Key after_v,
+ const typename Accessor::Key key, Iterator &out) {
+ while (after_it - before_it > 1) {
+ Iterator pivot(before_it + (1 + Pivot::Calc(key - before_v, after_v - before_v, after_it - before_it - 1)));
+ typename Accessor::Key mid(accessor(pivot));
+ if (mid < key) {
+ before_it = pivot;
+ before_v = mid;
+ } else if (mid > key) {
+ after_it = pivot;
+ after_v = mid;
+ } else {
+ out = pivot;
+ return true;
+ }
+ }
+ return false;
+}
+
+template <class Iterator, class Accessor, class Pivot> bool SortedUniformFind(const Accessor &accessor, Iterator begin, Iterator end, const typename Accessor::Key key, Iterator &out) {
+ if (begin == end) return false;
+ typename Accessor::Key below(accessor(begin));
+ if (key <= below) {
+ if (key == below) { out = begin; return true; }
+ return false;
+ }
+ // Make the range [begin, end].
+ --end;
+ typename Accessor::Key above(accessor(end));
+ if (key >= above) {
+ if (key == above) { out = end; return true; }
+ return false;
+ }
+ return BoundedSortedUniformFind<Iterator, Accessor, Pivot>(accessor, begin, below, end, above, key, out);
+}
+
+} // namespace util
+
+#endif // UTIL_SORTED_UNIFORM_H
diff --git a/src/kenlm/util/sorted_uniform_test.cc b/src/kenlm/util/sorted_uniform_test.cc
new file mode 100644
index 0000000..39f05e5
--- /dev/null
+++ b/src/kenlm/util/sorted_uniform_test.cc
@@ -0,0 +1,127 @@
+#include "util/sorted_uniform.hh"
+
+#include <boost/random/mersenne_twister.hpp>
+#include <boost/random/uniform_int.hpp>
+#include <boost/random/variate_generator.hpp>
+#include <boost/scoped_array.hpp>
+#include <boost/unordered_map.hpp>
+
+#define BOOST_TEST_MODULE SortedUniformTest
+#include <boost/test/unit_test.hpp>
+
+#include <algorithm>
+#include <limits>
+#include <vector>
+
+namespace util {
+namespace {
+
+template <class KeyT, class ValueT> struct Entry {
+ typedef KeyT Key;
+ typedef ValueT Value;
+
+ Key key;
+ Value value;
+
+ Key GetKey() const {
+ return key;
+ }
+
+ Value GetValue() const {
+ return value;
+ }
+
+ bool operator<(const Entry<Key,Value> &other) const {
+ return key < other.key;
+ }
+};
+
+template <class KeyT> struct Accessor {
+ typedef KeyT Key;
+ template <class Value> Key operator()(const Entry<Key, Value> *entry) const {
+ return entry->GetKey();
+ }
+};
+
+template <class Key, class Value> void Check(const Entry<Key, Value> *begin, const Entry<Key, Value> *end, const boost::unordered_map<Key, Value> &reference, const Key key) {
+ typename boost::unordered_map<Key, Value>::const_iterator ref = reference.find(key);
+ typedef const Entry<Key, Value> *It;
+ // g++ can't tell that require will crash and burn.
+ It i = NULL;
+ bool ret = SortedUniformFind<It, Accessor<Key>, Pivot64>(Accessor<Key>(), begin, end, key, i);
+ if (ref == reference.end()) {
+ BOOST_CHECK(!ret);
+ } else {
+ BOOST_REQUIRE(ret);
+ BOOST_CHECK_EQUAL(ref->second, i->GetValue());
+ }
+}
+
+BOOST_AUTO_TEST_CASE(empty) {
+ typedef const Entry<uint64_t, float> T;
+ const T *i;
+ bool ret = SortedUniformFind<const T*, Accessor<uint64_t>, Pivot64>(Accessor<uint64_t>(), (const T*)NULL, (const T*)NULL, (uint64_t)10, i);
+ BOOST_CHECK(!ret);
+}
+
+template <class Key> void RandomTest(Key upper, size_t entries, size_t queries) {
+ typedef unsigned char Value;
+ boost::mt19937 rng;
+ boost::uniform_int<Key> range_key(0, upper);
+ boost::uniform_int<Value> range_value(0, 255);
+ boost::variate_generator<boost::mt19937&, boost::uniform_int<Key> > gen_key(rng, range_key);
+ boost::variate_generator<boost::mt19937&, boost::uniform_int<unsigned char> > gen_value(rng, range_value);
+
+ typedef Entry<Key, Value> Ent;
+ std::vector<Ent> backing;
+ boost::unordered_map<Key, unsigned char> reference;
+ Ent ent;
+ for (size_t i = 0; i < entries; ++i) {
+ Key key = gen_key();
+ unsigned char value = gen_value();
+ if (reference.insert(std::make_pair(key, value)).second) {
+ ent.key = key;
+ ent.value = value;
+ backing.push_back(ent);
+ }
+ }
+ std::sort(backing.begin(), backing.end());
+
+ // Random queries.
+ for (size_t i = 0; i < queries; ++i) {
+ const Key key = gen_key();
+ Check<Key, unsigned char>(&*backing.begin(), &*backing.end(), reference, key);
+ }
+
+ typename boost::unordered_map<Key, unsigned char>::const_iterator it = reference.begin();
+ for (size_t i = 0; (i < queries) && (it != reference.end()); ++i, ++it) {
+ Check<Key, unsigned char>(&*backing.begin(), &*backing.end(), reference, it->second);
+ }
+}
+
+BOOST_AUTO_TEST_CASE(basic) {
+ RandomTest<uint8_t>(11, 10, 200);
+}
+
+BOOST_AUTO_TEST_CASE(tiny_dense_random) {
+ RandomTest<uint8_t>(11, 50, 200);
+}
+
+BOOST_AUTO_TEST_CASE(small_dense_random) {
+ RandomTest<uint8_t>(100, 100, 200);
+}
+
+BOOST_AUTO_TEST_CASE(small_sparse_random) {
+ RandomTest<uint8_t>(200, 15, 200);
+}
+
+BOOST_AUTO_TEST_CASE(medium_sparse_random) {
+ RandomTest<uint16_t>(32000, 1000, 2000);
+}
+
+BOOST_AUTO_TEST_CASE(sparse_random) {
+ RandomTest<uint64_t>(std::numeric_limits<uint64_t>::max(), 100000, 2000);
+}
+
+} // namespace
+} // namespace util
diff --git a/src/kenlm/util/stream/CMakeLists.txt b/src/kenlm/util/stream/CMakeLists.txt
new file mode 100644
index 0000000..0c4c115
--- /dev/null
+++ b/src/kenlm/util/stream/CMakeLists.txt
@@ -0,0 +1,50 @@
+cmake_minimum_required(VERSION 2.8.8)
+#
+# The KenLM cmake files make use of add_library(... OBJECTS ...)
+#
+# This syntax allows grouping of source files when compiling
+# (effectively creating "fake" libraries based on source subdirs).
+#
+# This syntax was only added in cmake version 2.8.8
+#
+# see http://www.cmake.org/Wiki/CMake/Tutorials/Object_Library
+
+
+# This CMake file was created by Lane Schwartz <dowobeha@gmail.com>
+
+# Explicitly list the source files for this subdirectory
+#
+# If you add any source files to this subdirectory
+# that should be included in the kenlm library,
+# (this excludes any unit test files)
+# you should add them to the following list:
+#
+# In order to allow CMake files in the parent directory
+# to see this variable definition, we set PARENT_SCOPE.
+#
+# In order to set correct paths to these files
+# when this variable is referenced by CMake files in the parent directory,
+# we prefix all files with ${CMAKE_CURRENT_SOURCE_DIR}.
+#
+set(KENLM_UTIL_STREAM_SOURCE
+ ${CMAKE_CURRENT_SOURCE_DIR}/chain.cc
+ ${CMAKE_CURRENT_SOURCE_DIR}/io.cc
+ ${CMAKE_CURRENT_SOURCE_DIR}/line_input.cc
+ ${CMAKE_CURRENT_SOURCE_DIR}/multi_progress.cc
+ ${CMAKE_CURRENT_SOURCE_DIR}/rewindable_stream.cc
+ PARENT_SCOPE)
+
+
+
+if(BUILD_TESTING)
+ # Explicitly list the Boost test files to be compiled
+ set(KENLM_BOOST_TESTS_LIST
+ io_test
+ sort_test
+ stream_test
+ )
+
+ AddTests(TESTS ${KENLM_BOOST_TESTS_LIST}
+ DEPENDS $<TARGET_OBJECTS:kenlm_util>
+ LIBRARIES ${Boost_LIBRARIES} pthread)
+endif()
diff --git a/src/kenlm/util/stream/Jamfile b/src/kenlm/util/stream/Jamfile
new file mode 100644
index 0000000..de9d41c
--- /dev/null
+++ b/src/kenlm/util/stream/Jamfile
@@ -0,0 +1,7 @@
+fakelib stream : [ glob *.cc : *_test.cc ] ..//kenutil /top//boost_thread : : : <library>/top//boost_thread ;
+
+import testing ;
+unit-test io_test : io_test.cc stream /top//boost_unit_test_framework ;
+unit-test stream_test : stream_test.cc stream /top//boost_unit_test_framework ;
+unit-test rewindable_stream_test : rewindable_stream_test.cc stream /top//boost_unit_test_framework ;
+unit-test sort_test : sort_test.cc stream /top//boost_unit_test_framework ;
diff --git a/src/kenlm/util/stream/block.hh b/src/kenlm/util/stream/block.hh
new file mode 100644
index 0000000..42df13f
--- /dev/null
+++ b/src/kenlm/util/stream/block.hh
@@ -0,0 +1,93 @@
+#ifndef UTIL_STREAM_BLOCK_H
+#define UTIL_STREAM_BLOCK_H
+
+#include <cstddef>
+#include <stdint.h>
+
+namespace util {
+namespace stream {
+
+/**
+ * Encapsulates a block of memory.
+ */
+class Block {
+ public:
+
+ /**
+ * Constructs an empty block.
+ */
+ Block() : mem_(NULL), valid_size_(0) {}
+
+ /**
+ * Constructs a block that encapsulates a segment of memory.
+ *
+ * @param[in] mem The segment of memory to encapsulate
+ * @param[in] size The size of the memory segment in bytes
+ */
+ Block(void *mem, std::size_t size) : mem_(mem), valid_size_(size) {}
+
+ /**
+ * Set the number of bytes in this block that should be interpreted as valid.
+ *
+ * @param[in] to Number of bytes
+ */
+ void SetValidSize(std::size_t to) { valid_size_ = to; }
+
+ /**
+ * Gets the number of bytes in this block that should be interpreted as valid.
+ * This is important because read might fill in less than Allocated at EOF.
+ */
+ std::size_t ValidSize() const { return valid_size_; }
+
+ /** Gets a void pointer to the memory underlying this block. */
+ void *Get() { return mem_; }
+
+ /** Gets a const void pointer to the memory underlying this block. */
+ const void *Get() const { return mem_; }
+
+
+ /**
+ * Gets a const void pointer to the end of the valid section of memory
+ * encapsulated by this block.
+ */
+ const void *ValidEnd() const {
+ return reinterpret_cast<const uint8_t*>(mem_) + valid_size_;
+ }
+
+ /**
+ * Returns true if this block encapsulates a valid (non-NULL) block of memory.
+ *
+ * This method is a user-defined implicit conversion function to boolean;
+ * among other things, this method enables bare instances of this class
+ * to be used as the condition of an if statement.
+ */
+ operator bool() const { return mem_ != NULL; }
+
+ /**
+ * Returns true if this block is empty.
+ *
+ * In other words, if Get()==NULL, this method will return true.
+ */
+ bool operator!() const { return mem_ == NULL; }
+
+ private:
+ friend class Link;
+ friend class RewindableStream;
+
+ /**
+ * Points this block's memory at NULL.
+ *
+ * This class defines poison as a block whose memory pointer is NULL.
+ */
+ void SetToPoison() {
+ mem_ = NULL;
+ }
+
+ void *mem_;
+ std::size_t valid_size_;
+};
+
+} // namespace stream
+} // namespace util
+
+#endif // UTIL_STREAM_BLOCK_H
diff --git a/src/kenlm/util/stream/chain.cc b/src/kenlm/util/stream/chain.cc
new file mode 100644
index 0000000..6bc0005
--- /dev/null
+++ b/src/kenlm/util/stream/chain.cc
@@ -0,0 +1,163 @@
+#include "util/stream/chain.hh"
+
+#include "util/stream/io.hh"
+
+#include "util/exception.hh"
+#include "util/pcqueue.hh"
+
+#include <cstdlib>
+#include <new>
+#include <iostream>
+#include <stdint.h>
+
+namespace util {
+namespace stream {
+
+ChainConfigException::ChainConfigException() throw() { *this << "Chain configured with "; }
+ChainConfigException::~ChainConfigException() throw() {}
+
+Thread::~Thread() {
+ thread_.join();
+}
+
+void Thread::UnhandledException(const std::exception &e) {
+ std::cerr << e.what() << std::endl;
+ abort();
+}
+
+void Recycler::Run(const ChainPosition &position) {
+ for (Link l(position); l; ++l) {
+ l->SetValidSize(position.GetChain().BlockSize());
+ }
+}
+
+const Recycler kRecycle = Recycler();
+
+Chain::Chain(const ChainConfig &config) : config_(config), complete_called_(false) {
+ UTIL_THROW_IF(!config.entry_size, ChainConfigException, "zero-size entries.");
+ UTIL_THROW_IF(!config.block_count, ChainConfigException, "block count zero");
+ UTIL_THROW_IF(config.total_memory < config.entry_size * config.block_count, ChainConfigException, config.total_memory << " total memory, too small for " << config.block_count << " blocks of containing entries of size " << config.entry_size);
+ // Round down block size to a multiple of entry size.
+ block_size_ = config.total_memory / (config.block_count * config.entry_size) * config.entry_size;
+}
+
+Chain::~Chain() {
+ Wait();
+}
+
+ChainPosition Chain::Add() {
+ if (!Running()) Start();
+ PCQueue<Block> &in = queues_.back();
+ queues_.push_back(new PCQueue<Block>(config_.block_count));
+ return ChainPosition(in, queues_.back(), this, progress_);
+}
+
+Chain &Chain::operator>>(const WriteAndRecycle &writer) {
+ threads_.push_back(new Thread(Complete(), writer));
+ return *this;
+}
+
+Chain &Chain::operator>>(const PWriteAndRecycle &writer) {
+ threads_.push_back(new Thread(Complete(), writer));
+ return *this;
+}
+
+void Chain::Wait(bool release_memory) {
+ if (queues_.empty()) {
+ assert(threads_.empty());
+ return; // Nothing to wait for.
+ }
+ if (!complete_called_) CompleteLoop();
+ threads_.clear();
+ for (std::size_t i = 0; queues_.front().Consume(); ++i) {
+ if (i == config_.block_count) {
+ std::cerr << "Chain ending without poison." << std::endl;
+ abort();
+ }
+ }
+ queues_.clear();
+ progress_.Finished();
+ complete_called_ = false;
+ if (release_memory) memory_.reset();
+}
+
+void Chain::Start() {
+ Wait(false);
+ if (!memory_.get()) {
+ // Allocate memory.
+ assert(threads_.empty());
+ assert(queues_.empty());
+ std::size_t malloc_size = block_size_ * config_.block_count;
+ memory_.reset(MallocOrThrow(malloc_size));
+ }
+ // This queue can accommodate all blocks.
+ queues_.push_back(new PCQueue<Block>(config_.block_count));
+ // Populate the lead queue with blocks.
+ uint8_t *base = static_cast<uint8_t*>(memory_.get());
+ for (std::size_t i = 0; i < config_.block_count; ++i) {
+ queues_.front().Produce(Block(base, block_size_));
+ base += block_size_;
+ }
+}
+
+ChainPosition Chain::Complete() {
+ assert(Running());
+ UTIL_THROW_IF(complete_called_, util::Exception, "CompleteLoop() called twice");
+ complete_called_ = true;
+ return ChainPosition(queues_.back(), queues_.front(), this, progress_);
+}
+
+Link::Link() : in_(NULL), out_(NULL), poisoned_(true) {}
+
+void Link::Init(const ChainPosition &position) {
+ UTIL_THROW_IF(in_, util::Exception, "Link::Init twice");
+ in_ = position.in_;
+ out_ = position.out_;
+ poisoned_ = false;
+ progress_ = position.progress_;
+ in_->Consume(current_);
+}
+
+Link::Link(const ChainPosition &position) : in_(NULL) {
+ Init(position);
+}
+
+Link::~Link() {
+ if (current_) {
+ // Probably an exception unwinding.
+ std::cerr << "Last input should have been poison." << std::endl;
+ abort();
+ } else {
+ if (!poisoned_) {
+ // Poison is a block whose memory pointer is NULL.
+ //
+ // Because we're in the else block,
+ // we know that the memory pointer of current_ is NULL.
+ //
+ // Pass the current (poison) block!
+ out_->Produce(current_);
+ }
+ }
+}
+
+Link &Link::operator++() {
+ assert(current_);
+ progress_ += current_.ValidSize();
+ out_->Produce(current_);
+ in_->Consume(current_);
+ if (!current_) {
+ poisoned_ = true;
+ out_->Produce(current_);
+ }
+ return *this;
+}
+
+void Link::Poison() {
+ assert(!poisoned_);
+ current_.SetToPoison();
+ out_->Produce(current_);
+ poisoned_ = true;
+}
+
+} // namespace stream
+} // namespace util
diff --git a/src/kenlm/util/stream/chain.hh b/src/kenlm/util/stream/chain.hh
new file mode 100644
index 0000000..2969822
--- /dev/null
+++ b/src/kenlm/util/stream/chain.hh
@@ -0,0 +1,347 @@
+#ifndef UTIL_STREAM_CHAIN_H
+#define UTIL_STREAM_CHAIN_H
+
+#include "util/stream/block.hh"
+#include "util/stream/config.hh"
+#include "util/stream/multi_progress.hh"
+#include "util/scoped.hh"
+
+#include <boost/ptr_container/ptr_vector.hpp>
+#include <boost/thread/thread.hpp>
+
+#include <cstddef>
+#include <cassert>
+
+namespace util {
+template <class T> class PCQueue;
+namespace stream {
+
+class ChainConfigException : public Exception {
+ public:
+ ChainConfigException() throw();
+ ~ChainConfigException() throw();
+};
+
+class Chain;
+class RewindableStream;
+
+/**
+ * Encapsulates a @ref PCQueue "producer queue" and a @ref PCQueue "consumer queue" within a @ref Chain "chain".
+ *
+ * Specifies position in chain for Link constructor.
+ */
+class ChainPosition {
+ public:
+ const Chain &GetChain() const { return *chain_; }
+ private:
+ friend class Chain;
+ friend class Link;
+ friend class RewindableStream;
+ ChainPosition(PCQueue<Block> &in, PCQueue<Block> &out, Chain *chain, MultiProgress &progress)
+ : in_(&in), out_(&out), chain_(chain), progress_(progress.Add()) {}
+
+ PCQueue<Block> *in_, *out_;
+
+ Chain *chain_;
+
+ WorkerProgress progress_;
+};
+
+
+/**
+ * Encapsulates a worker thread processing data at a given position in the chain.
+ *
+ * Each instance of this class owns one boost thread in which the worker is Run().
+ */
+class Thread {
+ public:
+
+ /**
+ * Constructs a new Thread in which the provided Worker is Run().
+ *
+ * Position is usually ChainPosition but if there are multiple streams involved, this can be ChainPositions.
+ *
+ * After a call to this constructor, the provided worker will be running within a boost thread owned by the newly constructed Thread object.
+ */
+ template <class Position, class Worker> Thread(const Position &position, const Worker &worker)
+ : thread_(boost::ref(*this), position, worker) {}
+
+ ~Thread();
+
+ /**
+ * Launches the provided worker in this object's boost thread.
+ *
+ * This method is called automatically by this class's @ref Thread() "constructor".
+ */
+ template <class Position, class Worker> void operator()(const Position &position, Worker &worker) {
+// try {
+ worker.Run(position);
+// } catch (const std::exception &e) {
+// UnhandledException(e);
+// }
+ }
+
+ private:
+ void UnhandledException(const std::exception &e);
+
+ boost::thread thread_;
+};
+
+/**
+ * This resets blocks to full valid size. Used to close the loop in Chain by recycling blocks.
+ */
+class Recycler {
+ public:
+ /**
+ * Resets the blocks in the chain such that the blocks' respective valid sizes match the chain's block size.
+ *
+ * @see Block::SetValidSize()
+ * @see Chain::BlockSize()
+ */
+ void Run(const ChainPosition &position);
+};
+
+extern const Recycler kRecycle;
+class WriteAndRecycle;
+class PWriteAndRecycle;
+
+/**
+ * Represents a sequence of workers, through which @ref Block "blocks" can pass.
+ */
+class Chain {
+ private:
+ template <class T, void (T::*ptr)(const ChainPosition &) = &T::Run> struct CheckForRun {
+ typedef Chain type;
+ };
+
+ public:
+
+ /**
+ * Constructs a configured Chain.
+ *
+ * @param config Specifies how to configure the Chain.
+ */
+ explicit Chain(const ChainConfig &config);
+
+ /**
+ * Destructs a Chain.
+ *
+ * This method waits for the chain's threads to complete,
+ * and frees the memory held by this chain.
+ */
+ ~Chain();
+
+ void ActivateProgress() {
+ assert(!Running());
+ progress_.Activate();
+ }
+
+ void SetProgressTarget(uint64_t target) {
+ progress_.SetTarget(target);
+ }
+
+ /**
+ * Gets the number of bytes in each record of a Block.
+ *
+ * @see ChainConfig::entry_size
+ */
+ std::size_t EntrySize() const {
+ return config_.entry_size;
+ }
+
+ /**
+ * Gets the initial @ref Block::ValidSize "valid size" for @ref Block "blocks" in this chain.
+ *
+ * @see Block::ValidSize
+ */
+ std::size_t BlockSize() const {
+ return block_size_;
+ }
+
+ /**
+ * Number of blocks going through the Chain.
+ */
+ std::size_t BlockCount() const {
+ return config_.block_count;
+ }
+
+ /** Two ways to add to the chain: Add() or operator>>. */
+ ChainPosition Add();
+
+ /**
+ * Adds a new worker to this chain,
+ * and runs that worker in a new Thread owned by this chain.
+ *
+ * The worker must have a Run method that accepts a position argument.
+ *
+ * @see Thread::operator()()
+ */
+ template <class Worker> typename CheckForRun<Worker>::type &operator>>(const Worker &worker) {
+ assert(!complete_called_);
+ threads_.push_back(new Thread(Add(), worker));
+ return *this;
+ }
+
+ /**
+ * Adds a new worker to this chain (but avoids copying that worker),
+ * and runs that worker in a new Thread owned by this chain.
+ *
+ * The worker must have a Run method that accepts a position argument.
+ *
+ * @see Thread::operator()()
+ */
+ template <class Worker> typename CheckForRun<Worker>::type &operator>>(const boost::reference_wrapper<Worker> &worker) {
+ assert(!complete_called_);
+ threads_.push_back(new Thread(Add(), worker));
+ return *this;
+ }
+
+ // Note that Link and Stream also define operator>> outside this class.
+
+ // To complete the loop, call CompleteLoop(), >> kRecycle, or the destructor.
+ void CompleteLoop() {
+ threads_.push_back(new Thread(Complete(), kRecycle));
+ }
+
+ /**
+ * Adds a Recycler worker to this chain,
+ * and runs that worker in a new Thread owned by this chain.
+ */
+ Chain &operator>>(const Recycler &) {
+ CompleteLoop();
+ return *this;
+ }
+
+ /**
+ * Adds a WriteAndRecycle worker to this chain,
+ * and runs that worker in a new Thread owned by this chain.
+ */
+ Chain &operator>>(const WriteAndRecycle &writer);
+ Chain &operator>>(const PWriteAndRecycle &writer);
+
+ // Chains are reusable. Call Wait to wait for everything to finish and free memory.
+ void Wait(bool release_memory = true);
+
+ // Waits for the current chain to complete (if any) then starts again.
+ void Start();
+
+ bool Running() const { return !queues_.empty(); }
+
+ private:
+ ChainPosition Complete();
+
+ ChainConfig config_;
+
+ std::size_t block_size_;
+
+ scoped_malloc memory_;
+
+ boost::ptr_vector<PCQueue<Block> > queues_;
+
+ bool complete_called_;
+
+ boost::ptr_vector<Thread> threads_;
+
+ MultiProgress progress_;
+};
+
+// Create the link in the worker thread using the position token.
+/**
+ * Represents a C++ style iterator over @ref Block "blocks".
+ */
+class Link {
+ public:
+
+ // Either default construct and Init or just construct all at once.
+
+ /**
+ * Constructs an @ref Init "initialized" link.
+ *
+ * @see Init
+ */
+ explicit Link(const ChainPosition &position);
+
+ /**
+ * Constructs a link that must subsequently be @ref Init "initialized".
+ *
+ * @see Init
+ */
+ Link();
+
+ /**
+ * Initializes the link with the input @ref PCQueue "consumer queue" and output @ref PCQueue "producer queue" at a given @ref ChainPosition "position" in the @ref Chain "chain".
+ *
+ * @see Link()
+ */
+ void Init(const ChainPosition &position);
+
+ /**
+ * Destructs the link object.
+ *
+ * If necessary, this method will pass a poison block
+ * to this link's output @ref PCQueue "producer queue".
+ *
+ * @see Block::SetToPoison()
+ */
+ ~Link();
+
+ /**
+ * Gets a reference to the @ref Block "block" at this link.
+ */
+ Block &operator*() { return current_; }
+
+ /**
+ * Gets a const reference to the @ref Block "block" at this link.
+ */
+ const Block &operator*() const { return current_; }
+
+ /**
+ * Gets a pointer to the @ref Block "block" at this link.
+ */
+ Block *operator->() { return &current_; }
+
+ /**
+ * Gets a const pointer to the @ref Block "block" at this link.
+ */
+ const Block *operator->() const { return &current_; }
+
+ /**
+ * Gets the link at the next @ref ChainPosition "position" in the @ref Chain "chain".
+ */
+ Link &operator++();
+
+ /**
+ * Returns true if the @ref Block "block" at this link encapsulates a valid (non-NULL) block of memory.
+ *
+ * This method is a user-defined implicit conversion function to boolean;
+ * among other things, this method enables bare instances of this class
+ * to be used as the condition of an if statement.
+ */
+ operator bool() const { return current_; }
+
+ /**
+ * @ref Block::SetToPoison() "Poisons" the @ref Block "block" at this link,
+ * and passes this now-poisoned block to this link's output @ref PCQueue "producer queue".
+ *
+ * @see Block::SetToPoison()
+ */
+ void Poison();
+
+ private:
+ Block current_;
+ PCQueue<Block> *in_, *out_;
+
+ bool poisoned_;
+
+ WorkerProgress progress_;
+};
+
+inline Chain &operator>>(Chain &chain, Link &link) {
+ link.Init(chain.Add());
+ return chain;
+}
+
+} // namespace stream
+} // namespace util
+
+#endif // UTIL_STREAM_CHAIN_H
diff --git a/src/kenlm/util/stream/config.hh b/src/kenlm/util/stream/config.hh
new file mode 100644
index 0000000..e94cf34
--- /dev/null
+++ b/src/kenlm/util/stream/config.hh
@@ -0,0 +1,63 @@
+#ifndef UTIL_STREAM_CONFIG_H
+#define UTIL_STREAM_CONFIG_H
+
+#include <cstddef>
+#include <string>
+
+namespace util { namespace stream {
+
+/**
+ * Represents how a chain should be configured.
+ */
+struct ChainConfig {
+
+ /** Constructs a configuration with underspecified (or default) parameters. */
+ ChainConfig() {}
+
+ /**
+ * Constructs a chain configuration object.
+ *
+ * @param [in] in_entry_size Number of bytes in each record.
+ * @param [in] in_block_count Number of blocks in the chain.
+ * @param [in] in_total_memory Total number of bytes available to the chain.
+ * This value will be divided amongst the blocks in the chain.
+ */
+ ChainConfig(std::size_t in_entry_size, std::size_t in_block_count, std::size_t in_total_memory)
+ : entry_size(in_entry_size), block_count(in_block_count), total_memory(in_total_memory) {}
+
+ /**
+ * Number of bytes in each record.
+ */
+ std::size_t entry_size;
+
+ /**
+ * Number of blocks in the chain.
+ */
+ std::size_t block_count;
+
+ /**
+ * Total number of bytes available to the chain.
+ * This value will be divided amongst the blocks in the chain.
+ * Chain's constructor will make this a multiple of entry_size.
+ */
+ std::size_t total_memory;
+};
+
+
+/**
+ * Represents how a sorter should be configured.
+ */
+struct SortConfig {
+
+ /** Filename prefix where temporary files should be placed. */
+ std::string temp_prefix;
+
+ /** Size of each input/output buffer. */
+ std::size_t buffer_size;
+
+ /** Total memory to use when running alone. */
+ std::size_t total_memory;
+};
+
+}} // namespaces
+#endif // UTIL_STREAM_CONFIG_H
diff --git a/src/kenlm/util/stream/count_records.cc b/src/kenlm/util/stream/count_records.cc
new file mode 100644
index 0000000..bdadad7
--- /dev/null
+++ b/src/kenlm/util/stream/count_records.cc
@@ -0,0 +1,12 @@
+#include "util/stream/count_records.hh"
+#include "util/stream/chain.hh"
+
+namespace util { namespace stream {
+
+void CountRecords::Run(const ChainPosition &position) {
+ for (Link link(position); link; ++link) {
+ *count_ += link->ValidSize() / position.GetChain().EntrySize();
+ }
+}
+
+}} // namespaces
diff --git a/src/kenlm/util/stream/count_records.hh b/src/kenlm/util/stream/count_records.hh
new file mode 100644
index 0000000..e3f7c94
--- /dev/null
+++ b/src/kenlm/util/stream/count_records.hh
@@ -0,0 +1,20 @@
+#include <stdint.h>
+
+namespace util { namespace stream {
+
+class ChainPosition;
+
+class CountRecords {
+ public:
+ explicit CountRecords(uint64_t *out)
+ : count_(out) {
+ *count_ = 0;
+ }
+
+ void Run(const ChainPosition &position);
+
+ private:
+ uint64_t *count_;
+};
+
+}} // namespaces
diff --git a/src/kenlm/util/stream/io.cc b/src/kenlm/util/stream/io.cc
new file mode 100644
index 0000000..c272d77
--- /dev/null
+++ b/src/kenlm/util/stream/io.cc
@@ -0,0 +1,78 @@
+#include "util/stream/io.hh"
+
+#include "util/file.hh"
+#include "util/stream/chain.hh"
+
+#include <cstddef>
+
+namespace util {
+namespace stream {
+
+ReadSizeException::ReadSizeException() throw() {}
+ReadSizeException::~ReadSizeException() throw() {}
+
+void Read::Run(const ChainPosition &position) {
+ const std::size_t block_size = position.GetChain().BlockSize();
+ const std::size_t entry_size = position.GetChain().EntrySize();
+ for (Link link(position); link; ++link) {
+ std::size_t got = util::ReadOrEOF(file_, link->Get(), block_size);
+ UTIL_THROW_IF(got % entry_size, ReadSizeException, "File ended with " << got << " bytes, not a multiple of " << entry_size << ".");
+ if (got == 0) {
+ link.Poison();
+ return;
+ } else {
+ link->SetValidSize(got);
+ }
+ }
+}
+
+void PRead::Run(const ChainPosition &position) {
+ scoped_fd owner;
+ if (own_) owner.reset(file_);
+ const uint64_t size = SizeOrThrow(file_);
+ UTIL_THROW_IF(size % static_cast<uint64_t>(position.GetChain().EntrySize()), ReadSizeException, "File size " << file_ << " size is " << size << " not a multiple of " << position.GetChain().EntrySize());
+ const std::size_t block_size = position.GetChain().BlockSize();
+ const uint64_t block_size64 = static_cast<uint64_t>(block_size);
+ Link link(position);
+ uint64_t offset = 0;
+ for (; offset + block_size64 < size; offset += block_size64, ++link) {
+ ErsatzPRead(file_, link->Get(), block_size, offset);
+ link->SetValidSize(block_size);
+ }
+ // size - offset is <= block_size, so it casts to 32-bit fine.
+ if (size - offset) {
+ ErsatzPRead(file_, link->Get(), size - offset, offset);
+ link->SetValidSize(size - offset);
+ ++link;
+ }
+ link.Poison();
+}
+
+void Write::Run(const ChainPosition &position) {
+ for (Link link(position); link; ++link) {
+ WriteOrThrow(file_, link->Get(), link->ValidSize());
+ }
+}
+
+void WriteAndRecycle::Run(const ChainPosition &position) {
+ const std::size_t block_size = position.GetChain().BlockSize();
+ for (Link link(position); link; ++link) {
+ WriteOrThrow(file_, link->Get(), link->ValidSize());
+ link->SetValidSize(block_size);
+ }
+}
+
+void PWriteAndRecycle::Run(const ChainPosition &position) {
+ const std::size_t block_size = position.GetChain().BlockSize();
+ uint64_t offset = 0;
+ for (Link link(position); link; ++link) {
+ ErsatzPWrite(file_, link->Get(), link->ValidSize(), offset);
+ offset += link->ValidSize();
+ link->SetValidSize(block_size);
+ }
+ // Trim file to size.
+ util::ResizeOrThrow(file_, offset);
+}
+
+} // namespace stream
+} // namespace util
diff --git a/src/kenlm/util/stream/io.hh b/src/kenlm/util/stream/io.hh
new file mode 100644
index 0000000..4605a8a
--- /dev/null
+++ b/src/kenlm/util/stream/io.hh
@@ -0,0 +1,87 @@
+#ifndef UTIL_STREAM_IO_H
+#define UTIL_STREAM_IO_H
+
+#include "util/exception.hh"
+#include "util/file.hh"
+
+namespace util {
+namespace stream {
+
+class ChainPosition;
+
+class ReadSizeException : public util::Exception {
+ public:
+ ReadSizeException() throw();
+ ~ReadSizeException() throw();
+};
+
+class Read {
+ public:
+ explicit Read(int fd) : file_(fd) {}
+ void Run(const ChainPosition &position);
+ private:
+ int file_;
+};
+
+// Like read but uses pread so that the file can be accessed from multiple threads.
+class PRead {
+ public:
+ explicit PRead(int fd, bool take_own = false) : file_(fd), own_(take_own) {}
+ void Run(const ChainPosition &position);
+ private:
+ int file_;
+ bool own_;
+};
+
+class Write {
+ public:
+ explicit Write(int fd) : file_(fd) {}
+ void Run(const ChainPosition &position);
+ private:
+ int file_;
+};
+
+// It's a common case that stuff is written and then recycled. So rather than
+// spawn another thread to Recycle, this combines the two roles.
+class WriteAndRecycle {
+ public:
+ explicit WriteAndRecycle(int fd) : file_(fd) {}
+ void Run(const ChainPosition &position);
+ private:
+ int file_;
+};
+
+class PWriteAndRecycle {
+ public:
+ explicit PWriteAndRecycle(int fd) : file_(fd) {}
+ void Run(const ChainPosition &position);
+ private:
+ int file_;
+};
+
+
+// Reuse the same file over and over again to buffer output.
+class FileBuffer {
+ public:
+ explicit FileBuffer(int fd) : file_(fd) {}
+
+ PWriteAndRecycle Sink() const {
+ util::SeekOrThrow(file_.get(), 0);
+ return PWriteAndRecycle(file_.get());
+ }
+
+ PRead Source(bool discard = false) {
+ return PRead(discard ? file_.release() : file_.get(), discard);
+ }
+
+ uint64_t Size() const {
+ return SizeOrThrow(file_.get());
+ }
+
+ private:
+ scoped_fd file_;
+};
+
+} // namespace stream
+} // namespace util
+#endif // UTIL_STREAM_IO_H
diff --git a/src/joshua/decoder/ff/lm/kenlm/util/stream/io_test.cc b/src/kenlm/util/stream/io_test.cc
similarity index 100%
rename from src/joshua/decoder/ff/lm/kenlm/util/stream/io_test.cc
rename to src/kenlm/util/stream/io_test.cc
diff --git a/src/kenlm/util/stream/line_input.cc b/src/kenlm/util/stream/line_input.cc
new file mode 100644
index 0000000..0ad8800
--- /dev/null
+++ b/src/kenlm/util/stream/line_input.cc
@@ -0,0 +1,52 @@
+#include "util/stream/line_input.hh"
+
+#include "util/exception.hh"
+#include "util/file.hh"
+#include "util/read_compressed.hh"
+#include "util/stream/chain.hh"
+
+#include <algorithm>
+#include <vector>
+
+namespace util { namespace stream {
+
+void LineInput::Run(const ChainPosition &position) {
+ ReadCompressed reader(fd_);
+ // Holding area for beginning of line to be placed in next block.
+ std::vector<char> carry;
+
+ for (Link block(position); ; ++block) {
+ char *to = static_cast<char*>(block->Get());
+ char *begin = to;
+ char *end = to + position.GetChain().BlockSize();
+ std::copy(carry.begin(), carry.end(), to);
+ to += carry.size();
+ while (to != end) {
+ std::size_t got = reader.Read(to, end - to);
+ if (!got) {
+ // EOF
+ block->SetValidSize(to - begin);
+ ++block;
+ block.Poison();
+ return;
+ }
+ to += got;
+ }
+
+ // Find the last newline.
+ char *newline;
+ for (newline = to - 1; ; --newline) {
+ UTIL_THROW_IF(newline < begin, Exception, "Did not find a newline in " << position.GetChain().BlockSize() << " bytes of input of " << NameFromFD(fd_) << ". Is this a text file?");
+ if (*newline == '\n') break;
+ }
+
+ // Copy everything after the last newline to the carry.
+ carry.clear();
+ carry.resize(to - (newline + 1));
+ std::copy(newline + 1, to, &*carry.begin());
+
+ block->SetValidSize(newline + 1 - begin);
+ }
+}
+
+}} // namespaces
diff --git a/src/joshua/decoder/ff/lm/kenlm/util/stream/line_input.hh b/src/kenlm/util/stream/line_input.hh
similarity index 100%
rename from src/joshua/decoder/ff/lm/kenlm/util/stream/line_input.hh
rename to src/kenlm/util/stream/line_input.hh
diff --git a/src/kenlm/util/stream/multi_progress.cc b/src/kenlm/util/stream/multi_progress.cc
new file mode 100644
index 0000000..59750f5
--- /dev/null
+++ b/src/kenlm/util/stream/multi_progress.cc
@@ -0,0 +1,86 @@
+#include "util/stream/multi_progress.hh"
+
+// TODO: merge some functionality with the simple progress bar?
+#include "util/ersatz_progress.hh"
+
+#include <iostream>
+#include <limits>
+
+#include <cstring>
+
+#if !defined(_WIN32) && !defined(_WIN64)
+#include <unistd.h>
+#endif
+
+namespace util { namespace stream {
+
+namespace {
+const char kDisplayCharacters[] = "-+*#0123456789";
+
+uint64_t Next(unsigned char stone, uint64_t complete) {
+ return (static_cast<uint64_t>(stone + 1) * complete + MultiProgress::kWidth - 1) / MultiProgress::kWidth;
+}
+
+} // namespace
+
+MultiProgress::MultiProgress() : active_(false), complete_(std::numeric_limits<uint64_t>::max()), character_handout_(0) {}
+
+MultiProgress::~MultiProgress() {
+ if (active_ && complete_ != std::numeric_limits<uint64_t>::max())
+ std::cerr << '\n';
+}
+
+void MultiProgress::Activate() {
+ active_ =
+#if !defined(_WIN32) && !defined(_WIN64)
+ // Is stderr a terminal?
+ (isatty(2) == 1)
+#else
+ true
+#endif
+ ;
+}
+
+void MultiProgress::SetTarget(uint64_t complete) {
+ if (!active_) return;
+ complete_ = complete;
+ if (!complete) complete_ = 1;
+ memset(display_, 0, sizeof(display_));
+ character_handout_ = 0;
+ std::cerr << kProgressBanner;
+}
+
+WorkerProgress MultiProgress::Add() {
+ if (!active_)
+ return WorkerProgress(std::numeric_limits<uint64_t>::max(), *this, '\0');
+ std::size_t character_index;
+ {
+ boost::unique_lock<boost::mutex> lock(mutex_);
+ character_index = character_handout_++;
+ if (character_handout_ == sizeof(kDisplayCharacters) - 1)
+ character_handout_ = 0;
+ }
+ return WorkerProgress(Next(0, complete_), *this, kDisplayCharacters[character_index]);
+}
+
+void MultiProgress::Finished() {
+ if (!active_ || complete_ == std::numeric_limits<uint64_t>::max()) return;
+ std::cerr << '\n';
+ complete_ = std::numeric_limits<uint64_t>::max();
+}
+
+void MultiProgress::Milestone(WorkerProgress &worker) {
+ if (!active_ || complete_ == std::numeric_limits<uint64_t>::max()) return;
+ unsigned char stone = std::min(static_cast<uint64_t>(kWidth), worker.current_ * kWidth / complete_);
+ for (char *i = &display_[worker.stone_]; i < &display_[stone]; ++i) {
+ *i = worker.character_;
+ }
+ worker.next_ = Next(stone, complete_);
+ worker.stone_ = stone;
+ {
+ boost::unique_lock<boost::mutex> lock(mutex_);
+ std::cerr << '\r' << display_ << std::flush;
+ }
+}
+
+}} // namespaces
diff --git a/src/kenlm/util/stream/multi_progress.hh b/src/kenlm/util/stream/multi_progress.hh
new file mode 100644
index 0000000..f9e6423
--- /dev/null
+++ b/src/kenlm/util/stream/multi_progress.hh
@@ -0,0 +1,89 @@
+/* Progress bar suitable for chains of workers */
+#ifndef UTIL_STREAM_MULTI_PROGRESS_H
+#define UTIL_STREAM_MULTI_PROGRESS_H
+
+#include <boost/thread/mutex.hpp>
+
+#include <cstddef>
+#include <stdint.h>
+
+namespace util { namespace stream {
+
+class WorkerProgress;
+
+class MultiProgress {
+ public:
+ static const unsigned char kWidth = 100;
+
+ MultiProgress();
+
+ ~MultiProgress();
+
+ // Turns on showing (requires SetTarget too).
+ void Activate();
+
+ void SetTarget(uint64_t complete);
+
+ WorkerProgress Add();
+
+ void Finished();
+
+ private:
+ friend class WorkerProgress;
+ void Milestone(WorkerProgress &worker);
+
+ bool active_;
+
+ uint64_t complete_;
+
+ boost::mutex mutex_;
+
+ // \0 at the end.
+ char display_[kWidth + 1];
+
+ std::size_t character_handout_;
+
+ MultiProgress(const MultiProgress &);
+ MultiProgress &operator=(const MultiProgress &);
+};
+
+class WorkerProgress {
+ public:
+ // Default constructor must be initialized with operator= later.
+ WorkerProgress() : parent_(NULL) {}
+
+ // Not threadsafe for the same worker by default.
+ WorkerProgress &operator++() {
+ if (++current_ >= next_) {
+ parent_->Milestone(*this);
+ }
+ return *this;
+ }
+
+ WorkerProgress &operator+=(uint64_t amount) {
+ current_ += amount;
+ if (current_ >= next_) {
+ parent_->Milestone(*this);
+ }
+ return *this;
+ }
+
+ private:
+ friend class MultiProgress;
+ WorkerProgress(uint64_t next, MultiProgress &parent, char character)
+ : current_(0), next_(next), parent_(&parent), stone_(0), character_(character) {}
+
+ uint64_t current_, next_;
+
+ MultiProgress *parent_;
+
+ // Previous milestone reached.
+ unsigned char stone_;
+
+ // Character to display in bar.
+ char character_;
+};
+
+}} // namespaces
+
+#endif // UTIL_STREAM_MULTI_PROGRESS_H
diff --git a/src/kenlm/util/stream/multi_stream.hh b/src/kenlm/util/stream/multi_stream.hh
new file mode 100644
index 0000000..6381fc2
--- /dev/null
+++ b/src/kenlm/util/stream/multi_stream.hh
@@ -0,0 +1,124 @@
+#ifndef UTIL_STREAM_MULTI_STREAM_H
+#define UTIL_STREAM_MULTI_STREAM_H
+
+#include "util/fixed_array.hh"
+#include "util/scoped.hh"
+#include "util/stream/chain.hh"
+#include "util/stream/stream.hh"
+
+#include <cstddef>
+#include <new>
+
+#include <cassert>
+#include <cstdlib>
+
+namespace util { namespace stream {
+
+class Chains;
+
+class ChainPositions : public util::FixedArray<util::stream::ChainPosition> {
+ public:
+ ChainPositions() {}
+
+ explicit ChainPositions(std::size_t bound) :
+ util::FixedArray<util::stream::ChainPosition>(bound) {}
+
+ void Init(Chains &chains);
+
+ explicit ChainPositions(Chains &chains) {
+ Init(chains);
+ }
+};
+
+class Chains : public util::FixedArray<util::stream::Chain> {
+ private:
+ template <class T, void (T::*ptr)(const ChainPositions &) = &T::Run> struct CheckForRun {
+ typedef Chains type;
+ };
+
+ public:
+ // Must call Init.
+ Chains() {}
+
+ explicit Chains(std::size_t limit) : util::FixedArray<util::stream::Chain>(limit) {}
+
+ template <class Worker> typename CheckForRun<Worker>::type &operator>>(const Worker &worker) {
+ threads_.push_back(new util::stream::Thread(ChainPositions(*this), worker));
+ return *this;
+ }
+
+ template <class Worker> typename CheckForRun<Worker>::type &operator>>(const boost::reference_wrapper<Worker> &worker) {
+ threads_.push_back(new util::stream::Thread(ChainPositions(*this), worker));
+ return *this;
+ }
+
+ Chains &operator>>(const util::stream::Recycler &recycler) {
+ for (util::stream::Chain *i = begin(); i != end(); ++i)
+ *i >> recycler;
+ return *this;
+ }
+
+ void Wait(bool release_memory = true) {
+ threads_.clear();
+ for (util::stream::Chain *i = begin(); i != end(); ++i) {
+ i->Wait(release_memory);
+ }
+ }
+
+ private:
+ boost::ptr_vector<util::stream::Thread> threads_;
+
+ Chains(const Chains &);
+ void operator=(const Chains &);
+};
+
+inline void ChainPositions::Init(Chains &chains) {
+ util::FixedArray<util::stream::ChainPosition>::Init(chains.size());
+ for (util::stream::Chain *i = chains.begin(); i != chains.end(); ++i) {
+ // use "placement new" syntax to initalize ChainPosition in an already-allocated memory location
+ new (end()) util::stream::ChainPosition(i->Add()); Constructed();
+ }
+}
+
+inline Chains &operator>>(Chains &chains, ChainPositions &positions) {
+ positions.Init(chains);
+ return chains;
+}
+
+template <class T> class GenericStreams : public util::FixedArray<T> {
+ private:
+ typedef util::FixedArray<T> P;
+ public:
+ GenericStreams() {}
+
+ // Limit restricts to positions[0,limit)
+ void Init(const ChainPositions &positions, std::size_t limit) {
+ P::Init(limit);
+ for (const util::stream::ChainPosition *i = positions.begin(); i != positions.begin() + limit; ++i) {
+ P::push_back(*i);
+ }
+ }
+ void Init(const ChainPositions &positions) {
+ Init(positions, positions.size());
+ }
+
+ GenericStreams(const ChainPositions &positions) {
+ Init(positions);
+ }
+
+ void Init(std::size_t amount) {
+ P::Init(amount);
+ }
+};
+
+template <class T> inline Chains &operator>>(Chains &chains, GenericStreams<T> &streams) {
+ ChainPositions positions;
+ chains >> positions;
+ streams.Init(positions);
+ return chains;
+}
+
+typedef GenericStreams<Stream> Streams;
+
+}} // namespaces
+#endif // UTIL_STREAM_MULTI_STREAM_H
diff --git a/src/kenlm/util/stream/rewindable_stream.cc b/src/kenlm/util/stream/rewindable_stream.cc
new file mode 100644
index 0000000..726e2a7
--- /dev/null
+++ b/src/kenlm/util/stream/rewindable_stream.cc
@@ -0,0 +1,134 @@
+#include "util/stream/rewindable_stream.hh"
+#include "util/pcqueue.hh"
+
+#include <iostream>
+
+namespace util {
+namespace stream {
+
+RewindableStream::RewindableStream()
+ : current_(NULL), in_(NULL), out_(NULL), poisoned_(true) {
+ // nothing
+}
+
+void RewindableStream::Init(const ChainPosition &position) {
+ UTIL_THROW_IF2(in_, "RewindableStream::Init twice");
+ in_ = position.in_;
+ out_ = position.out_;
+ hit_poison_ = false;
+ poisoned_ = false;
+ progress_ = position.progress_;
+ entry_size_ = position.GetChain().EntrySize();
+ block_size_ = position.GetChain().BlockSize();
+ block_count_ = position.GetChain().BlockCount();
+ blocks_it_ = 0;
+ marked_ = NULL;
+ UTIL_THROW_IF2(block_count_ < 2, "RewindableStream needs block_count at least two");
+ AppendBlock();
+}
+
+RewindableStream &RewindableStream::operator++() {
+ assert(*this);
+ assert(current_ < block_end_);
+ assert(current_);
+ assert(blocks_it_ < blocks_.size());
+ current_ += entry_size_;
+ if (UTIL_UNLIKELY(current_ == block_end_)) {
+ // Fetch another block if necessary.
+ if (++blocks_it_ == blocks_.size()) {
+ if (!marked_) {
+ Flush(blocks_.begin() + blocks_it_);
+ blocks_it_ = 0;
+ }
+ AppendBlock();
+ assert(poisoned_ || (blocks_it_ == blocks_.size() - 1));
+ if (poisoned_) return *this;
+ }
+ Block &cur_block = blocks_[blocks_it_];
+ current_ = static_cast<uint8_t*>(cur_block.Get());
+ block_end_ = current_ + cur_block.ValidSize();
+ }
+ assert(current_);
+ assert(current_ >= static_cast<uint8_t*>(blocks_[blocks_it_].Get()));
+ assert(current_ < block_end_);
+ assert(block_end_ == blocks_[blocks_it_].ValidEnd());
+ return *this;
+}
+
+void RewindableStream::Mark() {
+ marked_ = current_;
+ Flush(blocks_.begin() + blocks_it_);
+ blocks_it_ = 0;
+}
+
+void RewindableStream::Rewind() {
+ if (current_ != marked_) {
+ poisoned_ = false;
+ }
+ blocks_it_ = 0;
+ current_ = marked_;
+ block_end_ = static_cast<const uint8_t*>(blocks_[blocks_it_].ValidEnd());
+
+ assert(current_);
+ assert(current_ >= static_cast<uint8_t*>(blocks_[blocks_it_].Get()));
+ assert(current_ < block_end_);
+ assert(block_end_ == blocks_[blocks_it_].ValidEnd());
+}
+
+void RewindableStream::Poison() {
+ if (blocks_.empty()) return;
+ assert(*this);
+ assert(blocks_it_ == blocks_.size() - 1);
+
+ // Produce all buffered blocks.
+ blocks_.back().SetValidSize(current_ - static_cast<uint8_t*>(blocks_.back().Get()));
+ Flush(blocks_.end());
+ blocks_it_ = 0;
+
+ Block poison;
+ if (!hit_poison_) {
+ in_->Consume(poison);
+ }
+ poison.SetToPoison();
+ out_->Produce(poison);
+ hit_poison_ = true;
+ poisoned_ = true;
+}
+
+void RewindableStream::AppendBlock() {
+ if (UTIL_UNLIKELY(blocks_.size() >= block_count_)) {
+ std::cerr << "RewindableStream trying to use more blocks than available" << std::endl;
+ abort();
+ }
+ if (UTIL_UNLIKELY(hit_poison_)) {
+ poisoned_ = true;
+ return;
+ }
+ Block get;
+ // The loop is needed since it is *feasible* that we're given 0 sized but
+ // valid blocks
+ do {
+ in_->Consume(get);
+ if (UTIL_LIKELY(get)) {
+ blocks_.push_back(get);
+ } else {
+ hit_poison_ = true;
+ poisoned_ = true;
+ return;
+ }
+ } while (UTIL_UNLIKELY(get.ValidSize() == 0));
+ current_ = static_cast<uint8_t*>(blocks_.back().Get());
+ block_end_ = static_cast<const uint8_t*>(blocks_.back().ValidEnd());
+ blocks_it_ = blocks_.size() - 1;
+}
+
+void RewindableStream::Flush(std::deque<Block>::iterator to) {
+ for (std::deque<Block>::iterator i = blocks_.begin(); i != to; ++i) {
+ out_->Produce(*i);
+ progress_ += i->ValidSize();
+ }
+ blocks_.erase(blocks_.begin(), to);
+}
+
+}
+}
diff --git a/src/kenlm/util/stream/rewindable_stream.hh b/src/kenlm/util/stream/rewindable_stream.hh
new file mode 100644
index 0000000..560825c
--- /dev/null
+++ b/src/kenlm/util/stream/rewindable_stream.hh
@@ -0,0 +1,132 @@
+#ifndef UTIL_STREAM_REWINDABLE_STREAM_H
+#define UTIL_STREAM_REWINDABLE_STREAM_H
+
+#include "util/stream/chain.hh"
+
+#include <boost/noncopyable.hpp>
+
+#include <deque>
+
+namespace util {
+namespace stream {
+
+/**
+ * A RewindableStream is like a Stream (but one that is only used for
+ * creating input at the start of a chain) except that it can be rewound to
+ * be able to re-write a part of the stream before it is sent. Rewinding
+ * has a limit of 2 * block_size_ - 1 in distance (it does *not* buffer an
+ * entire stream into memory, only a maximum of 2 * block_size_).
+ */
+class RewindableStream : boost::noncopyable {
+ public:
+ /**
+ * Creates an uninitialized RewindableStream. You **must** call Init()
+ * on it later!
+ */
+ RewindableStream();
+
+ ~RewindableStream() {
+ Poison();
+ }
+
+ /**
+ * Initializes an existing RewindableStream at a specific position in
+ * a Chain.
+ *
+ * @param position The position in the chain to get input from and
+ * produce output on
+ */
+ void Init(const ChainPosition &position);
+
+ /**
+ * Constructs a RewindableStream at a specific position in a Chain all
+ * in one step.
+ *
+     * Equivalent to RewindableStream a; a.Init(....);
+ */
+ explicit RewindableStream(const ChainPosition &position)
+ : in_(NULL) {
+ Init(position);
+ }
+
+ /**
+ * Gets the record at the current stream position. Const version.
+ */
+ const void *Get() const {
+ assert(!poisoned_);
+ assert(current_);
+ return current_;
+ }
+
+ /**
+ * Gets the record at the current stream position.
+ */
+ void *Get() {
+ assert(!poisoned_);
+ assert(current_);
+ return current_;
+ }
+
+ operator bool() const { return !poisoned_; }
+
+ bool operator!() const { return poisoned_; }
+
+ /**
+ * Marks the current position in the stream to be rewound to later.
+ * Note that you can only rewind back as far as 2 * block_size_ - 1!
+ */
+ void Mark();
+
+ /**
+ * Rewinds the stream back to the marked position. This will throw an
+ * exception if the marked position is too far away.
+ */
+ void Rewind();
+
+ /**
+ * Moves the stream forward to the next record. This internally may
+ * buffer a block for the purposes of rewinding.
+ */
+ RewindableStream& operator++();
+
+ /**
+ * Poisons the stream. This sends any buffered blocks down the chain
+ * and sends a poison block as well (sending at most 2 non-poison and 1
+ * poison block).
+ */
+ void Poison();
+
+ private:
+ void AppendBlock();
+
+ void Flush(std::deque<Block>::iterator to);
+
+ std::deque<Block> blocks_;
+ // current_ is in blocks_[blocks_it_] unless poisoned_.
+ std::size_t blocks_it_;
+
+ std::size_t entry_size_;
+ std::size_t block_size_;
+ std::size_t block_count_;
+
+ uint8_t *marked_, *current_;
+ const uint8_t *block_end_;
+
+ PCQueue<Block> *in_, *out_;
+
+ // Have we hit poison at the end of the stream, even if rewinding?
+ bool hit_poison_;
+    // Is the current position poison?
+ bool poisoned_;
+
+ WorkerProgress progress_;
+};
+
+inline Chain &operator>>(Chain &chain, RewindableStream &stream) {
+ stream.Init(chain.Add());
+ return chain;
+}
+
+}
+}
+#endif
diff --git a/src/kenlm/util/stream/rewindable_stream_test.cc b/src/kenlm/util/stream/rewindable_stream_test.cc
new file mode 100644
index 0000000..f8924c3
--- /dev/null
+++ b/src/kenlm/util/stream/rewindable_stream_test.cc
@@ -0,0 +1,41 @@
+#include "util/stream/io.hh"
+
+#include "util/stream/rewindable_stream.hh"
+#include "util/file.hh"
+
+#define BOOST_TEST_MODULE RewindableStreamTest
+#include <boost/test/unit_test.hpp>
+
+namespace util {
+namespace stream {
+namespace {
+
+BOOST_AUTO_TEST_CASE(RewindableStreamTest) {
+ scoped_fd in(MakeTemp("io_test_temp"));
+ for (uint64_t i = 0; i < 100000; ++i) {
+ WriteOrThrow(in.get(), &i, sizeof(uint64_t));
+ }
+ SeekOrThrow(in.get(), 0);
+
+ ChainConfig config;
+ config.entry_size = 8;
+ config.total_memory = 100;
+ config.block_count = 6;
+
+ Chain chain(config);
+ RewindableStream s;
+ chain >> Read(in.get()) >> s >> kRecycle;
+ uint64_t i = 0;
+ for (; s; ++s, ++i) {
+ BOOST_CHECK_EQUAL(i, *static_cast<const uint64_t*>(s.Get()));
+ if (100000UL - i == 2)
+ s.Mark();
+ }
+ BOOST_CHECK_EQUAL(100000ULL, i);
+ s.Rewind();
+ BOOST_CHECK_EQUAL(100000ULL - 2, *static_cast<const uint64_t*>(s.Get()));
+}
+
+}
+}
+}
diff --git a/src/kenlm/util/stream/sort.hh b/src/kenlm/util/stream/sort.hh
new file mode 100644
index 0000000..1b4801a
--- /dev/null
+++ b/src/kenlm/util/stream/sort.hh
@@ -0,0 +1,599 @@
+/* Usage:
+ * Sort<Compare> sorter(temp, compare);
+ * Chain(config) >> Read(file) >> sorter.Unsorted();
+ * Stream stream;
+ * Chain chain(config) >> sorter.Sorted(internal_config, lazy_config) >> stream;
+ *
+ * Note that sorter must outlive any threads that use Unsorted or Sorted.
+ *
+ * Combiners take the form:
+ * bool operator()(void *into, const void *option, const Compare &compare) const
+ * which returns true iff a combination happened. The sorting algorithm
+ * guarantees compare(into, option). But it does not guarantee
+ * compare(option, into).
+ * Currently, combining is only done in merge steps, not during on-the-fly
+ * sort. Use a hash table for that.
+ */
+
+#ifndef UTIL_STREAM_SORT_H
+#define UTIL_STREAM_SORT_H
+
+#include "util/stream/chain.hh"
+#include "util/stream/config.hh"
+#include "util/stream/io.hh"
+#include "util/stream/stream.hh"
+#include "util/stream/timer.hh"
+
+#include "util/file.hh"
+#include "util/fixed_array.hh"
+#include "util/scoped.hh"
+#include "util/sized_iterator.hh"
+
+#include <algorithm>
+#include <iostream>
+#include <queue>
+#include <string>
+
+namespace util {
+namespace stream {
+
+struct NeverCombine {
+ template <class Compare> bool operator()(const void *, const void *, const Compare &) const {
+ return false;
+ }
+};
+
+// Manage the offsets of sorted blocks in a file.
+class Offsets {
+ public:
+ explicit Offsets(int fd) : log_(fd) {
+ Reset();
+ }
+
+ int File() const { return log_; }
+
+ void Append(uint64_t length) {
+ if (!length) return;
+ ++block_count_;
+ if (length == cur_.length) {
+ ++cur_.run;
+ return;
+ }
+ WriteOrThrow(log_, &cur_, sizeof(Entry));
+ cur_.length = length;
+ cur_.run = 1;
+ }
+
+ void FinishedAppending() {
+ WriteOrThrow(log_, &cur_, sizeof(Entry));
+ SeekOrThrow(log_, sizeof(Entry)); // Skip 0,0 at beginning.
+ cur_.run = 0;
+ if (block_count_) {
+ ReadOrThrow(log_, &cur_, sizeof(Entry));
+ assert(cur_.length);
+ assert(cur_.run);
+ }
+ }
+
+ uint64_t RemainingBlocks() const { return block_count_; }
+
+ uint64_t TotalOffset() const { return output_sum_; }
+
+ uint64_t PeekSize() const {
+ return cur_.length;
+ }
+
+ uint64_t NextSize() {
+ assert(block_count_);
+ uint64_t ret = cur_.length;
+ output_sum_ += ret;
+
+ --cur_.run;
+ --block_count_;
+ if (!cur_.run && block_count_) {
+ ReadOrThrow(log_, &cur_, sizeof(Entry));
+ assert(cur_.length);
+ assert(cur_.run);
+ }
+ return ret;
+ }
+
+ void Reset() {
+ SeekOrThrow(log_, 0);
+ ResizeOrThrow(log_, 0);
+ cur_.length = 0;
+ cur_.run = 0;
+ block_count_ = 0;
+ output_sum_ = 0;
+ }
+
+ private:
+ int log_;
+
+ struct Entry {
+ uint64_t length;
+ uint64_t run;
+ };
+ Entry cur_;
+
+ uint64_t block_count_;
+
+ uint64_t output_sum_;
+};
+
+// A priority queue of entries backed by file buffers
+template <class Compare> class MergeQueue {
+ public:
+ MergeQueue(int fd, std::size_t buffer_size, std::size_t entry_size, const Compare &compare)
+ : queue_(Greater(compare)), in_(fd), buffer_size_(buffer_size), entry_size_(entry_size) {}
+
+ void Push(void *base, uint64_t offset, uint64_t amount) {
+ queue_.push(Entry(base, in_, offset, amount, buffer_size_));
+ }
+
+ const void *Top() const {
+ return queue_.top().Current();
+ }
+
+ void Pop() {
+ Entry top(queue_.top());
+ queue_.pop();
+ if (top.Increment(in_, buffer_size_, entry_size_))
+ queue_.push(top);
+ }
+
+ std::size_t Size() const {
+ return queue_.size();
+ }
+
+ bool Empty() const {
+ return queue_.empty();
+ }
+
+ private:
+ // Priority queue contains these entries.
+ class Entry {
+ public:
+ Entry() {}
+
+ Entry(void *base, int fd, uint64_t offset, uint64_t amount, std::size_t buf_size) {
+ offset_ = offset;
+ remaining_ = amount;
+ buffer_end_ = static_cast<uint8_t*>(base) + buf_size;
+ Read(fd, buf_size);
+ }
+
+ bool Increment(int fd, std::size_t buf_size, std::size_t entry_size) {
+ current_ += entry_size;
+ if (current_ != buffer_end_) return true;
+ return Read(fd, buf_size);
+ }
+
+ const void *Current() const { return current_; }
+
+ private:
+ bool Read(int fd, std::size_t buf_size) {
+ current_ = buffer_end_ - buf_size;
+ std::size_t amount;
+ if (static_cast<uint64_t>(buf_size) < remaining_) {
+ amount = buf_size;
+ } else if (!remaining_) {
+ return false;
+ } else {
+ amount = remaining_;
+ buffer_end_ = current_ + remaining_;
+ }
+ ErsatzPRead(fd, current_, amount, offset_);
+ offset_ += amount;
+ assert(current_ <= buffer_end_);
+ remaining_ -= amount;
+ return true;
+ }
+
+ // Buffer
+ uint8_t *current_, *buffer_end_;
+ // File
+ uint64_t remaining_, offset_;
+ };
+
+ // Wrapper comparison function for queue entries.
+ class Greater : public std::binary_function<const Entry &, const Entry &, bool> {
+ public:
+ explicit Greater(const Compare &compare) : compare_(compare) {}
+
+ bool operator()(const Entry &first, const Entry &second) const {
+ return compare_(second.Current(), first.Current());
+ }
+
+ private:
+ const Compare compare_;
+ };
+
+ typedef std::priority_queue<Entry, std::vector<Entry>, Greater> Queue;
+ Queue queue_;
+
+ const int in_;
+ const std::size_t buffer_size_;
+ const std::size_t entry_size_;
+};
+
+/* A worker object that merges. If the number of pieces to merge exceeds the
+ * arity, it outputs multiple sorted blocks, recording to out_offsets.
+ * However, users will only ever see a single sorted block of output because
+ * Sort::Sorted ensures the arity is higher than the number of pieces before
+ * returning this.
+ */
+template <class Compare, class Combine> class MergingReader {
+ public:
+ MergingReader(int in, Offsets *in_offsets, Offsets *out_offsets, std::size_t buffer_size, std::size_t total_memory, const Compare &compare, const Combine &combine) :
+ compare_(compare), combine_(combine),
+ in_(in),
+ in_offsets_(in_offsets), out_offsets_(out_offsets),
+ buffer_size_(buffer_size), total_memory_(total_memory) {}
+
+ void Run(const ChainPosition &position) {
+ Run(position, false);
+ }
+
+ void Run(const ChainPosition &position, bool assert_one) {
+ // Special case: nothing to read.
+ if (!in_offsets_->RemainingBlocks()) {
+ Link l(position);
+ l.Poison();
+ return;
+ }
+ // If there's just one entry, just read.
+ if (in_offsets_->RemainingBlocks() == 1) {
+ // Sequencing is important.
+ uint64_t offset = in_offsets_->TotalOffset();
+ uint64_t amount = in_offsets_->NextSize();
+ ReadSingle(offset, amount, position);
+ if (out_offsets_) out_offsets_->Append(amount);
+ return;
+ }
+
+ Stream str(position);
+ scoped_malloc buffer(MallocOrThrow(total_memory_));
+ uint8_t *const buffer_end = static_cast<uint8_t*>(buffer.get()) + total_memory_;
+
+ const std::size_t entry_size = position.GetChain().EntrySize();
+
+ while (in_offsets_->RemainingBlocks()) {
+ // Use bigger buffers if there's less remaining.
+ uint64_t per_buffer = static_cast<uint64_t>(std::max<std::size_t>(
+ buffer_size_,
+ static_cast<std::size_t>((static_cast<uint64_t>(total_memory_) / in_offsets_->RemainingBlocks()))));
+ per_buffer -= per_buffer % entry_size;
+ assert(per_buffer);
+
+ // Populate queue.
+ MergeQueue<Compare> queue(in_, per_buffer, entry_size, compare_);
+ for (uint8_t *buf = static_cast<uint8_t*>(buffer.get());
+ in_offsets_->RemainingBlocks() && (buf + std::min(per_buffer, in_offsets_->PeekSize()) <= buffer_end);) {
+ uint64_t offset = in_offsets_->TotalOffset();
+ uint64_t size = in_offsets_->NextSize();
+ queue.Push(buf, offset, size);
+ buf += static_cast<std::size_t>(std::min<uint64_t>(size, per_buffer));
+ }
+ // This shouldn't happen but it's probably better to die than loop indefinitely.
+ if (queue.Size() < 2 && in_offsets_->RemainingBlocks()) {
+ std::cerr << "Bug in sort implementation: not merging at least two stripes." << std::endl;
+ abort();
+ }
+ if (assert_one && in_offsets_->RemainingBlocks()) {
+ std::cerr << "Bug in sort implementation: should only be one merge group for lazy sort" << std::endl;
+ abort();
+ }
+
+ uint64_t written = 0;
+ // Merge including combiner support.
+ memcpy(str.Get(), queue.Top(), entry_size);
+ for (queue.Pop(); !queue.Empty(); queue.Pop()) {
+ if (!combine_(str.Get(), queue.Top(), compare_)) {
+ ++written; ++str;
+ memcpy(str.Get(), queue.Top(), entry_size);
+ }
+ }
+ ++written; ++str;
+ if (out_offsets_)
+ out_offsets_->Append(written * entry_size);
+ }
+ str.Poison();
+ }
+
+ private:
+ void ReadSingle(uint64_t offset, const uint64_t size, const ChainPosition &position) {
+ // Special case: only one to read.
+ const uint64_t end = offset + size;
+ const uint64_t block_size = position.GetChain().BlockSize();
+ Link l(position);
+ for (; offset + block_size < end; ++l, offset += block_size) {
+ ErsatzPRead(in_, l->Get(), block_size, offset);
+ l->SetValidSize(block_size);
+ }
+ ErsatzPRead(in_, l->Get(), end - offset, offset);
+ l->SetValidSize(end - offset);
+ (++l).Poison();
+ return;
+ }
+
+ Compare compare_;
+ Combine combine_;
+
+ int in_;
+
+ protected:
+ Offsets *in_offsets_;
+
+ private:
+ Offsets *out_offsets_;
+
+ std::size_t buffer_size_;
+ std::size_t total_memory_;
+};
+
+// The lazy step owns the remaining files. This keeps track of them.
+template <class Compare, class Combine> class OwningMergingReader : public MergingReader<Compare, Combine> {
+ private:
+ typedef MergingReader<Compare, Combine> P;
+ public:
+ OwningMergingReader(int data, const Offsets &offsets, std::size_t buffer, std::size_t lazy, const Compare &compare, const Combine &combine)
+ : P(data, NULL, NULL, buffer, lazy, compare, combine),
+ data_(data),
+ offsets_(offsets) {}
+
+ void Run(const ChainPosition &position) {
+ P::in_offsets_ = &offsets_;
+ scoped_fd data(data_);
+ scoped_fd offsets_file(offsets_.File());
+ P::Run(position, true);
+ }
+
+ private:
+ int data_;
+ Offsets offsets_;
+};
+
+// Don't use this directly. Worker that sorts blocks.
+template <class Compare> class BlockSorter {
+ public:
+ BlockSorter(Offsets &offsets, const Compare &compare) :
+ offsets_(&offsets), compare_(compare) {}
+
+ void Run(const ChainPosition &position) {
+ const std::size_t entry_size = position.GetChain().EntrySize();
+ for (Link link(position); link; ++link) {
+ // Record the size of each block in a separate file.
+ offsets_->Append(link->ValidSize());
+ void *end = static_cast<uint8_t*>(link->Get()) + link->ValidSize();
+#if defined(_WIN32) || defined(_WIN64)
+ std::stable_sort
+#else
+ std::sort
+#endif
+ (SizedIt(link->Get(), entry_size),
+ SizedIt(end, entry_size),
+ compare_);
+ }
+ offsets_->FinishedAppending();
+ }
+
+ private:
+ Offsets *offsets_;
+ SizedCompare<Compare> compare_;
+};
+
+class BadSortConfig : public Exception {
+ public:
+ BadSortConfig() throw() {}
+ ~BadSortConfig() throw() {}
+};
+
+/** Sort */
+template <class Compare, class Combine = NeverCombine> class Sort {
+ public:
+ /** Constructs an object capable of sorting */
+ Sort(Chain &in, const SortConfig &config, const Compare &compare = Compare(), const Combine &combine = Combine())
+ : config_(config),
+ data_(MakeTemp(config.temp_prefix)),
+ offsets_file_(MakeTemp(config.temp_prefix)), offsets_(offsets_file_.get()),
+ compare_(compare), combine_(combine),
+ entry_size_(in.EntrySize()) {
+ UTIL_THROW_IF(!entry_size_, BadSortConfig, "Sorting entries of size 0");
+ // Make buffer_size a multiple of the entry_size.
+ config_.buffer_size -= config_.buffer_size % entry_size_;
+ UTIL_THROW_IF(!config_.buffer_size, BadSortConfig, "Sort buffer too small");
+ UTIL_THROW_IF(config_.total_memory < config_.buffer_size * 4, BadSortConfig, "Sorting memory " << config_.total_memory << " is too small for four buffers (two read and two write).");
+ in >> BlockSorter<Compare>(offsets_, compare_) >> WriteAndRecycle(data_.get());
+ }
+
+ uint64_t Size() const {
+ return SizeOrThrow(data_.get());
+ }
+
+ // Do merge sort, terminating when lazy merge could be done with the
+ // specified memory. Return the minimum memory necessary to do lazy merge.
+ std::size_t Merge(std::size_t lazy_memory) {
+ if (offsets_.RemainingBlocks() <= 1) return 0;
+ const uint64_t lazy_arity = std::max<uint64_t>(1, lazy_memory / config_.buffer_size);
+ uint64_t size = Size();
+ /* No overflow because
+ * offsets_.RemainingBlocks() * config_.buffer_size <= lazy_memory ||
+ * size < lazy_memory
+ */
+ if (offsets_.RemainingBlocks() <= lazy_arity || size <= static_cast<uint64_t>(lazy_memory))
+ return std::min<std::size_t>(size, offsets_.RemainingBlocks() * config_.buffer_size);
+
+ scoped_fd data2(MakeTemp(config_.temp_prefix));
+ int fd_in = data_.get(), fd_out = data2.get();
+ scoped_fd offsets2_file(MakeTemp(config_.temp_prefix));
+ Offsets offsets2(offsets2_file.get());
+ Offsets *offsets_in = &offsets_, *offsets_out = &offsets2;
+
+ // Double buffered writing.
+ ChainConfig chain_config;
+ chain_config.entry_size = entry_size_;
+ chain_config.block_count = 2;
+ chain_config.total_memory = config_.buffer_size * 2;
+ Chain chain(chain_config);
+
+ while (offsets_in->RemainingBlocks() > lazy_arity) {
+ if (size <= static_cast<uint64_t>(lazy_memory)) break;
+ std::size_t reading_memory = config_.total_memory - 2 * config_.buffer_size;
+ if (size < static_cast<uint64_t>(reading_memory)) {
+ reading_memory = static_cast<std::size_t>(size);
+ }
+ SeekOrThrow(fd_in, 0);
+ chain >>
+ MergingReader<Compare, Combine>(
+ fd_in,
+ offsets_in, offsets_out,
+ config_.buffer_size,
+ reading_memory,
+ compare_, combine_) >>
+ WriteAndRecycle(fd_out);
+ chain.Wait();
+ offsets_out->FinishedAppending();
+ ResizeOrThrow(fd_in, 0);
+ offsets_in->Reset();
+ std::swap(fd_in, fd_out);
+ std::swap(offsets_in, offsets_out);
+ size = SizeOrThrow(fd_in);
+ }
+
+ SeekOrThrow(fd_in, 0);
+ if (fd_in == data2.get()) {
+ data_.reset(data2.release());
+ offsets_file_.reset(offsets2_file.release());
+ offsets_ = offsets2;
+ }
+ if (offsets_.RemainingBlocks() <= 1) return 0;
+ // No overflow because the while loop exited.
+ return std::min(size, offsets_.RemainingBlocks() * static_cast<uint64_t>(config_.buffer_size));
+ }
+
+ // Output to chain, using this amount of memory, maximum, for lazy merge
+ // sort.
+ void Output(Chain &out, std::size_t lazy_memory) {
+ Merge(lazy_memory);
+ out.SetProgressTarget(Size());
+ out >> OwningMergingReader<Compare, Combine>(data_.get(), offsets_, config_.buffer_size, lazy_memory, compare_, combine_);
+ data_.release();
+ offsets_file_.release();
+ }
+
+ /* If a pipeline step is reading sorted input and writing to a different
+ * sort order, then there's a trade-off between using RAM to read lazily
+ * (avoiding copying the file) and using RAM to increase block size and,
+ * therefore, decrease the number of merge sort passes in the next
+ * iteration.
+ *
+ * Merge sort takes log_{arity}(pieces) passes. Thus, each time the chain
+ * block size is multiplied by arity, the number of output passes decreases
+ * by one. Up to a constant, then, log_{arity}(chain) is the number of
+ * passes saved. Chain simply divides the memory evenly over all blocks.
+ *
+ * Lazy sort saves this many passes (up to a constant)
+ * log_{arity}((memory-lazy)/block_count) + 1
+ * Non-lazy sort saves this many passes (up to the same constant):
+ * log_{arity}(memory/block_count)
+ * Add log_{arity}(block_count) to both:
+ * log_{arity}(memory-lazy) + 1 versus log_{arity}(memory)
+ * Take arity to the power of both sizes (arity > 1)
+ * (memory - lazy)*arity versus memory
+ * Solve for lazy
+ * lazy = memory * (arity - 1) / arity
+ */
+ std::size_t DefaultLazy() {
+ float arity = static_cast<float>(config_.total_memory / config_.buffer_size);
+ return static_cast<std::size_t>(static_cast<float>(config_.total_memory) * (arity - 1.0) / arity);
+ }
+
+ // Same as Output with default lazy memory setting.
+ void Output(Chain &out) {
+ Output(out, DefaultLazy());
+ }
+
+ // Completely merge sort and transfer ownership to the caller.
+ int StealCompleted() {
+ // Merge all the way.
+ Merge(0);
+ SeekOrThrow(data_.get(), 0);
+ offsets_file_.reset();
+ return data_.release();
+ }
+
+ private:
+ SortConfig config_;
+
+ scoped_fd data_;
+
+ scoped_fd offsets_file_;
+ Offsets offsets_;
+
+ const Compare compare_;
+ const Combine combine_;
+ const std::size_t entry_size_;
+};
+
+// returns bytes to be read on demand.
+template <class Compare, class Combine> uint64_t BlockingSort(Chain &chain, const SortConfig &config, const Compare &compare = Compare(), const Combine &combine = NeverCombine()) {
+ Sort<Compare, Combine> sorter(chain, config, compare, combine);
+ chain.Wait(true);
+ uint64_t size = sorter.Size();
+ sorter.Output(chain);
+ return size;
+}
+
+/**
+ * Represents an @ref util::FixedArray "array" capable of storing @ref util::stream::Sort "Sort" objects.
+ *
+ * In the anticipated use case, an instance of this class will maintain one @ref util::stream::Sort "Sort" object
+ * for each n-gram order (ranging from 1 up to the maximum n-gram order being processed).
+ * Use in this manner would enable the n-grams each n-gram order to be sorted, in parallel.
+ *
+ * @tparam Compare An @ref Comparator "ngram comparator" to use during sorting.
+ */
+template <class Compare, class Combine = NeverCombine> class Sorts : public FixedArray<Sort<Compare, Combine> > {
+ private:
+ typedef Sort<Compare, Combine> S;
+ typedef FixedArray<S> P;
+
+ public:
+ /**
+ * Constructs, but does not initialize.
+ *
+ * @ref util::FixedArray::Init() "Init" must be called before use.
+ *
+ * @see util::FixedArray::Init()
+ */
+ Sorts() {}
+
+ /**
+ * Constructs an @ref util::FixedArray "array" capable of storing a fixed number of @ref util::stream::Sort "Sort" objects.
+ *
+ * @param number The maximum number of @ref util::stream::Sort "sorters" that can be held by this @ref util::FixedArray "array"
+ * @see util::FixedArray::FixedArray()
+ */
+ explicit Sorts(std::size_t number) : FixedArray<Sort<Compare, Combine> >(number) {}
+
+ /**
+ * Constructs a new @ref util::stream::Sort "Sort" object which is stored in this @ref util::FixedArray "array".
+ *
+ * The new @ref util::stream::Sort "Sort" object is constructed using the provided @ref util::stream::SortConfig "SortConfig" and @ref Comparator "ngram comparator";
+ * once constructed, a new worker @ref util::stream::Thread "thread" (owned by the @ref util::stream::Chain "chain") will sort the n-gram data stored
+ * in the @ref util::stream::Block "blocks" of the provided @ref util::stream::Chain "chain".
+ *
+ * @see util::stream::Sort::Sort()
+ * @see util::stream::Chain::operator>>()
+ */
+ void push_back(util::stream::Chain &chain, const util::stream::SortConfig &config, const Compare &compare = Compare(), const Combine &combine = Combine()) {
+      new (P::end()) S(chain, config, compare, combine); // use "placement new" syntax to initialize S in an already-allocated memory location
+ P::Constructed();
+ }
+};
+
+} // namespace stream
+} // namespace util
+
+#endif // UTIL_STREAM_SORT_H
diff --git a/src/kenlm/util/stream/sort_test.cc b/src/kenlm/util/stream/sort_test.cc
new file mode 100644
index 0000000..fc97ffd
--- /dev/null
+++ b/src/kenlm/util/stream/sort_test.cc
@@ -0,0 +1,62 @@
+#include "util/stream/sort.hh"
+
+#define BOOST_TEST_MODULE SortTest
+#include <boost/test/unit_test.hpp>
+
+#include <algorithm>
+
+#include <unistd.h>
+
+namespace util { namespace stream { namespace {
+
+struct CompareUInt64 : public std::binary_function<const void *, const void *, bool> {
+ bool operator()(const void *first, const void *second) const {
+ return *static_cast<const uint64_t*>(first) < *reinterpret_cast<const uint64_t*>(second);
+ }
+};
+
+const uint64_t kSize = 100000;
+
+struct Putter {
+ Putter(std::vector<uint64_t> &shuffled) : shuffled_(shuffled) {}
+
+ void Run(const ChainPosition &position) {
+ Stream put_shuffled(position);
+ for (uint64_t i = 0; i < shuffled_.size(); ++i, ++put_shuffled) {
+ *static_cast<uint64_t*>(put_shuffled.Get()) = shuffled_[i];
+ }
+ put_shuffled.Poison();
+ }
+ std::vector<uint64_t> &shuffled_;
+};
+
+BOOST_AUTO_TEST_CASE(FromShuffled) {
+ std::vector<uint64_t> shuffled;
+ shuffled.reserve(kSize);
+ for (uint64_t i = 0; i < kSize; ++i) {
+ shuffled.push_back(i);
+ }
+ std::random_shuffle(shuffled.begin(), shuffled.end());
+
+ ChainConfig config;
+ config.entry_size = 8;
+ config.total_memory = 800;
+ config.block_count = 3;
+
+ SortConfig merge_config;
+ merge_config.temp_prefix = "sort_test_temp";
+ merge_config.buffer_size = 800;
+ merge_config.total_memory = 3300;
+
+ Chain chain(config);
+ chain >> Putter(shuffled);
+ BlockingSort(chain, merge_config, CompareUInt64(), NeverCombine());
+ Stream sorted;
+ chain >> sorted >> kRecycle;
+ for (uint64_t i = 0; i < kSize; ++i, ++sorted) {
+ BOOST_CHECK_EQUAL(i, *static_cast<const uint64_t*>(sorted.Get()));
+ }
+ BOOST_CHECK(!sorted);
+}
+
+}}} // namespaces
diff --git a/src/kenlm/util/stream/stream.hh b/src/kenlm/util/stream/stream.hh
new file mode 100644
index 0000000..ee1e9fa
--- /dev/null
+++ b/src/kenlm/util/stream/stream.hh
@@ -0,0 +1,77 @@
+#ifndef UTIL_STREAM_STREAM_H
+#define UTIL_STREAM_STREAM_H
+
+#include "util/stream/chain.hh"
+
+#include <boost/noncopyable.hpp>
+
+#include <cassert>
+#include <stdint.h>
+
+namespace util {
+namespace stream {
+
+class Stream : boost::noncopyable {
+ public:
+ Stream() : current_(NULL), end_(NULL) {}
+
+ void Init(const ChainPosition &position) {
+ entry_size_ = position.GetChain().EntrySize();
+ block_size_ = position.GetChain().BlockSize();
+ block_it_.Init(position);
+ StartBlock();
+ }
+
+ explicit Stream(const ChainPosition &position) {
+ Init(position);
+ }
+
+ operator bool() const { return current_ != NULL; }
+ bool operator!() const { return current_ == NULL; }
+
+ const void *Get() const { return current_; }
+ void *Get() { return current_; }
+
+ void Poison() {
+ block_it_->SetValidSize(current_ - static_cast<uint8_t*>(block_it_->Get()));
+ ++block_it_;
+ block_it_.Poison();
+ }
+
+ Stream &operator++() {
+ assert(*this);
+ assert(current_ < end_);
+ current_ += entry_size_;
+ if (current_ == end_) {
+ ++block_it_;
+ StartBlock();
+ }
+ return *this;
+ }
+
+ private:
+ void StartBlock() {
+ for (; block_it_ && !block_it_->ValidSize(); ++block_it_) {}
+ current_ = static_cast<uint8_t*>(block_it_->Get());
+ end_ = current_ + block_it_->ValidSize();
+ }
+
+ // The following are pointers to raw memory
+ // current_ is the current record
+ // end_ is the end of the block (so we know when to move to the next block)
+ uint8_t *current_, *end_;
+
+ std::size_t entry_size_;
+ std::size_t block_size_;
+
+ Link block_it_;
+};
+
+inline Chain &operator>>(Chain &chain, Stream &stream) {
+ stream.Init(chain.Add());
+ return chain;
+}
+
+} // namespace stream
+} // namespace util
+#endif // UTIL_STREAM_STREAM_H
diff --git a/src/joshua/decoder/ff/lm/kenlm/util/stream/stream_test.cc b/src/kenlm/util/stream/stream_test.cc
similarity index 100%
rename from src/joshua/decoder/ff/lm/kenlm/util/stream/stream_test.cc
rename to src/kenlm/util/stream/stream_test.cc
diff --git a/src/kenlm/util/stream/timer.hh b/src/kenlm/util/stream/timer.hh
new file mode 100644
index 0000000..9e9573d
--- /dev/null
+++ b/src/kenlm/util/stream/timer.hh
@@ -0,0 +1,16 @@
+#ifndef UTIL_STREAM_TIMER_H
+#define UTIL_STREAM_TIMER_H
+
+// Sorry Jon, this was adding library dependencies in Moses and people complained.
+
+/*#include <boost/version.hpp>
+
+#if BOOST_VERSION >= 104800
+#include <boost/timer/timer.hpp>
+#define UTIL_TIMER(str) boost::timer::auto_cpu_timer timer(std::cerr, 1, (str))
+#else
+//#warning Using Boost older than 1.48. Timing information will not be available.*/
+#define UTIL_TIMER(str)
+//#endif
+
+#endif // UTIL_STREAM_TIMER_H
diff --git a/src/kenlm/util/string_piece.cc b/src/kenlm/util/string_piece.cc
new file mode 100644
index 0000000..180b4e4
--- /dev/null
+++ b/src/kenlm/util/string_piece.cc
@@ -0,0 +1,192 @@
+// Copyright 2004 The RE2 Authors. All Rights Reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in string_piece.hh.
+
+#include "util/string_piece.hh"
+
+#include <algorithm>
+#include <climits>
+
+#ifndef HAVE_ICU
+
+typedef StringPiece::size_type size_type;
+
+void StringPiece::CopyToString(std::string* target) const {
+ target->assign(ptr_, length_);
+}
+
+size_type StringPiece::find(const StringPiece& s, size_type pos) const {
+ // Not sure why length_ < 0 was here since it's std::size_t.
+ if (/*length_ < 0 || */pos > static_cast<size_type>(length_))
+ return npos;
+
+ const char* result = std::search(ptr_ + pos, ptr_ + length_,
+ s.ptr_, s.ptr_ + s.length_);
+ const size_type xpos = result - ptr_;
+ return xpos + s.length_ <= length_ ? xpos : npos;
+}
+
+size_type StringPiece::find(char c, size_type pos) const {
+ if (length_ <= 0 || pos >= static_cast<size_type>(length_)) {
+ return npos;
+ }
+ const char* result = std::find(ptr_ + pos, ptr_ + length_, c);
+ return result != ptr_ + length_ ? result - ptr_ : npos;
+}
+
+size_type StringPiece::rfind(const StringPiece& s, size_type pos) const {
+ if (length_ < s.length_) return npos;
+ const size_t ulen = length_;
+ if (s.length_ == 0) return std::min(ulen, pos);
+
+ const char* last = ptr_ + std::min(ulen - s.length_, pos) + s.length_;
+ const char* result = std::find_end(ptr_, last, s.ptr_, s.ptr_ + s.length_);
+ return result != last ? result - ptr_ : npos;
+}
+
+size_type StringPiece::rfind(char c, size_type pos) const {
+ if (length_ <= 0) return npos;
+ for (int i = std::min(pos, static_cast<size_type>(length_ - 1));
+ i >= 0; --i) {
+ if (ptr_[i] == c) {
+ return i;
+ }
+ }
+ return npos;
+}
+
+// For each character in characters_wanted, sets the index corresponding
+// to the ASCII code of that character to 1 in table. This is used by
+// the find_.*_of methods below to tell whether or not a character is in
+// the lookup table in constant time.
+// The argument `table' must be an array that is large enough to hold all
+// the possible values of an unsigned char. Thus it should be declared
+// as follows:
+// bool table[UCHAR_MAX + 1]
+static inline void BuildLookupTable(const StringPiece& characters_wanted,
+ bool* table) {
+ const size_type length = characters_wanted.length();
+ const char* const data = characters_wanted.data();
+ for (size_type i = 0; i < length; ++i) {
+ table[static_cast<unsigned char>(data[i])] = true;
+ }
+}
+
+size_type StringPiece::find_first_of(const StringPiece& s,
+ size_type pos) const {
+ if (length_ == 0 || s.length_ == 0)
+ return npos;
+
+ // Avoid the cost of BuildLookupTable() for a single-character search.
+ if (s.length_ == 1)
+ return find_first_of(s.ptr_[0], pos);
+
+ bool lookup[UCHAR_MAX + 1] = { false };
+ BuildLookupTable(s, lookup);
+ for (size_type i = pos; i < length_; ++i) {
+ if (lookup[static_cast<unsigned char>(ptr_[i])]) {
+ return i;
+ }
+ }
+ return npos;
+}
+
+size_type StringPiece::find_first_not_of(const StringPiece& s,
+ size_type pos) const {
+ if (length_ == 0)
+ return npos;
+
+ if (s.length_ == 0)
+ return 0;
+
+ // Avoid the cost of BuildLookupTable() for a single-character search.
+ if (s.length_ == 1)
+ return find_first_not_of(s.ptr_[0], pos);
+
+ bool lookup[UCHAR_MAX + 1] = { false };
+ BuildLookupTable(s, lookup);
+ for (size_type i = pos; i < length_; ++i) {
+ if (!lookup[static_cast<unsigned char>(ptr_[i])]) {
+ return i;
+ }
+ }
+ return npos;
+}
+
+size_type StringPiece::find_first_not_of(char c, size_type pos) const {
+ if (length_ == 0)
+ return npos;
+
+ for (; pos < length_; ++pos) {
+ if (ptr_[pos] != c) {
+ return pos;
+ }
+ }
+ return npos;
+}
+
+size_type StringPiece::find_last_of(const StringPiece& s, size_type pos) const {
+ if (length_ == 0 || s.length_ == 0)
+ return npos;
+
+ // Avoid the cost of BuildLookupTable() for a single-character search.
+ if (s.length_ == 1)
+ return find_last_of(s.ptr_[0], pos);
+
+ bool lookup[UCHAR_MAX + 1] = { false };
+ BuildLookupTable(s, lookup);
+ for (size_type i = std::min(pos, length_ - 1); ; --i) {
+ if (lookup[static_cast<unsigned char>(ptr_[i])])
+ return i;
+ if (i == 0)
+ break;
+ }
+ return npos;
+}
+
+size_type StringPiece::find_last_not_of(const StringPiece& s,
+ size_type pos) const {
+ if (length_ == 0)
+ return npos;
+
+ size_type i = std::min(pos, length_ - 1);
+ if (s.length_ == 0)
+ return i;
+
+ // Avoid the cost of BuildLookupTable() for a single-character search.
+ if (s.length_ == 1)
+ return find_last_not_of(s.ptr_[0], pos);
+
+ bool lookup[UCHAR_MAX + 1] = { false };
+ BuildLookupTable(s, lookup);
+ for (; ; --i) {
+ if (!lookup[static_cast<unsigned char>(ptr_[i])])
+ return i;
+ if (i == 0)
+ break;
+ }
+ return npos;
+}
+
+size_type StringPiece::find_last_not_of(char c, size_type pos) const {
+ if (length_ == 0)
+ return npos;
+
+ for (size_type i = std::min(pos, length_ - 1); ; --i) {
+ if (ptr_[i] != c)
+ return i;
+ if (i == 0)
+ break;
+ }
+ return npos;
+}
+
+StringPiece StringPiece::substr(size_type pos, size_type n) const {
+ if (pos > length_) pos = length_;
+ if (n > length_ - pos) n = length_ - pos;
+ return StringPiece(ptr_ + pos, n);
+}
+
+const size_type StringPiece::npos = size_type(-1);
+
+#endif // !HAVE_ICU
diff --git a/src/kenlm/util/string_piece.hh b/src/kenlm/util/string_piece.hh
new file mode 100644
index 0000000..9c47982
--- /dev/null
+++ b/src/kenlm/util/string_piece.hh
@@ -0,0 +1,270 @@
+/* If you use ICU in your program, then compile with -DHAVE_ICU -licui18n. If
+ * you don't use ICU, then this will use the Google implementation from Chrome.
+ * This has been modified from the original version to let you choose.
+ */
+
+// Copyright 2008, Google Inc.
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+// Copied from strings/stringpiece.h with modifications
+//
+// A string-like object that points to a sized piece of memory.
+//
+// Functions or methods may use const StringPiece& parameters to accept either
+// a "const char*" or a "string" value that will be implicitly converted to
+// a StringPiece. The implicit conversion means that it is often appropriate
+// to include this .h file in other files rather than forward-declaring
+// StringPiece as would be appropriate for most other Google classes.
+//
+// Systematic usage of StringPiece is encouraged as it will reduce unnecessary
+// conversions from "const char*" to "string" and back again.
+//
+
+#ifndef UTIL_STRING_PIECE_H
+#define UTIL_STRING_PIECE_H
+
+#include "util/have.hh"
+
+#include <cstring>
+#include <iosfwd>
+#include <ostream>
+
+#ifdef HAVE_ICU
+#include <unicode/stringpiece.h>
+#include <unicode/uversion.h>
+
+// Old versions of ICU don't define operator== and operator!=.
+#if (U_ICU_VERSION_MAJOR_NUM < 4) || ((U_ICU_VERSION_MAJOR_NUM == 4) && (U_ICU_VERSION_MINOR_NUM < 4))
+#warning You are using an old version of ICU. Consider upgrading to ICU >= 4.6.
+inline bool operator==(const StringPiece& x, const StringPiece& y) {
+ if (x.size() != y.size())
+ return false;
+
+ return std::memcmp(x.data(), y.data(), x.size()) == 0;
+}
+
+inline bool operator!=(const StringPiece& x, const StringPiece& y) {
+ return !(x == y);
+}
+#endif // old version of ICU
+
+U_NAMESPACE_BEGIN
+
+inline bool starts_with(const StringPiece& longer, const StringPiece& prefix) {
+ int longersize = longer.size(), prefixsize = prefix.size();
+ return longersize >= prefixsize && std::memcmp(longer.data(), prefix.data(), prefixsize) == 0;
+}
+
+#else
+
+#include <algorithm>
+#include <cstddef>
+#include <string>
+#include <cstring>
+
+#ifdef WIN32
+#undef max
+#undef min
+#endif
+
+class StringPiece {
+ public:
+ typedef size_t size_type;
+
+ private:
+ const char* ptr_;
+ size_type length_;
+
+ public:
+ // We provide non-explicit singleton constructors so users can pass
+ // in a "const char*" or a "string" wherever a "StringPiece" is
+ // expected.
+ StringPiece() : ptr_(NULL), length_(0) { }
+ StringPiece(const char* str)
+ : ptr_(str), length_((str == NULL) ? 0 : strlen(str)) { }
+ StringPiece(const std::string& str)
+ : ptr_(str.data()), length_(str.size()) { }
+ StringPiece(const char* offset, size_type len)
+ : ptr_(offset), length_(len) { }
+
+ // data() may return a pointer to a buffer with embedded NULs, and the
+ // returned buffer may or may not be null terminated. Therefore it is
+ // typically a mistake to pass data() to a routine that expects a NUL
+ // terminated string.
+ const char* data() const { return ptr_; }
+ size_type size() const { return length_; }
+ size_type length() const { return length_; }
+ bool empty() const { return length_ == 0; }
+
+ void clear() { ptr_ = NULL; length_ = 0; }
+ void set(const char* data, size_type len) { ptr_ = data; length_ = len; }
+ void set(const char* str) {
+ ptr_ = str;
+ length_ = str ? strlen(str) : 0;
+ }
+ void set(const void* data, size_type len) {
+ ptr_ = reinterpret_cast<const char*>(data);
+ length_ = len;
+ }
+
+ char operator[](size_type i) const { return ptr_[i]; }
+
+ void remove_prefix(size_type n) {
+ ptr_ += n;
+ length_ -= n;
+ }
+
+ void remove_suffix(size_type n) {
+ length_ -= n;
+ }
+
+ int compare(const StringPiece& x) const {
+ int r = wordmemcmp(ptr_, x.ptr_, std::min(length_, x.length_));
+ if (r == 0) {
+ if (length_ < x.length_) r = -1;
+ else if (length_ > x.length_) r = +1;
+ }
+ return r;
+ }
+
+ std::string as_string() const {
+ // std::string doesn't like to take a NULL pointer even with a 0 size.
+ return std::string(!empty() ? data() : "", size());
+ }
+
+ void CopyToString(std::string* target) const;
+ void AppendToString(std::string* target) const;
+
+ // Does "this" start with "x"
+ bool starts_with(const StringPiece& x) const {
+ return ((length_ >= x.length_) &&
+ (wordmemcmp(ptr_, x.ptr_, x.length_) == 0));
+ }
+
+ // Does "this" end with "x"
+ bool ends_with(const StringPiece& x) const {
+ return ((length_ >= x.length_) &&
+ (wordmemcmp(ptr_ + (length_-x.length_), x.ptr_, x.length_) == 0));
+ }
+
+ // standard STL container boilerplate
+ typedef char value_type;
+ typedef const char* pointer;
+ typedef const char& reference;
+ typedef const char& const_reference;
+ typedef ptrdiff_t difference_type;
+ static const size_type npos;
+ typedef const char* const_iterator;
+ typedef const char* iterator;
+ typedef std::reverse_iterator<const_iterator> const_reverse_iterator;
+ typedef std::reverse_iterator<iterator> reverse_iterator;
+ iterator begin() const { return ptr_; }
+ iterator end() const { return ptr_ + length_; }
+ const_reverse_iterator rbegin() const {
+ return const_reverse_iterator(ptr_ + length_);
+ }
+ const_reverse_iterator rend() const {
+ return const_reverse_iterator(ptr_);
+ }
+
+ size_type max_size() const { return length_; }
+ size_type capacity() const { return length_; }
+
+ size_type copy(char* buf, size_type n, size_type pos = 0) const;
+
+ size_type find(const StringPiece& s, size_type pos = 0) const;
+ size_type find(char c, size_type pos = 0) const;
+ size_type rfind(const StringPiece& s, size_type pos = npos) const;
+ size_type rfind(char c, size_type pos = npos) const;
+
+ size_type find_first_of(const StringPiece& s, size_type pos = 0) const;
+ size_type find_first_of(char c, size_type pos = 0) const {
+ return find(c, pos);
+ }
+ size_type find_first_not_of(const StringPiece& s, size_type pos = 0) const;
+ size_type find_first_not_of(char c, size_type pos = 0) const;
+ size_type find_last_of(const StringPiece& s, size_type pos = npos) const;
+ size_type find_last_of(char c, size_type pos = npos) const {
+ return rfind(c, pos);
+ }
+ size_type find_last_not_of(const StringPiece& s, size_type pos = npos) const;
+ size_type find_last_not_of(char c, size_type pos = npos) const;
+
+ StringPiece substr(size_type pos, size_type n = npos) const;
+
+ static int wordmemcmp(const char* p, const char* p2, size_type N) {
+ return std::memcmp(p, p2, N);
+ }
+};
+
+inline bool operator==(const StringPiece& x, const StringPiece& y) {
+ if (x.size() != y.size())
+ return false;
+
+ return std::memcmp(x.data(), y.data(), x.size()) == 0;
+}
+
+inline bool operator!=(const StringPiece& x, const StringPiece& y) {
+ return !(x == y);
+}
+
+inline bool starts_with(const StringPiece& longer, const StringPiece& prefix) {
+ return longer.starts_with(prefix);
+}
+
+#endif // HAVE_ICU undefined
+
+inline bool operator<(const StringPiece& x, const StringPiece& y) {
+ const int r = std::memcmp(x.data(), y.data(),
+ std::min(x.size(), y.size()));
+ return ((r < 0) || ((r == 0) && (x.size() < y.size())));
+}
+
+inline bool operator>(const StringPiece& x, const StringPiece& y) {
+ return y < x;
+}
+
+inline bool operator<=(const StringPiece& x, const StringPiece& y) {
+ return !(x > y);
+}
+
+inline bool operator>=(const StringPiece& x, const StringPiece& y) {
+ return !(x < y);
+}
+
+// allow StringPiece to be logged (needed for unit testing).
+inline std::ostream& operator<<(std::ostream& o, const StringPiece& piece) {
+ return o.write(piece.data(), static_cast<std::streamsize>(piece.size()));
+}
+
+#ifdef HAVE_ICU
+U_NAMESPACE_END
+using U_NAMESPACE_QUALIFIER StringPiece;
+#endif
+
+#endif // UTIL_STRING_PIECE_H
diff --git a/src/joshua/decoder/ff/lm/kenlm/util/string_piece_hash.hh b/src/kenlm/util/string_piece_hash.hh
similarity index 100%
rename from src/joshua/decoder/ff/lm/kenlm/util/string_piece_hash.hh
rename to src/kenlm/util/string_piece_hash.hh
diff --git a/src/kenlm/util/string_stream.hh b/src/kenlm/util/string_stream.hh
new file mode 100644
index 0000000..730403d
--- /dev/null
+++ b/src/kenlm/util/string_stream.hh
@@ -0,0 +1,44 @@
+#ifndef UTIL_STRING_STREAM_H
+#define UTIL_STRING_STREAM_H
+
+#include "util/fake_ostream.hh"
+
+#include <cassert>
+#include <string>
+
+namespace util {
+
+class StringStream : public FakeOStream<StringStream> {
+ public:
+ // Semantics: appends to string. Remember to clear first!
+ explicit StringStream(std::string &out)
+ : out_(out) {}
+
+ StringStream &flush() { return *this; }
+
+ StringStream &write(const void *data, std::size_t length) {
+ out_.append(static_cast<const char*>(data), length);
+ return *this;
+ }
+
+ protected:
+ friend class FakeOStream<StringStream>;
+ char *Ensure(std::size_t amount) {
+ std::size_t current = out_.size();
+ out_.resize(out_.size() + amount);
+ return &out_[current];
+ }
+
+ void AdvanceTo(char *to) {
+ assert(to <= &*out_.end());
+ assert(to >= &*out_.begin());
+ out_.resize(to - &*out_.begin());
+ }
+
+ private:
+ std::string &out_;
+};
+
+} // namespace
+
+#endif // UTIL_STRING_STREAM_H
diff --git a/src/kenlm/util/string_stream_test.cc b/src/kenlm/util/string_stream_test.cc
new file mode 100644
index 0000000..3a7734f
--- /dev/null
+++ b/src/kenlm/util/string_stream_test.cc
@@ -0,0 +1,80 @@
+#define BOOST_LEXICAL_CAST_ASSUME_C_LOCALE
+#define BOOST_TEST_MODULE FakeOStreamTest
+
+#include "util/string_stream.hh"
+#include <boost/test/unit_test.hpp>
+#include <boost/lexical_cast.hpp>
+
+#include <cstddef>
+#include <limits>
+
+namespace util { namespace {
+
+template <class T> void TestEqual(const T value) {
+ std::string str;
+ StringStream(str) << value;
+ BOOST_CHECK_EQUAL(boost::lexical_cast<std::string>(value), str);
+}
+
+template <class T> void TestCorners() {
+ TestEqual(std::numeric_limits<T>::max());
+ TestEqual(std::numeric_limits<T>::min());
+ TestEqual(static_cast<T>(0));
+ TestEqual(static_cast<T>(-1));
+ TestEqual(static_cast<T>(1));
+}
+
+BOOST_AUTO_TEST_CASE(Integer) {
+ TestCorners<char>();
+ TestCorners<signed char>();
+ TestCorners<unsigned char>();
+
+ TestCorners<short>();
+ TestCorners<signed short>();
+ TestCorners<unsigned short>();
+
+ TestCorners<int>();
+ TestCorners<unsigned int>();
+ TestCorners<signed int>();
+
+ TestCorners<long>();
+ TestCorners<unsigned long>();
+ TestCorners<signed long>();
+
+ TestCorners<long long>();
+ TestCorners<unsigned long long>();
+ TestCorners<signed long long>();
+
+ TestCorners<std::size_t>();
+}
+
+enum TinyEnum { EnumValue };
+
+BOOST_AUTO_TEST_CASE(EnumCase) {
+ TestEqual(EnumValue);
+}
+
+BOOST_AUTO_TEST_CASE(Strings) {
+ TestEqual("foo");
+ const char *a = "bar";
+ TestEqual(a);
+ StringPiece piece("abcdef");
+ TestEqual(piece);
+ TestEqual(StringPiece());
+
+ char non_const[3];
+ non_const[0] = 'b';
+ non_const[1] = 'c';
+ non_const[2] = 0;
+ std::string out;
+ StringStream(out) << "a" << non_const << 'c';
+ BOOST_CHECK_EQUAL("abcc", out);
+
+ // Now test as a separate object.
+ out.clear();
+ StringStream stream(out);
+ stream << "a" << non_const << 'c' << piece;
+ BOOST_CHECK_EQUAL("abccabcdef", out);
+}
+
+}} // namespaces
diff --git a/src/kenlm/util/thread_pool.hh b/src/kenlm/util/thread_pool.hh
new file mode 100644
index 0000000..dce987c
--- /dev/null
+++ b/src/kenlm/util/thread_pool.hh
@@ -0,0 +1,94 @@
+#ifndef UTIL_THREAD_POOL_H
+#define UTIL_THREAD_POOL_H
+
+#include "util/pcqueue.hh"
+
+#include <boost/ptr_container/ptr_vector.hpp>
+#include <boost/optional.hpp>
+#include <boost/thread.hpp>
+
+#include <iostream>
+#include <cstdlib>
+
+namespace util {
+
+template <class HandlerT> class Worker : boost::noncopyable {
+ public:
+ typedef HandlerT Handler;
+ typedef typename Handler::Request Request;
+
+ template <class Construct> Worker(PCQueue<Request> &in, Construct &construct, const Request &poison)
+ : in_(in), handler_(construct), poison_(poison), thread_(boost::ref(*this)) {}
+
+ // Only call from thread.
+ void operator()() {
+ Request request;
+ while (1) {
+ in_.Consume(request);
+ if (request == poison_) return;
+ try {
+ (*handler_)(request);
+ }
+ catch(const std::exception &e) {
+ std::cerr << "Handler threw " << e.what() << std::endl;
+ abort();
+ }
+ catch(...) {
+ std::cerr << "Handler threw an exception, dropping request" << std::endl;
+ abort();
+ }
+ }
+ }
+
+ void Join() {
+ thread_.join();
+ }
+
+ private:
+ PCQueue<Request> &in_;
+
+ boost::optional<Handler> handler_;
+
+ const Request poison_;
+
+ boost::thread thread_;
+};
+
+template <class HandlerT> class ThreadPool : boost::noncopyable {
+ public:
+ typedef HandlerT Handler;
+ typedef typename Handler::Request Request;
+
+ template <class Construct> ThreadPool(size_t queue_length, size_t workers, Construct handler_construct, Request poison) : in_(queue_length), poison_(poison) {
+ for (size_t i = 0; i < workers; ++i) {
+ workers_.push_back(new Worker<Handler>(in_, handler_construct, poison));
+ }
+ }
+
+ ~ThreadPool() {
+ for (size_t i = 0; i < workers_.size(); ++i) {
+ Produce(poison_);
+ }
+ for (typename boost::ptr_vector<Worker<Handler> >::iterator i = workers_.begin(); i != workers_.end(); ++i) {
+ i->Join();
+ }
+ }
+
+ void Produce(const Request &request) {
+ in_.Produce(request);
+ }
+
+ // For adding to the queue.
+ PCQueue<Request> &In() { return in_; }
+
+ private:
+ PCQueue<Request> in_;
+
+ boost::ptr_vector<Worker<Handler> > workers_;
+
+ Request poison_;
+};
+
+} // namespace util
+
+#endif // UTIL_THREAD_POOL_H
diff --git a/src/kenlm/util/tokenize_piece.hh b/src/kenlm/util/tokenize_piece.hh
new file mode 100644
index 0000000..9da5fa3
--- /dev/null
+++ b/src/kenlm/util/tokenize_piece.hh
@@ -0,0 +1,150 @@
+#ifndef UTIL_TOKENIZE_PIECE_H
+#define UTIL_TOKENIZE_PIECE_H
+
+#include "util/exception.hh"
+#include "util/string_piece.hh"
+
+#include <boost/iterator/iterator_facade.hpp>
+
+#include <algorithm>
+#include <cstring>
+
+namespace util {
+
+// Thrown on dereference when out of tokens to parse
+class OutOfTokens : public Exception {
+ public:
+ OutOfTokens() throw() {}
+ ~OutOfTokens() throw() {}
+};
+
+class SingleCharacter {
+ public:
+ SingleCharacter() {}
+ explicit SingleCharacter(char delim) : delim_(delim) {}
+
+ StringPiece Find(const StringPiece &in) const {
+ return StringPiece(std::find(in.data(), in.data() + in.size(), delim_), 1);
+ }
+
+ private:
+ char delim_;
+};
+
+class MultiCharacter {
+ public:
+ MultiCharacter() {}
+
+ explicit MultiCharacter(const StringPiece &delimiter) : delimiter_(delimiter) {}
+
+ StringPiece Find(const StringPiece &in) const {
+ return StringPiece(std::search(in.data(), in.data() + in.size(), delimiter_.data(), delimiter_.data() + delimiter_.size()), delimiter_.size());
+ }
+
+ private:
+ StringPiece delimiter_;
+};
+
+class AnyCharacter {
+ public:
+ AnyCharacter() {}
+ explicit AnyCharacter(const StringPiece &chars) : chars_(chars) {}
+
+ StringPiece Find(const StringPiece &in) const {
+ return StringPiece(std::find_first_of(in.data(), in.data() + in.size(), chars_.data(), chars_.data() + chars_.size()), 1);
+ }
+
+ private:
+ StringPiece chars_;
+};
+
+class BoolCharacter {
+ public:
+ BoolCharacter() {}
+
+ explicit BoolCharacter(const bool *delimiter) { delimiter_ = delimiter; }
+
+ StringPiece Find(const StringPiece &in) const {
+ for (const char *i = in.data(); i != in.data() + in.size(); ++i) {
+ if (delimiter_[static_cast<unsigned char>(*i)]) return StringPiece(i, 1);
+ }
+ return StringPiece(in.data() + in.size(), 0);
+ }
+
+ template <unsigned Length> static void Build(const char (&characters)[Length], bool (&out)[256]) {
+ memset(out, 0, sizeof(out));
+ for (const char *i = characters; i != characters + Length; ++i) {
+ out[static_cast<unsigned char>(*i)] = true;
+ }
+ }
+
+ private:
+ const bool *delimiter_;
+};
+
+class AnyCharacterLast {
+ public:
+ AnyCharacterLast() {}
+
+ explicit AnyCharacterLast(const StringPiece &chars) : chars_(chars) {}
+
+ StringPiece Find(const StringPiece &in) const {
+ return StringPiece(std::find_end(in.data(), in.data() + in.size(), chars_.data(), chars_.data() + chars_.size()), 1);
+ }
+
+ private:
+ StringPiece chars_;
+};
+
+template <class Find, bool SkipEmpty = false> class TokenIter : public boost::iterator_facade<TokenIter<Find, SkipEmpty>, const StringPiece, boost::forward_traversal_tag> {
+ public:
+ TokenIter() {}
+
+ template <class Construct> TokenIter(const StringPiece &str, const Construct &construct) : after_(str), finder_(construct) {
+ increment();
+ }
+
+ bool operator!() const {
+ return current_.data() == 0;
+ }
+ operator bool() const {
+ return current_.data() != 0;
+ }
+
+ static TokenIter<Find, SkipEmpty> end() {
+ return TokenIter<Find, SkipEmpty>();
+ }
+
+ private:
+ friend class boost::iterator_core_access;
+
+ void increment() {
+ do {
+ StringPiece found(finder_.Find(after_));
+ current_ = StringPiece(after_.data(), found.data() - after_.data());
+ if (found.data() == after_.data() + after_.size()) {
+ after_ = StringPiece(NULL, 0);
+ } else {
+ after_ = StringPiece(found.data() + found.size(), after_.data() - found.data() + after_.size() - found.size());
+ }
+ } while (SkipEmpty && current_.data() && current_.empty()); // Compiler should optimize this away if SkipEmpty is false.
+ }
+
+ bool equal(const TokenIter<Find, SkipEmpty> &other) const {
+ return current_.data() == other.current_.data();
+ }
+
+ const StringPiece &dereference() const {
+ UTIL_THROW_IF(!current_.data(), OutOfTokens, "Ran out of tokens");
+ return current_;
+ }
+
+ StringPiece current_;
+ StringPiece after_;
+
+ Find finder_;
+};
+
+} // namespace util
+
+#endif // UTIL_TOKENIZE_PIECE_H
diff --git a/src/joshua/decoder/ff/lm/kenlm/util/tokenize_piece_test.cc b/src/kenlm/util/tokenize_piece_test.cc
similarity index 100%
rename from src/joshua/decoder/ff/lm/kenlm/util/tokenize_piece_test.cc
rename to src/kenlm/util/tokenize_piece_test.cc
diff --git a/src/kenlm/util/usage.cc b/src/kenlm/util/usage.cc
new file mode 100644
index 0000000..cfefe27
--- /dev/null
+++ b/src/kenlm/util/usage.cc
@@ -0,0 +1,305 @@
+#include "util/usage.hh"
+
+#include "util/exception.hh"
+
+#include <fstream>
+#include <ostream>
+#include <sstream>
+#include <set>
+#include <string>
+#include <cstring>
+#include <cctype>
+#include <ctime>
+#if defined(_WIN32) || defined(_WIN64)
+// This code lifted from physmem.c in gnulib. See the copyright statement
+// below.
+# define WIN32_LEAN_AND_MEAN
+# include <windows.h>
+/* MEMORYSTATUSEX is missing from older windows headers, so define
+ a local replacement. */
+typedef struct
+{
+ DWORD dwLength;
+ DWORD dwMemoryLoad;
+ DWORDLONG ullTotalPhys;
+ DWORDLONG ullAvailPhys;
+ DWORDLONG ullTotalPageFile;
+ DWORDLONG ullAvailPageFile;
+ DWORDLONG ullTotalVirtual;
+ DWORDLONG ullAvailVirtual;
+ DWORDLONG ullAvailExtendedVirtual;
+} lMEMORYSTATUSEX;
+// Is this really supposed to be defined like this?
+typedef int WINBOOL;
+typedef WINBOOL (WINAPI *PFN_MS_EX) (lMEMORYSTATUSEX*);
+#else
+#include <sys/resource.h>
+#include <sys/time.h>
+#include <unistd.h>
+#endif
+
+#if defined(__MACH__) || defined(__FreeBSD__) || defined(__APPLE__)
+#include <sys/types.h>
+#include <sys/sysctl.h>
+#endif
+
+namespace util {
+namespace {
+
+#if defined(__MACH__)
+typedef struct timeval Wall;
+Wall GetWall() {
+ struct timeval tv;
+ gettimeofday(&tv, NULL);
+ return tv;
+}
+#elif defined(_WIN32) || defined(_WIN64)
+typedef time_t Wall;
+Wall GetWall() {
+ return time(NULL);
+}
+#else
+typedef struct timespec Wall;
+Wall GetWall() {
+ Wall ret;
+ clock_gettime(CLOCK_MONOTONIC, &ret);
+ return ret;
+}
+#endif
+
+// gcc possible-unused function flags
+#ifdef __GNUC__
+double Subtract(time_t first, time_t second) __attribute__ ((unused));
+double DoubleSec(time_t tv) __attribute__ ((unused));
+#if !defined(_WIN32) && !defined(_WIN64)
+double Subtract(const struct timeval &first, const struct timeval &second) __attribute__ ((unused));
+double Subtract(const struct timespec &first, const struct timespec &second) __attribute__ ((unused));
+double DoubleSec(const struct timeval &tv) __attribute__ ((unused));
+double DoubleSec(const struct timespec &tv) __attribute__ ((unused));
+#endif
+#endif
+
+// Some of these functions are only used on some platforms.
+#ifdef __clang__
+#pragma clang diagnostic push
+#pragma clang diagnostic ignored "-Wunused-function"
+#endif
+// These all assume first > second
+double Subtract(time_t first, time_t second) {
+ return difftime(first, second);
+}
+double DoubleSec(time_t tv) {
+ return static_cast<double>(tv);
+}
+#if !defined(_WIN32) && !defined(_WIN64)
+double Subtract(const struct timeval &first, const struct timeval &second) {
+ return static_cast<double>(first.tv_sec - second.tv_sec) + static_cast<double>(first.tv_usec - second.tv_usec) / 1000000.0;
+}
+double Subtract(const struct timespec &first, const struct timespec &second) {
+ return static_cast<double>(first.tv_sec - second.tv_sec) + static_cast<double>(first.tv_nsec - second.tv_nsec) / 1000000000.0;
+}
+double DoubleSec(const struct timeval &tv) {
+ return static_cast<double>(tv.tv_sec) + (static_cast<double>(tv.tv_usec) / 1000000.0);
+}
+double DoubleSec(const struct timespec &tv) {
+ return static_cast<double>(tv.tv_sec) + (static_cast<double>(tv.tv_nsec) / 1000000000.0);
+}
+#endif
+#ifdef __clang__
+#pragma clang diagnostic pop
+#endif
+
+class RecordStart {
+ public:
+ RecordStart() {
+ started_ = GetWall();
+ }
+
+ const Wall &Started() const {
+ return started_;
+ }
+
+ private:
+ Wall started_;
+};
+
+const RecordStart kRecordStart;
+
+const char *SkipSpaces(const char *at) {
+ for (; *at == ' ' || *at == '\t'; ++at) {}
+ return at;
+}
+} // namespace
+
+double WallTime() {
+ return Subtract(GetWall(), kRecordStart.Started());
+}
+
+double CPUTime() {
+#if defined(_WIN32) || defined(_WIN64)
+ return 0.0;
+#else
+ struct rusage usage;
+ if (getrusage(RUSAGE_SELF, &usage))
+ return 0.0;
+ return DoubleSec(usage.ru_utime) + DoubleSec(usage.ru_stime);
+#endif
+}
+
+uint64_t RSSMax() {
+#if defined(_WIN32) || defined(_WIN64)
+ return 0;
+#else
+ struct rusage usage;
+ if (getrusage(RUSAGE_SELF, &usage))
+ return 0;
+ return static_cast<uint64_t>(usage.ru_maxrss) * 1024;
+#endif
+}
+
+void PrintUsage(std::ostream &out) {
+#if !defined(_WIN32) && !defined(_WIN64)
+ // Linux doesn't set memory usage in getrusage :-(
+ std::set<std::string> headers;
+ headers.insert("VmPeak:");
+ headers.insert("VmRSS:");
+ headers.insert("Name:");
+
+ std::ifstream status("/proc/self/status", std::ios::in);
+ std::string header, value;
+ while ((status >> header) && getline(status, value)) {
+ if (headers.find(header) != headers.end()) {
+ out << header << SkipSpaces(value.c_str()) << '\t';
+ }
+ }
+
+ struct rusage usage;
+ if (getrusage(RUSAGE_SELF, &usage)) {
+ perror("getrusage");
+ return;
+ }
+ out << "RSSMax:" << usage.ru_maxrss << " kB" << '\t';
+ out << "user:" << DoubleSec(usage.ru_utime) << "\tsys:" << DoubleSec(usage.ru_stime) << '\t';
+ out << "CPU:" << (DoubleSec(usage.ru_utime) + DoubleSec(usage.ru_stime));
+ out << '\t';
+#endif
+
+ out << "real:" << WallTime() << '\n';
+}
+
+/* Adapted from physmem.c in gnulib 831b84c59ef413c57a36b67344467d66a8a2ba70 */
+/* Calculate the size of physical memory.
+
+ Copyright (C) 2000-2001, 2003, 2005-2006, 2009-2013 Free Software
+ Foundation, Inc.
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Lesser General Public License as published by
+ the Free Software Foundation; either version 2.1 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>. */
+
+/* Written by Paul Eggert. */
+uint64_t GuessPhysicalMemory() {
+#if defined(_SC_PHYS_PAGES) && defined(_SC_PAGESIZE)
+ {
+ long pages = sysconf(_SC_PHYS_PAGES);
+ long page_size = sysconf(_SC_PAGESIZE);
+ if (pages != -1 && page_size != -1)
+ return static_cast<uint64_t>(pages) * static_cast<uint64_t>(page_size);
+ }
+#endif
+#ifdef HW_PHYSMEM
+ { /* This works on *bsd and darwin. */
+ unsigned int physmem;
+ size_t len = sizeof physmem;
+ static int mib[2] = { CTL_HW, HW_PHYSMEM };
+
+ if (sysctl (mib, sizeof(mib) / sizeof(mib[0]), &physmem, &len, NULL, 0) == 0
+ && len == sizeof (physmem))
+ return static_cast<uint64_t>(physmem);
+ }
+#endif
+
+#if defined(_WIN32) || defined(_WIN64)
+ { /* this works on windows */
+ PFN_MS_EX pfnex;
+ HMODULE h = GetModuleHandle (TEXT("kernel32.dll"));
+
+ if (!h)
+ return 0;
+
+ /* Use GlobalMemoryStatusEx if available. */
+ if ((pfnex = (PFN_MS_EX) GetProcAddress (h, "GlobalMemoryStatusEx")))
+ {
+ lMEMORYSTATUSEX lms_ex;
+ lms_ex.dwLength = sizeof lms_ex;
+ if (!pfnex (&lms_ex))
+ return 0;
+ return lms_ex.ullTotalPhys;
+ }
+
+ /* Fall back to GlobalMemoryStatus which is always available.
+ but returns wrong results for physical memory > 4GB. */
+ else
+ {
+ MEMORYSTATUS ms;
+ GlobalMemoryStatus (&ms);
+ return ms.dwTotalPhys;
+ }
+ }
+#endif
+ return 0;
+}
+
+namespace {
+class SizeParseError : public Exception {
+ public:
+ explicit SizeParseError(const std::string &str) throw() {
+ *this << "Failed to parse " << str << " into a memory size ";
+ }
+};
+
+template <class Num> uint64_t ParseNum(const std::string &arg) {
+ std::stringstream stream(arg);
+ Num value;
+ stream >> value;
+ UTIL_THROW_IF_ARG(!stream, SizeParseError, (arg), "for the leading number.");
+ std::string after;
+ stream >> after;
+ UTIL_THROW_IF_ARG(after.size() > 1, SizeParseError, (arg), "because there are more than two characters after the number.");
+ std::string throwaway;
+ UTIL_THROW_IF_ARG(stream >> throwaway, SizeParseError, (arg), "because there was more cruft " << throwaway << " after the number.");
+
+ // Silly sort, using kilobytes as your default unit.
+ if (after.empty()) after = "K";
+ if (after == "%") {
+ uint64_t mem = GuessPhysicalMemory();
+ UTIL_THROW_IF_ARG(!mem, SizeParseError, (arg), "because % was specified but the physical memory size could not be determined.");
+ return static_cast<uint64_t>(static_cast<double>(value) * static_cast<double>(mem) / 100.0);
+ }
+
+ if (after == "k") after = "K";
+ std::string units("bKMGTPEZY");
+ std::string::size_type index = units.find(after[0]);
+ UTIL_THROW_IF_ARG(index == std::string::npos, SizeParseError, (arg), "the allowed suffixes are " << units << "%.");
+ for (std::string::size_type i = 0; i < index; ++i) {
+ value *= 1024;
+ }
+ return static_cast<uint64_t>(value);
+}
+
+} // namespace
+
+uint64_t ParseSize(const std::string &arg) {
+ return arg.find('.') == std::string::npos ? ParseNum<double>(arg) : ParseNum<uint64_t>(arg);
+}
+
+} // namespace util
diff --git a/src/kenlm/util/usage.hh b/src/kenlm/util/usage.hh
new file mode 100644
index 0000000..2f1b3e9
--- /dev/null
+++ b/src/kenlm/util/usage.hh
@@ -0,0 +1,27 @@
+#ifndef UTIL_USAGE_H
+#define UTIL_USAGE_H
+#include <cstddef>
+#include <iosfwd>
+#include <string>
+#include <stdint.h>
+
+namespace util {
+// Time in seconds since process started. Zero on unsupported platforms.
+double WallTime();
+
+// User + system time.
+double CPUTime();
+
+// Resident usage in bytes.
+uint64_t RSSMax();
+
+void PrintUsage(std::ostream &to);
+
+// Determine how much physical memory there is. Return 0 on failure.
+uint64_t GuessPhysicalMemory();
+
+// Parse a size like unix sort. Sadly, this means the default multiplier is K.
+uint64_t ParseSize(const std::string &arg);
+
+} // namespace util
+#endif // UTIL_USAGE_H
diff --git a/src/kenlm/windows/build_binary.vcxproj b/src/kenlm/windows/build_binary.vcxproj
new file mode 100644
index 0000000..5f31c49
--- /dev/null
+++ b/src/kenlm/windows/build_binary.vcxproj
@@ -0,0 +1,174 @@
+<?xml version="1.0" encoding="utf-8"?>
+<Project DefaultTargets="Build" ToolsVersion="12.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+ <ItemGroup Label="ProjectConfigurations">
+ <ProjectConfiguration Include="Debug|Win32">
+ <Configuration>Debug</Configuration>
+ <Platform>Win32</Platform>
+ </ProjectConfiguration>
+ <ProjectConfiguration Include="Debug|x64">
+ <Configuration>Debug</Configuration>
+ <Platform>x64</Platform>
+ </ProjectConfiguration>
+ <ProjectConfiguration Include="Release|Win32">
+ <Configuration>Release</Configuration>
+ <Platform>Win32</Platform>
+ </ProjectConfiguration>
+ <ProjectConfiguration Include="Release|x64">
+ <Configuration>Release</Configuration>
+ <Platform>x64</Platform>
+ </ProjectConfiguration>
+ </ItemGroup>
+ <PropertyGroup Label="Globals">
+ <ProjectGuid>{726B8149-7F58-4415-BD64-9EDECDF3409A}</ProjectGuid>
+ <Keyword>Win32Proj</Keyword>
+ <RootNamespace>build_binary</RootNamespace>
+ </PropertyGroup>
+ <Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
+ <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="Configuration">
+ <ConfigurationType>Application</ConfigurationType>
+ <UseDebugLibraries>true</UseDebugLibraries>
+ <CharacterSet>Unicode</CharacterSet>
+ <PlatformToolset>v120</PlatformToolset>
+ </PropertyGroup>
+ <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="Configuration">
+ <ConfigurationType>Application</ConfigurationType>
+ <UseDebugLibraries>true</UseDebugLibraries>
+ <CharacterSet>Unicode</CharacterSet>
+ <PlatformToolset>v120</PlatformToolset>
+ </PropertyGroup>
+ <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="Configuration">
+ <ConfigurationType>Application</ConfigurationType>
+ <UseDebugLibraries>false</UseDebugLibraries>
+ <WholeProgramOptimization>true</WholeProgramOptimization>
+ <CharacterSet>Unicode</CharacterSet>
+ <PlatformToolset>v120</PlatformToolset>
+ </PropertyGroup>
+ <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="Configuration">
+ <ConfigurationType>Application</ConfigurationType>
+ <UseDebugLibraries>false</UseDebugLibraries>
+ <WholeProgramOptimization>true</WholeProgramOptimization>
+ <CharacterSet>Unicode</CharacterSet>
+ <PlatformToolset>v120</PlatformToolset>
+ </PropertyGroup>
+ <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
+ <ImportGroup Label="ExtensionSettings">
+ </ImportGroup>
+ <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
+ <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+ </ImportGroup>
+ <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="PropertySheets">
+ <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+ </ImportGroup>
+ <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
+ <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+ </ImportGroup>
+ <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="PropertySheets">
+ <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+ </ImportGroup>
+ <PropertyGroup Label="UserMacros" />
+ <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
+ <LinkIncremental>true</LinkIncremental>
+ <OutDir>$(SolutionDir)$(Platform)\$(ProjectName)\$(Configuration)\</OutDir>
+ <IntDir>$(SolutionDir)$(Platform)\$(ProjectName)\$(Configuration)\</IntDir>
+ </PropertyGroup>
+ <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
+ <LinkIncremental>true</LinkIncremental>
+ <TargetName>$(ProjectName)</TargetName>
+ <OutDir>$(SolutionDir)$(Platform)\$(ProjectName)\$(Configuration)\</OutDir>
+ <IntDir>$(SolutionDir)$(Platform)\$(ProjectName)\$(Configuration)\</IntDir>
+ </PropertyGroup>
+ <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
+ <LinkIncremental>false</LinkIncremental>
+ <OutDir>$(SolutionDir)$(Platform)\$(ProjectName)\$(Configuration)\</OutDir>
+ <IntDir>$(SolutionDir)$(Platform)\$(ProjectName)\$(Configuration)\</IntDir>
+ </PropertyGroup>
+ <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
+ <LinkIncremental>false</LinkIncremental>
+ <TargetName>$(ProjectName)</TargetName>
+ <OutDir>$(SolutionDir)$(Platform)\$(ProjectName)\$(Configuration)\</OutDir>
+ <IntDir>$(SolutionDir)$(Platform)\$(ProjectName)\$(Configuration)\</IntDir>
+ </PropertyGroup>
+ <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
+ <ClCompile>
+ <PrecompiledHeader>
+ </PrecompiledHeader>
+ <WarningLevel>TurnOffAllWarnings</WarningLevel>
+ <Optimization>Disabled</Optimization>
+ <PreprocessorDefinitions>KENLM_MAX_ORDER=6;WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+ <AdditionalIncludeDirectories>$(ProjectDir)\..</AdditionalIncludeDirectories>
+ </ClCompile>
+ <Link>
+ <SubSystem>Console</SubSystem>
+ <GenerateDebugInformation>true</GenerateDebugInformation>
+ <AdditionalDependencies>
+ </AdditionalDependencies>
+ </Link>
+ </ItemDefinitionGroup>
+ <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
+ <ClCompile>
+ <PrecompiledHeader>
+ </PrecompiledHeader>
+ <WarningLevel>TurnOffAllWarnings</WarningLevel>
+ <Optimization>Disabled</Optimization>
+ <PreprocessorDefinitions>KENLM_MAX_ORDER=6;WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+ <AdditionalIncludeDirectories>$(ProjectDir)\..</AdditionalIncludeDirectories>
+ </ClCompile>
+ <Link>
+ <SubSystem>Console</SubSystem>
+ <GenerateDebugInformation>true</GenerateDebugInformation>
+ <AdditionalDependencies>
+ </AdditionalDependencies>
+ </Link>
+ </ItemDefinitionGroup>
+ <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
+ <ClCompile>
+ <WarningLevel>TurnOffAllWarnings</WarningLevel>
+ <PrecompiledHeader>
+ </PrecompiledHeader>
+ <Optimization>MaxSpeed</Optimization>
+ <FunctionLevelLinking>true</FunctionLevelLinking>
+ <IntrinsicFunctions>true</IntrinsicFunctions>
+ <PreprocessorDefinitions>KENLM_MAX_ORDER=6;WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+ <AdditionalIncludeDirectories>$(ProjectDir)\..</AdditionalIncludeDirectories>
+ </ClCompile>
+ <Link>
+ <SubSystem>Console</SubSystem>
+ <GenerateDebugInformation>true</GenerateDebugInformation>
+ <EnableCOMDATFolding>true</EnableCOMDATFolding>
+ <OptimizeReferences>true</OptimizeReferences>
+ <AdditionalDependencies>
+ </AdditionalDependencies>
+ </Link>
+ </ItemDefinitionGroup>
+ <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
+ <ClCompile>
+ <WarningLevel>TurnOffAllWarnings</WarningLevel>
+ <PrecompiledHeader>
+ </PrecompiledHeader>
+ <Optimization>MaxSpeed</Optimization>
+ <FunctionLevelLinking>true</FunctionLevelLinking>
+ <IntrinsicFunctions>true</IntrinsicFunctions>
+ <PreprocessorDefinitions>KENLM_MAX_ORDER=6;WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+ <AdditionalIncludeDirectories>$(ProjectDir)\..</AdditionalIncludeDirectories>
+ </ClCompile>
+ <Link>
+ <SubSystem>Console</SubSystem>
+ <GenerateDebugInformation>true</GenerateDebugInformation>
+ <EnableCOMDATFolding>true</EnableCOMDATFolding>
+ <OptimizeReferences>true</OptimizeReferences>
+ <AdditionalDependencies>
+ </AdditionalDependencies>
+ </Link>
+ </ItemDefinitionGroup>
+ <ItemGroup>
+ <ClCompile Include="..\lm\build_binary_main.cc" />
+ </ItemGroup>
+ <ItemGroup>
+ <ProjectReference Include="kenlm.vcxproj">
+ <Project>{e834d71c-5d5f-4fb4-a361-88694c438ff9}</Project>
+ </ProjectReference>
+ </ItemGroup>
+ <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
+ <ImportGroup Label="ExtensionTargets">
+ </ImportGroup>
+</Project>
\ No newline at end of file
diff --git a/src/kenlm/windows/kenlm.sln b/src/kenlm/windows/kenlm.sln
new file mode 100644
index 0000000..f81c497
--- /dev/null
+++ b/src/kenlm/windows/kenlm.sln
@@ -0,0 +1,64 @@
+
+Microsoft Visual Studio Solution File, Format Version 12.00
+# Visual Studio 2013
+VisualStudioVersion = 12.0.40629.0
+MinimumVisualStudioVersion = 10.0.40219.1
+Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "kenlm", "kenlm.vcxproj", "{E834D71C-5D5F-4FB4-A361-88694C438FF9}"
+EndProject
+Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "ngram_query", "ngram_query.vcxproj", "{826B8049-7E58-4415-BD64-9EDECDF1402B}"
+ ProjectSection(ProjectDependencies) = postProject
+ {E834D71C-5D5F-4FB4-A361-88694C438FF9} = {E834D71C-5D5F-4FB4-A361-88694C438FF9}
+ EndProjectSection
+EndProject
+Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "build_binary", "build_binary.vcxproj", "{726B8149-7F58-4415-BD64-9EDECDF3409A}"
+ ProjectSection(ProjectDependencies) = postProject
+ {E834D71C-5D5F-4FB4-A361-88694C438FF9} = {E834D71C-5D5F-4FB4-A361-88694C438FF9}
+ EndProjectSection
+EndProject
+Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "lmplz", "lmplz.vcxproj", "{926B8049-7E58-4415-BD64-9EDECDF1502C}"
+EndProject
+Global
+ GlobalSection(SolutionConfigurationPlatforms) = preSolution
+ Debug|Win32 = Debug|Win32
+ Debug|x64 = Debug|x64
+ Release|Win32 = Release|Win32
+ Release|x64 = Release|x64
+ EndGlobalSection
+ GlobalSection(ProjectConfigurationPlatforms) = postSolution
+ {E834D71C-5D5F-4FB4-A361-88694C438FF9}.Debug|Win32.ActiveCfg = Debug|Win32
+ {E834D71C-5D5F-4FB4-A361-88694C438FF9}.Debug|Win32.Build.0 = Debug|Win32
+ {E834D71C-5D5F-4FB4-A361-88694C438FF9}.Debug|x64.ActiveCfg = Debug|x64
+ {E834D71C-5D5F-4FB4-A361-88694C438FF9}.Debug|x64.Build.0 = Debug|x64
+ {E834D71C-5D5F-4FB4-A361-88694C438FF9}.Release|Win32.ActiveCfg = Release|Win32
+ {E834D71C-5D5F-4FB4-A361-88694C438FF9}.Release|Win32.Build.0 = Release|Win32
+ {E834D71C-5D5F-4FB4-A361-88694C438FF9}.Release|x64.ActiveCfg = Release|x64
+ {E834D71C-5D5F-4FB4-A361-88694C438FF9}.Release|x64.Build.0 = Release|x64
+ {826B8049-7E58-4415-BD64-9EDECDF1402B}.Debug|Win32.ActiveCfg = Debug|Win32
+ {826B8049-7E58-4415-BD64-9EDECDF1402B}.Debug|Win32.Build.0 = Debug|Win32
+ {826B8049-7E58-4415-BD64-9EDECDF1402B}.Debug|x64.ActiveCfg = Debug|x64
+ {826B8049-7E58-4415-BD64-9EDECDF1402B}.Debug|x64.Build.0 = Debug|x64
+ {826B8049-7E58-4415-BD64-9EDECDF1402B}.Release|Win32.ActiveCfg = Release|Win32
+ {826B8049-7E58-4415-BD64-9EDECDF1402B}.Release|Win32.Build.0 = Release|Win32
+ {826B8049-7E58-4415-BD64-9EDECDF1402B}.Release|x64.ActiveCfg = Release|x64
+ {826B8049-7E58-4415-BD64-9EDECDF1402B}.Release|x64.Build.0 = Release|x64
+ {726B8149-7F58-4415-BD64-9EDECDF3409A}.Debug|Win32.ActiveCfg = Debug|Win32
+ {726B8149-7F58-4415-BD64-9EDECDF3409A}.Debug|Win32.Build.0 = Debug|Win32
+ {726B8149-7F58-4415-BD64-9EDECDF3409A}.Debug|x64.ActiveCfg = Debug|x64
+ {726B8149-7F58-4415-BD64-9EDECDF3409A}.Debug|x64.Build.0 = Debug|x64
+ {726B8149-7F58-4415-BD64-9EDECDF3409A}.Release|Win32.ActiveCfg = Release|Win32
+ {726B8149-7F58-4415-BD64-9EDECDF3409A}.Release|Win32.Build.0 = Release|Win32
+ {726B8149-7F58-4415-BD64-9EDECDF3409A}.Release|x64.ActiveCfg = Release|x64
+ {726B8149-7F58-4415-BD64-9EDECDF3409A}.Release|x64.Build.0 = Release|x64
+ {926B8049-7E58-4415-BD64-9EDECDF1502C}.Debug|Win32.ActiveCfg = Debug|Win32
+ {926B8049-7E58-4415-BD64-9EDECDF1502C}.Debug|Win32.Build.0 = Debug|Win32
+ {926B8049-7E58-4415-BD64-9EDECDF1502C}.Debug|x64.ActiveCfg = Debug|x64
+ {926B8049-7E58-4415-BD64-9EDECDF1502C}.Debug|x64.Build.0 = Debug|x64
+ {926B8049-7E58-4415-BD64-9EDECDF1502C}.Release|Win32.ActiveCfg = Release|Win32
+ {926B8049-7E58-4415-BD64-9EDECDF1502C}.Release|Win32.Build.0 = Release|Win32
+ {926B8049-7E58-4415-BD64-9EDECDF1502C}.Release|x64.ActiveCfg = Release|x64
+ {926B8049-7E58-4415-BD64-9EDECDF1502C}.Release|x64.Build.0 = Release|x64
+ EndGlobalSection
+ GlobalSection(SolutionProperties) = preSolution
+ HideSolutionNode = FALSE
+ EndGlobalSection
+EndGlobal
diff --git a/src/kenlm/windows/kenlm.vcxproj b/src/kenlm/windows/kenlm.vcxproj
new file mode 100644
index 0000000..d8ae59c
--- /dev/null
+++ b/src/kenlm/windows/kenlm.vcxproj
@@ -0,0 +1,291 @@
+<?xml version="1.0" encoding="utf-8"?>
+<Project DefaultTargets="Build" ToolsVersion="12.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+ <ItemGroup Label="ProjectConfigurations">
+ <ProjectConfiguration Include="Debug|Win32">
+ <Configuration>Debug</Configuration>
+ <Platform>Win32</Platform>
+ </ProjectConfiguration>
+ <ProjectConfiguration Include="Debug|x64">
+ <Configuration>Debug</Configuration>
+ <Platform>x64</Platform>
+ </ProjectConfiguration>
+ <ProjectConfiguration Include="Release|Win32">
+ <Configuration>Release</Configuration>
+ <Platform>Win32</Platform>
+ </ProjectConfiguration>
+ <ProjectConfiguration Include="Release|x64">
+ <Configuration>Release</Configuration>
+ <Platform>x64</Platform>
+ </ProjectConfiguration>
+ </ItemGroup>
+ <PropertyGroup Label="Globals">
+ <ProjectGuid>{E834D71C-5D5F-4FB4-A361-88694C438FF9}</ProjectGuid>
+ <Keyword>Win32Proj</Keyword>
+ <RootNamespace>kenlm</RootNamespace>
+ </PropertyGroup>
+ <Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
+ <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="Configuration">
+ <ConfigurationType>StaticLibrary</ConfigurationType>
+ <UseDebugLibraries>true</UseDebugLibraries>
+ <CharacterSet>Unicode</CharacterSet>
+ <PlatformToolset>v120</PlatformToolset>
+ </PropertyGroup>
+ <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="Configuration">
+ <ConfigurationType>StaticLibrary</ConfigurationType>
+ <UseDebugLibraries>true</UseDebugLibraries>
+ <CharacterSet>Unicode</CharacterSet>
+ <PlatformToolset>v120</PlatformToolset>
+ </PropertyGroup>
+ <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="Configuration">
+ <ConfigurationType>StaticLibrary</ConfigurationType>
+ <UseDebugLibraries>false</UseDebugLibraries>
+ <WholeProgramOptimization>true</WholeProgramOptimization>
+ <CharacterSet>Unicode</CharacterSet>
+ <PlatformToolset>v120</PlatformToolset>
+ </PropertyGroup>
+ <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="Configuration">
+ <ConfigurationType>StaticLibrary</ConfigurationType>
+ <UseDebugLibraries>false</UseDebugLibraries>
+ <WholeProgramOptimization>true</WholeProgramOptimization>
+ <CharacterSet>Unicode</CharacterSet>
+ <PlatformToolset>v120</PlatformToolset>
+ </PropertyGroup>
+ <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
+ <ImportGroup Label="ExtensionSettings">
+ </ImportGroup>
+ <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
+ <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+ </ImportGroup>
+ <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="PropertySheets">
+ <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+ </ImportGroup>
+ <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
+ <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+ </ImportGroup>
+ <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="PropertySheets">
+ <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+ </ImportGroup>
+ <PropertyGroup Label="UserMacros" />
+ <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
+ <TargetName>$(ProjectName)</TargetName>
+ <OutDir>$(SolutionDir)$(Platform)\$(ProjectName)\$(Configuration)\</OutDir>
+ <IntDir>$(SolutionDir)$(Platform)\$(ProjectName)\$(Configuration)\</IntDir>
+ </PropertyGroup>
+ <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
+ <TargetName>$(ProjectName)</TargetName>
+ <OutDir>$(SolutionDir)$(Platform)\$(ProjectName)\$(Configuration)\</OutDir>
+ <IntDir>$(SolutionDir)$(Platform)\$(ProjectName)\$(Configuration)\</IntDir>
+ </PropertyGroup>
+ <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
+ <OutDir>$(SolutionDir)$(Platform)\$(ProjectName)\$(Configuration)\</OutDir>
+ <TargetName>$(ProjectName)</TargetName>
+ <IntDir>$(SolutionDir)$(Platform)\$(ProjectName)\$(Configuration)\</IntDir>
+ </PropertyGroup>
+ <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
+ <OutDir>$(SolutionDir)$(Platform)\$(ProjectName)\$(Configuration)\</OutDir>
+ <IntDir>$(SolutionDir)$(Platform)\$(ProjectName)\$(Configuration)\</IntDir>
+ </PropertyGroup>
+ <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
+ <ClCompile>
+ <PrecompiledHeader>
+ </PrecompiledHeader>
+ <WarningLevel>TurnOffAllWarnings</WarningLevel>
+ <Optimization>Disabled</Optimization>
+      <PreprocessorDefinitions>KENLM_MAX_ORDER=6;WIN32;_SCL_SECURE_NO_WARNINGS;_DEBUG;_LIB;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+ <AdditionalIncludeDirectories>$(ProjectDir)\..;$(BOOSTDIR);%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+ <RuntimeLibrary>MultiThreadedDebugDLL</RuntimeLibrary>
+ <AdditionalOptions>-D_SCL_SECURE_NO_WARNINGS %(AdditionalOptions)</AdditionalOptions>
+ </ClCompile>
+ <Link>
+ <SubSystem>Windows</SubSystem>
+ <GenerateDebugInformation>true</GenerateDebugInformation>
+ </Link>
+ </ItemDefinitionGroup>
+ <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
+ <ClCompile>
+ <PrecompiledHeader>
+ </PrecompiledHeader>
+ <WarningLevel>TurnOffAllWarnings</WarningLevel>
+ <Optimization>Disabled</Optimization>
+ <PreprocessorDefinitions>KENLM_MAX_ORDER=6;WIN32;_DEBUG;_LIB;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+ <AdditionalIncludeDirectories>$(ProjectDir)\..;$(BOOSTDIR);%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+ <RuntimeLibrary>MultiThreadedDebugDLL</RuntimeLibrary>
+ </ClCompile>
+ <Link>
+ <SubSystem>Windows</SubSystem>
+ <GenerateDebugInformation>true</GenerateDebugInformation>
+ </Link>
+ </ItemDefinitionGroup>
+ <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
+ <ClCompile>
+ <WarningLevel>TurnOffAllWarnings</WarningLevel>
+ <PrecompiledHeader>
+ </PrecompiledHeader>
+ <Optimization>MaxSpeed</Optimization>
+ <FunctionLevelLinking>true</FunctionLevelLinking>
+ <IntrinsicFunctions>true</IntrinsicFunctions>
+      <PreprocessorDefinitions>KENLM_MAX_ORDER=6;WIN32;_SCL_SECURE_NO_WARNINGS;NDEBUG;_LIB;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+ <AdditionalIncludeDirectories>$(ProjectDir)\..;$(BOOSTDIR);%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+ <RuntimeLibrary>MultiThreadedDLL</RuntimeLibrary>
+ <AdditionalOptions>-D_SCL_SECURE_NO_WARNINGS %(AdditionalOptions)</AdditionalOptions>
+ </ClCompile>
+ <Link>
+ <SubSystem>Windows</SubSystem>
+ <GenerateDebugInformation>true</GenerateDebugInformation>
+ <EnableCOMDATFolding>true</EnableCOMDATFolding>
+ <OptimizeReferences>true</OptimizeReferences>
+ </Link>
+ </ItemDefinitionGroup>
+ <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
+ <ClCompile>
+ <WarningLevel>TurnOffAllWarnings</WarningLevel>
+ <PrecompiledHeader>
+ </PrecompiledHeader>
+ <Optimization>MaxSpeed</Optimization>
+ <FunctionLevelLinking>true</FunctionLevelLinking>
+ <IntrinsicFunctions>true</IntrinsicFunctions>
+ <PreprocessorDefinitions>KENLM_MAX_ORDER=6;WIN32;NDEBUG;_LIB;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+ <AdditionalIncludeDirectories>$(ProjectDir)\..;$(BOOSTDIR);%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+ <RuntimeLibrary>MultiThreadedDLL</RuntimeLibrary>
+ </ClCompile>
+ <Link>
+ <SubSystem>Console</SubSystem>
+ <GenerateDebugInformation>true</GenerateDebugInformation>
+ <EnableCOMDATFolding>true</EnableCOMDATFolding>
+ <OptimizeReferences>true</OptimizeReferences>
+ </Link>
+ </ItemDefinitionGroup>
+ <ItemGroup>
+ <None Include="..\lm\bhiksha.hh" />
+ <None Include="..\lm\binary_format.hh" />
+ <None Include="..\lm\blank.hh" />
+ <None Include="..\lm\config.hh" />
+ <None Include="..\lm\enumerate_vocab.hh" />
+ <None Include="..\lm\facade.hh" />
+ <None Include="..\lm\left.hh" />
+ <None Include="..\lm\lm_exception.hh" />
+ <None Include="..\lm\max_order.hh" />
+ <None Include="..\lm\model.hh" />
+ <None Include="..\lm\model_type.hh" />
+ <None Include="..\lm\quantize.hh" />
+ <None Include="..\lm\read_arpa.hh" />
+ <None Include="..\lm\return.hh" />
+ <None Include="..\lm\search_hashed.hh" />
+ <None Include="..\lm\search_trie.hh" />
+ <None Include="..\lm\test.arpa" />
+ <None Include="..\lm\test_nounk.arpa" />
+ <None Include="..\lm\trie.hh" />
+ <None Include="..\lm\trie_sort.hh" />
+ <None Include="..\lm\virtual_interface.hh" />
+ <None Include="..\lm\vocab.hh" />
+ <None Include="..\lm\weights.hh" />
+ <None Include="..\lm\word_index.hh" />
+ <None Include="..\util\bit_packing.hh" />
+ <None Include="..\util\ersatz_progress.hh" />
+ <None Include="..\util\exception.hh" />
+ <None Include="..\util\file.hh" />
+ <None Include="..\util\file_piece.hh" />
+ <None Include="..\util\getopt.hh" />
+ <None Include="..\util\have.hh" />
+ <None Include="..\util\joint_sort.hh" />
+ <None Include="..\util\key_value_packing.hh" />
+ <None Include="..\util\mmap.hh" />
+ <None Include="..\util\murmur_hash.hh" />
+ <None Include="..\util\probing_hash_table.hh" />
+ <None Include="..\util\proxy_iterator.hh" />
+ <None Include="..\util\scoped.hh" />
+ <None Include="..\util\sized_iterator.hh" />
+ <None Include="..\util\sorted_uniform.hh" />
+ <None Include="..\util\string_piece.hh" />
+ <None Include="..\util\tokenize_piece.hh" />
+ </ItemGroup>
+ <ItemGroup>
+ <ClCompile Include="..\lm\value_build.cc" />
+ <ClCompile Include="..\util\double-conversion\bignum-dtoa.cc" />
+ <ClCompile Include="..\util\double-conversion\bignum.cc" />
+ <ClCompile Include="..\util\double-conversion\cached-powers.cc" />
+ <ClCompile Include="..\util\double-conversion\diy-fp.cc" />
+ <ClCompile Include="..\util\double-conversion\double-conversion.cc" />
+ <ClCompile Include="..\util\double-conversion\fast-dtoa.cc" />
+ <ClCompile Include="..\util\double-conversion\fixed-dtoa.cc" />
+ <ClCompile Include="..\util\double-conversion\strtod.cc" />
+ <ClCompile Include="..\util\float_to_string.cc" />
+ <ClCompile Include="..\util\integer_to_string.cc" />
+ <ClCompile Include="..\util\joint_sort_test.cc" />
+ <ClCompile Include="..\util\parallel_read.cc" />
+ <ClCompile Include="..\util\pool.cc" />
+ <ClCompile Include="..\util\read_compressed.cc" />
+ <ClCompile Include="..\util\scoped.cc" />
+ <ClCompile Include="..\util\stream\chain.cc" />
+ <ClCompile Include="..\util\stream\count_records.cc" />
+ <ClCompile Include="..\util\stream\io.cc" />
+ <ClCompile Include="..\util\stream\line_input.cc" />
+ <ClCompile Include="..\util\stream\multi_progress.cc" />
+ <ClCompile Include="..\util\stream\rewindable_stream.cc" />
+ <ClCompile Include="..\util\string_piece.cc" />
+ <ClCompile Include="..\util\usage.cc" />
+ <ClCompile Include="..\lm\bhiksha.cc" />
+ <ClCompile Include="..\lm\binary_format.cc" />
+ <ClCompile Include="..\lm\config.cc" />
+ <ClCompile Include="..\lm\lm_exception.cc" />
+ <ClCompile Include="..\lm\model.cc" />
+ <ClCompile Include="..\lm\quantize.cc" />
+ <ClCompile Include="..\lm\read_arpa.cc" />
+ <ClCompile Include="..\lm\search_hashed.cc" />
+ <ClCompile Include="..\lm\search_trie.cc" />
+ <ClCompile Include="..\lm\sizes.cc" />
+ <ClCompile Include="..\lm\trie.cc" />
+ <ClCompile Include="..\lm\trie_sort.cc" />
+ <ClCompile Include="..\lm\virtual_interface.cc" />
+ <ClCompile Include="..\lm\vocab.cc" />
+ <ClCompile Include="..\util\bit_packing.cc" />
+ <ClCompile Include="..\util\ersatz_progress.cc" />
+ <ClCompile Include="..\util\exception.cc" />
+ <ClCompile Include="..\util\file.cc" />
+ <ClCompile Include="..\util\file_piece.cc" />
+ <ClCompile Include="..\util\getopt.c" />
+ <ClCompile Include="..\util\mmap.cc" />
+ <ClCompile Include="..\util\murmur_hash.cc" />
+ </ItemGroup>
+ <ItemGroup>
+ <ClInclude Include="..\util\double-conversion\bignum-dtoa.h" />
+ <ClInclude Include="..\util\double-conversion\bignum.h" />
+ <ClInclude Include="..\util\double-conversion\cached-powers.h" />
+ <ClInclude Include="..\util\double-conversion\diy-fp.h" />
+ <ClInclude Include="..\util\double-conversion\double-conversion.h" />
+ <ClInclude Include="..\util\double-conversion\fast-dtoa.h" />
+ <ClInclude Include="..\util\double-conversion\fixed-dtoa.h" />
+ <ClInclude Include="..\util\double-conversion\ieee.h" />
+ <ClInclude Include="..\util\double-conversion\strtod.h" />
+ <ClInclude Include="..\util\double-conversion\utils.h" />
+ <ClInclude Include="..\util\file_stream.hh" />
+ <ClInclude Include="..\util\string_stream.hh" />
+ <ClInclude Include="..\util\fake_ostream.hh" />
+ <ClInclude Include="..\util\fixed_array.hh" />
+ <ClInclude Include="..\util\float_to_string.hh" />
+ <ClInclude Include="..\util\integer_to_string.hh" />
+ <ClInclude Include="..\util\multi_intersection.hh" />
+ <ClInclude Include="..\util\parallel_read.hh" />
+ <ClInclude Include="..\util\pcqueue.hh" />
+ <ClInclude Include="..\util\pool.hh" />
+ <ClInclude Include="..\util\read_compressed.hh" />
+ <ClInclude Include="..\util\stream\block.hh" />
+ <ClInclude Include="..\util\stream\chain.hh" />
+ <ClInclude Include="..\util\stream\config.hh" />
+ <ClInclude Include="..\util\stream\count_records.hh" />
+ <ClInclude Include="..\util\stream\io.hh" />
+ <ClInclude Include="..\util\stream\line_input.hh" />
+ <ClInclude Include="..\util\stream\multi_progress.hh" />
+ <ClInclude Include="..\util\stream\multi_stream.hh" />
+ <ClInclude Include="..\util\stream\rewindable_stream.hh" />
+ <ClInclude Include="..\util\stream\sort.hh" />
+ <ClInclude Include="..\util\stream\stream.hh" />
+ <ClInclude Include="..\util\stream\timer.hh" />
+ <ClInclude Include="..\util\string_piece_hash.hh" />
+ <ClInclude Include="..\util\thread_pool.hh" />
+ <ClInclude Include="..\util\usage.hh" />
+ </ItemGroup>
+ <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
+ <ImportGroup Label="ExtensionTargets">
+ </ImportGroup>
+</Project>
diff --git a/src/kenlm/windows/lmplz.vcxproj b/src/kenlm/windows/lmplz.vcxproj
new file mode 100755
index 0000000..813eadf
--- /dev/null
+++ b/src/kenlm/windows/lmplz.vcxproj
@@ -0,0 +1,214 @@
+<?xml version="1.0" encoding="utf-8"?>
+<Project DefaultTargets="Build" ToolsVersion="12.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+ <ItemGroup Label="ProjectConfigurations">
+ <ProjectConfiguration Include="Debug|Win32">
+ <Configuration>Debug</Configuration>
+ <Platform>Win32</Platform>
+ </ProjectConfiguration>
+ <ProjectConfiguration Include="Debug|x64">
+ <Configuration>Debug</Configuration>
+ <Platform>x64</Platform>
+ </ProjectConfiguration>
+ <ProjectConfiguration Include="Release|Win32">
+ <Configuration>Release</Configuration>
+ <Platform>Win32</Platform>
+ </ProjectConfiguration>
+ <ProjectConfiguration Include="Release|x64">
+ <Configuration>Release</Configuration>
+ <Platform>x64</Platform>
+ </ProjectConfiguration>
+ </ItemGroup>
+ <PropertyGroup Label="Globals">
+ <ProjectGuid>{926B8049-7E58-4415-BD64-9EDECDF1502C}</ProjectGuid>
+ <Keyword>Win32Proj</Keyword>
+ <RootNamespace>lmplz</RootNamespace>
+ </PropertyGroup>
+ <Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
+ <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="Configuration">
+ <ConfigurationType>Application</ConfigurationType>
+ <UseDebugLibraries>true</UseDebugLibraries>
+ <CharacterSet>Unicode</CharacterSet>
+ <PlatformToolset>v120</PlatformToolset>
+ </PropertyGroup>
+ <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="Configuration">
+ <ConfigurationType>Application</ConfigurationType>
+ <UseDebugLibraries>true</UseDebugLibraries>
+ <CharacterSet>Unicode</CharacterSet>
+ <PlatformToolset>v120</PlatformToolset>
+ </PropertyGroup>
+ <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="Configuration">
+ <ConfigurationType>Application</ConfigurationType>
+ <UseDebugLibraries>false</UseDebugLibraries>
+ <WholeProgramOptimization>true</WholeProgramOptimization>
+ <CharacterSet>Unicode</CharacterSet>
+ <PlatformToolset>v120</PlatformToolset>
+ </PropertyGroup>
+ <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="Configuration">
+ <ConfigurationType>Application</ConfigurationType>
+ <UseDebugLibraries>false</UseDebugLibraries>
+ <WholeProgramOptimization>true</WholeProgramOptimization>
+ <CharacterSet>Unicode</CharacterSet>
+ <PlatformToolset>v120</PlatformToolset>
+ </PropertyGroup>
+ <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
+ <ImportGroup Label="ExtensionSettings">
+ </ImportGroup>
+ <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
+ <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+ </ImportGroup>
+ <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="PropertySheets">
+ <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+ </ImportGroup>
+ <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
+ <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+ </ImportGroup>
+ <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="PropertySheets">
+ <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+ </ImportGroup>
+ <PropertyGroup Label="UserMacros" />
+ <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
+ <LinkIncremental>true</LinkIncremental>
+ <OutDir>$(SolutionDir)$(Platform)\$(ProjectName)\$(Configuration)\</OutDir>
+ <IntDir>$(SolutionDir)$(Platform)\$(ProjectName)\$(Configuration)\</IntDir>
+ </PropertyGroup>
+ <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
+ <LinkIncremental>true</LinkIncremental>
+ <TargetName>$(ProjectName)</TargetName>
+ <OutDir>$(SolutionDir)$(Platform)\$(ProjectName)\$(Configuration)\</OutDir>
+ <IntDir>$(SolutionDir)$(Platform)\$(ProjectName)\$(Configuration)\</IntDir>
+ </PropertyGroup>
+ <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
+ <LinkIncremental>false</LinkIncremental>
+ <IncludePath>C:\Program Files\boost\boost_1_51;$(VCInstallDir)include;$(VCInstallDir)atlmfc\include;$(WindowsSdkDir)include;$(FrameworkSDKDir)\include;</IncludePath>
+ <LibraryPath>C:\Program Files\boost\boost_1_51\lib;$(VCInstallDir)lib;$(VCInstallDir)atlmfc\lib;$(WindowsSdkDir)lib;$(FrameworkSDKDir)\lib</LibraryPath>
+ <OutDir>$(SolutionDir)$(Platform)\$(ProjectName)\$(Configuration)\</OutDir>
+ <IntDir>$(SolutionDir)$(Platform)\$(ProjectName)\$(Configuration)\</IntDir>
+ </PropertyGroup>
+ <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
+ <LinkIncremental>false</LinkIncremental>
+ <TargetName>$(ProjectName)</TargetName>
+ <OutDir>$(SolutionDir)$(Platform)\$(ProjectName)\$(Configuration)\</OutDir>
+ <IntDir>$(SolutionDir)$(Platform)\$(ProjectName)\$(Configuration)\</IntDir>
+ </PropertyGroup>
+ <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
+ <ClCompile>
+ <PrecompiledHeader>
+ </PrecompiledHeader>
+ <WarningLevel>TurnOffAllWarnings</WarningLevel>
+ <Optimization>Disabled</Optimization>
+ <PreprocessorDefinitions>KENLM_MAX_ORDER=6;WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+ <AdditionalIncludeDirectories>$(ProjectDir)\..;$(BOOSTDIR)</AdditionalIncludeDirectories>
+ </ClCompile>
+ <Link>
+ <SubSystem>Console</SubSystem>
+ <GenerateDebugInformation>true</GenerateDebugInformation>
+ <AdditionalDependencies>
+ </AdditionalDependencies>
+ <AdditionalLibraryDirectories>$(BOOSTDIR)\stage\lib</AdditionalLibraryDirectories>
+ </Link>
+ </ItemDefinitionGroup>
+ <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
+ <ClCompile>
+ <PrecompiledHeader>
+ </PrecompiledHeader>
+ <WarningLevel>TurnOffAllWarnings</WarningLevel>
+ <Optimization>Disabled</Optimization>
+ <PreprocessorDefinitions>KENLM_MAX_ORDER=6;WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+ <AdditionalIncludeDirectories>$(ProjectDir)\..;$(BOOSTDIR)</AdditionalIncludeDirectories>
+ </ClCompile>
+ <Link>
+ <SubSystem>Console</SubSystem>
+ <GenerateDebugInformation>true</GenerateDebugInformation>
+ <AdditionalDependencies>
+ </AdditionalDependencies>
+ <AdditionalLibraryDirectories>$(BOOSTDIR)\stage\lib\x64</AdditionalLibraryDirectories>
+ </Link>
+ </ItemDefinitionGroup>
+ <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
+ <ClCompile>
+ <WarningLevel>TurnOffAllWarnings</WarningLevel>
+ <PrecompiledHeader>
+ </PrecompiledHeader>
+ <Optimization>MaxSpeed</Optimization>
+ <FunctionLevelLinking>true</FunctionLevelLinking>
+ <IntrinsicFunctions>true</IntrinsicFunctions>
+ <PreprocessorDefinitions>KENLM_MAX_ORDER=6;WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+ <AdditionalIncludeDirectories>$(ProjectDir)\..;$(BOOSTDIR)</AdditionalIncludeDirectories>
+ </ClCompile>
+ <Link>
+ <SubSystem>Console</SubSystem>
+ <GenerateDebugInformation>true</GenerateDebugInformation>
+ <EnableCOMDATFolding>true</EnableCOMDATFolding>
+ <OptimizeReferences>true</OptimizeReferences>
+ <AdditionalDependencies>
+ </AdditionalDependencies>
+ <AdditionalLibraryDirectories>$(BOOSTDIR)\stage\lib;c:\Program Files (x86)\Microsoft SDKs\Windows\v7.1A\Lib\</AdditionalLibraryDirectories>
+ </Link>
+ </ItemDefinitionGroup>
+ <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
+ <ClCompile>
+ <WarningLevel>TurnOffAllWarnings</WarningLevel>
+ <PrecompiledHeader>
+ </PrecompiledHeader>
+ <Optimization>MaxSpeed</Optimization>
+ <FunctionLevelLinking>true</FunctionLevelLinking>
+ <IntrinsicFunctions>true</IntrinsicFunctions>
+ <PreprocessorDefinitions>KENLM_MAX_ORDER=6;WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+ <AdditionalIncludeDirectories>$(ProjectDir)\..;$(BOOSTDIR)</AdditionalIncludeDirectories>
+ </ClCompile>
+ <Link>
+ <SubSystem>Console</SubSystem>
+ <GenerateDebugInformation>true</GenerateDebugInformation>
+ <EnableCOMDATFolding>true</EnableCOMDATFolding>
+ <OptimizeReferences>true</OptimizeReferences>
+ <AdditionalDependencies>
+ </AdditionalDependencies>
+ <AdditionalLibraryDirectories>$(BOOSTDIR)\stage\lib\x64;</AdditionalLibraryDirectories>
+ </Link>
+ </ItemDefinitionGroup>
+ <ItemGroup>
+ <None Include="..\lm\builder\adjust_counts.hh" />
+ <None Include="..\lm\builder\corpus_count.hh" />
+ <None Include="..\lm\builder\discount.hh" />
+ <None Include="..\lm\builder\header_info.hh" />
+ <None Include="..\lm\builder\initial_probabilities.hh" />
+ <None Include="..\lm\builder\interpolate.hh" />
+ <None Include="..\lm\builder\pipeline.hh" />
+ </ItemGroup>
+ <ItemGroup>
+ <ClCompile Include="..\lm\builder\adjust_counts.cc" />
+ <ClCompile Include="..\lm\builder\corpus_count.cc" />
+ <ClCompile Include="..\lm\builder\initial_probabilities.cc" />
+ <ClCompile Include="..\lm\builder\interpolate.cc" />
+ <ClCompile Include="..\lm\builder\output.cc" />
+ <ClCompile Include="..\lm\builder\pipeline.cc" />
+ <ClCompile Include="..\lm\builder\lmplz_main.cc" />
+ <ClCompile Include="..\lm\common\model_buffer.cc" />
+ <ClCompile Include="..\lm\common\print.cc" />
+ <ClCompile Include="..\lm\common\renumber.cc" />
+ <ClCompile Include="..\lm\common\size_option.cc" />
+ </ItemGroup>
+ <ItemGroup>
+ <ProjectReference Include="kenlm.vcxproj">
+ <Project>{e834d71c-5d5f-4fb4-a361-88694c438ff9}</Project>
+ </ProjectReference>
+ </ItemGroup>
+ <ItemGroup>
+ <ClInclude Include="..\lm\builder\debug_print.hh" />
+ <ClInclude Include="..\lm\builder\hash_gamma.hh" />
+ <ClInclude Include="..\lm\builder\output.hh" />
+ <ClInclude Include="..\lm\builder\payload.hh" />
+ <ClInclude Include="..\lm\common\compare.hh" />
+ <ClInclude Include="..\lm\common\joint_order.hh" />
+ <ClInclude Include="..\lm\common\model_buffer.hh" />
+ <ClInclude Include="..\lm\common\ngram.hh" />
+ <ClInclude Include="..\lm\common\ngram_stream.hh" />
+ <ClInclude Include="..\lm\common\print.hh" />
+ <ClInclude Include="..\lm\common\renumber.hh" />
+ <ClInclude Include="..\lm\common\size_option.hh" />
+ <ClInclude Include="..\lm\common\special.hh" />
+ </ItemGroup>
+ <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
+ <ImportGroup Label="ExtensionTargets">
+ </ImportGroup>
+</Project>
\ No newline at end of file
diff --git a/src/kenlm/windows/ngram_query.vcxproj b/src/kenlm/windows/ngram_query.vcxproj
new file mode 100644
index 0000000..0c3da1a
--- /dev/null
+++ b/src/kenlm/windows/ngram_query.vcxproj
@@ -0,0 +1,177 @@
+<?xml version="1.0" encoding="utf-8"?>
+<Project DefaultTargets="Build" ToolsVersion="12.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+ <ItemGroup Label="ProjectConfigurations">
+ <ProjectConfiguration Include="Debug|Win32">
+ <Configuration>Debug</Configuration>
+ <Platform>Win32</Platform>
+ </ProjectConfiguration>
+ <ProjectConfiguration Include="Debug|x64">
+ <Configuration>Debug</Configuration>
+ <Platform>x64</Platform>
+ </ProjectConfiguration>
+ <ProjectConfiguration Include="Release|Win32">
+ <Configuration>Release</Configuration>
+ <Platform>Win32</Platform>
+ </ProjectConfiguration>
+ <ProjectConfiguration Include="Release|x64">
+ <Configuration>Release</Configuration>
+ <Platform>x64</Platform>
+ </ProjectConfiguration>
+ </ItemGroup>
+ <PropertyGroup Label="Globals">
+ <ProjectGuid>{826B8049-7E58-4415-BD64-9EDECDF1402B}</ProjectGuid>
+ <Keyword>Win32Proj</Keyword>
+ <RootNamespace>ngram_query</RootNamespace>
+ </PropertyGroup>
+ <Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
+ <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="Configuration">
+ <ConfigurationType>Application</ConfigurationType>
+ <UseDebugLibraries>true</UseDebugLibraries>
+ <CharacterSet>Unicode</CharacterSet>
+ <PlatformToolset>v120</PlatformToolset>
+ </PropertyGroup>
+ <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="Configuration">
+ <ConfigurationType>Application</ConfigurationType>
+ <UseDebugLibraries>true</UseDebugLibraries>
+ <CharacterSet>Unicode</CharacterSet>
+ <PlatformToolset>v120</PlatformToolset>
+ </PropertyGroup>
+ <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="Configuration">
+ <ConfigurationType>Application</ConfigurationType>
+ <UseDebugLibraries>false</UseDebugLibraries>
+ <WholeProgramOptimization>true</WholeProgramOptimization>
+ <CharacterSet>Unicode</CharacterSet>
+ <PlatformToolset>v120</PlatformToolset>
+ </PropertyGroup>
+ <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="Configuration">
+ <ConfigurationType>Application</ConfigurationType>
+ <UseDebugLibraries>false</UseDebugLibraries>
+ <WholeProgramOptimization>true</WholeProgramOptimization>
+ <CharacterSet>Unicode</CharacterSet>
+ <PlatformToolset>v120</PlatformToolset>
+ </PropertyGroup>
+ <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
+ <ImportGroup Label="ExtensionSettings">
+ </ImportGroup>
+ <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
+ <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+ </ImportGroup>
+ <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="PropertySheets">
+ <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+ </ImportGroup>
+ <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
+ <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+ </ImportGroup>
+ <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="PropertySheets">
+ <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+ </ImportGroup>
+ <PropertyGroup Label="UserMacros" />
+ <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
+ <LinkIncremental>true</LinkIncremental>
+ <OutDir>$(SolutionDir)$(Platform)\$(ProjectName)\$(Configuration)\</OutDir>
+ <IntDir>$(SolutionDir)$(Platform)\$(ProjectName)\$(Configuration)\</IntDir>
+ </PropertyGroup>
+ <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
+ <LinkIncremental>true</LinkIncremental>
+ <TargetName>$(ProjectName)</TargetName>
+ <OutDir>$(SolutionDir)$(Platform)\$(ProjectName)\$(Configuration)\</OutDir>
+ <IntDir>$(SolutionDir)$(Platform)\$(ProjectName)\$(Configuration)\</IntDir>
+ </PropertyGroup>
+ <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
+ <LinkIncremental>false</LinkIncremental>
+ <OutDir>$(SolutionDir)$(Platform)\$(ProjectName)\$(Configuration)\</OutDir>
+ <IntDir>$(SolutionDir)$(Platform)\$(ProjectName)\$(Configuration)\</IntDir>
+ </PropertyGroup>
+ <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
+ <LinkIncremental>false</LinkIncremental>
+ <TargetName>$(ProjectName)</TargetName>
+ <OutDir>$(SolutionDir)$(Platform)\$(ProjectName)\$(Configuration)\</OutDir>
+ <IntDir>$(SolutionDir)$(Platform)\$(ProjectName)\$(Configuration)\</IntDir>
+ </PropertyGroup>
+ <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
+ <ClCompile>
+ <PrecompiledHeader>
+ </PrecompiledHeader>
+ <WarningLevel>TurnOffAllWarnings</WarningLevel>
+ <Optimization>Disabled</Optimization>
+ <PreprocessorDefinitions>KENLM_MAX_ORDER=6;WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+ <AdditionalIncludeDirectories>$(ProjectDir)\..</AdditionalIncludeDirectories>
+ </ClCompile>
+ <Link>
+ <SubSystem>Console</SubSystem>
+ <GenerateDebugInformation>true</GenerateDebugInformation>
+ <AdditionalDependencies>
+ </AdditionalDependencies>
+ </Link>
+ </ItemDefinitionGroup>
+ <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
+ <ClCompile>
+ <PrecompiledHeader>
+ </PrecompiledHeader>
+ <WarningLevel>TurnOffAllWarnings</WarningLevel>
+ <Optimization>Disabled</Optimization>
+ <PreprocessorDefinitions>KENLM_MAX_ORDER=6;WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+ <AdditionalIncludeDirectories>$(ProjectDir)\..</AdditionalIncludeDirectories>
+ </ClCompile>
+ <Link>
+ <SubSystem>Console</SubSystem>
+ <GenerateDebugInformation>true</GenerateDebugInformation>
+ <AdditionalDependencies>
+ </AdditionalDependencies>
+ </Link>
+ </ItemDefinitionGroup>
+ <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
+ <ClCompile>
+ <WarningLevel>TurnOffAllWarnings</WarningLevel>
+ <PrecompiledHeader>
+ </PrecompiledHeader>
+ <Optimization>MaxSpeed</Optimization>
+ <FunctionLevelLinking>true</FunctionLevelLinking>
+ <IntrinsicFunctions>true</IntrinsicFunctions>
+ <PreprocessorDefinitions>KENLM_MAX_ORDER=6;WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+ <AdditionalIncludeDirectories>$(ProjectDir)\..</AdditionalIncludeDirectories>
+ </ClCompile>
+ <Link>
+ <SubSystem>Console</SubSystem>
+ <GenerateDebugInformation>true</GenerateDebugInformation>
+ <EnableCOMDATFolding>true</EnableCOMDATFolding>
+ <OptimizeReferences>true</OptimizeReferences>
+ <AdditionalDependencies>
+ </AdditionalDependencies>
+ </Link>
+ </ItemDefinitionGroup>
+ <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
+ <ClCompile>
+ <WarningLevel>TurnOffAllWarnings</WarningLevel>
+ <PrecompiledHeader>
+ </PrecompiledHeader>
+ <Optimization>MaxSpeed</Optimization>
+ <FunctionLevelLinking>true</FunctionLevelLinking>
+ <IntrinsicFunctions>true</IntrinsicFunctions>
+ <PreprocessorDefinitions>KENLM_MAX_ORDER=6;WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+ <AdditionalIncludeDirectories>$(ProjectDir)\..</AdditionalIncludeDirectories>
+ </ClCompile>
+ <Link>
+ <SubSystem>Console</SubSystem>
+ <GenerateDebugInformation>true</GenerateDebugInformation>
+ <EnableCOMDATFolding>true</EnableCOMDATFolding>
+ <OptimizeReferences>true</OptimizeReferences>
+ <AdditionalDependencies>
+ </AdditionalDependencies>
+ </Link>
+ </ItemDefinitionGroup>
+ <ItemGroup>
+ <None Include="..\lm\ngram_query.hh" />
+ </ItemGroup>
+ <ItemGroup>
+ <ClCompile Include="..\lm\query_main.cc" />
+ </ItemGroup>
+ <ItemGroup>
+ <ProjectReference Include="kenlm.vcxproj">
+ <Project>{e834d71c-5d5f-4fb4-a361-88694c438ff9}</Project>
+ </ProjectReference>
+ </ItemGroup>
+ <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
+ <ImportGroup Label="ExtensionTargets">
+ </ImportGroup>
+</Project>
\ No newline at end of file
diff --git a/scripts/training/symal/Makefile b/src/symal/Makefile
similarity index 100%
rename from scripts/training/symal/Makefile
rename to src/symal/Makefile
diff --git a/scripts/training/symal/cmd.c b/src/symal/cmd.c
similarity index 100%
rename from scripts/training/symal/cmd.c
rename to src/symal/cmd.c
diff --git a/scripts/training/symal/cmd.h b/src/symal/cmd.h
similarity index 100%
rename from scripts/training/symal/cmd.h
rename to src/symal/cmd.h
diff --git a/scripts/training/symal/giza2bal.pl b/src/symal/giza2bal.pl
similarity index 100%
rename from scripts/training/symal/giza2bal.pl
rename to src/symal/giza2bal.pl
diff --git a/scripts/training/symal/symal.cpp b/src/symal/symal.cpp
similarity index 100%
rename from scripts/training/symal/symal.cpp
rename to src/symal/symal.cpp
diff --git a/test/bn-en/hiero/joshua-classlm.config b/test/bn-en/hiero/joshua-classlm.config
index 13311a8..970b9b7 100644
--- a/test/bn-en/hiero/joshua-classlm.config
+++ b/test/bn-en/hiero/joshua-classlm.config
@@ -23,8 +23,8 @@
use_unique_nbest=true
top_n = 10
-feature-function = OOVPenalty
feature-function = WordPenalty
+feature-function = OOVPenalty
###### model weights
lm_0 1.2373676802179452
diff --git a/test/bn-en/hiero/output-classlm.gold b/test/bn-en/hiero/output-classlm.gold
index 6128d5c..44d9a99 100644
--- a/test/bn-en/hiero/output-classlm.gold
+++ b/test/bn-en/hiero/output-classlm.gold
@@ -1,690 +1,690 @@
-0 ||| rabindranath was born in a পিরালী ব্রাহ্মণ in the family ||| lm_0=-23.712 lm_1=-35.872 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-6.000 tm_pt_3=-0.000 tm_pt_4=-5.000 tm_pt_5=-30.409 tm_pt_6=-15.712 tm_pt_7=-6.000 tm_pt_8=-16.308 tm_pt_9=-1.135 tm_pt_10=-14.979 tm_pt_11=-0.000 tm_pt_12=-7.729 tm_pt_13=-0.000 tm_pt_14=-8.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=7.000 WordPenalty=-5.212 OOVPenalty=-200.000 ||| -270.690
-0 ||| rabindranath born in kolkata a পিরালী ব্রাহ্মণ in the family ||| lm_0=-27.803 lm_1=-35.872 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-5.000 tm_pt_3=-0.000 tm_pt_4=-5.000 tm_pt_5=-29.029 tm_pt_6=-16.002 tm_pt_7=-5.000 tm_pt_8=-13.590 tm_pt_9=-2.000 tm_pt_10=-10.090 tm_pt_11=-0.000 tm_pt_12=-4.282 tm_pt_13=-0.000 tm_pt_14=-8.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=7.000 WordPenalty=-5.212 OOVPenalty=-200.000 ||| -272.654
-0 ||| rabindranath born in the a পিরালী ব্রাহ্মণ in the family ||| lm_0=-26.980 lm_1=-35.872 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-5.000 tm_pt_3=-0.000 tm_pt_4=-5.000 tm_pt_5=-32.737 tm_pt_6=-16.092 tm_pt_7=-5.000 tm_pt_8=-13.590 tm_pt_9=-2.000 tm_pt_10=-12.188 tm_pt_11=-0.000 tm_pt_12=-3.876 tm_pt_13=-0.000 tm_pt_14=-8.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=7.000 WordPenalty=-5.212 OOVPenalty=-200.000 ||| -272.776
-0 ||| rabindranath was born in one পিরালী ব্রাহ্মণ in the family ||| lm_0=-25.676 lm_1=-35.872 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-6.000 tm_pt_3=-0.000 tm_pt_4=-5.000 tm_pt_5=-29.639 tm_pt_6=-16.710 tm_pt_7=-6.000 tm_pt_8=-16.308 tm_pt_9=-1.135 tm_pt_10=-13.438 tm_pt_11=-0.000 tm_pt_12=-8.350 tm_pt_13=-0.000 tm_pt_14=-8.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=7.000 WordPenalty=-5.212 OOVPenalty=-200.000 ||| -273.133
-0 ||| rabindranath born in the one পিরালী ব্রাহ্মণ in the family ||| lm_0=-27.320 lm_1=-35.872 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-5.000 tm_pt_3=-0.000 tm_pt_4=-5.000 tm_pt_5=-31.967 tm_pt_6=-17.090 tm_pt_7=-5.000 tm_pt_8=-13.590 tm_pt_9=-2.000 tm_pt_10=-10.648 tm_pt_11=-0.000 tm_pt_12=-4.497 tm_pt_13=-0.000 tm_pt_14=-8.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=7.000 WordPenalty=-5.212 OOVPenalty=-200.000 ||| -273.210
-0 ||| rabindranath born in kolkata one পিরালী ব্রাহ্মণ in the family ||| lm_0=-28.555 lm_1=-35.872 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-5.000 tm_pt_3=-0.000 tm_pt_4=-5.000 tm_pt_5=-28.259 tm_pt_6=-16.999 tm_pt_7=-5.000 tm_pt_8=-13.590 tm_pt_9=-2.000 tm_pt_10=-8.550 tm_pt_11=-0.000 tm_pt_12=-4.903 tm_pt_13=-0.000 tm_pt_14=-8.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=7.000 WordPenalty=-5.212 OOVPenalty=-200.000 ||| -273.597
-0 ||| rabindranath was born in a পিরালী ব্রাহ্মণ পরিবারে . ||| lm_0=-23.574 lm_1=-32.611 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-6.000 tm_pt_3=-0.000 tm_pt_4=-5.000 tm_pt_5=-12.867 tm_pt_6=-7.153 tm_pt_7=-6.000 tm_pt_8=-16.308 tm_pt_9=-0.135 tm_pt_10=-14.988 tm_pt_11=-0.000 tm_pt_12=-7.732 tm_pt_13=-0.000 tm_pt_14=-6.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=8.000 WordPenalty=-4.777 OOVPenalty=-300.000 ||| -360.646
-0 ||| rabindranath born in kolkata a পিরালী ব্রাহ্মণ পরিবারে . ||| lm_0=-27.665 lm_1=-32.611 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-5.000 tm_pt_3=-0.000 tm_pt_4=-5.000 tm_pt_5=-11.487 tm_pt_6=-7.442 tm_pt_7=-5.000 tm_pt_8=-13.590 tm_pt_9=-1.000 tm_pt_10=-10.100 tm_pt_11=-0.000 tm_pt_12=-4.285 tm_pt_13=-0.000 tm_pt_14=-6.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=8.000 WordPenalty=-4.777 OOVPenalty=-300.000 ||| -362.610
-0 ||| rabindranath born in the a পিরালী ব্রাহ্মণ পরিবারে . ||| lm_0=-26.841 lm_1=-32.611 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-5.000 tm_pt_3=-0.000 tm_pt_4=-5.000 tm_pt_5=-15.195 tm_pt_6=-7.533 tm_pt_7=-5.000 tm_pt_8=-13.590 tm_pt_9=-1.000 tm_pt_10=-12.198 tm_pt_11=-0.000 tm_pt_12=-3.880 tm_pt_13=-0.000 tm_pt_14=-6.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=8.000 WordPenalty=-4.777 OOVPenalty=-300.000 ||| -362.732
-1 ||| recently india with united relation improved . ||| lm_0=-21.585 lm_1=-26.089 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-6.000 tm_pt_3=-0.000 tm_pt_4=-5.000 tm_pt_5=-13.810 tm_pt_6=-15.759 tm_pt_7=-6.000 tm_pt_8=-16.308 tm_pt_9=-2.104 tm_pt_10=-11.527 tm_pt_11=-0.000 tm_pt_12=-5.174 tm_pt_13=-0.000 tm_pt_14=-7.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=5.000 WordPenalty=-3.909 ||| -58.929
-1 ||| recently with the united relation improved . ||| lm_0=-19.205 lm_1=-26.089 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-6.000 tm_pt_3=-0.000 tm_pt_4=-4.000 tm_pt_5=-17.062 tm_pt_6=-15.857 tm_pt_7=-6.000 tm_pt_8=-16.308 tm_pt_9=-2.105 tm_pt_10=-15.528 tm_pt_11=-0.000 tm_pt_12=-7.396 tm_pt_13=-0.000 tm_pt_14=-7.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=4.000 WordPenalty=-3.909 ||| -59.428
-1 ||| recently india with united matters improved . ||| lm_0=-21.481 lm_1=-26.089 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-6.000 tm_pt_3=-0.000 tm_pt_4=-5.000 tm_pt_5=-15.156 tm_pt_6=-17.146 tm_pt_7=-6.000 tm_pt_8=-16.308 tm_pt_9=-2.104 tm_pt_10=-11.320 tm_pt_11=-0.000 tm_pt_12=-5.174 tm_pt_13=-0.000 tm_pt_14=-7.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=5.000 WordPenalty=-3.909 ||| -59.477
-1 ||| recently india with united states relation improved . ||| lm_0=-21.491 lm_1=-29.350 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-6.000 tm_pt_3=-0.000 tm_pt_4=-6.000 tm_pt_5=-15.661 tm_pt_6=-15.849 tm_pt_7=-6.000 tm_pt_8=-16.308 tm_pt_9=-1.742 tm_pt_10=-10.885 tm_pt_11=-0.000 tm_pt_12=-4.412 tm_pt_13=-0.000 tm_pt_14=-8.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=6.000 WordPenalty=-4.343 ||| -59.602
-1 ||| recently the with united relation improved . ||| lm_0=-20.835 lm_1=-26.089 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-6.000 tm_pt_3=-0.000 tm_pt_4=-5.000 tm_pt_5=-17.062 tm_pt_6=-15.857 tm_pt_7=-6.000 tm_pt_8=-16.308 tm_pt_9=-2.104 tm_pt_10=-14.462 tm_pt_11=-0.000 tm_pt_12=-5.822 tm_pt_13=-0.000 tm_pt_14=-7.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=5.000 WordPenalty=-3.909 ||| -59.647
-1 ||| recently with the united states relation improved . ||| lm_0=-18.873 lm_1=-29.350 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-6.000 tm_pt_3=-0.000 tm_pt_4=-5.000 tm_pt_5=-18.913 tm_pt_6=-15.946 tm_pt_7=-6.000 tm_pt_8=-16.308 tm_pt_9=-1.743 tm_pt_10=-14.886 tm_pt_11=-0.000 tm_pt_12=-6.633 tm_pt_13=-0.000 tm_pt_14=-8.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=5.000 WordPenalty=-4.343 ||| -59.806
-2 ||| mathematics so science language . ||| lm_0=-15.141 lm_1=-19.567 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-4.000 tm_pt_3=-0.000 tm_pt_4=-4.000 tm_pt_5=-6.483 tm_pt_6=-3.387 tm_pt_7=-4.000 tm_pt_8=-10.872 tm_pt_9=-0.002 tm_pt_10=-3.378 tm_pt_11=-0.000 tm_pt_12=-1.626 tm_pt_13=-0.000 tm_pt_14=-5.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=4.000 WordPenalty=-3.040 ||| -34.682
-2 ||| mathematics is science language . ||| lm_0=-12.890 lm_1=-19.567 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-4.000 tm_pt_3=-0.000 tm_pt_4=-4.000 tm_pt_5=-10.375 tm_pt_6=-3.926 tm_pt_7=-4.000 tm_pt_8=-10.872 tm_pt_9=-0.052 tm_pt_10=-8.326 tm_pt_11=-0.000 tm_pt_12=-3.330 tm_pt_13=-0.000 tm_pt_14=-5.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=4.000 WordPenalty=-3.040 ||| -34.754
-2 ||| mathematics that science language . ||| lm_0=-14.001 lm_1=-19.567 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-4.000 tm_pt_3=-0.000 tm_pt_4=-4.000 tm_pt_5=-9.625 tm_pt_6=-3.926 tm_pt_7=-4.000 tm_pt_8=-10.872 tm_pt_9=-0.052 tm_pt_10=-7.607 tm_pt_11=-0.000 tm_pt_12=-3.330 tm_pt_13=-0.000 tm_pt_14=-5.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=4.000 WordPenalty=-3.040 ||| -35.798
-2 ||| mathematics so science language ||| lm_0=-15.078 lm_1=-16.306 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-4.000 tm_pt_3=-0.000 tm_pt_4=-4.000 tm_pt_5=-7.060 tm_pt_6=-10.481 tm_pt_7=-4.000 tm_pt_8=-10.872 tm_pt_9=-0.138 tm_pt_10=-7.888 tm_pt_11=-0.000 tm_pt_12=-3.417 tm_pt_13=-0.000 tm_pt_14=-4.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=4.000 WordPenalty=-2.606 ||| -36.952
-2 ||| mathematics is science language ||| lm_0=-12.827 lm_1=-16.306 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-4.000 tm_pt_3=-0.000 tm_pt_4=-4.000 tm_pt_5=-10.951 tm_pt_6=-11.020 tm_pt_7=-4.000 tm_pt_8=-10.872 tm_pt_9=-0.188 tm_pt_10=-12.835 tm_pt_11=-0.000 tm_pt_12=-5.122 tm_pt_13=-0.000 tm_pt_14=-4.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=4.000 WordPenalty=-2.606 ||| -37.024
-3 ||| from this it understood that this মেট্রিকটি will এফআরডব্লিউ মেট্রিক . ||| lm_0=-32.371 lm_1=-39.134 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-7.000 tm_pt_3=-0.000 tm_pt_4=-6.000 tm_pt_5=-14.595 tm_pt_6=-13.171 tm_pt_7=-7.000 tm_pt_8=-19.026 tm_pt_9=-1.405 tm_pt_10=-9.498 tm_pt_11=-0.000 tm_pt_12=-5.915 tm_pt_13=-0.000 tm_pt_14=-8.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=9.000 WordPenalty=-5.646 OOVPenalty=-300.000 ||| -377.287
-3 ||| from this it will be understood that the মেট্রিকটি এফআরডব্লিউ মেট্রিক . ||| lm_0=-28.474 lm_1=-42.395 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-6.000 tm_pt_3=-0.000 tm_pt_4=-3.000 tm_pt_5=-20.366 tm_pt_6=-14.416 tm_pt_7=-6.000 tm_pt_8=-16.308 tm_pt_9=-1.389 tm_pt_10=-6.943 tm_pt_11=-0.000 tm_pt_12=-4.457 tm_pt_13=-0.000 tm_pt_14=-9.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=5.000 WordPenalty=-6.080 OOVPenalty=-300.000 ||| -377.643
-3 ||| from this it understood that this মেট্রিকটি be এফআরডব্লিউ মেট্রিক . ||| lm_0=-32.656 lm_1=-39.134 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-7.000 tm_pt_3=-0.000 tm_pt_4=-6.000 tm_pt_5=-15.733 tm_pt_6=-13.079 tm_pt_7=-7.000 tm_pt_8=-19.026 tm_pt_9=-1.405 tm_pt_10=-10.513 tm_pt_11=-0.000 tm_pt_12=-5.915 tm_pt_13=-0.000 tm_pt_14=-8.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=9.000 WordPenalty=-5.646 OOVPenalty=-300.000 ||| -378.089
-3 ||| from this it understood that this will মেট্রিকটি এফআরডব্লিউ মেট্রিক . ||| lm_0=-31.903 lm_1=-39.134 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-7.000 tm_pt_3=-0.000 tm_pt_4=-5.000 tm_pt_5=-14.595 tm_pt_6=-13.171 tm_pt_7=-7.000 tm_pt_8=-19.026 tm_pt_9=-1.522 tm_pt_10=-7.740 tm_pt_11=-0.000 tm_pt_12=-5.309 tm_pt_13=-0.000 tm_pt_14=-8.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=7.000 WordPenalty=-5.646 OOVPenalty=-300.000 ||| -378.099
-3 ||| from this easily understood that this মেট্রিকটি will এফআরডব্লিউ মেট্রিক . ||| lm_0=-35.196 lm_1=-39.134 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-7.000 tm_pt_3=-0.000 tm_pt_4=-7.000 tm_pt_5=-10.693 tm_pt_6=-12.277 tm_pt_7=-7.000 tm_pt_8=-19.026 tm_pt_9=-1.522 tm_pt_10=-6.659 tm_pt_11=-0.000 tm_pt_12=-6.069 tm_pt_13=-0.000 tm_pt_14=-8.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=10.000 WordPenalty=-5.646 OOVPenalty=-300.000 ||| -378.180
-3 ||| from this it will be understood that this মেট্রিকটি এফআরডব্লিউ মেট্রিক . ||| lm_0=-28.974 lm_1=-42.395 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-7.000 tm_pt_3=-0.000 tm_pt_4=-3.000 tm_pt_5=-17.030 tm_pt_6=-13.124 tm_pt_7=-7.000 tm_pt_8=-19.026 tm_pt_9=-1.386 tm_pt_10=-7.892 tm_pt_11=-0.000 tm_pt_12=-3.799 tm_pt_13=-0.000 tm_pt_14=-9.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=5.000 WordPenalty=-6.080 OOVPenalty=-300.000 ||| -378.348
-3 ||| from this it understood that the মেট্রিকটি will এফআরডব্লিউ মেট্রিক . ||| lm_0=-31.871 lm_1=-39.134 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-6.000 tm_pt_3=-0.000 tm_pt_4=-4.000 tm_pt_5=-17.930 tm_pt_6=-14.463 tm_pt_7=-6.000 tm_pt_8=-16.308 tm_pt_9=-1.423 tm_pt_10=-9.145 tm_pt_11=-0.000 tm_pt_12=-6.163 tm_pt_13=-0.000 tm_pt_14=-8.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=7.000 WordPenalty=-5.646 OOVPenalty=-300.000 ||| -378.534
-3 ||| from this it will understood that the মেট্রিকটি এফআরডব্লিউ মেট্রিক . ||| lm_0=-30.313 lm_1=-39.134 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-6.000 tm_pt_3=-0.000 tm_pt_4=-3.000 tm_pt_5=-17.930 tm_pt_6=-14.463 tm_pt_7=-6.000 tm_pt_8=-16.308 tm_pt_9=-1.524 tm_pt_10=-6.432 tm_pt_11=-0.000 tm_pt_12=-5.479 tm_pt_13=-0.000 tm_pt_14=-8.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=4.000 WordPenalty=-5.646 OOVPenalty=-300.000 ||| -378.708
-4 ||| same with the earthcentered সামন্ততন্ত্রের পতনের indication match from this novel . ||| lm_0=-39.641 lm_1=-42.395 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-9.000 tm_pt_3=-0.000 tm_pt_4=-8.000 tm_pt_5=-16.212 tm_pt_6=-10.084 tm_pt_7=-9.000 tm_pt_8=-24.462 tm_pt_9=-3.005 tm_pt_10=-12.338 tm_pt_11=-0.000 tm_pt_12=-5.018 tm_pt_13=-0.000 tm_pt_14=-10.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=10.000 WordPenalty=-6.080 OOVPenalty=-200.000 ||| -290.423
-4 ||| same with the earthcentered সামন্ততন্ত্রের পতনের presage match from this novel . ||| lm_0=-39.916 lm_1=-42.395 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-9.000 tm_pt_3=-0.000 tm_pt_4=-8.000 tm_pt_5=-15.113 tm_pt_6=-10.084 tm_pt_7=-9.000 tm_pt_8=-24.462 tm_pt_9=-3.005 tm_pt_10=-12.338 tm_pt_11=-0.000 tm_pt_12=-5.018 tm_pt_13=-0.000 tm_pt_14=-10.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=10.000 WordPenalty=-6.080 OOVPenalty=-200.000 ||| -290.553
-4 ||| the with the earthcentered সামন্ততন্ত্রের পতনের indication match from this novel . ||| lm_0=-37.546 lm_1=-42.395 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-9.000 tm_pt_3=-0.000 tm_pt_4=-8.000 tm_pt_5=-20.640 tm_pt_6=-9.983 tm_pt_7=-9.000 tm_pt_8=-24.462 tm_pt_9=-3.024 tm_pt_10=-18.544 tm_pt_11=-0.000 tm_pt_12=-6.405 tm_pt_13=-0.000 tm_pt_14=-10.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=10.000 WordPenalty=-6.080 OOVPenalty=-200.000 ||| -290.757
-4 ||| the with the earthcentered সামন্ততন্ত্রের পতনের presage match from this novel . ||| lm_0=-37.820 lm_1=-42.395 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-9.000 tm_pt_3=-0.000 tm_pt_4=-8.000 tm_pt_5=-19.541 tm_pt_6=-9.983 tm_pt_7=-9.000 tm_pt_8=-24.462 tm_pt_9=-3.024 tm_pt_10=-18.544 tm_pt_11=-0.000 tm_pt_12=-6.405 tm_pt_13=-0.000 tm_pt_14=-10.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=10.000 WordPenalty=-6.080 OOVPenalty=-200.000 ||| -290.887
-4 ||| same with the earthcentered সামন্ততন্ত্রের পতনের indication match from this novels . ||| lm_0=-41.798 lm_1=-42.395 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-9.000 tm_pt_3=-0.000 tm_pt_4=-8.000 tm_pt_5=-15.164 tm_pt_6=-9.637 tm_pt_7=-9.000 tm_pt_8=-24.462 tm_pt_9=-3.005 tm_pt_10=-11.182 tm_pt_11=-0.000 tm_pt_12=-4.651 tm_pt_13=-0.000 tm_pt_14=-10.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=10.000 WordPenalty=-6.080 OOVPenalty=-200.000 ||| -292.308
-4 ||| same with the earthcentered সামন্ততন্ত্রের পতনের indication match this novel from . ||| lm_0=-41.827 lm_1=-42.395 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-10.000 tm_pt_3=-0.000 tm_pt_4=-10.000 tm_pt_5=-16.212 tm_pt_6=-10.084 tm_pt_7=-10.000 tm_pt_8=-27.180 tm_pt_9=-3.003 tm_pt_10=-12.054 tm_pt_11=-0.000 tm_pt_12=-5.342 tm_pt_13=-0.000 tm_pt_14=-10.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=12.000 WordPenalty=-6.080 OOVPenalty=-200.000 ||| -292.392
-4 ||| same with the earthcentered সামন্ততন্ত্রের পতনের presage match from this novels . ||| lm_0=-42.073 lm_1=-42.395 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-9.000 tm_pt_3=-0.000 tm_pt_4=-8.000 tm_pt_5=-14.066 tm_pt_6=-9.637 tm_pt_7=-9.000 tm_pt_8=-24.462 tm_pt_9=-3.005 tm_pt_10=-11.182 tm_pt_11=-0.000 tm_pt_12=-4.651 tm_pt_13=-0.000 tm_pt_14=-10.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=10.000 WordPenalty=-6.080 OOVPenalty=-200.000 ||| -292.438
-4 ||| same with the earthcentered সামন্ততন্ত্রের পতনের presage match this novel from . ||| lm_0=-42.102 lm_1=-42.395 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-10.000 tm_pt_3=-0.000 tm_pt_4=-10.000 tm_pt_5=-15.113 tm_pt_6=-10.084 tm_pt_7=-10.000 tm_pt_8=-27.180 tm_pt_9=-3.003 tm_pt_10=-12.054 tm_pt_11=-0.000 tm_pt_12=-5.342 tm_pt_13=-0.000 tm_pt_14=-10.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=12.000 WordPenalty=-6.080 OOVPenalty=-200.000 ||| -292.522
-4 ||| the with the earthcentered সামন্ততন্ত্রের পতনের indication match this novel from . ||| lm_0=-39.732 lm_1=-42.395 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-10.000 tm_pt_3=-0.000 tm_pt_4=-10.000 tm_pt_5=-20.640 tm_pt_6=-9.983 tm_pt_7=-10.000 tm_pt_8=-27.180 tm_pt_9=-3.021 tm_pt_10=-18.260 tm_pt_11=-0.000 tm_pt_12=-6.729 tm_pt_13=-0.000 tm_pt_14=-10.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=12.000 WordPenalty=-6.080 OOVPenalty=-200.000 ||| -292.727
-4 ||| with the same earthcentered সামন্ততন্ত্রের পতনের indication match from this novel . ||| lm_0=-37.796 lm_1=-42.395 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-9.000 tm_pt_3=-0.000 tm_pt_4=-5.000 tm_pt_5=-16.212 tm_pt_6=-10.084 tm_pt_7=-9.000 tm_pt_8=-24.462 tm_pt_9=-3.373 tm_pt_10=-14.188 tm_pt_11=-0.000 tm_pt_12=-7.809 tm_pt_13=-0.000 tm_pt_14=-10.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=7.000 WordPenalty=-6.080 OOVPenalty=-200.000 ||| -292.802
-5 ||| নির্বচনে mujib and his party নিরঙ্কুষ majority in . ||| lm_0=-25.124 lm_1=-32.611 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-6.000 tm_pt_3=-0.000 tm_pt_4=-5.000 tm_pt_5=-13.102 tm_pt_6=-8.482 tm_pt_7=-6.000 tm_pt_8=-16.308 tm_pt_9=-0.289 tm_pt_10=-14.216 tm_pt_11=-0.000 tm_pt_12=-2.256 tm_pt_13=-0.000 tm_pt_14=-7.000 tm_pt_15=-1.000 tm_pt_16=-0.000 tm_glue_0=7.000 WordPenalty=-4.777 OOVPenalty=-200.000 ||| -262.275
-5 ||| নির্বচনে mujib and his party নিরঙ্কুষ majority in the . ||| lm_0=-25.263 lm_1=-35.872 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-7.000 tm_pt_3=-0.000 tm_pt_4=-7.000 tm_pt_5=-16.310 tm_pt_6=-6.695 tm_pt_7=-7.000 tm_pt_8=-19.026 tm_pt_9=-0.289 tm_pt_10=-10.344 tm_pt_11=-0.000 tm_pt_12=-2.428 tm_pt_13=-0.000 tm_pt_14=-8.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=9.000 WordPenalty=-5.212 OOVPenalty=-200.000 ||| -262.282
-5 ||| নির্বচনে mujib and his party নিরঙ্কুষ majority majority . ||| lm_0=-26.944 lm_1=-32.611 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-6.000 tm_pt_3=-0.000 tm_pt_4=-5.000 tm_pt_5=-9.787 tm_pt_6=-9.868 tm_pt_7=-6.000 tm_pt_8=-16.308 tm_pt_9=-0.639 tm_pt_10=-10.413 tm_pt_11=-0.000 tm_pt_12=-3.172 tm_pt_13=-0.000 tm_pt_14=-7.000 tm_pt_15=-1.000 tm_pt_16=-0.000 tm_glue_0=7.000 WordPenalty=-4.777 OOVPenalty=-200.000 ||| -263.990
-5 ||| নির্বচনে mujib and his party নিরঙ্কুষ majority that . ||| lm_0=-26.235 lm_1=-32.611 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-6.000 tm_pt_3=-0.000 tm_pt_4=-5.000 tm_pt_5=-12.810 tm_pt_6=-9.357 tm_pt_7=-6.000 tm_pt_8=-16.308 tm_pt_9=-0.406 tm_pt_10=-13.770 tm_pt_11=-0.000 tm_pt_12=-2.767 tm_pt_13=-0.000 tm_pt_14=-7.000 tm_pt_15=-1.000 tm_pt_16=-0.000 tm_glue_0=7.000 WordPenalty=-4.777 OOVPenalty=-200.000 ||| -264.054
-5 ||| নির্বচনে mujib and his party majority in নিরঙ্কুষ . ||| lm_0=-25.124 lm_1=-32.611 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-6.000 tm_pt_3=-0.000 tm_pt_4=-4.000 tm_pt_5=-13.102 tm_pt_6=-8.482 tm_pt_7=-6.000 tm_pt_8=-16.308 tm_pt_9=-0.522 tm_pt_10=-12.917 tm_pt_11=-0.000 tm_pt_12=-2.374 tm_pt_13=-0.000 tm_pt_14=-7.000 tm_pt_15=-1.000 tm_pt_16=-0.000 tm_glue_0=5.000 WordPenalty=-4.777 OOVPenalty=-200.000 ||| -264.135
-5 ||| নির্বচনে mujib and his party নিরঙ্কুষ majority that the . ||| lm_0=-27.702 lm_1=-35.872 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-6.000 tm_pt_3=-0.000 tm_pt_4=-6.000 tm_pt_5=-16.018 tm_pt_6=-7.571 tm_pt_7=-6.000 tm_pt_8=-16.308 tm_pt_9=-1.271 tm_pt_10=-5.619 tm_pt_11=-0.000 tm_pt_12=-1.161 tm_pt_13=-0.000 tm_pt_14=-8.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=8.000 WordPenalty=-5.212 OOVPenalty=-200.000 ||| -264.484
-5 ||| নির্বচনে mujib and his party নিরঙ্কুষ majority where the . ||| lm_0=-28.188 lm_1=-35.872 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-6.000 tm_pt_3=-0.000 tm_pt_4=-5.000 tm_pt_5=-15.032 tm_pt_6=-9.180 tm_pt_7=-6.000 tm_pt_8=-16.308 tm_pt_9=-0.406 tm_pt_10=-2.153 tm_pt_11=-0.000 tm_pt_12=-0.468 tm_pt_13=-0.000 tm_pt_14=-8.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=7.000 WordPenalty=-5.212 OOVPenalty=-200.000 ||| -264.558
-6 ||| হিটলারও বাভারিয়ার মিউনিখ শহরেই his work with to that . ||| lm_0=-33.425 lm_1=-35.872 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-6.000 tm_pt_3=-0.000 tm_pt_4=-6.000 tm_pt_5=-22.509 tm_pt_6=-11.163 tm_pt_7=-6.000 tm_pt_8=-16.308 tm_pt_9=-1.370 tm_pt_10=-18.845 tm_pt_11=-0.000 tm_pt_12=-2.681 tm_pt_13=-0.000 tm_pt_14=-6.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=10.000 WordPenalty=-5.212 OOVPenalty=-400.000 ||| -476.744
-6 ||| হিটলারও বাভারিয়ার মিউনিখ শহরেই his work with can that . ||| lm_0=-35.292 lm_1=-35.872 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-6.000 tm_pt_3=-0.000 tm_pt_4=-6.000 tm_pt_5=-19.242 tm_pt_6=-9.832 tm_pt_7=-6.000 tm_pt_8=-16.308 tm_pt_9=-1.503 tm_pt_10=-17.011 tm_pt_11=-0.000 tm_pt_12=-3.528 tm_pt_13=-0.000 tm_pt_14=-6.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=10.000 WordPenalty=-5.212 OOVPenalty=-400.000 ||| -477.908
-6 ||| হিটলারও বাভারিয়ার মিউনিখ শহরেই his work with that can . ||| lm_0=-33.973 lm_1=-35.872 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-6.000 tm_pt_3=-0.000 tm_pt_4=-5.000 tm_pt_5=-19.242 tm_pt_6=-9.832 tm_pt_7=-6.000 tm_pt_8=-16.308 tm_pt_9=-2.368 tm_pt_10=-17.376 tm_pt_11=-0.000 tm_pt_12=-3.305 tm_pt_13=-0.000 tm_pt_14=-6.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=9.000 WordPenalty=-5.212 OOVPenalty=-400.000 ||| -477.964
-6 ||| হিটলারও বাভারিয়ার মিউনিখ শহরেই his works with to that . ||| lm_0=-33.694 lm_1=-35.872 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-6.000 tm_pt_3=-0.000 tm_pt_4=-6.000 tm_pt_5=-22.839 tm_pt_6=-12.090 tm_pt_7=-6.000 tm_pt_8=-16.308 tm_pt_9=-1.371 tm_pt_10=-19.317 tm_pt_11=-0.000 tm_pt_12=-4.578 tm_pt_13=-0.000 tm_pt_14=-6.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=10.000 WordPenalty=-5.212 OOVPenalty=-400.000 ||| -478.251
-6 ||| হিটলারও বাভারিয়ার মিউনিখ শহরেই his work with a that . ||| lm_0=-33.108 lm_1=-35.872 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-6.000 tm_pt_3=-0.000 tm_pt_4=-5.000 tm_pt_5=-24.229 tm_pt_6=-13.109 tm_pt_7=-6.000 tm_pt_8=-16.308 tm_pt_9=-1.418 tm_pt_10=-18.986 tm_pt_11=-0.000 tm_pt_12=-2.612 tm_pt_13=-0.000 tm_pt_14=-6.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=9.000 WordPenalty=-5.212 OOVPenalty=-400.000 ||| -478.362
-6 ||| হিটলারও বাভারিয়ার মিউনিখ শহরেই his works with can that . ||| lm_0=-35.562 lm_1=-35.872 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-6.000 tm_pt_3=-0.000 tm_pt_4=-6.000 tm_pt_5=-19.572 tm_pt_6=-10.759 tm_pt_7=-6.000 tm_pt_8=-16.308 tm_pt_9=-1.504 tm_pt_10=-17.483 tm_pt_11=-0.000 tm_pt_12=-5.425 tm_pt_13=-0.000 tm_pt_14=-6.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=10.000 WordPenalty=-5.212 OOVPenalty=-400.000 ||| -479.414
-6 ||| হিটলারও বাভারিয়ার মিউনিখ শহরেই his works with that can . ||| lm_0=-34.242 lm_1=-35.872 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-6.000 tm_pt_3=-0.000 tm_pt_4=-5.000 tm_pt_5=-19.572 tm_pt_6=-10.759 tm_pt_7=-6.000 tm_pt_8=-16.308 tm_pt_9=-2.368 tm_pt_10=-17.847 tm_pt_11=-0.000 tm_pt_12=-5.202 tm_pt_13=-0.000 tm_pt_14=-6.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=9.000 WordPenalty=-5.212 OOVPenalty=-400.000 ||| -479.471
-7 ||| task , ওএস-ট্যান and also some linux প্রতিনিধিত্বকারী character is but these very is not . ||| lm_0=-47.390 lm_1=-55.439 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-13.000 tm_pt_3=-0.000 tm_pt_4=-13.000 tm_pt_5=-36.805 tm_pt_6=-15.372 tm_pt_7=-13.000 tm_pt_8=-35.334 tm_pt_9=-1.871 tm_pt_10=-24.380 tm_pt_11=-0.000 tm_pt_12=-9.030 tm_pt_13=-0.000 tm_pt_14=-14.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=15.000 WordPenalty=-7.817 OOVPenalty=-200.000 ||| -317.576
-7 ||| task , ওএস-ট্যান and some more linux প্রতিনিধিত্বকারী character is but these very is not . ||| lm_0=-47.416 lm_1=-55.439 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-13.000 tm_pt_3=-0.000 tm_pt_4=-12.000 tm_pt_5=-34.821 tm_pt_6=-14.079 tm_pt_7=-13.000 tm_pt_8=-35.334 tm_pt_9=-1.874 tm_pt_10=-24.460 tm_pt_11=-0.000 tm_pt_12=-9.204 tm_pt_13=-0.000 tm_pt_14=-14.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=14.000 WordPenalty=-7.817 OOVPenalty=-200.000 ||| -317.835
-7 ||| task , ওএস-ট্যান and also some linux প্রতিনিধিত্বকারী character are but these very is not . ||| lm_0=-48.326 lm_1=-55.439 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-13.000 tm_pt_3=-0.000 tm_pt_4=-13.000 tm_pt_5=-35.727 tm_pt_6=-15.118 tm_pt_7=-13.000 tm_pt_8=-35.334 tm_pt_9=-1.871 tm_pt_10=-23.123 tm_pt_11=-0.000 tm_pt_12=-8.647 tm_pt_13=-0.000 tm_pt_14=-14.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=15.000 WordPenalty=-7.817 OOVPenalty=-200.000 ||| -317.979
-7 ||| task , ওএস-ট্যান and some more linux প্রতিনিধিত্বকারী character are but these very is not . ||| lm_0=-48.352 lm_1=-55.439 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-13.000 tm_pt_3=-0.000 tm_pt_4=-12.000 tm_pt_5=-33.743 tm_pt_6=-13.825 tm_pt_7=-13.000 tm_pt_8=-35.334 tm_pt_9=-1.874 tm_pt_10=-23.203 tm_pt_11=-0.000 tm_pt_12=-8.821 tm_pt_13=-0.000 tm_pt_14=-14.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=14.000 WordPenalty=-7.817 OOVPenalty=-200.000 ||| -318.238
-7 ||| task , ওএস-ট্যান and also some linux প্রতিনিধিত্বকারী character there but these very is not . ||| lm_0=-48.680 lm_1=-55.439 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-13.000 tm_pt_3=-0.000 tm_pt_4=-13.000 tm_pt_5=-34.439 tm_pt_6=-14.939 tm_pt_7=-13.000 tm_pt_8=-35.334 tm_pt_9=-1.871 tm_pt_10=-22.476 tm_pt_11=-0.000 tm_pt_12=-9.541 tm_pt_13=-0.000 tm_pt_14=-14.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=15.000 WordPenalty=-7.817 OOVPenalty=-200.000 ||| -318.258
-7 ||| task , ওএস-ট্যান and more some linux প্রতিনিধিত্বকারী character is but these very is not . ||| lm_0=-49.202 lm_1=-55.439 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-13.000 tm_pt_3=-0.000 tm_pt_4=-13.000 tm_pt_5=-34.821 tm_pt_6=-14.079 tm_pt_7=-13.000 tm_pt_8=-35.334 tm_pt_9=-1.871 tm_pt_10=-23.599 tm_pt_11=-0.000 tm_pt_12=-8.183 tm_pt_13=-0.000 tm_pt_14=-14.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=15.000 WordPenalty=-7.817 OOVPenalty=-200.000 ||| -318.493
-7 ||| task , ওএস-ট্যান and some more linux প্রতিনিধিত্বকারী character there but these very is not . ||| lm_0=-48.706 lm_1=-55.439 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-13.000 tm_pt_3=-0.000 tm_pt_4=-12.000 tm_pt_5=-32.456 tm_pt_6=-13.646 tm_pt_7=-13.000 tm_pt_8=-35.334 tm_pt_9=-1.874 tm_pt_10=-22.557 tm_pt_11=-0.000 tm_pt_12=-9.715 tm_pt_13=-0.000 tm_pt_14=-14.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=14.000 WordPenalty=-7.817 OOVPenalty=-200.000 ||| -318.517
-7 ||| task without ওএস-ট্যান and some more linux প্রতিনিধিত্বকারী character is but these very is not . ||| lm_0=-49.402 lm_1=-55.439 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-13.000 tm_pt_3=-0.000 tm_pt_4=-12.000 tm_pt_5=-31.413 tm_pt_6=-16.431 tm_pt_7=-13.000 tm_pt_8=-35.334 tm_pt_9=-1.874 tm_pt_10=-17.244 tm_pt_11=-0.000 tm_pt_12=-9.204 tm_pt_13=-0.000 tm_pt_14=-14.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=14.000 WordPenalty=-7.817 OOVPenalty=-200.000 ||| -318.572
-7 ||| task , ওএস-ট্যান and also some linux প্রতিনিধিত্বকারী character is but these a is not . ||| lm_0=-46.205 lm_1=-55.439 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-13.000 tm_pt_3=-0.000 tm_pt_4=-13.000 tm_pt_5=-40.521 tm_pt_6=-16.440 tm_pt_7=-13.000 tm_pt_8=-35.334 tm_pt_9=-1.889 tm_pt_10=-28.483 tm_pt_11=-0.000 tm_pt_12=-9.906 tm_pt_13=-0.000 tm_pt_14=-14.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=15.000 WordPenalty=-7.817 OOVPenalty=-200.000 ||| -318.574
-7 ||| task , ওএস-ট্যান and some more linux প্রতিনিধিত্বকারী character is but these a is not . ||| lm_0=-46.230 lm_1=-55.439 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-13.000 tm_pt_3=-0.000 tm_pt_4=-12.000 tm_pt_5=-38.538 tm_pt_6=-15.147 tm_pt_7=-13.000 tm_pt_8=-35.334 tm_pt_9=-1.892 tm_pt_10=-28.563 tm_pt_11=-0.000 tm_pt_12=-10.079 tm_pt_13=-0.000 tm_pt_14=-14.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=14.000 WordPenalty=-7.817 OOVPenalty=-200.000 ||| -318.833
-8 ||| it social situation in সিদ্ধান্তগ্রহনকারী his oppositions with ক্রীড়াক্ষেত্রে decision making the rule of services . ||| lm_0=-45.531 lm_1=-55.439 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-13.000 tm_pt_3=-0.000 tm_pt_4=-12.000 tm_pt_5=-45.930 tm_pt_6=-30.412 tm_pt_7=-13.000 tm_pt_8=-35.334 tm_pt_9=-3.432 tm_pt_10=-33.034 tm_pt_11=-0.000 tm_pt_12=-8.758 tm_pt_13=-0.000 tm_pt_14=-14.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=14.000 WordPenalty=-7.817 OOVPenalty=-200.000 ||| -326.554
-8 ||| this social situation in সিদ্ধান্তগ্রহনকারী his oppositions with ক্রীড়াক্ষেত্রে decision making the rule of services . ||| lm_0=-44.770 lm_1=-55.439 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-13.000 tm_pt_3=-0.000 tm_pt_4=-12.000 tm_pt_5=-47.524 tm_pt_6=-31.359 tm_pt_7=-13.000 tm_pt_8=-35.334 tm_pt_9=-3.432 tm_pt_10=-34.967 tm_pt_11=-0.000 tm_pt_12=-9.390 tm_pt_13=-0.000 tm_pt_14=-14.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=14.000 WordPenalty=-7.817 OOVPenalty=-200.000 ||| -326.967
-8 ||| it social situation in সিদ্ধান্তগ্রহনকারী his oppositions with ক্রীড়াক্ষেত্রে decision making the rules of services . ||| lm_0=-46.018 lm_1=-55.439 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-13.000 tm_pt_3=-0.000 tm_pt_4=-12.000 tm_pt_5=-45.332 tm_pt_6=-31.000 tm_pt_7=-13.000 tm_pt_8=-35.334 tm_pt_9=-3.561 tm_pt_10=-32.666 tm_pt_11=-0.000 tm_pt_12=-9.451 tm_pt_13=-0.000 tm_pt_14=-14.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=14.000 WordPenalty=-7.817 OOVPenalty=-200.000 ||| -327.498
-8 ||| this social situation in সিদ্ধান্তগ্রহনকারী his oppositions with ক্রীড়াক্ষেত্রে decision making the rules of services . ||| lm_0=-45.257 lm_1=-55.439 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-13.000 tm_pt_3=-0.000 tm_pt_4=-12.000 tm_pt_5=-46.925 tm_pt_6=-31.947 tm_pt_7=-13.000 tm_pt_8=-35.334 tm_pt_9=-3.561 tm_pt_10=-34.599 tm_pt_11=-0.000 tm_pt_12=-10.083 tm_pt_13=-0.000 tm_pt_14=-14.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=14.000 WordPenalty=-7.817 OOVPenalty=-200.000 ||| -327.911
-8 ||| it social situation in সিদ্ধান্তগ্রহনকারী his oppositions with ক্রীড়াক্ষেত্রে decision making the rule of the fingers . ||| lm_0=-47.090 lm_1=-58.169 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-13.000 tm_pt_3=-0.000 tm_pt_4=-13.000 tm_pt_5=-48.497 tm_pt_6=-24.838 tm_pt_7=-13.000 tm_pt_8=-35.334 tm_pt_9=-3.665 tm_pt_10=-34.032 tm_pt_11=-0.000 tm_pt_12=-10.582 tm_pt_13=-0.000 tm_pt_14=-15.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=15.000 WordPenalty=-8.252 OOVPenalty=-200.000 ||| -328.644
-8 ||| it social situation in সিদ্ধান্তগ্রহনকারী his oppositions with ক্রীড়াক্ষেত্রে decision making is rule of services . ||| lm_0=-48.384 lm_1=-55.439 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-13.000 tm_pt_3=-0.000 tm_pt_4=-12.000 tm_pt_5=-44.305 tm_pt_6=-30.138 tm_pt_7=-13.000 tm_pt_8=-35.334 tm_pt_9=-3.414 tm_pt_10=-31.140 tm_pt_11=-0.000 tm_pt_12=-8.170 tm_pt_13=-0.000 tm_pt_14=-14.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=14.000 WordPenalty=-7.817 OOVPenalty=-200.000 ||| -328.967
-8 ||| this social situation in সিদ্ধান্তগ্রহনকারী his oppositions with ক্রীড়াক্ষেত্রে decision making the rule of the fingers . ||| lm_0=-46.329 lm_1=-58.169 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-13.000 tm_pt_3=-0.000 tm_pt_4=-13.000 tm_pt_5=-50.091 tm_pt_6=-25.785 tm_pt_7=-13.000 tm_pt_8=-35.334 tm_pt_9=-3.665 tm_pt_10=-35.965 tm_pt_11=-0.000 tm_pt_12=-11.214 tm_pt_13=-0.000 tm_pt_14=-15.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=15.000 WordPenalty=-8.252 OOVPenalty=-200.000 ||| -329.056
-9 ||| বৃষ্টিপাতঃ annual ২৫৪০ মিলি meters ||| lm_0=-26.340 lm_1=-19.567 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-2.000 tm_pt_3=-0.000 tm_pt_4=-2.000 tm_pt_5=-3.372 tm_pt_6=-3.054 tm_pt_7=-2.000 tm_pt_8=-5.436 tm_pt_9=-0.368 tm_pt_10=-2.140 tm_pt_11=-0.000 tm_pt_12=-1.263 tm_pt_13=-0.000 tm_pt_14=-2.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=5.000 WordPenalty=-3.040 OOVPenalty=-300.000 ||| -345.288
-9 ||| বৃষ্টিপাতঃ annual ২৫৪০ মিলি meter ||| lm_0=-26.316 lm_1=-19.567 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-2.000 tm_pt_3=-0.000 tm_pt_4=-2.000 tm_pt_5=-3.885 tm_pt_6=-2.821 tm_pt_7=-2.000 tm_pt_8=-5.436 tm_pt_9=-0.368 tm_pt_10=-2.140 tm_pt_11=-0.000 tm_pt_12=-1.337 tm_pt_13=-0.000 tm_pt_14=-2.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=5.000 WordPenalty=-3.040 OOVPenalty=-300.000 ||| -345.303
-9 ||| বৃষ্টিপাতঃ arrange ২৫৪০ মিলি meters ||| lm_0=-26.532 lm_1=-19.567 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-2.000 tm_pt_3=-0.000 tm_pt_4=-2.000 tm_pt_5=-2.916 tm_pt_6=-3.748 tm_pt_7=-2.000 tm_pt_8=-5.436 tm_pt_9=-1.000 tm_pt_10=0.000 tm_pt_11=-0.000 tm_pt_12=-1.956 tm_pt_13=-0.000 tm_pt_14=-2.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=5.000 WordPenalty=-3.040 OOVPenalty=-300.000 ||| -345.884
-9 ||| বৃষ্টিপাতঃ arrange ২৫৪০ মিলি meter ||| lm_0=-26.509 lm_1=-19.567 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-2.000 tm_pt_3=-0.000 tm_pt_4=-2.000 tm_pt_5=-3.430 tm_pt_6=-3.514 tm_pt_7=-2.000 tm_pt_8=-5.436 tm_pt_9=-1.000 tm_pt_10=0.000 tm_pt_11=-0.000 tm_pt_12=-2.030 tm_pt_13=-0.000 tm_pt_14=-2.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=5.000 WordPenalty=-3.040 OOVPenalty=-300.000 ||| -345.900
-9 ||| বৃষ্টিপাতঃ annual ২৫৪০ মিলি metres ||| lm_0=-26.771 lm_1=-19.567 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-2.000 tm_pt_3=-0.000 tm_pt_4=-2.000 tm_pt_5=-3.608 tm_pt_6=-4.389 tm_pt_7=-2.000 tm_pt_8=-5.436 tm_pt_9=-0.503 tm_pt_10=-2.140 tm_pt_11=-0.000 tm_pt_12=-2.803 tm_pt_13=-0.000 tm_pt_14=-2.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=5.000 WordPenalty=-3.040 OOVPenalty=-300.000 ||| -346.979
-9 ||| বৃষ্টিপাতঃ arrange ২৫৪০ মিলি metres ||| lm_0=-26.963 lm_1=-19.567 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-2.000 tm_pt_3=-0.000 tm_pt_4=-2.000 tm_pt_5=-3.153 tm_pt_6=-5.083 tm_pt_7=-2.000 tm_pt_8=-5.436 tm_pt_9=-1.135 tm_pt_10=0.000 tm_pt_11=-0.000 tm_pt_12=-3.497 tm_pt_13=-0.000 tm_pt_14=-2.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=5.000 WordPenalty=-3.040 OOVPenalty=-300.000 ||| -347.576
-10 ||| in 2004 ম্যাসাচুসেটস অঙ্গরাজ্যের বস্টন in ডেমোক্র্যাট in the national was he main speech -lrb- keynote speech -rrb- on the . ||| lm_0=-64.735 lm_1=-71.745 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-15.000 tm_pt_3=-0.000 tm_pt_4=-13.000 tm_pt_5=-43.133 tm_pt_6=-17.136 tm_pt_7=-15.000 tm_pt_8=-40.770 tm_pt_9=-1.738 tm_pt_10=-35.371 tm_pt_11=-0.000 tm_pt_12=-8.127 tm_pt_13=-0.000 tm_pt_14=-15.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=18.000 WordPenalty=-9.989 OOVPenalty=-600.000 ||| -754.407
-10 ||| in 2004 ম্যাসাচুসেটস অঙ্গরাজ্যের বস্টন in ডেমোক্র্যাট in the national was he main speech -lrb- keynote speech -rrb- to the . ||| lm_0=-64.457 lm_1=-71.745 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-15.000 tm_pt_3=-0.000 tm_pt_4=-13.000 tm_pt_5=-44.212 tm_pt_6=-17.290 tm_pt_7=-15.000 tm_pt_8=-40.770 tm_pt_9=-1.707 tm_pt_10=-36.372 tm_pt_11=-0.000 tm_pt_12=-7.904 tm_pt_13=-0.000 tm_pt_14=-15.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=18.000 WordPenalty=-9.989 OOVPenalty=-600.000 ||| -754.478
-10 ||| in 2004 ম্যাসাচুসেটস অঙ্গরাজ্যের বস্টন in ডেমোক্র্যাট in the national he was the speech -lrb- keynote speech -rrb- on the . ||| lm_0=-60.509 lm_1=-71.745 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-15.000 tm_pt_3=-0.000 tm_pt_4=-12.000 tm_pt_5=-47.579 tm_pt_6=-17.884 tm_pt_7=-15.000 tm_pt_8=-40.770 tm_pt_9=-1.738 tm_pt_10=-42.661 tm_pt_11=-0.000 tm_pt_12=-12.264 tm_pt_13=-0.000 tm_pt_14=-15.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=17.000 WordPenalty=-9.989 OOVPenalty=-600.000 ||| -754.604
-10 ||| in 2004 ম্যাসাচুসেটস অঙ্গরাজ্যের বস্টন in ডেমোক্র্যাট in the national he was the speech -lrb- keynote speech -rrb- to the . ||| lm_0=-60.231 lm_1=-71.745 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-15.000 tm_pt_3=-0.000 tm_pt_4=-12.000 tm_pt_5=-48.658 tm_pt_6=-18.038 tm_pt_7=-15.000 tm_pt_8=-40.770 tm_pt_9=-1.707 tm_pt_10=-43.661 tm_pt_11=-0.000 tm_pt_12=-12.041 tm_pt_13=-0.000 tm_pt_14=-15.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=17.000 WordPenalty=-9.989 OOVPenalty=-600.000 ||| -754.674
-10 ||| in 2004 ম্যাসাচুসেটস অঙ্গরাজ্যের বস্টন in ডেমোক্র্যাট in the national was he main speech -lrb- keynote speech -rrb- , the . ||| lm_0=-63.604 lm_1=-71.745 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-15.000 tm_pt_3=-0.000 tm_pt_4=-13.000 tm_pt_5=-45.343 tm_pt_6=-16.831 tm_pt_7=-15.000 tm_pt_8=-40.770 tm_pt_9=-2.056 tm_pt_10=-39.115 tm_pt_11=-0.000 tm_pt_12=-8.820 tm_pt_13=-0.000 tm_pt_14=-15.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=18.000 WordPenalty=-9.989 OOVPenalty=-600.000 ||| -754.804
-10 ||| in 2004 ম্যাসাচুসেটস অঙ্গরাজ্যের বস্টন in ডেমোক্র্যাট in the national he was the speech -lrb- keynote speech -rrb- , the . ||| lm_0=-59.378 lm_1=-71.745 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-15.000 tm_pt_3=-0.000 tm_pt_4=-12.000 tm_pt_5=-49.789 tm_pt_6=-17.578 tm_pt_7=-15.000 tm_pt_8=-40.770 tm_pt_9=-2.056 tm_pt_10=-46.405 tm_pt_11=-0.000 tm_pt_12=-12.957 tm_pt_13=-0.000 tm_pt_14=-15.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=17.000 WordPenalty=-9.989 OOVPenalty=-600.000 ||| -755.000
-10 ||| in 2004 ম্যাসাচুসেটস অঙ্গরাজ্যের বস্টন in ডেমোক্র্যাট in the national he was main speech -lrb- keynote speech -rrb- , the . ||| lm_0=-62.129 lm_1=-71.745 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-15.000 tm_pt_3=-0.000 tm_pt_4=-12.000 tm_pt_5=-45.343 tm_pt_6=-16.831 tm_pt_7=-15.000 tm_pt_8=-40.770 tm_pt_9=-2.056 tm_pt_10=-41.797 tm_pt_11=-0.000 tm_pt_12=-10.972 tm_pt_13=-0.000 tm_pt_14=-15.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=17.000 WordPenalty=-9.989 OOVPenalty=-600.000 ||| -755.400
-11 ||| population , power distribution east pakistan where to west pakistan " one unit theory " is a অভিনব started with the where the regarded as a province west pakistan . ||| lm_0=-69.815 lm_1=-101.095 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-25.000 tm_pt_3=-0.000 tm_pt_4=-20.000 tm_pt_5=-75.038 tm_pt_6=-45.484 tm_pt_7=-25.000 tm_pt_8=-67.950 tm_pt_9=-3.948 tm_pt_10=-58.394 tm_pt_11=-0.000 tm_pt_12=-13.706 tm_pt_13=-0.000 tm_pt_14=-29.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=20.000 WordPenalty=-13.897 OOVPenalty=-100.000 ||| -313.312
-11 ||| population , power distribution east pakistan where to west pakistan " one unit theory " is a অভিনব they started the where the regarded as a province west pakistan . ||| lm_0=-71.365 lm_1=-101.095 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-25.000 tm_pt_3=-0.000 tm_pt_4=-21.000 tm_pt_5=-73.645 tm_pt_6=-45.484 tm_pt_7=-25.000 tm_pt_8=-67.950 tm_pt_9=-3.630 tm_pt_10=-57.313 tm_pt_11=-0.000 tm_pt_12=-13.824 tm_pt_13=-0.000 tm_pt_14=-29.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=21.000 WordPenalty=-13.897 OOVPenalty=-100.000 ||| -313.498
-11 ||| population , power distribution east pakistan where to west pakistan " one unit theory " is a অভিনব started with the where the west pakistan regarded as a province . ||| lm_0=-71.643 lm_1=-101.095 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-25.000 tm_pt_3=-0.000 tm_pt_4=-21.000 tm_pt_5=-75.038 tm_pt_6=-45.484 tm_pt_7=-25.000 tm_pt_8=-67.950 tm_pt_9=-3.898 tm_pt_10=-56.926 tm_pt_11=-0.000 tm_pt_12=-12.847 tm_pt_13=-0.000 tm_pt_14=-29.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=21.000 WordPenalty=-13.897 OOVPenalty=-100.000 ||| -313.884
-11 ||| population , power distribution east pakistan where to west pakistan " one unit theory " is a অভিনব they started the where the west pakistan regarded as a province . ||| lm_0=-73.192 lm_1=-101.095 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-25.000 tm_pt_3=-0.000 tm_pt_4=-22.000 tm_pt_5=-73.645 tm_pt_6=-45.484 tm_pt_7=-25.000 tm_pt_8=-67.950 tm_pt_9=-3.580 tm_pt_10=-55.844 tm_pt_11=-0.000 tm_pt_12=-12.965 tm_pt_13=-0.000 tm_pt_14=-29.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=22.000 WordPenalty=-13.897 OOVPenalty=-100.000 ||| -314.070
-11 ||| population on power distribution east pakistan where to west pakistan " one unit theory " is a অভিনব started with the where the west pakistan regarded as a province . ||| lm_0=-72.719 lm_1=-101.095 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-25.000 tm_pt_3=-0.000 tm_pt_4=-21.000 tm_pt_5=-71.056 tm_pt_6=-44.018 tm_pt_7=-25.000 tm_pt_8=-67.950 tm_pt_9=-4.530 tm_pt_10=-54.568 tm_pt_11=-0.000 tm_pt_12=-13.540 tm_pt_13=-0.000 tm_pt_14=-29.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=21.000 WordPenalty=-13.897 OOVPenalty=-100.000 ||| -314.104
-11 ||| population , power distribution east pakistan where to west pakistan " one unit theory " is a অভিনব started with the where the west pakistan considered as a province . ||| lm_0=-71.170 lm_1=-101.095 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-25.000 tm_pt_3=-0.000 tm_pt_4=-21.000 tm_pt_5=-75.147 tm_pt_6=-43.926 tm_pt_7=-25.000 tm_pt_8=-67.950 tm_pt_9=-4.530 tm_pt_10=-59.228 tm_pt_11=-0.000 tm_pt_12=-13.540 tm_pt_13=-0.000 tm_pt_14=-29.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=21.000 WordPenalty=-13.897 OOVPenalty=-100.000 ||| -314.148
-11 ||| population , power distribution east pakistan where to west pakistan " one unit theory " is a অভিনব they started the where the west pakistan considered as a province . ||| lm_0=-72.719 lm_1=-101.095 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-25.000 tm_pt_3=-0.000 tm_pt_4=-22.000 tm_pt_5=-73.755 tm_pt_6=-43.926 tm_pt_7=-25.000 tm_pt_8=-67.950 tm_pt_9=-4.212 tm_pt_10=-58.147 tm_pt_11=-0.000 tm_pt_12=-13.658 tm_pt_13=-0.000 tm_pt_14=-29.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=22.000 WordPenalty=-13.897 OOVPenalty=-100.000 ||| -314.334
-11 ||| population on power distribution east pakistan where to west pakistan " one unit theory " is a অভিনব started with the where the west pakistan considered as a province . ||| lm_0=-72.246 lm_1=-101.095 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-25.000 tm_pt_3=-0.000 tm_pt_4=-21.000 tm_pt_5=-71.166 tm_pt_6=-42.459 tm_pt_7=-25.000 tm_pt_8=-67.950 tm_pt_9=-5.163 tm_pt_10=-56.871 tm_pt_11=-0.000 tm_pt_12=-14.233 tm_pt_13=-0.000 tm_pt_14=-29.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=21.000 WordPenalty=-13.897 OOVPenalty=-100.000 ||| -314.368
-11 ||| population on power distribution east pakistan where to west pakistan " one unit theory " is a অভিনব started with the where the regarded as a province west pakistan . ||| lm_0=-70.891 lm_1=-101.095 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-25.000 tm_pt_3=-0.000 tm_pt_4=-19.000 tm_pt_5=-71.056 tm_pt_6=-44.018 tm_pt_7=-25.000 tm_pt_8=-67.950 tm_pt_9=-4.580 tm_pt_10=-56.039 tm_pt_11=-0.000 tm_pt_12=-14.401 tm_pt_13=-0.000 tm_pt_14=-29.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=19.000 WordPenalty=-13.897 OOVPenalty=-100.000 ||| -314.498
-12 ||| the পরিমাপন theory ||| lm_0=-11.112 lm_1=-13.045 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-2.000 tm_pt_3=-0.000 tm_pt_4=-2.000 tm_pt_5=-9.309 tm_pt_6=-3.988 tm_pt_7=-2.000 tm_pt_8=-5.436 tm_pt_9=-0.368 tm_pt_10=-7.916 tm_pt_11=-0.000 tm_pt_12=-1.316 tm_pt_13=-0.000 tm_pt_14=-2.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=3.000 WordPenalty=-2.171 OOVPenalty=-100.000 ||| -126.556
-12 ||| mathematical পরিমাপন theory ||| lm_0=-12.665 lm_1=-13.045 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-2.000 tm_pt_3=-0.000 tm_pt_4=-2.000 tm_pt_5=-2.869 tm_pt_6=-2.890 tm_pt_7=-2.000 tm_pt_8=-5.436 tm_pt_9=-1.000 tm_pt_10=-4.888 tm_pt_11=-0.000 tm_pt_12=-2.010 tm_pt_13=-0.000 tm_pt_14=-2.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=3.000 WordPenalty=-2.171 OOVPenalty=-100.000 ||| -126.848
-12 ||| • পরিমাপন theory ||| lm_0=-14.217 lm_1=-13.045 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-2.000 tm_pt_3=-0.000 tm_pt_4=-2.000 tm_pt_5=-1.046 tm_pt_6=-5.241 tm_pt_7=-2.000 tm_pt_8=-5.436 tm_pt_9=-0.368 tm_pt_10=-0.422 tm_pt_11=-0.000 tm_pt_12=-1.316 tm_pt_13=-0.000 tm_pt_14=-2.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=3.000 WordPenalty=-2.171 OOVPenalty=-100.000 ||| -127.301
-12 ||| . পরিমাপন theory ||| lm_0=-12.758 lm_1=-13.045 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-2.000 tm_pt_3=-0.000 tm_pt_4=-1.000 tm_pt_5=-7.900 tm_pt_6=-2.990 tm_pt_7=-2.000 tm_pt_8=-5.436 tm_pt_9=-0.018 tm_pt_10=-3.641 tm_pt_11=-0.000 tm_pt_12=-1.712 tm_pt_13=-0.000 tm_pt_14=-2.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=2.000 WordPenalty=-2.171 OOVPenalty=-100.000 ||| -127.691
-13 ||| external links of ||| lm_0=-6.986 lm_1=-13.045 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-2.000 tm_pt_3=-0.000 tm_pt_4=-2.000 tm_pt_5=-7.390 tm_pt_6=-2.729 tm_pt_7=-2.000 tm_pt_8=-5.436 tm_pt_9=-0.018 tm_pt_10=-5.285 tm_pt_11=-0.000 tm_pt_12=-1.611 tm_pt_13=-0.000 tm_pt_14=-3.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=2.000 WordPenalty=-2.171 ||| -20.459
-13 ||| out-links of ||| lm_0=-8.078 lm_1=-9.783 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-2.000 tm_pt_3=-0.000 tm_pt_4=-2.000 tm_pt_5=-6.938 tm_pt_6=-4.795 tm_pt_7=-2.000 tm_pt_8=-5.436 tm_pt_9=-0.018 tm_pt_10=-5.285 tm_pt_11=-0.000 tm_pt_12=-3.297 tm_pt_13=-0.000 tm_pt_14=-2.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=2.000 WordPenalty=-1.737 ||| -20.925
-13 ||| external link of ||| lm_0=-7.533 lm_1=-13.045 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-2.000 tm_pt_3=-0.000 tm_pt_4=-2.000 tm_pt_5=-8.091 tm_pt_6=-2.871 tm_pt_7=-2.000 tm_pt_8=-5.436 tm_pt_9=-0.018 tm_pt_10=-5.285 tm_pt_11=-0.000 tm_pt_12=-2.767 tm_pt_13=-0.000 tm_pt_14=-3.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=2.000 WordPenalty=-2.171 ||| -21.728
-13 ||| external communication of ||| lm_0=-7.692 lm_1=-13.045 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-2.000 tm_pt_3=-0.000 tm_pt_4=-2.000 tm_pt_5=-8.265 tm_pt_6=-2.886 tm_pt_7=-2.000 tm_pt_8=-5.436 tm_pt_9=-0.018 tm_pt_10=-5.285 tm_pt_11=-0.000 tm_pt_12=-2.555 tm_pt_13=-0.000 tm_pt_14=-3.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=2.000 WordPenalty=-2.171 ||| -21.888
-13 ||| description of ||| lm_0=-6.281 lm_1=-9.783 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-2.000 tm_pt_3=-0.000 tm_pt_4=-2.000 tm_pt_5=-10.521 tm_pt_6=-7.098 tm_pt_7=-2.000 tm_pt_8=-5.436 tm_pt_9=-1.018 tm_pt_10=-5.978 tm_pt_11=-0.000 tm_pt_12=-5.600 tm_pt_13=-0.000 tm_pt_14=-2.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=2.000 WordPenalty=-1.737 ||| -21.989
-13 ||| out-links by ||| lm_0=-8.495 lm_1=-9.783 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-2.000 tm_pt_3=-0.000 tm_pt_4=-2.000 tm_pt_5=-5.866 tm_pt_6=-5.948 tm_pt_7=-2.000 tm_pt_8=-5.436 tm_pt_9=-0.368 tm_pt_10=-4.816 tm_pt_11=-0.000 tm_pt_12=-4.214 tm_pt_13=-0.000 tm_pt_14=-2.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=2.000 WordPenalty=-1.737 ||| -22.119
-13 ||| inter-connectivity of ||| lm_0=-8.447 lm_1=-9.783 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-2.000 tm_pt_3=-0.000 tm_pt_4=-2.000 tm_pt_5=-6.938 tm_pt_6=-6.405 tm_pt_7=-2.000 tm_pt_8=-5.436 tm_pt_9=-0.386 tm_pt_10=-5.285 tm_pt_11=-0.000 tm_pt_12=-4.907 tm_pt_13=-0.000 tm_pt_14=-2.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=2.000 WordPenalty=-1.737 ||| -22.803
-14 ||| tata communicationer " foreign sanchar nigam limited building it in telecommunication system a main providers ||| lm_0=-47.618 lm_1=-52.178 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-12.000 tm_pt_3=-0.000 tm_pt_4=-11.000 tm_pt_5=-31.182 tm_pt_6=-18.848 tm_pt_7=-12.000 tm_pt_8=-32.616 tm_pt_9=-4.420 tm_pt_10=-20.028 tm_pt_11=-0.000 tm_pt_12=-11.506 tm_pt_13=-0.000 tm_pt_14=-15.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=11.000 WordPenalty=-7.383 ||| -119.822
-14 ||| tata communicationer " foreign sanchar nigam limited building it the telecommunication system a main providers ||| lm_0=-48.866 lm_1=-52.178 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-12.000 tm_pt_3=-0.000 tm_pt_4=-11.000 tm_pt_5=-30.294 tm_pt_6=-17.028 tm_pt_7=-12.000 tm_pt_8=-32.616 tm_pt_9=-4.370 tm_pt_10=-19.775 tm_pt_11=-0.000 tm_pt_12=-10.184 tm_pt_13=-0.000 tm_pt_14=-15.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=11.000 WordPenalty=-7.383 ||| -120.000
-14 ||| tata communication " foreign sanchar nigam limited building it the telecommunication system a main providers ||| lm_0=-47.612 lm_1=-52.178 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-12.000 tm_pt_3=-0.000 tm_pt_4=-11.000 tm_pt_5=-34.848 tm_pt_6=-17.028 tm_pt_7=-12.000 tm_pt_8=-32.616 tm_pt_9=-4.370 tm_pt_10=-22.911 tm_pt_11=-0.000 tm_pt_12=-10.184 tm_pt_13=-0.000 tm_pt_14=-15.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=11.000 WordPenalty=-7.383 ||| -120.133
-14 ||| tata communicationer " foreign sanchar nigam limited building it city telecommunication system a main providers ||| lm_0=-50.859 lm_1=-52.178 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-11.000 tm_pt_3=-0.000 tm_pt_4=-10.000 tm_pt_5=-27.204 tm_pt_6=-17.941 tm_pt_7=-11.000 tm_pt_8=-29.898 tm_pt_9=-5.370 tm_pt_10=-15.421 tm_pt_11=-0.000 tm_pt_12=-8.892 tm_pt_13=-0.000 tm_pt_14=-15.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=10.000 WordPenalty=-7.383 ||| -121.229
-15 ||| he that year ৪ই নভেম্বরের national assembly in election won all and united states elected as 44th president . ||| lm_0=-52.538 lm_1=-65.223 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-11.000 tm_pt_3=-0.000 tm_pt_4=-8.000 tm_pt_5=-35.845 tm_pt_6=-16.465 tm_pt_7=-11.000 tm_pt_8=-29.898 tm_pt_9=-1.793 tm_pt_10=-11.300 tm_pt_11=-0.000 tm_pt_12=-4.120 tm_pt_13=-0.000 tm_pt_14=-17.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=8.000 WordPenalty=-9.120 OOVPenalty=-200.000 ||| -329.758
-15 ||| he that year ৪ই নভেম্বরের national assembly in the won all and united states elected as 44th president . ||| lm_0=-49.848 lm_1=-65.223 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-11.000 tm_pt_3=-0.000 tm_pt_4=-8.000 tm_pt_5=-42.525 tm_pt_6=-17.628 tm_pt_7=-11.000 tm_pt_8=-29.898 tm_pt_9=-1.799 tm_pt_10=-17.156 tm_pt_11=-0.000 tm_pt_12=-4.967 tm_pt_13=-0.000 tm_pt_14=-17.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=8.000 WordPenalty=-9.120 OOVPenalty=-200.000 ||| -329.930
-15 ||| he that year ৪ই নভেম্বরের assembly in national election won all and united states elected as 44th president . ||| lm_0=-52.453 lm_1=-65.223 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-11.000 tm_pt_3=-0.000 tm_pt_4=-7.000 tm_pt_5=-35.845 tm_pt_6=-16.465 tm_pt_7=-11.000 tm_pt_8=-29.898 tm_pt_9=-1.793 tm_pt_10=-11.097 tm_pt_11=-0.000 tm_pt_12=-3.904 tm_pt_13=-0.000 tm_pt_14=-17.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=7.000 WordPenalty=-9.120 OOVPenalty=-200.000 ||| -330.488
-15 ||| he that year ৪ই নভেম্বরের assembly the national election won all and united states elected as 44th president . ||| lm_0=-51.665 lm_1=-65.223 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-11.000 tm_pt_3=-0.000 tm_pt_4=-7.000 tm_pt_5=-37.637 tm_pt_6=-17.325 tm_pt_7=-11.000 tm_pt_8=-29.898 tm_pt_9=-1.804 tm_pt_10=-12.349 tm_pt_11=-0.000 tm_pt_12=-4.087 tm_pt_13=-0.000 tm_pt_14=-17.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=7.000 WordPenalty=-9.120 OOVPenalty=-200.000 ||| -330.550
-15 ||| in the year ৪ই নভেম্বরের national assembly in election won all and united states elected as 44th president . ||| lm_0=-47.957 lm_1=-65.223 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-12.000 tm_pt_3=-0.000 tm_pt_4=-9.000 tm_pt_5=-42.978 tm_pt_6=-19.674 tm_pt_7=-12.000 tm_pt_8=-32.616 tm_pt_9=-1.425 tm_pt_10=-21.531 tm_pt_11=-0.000 tm_pt_12=-8.327 tm_pt_13=-0.000 tm_pt_14=-17.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=9.000 WordPenalty=-9.120 OOVPenalty=-200.000 ||| -330.583
-15 ||| in that year ৪ই নভেম্বরের national assembly in election won all and united states elected as 44th president . ||| lm_0=-49.614 lm_1=-65.223 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-11.000 tm_pt_3=-0.000 tm_pt_4=-8.000 tm_pt_5=-40.948 tm_pt_6=-19.745 tm_pt_7=-11.000 tm_pt_8=-29.898 tm_pt_9=-1.793 tm_pt_10=-15.498 tm_pt_11=-0.000 tm_pt_12=-7.753 tm_pt_13=-0.000 tm_pt_14=-17.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=8.000 WordPenalty=-9.120 OOVPenalty=-200.000 ||| -330.613
-15 ||| he that year ৪ই নভেম্বরের national assembly 44th president and united states was elected as the won all . ||| lm_0=-47.289 lm_1=-65.223 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-12.000 tm_pt_3=-0.000 tm_pt_4=-7.000 tm_pt_5=-41.906 tm_pt_6=-18.024 tm_pt_7=-12.000 tm_pt_8=-32.616 tm_pt_9=-3.424 tm_pt_10=-18.889 tm_pt_11=-0.000 tm_pt_12=-4.845 tm_pt_13=-0.000 tm_pt_14=-17.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=7.000 WordPenalty=-9.120 OOVPenalty=-200.000 ||| -330.639
-16 ||| many indian প্রজাতি fighting জাত টেক্সা from upper stage ||| lm_0=-35.728 lm_1=-32.080 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-5.000 tm_pt_3=-0.000 tm_pt_4=-5.000 tm_pt_5=-14.556 tm_pt_6=-12.125 tm_pt_7=-5.000 tm_pt_8=-13.590 tm_pt_9=-2.000 tm_pt_10=-2.670 tm_pt_11=-0.000 tm_pt_12=-0.912 tm_pt_13=-0.000 tm_pt_14=-6.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=8.000 WordPenalty=-4.777 OOVPenalty=-300.000 ||| -371.817
-16 ||| many indian প্রজাতি fighting জাত from টেক্সা upper stage ||| lm_0=-35.728 lm_1=-32.080 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-5.000 tm_pt_3=-0.000 tm_pt_4=-4.000 tm_pt_5=-14.556 tm_pt_6=-12.125 tm_pt_7=-5.000 tm_pt_8=-13.590 tm_pt_9=-2.000 tm_pt_10=-3.002 tm_pt_11=-0.000 tm_pt_12=-1.537 tm_pt_13=-0.000 tm_pt_14=-6.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=7.000 WordPenalty=-4.777 OOVPenalty=-300.000 ||| -373.089
-16 ||| many indian প্রজাতি fighting জাত টেক্সা to upper stage ||| lm_0=-35.464 lm_1=-32.080 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-5.000 tm_pt_3=-0.000 tm_pt_4=-5.000 tm_pt_5=-16.518 tm_pt_6=-13.004 tm_pt_7=-5.000 tm_pt_8=-13.590 tm_pt_9=-2.000 tm_pt_10=-4.474 tm_pt_11=-0.000 tm_pt_12=-2.241 tm_pt_13=-0.000 tm_pt_14=-6.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=8.000 WordPenalty=-4.777 OOVPenalty=-300.000 ||| -373.105
-16 ||| many indian প্রজাতি fighting জাত টেক্সা upper stage from ||| lm_0=-36.278 lm_1=-32.080 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-5.000 tm_pt_3=-0.000 tm_pt_4=-4.000 tm_pt_5=-14.556 tm_pt_6=-12.125 tm_pt_7=-5.000 tm_pt_8=-13.590 tm_pt_9=-2.000 tm_pt_10=-2.830 tm_pt_11=-0.000 tm_pt_12=-1.650 tm_pt_13=-0.000 tm_pt_14=-6.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=7.000 WordPenalty=-4.777 OOVPenalty=-300.000 ||| -373.766
-16 ||| many the প্রজাতি fighting জাত টেক্সা from upper stage ||| lm_0=-35.098 lm_1=-32.080 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-5.000 tm_pt_3=-0.000 tm_pt_4=-5.000 tm_pt_5=-18.962 tm_pt_6=-12.755 tm_pt_7=-5.000 tm_pt_8=-13.590 tm_pt_9=-2.000 tm_pt_10=-7.024 tm_pt_11=-0.000 tm_pt_12=-2.862 tm_pt_13=-0.000 tm_pt_14=-6.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=8.000 WordPenalty=-4.777 OOVPenalty=-300.000 ||| -373.915
-16 ||| of indian প্রজাতি fighting জাত টেক্সা from upper stage ||| lm_0=-34.630 lm_1=-32.080 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-5.000 tm_pt_3=-0.000 tm_pt_4=-5.000 tm_pt_5=-19.990 tm_pt_6=-14.047 tm_pt_7=-5.000 tm_pt_8=-13.590 tm_pt_9=-2.050 tm_pt_10=-6.670 tm_pt_11=-0.000 tm_pt_12=-3.477 tm_pt_13=-0.000 tm_pt_14=-6.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=8.000 WordPenalty=-4.777 OOVPenalty=-300.000 ||| -374.141
-17 ||| britain writers written drama novels and stories recently scripts in আদৃত . ||| lm_0=-36.970 lm_1=-42.395 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-11.000 tm_pt_3=-0.000 tm_pt_4=-8.000 tm_pt_5=-20.642 tm_pt_6=-10.927 tm_pt_7=-11.000 tm_pt_8=-29.898 tm_pt_9=-0.874 tm_pt_10=-21.259 tm_pt_11=-0.000 tm_pt_12=-8.774 tm_pt_13=-0.000 tm_pt_14=-11.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=9.000 WordPenalty=-6.080 OOVPenalty=-100.000 ||| -193.128
-17 ||| britain writers written drama novels stories and recently scripts in আদৃত . ||| lm_0=-40.717 lm_1=-42.395 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-11.000 tm_pt_3=-0.000 tm_pt_4=-10.000 tm_pt_5=-20.642 tm_pt_6=-10.927 tm_pt_7=-11.000 tm_pt_8=-29.898 tm_pt_9=-0.874 tm_pt_10=-16.732 tm_pt_11=-0.000 tm_pt_12=-5.024 tm_pt_13=-0.000 tm_pt_14=-11.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=11.000 WordPenalty=-6.080 OOVPenalty=-100.000 ||| -193.334
-17 ||| britain writers written drama novel stories and recently scripts in আদৃত . ||| lm_0=-40.536 lm_1=-42.395 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-11.000 tm_pt_3=-0.000 tm_pt_4=-10.000 tm_pt_5=-21.690 tm_pt_6=-11.374 tm_pt_7=-11.000 tm_pt_8=-29.898 tm_pt_9=-0.874 tm_pt_10=-17.888 tm_pt_11=-0.000 tm_pt_12=-5.391 tm_pt_13=-0.000 tm_pt_14=-11.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=11.000 WordPenalty=-6.080 OOVPenalty=-100.000 ||| -193.894
-17 ||| britain writers written drama novels and stories recently script in আদৃত . ||| lm_0=-36.674 lm_1=-42.395 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-11.000 tm_pt_3=-0.000 tm_pt_4=-8.000 tm_pt_5=-21.418 tm_pt_6=-10.442 tm_pt_7=-11.000 tm_pt_8=-29.898 tm_pt_9=-1.824 tm_pt_10=-21.547 tm_pt_11=-0.000 tm_pt_12=-10.160 tm_pt_13=-0.000 tm_pt_14=-11.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=9.000 WordPenalty=-6.080 OOVPenalty=-100.000 ||| -194.089
-17 ||| britain writers written drama novels story and recently scripts in আদৃত . ||| lm_0=-40.669 lm_1=-42.395 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-11.000 tm_pt_3=-0.000 tm_pt_4=-10.000 tm_pt_5=-21.556 tm_pt_6=-11.746 tm_pt_7=-11.000 tm_pt_8=-29.898 tm_pt_9=-0.874 tm_pt_10=-17.463 tm_pt_11=-0.000 tm_pt_12=-5.755 tm_pt_13=-0.000 tm_pt_14=-11.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=11.000 WordPenalty=-6.080 OOVPenalty=-100.000 ||| -194.178
-17 ||| britain writers written drama novels stories and recently script in আদৃত . ||| lm_0=-40.421 lm_1=-42.395 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-11.000 tm_pt_3=-0.000 tm_pt_4=-10.000 tm_pt_5=-21.418 tm_pt_6=-10.442 tm_pt_7=-11.000 tm_pt_8=-29.898 tm_pt_9=-1.824 tm_pt_10=-17.019 tm_pt_11=-0.000 tm_pt_12=-6.410 tm_pt_13=-0.000 tm_pt_14=-11.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=11.000 WordPenalty=-6.080 OOVPenalty=-100.000 ||| -194.295
-17 ||| britain writers the drama novels and stories recently scripts in আদৃত . ||| lm_0=-36.194 lm_1=-42.395 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-11.000 tm_pt_3=-0.000 tm_pt_4=-8.000 tm_pt_5=-25.509 tm_pt_6=-11.095 tm_pt_7=-11.000 tm_pt_8=-29.898 tm_pt_9=-0.959 tm_pt_10=-25.559 tm_pt_11=-0.000 tm_pt_12=-9.061 tm_pt_13=-0.000 tm_pt_14=-11.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=9.000 WordPenalty=-6.080 OOVPenalty=-100.000 ||| -194.444
-18 ||| on may 1919 , it saogat magazine published . ||| lm_0=-23.514 lm_1=-32.611 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-8.000 tm_pt_3=-0.000 tm_pt_4=-6.000 tm_pt_5=-15.806 tm_pt_6=-13.716 tm_pt_7=-8.000 tm_pt_8=-21.744 tm_pt_9=-0.152 tm_pt_10=-8.843 tm_pt_11=-0.000 tm_pt_12=-5.765 tm_pt_13=-0.000 tm_pt_14=-9.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=5.000 WordPenalty=-4.777 ||| -65.459
-18 ||| 1919 on may , it saogat magazine published . ||| lm_0=-25.572 lm_1=-32.611 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-7.000 tm_pt_3=-0.000 tm_pt_4=-5.000 tm_pt_5=-15.806 tm_pt_6=-13.716 tm_pt_7=-7.000 tm_pt_8=-19.026 tm_pt_9=-1.151 tm_pt_10=-4.478 tm_pt_11=-0.000 tm_pt_12=-4.843 tm_pt_13=-0.000 tm_pt_14=-9.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=5.000 WordPenalty=-4.777 ||| -66.178
-18 ||| 1919 in may , it saogat magazine published . ||| lm_0=-25.975 lm_1=-32.611 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-7.000 tm_pt_3=-0.000 tm_pt_4=-5.000 tm_pt_5=-17.884 tm_pt_6=-14.337 tm_pt_7=-7.000 tm_pt_8=-19.026 tm_pt_9=-0.519 tm_pt_10=-3.784 tm_pt_11=-0.000 tm_pt_12=-4.150 tm_pt_13=-0.000 tm_pt_14=-9.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=5.000 WordPenalty=-4.777 ||| -66.342
-18 ||| on may 1919 in it saogat magazine published . ||| lm_0=-24.704 lm_1=-32.611 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-8.000 tm_pt_3=-0.000 tm_pt_4=-6.000 tm_pt_5=-14.193 tm_pt_6=-13.162 tm_pt_7=-8.000 tm_pt_8=-21.744 tm_pt_9=-0.201 tm_pt_10=-7.947 tm_pt_11=-0.000 tm_pt_12=-6.576 tm_pt_13=-0.000 tm_pt_14=-9.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=5.000 WordPenalty=-4.777 ||| -66.529
-18 ||| 1919 on may month it saogat magazine published . ||| lm_0=-27.533 lm_1=-32.611 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-8.000 tm_pt_3=-0.000 tm_pt_4=-7.000 tm_pt_5=-9.934 tm_pt_6=-13.516 tm_pt_7=-8.000 tm_pt_8=-21.744 tm_pt_9=-0.287 tm_pt_10=-4.582 tm_pt_11=-0.000 tm_pt_12=-6.775 tm_pt_13=-0.000 tm_pt_14=-9.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=7.000 WordPenalty=-4.777 ||| -66.639
-18 ||| on may 1919 , this saogat magazine published . ||| lm_0=-23.185 lm_1=-32.611 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-8.000 tm_pt_3=-0.000 tm_pt_4=-6.000 tm_pt_5=-17.388 tm_pt_6=-14.651 tm_pt_7=-8.000 tm_pt_8=-21.744 tm_pt_9=-0.152 tm_pt_10=-11.508 tm_pt_11=-0.000 tm_pt_12=-7.130 tm_pt_13=-0.000 tm_pt_14=-9.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=5.000 WordPenalty=-4.777 ||| -66.848
-19 ||| 2005 , উইমেনস tennis association tour টায়ার-থ্রি টুর্নামেন্ট সানফিস্ট open netaji indoor stadium was arranged . ||| lm_0=-57.604 lm_1=-55.439 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-11.000 tm_pt_3=-0.000 tm_pt_4=-10.000 tm_pt_5=-21.244 tm_pt_6=-8.707 tm_pt_7=-11.000 tm_pt_8=-29.898 tm_pt_9=-2.249 tm_pt_10=-15.730 tm_pt_11=-0.000 tm_pt_12=-5.148 tm_pt_13=-0.000 tm_pt_14=-12.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=14.000 WordPenalty=-7.817 OOVPenalty=-400.000 ||| -520.892
-19 ||| 2005 , উইমেনস tennis association tour টায়ার-থ্রি টুর্নামেন্ট সানফিস্ট open netaji indoor stadium is the . ||| lm_0=-55.740 lm_1=-55.439 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-11.000 tm_pt_3=-0.000 tm_pt_4=-11.000 tm_pt_5=-29.073 tm_pt_6=-9.197 tm_pt_7=-11.000 tm_pt_8=-29.898 tm_pt_9=-2.249 tm_pt_10=-22.691 tm_pt_11=-0.000 tm_pt_12=-5.552 tm_pt_13=-0.000 tm_pt_14=-12.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=15.000 WordPenalty=-7.817 OOVPenalty=-400.000 ||| -521.235
-19 ||| 2005 , উইমেনস tennis association tour টায়ার-থ্রি টুর্নামেন্ট সানফিস্ট open netaji indoor stadium was organized . ||| lm_0=-57.677 lm_1=-55.439 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-11.000 tm_pt_3=-0.000 tm_pt_4=-10.000 tm_pt_5=-22.039 tm_pt_6=-8.841 tm_pt_7=-11.000 tm_pt_8=-29.898 tm_pt_9=-2.249 tm_pt_10=-16.240 tm_pt_11=-0.000 tm_pt_12=-5.148 tm_pt_13=-0.000 tm_pt_14=-12.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=14.000 WordPenalty=-7.817 OOVPenalty=-400.000 ||| -521.313
-19 ||| 2005 , উইমেনস tennis association tour টায়ার-থ্রি টুর্নামেন্ট সানফিস্ট open netaji indoor stadium arranged in . ||| lm_0=-58.454 lm_1=-55.439 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-11.000 tm_pt_3=-0.000 tm_pt_4=-10.000 tm_pt_5=-23.377 tm_pt_6=-9.826 tm_pt_7=-11.000 tm_pt_8=-29.898 tm_pt_9=-2.249 tm_pt_10=-15.383 tm_pt_11=-0.000 tm_pt_12=-4.247 tm_pt_13=-0.000 tm_pt_14=-12.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=14.000 WordPenalty=-7.817 OOVPenalty=-400.000 ||| -522.327
-19 ||| 2005 , উইমেনস tennis association tour টায়ার-থ্রি টুর্নামেন্ট সানফিস্ট open netaji indoor stadium organized in . ||| lm_0=-58.336 lm_1=-55.439 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-11.000 tm_pt_3=-0.000 tm_pt_4=-11.000 tm_pt_5=-24.172 tm_pt_6=-9.959 tm_pt_7=-11.000 tm_pt_8=-29.898 tm_pt_9=-2.249 tm_pt_10=-16.993 tm_pt_11=-0.000 tm_pt_12=-6.193 tm_pt_13=-0.000 tm_pt_14=-12.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=15.000 WordPenalty=-7.817 OOVPenalty=-400.000 ||| -522.519
-19 ||| 2005 , উইমেনস tennis association tour টায়ার-থ্রি টুর্নামেন্ট সানফিস্ট open netaji indoor stadium is was . ||| lm_0=-57.992 lm_1=-55.439 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-11.000 tm_pt_3=-0.000 tm_pt_4=-11.000 tm_pt_5=-26.317 tm_pt_6=-8.389 tm_pt_7=-11.000 tm_pt_8=-29.898 tm_pt_9=-2.249 tm_pt_10=-20.791 tm_pt_11=-0.000 tm_pt_12=-5.062 tm_pt_13=-0.000 tm_pt_14=-12.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=15.000 WordPenalty=-7.817 OOVPenalty=-400.000 ||| -522.552
-20 ||| to prevent this several measures are taken . ||| lm_0=-11.632 lm_1=-29.350 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-1.000 tm_pt_3=-0.000 tm_pt_4=-1.000 tm_pt_5=-22.680 tm_pt_6=-30.812 tm_pt_7=-1.000 tm_pt_8=-2.718 tm_pt_9=-1.000 tm_pt_10=0.000 tm_pt_11=-0.000 tm_pt_12=-1.386 tm_pt_13=-0.000 tm_pt_14=-8.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=1.000 WordPenalty=-4.343 ||| -48.405
-20 ||| to prevent this several measures are . ||| lm_0=-12.686 lm_1=-26.089 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-2.000 tm_pt_3=-0.000 tm_pt_4=-1.000 tm_pt_5=-20.219 tm_pt_6=-29.189 tm_pt_7=-2.000 tm_pt_8=-5.436 tm_pt_9=-1.000 tm_pt_10=-6.851 tm_pt_11=-0.000 tm_pt_12=-1.946 tm_pt_13=-0.000 tm_pt_14=-7.000 tm_pt_15=-1.000 tm_pt_16=-0.000 tm_glue_0=1.000 WordPenalty=-3.909 ||| -50.265
-20 ||| to prevent this several measures are the . ||| lm_0=-14.066 lm_1=-29.350 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-3.000 tm_pt_3=-0.000 tm_pt_4=-3.000 tm_pt_5=-24.227 tm_pt_6=-27.251 tm_pt_7=-3.000 tm_pt_8=-8.154 tm_pt_9=-1.000 tm_pt_10=-3.426 tm_pt_11=-0.000 tm_pt_12=-2.285 tm_pt_13=-0.000 tm_pt_14=-8.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=3.000 WordPenalty=-4.343 ||| -52.084
-20 ||| to prevent this several measures are in . ||| lm_0=-14.649 lm_1=-29.350 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-3.000 tm_pt_3=-0.000 tm_pt_4=-3.000 tm_pt_5=-23.388 tm_pt_6=-27.344 tm_pt_7=-3.000 tm_pt_8=-8.154 tm_pt_9=-1.000 tm_pt_10=-2.771 tm_pt_11=-0.000 tm_pt_12=-2.699 tm_pt_13=-0.000 tm_pt_14=-8.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=3.000 WordPenalty=-4.343 ||| -52.653
-20 ||| to avoid this possibility several measures are taken . ||| lm_0=-13.461 lm_1=-32.611 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-1.000 tm_pt_3=-0.000 tm_pt_4=-1.000 tm_pt_5=-24.597 tm_pt_6=-31.733 tm_pt_7=-1.000 tm_pt_8=-2.718 tm_pt_9=-1.000 tm_pt_10=0.000 tm_pt_11=-0.000 tm_pt_12=-1.386 tm_pt_13=-0.000 tm_pt_14=-9.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=1.000 WordPenalty=-4.777 ||| -53.452
-20 ||| to prevent this several measures are to . ||| lm_0=-15.009 lm_1=-29.350 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-3.000 tm_pt_3=-0.000 tm_pt_4=-3.000 tm_pt_5=-23.286 tm_pt_6=-27.775 tm_pt_7=-3.000 tm_pt_8=-8.154 tm_pt_9=-1.000 tm_pt_10=-3.872 tm_pt_11=-0.000 tm_pt_12=-3.920 tm_pt_13=-0.000 tm_pt_14=-8.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=3.000 WordPenalty=-4.343 ||| -53.944
-20 ||| to prevent this several measures are to ||| lm_0=-14.913 lm_1=-26.089 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-2.000 tm_pt_3=-0.000 tm_pt_4=-2.000 tm_pt_5=-23.744 tm_pt_6=-33.519 tm_pt_7=-2.000 tm_pt_8=-5.436 tm_pt_9=-2.000 tm_pt_10=-6.999 tm_pt_11=-0.000 tm_pt_12=-4.736 tm_pt_13=-0.000 tm_pt_14=-7.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=2.000 WordPenalty=-3.909 ||| -55.503
-21 ||| ১৯৬৬ on 5 february লাহোরে of দলসমূহের a national was held . ||| lm_0=-41.154 lm_1=-42.395 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-9.000 tm_pt_3=-0.000 tm_pt_4=-9.000 tm_pt_5=-21.481 tm_pt_6=-14.645 tm_pt_7=-9.000 tm_pt_8=-24.462 tm_pt_9=-0.436 tm_pt_10=-15.183 tm_pt_11=-0.000 tm_pt_12=-3.672 tm_pt_13=-0.000 tm_pt_14=-9.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=12.000 WordPenalty=-6.080 OOVPenalty=-300.000 ||| -391.381
-21 ||| ১৯৬৬ on 5 february লাহোরে দলসমূহের against a national was held . ||| lm_0=-41.879 lm_1=-42.395 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-9.000 tm_pt_3=-0.000 tm_pt_4=-8.000 tm_pt_5=-16.732 tm_pt_6=-14.335 tm_pt_7=-9.000 tm_pt_8=-24.462 tm_pt_9=-0.553 tm_pt_10=-12.167 tm_pt_11=-0.000 tm_pt_12=-3.960 tm_pt_13=-0.000 tm_pt_14=-9.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=11.000 WordPenalty=-6.080 OOVPenalty=-300.000 ||| -391.643
-21 ||| ১৯৬৬ on 5 february লাহোরে against দলসমূহের a national was held . ||| lm_0=-42.439 lm_1=-42.395 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-9.000 tm_pt_3=-0.000 tm_pt_4=-9.000 tm_pt_5=-16.732 tm_pt_6=-14.335 tm_pt_7=-9.000 tm_pt_8=-24.462 tm_pt_9=-0.553 tm_pt_10=-12.989 tm_pt_11=-0.000 tm_pt_12=-4.183 tm_pt_13=-0.000 tm_pt_14=-9.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=12.000 WordPenalty=-6.080 OOVPenalty=-300.000 ||| -391.664
-21 ||| ১৯৬৬ on 5 february লাহোরে opposition দলসমূহের a national was held . ||| lm_0=-43.323 lm_1=-42.395 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-9.000 tm_pt_3=-0.000 tm_pt_4=-9.000 tm_pt_5=-15.278 tm_pt_6=-14.740 tm_pt_7=-9.000 tm_pt_8=-24.462 tm_pt_9=-0.553 tm_pt_10=-10.186 tm_pt_11=-0.000 tm_pt_12=-4.183 tm_pt_13=-0.000 tm_pt_14=-9.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=12.000 WordPenalty=-6.080 OOVPenalty=-300.000 ||| -391.891
-21 ||| ১৯৬৬ on 5 february লাহোরে of দলসমূহের a national was held in . ||| lm_0=-42.147 lm_1=-45.656 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-8.000 tm_pt_3=-0.000 tm_pt_4=-8.000 tm_pt_5=-24.651 tm_pt_6=-11.840 tm_pt_7=-8.000 tm_pt_8=-21.744 tm_pt_9=-1.018 tm_pt_10=-8.289 tm_pt_11=-0.000 tm_pt_12=-3.518 tm_pt_13=-0.000 tm_pt_14=-10.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=11.000 WordPenalty=-6.514 OOVPenalty=-300.000 ||| -392.804
-21 ||| ১৯৬৬ on 5 february লাহোরে দলসমূহের against a national was held in . ||| lm_0=-42.873 lm_1=-45.656 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-8.000 tm_pt_3=-0.000 tm_pt_4=-7.000 tm_pt_5=-19.901 tm_pt_6=-11.529 tm_pt_7=-8.000 tm_pt_8=-21.744 tm_pt_9=-1.135 tm_pt_10=-5.272 tm_pt_11=-0.000 tm_pt_12=-3.806 tm_pt_13=-0.000 tm_pt_14=-10.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=10.000 WordPenalty=-6.514 OOVPenalty=-300.000 ||| -393.067
-22 ||| bangladesh অর্গানাইজেশন of the islamic কনফারেন্স and islamic ডেভেলপমেণ্ট bank secured its place in in . ||| lm_0=-44.173 lm_1=-55.439 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-10.000 tm_pt_3=-0.000 tm_pt_4=-9.000 tm_pt_5=-47.305 tm_pt_6=-11.674 tm_pt_7=-10.000 tm_pt_8=-27.180 tm_pt_9=-2.050 tm_pt_10=-27.018 tm_pt_11=-0.000 tm_pt_12=-1.913 tm_pt_13=-0.000 tm_pt_14=-13.000 tm_pt_15=-1.000 tm_pt_16=-0.000 tm_glue_0=12.000 WordPenalty=-7.817 OOVPenalty=-300.000 ||| -412.927
-22 ||| bangladesh অর্গানাইজেশন of the islamic কনফারেন্স and islamic ডেভেলপমেণ্ট bank secured its place in took . ||| lm_0=-45.354 lm_1=-55.439 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-10.000 tm_pt_3=-0.000 tm_pt_4=-9.000 tm_pt_5=-43.395 tm_pt_6=-11.480 tm_pt_7=-10.000 tm_pt_8=-27.180 tm_pt_9=-2.368 tm_pt_10=-23.851 tm_pt_11=-0.000 tm_pt_12=-2.607 tm_pt_13=-0.000 tm_pt_14=-13.000 tm_pt_15=-1.000 tm_pt_16=-0.000 tm_glue_0=12.000 WordPenalty=-7.817 OOVPenalty=-300.000 ||| -413.259
-22 ||| bangladesh অর্গানাইজেশন of the islamic কনফারেন্স and islamic ডেভেলপমেণ্ট bank secured its place in adopted . ||| lm_0=-45.321 lm_1=-55.439 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-10.000 tm_pt_3=-0.000 tm_pt_4=-9.000 tm_pt_5=-42.382 tm_pt_6=-12.116 tm_pt_7=-10.000 tm_pt_8=-27.180 tm_pt_9=-3.000 tm_pt_10=-21.979 tm_pt_11=-0.000 tm_pt_12=-3.300 tm_pt_13=-0.000 tm_pt_14=-13.000 tm_pt_15=-1.000 tm_pt_16=-0.000 tm_glue_0=12.000 WordPenalty=-7.817 OOVPenalty=-300.000 ||| -413.521
-22 ||| bangladesh অর্গানাইজেশন of the islamic কনফারেন্স and islamic ডেভেলপমেণ্ট bank secured its place in in the . ||| lm_0=-45.104 lm_1=-58.700 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-11.000 tm_pt_3=-0.000 tm_pt_4=-11.000 tm_pt_5=-50.512 tm_pt_6=-9.888 tm_pt_7=-11.000 tm_pt_8=-29.898 tm_pt_9=-2.050 tm_pt_10=-23.145 tm_pt_11=-0.000 tm_pt_12=-2.086 tm_pt_13=-0.000 tm_pt_14=-14.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=14.000 WordPenalty=-8.252 OOVPenalty=-300.000 ||| -413.915
-22 ||| bangladesh অর্গানাইজেশন of the islamic কনফারেন্স and islamic ডেভেলপমেণ্ট bank took secured its place in . ||| lm_0=-45.354 lm_1=-55.439 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-10.000 tm_pt_3=-0.000 tm_pt_4=-8.000 tm_pt_5=-43.395 tm_pt_6=-11.480 tm_pt_7=-10.000 tm_pt_8=-27.180 tm_pt_9=-2.368 tm_pt_10=-23.728 tm_pt_11=-0.000 tm_pt_12=-3.037 tm_pt_13=-0.000 tm_pt_14=-13.000 tm_pt_15=-1.000 tm_pt_16=-0.000 tm_glue_0=11.000 WordPenalty=-7.817 OOVPenalty=-300.000 ||| -414.344
-22 ||| bangladesh অর্গানাইজেশন of the islamic কনফারেন্স and islamic ডেভেলপমেণ্ট bank secured its place in had . ||| lm_0=-44.889 lm_1=-55.439 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-10.000 tm_pt_3=-0.000 tm_pt_4=-9.000 tm_pt_5=-45.320 tm_pt_6=-12.521 tm_pt_7=-10.000 tm_pt_8=-27.180 tm_pt_9=-3.000 tm_pt_10=-25.115 tm_pt_11=-0.000 tm_pt_12=-3.300 tm_pt_13=-0.000 tm_pt_14=-13.000 tm_pt_15=-1.000 tm_pt_16=-0.000 tm_glue_0=12.000 WordPenalty=-7.817 OOVPenalty=-300.000 ||| -414.500
-22 ||| bangladesh অর্গানাইজেশন of the islamic কনফারেন্স and islamic ডেভেলপমেণ্ট bank secured its place in took the . ||| lm_0=-46.563 lm_1=-58.700 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-11.000 tm_pt_3=-0.000 tm_pt_4=-11.000 tm_pt_5=-46.603 tm_pt_6=-9.693 tm_pt_7=-11.000 tm_pt_8=-29.898 tm_pt_9=-2.368 tm_pt_10=-19.978 tm_pt_11=-0.000 tm_pt_12=-2.779 tm_pt_13=-0.000 tm_pt_14=-14.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=14.000 WordPenalty=-8.252 OOVPenalty=-300.000 ||| -414.591
-22 ||| bangladesh অর্গানাইজেশন of the islamic কনফারেন্স and islamic ডেভেলপমেণ্ট bank took secured its place in the . ||| lm_0=-45.737 lm_1=-58.700 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-11.000 tm_pt_3=-0.000 tm_pt_4=-10.000 tm_pt_5=-46.603 tm_pt_6=-9.693 tm_pt_7=-11.000 tm_pt_8=-29.898 tm_pt_9=-2.368 tm_pt_10=-19.856 tm_pt_11=-0.000 tm_pt_12=-3.210 tm_pt_13=-0.000 tm_pt_14=-14.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=13.000 WordPenalty=-8.252 OOVPenalty=-300.000 ||| -414.654
-22 ||| bangladesh অর্গানাইজেশন of the islamic কনফারেন্স and had ডেভেলপমেণ্ট bank secured its place in in . ||| lm_0=-43.907 lm_1=-55.439 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-10.000 tm_pt_3=-0.000 tm_pt_4=-9.000 tm_pt_5=-52.098 tm_pt_6=-14.618 tm_pt_7=-10.000 tm_pt_8=-27.180 tm_pt_9=-3.050 tm_pt_10=-29.460 tm_pt_11=-0.000 tm_pt_12=-4.216 tm_pt_13=-0.000 tm_pt_14=-13.000 tm_pt_15=-1.000 tm_pt_16=-0.000 tm_glue_0=12.000 WordPenalty=-7.817 OOVPenalty=-300.000 ||| -416.790
-22 ||| bangladesh অর্গানাইজেশন of the islamic কনফারেন্স and had ডেভেলপমেণ্ট bank secured its place in took . ||| lm_0=-45.088 lm_1=-55.439 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-10.000 tm_pt_3=-0.000 tm_pt_4=-9.000 tm_pt_5=-48.188 tm_pt_6=-14.424 tm_pt_7=-10.000 tm_pt_8=-27.180 tm_pt_9=-3.368 tm_pt_10=-26.293 tm_pt_11=-0.000 tm_pt_12=-4.909 tm_pt_13=-0.000 tm_pt_14=-13.000 tm_pt_15=-1.000 tm_pt_16=-0.000 tm_glue_0=12.000 WordPenalty=-7.817 OOVPenalty=-300.000 ||| -417.121
-23 ||| subject : encyclopedia ||| lm_0=-5.528 lm_1=-13.045 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-2.000 tm_pt_3=-0.000 tm_pt_4=-2.000 tm_pt_5=-1.657 tm_pt_6=-1.542 tm_pt_7=-2.000 tm_pt_8=-5.436 tm_pt_9=-0.000 tm_pt_10=-0.420 tm_pt_11=-0.000 tm_pt_12=-1.500 tm_pt_13=-0.000 tm_pt_14=-3.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=2.000 WordPenalty=-2.171 ||| -15.836
-23 ||| category : encyclopedia ||| lm_0=-5.707 lm_1=-13.045 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-2.000 tm_pt_3=-0.000 tm_pt_4=-2.000 tm_pt_5=-1.425 tm_pt_6=-2.012 tm_pt_7=-2.000 tm_pt_8=-5.436 tm_pt_9=-0.135 tm_pt_10=-0.020 tm_pt_11=-0.000 tm_pt_12=-1.817 tm_pt_13=-0.000 tm_pt_14=-3.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=2.000 WordPenalty=-2.171 ||| -16.293
-23 ||| subject-class : encyclopedia ||| lm_0=-5.989 lm_1=-13.045 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-2.000 tm_pt_3=-0.000 tm_pt_4=-2.000 tm_pt_5=-1.379 tm_pt_6=-3.561 tm_pt_7=-2.000 tm_pt_8=-5.436 tm_pt_9=-0.135 tm_pt_10=0.000 tm_pt_11=-0.000 tm_pt_12=-2.703 tm_pt_13=-0.000 tm_pt_14=-3.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=2.000 WordPenalty=-2.171 ||| -17.472
-23 ||| topics : encyclopedia ||| lm_0=-6.220 lm_1=-13.045 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-2.000 tm_pt_3=-0.000 tm_pt_4=-2.000 tm_pt_5=-1.537 tm_pt_6=-3.874 tm_pt_7=-2.000 tm_pt_8=-5.436 tm_pt_9=-0.135 tm_pt_10=0.000 tm_pt_11=-0.000 tm_pt_12=-2.991 tm_pt_13=-0.000 tm_pt_14=-3.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=2.000 WordPenalty=-2.171 ||| -17.996
-24 ||| russia france and israel the main অস্ত্রসরবরাহকারী country and defense sub country . ||| lm_0=-38.844 lm_1=-45.656 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-10.000 tm_pt_3=-0.000 tm_pt_4=-10.000 tm_pt_5=-18.530 tm_pt_6=-8.356 tm_pt_7=-10.000 tm_pt_8=-27.180 tm_pt_9=-1.106 tm_pt_10=-10.471 tm_pt_11=-0.000 tm_pt_12=-5.841 tm_pt_13=-0.000 tm_pt_14=-12.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=11.000 WordPenalty=-6.514 OOVPenalty=-100.000 ||| -189.536
-24 ||| russia france and israel the main অস্ত্রসরবরাহকারী state and defense sub country . ||| lm_0=-38.802 lm_1=-45.656 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-9.000 tm_pt_3=-0.000 tm_pt_4=-9.000 tm_pt_5=-18.467 tm_pt_6=-9.206 tm_pt_7=-9.000 tm_pt_8=-24.462 tm_pt_9=-1.474 tm_pt_10=-10.535 tm_pt_11=-0.000 tm_pt_12=-5.933 tm_pt_13=-0.000 tm_pt_14=-12.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=10.000 WordPenalty=-6.514 OOVPenalty=-100.000 ||| -189.924
-24 ||| russia france and israel the main অস্ত্রসরবরাহকারী country and defence sub country . ||| lm_0=-38.535 lm_1=-45.656 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-10.000 tm_pt_3=-0.000 tm_pt_4=-10.000 tm_pt_5=-18.942 tm_pt_6=-9.742 tm_pt_7=-10.000 tm_pt_8=-27.180 tm_pt_9=-1.738 tm_pt_10=-10.065 tm_pt_11=-0.000 tm_pt_12=-6.534 tm_pt_13=-0.000 tm_pt_14=-12.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=11.000 WordPenalty=-6.514 OOVPenalty=-100.000 ||| -190.366
-24 ||| russia france and israel the main অস্ত্রসরবরাহকারী countries and defense sub country . ||| lm_0=-38.815 lm_1=-45.656 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-9.000 tm_pt_3=-0.000 tm_pt_4=-9.000 tm_pt_5=-19.426 tm_pt_6=-10.343 tm_pt_7=-9.000 tm_pt_8=-24.462 tm_pt_9=-1.474 tm_pt_10=-10.130 tm_pt_11=-0.000 tm_pt_12=-5.933 tm_pt_13=-0.000 tm_pt_14=-12.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=10.000 WordPenalty=-6.514 OOVPenalty=-100.000 ||| -190.407
-25 ||| this is the known imaginary mathematics formed with which are real number set from সেটে par with the complex number . ||| lm_0=-48.835 lm_1=-71.745 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-14.000 tm_pt_3=-0.000 tm_pt_4=-10.000 tm_pt_5=-51.313 tm_pt_6=-21.937 tm_pt_7=-14.000 tm_pt_8=-38.052 tm_pt_9=-4.889 tm_pt_10=-21.875 tm_pt_11=-0.000 tm_pt_12=-7.000 tm_pt_13=-0.000 tm_pt_14=-20.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=10.000 WordPenalty=-9.989 OOVPenalty=-100.000 ||| -241.719
-25 ||| this is our known imaginary mathematics formed with which are real number set from সেটে par with the complex number . ||| lm_0=-50.396 lm_1=-71.745 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-15.000 tm_pt_3=-0.000 tm_pt_4=-11.000 tm_pt_5=-44.711 tm_pt_6=-21.174 tm_pt_7=-15.000 tm_pt_8=-40.770 tm_pt_9=-3.889 tm_pt_10=-22.633 tm_pt_11=-0.000 tm_pt_12=-7.690 tm_pt_13=-0.000 tm_pt_14=-20.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=11.000 WordPenalty=-9.989 OOVPenalty=-100.000 ||| -241.938
-25 ||| this is the known imaginary mathematics formed with which are real number set from সেটে par with complex number . ||| lm_0=-48.176 lm_1=-68.484 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-15.000 tm_pt_3=-0.000 tm_pt_4=-10.000 tm_pt_5=-47.304 tm_pt_6=-23.874 tm_pt_7=-15.000 tm_pt_8=-40.770 tm_pt_9=-3.890 tm_pt_10=-30.946 tm_pt_11=-0.000 tm_pt_12=-9.515 tm_pt_13=-0.000 tm_pt_14=-19.000 tm_pt_15=-1.000 tm_pt_16=-0.000 tm_glue_0=10.000 WordPenalty=-9.554 OOVPenalty=-100.000 ||| -242.826
-25 ||| this is the known imaginary mathematics formed with which are real numbers set from সেটে par with the complex number . ||| lm_0=-49.663 lm_1=-71.745 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-14.000 tm_pt_3=-0.000 tm_pt_4=-10.000 tm_pt_5=-51.078 tm_pt_6=-22.637 tm_pt_7=-14.000 tm_pt_8=-38.052 tm_pt_9=-4.889 tm_pt_10=-21.594 tm_pt_11=-0.000 tm_pt_12=-7.000 tm_pt_13=-0.000 tm_pt_14=-20.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=10.000 WordPenalty=-9.989 OOVPenalty=-100.000 ||| -242.865
-25 ||| this is the known imaginary mathematics formed with which are real number set to সেটে par with the complex number . ||| lm_0=-48.571 lm_1=-71.745 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-14.000 tm_pt_3=-0.000 tm_pt_4=-10.000 tm_pt_5=-53.275 tm_pt_6=-22.816 tm_pt_7=-14.000 tm_pt_8=-38.052 tm_pt_9=-4.889 tm_pt_10=-23.680 tm_pt_11=-0.000 tm_pt_12=-8.329 tm_pt_13=-0.000 tm_pt_14=-20.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=10.000 WordPenalty=-9.989 OOVPenalty=-100.000 ||| -243.007
-25 ||| this is our known imaginary mathematics formed with which are real number set from সেটে par with complex number . ||| lm_0=-49.737 lm_1=-68.484 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-16.000 tm_pt_3=-0.000 tm_pt_4=-11.000 tm_pt_5=-40.703 tm_pt_6=-23.112 tm_pt_7=-16.000 tm_pt_8=-43.488 tm_pt_9=-2.890 tm_pt_10=-31.704 tm_pt_11=-0.000 tm_pt_12=-10.205 tm_pt_13=-0.000 tm_pt_14=-19.000 tm_pt_15=-1.000 tm_pt_16=-0.000 tm_glue_0=11.000 WordPenalty=-9.554 OOVPenalty=-100.000 ||| -243.045
-25 ||| this is our known imaginary mathematics formed with which are real numbers set from সেটে par with the complex number . ||| lm_0=-51.225 lm_1=-71.745 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-15.000 tm_pt_3=-0.000 tm_pt_4=-11.000 tm_pt_5=-44.476 tm_pt_6=-21.875 tm_pt_7=-15.000 tm_pt_8=-40.770 tm_pt_9=-3.889 tm_pt_10=-22.352 tm_pt_11=-0.000 tm_pt_12=-7.690 tm_pt_13=-0.000 tm_pt_14=-20.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=11.000 WordPenalty=-9.989 OOVPenalty=-100.000 ||| -243.084
-25 ||| this is the known imaginary mathematics formed with which are from real number set সেটে par with the complex number . ||| lm_0=-48.916 lm_1=-71.745 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-13.000 tm_pt_3=-0.000 tm_pt_4=-8.000 tm_pt_5=-51.313 tm_pt_6=-21.937 tm_pt_7=-13.000 tm_pt_8=-35.334 tm_pt_9=-5.889 tm_pt_10=-20.802 tm_pt_11=-0.000 tm_pt_12=-6.932 tm_pt_13=-0.000 tm_pt_14=-20.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=8.000 WordPenalty=-9.989 OOVPenalty=-100.000 ||| -243.110
-26 ||| <address> ||| lm_0=-4.240 lm_1=-6.522 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-1.000 tm_pt_3=-0.000 tm_pt_4=-1.000 tm_pt_5=-1.494 tm_pt_6=-38.184 tm_pt_7=-1.000 tm_pt_8=-2.718 tm_pt_9=-0.050 tm_pt_10=0.000 tm_pt_11=-0.000 tm_pt_12=0.000 tm_pt_13=-0.000 tm_pt_14=-1.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=1.000 WordPenalty=-1.303 ||| -21.743
-26 ||| < ঠিকানা > ||| lm_0=-15.853 lm_1=-13.045 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-2.000 tm_pt_3=-0.000 tm_pt_4=-2.000 tm_pt_5=-2.518 tm_pt_6=-29.231 tm_pt_7=-2.000 tm_pt_8=-5.436 tm_pt_9=-0.000 tm_pt_10=-0.118 tm_pt_11=-0.000 tm_pt_12=0.000 tm_pt_13=-0.000 tm_pt_14=-2.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=3.000 WordPenalty=-2.171 OOVPenalty=-100.000 ||| -136.961
-26 ||| the lt ঠিকানা > ||| lm_0=-17.709 lm_1=-16.306 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-2.000 tm_pt_3=-0.000 tm_pt_4=-1.000 tm_pt_5=-6.362 tm_pt_6=-20.589 tm_pt_7=-2.000 tm_pt_8=-5.436 tm_pt_9=-0.135 tm_pt_10=-2.453 tm_pt_11=-0.000 tm_pt_12=0.000 tm_pt_13=-0.000 tm_pt_14=-2.000 tm_pt_15=-1.000 tm_pt_16=-0.000 tm_glue_0=2.000 WordPenalty=-2.606 OOVPenalty=-200.000 ||| -241.704
-26 ||| < ঠিকানা , gt , ||| lm_0=-20.678 lm_1=-19.567 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-3.000 tm_pt_3=-0.000 tm_pt_4=-3.000 tm_pt_5=-4.258 tm_pt_6=-15.720 tm_pt_7=-3.000 tm_pt_8=-8.154 tm_pt_9=-0.000 tm_pt_10=-5.328 tm_pt_11=-0.000 tm_pt_12=-1.262 tm_pt_13=-0.000 tm_pt_14=-3.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=5.000 WordPenalty=-3.040 OOVPenalty=-200.000 ||| -244.166
-26 ||| , lt , ঠিকানা > ||| lm_0=-20.961 lm_1=-19.567 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-3.000 tm_pt_3=-0.000 tm_pt_4=-3.000 tm_pt_5=-4.245 tm_pt_6=-15.998 tm_pt_7=-3.000 tm_pt_8=-8.154 tm_pt_9=-0.000 tm_pt_10=-5.446 tm_pt_11=-0.000 tm_pt_12=-1.262 tm_pt_13=-0.000 tm_pt_14=-3.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=5.000 WordPenalty=-3.040 OOVPenalty=-200.000 ||| -244.640
-26 ||| < ঠিকানা , gt ; ||| lm_0=-21.561 lm_1=-19.567 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-3.000 tm_pt_3=-0.000 tm_pt_4=-3.000 tm_pt_5=-3.201 tm_pt_6=-18.449 tm_pt_7=-3.000 tm_pt_8=-8.154 tm_pt_9=-0.000 tm_pt_10=-1.596 tm_pt_11=-0.000 tm_pt_12=-1.248 tm_pt_13=-0.000 tm_pt_14=-3.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=5.000 WordPenalty=-3.040 OOVPenalty=-200.000 ||| -245.017
-27 ||| september ||| lm_0=-3.024 lm_1=-6.522 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-1.000 tm_pt_3=-0.000 tm_pt_4=-1.000 tm_pt_5=-0.176 tm_pt_6=-0.047 tm_pt_7=-1.000 tm_pt_8=-2.718 tm_pt_9=-0.000 tm_pt_10=-0.013 tm_pt_11=-0.000 tm_pt_12=-0.025 tm_pt_13=-0.000 tm_pt_14=-1.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=1.000 WordPenalty=-1.303 ||| -6.923
-27 ||| september . ||| lm_0=-4.832 lm_1=-9.783 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-1.000 tm_pt_3=-0.000 tm_pt_4=-1.000 tm_pt_5=-9.282 tm_pt_6=-0.716 tm_pt_7=-1.000 tm_pt_8=-2.718 tm_pt_9=-0.368 tm_pt_10=-1.099 tm_pt_11=-0.000 tm_pt_12=-3.689 tm_pt_13=-0.000 tm_pt_14=-2.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=1.000 WordPenalty=-1.737 ||| -15.112
-27 ||| সেপ্টেম্বর ||| lm_0=-7.355 lm_1=-6.522 tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=0.000 tm_pt_3=0.000 tm_pt_4=0.000 tm_pt_5=0.000 tm_pt_6=0.000 tm_pt_7=0.000 tm_pt_8=0.000 tm_pt_9=0.000 tm_pt_10=0.000 tm_pt_11=0.000 tm_pt_12=0.000 tm_pt_13=0.000 tm_pt_14=0.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=1.000 WordPenalty=-1.303 OOVPenalty=-100.000 ||| -111.358
-28 ||| from this theory though big বিস্ফোরণোর against can not be but it can be support . ||| lm_0=-35.950 lm_1=-55.439 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-10.000 tm_pt_3=-0.000 tm_pt_4=-8.000 tm_pt_5=-27.960 tm_pt_6=-23.108 tm_pt_7=-10.000 tm_pt_8=-27.180 tm_pt_9=-1.692 tm_pt_10=-15.673 tm_pt_11=-0.000 tm_pt_12=-5.046 tm_pt_13=-0.000 tm_pt_14=-15.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=9.000 WordPenalty=-7.817 OOVPenalty=-100.000 ||| -202.585
-28 ||| from this theory though big বিস্ফোরণোর against can not be rather it can be support . ||| lm_0=-37.068 lm_1=-55.439 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-10.000 tm_pt_3=-0.000 tm_pt_4=-8.000 tm_pt_5=-25.087 tm_pt_6=-23.283 tm_pt_7=-10.000 tm_pt_8=-27.180 tm_pt_9=-1.692 tm_pt_10=-13.275 tm_pt_11=-0.000 tm_pt_12=-5.046 tm_pt_13=-0.000 tm_pt_14=-15.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=9.000 WordPenalty=-7.817 OOVPenalty=-100.000 ||| -202.856
-28 ||| from this theory though big বিস্ফোরণোর against can not be but it can be supported . ||| lm_0=-34.996 lm_1=-55.439 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-10.000 tm_pt_3=-0.000 tm_pt_4=-8.000 tm_pt_5=-27.543 tm_pt_6=-22.616 tm_pt_7=-10.000 tm_pt_8=-27.180 tm_pt_9=-2.691 tm_pt_10=-16.797 tm_pt_11=-0.000 tm_pt_12=-7.126 tm_pt_13=-0.000 tm_pt_14=-15.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=9.000 WordPenalty=-7.817 OOVPenalty=-100.000 ||| -203.004
-28 ||| from this theory though big বিস্ফোরণোর against can not be rather it can be supported . ||| lm_0=-36.114 lm_1=-55.439 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-10.000 tm_pt_3=-0.000 tm_pt_4=-8.000 tm_pt_5=-24.670 tm_pt_6=-22.790 tm_pt_7=-10.000 tm_pt_8=-27.180 tm_pt_9=-2.691 tm_pt_10=-14.399 tm_pt_11=-0.000 tm_pt_12=-7.126 tm_pt_13=-0.000 tm_pt_14=-15.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=9.000 WordPenalty=-7.817 OOVPenalty=-100.000 ||| -203.274
-28 ||| from this theory though the বিস্ফোরণোর against can not be but it can be support . ||| lm_0=-35.207 lm_1=-55.439 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-10.000 tm_pt_3=-0.000 tm_pt_4=-8.000 tm_pt_5=-32.548 tm_pt_6=-23.199 tm_pt_7=-10.000 tm_pt_8=-27.180 tm_pt_9=-1.692 tm_pt_10=-19.989 tm_pt_11=-0.000 tm_pt_12=-5.126 tm_pt_13=-0.000 tm_pt_14=-15.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=9.000 WordPenalty=-7.817 OOVPenalty=-100.000 ||| -203.722
-28 ||| this theory from though big বিস্ফোরণোর against can not be but it can be support . ||| lm_0=-37.468 lm_1=-55.439 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-10.000 tm_pt_3=-0.000 tm_pt_4=-8.000 tm_pt_5=-27.960 tm_pt_6=-23.108 tm_pt_7=-10.000 tm_pt_8=-27.180 tm_pt_9=-1.572 tm_pt_10=-14.343 tm_pt_11=-0.000 tm_pt_12=-4.354 tm_pt_13=-0.000 tm_pt_14=-15.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=9.000 WordPenalty=-7.817 OOVPenalty=-100.000 ||| -203.775
-29 ||| agricultural in production france country ; it is most important mainly খাদ্যশস্য wine cheese and other কৃষিদ্রব্য europe and the world export . ||| lm_0=-65.262 lm_1=-78.267 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-15.000 tm_pt_3=-0.000 tm_pt_4=-13.000 tm_pt_5=-41.676 tm_pt_6=-19.680 tm_pt_7=-15.000 tm_pt_8=-40.770 tm_pt_9=-2.745 tm_pt_10=-19.919 tm_pt_11=-0.000 tm_pt_12=-7.203 tm_pt_13=-0.000 tm_pt_14=-21.000 tm_pt_15=-1.000 tm_pt_16=-0.000 tm_glue_0=14.000 WordPenalty=-10.857 OOVPenalty=-200.000 ||| -359.581
-29 ||| agricultural in production france country ; it is the most important mainly খাদ্যশস্য wine cheese and other কৃষিদ্রব্য europe and the world export . ||| lm_0=-63.377 lm_1=-81.528 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-15.000 tm_pt_3=-0.000 tm_pt_4=-13.000 tm_pt_5=-47.097 tm_pt_6=-19.754 tm_pt_7=-15.000 tm_pt_8=-40.770 tm_pt_9=-2.612 tm_pt_10=-18.967 tm_pt_11=-0.000 tm_pt_12=-6.355 tm_pt_13=-0.000 tm_pt_14=-22.000 tm_pt_15=-1.000 tm_pt_16=-0.000 tm_glue_0=14.000 WordPenalty=-11.292 OOVPenalty=-200.000 ||| -359.757
-29 ||| agriculture in production france country ; it is most important mainly খাদ্যশস্য wine cheese and other কৃষিদ্রব্য europe and the world export . ||| lm_0=-65.754 lm_1=-78.267 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-15.000 tm_pt_3=-0.000 tm_pt_4=-13.000 tm_pt_5=-41.940 tm_pt_6=-20.240 tm_pt_7=-15.000 tm_pt_8=-40.770 tm_pt_9=-2.746 tm_pt_10=-19.868 tm_pt_11=-0.000 tm_pt_12=-8.068 tm_pt_13=-0.000 tm_pt_14=-21.000 tm_pt_15=-1.000 tm_pt_16=-0.000 tm_glue_0=14.000 WordPenalty=-10.857 OOVPenalty=-200.000 ||| -360.724
-29 ||| agricultural in production france is most important country ; it mainly খাদ্যশস্য wine cheese and other কৃষিদ্রব্য europe and the world export . ||| lm_0=-66.157 lm_1=-78.267 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-17.000 tm_pt_3=-0.000 tm_pt_4=-16.000 tm_pt_5=-41.676 tm_pt_6=-19.680 tm_pt_7=-17.000 tm_pt_8=-46.206 tm_pt_9=-1.659 tm_pt_10=-26.434 tm_pt_11=-0.000 tm_pt_12=-9.771 tm_pt_13=-0.000 tm_pt_14=-21.000 tm_pt_15=-1.000 tm_pt_16=-0.000 tm_glue_0=18.000 WordPenalty=-10.857 OOVPenalty=-200.000 ||| -360.813
-29 ||| agricultural in production france is most important country , it basically খাদ্যশস্য wine cheese and other কৃষিদ্রব্য europe and the world export . ||| lm_0=-65.377 lm_1=-78.267 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-17.000 tm_pt_3=-0.000 tm_pt_4=-16.000 tm_pt_5=-42.462 tm_pt_6=-17.947 tm_pt_7=-17.000 tm_pt_8=-46.206 tm_pt_9=-2.610 tm_pt_10=-29.579 tm_pt_11=-0.000 tm_pt_12=-9.660 tm_pt_13=-0.000 tm_pt_14=-21.000 tm_pt_15=-1.000 tm_pt_16=-0.000 tm_glue_0=18.000 WordPenalty=-10.857 OOVPenalty=-200.000 ||| -360.960
-29 ||| agricultural in production france country ; it is most important mainly খাদ্যশস্য wine cheese and other কৃষিদ্রব্য europe and whole world export . ||| lm_0=-67.863 lm_1=-78.267 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-15.000 tm_pt_3=-0.000 tm_pt_4=-13.000 tm_pt_5=-37.477 tm_pt_6=-20.252 tm_pt_7=-15.000 tm_pt_8=-40.770 tm_pt_9=-2.745 tm_pt_10=-15.113 tm_pt_11=-0.000 tm_pt_12=-7.539 tm_pt_13=-0.000 tm_pt_14=-21.000 tm_pt_15=-1.000 tm_pt_16=-0.000 tm_glue_0=14.000 WordPenalty=-10.857 OOVPenalty=-200.000 ||| -361.064
-29 ||| agricultural in production france country ; it is the most important mainly খাদ্যশস্য wine cheese and other কৃষিদ্রব্য europe and whole world export . ||| lm_0=-65.977 lm_1=-81.528 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-15.000 tm_pt_3=-0.000 tm_pt_4=-13.000 tm_pt_5=-42.898 tm_pt_6=-20.326 tm_pt_7=-15.000 tm_pt_8=-40.770 tm_pt_9=-2.612 tm_pt_10=-14.161 tm_pt_11=-0.000 tm_pt_12=-6.692 tm_pt_13=-0.000 tm_pt_14=-22.000 tm_pt_15=-1.000 tm_pt_16=-0.000 tm_glue_0=14.000 WordPenalty=-11.292 OOVPenalty=-200.000 ||| -361.239
-29 ||| agricultural in production france country ; it is most important mainly খাদ্যশস্য wine cheese and other কৃষিদ্রব্য europe and the the export . ||| lm_0=-65.079 lm_1=-78.267 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-15.000 tm_pt_3=-0.000 tm_pt_4=-12.000 tm_pt_5=-45.631 tm_pt_6=-20.302 tm_pt_7=-15.000 tm_pt_8=-40.770 tm_pt_9=-2.659 tm_pt_10=-22.659 tm_pt_11=-0.000 tm_pt_12=-6.429 tm_pt_13=-0.000 tm_pt_14=-21.000 tm_pt_15=-1.000 tm_pt_16=-0.000 tm_glue_0=13.000 WordPenalty=-10.857 OOVPenalty=-200.000 ||| -361.654
-29 ||| agricultural in production france country ; it is most important mainly খাদ্যশস্য wine cheese and other কৃষিদ্রব্য europe and the in export . ||| lm_0=-65.585 lm_1=-78.267 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-15.000 tm_pt_3=-0.000 tm_pt_4=-13.000 tm_pt_5=-44.352 tm_pt_6=-19.955 tm_pt_7=-15.000 tm_pt_8=-40.770 tm_pt_9=-2.977 tm_pt_10=-22.858 tm_pt_11=-0.000 tm_pt_12=-7.608 tm_pt_13=-0.000 tm_pt_14=-21.000 tm_pt_15=-1.000 tm_pt_16=-0.000 tm_glue_0=14.000 WordPenalty=-10.857 OOVPenalty=-200.000 ||| -361.683
-29 ||| agricultural in production france country ; it is the most important mainly খাদ্যশস্য wine cheese and other কৃষিদ্রব্য europe and the the export . ||| lm_0=-63.193 lm_1=-81.528 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-15.000 tm_pt_3=-0.000 tm_pt_4=-12.000 tm_pt_5=-51.052 tm_pt_6=-20.376 tm_pt_7=-15.000 tm_pt_8=-40.770 tm_pt_9=-2.526 tm_pt_10=-21.707 tm_pt_11=-0.000 tm_pt_12=-5.582 tm_pt_13=-0.000 tm_pt_14=-22.000 tm_pt_15=-1.000 tm_pt_16=-0.000 tm_glue_0=13.000 WordPenalty=-11.292 OOVPenalty=-200.000 ||| -361.830
-30 ||| their in mathematics পাটীগণিতের person was . ||| lm_0=-20.967 lm_1=-25.558 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-4.000 tm_pt_3=-0.000 tm_pt_4=-4.000 tm_pt_5=-17.754 tm_pt_6=-6.222 tm_pt_7=-4.000 tm_pt_8=-10.872 tm_pt_9=-1.018 tm_pt_10=-0.326 tm_pt_11=-0.000 tm_pt_12=-0.500 tm_pt_13=-0.000 tm_pt_14=-6.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=5.000 WordPenalty=-3.909 OOVPenalty=-100.000 ||| -147.537
-30 ||| their in mathematics পাটীগণিতের were was . ||| lm_0=-20.867 lm_1=-25.558 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-5.000 tm_pt_3=-0.000 tm_pt_4=-4.000 tm_pt_5=-18.983 tm_pt_6=-5.123 tm_pt_7=-5.000 tm_pt_8=-13.590 tm_pt_9=-0.386 tm_pt_10=-4.319 tm_pt_11=-0.000 tm_pt_12=-1.553 tm_pt_13=-0.000 tm_pt_14=-6.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=5.000 WordPenalty=-3.909 OOVPenalty=-100.000 ||| -149.316
-30 ||| their in mathematics পাটীগণিতের are was . ||| lm_0=-20.651 lm_1=-25.558 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-5.000 tm_pt_3=-0.000 tm_pt_4=-5.000 tm_pt_5=-19.645 tm_pt_6=-4.613 tm_pt_7=-5.000 tm_pt_8=-13.590 tm_pt_9=-1.018 tm_pt_10=-7.011 tm_pt_11=-0.000 tm_pt_12=-2.428 tm_pt_13=-0.000 tm_pt_14=-6.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=6.000 WordPenalty=-3.909 OOVPenalty=-100.000 ||| -149.564
-30 ||| their in mathematics was পাটীগণিতের were . ||| lm_0=-19.648 lm_1=-25.558 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-4.000 tm_pt_3=-0.000 tm_pt_4=-2.000 tm_pt_5=-18.983 tm_pt_6=-5.123 tm_pt_7=-4.000 tm_pt_8=-10.872 tm_pt_9=-0.754 tm_pt_10=-7.426 tm_pt_11=-0.000 tm_pt_12=-1.413 tm_pt_13=-0.000 tm_pt_14=-6.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=3.000 WordPenalty=-3.909 OOVPenalty=-100.000 ||| -149.641
-30 ||| their in mathematics পাটীগণিতের priority was . ||| lm_0=-22.612 lm_1=-25.558 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-5.000 tm_pt_3=-0.000 tm_pt_4=-5.000 tm_pt_5=-14.687 tm_pt_6=-5.123 tm_pt_7=-5.000 tm_pt_8=-13.590 tm_pt_9=-1.018 tm_pt_10=-1.004 tm_pt_11=-0.000 tm_pt_12=-2.428 tm_pt_13=-0.000 tm_pt_14=-6.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=6.000 WordPenalty=-3.909 OOVPenalty=-100.000 ||| -149.658
-30 ||| their in mathematics পাটীগণিতের in was . ||| lm_0=-20.357 lm_1=-25.558 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-5.000 tm_pt_3=-0.000 tm_pt_4=-5.000 tm_pt_5=-21.111 tm_pt_6=-4.836 tm_pt_7=-5.000 tm_pt_8=-13.590 tm_pt_9=-1.018 tm_pt_10=-8.122 tm_pt_11=-0.000 tm_pt_12=-2.428 tm_pt_13=-0.000 tm_pt_14=-6.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=6.000 WordPenalty=-3.909 OOVPenalty=-100.000 ||| -149.845
-30 ||| their in mathematics পাটীগণিতের dominance was . ||| lm_0=-22.622 lm_1=-25.558 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-5.000 tm_pt_3=-0.000 tm_pt_4=-4.000 tm_pt_5=-14.926 tm_pt_6=-5.529 tm_pt_7=-5.000 tm_pt_8=-13.590 tm_pt_9=-0.386 tm_pt_10=-1.004 tm_pt_11=-0.000 tm_pt_12=-1.553 tm_pt_13=-0.000 tm_pt_14=-6.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=5.000 WordPenalty=-3.909 OOVPenalty=-100.000 ||| -149.990
-31 ||| deshgulo are : france call , make noise china belgium switzerland germany denmark sweden austria chekoslovakia argentina italy norway হাঙ্গেরী yugoslavia bulgaria romania গ্রীস egypt singapore indonesia থাইল্যান্ড japan burma হল্যান্ড soviet russia iran iraq and sri lanka . ||| lm_0=-154.514 lm_1=-130.445 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-30.000 tm_pt_3=-0.000 tm_pt_4=-30.000 tm_pt_5=-72.146 tm_pt_6=-14.912 tm_pt_7=-30.000 tm_pt_8=-81.540 tm_pt_9=-10.236 tm_pt_10=-15.926 tm_pt_11=-0.000 tm_pt_12=-4.654 tm_pt_13=-0.000 tm_pt_14=-35.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=34.000 WordPenalty=-17.806 OOVPenalty=-400.000 ||| -710.087
-31 ||| deshgulo are : france call , make noise china belgium switzerland germany denmark sweden austria chekoslovakia argentina italy norway হাঙ্গেরী yugoslavia bulgaria rumania গ্রীস egypt singapore indonesia থাইল্যান্ড japan burma হল্যান্ড soviet russia iran iraq and sri lanka . ||| lm_0=-154.478 lm_1=-130.445 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-30.000 tm_pt_3=-0.000 tm_pt_4=-30.000 tm_pt_5=-70.620 tm_pt_6=-14.912 tm_pt_7=-30.000 tm_pt_8=-81.540 tm_pt_9=-10.868 tm_pt_10=-15.926 tm_pt_11=-0.000 tm_pt_12=-5.347 tm_pt_13=-0.000 tm_pt_14=-35.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=34.000 WordPenalty=-17.806 OOVPenalty=-400.000 ||| -710.513
-31 ||| deshgulo france are : call , make noise china belgium switzerland germany denmark sweden austria chekoslovakia argentina italy norway হাঙ্গেরী yugoslavia bulgaria romania গ্রীস egypt singapore indonesia থাইল্যান্ড japan burma হল্যান্ড soviet russia iran iraq and sri lanka . ||| lm_0=-153.583 lm_1=-130.445 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-30.000 tm_pt_3=-0.000 tm_pt_4=-29.000 tm_pt_5=-72.146 tm_pt_6=-14.912 tm_pt_7=-30.000 tm_pt_8=-81.540 tm_pt_9=-10.236 tm_pt_10=-18.669 tm_pt_11=-0.000 tm_pt_12=-7.022 tm_pt_13=-0.000 tm_pt_14=-35.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=33.000 WordPenalty=-17.806 OOVPenalty=-400.000 ||| -711.448
-31 ||| deshgulo france are : call , make noise china belgium switzerland germany denmark sweden austria chekoslovakia argentina italy norway হাঙ্গেরী yugoslavia bulgaria rumania গ্রীস egypt singapore indonesia থাইল্যান্ড japan burma হল্যান্ড soviet russia iran iraq and sri lanka . ||| lm_0=-153.546 lm_1=-130.445 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-30.000 tm_pt_3=-0.000 tm_pt_4=-29.000 tm_pt_5=-70.620 tm_pt_6=-14.912 tm_pt_7=-30.000 tm_pt_8=-81.540 tm_pt_9=-10.868 tm_pt_10=-18.669 tm_pt_11=-0.000 tm_pt_12=-7.715 tm_pt_13=-0.000 tm_pt_14=-35.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=33.000 WordPenalty=-17.806 OOVPenalty=-400.000 ||| -711.875
-31 ||| deshgulo are : france call , make noise china belgium switzerland germany denmark chekoslovakia sweden austria argentina italy norway হাঙ্গেরী yugoslavia bulgaria romania গ্রীস egypt singapore indonesia থাইল্যান্ড japan burma হল্যান্ড soviet russia iran iraq and sri lanka . ||| lm_0=-154.514 lm_1=-130.445 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-30.000 tm_pt_3=-0.000 tm_pt_4=-29.000 tm_pt_5=-72.146 tm_pt_6=-14.912 tm_pt_7=-30.000 tm_pt_8=-81.540 tm_pt_9=-10.468 tm_pt_10=-15.639 tm_pt_11=-0.000 tm_pt_12=-4.654 tm_pt_13=-0.000 tm_pt_14=-35.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=32.000 WordPenalty=-17.806 OOVPenalty=-400.000 ||| -712.168
-31 ||| deshgulo are : france call , make noise belgium china switzerland germany denmark sweden austria chekoslovakia argentina italy norway হাঙ্গেরী yugoslavia bulgaria romania গ্রীস egypt singapore indonesia থাইল্যান্ড japan burma হল্যান্ড soviet russia iran iraq and sri lanka . ||| lm_0=-154.514 lm_1=-130.445 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-30.000 tm_pt_3=-0.000 tm_pt_4=-29.000 tm_pt_5=-72.146 tm_pt_6=-14.912 tm_pt_7=-30.000 tm_pt_8=-81.540 tm_pt_9=-10.254 tm_pt_10=-18.146 tm_pt_11=-0.000 tm_pt_12=-7.070 tm_pt_13=-0.000 tm_pt_14=-35.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=33.000 WordPenalty=-17.806 OOVPenalty=-400.000 ||| -712.497
-31 ||| deshgulo are : france call , make noise china belgium switzerland germany denmark sweden austria chekoslovakia argentina italy norway হাঙ্গেরী yugoslavia bulgaria romania গ্রীস egypt singapore indonesia থাইল্যান্ড japan burma হল্যান্ড soviet union iran iraq and sri lanka . ||| lm_0=-153.515 lm_1=-130.445 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-30.000 tm_pt_3=-0.000 tm_pt_4=-30.000 tm_pt_5=-75.530 tm_pt_6=-17.369 tm_pt_7=-30.000 tm_pt_8=-81.540 tm_pt_9=-10.604 tm_pt_10=-19.643 tm_pt_11=-0.000 tm_pt_12=-7.427 tm_pt_13=-0.000 tm_pt_14=-35.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=34.000 WordPenalty=-17.806 OOVPenalty=-400.000 ||| -712.585
-31 ||| deshgulo are : france call , make noise china belgium switzerland garmany denmark sweden austria chekoslovakia argentina italy norway হাঙ্গেরী yugoslavia bulgaria romania গ্রীস egypt singapore indonesia থাইল্যান্ড japan burma হল্যান্ড soviet russia iran iraq and sri lanka . ||| lm_0=-155.925 lm_1=-130.445 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-30.000 tm_pt_3=-0.000 tm_pt_4=-30.000 tm_pt_5=-69.730 tm_pt_6=-18.090 tm_pt_7=-30.000 tm_pt_8=-81.540 tm_pt_9=-11.186 tm_pt_10=-12.550 tm_pt_11=-0.000 tm_pt_12=-6.040 tm_pt_13=-0.000 tm_pt_14=-35.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=34.000 WordPenalty=-17.806 OOVPenalty=-400.000 ||| -712.850
-31 ||| deshgulo are : france call , make noise belgium china switzerland germany denmark sweden austria chekoslovakia argentina italy norway হাঙ্গেরী yugoslavia bulgaria rumania গ্রীস egypt singapore indonesia থাইল্যান্ড japan burma হল্যান্ড soviet russia iran iraq and sri lanka . ||| lm_0=-154.478 lm_1=-130.445 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-30.000 tm_pt_3=-0.000 tm_pt_4=-29.000 tm_pt_5=-70.620 tm_pt_6=-14.912 tm_pt_7=-30.000 tm_pt_8=-81.540 tm_pt_9=-10.886 tm_pt_10=-18.146 tm_pt_11=-0.000 tm_pt_12=-7.763 tm_pt_13=-0.000 tm_pt_14=-35.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=33.000 WordPenalty=-17.806 OOVPenalty=-400.000 ||| -712.923
-31 ||| deshgulo are : france call , make noise china belgium switzerland germany denmark sweden austria chekoslovakia argentina italy norway হাঙ্গেরী yugoslavia bulgaria rumania গ্রীস egypt singapore indonesia থাইল্যান্ড japan burma হল্যান্ড soviet union iran iraq and sri lanka . ||| lm_0=-153.479 lm_1=-130.445 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-30.000 tm_pt_3=-0.000 tm_pt_4=-30.000 tm_pt_5=-74.004 tm_pt_6=-17.369 tm_pt_7=-30.000 tm_pt_8=-81.540 tm_pt_9=-11.236 tm_pt_10=-19.643 tm_pt_11=-0.000 tm_pt_12=-8.120 tm_pt_13=-0.000 tm_pt_14=-35.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=34.000 WordPenalty=-17.806 OOVPenalty=-400.000 ||| -713.011
-32 ||| this ব্যাসিলিকার places now situated bank of england . ||| lm_0=-24.256 lm_1=-32.611 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-6.000 tm_pt_3=-0.000 tm_pt_4=-4.000 tm_pt_5=-11.945 tm_pt_6=-6.619 tm_pt_7=-6.000 tm_pt_8=-16.308 tm_pt_9=-0.032 tm_pt_10=-3.478 tm_pt_11=-0.000 tm_pt_12=-3.808 tm_pt_13=-0.000 tm_pt_14=-8.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=5.000 WordPenalty=-4.777 OOVPenalty=-100.000 ||| -158.979
-32 ||| this ব্যাসিলিকার places now located bank of england . ||| lm_0=-24.231 lm_1=-32.611 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-6.000 tm_pt_3=-0.000 tm_pt_4=-4.000 tm_pt_5=-12.136 tm_pt_6=-6.827 tm_pt_7=-6.000 tm_pt_8=-16.308 tm_pt_9=-0.063 tm_pt_10=-3.701 tm_pt_11=-0.000 tm_pt_12=-4.031 tm_pt_13=-0.000 tm_pt_14=-8.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=5.000 WordPenalty=-4.777 OOVPenalty=-100.000 ||| -159.219
-32 ||| this ব্যাসিলিকার places now bank of england is . ||| lm_0=-24.005 lm_1=-32.611 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-7.000 tm_pt_3=-0.000 tm_pt_4=-7.000 tm_pt_5=-15.428 tm_pt_6=-6.481 tm_pt_7=-7.000 tm_pt_8=-19.026 tm_pt_9=-0.145 tm_pt_10=-8.604 tm_pt_11=-0.000 tm_pt_12=-5.064 tm_pt_13=-0.000 tm_pt_14=-8.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=8.000 WordPenalty=-4.777 OOVPenalty=-100.000 ||| -159.416
-32 ||| this ব্যাসিলিকার places is situated bank of england . ||| lm_0=-22.559 lm_1=-32.611 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-6.000 tm_pt_3=-0.000 tm_pt_4=-4.000 tm_pt_5=-16.306 tm_pt_6=-7.543 tm_pt_7=-6.000 tm_pt_8=-16.308 tm_pt_9=-0.032 tm_pt_10=-7.140 tm_pt_11=-0.000 tm_pt_12=-5.143 tm_pt_13=-0.000 tm_pt_14=-8.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=5.000 WordPenalty=-4.777 OOVPenalty=-100.000 ||| -159.452
-32 ||| this ব্যাসিলিকার places now bank of england situated . ||| lm_0=-26.017 lm_1=-32.611 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-6.000 tm_pt_3=-0.000 tm_pt_4=-6.000 tm_pt_5=-11.945 tm_pt_6=-6.619 tm_pt_7=-6.000 tm_pt_8=-16.308 tm_pt_9=-0.510 tm_pt_10=-3.660 tm_pt_11=-0.000 tm_pt_12=-3.521 tm_pt_13=-0.000 tm_pt_14=-8.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=7.000 WordPenalty=-4.777 OOVPenalty=-100.000 ||| -159.567
-32 ||| this ব্যাসিলিকার parts is situated bank of england . ||| lm_0=-21.958 lm_1=-32.611 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-6.000 tm_pt_3=-0.000 tm_pt_4=-4.000 tm_pt_5=-17.783 tm_pt_6=-8.332 tm_pt_7=-6.000 tm_pt_8=-16.308 tm_pt_9=-0.075 tm_pt_10=-7.796 tm_pt_11=-0.000 tm_pt_12=-5.549 tm_pt_13=-0.000 tm_pt_14=-8.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=5.000 WordPenalty=-4.777 OOVPenalty=-100.000 ||| -159.610
-32 ||| this ব্যাসিলিকার places is located bank of england . ||| lm_0=-22.627 lm_1=-32.611 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-6.000 tm_pt_3=-0.000 tm_pt_4=-4.000 tm_pt_5=-16.497 tm_pt_6=-7.751 tm_pt_7=-6.000 tm_pt_8=-16.308 tm_pt_9=-0.063 tm_pt_10=-7.363 tm_pt_11=-0.000 tm_pt_12=-5.366 tm_pt_13=-0.000 tm_pt_14=-8.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=5.000 WordPenalty=-4.777 OOVPenalty=-100.000 ||| -159.807
-32 ||| this ব্যাসিলিকার places are situated bank of england . ||| lm_0=-21.540 lm_1=-32.611 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-6.000 tm_pt_3=-0.000 tm_pt_4=-4.000 tm_pt_5=-16.832 tm_pt_6=-8.893 tm_pt_7=-6.000 tm_pt_8=-16.308 tm_pt_9=-0.400 tm_pt_10=-7.875 tm_pt_11=-0.000 tm_pt_12=-6.753 tm_pt_13=-0.000 tm_pt_14=-8.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=5.000 WordPenalty=-4.777 OOVPenalty=-100.000 ||| -159.814
-32 ||| this ব্যাসিলিকার parts now situated bank of england . ||| lm_0=-24.207 lm_1=-32.611 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-6.000 tm_pt_3=-0.000 tm_pt_4=-4.000 tm_pt_5=-13.422 tm_pt_6=-7.408 tm_pt_7=-6.000 tm_pt_8=-16.308 tm_pt_9=-0.075 tm_pt_10=-4.135 tm_pt_11=-0.000 tm_pt_12=-4.214 tm_pt_13=-0.000 tm_pt_14=-8.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=5.000 WordPenalty=-4.777 OOVPenalty=-100.000 ||| -159.820
-32 ||| this ব্যাসিলিকার places now bank of england located . ||| lm_0=-26.192 lm_1=-32.611 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-6.000 tm_pt_3=-0.000 tm_pt_4=-6.000 tm_pt_5=-12.136 tm_pt_6=-6.827 tm_pt_7=-6.000 tm_pt_8=-16.308 tm_pt_9=-0.510 tm_pt_10=-3.660 tm_pt_11=-0.000 tm_pt_12=-3.521 tm_pt_13=-0.000 tm_pt_14=-8.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=7.000 WordPenalty=-4.777 OOVPenalty=-100.000 ||| -159.891
-33 ||| country in বিস্কাই sub-sea south the জিব্রাল্টার strait প্রণালীর in মরক্কো the west and the atlantic ocean . ||| lm_0=-52.858 lm_1=-61.961 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-13.000 tm_pt_3=-0.000 tm_pt_4=-10.000 tm_pt_5=-36.110 tm_pt_6=-14.959 tm_pt_7=-13.000 tm_pt_8=-35.334 tm_pt_9=-4.192 tm_pt_10=-39.743 tm_pt_11=-0.000 tm_pt_12=-13.346 tm_pt_13=-0.000 tm_pt_14=-14.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=14.000 WordPenalty=-8.686 OOVPenalty=-400.000 ||| -537.237
-33 ||| country in বিস্কাই sub-sea south the জিব্রাল্টার strait প্রণালীর the মরক্কো the west and the atlantic ocean . ||| lm_0=-52.872 lm_1=-61.961 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-13.000 tm_pt_3=-0.000 tm_pt_4=-10.000 tm_pt_5=-36.262 tm_pt_6=-14.179 tm_pt_7=-13.000 tm_pt_8=-35.334 tm_pt_9=-4.192 tm_pt_10=-40.812 tm_pt_11=-0.000 tm_pt_12=-13.346 tm_pt_13=-0.000 tm_pt_14=-14.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=14.000 WordPenalty=-8.686 OOVPenalty=-400.000 ||| -537.294
-33 ||| country in বিস্কাই sub-sea south of জিব্রাল্টার strait প্রণালীর in মরক্কো the west and the atlantic ocean . ||| lm_0=-52.705 lm_1=-61.961 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-13.000 tm_pt_3=-0.000 tm_pt_4=-10.000 tm_pt_5=-36.298 tm_pt_6=-15.474 tm_pt_7=-13.000 tm_pt_8=-35.334 tm_pt_9=-4.192 tm_pt_10=-39.086 tm_pt_11=-0.000 tm_pt_12=-13.982 tm_pt_13=-0.000 tm_pt_14=-14.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=14.000 WordPenalty=-8.686 OOVPenalty=-400.000 ||| -537.314
-33 ||| country in বিস্কাই sub-sea south of জিব্রাল্টার strait প্রণালীর the মরক্কো the west and the atlantic ocean . ||| lm_0=-52.719 lm_1=-61.961 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-13.000 tm_pt_3=-0.000 tm_pt_4=-10.000 tm_pt_5=-36.450 tm_pt_6=-14.694 tm_pt_7=-13.000 tm_pt_8=-35.334 tm_pt_9=-4.192 tm_pt_10=-40.155 tm_pt_11=-0.000 tm_pt_12=-13.982 tm_pt_13=-0.000 tm_pt_14=-14.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=14.000 WordPenalty=-8.686 OOVPenalty=-400.000 ||| -537.371
-33 ||| country in বিস্কাই sub-sea south the জিব্রাল্টার strait প্রণালীর south মরক্কো the west and the atlantic ocean . ||| lm_0=-54.581 lm_1=-61.961 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-13.000 tm_pt_3=-0.000 tm_pt_4=-10.000 tm_pt_5=-31.798 tm_pt_6=-14.337 tm_pt_7=-13.000 tm_pt_8=-35.334 tm_pt_9=-4.075 tm_pt_10=-37.232 tm_pt_11=-0.000 tm_pt_12=-12.835 tm_pt_13=-0.000 tm_pt_14=-14.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=14.000 WordPenalty=-8.686 OOVPenalty=-400.000 ||| -537.402
-33 ||| country in বিস্কাই sub-sea south of জিব্রাল্টার strait প্রণালীর south মরক্কো the west and the atlantic ocean . ||| lm_0=-54.428 lm_1=-61.961 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-13.000 tm_pt_3=-0.000 tm_pt_4=-10.000 tm_pt_5=-31.986 tm_pt_6=-14.851 tm_pt_7=-13.000 tm_pt_8=-35.334 tm_pt_9=-4.075 tm_pt_10=-36.576 tm_pt_11=-0.000 tm_pt_12=-13.471 tm_pt_13=-0.000 tm_pt_14=-14.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=14.000 WordPenalty=-8.686 OOVPenalty=-400.000 ||| -537.480
-33 ||| the country in বিস্কাই sub-sea south the জিব্রাল্টার strait প্রণালীর in মরক্কো the west and the atlantic ocean . ||| lm_0=-52.914 lm_1=-65.223 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-13.000 tm_pt_3=-0.000 tm_pt_4=-10.000 tm_pt_5=-41.841 tm_pt_6=-14.869 tm_pt_7=-13.000 tm_pt_8=-35.334 tm_pt_9=-3.198 tm_pt_10=-35.989 tm_pt_11=-0.000 tm_pt_12=-11.554 tm_pt_13=-0.000 tm_pt_14=-15.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=14.000 WordPenalty=-9.120 OOVPenalty=-400.000 ||| -538.051
-33 ||| the country in বিস্কাই sub-sea south the জিব্রাল্টার strait প্রণালীর the মরক্কো the west and the atlantic ocean . ||| lm_0=-52.928 lm_1=-65.223 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-13.000 tm_pt_3=-0.000 tm_pt_4=-10.000 tm_pt_5=-41.994 tm_pt_6=-14.089 tm_pt_7=-13.000 tm_pt_8=-35.334 tm_pt_9=-3.198 tm_pt_10=-37.058 tm_pt_11=-0.000 tm_pt_12=-11.554 tm_pt_13=-0.000 tm_pt_14=-15.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=14.000 WordPenalty=-9.120 OOVPenalty=-400.000 ||| -538.108
-33 ||| the country in বিস্কাই sub-sea south of জিব্রাল্টার strait প্রণালীর in মরক্কো the west and the atlantic ocean . ||| lm_0=-52.761 lm_1=-65.223 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-13.000 tm_pt_3=-0.000 tm_pt_4=-10.000 tm_pt_5=-42.029 tm_pt_6=-15.384 tm_pt_7=-13.000 tm_pt_8=-35.334 tm_pt_9=-3.199 tm_pt_10=-35.333 tm_pt_11=-0.000 tm_pt_12=-12.190 tm_pt_13=-0.000 tm_pt_14=-15.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=14.000 WordPenalty=-9.120 OOVPenalty=-400.000 ||| -538.129
-33 ||| the country in বিস্কাই sub-sea south of জিব্রাল্টার strait প্রণালীর the মরক্কো the west and the atlantic ocean . ||| lm_0=-52.775 lm_1=-65.223 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-13.000 tm_pt_3=-0.000 tm_pt_4=-9.000 tm_pt_5=-42.181 tm_pt_6=-14.603 tm_pt_7=-13.000 tm_pt_8=-35.334 tm_pt_9=-3.113 tm_pt_10=-35.540 tm_pt_11=-0.000 tm_pt_12=-11.584 tm_pt_13=-0.000 tm_pt_14=-15.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=13.000 WordPenalty=-9.120 OOVPenalty=-400.000 ||| -538.642
-34 ||| apart from this situation understood a মুহূর্তে the the decision within অক্ষমতা after taking . ||| lm_0=-43.285 lm_1=-52.178 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-10.000 tm_pt_3=-0.000 tm_pt_4=-9.000 tm_pt_5=-52.889 tm_pt_6=-19.875 tm_pt_7=-10.000 tm_pt_8=-27.180 tm_pt_9=-5.368 tm_pt_10=-31.297 tm_pt_11=-0.000 tm_pt_12=-7.910 tm_pt_13=-0.000 tm_pt_14=-13.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=11.000 WordPenalty=-7.383 OOVPenalty=-200.000 ||| -319.670
-34 ||| other than this situation understood a মুহূর্তে the the decision within অক্ষমতা after taking . ||| lm_0=-42.880 lm_1=-52.178 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-10.000 tm_pt_3=-0.000 tm_pt_4=-9.000 tm_pt_5=-54.580 tm_pt_6=-20.568 tm_pt_7=-10.000 tm_pt_8=-27.180 tm_pt_9=-5.368 tm_pt_10=-31.548 tm_pt_11=-0.000 tm_pt_12=-7.910 tm_pt_13=-0.000 tm_pt_14=-13.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=11.000 WordPenalty=-7.383 OOVPenalty=-200.000 ||| -319.794
-34 ||| moreover this situation understood a মুহূর্তে the the decision within অক্ষমতা after taking . ||| lm_0=-46.234 lm_1=-48.917 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-10.000 tm_pt_3=-0.000 tm_pt_4=-8.000 tm_pt_5=-46.225 tm_pt_6=-21.261 tm_pt_7=-10.000 tm_pt_8=-27.180 tm_pt_9=-4.503 tm_pt_10=-30.392 tm_pt_11=-0.000 tm_pt_12=-6.475 tm_pt_13=-0.000 tm_pt_14=-12.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=10.000 WordPenalty=-6.949 OOVPenalty=-200.000 ||| -319.929
-34 ||| apart from this situation understood a মুহূর্তে the the within অক্ষমতা after taking decision . ||| lm_0=-41.892 lm_1=-52.178 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-10.000 tm_pt_3=-0.000 tm_pt_4=-8.000 tm_pt_5=-52.889 tm_pt_6=-19.875 tm_pt_7=-10.000 tm_pt_8=-27.180 tm_pt_9=-5.736 tm_pt_10=-32.627 tm_pt_11=-0.000 tm_pt_12=-8.979 tm_pt_13=-0.000 tm_pt_14=-13.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=10.000 WordPenalty=-7.383 OOVPenalty=-200.000 ||| -319.936
-34 ||| apart from this situation understood a মুহূর্তে the fast within অক্ষমতা after taking decision . ||| lm_0=-43.691 lm_1=-52.178 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-10.000 tm_pt_3=-0.000 tm_pt_4=-8.000 tm_pt_5=-47.241 tm_pt_6=-20.722 tm_pt_7=-10.000 tm_pt_8=-27.180 tm_pt_9=-5.737 tm_pt_10=-26.965 tm_pt_11=-0.000 tm_pt_12=-9.202 tm_pt_13=-0.000 tm_pt_14=-13.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=10.000 WordPenalty=-7.383 OOVPenalty=-200.000 ||| -319.982
-34 ||| other than this situation understood a মুহূর্তে the the within অক্ষমতা after taking decision . ||| lm_0=-41.487 lm_1=-52.178 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-10.000 tm_pt_3=-0.000 tm_pt_4=-8.000 tm_pt_5=-54.580 tm_pt_6=-20.568 tm_pt_7=-10.000 tm_pt_8=-27.180 tm_pt_9=-5.736 tm_pt_10=-32.878 tm_pt_11=-0.000 tm_pt_12=-8.979 tm_pt_13=-0.000 tm_pt_14=-13.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=10.000 WordPenalty=-7.383 OOVPenalty=-200.000 ||| -320.060
-34 ||| other than this situation understood a মুহূর্তে the fast within অক্ষমতা after taking decision . ||| lm_0=-43.286 lm_1=-52.178 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-10.000 tm_pt_3=-0.000 tm_pt_4=-8.000 tm_pt_5=-48.932 tm_pt_6=-21.415 tm_pt_7=-10.000 tm_pt_8=-27.180 tm_pt_9=-5.737 tm_pt_10=-27.217 tm_pt_11=-0.000 tm_pt_12=-9.202 tm_pt_13=-0.000 tm_pt_14=-13.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=10.000 WordPenalty=-7.383 OOVPenalty=-200.000 ||| -320.106
-34 ||| moreover this situation understood a মুহূর্তে the the within অক্ষমতা after taking decision . ||| lm_0=-44.841 lm_1=-48.917 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-10.000 tm_pt_3=-0.000 tm_pt_4=-7.000 tm_pt_5=-46.225 tm_pt_6=-21.261 tm_pt_7=-10.000 tm_pt_8=-27.180 tm_pt_9=-4.871 tm_pt_10=-31.722 tm_pt_11=-0.000 tm_pt_12=-7.544 tm_pt_13=-0.000 tm_pt_14=-12.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=9.000 WordPenalty=-6.949 OOVPenalty=-200.000 ||| -320.195
-34 ||| moreover this situation understood a মুহূর্তে the fast within অক্ষমতা after taking decision . ||| lm_0=-46.640 lm_1=-48.917 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-10.000 tm_pt_3=-0.000 tm_pt_4=-7.000 tm_pt_5=-40.578 tm_pt_6=-22.109 tm_pt_7=-10.000 tm_pt_8=-27.180 tm_pt_9=-4.872 tm_pt_10=-26.061 tm_pt_11=-0.000 tm_pt_12=-7.767 tm_pt_13=-0.000 tm_pt_14=-12.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=9.000 WordPenalty=-6.949 OOVPenalty=-200.000 ||| -320.241
-35 ||| কার্ল there is work through it by . ||| lm_0=-22.228 lm_1=-29.350 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-6.000 tm_pt_3=-0.000 tm_pt_4=-4.000 tm_pt_5=-27.298 tm_pt_6=-10.704 tm_pt_7=-6.000 tm_pt_8=-16.308 tm_pt_9=-0.553 tm_pt_10=-22.739 tm_pt_11=-0.000 tm_pt_12=-4.672 tm_pt_13=-0.000 tm_pt_14=-7.000 tm_pt_15=-1.000 tm_pt_16=-0.000 tm_glue_0=5.000 WordPenalty=-4.343 OOVPenalty=-100.000 ||| -164.983
-35 ||| কার্ল there is work through this by . ||| lm_0=-21.615 lm_1=-29.350 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-6.000 tm_pt_3=-0.000 tm_pt_4=-4.000 tm_pt_5=-28.891 tm_pt_6=-11.650 tm_pt_7=-6.000 tm_pt_8=-16.308 tm_pt_9=-0.553 tm_pt_10=-24.672 tm_pt_11=-0.000 tm_pt_12=-5.305 tm_pt_13=-0.000 tm_pt_14=-7.000 tm_pt_15=-1.000 tm_pt_16=-0.000 tm_glue_0=5.000 WordPenalty=-4.343 OOVPenalty=-100.000 ||| -165.578
-35 ||| কার্ল there is work through it by the . ||| lm_0=-22.767 lm_1=-32.611 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-7.000 tm_pt_3=-0.000 tm_pt_4=-6.000 tm_pt_5=-31.306 tm_pt_6=-8.766 tm_pt_7=-7.000 tm_pt_8=-19.026 tm_pt_9=-0.553 tm_pt_10=-19.313 tm_pt_11=-0.000 tm_pt_12=-5.011 tm_pt_13=-0.000 tm_pt_14=-8.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=7.000 WordPenalty=-4.777 OOVPenalty=-100.000 ||| -165.761
-35 ||| কার্ল there is only through it by . ||| lm_0=-20.644 lm_1=-29.350 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-6.000 tm_pt_3=-0.000 tm_pt_4=-4.000 tm_pt_5=-30.406 tm_pt_6=-13.648 tm_pt_7=-6.000 tm_pt_8=-16.308 tm_pt_9=-0.921 tm_pt_10=-25.279 tm_pt_11=-0.000 tm_pt_12=-6.687 tm_pt_13=-0.000 tm_pt_14=-7.000 tm_pt_15=-1.000 tm_pt_16=-0.000 tm_glue_0=5.000 WordPenalty=-4.343 OOVPenalty=-100.000 ||| -166.297
-35 ||| কার্ল there is works that it by . ||| lm_0=-21.335 lm_1=-29.350 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-6.000 tm_pt_3=-0.000 tm_pt_4=-3.000 tm_pt_5=-31.776 tm_pt_6=-13.782 tm_pt_7=-6.000 tm_pt_8=-16.308 tm_pt_9=-0.242 tm_pt_10=-22.614 tm_pt_11=-0.000 tm_pt_12=-4.895 tm_pt_13=-0.000 tm_pt_14=-7.000 tm_pt_15=-1.000 tm_pt_16=-0.000 tm_glue_0=4.000 WordPenalty=-4.343 OOVPenalty=-100.000 ||| -166.543
-35 ||| কার্ল there is work through this are . ||| lm_0=-21.240 lm_1=-29.350 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-6.000 tm_pt_3=-0.000 tm_pt_4=-4.000 tm_pt_5=-30.491 tm_pt_6=-12.874 tm_pt_7=-6.000 tm_pt_8=-16.308 tm_pt_9=-0.871 tm_pt_10=-25.862 tm_pt_11=-0.000 tm_pt_12=-5.998 tm_pt_13=-0.000 tm_pt_14=-7.000 tm_pt_15=-1.000 tm_pt_16=-0.000 tm_glue_0=5.000 WordPenalty=-4.343 OOVPenalty=-100.000 ||| -166.654
-35 ||| কার্ল there is work that it by . ||| lm_0=-21.935 lm_1=-29.350 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-6.000 tm_pt_3=-0.000 tm_pt_4=-3.000 tm_pt_5=-31.374 tm_pt_6=-12.783 tm_pt_7=-6.000 tm_pt_8=-16.308 tm_pt_9=-0.235 tm_pt_10=-23.123 tm_pt_11=-0.000 tm_pt_12=-3.979 tm_pt_13=-0.000 tm_pt_14=-7.000 tm_pt_15=-1.000 tm_pt_16=-0.000 tm_glue_0=4.000 WordPenalty=-4.343 OOVPenalty=-100.000 ||| -166.669
-35 ||| কার্ল there is work through this by the . ||| lm_0=-22.538 lm_1=-32.611 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-7.000 tm_pt_3=-0.000 tm_pt_4=-6.000 tm_pt_5=-32.900 tm_pt_6=-9.713 tm_pt_7=-7.000 tm_pt_8=-19.026 tm_pt_9=-0.553 tm_pt_10=-21.246 tm_pt_11=-0.000 tm_pt_12=-5.644 tm_pt_13=-0.000 tm_pt_14=-8.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=7.000 WordPenalty=-4.777 OOVPenalty=-100.000 ||| -166.832
-35 ||| কার্ল there is only through this by . ||| lm_0=-20.030 lm_1=-29.350 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-6.000 tm_pt_3=-0.000 tm_pt_4=-4.000 tm_pt_5=-32.000 tm_pt_6=-14.595 tm_pt_7=-6.000 tm_pt_8=-16.308 tm_pt_9=-0.921 tm_pt_10=-27.212 tm_pt_11=-0.000 tm_pt_12=-7.320 tm_pt_13=-0.000 tm_pt_14=-7.000 tm_pt_15=-1.000 tm_pt_16=-0.000 tm_glue_0=5.000 WordPenalty=-4.343 OOVPenalty=-100.000 ||| -166.892
-36 ||| the subject sometimes puran -lrb- from sometimes maintain love story sometimes again from today 's social and political ঘটনাবলি taken from . ||| lm_0=-60.113 lm_1=-75.006 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-15.000 tm_pt_3=-0.000 tm_pt_4=-12.000 tm_pt_5=-49.320 tm_pt_6=-19.514 tm_pt_7=-15.000 tm_pt_8=-40.770 tm_pt_9=-4.321 tm_pt_10=-13.572 tm_pt_11=-0.000 tm_pt_12=-8.086 tm_pt_13=-0.000 tm_pt_14=-21.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=13.000 WordPenalty=-10.423 OOVPenalty=-100.000 ||| -252.556
-36 ||| the subject sometimes puran -lrb- from sometimes maintain love story sometimes again from today 's social and political ঘটনাবলি from accepted . ||| lm_0=-61.063 lm_1=-75.006 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-16.000 tm_pt_3=-0.000 tm_pt_4=-13.000 tm_pt_5=-47.822 tm_pt_6=-18.616 tm_pt_7=-16.000 tm_pt_8=-43.488 tm_pt_9=-3.321 tm_pt_10=-13.748 tm_pt_11=-0.000 tm_pt_12=-8.787 tm_pt_13=-0.000 tm_pt_14=-21.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=14.000 WordPenalty=-10.423 OOVPenalty=-100.000 ||| -252.802
-36 ||| the subject sometimes puran -lrb- from sometimes maintain love story from sometimes again today 's social and political ঘটনাবলি taken from . ||| lm_0=-62.036 lm_1=-75.006 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-15.000 tm_pt_3=-0.000 tm_pt_4=-13.000 tm_pt_5=-49.320 tm_pt_6=-19.514 tm_pt_7=-15.000 tm_pt_8=-40.770 tm_pt_9=-4.321 tm_pt_10=-13.413 tm_pt_11=-0.000 tm_pt_12=-7.348 tm_pt_13=-0.000 tm_pt_14=-21.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=14.000 WordPenalty=-10.423 OOVPenalty=-100.000 ||| -253.669
-36 ||| the subject sometimes puran -lrb- from sometimes maintain love story from sometimes again today 's social and political ঘটনাবলি from accepted . ||| lm_0=-62.986 lm_1=-75.006 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-16.000 tm_pt_3=-0.000 tm_pt_4=-14.000 tm_pt_5=-47.822 tm_pt_6=-18.616 tm_pt_7=-16.000 tm_pt_8=-43.488 tm_pt_9=-3.321 tm_pt_10=-13.589 tm_pt_11=-0.000 tm_pt_12=-8.048 tm_pt_13=-0.000 tm_pt_14=-21.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=15.000 WordPenalty=-10.423 OOVPenalty=-100.000 ||| -253.914
-36 ||| the subject sometimes puran -lrb- sometimes from maintain love story from sometimes again today 's social and political ঘটনাবলি taken from . ||| lm_0=-61.385 lm_1=-75.006 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-15.000 tm_pt_3=-0.000 tm_pt_4=-12.000 tm_pt_5=-49.320 tm_pt_6=-19.514 tm_pt_7=-15.000 tm_pt_8=-40.770 tm_pt_9=-4.321 tm_pt_10=-13.572 tm_pt_11=-0.000 tm_pt_12=-8.086 tm_pt_13=-0.000 tm_pt_14=-21.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=13.000 WordPenalty=-10.423 OOVPenalty=-100.000 ||| -254.130
-36 ||| the subject sometimes puran -lrb- from sometimes maintain love story sometimes again from today 's social and political ঘটনাবলি to accepted . ||| lm_0=-60.799 lm_1=-75.006 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-15.000 tm_pt_3=-0.000 tm_pt_4=-11.000 tm_pt_5=-49.784 tm_pt_6=-19.495 tm_pt_7=-15.000 tm_pt_8=-40.770 tm_pt_9=-3.327 tm_pt_10=-14.588 tm_pt_11=-0.000 tm_pt_12=-9.499 tm_pt_13=-0.000 tm_pt_14=-21.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=12.000 WordPenalty=-10.423 OOVPenalty=-100.000 ||| -254.401
-36 ||| the subject sometimes puran -lrb- from sometimes maintain love story sometimes again from today 's social and political ঘটনাবলি from accepted ||| lm_0=-61.702 lm_1=-71.745 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-15.000 tm_pt_3=-0.000 tm_pt_4=-12.000 tm_pt_5=-48.412 tm_pt_6=-28.349 tm_pt_7=-15.000 tm_pt_8=-40.770 tm_pt_9=-4.321 tm_pt_10=-16.137 tm_pt_11=-0.000 tm_pt_12=-9.102 tm_pt_13=-0.000 tm_pt_14=-20.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=13.000 WordPenalty=-9.989 OOVPenalty=-100.000 ||| -256.291
-36 ||| the subject sometimes puran -lrb- from sometimes maintain love story sometimes again from today 's social and political ঘটনাবলি from taken ||| lm_0=-61.646 lm_1=-71.745 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-15.000 tm_pt_3=-0.000 tm_pt_4=-12.000 tm_pt_5=-49.706 tm_pt_6=-28.148 tm_pt_7=-15.000 tm_pt_8=-40.770 tm_pt_9=-4.321 tm_pt_10=-15.774 tm_pt_11=-0.000 tm_pt_12=-9.102 tm_pt_13=-0.000 tm_pt_14=-20.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=13.000 WordPenalty=-9.989 OOVPenalty=-100.000 ||| -256.307
-37 ||| three measure based on the that age is found that is almost ১৩.৭ ± ০.২ billion years . ||| lm_0=-50.078 lm_1=-61.961 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-11.000 tm_pt_3=-0.000 tm_pt_4=-10.000 tm_pt_5=-34.511 tm_pt_6=-23.848 tm_pt_7=-11.000 tm_pt_8=-29.898 tm_pt_9=-3.143 tm_pt_10=-17.819 tm_pt_11=-0.000 tm_pt_12=-5.086 tm_pt_13=-0.000 tm_pt_14=-15.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=13.000 WordPenalty=-8.686 OOVPenalty=-300.000 ||| -425.421
-37 ||| three measure based on the that is found in that is almost ১৩.৭ ± ০.২ billion years . ||| lm_0=-47.005 lm_1=-61.961 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-11.000 tm_pt_3=-0.000 tm_pt_4=-10.000 tm_pt_5=-41.431 tm_pt_6=-27.078 tm_pt_7=-11.000 tm_pt_8=-29.898 tm_pt_9=-3.504 tm_pt_10=-23.356 tm_pt_11=-0.000 tm_pt_12=-6.272 tm_pt_13=-0.000 tm_pt_14=-15.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=13.000 WordPenalty=-8.686 OOVPenalty=-300.000 ||| -426.199
-37 ||| three measure base on the that is found in that is almost ১৩.৭ ± ০.২ billion years . ||| lm_0=-48.066 lm_1=-61.961 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-11.000 tm_pt_3=-0.000 tm_pt_4=-10.000 tm_pt_5=-40.394 tm_pt_6=-25.427 tm_pt_7=-11.000 tm_pt_8=-29.898 tm_pt_9=-3.736 tm_pt_10=-21.197 tm_pt_11=-0.000 tm_pt_12=-6.677 tm_pt_13=-0.000 tm_pt_14=-15.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=13.000 WordPenalty=-8.686 OOVPenalty=-300.000 ||| -426.523
-37 ||| three measure based on the age is found in the is almost ১৩.৭ ± ০.২ billion years . ||| lm_0=-47.656 lm_1=-61.961 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-11.000 tm_pt_3=-0.000 tm_pt_4=-8.000 tm_pt_5=-38.810 tm_pt_6=-24.879 tm_pt_7=-11.000 tm_pt_8=-29.898 tm_pt_9=-3.510 tm_pt_10=-14.501 tm_pt_11=-0.000 tm_pt_12=-6.681 tm_pt_13=-0.000 tm_pt_14=-15.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=10.000 WordPenalty=-8.686 OOVPenalty=-300.000 ||| -426.534
-37 ||| three measure based on the that are found in that is almost ১৩.৭ ± ০.২ billion years . ||| lm_0=-47.774 lm_1=-61.961 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-11.000 tm_pt_3=-0.000 tm_pt_4=-10.000 tm_pt_5=-38.527 tm_pt_6=-24.999 tm_pt_7=-11.000 tm_pt_8=-29.898 tm_pt_9=-3.504 tm_pt_10=-23.330 tm_pt_11=-0.000 tm_pt_12=-5.579 tm_pt_13=-0.000 tm_pt_14=-15.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=12.000 WordPenalty=-8.686 OOVPenalty=-300.000 ||| -426.632
-38 ||| কাছেই are east russia which ওখটস্ক sea and japan sea another can situated . ||| lm_0=-45.799 lm_1=-48.917 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-9.000 tm_pt_3=-0.000 tm_pt_4=-9.000 tm_pt_5=-15.090 tm_pt_6=-9.552 tm_pt_7=-9.000 tm_pt_8=-24.462 tm_pt_9=-0.754 tm_pt_10=-8.592 tm_pt_11=-0.000 tm_pt_12=-6.507 tm_pt_13=-0.000 tm_pt_14=-12.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=11.000 WordPenalty=-6.949 OOVPenalty=-200.000 ||| -298.593
-38 ||| কাছেই are east russia which ওখটস্ক sea and japan sea other can situated . ||| lm_0=-45.540 lm_1=-48.917 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-9.000 tm_pt_3=-0.000 tm_pt_4=-9.000 tm_pt_5=-16.129 tm_pt_6=-8.992 tm_pt_7=-9.000 tm_pt_8=-24.462 tm_pt_9=-0.754 tm_pt_10=-10.656 tm_pt_11=-0.000 tm_pt_12=-6.507 tm_pt_13=-0.000 tm_pt_14=-12.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=11.000 WordPenalty=-6.949 OOVPenalty=-200.000 ||| -298.816
-38 ||| কাছেই are east russia which ওখটস্ক sea and japan sea another can located . ||| lm_0=-45.973 lm_1=-48.917 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-9.000 tm_pt_3=-0.000 tm_pt_4=-9.000 tm_pt_5=-15.282 tm_pt_6=-9.759 tm_pt_7=-9.000 tm_pt_8=-24.462 tm_pt_9=-0.754 tm_pt_10=-8.592 tm_pt_11=-0.000 tm_pt_12=-6.507 tm_pt_13=-0.000 tm_pt_14=-12.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=11.000 WordPenalty=-6.949 OOVPenalty=-200.000 ||| -298.917
-38 ||| কাছেই are east russia which ওখটস্ক sea and japan sea other can located . ||| lm_0=-45.715 lm_1=-48.917 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-9.000 tm_pt_3=-0.000 tm_pt_4=-9.000 tm_pt_5=-16.321 tm_pt_6=-9.200 tm_pt_7=-9.000 tm_pt_8=-24.462 tm_pt_9=-0.754 tm_pt_10=-10.656 tm_pt_11=-0.000 tm_pt_12=-6.507 tm_pt_13=-0.000 tm_pt_14=-12.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=11.000 WordPenalty=-6.949 OOVPenalty=-200.000 ||| -299.140
-38 ||| কাছেই is east russia which ওখটস্ক sea and japan sea another can situated . ||| lm_0=-45.709 lm_1=-48.917 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-9.000 tm_pt_3=-0.000 tm_pt_4=-9.000 tm_pt_5=-16.168 tm_pt_6=-9.805 tm_pt_7=-9.000 tm_pt_8=-24.462 tm_pt_9=-0.754 tm_pt_10=-9.849 tm_pt_11=-0.000 tm_pt_12=-6.890 tm_pt_13=-0.000 tm_pt_14=-12.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=11.000 WordPenalty=-6.949 OOVPenalty=-200.000 ||| -299.237
-38 ||| কাছেই are east russia which ওখটস্ক sea and japan sea another can at ||| lm_0=-45.363 lm_1=-45.656 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-9.000 tm_pt_3=-0.000 tm_pt_4=-9.000 tm_pt_5=-18.427 tm_pt_6=-18.338 tm_pt_7=-9.000 tm_pt_8=-24.462 tm_pt_9=-1.386 tm_pt_10=-12.882 tm_pt_11=-0.000 tm_pt_12=-7.200 tm_pt_13=-0.000 tm_pt_14=-11.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=11.000 WordPenalty=-6.514 OOVPenalty=-200.000 ||| -301.470
-38 ||| কাছেই are east russia which ওখটস্ক sea and japan sea other can at ||| lm_0=-45.105 lm_1=-45.656 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-9.000 tm_pt_3=-0.000 tm_pt_4=-9.000 tm_pt_5=-19.466 tm_pt_6=-17.778 tm_pt_7=-9.000 tm_pt_8=-24.462 tm_pt_9=-1.386 tm_pt_10=-14.946 tm_pt_11=-0.000 tm_pt_12=-7.200 tm_pt_13=-0.000 tm_pt_14=-11.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=11.000 WordPenalty=-6.514 OOVPenalty=-200.000 ||| -301.693
-39 ||| kolkata the national library the leading public লাইব্রেরি . ||| lm_0=-25.795 lm_1=-32.611 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-8.000 tm_pt_3=-0.000 tm_pt_4=-8.000 tm_pt_5=-17.614 tm_pt_6=-12.955 tm_pt_7=-8.000 tm_pt_8=-21.744 tm_pt_9=-1.370 tm_pt_10=-17.254 tm_pt_11=-0.000 tm_pt_12=-5.352 tm_pt_13=-0.000 tm_pt_14=-8.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=9.000 WordPenalty=-4.777 OOVPenalty=-100.000 ||| -167.803
-39 ||| kolkata indian national library the leading public লাইব্রেরি . ||| lm_0=-27.113 lm_1=-32.611 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-7.000 tm_pt_3=-0.000 tm_pt_4=-7.000 tm_pt_5=-15.158 tm_pt_6=-14.274 tm_pt_7=-7.000 tm_pt_8=-19.026 tm_pt_9=-1.738 tm_pt_10=-13.975 tm_pt_11=-0.000 tm_pt_12=-4.141 tm_pt_13=-0.000 tm_pt_14=-8.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=8.000 WordPenalty=-4.777 OOVPenalty=-100.000 ||| -168.246
-39 ||| kolkata the national library countries leading public লাইব্রেরি . ||| lm_0=-27.936 lm_1=-32.611 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-8.000 tm_pt_3=-0.000 tm_pt_4=-8.000 tm_pt_5=-14.518 tm_pt_6=-14.341 tm_pt_7=-8.000 tm_pt_8=-21.744 tm_pt_9=-1.370 tm_pt_10=-13.423 tm_pt_11=-0.000 tm_pt_12=-5.209 tm_pt_13=-0.000 tm_pt_14=-8.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=9.000 WordPenalty=-4.777 OOVPenalty=-100.000 ||| -169.289
-39 ||| kolkata is the national library the leading public লাইব্রেরি . ||| lm_0=-24.166 lm_1=-35.872 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-9.000 tm_pt_3=-0.000 tm_pt_4=-7.000 tm_pt_5=-20.796 tm_pt_6=-7.964 tm_pt_7=-9.000 tm_pt_8=-24.462 tm_pt_9=-0.373 tm_pt_10=-17.972 tm_pt_11=-0.000 tm_pt_12=-8.984 tm_pt_13=-0.000 tm_pt_14=-9.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=8.000 WordPenalty=-5.212 OOVPenalty=-100.000 ||| -169.565
-39 ||| kolkata the leading indian national library public লাইব্রেরি . ||| lm_0=-25.518 lm_1=-32.611 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-7.000 tm_pt_3=-0.000 tm_pt_4=-5.000 tm_pt_5=-15.158 tm_pt_6=-14.274 tm_pt_7=-7.000 tm_pt_8=-19.026 tm_pt_9=-2.736 tm_pt_10=-13.149 tm_pt_11=-0.000 tm_pt_12=-3.633 tm_pt_13=-0.000 tm_pt_14=-8.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=5.000 WordPenalty=-4.777 OOVPenalty=-100.000 ||| -169.626
-40 ||| ছত্রাকবিদ্যা ||| lm_0=-7.355 lm_1=-6.522 tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=0.000 tm_pt_3=0.000 tm_pt_4=0.000 tm_pt_5=0.000 tm_pt_6=0.000 tm_pt_7=0.000 tm_pt_8=0.000 tm_pt_9=0.000 tm_pt_10=0.000 tm_pt_11=0.000 tm_pt_12=0.000 tm_pt_13=0.000 tm_pt_14=0.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=1.000 WordPenalty=-1.303 OOVPenalty=-100.000 ||| -111.358
-41 ||| রাষ্ট্রসঙ্ঘের general secretary বান ki moon ||| lm_0=-24.479 lm_1=-22.828 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-2.000 tm_pt_3=-0.000 tm_pt_4=-2.000 tm_pt_5=-5.844 tm_pt_6=-4.164 tm_pt_7=-2.000 tm_pt_8=-5.436 tm_pt_9=-1.135 tm_pt_10=0.000 tm_pt_11=-0.000 tm_pt_12=-0.511 tm_pt_13=-0.000 tm_pt_14=-4.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=4.000 WordPenalty=-3.474 OOVPenalty=-200.000 ||| -246.419
-41 ||| রাষ্ট্রসঙ্ঘের secretary general বান ki moon ||| lm_0=-23.882 lm_1=-22.828 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-2.000 tm_pt_3=-0.000 tm_pt_4=-1.000 tm_pt_5=-5.844 tm_pt_6=-4.164 tm_pt_7=-2.000 tm_pt_8=-5.436 tm_pt_9=-1.050 tm_pt_10=0.000 tm_pt_11=-0.000 tm_pt_12=-0.223 tm_pt_13=-0.000 tm_pt_14=-4.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=3.000 WordPenalty=-3.474 OOVPenalty=-200.000 ||| -246.473
-41 ||| রাষ্ট্রসঙ্ঘের chief secretary বান ki moon ||| lm_0=-24.832 lm_1=-22.828 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-2.000 tm_pt_3=-0.000 tm_pt_4=-2.000 tm_pt_5=-7.082 tm_pt_6=-4.649 tm_pt_7=-2.000 tm_pt_8=-5.436 tm_pt_9=-2.000 tm_pt_10=0.000 tm_pt_11=-0.000 tm_pt_12=-1.609 tm_pt_13=-0.000 tm_pt_14=-4.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=4.000 WordPenalty=-3.474 OOVPenalty=-200.000 ||| -248.355
-41 ||| বান রাষ্ট্রসঙ্ঘের general secretary ki moon ||| lm_0=-24.479 lm_1=-22.828 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-2.000 tm_pt_3=-0.000 tm_pt_4=-1.000 tm_pt_5=-5.844 tm_pt_6=-4.164 tm_pt_7=-2.000 tm_pt_8=-5.436 tm_pt_9=-2.000 tm_pt_10=0.000 tm_pt_11=-0.000 tm_pt_12=-1.609 tm_pt_13=-0.000 tm_pt_14=-4.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=2.000 WordPenalty=-3.474 OOVPenalty=-200.000 ||| -249.481
-41 ||| রাষ্ট্রসঙ্ঘের general secretary বান what moon ||| lm_0=-26.709 lm_1=-22.828 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-3.000 tm_pt_3=-0.000 tm_pt_4=-3.000 tm_pt_5=-7.058 tm_pt_6=-3.653 tm_pt_7=-3.000 tm_pt_8=-8.154 tm_pt_9=-0.510 tm_pt_10=-1.322 tm_pt_11=-0.000 tm_pt_12=-2.015 tm_pt_13=-0.000 tm_pt_14=-4.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=5.000 WordPenalty=-3.474 OOVPenalty=-200.000 ||| -249.788
-42 ||| মিনিক্সের of was smells টানেনবম a famous operating system design প্রশিক্ষক . ||| lm_0=-40.398 lm_1=-42.395 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-8.000 tm_pt_3=-0.000 tm_pt_4=-8.000 tm_pt_5=-22.185 tm_pt_6=-8.038 tm_pt_7=-8.000 tm_pt_8=-21.744 tm_pt_9=-2.368 tm_pt_10=-17.562 tm_pt_11=-0.000 tm_pt_12=-3.604 tm_pt_13=-0.000 tm_pt_14=-9.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=11.000 WordPenalty=-6.080 OOVPenalty=-300.000 ||| -390.309
-42 ||| মিনিক্সের of was smells টানেনবম a famous operating system designing প্রশিক্ষক . ||| lm_0=-40.309 lm_1=-42.395 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-8.000 tm_pt_3=-0.000 tm_pt_4=-8.000 tm_pt_5=-21.718 tm_pt_6=-8.785 tm_pt_7=-8.000 tm_pt_8=-21.744 tm_pt_9=-2.418 tm_pt_10=-16.650 tm_pt_11=-0.000 tm_pt_12=-4.415 tm_pt_13=-0.000 tm_pt_14=-9.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=11.000 WordPenalty=-6.080 OOVPenalty=-300.000 ||| -390.456
-42 ||| মিনিক্সের to was smells টানেনবম a famous operating system design প্রশিক্ষক . ||| lm_0=-40.681 lm_1=-42.395 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-8.000 tm_pt_3=-0.000 tm_pt_4=-7.000 tm_pt_5=-22.433 tm_pt_6=-9.424 tm_pt_7=-8.000 tm_pt_8=-21.744 tm_pt_9=-2.375 tm_pt_10=-17.042 tm_pt_11=-0.000 tm_pt_12=-3.747 tm_pt_13=-0.000 tm_pt_14=-9.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=10.000 WordPenalty=-6.080 OOVPenalty=-300.000 ||| -392.066
-42 ||| মিনিক্সের of were smells টানেনবম a famous operating system design প্রশিক্ষক . ||| lm_0=-40.592 lm_1=-42.395 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-8.000 tm_pt_3=-0.000 tm_pt_4=-7.000 tm_pt_5=-22.244 tm_pt_6=-9.498 tm_pt_7=-8.000 tm_pt_8=-21.744 tm_pt_9=-2.375 tm_pt_10=-15.933 tm_pt_11=-0.000 tm_pt_12=-5.149 tm_pt_13=-0.000 tm_pt_14=-9.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=10.000 WordPenalty=-6.080 OOVPenalty=-300.000 ||| -392.154
-43 ||| the টাইম্ the 's of ইন্ডিয়া-তে written in the " it is absurd to compare it with any other indian cinema ... pather panchali is pure cinema " -lrb- " it other by indian films with compared to unreal ... pather panchali are pure film " -rrb- . ||| lm_0=-132.510 lm_1=-159.795 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-28.000 tm_pt_3=-0.000 tm_pt_4=-27.000 tm_pt_5=-85.959 tm_pt_6=-42.518 tm_pt_7=-28.000 tm_pt_8=-76.104 tm_pt_9=-2.168 tm_pt_10=-54.697 tm_pt_11=-0.000 tm_pt_12=-12.202 tm_pt_13=-0.000 tm_pt_14=-32.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=43.000 WordPenalty=-21.715 OOVPenalty=-1600.000 ||| -1912.495
-43 ||| the টাইম্ of 's of ইন্ডিয়া-তে written in the " it is absurd to compare it with any other indian cinema ... pather panchali is pure cinema " -lrb- " it other by indian films with compared to unreal ... pather panchali are pure film " -rrb- . ||| lm_0=-132.327 lm_1=-159.795 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-28.000 tm_pt_3=-0.000 tm_pt_4=-27.000 tm_pt_5=-85.962 tm_pt_6=-42.848 tm_pt_7=-28.000 tm_pt_8=-76.104 tm_pt_9=-2.296 tm_pt_10=-54.098 tm_pt_11=-0.000 tm_pt_12=-12.895 tm_pt_13=-0.000 tm_pt_14=-32.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=43.000 WordPenalty=-21.715 OOVPenalty=-1600.000 ||| -1912.577
-43 ||| the টাইম্ the 's of ইন্ডিয়া-তে written in that " it is absurd to compare it with any other indian cinema ... pather panchali is pure cinema " -lrb- " it other by indian films with compared to unreal ... pather panchali are pure film " -rrb- . ||| lm_0=-133.742 lm_1=-159.795 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-28.000 tm_pt_3=-0.000 tm_pt_4=-27.000 tm_pt_5=-83.499 tm_pt_6=-42.159 tm_pt_7=-28.000 tm_pt_8=-76.104 tm_pt_9=-2.168 tm_pt_10=-52.351 tm_pt_11=-0.000 tm_pt_12=-11.882 tm_pt_13=-0.000 tm_pt_14=-32.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=43.000 WordPenalty=-21.715 OOVPenalty=-1600.000 ||| -1912.703
-43 ||| the টাইম্ of 's of ইন্ডিয়া-তে written in that " it is absurd to compare it with any other indian cinema ... pather panchali is pure cinema " -lrb- " it other by indian films with compared to unreal ... pather panchali are pure film " -rrb- . ||| lm_0=-133.558 lm_1=-159.795 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-28.000 tm_pt_3=-0.000 tm_pt_4=-27.000 tm_pt_5=-83.503 tm_pt_6=-42.489 tm_pt_7=-28.000 tm_pt_8=-76.104 tm_pt_9=-2.296 tm_pt_10=-51.752 tm_pt_11=-0.000 tm_pt_12=-12.575 tm_pt_13=-0.000 tm_pt_14=-32.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=43.000 WordPenalty=-21.715 OOVPenalty=-1600.000 ||| -1912.785
-43 ||| the টাইম্ the 's of ইন্ডিয়া-তে written in the " it is absurd to compare it with any other indian cinema ... pather panchali is pure cinema " -lrb- " it other by indian films with compared to unreal ... pather panchali is pure film " -rrb- . ||| lm_0=-132.420 lm_1=-159.795 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-29.000 tm_pt_3=-0.000 tm_pt_4=-28.000 tm_pt_5=-86.665 tm_pt_6=-42.400 tm_pt_7=-29.000 tm_pt_8=-78.822 tm_pt_9=-2.168 tm_pt_10=-55.990 tm_pt_11=-0.000 tm_pt_12=-12.468 tm_pt_13=-0.000 tm_pt_14=-32.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=44.000 WordPenalty=-21.715 OOVPenalty=-1600.000 ||| -1913.098
-43 ||| the টাইম্ the 's of ইন্ডিয়া-তে written in the " it is absurd to compare it with any other indian cinema ... pather panchali is pure cinema " -lrb- " it other by indian films with compared to unreal ... pather panchali are pure film ' -rrb- . ||| lm_0=-132.895 lm_1=-159.795 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-28.000 tm_pt_3=-0.000 tm_pt_4=-27.000 tm_pt_5=-85.919 tm_pt_6=-42.967 tm_pt_7=-28.000 tm_pt_8=-76.104 tm_pt_9=-2.168 tm_pt_10=-54.224 tm_pt_11=-0.000 tm_pt_12=-12.687 tm_pt_13=-0.000 tm_pt_14=-32.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=43.000 WordPenalty=-21.715 OOVPenalty=-1600.000 ||| -1913.166
-43 ||| the টাইম্ of 's of ইন্ডিয়া-তে written in the " it is absurd to compare it with any other indian cinema ... pather panchali is pure cinema " -lrb- " it other by indian films with compared to unreal ... pather panchali is pure film " -rrb- . ||| lm_0=-132.237 lm_1=-159.795 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-29.000 tm_pt_3=-0.000 tm_pt_4=-28.000 tm_pt_5=-86.668 tm_pt_6=-42.730 tm_pt_7=-29.000 tm_pt_8=-78.822 tm_pt_9=-2.296 tm_pt_10=-55.391 tm_pt_11=-0.000 tm_pt_12=-13.161 tm_pt_13=-0.000 tm_pt_14=-32.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=44.000 WordPenalty=-21.715 OOVPenalty=-1600.000 ||| -1913.180
-43 ||| the টাইম্ of 's of ইন্ডিয়া-তে written in the " it is absurd to compare it with any other indian cinema ... pather panchali is pure cinema " -lrb- " it other by indian films with compared to unreal ... pather panchali are pure film ' -rrb- . ||| lm_0=-132.712 lm_1=-159.795 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-28.000 tm_pt_3=-0.000 tm_pt_4=-27.000 tm_pt_5=-85.923 tm_pt_6=-43.298 tm_pt_7=-28.000 tm_pt_8=-76.104 tm_pt_9=-2.296 tm_pt_10=-53.625 tm_pt_11=-0.000 tm_pt_12=-13.380 tm_pt_13=-0.000 tm_pt_14=-32.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=43.000 WordPenalty=-21.715 OOVPenalty=-1600.000 ||| -1913.248
-43 ||| the টাইম্ the 's of ইন্ডিয়া-তে written in that " it is absurd to compare it with any other indian cinema ... pather panchali is pure cinema " -lrb- " it other by indian films with compared to unreal ... pather panchali is pure film " -rrb- . ||| lm_0=-133.652 lm_1=-159.795 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-29.000 tm_pt_3=-0.000 tm_pt_4=-28.000 tm_pt_5=-84.205 tm_pt_6=-42.040 tm_pt_7=-29.000 tm_pt_8=-78.822 tm_pt_9=-2.168 tm_pt_10=-53.644 tm_pt_11=-0.000 tm_pt_12=-12.148 tm_pt_13=-0.000 tm_pt_14=-32.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=44.000 WordPenalty=-21.715 OOVPenalty=-1600.000 ||| -1913.307
-43 ||| the টাইম্ the 's of ইন্ডিয়া-তে written in that " it is absurd to compare it with any other indian cinema ... pather panchali is pure cinema " -lrb- " it other by indian films with compared to unreal ... pather panchali are pure film ' -rrb- . ||| lm_0=-134.126 lm_1=-159.795 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-28.000 tm_pt_3=-0.000 tm_pt_4=-27.000 tm_pt_5=-83.460 tm_pt_6=-42.608 tm_pt_7=-28.000 tm_pt_8=-76.104 tm_pt_9=-2.168 tm_pt_10=-51.878 tm_pt_11=-0.000 tm_pt_12=-12.367 tm_pt_13=-0.000 tm_pt_14=-32.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=43.000 WordPenalty=-21.715 OOVPenalty=-1600.000 ||| -1913.375
-44 ||| after 1953 on may , nazrul and প্রমীলা দেবীকে চিকিৎসার for london sent . ||| lm_0=-46.024 lm_1=-48.917 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-10.000 tm_pt_3=-0.000 tm_pt_4=-9.000 tm_pt_5=-21.111 tm_pt_6=-17.244 tm_pt_7=-10.000 tm_pt_8=-27.180 tm_pt_9=-1.467 tm_pt_10=-9.648 tm_pt_11=-0.000 tm_pt_12=-6.183 tm_pt_13=-0.000 tm_pt_14=-11.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=12.000 WordPenalty=-6.949 OOVPenalty=-300.000 ||| -403.881
-44 ||| after 1953 on may , nazrul and প্রমীলা দেবীকে চিকিৎসার for london sent to . ||| lm_0=-46.988 lm_1=-52.178 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-10.000 tm_pt_3=-0.000 tm_pt_4=-9.000 tm_pt_5=-24.146 tm_pt_6=-11.499 tm_pt_7=-10.000 tm_pt_8=-27.180 tm_pt_9=-0.603 tm_pt_10=-8.039 tm_pt_11=-0.000 tm_pt_12=-5.084 tm_pt_13=-0.000 tm_pt_14=-12.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=12.000 WordPenalty=-7.383 OOVPenalty=-300.000 ||| -404.278
-44 ||| after on may , 1953 nazrul and প্রমীলা দেবীকে চিকিৎসার for london sent . ||| lm_0=-45.744 lm_1=-48.917 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-10.000 tm_pt_3=-0.000 tm_pt_4=-9.000 tm_pt_5=-21.111 tm_pt_6=-17.244 tm_pt_7=-10.000 tm_pt_8=-27.180 tm_pt_9=-1.467 tm_pt_10=-9.407 tm_pt_11=-0.000 tm_pt_12=-6.183 tm_pt_13=-0.000 tm_pt_14=-11.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=11.000 WordPenalty=-6.949 OOVPenalty=-300.000 ||| -404.473
-44 ||| thereafter on may , 1953 nazrul and প্রমীলা দেবীকে চিকিৎসার for london sent . ||| lm_0=-45.937 lm_1=-48.917 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-10.000 tm_pt_3=-0.000 tm_pt_4=-9.000 tm_pt_5=-20.564 tm_pt_6=-19.871 tm_pt_7=-10.000 tm_pt_8=-27.180 tm_pt_9=-1.553 tm_pt_10=-5.857 tm_pt_11=-0.000 tm_pt_12=-6.470 tm_pt_13=-0.000 tm_pt_14=-11.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=11.000 WordPenalty=-6.949 OOVPenalty=-300.000 ||| -404.756
-44 ||| after nazrul on may , 1953 and প্রমীলা দেবীকে চিকিৎসার for london sent . ||| lm_0=-45.135 lm_1=-48.917 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-9.000 tm_pt_3=-0.000 tm_pt_4=-6.000 tm_pt_5=-21.111 tm_pt_6=-17.244 tm_pt_7=-9.000 tm_pt_8=-24.462 tm_pt_9=-3.018 tm_pt_10=-3.750 tm_pt_11=-0.000 tm_pt_12=-4.807 tm_pt_13=-0.000 tm_pt_14=-11.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=8.000 WordPenalty=-6.949 OOVPenalty=-300.000 ||| -404.776
-44 ||| on may , after 1953 nazrul and প্রমীলা দেবীকে চিকিৎসার for london sent . ||| lm_0=-45.664 lm_1=-48.917 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-10.000 tm_pt_3=-0.000 tm_pt_4=-8.000 tm_pt_5=-21.111 tm_pt_6=-17.244 tm_pt_7=-10.000 tm_pt_8=-27.180 tm_pt_9=-1.419 tm_pt_10=-7.862 tm_pt_11=-0.000 tm_pt_12=-5.846 tm_pt_13=-0.000 tm_pt_14=-11.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=10.000 WordPenalty=-6.949 OOVPenalty=-300.000 ||| -404.778
-44 ||| after on may , 1953 nazrul and প্রমীলা দেবীকে চিকিৎসার for london sent to . ||| lm_0=-46.709 lm_1=-52.178 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-10.000 tm_pt_3=-0.000 tm_pt_4=-9.000 tm_pt_5=-24.146 tm_pt_6=-11.499 tm_pt_7=-10.000 tm_pt_8=-27.180 tm_pt_9=-0.603 tm_pt_10=-7.798 tm_pt_11=-0.000 tm_pt_12=-5.084 tm_pt_13=-0.000 tm_pt_14=-12.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=11.000 WordPenalty=-7.383 OOVPenalty=-300.000 ||| -404.870
-44 ||| after 1953 on may , nazrul and প্রমীলা দেবীকে চিকিৎসার send to london . ||| lm_0=-43.328 lm_1=-48.917 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-10.000 tm_pt_3=-0.000 tm_pt_4=-6.000 tm_pt_5=-22.429 tm_pt_6=-14.784 tm_pt_7=-10.000 tm_pt_8=-27.180 tm_pt_9=-0.835 tm_pt_10=-16.668 tm_pt_11=-0.000 tm_pt_12=-7.081 tm_pt_13=-0.000 tm_pt_14=-11.000 tm_pt_15=-1.000 tm_pt_16=-0.000 tm_glue_0=9.000 WordPenalty=-6.949 OOVPenalty=-300.000 ||| -405.022
-45 ||| the southern and the are বিস্তীর্ণ land , west and in are রুক্ষ mountain and mountain . ||| lm_0=-43.886 lm_1=-58.700 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-13.000 tm_pt_3=-0.000 tm_pt_4=-11.000 tm_pt_5=-37.865 tm_pt_6=-16.165 tm_pt_7=-13.000 tm_pt_8=-35.334 tm_pt_9=-1.822 tm_pt_10=-32.454 tm_pt_11=-0.000 tm_pt_12=-11.897 tm_pt_13=-0.000 tm_pt_14=-15.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=13.000 WordPenalty=-8.252 OOVPenalty=-200.000 ||| -320.811
-45 ||| the southern and the are বিস্তীর্ণ land , west and in is রুক্ষ mountain and mountain . ||| lm_0=-43.796 lm_1=-58.700 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-13.000 tm_pt_3=-0.000 tm_pt_4=-11.000 tm_pt_5=-38.815 tm_pt_6=-16.291 tm_pt_7=-13.000 tm_pt_8=-35.334 tm_pt_9=-1.822 tm_pt_10=-33.161 tm_pt_11=-0.000 tm_pt_12=-11.730 tm_pt_13=-0.000 tm_pt_14=-15.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=13.000 WordPenalty=-8.252 OOVPenalty=-200.000 ||| -321.050
-45 ||| the southern and the is বিস্তীর্ণ land , west and in are রুক্ষ mountain and mountain . ||| lm_0=-43.797 lm_1=-58.700 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-13.000 tm_pt_3=-0.000 tm_pt_4=-11.000 tm_pt_5=-38.815 tm_pt_6=-16.291 tm_pt_7=-13.000 tm_pt_8=-35.334 tm_pt_9=-1.822 tm_pt_10=-33.161 tm_pt_11=-0.000 tm_pt_12=-11.730 tm_pt_13=-0.000 tm_pt_14=-15.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=13.000 WordPenalty=-8.252 OOVPenalty=-200.000 ||| -321.051
-45 ||| the south and the are বিস্তীর্ণ land , west and in are রুক্ষ mountain and mountain . ||| lm_0=-44.748 lm_1=-58.700 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-13.000 tm_pt_3=-0.000 tm_pt_4=-11.000 tm_pt_5=-37.569 tm_pt_6=-14.360 tm_pt_7=-13.000 tm_pt_8=-35.334 tm_pt_9=-2.804 tm_pt_10=-31.413 tm_pt_11=-0.000 tm_pt_12=-10.259 tm_pt_13=-0.000 tm_pt_14=-15.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=13.000 WordPenalty=-8.252 OOVPenalty=-200.000 ||| -321.161
-45 ||| the southern and the is বিস্তীর্ণ land , west and in is রুক্ষ mountain and mountain . ||| lm_0=-43.707 lm_1=-58.700 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-13.000 tm_pt_3=-0.000 tm_pt_4=-11.000 tm_pt_5=-39.765 tm_pt_6=-16.417 tm_pt_7=-13.000 tm_pt_8=-35.334 tm_pt_9=-1.822 tm_pt_10=-33.868 tm_pt_11=-0.000 tm_pt_12=-11.563 tm_pt_13=-0.000 tm_pt_14=-15.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=13.000 WordPenalty=-8.252 OOVPenalty=-200.000 ||| -321.289
-45 ||| the south and the are বিস্তীর্ণ land , west and in is রুক্ষ mountain and mountain . ||| lm_0=-44.658 lm_1=-58.700 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-13.000 tm_pt_3=-0.000 tm_pt_4=-11.000 tm_pt_5=-38.520 tm_pt_6=-14.486 tm_pt_7=-13.000 tm_pt_8=-35.334 tm_pt_9=-2.804 tm_pt_10=-32.120 tm_pt_11=-0.000 tm_pt_12=-10.092 tm_pt_13=-0.000 tm_pt_14=-15.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=13.000 WordPenalty=-8.252 OOVPenalty=-200.000 ||| -321.399
-45 ||| the south and the is বিস্তীর্ণ land , west and in are রুক্ষ mountain and mountain . ||| lm_0=-44.659 lm_1=-58.700 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-13.000 tm_pt_3=-0.000 tm_pt_4=-11.000 tm_pt_5=-38.520 tm_pt_6=-14.486 tm_pt_7=-13.000 tm_pt_8=-35.334 tm_pt_9=-2.804 tm_pt_10=-32.120 tm_pt_11=-0.000 tm_pt_12=-10.092 tm_pt_13=-0.000 tm_pt_14=-15.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=13.000 WordPenalty=-8.252 OOVPenalty=-200.000 ||| -321.400
-45 ||| the southern and the are বিস্তীর্ণ land , west and in are রুক্ষ hill and mountain . ||| lm_0=-44.348 lm_1=-58.700 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-12.000 tm_pt_3=-0.000 tm_pt_4=-10.000 tm_pt_5=-38.680 tm_pt_6=-17.956 tm_pt_7=-12.000 tm_pt_8=-32.616 tm_pt_9=-2.454 tm_pt_10=-29.789 tm_pt_11=-0.000 tm_pt_12=-11.192 tm_pt_13=-0.000 tm_pt_14=-15.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=12.000 WordPenalty=-8.252 OOVPenalty=-200.000 ||| -321.539
-45 ||| the south and the is বিস্তীর্ণ land , west and in is রুক্ষ mountain and mountain . ||| lm_0=-44.570 lm_1=-58.700 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-13.000 tm_pt_3=-0.000 tm_pt_4=-11.000 tm_pt_5=-39.470 tm_pt_6=-14.612 tm_pt_7=-13.000 tm_pt_8=-35.334 tm_pt_9=-2.804 tm_pt_10=-32.827 tm_pt_11=-0.000 tm_pt_12=-9.925 tm_pt_13=-0.000 tm_pt_14=-15.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=13.000 WordPenalty=-8.252 OOVPenalty=-200.000 ||| -321.638
-46 ||| ট্রেডমার্ক ||| lm_0=-7.355 lm_1=-6.522 tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=0.000 tm_pt_3=0.000 tm_pt_4=0.000 tm_pt_5=0.000 tm_pt_6=0.000 tm_pt_7=0.000 tm_pt_8=0.000 tm_pt_9=0.000 tm_pt_10=0.000 tm_pt_11=0.000 tm_pt_12=0.000 tm_pt_13=0.000 tm_pt_14=0.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=1.000 WordPenalty=-1.303 OOVPenalty=-100.000 ||| -111.358
-47 ||| লর্ড ওয়েলেসলির -lrb- গভর্নর-জেনারেল ১৭৯৭-১৮০৫ -rrb- during his city are the ghotechilo ||| lm_0=-42.957 lm_1=-42.395 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-7.000 tm_pt_3=-0.000 tm_pt_4=-7.000 tm_pt_5=-23.058 tm_pt_6=-19.554 tm_pt_7=-7.000 tm_pt_8=-19.026 tm_pt_9=-2.000 tm_pt_10=-11.563 tm_pt_11=-0.000 tm_pt_12=-5.490 tm_pt_13=-0.000 tm_pt_14=-8.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=11.000 WordPenalty=-6.080 OOVPenalty=-400.000 ||| -495.528
-47 ||| লর্ড ওয়েলেসলির -lrb- গভর্নর-জেনারেল ১৭৯৭-১৮০৫ -rrb- during his the are the ghotechilo ||| lm_0=-42.350 lm_1=-42.395 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-7.000 tm_pt_3=-0.000 tm_pt_4=-7.000 tm_pt_5=-26.541 tm_pt_6=-19.235 tm_pt_7=-7.000 tm_pt_8=-19.026 tm_pt_9=-2.000 tm_pt_10=-15.056 tm_pt_11=-0.000 tm_pt_12=-6.114 tm_pt_13=-0.000 tm_pt_14=-8.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=11.000 WordPenalty=-6.080 OOVPenalty=-400.000 ||| -496.460
-47 ||| লর্ড ওয়েলেসলির -lrb- গভর্নর-জেনারেল ১৭৯৭-১৮০৫ -rrb- during his city are increased ghotechilo ||| lm_0=-44.936 lm_1=-42.395 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-7.000 tm_pt_3=-0.000 tm_pt_4=-7.000 tm_pt_5=-18.120 tm_pt_6=-20.134 tm_pt_7=-7.000 tm_pt_8=-19.026 tm_pt_9=-2.007 tm_pt_10=-7.473 tm_pt_11=-0.000 tm_pt_12=-6.643 tm_pt_13=-0.000 tm_pt_14=-8.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=11.000 WordPenalty=-6.080 OOVPenalty=-400.000 ||| -496.582
-47 ||| লর্ড ওয়েলেসলির -lrb- গভর্নর-জেনারেল ১৭৯৭-১৮০৫ -rrb- during his the mentionable the ghotechilo ||| lm_0=-43.745 lm_1=-42.395 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-7.000 tm_pt_3=-0.000 tm_pt_4=-7.000 tm_pt_5=-23.233 tm_pt_6=-19.818 tm_pt_7=-7.000 tm_pt_8=-19.026 tm_pt_9=-2.000 tm_pt_10=-11.720 tm_pt_11=-0.000 tm_pt_12=-5.739 tm_pt_13=-0.000 tm_pt_14=-8.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=11.000 WordPenalty=-6.080 OOVPenalty=-400.000 ||| -496.755
-47 ||| লর্ড ওয়েলেসলির -lrb- গভর্নর-জেনারেল ১৭৯৭-১৮০৫ -rrb- during his the mentionable increased ghotechilo ||| lm_0=-45.214 lm_1=-42.395 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-7.000 tm_pt_3=-0.000 tm_pt_4=-7.000 tm_pt_5=-18.295 tm_pt_6=-20.398 tm_pt_7=-7.000 tm_pt_8=-19.026 tm_pt_9=-2.007 tm_pt_10=-7.630 tm_pt_11=-0.000 tm_pt_12=-6.892 tm_pt_13=-0.000 tm_pt_14=-8.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=11.000 WordPenalty=-6.080 OOVPenalty=-400.000 ||| -497.179
-47 ||| লর্ড ওয়েলেসলির -lrb- গভর্নর-জেনারেল ১৭৯৭-১৮০৫ -rrb- during his the important the ghotechilo ||| lm_0=-43.532 lm_1=-42.395 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-7.000 tm_pt_3=-0.000 tm_pt_4=-7.000 tm_pt_5=-24.937 tm_pt_6=-19.942 tm_pt_7=-7.000 tm_pt_8=-19.026 tm_pt_9=-2.000 tm_pt_10=-13.772 tm_pt_11=-0.000 tm_pt_12=-5.873 tm_pt_13=-0.000 tm_pt_14=-8.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=11.000 WordPenalty=-6.080 OOVPenalty=-400.000 ||| -497.439
-47 ||| লর্ড ওয়েলেসলির -lrb- গভর্নর-জেনারেল ১৭৯৭-১৮০৫ -rrb- during his the are increased ghotechilo ||| lm_0=-44.328 lm_1=-42.395 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-7.000 tm_pt_3=-0.000 tm_pt_4=-7.000 tm_pt_5=-21.603 tm_pt_6=-19.815 tm_pt_7=-7.000 tm_pt_8=-19.026 tm_pt_9=-2.007 tm_pt_10=-10.967 tm_pt_11=-0.000 tm_pt_12=-7.267 tm_pt_13=-0.000 tm_pt_14=-8.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=11.000 WordPenalty=-6.080 OOVPenalty=-400.000 ||| -497.514
-47 ||| লর্ড ওয়েলেসলির -lrb- গভর্নর-জেনারেল ১৭৯৭-১৮০৫ -rrb- during his the notable the ghotechilo ||| lm_0=-44.104 lm_1=-42.395 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-7.000 tm_pt_3=-0.000 tm_pt_4=-7.000 tm_pt_5=-23.321 tm_pt_6=-20.360 tm_pt_7=-7.000 tm_pt_8=-19.026 tm_pt_9=-2.000 tm_pt_10=-11.784 tm_pt_11=-0.000 tm_pt_12=-6.209 tm_pt_13=-0.000 tm_pt_14=-8.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=11.000 WordPenalty=-6.080 OOVPenalty=-400.000 ||| -497.584
-48 ||| many important and real to solve problems complex number apariharza ||| lm_0=-26.936 lm_1=-35.872 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-8.000 tm_pt_3=-0.000 tm_pt_4=-7.000 tm_pt_5=-18.905 tm_pt_6=-11.630 tm_pt_7=-8.000 tm_pt_8=-21.744 tm_pt_9=-2.368 tm_pt_10=-8.959 tm_pt_11=-0.000 tm_pt_12=-4.527 tm_pt_13=-0.000 tm_pt_14=-10.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=7.000 WordPenalty=-5.212 ||| -71.121
-48 ||| many important and real problem to solve complex number apariharza ||| lm_0=-27.189 lm_1=-35.872 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-8.000 tm_pt_3=-0.000 tm_pt_4=-8.000 tm_pt_5=-18.764 tm_pt_6=-11.367 tm_pt_7=-8.000 tm_pt_8=-21.744 tm_pt_9=-3.000 tm_pt_10=-10.348 tm_pt_11=-0.000 tm_pt_12=-4.565 tm_pt_13=-0.000 tm_pt_14=-10.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=8.000 WordPenalty=-5.212 ||| -71.245
-48 ||| many important and real problem to for complex number apariharza ||| lm_0=-28.229 lm_1=-35.872 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-9.000 tm_pt_3=-0.000 tm_pt_4=-9.000 tm_pt_5=-18.001 tm_pt_6=-7.765 tm_pt_7=-9.000 tm_pt_8=-24.462 tm_pt_9=-2.368 tm_pt_10=-14.453 tm_pt_11=-0.000 tm_pt_12=-3.912 tm_pt_13=-0.000 tm_pt_14=-10.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=9.000 WordPenalty=-5.212 ||| -71.659
-48 ||| many important and real to solve problems complex number inevitable ||| lm_0=-27.158 lm_1=-35.872 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-8.000 tm_pt_3=-0.000 tm_pt_4=-7.000 tm_pt_5=-20.292 tm_pt_6=-11.630 tm_pt_7=-8.000 tm_pt_8=-21.744 tm_pt_9=-2.368 tm_pt_10=-8.959 tm_pt_11=-0.000 tm_pt_12=-4.527 tm_pt_13=-0.000 tm_pt_14=-10.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=7.000 WordPenalty=-5.212 ||| -71.661
-48 ||| many important and real to solve problems complex numbers apariharza ||| lm_0=-27.284 lm_1=-35.872 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-8.000 tm_pt_3=-0.000 tm_pt_4=-7.000 tm_pt_5=-18.673 tm_pt_6=-12.332 tm_pt_7=-8.000 tm_pt_8=-21.744 tm_pt_9=-2.368 tm_pt_10=-8.424 tm_pt_11=-0.000 tm_pt_12=-4.836 tm_pt_13=-0.000 tm_pt_14=-10.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=7.000 WordPenalty=-5.212 ||| -71.717
-48 ||| many important and real problem to solve complex number inevitable ||| lm_0=-27.411 lm_1=-35.872 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-8.000 tm_pt_3=-0.000 tm_pt_4=-8.000 tm_pt_5=-20.150 tm_pt_6=-11.367 tm_pt_7=-8.000 tm_pt_8=-21.744 tm_pt_9=-3.000 tm_pt_10=-10.348 tm_pt_11=-0.000 tm_pt_12=-4.565 tm_pt_13=-0.000 tm_pt_14=-10.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=8.000 WordPenalty=-5.212 ||| -71.784
-48 ||| many important and real problem to solve complex numbers apariharza ||| lm_0=-27.536 lm_1=-35.872 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-8.000 tm_pt_3=-0.000 tm_pt_4=-8.000 tm_pt_5=-18.531 tm_pt_6=-12.070 tm_pt_7=-8.000 tm_pt_8=-21.744 tm_pt_9=-3.000 tm_pt_10=-9.813 tm_pt_11=-0.000 tm_pt_12=-4.873 tm_pt_13=-0.000 tm_pt_14=-10.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=8.000 WordPenalty=-5.212 ||| -71.841
-48 ||| many important and real problem to for complex number inevitable ||| lm_0=-28.451 lm_1=-35.872 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-9.000 tm_pt_3=-0.000 tm_pt_4=-9.000 tm_pt_5=-19.387 tm_pt_6=-7.765 tm_pt_7=-9.000 tm_pt_8=-24.462 tm_pt_9=-2.368 tm_pt_10=-14.453 tm_pt_11=-0.000 tm_pt_12=-3.912 tm_pt_13=-0.000 tm_pt_14=-10.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=9.000 WordPenalty=-5.212 ||| -72.199
-48 ||| many important and real to solve problems complex numbers inevitable ||| lm_0=-27.505 lm_1=-35.872 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-8.000 tm_pt_3=-0.000 tm_pt_4=-7.000 tm_pt_5=-20.059 tm_pt_6=-12.332 tm_pt_7=-8.000 tm_pt_8=-21.744 tm_pt_9=-2.368 tm_pt_10=-8.424 tm_pt_11=-0.000 tm_pt_12=-4.836 tm_pt_13=-0.000 tm_pt_14=-10.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=7.000 WordPenalty=-5.212 ||| -72.257
-48 ||| many important and real problem to solve complex numbers inevitable ||| lm_0=-27.758 lm_1=-35.872 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-8.000 tm_pt_3=-0.000 tm_pt_4=-8.000 tm_pt_5=-19.917 tm_pt_6=-12.070 tm_pt_7=-8.000 tm_pt_8=-21.744 tm_pt_9=-3.000 tm_pt_10=-9.813 tm_pt_11=-0.000 tm_pt_12=-4.873 tm_pt_13=-0.000 tm_pt_14=-10.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=8.000 WordPenalty=-5.212 ||| -72.380
-49 ||| big bang is a famous result in the state so and recent situation from the separate . ||| lm_0=-36.721 lm_1=-58.700 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-15.000 tm_pt_3=-0.000 tm_pt_4=-12.000 tm_pt_5=-49.728 tm_pt_6=-24.253 tm_pt_7=-15.000 tm_pt_8=-40.770 tm_pt_9=-2.221 tm_pt_10=-39.767 tm_pt_11=-0.000 tm_pt_12=-14.945 tm_pt_13=-0.000 tm_pt_14=-17.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=12.000 WordPenalty=-8.252 ||| -122.970
-49 ||| big bang is a famous result in the state so and recent situation from the the . ||| lm_0=-34.761 lm_1=-58.700 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-15.000 tm_pt_3=-0.000 tm_pt_4=-12.000 tm_pt_5=-56.487 tm_pt_6=-24.541 tm_pt_7=-15.000 tm_pt_8=-40.770 tm_pt_9=-2.225 tm_pt_10=-45.470 tm_pt_11=-0.000 tm_pt_12=-15.099 tm_pt_13=-0.000 tm_pt_14=-17.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=12.000 WordPenalty=-8.252 ||| -123.473
-49 ||| big bang is a famous result in the state so and recent situation from the different . ||| lm_0=-35.034 lm_1=-58.700 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-15.000 tm_pt_3=-0.000 tm_pt_4=-12.000 tm_pt_5=-52.901 tm_pt_6=-24.828 tm_pt_7=-15.000 tm_pt_8=-40.770 tm_pt_9=-2.586 tm_pt_10=-43.998 tm_pt_11=-0.000 tm_pt_12=-16.198 tm_pt_13=-0.000 tm_pt_14=-17.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=12.000 WordPenalty=-8.252 ||| -123.527
-49 ||| big bang is a famous result in the state so and recent state from the separate . ||| lm_0=-38.242 lm_1=-58.700 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-15.000 tm_pt_3=-0.000 tm_pt_4=-12.000 tm_pt_5=-49.859 tm_pt_6=-23.063 tm_pt_7=-15.000 tm_pt_8=-40.770 tm_pt_9=-1.860 tm_pt_10=-40.011 tm_pt_11=-0.000 tm_pt_12=-13.846 tm_pt_13=-0.000 tm_pt_14=-17.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=12.000 WordPenalty=-8.252 ||| -123.849
-49 ||| big bang is a famous result in the state so and recent state from the the . ||| lm_0=-36.282 lm_1=-58.700 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-15.000 tm_pt_3=-0.000 tm_pt_4=-12.000 tm_pt_5=-56.618 tm_pt_6=-23.351 tm_pt_7=-15.000 tm_pt_8=-40.770 tm_pt_9=-1.864 tm_pt_10=-45.713 tm_pt_11=-0.000 tm_pt_12=-14.000 tm_pt_13=-0.000 tm_pt_14=-17.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=12.000 WordPenalty=-8.252 ||| -124.352
-49 ||| the big bang is a famous result in the state so and recent situation from the separate . ||| lm_0=-35.341 lm_1=-61.961 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-15.000 tm_pt_3=-0.000 tm_pt_4=-12.000 tm_pt_5=-54.868 tm_pt_6=-24.390 tm_pt_7=-15.000 tm_pt_8=-40.770 tm_pt_9=-2.216 tm_pt_10=-39.906 tm_pt_11=-0.000 tm_pt_12=-14.791 tm_pt_13=-0.000 tm_pt_14=-18.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=12.000 WordPenalty=-8.686 ||| -124.373
-49 ||| big bang is a famous result in the state so and recent state from the different . ||| lm_0=-36.555 lm_1=-58.700 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-15.000 tm_pt_3=-0.000 tm_pt_4=-12.000 tm_pt_5=-53.032 tm_pt_6=-23.639 tm_pt_7=-15.000 tm_pt_8=-40.770 tm_pt_9=-2.225 tm_pt_10=-44.242 tm_pt_11=-0.000 tm_pt_12=-15.099 tm_pt_13=-0.000 tm_pt_14=-17.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=12.000 WordPenalty=-8.252 ||| -124.406
-50 ||| windows মিলিনিয়াম ||| lm_0=-9.976 lm_1=-9.783 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-1.000 tm_pt_3=-0.000 tm_pt_4=-1.000 tm_pt_5=-0.231 tm_pt_6=-0.060 tm_pt_7=-1.000 tm_pt_8=-2.718 tm_pt_9=-0.000 tm_pt_10=-0.025 tm_pt_11=-0.000 tm_pt_12=0.000 tm_pt_13=-0.000 tm_pt_14=-1.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=2.000 WordPenalty=-1.737 OOVPenalty=-100.000 ||| -116.964
-50 ||| উইন্ডোজ মিলিনিয়াম ||| lm_0=-12.528 lm_1=-9.783 tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=0.000 tm_pt_3=0.000 tm_pt_4=0.000 tm_pt_5=0.000 tm_pt_6=0.000 tm_pt_7=0.000 tm_pt_8=0.000 tm_pt_9=0.000 tm_pt_10=0.000 tm_pt_11=0.000 tm_pt_12=0.000 tm_pt_13=0.000 tm_pt_14=0.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=2.000 WordPenalty=-1.737 OOVPenalty=-200.000 ||| -219.189
-51 ||| rabindranath , many শৈলী আয়ত্ত্ব was . ||| lm_0=-21.075 lm_1=-26.089 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-5.000 tm_pt_3=-0.000 tm_pt_4=-5.000 tm_pt_5=-17.153 tm_pt_6=-7.458 tm_pt_7=-5.000 tm_pt_8=-13.590 tm_pt_9=-1.019 tm_pt_10=-17.417 tm_pt_11=-0.000 tm_pt_12=-5.381 tm_pt_13=-0.000 tm_pt_14=-5.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=7.000 WordPenalty=-3.909 OOVPenalty=-200.000 ||| -254.314
-51 ||| rabindranath , more শৈলী আয়ত্ত্ব was . ||| lm_0=-21.909 lm_1=-26.089 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-5.000 tm_pt_3=-0.000 tm_pt_4=-5.000 tm_pt_5=-16.178 tm_pt_6=-6.541 tm_pt_7=-5.000 tm_pt_8=-13.590 tm_pt_9=-1.018 tm_pt_10=-16.203 tm_pt_11=-0.000 tm_pt_12=-5.093 tm_pt_13=-0.000 tm_pt_14=-5.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=7.000 WordPenalty=-3.909 OOVPenalty=-200.000 ||| -254.429
-51 ||| rabindranath , many শৈলী আয়ত্ত্ব a . ||| lm_0=-21.101 lm_1=-26.089 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-5.000 tm_pt_3=-0.000 tm_pt_4=-5.000 tm_pt_5=-17.168 tm_pt_6=-7.217 tm_pt_7=-5.000 tm_pt_8=-13.590 tm_pt_9=-1.136 tm_pt_10=-18.506 tm_pt_11=-0.000 tm_pt_12=-5.892 tm_pt_13=-0.000 tm_pt_14=-5.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=7.000 WordPenalty=-3.909 OOVPenalty=-200.000 ||| -254.825
-51 ||| rabindranath although many শৈলী আয়ত্ত্ব was . ||| lm_0=-24.131 lm_1=-26.089 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-5.000 tm_pt_3=-0.000 tm_pt_4=-5.000 tm_pt_5=-12.228 tm_pt_6=-9.106 tm_pt_7=-5.000 tm_pt_8=-13.590 tm_pt_9=-0.154 tm_pt_10=-10.629 tm_pt_11=-0.000 tm_pt_12=-4.282 tm_pt_13=-0.000 tm_pt_14=-5.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=7.000 WordPenalty=-3.909 OOVPenalty=-200.000 ||| -254.858
-51 ||| rabindranath though more শৈলী আয়ত্ত্ব was . ||| lm_0=-24.610 lm_1=-26.089 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-5.000 tm_pt_3=-0.000 tm_pt_4=-5.000 tm_pt_5=-12.457 tm_pt_6=-7.315 tm_pt_7=-5.000 tm_pt_8=-13.590 tm_pt_9=-0.154 tm_pt_10=-11.213 tm_pt_11=-0.000 tm_pt_12=-3.995 tm_pt_13=-0.000 tm_pt_14=-5.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=7.000 WordPenalty=-3.909 OOVPenalty=-200.000 ||| -254.932
-51 ||| rabindranath , many শৈলী আয়ত্ত্ব by . ||| lm_0=-21.027 lm_1=-26.089 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-5.000 tm_pt_3=-0.000 tm_pt_4=-4.000 tm_pt_5=-17.337 tm_pt_6=-8.246 tm_pt_7=-5.000 tm_pt_8=-13.590 tm_pt_9=-1.050 tm_pt_10=-15.591 tm_pt_11=-0.000 tm_pt_12=-4.998 tm_pt_13=-0.000 tm_pt_14=-5.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=6.000 WordPenalty=-3.909 OOVPenalty=-200.000 ||| -254.940
-51 ||| rabindranath , more শৈলী আয়ত্ত্ব a . ||| lm_0=-21.935 lm_1=-26.089 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-5.000 tm_pt_3=-0.000 tm_pt_4=-5.000 tm_pt_5=-16.194 tm_pt_6=-6.300 tm_pt_7=-5.000 tm_pt_8=-13.590 tm_pt_9=-1.135 tm_pt_10=-17.291 tm_pt_11=-0.000 tm_pt_12=-5.604 tm_pt_13=-0.000 tm_pt_14=-5.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=7.000 WordPenalty=-3.909 OOVPenalty=-200.000 ||| -254.940
-51 ||| rabindranath though many শৈলী আয়ত্ত্ব was . ||| lm_0=-23.903 lm_1=-26.089 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-5.000 tm_pt_3=-0.000 tm_pt_4=-5.000 tm_pt_5=-13.432 tm_pt_6=-8.231 tm_pt_7=-5.000 tm_pt_8=-13.590 tm_pt_9=-0.154 tm_pt_10=-12.428 tm_pt_11=-0.000 tm_pt_12=-4.282 tm_pt_13=-0.000 tm_pt_14=-5.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=7.000 WordPenalty=-3.909 OOVPenalty=-200.000 ||| -254.974
-51 ||| rabindranath , more শৈলী আয়ত্ত্ব by . ||| lm_0=-21.861 lm_1=-26.089 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-5.000 tm_pt_3=-0.000 tm_pt_4=-4.000 tm_pt_5=-16.363 tm_pt_6=-7.330 tm_pt_7=-5.000 tm_pt_8=-13.590 tm_pt_9=-1.050 tm_pt_10=-14.377 tm_pt_11=-0.000 tm_pt_12=-4.710 tm_pt_13=-0.000 tm_pt_14=-5.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=6.000 WordPenalty=-3.909 OOVPenalty=-200.000 ||| -255.055
-52 ||| labour economics ||| lm_0=-5.389 lm_1=-9.783 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-1.000 tm_pt_3=-0.000 tm_pt_4=-1.000 tm_pt_5=-1.274 tm_pt_6=-2.013 tm_pt_7=-1.000 tm_pt_8=-2.718 tm_pt_9=-0.007 tm_pt_10=0.000 tm_pt_11=-0.000 tm_pt_12=-0.773 tm_pt_13=-0.000 tm_pt_14=-2.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=1.000 WordPenalty=-1.737 ||| -13.099
-52 ||| labor economy ||| lm_0=-5.309 lm_1=-9.783 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-1.000 tm_pt_3=-0.000 tm_pt_4=-1.000 tm_pt_5=-1.452 tm_pt_6=-2.394 tm_pt_7=-1.000 tm_pt_8=-2.718 tm_pt_9=-0.050 tm_pt_10=0.000 tm_pt_11=-0.000 tm_pt_12=-1.179 tm_pt_13=-0.000 tm_pt_14=-2.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=1.000 WordPenalty=-1.737 ||| -13.343
-52 ||| labor economics ||| lm_0=-7.246 lm_1=-9.783 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-2.000 tm_pt_3=-0.000 tm_pt_4=-2.000 tm_pt_5=-1.231 tm_pt_6=-1.682 tm_pt_7=-2.000 tm_pt_8=-5.436 tm_pt_9=-0.000 tm_pt_10=-0.388 tm_pt_11=-0.000 tm_pt_12=-1.158 tm_pt_13=-0.000 tm_pt_14=-2.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=2.000 WordPenalty=-1.737 ||| -15.698
-52 ||| labour economy ||| lm_0=-6.994 lm_1=-9.783 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-2.000 tm_pt_3=-0.000 tm_pt_4=-2.000 tm_pt_5=-1.496 tm_pt_6=-2.725 tm_pt_7=-2.000 tm_pt_8=-5.436 tm_pt_9=-0.000 tm_pt_10=-0.673 tm_pt_11=-0.000 tm_pt_12=-2.368 tm_pt_13=-0.000 tm_pt_14=-2.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=2.000 WordPenalty=-1.737 ||| -16.294
-52 ||| the economics ||| lm_0=-5.487 lm_1=-9.783 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-2.000 tm_pt_3=-0.000 tm_pt_4=-2.000 tm_pt_5=-8.013 tm_pt_6=-2.637 tm_pt_7=-2.000 tm_pt_8=-5.436 tm_pt_9=-0.050 tm_pt_10=-6.947 tm_pt_11=-0.000 tm_pt_12=-2.863 tm_pt_13=-0.000 tm_pt_14=-2.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=2.000 WordPenalty=-1.737 ||| -17.490
-52 ||| the economy ||| lm_0=-5.372 lm_1=-9.783 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-2.000 tm_pt_3=-0.000 tm_pt_4=-2.000 tm_pt_5=-8.235 tm_pt_6=-3.349 tm_pt_7=-2.000 tm_pt_8=-5.436 tm_pt_9=-0.050 tm_pt_10=-7.117 tm_pt_11=-0.000 tm_pt_12=-3.620 tm_pt_13=-0.000 tm_pt_14=-2.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=2.000 WordPenalty=-1.737 ||| -17.945
-53 ||| britain at the main and his economic power was . ||| lm_0=-23.355 lm_1=-35.872 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-9.000 tm_pt_3=-0.000 tm_pt_4=-8.000 tm_pt_5=-21.601 tm_pt_6=-8.391 tm_pt_7=-9.000 tm_pt_8=-24.462 tm_pt_9=-1.738 tm_pt_10=-18.689 tm_pt_11=-0.000 tm_pt_12=-6.389 tm_pt_13=-0.000 tm_pt_14=-10.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=8.000 WordPenalty=-5.212 ||| -68.955
-53 ||| britain at the main and who was economical power . ||| lm_0=-22.404 lm_1=-35.872 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-8.000 tm_pt_3=-0.000 tm_pt_4=-6.000 tm_pt_5=-21.110 tm_pt_6=-9.959 tm_pt_7=-8.000 tm_pt_8=-21.744 tm_pt_9=-2.106 tm_pt_10=-19.176 tm_pt_11=-0.000 tm_pt_12=-6.249 tm_pt_13=-0.000 tm_pt_14=-10.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=6.000 WordPenalty=-5.212 ||| -69.372
-53 ||| britain at the main and his economic powers was . ||| lm_0=-23.387 lm_1=-35.872 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-9.000 tm_pt_3=-0.000 tm_pt_4=-8.000 tm_pt_5=-21.769 tm_pt_6=-10.054 tm_pt_7=-9.000 tm_pt_8=-24.462 tm_pt_9=-1.738 tm_pt_10=-18.284 tm_pt_11=-0.000 tm_pt_12=-6.389 tm_pt_13=-0.000 tm_pt_14=-10.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=8.000 WordPenalty=-5.212 ||| -69.489
-53 ||| britain at the main and who was economic power . ||| lm_0=-22.721 lm_1=-35.872 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-8.000 tm_pt_3=-0.000 tm_pt_4=-6.000 tm_pt_5=-21.129 tm_pt_6=-9.084 tm_pt_7=-8.000 tm_pt_8=-21.744 tm_pt_9=-2.106 tm_pt_10=-19.581 tm_pt_11=-0.000 tm_pt_12=-6.249 tm_pt_13=-0.000 tm_pt_14=-10.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=6.000 WordPenalty=-5.212 ||| -69.575
-53 ||| britain at the main and his economical power was . ||| lm_0=-23.774 lm_1=-35.872 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-9.000 tm_pt_3=-0.000 tm_pt_4=-8.000 tm_pt_5=-21.582 tm_pt_6=-9.265 tm_pt_7=-9.000 tm_pt_8=-24.462 tm_pt_9=-1.738 tm_pt_10=-18.284 tm_pt_11=-0.000 tm_pt_12=-6.389 tm_pt_13=-0.000 tm_pt_14=-10.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=8.000 WordPenalty=-5.212 ||| -69.663
-53 ||| britain at the main economic power and who was . ||| lm_0=-21.688 lm_1=-35.872 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-9.000 tm_pt_3=-0.000 tm_pt_4=-6.000 tm_pt_5=-21.129 tm_pt_6=-9.084 tm_pt_7=-9.000 tm_pt_8=-24.462 tm_pt_9=-2.370 tm_pt_10=-16.217 tm_pt_11=-0.000 tm_pt_12=-6.105 tm_pt_13=-0.000 tm_pt_14=-10.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=5.000 WordPenalty=-5.212 ||| -69.744
-53 ||| britain at the main and who was economic powers . ||| lm_0=-22.569 lm_1=-35.872 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-8.000 tm_pt_3=-0.000 tm_pt_4=-6.000 tm_pt_5=-21.297 tm_pt_6=-10.747 tm_pt_7=-8.000 tm_pt_8=-21.744 tm_pt_9=-2.106 tm_pt_10=-19.176 tm_pt_11=-0.000 tm_pt_12=-6.249 tm_pt_13=-0.000 tm_pt_14=-10.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=6.000 WordPenalty=-5.212 ||| -69.881
-53 ||| britain at the main and his was economical power . ||| lm_0=-22.611 lm_1=-35.872 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-8.000 tm_pt_3=-0.000 tm_pt_4=-6.000 tm_pt_5=-21.582 tm_pt_6=-9.265 tm_pt_7=-8.000 tm_pt_8=-21.744 tm_pt_9=-2.106 tm_pt_10=-21.391 tm_pt_11=-0.000 tm_pt_12=-6.249 tm_pt_13=-0.000 tm_pt_14=-10.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=6.000 WordPenalty=-5.212 ||| -70.056
-53 ||| britain at the main and his was economic power . ||| lm_0=-22.929 lm_1=-35.872 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-8.000 tm_pt_3=-0.000 tm_pt_4=-6.000 tm_pt_5=-21.601 tm_pt_6=-8.391 tm_pt_7=-8.000 tm_pt_8=-21.744 tm_pt_9=-2.106 tm_pt_10=-21.797 tm_pt_11=-0.000 tm_pt_12=-6.249 tm_pt_13=-0.000 tm_pt_14=-10.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=6.000 WordPenalty=-5.212 ||| -70.259
-54 ||| movement against the military rule and pakistan গোষ্ঠীগত was against protest and bengalis independence movement , ধাবিত to for he widely praised . ||| lm_0=-62.918 lm_1=-78.267 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-18.000 tm_pt_3=-0.000 tm_pt_4=-15.000 tm_pt_5=-48.994 tm_pt_6=-22.903 tm_pt_7=-18.000 tm_pt_8=-48.924 tm_pt_9=-5.124 tm_pt_10=-30.377 tm_pt_11=-0.000 tm_pt_12=-8.664 tm_pt_13=-0.000 tm_pt_14=-21.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=16.000 WordPenalty=-10.857 OOVPenalty=-200.000 ||| -365.346
-54 ||| the military rule movement against and pakistan গোষ্ঠীগত was against protest and bengalis independence movement , ধাবিত to for he widely praised . ||| lm_0=-64.294 lm_1=-78.267 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-18.000 tm_pt_3=-0.000 tm_pt_4=-16.000 tm_pt_5=-48.994 tm_pt_6=-22.903 tm_pt_7=-18.000 tm_pt_8=-48.924 tm_pt_9=-5.075 tm_pt_10=-31.215 tm_pt_11=-0.000 tm_pt_12=-8.952 tm_pt_13=-0.000 tm_pt_14=-21.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=18.000 WordPenalty=-10.857 OOVPenalty=-200.000 ||| -365.363
-54 ||| movement against the military rule and the গোষ্ঠীগত was against protest and bengalis independence movement , ধাবিত to for he widely praised . ||| lm_0=-60.831 lm_1=-78.267 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-18.000 tm_pt_3=-0.000 tm_pt_4=-15.000 tm_pt_5=-53.615 tm_pt_6=-23.667 tm_pt_7=-18.000 tm_pt_8=-48.924 tm_pt_9=-5.124 tm_pt_10=-34.296 tm_pt_11=-0.000 tm_pt_12=-10.260 tm_pt_13=-0.000 tm_pt_14=-21.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=16.000 WordPenalty=-10.857 OOVPenalty=-200.000 ||| -365.490
-54 ||| the military rule movement against and the গোষ্ঠীগত was against protest and bengalis independence movement , ধাবিত to for he widely praised . ||| lm_0=-62.207 lm_1=-78.267 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-18.000 tm_pt_3=-0.000 tm_pt_4=-16.000 tm_pt_5=-53.615 tm_pt_6=-23.667 tm_pt_7=-18.000 tm_pt_8=-48.924 tm_pt_9=-5.075 tm_pt_10=-35.134 tm_pt_11=-0.000 tm_pt_12=-10.548 tm_pt_13=-0.000 tm_pt_14=-21.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=18.000 WordPenalty=-10.857 OOVPenalty=-200.000 ||| -365.507
-54 ||| movement against the military rule and pakistan গোষ্ঠীগত was against protest and bengalis independence movement on ধাবিত to for he widely praised . ||| lm_0=-63.790 lm_1=-78.267 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-18.000 tm_pt_3=-0.000 tm_pt_4=-15.000 tm_pt_5=-46.730 tm_pt_6=-23.155 tm_pt_7=-18.000 tm_pt_8=-48.924 tm_pt_9=-5.241 tm_pt_10=-27.837 tm_pt_11=-0.000 tm_pt_12=-9.175 tm_pt_13=-0.000 tm_pt_14=-21.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=16.000 WordPenalty=-10.857 OOVPenalty=-200.000 ||| -365.697
-54 ||| the military rule movement against and pakistan গোষ্ঠীগত was against protest and bengalis independence movement on ধাবিত to for he widely praised . ||| lm_0=-65.167 lm_1=-78.267 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-18.000 tm_pt_3=-0.000 tm_pt_4=-16.000 tm_pt_5=-46.730 tm_pt_6=-23.155 tm_pt_7=-18.000 tm_pt_8=-48.924 tm_pt_9=-5.192 tm_pt_10=-28.675 tm_pt_11=-0.000 tm_pt_12=-9.463 tm_pt_13=-0.000 tm_pt_14=-21.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=18.000 WordPenalty=-10.857 OOVPenalty=-200.000 ||| -365.714
-54 ||| movement against the military rule and the গোষ্ঠীগত was against protest and bengalis independence movement on ধাবিত to for he widely praised . ||| lm_0=-61.703 lm_1=-78.267 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-18.000 tm_pt_3=-0.000 tm_pt_4=-15.000 tm_pt_5=-51.351 tm_pt_6=-23.918 tm_pt_7=-18.000 tm_pt_8=-48.924 tm_pt_9=-5.241 tm_pt_10=-31.756 tm_pt_11=-0.000 tm_pt_12=-10.771 tm_pt_13=-0.000 tm_pt_14=-21.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=16.000 WordPenalty=-10.857 OOVPenalty=-200.000 ||| -365.841
-54 ||| movement against the military rule and pakistan গোষ্ঠীগত was protest against and bengalis independence movement , ধাবিত to for he widely praised . ||| lm_0=-61.725 lm_1=-78.267 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-19.000 tm_pt_3=-0.000 tm_pt_4=-15.000 tm_pt_5=-48.994 tm_pt_6=-22.903 tm_pt_7=-19.000 tm_pt_8=-51.642 tm_pt_9=-5.124 tm_pt_10=-32.031 tm_pt_11=-0.000 tm_pt_12=-9.805 tm_pt_13=-0.000 tm_pt_14=-21.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=16.000 WordPenalty=-10.857 OOVPenalty=-200.000 ||| -365.858
-54 ||| movement against the military rule and pakistan গোষ্ঠীগত was against protest and bengalis independence movement , ধাবিত for the he widely praised . ||| lm_0=-61.304 lm_1=-78.267 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-18.000 tm_pt_3=-0.000 tm_pt_4=-14.000 tm_pt_5=-51.130 tm_pt_6=-23.575 tm_pt_7=-18.000 tm_pt_8=-48.924 tm_pt_9=-5.124 tm_pt_10=-32.703 tm_pt_11=-0.000 tm_pt_12=-9.548 tm_pt_13=-0.000 tm_pt_14=-21.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=15.000 WordPenalty=-10.857 OOVPenalty=-200.000 ||| -365.868
-54 ||| the military rule movement against and pakistan গোষ্ঠীগত was protest against and bengalis independence movement , ধাবিত to for he widely praised . ||| lm_0=-63.101 lm_1=-78.267 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-19.000 tm_pt_3=-0.000 tm_pt_4=-16.000 tm_pt_5=-48.994 tm_pt_6=-22.903 tm_pt_7=-19.000 tm_pt_8=-51.642 tm_pt_9=-5.075 tm_pt_10=-32.869 tm_pt_11=-0.000 tm_pt_12=-10.092 tm_pt_13=-0.000 tm_pt_14=-21.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=18.000 WordPenalty=-10.857 OOVPenalty=-200.000 ||| -365.875
-55 ||| here is mentioned that were internet and world wide web other name word to be but actually শব্দদ্বয় different subject to . ||| lm_0=-55.366 lm_1=-75.006 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-18.000 tm_pt_3=-0.000 tm_pt_4=-15.000 tm_pt_5=-75.462 tm_pt_6=-36.132 tm_pt_7=-18.000 tm_pt_8=-48.924 tm_pt_9=-6.431 tm_pt_10=-50.024 tm_pt_11=-0.000 tm_pt_12=-9.468 tm_pt_13=-0.000 tm_pt_14=-21.000 tm_pt_15=-1.000 tm_pt_16=-0.000 tm_glue_0=16.000 WordPenalty=-10.423 OOVPenalty=-100.000 ||| -270.162
-55 ||| here is mentioned that were internet and world wide web other name to be word but actually শব্দদ্বয় different subject to . ||| lm_0=-55.492 lm_1=-75.006 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-18.000 tm_pt_3=-0.000 tm_pt_4=-14.000 tm_pt_5=-75.462 tm_pt_6=-36.132 tm_pt_7=-18.000 tm_pt_8=-48.924 tm_pt_9=-6.431 tm_pt_10=-48.558 tm_pt_11=-0.000 tm_pt_12=-9.468 tm_pt_13=-0.000 tm_pt_14=-21.000 tm_pt_15=-1.000 tm_pt_16=-0.000 tm_glue_0=15.000 WordPenalty=-10.423 OOVPenalty=-100.000 ||| -270.902
-55 ||| here is mentioned that were internet and world wide web other name word to be though actually শব্দদ্বয় different subject to . ||| lm_0=-56.919 lm_1=-75.006 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-18.000 tm_pt_3=-0.000 tm_pt_4=-15.000 tm_pt_5=-74.255 tm_pt_6=-35.844 tm_pt_7=-18.000 tm_pt_8=-48.924 tm_pt_9=-6.314 tm_pt_10=-48.057 tm_pt_11=-0.000 tm_pt_12=-8.957 tm_pt_13=-0.000 tm_pt_14=-21.000 tm_pt_15=-1.000 tm_pt_16=-0.000 tm_glue_0=16.000 WordPenalty=-10.423 OOVPenalty=-100.000 ||| -270.968
-55 ||| here is mentioned that were internet and other name of world wide web word to be but actually শব্দদ্বয় different subject to . ||| lm_0=-53.566 lm_1=-78.267 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-18.000 tm_pt_3=-0.000 tm_pt_4=-14.000 tm_pt_5=-76.084 tm_pt_6=-36.132 tm_pt_7=-18.000 tm_pt_8=-48.924 tm_pt_9=-6.431 tm_pt_10=-48.638 tm_pt_11=-0.000 tm_pt_12=-9.468 tm_pt_13=-0.000 tm_pt_14=-22.000 tm_pt_15=-1.000 tm_pt_16=-1.000 tm_glue_0=15.000 WordPenalty=-10.857 OOVPenalty=-100.000 ||| -270.995
-55 ||| here is mentioned that were internet and world wide web other name word to be but actually শব্দদ্বয় different subject a . ||| lm_0=-55.873 lm_1=-75.006 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-18.000 tm_pt_3=-0.000 tm_pt_4=-15.000 tm_pt_5=-75.102 tm_pt_6=-35.998 tm_pt_7=-18.000 tm_pt_8=-48.924 tm_pt_9=-6.664 tm_pt_10=-50.785 tm_pt_11=-0.000 tm_pt_12=-9.873 tm_pt_13=-0.000 tm_pt_14=-21.000 tm_pt_15=-1.000 tm_pt_16=-0.000 tm_glue_0=16.000 WordPenalty=-10.423 OOVPenalty=-100.000 ||| -271.207
-55 ||| here is mentioned that were internet and world wide web other name word to be but actually শব্দদ্বয় different subject indicates . ||| lm_0=-57.972 lm_1=-75.006 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-18.000 tm_pt_3=-0.000 tm_pt_4=-15.000 tm_pt_5=-70.008 tm_pt_6=-36.286 tm_pt_7=-18.000 tm_pt_8=-48.924 tm_pt_9=-6.664 tm_pt_10=-44.530 tm_pt_11=-0.000 tm_pt_12=-9.873 tm_pt_13=-0.000 tm_pt_14=-21.000 tm_pt_15=-1.000 tm_pt_16=-0.000 tm_glue_0=16.000 WordPenalty=-10.423 OOVPenalty=-100.000 ||| -271.304
-55 ||| here is mentioned that were internet and world wide web other name word to be but actually শব্দদ্বয় different subject to the . ||| lm_0=-56.535 lm_1=-78.267 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-19.000 tm_pt_3=-0.000 tm_pt_4=-17.000 tm_pt_5=-78.670 tm_pt_6=-34.346 tm_pt_7=-19.000 tm_pt_8=-51.642 tm_pt_9=-6.431 tm_pt_10=-46.151 tm_pt_11=-0.000 tm_pt_12=-9.640 tm_pt_13=-0.000 tm_pt_14=-22.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=18.000 WordPenalty=-10.857 OOVPenalty=-100.000 ||| -271.444
-55 ||| here is mentioned that were internet and world wide web other name word to be but actually শব্দদ্বয় different subjects to . ||| lm_0=-55.554 lm_1=-75.006 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-18.000 tm_pt_3=-0.000 tm_pt_4=-14.000 tm_pt_5=-75.002 tm_pt_6=-37.837 tm_pt_7=-18.000 tm_pt_8=-48.924 tm_pt_9=-6.432 tm_pt_10=-47.322 tm_pt_11=-0.000 tm_pt_12=-10.432 tm_pt_13=-0.000 tm_pt_14=-21.000 tm_pt_15=-1.000 tm_pt_16=-0.000 tm_glue_0=15.000 WordPenalty=-10.423 OOVPenalty=-100.000 ||| -271.494
-55 ||| here is mentioned that many internet and world wide web other name word to be but actually শব্দদ্বয় different subject to . ||| lm_0=-56.305 lm_1=-75.006 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-18.000 tm_pt_3=-0.000 tm_pt_4=-15.000 tm_pt_5=-74.971 tm_pt_6=-36.132 tm_pt_7=-18.000 tm_pt_8=-48.924 tm_pt_9=-6.431 tm_pt_10=-51.114 tm_pt_11=-0.000 tm_pt_12=-9.468 tm_pt_13=-0.000 tm_pt_14=-21.000 tm_pt_15=-1.000 tm_pt_16=-0.000 tm_glue_0=16.000 WordPenalty=-10.423 OOVPenalty=-100.000 ||| -271.512
-55 ||| here is mentioned that were internet and world wide web other name word to be though actually শব্দদ্বয় different subject a . ||| lm_0=-57.427 lm_1=-75.006 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-18.000 tm_pt_3=-0.000 tm_pt_4=-15.000 tm_pt_5=-73.895 tm_pt_6=-35.711 tm_pt_7=-18.000 tm_pt_8=-48.924 tm_pt_9=-6.547 tm_pt_10=-48.819 tm_pt_11=-0.000 tm_pt_12=-9.362 tm_pt_13=-0.000 tm_pt_14=-21.000 tm_pt_15=-1.000 tm_pt_16=-0.000 tm_glue_0=16.000 WordPenalty=-10.423 OOVPenalty=-100.000 ||| -272.013
-56 ||| . z related polar co-ordinate two are r = . ||| lm_0=-29.784 lm_1=-35.872 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-8.000 tm_pt_3=-0.000 tm_pt_4=-7.000 tm_pt_5=-25.545 tm_pt_6=-12.128 tm_pt_7=-8.000 tm_pt_8=-21.744 tm_pt_9=-3.418 tm_pt_10=-16.486 tm_pt_11=-0.000 tm_pt_12=-0.128 tm_pt_13=-0.000 tm_pt_14=-10.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=7.000 WordPenalty=-5.212 ||| -77.342
-56 ||| . z related polar co-ordinate two is r = . ||| lm_0=-29.699 lm_1=-35.872 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-9.000 tm_pt_3=-0.000 tm_pt_4=-8.000 tm_pt_5=-25.602 tm_pt_6=-11.361 tm_pt_7=-9.000 tm_pt_8=-24.462 tm_pt_9=-3.050 tm_pt_10=-19.524 tm_pt_11=-0.000 tm_pt_12=-0.535 tm_pt_13=-0.000 tm_pt_14=-10.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=8.000 WordPenalty=-5.212 ||| -77.806
-57 ||| november ||| lm_0=-3.071 lm_1=-6.522 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-1.000 tm_pt_3=-0.000 tm_pt_4=-1.000 tm_pt_5=-0.041 tm_pt_6=-0.057 tm_pt_7=-1.000 tm_pt_8=-2.718 tm_pt_9=-0.000 tm_pt_10=0.000 tm_pt_11=-0.000 tm_pt_12=-0.008 tm_pt_13=-0.000 tm_pt_14=-1.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=1.000 WordPenalty=-1.303 ||| -6.949
-57 ||| november . ||| lm_0=-5.242 lm_1=-9.783 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-1.000 tm_pt_3=-0.000 tm_pt_4=-1.000 tm_pt_5=-9.840 tm_pt_6=-0.742 tm_pt_7=-1.000 tm_pt_8=-2.718 tm_pt_9=-1.000 tm_pt_10=0.000 tm_pt_11=-0.000 tm_pt_12=-4.779 tm_pt_13=-0.000 tm_pt_14=-2.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=1.000 WordPenalty=-1.737 ||| -16.354
-57 ||| নভেম্বর ||| lm_0=-7.355 lm_1=-6.522 tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=0.000 tm_pt_3=0.000 tm_pt_4=0.000 tm_pt_5=0.000 tm_pt_6=0.000 tm_pt_7=0.000 tm_pt_8=0.000 tm_pt_9=0.000 tm_pt_10=0.000 tm_pt_11=0.000 tm_pt_12=0.000 tm_pt_13=0.000 tm_pt_14=0.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=1.000 WordPenalty=-1.303 OOVPenalty=-100.000 ||| -111.358
-58 ||| 1972 খ্রীস্টাব্দে then আরপানেটে first ইলেক্ট্রনিক mail to to . ||| lm_0=-38.430 lm_1=-35.872 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-7.000 tm_pt_3=-0.000 tm_pt_4=-6.000 tm_pt_5=-18.629 tm_pt_6=-8.765 tm_pt_7=-7.000 tm_pt_8=-19.026 tm_pt_9=-0.369 tm_pt_10=-17.204 tm_pt_11=-0.000 tm_pt_12=-2.899 tm_pt_13=-0.000 tm_pt_14=-7.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=9.000 WordPenalty=-5.212 OOVPenalty=-300.000 ||| -382.030
-58 ||| 1972 খ্রীস্টাব্দে " আরপানেটে first ইলেক্ট্রনিক mail to to . ||| lm_0=-37.710 lm_1=-35.872 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-7.000 tm_pt_3=-0.000 tm_pt_4=-7.000 tm_pt_5=-21.353 tm_pt_6=-9.458 tm_pt_7=-7.000 tm_pt_8=-19.026 tm_pt_9=-1.368 tm_pt_10=-20.715 tm_pt_11=-0.000 tm_pt_12=-3.391 tm_pt_13=-0.000 tm_pt_14=-7.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=10.000 WordPenalty=-5.212 OOVPenalty=-300.000 ||| -382.837
-58 ||| 1972 খ্রীস্টাব্দে then আরপানেটে first ইলেক্ট্রনিক mail to used . ||| lm_0=-38.643 lm_1=-35.872 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-6.000 tm_pt_3=-0.000 tm_pt_4=-5.000 tm_pt_5=-17.350 tm_pt_6=-10.180 tm_pt_7=-6.000 tm_pt_8=-16.308 tm_pt_9=-1.369 tm_pt_10=-15.685 tm_pt_11=-0.000 tm_pt_12=-4.158 tm_pt_13=-0.000 tm_pt_14=-7.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=8.000 WordPenalty=-5.212 OOVPenalty=-300.000 ||| -383.214
-58 ||| 1972 খ্রীস্টাব্দে then আরপানেটে first ইলেক্ট্রনিক to mail to . ||| lm_0=-37.936 lm_1=-35.872 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-7.000 tm_pt_3=-0.000 tm_pt_4=-5.000 tm_pt_5=-18.629 tm_pt_6=-8.765 tm_pt_7=-7.000 tm_pt_8=-19.026 tm_pt_9=-0.736 tm_pt_10=-17.873 tm_pt_11=-0.000 tm_pt_12=-4.403 tm_pt_13=-0.000 tm_pt_14=-7.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=8.000 WordPenalty=-5.212 OOVPenalty=-300.000 ||| -383.391
-58 ||| খ্রীস্টাব্দে 1972 then আরপানেটে first ইলেক্ট্রনিক mail to to . ||| lm_0=-38.430 lm_1=-35.872 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-7.000 tm_pt_3=-0.000 tm_pt_4=-5.000 tm_pt_5=-18.629 tm_pt_6=-8.765 tm_pt_7=-7.000 tm_pt_8=-19.026 tm_pt_9=-0.387 tm_pt_10=-18.080 tm_pt_11=-0.000 tm_pt_12=-3.512 tm_pt_13=-0.000 tm_pt_14=-7.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=8.000 WordPenalty=-5.212 OOVPenalty=-300.000 ||| -383.453
-58 ||| 1972 খ্রীস্টাব্দে then আরপানেটে first ইলেক্ট্রনিক mail to arrested . ||| lm_0=-39.625 lm_1=-35.872 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-6.000 tm_pt_3=-0.000 tm_pt_4=-5.000 tm_pt_5=-16.830 tm_pt_6=-12.490 tm_pt_7=-6.000 tm_pt_8=-16.308 tm_pt_9=-0.736 tm_pt_10=-13.000 tm_pt_11=-0.000 tm_pt_12=-3.465 tm_pt_13=-0.000 tm_pt_14=-7.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=8.000 WordPenalty=-5.212 OOVPenalty=-300.000 ||| -383.660
-58 ||| 1972 খ্রীস্টাব্দে first ইলেক্ট্রনিক then আরপানেটে mail to to . ||| lm_0=-38.430 lm_1=-35.872 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-7.000 tm_pt_3=-0.000 tm_pt_4=-5.000 tm_pt_5=-18.629 tm_pt_6=-8.765 tm_pt_7=-7.000 tm_pt_8=-19.026 tm_pt_9=-1.001 tm_pt_10=-14.022 tm_pt_11=-0.000 tm_pt_12=-2.899 tm_pt_13=-0.000 tm_pt_14=-7.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=7.000 WordPenalty=-5.212 OOVPenalty=-300.000 ||| -383.688
-58 ||| 1972 খ্রীস্টাব্দে then first ইলেক্ট্রনিক আরপানেটে mail to to . ||| lm_0=-38.430 lm_1=-35.872 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-7.000 tm_pt_3=-0.000 tm_pt_4=-5.000 tm_pt_5=-18.629 tm_pt_6=-8.765 tm_pt_7=-7.000 tm_pt_8=-19.026 tm_pt_9=-1.001 tm_pt_10=-14.022 tm_pt_11=-0.000 tm_pt_12=-2.899 tm_pt_13=-0.000 tm_pt_14=-7.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=7.000 WordPenalty=-5.212 OOVPenalty=-300.000 ||| -383.688
-59 ||| জীব science that শাখায় fungi and the practical subject to he discussed ছত্রাকবিদ্যা -lrb- mycology -rrb- ||| lm_0=-56.259 lm_1=-55.439 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-11.000 tm_pt_3=-0.000 tm_pt_4=-9.000 tm_pt_5=-22.287 tm_pt_6=-21.471 tm_pt_7=-11.000 tm_pt_8=-29.898 tm_pt_9=-1.274 tm_pt_10=-18.370 tm_pt_11=-0.000 tm_pt_12=-6.550 tm_pt_13=-0.000 tm_pt_14=-12.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=13.000 WordPenalty=-7.817 OOVPenalty=-400.000 ||| -525.136
-59 ||| জীব science that শাখায় fungi and its practical subject to he discussed ছত্রাকবিদ্যা -lrb- mycology -rrb- ||| lm_0=-56.884 lm_1=-55.439 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-12.000 tm_pt_3=-0.000 tm_pt_4=-10.000 tm_pt_5=-19.437 tm_pt_6=-22.441 tm_pt_7=-12.000 tm_pt_8=-32.616 tm_pt_9=-1.139 tm_pt_10=-16.283 tm_pt_11=-0.000 tm_pt_12=-7.569 tm_pt_13=-0.000 tm_pt_14=-12.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=14.000 WordPenalty=-7.817 OOVPenalty=-400.000 ||| -525.595
-59 ||| জীব science that শাখায় fungi and the practical subject to discussed he ছত্রাকবিদ্যা -lrb- mycology -rrb- ||| lm_0=-57.503 lm_1=-55.439 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-11.000 tm_pt_3=-0.000 tm_pt_4=-10.000 tm_pt_5=-22.287 tm_pt_6=-21.471 tm_pt_7=-11.000 tm_pt_8=-29.898 tm_pt_9=-1.273 tm_pt_10=-18.945 tm_pt_11=-0.000 tm_pt_12=-6.327 tm_pt_13=-0.000 tm_pt_14=-12.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=14.000 WordPenalty=-7.817 OOVPenalty=-400.000 ||| -525.781
-59 ||| জীব science that শাখায় fungi and the practical subject about to he ছত্রাকবিদ্যা -lrb- mycology -rrb- ||| lm_0=-56.839 lm_1=-55.439 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-11.000 tm_pt_3=-0.000 tm_pt_4=-11.000 tm_pt_5=-23.925 tm_pt_6=-21.956 tm_pt_7=-11.000 tm_pt_8=-29.898 tm_pt_9=-1.273 tm_pt_10=-23.221 tm_pt_11=-0.000 tm_pt_12=-7.244 tm_pt_13=-0.000 tm_pt_14=-12.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=15.000 WordPenalty=-7.817 OOVPenalty=-400.000 ||| -525.908
-59 ||| জীব science that শাখায় fungus and the practical subject to he discussed ছত্রাকবিদ্যা -lrb- mycology -rrb- ||| lm_0=-56.252 lm_1=-55.439 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-11.000 tm_pt_3=-0.000 tm_pt_4=-9.000 tm_pt_5=-22.893 tm_pt_6=-22.244 tm_pt_7=-11.000 tm_pt_8=-29.898 tm_pt_9=-1.409 tm_pt_10=-18.370 tm_pt_11=-0.000 tm_pt_12=-7.850 tm_pt_13=-0.000 tm_pt_14=-12.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=13.000 WordPenalty=-7.817 OOVPenalty=-400.000 ||| -526.079
-59 ||| জীব science that শাখায় fungi and the practical subject to discussed was ছত্রাকবিদ্যা -lrb- mycology -rrb- ||| lm_0=-56.516 lm_1=-55.439 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-11.000 tm_pt_3=-0.000 tm_pt_4=-10.000 tm_pt_5=-23.316 tm_pt_6=-21.692 tm_pt_7=-11.000 tm_pt_8=-29.898 tm_pt_9=-1.641 tm_pt_10=-20.873 tm_pt_11=-0.000 tm_pt_12=-8.032 tm_pt_13=-0.000 tm_pt_14=-12.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=14.000 WordPenalty=-7.817 OOVPenalty=-400.000 ||| -526.237
-59 ||| জীব science that শাখায় fungi and its practical subject to discussed he ছত্রাকবিদ্যা -lrb- mycology -rrb- ||| lm_0=-58.129 lm_1=-55.439 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-12.000 tm_pt_3=-0.000 tm_pt_4=-11.000 tm_pt_5=-19.437 tm_pt_6=-22.441 tm_pt_7=-12.000 tm_pt_8=-32.616 tm_pt_9=-1.138 tm_pt_10=-16.857 tm_pt_11=-0.000 tm_pt_12=-7.346 tm_pt_13=-0.000 tm_pt_14=-12.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=15.000 WordPenalty=-7.817 OOVPenalty=-400.000 ||| -526.239
-59 ||| জীব science that শাখায় fungi and the practical subject discussed to he ছত্রাকবিদ্যা -lrb- mycology -rrb- ||| lm_0=-58.156 lm_1=-55.439 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-11.000 tm_pt_3=-0.000 tm_pt_4=-11.000 tm_pt_5=-22.287 tm_pt_6=-21.471 tm_pt_7=-11.000 tm_pt_8=-29.898 tm_pt_9=-1.273 tm_pt_10=-20.554 tm_pt_11=-0.000 tm_pt_12=-7.244 tm_pt_13=-0.000 tm_pt_14=-12.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=15.000 WordPenalty=-7.817 OOVPenalty=-400.000 ||| -526.365
-60 ||| water river from উঠানো was some a দিয়ে- দড়ি and বালটির through water used by animal world a system . ||| lm_0=-62.255 lm_1=-68.484 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-16.000 tm_pt_3=-0.000 tm_pt_4=-15.000 tm_pt_5=-44.880 tm_pt_6=-17.637 tm_pt_7=-16.000 tm_pt_8=-43.488 tm_pt_9=-4.059 tm_pt_10=-36.336 tm_pt_11=-0.000 tm_pt_12=-8.261 tm_pt_13=-0.000 tm_pt_14=-16.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=18.000 WordPenalty=-9.554 OOVPenalty=-400.000 ||| -552.512
-60 ||| water river from উঠানো was some a দিয়ে- দড়ি and বালটির through water used by animal world a method . ||| lm_0=-63.624 lm_1=-68.484 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-16.000 tm_pt_3=-0.000 tm_pt_4=-15.000 tm_pt_5=-42.470 tm_pt_6=-17.470 tm_pt_7=-16.000 tm_pt_8=-43.488 tm_pt_9=-4.053 tm_pt_10=-33.326 tm_pt_11=-0.000 tm_pt_12=-7.973 tm_pt_13=-0.000 tm_pt_14=-16.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=18.000 WordPenalty=-9.554 OOVPenalty=-400.000 ||| -552.800
-60 ||| water river from উঠানো was some purs দিয়ে- দড়ি and বালটির through water used by animal world a system . ||| lm_0=-64.447 lm_1=-68.484 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-16.000 tm_pt_3=-0.000 tm_pt_4=-15.000 tm_pt_5=-40.233 tm_pt_6=-19.023 tm_pt_7=-16.000 tm_pt_8=-43.488 tm_pt_9=-4.059 tm_pt_10=-28.982 tm_pt_11=-0.000 tm_pt_12=-8.261 tm_pt_13=-0.000 tm_pt_14=-16.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=18.000 WordPenalty=-9.554 OOVPenalty=-400.000 ||| -552.902
-60 ||| water river from উঠানো was some a দিয়ে- দড়ি and বালটির through water used by animal world a process . ||| lm_0=-62.988 lm_1=-68.484 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-16.000 tm_pt_3=-0.000 tm_pt_4=-15.000 tm_pt_5=-43.528 tm_pt_6=-17.732 tm_pt_7=-16.000 tm_pt_8=-43.488 tm_pt_9=-4.071 tm_pt_10=-35.218 tm_pt_11=-0.000 tm_pt_12=-8.443 tm_pt_13=-0.000 tm_pt_14=-16.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=18.000 WordPenalty=-9.554 OOVPenalty=-400.000 ||| -552.977
-60 ||| water river from উঠানো was some purs দিয়ে- দড়ি and বালটির through water used by animal world a method . ||| lm_0=-65.816 lm_1=-68.484 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-16.000 tm_pt_3=-0.000 tm_pt_4=-15.000 tm_pt_5=-37.824 tm_pt_6=-18.856 tm_pt_7=-16.000 tm_pt_8=-43.488 tm_pt_9=-4.053 tm_pt_10=-25.972 tm_pt_11=-0.000 tm_pt_12=-7.973 tm_pt_13=-0.000 tm_pt_14=-16.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=18.000 WordPenalty=-9.554 OOVPenalty=-400.000 ||| -553.189
-60 ||| water river from উঠানো was some purs দিয়ে- দড়ি and বালটির through water used by animal world a process . ||| lm_0=-65.180 lm_1=-68.484 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-16.000 tm_pt_3=-0.000 tm_pt_4=-15.000 tm_pt_5=-38.882 tm_pt_6=-19.118 tm_pt_7=-16.000 tm_pt_8=-43.488 tm_pt_9=-4.071 tm_pt_10=-27.863 tm_pt_11=-0.000 tm_pt_12=-8.443 tm_pt_13=-0.000 tm_pt_14=-16.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=18.000 WordPenalty=-9.554 OOVPenalty=-400.000 ||| -553.367
-60 ||| water river from উঠানো was some a দিয়ে- দড়ি and বালটির through water used by animal world a methods . ||| lm_0=-64.045 lm_1=-68.484 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-16.000 tm_pt_3=-0.000 tm_pt_4=-15.000 tm_pt_5=-41.830 tm_pt_6=-17.637 tm_pt_7=-16.000 tm_pt_8=-43.488 tm_pt_9=-4.071 tm_pt_10=-33.354 tm_pt_11=-0.000 tm_pt_12=-8.443 tm_pt_13=-0.000 tm_pt_14=-16.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=18.000 WordPenalty=-9.554 OOVPenalty=-400.000 ||| -553.444
-61 ||| include tribal dance lokuj dance classical dance etc . ||| lm_0=-31.031 lm_1=-32.611 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-8.000 tm_pt_3=-0.000 tm_pt_4=-8.000 tm_pt_5=-11.611 tm_pt_6=-21.187 tm_pt_7=-8.000 tm_pt_8=-21.744 tm_pt_9=-3.000 tm_pt_10=-5.327 tm_pt_11=-0.000 tm_pt_12=-3.691 tm_pt_13=-0.000 tm_pt_14=-9.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=8.000 WordPenalty=-4.777 ||| -74.273
-61 ||| among them are tribal dance lokuj dance classical dance etc . ||| lm_0=-31.005 lm_1=-39.134 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-8.000 tm_pt_3=-0.000 tm_pt_4=-8.000 tm_pt_5=-15.406 tm_pt_6=-13.319 tm_pt_7=-8.000 tm_pt_8=-21.744 tm_pt_9=-2.368 tm_pt_10=-3.535 tm_pt_11=-0.000 tm_pt_12=-2.998 tm_pt_13=-0.000 tm_pt_14=-11.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=8.000 WordPenalty=-5.646 ||| -75.253
-61 ||| among these are tribal dance lokuj dance classical dance etc . ||| lm_0=-31.423 lm_1=-39.134 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-8.000 tm_pt_3=-0.000 tm_pt_4=-8.000 tm_pt_5=-16.348 tm_pt_6=-13.351 tm_pt_7=-8.000 tm_pt_8=-21.744 tm_pt_9=-2.368 tm_pt_10=-3.535 tm_pt_11=-0.000 tm_pt_12=-2.998 tm_pt_13=-0.000 tm_pt_14=-11.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=8.000 WordPenalty=-5.646 ||| -75.962
-61 ||| among these there tribal dance lokuj dance classical dance etc . ||| lm_0=-31.235 lm_1=-39.134 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-9.000 tm_pt_3=-0.000 tm_pt_4=-9.000 tm_pt_5=-15.061 tm_pt_6=-13.172 tm_pt_7=-9.000 tm_pt_8=-24.462 tm_pt_9=-2.001 tm_pt_10=-6.315 tm_pt_11=-0.000 tm_pt_12=-4.875 tm_pt_13=-0.000 tm_pt_14=-11.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=9.000 WordPenalty=-5.646 ||| -76.695
-62 ||| the oldest literature first মৌখিকভাবে and later written form is . ||| lm_0=-29.702 lm_1=-39.134 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-8.000 tm_pt_3=-0.000 tm_pt_4=-6.000 tm_pt_5=-21.290 tm_pt_6=-13.446 tm_pt_7=-8.000 tm_pt_8=-21.744 tm_pt_9=-0.143 tm_pt_10=-20.845 tm_pt_11=-0.000 tm_pt_12=-6.143 tm_pt_13=-0.000 tm_pt_14=-10.000 tm_pt_15=-1.000 tm_pt_16=-0.000 tm_glue_0=7.000 WordPenalty=-5.646 OOVPenalty=-100.000 ||| -180.397
-62 ||| the oldest literature first মৌখিকভাবে and later written in the . ||| lm_0=-27.896 lm_1=-39.134 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-8.000 tm_pt_3=-0.000 tm_pt_4=-6.000 tm_pt_5=-25.849 tm_pt_6=-13.114 tm_pt_7=-8.000 tm_pt_8=-21.744 tm_pt_9=-0.161 tm_pt_10=-26.490 tm_pt_11=-0.000 tm_pt_12=-6.731 tm_pt_13=-0.000 tm_pt_14=-10.000 tm_pt_15=-1.000 tm_pt_16=-0.000 tm_glue_0=7.000 WordPenalty=-5.646 OOVPenalty=-100.000 ||| -180.608
-62 ||| the oldest literature first মৌখিকভাবে and later written form the . ||| lm_0=-29.290 lm_1=-39.134 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-8.000 tm_pt_3=-0.000 tm_pt_4=-6.000 tm_pt_5=-22.915 tm_pt_6=-13.720 tm_pt_7=-8.000 tm_pt_8=-21.744 tm_pt_9=-0.161 tm_pt_10=-22.739 tm_pt_11=-0.000 tm_pt_12=-6.731 tm_pt_13=-0.000 tm_pt_14=-10.000 tm_pt_15=-1.000 tm_pt_16=-0.000 tm_glue_0=7.000 WordPenalty=-5.646 OOVPenalty=-100.000 ||| -181.007
-62 ||| the oldest literature first মৌখিকভাবে and later is written form . ||| lm_0=-28.837 lm_1=-39.134 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-8.000 tm_pt_3=-0.000 tm_pt_4=-5.000 tm_pt_5=-21.290 tm_pt_6=-13.446 tm_pt_7=-8.000 tm_pt_8=-21.744 tm_pt_9=-1.007 tm_pt_10=-19.269 tm_pt_11=-0.000 tm_pt_12=-5.296 tm_pt_13=-0.000 tm_pt_14=-10.000 tm_pt_15=-1.000 tm_pt_16=-0.000 tm_glue_0=5.000 WordPenalty=-5.646 OOVPenalty=-100.000 ||| -181.293
-62 ||| the oldest literature first মৌখিকভাবে and then written in the . ||| lm_0=-27.774 lm_1=-39.134 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-8.000 tm_pt_3=-0.000 tm_pt_4=-6.000 tm_pt_5=-28.254 tm_pt_6=-14.856 tm_pt_7=-8.000 tm_pt_8=-21.744 tm_pt_9=-0.167 tm_pt_10=-26.282 tm_pt_11=-0.000 tm_pt_12=-7.137 tm_pt_13=-0.000 tm_pt_14=-10.000 tm_pt_15=-1.000 tm_pt_16=-0.000 tm_glue_0=7.000 WordPenalty=-5.646 OOVPenalty=-100.000 ||| -181.608
-62 ||| the oldest literary first মৌখিকভাবে and later written in the . ||| lm_0=-27.906 lm_1=-39.134 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-8.000 tm_pt_3=-0.000 tm_pt_4=-6.000 tm_pt_5=-25.997 tm_pt_6=-14.392 tm_pt_7=-8.000 tm_pt_8=-21.744 tm_pt_9=-0.162 tm_pt_10=-26.383 tm_pt_11=-0.000 tm_pt_12=-8.480 tm_pt_13=-0.000 tm_pt_14=-10.000 tm_pt_15=-1.000 tm_pt_16=-0.000 tm_glue_0=7.000 WordPenalty=-5.646 OOVPenalty=-100.000 ||| -181.678
-62 ||| the oldest literature first মৌখিকভাবে and later written form used . ||| lm_0=-30.913 lm_1=-39.134 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-8.000 tm_pt_3=-0.000 tm_pt_4=-6.000 tm_pt_5=-20.086 tm_pt_6=-14.362 tm_pt_7=-8.000 tm_pt_8=-21.744 tm_pt_9=-0.149 tm_pt_10=-19.482 tm_pt_11=-0.000 tm_pt_12=-6.549 tm_pt_13=-0.000 tm_pt_14=-10.000 tm_pt_15=-1.000 tm_pt_16=-0.000 tm_glue_0=7.000 WordPenalty=-5.646 OOVPenalty=-100.000 ||| -181.774
-62 ||| the oldest literature first মৌখিকভাবে and later written in is . ||| lm_0=-29.777 lm_1=-39.134 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-8.000 tm_pt_3=-0.000 tm_pt_4=-6.000 tm_pt_5=-24.223 tm_pt_6=-12.840 tm_pt_7=-8.000 tm_pt_8=-21.744 tm_pt_9=-0.143 tm_pt_10=-24.596 tm_pt_11=-0.000 tm_pt_12=-6.143 tm_pt_13=-0.000 tm_pt_14=-10.000 tm_pt_15=-1.000 tm_pt_16=-0.000 tm_glue_0=7.000 WordPenalty=-5.646 OOVPenalty=-100.000 ||| -181.817
-62 ||| the oldest literature first মৌখিকভাবে and later written in used . ||| lm_0=-29.975 lm_1=-39.134 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-8.000 tm_pt_3=-0.000 tm_pt_4=-6.000 tm_pt_5=-23.020 tm_pt_6=-13.756 tm_pt_7=-8.000 tm_pt_8=-21.744 tm_pt_9=-0.149 tm_pt_10=-23.233 tm_pt_11=-0.000 tm_pt_12=-6.549 tm_pt_13=-0.000 tm_pt_14=-10.000 tm_pt_15=-1.000 tm_pt_16=-0.000 tm_glue_0=7.000 WordPenalty=-5.646 OOVPenalty=-100.000 ||| -181.940
-62 ||| the oldest literature first মৌখিকভাবে and then written form the . ||| lm_0=-29.168 lm_1=-39.134 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-8.000 tm_pt_3=-0.000 tm_pt_4=-6.000 tm_pt_5=-25.320 tm_pt_6=-15.462 tm_pt_7=-8.000 tm_pt_8=-21.744 tm_pt_9=-0.167 tm_pt_10=-22.532 tm_pt_11=-0.000 tm_pt_12=-7.137 tm_pt_13=-0.000 tm_pt_14=-10.000 tm_pt_15=-1.000 tm_pt_16=-0.000 tm_glue_0=7.000 WordPenalty=-5.646 OOVPenalty=-100.000 ||| -182.006
-63 ||| in 1989 the গণশত্রু film his conducted তুলনামূলকভাবে দুর্বল and this দীর্ঘদিনের অসুস্থতাশেষে back , after cinema building পুনর্প্রচেষ্টা considered a . ||| lm_0=-74.493 lm_1=-75.006 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-14.000 tm_pt_3=-0.000 tm_pt_4=-12.000 tm_pt_5=-54.205 tm_pt_6=-41.751 tm_pt_7=-14.000 tm_pt_8=-38.052 tm_pt_9=-7.241 tm_pt_10=-47.718 tm_pt_11=-0.000 tm_pt_12=-11.703 tm_pt_13=-0.000 tm_pt_14=-16.000 tm_pt_15=-1.000 tm_pt_16=-0.000 tm_glue_0=18.000 WordPenalty=-10.423 OOVPenalty=-600.000 ||| -787.460
-63 ||| in 1989 the গণশত্রু film his conducted তুলনামূলকভাবে দুর্বল and this দীর্ঘদিনের অসুস্থতাশেষে back , after cinema , পুনর্প্রচেষ্টা considered a . ||| lm_0=-72.451 lm_1=-75.006 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-14.000 tm_pt_3=-0.000 tm_pt_4=-12.000 tm_pt_5=-60.033 tm_pt_6=-41.464 tm_pt_7=-14.000 tm_pt_8=-38.052 tm_pt_9=-7.241 tm_pt_10=-53.905 tm_pt_11=-0.000 tm_pt_12=-11.703 tm_pt_13=-0.000 tm_pt_14=-16.000 tm_pt_15=-1.000 tm_pt_16=-0.000 tm_glue_0=18.000 WordPenalty=-10.423 OOVPenalty=-600.000 ||| -787.554
-63 ||| in 1989 the গণশত্রু film his conducted তুলনামূলকভাবে দুর্বল and this দীর্ঘদিনের অসুস্থতাশেষে back , after cinema building পুনর্প্রচেষ্টা a considered . ||| lm_0=-75.470 lm_1=-75.006 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-14.000 tm_pt_3=-0.000 tm_pt_4=-13.000 tm_pt_5=-54.205 tm_pt_6=-41.751 tm_pt_7=-14.000 tm_pt_8=-38.052 tm_pt_9=-7.241 tm_pt_10=-47.536 tm_pt_11=-0.000 tm_pt_12=-11.703 tm_pt_13=-0.000 tm_pt_14=-16.000 tm_pt_15=-1.000 tm_pt_16=-0.000 tm_glue_0=19.000 WordPenalty=-10.423 OOVPenalty=-600.000 ||| -787.657
-63 ||| in 1989 the গণশত্রু film his between তুলনামূলকভাবে দুর্বল and this দীর্ঘদিনের অসুস্থতাশেষে back , after cinema building পুনর্প্রচেষ্টা considered a . ||| lm_0=-73.629 lm_1=-75.006 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-14.000 tm_pt_3=-0.000 tm_pt_4=-11.000 tm_pt_5=-56.876 tm_pt_6=-42.444 tm_pt_7=-14.000 tm_pt_8=-38.052 tm_pt_9=-6.609 tm_pt_10=-48.730 tm_pt_11=-0.000 tm_pt_12=-11.233 tm_pt_13=-0.000 tm_pt_14=-16.000 tm_pt_15=-1.000 tm_pt_16=-0.000 tm_glue_0=17.000 WordPenalty=-10.423 OOVPenalty=-600.000 ||| -787.680
-63 ||| in 1989 the গণশত্রু film his conducted তুলনামূলকভাবে দুর্বল and this দীর্ঘদিনের অসুস্থতাশেষে back , after cinema , পুনর্প্রচেষ্টা a considered . ||| lm_0=-73.428 lm_1=-75.006 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-14.000 tm_pt_3=-0.000 tm_pt_4=-13.000 tm_pt_5=-60.033 tm_pt_6=-41.464 tm_pt_7=-14.000 tm_pt_8=-38.052 tm_pt_9=-7.241 tm_pt_10=-53.722 tm_pt_11=-0.000 tm_pt_12=-11.703 tm_pt_13=-0.000 tm_pt_14=-16.000 tm_pt_15=-1.000 tm_pt_16=-0.000 tm_glue_0=19.000 WordPenalty=-10.423 OOVPenalty=-600.000 ||| -787.751
-63 ||| in 1989 the গণশত্রু film his between তুলনামূলকভাবে দুর্বল and this দীর্ঘদিনের অসুস্থতাশেষে back , after cinema , পুনর্প্রচেষ্টা considered a . ||| lm_0=-71.586 lm_1=-75.006 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-14.000 tm_pt_3=-0.000 tm_pt_4=-11.000 tm_pt_5=-62.704 tm_pt_6=-42.157 tm_pt_7=-14.000 tm_pt_8=-38.052 tm_pt_9=-6.609 tm_pt_10=-54.916 tm_pt_11=-0.000 tm_pt_12=-11.233 tm_pt_13=-0.000 tm_pt_14=-16.000 tm_pt_15=-1.000 tm_pt_16=-0.000 tm_glue_0=17.000 WordPenalty=-10.423 OOVPenalty=-600.000 ||| -787.774
-63 ||| in 1989 the গণশত্রু film his between তুলনামূলকভাবে দুর্বল and this দীর্ঘদিনের অসুস্থতাশেষে back , after cinema building পুনর্প্রচেষ্টা a considered . ||| lm_0=-74.606 lm_1=-75.006 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-14.000 tm_pt_3=-0.000 tm_pt_4=-12.000 tm_pt_5=-56.876 tm_pt_6=-42.444 tm_pt_7=-14.000 tm_pt_8=-38.052 tm_pt_9=-6.609 tm_pt_10=-48.547 tm_pt_11=-0.000 tm_pt_12=-11.233 tm_pt_13=-0.000 tm_pt_14=-16.000 tm_pt_15=-1.000 tm_pt_16=-0.000 tm_glue_0=18.000 WordPenalty=-10.423 OOVPenalty=-600.000 ||| -787.877
-63 ||| in 1989 the গণশত্রু film his , তুলনামূলকভাবে দুর্বল and this দীর্ঘদিনের অসুস্থতাশেষে back , after cinema building পুনর্প্রচেষ্টা considered a . ||| lm_0=-72.352 lm_1=-75.006 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-14.000 tm_pt_3=-0.000 tm_pt_4=-12.000 tm_pt_5=-60.164 tm_pt_6=-41.346 tm_pt_7=-14.000 tm_pt_8=-38.052 tm_pt_9=-7.241 tm_pt_10=-55.850 tm_pt_11=-0.000 tm_pt_12=-11.703 tm_pt_13=-0.000 tm_pt_14=-16.000 tm_pt_15=-1.000 tm_pt_16=-0.000 tm_glue_0=18.000 WordPenalty=-10.423 OOVPenalty=-600.000 ||| -787.921
-63 ||| in 1989 the গণশত্রু film his between তুলনামূলকভাবে দুর্বল and this দীর্ঘদিনের অসুস্থতাশেষে back , after cinema , পুনর্প্রচেষ্টা a considered . ||| lm_0=-72.564 lm_1=-75.006 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-14.000 tm_pt_3=-0.000 tm_pt_4=-12.000 tm_pt_5=-62.704 tm_pt_6=-42.157 tm_pt_7=-14.000 tm_pt_8=-38.052 tm_pt_9=-6.609 tm_pt_10=-54.734 tm_pt_11=-0.000 tm_pt_12=-11.233 tm_pt_13=-0.000 tm_pt_14=-16.000 tm_pt_15=-1.000 tm_pt_16=-0.000 tm_glue_0=18.000 WordPenalty=-10.423 OOVPenalty=-600.000 ||| -787.971
-63 ||| in 1989 the গণশত্রু film his , তুলনামূলকভাবে দুর্বল and this দীর্ঘদিনের অসুস্থতাশেষে back , after cinema , পুনর্প্রচেষ্টা considered a . ||| lm_0=-70.310 lm_1=-75.006 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-14.000 tm_pt_3=-0.000 tm_pt_4=-12.000 tm_pt_5=-65.992 tm_pt_6=-41.058 tm_pt_7=-14.000 tm_pt_8=-38.052 tm_pt_9=-7.241 tm_pt_10=-62.037 tm_pt_11=-0.000 tm_pt_12=-11.703 tm_pt_13=-0.000 tm_pt_14=-16.000 tm_pt_15=-1.000 tm_pt_16=-0.000 tm_glue_0=18.000 WordPenalty=-10.423 OOVPenalty=-600.000 ||| -788.015
-64 ||| the বলবিদ্যা ||| lm_0=-8.364 lm_1=-9.783 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-1.000 tm_pt_3=-0.000 tm_pt_4=-1.000 tm_pt_5=-8.263 tm_pt_6=-3.559 tm_pt_7=-1.000 tm_pt_8=-2.718 tm_pt_9=-0.368 tm_pt_10=-7.494 tm_pt_11=-0.000 tm_pt_12=-1.253 tm_pt_13=-0.000 tm_pt_14=-1.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=2.000 WordPenalty=-1.737 OOVPenalty=-100.000 ||| -120.385
-64 ||| mathematical বলবিদ্যা ||| lm_0=-9.917 lm_1=-9.783 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-1.000 tm_pt_3=-0.000 tm_pt_4=-1.000 tm_pt_5=-1.823 tm_pt_6=-2.461 tm_pt_7=-1.000 tm_pt_8=-2.718 tm_pt_9=-1.000 tm_pt_10=-4.466 tm_pt_11=-0.000 tm_pt_12=-1.946 tm_pt_13=-0.000 tm_pt_14=-1.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=2.000 WordPenalty=-1.737 OOVPenalty=-100.000 ||| -120.677
-64 ||| • বলবিদ্যা ||| lm_0=-11.469 lm_1=-9.783 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-1.000 tm_pt_3=-0.000 tm_pt_4=-1.000 tm_pt_5=0.000 tm_pt_6=-4.812 tm_pt_7=-1.000 tm_pt_8=-2.718 tm_pt_9=-0.368 tm_pt_10=0.000 tm_pt_11=-0.000 tm_pt_12=-1.253 tm_pt_13=-0.000 tm_pt_14=-1.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=2.000 WordPenalty=-1.737 OOVPenalty=-100.000 ||| -121.130
-64 ||| . বলবিদ্যা ||| lm_0=-10.010 lm_1=-9.783 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-1.000 tm_pt_3=-0.000 tm_pt_4=-0.000 tm_pt_5=-6.855 tm_pt_6=-2.561 tm_pt_7=-1.000 tm_pt_8=-2.718 tm_pt_9=-0.018 tm_pt_10=-3.219 tm_pt_11=-0.000 tm_pt_12=-1.649 tm_pt_13=-0.000 tm_pt_14=-1.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=1.000 WordPenalty=-1.737 OOVPenalty=-100.000 ||| -121.521
-64 ||| বলবিদ্যা mathematical theory . ||| lm_0=-12.880 lm_1=-16.306 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-1.000 tm_pt_3=-0.000 tm_pt_4=-0.000 tm_pt_5=-13.157 tm_pt_6=-2.797 tm_pt_7=-1.000 tm_pt_8=-2.718 tm_pt_9=-1.000 tm_pt_10=0.000 tm_pt_11=-0.000 tm_pt_12=-3.258 tm_pt_13=-0.000 tm_pt_14=-3.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=1.000 WordPenalty=-2.606 OOVPenalty=-100.000 ||| -131.102
-64 ||| mathematical theory . বলবিদ্যা ||| lm_0=-15.855 lm_1=-16.306 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-1.000 tm_pt_3=-0.000 tm_pt_4=-1.000 tm_pt_5=-13.157 tm_pt_6=-2.797 tm_pt_7=-1.000 tm_pt_8=-2.718 tm_pt_9=-1.000 tm_pt_10=0.000 tm_pt_11=-0.000 tm_pt_12=-1.946 tm_pt_13=-0.000 tm_pt_14=-3.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=2.000 WordPenalty=-2.606 OOVPenalty=-100.000 ||| -133.354
-65 ||| other স্বত্ত্ব-সংরক্ষিত linux operating system like windows and mac os to acquire different . ||| lm_0=-34.003 lm_1=-48.917 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-9.000 tm_pt_3=-0.000 tm_pt_4=-4.000 tm_pt_5=-19.554 tm_pt_6=-11.681 tm_pt_7=-9.000 tm_pt_8=-24.462 tm_pt_9=-2.503 tm_pt_10=-4.595 tm_pt_11=-0.000 tm_pt_12=-2.985 tm_pt_13=-0.000 tm_pt_14=-13.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=5.000 WordPenalty=-6.949 OOVPenalty=-100.000 ||| -190.224
-65 ||| other স্বত্ত্ব-সংরক্ষিত operating system like windows and mac os to linux acquire different . ||| lm_0=-36.650 lm_1=-48.917 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-9.000 tm_pt_3=-0.000 tm_pt_4=-7.000 tm_pt_5=-19.804 tm_pt_6=-11.885 tm_pt_7=-9.000 tm_pt_8=-24.462 tm_pt_9=-2.135 tm_pt_10=-5.647 tm_pt_11=-0.000 tm_pt_12=-3.453 tm_pt_13=-0.000 tm_pt_14=-13.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=8.000 WordPenalty=-6.949 OOVPenalty=-100.000 ||| -190.860
-65 ||| other স্বত্ত্ব-সংরক্ষিত operating systems like windows and mac os to linux acquire different . ||| lm_0=-36.234 lm_1=-48.917 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-9.000 tm_pt_3=-0.000 tm_pt_4=-7.000 tm_pt_5=-19.646 tm_pt_6=-12.587 tm_pt_7=-9.000 tm_pt_8=-24.462 tm_pt_9=-2.135 tm_pt_10=-5.555 tm_pt_11=-0.000 tm_pt_12=-4.870 tm_pt_13=-0.000 tm_pt_14=-13.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=8.000 WordPenalty=-6.949 OOVPenalty=-100.000 ||| -191.033
-65 ||| other স্বত্ত্ব-সংরক্ষিত linux operating system like windows and mac os to acquire a . ||| lm_0=-31.792 lm_1=-48.917 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-10.000 tm_pt_3=-0.000 tm_pt_4=-5.000 tm_pt_5=-22.787 tm_pt_6=-12.731 tm_pt_7=-10.000 tm_pt_8=-27.180 tm_pt_9=-2.503 tm_pt_10=-11.959 tm_pt_11=-0.000 tm_pt_12=-6.033 tm_pt_13=-0.000 tm_pt_14=-13.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=6.000 WordPenalty=-6.949 OOVPenalty=-100.000 ||| -191.644
-65 ||| other স্বত্ত্ব-সংরক্ষিত linux operating system like windows and mac os to acquire separate . ||| lm_0=-34.986 lm_1=-48.917 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-10.000 tm_pt_3=-0.000 tm_pt_4=-5.000 tm_pt_5=-17.756 tm_pt_6=-12.480 tm_pt_7=-10.000 tm_pt_8=-27.180 tm_pt_9=-1.553 tm_pt_10=-5.857 tm_pt_11=-0.000 tm_pt_12=-4.647 tm_pt_13=-0.000 tm_pt_14=-13.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=6.000 WordPenalty=-6.949 OOVPenalty=-100.000 ||| -191.696
-66 ||| asia টাইমসের ভাষ্য to , ||| lm_0=-20.278 lm_1=-19.567 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-3.000 tm_pt_3=-0.000 tm_pt_4=-3.000 tm_pt_5=-7.205 tm_pt_6=-3.750 tm_pt_7=-3.000 tm_pt_8=-8.154 tm_pt_9=-0.368 tm_pt_10=-7.348 tm_pt_11=-0.000 tm_pt_12=-2.553 tm_pt_13=-0.000 tm_pt_14=-3.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=5.000 WordPenalty=-3.040 OOVPenalty=-200.000 ||| -241.426
-66 ||| asia টাইমসের ভাষ্য according to , ||| lm_0=-21.778 lm_1=-22.828 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-3.000 tm_pt_3=-0.000 tm_pt_4=-3.000 tm_pt_5=-9.191 tm_pt_6=-3.783 tm_pt_7=-3.000 tm_pt_8=-8.154 tm_pt_9=-0.000 tm_pt_10=-2.160 tm_pt_11=-0.000 tm_pt_12=-0.413 tm_pt_13=-0.000 tm_pt_14=-4.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=5.000 WordPenalty=-3.474 OOVPenalty=-200.000 ||| -243.372
-66 ||| the টাইমসের ভাষ্য to , ||| lm_0=-17.775 lm_1=-19.567 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-3.000 tm_pt_3=-0.000 tm_pt_4=-3.000 tm_pt_5=-15.228 tm_pt_6=-6.778 tm_pt_7=-3.000 tm_pt_8=-8.154 tm_pt_9=-0.503 tm_pt_10=-13.835 tm_pt_11=-0.000 tm_pt_12=-5.498 tm_pt_13=-0.000 tm_pt_14=-3.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=5.000 WordPenalty=-3.040 OOVPenalty=-200.000 ||| -243.736
-66 ||| asia টাইমসের according to ভাষ্য , ||| lm_0=-21.781 lm_1=-22.828 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-3.000 tm_pt_3=-0.000 tm_pt_4=-2.000 tm_pt_5=-9.191 tm_pt_6=-3.783 tm_pt_7=-3.000 tm_pt_8=-8.154 tm_pt_9=-0.002 tm_pt_10=-2.232 tm_pt_11=-0.000 tm_pt_12=-0.468 tm_pt_13=-0.000 tm_pt_14=-4.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=4.000 WordPenalty=-3.474 OOVPenalty=-200.000 ||| -244.380
-66 ||| টাইমসের asia ভাষ্য to , ||| lm_0=-20.816 lm_1=-19.567 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-3.000 tm_pt_3=-0.000 tm_pt_4=-2.000 tm_pt_5=-7.205 tm_pt_6=-3.750 tm_pt_7=-3.000 tm_pt_8=-8.154 tm_pt_9=-0.368 tm_pt_10=-9.649 tm_pt_11=-0.000 tm_pt_12=-4.609 tm_pt_13=-0.000 tm_pt_14=-3.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=4.000 WordPenalty=-3.040 OOVPenalty=-200.000 ||| -244.381
-67 ||| open source or open source -lrb- open source -rrb- the money is computer software the source code or the সাংকেতিক language open way বিতরণ to . ||| lm_0=-69.951 lm_1=-88.050 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-19.000 tm_pt_3=-0.000 tm_pt_4=-19.000 tm_pt_5=-47.236 tm_pt_6=-22.736 tm_pt_7=-19.000 tm_pt_8=-51.642 tm_pt_9=-2.508 tm_pt_10=-24.834 tm_pt_11=-0.000 tm_pt_12=-7.172 tm_pt_13=-0.000 tm_pt_14=-22.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=23.000 WordPenalty=-12.160 OOVPenalty=-400.000 ||| -570.805
-67 ||| open source or open source -lrb- open source -rrb- the money is computer software the source code or main সাংকেতিক language open way বিতরণ to . ||| lm_0=-71.727 lm_1=-88.050 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-19.000 tm_pt_3=-0.000 tm_pt_4=-19.000 tm_pt_5=-42.789 tm_pt_6=-21.988 tm_pt_7=-19.000 tm_pt_8=-51.642 tm_pt_9=-1.876 tm_pt_10=-23.736 tm_pt_11=-0.000 tm_pt_12=-6.479 tm_pt_13=-0.000 tm_pt_14=-22.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=23.000 WordPenalty=-12.160 OOVPenalty=-400.000 ||| -570.848
-67 ||| open source or open source -lrb- open source -rrb- the money is computer software the source code or the সাংকেতিক language open way বিতরণ is . ||| lm_0=-70.022 lm_1=-88.050 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-19.000 tm_pt_3=-0.000 tm_pt_4=-19.000 tm_pt_5=-47.949 tm_pt_6=-23.336 tm_pt_7=-19.000 tm_pt_8=-51.642 tm_pt_9=-2.508 tm_pt_10=-24.678 tm_pt_11=-0.000 tm_pt_12=-7.134 tm_pt_13=-0.000 tm_pt_14=-22.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=23.000 WordPenalty=-12.160 OOVPenalty=-400.000 ||| -571.180
-67 ||| open source or open source -lrb- open source -rrb- the money is computer software the source code or the সাংকেতিক language বিতরণ to open way . ||| lm_0=-68.179 lm_1=-88.050 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-18.000 tm_pt_3=-0.000 tm_pt_4=-16.000 tm_pt_5=-47.236 tm_pt_6=-22.736 tm_pt_7=-18.000 tm_pt_8=-48.924 tm_pt_9=-2.875 tm_pt_10=-23.924 tm_pt_11=-0.000 tm_pt_12=-6.527 tm_pt_13=-0.000 tm_pt_14=-22.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=19.000 WordPenalty=-12.160 OOVPenalty=-400.000 ||| -571.187
-67 ||| open source or open source -lrb- open source -rrb- the money is computer software the source code or main সাংকেতিক language open way বিতরণ is . ||| lm_0=-71.798 lm_1=-88.050 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-19.000 tm_pt_3=-0.000 tm_pt_4=-19.000 tm_pt_5=-43.503 tm_pt_6=-22.588 tm_pt_7=-19.000 tm_pt_8=-51.642 tm_pt_9=-1.876 tm_pt_10=-23.580 tm_pt_11=-0.000 tm_pt_12=-6.441 tm_pt_13=-0.000 tm_pt_14=-22.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=23.000 WordPenalty=-12.160 OOVPenalty=-400.000 ||| -571.223
-67 ||| open source or open source -lrb- open source -rrb- the money is computer software the source code or main সাংকেতিক language বিতরণ to open way . ||| lm_0=-69.955 lm_1=-88.050 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-18.000 tm_pt_3=-0.000 tm_pt_4=-16.000 tm_pt_5=-42.789 tm_pt_6=-21.988 tm_pt_7=-18.000 tm_pt_8=-48.924 tm_pt_9=-2.243 tm_pt_10=-22.825 tm_pt_11=-0.000 tm_pt_12=-5.834 tm_pt_13=-0.000 tm_pt_14=-22.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=19.000 WordPenalty=-12.160 OOVPenalty=-400.000 ||| -571.230
-67 ||| open source or open source -lrb- open source -rrb- the money is computer software the source code or the সাংকেতিক language free way বিতরণ to . ||| lm_0=-69.884 lm_1=-88.050 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-19.000 tm_pt_3=-0.000 tm_pt_4=-19.000 tm_pt_5=-47.210 tm_pt_6=-22.925 tm_pt_7=-19.000 tm_pt_8=-51.642 tm_pt_9=-2.511 tm_pt_10=-25.203 tm_pt_11=-0.000 tm_pt_12=-8.361 tm_pt_13=-0.000 tm_pt_14=-22.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=23.000 WordPenalty=-12.160 OOVPenalty=-400.000 ||| -571.300
-67 ||| open source or open source -lrb- open source -rrb- the money is computer software the source code or main সাংকেতিক language free way বিতরণ to . ||| lm_0=-71.660 lm_1=-88.050 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-19.000 tm_pt_3=-0.000 tm_pt_4=-19.000 tm_pt_5=-42.764 tm_pt_6=-22.177 tm_pt_7=-19.000 tm_pt_8=-51.642 tm_pt_9=-1.879 tm_pt_10=-24.104 tm_pt_11=-0.000 tm_pt_12=-7.668 tm_pt_13=-0.000 tm_pt_14=-22.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=23.000 WordPenalty=-12.160 OOVPenalty=-400.000 ||| -571.343
-67 ||| open source or open source -lrb- open source -rrb- the money is computer software the source code or the সাংকেতিক language open in বিতরণ to . ||| lm_0=-68.374 lm_1=-88.050 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-19.000 tm_pt_3=-0.000 tm_pt_4=-19.000 tm_pt_5=-51.736 tm_pt_6=-23.141 tm_pt_7=-19.000 tm_pt_8=-51.642 tm_pt_9=-2.557 tm_pt_10=-29.937 tm_pt_11=-0.000 tm_pt_12=-7.865 tm_pt_13=-0.000 tm_pt_14=-22.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=23.000 WordPenalty=-12.160 OOVPenalty=-400.000 ||| -571.462
-67 ||| open source or open source -lrb- open source -rrb- the money is computer software the source code or main সাংকেতিক language open in বিতরণ to . ||| lm_0=-70.151 lm_1=-88.050 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-19.000 tm_pt_3=-0.000 tm_pt_4=-19.000 tm_pt_5=-47.289 tm_pt_6=-22.394 tm_pt_7=-19.000 tm_pt_8=-51.642 tm_pt_9=-1.925 tm_pt_10=-28.839 tm_pt_11=-0.000 tm_pt_12=-7.172 tm_pt_13=-0.000 tm_pt_14=-22.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=23.000 WordPenalty=-12.160 OOVPenalty=-400.000 ||| -571.505
-68 ||| bangladesh অনলাইনে dhaka ||| lm_0=-12.999 lm_1=-13.045 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-2.000 tm_pt_3=-0.000 tm_pt_4=-2.000 tm_pt_5=-1.665 tm_pt_6=-0.471 tm_pt_7=-2.000 tm_pt_8=-5.436 tm_pt_9=-0.000 tm_pt_10=-0.375 tm_pt_11=-0.000 tm_pt_12=-0.055 tm_pt_13=-0.000 tm_pt_14=-2.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=3.000 WordPenalty=-2.171 OOVPenalty=-100.000 ||| -123.523
-68 ||| bangladesh অনলাইনে the ||| lm_0=-12.123 lm_1=-13.045 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-2.000 tm_pt_3=-0.000 tm_pt_4=-2.000 tm_pt_5=-7.966 tm_pt_6=-2.711 tm_pt_7=-2.000 tm_pt_8=-5.436 tm_pt_9=-0.000 tm_pt_10=-6.172 tm_pt_11=-0.000 tm_pt_12=-3.195 tm_pt_13=-0.000 tm_pt_14=-2.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=3.000 WordPenalty=-2.171 OOVPenalty=-100.000 ||| -127.024
-68 ||| bangladesh dhaka অনলাইনে ||| lm_0=-13.457 lm_1=-13.045 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-2.000 tm_pt_3=-0.000 tm_pt_4=-1.000 tm_pt_5=-1.665 tm_pt_6=-0.471 tm_pt_7=-2.000 tm_pt_8=-5.436 tm_pt_9=-0.007 tm_pt_10=-4.393 tm_pt_11=-0.000 tm_pt_12=-3.350 tm_pt_13=-0.000 tm_pt_14=-2.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=2.000 WordPenalty=-2.171 OOVPenalty=-100.000 ||| -127.267
-68 ||| অনলাইনে bangladesh dhaka ||| lm_0=-14.038 lm_1=-13.045 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-2.000 tm_pt_3=-0.000 tm_pt_4=-1.000 tm_pt_5=-1.665 tm_pt_6=-0.471 tm_pt_7=-2.000 tm_pt_8=-5.436 tm_pt_9=-0.000 tm_pt_10=-3.099 tm_pt_11=-0.000 tm_pt_12=-3.378 tm_pt_13=-0.000 tm_pt_14=-2.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=2.000 WordPenalty=-2.171 OOVPenalty=-100.000 ||| -127.655
-68 ||| অনলাইনে dhaka bangladesh ||| lm_0=-14.148 lm_1=-13.045 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-2.000 tm_pt_3=-0.000 tm_pt_4=-0.000 tm_pt_5=-1.665 tm_pt_6=-0.471 tm_pt_7=-2.000 tm_pt_8=-5.436 tm_pt_9=-0.000 tm_pt_10=-3.217 tm_pt_11=-0.000 tm_pt_12=-3.428 tm_pt_13=-0.000 tm_pt_14=-2.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=1.000 WordPenalty=-2.171 OOVPenalty=-100.000 ||| -128.804
-69 ||| first world war germany হেরে be . ||| lm_0=-20.261 lm_1=-26.089 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-5.000 tm_pt_3=-0.000 tm_pt_4=-5.000 tm_pt_5=-10.464 tm_pt_6=-4.004 tm_pt_7=-5.000 tm_pt_8=-13.590 tm_pt_9=-0.037 tm_pt_10=-5.752 tm_pt_11=-0.000 tm_pt_12=-3.057 tm_pt_13=-0.000 tm_pt_14=-6.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=6.000 WordPenalty=-3.909 OOVPenalty=-100.000 ||| -146.864
-69 ||| first world war germany হেরে easily . ||| lm_0=-20.090 lm_1=-26.089 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-4.000 tm_pt_3=-0.000 tm_pt_4=-4.000 tm_pt_5=-11.761 tm_pt_6=-7.860 tm_pt_7=-4.000 tm_pt_8=-10.872 tm_pt_9=-0.386 tm_pt_10=-2.970 tm_pt_11=-0.000 tm_pt_12=-1.870 tm_pt_13=-0.000 tm_pt_14=-6.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=5.000 WordPenalty=-3.909 OOVPenalty=-100.000 ||| -147.174
-69 ||| first world war germany হেরে can . ||| lm_0=-19.817 lm_1=-26.089 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-5.000 tm_pt_3=-0.000 tm_pt_4=-5.000 tm_pt_5=-10.407 tm_pt_6=-4.209 tm_pt_7=-5.000 tm_pt_8=-13.590 tm_pt_9=-0.386 tm_pt_10=-6.604 tm_pt_11=-0.000 tm_pt_12=-3.974 tm_pt_13=-0.000 tm_pt_14=-6.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=6.000 WordPenalty=-3.909 OOVPenalty=-100.000 ||| -147.206
-69 ||| first world war germany হেরে be ||| lm_0=-20.105 lm_1=-22.828 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-4.000 tm_pt_3=-0.000 tm_pt_4=-4.000 tm_pt_5=-11.045 tm_pt_6=-11.540 tm_pt_7=-4.000 tm_pt_8=-10.872 tm_pt_9=-1.018 tm_pt_10=-7.352 tm_pt_11=-0.000 tm_pt_12=-2.563 tm_pt_13=-0.000 tm_pt_14=-5.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=5.000 WordPenalty=-3.474 OOVPenalty=-100.000 ||| -148.112
-69 ||| germany first world war হেরে be ||| lm_0=-19.984 lm_1=-22.828 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-4.000 tm_pt_3=-0.000 tm_pt_4=-3.000 tm_pt_5=-11.045 tm_pt_6=-11.540 tm_pt_7=-4.000 tm_pt_8=-10.872 tm_pt_9=-1.050 tm_pt_10=-6.128 tm_pt_11=-0.000 tm_pt_12=-2.563 tm_pt_13=-0.000 tm_pt_14=-5.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=3.000 WordPenalty=-3.474 OOVPenalty=-100.000 ||| -149.635
-69 ||| first world war german হেরে be ||| lm_0=-19.107 lm_1=-22.828 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-4.000 tm_pt_3=-0.000 tm_pt_4=-4.000 tm_pt_5=-12.991 tm_pt_6=-13.951 tm_pt_7=-4.000 tm_pt_8=-10.872 tm_pt_9=-1.037 tm_pt_10=-9.945 tm_pt_11=-0.000 tm_pt_12=-5.173 tm_pt_13=-0.000 tm_pt_14=-5.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=5.000 WordPenalty=-3.474 OOVPenalty=-100.000 ||| -149.683
-69 ||| the first world war germany হেরে be ||| lm_0=-19.326 lm_1=-26.089 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-3.000 tm_pt_3=-0.000 tm_pt_4=-3.000 tm_pt_5=-15.740 tm_pt_6=-11.783 tm_pt_7=-3.000 tm_pt_8=-8.154 tm_pt_9=-1.135 tm_pt_10=-6.129 tm_pt_11=-0.000 tm_pt_12=-2.191 tm_pt_13=-0.000 tm_pt_14=-6.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=4.000 WordPenalty=-3.909 OOVPenalty=-100.000 ||| -149.688
-70 ||| but this is to for even research to going on . ||| lm_0=-25.403 lm_1=-39.134 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-10.000 tm_pt_3=-0.000 tm_pt_4=-10.000 tm_pt_5=-41.595 tm_pt_6=-11.055 tm_pt_7=-10.000 tm_pt_8=-27.180 tm_pt_9=-3.503 tm_pt_10=-30.198 tm_pt_11=-0.000 tm_pt_12=-7.719 tm_pt_13=-0.000 tm_pt_14=-11.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=10.000 WordPenalty=-5.646 ||| -82.457
-70 ||| but this is to for even research progress going on . ||| lm_0=-27.145 lm_1=-39.134 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-10.000 tm_pt_3=-0.000 tm_pt_4=-10.000 tm_pt_5=-37.087 tm_pt_6=-11.507 tm_pt_7=-10.000 tm_pt_8=-27.180 tm_pt_9=-3.503 tm_pt_10=-25.502 tm_pt_11=-0.000 tm_pt_12=-7.719 tm_pt_13=-0.000 tm_pt_14=-11.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=10.000 WordPenalty=-5.646 ||| -82.685
-70 ||| but this is to even for research to going on . ||| lm_0=-24.693 lm_1=-39.134 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-10.000 tm_pt_3=-0.000 tm_pt_4=-9.000 tm_pt_5=-41.595 tm_pt_6=-11.055 tm_pt_7=-10.000 tm_pt_8=-27.180 tm_pt_9=-3.503 tm_pt_10=-30.710 tm_pt_11=-0.000 tm_pt_12=-8.104 tm_pt_13=-0.000 tm_pt_14=-11.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=9.000 WordPenalty=-5.646 ||| -82.811
-70 ||| but this is to even for research progress going on . ||| lm_0=-26.435 lm_1=-39.134 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-10.000 tm_pt_3=-0.000 tm_pt_4=-9.000 tm_pt_5=-37.087 tm_pt_6=-11.507 tm_pt_7=-10.000 tm_pt_8=-27.180 tm_pt_9=-3.503 tm_pt_10=-26.014 tm_pt_11=-0.000 tm_pt_12=-8.104 tm_pt_13=-0.000 tm_pt_14=-11.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=9.000 WordPenalty=-5.646 ||| -83.040
-70 ||| but this is to for even research to going on ||| lm_0=-26.717 lm_1=-35.872 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-9.000 tm_pt_3=-0.000 tm_pt_4=-9.000 tm_pt_5=-41.411 tm_pt_6=-19.178 tm_pt_7=-9.000 tm_pt_8=-24.462 tm_pt_9=-3.503 tm_pt_10=-30.189 tm_pt_11=-0.000 tm_pt_12=-7.023 tm_pt_13=-0.000 tm_pt_14=-10.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=9.000 WordPenalty=-5.212 ||| -84.283
-70 ||| but this is to for even research progress going on ||| lm_0=-28.458 lm_1=-35.872 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-9.000 tm_pt_3=-0.000 tm_pt_4=-9.000 tm_pt_5=-36.903 tm_pt_6=-19.630 tm_pt_7=-9.000 tm_pt_8=-24.462 tm_pt_9=-3.503 tm_pt_10=-25.493 tm_pt_11=-0.000 tm_pt_12=-7.023 tm_pt_13=-0.000 tm_pt_14=-10.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=9.000 WordPenalty=-5.212 ||| -84.511
-70 ||| but this is to even for research to going on ||| lm_0=-26.007 lm_1=-35.872 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-9.000 tm_pt_3=-0.000 tm_pt_4=-8.000 tm_pt_5=-41.411 tm_pt_6=-19.178 tm_pt_7=-9.000 tm_pt_8=-24.462 tm_pt_9=-3.503 tm_pt_10=-30.701 tm_pt_11=-0.000 tm_pt_12=-7.407 tm_pt_13=-0.000 tm_pt_14=-10.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=8.000 WordPenalty=-5.212 ||| -84.637
-70 ||| but this is to for presently research to going on ||| lm_0=-27.162 lm_1=-35.872 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-9.000 tm_pt_3=-0.000 tm_pt_4=-9.000 tm_pt_5=-40.742 tm_pt_6=-19.178 tm_pt_7=-9.000 tm_pt_8=-24.462 tm_pt_9=-3.503 tm_pt_10=-30.255 tm_pt_11=-0.000 tm_pt_12=-7.023 tm_pt_13=-0.000 tm_pt_14=-10.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=9.000 WordPenalty=-5.212 ||| -84.722
-70 ||| but this is to presently for research to going on ||| lm_0=-26.172 lm_1=-35.872 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-9.000 tm_pt_3=-0.000 tm_pt_4=-8.000 tm_pt_5=-40.742 tm_pt_6=-19.178 tm_pt_7=-9.000 tm_pt_8=-24.462 tm_pt_9=-3.503 tm_pt_10=-30.766 tm_pt_11=-0.000 tm_pt_12=-7.407 tm_pt_13=-0.000 tm_pt_14=-10.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=8.000 WordPenalty=-5.212 ||| -84.730
-70 ||| but this is to even for research progress going on ||| lm_0=-27.748 lm_1=-35.872 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-9.000 tm_pt_3=-0.000 tm_pt_4=-8.000 tm_pt_5=-36.903 tm_pt_6=-19.630 tm_pt_7=-9.000 tm_pt_8=-24.462 tm_pt_9=-3.503 tm_pt_10=-26.005 tm_pt_11=-0.000 tm_pt_12=-7.407 tm_pt_13=-0.000 tm_pt_14=-10.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=8.000 WordPenalty=-5.212 ||| -84.866
-71 ||| সুপারএইচ ||| lm_0=-7.355 lm_1=-6.522 tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=0.000 tm_pt_3=0.000 tm_pt_4=0.000 tm_pt_5=0.000 tm_pt_6=0.000 tm_pt_7=0.000 tm_pt_8=0.000 tm_pt_9=0.000 tm_pt_10=0.000 tm_pt_11=0.000 tm_pt_12=0.000 tm_pt_13=0.000 tm_pt_14=0.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=1.000 WordPenalty=-1.303 OOVPenalty=-100.000 ||| -111.358
-72 ||| he army for আনফিট was declared . ||| lm_0=-20.183 lm_1=-26.089 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-6.000 tm_pt_3=-0.000 tm_pt_4=-4.000 tm_pt_5=-12.054 tm_pt_6=-13.977 tm_pt_7=-6.000 tm_pt_8=-16.308 tm_pt_9=-0.368 tm_pt_10=-13.455 tm_pt_11=-0.000 tm_pt_12=-3.627 tm_pt_13=-0.000 tm_pt_14=-6.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=5.000 WordPenalty=-3.909 OOVPenalty=-100.000 ||| -155.071
-72 ||| he army for আনফিট declared was . ||| lm_0=-21.450 lm_1=-26.089 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-6.000 tm_pt_3=-0.000 tm_pt_4=-5.000 tm_pt_5=-12.054 tm_pt_6=-13.977 tm_pt_7=-6.000 tm_pt_8=-16.308 tm_pt_9=-0.368 tm_pt_10=-12.735 tm_pt_11=-0.000 tm_pt_12=-3.541 tm_pt_13=-0.000 tm_pt_14=-6.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=6.000 WordPenalty=-3.909 OOVPenalty=-100.000 ||| -155.456
-72 ||| he army for আনফিট declared the . ||| lm_0=-20.541 lm_1=-26.089 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-6.000 tm_pt_3=-0.000 tm_pt_4=-5.000 tm_pt_5=-14.810 tm_pt_6=-14.785 tm_pt_7=-6.000 tm_pt_8=-16.308 tm_pt_9=-0.368 tm_pt_10=-14.635 tm_pt_11=-0.000 tm_pt_12=-4.030 tm_pt_13=-0.000 tm_pt_14=-6.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=6.000 WordPenalty=-3.909 OOVPenalty=-100.000 ||| -155.801
-72 ||| he army to আনফিট was declared . ||| lm_0=-19.972 lm_1=-26.089 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-6.000 tm_pt_3=-0.000 tm_pt_4=-4.000 tm_pt_5=-13.940 tm_pt_6=-14.814 tm_pt_7=-6.000 tm_pt_8=-16.308 tm_pt_9=-0.368 tm_pt_10=-15.536 tm_pt_11=-0.000 tm_pt_12=-4.934 tm_pt_13=-0.000 tm_pt_14=-6.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=5.000 WordPenalty=-3.909 OOVPenalty=-100.000 ||| -156.459
-72 ||| he army for আনফিট declared in . ||| lm_0=-21.040 lm_1=-26.089 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-6.000 tm_pt_3=-0.000 tm_pt_4=-5.000 tm_pt_5=-14.187 tm_pt_6=-15.095 tm_pt_7=-6.000 tm_pt_8=-16.308 tm_pt_9=-0.368 tm_pt_10=-14.207 tm_pt_11=-0.000 tm_pt_12=-4.672 tm_pt_13=-0.000 tm_pt_14=-6.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=6.000 WordPenalty=-3.909 OOVPenalty=-100.000 ||| -156.523
-72 ||| he army আনফিট for was declared . ||| lm_0=-20.562 lm_1=-26.089 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-6.000 tm_pt_3=-0.000 tm_pt_4=-3.000 tm_pt_5=-12.054 tm_pt_6=-13.977 tm_pt_7=-6.000 tm_pt_8=-16.308 tm_pt_9=-0.368 tm_pt_10=-13.967 tm_pt_11=-0.000 tm_pt_12=-4.011 tm_pt_13=-0.000 tm_pt_14=-6.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=4.000 WordPenalty=-3.909 OOVPenalty=-100.000 ||| -156.773
-72 ||| he army to আনফিট declared was . ||| lm_0=-21.239 lm_1=-26.089 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-6.000 tm_pt_3=-0.000 tm_pt_4=-5.000 tm_pt_5=-13.940 tm_pt_6=-14.814 tm_pt_7=-6.000 tm_pt_8=-16.308 tm_pt_9=-0.368 tm_pt_10=-14.815 tm_pt_11=-0.000 tm_pt_12=-4.848 tm_pt_13=-0.000 tm_pt_14=-6.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=6.000 WordPenalty=-3.909 OOVPenalty=-100.000 ||| -156.845
-73 ||| bhutto এ্যাসেম্বলি বয়কট to 2.5 with announced that the yahya khan mujib form government for for জানালে he no government has নেবেন . ||| lm_0=-74.299 lm_1=-78.267 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-15.000 tm_pt_3=-0.000 tm_pt_4=-12.000 tm_pt_5=-51.550 tm_pt_6=-32.711 tm_pt_7=-15.000 tm_pt_8=-40.770 tm_pt_9=-6.387 tm_pt_10=-32.377 tm_pt_11=-0.000 tm_pt_12=-6.725 tm_pt_13=-0.000 tm_pt_14=-19.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=16.000 WordPenalty=-10.857 OOVPenalty=-400.000 ||| -581.226
-73 ||| bhutto এ্যাসেম্বলি বয়কট to 2.5 with announced that the yahya khan was form government for for জানালে he no government has নেবেন . ||| lm_0=-72.148 lm_1=-78.267 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-15.000 tm_pt_3=-0.000 tm_pt_4=-12.000 tm_pt_5=-54.468 tm_pt_6=-32.962 tm_pt_7=-15.000 tm_pt_8=-40.770 tm_pt_9=-7.369 tm_pt_10=-35.733 tm_pt_11=-0.000 tm_pt_12=-8.335 tm_pt_13=-0.000 tm_pt_14=-19.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=16.000 WordPenalty=-10.857 OOVPenalty=-400.000 ||| -581.453
-73 ||| bhutto এ্যাসেম্বলি বয়কট to 2.5 with announced that the yahya khan mujib form government for to জানালে he no government has নেবেন . ||| lm_0=-74.200 lm_1=-78.267 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-15.000 tm_pt_3=-0.000 tm_pt_4=-12.000 tm_pt_5=-53.005 tm_pt_6=-33.116 tm_pt_7=-15.000 tm_pt_8=-40.770 tm_pt_9=-6.387 tm_pt_10=-33.151 tm_pt_11=-0.000 tm_pt_12=-6.725 tm_pt_13=-0.000 tm_pt_14=-19.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=16.000 WordPenalty=-10.857 OOVPenalty=-400.000 ||| -581.721
-73 ||| bhutto এ্যাসেম্বলি বয়কট to 2.5 with declared in the yahya khan mujib form government for for জানালে he no government has নেবেন . ||| lm_0=-74.046 lm_1=-78.267 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-16.000 tm_pt_3=-0.000 tm_pt_4=-13.000 tm_pt_5=-50.735 tm_pt_6=-29.491 tm_pt_7=-16.000 tm_pt_8=-43.488 tm_pt_9=-5.455 tm_pt_10=-39.390 tm_pt_11=-0.000 tm_pt_12=-9.219 tm_pt_13=-0.000 tm_pt_14=-19.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=17.000 WordPenalty=-10.857 OOVPenalty=-400.000 ||| -581.784
-73 ||| bhutto এ্যাসেম্বলি বয়কট to 2.5 by mujib announced that the yahya khan form government for for জানালে he no government has নেবেন . ||| lm_0=-71.145 lm_1=-78.267 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-15.000 tm_pt_3=-0.000 tm_pt_4=-9.000 tm_pt_5=-52.464 tm_pt_6=-33.588 tm_pt_7=-15.000 tm_pt_8=-40.770 tm_pt_9=-6.376 tm_pt_10=-33.449 tm_pt_11=-0.000 tm_pt_12=-9.053 tm_pt_13=-0.000 tm_pt_14=-19.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=13.000 WordPenalty=-10.857 OOVPenalty=-400.000 ||| -581.784
-73 ||| bhutto এ্যাসেম্বলি বয়কট to 2.5 with mujib announced that the yahya khan form government for for জানালে he no government has নেবেন . ||| lm_0=-72.023 lm_1=-78.267 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-15.000 tm_pt_3=-0.000 tm_pt_4=-9.000 tm_pt_5=-51.550 tm_pt_6=-32.711 tm_pt_7=-15.000 tm_pt_8=-40.770 tm_pt_9=-6.376 tm_pt_10=-33.112 tm_pt_11=-0.000 tm_pt_12=-7.985 tm_pt_13=-0.000 tm_pt_14=-19.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=13.000 WordPenalty=-10.857 OOVPenalty=-400.000 ||| -581.930
-73 ||| bhutto এ্যাসেম্বলি বয়কট to 2.5 with announced that the yahya khan mujib government for the for জানালে he no government has নেবেন . ||| lm_0=-72.232 lm_1=-78.267 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-16.000 tm_pt_3=-0.000 tm_pt_4=-12.000 tm_pt_5=-55.212 tm_pt_6=-31.900 tm_pt_7=-16.000 tm_pt_8=-43.488 tm_pt_9=-5.522 tm_pt_10=-40.450 tm_pt_11=-0.000 tm_pt_12=-7.600 tm_pt_13=-0.000 tm_pt_14=-19.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=16.000 WordPenalty=-10.857 OOVPenalty=-400.000 ||| -581.941
-73 ||| bhutto এ্যাসেম্বলি বয়কট to 2.5 with announced that the yahya khan was form government for to জানালে he no government has নেবেন . ||| lm_0=-72.049 lm_1=-78.267 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-15.000 tm_pt_3=-0.000 tm_pt_4=-12.000 tm_pt_5=-55.923 tm_pt_6=-33.367 tm_pt_7=-15.000 tm_pt_8=-40.770 tm_pt_9=-7.369 tm_pt_10=-36.507 tm_pt_11=-0.000 tm_pt_12=-8.335 tm_pt_13=-0.000 tm_pt_14=-19.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=16.000 WordPenalty=-10.857 OOVPenalty=-400.000 ||| -581.949
-73 ||| bhutto এ্যাসেম্বলি বয়কট to 2.5 announced that with the yahya khan mujib form government for for জানালে he no government has নেবেন . ||| lm_0=-73.638 lm_1=-78.267 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-15.000 tm_pt_3=-0.000 tm_pt_4=-11.000 tm_pt_5=-51.550 tm_pt_6=-32.711 tm_pt_7=-15.000 tm_pt_8=-40.770 tm_pt_9=-6.405 tm_pt_10=-33.276 tm_pt_11=-0.000 tm_pt_12=-7.734 tm_pt_13=-0.000 tm_pt_14=-19.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=15.000 WordPenalty=-10.857 OOVPenalty=-400.000 ||| -581.978
-73 ||| bhutto এ্যাসেম্বলি বয়কট to 2.5 with announced that the yahya khan mujib form government for for জানালে he that government by নেবেন not . ||| lm_0=-77.742 lm_1=-81.528 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-17.000 tm_pt_3=-0.000 tm_pt_4=-17.000 tm_pt_5=-53.183 tm_pt_6=-24.907 tm_pt_7=-17.000 tm_pt_8=-46.206 tm_pt_9=-3.805 tm_pt_10=-34.741 tm_pt_11=-0.000 tm_pt_12=-8.898 tm_pt_13=-0.000 tm_pt_14=-20.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=21.000 WordPenalty=-11.292 OOVPenalty=-400.000 ||| -581.984
-74 ||| and computer words money গণনাকারী machine . ||| lm_0=-23.685 lm_1=-26.089 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-5.000 tm_pt_3=-0.000 tm_pt_4=-5.000 tm_pt_5=-8.448 tm_pt_6=-5.506 tm_pt_7=-5.000 tm_pt_8=-13.590 tm_pt_9=-0.002 tm_pt_10=-5.943 tm_pt_11=-0.000 tm_pt_12=-2.092 tm_pt_13=-0.000 tm_pt_14=-5.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=7.000 WordPenalty=-3.909 OOVPenalty=-200.000 ||| -250.238
-74 ||| the computer words money গণনাকারী machine . ||| lm_0=-22.025 lm_1=-26.089 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-5.000 tm_pt_3=-0.000 tm_pt_4=-5.000 tm_pt_5=-11.337 tm_pt_6=-7.378 tm_pt_7=-5.000 tm_pt_8=-13.590 tm_pt_9=-0.003 tm_pt_10=-7.382 tm_pt_11=-0.000 tm_pt_12=-4.299 tm_pt_13=-0.000 tm_pt_14=-5.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=7.000 WordPenalty=-3.909 OOVPenalty=-200.000 ||| -250.531
-74 ||| and computer word meaning গণনাকারী machine . ||| lm_0=-22.256 lm_1=-26.089 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-5.000 tm_pt_3=-0.000 tm_pt_4=-5.000 tm_pt_5=-9.368 tm_pt_6=-6.074 tm_pt_7=-5.000 tm_pt_8=-13.590 tm_pt_9=-0.375 tm_pt_10=-8.628 tm_pt_11=-0.000 tm_pt_12=-4.325 tm_pt_13=-0.000 tm_pt_14=-5.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=7.000 WordPenalty=-3.909 OOVPenalty=-200.000 ||| -250.632
-74 ||| and computer words means গণনাকারী machine . ||| lm_0=-23.552 lm_1=-26.089 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-5.000 tm_pt_3=-0.000 tm_pt_4=-5.000 tm_pt_5=-9.592 tm_pt_6=-5.858 tm_pt_7=-5.000 tm_pt_8=-13.590 tm_pt_9=-0.009 tm_pt_10=-7.376 tm_pt_11=-0.000 tm_pt_12=-3.073 tm_pt_13=-0.000 tm_pt_14=-5.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=7.000 WordPenalty=-3.909 OOVPenalty=-200.000 ||| -251.136
-74 ||| and computer words meaning গণনাকারী machine . ||| lm_0=-23.908 lm_1=-26.089 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-5.000 tm_pt_3=-0.000 tm_pt_4=-5.000 tm_pt_5=-8.565 tm_pt_6=-5.969 tm_pt_7=-5.000 tm_pt_8=-13.590 tm_pt_9=-0.009 tm_pt_10=-6.970 tm_pt_11=-0.000 tm_pt_12=-3.073 tm_pt_13=-0.000 tm_pt_14=-5.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=7.000 WordPenalty=-3.909 OOVPenalty=-200.000 ||| -251.313
-74 ||| and computer words the গণনাকারী machine . ||| lm_0=-21.894 lm_1=-26.089 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-5.000 tm_pt_3=-0.000 tm_pt_4=-5.000 tm_pt_5=-14.774 tm_pt_6=-6.030 tm_pt_7=-5.000 tm_pt_8=-13.590 tm_pt_9=-0.009 tm_pt_10=-12.066 tm_pt_11=-0.000 tm_pt_12=-3.073 tm_pt_13=-0.000 tm_pt_14=-5.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=7.000 WordPenalty=-3.909 OOVPenalty=-200.000 ||| -251.351
-74 ||| the computer words means গণনাকারী machine . ||| lm_0=-21.892 lm_1=-26.089 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-5.000 tm_pt_3=-0.000 tm_pt_4=-5.000 tm_pt_5=-12.480 tm_pt_6=-7.730 tm_pt_7=-5.000 tm_pt_8=-13.590 tm_pt_9=-0.009 tm_pt_10=-8.815 tm_pt_11=-0.000 tm_pt_12=-5.280 tm_pt_13=-0.000 tm_pt_14=-5.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=7.000 WordPenalty=-3.909 OOVPenalty=-200.000 ||| -251.429
-75 ||| on 4th july ১৭৭৬ this constituents a independence notice জারি . ||| lm_0=-36.395 lm_1=-39.134 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-8.000 tm_pt_3=-0.000 tm_pt_4=-6.000 tm_pt_5=-12.423 tm_pt_6=-14.837 tm_pt_7=-8.000 tm_pt_8=-21.744 tm_pt_9=-2.368 tm_pt_10=-11.577 tm_pt_11=-0.000 tm_pt_12=-4.190 tm_pt_13=-0.000 tm_pt_14=-9.000 tm_pt_15=-1.000 tm_pt_16=-0.000 tm_glue_0=7.000 WordPenalty=-5.646 OOVPenalty=-200.000 ||| -286.518
-75 ||| ১৭৭৬ on 4th july this constituents a independence notice জারি . ||| lm_0=-37.838 lm_1=-39.134 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-8.000 tm_pt_3=-0.000 tm_pt_4=-7.000 tm_pt_5=-12.423 tm_pt_6=-14.837 tm_pt_7=-8.000 tm_pt_8=-21.744 tm_pt_9=-2.368 tm_pt_10=-12.645 tm_pt_11=-0.000 tm_pt_12=-4.101 tm_pt_13=-0.000 tm_pt_14=-9.000 tm_pt_15=-1.000 tm_pt_16=-0.000 tm_glue_0=9.000 WordPenalty=-5.646 OOVPenalty=-200.000 ||| -286.584
-75 ||| on 4th july this ১৭৭৬ constituents a independence notice জারি . ||| lm_0=-36.395 lm_1=-39.134 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-8.000 tm_pt_3=-0.000 tm_pt_4=-5.000 tm_pt_5=-12.423 tm_pt_6=-14.837 tm_pt_7=-8.000 tm_pt_8=-21.744 tm_pt_9=-2.368 tm_pt_10=-11.577 tm_pt_11=-0.000 tm_pt_12=-3.784 tm_pt_13=-0.000 tm_pt_14=-9.000 tm_pt_15=-1.000 tm_pt_16=-0.000 tm_glue_0=6.000 WordPenalty=-5.646 OOVPenalty=-200.000 ||| -287.339
-75 ||| on 4th july ১৭৭৬ this constituents independence a notice জারি . ||| lm_0=-36.395 lm_1=-39.134 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-8.000 tm_pt_3=-0.000 tm_pt_4=-5.000 tm_pt_5=-12.423 tm_pt_6=-14.837 tm_pt_7=-8.000 tm_pt_8=-21.744 tm_pt_9=-2.418 tm_pt_10=-11.815 tm_pt_11=-0.000 tm_pt_12=-3.838 tm_pt_13=-0.000 tm_pt_14=-9.000 tm_pt_15=-1.000 tm_pt_16=-0.000 tm_glue_0=6.000 WordPenalty=-5.646 OOVPenalty=-200.000 ||| -287.460
-75 ||| ১৭৭৬ on 4th july this constituents independence a notice জারি . ||| lm_0=-37.838 lm_1=-39.134 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-8.000 tm_pt_3=-0.000 tm_pt_4=-6.000 tm_pt_5=-12.423 tm_pt_6=-14.837 tm_pt_7=-8.000 tm_pt_8=-21.744 tm_pt_9=-2.418 tm_pt_10=-12.882 tm_pt_11=-0.000 tm_pt_12=-3.748 tm_pt_13=-0.000 tm_pt_14=-9.000 tm_pt_15=-1.000 tm_pt_16=-0.000 tm_glue_0=8.000 WordPenalty=-5.646 OOVPenalty=-200.000 ||| -287.525
-75 ||| on 4th july ১৭৭৬ the constituents a independence notice জারি . ||| lm_0=-35.456 lm_1=-39.134 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-8.000 tm_pt_3=-0.000 tm_pt_4=-6.000 tm_pt_5=-15.758 tm_pt_6=-16.129 tm_pt_7=-8.000 tm_pt_8=-21.744 tm_pt_9=-2.368 tm_pt_10=-14.374 tm_pt_11=-0.000 tm_pt_12=-6.108 tm_pt_13=-0.000 tm_pt_14=-9.000 tm_pt_15=-1.000 tm_pt_16=-0.000 tm_glue_0=7.000 WordPenalty=-5.646 OOVPenalty=-200.000 ||| -287.840
-75 ||| on 4th july ১৭৭৬ this constituents a of notice জারি . ||| lm_0=-34.517 lm_1=-39.134 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-8.000 tm_pt_3=-0.000 tm_pt_4=-6.000 tm_pt_5=-17.112 tm_pt_6=-14.586 tm_pt_7=-8.000 tm_pt_8=-21.744 tm_pt_9=-3.368 tm_pt_10=-16.407 tm_pt_11=-0.000 tm_pt_12=-6.387 tm_pt_13=-0.000 tm_pt_14=-9.000 tm_pt_15=-1.000 tm_pt_16=-0.000 tm_glue_0=7.000 WordPenalty=-5.646 OOVPenalty=-200.000 ||| -287.855
-75 ||| ১৭৭৬ on 4th july this constituents a of notice জারি . ||| lm_0=-35.960 lm_1=-39.134 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-8.000 tm_pt_3=-0.000 tm_pt_4=-7.000 tm_pt_5=-17.112 tm_pt_6=-14.586 tm_pt_7=-8.000 tm_pt_8=-21.744 tm_pt_9=-3.368 tm_pt_10=-17.474 tm_pt_11=-0.000 tm_pt_12=-6.298 tm_pt_13=-0.000 tm_pt_14=-9.000 tm_pt_15=-1.000 tm_pt_16=-0.000 tm_glue_0=9.000 WordPenalty=-5.646 OOVPenalty=-200.000 ||| -287.921
-75 ||| on 4th july ১৭৭৬ this constituents a notice independence জারি . ||| lm_0=-36.395 lm_1=-39.134 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-8.000 tm_pt_3=-0.000 tm_pt_4=-5.000 tm_pt_5=-12.423 tm_pt_6=-14.837 tm_pt_7=-8.000 tm_pt_8=-21.744 tm_pt_9=-3.368 tm_pt_10=-12.508 tm_pt_11=-0.000 tm_pt_12=-4.308 tm_pt_13=-0.000 tm_pt_14=-9.000 tm_pt_15=-1.000 tm_pt_16=-0.000 tm_glue_0=6.000 WordPenalty=-5.646 OOVPenalty=-200.000 ||| -288.585
-75 ||| ১৭৭৬ on 4th july this constituents a notice independence জারি . ||| lm_0=-37.838 lm_1=-39.134 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-8.000 tm_pt_3=-0.000 tm_pt_4=-6.000 tm_pt_5=-12.423 tm_pt_6=-14.837 tm_pt_7=-8.000 tm_pt_8=-21.744 tm_pt_9=-3.368 tm_pt_10=-13.575 tm_pt_11=-0.000 tm_pt_12=-4.218 tm_pt_13=-0.000 tm_pt_14=-9.000 tm_pt_15=-1.000 tm_pt_16=-0.000 tm_glue_0=8.000 WordPenalty=-5.646 OOVPenalty=-200.000 ||| -288.651
-76 ||| germany -lrb- in german : deutschland ডয়চ্ the লান্ট্ of pronounced [ dɔʏtʃlant ] -rrb- the is a country . ||| lm_0=-54.519 lm_1=-68.484 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-10.000 tm_pt_3=-0.000 tm_pt_4=-8.000 tm_pt_5=-40.037 tm_pt_6=-27.087 tm_pt_7=-10.000 tm_pt_8=-27.180 tm_pt_9=-2.419 tm_pt_10=-14.481 tm_pt_11=-0.000 tm_pt_12=-2.350 tm_pt_13=-0.000 tm_pt_14=-16.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=12.000 WordPenalty=-9.554 OOVPenalty=-400.000 ||| -534.960
-76 ||| germany -lrb- in german : deutschland ডয়চ্ the লান্ট্ of pronounced [ dɔʏtʃlant ] -rrb- through is a country . ||| lm_0=-55.093 lm_1=-68.484 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-10.000 tm_pt_3=-0.000 tm_pt_4=-8.000 tm_pt_5=-36.787 tm_pt_6=-27.934 tm_pt_7=-10.000 tm_pt_8=-27.180 tm_pt_9=-2.437 tm_pt_10=-11.919 tm_pt_11=-0.000 tm_pt_12=-3.138 tm_pt_13=-0.000 tm_pt_14=-16.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=12.000 WordPenalty=-9.554 OOVPenalty=-400.000 ||| -534.967
-76 ||| germany -lrb- in german : deutschland ডয়চ্ the লান্ট্ of pronounced [ dɔʏtʃlant ] -rrb- middle is a country . ||| lm_0=-56.628 lm_1=-68.484 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-10.000 tm_pt_3=-0.000 tm_pt_4=-8.000 tm_pt_5=-34.903 tm_pt_6=-27.087 tm_pt_7=-10.000 tm_pt_8=-27.180 tm_pt_9=-2.419 tm_pt_10=-9.385 tm_pt_11=-0.000 tm_pt_12=-1.975 tm_pt_13=-0.000 tm_pt_14=-16.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=12.000 WordPenalty=-9.554 OOVPenalty=-400.000 ||| -535.133
-76 ||| germany -lrb- in german : deutschland ডয়চ্ the লান্ট্ of pronounced [ dɔʏtʃlant ] -rrb- central is a country . ||| lm_0=-56.309 lm_1=-68.484 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-10.000 tm_pt_3=-0.000 tm_pt_4=-8.000 tm_pt_5=-35.140 tm_pt_6=-27.200 tm_pt_7=-10.000 tm_pt_8=-27.180 tm_pt_9=-2.419 tm_pt_10=-10.729 tm_pt_11=-0.000 tm_pt_12=-2.039 tm_pt_13=-0.000 tm_pt_14=-16.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=12.000 WordPenalty=-9.554 OOVPenalty=-400.000 ||| -535.194
-76 ||| germany -lrb- in german : deutschland ডয়চ্ the লান্ট্ the pronounced [ dɔʏtʃlant ] -rrb- the is a country . ||| lm_0=-54.703 lm_1=-68.484 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-11.000 tm_pt_3=-0.000 tm_pt_4=-10.000 tm_pt_5=-40.034 tm_pt_6=-26.757 tm_pt_7=-11.000 tm_pt_8=-29.898 tm_pt_9=-2.064 tm_pt_10=-24.499 tm_pt_11=-0.000 tm_pt_12=-4.432 tm_pt_13=-0.000 tm_pt_14=-16.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=14.000 WordPenalty=-9.554 OOVPenalty=-400.000 ||| -537.344
-76 ||| germany -lrb- in german : deutschland ডয়চ্ the লান্ট্ the pronounced [ dɔʏtʃlant ] -rrb- through is a country . ||| lm_0=-55.276 lm_1=-68.484 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-11.000 tm_pt_3=-0.000 tm_pt_4=-10.000 tm_pt_5=-36.784 tm_pt_6=-27.604 tm_pt_7=-11.000 tm_pt_8=-29.898 tm_pt_9=-2.082 tm_pt_10=-21.937 tm_pt_11=-0.000 tm_pt_12=-5.221 tm_pt_13=-0.000 tm_pt_14=-16.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=14.000 WordPenalty=-9.554 OOVPenalty=-400.000 ||| -537.350
-76 ||| germany -lrb- in german : deutschland ডয়চ্ of লান্ট্ the pronounced [ dɔʏtʃlant ] -rrb- the is a country . ||| lm_0=-54.519 lm_1=-68.484 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-11.000 tm_pt_3=-0.000 tm_pt_4=-10.000 tm_pt_5=-40.037 tm_pt_6=-27.087 tm_pt_7=-11.000 tm_pt_8=-29.898 tm_pt_9=-2.193 tm_pt_10=-23.900 tm_pt_11=-0.000 tm_pt_12=-5.126 tm_pt_13=-0.000 tm_pt_14=-16.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=14.000 WordPenalty=-9.554 OOVPenalty=-400.000 ||| -537.426
-76 ||| germany -lrb- in german : deutschland ডয়চ্ of লান্ট্ the pronounced [ dɔʏtʃlant ] -rrb- through is a country . ||| lm_0=-55.093 lm_1=-68.484 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-11.000 tm_pt_3=-0.000 tm_pt_4=-10.000 tm_pt_5=-36.787 tm_pt_6=-27.934 tm_pt_7=-11.000 tm_pt_8=-29.898 tm_pt_9=-2.211 tm_pt_10=-21.338 tm_pt_11=-0.000 tm_pt_12=-5.914 tm_pt_13=-0.000 tm_pt_14=-16.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=14.000 WordPenalty=-9.554 OOVPenalty=-400.000 ||| -537.432
-76 ||| germany -lrb- in german : deutschland ডয়চ্ of লান্ট্ of pronounced [ dɔʏtʃlant ] -rrb- the is a country . ||| lm_0=-54.336 lm_1=-68.484 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-11.000 tm_pt_3=-0.000 tm_pt_4=-10.000 tm_pt_5=-40.040 tm_pt_6=-27.417 tm_pt_7=-11.000 tm_pt_8=-29.898 tm_pt_9=-2.321 tm_pt_10=-23.301 tm_pt_11=-0.000 tm_pt_12=-5.819 tm_pt_13=-0.000 tm_pt_14=-16.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=14.000 WordPenalty=-9.554 OOVPenalty=-400.000 ||| -537.508
-76 ||| germany -lrb- in german : deutschland ডয়চ্ of লান্ট্ of pronounced [ dɔʏtʃlant ] -rrb- through is a country . ||| lm_0=-54.909 lm_1=-68.484 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-11.000 tm_pt_3=-0.000 tm_pt_4=-10.000 tm_pt_5=-36.790 tm_pt_6=-28.265 tm_pt_7=-11.000 tm_pt_8=-29.898 tm_pt_9=-2.340 tm_pt_10=-20.739 tm_pt_11=-0.000 tm_pt_12=-6.607 tm_pt_13=-0.000 tm_pt_14=-16.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=14.000 WordPenalty=-9.554 OOVPenalty=-400.000 ||| -537.515
-77 ||| খ্রিস্টধর্ম the main religion . ||| lm_0=-13.390 lm_1=-19.567 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-3.000 tm_pt_3=-0.000 tm_pt_4=-3.000 tm_pt_5=-8.682 tm_pt_6=-3.222 tm_pt_7=-3.000 tm_pt_8=-8.154 tm_pt_9=-1.368 tm_pt_10=-8.196 tm_pt_11=-0.000 tm_pt_12=-2.807 tm_pt_13=-0.000 tm_pt_14=-4.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=4.000 WordPenalty=-3.040 OOVPenalty=-100.000 ||| -134.806
-77 ||| খ্রিস্টধর্ম russia main religion . ||| lm_0=-17.547 lm_1=-19.567 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-3.000 tm_pt_3=-0.000 tm_pt_4=-3.000 tm_pt_5=-2.865 tm_pt_6=-3.257 tm_pt_7=-3.000 tm_pt_8=-8.154 tm_pt_9=-0.370 tm_pt_10=-1.801 tm_pt_11=-0.000 tm_pt_12=-0.861 tm_pt_13=-0.000 tm_pt_14=-4.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=4.000 WordPenalty=-3.040 OOVPenalty=-100.000 ||| -135.684
-77 ||| খ্রিস্টধর্ম russia the religion . ||| lm_0=-16.281 lm_1=-19.567 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-4.000 tm_pt_3=-0.000 tm_pt_4=-4.000 tm_pt_5=-6.650 tm_pt_6=-3.344 tm_pt_7=-4.000 tm_pt_8=-10.872 tm_pt_9=-0.002 tm_pt_10=-6.572 tm_pt_11=-0.000 tm_pt_12=-2.367 tm_pt_13=-0.000 tm_pt_14=-4.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=5.000 WordPenalty=-3.040 OOVPenalty=-100.000 ||| -136.531
-77 ||| খ্রিস্টধর্ম russiar main religion . ||| lm_0=-18.360 lm_1=-19.567 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-3.000 tm_pt_3=-0.000 tm_pt_4=-3.000 tm_pt_5=-2.245 tm_pt_6=-5.203 tm_pt_7=-3.000 tm_pt_8=-8.154 tm_pt_9=-0.736 tm_pt_10=-0.009 tm_pt_11=-0.000 tm_pt_12=-2.114 tm_pt_13=-0.000 tm_pt_14=-4.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=4.000 WordPenalty=-3.040 OOVPenalty=-100.000 ||| -137.514
-78 ||| but গলদের education রোমানীকরণের গতি was slow down . ||| lm_0=-30.819 lm_1=-32.611 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-5.000 tm_pt_3=-0.000 tm_pt_4=-5.000 tm_pt_5=-9.388 tm_pt_6=-3.301 tm_pt_7=-5.000 tm_pt_8=-13.590 tm_pt_9=-0.386 tm_pt_10=-4.108 tm_pt_11=-0.000 tm_pt_12=-2.060 tm_pt_13=-0.000 tm_pt_14=-6.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=8.000 WordPenalty=-4.777 OOVPenalty=-300.000 ||| -361.851
-78 ||| but গলদের the রোমানীকরণের গতি was slow down . ||| lm_0=-29.343 lm_1=-32.611 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-5.000 tm_pt_3=-0.000 tm_pt_4=-5.000 tm_pt_5=-14.896 tm_pt_6=-3.724 tm_pt_7=-5.000 tm_pt_8=-13.590 tm_pt_9=-0.503 tm_pt_10=-8.340 tm_pt_11=-0.000 tm_pt_12=-2.570 tm_pt_13=-0.000 tm_pt_14=-6.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=8.000 WordPenalty=-4.777 OOVPenalty=-300.000 ||| -362.598
-78 ||| but গলদের are রোমানীকরণের গতি was slow down . ||| lm_0=-29.623 lm_1=-32.611 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-5.000 tm_pt_3=-0.000 tm_pt_4=-5.000 tm_pt_5=-13.874 tm_pt_6=-4.877 tm_pt_7=-5.000 tm_pt_8=-13.590 tm_pt_9=-1.368 tm_pt_10=-7.258 tm_pt_11=-0.000 tm_pt_12=-3.669 tm_pt_13=-0.000 tm_pt_14=-6.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=8.000 WordPenalty=-4.777 OOVPenalty=-300.000 ||| -363.960
-78 ||| but গলদের education রোমানীকরণের গতি was too slow . ||| lm_0=-31.760 lm_1=-32.611 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-5.000 tm_pt_3=-0.000 tm_pt_4=-5.000 tm_pt_5=-10.731 tm_pt_6=-3.524 tm_pt_7=-5.000 tm_pt_8=-13.590 tm_pt_9=-1.018 tm_pt_10=-4.108 tm_pt_11=-0.000 tm_pt_12=-2.753 tm_pt_13=-0.000 tm_pt_14=-6.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=8.000 WordPenalty=-4.777 OOVPenalty=-300.000 ||| -364.112
-79 ||| subject : gnu foundation ||| lm_0=-11.822 lm_1=-16.306 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-3.000 tm_pt_3=-0.000 tm_pt_4=-3.000 tm_pt_5=-2.244 tm_pt_6=-1.768 tm_pt_7=-3.000 tm_pt_8=-8.154 tm_pt_9=-0.000 tm_pt_10=-0.656 tm_pt_11=-0.000 tm_pt_12=-1.687 tm_pt_13=-0.000 tm_pt_14=-4.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=3.000 WordPenalty=-2.606 ||| -26.233
-79 ||| category : gnu foundation ||| lm_0=-12.001 lm_1=-16.306 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-3.000 tm_pt_3=-0.000 tm_pt_4=-3.000 tm_pt_5=-2.012 tm_pt_6=-2.238 tm_pt_7=-3.000 tm_pt_8=-8.154 tm_pt_9=-0.000 tm_pt_10=-0.642 tm_pt_11=-0.000 tm_pt_12=-2.047 tm_pt_13=-0.000 tm_pt_14=-4.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=3.000 WordPenalty=-2.606 ||| -26.695
-79 ||| subject-class : gnu foundation ||| lm_0=-12.283 lm_1=-16.306 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-3.000 tm_pt_3=-0.000 tm_pt_4=-3.000 tm_pt_5=-1.966 tm_pt_6=-3.787 tm_pt_7=-3.000 tm_pt_8=-8.154 tm_pt_9=-0.000 tm_pt_10=-0.669 tm_pt_11=-0.000 tm_pt_12=-3.010 tm_pt_13=-0.000 tm_pt_14=-4.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=3.000 WordPenalty=-2.606 ||| -27.913
-79 ||| subject : gonu foundation ||| lm_0=-12.324 lm_1=-16.306 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-3.000 tm_pt_3=-0.000 tm_pt_4=-3.000 tm_pt_5=-2.126 tm_pt_6=-3.308 tm_pt_7=-3.000 tm_pt_8=-8.154 tm_pt_9=-0.050 tm_pt_10=-0.451 tm_pt_11=-0.000 tm_pt_12=-3.392 tm_pt_13=-0.000 tm_pt_14=-4.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=3.000 WordPenalty=-2.606 ||| -27.949
-79 ||| topics : gnu foundation ||| lm_0=-12.514 lm_1=-16.306 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-3.000 tm_pt_3=-0.000 tm_pt_4=-3.000 tm_pt_5=-2.124 tm_pt_6=-4.100 tm_pt_7=-3.000 tm_pt_8=-8.154 tm_pt_9=-0.000 tm_pt_10=-0.642 tm_pt_11=-0.000 tm_pt_12=-3.220 tm_pt_13=-0.000 tm_pt_14=-4.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=3.000 WordPenalty=-2.606 ||| -28.403
-79 ||| category : gonu foundation ||| lm_0=-12.503 lm_1=-16.306 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-3.000 tm_pt_3=-0.000 tm_pt_4=-3.000 tm_pt_5=-1.894 tm_pt_6=-3.779 tm_pt_7=-3.000 tm_pt_8=-8.154 tm_pt_9=-0.050 tm_pt_10=-0.437 tm_pt_11=-0.000 tm_pt_12=-3.751 tm_pt_13=-0.000 tm_pt_14=-4.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=3.000 WordPenalty=-2.606 ||| -28.411
-80 ||| economic policy and revenue নীতিকেও it study . ||| lm_0=-25.344 lm_1=-29.350 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-6.000 tm_pt_3=-0.000 tm_pt_4=-5.000 tm_pt_5=-11.391 tm_pt_6=-10.283 tm_pt_7=-6.000 tm_pt_8=-16.308 tm_pt_9=-1.386 tm_pt_10=-11.196 tm_pt_11=-0.000 tm_pt_12=-2.816 tm_pt_13=-0.000 tm_pt_14=-7.000 tm_pt_15=-1.000 tm_pt_16=-0.000 tm_glue_0=6.000 WordPenalty=-4.343 OOVPenalty=-100.000 ||| -161.720
-80 ||| economic policy and tax নীতিকেও it study . ||| lm_0=-25.178 lm_1=-29.350 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-6.000 tm_pt_3=-0.000 tm_pt_4=-5.000 tm_pt_5=-11.710 tm_pt_6=-10.283 tm_pt_7=-6.000 tm_pt_8=-16.308 tm_pt_9=-1.386 tm_pt_10=-12.177 tm_pt_11=-0.000 tm_pt_12=-2.816 tm_pt_13=-0.000 tm_pt_14=-7.000 tm_pt_15=-1.000 tm_pt_16=-0.000 tm_glue_0=6.000 WordPenalty=-4.343 OOVPenalty=-100.000 ||| -161.830
-80 ||| economic policy and revenue নীতিকেও studying it . ||| lm_0=-24.499 lm_1=-29.350 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-6.000 tm_pt_3=-0.000 tm_pt_4=-4.000 tm_pt_5=-11.529 tm_pt_6=-11.605 tm_pt_7=-6.000 tm_pt_8=-16.308 tm_pt_9=-2.368 tm_pt_10=-10.097 tm_pt_11=-0.000 tm_pt_12=-3.039 tm_pt_13=-0.000 tm_pt_14=-7.000 tm_pt_15=-1.000 tm_pt_16=-0.000 tm_glue_0=5.000 WordPenalty=-4.343 OOVPenalty=-100.000 ||| -162.715
-80 ||| economic policy and for নীতিকেও it study . ||| lm_0=-23.359 lm_1=-29.350 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-6.000 tm_pt_3=-0.000 tm_pt_4=-5.000 tm_pt_5=-16.601 tm_pt_6=-10.976 tm_pt_7=-6.000 tm_pt_8=-16.308 tm_pt_9=-2.018 tm_pt_10=-17.015 tm_pt_11=-0.000 tm_pt_12=-3.509 tm_pt_13=-0.000 tm_pt_14=-7.000 tm_pt_15=-1.000 tm_pt_16=-0.000 tm_glue_0=6.000 WordPenalty=-4.343 OOVPenalty=-100.000 ||| -162.770
-80 ||| economic policy and tax নীতিকেও studying it . ||| lm_0=-24.333 lm_1=-29.350 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-6.000 tm_pt_3=-0.000 tm_pt_4=-4.000 tm_pt_5=-11.848 tm_pt_6=-11.605 tm_pt_7=-6.000 tm_pt_8=-16.308 tm_pt_9=-2.368 tm_pt_10=-11.078 tm_pt_11=-0.000 tm_pt_12=-3.039 tm_pt_13=-0.000 tm_pt_14=-7.000 tm_pt_15=-1.000 tm_pt_16=-0.000 tm_glue_0=5.000 WordPenalty=-4.343 OOVPenalty=-100.000 ||| -162.825
-80 ||| economic policy and revenue নীতিকেও this study . ||| lm_0=-25.189 lm_1=-29.350 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-6.000 tm_pt_3=-0.000 tm_pt_4=-5.000 tm_pt_5=-12.985 tm_pt_6=-11.230 tm_pt_7=-6.000 tm_pt_8=-16.308 tm_pt_9=-1.386 tm_pt_10=-13.129 tm_pt_11=-0.000 tm_pt_12=-3.449 tm_pt_13=-0.000 tm_pt_14=-7.000 tm_pt_15=-1.000 tm_pt_16=-0.000 tm_glue_0=6.000 WordPenalty=-4.343 OOVPenalty=-100.000 ||| -162.882
-80 ||| economic policy tax and নীতিকেও it study . ||| lm_0=-24.592 lm_1=-29.350 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-6.000 tm_pt_3=-0.000 tm_pt_4=-4.000 tm_pt_5=-11.710 tm_pt_6=-10.283 tm_pt_7=-6.000 tm_pt_8=-16.308 tm_pt_9=-2.018 tm_pt_10=-12.870 tm_pt_11=-0.000 tm_pt_12=-3.828 tm_pt_13=-0.000 tm_pt_14=-7.000 tm_pt_15=-1.000 tm_pt_16=-0.000 tm_glue_0=5.000 WordPenalty=-4.343 OOVPenalty=-100.000 ||| -163.125
-80 ||| economic policy and নীতিকেও for it study . ||| lm_0=-23.187 lm_1=-29.350 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-6.000 tm_pt_3=-0.000 tm_pt_4=-4.000 tm_pt_5=-16.601 tm_pt_6=-10.976 tm_pt_7=-6.000 tm_pt_8=-16.308 tm_pt_9=-2.018 tm_pt_10=-16.048 tm_pt_11=-0.000 tm_pt_12=-3.627 tm_pt_13=-0.000 tm_pt_14=-7.000 tm_pt_15=-1.000 tm_pt_16=-0.000 tm_glue_0=5.000 WordPenalty=-4.343 OOVPenalty=-100.000 ||| -163.312
-81 ||| among them are : may be তোমার get seen ওরে this any স্নেহ-সুরধুনী . ||| lm_0=-41.164 lm_1=-48.917 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-8.000 tm_pt_3=-0.000 tm_pt_4=-7.000 tm_pt_5=-26.350 tm_pt_6=-16.516 tm_pt_7=-8.000 tm_pt_8=-21.744 tm_pt_9=-1.736 tm_pt_10=-6.623 tm_pt_11=-0.000 tm_pt_12=-6.240 tm_pt_13=-0.000 tm_pt_14=-11.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=10.000 WordPenalty=-6.949 OOVPenalty=-300.000 ||| -397.697
-81 ||| among these are : may be তোমার get seen ওরে this any স্নেহ-সুরধুনী . ||| lm_0=-41.398 lm_1=-48.917 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-7.000 tm_pt_3=-0.000 tm_pt_4=-7.000 tm_pt_5=-27.293 tm_pt_6=-16.548 tm_pt_7=-7.000 tm_pt_8=-19.026 tm_pt_9=-3.000 tm_pt_10=-7.999 tm_pt_11=-0.000 tm_pt_12=-4.493 tm_pt_13=-0.000 tm_pt_14=-11.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=10.000 WordPenalty=-6.949 OOVPenalty=-300.000 ||| -397.798
-81 ||| among them are : may be তোমার get is ওরে this any স্নেহ-সুরধুনী . ||| lm_0=-39.595 lm_1=-48.917 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-8.000 tm_pt_3=-0.000 tm_pt_4=-7.000 tm_pt_5=-30.305 tm_pt_6=-17.069 tm_pt_7=-8.000 tm_pt_8=-21.744 tm_pt_9=-1.738 tm_pt_10=-10.642 tm_pt_11=-0.000 tm_pt_12=-6.779 tm_pt_13=-0.000 tm_pt_14=-11.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=10.000 WordPenalty=-6.949 OOVPenalty=-300.000 ||| -397.937
-81 ||| among these are : may be তোমার get is ওরে this any স্নেহ-সুরধুনী . ||| lm_0=-39.829 lm_1=-48.917 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-7.000 tm_pt_3=-0.000 tm_pt_4=-7.000 tm_pt_5=-31.248 tm_pt_6=-17.101 tm_pt_7=-7.000 tm_pt_8=-19.026 tm_pt_9=-3.002 tm_pt_10=-12.017 tm_pt_11=-0.000 tm_pt_12=-5.032 tm_pt_13=-0.000 tm_pt_14=-11.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=10.000 WordPenalty=-6.949 OOVPenalty=-300.000 ||| -398.038
-81 ||| among them are : would have তোমার get seen ওরে this any স্নেহ-সুরধুনী . ||| lm_0=-42.167 lm_1=-48.917 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-8.000 tm_pt_3=-0.000 tm_pt_4=-8.000 tm_pt_5=-24.692 tm_pt_6=-15.956 tm_pt_7=-8.000 tm_pt_8=-21.744 tm_pt_9=-2.368 tm_pt_10=-6.623 tm_pt_11=-0.000 tm_pt_12=-6.527 tm_pt_13=-0.000 tm_pt_14=-11.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=11.000 WordPenalty=-6.949 OOVPenalty=-300.000 ||| -398.086
-81 ||| among these are : would have তোমার get seen ওরে this any স্নেহ-সুরধুনী . ||| lm_0=-42.401 lm_1=-48.917 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-7.000 tm_pt_3=-0.000 tm_pt_4=-7.000 tm_pt_5=-25.635 tm_pt_6=-15.989 tm_pt_7=-7.000 tm_pt_8=-19.026 tm_pt_9=-3.000 tm_pt_10=-6.613 tm_pt_11=-0.000 tm_pt_12=-4.493 tm_pt_13=-0.000 tm_pt_14=-11.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=10.000 WordPenalty=-6.949 OOVPenalty=-300.000 ||| -398.172
-81 ||| among them are : may be তোমার numbers is ওরে this any স্নেহ-সুরধুনী . ||| lm_0=-39.159 lm_1=-48.917 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-8.000 tm_pt_3=-0.000 tm_pt_4=-7.000 tm_pt_5=-30.645 tm_pt_6=-17.069 tm_pt_7=-8.000 tm_pt_8=-21.744 tm_pt_9=-1.738 tm_pt_10=-13.408 tm_pt_11=-0.000 tm_pt_12=-6.779 tm_pt_13=-0.000 tm_pt_14=-11.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=10.000 WordPenalty=-6.949 OOVPenalty=-300.000 ||| -398.178
-81 ||| among them are : would have তোমার get is ওরে this any স্নেহ-সুরধুনী . ||| lm_0=-40.599 lm_1=-48.917 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-8.000 tm_pt_3=-0.000 tm_pt_4=-8.000 tm_pt_5=-28.647 tm_pt_6=-16.509 tm_pt_7=-8.000 tm_pt_8=-21.744 tm_pt_9=-2.370 tm_pt_10=-10.642 tm_pt_11=-0.000 tm_pt_12=-7.066 tm_pt_13=-0.000 tm_pt_14=-11.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=11.000 WordPenalty=-6.949 OOVPenalty=-300.000 ||| -398.326
-81 ||| among them are : may be তোমার get seen ওরে this no স্নেহ-সুরধুনী . ||| lm_0=-41.147 lm_1=-48.917 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-8.000 tm_pt_3=-0.000 tm_pt_4=-7.000 tm_pt_5=-26.658 tm_pt_6=-17.123 tm_pt_7=-8.000 tm_pt_8=-21.744 tm_pt_9=-1.736 tm_pt_10=-6.677 tm_pt_11=-0.000 tm_pt_12=-7.285 tm_pt_13=-0.000 tm_pt_14=-11.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=10.000 WordPenalty=-6.949 OOVPenalty=-300.000 ||| -398.326
-81 ||| among these are : may be তোমার get seen ওরে this no স্নেহ-সুরধুনী . ||| lm_0=-41.381 lm_1=-48.917 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-7.000 tm_pt_3=-0.000 tm_pt_4=-7.000 tm_pt_5=-27.600 tm_pt_6=-17.155 tm_pt_7=-7.000 tm_pt_8=-19.026 tm_pt_9=-3.000 tm_pt_10=-8.052 tm_pt_11=-0.000 tm_pt_12=-5.538 tm_pt_13=-0.000 tm_pt_14=-11.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=10.000 WordPenalty=-6.949 OOVPenalty=-300.000 ||| -398.428
-82 ||| on 23rd april 1992 satyajit died . ||| lm_0=-14.394 lm_1=-26.089 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-6.000 tm_pt_3=-0.000 tm_pt_4=-5.000 tm_pt_5=-7.544 tm_pt_6=-12.167 tm_pt_7=-6.000 tm_pt_8=-16.308 tm_pt_9=-1.369 tm_pt_10=-3.969 tm_pt_11=-0.000 tm_pt_12=-3.180 tm_pt_13=-0.000 tm_pt_14=-7.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=4.000 WordPenalty=-3.909 ||| -45.335
-82 ||| on 23rd april 1992 satyajit expired . ||| lm_0=-14.659 lm_1=-26.089 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-5.000 tm_pt_3=-0.000 tm_pt_4=-4.000 tm_pt_5=-6.291 tm_pt_6=-13.553 tm_pt_7=-5.000 tm_pt_8=-13.590 tm_pt_9=-2.001 tm_pt_10=-1.517 tm_pt_11=-0.000 tm_pt_12=-3.358 tm_pt_13=-0.000 tm_pt_14=-7.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=3.000 WordPenalty=-3.909 ||| -45.653
-82 ||| satyajit died on 23rd april 1992 . ||| lm_0=-14.171 lm_1=-26.089 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-5.000 tm_pt_3=-0.000 tm_pt_4=-3.000 tm_pt_5=-7.544 tm_pt_6=-12.167 tm_pt_7=-5.000 tm_pt_8=-13.590 tm_pt_9=-2.369 tm_pt_10=-4.522 tm_pt_11=-0.000 tm_pt_12=-4.487 tm_pt_13=-0.000 tm_pt_14=-7.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=1.000 WordPenalty=-3.909 ||| -48.260
-83 ||| this time nazrul medical রিপোর্ট stay famous চিকিৎসকদের to sent . ||| lm_0=-37.256 lm_1=-39.134 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-8.000 tm_pt_3=-0.000 tm_pt_4=-8.000 tm_pt_5=-16.030 tm_pt_6=-16.653 tm_pt_7=-8.000 tm_pt_8=-21.744 tm_pt_9=-2.052 tm_pt_10=-13.963 tm_pt_11=-0.000 tm_pt_12=-5.389 tm_pt_13=-0.000 tm_pt_14=-9.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=10.000 WordPenalty=-5.646 OOVPenalty=-200.000 ||| -286.197
-83 ||| this time nazrul medical রিপোর্ট sent to stay famous চিকিৎসকদের . ||| lm_0=-34.004 lm_1=-39.134 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-8.000 tm_pt_3=-0.000 tm_pt_4=-6.000 tm_pt_5=-16.030 tm_pt_6=-16.653 tm_pt_7=-8.000 tm_pt_8=-21.744 tm_pt_9=-2.420 tm_pt_10=-12.132 tm_pt_11=-0.000 tm_pt_12=-6.333 tm_pt_13=-0.000 tm_pt_14=-9.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=6.000 WordPenalty=-5.646 OOVPenalty=-200.000 ||| -286.263
-83 ||| this time nazrul medical রিপোর্ট stay famous sent to চিকিৎসকদের . ||| lm_0=-35.756 lm_1=-39.134 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-8.000 tm_pt_3=-0.000 tm_pt_4=-7.000 tm_pt_5=-16.030 tm_pt_6=-16.653 tm_pt_7=-8.000 tm_pt_8=-21.744 tm_pt_9=-2.420 tm_pt_10=-11.801 tm_pt_11=-0.000 tm_pt_12=-6.236 tm_pt_13=-0.000 tm_pt_14=-9.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=8.000 WordPenalty=-5.646 OOVPenalty=-200.000 ||| -286.345
-83 ||| this time nazrul medical রিপোর্ট stay famous চিকিৎসকদের to send to . ||| lm_0=-37.275 lm_1=-42.395 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-8.000 tm_pt_3=-0.000 tm_pt_4=-8.000 tm_pt_5=-19.143 tm_pt_6=-11.248 tm_pt_7=-8.000 tm_pt_8=-21.744 tm_pt_9=-2.052 tm_pt_10=-11.660 tm_pt_11=-0.000 tm_pt_12=-5.389 tm_pt_13=-0.000 tm_pt_14=-10.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=10.000 WordPenalty=-6.080 OOVPenalty=-200.000 ||| -286.473
-83 ||| this time nazrul medical রিপোর্ট earning famous চিকিৎসকদের to sent . ||| lm_0=-37.147 lm_1=-39.134 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-8.000 tm_pt_3=-0.000 tm_pt_4=-8.000 tm_pt_5=-17.020 tm_pt_6=-17.752 tm_pt_7=-8.000 tm_pt_8=-21.744 tm_pt_9=-2.052 tm_pt_10=-13.452 tm_pt_11=-0.000 tm_pt_12=-5.389 tm_pt_13=-0.000 tm_pt_14=-9.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=10.000 WordPenalty=-5.646 OOVPenalty=-200.000 ||| -286.493
-83 ||| this time nazrul medical রিপোর্ট stay famous চিকিৎসকদের to send . ||| lm_0=-35.787 lm_1=-39.134 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-8.000 tm_pt_3=-0.000 tm_pt_4=-6.000 tm_pt_5=-15.462 tm_pt_6=-13.355 tm_pt_7=-8.000 tm_pt_8=-21.744 tm_pt_9=-1.152 tm_pt_10=-19.885 tm_pt_11=-0.000 tm_pt_12=-5.763 tm_pt_13=-0.000 tm_pt_14=-9.000 tm_pt_15=-1.000 tm_pt_16=-0.000 tm_glue_0=8.000 WordPenalty=-5.646 OOVPenalty=-200.000 ||| -286.554
-83 ||| this time nazrul medical রিপোর্ট stay famous চিকিৎসকদের to sent to . ||| lm_0=-38.221 lm_1=-42.395 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-8.000 tm_pt_3=-0.000 tm_pt_4=-8.000 tm_pt_5=-19.065 tm_pt_6=-10.907 tm_pt_7=-8.000 tm_pt_8=-21.744 tm_pt_9=-1.188 tm_pt_10=-12.354 tm_pt_11=-0.000 tm_pt_12=-4.290 tm_pt_13=-0.000 tm_pt_14=-10.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=10.000 WordPenalty=-6.080 OOVPenalty=-200.000 ||| -286.594
-83 ||| this time nazrul medical রিপোর্ট earning famous চিকিৎসকদের to send to . ||| lm_0=-37.166 lm_1=-42.395 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-8.000 tm_pt_3=-0.000 tm_pt_4=-8.000 tm_pt_5=-20.132 tm_pt_6=-12.347 tm_pt_7=-8.000 tm_pt_8=-21.744 tm_pt_9=-2.052 tm_pt_10=-11.150 tm_pt_11=-0.000 tm_pt_12=-5.389 tm_pt_13=-0.000 tm_pt_14=-10.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=10.000 WordPenalty=-6.080 OOVPenalty=-200.000 ||| -286.770
-83 ||| this time nazrul medical রিপোর্ট earning famous চিকিৎসকদের to send . ||| lm_0=-35.678 lm_1=-39.134 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-8.000 tm_pt_3=-0.000 tm_pt_4=-6.000 tm_pt_5=-16.451 tm_pt_6=-14.454 tm_pt_7=-8.000 tm_pt_8=-21.744 tm_pt_9=-1.152 tm_pt_10=-19.374 tm_pt_11=-0.000 tm_pt_12=-5.763 tm_pt_13=-0.000 tm_pt_14=-9.000 tm_pt_15=-1.000 tm_pt_16=-0.000 tm_glue_0=8.000 WordPenalty=-5.646 OOVPenalty=-200.000 ||| -286.851
-83 ||| this time nazrul medical রিপোর্ট earning famous চিকিৎসকদের to sent to . ||| lm_0=-38.112 lm_1=-42.395 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-8.000 tm_pt_3=-0.000 tm_pt_4=-8.000 tm_pt_5=-20.055 tm_pt_6=-12.006 tm_pt_7=-8.000 tm_pt_8=-21.744 tm_pt_9=-1.188 tm_pt_10=-11.843 tm_pt_11=-0.000 tm_pt_12=-4.290 tm_pt_13=-0.000 tm_pt_14=-10.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=10.000 WordPenalty=-6.080 OOVPenalty=-200.000 ||| -286.890
-84 ||| acted in different times rani মুখার্জী different দাতব্য are connected with থেকেছেন . ||| lm_0=-40.958 lm_1=-45.656 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-8.000 tm_pt_3=-0.000 tm_pt_4=-8.000 tm_pt_5=-22.380 tm_pt_6=-14.549 tm_pt_7=-8.000 tm_pt_8=-21.744 tm_pt_9=-3.424 tm_pt_10=-13.562 tm_pt_11=-0.000 tm_pt_12=-6.591 tm_pt_13=-0.000 tm_pt_14=-10.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=11.000 WordPenalty=-6.514 OOVPenalty=-300.000 ||| -396.253
-84 ||| acting in different times rani মুখার্জী different দাতব্য are connected with থেকেছেন . ||| lm_0=-40.924 lm_1=-45.656 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-8.000 tm_pt_3=-0.000 tm_pt_4=-8.000 tm_pt_5=-23.032 tm_pt_6=-15.414 tm_pt_7=-8.000 tm_pt_8=-21.744 tm_pt_9=-3.553 tm_pt_10=-14.373 tm_pt_11=-0.000 tm_pt_12=-7.284 tm_pt_13=-0.000 tm_pt_14=-10.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=11.000 WordPenalty=-6.514 OOVPenalty=-300.000 ||| -397.192
-84 ||| acted in different time rani মুখার্জী different দাতব্য are connected with থেকেছেন . ||| lm_0=-42.187 lm_1=-45.656 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-8.000 tm_pt_3=-0.000 tm_pt_4=-8.000 tm_pt_5=-22.602 tm_pt_6=-13.033 tm_pt_7=-8.000 tm_pt_8=-21.744 tm_pt_9=-3.424 tm_pt_10=-13.562 tm_pt_11=-0.000 tm_pt_12=-6.591 tm_pt_13=-0.000 tm_pt_14=-10.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=11.000 WordPenalty=-6.514 OOVPenalty=-300.000 ||| -397.297
-84 ||| acted in different times rani মুখার্জী various দাতব্য are connected with থেকেছেন . ||| lm_0=-41.409 lm_1=-45.656 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-8.000 tm_pt_3=-0.000 tm_pt_4=-8.000 tm_pt_5=-22.396 tm_pt_6=-15.771 tm_pt_7=-8.000 tm_pt_8=-21.744 tm_pt_9=-3.424 tm_pt_10=-13.500 tm_pt_11=-0.000 tm_pt_12=-7.777 tm_pt_13=-0.000 tm_pt_14=-10.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=11.000 WordPenalty=-6.514 OOVPenalty=-300.000 ||| -397.635
-84 ||| different times , acted rani মুখার্জী different দাতব্য are connected with থেকেছেন . ||| lm_0=-41.777 lm_1=-45.656 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-8.000 tm_pt_3=-0.000 tm_pt_4=-6.000 tm_pt_5=-22.591 tm_pt_6=-13.702 tm_pt_7=-8.000 tm_pt_8=-21.744 tm_pt_9=-3.153 tm_pt_10=-14.288 tm_pt_11=-0.000 tm_pt_12=-3.905 tm_pt_13=-0.000 tm_pt_14=-10.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=9.000 WordPenalty=-6.514 OOVPenalty=-300.000 ||| -397.961
-84 ||| acted , different times rani মুখার্জী different দাতব্য are connected with থেকেছেন . ||| lm_0=-43.056 lm_1=-45.656 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-8.000 tm_pt_3=-0.000 tm_pt_4=-8.000 tm_pt_5=-22.591 tm_pt_6=-13.702 tm_pt_7=-8.000 tm_pt_8=-21.744 tm_pt_9=-2.792 tm_pt_10=-14.576 tm_pt_11=-0.000 tm_pt_12=-5.897 tm_pt_13=-0.000 tm_pt_14=-10.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=11.000 WordPenalty=-6.514 OOVPenalty=-300.000 ||| -398.099
-84 ||| in different times acted rani মুখার্জী different দাতব্য are connected with থেকেছেন . ||| lm_0=-41.466 lm_1=-45.656 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-8.000 tm_pt_3=-0.000 tm_pt_4=-6.000 tm_pt_5=-22.380 tm_pt_6=-14.549 tm_pt_7=-8.000 tm_pt_8=-21.744 tm_pt_9=-3.786 tm_pt_10=-12.484 tm_pt_11=-0.000 tm_pt_12=-4.655 tm_pt_13=-0.000 tm_pt_14=-10.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=9.000 WordPenalty=-6.514 OOVPenalty=-300.000 ||| -398.142
-85 ||| bengali literature and culture a special contribution on স্বীকৃতিস্বরুপ year on 9th december , dhaka university he honorary ডি.লিট উপাধিতে awarded with . ||| lm_0=-62.080 lm_1=-78.267 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-13.000 tm_pt_3=-0.000 tm_pt_4=-12.000 tm_pt_5=-55.591 tm_pt_6=-26.977 tm_pt_7=-13.000 tm_pt_8=-35.334 tm_pt_9=-4.106 tm_pt_10=-26.460 tm_pt_11=-0.000 tm_pt_12=-3.613 tm_pt_13=-0.000 tm_pt_14=-20.000 tm_pt_15=-1.000 tm_pt_16=-0.000 tm_glue_0=15.000 WordPenalty=-10.857 OOVPenalty=-300.000 ||| -459.327
-85 ||| bengali literature and culture a special contribution on স্বীকৃতিস্বরুপ year on 9th december , dhaka university was honorary ডি.লিট উপাধিতে awarded with . ||| lm_0=-60.734 lm_1=-78.267 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-13.000 tm_pt_3=-0.000 tm_pt_4=-12.000 tm_pt_5=-56.621 tm_pt_6=-27.198 tm_pt_7=-13.000 tm_pt_8=-35.334 tm_pt_9=-4.474 tm_pt_10=-28.388 tm_pt_11=-0.000 tm_pt_12=-5.318 tm_pt_13=-0.000 tm_pt_14=-20.000 tm_pt_15=-1.000 tm_pt_16=-0.000 tm_glue_0=15.000 WordPenalty=-10.857 OOVPenalty=-300.000 ||| -459.339
-85 ||| bengali literature and culture his special contribution on স্বীকৃতিস্বরুপ year on 9th december , dhaka university he honorary ডি.লিট উপাধিতে awarded with . ||| lm_0=-64.166 lm_1=-78.267 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-14.000 tm_pt_3=-0.000 tm_pt_4=-13.000 tm_pt_5=-50.176 tm_pt_6=-22.617 tm_pt_7=-14.000 tm_pt_8=-38.052 tm_pt_9=-3.738 tm_pt_10=-26.585 tm_pt_11=-0.000 tm_pt_12=-3.925 tm_pt_13=-0.000 tm_pt_14=-20.000 tm_pt_15=-1.000 tm_pt_16=-0.000 tm_glue_0=16.000 WordPenalty=-10.857 OOVPenalty=-300.000 ||| -459.413
-85 ||| bengali literature and culture his special contribution on স্বীকৃতিস্বরুপ year on 9th december , dhaka university was honorary ডি.লিট উপাধিতে awarded with . ||| lm_0=-62.820 lm_1=-78.267 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-14.000 tm_pt_3=-0.000 tm_pt_4=-13.000 tm_pt_5=-51.205 tm_pt_6=-22.838 tm_pt_7=-14.000 tm_pt_8=-38.052 tm_pt_9=-4.106 tm_pt_10=-28.514 tm_pt_11=-0.000 tm_pt_12=-5.630 tm_pt_13=-0.000 tm_pt_14=-20.000 tm_pt_15=-1.000 tm_pt_16=-0.000 tm_glue_0=16.000 WordPenalty=-10.857 OOVPenalty=-300.000 ||| -459.425
-85 ||| bengali literature and culture in special contribution on স্বীকৃতিস্বরুপ year on 9th december , dhaka university he honorary ডি.লিট উপাধিতে awarded with . ||| lm_0=-61.999 lm_1=-78.267 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-14.000 tm_pt_3=-0.000 tm_pt_4=-13.000 tm_pt_5=-54.240 tm_pt_6=-24.867 tm_pt_7=-14.000 tm_pt_8=-38.052 tm_pt_9=-3.738 tm_pt_10=-29.755 tm_pt_11=-0.000 tm_pt_12=-6.549 tm_pt_13=-0.000 tm_pt_14=-20.000 tm_pt_15=-1.000 tm_pt_16=-0.000 tm_glue_0=16.000 WordPenalty=-10.857 OOVPenalty=-300.000 ||| -460.029
-85 ||| bengali literature and culture in special contribution on স্বীকৃতিস্বরুপ year on 9th december , dhaka university was honorary ডি.লিট উপাধিতে awarded with . ||| lm_0=-60.653 lm_1=-78.267 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-14.000 tm_pt_3=-0.000 tm_pt_4=-13.000 tm_pt_5=-55.269 tm_pt_6=-25.088 tm_pt_7=-14.000 tm_pt_8=-38.052 tm_pt_9=-4.106 tm_pt_10=-31.684 tm_pt_11=-0.000 tm_pt_12=-8.253 tm_pt_13=-0.000 tm_pt_14=-20.000 tm_pt_15=-1.000 tm_pt_16=-0.000 tm_glue_0=16.000 WordPenalty=-10.857 OOVPenalty=-300.000 ||| -460.041
-85 ||| bengali literature and culture a special contribution on স্বীকৃতিস্বরুপ year on 9th december , dhaka university he honorary ডি.লিট উপাধিতে awarded with the . ||| lm_0=-62.950 lm_1=-81.528 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-14.000 tm_pt_3=-0.000 tm_pt_4=-14.000 tm_pt_5=-58.799 tm_pt_6=-25.191 tm_pt_7=-14.000 tm_pt_8=-38.052 tm_pt_9=-4.106 tm_pt_10=-22.587 tm_pt_11=-0.000 tm_pt_12=-3.786 tm_pt_13=-0.000 tm_pt_14=-21.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=17.000 WordPenalty=-11.292 OOVPenalty=-300.000 ||| -460.239
-85 ||| bengali literature and culture a special contribution on স্বীকৃতিস্বরুপ year on 9th december , dhaka university was honorary ডি.লিট উপাধিতে awarded with the . ||| lm_0=-61.604 lm_1=-81.528 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-14.000 tm_pt_3=-0.000 tm_pt_4=-14.000 tm_pt_5=-59.828 tm_pt_6=-25.412 tm_pt_7=-14.000 tm_pt_8=-38.052 tm_pt_9=-4.474 tm_pt_10=-24.516 tm_pt_11=-0.000 tm_pt_12=-5.490 tm_pt_13=-0.000 tm_pt_14=-21.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=17.000 WordPenalty=-11.292 OOVPenalty=-300.000 ||| -460.251
-85 ||| bengali literature and culture its special contribution on স্বীকৃতিস্বরুপ year on 9th december , dhaka university he honorary ডি.লিট উপাধিতে awarded with . ||| lm_0=-63.766 lm_1=-78.267 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-14.000 tm_pt_3=-0.000 tm_pt_4=-13.000 tm_pt_5=-51.065 tm_pt_6=-24.579 tm_pt_7=-14.000 tm_pt_8=-38.052 tm_pt_9=-3.738 tm_pt_10=-27.055 tm_pt_11=-0.000 tm_pt_12=-5.930 tm_pt_13=-0.000 tm_pt_14=-20.000 tm_pt_15=-1.000 tm_pt_16=-0.000 tm_glue_0=16.000 WordPenalty=-10.857 OOVPenalty=-300.000 ||| -460.590
-85 ||| bengali literature and culture its special contribution on স্বীকৃতিস্বরুপ year on 9th december , dhaka university was honorary ডি.লিট উপাধিতে awarded with . ||| lm_0=-62.420 lm_1=-78.267 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-14.000 tm_pt_3=-0.000 tm_pt_4=-13.000 tm_pt_5=-52.094 tm_pt_6=-24.800 tm_pt_7=-14.000 tm_pt_8=-38.052 tm_pt_9=-4.106 tm_pt_10=-28.983 tm_pt_11=-0.000 tm_pt_12=-7.634 tm_pt_13=-0.000 tm_pt_14=-20.000 tm_pt_15=-1.000 tm_pt_16=-0.000 tm_glue_0=16.000 WordPenalty=-10.857 OOVPenalty=-300.000 ||| -460.602
-86 ||| kolkata durga puja city of tourism আকর্ষণও is also a reason ||| lm_0=-28.824 lm_1=-39.134 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-6.000 tm_pt_3=-0.000 tm_pt_4=-6.000 tm_pt_5=-38.348 tm_pt_6=-18.126 tm_pt_7=-6.000 tm_pt_8=-16.308 tm_pt_9=-2.135 tm_pt_10=-11.306 tm_pt_11=-0.000 tm_pt_12=-5.878 tm_pt_13=-0.000 tm_pt_14=-10.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=7.000 WordPenalty=-5.646 OOVPenalty=-100.000 ||| -180.380
-86 ||| kolkata durga puja of the tourism আকর্ষণও is also a reason ||| lm_0=-26.385 lm_1=-39.134 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-6.000 tm_pt_3=-0.000 tm_pt_4=-5.000 tm_pt_5=-41.831 tm_pt_6=-17.807 tm_pt_7=-6.000 tm_pt_8=-16.308 tm_pt_9=-2.271 tm_pt_10=-15.896 tm_pt_11=-0.000 tm_pt_12=-6.947 tm_pt_13=-0.000 tm_pt_14=-10.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=6.000 WordPenalty=-5.646 OOVPenalty=-100.000 ||| -180.564
-86 ||| kolkata durga puja tourism of the আকর্ষণও is also a reason ||| lm_0=-26.752 lm_1=-39.134 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-6.000 tm_pt_3=-0.000 tm_pt_4=-5.000 tm_pt_5=-41.831 tm_pt_6=-17.807 tm_pt_7=-6.000 tm_pt_8=-16.308 tm_pt_9=-2.185 tm_pt_10=-11.722 tm_pt_11=-0.000 tm_pt_12=-6.829 tm_pt_13=-0.000 tm_pt_14=-10.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=5.000 WordPenalty=-5.646 OOVPenalty=-100.000 ||| -180.823
-86 ||| the durga puja city of tourism আকর্ষণও is also a reason ||| lm_0=-28.392 lm_1=-39.134 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-6.000 tm_pt_3=-0.000 tm_pt_4=-6.000 tm_pt_5=-42.056 tm_pt_6=-18.216 tm_pt_7=-6.000 tm_pt_8=-16.308 tm_pt_9=-2.135 tm_pt_10=-13.404 tm_pt_11=-0.000 tm_pt_12=-5.473 tm_pt_13=-0.000 tm_pt_14=-10.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=7.000 WordPenalty=-5.646 OOVPenalty=-100.000 ||| -180.986
-86 ||| the durga puja of the tourism আকর্ষণও is also a reason ||| lm_0=-25.953 lm_1=-39.134 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-6.000 tm_pt_3=-0.000 tm_pt_4=-5.000 tm_pt_5=-45.538 tm_pt_6=-17.897 tm_pt_7=-6.000 tm_pt_8=-16.308 tm_pt_9=-2.271 tm_pt_10=-17.994 tm_pt_11=-0.000 tm_pt_12=-6.542 tm_pt_13=-0.000 tm_pt_14=-10.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=6.000 WordPenalty=-5.646 OOVPenalty=-100.000 ||| -181.169
-86 ||| the durga puja tourism of the আকর্ষণও is also a reason ||| lm_0=-26.319 lm_1=-39.134 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-6.000 tm_pt_3=-0.000 tm_pt_4=-5.000 tm_pt_5=-45.538 tm_pt_6=-17.897 tm_pt_7=-6.000 tm_pt_8=-16.308 tm_pt_9=-2.185 tm_pt_10=-13.820 tm_pt_11=-0.000 tm_pt_12=-6.424 tm_pt_13=-0.000 tm_pt_14=-10.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=5.000 WordPenalty=-5.646 OOVPenalty=-100.000 ||| -181.429
-86 ||| tourism of kolkata durga puja the আকর্ষণও is also a reason ||| lm_0=-26.087 lm_1=-39.134 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-6.000 tm_pt_3=-0.000 tm_pt_4=-3.000 tm_pt_5=-41.831 tm_pt_6=-17.807 tm_pt_7=-6.000 tm_pt_8=-16.308 tm_pt_9=-2.068 tm_pt_10=-10.909 tm_pt_11=-0.000 tm_pt_12=-6.286 tm_pt_13=-0.000 tm_pt_14=-10.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=3.000 WordPenalty=-5.646 OOVPenalty=-100.000 ||| -181.430
-86 ||| tourism of kolkata durga puja city আকর্ষণও is also a reason ||| lm_0=-27.334 lm_1=-39.134 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-6.000 tm_pt_3=-0.000 tm_pt_4=-3.000 tm_pt_5=-38.348 tm_pt_6=-18.126 tm_pt_7=-6.000 tm_pt_8=-16.308 tm_pt_9=-2.068 tm_pt_10=-8.610 tm_pt_11=-0.000 tm_pt_12=-5.593 tm_pt_13=-0.000 tm_pt_14=-10.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=3.000 WordPenalty=-5.646 OOVPenalty=-100.000 ||| -181.574
-86 ||| kolkata durga puja tourism of city আকর্ষণও is also a reason ||| lm_0=-28.790 lm_1=-39.134 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-6.000 tm_pt_3=-0.000 tm_pt_4=-5.000 tm_pt_5=-38.348 tm_pt_6=-18.126 tm_pt_7=-6.000 tm_pt_8=-16.308 tm_pt_9=-2.185 tm_pt_10=-8.229 tm_pt_11=-0.000 tm_pt_12=-6.205 tm_pt_13=-0.000 tm_pt_14=-10.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=5.000 WordPenalty=-5.646 OOVPenalty=-100.000 ||| -181.662
-86 ||| kolkata durga puja city of tourism আকর্ষণও বটে . ||| lm_0=-28.144 lm_1=-32.611 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-6.000 tm_pt_3=-0.000 tm_pt_4=-6.000 tm_pt_5=-11.192 tm_pt_6=-6.496 tm_pt_7=-6.000 tm_pt_8=-16.308 tm_pt_9=-1.135 tm_pt_10=-11.315 tm_pt_11=-0.000 tm_pt_12=-5.882 tm_pt_13=-0.000 tm_pt_14=-7.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=8.000 WordPenalty=-4.777 OOVPenalty=-200.000 ||| -264.676
-87 ||| but many of east german started living in economically reach and democratic west germany , east germany government built a wall in berlin in 1961 and reinforced its boundaries . ||| lm_0=-49.154 lm_1=-100.563 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-6.000 tm_pt_3=-0.000 tm_pt_4=-6.000 tm_pt_5=-73.802 tm_pt_6=-53.757 tm_pt_7=-6.000 tm_pt_8=-16.308 tm_pt_9=-3.007 tm_pt_10=-7.175 tm_pt_11=-0.000 tm_pt_12=-1.869 tm_pt_13=-0.000 tm_pt_14=-30.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=6.000 WordPenalty=-13.897 ||| -162.668
-87 ||| but many of east germany started living in economically reach and democratic west germany , east germany government built a wall in berlin in 1961 and reinforced its boundaries . ||| lm_0=-47.637 lm_1=-100.563 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-7.000 tm_pt_3=-0.000 tm_pt_4=-7.000 tm_pt_5=-77.075 tm_pt_6=-56.565 tm_pt_7=-7.000 tm_pt_8=-19.026 tm_pt_9=-4.007 tm_pt_10=-9.884 tm_pt_11=-0.000 tm_pt_12=-2.562 tm_pt_13=-0.000 tm_pt_14=-30.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=7.000 WordPenalty=-13.897 ||| -164.335
-87 ||| but when lakhs east german started living in economically reach and democratic west germany , east germany government built a wall in berlin in 1961 and reinforced its boundaries . ||| lm_0=-51.889 lm_1=-100.563 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-5.000 tm_pt_3=-0.000 tm_pt_4=-5.000 tm_pt_5=-71.335 tm_pt_6=-57.389 tm_pt_7=-5.000 tm_pt_8=-13.590 tm_pt_9=-3.368 tm_pt_10=-0.747 tm_pt_11=-0.000 tm_pt_12=-1.269 tm_pt_13=-0.000 tm_pt_14=-30.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=5.000 WordPenalty=-13.897 ||| -165.048
-87 ||| but when of east german started living in economically reach and democratic west germany , east germany government built a wall in berlin in 1961 and reinforced its boundaries . ||| lm_0=-50.113 lm_1=-100.563 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-5.000 tm_pt_3=-0.000 tm_pt_4=-5.000 tm_pt_5=-76.892 tm_pt_6=-57.471 tm_pt_7=-5.000 tm_pt_8=-13.590 tm_pt_9=-3.375 tm_pt_10=-5.103 tm_pt_11=-0.000 tm_pt_12=-1.674 tm_pt_13=-0.000 tm_pt_14=-30.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=5.000 WordPenalty=-13.897 ||| -165.219
-87 ||| but many lakhs east german started living in economically reach and democratic west germany , east germany government built a wall in berlin in 1961 and reinforced its boundaries . ||| lm_0=-53.671 lm_1=-100.563 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-6.000 tm_pt_3=-0.000 tm_pt_4=-6.000 tm_pt_5=-68.245 tm_pt_6=-53.675 tm_pt_7=-6.000 tm_pt_8=-16.308 tm_pt_9=-3.000 tm_pt_10=-2.820 tm_pt_11=-0.000 tm_pt_12=-1.463 tm_pt_13=-0.000 tm_pt_14=-30.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=6.000 WordPenalty=-13.897 ||| -165.888
-88 ||| first one is the নির্বাক first seven মিনিট which চারুর একঘেয়েমি life shown level and second one is " the দোলনার scene " where চারু অমলের for his love মুখোমুখি . ||| lm_0=-102.048 lm_1=-104.356 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-20.000 tm_pt_3=-0.000 tm_pt_4=-17.000 tm_pt_5=-63.476 tm_pt_6=-36.182 tm_pt_7=-20.000 tm_pt_8=-54.360 tm_pt_9=-4.106 tm_pt_10=-37.944 tm_pt_11=-0.000 tm_pt_12=-10.606 tm_pt_13=-0.000 tm_pt_14=-23.000 tm_pt_15=-1.000 tm_pt_16=-0.000 tm_glue_0=25.000 WordPenalty=-14.332 OOVPenalty=-800.000 ||| -1035.611
-88 ||| first one is film নির্বাক first seven মিনিট which চারুর একঘেয়েমি life shown level and second one is " the দোলনার scene " where চারু অমলের for his love মুখোমুখি . ||| lm_0=-104.615 lm_1=-104.356 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-20.000 tm_pt_3=-0.000 tm_pt_4=-17.000 tm_pt_5=-59.695 tm_pt_6=-36.346 tm_pt_7=-20.000 tm_pt_8=-54.360 tm_pt_9=-3.156 tm_pt_10=-33.741 tm_pt_11=-0.000 tm_pt_12=-9.220 tm_pt_13=-0.000 tm_pt_14=-23.000 tm_pt_15=-1.000 tm_pt_16=-0.000 tm_glue_0=25.000 WordPenalty=-14.332 OOVPenalty=-800.000 ||| -1035.760
-88 ||| first one is the নির্বাক first seven মিনিট which চারুর একঘেয়েমি life shown them and second one is " the দোলনার scene " where চারু অমলের for his love মুখোমুখি . ||| lm_0=-101.785 lm_1=-104.356 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-20.000 tm_pt_3=-0.000 tm_pt_4=-18.000 tm_pt_5=-64.617 tm_pt_6=-36.182 tm_pt_7=-20.000 tm_pt_8=-54.360 tm_pt_9=-4.738 tm_pt_10=-39.119 tm_pt_11=-0.000 tm_pt_12=-11.992 tm_pt_13=-0.000 tm_pt_14=-23.000 tm_pt_15=-1.000 tm_pt_16=-0.000 tm_glue_0=26.000 WordPenalty=-14.332 OOVPenalty=-800.000 ||| -1035.853
-88 ||| first one is the নির্বাক first seven মিনিট which চারুর একঘেয়েমি life shown sound and second one is " the দোলনার scene " where চারু অমলের for his love মুখোমুখি . ||| lm_0=-102.550 lm_1=-104.356 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-20.000 tm_pt_3=-0.000 tm_pt_4=-18.000 tm_pt_5=-62.205 tm_pt_6=-36.182 tm_pt_7=-20.000 tm_pt_8=-54.360 tm_pt_9=-4.738 tm_pt_10=-37.672 tm_pt_11=-0.000 tm_pt_12=-11.992 tm_pt_13=-0.000 tm_pt_14=-23.000 tm_pt_15=-1.000 tm_pt_16=-0.000 tm_glue_0=26.000 WordPenalty=-14.332 OOVPenalty=-800.000 ||| -1035.963
-88 ||| first one is film নির্বাক first seven মিনিট which চারুর একঘেয়েমি life shown them and second one is " the দোলনার scene " where চারু অমলের for his love মুখোমুখি . ||| lm_0=-104.352 lm_1=-104.356 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-20.000 tm_pt_3=-0.000 tm_pt_4=-18.000 tm_pt_5=-60.836 tm_pt_6=-36.346 tm_pt_7=-20.000 tm_pt_8=-54.360 tm_pt_9=-3.788 tm_pt_10=-34.916 tm_pt_11=-0.000 tm_pt_12=-10.606 tm_pt_13=-0.000 tm_pt_14=-23.000 tm_pt_15=-1.000 tm_pt_16=-0.000 tm_glue_0=26.000 WordPenalty=-14.332 OOVPenalty=-800.000 ||| -1036.002
-88 ||| first one is film নির্বাক first seven মিনিট which চারুর একঘেয়েমি life shown sound and second one is " the দোলনার scene " where চারু অমলের for his love মুখোমুখি . ||| lm_0=-105.117 lm_1=-104.356 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-20.000 tm_pt_3=-0.000 tm_pt_4=-18.000 tm_pt_5=-58.424 tm_pt_6=-36.346 tm_pt_7=-20.000 tm_pt_8=-54.360 tm_pt_9=-3.788 tm_pt_10=-33.469 tm_pt_11=-0.000 tm_pt_12=-10.606 tm_pt_13=-0.000 tm_pt_14=-23.000 tm_pt_15=-1.000 tm_pt_16=-0.000 tm_glue_0=26.000 WordPenalty=-14.332 OOVPenalty=-800.000 ||| -1036.112
-88 ||| first one is the নির্বাক first seven মিনিট which চারুর একঘেয়েমি life shown level and the second one is " দোলনার scene " where চারু অমলের for his love মুখোমুখি . ||| lm_0=-98.761 lm_1=-104.356 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-21.000 tm_pt_3=-0.000 tm_pt_4=-16.000 tm_pt_5=-63.476 tm_pt_6=-36.182 tm_pt_7=-21.000 tm_pt_8=-57.078 tm_pt_9=-3.745 tm_pt_10=-39.194 tm_pt_11=-0.000 tm_pt_12=-15.506 tm_pt_13=-0.000 tm_pt_14=-23.000 tm_pt_15=-1.000 tm_pt_16=-0.000 tm_glue_0=23.000 WordPenalty=-14.332 OOVPenalty=-800.000 ||| -1036.426
-88 ||| first one is film নির্বাক first seven মিনিট which চারুর একঘেয়েমি life shown level and the second one is " দোলনার scene " where চারু অমলের for his love মুখোমুখি . ||| lm_0=-101.329 lm_1=-104.356 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-21.000 tm_pt_3=-0.000 tm_pt_4=-16.000 tm_pt_5=-59.695 tm_pt_6=-36.346 tm_pt_7=-21.000 tm_pt_8=-57.078 tm_pt_9=-2.795 tm_pt_10=-34.991 tm_pt_11=-0.000 tm_pt_12=-14.120 tm_pt_13=-0.000 tm_pt_14=-23.000 tm_pt_15=-1.000 tm_pt_16=-0.000 tm_glue_0=23.000 WordPenalty=-14.332 OOVPenalty=-800.000 ||| -1036.575
-88 ||| first one is the নির্বাক first seven মিনিট which চারুর একঘেয়েমি life shown level and second one is " the দোলনার scene " where চারু অমলের love for his মুখোমুখি . ||| lm_0=-101.929 lm_1=-104.356 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-20.000 tm_pt_3=-0.000 tm_pt_4=-15.000 tm_pt_5=-63.476 tm_pt_6=-36.182 tm_pt_7=-20.000 tm_pt_8=-54.360 tm_pt_9=-4.075 tm_pt_10=-37.509 tm_pt_11=-0.000 tm_pt_12=-11.627 tm_pt_13=-0.000 tm_pt_14=-23.000 tm_pt_15=-1.000 tm_pt_16=-0.000 tm_glue_0=23.000 WordPenalty=-14.332 OOVPenalty=-800.000 ||| -1037.615
-88 ||| first one is film নির্বাক first seven মিনিট which চারুর একঘেয়েমি life shown level and second one is " the দোলনার scene " where চারু অমলের love for his মুখোমুখি . ||| lm_0=-104.496 lm_1=-104.356 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-20.000 tm_pt_3=-0.000 tm_pt_4=-15.000 tm_pt_5=-59.695 tm_pt_6=-36.346 tm_pt_7=-20.000 tm_pt_8=-54.360 tm_pt_9=-3.124 tm_pt_10=-33.307 tm_pt_11=-0.000 tm_pt_12=-10.240 tm_pt_13=-0.000 tm_pt_14=-23.000 tm_pt_15=-1.000 tm_pt_16=-0.000 tm_glue_0=23.000 WordPenalty=-14.332 OOVPenalty=-800.000 ||| -1037.764
-89 ||| 18th century a group চিন্তাবিদ and writer income and production চক্রাকার current through economic thoughts development it . ||| lm_0=-55.872 lm_1=-61.961 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-11.000 tm_pt_3=-0.000 tm_pt_4=-11.000 tm_pt_5=-33.743 tm_pt_6=-15.231 tm_pt_7=-11.000 tm_pt_8=-29.898 tm_pt_9=-6.068 tm_pt_10=-14.342 tm_pt_11=-0.000 tm_pt_12=-4.255 tm_pt_13=-0.000 tm_pt_14=-16.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=13.000 WordPenalty=-8.686 OOVPenalty=-200.000 ||| -330.408
-89 ||| 18th century a group চিন্তাবিদ and writer income and production চক্রাকার current through economic thoughts development people . ||| lm_0=-55.886 lm_1=-61.961 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-11.000 tm_pt_3=-0.000 tm_pt_4=-11.000 tm_pt_5=-33.399 tm_pt_6=-15.924 tm_pt_7=-11.000 tm_pt_8=-29.898 tm_pt_9=-6.068 tm_pt_10=-14.342 tm_pt_11=-0.000 tm_pt_12=-4.255 tm_pt_13=-0.000 tm_pt_14=-16.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=13.000 WordPenalty=-8.686 OOVPenalty=-200.000 ||| -330.597
-89 ||| 18 century a group চিন্তাবিদ and writer income and production চক্রাকার current through economic thoughts development it . ||| lm_0=-57.011 lm_1=-61.961 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-12.000 tm_pt_3=-0.000 tm_pt_4=-12.000 tm_pt_5=-32.293 tm_pt_6=-13.806 tm_pt_7=-12.000 tm_pt_8=-32.616 tm_pt_9=-5.118 tm_pt_10=-16.169 tm_pt_11=-0.000 tm_pt_12=-4.726 tm_pt_13=-0.000 tm_pt_14=-16.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=14.000 WordPenalty=-8.686 OOVPenalty=-200.000 ||| -331.105
-89 ||| 18 century a group চিন্তাবিদ and writer income and production চক্রাকার current through economic thoughts development people . ||| lm_0=-57.026 lm_1=-61.961 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-12.000 tm_pt_3=-0.000 tm_pt_4=-12.000 tm_pt_5=-31.949 tm_pt_6=-14.499 tm_pt_7=-12.000 tm_pt_8=-32.616 tm_pt_9=-5.118 tm_pt_10=-16.169 tm_pt_11=-0.000 tm_pt_12=-4.726 tm_pt_13=-0.000 tm_pt_14=-16.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=14.000 WordPenalty=-8.686 OOVPenalty=-200.000 ||| -331.295
-89 ||| 18th century a group চিন্তাবিদ and writer income and production চক্রাকার current through economic thoughts development emperor . ||| lm_0=-57.425 lm_1=-61.961 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-11.000 tm_pt_3=-0.000 tm_pt_4=-11.000 tm_pt_5=-30.810 tm_pt_6=-15.924 tm_pt_7=-11.000 tm_pt_8=-29.898 tm_pt_9=-6.068 tm_pt_10=-12.732 tm_pt_11=-0.000 tm_pt_12=-4.255 tm_pt_13=-0.000 tm_pt_14=-16.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=13.000 WordPenalty=-8.686 OOVPenalty=-200.000 ||| -331.588
-90 ||| the arousal activities before penetrating male organ into vagina is called foreplay . ||| lm_0=-22.210 lm_1=-41.130 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-3.000 tm_pt_3=-0.000 tm_pt_4=-3.000 tm_pt_5=-40.937 tm_pt_6=-21.594 tm_pt_7=-3.000 tm_pt_8=-8.154 tm_pt_9=-1.007 tm_pt_10=-0.415 tm_pt_11=-0.000 tm_pt_12=-0.920 tm_pt_13=-0.000 tm_pt_14=-13.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=3.000 WordPenalty=-6.514 ||| -67.075
-90 ||| the arousal activities before penetrating male organ into vagina called foreplay . ||| lm_0=-24.209 lm_1=-37.869 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-3.000 tm_pt_3=-0.000 tm_pt_4=-3.000 tm_pt_5=-37.122 tm_pt_6=-21.535 tm_pt_7=-3.000 tm_pt_8=-8.154 tm_pt_9=-2.000 tm_pt_10=-0.009 tm_pt_11=-0.000 tm_pt_12=-0.003 tm_pt_13=-0.000 tm_pt_14=-12.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=3.000 WordPenalty=-6.080 ||| -67.081
-90 ||| the arousal activities before penetrating male organ into vagina is called stimulation ||| lm_0=-22.691 lm_1=-37.338 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-1.000 tm_pt_3=-0.000 tm_pt_4=-1.000 tm_pt_5=-41.890 tm_pt_6=-31.076 tm_pt_7=-1.000 tm_pt_8=-2.718 tm_pt_9=-1.000 tm_pt_10=0.000 tm_pt_11=-0.000 tm_pt_12=-0.693 tm_pt_13=-0.000 tm_pt_14=-12.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=1.000 WordPenalty=-6.080 ||| -67.760
-90 ||| vagina arousal activities before penetrating male organ into vagina is called stimulation ||| lm_0=-25.494 lm_1=-35.909 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-2.000 tm_pt_3=-0.000 tm_pt_4=-2.000 tm_pt_5=-36.650 tm_pt_6=-30.932 tm_pt_7=-2.000 tm_pt_8=-5.436 tm_pt_9=-1.135 tm_pt_10=-2.197 tm_pt_11=-0.000 tm_pt_12=-1.099 tm_pt_13=-0.000 tm_pt_14=-12.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=2.000 WordPenalty=-6.080 ||| -69.424
-90 ||| the arousal activities before penetrating male organ into vagina called stimulation ||| lm_0=-24.690 lm_1=-34.076 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-3.000 tm_pt_3=-0.000 tm_pt_4=-3.000 tm_pt_5=-38.075 tm_pt_6=-31.017 tm_pt_7=-3.000 tm_pt_8=-8.154 tm_pt_9=-2.000 tm_pt_10=-2.623 tm_pt_11=-0.000 tm_pt_12=-3.476 tm_pt_13=-0.000 tm_pt_14=-11.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=3.000 WordPenalty=-5.646 ||| -70.247
-91 ||| it is শূকরের in in was which can শূকরকে ইনফ্লুয়েঞ্জাতে by british . ||| lm_0=-36.899 lm_1=-45.656 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-8.000 tm_pt_3=-0.000 tm_pt_4=-7.000 tm_pt_5=-43.503 tm_pt_6=-16.453 tm_pt_7=-8.000 tm_pt_8=-21.744 tm_pt_9=-2.063 tm_pt_10=-29.648 tm_pt_11=-0.000 tm_pt_12=-8.509 tm_pt_13=-0.000 tm_pt_14=-10.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=10.000 WordPenalty=-6.514 OOVPenalty=-300.000 ||| -400.621
-91 ||| it basically শূকরের in in was which can শূকরকে ইনফ্লুয়েঞ্জাতে by british . ||| lm_0=-39.881 lm_1=-45.656 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-7.000 tm_pt_3=-0.000 tm_pt_4=-6.000 tm_pt_5=-38.304 tm_pt_6=-16.616 tm_pt_7=-7.000 tm_pt_8=-19.026 tm_pt_9=-2.928 tm_pt_10=-22.777 tm_pt_11=-0.000 tm_pt_12=-4.755 tm_pt_13=-0.000 tm_pt_14=-10.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=9.000 WordPenalty=-6.514 OOVPenalty=-300.000 ||| -400.780
-91 ||| it is শূকরের in in was which can শূকরকে ইনফ্লুয়েঞ্জাতে by cultures . ||| lm_0=-38.736 lm_1=-45.656 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-8.000 tm_pt_3=-0.000 tm_pt_4=-7.000 tm_pt_5=-40.802 tm_pt_6=-16.453 tm_pt_7=-8.000 tm_pt_8=-21.744 tm_pt_9=-2.063 tm_pt_10=-28.731 tm_pt_11=-0.000 tm_pt_12=-8.509 tm_pt_13=-0.000 tm_pt_14=-10.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=10.000 WordPenalty=-6.514 OOVPenalty=-300.000 ||| -402.139
-91 ||| it is শূকরের in in was which can শূকরকে ইনফ্লুয়েঞ্জাতে would by . ||| lm_0=-38.842 lm_1=-45.656 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-9.000 tm_pt_3=-0.000 tm_pt_4=-7.000 tm_pt_5=-41.446 tm_pt_6=-14.844 tm_pt_7=-9.000 tm_pt_8=-24.462 tm_pt_9=-1.431 tm_pt_10=-28.964 tm_pt_11=-0.000 tm_pt_12=-7.819 tm_pt_13=-0.000 tm_pt_14=-10.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=10.000 WordPenalty=-6.514 OOVPenalty=-300.000 ||| -402.297
-91 ||| it basically শূকরের in in was which can শূকরকে ইনফ্লুয়েঞ্জাতে by cultures . ||| lm_0=-41.718 lm_1=-45.656 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-7.000 tm_pt_3=-0.000 tm_pt_4=-6.000 tm_pt_5=-35.603 tm_pt_6=-16.616 tm_pt_7=-7.000 tm_pt_8=-19.026 tm_pt_9=-2.928 tm_pt_10=-21.861 tm_pt_11=-0.000 tm_pt_12=-4.755 tm_pt_13=-0.000 tm_pt_14=-10.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=9.000 WordPenalty=-6.514 OOVPenalty=-300.000 ||| -402.298
-91 ||| it basically শূকরের in in was which can শূকরকে ইনফ্লুয়েঞ্জাতে would by . ||| lm_0=-41.824 lm_1=-45.656 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-8.000 tm_pt_3=-0.000 tm_pt_4=-6.000 tm_pt_5=-36.247 tm_pt_6=-15.006 tm_pt_7=-8.000 tm_pt_8=-21.744 tm_pt_9=-2.295 tm_pt_10=-22.093 tm_pt_11=-0.000 tm_pt_12=-4.065 tm_pt_13=-0.000 tm_pt_14=-10.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=9.000 WordPenalty=-6.514 OOVPenalty=-300.000 ||| -402.455
-91 ||| it is শূকরের in in was which can শূকরকে ইনফ্লুয়েঞ্জাতে affected cultures . ||| lm_0=-40.503 lm_1=-45.656 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-8.000 tm_pt_3=-0.000 tm_pt_4=-7.000 tm_pt_5=-37.296 tm_pt_6=-17.347 tm_pt_7=-8.000 tm_pt_8=-21.744 tm_pt_9=-2.063 tm_pt_10=-23.915 tm_pt_11=-0.000 tm_pt_12=-8.509 tm_pt_13=-0.000 tm_pt_14=-10.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=10.000 WordPenalty=-6.514 OOVPenalty=-300.000 ||| -402.712
-91 ||| it is শূকরের in in was which can শূকরকে ইনফ্লুয়েঞ্জাতে by would . ||| lm_0=-38.868 lm_1=-45.656 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-9.000 tm_pt_3=-0.000 tm_pt_4=-8.000 tm_pt_5=-41.446 tm_pt_6=-14.844 tm_pt_7=-9.000 tm_pt_8=-24.462 tm_pt_9=-2.063 tm_pt_10=-29.839 tm_pt_11=-0.000 tm_pt_12=-9.765 tm_pt_13=-0.000 tm_pt_14=-10.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=11.000 WordPenalty=-6.514 OOVPenalty=-300.000 ||| -402.798
-91 ||| it basically শূকরের in in was which can শূকরকে ইনফ্লুয়েঞ্জাতে affected cultures . ||| lm_0=-43.485 lm_1=-45.656 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-7.000 tm_pt_3=-0.000 tm_pt_4=-6.000 tm_pt_5=-32.097 tm_pt_6=-17.510 tm_pt_7=-7.000 tm_pt_8=-19.026 tm_pt_9=-2.928 tm_pt_10=-17.044 tm_pt_11=-0.000 tm_pt_12=-4.755 tm_pt_13=-0.000 tm_pt_14=-10.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=9.000 WordPenalty=-6.514 OOVPenalty=-300.000 ||| -402.871
-91 ||| it basically শূকরের in in was which can শূকরকে ইনফ্লুয়েঞ্জাতে by would . ||| lm_0=-41.850 lm_1=-45.656 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-8.000 tm_pt_3=-0.000 tm_pt_4=-7.000 tm_pt_5=-36.247 tm_pt_6=-15.006 tm_pt_7=-8.000 tm_pt_8=-21.744 tm_pt_9=-2.928 tm_pt_10=-22.968 tm_pt_11=-0.000 tm_pt_12=-6.011 tm_pt_13=-0.000 tm_pt_14=-10.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=10.000 WordPenalty=-6.514 OOVPenalty=-300.000 ||| -402.957
-92 ||| these একএ the mycelium structure . ||| lm_0=-19.717 lm_1=-22.828 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-4.000 tm_pt_3=-0.000 tm_pt_4=-3.000 tm_pt_5=-11.032 tm_pt_6=-8.359 tm_pt_7=-4.000 tm_pt_8=-10.872 tm_pt_9=-0.000 tm_pt_10=-15.357 tm_pt_11=-0.000 tm_pt_12=-2.852 tm_pt_13=-0.000 tm_pt_14=-4.000 tm_pt_15=-1.000 tm_pt_16=-0.000 tm_glue_0=5.000 WordPenalty=-3.474 OOVPenalty=-200.000 ||| -248.730
-92 ||| these একএ to mycelium structure . ||| lm_0=-19.811 lm_1=-22.828 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-4.000 tm_pt_3=-0.000 tm_pt_4=-3.000 tm_pt_5=-10.353 tm_pt_6=-9.145 tm_pt_7=-4.000 tm_pt_8=-10.872 tm_pt_9=-0.000 tm_pt_10=-14.748 tm_pt_11=-0.000 tm_pt_12=-3.432 tm_pt_13=-0.000 tm_pt_14=-4.000 tm_pt_15=-1.000 tm_pt_16=-0.000 tm_glue_0=5.000 WordPenalty=-3.474 OOVPenalty=-200.000 ||| -249.033
-92 ||| these একএ the mycelium formed . ||| lm_0=-20.043 lm_1=-22.828 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-4.000 tm_pt_3=-0.000 tm_pt_4=-3.000 tm_pt_5=-10.989 tm_pt_6=-7.740 tm_pt_7=-4.000 tm_pt_8=-10.872 tm_pt_9=-0.000 tm_pt_10=-15.807 tm_pt_11=-0.000 tm_pt_12=-3.494 tm_pt_13=-0.000 tm_pt_14=-4.000 tm_pt_15=-1.000 tm_pt_16=-0.000 tm_glue_0=5.000 WordPenalty=-3.474 OOVPenalty=-200.000 ||| -249.258
-92 ||| these একএ to mycelium formed . ||| lm_0=-20.137 lm_1=-22.828 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-4.000 tm_pt_3=-0.000 tm_pt_4=-3.000 tm_pt_5=-10.311 tm_pt_6=-8.526 tm_pt_7=-4.000 tm_pt_8=-10.872 tm_pt_9=-0.000 tm_pt_10=-15.198 tm_pt_11=-0.000 tm_pt_12=-4.074 tm_pt_13=-0.000 tm_pt_14=-4.000 tm_pt_15=-1.000 tm_pt_16=-0.000 tm_glue_0=5.000 WordPenalty=-3.474 OOVPenalty=-200.000 ||| -249.561
-92 ||| these একএ the mycelium formed the . ||| lm_0=-21.826 lm_1=-26.089 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-4.000 tm_pt_3=-0.000 tm_pt_4=-4.000 tm_pt_5=-14.196 tm_pt_6=-5.953 tm_pt_7=-4.000 tm_pt_8=-10.872 tm_pt_9=-0.050 tm_pt_10=-8.281 tm_pt_11=-0.000 tm_pt_12=-2.016 tm_pt_13=-0.000 tm_pt_14=-5.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=6.000 WordPenalty=-3.909 OOVPenalty=-200.000 ||| -249.619
-92 ||| these একএ by mycelium structure . ||| lm_0=-20.001 lm_1=-22.828 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-4.000 tm_pt_3=-0.000 tm_pt_4=-3.000 tm_pt_5=-10.365 tm_pt_6=-10.244 tm_pt_7=-4.000 tm_pt_8=-10.872 tm_pt_9=-0.019 tm_pt_10=-14.289 tm_pt_11=-0.000 tm_pt_12=-4.461 tm_pt_13=-0.000 tm_pt_14=-4.000 tm_pt_15=-1.000 tm_pt_16=-0.000 tm_glue_0=5.000 WordPenalty=-3.474 OOVPenalty=-200.000 ||| -249.906
-92 ||| these একএ to mycelium formed the . ||| lm_0=-21.920 lm_1=-26.089 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-4.000 tm_pt_3=-0.000 tm_pt_4=-4.000 tm_pt_5=-13.518 tm_pt_6=-6.740 tm_pt_7=-4.000 tm_pt_8=-10.872 tm_pt_9=-0.050 tm_pt_10=-7.673 tm_pt_11=-0.000 tm_pt_12=-2.595 tm_pt_13=-0.000 tm_pt_14=-5.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=6.000 WordPenalty=-3.909 OOVPenalty=-200.000 ||| -249.921
-92 ||| these একএ in mycelium structure . ||| lm_0=-19.703 lm_1=-22.828 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-4.000 tm_pt_3=-0.000 tm_pt_4=-3.000 tm_pt_5=-11.559 tm_pt_6=-9.819 tm_pt_7=-4.000 tm_pt_8=-10.872 tm_pt_9=-0.019 tm_pt_10=-15.897 tm_pt_11=-0.000 tm_pt_12=-4.461 tm_pt_13=-0.000 tm_pt_14=-4.000 tm_pt_15=-1.000 tm_pt_16=-0.000 tm_glue_0=5.000 WordPenalty=-3.474 OOVPenalty=-200.000 ||| -250.038
-93 ||| russia now a democratic country . ||| lm_0=-13.823 lm_1=-22.828 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-4.000 tm_pt_3=-0.000 tm_pt_4=-4.000 tm_pt_5=-6.332 tm_pt_6=-4.466 tm_pt_7=-4.000 tm_pt_8=-10.872 tm_pt_9=-0.050 tm_pt_10=-1.906 tm_pt_11=-0.000 tm_pt_12=-2.722 tm_pt_13=-0.000 tm_pt_14=-6.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=4.000 WordPenalty=-3.474 ||| -35.538
-93 ||| russia at a democratic country . ||| lm_0=-12.833 lm_1=-22.828 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-4.000 tm_pt_3=-0.000 tm_pt_4=-4.000 tm_pt_5=-7.468 tm_pt_6=-3.817 tm_pt_7=-4.000 tm_pt_8=-10.872 tm_pt_9=-0.185 tm_pt_10=-4.040 tm_pt_11=-0.000 tm_pt_12=-5.120 tm_pt_13=-0.000 tm_pt_14=-6.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=4.000 WordPenalty=-3.474 ||| -35.822
-93 ||| russia presently a democratic country . ||| lm_0=-14.848 lm_1=-22.828 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-4.000 tm_pt_3=-0.000 tm_pt_4=-4.000 tm_pt_5=-5.165 tm_pt_6=-4.075 tm_pt_7=-4.000 tm_pt_8=-10.872 tm_pt_9=-0.050 tm_pt_10=-1.100 tm_pt_11=-0.000 tm_pt_12=-2.327 tm_pt_13=-0.000 tm_pt_14=-6.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=4.000 WordPenalty=-3.474 ||| -36.101
-93 ||| russia is a democratic country . ||| lm_0=-11.653 lm_1=-22.828 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-4.000 tm_pt_3=-0.000 tm_pt_4=-4.000 tm_pt_5=-10.602 tm_pt_6=-5.298 tm_pt_7=-4.000 tm_pt_8=-10.872 tm_pt_9=-0.185 tm_pt_10=-6.630 tm_pt_11=-0.000 tm_pt_12=-5.120 tm_pt_13=-0.000 tm_pt_14=-6.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=4.000 WordPenalty=-3.474 ||| -36.140
-94 ||| penis ||| lm_0=-5.586 lm_1=-5.991 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-1.000 tm_pt_3=-0.000 tm_pt_4=-1.000 tm_pt_5=-1.771 tm_pt_6=-1.305 tm_pt_7=-1.000 tm_pt_8=-2.718 tm_pt_9=-0.018 tm_pt_10=-1.609 tm_pt_11=-0.000 tm_pt_12=-1.099 tm_pt_13=-0.000 tm_pt_14=-1.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=1.000 WordPenalty=-1.303 ||| -10.979
-94 ||| sex ||| lm_0=-4.702 lm_1=-5.991 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-1.000 tm_pt_3=-0.000 tm_pt_4=-1.000 tm_pt_5=-3.989 tm_pt_6=-3.384 tm_pt_7=-1.000 tm_pt_8=-2.718 tm_pt_9=-0.368 tm_pt_10=-2.251 tm_pt_11=-0.000 tm_pt_12=-2.015 tm_pt_13=-0.000 tm_pt_14=-1.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=1.000 WordPenalty=-1.303 ||| -11.798
-94 ||| gender ||| lm_0=-6.387 lm_1=-6.522 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-1.000 tm_pt_3=-0.000 tm_pt_4=-1.000 tm_pt_5=-0.811 tm_pt_6=-1.305 tm_pt_7=-1.000 tm_pt_8=-2.718 tm_pt_9=-0.007 tm_pt_10=-0.405 tm_pt_11=-0.000 tm_pt_12=-0.916 tm_pt_13=-0.000 tm_pt_14=-1.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=1.000 WordPenalty=-1.303 ||| -12.058
-94 ||| the ||| lm_0=-3.208 lm_1=-6.522 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-1.000 tm_pt_3=-0.000 tm_pt_4=-1.000 tm_pt_5=-9.110 tm_pt_6=-2.979 tm_pt_7=-1.000 tm_pt_8=-2.718 tm_pt_9=-1.000 tm_pt_10=-8.187 tm_pt_11=-0.000 tm_pt_12=-2.708 tm_pt_13=-0.000 tm_pt_14=-1.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=1.000 WordPenalty=-1.303 ||| -13.752
-94 ||| sexual organs ||| lm_0=-5.946 lm_1=-9.252 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-1.000 tm_pt_3=-0.000 tm_pt_4=-1.000 tm_pt_5=-9.102 tm_pt_6=-4.078 tm_pt_7=-1.000 tm_pt_8=-2.718 tm_pt_9=-1.000 tm_pt_10=-1.386 tm_pt_11=-0.000 tm_pt_12=-2.708 tm_pt_13=-0.000 tm_pt_14=-2.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=1.000 WordPenalty=-1.737 ||| -17.194
-94 ||| লিঙ্গ ||| lm_0=-7.355 lm_1=-6.522 tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=0.000 tm_pt_3=0.000 tm_pt_4=0.000 tm_pt_5=0.000 tm_pt_6=0.000 tm_pt_7=0.000 tm_pt_8=0.000 tm_pt_9=0.000 tm_pt_10=0.000 tm_pt_11=0.000 tm_pt_12=0.000 tm_pt_13=0.000 tm_pt_14=0.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=1.000 WordPenalty=-1.303 OOVPenalty=-100.000 ||| -111.358
-95 ||| this state is called orgasm . ||| lm_0=-5.945 lm_1=-22.828 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-1.000 tm_pt_3=-0.000 tm_pt_4=-1.000 tm_pt_5=-12.738 tm_pt_6=-21.324 tm_pt_7=-1.000 tm_pt_8=-2.718 tm_pt_9=-0.368 tm_pt_10=0.000 tm_pt_11=-0.000 tm_pt_12=-0.693 tm_pt_13=-0.000 tm_pt_14=-6.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=1.000 WordPenalty=-3.474 ||| -31.255
-95 ||| this state called orgasm . ||| lm_0=-10.709 lm_1=-19.567 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-3.000 tm_pt_3=-0.000 tm_pt_4=-3.000 tm_pt_5=-8.924 tm_pt_6=-21.265 tm_pt_7=-3.000 tm_pt_8=-8.154 tm_pt_9=-0.504 tm_pt_10=-2.441 tm_pt_11=-0.000 tm_pt_12=-2.783 tm_pt_13=-0.000 tm_pt_14=-5.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=3.000 WordPenalty=-3.040 ||| -36.164
-95 ||| this situation is called orgasm . ||| lm_0=-9.800 lm_1=-22.828 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-1.000 tm_pt_3=-0.000 tm_pt_4=-1.000 tm_pt_5=-11.823 tm_pt_6=-21.729 tm_pt_7=-1.000 tm_pt_8=-2.718 tm_pt_9=-1.000 tm_pt_10=0.000 tm_pt_11=-0.000 tm_pt_12=-1.386 tm_pt_13=-0.000 tm_pt_14=-6.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=1.000 WordPenalty=-3.474 ||| -36.754
-96 ||| different period here was established royal more empire . ||| lm_0=-26.591 lm_1=-32.611 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-9.000 tm_pt_3=-0.000 tm_pt_4=-4.000 tm_pt_5=-21.275 tm_pt_6=-12.515 tm_pt_7=-9.000 tm_pt_8=-24.462 tm_pt_9=-2.754 tm_pt_10=-16.893 tm_pt_11=-0.000 tm_pt_12=-4.327 tm_pt_13=-0.000 tm_pt_14=-9.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=2.000 WordPenalty=-4.777 ||| -77.701
-96 ||| different period was established this royal more empire . ||| lm_0=-24.820 lm_1=-32.611 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-9.000 tm_pt_3=-0.000 tm_pt_4=-4.000 tm_pt_5=-25.005 tm_pt_6=-13.679 tm_pt_7=-9.000 tm_pt_8=-24.462 tm_pt_9=-3.386 tm_pt_10=-18.640 tm_pt_11=-0.000 tm_pt_12=-5.020 tm_pt_13=-0.000 tm_pt_14=-9.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=2.000 WordPenalty=-4.777 ||| -77.837
-96 ||| different period here established the royal more empire . ||| lm_0=-26.232 lm_1=-32.611 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-9.000 tm_pt_3=-0.000 tm_pt_4=-3.000 tm_pt_5=-24.031 tm_pt_6=-13.324 tm_pt_7=-9.000 tm_pt_8=-24.462 tm_pt_9=-2.503 tm_pt_10=-19.552 tm_pt_11=-0.000 tm_pt_12=-3.996 tm_pt_13=-0.000 tm_pt_14=-9.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=2.000 WordPenalty=-4.777 ||| -78.390
-96 ||| different period was established royal this more empire . ||| lm_0=-25.484 lm_1=-32.611 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-9.000 tm_pt_3=-0.000 tm_pt_4=-4.000 tm_pt_5=-25.005 tm_pt_6=-13.679 tm_pt_7=-9.000 tm_pt_8=-24.462 tm_pt_9=-3.386 tm_pt_10=-18.640 tm_pt_11=-0.000 tm_pt_12=-5.020 tm_pt_13=-0.000 tm_pt_14=-9.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=2.000 WordPenalty=-4.777 ||| -78.658
-96 ||| history different period here was established royal more empire . ||| lm_0=-30.742 lm_1=-35.872 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-10.000 tm_pt_3=-0.000 tm_pt_4=-8.000 tm_pt_5=-23.037 tm_pt_6=-9.337 tm_pt_7=-10.000 tm_pt_8=-27.180 tm_pt_9=-1.387 tm_pt_10=-20.795 tm_pt_11=-0.000 tm_pt_12=-5.694 tm_pt_13=-0.000 tm_pt_14=-10.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=8.000 WordPenalty=-5.212 ||| -79.860
-96 ||| history different period this was established royal more empire . ||| lm_0=-29.314 lm_1=-35.872 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-10.000 tm_pt_3=-0.000 tm_pt_4=-9.000 tm_pt_5=-26.767 tm_pt_6=-10.501 tm_pt_7=-10.000 tm_pt_8=-27.180 tm_pt_9=-2.019 tm_pt_10=-24.010 tm_pt_11=-0.000 tm_pt_12=-6.675 tm_pt_13=-0.000 tm_pt_14=-10.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=9.000 WordPenalty=-5.212 ||| -79.939
-97 ||| micro economics ||| lm_0=-5.021 lm_1=-9.783 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-1.000 tm_pt_3=-0.000 tm_pt_4=-1.000 tm_pt_5=-2.764 tm_pt_6=-1.552 tm_pt_7=-1.000 tm_pt_8=-2.718 tm_pt_9=-0.050 tm_pt_10=0.000 tm_pt_11=-0.000 tm_pt_12=-0.560 tm_pt_13=-0.000 tm_pt_14=-2.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=1.000 WordPenalty=-1.737 ||| -12.731
-97 ||| microeconomics ||| lm_0=-5.336 lm_1=-6.522 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-1.000 tm_pt_3=-0.000 tm_pt_4=-1.000 tm_pt_5=-1.386 tm_pt_6=-5.894 tm_pt_7=-1.000 tm_pt_8=-2.718 tm_pt_9=-1.000 tm_pt_10=-1.099 tm_pt_11=-0.000 tm_pt_12=-1.946 tm_pt_13=-0.000 tm_pt_14=-1.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=1.000 WordPenalty=-1.303 ||| -13.795
-97 ||| macro economics ||| lm_0=-5.691 lm_1=-9.783 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-1.000 tm_pt_3=-0.000 tm_pt_4=-1.000 tm_pt_5=-2.997 tm_pt_6=-2.245 tm_pt_7=-1.000 tm_pt_8=-2.718 tm_pt_9=-0.368 tm_pt_10=0.000 tm_pt_11=-0.000 tm_pt_12=-1.253 tm_pt_13=-0.000 tm_pt_14=-2.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=1.000 WordPenalty=-1.737 ||| -14.348
-97 ||| macro economy ||| lm_0=-6.566 lm_1=-9.783 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-2.000 tm_pt_3=-0.000 tm_pt_4=-2.000 tm_pt_5=-3.219 tm_pt_6=-2.957 tm_pt_7=-2.000 tm_pt_8=-5.436 tm_pt_9=-0.368 tm_pt_10=-1.009 tm_pt_11=-0.000 tm_pt_12=-2.277 tm_pt_13=-0.000 tm_pt_14=-2.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=2.000 WordPenalty=-1.737 ||| -16.531
-97 ||| micro economy ||| lm_0=-8.065 lm_1=-9.783 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-2.000 tm_pt_3=-0.000 tm_pt_4=-2.000 tm_pt_5=-2.985 tm_pt_6=-2.264 tm_pt_7=-2.000 tm_pt_8=-5.436 tm_pt_9=-0.050 tm_pt_10=-1.874 tm_pt_11=-0.000 tm_pt_12=-1.583 tm_pt_13=-0.000 tm_pt_14=-2.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=2.000 WordPenalty=-1.737 ||| -17.823
-97 ||| economics micro ||| lm_0=-9.491 lm_1=-9.783 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-2.000 tm_pt_3=-0.000 tm_pt_4=-1.000 tm_pt_5=-2.764 tm_pt_6=-1.552 tm_pt_7=-2.000 tm_pt_8=-5.436 tm_pt_9=-0.051 tm_pt_10=-4.442 tm_pt_11=-0.000 tm_pt_12=-4.472 tm_pt_13=-0.000 tm_pt_14=-2.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=1.000 WordPenalty=-1.737 ||| -21.955
-98 ||| user to operating system the visible of the computer interface . ||| lm_0=-27.398 lm_1=-39.134 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-10.000 tm_pt_3=-0.000 tm_pt_4=-10.000 tm_pt_5=-30.494 tm_pt_6=-10.074 tm_pt_7=-10.000 tm_pt_8=-27.180 tm_pt_9=-0.553 tm_pt_10=-25.134 tm_pt_11=-0.000 tm_pt_12=-9.193 tm_pt_13=-0.000 tm_pt_14=-11.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=10.000 WordPenalty=-5.646 ||| -79.254
-98 ||| user to operating system the visible form is computer interface . ||| lm_0=-30.618 lm_1=-39.134 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-10.000 tm_pt_3=-0.000 tm_pt_4=-10.000 tm_pt_5=-24.401 tm_pt_6=-9.477 tm_pt_7=-10.000 tm_pt_8=-27.180 tm_pt_9=-0.553 tm_pt_10=-19.267 tm_pt_11=-0.000 tm_pt_12=-8.160 tm_pt_13=-0.000 tm_pt_14=-11.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=10.000 WordPenalty=-5.646 ||| -79.981
-98 ||| user to operating system the visible form are computer interface . ||| lm_0=-31.122 lm_1=-39.134 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-10.000 tm_pt_3=-0.000 tm_pt_4=-10.000 tm_pt_5=-23.695 tm_pt_6=-9.596 tm_pt_7=-10.000 tm_pt_8=-27.180 tm_pt_9=-0.553 tm_pt_10=-17.971 tm_pt_11=-0.000 tm_pt_12=-7.737 tm_pt_13=-0.000 tm_pt_14=-11.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=10.000 WordPenalty=-5.646 ||| -80.025
-98 ||| user to operating system the visible of are computer interface . ||| lm_0=-30.220 lm_1=-39.134 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-10.000 tm_pt_3=-0.000 tm_pt_4=-10.000 tm_pt_5=-27.275 tm_pt_6=-9.031 tm_pt_7=-10.000 tm_pt_8=-27.180 tm_pt_9=-0.553 tm_pt_10=-21.498 tm_pt_11=-0.000 tm_pt_12=-7.737 tm_pt_13=-0.000 tm_pt_14=-11.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=10.000 WordPenalty=-5.646 ||| -80.315
-98 ||| user to operating system the visible of the computers interface . ||| lm_0=-27.592 lm_1=-39.134 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-10.000 tm_pt_3=-0.000 tm_pt_4=-10.000 tm_pt_5=-30.657 tm_pt_6=-11.632 tm_pt_7=-10.000 tm_pt_8=-27.180 tm_pt_9=-0.553 tm_pt_10=-24.916 tm_pt_11=-0.000 tm_pt_12=-10.715 tm_pt_13=-0.000 tm_pt_14=-11.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=10.000 WordPenalty=-5.646 ||| -80.540
-98 ||| the user to operating visible of are computer interface . ||| lm_0=-28.611 lm_1=-35.872 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-10.000 tm_pt_3=-0.000 tm_pt_4=-8.000 tm_pt_5=-25.284 tm_pt_6=-9.508 tm_pt_7=-10.000 tm_pt_8=-27.180 tm_pt_9=-1.603 tm_pt_10=-20.953 tm_pt_11=-0.000 tm_pt_12=-10.715 tm_pt_13=-0.000 tm_pt_14=-10.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=7.000 WordPenalty=-5.212 ||| -80.707
-98 ||| user to operating system most visible of are computer interface . ||| lm_0=-32.231 lm_1=-39.134 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-10.000 tm_pt_3=-0.000 tm_pt_4=-10.000 tm_pt_5=-23.367 tm_pt_6=-8.876 tm_pt_7=-10.000 tm_pt_8=-27.180 tm_pt_9=-0.553 tm_pt_10=-17.510 tm_pt_11=-0.000 tm_pt_12=-7.099 tm_pt_13=-0.000 tm_pt_14=-11.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=10.000 WordPenalty=-5.646 ||| -80.742
-98 ||| operating system to users most visible of are computer interface . ||| lm_0=-30.825 lm_1=-39.134 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-10.000 tm_pt_3=-0.000 tm_pt_4=-9.000 tm_pt_5=-23.295 tm_pt_6=-9.312 tm_pt_7=-10.000 tm_pt_8=-27.180 tm_pt_9=-0.878 tm_pt_10=-14.536 tm_pt_11=-0.000 tm_pt_12=-7.541 tm_pt_13=-0.000 tm_pt_14=-11.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=8.000 WordPenalty=-5.646 ||| -80.754
-99 ||| বিস্তারিতঃ 1971 temporary government ||| lm_0=-16.410 lm_1=-16.306 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-2.000 tm_pt_3=-0.000 tm_pt_4=-2.000 tm_pt_5=-2.717 tm_pt_6=-9.543 tm_pt_7=-2.000 tm_pt_8=-5.436 tm_pt_9=-0.503 tm_pt_10=-4.447 tm_pt_11=-0.000 tm_pt_12=-2.398 tm_pt_13=-0.000 tm_pt_14=-3.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=3.000 WordPenalty=-2.606 OOVPenalty=-100.000 ||| -135.444
-99 ||| বিস্তারিতঃ 1971 temporary bangladesh government ||| lm_0=-17.358 lm_1=-19.567 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-3.000 tm_pt_3=-0.000 tm_pt_4=-3.000 tm_pt_5=-3.296 tm_pt_6=-5.286 tm_pt_7=-3.000 tm_pt_8=-8.154 tm_pt_9=-0.368 tm_pt_10=-3.934 tm_pt_11=-0.000 tm_pt_12=-2.129 tm_pt_13=-0.000 tm_pt_14=-4.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=4.000 WordPenalty=-3.040 OOVPenalty=-100.000 ||| -137.226
-99 ||| বিস্তারিতঃ , 1971 temporary government ||| lm_0=-16.939 lm_1=-19.567 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-2.000 tm_pt_3=-0.000 tm_pt_4=-2.000 tm_pt_5=-7.272 tm_pt_6=-7.036 tm_pt_7=-2.000 tm_pt_8=-5.436 tm_pt_9=-0.271 tm_pt_10=-2.590 tm_pt_11=-0.000 tm_pt_12=-1.992 tm_pt_13=-0.000 tm_pt_14=-4.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=3.000 WordPenalty=-3.040 OOVPenalty=-100.000 ||| -137.400
-99 ||| বিস্তারিতঃ temporary government , 1971 ||| lm_0=-16.484 lm_1=-19.567 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-2.000 tm_pt_3=-0.000 tm_pt_4=-1.000 tm_pt_5=-7.272 tm_pt_6=-7.036 tm_pt_7=-2.000 tm_pt_8=-5.436 tm_pt_9=-0.185 tm_pt_10=-1.204 tm_pt_11=-0.000 tm_pt_12=-2.351 tm_pt_13=-0.000 tm_pt_14=-4.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=2.000 WordPenalty=-3.040 OOVPenalty=-100.000 ||| -137.498
-99 ||| বিস্তারিতঃ in 1971 temporary government ||| lm_0=-16.946 lm_1=-19.567 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-2.000 tm_pt_3=-0.000 tm_pt_4=-2.000 tm_pt_5=-6.823 tm_pt_6=-7.645 tm_pt_7=-2.000 tm_pt_8=-5.436 tm_pt_9=-0.271 tm_pt_10=-2.827 tm_pt_11=-0.000 tm_pt_12=-1.992 tm_pt_13=-0.000 tm_pt_14=-4.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=3.000 WordPenalty=-3.040 OOVPenalty=-100.000 ||| -137.592
-99 ||| বিস্তারিতঃ temporary government in 1971 ||| lm_0=-16.404 lm_1=-19.567 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-2.000 tm_pt_3=-0.000 tm_pt_4=-1.000 tm_pt_5=-6.823 tm_pt_6=-7.645 tm_pt_7=-2.000 tm_pt_8=-5.436 tm_pt_9=-0.185 tm_pt_10=-1.674 tm_pt_11=-0.000 tm_pt_12=-2.351 tm_pt_13=-0.000 tm_pt_14=-4.000 tm_pt_15=-0.000 tm_pt_16=-0.000 tm_glue_0=2.000 WordPenalty=-3.040 OOVPenalty=-100.000 ||| -137.643
+0 ||| rabindranath was born in a পিরালী ব্রাহ্মণ in the family ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-6.000 tm_pt_3=0.000 tm_pt_4=-5.000 tm_pt_5=-30.409 tm_pt_6=-15.712 tm_pt_7=-6.000 tm_pt_8=-16.308 tm_pt_9=-1.135 tm_pt_10=-14.979 tm_pt_11=0.000 tm_pt_12=-7.729 tm_pt_13=0.000 tm_pt_14=-8.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=7.000 lm_0=-23.712 lm_1=-35.872 WordPenalty=-5.212 OOVPenalty=-200.000 ||| -270.690
+0 ||| rabindranath born in kolkata a পিরালী ব্রাহ্মণ in the family ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-5.000 tm_pt_3=0.000 tm_pt_4=-5.000 tm_pt_5=-29.029 tm_pt_6=-16.002 tm_pt_7=-5.000 tm_pt_8=-13.590 tm_pt_9=-2.000 tm_pt_10=-10.090 tm_pt_11=0.000 tm_pt_12=-4.282 tm_pt_13=0.000 tm_pt_14=-8.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=7.000 lm_0=-27.803 lm_1=-35.872 WordPenalty=-5.212 OOVPenalty=-200.000 ||| -272.654
+0 ||| rabindranath born in the a পিরালী ব্রাহ্মণ in the family ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-5.000 tm_pt_3=0.000 tm_pt_4=-5.000 tm_pt_5=-32.737 tm_pt_6=-16.092 tm_pt_7=-5.000 tm_pt_8=-13.590 tm_pt_9=-2.000 tm_pt_10=-12.188 tm_pt_11=0.000 tm_pt_12=-3.876 tm_pt_13=0.000 tm_pt_14=-8.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=7.000 lm_0=-26.980 lm_1=-35.872 WordPenalty=-5.212 OOVPenalty=-200.000 ||| -272.776
+0 ||| rabindranath was born in one পিরালী ব্রাহ্মণ in the family ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-6.000 tm_pt_3=0.000 tm_pt_4=-5.000 tm_pt_5=-29.639 tm_pt_6=-16.710 tm_pt_7=-6.000 tm_pt_8=-16.308 tm_pt_9=-1.135 tm_pt_10=-13.438 tm_pt_11=0.000 tm_pt_12=-8.350 tm_pt_13=0.000 tm_pt_14=-8.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=7.000 lm_0=-25.676 lm_1=-35.872 WordPenalty=-5.212 OOVPenalty=-200.000 ||| -273.133
+0 ||| rabindranath born in the one পিরালী ব্রাহ্মণ in the family ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-5.000 tm_pt_3=0.000 tm_pt_4=-5.000 tm_pt_5=-31.967 tm_pt_6=-17.090 tm_pt_7=-5.000 tm_pt_8=-13.590 tm_pt_9=-2.000 tm_pt_10=-10.648 tm_pt_11=0.000 tm_pt_12=-4.497 tm_pt_13=0.000 tm_pt_14=-8.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=7.000 lm_0=-27.320 lm_1=-35.872 WordPenalty=-5.212 OOVPenalty=-200.000 ||| -273.210
+0 ||| rabindranath born in kolkata one পিরালী ব্রাহ্মণ in the family ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-5.000 tm_pt_3=0.000 tm_pt_4=-5.000 tm_pt_5=-28.259 tm_pt_6=-16.999 tm_pt_7=-5.000 tm_pt_8=-13.590 tm_pt_9=-2.000 tm_pt_10=-8.550 tm_pt_11=0.000 tm_pt_12=-4.903 tm_pt_13=0.000 tm_pt_14=-8.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=7.000 lm_0=-28.555 lm_1=-35.872 WordPenalty=-5.212 OOVPenalty=-200.000 ||| -273.597
+0 ||| rabindranath was born in a পিরালী ব্রাহ্মণ পরিবারে . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-6.000 tm_pt_3=0.000 tm_pt_4=-5.000 tm_pt_5=-12.867 tm_pt_6=-7.153 tm_pt_7=-6.000 tm_pt_8=-16.308 tm_pt_9=-0.135 tm_pt_10=-14.988 tm_pt_11=0.000 tm_pt_12=-7.732 tm_pt_13=0.000 tm_pt_14=-6.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=8.000 lm_0=-23.574 lm_1=-32.611 WordPenalty=-4.777 OOVPenalty=-300.000 ||| -360.646
+0 ||| rabindranath born in kolkata a পিরালী ব্রাহ্মণ পরিবারে . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-5.000 tm_pt_3=0.000 tm_pt_4=-5.000 tm_pt_5=-11.487 tm_pt_6=-7.442 tm_pt_7=-5.000 tm_pt_8=-13.590 tm_pt_9=-1.000 tm_pt_10=-10.100 tm_pt_11=0.000 tm_pt_12=-4.285 tm_pt_13=0.000 tm_pt_14=-6.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=8.000 lm_0=-27.665 lm_1=-32.611 WordPenalty=-4.777 OOVPenalty=-300.000 ||| -362.610
+0 ||| rabindranath born in the a পিরালী ব্রাহ্মণ পরিবারে . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-5.000 tm_pt_3=0.000 tm_pt_4=-5.000 tm_pt_5=-15.195 tm_pt_6=-7.533 tm_pt_7=-5.000 tm_pt_8=-13.590 tm_pt_9=-1.000 tm_pt_10=-12.198 tm_pt_11=0.000 tm_pt_12=-3.880 tm_pt_13=0.000 tm_pt_14=-6.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=8.000 lm_0=-26.841 lm_1=-32.611 WordPenalty=-4.777 OOVPenalty=-300.000 ||| -362.732
+1 ||| recently india with united relation improved . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-6.000 tm_pt_3=0.000 tm_pt_4=-5.000 tm_pt_5=-13.810 tm_pt_6=-15.759 tm_pt_7=-6.000 tm_pt_8=-16.308 tm_pt_9=-2.104 tm_pt_10=-11.527 tm_pt_11=0.000 tm_pt_12=-5.174 tm_pt_13=0.000 tm_pt_14=-7.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=5.000 lm_0=-21.585 lm_1=-26.089 WordPenalty=-3.909 OOVPenalty=0.000 ||| -58.929
+1 ||| recently with the united relation improved . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-6.000 tm_pt_3=0.000 tm_pt_4=-4.000 tm_pt_5=-17.062 tm_pt_6=-15.857 tm_pt_7=-6.000 tm_pt_8=-16.308 tm_pt_9=-2.105 tm_pt_10=-15.528 tm_pt_11=0.000 tm_pt_12=-7.396 tm_pt_13=0.000 tm_pt_14=-7.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=4.000 lm_0=-19.205 lm_1=-26.089 WordPenalty=-3.909 OOVPenalty=0.000 ||| -59.428
+1 ||| recently india with united matters improved . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-6.000 tm_pt_3=0.000 tm_pt_4=-5.000 tm_pt_5=-15.156 tm_pt_6=-17.146 tm_pt_7=-6.000 tm_pt_8=-16.308 tm_pt_9=-2.104 tm_pt_10=-11.320 tm_pt_11=0.000 tm_pt_12=-5.174 tm_pt_13=0.000 tm_pt_14=-7.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=5.000 lm_0=-21.481 lm_1=-26.089 WordPenalty=-3.909 OOVPenalty=0.000 ||| -59.477
+1 ||| recently india with united states relation improved . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-6.000 tm_pt_3=0.000 tm_pt_4=-6.000 tm_pt_5=-15.661 tm_pt_6=-15.849 tm_pt_7=-6.000 tm_pt_8=-16.308 tm_pt_9=-1.742 tm_pt_10=-10.885 tm_pt_11=0.000 tm_pt_12=-4.412 tm_pt_13=0.000 tm_pt_14=-8.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=6.000 lm_0=-21.491 lm_1=-29.350 WordPenalty=-4.343 OOVPenalty=0.000 ||| -59.602
+1 ||| recently the with united relation improved . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-6.000 tm_pt_3=0.000 tm_pt_4=-5.000 tm_pt_5=-17.062 tm_pt_6=-15.857 tm_pt_7=-6.000 tm_pt_8=-16.308 tm_pt_9=-2.104 tm_pt_10=-14.462 tm_pt_11=0.000 tm_pt_12=-5.822 tm_pt_13=0.000 tm_pt_14=-7.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=5.000 lm_0=-20.835 lm_1=-26.089 WordPenalty=-3.909 OOVPenalty=0.000 ||| -59.647
+1 ||| recently with the united states relation improved . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-6.000 tm_pt_3=0.000 tm_pt_4=-5.000 tm_pt_5=-18.913 tm_pt_6=-15.946 tm_pt_7=-6.000 tm_pt_8=-16.308 tm_pt_9=-1.743 tm_pt_10=-14.886 tm_pt_11=0.000 tm_pt_12=-6.633 tm_pt_13=0.000 tm_pt_14=-8.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=5.000 lm_0=-18.873 lm_1=-29.350 WordPenalty=-4.343 OOVPenalty=0.000 ||| -59.806
+2 ||| mathematics so science language . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-4.000 tm_pt_3=0.000 tm_pt_4=-4.000 tm_pt_5=-6.483 tm_pt_6=-3.387 tm_pt_7=-4.000 tm_pt_8=-10.872 tm_pt_9=-0.002 tm_pt_10=-3.378 tm_pt_11=0.000 tm_pt_12=-1.626 tm_pt_13=0.000 tm_pt_14=-5.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=4.000 lm_0=-15.141 lm_1=-19.567 WordPenalty=-3.040 OOVPenalty=0.000 ||| -34.682
+2 ||| mathematics is science language . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-4.000 tm_pt_3=0.000 tm_pt_4=-4.000 tm_pt_5=-10.375 tm_pt_6=-3.926 tm_pt_7=-4.000 tm_pt_8=-10.872 tm_pt_9=-0.052 tm_pt_10=-8.326 tm_pt_11=0.000 tm_pt_12=-3.330 tm_pt_13=0.000 tm_pt_14=-5.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=4.000 lm_0=-12.890 lm_1=-19.567 WordPenalty=-3.040 OOVPenalty=0.000 ||| -34.754
+2 ||| mathematics that science language . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-4.000 tm_pt_3=0.000 tm_pt_4=-4.000 tm_pt_5=-9.625 tm_pt_6=-3.926 tm_pt_7=-4.000 tm_pt_8=-10.872 tm_pt_9=-0.052 tm_pt_10=-7.607 tm_pt_11=0.000 tm_pt_12=-3.330 tm_pt_13=0.000 tm_pt_14=-5.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=4.000 lm_0=-14.001 lm_1=-19.567 WordPenalty=-3.040 OOVPenalty=0.000 ||| -35.798
+2 ||| mathematics so science language ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-4.000 tm_pt_3=0.000 tm_pt_4=-4.000 tm_pt_5=-7.060 tm_pt_6=-10.481 tm_pt_7=-4.000 tm_pt_8=-10.872 tm_pt_9=-0.138 tm_pt_10=-7.888 tm_pt_11=0.000 tm_pt_12=-3.417 tm_pt_13=0.000 tm_pt_14=-4.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=4.000 lm_0=-15.078 lm_1=-16.306 WordPenalty=-2.606 OOVPenalty=0.000 ||| -36.952
+2 ||| mathematics is science language ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-4.000 tm_pt_3=0.000 tm_pt_4=-4.000 tm_pt_5=-10.951 tm_pt_6=-11.020 tm_pt_7=-4.000 tm_pt_8=-10.872 tm_pt_9=-0.188 tm_pt_10=-12.835 tm_pt_11=0.000 tm_pt_12=-5.122 tm_pt_13=0.000 tm_pt_14=-4.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=4.000 lm_0=-12.827 lm_1=-16.306 WordPenalty=-2.606 OOVPenalty=0.000 ||| -37.024
+3 ||| from this it understood that this মেট্রিকটি will এফআরডব্লিউ মেট্রিক . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-7.000 tm_pt_3=0.000 tm_pt_4=-6.000 tm_pt_5=-14.595 tm_pt_6=-13.171 tm_pt_7=-7.000 tm_pt_8=-19.026 tm_pt_9=-1.405 tm_pt_10=-9.498 tm_pt_11=0.000 tm_pt_12=-5.915 tm_pt_13=0.000 tm_pt_14=-8.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=9.000 lm_0=-32.371 lm_1=-39.134 WordPenalty=-5.646 OOVPenalty=-300.000 ||| -377.287
+3 ||| from this it will be understood that the মেট্রিকটি এফআরডব্লিউ মেট্রিক . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-6.000 tm_pt_3=0.000 tm_pt_4=-3.000 tm_pt_5=-20.366 tm_pt_6=-14.416 tm_pt_7=-6.000 tm_pt_8=-16.308 tm_pt_9=-1.389 tm_pt_10=-6.943 tm_pt_11=0.000 tm_pt_12=-4.457 tm_pt_13=0.000 tm_pt_14=-9.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=5.000 lm_0=-28.474 lm_1=-42.395 WordPenalty=-6.080 OOVPenalty=-300.000 ||| -377.643
+3 ||| from this it understood that this মেট্রিকটি be এফআরডব্লিউ মেট্রিক . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-7.000 tm_pt_3=0.000 tm_pt_4=-6.000 tm_pt_5=-15.733 tm_pt_6=-13.079 tm_pt_7=-7.000 tm_pt_8=-19.026 tm_pt_9=-1.405 tm_pt_10=-10.513 tm_pt_11=0.000 tm_pt_12=-5.915 tm_pt_13=0.000 tm_pt_14=-8.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=9.000 lm_0=-32.656 lm_1=-39.134 WordPenalty=-5.646 OOVPenalty=-300.000 ||| -378.089
+3 ||| from this it understood that this will মেট্রিকটি এফআরডব্লিউ মেট্রিক . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-7.000 tm_pt_3=0.000 tm_pt_4=-5.000 tm_pt_5=-14.595 tm_pt_6=-13.171 tm_pt_7=-7.000 tm_pt_8=-19.026 tm_pt_9=-1.522 tm_pt_10=-7.740 tm_pt_11=0.000 tm_pt_12=-5.309 tm_pt_13=0.000 tm_pt_14=-8.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=7.000 lm_0=-31.903 lm_1=-39.134 WordPenalty=-5.646 OOVPenalty=-300.000 ||| -378.099
+3 ||| from this easily understood that this মেট্রিকটি will এফআরডব্লিউ মেট্রিক . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-7.000 tm_pt_3=0.000 tm_pt_4=-7.000 tm_pt_5=-10.693 tm_pt_6=-12.277 tm_pt_7=-7.000 tm_pt_8=-19.026 tm_pt_9=-1.522 tm_pt_10=-6.659 tm_pt_11=0.000 tm_pt_12=-6.069 tm_pt_13=0.000 tm_pt_14=-8.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=10.000 lm_0=-35.196 lm_1=-39.134 WordPenalty=-5.646 OOVPenalty=-300.000 ||| -378.180
+3 ||| from this it will be understood that this মেট্রিকটি এফআরডব্লিউ মেট্রিক . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-7.000 tm_pt_3=0.000 tm_pt_4=-3.000 tm_pt_5=-17.030 tm_pt_6=-13.124 tm_pt_7=-7.000 tm_pt_8=-19.026 tm_pt_9=-1.386 tm_pt_10=-7.892 tm_pt_11=0.000 tm_pt_12=-3.799 tm_pt_13=0.000 tm_pt_14=-9.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=5.000 lm_0=-28.974 lm_1=-42.395 WordPenalty=-6.080 OOVPenalty=-300.000 ||| -378.348
+3 ||| from this it understood that the মেট্রিকটি will এফআরডব্লিউ মেট্রিক . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-6.000 tm_pt_3=0.000 tm_pt_4=-4.000 tm_pt_5=-17.930 tm_pt_6=-14.463 tm_pt_7=-6.000 tm_pt_8=-16.308 tm_pt_9=-1.423 tm_pt_10=-9.145 tm_pt_11=0.000 tm_pt_12=-6.163 tm_pt_13=0.000 tm_pt_14=-8.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=7.000 lm_0=-31.871 lm_1=-39.134 WordPenalty=-5.646 OOVPenalty=-300.000 ||| -378.534
+3 ||| from this it will understood that the মেট্রিকটি এফআরডব্লিউ মেট্রিক . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-6.000 tm_pt_3=0.000 tm_pt_4=-3.000 tm_pt_5=-17.930 tm_pt_6=-14.463 tm_pt_7=-6.000 tm_pt_8=-16.308 tm_pt_9=-1.524 tm_pt_10=-6.432 tm_pt_11=0.000 tm_pt_12=-5.479 tm_pt_13=0.000 tm_pt_14=-8.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=4.000 lm_0=-30.313 lm_1=-39.134 WordPenalty=-5.646 OOVPenalty=-300.000 ||| -378.708
+4 ||| same with the earthcentered সামন্ততন্ত্রের পতনের indication match from this novel . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-9.000 tm_pt_3=0.000 tm_pt_4=-8.000 tm_pt_5=-16.212 tm_pt_6=-10.084 tm_pt_7=-9.000 tm_pt_8=-24.462 tm_pt_9=-3.005 tm_pt_10=-12.338 tm_pt_11=0.000 tm_pt_12=-5.018 tm_pt_13=0.000 tm_pt_14=-10.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=10.000 lm_0=-39.641 lm_1=-42.395 WordPenalty=-6.080 OOVPenalty=-200.000 ||| -290.423
+4 ||| same with the earthcentered সামন্ততন্ত্রের পতনের presage match from this novel . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-9.000 tm_pt_3=0.000 tm_pt_4=-8.000 tm_pt_5=-15.113 tm_pt_6=-10.084 tm_pt_7=-9.000 tm_pt_8=-24.462 tm_pt_9=-3.005 tm_pt_10=-12.338 tm_pt_11=0.000 tm_pt_12=-5.018 tm_pt_13=0.000 tm_pt_14=-10.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=10.000 lm_0=-39.916 lm_1=-42.395 WordPenalty=-6.080 OOVPenalty=-200.000 ||| -290.553
+4 ||| the with the earthcentered সামন্ততন্ত্রের পতনের indication match from this novel . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-9.000 tm_pt_3=0.000 tm_pt_4=-8.000 tm_pt_5=-20.640 tm_pt_6=-9.983 tm_pt_7=-9.000 tm_pt_8=-24.462 tm_pt_9=-3.024 tm_pt_10=-18.544 tm_pt_11=0.000 tm_pt_12=-6.405 tm_pt_13=0.000 tm_pt_14=-10.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=10.000 lm_0=-37.546 lm_1=-42.395 WordPenalty=-6.080 OOVPenalty=-200.000 ||| -290.757
+4 ||| the with the earthcentered সামন্ততন্ত্রের পতনের presage match from this novel . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-9.000 tm_pt_3=0.000 tm_pt_4=-8.000 tm_pt_5=-19.541 tm_pt_6=-9.983 tm_pt_7=-9.000 tm_pt_8=-24.462 tm_pt_9=-3.024 tm_pt_10=-18.544 tm_pt_11=0.000 tm_pt_12=-6.405 tm_pt_13=0.000 tm_pt_14=-10.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=10.000 lm_0=-37.820 lm_1=-42.395 WordPenalty=-6.080 OOVPenalty=-200.000 ||| -290.887
+4 ||| same with the earthcentered সামন্ততন্ত্রের পতনের indication match from this novels . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-9.000 tm_pt_3=0.000 tm_pt_4=-8.000 tm_pt_5=-15.164 tm_pt_6=-9.637 tm_pt_7=-9.000 tm_pt_8=-24.462 tm_pt_9=-3.005 tm_pt_10=-11.182 tm_pt_11=0.000 tm_pt_12=-4.651 tm_pt_13=0.000 tm_pt_14=-10.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=10.000 lm_0=-41.798 lm_1=-42.395 WordPenalty=-6.080 OOVPenalty=-200.000 ||| -292.308
+4 ||| same with the earthcentered সামন্ততন্ত্রের পতনের indication match this novel from . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-10.000 tm_pt_3=0.000 tm_pt_4=-10.000 tm_pt_5=-16.212 tm_pt_6=-10.084 tm_pt_7=-10.000 tm_pt_8=-27.180 tm_pt_9=-3.003 tm_pt_10=-12.054 tm_pt_11=0.000 tm_pt_12=-5.342 tm_pt_13=0.000 tm_pt_14=-10.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=12.000 lm_0=-41.827 lm_1=-42.395 WordPenalty=-6.080 OOVPenalty=-200.000 ||| -292.392
+4 ||| same with the earthcentered সামন্ততন্ত্রের পতনের presage match from this novels . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-9.000 tm_pt_3=0.000 tm_pt_4=-8.000 tm_pt_5=-14.066 tm_pt_6=-9.637 tm_pt_7=-9.000 tm_pt_8=-24.462 tm_pt_9=-3.005 tm_pt_10=-11.182 tm_pt_11=0.000 tm_pt_12=-4.651 tm_pt_13=0.000 tm_pt_14=-10.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=10.000 lm_0=-42.073 lm_1=-42.395 WordPenalty=-6.080 OOVPenalty=-200.000 ||| -292.438
+4 ||| same with the earthcentered সামন্ততন্ত্রের পতনের presage match this novel from . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-10.000 tm_pt_3=0.000 tm_pt_4=-10.000 tm_pt_5=-15.113 tm_pt_6=-10.084 tm_pt_7=-10.000 tm_pt_8=-27.180 tm_pt_9=-3.003 tm_pt_10=-12.054 tm_pt_11=0.000 tm_pt_12=-5.342 tm_pt_13=0.000 tm_pt_14=-10.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=12.000 lm_0=-42.102 lm_1=-42.395 WordPenalty=-6.080 OOVPenalty=-200.000 ||| -292.522
+4 ||| the with the earthcentered সামন্ততন্ত্রের পতনের indication match this novel from . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-10.000 tm_pt_3=0.000 tm_pt_4=-10.000 tm_pt_5=-20.640 tm_pt_6=-9.983 tm_pt_7=-10.000 tm_pt_8=-27.180 tm_pt_9=-3.021 tm_pt_10=-18.260 tm_pt_11=0.000 tm_pt_12=-6.729 tm_pt_13=0.000 tm_pt_14=-10.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=12.000 lm_0=-39.732 lm_1=-42.395 WordPenalty=-6.080 OOVPenalty=-200.000 ||| -292.727
+4 ||| with the same earthcentered সামন্ততন্ত্রের পতনের indication match from this novel . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-9.000 tm_pt_3=0.000 tm_pt_4=-5.000 tm_pt_5=-16.212 tm_pt_6=-10.084 tm_pt_7=-9.000 tm_pt_8=-24.462 tm_pt_9=-3.373 tm_pt_10=-14.188 tm_pt_11=0.000 tm_pt_12=-7.809 tm_pt_13=0.000 tm_pt_14=-10.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=7.000 lm_0=-37.796 lm_1=-42.395 WordPenalty=-6.080 OOVPenalty=-200.000 ||| -292.802
+5 ||| নির্বচনে mujib and his party নিরঙ্কুষ majority in . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-6.000 tm_pt_3=0.000 tm_pt_4=-5.000 tm_pt_5=-13.102 tm_pt_6=-8.482 tm_pt_7=-6.000 tm_pt_8=-16.308 tm_pt_9=-0.289 tm_pt_10=-14.216 tm_pt_11=0.000 tm_pt_12=-2.256 tm_pt_13=0.000 tm_pt_14=-7.000 tm_pt_15=-1.000 tm_pt_16=0.000 tm_glue_0=7.000 lm_0=-25.124 lm_1=-32.611 WordPenalty=-4.777 OOVPenalty=-200.000 ||| -262.275
+5 ||| নির্বচনে mujib and his party নিরঙ্কুষ majority in the . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-7.000 tm_pt_3=0.000 tm_pt_4=-7.000 tm_pt_5=-16.310 tm_pt_6=-6.695 tm_pt_7=-7.000 tm_pt_8=-19.026 tm_pt_9=-0.289 tm_pt_10=-10.344 tm_pt_11=0.000 tm_pt_12=-2.428 tm_pt_13=0.000 tm_pt_14=-8.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=9.000 lm_0=-25.263 lm_1=-35.872 WordPenalty=-5.212 OOVPenalty=-200.000 ||| -262.282
+5 ||| নির্বচনে mujib and his party নিরঙ্কুষ majority majority . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-6.000 tm_pt_3=0.000 tm_pt_4=-5.000 tm_pt_5=-9.787 tm_pt_6=-9.868 tm_pt_7=-6.000 tm_pt_8=-16.308 tm_pt_9=-0.639 tm_pt_10=-10.413 tm_pt_11=0.000 tm_pt_12=-3.172 tm_pt_13=0.000 tm_pt_14=-7.000 tm_pt_15=-1.000 tm_pt_16=0.000 tm_glue_0=7.000 lm_0=-26.944 lm_1=-32.611 WordPenalty=-4.777 OOVPenalty=-200.000 ||| -263.990
+5 ||| নির্বচনে mujib and his party নিরঙ্কুষ majority that . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-6.000 tm_pt_3=0.000 tm_pt_4=-5.000 tm_pt_5=-12.810 tm_pt_6=-9.357 tm_pt_7=-6.000 tm_pt_8=-16.308 tm_pt_9=-0.406 tm_pt_10=-13.770 tm_pt_11=0.000 tm_pt_12=-2.767 tm_pt_13=0.000 tm_pt_14=-7.000 tm_pt_15=-1.000 tm_pt_16=0.000 tm_glue_0=7.000 lm_0=-26.235 lm_1=-32.611 WordPenalty=-4.777 OOVPenalty=-200.000 ||| -264.054
+5 ||| নির্বচনে mujib and his party majority in নিরঙ্কুষ . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-6.000 tm_pt_3=0.000 tm_pt_4=-4.000 tm_pt_5=-13.102 tm_pt_6=-8.482 tm_pt_7=-6.000 tm_pt_8=-16.308 tm_pt_9=-0.522 tm_pt_10=-12.917 tm_pt_11=0.000 tm_pt_12=-2.374 tm_pt_13=0.000 tm_pt_14=-7.000 tm_pt_15=-1.000 tm_pt_16=0.000 tm_glue_0=5.000 lm_0=-25.124 lm_1=-32.611 WordPenalty=-4.777 OOVPenalty=-200.000 ||| -264.135
+5 ||| নির্বচনে mujib and his party নিরঙ্কুষ majority that the . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-6.000 tm_pt_3=0.000 tm_pt_4=-6.000 tm_pt_5=-16.018 tm_pt_6=-7.571 tm_pt_7=-6.000 tm_pt_8=-16.308 tm_pt_9=-1.271 tm_pt_10=-5.619 tm_pt_11=0.000 tm_pt_12=-1.161 tm_pt_13=0.000 tm_pt_14=-8.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=8.000 lm_0=-27.702 lm_1=-35.872 WordPenalty=-5.212 OOVPenalty=-200.000 ||| -264.484
+5 ||| নির্বচনে mujib and his party নিরঙ্কুষ majority where the . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-6.000 tm_pt_3=0.000 tm_pt_4=-5.000 tm_pt_5=-15.032 tm_pt_6=-9.180 tm_pt_7=-6.000 tm_pt_8=-16.308 tm_pt_9=-0.406 tm_pt_10=-2.153 tm_pt_11=0.000 tm_pt_12=-0.468 tm_pt_13=0.000 tm_pt_14=-8.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=7.000 lm_0=-28.188 lm_1=-35.872 WordPenalty=-5.212 OOVPenalty=-200.000 ||| -264.558
+6 ||| হিটলারও বাভারিয়ার মিউনিখ শহরেই his work with to that . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-6.000 tm_pt_3=0.000 tm_pt_4=-6.000 tm_pt_5=-22.509 tm_pt_6=-11.163 tm_pt_7=-6.000 tm_pt_8=-16.308 tm_pt_9=-1.370 tm_pt_10=-18.845 tm_pt_11=0.000 tm_pt_12=-2.681 tm_pt_13=0.000 tm_pt_14=-6.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=10.000 lm_0=-33.425 lm_1=-35.872 WordPenalty=-5.212 OOVPenalty=-400.000 ||| -476.744
+6 ||| হিটলারও বাভারিয়ার মিউনিখ শহরেই his work with can that . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-6.000 tm_pt_3=0.000 tm_pt_4=-6.000 tm_pt_5=-19.242 tm_pt_6=-9.832 tm_pt_7=-6.000 tm_pt_8=-16.308 tm_pt_9=-1.503 tm_pt_10=-17.011 tm_pt_11=0.000 tm_pt_12=-3.528 tm_pt_13=0.000 tm_pt_14=-6.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=10.000 lm_0=-35.292 lm_1=-35.872 WordPenalty=-5.212 OOVPenalty=-400.000 ||| -477.908
+6 ||| হিটলারও বাভারিয়ার মিউনিখ শহরেই his work with that can . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-6.000 tm_pt_3=0.000 tm_pt_4=-5.000 tm_pt_5=-19.242 tm_pt_6=-9.832 tm_pt_7=-6.000 tm_pt_8=-16.308 tm_pt_9=-2.368 tm_pt_10=-17.376 tm_pt_11=0.000 tm_pt_12=-3.305 tm_pt_13=0.000 tm_pt_14=-6.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=9.000 lm_0=-33.973 lm_1=-35.872 WordPenalty=-5.212 OOVPenalty=-400.000 ||| -477.964
+6 ||| হিটলারও বাভারিয়ার মিউনিখ শহরেই his works with to that . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-6.000 tm_pt_3=0.000 tm_pt_4=-6.000 tm_pt_5=-22.839 tm_pt_6=-12.090 tm_pt_7=-6.000 tm_pt_8=-16.308 tm_pt_9=-1.371 tm_pt_10=-19.317 tm_pt_11=0.000 tm_pt_12=-4.578 tm_pt_13=0.000 tm_pt_14=-6.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=10.000 lm_0=-33.694 lm_1=-35.872 WordPenalty=-5.212 OOVPenalty=-400.000 ||| -478.251
+6 ||| হিটলারও বাভারিয়ার মিউনিখ শহরেই his work with a that . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-6.000 tm_pt_3=0.000 tm_pt_4=-5.000 tm_pt_5=-24.229 tm_pt_6=-13.109 tm_pt_7=-6.000 tm_pt_8=-16.308 tm_pt_9=-1.418 tm_pt_10=-18.986 tm_pt_11=0.000 tm_pt_12=-2.612 tm_pt_13=0.000 tm_pt_14=-6.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=9.000 lm_0=-33.108 lm_1=-35.872 WordPenalty=-5.212 OOVPenalty=-400.000 ||| -478.362
+6 ||| হিটলারও বাভারিয়ার মিউনিখ শহরেই his works with can that . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-6.000 tm_pt_3=0.000 tm_pt_4=-6.000 tm_pt_5=-19.572 tm_pt_6=-10.759 tm_pt_7=-6.000 tm_pt_8=-16.308 tm_pt_9=-1.504 tm_pt_10=-17.483 tm_pt_11=0.000 tm_pt_12=-5.425 tm_pt_13=0.000 tm_pt_14=-6.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=10.000 lm_0=-35.562 lm_1=-35.872 WordPenalty=-5.212 OOVPenalty=-400.000 ||| -479.414
+6 ||| হিটলারও বাভারিয়ার মিউনিখ শহরেই his works with that can . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-6.000 tm_pt_3=0.000 tm_pt_4=-5.000 tm_pt_5=-19.572 tm_pt_6=-10.759 tm_pt_7=-6.000 tm_pt_8=-16.308 tm_pt_9=-2.368 tm_pt_10=-17.847 tm_pt_11=0.000 tm_pt_12=-5.202 tm_pt_13=0.000 tm_pt_14=-6.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=9.000 lm_0=-34.242 lm_1=-35.872 WordPenalty=-5.212 OOVPenalty=-400.000 ||| -479.471
+7 ||| task , ওএস-ট্যান and also some linux প্রতিনিধিত্বকারী character is but these very is not . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-13.000 tm_pt_3=0.000 tm_pt_4=-13.000 tm_pt_5=-36.805 tm_pt_6=-15.372 tm_pt_7=-13.000 tm_pt_8=-35.334 tm_pt_9=-1.871 tm_pt_10=-24.380 tm_pt_11=0.000 tm_pt_12=-9.030 tm_pt_13=0.000 tm_pt_14=-14.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=15.000 lm_0=-47.390 lm_1=-55.439 WordPenalty=-7.817 OOVPenalty=-200.000 ||| -317.576
+7 ||| task , ওএস-ট্যান and some more linux প্রতিনিধিত্বকারী character is but these very is not . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-13.000 tm_pt_3=0.000 tm_pt_4=-12.000 tm_pt_5=-34.821 tm_pt_6=-14.079 tm_pt_7=-13.000 tm_pt_8=-35.334 tm_pt_9=-1.874 tm_pt_10=-24.460 tm_pt_11=0.000 tm_pt_12=-9.204 tm_pt_13=0.000 tm_pt_14=-14.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=14.000 lm_0=-47.416 lm_1=-55.439 WordPenalty=-7.817 OOVPenalty=-200.000 ||| -317.835
+7 ||| task , ওএস-ট্যান and also some linux প্রতিনিধিত্বকারী character are but these very is not . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-13.000 tm_pt_3=0.000 tm_pt_4=-13.000 tm_pt_5=-35.727 tm_pt_6=-15.118 tm_pt_7=-13.000 tm_pt_8=-35.334 tm_pt_9=-1.871 tm_pt_10=-23.123 tm_pt_11=0.000 tm_pt_12=-8.647 tm_pt_13=0.000 tm_pt_14=-14.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=15.000 lm_0=-48.326 lm_1=-55.439 WordPenalty=-7.817 OOVPenalty=-200.000 ||| -317.979
+7 ||| task , ওএস-ট্যান and some more linux প্রতিনিধিত্বকারী character are but these very is not . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-13.000 tm_pt_3=0.000 tm_pt_4=-12.000 tm_pt_5=-33.743 tm_pt_6=-13.825 tm_pt_7=-13.000 tm_pt_8=-35.334 tm_pt_9=-1.874 tm_pt_10=-23.203 tm_pt_11=0.000 tm_pt_12=-8.821 tm_pt_13=0.000 tm_pt_14=-14.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=14.000 lm_0=-48.352 lm_1=-55.439 WordPenalty=-7.817 OOVPenalty=-200.000 ||| -318.238
+7 ||| task , ওএস-ট্যান and also some linux প্রতিনিধিত্বকারী character there but these very is not . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-13.000 tm_pt_3=0.000 tm_pt_4=-13.000 tm_pt_5=-34.439 tm_pt_6=-14.939 tm_pt_7=-13.000 tm_pt_8=-35.334 tm_pt_9=-1.871 tm_pt_10=-22.476 tm_pt_11=0.000 tm_pt_12=-9.541 tm_pt_13=0.000 tm_pt_14=-14.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=15.000 lm_0=-48.680 lm_1=-55.439 WordPenalty=-7.817 OOVPenalty=-200.000 ||| -318.258
+7 ||| task , ওএস-ট্যান and more some linux প্রতিনিধিত্বকারী character is but these very is not . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-13.000 tm_pt_3=0.000 tm_pt_4=-13.000 tm_pt_5=-34.821 tm_pt_6=-14.079 tm_pt_7=-13.000 tm_pt_8=-35.334 tm_pt_9=-1.871 tm_pt_10=-23.599 tm_pt_11=0.000 tm_pt_12=-8.183 tm_pt_13=0.000 tm_pt_14=-14.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=15.000 lm_0=-49.202 lm_1=-55.439 WordPenalty=-7.817 OOVPenalty=-200.000 ||| -318.493
+7 ||| task , ওএস-ট্যান and some more linux প্রতিনিধিত্বকারী character there but these very is not . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-13.000 tm_pt_3=0.000 tm_pt_4=-12.000 tm_pt_5=-32.456 tm_pt_6=-13.646 tm_pt_7=-13.000 tm_pt_8=-35.334 tm_pt_9=-1.874 tm_pt_10=-22.557 tm_pt_11=0.000 tm_pt_12=-9.715 tm_pt_13=0.000 tm_pt_14=-14.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=14.000 lm_0=-48.706 lm_1=-55.439 WordPenalty=-7.817 OOVPenalty=-200.000 ||| -318.517
+7 ||| task without ওএস-ট্যান and some more linux প্রতিনিধিত্বকারী character is but these very is not . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-13.000 tm_pt_3=0.000 tm_pt_4=-12.000 tm_pt_5=-31.413 tm_pt_6=-16.431 tm_pt_7=-13.000 tm_pt_8=-35.334 tm_pt_9=-1.874 tm_pt_10=-17.244 tm_pt_11=0.000 tm_pt_12=-9.204 tm_pt_13=0.000 tm_pt_14=-14.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=14.000 lm_0=-49.402 lm_1=-55.439 WordPenalty=-7.817 OOVPenalty=-200.000 ||| -318.572
+7 ||| task , ওএস-ট্যান and also some linux প্রতিনিধিত্বকারী character is but these a is not . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-13.000 tm_pt_3=0.000 tm_pt_4=-13.000 tm_pt_5=-40.521 tm_pt_6=-16.440 tm_pt_7=-13.000 tm_pt_8=-35.334 tm_pt_9=-1.889 tm_pt_10=-28.483 tm_pt_11=0.000 tm_pt_12=-9.906 tm_pt_13=0.000 tm_pt_14=-14.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=15.000 lm_0=-46.205 lm_1=-55.439 WordPenalty=-7.817 OOVPenalty=-200.000 ||| -318.574
+7 ||| task , ওএস-ট্যান and some more linux প্রতিনিধিত্বকারী character is but these a is not . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-13.000 tm_pt_3=0.000 tm_pt_4=-12.000 tm_pt_5=-38.538 tm_pt_6=-15.147 tm_pt_7=-13.000 tm_pt_8=-35.334 tm_pt_9=-1.892 tm_pt_10=-28.563 tm_pt_11=0.000 tm_pt_12=-10.079 tm_pt_13=0.000 tm_pt_14=-14.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=14.000 lm_0=-46.230 lm_1=-55.439 WordPenalty=-7.817 OOVPenalty=-200.000 ||| -318.833
+8 ||| it social situation in সিদ্ধান্তগ্রহনকারী his oppositions with ক্রীড়াক্ষেত্রে decision making the rule of services . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-13.000 tm_pt_3=0.000 tm_pt_4=-12.000 tm_pt_5=-45.930 tm_pt_6=-30.412 tm_pt_7=-13.000 tm_pt_8=-35.334 tm_pt_9=-3.432 tm_pt_10=-33.034 tm_pt_11=0.000 tm_pt_12=-8.758 tm_pt_13=0.000 tm_pt_14=-14.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=14.000 lm_0=-45.531 lm_1=-55.439 WordPenalty=-7.817 OOVPenalty=-200.000 ||| -326.554
+8 ||| this social situation in সিদ্ধান্তগ্রহনকারী his oppositions with ক্রীড়াক্ষেত্রে decision making the rule of services . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-13.000 tm_pt_3=0.000 tm_pt_4=-12.000 tm_pt_5=-47.524 tm_pt_6=-31.359 tm_pt_7=-13.000 tm_pt_8=-35.334 tm_pt_9=-3.432 tm_pt_10=-34.967 tm_pt_11=0.000 tm_pt_12=-9.390 tm_pt_13=0.000 tm_pt_14=-14.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=14.000 lm_0=-44.770 lm_1=-55.439 WordPenalty=-7.817 OOVPenalty=-200.000 ||| -326.967
+8 ||| it social situation in সিদ্ধান্তগ্রহনকারী his oppositions with ক্রীড়াক্ষেত্রে decision making the rules of services . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-13.000 tm_pt_3=0.000 tm_pt_4=-12.000 tm_pt_5=-45.332 tm_pt_6=-31.000 tm_pt_7=-13.000 tm_pt_8=-35.334 tm_pt_9=-3.561 tm_pt_10=-32.666 tm_pt_11=0.000 tm_pt_12=-9.451 tm_pt_13=0.000 tm_pt_14=-14.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=14.000 lm_0=-46.018 lm_1=-55.439 WordPenalty=-7.817 OOVPenalty=-200.000 ||| -327.498
+8 ||| this social situation in সিদ্ধান্তগ্রহনকারী his oppositions with ক্রীড়াক্ষেত্রে decision making the rules of services . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-13.000 tm_pt_3=0.000 tm_pt_4=-12.000 tm_pt_5=-46.925 tm_pt_6=-31.947 tm_pt_7=-13.000 tm_pt_8=-35.334 tm_pt_9=-3.561 tm_pt_10=-34.599 tm_pt_11=0.000 tm_pt_12=-10.083 tm_pt_13=0.000 tm_pt_14=-14.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=14.000 lm_0=-45.257 lm_1=-55.439 WordPenalty=-7.817 OOVPenalty=-200.000 ||| -327.911
+8 ||| it social situation in সিদ্ধান্তগ্রহনকারী his oppositions with ক্রীড়াক্ষেত্রে decision making the rule of the fingers . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-13.000 tm_pt_3=0.000 tm_pt_4=-13.000 tm_pt_5=-48.497 tm_pt_6=-24.838 tm_pt_7=-13.000 tm_pt_8=-35.334 tm_pt_9=-3.665 tm_pt_10=-34.032 tm_pt_11=0.000 tm_pt_12=-10.582 tm_pt_13=0.000 tm_pt_14=-15.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=15.000 lm_0=-47.090 lm_1=-58.169 WordPenalty=-8.252 OOVPenalty=-200.000 ||| -328.644
+8 ||| it social situation in সিদ্ধান্তগ্রহনকারী his oppositions with ক্রীড়াক্ষেত্রে decision making is rule of services . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-13.000 tm_pt_3=0.000 tm_pt_4=-12.000 tm_pt_5=-44.305 tm_pt_6=-30.138 tm_pt_7=-13.000 tm_pt_8=-35.334 tm_pt_9=-3.414 tm_pt_10=-31.140 tm_pt_11=0.000 tm_pt_12=-8.170 tm_pt_13=0.000 tm_pt_14=-14.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=14.000 lm_0=-48.384 lm_1=-55.439 WordPenalty=-7.817 OOVPenalty=-200.000 ||| -328.967
+8 ||| this social situation in সিদ্ধান্তগ্রহনকারী his oppositions with ক্রীড়াক্ষেত্রে decision making the rule of the fingers . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-13.000 tm_pt_3=0.000 tm_pt_4=-13.000 tm_pt_5=-50.091 tm_pt_6=-25.785 tm_pt_7=-13.000 tm_pt_8=-35.334 tm_pt_9=-3.665 tm_pt_10=-35.965 tm_pt_11=0.000 tm_pt_12=-11.214 tm_pt_13=0.000 tm_pt_14=-15.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=15.000 lm_0=-46.329 lm_1=-58.169 WordPenalty=-8.252 OOVPenalty=-200.000 ||| -329.056
+9 ||| বৃষ্টিপাতঃ annual ২৫৪০ মিলি meters ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-2.000 tm_pt_3=0.000 tm_pt_4=-2.000 tm_pt_5=-3.372 tm_pt_6=-3.054 tm_pt_7=-2.000 tm_pt_8=-5.436 tm_pt_9=-0.368 tm_pt_10=-2.140 tm_pt_11=0.000 tm_pt_12=-1.263 tm_pt_13=0.000 tm_pt_14=-2.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=5.000 lm_0=-26.340 lm_1=-19.567 WordPenalty=-3.040 OOVPenalty=-300.000 ||| -345.288
+9 ||| বৃষ্টিপাতঃ annual ২৫৪০ মিলি meter ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-2.000 tm_pt_3=0.000 tm_pt_4=-2.000 tm_pt_5=-3.885 tm_pt_6=-2.821 tm_pt_7=-2.000 tm_pt_8=-5.436 tm_pt_9=-0.368 tm_pt_10=-2.140 tm_pt_11=0.000 tm_pt_12=-1.337 tm_pt_13=0.000 tm_pt_14=-2.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=5.000 lm_0=-26.316 lm_1=-19.567 WordPenalty=-3.040 OOVPenalty=-300.000 ||| -345.303
+9 ||| বৃষ্টিপাতঃ arrange ২৫৪০ মিলি meters ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-2.000 tm_pt_3=0.000 tm_pt_4=-2.000 tm_pt_5=-2.916 tm_pt_6=-3.748 tm_pt_7=-2.000 tm_pt_8=-5.436 tm_pt_9=-1.000 tm_pt_10=0.000 tm_pt_11=0.000 tm_pt_12=-1.956 tm_pt_13=0.000 tm_pt_14=-2.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=5.000 lm_0=-26.532 lm_1=-19.567 WordPenalty=-3.040 OOVPenalty=-300.000 ||| -345.884
+9 ||| বৃষ্টিপাতঃ arrange ২৫৪০ মিলি meter ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-2.000 tm_pt_3=0.000 tm_pt_4=-2.000 tm_pt_5=-3.430 tm_pt_6=-3.514 tm_pt_7=-2.000 tm_pt_8=-5.436 tm_pt_9=-1.000 tm_pt_10=0.000 tm_pt_11=0.000 tm_pt_12=-2.030 tm_pt_13=0.000 tm_pt_14=-2.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=5.000 lm_0=-26.509 lm_1=-19.567 WordPenalty=-3.040 OOVPenalty=-300.000 ||| -345.900
+9 ||| বৃষ্টিপাতঃ annual ২৫৪০ মিলি metres ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-2.000 tm_pt_3=0.000 tm_pt_4=-2.000 tm_pt_5=-3.608 tm_pt_6=-4.389 tm_pt_7=-2.000 tm_pt_8=-5.436 tm_pt_9=-0.503 tm_pt_10=-2.140 tm_pt_11=0.000 tm_pt_12=-2.803 tm_pt_13=0.000 tm_pt_14=-2.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=5.000 lm_0=-26.771 lm_1=-19.567 WordPenalty=-3.040 OOVPenalty=-300.000 ||| -346.979
+9 ||| বৃষ্টিপাতঃ arrange ২৫৪০ মিলি metres ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-2.000 tm_pt_3=0.000 tm_pt_4=-2.000 tm_pt_5=-3.153 tm_pt_6=-5.083 tm_pt_7=-2.000 tm_pt_8=-5.436 tm_pt_9=-1.135 tm_pt_10=0.000 tm_pt_11=0.000 tm_pt_12=-3.497 tm_pt_13=0.000 tm_pt_14=-2.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=5.000 lm_0=-26.963 lm_1=-19.567 WordPenalty=-3.040 OOVPenalty=-300.000 ||| -347.576
+10 ||| in 2004 ম্যাসাচুসেটস অঙ্গরাজ্যের বস্টন in ডেমোক্র্যাট in the national was he main speech -lrb- keynote speech -rrb- on the . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-15.000 tm_pt_3=0.000 tm_pt_4=-13.000 tm_pt_5=-43.133 tm_pt_6=-17.136 tm_pt_7=-15.000 tm_pt_8=-40.770 tm_pt_9=-1.738 tm_pt_10=-35.371 tm_pt_11=0.000 tm_pt_12=-8.127 tm_pt_13=0.000 tm_pt_14=-15.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=18.000 lm_0=-64.735 lm_1=-71.745 WordPenalty=-9.989 OOVPenalty=-600.000 ||| -754.407
+10 ||| in 2004 ম্যাসাচুসেটস অঙ্গরাজ্যের বস্টন in ডেমোক্র্যাট in the national was he main speech -lrb- keynote speech -rrb- to the . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-15.000 tm_pt_3=0.000 tm_pt_4=-13.000 tm_pt_5=-44.212 tm_pt_6=-17.290 tm_pt_7=-15.000 tm_pt_8=-40.770 tm_pt_9=-1.707 tm_pt_10=-36.372 tm_pt_11=0.000 tm_pt_12=-7.904 tm_pt_13=0.000 tm_pt_14=-15.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=18.000 lm_0=-64.457 lm_1=-71.745 WordPenalty=-9.989 OOVPenalty=-600.000 ||| -754.478
+10 ||| in 2004 ম্যাসাচুসেটস অঙ্গরাজ্যের বস্টন in ডেমোক্র্যাট in the national he was the speech -lrb- keynote speech -rrb- on the . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-15.000 tm_pt_3=0.000 tm_pt_4=-12.000 tm_pt_5=-47.579 tm_pt_6=-17.884 tm_pt_7=-15.000 tm_pt_8=-40.770 tm_pt_9=-1.738 tm_pt_10=-42.661 tm_pt_11=0.000 tm_pt_12=-12.264 tm_pt_13=0.000 tm_pt_14=-15.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=17.000 lm_0=-60.509 lm_1=-71.745 WordPenalty=-9.989 OOVPenalty=-600.000 ||| -754.604
+10 ||| in 2004 ম্যাসাচুসেটস অঙ্গরাজ্যের বস্টন in ডেমোক্র্যাট in the national he was the speech -lrb- keynote speech -rrb- to the . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-15.000 tm_pt_3=0.000 tm_pt_4=-12.000 tm_pt_5=-48.658 tm_pt_6=-18.038 tm_pt_7=-15.000 tm_pt_8=-40.770 tm_pt_9=-1.707 tm_pt_10=-43.661 tm_pt_11=0.000 tm_pt_12=-12.041 tm_pt_13=0.000 tm_pt_14=-15.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=17.000 lm_0=-60.231 lm_1=-71.745 WordPenalty=-9.989 OOVPenalty=-600.000 ||| -754.674
+10 ||| in 2004 ম্যাসাচুসেটস অঙ্গরাজ্যের বস্টন in ডেমোক্র্যাট in the national was he main speech -lrb- keynote speech -rrb- , the . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-15.000 tm_pt_3=0.000 tm_pt_4=-13.000 tm_pt_5=-45.343 tm_pt_6=-16.831 tm_pt_7=-15.000 tm_pt_8=-40.770 tm_pt_9=-2.056 tm_pt_10=-39.115 tm_pt_11=0.000 tm_pt_12=-8.820 tm_pt_13=0.000 tm_pt_14=-15.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=18.000 lm_0=-63.604 lm_1=-71.745 WordPenalty=-9.989 OOVPenalty=-600.000 ||| -754.804
+10 ||| in 2004 ম্যাসাচুসেটস অঙ্গরাজ্যের বস্টন in ডেমোক্র্যাট in the national he was the speech -lrb- keynote speech -rrb- , the . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-15.000 tm_pt_3=0.000 tm_pt_4=-12.000 tm_pt_5=-49.789 tm_pt_6=-17.578 tm_pt_7=-15.000 tm_pt_8=-40.770 tm_pt_9=-2.056 tm_pt_10=-46.405 tm_pt_11=0.000 tm_pt_12=-12.957 tm_pt_13=0.000 tm_pt_14=-15.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=17.000 lm_0=-59.378 lm_1=-71.745 WordPenalty=-9.989 OOVPenalty=-600.000 ||| -755.000
+10 ||| in 2004 ম্যাসাচুসেটস অঙ্গরাজ্যের বস্টন in ডেমোক্র্যাট in the national he was main speech -lrb- keynote speech -rrb- , the . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-15.000 tm_pt_3=0.000 tm_pt_4=-12.000 tm_pt_5=-45.343 tm_pt_6=-16.831 tm_pt_7=-15.000 tm_pt_8=-40.770 tm_pt_9=-2.056 tm_pt_10=-41.797 tm_pt_11=0.000 tm_pt_12=-10.972 tm_pt_13=0.000 tm_pt_14=-15.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=17.000 lm_0=-62.129 lm_1=-71.745 WordPenalty=-9.989 OOVPenalty=-600.000 ||| -755.400
+11 ||| population , power distribution east pakistan where to west pakistan " one unit theory " is a অভিনব started with the where the regarded as a province west pakistan . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-25.000 tm_pt_3=0.000 tm_pt_4=-20.000 tm_pt_5=-75.038 tm_pt_6=-45.484 tm_pt_7=-25.000 tm_pt_8=-67.950 tm_pt_9=-3.948 tm_pt_10=-58.394 tm_pt_11=0.000 tm_pt_12=-13.706 tm_pt_13=0.000 tm_pt_14=-29.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=20.000 lm_0=-69.815 lm_1=-101.095 WordPenalty=-13.897 OOVPenalty=-100.000 ||| -313.312
+11 ||| population , power distribution east pakistan where to west pakistan " one unit theory " is a অভিনব they started the where the regarded as a province west pakistan . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-25.000 tm_pt_3=0.000 tm_pt_4=-21.000 tm_pt_5=-73.645 tm_pt_6=-45.484 tm_pt_7=-25.000 tm_pt_8=-67.950 tm_pt_9=-3.630 tm_pt_10=-57.313 tm_pt_11=0.000 tm_pt_12=-13.824 tm_pt_13=0.000 tm_pt_14=-29.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=21.000 lm_0=-71.365 lm_1=-101.095 WordPenalty=-13.897 OOVPenalty=-100.000 ||| -313.498
+11 ||| population , power distribution east pakistan where to west pakistan " one unit theory " is a অভিনব started with the where the west pakistan regarded as a province . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-25.000 tm_pt_3=0.000 tm_pt_4=-21.000 tm_pt_5=-75.038 tm_pt_6=-45.484 tm_pt_7=-25.000 tm_pt_8=-67.950 tm_pt_9=-3.898 tm_pt_10=-56.926 tm_pt_11=0.000 tm_pt_12=-12.847 tm_pt_13=0.000 tm_pt_14=-29.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=21.000 lm_0=-71.643 lm_1=-101.095 WordPenalty=-13.897 OOVPenalty=-100.000 ||| -313.884
+11 ||| population , power distribution east pakistan where to west pakistan " one unit theory " is a অভিনব they started the where the west pakistan regarded as a province . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-25.000 tm_pt_3=0.000 tm_pt_4=-22.000 tm_pt_5=-73.645 tm_pt_6=-45.484 tm_pt_7=-25.000 tm_pt_8=-67.950 tm_pt_9=-3.580 tm_pt_10=-55.844 tm_pt_11=0.000 tm_pt_12=-12.965 tm_pt_13=0.000 tm_pt_14=-29.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=22.000 lm_0=-73.192 lm_1=-101.095 WordPenalty=-13.897 OOVPenalty=-100.000 ||| -314.070
+11 ||| population on power distribution east pakistan where to west pakistan " one unit theory " is a অভিনব started with the where the west pakistan regarded as a province . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-25.000 tm_pt_3=0.000 tm_pt_4=-21.000 tm_pt_5=-71.056 tm_pt_6=-44.018 tm_pt_7=-25.000 tm_pt_8=-67.950 tm_pt_9=-4.530 tm_pt_10=-54.568 tm_pt_11=0.000 tm_pt_12=-13.540 tm_pt_13=0.000 tm_pt_14=-29.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=21.000 lm_0=-72.719 lm_1=-101.095 WordPenalty=-13.897 OOVPenalty=-100.000 ||| -314.104
+11 ||| population , power distribution east pakistan where to west pakistan " one unit theory " is a অভিনব started with the where the west pakistan considered as a province . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-25.000 tm_pt_3=0.000 tm_pt_4=-21.000 tm_pt_5=-75.147 tm_pt_6=-43.926 tm_pt_7=-25.000 tm_pt_8=-67.950 tm_pt_9=-4.530 tm_pt_10=-59.228 tm_pt_11=0.000 tm_pt_12=-13.540 tm_pt_13=0.000 tm_pt_14=-29.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=21.000 lm_0=-71.170 lm_1=-101.095 WordPenalty=-13.897 OOVPenalty=-100.000 ||| -314.148
+11 ||| population , power distribution east pakistan where to west pakistan " one unit theory " is a অভিনব they started the where the west pakistan considered as a province . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-25.000 tm_pt_3=0.000 tm_pt_4=-22.000 tm_pt_5=-73.755 tm_pt_6=-43.926 tm_pt_7=-25.000 tm_pt_8=-67.950 tm_pt_9=-4.212 tm_pt_10=-58.147 tm_pt_11=0.000 tm_pt_12=-13.658 tm_pt_13=0.000 tm_pt_14=-29.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=22.000 lm_0=-72.719 lm_1=-101.095 WordPenalty=-13.897 OOVPenalty=-100.000 ||| -314.334
+11 ||| population on power distribution east pakistan where to west pakistan " one unit theory " is a অভিনব started with the where the west pakistan considered as a province . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-25.000 tm_pt_3=0.000 tm_pt_4=-21.000 tm_pt_5=-71.166 tm_pt_6=-42.459 tm_pt_7=-25.000 tm_pt_8=-67.950 tm_pt_9=-5.163 tm_pt_10=-56.871 tm_pt_11=0.000 tm_pt_12=-14.233 tm_pt_13=0.000 tm_pt_14=-29.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=21.000 lm_0=-72.246 lm_1=-101.095 WordPenalty=-13.897 OOVPenalty=-100.000 ||| -314.368
+11 ||| population on power distribution east pakistan where to west pakistan " one unit theory " is a অভিনব started with the where the regarded as a province west pakistan . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-25.000 tm_pt_3=0.000 tm_pt_4=-19.000 tm_pt_5=-71.056 tm_pt_6=-44.018 tm_pt_7=-25.000 tm_pt_8=-67.950 tm_pt_9=-4.580 tm_pt_10=-56.039 tm_pt_11=0.000 tm_pt_12=-14.401 tm_pt_13=0.000 tm_pt_14=-29.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=19.000 lm_0=-70.891 lm_1=-101.095 WordPenalty=-13.897 OOVPenalty=-100.000 ||| -314.498
+12 ||| the পরিমাপন theory ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-2.000 tm_pt_3=0.000 tm_pt_4=-2.000 tm_pt_5=-9.309 tm_pt_6=-3.988 tm_pt_7=-2.000 tm_pt_8=-5.436 tm_pt_9=-0.368 tm_pt_10=-7.916 tm_pt_11=0.000 tm_pt_12=-1.316 tm_pt_13=0.000 tm_pt_14=-2.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=3.000 lm_0=-11.112 lm_1=-13.045 WordPenalty=-2.171 OOVPenalty=-100.000 ||| -126.556
+12 ||| mathematical পরিমাপন theory ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-2.000 tm_pt_3=0.000 tm_pt_4=-2.000 tm_pt_5=-2.869 tm_pt_6=-2.890 tm_pt_7=-2.000 tm_pt_8=-5.436 tm_pt_9=-1.000 tm_pt_10=-4.888 tm_pt_11=0.000 tm_pt_12=-2.010 tm_pt_13=0.000 tm_pt_14=-2.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=3.000 lm_0=-12.665 lm_1=-13.045 WordPenalty=-2.171 OOVPenalty=-100.000 ||| -126.848
+12 ||| • পরিমাপন theory ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-2.000 tm_pt_3=0.000 tm_pt_4=-2.000 tm_pt_5=-1.046 tm_pt_6=-5.241 tm_pt_7=-2.000 tm_pt_8=-5.436 tm_pt_9=-0.368 tm_pt_10=-0.422 tm_pt_11=0.000 tm_pt_12=-1.316 tm_pt_13=0.000 tm_pt_14=-2.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=3.000 lm_0=-14.217 lm_1=-13.045 WordPenalty=-2.171 OOVPenalty=-100.000 ||| -127.301
+12 ||| . পরিমাপন theory ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-2.000 tm_pt_3=0.000 tm_pt_4=-1.000 tm_pt_5=-7.900 tm_pt_6=-2.990 tm_pt_7=-2.000 tm_pt_8=-5.436 tm_pt_9=-0.018 tm_pt_10=-3.641 tm_pt_11=0.000 tm_pt_12=-1.712 tm_pt_13=0.000 tm_pt_14=-2.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=2.000 lm_0=-12.758 lm_1=-13.045 WordPenalty=-2.171 OOVPenalty=-100.000 ||| -127.691
+13 ||| external links of ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-2.000 tm_pt_3=0.000 tm_pt_4=-2.000 tm_pt_5=-7.390 tm_pt_6=-2.729 tm_pt_7=-2.000 tm_pt_8=-5.436 tm_pt_9=-0.018 tm_pt_10=-5.285 tm_pt_11=0.000 tm_pt_12=-1.611 tm_pt_13=0.000 tm_pt_14=-3.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=2.000 lm_0=-6.986 lm_1=-13.045 WordPenalty=-2.171 OOVPenalty=0.000 ||| -20.459
+13 ||| out-links of ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-2.000 tm_pt_3=0.000 tm_pt_4=-2.000 tm_pt_5=-6.938 tm_pt_6=-4.795 tm_pt_7=-2.000 tm_pt_8=-5.436 tm_pt_9=-0.018 tm_pt_10=-5.285 tm_pt_11=0.000 tm_pt_12=-3.297 tm_pt_13=0.000 tm_pt_14=-2.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=2.000 lm_0=-8.078 lm_1=-9.783 WordPenalty=-1.737 OOVPenalty=0.000 ||| -20.925
+13 ||| external link of ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-2.000 tm_pt_3=0.000 tm_pt_4=-2.000 tm_pt_5=-8.091 tm_pt_6=-2.871 tm_pt_7=-2.000 tm_pt_8=-5.436 tm_pt_9=-0.018 tm_pt_10=-5.285 tm_pt_11=0.000 tm_pt_12=-2.767 tm_pt_13=0.000 tm_pt_14=-3.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=2.000 lm_0=-7.533 lm_1=-13.045 WordPenalty=-2.171 OOVPenalty=0.000 ||| -21.728
+13 ||| external communication of ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-2.000 tm_pt_3=0.000 tm_pt_4=-2.000 tm_pt_5=-8.265 tm_pt_6=-2.886 tm_pt_7=-2.000 tm_pt_8=-5.436 tm_pt_9=-0.018 tm_pt_10=-5.285 tm_pt_11=0.000 tm_pt_12=-2.555 tm_pt_13=0.000 tm_pt_14=-3.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=2.000 lm_0=-7.692 lm_1=-13.045 WordPenalty=-2.171 OOVPenalty=0.000 ||| -21.888
+13 ||| description of ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-2.000 tm_pt_3=0.000 tm_pt_4=-2.000 tm_pt_5=-10.521 tm_pt_6=-7.098 tm_pt_7=-2.000 tm_pt_8=-5.436 tm_pt_9=-1.018 tm_pt_10=-5.978 tm_pt_11=0.000 tm_pt_12=-5.600 tm_pt_13=0.000 tm_pt_14=-2.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=2.000 lm_0=-6.281 lm_1=-9.783 WordPenalty=-1.737 OOVPenalty=0.000 ||| -21.989
+13 ||| out-links by ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-2.000 tm_pt_3=0.000 tm_pt_4=-2.000 tm_pt_5=-5.866 tm_pt_6=-5.948 tm_pt_7=-2.000 tm_pt_8=-5.436 tm_pt_9=-0.368 tm_pt_10=-4.816 tm_pt_11=0.000 tm_pt_12=-4.214 tm_pt_13=0.000 tm_pt_14=-2.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=2.000 lm_0=-8.495 lm_1=-9.783 WordPenalty=-1.737 OOVPenalty=0.000 ||| -22.119
+13 ||| inter-connectivity of ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-2.000 tm_pt_3=0.000 tm_pt_4=-2.000 tm_pt_5=-6.938 tm_pt_6=-6.405 tm_pt_7=-2.000 tm_pt_8=-5.436 tm_pt_9=-0.386 tm_pt_10=-5.285 tm_pt_11=0.000 tm_pt_12=-4.907 tm_pt_13=0.000 tm_pt_14=-2.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=2.000 lm_0=-8.447 lm_1=-9.783 WordPenalty=-1.737 OOVPenalty=0.000 ||| -22.803
+14 ||| tata communicationer " foreign sanchar nigam limited building it in telecommunication system a main providers ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-12.000 tm_pt_3=0.000 tm_pt_4=-11.000 tm_pt_5=-31.182 tm_pt_6=-18.848 tm_pt_7=-12.000 tm_pt_8=-32.616 tm_pt_9=-4.420 tm_pt_10=-20.028 tm_pt_11=0.000 tm_pt_12=-11.506 tm_pt_13=0.000 tm_pt_14=-15.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=11.000 lm_0=-47.618 lm_1=-52.178 WordPenalty=-7.383 OOVPenalty=0.000 ||| -119.822
+14 ||| tata communicationer " foreign sanchar nigam limited building it the telecommunication system a main providers ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-12.000 tm_pt_3=0.000 tm_pt_4=-11.000 tm_pt_5=-30.294 tm_pt_6=-17.028 tm_pt_7=-12.000 tm_pt_8=-32.616 tm_pt_9=-4.370 tm_pt_10=-19.775 tm_pt_11=0.000 tm_pt_12=-10.184 tm_pt_13=0.000 tm_pt_14=-15.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=11.000 lm_0=-48.866 lm_1=-52.178 WordPenalty=-7.383 OOVPenalty=0.000 ||| -120.000
+14 ||| tata communication " foreign sanchar nigam limited building it the telecommunication system a main providers ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-12.000 tm_pt_3=0.000 tm_pt_4=-11.000 tm_pt_5=-34.848 tm_pt_6=-17.028 tm_pt_7=-12.000 tm_pt_8=-32.616 tm_pt_9=-4.370 tm_pt_10=-22.911 tm_pt_11=0.000 tm_pt_12=-10.184 tm_pt_13=0.000 tm_pt_14=-15.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=11.000 lm_0=-47.612 lm_1=-52.178 WordPenalty=-7.383 OOVPenalty=0.000 ||| -120.133
+14 ||| tata communicationer " foreign sanchar nigam limited building it city telecommunication system a main providers ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-11.000 tm_pt_3=0.000 tm_pt_4=-10.000 tm_pt_5=-27.204 tm_pt_6=-17.941 tm_pt_7=-11.000 tm_pt_8=-29.898 tm_pt_9=-5.370 tm_pt_10=-15.421 tm_pt_11=0.000 tm_pt_12=-8.892 tm_pt_13=0.000 tm_pt_14=-15.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=10.000 lm_0=-50.859 lm_1=-52.178 WordPenalty=-7.383 OOVPenalty=0.000 ||| -121.229
+15 ||| he that year ৪ই নভেম্বরের national assembly in election won all and united states elected as 44th president . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-11.000 tm_pt_3=0.000 tm_pt_4=-8.000 tm_pt_5=-35.845 tm_pt_6=-16.465 tm_pt_7=-11.000 tm_pt_8=-29.898 tm_pt_9=-1.793 tm_pt_10=-11.300 tm_pt_11=0.000 tm_pt_12=-4.120 tm_pt_13=0.000 tm_pt_14=-17.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=8.000 lm_0=-52.538 lm_1=-65.223 WordPenalty=-9.120 OOVPenalty=-200.000 ||| -329.758
+15 ||| he that year ৪ই নভেম্বরের national assembly in the won all and united states elected as 44th president . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-11.000 tm_pt_3=0.000 tm_pt_4=-8.000 tm_pt_5=-42.525 tm_pt_6=-17.628 tm_pt_7=-11.000 tm_pt_8=-29.898 tm_pt_9=-1.799 tm_pt_10=-17.156 tm_pt_11=0.000 tm_pt_12=-4.967 tm_pt_13=0.000 tm_pt_14=-17.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=8.000 lm_0=-49.848 lm_1=-65.223 WordPenalty=-9.120 OOVPenalty=-200.000 ||| -329.930
+15 ||| he that year ৪ই নভেম্বরের assembly in national election won all and united states elected as 44th president . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-11.000 tm_pt_3=0.000 tm_pt_4=-7.000 tm_pt_5=-35.845 tm_pt_6=-16.465 tm_pt_7=-11.000 tm_pt_8=-29.898 tm_pt_9=-1.793 tm_pt_10=-11.097 tm_pt_11=0.000 tm_pt_12=-3.904 tm_pt_13=0.000 tm_pt_14=-17.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=7.000 lm_0=-52.453 lm_1=-65.223 WordPenalty=-9.120 OOVPenalty=-200.000 ||| -330.488
+15 ||| he that year ৪ই নভেম্বরের assembly the national election won all and united states elected as 44th president . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-11.000 tm_pt_3=0.000 tm_pt_4=-7.000 tm_pt_5=-37.637 tm_pt_6=-17.325 tm_pt_7=-11.000 tm_pt_8=-29.898 tm_pt_9=-1.804 tm_pt_10=-12.349 tm_pt_11=0.000 tm_pt_12=-4.087 tm_pt_13=0.000 tm_pt_14=-17.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=7.000 lm_0=-51.665 lm_1=-65.223 WordPenalty=-9.120 OOVPenalty=-200.000 ||| -330.550
+15 ||| in the year ৪ই নভেম্বরের national assembly in election won all and united states elected as 44th president . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-12.000 tm_pt_3=0.000 tm_pt_4=-9.000 tm_pt_5=-42.978 tm_pt_6=-19.674 tm_pt_7=-12.000 tm_pt_8=-32.616 tm_pt_9=-1.425 tm_pt_10=-21.531 tm_pt_11=0.000 tm_pt_12=-8.327 tm_pt_13=0.000 tm_pt_14=-17.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=9.000 lm_0=-47.957 lm_1=-65.223 WordPenalty=-9.120 OOVPenalty=-200.000 ||| -330.583
+15 ||| in that year ৪ই নভেম্বরের national assembly in election won all and united states elected as 44th president . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-11.000 tm_pt_3=0.000 tm_pt_4=-8.000 tm_pt_5=-40.948 tm_pt_6=-19.745 tm_pt_7=-11.000 tm_pt_8=-29.898 tm_pt_9=-1.793 tm_pt_10=-15.498 tm_pt_11=0.000 tm_pt_12=-7.753 tm_pt_13=0.000 tm_pt_14=-17.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=8.000 lm_0=-49.614 lm_1=-65.223 WordPenalty=-9.120 OOVPenalty=-200.000 ||| -330.613
+15 ||| he that year ৪ই নভেম্বরের national assembly 44th president and united states was elected as the won all . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-12.000 tm_pt_3=0.000 tm_pt_4=-7.000 tm_pt_5=-41.906 tm_pt_6=-18.024 tm_pt_7=-12.000 tm_pt_8=-32.616 tm_pt_9=-3.424 tm_pt_10=-18.889 tm_pt_11=0.000 tm_pt_12=-4.845 tm_pt_13=0.000 tm_pt_14=-17.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=7.000 lm_0=-47.289 lm_1=-65.223 WordPenalty=-9.120 OOVPenalty=-200.000 ||| -330.639
+16 ||| many indian প্রজাতি fighting জাত টেক্সা from upper stage ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-5.000 tm_pt_3=0.000 tm_pt_4=-5.000 tm_pt_5=-14.556 tm_pt_6=-12.125 tm_pt_7=-5.000 tm_pt_8=-13.590 tm_pt_9=-2.000 tm_pt_10=-2.670 tm_pt_11=0.000 tm_pt_12=-0.912 tm_pt_13=0.000 tm_pt_14=-6.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=8.000 lm_0=-35.728 lm_1=-32.080 WordPenalty=-4.777 OOVPenalty=-300.000 ||| -371.817
+16 ||| many indian প্রজাতি fighting জাত from টেক্সা upper stage ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-5.000 tm_pt_3=0.000 tm_pt_4=-4.000 tm_pt_5=-14.556 tm_pt_6=-12.125 tm_pt_7=-5.000 tm_pt_8=-13.590 tm_pt_9=-2.000 tm_pt_10=-3.002 tm_pt_11=0.000 tm_pt_12=-1.537 tm_pt_13=0.000 tm_pt_14=-6.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=7.000 lm_0=-35.728 lm_1=-32.080 WordPenalty=-4.777 OOVPenalty=-300.000 ||| -373.089
+16 ||| many indian প্রজাতি fighting জাত টেক্সা to upper stage ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-5.000 tm_pt_3=0.000 tm_pt_4=-5.000 tm_pt_5=-16.518 tm_pt_6=-13.004 tm_pt_7=-5.000 tm_pt_8=-13.590 tm_pt_9=-2.000 tm_pt_10=-4.474 tm_pt_11=0.000 tm_pt_12=-2.241 tm_pt_13=0.000 tm_pt_14=-6.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=8.000 lm_0=-35.464 lm_1=-32.080 WordPenalty=-4.777 OOVPenalty=-300.000 ||| -373.105
+16 ||| many indian প্রজাতি fighting জাত টেক্সা upper stage from ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-5.000 tm_pt_3=0.000 tm_pt_4=-4.000 tm_pt_5=-14.556 tm_pt_6=-12.125 tm_pt_7=-5.000 tm_pt_8=-13.590 tm_pt_9=-2.000 tm_pt_10=-2.830 tm_pt_11=0.000 tm_pt_12=-1.650 tm_pt_13=0.000 tm_pt_14=-6.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=7.000 lm_0=-36.278 lm_1=-32.080 WordPenalty=-4.777 OOVPenalty=-300.000 ||| -373.766
+16 ||| many the প্রজাতি fighting জাত টেক্সা from upper stage ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-5.000 tm_pt_3=0.000 tm_pt_4=-5.000 tm_pt_5=-18.962 tm_pt_6=-12.755 tm_pt_7=-5.000 tm_pt_8=-13.590 tm_pt_9=-2.000 tm_pt_10=-7.024 tm_pt_11=0.000 tm_pt_12=-2.862 tm_pt_13=0.000 tm_pt_14=-6.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=8.000 lm_0=-35.098 lm_1=-32.080 WordPenalty=-4.777 OOVPenalty=-300.000 ||| -373.915
+16 ||| of indian প্রজাতি fighting জাত টেক্সা from upper stage ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-5.000 tm_pt_3=0.000 tm_pt_4=-5.000 tm_pt_5=-19.990 tm_pt_6=-14.047 tm_pt_7=-5.000 tm_pt_8=-13.590 tm_pt_9=-2.050 tm_pt_10=-6.670 tm_pt_11=0.000 tm_pt_12=-3.477 tm_pt_13=0.000 tm_pt_14=-6.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=8.000 lm_0=-34.630 lm_1=-32.080 WordPenalty=-4.777 OOVPenalty=-300.000 ||| -374.141
+17 ||| britain writers written drama novels and stories recently scripts in আদৃত . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-11.000 tm_pt_3=0.000 tm_pt_4=-8.000 tm_pt_5=-20.642 tm_pt_6=-10.927 tm_pt_7=-11.000 tm_pt_8=-29.898 tm_pt_9=-0.874 tm_pt_10=-21.259 tm_pt_11=0.000 tm_pt_12=-8.774 tm_pt_13=0.000 tm_pt_14=-11.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=9.000 lm_0=-36.970 lm_1=-42.395 WordPenalty=-6.080 OOVPenalty=-100.000 ||| -193.128
+17 ||| britain writers written drama novels stories and recently scripts in আদৃত . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-11.000 tm_pt_3=0.000 tm_pt_4=-10.000 tm_pt_5=-20.642 tm_pt_6=-10.927 tm_pt_7=-11.000 tm_pt_8=-29.898 tm_pt_9=-0.874 tm_pt_10=-16.732 tm_pt_11=0.000 tm_pt_12=-5.024 tm_pt_13=0.000 tm_pt_14=-11.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=11.000 lm_0=-40.717 lm_1=-42.395 WordPenalty=-6.080 OOVPenalty=-100.000 ||| -193.334
+17 ||| britain writers written drama novel stories and recently scripts in আদৃত . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-11.000 tm_pt_3=0.000 tm_pt_4=-10.000 tm_pt_5=-21.690 tm_pt_6=-11.374 tm_pt_7=-11.000 tm_pt_8=-29.898 tm_pt_9=-0.874 tm_pt_10=-17.888 tm_pt_11=0.000 tm_pt_12=-5.391 tm_pt_13=0.000 tm_pt_14=-11.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=11.000 lm_0=-40.536 lm_1=-42.395 WordPenalty=-6.080 OOVPenalty=-100.000 ||| -193.894
+17 ||| britain writers written drama novels and stories recently script in আদৃত . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-11.000 tm_pt_3=0.000 tm_pt_4=-8.000 tm_pt_5=-21.418 tm_pt_6=-10.442 tm_pt_7=-11.000 tm_pt_8=-29.898 tm_pt_9=-1.824 tm_pt_10=-21.547 tm_pt_11=0.000 tm_pt_12=-10.160 tm_pt_13=0.000 tm_pt_14=-11.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=9.000 lm_0=-36.674 lm_1=-42.395 WordPenalty=-6.080 OOVPenalty=-100.000 ||| -194.089
+17 ||| britain writers written drama novels story and recently scripts in আদৃত . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-11.000 tm_pt_3=0.000 tm_pt_4=-10.000 tm_pt_5=-21.556 tm_pt_6=-11.746 tm_pt_7=-11.000 tm_pt_8=-29.898 tm_pt_9=-0.874 tm_pt_10=-17.463 tm_pt_11=0.000 tm_pt_12=-5.755 tm_pt_13=0.000 tm_pt_14=-11.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=11.000 lm_0=-40.669 lm_1=-42.395 WordPenalty=-6.080 OOVPenalty=-100.000 ||| -194.178
+17 ||| britain writers written drama novels stories and recently script in আদৃত . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-11.000 tm_pt_3=0.000 tm_pt_4=-10.000 tm_pt_5=-21.418 tm_pt_6=-10.442 tm_pt_7=-11.000 tm_pt_8=-29.898 tm_pt_9=-1.824 tm_pt_10=-17.019 tm_pt_11=0.000 tm_pt_12=-6.410 tm_pt_13=0.000 tm_pt_14=-11.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=11.000 lm_0=-40.421 lm_1=-42.395 WordPenalty=-6.080 OOVPenalty=-100.000 ||| -194.295
+17 ||| britain writers the drama novels and stories recently scripts in আদৃত . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-11.000 tm_pt_3=0.000 tm_pt_4=-8.000 tm_pt_5=-25.509 tm_pt_6=-11.095 tm_pt_7=-11.000 tm_pt_8=-29.898 tm_pt_9=-0.959 tm_pt_10=-25.559 tm_pt_11=0.000 tm_pt_12=-9.061 tm_pt_13=0.000 tm_pt_14=-11.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=9.000 lm_0=-36.194 lm_1=-42.395 WordPenalty=-6.080 OOVPenalty=-100.000 ||| -194.444
+18 ||| on may 1919 , it saogat magazine published . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-8.000 tm_pt_3=0.000 tm_pt_4=-6.000 tm_pt_5=-15.806 tm_pt_6=-13.716 tm_pt_7=-8.000 tm_pt_8=-21.744 tm_pt_9=-0.152 tm_pt_10=-8.843 tm_pt_11=0.000 tm_pt_12=-5.765 tm_pt_13=0.000 tm_pt_14=-9.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=5.000 lm_0=-23.514 lm_1=-32.611 WordPenalty=-4.777 OOVPenalty=0.000 ||| -65.459
+18 ||| 1919 on may , it saogat magazine published . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-7.000 tm_pt_3=0.000 tm_pt_4=-5.000 tm_pt_5=-15.806 tm_pt_6=-13.716 tm_pt_7=-7.000 tm_pt_8=-19.026 tm_pt_9=-1.151 tm_pt_10=-4.478 tm_pt_11=0.000 tm_pt_12=-4.843 tm_pt_13=0.000 tm_pt_14=-9.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=5.000 lm_0=-25.572 lm_1=-32.611 WordPenalty=-4.777 OOVPenalty=0.000 ||| -66.178
+18 ||| 1919 in may , it saogat magazine published . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-7.000 tm_pt_3=0.000 tm_pt_4=-5.000 tm_pt_5=-17.884 tm_pt_6=-14.337 tm_pt_7=-7.000 tm_pt_8=-19.026 tm_pt_9=-0.519 tm_pt_10=-3.784 tm_pt_11=0.000 tm_pt_12=-4.150 tm_pt_13=0.000 tm_pt_14=-9.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=5.000 lm_0=-25.975 lm_1=-32.611 WordPenalty=-4.777 OOVPenalty=0.000 ||| -66.342
+18 ||| on may 1919 in it saogat magazine published . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-8.000 tm_pt_3=0.000 tm_pt_4=-6.000 tm_pt_5=-14.193 tm_pt_6=-13.162 tm_pt_7=-8.000 tm_pt_8=-21.744 tm_pt_9=-0.201 tm_pt_10=-7.947 tm_pt_11=0.000 tm_pt_12=-6.576 tm_pt_13=0.000 tm_pt_14=-9.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=5.000 lm_0=-24.704 lm_1=-32.611 WordPenalty=-4.777 OOVPenalty=0.000 ||| -66.529
+18 ||| 1919 on may month it saogat magazine published . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-8.000 tm_pt_3=0.000 tm_pt_4=-7.000 tm_pt_5=-9.934 tm_pt_6=-13.516 tm_pt_7=-8.000 tm_pt_8=-21.744 tm_pt_9=-0.287 tm_pt_10=-4.582 tm_pt_11=0.000 tm_pt_12=-6.775 tm_pt_13=0.000 tm_pt_14=-9.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=7.000 lm_0=-27.533 lm_1=-32.611 WordPenalty=-4.777 OOVPenalty=0.000 ||| -66.639
+18 ||| on may 1919 , this saogat magazine published . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-8.000 tm_pt_3=0.000 tm_pt_4=-6.000 tm_pt_5=-17.388 tm_pt_6=-14.651 tm_pt_7=-8.000 tm_pt_8=-21.744 tm_pt_9=-0.152 tm_pt_10=-11.508 tm_pt_11=0.000 tm_pt_12=-7.130 tm_pt_13=0.000 tm_pt_14=-9.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=5.000 lm_0=-23.185 lm_1=-32.611 WordPenalty=-4.777 OOVPenalty=0.000 ||| -66.848
+19 ||| 2005 , উইমেনস tennis association tour টায়ার-থ্রি টুর্নামেন্ট সানফিস্ট open netaji indoor stadium was arranged . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-11.000 tm_pt_3=0.000 tm_pt_4=-10.000 tm_pt_5=-21.244 tm_pt_6=-8.707 tm_pt_7=-11.000 tm_pt_8=-29.898 tm_pt_9=-2.249 tm_pt_10=-15.730 tm_pt_11=0.000 tm_pt_12=-5.148 tm_pt_13=0.000 tm_pt_14=-12.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=14.000 lm_0=-57.604 lm_1=-55.439 WordPenalty=-7.817 OOVPenalty=-400.000 ||| -520.892
+19 ||| 2005 , উইমেনস tennis association tour টায়ার-থ্রি টুর্নামেন্ট সানফিস্ট open netaji indoor stadium is the . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-11.000 tm_pt_3=0.000 tm_pt_4=-11.000 tm_pt_5=-29.073 tm_pt_6=-9.197 tm_pt_7=-11.000 tm_pt_8=-29.898 tm_pt_9=-2.249 tm_pt_10=-22.691 tm_pt_11=0.000 tm_pt_12=-5.552 tm_pt_13=0.000 tm_pt_14=-12.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=15.000 lm_0=-55.740 lm_1=-55.439 WordPenalty=-7.817 OOVPenalty=-400.000 ||| -521.235
+19 ||| 2005 , উইমেনস tennis association tour টায়ার-থ্রি টুর্নামেন্ট সানফিস্ট open netaji indoor stadium was organized . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-11.000 tm_pt_3=0.000 tm_pt_4=-10.000 tm_pt_5=-22.039 tm_pt_6=-8.841 tm_pt_7=-11.000 tm_pt_8=-29.898 tm_pt_9=-2.249 tm_pt_10=-16.240 tm_pt_11=0.000 tm_pt_12=-5.148 tm_pt_13=0.000 tm_pt_14=-12.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=14.000 lm_0=-57.677 lm_1=-55.439 WordPenalty=-7.817 OOVPenalty=-400.000 ||| -521.313
+19 ||| 2005 , উইমেনস tennis association tour টায়ার-থ্রি টুর্নামেন্ট সানফিস্ট open netaji indoor stadium arranged in . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-11.000 tm_pt_3=0.000 tm_pt_4=-10.000 tm_pt_5=-23.377 tm_pt_6=-9.826 tm_pt_7=-11.000 tm_pt_8=-29.898 tm_pt_9=-2.249 tm_pt_10=-15.383 tm_pt_11=0.000 tm_pt_12=-4.247 tm_pt_13=0.000 tm_pt_14=-12.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=14.000 lm_0=-58.454 lm_1=-55.439 WordPenalty=-7.817 OOVPenalty=-400.000 ||| -522.327
+19 ||| 2005 , উইমেনস tennis association tour টায়ার-থ্রি টুর্নামেন্ট সানফিস্ট open netaji indoor stadium organized in . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-11.000 tm_pt_3=0.000 tm_pt_4=-11.000 tm_pt_5=-24.172 tm_pt_6=-9.959 tm_pt_7=-11.000 tm_pt_8=-29.898 tm_pt_9=-2.249 tm_pt_10=-16.993 tm_pt_11=0.000 tm_pt_12=-6.193 tm_pt_13=0.000 tm_pt_14=-12.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=15.000 lm_0=-58.336 lm_1=-55.439 WordPenalty=-7.817 OOVPenalty=-400.000 ||| -522.519
+19 ||| 2005 , উইমেনস tennis association tour টায়ার-থ্রি টুর্নামেন্ট সানফিস্ট open netaji indoor stadium is was . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-11.000 tm_pt_3=0.000 tm_pt_4=-11.000 tm_pt_5=-26.317 tm_pt_6=-8.389 tm_pt_7=-11.000 tm_pt_8=-29.898 tm_pt_9=-2.249 tm_pt_10=-20.791 tm_pt_11=0.000 tm_pt_12=-5.062 tm_pt_13=0.000 tm_pt_14=-12.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=15.000 lm_0=-57.992 lm_1=-55.439 WordPenalty=-7.817 OOVPenalty=-400.000 ||| -522.552
+20 ||| to prevent this several measures are taken . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-1.000 tm_pt_3=0.000 tm_pt_4=-1.000 tm_pt_5=-22.680 tm_pt_6=-30.812 tm_pt_7=-1.000 tm_pt_8=-2.718 tm_pt_9=-1.000 tm_pt_10=0.000 tm_pt_11=0.000 tm_pt_12=-1.386 tm_pt_13=0.000 tm_pt_14=-8.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=1.000 lm_0=-11.632 lm_1=-29.350 WordPenalty=-4.343 OOVPenalty=0.000 ||| -48.405
+20 ||| to prevent this several measures are . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-2.000 tm_pt_3=0.000 tm_pt_4=-1.000 tm_pt_5=-20.219 tm_pt_6=-29.189 tm_pt_7=-2.000 tm_pt_8=-5.436 tm_pt_9=-1.000 tm_pt_10=-6.851 tm_pt_11=0.000 tm_pt_12=-1.946 tm_pt_13=0.000 tm_pt_14=-7.000 tm_pt_15=-1.000 tm_pt_16=0.000 tm_glue_0=1.000 lm_0=-12.686 lm_1=-26.089 WordPenalty=-3.909 OOVPenalty=0.000 ||| -50.265
+20 ||| to prevent this several measures are the . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-3.000 tm_pt_3=0.000 tm_pt_4=-3.000 tm_pt_5=-24.227 tm_pt_6=-27.251 tm_pt_7=-3.000 tm_pt_8=-8.154 tm_pt_9=-1.000 tm_pt_10=-3.426 tm_pt_11=0.000 tm_pt_12=-2.285 tm_pt_13=0.000 tm_pt_14=-8.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=3.000 lm_0=-14.066 lm_1=-29.350 WordPenalty=-4.343 OOVPenalty=0.000 ||| -52.084
+20 ||| to prevent this several measures are in . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-3.000 tm_pt_3=0.000 tm_pt_4=-3.000 tm_pt_5=-23.388 tm_pt_6=-27.344 tm_pt_7=-3.000 tm_pt_8=-8.154 tm_pt_9=-1.000 tm_pt_10=-2.771 tm_pt_11=0.000 tm_pt_12=-2.699 tm_pt_13=0.000 tm_pt_14=-8.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=3.000 lm_0=-14.649 lm_1=-29.350 WordPenalty=-4.343 OOVPenalty=0.000 ||| -52.653
+20 ||| to avoid this possibility several measures are taken . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-1.000 tm_pt_3=0.000 tm_pt_4=-1.000 tm_pt_5=-24.597 tm_pt_6=-31.733 tm_pt_7=-1.000 tm_pt_8=-2.718 tm_pt_9=-1.000 tm_pt_10=0.000 tm_pt_11=0.000 tm_pt_12=-1.386 tm_pt_13=0.000 tm_pt_14=-9.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=1.000 lm_0=-13.461 lm_1=-32.611 WordPenalty=-4.777 OOVPenalty=0.000 ||| -53.452
+20 ||| to prevent this several measures are to . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-3.000 tm_pt_3=0.000 tm_pt_4=-3.000 tm_pt_5=-23.286 tm_pt_6=-27.775 tm_pt_7=-3.000 tm_pt_8=-8.154 tm_pt_9=-1.000 tm_pt_10=-3.872 tm_pt_11=0.000 tm_pt_12=-3.920 tm_pt_13=0.000 tm_pt_14=-8.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=3.000 lm_0=-15.009 lm_1=-29.350 WordPenalty=-4.343 OOVPenalty=0.000 ||| -53.944
+20 ||| to prevent this several measures are to ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-2.000 tm_pt_3=0.000 tm_pt_4=-2.000 tm_pt_5=-23.744 tm_pt_6=-33.519 tm_pt_7=-2.000 tm_pt_8=-5.436 tm_pt_9=-2.000 tm_pt_10=-6.999 tm_pt_11=0.000 tm_pt_12=-4.736 tm_pt_13=0.000 tm_pt_14=-7.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=2.000 lm_0=-14.913 lm_1=-26.089 WordPenalty=-3.909 OOVPenalty=0.000 ||| -55.503
+21 ||| ১৯৬৬ on 5 february লাহোরে of দলসমূহের a national was held . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-9.000 tm_pt_3=0.000 tm_pt_4=-9.000 tm_pt_5=-21.481 tm_pt_6=-14.645 tm_pt_7=-9.000 tm_pt_8=-24.462 tm_pt_9=-0.436 tm_pt_10=-15.183 tm_pt_11=0.000 tm_pt_12=-3.672 tm_pt_13=0.000 tm_pt_14=-9.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=12.000 lm_0=-41.154 lm_1=-42.395 WordPenalty=-6.080 OOVPenalty=-300.000 ||| -391.381
+21 ||| ১৯৬৬ on 5 february লাহোরে দলসমূহের against a national was held . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-9.000 tm_pt_3=0.000 tm_pt_4=-8.000 tm_pt_5=-16.732 tm_pt_6=-14.335 tm_pt_7=-9.000 tm_pt_8=-24.462 tm_pt_9=-0.553 tm_pt_10=-12.167 tm_pt_11=0.000 tm_pt_12=-3.960 tm_pt_13=0.000 tm_pt_14=-9.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=11.000 lm_0=-41.879 lm_1=-42.395 WordPenalty=-6.080 OOVPenalty=-300.000 ||| -391.643
+21 ||| ১৯৬৬ on 5 february লাহোরে against দলসমূহের a national was held . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-9.000 tm_pt_3=0.000 tm_pt_4=-9.000 tm_pt_5=-16.732 tm_pt_6=-14.335 tm_pt_7=-9.000 tm_pt_8=-24.462 tm_pt_9=-0.553 tm_pt_10=-12.989 tm_pt_11=0.000 tm_pt_12=-4.183 tm_pt_13=0.000 tm_pt_14=-9.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=12.000 lm_0=-42.439 lm_1=-42.395 WordPenalty=-6.080 OOVPenalty=-300.000 ||| -391.664
+21 ||| ১৯৬৬ on 5 february লাহোরে opposition দলসমূহের a national was held . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-9.000 tm_pt_3=0.000 tm_pt_4=-9.000 tm_pt_5=-15.278 tm_pt_6=-14.740 tm_pt_7=-9.000 tm_pt_8=-24.462 tm_pt_9=-0.553 tm_pt_10=-10.186 tm_pt_11=0.000 tm_pt_12=-4.183 tm_pt_13=0.000 tm_pt_14=-9.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=12.000 lm_0=-43.323 lm_1=-42.395 WordPenalty=-6.080 OOVPenalty=-300.000 ||| -391.891
+21 ||| ১৯৬৬ on 5 february লাহোরে of দলসমূহের a national was held in . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-8.000 tm_pt_3=0.000 tm_pt_4=-8.000 tm_pt_5=-24.651 tm_pt_6=-11.840 tm_pt_7=-8.000 tm_pt_8=-21.744 tm_pt_9=-1.018 tm_pt_10=-8.289 tm_pt_11=0.000 tm_pt_12=-3.518 tm_pt_13=0.000 tm_pt_14=-10.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=11.000 lm_0=-42.147 lm_1=-45.656 WordPenalty=-6.514 OOVPenalty=-300.000 ||| -392.804
+21 ||| ১৯৬৬ on 5 february লাহোরে দলসমূহের against a national was held in . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-8.000 tm_pt_3=0.000 tm_pt_4=-7.000 tm_pt_5=-19.901 tm_pt_6=-11.529 tm_pt_7=-8.000 tm_pt_8=-21.744 tm_pt_9=-1.135 tm_pt_10=-5.272 tm_pt_11=0.000 tm_pt_12=-3.806 tm_pt_13=0.000 tm_pt_14=-10.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=10.000 lm_0=-42.873 lm_1=-45.656 WordPenalty=-6.514 OOVPenalty=-300.000 ||| -393.067
+22 ||| bangladesh অর্গানাইজেশন of the islamic কনফারেন্স and islamic ডেভেলপমেণ্ট bank secured its place in in . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-10.000 tm_pt_3=0.000 tm_pt_4=-9.000 tm_pt_5=-47.305 tm_pt_6=-11.674 tm_pt_7=-10.000 tm_pt_8=-27.180 tm_pt_9=-2.050 tm_pt_10=-27.018 tm_pt_11=0.000 tm_pt_12=-1.913 tm_pt_13=0.000 tm_pt_14=-13.000 tm_pt_15=-1.000 tm_pt_16=0.000 tm_glue_0=12.000 lm_0=-44.173 lm_1=-55.439 WordPenalty=-7.817 OOVPenalty=-300.000 ||| -412.927
+22 ||| bangladesh অর্গানাইজেশন of the islamic কনফারেন্স and islamic ডেভেলপমেণ্ট bank secured its place in took . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-10.000 tm_pt_3=0.000 tm_pt_4=-9.000 tm_pt_5=-43.395 tm_pt_6=-11.480 tm_pt_7=-10.000 tm_pt_8=-27.180 tm_pt_9=-2.368 tm_pt_10=-23.851 tm_pt_11=0.000 tm_pt_12=-2.607 tm_pt_13=0.000 tm_pt_14=-13.000 tm_pt_15=-1.000 tm_pt_16=0.000 tm_glue_0=12.000 lm_0=-45.354 lm_1=-55.439 WordPenalty=-7.817 OOVPenalty=-300.000 ||| -413.259
+22 ||| bangladesh অর্গানাইজেশন of the islamic কনফারেন্স and islamic ডেভেলপমেণ্ট bank secured its place in adopted . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-10.000 tm_pt_3=0.000 tm_pt_4=-9.000 tm_pt_5=-42.382 tm_pt_6=-12.116 tm_pt_7=-10.000 tm_pt_8=-27.180 tm_pt_9=-3.000 tm_pt_10=-21.979 tm_pt_11=0.000 tm_pt_12=-3.300 tm_pt_13=0.000 tm_pt_14=-13.000 tm_pt_15=-1.000 tm_pt_16=0.000 tm_glue_0=12.000 lm_0=-45.321 lm_1=-55.439 WordPenalty=-7.817 OOVPenalty=-300.000 ||| -413.521
+22 ||| bangladesh অর্গানাইজেশন of the islamic কনফারেন্স and islamic ডেভেলপমেণ্ট bank secured its place in in the . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-11.000 tm_pt_3=0.000 tm_pt_4=-11.000 tm_pt_5=-50.512 tm_pt_6=-9.888 tm_pt_7=-11.000 tm_pt_8=-29.898 tm_pt_9=-2.050 tm_pt_10=-23.145 tm_pt_11=0.000 tm_pt_12=-2.086 tm_pt_13=0.000 tm_pt_14=-14.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=14.000 lm_0=-45.104 lm_1=-58.700 WordPenalty=-8.252 OOVPenalty=-300.000 ||| -413.915
+22 ||| bangladesh অর্গানাইজেশন of the islamic কনফারেন্স and islamic ডেভেলপমেণ্ট bank took secured its place in . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-10.000 tm_pt_3=0.000 tm_pt_4=-8.000 tm_pt_5=-43.395 tm_pt_6=-11.480 tm_pt_7=-10.000 tm_pt_8=-27.180 tm_pt_9=-2.368 tm_pt_10=-23.728 tm_pt_11=0.000 tm_pt_12=-3.037 tm_pt_13=0.000 tm_pt_14=-13.000 tm_pt_15=-1.000 tm_pt_16=0.000 tm_glue_0=11.000 lm_0=-45.354 lm_1=-55.439 WordPenalty=-7.817 OOVPenalty=-300.000 ||| -414.344
+22 ||| bangladesh অর্গানাইজেশন of the islamic কনফারেন্স and islamic ডেভেলপমেণ্ট bank secured its place in had . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-10.000 tm_pt_3=0.000 tm_pt_4=-9.000 tm_pt_5=-45.320 tm_pt_6=-12.521 tm_pt_7=-10.000 tm_pt_8=-27.180 tm_pt_9=-3.000 tm_pt_10=-25.115 tm_pt_11=0.000 tm_pt_12=-3.300 tm_pt_13=0.000 tm_pt_14=-13.000 tm_pt_15=-1.000 tm_pt_16=0.000 tm_glue_0=12.000 lm_0=-44.889 lm_1=-55.439 WordPenalty=-7.817 OOVPenalty=-300.000 ||| -414.500
+22 ||| bangladesh অর্গানাইজেশন of the islamic কনফারেন্স and islamic ডেভেলপমেণ্ট bank secured its place in took the . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-11.000 tm_pt_3=0.000 tm_pt_4=-11.000 tm_pt_5=-46.603 tm_pt_6=-9.693 tm_pt_7=-11.000 tm_pt_8=-29.898 tm_pt_9=-2.368 tm_pt_10=-19.978 tm_pt_11=0.000 tm_pt_12=-2.779 tm_pt_13=0.000 tm_pt_14=-14.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=14.000 lm_0=-46.563 lm_1=-58.700 WordPenalty=-8.252 OOVPenalty=-300.000 ||| -414.591
+22 ||| bangladesh অর্গানাইজেশন of the islamic কনফারেন্স and islamic ডেভেলপমেণ্ট bank took secured its place in the . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-11.000 tm_pt_3=0.000 tm_pt_4=-10.000 tm_pt_5=-46.603 tm_pt_6=-9.693 tm_pt_7=-11.000 tm_pt_8=-29.898 tm_pt_9=-2.368 tm_pt_10=-19.856 tm_pt_11=0.000 tm_pt_12=-3.210 tm_pt_13=0.000 tm_pt_14=-14.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=13.000 lm_0=-45.737 lm_1=-58.700 WordPenalty=-8.252 OOVPenalty=-300.000 ||| -414.654
+22 ||| bangladesh অর্গানাইজেশন of the islamic কনফারেন্স and had ডেভেলপমেণ্ট bank secured its place in in . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-10.000 tm_pt_3=0.000 tm_pt_4=-9.000 tm_pt_5=-52.098 tm_pt_6=-14.618 tm_pt_7=-10.000 tm_pt_8=-27.180 tm_pt_9=-3.050 tm_pt_10=-29.460 tm_pt_11=0.000 tm_pt_12=-4.216 tm_pt_13=0.000 tm_pt_14=-13.000 tm_pt_15=-1.000 tm_pt_16=0.000 tm_glue_0=12.000 lm_0=-43.907 lm_1=-55.439 WordPenalty=-7.817 OOVPenalty=-300.000 ||| -416.790
+22 ||| bangladesh অর্গানাইজেশন of the islamic কনফারেন্স and had ডেভেলপমেণ্ট bank secured its place in took . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-10.000 tm_pt_3=0.000 tm_pt_4=-9.000 tm_pt_5=-48.188 tm_pt_6=-14.424 tm_pt_7=-10.000 tm_pt_8=-27.180 tm_pt_9=-3.368 tm_pt_10=-26.293 tm_pt_11=0.000 tm_pt_12=-4.909 tm_pt_13=0.000 tm_pt_14=-13.000 tm_pt_15=-1.000 tm_pt_16=0.000 tm_glue_0=12.000 lm_0=-45.088 lm_1=-55.439 WordPenalty=-7.817 OOVPenalty=-300.000 ||| -417.121
+23 ||| subject : encyclopedia ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-2.000 tm_pt_3=0.000 tm_pt_4=-2.000 tm_pt_5=-1.657 tm_pt_6=-1.542 tm_pt_7=-2.000 tm_pt_8=-5.436 tm_pt_9=-0.000 tm_pt_10=-0.420 tm_pt_11=0.000 tm_pt_12=-1.500 tm_pt_13=0.000 tm_pt_14=-3.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=2.000 lm_0=-5.528 lm_1=-13.045 WordPenalty=-2.171 OOVPenalty=0.000 ||| -15.836
+23 ||| category : encyclopedia ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-2.000 tm_pt_3=0.000 tm_pt_4=-2.000 tm_pt_5=-1.425 tm_pt_6=-2.012 tm_pt_7=-2.000 tm_pt_8=-5.436 tm_pt_9=-0.135 tm_pt_10=-0.020 tm_pt_11=0.000 tm_pt_12=-1.817 tm_pt_13=0.000 tm_pt_14=-3.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=2.000 lm_0=-5.707 lm_1=-13.045 WordPenalty=-2.171 OOVPenalty=0.000 ||| -16.293
+23 ||| subject-class : encyclopedia ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-2.000 tm_pt_3=0.000 tm_pt_4=-2.000 tm_pt_5=-1.379 tm_pt_6=-3.561 tm_pt_7=-2.000 tm_pt_8=-5.436 tm_pt_9=-0.135 tm_pt_10=0.000 tm_pt_11=0.000 tm_pt_12=-2.703 tm_pt_13=0.000 tm_pt_14=-3.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=2.000 lm_0=-5.989 lm_1=-13.045 WordPenalty=-2.171 OOVPenalty=0.000 ||| -17.472
+23 ||| topics : encyclopedia ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-2.000 tm_pt_3=0.000 tm_pt_4=-2.000 tm_pt_5=-1.537 tm_pt_6=-3.874 tm_pt_7=-2.000 tm_pt_8=-5.436 tm_pt_9=-0.135 tm_pt_10=0.000 tm_pt_11=0.000 tm_pt_12=-2.991 tm_pt_13=0.000 tm_pt_14=-3.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=2.000 lm_0=-6.220 lm_1=-13.045 WordPenalty=-2.171 OOVPenalty=0.000 ||| -17.996
+24 ||| russia france and israel the main অস্ত্রসরবরাহকারী country and defense sub country . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-10.000 tm_pt_3=0.000 tm_pt_4=-10.000 tm_pt_5=-18.530 tm_pt_6=-8.356 tm_pt_7=-10.000 tm_pt_8=-27.180 tm_pt_9=-1.106 tm_pt_10=-10.471 tm_pt_11=0.000 tm_pt_12=-5.841 tm_pt_13=0.000 tm_pt_14=-12.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=11.000 lm_0=-38.844 lm_1=-45.656 WordPenalty=-6.514 OOVPenalty=-100.000 ||| -189.536
+24 ||| russia france and israel the main অস্ত্রসরবরাহকারী state and defense sub country . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-9.000 tm_pt_3=0.000 tm_pt_4=-9.000 tm_pt_5=-18.467 tm_pt_6=-9.206 tm_pt_7=-9.000 tm_pt_8=-24.462 tm_pt_9=-1.474 tm_pt_10=-10.535 tm_pt_11=0.000 tm_pt_12=-5.933 tm_pt_13=0.000 tm_pt_14=-12.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=10.000 lm_0=-38.802 lm_1=-45.656 WordPenalty=-6.514 OOVPenalty=-100.000 ||| -189.924
+24 ||| russia france and israel the main অস্ত্রসরবরাহকারী country and defence sub country . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-10.000 tm_pt_3=0.000 tm_pt_4=-10.000 tm_pt_5=-18.942 tm_pt_6=-9.742 tm_pt_7=-10.000 tm_pt_8=-27.180 tm_pt_9=-1.738 tm_pt_10=-10.065 tm_pt_11=0.000 tm_pt_12=-6.534 tm_pt_13=0.000 tm_pt_14=-12.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=11.000 lm_0=-38.535 lm_1=-45.656 WordPenalty=-6.514 OOVPenalty=-100.000 ||| -190.366
+24 ||| russia france and israel the main অস্ত্রসরবরাহকারী countries and defense sub country . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-9.000 tm_pt_3=0.000 tm_pt_4=-9.000 tm_pt_5=-19.426 tm_pt_6=-10.343 tm_pt_7=-9.000 tm_pt_8=-24.462 tm_pt_9=-1.474 tm_pt_10=-10.130 tm_pt_11=0.000 tm_pt_12=-5.933 tm_pt_13=0.000 tm_pt_14=-12.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=10.000 lm_0=-38.815 lm_1=-45.656 WordPenalty=-6.514 OOVPenalty=-100.000 ||| -190.407
+25 ||| this is the known imaginary mathematics formed with which are real number set from সেটে par with the complex number . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-14.000 tm_pt_3=0.000 tm_pt_4=-10.000 tm_pt_5=-51.313 tm_pt_6=-21.937 tm_pt_7=-14.000 tm_pt_8=-38.052 tm_pt_9=-4.889 tm_pt_10=-21.875 tm_pt_11=0.000 tm_pt_12=-7.000 tm_pt_13=0.000 tm_pt_14=-20.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=10.000 lm_0=-48.835 lm_1=-71.745 WordPenalty=-9.989 OOVPenalty=-100.000 ||| -241.719
+25 ||| this is our known imaginary mathematics formed with which are real number set from সেটে par with the complex number . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-15.000 tm_pt_3=0.000 tm_pt_4=-11.000 tm_pt_5=-44.711 tm_pt_6=-21.174 tm_pt_7=-15.000 tm_pt_8=-40.770 tm_pt_9=-3.889 tm_pt_10=-22.633 tm_pt_11=0.000 tm_pt_12=-7.690 tm_pt_13=0.000 tm_pt_14=-20.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=11.000 lm_0=-50.396 lm_1=-71.745 WordPenalty=-9.989 OOVPenalty=-100.000 ||| -241.938
+25 ||| this is the known imaginary mathematics formed with which are real number set from সেটে par with complex number . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-15.000 tm_pt_3=0.000 tm_pt_4=-10.000 tm_pt_5=-47.304 tm_pt_6=-23.874 tm_pt_7=-15.000 tm_pt_8=-40.770 tm_pt_9=-3.890 tm_pt_10=-30.946 tm_pt_11=0.000 tm_pt_12=-9.515 tm_pt_13=0.000 tm_pt_14=-19.000 tm_pt_15=-1.000 tm_pt_16=0.000 tm_glue_0=10.000 lm_0=-48.176 lm_1=-68.484 WordPenalty=-9.554 OOVPenalty=-100.000 ||| -242.826
+25 ||| this is the known imaginary mathematics formed with which are real numbers set from সেটে par with the complex number . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-14.000 tm_pt_3=0.000 tm_pt_4=-10.000 tm_pt_5=-51.078 tm_pt_6=-22.637 tm_pt_7=-14.000 tm_pt_8=-38.052 tm_pt_9=-4.889 tm_pt_10=-21.594 tm_pt_11=0.000 tm_pt_12=-7.000 tm_pt_13=0.000 tm_pt_14=-20.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=10.000 lm_0=-49.663 lm_1=-71.745 WordPenalty=-9.989 OOVPenalty=-100.000 ||| -242.865
+25 ||| this is the known imaginary mathematics formed with which are real number set to সেটে par with the complex number . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-14.000 tm_pt_3=0.000 tm_pt_4=-10.000 tm_pt_5=-53.275 tm_pt_6=-22.816 tm_pt_7=-14.000 tm_pt_8=-38.052 tm_pt_9=-4.889 tm_pt_10=-23.680 tm_pt_11=0.000 tm_pt_12=-8.329 tm_pt_13=0.000 tm_pt_14=-20.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=10.000 lm_0=-48.571 lm_1=-71.745 WordPenalty=-9.989 OOVPenalty=-100.000 ||| -243.007
+25 ||| this is our known imaginary mathematics formed with which are real number set from সেটে par with complex number . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-16.000 tm_pt_3=0.000 tm_pt_4=-11.000 tm_pt_5=-40.703 tm_pt_6=-23.112 tm_pt_7=-16.000 tm_pt_8=-43.488 tm_pt_9=-2.890 tm_pt_10=-31.704 tm_pt_11=0.000 tm_pt_12=-10.205 tm_pt_13=0.000 tm_pt_14=-19.000 tm_pt_15=-1.000 tm_pt_16=0.000 tm_glue_0=11.000 lm_0=-49.737 lm_1=-68.484 WordPenalty=-9.554 OOVPenalty=-100.000 ||| -243.045
+25 ||| this is our known imaginary mathematics formed with which are real numbers set from সেটে par with the complex number . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-15.000 tm_pt_3=0.000 tm_pt_4=-11.000 tm_pt_5=-44.476 tm_pt_6=-21.875 tm_pt_7=-15.000 tm_pt_8=-40.770 tm_pt_9=-3.889 tm_pt_10=-22.352 tm_pt_11=0.000 tm_pt_12=-7.690 tm_pt_13=0.000 tm_pt_14=-20.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=11.000 lm_0=-51.225 lm_1=-71.745 WordPenalty=-9.989 OOVPenalty=-100.000 ||| -243.084
+25 ||| this is the known imaginary mathematics formed with which are from real number set সেটে par with the complex number . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-13.000 tm_pt_3=0.000 tm_pt_4=-8.000 tm_pt_5=-51.313 tm_pt_6=-21.937 tm_pt_7=-13.000 tm_pt_8=-35.334 tm_pt_9=-5.889 tm_pt_10=-20.802 tm_pt_11=0.000 tm_pt_12=-6.932 tm_pt_13=0.000 tm_pt_14=-20.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=8.000 lm_0=-48.916 lm_1=-71.745 WordPenalty=-9.989 OOVPenalty=-100.000 ||| -243.110
+26 ||| <address> ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-1.000 tm_pt_3=0.000 tm_pt_4=-1.000 tm_pt_5=-1.494 tm_pt_6=-38.184 tm_pt_7=-1.000 tm_pt_8=-2.718 tm_pt_9=-0.050 tm_pt_10=0.000 tm_pt_11=0.000 tm_pt_12=0.000 tm_pt_13=0.000 tm_pt_14=-1.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=1.000 lm_0=-4.240 lm_1=-6.522 WordPenalty=-1.303 OOVPenalty=0.000 ||| -21.743
+26 ||| < ঠিকানা > ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-2.000 tm_pt_3=0.000 tm_pt_4=-2.000 tm_pt_5=-2.518 tm_pt_6=-29.231 tm_pt_7=-2.000 tm_pt_8=-5.436 tm_pt_9=-0.000 tm_pt_10=-0.118 tm_pt_11=0.000 tm_pt_12=0.000 tm_pt_13=0.000 tm_pt_14=-2.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=3.000 lm_0=-15.853 lm_1=-13.045 WordPenalty=-2.171 OOVPenalty=-100.000 ||| -136.961
+26 ||| the lt ঠিকানা > ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-2.000 tm_pt_3=0.000 tm_pt_4=-1.000 tm_pt_5=-6.362 tm_pt_6=-20.589 tm_pt_7=-2.000 tm_pt_8=-5.436 tm_pt_9=-0.135 tm_pt_10=-2.453 tm_pt_11=0.000 tm_pt_12=0.000 tm_pt_13=0.000 tm_pt_14=-2.000 tm_pt_15=-1.000 tm_pt_16=0.000 tm_glue_0=2.000 lm_0=-17.709 lm_1=-16.306 WordPenalty=-2.606 OOVPenalty=-200.000 ||| -241.704
+26 ||| < ঠিকানা , gt , ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-3.000 tm_pt_3=0.000 tm_pt_4=-3.000 tm_pt_5=-4.258 tm_pt_6=-15.720 tm_pt_7=-3.000 tm_pt_8=-8.154 tm_pt_9=-0.000 tm_pt_10=-5.328 tm_pt_11=0.000 tm_pt_12=-1.262 tm_pt_13=0.000 tm_pt_14=-3.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=5.000 lm_0=-20.678 lm_1=-19.567 WordPenalty=-3.040 OOVPenalty=-200.000 ||| -244.166
+26 ||| , lt , ঠিকানা > ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-3.000 tm_pt_3=0.000 tm_pt_4=-3.000 tm_pt_5=-4.245 tm_pt_6=-15.998 tm_pt_7=-3.000 tm_pt_8=-8.154 tm_pt_9=-0.000 tm_pt_10=-5.446 tm_pt_11=0.000 tm_pt_12=-1.262 tm_pt_13=0.000 tm_pt_14=-3.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=5.000 lm_0=-20.961 lm_1=-19.567 WordPenalty=-3.040 OOVPenalty=-200.000 ||| -244.640
+26 ||| < ঠিকানা , gt ; ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-3.000 tm_pt_3=0.000 tm_pt_4=-3.000 tm_pt_5=-3.201 tm_pt_6=-18.449 tm_pt_7=-3.000 tm_pt_8=-8.154 tm_pt_9=-0.000 tm_pt_10=-1.596 tm_pt_11=0.000 tm_pt_12=-1.248 tm_pt_13=0.000 tm_pt_14=-3.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=5.000 lm_0=-21.561 lm_1=-19.567 WordPenalty=-3.040 OOVPenalty=-200.000 ||| -245.017
+27 ||| september ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-1.000 tm_pt_3=0.000 tm_pt_4=-1.000 tm_pt_5=-0.176 tm_pt_6=-0.047 tm_pt_7=-1.000 tm_pt_8=-2.718 tm_pt_9=-0.000 tm_pt_10=-0.013 tm_pt_11=0.000 tm_pt_12=-0.025 tm_pt_13=0.000 tm_pt_14=-1.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=1.000 lm_0=-3.024 lm_1=-6.522 WordPenalty=-1.303 OOVPenalty=0.000 ||| -6.923
+27 ||| september . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-1.000 tm_pt_3=0.000 tm_pt_4=-1.000 tm_pt_5=-9.282 tm_pt_6=-0.716 tm_pt_7=-1.000 tm_pt_8=-2.718 tm_pt_9=-0.368 tm_pt_10=-1.099 tm_pt_11=0.000 tm_pt_12=-3.689 tm_pt_13=0.000 tm_pt_14=-2.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=1.000 lm_0=-4.832 lm_1=-9.783 WordPenalty=-1.737 OOVPenalty=0.000 ||| -15.112
+27 ||| সেপ্টেম্বর ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=0.000 tm_pt_3=0.000 tm_pt_4=0.000 tm_pt_5=0.000 tm_pt_6=0.000 tm_pt_7=0.000 tm_pt_8=0.000 tm_pt_9=0.000 tm_pt_10=0.000 tm_pt_11=0.000 tm_pt_12=0.000 tm_pt_13=0.000 tm_pt_14=0.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=1.000 lm_0=-7.355 lm_1=-6.522 WordPenalty=-1.303 OOVPenalty=-100.000 ||| -111.358
+28 ||| from this theory though big বিস্ফোরণোর against can not be but it can be support . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-10.000 tm_pt_3=0.000 tm_pt_4=-8.000 tm_pt_5=-27.960 tm_pt_6=-23.108 tm_pt_7=-10.000 tm_pt_8=-27.180 tm_pt_9=-1.692 tm_pt_10=-15.673 tm_pt_11=0.000 tm_pt_12=-5.046 tm_pt_13=0.000 tm_pt_14=-15.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=9.000 lm_0=-35.950 lm_1=-55.439 WordPenalty=-7.817 OOVPenalty=-100.000 ||| -202.585
+28 ||| from this theory though big বিস্ফোরণোর against can not be rather it can be support . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-10.000 tm_pt_3=0.000 tm_pt_4=-8.000 tm_pt_5=-25.087 tm_pt_6=-23.283 tm_pt_7=-10.000 tm_pt_8=-27.180 tm_pt_9=-1.692 tm_pt_10=-13.275 tm_pt_11=0.000 tm_pt_12=-5.046 tm_pt_13=0.000 tm_pt_14=-15.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=9.000 lm_0=-37.068 lm_1=-55.439 WordPenalty=-7.817 OOVPenalty=-100.000 ||| -202.856
+28 ||| from this theory though big বিস্ফোরণোর against can not be but it can be supported . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-10.000 tm_pt_3=0.000 tm_pt_4=-8.000 tm_pt_5=-27.543 tm_pt_6=-22.616 tm_pt_7=-10.000 tm_pt_8=-27.180 tm_pt_9=-2.691 tm_pt_10=-16.797 tm_pt_11=0.000 tm_pt_12=-7.126 tm_pt_13=0.000 tm_pt_14=-15.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=9.000 lm_0=-34.996 lm_1=-55.439 WordPenalty=-7.817 OOVPenalty=-100.000 ||| -203.004
+28 ||| from this theory though big বিস্ফোরণোর against can not be rather it can be supported . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-10.000 tm_pt_3=0.000 tm_pt_4=-8.000 tm_pt_5=-24.670 tm_pt_6=-22.790 tm_pt_7=-10.000 tm_pt_8=-27.180 tm_pt_9=-2.691 tm_pt_10=-14.399 tm_pt_11=0.000 tm_pt_12=-7.126 tm_pt_13=0.000 tm_pt_14=-15.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=9.000 lm_0=-36.114 lm_1=-55.439 WordPenalty=-7.817 OOVPenalty=-100.000 ||| -203.274
+28 ||| from this theory though the বিস্ফোরণোর against can not be but it can be support . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-10.000 tm_pt_3=0.000 tm_pt_4=-8.000 tm_pt_5=-32.548 tm_pt_6=-23.199 tm_pt_7=-10.000 tm_pt_8=-27.180 tm_pt_9=-1.692 tm_pt_10=-19.989 tm_pt_11=0.000 tm_pt_12=-5.126 tm_pt_13=0.000 tm_pt_14=-15.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=9.000 lm_0=-35.207 lm_1=-55.439 WordPenalty=-7.817 OOVPenalty=-100.000 ||| -203.722
+28 ||| this theory from though big বিস্ফোরণোর against can not be but it can be support . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-10.000 tm_pt_3=0.000 tm_pt_4=-8.000 tm_pt_5=-27.960 tm_pt_6=-23.108 tm_pt_7=-10.000 tm_pt_8=-27.180 tm_pt_9=-1.572 tm_pt_10=-14.343 tm_pt_11=0.000 tm_pt_12=-4.354 tm_pt_13=0.000 tm_pt_14=-15.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=9.000 lm_0=-37.468 lm_1=-55.439 WordPenalty=-7.817 OOVPenalty=-100.000 ||| -203.775
+29 ||| agricultural in production france country ; it is most important mainly খাদ্যশস্য wine cheese and other কৃষিদ্রব্য europe and the world export . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-15.000 tm_pt_3=0.000 tm_pt_4=-13.000 tm_pt_5=-41.676 tm_pt_6=-19.680 tm_pt_7=-15.000 tm_pt_8=-40.770 tm_pt_9=-2.745 tm_pt_10=-19.919 tm_pt_11=0.000 tm_pt_12=-7.203 tm_pt_13=0.000 tm_pt_14=-21.000 tm_pt_15=-1.000 tm_pt_16=0.000 tm_glue_0=14.000 lm_0=-65.262 lm_1=-78.267 WordPenalty=-10.857 OOVPenalty=-200.000 ||| -359.581
+29 ||| agricultural in production france country ; it is the most important mainly খাদ্যশস্য wine cheese and other কৃষিদ্রব্য europe and the world export . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-15.000 tm_pt_3=0.000 tm_pt_4=-13.000 tm_pt_5=-47.097 tm_pt_6=-19.754 tm_pt_7=-15.000 tm_pt_8=-40.770 tm_pt_9=-2.612 tm_pt_10=-18.967 tm_pt_11=0.000 tm_pt_12=-6.355 tm_pt_13=0.000 tm_pt_14=-22.000 tm_pt_15=-1.000 tm_pt_16=0.000 tm_glue_0=14.000 lm_0=-63.377 lm_1=-81.528 WordPenalty=-11.292 OOVPenalty=-200.000 ||| -359.757
+29 ||| agriculture in production france country ; it is most important mainly খাদ্যশস্য wine cheese and other কৃষিদ্রব্য europe and the world export . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-15.000 tm_pt_3=0.000 tm_pt_4=-13.000 tm_pt_5=-41.940 tm_pt_6=-20.240 tm_pt_7=-15.000 tm_pt_8=-40.770 tm_pt_9=-2.746 tm_pt_10=-19.868 tm_pt_11=0.000 tm_pt_12=-8.068 tm_pt_13=0.000 tm_pt_14=-21.000 tm_pt_15=-1.000 tm_pt_16=0.000 tm_glue_0=14.000 lm_0=-65.754 lm_1=-78.267 WordPenalty=-10.857 OOVPenalty=-200.000 ||| -360.724
+29 ||| agricultural in production france is most important country ; it mainly খাদ্যশস্য wine cheese and other কৃষিদ্রব্য europe and the world export . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-17.000 tm_pt_3=0.000 tm_pt_4=-16.000 tm_pt_5=-41.676 tm_pt_6=-19.680 tm_pt_7=-17.000 tm_pt_8=-46.206 tm_pt_9=-1.659 tm_pt_10=-26.434 tm_pt_11=0.000 tm_pt_12=-9.771 tm_pt_13=0.000 tm_pt_14=-21.000 tm_pt_15=-1.000 tm_pt_16=0.000 tm_glue_0=18.000 lm_0=-66.157 lm_1=-78.267 WordPenalty=-10.857 OOVPenalty=-200.000 ||| -360.813
+29 ||| agricultural in production france is most important country , it basically খাদ্যশস্য wine cheese and other কৃষিদ্রব্য europe and the world export . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-17.000 tm_pt_3=0.000 tm_pt_4=-16.000 tm_pt_5=-42.462 tm_pt_6=-17.947 tm_pt_7=-17.000 tm_pt_8=-46.206 tm_pt_9=-2.610 tm_pt_10=-29.579 tm_pt_11=0.000 tm_pt_12=-9.660 tm_pt_13=0.000 tm_pt_14=-21.000 tm_pt_15=-1.000 tm_pt_16=0.000 tm_glue_0=18.000 lm_0=-65.377 lm_1=-78.267 WordPenalty=-10.857 OOVPenalty=-200.000 ||| -360.960
+29 ||| agricultural in production france country ; it is most important mainly খাদ্যশস্য wine cheese and other কৃষিদ্রব্য europe and whole world export . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-15.000 tm_pt_3=0.000 tm_pt_4=-13.000 tm_pt_5=-37.477 tm_pt_6=-20.252 tm_pt_7=-15.000 tm_pt_8=-40.770 tm_pt_9=-2.745 tm_pt_10=-15.113 tm_pt_11=0.000 tm_pt_12=-7.539 tm_pt_13=0.000 tm_pt_14=-21.000 tm_pt_15=-1.000 tm_pt_16=0.000 tm_glue_0=14.000 lm_0=-67.863 lm_1=-78.267 WordPenalty=-10.857 OOVPenalty=-200.000 ||| -361.064
+29 ||| agricultural in production france country ; it is the most important mainly খাদ্যশস্য wine cheese and other কৃষিদ্রব্য europe and whole world export . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-15.000 tm_pt_3=0.000 tm_pt_4=-13.000 tm_pt_5=-42.898 tm_pt_6=-20.326 tm_pt_7=-15.000 tm_pt_8=-40.770 tm_pt_9=-2.612 tm_pt_10=-14.161 tm_pt_11=0.000 tm_pt_12=-6.692 tm_pt_13=0.000 tm_pt_14=-22.000 tm_pt_15=-1.000 tm_pt_16=0.000 tm_glue_0=14.000 lm_0=-65.977 lm_1=-81.528 WordPenalty=-11.292 OOVPenalty=-200.000 ||| -361.239
+29 ||| agricultural in production france country ; it is most important mainly খাদ্যশস্য wine cheese and other কৃষিদ্রব্য europe and the the export . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-15.000 tm_pt_3=0.000 tm_pt_4=-12.000 tm_pt_5=-45.631 tm_pt_6=-20.302 tm_pt_7=-15.000 tm_pt_8=-40.770 tm_pt_9=-2.659 tm_pt_10=-22.659 tm_pt_11=0.000 tm_pt_12=-6.429 tm_pt_13=0.000 tm_pt_14=-21.000 tm_pt_15=-1.000 tm_pt_16=0.000 tm_glue_0=13.000 lm_0=-65.079 lm_1=-78.267 WordPenalty=-10.857 OOVPenalty=-200.000 ||| -361.654
+29 ||| agricultural in production france country ; it is most important mainly খাদ্যশস্য wine cheese and other কৃষিদ্রব্য europe and the in export . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-15.000 tm_pt_3=0.000 tm_pt_4=-13.000 tm_pt_5=-44.352 tm_pt_6=-19.955 tm_pt_7=-15.000 tm_pt_8=-40.770 tm_pt_9=-2.977 tm_pt_10=-22.858 tm_pt_11=0.000 tm_pt_12=-7.608 tm_pt_13=0.000 tm_pt_14=-21.000 tm_pt_15=-1.000 tm_pt_16=0.000 tm_glue_0=14.000 lm_0=-65.585 lm_1=-78.267 WordPenalty=-10.857 OOVPenalty=-200.000 ||| -361.683
+29 ||| agricultural in production france country ; it is the most important mainly খাদ্যশস্য wine cheese and other কৃষিদ্রব্য europe and the the export . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-15.000 tm_pt_3=0.000 tm_pt_4=-12.000 tm_pt_5=-51.052 tm_pt_6=-20.376 tm_pt_7=-15.000 tm_pt_8=-40.770 tm_pt_9=-2.526 tm_pt_10=-21.707 tm_pt_11=0.000 tm_pt_12=-5.582 tm_pt_13=0.000 tm_pt_14=-22.000 tm_pt_15=-1.000 tm_pt_16=0.000 tm_glue_0=13.000 lm_0=-63.193 lm_1=-81.528 WordPenalty=-11.292 OOVPenalty=-200.000 ||| -361.830
+30 ||| their in mathematics পাটীগণিতের person was . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-4.000 tm_pt_3=0.000 tm_pt_4=-4.000 tm_pt_5=-17.754 tm_pt_6=-6.222 tm_pt_7=-4.000 tm_pt_8=-10.872 tm_pt_9=-1.018 tm_pt_10=-0.326 tm_pt_11=0.000 tm_pt_12=-0.500 tm_pt_13=0.000 tm_pt_14=-6.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=5.000 lm_0=-20.967 lm_1=-25.558 WordPenalty=-3.909 OOVPenalty=-100.000 ||| -147.537
+30 ||| their in mathematics পাটীগণিতের were was . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-5.000 tm_pt_3=0.000 tm_pt_4=-4.000 tm_pt_5=-18.983 tm_pt_6=-5.123 tm_pt_7=-5.000 tm_pt_8=-13.590 tm_pt_9=-0.386 tm_pt_10=-4.319 tm_pt_11=0.000 tm_pt_12=-1.553 tm_pt_13=0.000 tm_pt_14=-6.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=5.000 lm_0=-20.867 lm_1=-25.558 WordPenalty=-3.909 OOVPenalty=-100.000 ||| -149.316
+30 ||| their in mathematics পাটীগণিতের are was . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-5.000 tm_pt_3=0.000 tm_pt_4=-5.000 tm_pt_5=-19.645 tm_pt_6=-4.613 tm_pt_7=-5.000 tm_pt_8=-13.590 tm_pt_9=-1.018 tm_pt_10=-7.011 tm_pt_11=0.000 tm_pt_12=-2.428 tm_pt_13=0.000 tm_pt_14=-6.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=6.000 lm_0=-20.651 lm_1=-25.558 WordPenalty=-3.909 OOVPenalty=-100.000 ||| -149.564
+30 ||| their in mathematics was পাটীগণিতের were . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-4.000 tm_pt_3=0.000 tm_pt_4=-2.000 tm_pt_5=-18.983 tm_pt_6=-5.123 tm_pt_7=-4.000 tm_pt_8=-10.872 tm_pt_9=-0.754 tm_pt_10=-7.426 tm_pt_11=0.000 tm_pt_12=-1.413 tm_pt_13=0.000 tm_pt_14=-6.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=3.000 lm_0=-19.648 lm_1=-25.558 WordPenalty=-3.909 OOVPenalty=-100.000 ||| -149.641
+30 ||| their in mathematics পাটীগণিতের priority was . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-5.000 tm_pt_3=0.000 tm_pt_4=-5.000 tm_pt_5=-14.687 tm_pt_6=-5.123 tm_pt_7=-5.000 tm_pt_8=-13.590 tm_pt_9=-1.018 tm_pt_10=-1.004 tm_pt_11=0.000 tm_pt_12=-2.428 tm_pt_13=0.000 tm_pt_14=-6.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=6.000 lm_0=-22.612 lm_1=-25.558 WordPenalty=-3.909 OOVPenalty=-100.000 ||| -149.658
+30 ||| their in mathematics পাটীগণিতের in was . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-5.000 tm_pt_3=0.000 tm_pt_4=-5.000 tm_pt_5=-21.111 tm_pt_6=-4.836 tm_pt_7=-5.000 tm_pt_8=-13.590 tm_pt_9=-1.018 tm_pt_10=-8.122 tm_pt_11=0.000 tm_pt_12=-2.428 tm_pt_13=0.000 tm_pt_14=-6.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=6.000 lm_0=-20.357 lm_1=-25.558 WordPenalty=-3.909 OOVPenalty=-100.000 ||| -149.845
+30 ||| their in mathematics পাটীগণিতের dominance was . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-5.000 tm_pt_3=0.000 tm_pt_4=-4.000 tm_pt_5=-14.926 tm_pt_6=-5.529 tm_pt_7=-5.000 tm_pt_8=-13.590 tm_pt_9=-0.386 tm_pt_10=-1.004 tm_pt_11=0.000 tm_pt_12=-1.553 tm_pt_13=0.000 tm_pt_14=-6.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=5.000 lm_0=-22.622 lm_1=-25.558 WordPenalty=-3.909 OOVPenalty=-100.000 ||| -149.990
+31 ||| deshgulo are : france call , make noise china belgium switzerland germany denmark sweden austria chekoslovakia argentina italy norway হাঙ্গেরী yugoslavia bulgaria romania গ্রীস egypt singapore indonesia থাইল্যান্ড japan burma হল্যান্ড soviet russia iran iraq and sri lanka . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-30.000 tm_pt_3=0.000 tm_pt_4=-30.000 tm_pt_5=-72.146 tm_pt_6=-14.912 tm_pt_7=-30.000 tm_pt_8=-81.540 tm_pt_9=-10.236 tm_pt_10=-15.926 tm_pt_11=0.000 tm_pt_12=-4.654 tm_pt_13=0.000 tm_pt_14=-35.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=34.000 lm_0=-154.514 lm_1=-130.445 WordPenalty=-17.806 OOVPenalty=-400.000 ||| -710.087
+31 ||| deshgulo are : france call , make noise china belgium switzerland germany denmark sweden austria chekoslovakia argentina italy norway হাঙ্গেরী yugoslavia bulgaria rumania গ্রীস egypt singapore indonesia থাইল্যান্ড japan burma হল্যান্ড soviet russia iran iraq and sri lanka . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-30.000 tm_pt_3=0.000 tm_pt_4=-30.000 tm_pt_5=-70.620 tm_pt_6=-14.912 tm_pt_7=-30.000 tm_pt_8=-81.540 tm_pt_9=-10.868 tm_pt_10=-15.926 tm_pt_11=0.000 tm_pt_12=-5.347 tm_pt_13=0.000 tm_pt_14=-35.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=34.000 lm_0=-154.478 lm_1=-130.445 WordPenalty=-17.806 OOVPenalty=-400.000 ||| -710.513
+31 ||| deshgulo france are : call , make noise china belgium switzerland germany denmark sweden austria chekoslovakia argentina italy norway হাঙ্গেরী yugoslavia bulgaria romania গ্রীস egypt singapore indonesia থাইল্যান্ড japan burma হল্যান্ড soviet russia iran iraq and sri lanka . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-30.000 tm_pt_3=0.000 tm_pt_4=-29.000 tm_pt_5=-72.146 tm_pt_6=-14.912 tm_pt_7=-30.000 tm_pt_8=-81.540 tm_pt_9=-10.236 tm_pt_10=-18.669 tm_pt_11=0.000 tm_pt_12=-7.022 tm_pt_13=0.000 tm_pt_14=-35.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=33.000 lm_0=-153.583 lm_1=-130.445 WordPenalty=-17.806 OOVPenalty=-400.000 ||| -711.448
+31 ||| deshgulo france are : call , make noise china belgium switzerland germany denmark sweden austria chekoslovakia argentina italy norway হাঙ্গেরী yugoslavia bulgaria rumania গ্রীস egypt singapore indonesia থাইল্যান্ড japan burma হল্যান্ড soviet russia iran iraq and sri lanka . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-30.000 tm_pt_3=0.000 tm_pt_4=-29.000 tm_pt_5=-70.620 tm_pt_6=-14.912 tm_pt_7=-30.000 tm_pt_8=-81.540 tm_pt_9=-10.868 tm_pt_10=-18.669 tm_pt_11=0.000 tm_pt_12=-7.715 tm_pt_13=0.000 tm_pt_14=-35.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=33.000 lm_0=-153.546 lm_1=-130.445 WordPenalty=-17.806 OOVPenalty=-400.000 ||| -711.875
+31 ||| deshgulo are : france call , make noise china belgium switzerland germany denmark chekoslovakia sweden austria argentina italy norway হাঙ্গেরী yugoslavia bulgaria romania গ্রীস egypt singapore indonesia থাইল্যান্ড japan burma হল্যান্ড soviet russia iran iraq and sri lanka . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-30.000 tm_pt_3=0.000 tm_pt_4=-29.000 tm_pt_5=-72.146 tm_pt_6=-14.912 tm_pt_7=-30.000 tm_pt_8=-81.540 tm_pt_9=-10.468 tm_pt_10=-15.639 tm_pt_11=0.000 tm_pt_12=-4.654 tm_pt_13=0.000 tm_pt_14=-35.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=32.000 lm_0=-154.514 lm_1=-130.445 WordPenalty=-17.806 OOVPenalty=-400.000 ||| -712.168
+31 ||| deshgulo are : france call , make noise belgium china switzerland germany denmark sweden austria chekoslovakia argentina italy norway হাঙ্গেরী yugoslavia bulgaria romania গ্রীস egypt singapore indonesia থাইল্যান্ড japan burma হল্যান্ড soviet russia iran iraq and sri lanka . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-30.000 tm_pt_3=0.000 tm_pt_4=-29.000 tm_pt_5=-72.146 tm_pt_6=-14.912 tm_pt_7=-30.000 tm_pt_8=-81.540 tm_pt_9=-10.254 tm_pt_10=-18.146 tm_pt_11=0.000 tm_pt_12=-7.070 tm_pt_13=0.000 tm_pt_14=-35.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=33.000 lm_0=-154.514 lm_1=-130.445 WordPenalty=-17.806 OOVPenalty=-400.000 ||| -712.497
+31 ||| deshgulo are : france call , make noise china belgium switzerland germany denmark sweden austria chekoslovakia argentina italy norway হাঙ্গেরী yugoslavia bulgaria romania গ্রীস egypt singapore indonesia থাইল্যান্ড japan burma হল্যান্ড soviet union iran iraq and sri lanka . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-30.000 tm_pt_3=0.000 tm_pt_4=-30.000 tm_pt_5=-75.530 tm_pt_6=-17.369 tm_pt_7=-30.000 tm_pt_8=-81.540 tm_pt_9=-10.604 tm_pt_10=-19.643 tm_pt_11=0.000 tm_pt_12=-7.427 tm_pt_13=0.000 tm_pt_14=-35.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=34.000 lm_0=-153.515 lm_1=-130.445 WordPenalty=-17.806 OOVPenalty=-400.000 ||| -712.585
+31 ||| deshgulo are : france call , make noise china belgium switzerland garmany denmark sweden austria chekoslovakia argentina italy norway হাঙ্গেরী yugoslavia bulgaria romania গ্রীস egypt singapore indonesia থাইল্যান্ড japan burma হল্যান্ড soviet russia iran iraq and sri lanka . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-30.000 tm_pt_3=0.000 tm_pt_4=-30.000 tm_pt_5=-69.730 tm_pt_6=-18.090 tm_pt_7=-30.000 tm_pt_8=-81.540 tm_pt_9=-11.186 tm_pt_10=-12.550 tm_pt_11=0.000 tm_pt_12=-6.040 tm_pt_13=0.000 tm_pt_14=-35.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=34.000 lm_0=-155.925 lm_1=-130.445 WordPenalty=-17.806 OOVPenalty=-400.000 ||| -712.850
+31 ||| deshgulo are : france call , make noise belgium china switzerland germany denmark sweden austria chekoslovakia argentina italy norway হাঙ্গেরী yugoslavia bulgaria rumania গ্রীস egypt singapore indonesia থাইল্যান্ড japan burma হল্যান্ড soviet russia iran iraq and sri lanka . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-30.000 tm_pt_3=0.000 tm_pt_4=-29.000 tm_pt_5=-70.620 tm_pt_6=-14.912 tm_pt_7=-30.000 tm_pt_8=-81.540 tm_pt_9=-10.886 tm_pt_10=-18.146 tm_pt_11=0.000 tm_pt_12=-7.763 tm_pt_13=0.000 tm_pt_14=-35.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=33.000 lm_0=-154.478 lm_1=-130.445 WordPenalty=-17.806 OOVPenalty=-400.000 ||| -712.923
+31 ||| deshgulo are : france call , make noise china belgium switzerland germany denmark sweden austria chekoslovakia argentina italy norway হাঙ্গেরী yugoslavia bulgaria rumania গ্রীস egypt singapore indonesia থাইল্যান্ড japan burma হল্যান্ড soviet union iran iraq and sri lanka . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-30.000 tm_pt_3=0.000 tm_pt_4=-30.000 tm_pt_5=-74.004 tm_pt_6=-17.369 tm_pt_7=-30.000 tm_pt_8=-81.540 tm_pt_9=-11.236 tm_pt_10=-19.643 tm_pt_11=0.000 tm_pt_12=-8.120 tm_pt_13=0.000 tm_pt_14=-35.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=34.000 lm_0=-153.479 lm_1=-130.445 WordPenalty=-17.806 OOVPenalty=-400.000 ||| -713.011
+32 ||| this ব্যাসিলিকার places now situated bank of england . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-6.000 tm_pt_3=0.000 tm_pt_4=-4.000 tm_pt_5=-11.945 tm_pt_6=-6.619 tm_pt_7=-6.000 tm_pt_8=-16.308 tm_pt_9=-0.032 tm_pt_10=-3.478 tm_pt_11=0.000 tm_pt_12=-3.808 tm_pt_13=0.000 tm_pt_14=-8.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=5.000 lm_0=-24.256 lm_1=-32.611 WordPenalty=-4.777 OOVPenalty=-100.000 ||| -158.979
+32 ||| this ব্যাসিলিকার places now located bank of england . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-6.000 tm_pt_3=0.000 tm_pt_4=-4.000 tm_pt_5=-12.136 tm_pt_6=-6.827 tm_pt_7=-6.000 tm_pt_8=-16.308 tm_pt_9=-0.063 tm_pt_10=-3.701 tm_pt_11=0.000 tm_pt_12=-4.031 tm_pt_13=0.000 tm_pt_14=-8.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=5.000 lm_0=-24.231 lm_1=-32.611 WordPenalty=-4.777 OOVPenalty=-100.000 ||| -159.219
+32 ||| this ব্যাসিলিকার places now bank of england is . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-7.000 tm_pt_3=0.000 tm_pt_4=-7.000 tm_pt_5=-15.428 tm_pt_6=-6.481 tm_pt_7=-7.000 tm_pt_8=-19.026 tm_pt_9=-0.145 tm_pt_10=-8.604 tm_pt_11=0.000 tm_pt_12=-5.064 tm_pt_13=0.000 tm_pt_14=-8.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=8.000 lm_0=-24.005 lm_1=-32.611 WordPenalty=-4.777 OOVPenalty=-100.000 ||| -159.416
+32 ||| this ব্যাসিলিকার places is situated bank of england . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-6.000 tm_pt_3=0.000 tm_pt_4=-4.000 tm_pt_5=-16.306 tm_pt_6=-7.543 tm_pt_7=-6.000 tm_pt_8=-16.308 tm_pt_9=-0.032 tm_pt_10=-7.140 tm_pt_11=0.000 tm_pt_12=-5.143 tm_pt_13=0.000 tm_pt_14=-8.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=5.000 lm_0=-22.559 lm_1=-32.611 WordPenalty=-4.777 OOVPenalty=-100.000 ||| -159.452
+32 ||| this ব্যাসিলিকার places now bank of england situated . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-6.000 tm_pt_3=0.000 tm_pt_4=-6.000 tm_pt_5=-11.945 tm_pt_6=-6.619 tm_pt_7=-6.000 tm_pt_8=-16.308 tm_pt_9=-0.510 tm_pt_10=-3.660 tm_pt_11=0.000 tm_pt_12=-3.521 tm_pt_13=0.000 tm_pt_14=-8.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=7.000 lm_0=-26.017 lm_1=-32.611 WordPenalty=-4.777 OOVPenalty=-100.000 ||| -159.567
+32 ||| this ব্যাসিলিকার parts is situated bank of england . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-6.000 tm_pt_3=0.000 tm_pt_4=-4.000 tm_pt_5=-17.783 tm_pt_6=-8.332 tm_pt_7=-6.000 tm_pt_8=-16.308 tm_pt_9=-0.075 tm_pt_10=-7.796 tm_pt_11=0.000 tm_pt_12=-5.549 tm_pt_13=0.000 tm_pt_14=-8.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=5.000 lm_0=-21.958 lm_1=-32.611 WordPenalty=-4.777 OOVPenalty=-100.000 ||| -159.610
+32 ||| this ব্যাসিলিকার places is located bank of england . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-6.000 tm_pt_3=0.000 tm_pt_4=-4.000 tm_pt_5=-16.497 tm_pt_6=-7.751 tm_pt_7=-6.000 tm_pt_8=-16.308 tm_pt_9=-0.063 tm_pt_10=-7.363 tm_pt_11=0.000 tm_pt_12=-5.366 tm_pt_13=0.000 tm_pt_14=-8.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=5.000 lm_0=-22.627 lm_1=-32.611 WordPenalty=-4.777 OOVPenalty=-100.000 ||| -159.807
+32 ||| this ব্যাসিলিকার places are situated bank of england . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-6.000 tm_pt_3=0.000 tm_pt_4=-4.000 tm_pt_5=-16.832 tm_pt_6=-8.893 tm_pt_7=-6.000 tm_pt_8=-16.308 tm_pt_9=-0.400 tm_pt_10=-7.875 tm_pt_11=0.000 tm_pt_12=-6.753 tm_pt_13=0.000 tm_pt_14=-8.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=5.000 lm_0=-21.540 lm_1=-32.611 WordPenalty=-4.777 OOVPenalty=-100.000 ||| -159.814
+32 ||| this ব্যাসিলিকার parts now situated bank of england . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-6.000 tm_pt_3=0.000 tm_pt_4=-4.000 tm_pt_5=-13.422 tm_pt_6=-7.408 tm_pt_7=-6.000 tm_pt_8=-16.308 tm_pt_9=-0.075 tm_pt_10=-4.135 tm_pt_11=0.000 tm_pt_12=-4.214 tm_pt_13=0.000 tm_pt_14=-8.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=5.000 lm_0=-24.207 lm_1=-32.611 WordPenalty=-4.777 OOVPenalty=-100.000 ||| -159.820
+32 ||| this ব্যাসিলিকার places now bank of england located . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-6.000 tm_pt_3=0.000 tm_pt_4=-6.000 tm_pt_5=-12.136 tm_pt_6=-6.827 tm_pt_7=-6.000 tm_pt_8=-16.308 tm_pt_9=-0.510 tm_pt_10=-3.660 tm_pt_11=0.000 tm_pt_12=-3.521 tm_pt_13=0.000 tm_pt_14=-8.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=7.000 lm_0=-26.192 lm_1=-32.611 WordPenalty=-4.777 OOVPenalty=-100.000 ||| -159.891
+33 ||| country in বিস্কাই sub-sea south the জিব্রাল্টার strait প্রণালীর in মরক্কো the west and the atlantic ocean . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-13.000 tm_pt_3=0.000 tm_pt_4=-10.000 tm_pt_5=-36.110 tm_pt_6=-14.959 tm_pt_7=-13.000 tm_pt_8=-35.334 tm_pt_9=-4.192 tm_pt_10=-39.743 tm_pt_11=0.000 tm_pt_12=-13.346 tm_pt_13=0.000 tm_pt_14=-14.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=14.000 lm_0=-52.858 lm_1=-61.961 WordPenalty=-8.686 OOVPenalty=-400.000 ||| -537.237
+33 ||| country in বিস্কাই sub-sea south the জিব্রাল্টার strait প্রণালীর the মরক্কো the west and the atlantic ocean . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-13.000 tm_pt_3=0.000 tm_pt_4=-10.000 tm_pt_5=-36.262 tm_pt_6=-14.179 tm_pt_7=-13.000 tm_pt_8=-35.334 tm_pt_9=-4.192 tm_pt_10=-40.812 tm_pt_11=0.000 tm_pt_12=-13.346 tm_pt_13=0.000 tm_pt_14=-14.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=14.000 lm_0=-52.872 lm_1=-61.961 WordPenalty=-8.686 OOVPenalty=-400.000 ||| -537.294
+33 ||| country in বিস্কাই sub-sea south of জিব্রাল্টার strait প্রণালীর in মরক্কো the west and the atlantic ocean . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-13.000 tm_pt_3=0.000 tm_pt_4=-10.000 tm_pt_5=-36.298 tm_pt_6=-15.474 tm_pt_7=-13.000 tm_pt_8=-35.334 tm_pt_9=-4.192 tm_pt_10=-39.086 tm_pt_11=0.000 tm_pt_12=-13.982 tm_pt_13=0.000 tm_pt_14=-14.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=14.000 lm_0=-52.705 lm_1=-61.961 WordPenalty=-8.686 OOVPenalty=-400.000 ||| -537.314
+33 ||| country in বিস্কাই sub-sea south of জিব্রাল্টার strait প্রণালীর the মরক্কো the west and the atlantic ocean . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-13.000 tm_pt_3=0.000 tm_pt_4=-10.000 tm_pt_5=-36.450 tm_pt_6=-14.694 tm_pt_7=-13.000 tm_pt_8=-35.334 tm_pt_9=-4.192 tm_pt_10=-40.155 tm_pt_11=0.000 tm_pt_12=-13.982 tm_pt_13=0.000 tm_pt_14=-14.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=14.000 lm_0=-52.719 lm_1=-61.961 WordPenalty=-8.686 OOVPenalty=-400.000 ||| -537.371
+33 ||| country in বিস্কাই sub-sea south the জিব্রাল্টার strait প্রণালীর south মরক্কো the west and the atlantic ocean . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-13.000 tm_pt_3=0.000 tm_pt_4=-10.000 tm_pt_5=-31.798 tm_pt_6=-14.337 tm_pt_7=-13.000 tm_pt_8=-35.334 tm_pt_9=-4.075 tm_pt_10=-37.232 tm_pt_11=0.000 tm_pt_12=-12.835 tm_pt_13=0.000 tm_pt_14=-14.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=14.000 lm_0=-54.581 lm_1=-61.961 WordPenalty=-8.686 OOVPenalty=-400.000 ||| -537.402
+33 ||| country in বিস্কাই sub-sea south of জিব্রাল্টার strait প্রণালীর south মরক্কো the west and the atlantic ocean . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-13.000 tm_pt_3=0.000 tm_pt_4=-10.000 tm_pt_5=-31.986 tm_pt_6=-14.851 tm_pt_7=-13.000 tm_pt_8=-35.334 tm_pt_9=-4.075 tm_pt_10=-36.576 tm_pt_11=0.000 tm_pt_12=-13.471 tm_pt_13=0.000 tm_pt_14=-14.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=14.000 lm_0=-54.428 lm_1=-61.961 WordPenalty=-8.686 OOVPenalty=-400.000 ||| -537.480
+33 ||| the country in বিস্কাই sub-sea south the জিব্রাল্টার strait প্রণালীর in মরক্কো the west and the atlantic ocean . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-13.000 tm_pt_3=0.000 tm_pt_4=-10.000 tm_pt_5=-41.841 tm_pt_6=-14.869 tm_pt_7=-13.000 tm_pt_8=-35.334 tm_pt_9=-3.198 tm_pt_10=-35.989 tm_pt_11=0.000 tm_pt_12=-11.554 tm_pt_13=0.000 tm_pt_14=-15.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=14.000 lm_0=-52.914 lm_1=-65.223 WordPenalty=-9.120 OOVPenalty=-400.000 ||| -538.051
+33 ||| the country in বিস্কাই sub-sea south the জিব্রাল্টার strait প্রণালীর the মরক্কো the west and the atlantic ocean . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-13.000 tm_pt_3=0.000 tm_pt_4=-10.000 tm_pt_5=-41.994 tm_pt_6=-14.089 tm_pt_7=-13.000 tm_pt_8=-35.334 tm_pt_9=-3.198 tm_pt_10=-37.058 tm_pt_11=0.000 tm_pt_12=-11.554 tm_pt_13=0.000 tm_pt_14=-15.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=14.000 lm_0=-52.928 lm_1=-65.223 WordPenalty=-9.120 OOVPenalty=-400.000 ||| -538.108
+33 ||| the country in বিস্কাই sub-sea south of জিব্রাল্টার strait প্রণালীর in মরক্কো the west and the atlantic ocean . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-13.000 tm_pt_3=0.000 tm_pt_4=-10.000 tm_pt_5=-42.029 tm_pt_6=-15.384 tm_pt_7=-13.000 tm_pt_8=-35.334 tm_pt_9=-3.199 tm_pt_10=-35.333 tm_pt_11=0.000 tm_pt_12=-12.190 tm_pt_13=0.000 tm_pt_14=-15.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=14.000 lm_0=-52.761 lm_1=-65.223 WordPenalty=-9.120 OOVPenalty=-400.000 ||| -538.129
+33 ||| the country in বিস্কাই sub-sea south of জিব্রাল্টার strait প্রণালীর the মরক্কো the west and the atlantic ocean . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-13.000 tm_pt_3=0.000 tm_pt_4=-9.000 tm_pt_5=-42.181 tm_pt_6=-14.603 tm_pt_7=-13.000 tm_pt_8=-35.334 tm_pt_9=-3.113 tm_pt_10=-35.540 tm_pt_11=0.000 tm_pt_12=-11.584 tm_pt_13=0.000 tm_pt_14=-15.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=13.000 lm_0=-52.775 lm_1=-65.223 WordPenalty=-9.120 OOVPenalty=-400.000 ||| -538.642
+34 ||| apart from this situation understood a মুহূর্তে the the decision within অক্ষমতা after taking . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-10.000 tm_pt_3=0.000 tm_pt_4=-9.000 tm_pt_5=-52.889 tm_pt_6=-19.875 tm_pt_7=-10.000 tm_pt_8=-27.180 tm_pt_9=-5.368 tm_pt_10=-31.297 tm_pt_11=0.000 tm_pt_12=-7.910 tm_pt_13=0.000 tm_pt_14=-13.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=11.000 lm_0=-43.285 lm_1=-52.178 WordPenalty=-7.383 OOVPenalty=-200.000 ||| -319.670
+34 ||| other than this situation understood a মুহূর্তে the the decision within অক্ষমতা after taking . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-10.000 tm_pt_3=0.000 tm_pt_4=-9.000 tm_pt_5=-54.580 tm_pt_6=-20.568 tm_pt_7=-10.000 tm_pt_8=-27.180 tm_pt_9=-5.368 tm_pt_10=-31.548 tm_pt_11=0.000 tm_pt_12=-7.910 tm_pt_13=0.000 tm_pt_14=-13.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=11.000 lm_0=-42.880 lm_1=-52.178 WordPenalty=-7.383 OOVPenalty=-200.000 ||| -319.794
+34 ||| moreover this situation understood a মুহূর্তে the the decision within অক্ষমতা after taking . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-10.000 tm_pt_3=0.000 tm_pt_4=-8.000 tm_pt_5=-46.225 tm_pt_6=-21.261 tm_pt_7=-10.000 tm_pt_8=-27.180 tm_pt_9=-4.503 tm_pt_10=-30.392 tm_pt_11=0.000 tm_pt_12=-6.475 tm_pt_13=0.000 tm_pt_14=-12.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=10.000 lm_0=-46.234 lm_1=-48.917 WordPenalty=-6.949 OOVPenalty=-200.000 ||| -319.929
+34 ||| apart from this situation understood a মুহূর্তে the the within অক্ষমতা after taking decision . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-10.000 tm_pt_3=0.000 tm_pt_4=-8.000 tm_pt_5=-52.889 tm_pt_6=-19.875 tm_pt_7=-10.000 tm_pt_8=-27.180 tm_pt_9=-5.736 tm_pt_10=-32.627 tm_pt_11=0.000 tm_pt_12=-8.979 tm_pt_13=0.000 tm_pt_14=-13.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=10.000 lm_0=-41.892 lm_1=-52.178 WordPenalty=-7.383 OOVPenalty=-200.000 ||| -319.936
+34 ||| apart from this situation understood a মুহূর্তে the fast within অক্ষমতা after taking decision . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-10.000 tm_pt_3=0.000 tm_pt_4=-8.000 tm_pt_5=-47.241 tm_pt_6=-20.722 tm_pt_7=-10.000 tm_pt_8=-27.180 tm_pt_9=-5.737 tm_pt_10=-26.965 tm_pt_11=0.000 tm_pt_12=-9.202 tm_pt_13=0.000 tm_pt_14=-13.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=10.000 lm_0=-43.691 lm_1=-52.178 WordPenalty=-7.383 OOVPenalty=-200.000 ||| -319.982
+34 ||| other than this situation understood a মুহূর্তে the the within অক্ষমতা after taking decision . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-10.000 tm_pt_3=0.000 tm_pt_4=-8.000 tm_pt_5=-54.580 tm_pt_6=-20.568 tm_pt_7=-10.000 tm_pt_8=-27.180 tm_pt_9=-5.736 tm_pt_10=-32.878 tm_pt_11=0.000 tm_pt_12=-8.979 tm_pt_13=0.000 tm_pt_14=-13.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=10.000 lm_0=-41.487 lm_1=-52.178 WordPenalty=-7.383 OOVPenalty=-200.000 ||| -320.060
+34 ||| other than this situation understood a মুহূর্তে the fast within অক্ষমতা after taking decision . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-10.000 tm_pt_3=0.000 tm_pt_4=-8.000 tm_pt_5=-48.932 tm_pt_6=-21.415 tm_pt_7=-10.000 tm_pt_8=-27.180 tm_pt_9=-5.737 tm_pt_10=-27.217 tm_pt_11=0.000 tm_pt_12=-9.202 tm_pt_13=0.000 tm_pt_14=-13.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=10.000 lm_0=-43.286 lm_1=-52.178 WordPenalty=-7.383 OOVPenalty=-200.000 ||| -320.106
+34 ||| moreover this situation understood a মুহূর্তে the the within অক্ষমতা after taking decision . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-10.000 tm_pt_3=0.000 tm_pt_4=-7.000 tm_pt_5=-46.225 tm_pt_6=-21.261 tm_pt_7=-10.000 tm_pt_8=-27.180 tm_pt_9=-4.871 tm_pt_10=-31.722 tm_pt_11=0.000 tm_pt_12=-7.544 tm_pt_13=0.000 tm_pt_14=-12.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=9.000 lm_0=-44.841 lm_1=-48.917 WordPenalty=-6.949 OOVPenalty=-200.000 ||| -320.195
+34 ||| moreover this situation understood a মুহূর্তে the fast within অক্ষমতা after taking decision . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-10.000 tm_pt_3=0.000 tm_pt_4=-7.000 tm_pt_5=-40.578 tm_pt_6=-22.109 tm_pt_7=-10.000 tm_pt_8=-27.180 tm_pt_9=-4.872 tm_pt_10=-26.061 tm_pt_11=0.000 tm_pt_12=-7.767 tm_pt_13=0.000 tm_pt_14=-12.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=9.000 lm_0=-46.640 lm_1=-48.917 WordPenalty=-6.949 OOVPenalty=-200.000 ||| -320.241
+35 ||| কার্ল there is work through it by . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-6.000 tm_pt_3=0.000 tm_pt_4=-4.000 tm_pt_5=-27.298 tm_pt_6=-10.704 tm_pt_7=-6.000 tm_pt_8=-16.308 tm_pt_9=-0.553 tm_pt_10=-22.739 tm_pt_11=0.000 tm_pt_12=-4.672 tm_pt_13=0.000 tm_pt_14=-7.000 tm_pt_15=-1.000 tm_pt_16=0.000 tm_glue_0=5.000 lm_0=-22.228 lm_1=-29.350 WordPenalty=-4.343 OOVPenalty=-100.000 ||| -164.983
+35 ||| কার্ল there is work through this by . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-6.000 tm_pt_3=0.000 tm_pt_4=-4.000 tm_pt_5=-28.891 tm_pt_6=-11.650 tm_pt_7=-6.000 tm_pt_8=-16.308 tm_pt_9=-0.553 tm_pt_10=-24.672 tm_pt_11=0.000 tm_pt_12=-5.305 tm_pt_13=0.000 tm_pt_14=-7.000 tm_pt_15=-1.000 tm_pt_16=0.000 tm_glue_0=5.000 lm_0=-21.615 lm_1=-29.350 WordPenalty=-4.343 OOVPenalty=-100.000 ||| -165.578
+35 ||| কার্ল there is work through it by the . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-7.000 tm_pt_3=0.000 tm_pt_4=-6.000 tm_pt_5=-31.306 tm_pt_6=-8.766 tm_pt_7=-7.000 tm_pt_8=-19.026 tm_pt_9=-0.553 tm_pt_10=-19.313 tm_pt_11=0.000 tm_pt_12=-5.011 tm_pt_13=0.000 tm_pt_14=-8.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=7.000 lm_0=-22.767 lm_1=-32.611 WordPenalty=-4.777 OOVPenalty=-100.000 ||| -165.761
+35 ||| কার্ল there is only through it by . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-6.000 tm_pt_3=0.000 tm_pt_4=-4.000 tm_pt_5=-30.406 tm_pt_6=-13.648 tm_pt_7=-6.000 tm_pt_8=-16.308 tm_pt_9=-0.921 tm_pt_10=-25.279 tm_pt_11=0.000 tm_pt_12=-6.687 tm_pt_13=0.000 tm_pt_14=-7.000 tm_pt_15=-1.000 tm_pt_16=0.000 tm_glue_0=5.000 lm_0=-20.644 lm_1=-29.350 WordPenalty=-4.343 OOVPenalty=-100.000 ||| -166.297
+35 ||| কার্ল there is works that it by . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-6.000 tm_pt_3=0.000 tm_pt_4=-3.000 tm_pt_5=-31.776 tm_pt_6=-13.782 tm_pt_7=-6.000 tm_pt_8=-16.308 tm_pt_9=-0.242 tm_pt_10=-22.614 tm_pt_11=0.000 tm_pt_12=-4.895 tm_pt_13=0.000 tm_pt_14=-7.000 tm_pt_15=-1.000 tm_pt_16=0.000 tm_glue_0=4.000 lm_0=-21.335 lm_1=-29.350 WordPenalty=-4.343 OOVPenalty=-100.000 ||| -166.543
+35 ||| কার্ল there is work through this are . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-6.000 tm_pt_3=0.000 tm_pt_4=-4.000 tm_pt_5=-30.491 tm_pt_6=-12.874 tm_pt_7=-6.000 tm_pt_8=-16.308 tm_pt_9=-0.871 tm_pt_10=-25.862 tm_pt_11=0.000 tm_pt_12=-5.998 tm_pt_13=0.000 tm_pt_14=-7.000 tm_pt_15=-1.000 tm_pt_16=0.000 tm_glue_0=5.000 lm_0=-21.240 lm_1=-29.350 WordPenalty=-4.343 OOVPenalty=-100.000 ||| -166.654
+35 ||| কার্ল there is work that it by . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-6.000 tm_pt_3=0.000 tm_pt_4=-3.000 tm_pt_5=-31.374 tm_pt_6=-12.783 tm_pt_7=-6.000 tm_pt_8=-16.308 tm_pt_9=-0.235 tm_pt_10=-23.123 tm_pt_11=0.000 tm_pt_12=-3.979 tm_pt_13=0.000 tm_pt_14=-7.000 tm_pt_15=-1.000 tm_pt_16=0.000 tm_glue_0=4.000 lm_0=-21.935 lm_1=-29.350 WordPenalty=-4.343 OOVPenalty=-100.000 ||| -166.669
+35 ||| কার্ল there is work through this by the . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-7.000 tm_pt_3=0.000 tm_pt_4=-6.000 tm_pt_5=-32.900 tm_pt_6=-9.713 tm_pt_7=-7.000 tm_pt_8=-19.026 tm_pt_9=-0.553 tm_pt_10=-21.246 tm_pt_11=0.000 tm_pt_12=-5.644 tm_pt_13=0.000 tm_pt_14=-8.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=7.000 lm_0=-22.538 lm_1=-32.611 WordPenalty=-4.777 OOVPenalty=-100.000 ||| -166.832
+35 ||| কার্ল there is only through this by . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-6.000 tm_pt_3=0.000 tm_pt_4=-4.000 tm_pt_5=-32.000 tm_pt_6=-14.595 tm_pt_7=-6.000 tm_pt_8=-16.308 tm_pt_9=-0.921 tm_pt_10=-27.212 tm_pt_11=0.000 tm_pt_12=-7.320 tm_pt_13=0.000 tm_pt_14=-7.000 tm_pt_15=-1.000 tm_pt_16=0.000 tm_glue_0=5.000 lm_0=-20.030 lm_1=-29.350 WordPenalty=-4.343 OOVPenalty=-100.000 ||| -166.892
+36 ||| the subject sometimes puran -lrb- from sometimes maintain love story sometimes again from today 's social and political ঘটনাবলি taken from . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-15.000 tm_pt_3=0.000 tm_pt_4=-12.000 tm_pt_5=-49.320 tm_pt_6=-19.514 tm_pt_7=-15.000 tm_pt_8=-40.770 tm_pt_9=-4.321 tm_pt_10=-13.572 tm_pt_11=0.000 tm_pt_12=-8.086 tm_pt_13=0.000 tm_pt_14=-21.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=13.000 lm_0=-60.113 lm_1=-75.006 WordPenalty=-10.423 OOVPenalty=-100.000 ||| -252.556
+36 ||| the subject sometimes puran -lrb- from sometimes maintain love story sometimes again from today 's social and political ঘটনাবলি from accepted . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-16.000 tm_pt_3=0.000 tm_pt_4=-13.000 tm_pt_5=-47.822 tm_pt_6=-18.616 tm_pt_7=-16.000 tm_pt_8=-43.488 tm_pt_9=-3.321 tm_pt_10=-13.748 tm_pt_11=0.000 tm_pt_12=-8.787 tm_pt_13=0.000 tm_pt_14=-21.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=14.000 lm_0=-61.063 lm_1=-75.006 WordPenalty=-10.423 OOVPenalty=-100.000 ||| -252.802
+36 ||| the subject sometimes puran -lrb- from sometimes maintain love story from sometimes again today 's social and political ঘটনাবলি taken from . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-15.000 tm_pt_3=0.000 tm_pt_4=-13.000 tm_pt_5=-49.320 tm_pt_6=-19.514 tm_pt_7=-15.000 tm_pt_8=-40.770 tm_pt_9=-4.321 tm_pt_10=-13.413 tm_pt_11=0.000 tm_pt_12=-7.348 tm_pt_13=0.000 tm_pt_14=-21.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=14.000 lm_0=-62.036 lm_1=-75.006 WordPenalty=-10.423 OOVPenalty=-100.000 ||| -253.669
+36 ||| the subject sometimes puran -lrb- from sometimes maintain love story from sometimes again today 's social and political ঘটনাবলি from accepted . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-16.000 tm_pt_3=0.000 tm_pt_4=-14.000 tm_pt_5=-47.822 tm_pt_6=-18.616 tm_pt_7=-16.000 tm_pt_8=-43.488 tm_pt_9=-3.321 tm_pt_10=-13.589 tm_pt_11=0.000 tm_pt_12=-8.048 tm_pt_13=0.000 tm_pt_14=-21.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=15.000 lm_0=-62.986 lm_1=-75.006 WordPenalty=-10.423 OOVPenalty=-100.000 ||| -253.914
+36 ||| the subject sometimes puran -lrb- sometimes from maintain love story from sometimes again today 's social and political ঘটনাবলি taken from . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-15.000 tm_pt_3=0.000 tm_pt_4=-12.000 tm_pt_5=-49.320 tm_pt_6=-19.514 tm_pt_7=-15.000 tm_pt_8=-40.770 tm_pt_9=-4.321 tm_pt_10=-13.572 tm_pt_11=0.000 tm_pt_12=-8.086 tm_pt_13=0.000 tm_pt_14=-21.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=13.000 lm_0=-61.385 lm_1=-75.006 WordPenalty=-10.423 OOVPenalty=-100.000 ||| -254.130
+36 ||| the subject sometimes puran -lrb- from sometimes maintain love story sometimes again from today 's social and political ঘটনাবলি to accepted . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-15.000 tm_pt_3=0.000 tm_pt_4=-11.000 tm_pt_5=-49.784 tm_pt_6=-19.495 tm_pt_7=-15.000 tm_pt_8=-40.770 tm_pt_9=-3.327 tm_pt_10=-14.588 tm_pt_11=0.000 tm_pt_12=-9.499 tm_pt_13=0.000 tm_pt_14=-21.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=12.000 lm_0=-60.799 lm_1=-75.006 WordPenalty=-10.423 OOVPenalty=-100.000 ||| -254.401
+36 ||| the subject sometimes puran -lrb- from sometimes maintain love story sometimes again from today 's social and political ঘটনাবলি from accepted ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-15.000 tm_pt_3=0.000 tm_pt_4=-12.000 tm_pt_5=-48.412 tm_pt_6=-28.349 tm_pt_7=-15.000 tm_pt_8=-40.770 tm_pt_9=-4.321 tm_pt_10=-16.137 tm_pt_11=0.000 tm_pt_12=-9.102 tm_pt_13=0.000 tm_pt_14=-20.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=13.000 lm_0=-61.702 lm_1=-71.745 WordPenalty=-9.989 OOVPenalty=-100.000 ||| -256.291
+36 ||| the subject sometimes puran -lrb- from sometimes maintain love story sometimes again from today 's social and political ঘটনাবলি from taken ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-15.000 tm_pt_3=0.000 tm_pt_4=-12.000 tm_pt_5=-49.706 tm_pt_6=-28.148 tm_pt_7=-15.000 tm_pt_8=-40.770 tm_pt_9=-4.321 tm_pt_10=-15.774 tm_pt_11=0.000 tm_pt_12=-9.102 tm_pt_13=0.000 tm_pt_14=-20.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=13.000 lm_0=-61.646 lm_1=-71.745 WordPenalty=-9.989 OOVPenalty=-100.000 ||| -256.307
+37 ||| three measure based on the that age is found that is almost ১৩.৭ ± ০.২ billion years . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-11.000 tm_pt_3=0.000 tm_pt_4=-10.000 tm_pt_5=-34.511 tm_pt_6=-23.848 tm_pt_7=-11.000 tm_pt_8=-29.898 tm_pt_9=-3.143 tm_pt_10=-17.819 tm_pt_11=0.000 tm_pt_12=-5.086 tm_pt_13=0.000 tm_pt_14=-15.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=13.000 lm_0=-50.078 lm_1=-61.961 WordPenalty=-8.686 OOVPenalty=-300.000 ||| -425.421
+37 ||| three measure based on the that is found in that is almost ১৩.৭ ± ০.২ billion years . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-11.000 tm_pt_3=0.000 tm_pt_4=-10.000 tm_pt_5=-41.431 tm_pt_6=-27.078 tm_pt_7=-11.000 tm_pt_8=-29.898 tm_pt_9=-3.504 tm_pt_10=-23.356 tm_pt_11=0.000 tm_pt_12=-6.272 tm_pt_13=0.000 tm_pt_14=-15.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=13.000 lm_0=-47.005 lm_1=-61.961 WordPenalty=-8.686 OOVPenalty=-300.000 ||| -426.199
+37 ||| three measure base on the that is found in that is almost ১৩.৭ ± ০.২ billion years . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-11.000 tm_pt_3=0.000 tm_pt_4=-10.000 tm_pt_5=-40.394 tm_pt_6=-25.427 tm_pt_7=-11.000 tm_pt_8=-29.898 tm_pt_9=-3.736 tm_pt_10=-21.197 tm_pt_11=0.000 tm_pt_12=-6.677 tm_pt_13=0.000 tm_pt_14=-15.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=13.000 lm_0=-48.066 lm_1=-61.961 WordPenalty=-8.686 OOVPenalty=-300.000 ||| -426.523
+37 ||| three measure based on the age is found in the is almost ১৩.৭ ± ০.২ billion years . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-11.000 tm_pt_3=0.000 tm_pt_4=-8.000 tm_pt_5=-38.810 tm_pt_6=-24.879 tm_pt_7=-11.000 tm_pt_8=-29.898 tm_pt_9=-3.510 tm_pt_10=-14.501 tm_pt_11=0.000 tm_pt_12=-6.681 tm_pt_13=0.000 tm_pt_14=-15.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=10.000 lm_0=-47.656 lm_1=-61.961 WordPenalty=-8.686 OOVPenalty=-300.000 ||| -426.534
+37 ||| three measure based on the that are found in that is almost ১৩.৭ ± ০.২ billion years . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-11.000 tm_pt_3=0.000 tm_pt_4=-10.000 tm_pt_5=-38.527 tm_pt_6=-24.999 tm_pt_7=-11.000 tm_pt_8=-29.898 tm_pt_9=-3.504 tm_pt_10=-23.330 tm_pt_11=0.000 tm_pt_12=-5.579 tm_pt_13=0.000 tm_pt_14=-15.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=12.000 lm_0=-47.774 lm_1=-61.961 WordPenalty=-8.686 OOVPenalty=-300.000 ||| -426.632
+38 ||| কাছেই are east russia which ওখটস্ক sea and japan sea another can situated . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-9.000 tm_pt_3=0.000 tm_pt_4=-9.000 tm_pt_5=-15.090 tm_pt_6=-9.552 tm_pt_7=-9.000 tm_pt_8=-24.462 tm_pt_9=-0.754 tm_pt_10=-8.592 tm_pt_11=0.000 tm_pt_12=-6.507 tm_pt_13=0.000 tm_pt_14=-12.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=11.000 lm_0=-45.799 lm_1=-48.917 WordPenalty=-6.949 OOVPenalty=-200.000 ||| -298.593
+38 ||| কাছেই are east russia which ওখটস্ক sea and japan sea other can situated . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-9.000 tm_pt_3=0.000 tm_pt_4=-9.000 tm_pt_5=-16.129 tm_pt_6=-8.992 tm_pt_7=-9.000 tm_pt_8=-24.462 tm_pt_9=-0.754 tm_pt_10=-10.656 tm_pt_11=0.000 tm_pt_12=-6.507 tm_pt_13=0.000 tm_pt_14=-12.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=11.000 lm_0=-45.540 lm_1=-48.917 WordPenalty=-6.949 OOVPenalty=-200.000 ||| -298.816
+38 ||| কাছেই are east russia which ওখটস্ক sea and japan sea another can located . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-9.000 tm_pt_3=0.000 tm_pt_4=-9.000 tm_pt_5=-15.282 tm_pt_6=-9.759 tm_pt_7=-9.000 tm_pt_8=-24.462 tm_pt_9=-0.754 tm_pt_10=-8.592 tm_pt_11=0.000 tm_pt_12=-6.507 tm_pt_13=0.000 tm_pt_14=-12.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=11.000 lm_0=-45.973 lm_1=-48.917 WordPenalty=-6.949 OOVPenalty=-200.000 ||| -298.917
+38 ||| কাছেই are east russia which ওখটস্ক sea and japan sea other can located . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-9.000 tm_pt_3=0.000 tm_pt_4=-9.000 tm_pt_5=-16.321 tm_pt_6=-9.200 tm_pt_7=-9.000 tm_pt_8=-24.462 tm_pt_9=-0.754 tm_pt_10=-10.656 tm_pt_11=0.000 tm_pt_12=-6.507 tm_pt_13=0.000 tm_pt_14=-12.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=11.000 lm_0=-45.715 lm_1=-48.917 WordPenalty=-6.949 OOVPenalty=-200.000 ||| -299.140
+38 ||| কাছেই is east russia which ওখটস্ক sea and japan sea another can situated . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-9.000 tm_pt_3=0.000 tm_pt_4=-9.000 tm_pt_5=-16.168 tm_pt_6=-9.805 tm_pt_7=-9.000 tm_pt_8=-24.462 tm_pt_9=-0.754 tm_pt_10=-9.849 tm_pt_11=0.000 tm_pt_12=-6.890 tm_pt_13=0.000 tm_pt_14=-12.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=11.000 lm_0=-45.709 lm_1=-48.917 WordPenalty=-6.949 OOVPenalty=-200.000 ||| -299.237
+38 ||| কাছেই are east russia which ওখটস্ক sea and japan sea another can at ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-9.000 tm_pt_3=0.000 tm_pt_4=-9.000 tm_pt_5=-18.427 tm_pt_6=-18.338 tm_pt_7=-9.000 tm_pt_8=-24.462 tm_pt_9=-1.386 tm_pt_10=-12.882 tm_pt_11=0.000 tm_pt_12=-7.200 tm_pt_13=0.000 tm_pt_14=-11.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=11.000 lm_0=-45.363 lm_1=-45.656 WordPenalty=-6.514 OOVPenalty=-200.000 ||| -301.470
+38 ||| কাছেই are east russia which ওখটস্ক sea and japan sea other can at ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-9.000 tm_pt_3=0.000 tm_pt_4=-9.000 tm_pt_5=-19.466 tm_pt_6=-17.778 tm_pt_7=-9.000 tm_pt_8=-24.462 tm_pt_9=-1.386 tm_pt_10=-14.946 tm_pt_11=0.000 tm_pt_12=-7.200 tm_pt_13=0.000 tm_pt_14=-11.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=11.000 lm_0=-45.105 lm_1=-45.656 WordPenalty=-6.514 OOVPenalty=-200.000 ||| -301.693
+39 ||| kolkata the national library the leading public লাইব্রেরি . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-8.000 tm_pt_3=0.000 tm_pt_4=-8.000 tm_pt_5=-17.614 tm_pt_6=-12.955 tm_pt_7=-8.000 tm_pt_8=-21.744 tm_pt_9=-1.370 tm_pt_10=-17.254 tm_pt_11=0.000 tm_pt_12=-5.352 tm_pt_13=0.000 tm_pt_14=-8.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=9.000 lm_0=-25.795 lm_1=-32.611 WordPenalty=-4.777 OOVPenalty=-100.000 ||| -167.803
+39 ||| kolkata indian national library the leading public লাইব্রেরি . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-7.000 tm_pt_3=0.000 tm_pt_4=-7.000 tm_pt_5=-15.158 tm_pt_6=-14.274 tm_pt_7=-7.000 tm_pt_8=-19.026 tm_pt_9=-1.738 tm_pt_10=-13.975 tm_pt_11=0.000 tm_pt_12=-4.141 tm_pt_13=0.000 tm_pt_14=-8.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=8.000 lm_0=-27.113 lm_1=-32.611 WordPenalty=-4.777 OOVPenalty=-100.000 ||| -168.246
+39 ||| kolkata the national library countries leading public লাইব্রেরি . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-8.000 tm_pt_3=0.000 tm_pt_4=-8.000 tm_pt_5=-14.518 tm_pt_6=-14.341 tm_pt_7=-8.000 tm_pt_8=-21.744 tm_pt_9=-1.370 tm_pt_10=-13.423 tm_pt_11=0.000 tm_pt_12=-5.209 tm_pt_13=0.000 tm_pt_14=-8.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=9.000 lm_0=-27.936 lm_1=-32.611 WordPenalty=-4.777 OOVPenalty=-100.000 ||| -169.289
+39 ||| kolkata is the national library the leading public লাইব্রেরি . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-9.000 tm_pt_3=0.000 tm_pt_4=-7.000 tm_pt_5=-20.796 tm_pt_6=-7.964 tm_pt_7=-9.000 tm_pt_8=-24.462 tm_pt_9=-0.373 tm_pt_10=-17.972 tm_pt_11=0.000 tm_pt_12=-8.984 tm_pt_13=0.000 tm_pt_14=-9.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=8.000 lm_0=-24.166 lm_1=-35.872 WordPenalty=-5.212 OOVPenalty=-100.000 ||| -169.565
+39 ||| kolkata the leading indian national library public লাইব্রেরি . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-7.000 tm_pt_3=0.000 tm_pt_4=-5.000 tm_pt_5=-15.158 tm_pt_6=-14.274 tm_pt_7=-7.000 tm_pt_8=-19.026 tm_pt_9=-2.736 tm_pt_10=-13.149 tm_pt_11=0.000 tm_pt_12=-3.633 tm_pt_13=0.000 tm_pt_14=-8.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=5.000 lm_0=-25.518 lm_1=-32.611 WordPenalty=-4.777 OOVPenalty=-100.000 ||| -169.626
+40 ||| ছত্রাকবিদ্যা ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=0.000 tm_pt_3=0.000 tm_pt_4=0.000 tm_pt_5=0.000 tm_pt_6=0.000 tm_pt_7=0.000 tm_pt_8=0.000 tm_pt_9=0.000 tm_pt_10=0.000 tm_pt_11=0.000 tm_pt_12=0.000 tm_pt_13=0.000 tm_pt_14=0.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=1.000 lm_0=-7.355 lm_1=-6.522 WordPenalty=-1.303 OOVPenalty=-100.000 ||| -111.358
+41 ||| রাষ্ট্রসঙ্ঘের general secretary বান ki moon ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-2.000 tm_pt_3=0.000 tm_pt_4=-2.000 tm_pt_5=-5.844 tm_pt_6=-4.164 tm_pt_7=-2.000 tm_pt_8=-5.436 tm_pt_9=-1.135 tm_pt_10=0.000 tm_pt_11=0.000 tm_pt_12=-0.511 tm_pt_13=0.000 tm_pt_14=-4.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=4.000 lm_0=-24.479 lm_1=-22.828 WordPenalty=-3.474 OOVPenalty=-200.000 ||| -246.419
+41 ||| রাষ্ট্রসঙ্ঘের secretary general বান ki moon ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-2.000 tm_pt_3=0.000 tm_pt_4=-1.000 tm_pt_5=-5.844 tm_pt_6=-4.164 tm_pt_7=-2.000 tm_pt_8=-5.436 tm_pt_9=-1.050 tm_pt_10=0.000 tm_pt_11=0.000 tm_pt_12=-0.223 tm_pt_13=0.000 tm_pt_14=-4.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=3.000 lm_0=-23.882 lm_1=-22.828 WordPenalty=-3.474 OOVPenalty=-200.000 ||| -246.473
+41 ||| রাষ্ট্রসঙ্ঘের chief secretary বান ki moon ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-2.000 tm_pt_3=0.000 tm_pt_4=-2.000 tm_pt_5=-7.082 tm_pt_6=-4.649 tm_pt_7=-2.000 tm_pt_8=-5.436 tm_pt_9=-2.000 tm_pt_10=0.000 tm_pt_11=0.000 tm_pt_12=-1.609 tm_pt_13=0.000 tm_pt_14=-4.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=4.000 lm_0=-24.832 lm_1=-22.828 WordPenalty=-3.474 OOVPenalty=-200.000 ||| -248.355
+41 ||| বান রাষ্ট্রসঙ্ঘের general secretary ki moon ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-2.000 tm_pt_3=0.000 tm_pt_4=-1.000 tm_pt_5=-5.844 tm_pt_6=-4.164 tm_pt_7=-2.000 tm_pt_8=-5.436 tm_pt_9=-2.000 tm_pt_10=0.000 tm_pt_11=0.000 tm_pt_12=-1.609 tm_pt_13=0.000 tm_pt_14=-4.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=2.000 lm_0=-24.479 lm_1=-22.828 WordPenalty=-3.474 OOVPenalty=-200.000 ||| -249.481
+41 ||| রাষ্ট্রসঙ্ঘের general secretary বান what moon ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-3.000 tm_pt_3=0.000 tm_pt_4=-3.000 tm_pt_5=-7.058 tm_pt_6=-3.653 tm_pt_7=-3.000 tm_pt_8=-8.154 tm_pt_9=-0.510 tm_pt_10=-1.322 tm_pt_11=0.000 tm_pt_12=-2.015 tm_pt_13=0.000 tm_pt_14=-4.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=5.000 lm_0=-26.709 lm_1=-22.828 WordPenalty=-3.474 OOVPenalty=-200.000 ||| -249.788
+42 ||| মিনিক্সের of was smells টানেনবম a famous operating system design প্রশিক্ষক . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-8.000 tm_pt_3=0.000 tm_pt_4=-8.000 tm_pt_5=-22.185 tm_pt_6=-8.038 tm_pt_7=-8.000 tm_pt_8=-21.744 tm_pt_9=-2.368 tm_pt_10=-17.562 tm_pt_11=0.000 tm_pt_12=-3.604 tm_pt_13=0.000 tm_pt_14=-9.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=11.000 lm_0=-40.398 lm_1=-42.395 WordPenalty=-6.080 OOVPenalty=-300.000 ||| -390.309
+42 ||| মিনিক্সের of was smells টানেনবম a famous operating system designing প্রশিক্ষক . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-8.000 tm_pt_3=0.000 tm_pt_4=-8.000 tm_pt_5=-21.718 tm_pt_6=-8.785 tm_pt_7=-8.000 tm_pt_8=-21.744 tm_pt_9=-2.418 tm_pt_10=-16.650 tm_pt_11=0.000 tm_pt_12=-4.415 tm_pt_13=0.000 tm_pt_14=-9.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=11.000 lm_0=-40.309 lm_1=-42.395 WordPenalty=-6.080 OOVPenalty=-300.000 ||| -390.456
+42 ||| মিনিক্সের to was smells টানেনবম a famous operating system design প্রশিক্ষক . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-8.000 tm_pt_3=0.000 tm_pt_4=-7.000 tm_pt_5=-22.433 tm_pt_6=-9.424 tm_pt_7=-8.000 tm_pt_8=-21.744 tm_pt_9=-2.375 tm_pt_10=-17.042 tm_pt_11=0.000 tm_pt_12=-3.747 tm_pt_13=0.000 tm_pt_14=-9.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=10.000 lm_0=-40.681 lm_1=-42.395 WordPenalty=-6.080 OOVPenalty=-300.000 ||| -392.066
+42 ||| মিনিক্সের of were smells টানেনবম a famous operating system design প্রশিক্ষক . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-8.000 tm_pt_3=0.000 tm_pt_4=-7.000 tm_pt_5=-22.244 tm_pt_6=-9.498 tm_pt_7=-8.000 tm_pt_8=-21.744 tm_pt_9=-2.375 tm_pt_10=-15.933 tm_pt_11=0.000 tm_pt_12=-5.149 tm_pt_13=0.000 tm_pt_14=-9.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=10.000 lm_0=-40.592 lm_1=-42.395 WordPenalty=-6.080 OOVPenalty=-300.000 ||| -392.154
+43 ||| the টাইম্ the 's of ইন্ডিয়া-তে written in the " it is absurd to compare it with any other indian cinema ... pather panchali is pure cinema " -lrb- " it other by indian films with compared to unreal ... pather panchali are pure film " -rrb- . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-28.000 tm_pt_3=0.000 tm_pt_4=-27.000 tm_pt_5=-85.959 tm_pt_6=-42.518 tm_pt_7=-28.000 tm_pt_8=-76.104 tm_pt_9=-2.168 tm_pt_10=-54.697 tm_pt_11=0.000 tm_pt_12=-12.202 tm_pt_13=0.000 tm_pt_14=-32.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=43.000 lm_0=-132.510 lm_1=-159.795 WordPenalty=-21.715 OOVPenalty=-1600.000 ||| -1912.495
+43 ||| the টাইম্ of 's of ইন্ডিয়া-তে written in the " it is absurd to compare it with any other indian cinema ... pather panchali is pure cinema " -lrb- " it other by indian films with compared to unreal ... pather panchali are pure film " -rrb- . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-28.000 tm_pt_3=0.000 tm_pt_4=-27.000 tm_pt_5=-85.962 tm_pt_6=-42.848 tm_pt_7=-28.000 tm_pt_8=-76.104 tm_pt_9=-2.296 tm_pt_10=-54.098 tm_pt_11=0.000 tm_pt_12=-12.895 tm_pt_13=0.000 tm_pt_14=-32.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=43.000 lm_0=-132.327 lm_1=-159.795 WordPenalty=-21.715 OOVPenalty=-1600.000 ||| -1912.577
+43 ||| the টাইম্ the 's of ইন্ডিয়া-তে written in that " it is absurd to compare it with any other indian cinema ... pather panchali is pure cinema " -lrb- " it other by indian films with compared to unreal ... pather panchali are pure film " -rrb- . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-28.000 tm_pt_3=0.000 tm_pt_4=-27.000 tm_pt_5=-83.499 tm_pt_6=-42.159 tm_pt_7=-28.000 tm_pt_8=-76.104 tm_pt_9=-2.168 tm_pt_10=-52.351 tm_pt_11=0.000 tm_pt_12=-11.882 tm_pt_13=0.000 tm_pt_14=-32.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=43.000 lm_0=-133.742 lm_1=-159.795 WordPenalty=-21.715 OOVPenalty=-1600.000 ||| -1912.703
+43 ||| the টাইম্ of 's of ইন্ডিয়া-তে written in that " it is absurd to compare it with any other indian cinema ... pather panchali is pure cinema " -lrb- " it other by indian films with compared to unreal ... pather panchali are pure film " -rrb- . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-28.000 tm_pt_3=0.000 tm_pt_4=-27.000 tm_pt_5=-83.503 tm_pt_6=-42.489 tm_pt_7=-28.000 tm_pt_8=-76.104 tm_pt_9=-2.296 tm_pt_10=-51.752 tm_pt_11=0.000 tm_pt_12=-12.575 tm_pt_13=0.000 tm_pt_14=-32.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=43.000 lm_0=-133.558 lm_1=-159.795 WordPenalty=-21.715 OOVPenalty=-1600.000 ||| -1912.785
+43 ||| the টাইম্ the 's of ইন্ডিয়া-তে written in the " it is absurd to compare it with any other indian cinema ... pather panchali is pure cinema " -lrb- " it other by indian films with compared to unreal ... pather panchali is pure film " -rrb- . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-29.000 tm_pt_3=0.000 tm_pt_4=-28.000 tm_pt_5=-86.665 tm_pt_6=-42.400 tm_pt_7=-29.000 tm_pt_8=-78.822 tm_pt_9=-2.168 tm_pt_10=-55.990 tm_pt_11=0.000 tm_pt_12=-12.468 tm_pt_13=0.000 tm_pt_14=-32.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=44.000 lm_0=-132.420 lm_1=-159.795 WordPenalty=-21.715 OOVPenalty=-1600.000 ||| -1913.098
+43 ||| the টাইম্ the 's of ইন্ডিয়া-তে written in the " it is absurd to compare it with any other indian cinema ... pather panchali is pure cinema " -lrb- " it other by indian films with compared to unreal ... pather panchali are pure film ' -rrb- . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-28.000 tm_pt_3=0.000 tm_pt_4=-27.000 tm_pt_5=-85.919 tm_pt_6=-42.967 tm_pt_7=-28.000 tm_pt_8=-76.104 tm_pt_9=-2.168 tm_pt_10=-54.224 tm_pt_11=0.000 tm_pt_12=-12.687 tm_pt_13=0.000 tm_pt_14=-32.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=43.000 lm_0=-132.895 lm_1=-159.795 WordPenalty=-21.715 OOVPenalty=-1600.000 ||| -1913.166
+43 ||| the টাইম্ of 's of ইন্ডিয়া-তে written in the " it is absurd to compare it with any other indian cinema ... pather panchali is pure cinema " -lrb- " it other by indian films with compared to unreal ... pather panchali is pure film " -rrb- . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-29.000 tm_pt_3=0.000 tm_pt_4=-28.000 tm_pt_5=-86.668 tm_pt_6=-42.730 tm_pt_7=-29.000 tm_pt_8=-78.822 tm_pt_9=-2.296 tm_pt_10=-55.391 tm_pt_11=0.000 tm_pt_12=-13.161 tm_pt_13=0.000 tm_pt_14=-32.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=44.000 lm_0=-132.237 lm_1=-159.795 WordPenalty=-21.715 OOVPenalty=-1600.000 ||| -1913.180
+43 ||| the টাইম্ of 's of ইন্ডিয়া-তে written in the " it is absurd to compare it with any other indian cinema ... pather panchali is pure cinema " -lrb- " it other by indian films with compared to unreal ... pather panchali are pure film ' -rrb- . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-28.000 tm_pt_3=0.000 tm_pt_4=-27.000 tm_pt_5=-85.923 tm_pt_6=-43.298 tm_pt_7=-28.000 tm_pt_8=-76.104 tm_pt_9=-2.296 tm_pt_10=-53.625 tm_pt_11=0.000 tm_pt_12=-13.380 tm_pt_13=0.000 tm_pt_14=-32.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=43.000 lm_0=-132.712 lm_1=-159.795 WordPenalty=-21.715 OOVPenalty=-1600.000 ||| -1913.248
+43 ||| the টাইম্ the 's of ইন্ডিয়া-তে written in that " it is absurd to compare it with any other indian cinema ... pather panchali is pure cinema " -lrb- " it other by indian films with compared to unreal ... pather panchali is pure film " -rrb- . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-29.000 tm_pt_3=0.000 tm_pt_4=-28.000 tm_pt_5=-84.205 tm_pt_6=-42.040 tm_pt_7=-29.000 tm_pt_8=-78.822 tm_pt_9=-2.168 tm_pt_10=-53.644 tm_pt_11=0.000 tm_pt_12=-12.148 tm_pt_13=0.000 tm_pt_14=-32.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=44.000 lm_0=-133.652 lm_1=-159.795 WordPenalty=-21.715 OOVPenalty=-1600.000 ||| -1913.307
+43 ||| the টাইম্ the 's of ইন্ডিয়া-তে written in that " it is absurd to compare it with any other indian cinema ... pather panchali is pure cinema " -lrb- " it other by indian films with compared to unreal ... pather panchali are pure film ' -rrb- . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-28.000 tm_pt_3=0.000 tm_pt_4=-27.000 tm_pt_5=-83.460 tm_pt_6=-42.608 tm_pt_7=-28.000 tm_pt_8=-76.104 tm_pt_9=-2.168 tm_pt_10=-51.878 tm_pt_11=0.000 tm_pt_12=-12.367 tm_pt_13=0.000 tm_pt_14=-32.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=43.000 lm_0=-134.126 lm_1=-159.795 WordPenalty=-21.715 OOVPenalty=-1600.000 ||| -1913.375
+44 ||| after 1953 on may , nazrul and প্রমীলা দেবীকে চিকিৎসার for london sent . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-10.000 tm_pt_3=0.000 tm_pt_4=-9.000 tm_pt_5=-21.111 tm_pt_6=-17.244 tm_pt_7=-10.000 tm_pt_8=-27.180 tm_pt_9=-1.467 tm_pt_10=-9.648 tm_pt_11=0.000 tm_pt_12=-6.183 tm_pt_13=0.000 tm_pt_14=-11.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=12.000 lm_0=-46.024 lm_1=-48.917 WordPenalty=-6.949 OOVPenalty=-300.000 ||| -403.881
+44 ||| after 1953 on may , nazrul and প্রমীলা দেবীকে চিকিৎসার for london sent to . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-10.000 tm_pt_3=0.000 tm_pt_4=-9.000 tm_pt_5=-24.146 tm_pt_6=-11.499 tm_pt_7=-10.000 tm_pt_8=-27.180 tm_pt_9=-0.603 tm_pt_10=-8.039 tm_pt_11=0.000 tm_pt_12=-5.084 tm_pt_13=0.000 tm_pt_14=-12.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=12.000 lm_0=-46.988 lm_1=-52.178 WordPenalty=-7.383 OOVPenalty=-300.000 ||| -404.278
+44 ||| after on may , 1953 nazrul and প্রমীলা দেবীকে চিকিৎসার for london sent . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-10.000 tm_pt_3=0.000 tm_pt_4=-9.000 tm_pt_5=-21.111 tm_pt_6=-17.244 tm_pt_7=-10.000 tm_pt_8=-27.180 tm_pt_9=-1.467 tm_pt_10=-9.407 tm_pt_11=0.000 tm_pt_12=-6.183 tm_pt_13=0.000 tm_pt_14=-11.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=11.000 lm_0=-45.744 lm_1=-48.917 WordPenalty=-6.949 OOVPenalty=-300.000 ||| -404.473
+44 ||| thereafter on may , 1953 nazrul and প্রমীলা দেবীকে চিকিৎসার for london sent . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-10.000 tm_pt_3=0.000 tm_pt_4=-9.000 tm_pt_5=-20.564 tm_pt_6=-19.871 tm_pt_7=-10.000 tm_pt_8=-27.180 tm_pt_9=-1.553 tm_pt_10=-5.857 tm_pt_11=0.000 tm_pt_12=-6.470 tm_pt_13=0.000 tm_pt_14=-11.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=11.000 lm_0=-45.937 lm_1=-48.917 WordPenalty=-6.949 OOVPenalty=-300.000 ||| -404.756
+44 ||| after nazrul on may , 1953 and প্রমীলা দেবীকে চিকিৎসার for london sent . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-9.000 tm_pt_3=0.000 tm_pt_4=-6.000 tm_pt_5=-21.111 tm_pt_6=-17.244 tm_pt_7=-9.000 tm_pt_8=-24.462 tm_pt_9=-3.018 tm_pt_10=-3.750 tm_pt_11=0.000 tm_pt_12=-4.807 tm_pt_13=0.000 tm_pt_14=-11.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=8.000 lm_0=-45.135 lm_1=-48.917 WordPenalty=-6.949 OOVPenalty=-300.000 ||| -404.776
+44 ||| on may , after 1953 nazrul and প্রমীলা দেবীকে চিকিৎসার for london sent . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-10.000 tm_pt_3=0.000 tm_pt_4=-8.000 tm_pt_5=-21.111 tm_pt_6=-17.244 tm_pt_7=-10.000 tm_pt_8=-27.180 tm_pt_9=-1.419 tm_pt_10=-7.862 tm_pt_11=0.000 tm_pt_12=-5.846 tm_pt_13=0.000 tm_pt_14=-11.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=10.000 lm_0=-45.664 lm_1=-48.917 WordPenalty=-6.949 OOVPenalty=-300.000 ||| -404.778
+44 ||| after on may , 1953 nazrul and প্রমীলা দেবীকে চিকিৎসার for london sent to . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-10.000 tm_pt_3=0.000 tm_pt_4=-9.000 tm_pt_5=-24.146 tm_pt_6=-11.499 tm_pt_7=-10.000 tm_pt_8=-27.180 tm_pt_9=-0.603 tm_pt_10=-7.798 tm_pt_11=0.000 tm_pt_12=-5.084 tm_pt_13=0.000 tm_pt_14=-12.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=11.000 lm_0=-46.709 lm_1=-52.178 WordPenalty=-7.383 OOVPenalty=-300.000 ||| -404.870
+44 ||| after 1953 on may , nazrul and প্রমীলা দেবীকে চিকিৎসার send to london . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-10.000 tm_pt_3=0.000 tm_pt_4=-6.000 tm_pt_5=-22.429 tm_pt_6=-14.784 tm_pt_7=-10.000 tm_pt_8=-27.180 tm_pt_9=-0.835 tm_pt_10=-16.668 tm_pt_11=0.000 tm_pt_12=-7.081 tm_pt_13=0.000 tm_pt_14=-11.000 tm_pt_15=-1.000 tm_pt_16=0.000 tm_glue_0=9.000 lm_0=-43.328 lm_1=-48.917 WordPenalty=-6.949 OOVPenalty=-300.000 ||| -405.022
+45 ||| the southern and the are বিস্তীর্ণ land , west and in are রুক্ষ mountain and mountain . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-13.000 tm_pt_3=0.000 tm_pt_4=-11.000 tm_pt_5=-37.865 tm_pt_6=-16.165 tm_pt_7=-13.000 tm_pt_8=-35.334 tm_pt_9=-1.822 tm_pt_10=-32.454 tm_pt_11=0.000 tm_pt_12=-11.897 tm_pt_13=0.000 tm_pt_14=-15.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=13.000 lm_0=-43.886 lm_1=-58.700 WordPenalty=-8.252 OOVPenalty=-200.000 ||| -320.811
+45 ||| the southern and the are বিস্তীর্ণ land , west and in is রুক্ষ mountain and mountain . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-13.000 tm_pt_3=0.000 tm_pt_4=-11.000 tm_pt_5=-38.815 tm_pt_6=-16.291 tm_pt_7=-13.000 tm_pt_8=-35.334 tm_pt_9=-1.822 tm_pt_10=-33.161 tm_pt_11=0.000 tm_pt_12=-11.730 tm_pt_13=0.000 tm_pt_14=-15.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=13.000 lm_0=-43.796 lm_1=-58.700 WordPenalty=-8.252 OOVPenalty=-200.000 ||| -321.050
+45 ||| the southern and the is বিস্তীর্ণ land , west and in are রুক্ষ mountain and mountain . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-13.000 tm_pt_3=0.000 tm_pt_4=-11.000 tm_pt_5=-38.815 tm_pt_6=-16.291 tm_pt_7=-13.000 tm_pt_8=-35.334 tm_pt_9=-1.822 tm_pt_10=-33.161 tm_pt_11=0.000 tm_pt_12=-11.730 tm_pt_13=0.000 tm_pt_14=-15.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=13.000 lm_0=-43.797 lm_1=-58.700 WordPenalty=-8.252 OOVPenalty=-200.000 ||| -321.051
+45 ||| the south and the are বিস্তীর্ণ land , west and in are রুক্ষ mountain and mountain . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-13.000 tm_pt_3=0.000 tm_pt_4=-11.000 tm_pt_5=-37.569 tm_pt_6=-14.360 tm_pt_7=-13.000 tm_pt_8=-35.334 tm_pt_9=-2.804 tm_pt_10=-31.413 tm_pt_11=0.000 tm_pt_12=-10.259 tm_pt_13=0.000 tm_pt_14=-15.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=13.000 lm_0=-44.748 lm_1=-58.700 WordPenalty=-8.252 OOVPenalty=-200.000 ||| -321.161
+45 ||| the southern and the is বিস্তীর্ণ land , west and in is রুক্ষ mountain and mountain . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-13.000 tm_pt_3=0.000 tm_pt_4=-11.000 tm_pt_5=-39.765 tm_pt_6=-16.417 tm_pt_7=-13.000 tm_pt_8=-35.334 tm_pt_9=-1.822 tm_pt_10=-33.868 tm_pt_11=0.000 tm_pt_12=-11.563 tm_pt_13=0.000 tm_pt_14=-15.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=13.000 lm_0=-43.707 lm_1=-58.700 WordPenalty=-8.252 OOVPenalty=-200.000 ||| -321.289
+45 ||| the south and the are বিস্তীর্ণ land , west and in is রুক্ষ mountain and mountain . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-13.000 tm_pt_3=0.000 tm_pt_4=-11.000 tm_pt_5=-38.520 tm_pt_6=-14.486 tm_pt_7=-13.000 tm_pt_8=-35.334 tm_pt_9=-2.804 tm_pt_10=-32.120 tm_pt_11=0.000 tm_pt_12=-10.092 tm_pt_13=0.000 tm_pt_14=-15.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=13.000 lm_0=-44.658 lm_1=-58.700 WordPenalty=-8.252 OOVPenalty=-200.000 ||| -321.399
+45 ||| the south and the is বিস্তীর্ণ land , west and in are রুক্ষ mountain and mountain . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-13.000 tm_pt_3=0.000 tm_pt_4=-11.000 tm_pt_5=-38.520 tm_pt_6=-14.486 tm_pt_7=-13.000 tm_pt_8=-35.334 tm_pt_9=-2.804 tm_pt_10=-32.120 tm_pt_11=0.000 tm_pt_12=-10.092 tm_pt_13=0.000 tm_pt_14=-15.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=13.000 lm_0=-44.659 lm_1=-58.700 WordPenalty=-8.252 OOVPenalty=-200.000 ||| -321.400
+45 ||| the southern and the are বিস্তীর্ণ land , west and in are রুক্ষ hill and mountain . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-12.000 tm_pt_3=0.000 tm_pt_4=-10.000 tm_pt_5=-38.680 tm_pt_6=-17.956 tm_pt_7=-12.000 tm_pt_8=-32.616 tm_pt_9=-2.454 tm_pt_10=-29.789 tm_pt_11=0.000 tm_pt_12=-11.192 tm_pt_13=0.000 tm_pt_14=-15.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=12.000 lm_0=-44.348 lm_1=-58.700 WordPenalty=-8.252 OOVPenalty=-200.000 ||| -321.539
+45 ||| the south and the is বিস্তীর্ণ land , west and in is রুক্ষ mountain and mountain . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-13.000 tm_pt_3=0.000 tm_pt_4=-11.000 tm_pt_5=-39.470 tm_pt_6=-14.612 tm_pt_7=-13.000 tm_pt_8=-35.334 tm_pt_9=-2.804 tm_pt_10=-32.827 tm_pt_11=0.000 tm_pt_12=-9.925 tm_pt_13=0.000 tm_pt_14=-15.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=13.000 lm_0=-44.570 lm_1=-58.700 WordPenalty=-8.252 OOVPenalty=-200.000 ||| -321.638
+46 ||| ট্রেডমার্ক ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=0.000 tm_pt_3=0.000 tm_pt_4=0.000 tm_pt_5=0.000 tm_pt_6=0.000 tm_pt_7=0.000 tm_pt_8=0.000 tm_pt_9=0.000 tm_pt_10=0.000 tm_pt_11=0.000 tm_pt_12=0.000 tm_pt_13=0.000 tm_pt_14=0.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=1.000 lm_0=-7.355 lm_1=-6.522 WordPenalty=-1.303 OOVPenalty=-100.000 ||| -111.358
+47 ||| লর্ড ওয়েলেসলির -lrb- গভর্নর-জেনারেল ১৭৯৭-১৮০৫ -rrb- during his city are the ghotechilo ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-7.000 tm_pt_3=0.000 tm_pt_4=-7.000 tm_pt_5=-23.058 tm_pt_6=-19.554 tm_pt_7=-7.000 tm_pt_8=-19.026 tm_pt_9=-2.000 tm_pt_10=-11.563 tm_pt_11=0.000 tm_pt_12=-5.490 tm_pt_13=0.000 tm_pt_14=-8.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=11.000 lm_0=-42.957 lm_1=-42.395 WordPenalty=-6.080 OOVPenalty=-400.000 ||| -495.528
+47 ||| লর্ড ওয়েলেসলির -lrb- গভর্নর-জেনারেল ১৭৯৭-১৮০৫ -rrb- during his the are the ghotechilo ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-7.000 tm_pt_3=0.000 tm_pt_4=-7.000 tm_pt_5=-26.541 tm_pt_6=-19.235 tm_pt_7=-7.000 tm_pt_8=-19.026 tm_pt_9=-2.000 tm_pt_10=-15.056 tm_pt_11=0.000 tm_pt_12=-6.114 tm_pt_13=0.000 tm_pt_14=-8.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=11.000 lm_0=-42.350 lm_1=-42.395 WordPenalty=-6.080 OOVPenalty=-400.000 ||| -496.460
+47 ||| লর্ড ওয়েলেসলির -lrb- গভর্নর-জেনারেল ১৭৯৭-১৮০৫ -rrb- during his city are increased ghotechilo ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-7.000 tm_pt_3=0.000 tm_pt_4=-7.000 tm_pt_5=-18.120 tm_pt_6=-20.134 tm_pt_7=-7.000 tm_pt_8=-19.026 tm_pt_9=-2.007 tm_pt_10=-7.473 tm_pt_11=0.000 tm_pt_12=-6.643 tm_pt_13=0.000 tm_pt_14=-8.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=11.000 lm_0=-44.936 lm_1=-42.395 WordPenalty=-6.080 OOVPenalty=-400.000 ||| -496.582
+47 ||| লর্ড ওয়েলেসলির -lrb- গভর্নর-জেনারেল ১৭৯৭-১৮০৫ -rrb- during his the mentionable the ghotechilo ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-7.000 tm_pt_3=0.000 tm_pt_4=-7.000 tm_pt_5=-23.233 tm_pt_6=-19.818 tm_pt_7=-7.000 tm_pt_8=-19.026 tm_pt_9=-2.000 tm_pt_10=-11.720 tm_pt_11=0.000 tm_pt_12=-5.739 tm_pt_13=0.000 tm_pt_14=-8.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=11.000 lm_0=-43.745 lm_1=-42.395 WordPenalty=-6.080 OOVPenalty=-400.000 ||| -496.755
+47 ||| লর্ড ওয়েলেসলির -lrb- গভর্নর-জেনারেল ১৭৯৭-১৮০৫ -rrb- during his the mentionable increased ghotechilo ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-7.000 tm_pt_3=0.000 tm_pt_4=-7.000 tm_pt_5=-18.295 tm_pt_6=-20.398 tm_pt_7=-7.000 tm_pt_8=-19.026 tm_pt_9=-2.007 tm_pt_10=-7.630 tm_pt_11=0.000 tm_pt_12=-6.892 tm_pt_13=0.000 tm_pt_14=-8.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=11.000 lm_0=-45.214 lm_1=-42.395 WordPenalty=-6.080 OOVPenalty=-400.000 ||| -497.179
+47 ||| লর্ড ওয়েলেসলির -lrb- গভর্নর-জেনারেল ১৭৯৭-১৮০৫ -rrb- during his the important the ghotechilo ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-7.000 tm_pt_3=0.000 tm_pt_4=-7.000 tm_pt_5=-24.937 tm_pt_6=-19.942 tm_pt_7=-7.000 tm_pt_8=-19.026 tm_pt_9=-2.000 tm_pt_10=-13.772 tm_pt_11=0.000 tm_pt_12=-5.873 tm_pt_13=0.000 tm_pt_14=-8.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=11.000 lm_0=-43.532 lm_1=-42.395 WordPenalty=-6.080 OOVPenalty=-400.000 ||| -497.439
+47 ||| লর্ড ওয়েলেসলির -lrb- গভর্নর-জেনারেল ১৭৯৭-১৮০৫ -rrb- during his the are increased ghotechilo ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-7.000 tm_pt_3=0.000 tm_pt_4=-7.000 tm_pt_5=-21.603 tm_pt_6=-19.815 tm_pt_7=-7.000 tm_pt_8=-19.026 tm_pt_9=-2.007 tm_pt_10=-10.967 tm_pt_11=0.000 tm_pt_12=-7.267 tm_pt_13=0.000 tm_pt_14=-8.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=11.000 lm_0=-44.328 lm_1=-42.395 WordPenalty=-6.080 OOVPenalty=-400.000 ||| -497.514
+47 ||| লর্ড ওয়েলেসলির -lrb- গভর্নর-জেনারেল ১৭৯৭-১৮০৫ -rrb- during his the notable the ghotechilo ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-7.000 tm_pt_3=0.000 tm_pt_4=-7.000 tm_pt_5=-23.321 tm_pt_6=-20.360 tm_pt_7=-7.000 tm_pt_8=-19.026 tm_pt_9=-2.000 tm_pt_10=-11.784 tm_pt_11=0.000 tm_pt_12=-6.209 tm_pt_13=0.000 tm_pt_14=-8.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=11.000 lm_0=-44.104 lm_1=-42.395 WordPenalty=-6.080 OOVPenalty=-400.000 ||| -497.584
+48 ||| many important and real to solve problems complex number apariharza ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-8.000 tm_pt_3=0.000 tm_pt_4=-7.000 tm_pt_5=-18.905 tm_pt_6=-11.630 tm_pt_7=-8.000 tm_pt_8=-21.744 tm_pt_9=-2.368 tm_pt_10=-8.959 tm_pt_11=0.000 tm_pt_12=-4.527 tm_pt_13=0.000 tm_pt_14=-10.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=7.000 lm_0=-26.936 lm_1=-35.872 WordPenalty=-5.212 OOVPenalty=0.000 ||| -71.121
+48 ||| many important and real problem to solve complex number apariharza ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-8.000 tm_pt_3=0.000 tm_pt_4=-8.000 tm_pt_5=-18.764 tm_pt_6=-11.367 tm_pt_7=-8.000 tm_pt_8=-21.744 tm_pt_9=-3.000 tm_pt_10=-10.348 tm_pt_11=0.000 tm_pt_12=-4.565 tm_pt_13=0.000 tm_pt_14=-10.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=8.000 lm_0=-27.189 lm_1=-35.872 WordPenalty=-5.212 OOVPenalty=0.000 ||| -71.245
+48 ||| many important and real problem to for complex number apariharza ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-9.000 tm_pt_3=0.000 tm_pt_4=-9.000 tm_pt_5=-18.001 tm_pt_6=-7.765 tm_pt_7=-9.000 tm_pt_8=-24.462 tm_pt_9=-2.368 tm_pt_10=-14.453 tm_pt_11=0.000 tm_pt_12=-3.912 tm_pt_13=0.000 tm_pt_14=-10.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=9.000 lm_0=-28.229 lm_1=-35.872 WordPenalty=-5.212 OOVPenalty=0.000 ||| -71.659
+48 ||| many important and real to solve problems complex number inevitable ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-8.000 tm_pt_3=0.000 tm_pt_4=-7.000 tm_pt_5=-20.292 tm_pt_6=-11.630 tm_pt_7=-8.000 tm_pt_8=-21.744 tm_pt_9=-2.368 tm_pt_10=-8.959 tm_pt_11=0.000 tm_pt_12=-4.527 tm_pt_13=0.000 tm_pt_14=-10.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=7.000 lm_0=-27.158 lm_1=-35.872 WordPenalty=-5.212 OOVPenalty=0.000 ||| -71.661
+48 ||| many important and real to solve problems complex numbers apariharza ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-8.000 tm_pt_3=0.000 tm_pt_4=-7.000 tm_pt_5=-18.673 tm_pt_6=-12.332 tm_pt_7=-8.000 tm_pt_8=-21.744 tm_pt_9=-2.368 tm_pt_10=-8.424 tm_pt_11=0.000 tm_pt_12=-4.836 tm_pt_13=0.000 tm_pt_14=-10.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=7.000 lm_0=-27.284 lm_1=-35.872 WordPenalty=-5.212 OOVPenalty=0.000 ||| -71.717
+48 ||| many important and real problem to solve complex number inevitable ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-8.000 tm_pt_3=0.000 tm_pt_4=-8.000 tm_pt_5=-20.150 tm_pt_6=-11.367 tm_pt_7=-8.000 tm_pt_8=-21.744 tm_pt_9=-3.000 tm_pt_10=-10.348 tm_pt_11=0.000 tm_pt_12=-4.565 tm_pt_13=0.000 tm_pt_14=-10.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=8.000 lm_0=-27.411 lm_1=-35.872 WordPenalty=-5.212 OOVPenalty=0.000 ||| -71.784
+48 ||| many important and real problem to solve complex numbers apariharza ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-8.000 tm_pt_3=0.000 tm_pt_4=-8.000 tm_pt_5=-18.531 tm_pt_6=-12.070 tm_pt_7=-8.000 tm_pt_8=-21.744 tm_pt_9=-3.000 tm_pt_10=-9.813 tm_pt_11=0.000 tm_pt_12=-4.873 tm_pt_13=0.000 tm_pt_14=-10.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=8.000 lm_0=-27.536 lm_1=-35.872 WordPenalty=-5.212 OOVPenalty=0.000 ||| -71.841
+48 ||| many important and real problem to for complex number inevitable ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-9.000 tm_pt_3=0.000 tm_pt_4=-9.000 tm_pt_5=-19.387 tm_pt_6=-7.765 tm_pt_7=-9.000 tm_pt_8=-24.462 tm_pt_9=-2.368 tm_pt_10=-14.453 tm_pt_11=0.000 tm_pt_12=-3.912 tm_pt_13=0.000 tm_pt_14=-10.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=9.000 lm_0=-28.451 lm_1=-35.872 WordPenalty=-5.212 OOVPenalty=0.000 ||| -72.199
+48 ||| many important and real to solve problems complex numbers inevitable ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-8.000 tm_pt_3=0.000 tm_pt_4=-7.000 tm_pt_5=-20.059 tm_pt_6=-12.332 tm_pt_7=-8.000 tm_pt_8=-21.744 tm_pt_9=-2.368 tm_pt_10=-8.424 tm_pt_11=0.000 tm_pt_12=-4.836 tm_pt_13=0.000 tm_pt_14=-10.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=7.000 lm_0=-27.505 lm_1=-35.872 WordPenalty=-5.212 OOVPenalty=0.000 ||| -72.257
+48 ||| many important and real problem to solve complex numbers inevitable ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-8.000 tm_pt_3=0.000 tm_pt_4=-8.000 tm_pt_5=-19.917 tm_pt_6=-12.070 tm_pt_7=-8.000 tm_pt_8=-21.744 tm_pt_9=-3.000 tm_pt_10=-9.813 tm_pt_11=0.000 tm_pt_12=-4.873 tm_pt_13=0.000 tm_pt_14=-10.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=8.000 lm_0=-27.758 lm_1=-35.872 WordPenalty=-5.212 OOVPenalty=0.000 ||| -72.380
+49 ||| big bang is a famous result in the state so and recent situation from the separate . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-15.000 tm_pt_3=0.000 tm_pt_4=-12.000 tm_pt_5=-49.728 tm_pt_6=-24.253 tm_pt_7=-15.000 tm_pt_8=-40.770 tm_pt_9=-2.221 tm_pt_10=-39.767 tm_pt_11=0.000 tm_pt_12=-14.945 tm_pt_13=0.000 tm_pt_14=-17.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=12.000 lm_0=-36.721 lm_1=-58.700 WordPenalty=-8.252 OOVPenalty=0.000 ||| -122.970
+49 ||| big bang is a famous result in the state so and recent situation from the the . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-15.000 tm_pt_3=0.000 tm_pt_4=-12.000 tm_pt_5=-56.487 tm_pt_6=-24.541 tm_pt_7=-15.000 tm_pt_8=-40.770 tm_pt_9=-2.225 tm_pt_10=-45.470 tm_pt_11=0.000 tm_pt_12=-15.099 tm_pt_13=0.000 tm_pt_14=-17.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=12.000 lm_0=-34.761 lm_1=-58.700 WordPenalty=-8.252 OOVPenalty=0.000 ||| -123.473
+49 ||| big bang is a famous result in the state so and recent situation from the different . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-15.000 tm_pt_3=0.000 tm_pt_4=-12.000 tm_pt_5=-52.901 tm_pt_6=-24.828 tm_pt_7=-15.000 tm_pt_8=-40.770 tm_pt_9=-2.586 tm_pt_10=-43.998 tm_pt_11=0.000 tm_pt_12=-16.198 tm_pt_13=0.000 tm_pt_14=-17.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=12.000 lm_0=-35.034 lm_1=-58.700 WordPenalty=-8.252 OOVPenalty=0.000 ||| -123.527
+49 ||| big bang is a famous result in the state so and recent state from the separate . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-15.000 tm_pt_3=0.000 tm_pt_4=-12.000 tm_pt_5=-49.859 tm_pt_6=-23.063 tm_pt_7=-15.000 tm_pt_8=-40.770 tm_pt_9=-1.860 tm_pt_10=-40.011 tm_pt_11=0.000 tm_pt_12=-13.846 tm_pt_13=0.000 tm_pt_14=-17.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=12.000 lm_0=-38.242 lm_1=-58.700 WordPenalty=-8.252 OOVPenalty=0.000 ||| -123.849
+49 ||| big bang is a famous result in the state so and recent state from the the . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-15.000 tm_pt_3=0.000 tm_pt_4=-12.000 tm_pt_5=-56.618 tm_pt_6=-23.351 tm_pt_7=-15.000 tm_pt_8=-40.770 tm_pt_9=-1.864 tm_pt_10=-45.713 tm_pt_11=0.000 tm_pt_12=-14.000 tm_pt_13=0.000 tm_pt_14=-17.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=12.000 lm_0=-36.282 lm_1=-58.700 WordPenalty=-8.252 OOVPenalty=0.000 ||| -124.352
+49 ||| the big bang is a famous result in the state so and recent situation from the separate . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-15.000 tm_pt_3=0.000 tm_pt_4=-12.000 tm_pt_5=-54.868 tm_pt_6=-24.390 tm_pt_7=-15.000 tm_pt_8=-40.770 tm_pt_9=-2.216 tm_pt_10=-39.906 tm_pt_11=0.000 tm_pt_12=-14.791 tm_pt_13=0.000 tm_pt_14=-18.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=12.000 lm_0=-35.341 lm_1=-61.961 WordPenalty=-8.686 OOVPenalty=0.000 ||| -124.373
+49 ||| big bang is a famous result in the state so and recent state from the different . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-15.000 tm_pt_3=0.000 tm_pt_4=-12.000 tm_pt_5=-53.032 tm_pt_6=-23.639 tm_pt_7=-15.000 tm_pt_8=-40.770 tm_pt_9=-2.225 tm_pt_10=-44.242 tm_pt_11=0.000 tm_pt_12=-15.099 tm_pt_13=0.000 tm_pt_14=-17.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=12.000 lm_0=-36.555 lm_1=-58.700 WordPenalty=-8.252 OOVPenalty=0.000 ||| -124.406
+50 ||| windows মিলিনিয়াম ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-1.000 tm_pt_3=0.000 tm_pt_4=-1.000 tm_pt_5=-0.231 tm_pt_6=-0.060 tm_pt_7=-1.000 tm_pt_8=-2.718 tm_pt_9=-0.000 tm_pt_10=-0.025 tm_pt_11=0.000 tm_pt_12=0.000 tm_pt_13=0.000 tm_pt_14=-1.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=2.000 lm_0=-9.976 lm_1=-9.783 WordPenalty=-1.737 OOVPenalty=-100.000 ||| -116.964
+50 ||| উইন্ডোজ মিলিনিয়াম ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=0.000 tm_pt_3=0.000 tm_pt_4=0.000 tm_pt_5=0.000 tm_pt_6=0.000 tm_pt_7=0.000 tm_pt_8=0.000 tm_pt_9=0.000 tm_pt_10=0.000 tm_pt_11=0.000 tm_pt_12=0.000 tm_pt_13=0.000 tm_pt_14=0.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=2.000 lm_0=-12.528 lm_1=-9.783 WordPenalty=-1.737 OOVPenalty=-200.000 ||| -219.189
+51 ||| rabindranath , many শৈলী আয়ত্ত্ব was . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-5.000 tm_pt_3=0.000 tm_pt_4=-5.000 tm_pt_5=-17.153 tm_pt_6=-7.458 tm_pt_7=-5.000 tm_pt_8=-13.590 tm_pt_9=-1.019 tm_pt_10=-17.417 tm_pt_11=0.000 tm_pt_12=-5.381 tm_pt_13=0.000 tm_pt_14=-5.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=7.000 lm_0=-21.075 lm_1=-26.089 WordPenalty=-3.909 OOVPenalty=-200.000 ||| -254.314
+51 ||| rabindranath , more শৈলী আয়ত্ত্ব was . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-5.000 tm_pt_3=0.000 tm_pt_4=-5.000 tm_pt_5=-16.178 tm_pt_6=-6.541 tm_pt_7=-5.000 tm_pt_8=-13.590 tm_pt_9=-1.018 tm_pt_10=-16.203 tm_pt_11=0.000 tm_pt_12=-5.093 tm_pt_13=0.000 tm_pt_14=-5.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=7.000 lm_0=-21.909 lm_1=-26.089 WordPenalty=-3.909 OOVPenalty=-200.000 ||| -254.429
+51 ||| rabindranath , many শৈলী আয়ত্ত্ব a . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-5.000 tm_pt_3=0.000 tm_pt_4=-5.000 tm_pt_5=-17.168 tm_pt_6=-7.217 tm_pt_7=-5.000 tm_pt_8=-13.590 tm_pt_9=-1.136 tm_pt_10=-18.506 tm_pt_11=0.000 tm_pt_12=-5.892 tm_pt_13=0.000 tm_pt_14=-5.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=7.000 lm_0=-21.101 lm_1=-26.089 WordPenalty=-3.909 OOVPenalty=-200.000 ||| -254.825
+51 ||| rabindranath although many শৈলী আয়ত্ত্ব was . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-5.000 tm_pt_3=0.000 tm_pt_4=-5.000 tm_pt_5=-12.228 tm_pt_6=-9.106 tm_pt_7=-5.000 tm_pt_8=-13.590 tm_pt_9=-0.154 tm_pt_10=-10.629 tm_pt_11=0.000 tm_pt_12=-4.282 tm_pt_13=0.000 tm_pt_14=-5.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=7.000 lm_0=-24.131 lm_1=-26.089 WordPenalty=-3.909 OOVPenalty=-200.000 ||| -254.858
+51 ||| rabindranath though more শৈলী আয়ত্ত্ব was . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-5.000 tm_pt_3=0.000 tm_pt_4=-5.000 tm_pt_5=-12.457 tm_pt_6=-7.315 tm_pt_7=-5.000 tm_pt_8=-13.590 tm_pt_9=-0.154 tm_pt_10=-11.213 tm_pt_11=0.000 tm_pt_12=-3.995 tm_pt_13=0.000 tm_pt_14=-5.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=7.000 lm_0=-24.610 lm_1=-26.089 WordPenalty=-3.909 OOVPenalty=-200.000 ||| -254.932
+51 ||| rabindranath , many শৈলী আয়ত্ত্ব by . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-5.000 tm_pt_3=0.000 tm_pt_4=-4.000 tm_pt_5=-17.337 tm_pt_6=-8.246 tm_pt_7=-5.000 tm_pt_8=-13.590 tm_pt_9=-1.050 tm_pt_10=-15.591 tm_pt_11=0.000 tm_pt_12=-4.998 tm_pt_13=0.000 tm_pt_14=-5.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=6.000 lm_0=-21.027 lm_1=-26.089 WordPenalty=-3.909 OOVPenalty=-200.000 ||| -254.940
+51 ||| rabindranath , more শৈলী আয়ত্ত্ব a . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-5.000 tm_pt_3=0.000 tm_pt_4=-5.000 tm_pt_5=-16.194 tm_pt_6=-6.300 tm_pt_7=-5.000 tm_pt_8=-13.590 tm_pt_9=-1.135 tm_pt_10=-17.291 tm_pt_11=0.000 tm_pt_12=-5.604 tm_pt_13=0.000 tm_pt_14=-5.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=7.000 lm_0=-21.935 lm_1=-26.089 WordPenalty=-3.909 OOVPenalty=-200.000 ||| -254.940
+51 ||| rabindranath though many শৈলী আয়ত্ত্ব was . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-5.000 tm_pt_3=0.000 tm_pt_4=-5.000 tm_pt_5=-13.432 tm_pt_6=-8.231 tm_pt_7=-5.000 tm_pt_8=-13.590 tm_pt_9=-0.154 tm_pt_10=-12.428 tm_pt_11=0.000 tm_pt_12=-4.282 tm_pt_13=0.000 tm_pt_14=-5.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=7.000 lm_0=-23.903 lm_1=-26.089 WordPenalty=-3.909 OOVPenalty=-200.000 ||| -254.974
+51 ||| rabindranath , more শৈলী আয়ত্ত্ব by . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-5.000 tm_pt_3=0.000 tm_pt_4=-4.000 tm_pt_5=-16.363 tm_pt_6=-7.330 tm_pt_7=-5.000 tm_pt_8=-13.590 tm_pt_9=-1.050 tm_pt_10=-14.377 tm_pt_11=0.000 tm_pt_12=-4.710 tm_pt_13=0.000 tm_pt_14=-5.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=6.000 lm_0=-21.861 lm_1=-26.089 WordPenalty=-3.909 OOVPenalty=-200.000 ||| -255.055
+52 ||| labour economics ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-1.000 tm_pt_3=0.000 tm_pt_4=-1.000 tm_pt_5=-1.274 tm_pt_6=-2.013 tm_pt_7=-1.000 tm_pt_8=-2.718 tm_pt_9=-0.007 tm_pt_10=0.000 tm_pt_11=0.000 tm_pt_12=-0.773 tm_pt_13=0.000 tm_pt_14=-2.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=1.000 lm_0=-5.389 lm_1=-9.783 WordPenalty=-1.737 OOVPenalty=0.000 ||| -13.099
+52 ||| labor economy ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-1.000 tm_pt_3=0.000 tm_pt_4=-1.000 tm_pt_5=-1.452 tm_pt_6=-2.394 tm_pt_7=-1.000 tm_pt_8=-2.718 tm_pt_9=-0.050 tm_pt_10=0.000 tm_pt_11=0.000 tm_pt_12=-1.179 tm_pt_13=0.000 tm_pt_14=-2.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=1.000 lm_0=-5.309 lm_1=-9.783 WordPenalty=-1.737 OOVPenalty=0.000 ||| -13.343
+52 ||| labor economics ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-2.000 tm_pt_3=0.000 tm_pt_4=-2.000 tm_pt_5=-1.231 tm_pt_6=-1.682 tm_pt_7=-2.000 tm_pt_8=-5.436 tm_pt_9=-0.000 tm_pt_10=-0.388 tm_pt_11=0.000 tm_pt_12=-1.158 tm_pt_13=0.000 tm_pt_14=-2.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=2.000 lm_0=-7.246 lm_1=-9.783 WordPenalty=-1.737 OOVPenalty=0.000 ||| -15.698
+52 ||| labour economy ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-2.000 tm_pt_3=0.000 tm_pt_4=-2.000 tm_pt_5=-1.496 tm_pt_6=-2.725 tm_pt_7=-2.000 tm_pt_8=-5.436 tm_pt_9=-0.000 tm_pt_10=-0.673 tm_pt_11=0.000 tm_pt_12=-2.368 tm_pt_13=0.000 tm_pt_14=-2.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=2.000 lm_0=-6.994 lm_1=-9.783 WordPenalty=-1.737 OOVPenalty=0.000 ||| -16.294
+52 ||| the economics ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-2.000 tm_pt_3=0.000 tm_pt_4=-2.000 tm_pt_5=-8.013 tm_pt_6=-2.637 tm_pt_7=-2.000 tm_pt_8=-5.436 tm_pt_9=-0.050 tm_pt_10=-6.947 tm_pt_11=0.000 tm_pt_12=-2.863 tm_pt_13=0.000 tm_pt_14=-2.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=2.000 lm_0=-5.487 lm_1=-9.783 WordPenalty=-1.737 OOVPenalty=0.000 ||| -17.490
+52 ||| the economy ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-2.000 tm_pt_3=0.000 tm_pt_4=-2.000 tm_pt_5=-8.235 tm_pt_6=-3.349 tm_pt_7=-2.000 tm_pt_8=-5.436 tm_pt_9=-0.050 tm_pt_10=-7.117 tm_pt_11=0.000 tm_pt_12=-3.620 tm_pt_13=0.000 tm_pt_14=-2.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=2.000 lm_0=-5.372 lm_1=-9.783 WordPenalty=-1.737 OOVPenalty=0.000 ||| -17.945
+53 ||| britain at the main and his economic power was . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-9.000 tm_pt_3=0.000 tm_pt_4=-8.000 tm_pt_5=-21.601 tm_pt_6=-8.391 tm_pt_7=-9.000 tm_pt_8=-24.462 tm_pt_9=-1.738 tm_pt_10=-18.689 tm_pt_11=0.000 tm_pt_12=-6.389 tm_pt_13=0.000 tm_pt_14=-10.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=8.000 lm_0=-23.355 lm_1=-35.872 WordPenalty=-5.212 OOVPenalty=0.000 ||| -68.955
+53 ||| britain at the main and who was economical power . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-8.000 tm_pt_3=0.000 tm_pt_4=-6.000 tm_pt_5=-21.110 tm_pt_6=-9.959 tm_pt_7=-8.000 tm_pt_8=-21.744 tm_pt_9=-2.106 tm_pt_10=-19.176 tm_pt_11=0.000 tm_pt_12=-6.249 tm_pt_13=0.000 tm_pt_14=-10.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=6.000 lm_0=-22.404 lm_1=-35.872 WordPenalty=-5.212 OOVPenalty=0.000 ||| -69.372
+53 ||| britain at the main and his economic powers was . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-9.000 tm_pt_3=0.000 tm_pt_4=-8.000 tm_pt_5=-21.769 tm_pt_6=-10.054 tm_pt_7=-9.000 tm_pt_8=-24.462 tm_pt_9=-1.738 tm_pt_10=-18.284 tm_pt_11=0.000 tm_pt_12=-6.389 tm_pt_13=0.000 tm_pt_14=-10.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=8.000 lm_0=-23.387 lm_1=-35.872 WordPenalty=-5.212 OOVPenalty=0.000 ||| -69.489
+53 ||| britain at the main and who was economic power . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-8.000 tm_pt_3=0.000 tm_pt_4=-6.000 tm_pt_5=-21.129 tm_pt_6=-9.084 tm_pt_7=-8.000 tm_pt_8=-21.744 tm_pt_9=-2.106 tm_pt_10=-19.581 tm_pt_11=0.000 tm_pt_12=-6.249 tm_pt_13=0.000 tm_pt_14=-10.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=6.000 lm_0=-22.721 lm_1=-35.872 WordPenalty=-5.212 OOVPenalty=0.000 ||| -69.575
+53 ||| britain at the main and his economical power was . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-9.000 tm_pt_3=0.000 tm_pt_4=-8.000 tm_pt_5=-21.582 tm_pt_6=-9.265 tm_pt_7=-9.000 tm_pt_8=-24.462 tm_pt_9=-1.738 tm_pt_10=-18.284 tm_pt_11=0.000 tm_pt_12=-6.389 tm_pt_13=0.000 tm_pt_14=-10.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=8.000 lm_0=-23.774 lm_1=-35.872 WordPenalty=-5.212 OOVPenalty=0.000 ||| -69.663
+53 ||| britain at the main economic power and who was . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-9.000 tm_pt_3=0.000 tm_pt_4=-6.000 tm_pt_5=-21.129 tm_pt_6=-9.084 tm_pt_7=-9.000 tm_pt_8=-24.462 tm_pt_9=-2.370 tm_pt_10=-16.217 tm_pt_11=0.000 tm_pt_12=-6.105 tm_pt_13=0.000 tm_pt_14=-10.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=5.000 lm_0=-21.688 lm_1=-35.872 WordPenalty=-5.212 OOVPenalty=0.000 ||| -69.744
+53 ||| britain at the main and who was economic powers . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-8.000 tm_pt_3=0.000 tm_pt_4=-6.000 tm_pt_5=-21.297 tm_pt_6=-10.747 tm_pt_7=-8.000 tm_pt_8=-21.744 tm_pt_9=-2.106 tm_pt_10=-19.176 tm_pt_11=0.000 tm_pt_12=-6.249 tm_pt_13=0.000 tm_pt_14=-10.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=6.000 lm_0=-22.569 lm_1=-35.872 WordPenalty=-5.212 OOVPenalty=0.000 ||| -69.881
+53 ||| britain at the main and his was economical power . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-8.000 tm_pt_3=0.000 tm_pt_4=-6.000 tm_pt_5=-21.582 tm_pt_6=-9.265 tm_pt_7=-8.000 tm_pt_8=-21.744 tm_pt_9=-2.106 tm_pt_10=-21.391 tm_pt_11=0.000 tm_pt_12=-6.249 tm_pt_13=0.000 tm_pt_14=-10.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=6.000 lm_0=-22.611 lm_1=-35.872 WordPenalty=-5.212 OOVPenalty=0.000 ||| -70.056
+53 ||| britain at the main and his was economic power . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-8.000 tm_pt_3=0.000 tm_pt_4=-6.000 tm_pt_5=-21.601 tm_pt_6=-8.391 tm_pt_7=-8.000 tm_pt_8=-21.744 tm_pt_9=-2.106 tm_pt_10=-21.797 tm_pt_11=0.000 tm_pt_12=-6.249 tm_pt_13=0.000 tm_pt_14=-10.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=6.000 lm_0=-22.929 lm_1=-35.872 WordPenalty=-5.212 OOVPenalty=0.000 ||| -70.259
+54 ||| movement against the military rule and pakistan গোষ্ঠীগত was against protest and bengalis independence movement , ধাবিত to for he widely praised . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-18.000 tm_pt_3=0.000 tm_pt_4=-15.000 tm_pt_5=-48.994 tm_pt_6=-22.903 tm_pt_7=-18.000 tm_pt_8=-48.924 tm_pt_9=-5.124 tm_pt_10=-30.377 tm_pt_11=0.000 tm_pt_12=-8.664 tm_pt_13=0.000 tm_pt_14=-21.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=16.000 lm_0=-62.918 lm_1=-78.267 WordPenalty=-10.857 OOVPenalty=-200.000 ||| -365.346
+54 ||| the military rule movement against and pakistan গোষ্ঠীগত was against protest and bengalis independence movement , ধাবিত to for he widely praised . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-18.000 tm_pt_3=0.000 tm_pt_4=-16.000 tm_pt_5=-48.994 tm_pt_6=-22.903 tm_pt_7=-18.000 tm_pt_8=-48.924 tm_pt_9=-5.075 tm_pt_10=-31.215 tm_pt_11=0.000 tm_pt_12=-8.952 tm_pt_13=0.000 tm_pt_14=-21.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=18.000 lm_0=-64.294 lm_1=-78.267 WordPenalty=-10.857 OOVPenalty=-200.000 ||| -365.363
+54 ||| movement against the military rule and the গোষ্ঠীগত was against protest and bengalis independence movement , ধাবিত to for he widely praised . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-18.000 tm_pt_3=0.000 tm_pt_4=-15.000 tm_pt_5=-53.615 tm_pt_6=-23.667 tm_pt_7=-18.000 tm_pt_8=-48.924 tm_pt_9=-5.124 tm_pt_10=-34.296 tm_pt_11=0.000 tm_pt_12=-10.260 tm_pt_13=0.000 tm_pt_14=-21.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=16.000 lm_0=-60.831 lm_1=-78.267 WordPenalty=-10.857 OOVPenalty=-200.000 ||| -365.490
+54 ||| the military rule movement against and the গোষ্ঠীগত was against protest and bengalis independence movement , ধাবিত to for he widely praised . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-18.000 tm_pt_3=0.000 tm_pt_4=-16.000 tm_pt_5=-53.615 tm_pt_6=-23.667 tm_pt_7=-18.000 tm_pt_8=-48.924 tm_pt_9=-5.075 tm_pt_10=-35.134 tm_pt_11=0.000 tm_pt_12=-10.548 tm_pt_13=0.000 tm_pt_14=-21.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=18.000 lm_0=-62.207 lm_1=-78.267 WordPenalty=-10.857 OOVPenalty=-200.000 ||| -365.507
+54 ||| movement against the military rule and pakistan গোষ্ঠীগত was against protest and bengalis independence movement on ধাবিত to for he widely praised . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-18.000 tm_pt_3=0.000 tm_pt_4=-15.000 tm_pt_5=-46.730 tm_pt_6=-23.155 tm_pt_7=-18.000 tm_pt_8=-48.924 tm_pt_9=-5.241 tm_pt_10=-27.837 tm_pt_11=0.000 tm_pt_12=-9.175 tm_pt_13=0.000 tm_pt_14=-21.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=16.000 lm_0=-63.790 lm_1=-78.267 WordPenalty=-10.857 OOVPenalty=-200.000 ||| -365.697
+54 ||| the military rule movement against and pakistan গোষ্ঠীগত was against protest and bengalis independence movement on ধাবিত to for he widely praised . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-18.000 tm_pt_3=0.000 tm_pt_4=-16.000 tm_pt_5=-46.730 tm_pt_6=-23.155 tm_pt_7=-18.000 tm_pt_8=-48.924 tm_pt_9=-5.192 tm_pt_10=-28.675 tm_pt_11=0.000 tm_pt_12=-9.463 tm_pt_13=0.000 tm_pt_14=-21.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=18.000 lm_0=-65.167 lm_1=-78.267 WordPenalty=-10.857 OOVPenalty=-200.000 ||| -365.714
+54 ||| movement against the military rule and the গোষ্ঠীগত was against protest and bengalis independence movement on ধাবিত to for he widely praised . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-18.000 tm_pt_3=0.000 tm_pt_4=-15.000 tm_pt_5=-51.351 tm_pt_6=-23.918 tm_pt_7=-18.000 tm_pt_8=-48.924 tm_pt_9=-5.241 tm_pt_10=-31.756 tm_pt_11=0.000 tm_pt_12=-10.771 tm_pt_13=0.000 tm_pt_14=-21.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=16.000 lm_0=-61.703 lm_1=-78.267 WordPenalty=-10.857 OOVPenalty=-200.000 ||| -365.841
+54 ||| movement against the military rule and pakistan গোষ্ঠীগত was protest against and bengalis independence movement , ধাবিত to for he widely praised . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-19.000 tm_pt_3=0.000 tm_pt_4=-15.000 tm_pt_5=-48.994 tm_pt_6=-22.903 tm_pt_7=-19.000 tm_pt_8=-51.642 tm_pt_9=-5.124 tm_pt_10=-32.031 tm_pt_11=0.000 tm_pt_12=-9.805 tm_pt_13=0.000 tm_pt_14=-21.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=16.000 lm_0=-61.725 lm_1=-78.267 WordPenalty=-10.857 OOVPenalty=-200.000 ||| -365.858
+54 ||| movement against the military rule and pakistan গোষ্ঠীগত was against protest and bengalis independence movement , ধাবিত for the he widely praised . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-18.000 tm_pt_3=0.000 tm_pt_4=-14.000 tm_pt_5=-51.130 tm_pt_6=-23.575 tm_pt_7=-18.000 tm_pt_8=-48.924 tm_pt_9=-5.124 tm_pt_10=-32.703 tm_pt_11=0.000 tm_pt_12=-9.548 tm_pt_13=0.000 tm_pt_14=-21.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=15.000 lm_0=-61.304 lm_1=-78.267 WordPenalty=-10.857 OOVPenalty=-200.000 ||| -365.868
+54 ||| the military rule movement against and pakistan গোষ্ঠীগত was protest against and bengalis independence movement , ধাবিত to for he widely praised . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-19.000 tm_pt_3=0.000 tm_pt_4=-16.000 tm_pt_5=-48.994 tm_pt_6=-22.903 tm_pt_7=-19.000 tm_pt_8=-51.642 tm_pt_9=-5.075 tm_pt_10=-32.869 tm_pt_11=0.000 tm_pt_12=-10.092 tm_pt_13=0.000 tm_pt_14=-21.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=18.000 lm_0=-63.101 lm_1=-78.267 WordPenalty=-10.857 OOVPenalty=-200.000 ||| -365.875
+55 ||| here is mentioned that were internet and world wide web other name word to be but actually শব্দদ্বয় different subject to . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-18.000 tm_pt_3=0.000 tm_pt_4=-15.000 tm_pt_5=-75.462 tm_pt_6=-36.132 tm_pt_7=-18.000 tm_pt_8=-48.924 tm_pt_9=-6.431 tm_pt_10=-50.024 tm_pt_11=0.000 tm_pt_12=-9.468 tm_pt_13=0.000 tm_pt_14=-21.000 tm_pt_15=-1.000 tm_pt_16=0.000 tm_glue_0=16.000 lm_0=-55.366 lm_1=-75.006 WordPenalty=-10.423 OOVPenalty=-100.000 ||| -270.162
+55 ||| here is mentioned that were internet and world wide web other name to be word but actually শব্দদ্বয় different subject to . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-18.000 tm_pt_3=0.000 tm_pt_4=-14.000 tm_pt_5=-75.462 tm_pt_6=-36.132 tm_pt_7=-18.000 tm_pt_8=-48.924 tm_pt_9=-6.431 tm_pt_10=-48.558 tm_pt_11=0.000 tm_pt_12=-9.468 tm_pt_13=0.000 tm_pt_14=-21.000 tm_pt_15=-1.000 tm_pt_16=0.000 tm_glue_0=15.000 lm_0=-55.492 lm_1=-75.006 WordPenalty=-10.423 OOVPenalty=-100.000 ||| -270.902
+55 ||| here is mentioned that were internet and world wide web other name word to be though actually শব্দদ্বয় different subject to . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-18.000 tm_pt_3=0.000 tm_pt_4=-15.000 tm_pt_5=-74.255 tm_pt_6=-35.844 tm_pt_7=-18.000 tm_pt_8=-48.924 tm_pt_9=-6.314 tm_pt_10=-48.057 tm_pt_11=0.000 tm_pt_12=-8.957 tm_pt_13=0.000 tm_pt_14=-21.000 tm_pt_15=-1.000 tm_pt_16=0.000 tm_glue_0=16.000 lm_0=-56.919 lm_1=-75.006 WordPenalty=-10.423 OOVPenalty=-100.000 ||| -270.968
+55 ||| here is mentioned that were internet and other name of world wide web word to be but actually শব্দদ্বয় different subject to . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-18.000 tm_pt_3=0.000 tm_pt_4=-14.000 tm_pt_5=-76.084 tm_pt_6=-36.132 tm_pt_7=-18.000 tm_pt_8=-48.924 tm_pt_9=-6.431 tm_pt_10=-48.638 tm_pt_11=0.000 tm_pt_12=-9.468 tm_pt_13=0.000 tm_pt_14=-22.000 tm_pt_15=-1.000 tm_pt_16=-1.000 tm_glue_0=15.000 lm_0=-53.566 lm_1=-78.267 WordPenalty=-10.857 OOVPenalty=-100.000 ||| -270.995
+55 ||| here is mentioned that were internet and world wide web other name word to be but actually শব্দদ্বয় different subject a . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-18.000 tm_pt_3=0.000 tm_pt_4=-15.000 tm_pt_5=-75.102 tm_pt_6=-35.998 tm_pt_7=-18.000 tm_pt_8=-48.924 tm_pt_9=-6.664 tm_pt_10=-50.785 tm_pt_11=0.000 tm_pt_12=-9.873 tm_pt_13=0.000 tm_pt_14=-21.000 tm_pt_15=-1.000 tm_pt_16=0.000 tm_glue_0=16.000 lm_0=-55.873 lm_1=-75.006 WordPenalty=-10.423 OOVPenalty=-100.000 ||| -271.207
+55 ||| here is mentioned that were internet and world wide web other name word to be but actually শব্দদ্বয় different subject indicates . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-18.000 tm_pt_3=0.000 tm_pt_4=-15.000 tm_pt_5=-70.008 tm_pt_6=-36.286 tm_pt_7=-18.000 tm_pt_8=-48.924 tm_pt_9=-6.664 tm_pt_10=-44.530 tm_pt_11=0.000 tm_pt_12=-9.873 tm_pt_13=0.000 tm_pt_14=-21.000 tm_pt_15=-1.000 tm_pt_16=0.000 tm_glue_0=16.000 lm_0=-57.972 lm_1=-75.006 WordPenalty=-10.423 OOVPenalty=-100.000 ||| -271.304
+55 ||| here is mentioned that were internet and world wide web other name word to be but actually শব্দদ্বয় different subject to the . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-19.000 tm_pt_3=0.000 tm_pt_4=-17.000 tm_pt_5=-78.670 tm_pt_6=-34.346 tm_pt_7=-19.000 tm_pt_8=-51.642 tm_pt_9=-6.431 tm_pt_10=-46.151 tm_pt_11=0.000 tm_pt_12=-9.640 tm_pt_13=0.000 tm_pt_14=-22.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=18.000 lm_0=-56.535 lm_1=-78.267 WordPenalty=-10.857 OOVPenalty=-100.000 ||| -271.444
+55 ||| here is mentioned that were internet and world wide web other name word to be but actually শব্দদ্বয় different subjects to . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-18.000 tm_pt_3=0.000 tm_pt_4=-14.000 tm_pt_5=-75.002 tm_pt_6=-37.837 tm_pt_7=-18.000 tm_pt_8=-48.924 tm_pt_9=-6.432 tm_pt_10=-47.322 tm_pt_11=0.000 tm_pt_12=-10.432 tm_pt_13=0.000 tm_pt_14=-21.000 tm_pt_15=-1.000 tm_pt_16=0.000 tm_glue_0=15.000 lm_0=-55.554 lm_1=-75.006 WordPenalty=-10.423 OOVPenalty=-100.000 ||| -271.494
+55 ||| here is mentioned that many internet and world wide web other name word to be but actually শব্দদ্বয় different subject to . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-18.000 tm_pt_3=0.000 tm_pt_4=-15.000 tm_pt_5=-74.971 tm_pt_6=-36.132 tm_pt_7=-18.000 tm_pt_8=-48.924 tm_pt_9=-6.431 tm_pt_10=-51.114 tm_pt_11=0.000 tm_pt_12=-9.468 tm_pt_13=0.000 tm_pt_14=-21.000 tm_pt_15=-1.000 tm_pt_16=0.000 tm_glue_0=16.000 lm_0=-56.305 lm_1=-75.006 WordPenalty=-10.423 OOVPenalty=-100.000 ||| -271.512
+55 ||| here is mentioned that were internet and world wide web other name word to be though actually শব্দদ্বয় different subject a . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-18.000 tm_pt_3=0.000 tm_pt_4=-15.000 tm_pt_5=-73.895 tm_pt_6=-35.711 tm_pt_7=-18.000 tm_pt_8=-48.924 tm_pt_9=-6.547 tm_pt_10=-48.819 tm_pt_11=0.000 tm_pt_12=-9.362 tm_pt_13=0.000 tm_pt_14=-21.000 tm_pt_15=-1.000 tm_pt_16=0.000 tm_glue_0=16.000 lm_0=-57.427 lm_1=-75.006 WordPenalty=-10.423 OOVPenalty=-100.000 ||| -272.013
+56 ||| . z related polar co-ordinate two are r = . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-8.000 tm_pt_3=0.000 tm_pt_4=-7.000 tm_pt_5=-25.545 tm_pt_6=-12.128 tm_pt_7=-8.000 tm_pt_8=-21.744 tm_pt_9=-3.418 tm_pt_10=-16.486 tm_pt_11=0.000 tm_pt_12=-0.128 tm_pt_13=0.000 tm_pt_14=-10.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=7.000 lm_0=-29.784 lm_1=-35.872 WordPenalty=-5.212 OOVPenalty=0.000 ||| -77.342
+56 ||| . z related polar co-ordinate two is r = . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-9.000 tm_pt_3=0.000 tm_pt_4=-8.000 tm_pt_5=-25.602 tm_pt_6=-11.361 tm_pt_7=-9.000 tm_pt_8=-24.462 tm_pt_9=-3.050 tm_pt_10=-19.524 tm_pt_11=0.000 tm_pt_12=-0.535 tm_pt_13=0.000 tm_pt_14=-10.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=8.000 lm_0=-29.699 lm_1=-35.872 WordPenalty=-5.212 OOVPenalty=0.000 ||| -77.806
+57 ||| november ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-1.000 tm_pt_3=0.000 tm_pt_4=-1.000 tm_pt_5=-0.041 tm_pt_6=-0.057 tm_pt_7=-1.000 tm_pt_8=-2.718 tm_pt_9=0.000 tm_pt_10=0.000 tm_pt_11=0.000 tm_pt_12=-0.008 tm_pt_13=0.000 tm_pt_14=-1.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=1.000 lm_0=-3.071 lm_1=-6.522 WordPenalty=-1.303 OOVPenalty=0.000 ||| -6.949
+57 ||| november . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-1.000 tm_pt_3=0.000 tm_pt_4=-1.000 tm_pt_5=-9.840 tm_pt_6=-0.742 tm_pt_7=-1.000 tm_pt_8=-2.718 tm_pt_9=-1.000 tm_pt_10=0.000 tm_pt_11=0.000 tm_pt_12=-4.779 tm_pt_13=0.000 tm_pt_14=-2.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=1.000 lm_0=-5.242 lm_1=-9.783 WordPenalty=-1.737 OOVPenalty=0.000 ||| -16.354
+57 ||| নভেম্বর ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=0.000 tm_pt_3=0.000 tm_pt_4=0.000 tm_pt_5=0.000 tm_pt_6=0.000 tm_pt_7=0.000 tm_pt_8=0.000 tm_pt_9=0.000 tm_pt_10=0.000 tm_pt_11=0.000 tm_pt_12=0.000 tm_pt_13=0.000 tm_pt_14=0.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=1.000 lm_0=-7.355 lm_1=-6.522 WordPenalty=-1.303 OOVPenalty=-100.000 ||| -111.358
+58 ||| 1972 খ্রীস্টাব্দে then আরপানেটে first ইলেক্ট্রনিক mail to to . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-7.000 tm_pt_3=0.000 tm_pt_4=-6.000 tm_pt_5=-18.629 tm_pt_6=-8.765 tm_pt_7=-7.000 tm_pt_8=-19.026 tm_pt_9=-0.369 tm_pt_10=-17.204 tm_pt_11=0.000 tm_pt_12=-2.899 tm_pt_13=0.000 tm_pt_14=-7.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=9.000 lm_0=-38.430 lm_1=-35.872 WordPenalty=-5.212 OOVPenalty=-300.000 ||| -382.030
+58 ||| 1972 খ্রীস্টাব্দে " আরপানেটে first ইলেক্ট্রনিক mail to to . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-7.000 tm_pt_3=0.000 tm_pt_4=-7.000 tm_pt_5=-21.353 tm_pt_6=-9.458 tm_pt_7=-7.000 tm_pt_8=-19.026 tm_pt_9=-1.368 tm_pt_10=-20.715 tm_pt_11=0.000 tm_pt_12=-3.391 tm_pt_13=0.000 tm_pt_14=-7.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=10.000 lm_0=-37.710 lm_1=-35.872 WordPenalty=-5.212 OOVPenalty=-300.000 ||| -382.837
+58 ||| 1972 খ্রীস্টাব্দে then আরপানেটে first ইলেক্ট্রনিক mail to used . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-6.000 tm_pt_3=0.000 tm_pt_4=-5.000 tm_pt_5=-17.350 tm_pt_6=-10.180 tm_pt_7=-6.000 tm_pt_8=-16.308 tm_pt_9=-1.369 tm_pt_10=-15.685 tm_pt_11=0.000 tm_pt_12=-4.158 tm_pt_13=0.000 tm_pt_14=-7.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=8.000 lm_0=-38.643 lm_1=-35.872 WordPenalty=-5.212 OOVPenalty=-300.000 ||| -383.214
+58 ||| 1972 খ্রীস্টাব্দে then আরপানেটে first ইলেক্ট্রনিক to mail to . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-7.000 tm_pt_3=0.000 tm_pt_4=-5.000 tm_pt_5=-18.629 tm_pt_6=-8.765 tm_pt_7=-7.000 tm_pt_8=-19.026 tm_pt_9=-0.736 tm_pt_10=-17.873 tm_pt_11=0.000 tm_pt_12=-4.403 tm_pt_13=0.000 tm_pt_14=-7.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=8.000 lm_0=-37.936 lm_1=-35.872 WordPenalty=-5.212 OOVPenalty=-300.000 ||| -383.391
+58 ||| খ্রীস্টাব্দে 1972 then আরপানেটে first ইলেক্ট্রনিক mail to to . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-7.000 tm_pt_3=0.000 tm_pt_4=-5.000 tm_pt_5=-18.629 tm_pt_6=-8.765 tm_pt_7=-7.000 tm_pt_8=-19.026 tm_pt_9=-0.387 tm_pt_10=-18.080 tm_pt_11=0.000 tm_pt_12=-3.512 tm_pt_13=0.000 tm_pt_14=-7.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=8.000 lm_0=-38.430 lm_1=-35.872 WordPenalty=-5.212 OOVPenalty=-300.000 ||| -383.453
+58 ||| 1972 খ্রীস্টাব্দে then আরপানেটে first ইলেক্ট্রনিক mail to arrested . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-6.000 tm_pt_3=0.000 tm_pt_4=-5.000 tm_pt_5=-16.830 tm_pt_6=-12.490 tm_pt_7=-6.000 tm_pt_8=-16.308 tm_pt_9=-0.736 tm_pt_10=-13.000 tm_pt_11=0.000 tm_pt_12=-3.465 tm_pt_13=0.000 tm_pt_14=-7.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=8.000 lm_0=-39.625 lm_1=-35.872 WordPenalty=-5.212 OOVPenalty=-300.000 ||| -383.660
+58 ||| 1972 খ্রীস্টাব্দে first ইলেক্ট্রনিক then আরপানেটে mail to to . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-7.000 tm_pt_3=0.000 tm_pt_4=-5.000 tm_pt_5=-18.629 tm_pt_6=-8.765 tm_pt_7=-7.000 tm_pt_8=-19.026 tm_pt_9=-1.001 tm_pt_10=-14.022 tm_pt_11=0.000 tm_pt_12=-2.899 tm_pt_13=0.000 tm_pt_14=-7.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=7.000 lm_0=-38.430 lm_1=-35.872 WordPenalty=-5.212 OOVPenalty=-300.000 ||| -383.688
+58 ||| 1972 খ্রীস্টাব্দে then first ইলেক্ট্রনিক আরপানেটে mail to to . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-7.000 tm_pt_3=0.000 tm_pt_4=-5.000 tm_pt_5=-18.629 tm_pt_6=-8.765 tm_pt_7=-7.000 tm_pt_8=-19.026 tm_pt_9=-1.001 tm_pt_10=-14.022 tm_pt_11=0.000 tm_pt_12=-2.899 tm_pt_13=0.000 tm_pt_14=-7.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=7.000 lm_0=-38.430 lm_1=-35.872 WordPenalty=-5.212 OOVPenalty=-300.000 ||| -383.688
+59 ||| জীব science that শাখায় fungi and the practical subject to he discussed ছত্রাকবিদ্যা -lrb- mycology -rrb- ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-11.000 tm_pt_3=0.000 tm_pt_4=-9.000 tm_pt_5=-22.287 tm_pt_6=-21.471 tm_pt_7=-11.000 tm_pt_8=-29.898 tm_pt_9=-1.274 tm_pt_10=-18.370 tm_pt_11=0.000 tm_pt_12=-6.550 tm_pt_13=0.000 tm_pt_14=-12.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=13.000 lm_0=-56.259 lm_1=-55.439 WordPenalty=-7.817 OOVPenalty=-400.000 ||| -525.136
+59 ||| জীব science that শাখায় fungi and its practical subject to he discussed ছত্রাকবিদ্যা -lrb- mycology -rrb- ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-12.000 tm_pt_3=0.000 tm_pt_4=-10.000 tm_pt_5=-19.437 tm_pt_6=-22.441 tm_pt_7=-12.000 tm_pt_8=-32.616 tm_pt_9=-1.139 tm_pt_10=-16.283 tm_pt_11=0.000 tm_pt_12=-7.569 tm_pt_13=0.000 tm_pt_14=-12.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=14.000 lm_0=-56.884 lm_1=-55.439 WordPenalty=-7.817 OOVPenalty=-400.000 ||| -525.595
+59 ||| জীব science that শাখায় fungi and the practical subject to discussed he ছত্রাকবিদ্যা -lrb- mycology -rrb- ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-11.000 tm_pt_3=0.000 tm_pt_4=-10.000 tm_pt_5=-22.287 tm_pt_6=-21.471 tm_pt_7=-11.000 tm_pt_8=-29.898 tm_pt_9=-1.273 tm_pt_10=-18.945 tm_pt_11=0.000 tm_pt_12=-6.327 tm_pt_13=0.000 tm_pt_14=-12.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=14.000 lm_0=-57.503 lm_1=-55.439 WordPenalty=-7.817 OOVPenalty=-400.000 ||| -525.781
+59 ||| জীব science that শাখায় fungi and the practical subject about to he ছত্রাকবিদ্যা -lrb- mycology -rrb- ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-11.000 tm_pt_3=0.000 tm_pt_4=-11.000 tm_pt_5=-23.925 tm_pt_6=-21.956 tm_pt_7=-11.000 tm_pt_8=-29.898 tm_pt_9=-1.273 tm_pt_10=-23.221 tm_pt_11=0.000 tm_pt_12=-7.244 tm_pt_13=0.000 tm_pt_14=-12.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=15.000 lm_0=-56.839 lm_1=-55.439 WordPenalty=-7.817 OOVPenalty=-400.000 ||| -525.908
+59 ||| জীব science that শাখায় fungus and the practical subject to he discussed ছত্রাকবিদ্যা -lrb- mycology -rrb- ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-11.000 tm_pt_3=0.000 tm_pt_4=-9.000 tm_pt_5=-22.893 tm_pt_6=-22.244 tm_pt_7=-11.000 tm_pt_8=-29.898 tm_pt_9=-1.409 tm_pt_10=-18.370 tm_pt_11=0.000 tm_pt_12=-7.850 tm_pt_13=0.000 tm_pt_14=-12.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=13.000 lm_0=-56.252 lm_1=-55.439 WordPenalty=-7.817 OOVPenalty=-400.000 ||| -526.079
+59 ||| জীব science that শাখায় fungi and the practical subject to discussed was ছত্রাকবিদ্যা -lrb- mycology -rrb- ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-11.000 tm_pt_3=0.000 tm_pt_4=-10.000 tm_pt_5=-23.316 tm_pt_6=-21.692 tm_pt_7=-11.000 tm_pt_8=-29.898 tm_pt_9=-1.641 tm_pt_10=-20.873 tm_pt_11=0.000 tm_pt_12=-8.032 tm_pt_13=0.000 tm_pt_14=-12.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=14.000 lm_0=-56.516 lm_1=-55.439 WordPenalty=-7.817 OOVPenalty=-400.000 ||| -526.237
+59 ||| জীব science that শাখায় fungi and its practical subject to discussed he ছত্রাকবিদ্যা -lrb- mycology -rrb- ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-12.000 tm_pt_3=0.000 tm_pt_4=-11.000 tm_pt_5=-19.437 tm_pt_6=-22.441 tm_pt_7=-12.000 tm_pt_8=-32.616 tm_pt_9=-1.138 tm_pt_10=-16.857 tm_pt_11=0.000 tm_pt_12=-7.346 tm_pt_13=0.000 tm_pt_14=-12.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=15.000 lm_0=-58.129 lm_1=-55.439 WordPenalty=-7.817 OOVPenalty=-400.000 ||| -526.239
+59 ||| জীব science that শাখায় fungi and the practical subject discussed to he ছত্রাকবিদ্যা -lrb- mycology -rrb- ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-11.000 tm_pt_3=0.000 tm_pt_4=-11.000 tm_pt_5=-22.287 tm_pt_6=-21.471 tm_pt_7=-11.000 tm_pt_8=-29.898 tm_pt_9=-1.273 tm_pt_10=-20.554 tm_pt_11=0.000 tm_pt_12=-7.244 tm_pt_13=0.000 tm_pt_14=-12.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=15.000 lm_0=-58.156 lm_1=-55.439 WordPenalty=-7.817 OOVPenalty=-400.000 ||| -526.365
+60 ||| water river from উঠানো was some a দিয়ে- দড়ি and বালটির through water used by animal world a system . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-16.000 tm_pt_3=0.000 tm_pt_4=-15.000 tm_pt_5=-44.880 tm_pt_6=-17.637 tm_pt_7=-16.000 tm_pt_8=-43.488 tm_pt_9=-4.059 tm_pt_10=-36.336 tm_pt_11=0.000 tm_pt_12=-8.261 tm_pt_13=0.000 tm_pt_14=-16.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=18.000 lm_0=-62.255 lm_1=-68.484 WordPenalty=-9.554 OOVPenalty=-400.000 ||| -552.512
+60 ||| water river from উঠানো was some a দিয়ে- দড়ি and বালটির through water used by animal world a method . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-16.000 tm_pt_3=0.000 tm_pt_4=-15.000 tm_pt_5=-42.470 tm_pt_6=-17.470 tm_pt_7=-16.000 tm_pt_8=-43.488 tm_pt_9=-4.053 tm_pt_10=-33.326 tm_pt_11=0.000 tm_pt_12=-7.973 tm_pt_13=0.000 tm_pt_14=-16.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=18.000 lm_0=-63.624 lm_1=-68.484 WordPenalty=-9.554 OOVPenalty=-400.000 ||| -552.800
+60 ||| water river from উঠানো was some purs দিয়ে- দড়ি and বালটির through water used by animal world a system . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-16.000 tm_pt_3=0.000 tm_pt_4=-15.000 tm_pt_5=-40.233 tm_pt_6=-19.023 tm_pt_7=-16.000 tm_pt_8=-43.488 tm_pt_9=-4.059 tm_pt_10=-28.982 tm_pt_11=0.000 tm_pt_12=-8.261 tm_pt_13=0.000 tm_pt_14=-16.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=18.000 lm_0=-64.447 lm_1=-68.484 WordPenalty=-9.554 OOVPenalty=-400.000 ||| -552.902
+60 ||| water river from উঠানো was some a দিয়ে- দড়ি and বালটির through water used by animal world a process . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-16.000 tm_pt_3=0.000 tm_pt_4=-15.000 tm_pt_5=-43.528 tm_pt_6=-17.732 tm_pt_7=-16.000 tm_pt_8=-43.488 tm_pt_9=-4.071 tm_pt_10=-35.218 tm_pt_11=0.000 tm_pt_12=-8.443 tm_pt_13=0.000 tm_pt_14=-16.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=18.000 lm_0=-62.988 lm_1=-68.484 WordPenalty=-9.554 OOVPenalty=-400.000 ||| -552.977
+60 ||| water river from উঠানো was some purs দিয়ে- দড়ি and বালটির through water used by animal world a method . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-16.000 tm_pt_3=0.000 tm_pt_4=-15.000 tm_pt_5=-37.824 tm_pt_6=-18.856 tm_pt_7=-16.000 tm_pt_8=-43.488 tm_pt_9=-4.053 tm_pt_10=-25.972 tm_pt_11=0.000 tm_pt_12=-7.973 tm_pt_13=0.000 tm_pt_14=-16.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=18.000 lm_0=-65.816 lm_1=-68.484 WordPenalty=-9.554 OOVPenalty=-400.000 ||| -553.189
+60 ||| water river from উঠানো was some purs দিয়ে- দড়ি and বালটির through water used by animal world a process . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-16.000 tm_pt_3=0.000 tm_pt_4=-15.000 tm_pt_5=-38.882 tm_pt_6=-19.118 tm_pt_7=-16.000 tm_pt_8=-43.488 tm_pt_9=-4.071 tm_pt_10=-27.863 tm_pt_11=0.000 tm_pt_12=-8.443 tm_pt_13=0.000 tm_pt_14=-16.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=18.000 lm_0=-65.180 lm_1=-68.484 WordPenalty=-9.554 OOVPenalty=-400.000 ||| -553.367
+60 ||| water river from উঠানো was some a দিয়ে- দড়ি and বালটির through water used by animal world a methods . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-16.000 tm_pt_3=0.000 tm_pt_4=-15.000 tm_pt_5=-41.830 tm_pt_6=-17.637 tm_pt_7=-16.000 tm_pt_8=-43.488 tm_pt_9=-4.071 tm_pt_10=-33.354 tm_pt_11=0.000 tm_pt_12=-8.443 tm_pt_13=0.000 tm_pt_14=-16.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=18.000 lm_0=-64.045 lm_1=-68.484 WordPenalty=-9.554 OOVPenalty=-400.000 ||| -553.444
+61 ||| include tribal dance lokuj dance classical dance etc . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-8.000 tm_pt_3=0.000 tm_pt_4=-8.000 tm_pt_5=-11.611 tm_pt_6=-21.187 tm_pt_7=-8.000 tm_pt_8=-21.744 tm_pt_9=-3.000 tm_pt_10=-5.327 tm_pt_11=0.000 tm_pt_12=-3.691 tm_pt_13=0.000 tm_pt_14=-9.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=8.000 lm_0=-31.031 lm_1=-32.611 WordPenalty=-4.777 OOVPenalty=0.000 ||| -74.273
+61 ||| among them are tribal dance lokuj dance classical dance etc . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-8.000 tm_pt_3=0.000 tm_pt_4=-8.000 tm_pt_5=-15.406 tm_pt_6=-13.319 tm_pt_7=-8.000 tm_pt_8=-21.744 tm_pt_9=-2.368 tm_pt_10=-3.535 tm_pt_11=0.000 tm_pt_12=-2.998 tm_pt_13=0.000 tm_pt_14=-11.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=8.000 lm_0=-31.005 lm_1=-39.134 WordPenalty=-5.646 OOVPenalty=0.000 ||| -75.253
+61 ||| among these are tribal dance lokuj dance classical dance etc . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-8.000 tm_pt_3=0.000 tm_pt_4=-8.000 tm_pt_5=-16.348 tm_pt_6=-13.351 tm_pt_7=-8.000 tm_pt_8=-21.744 tm_pt_9=-2.368 tm_pt_10=-3.535 tm_pt_11=0.000 tm_pt_12=-2.998 tm_pt_13=0.000 tm_pt_14=-11.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=8.000 lm_0=-31.423 lm_1=-39.134 WordPenalty=-5.646 OOVPenalty=0.000 ||| -75.962
+61 ||| among these there tribal dance lokuj dance classical dance etc . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-9.000 tm_pt_3=0.000 tm_pt_4=-9.000 tm_pt_5=-15.061 tm_pt_6=-13.172 tm_pt_7=-9.000 tm_pt_8=-24.462 tm_pt_9=-2.001 tm_pt_10=-6.315 tm_pt_11=0.000 tm_pt_12=-4.875 tm_pt_13=0.000 tm_pt_14=-11.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=9.000 lm_0=-31.235 lm_1=-39.134 WordPenalty=-5.646 OOVPenalty=0.000 ||| -76.695
+62 ||| the oldest literature first মৌখিকভাবে and later written form is . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-8.000 tm_pt_3=0.000 tm_pt_4=-6.000 tm_pt_5=-21.290 tm_pt_6=-13.446 tm_pt_7=-8.000 tm_pt_8=-21.744 tm_pt_9=-0.143 tm_pt_10=-20.845 tm_pt_11=0.000 tm_pt_12=-6.143 tm_pt_13=0.000 tm_pt_14=-10.000 tm_pt_15=-1.000 tm_pt_16=0.000 tm_glue_0=7.000 lm_0=-29.702 lm_1=-39.134 WordPenalty=-5.646 OOVPenalty=-100.000 ||| -180.397
+62 ||| the oldest literature first মৌখিকভাবে and later written in the . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-8.000 tm_pt_3=0.000 tm_pt_4=-6.000 tm_pt_5=-25.849 tm_pt_6=-13.114 tm_pt_7=-8.000 tm_pt_8=-21.744 tm_pt_9=-0.161 tm_pt_10=-26.490 tm_pt_11=0.000 tm_pt_12=-6.731 tm_pt_13=0.000 tm_pt_14=-10.000 tm_pt_15=-1.000 tm_pt_16=0.000 tm_glue_0=7.000 lm_0=-27.896 lm_1=-39.134 WordPenalty=-5.646 OOVPenalty=-100.000 ||| -180.608
+62 ||| the oldest literature first মৌখিকভাবে and later written form the . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-8.000 tm_pt_3=0.000 tm_pt_4=-6.000 tm_pt_5=-22.915 tm_pt_6=-13.720 tm_pt_7=-8.000 tm_pt_8=-21.744 tm_pt_9=-0.161 tm_pt_10=-22.739 tm_pt_11=0.000 tm_pt_12=-6.731 tm_pt_13=0.000 tm_pt_14=-10.000 tm_pt_15=-1.000 tm_pt_16=0.000 tm_glue_0=7.000 lm_0=-29.290 lm_1=-39.134 WordPenalty=-5.646 OOVPenalty=-100.000 ||| -181.007
+62 ||| the oldest literature first মৌখিকভাবে and later is written form . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-8.000 tm_pt_3=0.000 tm_pt_4=-5.000 tm_pt_5=-21.290 tm_pt_6=-13.446 tm_pt_7=-8.000 tm_pt_8=-21.744 tm_pt_9=-1.007 tm_pt_10=-19.269 tm_pt_11=0.000 tm_pt_12=-5.296 tm_pt_13=0.000 tm_pt_14=-10.000 tm_pt_15=-1.000 tm_pt_16=0.000 tm_glue_0=5.000 lm_0=-28.837 lm_1=-39.134 WordPenalty=-5.646 OOVPenalty=-100.000 ||| -181.293
+62 ||| the oldest literature first মৌখিকভাবে and then written in the . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-8.000 tm_pt_3=0.000 tm_pt_4=-6.000 tm_pt_5=-28.254 tm_pt_6=-14.856 tm_pt_7=-8.000 tm_pt_8=-21.744 tm_pt_9=-0.167 tm_pt_10=-26.282 tm_pt_11=0.000 tm_pt_12=-7.137 tm_pt_13=0.000 tm_pt_14=-10.000 tm_pt_15=-1.000 tm_pt_16=0.000 tm_glue_0=7.000 lm_0=-27.774 lm_1=-39.134 WordPenalty=-5.646 OOVPenalty=-100.000 ||| -181.608
+62 ||| the oldest literary first মৌখিকভাবে and later written in the . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-8.000 tm_pt_3=0.000 tm_pt_4=-6.000 tm_pt_5=-25.997 tm_pt_6=-14.392 tm_pt_7=-8.000 tm_pt_8=-21.744 tm_pt_9=-0.162 tm_pt_10=-26.383 tm_pt_11=0.000 tm_pt_12=-8.480 tm_pt_13=0.000 tm_pt_14=-10.000 tm_pt_15=-1.000 tm_pt_16=0.000 tm_glue_0=7.000 lm_0=-27.906 lm_1=-39.134 WordPenalty=-5.646 OOVPenalty=-100.000 ||| -181.678
+62 ||| the oldest literature first মৌখিকভাবে and later written form used . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-8.000 tm_pt_3=0.000 tm_pt_4=-6.000 tm_pt_5=-20.086 tm_pt_6=-14.362 tm_pt_7=-8.000 tm_pt_8=-21.744 tm_pt_9=-0.149 tm_pt_10=-19.482 tm_pt_11=0.000 tm_pt_12=-6.549 tm_pt_13=0.000 tm_pt_14=-10.000 tm_pt_15=-1.000 tm_pt_16=0.000 tm_glue_0=7.000 lm_0=-30.913 lm_1=-39.134 WordPenalty=-5.646 OOVPenalty=-100.000 ||| -181.774
+62 ||| the oldest literature first মৌখিকভাবে and later written in is . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-8.000 tm_pt_3=0.000 tm_pt_4=-6.000 tm_pt_5=-24.223 tm_pt_6=-12.840 tm_pt_7=-8.000 tm_pt_8=-21.744 tm_pt_9=-0.143 tm_pt_10=-24.596 tm_pt_11=0.000 tm_pt_12=-6.143 tm_pt_13=0.000 tm_pt_14=-10.000 tm_pt_15=-1.000 tm_pt_16=0.000 tm_glue_0=7.000 lm_0=-29.777 lm_1=-39.134 WordPenalty=-5.646 OOVPenalty=-100.000 ||| -181.817
+62 ||| the oldest literature first মৌখিকভাবে and later written in used . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-8.000 tm_pt_3=0.000 tm_pt_4=-6.000 tm_pt_5=-23.020 tm_pt_6=-13.756 tm_pt_7=-8.000 tm_pt_8=-21.744 tm_pt_9=-0.149 tm_pt_10=-23.233 tm_pt_11=0.000 tm_pt_12=-6.549 tm_pt_13=0.000 tm_pt_14=-10.000 tm_pt_15=-1.000 tm_pt_16=0.000 tm_glue_0=7.000 lm_0=-29.975 lm_1=-39.134 WordPenalty=-5.646 OOVPenalty=-100.000 ||| -181.940
+62 ||| the oldest literature first মৌখিকভাবে and then written form the . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-8.000 tm_pt_3=0.000 tm_pt_4=-6.000 tm_pt_5=-25.320 tm_pt_6=-15.462 tm_pt_7=-8.000 tm_pt_8=-21.744 tm_pt_9=-0.167 tm_pt_10=-22.532 tm_pt_11=0.000 tm_pt_12=-7.137 tm_pt_13=0.000 tm_pt_14=-10.000 tm_pt_15=-1.000 tm_pt_16=0.000 tm_glue_0=7.000 lm_0=-29.168 lm_1=-39.134 WordPenalty=-5.646 OOVPenalty=-100.000 ||| -182.006
+63 ||| in 1989 the গণশত্রু film his conducted তুলনামূলকভাবে দুর্বল and this দীর্ঘদিনের অসুস্থতাশেষে back , after cinema building পুনর্প্রচেষ্টা considered a . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-14.000 tm_pt_3=0.000 tm_pt_4=-12.000 tm_pt_5=-54.205 tm_pt_6=-41.751 tm_pt_7=-14.000 tm_pt_8=-38.052 tm_pt_9=-7.241 tm_pt_10=-47.718 tm_pt_11=0.000 tm_pt_12=-11.703 tm_pt_13=0.000 tm_pt_14=-16.000 tm_pt_15=-1.000 tm_pt_16=0.000 tm_glue_0=18.000 lm_0=-74.493 lm_1=-75.006 WordPenalty=-10.423 OOVPenalty=-600.000 ||| -787.460
+63 ||| in 1989 the গণশত্রু film his conducted তুলনামূলকভাবে দুর্বল and this দীর্ঘদিনের অসুস্থতাশেষে back , after cinema , পুনর্প্রচেষ্টা considered a . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-14.000 tm_pt_3=0.000 tm_pt_4=-12.000 tm_pt_5=-60.033 tm_pt_6=-41.464 tm_pt_7=-14.000 tm_pt_8=-38.052 tm_pt_9=-7.241 tm_pt_10=-53.905 tm_pt_11=0.000 tm_pt_12=-11.703 tm_pt_13=0.000 tm_pt_14=-16.000 tm_pt_15=-1.000 tm_pt_16=0.000 tm_glue_0=18.000 lm_0=-72.451 lm_1=-75.006 WordPenalty=-10.423 OOVPenalty=-600.000 ||| -787.554
+63 ||| in 1989 the গণশত্রু film his conducted তুলনামূলকভাবে দুর্বল and this দীর্ঘদিনের অসুস্থতাশেষে back , after cinema building পুনর্প্রচেষ্টা a considered . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-14.000 tm_pt_3=0.000 tm_pt_4=-13.000 tm_pt_5=-54.205 tm_pt_6=-41.751 tm_pt_7=-14.000 tm_pt_8=-38.052 tm_pt_9=-7.241 tm_pt_10=-47.536 tm_pt_11=0.000 tm_pt_12=-11.703 tm_pt_13=0.000 tm_pt_14=-16.000 tm_pt_15=-1.000 tm_pt_16=0.000 tm_glue_0=19.000 lm_0=-75.470 lm_1=-75.006 WordPenalty=-10.423 OOVPenalty=-600.000 ||| -787.657
+63 ||| in 1989 the গণশত্রু film his between তুলনামূলকভাবে দুর্বল and this দীর্ঘদিনের অসুস্থতাশেষে back , after cinema building পুনর্প্রচেষ্টা considered a . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-14.000 tm_pt_3=0.000 tm_pt_4=-11.000 tm_pt_5=-56.876 tm_pt_6=-42.444 tm_pt_7=-14.000 tm_pt_8=-38.052 tm_pt_9=-6.609 tm_pt_10=-48.730 tm_pt_11=0.000 tm_pt_12=-11.233 tm_pt_13=0.000 tm_pt_14=-16.000 tm_pt_15=-1.000 tm_pt_16=0.000 tm_glue_0=17.000 lm_0=-73.629 lm_1=-75.006 WordPenalty=-10.423 OOVPenalty=-600.000 ||| -787.680
+63 ||| in 1989 the গণশত্রু film his conducted তুলনামূলকভাবে দুর্বল and this দীর্ঘদিনের অসুস্থতাশেষে back , after cinema , পুনর্প্রচেষ্টা a considered . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-14.000 tm_pt_3=0.000 tm_pt_4=-13.000 tm_pt_5=-60.033 tm_pt_6=-41.464 tm_pt_7=-14.000 tm_pt_8=-38.052 tm_pt_9=-7.241 tm_pt_10=-53.722 tm_pt_11=0.000 tm_pt_12=-11.703 tm_pt_13=0.000 tm_pt_14=-16.000 tm_pt_15=-1.000 tm_pt_16=0.000 tm_glue_0=19.000 lm_0=-73.428 lm_1=-75.006 WordPenalty=-10.423 OOVPenalty=-600.000 ||| -787.751
+63 ||| in 1989 the গণশত্রু film his between তুলনামূলকভাবে দুর্বল and this দীর্ঘদিনের অসুস্থতাশেষে back , after cinema , পুনর্প্রচেষ্টা considered a . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-14.000 tm_pt_3=0.000 tm_pt_4=-11.000 tm_pt_5=-62.704 tm_pt_6=-42.157 tm_pt_7=-14.000 tm_pt_8=-38.052 tm_pt_9=-6.609 tm_pt_10=-54.916 tm_pt_11=0.000 tm_pt_12=-11.233 tm_pt_13=0.000 tm_pt_14=-16.000 tm_pt_15=-1.000 tm_pt_16=0.000 tm_glue_0=17.000 lm_0=-71.586 lm_1=-75.006 WordPenalty=-10.423 OOVPenalty=-600.000 ||| -787.774
+63 ||| in 1989 the গণশত্রু film his between তুলনামূলকভাবে দুর্বল and this দীর্ঘদিনের অসুস্থতাশেষে back , after cinema building পুনর্প্রচেষ্টা a considered . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-14.000 tm_pt_3=0.000 tm_pt_4=-12.000 tm_pt_5=-56.876 tm_pt_6=-42.444 tm_pt_7=-14.000 tm_pt_8=-38.052 tm_pt_9=-6.609 tm_pt_10=-48.547 tm_pt_11=0.000 tm_pt_12=-11.233 tm_pt_13=0.000 tm_pt_14=-16.000 tm_pt_15=-1.000 tm_pt_16=0.000 tm_glue_0=18.000 lm_0=-74.606 lm_1=-75.006 WordPenalty=-10.423 OOVPenalty=-600.000 ||| -787.877
+63 ||| in 1989 the গণশত্রু film his , তুলনামূলকভাবে দুর্বল and this দীর্ঘদিনের অসুস্থতাশেষে back , after cinema building পুনর্প্রচেষ্টা considered a . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-14.000 tm_pt_3=0.000 tm_pt_4=-12.000 tm_pt_5=-60.164 tm_pt_6=-41.346 tm_pt_7=-14.000 tm_pt_8=-38.052 tm_pt_9=-7.241 tm_pt_10=-55.850 tm_pt_11=0.000 tm_pt_12=-11.703 tm_pt_13=0.000 tm_pt_14=-16.000 tm_pt_15=-1.000 tm_pt_16=0.000 tm_glue_0=18.000 lm_0=-72.352 lm_1=-75.006 WordPenalty=-10.423 OOVPenalty=-600.000 ||| -787.921
+63 ||| in 1989 the গণশত্রু film his between তুলনামূলকভাবে দুর্বল and this দীর্ঘদিনের অসুস্থতাশেষে back , after cinema , পুনর্প্রচেষ্টা a considered . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-14.000 tm_pt_3=0.000 tm_pt_4=-12.000 tm_pt_5=-62.704 tm_pt_6=-42.157 tm_pt_7=-14.000 tm_pt_8=-38.052 tm_pt_9=-6.609 tm_pt_10=-54.734 tm_pt_11=0.000 tm_pt_12=-11.233 tm_pt_13=0.000 tm_pt_14=-16.000 tm_pt_15=-1.000 tm_pt_16=0.000 tm_glue_0=18.000 lm_0=-72.564 lm_1=-75.006 WordPenalty=-10.423 OOVPenalty=-600.000 ||| -787.971
+63 ||| in 1989 the গণশত্রু film his , তুলনামূলকভাবে দুর্বল and this দীর্ঘদিনের অসুস্থতাশেষে back , after cinema , পুনর্প্রচেষ্টা considered a . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-14.000 tm_pt_3=0.000 tm_pt_4=-12.000 tm_pt_5=-65.992 tm_pt_6=-41.058 tm_pt_7=-14.000 tm_pt_8=-38.052 tm_pt_9=-7.241 tm_pt_10=-62.037 tm_pt_11=0.000 tm_pt_12=-11.703 tm_pt_13=0.000 tm_pt_14=-16.000 tm_pt_15=-1.000 tm_pt_16=0.000 tm_glue_0=18.000 lm_0=-70.310 lm_1=-75.006 WordPenalty=-10.423 OOVPenalty=-600.000 ||| -788.015
+64 ||| the বলবিদ্যা ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-1.000 tm_pt_3=0.000 tm_pt_4=-1.000 tm_pt_5=-8.263 tm_pt_6=-3.559 tm_pt_7=-1.000 tm_pt_8=-2.718 tm_pt_9=-0.368 tm_pt_10=-7.494 tm_pt_11=0.000 tm_pt_12=-1.253 tm_pt_13=0.000 tm_pt_14=-1.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=2.000 lm_0=-8.364 lm_1=-9.783 WordPenalty=-1.737 OOVPenalty=-100.000 ||| -120.385
+64 ||| mathematical বলবিদ্যা ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-1.000 tm_pt_3=0.000 tm_pt_4=-1.000 tm_pt_5=-1.823 tm_pt_6=-2.461 tm_pt_7=-1.000 tm_pt_8=-2.718 tm_pt_9=-1.000 tm_pt_10=-4.466 tm_pt_11=0.000 tm_pt_12=-1.946 tm_pt_13=0.000 tm_pt_14=-1.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=2.000 lm_0=-9.917 lm_1=-9.783 WordPenalty=-1.737 OOVPenalty=-100.000 ||| -120.677
+64 ||| • বলবিদ্যা ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-1.000 tm_pt_3=0.000 tm_pt_4=-1.000 tm_pt_5=0.000 tm_pt_6=-4.812 tm_pt_7=-1.000 tm_pt_8=-2.718 tm_pt_9=-0.368 tm_pt_10=0.000 tm_pt_11=0.000 tm_pt_12=-1.253 tm_pt_13=0.000 tm_pt_14=-1.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=2.000 lm_0=-11.469 lm_1=-9.783 WordPenalty=-1.737 OOVPenalty=-100.000 ||| -121.130
+64 ||| . বলবিদ্যা ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-1.000 tm_pt_3=0.000 tm_pt_4=0.000 tm_pt_5=-6.855 tm_pt_6=-2.561 tm_pt_7=-1.000 tm_pt_8=-2.718 tm_pt_9=-0.018 tm_pt_10=-3.219 tm_pt_11=0.000 tm_pt_12=-1.649 tm_pt_13=0.000 tm_pt_14=-1.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=1.000 lm_0=-10.010 lm_1=-9.783 WordPenalty=-1.737 OOVPenalty=-100.000 ||| -121.521
+64 ||| বলবিদ্যা mathematical theory . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-1.000 tm_pt_3=0.000 tm_pt_4=0.000 tm_pt_5=-13.157 tm_pt_6=-2.797 tm_pt_7=-1.000 tm_pt_8=-2.718 tm_pt_9=-1.000 tm_pt_10=0.000 tm_pt_11=0.000 tm_pt_12=-3.258 tm_pt_13=0.000 tm_pt_14=-3.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=1.000 lm_0=-12.880 lm_1=-16.306 WordPenalty=-2.606 OOVPenalty=-100.000 ||| -131.102
+64 ||| mathematical theory . বলবিদ্যা ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-1.000 tm_pt_3=0.000 tm_pt_4=-1.000 tm_pt_5=-13.157 tm_pt_6=-2.797 tm_pt_7=-1.000 tm_pt_8=-2.718 tm_pt_9=-1.000 tm_pt_10=0.000 tm_pt_11=0.000 tm_pt_12=-1.946 tm_pt_13=0.000 tm_pt_14=-3.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=2.000 lm_0=-15.855 lm_1=-16.306 WordPenalty=-2.606 OOVPenalty=-100.000 ||| -133.354
+65 ||| other স্বত্ত্ব-সংরক্ষিত linux operating system like windows and mac os to acquire different . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-9.000 tm_pt_3=0.000 tm_pt_4=-4.000 tm_pt_5=-19.554 tm_pt_6=-11.681 tm_pt_7=-9.000 tm_pt_8=-24.462 tm_pt_9=-2.503 tm_pt_10=-4.595 tm_pt_11=0.000 tm_pt_12=-2.985 tm_pt_13=0.000 tm_pt_14=-13.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=5.000 lm_0=-34.003 lm_1=-48.917 WordPenalty=-6.949 OOVPenalty=-100.000 ||| -190.224
+65 ||| other স্বত্ত্ব-সংরক্ষিত operating system like windows and mac os to linux acquire different . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-9.000 tm_pt_3=0.000 tm_pt_4=-7.000 tm_pt_5=-19.804 tm_pt_6=-11.885 tm_pt_7=-9.000 tm_pt_8=-24.462 tm_pt_9=-2.135 tm_pt_10=-5.647 tm_pt_11=0.000 tm_pt_12=-3.453 tm_pt_13=0.000 tm_pt_14=-13.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=8.000 lm_0=-36.650 lm_1=-48.917 WordPenalty=-6.949 OOVPenalty=-100.000 ||| -190.860
+65 ||| other স্বত্ত্ব-সংরক্ষিত operating systems like windows and mac os to linux acquire different . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-9.000 tm_pt_3=0.000 tm_pt_4=-7.000 tm_pt_5=-19.646 tm_pt_6=-12.587 tm_pt_7=-9.000 tm_pt_8=-24.462 tm_pt_9=-2.135 tm_pt_10=-5.555 tm_pt_11=0.000 tm_pt_12=-4.870 tm_pt_13=0.000 tm_pt_14=-13.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=8.000 lm_0=-36.234 lm_1=-48.917 WordPenalty=-6.949 OOVPenalty=-100.000 ||| -191.033
+65 ||| other স্বত্ত্ব-সংরক্ষিত linux operating system like windows and mac os to acquire a . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-10.000 tm_pt_3=0.000 tm_pt_4=-5.000 tm_pt_5=-22.787 tm_pt_6=-12.731 tm_pt_7=-10.000 tm_pt_8=-27.180 tm_pt_9=-2.503 tm_pt_10=-11.959 tm_pt_11=0.000 tm_pt_12=-6.033 tm_pt_13=0.000 tm_pt_14=-13.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=6.000 lm_0=-31.792 lm_1=-48.917 WordPenalty=-6.949 OOVPenalty=-100.000 ||| -191.644
+65 ||| other স্বত্ত্ব-সংরক্ষিত linux operating system like windows and mac os to acquire separate . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-10.000 tm_pt_3=0.000 tm_pt_4=-5.000 tm_pt_5=-17.756 tm_pt_6=-12.480 tm_pt_7=-10.000 tm_pt_8=-27.180 tm_pt_9=-1.553 tm_pt_10=-5.857 tm_pt_11=0.000 tm_pt_12=-4.647 tm_pt_13=0.000 tm_pt_14=-13.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=6.000 lm_0=-34.986 lm_1=-48.917 WordPenalty=-6.949 OOVPenalty=-100.000 ||| -191.696
+66 ||| asia টাইমসের ভাষ্য to , ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-3.000 tm_pt_3=0.000 tm_pt_4=-3.000 tm_pt_5=-7.205 tm_pt_6=-3.750 tm_pt_7=-3.000 tm_pt_8=-8.154 tm_pt_9=-0.368 tm_pt_10=-7.348 tm_pt_11=0.000 tm_pt_12=-2.553 tm_pt_13=0.000 tm_pt_14=-3.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=5.000 lm_0=-20.278 lm_1=-19.567 WordPenalty=-3.040 OOVPenalty=-200.000 ||| -241.426
+66 ||| asia টাইমসের ভাষ্য according to , ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-3.000 tm_pt_3=0.000 tm_pt_4=-3.000 tm_pt_5=-9.191 tm_pt_6=-3.783 tm_pt_7=-3.000 tm_pt_8=-8.154 tm_pt_9=-0.000 tm_pt_10=-2.160 tm_pt_11=0.000 tm_pt_12=-0.413 tm_pt_13=0.000 tm_pt_14=-4.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=5.000 lm_0=-21.778 lm_1=-22.828 WordPenalty=-3.474 OOVPenalty=-200.000 ||| -243.372
+66 ||| the টাইমসের ভাষ্য to , ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-3.000 tm_pt_3=0.000 tm_pt_4=-3.000 tm_pt_5=-15.228 tm_pt_6=-6.778 tm_pt_7=-3.000 tm_pt_8=-8.154 tm_pt_9=-0.503 tm_pt_10=-13.835 tm_pt_11=0.000 tm_pt_12=-5.498 tm_pt_13=0.000 tm_pt_14=-3.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=5.000 lm_0=-17.775 lm_1=-19.567 WordPenalty=-3.040 OOVPenalty=-200.000 ||| -243.736
+66 ||| asia টাইমসের according to ভাষ্য , ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-3.000 tm_pt_3=0.000 tm_pt_4=-2.000 tm_pt_5=-9.191 tm_pt_6=-3.783 tm_pt_7=-3.000 tm_pt_8=-8.154 tm_pt_9=-0.002 tm_pt_10=-2.232 tm_pt_11=0.000 tm_pt_12=-0.468 tm_pt_13=0.000 tm_pt_14=-4.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=4.000 lm_0=-21.781 lm_1=-22.828 WordPenalty=-3.474 OOVPenalty=-200.000 ||| -244.380
+66 ||| টাইমসের asia ভাষ্য to , ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-3.000 tm_pt_3=0.000 tm_pt_4=-2.000 tm_pt_5=-7.205 tm_pt_6=-3.750 tm_pt_7=-3.000 tm_pt_8=-8.154 tm_pt_9=-0.368 tm_pt_10=-9.649 tm_pt_11=0.000 tm_pt_12=-4.609 tm_pt_13=0.000 tm_pt_14=-3.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=4.000 lm_0=-20.816 lm_1=-19.567 WordPenalty=-3.040 OOVPenalty=-200.000 ||| -244.381
+67 ||| open source or open source -lrb- open source -rrb- the money is computer software the source code or the সাংকেতিক language open way বিতরণ to . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-19.000 tm_pt_3=0.000 tm_pt_4=-19.000 tm_pt_5=-47.236 tm_pt_6=-22.736 tm_pt_7=-19.000 tm_pt_8=-51.642 tm_pt_9=-2.508 tm_pt_10=-24.834 tm_pt_11=0.000 tm_pt_12=-7.172 tm_pt_13=0.000 tm_pt_14=-22.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=23.000 lm_0=-69.951 lm_1=-88.050 WordPenalty=-12.160 OOVPenalty=-400.000 ||| -570.805
+67 ||| open source or open source -lrb- open source -rrb- the money is computer software the source code or main সাংকেতিক language open way বিতরণ to . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-19.000 tm_pt_3=0.000 tm_pt_4=-19.000 tm_pt_5=-42.789 tm_pt_6=-21.988 tm_pt_7=-19.000 tm_pt_8=-51.642 tm_pt_9=-1.876 tm_pt_10=-23.736 tm_pt_11=0.000 tm_pt_12=-6.479 tm_pt_13=0.000 tm_pt_14=-22.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=23.000 lm_0=-71.727 lm_1=-88.050 WordPenalty=-12.160 OOVPenalty=-400.000 ||| -570.848
+67 ||| open source or open source -lrb- open source -rrb- the money is computer software the source code or the সাংকেতিক language open way বিতরণ is . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-19.000 tm_pt_3=0.000 tm_pt_4=-19.000 tm_pt_5=-47.949 tm_pt_6=-23.336 tm_pt_7=-19.000 tm_pt_8=-51.642 tm_pt_9=-2.508 tm_pt_10=-24.678 tm_pt_11=0.000 tm_pt_12=-7.134 tm_pt_13=0.000 tm_pt_14=-22.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=23.000 lm_0=-70.022 lm_1=-88.050 WordPenalty=-12.160 OOVPenalty=-400.000 ||| -571.180
+67 ||| open source or open source -lrb- open source -rrb- the money is computer software the source code or the সাংকেতিক language বিতরণ to open way . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-18.000 tm_pt_3=0.000 tm_pt_4=-16.000 tm_pt_5=-47.236 tm_pt_6=-22.736 tm_pt_7=-18.000 tm_pt_8=-48.924 tm_pt_9=-2.875 tm_pt_10=-23.924 tm_pt_11=0.000 tm_pt_12=-6.527 tm_pt_13=0.000 tm_pt_14=-22.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=19.000 lm_0=-68.179 lm_1=-88.050 WordPenalty=-12.160 OOVPenalty=-400.000 ||| -571.187
+67 ||| open source or open source -lrb- open source -rrb- the money is computer software the source code or main সাংকেতিক language open way বিতরণ is . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-19.000 tm_pt_3=0.000 tm_pt_4=-19.000 tm_pt_5=-43.503 tm_pt_6=-22.588 tm_pt_7=-19.000 tm_pt_8=-51.642 tm_pt_9=-1.876 tm_pt_10=-23.580 tm_pt_11=0.000 tm_pt_12=-6.441 tm_pt_13=0.000 tm_pt_14=-22.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=23.000 lm_0=-71.798 lm_1=-88.050 WordPenalty=-12.160 OOVPenalty=-400.000 ||| -571.223
+67 ||| open source or open source -lrb- open source -rrb- the money is computer software the source code or main সাংকেতিক language বিতরণ to open way . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-18.000 tm_pt_3=0.000 tm_pt_4=-16.000 tm_pt_5=-42.789 tm_pt_6=-21.988 tm_pt_7=-18.000 tm_pt_8=-48.924 tm_pt_9=-2.243 tm_pt_10=-22.825 tm_pt_11=0.000 tm_pt_12=-5.834 tm_pt_13=0.000 tm_pt_14=-22.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=19.000 lm_0=-69.955 lm_1=-88.050 WordPenalty=-12.160 OOVPenalty=-400.000 ||| -571.230
+67 ||| open source or open source -lrb- open source -rrb- the money is computer software the source code or the সাংকেতিক language free way বিতরণ to . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-19.000 tm_pt_3=0.000 tm_pt_4=-19.000 tm_pt_5=-47.210 tm_pt_6=-22.925 tm_pt_7=-19.000 tm_pt_8=-51.642 tm_pt_9=-2.511 tm_pt_10=-25.203 tm_pt_11=0.000 tm_pt_12=-8.361 tm_pt_13=0.000 tm_pt_14=-22.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=23.000 lm_0=-69.884 lm_1=-88.050 WordPenalty=-12.160 OOVPenalty=-400.000 ||| -571.300
+67 ||| open source or open source -lrb- open source -rrb- the money is computer software the source code or main সাংকেতিক language free way বিতরণ to . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-19.000 tm_pt_3=0.000 tm_pt_4=-19.000 tm_pt_5=-42.764 tm_pt_6=-22.177 tm_pt_7=-19.000 tm_pt_8=-51.642 tm_pt_9=-1.879 tm_pt_10=-24.104 tm_pt_11=0.000 tm_pt_12=-7.668 tm_pt_13=0.000 tm_pt_14=-22.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=23.000 lm_0=-71.660 lm_1=-88.050 WordPenalty=-12.160 OOVPenalty=-400.000 ||| -571.343
+67 ||| open source or open source -lrb- open source -rrb- the money is computer software the source code or the সাংকেতিক language open in বিতরণ to . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-19.000 tm_pt_3=0.000 tm_pt_4=-19.000 tm_pt_5=-51.736 tm_pt_6=-23.141 tm_pt_7=-19.000 tm_pt_8=-51.642 tm_pt_9=-2.557 tm_pt_10=-29.937 tm_pt_11=0.000 tm_pt_12=-7.865 tm_pt_13=0.000 tm_pt_14=-22.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=23.000 lm_0=-68.374 lm_1=-88.050 WordPenalty=-12.160 OOVPenalty=-400.000 ||| -571.462
+67 ||| open source or open source -lrb- open source -rrb- the money is computer software the source code or main সাংকেতিক language open in বিতরণ to . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-19.000 tm_pt_3=0.000 tm_pt_4=-19.000 tm_pt_5=-47.289 tm_pt_6=-22.394 tm_pt_7=-19.000 tm_pt_8=-51.642 tm_pt_9=-1.925 tm_pt_10=-28.839 tm_pt_11=0.000 tm_pt_12=-7.172 tm_pt_13=0.000 tm_pt_14=-22.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=23.000 lm_0=-70.151 lm_1=-88.050 WordPenalty=-12.160 OOVPenalty=-400.000 ||| -571.505
+68 ||| bangladesh অনলাইনে dhaka ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-2.000 tm_pt_3=0.000 tm_pt_4=-2.000 tm_pt_5=-1.665 tm_pt_6=-0.471 tm_pt_7=-2.000 tm_pt_8=-5.436 tm_pt_9=0.000 tm_pt_10=-0.375 tm_pt_11=0.000 tm_pt_12=-0.055 tm_pt_13=0.000 tm_pt_14=-2.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=3.000 lm_0=-12.999 lm_1=-13.045 WordPenalty=-2.171 OOVPenalty=-100.000 ||| -123.523
+68 ||| bangladesh অনলাইনে the ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-2.000 tm_pt_3=0.000 tm_pt_4=-2.000 tm_pt_5=-7.966 tm_pt_6=-2.711 tm_pt_7=-2.000 tm_pt_8=-5.436 tm_pt_9=-0.000 tm_pt_10=-6.172 tm_pt_11=0.000 tm_pt_12=-3.195 tm_pt_13=0.000 tm_pt_14=-2.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=3.000 lm_0=-12.123 lm_1=-13.045 WordPenalty=-2.171 OOVPenalty=-100.000 ||| -127.024
+68 ||| bangladesh dhaka অনলাইনে ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-2.000 tm_pt_3=0.000 tm_pt_4=-1.000 tm_pt_5=-1.665 tm_pt_6=-0.471 tm_pt_7=-2.000 tm_pt_8=-5.436 tm_pt_9=-0.007 tm_pt_10=-4.393 tm_pt_11=0.000 tm_pt_12=-3.350 tm_pt_13=0.000 tm_pt_14=-2.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=2.000 lm_0=-13.457 lm_1=-13.045 WordPenalty=-2.171 OOVPenalty=-100.000 ||| -127.267
+68 ||| অনলাইনে bangladesh dhaka ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-2.000 tm_pt_3=0.000 tm_pt_4=-1.000 tm_pt_5=-1.665 tm_pt_6=-0.471 tm_pt_7=-2.000 tm_pt_8=-5.436 tm_pt_9=-0.000 tm_pt_10=-3.099 tm_pt_11=0.000 tm_pt_12=-3.378 tm_pt_13=0.000 tm_pt_14=-2.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=2.000 lm_0=-14.038 lm_1=-13.045 WordPenalty=-2.171 OOVPenalty=-100.000 ||| -127.655
+68 ||| অনলাইনে dhaka bangladesh ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-2.000 tm_pt_3=0.000 tm_pt_4=0.000 tm_pt_5=-1.665 tm_pt_6=-0.471 tm_pt_7=-2.000 tm_pt_8=-5.436 tm_pt_9=-0.000 tm_pt_10=-3.217 tm_pt_11=0.000 tm_pt_12=-3.428 tm_pt_13=0.000 tm_pt_14=-2.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=1.000 lm_0=-14.148 lm_1=-13.045 WordPenalty=-2.171 OOVPenalty=-100.000 ||| -128.804
+69 ||| first world war germany হেরে be . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-5.000 tm_pt_3=0.000 tm_pt_4=-5.000 tm_pt_5=-10.464 tm_pt_6=-4.004 tm_pt_7=-5.000 tm_pt_8=-13.590 tm_pt_9=-0.037 tm_pt_10=-5.752 tm_pt_11=0.000 tm_pt_12=-3.057 tm_pt_13=0.000 tm_pt_14=-6.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=6.000 lm_0=-20.261 lm_1=-26.089 WordPenalty=-3.909 OOVPenalty=-100.000 ||| -146.864
+69 ||| first world war germany হেরে easily . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-4.000 tm_pt_3=0.000 tm_pt_4=-4.000 tm_pt_5=-11.761 tm_pt_6=-7.860 tm_pt_7=-4.000 tm_pt_8=-10.872 tm_pt_9=-0.386 tm_pt_10=-2.970 tm_pt_11=0.000 tm_pt_12=-1.870 tm_pt_13=0.000 tm_pt_14=-6.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=5.000 lm_0=-20.090 lm_1=-26.089 WordPenalty=-3.909 OOVPenalty=-100.000 ||| -147.174
+69 ||| first world war germany হেরে can . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-5.000 tm_pt_3=0.000 tm_pt_4=-5.000 tm_pt_5=-10.407 tm_pt_6=-4.209 tm_pt_7=-5.000 tm_pt_8=-13.590 tm_pt_9=-0.386 tm_pt_10=-6.604 tm_pt_11=0.000 tm_pt_12=-3.974 tm_pt_13=0.000 tm_pt_14=-6.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=6.000 lm_0=-19.817 lm_1=-26.089 WordPenalty=-3.909 OOVPenalty=-100.000 ||| -147.206
+69 ||| first world war germany হেরে be ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-4.000 tm_pt_3=0.000 tm_pt_4=-4.000 tm_pt_5=-11.045 tm_pt_6=-11.540 tm_pt_7=-4.000 tm_pt_8=-10.872 tm_pt_9=-1.018 tm_pt_10=-7.352 tm_pt_11=0.000 tm_pt_12=-2.563 tm_pt_13=0.000 tm_pt_14=-5.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=5.000 lm_0=-20.105 lm_1=-22.828 WordPenalty=-3.474 OOVPenalty=-100.000 ||| -148.112
+69 ||| germany first world war হেরে be ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-4.000 tm_pt_3=0.000 tm_pt_4=-3.000 tm_pt_5=-11.045 tm_pt_6=-11.540 tm_pt_7=-4.000 tm_pt_8=-10.872 tm_pt_9=-1.050 tm_pt_10=-6.128 tm_pt_11=0.000 tm_pt_12=-2.563 tm_pt_13=0.000 tm_pt_14=-5.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=3.000 lm_0=-19.984 lm_1=-22.828 WordPenalty=-3.474 OOVPenalty=-100.000 ||| -149.635
+69 ||| first world war german হেরে be ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-4.000 tm_pt_3=0.000 tm_pt_4=-4.000 tm_pt_5=-12.991 tm_pt_6=-13.951 tm_pt_7=-4.000 tm_pt_8=-10.872 tm_pt_9=-1.037 tm_pt_10=-9.945 tm_pt_11=0.000 tm_pt_12=-5.173 tm_pt_13=0.000 tm_pt_14=-5.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=5.000 lm_0=-19.107 lm_1=-22.828 WordPenalty=-3.474 OOVPenalty=-100.000 ||| -149.683
+69 ||| the first world war germany হেরে be ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-3.000 tm_pt_3=0.000 tm_pt_4=-3.000 tm_pt_5=-15.740 tm_pt_6=-11.783 tm_pt_7=-3.000 tm_pt_8=-8.154 tm_pt_9=-1.135 tm_pt_10=-6.129 tm_pt_11=0.000 tm_pt_12=-2.191 tm_pt_13=0.000 tm_pt_14=-6.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=4.000 lm_0=-19.326 lm_1=-26.089 WordPenalty=-3.909 OOVPenalty=-100.000 ||| -149.688
+70 ||| but this is to for even research to going on . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-10.000 tm_pt_3=0.000 tm_pt_4=-10.000 tm_pt_5=-41.595 tm_pt_6=-11.055 tm_pt_7=-10.000 tm_pt_8=-27.180 tm_pt_9=-3.503 tm_pt_10=-30.198 tm_pt_11=0.000 tm_pt_12=-7.719 tm_pt_13=0.000 tm_pt_14=-11.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=10.000 lm_0=-25.403 lm_1=-39.134 WordPenalty=-5.646 OOVPenalty=0.000 ||| -82.457
+70 ||| but this is to for even research progress going on . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-10.000 tm_pt_3=0.000 tm_pt_4=-10.000 tm_pt_5=-37.087 tm_pt_6=-11.507 tm_pt_7=-10.000 tm_pt_8=-27.180 tm_pt_9=-3.503 tm_pt_10=-25.502 tm_pt_11=0.000 tm_pt_12=-7.719 tm_pt_13=0.000 tm_pt_14=-11.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=10.000 lm_0=-27.145 lm_1=-39.134 WordPenalty=-5.646 OOVPenalty=0.000 ||| -82.685
+70 ||| but this is to even for research to going on . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-10.000 tm_pt_3=0.000 tm_pt_4=-9.000 tm_pt_5=-41.595 tm_pt_6=-11.055 tm_pt_7=-10.000 tm_pt_8=-27.180 tm_pt_9=-3.503 tm_pt_10=-30.710 tm_pt_11=0.000 tm_pt_12=-8.104 tm_pt_13=0.000 tm_pt_14=-11.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=9.000 lm_0=-24.693 lm_1=-39.134 WordPenalty=-5.646 OOVPenalty=0.000 ||| -82.811
+70 ||| but this is to even for research progress going on . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-10.000 tm_pt_3=0.000 tm_pt_4=-9.000 tm_pt_5=-37.087 tm_pt_6=-11.507 tm_pt_7=-10.000 tm_pt_8=-27.180 tm_pt_9=-3.503 tm_pt_10=-26.014 tm_pt_11=0.000 tm_pt_12=-8.104 tm_pt_13=0.000 tm_pt_14=-11.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=9.000 lm_0=-26.435 lm_1=-39.134 WordPenalty=-5.646 OOVPenalty=0.000 ||| -83.040
+70 ||| but this is to for even research to going on ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-9.000 tm_pt_3=0.000 tm_pt_4=-9.000 tm_pt_5=-41.411 tm_pt_6=-19.178 tm_pt_7=-9.000 tm_pt_8=-24.462 tm_pt_9=-3.503 tm_pt_10=-30.189 tm_pt_11=0.000 tm_pt_12=-7.023 tm_pt_13=0.000 tm_pt_14=-10.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=9.000 lm_0=-26.717 lm_1=-35.872 WordPenalty=-5.212 OOVPenalty=0.000 ||| -84.283
+70 ||| but this is to for even research progress going on ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-9.000 tm_pt_3=0.000 tm_pt_4=-9.000 tm_pt_5=-36.903 tm_pt_6=-19.630 tm_pt_7=-9.000 tm_pt_8=-24.462 tm_pt_9=-3.503 tm_pt_10=-25.493 tm_pt_11=0.000 tm_pt_12=-7.023 tm_pt_13=0.000 tm_pt_14=-10.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=9.000 lm_0=-28.458 lm_1=-35.872 WordPenalty=-5.212 OOVPenalty=0.000 ||| -84.511
+70 ||| but this is to even for research to going on ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-9.000 tm_pt_3=0.000 tm_pt_4=-8.000 tm_pt_5=-41.411 tm_pt_6=-19.178 tm_pt_7=-9.000 tm_pt_8=-24.462 tm_pt_9=-3.503 tm_pt_10=-30.701 tm_pt_11=0.000 tm_pt_12=-7.407 tm_pt_13=0.000 tm_pt_14=-10.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=8.000 lm_0=-26.007 lm_1=-35.872 WordPenalty=-5.212 OOVPenalty=0.000 ||| -84.637
+70 ||| but this is to for presently research to going on ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-9.000 tm_pt_3=0.000 tm_pt_4=-9.000 tm_pt_5=-40.742 tm_pt_6=-19.178 tm_pt_7=-9.000 tm_pt_8=-24.462 tm_pt_9=-3.503 tm_pt_10=-30.255 tm_pt_11=0.000 tm_pt_12=-7.023 tm_pt_13=0.000 tm_pt_14=-10.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=9.000 lm_0=-27.162 lm_1=-35.872 WordPenalty=-5.212 OOVPenalty=0.000 ||| -84.722
+70 ||| but this is to presently for research to going on ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-9.000 tm_pt_3=0.000 tm_pt_4=-8.000 tm_pt_5=-40.742 tm_pt_6=-19.178 tm_pt_7=-9.000 tm_pt_8=-24.462 tm_pt_9=-3.503 tm_pt_10=-30.766 tm_pt_11=0.000 tm_pt_12=-7.407 tm_pt_13=0.000 tm_pt_14=-10.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=8.000 lm_0=-26.172 lm_1=-35.872 WordPenalty=-5.212 OOVPenalty=0.000 ||| -84.730
+70 ||| but this is to even for research progress going on ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-9.000 tm_pt_3=0.000 tm_pt_4=-8.000 tm_pt_5=-36.903 tm_pt_6=-19.630 tm_pt_7=-9.000 tm_pt_8=-24.462 tm_pt_9=-3.503 tm_pt_10=-26.005 tm_pt_11=0.000 tm_pt_12=-7.407 tm_pt_13=0.000 tm_pt_14=-10.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=8.000 lm_0=-27.748 lm_1=-35.872 WordPenalty=-5.212 OOVPenalty=0.000 ||| -84.866
+71 ||| সুপারএইচ ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=0.000 tm_pt_3=0.000 tm_pt_4=0.000 tm_pt_5=0.000 tm_pt_6=0.000 tm_pt_7=0.000 tm_pt_8=0.000 tm_pt_9=0.000 tm_pt_10=0.000 tm_pt_11=0.000 tm_pt_12=0.000 tm_pt_13=0.000 tm_pt_14=0.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=1.000 lm_0=-7.355 lm_1=-6.522 WordPenalty=-1.303 OOVPenalty=-100.000 ||| -111.358
+72 ||| he army for আনফিট was declared . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-6.000 tm_pt_3=0.000 tm_pt_4=-4.000 tm_pt_5=-12.054 tm_pt_6=-13.977 tm_pt_7=-6.000 tm_pt_8=-16.308 tm_pt_9=-0.368 tm_pt_10=-13.455 tm_pt_11=0.000 tm_pt_12=-3.627 tm_pt_13=0.000 tm_pt_14=-6.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=5.000 lm_0=-20.183 lm_1=-26.089 WordPenalty=-3.909 OOVPenalty=-100.000 ||| -155.071
+72 ||| he army for আনফিট declared was . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-6.000 tm_pt_3=0.000 tm_pt_4=-5.000 tm_pt_5=-12.054 tm_pt_6=-13.977 tm_pt_7=-6.000 tm_pt_8=-16.308 tm_pt_9=-0.368 tm_pt_10=-12.735 tm_pt_11=0.000 tm_pt_12=-3.541 tm_pt_13=0.000 tm_pt_14=-6.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=6.000 lm_0=-21.450 lm_1=-26.089 WordPenalty=-3.909 OOVPenalty=-100.000 ||| -155.456
+72 ||| he army for আনফিট declared the . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-6.000 tm_pt_3=0.000 tm_pt_4=-5.000 tm_pt_5=-14.810 tm_pt_6=-14.785 tm_pt_7=-6.000 tm_pt_8=-16.308 tm_pt_9=-0.368 tm_pt_10=-14.635 tm_pt_11=0.000 tm_pt_12=-4.030 tm_pt_13=0.000 tm_pt_14=-6.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=6.000 lm_0=-20.541 lm_1=-26.089 WordPenalty=-3.909 OOVPenalty=-100.000 ||| -155.801
+72 ||| he army to আনফিট was declared . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-6.000 tm_pt_3=0.000 tm_pt_4=-4.000 tm_pt_5=-13.940 tm_pt_6=-14.814 tm_pt_7=-6.000 tm_pt_8=-16.308 tm_pt_9=-0.368 tm_pt_10=-15.536 tm_pt_11=0.000 tm_pt_12=-4.934 tm_pt_13=0.000 tm_pt_14=-6.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=5.000 lm_0=-19.972 lm_1=-26.089 WordPenalty=-3.909 OOVPenalty=-100.000 ||| -156.459
+72 ||| he army for আনফিট declared in . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-6.000 tm_pt_3=0.000 tm_pt_4=-5.000 tm_pt_5=-14.187 tm_pt_6=-15.095 tm_pt_7=-6.000 tm_pt_8=-16.308 tm_pt_9=-0.368 tm_pt_10=-14.207 tm_pt_11=0.000 tm_pt_12=-4.672 tm_pt_13=0.000 tm_pt_14=-6.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=6.000 lm_0=-21.040 lm_1=-26.089 WordPenalty=-3.909 OOVPenalty=-100.000 ||| -156.523
+72 ||| he army আনফিট for was declared . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-6.000 tm_pt_3=0.000 tm_pt_4=-3.000 tm_pt_5=-12.054 tm_pt_6=-13.977 tm_pt_7=-6.000 tm_pt_8=-16.308 tm_pt_9=-0.368 tm_pt_10=-13.967 tm_pt_11=0.000 tm_pt_12=-4.011 tm_pt_13=0.000 tm_pt_14=-6.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=4.000 lm_0=-20.562 lm_1=-26.089 WordPenalty=-3.909 OOVPenalty=-100.000 ||| -156.773
+72 ||| he army to আনফিট declared was . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-6.000 tm_pt_3=0.000 tm_pt_4=-5.000 tm_pt_5=-13.940 tm_pt_6=-14.814 tm_pt_7=-6.000 tm_pt_8=-16.308 tm_pt_9=-0.368 tm_pt_10=-14.815 tm_pt_11=0.000 tm_pt_12=-4.848 tm_pt_13=0.000 tm_pt_14=-6.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=6.000 lm_0=-21.239 lm_1=-26.089 WordPenalty=-3.909 OOVPenalty=-100.000 ||| -156.845
+73 ||| bhutto এ্যাসেম্বলি বয়কট to 2.5 with announced that the yahya khan mujib form government for for জানালে he no government has নেবেন . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-15.000 tm_pt_3=0.000 tm_pt_4=-12.000 tm_pt_5=-51.550 tm_pt_6=-32.711 tm_pt_7=-15.000 tm_pt_8=-40.770 tm_pt_9=-6.387 tm_pt_10=-32.377 tm_pt_11=0.000 tm_pt_12=-6.725 tm_pt_13=0.000 tm_pt_14=-19.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=16.000 lm_0=-74.299 lm_1=-78.267 WordPenalty=-10.857 OOVPenalty=-400.000 ||| -581.226
+73 ||| bhutto এ্যাসেম্বলি বয়কট to 2.5 with announced that the yahya khan was form government for for জানালে he no government has নেবেন . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-15.000 tm_pt_3=0.000 tm_pt_4=-12.000 tm_pt_5=-54.468 tm_pt_6=-32.962 tm_pt_7=-15.000 tm_pt_8=-40.770 tm_pt_9=-7.369 tm_pt_10=-35.733 tm_pt_11=0.000 tm_pt_12=-8.335 tm_pt_13=0.000 tm_pt_14=-19.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=16.000 lm_0=-72.148 lm_1=-78.267 WordPenalty=-10.857 OOVPenalty=-400.000 ||| -581.453
+73 ||| bhutto এ্যাসেম্বলি বয়কট to 2.5 with announced that the yahya khan mujib form government for to জানালে he no government has নেবেন . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-15.000 tm_pt_3=0.000 tm_pt_4=-12.000 tm_pt_5=-53.005 tm_pt_6=-33.116 tm_pt_7=-15.000 tm_pt_8=-40.770 tm_pt_9=-6.387 tm_pt_10=-33.151 tm_pt_11=0.000 tm_pt_12=-6.725 tm_pt_13=0.000 tm_pt_14=-19.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=16.000 lm_0=-74.200 lm_1=-78.267 WordPenalty=-10.857 OOVPenalty=-400.000 ||| -581.721
+73 ||| bhutto এ্যাসেম্বলি বয়কট to 2.5 with declared in the yahya khan mujib form government for for জানালে he no government has নেবেন . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-16.000 tm_pt_3=0.000 tm_pt_4=-13.000 tm_pt_5=-50.735 tm_pt_6=-29.491 tm_pt_7=-16.000 tm_pt_8=-43.488 tm_pt_9=-5.455 tm_pt_10=-39.390 tm_pt_11=0.000 tm_pt_12=-9.219 tm_pt_13=0.000 tm_pt_14=-19.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=17.000 lm_0=-74.046 lm_1=-78.267 WordPenalty=-10.857 OOVPenalty=-400.000 ||| -581.784
+73 ||| bhutto এ্যাসেম্বলি বয়কট to 2.5 by mujib announced that the yahya khan form government for for জানালে he no government has নেবেন . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-15.000 tm_pt_3=0.000 tm_pt_4=-9.000 tm_pt_5=-52.464 tm_pt_6=-33.588 tm_pt_7=-15.000 tm_pt_8=-40.770 tm_pt_9=-6.376 tm_pt_10=-33.449 tm_pt_11=0.000 tm_pt_12=-9.053 tm_pt_13=0.000 tm_pt_14=-19.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=13.000 lm_0=-71.145 lm_1=-78.267 WordPenalty=-10.857 OOVPenalty=-400.000 ||| -581.784
+73 ||| bhutto এ্যাসেম্বলি বয়কট to 2.5 with mujib announced that the yahya khan form government for for জানালে he no government has নেবেন . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-15.000 tm_pt_3=0.000 tm_pt_4=-9.000 tm_pt_5=-51.550 tm_pt_6=-32.711 tm_pt_7=-15.000 tm_pt_8=-40.770 tm_pt_9=-6.376 tm_pt_10=-33.112 tm_pt_11=0.000 tm_pt_12=-7.985 tm_pt_13=0.000 tm_pt_14=-19.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=13.000 lm_0=-72.023 lm_1=-78.267 WordPenalty=-10.857 OOVPenalty=-400.000 ||| -581.930
+73 ||| bhutto এ্যাসেম্বলি বয়কট to 2.5 with announced that the yahya khan mujib government for the for জানালে he no government has নেবেন . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-16.000 tm_pt_3=0.000 tm_pt_4=-12.000 tm_pt_5=-55.212 tm_pt_6=-31.900 tm_pt_7=-16.000 tm_pt_8=-43.488 tm_pt_9=-5.522 tm_pt_10=-40.450 tm_pt_11=0.000 tm_pt_12=-7.600 tm_pt_13=0.000 tm_pt_14=-19.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=16.000 lm_0=-72.232 lm_1=-78.267 WordPenalty=-10.857 OOVPenalty=-400.000 ||| -581.941
+73 ||| bhutto এ্যাসেম্বলি বয়কট to 2.5 with announced that the yahya khan was form government for to জানালে he no government has নেবেন . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-15.000 tm_pt_3=0.000 tm_pt_4=-12.000 tm_pt_5=-55.923 tm_pt_6=-33.367 tm_pt_7=-15.000 tm_pt_8=-40.770 tm_pt_9=-7.369 tm_pt_10=-36.507 tm_pt_11=0.000 tm_pt_12=-8.335 tm_pt_13=0.000 tm_pt_14=-19.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=16.000 lm_0=-72.049 lm_1=-78.267 WordPenalty=-10.857 OOVPenalty=-400.000 ||| -581.949
+73 ||| bhutto এ্যাসেম্বলি বয়কট to 2.5 announced that with the yahya khan mujib form government for for জানালে he no government has নেবেন . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-15.000 tm_pt_3=0.000 tm_pt_4=-11.000 tm_pt_5=-51.550 tm_pt_6=-32.711 tm_pt_7=-15.000 tm_pt_8=-40.770 tm_pt_9=-6.405 tm_pt_10=-33.276 tm_pt_11=0.000 tm_pt_12=-7.734 tm_pt_13=0.000 tm_pt_14=-19.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=15.000 lm_0=-73.638 lm_1=-78.267 WordPenalty=-10.857 OOVPenalty=-400.000 ||| -581.978
+73 ||| bhutto এ্যাসেম্বলি বয়কট to 2.5 with announced that the yahya khan mujib form government for for জানালে he that government by নেবেন not . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-17.000 tm_pt_3=0.000 tm_pt_4=-17.000 tm_pt_5=-53.183 tm_pt_6=-24.907 tm_pt_7=-17.000 tm_pt_8=-46.206 tm_pt_9=-3.805 tm_pt_10=-34.741 tm_pt_11=0.000 tm_pt_12=-8.898 tm_pt_13=0.000 tm_pt_14=-20.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=21.000 lm_0=-77.742 lm_1=-81.528 WordPenalty=-11.292 OOVPenalty=-400.000 ||| -581.984
+74 ||| and computer words money গণনাকারী machine . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-5.000 tm_pt_3=0.000 tm_pt_4=-5.000 tm_pt_5=-8.448 tm_pt_6=-5.506 tm_pt_7=-5.000 tm_pt_8=-13.590 tm_pt_9=-0.002 tm_pt_10=-5.943 tm_pt_11=0.000 tm_pt_12=-2.092 tm_pt_13=0.000 tm_pt_14=-5.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=7.000 lm_0=-23.685 lm_1=-26.089 WordPenalty=-3.909 OOVPenalty=-200.000 ||| -250.238
+74 ||| the computer words money গণনাকারী machine . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-5.000 tm_pt_3=0.000 tm_pt_4=-5.000 tm_pt_5=-11.337 tm_pt_6=-7.378 tm_pt_7=-5.000 tm_pt_8=-13.590 tm_pt_9=-0.003 tm_pt_10=-7.382 tm_pt_11=0.000 tm_pt_12=-4.299 tm_pt_13=0.000 tm_pt_14=-5.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=7.000 lm_0=-22.025 lm_1=-26.089 WordPenalty=-3.909 OOVPenalty=-200.000 ||| -250.531
+74 ||| and computer word meaning গণনাকারী machine . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-5.000 tm_pt_3=0.000 tm_pt_4=-5.000 tm_pt_5=-9.368 tm_pt_6=-6.074 tm_pt_7=-5.000 tm_pt_8=-13.590 tm_pt_9=-0.375 tm_pt_10=-8.628 tm_pt_11=0.000 tm_pt_12=-4.325 tm_pt_13=0.000 tm_pt_14=-5.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=7.000 lm_0=-22.256 lm_1=-26.089 WordPenalty=-3.909 OOVPenalty=-200.000 ||| -250.632
+74 ||| and computer words means গণনাকারী machine . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-5.000 tm_pt_3=0.000 tm_pt_4=-5.000 tm_pt_5=-9.592 tm_pt_6=-5.858 tm_pt_7=-5.000 tm_pt_8=-13.590 tm_pt_9=-0.009 tm_pt_10=-7.376 tm_pt_11=0.000 tm_pt_12=-3.073 tm_pt_13=0.000 tm_pt_14=-5.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=7.000 lm_0=-23.552 lm_1=-26.089 WordPenalty=-3.909 OOVPenalty=-200.000 ||| -251.136
+74 ||| and computer words meaning গণনাকারী machine . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-5.000 tm_pt_3=0.000 tm_pt_4=-5.000 tm_pt_5=-8.565 tm_pt_6=-5.969 tm_pt_7=-5.000 tm_pt_8=-13.590 tm_pt_9=-0.009 tm_pt_10=-6.970 tm_pt_11=0.000 tm_pt_12=-3.073 tm_pt_13=0.000 tm_pt_14=-5.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=7.000 lm_0=-23.908 lm_1=-26.089 WordPenalty=-3.909 OOVPenalty=-200.000 ||| -251.313
+74 ||| and computer words the গণনাকারী machine . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-5.000 tm_pt_3=0.000 tm_pt_4=-5.000 tm_pt_5=-14.774 tm_pt_6=-6.030 tm_pt_7=-5.000 tm_pt_8=-13.590 tm_pt_9=-0.009 tm_pt_10=-12.066 tm_pt_11=0.000 tm_pt_12=-3.073 tm_pt_13=0.000 tm_pt_14=-5.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=7.000 lm_0=-21.894 lm_1=-26.089 WordPenalty=-3.909 OOVPenalty=-200.000 ||| -251.351
+74 ||| the computer words means গণনাকারী machine . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-5.000 tm_pt_3=0.000 tm_pt_4=-5.000 tm_pt_5=-12.480 tm_pt_6=-7.730 tm_pt_7=-5.000 tm_pt_8=-13.590 tm_pt_9=-0.009 tm_pt_10=-8.815 tm_pt_11=0.000 tm_pt_12=-5.280 tm_pt_13=0.000 tm_pt_14=-5.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=7.000 lm_0=-21.892 lm_1=-26.089 WordPenalty=-3.909 OOVPenalty=-200.000 ||| -251.429
+75 ||| on 4th july ১৭৭৬ this constituents a independence notice জারি . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-8.000 tm_pt_3=0.000 tm_pt_4=-6.000 tm_pt_5=-12.423 tm_pt_6=-14.837 tm_pt_7=-8.000 tm_pt_8=-21.744 tm_pt_9=-2.368 tm_pt_10=-11.577 tm_pt_11=0.000 tm_pt_12=-4.190 tm_pt_13=0.000 tm_pt_14=-9.000 tm_pt_15=-1.000 tm_pt_16=0.000 tm_glue_0=7.000 lm_0=-36.395 lm_1=-39.134 WordPenalty=-5.646 OOVPenalty=-200.000 ||| -286.518
+75 ||| ১৭৭৬ on 4th july this constituents a independence notice জারি . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-8.000 tm_pt_3=0.000 tm_pt_4=-7.000 tm_pt_5=-12.423 tm_pt_6=-14.837 tm_pt_7=-8.000 tm_pt_8=-21.744 tm_pt_9=-2.368 tm_pt_10=-12.645 tm_pt_11=0.000 tm_pt_12=-4.101 tm_pt_13=0.000 tm_pt_14=-9.000 tm_pt_15=-1.000 tm_pt_16=0.000 tm_glue_0=9.000 lm_0=-37.838 lm_1=-39.134 WordPenalty=-5.646 OOVPenalty=-200.000 ||| -286.584
+75 ||| on 4th july this ১৭৭৬ constituents a independence notice জারি . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-8.000 tm_pt_3=0.000 tm_pt_4=-5.000 tm_pt_5=-12.423 tm_pt_6=-14.837 tm_pt_7=-8.000 tm_pt_8=-21.744 tm_pt_9=-2.368 tm_pt_10=-11.577 tm_pt_11=0.000 tm_pt_12=-3.784 tm_pt_13=0.000 tm_pt_14=-9.000 tm_pt_15=-1.000 tm_pt_16=0.000 tm_glue_0=6.000 lm_0=-36.395 lm_1=-39.134 WordPenalty=-5.646 OOVPenalty=-200.000 ||| -287.339
+75 ||| on 4th july ১৭৭৬ this constituents independence a notice জারি . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-8.000 tm_pt_3=0.000 tm_pt_4=-5.000 tm_pt_5=-12.423 tm_pt_6=-14.837 tm_pt_7=-8.000 tm_pt_8=-21.744 tm_pt_9=-2.418 tm_pt_10=-11.815 tm_pt_11=0.000 tm_pt_12=-3.838 tm_pt_13=0.000 tm_pt_14=-9.000 tm_pt_15=-1.000 tm_pt_16=0.000 tm_glue_0=6.000 lm_0=-36.395 lm_1=-39.134 WordPenalty=-5.646 OOVPenalty=-200.000 ||| -287.460
+75 ||| ১৭৭৬ on 4th july this constituents independence a notice জারি . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-8.000 tm_pt_3=0.000 tm_pt_4=-6.000 tm_pt_5=-12.423 tm_pt_6=-14.837 tm_pt_7=-8.000 tm_pt_8=-21.744 tm_pt_9=-2.418 tm_pt_10=-12.882 tm_pt_11=0.000 tm_pt_12=-3.748 tm_pt_13=0.000 tm_pt_14=-9.000 tm_pt_15=-1.000 tm_pt_16=0.000 tm_glue_0=8.000 lm_0=-37.838 lm_1=-39.134 WordPenalty=-5.646 OOVPenalty=-200.000 ||| -287.525
+75 ||| on 4th july ১৭৭৬ the constituents a independence notice জারি . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-8.000 tm_pt_3=0.000 tm_pt_4=-6.000 tm_pt_5=-15.758 tm_pt_6=-16.129 tm_pt_7=-8.000 tm_pt_8=-21.744 tm_pt_9=-2.368 tm_pt_10=-14.374 tm_pt_11=0.000 tm_pt_12=-6.108 tm_pt_13=0.000 tm_pt_14=-9.000 tm_pt_15=-1.000 tm_pt_16=0.000 tm_glue_0=7.000 lm_0=-35.456 lm_1=-39.134 WordPenalty=-5.646 OOVPenalty=-200.000 ||| -287.840
+75 ||| on 4th july ১৭৭৬ this constituents a of notice জারি . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-8.000 tm_pt_3=0.000 tm_pt_4=-6.000 tm_pt_5=-17.112 tm_pt_6=-14.586 tm_pt_7=-8.000 tm_pt_8=-21.744 tm_pt_9=-3.368 tm_pt_10=-16.407 tm_pt_11=0.000 tm_pt_12=-6.387 tm_pt_13=0.000 tm_pt_14=-9.000 tm_pt_15=-1.000 tm_pt_16=0.000 tm_glue_0=7.000 lm_0=-34.517 lm_1=-39.134 WordPenalty=-5.646 OOVPenalty=-200.000 ||| -287.855
+75 ||| ১৭৭৬ on 4th july this constituents a of notice জারি . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-8.000 tm_pt_3=0.000 tm_pt_4=-7.000 tm_pt_5=-17.112 tm_pt_6=-14.586 tm_pt_7=-8.000 tm_pt_8=-21.744 tm_pt_9=-3.368 tm_pt_10=-17.474 tm_pt_11=0.000 tm_pt_12=-6.298 tm_pt_13=0.000 tm_pt_14=-9.000 tm_pt_15=-1.000 tm_pt_16=0.000 tm_glue_0=9.000 lm_0=-35.960 lm_1=-39.134 WordPenalty=-5.646 OOVPenalty=-200.000 ||| -287.921
+75 ||| on 4th july ১৭৭৬ this constituents a notice independence জারি . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-8.000 tm_pt_3=0.000 tm_pt_4=-5.000 tm_pt_5=-12.423 tm_pt_6=-14.837 tm_pt_7=-8.000 tm_pt_8=-21.744 tm_pt_9=-3.368 tm_pt_10=-12.508 tm_pt_11=0.000 tm_pt_12=-4.308 tm_pt_13=0.000 tm_pt_14=-9.000 tm_pt_15=-1.000 tm_pt_16=0.000 tm_glue_0=6.000 lm_0=-36.395 lm_1=-39.134 WordPenalty=-5.646 OOVPenalty=-200.000 ||| -288.585
+75 ||| ১৭৭৬ on 4th july this constituents a notice independence জারি . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-8.000 tm_pt_3=0.000 tm_pt_4=-6.000 tm_pt_5=-12.423 tm_pt_6=-14.837 tm_pt_7=-8.000 tm_pt_8=-21.744 tm_pt_9=-3.368 tm_pt_10=-13.575 tm_pt_11=0.000 tm_pt_12=-4.218 tm_pt_13=0.000 tm_pt_14=-9.000 tm_pt_15=-1.000 tm_pt_16=0.000 tm_glue_0=8.000 lm_0=-37.838 lm_1=-39.134 WordPenalty=-5.646 OOVPenalty=-200.000 ||| -288.651
+76 ||| germany -lrb- in german : deutschland ডয়চ্ the লান্ট্ of pronounced [ dɔʏtʃlant ] -rrb- the is a country . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-10.000 tm_pt_3=0.000 tm_pt_4=-8.000 tm_pt_5=-40.037 tm_pt_6=-27.087 tm_pt_7=-10.000 tm_pt_8=-27.180 tm_pt_9=-2.419 tm_pt_10=-14.481 tm_pt_11=0.000 tm_pt_12=-2.350 tm_pt_13=0.000 tm_pt_14=-16.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=12.000 lm_0=-54.519 lm_1=-68.484 WordPenalty=-9.554 OOVPenalty=-400.000 ||| -534.960
+76 ||| germany -lrb- in german : deutschland ডয়চ্ the লান্ট্ of pronounced [ dɔʏtʃlant ] -rrb- through is a country . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-10.000 tm_pt_3=0.000 tm_pt_4=-8.000 tm_pt_5=-36.787 tm_pt_6=-27.934 tm_pt_7=-10.000 tm_pt_8=-27.180 tm_pt_9=-2.437 tm_pt_10=-11.919 tm_pt_11=0.000 tm_pt_12=-3.138 tm_pt_13=0.000 tm_pt_14=-16.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=12.000 lm_0=-55.093 lm_1=-68.484 WordPenalty=-9.554 OOVPenalty=-400.000 ||| -534.967
+76 ||| germany -lrb- in german : deutschland ডয়চ্ the লান্ট্ of pronounced [ dɔʏtʃlant ] -rrb- middle is a country . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-10.000 tm_pt_3=0.000 tm_pt_4=-8.000 tm_pt_5=-34.903 tm_pt_6=-27.087 tm_pt_7=-10.000 tm_pt_8=-27.180 tm_pt_9=-2.419 tm_pt_10=-9.385 tm_pt_11=0.000 tm_pt_12=-1.975 tm_pt_13=0.000 tm_pt_14=-16.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=12.000 lm_0=-56.628 lm_1=-68.484 WordPenalty=-9.554 OOVPenalty=-400.000 ||| -535.133
+76 ||| germany -lrb- in german : deutschland ডয়চ্ the লান্ট্ of pronounced [ dɔʏtʃlant ] -rrb- central is a country . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-10.000 tm_pt_3=0.000 tm_pt_4=-8.000 tm_pt_5=-35.140 tm_pt_6=-27.200 tm_pt_7=-10.000 tm_pt_8=-27.180 tm_pt_9=-2.419 tm_pt_10=-10.729 tm_pt_11=0.000 tm_pt_12=-2.039 tm_pt_13=0.000 tm_pt_14=-16.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=12.000 lm_0=-56.309 lm_1=-68.484 WordPenalty=-9.554 OOVPenalty=-400.000 ||| -535.194
+76 ||| germany -lrb- in german : deutschland ডয়চ্ the লান্ট্ the pronounced [ dɔʏtʃlant ] -rrb- the is a country . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-11.000 tm_pt_3=0.000 tm_pt_4=-10.000 tm_pt_5=-40.034 tm_pt_6=-26.757 tm_pt_7=-11.000 tm_pt_8=-29.898 tm_pt_9=-2.064 tm_pt_10=-24.499 tm_pt_11=0.000 tm_pt_12=-4.432 tm_pt_13=0.000 tm_pt_14=-16.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=14.000 lm_0=-54.703 lm_1=-68.484 WordPenalty=-9.554 OOVPenalty=-400.000 ||| -537.344
+76 ||| germany -lrb- in german : deutschland ডয়চ্ the লান্ট্ the pronounced [ dɔʏtʃlant ] -rrb- through is a country . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-11.000 tm_pt_3=0.000 tm_pt_4=-10.000 tm_pt_5=-36.784 tm_pt_6=-27.604 tm_pt_7=-11.000 tm_pt_8=-29.898 tm_pt_9=-2.082 tm_pt_10=-21.937 tm_pt_11=0.000 tm_pt_12=-5.221 tm_pt_13=0.000 tm_pt_14=-16.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=14.000 lm_0=-55.276 lm_1=-68.484 WordPenalty=-9.554 OOVPenalty=-400.000 ||| -537.350
+76 ||| germany -lrb- in german : deutschland ডয়চ্ of লান্ট্ the pronounced [ dɔʏtʃlant ] -rrb- the is a country . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-11.000 tm_pt_3=0.000 tm_pt_4=-10.000 tm_pt_5=-40.037 tm_pt_6=-27.087 tm_pt_7=-11.000 tm_pt_8=-29.898 tm_pt_9=-2.193 tm_pt_10=-23.900 tm_pt_11=0.000 tm_pt_12=-5.126 tm_pt_13=0.000 tm_pt_14=-16.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=14.000 lm_0=-54.519 lm_1=-68.484 WordPenalty=-9.554 OOVPenalty=-400.000 ||| -537.426
+76 ||| germany -lrb- in german : deutschland ডয়চ্ of লান্ট্ the pronounced [ dɔʏtʃlant ] -rrb- through is a country . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-11.000 tm_pt_3=0.000 tm_pt_4=-10.000 tm_pt_5=-36.787 tm_pt_6=-27.934 tm_pt_7=-11.000 tm_pt_8=-29.898 tm_pt_9=-2.211 tm_pt_10=-21.338 tm_pt_11=0.000 tm_pt_12=-5.914 tm_pt_13=0.000 tm_pt_14=-16.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=14.000 lm_0=-55.093 lm_1=-68.484 WordPenalty=-9.554 OOVPenalty=-400.000 ||| -537.432
+76 ||| germany -lrb- in german : deutschland ডয়চ্ of লান্ট্ of pronounced [ dɔʏtʃlant ] -rrb- the is a country . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-11.000 tm_pt_3=0.000 tm_pt_4=-10.000 tm_pt_5=-40.040 tm_pt_6=-27.417 tm_pt_7=-11.000 tm_pt_8=-29.898 tm_pt_9=-2.321 tm_pt_10=-23.301 tm_pt_11=0.000 tm_pt_12=-5.819 tm_pt_13=0.000 tm_pt_14=-16.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=14.000 lm_0=-54.336 lm_1=-68.484 WordPenalty=-9.554 OOVPenalty=-400.000 ||| -537.508
+76 ||| germany -lrb- in german : deutschland ডয়চ্ of লান্ট্ of pronounced [ dɔʏtʃlant ] -rrb- through is a country . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-11.000 tm_pt_3=0.000 tm_pt_4=-10.000 tm_pt_5=-36.790 tm_pt_6=-28.265 tm_pt_7=-11.000 tm_pt_8=-29.898 tm_pt_9=-2.340 tm_pt_10=-20.739 tm_pt_11=0.000 tm_pt_12=-6.607 tm_pt_13=0.000 tm_pt_14=-16.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=14.000 lm_0=-54.909 lm_1=-68.484 WordPenalty=-9.554 OOVPenalty=-400.000 ||| -537.515
+77 ||| খ্রিস্টধর্ম the main religion . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-3.000 tm_pt_3=0.000 tm_pt_4=-3.000 tm_pt_5=-8.682 tm_pt_6=-3.222 tm_pt_7=-3.000 tm_pt_8=-8.154 tm_pt_9=-1.368 tm_pt_10=-8.196 tm_pt_11=0.000 tm_pt_12=-2.807 tm_pt_13=0.000 tm_pt_14=-4.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=4.000 lm_0=-13.390 lm_1=-19.567 WordPenalty=-3.040 OOVPenalty=-100.000 ||| -134.806
+77 ||| খ্রিস্টধর্ম russia main religion . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-3.000 tm_pt_3=0.000 tm_pt_4=-3.000 tm_pt_5=-2.865 tm_pt_6=-3.257 tm_pt_7=-3.000 tm_pt_8=-8.154 tm_pt_9=-0.370 tm_pt_10=-1.801 tm_pt_11=0.000 tm_pt_12=-0.861 tm_pt_13=0.000 tm_pt_14=-4.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=4.000 lm_0=-17.547 lm_1=-19.567 WordPenalty=-3.040 OOVPenalty=-100.000 ||| -135.684
+77 ||| খ্রিস্টধর্ম russia the religion . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-4.000 tm_pt_3=0.000 tm_pt_4=-4.000 tm_pt_5=-6.650 tm_pt_6=-3.344 tm_pt_7=-4.000 tm_pt_8=-10.872 tm_pt_9=-0.002 tm_pt_10=-6.572 tm_pt_11=0.000 tm_pt_12=-2.367 tm_pt_13=0.000 tm_pt_14=-4.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=5.000 lm_0=-16.281 lm_1=-19.567 WordPenalty=-3.040 OOVPenalty=-100.000 ||| -136.531
+77 ||| খ্রিস্টধর্ম russiar main religion . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-3.000 tm_pt_3=0.000 tm_pt_4=-3.000 tm_pt_5=-2.245 tm_pt_6=-5.203 tm_pt_7=-3.000 tm_pt_8=-8.154 tm_pt_9=-0.736 tm_pt_10=-0.009 tm_pt_11=0.000 tm_pt_12=-2.114 tm_pt_13=0.000 tm_pt_14=-4.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=4.000 lm_0=-18.360 lm_1=-19.567 WordPenalty=-3.040 OOVPenalty=-100.000 ||| -137.514
+78 ||| but গলদের education রোমানীকরণের গতি was slow down . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-5.000 tm_pt_3=0.000 tm_pt_4=-5.000 tm_pt_5=-9.388 tm_pt_6=-3.301 tm_pt_7=-5.000 tm_pt_8=-13.590 tm_pt_9=-0.386 tm_pt_10=-4.108 tm_pt_11=0.000 tm_pt_12=-2.060 tm_pt_13=0.000 tm_pt_14=-6.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=8.000 lm_0=-30.819 lm_1=-32.611 WordPenalty=-4.777 OOVPenalty=-300.000 ||| -361.851
+78 ||| but গলদের the রোমানীকরণের গতি was slow down . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-5.000 tm_pt_3=0.000 tm_pt_4=-5.000 tm_pt_5=-14.896 tm_pt_6=-3.724 tm_pt_7=-5.000 tm_pt_8=-13.590 tm_pt_9=-0.503 tm_pt_10=-8.340 tm_pt_11=0.000 tm_pt_12=-2.570 tm_pt_13=0.000 tm_pt_14=-6.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=8.000 lm_0=-29.343 lm_1=-32.611 WordPenalty=-4.777 OOVPenalty=-300.000 ||| -362.598
+78 ||| but গলদের are রোমানীকরণের গতি was slow down . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-5.000 tm_pt_3=0.000 tm_pt_4=-5.000 tm_pt_5=-13.874 tm_pt_6=-4.877 tm_pt_7=-5.000 tm_pt_8=-13.590 tm_pt_9=-1.368 tm_pt_10=-7.258 tm_pt_11=0.000 tm_pt_12=-3.669 tm_pt_13=0.000 tm_pt_14=-6.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=8.000 lm_0=-29.623 lm_1=-32.611 WordPenalty=-4.777 OOVPenalty=-300.000 ||| -363.960
+78 ||| but গলদের education রোমানীকরণের গতি was too slow . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-5.000 tm_pt_3=0.000 tm_pt_4=-5.000 tm_pt_5=-10.731 tm_pt_6=-3.524 tm_pt_7=-5.000 tm_pt_8=-13.590 tm_pt_9=-1.018 tm_pt_10=-4.108 tm_pt_11=0.000 tm_pt_12=-2.753 tm_pt_13=0.000 tm_pt_14=-6.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=8.000 lm_0=-31.760 lm_1=-32.611 WordPenalty=-4.777 OOVPenalty=-300.000 ||| -364.112
+79 ||| subject : gnu foundation ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-3.000 tm_pt_3=0.000 tm_pt_4=-3.000 tm_pt_5=-2.244 tm_pt_6=-1.768 tm_pt_7=-3.000 tm_pt_8=-8.154 tm_pt_9=-0.000 tm_pt_10=-0.656 tm_pt_11=0.000 tm_pt_12=-1.687 tm_pt_13=0.000 tm_pt_14=-4.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=3.000 lm_0=-11.822 lm_1=-16.306 WordPenalty=-2.606 OOVPenalty=0.000 ||| -26.233
+79 ||| category : gnu foundation ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-3.000 tm_pt_3=0.000 tm_pt_4=-3.000 tm_pt_5=-2.012 tm_pt_6=-2.238 tm_pt_7=-3.000 tm_pt_8=-8.154 tm_pt_9=-0.000 tm_pt_10=-0.642 tm_pt_11=0.000 tm_pt_12=-2.047 tm_pt_13=0.000 tm_pt_14=-4.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=3.000 lm_0=-12.001 lm_1=-16.306 WordPenalty=-2.606 OOVPenalty=0.000 ||| -26.695
+79 ||| subject-class : gnu foundation ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-3.000 tm_pt_3=0.000 tm_pt_4=-3.000 tm_pt_5=-1.966 tm_pt_6=-3.787 tm_pt_7=-3.000 tm_pt_8=-8.154 tm_pt_9=-0.000 tm_pt_10=-0.669 tm_pt_11=0.000 tm_pt_12=-3.010 tm_pt_13=0.000 tm_pt_14=-4.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=3.000 lm_0=-12.283 lm_1=-16.306 WordPenalty=-2.606 OOVPenalty=0.000 ||| -27.913
+79 ||| subject : gonu foundation ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-3.000 tm_pt_3=0.000 tm_pt_4=-3.000 tm_pt_5=-2.126 tm_pt_6=-3.308 tm_pt_7=-3.000 tm_pt_8=-8.154 tm_pt_9=-0.050 tm_pt_10=-0.451 tm_pt_11=0.000 tm_pt_12=-3.392 tm_pt_13=0.000 tm_pt_14=-4.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=3.000 lm_0=-12.324 lm_1=-16.306 WordPenalty=-2.606 OOVPenalty=0.000 ||| -27.949
+79 ||| topics : gnu foundation ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-3.000 tm_pt_3=0.000 tm_pt_4=-3.000 tm_pt_5=-2.124 tm_pt_6=-4.100 tm_pt_7=-3.000 tm_pt_8=-8.154 tm_pt_9=-0.000 tm_pt_10=-0.642 tm_pt_11=0.000 tm_pt_12=-3.220 tm_pt_13=0.000 tm_pt_14=-4.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=3.000 lm_0=-12.514 lm_1=-16.306 WordPenalty=-2.606 OOVPenalty=0.000 ||| -28.403
+79 ||| category : gonu foundation ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-3.000 tm_pt_3=0.000 tm_pt_4=-3.000 tm_pt_5=-1.894 tm_pt_6=-3.779 tm_pt_7=-3.000 tm_pt_8=-8.154 tm_pt_9=-0.050 tm_pt_10=-0.437 tm_pt_11=0.000 tm_pt_12=-3.751 tm_pt_13=0.000 tm_pt_14=-4.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=3.000 lm_0=-12.503 lm_1=-16.306 WordPenalty=-2.606 OOVPenalty=0.000 ||| -28.411
+80 ||| economic policy and revenue নীতিকেও it study . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-6.000 tm_pt_3=0.000 tm_pt_4=-5.000 tm_pt_5=-11.391 tm_pt_6=-10.283 tm_pt_7=-6.000 tm_pt_8=-16.308 tm_pt_9=-1.386 tm_pt_10=-11.196 tm_pt_11=0.000 tm_pt_12=-2.816 tm_pt_13=0.000 tm_pt_14=-7.000 tm_pt_15=-1.000 tm_pt_16=0.000 tm_glue_0=6.000 lm_0=-25.344 lm_1=-29.350 WordPenalty=-4.343 OOVPenalty=-100.000 ||| -161.720
+80 ||| economic policy and tax নীতিকেও it study . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-6.000 tm_pt_3=0.000 tm_pt_4=-5.000 tm_pt_5=-11.710 tm_pt_6=-10.283 tm_pt_7=-6.000 tm_pt_8=-16.308 tm_pt_9=-1.386 tm_pt_10=-12.177 tm_pt_11=0.000 tm_pt_12=-2.816 tm_pt_13=0.000 tm_pt_14=-7.000 tm_pt_15=-1.000 tm_pt_16=0.000 tm_glue_0=6.000 lm_0=-25.178 lm_1=-29.350 WordPenalty=-4.343 OOVPenalty=-100.000 ||| -161.830
+80 ||| economic policy and revenue নীতিকেও studying it . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-6.000 tm_pt_3=0.000 tm_pt_4=-4.000 tm_pt_5=-11.529 tm_pt_6=-11.605 tm_pt_7=-6.000 tm_pt_8=-16.308 tm_pt_9=-2.368 tm_pt_10=-10.097 tm_pt_11=0.000 tm_pt_12=-3.039 tm_pt_13=0.000 tm_pt_14=-7.000 tm_pt_15=-1.000 tm_pt_16=0.000 tm_glue_0=5.000 lm_0=-24.499 lm_1=-29.350 WordPenalty=-4.343 OOVPenalty=-100.000 ||| -162.715
+80 ||| economic policy and for নীতিকেও it study . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-6.000 tm_pt_3=0.000 tm_pt_4=-5.000 tm_pt_5=-16.601 tm_pt_6=-10.976 tm_pt_7=-6.000 tm_pt_8=-16.308 tm_pt_9=-2.018 tm_pt_10=-17.015 tm_pt_11=0.000 tm_pt_12=-3.509 tm_pt_13=0.000 tm_pt_14=-7.000 tm_pt_15=-1.000 tm_pt_16=0.000 tm_glue_0=6.000 lm_0=-23.359 lm_1=-29.350 WordPenalty=-4.343 OOVPenalty=-100.000 ||| -162.770
+80 ||| economic policy and tax নীতিকেও studying it . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-6.000 tm_pt_3=0.000 tm_pt_4=-4.000 tm_pt_5=-11.848 tm_pt_6=-11.605 tm_pt_7=-6.000 tm_pt_8=-16.308 tm_pt_9=-2.368 tm_pt_10=-11.078 tm_pt_11=0.000 tm_pt_12=-3.039 tm_pt_13=0.000 tm_pt_14=-7.000 tm_pt_15=-1.000 tm_pt_16=0.000 tm_glue_0=5.000 lm_0=-24.333 lm_1=-29.350 WordPenalty=-4.343 OOVPenalty=-100.000 ||| -162.825
+80 ||| economic policy and revenue নীতিকেও this study . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-6.000 tm_pt_3=0.000 tm_pt_4=-5.000 tm_pt_5=-12.985 tm_pt_6=-11.230 tm_pt_7=-6.000 tm_pt_8=-16.308 tm_pt_9=-1.386 tm_pt_10=-13.129 tm_pt_11=0.000 tm_pt_12=-3.449 tm_pt_13=0.000 tm_pt_14=-7.000 tm_pt_15=-1.000 tm_pt_16=0.000 tm_glue_0=6.000 lm_0=-25.189 lm_1=-29.350 WordPenalty=-4.343 OOVPenalty=-100.000 ||| -162.882
+80 ||| economic policy tax and নীতিকেও it study . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-6.000 tm_pt_3=0.000 tm_pt_4=-4.000 tm_pt_5=-11.710 tm_pt_6=-10.283 tm_pt_7=-6.000 tm_pt_8=-16.308 tm_pt_9=-2.018 tm_pt_10=-12.870 tm_pt_11=0.000 tm_pt_12=-3.828 tm_pt_13=0.000 tm_pt_14=-7.000 tm_pt_15=-1.000 tm_pt_16=0.000 tm_glue_0=5.000 lm_0=-24.592 lm_1=-29.350 WordPenalty=-4.343 OOVPenalty=-100.000 ||| -163.125
+80 ||| economic policy and নীতিকেও for it study . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-6.000 tm_pt_3=0.000 tm_pt_4=-4.000 tm_pt_5=-16.601 tm_pt_6=-10.976 tm_pt_7=-6.000 tm_pt_8=-16.308 tm_pt_9=-2.018 tm_pt_10=-16.048 tm_pt_11=0.000 tm_pt_12=-3.627 tm_pt_13=0.000 tm_pt_14=-7.000 tm_pt_15=-1.000 tm_pt_16=0.000 tm_glue_0=5.000 lm_0=-23.187 lm_1=-29.350 WordPenalty=-4.343 OOVPenalty=-100.000 ||| -163.312
+81 ||| among them are : may be তোমার get seen ওরে this any স্নেহ-সুরধুনী . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-8.000 tm_pt_3=0.000 tm_pt_4=-7.000 tm_pt_5=-26.350 tm_pt_6=-16.516 tm_pt_7=-8.000 tm_pt_8=-21.744 tm_pt_9=-1.736 tm_pt_10=-6.623 tm_pt_11=0.000 tm_pt_12=-6.240 tm_pt_13=0.000 tm_pt_14=-11.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=10.000 lm_0=-41.164 lm_1=-48.917 WordPenalty=-6.949 OOVPenalty=-300.000 ||| -397.697
+81 ||| among these are : may be তোমার get seen ওরে this any স্নেহ-সুরধুনী . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-7.000 tm_pt_3=0.000 tm_pt_4=-7.000 tm_pt_5=-27.293 tm_pt_6=-16.548 tm_pt_7=-7.000 tm_pt_8=-19.026 tm_pt_9=-3.000 tm_pt_10=-7.999 tm_pt_11=0.000 tm_pt_12=-4.493 tm_pt_13=0.000 tm_pt_14=-11.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=10.000 lm_0=-41.398 lm_1=-48.917 WordPenalty=-6.949 OOVPenalty=-300.000 ||| -397.798
+81 ||| among them are : may be তোমার get is ওরে this any স্নেহ-সুরধুনী . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-8.000 tm_pt_3=0.000 tm_pt_4=-7.000 tm_pt_5=-30.305 tm_pt_6=-17.069 tm_pt_7=-8.000 tm_pt_8=-21.744 tm_pt_9=-1.738 tm_pt_10=-10.642 tm_pt_11=0.000 tm_pt_12=-6.779 tm_pt_13=0.000 tm_pt_14=-11.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=10.000 lm_0=-39.595 lm_1=-48.917 WordPenalty=-6.949 OOVPenalty=-300.000 ||| -397.937
+81 ||| among these are : may be তোমার get is ওরে this any স্নেহ-সুরধুনী . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-7.000 tm_pt_3=0.000 tm_pt_4=-7.000 tm_pt_5=-31.248 tm_pt_6=-17.101 tm_pt_7=-7.000 tm_pt_8=-19.026 tm_pt_9=-3.002 tm_pt_10=-12.017 tm_pt_11=0.000 tm_pt_12=-5.032 tm_pt_13=0.000 tm_pt_14=-11.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=10.000 lm_0=-39.829 lm_1=-48.917 WordPenalty=-6.949 OOVPenalty=-300.000 ||| -398.038
+81 ||| among them are : would have তোমার get seen ওরে this any স্নেহ-সুরধুনী . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-8.000 tm_pt_3=0.000 tm_pt_4=-8.000 tm_pt_5=-24.692 tm_pt_6=-15.956 tm_pt_7=-8.000 tm_pt_8=-21.744 tm_pt_9=-2.368 tm_pt_10=-6.623 tm_pt_11=0.000 tm_pt_12=-6.527 tm_pt_13=0.000 tm_pt_14=-11.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=11.000 lm_0=-42.167 lm_1=-48.917 WordPenalty=-6.949 OOVPenalty=-300.000 ||| -398.086
+81 ||| among these are : would have তোমার get seen ওরে this any স্নেহ-সুরধুনী . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-7.000 tm_pt_3=0.000 tm_pt_4=-7.000 tm_pt_5=-25.635 tm_pt_6=-15.989 tm_pt_7=-7.000 tm_pt_8=-19.026 tm_pt_9=-3.000 tm_pt_10=-6.613 tm_pt_11=0.000 tm_pt_12=-4.493 tm_pt_13=0.000 tm_pt_14=-11.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=10.000 lm_0=-42.401 lm_1=-48.917 WordPenalty=-6.949 OOVPenalty=-300.000 ||| -398.172
+81 ||| among them are : may be তোমার numbers is ওরে this any স্নেহ-সুরধুনী . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-8.000 tm_pt_3=0.000 tm_pt_4=-7.000 tm_pt_5=-30.645 tm_pt_6=-17.069 tm_pt_7=-8.000 tm_pt_8=-21.744 tm_pt_9=-1.738 tm_pt_10=-13.408 tm_pt_11=0.000 tm_pt_12=-6.779 tm_pt_13=0.000 tm_pt_14=-11.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=10.000 lm_0=-39.159 lm_1=-48.917 WordPenalty=-6.949 OOVPenalty=-300.000 ||| -398.178
+81 ||| among them are : would have তোমার get is ওরে this any স্নেহ-সুরধুনী . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-8.000 tm_pt_3=0.000 tm_pt_4=-8.000 tm_pt_5=-28.647 tm_pt_6=-16.509 tm_pt_7=-8.000 tm_pt_8=-21.744 tm_pt_9=-2.370 tm_pt_10=-10.642 tm_pt_11=0.000 tm_pt_12=-7.066 tm_pt_13=0.000 tm_pt_14=-11.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=11.000 lm_0=-40.599 lm_1=-48.917 WordPenalty=-6.949 OOVPenalty=-300.000 ||| -398.326
+81 ||| among them are : may be তোমার get seen ওরে this no স্নেহ-সুরধুনী . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-8.000 tm_pt_3=0.000 tm_pt_4=-7.000 tm_pt_5=-26.658 tm_pt_6=-17.123 tm_pt_7=-8.000 tm_pt_8=-21.744 tm_pt_9=-1.736 tm_pt_10=-6.677 tm_pt_11=0.000 tm_pt_12=-7.285 tm_pt_13=0.000 tm_pt_14=-11.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=10.000 lm_0=-41.147 lm_1=-48.917 WordPenalty=-6.949 OOVPenalty=-300.000 ||| -398.326
+81 ||| among these are : may be তোমার get seen ওরে this no স্নেহ-সুরধুনী . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-7.000 tm_pt_3=0.000 tm_pt_4=-7.000 tm_pt_5=-27.600 tm_pt_6=-17.155 tm_pt_7=-7.000 tm_pt_8=-19.026 tm_pt_9=-3.000 tm_pt_10=-8.052 tm_pt_11=0.000 tm_pt_12=-5.538 tm_pt_13=0.000 tm_pt_14=-11.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=10.000 lm_0=-41.381 lm_1=-48.917 WordPenalty=-6.949 OOVPenalty=-300.000 ||| -398.428
+82 ||| on 23rd april 1992 satyajit died . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-6.000 tm_pt_3=0.000 tm_pt_4=-5.000 tm_pt_5=-7.544 tm_pt_6=-12.167 tm_pt_7=-6.000 tm_pt_8=-16.308 tm_pt_9=-1.369 tm_pt_10=-3.969 tm_pt_11=0.000 tm_pt_12=-3.180 tm_pt_13=0.000 tm_pt_14=-7.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=4.000 lm_0=-14.394 lm_1=-26.089 WordPenalty=-3.909 OOVPenalty=0.000 ||| -45.335
+82 ||| on 23rd april 1992 satyajit expired . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-5.000 tm_pt_3=0.000 tm_pt_4=-4.000 tm_pt_5=-6.291 tm_pt_6=-13.553 tm_pt_7=-5.000 tm_pt_8=-13.590 tm_pt_9=-2.001 tm_pt_10=-1.517 tm_pt_11=0.000 tm_pt_12=-3.358 tm_pt_13=0.000 tm_pt_14=-7.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=3.000 lm_0=-14.659 lm_1=-26.089 WordPenalty=-3.909 OOVPenalty=0.000 ||| -45.653
+82 ||| satyajit died on 23rd april 1992 . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-5.000 tm_pt_3=0.000 tm_pt_4=-3.000 tm_pt_5=-7.544 tm_pt_6=-12.167 tm_pt_7=-5.000 tm_pt_8=-13.590 tm_pt_9=-2.369 tm_pt_10=-4.522 tm_pt_11=0.000 tm_pt_12=-4.487 tm_pt_13=0.000 tm_pt_14=-7.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=1.000 lm_0=-14.171 lm_1=-26.089 WordPenalty=-3.909 OOVPenalty=0.000 ||| -48.260
+83 ||| this time nazrul medical রিপোর্ট stay famous চিকিৎসকদের to sent . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-8.000 tm_pt_3=0.000 tm_pt_4=-8.000 tm_pt_5=-16.030 tm_pt_6=-16.653 tm_pt_7=-8.000 tm_pt_8=-21.744 tm_pt_9=-2.052 tm_pt_10=-13.963 tm_pt_11=0.000 tm_pt_12=-5.389 tm_pt_13=0.000 tm_pt_14=-9.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=10.000 lm_0=-37.256 lm_1=-39.134 WordPenalty=-5.646 OOVPenalty=-200.000 ||| -286.197
+83 ||| this time nazrul medical রিপোর্ট sent to stay famous চিকিৎসকদের . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-8.000 tm_pt_3=0.000 tm_pt_4=-6.000 tm_pt_5=-16.030 tm_pt_6=-16.653 tm_pt_7=-8.000 tm_pt_8=-21.744 tm_pt_9=-2.420 tm_pt_10=-12.132 tm_pt_11=0.000 tm_pt_12=-6.333 tm_pt_13=0.000 tm_pt_14=-9.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=6.000 lm_0=-34.004 lm_1=-39.134 WordPenalty=-5.646 OOVPenalty=-200.000 ||| -286.263
+83 ||| this time nazrul medical রিপোর্ট stay famous sent to চিকিৎসকদের . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-8.000 tm_pt_3=0.000 tm_pt_4=-7.000 tm_pt_5=-16.030 tm_pt_6=-16.653 tm_pt_7=-8.000 tm_pt_8=-21.744 tm_pt_9=-2.420 tm_pt_10=-11.801 tm_pt_11=0.000 tm_pt_12=-6.236 tm_pt_13=0.000 tm_pt_14=-9.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=8.000 lm_0=-35.756 lm_1=-39.134 WordPenalty=-5.646 OOVPenalty=-200.000 ||| -286.345
+83 ||| this time nazrul medical রিপোর্ট stay famous চিকিৎসকদের to send to . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-8.000 tm_pt_3=0.000 tm_pt_4=-8.000 tm_pt_5=-19.143 tm_pt_6=-11.248 tm_pt_7=-8.000 tm_pt_8=-21.744 tm_pt_9=-2.052 tm_pt_10=-11.660 tm_pt_11=0.000 tm_pt_12=-5.389 tm_pt_13=0.000 tm_pt_14=-10.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=10.000 lm_0=-37.275 lm_1=-42.395 WordPenalty=-6.080 OOVPenalty=-200.000 ||| -286.473
+83 ||| this time nazrul medical রিপোর্ট earning famous চিকিৎসকদের to sent . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-8.000 tm_pt_3=0.000 tm_pt_4=-8.000 tm_pt_5=-17.020 tm_pt_6=-17.752 tm_pt_7=-8.000 tm_pt_8=-21.744 tm_pt_9=-2.052 tm_pt_10=-13.452 tm_pt_11=0.000 tm_pt_12=-5.389 tm_pt_13=0.000 tm_pt_14=-9.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=10.000 lm_0=-37.147 lm_1=-39.134 WordPenalty=-5.646 OOVPenalty=-200.000 ||| -286.493
+83 ||| this time nazrul medical রিপোর্ট stay famous চিকিৎসকদের to send . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-8.000 tm_pt_3=0.000 tm_pt_4=-6.000 tm_pt_5=-15.462 tm_pt_6=-13.355 tm_pt_7=-8.000 tm_pt_8=-21.744 tm_pt_9=-1.152 tm_pt_10=-19.885 tm_pt_11=0.000 tm_pt_12=-5.763 tm_pt_13=0.000 tm_pt_14=-9.000 tm_pt_15=-1.000 tm_pt_16=0.000 tm_glue_0=8.000 lm_0=-35.787 lm_1=-39.134 WordPenalty=-5.646 OOVPenalty=-200.000 ||| -286.554
+83 ||| this time nazrul medical রিপোর্ট stay famous চিকিৎসকদের to sent to . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-8.000 tm_pt_3=0.000 tm_pt_4=-8.000 tm_pt_5=-19.065 tm_pt_6=-10.907 tm_pt_7=-8.000 tm_pt_8=-21.744 tm_pt_9=-1.188 tm_pt_10=-12.354 tm_pt_11=0.000 tm_pt_12=-4.290 tm_pt_13=0.000 tm_pt_14=-10.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=10.000 lm_0=-38.221 lm_1=-42.395 WordPenalty=-6.080 OOVPenalty=-200.000 ||| -286.594
+83 ||| this time nazrul medical রিপোর্ট earning famous চিকিৎসকদের to send to . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-8.000 tm_pt_3=0.000 tm_pt_4=-8.000 tm_pt_5=-20.132 tm_pt_6=-12.347 tm_pt_7=-8.000 tm_pt_8=-21.744 tm_pt_9=-2.052 tm_pt_10=-11.150 tm_pt_11=0.000 tm_pt_12=-5.389 tm_pt_13=0.000 tm_pt_14=-10.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=10.000 lm_0=-37.166 lm_1=-42.395 WordPenalty=-6.080 OOVPenalty=-200.000 ||| -286.770
+83 ||| this time nazrul medical রিপোর্ট earning famous চিকিৎসকদের to send . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-8.000 tm_pt_3=0.000 tm_pt_4=-6.000 tm_pt_5=-16.451 tm_pt_6=-14.454 tm_pt_7=-8.000 tm_pt_8=-21.744 tm_pt_9=-1.152 tm_pt_10=-19.374 tm_pt_11=0.000 tm_pt_12=-5.763 tm_pt_13=0.000 tm_pt_14=-9.000 tm_pt_15=-1.000 tm_pt_16=0.000 tm_glue_0=8.000 lm_0=-35.678 lm_1=-39.134 WordPenalty=-5.646 OOVPenalty=-200.000 ||| -286.851
+83 ||| this time nazrul medical রিপোর্ট earning famous চিকিৎসকদের to sent to . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-8.000 tm_pt_3=0.000 tm_pt_4=-8.000 tm_pt_5=-20.055 tm_pt_6=-12.006 tm_pt_7=-8.000 tm_pt_8=-21.744 tm_pt_9=-1.188 tm_pt_10=-11.843 tm_pt_11=0.000 tm_pt_12=-4.290 tm_pt_13=0.000 tm_pt_14=-10.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=10.000 lm_0=-38.112 lm_1=-42.395 WordPenalty=-6.080 OOVPenalty=-200.000 ||| -286.890
+84 ||| acted in different times rani মুখার্জী different দাতব্য are connected with থেকেছেন . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-8.000 tm_pt_3=0.000 tm_pt_4=-8.000 tm_pt_5=-22.380 tm_pt_6=-14.549 tm_pt_7=-8.000 tm_pt_8=-21.744 tm_pt_9=-3.424 tm_pt_10=-13.562 tm_pt_11=0.000 tm_pt_12=-6.591 tm_pt_13=0.000 tm_pt_14=-10.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=11.000 lm_0=-40.958 lm_1=-45.656 WordPenalty=-6.514 OOVPenalty=-300.000 ||| -396.253
+84 ||| acting in different times rani মুখার্জী different দাতব্য are connected with থেকেছেন . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-8.000 tm_pt_3=0.000 tm_pt_4=-8.000 tm_pt_5=-23.032 tm_pt_6=-15.414 tm_pt_7=-8.000 tm_pt_8=-21.744 tm_pt_9=-3.553 tm_pt_10=-14.373 tm_pt_11=0.000 tm_pt_12=-7.284 tm_pt_13=0.000 tm_pt_14=-10.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=11.000 lm_0=-40.924 lm_1=-45.656 WordPenalty=-6.514 OOVPenalty=-300.000 ||| -397.192
+84 ||| acted in different time rani মুখার্জী different দাতব্য are connected with থেকেছেন . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-8.000 tm_pt_3=0.000 tm_pt_4=-8.000 tm_pt_5=-22.602 tm_pt_6=-13.033 tm_pt_7=-8.000 tm_pt_8=-21.744 tm_pt_9=-3.424 tm_pt_10=-13.562 tm_pt_11=0.000 tm_pt_12=-6.591 tm_pt_13=0.000 tm_pt_14=-10.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=11.000 lm_0=-42.187 lm_1=-45.656 WordPenalty=-6.514 OOVPenalty=-300.000 ||| -397.297
+84 ||| acted in different times rani মুখার্জী various দাতব্য are connected with থেকেছেন . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-8.000 tm_pt_3=0.000 tm_pt_4=-8.000 tm_pt_5=-22.396 tm_pt_6=-15.771 tm_pt_7=-8.000 tm_pt_8=-21.744 tm_pt_9=-3.424 tm_pt_10=-13.500 tm_pt_11=0.000 tm_pt_12=-7.777 tm_pt_13=0.000 tm_pt_14=-10.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=11.000 lm_0=-41.409 lm_1=-45.656 WordPenalty=-6.514 OOVPenalty=-300.000 ||| -397.635
+84 ||| different times , acted rani মুখার্জী different দাতব্য are connected with থেকেছেন . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-8.000 tm_pt_3=0.000 tm_pt_4=-6.000 tm_pt_5=-22.591 tm_pt_6=-13.702 tm_pt_7=-8.000 tm_pt_8=-21.744 tm_pt_9=-3.153 tm_pt_10=-14.288 tm_pt_11=0.000 tm_pt_12=-3.905 tm_pt_13=0.000 tm_pt_14=-10.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=9.000 lm_0=-41.777 lm_1=-45.656 WordPenalty=-6.514 OOVPenalty=-300.000 ||| -397.961
+84 ||| acted , different times rani মুখার্জী different দাতব্য are connected with থেকেছেন . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-8.000 tm_pt_3=0.000 tm_pt_4=-8.000 tm_pt_5=-22.591 tm_pt_6=-13.702 tm_pt_7=-8.000 tm_pt_8=-21.744 tm_pt_9=-2.792 tm_pt_10=-14.576 tm_pt_11=0.000 tm_pt_12=-5.897 tm_pt_13=0.000 tm_pt_14=-10.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=11.000 lm_0=-43.056 lm_1=-45.656 WordPenalty=-6.514 OOVPenalty=-300.000 ||| -398.099
+84 ||| in different times acted rani মুখার্জী different দাতব্য are connected with থেকেছেন . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-8.000 tm_pt_3=0.000 tm_pt_4=-6.000 tm_pt_5=-22.380 tm_pt_6=-14.549 tm_pt_7=-8.000 tm_pt_8=-21.744 tm_pt_9=-3.786 tm_pt_10=-12.484 tm_pt_11=0.000 tm_pt_12=-4.655 tm_pt_13=0.000 tm_pt_14=-10.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=9.000 lm_0=-41.466 lm_1=-45.656 WordPenalty=-6.514 OOVPenalty=-300.000 ||| -398.142
+85 ||| bengali literature and culture a special contribution on স্বীকৃতিস্বরুপ year on 9th december , dhaka university he honorary ডি.লিট উপাধিতে awarded with . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-13.000 tm_pt_3=0.000 tm_pt_4=-12.000 tm_pt_5=-55.591 tm_pt_6=-26.977 tm_pt_7=-13.000 tm_pt_8=-35.334 tm_pt_9=-4.106 tm_pt_10=-26.460 tm_pt_11=0.000 tm_pt_12=-3.613 tm_pt_13=0.000 tm_pt_14=-20.000 tm_pt_15=-1.000 tm_pt_16=0.000 tm_glue_0=15.000 lm_0=-62.080 lm_1=-78.267 WordPenalty=-10.857 OOVPenalty=-300.000 ||| -459.327
+85 ||| bengali literature and culture a special contribution on স্বীকৃতিস্বরুপ year on 9th december , dhaka university was honorary ডি.লিট উপাধিতে awarded with . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-13.000 tm_pt_3=0.000 tm_pt_4=-12.000 tm_pt_5=-56.621 tm_pt_6=-27.198 tm_pt_7=-13.000 tm_pt_8=-35.334 tm_pt_9=-4.474 tm_pt_10=-28.388 tm_pt_11=0.000 tm_pt_12=-5.318 tm_pt_13=0.000 tm_pt_14=-20.000 tm_pt_15=-1.000 tm_pt_16=0.000 tm_glue_0=15.000 lm_0=-60.734 lm_1=-78.267 WordPenalty=-10.857 OOVPenalty=-300.000 ||| -459.339
+85 ||| bengali literature and culture his special contribution on স্বীকৃতিস্বরুপ year on 9th december , dhaka university he honorary ডি.লিট উপাধিতে awarded with . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-14.000 tm_pt_3=0.000 tm_pt_4=-13.000 tm_pt_5=-50.176 tm_pt_6=-22.617 tm_pt_7=-14.000 tm_pt_8=-38.052 tm_pt_9=-3.738 tm_pt_10=-26.585 tm_pt_11=0.000 tm_pt_12=-3.925 tm_pt_13=0.000 tm_pt_14=-20.000 tm_pt_15=-1.000 tm_pt_16=0.000 tm_glue_0=16.000 lm_0=-64.166 lm_1=-78.267 WordPenalty=-10.857 OOVPenalty=-300.000 ||| -459.413
+85 ||| bengali literature and culture his special contribution on স্বীকৃতিস্বরুপ year on 9th december , dhaka university was honorary ডি.লিট উপাধিতে awarded with . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-14.000 tm_pt_3=0.000 tm_pt_4=-13.000 tm_pt_5=-51.205 tm_pt_6=-22.838 tm_pt_7=-14.000 tm_pt_8=-38.052 tm_pt_9=-4.106 tm_pt_10=-28.514 tm_pt_11=0.000 tm_pt_12=-5.630 tm_pt_13=0.000 tm_pt_14=-20.000 tm_pt_15=-1.000 tm_pt_16=0.000 tm_glue_0=16.000 lm_0=-62.820 lm_1=-78.267 WordPenalty=-10.857 OOVPenalty=-300.000 ||| -459.425
+85 ||| bengali literature and culture in special contribution on স্বীকৃতিস্বরুপ year on 9th december , dhaka university he honorary ডি.লিট উপাধিতে awarded with . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-14.000 tm_pt_3=0.000 tm_pt_4=-13.000 tm_pt_5=-54.240 tm_pt_6=-24.867 tm_pt_7=-14.000 tm_pt_8=-38.052 tm_pt_9=-3.738 tm_pt_10=-29.755 tm_pt_11=0.000 tm_pt_12=-6.549 tm_pt_13=0.000 tm_pt_14=-20.000 tm_pt_15=-1.000 tm_pt_16=0.000 tm_glue_0=16.000 lm_0=-61.999 lm_1=-78.267 WordPenalty=-10.857 OOVPenalty=-300.000 ||| -460.029
+85 ||| bengali literature and culture in special contribution on স্বীকৃতিস্বরুপ year on 9th december , dhaka university was honorary ডি.লিট উপাধিতে awarded with . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-14.000 tm_pt_3=0.000 tm_pt_4=-13.000 tm_pt_5=-55.269 tm_pt_6=-25.088 tm_pt_7=-14.000 tm_pt_8=-38.052 tm_pt_9=-4.106 tm_pt_10=-31.684 tm_pt_11=0.000 tm_pt_12=-8.253 tm_pt_13=0.000 tm_pt_14=-20.000 tm_pt_15=-1.000 tm_pt_16=0.000 tm_glue_0=16.000 lm_0=-60.653 lm_1=-78.267 WordPenalty=-10.857 OOVPenalty=-300.000 ||| -460.041
+85 ||| bengali literature and culture a special contribution on স্বীকৃতিস্বরুপ year on 9th december , dhaka university he honorary ডি.লিট উপাধিতে awarded with the . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-14.000 tm_pt_3=0.000 tm_pt_4=-14.000 tm_pt_5=-58.799 tm_pt_6=-25.191 tm_pt_7=-14.000 tm_pt_8=-38.052 tm_pt_9=-4.106 tm_pt_10=-22.587 tm_pt_11=0.000 tm_pt_12=-3.786 tm_pt_13=0.000 tm_pt_14=-21.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=17.000 lm_0=-62.950 lm_1=-81.528 WordPenalty=-11.292 OOVPenalty=-300.000 ||| -460.239
+85 ||| bengali literature and culture a special contribution on স্বীকৃতিস্বরুপ year on 9th december , dhaka university was honorary ডি.লিট উপাধিতে awarded with the . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-14.000 tm_pt_3=0.000 tm_pt_4=-14.000 tm_pt_5=-59.828 tm_pt_6=-25.412 tm_pt_7=-14.000 tm_pt_8=-38.052 tm_pt_9=-4.474 tm_pt_10=-24.516 tm_pt_11=0.000 tm_pt_12=-5.490 tm_pt_13=0.000 tm_pt_14=-21.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=17.000 lm_0=-61.604 lm_1=-81.528 WordPenalty=-11.292 OOVPenalty=-300.000 ||| -460.251
+85 ||| bengali literature and culture its special contribution on স্বীকৃতিস্বরুপ year on 9th december , dhaka university he honorary ডি.লিট উপাধিতে awarded with . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-14.000 tm_pt_3=0.000 tm_pt_4=-13.000 tm_pt_5=-51.065 tm_pt_6=-24.579 tm_pt_7=-14.000 tm_pt_8=-38.052 tm_pt_9=-3.738 tm_pt_10=-27.055 tm_pt_11=0.000 tm_pt_12=-5.930 tm_pt_13=0.000 tm_pt_14=-20.000 tm_pt_15=-1.000 tm_pt_16=0.000 tm_glue_0=16.000 lm_0=-63.766 lm_1=-78.267 WordPenalty=-10.857 OOVPenalty=-300.000 ||| -460.590
+85 ||| bengali literature and culture its special contribution on স্বীকৃতিস্বরুপ year on 9th december , dhaka university was honorary ডি.লিট উপাধিতে awarded with . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-14.000 tm_pt_3=0.000 tm_pt_4=-13.000 tm_pt_5=-52.094 tm_pt_6=-24.800 tm_pt_7=-14.000 tm_pt_8=-38.052 tm_pt_9=-4.106 tm_pt_10=-28.983 tm_pt_11=0.000 tm_pt_12=-7.634 tm_pt_13=0.000 tm_pt_14=-20.000 tm_pt_15=-1.000 tm_pt_16=0.000 tm_glue_0=16.000 lm_0=-62.420 lm_1=-78.267 WordPenalty=-10.857 OOVPenalty=-300.000 ||| -460.602
+86 ||| kolkata durga puja city of tourism আকর্ষণও is also a reason ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-6.000 tm_pt_3=0.000 tm_pt_4=-6.000 tm_pt_5=-38.348 tm_pt_6=-18.126 tm_pt_7=-6.000 tm_pt_8=-16.308 tm_pt_9=-2.135 tm_pt_10=-11.306 tm_pt_11=0.000 tm_pt_12=-5.878 tm_pt_13=0.000 tm_pt_14=-10.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=7.000 lm_0=-28.824 lm_1=-39.134 WordPenalty=-5.646 OOVPenalty=-100.000 ||| -180.380
+86 ||| kolkata durga puja of the tourism আকর্ষণও is also a reason ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-6.000 tm_pt_3=0.000 tm_pt_4=-5.000 tm_pt_5=-41.831 tm_pt_6=-17.807 tm_pt_7=-6.000 tm_pt_8=-16.308 tm_pt_9=-2.271 tm_pt_10=-15.896 tm_pt_11=0.000 tm_pt_12=-6.947 tm_pt_13=0.000 tm_pt_14=-10.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=6.000 lm_0=-26.385 lm_1=-39.134 WordPenalty=-5.646 OOVPenalty=-100.000 ||| -180.564
+86 ||| kolkata durga puja tourism of the আকর্ষণও is also a reason ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-6.000 tm_pt_3=0.000 tm_pt_4=-5.000 tm_pt_5=-41.831 tm_pt_6=-17.807 tm_pt_7=-6.000 tm_pt_8=-16.308 tm_pt_9=-2.185 tm_pt_10=-11.722 tm_pt_11=0.000 tm_pt_12=-6.829 tm_pt_13=0.000 tm_pt_14=-10.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=5.000 lm_0=-26.752 lm_1=-39.134 WordPenalty=-5.646 OOVPenalty=-100.000 ||| -180.823
+86 ||| the durga puja city of tourism আকর্ষণও is also a reason ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-6.000 tm_pt_3=0.000 tm_pt_4=-6.000 tm_pt_5=-42.056 tm_pt_6=-18.216 tm_pt_7=-6.000 tm_pt_8=-16.308 tm_pt_9=-2.135 tm_pt_10=-13.404 tm_pt_11=0.000 tm_pt_12=-5.473 tm_pt_13=0.000 tm_pt_14=-10.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=7.000 lm_0=-28.392 lm_1=-39.134 WordPenalty=-5.646 OOVPenalty=-100.000 ||| -180.986
+86 ||| the durga puja of the tourism আকর্ষণও is also a reason ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-6.000 tm_pt_3=0.000 tm_pt_4=-5.000 tm_pt_5=-45.538 tm_pt_6=-17.897 tm_pt_7=-6.000 tm_pt_8=-16.308 tm_pt_9=-2.271 tm_pt_10=-17.994 tm_pt_11=0.000 tm_pt_12=-6.542 tm_pt_13=0.000 tm_pt_14=-10.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=6.000 lm_0=-25.953 lm_1=-39.134 WordPenalty=-5.646 OOVPenalty=-100.000 ||| -181.169
+86 ||| the durga puja tourism of the আকর্ষণও is also a reason ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-6.000 tm_pt_3=0.000 tm_pt_4=-5.000 tm_pt_5=-45.538 tm_pt_6=-17.897 tm_pt_7=-6.000 tm_pt_8=-16.308 tm_pt_9=-2.185 tm_pt_10=-13.820 tm_pt_11=0.000 tm_pt_12=-6.424 tm_pt_13=0.000 tm_pt_14=-10.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=5.000 lm_0=-26.319 lm_1=-39.134 WordPenalty=-5.646 OOVPenalty=-100.000 ||| -181.429
+86 ||| tourism of kolkata durga puja the আকর্ষণও is also a reason ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-6.000 tm_pt_3=0.000 tm_pt_4=-3.000 tm_pt_5=-41.831 tm_pt_6=-17.807 tm_pt_7=-6.000 tm_pt_8=-16.308 tm_pt_9=-2.068 tm_pt_10=-10.909 tm_pt_11=0.000 tm_pt_12=-6.286 tm_pt_13=0.000 tm_pt_14=-10.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=3.000 lm_0=-26.087 lm_1=-39.134 WordPenalty=-5.646 OOVPenalty=-100.000 ||| -181.430
+86 ||| tourism of kolkata durga puja city আকর্ষণও is also a reason ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-6.000 tm_pt_3=0.000 tm_pt_4=-3.000 tm_pt_5=-38.348 tm_pt_6=-18.126 tm_pt_7=-6.000 tm_pt_8=-16.308 tm_pt_9=-2.068 tm_pt_10=-8.610 tm_pt_11=0.000 tm_pt_12=-5.593 tm_pt_13=0.000 tm_pt_14=-10.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=3.000 lm_0=-27.334 lm_1=-39.134 WordPenalty=-5.646 OOVPenalty=-100.000 ||| -181.574
+86 ||| kolkata durga puja tourism of city আকর্ষণও is also a reason ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-6.000 tm_pt_3=0.000 tm_pt_4=-5.000 tm_pt_5=-38.348 tm_pt_6=-18.126 tm_pt_7=-6.000 tm_pt_8=-16.308 tm_pt_9=-2.185 tm_pt_10=-8.229 tm_pt_11=0.000 tm_pt_12=-6.205 tm_pt_13=0.000 tm_pt_14=-10.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=5.000 lm_0=-28.790 lm_1=-39.134 WordPenalty=-5.646 OOVPenalty=-100.000 ||| -181.662
+86 ||| kolkata durga puja city of tourism আকর্ষণও বটে . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-6.000 tm_pt_3=0.000 tm_pt_4=-6.000 tm_pt_5=-11.192 tm_pt_6=-6.496 tm_pt_7=-6.000 tm_pt_8=-16.308 tm_pt_9=-1.135 tm_pt_10=-11.315 tm_pt_11=0.000 tm_pt_12=-5.882 tm_pt_13=0.000 tm_pt_14=-7.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=8.000 lm_0=-28.144 lm_1=-32.611 WordPenalty=-4.777 OOVPenalty=-200.000 ||| -264.676
+87 ||| but many of east german started living in economically reach and democratic west germany , east germany government built a wall in berlin in 1961 and reinforced its boundaries . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-6.000 tm_pt_3=0.000 tm_pt_4=-6.000 tm_pt_5=-73.802 tm_pt_6=-53.757 tm_pt_7=-6.000 tm_pt_8=-16.308 tm_pt_9=-3.007 tm_pt_10=-7.175 tm_pt_11=0.000 tm_pt_12=-1.869 tm_pt_13=0.000 tm_pt_14=-30.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=6.000 lm_0=-49.154 lm_1=-100.563 WordPenalty=-13.897 OOVPenalty=0.000 ||| -162.668
+87 ||| but many of east germany started living in economically reach and democratic west germany , east germany government built a wall in berlin in 1961 and reinforced its boundaries . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-7.000 tm_pt_3=0.000 tm_pt_4=-7.000 tm_pt_5=-77.075 tm_pt_6=-56.565 tm_pt_7=-7.000 tm_pt_8=-19.026 tm_pt_9=-4.007 tm_pt_10=-9.884 tm_pt_11=0.000 tm_pt_12=-2.562 tm_pt_13=0.000 tm_pt_14=-30.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=7.000 lm_0=-47.637 lm_1=-100.563 WordPenalty=-13.897 OOVPenalty=0.000 ||| -164.335
+87 ||| but when lakhs east german started living in economically reach and democratic west germany , east germany government built a wall in berlin in 1961 and reinforced its boundaries . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-5.000 tm_pt_3=0.000 tm_pt_4=-5.000 tm_pt_5=-71.335 tm_pt_6=-57.389 tm_pt_7=-5.000 tm_pt_8=-13.590 tm_pt_9=-3.368 tm_pt_10=-0.747 tm_pt_11=0.000 tm_pt_12=-1.269 tm_pt_13=0.000 tm_pt_14=-30.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=5.000 lm_0=-51.889 lm_1=-100.563 WordPenalty=-13.897 OOVPenalty=0.000 ||| -165.048
+87 ||| but when of east german started living in economically reach and democratic west germany , east germany government built a wall in berlin in 1961 and reinforced its boundaries . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-5.000 tm_pt_3=0.000 tm_pt_4=-5.000 tm_pt_5=-76.892 tm_pt_6=-57.471 tm_pt_7=-5.000 tm_pt_8=-13.590 tm_pt_9=-3.375 tm_pt_10=-5.103 tm_pt_11=0.000 tm_pt_12=-1.674 tm_pt_13=0.000 tm_pt_14=-30.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=5.000 lm_0=-50.113 lm_1=-100.563 WordPenalty=-13.897 OOVPenalty=0.000 ||| -165.219
+87 ||| but many lakhs east german started living in economically reach and democratic west germany , east germany government built a wall in berlin in 1961 and reinforced its boundaries . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-6.000 tm_pt_3=0.000 tm_pt_4=-6.000 tm_pt_5=-68.245 tm_pt_6=-53.675 tm_pt_7=-6.000 tm_pt_8=-16.308 tm_pt_9=-3.000 tm_pt_10=-2.820 tm_pt_11=0.000 tm_pt_12=-1.463 tm_pt_13=0.000 tm_pt_14=-30.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=6.000 lm_0=-53.671 lm_1=-100.563 WordPenalty=-13.897 OOVPenalty=0.000 ||| -165.888
+88 ||| first one is the নির্বাক first seven মিনিট which চারুর একঘেয়েমি life shown level and second one is " the দোলনার scene " where চারু অমলের for his love মুখোমুখি . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-20.000 tm_pt_3=0.000 tm_pt_4=-17.000 tm_pt_5=-63.476 tm_pt_6=-36.182 tm_pt_7=-20.000 tm_pt_8=-54.360 tm_pt_9=-4.106 tm_pt_10=-37.944 tm_pt_11=0.000 tm_pt_12=-10.606 tm_pt_13=0.000 tm_pt_14=-23.000 tm_pt_15=-1.000 tm_pt_16=0.000 tm_glue_0=25.000 lm_0=-102.048 lm_1=-104.356 WordPenalty=-14.332 OOVPenalty=-800.000 ||| -1035.611
+88 ||| first one is film নির্বাক first seven মিনিট which চারুর একঘেয়েমি life shown level and second one is " the দোলনার scene " where চারু অমলের for his love মুখোমুখি . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-20.000 tm_pt_3=0.000 tm_pt_4=-17.000 tm_pt_5=-59.695 tm_pt_6=-36.346 tm_pt_7=-20.000 tm_pt_8=-54.360 tm_pt_9=-3.156 tm_pt_10=-33.741 tm_pt_11=0.000 tm_pt_12=-9.220 tm_pt_13=0.000 tm_pt_14=-23.000 tm_pt_15=-1.000 tm_pt_16=0.000 tm_glue_0=25.000 lm_0=-104.615 lm_1=-104.356 WordPenalty=-14.332 OOVPenalty=-800.000 ||| -1035.760
+88 ||| first one is the নির্বাক first seven মিনিট which চারুর একঘেয়েমি life shown them and second one is " the দোলনার scene " where চারু অমলের for his love মুখোমুখি . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-20.000 tm_pt_3=0.000 tm_pt_4=-18.000 tm_pt_5=-64.617 tm_pt_6=-36.182 tm_pt_7=-20.000 tm_pt_8=-54.360 tm_pt_9=-4.738 tm_pt_10=-39.119 tm_pt_11=0.000 tm_pt_12=-11.992 tm_pt_13=0.000 tm_pt_14=-23.000 tm_pt_15=-1.000 tm_pt_16=0.000 tm_glue_0=26.000 lm_0=-101.785 lm_1=-104.356 WordPenalty=-14.332 OOVPenalty=-800.000 ||| -1035.853
+88 ||| first one is the নির্বাক first seven মিনিট which চারুর একঘেয়েমি life shown sound and second one is " the দোলনার scene " where চারু অমলের for his love মুখোমুখি . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-20.000 tm_pt_3=0.000 tm_pt_4=-18.000 tm_pt_5=-62.205 tm_pt_6=-36.182 tm_pt_7=-20.000 tm_pt_8=-54.360 tm_pt_9=-4.738 tm_pt_10=-37.672 tm_pt_11=0.000 tm_pt_12=-11.992 tm_pt_13=0.000 tm_pt_14=-23.000 tm_pt_15=-1.000 tm_pt_16=0.000 tm_glue_0=26.000 lm_0=-102.550 lm_1=-104.356 WordPenalty=-14.332 OOVPenalty=-800.000 ||| -1035.963
+88 ||| first one is film নির্বাক first seven মিনিট which চারুর একঘেয়েমি life shown them and second one is " the দোলনার scene " where চারু অমলের for his love মুখোমুখি . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-20.000 tm_pt_3=0.000 tm_pt_4=-18.000 tm_pt_5=-60.836 tm_pt_6=-36.346 tm_pt_7=-20.000 tm_pt_8=-54.360 tm_pt_9=-3.788 tm_pt_10=-34.916 tm_pt_11=0.000 tm_pt_12=-10.606 tm_pt_13=0.000 tm_pt_14=-23.000 tm_pt_15=-1.000 tm_pt_16=0.000 tm_glue_0=26.000 lm_0=-104.352 lm_1=-104.356 WordPenalty=-14.332 OOVPenalty=-800.000 ||| -1036.002
+88 ||| first one is film নির্বাক first seven মিনিট which চারুর একঘেয়েমি life shown sound and second one is " the দোলনার scene " where চারু অমলের for his love মুখোমুখি . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-20.000 tm_pt_3=0.000 tm_pt_4=-18.000 tm_pt_5=-58.424 tm_pt_6=-36.346 tm_pt_7=-20.000 tm_pt_8=-54.360 tm_pt_9=-3.788 tm_pt_10=-33.469 tm_pt_11=0.000 tm_pt_12=-10.606 tm_pt_13=0.000 tm_pt_14=-23.000 tm_pt_15=-1.000 tm_pt_16=0.000 tm_glue_0=26.000 lm_0=-105.117 lm_1=-104.356 WordPenalty=-14.332 OOVPenalty=-800.000 ||| -1036.112
+88 ||| first one is the নির্বাক first seven মিনিট which চারুর একঘেয়েমি life shown level and the second one is " দোলনার scene " where চারু অমলের for his love মুখোমুখি . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-21.000 tm_pt_3=0.000 tm_pt_4=-16.000 tm_pt_5=-63.476 tm_pt_6=-36.182 tm_pt_7=-21.000 tm_pt_8=-57.078 tm_pt_9=-3.745 tm_pt_10=-39.194 tm_pt_11=0.000 tm_pt_12=-15.506 tm_pt_13=0.000 tm_pt_14=-23.000 tm_pt_15=-1.000 tm_pt_16=0.000 tm_glue_0=23.000 lm_0=-98.761 lm_1=-104.356 WordPenalty=-14.332 OOVPenalty=-800.000 ||| -1036.426
+88 ||| first one is film নির্বাক first seven মিনিট which চারুর একঘেয়েমি life shown level and the second one is " দোলনার scene " where চারু অমলের for his love মুখোমুখি . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-21.000 tm_pt_3=0.000 tm_pt_4=-16.000 tm_pt_5=-59.695 tm_pt_6=-36.346 tm_pt_7=-21.000 tm_pt_8=-57.078 tm_pt_9=-2.795 tm_pt_10=-34.991 tm_pt_11=0.000 tm_pt_12=-14.120 tm_pt_13=0.000 tm_pt_14=-23.000 tm_pt_15=-1.000 tm_pt_16=0.000 tm_glue_0=23.000 lm_0=-101.329 lm_1=-104.356 WordPenalty=-14.332 OOVPenalty=-800.000 ||| -1036.575
+88 ||| first one is the নির্বাক first seven মিনিট which চারুর একঘেয়েমি life shown level and second one is " the দোলনার scene " where চারু অমলের love for his মুখোমুখি . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-20.000 tm_pt_3=0.000 tm_pt_4=-15.000 tm_pt_5=-63.476 tm_pt_6=-36.182 tm_pt_7=-20.000 tm_pt_8=-54.360 tm_pt_9=-4.075 tm_pt_10=-37.509 tm_pt_11=0.000 tm_pt_12=-11.627 tm_pt_13=0.000 tm_pt_14=-23.000 tm_pt_15=-1.000 tm_pt_16=0.000 tm_glue_0=23.000 lm_0=-101.929 lm_1=-104.356 WordPenalty=-14.332 OOVPenalty=-800.000 ||| -1037.615
+88 ||| first one is film নির্বাক first seven মিনিট which চারুর একঘেয়েমি life shown level and second one is " the দোলনার scene " where চারু অমলের love for his মুখোমুখি . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-20.000 tm_pt_3=0.000 tm_pt_4=-15.000 tm_pt_5=-59.695 tm_pt_6=-36.346 tm_pt_7=-20.000 tm_pt_8=-54.360 tm_pt_9=-3.124 tm_pt_10=-33.307 tm_pt_11=0.000 tm_pt_12=-10.240 tm_pt_13=0.000 tm_pt_14=-23.000 tm_pt_15=-1.000 tm_pt_16=0.000 tm_glue_0=23.000 lm_0=-104.496 lm_1=-104.356 WordPenalty=-14.332 OOVPenalty=-800.000 ||| -1037.764
+89 ||| 18th century a group চিন্তাবিদ and writer income and production চক্রাকার current through economic thoughts development it . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-11.000 tm_pt_3=0.000 tm_pt_4=-11.000 tm_pt_5=-33.743 tm_pt_6=-15.231 tm_pt_7=-11.000 tm_pt_8=-29.898 tm_pt_9=-6.068 tm_pt_10=-14.342 tm_pt_11=0.000 tm_pt_12=-4.255 tm_pt_13=0.000 tm_pt_14=-16.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=13.000 lm_0=-55.872 lm_1=-61.961 WordPenalty=-8.686 OOVPenalty=-200.000 ||| -330.408
+89 ||| 18th century a group চিন্তাবিদ and writer income and production চক্রাকার current through economic thoughts development people . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-11.000 tm_pt_3=0.000 tm_pt_4=-11.000 tm_pt_5=-33.399 tm_pt_6=-15.924 tm_pt_7=-11.000 tm_pt_8=-29.898 tm_pt_9=-6.068 tm_pt_10=-14.342 tm_pt_11=0.000 tm_pt_12=-4.255 tm_pt_13=0.000 tm_pt_14=-16.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=13.000 lm_0=-55.886 lm_1=-61.961 WordPenalty=-8.686 OOVPenalty=-200.000 ||| -330.597
+89 ||| 18 century a group চিন্তাবিদ and writer income and production চক্রাকার current through economic thoughts development it . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-12.000 tm_pt_3=0.000 tm_pt_4=-12.000 tm_pt_5=-32.293 tm_pt_6=-13.806 tm_pt_7=-12.000 tm_pt_8=-32.616 tm_pt_9=-5.118 tm_pt_10=-16.169 tm_pt_11=0.000 tm_pt_12=-4.726 tm_pt_13=0.000 tm_pt_14=-16.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=14.000 lm_0=-57.011 lm_1=-61.961 WordPenalty=-8.686 OOVPenalty=-200.000 ||| -331.105
+89 ||| 18 century a group চিন্তাবিদ and writer income and production চক্রাকার current through economic thoughts development people . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-12.000 tm_pt_3=0.000 tm_pt_4=-12.000 tm_pt_5=-31.949 tm_pt_6=-14.499 tm_pt_7=-12.000 tm_pt_8=-32.616 tm_pt_9=-5.118 tm_pt_10=-16.169 tm_pt_11=0.000 tm_pt_12=-4.726 tm_pt_13=0.000 tm_pt_14=-16.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=14.000 lm_0=-57.026 lm_1=-61.961 WordPenalty=-8.686 OOVPenalty=-200.000 ||| -331.295
+89 ||| 18th century a group চিন্তাবিদ and writer income and production চক্রাকার current through economic thoughts development emperor . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-11.000 tm_pt_3=0.000 tm_pt_4=-11.000 tm_pt_5=-30.810 tm_pt_6=-15.924 tm_pt_7=-11.000 tm_pt_8=-29.898 tm_pt_9=-6.068 tm_pt_10=-12.732 tm_pt_11=0.000 tm_pt_12=-4.255 tm_pt_13=0.000 tm_pt_14=-16.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=13.000 lm_0=-57.425 lm_1=-61.961 WordPenalty=-8.686 OOVPenalty=-200.000 ||| -331.588
+90 ||| the arousal activities before penetrating male organ into vagina is called foreplay . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-3.000 tm_pt_3=0.000 tm_pt_4=-3.000 tm_pt_5=-40.937 tm_pt_6=-21.594 tm_pt_7=-3.000 tm_pt_8=-8.154 tm_pt_9=-1.007 tm_pt_10=-0.415 tm_pt_11=0.000 tm_pt_12=-0.920 tm_pt_13=0.000 tm_pt_14=-13.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=3.000 lm_0=-22.210 lm_1=-41.130 WordPenalty=-6.514 OOVPenalty=0.000 ||| -67.075
+90 ||| the arousal activities before penetrating male organ into vagina called foreplay . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-3.000 tm_pt_3=0.000 tm_pt_4=-3.000 tm_pt_5=-37.122 tm_pt_6=-21.535 tm_pt_7=-3.000 tm_pt_8=-8.154 tm_pt_9=-2.000 tm_pt_10=-0.009 tm_pt_11=0.000 tm_pt_12=-0.003 tm_pt_13=0.000 tm_pt_14=-12.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=3.000 lm_0=-24.209 lm_1=-37.869 WordPenalty=-6.080 OOVPenalty=0.000 ||| -67.081
+90 ||| the arousal activities before penetrating male organ into vagina is called stimulation ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-1.000 tm_pt_3=0.000 tm_pt_4=-1.000 tm_pt_5=-41.890 tm_pt_6=-31.076 tm_pt_7=-1.000 tm_pt_8=-2.718 tm_pt_9=-1.000 tm_pt_10=0.000 tm_pt_11=0.000 tm_pt_12=-0.693 tm_pt_13=0.000 tm_pt_14=-12.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=1.000 lm_0=-22.691 lm_1=-37.338 WordPenalty=-6.080 OOVPenalty=0.000 ||| -67.760
+90 ||| vagina arousal activities before penetrating male organ into vagina is called stimulation ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-2.000 tm_pt_3=0.000 tm_pt_4=-2.000 tm_pt_5=-36.650 tm_pt_6=-30.932 tm_pt_7=-2.000 tm_pt_8=-5.436 tm_pt_9=-1.135 tm_pt_10=-2.197 tm_pt_11=0.000 tm_pt_12=-1.099 tm_pt_13=0.000 tm_pt_14=-12.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=2.000 lm_0=-25.494 lm_1=-35.909 WordPenalty=-6.080 OOVPenalty=0.000 ||| -69.424
+90 ||| the arousal activities before penetrating male organ into vagina called stimulation ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-3.000 tm_pt_3=0.000 tm_pt_4=-3.000 tm_pt_5=-38.075 tm_pt_6=-31.017 tm_pt_7=-3.000 tm_pt_8=-8.154 tm_pt_9=-2.000 tm_pt_10=-2.623 tm_pt_11=0.000 tm_pt_12=-3.476 tm_pt_13=0.000 tm_pt_14=-11.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=3.000 lm_0=-24.690 lm_1=-34.076 WordPenalty=-5.646 OOVPenalty=0.000 ||| -70.247
+91 ||| it is শূকরের in in was which can শূকরকে ইনফ্লুয়েঞ্জাতে by british . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-8.000 tm_pt_3=0.000 tm_pt_4=-7.000 tm_pt_5=-43.503 tm_pt_6=-16.453 tm_pt_7=-8.000 tm_pt_8=-21.744 tm_pt_9=-2.063 tm_pt_10=-29.648 tm_pt_11=0.000 tm_pt_12=-8.509 tm_pt_13=0.000 tm_pt_14=-10.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=10.000 lm_0=-36.899 lm_1=-45.656 WordPenalty=-6.514 OOVPenalty=-300.000 ||| -400.621
+91 ||| it basically শূকরের in in was which can শূকরকে ইনফ্লুয়েঞ্জাতে by british . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-7.000 tm_pt_3=0.000 tm_pt_4=-6.000 tm_pt_5=-38.304 tm_pt_6=-16.616 tm_pt_7=-7.000 tm_pt_8=-19.026 tm_pt_9=-2.928 tm_pt_10=-22.777 tm_pt_11=0.000 tm_pt_12=-4.755 tm_pt_13=0.000 tm_pt_14=-10.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=9.000 lm_0=-39.881 lm_1=-45.656 WordPenalty=-6.514 OOVPenalty=-300.000 ||| -400.780
+91 ||| it is শূকরের in in was which can শূকরকে ইনফ্লুয়েঞ্জাতে by cultures . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-8.000 tm_pt_3=0.000 tm_pt_4=-7.000 tm_pt_5=-40.802 tm_pt_6=-16.453 tm_pt_7=-8.000 tm_pt_8=-21.744 tm_pt_9=-2.063 tm_pt_10=-28.731 tm_pt_11=0.000 tm_pt_12=-8.509 tm_pt_13=0.000 tm_pt_14=-10.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=10.000 lm_0=-38.736 lm_1=-45.656 WordPenalty=-6.514 OOVPenalty=-300.000 ||| -402.139
+91 ||| it is শূকরের in in was which can শূকরকে ইনফ্লুয়েঞ্জাতে would by . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-9.000 tm_pt_3=0.000 tm_pt_4=-7.000 tm_pt_5=-41.446 tm_pt_6=-14.844 tm_pt_7=-9.000 tm_pt_8=-24.462 tm_pt_9=-1.431 tm_pt_10=-28.964 tm_pt_11=0.000 tm_pt_12=-7.819 tm_pt_13=0.000 tm_pt_14=-10.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=10.000 lm_0=-38.842 lm_1=-45.656 WordPenalty=-6.514 OOVPenalty=-300.000 ||| -402.297
+91 ||| it basically শূকরের in in was which can শূকরকে ইনফ্লুয়েঞ্জাতে by cultures . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-7.000 tm_pt_3=0.000 tm_pt_4=-6.000 tm_pt_5=-35.603 tm_pt_6=-16.616 tm_pt_7=-7.000 tm_pt_8=-19.026 tm_pt_9=-2.928 tm_pt_10=-21.861 tm_pt_11=0.000 tm_pt_12=-4.755 tm_pt_13=0.000 tm_pt_14=-10.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=9.000 lm_0=-41.718 lm_1=-45.656 WordPenalty=-6.514 OOVPenalty=-300.000 ||| -402.298
+91 ||| it basically শূকরের in in was which can শূকরকে ইনফ্লুয়েঞ্জাতে would by . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-8.000 tm_pt_3=0.000 tm_pt_4=-6.000 tm_pt_5=-36.247 tm_pt_6=-15.006 tm_pt_7=-8.000 tm_pt_8=-21.744 tm_pt_9=-2.295 tm_pt_10=-22.093 tm_pt_11=0.000 tm_pt_12=-4.065 tm_pt_13=0.000 tm_pt_14=-10.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=9.000 lm_0=-41.824 lm_1=-45.656 WordPenalty=-6.514 OOVPenalty=-300.000 ||| -402.455
+91 ||| it is শূকরের in in was which can শূকরকে ইনফ্লুয়েঞ্জাতে affected cultures . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-8.000 tm_pt_3=0.000 tm_pt_4=-7.000 tm_pt_5=-37.296 tm_pt_6=-17.347 tm_pt_7=-8.000 tm_pt_8=-21.744 tm_pt_9=-2.063 tm_pt_10=-23.915 tm_pt_11=0.000 tm_pt_12=-8.509 tm_pt_13=0.000 tm_pt_14=-10.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=10.000 lm_0=-40.503 lm_1=-45.656 WordPenalty=-6.514 OOVPenalty=-300.000 ||| -402.712
+91 ||| it is শূকরের in in was which can শূকরকে ইনফ্লুয়েঞ্জাতে by would . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-9.000 tm_pt_3=0.000 tm_pt_4=-8.000 tm_pt_5=-41.446 tm_pt_6=-14.844 tm_pt_7=-9.000 tm_pt_8=-24.462 tm_pt_9=-2.063 tm_pt_10=-29.839 tm_pt_11=0.000 tm_pt_12=-9.765 tm_pt_13=0.000 tm_pt_14=-10.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=11.000 lm_0=-38.868 lm_1=-45.656 WordPenalty=-6.514 OOVPenalty=-300.000 ||| -402.798
+91 ||| it basically শূকরের in in was which can শূকরকে ইনফ্লুয়েঞ্জাতে affected cultures . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-7.000 tm_pt_3=0.000 tm_pt_4=-6.000 tm_pt_5=-32.097 tm_pt_6=-17.510 tm_pt_7=-7.000 tm_pt_8=-19.026 tm_pt_9=-2.928 tm_pt_10=-17.044 tm_pt_11=0.000 tm_pt_12=-4.755 tm_pt_13=0.000 tm_pt_14=-10.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=9.000 lm_0=-43.485 lm_1=-45.656 WordPenalty=-6.514 OOVPenalty=-300.000 ||| -402.871
+91 ||| it basically শূকরের in in was which can শূকরকে ইনফ্লুয়েঞ্জাতে by would . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-8.000 tm_pt_3=0.000 tm_pt_4=-7.000 tm_pt_5=-36.247 tm_pt_6=-15.006 tm_pt_7=-8.000 tm_pt_8=-21.744 tm_pt_9=-2.928 tm_pt_10=-22.968 tm_pt_11=0.000 tm_pt_12=-6.011 tm_pt_13=0.000 tm_pt_14=-10.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=10.000 lm_0=-41.850 lm_1=-45.656 WordPenalty=-6.514 OOVPenalty=-300.000 ||| -402.957
+92 ||| these একএ the mycelium structure . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-4.000 tm_pt_3=0.000 tm_pt_4=-3.000 tm_pt_5=-11.032 tm_pt_6=-8.359 tm_pt_7=-4.000 tm_pt_8=-10.872 tm_pt_9=-0.000 tm_pt_10=-15.357 tm_pt_11=0.000 tm_pt_12=-2.852 tm_pt_13=0.000 tm_pt_14=-4.000 tm_pt_15=-1.000 tm_pt_16=0.000 tm_glue_0=5.000 lm_0=-19.717 lm_1=-22.828 WordPenalty=-3.474 OOVPenalty=-200.000 ||| -248.730
+92 ||| these একএ to mycelium structure . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-4.000 tm_pt_3=0.000 tm_pt_4=-3.000 tm_pt_5=-10.353 tm_pt_6=-9.145 tm_pt_7=-4.000 tm_pt_8=-10.872 tm_pt_9=-0.000 tm_pt_10=-14.748 tm_pt_11=0.000 tm_pt_12=-3.432 tm_pt_13=0.000 tm_pt_14=-4.000 tm_pt_15=-1.000 tm_pt_16=0.000 tm_glue_0=5.000 lm_0=-19.811 lm_1=-22.828 WordPenalty=-3.474 OOVPenalty=-200.000 ||| -249.033
+92 ||| these একএ the mycelium formed . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-4.000 tm_pt_3=0.000 tm_pt_4=-3.000 tm_pt_5=-10.989 tm_pt_6=-7.740 tm_pt_7=-4.000 tm_pt_8=-10.872 tm_pt_9=-0.000 tm_pt_10=-15.807 tm_pt_11=0.000 tm_pt_12=-3.494 tm_pt_13=0.000 tm_pt_14=-4.000 tm_pt_15=-1.000 tm_pt_16=0.000 tm_glue_0=5.000 lm_0=-20.043 lm_1=-22.828 WordPenalty=-3.474 OOVPenalty=-200.000 ||| -249.258
+92 ||| these একএ to mycelium formed . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-4.000 tm_pt_3=0.000 tm_pt_4=-3.000 tm_pt_5=-10.311 tm_pt_6=-8.526 tm_pt_7=-4.000 tm_pt_8=-10.872 tm_pt_9=-0.000 tm_pt_10=-15.198 tm_pt_11=0.000 tm_pt_12=-4.074 tm_pt_13=0.000 tm_pt_14=-4.000 tm_pt_15=-1.000 tm_pt_16=0.000 tm_glue_0=5.000 lm_0=-20.137 lm_1=-22.828 WordPenalty=-3.474 OOVPenalty=-200.000 ||| -249.561
+92 ||| these একএ the mycelium formed the . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-4.000 tm_pt_3=0.000 tm_pt_4=-4.000 tm_pt_5=-14.196 tm_pt_6=-5.953 tm_pt_7=-4.000 tm_pt_8=-10.872 tm_pt_9=-0.050 tm_pt_10=-8.281 tm_pt_11=0.000 tm_pt_12=-2.016 tm_pt_13=0.000 tm_pt_14=-5.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=6.000 lm_0=-21.826 lm_1=-26.089 WordPenalty=-3.909 OOVPenalty=-200.000 ||| -249.619
+92 ||| these একএ by mycelium structure . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-4.000 tm_pt_3=0.000 tm_pt_4=-3.000 tm_pt_5=-10.365 tm_pt_6=-10.244 tm_pt_7=-4.000 tm_pt_8=-10.872 tm_pt_9=-0.019 tm_pt_10=-14.289 tm_pt_11=0.000 tm_pt_12=-4.461 tm_pt_13=0.000 tm_pt_14=-4.000 tm_pt_15=-1.000 tm_pt_16=0.000 tm_glue_0=5.000 lm_0=-20.001 lm_1=-22.828 WordPenalty=-3.474 OOVPenalty=-200.000 ||| -249.906
+92 ||| these একএ to mycelium formed the . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-4.000 tm_pt_3=0.000 tm_pt_4=-4.000 tm_pt_5=-13.518 tm_pt_6=-6.740 tm_pt_7=-4.000 tm_pt_8=-10.872 tm_pt_9=-0.050 tm_pt_10=-7.673 tm_pt_11=0.000 tm_pt_12=-2.595 tm_pt_13=0.000 tm_pt_14=-5.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=6.000 lm_0=-21.920 lm_1=-26.089 WordPenalty=-3.909 OOVPenalty=-200.000 ||| -249.921
+92 ||| these একএ in mycelium structure . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-4.000 tm_pt_3=0.000 tm_pt_4=-3.000 tm_pt_5=-11.559 tm_pt_6=-9.819 tm_pt_7=-4.000 tm_pt_8=-10.872 tm_pt_9=-0.019 tm_pt_10=-15.897 tm_pt_11=0.000 tm_pt_12=-4.461 tm_pt_13=0.000 tm_pt_14=-4.000 tm_pt_15=-1.000 tm_pt_16=0.000 tm_glue_0=5.000 lm_0=-19.703 lm_1=-22.828 WordPenalty=-3.474 OOVPenalty=-200.000 ||| -250.038
+93 ||| russia now a democratic country . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-4.000 tm_pt_3=0.000 tm_pt_4=-4.000 tm_pt_5=-6.332 tm_pt_6=-4.466 tm_pt_7=-4.000 tm_pt_8=-10.872 tm_pt_9=-0.050 tm_pt_10=-1.906 tm_pt_11=0.000 tm_pt_12=-2.722 tm_pt_13=0.000 tm_pt_14=-6.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=4.000 lm_0=-13.823 lm_1=-22.828 WordPenalty=-3.474 OOVPenalty=0.000 ||| -35.538
+93 ||| russia at a democratic country . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-4.000 tm_pt_3=0.000 tm_pt_4=-4.000 tm_pt_5=-7.468 tm_pt_6=-3.817 tm_pt_7=-4.000 tm_pt_8=-10.872 tm_pt_9=-0.185 tm_pt_10=-4.040 tm_pt_11=0.000 tm_pt_12=-5.120 tm_pt_13=0.000 tm_pt_14=-6.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=4.000 lm_0=-12.833 lm_1=-22.828 WordPenalty=-3.474 OOVPenalty=0.000 ||| -35.822
+93 ||| russia presently a democratic country . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-4.000 tm_pt_3=0.000 tm_pt_4=-4.000 tm_pt_5=-5.165 tm_pt_6=-4.075 tm_pt_7=-4.000 tm_pt_8=-10.872 tm_pt_9=-0.050 tm_pt_10=-1.100 tm_pt_11=0.000 tm_pt_12=-2.327 tm_pt_13=0.000 tm_pt_14=-6.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=4.000 lm_0=-14.848 lm_1=-22.828 WordPenalty=-3.474 OOVPenalty=0.000 ||| -36.101
+93 ||| russia is a democratic country . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-4.000 tm_pt_3=0.000 tm_pt_4=-4.000 tm_pt_5=-10.602 tm_pt_6=-5.298 tm_pt_7=-4.000 tm_pt_8=-10.872 tm_pt_9=-0.185 tm_pt_10=-6.630 tm_pt_11=0.000 tm_pt_12=-5.120 tm_pt_13=0.000 tm_pt_14=-6.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=4.000 lm_0=-11.653 lm_1=-22.828 WordPenalty=-3.474 OOVPenalty=0.000 ||| -36.140
+94 ||| penis ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-1.000 tm_pt_3=0.000 tm_pt_4=-1.000 tm_pt_5=-1.771 tm_pt_6=-1.305 tm_pt_7=-1.000 tm_pt_8=-2.718 tm_pt_9=-0.018 tm_pt_10=-1.609 tm_pt_11=0.000 tm_pt_12=-1.099 tm_pt_13=0.000 tm_pt_14=-1.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=1.000 lm_0=-5.586 lm_1=-5.991 WordPenalty=-1.303 OOVPenalty=0.000 ||| -10.979
+94 ||| sex ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-1.000 tm_pt_3=0.000 tm_pt_4=-1.000 tm_pt_5=-3.989 tm_pt_6=-3.384 tm_pt_7=-1.000 tm_pt_8=-2.718 tm_pt_9=-0.368 tm_pt_10=-2.251 tm_pt_11=0.000 tm_pt_12=-2.015 tm_pt_13=0.000 tm_pt_14=-1.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=1.000 lm_0=-4.702 lm_1=-5.991 WordPenalty=-1.303 OOVPenalty=0.000 ||| -11.798
+94 ||| gender ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-1.000 tm_pt_3=0.000 tm_pt_4=-1.000 tm_pt_5=-0.811 tm_pt_6=-1.305 tm_pt_7=-1.000 tm_pt_8=-2.718 tm_pt_9=-0.007 tm_pt_10=-0.405 tm_pt_11=0.000 tm_pt_12=-0.916 tm_pt_13=0.000 tm_pt_14=-1.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=1.000 lm_0=-6.387 lm_1=-6.522 WordPenalty=-1.303 OOVPenalty=0.000 ||| -12.058
+94 ||| the ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-1.000 tm_pt_3=0.000 tm_pt_4=-1.000 tm_pt_5=-9.110 tm_pt_6=-2.979 tm_pt_7=-1.000 tm_pt_8=-2.718 tm_pt_9=-1.000 tm_pt_10=-8.187 tm_pt_11=0.000 tm_pt_12=-2.708 tm_pt_13=0.000 tm_pt_14=-1.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=1.000 lm_0=-3.208 lm_1=-6.522 WordPenalty=-1.303 OOVPenalty=0.000 ||| -13.752
+94 ||| sexual organs ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-1.000 tm_pt_3=0.000 tm_pt_4=-1.000 tm_pt_5=-9.102 tm_pt_6=-4.078 tm_pt_7=-1.000 tm_pt_8=-2.718 tm_pt_9=-1.000 tm_pt_10=-1.386 tm_pt_11=0.000 tm_pt_12=-2.708 tm_pt_13=0.000 tm_pt_14=-2.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=1.000 lm_0=-5.946 lm_1=-9.252 WordPenalty=-1.737 OOVPenalty=0.000 ||| -17.194
+94 ||| লিঙ্গ ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=0.000 tm_pt_3=0.000 tm_pt_4=0.000 tm_pt_5=0.000 tm_pt_6=0.000 tm_pt_7=0.000 tm_pt_8=0.000 tm_pt_9=0.000 tm_pt_10=0.000 tm_pt_11=0.000 tm_pt_12=0.000 tm_pt_13=0.000 tm_pt_14=0.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=1.000 lm_0=-7.355 lm_1=-6.522 WordPenalty=-1.303 OOVPenalty=-100.000 ||| -111.358
+95 ||| this state is called orgasm . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-1.000 tm_pt_3=0.000 tm_pt_4=-1.000 tm_pt_5=-12.738 tm_pt_6=-21.324 tm_pt_7=-1.000 tm_pt_8=-2.718 tm_pt_9=-0.368 tm_pt_10=0.000 tm_pt_11=0.000 tm_pt_12=-0.693 tm_pt_13=0.000 tm_pt_14=-6.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=1.000 lm_0=-5.945 lm_1=-22.828 WordPenalty=-3.474 OOVPenalty=0.000 ||| -31.255
+95 ||| this state called orgasm . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-3.000 tm_pt_3=0.000 tm_pt_4=-3.000 tm_pt_5=-8.924 tm_pt_6=-21.265 tm_pt_7=-3.000 tm_pt_8=-8.154 tm_pt_9=-0.504 tm_pt_10=-2.441 tm_pt_11=0.000 tm_pt_12=-2.783 tm_pt_13=0.000 tm_pt_14=-5.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=3.000 lm_0=-10.709 lm_1=-19.567 WordPenalty=-3.040 OOVPenalty=0.000 ||| -36.164
+95 ||| this situation is called orgasm . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-1.000 tm_pt_3=0.000 tm_pt_4=-1.000 tm_pt_5=-11.823 tm_pt_6=-21.729 tm_pt_7=-1.000 tm_pt_8=-2.718 tm_pt_9=-1.000 tm_pt_10=0.000 tm_pt_11=0.000 tm_pt_12=-1.386 tm_pt_13=0.000 tm_pt_14=-6.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=1.000 lm_0=-9.800 lm_1=-22.828 WordPenalty=-3.474 OOVPenalty=0.000 ||| -36.754
+96 ||| different period here was established royal more empire . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-9.000 tm_pt_3=0.000 tm_pt_4=-4.000 tm_pt_5=-21.275 tm_pt_6=-12.515 tm_pt_7=-9.000 tm_pt_8=-24.462 tm_pt_9=-2.754 tm_pt_10=-16.893 tm_pt_11=0.000 tm_pt_12=-4.327 tm_pt_13=0.000 tm_pt_14=-9.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=2.000 lm_0=-26.591 lm_1=-32.611 WordPenalty=-4.777 OOVPenalty=0.000 ||| -77.701
+96 ||| different period was established this royal more empire . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-9.000 tm_pt_3=0.000 tm_pt_4=-4.000 tm_pt_5=-25.005 tm_pt_6=-13.679 tm_pt_7=-9.000 tm_pt_8=-24.462 tm_pt_9=-3.386 tm_pt_10=-18.640 tm_pt_11=0.000 tm_pt_12=-5.020 tm_pt_13=0.000 tm_pt_14=-9.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=2.000 lm_0=-24.820 lm_1=-32.611 WordPenalty=-4.777 OOVPenalty=0.000 ||| -77.837
+96 ||| different period here established the royal more empire . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-9.000 tm_pt_3=0.000 tm_pt_4=-3.000 tm_pt_5=-24.031 tm_pt_6=-13.324 tm_pt_7=-9.000 tm_pt_8=-24.462 tm_pt_9=-2.503 tm_pt_10=-19.552 tm_pt_11=0.000 tm_pt_12=-3.996 tm_pt_13=0.000 tm_pt_14=-9.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=2.000 lm_0=-26.232 lm_1=-32.611 WordPenalty=-4.777 OOVPenalty=0.000 ||| -78.390
+96 ||| different period was established royal this more empire . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-9.000 tm_pt_3=0.000 tm_pt_4=-4.000 tm_pt_5=-25.005 tm_pt_6=-13.679 tm_pt_7=-9.000 tm_pt_8=-24.462 tm_pt_9=-3.386 tm_pt_10=-18.640 tm_pt_11=0.000 tm_pt_12=-5.020 tm_pt_13=0.000 tm_pt_14=-9.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=2.000 lm_0=-25.484 lm_1=-32.611 WordPenalty=-4.777 OOVPenalty=0.000 ||| -78.658
+96 ||| history different period here was established royal more empire . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-10.000 tm_pt_3=0.000 tm_pt_4=-8.000 tm_pt_5=-23.037 tm_pt_6=-9.337 tm_pt_7=-10.000 tm_pt_8=-27.180 tm_pt_9=-1.387 tm_pt_10=-20.795 tm_pt_11=0.000 tm_pt_12=-5.694 tm_pt_13=0.000 tm_pt_14=-10.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=8.000 lm_0=-30.742 lm_1=-35.872 WordPenalty=-5.212 OOVPenalty=0.000 ||| -79.860
+96 ||| history different period this was established royal more empire . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-10.000 tm_pt_3=0.000 tm_pt_4=-9.000 tm_pt_5=-26.767 tm_pt_6=-10.501 tm_pt_7=-10.000 tm_pt_8=-27.180 tm_pt_9=-2.019 tm_pt_10=-24.010 tm_pt_11=0.000 tm_pt_12=-6.675 tm_pt_13=0.000 tm_pt_14=-10.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=9.000 lm_0=-29.314 lm_1=-35.872 WordPenalty=-5.212 OOVPenalty=0.000 ||| -79.939
+97 ||| micro economics ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-1.000 tm_pt_3=0.000 tm_pt_4=-1.000 tm_pt_5=-2.764 tm_pt_6=-1.552 tm_pt_7=-1.000 tm_pt_8=-2.718 tm_pt_9=-0.050 tm_pt_10=0.000 tm_pt_11=0.000 tm_pt_12=-0.560 tm_pt_13=0.000 tm_pt_14=-2.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=1.000 lm_0=-5.021 lm_1=-9.783 WordPenalty=-1.737 OOVPenalty=0.000 ||| -12.731
+97 ||| microeconomics ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-1.000 tm_pt_3=0.000 tm_pt_4=-1.000 tm_pt_5=-1.386 tm_pt_6=-5.894 tm_pt_7=-1.000 tm_pt_8=-2.718 tm_pt_9=-1.000 tm_pt_10=-1.099 tm_pt_11=0.000 tm_pt_12=-1.946 tm_pt_13=0.000 tm_pt_14=-1.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=1.000 lm_0=-5.336 lm_1=-6.522 WordPenalty=-1.303 OOVPenalty=0.000 ||| -13.795
+97 ||| macro economics ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-1.000 tm_pt_3=0.000 tm_pt_4=-1.000 tm_pt_5=-2.997 tm_pt_6=-2.245 tm_pt_7=-1.000 tm_pt_8=-2.718 tm_pt_9=-0.368 tm_pt_10=0.000 tm_pt_11=0.000 tm_pt_12=-1.253 tm_pt_13=0.000 tm_pt_14=-2.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=1.000 lm_0=-5.691 lm_1=-9.783 WordPenalty=-1.737 OOVPenalty=0.000 ||| -14.348
+97 ||| macro economy ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-2.000 tm_pt_3=0.000 tm_pt_4=-2.000 tm_pt_5=-3.219 tm_pt_6=-2.957 tm_pt_7=-2.000 tm_pt_8=-5.436 tm_pt_9=-0.368 tm_pt_10=-1.009 tm_pt_11=0.000 tm_pt_12=-2.277 tm_pt_13=0.000 tm_pt_14=-2.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=2.000 lm_0=-6.566 lm_1=-9.783 WordPenalty=-1.737 OOVPenalty=0.000 ||| -16.531
+97 ||| micro economy ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-2.000 tm_pt_3=0.000 tm_pt_4=-2.000 tm_pt_5=-2.985 tm_pt_6=-2.264 tm_pt_7=-2.000 tm_pt_8=-5.436 tm_pt_9=-0.050 tm_pt_10=-1.874 tm_pt_11=0.000 tm_pt_12=-1.583 tm_pt_13=0.000 tm_pt_14=-2.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=2.000 lm_0=-8.065 lm_1=-9.783 WordPenalty=-1.737 OOVPenalty=0.000 ||| -17.823
+97 ||| economics micro ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-2.000 tm_pt_3=0.000 tm_pt_4=-1.000 tm_pt_5=-2.764 tm_pt_6=-1.552 tm_pt_7=-2.000 tm_pt_8=-5.436 tm_pt_9=-0.051 tm_pt_10=-4.442 tm_pt_11=0.000 tm_pt_12=-4.472 tm_pt_13=0.000 tm_pt_14=-2.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=1.000 lm_0=-9.491 lm_1=-9.783 WordPenalty=-1.737 OOVPenalty=0.000 ||| -21.955
+98 ||| user to operating system the visible of the computer interface . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-10.000 tm_pt_3=0.000 tm_pt_4=-10.000 tm_pt_5=-30.494 tm_pt_6=-10.074 tm_pt_7=-10.000 tm_pt_8=-27.180 tm_pt_9=-0.553 tm_pt_10=-25.134 tm_pt_11=0.000 tm_pt_12=-9.193 tm_pt_13=0.000 tm_pt_14=-11.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=10.000 lm_0=-27.398 lm_1=-39.134 WordPenalty=-5.646 OOVPenalty=0.000 ||| -79.254
+98 ||| user to operating system the visible form is computer interface . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-10.000 tm_pt_3=0.000 tm_pt_4=-10.000 tm_pt_5=-24.401 tm_pt_6=-9.477 tm_pt_7=-10.000 tm_pt_8=-27.180 tm_pt_9=-0.553 tm_pt_10=-19.267 tm_pt_11=0.000 tm_pt_12=-8.160 tm_pt_13=0.000 tm_pt_14=-11.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=10.000 lm_0=-30.618 lm_1=-39.134 WordPenalty=-5.646 OOVPenalty=0.000 ||| -79.981
+98 ||| user to operating system the visible form are computer interface . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-10.000 tm_pt_3=0.000 tm_pt_4=-10.000 tm_pt_5=-23.695 tm_pt_6=-9.596 tm_pt_7=-10.000 tm_pt_8=-27.180 tm_pt_9=-0.553 tm_pt_10=-17.971 tm_pt_11=0.000 tm_pt_12=-7.737 tm_pt_13=0.000 tm_pt_14=-11.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=10.000 lm_0=-31.122 lm_1=-39.134 WordPenalty=-5.646 OOVPenalty=0.000 ||| -80.025
+98 ||| user to operating system the visible of are computer interface . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-10.000 tm_pt_3=0.000 tm_pt_4=-10.000 tm_pt_5=-27.275 tm_pt_6=-9.031 tm_pt_7=-10.000 tm_pt_8=-27.180 tm_pt_9=-0.553 tm_pt_10=-21.498 tm_pt_11=0.000 tm_pt_12=-7.737 tm_pt_13=0.000 tm_pt_14=-11.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=10.000 lm_0=-30.220 lm_1=-39.134 WordPenalty=-5.646 OOVPenalty=0.000 ||| -80.315
+98 ||| user to operating system the visible of the computers interface . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-10.000 tm_pt_3=0.000 tm_pt_4=-10.000 tm_pt_5=-30.657 tm_pt_6=-11.632 tm_pt_7=-10.000 tm_pt_8=-27.180 tm_pt_9=-0.553 tm_pt_10=-24.916 tm_pt_11=0.000 tm_pt_12=-10.715 tm_pt_13=0.000 tm_pt_14=-11.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=10.000 lm_0=-27.592 lm_1=-39.134 WordPenalty=-5.646 OOVPenalty=0.000 ||| -80.540
+98 ||| the user to operating visible of are computer interface . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-10.000 tm_pt_3=0.000 tm_pt_4=-8.000 tm_pt_5=-25.284 tm_pt_6=-9.508 tm_pt_7=-10.000 tm_pt_8=-27.180 tm_pt_9=-1.603 tm_pt_10=-20.953 tm_pt_11=0.000 tm_pt_12=-10.715 tm_pt_13=0.000 tm_pt_14=-10.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=7.000 lm_0=-28.611 lm_1=-35.872 WordPenalty=-5.212 OOVPenalty=0.000 ||| -80.707
+98 ||| user to operating system most visible of are computer interface . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-10.000 tm_pt_3=0.000 tm_pt_4=-10.000 tm_pt_5=-23.367 tm_pt_6=-8.876 tm_pt_7=-10.000 tm_pt_8=-27.180 tm_pt_9=-0.553 tm_pt_10=-17.510 tm_pt_11=0.000 tm_pt_12=-7.099 tm_pt_13=0.000 tm_pt_14=-11.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=10.000 lm_0=-32.231 lm_1=-39.134 WordPenalty=-5.646 OOVPenalty=0.000 ||| -80.742
+98 ||| operating system to users most visible of are computer interface . ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-10.000 tm_pt_3=0.000 tm_pt_4=-9.000 tm_pt_5=-23.295 tm_pt_6=-9.312 tm_pt_7=-10.000 tm_pt_8=-27.180 tm_pt_9=-0.878 tm_pt_10=-14.536 tm_pt_11=0.000 tm_pt_12=-7.541 tm_pt_13=0.000 tm_pt_14=-11.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=8.000 lm_0=-30.825 lm_1=-39.134 WordPenalty=-5.646 OOVPenalty=0.000 ||| -80.754
+99 ||| বিস্তারিতঃ 1971 temporary government ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-2.000 tm_pt_3=0.000 tm_pt_4=-2.000 tm_pt_5=-2.717 tm_pt_6=-9.543 tm_pt_7=-2.000 tm_pt_8=-5.436 tm_pt_9=-0.503 tm_pt_10=-4.447 tm_pt_11=0.000 tm_pt_12=-2.398 tm_pt_13=0.000 tm_pt_14=-3.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=3.000 lm_0=-16.410 lm_1=-16.306 WordPenalty=-2.606 OOVPenalty=-100.000 ||| -135.444
+99 ||| বিস্তারিতঃ 1971 temporary bangladesh government ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-3.000 tm_pt_3=0.000 tm_pt_4=-3.000 tm_pt_5=-3.296 tm_pt_6=-5.286 tm_pt_7=-3.000 tm_pt_8=-8.154 tm_pt_9=-0.368 tm_pt_10=-3.934 tm_pt_11=0.000 tm_pt_12=-2.129 tm_pt_13=0.000 tm_pt_14=-4.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=4.000 lm_0=-17.358 lm_1=-19.567 WordPenalty=-3.040 OOVPenalty=-100.000 ||| -137.226
+99 ||| বিস্তারিতঃ , 1971 temporary government ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-2.000 tm_pt_3=0.000 tm_pt_4=-2.000 tm_pt_5=-7.272 tm_pt_6=-7.036 tm_pt_7=-2.000 tm_pt_8=-5.436 tm_pt_9=-0.271 tm_pt_10=-2.590 tm_pt_11=0.000 tm_pt_12=-1.992 tm_pt_13=0.000 tm_pt_14=-4.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=3.000 lm_0=-16.939 lm_1=-19.567 WordPenalty=-3.040 OOVPenalty=-100.000 ||| -137.400
+99 ||| বিস্তারিতঃ temporary government , 1971 ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-2.000 tm_pt_3=0.000 tm_pt_4=-1.000 tm_pt_5=-7.272 tm_pt_6=-7.036 tm_pt_7=-2.000 tm_pt_8=-5.436 tm_pt_9=-0.185 tm_pt_10=-1.204 tm_pt_11=0.000 tm_pt_12=-2.351 tm_pt_13=0.000 tm_pt_14=-4.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=2.000 lm_0=-16.484 lm_1=-19.567 WordPenalty=-3.040 OOVPenalty=-100.000 ||| -137.498
+99 ||| বিস্তারিতঃ in 1971 temporary government ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-2.000 tm_pt_3=0.000 tm_pt_4=-2.000 tm_pt_5=-6.823 tm_pt_6=-7.645 tm_pt_7=-2.000 tm_pt_8=-5.436 tm_pt_9=-0.271 tm_pt_10=-2.827 tm_pt_11=0.000 tm_pt_12=-1.992 tm_pt_13=0.000 tm_pt_14=-4.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=3.000 lm_0=-16.946 lm_1=-19.567 WordPenalty=-3.040 OOVPenalty=-100.000 ||| -137.592
+99 ||| বিস্তারিতঃ temporary government in 1971 ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-2.000 tm_pt_3=0.000 tm_pt_4=-1.000 tm_pt_5=-6.823 tm_pt_6=-7.645 tm_pt_7=-2.000 tm_pt_8=-5.436 tm_pt_9=-0.185 tm_pt_10=-1.674 tm_pt_11=0.000 tm_pt_12=-2.351 tm_pt_13=0.000 tm_pt_14=-4.000 tm_pt_15=0.000 tm_pt_16=0.000 tm_glue_0=2.000 lm_0=-16.404 lm_1=-19.567 WordPenalty=-3.040 OOVPenalty=-100.000 ||| -137.643
diff --git a/test/decoder/num_translation_options/output.gold b/test/decoder/num_translation_options/output.gold
index 705562b..4203822 100644
--- a/test/decoder/num_translation_options/output.gold
+++ b/test/decoder/num_translation_options/output.gold
@@ -1,12 +1,12 @@
--19.196 ||| i like taco bell ||| lm_0=-17.449 tm_pt_0=4.000 tm_glue_0=1.000 WordPenalty=-2.606
--19.733 ||| i love taco bell ||| lm_0=-18.690 tm_pt_0=5.000 tm_glue_0=1.000 WordPenalty=-2.606
--22.883 ||| i appreciate taco bell ||| lm_0=-19.620 tm_pt_0=3.000 tm_glue_0=1.000 WordPenalty=-2.606
--424.954 ||| yo quiero taco bell ||| lm_0=-21.293 tm_pt_0=0.000 tm_glue_0=4.000 WordPenalty=-2.606 OOVPenalty=-400.000
--19.196 ||| i like taco bell ||| lm_0=-17.449 tm_pt_0=4.000 tm_glue_0=1.000 WordPenalty=-2.606
--19.733 ||| i love taco bell ||| lm_0=-18.690 tm_pt_0=5.000 tm_glue_0=1.000 WordPenalty=-2.606
--22.883 ||| i appreciate taco bell ||| lm_0=-19.620 tm_pt_0=3.000 tm_glue_0=1.000 WordPenalty=-2.606
--424.954 ||| yo quiero taco bell ||| lm_0=-21.293 tm_pt_0=0.000 tm_glue_0=4.000 WordPenalty=-2.606 OOVPenalty=-400.000
--19.196 ||| i like taco bell ||| lm_0=-17.449 tm_pt_0=4.000 tm_glue_0=1.000 WordPenalty=-2.606
--19.733 ||| i love taco bell ||| lm_0=-18.690 tm_pt_0=5.000 tm_glue_0=1.000 WordPenalty=-2.606
--22.883 ||| i appreciate taco bell ||| lm_0=-19.620 tm_pt_0=3.000 tm_glue_0=1.000 WordPenalty=-2.606
--424.954 ||| yo quiero taco bell ||| lm_0=-21.293 tm_pt_0=0.000 tm_glue_0=4.000 WordPenalty=-2.606 OOVPenalty=-400.000
+-19.196 ||| i like taco bell ||| tm_pt_0=4.000 tm_glue_0=1.000 lm_0=-17.449 WordPenalty=-2.606 OOVPenalty=0.000
+-19.733 ||| i love taco bell ||| tm_pt_0=5.000 tm_glue_0=1.000 lm_0=-18.690 WordPenalty=-2.606 OOVPenalty=0.000
+-22.883 ||| i appreciate taco bell ||| tm_pt_0=3.000 tm_glue_0=1.000 lm_0=-19.620 WordPenalty=-2.606 OOVPenalty=0.000
+-424.954 ||| yo quiero taco bell ||| tm_pt_0=0.000 tm_glue_0=4.000 lm_0=-21.293 WordPenalty=-2.606 OOVPenalty=-400.000
+-19.196 ||| i like taco bell ||| tm_pt_0=4.000 tm_glue_0=1.000 lm_0=-17.449 WordPenalty=-2.606 OOVPenalty=0.000
+-19.733 ||| i love taco bell ||| tm_pt_0=5.000 tm_glue_0=1.000 lm_0=-18.690 WordPenalty=-2.606 OOVPenalty=0.000
+-22.883 ||| i appreciate taco bell ||| tm_pt_0=3.000 tm_glue_0=1.000 lm_0=-19.620 WordPenalty=-2.606 OOVPenalty=0.000
+-424.954 ||| yo quiero taco bell ||| tm_pt_0=0.000 tm_glue_0=4.000 lm_0=-21.293 WordPenalty=-2.606 OOVPenalty=-400.000
+-19.196 ||| i like taco bell ||| tm_pt_0=4.000 tm_glue_0=1.000 lm_0=-17.449 WordPenalty=-2.606 OOVPenalty=0.000
+-19.733 ||| i love taco bell ||| tm_pt_0=5.000 tm_glue_0=1.000 lm_0=-18.690 WordPenalty=-2.606 OOVPenalty=0.000
+-22.883 ||| i appreciate taco bell ||| tm_pt_0=3.000 tm_glue_0=1.000 lm_0=-19.620 WordPenalty=-2.606 OOVPenalty=0.000
+-424.954 ||| yo quiero taco bell ||| tm_pt_0=0.000 tm_glue_0=4.000 lm_0=-21.293 WordPenalty=-2.606 OOVPenalty=-400.000
diff --git a/test/decoder/oov-list/output.gold b/test/decoder/oov-list/output.gold
index 8ad5b56..d911c52 100644
--- a/test/decoder/oov-list/output.gold
+++ b/test/decoder/oov-list/output.gold
@@ -1,3 +1,3 @@
-0 ||| Goats eat cheese ||| lm_0=-16.587 tm_phrase_0=-0.000 tm_glue_0=1.000 WordPenalty=-2.171 ||| -11.503
-1 ||| i will go home ||| lm_0=-12.155 tm_phrase_0=-0.000 tm_glue_0=1.000 WordPenalty=-2.606 ||| -4.414
-2 ||| goets_OOV eet_OOV cheez_OOV ||| lm_0=-17.700 tm_phrase_0=-0.000 tm_glue_0=1.000 WordPenalty=-2.171 OOVPenalty=-7.749 ||| -20.629
+0 ||| Goats eat cheese ||| tm_phrase_0=0.000 tm_glue_0=1.000 lm_0=-16.587 WordPenalty=-2.171 OOVPenalty=0.000 ||| -11.503
+1 ||| i will go home ||| tm_phrase_0=0.000 tm_glue_0=1.000 lm_0=-12.155 WordPenalty=-2.606 OOVPenalty=0.000 ||| -4.414
+2 ||| goets_OOV eet_OOV cheez_OOV ||| tm_phrase_0=0.000 tm_glue_0=1.000 lm_0=-17.700 WordPenalty=-2.171 OOVPenalty=-7.749 ||| -20.629
diff --git a/test/decoder/phrase/constrained/output.gold b/test/decoder/phrase/constrained/output.gold
index 96d9582..238387c 100644
--- a/test/decoder/phrase/constrained/output.gold
+++ b/test/decoder/phrase/constrained/output.gold
@@ -1,5 +1,5 @@
-0 ||| President Obama |8-8| to |7-7| hinder |4-4| a strategy |0-1| for |3-3| Republican |2-2| re @-@ election |5-6| ||| Distortion=-24.000 WordPenalty=-4.777 PhrasePenalty=7.000 tm_pt_0=-15.792 tm_pt_1=-17.550 tm_pt_2=-14.599 tm_pt_3=-18.298 lm_0=-29.452 ||| -15.163
-0 ||| President Obama |8-8| to |7-7| hinder |4-4| a |0-0| strategy |1-1| for |3-3| Republican |2-2| re @-@ election |5-6| ||| Distortion=-24.000 WordPenalty=-4.777 PhrasePenalty=8.000 tm_pt_0=-16.919 tm_pt_1=-17.550 tm_pt_2=-14.917 tm_pt_3=-18.298 lm_0=-29.452 ||| -15.505
-0 ||| President Obama |8-8| to hinder |3-4| a strategy |0-1| for |7-7| Republican |2-2| re @-@ election |5-6| ||| Distortion=-32.000 WordPenalty=-4.777 PhrasePenalty=6.000 tm_pt_0=-14.986 tm_pt_1=-17.951 tm_pt_2=-14.075 tm_pt_3=-18.699 lm_0=-29.452 ||| -15.762
-0 ||| President Obama |8-8| to hinder |3-4| a |0-0| strategy |1-1| for |7-7| Republican |2-2| re @-@ election |5-6| ||| Distortion=-32.000 WordPenalty=-4.777 PhrasePenalty=7.000 tm_pt_0=-16.112 tm_pt_1=-17.951 tm_pt_2=-14.393 tm_pt_3=-18.699 lm_0=-29.452 ||| -16.103
-0 ||| President Obama |8-8| to |3-3| hinder |4-4| a strategy |0-1| for |7-7| Republican |2-2| re @-@ election |5-6| ||| Distortion=-32.000 WordPenalty=-4.777 PhrasePenalty=7.000 tm_pt_0=-16.329 tm_pt_1=-17.951 tm_pt_2=-15.136 tm_pt_3=-18.699 lm_0=-29.452 ||| -16.257
+0 ||| President Obama |8-8| to |7-7| hinder |4-4| a strategy |0-1| for |3-3| Republican |2-2| re @-@ election |5-6| ||| tm_pt_0=-15.792 tm_pt_1=-17.550 tm_pt_2=-14.599 tm_pt_3=-18.298 lm_0=-29.452 OOVPenalty=0.000 WordPenalty=-4.777 Distortion=-24.000 PhrasePenalty=7.000 ||| -15.163
+0 ||| President Obama |8-8| to |7-7| hinder |4-4| a |0-0| strategy |1-1| for |3-3| Republican |2-2| re @-@ election |5-6| ||| tm_pt_0=-16.919 tm_pt_1=-17.550 tm_pt_2=-14.917 tm_pt_3=-18.298 lm_0=-29.452 OOVPenalty=0.000 WordPenalty=-4.777 Distortion=-24.000 PhrasePenalty=8.000 ||| -15.505
+0 ||| President Obama |8-8| to hinder |3-4| a strategy |0-1| for |7-7| Republican |2-2| re @-@ election |5-6| ||| tm_pt_0=-14.986 tm_pt_1=-17.951 tm_pt_2=-14.075 tm_pt_3=-18.699 lm_0=-29.452 OOVPenalty=0.000 WordPenalty=-4.777 Distortion=-32.000 PhrasePenalty=6.000 ||| -15.762
+0 ||| President Obama |8-8| to hinder |3-4| a |0-0| strategy |1-1| for |7-7| Republican |2-2| re @-@ election |5-6| ||| tm_pt_0=-16.112 tm_pt_1=-17.951 tm_pt_2=-14.393 tm_pt_3=-18.699 lm_0=-29.452 OOVPenalty=0.000 WordPenalty=-4.777 Distortion=-32.000 PhrasePenalty=7.000 ||| -16.103
+0 ||| President Obama |8-8| to |3-3| hinder |4-4| a strategy |0-1| for |7-7| Republican |2-2| re @-@ election |5-6| ||| tm_pt_0=-16.329 tm_pt_1=-17.951 tm_pt_2=-15.136 tm_pt_3=-18.699 lm_0=-29.452 OOVPenalty=0.000 WordPenalty=-4.777 Distortion=-32.000 PhrasePenalty=7.000 ||| -16.257
diff --git a/test/decoder/phrase/decode/output.gold b/test/decoder/phrase/decode/output.gold
index f3f7eca..509a3de 100644
--- a/test/decoder/phrase/decode/output.gold
+++ b/test/decoder/phrase/decode/output.gold
@@ -1 +1 @@
-0 ||| a strategy |0-1| republican |2-2| to hinder |3-4| reelection |5-6| Obama |7-8| ||| Distortion=0.000 WordPenalty=-3.040 PhrasePenalty=5.000 tm_pt_0=-9.702 tm_pt_1=-10.800 tm_pt_2=-7.543 tm_pt_3=-8.555 lm_0=-19.117 ||| -7.496
+0 ||| a strategy |0-1| republican |2-2| to hinder |3-4| reelection |5-6| Obama |7-8| ||| tm_pt_0=-9.702 tm_pt_1=-10.800 tm_pt_2=-7.543 tm_pt_3=-8.555 lm_0=-19.117 OOVPenalty=0.000 WordPenalty=-3.040 Distortion=0.000 PhrasePenalty=5.000 ||| -7.496
diff --git a/test/decoder/regexp-grammar-both-rule-types/output.gold b/test/decoder/regexp-grammar-both-rule-types/output.gold
index 52b1429..c8edb86 100644
--- a/test/decoder/regexp-grammar-both-rule-types/output.gold
+++ b/test/decoder/regexp-grammar-both-rule-types/output.gold
@@ -1,12 +1,12 @@
-0 ||| girl feminine-singular-pretty ||| tm_regexp_0=-2.000 tm_regexp_1=-0.000 tm_glue_0=2.000 ||| -4.000
-0 ||| girl feminine-pretty ||| tm_regexp_0=-2.000 tm_regexp_1=-1.000 tm_glue_0=2.000 ||| -5.000
-0 ||| girl generic-pretty ||| tm_regexp_0=-2.000 tm_regexp_1=-2.000 tm_glue_0=2.000 ||| -6.000
-1 ||| boys masculine-pretty ||| tm_regexp_0=-2.000 tm_regexp_1=-1.000 tm_glue_0=2.000 ||| -5.000
-1 ||| boys generic-pretty ||| tm_regexp_0=-2.000 tm_regexp_1=-2.000 tm_glue_0=2.000 ||| -6.000
-1 ||| boys lindos_OOV ||| tm_regexp_0=-1.000 tm_regexp_1=-0.000 tm_glue_0=2.000 OOVPenalty=-100.000 ||| -103.000
-2 ||| boys generic-pretty ||| tm_regexp_0=-2.000 tm_regexp_1=-2.000 tm_glue_0=2.000 ||| -6.000
-2 ||| boys lind?s_OOV ||| tm_regexp_0=-1.000 tm_regexp_1=-0.000 tm_glue_0=2.000 OOVPenalty=-100.000 ||| -103.000
+0 ||| girl feminine-singular-pretty ||| tm_regexp_0=-2.000 tm_regexp_1=0.000 tm_glue_0=2.000 OOVPenalty=0.000 ||| -4.000
+0 ||| girl feminine-pretty ||| tm_regexp_0=-2.000 tm_regexp_1=-1.000 tm_glue_0=2.000 OOVPenalty=0.000 ||| -5.000
+0 ||| girl generic-pretty ||| tm_regexp_0=-2.000 tm_regexp_1=-2.000 tm_glue_0=2.000 OOVPenalty=0.000 ||| -6.000
+1 ||| boys masculine-pretty ||| tm_regexp_0=-2.000 tm_regexp_1=-1.000 tm_glue_0=2.000 OOVPenalty=0.000 ||| -5.000
+1 ||| boys generic-pretty ||| tm_regexp_0=-2.000 tm_regexp_1=-2.000 tm_glue_0=2.000 OOVPenalty=0.000 ||| -6.000
+1 ||| boys lindos_OOV ||| tm_regexp_0=-1.000 tm_regexp_1=0.000 tm_glue_0=2.000 OOVPenalty=-100.000 ||| -103.000
+2 ||| boys generic-pretty ||| tm_regexp_0=-2.000 tm_regexp_1=-2.000 tm_glue_0=2.000 OOVPenalty=0.000 ||| -6.000
+2 ||| boys lind?s_OOV ||| tm_regexp_0=-1.000 tm_regexp_1=0.000 tm_glue_0=2.000 OOVPenalty=-100.000 ||| -103.000
2 ||| chicos_OOV generic-pretty ||| tm_regexp_0=-1.000 tm_regexp_1=-2.000 tm_glue_0=2.000 OOVPenalty=-100.000 ||| -105.000
-3 ||| really big number ||| tm_regexp_0=-1.000 tm_regexp_1=-1.000 tm_glue_0=1.000 ||| -3.000
+3 ||| really big number ||| tm_regexp_0=-1.000 tm_regexp_1=-1.000 tm_glue_0=1.000 OOVPenalty=0.000 ||| -3.000
3 ||| 1928371028_OOV ||| tm_regexp_0=0.000 tm_regexp_1=0.000 tm_glue_0=1.000 OOVPenalty=-100.000 ||| -101.000
4 ||| 192837102_OOV ||| tm_regexp_0=0.000 tm_regexp_1=0.000 tm_glue_0=1.000 OOVPenalty=-100.000 ||| -101.000
diff --git a/test/decoder/regexp-grammar/output.gold b/test/decoder/regexp-grammar/output.gold
index afaf4a8..49c5ea4 100644
--- a/test/decoder/regexp-grammar/output.gold
+++ b/test/decoder/regexp-grammar/output.gold
@@ -1,4 +1,4 @@
-0 ||| girl pretty ||| tm_regexp_0=-2.000 tm_regexp_1=-1.000 tm_glue_0=2.000 ||| -5.000
-1 ||| boy pretty ||| tm_regexp_0=-2.000 tm_regexp_1=-1.000 tm_glue_0=2.000 ||| -5.000
-2 ||| really big number ||| tm_regexp_0=-1.000 tm_regexp_1=-0.000 tm_glue_0=1.000 ||| -2.000
+0 ||| girl pretty ||| tm_regexp_0=-2.000 tm_regexp_1=-1.000 tm_glue_0=2.000 OOVPenalty=0.000 ||| -5.000
+1 ||| boy pretty ||| tm_regexp_0=-2.000 tm_regexp_1=-1.000 tm_glue_0=2.000 OOVPenalty=0.000 ||| -5.000
+2 ||| really big number ||| tm_regexp_0=-1.000 tm_regexp_1=0.000 tm_glue_0=1.000 OOVPenalty=0.000 ||| -2.000
3 ||| 192837102_OOV ||| tm_regexp_0=0.000 tm_regexp_1=0.000 tm_glue_0=1.000 OOVPenalty=-100.000 ||| -101.000
diff --git a/test/decoder/rescoring/output.gold b/test/decoder/rescoring/output.gold
index 4d6c512..5d6600d 100644
--- a/test/decoder/rescoring/output.gold
+++ b/test/decoder/rescoring/output.gold
@@ -1,12 +1,12 @@
-0 ||| the boy ate the cockroach ||| lm_0=-17.198 tm_pt_0=-6.000 tm_glue_0=5.000 WordPenalty=-3.040 ||| -19.240
-0 ||| the boy eated the cockroach ||| lm_0=-17.198 tm_pt_0=-11.000 tm_glue_0=5.000 WordPenalty=-3.040 ||| -24.240
-0 ||| the boy eated the cockroach ||| lm_0=-17.198 tm_pt_0=-11.000 tm_glue_0=5.000 WordPenalty=-3.040 ||| -24.240
-0 ||| the kid eated the cockroach ||| lm_0=-20.053 tm_pt_0=-15.000 tm_glue_0=5.000 WordPenalty=-3.040 ||| -31.773
-0 ||| the boy ate the cockroach ||| lm_0=-17.198 tm_pt_0=-6.000 tm_glue_0=5.000 WordPenalty=-3.040 ||| -19.240
-0 ||| the boy ate the bug ||| lm_0=-18.059 tm_pt_0=-15.000 tm_glue_0=5.000 WordPenalty=-3.040 ||| -29.306
-1 ||| the boy ate the cockroach ||| lm_0=-17.198 tm_pt_0=-6.000 tm_glue_0=5.000 WordPenalty=-3.040 ||| -19.240
-1 ||| the boy eated the cockroach ||| lm_0=-17.198 tm_pt_0=-11.000 tm_glue_0=5.000 WordPenalty=-3.040 ||| -24.240
-1 ||| the boy ate the cockroach ||| lm_0=-17.198 tm_pt_0=-6.000 tm_glue_0=5.000 WordPenalty=-3.040 ||| -19.240
-1 ||| the boy eated the cockroach ||| lm_0=-17.198 tm_pt_0=-11.000 tm_glue_0=5.000 WordPenalty=-3.040 ||| -24.240
-1 ||| the big storm swarmed the coast ||| lm_0=-21.293 tm_pt_0=-48.000 tm_glue_0=3.000 WordPenalty=-3.474 ||| -67.873
-1 ||| the big storm swarmed the cockroach ||| lm_0=-22.115 tm_pt_0=-31.000 tm_glue_0=4.000 WordPenalty=-3.474 ||| -50.890
+0 ||| the boy ate the cockroach ||| tm_pt_0=-6.000 tm_glue_0=5.000 lm_0=-17.198 WordPenalty=-3.040 OOVPenalty=0.000 ||| -19.240
+0 ||| the boy eated the cockroach ||| tm_pt_0=-11.000 tm_glue_0=5.000 lm_0=-17.198 WordPenalty=-3.040 OOVPenalty=0.000 ||| -24.240
+0 ||| the boy eated the cockroach ||| tm_pt_0=-11.000 tm_glue_0=5.000 lm_0=-17.198 WordPenalty=-3.040 OOVPenalty=0.000 ||| -24.240
+0 ||| the kid eated the cockroach ||| tm_pt_0=-15.000 tm_glue_0=5.000 lm_0=-20.053 WordPenalty=-3.040 OOVPenalty=0.000 ||| -31.773
+0 ||| the boy ate the cockroach ||| tm_pt_0=-6.000 tm_glue_0=5.000 lm_0=-17.198 WordPenalty=-3.040 OOVPenalty=0.000 ||| -19.240
+0 ||| the boy eated the cockroach ||| tm_pt_0=-11.000 tm_glue_0=5.000 lm_0=-17.198 WordPenalty=-3.040 OOVPenalty=0.000 ||| -24.240
+1 ||| the boy ate the cockroach ||| tm_pt_0=-6.000 tm_glue_0=5.000 lm_0=-17.198 WordPenalty=-3.040 OOVPenalty=0.000 ||| -19.240
+1 ||| the boy eated the cockroach ||| tm_pt_0=-11.000 tm_glue_0=5.000 lm_0=-17.198 WordPenalty=-3.040 OOVPenalty=0.000 ||| -24.240
+1 ||| the boy ate the cockroach ||| tm_pt_0=-6.000 tm_glue_0=5.000 lm_0=-17.198 WordPenalty=-3.040 OOVPenalty=0.000 ||| -19.240
+1 ||| the boy eated the cockroach ||| tm_pt_0=-11.000 tm_glue_0=5.000 lm_0=-17.198 WordPenalty=-3.040 OOVPenalty=0.000 ||| -24.240
+1 ||| the boy ate the cockroach ||| tm_pt_0=-6.000 tm_glue_0=5.000 lm_0=-17.198 WordPenalty=-3.040 OOVPenalty=0.000 ||| -19.240
+1 ||| the boy eated the cockroach ||| tm_pt_0=-11.000 tm_glue_0=5.000 lm_0=-17.198 WordPenalty=-3.040 OOVPenalty=0.000 ||| -24.240
diff --git a/test/decoder/source-annotations/output.gold b/test/decoder/source-annotations/output.gold
index 2640f35..10125f5 100644
--- a/test/decoder/source-annotations/output.gold
+++ b/test/decoder/source-annotations/output.gold
@@ -1,2 +1,2 @@
-0 ||| my friends call me ||| lm_0=-11.974 tm_pt_0=-3.000 tm_glue_0=3.000 WordPenalty=-2.606 ||| -7.650
-0 ||| my friends call me ||| lm_0=-111.513 tm_pt_0=-3.000 tm_glue_0=3.000 WordPenalty=-2.606 ||| -107.189
+0 ||| my friends call me ||| tm_pt_0=-3.000 tm_glue_0=3.000 lm_0=-11.974 OOVPenalty=0.000 WordPenalty=-2.606 ||| -7.650
+0 ||| my friends call me ||| tm_pt_0=-3.000 tm_glue_0=3.000 lm_0=-111.513 OOVPenalty=0.000 WordPenalty=-2.606 ||| -107.189
diff --git a/test/decoder/target-bigram/out.gold b/test/decoder/target-bigram/out.gold
index 76ab2cb..8d53a28 100644
--- a/test/decoder/target-bigram/out.gold
+++ b/test/decoder/target-bigram/out.gold
@@ -1,3 +1,3 @@
-0 ||| this is a test ||| TargetBigram_<s>_this=1.000 TargetBigram_UNK_</s>=1.000 TargetBigram_UNK_UNK=1.000 TargetBigram_is_UNK=1.000 TargetBigram_this_is=1.000 tm_glue_0=4.000 ||| 0.000
-0 ||| this is a test ||| TargetBigram_<s>_UNK=1.000 TargetBigram_UNK_</s>=1.000 TargetBigram_UNK_UNK=1.000 TargetBigram_UNK_a=1.000 TargetBigram_a_UNK=1.000 tm_glue_0=4.000 ||| 0.000
-0 ||| this is a test ||| TargetBigram_<s>_UNK=1.000 TargetBigram_UNK_</s>=1.000 TargetBigram_UNK_is=1.000 TargetBigram_a_UNK=1.000 TargetBigram_is_a=1.000 tm_glue_0=4.000 ||| 0.000
+0 ||| this is a test ||| tm_glue_0=4.000 TargetBigram_<s>_this=1.000 TargetBigram_UNK_</s>=1.000 TargetBigram_UNK_UNK=1.000 TargetBigram_is_UNK=1.000 TargetBigram_this_is=1.000 ||| 0.000
+0 ||| this is a test ||| tm_glue_0=4.000 TargetBigram_<s>_UNK=1.000 TargetBigram_UNK_</s>=1.000 TargetBigram_UNK_UNK=1.000 TargetBigram_UNK_a=1.000 TargetBigram_a_UNK=1.000 ||| 0.000
+0 ||| this is a test ||| tm_glue_0=4.000 TargetBigram_<s>_UNK=1.000 TargetBigram_UNK_</s>=1.000 TargetBigram_UNK_is=1.000 TargetBigram_a_UNK=1.000 TargetBigram_is_a=1.000 ||| 0.000
diff --git a/test/decoder/target-bigram/test.sh b/test/decoder/target-bigram/test.sh
index 07f7acb..6414b61 100755
--- a/test/decoder/target-bigram/test.sh
+++ b/test/decoder/target-bigram/test.sh
@@ -1,8 +1,8 @@
#!/bin/bash
-(echo "this is a test" | decoder -feature-function "TargetBigram -vocab vocab -top-n 2";
-echo "this is a test" | decoder -feature-function "TargetBigram -vocab vocab -top-n 3 -threshold 20";
-echo "this is a test" | decoder -feature-function "TargetBigram -vocab vocab -threshold 10") 2>log > out
+(echo "this is a test" | $JOSHUA/bin/joshua-decoder -feature-function "TargetBigram -vocab vocab -top-n 2";
+echo "this is a test" | $JOSHUA/bin/joshua-decoder -feature-function "TargetBigram -vocab vocab -top-n 3 -threshold 20";
+echo "this is a test" | $JOSHUA/bin/joshua-decoder -feature-function "TargetBigram -vocab vocab -threshold 10") 2>log > out
# Compare
diff -u out out.gold > diff
diff --git a/test/decoder/tree-output/glue-grammar b/test/decoder/tree-output/glue-grammar
index d9e40ae..1ec13e2 100644
--- a/test/decoder/tree-output/glue-grammar
+++ b/test/decoder/tree-output/glue-grammar
@@ -3,3 +3,4 @@
[GOAL] ||| [GOAL,1] [D,2] ||| [GOAL,1] [D,2] ||| -1
[GOAL] ||| [GOAL,1] [S,2] ||| [GOAL,1] [S,2] ||| -1
[GOAL] ||| [GOAL,1] </s> ||| [GOAL,1] </s> ||| 0
+[GOAL] ||| <s> [NP\DT] </s> ||| <s> [NP\DT,1] </s> ||| -1
diff --git a/test/decoder/tree-output/grammar.gz b/test/decoder/tree-output/grammar.gz
index a9510ef..a538500 100644
--- a/test/decoder/tree-output/grammar.gz
+++ b/test/decoder/tree-output/grammar.gz
Binary files differ
diff --git a/test/decoder/tree-output/input b/test/decoder/tree-output/input
index 1442567..a5a18b4 100644
--- a/test/decoder/tree-output/input
+++ b/test/decoder/tree-output/input
@@ -2,3 +2,4 @@
an unparseable sentence
baz
yo soy
+purchase xslot
diff --git a/test/decoder/tree-output/output.gold b/test/decoder/tree-output/output.gold
index d45a148..9474041 100644
--- a/test/decoder/tree-output/output.gold
+++ b/test/decoder/tree-output/output.gold
@@ -2,3 +2,4 @@
1 ()
2 (GOAL (GOAL (GOAL <s>) (D baz)) </s>)
3 (GOAL (GOAL (GOAL <s>) (S I AM)) </s>)
+4 (GOAL <s> (NP\DT right (NN xslot)) </s>)
diff --git a/test/grammar/sparse-features/output.gold b/test/grammar/sparse-features/output.gold
index c2dfc88..7e07c66 100644
--- a/test/grammar/sparse-features/output.gold
+++ b/test/grammar/sparse-features/output.gold
@@ -1 +1 @@
-0 ||| the boy ||| lm_0=0.000 tm_pt_0=1.000 sparse_test_feature=1.000 svd=1.000 the_boy=1.000 tm_glue_0=1.000 ||| 1.000
+0 ||| the boy ||| tm_pt_0=1.000 tm_glue_0=1.000 sparse_test_feature=1.000 svd=1.000 the_boy=1.000 ||| 1.000
diff --git a/test/grammar/sparse-features/test.sh b/test/grammar/sparse-features/test.sh
index 07f46d2..6878dd7 100755
--- a/test/grammar/sparse-features/test.sh
+++ b/test/grammar/sparse-features/test.sh
@@ -2,13 +2,13 @@
set -u
-echo el chico | $JOSHUA/bin/decoder -c joshua.config -v 0 > output
+echo el chico | $JOSHUA/bin/decoder -c joshua.config -v 0 > output 2> log
# Compare
diff -u output output.gold > diff
if [ $? -eq 0 ]; then
- rm -f diff output
+ rm -f diff output log
exit 0
else
exit 1
diff --git a/test/lattice-short/output.expected b/test/lattice-short/output.expected
index fb3baf1..e9d42f3 100644
--- a/test/lattice-short/output.expected
+++ b/test/lattice-short/output.expected
@@ -1,17 +1,17 @@
-0 ||| A ||| lm_0=-3.040 tm_pt_0=-1.000 tm_glue_0=1.000 WordPenalty=-1.303 SourcePath=0.000 ||| -2.737
-0 ||| a ||| lm_0=-100.681 tm_pt_0=0.000 tm_glue_0=1.000 WordPenalty=-1.303 OOVPenalty=-100.000 SourcePath=0.000 ||| -199.378
-1 ||| A X ||| lm_0=-101.827 tm_pt_0=-4.000 tm_glue_0=2.000 WordPenalty=-1.737 SourcePath=0.000 ||| -104.090
-1 ||| A x ||| lm_0=-101.827 tm_pt_0=-1.000 tm_glue_0=2.000 WordPenalty=-1.737 OOVPenalty=-100.000 SourcePath=0.000 ||| -201.090
-1 ||| a X ||| lm_0=-200.681 tm_pt_0=-3.000 tm_glue_0=2.000 WordPenalty=-1.737 OOVPenalty=-100.000 SourcePath=0.000 ||| -301.944
-1 ||| a x ||| lm_0=-200.681 tm_pt_0=0.000 tm_glue_0=2.000 WordPenalty=-1.737 OOVPenalty=-200.000 SourcePath=0.000 ||| -398.944
-2 ||| B ||| lm_0=-100.681 tm_pt_0=-2.000 tm_glue_0=1.000 WordPenalty=-1.303 SourcePath=0.000 ||| -101.378
-2 ||| b ||| lm_0=-100.681 tm_pt_0=0.000 tm_glue_0=1.000 WordPenalty=-1.303 OOVPenalty=-100.000 SourcePath=0.000 ||| -199.378
-3 ||| B X ||| lm_0=-200.681 tm_pt_0=-5.000 tm_glue_0=2.000 WordPenalty=-1.737 SourcePath=0.000 ||| -203.944
-3 ||| B x ||| lm_0=-200.681 tm_pt_0=-2.000 tm_glue_0=2.000 WordPenalty=-1.737 OOVPenalty=-100.000 SourcePath=0.000 ||| -300.944
-3 ||| b X ||| lm_0=-200.681 tm_pt_0=-3.000 tm_glue_0=2.000 WordPenalty=-1.737 OOVPenalty=-100.000 SourcePath=0.000 ||| -301.944
-3 ||| b x ||| lm_0=-200.681 tm_pt_0=0.000 tm_glue_0=2.000 WordPenalty=-1.737 OOVPenalty=-200.000 SourcePath=0.000 ||| -398.944
-4 ||| A X ||| lm_0=-101.146 tm_pt_0=-4.000 tm_glue_0=2.000 WordPenalty=-1.303 SourcePath=2.000 ||| -101.843
-4 ||| A x ||| lm_0=-101.146 tm_pt_0=-1.000 tm_glue_0=2.000 WordPenalty=-1.303 OOVPenalty=-100.000 SourcePath=2.000 ||| -198.843
-4 ||| B X ||| lm_0=-200.000 tm_pt_0=-5.000 tm_glue_0=2.000 WordPenalty=-1.303 SourcePath=2.000 ||| -201.697
-4 ||| B x ||| lm_0=-200.000 tm_pt_0=-2.000 tm_glue_0=2.000 WordPenalty=-1.303 OOVPenalty=-100.000 SourcePath=2.000 ||| -298.697
-4 ||| a X ||| lm_0=-200.000 tm_pt_0=-3.000 tm_glue_0=2.000 WordPenalty=-1.303 OOVPenalty=-100.000 SourcePath=2.000 ||| -299.697
+0 ||| A ||| tm_pt_0=-1.000 tm_glue_0=1.000 lm_0=-3.040 OOVPenalty=0.000 WordPenalty=-1.303 SourcePath=0.000 ||| -2.737
+0 ||| a ||| tm_pt_0=0.000 tm_glue_0=1.000 lm_0=-100.681 OOVPenalty=-100.000 WordPenalty=-1.303 SourcePath=0.000 ||| -199.378
+1 ||| A X ||| tm_pt_0=-4.000 tm_glue_0=2.000 lm_0=-101.827 OOVPenalty=0.000 WordPenalty=-1.737 SourcePath=0.000 ||| -104.090
+1 ||| A x ||| tm_pt_0=-1.000 tm_glue_0=2.000 lm_0=-101.827 OOVPenalty=-100.000 WordPenalty=-1.737 SourcePath=0.000 ||| -201.090
+1 ||| a X ||| tm_pt_0=-3.000 tm_glue_0=2.000 lm_0=-200.681 OOVPenalty=-100.000 WordPenalty=-1.737 SourcePath=0.000 ||| -301.944
+1 ||| a x ||| tm_pt_0=0.000 tm_glue_0=2.000 lm_0=-200.681 OOVPenalty=-200.000 WordPenalty=-1.737 SourcePath=0.000 ||| -398.944
+2 ||| B ||| tm_pt_0=-2.000 tm_glue_0=1.000 lm_0=-100.681 OOVPenalty=0.000 WordPenalty=-1.303 SourcePath=0.000 ||| -101.378
+2 ||| b ||| tm_pt_0=0.000 tm_glue_0=1.000 lm_0=-100.681 OOVPenalty=-100.000 WordPenalty=-1.303 SourcePath=0.000 ||| -199.378
+3 ||| B X ||| tm_pt_0=-5.000 tm_glue_0=2.000 lm_0=-200.681 OOVPenalty=0.000 WordPenalty=-1.737 SourcePath=0.000 ||| -203.944
+3 ||| B x ||| tm_pt_0=-2.000 tm_glue_0=2.000 lm_0=-200.681 OOVPenalty=-100.000 WordPenalty=-1.737 SourcePath=0.000 ||| -300.944
+3 ||| b X ||| tm_pt_0=-3.000 tm_glue_0=2.000 lm_0=-200.681 OOVPenalty=-100.000 WordPenalty=-1.737 SourcePath=0.000 ||| -301.944
+3 ||| b x ||| tm_pt_0=0.000 tm_glue_0=2.000 lm_0=-200.681 OOVPenalty=-200.000 WordPenalty=-1.737 SourcePath=0.000 ||| -398.944
+4 ||| A X ||| tm_pt_0=-4.000 tm_glue_0=2.000 lm_0=-101.827 OOVPenalty=0.000 WordPenalty=-1.737 SourcePath=2.000 ||| -102.090
+4 ||| A x ||| tm_pt_0=-1.000 tm_glue_0=2.000 lm_0=-101.827 OOVPenalty=-100.000 WordPenalty=-1.737 SourcePath=2.000 ||| -199.090
+4 ||| B X ||| tm_pt_0=-5.000 tm_glue_0=2.000 lm_0=-200.681 OOVPenalty=0.000 WordPenalty=-1.737 SourcePath=2.000 ||| -201.944
+4 ||| B x ||| tm_pt_0=-2.000 tm_glue_0=2.000 lm_0=-200.681 OOVPenalty=-100.000 WordPenalty=-1.737 SourcePath=2.000 ||| -298.944
+4 ||| b X ||| tm_pt_0=-3.000 tm_glue_0=2.000 lm_0=-200.681 OOVPenalty=-100.000 WordPenalty=-1.737 SourcePath=2.000 ||| -299.944
diff --git a/test/lattice/output.expected b/test/lattice/output.expected
index 9cc6d89..2528f19 100644
--- a/test/lattice/output.expected
+++ b/test/lattice/output.expected
@@ -1,32 +1,33 @@
-0 ||| dieses haus ||| this house ||| lm_0=-1.748 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-0.021 tm_glue_0=1.000 WordPenalty=-1.303 SourcePath=-1.000 ||| -1.456
-0 ||| ein haus ||| a house ||| lm_0=-1.809 tm_pt_0=-0.000 tm_pt_1=-0.228 tm_pt_2=-0.021 tm_glue_0=1.000 WordPenalty=-1.303 SourcePath=-0.900 ||| -1.531
-0 ||| haus ||| house ||| lm_0=-2.437 tm_pt_0=-0.022 tm_pt_1=-0.000 tm_pt_2=-0.021 tm_glue_0=1.000 WordPenalty=-0.869 SourcePath=-0.500 ||| -2.101
-0 ||| ein haus ||| a small house ||| lm_0=-1.957 tm_pt_0=-0.778 tm_pt_1=-0.228 tm_pt_2=-1.343 tm_glue_0=1.000 WordPenalty=-1.737 SourcePath=-0.900 ||| -2.684
-0 ||| dieses haus ||| this small house ||| lm_0=-2.844 tm_pt_0=-1.301 tm_pt_1=-0.000 tm_pt_2=-1.343 tm_glue_0=2.000 WordPenalty=-1.737 SourcePath=-1.000 ||| -4.080
-0 ||| haus ||| small house ||| lm_0=-3.555 tm_pt_0=-1.301 tm_pt_1=-0.000 tm_pt_2=-1.343 tm_glue_0=1.000 WordPenalty=-1.303 SourcePath=-0.500 ||| -4.724
-0 ||| haus ||| haus_OOV ||| lm_0=-100.000 tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=0.000 tm_glue_0=1.000 WordPenalty=-0.869 OOVPenalty=-100.000 SourcePath=-0.500 ||| -199.631
-0 ||| ein haus ||| ein_OOV house ||| lm_0=-100.992 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-0.021 tm_glue_0=1.000 WordPenalty=-1.303 OOVPenalty=-100.000 SourcePath=-0.900 ||| -200.600
-0 ||| dieses haus ||| dieses_OOV house ||| lm_0=-100.992 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-0.021 tm_glue_0=1.000 WordPenalty=-1.303 OOVPenalty=-100.000 SourcePath=-1.000 ||| -200.700
-0 ||| dieses haus ||| this haus_OOV ||| lm_0=-101.146 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-0.000 tm_glue_0=2.000 WordPenalty=-1.303 OOVPenalty=-100.000 SourcePath=-1.000 ||| -200.843
-0 ||| ein haus ||| a haus_OOV ||| lm_0=-101.146 tm_pt_0=-0.000 tm_pt_1=-0.228 tm_pt_2=-0.000 tm_glue_0=1.000 WordPenalty=-1.303 OOVPenalty=-100.000 SourcePath=-0.900 ||| -200.857
-0 ||| ein haus ||| ein_OOV small house ||| lm_0=-102.110 tm_pt_0=-1.301 tm_pt_1=-0.000 tm_pt_2=-1.343 tm_glue_0=2.000 WordPenalty=-1.737 OOVPenalty=-100.000 SourcePath=-0.900 ||| -203.246
-0 ||| dieses haus ||| dieses_OOV small house ||| lm_0=-102.110 tm_pt_0=-1.301 tm_pt_1=-0.000 tm_pt_2=-1.343 tm_glue_0=2.000 WordPenalty=-1.737 OOVPenalty=-100.000 SourcePath=-1.000 ||| -203.346
-0 ||| ein haus ||| ein_OOV haus_OOV ||| lm_0=-200.000 tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=0.000 tm_glue_0=2.000 WordPenalty=-1.303 OOVPenalty=-200.000 SourcePath=-0.900 ||| -399.597
-0 ||| dieses haus ||| dieses_OOV haus_OOV ||| lm_0=-200.000 tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=0.000 tm_glue_0=2.000 WordPenalty=-1.303 OOVPenalty=-200.000 SourcePath=-1.000 ||| -399.697
-1 ||| ein ||| a ||| lm_0=-3.040 tm_pt_0=-0.000 tm_pt_1=-0.228 tm_pt_2=-0.000 tm_glue_0=1.000 WordPenalty=-1.303 SourcePath=0.000 ||| -1.851
-1 ||| ein ||| ein_OOV ||| lm_0=-100.681 tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=0.000 tm_glue_0=1.000 WordPenalty=-1.303 OOVPenalty=-100.000 SourcePath=0.000 ||| -199.378
-2 ||| ein haus ||| a house ||| lm_0=-1.809 tm_pt_0=-0.000 tm_pt_1=-0.228 tm_pt_2=-0.021 tm_glue_0=1.000 WordPenalty=-1.303 SourcePath=-0.700 ||| -1.331
-2 ||| dieses haus ||| this house ||| lm_0=-1.748 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-0.021 tm_glue_0=1.000 WordPenalty=-1.303 SourcePath=-1.000 ||| -1.456
-2 ||| haus ||| house ||| lm_0=-2.437 tm_pt_0=-0.022 tm_pt_1=-0.000 tm_pt_2=-0.021 tm_glue_0=1.000 WordPenalty=-0.869 SourcePath=-0.500 ||| -2.101
-2 ||| ein haus ||| a small house ||| lm_0=-1.957 tm_pt_0=-0.778 tm_pt_1=-0.228 tm_pt_2=-1.343 tm_glue_0=1.000 WordPenalty=-1.737 SourcePath=-0.700 ||| -2.484
-2 ||| dieses haus ||| this small house ||| lm_0=-2.844 tm_pt_0=-1.301 tm_pt_1=-0.000 tm_pt_2=-1.343 tm_glue_0=2.000 WordPenalty=-1.737 SourcePath=-1.000 ||| -4.080
-2 ||| haus ||| small house ||| lm_0=-3.555 tm_pt_0=-1.301 tm_pt_1=-0.000 tm_pt_2=-1.343 tm_glue_0=1.000 WordPenalty=-1.303 SourcePath=-0.500 ||| -4.724
-2 ||| haus ||| haus_OOV ||| lm_0=-100.000 tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=0.000 tm_glue_0=1.000 WordPenalty=-0.869 OOVPenalty=-100.000 SourcePath=-0.500 ||| -199.631
-2 ||| ein haus ||| ein_OOV house ||| lm_0=-100.992 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-0.021 tm_glue_0=1.000 WordPenalty=-1.303 OOVPenalty=-100.000 SourcePath=-0.700 ||| -200.400
-2 ||| ein haus ||| a haus_OOV ||| lm_0=-101.146 tm_pt_0=-0.000 tm_pt_1=-0.228 tm_pt_2=-0.000 tm_glue_0=1.000 WordPenalty=-1.303 OOVPenalty=-100.000 SourcePath=-0.700 ||| -200.657
-2 ||| dieses haus ||| dieses_OOV house ||| lm_0=-100.992 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-0.021 tm_glue_0=1.000 WordPenalty=-1.303 OOVPenalty=-100.000 SourcePath=-1.000 ||| -200.700
-2 ||| dieses haus ||| this haus_OOV ||| lm_0=-101.146 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-0.000 tm_glue_0=1.000 WordPenalty=-1.303 OOVPenalty=-100.000 SourcePath=-1.000 ||| -200.843
-2 ||| ein haus ||| ein_OOV small house ||| lm_0=-102.110 tm_pt_0=-1.301 tm_pt_1=-0.000 tm_pt_2=-1.343 tm_glue_0=2.000 WordPenalty=-1.737 OOVPenalty=-100.000 SourcePath=-0.700 ||| -203.046
-2 ||| dieses haus ||| dieses_OOV small house ||| lm_0=-102.110 tm_pt_0=-1.301 tm_pt_1=-0.000 tm_pt_2=-1.343 tm_glue_0=2.000 WordPenalty=-1.737 OOVPenalty=-100.000 SourcePath=-1.000 ||| -203.346
-2 ||| ein haus ||| ein_OOV haus_OOV ||| lm_0=-200.000 tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=0.000 tm_glue_0=2.000 WordPenalty=-1.303 OOVPenalty=-200.000 SourcePath=-0.700 ||| -399.397
-2 ||| dieses haus ||| dieses_OOV haus_OOV ||| lm_0=-200.000 tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=0.000 tm_glue_0=2.000 WordPenalty=-1.303 OOVPenalty=-200.000 SourcePath=-1.000 ||| -399.697
+0 ||| dieses haus ||| this house ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-0.021 tm_glue_0=1.000 lm_0=-1.866 OOVPenalty=0.000 WordPenalty=-1.737 SourcePath=-1.000 ||| -1.140
+0 ||| ein haus ||| a house ||| tm_pt_0=0.000 tm_pt_1=-0.228 tm_pt_2=-0.021 tm_glue_0=1.000 lm_0=-1.888 OOVPenalty=0.000 WordPenalty=-1.737 SourcePath=-0.900 ||| -1.176
+0 ||| haus ||| house ||| tm_pt_0=-0.022 tm_pt_1=0.000 tm_pt_2=-0.021 tm_glue_0=1.000 lm_0=-2.555 OOVPenalty=0.000 WordPenalty=-1.303 SourcePath=-0.500 ||| -1.785
+0 ||| ein haus ||| a small house ||| tm_pt_0=-0.778 tm_pt_1=-0.228 tm_pt_2=-1.343 tm_glue_0=1.000 lm_0=-2.279 OOVPenalty=0.000 WordPenalty=-2.171 SourcePath=-0.900 ||| -2.572
+0 ||| dieses haus ||| this small house ||| tm_pt_0=-1.301 tm_pt_1=0.000 tm_pt_2=-1.343 tm_glue_0=2.000 lm_0=-3.166 OOVPenalty=0.000 WordPenalty=-2.171 SourcePath=-1.000 ||| -3.967
+0 ||| haus ||| small house ||| tm_pt_0=-1.301 tm_pt_1=0.000 tm_pt_2=-1.343 tm_glue_0=1.000 lm_0=-3.877 OOVPenalty=0.000 WordPenalty=-1.737 SourcePath=-0.500 ||| -4.612
+0 ||| haus ||| haus_OOV ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=0.000 tm_glue_0=1.000 lm_0=-100.681 OOVPenalty=-100.000 WordPenalty=-1.303 SourcePath=-0.500 ||| -199.878
+0 ||| ein haus ||| ein_OOV house ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-0.021 tm_glue_0=1.000 lm_0=-101.110 OOVPenalty=-100.000 WordPenalty=-1.737 SourcePath=-0.900 ||| -200.284
+0 ||| dieses haus ||| dieses_OOV house ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-0.021 tm_glue_0=1.000 lm_0=-101.110 OOVPenalty=-100.000 WordPenalty=-1.737 SourcePath=-1.000 ||| -200.384
+0 ||| dieses haus ||| this haus_OOV ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=0.000 tm_glue_0=2.000 lm_0=-101.827 OOVPenalty=-100.000 WordPenalty=-1.737 SourcePath=-1.000 ||| -201.090
+0 ||| ein haus ||| a haus_OOV ||| tm_pt_0=0.000 tm_pt_1=-0.228 tm_pt_2=0.000 tm_glue_0=1.000 lm_0=-101.827 OOVPenalty=-100.000 WordPenalty=-1.737 SourcePath=-0.900 ||| -201.104
+0 ||| ein haus ||| ein_OOV small house ||| tm_pt_0=-1.301 tm_pt_1=0.000 tm_pt_2=-1.343 tm_glue_0=2.000 lm_0=-102.433 OOVPenalty=-100.000 WordPenalty=-2.171 SourcePath=-0.900 ||| -203.134
+0 ||| dieses haus ||| dieses_OOV small house ||| tm_pt_0=-1.301 tm_pt_1=0.000 tm_pt_2=-1.343 tm_glue_0=2.000 lm_0=-102.433 OOVPenalty=-100.000 WordPenalty=-2.171 SourcePath=-1.000 ||| -203.234
+0 ||| ein haus ||| ein_OOV haus_OOV ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=0.000 tm_glue_0=2.000 lm_0=-200.681 OOVPenalty=-200.000 WordPenalty=-1.737 SourcePath=-0.900 ||| -399.844
+0 ||| dieses haus ||| dieses_OOV haus_OOV ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=0.000 tm_glue_0=2.000 lm_0=-200.681 OOVPenalty=-200.000 WordPenalty=-1.737 SourcePath=-1.000 ||| -399.944
+1 ||| ein ||| a ||| tm_pt_0=0.000 tm_pt_1=-0.228 tm_pt_2=0.000 tm_glue_0=1.000 lm_0=-3.040 OOVPenalty=0.000 WordPenalty=-1.303 SourcePath=0.000 ||| -1.851
+1 ||| ein ||| ein_OOV ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=0.000 tm_glue_0=1.000 lm_0=-100.681 OOVPenalty=-100.000 WordPenalty=-1.303 SourcePath=0.000 ||| -199.378
+2 ||| ein haus ||| a house ||| tm_pt_0=0.000 tm_pt_1=-0.228 tm_pt_2=-0.021 tm_glue_0=1.000 lm_0=-1.888 OOVPenalty=0.000 WordPenalty=-1.737 SourcePath=-0.700 ||| -0.976
+2 ||| dieses haus ||| this house ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-0.021 tm_glue_0=1.000 lm_0=-1.866 OOVPenalty=0.000 WordPenalty=-1.737 SourcePath=-1.000 ||| -1.140
+2 ||| haus ||| house ||| tm_pt_0=-0.022 tm_pt_1=0.000 tm_pt_2=-0.021 tm_glue_0=1.000 lm_0=-2.555 OOVPenalty=0.000 WordPenalty=-1.303 SourcePath=-0.500 ||| -1.785
+2 ||| ein haus ||| a small house ||| tm_pt_0=-0.778 tm_pt_1=-0.228 tm_pt_2=-1.343 tm_glue_0=1.000 lm_0=-2.279 OOVPenalty=0.000 WordPenalty=-2.171 SourcePath=-0.700 ||| -2.372
+2 ||| dieses haus ||| this small house ||| tm_pt_0=-1.301 tm_pt_1=0.000 tm_pt_2=-1.343 tm_glue_0=2.000 lm_0=-3.166 OOVPenalty=0.000 WordPenalty=-2.171 SourcePath=-1.000 ||| -3.967
+2 ||| haus ||| small house ||| tm_pt_0=-1.301 tm_pt_1=0.000 tm_pt_2=-1.343 tm_glue_0=1.000 lm_0=-3.877 OOVPenalty=0.000 WordPenalty=-1.737 SourcePath=-0.500 ||| -4.612
+2 ||| haus ||| haus_OOV ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=0.000 tm_glue_0=1.000 lm_0=-100.681 OOVPenalty=-100.000 WordPenalty=-1.303 SourcePath=-0.500 ||| -199.878
+2 ||| ein haus ||| ein_OOV house ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-0.021 tm_glue_0=1.000 lm_0=-101.110 OOVPenalty=-100.000 WordPenalty=-1.737 SourcePath=-0.700 ||| -200.084
+2 ||| dieses haus ||| dieses_OOV house ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=-0.021 tm_glue_0=1.000 lm_0=-101.110 OOVPenalty=-100.000 WordPenalty=-1.737 SourcePath=-1.000 ||| -200.384
+2 ||| ein haus ||| a haus_OOV ||| tm_pt_0=0.000 tm_pt_1=-0.228 tm_pt_2=0.000 tm_glue_0=1.000 lm_0=-101.827 OOVPenalty=-100.000 WordPenalty=-1.737 SourcePath=-0.700 ||| -200.904
+2 ||| dieses haus ||| this haus_OOV ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=0.000 tm_glue_0=1.000 lm_0=-101.827 OOVPenalty=-100.000 WordPenalty=-1.737 SourcePath=-1.000 ||| -201.090
+2 ||| ein haus ||| ein_OOV small house ||| tm_pt_0=-1.301 tm_pt_1=0.000 tm_pt_2=-1.343 tm_glue_0=2.000 lm_0=-102.433 OOVPenalty=-100.000 WordPenalty=-2.171 SourcePath=-0.700 ||| -202.934
+2 ||| dieses haus ||| dieses_OOV small house ||| tm_pt_0=-1.301 tm_pt_1=0.000 tm_pt_2=-1.343 tm_glue_0=2.000 lm_0=-102.433 OOVPenalty=-100.000 WordPenalty=-2.171 SourcePath=-1.000 ||| -203.234
+2 ||| ein haus ||| ein_OOV haus_OOV ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=0.000 tm_glue_0=2.000 lm_0=-200.681 OOVPenalty=-200.000 WordPenalty=-1.737 SourcePath=-0.700 ||| -399.644
+2 ||| dieses haus ||| dieses_OOV haus_OOV ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=0.000 tm_glue_0=2.000 lm_0=-200.681 OOVPenalty=-200.000 WordPenalty=-1.737 SourcePath=-1.000 ||| -399.944
+3 ||| sí ||| sí_OOV ||| tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=0.000 tm_glue_0=1.000 lm_0=-100.681 OOVPenalty=-100.000 WordPenalty=-1.303 SourcePath=0.000 ||| -199.378
diff --git a/test/lattice/test.plf b/test/lattice/test.plf
index 2aa2761..632e952 100644
--- a/test/lattice/test.plf
+++ b/test/lattice/test.plf
@@ -1,3 +1,4 @@
((('ein',-0.2,1),('dieses',-0.3,1),('haus',-0.5,2),),(('haus',-0.7,1),),)
ein haus
((('ein',1.56462193e-07,1),('dieses',-0.3,1),('haus',-0.5,2),),(('haus',-0.7,1),),)
+((('sí', 0, 1),),)
diff --git a/test/lm/berkeley/output.gold b/test/lm/berkeley/output.gold
index fca0674..c0d30a4 100644
--- a/test/lm/berkeley/output.gold
+++ b/test/lm/berkeley/output.gold
@@ -1,4 +1,4 @@
-lm_0=-7.153 tm_glue_0=2.000
-lm_0=-7.153 tm_glue_0=2.000
-lm_0=-7.153 tm_glue_0=2.000
-lm_0=-7.153 tm_glue_0=2.000
+tm_glue_0=2.000 lm_0=-7.153
+tm_glue_0=2.000 lm_0=-7.153
+tm_glue_0=2.000 lm_0=-7.153
+tm_glue_0=2.000 lm_0=-7.153
diff --git a/test/lm/berkeley/test.sh b/test/lm/berkeley/test.sh
index 25ff944..71fa413 100755
--- a/test/lm/berkeley/test.sh
+++ b/test/lm/berkeley/test.sh
@@ -1,7 +1,7 @@
#!/bin/bash
(for file in lm lm.gz lm.berkeleylm lm.berkeleylm.gz; do
- echo the chat-rooms | joshua-decoder -feature-function "LanguageModel -lm_type berkeleylm -lm_order 2 -lm_file $file" -v 0 -output-format %f 2> log
+ echo the chat-rooms | $JOSHUA/bin/joshua-decoder -feature-function "LanguageModel -lm_type berkeleylm -lm_order 2 -lm_file $file" -v 0 -output-format %f 2> log
done) > output
# Compare
diff --git a/test/packed-grammar/joshua.config b/test/packed-grammar/joshua.config
index 1d5e2f0..b1f02a5 100644
--- a/test/packed-grammar/joshua.config
+++ b/test/packed-grammar/joshua.config
@@ -1,6 +1,6 @@
lm = kenlm 5 false false 100 lm.gz
-tm = packed pt 12 grammar.packed
-tm = thrax glue -1 grammar.glue
+tm = thrax -owner pt -maxspan 12 -path grammar.packed
+tm = thrax -owner glue -maxspan -1 -path grammar.glue
mark_oovs=false
diff --git a/test/packed-grammar/test-multiple.sh b/test/packed-grammar/test-multiple.sh
new file mode 100755
index 0000000..04a755a
--- /dev/null
+++ b/test/packed-grammar/test-multiple.sh
@@ -0,0 +1,16 @@
+#!/bin/bash
+
+set -u
+
+# pack the grammar
+rm -rf foo.packed bar.packed
+$JOSHUA/scripts/support/grammar-packer.pl -v -g 'grammar.gz grammar.gz' -o 'foo.packed bar.packed' 2> packer-multiple.log
+
+diff -q foo.packed/vocabulary bar.packed/vocabulary > diff
+
+if [ $? -eq 0 ]; then
+ rm -rf foo.packed bar.packed packer-multiple.log
+ exit 0
+else
+ exit 1
+fi
diff --git a/test/packed-grammar/test.sh b/test/packed-grammar/test.sh
index fde6e8b..cca15fa 100755
--- a/test/packed-grammar/test.sh
+++ b/test/packed-grammar/test.sh
@@ -2,14 +2,12 @@
set -u
-export THRAX=$JOSHUA/thrax
-
# pack the grammar
rm -rf grammar.packed
-$JOSHUA/scripts/support/grammar-packer.pl grammar.gz grammar.packed 2> packer.log
+$JOSHUA/scripts/support/grammar-packer.pl -v -g grammar.gz -o grammar.packed 2> packer.log
# generate the glue grammar
-java -Xmx2g -cp $JOSHUA/lib/*:$THRAX/bin/thrax.jar edu.jhu.thrax.util.CreateGlueGrammar grammar.packed > grammar.glue 2> glue.log
+java -Xmx2g -cp $JOSHUA/lib/args4j-2.0.29.jar:$JOSHUA/class joshua.decoder.ff.tm.CreateGlueGrammar -g grammar.packed > grammar.glue 2> glue.log
# decode
cat input.bn | $JOSHUA/bin/joshua-decoder -m 1g -threads 2 -c joshua.config > output 2> log
@@ -17,7 +15,7 @@
diff -u output output.gold > diff
if [ $? -eq 0 ]; then
- rm -f packer.log diff log output.bleu output grammar.glue glue.log
+ #rm -f packer.log diff log output.bleu output grammar.glue glue.log
rm -rf grammar.packed
exit 0
else
diff --git a/test/thrax/extraction/test.sh b/test/thrax/extraction/test.sh
index 6a80a99..3f6edf8 100755
--- a/test/thrax/extraction/test.sh
+++ b/test/thrax/extraction/test.sh
@@ -4,14 +4,14 @@
set -u
+export VERSION=2.5.2
+
rm -rf thrax.log grammar.gz .grammar.crc thrax
-hadoop_dl_url=http://archive.apache.org/dist/hadoop/core/hadoop-0.20.2/hadoop-0.20.2.tar.gz
-[[ ! -f $JOSHUA/lib/hadoop-0.20.2.tar.gz ]] && wget -q -O $JOSHUA/lib/hadoop-0.20.2.tar.gz $hadoop_dl_url
-[[ ! -d hadoop-0.20.2 ]] && tar xzf $JOSHUA/lib/hadoop-0.20.2.tar.gz
+[[ ! -d hadoop-$VERSION ]] && tar xzf $JOSHUA/lib/hadoop-$VERSION.tar.gz
unset HADOOP HADOOP_HOME HADOOP_CONF_DIR
-export HADOOP=$(pwd)/hadoop-0.20.2
+export HADOOP=$(pwd)/hadoop-$VERSION
# run hadoop
$HADOOP/bin/hadoop jar $JOSHUA/thrax/bin/thrax.jar input/thrax.conf thrax > thrax.log 2>&1
@@ -19,8 +19,8 @@
size=$(perl -e "print +(stat('grammar.gz'))[7] . $/")
-rm -rf hadoop-0.20.2
-if [[ $size -eq 989817 ]]; then
+rm -rf hadoop-$VERSION
+if [[ $size -eq 1004401 ]]; then
rm -rf thrax.log grammar.gz .grammar.crc thrax
exit 0
else
diff --git a/thrax b/thrax
index 3fd6b10..512622c 160000
--- a/thrax
+++ b/thrax
@@ -1 +1 @@
-Subproject commit 3fd6b10a4c01084088fca67246e873a9db69be35
+Subproject commit 512622c68537ec79a379b852b2ec072e85a46fc2