| <?xml version="1.0"?> |
| <project name="benchmark" default="default"> |
| |
| <description> |
| Lucene Benchmarking Contributions |
| </description> |
| |
| <import file="../contrib-build.xml"/> |
| <!-- Directory under which the targets below place expanded and extracted benchmark data. --> |
| <property name="working.dir" location="work"/> |
| |
| |
| <!-- Probes for downloaded archives (under temp/) and unpacked data (under ${working.dir}). --> |
| <!-- Each property is set only when its file exists; the get/expand/extract targets test them via "unless". --> |
| <target name="check-files"> |
|   <!-- news20 collection --> |
|   <available file="temp/news20.tar.gz" property="news20.exists"/> |
|   <available file="${working.dir}/20_newsgroup" property="news20.expanded"/> |
| |
|   <!-- Reuters collection: archive, unpacked tree, and ExtractReuters output --> |
|   <available file="temp/reuters21578.tar.gz" property="reuters.exists"/> |
|   <available file="${working.dir}/reuters" property="reuters.expanded"/> |
|   <available file="${working.dir}/reuters-out" property="reuters.extracted"/> |
| |
|   <!-- 20news-18828 collection --> |
|   <available file="temp/20news-18828.tar.gz" property="20news-18828.exists"/> |
|   <available file="${working.dir}/20news-18828" property="20news-18828.expanded"/> |
| |
|   <!-- mini_newsgroups collection. get-mini-news is guarded by mini.exists, so the archive must be --> |
|   <!-- probed here; otherwise the property is never set and the download is repeated on every run. --> |
|   <available file="temp/mini_newsgroups.tar.gz" property="mini.exists"/> |
|   <available file="${working.dir}/mini_newsgroups" property="mini.expanded"/> |
| |
|   <!-- English Wikipedia dump: compressed archive, decompressed XML, and extracted text --> |
|   <available file="temp/enwiki-20070527-pages-articles.xml.bz2" property="enwiki.exists"/> |
|   <available file="temp/enwiki-20070527-pages-articles.xml" property="enwiki.expanded"/> |
|   <available file="${working.dir}/enwiki.txt" property="enwiki.extracted"/> |
| </target> |
| |
| <!-- Ensures temp/ exists, then invokes the download and decompress steps for the Wikipedia dump. --> |
| <!-- Runs check-files first; get-enwiki and expand-enwiki each carry their own "unless" guard. --> |
| <target depends="check-files" name="enwiki-files"> |
|   <mkdir dir="temp"/> |
|   <antcall target="get-enwiki"/> |
|   <antcall target="expand-enwiki"/> |
| </target> |
| |
| <!-- Downloads the compressed Wikipedia dump into temp/; skipped when enwiki.exists is set. --> |
| <target unless="enwiki.exists" name="get-enwiki"> |
|   <get dest="temp/enwiki-20070527-pages-articles.xml.bz2" |
|        src="http://people.apache.org/~gsingers/wikipedia/enwiki-20070527-pages-articles.xml.bz2"/> |
| </target> |
| |
| <!-- Decompresses the downloaded .bz2 dump into temp/; skipped when enwiki.expanded is set. --> |
| <target unless="enwiki.expanded" name="expand-enwiki"> |
|   <bunzip2 dest="temp" src="temp/enwiki-20070527-pages-articles.xml.bz2"/> |
| </target> |
| |
| <!-- Downloads news20.tar.gz into temp/; skipped when news20.exists is set. --> |
| <!-- The guard must be news20.exists (the property tracking temp/news20.tar.gz), not --> |
| <!-- 20news-18828.exists, which tracks a different archive fetched by get-20news-18828. --> |
| <target name="get-news-20" unless="news20.exists"> |
|   <get src="http://www-2.cs.cmu.edu/afs/cs.cmu.edu/project/theo-20/www/data/news20.tar.gz" |
|        dest="temp/news20.tar.gz"/> |
| </target> |
| <!-- Downloads the Reuters-21578 archive into temp/; skipped when reuters.exists is set. --> |
| <target unless="reuters.exists" name="get-reuters"> |
|   <get dest="temp/reuters21578.tar.gz" |
|        src="http://www.daviddlewis.com/resources/testcollections/reuters21578/reuters21578.tar.gz"/> |
| </target> |
| |
| <!-- Gunzips news20.tar.gz in temp/ and untars the result into ${working.dir}; skipped when news20.expanded is set. --> |
| <target unless="news20.expanded" name="expand-news-20"> |
|   <gunzip dest="temp" src="temp/news20.tar.gz"/> |
|   <untar dest="${working.dir}" src="temp/news20.tar"/> |
| </target> |
| <!-- Unpacks the Reuters archive into ${working.dir}/reuters; skipped when reuters.expanded is set. --> |
| <target unless="reuters.expanded" name="expand-reuters"> |
|   <gunzip dest="temp" src="temp/reuters21578.tar.gz"/> |
|   <mkdir dir="${working.dir}/reuters"/> |
|   <untar dest="${working.dir}/reuters" src="temp/reuters21578.tar"/> |
|   <!-- Remove the top-level *.txt files that came out of the archive. --> |
|   <delete> |
|     <fileset dir="${working.dir}/reuters" includes="*.txt"/> |
|   </delete> |
| </target> |
| <!-- Runs ExtractReuters in a forked JVM with ${working.dir}/reuters and ${working.dir}/reuters-out as arguments. --> |
| <!-- Runs check-files first; skipped when reuters.extracted is set. --> |
| <target unless="reuters.extracted" depends="check-files" name="extract-reuters"> |
|   <mkdir dir="${working.dir}/reuters-out"/> |
|   <java fork="true" maxmemory="1024M" classname="org.apache.lucene.benchmark.utils.ExtractReuters"> |
|     <classpath refid="run.classpath"/> |
|     <arg file="${working.dir}/reuters"/> |
|     <arg file="${working.dir}/reuters-out"/> |
|   </java> |
| </target> |
| <!-- Downloads 20news-18828.tar.gz into temp/; skipped when 20news-18828.exists is set. --> |
| <target unless="20news-18828.exists" name="get-20news-18828"> |
|   <get dest="temp/20news-18828.tar.gz" |
|        src="http://people.csail.mit.edu/u/j/jrennie/public_html/20Newsgroups/20news-18828.tar.gz"/> |
| </target> |
| <!-- Gunzips 20news-18828.tar.gz in temp/ and untars it into ${working.dir}; skipped when 20news-18828.expanded is set. --> |
| <target unless="20news-18828.expanded" name="expand-20news-18828"> |
|   <gunzip dest="temp" src="temp/20news-18828.tar.gz"/> |
|   <untar dest="${working.dir}" src="temp/20news-18828.tar"/> |
| </target> |
| <!-- Downloads mini_newsgroups.tar.gz into temp/; skipped when mini.exists is set. --> |
| <target unless="mini.exists" name="get-mini-news"> |
|   <get dest="temp/mini_newsgroups.tar.gz" |
|        src="http://kdd.ics.uci.edu/databases/20newsgroups/mini_newsgroups.tar.gz"/> |
| </target> |
| <!-- Gunzips mini_newsgroups.tar.gz in temp/ and untars it into ${working.dir}; skipped when mini.expanded is set. --> |
| <target unless="mini.expanded" name="expand-mini-news"> |
|   <gunzip dest="temp" src="temp/mini_newsgroups.tar.gz"/> |
|   <untar dest="${working.dir}" src="temp/mini_newsgroups.tar"/> |
| </target> |
| |
| <!-- Prepares the Reuters collection: ensures temp/ exists, then invokes download, unpack and extract in order. --> |
| <!-- Runs check-files first; each invoked target carries its own "unless" guard. --> |
| <target depends="check-files" name="get-files"> |
|   <mkdir dir="temp"/> |
|   <antcall target="get-reuters"/> |
|   <antcall target="expand-reuters"/> |
|   <antcall target="extract-reuters"/> |
| </target> |
| |
| <!-- Compile classpath: class directories under ${common.dir}/build plus every jar below lib/. --> |
| <path id="classpath"> |
|   <pathelement path="${common.dir}/build/classes/java"/> |
|   <pathelement path="${common.dir}/build/classes/demo"/> |
|   <pathelement path="${common.dir}/build/contrib/highlighter/classes/java"/> |
|   <pathelement path="${common.dir}/build/contrib/memory/classes/java"/> |
|   <pathelement path="${common.dir}/build/contrib/fast-vector-highlighter/classes/java"/> |
|   <fileset dir="lib" includes="**/*.jar"/> |
| </path> |
| <!-- Classpath for the forked JVMs: the compile classpath, this module's compiled classes, --> |
| <!-- and any extra entries supplied through the benchmark.ext.classpath property. --> |
| <path id="run.classpath"> |
|   <path refid="classpath"/> |
|   <pathelement location="${build.dir}/classes/java"/> |
|   <!-- "path" rather than "location": the property is a classpath and may hold several --> |
|   <!-- entries separated by ':' or ';', whereas "location" treats the value as one file. --> |
|   <pathelement path="${benchmark.ext.classpath}"/> |
| </path> |
| |
| <!-- Defaults for run-task: the algorithm file passed to Benchmark and the forked JVM's maxmemory. --> |
| <!-- The run-task description advertises overriding them with -Dtask.alg=... and -Dtask.mem=... --> |
| <property name="task.alg" location="conf/micro-standard.alg"/> |
| <property name="task.mem" value="140M"/> |
| |
| <!-- Runs byTask.Benchmark in a forked JVM on ${task.alg}, with maxmemory ${task.mem}. --> |
| <!-- Depends on compile, check-files and get-files. --> |
| <target name="run-task" depends="compile,check-files,get-files" |
|         description="Run compound penalty perf test (optional: -Dtask.alg=your-algorithm-file -Dtask.mem=java-max-mem)"> |
|   <echo>Working Directory: ${working.dir}</echo> |
|   <java fork="true" maxmemory="${task.mem}" classname="org.apache.lucene.benchmark.byTask.Benchmark"> |
|     <classpath refid="run.classpath"/> |
|     <arg file="${task.alg}"/> |
|   </java> |
| </target> |
| |
| <!-- Runs byTask.Benchmark on conf/extractWikipedia.alg in a forked JVM with assertions enabled. --> |
| <!-- Depends on compile, check-files and enwiki-files. --> |
| <target depends="compile,check-files,enwiki-files" name="enwiki"> |
|   <echo>Working Directory: ${working.dir}</echo> |
|   <java fork="true" maxmemory="1024M" classname="org.apache.lucene.benchmark.byTask.Benchmark"> |
|     <assertions> |
|       <enable/> |
|     </assertions> |
|     <classpath refid="run.classpath"/> |
|     <arg file="conf/extractWikipedia.alg"/> |
|   </java> |
| </target> |
| |
| <!-- Invokes the compile-demo target of ${common.dir}/build.xml. --> |
| <target name="compile-demo"> |
|   <subant target="compile-demo"> |
|     <fileset dir="${common.dir}"> |
|       <include name="build.xml"/> |
|     </fileset> |
|   </subant> |
| </target> |
| <!-- Invokes the compile target of the highlighter contrib's build.xml. --> |
| <target name="compile-highlighter"> |
|   <subant target="compile"> |
|     <fileset dir="${common.dir}/contrib/highlighter"> |
|       <include name="build.xml"/> |
|     </fileset> |
|   </subant> |
| </target> |
| <!-- Invokes the compile target of the memory contrib's build.xml. --> |
| <target name="compile-memory"> |
|   <subant target="compile"> |
|     <fileset dir="${common.dir}/contrib/memory"> |
|       <include name="build.xml"/> |
|     </fileset> |
|   </subant> |
| </target> |
| <!-- Invokes the compile target of the fast-vector-highlighter contrib's build.xml. --> |
| <target name="compile-vector-highlighter"> |
|   <subant target="compile"> |
|     <fileset dir="${common.dir}/contrib/fast-vector-highlighter"> |
|       <include name="build.xml"/> |
|     </fileset> |
|   </subant> |
| </target> |
| |
| <!-- Aggregate target with no tasks of its own: runs common.init, the four compile-* targets above, and check-files. --> |
| <target name="init" depends="common.init,compile-demo,compile-memory,compile-highlighter,compile-vector-highlighter,check-files"/> |
| |
| <!-- Download the online collections (Reuters) via get-files before delegating to common.test. --> |
| <!-- inheritRefs passes this project's references through to the called target. --> |
| <target depends="init,get-files" name="test"> |
|   <antcall inheritRefs="true" target="common.test"/> |
| </target> |
| |
| </project> |