Merged documentation branch into trunk

git-svn-id: https://hyracks.googlecode.com/svn/trunk/hyracks@284 123451ca-8445-de46-9d55-352943316053
diff --git a/hyracks-documentation/.classpath b/hyracks-documentation/.classpath
new file mode 100644
index 0000000..3f62785
--- /dev/null
+++ b/hyracks-documentation/.classpath
@@ -0,0 +1,6 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<classpath>
+	<classpathentry kind="con" path="org.eclipse.jdt.launching.JRE_CONTAINER/org.eclipse.jdt.internal.debug.ui.launcher.StandardVMType/J2SE-1.4"/>
+	<classpathentry kind="con" path="org.maven.ide.eclipse.MAVEN2_CLASSPATH_CONTAINER"/>
+	<classpathentry kind="output" path="target/classes"/>
+</classpath>
diff --git a/hyracks-documentation/.project b/hyracks-documentation/.project
new file mode 100644
index 0000000..64d4505
--- /dev/null
+++ b/hyracks-documentation/.project
@@ -0,0 +1,23 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<projectDescription>
+	<name>hyracks-documentation</name>
+	<comment></comment>
+	<projects>
+	</projects>
+	<buildSpec>
+		<buildCommand>
+			<name>org.eclipse.jdt.core.javabuilder</name>
+			<arguments>
+			</arguments>
+		</buildCommand>
+		<buildCommand>
+			<name>org.maven.ide.eclipse.maven2Builder</name>
+			<arguments>
+			</arguments>
+		</buildCommand>
+	</buildSpec>
+	<natures>
+		<nature>org.eclipse.jdt.core.javanature</nature>
+		<nature>org.maven.ide.eclipse.maven2Nature</nature>
+	</natures>
+</projectDescription>
diff --git a/hyracks-documentation/.settings/org.eclipse.jdt.core.prefs b/hyracks-documentation/.settings/org.eclipse.jdt.core.prefs
new file mode 100644
index 0000000..15a7e04
--- /dev/null
+++ b/hyracks-documentation/.settings/org.eclipse.jdt.core.prefs
@@ -0,0 +1,6 @@
+#Tue Oct 19 13:07:01 PDT 2010
+eclipse.preferences.version=1
+org.eclipse.jdt.core.compiler.codegen.targetPlatform=1.4
+org.eclipse.jdt.core.compiler.compliance=1.4
+org.eclipse.jdt.core.compiler.problem.forbiddenReference=warning
+org.eclipse.jdt.core.compiler.source=1.4
diff --git a/hyracks-documentation/.settings/org.maven.ide.eclipse.prefs b/hyracks-documentation/.settings/org.maven.ide.eclipse.prefs
new file mode 100644
index 0000000..1b662c1
--- /dev/null
+++ b/hyracks-documentation/.settings/org.maven.ide.eclipse.prefs
@@ -0,0 +1,9 @@
+#Tue Oct 19 13:07:01 PDT 2010
+activeProfiles=
+eclipse.preferences.version=1
+fullBuildGoals=process-test-resources
+includeModules=false
+resolveWorkspaceProjects=true
+resourceFilterGoals=process-resources resources\:testResources
+skipCompilerPlugin=true
+version=1
diff --git a/hyracks-documentation/pom.xml b/hyracks-documentation/pom.xml
new file mode 100644
index 0000000..4f5801a
--- /dev/null
+++ b/hyracks-documentation/pom.xml
@@ -0,0 +1,48 @@
+<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
+  <modelVersion>4.0.0</modelVersion>
+  <groupId>edu.uci.ics.hyracks</groupId>
+  <artifactId>hyracks-documentation</artifactId>
+  <version>0.1.4-SNAPSHOT</version>
+
+  <parent>
+    <groupId>edu.uci.ics.hyracks</groupId>
+    <artifactId>hyracks</artifactId>
+    <version>0.1.4-SNAPSHOT</version>
+  </parent>
+
+  <build>
+    <plugins>
+    	<plugin>
+    		<groupId>org.apache.maven.doxia</groupId>
+    		<artifactId>doxia-maven-plugin</artifactId>
+    		<version>1.1.3</version>
+    		<executions>
+    		  <execution>
+    		    <phase>package</phase>
+    		    <goals>
+    		      <goal>render-books</goal>
+    		    </goals>
+    		  </execution>
+    		</executions>
+    		<configuration>
+    		  <books>
+    		    <book>
+    		      <directory>src/books/user-guide</directory>
+    		      <descriptor>src/books/user-guide/doxia-descriptor.xml</descriptor>
+    		      <formats>
+    		        <format>
+    		          <id>pdf</id>
+    		        </format>
+    		        <format>
+    		          <id>xhtml</id>
+    		        </format>
+    		      </formats>
+    		    </book>
+    		  </books>
+    		</configuration>
+    	</plugin>
+    </plugins>
+  </build>
+  <dependencies>
+  </dependencies>
+</project>
diff --git a/hyracks-documentation/src/books/user-guide/doxia-descriptor.xml b/hyracks-documentation/src/books/user-guide/doxia-descriptor.xml
new file mode 100644
index 0000000..1fb3c0c
--- /dev/null
+++ b/hyracks-documentation/src/books/user-guide/doxia-descriptor.xml
@@ -0,0 +1,56 @@
+<book xmlns="http://maven.apache.org/BOOK/1.0.0"
+ xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+ xsi:schemaLocation="http://maven.apache.org/BOOK/1.0.0 ../../../doxia/doxia-book/target/generated-site/xsd/book-1.0.0.xsd">
+  <id>user-guide</id>
+  <title>Hyracks User Guide</title>
+  <chapters>
+    <chapter>
+      <id>introduction</id>
+      <title>Introduction</title>
+      <sections>
+        <section>
+          <id>sec-introduction-whatis</id>
+          <title>What is Hyracks?</title>
+          <file>sec-introduction-whatis.apt</file>
+        </section>
+        <section>
+          <id>sec-introduction-overview</id>
+          <title>Hyracks Overview</title>
+          <file>sec-introduction-overview.apt</file>
+        </section>
+      </sections>
+    </chapter>
+    <chapter>
+      <id>concepts</id>
+      <title>Hyracks Concepts</title>
+      <sections>
+        <section>
+          <id>sec-concepts-terminology</id>
+          <title>Terminology</title>
+          <file>sec-concepts-terminology.apt</file>
+        </section>
+        <section>
+          <id>sec-concepts-applications</id>
+          <title>Hyracks Applications</title>
+          <file>sec-concepts-applications.apt</file>
+        </section>
+      </sections>
+    </chapter>
+    <chapter>
+      <id>hyrackscli</id>
+      <title>Hyracks Command Line Interface</title>
+      <sections>
+        <section>
+          <id>sec-hyrackscli-running</id>
+          <title>Running the Hyracks CLI</title>
+          <file>sec-hyrackscli-running.apt</file>
+        </section>
+        <section>
+          <id>sec-hyrackscli-commands</id>
+          <title>Hyracks CLI Commands</title>
+          <file>sec-hyrackscli-commands.apt</file>
+        </section>
+      </sections>
+    </chapter>
+  </chapters>
+</book>
diff --git a/hyracks-documentation/src/books/user-guide/sec-concepts-applications.apt b/hyracks-documentation/src/books/user-guide/sec-concepts-applications.apt
new file mode 100644
index 0000000..87ef0ef
--- /dev/null
+++ b/hyracks-documentation/src/books/user-guide/sec-concepts-applications.apt
@@ -0,0 +1,27 @@
+Hyracks Applications
+
+  The Hyracks platform provides users the ability to execute jobs on a cluster. Jobs in Hyracks are expressed as a DAG of Operators and Connectors. Prior to being able to execute jobs on a Hyracks cluster, the code that implements the operators and connectors used in the jobs must be installed on the nodes of the cluster. The unit of deployment of code onto the cluster is called an application. Once an application containing the required code is created, multiple jobs can be executed against it until it is destroyed. A Hyracks cluster can have multiple applications installed simultaneously.
+
+* Hyracks Application Archive
+
+  A Hyracks application must be packaged into a Hyracks Application Archive (HAR) file before it can be installed on a Hyracks Cluster. A HAR file is a Zip file that contains all the code that will be executed on the cluster when jobs are issued and any deployment descriptors (discussed below) that are necessary to set up the application correctly.
+
+* HAR File Contents
+
+  A HAR file is a zip file that contains one or more JAR files. The JAR files are appended to the classpath of the application.
+
+* Hyracks Application Startup and Shutdown
+
+  Hyracks allows applications to run code at startup and shutdown. An application can provide the code to be executed at the ClusterController by providing an implementation of the edu.uci.ics.hyracks.api.application.ICCBootstrap interface. Similarly, the code to be executed at the NodeControllers can be specified by providing an implementation of the edu.uci.ics.hyracks.api.application.INCBootstrap interface. The bootstrap classes that implement the above interfaces are specified in a deployment descriptor file.
+
+* Hyracks deployment descriptor
+
+  A Hyracks deployment descriptor is a file called hyracks-deployment.properties. This file is made available to Hyracks in the application classpath by packaging it in a JAR file inside the Hyracks Application Archive.
+
+  The Hyracks deployment descriptor currently allows two properties.
+
+  * cc.bootstrap.class: Name of the Bootstrap class to be executed at the Cluster Controller.
+
+  * nc.bootstrap.class: Name of the Bootstrap class to be executed at the Node Controller.
+
+  
diff --git a/hyracks-documentation/src/books/user-guide/sec-concepts-terminology.apt b/hyracks-documentation/src/books/user-guide/sec-concepts-terminology.apt
new file mode 100644
index 0000000..b39440e
--- /dev/null
+++ b/hyracks-documentation/src/books/user-guide/sec-concepts-terminology.apt
@@ -0,0 +1,15 @@
+Terminology
+
+  [Operator Descriptor] An Operator Descriptor is a node in a Hyracks Job DAG. An Operator Descriptor has zero or more inputs and produces zero or more outputs.
+
+  [Connector Descriptor] A Connector Descriptor is an edge in a Hyracks Job DAG that connects one Operator Descriptor's output to another's input.
+
+  [Operator Activity] An Operator Activity is a phase of an operator. An operator contains one or more activities. For example, an operator that sorts its input using a bounded amount of memory usually does so in two distinct phases. The first phase produces sorted runs of its input. Once the runs are generated, the runs are merged to produce the sorted output. Such a sort operator would comprise two activities -- the Run Generation Activity and the Run Merging Activity.
+
+  [Operator Activity Clone] At runtime, each Operator Activity creates a number of clones that are executed in parallel on partitions of the activity's inputs to produce partitions of its output.
+
+  [Connector Partitioner] At runtime, each Connector Descriptor creates as many partitioners as the number of its data producing activity clones. Each partitioner implements the logic to decide how data produced by the producing activity clone is re-partitioned to the clones of the consuming activity. For example, a hash-partitioning connector's partitioner uses a hash function to determine how each data item is re-distributed to the consuming activity clones.
+
+  [Connector Receiver] Each clone of a Connector Descriptor's receiving activity receives its input from a Connector Receiver. A Connector Receiver encapsulates the logic to merge data arriving from each of the Connector Partitioners belonging to the same Connector Descriptor.
+
+  [Job Specification] A Hyracks job is represented by an instance of a Job Specification.
diff --git a/hyracks-documentation/src/books/user-guide/sec-hyrackscli-commands.apt b/hyracks-documentation/src/books/user-guide/sec-hyrackscli-commands.apt
new file mode 100644
index 0000000..6ddd162
--- /dev/null
+++ b/hyracks-documentation/src/books/user-guide/sec-hyrackscli-commands.apt
@@ -0,0 +1,13 @@
+Hyracks CLI Commands
+
+  * Connecting to a Hyracks Cluster Controller
+
+    connect to "<hostname>[:<port>]";
+
+  * Creating an application
+
+    create application <application name> "<absolute path to HAR file>";
+
+  * Destroying an application
+
+    destroy application <application name>;
diff --git a/hyracks-documentation/src/books/user-guide/sec-hyrackscli-running.apt b/hyracks-documentation/src/books/user-guide/sec-hyrackscli-running.apt
new file mode 100644
index 0000000..aa16d9d
--- /dev/null
+++ b/hyracks-documentation/src/books/user-guide/sec-hyrackscli-running.apt
@@ -0,0 +1,9 @@
+Running the Hyracks CLI
+
+  The Hyracks CLI is contained in the hyracks-cli subproject under the Hyracks source tree. The Hyracks CLI binary distribution contains the scripts to start the CLI in the bin folder.
+
+  On a Unix-like system, the CLI can be started by running
+    hyracks-cli-xxx-binary-assembly/bin/hyrackscli
+
+  On a Windows system, the CLI is started by running
+    hyracks-cli-xxx-binary-assembly\bin\hyrackscli.bat
diff --git a/hyracks-documentation/src/books/user-guide/sec-introduction-overview.apt b/hyracks-documentation/src/books/user-guide/sec-introduction-overview.apt
new file mode 100644
index 0000000..f35b7e4
--- /dev/null
+++ b/hyracks-documentation/src/books/user-guide/sec-introduction-overview.apt
@@ -0,0 +1,12 @@
+Hyracks Overview
+
+  Hyracks employs a client-server architecture. On the server side, the software module that is responsible for interacting with clients, keeping track of and dispatching work
+  to other machines in the cluster is called the Hyracks Cluster Controller (CC). There is one CC per logical Hyracks cluster. The module that executes on the worker machine
+  and interacts with the CC to receive work and act on it is called the Hyracks Node Controller (NC). Every NC in a single Hyracks cluster has a unique logical name. When an
+  NC is started, it is provided the address of the CC whose cluster it must join. Although it is sufficient to run one instance of the NC on a physical machine, it is possible
+  to run multiple instances of NCs (of course each NC has a different logical name) on the same physical machine -- often used for simulating a cluster on a single machine
+  to facilitate testing.
+
+  Hyracks clients interact solely with the CC when submitting their jobs. A Hyracks Job is the unit of work that a client can execute on the Hyracks cluster. A job is expressed
+  as a directed acyclic graph (DAG) of Operators connected to each other by means of Connectors. A more detailed description of jobs, operators, and connectors follows in
+  chapter "Hyracks Concepts".
diff --git a/hyracks-documentation/src/books/user-guide/sec-introduction-whatis.apt b/hyracks-documentation/src/books/user-guide/sec-introduction-whatis.apt
new file mode 100644
index 0000000..e3a659e
--- /dev/null
+++ b/hyracks-documentation/src/books/user-guide/sec-introduction-whatis.apt
@@ -0,0 +1,3 @@
+What is Hyracks?
+
+  Hyracks is a partitioned-parallel platform for running data-intensive computation on a shared-nothing cluster of commodity machines.
diff --git a/pom.xml b/pom.xml
index eced394..5588649 100644
--- a/pom.xml
+++ b/pom.xml
@@ -66,5 +66,6 @@
     <module>hyracks-storage-am-btree</module>
     <module>hyracks-storage-am-invertedindex</module>
     <module>hyracks-examples</module>
+    <module>hyracks-documentation</module>
   </modules>
 </project>