Final upmerge before merge to master
diff --git a/CHANGES.txt b/CHANGES.txt
index c610896..fc28a9f 100644
--- a/CHANGES.txt
+++ b/CHANGES.txt
@@ -6,6 +6,16 @@
 INCOMPATIBLE CHANGES
 
 ALL CHANGES:
+  TEZ-1672. Update jetty to use stable 7.x version - 7.6.16.v20140903.
+  TEZ-1822. Docs for Timeline/ACLs/HistoryText.
+  TEZ-1252. Change wording on http://tez.apache.org/team-list.html related to member confusion.
+  TEZ-1805. Tez client DAG cycle detection should detect self loops
+  TEZ-1816. It is possible to receive START event when DAG is failed
+  TEZ-1787. Counters for speculation
+  TEZ-1773. Add attempt failure cause enum to the attempt failed/killed
+  history record
+  TEZ-14. Support MR like speculation capabilities based on latency deviation
+  from the mean
   TEZ-1733. TezMerger should sort FileChunks on size when merging
   TEZ-1738. Tez tfile parser for log parsing
   TEZ-1627. Remove OUTPUT_CONSUMABLE and related Event in TaskAttemptImpl
@@ -13,6 +23,17 @@
   TEZ-1721. Update INSTALL instructions for clarifying tez client jars
     compatibility with runtime tarball on HDFS.
   TEZ-1690. TestMultiMRInput tests fail because of user collisions.
+  TEZ-1687. Use logIdentifier of Vertex for logging.
+  TEZ-1737. Should add taskNum in VertexFinishedEvent.
+  TEZ-1772. Failing tests post TEZ-1737.
+  TEZ-1785. Remove unused snappy-java dependency.
+  TEZ-1685. Remove YARNMaster which is never used.
+  TEZ-1797. Create necessary content for Tez DOAP file.
+  TEZ-1650. Please create a DOAP file for your TLP.
+  TEZ-1697. DAG submission fails if a local resource added is already part of tez.lib.uris
+  TEZ-1060. Add randomness to fault tolerance tests
+  TEZ-1800. Integer overflow in ExternalSorter.getInitialMemoryRequirement()
+  TEZ-1790. DeallocationTaskRequest may be handled before corresponding AllocationTaskRequest in local mode
 
 TEZ-UI CHANGES (TEZ-8):
   TEZ-1799. Enable Cross Origin Support in Tez UI
@@ -43,11 +64,13 @@
   TEZ-1749. Increase test timeout for TestLocalMode.testMultipleClientsWithSession
   TEZ-1750. Add a DAGScheduler which schedules tasks only when sources have been scheduled.
   TEZ-1761. TestRecoveryParser::testGetLastInProgressDAG fails in similar manner to TEZ-1686.
-  TEZ-1687. Use logIdentifier of Vertex for logging.
-  TEZ-1737. Should add taskNum in VertexFinishedEvent.
   TEZ-1770. Handle ConnectExceptions correctly when establishing connections to an NM which may be down.
   TEZ-1774. AppLaunched event for Timeline does not have start time set.
-  TEZ-1772. Failing tests post TEZ-1737.
+  TEZ-1780. tez-api is missing jersey dependencies.
+  TEZ-1796. Use of DeprecationDelta broke build against 2.2 Hadoop.
+  TEZ-1818. Problem loading tez-api-version-info.properties in case current context classloader
+    is not pointing to Tez jars.
+  TEZ-1808. Job can fail since name of intermediate files can be too long in specific situation.
 
 Release 0.5.2: 2014-11-07
 
diff --git a/Tez_DOAP.rdf b/Tez_DOAP.rdf
new file mode 100644
index 0000000..bc1f6d9
--- /dev/null
+++ b/Tez_DOAP.rdf
@@ -0,0 +1,71 @@
+<?xml version="1.0"?>
+<?xml-stylesheet type="text/xsl"?>
+<rdf:RDF xml:lang="en"
+         xmlns="http://usefulinc.com/ns/doap#" 
+         xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" 
+         xmlns:asfext="http://projects.apache.org/ns/asfext#"
+         xmlns:foaf="http://xmlns.com/foaf/0.1/">
+<!--
+    Licensed to the Apache Software Foundation (ASF) under one or more
+    contributor license agreements.  See the NOTICE file distributed with
+    this work for additional information regarding copyright ownership.
+    The ASF licenses this file to You under the Apache License, Version 2.0
+    (the "License"); you may not use this file except in compliance with
+    the License.  You may obtain a copy of the License at
+   
+         http://www.apache.org/licenses/LICENSE-2.0
+   
+    Unless required by applicable law or agreed to in writing, software
+    distributed under the License is distributed on an "AS IS" BASIS,
+    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+    See the License for the specific language governing permissions and
+    limitations under the License.
+-->
+  <Project rdf:about="http://tez.apache.org">
+    <created>2014-11-21</created>
+    <license rdf:resource="http://spdx.org/licenses/Apache-2.0" />
+    <name>Apache Tez</name>
+    <homepage rdf:resource="http://tez.apache.org" />
+    <asfext:pmc rdf:resource="http://tez.apache.org/pmc/tez.rdf" />
+    <shortdesc>Apache Tez is an effort to develop a generic application framework which can be used to process arbitrarily complex directed-acyclic graphs (DAGs) of data-processing tasks and also a re-usable set of data-processing primitives which can be used by other projects.</shortdesc>
+    <description></description>
+    <bug-database rdf:resource="https://issues.apache.org/jira/browse/TEZ" />
+    <mailing-list rdf:resource="http://tez.apache.org/mail-lists.html" />
+    <download-page rdf:resource="http://tez.apache.org/releases/" />
+    <programming-language>Java</programming-language>
+    <category rdf:resource="http://projects.apache.org/category/big-data" />
+    <release>
+      <Version>
+        <name>Version 0.5.1</name>
+        <created>2014-10-08</created>
+        <revision>0.5.1</revision>
+      </Version>
+    </release>
+    <release>
+      <Version>
+        <name>Version 0.5.0</name>
+        <created>2014-09-04</created>
+        <revision>0.5.0</revision>
+      </Version>
+    </release>
+    <release>
+      <Version>
+        <name>Version 0.4.1-incubating</name>
+        <created>2014-07-15</created>
+        <revision>0.4.1-incubating</revision>
+      </Version>
+    </release>
+    <repository>
+      <GitRepository>
+        <location rdf:resource="https://git-wip-us.apache.org/repos/asf/tez.git"/>
+        <browse rdf:resource="https://git-wip-us.apache.org/repos/asf?p=tez.git"/>
+      </GitRepository>
+    </repository>
+    <maintainer>
+      <foaf:Person>
+        <foaf:name>Apache Tez PMC</foaf:name>
+          <foaf:mbox rdf:resource="mailto:dev@tez.apache.org"/>
+      </foaf:Person>
+    </maintainer>
+  </Project>
+</rdf:RDF>
diff --git a/docs/pom.xml b/docs/pom.xml
index c4ec467..64d6f61 100644
--- a/docs/pom.xml
+++ b/docs/pom.xml
@@ -530,6 +530,9 @@
         <plugin>
           <groupId>org.apache.maven.plugins</groupId>
           <artifactId>maven-project-info-reports-plugin</artifactId>
+          <configuration>
+            <customBundle>${project.basedir}/src/site/custom/project-info-report.properties</customBundle>
+          </configuration>
           <reportSets>
             <reportSet>
               <reports>
diff --git a/docs/src/site/custom/project-info-report.properties b/docs/src/site/custom/project-info-report.properties
new file mode 100644
index 0000000..a3d2497
--- /dev/null
+++ b/docs/src/site/custom/project-info-report.properties
@@ -0,0 +1,12 @@
+report.team-list.intro.description2 = The team is comprised of the PMC, \
+Committers and Contributors. The PMC (project management committee) is a \
+committee of the Apache Software Foundation charged with responsibility \
+for a top level project. Committers have direct access to the source of \
+a project and actively evolve the code-base. Contributors improve the \
+project in various ways such as submission of patches, improving \
+documentation, testing the product and by providing suggestions to the \
+Committers. The number of Contributors to the project is unbounded. Get \
+involved today. All contributions to the project are greatly appreciated.
+report.team-list.nocontributor = The contributors to this project can be \
+tracked by looking at the <a href="https://issues.apache.org/jira/browse/TEZ" \
+class="externalLink">Apache Tez JIRA website</a>.
diff --git a/docs/src/site/markdown/releases/index_0_5_0.md b/docs/src/site/markdown/releases/apache-tez-0-5-0.md
similarity index 100%
rename from docs/src/site/markdown/releases/index_0_5_0.md
rename to docs/src/site/markdown/releases/apache-tez-0-5-0.md
diff --git a/docs/src/site/markdown/releases/index_0_5_1.md b/docs/src/site/markdown/releases/apache-tez-0-5-1.md
similarity index 100%
rename from docs/src/site/markdown/releases/index_0_5_1.md
rename to docs/src/site/markdown/releases/apache-tez-0-5-1.md
diff --git a/docs/src/site/markdown/releases/index.md b/docs/src/site/markdown/releases/index.md
new file mode 100644
index 0000000..68f6471
--- /dev/null
+++ b/docs/src/site/markdown/releases/index.md
@@ -0,0 +1,24 @@
+<!--
+   Licensed to the Apache Software Foundation (ASF) under one or more
+   contributor license agreements.  See the NOTICE file distributed with
+   this work for additional information regarding copyright ownership.
+   The ASF licenses this file to You under the Apache License, Version 2.0
+   (the "License"); you may not use this file except in compliance with
+   the License.  You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+-->
+
+<head><title>Apache Tez Releases</title></head>
+
+Releases
+------------
+
+-   [Apache Tez 0.5.1](./apache-tez-0-5-1.html) (Oct 08, 2014)
+-   [Apache Tez 0.5.0](./apache-tez-0-5-0.html) (Sep 04, 2014)
diff --git a/docs/src/site/markdown/tez_acls.md b/docs/src/site/markdown/tez_acls.md
new file mode 100644
index 0000000..2ac6830
--- /dev/null
+++ b/docs/src/site/markdown/tez_acls.md
@@ -0,0 +1,67 @@
+<!--
+   Licensed to the Apache Software Foundation (ASF) under one or more
+   contributor license agreements.  See the NOTICE file distributed with
+   this work for additional information regarding copyright ownership.
+   The ASF licenses this file to You under the Apache License, Version 2.0
+   (the "License"); you may not use this file except in compliance with
+   the License.  You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+-->
+
+<head><title>Access Control in Tez</title></head>
+
+## Background
+
+Access control in Tez can be categorized as follows:
+
+  - Modify permissions on the Tez AM ( or Session ). Users with this permission can:
+    - Submit a DAG to a Tez Session
+    - Kill any DAG within the given AM/Session
+    - Kill the Session
+  - View permissions on the Tez AM ( or Session ). Users with this permission can:
+    - Monitor/View the status of the Session
+    - Monitor/View the progress/status of any DAG within the given AM/Session
+  - Modify permissions on a particular Tez DAG. Users with this permission can:
+    - Kill the DAG
+  - View permissions on a particular Tez DAG. Users with this permission can:
+    - Monitor/View the progress/status of the DAG
+
+From the above, you can see that all users/groups that have access to do operations on the AM also have access to similar operations on all DAGs within that AM/session. Also, by default, the owner of the Tez AM, i.e. the user who started the Tez AM, is considered a super-user and has access to all operations on the AM as well as all DAGs within the AM/Session.
+
+Support for ACLs was introduced in Tez 0.5.0. Integration of these ACLs with YARN Timeline is only available from Tez 0.6.0 onwards.
+
+## How to setup the ACLs
+
+By default, ACLs are always enabled in Tez. To disable ACLs, set the following configuration property:
+
+> &lt;property&gt;<br/>
+> &nbsp;&nbsp;&nbsp;&lt;name&gt;tez.am.acls.enabled&lt;/name&gt;<br/>
+> &nbsp;&nbsp;&nbsp;&lt;value&gt;false&lt;/value&gt;<br/>
+> &lt;/property&gt;<br/>
+
+### AM/Session Level ACLs
+
+AM/Session level ACLs are driven by configuration. To setup the ACLs, the following properties need to be defined:
+
+> &lt;property&gt;<br/>
+> &nbsp;&nbsp;&nbsp;&lt;name&gt;tez.am.view-acls&lt;/name&gt;<br/>
+> &nbsp;&nbsp;&nbsp;&lt;value&gt;&lt;/value&gt;<br/>
+> &lt;/property&gt;<br/>
+> &lt;property&gt;<br/>
+> &nbsp;&nbsp;&nbsp;&lt;name&gt;tez.am.modify-acls&lt;/name&gt;<br/>
+> &nbsp;&nbsp;&nbsp;&lt;value&gt;&lt;/value&gt;<br/>
+> &lt;/property&gt;<br/>
+
+The format of the value is a comma-separated list of users and groups with the users and groups separated by a single whitespace. e.g. "user1,user2 group1,group2". To allow all users to do a given operation, the value "*" can be specified.
+
+### DAG ACLs
+
+In certain scenarios, applications may need DAGs running within a given Session to have different access permissions. In such cases, the ACLs for each DAG can be specified programmatically via the DAG API. Look for DAG::setAccessControls in the API docs for the Tez release that you are using.
+In this scenario, it is important to note that the Session ACLs should be defined with only super-users specified to ensure that other users do not inadvertently gain access to information for all DAGs within the given Session.
diff --git a/docs/src/site/markdown/tez_ui_user_data.md b/docs/src/site/markdown/tez_ui_user_data.md
new file mode 100644
index 0000000..71ffde3
--- /dev/null
+++ b/docs/src/site/markdown/tez_ui_user_data.md
@@ -0,0 +1,52 @@
+<!--
+   Licensed to the Apache Software Foundation (ASF) under one or more
+   contributor license agreements.  See the NOTICE file distributed with
+   this work for additional information regarding copyright ownership.
+   The ASF licenses this file to You under the Apache License, Version 2.0
+   (the "License"); you may not use this file except in compliance with
+   the License.  You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+-->
+
+<head><title>Embedding Application Specific Data into Tez UI</title></head>
+
+# Embedding Application Specific Data into Tez UI
+
+The Tez UI is built mainly off data stored in [YARN Timeline]. The Tez API, currently, provides some minimal support for an application to inject application-specific data into the same storage layer. Using a general standard guideline by following a well-defined format, this data can also be displayed in the Tez UI. 
+
+## Setting DAG-level Information
+
+To set DAG level information, the API to use is DAG::setDAGInfo.  ( Please refer to the Javadocs for more detailed and up-to-date information )
+
+The DAG::setDAGInfo() API expects a String to be passed to it. This string is recommended to be a json-encoded value with the following keys recognized by the UI:
+  - "context": The application context in which this DAG is being used. For example, this could be set to "Hive" or "Pig" if this is being run as part of a Hive or Pig script.
+  - "description": General description on what this DAG is going to do. In the case of Hive, this could be the SQL query text.
+
+## Setting Information for each Input/Output/Processor
+
+Each Input/Output/Processor specified in the DAG Plan is specified via a TezEntityDescriptor. Applications specify a user payload that is used to initialize/configure the instance as needed. From a Tez UI point of view, users are usually keen to understand what "work" the particular Input/Output/Processor is doing in addition to any additional configuration information on how the object was initialized/configured. Keeping that in mind, each TezEntityDescriptor supports an api for application developers to specify this information when creating the DAG plan. The API to use for this is setHistoryText(). 
+
+The setHistoryText() API expects a String to be passed to it. This string is recommended to be a json-encoded value with the following keys recognized by the UI:
+  - "desc" : A simple string describing the object in question. For example, for a particular Hive Processor, this could be a description of what that particular processor is doing.
+  - "config" : A map of key-value pairs representing the configuration/payload used to initialize the object in question.
+
+By default, the Inputs/Outputs/Processors that are part of the tez-runtime-library do not publish their configuration information via the setHistoryText() API. To enable this, the following property needs to be enabled:
+
+> &lt;property&gt;<br/>
+> &nbsp;&nbsp;&nbsp;&lt;name&gt;tez.runtime.convert.user-payload.to.history-text&lt;/name&gt;<br/>
+> &nbsp;&nbsp;&nbsp;&lt;value&gt;true&lt;/value&gt;<br/>
+> &lt;/property&gt;<br/>
+
+## Use of DAG Info and History Text in the Tez UI
+
+If the data setup in the DAG Info and History Text conforms to the format expected by the UI, it will be displayed in the Tez UI in an easy to consume manner. In cases where this is not possible, the UI may fall back to either not displaying the data at all or displaying the string as is in a safe manner. 
+
+[YARN Timeline]:./tez_yarn_timeline.html
+
diff --git a/docs/src/site/markdown/tez_yarn_timeline.md b/docs/src/site/markdown/tez_yarn_timeline.md
new file mode 100644
index 0000000..745f65c
--- /dev/null
+++ b/docs/src/site/markdown/tez_yarn_timeline.md
@@ -0,0 +1,69 @@
+<!--
+   Licensed to the Apache Software Foundation (ASF) under one or more
+   contributor license agreements.  See the NOTICE file distributed with
+   this work for additional information regarding copyright ownership.
+   The ASF licenses this file to You under the Apache License, Version 2.0
+   (the "License"); you may not use this file except in compliance with
+   the License.  You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+-->
+<head><title>Using YARN Timeline with Tez for History</title></head>
+
+## YARN Timeline Background
+
+Initial support for [YARN Timeline](http://hadoop.apache.org/docs/r2.4.0/hadoop-yarn/hadoop-yarn-site/TimelineServer.html) was introduced in Apache Hadoop 2.4.0. Support for ACLs in Timeline was introduced in Apache Hadoop 2.6.0. Support for Timeline was introduced in Tez in 0.5.x ( with some experimental support in 0.4.x ). However, Tez ACLs integration with Timeline is only available from Tez 0.6.0 onwards.
+
+## How Tez Uses YARN Timeline
+
+Tez uses YARN Timeline as its application history store. Tez stores most of its lifecycle information into this history store such as:
+  - DAG information such as:
+    - DAG Plan
+    - DAG Submission, Start and End times
+    - DAG Counters
+    - Final status of the DAG and additional diagnostics
+  - Vertex, Task and Task Attempt Information
+    - Start and End times
+    - Counters
+    - Diagnostics
+
+Using the above information, a user can analyze a Tez DAG while it is running and after it has completed.
+
+## YARN Timeline and Hadoop Versions
+
+Given that the support for YARN Timeline with full security was only realized in Apache Hadoop 2.6.0, some features may or may not be supported depending on which version of Apache Hadoop is used.
+
+
+|  | Hadoop 2.2.x, 2.3.x | Hadoop 2.4.x, 2.5.x | Hadoop 2.6.x and higher |
+| ------- | ----- | ----- | ----- |
+| Timeline Support | No | Yes | Yes |
+| Timeline with ACLs Support | No | No | Yes |
+
+## Configuring Tez to use YARN Timeline
+
+By default, Tez writes its history data into a file on HDFS. To use Timeline, add the following property into your tez-site.xml:
+
+> &lt;property&gt;<br/>
+> &nbsp;&nbsp;&nbsp;&lt;name&gt;tez.history.logging.service.class&lt;/name&gt;<br/>
+> &nbsp;&nbsp;&nbsp;&lt;value&gt;org.apache.tez.dag.history.logging.ats.ATSHistoryLoggingService&lt;/value&gt;<br/>
+> &lt;/property&gt;<br/>
+
+For Tez 0.4.x, the above property is not respected. For 0.4.x, please set the following property:
+
+> &lt;property&gt;<br/>
+> &nbsp;&nbsp;&nbsp;&lt;name&gt;tez.yarn.ats.enabled&lt;/name&gt;<br/>
+> &nbsp;&nbsp;&nbsp;&lt;value&gt;true&lt;/value&gt;<br/>
+> &lt;/property&gt;<br/>
+
+When using Tez with Apache Hadoop 2.4.x or 2.5.x, given that these versions are not fully secure, the following property also needs to be enabled:
+
+> &lt;property&gt;<br/>
+> &nbsp;&nbsp;&nbsp;&lt;name&gt;tez.allow.disabled.timeline-domains&lt;/name&gt;<br/>
+> &nbsp;&nbsp;&nbsp;&lt;value&gt;true&lt;/value&gt;<br/>
+> &lt;/property&gt;<br/>
diff --git a/docs/src/site/markdown/user_guides.md b/docs/src/site/markdown/user_guides.md
new file mode 100644
index 0000000..959f0e5
--- /dev/null
+++ b/docs/src/site/markdown/user_guides.md
@@ -0,0 +1,25 @@
+<!--
+   Licensed to the Apache Software Foundation (ASF) under one or more
+   contributor license agreements.  See the NOTICE file distributed with
+   this work for additional information regarding copyright ownership.
+   The ASF licenses this file to You under the Apache License, Version 2.0
+   (the "License"); you may not use this file except in compliance with
+   the License.  You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+-->
+
+<head><title>User Guides for various Tez features</title></head>
+
+# User Guides and Documentation for various Tez features
+
+   - [Using YARN Timeline with Tez for History](./tez_yarn_timeline.html)
+   - [Access Control in Tez](./tez_acls.html)
+   - [Embedding Application Specific Data into Tez UI](./tez_ui_user_data.html)
+
diff --git a/docs/src/site/resources/pmc/tez.rdf b/docs/src/site/resources/pmc/tez.rdf
new file mode 100644
index 0000000..3086f4a
--- /dev/null
+++ b/docs/src/site/resources/pmc/tez.rdf
@@ -0,0 +1,191 @@
+<?xml version="1.0"?>
+<!--
+  Licensed under the Apache License, Version 2.0 (the "License");
+  you may not use this file except in compliance with the License.
+  You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing, software
+  distributed under the License is distributed on an "AS IS" BASIS,
+  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  See the License for the specific language governing permissions and
+  limitations under the License. See accompanying LICENSE file.
+-->
+<rdf:RDF xml:lang="en"
+         xmlns="http://usefulinc.com/ns/doap#" 
+         xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" 
+         xmlns:asfext="http://projects.apache.org/ns/asfext#"
+         xmlns:foaf="http://xmlns.com/foaf/0.1/">
+  <asfext:PMC rdf:about="tez">
+    <asfext:name>Apache Tez</asfext:name>
+    <foaf:homepage rdf:resource="http://tez.apache.org/"/>
+    <asfext:chair>
+      <foaf:Person>
+        <foaf:name>Hitesh Shah</foaf:name>
+      </foaf:Person>
+    </asfext:chair>
+    <asfext:charter>Apache Tez is an effort to develop a generic application framework which can be used to process arbitrarily complex DAGs of data-processing tasks and also a re-usable set of data-processing primitives which can be used by other projects.</asfext:charter>
+    <asfext:member>
+      <foaf:Person>
+        <foaf:name>Alan Gates</foaf:name>
+      </foaf:Person>
+    </asfext:member>
+    <asfext:member>
+      <foaf:Person>
+        <foaf:name>Arun C. Murthy</foaf:name>
+      </foaf:Person>
+    </asfext:member>
+    <asfext:member>
+      <foaf:Person>
+        <foaf:name>Ashutosh Chauhan</foaf:name>
+      </foaf:Person>
+    </asfext:member>
+    <asfext:member>
+      <foaf:Person>
+        <foaf:name>Bikas Saha</foaf:name>
+      </foaf:Person>
+    </asfext:member>
+    <asfext:member>
+      <foaf:Person>
+        <foaf:name>Bill Graham</foaf:name>
+      </foaf:Person>
+    </asfext:member>
+    <asfext:member>
+      <foaf:Person>
+        <foaf:name>Chris Douglas</foaf:name>
+      </foaf:Person>
+    </asfext:member>
+    <asfext:member>
+      <foaf:Person>
+        <foaf:name>Chris Mattmann</foaf:name>
+      </foaf:Person>
+    </asfext:member>
+    <asfext:member>
+      <foaf:Person>
+        <foaf:name>Daryn Sharp</foaf:name>
+      </foaf:Person>
+    </asfext:member>
+    <asfext:member>
+      <foaf:Person>
+        <foaf:name>Devaraj Das</foaf:name>
+      </foaf:Person>
+    </asfext:member>
+    <asfext:member>
+      <foaf:Person>
+        <foaf:name>Gopal Vijayaraghavan</foaf:name>
+      </foaf:Person>
+    </asfext:member>
+    <asfext:member>
+      <foaf:Person>
+        <foaf:name>Gunther Hagleitner</foaf:name>
+      </foaf:Person>
+    </asfext:member>
+    <asfext:member>
+      <foaf:Person>
+        <foaf:name>Hitesh Shah</foaf:name>
+      </foaf:Person>
+    </asfext:member>
+    <asfext:member>
+      <foaf:Person>
+        <foaf:name>Jakob Homan</foaf:name>
+      </foaf:Person>
+    </asfext:member>
+    <asfext:member>
+      <foaf:Person>
+        <foaf:name>Jason Lowe</foaf:name>
+      </foaf:Person>
+    </asfext:member>
+    <asfext:member>
+      <foaf:Person>
+        <foaf:name>Jitendra Pandey</foaf:name>
+      </foaf:Person>
+    </asfext:member>
+    <asfext:member>
+      <foaf:Person>
+        <foaf:name>Jonathan Eagles</foaf:name>
+      </foaf:Person>
+    </asfext:member>
+    <asfext:member>
+      <foaf:Person>
+        <foaf:name>Julien Le Dem</foaf:name>
+      </foaf:Person>
+    </asfext:member>
+    <asfext:member>
+      <foaf:Person>
+        <foaf:name>Kevin Wilfong</foaf:name>
+      </foaf:Person>
+    </asfext:member>
+    <asfext:member>
+      <foaf:Person>
+        <foaf:name>Mike Liddell</foaf:name>
+      </foaf:Person>
+    </asfext:member>
+    <asfext:member>
+      <foaf:Person>
+        <foaf:name>Mohammad Kamrul Islam</foaf:name>
+      </foaf:Person>
+    </asfext:member>
+    <asfext:member>
+      <foaf:Person>
+        <foaf:name>Namit Jain</foaf:name>
+      </foaf:Person>
+    </asfext:member>
+    <asfext:member>
+      <foaf:Person>
+        <foaf:name>Nathan Roberts</foaf:name>
+      </foaf:Person>
+    </asfext:member>
+    <asfext:member>
+      <foaf:Person>
+        <foaf:name>Owen O’Malley</foaf:name>
+      </foaf:Person>
+    </asfext:member>
+    <asfext:member>
+      <foaf:Person>
+        <foaf:name>Rajesh Balamohan</foaf:name>
+      </foaf:Person>
+    </asfext:member>
+    <asfext:member>
+      <foaf:Person>
+        <foaf:name>Robert Evans</foaf:name>
+      </foaf:Person>
+    </asfext:member>
+    <asfext:member>
+      <foaf:Person>
+        <foaf:name>Rohini Palaniswamy</foaf:name>
+      </foaf:Person>
+    </asfext:member>
+    <asfext:member>
+      <foaf:Person>
+        <foaf:name>Siddharth Seth</foaf:name>
+      </foaf:Person>
+    </asfext:member>
+    <asfext:member>
+      <foaf:Person>
+        <foaf:name>Tassapol Athiapinya</foaf:name>
+      </foaf:Person>
+    </asfext:member>
+    <asfext:member>
+      <foaf:Person>
+        <foaf:name>Thomas Graves</foaf:name>
+      </foaf:Person>
+    </asfext:member>
+    <asfext:member>
+      <foaf:Person>
+        <foaf:name>Tom White</foaf:name>
+      </foaf:Person>
+    </asfext:member>
+    <asfext:member>
+      <foaf:Person>
+        <foaf:name>Vikram Dixit</foaf:name>
+      </foaf:Person>
+    </asfext:member>
+    <asfext:member>
+      <foaf:Person>
+        <foaf:name>Vinod Kumar Vavilapalli</foaf:name>
+      </foaf:Person>
+    </asfext:member>
+  </asfext:PMC>
+
+</rdf:RDF>
diff --git a/docs/src/site/site.xml b/docs/src/site/site.xml
index 3de9b32..32819da 100644
--- a/docs/src/site/site.xml
+++ b/docs/src/site/site.xml
@@ -101,6 +101,7 @@
       <item name="Install Guide" href="install.html"/>
       <item name="Local Mode" href="localmode.html"/>
       <item name="Tez UI" href="tez-ui.html"/>
+      <item name="User Guides" href="user_guides.html"/>
     </menu>
 
     <menu name="Community">
@@ -113,8 +114,9 @@
 
     <menu name="Releases">
       <item name="0.4.1-incubating" href="http://archive.apache.org/dist/incubator/tez/tez-0.4.1-incubating/"/>
-      <item name="0.5.0" href="./releases/index_0_5_0.html"/>
-      <item name="0.5.1" href="./releases/index_0_5_1.html"/>
+      <item name="0.5.0" href="./releases/apache-tez-0-5-0.html"/>
+      <item name="0.5.1" href="./releases/apache-tez-0-5-1.html"/>
+      <item name="All Releases" href="./releases/index.html"/>
     </menu>
 
     <menu name="Contribute">
diff --git a/pom.xml b/pom.xml
index 2f8159e..bf798bc 100644
--- a/pom.xml
+++ b/pom.xml
@@ -38,7 +38,7 @@
     <maven.test.redirectTestOutputToFile>true</maven.test.redirectTestOutputToFile>
     <clover.license>${user.home}/clover.license</clover.license>
     <hadoop.version>2.4.0</hadoop.version>
-    <jetty.version>7.6.10.v20130312</jetty.version>
+    <jetty.version>7.6.16.v20140903</jetty.version>
     <distMgmtSnapshotsId>apache.snapshots.https</distMgmtSnapshotsId>
     <distMgmtSnapshotsName>Apache Development Snapshot Repository</distMgmtSnapshotsName>
     <distMgmtSnapshotsUrl>https://repository.apache.org/content/repositories/snapshots</distMgmtSnapshotsUrl>
@@ -211,10 +211,6 @@
           </exclusion>
           <exclusion>
             <groupId>com.sun.jersey</groupId>
-            <artifactId>jersey-json</artifactId>
-          </exclusion>
-          <exclusion>
-            <groupId>com.sun.jersey</groupId>
             <artifactId>jersey-server</artifactId>
           </exclusion>
           <exclusion>
@@ -292,10 +288,6 @@
           </exclusion>
           <exclusion>
             <groupId>com.sun.jersey</groupId>
-            <artifactId>jersey-json</artifactId>
-          </exclusion>
-          <exclusion>
-            <groupId>com.sun.jersey</groupId>
             <artifactId>jersey-server</artifactId>
           </exclusion>
           <exclusion>
@@ -373,10 +365,6 @@
             <artifactId>jersey-core</artifactId>
           </exclusion>
           <exclusion>
-            <groupId>com.sun.jersey</groupId>
-            <artifactId>jersey-json</artifactId>
-          </exclusion>
-          <exclusion>
             <groupId>io.netty</groupId>
             <artifactId>netty</artifactId>
           </exclusion>
@@ -402,6 +390,10 @@
             <groupId>javax.xml.bind</groupId>
             <artifactId>jaxb-api</artifactId>
           </exclusion>
+          <exclusion>
+            <groupId>com.sun.jersey</groupId>
+            <artifactId>jersey-server</artifactId>
+          </exclusion>
         </exclusions>
       </dependency>
       <dependency>
@@ -456,10 +448,6 @@
             <artifactId>guice-servlet</artifactId>
           </exclusion>
           <exclusion>
-            <groupId>com.sun.jersey</groupId>
-            <artifactId>jersey-json</artifactId>
-          </exclusion>
-          <exclusion>
             <groupId>io.netty</groupId>
             <artifactId>netty</artifactId>
           </exclusion>
@@ -576,11 +564,6 @@
        <version>${protobuf.version}</version>
       </dependency>
       <dependency>
-        <groupId>org.xerial.snappy</groupId>
-        <artifactId>snappy-java</artifactId>
-        <version>1.0.4.1</version>
-      </dependency>
-      <dependency>
         <groupId>com.google.guava</groupId>
         <artifactId>guava</artifactId>
         <version>11.0.2</version>
@@ -595,6 +578,16 @@
         <artifactId>jsr305</artifactId>
         <version>2.0.3</version>
       </dependency>
+      <dependency>
+        <groupId>com.sun.jersey</groupId>
+        <artifactId>jersey-client</artifactId>
+        <version>1.9</version>
+      </dependency>
+      <dependency>
+        <groupId>com.sun.jersey</groupId>
+        <artifactId>jersey-json</artifactId>
+        <version>1.9</version>
+      </dependency>
     </dependencies>
   </dependencyManagement>
 
diff --git a/tez-api/pom.xml b/tez-api/pom.xml
index a2ed5f0..732e466 100644
--- a/tez-api/pom.xml
+++ b/tez-api/pom.xml
@@ -82,6 +82,14 @@
       <artifactId>jsr305</artifactId>
     </dependency>
     <dependency>
+      <groupId>com.sun.jersey</groupId>
+      <artifactId>jersey-client</artifactId>
+    </dependency>
+    <dependency>
+      <groupId>com.sun.jersey</groupId>
+      <artifactId>jersey-json</artifactId>
+    </dependency>
+    <dependency>
       <groupId>org.apache.hadoop</groupId>
       <artifactId>hadoop-hdfs</artifactId>
       <scope>runtime</scope>
diff --git a/tez-api/src/main/java/org/apache/tez/common/ATSConstants.java b/tez-api/src/main/java/org/apache/tez/common/ATSConstants.java
index 7b47b9c..e502374 100644
--- a/tez-api/src/main/java/org/apache/tez/common/ATSConstants.java
+++ b/tez-api/src/main/java/org/apache/tez/common/ATSConstants.java
@@ -62,6 +62,7 @@
   public static final String FINISH_TIME = "endTime";
   public static final String TIME_TAKEN = "timeTaken";
   public static final String STATUS = "status";
+  public static final String TASK_ATTEMPT_ERROR_ENUM = "taskAttemptErrorEnum";
   public static final String DIAGNOSTICS = "diagnostics";
   public static final String SUCCESSFUL_ATTEMPT_ID = "successfulAttemptId";
   public static final String COUNTERS = "counters";
diff --git a/tez-api/src/main/java/org/apache/tez/common/TezCommonUtils.java b/tez-api/src/main/java/org/apache/tez/common/TezCommonUtils.java
index 345177a..c5d5ebc 100644
--- a/tez-api/src/main/java/org/apache/tez/common/TezCommonUtils.java
+++ b/tez-api/src/main/java/org/apache/tez/common/TezCommonUtils.java
@@ -307,14 +307,46 @@
   }
   
+  /**
+   * Merges additional local resources into an existing resource map.
+   * A name that is already present in {@code originalLRs} is tolerated only when both entries
+   * report the same size; the duplicate is logged and the additional entry replaces the original.
+   *
+   * @param additionalLrs resources to add; nothing is done when null or empty
+   * @param originalLRs map that is updated in place
+   * @param logContext text identifying the caller (e.g. a DAG or Vertex name) used in messages
+   * @throws TezUncheckedException if an entry with the same name has a different size
+   */
   public static void addAdditionalLocalResources(Map<String, LocalResource> additionalLrs,
-      Map<String, LocalResource> originalLRs) {
+      Map<String, LocalResource> originalLRs, String logContext) {
+    // TODO TEZ-1798. Handle contents of Tez archives for duplicate LocalResource checks
     if (additionalLrs != null && !additionalLrs.isEmpty()) {
-      for (Map.Entry<String, LocalResource> lr : additionalLrs.entrySet()) {
-        if (originalLRs.containsKey(lr.getKey())) {
-          throw new TezUncheckedException("Attempting to add duplicate resource: " + lr.getKey());
-        } else {
-          originalLRs.put(lr.getKey(), lr.getValue());
+      StringBuilder sb = new StringBuilder();
+      for (Map.Entry<String, LocalResource> lrEntry : additionalLrs.entrySet()) {
+        LocalResource originalLr = originalLRs.get(lrEntry.getKey());
+        if (originalLr != null) {
+          LocalResource additionalLr = lrEntry.getValue();
+          if (originalLr.getSize() != additionalLr.getSize()) {
+            throw new TezUncheckedException(
+                "Duplicate Resources found with different size for [" + logContext + "]: " + lrEntry.getKey() +
+                    " : " + "[" + additionalLr.getResource() + "=" + additionalLr.getSize() +
+                    "],[" + originalLr.getResource() + "=" + originalLr.getSize() + "]");
+          } else {
+            if (originalLr.getResource().equals(additionalLr.getResource())) {
+              sb.append("[").append(lrEntry.getKey()).append(" : Duplicate]");
+            } else {
+              sb.append("[").append(lrEntry.getKey()).append(" : DuplicateDifferentPath]");
+            }
+          }
         }
+        // The LR either does not exist, or is an 'equivalent' dupe.
+        // Prefer the tez specified LR instead of the equivalent user specified LR for container reuse matching
+        originalLRs.put(lrEntry.getKey(), lrEntry.getValue());
+      }
+      String logString = sb.toString();
+      if (!logString.isEmpty()) {
+        LOG.warn("Found Resources Duplication in " + logContext + " after including resources from " +
+            TezConfiguration.TEZ_LIB_URIS + " and " + TezConfiguration.TEZ_AUX_URIS + ": " +
+            logString);
       }
     }
   }
diff --git a/tez-api/src/main/java/org/apache/tez/common/VersionInfo.java b/tez-api/src/main/java/org/apache/tez/common/VersionInfo.java
index 0287d0b..b5e44c9 100644
--- a/tez-api/src/main/java/org/apache/tez/common/VersionInfo.java
+++ b/tez-api/src/main/java/org/apache/tez/common/VersionInfo.java
@@ -47,18 +47,16 @@
   protected VersionInfo(String component) {
     this.component = component;
     info = new Properties();
-    String versionInfoFile = component + "-version-info.properties";
+    String versionInfoFile = "/" + component + "-version-info.properties";
     InputStream is = null;
     try {
-      is = Thread.currentThread().getContextClassLoader()
-          .getResourceAsStream(versionInfoFile);
+      is = this.getClass().getResourceAsStream(versionInfoFile);
       if (is == null) {
-        throw new IOException("Resource not found");
+        throw new IOException("Resource not found: " + versionInfoFile);
       }
       info.load(is);
     } catch (IOException ex) {
-      LogFactory.getLog(getClass()).warn("Could not read '" +
-          versionInfoFile + "', " + ex.toString(), ex);
+      LOG.warn("Could not read '" + versionInfoFile + "', " + ex.toString(), ex);
     } finally {
       IOUtils.closeStream(is);
     }
@@ -86,7 +84,7 @@
         + ", version=" + getVersion()
         + ", revision=" + getRevision()
         + ", SCM-URL=" + getSCMURL()
-        + ", buildTIme=" + getBuildTime()
+        + ", buildTime=" + getBuildTime()
         + " ]";
   }
 
diff --git a/tez-api/src/main/java/org/apache/tez/common/counters/TaskCounter.java b/tez-api/src/main/java/org/apache/tez/common/counters/TaskCounter.java
index 22d7f59..94cae5f 100644
--- a/tez-api/src/main/java/org/apache/tez/common/counters/TaskCounter.java
+++ b/tez-api/src/main/java/org/apache/tez/common/counters/TaskCounter.java
@@ -24,6 +24,8 @@
 @Private
 public enum TaskCounter {
   // TODO Eventually, rename counters to be non-MR specific and map them to MR equivalent.
+  
+  NUM_SPECULATIONS,
 
   /**
    * Number of Input Groups seen by ShuffledMergedInput.
diff --git a/tez-api/src/main/java/org/apache/tez/dag/api/DAG.java b/tez-api/src/main/java/org/apache/tez/dag/api/DAG.java
index acef4da..91b468d 100644
--- a/tez-api/src/main/java/org/apache/tez/dag/api/DAG.java
+++ b/tez-api/src/main/java/org/apache/tez/dag/api/DAG.java
@@ -113,7 +113,7 @@
    */
   public synchronized DAG addTaskLocalFiles(Map<String, LocalResource> localFiles) {
     Preconditions.checkNotNull(localFiles);
-    TezCommonUtils.addAdditionalLocalResources(localFiles, commonTaskLocalFiles);
+    TezCommonUtils.addAdditionalLocalResources(localFiles, commonTaskLocalFiles, "DAG " + getName());
     return this;
   }
   
@@ -591,6 +591,15 @@
         }
         message.append(av.v.getName());
         throw new IllegalStateException("DAG contains a cycle: " + message);
+      } else {
+        // detect self-cycle
+        if (edgeMap.containsKey(pop.v)) {
+          for (Edge edge : edgeMap.get(pop.v)) {
+            if (edge.getOutputVertex().equals(pop.v)) {
+              throw new IllegalStateException("DAG contains a self-cycle on vertex:" + pop.v.getName());
+            }
+          }
+        }
       }
       topologicalVertexStack.push(av.v.getName());
     }
@@ -661,11 +670,14 @@
           dagCredentials.addAll(dataSource.getCredentials());
         }
         if (dataSource.getAdditionalLocalFiles() != null) {
-          TezCommonUtils.addAdditionalLocalResources(dataSource.getAdditionalLocalFiles(), vertexLRs);
+          TezCommonUtils
+              .addAdditionalLocalResources(dataSource.getAdditionalLocalFiles(), vertexLRs,
+                  "Vertex " + vertex.getName());
         }
       }
       if (tezJarResources != null) {
-        TezCommonUtils.addAdditionalLocalResources(tezJarResources, vertexLRs);
+        TezCommonUtils
+            .addAdditionalLocalResources(tezJarResources, vertexLRs, "Vertex " + vertex.getName());
       }
       if (binaryConfig != null) {
         vertexLRs.put(TezConstants.TEZ_PB_BINARY_CONF_NAME, binaryConfig);
diff --git a/tez-api/src/main/java/org/apache/tez/dag/api/TezConfiguration.java b/tez-api/src/main/java/org/apache/tez/dag/api/TezConfiguration.java
index 6873863..06c7008 100644
--- a/tez-api/src/main/java/org/apache/tez/dag/api/TezConfiguration.java
+++ b/tez-api/src/main/java/org/apache/tez/dag/api/TezConfiguration.java
@@ -36,33 +36,30 @@
   public final static String TEZ_SITE_XML = "tez-site.xml";
 
   static {
-    Configuration.addDeprecations(new DeprecationDelta[]
-        {
-            new DeprecationDelta("tez.am.counters.max.keys", TezConfiguration.TEZ_COUNTERS_MAX),
-            new DeprecationDelta("tez.am.counters.groups.max.keys",
-                TezConfiguration.TEZ_COUNTERS_MAX_GROUPS),
-            new DeprecationDelta("tez.am.counters.name.max.keys",
-                TezConfiguration.TEZ_COUNTERS_COUNTER_NAME_MAX_LENGTH),
-            new DeprecationDelta("tez.am.counters.group-name.max.keys",
-                TezConfiguration.TEZ_COUNTERS_GROUP_NAME_MAX_LENGTH),
+    Configuration.addDeprecation("tez.am.counters.max.keys", TezConfiguration.TEZ_COUNTERS_MAX);
+    Configuration.addDeprecation("tez.am.counters.groups.max.keys",
+        TezConfiguration.TEZ_COUNTERS_MAX_GROUPS);
+    Configuration.addDeprecation("tez.am.counters.name.max.keys",
+        TezConfiguration.TEZ_COUNTERS_COUNTER_NAME_MAX_LENGTH);
+    Configuration.addDeprecation("tez.am.counters.group-name.max.keys",
+        TezConfiguration.TEZ_COUNTERS_GROUP_NAME_MAX_LENGTH);
 
-            new DeprecationDelta("tez.task.scale.task.memory.enabled",
-                TezConfiguration.TEZ_TASK_SCALE_MEMORY_ENABLED),
-            new DeprecationDelta("tez.task.scale.task.memory.allocator.class",
-                TezConfiguration.TEZ_TASK_SCALE_MEMORY_ALLOCATOR_CLASS),
-            new DeprecationDelta("tez.task.scale.task.memory.reserve-fraction",
-                TezConfiguration.TEZ_TASK_SCALE_MEMORY_RESERVE_FRACTION),
-            new DeprecationDelta(
-                "tez.task.scale.task.memory.additional-reservation.fraction.per-io",
-                TezConfiguration.TEZ_TASK_SCALE_MEMORY_ADDITIONAL_RESERVATION_FRACTION_PER_IO),
-            new DeprecationDelta("tez.task.scale.task.memory.additional-reservation.fraction.max",
-                TezConfiguration.TEZ_TASK_SCALE_MEMORY_ADDITIONAL_RESERVATION_FRACTION_MAX),
-            new DeprecationDelta("tez.task.scale.task.memory.ratios",
-                TezConfiguration.TEZ_TASK_SCALE_MEMORY_WEIGHTED_RATIOS),
+    Configuration.addDeprecation("tez.task.scale.task.memory.enabled",
+        TezConfiguration.TEZ_TASK_SCALE_MEMORY_ENABLED);
+    Configuration.addDeprecation("tez.task.scale.task.memory.allocator.class",
+        TezConfiguration.TEZ_TASK_SCALE_MEMORY_ALLOCATOR_CLASS);
+    Configuration.addDeprecation("tez.task.scale.task.memory.reserve-fraction",
+        TezConfiguration.TEZ_TASK_SCALE_MEMORY_RESERVE_FRACTION);
+    Configuration
+        .addDeprecation("tez.task.scale.task.memory.additional-reservation.fraction.per-io",
+            TezConfiguration.TEZ_TASK_SCALE_MEMORY_ADDITIONAL_RESERVATION_FRACTION_PER_IO);
+    Configuration.addDeprecation("tez.task.scale.task.memory.additional-reservation.fraction.max",
+        TezConfiguration.TEZ_TASK_SCALE_MEMORY_ADDITIONAL_RESERVATION_FRACTION_MAX);
+    Configuration.addDeprecation("tez.task.scale.task.memory.ratios",
+        TezConfiguration.TEZ_TASK_SCALE_MEMORY_WEIGHTED_RATIOS);
 
-            new DeprecationDelta("tez.task.max-events-per-heartbeat.max",
-                TezConfiguration.TEZ_TASK_MAX_EVENTS_PER_HEARTBEAT)
-        });
+    Configuration.addDeprecation("tez.task.max-events-per-heartbeat.max",
+        TezConfiguration.TEZ_TASK_MAX_EVENTS_PER_HEARTBEAT);
   }
 
   public TezConfiguration() {
@@ -272,7 +269,21 @@
       TEZ_PREFIX + "counters.group-name.max-length";
   public static final int TEZ_COUNTERS_GROUP_NAME_MAX_LENGTH_DEFAULT = 128;
 
-
+  /**
+   * Boolean value. Enable speculative execution of slower tasks. This can help reduce job latency
+   * when some tasks are running slower due to bad/slow machines.
+   */
+  @Unstable
+  public static final String TEZ_AM_SPECULATION_ENABLED = TEZ_AM_PREFIX + "speculation.enabled";
+  public static final boolean TEZ_AM_SPECULATION_ENABLED_DEFAULT = false;
+
+  /**
+   * Float value. Specifies how many standard deviations away from the mean task execution time
+   * should be considered as an outlier/slow task.
+   */
+  @Unstable
+  public static final String TEZ_AM_LEGACY_SPECULATIVE_SLOWTASK_THRESHOLD =
+                                     TEZ_AM_PREFIX + "legacy.speculative.slowtask.threshold";
 
   /**
    * Int value. Upper limit on the number of threads user to launch containers in the app
diff --git a/tez-api/src/main/java/org/apache/tez/dag/api/Vertex.java b/tez-api/src/main/java/org/apache/tez/dag/api/Vertex.java
index 04acdaf..8d59928 100644
--- a/tez-api/src/main/java/org/apache/tez/dag/api/Vertex.java
+++ b/tez-api/src/main/java/org/apache/tez/dag/api/Vertex.java
@@ -254,7 +254,7 @@
    */
   public Vertex addTaskLocalFiles(Map<String, LocalResource> localFiles) {
     if (localFiles != null) {
-      TezCommonUtils.addAdditionalLocalResources(localFiles, taskLocalResources);
+      TezCommonUtils.addAdditionalLocalResources(localFiles, taskLocalResources, "Vertex " + getName());
     }
     return this;
   }
diff --git a/tez-api/src/test/java/org/apache/tez/common/TestTezCommonUtils.java b/tez-api/src/test/java/org/apache/tez/common/TestTezCommonUtils.java
index 23d2dbb..e8c4f74 100644
--- a/tez-api/src/test/java/org/apache/tez/common/TestTezCommonUtils.java
+++ b/tez-api/src/test/java/org/apache/tez/common/TestTezCommonUtils.java
@@ -19,16 +19,23 @@
 package org.apache.tez.common;
 
 import java.io.IOException;
+import java.util.Map;
 
+import com.google.common.collect.Maps;
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.hdfs.MiniDFSCluster;
+import org.apache.hadoop.yarn.api.records.LocalResource;
+import org.apache.hadoop.yarn.api.records.LocalResourceType;
+import org.apache.hadoop.yarn.api.records.LocalResourceVisibility;
+import org.apache.hadoop.yarn.api.records.URL;
 import org.apache.tez.client.TestTezClientUtils;
 import org.apache.tez.dag.api.TezConfiguration;
 import org.apache.tez.dag.api.TezConstants;
+import org.apache.tez.dag.api.TezUncheckedException;
 import org.junit.AfterClass;
 import org.junit.Assert;
 import org.junit.BeforeClass;
@@ -242,4 +249,69 @@
     Assert.assertArrayEquals(expectedTokens, tokens);
   }
 
+
+  @Test(timeout = 5000)
+  public void testAddAdditionalLocalResources() {
+    String lrName = "LR";
+    Map<String, LocalResource> originalLrs;
+    originalLrs= Maps.newHashMap();
+    originalLrs.put(lrName, LocalResource.newInstance(
+        URL.newInstance("file", "localhost", 0, "/test"),
+        LocalResourceType.FILE, LocalResourceVisibility.PUBLIC, 1, 1));
+
+
+    Map<String, LocalResource> additionalLrs;
+
+    // Same path, same size.
+    originalLrs= Maps.newHashMap();
+    originalLrs.put(lrName, LocalResource.newInstance(
+        URL.newInstance("file", "localhost", 0, "/test"),
+        LocalResourceType.FILE, LocalResourceVisibility.PUBLIC, 1, 1));
+    additionalLrs = Maps.newHashMap();
+    additionalLrs.put(lrName, LocalResource.newInstance(URL.newInstance("file", "localhost", 0, "/test"),
+        LocalResourceType.FILE, LocalResourceVisibility.PUBLIC, 1, 1));
+    TezCommonUtils.addAdditionalLocalResources(additionalLrs, originalLrs, "");
+
+    // Same path, different size.
+    originalLrs= Maps.newHashMap();
+    originalLrs.put(lrName, LocalResource.newInstance(
+        URL.newInstance("file", "localhost", 0, "/test"),
+        LocalResourceType.FILE, LocalResourceVisibility.PUBLIC, 1, 1));
+    additionalLrs = Maps.newHashMap();
+    additionalLrs.put(lrName, LocalResource.newInstance(URL.newInstance("file", "localhost", 0, "/test"),
+        LocalResourceType.FILE, LocalResourceVisibility.PUBLIC, 100, 1));
+    try {
+      TezCommonUtils.addAdditionalLocalResources(additionalLrs, originalLrs, "");
+      Assert.fail("Duplicate LRs with different sizes expected to fail");
+    } catch (TezUncheckedException e) {
+      Assert.assertTrue(e.getMessage().contains("Duplicate Resources found with different size"));
+    }
+
+    // Different path, same size, diff timestamp
+    originalLrs= Maps.newHashMap();
+    originalLrs.put(lrName, LocalResource.newInstance(
+        URL.newInstance("file", "localhost", 0, "/test"),
+        LocalResourceType.FILE, LocalResourceVisibility.PUBLIC, 1, 1));
+    additionalLrs = Maps.newHashMap();
+    additionalLrs.put(lrName, LocalResource.newInstance(URL.newInstance("file", "localhost", 0, "/test2"),
+        LocalResourceType.FILE, LocalResourceVisibility.PUBLIC, 1, 100));
+    TezCommonUtils.addAdditionalLocalResources(additionalLrs, originalLrs, "");
+
+    // Different path, different size
+    originalLrs= Maps.newHashMap();
+    originalLrs.put(lrName, LocalResource.newInstance(
+        URL.newInstance("file", "localhost", 0, "/test"),
+        LocalResourceType.FILE, LocalResourceVisibility.PUBLIC, 1, 1));
+    additionalLrs = Maps.newHashMap();
+    additionalLrs.put(lrName, LocalResource.newInstance(URL.newInstance("file", "localhost", 0, "/test2"),
+        LocalResourceType.FILE, LocalResourceVisibility.PUBLIC, 100, 1));
+    try {
+      TezCommonUtils.addAdditionalLocalResources(additionalLrs, originalLrs, "");
+      Assert.fail("Duplicate LRs with different sizes expected to fail");
+    } catch (TezUncheckedException e) {
+      Assert.assertTrue(e.getMessage().contains("Duplicate Resources found with different size"));
+    }
+
+  }
+
 }
diff --git a/tez-api/src/test/java/org/apache/tez/dag/api/TestDAGVerify.java b/tez-api/src/test/java/org/apache/tez/dag/api/TestDAGVerify.java
index 0697584..7a95c9a 100644
--- a/tez-api/src/test/java/org/apache/tez/dag/api/TestDAGVerify.java
+++ b/tez-api/src/test/java/org/apache/tez/dag/api/TestDAGVerify.java
@@ -471,6 +471,32 @@
     Assert.assertTrue(ex.getMessage().startsWith("DAG contains a cycle"));
   }
 
+  // v1 -> v1
+  @Test(timeout = 5000)
+  public void testSelfCycle(){
+    IllegalStateException ex=null;
+    Vertex v1 = Vertex.create("v1",
+        ProcessorDescriptor.create("MapProcessor"),
+        dummyTaskCount, dummyTaskResource);
+    Edge e1 = Edge.create(v1, v1,
+        EdgeProperty.create(DataMovementType.SCATTER_GATHER,
+            DataSourceType.PERSISTED, SchedulingType.SEQUENTIAL,
+            OutputDescriptor.create("dummy output class"),
+            InputDescriptor.create("dummy input class")));
+    DAG dag = DAG.create("testDag");
+    dag.addVertex(v1);
+    dag.addEdge(e1);
+    try{
+      dag.verify();
+    }
+    catch (IllegalStateException e){
+      ex = e;
+    }
+    Assert.assertNotNull(ex);
+    System.out.println(ex.getMessage());
+    Assert.assertTrue(ex.getMessage().startsWith("DAG contains a self-cycle"));
+  }
+
   @Test(timeout = 5000)
   public void repeatedVertexName() {
     IllegalStateException ex=null;
@@ -956,33 +982,40 @@
     String lrName1 = "LR1";
     lrs.put(lrName1, LocalResource.newInstance(URL.newInstance("file", "localhost", 0, "/test"),
         LocalResourceType.FILE, LocalResourceVisibility.PUBLIC, 1, 1));
+
+    // Same lr, different size
+    Map<String, LocalResource> lrs2 = Maps.newHashMap();
+    lrs2.put(lrName1, LocalResource.newInstance(URL.newInstance("file", "localhost", 0, "/test2"),
+        LocalResourceType.FILE, LocalResourceVisibility.PUBLIC, 100, 1));
+
     v1.addTaskLocalFiles(lrs);
+    // Must fail: lrs2 reuses the name of an already added LR but declares a different size.
     try {
-      v1.addTaskLocalFiles(lrs);
+      v1.addTaskLocalFiles(lrs2);
       Assert.fail();
     } catch (TezUncheckedException e) {
-      Assert.assertTrue(e.getMessage().contains("Attempting to add duplicate resource"));
+      Assert.assertTrue(e.getMessage().contains("Duplicate Resources found with different size"));
     }
 
     DataSourceDescriptor ds = DataSourceDescriptor.create(InputDescriptor.create("I.class"), 
-        null, -1, null, null, lrs);
+        null, -1, null, null, lrs2);
     v1.addDataSource("i1", ds);
     
     DAG dag = DAG.create("testDag");
     dag.addVertex(v1);
     dag.addTaskLocalFiles(lrs);
     try {
-      dag.addTaskLocalFiles(lrs);
+      dag.addTaskLocalFiles(lrs2);
       Assert.fail();
     } catch (TezUncheckedException e) {
-      Assert.assertTrue(e.getMessage().contains("Attempting to add duplicate resource"));
+      Assert.assertTrue(e.getMessage().contains("Duplicate Resources found with different size"));
     }
     try {
       // data source will add duplicate common files to vertex
       dag.createDag(new TezConfiguration(), null, null, null, true);
       Assert.fail();
     } catch (TezUncheckedException e) {
-      Assert.assertTrue(e.getMessage().contains("Attempting to add duplicate resource"));
+      Assert.assertTrue(e.getMessage().contains("Duplicate Resources found with different size"));
     }
   }
   
diff --git a/tez-common/src/main/java/org/apache/tez/dag/records/TaskAttemptTerminationCause.java b/tez-common/src/main/java/org/apache/tez/dag/records/TaskAttemptTerminationCause.java
new file mode 100644
index 0000000..ef0bb33
--- /dev/null
+++ b/tez-common/src/main/java/org/apache/tez/dag/records/TaskAttemptTerminationCause.java
@@ -0,0 +1,45 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.tez.dag.records;
+
+public enum TaskAttemptTerminationCause {
+  UNKNOWN_ERROR, // The error cause is unknown. Usually means a gap in error propagation
+  
+  TERMINATED_BY_CLIENT, // Killed by client command
+  TERMINATED_AT_SHUTDOWN, // Killed due to execution shutdown
+  INTERNAL_PREEMPTION, // Killed by Tez to make space for higher priority work
+  EXTERNAL_PREEMPTION, // Killed by the cluster to make space for other work
+  TERMINATED_INEFFECTIVE_SPECULATION, // Killed speculative attempt because original succeeded
+  TERMINATED_EFFECTIVE_SPECULATION, // Killed original attempt because speculation succeeded
+  TERMINATED_ORPHANED, // Attempt is no longer needed by the task
+  
+  APPLICATION_ERROR, // Failed due to application code error
+  FRAMEWORK_ERROR, // Failed due to code error in Tez code
+  INPUT_READ_ERROR, // Failed due to error in reading inputs
+  OUTPUT_WRITE_ERROR, // Failed due to error in writing outputs
+  OUTPUT_LOST, // Failed because the attempt's outputs were reported lost
+  TASK_HEARTBEAT_ERROR, // Failed because AM lost connection to the task
+  
+  CONTAINER_LAUNCH_FAILED, // Failed to launch container
+  CONTAINER_EXITED, // Container exited. Indicates gap in specific error propagation from the cluster
+  CONTAINER_STOPPED, // Container stopped or released by Tez
+  NODE_FAILED, // Node for the container failed
+  NODE_DISK_ERROR, // Disk failed on the node running the task
+  
+}
diff --git a/tez-dag/src/main/java/org/apache/tez/dag/app/TaskHeartbeatHandler.java b/tez-dag/src/main/java/org/apache/tez/dag/app/TaskHeartbeatHandler.java
index 6b698aa..d115b14 100644
--- a/tez-dag/src/main/java/org/apache/tez/dag/app/TaskHeartbeatHandler.java
+++ b/tez-dag/src/main/java/org/apache/tez/dag/app/TaskHeartbeatHandler.java
@@ -23,6 +23,7 @@
 import org.apache.tez.dag.api.TezConfiguration;
 import org.apache.tez.dag.app.dag.event.TaskAttemptEventAttemptFailed;
 import org.apache.tez.dag.app.dag.event.TaskAttemptEventType;
+import org.apache.tez.dag.records.TaskAttemptTerminationCause;
 import org.apache.tez.dag.records.TezTaskAttemptID;
 
 
@@ -60,6 +61,6 @@
   protected void handleTimeOut(TezTaskAttemptID attemptId) {
     eventHandler.handle(new TaskAttemptEventAttemptFailed(attemptId,
         TaskAttemptEventType.TA_TIMED_OUT, "AttemptID:" + attemptId.toString()
-        + " Timed out after " + timeOut / 1000 + " secs"));
+        + " Timed out after " + timeOut / 1000 + " secs", TaskAttemptTerminationCause.TASK_HEARTBEAT_ERROR));
   }
 }
diff --git a/tez-dag/src/main/java/org/apache/tez/dag/app/dag/TaskAttempt.java b/tez-dag/src/main/java/org/apache/tez/dag/app/dag/TaskAttempt.java
index f30fc5c..3f60a4e 100644
--- a/tez-dag/src/main/java/org/apache/tez/dag/app/dag/TaskAttempt.java
+++ b/tez-dag/src/main/java/org/apache/tez/dag/app/dag/TaskAttempt.java
@@ -29,6 +29,7 @@
 import org.apache.tez.dag.api.oldrecords.TaskAttemptReport;
 import org.apache.tez.dag.api.oldrecords.TaskAttemptState;
 import org.apache.tez.dag.history.HistoryEvent;
+import org.apache.tez.dag.records.TaskAttemptTerminationCause;
 import org.apache.tez.dag.records.TezDAGID;
 import org.apache.tez.dag.records.TezTaskAttemptID;
 import org.apache.tez.dag.records.TezTaskID;
@@ -40,10 +41,14 @@
 public interface TaskAttempt {
 
   public static class TaskAttemptStatus {
+    public TezTaskAttemptID id;
     public TaskAttemptState state;
-    public DAGCounter localityCounter;
     public float progress;
     public TezCounters counters;
+    
+    public TaskAttemptStatus(TezTaskAttemptID id) {
+      this.id = id;
+    }
 
     // insert these counters till they come natively from the task itself.
     // HDFS-5098
@@ -69,6 +74,7 @@
   
   TaskAttemptReport getReport();
   List<String> getDiagnostics();
+  TaskAttemptTerminationCause getTerminationCause();
   TezCounters getCounters();
   float getProgress();
   TaskAttemptState getState();
diff --git a/tez-dag/src/main/java/org/apache/tez/dag/app/dag/Vertex.java b/tez-dag/src/main/java/org/apache/tez/dag/app/dag/Vertex.java
index cfedc41..7487fd9 100644
--- a/tez-dag/src/main/java/org/apache/tez/dag/app/dag/Vertex.java
+++ b/tez-dag/src/main/java/org/apache/tez/dag/app/dag/Vertex.java
@@ -124,6 +124,7 @@
   int getInputVerticesCount();
   int getOutputVerticesCount();
   void scheduleTasks(List<TaskWithLocationHint> tasks);
+  void scheduleSpeculativeTask(TezTaskID taskId);
   Resource getTaskResource();
 
   ProcessorDescriptor getProcessorDescriptor();
diff --git a/tez-dag/src/main/java/org/apache/tez/dag/app/dag/event/TaskAttemptEventAttemptFailed.java b/tez-dag/src/main/java/org/apache/tez/dag/app/dag/event/TaskAttemptEventAttemptFailed.java
index 5c7b956..b9c1d09 100644
--- a/tez-dag/src/main/java/org/apache/tez/dag/app/dag/event/TaskAttemptEventAttemptFailed.java
+++ b/tez-dag/src/main/java/org/apache/tez/dag/app/dag/event/TaskAttemptEventAttemptFailed.java
@@ -18,16 +18,19 @@
 
 package org.apache.tez.dag.app.dag.event;
 
+import org.apache.tez.dag.records.TaskAttemptTerminationCause;
 import org.apache.tez.dag.records.TezTaskAttemptID;
 
 public class TaskAttemptEventAttemptFailed extends TaskAttemptEvent 
-  implements DiagnosableEvent {
+  implements DiagnosableEvent, TaskAttemptEventTerminationCauseEvent {
 
   private final String diagnostics;
+  private final TaskAttemptTerminationCause errorCause;
   public TaskAttemptEventAttemptFailed(TezTaskAttemptID id,
-      TaskAttemptEventType type, String diagnostics) {
+      TaskAttemptEventType type, String diagnostics, TaskAttemptTerminationCause errorCause) {
     super(id, type);
     this.diagnostics = diagnostics;
+    this.errorCause = errorCause;
   }
 
   @Override
@@ -35,5 +38,9 @@
     return diagnostics;
   }
   
-  
+  @Override
+  public TaskAttemptTerminationCause getTerminationCause() {
+    return errorCause;
+  }
+
 }
diff --git a/tez-dag/src/main/java/org/apache/tez/dag/app/dag/event/TaskAttemptEventContainerTerminated.java b/tez-dag/src/main/java/org/apache/tez/dag/app/dag/event/TaskAttemptEventContainerTerminated.java
index 87aa313..5dd0141 100644
--- a/tez-dag/src/main/java/org/apache/tez/dag/app/dag/event/TaskAttemptEventContainerTerminated.java
+++ b/tez-dag/src/main/java/org/apache/tez/dag/app/dag/event/TaskAttemptEventContainerTerminated.java
@@ -17,20 +17,29 @@
 
 package org.apache.tez.dag.app.dag.event;
 
+import org.apache.tez.dag.records.TaskAttemptTerminationCause;
 import org.apache.tez.dag.records.TezTaskAttemptID;
 
 public class TaskAttemptEventContainerTerminated extends TaskAttemptEvent
-    implements DiagnosableEvent {
+    implements DiagnosableEvent, TaskAttemptEventTerminationCauseEvent {
 
   private final String message;
+  private final TaskAttemptTerminationCause errorCause;
 
-  public TaskAttemptEventContainerTerminated(TezTaskAttemptID id, String message) {
+  public TaskAttemptEventContainerTerminated(TezTaskAttemptID id, String message, 
+      TaskAttemptTerminationCause errCause) {
     super(id, TaskAttemptEventType.TA_CONTAINER_TERMINATED);
     this.message = message;
+    this.errorCause = errCause;
   }
 
   @Override
   public String getDiagnosticInfo() {
     return message;
   }
+
+  @Override
+  public TaskAttemptTerminationCause getTerminationCause() {
+    return errorCause;
+  }
 }
diff --git a/tez-dag/src/main/java/org/apache/tez/dag/app/dag/event/TaskAttemptEventContainerTerminatedBySystem.java b/tez-dag/src/main/java/org/apache/tez/dag/app/dag/event/TaskAttemptEventContainerTerminatedBySystem.java
index a92aafd..a3c57e4 100644
--- a/tez-dag/src/main/java/org/apache/tez/dag/app/dag/event/TaskAttemptEventContainerTerminatedBySystem.java
+++ b/tez-dag/src/main/java/org/apache/tez/dag/app/dag/event/TaskAttemptEventContainerTerminatedBySystem.java
@@ -18,19 +18,28 @@
 
 package org.apache.tez.dag.app.dag.event;
 
+import org.apache.tez.dag.records.TaskAttemptTerminationCause;
 import org.apache.tez.dag.records.TezTaskAttemptID;
 
 public class TaskAttemptEventContainerTerminatedBySystem extends TaskAttemptEvent 
-  implements DiagnosableEvent {
+  implements DiagnosableEvent, TaskAttemptEventTerminationCauseEvent {
 
   private final String diagnostics;
-  public TaskAttemptEventContainerTerminatedBySystem(TezTaskAttemptID id, String diagnostics) {
+  private final TaskAttemptTerminationCause errorCause;
+  public TaskAttemptEventContainerTerminatedBySystem(TezTaskAttemptID id, String diagnostics,
+      TaskAttemptTerminationCause errorCause) {
     super(id, TaskAttemptEventType.TA_CONTAINER_TERMINATED_BY_SYSTEM);
     this.diagnostics = diagnostics;
+    this.errorCause = errorCause;
   }
 
   @Override
   public String getDiagnosticInfo() {
     return diagnostics;
   }
+
+  @Override
+  public TaskAttemptTerminationCause getTerminationCause() {
+    return errorCause;
+  }
 }
diff --git a/tez-dag/src/main/java/org/apache/tez/dag/app/dag/event/TaskAttemptEventContainerTerminating.java b/tez-dag/src/main/java/org/apache/tez/dag/app/dag/event/TaskAttemptEventContainerTerminating.java
index 7da6e14..02d04a5 100644
--- a/tez-dag/src/main/java/org/apache/tez/dag/app/dag/event/TaskAttemptEventContainerTerminating.java
+++ b/tez-dag/src/main/java/org/apache/tez/dag/app/dag/event/TaskAttemptEventContainerTerminating.java
@@ -17,17 +17,20 @@
 
 package org.apache.tez.dag.app.dag.event;
 
+import org.apache.tez.dag.records.TaskAttemptTerminationCause;
 import org.apache.tez.dag.records.TezTaskAttemptID;
 
 public class TaskAttemptEventContainerTerminating extends TaskAttemptEvent
-    implements DiagnosableEvent {
+    implements DiagnosableEvent, TaskAttemptEventTerminationCauseEvent {
 
   private final String message;
+  private final TaskAttemptTerminationCause errorCause;
 
   public TaskAttemptEventContainerTerminating(TezTaskAttemptID id,
-      String diagMessage) {
+      String diagMessage, TaskAttemptTerminationCause errCause) {
     super(id, TaskAttemptEventType.TA_CONTAINER_TERMINATING);
     this.message = diagMessage;
+    this.errorCause = errCause;
   }
 
   @Override
@@ -35,4 +38,9 @@
     return this.message;
   }
 
+  @Override
+  public TaskAttemptTerminationCause getTerminationCause() {
+    return errorCause;
+  }
+
 }
diff --git a/tez-dag/src/main/java/org/apache/tez/dag/app/dag/event/TaskAttemptEventKillRequest.java b/tez-dag/src/main/java/org/apache/tez/dag/app/dag/event/TaskAttemptEventKillRequest.java
index 9bceb1d..a0dfe5d 100644
--- a/tez-dag/src/main/java/org/apache/tez/dag/app/dag/event/TaskAttemptEventKillRequest.java
+++ b/tez-dag/src/main/java/org/apache/tez/dag/app/dag/event/TaskAttemptEventKillRequest.java
@@ -17,19 +17,29 @@
 */
 package org.apache.tez.dag.app.dag.event;
 
+import org.apache.tez.dag.records.TaskAttemptTerminationCause;
 import org.apache.tez.dag.records.TezTaskAttemptID;
 
-public class TaskAttemptEventKillRequest extends TaskAttemptEvent {
+public class TaskAttemptEventKillRequest extends TaskAttemptEvent 
+  implements DiagnosableEvent, TaskAttemptEventTerminationCauseEvent {
 
   private final String message;
+  private final TaskAttemptTerminationCause errorCause;
 
-  public TaskAttemptEventKillRequest(TezTaskAttemptID id, String message) {
+  public TaskAttemptEventKillRequest(TezTaskAttemptID id, String message, TaskAttemptTerminationCause err) {
     super(id, TaskAttemptEventType.TA_KILL_REQUEST);
     this.message = message;
+    this.errorCause = err;
   }
 
-  public String getMessage() {
-    return this.message;
+  @Override
+  public String getDiagnosticInfo() {
+    return message;
+  }
+
+  @Override
+  public TaskAttemptTerminationCause getTerminationCause() {
+    return errorCause;
   }
 
 }
diff --git a/tez-dag/src/main/java/org/apache/tez/dag/app/dag/event/TaskAttemptEventNodeFailed.java b/tez-dag/src/main/java/org/apache/tez/dag/app/dag/event/TaskAttemptEventNodeFailed.java
index 6d97466..541ef00 100644
--- a/tez-dag/src/main/java/org/apache/tez/dag/app/dag/event/TaskAttemptEventNodeFailed.java
+++ b/tez-dag/src/main/java/org/apache/tez/dag/app/dag/event/TaskAttemptEventNodeFailed.java
@@ -17,17 +17,20 @@
 
 package org.apache.tez.dag.app.dag.event;
 
+import org.apache.tez.dag.records.TaskAttemptTerminationCause;
 import org.apache.tez.dag.records.TezTaskAttemptID;
 
 public class TaskAttemptEventNodeFailed extends TaskAttemptEvent 
-  implements DiagnosableEvent{
+  implements DiagnosableEvent, TaskAttemptEventTerminationCauseEvent {
 
   private final String message;
+  private final TaskAttemptTerminationCause errorCause;
 
   public TaskAttemptEventNodeFailed(TezTaskAttemptID id,
-      String diagMessage) {
+      String diagMessage, TaskAttemptTerminationCause errorCause) {
     super(id, TaskAttemptEventType.TA_NODE_FAILED);
     this.message = diagMessage;
+    this.errorCause = errorCause;
   }
 
   @Override
@@ -35,4 +38,9 @@
     return this.message;
   }
 
+  @Override
+  public TaskAttemptTerminationCause getTerminationCause() {
+    return errorCause;
+  }
+
 }
diff --git a/tez-dag/src/main/java/org/apache/tez/dag/app/dag/event/TaskAttemptEventOutputFailed.java b/tez-dag/src/main/java/org/apache/tez/dag/app/dag/event/TaskAttemptEventOutputFailed.java
index 678e1e7..6bc110a 100644
--- a/tez-dag/src/main/java/org/apache/tez/dag/app/dag/event/TaskAttemptEventOutputFailed.java
+++ b/tez-dag/src/main/java/org/apache/tez/dag/app/dag/event/TaskAttemptEventOutputFailed.java
@@ -18,10 +18,12 @@
 
 package org.apache.tez.dag.app.dag.event;
 
+import org.apache.tez.dag.records.TaskAttemptTerminationCause;
 import org.apache.tez.dag.records.TezTaskAttemptID;
 import org.apache.tez.runtime.api.impl.TezEvent;
 
-public class TaskAttemptEventOutputFailed extends TaskAttemptEvent {
+public class TaskAttemptEventOutputFailed extends TaskAttemptEvent 
+  implements TaskAttemptEventTerminationCauseEvent {
   
   private TezEvent inputFailedEvent;
   private int consumerTaskNumber;
@@ -40,5 +42,10 @@
   public int getConsumerTaskNumber() {
     return consumerTaskNumber;
   }
+
+  @Override
+  public TaskAttemptTerminationCause getTerminationCause() {
+    return TaskAttemptTerminationCause.OUTPUT_LOST;
+  }
   
 }
diff --git a/tez-dag/src/main/java/org/apache/tez/dag/app/dag/event/TaskAttemptEventStatusUpdate.java b/tez-dag/src/main/java/org/apache/tez/dag/app/dag/event/TaskAttemptEventStatusUpdate.java
index 13577c5..c5a6ea7 100644
--- a/tez-dag/src/main/java/org/apache/tez/dag/app/dag/event/TaskAttemptEventStatusUpdate.java
+++ b/tez-dag/src/main/java/org/apache/tez/dag/app/dag/event/TaskAttemptEventStatusUpdate.java
@@ -18,12 +18,6 @@
 
 package org.apache.tez.dag.app.dag.event;
 
-import java.util.List;
-import java.util.concurrent.atomic.AtomicBoolean;
-
-import org.apache.tez.common.counters.DAGCounter;
-import org.apache.tez.common.counters.TezCounters;
-import org.apache.tez.dag.api.oldrecords.TaskAttemptState;
 import org.apache.tez.dag.records.TezTaskAttemptID;
 import org.apache.tez.runtime.api.events.TaskStatusUpdateEvent;
 
@@ -40,51 +34,4 @@
   public TaskStatusUpdateEvent getStatusEvent() {
     return this.taskAttemptStatus;
   }
-
-  private TaskAttemptStatusOld reportedTaskAttemptStatus;
-
-  public TaskAttemptEventStatusUpdate(TezTaskAttemptID id,
-      TaskAttemptStatusOld taskAttemptStatus) {
-    super(id, TaskAttemptEventType.TA_STATUS_UPDATE);
-    this.reportedTaskAttemptStatus = taskAttemptStatus;
-  }
-
-  public TaskAttemptStatusOld getReportedTaskAttemptStatus() {
-    return reportedTaskAttemptStatus;
-  }
-
-  /**
-   * The internal TaskAttemptStatus object corresponding to remote Task status.
-   * 
-   */
-  public static class TaskAttemptStatusOld {
-    
-    private AtomicBoolean localitySet = new AtomicBoolean(false);
-
-    public TezTaskAttemptID id;
-    public float progress;
-    public TezCounters counters;
-    public String stateString;
-    //public Phase phase;
-    public long outputSize;
-    public List<TezTaskAttemptID> fetchFailedMaps;
-    public long mapFinishTime;
-    public long shuffleFinishTime;
-    public long sortFinishTime;
-    public TaskAttemptState taskState;
-
-    public void setLocalityCounter(DAGCounter localityCounter) {
-      if (!localitySet.get()) {
-        localitySet.set(true);
-        if (counters == null) {
-          counters = new TezCounters();
-        }
-        if (localityCounter != null) {
-          counters.findCounter(localityCounter).increment(1);
-          // TODO Maybe validate that the correct value is being set.
-        }
-      }
-    }
-
-  }
 }
diff --git a/tez-dag/src/main/java/org/apache/tez/dag/app/dag/event/TaskAttemptEventTerminationCauseEvent.java b/tez-dag/src/main/java/org/apache/tez/dag/app/dag/event/TaskAttemptEventTerminationCauseEvent.java
new file mode 100644
index 0000000..70c20e3
--- /dev/null
+++ b/tez-dag/src/main/java/org/apache/tez/dag/app/dag/event/TaskAttemptEventTerminationCauseEvent.java
@@ -0,0 +1,26 @@
+/**
+* Licensed to the Apache Software Foundation (ASF) under one
+* or more contributor license agreements.  See the NOTICE file
+* distributed with this work for additional information
+* regarding copyright ownership.  The ASF licenses this file
+* to you under the Apache License, Version 2.0 (the
+* "License"); you may not use this file except in compliance
+* with the License.  You may obtain a copy of the License at
+*
+*     http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+*/
+
+package org.apache.tez.dag.app.dag.event;
+
+import org.apache.tez.dag.records.TaskAttemptTerminationCause;
+
+public interface TaskAttemptEventTerminationCauseEvent {
+  /** Returns the {@link TaskAttemptTerminationCause} carried by the implementing event. */
+  TaskAttemptTerminationCause getTerminationCause();
+}
diff --git a/tez-dag/src/main/java/org/apache/tez/dag/app/dag/event/TaskAttemptEventType.java b/tez-dag/src/main/java/org/apache/tez/dag/app/dag/event/TaskAttemptEventType.java
index c8eec1b..b7aca36 100644
--- a/tez-dag/src/main/java/org/apache/tez/dag/app/dag/event/TaskAttemptEventType.java
+++ b/tez-dag/src/main/java/org/apache/tez/dag/app/dag/event/TaskAttemptEventType.java
@@ -29,8 +29,7 @@
 //Producer: TaskAttemptListener
   TA_STARTED_REMOTELY,
   TA_STATUS_UPDATE,
-  TA_DIAGNOSTICS_UPDATE,
-  TA_COMMIT_PENDING,
+  TA_DIAGNOSTICS_UPDATE, // REMOVE THIS - UNUSED
   TA_DONE,
   TA_FAILED,
   TA_TIMED_OUT,
diff --git a/tez-dag/src/main/java/org/apache/tez/dag/app/dag/event/TaskEventTermination.java b/tez-dag/src/main/java/org/apache/tez/dag/app/dag/event/TaskEventTermination.java
index 73d5744..d48a0bf 100644
--- a/tez-dag/src/main/java/org/apache/tez/dag/app/dag/event/TaskEventTermination.java
+++ b/tez-dag/src/main/java/org/apache/tez/dag/app/dag/event/TaskEventTermination.java
@@ -18,22 +18,23 @@
 
 package org.apache.tez.dag.app.dag.event;
 
-import org.apache.tez.dag.app.dag.TaskTerminationCause;
+import org.apache.tez.dag.records.TaskAttemptTerminationCause;
 import org.apache.tez.dag.records.TezTaskID;
 
-public class TaskEventTermination extends TaskEvent implements DiagnosableEvent{
+public class TaskEventTermination extends TaskEvent implements DiagnosableEvent,
+    TaskAttemptEventTerminationCauseEvent {
 
-  private TaskTerminationCause terminationCause;
-  private String diagnostics;
+  private final String diagnostics;
+  private final TaskAttemptTerminationCause errorCause;
   
-  public TaskEventTermination(TezTaskID taskID, TaskTerminationCause terminationCause) {
+  public TaskEventTermination(TezTaskID taskID, TaskAttemptTerminationCause errorCause, String diagnostics) {
     super(taskID, TaskEventType.T_TERMINATE);
-    this.terminationCause = terminationCause;
-    this.diagnostics = "Task is terminated due to:" + terminationCause.name();
-  }
-
-  public TaskTerminationCause getTerminationCause() {
-    return terminationCause;
+    this.errorCause = errorCause;
+    if (diagnostics != null) {
+      this.diagnostics = diagnostics;
+    } else {
+      this.diagnostics = "Task is terminated due to: " + errorCause.name();
+    }
   }
 
   @Override
@@ -41,4 +42,9 @@
     return diagnostics;
   }
 
+  @Override
+  public TaskAttemptTerminationCause getTerminationCause() {
+    return errorCause;
+  }
+
 }
diff --git a/tez-dag/src/main/java/org/apache/tez/dag/app/dag/event/VertexEventTaskAttemptStatusUpdate.java b/tez-dag/src/main/java/org/apache/tez/dag/app/dag/event/VertexEventTaskAttemptStatusUpdate.java
new file mode 100644
index 0000000..696680d
--- /dev/null
+++ b/tez-dag/src/main/java/org/apache/tez/dag/app/dag/event/VertexEventTaskAttemptStatusUpdate.java
@@ -0,0 +1,60 @@
+/**
+* Licensed to the Apache Software Foundation (ASF) under one
+* or more contributor license agreements.  See the NOTICE file
+* distributed with this work for additional information
+* regarding copyright ownership.  The ASF licenses this file
+* to you under the Apache License, Version 2.0 (the
+* "License"); you may not use this file except in compliance
+* with the License.  You may obtain a copy of the License at
+*
+*     http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+*/
+
+package org.apache.tez.dag.app.dag.event;
+
+import org.apache.tez.dag.api.oldrecords.TaskAttemptState;
+import org.apache.tez.dag.records.TezTaskAttemptID;
+
+/** Status update for one task attempt, addressed to the vertex that owns the attempt. */
+public class VertexEventTaskAttemptStatusUpdate extends VertexEvent {
+  final TezTaskAttemptID id;
+  final TaskAttemptState state;
+  final long timestamp;
+  final boolean justStarted;
+
+  public VertexEventTaskAttemptStatusUpdate(TezTaskAttemptID taId, TaskAttemptState state,
+      long timestamp, boolean justStarted) {
+    super(taId.getTaskID().getVertexID(), VertexEventType.V_TASK_ATTEMPT_STATUS_UPDATE);
+    this.id = taId;
+    this.state = state;
+    this.timestamp = timestamp;
+    this.justStarted = justStarted;
+  }
+
+  public VertexEventTaskAttemptStatusUpdate(TezTaskAttemptID taId, TaskAttemptState state,
+      long timestamp) {
+    this(taId, state, timestamp, false);
+  }
+
+  public TezTaskAttemptID getAttemptId() {
+    return this.id;
+  }
+
+  public TaskAttemptState getTaskAttemptState() {
+    return this.state;
+  }
+
+  public long getTimestamp() {
+    return this.timestamp;
+  }
+
+  public boolean hasJustStarted() {
+    return this.justStarted;
+  }
+}
diff --git a/tez-dag/src/main/java/org/apache/tez/dag/app/dag/event/VertexEventType.java b/tez-dag/src/main/java/org/apache/tez/dag/app/dag/event/VertexEventType.java
index b4f7e29..5565f93 100644
--- a/tez-dag/src/main/java/org/apache/tez/dag/app/dag/event/VertexEventType.java
+++ b/tez-dag/src/main/java/org/apache/tez/dag/app/dag/event/VertexEventType.java
@@ -40,6 +40,9 @@
   V_TASK_RESCHEDULED,
   V_TASK_ATTEMPT_COMPLETED,
   
+  //Producer:TaskAttempt
+  V_TASK_ATTEMPT_STATUS_UPDATE,
+  
   //Producer:Any component
   V_INTERNAL_ERROR,
   V_MANAGER_USER_CODE_ERROR,
diff --git a/tez-dag/src/main/java/org/apache/tez/dag/app/dag/impl/DAGImpl.java b/tez-dag/src/main/java/org/apache/tez/dag/app/dag/impl/DAGImpl.java
index de62752..bec3626 100644
--- a/tez-dag/src/main/java/org/apache/tez/dag/app/dag/impl/DAGImpl.java
+++ b/tez-dag/src/main/java/org/apache/tez/dag/app/dag/impl/DAGImpl.java
@@ -111,6 +111,7 @@
 import org.apache.tez.dag.history.events.VertexGroupCommitStartedEvent;
 import org.apache.tez.dag.records.TezDAGID;
 import org.apache.tez.dag.records.TezTaskAttemptID;
+import org.apache.tez.dag.records.TezTaskID;
 import org.apache.tez.dag.records.TezVertexID;
 import org.apache.tez.dag.utils.TaskSpecificLaunchCmdOption;
 import org.apache.tez.dag.utils.RelocalizationUtils;
@@ -315,6 +316,7 @@
           // Ignore-able events
           .addTransition(DAGState.FAILED, DAGState.FAILED,
               EnumSet.of(DAGEventType.DAG_KILL,
+                  DAGEventType.DAG_START,
                   DAGEventType.DAG_VERTEX_RERUNNING,
                   DAGEventType.DAG_SCHEDULER_UPDATE,
                   DAGEventType.DAG_VERTEX_COMPLETED))
@@ -768,6 +770,10 @@
         .getAttempt(taId);
   }
 
+  public TaskImpl getTask(TezTaskID tId) {
+    return (TaskImpl) getVertex(tId.getVertexID()).getTask(tId);
+  }
+
   protected void initializeVerticesAndStart() {
     for (Vertex v : vertices.values()) {
       if (v.getInputVerticesCount() == 0) {
diff --git a/tez-dag/src/main/java/org/apache/tez/dag/app/dag/impl/TaskAttemptImpl.java b/tez-dag/src/main/java/org/apache/tez/dag/app/dag/impl/TaskAttemptImpl.java
index deaba42..007774f 100644
--- a/tez-dag/src/main/java/org/apache/tez/dag/app/dag/impl/TaskAttemptImpl.java
+++ b/tez-dag/src/main/java/org/apache/tez/dag/app/dag/impl/TaskAttemptImpl.java
@@ -54,6 +54,7 @@
 import org.apache.tez.common.counters.DAGCounter;
 import org.apache.tez.common.counters.TezCounters;
 import org.apache.tez.dag.api.ProcessorDescriptor;
+import org.apache.tez.dag.api.TezConfiguration;
 import org.apache.tez.dag.api.TezUncheckedException;
 import org.apache.tez.dag.api.TaskLocationHint;
 import org.apache.tez.dag.api.oldrecords.TaskAttemptReport;
@@ -75,6 +76,7 @@
 import org.apache.tez.dag.app.dag.event.TaskAttemptEventAttemptFailed;
 import org.apache.tez.dag.app.dag.event.TaskAttemptEventContainerTerminated;
 import org.apache.tez.dag.app.dag.event.TaskAttemptEventDiagnosticsUpdate;
+import org.apache.tez.dag.app.dag.event.TaskAttemptEventTerminationCauseEvent;
 import org.apache.tez.dag.app.dag.event.TaskAttemptEventOutputFailed;
 import org.apache.tez.dag.app.dag.event.TaskAttemptEventSchedule;
 import org.apache.tez.dag.app.dag.event.TaskAttemptEventStartedRemotely;
@@ -83,12 +85,14 @@
 import org.apache.tez.dag.app.dag.event.TaskEventTAUpdate;
 import org.apache.tez.dag.app.dag.event.TaskEventType;
 import org.apache.tez.dag.app.dag.event.VertexEventRouteEvent;
+import org.apache.tez.dag.app.dag.event.VertexEventTaskAttemptStatusUpdate;
 import org.apache.tez.dag.app.rm.AMSchedulerEventTAEnded;
 import org.apache.tez.dag.app.rm.AMSchedulerEventTALaunchRequest;
 import org.apache.tez.dag.history.DAGHistoryEvent;
 import org.apache.tez.dag.history.HistoryEvent;
 import org.apache.tez.dag.history.events.TaskAttemptFinishedEvent;
 import org.apache.tez.dag.history.events.TaskAttemptStartedEvent;
+import org.apache.tez.dag.records.TaskAttemptTerminationCause;
 import org.apache.tez.dag.records.TezDAGID;
 import org.apache.tez.dag.records.TezTaskAttemptID;
 import org.apache.tez.dag.records.TezTaskID;
@@ -122,6 +126,7 @@
   protected EventHandler eventHandler;
   private final TezTaskAttemptID attemptId;
   private final Clock clock;
+  private TaskAttemptTerminationCause terminationCause = TaskAttemptTerminationCause.UNKNOWN_ERROR;
   private final List<String> diagnostics = new ArrayList<String>();
   private final Lock readLock;
   private final Lock writeLock;
@@ -289,7 +294,6 @@
           EnumSet.of(TaskAttemptEventType.TA_STARTED_REMOTELY,
               TaskAttemptEventType.TA_CONTAINER_TERMINATED_BY_SYSTEM,
               TaskAttemptEventType.TA_STATUS_UPDATE,
-              TaskAttemptEventType.TA_COMMIT_PENDING,
               TaskAttemptEventType.TA_DONE, TaskAttemptEventType.TA_FAILED,
               TaskAttemptEventType.TA_TIMED_OUT,
               TaskAttemptEventType.TA_KILL_REQUEST,
@@ -311,7 +315,6 @@
           EnumSet.of(TaskAttemptEventType.TA_STARTED_REMOTELY,
               TaskAttemptEventType.TA_CONTAINER_TERMINATED_BY_SYSTEM,
               TaskAttemptEventType.TA_STATUS_UPDATE,
-              TaskAttemptEventType.TA_COMMIT_PENDING,
               TaskAttemptEventType.TA_DONE, TaskAttemptEventType.TA_FAILED,
               TaskAttemptEventType.TA_TIMED_OUT,
               TaskAttemptEventType.TA_KILL_REQUEST,
@@ -330,7 +333,6 @@
               TaskAttemptEventType.TA_SCHEDULE,
               TaskAttemptEventType.TA_CONTAINER_TERMINATED_BY_SYSTEM,
               TaskAttemptEventType.TA_STATUS_UPDATE,
-              TaskAttemptEventType.TA_COMMIT_PENDING,
               TaskAttemptEventType.TA_DONE, TaskAttemptEventType.TA_FAILED,
               TaskAttemptEventType.TA_TIMED_OUT,
               TaskAttemptEventType.TA_KILL_REQUEST,
@@ -350,7 +352,6 @@
               TaskAttemptEventType.TA_SCHEDULE,
               TaskAttemptEventType.TA_CONTAINER_TERMINATED_BY_SYSTEM,
               TaskAttemptEventType.TA_STATUS_UPDATE,
-              TaskAttemptEventType.TA_COMMIT_PENDING,
               TaskAttemptEventType.TA_DONE, TaskAttemptEventType.TA_FAILED,
               TaskAttemptEventType.TA_TIMED_OUT,
               TaskAttemptEventType.TA_KILL_REQUEST,
@@ -413,7 +414,7 @@
     this.clock = clock;
     this.taskHeartbeatHandler = taskHeartbeatHandler;
     this.appContext = appContext;
-    this.reportedStatus = new TaskAttemptStatus();
+    this.reportedStatus = new TaskAttemptStatus(this.attemptId);
     initTaskAttemptStatus(reportedStatus);
     RackResolver.init(conf);
     this.stateMachine = stateMachineFactory.make(this);
@@ -496,6 +497,11 @@
       readLock.unlock();
     }
   }
+  
+  @Override
+  public TaskAttemptTerminationCause getTerminationCause() {
+    return terminationCause;
+  }
 
   @Override
   public TezCounters getCounters() {
@@ -743,6 +749,8 @@
         this.reportedStatus.counters = tEvent.getCounters();
         this.reportedStatus.progress = 1f;
         this.reportedStatus.state = tEvent.getState();
+        this.terminationCause = tEvent.getTaskAttemptError() != null ? tEvent.getTaskAttemptError()
+            : TaskAttemptTerminationCause.UNKNOWN_ERROR;
         this.diagnostics.add(tEvent.getDiagnostics());
         this.recoveredState = tEvent.getState();
         sendEvent(createDAGCounterUpdateEventTAFinished(this, tEvent.getState()));
@@ -957,8 +965,8 @@
 
     TaskAttemptFinishedEvent finishEvt = new TaskAttemptFinishedEvent(
         attemptId, getTask().getVertex().getName(), getLaunchTime(),
-        getFinishTime(), TaskAttemptState.SUCCEEDED, "",
-        getCounters());
+        getFinishTime(), TaskAttemptState.SUCCEEDED, null,
+        "", getCounters());
     // FIXME how do we store information regd completion events
     this.appContext.getHistoryHandler().handle(
         new DAGHistoryEvent(getDAGID(), finishEvt));
@@ -969,9 +977,9 @@
     TaskAttemptFinishedEvent finishEvt = new TaskAttemptFinishedEvent(
         attemptId, getTask().getVertex().getName(), getLaunchTime(),
         clock.getTime(), state,
+        terminationCause,
         StringUtils.join(
-            getDiagnostics(), LINE_SEPARATOR),
-        getCounters());
+            getDiagnostics(), LINE_SEPARATOR), getCounters());
     // FIXME how do we store information regd completion events
     this.appContext.getHistoryHandler().handle(
         new DAGHistoryEvent(getDAGID(), finishEvt));
@@ -997,11 +1005,12 @@
         remoteTaskSpec = ta.createRemoteTaskSpec();
         LOG.info("remoteTaskSpec:" + remoteTaskSpec);
       } catch (AMUserCodeException e) {
-        String msg = "Exception in " + e.getSource() + ", taskAttempt=" + ta.getTaskID();
+        String msg = "Exception in " + e.getSource() + ", taskAttempt=" + ta;
         LOG.error(msg, e);
         String diag = msg + ", " + e.getMessage() + ", " + ExceptionUtils.getStackTrace(e.getCause());
-        new TerminatedBeforeRunningTransition(FAILED_HELPER).transition(ta,
-            new TaskAttemptEventAttemptFailed(ta.getID(), TaskAttemptEventType.TA_FAILED, diag));
+        new TerminateTransition(FAILED_HELPER).transition(ta,
+            new TaskAttemptEventAttemptFailed(ta.getID(), TaskAttemptEventType.TA_FAILED, diag,
+                TaskAttemptTerminationCause.APPLICATION_ERROR));
         return TaskAttemptStateInternal.FAILED;
       }
       // Create startTaskRequest
@@ -1083,6 +1092,13 @@
       if (event instanceof DiagnosableEvent) {
         ta.addDiagnosticInfo(((DiagnosableEvent) event).getDiagnosticInfo());
       }
+      
+      // this should catch at test time if any new events are missing the error cause
+      assert event instanceof TaskAttemptEventTerminationCauseEvent;
+      
+      if (event instanceof TaskAttemptEventTerminationCauseEvent) {
+        ta.trySetTerminationCause(((TaskAttemptEventTerminationCauseEvent) event).getTerminationCause());
+      }
 
       ta.sendEvent(createDAGCounterUpdateEventTAFinished(ta,
           helper.getTaskAttemptState()));
@@ -1151,10 +1167,20 @@
       // Inform the Task
       ta.sendEvent(new TaskEventTAUpdate(ta.attemptId,
           TaskEventType.T_ATTEMPT_LAUNCHED));
+      
+      if (ta.isSpeculationEnabled()) {
+        ta.sendEvent(new VertexEventTaskAttemptStatusUpdate(ta.attemptId, TaskAttemptState.RUNNING,
+            ta.launchTime, true));
+      }
 
       ta.taskHeartbeatHandler.register(ta.attemptId);
     }
   }
+  
+  private boolean isSpeculationEnabled() {
+    boolean fallback = TezConfiguration.TEZ_AM_SPECULATION_ENABLED_DEFAULT; // default value
+    return conf.getBoolean(TezConfiguration.TEZ_AM_SPECULATION_ENABLED, fallback);
+  }
 
   protected static class TerminatedBeforeRunningTransition extends
       TerminateTransition {
@@ -1235,6 +1261,10 @@
 
       ta.updateProgressSplits();
 
+      if (ta.isSpeculationEnabled()) {
+        ta.sendEvent(new VertexEventTaskAttemptStatusUpdate(ta.attemptId, ta.getState(),
+            ta.clock.getTime()));
+      }
     }
   }
 
@@ -1259,6 +1289,14 @@
 
       // Unregister from the TaskHeartbeatHandler.
       ta.taskHeartbeatHandler.unregister(ta.attemptId);
+      
+      ta.reportedStatus.state = TaskAttemptState.SUCCEEDED;
+      ta.reportedStatus.progress = 1.0f;
+      
+      if (ta.isSpeculationEnabled()) {
+        ta.sendEvent(new VertexEventTaskAttemptStatusUpdate(ta.attemptId, TaskAttemptState.SUCCEEDED,
+            ta.clock.getTime()));
+      }
 
       // TODO maybe. For reuse ... Stacking pulls for a reduce task, even if the
       // TA finishes independently. // Will likely be the Job's responsibility.
@@ -1278,6 +1316,11 @@
     public void transition(TaskAttemptImpl ta, TaskAttemptEvent event) {
       super.transition(ta, event);
       ta.taskHeartbeatHandler.unregister(ta.attemptId);
+      ta.reportedStatus.state = helper.getTaskAttemptState(); // FAILED or KILLED
+      if (ta.isSpeculationEnabled()) {
+        ta.sendEvent(new VertexEventTaskAttemptStatusUpdate(ta.attemptId, helper.getTaskAttemptState(),
+            ta.clock.getTime()));
+      }
     }
   }
 
@@ -1455,6 +1498,13 @@
       sendEvent(new VertexEventRouteEvent(vertex.getVertexId(), tezIfEvents));
     }
   }
+  
+  private void trySetTerminationCause(TaskAttemptTerminationCause err) {
+    // keep only the first cause; a null cause is ignored so it cannot replace UNKNOWN_ERROR
+    if (err != null && terminationCause == TaskAttemptTerminationCause.UNKNOWN_ERROR) {
+      terminationCause = err;
+    }
+  }
 
   private void initTaskAttemptStatus(TaskAttemptStatus result) {
     result.progress = 0.0f;
diff --git a/tez-dag/src/main/java/org/apache/tez/dag/app/dag/impl/TaskImpl.java b/tez-dag/src/main/java/org/apache/tez/dag/app/dag/impl/TaskImpl.java
index 4ded9be..a4c4dee 100644
--- a/tez-dag/src/main/java/org/apache/tez/dag/app/dag/impl/TaskImpl.java
+++ b/tez-dag/src/main/java/org/apache/tez/dag/app/dag/impl/TaskImpl.java
@@ -33,6 +33,7 @@
 import com.google.common.base.Preconditions;
 import com.google.common.base.Predicate;
 import com.google.common.collect.Maps;
+
 import org.apache.commons.lang.StringUtils;
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
@@ -45,8 +46,8 @@
 import org.apache.hadoop.yarn.state.SingleArcTransition;
 import org.apache.hadoop.yarn.state.StateMachineFactory;
 import org.apache.hadoop.yarn.util.Clock;
+import org.apache.tez.common.counters.TaskCounter;
 import org.apache.tez.common.counters.TezCounters;
-import org.apache.tez.dag.api.ProcessorDescriptor;
 import org.apache.tez.dag.api.TezConfiguration;
 import org.apache.tez.dag.api.TezUncheckedException;
 import org.apache.tez.dag.api.oldrecords.TaskAttemptState;
@@ -85,6 +86,7 @@
 import org.apache.tez.dag.history.events.TaskAttemptStartedEvent;
 import org.apache.tez.dag.history.events.TaskFinishedEvent;
 import org.apache.tez.dag.history.events.TaskStartedEvent;
+import org.apache.tez.dag.records.TaskAttemptTerminationCause;
 import org.apache.tez.dag.records.TezTaskAttemptID;
 import org.apache.tez.dag.records.TezTaskID;
 import org.apache.tez.dag.records.TezVertexID;
@@ -92,6 +94,7 @@
 import org.apache.tez.runtime.api.impl.TezEvent;
 
 import com.google.common.annotations.VisibleForTesting;
+
 import org.apache.tez.state.OnStateChangedCallback;
 import org.apache.tez.state.StateMachineTez;
 
@@ -117,6 +120,7 @@
   private final Lock readLock;
   private final Lock writeLock;
   private final List<String> diagnostics = new ArrayList<String>();
+  private TezCounters counters = new TezCounters();
   // TODO Metrics
   //private final MRAppMetrics metrics;
   protected final AppContext appContext;
@@ -415,15 +419,13 @@
 
   @Override
   public TezCounters getCounters() {
-    TezCounters counters = null;
+    TezCounters counters = new TezCounters();
+    counters.incrAllCounters(this.counters);
     readLock.lock();
     try {
       TaskAttempt bestAttempt = selectBestAttempt();
       if (bestAttempt != null) {
-        counters = bestAttempt.getCounters();
-      } else {
-        counters = TaskAttemptImpl.EMPTY_COUNTERS;
-//        counters.groups = new HashMap<CharSequence, CounterGroup>();
+        counters.incrAllCounters(bestAttempt.getCounters());
       }
       return counters;
     } finally {
@@ -699,7 +701,7 @@
       if (getState() != TaskState.RUNNING) {
         LOG.info("Task not running. Issuing kill to bad commit attempt " + taskAttemptID);
         eventHandler.handle(new TaskAttemptEventKillRequest(taskAttemptID
-            , "Task not running. Bad attempt."));
+            , "Task not running. Bad attempt.", TaskAttemptTerminationCause.TERMINATED_ORPHANED));
         return false;
       }
       if (commitAttempt == null) {
@@ -888,7 +890,7 @@
     task.commitAttempt = null;
     task.successfulAttempt = null;
   }
-
+  
   /**
   * @return a String representation of the splits.
   *
@@ -987,6 +989,7 @@
     @Override
     public void transition(TaskImpl task, TaskEvent event) {
       LOG.info("Scheduling a redundant attempt for task " + task.taskId);
+      task.counters.findCounter(TaskCounter.NUM_SPECULATIONS).increment(1);
       task.addAndScheduleAttempt();
     }
   }
@@ -1016,6 +1019,7 @@
       if (task.historyTaskStartGenerated) {
         task.logJobHistoryTaskFinishedEvent();
       }
+      TaskAttempt successfulAttempt = task.attempts.get(successTaId);
 
       // issue kill to all other attempts
       for (TaskAttempt attempt : task.attempts.values()) {
@@ -1024,9 +1028,21 @@
             //  TA_KILL message to an attempt that doesn't need one for
             //  other reasons.
             !attempt.isFinished()) {
-          LOG.info("Issuing kill to other attempt " + attempt.getID());
+          LOG.info("Issuing kill to other attempt " + attempt.getID() + " as attempt: " +
+            task.successfulAttempt + " has succeeded");
+          String diagnostics = null;
+          TaskAttemptTerminationCause errCause = null;
+          if (attempt.getLaunchTime() < successfulAttempt.getLaunchTime()) {
+            diagnostics = "Killed this attempt as other speculative attempt : " + successTaId
+                + " succeeded";
+            errCause = TaskAttemptTerminationCause.TERMINATED_EFFECTIVE_SPECULATION;
+          } else {
+            diagnostics = "Killed this speculative attempt as original attempt: " + successTaId
+                + " succeeded";
+            errCause = TaskAttemptTerminationCause.TERMINATED_INEFFECTIVE_SPECULATION;
+          }
           task.eventHandler.handle(new TaskAttemptEventKillRequest(attempt
-              .getID(), "Alternate attempt succeeded"));
+              .getID(), diagnostics, errCause));
         }
       }
       // send notification to DAG scheduler
@@ -1336,12 +1352,6 @@
 
     @Override
     public TaskStateInternal transition(TaskImpl task, TaskEvent event) {
-      // verify that this occurs only for map task
-      // TODO: consider moving it to MapTaskImpl
-      if (task.leafVertex) {
-        LOG.error("Unexpected event for task of leaf vertex " + event.getType());
-        task.internalError(event.getType());
-      }
 
       TaskEventTAUpdate attemptEvent = (TaskEventTAUpdate) event;
       TezTaskAttemptID attemptId = attemptEvent.getTaskAttemptID();
@@ -1365,6 +1375,8 @@
         return TaskStateInternal.SCHEDULED;
       } else {
         // nothing to do
+        LOG.info("Ignoring kill of attempt: " + attemptId + " because attempt: " +
+                 task.successfulAttempt + " is already successful");
         return TaskStateInternal.SUCCEEDED;
       }
     }
@@ -1411,14 +1423,13 @@
     }
   }
 
-  private void killUnfinishedAttempt(TaskAttempt attempt, String logMsg) {
+  private void killUnfinishedAttempt(TaskAttempt attempt, String logMsg, TaskAttemptTerminationCause errorCause) {
     if (commitAttempt != null && commitAttempt.equals(attempt)) {
       LOG.info("Removing commit attempt: " + commitAttempt);
       commitAttempt = null;
     }
     if (attempt != null && !attempt.isFinished()) {
-      eventHandler.handle(new TaskAttemptEventKillRequest(attempt.getID(),
-          logMsg));
+      eventHandler.handle(new TaskAttemptEventKillRequest(attempt.getID(), logMsg, errorCause));
     }
   }
 
@@ -1440,8 +1451,8 @@
       task.addDiagnosticInfo(terminateEvent.getDiagnosticInfo());
       // issue kill to all non finished attempts
       for (TaskAttempt attempt : task.attempts.values()) {
-        task.killUnfinishedAttempt
-            (attempt, "Task KILL is received. Killing attempt!");
+        task.killUnfinishedAttempt(attempt, "Task KILL is received. Killing attempt. Diagnostics: "
+            + terminateEvent.getDiagnosticInfo(), terminateEvent.getTerminationCause());
       }
     }
   }
diff --git a/tez-dag/src/main/java/org/apache/tez/dag/app/dag/impl/VertexImpl.java b/tez-dag/src/main/java/org/apache/tez/dag/app/dag/impl/VertexImpl.java
index d19c4cc..3246f38 100644
--- a/tez-dag/src/main/java/org/apache/tez/dag/app/dag/impl/VertexImpl.java
+++ b/tez-dag/src/main/java/org/apache/tez/dag/app/dag/impl/VertexImpl.java
@@ -68,6 +68,7 @@
 import org.apache.tez.dag.api.OutputDescriptor;
 import org.apache.tez.dag.api.ProcessorDescriptor;
 import org.apache.tez.dag.api.RootInputLeafOutput;
+import org.apache.tez.dag.api.TezConfiguration;
 import org.apache.tez.dag.api.TezUncheckedException;
 import org.apache.tez.dag.api.VertexLocationHint;
 import org.apache.tez.dag.api.TaskLocationHint;
@@ -121,11 +122,13 @@
 import org.apache.tez.dag.app.dag.event.VertexEventSourceVertexRecovered;
 import org.apache.tez.dag.app.dag.event.VertexEventSourceVertexStarted;
 import org.apache.tez.dag.app.dag.event.VertexEventTaskAttemptCompleted;
+import org.apache.tez.dag.app.dag.event.VertexEventTaskAttemptStatusUpdate;
 import org.apache.tez.dag.app.dag.event.VertexEventTaskCompleted;
 import org.apache.tez.dag.app.dag.event.VertexEventTaskReschedule;
 import org.apache.tez.dag.app.dag.event.VertexEventTermination;
 import org.apache.tez.dag.app.dag.event.VertexEventType;
 import org.apache.tez.dag.app.dag.impl.DAGImpl.VertexGroupInfo;
+import org.apache.tez.dag.app.dag.speculation.legacy.LegacySpeculator;
 import org.apache.tez.dag.history.DAGHistoryEvent;
 import org.apache.tez.dag.history.HistoryEvent;
 import org.apache.tez.dag.history.events.VertexCommitStartedEvent;
@@ -136,6 +139,7 @@
 import org.apache.tez.dag.history.events.VertexStartedEvent;
 import org.apache.tez.dag.library.vertexmanager.InputReadyVertexManager;
 import org.apache.tez.dag.library.vertexmanager.ShuffleVertexManager;
+import org.apache.tez.dag.records.TaskAttemptTerminationCause;
 import org.apache.tez.dag.records.TezDAGID;
 import org.apache.tez.dag.records.TezTaskAttemptID;
 import org.apache.tez.dag.records.TezTaskID;
@@ -205,6 +209,8 @@
   private Resource taskResource;
 
   private Configuration conf;
+  
+  private final boolean isSpeculationEnabled;
 
   //fields initialized in init
 
@@ -235,6 +241,8 @@
   private static final TaskAttemptCompletedEventTransition
       TASK_ATTEMPT_COMPLETED_EVENT_TRANSITION =
           new TaskAttemptCompletedEventTransition();
+  private static final TaskAttempStatusUpdateEventTransition
+      TASK_ATTEMPT_STATUS_UPDATE_EVENT_TRANSITION = new TaskAttempStatusUpdateEventTransition();
   private static final SourceTaskAttemptCompletedEventTransition
       SOURCE_TASK_ATTEMPT_COMPLETED_EVENT_TRANSITION =
           new SourceTaskAttemptCompletedEventTransition();
@@ -248,6 +256,8 @@
 
   @VisibleForTesting
   final List<TezEvent> pendingInitializerEvents = new LinkedList<TezEvent>();
+  
+  LegacySpeculator speculator;
 
   protected static final
     StateMachineFactory<VertexImpl, VertexState, VertexEventType, VertexEvent>
@@ -460,6 +470,10 @@
               EnumSet.of(VertexState.RUNNING, VertexState.TERMINATING),
               VertexEventType.V_ROUTE_EVENT,
               ROUTE_EVENT_TRANSITION)
+          .addTransition(
+              VertexState.RUNNING,
+              VertexState.RUNNING, VertexEventType.V_TASK_ATTEMPT_STATUS_UPDATE,
+              TASK_ATTEMPT_STATUS_UPDATE_EVENT_TRANSITION)
 
           // Transitions from TERMINATING state.
           .addTransition
@@ -477,6 +491,7 @@
                   VertexEventType.V_MANAGER_USER_CODE_ERROR,
                   VertexEventType.V_ROOT_INPUT_FAILED,
                   VertexEventType.V_SOURCE_VERTEX_STARTED,
+                  VertexEventType.V_TASK_ATTEMPT_STATUS_UPDATE,
                   VertexEventType.V_ROOT_INPUT_INITIALIZED,
                   VertexEventType.V_NULL_EDGE_INITIALIZED,
                   VertexEventType.V_ROUTE_EVENT,
@@ -494,7 +509,6 @@
               VertexEventType.V_TASK_RESCHEDULED,
               new TaskRescheduledAfterVertexSuccessTransition())
 
-          // Ignore-able events
           .addTransition(
               VertexState.SUCCEEDED,
               EnumSet.of(VertexState.SUCCEEDED, VertexState.FAILED),
@@ -506,10 +520,12 @@
               EnumSet.of(VertexState.FAILED, VertexState.ERROR),
               VertexEventType.V_TASK_COMPLETED,
               new TaskCompletedAfterVertexSuccessTransition())
+          // Ignore-able events
           .addTransition(VertexState.SUCCEEDED, VertexState.SUCCEEDED,
               EnumSet.of(VertexEventType.V_TERMINATE,
                   VertexEventType.V_ROOT_INPUT_FAILED,
                   VertexEventType.V_TASK_ATTEMPT_COMPLETED,
+                  VertexEventType.V_TASK_ATTEMPT_STATUS_UPDATE,
                   // after we are done reruns of source tasks should not affect
                   // us. These reruns may be triggered by other consumer vertices.
                   // We should have been in RUNNING state if we had triggered the
@@ -519,6 +535,7 @@
               VertexEventType.V_TASK_ATTEMPT_COMPLETED,
               new TaskAttemptCompletedEventTransition())
 
+
           // Transitions from FAILED state
           .addTransition(
               VertexState.FAILED,
@@ -534,6 +551,7 @@
                   VertexEventType.V_START,
                   VertexEventType.V_ROUTE_EVENT,
                   VertexEventType.V_TASK_ATTEMPT_COMPLETED,
+                  VertexEventType.V_TASK_ATTEMPT_STATUS_UPDATE,
                   VertexEventType.V_TASK_COMPLETED,
                   VertexEventType.V_ONE_TO_ONE_SOURCE_SPLIT,
                   VertexEventType.V_ROOT_INPUT_INITIALIZED,
@@ -558,6 +576,7 @@
                   VertexEventType.V_ROUTE_EVENT,
                   VertexEventType.V_TASK_RESCHEDULED,
                   VertexEventType.V_TASK_ATTEMPT_COMPLETED,
+                  VertexEventType.V_TASK_ATTEMPT_STATUS_UPDATE,
                   VertexEventType.V_ONE_TO_ONE_SOURCE_SPLIT,
                   VertexEventType.V_SOURCE_TASK_ATTEMPT_COMPLETED,
                   VertexEventType.V_TASK_COMPLETED,
@@ -577,6 +596,7 @@
                   VertexEventType.V_ROUTE_EVENT,
                   VertexEventType.V_TERMINATE,
                   VertexEventType.V_MANAGER_USER_CODE_ERROR,
+                  VertexEventType.V_TASK_ATTEMPT_STATUS_UPDATE,
                   VertexEventType.V_TASK_COMPLETED,
                   VertexEventType.V_TASK_ATTEMPT_COMPLETED,
                   VertexEventType.V_ONE_TO_ONE_SOURCE_SPLIT,
@@ -773,7 +793,14 @@
     // Not sending the notifier a parallelism update since this is the initial parallelism
 
     this.dagVertexGroups = dagVertexGroups;
-
+    
+    isSpeculationEnabled = conf.getBoolean(TezConfiguration.TEZ_AM_SPECULATION_ENABLED,
+        TezConfiguration.TEZ_AM_SPECULATION_ENABLED_DEFAULT);
+    
+    if (isSpeculationEnabled()) {
+      speculator = new LegacySpeculator(conf, getAppContext(), this);
+    }
+    
     logIdentifier =  this.getVertexId() + " [" + this.getName() + "]";
     // This "this leak" is okay because the retained pointer is in an
     //  instance variable.
@@ -782,6 +809,10 @@
         stateMachineFactory.make(this), this);
     augmentStateMachine();
   }
+  
+  private boolean isSpeculationEnabled() {
+    return isSpeculationEnabled;
+  }
 
   protected StateMachine<VertexState, VertexEventType, VertexEvent> getStateMachine() {
     return stateMachine;
@@ -1194,6 +1225,12 @@
   }
 
   @Override
+  public void scheduleSpeculativeTask(TezTaskID taskId) {
+    Preconditions.checkState(taskId.getId() < numTasks);
+    eventHandler.handle(new TaskEvent(taskId, TaskEventType.T_ADD_SPEC_ATTEMPT));
+  }
+  
+  @Override
   public void scheduleTasks(List<TaskWithLocationHint> tasksToSchedule) {
     writeLock.lock();
     try {
@@ -1785,12 +1822,17 @@
    */
   void tryEnactKill(VertexTerminationCause trigger,
       TaskTerminationCause taskterminationCause) {
+    // In most cases the dag is shutting down due to some error
+    TaskAttemptTerminationCause errCause = TaskAttemptTerminationCause.TERMINATED_AT_SHUTDOWN;
+    if (taskterminationCause == TaskTerminationCause.DAG_KILL) {
+      errCause = TaskAttemptTerminationCause.TERMINATED_BY_CLIENT;
+    }
     if(trySetTerminationCause(trigger)){
-      LOG.info("Killing tasks in vertex: " + logIdentifier + " due to trigger: "
-          + trigger);
+      String msg = "Killing tasks in vertex: " + logIdentifier + " due to trigger: " + trigger; 
+      LOG.info(msg);
       for (Task task : tasks.values()) {
-        eventHandler.handle(
-            new TaskEventTermination(task.getTaskId(), taskterminationCause));
+        eventHandler.handle( // attempt was terminated because the vertex is shutting down
+            new TaskEventTermination(task.getTaskId(), errCause, msg));
       }
     }
   }
@@ -3282,6 +3324,7 @@
       eventHandler.handle(new VertexEvent(
         this.vertexId, VertexEventType.V_COMPLETED));
     }
+    
     return VertexState.RUNNING;
   }
 
@@ -3483,7 +3526,8 @@
             String msg = "Exception in " + e.getSource() + ", vertex:" + vertex.getLogIdentifier();
             LOG.error(msg, e);
             vertex.addDiagnostic(msg + "," + ExceptionUtils.getStackTrace(e.getCause()));
-            vertex.tryEnactKill(VertexTerminationCause.AM_USERCODE_FAILURE, TaskTerminationCause.AM_USERCODE_FAILURE);
+            vertex.tryEnactKill(VertexTerminationCause.AM_USERCODE_FAILURE,
+                TaskTerminationCause.AM_USERCODE_FAILURE);
             return VertexState.TERMINATING;
           }
         } else {
@@ -3515,6 +3559,23 @@
     }
   }
 
+  private static class TaskAttempStatusUpdateEventTransition implements
+      SingleArcTransition<VertexImpl, VertexEvent> {
+    /** Hands attempt-start and status-update notifications to the vertex's speculator. */
+    @Override
+    public void transition(VertexImpl vertex, VertexEvent event) {
+      VertexEventTaskAttemptStatusUpdate update = (VertexEventTaskAttemptStatusUpdate) event;
+      if (!vertex.isSpeculationEnabled()) {
+        return;
+      }
+      if (update.hasJustStarted()) {
+        vertex.speculator.notifyAttemptStarted(update.getAttemptId(), update.getTimestamp());
+      } else {
+        vertex.speculator.notifyAttemptStatusUpdate(update.getAttemptId(),
+            update.getTaskAttemptState(), update.getTimestamp());
+      }
+    }
+  }
   private static class TaskCompletedTransition implements
       MultipleArcTransition<VertexImpl, VertexEvent, VertexState> {
 
@@ -3857,12 +3918,32 @@
       case TASK_ATTEMPT_FAILED_EVENT:
         {
           checkEventSourceMetadata(vertex, sourceMeta);
+          TaskAttemptTerminationCause errCause = null;
+          switch (sourceMeta.getEventGenerator()) {
+          case INPUT:
+            errCause = TaskAttemptTerminationCause.INPUT_READ_ERROR;
+            break;
+          case PROCESSOR:
+            errCause = TaskAttemptTerminationCause.APPLICATION_ERROR;
+            break;
+          case OUTPUT:
+            errCause = TaskAttemptTerminationCause.OUTPUT_WRITE_ERROR;
+            break;
+          case SYSTEM:
+            errCause = TaskAttemptTerminationCause.FRAMEWORK_ERROR;
+            break;
+          default:
+            throw new TezUncheckedException("Unknown EventProducerConsumerType: " +
+                sourceMeta.getEventGenerator());
+          }
           TaskAttemptFailedEvent taskFailedEvent =
               (TaskAttemptFailedEvent) tezEvent.getEvent();
           vertex.getEventHandler().handle(
               new TaskAttemptEventAttemptFailed(sourceMeta.getTaskAttemptID(),
                   TaskAttemptEventType.TA_FAILED,
-                  "Error: " + taskFailedEvent.getDiagnostics()));
+                  "Error: " + taskFailedEvent.getDiagnostics(), 
+                  errCause)
+              );
         }
         break;
       default:
diff --git a/tez-dag/src/main/java/org/apache/tez/dag/app/dag/speculation/legacy/DataStatistics.java b/tez-dag/src/main/java/org/apache/tez/dag/app/dag/speculation/legacy/DataStatistics.java
new file mode 100644
index 0000000..7e6f1c2
--- /dev/null
+++ b/tez-dag/src/main/java/org/apache/tez/dag/app/dag/speculation/legacy/DataStatistics.java
@@ -0,0 +1,104 @@
+/**
+* Licensed to the Apache Software Foundation (ASF) under one
+* or more contributor license agreements.  See the NOTICE file
+* distributed with this work for additional information
+* regarding copyright ownership.  The ASF licenses this file
+* to you under the Apache License, Version 2.0 (the
+* "License"); you may not use this file except in compliance
+* with the License.  You may obtain a copy of the License at
+*
+*     http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+*/
+
+package org.apache.tez.dag.app.dag.speculation.legacy;
+
+import com.google.common.annotations.VisibleForTesting;
+
+/**
+ * Running count, sum and sum of squares of a series of values, from which the
+ * mean, variance, standard deviation and an outlier threshold are derived.
+ * Every method that reads or changes the series synchronizes on the instance.
+ */
+public class DataStatistics {
+  private int count = 0;
+  private double sum = 0;
+  private double sumSquares = 0;
+
+  /** Creates an empty series. */
+  public DataStatistics() {
+  }
+
+  /** Creates a series that already holds the single value {@code initNum}. */
+  public DataStatistics(double initNum) {
+    this.count = 1;
+    this.sum = initNum;
+    this.sumSquares = initNum * initNum;
+  }
+
+  /** Appends {@code newNum} to the series. */
+  public synchronized void add(double newNum) {
+    this.count++;
+    this.sum += newNum;
+    this.sumSquares += newNum * newNum;
+  }
+
+  /** Swaps the contribution of {@code old} for {@code update}; the count is left unchanged. */
+  @VisibleForTesting
+  synchronized void updateStatistics(double old, double update) {
+    this.sum += update - old;
+    this.sumSquares += (update * update) - (old * old);
+  }
+
+  /** Returns the mean, or {@code Long.MAX_VALUE} while the series is empty. */
+  public synchronized double mean() {
+    // when no data then mean estimate should be large
+    return count == 0 ? Long.MAX_VALUE : sum/count;
+  }
+
+  /** Returns the variance as E(X^2) - E(X)^2, or 0 when at most one value is held. */
+  public synchronized double var() {
+    // E(X^2) - E(X)^2
+    if (count <= 1) {
+      return 0.0;
+    }
+    double mean = mean();
+    // clamped so that a negative difference is never reported as the variance
+    return Math.max((sumSquares/count) - mean * mean, 0.0d);
+  }
+
+  /** Returns the standard deviation, i.e. the square root of {@link #var()}. */
+  public synchronized double std() {
+    return Math.sqrt(this.var());
+  }
+
+  /**
+   * Returns {@code mean() + std() * sigma}, or {@code Long.MAX_VALUE} while the
+   * series is empty.
+   */
+  public synchronized double outlier(float sigma) {
+    if (count != 0) {
+      return mean() + std() * sigma;
+    }
+
+    // when no data available then outlier estimate should be large
+    return Long.MAX_VALUE;
+  }
+
+  /** Returns the number of values held, widened to a double. */
+  public synchronized double count() {
+    return count;
+  }
+
+  /** Describes the series; holds the lock so all reported fields come from one state. */
+  @Override
+  public synchronized String toString() {
+    return "DataStatistics: count is " + count + ", sum is " + sum +
+    ", sumSquares is " + sumSquares + " mean is " + mean() + " std() is " + std();
+  }
+}
\ No newline at end of file
diff --git a/tez-dag/src/main/java/org/apache/tez/dag/app/dag/speculation/legacy/LegacySpeculator.java b/tez-dag/src/main/java/org/apache/tez/dag/app/dag/speculation/legacy/LegacySpeculator.java
new file mode 100644
index 0000000..8f76e05
--- /dev/null
+++ b/tez-dag/src/main/java/org/apache/tez/dag/app/dag/speculation/legacy/LegacySpeculator.java
@@ -0,0 +1,396 @@
+/**
+* Licensed to the Apache Software Foundation (ASF) under one
+* or more contributor license agreements.  See the NOTICE file
+* distributed with this work for additional information
+* regarding copyright ownership.  The ASF licenses this file
+* to you under the Apache License, Version 2.0 (the
+* "License"); you may not use this file except in compliance
+* with the License.  You may obtain a copy of the License at
+*
+*     http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+*/
+
+package org.apache.tez.dag.app.dag.speculation.legacy;
+
+import java.util.HashSet;
+import java.util.Map;
+import java.util.Set;
+import java.util.concurrent.ConcurrentHashMap;
+import java.util.concurrent.ConcurrentMap;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.yarn.util.Clock;
+import org.apache.tez.dag.api.oldrecords.TaskAttemptState;
+import org.apache.tez.dag.api.oldrecords.TaskState;
+import org.apache.tez.dag.app.AppContext;
+import org.apache.tez.dag.app.dag.Task;
+import org.apache.tez.dag.app.dag.TaskAttempt;
+import org.apache.tez.dag.app.dag.Vertex;
+import org.apache.tez.dag.records.TezTaskAttemptID;
+import org.apache.tez.dag.records.TezTaskID;
+
+import com.google.common.base.Preconditions;
+
+/**
+ * Maintains runtime estimation statistics. Periodically updates the
+ * estimates based on progress and decides when to trigger a
+ * speculative attempt. Speculative attempts are triggered when the
+ * estimated runtime is more than a threshold beyond the mean runtime
+ * and the original attempt still has enough estimated runtime left that
+ * the speculative attempt is expected to finish sooner. If the
+ * original is close to completion then we don't start a speculation
+ * because it would likely be a wasted attempt. There is a delay between
+ * successive speculations.
+ */
+public class LegacySpeculator {
+  
+  private static final long ON_SCHEDULE = Long.MIN_VALUE;
+  private static final long ALREADY_SPECULATING = Long.MIN_VALUE + 1;
+  private static final long TOO_NEW = Long.MIN_VALUE + 2;
+  private static final long PROGRESS_IS_GOOD = Long.MIN_VALUE + 3;
+  private static final long NOT_RUNNING = Long.MIN_VALUE + 4;
+  private static final long TOO_LATE_TO_SPECULATE = Long.MIN_VALUE + 5;
+
+  private static final long SOONEST_RETRY_AFTER_NO_SPECULATE = 1000L * 1L;
+  private static final long SOONEST_RETRY_AFTER_SPECULATE = 1000L * 15L;
+
+  private static final double PROPORTION_RUNNING_TASKS_SPECULATABLE = 0.1;
+  private static final double PROPORTION_TOTAL_TASKS_SPECULATABLE = 0.01;
+  private static final int  MINIMUM_ALLOWED_SPECULATIVE_TASKS = 10;
+
+  private static final Log LOG = LogFactory.getLog(LegacySpeculator.class);
+
+  private final ConcurrentMap<TezTaskID, Boolean> runningTasks
+      = new ConcurrentHashMap<TezTaskID, Boolean>();
+
+  // Used to track any TaskAttempts that aren't heart-beating for a while, so
+  // that we can aggressively speculate instead of waiting for task-timeout.
+  private final ConcurrentMap<TezTaskAttemptID, TaskAttemptHistoryStatistics>
+      runningTaskAttemptStatistics = new ConcurrentHashMap<TezTaskAttemptID,
+          TaskAttemptHistoryStatistics>();
+  // Regular heartbeat from tasks is every 3 secs. So if we don't get a
+  // heartbeat in 9 secs (3 heartbeats), we simulate a heartbeat with no change
+  // in progress.
+  private static final long MAX_WAITTING_TIME_FOR_HEARTBEAT = 9 * 1000;
+
+
+  private final Set<TezTaskID> mayHaveSpeculated = new HashSet<TezTaskID>();
+
+  private Vertex vertex;
+  private TaskRuntimeEstimator estimator;
+
+  private final Clock clock;
+  private long nextSpeculateTime = Long.MIN_VALUE;
+
+  public LegacySpeculator(Configuration conf, AppContext context, Vertex vertex) {
+    this(conf, context.getClock(), vertex);
+  }
+
+  public LegacySpeculator(Configuration conf, Clock clock, Vertex vertex) {
+    this(conf, getEstimator(conf, vertex), clock, vertex);
+  }
+  
+  static private TaskRuntimeEstimator getEstimator
+      (Configuration conf, Vertex vertex) {
+    TaskRuntimeEstimator estimator = new LegacyTaskRuntimeEstimator();
+    estimator.contextualize(conf, vertex);
+    
+    return estimator;
+  }
+
+  // This constructor is designed to be called by other constructors.
+  //  However, it's public because we do use it in the test cases.
+  // Normally we figure out our own estimator.
+  public LegacySpeculator
+      (Configuration conf, TaskRuntimeEstimator estimator, Clock clock, Vertex vertex) {
+    this.vertex = vertex;
+    this.estimator = estimator;
+    this.clock = clock;
+  }
+
+/*   *************************************************************    */
+
+  /** Runs one speculation pass unless the previously computed retry time is still ahead. */
+  void maybeSpeculate() {
+    final long passStart = clock.getTime();
+    if (passStart < nextSpeculateTime) {
+      return;
+    }
+
+    final int launched = maybeScheduleASpeculation();
+    final boolean launchedAny = launched > 0;
+    final long minimumDelay =
+        launchedAny ? SOONEST_RETRY_AFTER_SPECULATE : SOONEST_RETRY_AFTER_NO_SPECULATE;
+
+    // wait at least the fixed delay, and never less than this pass itself took
+    final long passDuration = clock.getTime() - passStart;
+    final long wait = Math.max(minimumDelay, passDuration);
+    nextSpeculateTime = passStart + wait;
+
+    if (launchedAny) {
+      LOG.info("We launched " + launched + " speculations.  Waiting " + wait + " milliseconds.");
+    }
+  }
+
+/*   *************************************************************    */
+
+  public void notifyAttemptStarted(TezTaskAttemptID taId, long timestamp) {
+    estimator.enrollAttempt(taId, timestamp);    
+  }
+  
+  public void notifyAttemptStatusUpdate(TezTaskAttemptID taId, TaskAttemptState reportedState,
+      long timestamp) {
+    statusUpdate(taId, reportedState, timestamp);
+    maybeSpeculate();
+  }
+
+  /**
+   * Absorbs one task attempt status report into the speculation data.
+   *
+   * @param attemptID the attempt that reported; its task must be known to the vertex
+   * @param reportedState the state the attempt reported
+   * @param timestamp the time this status corresponds to; passed on to the
+   *        estimator together with the state
+   */
+  private void statusUpdate(TezTaskAttemptID attemptID, TaskAttemptState reportedState, long timestamp) {
+
+    TezTaskID taskID = attemptID.getTaskID();
+    Task task = vertex.getTask(taskID);
+
+    Preconditions.checkState(task != null, "Null task for attempt: " + attemptID);
+
+    estimator.updateAttempt(attemptID, reportedState, timestamp);
+
+    // track the task as running only on a RUNNING report; any other state clears it
+    if (reportedState == TaskAttemptState.RUNNING) {
+      runningTasks.putIfAbsent(taskID, Boolean.TRUE);
+    } else {
+      runningTasks.remove(taskID, Boolean.TRUE);
+      // drop heartbeat-tracking data once the attempt is neither RUNNING nor STARTING
+      if (reportedState != TaskAttemptState.STARTING) {
+        runningTaskAttemptStatistics.remove(attemptID);
+      }
+    }
+  }
+
+/*   *************************************************************    */
+
+// This is the code section that runs periodically and adds speculations for
+//  those jobs that need them.
+
+
+  // This can return a few magic values for tasks that shouldn't speculate:
+  //  returns ON_SCHEDULE if thresholdRuntime(taskID) says that we should not
+  //     consider speculating this task
+  //  returns ALREADY_SPECULATING if that is true.  This has priority.
+  //  returns TOO_NEW if our companion task hasn't gotten any information
+  //  returns PROGRESS_IS_GOOD if the task is sailing through
+  //  returns NOT_RUNNING if the task is not running
+  //  returns TOO_LATE_TO_SPECULATE if a new attempt would not finish sooner
+  // All of these values are negative.  Any value that should be allowed to
+  //  speculate is 0 or positive.
+  private long speculationValue(Task task, long now) {
+    Map<TezTaskAttemptID, TaskAttempt> attempts = task.getAttempts();
+    TezTaskID taskID = task.getTaskId();
+    long acceptableRuntime = Long.MIN_VALUE;
+    long result = Long.MIN_VALUE;
+
+    // short circuit completed tasks. no need to spend time on them
+    if (task.getState() == TaskState.SUCCEEDED) {
+      return NOT_RUNNING;
+    }
+    
+    if (!mayHaveSpeculated.contains(taskID)) {
+      acceptableRuntime = estimator.thresholdRuntime(taskID);
+      if (acceptableRuntime == Long.MAX_VALUE) {
+        return ON_SCHEDULE;
+      }
+    }
+
+    TezTaskAttemptID runningTaskAttemptID = null;
+
+    int numberRunningAttempts = 0;
+
+    for (TaskAttempt taskAttempt : attempts.values()) {
+      if (taskAttempt.getState() == TaskAttemptState.RUNNING
+          || taskAttempt.getState() == TaskAttemptState.STARTING) {
+        if (++numberRunningAttempts > 1) {
+          return ALREADY_SPECULATING;
+        }
+        runningTaskAttemptID = taskAttempt.getID();
+
+        long estimatedRunTime = estimator.estimatedRuntime(runningTaskAttemptID);
+
+        long taskAttemptStartTime
+            = estimator.attemptEnrolledTime(runningTaskAttemptID);
+        if (taskAttemptStartTime > now) {
+          // This background process ran before we could process the task
+          //  attempt status change that chronicles the attempt start
+          return TOO_NEW;
+        }
+
+        long estimatedEndTime = estimatedRunTime + taskAttemptStartTime;
+
+        long estimatedReplacementEndTime
+            = now + estimator.newAttemptEstimatedRuntime();
+
+        float progress = taskAttempt.getProgress();
+        TaskAttemptHistoryStatistics data =
+            runningTaskAttemptStatistics.get(runningTaskAttemptID);
+        if (data == null) {
+          runningTaskAttemptStatistics.put(runningTaskAttemptID,
+            new TaskAttemptHistoryStatistics(estimatedRunTime, progress, now));
+        } else {
+          if (estimatedRunTime == data.getEstimatedRunTime()
+              && progress == data.getProgress()) {
+            // Current stats are the same as the previously recorded stats
+            if (data.notHeartbeatedInAWhile(now)) {
+              // Stats have stagnated for a while, so simulate a heart-beat
+              // by feeding the attempt's current state back in as a status update.
+              statusUpdate(taskAttempt.getID(), taskAttempt.getState(), clock.getTime());
+            }
+          } else {
+            // Stats have changed - update our data structure
+            data.setEstimatedRunTime(estimatedRunTime);
+            data.setProgress(progress);
+            data.resetHeartBeatTime(now);
+          }
+        }
+
+        if (estimatedEndTime < now) {
+          return PROGRESS_IS_GOOD;
+        }
+
+        if (estimatedReplacementEndTime >= estimatedEndTime) {
+          return TOO_LATE_TO_SPECULATE;
+        }
+
+        result = estimatedEndTime - estimatedReplacementEndTime;
+      }
+    }
+
+    // If we are here, there's at most one running or starting task attempt.
+    if (numberRunningAttempts == 0) {
+      return NOT_RUNNING;
+    }
+
+
+
+    if (acceptableRuntime == Long.MIN_VALUE) {
+      acceptableRuntime = estimator.thresholdRuntime(taskID);
+      if (acceptableRuntime == Long.MAX_VALUE) {
+        return ON_SCHEDULE;
+      }
+    }
+
+    return result;
+  }
+
+  /** Asks the vertex for a speculative attempt of {@code taskID} and records the request. */
+  protected void addSpeculativeAttempt(TezTaskID taskID) {
+    // the message names the class that actually emits it so the log line can be traced
+    LOG.info("LegacySpeculator.addSpeculativeAttempt -- we are speculating " + taskID);
+    vertex.scheduleSpeculativeTask(taskID);
+    mayHaveSpeculated.add(taskID);
+  }
+
+  private int maybeScheduleASpeculation() {
+    int successes = 0;
+
+    long now = clock.getTime();
+
+    int numberSpeculationsAlready = 0;
+    int numberRunningTasks = 0;
+
+    Map<TezTaskID, Task> tasks = vertex.getTasks();
+
+    int numberAllowedSpeculativeTasks
+        = (int) Math.max(MINIMUM_ALLOWED_SPECULATIVE_TASKS,
+                         PROPORTION_TOTAL_TASKS_SPECULATABLE * tasks.size());
+
+    TezTaskID bestTaskID = null;
+    long bestSpeculationValue = -1L;
+
+    // this loop is potentially pricey.
+    // TODO track the tasks that are potentially worth looking at
+    for (Map.Entry<TezTaskID, Task> taskEntry : tasks.entrySet()) {
+      long mySpeculationValue = speculationValue(taskEntry.getValue(), now);
+
+      if (mySpeculationValue == ALREADY_SPECULATING) {
+        ++numberSpeculationsAlready;
+      }
+
+      if (mySpeculationValue != NOT_RUNNING) {
+        ++numberRunningTasks;
+      }
+
+      if (mySpeculationValue > bestSpeculationValue) {
+        bestTaskID = taskEntry.getKey();
+        bestSpeculationValue = mySpeculationValue;
+      }
+    }
+    numberAllowedSpeculativeTasks
+        = (int) Math.max(numberAllowedSpeculativeTasks,
+                         PROPORTION_RUNNING_TASKS_SPECULATABLE * numberRunningTasks);
+
+    // If we found a speculation target, fire it off
+    if (bestTaskID != null
+        && numberAllowedSpeculativeTasks > numberSpeculationsAlready) {
+      addSpeculativeAttempt(bestTaskID);
+      ++successes;
+    }
+
+    return successes;
+  }
+
+  static class TaskAttemptHistoryStatistics {
+
+    private long estimatedRunTime;
+    private float progress;
+    private long lastHeartBeatTime;
+
+    public TaskAttemptHistoryStatistics(long estimatedRunTime, float progress,
+        long nonProgressStartTime) {
+      this.estimatedRunTime = estimatedRunTime;
+      this.progress = progress;
+      resetHeartBeatTime(nonProgressStartTime);
+    }
+
+    public long getEstimatedRunTime() {
+      return this.estimatedRunTime;
+    }
+
+    public float getProgress() {
+      return this.progress;
+    }
+
+    public void setEstimatedRunTime(long estimatedRunTime) {
+      this.estimatedRunTime = estimatedRunTime;
+    }
+
+    public void setProgress(float progress) {
+      this.progress = progress;
+    }
+
+    public boolean notHeartbeatedInAWhile(long now) {
+      if (now - lastHeartBeatTime <= MAX_WAITTING_TIME_FOR_HEARTBEAT) {
+        return false;
+      } else {
+        resetHeartBeatTime(now);
+        return true;
+      }
+    }
+
+    public void resetHeartBeatTime(long lastHeartBeatTime) {
+      this.lastHeartBeatTime = lastHeartBeatTime;
+    }
+  }
+}
diff --git a/tez-dag/src/main/java/org/apache/tez/dag/app/dag/speculation/legacy/LegacyTaskRuntimeEstimator.java b/tez-dag/src/main/java/org/apache/tez/dag/app/dag/speculation/legacy/LegacyTaskRuntimeEstimator.java
new file mode 100644
index 0000000..14d269c
--- /dev/null
+++ b/tez-dag/src/main/java/org/apache/tez/dag/app/dag/speculation/legacy/LegacyTaskRuntimeEstimator.java
@@ -0,0 +1,136 @@
+/**
+* Licensed to the Apache Software Foundation (ASF) under one
+* or more contributor license agreements.  See the NOTICE file
+* distributed with this work for additional information
+* regarding copyright ownership.  The ASF licenses this file
+* to you under the Apache License, Version 2.0 (the
+* "License"); you may not use this file except in compliance
+* with the License.  You may obtain a copy of the License at
+*
+*     http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+*/
+
+package org.apache.tez.dag.app.dag.speculation.legacy;
+
+import java.util.Map;
+import java.util.concurrent.ConcurrentHashMap;
+import java.util.concurrent.atomic.AtomicLong;
+
+import org.apache.tez.dag.api.oldrecords.TaskAttemptState;
+import org.apache.tez.dag.app.dag.Task;
+import org.apache.tez.dag.app.dag.TaskAttempt;
+import org.apache.tez.dag.records.TezTaskAttemptID;
+
+/**
+ * Runtime estimator that uses a simple scheme of estimating task attempt
+ * runtime based on current elapsed runtime and reported progress. 
+ */
+public class LegacyTaskRuntimeEstimator extends StartEndTimesBase {
+  // Latest runtime estimate (and its variance) per running attempt, refreshed by updateAttempt.
+  private final ConcurrentHashMap<TaskAttempt, AtomicLong> attemptRuntimeEstimates
+      = new ConcurrentHashMap<TaskAttempt, AtomicLong>();
+  private final ConcurrentHashMap<TaskAttempt, AtomicLong> attemptRuntimeEstimateVariances
+      = new ConcurrentHashMap<TaskAttempt, AtomicLong>();
+
+  @Override
+  public void updateAttempt(TezTaskAttemptID attemptID, TaskAttemptState state, long timestamp) {
+    super.updateAttempt(attemptID, state, timestamp);
+
+    // Resolve the attempt; updates for unknown tasks or attempts are ignored.
+    Task task = vertex.getTask(attemptID.getTaskID());
+
+    if (task == null) {
+      return;
+    }
+
+    TaskAttempt taskAttempt = task.getAttempt(attemptID);
+
+    if (taskAttempt == null) {
+      return;
+    }
+
+    float progress = taskAttempt.getProgress();
+
+    Long boxedStart = startTimes.get(attemptID);
+    long start = boxedStart == null ? Long.MIN_VALUE : boxedStart;
+
+    // We need to do two things.
+    //  1: If this is a completion, we accumulate statistics in the superclass
+    //  2: If this is not a completion, we learn more about it.
+
+    // This is not a completion, but we're cooking.
+    //
+    if (taskAttempt.getState() == TaskAttemptState.RUNNING) {
+      // See if this task is already in the registry
+      AtomicLong estimateContainer = attemptRuntimeEstimates.get(taskAttempt);
+      AtomicLong estimateVarianceContainer
+          = attemptRuntimeEstimateVariances.get(taskAttempt);
+
+      if (estimateContainer == null) {
+        // Register atomically, exactly like the variance container below, so a
+        // racing update can neither replace a container nor leave this one null.
+        attemptRuntimeEstimates.putIfAbsent(taskAttempt, new AtomicLong());
+        estimateContainer = attemptRuntimeEstimates.get(taskAttempt);
+      }
+
+      if (estimateVarianceContainer == null) {
+        attemptRuntimeEstimateVariances.putIfAbsent(taskAttempt, new AtomicLong());
+        estimateVarianceContainer = attemptRuntimeEstimateVariances.get(taskAttempt);
+      }
+
+      // Both values stay at -1 unless the attempt has a positive start time
+      // that lies before this update's timestamp.
+      long estimate = -1;
+      long varianceEstimate = -1;
+
+      // This code assumes that we'll never consider starting a third
+      //  speculative task attempt if two are already running for this task
+      if (start > 0 && timestamp > start) {
+        estimate = (long) ((timestamp - start) / Math.max(0.0001, progress));
+        varianceEstimate = (long) (estimate * progress / 10);
+      }
+      if (estimateContainer != null) {
+        estimateContainer.set(estimate);
+      }
+      if (estimateVarianceContainer != null) {
+        estimateVarianceContainer.set(varianceEstimate);
+      }
+    }
+  }
+
+  // Value stored in data for the attempt, or -1 if the task, attempt or entry is missing.
+  private long storedPerAttemptValue
+       (Map<TaskAttempt, AtomicLong> data, TezTaskAttemptID attemptID) {
+    Task task = vertex.getTask(attemptID.getTaskID());
+
+    if (task == null) {
+      return -1L;
+    }
+
+    TaskAttempt taskAttempt = task.getAttempt(attemptID);
+
+    if (taskAttempt == null) {
+      return -1L;
+    }
+
+    AtomicLong estimate = data.get(taskAttempt);
+
+    return estimate == null ? -1L : estimate.get();
+  }
+
+  @Override
+  public long estimatedRuntime(TezTaskAttemptID attemptID) {
+    return storedPerAttemptValue(attemptRuntimeEstimates, attemptID);
+  }
+
+  @Override
+  public long runtimeEstimateVariance(TezTaskAttemptID attemptID) {
+    return storedPerAttemptValue(attemptRuntimeEstimateVariances, attemptID);
+  }
+}
diff --git a/tez-dag/src/main/java/org/apache/tez/dag/app/dag/speculation/legacy/StartEndTimesBase.java b/tez-dag/src/main/java/org/apache/tez/dag/app/dag/speculation/legacy/StartEndTimesBase.java
new file mode 100644
index 0000000..d4d1a7f
--- /dev/null
+++ b/tez-dag/src/main/java/org/apache/tez/dag/app/dag/speculation/legacy/StartEndTimesBase.java
@@ -0,0 +1,138 @@
+/**
+* Licensed to the Apache Software Foundation (ASF) under one
+* or more contributor license agreements.  See the NOTICE file
+* distributed with this work for additional information
+* regarding copyright ownership.  The ASF licenses this file
+* to you under the Apache License, Version 2.0 (the
+* "License"); you may not use this file except in compliance
+* with the License.  You may obtain a copy of the License at
+*
+*     http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+*/
+
+package org.apache.tez.dag.app.dag.speculation.legacy;
+
+import java.util.HashSet;
+import java.util.Map;
+import java.util.Set;
+import java.util.concurrent.ConcurrentHashMap;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.tez.dag.api.TezConfiguration;
+import org.apache.tez.dag.api.oldrecords.TaskAttemptState;
+import org.apache.tez.dag.app.dag.Task;
+import org.apache.tez.dag.app.dag.TaskAttempt;
+import org.apache.tez.dag.app.dag.Vertex;
+import org.apache.tez.dag.records.TezTaskAttemptID;
+import org.apache.tez.dag.records.TezTaskID;
+
+/**
+ * Base class that uses the attempt runtime estimations from a derived class
+ * and uses it to determine outliers based on deviating beyond the mean
+ * estimated runtime by some threshold
+ */
+abstract class StartEndTimesBase implements TaskRuntimeEstimator {
+  static final float MINIMUM_COMPLETE_PROPORTION_TO_SPECULATE
+      = 0.05F;
+  static final int MINIMUM_COMPLETE_NUMBER_TO_SPECULATE
+      = 1;
+
+  protected Vertex vertex;
+
+  protected final Map<TezTaskAttemptID, Long> startTimes
+      = new ConcurrentHashMap<TezTaskAttemptID, Long>();
+
+  protected final DataStatistics taskStatistics = new DataStatistics();
+
+  private float slowTaskRelativeTresholds;
+
+  protected final Set<Task> doneTasks = new HashSet<Task>();
+
+  @Override
+  public void enrollAttempt(TezTaskAttemptID id, long timestamp) {
+    startTimes.put(id, timestamp);
+  }
+
+  @Override
+  public long attemptEnrolledTime(TezTaskAttemptID attemptID) {
+    Long result = startTimes.get(attemptID);
+
+    return result == null ? Long.MAX_VALUE : result;
+  }
+
+  @Override
+  public void contextualize(Configuration conf, Vertex vertex) {
+    slowTaskRelativeTresholds = conf.getFloat(
+        TezConfiguration.TEZ_AM_LEGACY_SPECULATIVE_SLOWTASK_THRESHOLD, 1.0f);
+    this.vertex = vertex;
+  }
+
+  protected DataStatistics dataStatisticsForTask(TezTaskID taskID) {
+    return taskStatistics;
+  }
+
+  @Override
+  public long thresholdRuntime(TezTaskID taskID) {
+    int completedTasks = vertex.getCompletedTasks();
+
+    int totalTasks = vertex.getTotalTasks();
+    // Too few completions (absolute or proportional) to judge outliers yet.
+    if (completedTasks < MINIMUM_COMPLETE_NUMBER_TO_SPECULATE
+        || (((float)completedTasks) / totalTasks)
+              < MINIMUM_COMPLETE_PROPORTION_TO_SPECULATE ) {
+      return Long.MAX_VALUE;
+    }
+
+    long result = (long)taskStatistics.outlier(slowTaskRelativeTresholds);
+    return result;
+  }
+
+  @Override
+  public long newAttemptEstimatedRuntime() {
+    return (long)taskStatistics.mean();
+  }
+
+  @Override
+  public void updateAttempt(TezTaskAttemptID attemptID, TaskAttemptState state, long timestamp) {
+    // Feeds the duration of each task's first observed success into taskStatistics.
+    Task task = vertex.getTask(attemptID.getTaskID());
+
+    if (task == null) {
+      return;
+    }
+
+    Long boxedStart = startTimes.get(attemptID);
+    long start = boxedStart == null ? Long.MIN_VALUE : boxedStart;
+
+    TaskAttempt taskAttempt = task.getAttempt(attemptID);
+    // A missing attempt (null) has no state to inspect, so there is nothing to record.
+    if (taskAttempt != null && taskAttempt.getState() == TaskAttemptState.SUCCEEDED) {
+      boolean isNew = false;
+      // is this  a new success?
+      synchronized (doneTasks) {
+        if (!doneTasks.contains(task)) {
+          doneTasks.add(task);
+          isNew = true;
+        }
+      }
+
+      // It's a new completion
+      // Note that if a task completes twice [because of a previous speculation
+      //  and a race, or a success followed by loss of the machine with the
+      //  local data] we only count the first one.
+      if (isNew) {
+        long finish = timestamp;
+        if (start > 1L && finish > 1L && start <= finish) {
+          long duration = finish - start;
+          taskStatistics.add(duration);
+        }
+      }
+    }
+  }
+}
diff --git a/tez-dag/src/main/java/org/apache/tez/dag/app/dag/speculation/legacy/TaskRuntimeEstimator.java b/tez-dag/src/main/java/org/apache/tez/dag/app/dag/speculation/legacy/TaskRuntimeEstimator.java
new file mode 100644
index 0000000..c8edd1e
--- /dev/null
+++ b/tez-dag/src/main/java/org/apache/tez/dag/app/dag/speculation/legacy/TaskRuntimeEstimator.java
@@ -0,0 +1,91 @@
+/**
+* Licensed to the Apache Software Foundation (ASF) under one
+* or more contributor license agreements.  See the NOTICE file
+* distributed with this work for additional information
+* regarding copyright ownership.  The ASF licenses this file
+* to you under the Apache License, Version 2.0 (the
+* "License"); you may not use this file except in compliance
+* with the License.  You may obtain a copy of the License at
+*
+*     http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+*/
+
+package org.apache.tez.dag.app.dag.speculation.legacy;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.tez.dag.api.oldrecords.TaskAttemptState;
+import org.apache.tez.dag.app.dag.Vertex;
+import org.apache.tez.dag.records.TezTaskAttemptID;
+import org.apache.tez.dag.records.TezTaskID;
+
+/**
+ * Estimate the runtime for tasks of a given vertex.
+ * 
+ */
+public interface TaskRuntimeEstimator {
+  public void enrollAttempt(TezTaskAttemptID id, long timestamp);
+
+  public long attemptEnrolledTime(TezTaskAttemptID attemptID);
+
+  public void updateAttempt(TezTaskAttemptID taId, TaskAttemptState reportedState, long timestamp);
+
+  public void contextualize(Configuration conf, Vertex vertex);
+
+  /**
+   *
+   * Find a maximum reasonable execution wallclock time for a task.
+   * Includes the time already elapsed.
+   *
+   * If the projected total execution time for this task
+   * ever exceeds its reasonable execution time, we may
+   * speculate it.
+   *
+   * @param id the {@link TezTaskID} of the task we are asking about
+   * @return the task's maximum reasonable runtime, or {@code Long.MAX_VALUE}
+   *         if we don't have enough information to rule out any runtime,
+   *         however long.
+   *
+   */
+  public long thresholdRuntime(TezTaskID id);
+
+  /**
+   *
+   * Estimate a task attempt's total runtime.  Includes the time already
+   * elapsed.
+   *
+   * @param id the {@link TezTaskAttemptID} of the attempt we are asking about
+   * @return our best estimate of the attempt's runtime, or {@code -1} if
+   *         we don't have enough information yet to produce an estimate.
+   *
+   */
+  public long estimatedRuntime(TezTaskAttemptID id);
+
+  /**
+   *
+   * Estimates how long a new attempt on this task will take if we start
+   *  one now
+   *
+   * @return our best estimate of a new attempt's runtime, or {@code -1} if
+   *         we don't have enough information yet to produce an estimate.
+   *
+   */
+  public long newAttemptEstimatedRuntime();
+
+  /**
+   *
+   * Computes the width of the error band of our estimate of the task
+   *  runtime as returned by {@link #estimatedRuntime(TezTaskAttemptID)}
+   *
+   * @param id the {@link TezTaskAttemptID} of the attempt we are asking about
+   * @return the width of the error band around the runtime estimate, or
+   *         {@code -1} if we don't have enough information yet to produce one.
+   *
+   */
+  public long runtimeEstimateVariance(TezTaskAttemptID id);
+}
diff --git a/tez-dag/src/main/java/org/apache/tez/dag/app/launcher/LocalContainerLauncher.java b/tez-dag/src/main/java/org/apache/tez/dag/app/launcher/LocalContainerLauncher.java
index baeb9a3..f14fd5d 100644
--- a/tez-dag/src/main/java/org/apache/tez/dag/app/launcher/LocalContainerLauncher.java
+++ b/tez-dag/src/main/java/org/apache/tez/dag/app/launcher/LocalContainerLauncher.java
@@ -67,6 +67,7 @@
 import org.apache.tez.dag.app.rm.container.AMContainerEventType;
 import org.apache.tez.dag.history.DAGHistoryEvent;
 import org.apache.tez.dag.history.events.ContainerLaunchedEvent;
+import org.apache.tez.dag.records.TaskAttemptTerminationCause;
 import org.apache.tez.runtime.task.TezChild;
 
 
@@ -242,14 +243,14 @@
         LOG.info("Container: " + containerId + " completed successfully");
         appContext.getEventHandler().handle(
             new AMContainerEventCompleted(containerId, result.getExitStatus().getExitCode(),
-                null));
+                null, TaskAttemptTerminationCause.CONTAINER_EXITED));
       } else {
         LOG.info("Container: " + containerId + " completed but with errors");
         appContext.getEventHandler().handle(
             new AMContainerEventCompleted(containerId, result.getExitStatus().getExitCode(),
                 result.getErrorMessage() == null ?
                     (result.getThrowable() == null ? null : result.getThrowable().getMessage()) :
-                    result.getErrorMessage()));
+                    result.getErrorMessage(), TaskAttemptTerminationCause.APPLICATION_ERROR));
       }
     }
 
@@ -263,13 +264,13 @@
         appContext.getEventHandler()
             .handle(new AMContainerEventCompleted(containerId,
                 TezChild.ContainerExecutionResult.ExitStatus.EXECUTION_FAILURE.getExitCode(),
-                t.getMessage()));
+                t.getMessage(), TaskAttemptTerminationCause.APPLICATION_ERROR));
       } else {
         LOG.info("Ignoring CancellationException - triggered by LocalContainerLauncher");
         appContext.getEventHandler()
             .handle(new AMContainerEventCompleted(containerId,
                 TezChild.ContainerExecutionResult.ExitStatus.SUCCESS.getExitCode(),
-                "CancellationException"));
+                "CancellationException", TaskAttemptTerminationCause.CONTAINER_EXITED));
       }
     }
   }
diff --git a/tez-dag/src/main/java/org/apache/tez/dag/app/rm/LocalTaskSchedulerService.java b/tez-dag/src/main/java/org/apache/tez/dag/app/rm/LocalTaskSchedulerService.java
index 026ed7d..2ebcebb 100644
--- a/tez-dag/src/main/java/org/apache/tez/dag/app/rm/LocalTaskSchedulerService.java
+++ b/tez-dag/src/main/java/org/apache/tez/dag/app/rm/LocalTaskSchedulerService.java
@@ -25,12 +25,13 @@
 import java.util.concurrent.PriorityBlockingQueue;
 import java.util.concurrent.TimeUnit;
 import java.util.HashMap;
+import java.util.Iterator;
 import java.util.LinkedHashMap;
 
 import com.google.common.primitives.Ints;
+
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
-
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.yarn.api.records.ApplicationAttemptId;
 import org.apache.hadoop.yarn.api.records.Container;
@@ -38,8 +39,8 @@
 import org.apache.hadoop.yarn.api.records.NodeId;
 import org.apache.hadoop.yarn.api.records.Priority;
 import org.apache.hadoop.yarn.api.records.Resource;
-
 import org.apache.tez.dag.api.TezConfiguration;
+import org.apache.tez.dag.api.TezUncheckedException;
 import org.apache.tez.dag.app.AppContext;
 import org.apache.tez.dag.app.rm.container.ContainerSignatureMatcher;
 
@@ -156,12 +157,16 @@
 
   @Override
   public void serviceInit(Configuration conf) {
-      taskRequestHandler = new AsyncDelegateRequestHandler(taskRequestQueue,
+    taskRequestHandler = createRequestHandler(conf);
+    asyncDelegateRequestThread = new Thread(taskRequestHandler);
+  }
+
+  protected AsyncDelegateRequestHandler createRequestHandler(Configuration conf) {
+    return new AsyncDelegateRequestHandler(taskRequestQueue,
         new LocalContainerFactory(appContext),
         taskAllocations,
         appClientDelegate,
         conf);
-    asyncDelegateRequestThread = new Thread(taskRequestHandler);
   }
 
   @Override
@@ -341,7 +346,20 @@
         appClientDelegate.containerBeingReleased(container.getId());
       }
       else {
-        LOG.warn("Unable to find and remove task " + request.task + " from task allocations");
+        boolean deallocationBeforeAllocation = false;
+        Iterator<TaskRequest> iter = taskRequestQueue.iterator();
+        while (iter.hasNext()) {
+          TaskRequest taskRequest = iter.next();
+          if (taskRequest instanceof AllocateTaskRequest && taskRequest.task.equals(request.task)) {
+            iter.remove();
+            deallocationBeforeAllocation = true;
+            LOG.info("deallcation happen before allocation for task:" + request.task);
+            break;
+          }
+        }
+        if (!deallocationBeforeAllocation) {
+          throw new TezUncheckedException("Unable to find and remove task " + request.task + " from task allocations");
+        }
       }
     }
   }
diff --git a/tez-dag/src/main/java/org/apache/tez/dag/app/rm/TaskSchedulerEventHandler.java b/tez-dag/src/main/java/org/apache/tez/dag/app/rm/TaskSchedulerEventHandler.java
index ec8e73f..625b09e 100644
--- a/tez-dag/src/main/java/org/apache/tez/dag/app/rm/TaskSchedulerEventHandler.java
+++ b/tez-dag/src/main/java/org/apache/tez/dag/app/rm/TaskSchedulerEventHandler.java
@@ -69,6 +69,7 @@
 import org.apache.tez.dag.app.rm.node.AMNodeEventStateChanged;
 import org.apache.tez.dag.app.rm.node.AMNodeEventTaskAttemptEnded;
 import org.apache.tez.dag.app.rm.node.AMNodeEventTaskAttemptSucceeded;
+import org.apache.tez.dag.records.TaskAttemptTerminationCause;
 
 import com.google.common.base.Preconditions;
 
@@ -422,19 +423,22 @@
     // Inform the Containers about completion.
     AMContainer amContainer = appContext.getAllContainers().get(containerStatus.getContainerId());
     if (amContainer != null) {
-      String message = null;
+      String message = "Container completed. ";
+      TaskAttemptTerminationCause errCause = TaskAttemptTerminationCause.CONTAINER_EXITED;
       int exitStatus = containerStatus.getExitStatus();
       if (exitStatus == ContainerExitStatus.PREEMPTED) {
         message = "Container preempted externally. ";
+        errCause = TaskAttemptTerminationCause.EXTERNAL_PREEMPTION;
       } else if (exitStatus == ContainerExitStatus.DISKS_FAILED) {
         message = "Container disk failed. ";
-      } else {
+        errCause = TaskAttemptTerminationCause.NODE_DISK_ERROR;
+      } else if (exitStatus != ContainerExitStatus.SUCCESS){
         message = "Container failed. ";
       }
       if (containerStatus.getDiagnostics() != null) {
         message += containerStatus.getDiagnostics();
       }
-      sendEvent(new AMContainerEventCompleted(amContainer.getContainerId(), exitStatus, message));
+      sendEvent(new AMContainerEventCompleted(amContainer.getContainerId(), exitStatus, message, errCause));
     }
   }
 
@@ -550,8 +554,8 @@
   public void preemptContainer(ContainerId containerId) {
     taskScheduler.deallocateContainer(containerId);
     // Inform the Containers about completion.
-    sendEvent(new AMContainerEventCompleted(containerId,
-        ContainerExitStatus.PREEMPTED, "Container preempted internally"));
+    sendEvent(new AMContainerEventCompleted(containerId, ContainerExitStatus.INVALID,
+        "Container preempted internally", TaskAttemptTerminationCause.INTERNAL_PREEMPTION));
   }
 
   public void setShouldUnregisterFlag() {
diff --git a/tez-dag/src/main/java/org/apache/tez/dag/app/rm/container/AMContainerEventCompleted.java b/tez-dag/src/main/java/org/apache/tez/dag/app/rm/container/AMContainerEventCompleted.java
index e9649f3..a455f1e 100644
--- a/tez-dag/src/main/java/org/apache/tez/dag/app/rm/container/AMContainerEventCompleted.java
+++ b/tez-dag/src/main/java/org/apache/tez/dag/app/rm/container/AMContainerEventCompleted.java
@@ -20,17 +20,20 @@
 
 import org.apache.hadoop.yarn.api.records.ContainerExitStatus;
 import org.apache.hadoop.yarn.api.records.ContainerId;
+import org.apache.tez.dag.records.TaskAttemptTerminationCause;
 
 public class AMContainerEventCompleted extends AMContainerEvent {
 
   private final int exitStatus;
   private final String diagnostics;
+  private final TaskAttemptTerminationCause errCause;
 
   public AMContainerEventCompleted(ContainerId containerId, 
-      int exitStatus, String diagnostics) {
+      int exitStatus, String diagnostics, TaskAttemptTerminationCause errCause) {
     super(containerId, AMContainerEventType.C_COMPLETED);
     this.exitStatus = exitStatus;
     this.diagnostics = diagnostics;
+    this.errCause = errCause;
   }
 
   public boolean isPreempted() {
@@ -41,6 +44,10 @@
     return (exitStatus == ContainerExitStatus.DISKS_FAILED);
   }
   
+  public boolean isClusterAction() {
+    return isPreempted() || isDiskFailed();
+  }
+  
   public String getDiagnostics() {
     return diagnostics;
   }
@@ -48,5 +55,9 @@
   public int getContainerExitStatus() {
     return exitStatus;
   }
+  
+  public TaskAttemptTerminationCause getTerminationCause() {
+    return errCause;
+  }
 
 }
diff --git a/tez-dag/src/main/java/org/apache/tez/dag/app/rm/container/AMContainerImpl.java b/tez-dag/src/main/java/org/apache/tez/dag/app/rm/container/AMContainerImpl.java
index a0f9cb7..9d4f46b 100644
--- a/tez-dag/src/main/java/org/apache/tez/dag/app/rm/container/AMContainerImpl.java
+++ b/tez-dag/src/main/java/org/apache/tez/dag/app/rm/container/AMContainerImpl.java
@@ -59,6 +59,7 @@
 import org.apache.tez.dag.history.DAGHistoryEvent;
 import org.apache.tez.dag.history.HistoryEventHandler;
 import org.apache.tez.dag.history.events.ContainerStoppedEvent;
+import org.apache.tez.dag.records.TaskAttemptTerminationCause;
 import org.apache.tez.dag.records.TezDAGID;
 import org.apache.tez.dag.records.TezTaskAttemptID;
 //import org.apache.tez.dag.app.dag.event.TaskAttemptEventDiagnosticsUpdate;
@@ -533,7 +534,7 @@
           .getTaskAttemptId());
       container.sendTerminatedToTaskAttempt(event.getTaskAttemptId(),
           "AMScheduler Error: TaskAttempt allocated to unlaunched container: " +
-              container.getContainerId());
+              container.getContainerId(), TaskAttemptTerminationCause.FRAMEWORK_ERROR);
       container.deAllocate();
       LOG.warn("Unexpected TA Assignment: TAId: " + event.getTaskAttemptId() +
           "  for ContainerId: " + container.getContainerId() +
@@ -644,8 +645,10 @@
     public void transition(AMContainerImpl container, AMContainerEvent cEvent) {
       if (container.pendingAttempt != null) {
         AMContainerEventLaunchFailed event = (AMContainerEventLaunchFailed) cEvent;
+        // for a properly setup cluster this should almost always be an app error
+        // need to differentiate between launch failed due to framework/cluster or app
         container.sendTerminatingToTaskAttempt(container.pendingAttempt,
-            event.getMessage());
+            event.getMessage(), TaskAttemptTerminationCause.CONTAINER_LAUNCH_FAILED);
       }
       container.unregisterFromTAListener();
       container.deAllocate();
@@ -659,12 +662,17 @@
       AMContainerEventCompleted event = (AMContainerEventCompleted) cEvent;
       if (container.pendingAttempt != null) {
         String errorMessage = getMessage(container, event);
-        if (event.isPreempted() || event.isDiskFailed()) {
+        if (event.isClusterAction()) {
           container.sendContainerTerminatedBySystemToTaskAttempt(container.pendingAttempt,
-              errorMessage);
+              errorMessage, event.getTerminationCause());
         } else {
-          container.sendTerminatedToTaskAttempt(container.pendingAttempt,
-              errorMessage);
+          container
+              .sendTerminatedToTaskAttempt(
+                  container.pendingAttempt,
+                  errorMessage,
+                  // if termination cause is generic exited then replace with specific
+                  (event.getTerminationCause() == TaskAttemptTerminationCause.CONTAINER_EXITED ? 
+                      TaskAttemptTerminationCause.CONTAINER_LAUNCH_FAILED : event.getTerminationCause()));
         }
         container.registerFailedAttempt(container.pendingAttempt);
         container.pendingAttempt = null;
@@ -696,7 +704,7 @@
     public void transition(AMContainerImpl container, AMContainerEvent cEvent) {
       if (container.pendingAttempt != null) {
         container.sendTerminatingToTaskAttempt(container.pendingAttempt,
-            getMessage(container, cEvent));
+            getMessage(container, cEvent), TaskAttemptTerminationCause.CONTAINER_STOPPED);
       }
       container.unregisterFromTAListener();
       container.logStopped(container.pendingAttempt == null ? 
@@ -722,27 +730,31 @@
         return;
       }
       container.nodeFailed = true;
-      String errorMessage = null;
+      String errorMessage = "Node " + container.getContainer().getNodeId() + " failed. ";
       if (cEvent instanceof DiagnosableEvent) {
-        errorMessage = ((DiagnosableEvent) cEvent).getDiagnosticInfo();
+        errorMessage += ((DiagnosableEvent) cEvent).getDiagnosticInfo();
       }
 
       for (TezTaskAttemptID taId : container.failedAssignments) {
-        container.sendNodeFailureToTA(taId, errorMessage);
+        container.sendNodeFailureToTA(taId, errorMessage, TaskAttemptTerminationCause.NODE_FAILED);
       }
       for (TezTaskAttemptID taId : container.completedAttempts) {
-        container.sendNodeFailureToTA(taId, errorMessage);
+        container.sendNodeFailureToTA(taId, errorMessage, TaskAttemptTerminationCause.NODE_FAILED);
       }
 
       if (container.pendingAttempt != null) {
         // Will be null in COMPLETED state.
-        container.sendNodeFailureToTA(container.pendingAttempt, errorMessage);
-        container.sendTerminatingToTaskAttempt(container.pendingAttempt, "Node failure");
+        container.sendNodeFailureToTA(container.pendingAttempt, errorMessage, 
+            TaskAttemptTerminationCause.NODE_FAILED);
+        container.sendTerminatingToTaskAttempt(container.pendingAttempt, errorMessage,
+            TaskAttemptTerminationCause.NODE_FAILED);
       }
       if (container.runningAttempt != null) {
         // Will be null in COMPLETED state.
-        container.sendNodeFailureToTA(container.runningAttempt, errorMessage);
-        container.sendTerminatingToTaskAttempt(container.runningAttempt, "Node failure");
+        container.sendNodeFailureToTA(container.runningAttempt, errorMessage, 
+            TaskAttemptTerminationCause.NODE_FAILED);
+        container.sendTerminatingToTaskAttempt(container.runningAttempt, errorMessage,
+            TaskAttemptTerminationCause.NODE_FAILED);
       }
       container.logStopped(ContainerExitStatus.ABORTED);
     }
@@ -767,7 +779,7 @@
         container.sendTerminatingToTaskAttempt(container.pendingAttempt,
             "Container " + container.getContainerId() +
                 " hit an invalid transition - " + cEvent.getType() + " at " +
-                container.getState());
+                container.getState(), TaskAttemptTerminationCause.FRAMEWORK_ERROR);
       }
       container.logStopped(ContainerExitStatus.ABORTED);
       container.sendStopRequestToNM();
@@ -909,12 +921,12 @@
     @Override
     public void transition(AMContainerImpl container, AMContainerEvent cEvent) {
       AMContainerEventCompleted event = (AMContainerEventCompleted) cEvent;
-      if (event.isPreempted() || event.isDiskFailed()) {
+      if (event.isClusterAction()) {
         container.sendContainerTerminatedBySystemToTaskAttempt(container.runningAttempt,
-            getMessage(container, event));
+            getMessage(container, event), event.getTerminationCause());
       } else {
         container.sendTerminatedToTaskAttempt(container.runningAttempt,
-            getMessage(container, event));
+            getMessage(container, event), event.getTerminationCause());
       }
       container.unregisterAttemptFromListener(container.runningAttempt);
       container.registerFailedAttempt(container.runningAttempt);
@@ -929,8 +941,8 @@
 
       container.unregisterAttemptFromListener(container.runningAttempt);
       container.sendTerminatingToTaskAttempt(container.runningAttempt,
-          " Container" + container.getContainerId() +
-              " received a STOP_REQUEST");
+          " Container" + container.getContainerId() + " received a STOP_REQUEST",
+          TaskAttemptTerminationCause.CONTAINER_STOPPED);
       super.transition(container, cEvent);
     }
   }
@@ -964,7 +976,7 @@
       container.sendTerminatingToTaskAttempt(container.runningAttempt,
           "Container " + container.getContainerId() +
               " hit an invalid transition - " + cEvent.getType() + " at " +
-              container.getState());
+              container.getState(), TaskAttemptTerminationCause.FRAMEWORK_ERROR);
     }
   }
 
@@ -978,7 +990,8 @@
           " cannot be allocated to container: " + container.getContainerId() +
           " in " + container.getState() + " state";
       container.maybeSendNodeFailureForFailedAssignment(event.getTaskAttemptId());
-      container.sendTerminatingToTaskAttempt(event.getTaskAttemptId(), errorMessage);
+      container.sendTerminatingToTaskAttempt(event.getTaskAttemptId(), errorMessage,
+          TaskAttemptTerminationCause.CONTAINER_EXITED);
       container.registerFailedAttempt(event.getTaskAttemptId());
     }
   }
@@ -1001,15 +1014,18 @@
       AMContainerEventCompleted event = (AMContainerEventCompleted) cEvent;
       String diag = event.getDiagnostics();
       for (TezTaskAttemptID taId : container.failedAssignments) {
-        container.sendTerminatedToTaskAttempt(taId, diag);
+        container.sendTerminatedToTaskAttempt(taId, diag, 
+            TaskAttemptTerminationCause.CONTAINER_EXITED);
       }
       if (container.pendingAttempt != null) {
-        container.sendTerminatedToTaskAttempt(container.pendingAttempt, diag);
+        container.sendTerminatedToTaskAttempt(container.pendingAttempt, diag, 
+            TaskAttemptTerminationCause.CONTAINER_EXITED);
         container.registerFailedAttempt(container.pendingAttempt);
         container.pendingAttempt = null;
       }
       if (container.runningAttempt != null) {
-        container.sendTerminatedToTaskAttempt(container.runningAttempt, diag);
+        container.sendTerminatedToTaskAttempt(container.runningAttempt, diag, 
+            TaskAttemptTerminationCause.CONTAINER_EXITED);
         container.registerFailedAttempt(container.runningAttempt);
         container.runningAttempt = null;
       }
@@ -1078,12 +1094,11 @@
           + " in COMPLETED state";
       container.maybeSendNodeFailureForFailedAssignment(event.getTaskAttemptId());
       container.sendTerminatedToTaskAttempt(event.getTaskAttemptId(),
-          errorMessage);
+          errorMessage, TaskAttemptTerminationCause.FRAMEWORK_ERROR);
       container.registerFailedAttempt(event.getTaskAttemptId());
     }
   }
 
-
   private void handleExtraTAAssign(
       AMContainerEventAssignTA event, TezTaskAttemptID currentTaId) {
     this.inError = true;
@@ -1092,8 +1107,10 @@
         ". Attempts: " + currentTaId + ", " + event.getTaskAttemptId() +
         ". Current state: " + this.getState();
     this.maybeSendNodeFailureForFailedAssignment(event.getTaskAttemptId());
-    this.sendTerminatingToTaskAttempt(event.getTaskAttemptId(), errorMessage);
-    this.sendTerminatingToTaskAttempt(currentTaId, errorMessage);
+    this.sendTerminatingToTaskAttempt(event.getTaskAttemptId(), errorMessage,
+        TaskAttemptTerminationCause.FRAMEWORK_ERROR);
+    this.sendTerminatingToTaskAttempt(currentTaId, errorMessage,
+        TaskAttemptTerminationCause.FRAMEWORK_ERROR);
     this.registerFailedAttempt(event.getTaskAttemptId());
     LOG.warn(errorMessage);
     this.logStopped(ContainerExitStatus.INVALID);
@@ -1122,28 +1139,29 @@
   }
 
   protected void sendTerminatedToTaskAttempt(
-      TezTaskAttemptID taId, String message) {
-    sendEvent(new TaskAttemptEventContainerTerminated(taId, message));
+      TezTaskAttemptID taId, String message, TaskAttemptTerminationCause errCause) {
+    sendEvent(new TaskAttemptEventContainerTerminated(taId, message, errCause));
   }
   
   protected void sendContainerTerminatedBySystemToTaskAttempt(
-    TezTaskAttemptID taId, String message) {
-      sendEvent(new TaskAttemptEventContainerTerminatedBySystem(taId, message));
+    TezTaskAttemptID taId, String message, TaskAttemptTerminationCause errorCause) {
+      sendEvent(new TaskAttemptEventContainerTerminatedBySystem(taId, message, errorCause));
   }
 
   protected void sendTerminatingToTaskAttempt(TezTaskAttemptID taId,
-      String message) {
-    sendEvent(new TaskAttemptEventContainerTerminating(taId, message));
+      String message, TaskAttemptTerminationCause errorCause) {
+    sendEvent(new TaskAttemptEventContainerTerminating(taId, message, errorCause));
   }
 
   protected void maybeSendNodeFailureForFailedAssignment(TezTaskAttemptID taId) {
     if (this.nodeFailed) {
-      this.sendNodeFailureToTA(taId, "Node Failed");
+      this.sendNodeFailureToTA(taId, "Node Failed", TaskAttemptTerminationCause.NODE_FAILED);
     }
   }
 
-  protected void sendNodeFailureToTA(TezTaskAttemptID taId, String message) {
-    sendEvent(new TaskAttemptEventNodeFailed(taId, message));
+  protected void sendNodeFailureToTA(TezTaskAttemptID taId, String message, 
+      TaskAttemptTerminationCause errorCause) {
+    sendEvent(new TaskAttemptEventNodeFailed(taId, message, errorCause));
   }
 
   protected void sendStartRequestToNM(ContainerLaunchContext clc) {
diff --git a/tez-dag/src/main/java/org/apache/tez/dag/history/events/TaskAttemptFinishedEvent.java b/tez-dag/src/main/java/org/apache/tez/dag/history/events/TaskAttemptFinishedEvent.java
index 0ae8061..2b21c89 100644
--- a/tez-dag/src/main/java/org/apache/tez/dag/history/events/TaskAttemptFinishedEvent.java
+++ b/tez-dag/src/main/java/org/apache/tez/dag/history/events/TaskAttemptFinishedEvent.java
@@ -29,6 +29,7 @@
 import org.apache.tez.dag.api.oldrecords.TaskAttemptState;
 import org.apache.tez.dag.history.HistoryEvent;
 import org.apache.tez.dag.history.HistoryEventType;
+import org.apache.tez.dag.records.TaskAttemptTerminationCause;
 import org.apache.tez.dag.records.TezTaskAttemptID;
 import org.apache.tez.dag.recovery.records.RecoveryProtos.TaskAttemptFinishedProto;
 
@@ -43,21 +44,23 @@
   private TaskAttemptState state;
   private String diagnostics;
   private TezCounters tezCounters;
+  private TaskAttemptTerminationCause error;
 
   public TaskAttemptFinishedEvent(TezTaskAttemptID taId,
       String vertexName,
       long startTime,
       long finishTime,
       TaskAttemptState state,
-      String diagnostics,
-      TezCounters counters) {
+      TaskAttemptTerminationCause error,
+      String diagnostics, TezCounters counters) {
     this.taskAttemptId = taId;
     this.vertexName = vertexName;
     this.startTime = startTime;
     this.finishTime = finishTime;
     this.state = state;
     this.diagnostics = diagnostics;
-    tezCounters = counters;
+    this.tezCounters = counters;
+    this.error = error;
   }
 
   public TaskAttemptFinishedEvent() {
@@ -87,6 +90,9 @@
     if (diagnostics != null) {
       builder.setDiagnostics(diagnostics);
     }
+    if (error != null) {
+      builder.setErrorEnum(error.name());
+    }
     if (tezCounters != null) {
       builder.setCounters(DagTypeConverters.convertTezCountersToProto(tezCounters));
     }
@@ -100,6 +106,9 @@
     if (proto.hasDiagnostics()) {
       this.diagnostics = proto.getDiagnostics();
     }
+    if (proto.hasErrorEnum()) {
+      this.error = TaskAttemptTerminationCause.valueOf(proto.getErrorEnum());
+    }
     if (proto.hasCounters()) {
       this.tezCounters = DagTypeConverters.convertTezCountersFromProto(
         proto.getCounters());
@@ -129,6 +138,7 @@
         + ", finishTime=" + finishTime
         + ", timeTaken=" + (finishTime - startTime)
         + ", status=" + state.name()
+        + ", errorEnum=" + (error != null ? error.name() : "")
         + ", diagnostics=" + diagnostics
         + ", counters=" + (tezCounters == null ? "null" :
           tezCounters.toString()
@@ -146,6 +156,10 @@
   public String getDiagnostics() {
     return diagnostics;
   }
+  
+  public TaskAttemptTerminationCause getTaskAttemptError() {
+    return error;
+  }
 
   public long getFinishTime() {
     return finishTime;
diff --git a/tez-dag/src/main/java/org/apache/tez/dag/history/logging/impl/HistoryEventJsonConversion.java b/tez-dag/src/main/java/org/apache/tez/dag/history/logging/impl/HistoryEventJsonConversion.java
index 8560359..79a0c34 100644
--- a/tez-dag/src/main/java/org/apache/tez/dag/history/logging/impl/HistoryEventJsonConversion.java
+++ b/tez-dag/src/main/java/org/apache/tez/dag/history/logging/impl/HistoryEventJsonConversion.java
@@ -18,11 +18,9 @@
 
 package org.apache.tez.dag.history.logging.impl;
 
-import java.util.HashMap;
 import java.util.Map;
 import java.util.Map.Entry;
 
-import java.util.Map.Entry;
 import java.util.TreeMap;
 
 import org.apache.tez.common.ATSConstants;
@@ -485,6 +483,9 @@
     otherInfo.put(ATSConstants.FINISH_TIME, event.getFinishTime());
     otherInfo.put(ATSConstants.TIME_TAKEN, (event.getFinishTime() - event.getStartTime()));
     otherInfo.put(ATSConstants.STATUS, event.getState().name());
+    if (event.getTaskAttemptError() != null) {
+      otherInfo.put(ATSConstants.TASK_ATTEMPT_ERROR_ENUM, event.getTaskAttemptError().name());
+    }
     otherInfo.put(ATSConstants.DIAGNOSTICS, event.getDiagnostics());
     otherInfo.put(ATSConstants.COUNTERS,
         DAGUtils.convertCountersToJSON(event.getCounters()));
diff --git a/tez-dag/src/main/proto/HistoryEvents.proto b/tez-dag/src/main/proto/HistoryEvents.proto
index e8f323d..45e9582 100644
--- a/tez-dag/src/main/proto/HistoryEvents.proto
+++ b/tez-dag/src/main/proto/HistoryEvents.proto
@@ -166,6 +166,7 @@
   optional int32 state = 3;
   optional string diagnostics = 4;
   optional TezCountersProto counters = 5;
+  optional string error_enum = 6;
 }
 
 message EventMetaDataProto {
diff --git a/tez-dag/src/test/java/org/apache/tez/dag/app/MockClock.java b/tez-dag/src/test/java/org/apache/tez/dag/app/MockClock.java
new file mode 100644
index 0000000..d015714
--- /dev/null
+++ b/tez-dag/src/test/java/org/apache/tez/dag/app/MockClock.java
@@ -0,0 +1,36 @@
+/**
+* Licensed to the Apache Software Foundation (ASF) under one
+* or more contributor license agreements.  See the NOTICE file
+* distributed with this work for additional information
+* regarding copyright ownership.  The ASF licenses this file
+* to you under the Apache License, Version 2.0 (the
+* "License"); you may not use this file except in compliance
+* with the License.  You may obtain a copy of the License at
+*
+*     http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+*/
+
+package org.apache.tez.dag.app;
+
+import org.apache.hadoop.yarn.util.Clock;
+
+public class MockClock implements Clock {
+  
+  long time = 1000;
+
+  @Override
+  public long getTime() {
+    return time;
+  }
+  
+  public void incrementTime(long inc) {
+    time += inc;
+  }
+
+}
diff --git a/tez-dag/src/test/java/org/apache/tez/dag/app/MockDAGAppMaster.java b/tez-dag/src/test/java/org/apache/tez/dag/app/MockDAGAppMaster.java
index b4109e7..a548e3c 100644
--- a/tez-dag/src/test/java/org/apache/tez/dag/app/MockDAGAppMaster.java
+++ b/tez-dag/src/test/java/org/apache/tez/dag/app/MockDAGAppMaster.java
@@ -50,6 +50,7 @@
 import org.apache.tez.dag.records.TezTaskID;
 import org.apache.tez.dag.records.TezVertexID;
 import org.apache.tez.runtime.api.events.TaskAttemptCompletedEvent;
+import org.apache.tez.runtime.api.events.TaskStatusUpdateEvent;
 import org.apache.tez.runtime.api.impl.EventMetaData;
 import org.apache.tez.runtime.api.impl.TezEvent;
 import org.apache.tez.runtime.api.impl.EventMetaData.EventProducerConsumerType;
@@ -75,9 +76,12 @@
     
     AtomicBoolean startScheduling = new AtomicBoolean(true);
     AtomicBoolean goFlag;
+    boolean updateProgress = true;
     
     Map<TezTaskID, Integer> preemptedTasks = Maps.newConcurrentMap();
     
+    Map<TezTaskAttemptID, Integer> tasksWithStatusUpdates = Maps.newConcurrentMap();
+    
     public MockContainerLauncher(AtomicBoolean goFlag) {
       super("MockContainerLauncher");
       this.goFlag = goFlag;
@@ -88,6 +92,7 @@
       TezTaskAttemptID taId;
       String vName;
       ContainerLaunchContext launchContext;
+      int numUpdates = 0;
       boolean completed;
       
       public ContainerData(ContainerId cId, ContainerLaunchContext context) {
@@ -149,6 +154,10 @@
     public void startScheduling(boolean value) {
       startScheduling.set(value);
     }
+    
+    public void updateProgress(boolean value) {
+      this.updateProgress = value;
+    }
 
     public Map<ContainerId, ContainerData> getContainers() {
       return containers;
@@ -164,6 +173,10 @@
       cData.clear();
     }
     
+    public void setStatusUpdatesForTask(TezTaskAttemptID tId, int numUpdates) {
+      tasksWithStatusUpdates.put(tId, numUpdates);
+    }
+    
     void stop(NMCommunicatorStopRequestEvent event) {
       // remove from simulated container list
       containers.remove(event.getContainerId());
@@ -183,6 +196,13 @@
         Thread.sleep(50);
       }
     }
+    
+    void incrementTime(long inc) {
+      Clock clock = getContext().getClock();
+      if (clock instanceof MockClock) {
+        ((MockClock) clock).incrementTime(inc);
+      }
+    }
 
     @Override
     public void run() {
@@ -192,6 +212,7 @@
         if (!startScheduling.get()) { // schedule when asked to do so by the test code
           continue;
         }
+        incrementTime(1000);
         for (Map.Entry<ContainerId, ContainerData> entry : containers.entrySet()) {
           ContainerData cData = entry.getValue();
           ContainerId cId = entry.getKey();
@@ -214,8 +235,19 @@
           } else if (!cData.completed) {
             // container is assigned a task and task is not completed
             // complete the task or preempt the task
-            Integer version = preemptedTasks.get(cData.taId.getTaskID()); 
-            if (version != null && cData.taId.getId() <= version.intValue()) {
+            Integer version = preemptedTasks.get(cData.taId.getTaskID());
+            Integer updatesToMake = tasksWithStatusUpdates.get(cData.taId);
+            if (cData.numUpdates == 0 || // do at least one update
+                updatesToMake != null && cData.numUpdates < updatesToMake) {
+              cData.numUpdates++;
+              float maxUpdates = (updatesToMake != null) ? updatesToMake.intValue() : 1;
+              float progress = updateProgress ? cData.numUpdates/maxUpdates : 0f;
+              TezVertexID vertexId = cData.taId.getTaskID().getVertexID();
+              getContext().getEventHandler().handle(
+                  new VertexEventRouteEvent(vertexId, Collections.singletonList(new TezEvent(
+                      new TaskStatusUpdateEvent(null, progress), new EventMetaData(
+                          EventProducerConsumerType.SYSTEM, cData.vName, "", cData.taId)))));
+            } else if (version != null && cData.taId.getId() <= version.intValue()) {
               preemptContainer(cData);
             } else {
               // send a done notification
diff --git a/tez-dag/src/test/java/org/apache/tez/dag/app/MockLocalClient.java b/tez-dag/src/test/java/org/apache/tez/dag/app/MockLocalClient.java
index 7e408e1..2631e3c 100644
--- a/tez-dag/src/test/java/org/apache/tez/dag/app/MockLocalClient.java
+++ b/tez-dag/src/test/java/org/apache/tez/dag/app/MockLocalClient.java
@@ -29,16 +29,18 @@
 public class MockLocalClient extends LocalClient {
   MockDAGAppMaster mockApp;
   AtomicBoolean mockAppLauncherGoFlag;
+  Clock mockClock;
   
-  public MockLocalClient(AtomicBoolean mockAppLauncherGoFlag) {
+  public MockLocalClient(AtomicBoolean mockAppLauncherGoFlag, Clock clock) {
     this.mockAppLauncherGoFlag = mockAppLauncherGoFlag;
+    this.mockClock = clock;
   }
   
   protected DAGAppMaster createDAGAppMaster(ApplicationAttemptId applicationAttemptId,
       ContainerId cId, String currentHost, int nmPort, int nmHttpPort,
       Clock clock, long appSubmitTime, boolean isSession, String userDir) {
     mockApp = new MockDAGAppMaster(applicationAttemptId, cId, currentHost, nmPort, nmHttpPort,
-        new SystemClock(), appSubmitTime, isSession, userDir, mockAppLauncherGoFlag);
+        (mockClock!=null ? mockClock : clock), appSubmitTime, isSession, userDir, mockAppLauncherGoFlag);
     return mockApp;
   }
   
diff --git a/tez-dag/src/test/java/org/apache/tez/dag/app/MockTezClient.java b/tez-dag/src/test/java/org/apache/tez/dag/app/MockTezClient.java
index 617415e..0ff3340 100644
--- a/tez-dag/src/test/java/org/apache/tez/dag/app/MockTezClient.java
+++ b/tez-dag/src/test/java/org/apache/tez/dag/app/MockTezClient.java
@@ -23,6 +23,7 @@
 
 import org.apache.hadoop.security.Credentials;
 import org.apache.hadoop.yarn.api.records.LocalResource;
+import org.apache.hadoop.yarn.util.Clock;
 import org.apache.tez.client.FrameworkClient;
 import org.apache.tez.client.TezClient;
 import org.apache.tez.dag.api.TezConfiguration;
@@ -32,9 +33,9 @@
   
   MockTezClient(String name, TezConfiguration tezConf, boolean isSession,
       Map<String, LocalResource> localResources, Credentials credentials,
-      AtomicBoolean mockAppLauncherGoFlag) {
+      Clock clock, AtomicBoolean mockAppLauncherGoFlag) {
     super(name, tezConf, isSession, localResources, credentials);
-    this.client = new MockLocalClient(mockAppLauncherGoFlag);
+    this.client = new MockLocalClient(mockAppLauncherGoFlag, clock);
   }
   
   protected FrameworkClient createFrameworkClient() {
diff --git a/tez-dag/src/test/java/org/apache/tez/dag/app/TestMockDAGAppMaster.java b/tez-dag/src/test/java/org/apache/tez/dag/app/TestMockDAGAppMaster.java
index 8650aea..682e6ed 100644
--- a/tez-dag/src/test/java/org/apache/tez/dag/app/TestMockDAGAppMaster.java
+++ b/tez-dag/src/test/java/org/apache/tez/dag/app/TestMockDAGAppMaster.java
@@ -67,7 +67,7 @@
   public void testLocalResourceSetup() throws Exception {
     TezConfiguration tezconf = new TezConfiguration(defaultConf);
     
-    MockTezClient tezClient = new MockTezClient("testMockAM", tezconf, true, null, null, null);
+    MockTezClient tezClient = new MockTezClient("testMockAM", tezconf, true, null, null, null, null);
     tezClient.start();
     
     MockDAGAppMaster mockApp = tezClient.getLocalClient().getMockApp();
@@ -119,7 +119,7 @@
 
     TezConfiguration tezconf = new TezConfiguration(defaultConf);
     
-    MockTezClient tezClient = new MockTezClient("testMockAM", tezconf, true, null, null, null);
+    MockTezClient tezClient = new MockTezClient("testMockAM", tezconf, true, null, null, null, null);
     tezClient.start();
     DAGClient dagClient = tezClient.submitDAG(dag);
     dagClient.waitForCompletion();
@@ -127,7 +127,7 @@
     tezClient.stop();
     
     // submit the same DAG again to verify it can be done.
-    tezClient = new MockTezClient("testMockAM", tezconf, true, null, null, null);
+    tezClient = new MockTezClient("testMockAM", tezconf, true, null, null, null, null);
     tezClient.start();
     dagClient = tezClient.submitDAG(dag);
     dagClient.waitForCompletion();
@@ -139,7 +139,7 @@
   public void testSchedulerErrorHandling() throws Exception {
     TezConfiguration tezconf = new TezConfiguration(defaultConf);
 
-    MockTezClient tezClient = new MockTezClient("testMockAM", tezconf, true, null, null, null);
+    MockTezClient tezClient = new MockTezClient("testMockAM", tezconf, true, null, null, null, null);
     tezClient.start();
 
     MockDAGAppMaster mockApp = tezClient.getLocalClient().getMockApp();
diff --git a/tez-dag/src/test/java/org/apache/tez/dag/app/TestPreemption.java b/tez-dag/src/test/java/org/apache/tez/dag/app/TestPreemption.java
index 0958c48..8cc2e8b 100644
--- a/tez-dag/src/test/java/org/apache/tez/dag/app/TestPreemption.java
+++ b/tez-dag/src/test/java/org/apache/tez/dag/app/TestPreemption.java
@@ -41,6 +41,7 @@
 import org.apache.tez.dag.app.dag.TaskAttemptStateInternal;
 import org.apache.tez.dag.app.dag.impl.DAGImpl;
 import org.apache.tez.dag.app.dag.impl.TaskAttemptImpl;
+import org.apache.tez.dag.records.TaskAttemptTerminationCause;
 import org.apache.tez.dag.records.TezTaskAttemptID;
 import org.apache.tez.dag.records.TezTaskID;
 import org.apache.tez.dag.records.TezVertexID;
@@ -92,7 +93,7 @@
     tezconf.setInt(TezConfiguration.TEZ_AM_TASK_MAX_FAILED_ATTEMPTS, 0);
     AtomicBoolean mockAppLauncherGoFlag = new AtomicBoolean(false);
     MockTezClient tezClient = new MockTezClient("testPreemption", tezconf, false, null, null,
-        mockAppLauncherGoFlag);
+        null, mockAppLauncherGoFlag);
     tezClient.start();
     
     DAGClient dagClient = tezClient.submitDAG(createDAG(DataMovementType.SCATTER_GATHER));
@@ -148,7 +149,7 @@
     tezconf.setInt(TezConfiguration.TEZ_AM_TASK_MAX_FAILED_ATTEMPTS, 0);
     AtomicBoolean mockAppLauncherGoFlag = new AtomicBoolean(false);
     MockTezClient tezClient = new MockTezClient("testPreemption", tezconf, true, null, null,
-        mockAppLauncherGoFlag);
+        null, mockAppLauncherGoFlag);
     tezClient.start();
     syncWithMockAppLauncher(false, mockAppLauncherGoFlag, tezClient);
     return tezClient;
@@ -200,6 +201,7 @@
       TezTaskAttemptID testTaId = TezTaskAttemptID.getInstance(TezTaskID.getInstance(vertexId, 0), i);      
       TaskAttemptImpl taImpl = dagImpl.getTaskAttempt(testTaId);
       Assert.assertEquals(TaskAttemptStateInternal.KILLED, taImpl.getInternalState());
+      Assert.assertEquals(TaskAttemptTerminationCause.EXTERNAL_PREEMPTION, taImpl.getTerminationCause());
     }
     
     System.out.println("TestPreemption - Done running - " + info);
diff --git a/tez-dag/src/test/java/org/apache/tez/dag/app/TestSpeculation.java b/tez-dag/src/test/java/org/apache/tez/dag/app/TestSpeculation.java
new file mode 100644
index 0000000..c349957
--- /dev/null
+++ b/tez-dag/src/test/java/org/apache/tez/dag/app/TestSpeculation.java
@@ -0,0 +1,184 @@
+/**
+* Licensed to the Apache Software Foundation (ASF) under one
+* or more contributor license agreements.  See the NOTICE file
+* distributed with this work for additional information
+* regarding copyright ownership.  The ASF licenses this file
+* to you under the Apache License, Version 2.0 (the
+* "License"); you may not use this file except in compliance
+* with the License.  You may obtain a copy of the License at
+*
+*     http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+*/
+
+package org.apache.tez.dag.app;
+
+import java.io.IOException;
+import java.util.concurrent.atomic.AtomicBoolean;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.tez.common.counters.DAGCounter;
+import org.apache.tez.common.counters.TaskCounter;
+import org.apache.tez.dag.api.DAG;
+import org.apache.tez.dag.api.ProcessorDescriptor;
+import org.apache.tez.dag.api.TezConfiguration;
+import org.apache.tez.dag.api.Vertex;
+import org.apache.tez.dag.api.client.DAGClient;
+import org.apache.tez.dag.api.client.DAGStatus;
+import org.apache.tez.dag.app.MockDAGAppMaster.MockContainerLauncher;
+import org.apache.tez.dag.app.dag.Task;
+import org.apache.tez.dag.app.dag.TaskAttempt;
+import org.apache.tez.dag.app.dag.impl.DAGImpl;
+import org.apache.tez.dag.records.TaskAttemptTerminationCause;
+import org.apache.tez.dag.records.TezTaskAttemptID;
+import org.apache.tez.dag.records.TezTaskID;
+import org.apache.tez.dag.records.TezVertexID;
+import org.junit.Assert;
+import org.junit.Test;
+
+import com.google.common.base.Joiner;
+
+
+@SuppressWarnings("deprecation")
+public class TestSpeculation {
+  static Configuration defaultConf;
+  static FileSystem localFs;
+  static Path workDir;
+  
+  MockDAGAppMaster mockApp;
+  MockContainerLauncher mockLauncher;
+  
+  static {
+    try {
+      defaultConf = new Configuration(false);
+      defaultConf.set("fs.defaultFS", "file:///");
+      defaultConf.setBoolean(TezConfiguration.TEZ_LOCAL_MODE, true);
+      defaultConf.setBoolean(TezConfiguration.TEZ_AM_SPECULATION_ENABLED, true);
+      defaultConf.setInt(TezConfiguration.TEZ_AM_INLINE_TASK_EXECUTION_MAX_TASKS, 2);
+      localFs = FileSystem.getLocal(defaultConf);
+      workDir = new Path(new Path(System.getProperty("test.build.data", "/tmp")),
+          "TestSpeculation").makeQualified(localFs);
+    } catch (IOException e) {
+      throw new RuntimeException("init failure", e);
+    }
+  }
+  
+  MockTezClient createTezSession() throws Exception {
+    TezConfiguration tezconf = new TezConfiguration(defaultConf);
+    AtomicBoolean mockAppLauncherGoFlag = new AtomicBoolean(false);
+    MockTezClient tezClient = new MockTezClient("testspeculation", tezconf, true, null, null,
+        new MockClock(), mockAppLauncherGoFlag);
+    tezClient.start();
+    syncWithMockAppLauncher(false, mockAppLauncherGoFlag, tezClient);
+    return tezClient;
+  }
+  
+  void syncWithMockAppLauncher(boolean allowScheduling, AtomicBoolean mockAppLauncherGoFlag, 
+      MockTezClient tezClient) throws Exception {
+    synchronized (mockAppLauncherGoFlag) {
+      while (!mockAppLauncherGoFlag.get()) {
+        mockAppLauncherGoFlag.wait();
+      }
+      mockApp = tezClient.getLocalClient().getMockApp();
+      mockLauncher = mockApp.getContainerLauncher();
+      mockLauncher.startScheduling(allowScheduling);
+      mockAppLauncherGoFlag.notify();
+    }     
+  }
+  
+  public void testBasicSpeculation(boolean withProgress) throws Exception {
+    DAG dag = DAG.create("test");
+    Vertex vA = Vertex.create("A", ProcessorDescriptor.create("Proc.class"), 5);
+    dag.addVertex(vA);
+
+    MockTezClient tezClient = createTezSession();
+    
+    DAGClient dagClient = tezClient.submitDAG(dag);
+    DAGImpl dagImpl = (DAGImpl) mockApp.getContext().getCurrentDAG();
+    TezVertexID vertexId = TezVertexID.getInstance(dagImpl.getID(), 0);
+    // original attempt is killed and speculative one is successful
+    TezTaskAttemptID killedTaId = TezTaskAttemptID.getInstance(TezTaskID.getInstance(vertexId, 0), 0);
+    TezTaskAttemptID successTaId = TezTaskAttemptID.getInstance(TezTaskID.getInstance(vertexId, 0), 1);
+
+    mockLauncher.updateProgress(withProgress);
+    mockLauncher.setStatusUpdatesForTask(killedTaId, 100);
+
+    mockLauncher.startScheduling(true);
+    dagClient.waitForCompletion();
+    Assert.assertEquals(DAGStatus.State.SUCCEEDED, dagClient.getDAGStatus(null).getState());
+    Task task = dagImpl.getTask(killedTaId.getTaskID());
+    Assert.assertEquals(2, task.getAttempts().size());
+    Assert.assertEquals(successTaId, task.getSuccessfulAttempt().getID());
+    TaskAttempt killedAttempt = task.getAttempt(killedTaId);
+    Joiner.on(",").join(killedAttempt.getDiagnostics()).contains("Killed as speculative attempt");
+    Assert.assertEquals(TaskAttemptTerminationCause.TERMINATED_EFFECTIVE_SPECULATION, 
+        killedAttempt.getTerminationCause());
+    if (withProgress) {
+      // without progress updates, occasionally more than 1 task speculates
+      Assert.assertEquals(1, task.getCounters().findCounter(TaskCounter.NUM_SPECULATIONS)
+          .getValue());
+      Assert.assertEquals(1, dagImpl.getAllCounters().findCounter(TaskCounter.NUM_SPECULATIONS)
+          .getValue());
+      org.apache.tez.dag.app.dag.Vertex v = dagImpl.getVertex(killedTaId.getTaskID().getVertexID());
+      Assert.assertEquals(1, v.getAllCounters().findCounter(TaskCounter.NUM_SPECULATIONS)
+          .getValue());
+    }
+    tezClient.stop();
+  }
+  
+  @Test (timeout=10000)
+  public void testBasicSpeculationWithProgress() throws Exception {
+    testBasicSpeculation(true);
+  }
+
+  @Test (timeout=10000)
+  public void testBasicSpeculationWithoutProgress() throws Exception {
+    testBasicSpeculation(false);
+  }
+
+  @Test (timeout=10000)
+  public void testBasicSpeculationNotUseful() throws Exception {
+    DAG dag = DAG.create("test");
+    Vertex vA = Vertex.create("A", ProcessorDescriptor.create("Proc.class"), 5);
+    dag.addVertex(vA);
+
+    MockTezClient tezClient = createTezSession();
+    
+    DAGClient dagClient = tezClient.submitDAG(dag);
+    DAGImpl dagImpl = (DAGImpl) mockApp.getContext().getCurrentDAG();
+    TezVertexID vertexId = TezVertexID.getInstance(dagImpl.getID(), 0);
+    // original attempt is successful and speculative one is killed
+    TezTaskAttemptID successTaId = TezTaskAttemptID.getInstance(TezTaskID.getInstance(vertexId, 0), 0);
+    TezTaskAttemptID killedTaId = TezTaskAttemptID.getInstance(TezTaskID.getInstance(vertexId, 0), 1);
+
+    mockLauncher.setStatusUpdatesForTask(successTaId, 100);
+    mockLauncher.setStatusUpdatesForTask(killedTaId, 100);
+
+    mockLauncher.startScheduling(true);
+    dagClient.waitForCompletion();
+    Assert.assertEquals(DAGStatus.State.SUCCEEDED, dagClient.getDAGStatus(null).getState());
+    Task task = dagImpl.getTask(killedTaId.getTaskID());
+    Assert.assertEquals(2, task.getAttempts().size());
+    Assert.assertEquals(successTaId, task.getSuccessfulAttempt().getID());
+    TaskAttempt killedAttempt = task.getAttempt(killedTaId);
+    Joiner.on(",").join(killedAttempt.getDiagnostics()).contains("Killed speculative attempt as");
+    Assert.assertEquals(TaskAttemptTerminationCause.TERMINATED_INEFFECTIVE_SPECULATION, 
+        killedAttempt.getTerminationCause());
+    Assert.assertEquals(1, task.getCounters().findCounter(TaskCounter.NUM_SPECULATIONS)
+        .getValue());
+    Assert.assertEquals(1, dagImpl.getAllCounters().findCounter(TaskCounter.NUM_SPECULATIONS)
+        .getValue());
+    org.apache.tez.dag.app.dag.Vertex v = dagImpl.getVertex(killedTaId.getTaskID().getVertexID());
+    Assert.assertEquals(1, v.getAllCounters().findCounter(TaskCounter.NUM_SPECULATIONS)
+        .getValue());
+    tezClient.stop();
+  }
+
+}
diff --git a/tez-dag/src/test/java/org/apache/tez/dag/app/dag/impl/TestDAGImpl.java b/tez-dag/src/test/java/org/apache/tez/dag/app/dag/impl/TestDAGImpl.java
index d859ae0..ba92c40 100644
--- a/tez-dag/src/test/java/org/apache/tez/dag/app/dag/impl/TestDAGImpl.java
+++ b/tez-dag/src/test/java/org/apache/tez/dag/app/dag/impl/TestDAGImpl.java
@@ -840,6 +840,10 @@
     dagWithCustomEdge.handle(
         new DAGEvent(dagWithCustomEdge.getID(), DAGEventType.DAG_INIT));
     Assert.assertEquals(DAGState.FAILED, dagWithCustomEdge.getState());
+    // A START event follows the INIT event; the DAG must remain FAILED
+    dagWithCustomEdge.handle(new DAGEvent(dagWithCustomEdge.getID(), DAGEventType.DAG_START));
+    dispatcher.await();
+    Assert.assertEquals(DAGState.FAILED, dagWithCustomEdge.getState());
   }
 
   @Test(timeout = 5000)
diff --git a/tez-dag/src/test/java/org/apache/tez/dag/app/dag/impl/TestTaskAttempt.java b/tez-dag/src/test/java/org/apache/tez/dag/app/dag/impl/TestTaskAttempt.java
index c98e3de..29469b1 100644
--- a/tez-dag/src/test/java/org/apache/tez/dag/app/dag/impl/TestTaskAttempt.java
+++ b/tez-dag/src/test/java/org/apache/tez/dag/app/dag/impl/TestTaskAttempt.java
@@ -56,6 +56,7 @@
 import org.apache.hadoop.yarn.util.Clock;
 import org.apache.hadoop.yarn.util.SystemClock;
 import org.apache.tez.dag.api.TaskLocationHint;
+import org.apache.tez.dag.api.TezConfiguration;
 import org.apache.tez.dag.api.oldrecords.TaskAttemptState;
 import org.apache.tez.dag.app.AppContext;
 import org.apache.tez.dag.app.ClusterInfo;
@@ -69,6 +70,7 @@
 import org.apache.tez.dag.app.dag.event.DAGEventCounterUpdate;
 import org.apache.tez.dag.app.dag.event.DAGEventType;
 import org.apache.tez.dag.app.dag.event.TaskAttemptEvent;
+import org.apache.tez.dag.app.dag.event.TaskAttemptEventAttemptFailed;
 import org.apache.tez.dag.app.dag.event.TaskAttemptEventContainerTerminated;
 import org.apache.tez.dag.app.dag.event.TaskAttemptEventContainerTerminating;
 import org.apache.tez.dag.app.dag.event.TaskAttemptEventKillRequest;
@@ -76,17 +78,22 @@
 import org.apache.tez.dag.app.dag.event.TaskAttemptEventOutputFailed;
 import org.apache.tez.dag.app.dag.event.TaskAttemptEventSchedule;
 import org.apache.tez.dag.app.dag.event.TaskAttemptEventStartedRemotely;
+import org.apache.tez.dag.app.dag.event.TaskAttemptEventStatusUpdate;
 import org.apache.tez.dag.app.dag.event.TaskAttemptEventType;
 import org.apache.tez.dag.app.dag.event.TaskEventTAUpdate;
+import org.apache.tez.dag.app.dag.event.TaskEventType;
+import org.apache.tez.dag.app.dag.event.VertexEventTaskAttemptStatusUpdate;
 import org.apache.tez.dag.app.rm.AMSchedulerEventTAEnded;
 import org.apache.tez.dag.app.rm.AMSchedulerEventTALaunchRequest;
 import org.apache.tez.dag.app.rm.container.AMContainerMap;
 import org.apache.tez.dag.app.rm.container.ContainerContextMatcher;
+import org.apache.tez.dag.records.TaskAttemptTerminationCause;
 import org.apache.tez.dag.records.TezDAGID;
 import org.apache.tez.dag.records.TezTaskAttemptID;
 import org.apache.tez.dag.records.TezTaskID;
 import org.apache.tez.dag.records.TezVertexID;
 import org.apache.tez.runtime.api.events.InputReadErrorEvent;
+import org.apache.tez.runtime.api.events.TaskStatusUpdateEvent;
 import org.apache.tez.runtime.api.impl.EventMetaData;
 import org.apache.tez.runtime.api.impl.TaskSpec;
 import org.apache.tez.runtime.api.impl.TezEvent;
@@ -285,9 +292,11 @@
 
     taImpl.handle(new TaskAttemptEventSchedule(taskAttemptID, 0, 0));
     // At state STARTING.
-    taImpl.handle(new TaskAttemptEventKillRequest(taskAttemptID, null));
+    taImpl.handle(new TaskAttemptEventKillRequest(taskAttemptID, null,
+        TaskAttemptTerminationCause.TERMINATED_BY_CLIENT));
     // At some KILLING state.
-    taImpl.handle(new TaskAttemptEventKillRequest(taskAttemptID, null));
+    taImpl.handle(new TaskAttemptEventKillRequest(taskAttemptID, null,
+        TaskAttemptTerminationCause.TERMINATED_BY_CLIENT));
     // taImpl.handle(new TaskAttemptEventContainerTerminating(taskAttemptID,
     // null));
     assertFalse(eventHandler.internalError);
@@ -352,7 +361,7 @@
     verify(eventHandler, times(expectedEventsAtRunning)).handle(arg.capture());
 
     taImpl.handle(new TaskAttemptEventContainerTerminating(taskAttemptID,
-        "Terminating"));
+        "Terminating", TaskAttemptTerminationCause.APPLICATION_ERROR));
     assertFalse(
         "InternalError occurred trying to handle TA_CONTAINER_TERMINATING",
         eventHandler.internalError);
@@ -362,6 +371,7 @@
 
     assertEquals(1, taImpl.getDiagnostics().size());
     assertEquals("Terminating", taImpl.getDiagnostics().get(0));
+    assertEquals(TaskAttemptTerminationCause.APPLICATION_ERROR, taImpl.getTerminationCause());
 
     int expectedEvenstAfterTerminating = expectedEventsAtRunning + 3;
     arg = ArgumentCaptor.forClass(Event.class);
@@ -378,13 +388,16 @@
             expectedEvenstAfterTerminating), DAGEventCounterUpdate.class, 1);
 
     taImpl.handle(new TaskAttemptEventContainerTerminated(taskAttemptID,
-        "Terminated"));
+        "Terminated", TaskAttemptTerminationCause.CONTAINER_EXITED));
     int expectedEventAfterTerminated = expectedEvenstAfterTerminating + 0;
     arg = ArgumentCaptor.forClass(Event.class);
     verify(eventHandler, times(expectedEventAfterTerminated)).handle(arg.capture());
 
     assertEquals(2, taImpl.getDiagnostics().size());
     assertEquals("Terminated", taImpl.getDiagnostics().get(1));
+    
+    // check that original error cause is retained
+    assertEquals(TaskAttemptTerminationCause.APPLICATION_ERROR, taImpl.getTerminationCause());
   }
 
 
@@ -439,13 +452,14 @@
         null));
     assertEquals("Task attempt is not in running state", taImpl.getState(),
         TaskAttemptState.RUNNING);
-    taImpl.handle(new TaskAttemptEventContainerTerminated(taskAttemptID, "Terminated"));
+    taImpl.handle(new TaskAttemptEventContainerTerminated(taskAttemptID, "Terminated",
+        TaskAttemptTerminationCause.CONTAINER_EXITED));
     assertFalse(
         "InternalError occurred trying to handle TA_CONTAINER_TERMINATED",
         eventHandler.internalError);
 
     assertEquals("Terminated", taImpl.getDiagnostics().get(0));
-
+    assertEquals(TaskAttemptTerminationCause.CONTAINER_EXITED, taImpl.getTerminationCause());
     // TODO Ensure TA_TERMINATING after this is ingored.
   }
 
@@ -529,16 +543,206 @@
             expectedEvenstAfterTerminating), DAGEventCounterUpdate.class, 1);
 
     taImpl.handle(new TaskAttemptEventContainerTerminated(taskAttemptID,
-        "Terminated"));
+        "Terminated", TaskAttemptTerminationCause.CONTAINER_EXITED));
     int expectedEventAfterTerminated = expectedEvenstAfterTerminating + 0;
     arg = ArgumentCaptor.forClass(Event.class);
     verify(eventHandler, times(expectedEventAfterTerminated)).handle(arg.capture());
 
     // Verify that the diagnostic message included in the Terminated event is not
-    // captured - TA already succeeded.
+    // captured - TA already succeeded. Error cause is the default value.
     assertEquals(0, taImpl.getDiagnostics().size());
+    assertEquals(TaskAttemptTerminationCause.UNKNOWN_ERROR, taImpl.getTerminationCause());
   }
+  
+  @Test(timeout = 5000)
+  // Ensure a TA_FAILED reported while RUNNING moves the attempt to FAIL_IN_PROGRESS with
+  // cause APPLICATION_ERROR, and that a later container termination only appends a
+  // diagnostic without replacing that original cause.
+  public void testFailure() throws Exception {
+    ApplicationId appId = ApplicationId.newInstance(1, 2);
+    ApplicationAttemptId appAttemptId = ApplicationAttemptId.newInstance(appId, 0);
+    TezDAGID dagID = TezDAGID.getInstance(appId, 1);
+    TezVertexID vertexID = TezVertexID.getInstance(dagID, 1);
+    TezTaskID taskID = TezTaskID.getInstance(vertexID, 1);
+    TezTaskAttemptID taskAttemptID = TezTaskAttemptID.getInstance(taskID, 0);
 
+    MockEventHandler eventHandler = spy(new MockEventHandler());
+    TaskAttemptListener taListener = mock(TaskAttemptListener.class);
+    when(taListener.getAddress()).thenReturn(new InetSocketAddress("localhost", 0));
+
+    Configuration taskConf = new Configuration();
+    taskConf.setClass("fs.file.impl", StubbedFS.class, FileSystem.class);
+    taskConf.setBoolean("fs.file.impl.disable.cache", true);
+    taskConf.setBoolean(TezConfiguration.TEZ_AM_SPECULATION_ENABLED, true);
+
+    TaskLocationHint locationHint = TaskLocationHint.createTaskLocationHint(
+        new HashSet<String>(Arrays.asList(new String[]{"127.0.0.1"})), null);
+    Resource resource = Resource.newInstance(1024, 1);
+
+    NodeId nid = NodeId.newInstance("127.0.0.1", 0);
+    ContainerId contId = ContainerId.newInstance(appAttemptId, 3);
+    Container container = mock(Container.class);
+    when(container.getId()).thenReturn(contId);
+    when(container.getNodeId()).thenReturn(nid);
+    when(container.getNodeHttpAddress()).thenReturn("localhost:0");
+
+    AppContext appCtx = mock(AppContext.class);
+    AMContainerMap containers = new AMContainerMap(
+        mock(ContainerHeartbeatHandler.class), mock(TaskAttemptListener.class),
+        new ContainerContextMatcher(), appCtx);
+    containers.addContainerIfNew(container);
+
+    doReturn(new ClusterInfo()).when(appCtx).getClusterInfo();
+    doReturn(containers).when(appCtx).getAllContainers();
+
+    TaskAttemptImpl taImpl = new MockTaskAttemptImpl(taskID, 1, eventHandler,
+        taListener, taskConf, new SystemClock(),
+        mock(TaskHeartbeatHandler.class), appCtx, locationHint, false,
+        resource, createFakeContainerContext(), false);
+
+    ArgumentCaptor<Event> arg = ArgumentCaptor.forClass(Event.class);
+
+    taImpl.handle(new TaskAttemptEventSchedule(taskAttemptID, 0, 0));
+    // At state STARTING.
+    taImpl.handle(new TaskAttemptEventStartedRemotely(taskAttemptID, contId, null));
+    assertEquals("Task attempt is not in the RUNNING state", TaskAttemptState.RUNNING,
+        taImpl.getState());
+
+    int expectedEventsAtRunning = 4;
+    verify(eventHandler, times(expectedEventsAtRunning)).handle(arg.capture());
+    verifyEventType(
+        arg.getAllValues().subList(0,
+            expectedEventsAtRunning), VertexEventTaskAttemptStatusUpdate.class, 1);
+
+    taImpl.handle(new TaskAttemptEventStatusUpdate(taskAttemptID, new TaskStatusUpdateEvent(null, 0.1f)));
+
+    taImpl.handle(new TaskAttemptEventAttemptFailed(taskAttemptID, TaskAttemptEventType.TA_FAILED, "0",
+        TaskAttemptTerminationCause.APPLICATION_ERROR));
+
+    assertEquals("Task attempt is not in the  FAIL_IN_PROGRESS state",
+        TaskAttemptStateInternal.FAIL_IN_PROGRESS, taImpl.getInternalState());
+
+    assertEquals(1, taImpl.getDiagnostics().size());
+    assertEquals("0", taImpl.getDiagnostics().get(0));
+    assertEquals(TaskAttemptTerminationCause.APPLICATION_ERROR, taImpl.getTerminationCause());
+
+    taImpl.handle(new TaskAttemptEventContainerTerminated(taskAttemptID, "1",
+        TaskAttemptTerminationCause.CONTAINER_EXITED));
+
+    assertEquals(2, taImpl.getDiagnostics().size());
+    assertEquals("1", taImpl.getDiagnostics().get(1));
+    // err cause does not change
+    assertEquals(TaskAttemptTerminationCause.APPLICATION_ERROR, taImpl.getTerminationCause());
+
+    int expectedEvenstAfterTerminating = expectedEventsAtRunning + 5;
+    arg = ArgumentCaptor.forClass(Event.class);
+    verify(eventHandler, times(expectedEvenstAfterTerminating)).handle(arg.capture());
+
+
+    Event e = verifyEventType(
+        arg.getAllValues().subList(expectedEventsAtRunning,
+            expectedEvenstAfterTerminating), TaskEventTAUpdate.class, 1);
+    assertEquals(TaskEventType.T_ATTEMPT_FAILED, e.getType());
+    verifyEventType(
+        arg.getAllValues().subList(expectedEventsAtRunning,
+            expectedEvenstAfterTerminating), AMSchedulerEventTAEnded.class, 1);
+    verifyEventType(
+        arg.getAllValues().subList(expectedEventsAtRunning,
+            expectedEvenstAfterTerminating), DAGEventCounterUpdate.class, 1);
+    verifyEventType(
+        arg.getAllValues().subList(expectedEventsAtRunning,
+            expectedEvenstAfterTerminating), VertexEventTaskAttemptStatusUpdate.class, 2);
+  }
+  
+  @Test(timeout = 5000)
+  // Ensure TA_DONE moves a RUNNING attempt to SUCCEEDED without recording any diagnostics,
+  // and that the task is notified with T_ATTEMPT_SUCCEEDED. Uses the same 5s timeout as the
+  // sibling tests so that a hang fails the test.
+  public void testSuccess() throws Exception {
+    ApplicationId appId = ApplicationId.newInstance(1, 2);
+    ApplicationAttemptId appAttemptId = ApplicationAttemptId.newInstance(appId, 0);
+    TezDAGID dagID = TezDAGID.getInstance(appId, 1);
+    TezVertexID vertexID = TezVertexID.getInstance(dagID, 1);
+    TezTaskID taskID = TezTaskID.getInstance(vertexID, 1);
+    TezTaskAttemptID taskAttemptID = TezTaskAttemptID.getInstance(taskID, 0);
+
+    MockEventHandler eventHandler = spy(new MockEventHandler());
+    TaskAttemptListener taListener = mock(TaskAttemptListener.class);
+    when(taListener.getAddress()).thenReturn(new InetSocketAddress("localhost", 0));
+
+    Configuration taskConf = new Configuration();
+    taskConf.setClass("fs.file.impl", StubbedFS.class, FileSystem.class);
+    taskConf.setBoolean("fs.file.impl.disable.cache", true);
+    taskConf.setBoolean(TezConfiguration.TEZ_AM_SPECULATION_ENABLED, true);
+
+    TaskLocationHint locationHint = TaskLocationHint.createTaskLocationHint(
+        new HashSet<String>(Arrays.asList(new String[]{"127.0.0.1"})), null);
+    Resource resource = Resource.newInstance(1024, 1);
+
+    NodeId nid = NodeId.newInstance("127.0.0.1", 0);
+    ContainerId contId = ContainerId.newInstance(appAttemptId, 3);
+    Container container = mock(Container.class);
+    when(container.getId()).thenReturn(contId);
+    when(container.getNodeId()).thenReturn(nid);
+    when(container.getNodeHttpAddress()).thenReturn("localhost:0");
+
+    AppContext appCtx = mock(AppContext.class);
+    AMContainerMap containers = new AMContainerMap(
+        mock(ContainerHeartbeatHandler.class), mock(TaskAttemptListener.class),
+        new ContainerContextMatcher(), appCtx);
+    containers.addContainerIfNew(container);
+
+    doReturn(new ClusterInfo()).when(appCtx).getClusterInfo();
+    doReturn(containers).when(appCtx).getAllContainers();
+
+    TaskAttemptImpl taImpl = new MockTaskAttemptImpl(taskID, 1, eventHandler,
+        taListener, taskConf, new SystemClock(),
+        mock(TaskHeartbeatHandler.class), appCtx, locationHint, false,
+        resource, createFakeContainerContext(), false);
+
+    ArgumentCaptor<Event> arg = ArgumentCaptor.forClass(Event.class);
+
+    taImpl.handle(new TaskAttemptEventSchedule(taskAttemptID, 0, 0));
+    // At state STARTING.
+    taImpl.handle(new TaskAttemptEventStartedRemotely(taskAttemptID, contId, null));
+    assertEquals("Task attempt is not in the RUNNING state", TaskAttemptState.RUNNING,
+        taImpl.getState());
+
+    int expectedEventsAtRunning = 4;
+    verify(eventHandler, times(expectedEventsAtRunning)).handle(arg.capture());
+    verifyEventType(
+        arg.getAllValues().subList(0,
+            expectedEventsAtRunning), VertexEventTaskAttemptStatusUpdate.class, 1);
+
+    taImpl.handle(new TaskAttemptEventStatusUpdate(taskAttemptID, new TaskStatusUpdateEvent(null, 0.1f)));
+
+    taImpl.handle(new TaskAttemptEvent(taskAttemptID, TaskAttemptEventType.TA_DONE));
+
+    assertEquals("Task attempt is not in the  SUCCEEDED state", TaskAttemptState.SUCCEEDED,
+        taImpl.getState());
+
+    assertEquals(0, taImpl.getDiagnostics().size());
+
+    int expectedEvenstAfterTerminating = expectedEventsAtRunning + 5;
+    arg = ArgumentCaptor.forClass(Event.class);
+    verify(eventHandler, times(expectedEvenstAfterTerminating)).handle(arg.capture());
+
+
+    Event e = verifyEventType(
+        arg.getAllValues().subList(expectedEventsAtRunning,
+            expectedEvenstAfterTerminating), TaskEventTAUpdate.class, 1);
+    assertEquals(TaskEventType.T_ATTEMPT_SUCCEEDED, e.getType());
+    verifyEventType(
+        arg.getAllValues().subList(expectedEventsAtRunning,
+            expectedEvenstAfterTerminating), AMSchedulerEventTAEnded.class, 1);
+    verifyEventType(
+        arg.getAllValues().subList(expectedEventsAtRunning,
+            expectedEvenstAfterTerminating), DAGEventCounterUpdate.class, 1);
+    verifyEventType(
+        arg.getAllValues().subList(expectedEventsAtRunning,
+            expectedEvenstAfterTerminating), VertexEventTaskAttemptStatusUpdate.class, 2);
+  }
+  
   @Test(timeout = 5000)
   // Ensure Container Preemption race with task completion is handled correctly by
   // the TaskAttempt
@@ -625,8 +829,9 @@
     verify(eventHandler, times(expectedEventAfterTerminated)).handle(arg.capture());
 
     // Verify that the diagnostic message included in the Terminated event is not
-    // captured - TA already succeeded.
+    // captured - TA already succeeded. Error cause should be the default value
     assertEquals(0, taImpl.getDiagnostics().size());
+    assertEquals(TaskAttemptTerminationCause.UNKNOWN_ERROR, taImpl.getTerminationCause());
   }
 
   @Test(timeout = 5000)
@@ -708,7 +913,8 @@
             expectedEvenstAfterTerminating), DAGEventCounterUpdate.class, 1);
 
     // Send out a Node Failure.
-    taImpl.handle(new TaskAttemptEventNodeFailed(taskAttemptID, "NodeDecomissioned"));
+    taImpl.handle(new TaskAttemptEventNodeFailed(taskAttemptID, "NodeDecomissioned",
+        TaskAttemptTerminationCause.NODE_FAILED));
     // Verify in KILLED state
     assertEquals("Task attempt is not in the  KILLED state", TaskAttemptState.KILLED,
         taImpl.getState());
@@ -724,6 +930,7 @@
     // Verify still in KILLED state
     assertEquals("Task attempt is not in the  KILLED state", TaskAttemptState.KILLED,
         taImpl.getState());
+    assertEquals(TaskAttemptTerminationCause.NODE_FAILED, taImpl.getTerminationCause());
   }
   
   @Test(timeout = 5000)
@@ -805,7 +1012,8 @@
             expectedEvenstAfterTerminating), DAGEventCounterUpdate.class, 1);
 
     // Send out a Node Failure.
-    taImpl.handle(new TaskAttemptEventNodeFailed(taskAttemptID, "NodeDecomissioned"));
+    taImpl.handle(new TaskAttemptEventNodeFailed(taskAttemptID, "NodeDecomissioned", 
+        TaskAttemptTerminationCause.NODE_FAILED));
 
     // Verify no additional events
     int expectedEventsNodeFailure = expectedEvenstAfterTerminating + 0;
@@ -815,6 +1023,8 @@
     // Verify still in SUCCEEDED state
     assertEquals("Task attempt is not in the  SUCCEEDED state", TaskAttemptState.SUCCEEDED,
         taImpl.getState());
+    // error cause remains as default value
+    assertEquals(TaskAttemptTerminationCause.UNKNOWN_ERROR, taImpl.getTerminationCause());
   }
 
   @Test(timeout = 5000)
@@ -893,6 +1103,8 @@
     taImpl.handle(new TaskAttemptEventOutputFailed(taskAttemptID, tzEvent, 4));
     assertEquals("Task attempt is not in succeeded state", taImpl.getState(),
         TaskAttemptState.SUCCEEDED);
+    // default value of error cause
+    assertEquals(TaskAttemptTerminationCause.UNKNOWN_ERROR, taImpl.getTerminationCause());
 
     // different destination attempt reports error. now threshold crossed
     TezTaskAttemptID mockDestId2 = mock(TezTaskAttemptID.class);
@@ -901,6 +1113,7 @@
     
     assertEquals("Task attempt is not in FAILED state", taImpl.getState(),
         TaskAttemptState.FAILED);
+    assertEquals(TaskAttemptTerminationCause.OUTPUT_LOST, taImpl.getTerminationCause());
 
     assertEquals(true, taImpl.inputFailedReported);
     int expectedEventsAfterFetchFailure = expectedEventsTillSucceeded + 2;
@@ -921,17 +1134,20 @@
         arg.capture());
   }
 
-  private void verifyEventType(List<Event> events,
+  private Event verifyEventType(List<Event> events, // asserts the count, then returns a match
       Class<? extends Event> eventClass, int expectedOccurences) {
     int count = 0;
+    Event ret = null; // stays null when no event of eventClass is found
     for (Event e : events) {
       if (eventClass.isInstance(e)) {
         count++;
+        ret = e; // overwritten on every match, so the last matching event wins
       }
     }
     assertEquals(
         "Mismatch in num occurences of event: " + eventClass.getCanonicalName(),
         expectedOccurences, count);
+    return ret; // last matching event, or null if none matched
   }
 
   public static class MockEventHandler implements EventHandler {
diff --git a/tez-dag/src/test/java/org/apache/tez/dag/app/dag/impl/TestTaskAttemptRecovery.java b/tez-dag/src/test/java/org/apache/tez/dag/app/dag/impl/TestTaskAttemptRecovery.java
index 143268b..100e8d9 100644
--- a/tez-dag/src/test/java/org/apache/tez/dag/app/dag/impl/TestTaskAttemptRecovery.java
+++ b/tez-dag/src/test/java/org/apache/tez/dag/app/dag/impl/TestTaskAttemptRecovery.java
@@ -46,6 +46,7 @@
 import org.apache.tez.dag.app.dag.event.TaskEventTAUpdate;
 import org.apache.tez.dag.history.events.TaskAttemptFinishedEvent;
 import org.apache.tez.dag.history.events.TaskAttemptStartedEvent;
+import org.apache.tez.dag.records.TaskAttemptTerminationCause;
 import org.apache.tez.dag.records.TezTaskAttemptID;
 import org.apache.tez.dag.records.TezTaskID;
 import org.junit.Before;
@@ -87,10 +88,15 @@
   private void restoreFromTAFinishedEvent(TaskAttemptState state) {
     String diag = "test_diag";
     TezCounters counters = mock(TezCounters.class);
+    
+    TaskAttemptTerminationCause errorEnum = null;
+    if (state != TaskAttemptState.SUCCEEDED) {
+      errorEnum = TaskAttemptTerminationCause.APPLICATION_ERROR;
+    }
 
     TaskAttemptState recoveredState =
         ta.restoreFromEvent(new TaskAttemptFinishedEvent(taId, vertexName,
-            startTime, finishTime, state, diag, counters));
+            startTime, finishTime, state, errorEnum, diag, counters));
     assertEquals(startTime, ta.getLaunchTime());
     assertEquals(finishTime, ta.getFinishTime());
     assertEquals(counters, ta.reportedStatus.counters);
@@ -99,6 +105,11 @@
     assertEquals(1, ta.getDiagnostics().size());
     assertEquals(diag, ta.getDiagnostics().get(0));
     assertEquals(state, recoveredState);
+    if (state != TaskAttemptState.SUCCEEDED) {
+      assertEquals(errorEnum, ta.getTerminationCause());
+    } else {
+      assertEquals(TaskAttemptTerminationCause.UNKNOWN_ERROR, ta.getTerminationCause());
+    }
   }
 
   private void verifyEvents(List<Event> events, Class<? extends Event> eventClass,
diff --git a/tez-dag/src/test/java/org/apache/tez/dag/app/dag/impl/TestTaskImpl.java b/tez-dag/src/test/java/org/apache/tez/dag/app/dag/impl/TestTaskImpl.java
index 88fa83d..e363dbd 100644
--- a/tez-dag/src/test/java/org/apache/tez/dag/app/dag/impl/TestTaskImpl.java
+++ b/tez-dag/src/test/java/org/apache/tez/dag/app/dag/impl/TestTaskImpl.java
@@ -54,7 +54,6 @@
 import org.apache.tez.dag.app.TaskHeartbeatHandler;
 import org.apache.tez.dag.app.dag.StateChangeNotifier;
 import org.apache.tez.dag.app.dag.TaskStateInternal;
-import org.apache.tez.dag.app.dag.TaskTerminationCause;
 import org.apache.tez.dag.app.dag.Vertex;
 import org.apache.tez.dag.app.dag.event.TaskAttemptEventDiagnosticsUpdate;
 import org.apache.tez.dag.app.dag.event.TaskEvent;
@@ -64,6 +63,7 @@
 import org.apache.tez.dag.app.dag.event.VertexEventType;
 import org.apache.tez.dag.app.rm.container.AMContainer;
 import org.apache.tez.dag.app.rm.node.AMNodeEventType;
+import org.apache.tez.dag.records.TaskAttemptTerminationCause;
 import org.apache.tez.dag.records.TezDAGID;
 import org.apache.tez.dag.records.TezTaskAttemptID;
 import org.apache.tez.dag.records.TezTaskID;
@@ -173,7 +173,7 @@
   }
 
   private void killTask(TezTaskID taskId) {
-    mockTask.handle(new TaskEventTermination(taskId, TaskTerminationCause.DAG_KILL));
+    mockTask.handle(new TaskEventTermination(taskId, TaskAttemptTerminationCause.TERMINATED_AT_SHUTDOWN, null));
     assertTaskKillWaitState();
   }
 
@@ -553,18 +553,18 @@
   @Test
   public void testDiagnostics_KillNew(){
     TezTaskID taskId = getNewTaskID();
-    mockTask.handle(new TaskEventTermination(taskId, TaskTerminationCause.DAG_KILL));
+    mockTask.handle(new TaskEventTermination(taskId, TaskAttemptTerminationCause.TERMINATED_BY_CLIENT, null));
     assertEquals(1, mockTask.getDiagnostics().size());
-    assertTrue(mockTask.getDiagnostics().get(0).contains(TaskTerminationCause.DAG_KILL.name()));
+    assertTrue(mockTask.getDiagnostics().get(0).contains(TaskAttemptTerminationCause.TERMINATED_BY_CLIENT.name()));
   }
   
   @Test
   public void testDiagnostics_Kill(){
     TezTaskID taskId = getNewTaskID();
     scheduleTaskAttempt(taskId);
-    mockTask.handle(new TaskEventTermination(taskId, TaskTerminationCause.OTHER_TASK_FAILURE));
+    mockTask.handle(new TaskEventTermination(taskId, TaskAttemptTerminationCause.TERMINATED_AT_SHUTDOWN, null));
     assertEquals(1, mockTask.getDiagnostics().size());
-    assertTrue(mockTask.getDiagnostics().get(0).contains(TaskTerminationCause.OTHER_TASK_FAILURE.name()));
+    assertTrue(mockTask.getDiagnostics().get(0).contains(TaskAttemptTerminationCause.TERMINATED_AT_SHUTDOWN.name()));
   }
 
   // TODO Add test to validate the correct commit attempt.
diff --git a/tez-dag/src/test/java/org/apache/tez/dag/app/dag/impl/TestTaskRecovery.java b/tez-dag/src/test/java/org/apache/tez/dag/app/dag/impl/TestTaskRecovery.java
index afc3433..d953fef 100644
--- a/tez-dag/src/test/java/org/apache/tez/dag/app/dag/impl/TestTaskRecovery.java
+++ b/tez-dag/src/test/java/org/apache/tez/dag/app/dag/impl/TestTaskRecovery.java
@@ -297,8 +297,8 @@
     long taFinishTime = taStartTime + 100L;
     TaskState recoveredState =
         task.restoreFromEvent(new TaskAttemptFinishedEvent(taId, vertexName,
-            taStartTime, taFinishTime, TaskAttemptState.SUCCEEDED, "",
-            new TezCounters()));
+            taStartTime, taFinishTime, TaskAttemptState.SUCCEEDED, null,
+            "", new TezCounters()));
     assertEquals(TaskState.SUCCEEDED, recoveredState);
     assertEquals(1, task.getAttempts().size());
     assertEquals(1, task.getFinishedAttemptsCount());
@@ -329,8 +329,8 @@
     long taFinishTime = taStartTime + 100L;
     TaskState recoveredState =
         task.restoreFromEvent(new TaskAttemptFinishedEvent(taId, vertexName,
-            taStartTime, taFinishTime, TaskAttemptState.FAILED, "",
-            new TezCounters()));
+            taStartTime, taFinishTime, TaskAttemptState.FAILED, null,
+            "", new TezCounters()));
     assertEquals(TaskState.RUNNING, recoveredState);
     assertEquals(1, task.getAttempts().size());
     assertEquals(1, task.getFinishedAttemptsCount());
@@ -362,8 +362,8 @@
     long taFinishTime = taStartTime + 100L;
     TaskState recoveredState =
         task.restoreFromEvent(new TaskAttemptFinishedEvent(taId, vertexName,
-            taStartTime, taFinishTime, TaskAttemptState.KILLED, "",
-            new TezCounters()));
+            taStartTime, taFinishTime, TaskAttemptState.KILLED, null,
+            "", new TezCounters()));
     assertEquals(TaskState.RUNNING, recoveredState);
     assertEquals(1, task.getAttempts().size());
     assertEquals(1, task.getFinishedAttemptsCount());
@@ -397,8 +397,8 @@
     long taFinishTime = taStartTime + 100L;
     TaskState recoveredState =
         task.restoreFromEvent(new TaskAttemptFinishedEvent(taId, vertexName,
-            taStartTime, taFinishTime, TaskAttemptState.SUCCEEDED, "",
-            new TezCounters()));
+            taStartTime, taFinishTime, TaskAttemptState.SUCCEEDED, null,
+            "", new TezCounters()));
     assertEquals(TaskState.SUCCEEDED, recoveredState);
     assertEquals(1, task.getAttempts().size());
     assertEquals(1, task.getFinishedAttemptsCount());
@@ -439,8 +439,8 @@
     long taFinishTime = taStartTime + 100L;
     TaskState recoveredState =
         task.restoreFromEvent(new TaskAttemptFinishedEvent(taId, vertexName,
-            taStartTime, taFinishTime, TaskAttemptState.SUCCEEDED, "",
-            new TezCounters()));
+            taStartTime, taFinishTime, TaskAttemptState.SUCCEEDED, null,
+            "", new TezCounters()));
     assertEquals(TaskState.SUCCEEDED, recoveredState);
     assertEquals(1, task.getAttempts().size());
     assertEquals(1, task.getFinishedAttemptsCount());
@@ -451,8 +451,8 @@
     // it is possible for TaskAttempt transit from SUCCEEDED to FAILURE due to output failure.
     recoveredState =
         task.restoreFromEvent(new TaskAttemptFinishedEvent(taId, vertexName,
-            taStartTime, taFinishTime, TaskAttemptState.FAILED, "",
-            new TezCounters()));
+            taStartTime, taFinishTime, TaskAttemptState.FAILED, null,
+            "", new TezCounters()));
     assertEquals(TaskState.RUNNING, recoveredState);
     assertEquals(1, task.getAttempts().size());
     assertEquals(1, task.getFinishedAttemptsCount());
@@ -486,8 +486,8 @@
     long taFinishTime = taStartTime + 100L;
     TaskState recoveredState =
         task.restoreFromEvent(new TaskAttemptFinishedEvent(taId, vertexName,
-            taStartTime, taFinishTime, TaskAttemptState.SUCCEEDED, "",
-            new TezCounters()));
+            taStartTime, taFinishTime, TaskAttemptState.SUCCEEDED, null,
+            "", new TezCounters()));
     assertEquals(TaskState.SUCCEEDED, recoveredState);
     assertEquals(1, task.getAttempts().size());
     assertEquals(1, task.getFinishedAttemptsCount());
@@ -498,8 +498,8 @@
     // it is possible for TaskAttempt transit from SUCCEEDED to KILLED due to node failure.
     recoveredState =
         task.restoreFromEvent(new TaskAttemptFinishedEvent(taId, vertexName,
-            taStartTime, taFinishTime, TaskAttemptState.KILLED, "",
-            new TezCounters()));
+            taStartTime, taFinishTime, TaskAttemptState.KILLED, null,
+            "", new TezCounters()));
     assertEquals(TaskState.RUNNING, recoveredState);
     assertEquals(1, task.getAttempts().size());
     assertEquals(1, task.getFinishedAttemptsCount());
@@ -537,8 +537,8 @@
     long taFinishTime = taStartTime + 100L;
     TaskState recoveredState =
         task.restoreFromEvent(new TaskAttemptFinishedEvent(taId, vertexName,
-            taStartTime, taFinishTime, TaskAttemptState.SUCCEEDED, "",
-            new TezCounters()));
+            taStartTime, taFinishTime, TaskAttemptState.SUCCEEDED, null,
+            "", new TezCounters()));
     assertEquals(TaskState.SUCCEEDED, recoveredState);
     assertEquals(1, task.getAttempts().size());
     assertEquals(1, task.getFinishedAttemptsCount());
@@ -577,8 +577,8 @@
     long taFinishTime = taStartTime + 100L;
     TaskState recoveredState =
         task.restoreFromEvent(new TaskAttemptFinishedEvent(taId, vertexName,
-            taStartTime, taFinishTime, TaskAttemptState.SUCCEEDED, "",
-            new TezCounters()));
+            taStartTime, taFinishTime, TaskAttemptState.SUCCEEDED, null,
+            "", new TezCounters()));
     assertEquals(TaskState.SUCCEEDED, recoveredState);
     assertEquals(1, task.getAttempts().size());
     assertEquals(1, task.getFinishedAttemptsCount());
@@ -658,8 +658,8 @@
     long taFinishTime = taStartTime + 100L;
     recoveredState =
         task.restoreFromEvent(new TaskAttemptFinishedEvent(taId, vertexName,
-            taStartTime, taFinishTime, TaskAttemptState.KILLED, "",
-            new TezCounters()));
+            taStartTime, taFinishTime, TaskAttemptState.KILLED, null,
+            "", new TezCounters()));
     assertEquals(TaskState.RUNNING, recoveredState);
     assertEquals(TaskAttemptStateInternal.NEW,
         ((TaskAttemptImpl) task.getAttempt(taId)).getInternalState());
@@ -700,7 +700,7 @@
       task.restoreFromEvent(new TaskAttemptStartedEvent(taId, vertexName, 0L,
           mock(ContainerId.class), mock(NodeId.class), "", "", ""));
       task.restoreFromEvent(new TaskAttemptFinishedEvent(taId, vertexName, 0,
-          0, TaskAttemptState.KILLED, "", null));
+          0, TaskAttemptState.KILLED, null, "", null));
     }
     assertEquals(maxFailedAttempts, task.getAttempts().size());
     assertEquals(0, task.failedAttempts);
@@ -730,7 +730,7 @@
       task.restoreFromEvent(new TaskAttemptStartedEvent(taId, vertexName, 0L,
           mock(ContainerId.class), mock(NodeId.class), "", "", ""));
       task.restoreFromEvent(new TaskAttemptFinishedEvent(taId, vertexName, 0,
-          0, TaskAttemptState.FAILED, "", null));
+          0, TaskAttemptState.FAILED, null, "", null));
     }
     assertEquals(maxFailedAttempts, task.getAttempts().size());
     assertEquals(maxFailedAttempts, task.failedAttempts);
@@ -760,7 +760,7 @@
       task.restoreFromEvent(new TaskAttemptStartedEvent(taId, vertexName, 0L,
           mock(ContainerId.class), mock(NodeId.class), "", "", ""));
       task.restoreFromEvent(new TaskAttemptFinishedEvent(taId, vertexName, 0,
-          0, TaskAttemptState.FAILED, "", null));
+          0, TaskAttemptState.FAILED, null, "", null));
     }
     assertEquals(maxFailedAttempts - 1, task.getAttempts().size());
     assertEquals(maxFailedAttempts - 1, task.failedAttempts);
diff --git a/tez-dag/src/test/java/org/apache/tez/dag/app/dag/impl/TestVertexImpl.java b/tez-dag/src/test/java/org/apache/tez/dag/app/dag/impl/TestVertexImpl.java
index 9500c97..687908d 100644
--- a/tez-dag/src/test/java/org/apache/tez/dag/app/dag/impl/TestVertexImpl.java
+++ b/tez-dag/src/test/java/org/apache/tez/dag/app/dag/impl/TestVertexImpl.java
@@ -56,6 +56,9 @@
 import org.apache.hadoop.service.Service.STATE;
 import org.apache.hadoop.yarn.api.records.ApplicationAttemptId;
 import org.apache.hadoop.yarn.api.records.ApplicationId;
+import org.apache.hadoop.yarn.api.records.Container;
+import org.apache.hadoop.yarn.api.records.ContainerId;
+import org.apache.hadoop.yarn.api.records.NodeId;
 import org.apache.hadoop.yarn.api.records.Resource;
 import org.apache.hadoop.yarn.event.DrainDispatcher;
 import org.apache.hadoop.yarn.event.EventHandler;
@@ -100,12 +103,15 @@
 import org.apache.tez.dag.api.records.DAGProtos.TezEntityDescriptorProto;
 import org.apache.tez.dag.api.records.DAGProtos.VertexPlan;
 import org.apache.tez.dag.app.AppContext;
+import org.apache.tez.dag.app.ClusterInfo;
+import org.apache.tez.dag.app.ContainerHeartbeatHandler;
 import org.apache.tez.dag.app.TaskAttemptListener;
 import org.apache.tez.dag.app.TaskHeartbeatHandler;
 import org.apache.tez.dag.app.dag.DAG;
 import org.apache.tez.dag.app.dag.RootInputInitializerManager;
 import org.apache.tez.dag.app.dag.StateChangeNotifier;
 import org.apache.tez.dag.app.dag.Task;
+import org.apache.tez.dag.app.dag.TaskAttempt;
 import org.apache.tez.dag.app.dag.TaskAttemptStateInternal;
 import org.apache.tez.dag.app.dag.Vertex;
 import org.apache.tez.dag.app.dag.VertexState;
@@ -114,6 +120,8 @@
 import org.apache.tez.dag.app.dag.event.DAGEvent;
 import org.apache.tez.dag.app.dag.event.DAGEventType;
 import org.apache.tez.dag.app.dag.event.TaskAttemptEvent;
+import org.apache.tez.dag.app.dag.event.TaskAttemptEventSchedule;
+import org.apache.tez.dag.app.dag.event.TaskAttemptEventStartedRemotely;
 import org.apache.tez.dag.app.dag.event.TaskAttemptEventType;
 import org.apache.tez.dag.app.dag.event.TaskEvent;
 import org.apache.tez.dag.app.dag.event.TaskEventTAUpdate;
@@ -131,9 +139,12 @@
 import org.apache.tez.dag.app.dag.impl.DAGImpl.VertexGroupInfo;
 import org.apache.tez.dag.app.dag.impl.TestVertexImpl.VertexManagerWithException.VMExceptionLocation;
 import org.apache.tez.dag.app.rm.TaskSchedulerEventHandler;
+import org.apache.tez.dag.app.rm.container.AMContainerMap;
+import org.apache.tez.dag.app.rm.container.ContainerContextMatcher;
 import org.apache.tez.dag.history.HistoryEventHandler;
 import org.apache.tez.dag.library.vertexmanager.InputReadyVertexManager;
 import org.apache.tez.dag.library.vertexmanager.ShuffleVertexManager;
+import org.apache.tez.dag.records.TaskAttemptTerminationCause;
 import org.apache.tez.dag.records.TezDAGID;
 import org.apache.tez.dag.records.TezTaskAttemptID;
 import org.apache.tez.dag.records.TezTaskID;
@@ -151,6 +162,7 @@
 import org.apache.tez.runtime.api.events.InputDataInformationEvent;
 import org.apache.tez.runtime.api.events.InputInitializerEvent;
 import org.apache.tez.runtime.api.events.InputUpdatePayloadEvent;
+import org.apache.tez.runtime.api.events.TaskAttemptFailedEvent;
 import org.apache.tez.runtime.api.events.VertexManagerEvent;
 import org.apache.tez.test.EdgeManagerForTest;
 import org.apache.tez.test.VertexManagerPluginForTest;
@@ -2802,7 +2814,113 @@
     Assert.assertEquals(0, committer.commitCounter);
     Assert.assertEquals(1, committer.abortCounter);
   }
+  
+  @SuppressWarnings("unchecked")
+  @Test(timeout = 5000)
+  public void testVertexTaskAttemptProcessorFailure() {
+    initAllVertices(VertexState.INITED);
 
+    VertexImpl v = vertices.get("vertex1");
+    // Start the vertex, then schedule the first attempt of task 0.
+    startVertex(v);
+    TaskAttemptImpl ta = (TaskAttemptImpl) v.getTask(0).getAttempts().values().iterator().next();
+    ta.handle(new TaskAttemptEventSchedule(ta.getID(), 2, 2));
+    // Register a mocked container with the app context so the attempt can be started on it.
+    NodeId nid = NodeId.newInstance("127.0.0.1", 0);
+    ContainerId contId = ContainerId.newInstance(appAttemptId, 3);
+    Container container = mock(Container.class);
+    when(container.getId()).thenReturn(contId);
+    when(container.getNodeId()).thenReturn(nid);
+    when(container.getNodeHttpAddress()).thenReturn("localhost:0");
+    AMContainerMap containers = new AMContainerMap(
+        mock(ContainerHeartbeatHandler.class), mock(TaskAttemptListener.class),
+        new ContainerContextMatcher(), appContext);
+    containers.addContainerIfNew(container);
+    doReturn(containers).when(appContext).getAllContainers();
+    // Report the attempt as started on that container; it must now be RUNNING.
+    ta.handle(new TaskAttemptEventStartedRemotely(ta.getID(), contId, null));
+    Assert.assertEquals(TaskAttemptStateInternal.RUNNING, ta.getInternalState());
+    // A failure event sourced from a PROCESSOR must map to APPLICATION_ERROR; the vertex stays RUNNING.
+    dispatcher.getEventHandler().handle(
+        new VertexEventRouteEvent(v.getVertexId(), Collections.singletonList(new TezEvent(
+            new TaskAttemptFailedEvent("Failed"), new EventMetaData(
+                EventProducerConsumerType.PROCESSOR, v.getName(), null, ta.getID())))));
+    dispatcher.await();
+    Assert.assertEquals(VertexState.RUNNING, v.getState());
+    Assert.assertEquals(TaskAttemptTerminationCause.APPLICATION_ERROR, ta.getTerminationCause());
+  }
+
+  @SuppressWarnings("unchecked")
+  @Test(timeout = 5000)
+  public void testVertexTaskAttemptInputFailure() {
+    initAllVertices(VertexState.INITED);
+
+    VertexImpl v = vertices.get("vertex1");
+    // Start the vertex, then schedule the first attempt of task 0.
+    startVertex(v);
+    TaskAttemptImpl ta = (TaskAttemptImpl) v.getTask(0).getAttempts().values().iterator().next();
+    ta.handle(new TaskAttemptEventSchedule(ta.getID(), 2, 2));
+    // Register a mocked container with the app context so the attempt can be started on it.
+    NodeId nid = NodeId.newInstance("127.0.0.1", 0);
+    ContainerId contId = ContainerId.newInstance(appAttemptId, 3);
+    Container container = mock(Container.class);
+    when(container.getId()).thenReturn(contId);
+    when(container.getNodeId()).thenReturn(nid);
+    when(container.getNodeHttpAddress()).thenReturn("localhost:0");
+    AMContainerMap containers = new AMContainerMap(
+        mock(ContainerHeartbeatHandler.class), mock(TaskAttemptListener.class),
+        new ContainerContextMatcher(), appContext);
+    containers.addContainerIfNew(container);
+    doReturn(containers).when(appContext).getAllContainers();
+    // Report the attempt as started on that container; it must now be RUNNING.
+    ta.handle(new TaskAttemptEventStartedRemotely(ta.getID(), contId, null));
+    Assert.assertEquals(TaskAttemptStateInternal.RUNNING, ta.getInternalState());
+    // A failure event sourced from an INPUT must map to INPUT_READ_ERROR; the vertex stays RUNNING.
+    dispatcher.getEventHandler().handle(
+        new VertexEventRouteEvent(v.getVertexId(), Collections.singletonList(new TezEvent(
+            new TaskAttemptFailedEvent("Failed"), new EventMetaData(
+                EventProducerConsumerType.INPUT, v.getName(), null, ta.getID())))));
+    dispatcher.await();
+    Assert.assertEquals(VertexState.RUNNING, v.getState());
+    Assert.assertEquals(TaskAttemptTerminationCause.INPUT_READ_ERROR, ta.getTerminationCause());
+  }
+
+
+  @SuppressWarnings("unchecked")
+  @Test(timeout = 5000)
+  public void testVertexTaskAttemptOutputFailure() {
+    initAllVertices(VertexState.INITED);
+
+    VertexImpl v = vertices.get("vertex1");
+    // Start the vertex, then schedule the first attempt of task 0.
+    startVertex(v);
+    TaskAttemptImpl ta = (TaskAttemptImpl) v.getTask(0).getAttempts().values().iterator().next();
+    ta.handle(new TaskAttemptEventSchedule(ta.getID(), 2, 2));
+    // Register a mocked container with the app context so the attempt can be started on it.
+    NodeId nid = NodeId.newInstance("127.0.0.1", 0);
+    ContainerId contId = ContainerId.newInstance(appAttemptId, 3);
+    Container container = mock(Container.class);
+    when(container.getId()).thenReturn(contId);
+    when(container.getNodeId()).thenReturn(nid);
+    when(container.getNodeHttpAddress()).thenReturn("localhost:0");
+    AMContainerMap containers = new AMContainerMap(
+        mock(ContainerHeartbeatHandler.class), mock(TaskAttemptListener.class),
+        new ContainerContextMatcher(), appContext);
+    containers.addContainerIfNew(container);
+    doReturn(containers).when(appContext).getAllContainers();
+    // Report the attempt as started on that container; it must now be RUNNING.
+    ta.handle(new TaskAttemptEventStartedRemotely(ta.getID(), contId, null));
+    Assert.assertEquals(TaskAttemptStateInternal.RUNNING, ta.getInternalState());
+    // A failure event sourced from an OUTPUT must map to OUTPUT_WRITE_ERROR; the vertex stays RUNNING.
+    dispatcher.getEventHandler().handle(
+        new VertexEventRouteEvent(v.getVertexId(), Collections.singletonList(new TezEvent(
+            new TaskAttemptFailedEvent("Failed"), new EventMetaData(
+                EventProducerConsumerType.OUTPUT, v.getName(), null, ta.getID())))));
+    dispatcher.await();
+    Assert.assertEquals(VertexState.RUNNING, v.getState());
+    Assert.assertEquals(TaskAttemptTerminationCause.OUTPUT_WRITE_ERROR, ta.getTerminationCause());
+  }
+  
   @Test(timeout = 5000)
   public void testSourceVertexStartHandling() {
     LOG.info("Testing testSourceVertexStartHandling");
@@ -2819,21 +2937,6 @@
   }
 
   @Test(timeout = 5000)
-  public void testCounters() {
-    // FIXME need to test counters at vertex level
-  }
-
-  @Test(timeout = 5000)
-  public void testDiagnostics() {
-    // FIXME need to test diagnostics in various cases
-  }
-
-  @Test(timeout = 5000)
-  public void testTaskAttemptCompletionEvents() {
-    // FIXME need to test handling of task attempt events
-  }
-
-  @Test(timeout = 5000)
   public void testSourceTaskAttemptCompletionEvents() {
     LOG.info("Testing testSourceTaskAttemptCompletionEvents");
     initAllVertices(VertexState.INITED);
diff --git a/tez-dag/src/test/java/org/apache/tez/dag/app/dag/speculation/legacy/TestDataStatistics.java b/tez-dag/src/test/java/org/apache/tez/dag/app/dag/speculation/legacy/TestDataStatistics.java
new file mode 100644
index 0000000..fe09594
--- /dev/null
+++ b/tez-dag/src/test/java/org/apache/tez/dag/app/dag/speculation/legacy/TestDataStatistics.java
@@ -0,0 +1,73 @@
+/**
+* Licensed to the Apache Software Foundation (ASF) under one
+* or more contributor license agreements.  See the NOTICE file
+* distributed with this work for additional information
+* regarding copyright ownership.  The ASF licenses this file
+* to you under the Apache License, Version 2.0 (the
+* "License"); you may not use this file except in compliance
+* with the License.  You may obtain a copy of the License at
+*
+*     http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+*/
+
+package org.apache.tez.dag.app.dag.speculation.legacy;
+
+import org.junit.Assert;
+import org.junit.Test;
+
+public class TestDataStatistics {
+  // Tests DataStatistics; TOL is the absolute tolerance passed to every floating-point assertion below.
+  private static final double TOL = 0.001;
+  // No samples: count, var and std are 0, while mean and outlier(1.0f) are expected to be Long.MAX_VALUE.
+  @Test
+  public void testEmptyDataStatistics() throws Exception {
+    DataStatistics statistics = new DataStatistics();
+    Assert.assertEquals(0, statistics.count(), TOL);
+    Assert.assertEquals(Long.MAX_VALUE, statistics.mean(), TOL);
+    Assert.assertEquals(0, statistics.var(), TOL);
+    Assert.assertEquals(0, statistics.std(), TOL);
+    Assert.assertEquals(Long.MAX_VALUE, statistics.outlier(1.0f), TOL);
+  }
+  // One sample given to the constructor: mean and outlier(1.0f) equal that value; var and std are 0.
+  @Test
+  public void testSingleEntryDataStatistics() throws Exception {
+    DataStatistics statistics = new DataStatistics(17.29);
+    Assert.assertEquals(1, statistics.count(), TOL);
+    Assert.assertEquals(17.29, statistics.mean(), TOL);
+    Assert.assertEquals(0, statistics.var(), TOL);
+    Assert.assertEquals(0, statistics.std(), TOL);
+    Assert.assertEquals(17.29, statistics.outlier(1.0f), TOL);
+  }
+  // Samples 17 and 29: mean 23, var 36, std 6; outlier(1.0f) of 29 is consistent with mean + 1.0 * std.
+  @Test
+  public void testMutiEntryDataStatistics() throws Exception {
+    DataStatistics statistics = new DataStatistics();
+    statistics.add(17);
+    statistics.add(29);
+    Assert.assertEquals(2, statistics.count(), TOL);
+    Assert.assertEquals(23.0, statistics.mean(), TOL);
+    Assert.assertEquals(36.0, statistics.var(), TOL);
+    Assert.assertEquals(6.0, statistics.std(), TOL);
+    Assert.assertEquals(29.0, statistics.outlier(1.0f), TOL);
+ }
+  // Seeding through the constructor and then add() must give the same results as two add() calls.
+  @Test
+  public void testUpdateStatistics() throws Exception {
+    DataStatistics statistics = new DataStatistics(17);
+    statistics.add(29);
+    Assert.assertEquals(2, statistics.count(), TOL);
+    Assert.assertEquals(23.0, statistics.mean(), TOL);
+    Assert.assertEquals(36.0, statistics.var(), TOL);
+    // Count stays 2 while mean becomes 29 and var 0 - presumably the old sample 17 is replaced by 29.
+    statistics.updateStatistics(17, 29);
+    Assert.assertEquals(2, statistics.count(), TOL);
+    Assert.assertEquals(29.0, statistics.mean(), TOL);
+    Assert.assertEquals(0.0, statistics.var(), TOL);
+  }
+}
diff --git a/tez-dag/src/test/java/org/apache/tez/dag/app/rm/TestLocalTaskSchedulerService.java b/tez-dag/src/test/java/org/apache/tez/dag/app/rm/TestLocalTaskSchedulerService.java
index 5fc5a7d..3cf4f6c 100644
--- a/tez-dag/src/test/java/org/apache/tez/dag/app/rm/TestLocalTaskSchedulerService.java
+++ b/tez-dag/src/test/java/org/apache/tez/dag/app/rm/TestLocalTaskSchedulerService.java
@@ -18,20 +18,30 @@
 
 package org.apache.tez.dag.app.rm;
 
+import java.util.HashMap;
+import java.util.concurrent.BlockingQueue;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.yarn.api.records.Container;
+import org.apache.hadoop.yarn.api.records.Priority;
 import org.apache.hadoop.yarn.api.records.Resource;
+import org.apache.tez.dag.app.AppContext;
+import org.apache.tez.dag.app.dag.Task;
+import org.apache.tez.dag.app.rm.TaskSchedulerService.TaskSchedulerAppCallback;
+import org.apache.tez.dag.app.rm.TestLocalTaskSchedulerService.MockLocalTaskSchedulerSerivce.MockAsyncDelegateRequestHandler;
+import org.apache.tez.dag.app.rm.container.ContainerSignatureMatcher;
 import org.junit.Assert;
 import org.junit.Test;
 
-import static org.junit.Assert.assertTrue;
-import static org.junit.Assert.fail;
-
+import static org.junit.Assert.*;
+import static org.mockito.Mockito.*;
 
 public class TestLocalTaskSchedulerService {
 
   LocalTaskSchedulerService ltss ;
   int core =10;
 
-  @Test
+  @Test(timeout = 5000)
   public void testCreateResource() {
     Resource resource;
     //value in integer
@@ -40,7 +50,7 @@
     Assert.assertEquals((int)(value/(1024*1024)),resource.getMemory());
   }
 
-  @Test
+  @Test(timeout = 5000)
   public void testCreateResourceLargerThanIntMax() {
     //value beyond integer but within Long.MAX_VALUE
     try {
@@ -52,7 +62,7 @@
     }
   }
 
-  @Test
+  @Test(timeout = 5000)
   public void testCreateResourceWithNegativeValue() {
     //value is Long.MAX_VALUE*1024*1024,
     // it will be negative after it is passed to createResource
@@ -65,4 +75,131 @@
       assertTrue(ex.getMessage().contains("Negative Memory or Core provided!"));
     }
   }
+
+  /**
+   * Allocation and deallocation are both queued before the handler thread starts: only the deallocation must run.
+   */
+  @Test(timeout = 5000)
+  public void testDeallocationBeforeAllocation() {
+    MockLocalTaskSchedulerSerivce taskSchedulerService = new MockLocalTaskSchedulerSerivce
+        (mock(TaskSchedulerAppCallback.class), mock(ContainerSignatureMatcher.class), "", 0, "", mock(AppContext.class));
+    taskSchedulerService.init(new Configuration());
+    taskSchedulerService.start();
+
+    Task task = mock(Task.class);
+    taskSchedulerService.allocateTask(task, Resource.newInstance(1024, 1), null, null, Priority.newInstance(1), null, null);
+    taskSchedulerService.deallocateTask(task, false);
+    // Start the RequestHandler; DeallocateTaskRequest has higher priority, so it will be processed first.
+    taskSchedulerService.startRequestHandlerThread();
+
+    MockAsyncDelegateRequestHandler requestHandler = taskSchedulerService.getRequestHandler();
+    requestHandler.drainRequest(1);
+    assertEquals(1, requestHandler.deallocateCount);
+    // The corresponding AllocateTaskRequest will be removed, so it won't be processed.
+    assertEquals(0, requestHandler.allocateCount);
+    taskSchedulerService.stop();
+  }
+
+  /**
+   * The allocation is drained before the deallocation is queued, so both requests must be processed once each.
+   */
+  @Test(timeout = 5000)
+  public void testDeallocationAfterAllocation() {
+    MockLocalTaskSchedulerSerivce taskSchedulerService = new MockLocalTaskSchedulerSerivce
+        (mock(TaskSchedulerAppCallback.class), mock(ContainerSignatureMatcher.class), "", 0, "", mock(AppContext.class));
+    taskSchedulerService.init(new Configuration());
+    taskSchedulerService.start();
+
+    Task task = mock(Task.class);
+    taskSchedulerService.allocateTask(task, Resource.newInstance(1024, 1), null, null, Priority.newInstance(1), null, null);
+    taskSchedulerService.startRequestHandlerThread();
+    // Wait for the allocation (1 processed request) before queueing the deallocation (2 in total).
+    MockAsyncDelegateRequestHandler requestHandler = taskSchedulerService.getRequestHandler();
+    requestHandler.drainRequest(1);
+    taskSchedulerService.deallocateTask(task, false);
+    requestHandler.drainRequest(2);
+    assertEquals(1, requestHandler.deallocateCount);
+    assertEquals(1, requestHandler.allocateCount);
+    taskSchedulerService.stop();
+  }
+
+  static class MockLocalTaskSchedulerSerivce extends LocalTaskSchedulerService {
+    // Scheduler test double: builds a counting request handler and lets the test start its thread.
+    private MockAsyncDelegateRequestHandler requestHandler;
+
+    public MockLocalTaskSchedulerSerivce(TaskSchedulerAppCallback appClient,
+        ContainerSignatureMatcher containerSignatureMatcher,
+        String appHostName, int appHostPort, String appTrackingUrl, AppContext appContext) {
+      super(appClient, containerSignatureMatcher, appHostName, appHostPort,
+          appTrackingUrl, appContext);
+    }
+
+    @Override
+    public AsyncDelegateRequestHandler createRequestHandler(Configuration conf) {
+      requestHandler = new MockAsyncDelegateRequestHandler(taskRequestQueue,
+          new LocalContainerFactory(appContext),
+          taskAllocations, appClientDelegate, conf);
+      return requestHandler;
+    }
+
+    @Override
+    public void serviceStart() {
+      // don't start RequestHandler thread, control it in unit test
+    }
+
+    public void startRequestHandlerThread() {
+      asyncDelegateRequestThread.start();
+    }
+
+    public MockAsyncDelegateRequestHandler getRequestHandler() {
+      return requestHandler;
+    }
+
+    static class MockAsyncDelegateRequestHandler extends AsyncDelegateRequestHandler {
+      // Bumped while requests are processed and polled from the test thread, hence volatile for visibility.
+      public volatile int allocateCount = 0;
+      public volatile int deallocateCount = 0;
+      public volatile int processedCount =0;
+
+      MockAsyncDelegateRequestHandler(
+          BlockingQueue<TaskRequest> taskRequestQueue,
+          LocalContainerFactory localContainerFactory,
+          HashMap<Object, Container> taskAllocations,
+          TaskSchedulerAppCallback appClientDelegate, Configuration conf) {
+        super(taskRequestQueue, localContainerFactory, taskAllocations,
+            appClientDelegate, conf);
+      }
+
+      @Override
+      void processRequest() {
+        super.processRequest();
+        processedCount ++;
+      }
+
+      // Polls every 100 ms until exactly `count` requests were processed and the queue is empty.
+      public void drainRequest(int count) {
+        while(processedCount != count || !taskRequestQueue.isEmpty()) {
+          try {
+            Thread.sleep(100);
+          } catch (InterruptedException e) {
+            // Restore the interrupt flag and stop waiting; the caller's assertions report any shortfall.
+            Thread.currentThread().interrupt();
+            return;
+          }
+        }
+      }
+
+      @Override
+      void allocateTask(AllocateTaskRequest request) {
+        super.allocateTask(request);
+        allocateCount ++;
+      }
+
+      @Override
+      void deallocateTask(DeallocateTaskRequest request) {
+        super.deallocateTask(request);
+        deallocateCount ++;
+      }
+    }
+  }
 }
diff --git a/tez-dag/src/test/java/org/apache/tez/dag/app/rm/TestTaskSchedulerEventHandler.java b/tez-dag/src/test/java/org/apache/tez/dag/app/rm/TestTaskSchedulerEventHandler.java
index 4ec1916..d2dece3 100644
--- a/tez-dag/src/test/java/org/apache/tez/dag/app/rm/TestTaskSchedulerEventHandler.java
+++ b/tez-dag/src/test/java/org/apache/tez/dag/app/rm/TestTaskSchedulerEventHandler.java
@@ -49,6 +49,7 @@
 import org.apache.tez.dag.app.rm.container.AMContainerEventType;
 import org.apache.tez.dag.app.rm.container.AMContainerMap;
 import org.apache.tez.dag.app.rm.container.ContainerSignatureMatcher;
+import org.apache.tez.dag.records.TaskAttemptTerminationCause;
 import org.apache.tez.dag.records.TezTaskAttemptID;
 import org.junit.Assert;
 import org.junit.Before;
@@ -179,6 +180,8 @@
     Assert.assertEquals("Container preempted externally. Container preempted by RM.", 
         completedEvent.getDiagnostics());
     Assert.assertTrue(completedEvent.isPreempted());
+    Assert.assertEquals(TaskAttemptTerminationCause.EXTERNAL_PREEMPTION,
+        completedEvent.getTerminationCause());
     Assert.assertFalse(completedEvent.isDiskFailed());
 
     schedulerHandler.stop();
@@ -186,6 +189,31 @@
   }
   
   @Test (timeout = 5000)
+  public void testContainerInternalPreempted() throws IOException {
+    Configuration conf = new Configuration(false);
+    schedulerHandler.init(conf);
+    schedulerHandler.start();
+    // preemptContainer() must deallocate the container once and emit one C_COMPLETED event that is not flagged isPreempted().
+    ContainerId mockCId = mock(ContainerId.class);
+    verify(mockTaskScheduler, times(0)).deallocateContainer((ContainerId)any());
+    schedulerHandler.preemptContainer(mockCId);
+    verify(mockTaskScheduler, times(1)).deallocateContainer(mockCId);
+    Assert.assertEquals(1, mockEventHandler.events.size());
+    Event event = mockEventHandler.events.get(0);
+    Assert.assertEquals(AMContainerEventType.C_COMPLETED, event.getType());
+    AMContainerEventCompleted completedEvent = (AMContainerEventCompleted) event;
+    Assert.assertEquals(mockCId, completedEvent.getContainerId());
+    Assert.assertEquals("Container preempted internally", completedEvent.getDiagnostics());
+    Assert.assertFalse(completedEvent.isPreempted());
+    Assert.assertFalse(completedEvent.isDiskFailed());
+    Assert.assertEquals(TaskAttemptTerminationCause.INTERNAL_PREEMPTION,
+        completedEvent.getTerminationCause());
+
+    schedulerHandler.stop();
+    schedulerHandler.close();
+  }
+  
+  @Test (timeout = 5000)
   public void testContainerDiskFailed() throws IOException {
     Configuration conf = new Configuration(false);
     schedulerHandler.init(conf);
@@ -211,6 +239,8 @@
         completedEvent.getDiagnostics());
     Assert.assertFalse(completedEvent.isPreempted());
     Assert.assertTrue(completedEvent.isDiskFailed());
+    Assert.assertEquals(TaskAttemptTerminationCause.NODE_DISK_ERROR,
+        completedEvent.getTerminationCause());
 
     schedulerHandler.stop();
     schedulerHandler.close();
diff --git a/tez-dag/src/test/java/org/apache/tez/dag/app/rm/container/TestAMContainer.java b/tez-dag/src/test/java/org/apache/tez/dag/app/rm/container/TestAMContainer.java
index c0be044..f273896 100644
--- a/tez-dag/src/test/java/org/apache/tez/dag/app/rm/container/TestAMContainer.java
+++ b/tez-dag/src/test/java/org/apache/tez/dag/app/rm/container/TestAMContainer.java
@@ -67,17 +67,21 @@
 import org.apache.tez.dag.app.ContainerContext;
 import org.apache.tez.dag.app.TaskAttemptListener;
 import org.apache.tez.dag.app.dag.event.TaskAttemptEventContainerTerminated;
+import org.apache.tez.dag.app.dag.event.TaskAttemptEventContainerTerminatedBySystem;
+import org.apache.tez.dag.app.dag.event.TaskAttemptEventContainerTerminating;
 import org.apache.tez.dag.app.dag.event.TaskAttemptEventNodeFailed;
 import org.apache.tez.dag.app.dag.event.TaskAttemptEventType;
 import org.apache.tez.dag.app.rm.AMSchedulerEventType;
 import org.apache.tez.dag.app.rm.NMCommunicatorEventType;
 import org.apache.tez.dag.history.DAGHistoryEvent;
 import org.apache.tez.dag.history.HistoryEventHandler;
+import org.apache.tez.dag.records.TaskAttemptTerminationCause;
 import org.apache.tez.dag.records.TezDAGID;
 import org.apache.tez.dag.records.TezTaskAttemptID;
 import org.apache.tez.dag.records.TezTaskID;
 import org.apache.tez.dag.records.TezVertexID;
 import org.apache.tez.runtime.api.impl.TaskSpec;
+import org.junit.Assert;
 import org.junit.Test;
 import org.mockito.ArgumentCaptor;
 import org.mockito.invocation.InvocationOnMock;
@@ -87,9 +91,7 @@
 
 
 public class TestAMContainer {
-
-
-  @Test
+  @Test (timeout=5000)
   // Assign before launch.
   public void tetSingleSuccessfulTaskFlow() {
     WrappedContainer wc = new WrappedContainer();
@@ -135,7 +137,7 @@
     assertNull(wc.amContainer.getRunningTaskAttempt());
     verify(wc.tal).unregisterTaskAttempt(wc.taskAttemptID);
 
-    wc.containerCompleted(false);
+    wc.containerCompleted();
     wc.verifyHistoryStopEvent();
     wc.verifyState(AMContainerState.COMPLETED);
     wc.verifyNoOutgoingEvents();
@@ -146,7 +148,7 @@
     assertFalse(wc.amContainer.isInErrorState());
   }
 
-  @Test
+  @Test (timeout=5000)
   // Assign after launch.
   public void testSingleSuccessfulTaskFlow2() {
     WrappedContainer wc = new WrappedContainer();
@@ -191,7 +193,7 @@
     assertNull(wc.amContainer.getRunningTaskAttempt());
     verify(wc.tal).unregisterTaskAttempt(wc.taskAttemptID);
 
-    wc.containerCompleted(false);
+    wc.containerCompleted();
     wc.verifyHistoryStopEvent();
     wc.verifyState(AMContainerState.COMPLETED);
     wc.verifyNoOutgoingEvents();
@@ -202,7 +204,7 @@
     assertFalse(wc.amContainer.isInErrorState());
   }
 
-  @Test
+  @Test (timeout=5000)
   public void testSingleSuccessfulTaskFlowStopRequest() {
     WrappedContainer wc = new WrappedContainer();
 
@@ -225,7 +227,7 @@
     wc.verifyState(AMContainerState.STOPPING);
     wc.verifyNoOutgoingEvents();
 
-    wc.containerCompleted(false);
+    wc.containerCompleted();
     wc.verifyHistoryStopEvent();
     wc.verifyState(AMContainerState.COMPLETED);
     wc.verifyNoOutgoingEvents();
@@ -238,7 +240,7 @@
     assertFalse(wc.amContainer.isInErrorState());
   }
 
-  @Test
+  @Test (timeout=5000)
   public void testSingleSuccessfulTaskFlowFailedNMStopRequest() {
     WrappedContainer wc = new WrappedContainer();
 
@@ -264,7 +266,7 @@
     assertTrue(wc.verifyCountAndGetOutgoingEvents(1).get(0).getType() ==
         AMSchedulerEventType.S_CONTAINER_DEALLOCATE);
 
-    wc.containerCompleted(false);
+    wc.containerCompleted();
     wc.verifyHistoryStopEvent();
     wc.verifyState(AMContainerState.COMPLETED);
     wc.verifyNoOutgoingEvents();
@@ -278,7 +280,7 @@
   }
 
   @SuppressWarnings("rawtypes")
-  @Test
+  @Test (timeout=5000)
   public void testMultipleAllocationsAtIdle() {
     WrappedContainer wc = new WrappedContainer();
     List<Event> outgoingEvents;
@@ -303,7 +305,7 @@
     assertTrue(wc.amContainer.isInErrorState());
 
     wc.nmStopSent();
-    wc.containerCompleted(false);
+    wc.containerCompleted();
     wc.verifyHistoryStopEvent();
     // 1 Inform scheduler. 2 TERMINATED to TaskAttempt.
     outgoingEvents = wc.verifyCountAndGetOutgoingEvents(2);
@@ -317,7 +319,7 @@
   }
 
   @SuppressWarnings("rawtypes")
-  @Test
+  @Test (timeout=5000)
   public void testAllocationAtRunning() {
     WrappedContainer wc = new WrappedContainer();
     List<Event> outgoingEvents;
@@ -343,7 +345,7 @@
     assertTrue(wc.amContainer.isInErrorState());
 
     wc.nmStopSent();
-    wc.containerCompleted(false);
+    wc.containerCompleted();
     wc.verifyHistoryStopEvent();
     // 1 Inform scheduler. 2 TERMINATED to TaskAttempt.
     outgoingEvents = wc.verifyCountAndGetOutgoingEvents(2);
@@ -357,7 +359,7 @@
   }
 
   @SuppressWarnings("rawtypes")
-  @Test
+  @Test (timeout=5000)
   public void testMultipleAllocationsAtLaunching() {
     WrappedContainer wc = new WrappedContainer();
     List<Event> outgoingEvents;
@@ -382,7 +384,7 @@
     assertTrue(wc.amContainer.isInErrorState());
 
     wc.nmStopSent();
-    wc.containerCompleted(false);
+    wc.containerCompleted();
     wc.verifyHistoryStopEvent();
     // 1 Inform scheduler. 2 TERMINATED to TaskAttempt.
     outgoingEvents = wc.verifyCountAndGetOutgoingEvents(2);
@@ -396,7 +398,7 @@
   }
 
   @SuppressWarnings("rawtypes")
-  @Test
+  @Test (timeout=5000)
   public void testContainerTimedOutAtRunning() {
     WrappedContainer wc = new WrappedContainer();
     List<Event> outgoingEvents;
@@ -418,7 +420,7 @@
         NMCommunicatorEventType.CONTAINER_STOP_REQUEST);
     // TODO Should this be an RM DE-ALLOCATE instead ?
 
-    wc.containerCompleted(false);
+    wc.containerCompleted();
     wc.verifyHistoryStopEvent();
     outgoingEvents = wc.verifyCountAndGetOutgoingEvents(1);
     verifyUnOrderedOutgoingEventTypes(outgoingEvents,
@@ -432,7 +434,7 @@
   }
 
   @SuppressWarnings("rawtypes")
-  @Test
+  @Test (timeout=5000)
   public void testLaunchFailure() {
     WrappedContainer wc = new WrappedContainer();
     List<Event> outgoingEvents;
@@ -449,22 +451,28 @@
     verifyUnOrderedOutgoingEventTypes(outgoingEvents,
         TaskAttemptEventType.TA_CONTAINER_TERMINATING,
         AMSchedulerEventType.S_CONTAINER_DEALLOCATE);
+    for (Event e : outgoingEvents) {
+      if (e.getType() == TaskAttemptEventType.TA_CONTAINER_TERMINATING) {
+        Assert.assertEquals(TaskAttemptTerminationCause.CONTAINER_LAUNCH_FAILED,
+            ((TaskAttemptEventContainerTerminating)e).getTerminationCause());        
+      }
+    }
 
-    wc.containerCompleted(false);
+    wc.containerCompleted();
     outgoingEvents = wc.verifyCountAndGetOutgoingEvents(1);
     verifyUnOrderedOutgoingEventTypes(outgoingEvents,
         TaskAttemptEventType.TA_CONTAINER_TERMINATED);
-
+    
     // Valid transition. Container complete, but not with an error.
     assertFalse(wc.amContainer.isInErrorState());
   }
 
-  @Test
+  @Test (timeout=5000)
   public void testContainerCompletedAtAllocated() {
     WrappedContainer wc = new WrappedContainer();
     wc.verifyState(AMContainerState.ALLOCATED);
 
-    wc.containerCompleted(false);
+    wc.containerCompleted();
     wc.verifyState(AMContainerState.COMPLETED);
     wc.verifyNoOutgoingEvents();
 
@@ -472,7 +480,7 @@
   }
 
   @SuppressWarnings("rawtypes")
-  @Test
+  @Test (timeout=5000)
   // Verify that incoming NM launched events to COMPLETED containers are
   // handled.
   public void testContainerCompletedAtLaunching() {
@@ -484,7 +492,7 @@
 
     wc.assignTaskAttempt(wc.taskAttemptID);
 
-    wc.containerCompleted(false);
+    wc.containerCompleted();
     wc.verifyState(AMContainerState.COMPLETED);
     verify(wc.tal).registerRunningContainer(wc.containerID);
     verify(wc.tal).unregisterRunningContainer(wc.containerID);
@@ -492,6 +500,70 @@
     outgoingEvents = wc.verifyCountAndGetOutgoingEvents(1);
     verifyUnOrderedOutgoingEventTypes(outgoingEvents,
         TaskAttemptEventType.TA_CONTAINER_TERMINATED);
+    Assert.assertEquals(TaskAttemptTerminationCause.CONTAINER_LAUNCH_FAILED,
+        ((TaskAttemptEventContainerTerminated)outgoingEvents.get(0)).getTerminationCause());
+
+    assertFalse(wc.amContainer.isInErrorState());
+
+    // Container launched generated by NM call.
+    wc.containerLaunched();
+    wc.verifyNoOutgoingEvents();
+
+    assertFalse(wc.amContainer.isInErrorState());
+  }
+  
+  @SuppressWarnings("rawtypes")
+  @Test (timeout=5000)
+  public void testContainerCompletedAtLaunchingSpecificClusterError() {
+    WrappedContainer wc = new WrappedContainer();
+    List<Event> outgoingEvents;
+
+    wc.launchContainer();
+
+    // Assign a task attempt before any containerLaunched() notification has been delivered.
+    wc.assignTaskAttempt(wc.taskAttemptID);
+    // Completion with DISKS_FAILED / NODE_DISK_ERROR must surface as TA_CONTAINER_TERMINATED_BY_SYSTEM.
+    wc.containerCompleted(ContainerExitStatus.DISKS_FAILED, TaskAttemptTerminationCause.NODE_DISK_ERROR);
+    wc.verifyState(AMContainerState.COMPLETED);
+    verify(wc.tal).registerRunningContainer(wc.containerID);
+    verify(wc.tal).unregisterRunningContainer(wc.containerID);
+
+    outgoingEvents = wc.verifyCountAndGetOutgoingEvents(1);
+    verifyUnOrderedOutgoingEventTypes(outgoingEvents,
+        TaskAttemptEventType.TA_CONTAINER_TERMINATED_BY_SYSTEM);
+    Assert.assertEquals(TaskAttemptTerminationCause.NODE_DISK_ERROR,
+        ((TaskAttemptEventContainerTerminatedBySystem)outgoingEvents.get(0)).getTerminationCause());
+
+    assertFalse(wc.amContainer.isInErrorState());
+
+    // A late container-launched notification from the NM call must produce no events and no error state.
+    wc.containerLaunched();
+    wc.verifyNoOutgoingEvents();
+
+    assertFalse(wc.amContainer.isInErrorState());
+  }
+  
+  @SuppressWarnings("rawtypes")
+  @Test (timeout=5000)
+  public void testContainerCompletedAtLaunchingSpecificError() {
+    WrappedContainer wc = new WrappedContainer();
+    List<Event> outgoingEvents;
+
+    wc.launchContainer();
+
+
+    wc.assignTaskAttempt(wc.taskAttemptID);
+
+    wc.containerCompleted(ContainerExitStatus.ABORTED, TaskAttemptTerminationCause.NODE_FAILED);
+    wc.verifyState(AMContainerState.COMPLETED);
+    verify(wc.tal).registerRunningContainer(wc.containerID);
+    verify(wc.tal).unregisterRunningContainer(wc.containerID);
+
+    outgoingEvents = wc.verifyCountAndGetOutgoingEvents(1);
+    verifyUnOrderedOutgoingEventTypes(outgoingEvents,
+        TaskAttemptEventType.TA_CONTAINER_TERMINATED);
+    Assert.assertEquals(TaskAttemptTerminationCause.NODE_FAILED,
+        ((TaskAttemptEventContainerTerminated)outgoingEvents.get(0)).getTerminationCause());
 
     assertFalse(wc.amContainer.isInErrorState());
 
@@ -503,7 +575,7 @@
   }
 
   @SuppressWarnings("rawtypes")
-  @Test
+  @Test (timeout=5000)
   public void testContainerCompletedAtIdle() {
     WrappedContainer wc = new WrappedContainer();
     List<Event> outgoingEvents;
@@ -514,7 +586,7 @@
     wc.containerLaunched();
     wc.verifyState(AMContainerState.IDLE);
 
-    wc.containerCompleted(false);
+    wc.containerCompleted();
     wc.verifyState(AMContainerState.COMPLETED);
     verify(wc.tal).registerRunningContainer(wc.containerID);
     verify(wc.tal).unregisterRunningContainer(wc.containerID);
@@ -538,7 +610,7 @@
   }
 
   @SuppressWarnings("rawtypes")
-  @Test
+  @Test (timeout=5000)
   public void testContainerCompletedAtRunning() {
     WrappedContainer wc = new WrappedContainer();
     List<Event> outgoingEvents;
@@ -550,7 +622,7 @@
     wc.pullTaskToRun();
     wc.verifyState(AMContainerState.RUNNING);
 
-    wc.containerCompleted(false);
+    wc.containerCompleted();
     wc.verifyState(AMContainerState.COMPLETED);
     verify(wc.tal).registerRunningContainer(wc.containerID);
     verify(wc.tal).unregisterRunningContainer(wc.containerID);
@@ -574,7 +646,7 @@
   }
 
   @SuppressWarnings("rawtypes")
-  @Test
+  @Test (timeout=5000)
   public void testContainerPreemptedAtRunning() {
     WrappedContainer wc = new WrappedContainer();
     List<Event> outgoingEvents;
@@ -586,7 +658,7 @@
     wc.pullTaskToRun();
     wc.verifyState(AMContainerState.RUNNING);
 
-    wc.containerCompleted(ContainerExitStatus.PREEMPTED);
+    wc.containerCompleted(ContainerExitStatus.PREEMPTED, TaskAttemptTerminationCause.EXTERNAL_PREEMPTION);
     wc.verifyState(AMContainerState.COMPLETED);
     verify(wc.tal).registerRunningContainer(wc.containerID);
     verify(wc.tal).unregisterRunningContainer(wc.containerID);
@@ -594,6 +666,8 @@
     verify(wc.chh).unregister(wc.containerID);
 
     outgoingEvents = wc.verifyCountAndGetOutgoingEvents(1);
+    Assert.assertEquals(TaskAttemptTerminationCause.EXTERNAL_PREEMPTION,
+        ((TaskAttemptEventContainerTerminatedBySystem)outgoingEvents.get(0)).getTerminationCause());
     verifyUnOrderedOutgoingEventTypes(outgoingEvents,
         TaskAttemptEventType.TA_CONTAINER_TERMINATED_BY_SYSTEM);
 
@@ -608,9 +682,47 @@
 
     assertFalse(wc.amContainer.isInErrorState());
   }
+
+  @SuppressWarnings("rawtypes")
+  @Test (timeout=5000)
+  public void testContainerInternallyPreemptedAtRunning() {
+    WrappedContainer wc = new WrappedContainer();
+    List<Event> outgoingEvents;
+
+    wc.launchContainer();
+
+    wc.assignTaskAttempt(wc.taskAttemptID);
+    wc.containerLaunched();
+    wc.pullTaskToRun();
+    wc.verifyState(AMContainerState.RUNNING);
+
+    wc.containerCompleted(ContainerExitStatus.INVALID, TaskAttemptTerminationCause.INTERNAL_PREEMPTION);
+    wc.verifyState(AMContainerState.COMPLETED);
+    verify(wc.tal).registerRunningContainer(wc.containerID);
+    verify(wc.tal).unregisterRunningContainer(wc.containerID);
+    verify(wc.chh).register(wc.containerID);
+    verify(wc.chh).unregister(wc.containerID);
+
+    outgoingEvents = wc.verifyCountAndGetOutgoingEvents(1);
+    Assert.assertEquals(TaskAttemptTerminationCause.INTERNAL_PREEMPTION,
+        ((TaskAttemptEventContainerTerminated)outgoingEvents.get(0)).getTerminationCause());
+    verifyUnOrderedOutgoingEventTypes(outgoingEvents,
+        TaskAttemptEventType.TA_CONTAINER_TERMINATED);
+
+    assertFalse(wc.amContainer.isInErrorState());
+
+    // Pending task complete. (Ideally, container should be dead at this point
+    // and this event should not be generated. Network timeout on NM-RM heartbeat
+    // can cause it to be generated)
+    wc.taskAttemptSucceeded(wc.taskAttemptID);
+    wc.verifyNoOutgoingEvents();
+    wc.verifyHistoryStopEvent();
+
+    assertFalse(wc.amContainer.isInErrorState());
+  }
   
   @SuppressWarnings("rawtypes")
-  @Test
+  @Test (timeout=5000)
   public void testContainerDiskFailedAtRunning() {
     WrappedContainer wc = new WrappedContainer();
     List<Event> outgoingEvents;
@@ -622,7 +734,7 @@
     wc.pullTaskToRun();
     wc.verifyState(AMContainerState.RUNNING);
 
-    wc.containerCompleted(ContainerExitStatus.DISKS_FAILED);
+    wc.containerCompleted(ContainerExitStatus.DISKS_FAILED, TaskAttemptTerminationCause.NODE_DISK_ERROR);
     wc.verifyState(AMContainerState.COMPLETED);
     verify(wc.tal).registerRunningContainer(wc.containerID);
     verify(wc.tal).unregisterRunningContainer(wc.containerID);
@@ -630,6 +742,8 @@
     verify(wc.chh).unregister(wc.containerID);
 
     outgoingEvents = wc.verifyCountAndGetOutgoingEvents(1);
+    Assert.assertEquals(TaskAttemptTerminationCause.NODE_DISK_ERROR,
+        ((TaskAttemptEventContainerTerminatedBySystem)outgoingEvents.get(0)).getTerminationCause());
     verifyUnOrderedOutgoingEventTypes(outgoingEvents,
         TaskAttemptEventType.TA_CONTAINER_TERMINATED_BY_SYSTEM);
 
@@ -646,7 +760,7 @@
   }
   
   @SuppressWarnings("rawtypes")
-  @Test
+  @Test (timeout=5000)
   public void testTaskAssignedToCompletedContainer() {
     WrappedContainer wc = new WrappedContainer();
     List<Event> outgoingEvents;
@@ -657,7 +771,7 @@
     wc.pullTaskToRun();
     wc.taskAttemptSucceeded(wc.taskAttemptID);
 
-    wc.containerCompleted(false);
+    wc.containerCompleted();
     wc.verifyState(AMContainerState.COMPLETED);
 
     TezTaskAttemptID taID2 = TezTaskAttemptID.getInstance(wc.taskID, 2);
@@ -677,7 +791,7 @@
     assertTrue(wc.amContainer.isInErrorState());
   }
 
-  @Test
+  @Test (timeout=5000)
   public void testTaskPullAtLaunching() {
     WrappedContainer wc = new WrappedContainer();
 
@@ -690,7 +804,7 @@
   }
 
   @SuppressWarnings("rawtypes")
-  @Test
+  @Test (timeout=5000)
   public void testNodeFailedAtIdle() {
     WrappedContainer wc = new WrappedContainer();
     List<Event> outgoingEvents;
@@ -712,11 +826,11 @@
     for (Event event : outgoingEvents) {
       if (event.getType() == TaskAttemptEventType.TA_NODE_FAILED) {
         TaskAttemptEventNodeFailed nfEvent = (TaskAttemptEventNodeFailed) event;
-        assertEquals("nodeFailed", nfEvent.getDiagnosticInfo());
+        assertTrue(nfEvent.getDiagnosticInfo().contains("nodeFailed"));
       }
     }
 
-    wc.containerCompleted(false);
+    wc.containerCompleted();
     wc.verifyHistoryStopEvent();
     outgoingEvents = wc.verifyCountAndGetOutgoingEvents(1);
     verifyUnOrderedOutgoingEventTypes(outgoingEvents,
@@ -726,7 +840,7 @@
   }
 
   @SuppressWarnings("rawtypes")
-  @Test
+  @Test (timeout=5000)
   public void testNodeFailedAtIdleMultipleAttempts() {
     WrappedContainer wc = new WrappedContainer();
     List<Event> outgoingEvents;
@@ -756,13 +870,13 @@
     for (Event event : outgoingEvents) {
       if (event.getType() == TaskAttemptEventType.TA_NODE_FAILED) {
         TaskAttemptEventNodeFailed nfEvent = (TaskAttemptEventNodeFailed) event;
-        assertEquals("nodeFailed", nfEvent.getDiagnosticInfo());
+        assertTrue(nfEvent.getDiagnosticInfo().contains("nodeFailed"));
       }
     }
 
     assertFalse(wc.amContainer.isInErrorState());
 
-    wc.containerCompleted(false);
+    wc.containerCompleted();
     wc.verifyNoOutgoingEvents();
     wc.verifyHistoryStopEvent();
 
@@ -772,7 +886,7 @@
   }
 
   @SuppressWarnings("rawtypes")
-  @Test
+  @Test (timeout=5000)
   public void testNodeFailedAtRunningMultipleAttempts() {
     WrappedContainer wc = new WrappedContainer();
     List<Event> outgoingEvents;
@@ -801,11 +915,11 @@
     for (Event event : outgoingEvents) {
       if (event.getType() == TaskAttemptEventType.TA_NODE_FAILED) {
         TaskAttemptEventNodeFailed nfEvent = (TaskAttemptEventNodeFailed) event;
-        assertEquals("nodeFailed", nfEvent.getDiagnosticInfo());
+        assertTrue(nfEvent.getDiagnosticInfo().contains("nodeFailed"));
       }
     }
 
-    wc.containerCompleted(false);
+    wc.containerCompleted();
     wc.verifyHistoryStopEvent();
     outgoingEvents = wc.verifyCountAndGetOutgoingEvents(1);
     verifyUnOrderedOutgoingEventTypes(outgoingEvents,
@@ -818,7 +932,7 @@
   }
 
   @SuppressWarnings("rawtypes")
-  @Test
+  @Test (timeout=5000)
   public void testNodeFailedAtCompletedMultipleSuccessfulTAs() {
     WrappedContainer wc = new WrappedContainer();
     List<Event> outgoingEvents;
@@ -835,7 +949,7 @@
     wc.taskAttemptSucceeded(taID2);
     wc.stopRequest();
     wc.nmStopSent();
-    wc.containerCompleted(false);
+    wc.containerCompleted();
     wc.verifyState(AMContainerState.COMPLETED);
 
     wc.nodeFailed();
@@ -849,7 +963,7 @@
     assertEquals(2, wc.amContainer.getAllTaskAttempts().size());
   }
 
-  @Test
+  @Test (timeout=5000)
   public void testDuplicateCompletedEvents() {
     WrappedContainer wc = new WrappedContainer();
 
@@ -865,17 +979,17 @@
     wc.taskAttemptSucceeded(taID2);
     wc.stopRequest();
     wc.nmStopSent();
-    wc.containerCompleted(false);
+    wc.containerCompleted();
     wc.verifyState(AMContainerState.COMPLETED);
 
     wc.verifyNoOutgoingEvents();
 
-    wc.containerCompleted(false);
+    wc.containerCompleted();
     wc.verifyNoOutgoingEvents();
     wc.verifyHistoryStopEvent();
   }
   
-  @Test
+  @Test (timeout=5000)
   public void testLocalResourceAddition() {
     WrappedContainer wc = new WrappedContainer();
 
@@ -926,13 +1040,13 @@
     wc.taskAttemptSucceeded(taID3);
 
     // Verify references are cleared after a container completes.
-    wc.containerCompleted(false);
+    wc.containerCompleted();
     assertNull(wc.amContainer.containerLocalResources);
     assertNull(wc.amContainer.additionalLocalResources);
   }
 
   @SuppressWarnings("unchecked")
-  @Test
+  @Test (timeout=5000)
   public void testCredentialsTransfer() {
     WrappedContainerMultipleDAGs wc = new WrappedContainerMultipleDAGs();
 
@@ -1183,15 +1297,15 @@
           AMContainerEventType.C_NM_STOP_FAILED));
     }
 
-    public void containerCompleted(boolean preempted) {
+    public void containerCompleted() {
       reset(eventHandler);
-      amContainer.handle(new AMContainerEventCompleted(containerID, 
-          (preempted ? ContainerExitStatus.PREEMPTED : ContainerExitStatus.SUCCESS), null));
+      amContainer.handle(new AMContainerEventCompleted(containerID, ContainerExitStatus.SUCCESS, null,
+          TaskAttemptTerminationCause.CONTAINER_EXITED));
     }
     
-    public void containerCompleted(int exitStatus) {
+    public void containerCompleted(int exitStatus, TaskAttemptTerminationCause errCause) {
       reset(eventHandler);
-      amContainer.handle(new AMContainerEventCompleted(containerID, exitStatus, null));
+      amContainer.handle(new AMContainerEventCompleted(containerID, exitStatus, null, errCause));
     }
 
     public void containerTimedOut() {
diff --git a/tez-dag/src/test/java/org/apache/tez/dag/history/events/TestHistoryEventsProtoConversion.java b/tez-dag/src/test/java/org/apache/tez/dag/history/events/TestHistoryEventsProtoConversion.java
index 8913287..cd770a3 100644
--- a/tez-dag/src/test/java/org/apache/tez/dag/history/events/TestHistoryEventsProtoConversion.java
+++ b/tez-dag/src/test/java/org/apache/tez/dag/history/events/TestHistoryEventsProtoConversion.java
@@ -21,6 +21,7 @@
 import static org.junit.Assert.fail;
 
 import java.nio.ByteBuffer;
+
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.conf.Configuration;
@@ -44,6 +45,7 @@
 import org.apache.tez.dag.history.HistoryEvent;
 import org.apache.tez.dag.history.HistoryEventType;
 import org.apache.tez.dag.history.SummaryEvent;
+import org.apache.tez.dag.records.TaskAttemptTerminationCause;
 import org.apache.tez.dag.records.TezDAGID;
 import org.apache.tez.dag.records.TezTaskAttemptID;
 import org.apache.tez.dag.records.TezTaskID;
@@ -471,7 +473,7 @@
           TezTaskAttemptID.getInstance(TezTaskID.getInstance(TezVertexID.getInstance(
               TezDAGID.getInstance(ApplicationId.newInstance(0, 1), 1), 111), 1), 1),
           "vertex1", 10001l, 1000434444l, TaskAttemptState.FAILED,
-          null, null);
+          null, null, null);
       TaskAttemptFinishedEvent deserializedEvent = (TaskAttemptFinishedEvent)
           testProtoConversion(event);
       Assert.assertEquals(event.getTaskAttemptID(),
@@ -491,7 +493,7 @@
           TezTaskAttemptID.getInstance(TezTaskID.getInstance(TezVertexID.getInstance(
               TezDAGID.getInstance(ApplicationId.newInstance(0, 1), 1), 111), 1), 1),
           "vertex1", 10001l, 1000434444l, TaskAttemptState.FAILED,
-          "diagnose", new TezCounters());
+          TaskAttemptTerminationCause.APPLICATION_ERROR, "diagnose", new TezCounters());
       TaskAttemptFinishedEvent deserializedEvent = (TaskAttemptFinishedEvent)
           testProtoConversion(event);
       Assert.assertEquals(event.getTaskAttemptID(),
@@ -504,6 +506,8 @@
           deserializedEvent.getState());
       Assert.assertEquals(event.getCounters(),
           deserializedEvent.getCounters());
+      Assert.assertEquals(event.getTaskAttemptError(),
+          deserializedEvent.getTaskAttemptError());
       logEvents(event, deserializedEvent);
     }
   }
diff --git a/tez-dag/src/test/java/org/apache/tez/dag/history/logging/impl/TestHistoryEventJsonConversion.java b/tez-dag/src/test/java/org/apache/tez/dag/history/logging/impl/TestHistoryEventJsonConversion.java
index a20c9fe..e0f8c21 100644
--- a/tez-dag/src/test/java/org/apache/tez/dag/history/logging/impl/TestHistoryEventJsonConversion.java
+++ b/tez-dag/src/test/java/org/apache/tez/dag/history/logging/impl/TestHistoryEventJsonConversion.java
@@ -59,6 +59,7 @@
 import org.apache.tez.dag.history.events.VertexParallelismUpdatedEvent;
 import org.apache.tez.dag.history.events.VertexStartedEvent;
 import org.apache.tez.dag.history.utils.DAGUtils;
+import org.apache.tez.dag.records.TaskAttemptTerminationCause;
 import org.apache.tez.dag.records.TezDAGID;
 import org.apache.tez.dag.records.TezTaskAttemptID;
 import org.apache.tez.dag.records.TezTaskID;
@@ -155,7 +156,7 @@
           break;
         case TASK_ATTEMPT_FINISHED:
           event = new TaskAttemptFinishedEvent(tezTaskAttemptID, "v1", random.nextInt(),
-              random.nextInt(), TaskAttemptState.FAILED, null, null);
+              random.nextInt(), TaskAttemptState.KILLED, TaskAttemptTerminationCause.TERMINATED_BY_CLIENT, null, null);
           break;
         case CONTAINER_LAUNCHED:
           event = new ContainerLaunchedEvent(containerId, random.nextInt(),
diff --git a/tez-plugins/tez-mbeans-resource-calculator/src/test/java/org/apache/tez/util/TestTezMxBeanResourceCalculator.java b/tez-plugins/tez-mbeans-resource-calculator/src/test/java/org/apache/tez/util/TestTezMxBeanResourceCalculator.java
index fdc0cb7..1aba859 100644
--- a/tez-plugins/tez-mbeans-resource-calculator/src/test/java/org/apache/tez/util/TestTezMxBeanResourceCalculator.java
+++ b/tez-plugins/tez-mbeans-resource-calculator/src/test/java/org/apache/tez/util/TestTezMxBeanResourceCalculator.java
@@ -19,25 +19,12 @@
 package org.apache.tez.util;
 
 import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.yarn.api.records.ApplicationId;
-import org.apache.hadoop.yarn.api.records.timeline.TimelineEntity;
-import org.apache.hadoop.yarn.client.api.TimelineClient;
 import org.apache.hadoop.yarn.util.ResourceCalculatorProcessTree;
 import org.apache.tez.dag.api.TezConfiguration;
-import org.apache.tez.dag.app.AppContext;
-import org.apache.tez.dag.history.DAGHistoryEvent;
-import org.apache.tez.dag.history.events.DAGStartedEvent;
-import org.apache.tez.dag.records.TezDAGID;
 import org.junit.After;
 import org.junit.Assert;
 import org.junit.Before;
 import org.junit.Test;
-import org.mockito.invocation.InvocationOnMock;
-import org.mockito.stubbing.Answer;
-
-import static org.mockito.Matchers.any;
-import static org.mockito.Mockito.mock;
-import static org.mockito.Mockito.when;
 
 public class TestTezMxBeanResourceCalculator {
 
diff --git a/tez-plugins/tez-yarn-timeline-history/src/main/java/org/apache/tez/dag/history/logging/ats/HistoryEventTimelineConversion.java b/tez-plugins/tez-yarn-timeline-history/src/main/java/org/apache/tez/dag/history/logging/ats/HistoryEventTimelineConversion.java
index 4b6d648..91346ae 100644
--- a/tez-plugins/tez-yarn-timeline-history/src/main/java/org/apache/tez/dag/history/logging/ats/HistoryEventTimelineConversion.java
+++ b/tez-plugins/tez-yarn-timeline-history/src/main/java/org/apache/tez/dag/history/logging/ats/HistoryEventTimelineConversion.java
@@ -376,6 +376,9 @@
     atsEntity.addOtherInfo(ATSConstants.FINISH_TIME, event.getFinishTime());
     atsEntity.addOtherInfo(ATSConstants.TIME_TAKEN, (event.getFinishTime() - event.getStartTime()));
     atsEntity.addOtherInfo(ATSConstants.STATUS, event.getState().name());
+    if (event.getTaskAttemptError() != null) {
+      atsEntity.addOtherInfo(ATSConstants.TASK_ATTEMPT_ERROR_ENUM, event.getTaskAttemptError().name());
+    }
     atsEntity.addOtherInfo(ATSConstants.DIAGNOSTICS, event.getDiagnostics());
     atsEntity.addOtherInfo(ATSConstants.COUNTERS,
         DAGUtils.convertCountersToATSMap(event.getCounters()));
diff --git a/tez-plugins/tez-yarn-timeline-history/src/test/java/org/apache/tez/dag/history/logging/ats/TestHistoryEventTimelineConversion.java b/tez-plugins/tez-yarn-timeline-history/src/test/java/org/apache/tez/dag/history/logging/ats/TestHistoryEventTimelineConversion.java
index ce47820..0f2942c 100644
--- a/tez-plugins/tez-yarn-timeline-history/src/test/java/org/apache/tez/dag/history/logging/ats/TestHistoryEventTimelineConversion.java
+++ b/tez-plugins/tez-yarn-timeline-history/src/test/java/org/apache/tez/dag/history/logging/ats/TestHistoryEventTimelineConversion.java
@@ -63,6 +63,7 @@
 import org.apache.tez.dag.history.events.VertexStartedEvent;
 import org.apache.tez.dag.history.logging.EntityTypes;
 import org.apache.tez.dag.history.utils.DAGUtils;
+import org.apache.tez.dag.records.TaskAttemptTerminationCause;
 import org.apache.tez.dag.records.TezDAGID;
 import org.apache.tez.dag.records.TezTaskAttemptID;
 import org.apache.tez.dag.records.TezTaskID;
@@ -157,7 +158,7 @@
           break;
         case TASK_ATTEMPT_FINISHED:
           event = new TaskAttemptFinishedEvent(tezTaskAttemptID, "v1", random.nextInt(),
-              random.nextInt(), TaskAttemptState.FAILED, null, null);
+              random.nextInt(), TaskAttemptState.FAILED, TaskAttemptTerminationCause.OUTPUT_LOST, null, null);
           break;
         case CONTAINER_LAUNCHED:
           event = new ContainerLaunchedEvent(containerId, random.nextInt(),
diff --git a/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/common/YARNMaster.java b/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/common/YARNMaster.java
deleted file mode 100644
index 8709e05..0000000
--- a/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/common/YARNMaster.java
+++ /dev/null
@@ -1,57 +0,0 @@
-/**
-* Licensed to the Apache Software Foundation (ASF) under one
-* or more contributor license agreements.  See the NOTICE file
-* distributed with this work for additional information
-* regarding copyright ownership.  The ASF licenses this file
-* to you under the Apache License, Version 2.0 (the
-* "License"); you may not use this file except in compliance
-* with the License.  You may obtain a copy of the License at
-*
-*     http://www.apache.org/licenses/LICENSE-2.0
-*
-* Unless required by applicable law or agreed to in writing, software
-* distributed under the License is distributed on an "AS IS" BASIS,
-* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-* See the License for the specific language governing permissions and
-* limitations under the License.
-*/
-
-package org.apache.tez.runtime.library.common;
-
-import java.io.IOException;
-import java.net.InetSocketAddress;
-
-import org.apache.hadoop.classification.InterfaceAudience.Private;
-import org.apache.hadoop.classification.InterfaceStability.Unstable;
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.security.SecurityUtil;
-import org.apache.hadoop.yarn.conf.YarnConfiguration;
-
-@Private
-@Unstable
-public class YARNMaster {
-  
-  public enum State {
-    INITIALIZING, RUNNING;
-  }
-
-  public static String getMasterUserName(Configuration conf) {
-    return conf.get(YarnConfiguration.RM_PRINCIPAL);
-  }
-  
-  public static InetSocketAddress getMasterAddress(Configuration conf) {
-    return conf.getSocketAddr(
-        YarnConfiguration.RM_ADDRESS,
-        YarnConfiguration.DEFAULT_RM_ADDRESS,
-        YarnConfiguration.DEFAULT_RM_PORT);
-  }
-
-  public static String getMasterPrincipal(Configuration conf) 
-  throws IOException {
-    String masterHostname = getMasterAddress(conf).getHostName();
-    // get kerberos principal for use as delegation token renewer
-    return SecurityUtil.getServerPrincipal(
-        getMasterUserName(conf), masterHostname);
-  }
-  
-}
diff --git a/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/common/shuffle/orderedgrouped/MergeManager.java b/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/common/shuffle/orderedgrouped/MergeManager.java
index 6081f91..fd8b1ea 100644
--- a/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/common/shuffle/orderedgrouped/MergeManager.java
+++ b/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/common/shuffle/orderedgrouped/MergeManager.java
@@ -26,6 +26,8 @@
 import java.util.TreeSet;
 
 import com.google.common.annotations.VisibleForTesting;
+import java.util.concurrent.atomic.AtomicInteger;
+import org.apache.commons.io.FilenameUtils;
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.classification.InterfaceAudience;
@@ -130,6 +132,7 @@
   private final int ifileReadAheadLength;
   private final int ifileBufferSize;
 
+  private AtomicInteger mergeFileSequenceId = new AtomicInteger(0);
 
   /**
    * Construct the MergeManager. Must call start before it becomes usable.
@@ -696,10 +699,13 @@
       } else {
         namePart = file0.getPath().getName().toString();
       }
-      Path outputPath = localDirAllocator.getLocalPathForWrite(namePart, approxOutputSize, conf);
-      outputPath = outputPath.suffix(Constants.MERGED_OUTPUT_PREFIX);
 
-      Writer writer = 
+      // namePart includes the suffix of the file. We need to remove it.
+      namePart = FilenameUtils.removeExtension(namePart);
+      Path outputPath = localDirAllocator.getLocalPathForWrite(namePart, approxOutputSize, conf);
+      outputPath = outputPath.suffix(Constants.MERGED_OUTPUT_PREFIX + mergeFileSequenceId.getAndIncrement());
+
+      Writer writer =
         new Writer(conf, rfs, outputPath, 
                         (Class)ConfigUtils.getIntermediateInputKeyClass(conf), 
                         (Class)ConfigUtils.getIntermediateInputValueClass(conf),
diff --git a/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/common/sort/impl/ExternalSorter.java b/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/common/sort/impl/ExternalSorter.java
index f100f23..c73fc49 100644
--- a/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/common/sort/impl/ExternalSorter.java
+++ b/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/common/sort/impl/ExternalSorter.java
@@ -287,7 +287,7 @@
             TezRuntimeConfiguration.TEZ_RUNTIME_IO_SORT_MB, 
             TezRuntimeConfiguration.TEZ_RUNTIME_IO_SORT_MB_DEFAULT);
     Preconditions.checkArgument(initialMemRequestMb != 0, "io.sort.mb should be larger than 0");
-    long reqBytes = initialMemRequestMb << 20;
+    long reqBytes = ((long) initialMemRequestMb) << 20;
     LOG.info("Requested SortBufferSize (io.sort.mb): " + initialMemRequestMb);
     return reqBytes;
   }
diff --git a/tez-tests/src/test/java/org/apache/tez/test/TestDAGRecovery.java b/tez-tests/src/test/java/org/apache/tez/test/TestDAGRecovery.java
index 5db17c3..35e6dcd 100644
--- a/tez-tests/src/test/java/org/apache/tez/test/TestDAGRecovery.java
+++ b/tez-tests/src/test/java/org/apache/tez/test/TestDAGRecovery.java
@@ -132,7 +132,7 @@
 
     tezConf = new TezConfiguration(miniTezCluster.getConfig());
     tezConf.setInt(TezConfiguration.DAG_RECOVERY_MAX_UNFLUSHED_EVENTS, 0);
-    tezConf.set(TezConfiguration.TEZ_AM_LOG_LEVEL, "DEBUG");
+    tezConf.set(TezConfiguration.TEZ_AM_LOG_LEVEL, "INFO");
     tezConf.set(TezConfiguration.TEZ_AM_STAGING_DIR,
         remoteStagingDir.toString());
     tezConf.setBoolean(TezConfiguration.TEZ_AM_NODE_BLACKLISTING_ENABLED, false);
diff --git a/tez-tests/src/test/java/org/apache/tez/test/TestFaultTolerance.java b/tez-tests/src/test/java/org/apache/tez/test/TestFaultTolerance.java
index 1c1e846..c834dee 100644
--- a/tez-tests/src/test/java/org/apache/tez/test/TestFaultTolerance.java
+++ b/tez-tests/src/test/java/org/apache/tez/test/TestFaultTolerance.java
@@ -713,5 +713,23 @@
     DAG dag = SimpleTestDAG.createDAG("testTwoTasksHaveInputFailuresSuccess", testConf);
     runDAGAndVerify(dag, DAGStatus.State.SUCCEEDED);
   }
-
+  
+  @Test (timeout=60000)
+  public void testRandomFailingTasks() throws Exception {
+    Configuration testConf = new Configuration(false);
+    testConf.setBoolean(TestProcessor.TEZ_FAILING_PROCESSOR_DO_RANDOM_FAIL, true);
+    testConf.setFloat(TestProcessor.TEZ_FAILING_PROCESSOR_RANDOM_FAIL_PROBABILITY, 0.5f);
+    DAG dag = SixLevelsFailingDAG.createDAG("testRandomFailingTasks", testConf);
+    runDAGAndVerify(dag, DAGStatus.State.SUCCEEDED);
+  }
+  
+  @Test (timeout=60000)
+  public void testRandomFailingInputs() throws Exception {
+    Configuration testConf = new Configuration(false);
+    testConf.setBoolean(TestInput.TEZ_FAILING_INPUT_DO_RANDOM_FAIL, true);
+    testConf.setFloat(TestInput.TEZ_FAILING_INPUT_RANDOM_FAIL_PROBABILITY, 0.5f);
+    DAG dag = SixLevelsFailingDAG.createDAG("testRandomFailingInputs", testConf);
+    runDAGAndVerify(dag, DAGStatus.State.SUCCEEDED);
+  }
+  
 }
diff --git a/tez-tests/src/test/java/org/apache/tez/test/TestInput.java b/tez-tests/src/test/java/org/apache/tez/test/TestInput.java
index 465dd9c..0050961 100644
--- a/tez-tests/src/test/java/org/apache/tez/test/TestInput.java
+++ b/tez-tests/src/test/java/org/apache/tez/test/TestInput.java
@@ -29,6 +29,7 @@
 import org.apache.hadoop.conf.Configuration;
 import org.apache.tez.common.TezUtils;
 import org.apache.tez.dag.api.InputDescriptor;
+import org.apache.tez.dag.api.TezConfiguration;
 import org.apache.tez.dag.api.UserPayload;
 import org.apache.tez.runtime.api.AbstractLogicalInput;
 import org.apache.tez.runtime.api.Event;
@@ -65,6 +66,8 @@
   int failingInputUpto = 0;
   
   boolean doFail = false;
+  boolean doRandomFail = false;
+  float randomFailProbability = 0.0f;
   boolean doFailAndExit = false;
   Set<Integer> failingTaskIndices = Sets.newHashSet();
   Set<Integer> failingTaskAttempts = Sets.newHashSet();
@@ -78,6 +81,16 @@
   public static String TEZ_FAILING_INPUT_DO_FAIL =
       "tez.failing-input.do-fail";
   /**
+   * Enable random failure for this logical input. The config is set per DAG.
+   */
+  public static String TEZ_FAILING_INPUT_DO_RANDOM_FAIL =
+      "tez.failing-input.do-random-fail";
+  /**
+   * Probability of randomly failing an input. Range is 0 to 1. The number is set per DAG.
+   */
+  public static String TEZ_FAILING_INPUT_RANDOM_FAIL_PROBABILITY =
+      "tez.failing-input.random-fail-probability";
+  /**
    * Logical input will exit (and cause task failure) after reporting failure to 
    * read.
    */
@@ -146,66 +159,96 @@
         lastInputReadyValue = inputReady.get();
         LOG.info("Done for inputReady: " + lastInputReadyValue);
       }
-      if (doFail) {
-        if (
-            (failingTaskIndices.contains(failAll) ||
-            failingTaskIndices.contains(getContext().getTaskIndex())) &&
-            (failingTaskAttempts.contains(failAll) || 
-             failingTaskAttempts.contains(getContext().getTaskAttemptNumber())) &&
-             (lastInputReadyValue <= failingInputUpto)) {
-          List<Event> events = Lists.newLinkedList();
-          if (failingInputIndices.contains(failAll)) {
-            for (int i=0; i<getNumPhysicalInputs(); ++i) {
-              String msg = ("FailingInput: " + getContext().getUniqueIdentifier() + 
-                  " index: " + i + " version: " + lastInputReadyValue);
-              events.add(InputReadErrorEvent.create(msg, i, lastInputReadyValue));
-              LOG.info("Failing input: " + msg);
+      if (!doRandomFail) {
+        // not random fail
+        if (doFail) {
+          if (
+              (failingTaskIndices.contains(failAll) ||
+              failingTaskIndices.contains(getContext().getTaskIndex())) &&
+              (failingTaskAttempts.contains(failAll) || 
+               failingTaskAttempts.contains(getContext().getTaskAttemptNumber())) &&
+               (lastInputReadyValue <= failingInputUpto)) {
+            List<Event> events = Lists.newLinkedList();
+            if (failingInputIndices.contains(failAll)) {
+              for (int i=0; i<getNumPhysicalInputs(); ++i) {
+                String msg = ("FailingInput: " + getContext().getUniqueIdentifier() + 
+                    " index: " + i + " version: " + lastInputReadyValue);
+                events.add(InputReadErrorEvent.create(msg, i, lastInputReadyValue));
+                LOG.info("Failing input: " + msg);
+              }
+            } else {
+              for (Integer index : failingInputIndices) {
+                if (index.intValue() >= getNumPhysicalInputs()) {
+                  throwException("InputIndex: " + index.intValue() + 
+                      " should be less than numInputs: " + getNumPhysicalInputs());
+                }
+                if (completedInputVersion[index.intValue()] < lastInputReadyValue) {
+                  continue; // dont fail a previous version now.
+                }
+                String msg = ("FailingInput: " + getContext().getUniqueIdentifier() + 
+                    " index: " + index.intValue() + " version: " + lastInputReadyValue);
+                events.add(InputReadErrorEvent.create(msg, index.intValue(), lastInputReadyValue));
+                LOG.info("Failing input: " + msg);
+              }
             }
-          } else {
-            for (Integer index : failingInputIndices) {
-              if (index.intValue() >= getNumPhysicalInputs()) {
-                throwException("InputIndex: " + index.intValue() + 
-                    " should be less than numInputs: " + getNumPhysicalInputs());
+            getContext().sendEvents(events);
+            if (doFailAndExit) {
+              String msg = "FailingInput exiting: " + getContext().getUniqueIdentifier();
+              LOG.info(msg);
+              throwException(msg);
+            } else {
+              done = false;
+            }
+          } else if ((failingTaskIndices.contains(failAll) ||
+              failingTaskIndices.contains(getContext().getTaskIndex()))){
+            boolean previousAttemptReadFailed = false;
+            if (failingTaskAttempts.contains(failAll)) {
+              previousAttemptReadFailed = true;
+            } else {
+              for (int i=0 ; i<getContext().getTaskAttemptNumber(); ++i) {
+                if (failingTaskAttempts.contains(new Integer(i))) {
+                  previousAttemptReadFailed = true;
+                  break;
+                }
               }
-              if (completedInputVersion[index.intValue()] < lastInputReadyValue) {
-                continue; // dont fail a previous version now.
-              }
-              String msg = ("FailingInput: " + getContext().getUniqueIdentifier() + 
-                  " index: " + index.intValue() + " version: " + lastInputReadyValue);
-              events.add(InputReadErrorEvent.create(msg, index.intValue(), lastInputReadyValue));
-              LOG.info("Failing input: " + msg);
+            }
+            if (previousAttemptReadFailed && 
+                (lastInputReadyValue <= failingInputUpto)) {
+              // if any previous attempt has failed then don't be done when we see
+              // a previously failed input
+              LOG.info("Previous task attempt failed and input version less than failing upto version");
+              done = false;
             }
           }
-          getContext().sendEvents(events);
-          if (doFailAndExit) {
-            String msg = "FailingInput exiting: " + getContext().getUniqueIdentifier();
+          
+        }
+      } else {
+        // random fail
+        List<Event> events = Lists.newLinkedList();
+        for (int index=0; index<getNumPhysicalInputs(); ++index) {
+          // completedInputVersion[index] has DataMovementEvent.getVersion() value.
+          int sourceInputVersion = completedInputVersion[index];
+          int maxFailedAttempt = conf.getInt(TezConfiguration.TEZ_AM_TASK_MAX_FAILED_ATTEMPTS, 
+              TezConfiguration.TEZ_AM_TASK_MAX_FAILED_ATTEMPTS_DEFAULT);
+          if (sourceInputVersion < maxFailedAttempt - 1) {
+            float rollNumber = (float) Math.random();
+            String msg = "FailingInput random fail turned on." +
+                "Do a roll:" + getContext().getUniqueIdentifier() + 
+                " index: " + index + " version: " + sourceInputVersion +
+                " rollNumber: " + rollNumber + 
+                " randomFailProbability " + randomFailProbability;
             LOG.info(msg);
-            throwException(msg);
-          } else {
-            done = false;
-          }
-        } else if ((failingTaskIndices.contains(failAll) ||
-            failingTaskIndices.contains(getContext().getTaskIndex()))){
-          boolean previousAttemptReadFailed = false;
-          if (failingTaskAttempts.contains(failAll)) {
-            previousAttemptReadFailed = true;
-          } else {
-            for (int i=0 ; i<getContext().getTaskAttemptNumber(); ++i) {
-              if (failingTaskAttempts.contains(new Integer(i))) {
-                previousAttemptReadFailed = true;
-                break;
-              }
+            if (rollNumber < randomFailProbability) {
+              // fail the source input
+              msg = "FailingInput: rollNumber < randomFailProbability. Do fail." + 
+                            getContext().getUniqueIdentifier() + 
+                            " index: " + index + " version: " + sourceInputVersion;
+              LOG.info(msg);
+              events.add(InputReadErrorEvent.create(msg, index, sourceInputVersion));
             }
           }
-          if (previousAttemptReadFailed && 
-              (lastInputReadyValue <= failingInputUpto)) {
-            // if any previous attempt has failed then dont be done when we see
-            // a previously failed input
-            LOG.info("Previous task attempt failed and input version less than failing upto version");
-            done = false;
-          }
         }
-        
+        getContext().sendEvents(events);
       }
     } while (!done);
     
@@ -265,6 +308,11 @@
           failingInputIndices.add(Integer.valueOf(failingIndex));
         }
       }
+      doRandomFail = conf
+          .getBoolean(TEZ_FAILING_INPUT_DO_RANDOM_FAIL, false);
+      randomFailProbability = conf.getFloat(TEZ_FAILING_INPUT_RANDOM_FAIL_PROBABILITY, 0.0f);
+      LOG.info("doRandomFail: " + doRandomFail);
+      LOG.info("randomFailProbability: " + randomFailProbability);
     }
     return Collections.emptyList();
   }
diff --git a/tez-tests/src/test/java/org/apache/tez/test/TestProcessor.java b/tez-tests/src/test/java/org/apache/tez/test/TestProcessor.java
index ed37ea9..90a4f13 100644
--- a/tez-tests/src/test/java/org/apache/tez/test/TestProcessor.java
+++ b/tez-tests/src/test/java/org/apache/tez/test/TestProcessor.java
@@ -27,6 +27,7 @@
 import org.apache.hadoop.conf.Configuration;
 import org.apache.tez.common.TezUtils;
 import org.apache.tez.dag.api.ProcessorDescriptor;
+import org.apache.tez.dag.api.TezConfiguration;
 import org.apache.tez.dag.api.UserPayload;
 import org.apache.tez.runtime.api.AbstractLogicalIOProcessor;
 import org.apache.tez.runtime.api.Event;
@@ -51,6 +52,8 @@
   Configuration conf;
   
   boolean doFail = false;
+  boolean doRandomFail = false;
+  float randomFailProbability = 0.0f;
   long sleepMs;
   Set<Integer> failingTaskIndices = Sets.newHashSet();
   int failingTaskAttemptUpto = 0;
@@ -65,6 +68,15 @@
   public static String TEZ_FAILING_PROCESSOR_DO_FAIL =
       "tez.failing-processor.do-fail";
   /**
+   * Enable random failure for all processors.
+   */
+  public static String TEZ_FAILING_PROCESSOR_DO_RANDOM_FAIL = "tez.failing-processor.do-random-fail";
+  /**
+   * Probability of randomly failing a task attempt. Range is 0 to 1. The number is set per DAG.
+   */
+  public static String TEZ_FAILING_PROCESSOR_RANDOM_FAIL_PROBABILITY = "tez.failing-processor.random-fail-probability";
+  
+  /**
    * Time to sleep in the processor in milliseconds.
    */
   public static String TEZ_FAILING_PROCESSOR_SLEEP_MS =
@@ -154,6 +166,12 @@
         LOG.info("Adding failing attempt : " + failingTaskAttemptUpto + 
             " dag: " + getContext().getDAGName());
       }
+      
+      doRandomFail = conf
+          .getBoolean(TEZ_FAILING_PROCESSOR_DO_RANDOM_FAIL, false);
+      randomFailProbability = conf.getFloat(TEZ_FAILING_PROCESSOR_RANDOM_FAIL_PROBABILITY, 0.0f);
+      LOG.info("doRandomFail: " + doRandomFail);
+      LOG.info("randomFailProbability: " + randomFailProbability);
     }
   }
 
@@ -179,21 +197,48 @@
 
     Thread.sleep(sleepMs);
     
-    if (doFail) {
-      if (
-          (failingTaskIndices.contains(failAll) ||
-          failingTaskIndices.contains(getContext().getTaskIndex())) &&
-          (failingTaskAttemptUpto == failAll.intValue() || 
-           failingTaskAttemptUpto >= getContext().getTaskAttemptNumber())) {
-        String msg = "FailingProcessor: " + getContext().getUniqueIdentifier() + 
+    if (!doRandomFail) {
+      // not random fail
+      if (doFail) {
+        if (
+            (failingTaskIndices.contains(failAll) ||
+            failingTaskIndices.contains(getContext().getTaskIndex())) &&
+            (failingTaskAttemptUpto == failAll.intValue() || 
+             failingTaskAttemptUpto >= getContext().getTaskAttemptNumber())) {
+          String msg = "FailingProcessor: " + getContext().getUniqueIdentifier() + 
+              " dag: " + getContext().getDAGName() +
+              " taskIndex: " + getContext().getTaskIndex() +
+              " taskAttempt: " + getContext().getTaskAttemptNumber();
+          LOG.info(msg);
+          throwException(msg);
+        }
+      }
+    } else {
+      // random fail
+      // If the task attempt number is below the limit (maxFailedAttempt - 1), try to randomly fail the attempt.
+      int taskAttemptNumber = getContext().getTaskAttemptNumber();
+      int maxFailedAttempt = conf.getInt(TezConfiguration.TEZ_AM_TASK_MAX_FAILED_ATTEMPTS, 
+                                     TezConfiguration.TEZ_AM_TASK_MAX_FAILED_ATTEMPTS_DEFAULT);
+      if (taskAttemptNumber < maxFailedAttempt - 1) {
+        float rollNumber = (float) Math.random();
+        String msg = "FailingProcessor random fail turned on." + 
+            " Do a roll: " + getContext().getUniqueIdentifier() + 
             " dag: " + getContext().getDAGName() +
             " taskIndex: " + getContext().getTaskIndex() +
-            " taskAttempt: " + getContext().getTaskAttemptNumber();
+            " taskAttempt: " + taskAttemptNumber +
+            " maxFailedAttempt: " + maxFailedAttempt +
+            " rollNumber: " + rollNumber + 
+            " randomFailProbability " + randomFailProbability;
         LOG.info(msg);
-        throwException(msg);
+        if (rollNumber < randomFailProbability) {
+          // fail the attempt
+          msg = "FailingProcessor: rollNumber < randomFailProbability. Do fail.";
+          LOG.info(msg);
+          throwException(msg);
+        }
       }
     }
-    
+      
     if (inputs.entrySet().size() > 0) {
         String msg = "Reading input of current FailingProcessor: " + getContext().getUniqueIdentifier() + 
             " dag: " + getContext().getDAGName() +
diff --git a/tez-tests/src/test/java/org/apache/tez/test/dag/MultiAttemptDAG.java b/tez-tests/src/test/java/org/apache/tez/test/dag/MultiAttemptDAG.java
index 58b9413..ab2fef8 100644
--- a/tez-tests/src/test/java/org/apache/tez/test/dag/MultiAttemptDAG.java
+++ b/tez-tests/src/test/java/org/apache/tez/test/dag/MultiAttemptDAG.java
@@ -121,6 +121,7 @@
         String payload = new String(getContext().getUserPayload().deepCopyAsArray());
         int successAttemptId = Integer.valueOf(payload);
         LOG.info("Checking whether to crash AM or schedule tasks"
+            + ", vertex: " + getContext().getVertexName()
             + ", successfulAttemptID=" + successAttemptId
             + ", currentAttempt=" + getContext().getDAGAttemptNumber());
         if (successAttemptId > getContext().getDAGAttemptNumber()) {
diff --git a/tez-tools/tez-tfile-parser/pom.xml b/tez-tools/tez-tfile-parser/pom.xml
index 18f55d0..38a954e 100644
--- a/tez-tools/tez-tfile-parser/pom.xml
+++ b/tez-tools/tez-tfile-parser/pom.xml
@@ -28,11 +28,6 @@
 
     <dependencies>
         <dependency>
-            <groupId>org.xerial.snappy</groupId>
-            <artifactId>snappy-java</artifactId>
-            <version>1.1.1.3</version>
-        </dependency>
-        <dependency>
             <groupId>org.apache.pig</groupId>
             <artifactId>pig</artifactId>
             <version>0.13.0</version>