blob: 9931a04a60e54e1262d4b2019e1d228703238dac [file] [log] [blame]
<noautolink>
[[index][::Go back to Oozie Documentation Index::]]
-----
---+!! Oozie DistCp Action Extension
%TOC%
---++ DistCp Action
The =DistCp= action uses Hadoop distributed copy to copy files from one cluster to another or within the same cluster.
*IMPORTANT:* The DistCp action may not work properly with all configurations (secure, insecure) in all versions of Hadoop.
Both Hadoop clusters have to be configured with proxyuser for the Oozie process as explained
[[DG_QuickStart#HadoopProxyUser][here]] on the Quick Start page.
*Syntax:*
<verbatim>
<workflow-app name="[WF-DEF-NAME]" xmlns="uri:oozie:workflow:0.4">
...
<action name="[NODE-NAME]">
<distcp xmlns="uri:oozie:distcp-action:0.2">
<job-tracker>${jobTracker}</job-tracker>
<name-node>${nameNode1}</name-node>
<arg>${nameNode1}/path/to/input.txt</arg>
<arg>${nameNode2}/path/to/output.txt</arg>
</distcp>
<ok to="[NODE-NAME]"/>
<error to="[NODE-NAME]"/>
</action>
...
</workflow-app>
</verbatim>
The first =arg= indicates the input and the second =arg= indicates the output. In the above example, the input is on =nameNode1=
and the output is on =nameNode2=.
*IMPORTANT:* If using the DistCp action between two secure clusters, the following property must be added to the =configuration= of
the action:
<verbatim>
<property>
<name>oozie.launcher.mapreduce.job.hdfs-servers</name>
<value>${nameNode1},${nameNode2}</value>
</property>
</verbatim>
---++ Appendix, DistCp XML-Schema
---+++ AE.A Appendix A, DistCp XML-Schema
---++++ DistCp Action Schema Version 0.2
<verbatim>
<xs:schema xmlns:xs="http://www.w3.org/2001/XMLSchema"
xmlns:distcp="uri:oozie:distcp-action:0.2" elementFormDefault="qualified"
targetNamespace="uri:oozie:distcp-action:0.2">
.
<xs:element name="distcp" type="distcp:ACTION"/>
.
<xs:complexType name="ACTION">
<xs:sequence>
<xs:element name="job-tracker" type="xs:string" minOccurs="0" maxOccurs="1"/>
<xs:element name="name-node" type="xs:string" minOccurs="0" maxOccurs="1"/>
<xs:element name="prepare" type="distcp:PREPARE" minOccurs="0" maxOccurs="1"/>
<xs:element name="configuration" type="distcp:CONFIGURATION" minOccurs="0" maxOccurs="1"/>
<xs:element name="java-opts" type="xs:string" minOccurs="0" maxOccurs="1"/>
<xs:element name="arg" type="xs:string" minOccurs="0" maxOccurs="unbounded"/>
</xs:sequence>
</xs:complexType>
.
<xs:complexType name="CONFIGURATION">
<xs:sequence>
<xs:element name="property" minOccurs="1" maxOccurs="unbounded">
<xs:complexType>
<xs:sequence>
<xs:element name="name" minOccurs="1" maxOccurs="1" type="xs:string"/>
<xs:element name="value" minOccurs="1" maxOccurs="1" type="xs:string"/>
<xs:element name="description" minOccurs="0" maxOccurs="1" type="xs:string"/>
</xs:sequence>
</xs:complexType>
</xs:element>
</xs:sequence>
</xs:complexType>
.
<xs:complexType name="PREPARE">
<xs:sequence>
<xs:element name="delete" type="distcp:DELETE" minOccurs="0" maxOccurs="unbounded"/>
<xs:element name="mkdir" type="distcp:MKDIR" minOccurs="0" maxOccurs="unbounded"/>
</xs:sequence>
</xs:complexType>
.
<xs:complexType name="DELETE">
<xs:attribute name="path" type="xs:string" use="required"/>
</xs:complexType>
.
<xs:complexType name="MKDIR">
<xs:attribute name="path" type="xs:string" use="required"/>
</xs:complexType>
.
</xs:schema>
</verbatim>
---++++ DistCp Action Schema Version 0.1
<verbatim>
<xs:schema xmlns:xs="http://www.w3.org/2001/XMLSchema"
xmlns:distcp="uri:oozie:distcp-action:0.1" elementFormDefault="qualified"
targetNamespace="uri:oozie:distcp-action:0.1">
.
<xs:element name="distcp" type="distcp:ACTION"/>
.
<xs:complexType name="ACTION">
<xs:sequence>
<xs:element name="job-tracker" type="xs:string" minOccurs="1" maxOccurs="1"/>
<xs:element name="name-node" type="xs:string" minOccurs="1" maxOccurs="1"/>
<xs:element name="prepare" type="distcp:PREPARE" minOccurs="0" maxOccurs="1"/>
<xs:element name="configuration" type="distcp:CONFIGURATION" minOccurs="0" maxOccurs="1"/>
<xs:element name="java-opts" type="xs:string" minOccurs="0" maxOccurs="1"/>
<xs:element name="arg" type="xs:string" minOccurs="0" maxOccurs="unbounded"/>
</xs:sequence>
</xs:complexType>
.
<xs:complexType name="CONFIGURATION">
<xs:sequence>
<xs:element name="property" minOccurs="1" maxOccurs="unbounded">
<xs:complexType>
<xs:sequence>
<xs:element name="name" minOccurs="1" maxOccurs="1" type="xs:string"/>
<xs:element name="value" minOccurs="1" maxOccurs="1" type="xs:string"/>
<xs:element name="description" minOccurs="0" maxOccurs="1" type="xs:string"/>
</xs:sequence>
</xs:complexType>
</xs:element>
</xs:sequence>
</xs:complexType>
.
<xs:complexType name="PREPARE">
<xs:sequence>
<xs:element name="delete" type="distcp:DELETE" minOccurs="0" maxOccurs="unbounded"/>
<xs:element name="mkdir" type="distcp:MKDIR" minOccurs="0" maxOccurs="unbounded"/>
</xs:sequence>
</xs:complexType>
.
<xs:complexType name="DELETE">
<xs:attribute name="path" type="xs:string" use="required"/>
</xs:complexType>
.
<xs:complexType name="MKDIR">
<xs:attribute name="path" type="xs:string" use="required"/>
</xs:complexType>
.
</xs:schema>
</verbatim>
[[index][::Go back to Oozie Documentation Index::]]
</noautolink>