| <noautolink> |
| |
| [[index][::Go back to Oozie Documentation Index::]] |
| |
| ----- |
| |
| ---+!! Oozie DistCp Action Extension |
| |
| %TOC% |
| |
| ---++ DistCp Action |
| |
| The =DistCp= action uses Hadoop distributed copy to copy files from one cluster to another or within the same cluster. |
| |
| *IMPORTANT:* The DistCp action may not work properly with every configuration (secure or insecure) in every version of Hadoop. |
| |
| Both Hadoop clusters must be configured with a proxyuser for the Oozie process, as explained |
| [[DG_QuickStart#HadoopProxyUser][here]] on the Quick Start page. |
| |
| *Syntax:* |
| |
| <verbatim> |
| <workflow-app name="[WF-DEF-NAME]" xmlns="uri:oozie:workflow:0.4"> |
| ... |
| <action name="[NODE-NAME]"> |
| <distcp xmlns="uri:oozie:distcp-action:0.2"> |
| <job-tracker>${jobTracker}</job-tracker> |
| <name-node>${nameNode1}</name-node> |
| <arg>${nameNode1}/path/to/input.txt</arg> |
| <arg>${nameNode2}/path/to/output.txt</arg> |
| </distcp> |
| <ok to="[NODE-NAME]"/> |
| <error to="[NODE-NAME]"/> |
| </action> |
| ... |
| </workflow-app> |
| </verbatim> |
| |
| The first =arg= indicates the input and the second =arg= indicates the output. In the above example, the input is on =nameNode1= |
| and the output is on =nameNode2=. |
| |
| *IMPORTANT:* If using the DistCp action between two secure clusters, the following property must be added to the =configuration= |
| element of the action: |
| <verbatim> |
| <property> |
| <name>oozie.launcher.mapreduce.job.hdfs-servers</name> |
| <value>${nameNode1},${nameNode2}</value> |
| </property> |
| </verbatim> |
| |
| ---++ Appendix, DistCp XML-Schema |
| |
| ---+++ AE.A Appendix A, DistCp XML-Schema |
| |
| ---++++ DistCp Action Schema Version 0.2 |
| <verbatim> |
| <xs:schema xmlns:xs="http://www.w3.org/2001/XMLSchema" |
| xmlns:distcp="uri:oozie:distcp-action:0.2" elementFormDefault="qualified" |
| targetNamespace="uri:oozie:distcp-action:0.2"> |
| . |
| <xs:element name="distcp" type="distcp:ACTION"/> |
| . |
| <xs:complexType name="ACTION"> |
| <xs:sequence> |
| <xs:element name="job-tracker" type="xs:string" minOccurs="0" maxOccurs="1"/> |
| <xs:element name="name-node" type="xs:string" minOccurs="0" maxOccurs="1"/> |
| <xs:element name="prepare" type="distcp:PREPARE" minOccurs="0" maxOccurs="1"/> |
| <xs:element name="configuration" type="distcp:CONFIGURATION" minOccurs="0" maxOccurs="1"/> |
| <xs:element name="java-opts" type="xs:string" minOccurs="0" maxOccurs="1"/> |
| <xs:element name="arg" type="xs:string" minOccurs="0" maxOccurs="unbounded"/> |
| </xs:sequence> |
| </xs:complexType> |
| . |
| <xs:complexType name="CONFIGURATION"> |
| <xs:sequence> |
| <xs:element name="property" minOccurs="1" maxOccurs="unbounded"> |
| <xs:complexType> |
| <xs:sequence> |
| <xs:element name="name" minOccurs="1" maxOccurs="1" type="xs:string"/> |
| <xs:element name="value" minOccurs="1" maxOccurs="1" type="xs:string"/> |
| <xs:element name="description" minOccurs="0" maxOccurs="1" type="xs:string"/> |
| </xs:sequence> |
| </xs:complexType> |
| </xs:element> |
| </xs:sequence> |
| </xs:complexType> |
| . |
| <xs:complexType name="PREPARE"> |
| <xs:sequence> |
| <xs:element name="delete" type="distcp:DELETE" minOccurs="0" maxOccurs="unbounded"/> |
| <xs:element name="mkdir" type="distcp:MKDIR" minOccurs="0" maxOccurs="unbounded"/> |
| </xs:sequence> |
| </xs:complexType> |
| . |
| <xs:complexType name="DELETE"> |
| <xs:attribute name="path" type="xs:string" use="required"/> |
| </xs:complexType> |
| . |
| <xs:complexType name="MKDIR"> |
| <xs:attribute name="path" type="xs:string" use="required"/> |
| </xs:complexType> |
| . |
| </xs:schema> |
| </verbatim> |
| |
| ---++++ DistCp Action Schema Version 0.1 |
| <verbatim> |
| <xs:schema xmlns:xs="http://www.w3.org/2001/XMLSchema" |
| xmlns:distcp="uri:oozie:distcp-action:0.1" elementFormDefault="qualified" |
| targetNamespace="uri:oozie:distcp-action:0.1"> |
| . |
| <xs:element name="distcp" type="distcp:ACTION"/> |
| . |
| <xs:complexType name="ACTION"> |
| <xs:sequence> |
| <xs:element name="job-tracker" type="xs:string" minOccurs="1" maxOccurs="1"/> |
| <xs:element name="name-node" type="xs:string" minOccurs="1" maxOccurs="1"/> |
| <xs:element name="prepare" type="distcp:PREPARE" minOccurs="0" maxOccurs="1"/> |
| <xs:element name="configuration" type="distcp:CONFIGURATION" minOccurs="0" maxOccurs="1"/> |
| <xs:element name="java-opts" type="xs:string" minOccurs="0" maxOccurs="1"/> |
| <xs:element name="arg" type="xs:string" minOccurs="0" maxOccurs="unbounded"/> |
| </xs:sequence> |
| </xs:complexType> |
| . |
| <xs:complexType name="CONFIGURATION"> |
| <xs:sequence> |
| <xs:element name="property" minOccurs="1" maxOccurs="unbounded"> |
| <xs:complexType> |
| <xs:sequence> |
| <xs:element name="name" minOccurs="1" maxOccurs="1" type="xs:string"/> |
| <xs:element name="value" minOccurs="1" maxOccurs="1" type="xs:string"/> |
| <xs:element name="description" minOccurs="0" maxOccurs="1" type="xs:string"/> |
| </xs:sequence> |
| </xs:complexType> |
| </xs:element> |
| </xs:sequence> |
| </xs:complexType> |
| . |
| <xs:complexType name="PREPARE"> |
| <xs:sequence> |
| <xs:element name="delete" type="distcp:DELETE" minOccurs="0" maxOccurs="unbounded"/> |
| <xs:element name="mkdir" type="distcp:MKDIR" minOccurs="0" maxOccurs="unbounded"/> |
| </xs:sequence> |
| </xs:complexType> |
| . |
| <xs:complexType name="DELETE"> |
| <xs:attribute name="path" type="xs:string" use="required"/> |
| </xs:complexType> |
| . |
| <xs:complexType name="MKDIR"> |
| <xs:attribute name="path" type="xs:string" use="required"/> |
| </xs:complexType> |
| . |
| </xs:schema> |
| </verbatim> |
| |
| [[index][::Go back to Oozie Documentation Index::]] |
| |
| </noautolink> |