<!DOCTYPE html>
<!--
| Generated by Apache Maven Doxia at 2018-03-12
| Rendered using Apache Maven Fluido Skin 1.3.0
-->
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
<head>
<meta charset="UTF-8" />
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
<meta name="Date-Revision-yyyymmdd" content="20180312" />
<meta http-equiv="Content-Language" content="en" />
<title>Falcon - Contents</title>
<link rel="stylesheet" href="./css/apache-maven-fluido-1.3.0.min.css" />
<link rel="stylesheet" href="./css/site.css" />
<link rel="stylesheet" href="./css/print.css" media="print" />
<script type="text/javascript" src="./js/apache-maven-fluido-1.3.0.min.js"></script>
<script type="text/javascript">$( document ).ready( function() { $( '.carousel' ).carousel( { interval: 3500 } ) } );</script>
</head>
<body class="topBarDisabled">
<div class="container-fluid">
<div id="banner">
<div class="pull-left">
<a href="../index.html" id="bannerLeft">
<img src="images/falcon-logo.png" alt="Apache Falcon" width="200" height="45"/>
</a>
</div>
<div class="pull-right"> <a href="http://www.apache.org" id="bannerRight">
<img src="images/apache-feather-tm.gif" alt="Apache Software Foundation" height="45"/>
</a>
</div>
<div class="clear"><hr/></div>
</div>
<div id="breadcrumbs">
<ul class="breadcrumb">
<li class="">
<a href="http://www.apache.org" class="externalLink" title="Apache">
Apache</a>
</li>
<li class="divider ">/</li>
<li class="">
<a href="index.html" title="Falcon">
Falcon</a>
</li>
<li class="divider ">/</li>
<li class="">Contents</li>
<li id="publishDate" class="pull-right">Last Published: 2018-03-12</li>
</ul>
</div>
<div class="row-fluid">
<div id="leftColumn" class="span3">
<div class="well sidebar-nav">
<ul class="nav nav-list">
<li class="nav-header">Falcon</li>
<li>
<a href="index.html" title="About">
<i class="none"></i>
About</a>
</li>
<li>
<a href="slides/falcon-overview.html" title="Overview">
<i class="none"></i>
Overview</a>
</li>
<li>
<a href="slides/falcon-user-guide.html" title="User Guide">
<i class="none"></i>
User Guide</a>
</li>
<li>
<a href="GettingStarted.html" title="Getting Started">
<i class="none"></i>
Getting Started</a>
</li>
<li>
<a href="FalconDocumentation.html" title="Architecture">
<i class="none"></i>
Architecture</a>
</li>
<li>
<a href="InstallationSteps.html" title="Installation">
<i class="none"></i>
Installation</a>
</li>
<li class="active">
<a href="#"><i class="none"></i>On Boarding</a>
</li>
<li>
<a href="MigrationInstructions.html" title="Migrate to 0.10">
<i class="none"></i>
Migrate to 0.10</a>
</li>
<li>
<a href="Operability.html" title="Operability">
<i class="none"></i>
Operability</a>
</li>
<li>
<a href="EntitySpecification.html" title="Entity Specification">
<i class="none"></i>
Entity Specification</a>
</li>
<li>
<a href="falconcli/FalconCLI.html" title="Client (Falcon CLI)">
<i class="none"></i>
Client (Falcon CLI)</a>
</li>
<li>
<a href="restapi/ResourceList.html" title="Rest API">
<i class="icon-chevron-right"></i>
Rest API</a>
</li>
<li>
<a href="HiveIntegration.html" title="Hive Integration">
<i class="none"></i>
Hive Integration</a>
</li>
<li>
<a href="Extensions.html" title="Server side Extensions">
<i class="none"></i>
Server side Extensions</a>
</li>
<li>
<a href="Security.html" title="Security">
<i class="none"></i>
Security</a>
</li>
<li class="nav-header">Project Information</li>
<li>
<a href="project-info.html" title="Summary">
<i class="none"></i>
Summary</a>
</li>
<li>
<a href="mail-lists.html" title="Mailing Lists">
<i class="none"></i>
Mailing Lists</a>
</li>
<li>
<a href="http://webchat.freenode.net?channels=apachefalcon&amp;uio=d4" class="externalLink" title="IRC">
<i class="none"></i>
IRC</a>
</li>
<li>
<a href="team-list.html" title="Team">
<i class="none"></i>
Team</a>
</li>
<li>
<a href="issue-tracking.html" title="Issue Tracking">
<i class="none"></i>
Issue Tracking</a>
</li>
<li>
<a href="source-repository.html" title="Source Repository">
<i class="none"></i>
Source Repository</a>
</li>
<li>
<a href="https://cwiki.apache.org/confluence/display/FALCON/Index" class="externalLink" title="Wiki">
<i class="none"></i>
Wiki</a>
</li>
<li>
<a href="license.html" title="License">
<i class="none"></i>
License</a>
</li>
<li>
<a href="https://cwiki.apache.org/confluence/display/FALCON/News" class="externalLink" title="News">
<i class="none"></i>
News</a>
</li>
<li>
<a href="https://cwiki.apache.org/confluence/display/FALCON/PoweredBy" class="externalLink" title="Powered by">
<i class="none"></i>
Powered by</a>
</li>
<li>
<a href="https://cwiki.apache.org/confluence/display/FALCON/Acknowledgements" class="externalLink" title="Acknowledgements">
<i class="none"></i>
Acknowledgements</a>
</li>
<li>
<a href="http://blogs.apache.org/falcon/" class="externalLink" title="Blog">
<i class="none"></i>
Blog</a>
</li>
<li class="nav-header">Releases</li>
<li>
<a href="http://www.apache.org/dyn/closer.lua/falcon/0.11" class="externalLink" title="0.11">
<i class="none"></i>
0.11</a>
</li>
<li>
<a href="http://www.apache.org/dyn/closer.lua/falcon/0.10" class="externalLink" title="0.10">
<i class="none"></i>
0.10</a>
</li>
<li>
<a href="http://www.apache.org/dyn/closer.lua/falcon/0.9" class="externalLink" title="0.9">
<i class="none"></i>
0.9</a>
</li>
<li>
<a href="http://www.apache.org/dyn/closer.lua/falcon/0.8" class="externalLink" title="0.8">
<i class="none"></i>
0.8</a>
</li>
<li>
<a href="http://www.apache.org/dyn/closer.lua/falcon/0.7" class="externalLink" title="0.7">
<i class="none"></i>
0.7</a>
</li>
<li>
<a href="http://archive.apache.org/dist/falcon/0.6.1" class="externalLink" title="0.6.1">
<i class="none"></i>
0.6.1</a>
</li>
<li>
<a href="http://archive.apache.org/dist/incubator/falcon/0.6-incubating" class="externalLink" title="0.6-incubating">
<i class="none"></i>
0.6-incubating</a>
</li>
<li>
<a href="http://archive.apache.org/dist/incubator/falcon/0.5-incubating" class="externalLink" title="0.5-incubating">
<i class="none"></i>
0.5-incubating</a>
</li>
<li>
<a href="http://archive.apache.org/dist/incubator/falcon/0.4-incubating" class="externalLink" title="0.4-incubating">
<i class="none"></i>
0.4-incubating</a>
</li>
<li>
<a href="http://archive.apache.org/dist/incubator/falcon/0.3-incubating" class="externalLink" title="0.3-incubating">
<i class="none"></i>
0.3-incubating</a>
</li>
<li>
<a href="https://cwiki.apache.org/confluence/display/FALCON/Roadmap" class="externalLink" title="Coming soon">
<i class="none"></i>
Coming soon</a>
</li>
<li class="nav-header">Documentation</li>
<li>
<a href="0.11/index.html" title="0.11 (Current)">
<i class="none"></i>
0.11 (Current)</a>
</li>
<li>
<a href="0.10/index.html" title="0.10">
<i class="none"></i>
0.10</a>
</li>
<li>
<a href="0.9/index.html" title="0.9">
<i class="none"></i>
0.9</a>
</li>
<li>
<a href="0.8/index.html" title="0.8">
<i class="none"></i>
0.8</a>
</li>
<li>
<a href="0.7/index.html" title="0.7">
<i class="none"></i>
0.7</a>
</li>
<li>
<a href="0.6.1/index.html" title="0.6.1">
<i class="none"></i>
0.6.1</a>
</li>
<li>
<a href="0.6-incubating/index.html" title="0.6-incubating">
<i class="none"></i>
0.6-incubating</a>
</li>
<li>
<a href="0.5-incubating/index.html" title="0.5-incubating">
<i class="none"></i>
0.5-incubating</a>
</li>
<li>
<a href="0.4-incubating/index.html" title="0.4-incubating">
<i class="none"></i>
0.4-incubating</a>
</li>
<li>
<a href="0.3-incubating/index.html" title="0.3-incubating">
<i class="none"></i>
0.3-incubating</a>
</li>
<li class="nav-header">ASF</li>
<li>
<a href="http://www.apache.org/foundation/how-it-works.html" class="externalLink" title="How Apache Works">
<i class="none"></i>
How Apache Works</a>
</li>
<li>
<a href="http://www.apache.org/foundation/" class="externalLink" title="Foundation">
<i class="none"></i>
Foundation</a>
</li>
<li>
<a href="http://www.apache.org/foundation/sponsorship.html" class="externalLink" title="Sponsoring Apache">
<i class="none"></i>
Sponsoring Apache</a>
</li>
<li>
<a href="http://www.apache.org/foundation/thanks.html" class="externalLink" title="Thanks">
<i class="none"></i>
Thanks</a>
</li>
</ul>
<hr class="divider" />
<div id="poweredBy">
<div class="clear"></div>
<div class="clear"></div>
<div class="clear"></div>
<a href="http://maven.apache.org/" title="Built by Maven" class="poweredBy">
<img class="builtBy" alt="Built by Maven" src="./images/logos/maven-feather.png" />
</a>
</div>
</div>
</div>
<div id="bodyColumn" class="span9" >
<div class="section">
<h3>Contents<a name="Contents"></a></h3>
<p></p>
<ul>
<li><a href="#Onboarding_Steps">Onboarding Steps</a></li>
<li><a href="#Sample_Pipeline">Sample Pipeline</a></li>
<li><a href="./HiveIntegration.html">Hive Examples</a></li></ul></div>
<div class="section">
<h4>Onboarding Steps<a name="Onboarding_Steps"></a></h4>
<p></p>
<ul>
<li>Create cluster definition for the cluster, specifying name node, job tracker, workflow engine endpoint, messaging endpoint. Refer to <a href="./EntitySpecification.html">cluster definition</a> for details.</li>
<li>Create Feed definitions for each of the input and output specifying frequency, data path, ownership. Refer to <a href="./EntitySpecification.html">feed definition</a> for details.</li>
<li>Create Process definition for your job. Process defines configuration for the workflow job. Important attributes are frequency, inputs/outputs and workflow path. Refer to <a href="./EntitySpecification.html">process definition</a> for process details.</li>
<li>Define workflow for your job using the workflow engine (only oozie is supported as of now). Refer to the <a class="externalLink" href="http://oozie.apache.org/docs/3.1.3-incubating/WorkflowFunctionalSpec.html">Oozie Workflow Specification</a>. The libraries required for the workflow should be available in the lib folder in the workflow path.</li>
<li>Set-up workflow definition, libraries and referenced scripts on hadoop.</li>
<li>Submit cluster definition</li>
<li>Submit and schedule feed and process definitions</li></ul></div>
<div class="section">
<h4>Sample Pipeline<a name="Sample_Pipeline"></a></h4></div>
<div class="section">
<h5>Cluster <a name="Cluster"></a></h5>
<p>Cluster definition that contains end points for name node, job tracker, oozie and jms server. The cluster locations MUST be created prior to submitting a cluster entity to Falcon. <b>staging</b> must have 777 permissions and the parent dirs must have execute permissions. <b>working</b> must have 755 permissions and the parent dirs must have execute permissions.</p>
<div class="source">
<pre>
&lt;?xml version=&quot;1.0&quot;?&gt;
&lt;!--
Cluster configuration
--&gt;
&lt;cluster colo=&quot;ua2&quot; description=&quot;&quot; name=&quot;corp&quot; xmlns=&quot;uri:falcon:cluster:0.1&quot;
xmlns:xsi=&quot;http://www.w3.org/2001/XMLSchema-instance&quot;&gt;
&lt;interfaces&gt;
&lt;interface type=&quot;readonly&quot; endpoint=&quot;hftp://name-node.com:50070&quot; version=&quot;2.5.0&quot; /&gt;
&lt;interface type=&quot;write&quot; endpoint=&quot;hdfs://name-node.com:54310&quot; version=&quot;2.5.0&quot; /&gt;
&lt;interface type=&quot;execute&quot; endpoint=&quot;job-tracker:54311&quot; version=&quot;2.5.0&quot; /&gt;
&lt;interface type=&quot;workflow&quot; endpoint=&quot;http://oozie.com:11000/oozie/&quot; version=&quot;4.0.1&quot; /&gt;
&lt;interface type=&quot;messaging&quot; endpoint=&quot;tcp://jms-server.com:61616?daemon=true&quot; version=&quot;5.1.6&quot; /&gt;
&lt;/interfaces&gt;
&lt;locations&gt;
&lt;location name=&quot;staging&quot; path=&quot;/projects/falcon/staging&quot; /&gt;
&lt;location name=&quot;temp&quot; path=&quot;/tmp&quot; /&gt;
&lt;location name=&quot;working&quot; path=&quot;/projects/falcon/working&quot; /&gt;
&lt;/locations&gt;
&lt;/cluster&gt;
</pre></div></div>
<div class="section">
<h5>Input Feed<a name="Input_Feed"></a></h5>
<p>Hourly feed that defines feed path, frequency, ownership and validity:</p>
<div class="source">
<pre>
&lt;?xml version=&quot;1.0&quot; encoding=&quot;UTF-8&quot;?&gt;
&lt;!--
Hourly sample input data
--&gt;
&lt;feed description=&quot;sample input data&quot; name=&quot;SampleInput&quot; xmlns=&quot;uri:falcon:feed:0.1&quot;
xmlns:xsi=&quot;http://www.w3.org/2001/XMLSchema-instance&quot;&gt;
&lt;groups&gt;group&lt;/groups&gt;
&lt;frequency&gt;hours(1)&lt;/frequency&gt;
&lt;late-arrival cut-off=&quot;hours(6)&quot; /&gt;
&lt;clusters&gt;
&lt;cluster name=&quot;corp&quot; type=&quot;source&quot;&gt;
&lt;validity start=&quot;2009-01-01T00:00Z&quot; end=&quot;2099-12-31T00:00Z&quot; timezone=&quot;UTC&quot; /&gt;
&lt;retention limit=&quot;months(24)&quot; action=&quot;delete&quot; /&gt;
&lt;/cluster&gt;
&lt;/clusters&gt;
&lt;locations&gt;
&lt;location type=&quot;data&quot; path=&quot;/projects/bootcamp/data/${YEAR}-${MONTH}-${DAY}-${HOUR}/SampleInput&quot; /&gt;
&lt;location type=&quot;stats&quot; path=&quot;/projects/bootcamp/stats/SampleInput&quot; /&gt;
&lt;location type=&quot;meta&quot; path=&quot;/projects/bootcamp/meta/SampleInput&quot; /&gt;
&lt;/locations&gt;
&lt;ACL owner=&quot;suser&quot; group=&quot;users&quot; permission=&quot;0755&quot; /&gt;
&lt;schema location=&quot;/none&quot; provider=&quot;none&quot; /&gt;
&lt;/feed&gt;
</pre></div></div>
<div class="section">
<h5>Output Feed<a name="Output_Feed"></a></h5>
<p>Daily feed that defines feed path, frequency, ownership and validity:</p>
<div class="source">
<pre>
&lt;?xml version=&quot;1.0&quot; encoding=&quot;UTF-8&quot;?&gt;
&lt;!--
Daily sample output data
--&gt;
&lt;feed description=&quot;sample output data&quot; name=&quot;SampleOutput&quot; xmlns=&quot;uri:falcon:feed:0.1&quot;
xmlns:xsi=&quot;http://www.w3.org/2001/XMLSchema-instance&quot;&gt;
&lt;groups&gt;group&lt;/groups&gt;
&lt;frequency&gt;days(1)&lt;/frequency&gt;
&lt;late-arrival cut-off=&quot;hours(6)&quot; /&gt;
&lt;clusters&gt;
&lt;cluster name=&quot;corp&quot; type=&quot;source&quot;&gt;
&lt;validity start=&quot;2009-01-01T00:00Z&quot; end=&quot;2099-12-31T00:00Z&quot; timezone=&quot;UTC&quot; /&gt;
&lt;retention limit=&quot;months(24)&quot; action=&quot;delete&quot; /&gt;
&lt;/cluster&gt;
&lt;/clusters&gt;
&lt;locations&gt;
&lt;location type=&quot;data&quot; path=&quot;/projects/bootcamp/output/${YEAR}-${MONTH}-${DAY}/SampleOutput&quot; /&gt;
&lt;location type=&quot;stats&quot; path=&quot;/projects/bootcamp/stats/SampleOutput&quot; /&gt;
&lt;location type=&quot;meta&quot; path=&quot;/projects/bootcamp/meta/SampleOutput&quot; /&gt;
&lt;/locations&gt;
&lt;ACL owner=&quot;suser&quot; group=&quot;users&quot; permission=&quot;0755&quot; /&gt;
&lt;schema location=&quot;/none&quot; provider=&quot;none&quot; /&gt;
&lt;/feed&gt;
</pre></div></div>
<div class="section">
<h5>Process<a name="Process"></a></h5>
<p>Sample process which runs daily at the 6th hour on the corp cluster. It takes one input - SampleInput for the previous day (24 instances). It generates one output - SampleOutput for the previous day. The workflow is defined at /projects/bootcamp/workflow/workflow.xml. Any libraries available for the workflow should be at /projects/bootcamp/workflow/lib. The process also defines properties queueName, ssh.host, and fileTimestamp which are passed to the workflow. In addition, Falcon exposes the following properties to the workflow: nameNode, jobTracker (hadoop properties), input and output (Input/Output properties).</p>
<div class="source">
<pre>
&lt;?xml version=&quot;1.0&quot; encoding=&quot;UTF-8&quot;?&gt;
&lt;!--
Daily sample process. Runs at 6th hour every day. Input - last day's hourly data. Generates output for yesterday
--&gt;
&lt;process name=&quot;SampleProcess&quot;&gt;
&lt;cluster name=&quot;corp&quot; /&gt;
&lt;frequency&gt;days(1)&lt;/frequency&gt;
&lt;validity start=&quot;2012-04-03T06:00Z&quot; end=&quot;2022-12-30T00:00Z&quot; timezone=&quot;UTC&quot; /&gt;
&lt;inputs&gt;
&lt;input name=&quot;input&quot; feed=&quot;SampleInput&quot; start=&quot;yesterday(0,0)&quot; end=&quot;today(-1,0)&quot; /&gt;
&lt;/inputs&gt;
&lt;outputs&gt;
&lt;output name=&quot;output&quot; feed=&quot;SampleOutput&quot; instance=&quot;yesterday(0,0)&quot; /&gt;
&lt;/outputs&gt;
&lt;properties&gt;
&lt;property name=&quot;queueName&quot; value=&quot;reports&quot; /&gt;
&lt;property name=&quot;ssh.host&quot; value=&quot;host.com&quot; /&gt;
&lt;property name=&quot;fileTimestamp&quot; value=&quot;${coord:formatTime(coord:nominalTime(), 'yyyy-MM-dd')}&quot; /&gt;
&lt;/properties&gt;
&lt;workflow engine=&quot;oozie&quot; path=&quot;/projects/bootcamp/workflow&quot; /&gt;
&lt;retry policy=&quot;periodic&quot; delay=&quot;minutes(5)&quot; attempts=&quot;3&quot; /&gt;
&lt;late-process policy=&quot;exp-backoff&quot; delay=&quot;hours(1)&quot;&gt;
&lt;late-input input=&quot;input&quot; workflow-path=&quot;/projects/bootcamp/workflow/lateinput&quot; /&gt;
&lt;/late-process&gt;
&lt;/process&gt;
</pre></div></div>
<div class="section">
<h5>Oozie Workflow<a name="Oozie_Workflow"></a></h5>
<p>The sample user workflow contains 3 actions:</p>
<ul>
<li>Pig action - Executes pig script /projects/bootcamp/workflow/script.pig</li>
<li>concatenator - Java action that concatenates part files and generates a single file</li>
<li>file upload - ssh action that gets the concatenated file from hadoop and sends the file to a remote host</li></ul>
<div class="source">
<pre>
&lt;workflow-app xmlns=&quot;uri:oozie:workflow:0.2&quot; name=&quot;sample-wf&quot;&gt;
&lt;start to=&quot;pig&quot; /&gt;
&lt;action name=&quot;pig&quot;&gt;
&lt;pig&gt;
&lt;job-tracker&gt;${jobTracker}&lt;/job-tracker&gt;
&lt;name-node&gt;${nameNode}&lt;/name-node&gt;
&lt;prepare&gt;
&lt;delete path=&quot;${output}&quot;/&gt;
&lt;/prepare&gt;
&lt;configuration&gt;
&lt;property&gt;
&lt;name&gt;mapred.job.queue.name&lt;/name&gt;
&lt;value&gt;${queueName}&lt;/value&gt;
&lt;/property&gt;
&lt;property&gt;
&lt;name&gt;mapreduce.fileoutputcommitter.marksuccessfuljobs&lt;/name&gt;
&lt;value&gt;true&lt;/value&gt;
&lt;/property&gt;
&lt;/configuration&gt;
&lt;script&gt;${nameNode}/projects/bootcamp/workflow/script.pig&lt;/script&gt;
&lt;param&gt;input=${input}&lt;/param&gt;
&lt;param&gt;output=${output}&lt;/param&gt;
&lt;file&gt;lib/dependent.jar&lt;/file&gt;
&lt;/pig&gt;
&lt;ok to=&quot;concatenator&quot; /&gt;
&lt;error to=&quot;fail&quot; /&gt;
&lt;/action&gt;
&lt;action name=&quot;concatenator&quot;&gt;
&lt;java&gt;
&lt;job-tracker&gt;${jobTracker}&lt;/job-tracker&gt;
&lt;name-node&gt;${nameNode}&lt;/name-node&gt;
&lt;prepare&gt;
&lt;delete path=&quot;${nameNode}/projects/bootcamp/concat/data-${fileTimestamp}.csv&quot;/&gt;
&lt;/prepare&gt;
&lt;configuration&gt;
&lt;property&gt;
&lt;name&gt;mapred.job.queue.name&lt;/name&gt;
&lt;value&gt;${queueName}&lt;/value&gt;
&lt;/property&gt;
&lt;/configuration&gt;
&lt;main-class&gt;com.wf.Concatenator&lt;/main-class&gt;
&lt;arg&gt;${output}&lt;/arg&gt;
&lt;arg&gt;${nameNode}/projects/bootcamp/concat/data-${fileTimestamp}.csv&lt;/arg&gt;
&lt;/java&gt;
&lt;ok to=&quot;fileupload&quot; /&gt;
&lt;error to=&quot;fail&quot;/&gt;
&lt;/action&gt;
&lt;action name=&quot;fileupload&quot;&gt;
&lt;ssh&gt;
&lt;host&gt;localhost&lt;/host&gt;
&lt;command&gt;/tmp/fileupload.sh&lt;/command&gt;
&lt;args&gt;${nameNode}/projects/bootcamp/concat/data-${fileTimestamp}.csv&lt;/args&gt;
&lt;args&gt;${wf:conf(&quot;ssh.host&quot;)}&lt;/args&gt;
&lt;capture-output/&gt;
&lt;/ssh&gt;
&lt;ok to=&quot;fileUploadDecision&quot; /&gt;
&lt;error to=&quot;fail&quot;/&gt;
&lt;/action&gt;
&lt;decision name=&quot;fileUploadDecision&quot;&gt;
&lt;switch&gt;
&lt;case to=&quot;end&quot;&gt;
${wf:actionData('fileupload')['output'] == '0'}
&lt;/case&gt;
&lt;default to=&quot;fail&quot;/&gt;
&lt;/switch&gt;
&lt;/decision&gt;
&lt;kill name=&quot;fail&quot;&gt;
&lt;message&gt;Workflow failed, error message[${wf:errorMessage(wf:lastErrorNode())}]&lt;/message&gt;
&lt;/kill&gt;
&lt;end name=&quot;end&quot; /&gt;
&lt;/workflow-app&gt;
</pre></div></div>
<div class="section">
<h5>File Upload Script<a name="File_Upload_Script"></a></h5>
<p>The script gets the file from hadoop, rsyncs the file to /tmp on remote host and deletes the file from hadoop</p>
<div class="source">
<pre>
#!/bin/bash
trap 'echo &quot;output=$?&quot;; exit $?' ERR INT TERM
echo &quot;Arguments: $@&quot;
SRCFILE=$1
DESTHOST=$2
FILENAME=`basename $SRCFILE`
rm -f /tmp/$FILENAME
hadoop fs -copyToLocal $SRCFILE /tmp/
echo &quot;Copied $SRCFILE to /tmp&quot;
rsync -ztv --rsh=ssh --stats /tmp/$FILENAME $DESTHOST:/tmp
echo &quot;rsynced $FILENAME to $DESTHOST:/tmp&quot;
hadoop fs -rmr $SRCFILE
echo &quot;Deleted $SRCFILE&quot;
rm -f /tmp/$FILENAME
echo &quot;output=0&quot;
</pre></div></div>
</div>
</div>
</div>
<hr/>
<footer>
<div class="container-fluid">
<div class="row span12">Copyright &copy; 2013-2018
<a href="http://www.apache.org">Apache Software Foundation</a>.
All Rights Reserved.
</div>
</div>
</footer>
</body>
</html>