<?xml version="1.0" encoding="UTF-8"?>
<!--
    Licensed to the Apache Software Foundation (ASF) under one
    or more contributor license agreements. See the NOTICE file
    distributed with this work for additional information
    regarding copyright ownership. The ASF licenses this file
    to you under the Apache License, Version 2.0 (the
    "License"); you may not use this file except in compliance
    with the License. You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

    Unless required by applicable law or agreed to in writing, software
    distributed under the License is distributed on an "AS IS" BASIS,
    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    See the License for the specific language governing permissions and
    limitations under the License.
-->
<!-- Root of the process schema; all declarations live in the uri:falcon:process:0.1 namespace. -->
<xs:schema attributeFormDefault="unqualified" elementFormDefault="qualified"
           xmlns:xs="http://www.w3.org/2001/XMLSchema"
           targetNamespace="uri:falcon:process:0.1" xmlns="uri:falcon:process:0.1"
           xmlns:jaxb="http://java.sun.com/xml/ns/jaxb" jaxb:version="2.1">
<!--
    Licensed to the Apache Software Foundation (ASF) under one or more
    contributor license agreements. See the NOTICE file distributed with
    this work for additional information regarding copyright ownership.
    The ASF licenses this file to You under the Apache License, Version
    2.0 (the "License"); you may not use this file except in compliance with
    the License. You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

    Unless required by applicable law or agreed to in writing, software
    distributed under the License is distributed on an "AS IS" BASIS,
    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    See the License for the specific language governing permissions and
    limitations under the License.
-->
<!-- JAXB schema-level binding: Java package used for the classes generated from this schema. -->
<xs:annotation>
    <xs:appinfo>
        <jaxb:schemaBindings>
            <jaxb:package name="org.apache.falcon.entity.v0.process"/>
        </jaxb:schemaBindings>
    </xs:appinfo>
</xs:annotation>
<!-- Global element declaration for a process document; its content model is the "process" complex type. -->
<xs:element name="process" type="process">
    <xs:annotation>
        <xs:documentation>
            A process defines configuration for the workflow job like
            frequency of the workflow, inputs and outputs for the workflow, how to
            handle workflow failures, how to handle data that comes late and so on.
        </xs:documentation>
    </xs:annotation>
</xs:element>
<!--
    Content model of a process: an ordered sequence of child elements followed by a
    required "name" attribute. Children without minOccurs="0" (clusters, parallel,
    order, frequency, workflow) are mandatory.
-->
<xs:complexType name="process">
    <xs:sequence>
        <xs:element type="KEY_VALUE_PAIR" name="tags" minOccurs="0">
            <xs:annotation>
                <xs:documentation>
                    tags: an optional comma-separated list of key=value pairs
                    used for classification of processes.
                    Example: key1=value1, key2=value2, department=forecasting
                </xs:documentation>
            </xs:annotation>
        </xs:element>
        <xs:element type="PIPELINE_LIST" name="pipelines" minOccurs="0" maxOccurs="1">
            <xs:annotation>
                <xs:documentation>
                    pipelines: an optional comma-separated list of pipeline names
                    used for classification of processes.
                    Example: dataReplicationPipeline, clickStreamPipeline
                </xs:documentation>
            </xs:annotation>
        </xs:element>
        <xs:element type="clusters" name="clusters">
            <xs:annotation>
                <xs:documentation>Defines the clusters where the workflow should run
                </xs:documentation>
            </xs:annotation>
        </xs:element>
        <xs:element name="parallel">
            <xs:annotation>
                <xs:documentation>Defines how many workflow instances can run concurrently
                </xs:documentation>
            </xs:annotation>
            <!-- Anonymous type: an integer between 1 and 12 inclusive. -->
            <xs:simpleType>
                <xs:restriction base="xs:unsignedShort">
                    <xs:minInclusive value="1"/>
                    <xs:maxInclusive value="12"/>
                </xs:restriction>
            </xs:simpleType>
        </xs:element>
        <xs:element type="execution-type" name="order">
            <xs:annotation>
                <xs:documentation>Defines the order in which ready workflow instances should run
                </xs:documentation>
            </xs:annotation>
        </xs:element>
        <xs:element type="frequency-type" name="timeout" minOccurs="0">
            <xs:annotation>
                <xs:documentation>Defines time after which instances will no longer be executed
                </xs:documentation>
            </xs:annotation>
        </xs:element>
        <xs:element type="frequency-type" name="frequency">
            <xs:annotation>
                <xs:documentation>Defines workflow frequency
                </xs:documentation>
            </xs:annotation>
        </xs:element>
        <xs:element type="sla" name="sla" minOccurs="0">
            <xs:annotation>
                <xs:documentation>
                    Defines SLA(Service Level Agreement) for process.
                </xs:documentation>
            </xs:annotation>
        </xs:element>
        <xs:element name="timezone" minOccurs="0" default="UTC">
            <xs:simpleType>
                <xs:annotation>
                    <xs:appinfo>
                        <!--
                            NOTE(review): in the copy this was rebuilt from, the javaType tag was
                            cut off after parseMethod. If the upstream binding also declares a
                            printMethod, restore it here; confirm before regenerating classes.
                        -->
                        <jaxb:javaType name="java.util.TimeZone" parseMethod="java.util.TimeZone.getTimeZone"/>
                    </xs:appinfo>
                </xs:annotation>
                <xs:restriction base="xs:string"/>
            </xs:simpleType>
        </xs:element>
        <xs:element type="inputs" name="inputs" minOccurs="0">
            <xs:annotation>
                <xs:documentation>Defines inputs for the workflow. The workflow will run only when the scheduled
                    time is up and all the inputs are available
                </xs:documentation>
            </xs:annotation>
        </xs:element>
        <xs:element type="outputs" name="outputs" minOccurs="0">
            <xs:annotation>
                <xs:documentation>Defines outputs of the workflow
                </xs:documentation>
            </xs:annotation>
        </xs:element>
        <xs:element type="properties" name="properties" minOccurs="0"/>
        <xs:element type="workflow" name="workflow">
            <xs:annotation>
                <xs:documentation>Defines the workflow that should run. The workflow should be defined with respect
                    to the workflow specification of the workflow engine.
                    oozie workflow engine is supported as of now. The workflow path is the path on hdfs which
                    contains the workflow xml
                </xs:documentation>
            </xs:annotation>
        </xs:element>
        <xs:element type="retry" name="retry" minOccurs="0">
            <xs:annotation>
                <xs:documentation>Retry defines how to handle workflow failures. Policy type - backoff, exponential
                    backoff along with the delay define how frequently
                    workflow should be re-tried. Number of attempts define how many times to re-try the failures.
                </xs:documentation>
            </xs:annotation>
        </xs:element>
        <xs:element type="late-process" name="late-process" minOccurs="0">
            <xs:annotation>
                <xs:documentation>Late process defines how the late data should be handled. Late policy - backoff,
                    exponential backoff, final along with delay define how
                    frequently Falcon should check for late data. The late data handling can be customized for each
                    input separately.
                </xs:documentation>
            </xs:annotation>
        </xs:element>
        <xs:element type="ACL" name="ACL" minOccurs="0"/>
    </xs:sequence>
    <xs:attribute type="IDENTIFIER" name="name" use="required"/>
</xs:complexType>
<!--
    Identifier used for the name and reference attributes in this schema: an ASCII letter
    followed by ASCII letters, digits or hyphens.
    NOTE(review): the {1,39} quantifier wraps a group that already contains an unbounded
    repetition, so it does not cap the overall length. Confirm whether a 39-character
    limit was intended before tightening the pattern.
-->
<xs:simpleType name="IDENTIFIER">
    <xs:restriction base="xs:string">
        <xs:pattern value="(([a-zA-Z]([\-a-zA-Z0-9])*){1,39})"/>
    </xs:restriction>
</xs:simpleType>
<!-- Wrapper holding one or more cluster elements. -->
<xs:complexType name="clusters">
    <xs:annotation>
        <xs:documentation>
            A list of clusters.
        </xs:documentation>
    </xs:annotation>
    <xs:sequence>
        <xs:element type="cluster" name="cluster" maxOccurs="unbounded" minOccurs="1"/>
    </xs:sequence>
</xs:complexType>
<!-- A single cluster entry: a mandatory validity child and a required name attribute. -->
<xs:complexType name="cluster">
    <xs:annotation>
        <xs:documentation>
            Defines the cluster where the workflow should run. In addition, it also defines the validity of the
            workflow on this cluster
        </xs:documentation>
    </xs:annotation>
    <xs:sequence>
        <xs:element type="validity" name="validity"/>
    </xs:sequence>
    <xs:attribute type="IDENTIFIER" name="name" use="required"/>
</xs:complexType>
<!-- Validity window; both start and end are required and use date-time-type. -->
<xs:complexType name="validity">
    <xs:annotation>
        <xs:documentation>
            Defines the validity of the workflow as start and end time
        </xs:documentation>
    </xs:annotation>
    <xs:attribute type="date-time-type" name="start" use="required"/>
    <xs:attribute type="date-time-type" name="end" use="required"/>
</xs:complexType>
<!--
    Timestamp string of the form yyyy-MM-ddTHH:mmZ. The year starts with 1 or 2, and each
    date separator may be a hyphen, space, slash or dot. Bound to java.util.Date via JAXB.
-->
<xs:simpleType name="date-time-type">
    <xs:annotation>
        <xs:appinfo>
            <!--
                NOTE(review): in the copy this was rebuilt from, the javaType tag was cut off
                after parseMethod. If the upstream binding also declares a printMethod,
                restore it here; confirm before regenerating classes.
            -->
            <jaxb:javaType name="java.util.Date" parseMethod="org.apache.falcon.entity.v0.SchemaHelper.parseDateUTC"/>
        </xs:appinfo>
    </xs:annotation>
    <xs:restriction base="xs:string">
        <xs:pattern
                value="((1|2)\d\d\d[- /.](0[1-9]|1[012])[- /.](0[1-9]|[12][0-9]|3[01])T([0-1][0-9]|2[0-3]):([0-5][0-9]))Z"/>
    </xs:restriction>
</xs:simpleType>
<!-- Allowed values for the process "order" element. -->
<xs:simpleType name="execution-type">
    <xs:restriction base="xs:string">
        <xs:enumeration value="FIFO"/>
        <xs:enumeration value="LIFO"/>
        <xs:enumeration value="LAST_ONLY"/>
    </xs:restriction>
</xs:simpleType>
<!--
    Frequency expression: a unit (minutes, hours, days or months) followed by a positive
    integer without leading zeros in parentheses, e.g. hours(1).
-->
<xs:simpleType name="frequency-type">
    <xs:annotation>
        <xs:appinfo>
            <!--
                NOTE(review): in the copy this was rebuilt from, the javaType tag was cut off
                after the name attribute. If the upstream binding declares parseMethod and
                printMethod, restore them here; confirm before regenerating classes.
            -->
            <jaxb:javaType name="org.apache.falcon.entity.v0.Frequency"/>
        </xs:appinfo>
    </xs:annotation>
    <xs:restriction base="xs:string">
        <xs:pattern value="(minutes|hours|days|months)\([1-9]\d*\)"/>
    </xs:restriction>
</xs:simpleType>
<!-- SLA settings; both attributes are optional and use frequency-type. -->
<xs:complexType name="sla">
    <xs:annotation>
        <xs:documentation>
            sla has 2 optional attributes - shouldStartIn and shouldEndIn. All the attributes
            are written using expressions like frequency. shouldStartIn is the time in which the process should have
            started. shouldEndIn is the time in which the process should have finished.
        </xs:documentation>
    </xs:annotation>
    <xs:attribute type="frequency-type" name="shouldStartIn"/>
    <xs:attribute type="frequency-type" name="shouldEndIn"/>
</xs:complexType>
<!-- Wrapper holding one or more input elements. -->
<xs:complexType name="inputs">
    <xs:sequence>
        <xs:element type="input" name="input" maxOccurs="unbounded" minOccurs="1">
            <xs:annotation>
                <xs:documentation>Defines input for the workflow. Each input maps to a feed. Input path and
                    frequency are picked from feed definition.
                    The input specifies the start and end instance for the workflow. Falcon creates a
                    property with input name which contains paths of all input instances between start
                    and end. This property will be available for the workflow to read inputs.
                    Input can also optionally specify the specific partition of feed that the workflow needs.
                </xs:documentation>
            </xs:annotation>
        </xs:element>
    </xs:sequence>
</xs:complexType>
<!--
    A single input: name, feed, start and end are required; partition is optional and
    the "optional" flag defaults to false.
-->
<xs:complexType name="input">
    <xs:attribute type="IDENTIFIER" name="name" use="required"/>
    <xs:attribute type="IDENTIFIER" name="feed" use="required"/>
    <xs:attribute type="xs:string" name="start" use="required"/>
    <xs:attribute type="xs:string" name="end" use="required"/>
    <xs:attribute type="xs:string" name="partition" use="optional"/>
    <xs:attribute type="xs:boolean" name="optional" use="optional" default="false"/>
</xs:complexType>
<!-- Wrapper holding one or more output elements. -->
<xs:complexType name="outputs">
    <xs:sequence>
        <xs:element type="output" name="output" maxOccurs="unbounded" minOccurs="1">
            <xs:annotation>
                <xs:documentation>Each output maps to a feed. The Output path and frequency are picked from the
                    corresponding feed definition.
                    The output also specifies the instance that is created in terms of EL expression.
                    For each output, Falcon creates a property with the output name which can be used in workflows
                </xs:documentation>
            </xs:annotation>
        </xs:element>
    </xs:sequence>
</xs:complexType>
<!-- A single output: name, feed and instance are all required. -->
<xs:complexType name="output">
    <xs:attribute type="IDENTIFIER" name="name" use="required"/>
    <xs:attribute type="IDENTIFIER" name="feed" use="required"/>
    <xs:attribute type="xs:string" name="instance" use="required"/>
</xs:complexType>
<!--
    Workflow reference: only path is required. version defaults to 1.0 and engine
    defaults to oozie; name and lib are optional.
-->
<xs:complexType name="workflow">
    <xs:attribute type="xs:string" name="name" use="optional"/>
    <xs:attribute type="xs:string" name="version" use="optional" default="1.0"/>
    <xs:attribute type="engine-type" name="engine" use="optional" default="oozie"/>
    <xs:attribute type="xs:string" name="path" use="required"/>
    <xs:attribute type="xs:string" name="lib" use="optional"/>
</xs:complexType>
<!-- Allowed values for the workflow "engine" attribute. -->
<xs:simpleType name="engine-type">
    <xs:restriction base="xs:string">
        <xs:enumeration value="oozie"/>
        <xs:enumeration value="pig"/>
        <xs:enumeration value="hive"/>
    </xs:restriction>
</xs:simpleType>
<!-- Retry settings: policy, delay and attempts are all required; attempts must be at least 1. -->
<xs:complexType name="retry">
    <xs:attribute type="policy-type" name="policy" use="required"/>
    <xs:attribute type="frequency-type" name="delay" use="required"/>
    <xs:attribute name="attempts" use="required">
        <xs:simpleType>
            <xs:restriction base="xs:unsignedShort">
                <xs:minInclusive value="1"/>
            </xs:restriction>
        </xs:simpleType>
    </xs:attribute>
</xs:complexType>
<!-- Allowed values for the "policy" attribute of retry and late-process. -->
<xs:simpleType name="policy-type">
    <xs:restriction base="xs:string">
        <xs:enumeration value="periodic"/>
        <xs:enumeration value="exp-backoff"/>
        <xs:enumeration value="final"/>
    </xs:restriction>
</xs:simpleType>
<!-- Late-data handling: one or more late-input children plus required policy and delay attributes. -->
<xs:complexType name="late-process">
    <xs:sequence>
        <xs:element type="late-input" name="late-input" maxOccurs="unbounded" minOccurs="1">
            <xs:annotation>
                <xs:documentation>
                    For each input, defines the workflow that should be run when late data is detected
                </xs:documentation>
            </xs:annotation>
        </xs:element>
    </xs:sequence>
    <xs:attribute type="policy-type" name="policy" use="required"/>
    <xs:attribute type="frequency-type" name="delay" use="required"/>
</xs:complexType>
<!-- Pairs an input (by identifier) with a workflow path; both attributes are required. -->
<xs:complexType name="late-input">
    <xs:attribute type="IDENTIFIER" name="input" use="required"/>
    <xs:attribute type="xs:string" name="workflow-path" use="required"/>
</xs:complexType>
<!-- Wrapper holding zero or more property elements. -->
<xs:complexType name="properties">
    <xs:sequence>
        <xs:element type="property" name="property" maxOccurs="unbounded" minOccurs="0"/>
    </xs:sequence>
</xs:complexType>
<!-- A name/value pair; both attributes are required strings. -->
<xs:complexType name="property">
    <xs:attribute type="xs:string" name="name" use="required"/>
    <xs:attribute type="xs:string" name="value" use="required"/>
</xs:complexType>
<!--
    List of key=value pairs, separated by an optional comma and optional spaces.
    Keys are word characters; values may contain anything except a comma.
    The pattern also accepts the empty string.
-->
<xs:simpleType name="KEY_VALUE_PAIR">
    <xs:restriction base="xs:string">
        <xs:pattern value="(\w+=[^,]+)?([,]?[ ]*[\w]+=[^,]+)*"/>
    </xs:restriction>
</xs:simpleType>
<!--
    One or more pipeline names, separated by an optional comma and optional spaces.
    Names are built from word characters, '+' and '_'.
-->
<xs:simpleType name="PIPELINE_LIST">
    <xs:restriction base="xs:string">
        <xs:pattern value="([\w+_]+)([,]?[ ]*([\w+_]+))*"/>
    </xs:restriction>
</xs:simpleType>
<!-- Access control attributes; all three are optional strings and permission defaults to "*". -->
<xs:complexType name="ACL">
    <xs:annotation>
        <xs:documentation>
            Access control list for this process.
            owner is the Owner of this entity.
            group is the one which has access to read - not used at this time.
            permission is not enforced at this time
        </xs:documentation>
    </xs:annotation>
    <xs:attribute type="xs:string" name="owner"/>
    <xs:attribute type="xs:string" name="group"/>
    <xs:attribute type="xs:string" name="permission" default="*"/>
</xs:complexType>