<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="StudySettings">
<StudyTaskManager>
<option name="VERSION" value="14" />
<option name="myUserTests">
<map />
</option>
<option name="course">
<EduCourse>
<option name="authors">
<list>
<StepikUserInfo>
<option name="firstName" value="Henry" />
<option name="id" value="48485817" />
<option name="lastName" value="Suryawirawan" />
</StepikUserInfo>
</list>
</option>
<option name="compatible" value="true" />
<option name="courseMode" value="Course Creator" />
<option name="createDate" value="1557824500323" />
<option name="customPresentableName" />
<option name="description" value="This course provides a series of katas to get familiar with Apache Beam. &#10;&#10;Apache Beam website – https://beam.apache.org/" />
<option name="environment" value="" />
<option name="fromZip" value="false" />
<option name="id" value="54532" />
<option name="index" value="-1" />
<option name="instructors">
<list>
<option value="48485817" />
</list>
</option>
<option name="language" value="Python 2.7" />
<option name="languageCode" value="en" />
<option name="name" value="Beam Katas - Python" />
<option name="public" value="true" />
<option name="sectionIds">
<list />
</option>
<option name="stepikChangeStatus" value="Up to date" />
<option name="type" value="pycharm11 Python 2.7" />
<option name="updateDate" value="1560937766000" />
<option name="items">
<list>
<Section>
<option name="courseId" value="54532" />
<option name="customPresentableName" />
<option name="id" value="85644" />
<option name="index" value="1" />
<option name="name" value="Introduction" />
<option name="position" value="0" />
<option name="stepikChangeStatus" value="Up to date" />
<option name="updateDate" value="1559325495000" />
<option name="items">
<list>
<Lesson>
<option name="customPresentableName" />
<option name="id" value="238426" />
<option name="index" value="1" />
<option name="name" value="Hello Beam" />
<option name="stepikChangeStatus" value="Up to date" />
<option name="updateDate" value="1560937886298" />
<option name="unitId" value="210886" />
<option name="items">
<list>
<EduTask>
<option name="customPresentableName" />
<option name="descriptionFormat" value="HTML" />
<option name="descriptionText" value="&lt;!--&#10; ~ Licensed to the Apache Software Foundation (ASF) under one&#10; ~ or more contributor license agreements. See the NOTICE file&#10; ~ distributed with this work for additional information&#10; ~ regarding copyright ownership. The ASF licenses this file&#10; ~ to you under the Apache License, Version 2.0 (the&#10; ~ &quot;License&quot;); you may not use this file except in compliance&#10; ~ with the License. You may obtain a copy of the License at&#10; ~&#10; ~ http://www.apache.org/licenses/LICENSE-2.0&#10; ~&#10; ~ Unless required by applicable law or agreed to in writing, software&#10; ~ distributed under the License is distributed on an &quot;AS IS&quot; BASIS,&#10; ~ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.&#10; ~ See the License for the specific language governing permissions and&#10; ~ limitations under the License.&#10; --&gt;&#10;&#10;&lt;html&gt;&#10;&lt;h2&gt;Hello Beam Pipeline&lt;/h2&gt;&#10;&lt;p&gt;&#10; Apache Beam is an open source, unified model for defining both batch and streaming data-parallel&#10; processing pipelines. Using one of the open source Beam SDKs, you build a program that defines the&#10; pipeline. The pipeline is then executed by one of Beam’s supported distributed processing&#10; back-ends, which include Apache Apex, Apache Flink, Apache Spark, and Google Cloud Dataflow.&#10;&lt;/p&gt;&#10;&lt;p&gt;&#10; Beam is particularly useful for Embarrassingly Parallel data processing tasks, in which the&#10; problem can be decomposed into many smaller bundles of data that can be processed independently&#10; and in parallel. You can also use Beam for Extract, Transform, and Load (ETL) tasks and pure data&#10; integration. 
These tasks are useful for moving data between different storage media and data&#10; sources, transforming data into a more desirable format, or loading data onto a new system.&#10;&lt;/p&gt;&#10;&lt;p&gt;&#10; To learn more about Apache Beam, refer to&#10; &lt;a href=&quot;https://beam.apache.org/get-started/beam-overview/&quot;&gt;Apache Beam Overview&lt;/a&gt;.&#10;&lt;/p&gt;&#10;&lt;p&gt;&#10; &lt;b&gt;Kata:&lt;/b&gt; Your first kata is to create a simple pipeline that takes a hardcoded input element&#10; &quot;Hello Beam&quot;.&#10;&lt;/p&gt;&#10;&lt;br&gt;&#10;&lt;div class=&quot;hint&quot;&gt;&#10; Hardcoded input can be created using&#10; &lt;a href=&quot;https://beam.apache.org/releases/pydoc/current/apache_beam.transforms.core.html#apache_beam.transforms.core.Create&quot;&gt;&#10; Create&lt;/a&gt;.&#10;&lt;/div&gt;&#10;&lt;div class=&quot;hint&quot;&gt;&#10; Refer to the Beam Programming Guide&#10; &lt;a href=&quot;https://beam.apache.org/documentation/programming-guide/#creating-pcollection-in-memory&quot;&gt;&#10; &quot;Creating a PCollection from in-memory data&quot;&lt;/a&gt; section for more information.&#10;&lt;/div&gt;&#10;&lt;/html&gt;&#10;" />
<option name="feedbackLink">
<FeedbackLink>
<option name="link" />
<option name="type" value="STEPIK" />
</FeedbackLink>
</option>
<option name="id" value="755575" />
<option name="index" value="1" />
<option name="name" value="Hello Beam" />
<option name="record" value="-1" />
<option name="status" value="Unchecked" />
<option name="stepikChangeStatus" value="Up to date" />
<option name="files">
<map>
<entry key="task.py">
<value>
<TaskFile>
<option name="answerPlaceholders">
<list>
<AnswerPlaceholder>
<option name="hints">
<list />
</option>
<option name="index" value="0" />
<option name="initialState" />
<option name="initializedFromDependency" value="false" />
<option name="length" value="6" />
<option name="offset" value="903" />
<option name="placeholderDependency" />
<option name="placeholderText" value="TODO()" />
<option name="possibleAnswer" value="beam.Create(['Hello Beam'])" />
<option name="selected" value="false" />
<option name="status" value="Unchecked" />
<option name="studentAnswer" />
<option name="useLength" value="false" />
</AnswerPlaceholder>
</list>
</option>
<option name="highlightErrors" value="true" />
<option name="name" value="task.py" />
<option name="text" value="# TODO: type solution here&#10;" />
<option name="trackChanges" value="true" />
<option name="trackLengths" value="true" />
<option name="visible" value="true" />
</TaskFile>
</value>
</entry>
<entry key="tests.py">
<value>
<TaskFile>
<option name="answerPlaceholders">
<list />
</option>
<option name="highlightErrors" value="true" />
<option name="name" value="tests.py" />
<option name="text" value="from test_helper import run_common_tests, failed, passed, get_answer_placeholders&#10;&#10;&#10;def test_answer_placeholders():&#10; placeholders = get_answer_placeholders()&#10; placeholder = placeholders[0]&#10; if placeholder == &quot;&quot;: # TODO: your condition here&#10; passed()&#10; else:&#10; failed()&#10;&#10;&#10;if __name__ == '__main__':&#10; run_common_tests()&#10; # test_answer_placeholders() # TODO: uncomment test call&#10;&#10;&#10;" />
<option name="trackChanges" value="true" />
<option name="trackLengths" value="true" />
<option name="visible" value="false" />
</TaskFile>
</value>
</entry>
</map>
</option>
<option name="updateDate" value="1560937891911" />
</EduTask>
</list>
</option>
</Lesson>
</list>
</option>
</Section>
<Section>
<option name="courseId" value="54532" />
<option name="customPresentableName" />
<option name="id" value="85645" />
<option name="index" value="2" />
<option name="name" value="Core Transforms" />
<option name="position" value="0" />
<option name="stepikChangeStatus" value="Up to date" />
<option name="updateDate" value="1560432551000" />
<option name="items">
<list>
<Lesson>
<option name="customPresentableName" />
<option name="id" value="238427" />
<option name="index" value="1" />
<option name="name" value="Map" />
<option name="stepikChangeStatus" value="Content changed" />
<option name="updateDate" value="1560937929994" />
<option name="unitId" value="210887" />
<option name="items">
<list>
<EduTask>
<option name="customPresentableName" />
<option name="descriptionFormat" value="HTML" />
<option name="descriptionText" value="&lt;!--&#10; ~ Licensed to the Apache Software Foundation (ASF) under one&#10; ~ or more contributor license agreements. See the NOTICE file&#10; ~ distributed with this work for additional information&#10; ~ regarding copyright ownership. The ASF licenses this file&#10; ~ to you under the Apache License, Version 2.0 (the&#10; ~ &quot;License&quot;); you may not use this file except in compliance&#10; ~ with the License. You may obtain a copy of the License at&#10; ~&#10; ~ http://www.apache.org/licenses/LICENSE-2.0&#10; ~&#10; ~ Unless required by applicable law or agreed to in writing, software&#10; ~ distributed under the License is distributed on an &quot;AS IS&quot; BASIS,&#10; ~ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.&#10; ~ See the License for the specific language governing permissions and&#10; ~ limitations under the License.&#10; --&gt;&#10;&#10;&lt;html&gt;&#10;&lt;h2&gt;ParDo&lt;/h2&gt;&#10;&lt;p&gt;&#10; ParDo is a Beam transform for generic parallel processing. 
The ParDo processing paradigm is&#10; similar to the “Map” phase of a Map/Shuffle/Reduce-style algorithm: a ParDo transform considers&#10; each element in the input PCollection, performs some processing function (your user code) on&#10; that element, and emits zero, one, or multiple elements to an output PCollection.&#10;&lt;/p&gt;&#10;&lt;p&gt;&#10; &lt;b&gt;Kata:&lt;/b&gt; Please write a simple ParDo that maps the input element by multiplying it by 10.&#10;&lt;/p&gt;&#10;&lt;br&gt;&#10;&lt;div class=&quot;hint&quot;&gt;&#10; Override &lt;a href=&quot;https://beam.apache.org/releases/pydoc/current/apache_beam.transforms.core.html#apache_beam.transforms.core.DoFn.process&quot;&gt;&#10; process&lt;/a&gt; method.&#10;&lt;/div&gt;&#10;&lt;div class=&quot;hint&quot;&gt;&#10; Use &lt;a href=&quot;https://beam.apache.org/releases/pydoc/current/apache_beam.transforms.core.html#apache_beam.transforms.core.ParDo&quot;&gt;&#10; ParDo&lt;/a&gt; with&#10; &lt;a href=&quot;https://beam.apache.org/releases/pydoc/current/apache_beam.transforms.core.html#apache_beam.transforms.core.DoFn&quot;&gt;DoFn&lt;/a&gt;.&#10;&lt;/div&gt;&#10;&lt;div class=&quot;hint&quot;&gt;&#10; Refer to the Beam Programming Guide&#10; &lt;a href=&quot;https://beam.apache.org/documentation/programming-guide/#pardo&quot;&gt;&quot;ParDo&quot;&lt;/a&gt; section for&#10; more information.&#10;&lt;/div&gt;&#10;&lt;/html&gt;&#10;" />
<option name="feedbackLink">
<FeedbackLink>
<option name="link" />
<option name="type" value="STEPIK" />
</FeedbackLink>
</option>
<option name="id" value="755577" />
<option name="index" value="1" />
<option name="name" value="ParDo" />
<option name="record" value="-1" />
<option name="status" value="Unchecked" />
<option name="stepikChangeStatus" value="Info and Content changed" />
<option name="files">
<map>
<entry key="task.py">
<value>
<TaskFile>
<option name="answerPlaceholders">
<list>
<AnswerPlaceholder>
<option name="hints">
<list />
</option>
<option name="index" value="0" />
<option name="initialState" />
<option name="initializedFromDependency" value="false" />
<option name="length" value="6" />
<option name="offset" value="919" />
<option name="placeholderDependency" />
<option name="placeholderText" value="TODO()" />
<option name="possibleAnswer" value="def process(self, element):&#10; yield element * 10" />
<option name="selected" value="false" />
<option name="status" value="Unchecked" />
<option name="studentAnswer" />
<option name="useLength" value="false" />
</AnswerPlaceholder>
<AnswerPlaceholder>
<option name="hints">
<list />
</option>
<option name="index" value="1" />
<option name="initialState" />
<option name="initializedFromDependency" value="false" />
<option name="length" value="6" />
<option name="offset" value="1036" />
<option name="placeholderDependency" />
<option name="placeholderText" value="TODO()" />
<option name="possibleAnswer" value="beam.ParDo(MultiplyByTenDoFn())" />
<option name="selected" value="false" />
<option name="status" value="Unchecked" />
<option name="studentAnswer" />
<option name="useLength" value="false" />
</AnswerPlaceholder>
</list>
</option>
<option name="highlightErrors" value="true" />
<option name="name" value="task.py" />
<option name="text" value="# TODO: type solution here&#10;" />
<option name="trackChanges" value="true" />
<option name="trackLengths" value="true" />
<option name="visible" value="true" />
</TaskFile>
</value>
</entry>
<entry key="tests.py">
<value>
<TaskFile>
<option name="answerPlaceholders">
<list />
</option>
<option name="highlightErrors" value="true" />
<option name="name" value="tests.py" />
<option name="text" value="from test_helper import run_common_tests, failed, passed, get_answer_placeholders&#10;&#10;&#10;def test_answer_placeholders():&#10; placeholders = get_answer_placeholders()&#10; placeholder = placeholders[0]&#10; if placeholder == &quot;&quot;: # TODO: your condition here&#10; passed()&#10; else:&#10; failed()&#10;&#10;&#10;if __name__ == '__main__':&#10; run_common_tests()&#10; # test_answer_placeholders() # TODO: uncomment test call&#10;&#10;&#10;" />
<option name="trackChanges" value="true" />
<option name="trackLengths" value="true" />
<option name="visible" value="false" />
</TaskFile>
</value>
</entry>
</map>
</option>
<option name="updateDate" value="1560937936091" />
</EduTask>
<EduTask>
<option name="customPresentableName" />
<option name="descriptionFormat" value="HTML" />
<option name="descriptionText" value="&lt;!--&#10; ~ Licensed to the Apache Software Foundation (ASF) under one&#10; ~ or more contributor license agreements. See the NOTICE file&#10; ~ distributed with this work for additional information&#10; ~ regarding copyright ownership. The ASF licenses this file&#10; ~ to you under the Apache License, Version 2.0 (the&#10; ~ &quot;License&quot;); you may not use this file except in compliance&#10; ~ with the License. You may obtain a copy of the License at&#10; ~&#10; ~ http://www.apache.org/licenses/LICENSE-2.0&#10; ~&#10; ~ Unless required by applicable law or agreed to in writing, software&#10; ~ distributed under the License is distributed on an &quot;AS IS&quot; BASIS,&#10; ~ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.&#10; ~ See the License for the specific language governing permissions and&#10; ~ limitations under the License.&#10; --&gt;&#10;&#10;&lt;html&gt;&#10;&lt;h2&gt;ParDo OneToMany&lt;/h2&gt;&#10;&lt;p&gt;&#10; &lt;b&gt;Kata:&lt;/b&gt; Please write a ParDo that maps each input sentence into words tokenized by&#10; whitespace (&quot; &quot;).&#10;&lt;/p&gt;&#10;&lt;br&gt;&#10;&lt;div class=&quot;hint&quot;&gt;&#10; Override&#10; &lt;a href=&quot;https://beam.apache.org/releases/pydoc/current/apache_beam.transforms.core.html#apache_beam.transforms.core.DoFn.process&quot;&gt;&#10; process&lt;/a&gt; method. 
You can return an Iterable for multiple elements or call &quot;yield&quot; for each&#10; element to return a generator.&#10;&lt;/div&gt;&#10;&lt;div class=&quot;hint&quot;&gt;&#10; Use &lt;a href=&quot;https://beam.apache.org/releases/pydoc/current/apache_beam.transforms.core.html#apache_beam.transforms.core.ParDo&quot;&gt;&#10; ParDo&lt;/a&gt; with&#10; &lt;a href=&quot;https://beam.apache.org/releases/pydoc/current/apache_beam.transforms.core.html#apache_beam.transforms.core.DoFn&quot;&gt;&#10; DoFn&lt;/a&gt;.&#10;&lt;/div&gt;&#10;&lt;div class=&quot;hint&quot;&gt;&#10; Refer to the Beam Programming Guide&#10; &lt;a href=&quot;https://beam.apache.org/documentation/programming-guide/#pardo&quot;&gt;&quot;ParDo&quot;&lt;/a&gt; section for&#10; more information.&#10;&lt;/div&gt;&#10;&lt;/html&gt;&#10;" />
<option name="feedbackLink">
<FeedbackLink>
<option name="link" />
<option name="type" value="STEPIK" />
</FeedbackLink>
</option>
<option name="id" value="755578" />
<option name="index" value="2" />
<option name="name" value="ParDo OneToMany" />
<option name="record" value="-1" />
<option name="status" value="Unchecked" />
<option name="stepikChangeStatus" value="Info and Content changed" />
<option name="files">
<map>
<entry key="task.py">
<value>
<TaskFile>
<option name="answerPlaceholders">
<list>
<AnswerPlaceholder>
<option name="hints">
<list />
</option>
<option name="index" value="0" />
<option name="initialState" />
<option name="initializedFromDependency" value="false" />
<option name="length" value="6" />
<option name="offset" value="920" />
<option name="placeholderDependency" />
<option name="placeholderText" value="TODO()" />
<option name="possibleAnswer" value="def process(self, element):&#10; return element.split()" />
<option name="selected" value="false" />
<option name="status" value="Unchecked" />
<option name="studentAnswer" />
<option name="useLength" value="false" />
</AnswerPlaceholder>
<AnswerPlaceholder>
<option name="hints">
<list />
</option>
<option name="index" value="1" />
<option name="initialState" />
<option name="initializedFromDependency" value="false" />
<option name="length" value="6" />
<option name="offset" value="1057" />
<option name="placeholderDependency" />
<option name="placeholderText" value="TODO()" />
<option name="possibleAnswer" value="beam.ParDo(BreakIntoWordsDoFn())" />
<option name="selected" value="false" />
<option name="status" value="Unchecked" />
<option name="studentAnswer" />
<option name="useLength" value="false" />
</AnswerPlaceholder>
</list>
</option>
<option name="highlightErrors" value="true" />
<option name="name" value="task.py" />
<option name="text" value="# TODO: type solution here&#10;" />
<option name="trackChanges" value="true" />
<option name="trackLengths" value="true" />
<option name="visible" value="true" />
</TaskFile>
</value>
</entry>
<entry key="tests.py">
<value>
<TaskFile>
<option name="answerPlaceholders">
<list />
</option>
<option name="highlightErrors" value="true" />
<option name="name" value="tests.py" />
<option name="text" value="from test_helper import run_common_tests, failed, passed, get_answer_placeholders&#10;&#10;&#10;def test_answer_placeholders():&#10; placeholders = get_answer_placeholders()&#10; placeholder = placeholders[0]&#10; if placeholder == &quot;&quot;: # TODO: your condition here&#10; passed()&#10; else:&#10; failed()&#10;&#10;&#10;if __name__ == '__main__':&#10; run_common_tests()&#10; # test_answer_placeholders() # TODO: uncomment test call&#10;&#10;&#10;" />
<option name="trackChanges" value="true" />
<option name="trackLengths" value="true" />
<option name="visible" value="false" />
</TaskFile>
</value>
</entry>
</map>
</option>
<option name="updateDate" value="1560937938522" />
</EduTask>
<EduTask>
<option name="customPresentableName" />
<option name="descriptionFormat" value="HTML" />
<option name="descriptionText" value="&lt;!--&#10; ~ Licensed to the Apache Software Foundation (ASF) under one&#10; ~ or more contributor license agreements. See the NOTICE file&#10; ~ distributed with this work for additional information&#10; ~ regarding copyright ownership. The ASF licenses this file&#10; ~ to you under the Apache License, Version 2.0 (the&#10; ~ &quot;License&quot;); you may not use this file except in compliance&#10; ~ with the License. You may obtain a copy of the License at&#10; ~&#10; ~ http://www.apache.org/licenses/LICENSE-2.0&#10; ~&#10; ~ Unless required by applicable law or agreed to in writing, software&#10; ~ distributed under the License is distributed on an &quot;AS IS&quot; BASIS,&#10; ~ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.&#10; ~ See the License for the specific language governing permissions and&#10; ~ limitations under the License.&#10; --&gt;&#10;&#10;&lt;html&gt;&#10;&lt;h2&gt;MapElements&lt;/h2&gt;&#10;&lt;p&gt;&#10; The Beam SDKs provide language-specific ways to simplify how you provide your DoFn implementation.&#10;&lt;/p&gt;&#10;&lt;p&gt;&#10; &lt;b&gt;Kata:&lt;/b&gt; Implement a simple map function that multiplies all input elements by 5 using&#10; &lt;a href=&quot;https://beam.apache.org/releases/pydoc/current/apache_beam.transforms.core.html#apache_beam.transforms.core.Map&quot;&gt;&#10; Map&lt;/a&gt;.&#10;&lt;/p&gt;&#10;&lt;br&gt;&#10;&lt;div class=&quot;hint&quot;&gt;&#10; Use &lt;a href=&quot;https://beam.apache.org/releases/pydoc/current/apache_beam.transforms.core.html#apache_beam.transforms.core.Map&quot;&gt;&#10; Map&lt;/a&gt; with a lambda.&#10;&lt;/div&gt;&#10;&lt;div class=&quot;hint&quot;&gt;&#10; Refer to the Beam Programming Guide&#10; &lt;a href=&quot;https://beam.apache.org/documentation/programming-guide/#lightweight-dofns&quot;&gt;&#10; &quot;Lightweight DoFns and other abstractions&quot;&lt;/a&gt; section for more 
information.&#10;&lt;/div&gt;&#10;&lt;/html&gt;&#10;" />
<option name="feedbackLink">
<FeedbackLink>
<option name="link" />
<option name="type" value="STEPIK" />
</FeedbackLink>
</option>
<option name="id" value="755579" />
<option name="index" value="3" />
<option name="name" value="Map" />
<option name="record" value="-1" />
<option name="status" value="Unchecked" />
<option name="stepikChangeStatus" value="Up to date" />
<option name="files">
<map>
<entry key="task.py">
<value>
<TaskFile>
<option name="answerPlaceholders">
<list>
<AnswerPlaceholder>
<option name="hints">
<list />
</option>
<option name="index" value="0" />
<option name="initialState" />
<option name="initializedFromDependency" value="false" />
<option name="length" value="6" />
<option name="offset" value="942" />
<option name="placeholderDependency" />
<option name="placeholderText" value="TODO()" />
<option name="possibleAnswer" value="beam.Map(lambda num: num * 5)" />
<option name="selected" value="false" />
<option name="status" value="Unchecked" />
<option name="studentAnswer" />
<option name="useLength" value="false" />
</AnswerPlaceholder>
</list>
</option>
<option name="highlightErrors" value="true" />
<option name="name" value="task.py" />
<option name="text" value="# TODO: type solution here&#10;" />
<option name="trackChanges" value="true" />
<option name="trackLengths" value="true" />
<option name="visible" value="true" />
</TaskFile>
</value>
</entry>
<entry key="tests.py">
<value>
<TaskFile>
<option name="answerPlaceholders">
<list />
</option>
<option name="highlightErrors" value="true" />
<option name="name" value="tests.py" />
<option name="text" value="from test_helper import run_common_tests, failed, passed, get_answer_placeholders&#10;&#10;&#10;def test_answer_placeholders():&#10; placeholders = get_answer_placeholders()&#10; placeholder = placeholders[0]&#10; if placeholder == &quot;&quot;: # TODO: your condition here&#10; passed()&#10; else:&#10; failed()&#10;&#10;&#10;if __name__ == '__main__':&#10; run_common_tests()&#10; # test_answer_placeholders() # TODO: uncomment test call&#10;&#10;&#10;" />
<option name="trackChanges" value="true" />
<option name="trackLengths" value="true" />
<option name="visible" value="false" />
</TaskFile>
</value>
</entry>
</map>
</option>
<option name="updateDate" value="1560937942178" />
</EduTask>
<EduTask>
<option name="customPresentableName" />
<option name="descriptionFormat" value="HTML" />
<option name="descriptionText" value="&lt;!--&#10; ~ Licensed to the Apache Software Foundation (ASF) under one&#10; ~ or more contributor license agreements. See the NOTICE file&#10; ~ distributed with this work for additional information&#10; ~ regarding copyright ownership. The ASF licenses this file&#10; ~ to you under the Apache License, Version 2.0 (the&#10; ~ &quot;License&quot;); you may not use this file except in compliance&#10; ~ with the License. You may obtain a copy of the License at&#10; ~&#10; ~ http://www.apache.org/licenses/LICENSE-2.0&#10; ~&#10; ~ Unless required by applicable law or agreed to in writing, software&#10; ~ distributed under the License is distributed on an &quot;AS IS&quot; BASIS,&#10; ~ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.&#10; ~ See the License for the specific language governing permissions and&#10; ~ limitations under the License.&#10; --&gt;&#10;&#10;&lt;html&gt;&#10;&lt;h2&gt;FlatMapElements&lt;/h2&gt;&#10;&lt;p&gt;&#10; The Beam SDKs provide language-specific ways to simplify how you provide your DoFn implementation.&#10;&lt;/p&gt;&#10;&lt;p&gt;&#10; FlatMap can be used to simplify DoFn that maps an element to multiple elements (one to many).&#10;&lt;/p&gt;&#10;&lt;p&gt;&#10; &lt;b&gt;Kata:&lt;/b&gt; Implement a function that maps each input sentence into words tokenized by whitespace&#10; (&quot; &quot;) using&#10; &lt;a href=&quot;https://beam.apache.org/releases/pydoc/current/apache_beam.transforms.core.html#apache_beam.transforms.core.FlatMap&quot;&gt;&#10; FlatMap&lt;/a&gt;.&#10;&lt;/p&gt;&#10;&lt;br&gt;&#10;&lt;div class=&quot;hint&quot;&gt;&#10; Use &lt;a href=&quot;https://beam.apache.org/releases/pydoc/current/apache_beam.transforms.core.html#apache_beam.transforms.core.FlatMap&quot;&gt;&#10; FlatMap&lt;/a&gt; with a lambda.&#10;&lt;/div&gt;&#10;&lt;div class=&quot;hint&quot;&gt;&#10; Refer to the Beam Programming Guide&#10; &lt;a 
href=&quot;https://beam.apache.org/documentation/programming-guide/#lightweight-dofns&quot;&gt;&#10; &quot;Lightweight DoFns and other abstractions&quot;&lt;/a&gt; section for more information.&#10;&lt;/div&gt;&#10;&lt;/html&gt;&#10;" />
<option name="feedbackLink">
<FeedbackLink>
<option name="link" />
<option name="type" value="STEPIK" />
</FeedbackLink>
</option>
<option name="id" value="755580" />
<option name="index" value="4" />
<option name="name" value="FlatMap" />
<option name="record" value="-1" />
<option name="status" value="Unchecked" />
<option name="stepikChangeStatus" value="Up to date" />
<option name="files">
<map>
<entry key="task.py">
<value>
<TaskFile>
<option name="answerPlaceholders">
<list>
<AnswerPlaceholder>
<option name="hints">
<list />
</option>
<option name="index" value="0" />
<option name="initialState" />
<option name="initializedFromDependency" value="false" />
<option name="length" value="6" />
<option name="offset" value="968" />
<option name="placeholderDependency" />
<option name="placeholderText" value="TODO()" />
<option name="possibleAnswer" value="beam.FlatMap(lambda sentence: sentence.split())" />
<option name="selected" value="false" />
<option name="status" value="Unchecked" />
<option name="studentAnswer" />
<option name="useLength" value="false" />
</AnswerPlaceholder>
</list>
</option>
<option name="highlightErrors" value="true" />
<option name="name" value="task.py" />
<option name="text" value="# TODO: type solution here&#10;" />
<option name="trackChanges" value="true" />
<option name="trackLengths" value="true" />
<option name="visible" value="true" />
</TaskFile>
</value>
</entry>
<entry key="tests.py">
<value>
<TaskFile>
<option name="answerPlaceholders">
<list />
</option>
<option name="highlightErrors" value="true" />
<option name="name" value="tests.py" />
<option name="text" value="from test_helper import run_common_tests, failed, passed, get_answer_placeholders&#10;&#10;&#10;def test_answer_placeholders():&#10; placeholders = get_answer_placeholders()&#10; placeholder = placeholders[0]&#10; if placeholder == &quot;&quot;: # TODO: your condition here&#10; passed()&#10; else:&#10; failed()&#10;&#10;&#10;if __name__ == '__main__':&#10; run_common_tests()&#10; # test_answer_placeholders() # TODO: uncomment test call&#10;&#10;&#10;" />
<option name="trackChanges" value="true" />
<option name="trackLengths" value="true" />
<option name="visible" value="false" />
</TaskFile>
</value>
</entry>
</map>
</option>
<option name="updateDate" value="1560937944601" />
</EduTask>
</list>
</option>
</Lesson>
<Lesson>
<option name="customPresentableName" />
<option name="id" value="238428" />
<option name="index" value="2" />
<option name="name" value="GroupByKey" />
<option name="stepikChangeStatus" value="Up to date" />
<option name="updateDate" value="1560937980839" />
<option name="unitId" value="210888" />
<option name="items">
<list>
<EduTask>
<option name="customPresentableName" />
<option name="descriptionFormat" value="HTML" />
<option name="descriptionText" value="&lt;!--&#10; ~ Licensed to the Apache Software Foundation (ASF) under one&#10; ~ or more contributor license agreements. See the NOTICE file&#10; ~ distributed with this work for additional information&#10; ~ regarding copyright ownership. The ASF licenses this file&#10; ~ to you under the Apache License, Version 2.0 (the&#10; ~ &quot;License&quot;); you may not use this file except in compliance&#10; ~ with the License. You may obtain a copy of the License at&#10; ~&#10; ~ http://www.apache.org/licenses/LICENSE-2.0&#10; ~&#10; ~ Unless required by applicable law or agreed to in writing, software&#10; ~ distributed under the License is distributed on an &quot;AS IS&quot; BASIS,&#10; ~ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.&#10; ~ See the License for the specific language governing permissions and&#10; ~ limitations under the License.&#10; --&gt;&#10;&#10;&lt;html&gt;&#10;&lt;h2&gt;GroupByKey&lt;/h2&gt;&#10;&lt;p&gt;&#10; GroupByKey is a Beam transform for processing collections of key/value pairs. It’s a parallel&#10; reduction operation, analogous to the Shuffle phase of a Map/Shuffle/Reduce-style algorithm. The&#10; input to GroupByKey is a collection of key/value pairs that represents a multimap, where the&#10; collection contains multiple pairs that have the same key, but different values. 
Given such a&#10; collection, you use GroupByKey to collect all of the values associated with each unique key.&#10;&lt;/p&gt;&#10;&lt;p&gt;&#10; &lt;b&gt;Kata:&lt;/b&gt; Implement a&#10; &lt;a href=&quot;https://beam.apache.org/releases/pydoc/current/apache_beam.transforms.core.html#apache_beam.transforms.core.GroupByKey&quot;&gt;&#10; GroupByKey&lt;/a&gt; transform that groups words by their first letter.&#10;&lt;/p&gt;&#10;&lt;br&gt;&#10;&lt;div class=&quot;hint&quot;&gt;&#10; Refer to&#10; &lt;a href=&quot;https://beam.apache.org/releases/pydoc/current/apache_beam.transforms.core.html#apache_beam.transforms.core.GroupByKey&quot;&gt;GroupByKey&lt;/a&gt;&#10; to solve this problem.&#10;&lt;/div&gt;&#10;&lt;div class=&quot;hint&quot;&gt;&#10; Refer to the Beam Programming Guide&#10; &lt;a href=&quot;https://beam.apache.org/documentation/programming-guide/#groupbykey&quot;&gt;&#10; &quot;GroupByKey&quot;&lt;/a&gt; section for more information.&#10;&lt;/div&gt;&#10;&lt;/html&gt;&#10;" />
<option name="feedbackLink">
<FeedbackLink>
<option name="link" />
<option name="type" value="STEPIK" />
</FeedbackLink>
</option>
<option name="id" value="755582" />
<option name="index" value="1" />
<option name="name" value="GroupByKey" />
<option name="record" value="-1" />
<option name="status" value="Unchecked" />
<option name="stepikChangeStatus" value="Up to date" />
<option name="files">
<map>
<entry key="task.py">
<value>
<TaskFile>
<option name="answerPlaceholders">
<list>
<AnswerPlaceholder>
<option name="hints">
<list />
</option>
<option name="index" value="0" />
<option name="initialState" />
<option name="initializedFromDependency" value="false" />
<option name="length" value="8" />
<option name="offset" value="970" />
<option name="placeholderDependency" />
<option name="placeholderText" value="| TODO()" />
<option name="possibleAnswer" value="| beam.Map(lambda word: (word[0], word))&#10; | beam.GroupByKey()" />
<option name="selected" value="false" />
<option name="status" value="Unchecked" />
<option name="studentAnswer" />
<option name="useLength" value="false" />
</AnswerPlaceholder>
</list>
</option>
<option name="highlightErrors" value="true" />
<option name="name" value="task.py" />
<option name="text" value="# TODO: type solution here&#10;" />
<option name="trackChanges" value="true" />
<option name="trackLengths" value="true" />
<option name="visible" value="true" />
</TaskFile>
</value>
</entry>
<entry key="tests.py">
<value>
<TaskFile>
<option name="answerPlaceholders">
<list />
</option>
<option name="highlightErrors" value="true" />
<option name="name" value="tests.py" />
<option name="text" value="from test_helper import run_common_tests, failed, passed, get_answer_placeholders&#10;&#10;&#10;def test_answer_placeholders():&#10; placeholders = get_answer_placeholders()&#10; placeholder = placeholders[0]&#10; if placeholder == &quot;&quot;: # TODO: your condition here&#10; passed()&#10; else:&#10; failed()&#10;&#10;&#10;if __name__ == '__main__':&#10; run_common_tests()&#10; # test_answer_placeholders() # TODO: uncomment test call&#10;&#10;&#10;" />
<option name="trackChanges" value="true" />
<option name="trackLengths" value="true" />
<option name="visible" value="false" />
</TaskFile>
</value>
</entry>
</map>
</option>
<option name="updateDate" value="1560937986273" />
</EduTask>
</list>
</option>
</Lesson>
<Lesson>
<option name="customPresentableName" />
<option name="id" value="238429" />
<option name="index" value="3" />
<option name="name" value="CoGroupByKey" />
<option name="stepikChangeStatus" value="Up to date" />
<option name="updateDate" value="1560938006360" />
<option name="unitId" value="210889" />
<option name="items">
<list>
<EduTask>
<option name="customPresentableName" />
<option name="descriptionFormat" value="HTML" />
<option name="descriptionText" value="&lt;!--&#10; ~ Licensed to the Apache Software Foundation (ASF) under one&#10; ~ or more contributor license agreements. See the NOTICE file&#10; ~ distributed with this work for additional information&#10; ~ regarding copyright ownership. The ASF licenses this file&#10; ~ to you under the Apache License, Version 2.0 (the&#10; ~ &quot;License&quot;); you may not use this file except in compliance&#10; ~ with the License. You may obtain a copy of the License at&#10; ~&#10; ~ http://www.apache.org/licenses/LICENSE-2.0&#10; ~&#10; ~ Unless required by applicable law or agreed to in writing, software&#10; ~ distributed under the License is distributed on an &quot;AS IS&quot; BASIS,&#10; ~ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.&#10; ~ See the License for the specific language governing permissions and&#10; ~ limitations under the License.&#10; --&gt;&#10;&#10;&lt;html&gt;&#10;&lt;h2&gt;CoGroupByKey&lt;/h2&gt;&#10;&lt;p&gt;&#10; CoGroupByKey performs a relational join of two or more key/value PCollections that have the same&#10; key type.&#10;&lt;/p&gt;&#10;&lt;p&gt;&#10; &lt;b&gt;Kata:&lt;/b&gt; Implement a&#10; &lt;a href=&quot;https://beam.apache.org/releases/pydoc/current/apache_beam.transforms.util.html#apache_beam.transforms.util.CoGroupByKey&quot;&gt;&#10; CoGroupByKey&lt;/a&gt; transform that joins words by their first alphabetical letter, and then produces&#10; the string representation of the WordsAlphabet model.&#10;&lt;/p&gt;&#10;&lt;br&gt;&#10;&lt;div class=&quot;hint&quot;&gt;&#10; Refer to&#10; &lt;a href=&quot;https://beam.apache.org/releases/pydoc/current/apache_beam.transforms.util.html#apache_beam.transforms.util.CoGroupByKey&quot;&gt;&#10; CoGroupByKey&lt;/a&gt; to solve this problem.&#10;&lt;/div&gt;&#10;&lt;div class=&quot;hint&quot;&gt;&#10; Refer to the Beam Programming Guide&#10; &lt;a href=&quot;https://beam.apache.org/documentation/programming-guide/#cogroupbykey&quot;&gt;&#10; 
&quot;CoGroupByKey&quot;&lt;/a&gt; section for more information.&#10;&lt;/div&gt;&#10;&lt;/html&gt;&#10;" />
<option name="feedbackLink">
<FeedbackLink>
<option name="link" />
<option name="type" value="STEPIK" />
</FeedbackLink>
</option>
<option name="id" value="755583" />
<option name="index" value="1" />
<option name="name" value="CoGroupByKey" />
<option name="record" value="-1" />
<option name="status" value="Unchecked" />
<option name="stepikChangeStatus" value="Up to date" />
<option name="files">
<map>
<entry key="task.py">
<value>
<TaskFile>
<option name="answerPlaceholders">
<list>
<AnswerPlaceholder>
<option name="hints">
<list />
</option>
<option name="index" value="0" />
<option name="initialState" />
<option name="initializedFromDependency" value="false" />
<option name="length" value="6" />
<option name="offset" value="1228" />
<option name="placeholderDependency" />
<option name="placeholderText" value="TODO()" />
<option name="possibleAnswer" value="def map_to_alphabet_kv(word):&#10; return (word[0], word)&#10;&#10; def cogbk_result_to_wordsalphabet(cgbk_result):&#10; (alphabet, words) = cgbk_result&#10; return WordsAlphabet(alphabet, words['fruits'][0], words['countries'][0])&#10;&#10; fruits_kv = (fruits | 'Fruit to KV' &gt;&gt; beam.Map(map_to_alphabet_kv))&#10; countries_kv = (countries | 'Country to KV' &gt;&gt; beam.Map(map_to_alphabet_kv))&#10;&#10; return ({'fruits': fruits_kv, 'countries': countries_kv}&#10; | beam.CoGroupByKey()&#10; | beam.Map(cogbk_result_to_wordsalphabet))" />
<option name="selected" value="false" />
<option name="status" value="Unchecked" />
<option name="studentAnswer" />
<option name="useLength" value="false" />
</AnswerPlaceholder>
</list>
</option>
<option name="highlightErrors" value="true" />
<option name="name" value="task.py" />
<option name="text" value="# TODO: type solution here&#10;" />
<option name="trackChanges" value="true" />
<option name="trackLengths" value="true" />
<option name="visible" value="true" />
</TaskFile>
</value>
</entry>
<entry key="tests.py">
<value>
<TaskFile>
<option name="answerPlaceholders">
<list />
</option>
<option name="highlightErrors" value="true" />
<option name="name" value="tests.py" />
<option name="text" value="from test_helper import run_common_tests, failed, passed, get_answer_placeholders&#10;&#10;&#10;def test_answer_placeholders():&#10; placeholders = get_answer_placeholders()&#10; placeholder = placeholders[0]&#10; if placeholder == &quot;&quot;: # TODO: your condition here&#10; passed()&#10; else:&#10; failed()&#10;&#10;&#10;if __name__ == '__main__':&#10; run_common_tests()&#10; # test_answer_placeholders() # TODO: uncomment test call&#10;&#10;&#10;" />
<option name="trackChanges" value="true" />
<option name="trackLengths" value="true" />
<option name="visible" value="false" />
</TaskFile>
</value>
</entry>
</map>
</option>
<option name="updateDate" value="1560938011025" />
</EduTask>
</list>
</option>
</Lesson>
<Lesson>
<option name="customPresentableName" />
<option name="id" value="238430" />
<option name="index" value="4" />
<option name="name" value="Combine" />
<option name="stepikChangeStatus" value="Content changed" />
<option name="updateDate" value="1560938016807" />
<option name="unitId" value="210890" />
<option name="items">
<list>
<EduTask>
<option name="customPresentableName" />
<option name="descriptionFormat" value="HTML" />
<option name="descriptionText" value="&lt;!--&#10; ~ Licensed to the Apache Software Foundation (ASF) under one&#10; ~ or more contributor license agreements. See the NOTICE file&#10; ~ distributed with this work for additional information&#10; ~ regarding copyright ownership. The ASF licenses this file&#10; ~ to you under the Apache License, Version 2.0 (the&#10; ~ &quot;License&quot;); you may not use this file except in compliance&#10; ~ with the License. You may obtain a copy of the License at&#10; ~&#10; ~ http://www.apache.org/licenses/LICENSE-2.0&#10; ~&#10; ~ Unless required by applicable law or agreed to in writing, software&#10; ~ distributed under the License is distributed on an &quot;AS IS&quot; BASIS,&#10; ~ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.&#10; ~ See the License for the specific language governing permissions and&#10; ~ limitations under the License.&#10; --&gt;&#10;&#10;&lt;html&gt;&#10;&lt;h2&gt;Combine - Simple Function&lt;/h2&gt;&#10;&lt;p&gt;&#10; Combine is a Beam transform for combining collections of elements or values in your data.&#10; When you apply a Combine transform, you must provide the function that contains the logic for&#10; combining the elements or values. The combining function should be commutative and associative,&#10; as the function is not necessarily invoked exactly once on all values with a given key. 
Because&#10; the input data (including the value collection) may be distributed across multiple workers, the&#10; combining function might be called multiple times to perform partial combining on subsets of&#10; the value collection.&#10;&lt;/p&gt;&#10;&lt;p&gt;&#10; Simple combine operations, such as sums, can usually be implemented as a simple function.&#10;&lt;/p&gt;&#10;&lt;p&gt;&#10; &lt;b&gt;Kata:&lt;/b&gt; Implement the summation of numbers using&#10; &lt;a href=&quot;https://beam.apache.org/releases/pydoc/current/apache_beam.transforms.core.html#apache_beam.transforms.core.CombineGlobally&quot;&gt;&#10; CombineGlobally&lt;/a&gt;.&#10;&lt;/p&gt;&#10;&lt;br&gt;&#10;&lt;div class=&quot;hint&quot;&gt;&#10; Implement a simple Python function that performs the summation of the values.&#10;&lt;/div&gt;&#10;&lt;div class=&quot;hint&quot;&gt;&#10; Refer to the Beam Programming Guide&#10; &lt;a href=&quot;https://beam.apache.org/documentation/programming-guide/#simple-combines&quot;&gt;&#10; &quot;Simple combinations using simple functions&quot;&lt;/a&gt; section for more information.&#10;&lt;/div&gt;&#10;&lt;/html&gt;&#10;" />
<option name="feedbackLink">
<FeedbackLink>
<option name="link" />
<option name="type" value="STEPIK" />
</FeedbackLink>
</option>
<option name="id" value="755584" />
<option name="index" value="1" />
<option name="name" value="Simple Function" />
<option name="record" value="-1" />
<option name="status" value="Unchecked" />
<option name="stepikChangeStatus" value="Up to date" />
<option name="files">
<map>
<entry key="task.py">
<value>
<TaskFile>
<option name="answerPlaceholders">
<list>
<AnswerPlaceholder>
<option name="hints">
<list />
</option>
<option name="index" value="0" />
<option name="initialState" />
<option name="initializedFromDependency" value="false" />
<option name="length" value="6" />
<option name="offset" value="900" />
<option name="placeholderDependency" />
<option name="placeholderText" value="TODO()" />
<option name="possibleAnswer" value="total = 0&#10;&#10; for num in numbers:&#10; total += num&#10;&#10; return total" />
<option name="selected" value="false" />
<option name="status" value="Unchecked" />
<option name="studentAnswer" />
<option name="useLength" value="false" />
</AnswerPlaceholder>
<AnswerPlaceholder>
<option name="hints">
<list />
</option>
<option name="index" value="1" />
<option name="initialState" />
<option name="initializedFromDependency" value="false" />
<option name="length" value="6" />
<option name="offset" value="1036" />
<option name="placeholderDependency" />
<option name="placeholderText" value="TODO()" />
<option name="possibleAnswer" value="beam.CombineGlobally(sum)" />
<option name="selected" value="false" />
<option name="status" value="Unchecked" />
<option name="studentAnswer" />
<option name="useLength" value="false" />
</AnswerPlaceholder>
</list>
</option>
<option name="highlightErrors" value="true" />
<option name="name" value="task.py" />
<option name="text" value="# TODO: type solution here&#10;" />
<option name="trackChanges" value="true" />
<option name="trackLengths" value="true" />
<option name="visible" value="true" />
</TaskFile>
</value>
</entry>
<entry key="tests.py">
<value>
<TaskFile>
<option name="answerPlaceholders">
<list />
</option>
<option name="highlightErrors" value="true" />
<option name="name" value="tests.py" />
<option name="text" value="from test_helper import run_common_tests, failed, passed, get_answer_placeholders&#10;&#10;&#10;def test_answer_placeholders():&#10; placeholders = get_answer_placeholders()&#10; placeholder = placeholders[0]&#10; if placeholder == &quot;&quot;: # TODO: your condition here&#10; passed()&#10; else:&#10; failed()&#10;&#10;&#10;if __name__ == '__main__':&#10; run_common_tests()&#10; # test_answer_placeholders() # TODO: uncomment test call&#10;&#10;&#10;" />
<option name="trackChanges" value="true" />
<option name="trackLengths" value="true" />
<option name="visible" value="false" />
</TaskFile>
</value>
</entry>
</map>
</option>
<option name="updateDate" value="1560938025042" />
</EduTask>
<EduTask>
<option name="customPresentableName" />
<option name="descriptionFormat" value="HTML" />
<option name="descriptionText" value="&lt;!--&#10; ~ Licensed to the Apache Software Foundation (ASF) under one&#10; ~ or more contributor license agreements. See the NOTICE file&#10; ~ distributed with this work for additional information&#10; ~ regarding copyright ownership. The ASF licenses this file&#10; ~ to you under the Apache License, Version 2.0 (the&#10; ~ &quot;License&quot;); you may not use this file except in compliance&#10; ~ with the License. You may obtain a copy of the License at&#10; ~&#10; ~ http://www.apache.org/licenses/LICENSE-2.0&#10; ~&#10; ~ Unless required by applicable law or agreed to in writing, software&#10; ~ distributed under the License is distributed on an &quot;AS IS&quot; BASIS,&#10; ~ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.&#10; ~ See the License for the specific language governing permissions and&#10; ~ limitations under the License.&#10; --&gt;&#10;&#10;&lt;html&gt;&#10;&lt;h2&gt;Combine - CombineFn&lt;/h2&gt;&#10;&lt;p&gt;&#10; Combine is a Beam transform for combining collections of elements or values in your data.&#10; When you apply a Combine transform, you must provide the function that contains the logic for&#10; combining the elements or values. The combining function should be commutative and associative,&#10; as the function is not necessarily invoked exactly once on all values with a given key. Because&#10; the input data (including the value collection) may be distributed across multiple workers, the&#10; combining function might be called multiple times to perform partial combining on subsets of&#10; the value collection.&#10;&lt;/p&gt;&#10;&lt;p&gt;&#10; Complex combination operations might require you to create a subclass of CombineFn that has an&#10; accumulation type distinct from the input/output type. 
You should use CombineFn if the combine&#10; function requires a more sophisticated accumulator, must perform additional pre- or&#10; post-processing, might change the output type, or takes the key into account.&#10;&lt;/p&gt;&#10;&lt;p&gt;&#10; &lt;b&gt;Kata:&lt;/b&gt; Implement the average of numbers using&#10; &lt;a href=&quot;https://beam.apache.org/releases/pydoc/current/apache_beam.transforms.core.html#apache_beam.transforms.core.CombineFn&quot;&gt;&#10; CombineFn&lt;/a&gt;.&#10;&lt;/p&gt;&#10;&lt;br&gt;&#10;&lt;div class=&quot;hint&quot;&gt;&#10; Extend the&#10; &lt;a href=&quot;https://beam.apache.org/releases/pydoc/current/apache_beam.transforms.core.html#apache_beam.transforms.core.CombineFn&quot;&gt;&#10; CombineFn&lt;/a&gt; class that computes the average of the numbers.&#10;&lt;/div&gt;&#10;&lt;div class=&quot;hint&quot;&gt;&#10; Refer to the Beam Programming Guide&#10; &lt;a href=&quot;https://beam.apache.org/documentation/programming-guide/#advanced-combines&quot;&gt;&#10; &quot;Advanced combinations using CombineFn&quot;&lt;/a&gt; section for more information.&#10;&lt;/div&gt;&#10;&lt;/html&gt;&#10;" />
<option name="feedbackLink">
<FeedbackLink>
<option name="link" />
<option name="type" value="STEPIK" />
</FeedbackLink>
</option>
<option name="id" value="755585" />
<option name="index" value="2" />
<option name="name" value="CombineFn" />
<option name="record" value="-1" />
<option name="status" value="Unchecked" />
<option name="stepikChangeStatus" value="Up to date" />
<option name="files">
<map>
<entry key="task.py">
<value>
<TaskFile>
<option name="answerPlaceholders">
<list>
<AnswerPlaceholder>
<option name="hints">
<list />
</option>
<option name="index" value="0" />
<option name="initialState" />
<option name="initializedFromDependency" value="false" />
<option name="length" value="6" />
<option name="offset" value="916" />
<option name="placeholderDependency" />
<option name="placeholderText" value="TODO()" />
<option name="possibleAnswer" value="def create_accumulator(self):&#10; return 0.0, 0&#10;&#10; def add_input(self, accumulator, element):&#10; (sum, count) = accumulator&#10; return sum + element, count + 1&#10;&#10; def merge_accumulators(self, accumulators):&#10; sums, counts = zip(*accumulators)&#10; return sum(sums), sum(counts)&#10;&#10; def extract_output(self, accumulator):&#10; (sum, count) = accumulator&#10; return sum / count if count else float('NaN')" />
<option name="selected" value="false" />
<option name="status" value="Unchecked" />
<option name="studentAnswer" />
<option name="useLength" value="false" />
</AnswerPlaceholder>
<AnswerPlaceholder>
<option name="hints">
<list />
</option>
<option name="index" value="1" />
<option name="initialState" />
<option name="initializedFromDependency" value="false" />
<option name="length" value="6" />
<option name="offset" value="1420" />
<option name="placeholderDependency" />
<option name="placeholderText" value="TODO()" />
<option name="possibleAnswer" value="beam.CombineGlobally(AverageFn())" />
<option name="selected" value="false" />
<option name="status" value="Unchecked" />
<option name="studentAnswer" />
<option name="useLength" value="false" />
</AnswerPlaceholder>
</list>
</option>
<option name="highlightErrors" value="true" />
<option name="name" value="task.py" />
<option name="text" value="# TODO: type solution here&#10;" />
<option name="trackChanges" value="true" />
<option name="trackLengths" value="true" />
<option name="visible" value="true" />
</TaskFile>
</value>
</entry>
<entry key="tests.py">
<value>
<TaskFile>
<option name="answerPlaceholders">
<list />
</option>
<option name="highlightErrors" value="true" />
<option name="name" value="tests.py" />
<option name="text" value="from test_helper import run_common_tests, failed, passed, get_answer_placeholders&#10;&#10;&#10;def test_answer_placeholders():&#10; placeholders = get_answer_placeholders()&#10; placeholder = placeholders[0]&#10; if placeholder == &quot;&quot;: # TODO: your condition here&#10; passed()&#10; else:&#10; failed()&#10;&#10;&#10;if __name__ == '__main__':&#10; run_common_tests()&#10; # test_answer_placeholders() # TODO: uncomment test call&#10;&#10;&#10;" />
<option name="trackChanges" value="true" />
<option name="trackLengths" value="true" />
<option name="visible" value="false" />
</TaskFile>
</value>
</entry>
</map>
</option>
<option name="updateDate" value="1560938027519" />
</EduTask>
<EduTask>
<option name="customPresentableName" />
<option name="descriptionFormat" value="HTML" />
<option name="descriptionText" value="&lt;!--&#10; ~ Licensed to the Apache Software Foundation (ASF) under one&#10; ~ or more contributor license agreements. See the NOTICE file&#10; ~ distributed with this work for additional information&#10; ~ regarding copyright ownership. The ASF licenses this file&#10; ~ to you under the Apache License, Version 2.0 (the&#10; ~ &quot;License&quot;); you may not use this file except in compliance&#10; ~ with the License. You may obtain a copy of the License at&#10; ~&#10; ~ http://www.apache.org/licenses/LICENSE-2.0&#10; ~&#10; ~ Unless required by applicable law or agreed to in writing, software&#10; ~ distributed under the License is distributed on an &quot;AS IS&quot; BASIS,&#10; ~ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.&#10; ~ See the License for the specific language governing permissions and&#10; ~ limitations under the License.&#10; --&gt;&#10;&#10;&lt;html&gt;&#10;&lt;h2&gt;Combine - Combine PerKey&lt;/h2&gt;&#10;&lt;p&gt;&#10; After creating a keyed PCollection (for example, by using a GroupByKey transform), a common&#10; pattern is to combine the collection of values associated with each key into a single, merged&#10; value. This pattern of a GroupByKey followed by merging the collection of values is equivalent to&#10; Combine PerKey transform. 
The combine function you supply to Combine PerKey must be an associative&#10; reduction function or a subclass of CombineFn.&#10;&lt;/p&gt;&#10;&lt;p&gt;&#10; &lt;b&gt;Kata:&lt;/b&gt; Implement the sum of scores per player using&#10; &lt;a href=&quot;https://beam.apache.org/releases/pydoc/current/apache_beam.transforms.core.html#apache_beam.transforms.core.CombinePerKey&quot;&gt;&#10; CombinePerKey&lt;/a&gt;.&#10;&lt;/p&gt;&#10;&lt;br&gt;&#10;&lt;div class=&quot;hint&quot;&gt;&#10; Use &lt;a href=&quot;https://beam.apache.org/releases/pydoc/current/apache_beam.transforms.core.html#apache_beam.transforms.core.CombinePerKey&quot;&gt;&#10; CombinePerKey(CombineFn)&lt;/a&gt;.&#10;&lt;/div&gt;&#10;&lt;div class=&quot;hint&quot;&gt;&#10; Extend the&#10; &lt;a href=&quot;https://beam.apache.org/releases/pydoc/current/apache_beam.transforms.core.html#apache_beam.transforms.core.CombineFn&quot;&gt;&#10; CombineFn&lt;/a&gt; class that computes the sum of the numbers.&#10;&lt;/div&gt;&#10;&lt;div class=&quot;hint&quot;&gt;&#10; Refer to the Beam Programming Guide&#10; &lt;a href=&quot;https://beam.apache.org/documentation/programming-guide/#combining-values-in-a-keyed-pcollection&quot;&gt;&#10; &quot;Combining values in a keyed PCollection&quot;&lt;/a&gt; section for more information.&#10;&lt;/div&gt;&#10;&lt;/html&gt;&#10;" />
<option name="feedbackLink">
<FeedbackLink>
<option name="link" />
<option name="type" value="STEPIK" />
</FeedbackLink>
</option>
<option name="id" value="755587" />
<option name="index" value="3" />
<option name="name" value="Combine PerKey" />
<option name="record" value="-1" />
<option name="status" value="Unchecked" />
<option name="stepikChangeStatus" value="Up to date" />
<option name="files">
<map>
<entry key="task.py">
<value>
<TaskFile>
<option name="answerPlaceholders">
<list>
<AnswerPlaceholder>
<option name="hints">
<list />
</option>
<option name="index" value="0" />
<option name="initialState" />
<option name="initializedFromDependency" value="false" />
<option name="length" value="6" />
<option name="offset" value="1088" />
<option name="placeholderDependency" />
<option name="placeholderText" value="TODO()" />
<option name="possibleAnswer" value="beam.CombinePerKey(sum)" />
<option name="selected" value="false" />
<option name="status" value="Unchecked" />
<option name="studentAnswer" />
<option name="useLength" value="false" />
</AnswerPlaceholder>
</list>
</option>
<option name="highlightErrors" value="true" />
<option name="name" value="task.py" />
<option name="text" value="# TODO: type solution here&#10;" />
<option name="trackChanges" value="true" />
<option name="trackLengths" value="true" />
<option name="visible" value="true" />
</TaskFile>
</value>
</entry>
<entry key="tests.py">
<value>
<TaskFile>
<option name="answerPlaceholders">
<list />
</option>
<option name="highlightErrors" value="true" />
<option name="name" value="tests.py" />
<option name="text" value="from test_helper import run_common_tests, failed, passed, get_answer_placeholders&#10;&#10;&#10;def test_answer_placeholders():&#10; placeholders = get_answer_placeholders()&#10; placeholder = placeholders[0]&#10; if placeholder == &quot;&quot;: # TODO: your condition here&#10; passed()&#10; else:&#10; failed()&#10;&#10;&#10;if __name__ == '__main__':&#10; run_common_tests()&#10; # test_answer_placeholders() # TODO: uncomment test call&#10;&#10;&#10;" />
<option name="trackChanges" value="true" />
<option name="trackLengths" value="true" />
<option name="visible" value="false" />
</TaskFile>
</value>
</entry>
</map>
</option>
<option name="updateDate" value="1560938030159" />
</EduTask>
</list>
</option>
</Lesson>
<Lesson>
<option name="customPresentableName" />
<option name="id" value="238431" />
<option name="index" value="5" />
<option name="name" value="Flatten" />
<option name="stepikChangeStatus" value="Content changed" />
<option name="updateDate" value="1560938036123" />
<option name="unitId" value="210891" />
<option name="items">
<list>
<EduTask>
<option name="customPresentableName" />
<option name="descriptionFormat" value="HTML" />
<option name="descriptionText" value="&lt;!--&#10; ~ Licensed to the Apache Software Foundation (ASF) under one&#10; ~ or more contributor license agreements. See the NOTICE file&#10; ~ distributed with this work for additional information&#10; ~ regarding copyright ownership. The ASF licenses this file&#10; ~ to you under the Apache License, Version 2.0 (the&#10; ~ &quot;License&quot;); you may not use this file except in compliance&#10; ~ with the License. You may obtain a copy of the License at&#10; ~&#10; ~ http://www.apache.org/licenses/LICENSE-2.0&#10; ~&#10; ~ Unless required by applicable law or agreed to in writing, software&#10; ~ distributed under the License is distributed on an &quot;AS IS&quot; BASIS,&#10; ~ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.&#10; ~ See the License for the specific language governing permissions and&#10; ~ limitations under the License.&#10; --&gt;&#10;&#10;&lt;html&gt;&#10;&lt;h2&gt;Flatten&lt;/h2&gt;&#10;&lt;p&gt;&#10; Flatten is a Beam transform for PCollection objects that store the same data type.&#10; Flatten merges multiple PCollection objects into a single logical PCollection.&#10;&lt;/p&gt;&#10;&lt;p&gt;&#10; &lt;b&gt;Kata:&lt;/b&gt; Implement a&#10; &lt;a href=&quot;https://beam.apache.org/releases/pydoc/current/apache_beam.transforms.core.html#apache_beam.transforms.core.Flatten&quot;&gt;&#10; Flatten&lt;/a&gt; transform that merges two PCollections of words into a single PCollection.&#10;&lt;/p&gt;&#10;&lt;br&gt;&#10;&lt;div class=&quot;hint&quot;&gt;&#10; Refer to &lt;a href=&quot;https://beam.apache.org/releases/pydoc/current/apache_beam.transforms.core.html#apache_beam.transforms.core.Flatten&quot;&gt;&#10; Flatten&lt;/a&gt; to solve this problem.&#10;&lt;/div&gt;&#10;&lt;div class=&quot;hint&quot;&gt;&#10; Refer to the Beam Programming Guide&#10; &lt;a href=&quot;https://beam.apache.org/documentation/programming-guide/#flatten&quot;&gt;&#10; &quot;Flatten&quot;&lt;/a&gt; section for 
more information.&#10;&lt;/div&gt;&#10;&lt;/html&gt;&#10;" />
<option name="feedbackLink">
<FeedbackLink>
<option name="link" />
<option name="type" value="STEPIK" />
</FeedbackLink>
</option>
<option name="id" value="755588" />
<option name="index" value="1" />
<option name="name" value="Flatten" />
<option name="record" value="-1" />
<option name="status" value="Unchecked" />
<option name="stepikChangeStatus" value="Up to date" />
<option name="files">
<map>
<entry key="task.py">
<value>
<TaskFile>
<option name="answerPlaceholders">
<list>
<AnswerPlaceholder>
<option name="hints">
<list />
</option>
<option name="index" value="0" />
<option name="initialState" />
<option name="initializedFromDependency" value="false" />
<option name="length" value="6" />
<option name="offset" value="1140" />
<option name="placeholderDependency" />
<option name="placeholderText" value="TODO()" />
<option name="possibleAnswer" value="beam.Flatten()" />
<option name="selected" value="false" />
<option name="status" value="Unchecked" />
<option name="studentAnswer" />
<option name="useLength" value="false" />
</AnswerPlaceholder>
</list>
</option>
<option name="highlightErrors" value="true" />
<option name="name" value="task.py" />
<option name="text" value="# TODO: type solution here&#10;" />
<option name="trackChanges" value="true" />
<option name="trackLengths" value="true" />
<option name="visible" value="true" />
</TaskFile>
</value>
</entry>
<entry key="tests.py">
<value>
<TaskFile>
<option name="answerPlaceholders">
<list />
</option>
<option name="highlightErrors" value="true" />
<option name="name" value="tests.py" />
<option name="text" value="from test_helper import run_common_tests, failed, passed, get_answer_placeholders&#10;&#10;&#10;def test_answer_placeholders():&#10; placeholders = get_answer_placeholders()&#10; placeholder = placeholders[0]&#10; if placeholder == &quot;&quot;: # TODO: your condition here&#10; passed()&#10; else:&#10; failed()&#10;&#10;&#10;if __name__ == '__main__':&#10; run_common_tests()&#10; # test_answer_placeholders() # TODO: uncomment test call&#10;&#10;&#10;" />
<option name="trackChanges" value="true" />
<option name="trackLengths" value="true" />
<option name="visible" value="false" />
</TaskFile>
</value>
</entry>
</map>
</option>
<option name="updateDate" value="1560938041998" />
</EduTask>
</list>
</option>
</Lesson>
<Lesson>
<option name="customPresentableName" />
<option name="id" value="238432" />
<option name="index" value="6" />
<option name="name" value="Partition" />
<option name="stepikChangeStatus" value="Content changed" />
<option name="updateDate" value="1560938052303" />
<option name="unitId" value="210892" />
<option name="items">
<list>
<EduTask>
<option name="customPresentableName" />
<option name="descriptionFormat" value="HTML" />
<option name="descriptionText" value="&lt;!--&#10; ~ Licensed to the Apache Software Foundation (ASF) under one&#10; ~ or more contributor license agreements. See the NOTICE file&#10; ~ distributed with this work for additional information&#10; ~ regarding copyright ownership. The ASF licenses this file&#10; ~ to you under the Apache License, Version 2.0 (the&#10; ~ &quot;License&quot;); you may not use this file except in compliance&#10; ~ with the License. You may obtain a copy of the License at&#10; ~&#10; ~ http://www.apache.org/licenses/LICENSE-2.0&#10; ~&#10; ~ Unless required by applicable law or agreed to in writing, software&#10; ~ distributed under the License is distributed on an &quot;AS IS&quot; BASIS,&#10; ~ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.&#10; ~ See the License for the specific language governing permissions and&#10; ~ limitations under the License.&#10; --&gt;&#10;&#10;&lt;html&gt;&#10;&lt;h2&gt;Partition&lt;/h2&gt;&#10;&lt;p&gt;&#10; Partition is a Beam transform for PCollection objects that store the same data type.&#10; Partition splits a single PCollection into a fixed number of smaller collections.&#10;&lt;/p&gt;&#10;&lt;p&gt;&#10; Partition divides the elements of a PCollection according to a partitioning function&#10; that you provide. 
The partitioning function contains the logic that determines how to split up&#10; the elements of the input PCollection into each resulting partition PCollection.&#10;&lt;/p&gt;&#10;&lt;p&gt;&#10; &lt;b&gt;Kata:&lt;/b&gt; Implement a&#10; &lt;a href=&quot;https://beam.apache.org/releases/pydoc/current/apache_beam.transforms.core.html#apache_beam.transforms.core.Partition&quot;&gt;&#10; Partition&lt;/a&gt; transform that splits a PCollection of numbers into two PCollections.&#10; The first PCollection contains numbers greater than 100, and the second PCollection contains&#10; the remaining numbers.&#10;&lt;/p&gt;&#10;&lt;br&gt;&#10;&lt;div class=&quot;hint&quot;&gt;&#10; Refer to &lt;a href=&quot;https://beam.apache.org/releases/pydoc/current/apache_beam.transforms.core.html#apache_beam.transforms.core.Partition&quot;&gt;&#10; Partition&lt;/a&gt; to solve this problem.&#10;&lt;/div&gt;&#10;&lt;div class=&quot;hint&quot;&gt;&#10; Refer to the Beam Programming Guide&#10; &lt;a href=&quot;https://beam.apache.org/documentation/programming-guide/#partition&quot;&gt;&#10; &quot;Partition&quot;&lt;/a&gt; section for more information.&#10;&lt;/div&gt;&#10;&lt;/html&gt;&#10;" />
<option name="feedbackLink">
<FeedbackLink>
<option name="link" />
<option name="type" value="STEPIK" />
</FeedbackLink>
</option>
<option name="id" value="755589" />
<option name="index" value="1" />
<option name="name" value="Partition" />
<option name="record" value="-1" />
<option name="status" value="Unchecked" />
<option name="stepikChangeStatus" value="Up to date" />
<option name="files">
<map>
<entry key="task.py">
<value>
<TaskFile>
<option name="answerPlaceholders">
<list>
<AnswerPlaceholder>
<option name="hints">
<list />
</option>
<option name="index" value="0" />
<option name="initialState" />
<option name="initializedFromDependency" value="false" />
<option name="length" value="6" />
<option name="offset" value="924" />
<option name="placeholderDependency" />
<option name="placeholderText" value="TODO()" />
<option name="possibleAnswer" value="if number &gt; 100:&#10; return 0&#10; else:&#10; return 1" />
<option name="selected" value="false" />
<option name="status" value="Unchecked" />
<option name="studentAnswer" />
<option name="useLength" value="false" />
</AnswerPlaceholder>
<AnswerPlaceholder>
<option name="hints">
<list />
</option>
<option name="index" value="1" />
<option name="initialState" />
<option name="initializedFromDependency" value="false" />
<option name="length" value="6" />
<option name="offset" value="1087" />
<option name="placeholderDependency" />
<option name="placeholderText" value="TODO()" />
<option name="possibleAnswer" value="beam.Partition(partition_fn, 2)" />
<option name="selected" value="false" />
<option name="status" value="Unchecked" />
<option name="studentAnswer" />
<option name="useLength" value="false" />
</AnswerPlaceholder>
</list>
</option>
<option name="highlightErrors" value="true" />
<option name="name" value="task.py" />
<option name="text" value="# TODO: type solution here&#10;" />
<option name="trackChanges" value="true" />
<option name="trackLengths" value="true" />
<option name="visible" value="true" />
</TaskFile>
</value>
</entry>
<entry key="tests.py">
<value>
<TaskFile>
<option name="answerPlaceholders">
<list />
</option>
<option name="highlightErrors" value="true" />
<option name="name" value="tests.py" />
<option name="text" value="from test_helper import run_common_tests, failed, passed, get_answer_placeholders&#10;&#10;&#10;def test_answer_placeholders():&#10; placeholders = get_answer_placeholders()&#10; placeholder = placeholders[0]&#10; if placeholder == &quot;&quot;: # TODO: your condition here&#10; passed()&#10; else:&#10; failed()&#10;&#10;&#10;if __name__ == '__main__':&#10; run_common_tests()&#10; # test_answer_placeholders() # TODO: uncomment test call&#10;&#10;&#10;" />
<option name="trackChanges" value="true" />
<option name="trackLengths" value="true" />
<option name="visible" value="false" />
</TaskFile>
</value>
</entry>
</map>
</option>
<option name="updateDate" value="1560938058938" />
</EduTask>
</list>
</option>
</Lesson>
<Lesson>
<option name="customPresentableName" />
<option name="id" value="238433" />
<option name="index" value="7" />
<option name="name" value="Side Input" />
<option name="stepikChangeStatus" value="Content changed" />
<option name="updateDate" value="1560938065022" />
<option name="unitId" value="210893" />
<option name="items">
<list>
<EduTask>
<option name="customPresentableName" />
<option name="descriptionFormat" value="HTML" />
<option name="descriptionText" value="&lt;!--&#10; ~ Licensed to the Apache Software Foundation (ASF) under one&#10; ~ or more contributor license agreements. See the NOTICE file&#10; ~ distributed with this work for additional information&#10; ~ regarding copyright ownership. The ASF licenses this file&#10; ~ to you under the Apache License, Version 2.0 (the&#10; ~ &quot;License&quot;); you may not use this file except in compliance&#10; ~ with the License. You may obtain a copy of the License at&#10; ~&#10; ~ http://www.apache.org/licenses/LICENSE-2.0&#10; ~&#10; ~ Unless required by applicable law or agreed to in writing, software&#10; ~ distributed under the License is distributed on an &quot;AS IS&quot; BASIS,&#10; ~ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.&#10; ~ See the License for the specific language governing permissions and&#10; ~ limitations under the License.&#10; --&gt;&#10;&#10;&lt;html&gt;&#10;&lt;h2&gt;Side Input&lt;/h2&gt;&#10;&lt;p&gt;&#10; In addition to the main input PCollection, you can provide additional inputs to a ParDo transform&#10; in the form of side inputs. A side input is an additional input that your DoFn can access each&#10; time it processes an element in the input PCollection. When you specify a side input, you create&#10; a view of some other data that can be read from within the ParDo transform’s DoFn while&#10; processing each element.&#10;&lt;/p&gt;&#10;&lt;p&gt;&#10; Side inputs are useful if your ParDo needs to inject additional data when processing each element&#10; in the input PCollection, but the additional data needs to be determined at runtime (and not&#10; hard-coded). 
Such values might be determined by the input data, or depend on a different branch&#10; of your pipeline.&#10;&lt;/p&gt;&#10;&lt;p&gt;&#10; &lt;b&gt;Kata:&lt;/b&gt; Please enrich each Person with the country based on the city he/she lives in.&#10;&lt;/p&gt;&#10;&lt;br&gt;&#10;&lt;div class=&quot;hint&quot;&gt;&#10; Override &lt;a href=&quot;https://beam.apache.org/releases/pydoc/current/apache_beam.transforms.core.html#apache_beam.transforms.core.DoFn.process&quot;&gt;&#10; process&lt;/a&gt; method that also accepts side input argument.&#10;&lt;/div&gt;&#10;&lt;div class=&quot;hint&quot;&gt;&#10; Use &lt;a href=&quot;https://beam.apache.org/releases/pydoc/current/apache_beam.transforms.core.html#apache_beam.transforms.core.ParDo&quot;&gt;&#10; ParDo&lt;/a&gt; with&#10; &lt;a href=&quot;https://beam.apache.org/releases/pydoc/current/apache_beam.transforms.core.html#apache_beam.transforms.core.DoFn&quot;&gt;&#10; DoFn&lt;/a&gt; that accepts side input.&#10;&lt;/div&gt;&#10;&lt;div class=&quot;hint&quot;&gt;&#10; Refer to the Beam Programming Guide&#10; &lt;a href=&quot;https://beam.apache.org/documentation/programming-guide/#side-inputs&quot;&gt;&quot;Side inputs&quot;&lt;/a&gt;&#10; section for more information.&#10;&lt;/div&gt;&#10;&lt;/html&gt;&#10;" />
<option name="feedbackLink">
<FeedbackLink>
<option name="link" />
<option name="type" value="STEPIK" />
</FeedbackLink>
</option>
<option name="id" value="755590" />
<option name="index" value="1" />
<option name="name" value="Side Input" />
<option name="record" value="-1" />
<option name="status" value="Unchecked" />
<option name="stepikChangeStatus" value="Up to date" />
<option name="files">
<map>
<entry key="task.py">
<value>
<TaskFile>
<option name="answerPlaceholders">
<list>
<AnswerPlaceholder>
<option name="hints">
<list />
</option>
<option name="index" value="0" />
<option name="initialState" />
<option name="initializedFromDependency" value="false" />
<option name="length" value="6" />
<option name="offset" value="1534" />
<option name="placeholderDependency" />
<option name="placeholderText" value="TODO()" />
<option name="possibleAnswer" value="def process(self, element, cities_to_countries):&#10; yield Person(element.name, element.city,&#10; cities_to_countries[element.city])" />
<option name="selected" value="false" />
<option name="status" value="Unchecked" />
<option name="studentAnswer" />
<option name="useLength" value="false" />
</AnswerPlaceholder>
<AnswerPlaceholder>
<option name="hints">
<list />
</option>
<option name="index" value="1" />
<option name="initialState" />
<option name="initializedFromDependency" value="false" />
<option name="length" value="6" />
<option name="offset" value="2096" />
<option name="placeholderDependency" />
<option name="placeholderText" value="TODO()" />
<option name="possibleAnswer" value="beam.ParDo(EnrichCountryDoFn(), cities_to_countries)" />
<option name="selected" value="false" />
<option name="status" value="Unchecked" />
<option name="studentAnswer" />
<option name="useLength" value="false" />
</AnswerPlaceholder>
</list>
</option>
<option name="highlightErrors" value="true" />
<option name="name" value="task.py" />
<option name="text" value="# TODO: type solution here&#10;" />
<option name="trackChanges" value="true" />
<option name="trackLengths" value="true" />
<option name="visible" value="true" />
</TaskFile>
</value>
</entry>
<entry key="tests.py">
<value>
<TaskFile>
<option name="answerPlaceholders">
<list />
</option>
<option name="highlightErrors" value="true" />
<option name="name" value="tests.py" />
<option name="text" value="from test_helper import run_common_tests, failed, passed, get_answer_placeholders&#10;&#10;&#10;def test_answer_placeholders():&#10; placeholders = get_answer_placeholders()&#10; placeholder = placeholders[0]&#10; if placeholder == &quot;&quot;: # TODO: your condition here&#10; passed()&#10; else:&#10; failed()&#10;&#10;&#10;if __name__ == '__main__':&#10; run_common_tests()&#10; # test_answer_placeholders() # TODO: uncomment test call&#10;&#10;&#10;" />
<option name="trackChanges" value="true" />
<option name="trackLengths" value="true" />
<option name="visible" value="false" />
</TaskFile>
</value>
</entry>
</map>
</option>
<option name="updateDate" value="1560938069904" />
</EduTask>
</list>
</option>
</Lesson>
<Lesson>
<option name="customPresentableName" />
<option name="id" value="238434" />
<option name="index" value="8" />
<option name="name" value="Side Output" />
<option name="stepikChangeStatus" value="Content changed" />
<option name="updateDate" value="1560938076976" />
<option name="unitId" value="210894" />
<option name="items">
<list>
<EduTask>
<option name="customPresentableName" />
<option name="descriptionFormat" value="HTML" />
<option name="descriptionText" value="&lt;!--&#10; ~ Licensed to the Apache Software Foundation (ASF) under one&#10; ~ or more contributor license agreements. See the NOTICE file&#10; ~ distributed with this work for additional information&#10; ~ regarding copyright ownership. The ASF licenses this file&#10; ~ to you under the Apache License, Version 2.0 (the&#10; ~ &quot;License&quot;); you may not use this file except in compliance&#10; ~ with the License. You may obtain a copy of the License at&#10; ~&#10; ~ http://www.apache.org/licenses/LICENSE-2.0&#10; ~&#10; ~ Unless required by applicable law or agreed to in writing, software&#10; ~ distributed under the License is distributed on an &quot;AS IS&quot; BASIS,&#10; ~ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.&#10; ~ See the License for the specific language governing permissions and&#10; ~ limitations under the License.&#10; --&gt;&#10;&#10;&lt;html&gt;&#10;&lt;h2&gt;Side Output&lt;/h2&gt;&#10;&lt;p&gt;&#10; While ParDo always produces a main output PCollection (as the return value from apply), you can&#10; also have your ParDo produce any number of additional output PCollections. 
If you choose to have&#10; multiple outputs, your ParDo returns all of the output PCollections (including the main output)&#10; bundled together.&#10;&lt;/p&gt;&#10;&lt;p&gt;&#10; &lt;b&gt;Kata:&lt;/b&gt; Implement an additional output in your ParDo for numbers greater than 100.&#10;&lt;/p&gt;&#10;&lt;br&gt;&#10;&lt;div class=&quot;hint&quot;&gt;&#10; Use &lt;a href=&quot;https://beam.apache.org/releases/pydoc/current/apache_beam.pvalue.html#apache_beam.pvalue.TaggedOutput&quot;&gt;&#10; pvalue.TaggedOutput&lt;/a&gt; and&#10; &lt;a href=&quot;https://beam.apache.org/releases/pydoc/current/apache_beam.transforms.core.html#apache_beam.transforms.core.ParDo.with_outputs&quot;&gt;&#10; .with_outputs&lt;/a&gt; to output multiple tagged outputs in a&#10; &lt;a href=&quot;https://beam.apache.org/releases/pydoc/current/apache_beam.transforms.core.html#apache_beam.transforms.core.ParDo&quot;&gt;&#10; ParDo&lt;/a&gt;.&#10;&lt;/div&gt;&#10;&lt;div class=&quot;hint&quot;&gt;&#10; Refer to the Beam Programming Guide&#10; &lt;a href=&quot;https://beam.apache.org/documentation/programming-guide/#additional-outputs&quot;&gt;&#10; &quot;Additional outputs&quot;&lt;/a&gt; section for more information.&#10;&lt;/div&gt;&#10;&lt;/html&gt;&#10;" />
<option name="feedbackLink">
<FeedbackLink>
<option name="link" />
<option name="type" value="STEPIK" />
</FeedbackLink>
</option>
<option name="id" value="755591" />
<option name="index" value="1" />
<option name="name" value="Side Output" />
<option name="record" value="-1" />
<option name="status" value="Unchecked" />
<option name="stepikChangeStatus" value="Up to date" />
<option name="files">
<map>
<entry key="task.py">
<value>
<TaskFile>
<option name="answerPlaceholders">
<list>
<AnswerPlaceholder>
<option name="hints">
<list />
</option>
<option name="index" value="0" />
<option name="initialState" />
<option name="initializedFromDependency" value="false" />
<option name="length" value="6" />
<option name="offset" value="1011" />
<option name="placeholderDependency" />
<option name="placeholderText" value="TODO()" />
<option name="possibleAnswer" value="def process(self, element):&#10; if element &lt;= 100:&#10; yield element&#10; else:&#10; yield pvalue.TaggedOutput(num_above_100_tag, element)" />
<option name="selected" value="false" />
<option name="status" value="Unchecked" />
<option name="studentAnswer" />
<option name="useLength" value="false" />
</AnswerPlaceholder>
<AnswerPlaceholder>
<option name="hints">
<list />
</option>
<option name="index" value="1" />
<option name="initialState" />
<option name="initializedFromDependency" value="false" />
<option name="length" value="6" />
<option name="offset" value="1264" />
<option name="placeholderDependency" />
<option name="placeholderText" value="TODO()" />
<option name="possibleAnswer" value="beam.ParDo(ProcessNumbersDoFn())&#10; .with_outputs(num_above_100_tag, main=num_below_100_tag))" />
<option name="selected" value="false" />
<option name="status" value="Unchecked" />
<option name="studentAnswer" />
<option name="useLength" value="false" />
</AnswerPlaceholder>
</list>
</option>
<option name="highlightErrors" value="true" />
<option name="name" value="task.py" />
<option name="text" value="# TODO: type solution here&#10;" />
<option name="trackChanges" value="true" />
<option name="trackLengths" value="true" />
<option name="visible" value="true" />
</TaskFile>
</value>
</entry>
<entry key="tests.py">
<value>
<TaskFile>
<option name="answerPlaceholders">
<list />
</option>
<option name="highlightErrors" value="true" />
<option name="name" value="tests.py" />
<option name="text" value="from test_helper import run_common_tests, failed, passed, get_answer_placeholders&#10;&#10;&#10;def test_answer_placeholders():&#10; placeholders = get_answer_placeholders()&#10; placeholder = placeholders[0]&#10; if placeholder == &quot;&quot;: # TODO: your condition here&#10; passed()&#10; else:&#10; failed()&#10;&#10;&#10;if __name__ == '__main__':&#10; run_common_tests()&#10; # test_answer_placeholders() # TODO: uncomment test call&#10;&#10;&#10;" />
<option name="trackChanges" value="true" />
<option name="trackLengths" value="true" />
<option name="visible" value="false" />
</TaskFile>
</value>
</entry>
</map>
</option>
<option name="updateDate" value="1560938083234" />
</EduTask>
</list>
</option>
</Lesson>
<Lesson>
<option name="customPresentableName" />
<option name="id" value="238435" />
<option name="index" value="9" />
<option name="name" value="Branching" />
<option name="stepikChangeStatus" value="Content changed" />
<option name="updateDate" value="1560938090650" />
<option name="unitId" value="210895" />
<option name="items">
<list>
<EduTask>
<option name="customPresentableName" />
<option name="descriptionFormat" value="HTML" />
<option name="descriptionText" value="&lt;!--&#10; ~ Licensed to the Apache Software Foundation (ASF) under one&#10; ~ or more contributor license agreements. See the NOTICE file&#10; ~ distributed with this work for additional information&#10; ~ regarding copyright ownership. The ASF licenses this file&#10; ~ to you under the Apache License, Version 2.0 (the&#10; ~ &quot;License&quot;); you may not use this file except in compliance&#10; ~ with the License. You may obtain a copy of the License at&#10; ~&#10; ~ http://www.apache.org/licenses/LICENSE-2.0&#10; ~&#10; ~ Unless required by applicable law or agreed to in writing, software&#10; ~ distributed under the License is distributed on an &quot;AS IS&quot; BASIS,&#10; ~ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.&#10; ~ See the License for the specific language governing permissions and&#10; ~ limitations under the License.&#10; --&gt;&#10;&#10;&lt;html&gt;&#10;&lt;h2&gt;Branching&lt;/h2&gt;&#10;&lt;p&gt;&#10; You can use the same PCollection as input for multiple transforms without consuming the input&#10; or altering it.&#10;&lt;/p&gt;&#10;&lt;p&gt;&#10; &lt;b&gt;Kata:&lt;/b&gt; Branch out the numbers to two different transforms: one transform is multiplying&#10; each number by 5 and the other transform is multiplying each number by 10.&#10;&lt;/p&gt;&#10;&lt;br&gt;&#10;&lt;div class=&quot;hint&quot;&gt;&#10; Refer to the Beam Design Your Pipeline Guide&#10; &lt;a href=&quot;https://beam.apache.org/documentation/pipelines/design-your-pipeline/#multiple-transforms-process-the-same-pcollection&quot;&gt;&#10; &quot;Multiple transforms process the same PCollection&quot;&lt;/a&gt; section for more information.&#10;&lt;/div&gt;&#10;&lt;/html&gt;&#10;" />
<option name="feedbackLink">
<FeedbackLink>
<option name="link" />
<option name="type" value="STEPIK" />
</FeedbackLink>
</option>
<option name="id" value="755592" />
<option name="index" value="1" />
<option name="name" value="Branching" />
<option name="record" value="-1" />
<option name="status" value="Unchecked" />
<option name="stepikChangeStatus" value="Up to date" />
<option name="files">
<map>
<entry key="task.py">
<value>
<TaskFile>
<option name="answerPlaceholders">
<list>
<AnswerPlaceholder>
<option name="hints">
<list />
</option>
<option name="index" value="0" />
<option name="initialState" />
<option name="initializedFromDependency" value="false" />
<option name="length" value="6" />
<option name="offset" value="945" />
<option name="placeholderDependency" />
<option name="placeholderText" value="TODO()" />
<option name="possibleAnswer" value="numbers | beam.Map(lambda num: num * 5)" />
<option name="selected" value="false" />
<option name="status" value="Unchecked" />
<option name="studentAnswer" />
<option name="useLength" value="false" />
</AnswerPlaceholder>
<AnswerPlaceholder>
<option name="hints">
<list />
</option>
<option name="index" value="1" />
<option name="initialState" />
<option name="initializedFromDependency" value="false" />
<option name="length" value="6" />
<option name="offset" value="1002" />
<option name="placeholderDependency" />
<option name="placeholderText" value="TODO()" />
<option name="possibleAnswer" value="numbers | beam.Map(lambda num: num * 10)" />
<option name="selected" value="false" />
<option name="status" value="Unchecked" />
<option name="studentAnswer" />
<option name="useLength" value="false" />
</AnswerPlaceholder>
</list>
</option>
<option name="highlightErrors" value="true" />
<option name="name" value="task.py" />
<option name="text" value="# TODO: type solution here&#10;" />
<option name="trackChanges" value="true" />
<option name="trackLengths" value="true" />
<option name="visible" value="true" />
</TaskFile>
</value>
</entry>
<entry key="tests.py">
<value>
<TaskFile>
<option name="answerPlaceholders">
<list />
</option>
<option name="highlightErrors" value="true" />
<option name="name" value="tests.py" />
<option name="text" value="from test_helper import run_common_tests, failed, passed, get_answer_placeholders&#10;&#10;&#10;def test_answer_placeholders():&#10; placeholders = get_answer_placeholders()&#10; placeholder = placeholders[0]&#10; if placeholder == &quot;&quot;: # TODO: your condition here&#10; passed()&#10; else:&#10; failed()&#10;&#10;&#10;if __name__ == '__main__':&#10; run_common_tests()&#10; # test_answer_placeholders() # TODO: uncomment test call&#10;&#10;&#10;" />
<option name="trackChanges" value="true" />
<option name="trackLengths" value="true" />
<option name="visible" value="false" />
</TaskFile>
</value>
</entry>
</map>
</option>
<option name="updateDate" value="1560938095634" />
</EduTask>
</list>
</option>
</Lesson>
<Lesson>
<option name="customPresentableName" />
<option name="id" value="238436" />
<option name="index" value="10" />
<option name="name" value="Composite Transform" />
<option name="stepikChangeStatus" value="Content changed" />
<option name="updateDate" value="1560938102699" />
<option name="unitId" value="210896" />
<option name="items">
<list>
<EduTask>
<option name="customPresentableName" />
<option name="descriptionFormat" value="HTML" />
<option name="descriptionText" value="&lt;!--&#10; ~ Licensed to the Apache Software Foundation (ASF) under one&#10; ~ or more contributor license agreements. See the NOTICE file&#10; ~ distributed with this work for additional information&#10; ~ regarding copyright ownership. The ASF licenses this file&#10; ~ to you under the Apache License, Version 2.0 (the&#10; ~ &quot;License&quot;); you may not use this file except in compliance&#10; ~ with the License. You may obtain a copy of the License at&#10; ~&#10; ~ http://www.apache.org/licenses/LICENSE-2.0&#10; ~&#10; ~ Unless required by applicable law or agreed to in writing, software&#10; ~ distributed under the License is distributed on an &quot;AS IS&quot; BASIS,&#10; ~ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.&#10; ~ See the License for the specific language governing permissions and&#10; ~ limitations under the License.&#10; --&gt;&#10;&#10;&lt;html&gt;&#10;&lt;h2&gt;Composite Transform&lt;/h2&gt;&#10;&lt;p&gt;&#10; Transforms can have a nested structure, where a complex transform performs multiple simpler&#10; transforms (such as more than one ParDo, Combine, GroupByKey, or even other composite transforms).&#10; These transforms are called composite transforms. Nesting multiple transforms inside a single&#10; composite transform can make your code more modular and easier to understand.&#10;&lt;/p&gt;&#10;&lt;p&gt;&#10; To create your own composite transform, create a subclass of the PTransform class and override&#10; the expand method to specify the actual processing logic. You can then use this transform just as&#10; you would a built-in transform from the Beam SDK. Within your PTransform subclass, you’ll need to&#10; override the expand method. The expand method is where you add the processing logic for the&#10; PTransform. 
Your override of expand must accept the appropriate type of input PCollection as a&#10; parameter, and specify the output PCollection as the return value.&#10;&lt;/p&gt;&#10;&lt;p&gt;&#10; &lt;b&gt;Kata:&lt;/b&gt; Please implement a composite transform &quot;ExtractAndMultiplyNumbers&quot; that extracts&#10; numbers from a comma-separated line and then multiplies each number by 10.&#10;&lt;/p&gt;&#10;&lt;br&gt;&#10;&lt;div class=&quot;hint&quot;&gt;&#10; Refer to &lt;a href=&quot;https://beam.apache.org/releases/pydoc/current/apache_beam.transforms.ptransform.html#apache_beam.transforms.ptransform.PTransform&quot;&gt;&#10; PTransform&lt;/a&gt;.&#10;&lt;/div&gt;&#10;&lt;div class=&quot;hint&quot;&gt;&#10; Refer to the Beam Programming Guide&#10; &lt;a href=&quot;https://beam.apache.org/documentation/programming-guide/#composite-transforms&quot;&gt;&#10; &quot;Composite transforms&quot;&lt;/a&gt; section for more information.&#10;&lt;/div&gt;&#10;&lt;/html&gt;&#10;" />
<option name="feedbackLink">
<FeedbackLink>
<option name="link" />
<option name="type" value="STEPIK" />
</FeedbackLink>
</option>
<option name="id" value="755593" />
<option name="index" value="1" />
<option name="name" value="Composite Transform" />
<option name="record" value="-1" />
<option name="status" value="Unchecked" />
<option name="stepikChangeStatus" value="Up to date" />
<option name="files">
<map>
<entry key="task.py">
<value>
<TaskFile>
<option name="answerPlaceholders">
<list>
<AnswerPlaceholder>
<option name="hints">
<list />
</option>
<option name="index" value="0" />
<option name="initialState" />
<option name="initializedFromDependency" value="false" />
<option name="length" value="6" />
<option name="offset" value="920" />
<option name="placeholderDependency" />
<option name="placeholderText" value="TODO()" />
<option name="possibleAnswer" value="def expand(self, pcoll):&#10; return (pcoll&#10; | beam.FlatMap(lambda line: map(int, line.split(',')))&#10; | beam.Map(lambda num: num * 10)&#10; )" />
<option name="selected" value="false" />
<option name="status" value="Unchecked" />
<option name="studentAnswer" />
<option name="useLength" value="false" />
</AnswerPlaceholder>
<AnswerPlaceholder>
<option name="hints">
<list />
</option>
<option name="index" value="1" />
<option name="initialState" />
<option name="initializedFromDependency" value="false" />
<option name="length" value="6" />
<option name="offset" value="1179" />
<option name="placeholderDependency" />
<option name="placeholderText" value="TODO()" />
<option name="possibleAnswer" value="ExtractAndMultiplyNumbers()" />
<option name="selected" value="false" />
<option name="status" value="Unchecked" />
<option name="studentAnswer" />
<option name="useLength" value="false" />
</AnswerPlaceholder>
</list>
</option>
<option name="highlightErrors" value="true" />
<option name="name" value="task.py" />
<option name="text" value="# TODO: type solution here&#10;" />
<option name="trackChanges" value="true" />
<option name="trackLengths" value="true" />
<option name="visible" value="true" />
</TaskFile>
</value>
</entry>
<entry key="tests.py">
<value>
<TaskFile>
<option name="answerPlaceholders">
<list />
</option>
<option name="highlightErrors" value="true" />
<option name="name" value="tests.py" />
<option name="text" value="from test_helper import run_common_tests, failed, passed, get_answer_placeholders&#10;&#10;&#10;def test_answer_placeholders():&#10; placeholders = get_answer_placeholders()&#10; placeholder = placeholders[0]&#10; if placeholder == &quot;&quot;: # TODO: your condition here&#10; passed()&#10; else:&#10; failed()&#10;&#10;&#10;if __name__ == '__main__':&#10; run_common_tests()&#10; # test_answer_placeholders() # TODO: uncomment test call&#10;&#10;&#10;" />
<option name="trackChanges" value="true" />
<option name="trackLengths" value="true" />
<option name="visible" value="false" />
</TaskFile>
</value>
</entry>
</map>
</option>
<option name="updateDate" value="1560938107880" />
</EduTask>
</list>
</option>
</Lesson>
</list>
</option>
</Section>
<Section>
<option name="courseId" value="54532" />
<option name="customPresentableName" />
<option name="id" value="85646" />
<option name="index" value="3" />
<option name="name" value="Common Transforms" />
<option name="position" value="0" />
<option name="stepikChangeStatus" value="Up to date" />
<option name="updateDate" value="1560431009000" />
<option name="items">
<list>
<Lesson>
<option name="customPresentableName" />
<option name="id" value="238437" />
<option name="index" value="1" />
<option name="name" value="Filter" />
<option name="stepikChangeStatus" value="Content changed" />
<option name="updateDate" value="1560938208485" />
<option name="unitId" value="210897" />
<option name="items">
<list>
<EduTask>
<option name="customPresentableName" />
<option name="descriptionFormat" value="HTML" />
<option name="descriptionText" value="&lt;!--&#10; ~ Licensed to the Apache Software Foundation (ASF) under one&#10; ~ or more contributor license agreements. See the NOTICE file&#10; ~ distributed with this work for additional information&#10; ~ regarding copyright ownership. The ASF licenses this file&#10; ~ to you under the Apache License, Version 2.0 (the&#10; ~ &quot;License&quot;); you may not use this file except in compliance&#10; ~ with the License. You may obtain a copy of the License at&#10; ~&#10; ~ http://www.apache.org/licenses/LICENSE-2.0&#10; ~&#10; ~ Unless required by applicable law or agreed to in writing, software&#10; ~ distributed under the License is distributed on an &quot;AS IS&quot; BASIS,&#10; ~ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.&#10; ~ See the License for the specific language governing permissions and&#10; ~ limitations under the License.&#10; --&gt;&#10;&#10;&lt;html&gt;&#10;&lt;h2&gt;Filter using ParDo&lt;/h2&gt;&#10;&lt;p&gt;&#10; &lt;b&gt;Kata:&lt;/b&gt; Implement a filter function that filters out the even numbers by using&#10; &lt;a href=&quot;https://beam.apache.org/releases/pydoc/current/apache_beam.transforms.core.html#apache_beam.transforms.core.ParDo&quot;&gt;&#10; ParDo&lt;/a&gt;.&#10;&lt;/p&gt;&#10;&lt;br&gt;&#10;&lt;div class=&quot;hint&quot;&gt;&#10; Override &lt;a href=&quot;https://beam.apache.org/releases/pydoc/current/apache_beam.transforms.core.html#apache_beam.transforms.core.DoFn.process&quot;&gt;&#10; process&lt;/a&gt; method. You can use &quot;yield&quot; for each intended element.&#10;&lt;/div&gt;&#10;&lt;/html&gt;&#10;" />
<option name="feedbackLink">
<FeedbackLink>
<option name="link" />
<option name="type" value="STEPIK" />
</FeedbackLink>
</option>
<option name="id" value="755595" />
<option name="index" value="1" />
<option name="name" value="ParDo" />
<option name="record" value="-1" />
<option name="status" value="Unchecked" />
<option name="stepikChangeStatus" value="Up to date" />
<option name="files">
<map>
<entry key="task.py">
<value>
<TaskFile>
<option name="answerPlaceholders">
<list>
<AnswerPlaceholder>
<option name="hints">
<list />
</option>
<option name="index" value="0" />
<option name="initialState" />
<option name="initializedFromDependency" value="false" />
<option name="length" value="6" />
<option name="offset" value="942" />
<option name="placeholderDependency" />
<option name="placeholderText" value="TODO()" />
<option name="possibleAnswer" value="def process(self, element):&#10; if element % 2 == 1:&#10; yield element" />
<option name="selected" value="false" />
<option name="status" value="Unchecked" />
<option name="studentAnswer" />
<option name="useLength" value="false" />
</AnswerPlaceholder>
</list>
</option>
<option name="highlightErrors" value="true" />
<option name="name" value="task.py" />
<option name="text" value="# TODO: type solution here&#10;" />
<option name="trackChanges" value="true" />
<option name="trackLengths" value="true" />
<option name="visible" value="true" />
</TaskFile>
</value>
</entry>
<entry key="tests.py">
<value>
<TaskFile>
<option name="answerPlaceholders">
<list />
</option>
<option name="highlightErrors" value="true" />
<option name="name" value="tests.py" />
<option name="text" value="from test_helper import run_common_tests, failed, passed, get_answer_placeholders&#10;&#10;&#10;def test_answer_placeholders():&#10; placeholders = get_answer_placeholders()&#10; placeholder = placeholders[0]&#10; if placeholder == &quot;&quot;: # TODO: your condition here&#10; passed()&#10; else:&#10; failed()&#10;&#10;&#10;if __name__ == '__main__':&#10; run_common_tests()&#10; # test_answer_placeholders() # TODO: uncomment test call&#10;&#10;&#10;" />
<option name="trackChanges" value="true" />
<option name="trackLengths" value="true" />
<option name="visible" value="false" />
</TaskFile>
</value>
</entry>
</map>
</option>
<option name="updateDate" value="1560938213611" />
</EduTask>
<EduTask>
<option name="customPresentableName" />
<option name="descriptionFormat" value="HTML" />
<option name="descriptionText" value="&lt;!--&#10; ~ Licensed to the Apache Software Foundation (ASF) under one&#10; ~ or more contributor license agreements. See the NOTICE file&#10; ~ distributed with this work for additional information&#10; ~ regarding copyright ownership. The ASF licenses this file&#10; ~ to you under the Apache License, Version 2.0 (the&#10; ~ &quot;License&quot;); you may not use this file except in compliance&#10; ~ with the License. You may obtain a copy of the License at&#10; ~&#10; ~ http://www.apache.org/licenses/LICENSE-2.0&#10; ~&#10; ~ Unless required by applicable law or agreed to in writing, software&#10; ~ distributed under the License is distributed on an &quot;AS IS&quot; BASIS,&#10; ~ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.&#10; ~ See the License for the specific language governing permissions and&#10; ~ limitations under the License.&#10; --&gt;&#10;&#10;&lt;html&gt;&#10;&lt;h2&gt;Filter&lt;/h2&gt;&#10;&lt;p&gt;&#10; The Beam SDKs provide language-specific ways to simplify how you provide your DoFn implementation.&#10;&lt;/p&gt;&#10;&lt;p&gt;&#10; &lt;b&gt;Kata:&lt;/b&gt; Implement a filter function that filters out the odd numbers by using&#10; &lt;a href=&quot;https://beam.apache.org/releases/pydoc/current/apache_beam.transforms.core.html#apache_beam.transforms.core.Filter&quot;&gt;&#10; Filter&lt;/a&gt;.&#10;&lt;/p&gt;&#10;&lt;br&gt;&#10;&lt;div class=&quot;hint&quot;&gt;&#10; Use &lt;a href=&quot;https://beam.apache.org/releases/pydoc/current/apache_beam.transforms.core.html#apache_beam.transforms.core.Filter&quot;&gt;&#10; Filter&lt;/a&gt; with a lambda.&#10;&lt;/div&gt;&#10;&lt;/html&gt;&#10;" />
<option name="feedbackLink">
<FeedbackLink>
<option name="link" />
<option name="type" value="STEPIK" />
</FeedbackLink>
</option>
<option name="id" value="755596" />
<option name="index" value="2" />
<option name="name" value="Filter" />
<option name="record" value="-1" />
<option name="status" value="Unchecked" />
<option name="stepikChangeStatus" value="Up to date" />
<option name="files">
<map>
<entry key="task.py">
<value>
<TaskFile>
<option name="answerPlaceholders">
<list>
<AnswerPlaceholder>
<option name="hints">
<list />
</option>
<option name="index" value="0" />
<option name="initialState" />
<option name="initializedFromDependency" value="false" />
<option name="length" value="6" />
<option name="offset" value="934" />
<option name="placeholderDependency" />
<option name="placeholderText" value="TODO()" />
<option name="possibleAnswer" value="beam.Filter(lambda num: num % 2 == 0)" />
<option name="selected" value="false" />
<option name="status" value="Unchecked" />
<option name="studentAnswer" />
<option name="useLength" value="false" />
</AnswerPlaceholder>
</list>
</option>
<option name="highlightErrors" value="true" />
<option name="name" value="task.py" />
<option name="text" value="# TODO: type solution here&#10;" />
<option name="trackChanges" value="true" />
<option name="trackLengths" value="true" />
<option name="visible" value="true" />
</TaskFile>
</value>
</entry>
<entry key="tests.py">
<value>
<TaskFile>
<option name="answerPlaceholders">
<list />
</option>
<option name="highlightErrors" value="true" />
<option name="name" value="tests.py" />
<option name="text" value="from test_helper import run_common_tests, failed, passed, get_answer_placeholders&#10;&#10;&#10;def test_answer_placeholders():&#10; placeholders = get_answer_placeholders()&#10; placeholder = placeholders[0]&#10; if placeholder == &quot;&quot;: # TODO: your condition here&#10; passed()&#10; else:&#10; failed()&#10;&#10;&#10;if __name__ == '__main__':&#10; run_common_tests()&#10; # test_answer_placeholders() # TODO: uncomment test call&#10;&#10;&#10;" />
<option name="trackChanges" value="true" />
<option name="trackLengths" value="true" />
<option name="visible" value="false" />
</TaskFile>
</value>
</entry>
</map>
</option>
<option name="updateDate" value="1560938217127" />
</EduTask>
</list>
</option>
</Lesson>
<Lesson>
<option name="customPresentableName" />
<option name="id" value="238438" />
<option name="index" value="2" />
<option name="name" value="Aggregation" />
<option name="stepikChangeStatus" value="Content changed" />
<option name="updateDate" value="1560938223924" />
<option name="unitId" value="210898" />
<option name="items">
<list>
<EduTask>
<option name="customPresentableName" />
<option name="descriptionFormat" value="HTML" />
<option name="descriptionText" value="&lt;!--&#10; ~ Licensed to the Apache Software Foundation (ASF) under one&#10; ~ or more contributor license agreements. See the NOTICE file&#10; ~ distributed with this work for additional information&#10; ~ regarding copyright ownership. The ASF licenses this file&#10; ~ to you under the Apache License, Version 2.0 (the&#10; ~ &quot;License&quot;); you may not use this file except in compliance&#10; ~ with the License. You may obtain a copy of the License at&#10; ~&#10; ~ http://www.apache.org/licenses/LICENSE-2.0&#10; ~&#10; ~ Unless required by applicable law or agreed to in writing, software&#10; ~ distributed under the License is distributed on an &quot;AS IS&quot; BASIS,&#10; ~ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.&#10; ~ See the License for the specific language governing permissions and&#10; ~ limitations under the License.&#10; --&gt;&#10;&#10;&lt;html&gt;&#10;&lt;h2&gt;Aggregation - Count&lt;/h2&gt;&#10;&lt;p&gt;&#10; &lt;b&gt;Kata:&lt;/b&gt; Count the number of elements from an input.&#10;&lt;/p&gt;&#10;&lt;br&gt;&#10;&lt;div class=&quot;hint&quot;&gt;&#10; Use &lt;a href=&quot;https://beam.apache.org/releases/pydoc/current/apache_beam.transforms.combiners.html#apache_beam.transforms.combiners.Count&quot;&gt;&#10; Count&lt;/a&gt;.&#10;&lt;/div&gt;&#10;&lt;/html&gt;&#10;" />
<option name="feedbackLink">
<FeedbackLink>
<option name="link" />
<option name="type" value="STEPIK" />
</FeedbackLink>
</option>
<option name="id" value="755597" />
<option name="index" value="1" />
<option name="name" value="Count" />
<option name="record" value="-1" />
<option name="status" value="Unchecked" />
<option name="stepikChangeStatus" value="Up to date" />
<option name="files">
<map>
<entry key="task.py">
<value>
<TaskFile>
<option name="answerPlaceholders">
<list>
<AnswerPlaceholder>
<option name="hints">
<list />
</option>
<option name="index" value="0" />
<option name="initialState" />
<option name="initializedFromDependency" value="false" />
<option name="length" value="6" />
<option name="offset" value="934" />
<option name="placeholderDependency" />
<option name="placeholderText" value="TODO()" />
<option name="possibleAnswer" value="beam.combiners.Count.Globally()" />
<option name="selected" value="false" />
<option name="status" value="Unchecked" />
<option name="studentAnswer" />
<option name="useLength" value="false" />
</AnswerPlaceholder>
</list>
</option>
<option name="highlightErrors" value="true" />
<option name="name" value="task.py" />
<option name="text" value="# TODO: type solution here&#10;" />
<option name="trackChanges" value="true" />
<option name="trackLengths" value="true" />
<option name="visible" value="true" />
</TaskFile>
</value>
</entry>
<entry key="tests.py">
<value>
<TaskFile>
<option name="answerPlaceholders">
<list />
</option>
<option name="highlightErrors" value="true" />
<option name="name" value="tests.py" />
<option name="text" value="from test_helper import run_common_tests, failed, passed, get_answer_placeholders&#10;&#10;&#10;def test_answer_placeholders():&#10; placeholders = get_answer_placeholders()&#10; placeholder = placeholders[0]&#10; if placeholder == &quot;&quot;: # TODO: your condition here&#10; passed()&#10; else:&#10; failed()&#10;&#10;&#10;if __name__ == '__main__':&#10; run_common_tests()&#10; # test_answer_placeholders() # TODO: uncomment test call&#10;&#10;&#10;" />
<option name="trackChanges" value="true" />
<option name="trackLengths" value="true" />
<option name="visible" value="false" />
</TaskFile>
</value>
</entry>
</map>
</option>
<option name="updateDate" value="1560938230679" />
</EduTask>
<EduTask>
<option name="customPresentableName" />
<option name="descriptionFormat" value="HTML" />
<option name="descriptionText" value="&lt;!--&#10; ~ Licensed to the Apache Software Foundation (ASF) under one&#10; ~ or more contributor license agreements. See the NOTICE file&#10; ~ distributed with this work for additional information&#10; ~ regarding copyright ownership. The ASF licenses this file&#10; ~ to you under the Apache License, Version 2.0 (the&#10; ~ &quot;License&quot;); you may not use this file except in compliance&#10; ~ with the License. You may obtain a copy of the License at&#10; ~&#10; ~ http://www.apache.org/licenses/LICENSE-2.0&#10; ~&#10; ~ Unless required by applicable law or agreed to in writing, software&#10; ~ distributed under the License is distributed on an &quot;AS IS&quot; BASIS,&#10; ~ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.&#10; ~ See the License for the specific language governing permissions and&#10; ~ limitations under the License.&#10; --&gt;&#10;&#10;&lt;html&gt;&#10;&lt;h2&gt;Aggregation - Sum&lt;/h2&gt;&#10;&lt;p&gt;&#10; &lt;b&gt;Kata:&lt;/b&gt; Compute the sum of all elements from an input.&#10;&lt;/p&gt;&#10;&lt;br&gt;&#10;&lt;div class=&quot;hint&quot;&gt;&#10; Use &lt;a href=&quot;https://beam.apache.org/releases/pydoc/current/apache_beam.transforms.core.html#apache_beam.transforms.core.CombineGlobally&quot;&gt;&#10; CombineGlobally&lt;/a&gt; and Python built-in&#10; &lt;a href=&quot;https://docs.python.org/2/library/functions.html#sum&quot;&gt;sum&lt;/a&gt;.&#10;&lt;/div&gt;&#10;&lt;/html&gt;&#10;" />
<option name="feedbackLink">
<FeedbackLink>
<option name="link" />
<option name="type" value="STEPIK" />
</FeedbackLink>
</option>
<option name="id" value="755598" />
<option name="index" value="2" />
<option name="name" value="Sum" />
<option name="record" value="-1" />
<option name="status" value="Unchecked" />
<option name="stepikChangeStatus" value="Up to date" />
<option name="files">
<map>
<entry key="task.py">
<value>
<TaskFile>
<option name="answerPlaceholders">
<list>
<AnswerPlaceholder>
<option name="hints">
<list />
</option>
<option name="index" value="0" />
<option name="initialState" />
<option name="initializedFromDependency" value="false" />
<option name="length" value="6" />
<option name="offset" value="934" />
<option name="placeholderDependency" />
<option name="placeholderText" value="TODO()" />
<option name="possibleAnswer" value="beam.CombineGlobally(sum)" />
<option name="selected" value="false" />
<option name="status" value="Unchecked" />
<option name="studentAnswer" />
<option name="useLength" value="false" />
</AnswerPlaceholder>
</list>
</option>
<option name="highlightErrors" value="true" />
<option name="name" value="task.py" />
<option name="text" value="# TODO: type solution here&#10;" />
<option name="trackChanges" value="true" />
<option name="trackLengths" value="true" />
<option name="visible" value="true" />
</TaskFile>
</value>
</entry>
<entry key="tests.py">
<value>
<TaskFile>
<option name="answerPlaceholders">
<list />
</option>
<option name="highlightErrors" value="true" />
<option name="name" value="tests.py" />
<option name="text" value="from test_helper import run_common_tests, failed, passed, get_answer_placeholders&#10;&#10;&#10;def test_answer_placeholders():&#10; placeholders = get_answer_placeholders()&#10; placeholder = placeholders[0]&#10; if placeholder == &quot;&quot;: # TODO: your condition here&#10; passed()&#10; else:&#10; failed()&#10;&#10;&#10;if __name__ == '__main__':&#10; run_common_tests()&#10; # test_answer_placeholders() # TODO: uncomment test call&#10;&#10;&#10;" />
<option name="trackChanges" value="true" />
<option name="trackLengths" value="true" />
<option name="visible" value="false" />
</TaskFile>
</value>
</entry>
</map>
</option>
<option name="updateDate" value="1560938232928" />
</EduTask>
<EduTask>
<option name="customPresentableName" />
<option name="descriptionFormat" value="HTML" />
<option name="descriptionText" value="&lt;!--&#10; ~ Licensed to the Apache Software Foundation (ASF) under one&#10; ~ or more contributor license agreements. See the NOTICE file&#10; ~ distributed with this work for additional information&#10; ~ regarding copyright ownership. The ASF licenses this file&#10; ~ to you under the Apache License, Version 2.0 (the&#10; ~ &quot;License&quot;); you may not use this file except in compliance&#10; ~ with the License. You may obtain a copy of the License at&#10; ~&#10; ~ http://www.apache.org/licenses/LICENSE-2.0&#10; ~&#10; ~ Unless required by applicable law or agreed to in writing, software&#10; ~ distributed under the License is distributed on an &quot;AS IS&quot; BASIS,&#10; ~ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.&#10; ~ See the License for the specific language governing permissions and&#10; ~ limitations under the License.&#10; --&gt;&#10;&#10;&lt;html&gt;&#10;&lt;h2&gt;Aggregation - Mean&lt;/h2&gt;&#10;&lt;p&gt;&#10; &lt;b&gt;Kata:&lt;/b&gt; Compute the mean/average of all elements from an input.&#10;&lt;/p&gt;&#10;&lt;br&gt;&#10;&lt;div class=&quot;hint&quot;&gt;&#10; Use &lt;a href=&quot;https://beam.apache.org/releases/pydoc/current/apache_beam.transforms.combiners.html#apache_beam.transforms.combiners.Mean&quot;&gt;&#10; Mean&lt;/a&gt;.&#10;&lt;/div&gt;&#10;&lt;/html&gt;&#10;" />
<option name="feedbackLink">
<FeedbackLink>
<option name="link" />
<option name="type" value="STEPIK" />
</FeedbackLink>
</option>
<option name="id" value="755599" />
<option name="index" value="3" />
<option name="name" value="Mean" />
<option name="record" value="-1" />
<option name="status" value="Unchecked" />
<option name="stepikChangeStatus" value="Up to date" />
<option name="files">
<map>
<entry key="task.py">
<value>
<TaskFile>
<option name="answerPlaceholders">
<list>
<AnswerPlaceholder>
<option name="hints">
<list />
</option>
<option name="index" value="0" />
<option name="initialState" />
<option name="initializedFromDependency" value="false" />
<option name="length" value="6" />
<option name="offset" value="934" />
<option name="placeholderDependency" />
<option name="placeholderText" value="TODO()" />
<option name="possibleAnswer" value="beam.combiners.Mean.Globally()" />
<option name="selected" value="false" />
<option name="status" value="Unchecked" />
<option name="studentAnswer" />
<option name="useLength" value="false" />
</AnswerPlaceholder>
</list>
</option>
<option name="highlightErrors" value="true" />
<option name="name" value="task.py" />
<option name="text" value="# TODO: type solution here&#10;" />
<option name="trackChanges" value="true" />
<option name="trackLengths" value="true" />
<option name="visible" value="true" />
</TaskFile>
</value>
</entry>
<entry key="tests.py">
<value>
<TaskFile>
<option name="answerPlaceholders">
<list />
</option>
<option name="highlightErrors" value="true" />
<option name="name" value="tests.py" />
<option name="text" value="from test_helper import run_common_tests, failed, passed, get_answer_placeholders&#10;&#10;&#10;def test_answer_placeholders():&#10; placeholders = get_answer_placeholders()&#10; placeholder = placeholders[0]&#10; if placeholder == &quot;&quot;: # TODO: your condition here&#10; passed()&#10; else:&#10; failed()&#10;&#10;&#10;if __name__ == '__main__':&#10; run_common_tests()&#10; # test_answer_placeholders() # TODO: uncomment test call&#10;&#10;&#10;" />
<option name="trackChanges" value="true" />
<option name="trackLengths" value="true" />
<option name="visible" value="false" />
</TaskFile>
</value>
</entry>
</map>
</option>
<option name="updateDate" value="1560938235730" />
</EduTask>
<EduTask>
<option name="customPresentableName" />
<option name="descriptionFormat" value="HTML" />
<option name="descriptionText" value="&lt;!--&#10; ~ Licensed to the Apache Software Foundation (ASF) under one&#10; ~ or more contributor license agreements. See the NOTICE file&#10; ~ distributed with this work for additional information&#10; ~ regarding copyright ownership. The ASF licenses this file&#10; ~ to you under the Apache License, Version 2.0 (the&#10; ~ &quot;License&quot;); you may not use this file except in compliance&#10; ~ with the License. You may obtain a copy of the License at&#10; ~&#10; ~ http://www.apache.org/licenses/LICENSE-2.0&#10; ~&#10; ~ Unless required by applicable law or agreed to in writing, software&#10; ~ distributed under the License is distributed on an &quot;AS IS&quot; BASIS,&#10; ~ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.&#10; ~ See the License for the specific language governing permissions and&#10; ~ limitations under the License.&#10; --&gt;&#10;&#10;&lt;html&gt;&#10;&lt;h2&gt;Aggregation - Smallest&lt;/h2&gt;&#10;&lt;p&gt;&#10; &lt;b&gt;Kata:&lt;/b&gt; Compute the smallest of the elements from an input.&#10;&lt;/p&gt;&#10;&lt;br&gt;&#10;&lt;div class=&quot;hint&quot;&gt;&#10; Use &lt;a href=&quot;https://beam.apache.org/releases/pydoc/current/apache_beam.transforms.combiners.html#apache_beam.transforms.combiners.Top.Smallest&quot;&gt;&#10; Top.Smallest&lt;/a&gt;.&#10;&lt;/div&gt;&#10;&lt;/html&gt;&#10;" />
<option name="feedbackLink">
<FeedbackLink>
<option name="link" />
<option name="type" value="STEPIK" />
</FeedbackLink>
</option>
<option name="id" value="755600" />
<option name="index" value="4" />
<option name="name" value="Smallest" />
<option name="record" value="-1" />
<option name="status" value="Unchecked" />
<option name="stepikChangeStatus" value="Up to date" />
<option name="files">
<map>
<entry key="task.py">
<value>
<TaskFile>
<option name="answerPlaceholders">
<list>
<AnswerPlaceholder>
<option name="hints">
<list />
</option>
<option name="index" value="0" />
<option name="initialState" />
<option name="initializedFromDependency" value="false" />
<option name="length" value="6" />
<option name="offset" value="934" />
<option name="placeholderDependency" />
<option name="placeholderText" value="TODO()" />
<option name="possibleAnswer" value="beam.combiners.Top.Smallest(1)" />
<option name="selected" value="false" />
<option name="status" value="Unchecked" />
<option name="studentAnswer" />
<option name="useLength" value="false" />
</AnswerPlaceholder>
</list>
</option>
<option name="highlightErrors" value="true" />
<option name="name" value="task.py" />
<option name="text" value="# TODO: type solution here&#10;" />
<option name="trackChanges" value="true" />
<option name="trackLengths" value="true" />
<option name="visible" value="true" />
</TaskFile>
</value>
</entry>
<entry key="tests.py">
<value>
<TaskFile>
<option name="answerPlaceholders">
<list />
</option>
<option name="highlightErrors" value="true" />
<option name="name" value="tests.py" />
<option name="text" value="from test_helper import run_common_tests, failed, passed, get_answer_placeholders&#10;&#10;&#10;def test_answer_placeholders():&#10; placeholders = get_answer_placeholders()&#10; placeholder = placeholders[0]&#10; if placeholder == &quot;&quot;: # TODO: your condition here&#10; passed()&#10; else:&#10; failed()&#10;&#10;&#10;if __name__ == '__main__':&#10; run_common_tests()&#10; # test_answer_placeholders() # TODO: uncomment test call&#10;&#10;&#10;" />
<option name="trackChanges" value="true" />
<option name="trackLengths" value="true" />
<option name="visible" value="false" />
</TaskFile>
</value>
</entry>
</map>
</option>
<option name="updateDate" value="1560938237747" />
</EduTask>
<EduTask>
<option name="customPresentableName" />
<option name="descriptionFormat" value="HTML" />
<option name="descriptionText" value="&lt;!--&#10; ~ Licensed to the Apache Software Foundation (ASF) under one&#10; ~ or more contributor license agreements. See the NOTICE file&#10; ~ distributed with this work for additional information&#10; ~ regarding copyright ownership. The ASF licenses this file&#10; ~ to you under the Apache License, Version 2.0 (the&#10; ~ &quot;License&quot;); you may not use this file except in compliance&#10; ~ with the License. You may obtain a copy of the License at&#10; ~&#10; ~ http://www.apache.org/licenses/LICENSE-2.0&#10; ~&#10; ~ Unless required by applicable law or agreed to in writing, software&#10; ~ distributed under the License is distributed on an &quot;AS IS&quot; BASIS,&#10; ~ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.&#10; ~ See the License for the specific language governing permissions and&#10; ~ limitations under the License.&#10; --&gt;&#10;&#10;&lt;html&gt;&#10;&lt;h2&gt;Aggregation - Largest&lt;/h2&gt;&#10;&lt;p&gt;&#10; &lt;b&gt;Kata:&lt;/b&gt; Compute the largest of the elements from an input.&#10;&lt;/p&gt;&#10;&lt;br&gt;&#10;&lt;div class=&quot;hint&quot;&gt;&#10; Use &lt;a href=&quot;https://beam.apache.org/releases/pydoc/current/apache_beam.transforms.combiners.html#apache_beam.transforms.combiners.Top.Largest&quot;&gt;&#10; Top.Largest&lt;/a&gt;.&#10;&lt;/div&gt;&#10;&lt;/html&gt;&#10;" />
<option name="feedbackLink">
<FeedbackLink>
<option name="link" />
<option name="type" value="STEPIK" />
</FeedbackLink>
</option>
<option name="id" value="755601" />
<option name="index" value="5" />
<option name="name" value="Largest" />
<option name="record" value="-1" />
<option name="status" value="Unchecked" />
<option name="stepikChangeStatus" value="Up to date" />
<option name="files">
<map>
<entry key="task.py">
<value>
<TaskFile>
<option name="answerPlaceholders">
<list>
<AnswerPlaceholder>
<option name="hints">
<list />
</option>
<option name="index" value="0" />
<option name="initialState" />
<option name="initializedFromDependency" value="false" />
<option name="length" value="6" />
<option name="offset" value="934" />
<option name="placeholderDependency" />
<option name="placeholderText" value="TODO()" />
<option name="possibleAnswer" value="beam.combiners.Top.Largest(1)" />
<option name="selected" value="false" />
<option name="status" value="Unchecked" />
<option name="studentAnswer" />
<option name="useLength" value="false" />
</AnswerPlaceholder>
</list>
</option>
<option name="highlightErrors" value="true" />
<option name="name" value="task.py" />
<option name="text" value="# TODO: type solution here&#10;" />
<option name="trackChanges" value="true" />
<option name="trackLengths" value="true" />
<option name="visible" value="true" />
</TaskFile>
</value>
</entry>
<entry key="tests.py">
<value>
<TaskFile>
<option name="answerPlaceholders">
<list />
</option>
<option name="highlightErrors" value="true" />
<option name="name" value="tests.py" />
<option name="text" value="from test_helper import run_common_tests, failed, passed, get_answer_placeholders&#10;&#10;&#10;def test_answer_placeholders():&#10; placeholders = get_answer_placeholders()&#10; placeholder = placeholders[0]&#10; if placeholder == &quot;&quot;: # TODO: your condition here&#10; passed()&#10; else:&#10; failed()&#10;&#10;&#10;if __name__ == '__main__':&#10; run_common_tests()&#10; # test_answer_placeholders() # TODO: uncomment test call&#10;&#10;&#10;" />
<option name="trackChanges" value="true" />
<option name="trackLengths" value="true" />
<option name="visible" value="false" />
</TaskFile>
</value>
</entry>
</map>
</option>
<option name="updateDate" value="1560938239860" />
</EduTask>
</list>
</option>
</Lesson>
</list>
</option>
</Section>
<Section>
<option name="courseId" value="54532" />
<option name="customPresentableName" />
<option name="id" value="88017" />
<option name="index" value="4" />
<option name="name" value="IO" />
<option name="position" value="5" />
<option name="stepikChangeStatus" value="Up to date" />
<option name="updateDate" value="1560436240000" />
<option name="items">
<list>
<Lesson>
<option name="customPresentableName" />
<option name="id" value="238439" />
<option name="index" value="1" />
<option name="name" value="TextIO" />
<option name="stepikChangeStatus" value="Content changed" />
<option name="updateDate" value="1560938245888" />
<option name="unitId" value="210899" />
<option name="items">
<list>
<EduTask>
<option name="customPresentableName" />
<option name="descriptionFormat" value="HTML" />
<option name="descriptionText" value="&lt;!--&#10; ~ Licensed to the Apache Software Foundation (ASF) under one&#10; ~ or more contributor license agreements. See the NOTICE file&#10; ~ distributed with this work for additional information&#10; ~ regarding copyright ownership. The ASF licenses this file&#10; ~ to you under the Apache License, Version 2.0 (the&#10; ~ &quot;License&quot;); you may not use this file except in compliance&#10; ~ with the License. You may obtain a copy of the License at&#10; ~&#10; ~ http://www.apache.org/licenses/LICENSE-2.0&#10; ~&#10; ~ Unless required by applicable law or agreed to in writing, software&#10; ~ distributed under the License is distributed on an &quot;AS IS&quot; BASIS,&#10; ~ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.&#10; ~ See the License for the specific language governing permissions and&#10; ~ limitations under the License.&#10; --&gt;&#10;&#10;&lt;html&gt;&#10;&lt;h2&gt;ReadFromText&lt;/h2&gt;&#10;&lt;p&gt;&#10; When you create a pipeline, you often need to read data from some external source, such as a file&#10; or a database. Likewise, you may want your pipeline to output its result data to an external&#10; storage system. 
Beam provides read and write transforms for a number of common data storage types.&#10; If you want your pipeline to read from or write to a data storage format that isn’t supported by&#10; the built-in transforms, you can implement your own read and write transforms.&#10;&lt;/p&gt;&#10;&lt;p&gt;&#10; To read a PCollection from one or more text files, use beam.io.ReadFromText to instantiate a&#10; transform and specify the path of the file(s) to be read.&#10;&lt;/p&gt;&#10;&lt;p&gt;&#10; &lt;b&gt;Kata:&lt;/b&gt; Read the 'countries.txt' file and convert each country name into uppercase.&#10;&lt;/p&gt;&#10;&lt;br&gt;&#10;&lt;div class=&quot;hint&quot;&gt;&#10; Use &lt;a href=&quot;https://beam.apache.org/releases/pydoc/current/apache_beam.io.textio.html#apache_beam.io.textio.ReadFromText&quot;&gt;&#10; beam.io.ReadFromText&lt;/a&gt;.&#10;&lt;/div&gt;&#10;&lt;div class=&quot;hint&quot;&gt;&#10; Refer to the Beam Programming Guide&#10; &lt;a href=&quot;https://beam.apache.org/documentation/programming-guide/#pipeline-io-reading-data&quot;&gt;&#10; &quot;Reading input data&quot;&lt;/a&gt; section for more information.&#10;&lt;/div&gt;&#10;&lt;/html&gt;&#10;" />
<option name="feedbackLink">
<FeedbackLink>
<option name="link" />
<option name="type" value="STEPIK" />
</FeedbackLink>
</option>
<option name="id" value="755602" />
<option name="index" value="1" />
<option name="name" value="ReadFromText" />
<option name="record" value="-1" />
<option name="status" value="Unchecked" />
<option name="stepikChangeStatus" value="Up to date" />
<option name="files">
<map>
<entry key="task.py">
<value>
<TaskFile>
<option name="answerPlaceholders">
<list>
<AnswerPlaceholder>
<option name="hints">
<list />
</option>
<option name="index" value="0" />
<option name="initialState" />
<option name="initializedFromDependency" value="false" />
<option name="length" value="6" />
<option name="offset" value="919" />
<option name="placeholderDependency" />
<option name="placeholderText" value="TODO()" />
<option name="possibleAnswer" value="beam.io.ReadFromText(file_path)" />
<option name="selected" value="false" />
<option name="status" value="Unchecked" />
<option name="studentAnswer" />
<option name="useLength" value="false" />
</AnswerPlaceholder>
<AnswerPlaceholder>
<option name="hints">
<list />
</option>
<option name="index" value="1" />
<option name="initialState" />
<option name="initializedFromDependency" value="false" />
<option name="length" value="6" />
<option name="offset" value="956" />
<option name="placeholderDependency" />
<option name="placeholderText" value="TODO()" />
<option name="possibleAnswer" value="beam.Map(lambda country: country.upper())" />
<option name="selected" value="false" />
<option name="status" value="Unchecked" />
<option name="studentAnswer" />
<option name="useLength" value="false" />
</AnswerPlaceholder>
</list>
</option>
<option name="highlightErrors" value="true" />
<option name="name" value="task.py" />
<option name="text" value="# TODO: type solution here&#10;" />
<option name="trackChanges" value="true" />
<option name="trackLengths" value="true" />
<option name="visible" value="true" />
</TaskFile>
</value>
</entry>
<entry key="tests.py">
<value>
<TaskFile>
<option name="answerPlaceholders">
<list />
</option>
<option name="highlightErrors" value="true" />
<option name="name" value="tests.py" />
<option name="text" value="from test_helper import run_common_tests, failed, passed, get_answer_placeholders&#10;&#10;&#10;def test_answer_placeholders():&#10; placeholders = get_answer_placeholders()&#10; placeholder = placeholders[0]&#10; if placeholder == &quot;&quot;: # TODO: your condition here&#10; passed()&#10; else:&#10; failed()&#10;&#10;&#10;if __name__ == '__main__':&#10; run_common_tests()&#10; # test_answer_placeholders() # TODO: uncomment test call&#10;&#10;&#10;" />
<option name="trackChanges" value="true" />
<option name="trackLengths" value="true" />
<option name="visible" value="false" />
</TaskFile>
</value>
</entry>
<entry key="countries.txt">
<value>
<TaskFile>
<option name="answerPlaceholders">
<list />
</option>
<option name="highlightErrors" value="true" />
<option name="name" value="countries.txt" />
<option name="text" value="" />
<option name="trackChanges" value="true" />
<option name="trackLengths" value="true" />
<option name="visible" value="true" />
</TaskFile>
</value>
</entry>
</map>
</option>
<option name="updateDate" value="1560938252130" />
</EduTask>
</list>
</option>
</Lesson>
<Lesson>
<option name="customPresentableName" />
<option name="id" value="238440" />
<option name="index" value="2" />
<option name="name" value="Built-in IOs" />
<option name="stepikChangeStatus" value="Content changed" />
<option name="updateDate" value="1560938258337" />
<option name="unitId" value="210900" />
<option name="items">
<list>
<EduTask>
<option name="customPresentableName" />
<option name="descriptionFormat" value="HTML" />
<option name="descriptionText" value="&lt;!--&#10; ~ Licensed to the Apache Software Foundation (ASF) under one&#10; ~ or more contributor license agreements. See the NOTICE file&#10; ~ distributed with this work for additional information&#10; ~ regarding copyright ownership. The ASF licenses this file&#10; ~ to you under the Apache License, Version 2.0 (the&#10; ~ &quot;License&quot;); you may not use this file except in compliance&#10; ~ with the License. You may obtain a copy of the License at&#10; ~&#10; ~ http://www.apache.org/licenses/LICENSE-2.0&#10; ~&#10; ~ Unless required by applicable law or agreed to in writing, software&#10; ~ distributed under the License is distributed on an &quot;AS IS&quot; BASIS,&#10; ~ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.&#10; ~ See the License for the specific language governing permissions and&#10; ~ limitations under the License.&#10; --&gt;&#10;&#10;&lt;html&gt;&#10;&lt;h2&gt;Built-in I/Os&lt;/h2&gt;&#10;&lt;p&gt;&#10; Beam SDKs provide many out of the box I/O transforms that can be used to read from many&#10; different sources and write to many different sinks.&#10;&lt;/p&gt;&#10;&lt;p&gt;&#10; See the &lt;a href=&quot;https://beam.apache.org/documentation/io/built-in/&quot;&gt;Beam-provided I/O&#10; Transforms&lt;/a&gt; page for a list of the currently available I/O transforms.&#10;&lt;/p&gt;&#10;&lt;/html&gt;&#10;" />
<option name="feedbackLink">
<FeedbackLink>
<option name="link" />
<option name="type" value="STEPIK" />
</FeedbackLink>
</option>
<option name="id" value="755603" />
<option name="index" value="1" />
<option name="name" value="Built-in IOs" />
<option name="record" value="-1" />
<option name="status" value="Unchecked" />
<option name="stepikChangeStatus" value="Up to date" />
<option name="files">
<map>
<entry key="task.py">
<value>
<TaskFile>
<option name="answerPlaceholders">
<list />
</option>
<option name="highlightErrors" value="true" />
<option name="name" value="task.py" />
<option name="text" value="# TODO: type solution here&#10;" />
<option name="trackChanges" value="true" />
<option name="trackLengths" value="true" />
<option name="visible" value="true" />
</TaskFile>
</value>
</entry>
<entry key="tests.py">
<value>
<TaskFile>
<option name="answerPlaceholders">
<list />
</option>
<option name="highlightErrors" value="true" />
<option name="name" value="tests.py" />
<option name="text" value="" />
<option name="trackChanges" value="true" />
<option name="trackLengths" value="true" />
<option name="visible" value="false" />
</TaskFile>
</value>
</entry>
</map>
</option>
<option name="updateDate" value="1560938263697" />
</EduTask>
</list>
</option>
</Lesson>
</list>
</option>
</Section>
<!-- Section "Examples" (id 85647, index 5) of course 54532.
     It holds a single lesson, "Word Count", which in turn holds a single EduTask of the same name. -->
<Section>
<option name="courseId" value="54532" />
<option name="customPresentableName" />
<option name="id" value="85647" />
<option name="index" value="5" />
<option name="name" value="Examples" />
<option name="position" value="0" />
<option name="stepikChangeStatus" value="Up to date" />
<option name="updateDate" value="1560435414000" />
<option name="items">
<list>
<!-- Lesson "Word Count" (id 238441, unit 210901).
     Its stepikChangeStatus is "Content changed", while the enclosing section and the task below are both "Up to date". -->
<Lesson>
<option name="customPresentableName" />
<option name="id" value="238441" />
<option name="index" value="1" />
<option name="name" value="Word Count" />
<option name="stepikChangeStatus" value="Content changed" />
<option name="updateDate" value="1560938269193" />
<option name="unitId" value="210901" />
<option name="items">
<list>
<!-- Task "Word Count" (id 755604): asks for a pipeline that emits one "word:count" line per word. -->
<EduTask>
<option name="customPresentableName" />
<option name="descriptionFormat" value="HTML" />
<!-- descriptionText is the task's HTML description stored as an entity-escaped attribute value;
     each &#10; character reference is a line feed of the embedded HTML. -->
<option name="descriptionText" value="&lt;!--&#10; ~ Licensed to the Apache Software Foundation (ASF) under one&#10; ~ or more contributor license agreements. See the NOTICE file&#10; ~ distributed with this work for additional information&#10; ~ regarding copyright ownership. The ASF licenses this file&#10; ~ to you under the Apache License, Version 2.0 (the&#10; ~ &quot;License&quot;); you may not use this file except in compliance&#10; ~ with the License. You may obtain a copy of the License at&#10; ~&#10; ~ http://www.apache.org/licenses/LICENSE-2.0&#10; ~&#10; ~ Unless required by applicable law or agreed to in writing, software&#10; ~ distributed under the License is distributed on an &quot;AS IS&quot; BASIS,&#10; ~ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.&#10; ~ See the License for the specific language governing permissions and&#10; ~ limitations under the License.&#10; --&gt;&#10;&#10;&lt;html&gt;&#10;&lt;h2&gt;Word Count Pipeline&lt;/h2&gt;&#10;&lt;p&gt;&#10; &lt;b&gt;Kata:&lt;/b&gt; Create a pipeline that counts the number of words.&#10;&lt;/p&gt;&#10;&lt;p&gt;&#10; Please output the count of each word in the following format:&#10;&lt;/p&gt;&#10;&lt;pre&gt;&#10; word:count&#10; ball:5&#10; book:3&#10;&lt;/pre&gt;&#10;&lt;br&gt;&#10;&lt;div class=&quot;hint&quot;&gt;&#10; Refer to your katas above.&#10;&lt;/div&gt;&#10;&lt;/html&gt;&#10;" />
<option name="feedbackLink">
<FeedbackLink>
<option name="link" />
<option name="type" value="STEPIK" />
</FeedbackLink>
</option>
<option name="id" value="755604" />
<option name="index" value="1" />
<option name="name" value="Word Count" />
<option name="record" value="-1" />
<option name="status" value="Unchecked" />
<option name="stepikChangeStatus" value="Up to date" />
<option name="files">
<map>
<!-- task.py: the learner-visible file (visible="true") with one answer placeholder. -->
<entry key="task.py">
<value>
<TaskFile>
<option name="answerPlaceholders">
<list>
<!-- The placeholder text "TODO()" is 6 characters long, matching length="6".
     NOTE(review): offset="1021" lies beyond the end of the stub text stored in this entry's "text" option below;
     presumably the offset refers to the full task.py kept outside this file. Confirm before editing either value. -->
<AnswerPlaceholder>
<option name="hints">
<list />
</option>
<option name="index" value="0" />
<option name="initialState" />
<option name="initializedFromDependency" value="false" />
<option name="length" value="6" />
<option name="offset" value="1021" />
<option name="placeholderDependency" />
<option name="placeholderText" value="TODO()" />
<!-- The reference answer splits each sentence into words, counts occurrences per element and formats "word:count".
     "lambda (k, v):" is tuple-parameter unpacking, which only parses on Python 2; this agrees with the
     course language "Python 2.7" declared at the top of this file. -->
<option name="possibleAnswer" value="beam.FlatMap(lambda sentence: sentence.split())&#10; | beam.combiners.Count.PerElement()&#10; | beam.Map(lambda (k, v): k + &quot;:&quot; + str(v))" />
<option name="selected" value="false" />
<option name="status" value="Unchecked" />
<option name="studentAnswer" />
<option name="useLength" value="false" />
</AnswerPlaceholder>
</list>
</option>
<option name="highlightErrors" value="true" />
<option name="name" value="task.py" />
<!-- Only a one-line stub is stored here, not a pipeline source containing the placeholder. -->
<option name="text" value="# TODO: type solution here&#10;" />
<option name="trackChanges" value="true" />
<option name="trackLengths" value="true" />
<option name="visible" value="true" />
</TaskFile>
</value>
</entry>
<!-- tests.py: hidden from the learner (visible="false"), no answer placeholders. -->
<entry key="tests.py">
<value>
<TaskFile>
<option name="answerPlaceholders">
<list />
</option>
<option name="highlightErrors" value="true" />
<option name="name" value="tests.py" />
<!-- The stored test text still carries its TODO markers: the main guard calls run_common_tests() only,
     and the call to test_answer_placeholders() is commented out.
     NOTE(review): presumably the real checks live in a tests.py outside this file; confirm. -->
<option name="text" value="from test_helper import run_common_tests, failed, passed, get_answer_placeholders&#10;&#10;&#10;def test_answer_placeholders():&#10; placeholders = get_answer_placeholders()&#10; placeholder = placeholders[0]&#10; if placeholder == &quot;&quot;: # TODO: your condition here&#10; passed()&#10; else:&#10; failed()&#10;&#10;&#10;if __name__ == '__main__':&#10; run_common_tests()&#10; # test_answer_placeholders() # TODO: uncomment test call&#10;&#10;&#10;" />
<option name="trackChanges" value="true" />
<option name="trackLengths" value="true" />
<option name="visible" value="false" />
</TaskFile>
</value>
</entry>
</map>
</option>
<option name="updateDate" value="1560938273811" />
</EduTask>
</list>
</option>
</Lesson>
</list>
</option>
</Section>
</list>
</option>
</EduCourse>
</option>
</StudyTaskManager>
</component>
</project>