// blob: b51c281403652113a88f4ce0bf2e5dd0fbd4da2f [file] [log] [blame]
/**
* Copyright (c) 2011, Cloudera, Inc. All Rights Reserved.
*
* Cloudera, Inc. licenses this file to you under the Apache License,
* Version 2.0 (the "License"). You may not use this file except in
* compliance with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* This software is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
* CONDITIONS OF ANY KIND, either express or implied. See the License for
* the specific language governing permissions and limitations under the
* License.
*/
import org.apache.crunch.scrunch.PipelineApp
object WordCount extends PipelineApp {
def countWords(file: String) = {
read(from.textFile(file))
.flatMap(_.split("\\W+").filter(!_.isEmpty()))
.count
}
val counts = join(countWords(args(0)), countWords(args(1)))
write(counts, to.textFile(args(2)))
}