blob: 9fcebef974c4405450cef12a915e301ec5cc5665 [file] [log] [blame]
package org.qcri.rheem.apps.simwords
import java.lang.Iterable
import java.util
import org.qcri.rheem.core.function.ExecutionContext
import org.qcri.rheem.core.function.FunctionDescriptor.ExtendedSerializableFunction
/**
 * User-defined function that converts a single line of text into a collection of tokens.
 *
 * The splitting and scrubbing itself is delegated to `TextScrubber.splitAndScrub`; this
 * class only adapts that call to the `ExtendedSerializableFunction` interface.
 */
class ScrubFunction extends ExtendedSerializableFunction[String, java.lang.Iterable[String]] {

  /** Scrubber used by [[apply]]; instantiated on first access rather than at construction. */
  lazy val textScrubber = new TextScrubber

  /**
   * Lifecycle hook of the function interface. Nothing needs to be prepared here, so the
   * execution context is ignored.
   *
   * @param ctx the execution context (unused)
   */
  override def open(ctx: ExecutionContext): Unit = ()

  /**
   * Splits and scrubs the given line.
   *
   * @param line the text line to process
   * @return the list that was handed to `splitAndScrub` (presumably populated with the
   *         scrubbed tokens by that call -- see `TextScrubber` to confirm)
   */
  override def apply(line: String): Iterable[String] = {
    // Fresh output list per invocation; the scrubber receives it as its output argument.
    val tokens = new util.LinkedList[String]()
    textScrubber.splitAndScrub(line, tokens)
    tokens
  }
}