// blob: ae744ed60c69508d6a38b052bb44dd1150692959 [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.pig.tutorial;
import java.util.ArrayList;
import java.util.LinkedList;
import java.util.List;
import java.util.Set;
import java.util.regex.Pattern;
/**
 * Static text helpers: splitting a query string into words and building
 * word-level n-grams from those words.
 */
public class TutorialUtil {

    /** Matches a single non-word character, i.e. anything outside {@code [a-zA-Z_0-9]}. */
    private static final Pattern NON_WORD = Pattern.compile("\\W");

    /**
     * Splits a search query string into its non-empty words. Every non-word
     * character (anything outside {@code [a-zA-Z_0-9]}) acts as a separator.
     *
     * @param query the query string to split; may be {@code null}
     * @return the words in order of appearance; an empty array when
     *         {@code query} is {@code null} or contains no word characters
     */
    protected static String[] splitToWords(String query) {
        if (query == null) {
            return new String[0];
        }
        List<String> res = new ArrayList<String>();
        for (String word : NON_WORD.split(query)) {
            // Leading or adjacent separators produce empty tokens; drop them.
            if (!word.isEmpty()) {
                res.add(word);
            }
        }
        return res.toArray(new String[res.size()]);
    }

    /**
     * Builds word-level n-grams from {@code words} and adds them to
     * {@code ngrams}. Every run of consecutive words of length {@code size},
     * then {@code size - 1}, and so on down to 1 is joined with single
     * spaces and added to the set.
     *
     * @param words  the words to combine, in order
     * @param ngrams the set that receives the generated n-grams
     * @param size   the maximum number of words per n-gram; values larger
     *               than {@code words.length} behave like
     *               {@code words.length}, and values below 1 add nothing
     */
    protected static void makeNGram(String[] words, Set<String> ngrams, int size) {
        // No n-gram can be longer than the input, so start from the shorter of the two.
        int longest = Math.min(size, words.length);
        for (int n = longest; n >= 1; n--) {
            for (int start = 0; start + n <= words.length; start++) {
                StringBuilder sb = new StringBuilder();
                sb.append(words[start]);
                for (int k = 1; k < n; k++) {
                    sb.append(' ').append(words[start + k]);
                }
                ngrams.add(sb.toString());
            }
        }
    }
}