| /* |
| * Licensed to the Apache Software Foundation (ASF) under one or more |
| * contributor license agreements. See the NOTICE file distributed with |
| * this work for additional information regarding copyright ownership. |
| * The ASF licenses this file to You under the Apache License, Version 2.0 |
| * (the "License"); you may not use this file except in compliance with |
| * the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| package org.apache.lucene.analysis.id; |
| |
| |
| import java.io.IOException; |
| |
| import org.apache.lucene.analysis.Analyzer; |
| import org.apache.lucene.analysis.BaseTokenStreamTestCase; |
| import org.apache.lucene.analysis.MockTokenizer; |
| import org.apache.lucene.analysis.Tokenizer; |
| import org.apache.lucene.analysis.core.KeywordTokenizer; |
| import org.apache.lucene.util.IOUtils; |
| |
| /** |
| * Tests {@link IndonesianStemmer} |
| */ |
| public class TestIndonesianStemmer extends BaseTokenStreamTestCase { |
| private Analyzer a, b; |
| |
| @Override |
| public void setUp() throws Exception { |
| super.setUp(); |
| /* full stemming, no stopwords */ |
| a = new Analyzer() { |
| @Override |
| public TokenStreamComponents createComponents(String fieldName) { |
| Tokenizer tokenizer = new MockTokenizer(MockTokenizer.KEYWORD, false); |
| return new TokenStreamComponents(tokenizer, new IndonesianStemFilter(tokenizer)); |
| } |
| }; |
| /* inflectional-only stemming */ |
| b = new Analyzer() { |
| @Override |
| public TokenStreamComponents createComponents(String fieldName) { |
| Tokenizer tokenizer = new MockTokenizer(MockTokenizer.KEYWORD, false); |
| return new TokenStreamComponents(tokenizer, new IndonesianStemFilter(tokenizer, false)); |
| } |
| }; |
| } |
| |
| @Override |
| public void tearDown() throws Exception { |
| IOUtils.close(a, b); |
| super.tearDown(); |
| } |
| |
| /** Some examples from the paper */ |
| public void testExamples() throws IOException { |
| checkOneTerm(a, "bukukah", "buku"); |
| checkOneTerm(a, "adalah", "ada"); |
| checkOneTerm(a, "bukupun", "buku"); |
| checkOneTerm(a, "bukuku", "buku"); |
| checkOneTerm(a, "bukumu", "buku"); |
| checkOneTerm(a, "bukunya", "buku"); |
| checkOneTerm(a, "mengukur", "ukur"); |
| checkOneTerm(a, "menyapu", "sapu"); |
| checkOneTerm(a, "menduga", "duga"); |
| checkOneTerm(a, "menuduh", "uduh"); |
| checkOneTerm(a, "membaca", "baca"); |
| checkOneTerm(a, "merusak", "rusak"); |
| checkOneTerm(a, "pengukur", "ukur"); |
| checkOneTerm(a, "penyapu", "sapu"); |
| checkOneTerm(a, "penduga", "duga"); |
| checkOneTerm(a, "pembaca", "baca"); |
| checkOneTerm(a, "diukur", "ukur"); |
| checkOneTerm(a, "tersapu", "sapu"); |
| checkOneTerm(a, "kekasih", "kasih"); |
| checkOneTerm(a, "berlari", "lari"); |
| checkOneTerm(a, "belajar", "ajar"); |
| checkOneTerm(a, "bekerja", "kerja"); |
| checkOneTerm(a, "perjelas", "jelas"); |
| checkOneTerm(a, "pelajar", "ajar"); |
| checkOneTerm(a, "pekerja", "kerja"); |
| checkOneTerm(a, "tarikkan", "tarik"); |
| checkOneTerm(a, "ambilkan", "ambil"); |
| checkOneTerm(a, "mengambilkan", "ambil"); |
| checkOneTerm(a, "makanan", "makan"); |
| checkOneTerm(a, "janjian", "janji"); |
| checkOneTerm(a, "perjanjian", "janji"); |
| checkOneTerm(a, "tandai", "tanda"); |
| checkOneTerm(a, "dapati", "dapat"); |
| checkOneTerm(a, "mendapati", "dapat"); |
| checkOneTerm(a, "pantai", "panta"); |
| } |
| |
| /** Some detailed analysis examples (that might not be the best) */ |
| public void testIRExamples() throws IOException { |
| checkOneTerm(a, "penyalahgunaan", "salahguna"); |
| checkOneTerm(a, "menyalahgunakan", "salahguna"); |
| checkOneTerm(a, "disalahgunakan", "salahguna"); |
| |
| checkOneTerm(a, "pertanggungjawaban", "tanggungjawab"); |
| checkOneTerm(a, "mempertanggungjawabkan", "tanggungjawab"); |
| checkOneTerm(a, "dipertanggungjawabkan", "tanggungjawab"); |
| |
| checkOneTerm(a, "pelaksanaan", "laksana"); |
| checkOneTerm(a, "pelaksana", "laksana"); |
| checkOneTerm(a, "melaksanakan", "laksana"); |
| checkOneTerm(a, "dilaksanakan", "laksana"); |
| |
| checkOneTerm(a, "melibatkan", "libat"); |
| checkOneTerm(a, "terlibat", "libat"); |
| |
| checkOneTerm(a, "penculikan", "culik"); |
| checkOneTerm(a, "menculik", "culik"); |
| checkOneTerm(a, "diculik", "culik"); |
| checkOneTerm(a, "penculik", "culik"); |
| |
| checkOneTerm(a, "perubahan", "ubah"); |
| checkOneTerm(a, "peledakan", "ledak"); |
| checkOneTerm(a, "penanganan", "tangan"); |
| checkOneTerm(a, "kepolisian", "polisi"); |
| checkOneTerm(a, "kenaikan", "naik"); |
| checkOneTerm(a, "bersenjata", "senjata"); |
| checkOneTerm(a, "penyelewengan", "seleweng"); |
| checkOneTerm(a, "kecelakaan", "celaka"); |
| } |
| |
| /** Test stemming only inflectional suffixes */ |
| public void testInflectionalOnly() throws IOException { |
| checkOneTerm(b, "bukunya", "buku"); |
| checkOneTerm(b, "bukukah", "buku"); |
| checkOneTerm(b, "bukunyakah", "buku"); |
| checkOneTerm(b, "dibukukannya", "dibukukan"); |
| } |
| |
| public void testShouldntStem() throws IOException { |
| checkOneTerm(a, "bersenjata", "senjata"); |
| checkOneTerm(a, "bukukah", "buku"); |
| checkOneTerm(a, "gigi", "gigi"); |
| } |
| |
| public void testEmptyTerm() throws IOException { |
| Analyzer a = new Analyzer() { |
| @Override |
| protected TokenStreamComponents createComponents(String fieldName) { |
| Tokenizer tokenizer = new KeywordTokenizer(); |
| return new TokenStreamComponents(tokenizer, new IndonesianStemFilter(tokenizer)); |
| } |
| }; |
| checkOneTerm(a, "", ""); |
| a.close(); |
| } |
| } |