blob: 8773f5139a4a42b4ea9b3ace0a421e865bf5eb32 [file] [log] [blame]
using Lucene.Net.Diagnostics;
using Lucene.Net.Util;
using NUnit.Framework;
using System;
using System.IO;
using System.IO.Compression;
using System.Text;
using Console = Lucene.Net.Util.SystemConsole;
namespace Lucene.Net.Analysis.Hunspell
{
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/// <summary>
/// Can be retrieved via:
/// wget --mirror -np http://archive.services.openoffice.org/pub/mirror/OpenOffice.org/contrib/dictionaries/
/// Note some of the files differ only in case. This may be a problem on your operating system!
///
/// LUCENENET NOTE: The above URL is no longer valid. These dictionaries can be retreived via FTP at one of these URLs
/// ftp://ftp.us.horde.org/pub/software/openoffice/contrib/dictionaries/
/// ftp://mirror.nl.leaseweb.net/openoffice/contrib/dictionaries/
/// ftp://mirror.aptus.co.tz/openoffice/contrib/dictionaries/
///
/// Or you can search by file name at:
/// http://www.filewatcher.com/
/// </summary>
[Ignore("Enable manually")]
public class TestAllDictionaries : LuceneTestCase
{
// set this to the location of where you downloaded all the files
internal static readonly DirectoryInfo DICTIONARY_HOME = new DirectoryInfo(
System.IO.Path.Combine(new DirectoryInfo(typeof(TestAllDictionaries2).Assembly.Location).Parent.Parent.Parent.Parent.Parent.FullName,
@"test-files\analysis\data\dictionaries"));
internal readonly string[] tests = new string[]
{
/* zip file */ /* dictionary */ /* affix */
"af_ZA.zip", "af_ZA.dic", "af_ZA.aff",
"ak_GH.zip", "ak_GH.dic", "ak_GH.aff",
"bg_BG.zip", "bg_BG.dic", "bg_BG.aff",
"ca_ANY.zip", "catalan.dic", "catalan.aff",
"ca_ES.zip", "ca_ES.dic", "ca_ES.aff",
// BUG: broken flag "cop_EG.zip", "cop_EG.dic", "cop_EG.aff",
"cs_CZ.zip", "cs_CZ.dic", "cs_CZ.aff",
"cy_GB.zip", "cy_GB.dic", "cy_GB.aff",
"da_DK.zip", "da_DK.dic", "da_DK.aff",
"de_AT.zip", "de_AT.dic", "de_AT.aff",
"de_CH.zip", "de_CH.dic", "de_CH.aff",
"de_DE.zip", "de_DE.dic", "de_DE.aff",
"de_DE_comb.zip", "de_DE_comb.dic", "de_DE_comb.aff",
"de_DE_frami.zip", "de_DE_frami.dic", "de_DE_frami.aff",
"de_DE_neu.zip", "de_DE_neu.dic", "de_DE_neu.aff",
"el_GR.zip", "el_GR.dic", "el_GR.aff",
"en_AU.zip", "en_AU.dic", "en_AU.aff",
"en_CA.zip", "en_CA.dic", "en_CA.aff",
"en_GB-oed.zip", "en_GB-oed.dic", "en_GB-oed.aff",
"en_GB.zip", "en_GB.dic", "en_GB.aff",
"en_NZ.zip", "en_NZ.dic", "en_NZ.aff",
"eo.zip", "eo_l3.dic", "eo_l3.aff",
"eo_EO.zip", "eo_EO.dic", "eo_EO.aff",
"es_AR.zip", "es_AR.dic", "es_AR.aff",
"es_BO.zip", "es_BO.dic", "es_BO.aff",
"es_CL.zip", "es_CL.dic", "es_CL.aff",
"es_CO.zip", "es_CO.dic", "es_CO.aff",
"es_CR.zip", "es_CR.dic", "es_CR.aff",
"es_CU.zip", "es_CU.dic", "es_CU.aff",
"es_DO.zip", "es_DO.dic", "es_DO.aff",
"es_EC.zip", "es_EC.dic", "es_EC.aff",
"es_ES.zip", "es_ES.dic", "es_ES.aff",
"es_GT.zip", "es_GT.dic", "es_GT.aff",
"es_HN.zip", "es_HN.dic", "es_HN.aff",
"es_MX.zip", "es_MX.dic", "es_MX.aff",
"es_NEW.zip", "es_NEW.dic", "es_NEW.aff",
"es_NI.zip", "es_NI.dic", "es_NI.aff",
"es_PA.zip", "es_PA.dic", "es_PA.aff",
"es_PE.zip", "es_PE.dic", "es_PE.aff",
"es_PR.zip", "es_PR.dic", "es_PR.aff",
"es_PY.zip", "es_PY.dic", "es_PY.aff",
"es_SV.zip", "es_SV.dic", "es_SV.aff",
"es_UY.zip", "es_UY.dic", "es_UY.aff",
"es_VE.zip", "es_VE.dic", "es_VE.aff",
"et_EE.zip", "et_EE.dic", "et_EE.aff",
"fo_FO.zip", "fo_FO.dic", "fo_FO.aff",
"fr_FR-1990_1-3-2.zip", "fr_FR-1990.dic", "fr_FR-1990.aff",
"fr_FR-classique_1-3-2.zip", "fr_FR-classique.dic", "fr_FR-classique.aff",
"fr_FR_1-3-2.zip", "fr_FR.dic", "fr_FR.aff",
"fy_NL.zip", "fy_NL.dic", "fy_NL.aff",
"ga_IE.zip", "ga_IE.dic", "ga_IE.aff",
"gd_GB.zip", "gd_GB.dic", "gd_GB.aff",
"gl_ES.zip", "gl_ES.dic", "gl_ES.aff",
"gsc_FR.zip", "gsc_FR.dic", "gsc_FR.aff",
"gu_IN.zip", "gu_IN.dic", "gu_IN.aff",
"he_IL.zip", "he_IL.dic", "he_IL.aff",
"hi_IN.zip", "hi_IN.dic", "hi_IN.aff",
"hil_PH.zip", "hil_PH.dic", "hil_PH.aff",
"hr_HR.zip", "hr_HR.dic", "hr_HR.aff",
"hu_HU.zip", "hu_HU.dic", "hu_HU.aff",
"hu_HU_comb.zip", "hu_HU.dic", "hu_HU.aff",
"ia.zip", "ia.dic", "ia.aff",
"id_ID.zip", "id_ID.dic", "id_ID.aff",
"it_IT.zip", "it_IT.dic", "it_IT.aff",
"ku_TR.zip", "ku_TR.dic", "ku_TR.aff",
"la.zip", "la.dic", "la.aff",
"lt_LT.zip", "lt_LT.dic", "lt_LT.aff",
"lv_LV.zip", "lv_LV.dic", "lv_LV.aff",
"mg_MG.zip", "mg_MG.dic", "mg_MG.aff",
"mi_NZ.zip", "mi_NZ.dic", "mi_NZ.aff",
"mk_MK.zip", "mk_MK.dic", "mk_MK.aff",
"mos_BF.zip", "mos_BF.dic", "mos_BF.aff",
"mr_IN.zip", "mr_IN.dic", "mr_IN.aff",
"ms_MY.zip", "ms_MY.dic", "ms_MY.aff",
"nb_NO.zip", "nb_NO.dic", "nb_NO.aff",
"ne_NP.zip", "ne_NP.dic", "ne_NP.aff",
"nl_NL.zip", "nl_NL.dic", "nl_NL.aff",
"nl_med.zip", "nl_med.dic", "nl_med.aff",
"nn_NO.zip", "nn_NO.dic", "nn_NO.aff",
"nr_ZA.zip", "nr_ZA.dic", "nr_ZA.aff",
"ns_ZA.zip", "ns_ZA.dic", "ns_ZA.aff",
"ny_MW.zip", "ny_MW.dic", "ny_MW.aff",
"oc_FR.zip", "oc_FR.dic", "oc_FR.aff",
"pl_PL.zip", "pl_PL.dic", "pl_PL.aff",
"pt_BR.zip", "pt_BR.dic", "pt_BR.aff",
"pt_PT.zip", "pt_PT.dic", "pt_PT.aff",
"ro_RO.zip", "ro_RO.dic", "ro_RO.aff",
"ru_RU.zip", "ru_RU.dic", "ru_RU.aff",
"ru_RU_ye.zip", "ru_RU_ie.dic", "ru_RU_ie.aff",
"ru_RU_yo.zip", "ru_RU_yo.dic", "ru_RU_yo.aff",
"rw_RW.zip", "rw_RW.dic", "rw_RW.aff",
"sk_SK.zip", "sk_SK.dic", "sk_SK.aff",
"sl_SI.zip", "sl_SI.dic", "sl_SI.aff",
"sq_AL.zip", "sq_AL.dic", "sq_AL.aff",
"ss_ZA.zip", "ss_ZA.dic", "ss_ZA.aff",
"st_ZA.zip", "st_ZA.dic", "st_ZA.aff",
"sv_SE.zip", "sv_SE.dic", "sv_SE.aff",
"sw_KE.zip", "sw_KE.dic", "sw_KE.aff",
"tet_ID.zip", "tet_ID.dic", "tet_ID.aff",
"th_TH.zip", "th_TH.dic", "th_TH.aff",
"tl_PH.zip", "tl_PH.dic", "tl_PH.aff",
"tn_ZA.zip", "tn_ZA.dic", "tn_ZA.aff",
"ts_ZA.zip", "ts_ZA.dic", "ts_ZA.aff",
"uk_UA.zip", "uk_UA.dic", "uk_UA.aff",
"ve_ZA.zip", "ve_ZA.dic", "ve_ZA.aff",
"vi_VN.zip", "vi_VN.dic", "vi_VN.aff",
"xh_ZA.zip", "xh_ZA.dic", "xh_ZA.aff",
"zu_ZA.zip", "zu_ZA.dic", "zu_ZA.aff",
};
[Test]
public virtual void Test()
{
for (int i = 0; i < tests.Length; i += 3)
{
FileInfo f = new FileInfo(System.IO.Path.Combine(DICTIONARY_HOME.FullName, tests[i]));
if (Debugging.AssertsEnabled) Debugging.Assert(f.Exists);
using Stream fileStream = f.OpenRead();
using ZipArchive zip = new ZipArchive(fileStream, ZipArchiveMode.Read, false, Encoding.UTF8);
ZipArchiveEntry dicEntry = zip.GetEntry(tests[i + 1]);
if (Debugging.AssertsEnabled) Debugging.Assert(dicEntry != null);
ZipArchiveEntry affEntry = zip.GetEntry(tests[i + 2]);
if (Debugging.AssertsEnabled) Debugging.Assert(affEntry != null);
using Stream dictionary = dicEntry.Open();
using Stream affix = affEntry.Open();
Dictionary dic = new Dictionary(affix, dictionary);
Console.WriteLine(tests[i] + "\t" + RamUsageEstimator.HumanSizeOf(dic) + "\t(" +
"words=" + RamUsageEstimator.HumanSizeOf(dic.words) + ", " +
"flags=" + RamUsageEstimator.HumanSizeOf(dic.flagLookup) + ", " +
"strips=" + RamUsageEstimator.HumanSizeOf(dic.stripData) + ", " +
"conditions=" + RamUsageEstimator.HumanSizeOf(dic.patterns) + ", " +
"affixData=" + RamUsageEstimator.HumanSizeOf(dic.affixData) + ", " +
"prefixes=" + RamUsageEstimator.HumanSizeOf(dic.prefixes) + ", " +
"suffixes=" + RamUsageEstimator.HumanSizeOf(dic.suffixes) + ")");
}
}
[Test]
public virtual void TestOneDictionary()
{
string toTest = "hu_HU.zip";
for (int i = 0; i < tests.Length; i++)
{
if (tests[i].Equals(toTest, StringComparison.Ordinal))
{
FileInfo f = new FileInfo(System.IO.Path.Combine(DICTIONARY_HOME.FullName, tests[i]));
if (Debugging.AssertsEnabled) Debugging.Assert(f.Exists);
using Stream fileStream = f.OpenRead();
using ZipArchive zip = new ZipArchive(fileStream, ZipArchiveMode.Read, false, Encoding.UTF8);
ZipArchiveEntry dicEntry = zip.GetEntry(tests[i + 1]);
if (Debugging.AssertsEnabled) Debugging.Assert(dicEntry != null);
ZipArchiveEntry affEntry = zip.GetEntry(tests[i + 2]);
if (Debugging.AssertsEnabled) Debugging.Assert(affEntry != null);
using Stream dictionary = dicEntry.Open();
using Stream affix = affEntry.Open();
new Dictionary(affix, dictionary);
}
}
}
}
}