| # Generated by Snowball 2.2.0 - https://snowballstem.org/ |
| |
| from .basestemmer import BaseStemmer |
| from .among import Among |
| |
| |
class ItalianStemmer(BaseStemmer):
    '''
    Italian stemmer implementing the algorithm defined by a snowball script.
    Generated by Snowball 2.2.0 - https://snowballstem.org/

    _stem() is the driver.  It runs __r_prelude, then __r_mark_regions,
    then (with the cursor placed at self.limit) the attached-pronoun,
    standard-suffix / verb-suffix and vowel-suffix steps, and finally
    __r_postlude.
    '''

    # NOTE(review): in every table below the middle Among argument looks like
    # the index of another entry of the same table that this entry extends
    # (-1 for none); the last argument is the result code the lookup returns.
    # Confirm against the Among class, which is defined elsewhere.

    # First pass of __r_prelude.  Result codes:
    #   1-5  acute-accented vowel, rewritten to its grave-accented form
    #   6    "qu", rewritten to "qU"
    #   7    empty match: the cursor simply moves on by one position
    a_0 = [
        Among(u"", -1, 7),
        Among(u"qu", 0, 6),
        Among(u"\u00E1", 0, 1),
        Among(u"\u00E9", 0, 2),
        Among(u"\u00ED", 0, 3),
        Among(u"\u00F3", 0, 4),
        Among(u"\u00FA", 0, 5)
    ]

    # __r_postlude.  1: "I" -> "i", 2: "U" -> "u", 3: empty match (skip).
    a_1 = [
        Among(u"", -1, 3),
        Among(u"I", 0, 1),
        Among(u"U", 0, 2)
    ]

    # Pronoun endings looked for by __r_attached_pronoun.
    a_2 = [
        Among(u"la", -1, -1),
        Among(u"cela", 0, -1),
        Among(u"gliela", 0, -1),
        Among(u"mela", 0, -1),
        Among(u"tela", 0, -1),
        Among(u"vela", 0, -1),
        Among(u"le", -1, -1),
        Among(u"cele", 6, -1),
        Among(u"gliele", 6, -1),
        Among(u"mele", 6, -1),
        Among(u"tele", 6, -1),
        Among(u"vele", 6, -1),
        Among(u"ne", -1, -1),
        Among(u"cene", 12, -1),
        Among(u"gliene", 12, -1),
        Among(u"mene", 12, -1),
        Among(u"sene", 12, -1),
        Among(u"tene", 12, -1),
        Among(u"vene", 12, -1),
        Among(u"ci", -1, -1),
        Among(u"li", -1, -1),
        Among(u"celi", 20, -1),
        Among(u"glieli", 20, -1),
        Among(u"meli", 20, -1),
        Among(u"teli", 20, -1),
        Among(u"veli", 20, -1),
        Among(u"gli", 20, -1),
        Among(u"mi", -1, -1),
        Among(u"si", -1, -1),
        Among(u"ti", -1, -1),
        Among(u"vi", -1, -1),
        Among(u"lo", -1, -1),
        Among(u"celo", 31, -1),
        Among(u"glielo", 31, -1),
        Among(u"melo", 31, -1),
        Among(u"telo", 31, -1),
        Among(u"velo", 31, -1)
    ]

    # Endings that must precede an attached pronoun.
    # 1: the pronoun is deleted, 2: the pronoun is replaced by "e".
    a_3 = [
        Among(u"ando", -1, 1),
        Among(u"endo", -1, 1),
        Among(u"ar", -1, 2),
        Among(u"er", -1, 2),
        Among(u"ir", -1, 2)
    ]

    # Follow-up endings tried by __r_standard_suffix after "amente" (code 7).
    # Result 1 ("iv") additionally allows a preceding "at" to be removed.
    a_4 = [
        Among(u"ic", -1, -1),
        Among(u"abil", -1, -1),
        Among(u"os", -1, -1),
        Among(u"iv", -1, 1)
    ]

    # Follow-up endings tried by __r_standard_suffix after "it\u00E0" (code 8).
    a_5 = [
        Among(u"ic", -1, 1),
        Among(u"abil", -1, 1),
        Among(u"iv", -1, 1)
    ]

    # Main table of __r_standard_suffix; the result code selects the rule.
    a_6 = [
        Among(u"ica", -1, 1),
        Among(u"logia", -1, 3),
        Among(u"osa", -1, 1),
        Among(u"ista", -1, 1),
        Among(u"iva", -1, 9),
        Among(u"anza", -1, 1),
        Among(u"enza", -1, 5),
        Among(u"ice", -1, 1),
        Among(u"atrice", 7, 1),
        Among(u"iche", -1, 1),
        Among(u"logie", -1, 3),
        Among(u"abile", -1, 1),
        Among(u"ibile", -1, 1),
        Among(u"usione", -1, 4),
        Among(u"azione", -1, 2),
        Among(u"uzione", -1, 4),
        Among(u"atore", -1, 2),
        Among(u"ose", -1, 1),
        Among(u"ante", -1, 1),
        Among(u"mente", -1, 1),
        Among(u"amente", 19, 7),
        Among(u"iste", -1, 1),
        Among(u"ive", -1, 9),
        Among(u"anze", -1, 1),
        Among(u"enze", -1, 5),
        Among(u"ici", -1, 1),
        Among(u"atrici", 25, 1),
        Among(u"ichi", -1, 1),
        Among(u"abili", -1, 1),
        Among(u"ibili", -1, 1),
        Among(u"ismi", -1, 1),
        Among(u"usioni", -1, 4),
        Among(u"azioni", -1, 2),
        Among(u"uzioni", -1, 4),
        Among(u"atori", -1, 2),
        Among(u"osi", -1, 1),
        Among(u"anti", -1, 1),
        Among(u"amenti", -1, 6),
        Among(u"imenti", -1, 6),
        Among(u"isti", -1, 1),
        Among(u"ivi", -1, 9),
        Among(u"ico", -1, 1),
        Among(u"ismo", -1, 1),
        Among(u"oso", -1, 1),
        Among(u"amento", -1, 6),
        Among(u"imento", -1, 6),
        Among(u"ivo", -1, 9),
        Among(u"it\u00E0", -1, 8),
        Among(u"ist\u00E0", -1, 1),
        Among(u"ist\u00E8", -1, 1),
        Among(u"ist\u00EC", -1, 1)
    ]

    # Verb endings deleted by __r_verb_suffix (every entry has result 1).
    a_7 = [
        Among(u"isca", -1, 1),
        Among(u"enda", -1, 1),
        Among(u"ata", -1, 1),
        Among(u"ita", -1, 1),
        Among(u"uta", -1, 1),
        Among(u"ava", -1, 1),
        Among(u"eva", -1, 1),
        Among(u"iva", -1, 1),
        Among(u"erebbe", -1, 1),
        Among(u"irebbe", -1, 1),
        Among(u"isce", -1, 1),
        Among(u"ende", -1, 1),
        Among(u"are", -1, 1),
        Among(u"ere", -1, 1),
        Among(u"ire", -1, 1),
        Among(u"asse", -1, 1),
        Among(u"ate", -1, 1),
        Among(u"avate", 16, 1),
        Among(u"evate", 16, 1),
        Among(u"ivate", 16, 1),
        Among(u"ete", -1, 1),
        Among(u"erete", 20, 1),
        Among(u"irete", 20, 1),
        Among(u"ite", -1, 1),
        Among(u"ereste", -1, 1),
        Among(u"ireste", -1, 1),
        Among(u"ute", -1, 1),
        Among(u"erai", -1, 1),
        Among(u"irai", -1, 1),
        Among(u"isci", -1, 1),
        Among(u"endi", -1, 1),
        Among(u"erei", -1, 1),
        Among(u"irei", -1, 1),
        Among(u"assi", -1, 1),
        Among(u"ati", -1, 1),
        Among(u"iti", -1, 1),
        Among(u"eresti", -1, 1),
        Among(u"iresti", -1, 1),
        Among(u"uti", -1, 1),
        Among(u"avi", -1, 1),
        Among(u"evi", -1, 1),
        Among(u"ivi", -1, 1),
        Among(u"isco", -1, 1),
        Among(u"ando", -1, 1),
        Among(u"endo", -1, 1),
        Among(u"Yamo", -1, 1),
        Among(u"iamo", -1, 1),
        Among(u"avamo", -1, 1),
        Among(u"evamo", -1, 1),
        Among(u"ivamo", -1, 1),
        Among(u"eremo", -1, 1),
        Among(u"iremo", -1, 1),
        Among(u"assimo", -1, 1),
        Among(u"ammo", -1, 1),
        Among(u"emmo", -1, 1),
        Among(u"eremmo", 54, 1),
        Among(u"iremmo", 54, 1),
        Among(u"immo", -1, 1),
        Among(u"ano", -1, 1),
        Among(u"iscano", 58, 1),
        Among(u"avano", 58, 1),
        Among(u"evano", 58, 1),
        Among(u"ivano", 58, 1),
        Among(u"eranno", -1, 1),
        Among(u"iranno", -1, 1),
        Among(u"ono", -1, 1),
        Among(u"iscono", 65, 1),
        Among(u"arono", 65, 1),
        Among(u"erono", 65, 1),
        Among(u"irono", 65, 1),
        Among(u"erebbero", -1, 1),
        Among(u"irebbero", -1, 1),
        Among(u"assero", -1, 1),
        Among(u"essero", -1, 1),
        Among(u"issero", -1, 1),
        Among(u"ato", -1, 1),
        Among(u"ito", -1, 1),
        Among(u"uto", -1, 1),
        Among(u"avo", -1, 1),
        Among(u"evo", -1, 1),
        Among(u"ivo", -1, 1),
        Among(u"ar", -1, 1),
        Among(u"ir", -1, 1),
        Among(u"er\u00E0", -1, 1),
        Among(u"ir\u00E0", -1, 1),
        Among(u"er\u00F2", -1, 1),
        Among(u"ir\u00F2", -1, 1)
    ]

    # Character groupings handed to the *_grouping helpers together with the
    # code point bounds given at each call site (97..249, 97..242, 99..103).
    # NOTE(review): these look like bitmaps, one bit per code point counted
    # from the lower bound - confirm against BaseStemmer.
    g_v = [17, 65, 16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 128, 128, 8, 2, 1]

    g_AEIO = [17, 65, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 128, 128, 8, 2]

    g_CG = [17]

    # Region boundaries; __r_mark_regions assigns them on the instance.
    I_p2 = 0
    I_p1 = 0
    I_pV = 0

    def __r_prelude(self):
        '''
        Rewrite the word before the regions are marked.

        Pass 1 walks the word with table a_0: acute-accented vowels become
        grave-accented ones and "qu" becomes "qU".  Pass 2 rewrites a "u" or
        "i" that stands between two g_v characters as "U" / "I"
        (__r_postlude maps those back to lower case).

        Returns False only if a slice_from() call fails, otherwise True.
        '''
        # Replacement text for result codes 1-6 of a_0.
        replacements = {
            1: u"\u00E0",
            2: u"\u00E8",
            3: u"\u00EC",
            4: u"\u00F2",
            5: u"\u00F9",
            6: u"qU",
        }
        pass_start = self.cursor
        while True:
            step_start = self.cursor
            try:
                self.bra = self.cursor
                among_var = self.find_among(ItalianStemmer.a_0)
                if among_var == 0:
                    raise lab0()
                self.ket = self.cursor
                if among_var in replacements:
                    if not self.slice_from(replacements[among_var]):
                        return False
                elif self.cursor >= self.limit:
                    # Empty match at the end of the input: stop the pass.
                    raise lab0()
                else:
                    self.cursor += 1
            except lab0:
                self.cursor = step_start
                break
        # Second pass starts again from where the first one began.
        self.cursor = pass_start
        while True:
            round_start = self.cursor
            try:
                try:
                    while True:
                        scan_pos = self.cursor
                        try:
                            if not self.in_grouping(ItalianStemmer.g_v, 97, 249):
                                raise lab3()
                            self.bra = self.cursor
                            try:
                                alt_pos = self.cursor
                                try:
                                    # First alternative: "u" followed by a g_v character.
                                    if not self.eq_s(u"u"):
                                        raise lab5()
                                    self.ket = self.cursor
                                    if not self.in_grouping(ItalianStemmer.g_v, 97, 249):
                                        raise lab5()
                                    if not self.slice_from(u"U"):
                                        return False
                                    raise lab4()
                                except lab5:
                                    pass
                                # Second alternative: "i" followed by a g_v character.
                                self.cursor = alt_pos
                                if not self.eq_s(u"i"):
                                    raise lab3()
                                self.ket = self.cursor
                                if not self.in_grouping(ItalianStemmer.g_v, 97, 249):
                                    raise lab3()
                                if not self.slice_from(u"I"):
                                    return False
                            except lab4:
                                pass
                            # A replacement was made: go back to scan_pos and
                            # start the next round from there.
                            self.cursor = scan_pos
                            raise lab2()
                        except lab3:
                            pass
                        # No replacement here: move on by one position, or
                        # finish when the end of the input is reached.
                        self.cursor = scan_pos
                        if self.cursor >= self.limit:
                            raise lab1()
                        self.cursor += 1
                except lab2:
                    pass
                continue
            except lab1:
                pass
            self.cursor = round_start
            break
        return True

    def __r_mark_regions(self):
        '''
        Compute I_pV, I_p1 and I_p2 for the current word.

        All three start out as self.limit and are overwritten only when the
        corresponding scan succeeds.  The cursor is put back to its entry
        value before returning.  Always returns True.
        '''
        self.I_pV = self.limit
        self.I_p1 = self.limit
        self.I_p2 = self.limit

        # ---- pV -----------------------------------------------------------
        entry_pos = self.cursor
        try:
            try:
                first_alt = self.cursor
                try:
                    # Alternative A: the word starts with a g_v character ...
                    if not self.in_grouping(ItalianStemmer.g_v, 97, 249):
                        raise lab2()
                    try:
                        inner_alt = self.cursor
                        try:
                            # ... followed by a non-g_v character.
                            if not self.out_grouping(ItalianStemmer.g_v, 97, 249):
                                raise lab4()
                            if not self.go_out_grouping(ItalianStemmer.g_v, 97, 249):
                                raise lab4()
                            self.cursor += 1
                            raise lab3()
                        except lab4:
                            pass
                        # ... or followed by another g_v character.
                        self.cursor = inner_alt
                        if not self.in_grouping(ItalianStemmer.g_v, 97, 249):
                            raise lab2()
                        if not self.go_in_grouping(ItalianStemmer.g_v, 97, 249):
                            raise lab2()
                        self.cursor += 1
                    except lab3:
                        pass
                    raise lab1()
                except lab2:
                    pass
                # Alternative B: the word starts with a non-g_v character.
                self.cursor = first_alt
                if not self.out_grouping(ItalianStemmer.g_v, 97, 249):
                    raise lab0()
                try:
                    inner_alt_b = self.cursor
                    try:
                        if not self.out_grouping(ItalianStemmer.g_v, 97, 249):
                            raise lab6()
                        if not self.go_out_grouping(ItalianStemmer.g_v, 97, 249):
                            raise lab6()
                        self.cursor += 1
                        raise lab5()
                    except lab6:
                        pass
                    # Second character is in g_v: step over one more position.
                    self.cursor = inner_alt_b
                    if not self.in_grouping(ItalianStemmer.g_v, 97, 249):
                        raise lab0()
                    if self.cursor >= self.limit:
                        raise lab0()
                    self.cursor += 1
                except lab5:
                    pass
            except lab1:
                pass
            # Reached only when one of the alternatives succeeded.
            self.I_pV = self.cursor
        except lab0:
            pass
        self.cursor = entry_pos

        # ---- p1 and p2 ------------------------------------------------------
        # The same two-step scan is run twice; p1 is recorded after the first
        # run and p2 after the second.  If any step fails, the boundaries not
        # yet assigned keep the value self.limit.
        regions_start = self.cursor
        try:
            if not self.go_out_grouping(ItalianStemmer.g_v, 97, 249):
                raise lab7()
            self.cursor += 1
            if not self.go_in_grouping(ItalianStemmer.g_v, 97, 249):
                raise lab7()
            self.cursor += 1
            self.I_p1 = self.cursor
            if not self.go_out_grouping(ItalianStemmer.g_v, 97, 249):
                raise lab7()
            self.cursor += 1
            if not self.go_in_grouping(ItalianStemmer.g_v, 97, 249):
                raise lab7()
            self.cursor += 1
            self.I_p2 = self.cursor
        except lab7:
            pass
        self.cursor = regions_start
        return True

    def __r_postlude(self):
        '''
        Turn the markers "I" and "U" back into "i" and "u".

        Walks the word with table a_1 until no entry matches or the end of
        the input is reached.  Returns False only if slice_from() fails,
        otherwise True.
        '''
        lowered = {1: u"i", 2: u"u"}
        while True:
            step_start = self.cursor
            try:
                self.bra = self.cursor
                among_var = self.find_among(ItalianStemmer.a_1)
                if among_var == 0:
                    raise lab0()
                self.ket = self.cursor
                if among_var in lowered:
                    if not self.slice_from(lowered[among_var]):
                        return False
                elif self.cursor >= self.limit:
                    raise lab0()
                else:
                    self.cursor += 1
            except lab0:
                self.cursor = step_start
                break
        return True

    def __r_RV(self):
        '''Return True when the cursor is at or beyond I_pV.'''
        return self.I_pV <= self.cursor

    def __r_R1(self):
        '''Return True when the cursor is at or beyond I_p1.'''
        return self.I_p1 <= self.cursor

    def __r_R2(self):
        '''Return True when the cursor is at or beyond I_p2.'''
        return self.I_p2 <= self.cursor

    def __r_attached_pronoun(self):
        '''
        Handle a pronoun ending (table a_2) attached to a verb form.

        The pronoun must be preceded by an a_3 ending and the cursor must
        then satisfy __r_RV().  After "ando"/"endo" the pronoun is deleted;
        after "ar"/"er"/"ir" it is replaced by "e".

        Returns True when the word was changed, False otherwise.
        '''
        self.ket = self.cursor
        if self.find_among_b(ItalianStemmer.a_2) == 0:
            return False
        self.bra = self.cursor
        among_var = self.find_among_b(ItalianStemmer.a_3)
        if among_var == 0 or not self.__r_RV():
            return False
        if among_var == 1:
            changed = self.slice_del()
        else:
            changed = self.slice_from(u"e")
        return True if changed else False

    def __r_standard_suffix(self):
        '''
        Apply the rule selected by the longest matching a_6 suffix.

        Returns False when no a_6 entry matches, when the region test of the
        selected rule fails, or when a slice operation fails; True otherwise.
        The optional follow-up removals never turn a success into a failure:
        if one of them does not apply, the cursor is restored and the
        method still returns True.
        '''
        self.ket = self.cursor
        among_var = self.find_among_b(ItalianStemmer.a_6)
        if among_var == 0:
            return False
        self.bra = self.cursor
        if among_var == 1:
            # Plain deletion inside R2.
            if not (self.__r_R2() and self.slice_del()):
                return False
        elif among_var == 2:
            # azione / atore / azioni / atori: delete inside R2, then also
            # drop a preceding "ic" if that lies inside R2 as well.
            if not (self.__r_R2() and self.slice_del()):
                return False
            from_end = self.limit - self.cursor
            try:
                self.ket = self.cursor
                if not self.eq_s_b(u"ic"):
                    raise lab0()
                self.bra = self.cursor
                if not self.__r_R2():
                    raise lab0()
                if not self.slice_del():
                    return False
            except lab0:
                self.cursor = self.limit - from_end
        elif among_var == 3:
            # logia / logie -> log
            if not (self.__r_R2() and self.slice_from(u"log")):
                return False
        elif among_var == 4:
            # usione / uzione / usioni / uzioni -> u
            if not (self.__r_R2() and self.slice_from(u"u")):
                return False
        elif among_var == 5:
            # enza / enze -> ente
            if not (self.__r_R2() and self.slice_from(u"ente")):
                return False
        elif among_var == 6:
            # amento / imento / amenti / imenti: deleted when inside RV.
            if not (self.__r_RV() and self.slice_del()):
                return False
        elif among_var == 7:
            # amente: deleted when inside R1, then an a_4 ending inside R2
            # may go too, and after "iv" also a preceding "at".
            if not (self.__r_R1() and self.slice_del()):
                return False
            from_end = self.limit - self.cursor
            try:
                self.ket = self.cursor
                among_var = self.find_among_b(ItalianStemmer.a_4)
                if among_var == 0:
                    raise lab1()
                self.bra = self.cursor
                if not self.__r_R2():
                    raise lab1()
                if not self.slice_del():
                    return False
                if among_var == 1:
                    self.ket = self.cursor
                    if not self.eq_s_b(u"at"):
                        raise lab1()
                    self.bra = self.cursor
                    if not self.__r_R2():
                        raise lab1()
                    if not self.slice_del():
                        return False
            except lab1:
                self.cursor = self.limit - from_end
        elif among_var == 8:
            # it\u00E0: deleted inside R2, then an a_5 ending inside R2 may go too.
            if not (self.__r_R2() and self.slice_del()):
                return False
            from_end = self.limit - self.cursor
            try:
                self.ket = self.cursor
                if self.find_among_b(ItalianStemmer.a_5) == 0:
                    raise lab2()
                self.bra = self.cursor
                if not self.__r_R2():
                    raise lab2()
                if not self.slice_del():
                    return False
            except lab2:
                self.cursor = self.limit - from_end
        else:
            # Remaining code (9: iva / ive / ivi / ivo): deleted inside R2,
            # then a preceding "at" and after that a preceding "ic".
            if not (self.__r_R2() and self.slice_del()):
                return False
            from_end = self.limit - self.cursor
            try:
                self.ket = self.cursor
                if not self.eq_s_b(u"at"):
                    raise lab3()
                self.bra = self.cursor
                if not self.__r_R2():
                    raise lab3()
                if not self.slice_del():
                    return False
                self.ket = self.cursor
                if not self.eq_s_b(u"ic"):
                    raise lab3()
                self.bra = self.cursor
                if not self.__r_R2():
                    raise lab3()
                if not self.slice_del():
                    return False
            except lab3:
                self.cursor = self.limit - from_end
        return True

    def __r_verb_suffix(self):
        '''
        Delete a verb ending from table a_7.

        The search runs with limit_backward temporarily set to I_pV.
        Returns True when an ending was deleted, False otherwise.
        '''
        if self.cursor < self.I_pV:
            return False
        outer_limit = self.limit_backward
        self.limit_backward = self.I_pV
        self.ket = self.cursor
        found = self.find_among_b(ItalianStemmer.a_7) != 0
        if found:
            self.bra = self.cursor
            if not self.slice_del():
                # limit_backward is deliberately left as it is on this path.
                return False
        self.limit_backward = outer_limit
        return found

    def __r_vowel_suffix(self):
        '''
        Tidy up the end of the word.

        Step 1: delete a final g_AEIO character if __r_RV() holds, then a
        preceding "i" under the same condition.  Step 2: delete a final "h"
        that follows a g_CG character, again only if __r_RV() holds.  A step
        that does not apply leaves the cursor where the step started.

        Returns False only if slice_del() fails, otherwise True.
        '''
        from_end = self.limit - self.cursor
        try:
            self.ket = self.cursor
            if not self.in_grouping_b(ItalianStemmer.g_AEIO, 97, 242):
                raise lab0()
            self.bra = self.cursor
            if not self.__r_RV():
                raise lab0()
            if not self.slice_del():
                return False
            self.ket = self.cursor
            if not self.eq_s_b(u"i"):
                raise lab0()
            self.bra = self.cursor
            if not self.__r_RV():
                raise lab0()
            if not self.slice_del():
                return False
        except lab0:
            self.cursor = self.limit - from_end

        from_end = self.limit - self.cursor
        try:
            self.ket = self.cursor
            if not self.eq_s_b(u"h"):
                raise lab1()
            self.bra = self.cursor
            if not self.in_grouping_b(ItalianStemmer.g_CG, 99, 103):
                raise lab1()
            if not self.__r_RV():
                raise lab1()
            if not self.slice_del():
                return False
        except lab1:
            self.cursor = self.limit - from_end
        return True

    def _stem(self):
        '''
        Run the complete stemming pipeline on the current word.

        Positions in the suffix phase are remembered as distances from
        self.limit, so they stay meaningful when a step changes the word.
        Always returns True.
        '''
        # Forward phase: prelude and region marking.
        word_start = self.cursor
        self.__r_prelude()
        self.cursor = word_start
        self.__r_mark_regions()

        # Suffix phase: the entry cursor becomes the backward limit and the
        # steps below start from self.limit.
        self.limit_backward = self.cursor
        self.cursor = self.limit

        from_end = self.limit - self.cursor
        self.__r_attached_pronoun()
        self.cursor = self.limit - from_end

        # Verb suffixes are tried only when no standard suffix rule applied.
        from_end = self.limit - self.cursor
        if not self.__r_standard_suffix():
            self.cursor = self.limit - from_end
            self.__r_verb_suffix()
        self.cursor = self.limit - from_end

        from_end = self.limit - self.cursor
        self.__r_vowel_suffix()
        self.cursor = self.limit - from_end

        # Forward phase again: postlude from the backward limit.
        self.cursor = self.limit_backward
        word_start = self.cursor
        self.__r_postlude()
        self.cursor = word_start
        return True
| |
| |
class lab0(BaseException):
    '''
    Label used for control flow by ItalianStemmer (for example in
    __r_mark_regions and __r_postlude): raised to leave a nested block
    and caught again inside the same method.  Carries no data.
    '''
| |
| |
class lab1(BaseException):
    '''
    Label used for control flow by ItalianStemmer (for example in
    __r_mark_regions, where it marks the successful first alternative).
    Raised and caught inside the same method; carries no data.
    '''
| |
| |
class lab2(BaseException):
    '''
    Label used for control flow by ItalianStemmer (for example in
    __r_mark_regions, where it abandons the first alternative).
    Raised and caught inside the same method; carries no data.
    '''
| |
| |
class lab3(BaseException):
    '''
    Label used for control flow by ItalianStemmer (for example in
    __r_prelude and __r_mark_regions).  Raised and caught inside the
    same method; carries no data.
    '''
| |
| |
class lab4(BaseException):
    '''
    Label used for control flow by ItalianStemmer (in __r_prelude and
    __r_mark_regions).  Raised and caught inside the same method;
    carries no data.
    '''
| |
| |
class lab5(BaseException):
    '''
    Label used for control flow by ItalianStemmer (in __r_prelude and
    __r_mark_regions) to skip from one alternative to the next.
    Raised and caught inside the same method; carries no data.
    '''
| |
| |
class lab6(BaseException):
    '''
    Label used for control flow by ItalianStemmer.__r_mark_regions while
    computing I_pV.  Raised and caught inside that method; carries no
    data.
    '''
| |
| |
class lab7(BaseException):
    '''
    Label used for control flow by ItalianStemmer.__r_mark_regions: it
    ends the I_p1 / I_p2 scan as soon as one of its steps fails.
    Raised and caught inside that method; carries no data.
    '''