| # Generated by Snowball 2.2.0 - https://snowballstem.org/ |
| |
| from .basestemmer import BaseStemmer |
| from .among import Among |
| |
| |
class PortugueseStemmer(BaseStemmer):
    '''
    This class implements the stemming algorithm defined by a snowball script.
    Generated by Snowball 2.2.0 - https://snowballstem.org/

    Processing order (see ``_stem``): prelude, region marking, a backward
    pass that tries the standard suffixes, then verb suffixes, then the
    residual suffix, followed by the residual form and finally the postlude.

    NOTE(review): cursor/limit/bra/ket handling, ``find_among*``,
    ``slice_*``, ``eq_s_b`` and the grouping helpers are inherited from
    BaseStemmer, which is not visible here; comments about what they do are
    inferred from how they are used in this class.
    '''

    # Layout of every table below: Among(text, parent, result).
    #   * ``parent`` is the index of a shorter entry of the same table that
    #     this entry extends (e.g. "amente" -> "mente"), or -1 for none.
    #   * ``result`` is the value the methods compare against after a lookup
    #     (presumably what find_among / find_among_b return; 0 means no match).

    # Prelude: nasal vowels to rewrite (1 = a-tilde, 2 = o-tilde); the empty
    # entry (3) is the fallback that just steps over one character.
    a_0 = [
        Among(u"", -1, 3),
        Among(u"\u00E3", 0, 1),
        Among(u"\u00F5", 0, 2),
    ]

    # Postlude: the inverse mapping, "a~" / "o~" back to the accented forms.
    a_1 = [
        Among(u"", -1, 3),
        Among(u"a~", 0, 1),
        Among(u"o~", 0, 2),
    ]

    # Endings tried after an "amente" removal; only "iv" (result 1) goes on
    # to the additional "at" check. The -1 results are merely "non-zero".
    a_2 = [
        Among(u"ic", -1, -1),
        Among(u"ad", -1, -1),
        Among(u"os", -1, -1),
        Among(u"iv", -1, 1),
    ]

    # Endings tried after a "mente" removal.
    a_3 = [
        Among(u"ante", -1, 1),
        Among(u"avel", -1, 1),
        Among(u"\u00EDvel", -1, 1),
    ]

    # Endings tried after an "idade" / "idades" removal.
    a_4 = [
        Among(u"ic", -1, 1),
        Among(u"abil", -1, 1),
        Among(u"iv", -1, 1),
    ]

    # Standard suffixes; the result selects the action in
    # __r_standard_suffix.
    a_5 = [
        Among(u"ica", -1, 1),
        Among(u"\u00E2ncia", -1, 1),
        Among(u"\u00EAncia", -1, 4),
        Among(u"logia", -1, 2),
        Among(u"ira", -1, 9),
        Among(u"adora", -1, 1),
        Among(u"osa", -1, 1),
        Among(u"ista", -1, 1),
        Among(u"iva", -1, 8),
        Among(u"eza", -1, 1),
        Among(u"idade", -1, 7),
        Among(u"ante", -1, 1),
        Among(u"mente", -1, 6),
        Among(u"amente", 12, 5),
        Among(u"\u00E1vel", -1, 1),
        Among(u"\u00EDvel", -1, 1),
        Among(u"ico", -1, 1),
        Among(u"ismo", -1, 1),
        Among(u"oso", -1, 1),
        Among(u"amento", -1, 1),
        Among(u"imento", -1, 1),
        Among(u"ivo", -1, 8),
        Among(u"a\u00E7a~o", -1, 1),
        Among(u"u\u00E7a~o", -1, 3),
        Among(u"ador", -1, 1),
        Among(u"icas", -1, 1),
        Among(u"\u00EAncias", -1, 4),
        Among(u"logias", -1, 2),
        Among(u"iras", -1, 9),
        Among(u"adoras", -1, 1),
        Among(u"osas", -1, 1),
        Among(u"istas", -1, 1),
        Among(u"ivas", -1, 8),
        Among(u"ezas", -1, 1),
        Among(u"idades", -1, 7),
        Among(u"adores", -1, 1),
        Among(u"antes", -1, 1),
        Among(u"a\u00E7o~es", -1, 1),
        Among(u"u\u00E7o~es", -1, 3),
        Among(u"icos", -1, 1),
        Among(u"ismos", -1, 1),
        Among(u"osos", -1, 1),
        Among(u"amentos", -1, 1),
        Among(u"imentos", -1, 1),
        Among(u"ivos", -1, 8),
    ]

    # Verb suffixes; every entry is simply deleted by __r_verb_suffix.
    a_6 = [
        Among(u"ada", -1, 1),
        Among(u"ida", -1, 1),
        Among(u"ia", -1, 1),
        Among(u"aria", 2, 1),
        Among(u"eria", 2, 1),
        Among(u"iria", 2, 1),
        Among(u"ara", -1, 1),
        Among(u"era", -1, 1),
        Among(u"ira", -1, 1),
        Among(u"ava", -1, 1),
        Among(u"asse", -1, 1),
        Among(u"esse", -1, 1),
        Among(u"isse", -1, 1),
        Among(u"aste", -1, 1),
        Among(u"este", -1, 1),
        Among(u"iste", -1, 1),
        Among(u"ei", -1, 1),
        Among(u"arei", 16, 1),
        Among(u"erei", 16, 1),
        Among(u"irei", 16, 1),
        Among(u"am", -1, 1),
        Among(u"iam", 20, 1),
        Among(u"ariam", 21, 1),
        Among(u"eriam", 21, 1),
        Among(u"iriam", 21, 1),
        Among(u"aram", 20, 1),
        Among(u"eram", 20, 1),
        Among(u"iram", 20, 1),
        Among(u"avam", 20, 1),
        Among(u"em", -1, 1),
        Among(u"arem", 29, 1),
        Among(u"erem", 29, 1),
        Among(u"irem", 29, 1),
        Among(u"assem", 29, 1),
        Among(u"essem", 29, 1),
        Among(u"issem", 29, 1),
        Among(u"ado", -1, 1),
        Among(u"ido", -1, 1),
        Among(u"ando", -1, 1),
        Among(u"endo", -1, 1),
        Among(u"indo", -1, 1),
        Among(u"ara~o", -1, 1),
        Among(u"era~o", -1, 1),
        Among(u"ira~o", -1, 1),
        Among(u"ar", -1, 1),
        Among(u"er", -1, 1),
        Among(u"ir", -1, 1),
        Among(u"as", -1, 1),
        Among(u"adas", 47, 1),
        Among(u"idas", 47, 1),
        Among(u"ias", 47, 1),
        Among(u"arias", 50, 1),
        Among(u"erias", 50, 1),
        Among(u"irias", 50, 1),
        Among(u"aras", 47, 1),
        Among(u"eras", 47, 1),
        Among(u"iras", 47, 1),
        Among(u"avas", 47, 1),
        Among(u"es", -1, 1),
        Among(u"ardes", 58, 1),
        Among(u"erdes", 58, 1),
        Among(u"irdes", 58, 1),
        Among(u"ares", 58, 1),
        Among(u"eres", 58, 1),
        Among(u"ires", 58, 1),
        Among(u"asses", 58, 1),
        Among(u"esses", 58, 1),
        Among(u"isses", 58, 1),
        Among(u"astes", 58, 1),
        Among(u"estes", 58, 1),
        Among(u"istes", 58, 1),
        Among(u"is", -1, 1),
        Among(u"ais", 71, 1),
        Among(u"eis", 71, 1),
        Among(u"areis", 73, 1),
        Among(u"ereis", 73, 1),
        Among(u"ireis", 73, 1),
        Among(u"\u00E1reis", 73, 1),
        Among(u"\u00E9reis", 73, 1),
        Among(u"\u00EDreis", 73, 1),
        Among(u"\u00E1sseis", 73, 1),
        Among(u"\u00E9sseis", 73, 1),
        Among(u"\u00EDsseis", 73, 1),
        Among(u"\u00E1veis", 73, 1),
        Among(u"\u00EDeis", 73, 1),
        Among(u"ar\u00EDeis", 84, 1),
        Among(u"er\u00EDeis", 84, 1),
        Among(u"ir\u00EDeis", 84, 1),
        Among(u"ados", -1, 1),
        Among(u"idos", -1, 1),
        Among(u"amos", -1, 1),
        Among(u"\u00E1ramos", 90, 1),
        Among(u"\u00E9ramos", 90, 1),
        Among(u"\u00EDramos", 90, 1),
        Among(u"\u00E1vamos", 90, 1),
        Among(u"\u00EDamos", 90, 1),
        Among(u"ar\u00EDamos", 95, 1),
        Among(u"er\u00EDamos", 95, 1),
        Among(u"ir\u00EDamos", 95, 1),
        Among(u"emos", -1, 1),
        Among(u"aremos", 99, 1),
        Among(u"eremos", 99, 1),
        Among(u"iremos", 99, 1),
        Among(u"\u00E1ssemos", 99, 1),
        Among(u"\u00EAssemos", 99, 1),
        Among(u"\u00EDssemos", 99, 1),
        Among(u"imos", -1, 1),
        Among(u"armos", -1, 1),
        Among(u"ermos", -1, 1),
        Among(u"irmos", -1, 1),
        Among(u"\u00E1mos", -1, 1),
        Among(u"ar\u00E1s", -1, 1),
        Among(u"er\u00E1s", -1, 1),
        Among(u"ir\u00E1s", -1, 1),
        Among(u"eu", -1, 1),
        Among(u"iu", -1, 1),
        Among(u"ou", -1, 1),
        Among(u"ar\u00E1", -1, 1),
        Among(u"er\u00E1", -1, 1),
        Among(u"ir\u00E1", -1, 1),
    ]

    # Residual suffixes deleted by __r_residual_suffix.
    a_7 = [
        Among(u"a", -1, 1),
        Among(u"i", -1, 1),
        Among(u"o", -1, 1),
        Among(u"os", -1, 1),
        Among(u"\u00E1", -1, 1),
        Among(u"\u00ED", -1, 1),
        Among(u"\u00F3", -1, 1),
    ]

    # Residual forms: result 1 is deleted, result 2 (c-cedilla) becomes "c".
    a_8 = [
        Among(u"e", -1, 1),
        Among(u"\u00E7", -1, 2),
        Among(u"\u00E9", -1, 1),
        Among(u"\u00EA", -1, 1),
    ]

    # Character-group table handed to the grouping helpers together with the
    # code point range 97..250 (presumably the vowel set -- confirm against
    # BaseStemmer's bitmap layout).
    g_v = [17, 65, 16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 19, 12, 2]

    # Region marks; reset to self.limit and then filled in by
    # __r_mark_regions.
    I_p2 = 0
    I_p1 = 0
    I_pV = 0

    def __r_prelude(self):
        '''
        Walk forward over the word replacing a-tilde with "a~" and o-tilde
        with "o~"; any other character is stepped over.

        Returns False only when a slice replacement fails; otherwise the
        cursor is put back where the final (unsuccessful) iteration started
        and True is returned.
        '''
        while True:
            iteration_start = self.cursor
            self.bra = iteration_start
            code = self.find_among(PortugueseStemmer.a_0)
            if code == 0:
                break
            self.ket = self.cursor
            if code == 1:
                if not self.slice_from(u"a~"):
                    return False
            elif code == 2:
                if not self.slice_from(u"o~"):
                    return False
            elif self.cursor >= self.limit:
                # Nothing left to step over: the loop is finished.
                break
            else:
                self.cursor += 1
        self.cursor = iteration_start
        return True

    def __r_mark_regions(self):
        '''
        Compute the region marks I_pV, I_p1 and I_p2.

        All three start at self.limit and keep that value when the relevant
        scan does not succeed. The cursor is restored to its entry position
        before returning. Always returns True.
        '''
        group = PortugueseStemmer.g_v
        self.I_pV = self.I_p1 = self.I_p2 = self.limit
        origin = self.cursor

        # --- pV -----------------------------------------------------------
        try:
            try:
                branch_start = self.cursor
                try:
                    # Branch A: the first character is in the group.
                    if not self.in_grouping(group, 97, 250):
                        raise lab2()
                    try:
                        after_first = self.cursor
                        try:
                            # A1: an out-of-group character, then scan with
                            # go_out_grouping and step one further.
                            if not (self.out_grouping(group, 97, 250)
                                    and self.go_out_grouping(group, 97, 250)):
                                raise lab4()
                            self.cursor += 1
                            raise lab3()
                        except lab4:
                            pass
                        # A2: an in-group character, then scan with
                        # go_in_grouping and step one further.
                        self.cursor = after_first
                        if not (self.in_grouping(group, 97, 250)
                                and self.go_in_grouping(group, 97, 250)):
                            raise lab2()
                        self.cursor += 1
                    except lab3:
                        pass
                    raise lab1()
                except lab2:
                    pass
                # Branch B: the first character is out of the group.
                self.cursor = branch_start
                if not self.out_grouping(group, 97, 250):
                    raise lab0()
                try:
                    after_first = self.cursor
                    try:
                        # B1: mirrors A1.
                        if not (self.out_grouping(group, 97, 250)
                                and self.go_out_grouping(group, 97, 250)):
                            raise lab6()
                        self.cursor += 1
                        raise lab5()
                    except lab6:
                        pass
                    # B2: an in-group character followed by any character.
                    self.cursor = after_first
                    if not self.in_grouping(group, 97, 250):
                        raise lab0()
                    if self.cursor >= self.limit:
                        raise lab0()
                    self.cursor += 1
                except lab5:
                    pass
            except lab1:
                pass
            self.I_pV = self.cursor
        except lab0:
            pass
        self.cursor = origin

        # --- p1 and p2 ----------------------------------------------------
        # Two identical scan pairs; p1 is recorded after the first pair and
        # p2 after the second. A failed scan leaves the later mark(s) at limit.
        try:
            if not self.go_out_grouping(group, 97, 250):
                raise lab7()
            self.cursor += 1
            if not self.go_in_grouping(group, 97, 250):
                raise lab7()
            self.cursor += 1
            self.I_p1 = self.cursor
            if not self.go_out_grouping(group, 97, 250):
                raise lab7()
            self.cursor += 1
            if not self.go_in_grouping(group, 97, 250):
                raise lab7()
            self.cursor += 1
            self.I_p2 = self.cursor
        except lab7:
            pass
        self.cursor = origin
        return True

    def __r_postlude(self):
        '''
        Undo the prelude: walk forward turning "a~" back into a-tilde and
        "o~" back into o-tilde, stepping over every other character.

        Returns False only when a slice replacement fails, True otherwise.
        '''
        while True:
            iteration_start = self.cursor
            self.bra = iteration_start
            code = self.find_among(PortugueseStemmer.a_1)
            if code == 0:
                break
            self.ket = self.cursor
            if code == 1:
                if not self.slice_from(u"\u00E3"):
                    return False
            elif code == 2:
                if not self.slice_from(u"\u00F5"):
                    return False
            elif self.cursor >= self.limit:
                # Nothing left to step over: the loop is finished.
                break
            else:
                self.cursor += 1
        self.cursor = iteration_start
        return True

    def __r_RV(self):
        '''Return True when the cursor is at or past the pV mark.'''
        return self.I_pV <= self.cursor

    def __r_R1(self):
        '''Return True when the cursor is at or past the p1 mark.'''
        return self.I_p1 <= self.cursor

    def __r_R2(self):
        '''Return True when the cursor is at or past the p2 mark.'''
        return self.I_p2 <= self.cursor

    def __r_standard_suffix(self):
        '''
        Backward step: match the longest a_5 suffix and apply its action.

        Returns True when a suffix was matched and its action carried out,
        False when nothing matched, the region test failed, or a slice
        operation failed. The optional follow-up removals (actions 5-8)
        never cause a False by failing to match; they just restore the
        cursor.
        '''
        self.ket = self.cursor
        action = self.find_among_b(PortugueseStemmer.a_5)
        if action == 0:
            return False
        self.bra = self.cursor

        if action == 1:
            # Delete if in R2.
            if not (self.__r_R2() and self.slice_del()):
                return False

        elif action == 2:
            # "logia(s)" -> "log" if in R2.
            if not (self.__r_R2() and self.slice_from(u"log")):
                return False

        elif action == 3:
            # "u" + "ça~o" / "ço~es" -> "u" if in R2.
            if not (self.__r_R2() and self.slice_from(u"u")):
                return False

        elif action == 4:
            # "ência(s)" -> "ente" if in R2.
            if not (self.__r_R2() and self.slice_from(u"ente")):
                return False

        elif action == 5:
            # "amente": delete if in R1, then optionally strip an a_2
            # ending in R2, and after "iv" optionally a further "at" in R2.
            if not (self.__r_R1() and self.slice_del()):
                return False
            mark = self.limit - self.cursor
            try:
                self.ket = self.cursor
                follow_up = self.find_among_b(PortugueseStemmer.a_2)
                if follow_up == 0:
                    raise lab0()
                self.bra = self.cursor
                if not self.__r_R2():
                    raise lab0()
                if not self.slice_del():
                    return False
                if follow_up == 1:
                    self.ket = self.cursor
                    if not self.eq_s_b(u"at"):
                        raise lab0()
                    self.bra = self.cursor
                    if not self.__r_R2():
                        raise lab0()
                    if not self.slice_del():
                        return False
            except lab0:
                # Optional part did not apply: go back to the mark.
                self.cursor = self.limit - mark

        elif action == 6:
            # "mente": delete if in R2, then optionally strip an a_3 ending
            # in R2.
            if not (self.__r_R2() and self.slice_del()):
                return False
            mark = self.limit - self.cursor
            try:
                self.ket = self.cursor
                if self.find_among_b(PortugueseStemmer.a_3) == 0:
                    raise lab1()
                self.bra = self.cursor
                if not self.__r_R2():
                    raise lab1()
                if not self.slice_del():
                    return False
            except lab1:
                self.cursor = self.limit - mark

        elif action == 7:
            # "idade(s)": delete if in R2, then optionally strip an a_4
            # ending in R2.
            if not (self.__r_R2() and self.slice_del()):
                return False
            mark = self.limit - self.cursor
            try:
                self.ket = self.cursor
                if self.find_among_b(PortugueseStemmer.a_4) == 0:
                    raise lab2()
                self.bra = self.cursor
                if not self.__r_R2():
                    raise lab2()
                if not self.slice_del():
                    return False
            except lab2:
                self.cursor = self.limit - mark

        elif action == 8:
            # "iva(s)" / "ivo(s)": delete if in R2, then optionally strip a
            # preceding "at" in R2.
            if not (self.__r_R2() and self.slice_del()):
                return False
            mark = self.limit - self.cursor
            try:
                self.ket = self.cursor
                if not self.eq_s_b(u"at"):
                    raise lab3()
                self.bra = self.cursor
                if not self.__r_R2():
                    raise lab3()
                if not self.slice_del():
                    return False
            except lab3:
                self.cursor = self.limit - mark

        else:
            # "ira(s)" (result 9): when in RV and preceded by "e", the
            # matched slice is replaced with "ir".
            if not (self.__r_RV()
                    and self.eq_s_b(u"e")
                    and self.slice_from(u"ir")):
                return False

        return True

    def __r_verb_suffix(self):
        '''
        Backward step: delete the longest a_6 verb suffix, searching with
        limit_backward temporarily moved to the pV mark.

        Returns True when a suffix was removed. Returns False when the
        cursor is before pV, nothing matched, or the deletion failed.
        '''
        if self.cursor < self.I_pV:
            return False
        outer_limit = self.limit_backward
        self.limit_backward = self.I_pV
        self.ket = self.cursor
        matched = self.find_among_b(PortugueseStemmer.a_6) != 0
        if matched:
            self.bra = self.cursor
            if not self.slice_del():
                # As in the generated code, limit_backward is left
                # untouched on this failure path.
                return False
        self.limit_backward = outer_limit
        return matched

    def __r_residual_suffix(self):
        '''
        Backward step: delete a trailing a_7 suffix when it starts in RV.

        Returns True when the suffix was deleted, False otherwise.
        '''
        self.ket = self.cursor
        if self.find_among_b(PortugueseStemmer.a_7) == 0:
            return False
        self.bra = self.cursor
        if not (self.__r_RV() and self.slice_del()):
            return False
        return True

    def __r_residual_form(self):
        '''
        Backward step handling a final a_8 entry.

        * c-cedilla is replaced by "c".
        * The other entries are deleted when in RV; afterwards a "u" that
          follows "g", or an "i" that follows "c", must be present and in RV
          and is deleted as well.

        Returns True when every required part succeeded, False otherwise
        (earlier deletions are not undone).
        '''
        self.ket = self.cursor
        action = self.find_among_b(PortugueseStemmer.a_8)
        if action == 0:
            return False
        self.bra = self.cursor

        if action != 1:
            # Only the c-cedilla entry carries a result other than 1.
            if not self.slice_from(u"c"):
                return False
            return True

        if not (self.__r_RV() and self.slice_del()):
            return False

        self.ket = self.cursor
        try:
            anchor = self.limit - self.cursor
            try:
                # First alternative: "u" with a "g" before it. The "g" is
                # only looked at; the cursor returns to just before the "u".
                if not self.eq_s_b(u"u"):
                    raise lab1()
                self.bra = self.cursor
                probe = self.limit - self.cursor
                if not self.eq_s_b(u"g"):
                    raise lab1()
                self.cursor = self.limit - probe
                raise lab0()
            except lab1:
                pass
            # Second alternative: "i" with a "c" before it, same pattern.
            self.cursor = self.limit - anchor
            if not self.eq_s_b(u"i"):
                return False
            self.bra = self.cursor
            probe = self.limit - self.cursor
            if not self.eq_s_b(u"c"):
                return False
            self.cursor = self.limit - probe
        except lab0:
            pass

        if not (self.__r_RV() and self.slice_del()):
            return False
        return True

    def _stem(self):
        '''
        Run the complete algorithm on the current word buffer.

        Returns True on completion; False only when the "i after c"
        deletion in the backward pass fails.
        '''
        # Forward phase: prelude, then region marks, both from the start.
        entry = self.cursor
        self.__r_prelude()
        self.cursor = entry
        self.__r_mark_regions()

        # Backward phase: work from the end of the word towards the entry
        # position.
        self.limit_backward = self.cursor
        self.cursor = self.limit
        pass_mark = self.limit - self.cursor
        try:
            try:
                residual_mark = self.limit - self.cursor
                try:
                    suffix_mark = self.limit - self.cursor
                    try:
                        verb_mark = self.limit - self.cursor
                        # Standard suffix first; verb suffix only when that
                        # fails. If both fail, fall through to the residual
                        # suffix (lab2).
                        if self.__r_standard_suffix():
                            raise lab3()
                        self.cursor = self.limit - verb_mark
                        if not self.__r_verb_suffix():
                            raise lab2()
                    except lab3:
                        pass
                    self.cursor = self.limit - suffix_mark
                    # After either removal: delete a final "i" that follows
                    # "c", provided the "i" is in RV. Purely optional.
                    i_mark = self.limit - self.cursor
                    try:
                        self.ket = self.cursor
                        if not self.eq_s_b(u"i"):
                            raise lab5()
                        self.bra = self.cursor
                        c_probe = self.limit - self.cursor
                        if not self.eq_s_b(u"c"):
                            raise lab5()
                        self.cursor = self.limit - c_probe
                        if not self.__r_RV():
                            raise lab5()
                        if not self.slice_del():
                            return False
                    except lab5:
                        pass
                    self.cursor = self.limit - i_mark
                    raise lab1()
                except lab2:
                    pass
                self.cursor = self.limit - residual_mark
                if not self.__r_residual_suffix():
                    raise lab0()
            except lab1:
                pass
        except lab0:
            pass
        self.cursor = self.limit - pass_mark

        # The residual form is always attempted; its result is ignored.
        form_mark = self.limit - self.cursor
        self.__r_residual_form()
        self.cursor = self.limit - form_mark

        # Back to forward mode for the postlude.
        self.cursor = self.limit_backward
        forward_start = self.cursor
        self.__r_postlude()
        self.cursor = forward_start
        return True
| |
| |
class lab0(BaseException):
    '''
    Internal jump label 0: raised and caught inside PortugueseStemmer
    methods to leave a nested block. It derives from BaseException, so
    ``except Exception`` handlers do not intercept it.
    '''
| |
| |
class lab1(BaseException):
    '''
    Internal jump label 1: raised and caught inside PortugueseStemmer
    methods to leave a nested block. It derives from BaseException, so
    ``except Exception`` handlers do not intercept it.
    '''
| |
| |
class lab2(BaseException):
    '''
    Internal jump label 2: raised and caught inside PortugueseStemmer
    methods to leave a nested block. It derives from BaseException, so
    ``except Exception`` handlers do not intercept it.
    '''
| |
| |
class lab3(BaseException):
    '''
    Internal jump label 3: raised and caught inside PortugueseStemmer
    methods to leave a nested block. It derives from BaseException, so
    ``except Exception`` handlers do not intercept it.
    '''
| |
| |
class lab4(BaseException):
    '''
    Internal jump label 4: raised and caught inside PortugueseStemmer
    methods to leave a nested block. It derives from BaseException, so
    ``except Exception`` handlers do not intercept it.
    '''
| |
| |
class lab5(BaseException):
    '''
    Internal jump label 5: raised and caught inside PortugueseStemmer
    methods to leave a nested block. It derives from BaseException, so
    ``except Exception`` handlers do not intercept it.
    '''
| |
| |
class lab6(BaseException):
    '''
    Internal jump label 6: raised and caught inside PortugueseStemmer
    methods to leave a nested block. It derives from BaseException, so
    ``except Exception`` handlers do not intercept it.
    '''
| |
| |
class lab7(BaseException):
    '''
    Internal jump label 7: raised and caught inside PortugueseStemmer
    methods to leave a nested block. It derives from BaseException, so
    ``except Exception`` handlers do not intercept it.
    '''