// plugins/cmssite/template/docbook/webhelp/template/content/search/stemmers/en_stemmer.js - ofbiz - Git at Google

 // Porter stemmer in Javascript. Few comments, but it's easy to follow against the rules in the original
 // paper, in
 //
 //  Porter, 1980, An algorithm for suffix stripping, Program, Vol. 14,
 //  no. 3, pp 130-137,
 //
 // see also http://www.tartarus.org/~martin/PorterStemmer

 // Release 1
 // Derived from (http://tartarus.org/~martin/PorterStemmer/js.txt) - cjm (iizuu) Aug 24, 2009

 var stemmer = (function(){
 	// Step 2 suffix rewrites, applied only when the remaining stem has measure m > 0.
 	var step2list = {
 			"ational" : "ate",
 			"tional" : "tion",
 			"enci" : "ence",
 			"anci" : "ance",
 			"izer" : "ize",
 			"bli" : "ble",
 			"alli" : "al",
 			"entli" : "ent",
 			"eli" : "e",
 			"ousli" : "ous",
 			"ization" : "ize",
 			"ation" : "ate",
 			"ator" : "ate",
 			"alism" : "al",
 			"iveness" : "ive",
 			"fulness" : "ful",
 			"ousness" : "ous",
 			"aliti" : "al",
 			"iviti" : "ive",
 			"biliti" : "ble",
 			"logi" : "log"
 		},

 		// Step 3 suffix rewrites, applied only when the remaining stem has measure m > 0.
 		step3list = {
 			"icate" : "ic",
 			"ative" : "",
 			"alize" : "al",
 			"iciti" : "ic",
 			"ical" : "ic",
 			"ful" : "",
 			"ness" : ""
 		},

 		// See http://snowball.tartarus.org/algorithms/english/stemmer.html
 		// "Exceptional forms in general": whole words mapped to a fixed stem.
 		specialWords = {
 			"skis" : "ski",
 			"skies" : "sky",
 			"dying" : "die",
 			"lying" : "lie",
 			"tying" : "tie",
 			"idly" : "idl",
 			"gently" : "gentl",
 			"ugly" : "ugli",
 			"early" : "earli",
 			"only" : "onli",
 			"singly" : "singl"
 		},

 		// Whole words that are returned unchanged.
 		invariantWords = {
 			"sky" : true,
 			"news" : true,
 			"howe" : true,
 			"atlas" : true,
 			"cosmos" : true,
 			"bias" : true,
 			"andes" : true,
 			"inning" : true,
 			"outing" : true,
 			"canning" : true,
 			"herring" : true,
 			"earring" : true,
 			"proceed" : true,
 			"exceed" : true,
 			"succeed" : true
 		},

 		// Own-property test, so inherited keys such as "constructor" are never
 		// mistaken for entries of the word tables above.
 		hasOwn = Object.prototype.hasOwnProperty,

 		c = "[^aeiou]",          // consonant
 		v = "[aeiouy]",          // vowel
 		C = c + "[^aeiouy]*",    // consonant sequence
 		V = v + "[aeiou]*",      // vowel sequence

 		// Measure tests, compiled once instead of on every call.
 		mgr0 = new RegExp("^(" + C + ")?" + V + C),                     // [C]VC... is m>0
 		meq1 = new RegExp("^(" + C + ")?" + V + C + "(" + V + ")?$"),   // [C]VC[V] is m=1
 		mgr1 = new RegExp("^(" + C + ")?" + V + C + V + C),             // [C]VCVC... is m>1
 		s_v = new RegExp("^(" + C + ")?" + v),                          // vowel in stem

 		doubleConsonant = new RegExp("([^aeiouylsz])\\1$"),            // doubled consonant other than l, s, z
 		cvcEnding = new RegExp("^" + C + v + "[^aeiouwxy]$"),           // C v c where the final c is not w, x or y
 		consonantY = new RegExp("^(.+" + c + ")y$"),                    // consonant followed by a final y

 		step1aSses = /^(.+?)(ss|i)es$/,
 		step1aS = /^(.+?)([^s])s$/,
 		step1bEed = /^(.+?)eed$/,
 		step1bEdIng = /^(.+?)(ed|ing)$/,
 		step1bRestoreE = /(at|bl|iz)$/,
 		step2re = /^(.+?)(ational|tional|enci|anci|izer|bli|alli|entli|eli|ousli|ization|ation|ator|alism|iveness|fulness|ousness|aliti|iviti|biliti|logi)$/,
 		step3re = /^(.+?)(icate|ative|alize|iciti|ical|ful|ness)$/,
 		step4re = /^(.+?)(al|ance|ence|er|ic|able|ible|ant|ement|ment|ent|ou|ism|ate|iti|ous|ive|ize)$/,
 		step4Ion = /^(.+?)(s|t)(ion)$/,
 		step5E = /^(.+?)e$/,
 		lastChar = /.$/;

 	/**
 	 * Reduce an English word to its stem using the Porter steps, followed by
 	 * whole-word exception tables and a few overstemming corrections.
 	 *
 	 * @param {string} w - the word to stem; words shorter than 3 characters are returned unchanged
 	 * @returns {string} the stemmed word
 	 */
 	return function (w) {
 		var stem,
 			suffix,
 			fp,
 			firstch,
 			origword = w;

 		if (w.length < 3) { return w; }

 		// Protect a leading "y" so the vowel classes do not treat it as a vowel.
 		firstch = w.substr(0,1);
 		if (firstch === "y") {
 			w = firstch.toUpperCase() + w.substr(1);
 		}

 		// Step 1a
 		if (step1aSses.test(w)) { w = w.replace(step1aSses,"$1$2"); }
 		else if (step1aS.test(w)) { w = w.replace(step1aS,"$1$2"); }

 		// Step 1b
 		if (step1bEed.test(w)) {
 			fp = step1bEed.exec(w);
 			if (mgr0.test(fp[1])) {
 				w = w.replace(lastChar,"");
 			}
 		} else if (step1bEdIng.test(w)) {
 			fp = step1bEdIng.exec(w);
 			stem = fp[1];
 			if (s_v.test(stem)) {
 				w = stem;
 				if (step1bRestoreE.test(w)) { w = w + "e"; }
 				else if (doubleConsonant.test(w)) { w = w.replace(lastChar,""); }
 				else if (cvcEnding.test(w)) { w = w + "e"; }
 			}
 		}

 		// Step 1c
 		if (consonantY.test(w)) {
 			fp = consonantY.exec(w);
 			w = fp[1] + "i";
 		}

 		// Step 2
 		if (step2re.test(w)) {
 			fp = step2re.exec(w);
 			stem = fp[1];
 			suffix = fp[2];
 			if (mgr0.test(stem)) {
 				w = stem + step2list[suffix];
 			}
 		}

 		// Step 3
 		if (step3re.test(w)) {
 			fp = step3re.exec(w);
 			stem = fp[1];
 			suffix = fp[2];
 			if (mgr0.test(stem)) {
 				w = stem + step3list[suffix];
 			}
 		}

 		// Step 4
 		if (step4re.test(w)) {
 			fp = step4re.exec(w);
 			stem = fp[1];
 			if (mgr1.test(stem)) {
 				w = stem;
 			}
 		} else if (step4Ion.test(w)) {
 			fp = step4Ion.exec(w);
 			stem = fp[1] + fp[2];
 			if (mgr1.test(stem)) {
 				w = stem;
 			}
 		}

 		// Step 5
 		if (step5E.test(w)) {
 			fp = step5E.exec(w);
 			stem = fp[1];
 			if (mgr1.test(stem) || (meq1.test(stem) && !(cvcEnding.test(stem)))) {
 				w = stem;
 			}
 		}

 		if (/ll$/.test(w) && mgr1.test(w)) {
 			w = w.replace(lastChar,"");
 		}

 		// and turn initial Y back to y
 		if (firstch === "y") {
 			w = firstch.toLowerCase() + w.substr(1);
 		}

 		// Whole-word exceptions. Both tables are matched against the complete
 		// original word, never against a fragment of it.
 		if (hasOwn.call(specialWords, origword)) {
 			w = specialWords[origword];
 		}

 		if (hasOwn.call(invariantWords, origword)) {
 			w = origword;
 		}

 		// Address words overstemmed as gener-
 		if (/.*generate?s?d?(ing)?$/.test(origword)) {
 			w = w + 'at';
 		}
 		if (/.*general(ly)?$/.test(origword)) {
 			w = w + 'al';
 		}
 		if (/.*generic(ally)?$/.test(origword)) {
 			w = w + 'ic';
 		}
 		if (/.*generous(ly)?$/.test(origword)) {
 			w = w + 'ous';
 		}
 		// Address words overstemmed as commun-
 		if (/.*communit(ies)?y?/.test(origword)) {
 			w = w + 'iti';
 		}

 		return w;
 	};
 })();
	// Porter stemmer in Javascript. Few comments, but it's easy to follow against the rules in the original
	// paper, in
	//
	// Porter, 1980, An algorithm for suffix stripping, Program, Vol. 14,
	// no. 3, pp 130-137,
	//
	// see also http://www.tartarus.org/~martin/PorterStemmer

	// Release 1
	// Derived from (http://tartarus.org/~martin/PorterStemmer/js.txt) - cjm (iizuu) Aug 24, 2009

	var stemmer = (function(){
		// Step 2 suffix rewrites, applied only when the remaining stem has measure m > 0.
		var step2list = {
				"ational" : "ate",
				"tional" : "tion",
				"enci" : "ence",
				"anci" : "ance",
				"izer" : "ize",
				"bli" : "ble",
				"alli" : "al",
				"entli" : "ent",
				"eli" : "e",
				"ousli" : "ous",
				"ization" : "ize",
				"ation" : "ate",
				"ator" : "ate",
				"alism" : "al",
				"iveness" : "ive",
				"fulness" : "ful",
				"ousness" : "ous",
				"aliti" : "al",
				"iviti" : "ive",
				"biliti" : "ble",
				"logi" : "log"
			},

			// Step 3 suffix rewrites, applied only when the remaining stem has measure m > 0.
			step3list = {
				"icate" : "ic",
				"ative" : "",
				"alize" : "al",
				"iciti" : "ic",
				"ical" : "ic",
				"ful" : "",
				"ness" : ""
			},

			// See http://snowball.tartarus.org/algorithms/english/stemmer.html
			// "Exceptional forms in general": whole words mapped to a fixed stem.
			specialWords = {
				"skis" : "ski",
				"skies" : "sky",
				"dying" : "die",
				"lying" : "lie",
				"tying" : "tie",
				"idly" : "idl",
				"gently" : "gentl",
				"ugly" : "ugli",
				"early" : "earli",
				"only" : "onli",
				"singly" : "singl"
			},

			// Whole words that are returned unchanged.
			invariantWords = {
				"sky" : true,
				"news" : true,
				"howe" : true,
				"atlas" : true,
				"cosmos" : true,
				"bias" : true,
				"andes" : true,
				"inning" : true,
				"outing" : true,
				"canning" : true,
				"herring" : true,
				"earring" : true,
				"proceed" : true,
				"exceed" : true,
				"succeed" : true
			},

			// Own-property test, so inherited keys such as "constructor" are never
			// mistaken for entries of the word tables above.
			hasOwn = Object.prototype.hasOwnProperty,

			c = "[^aeiou]",          // consonant
			v = "[aeiouy]",          // vowel
			C = c + "[^aeiouy]*",    // consonant sequence
			V = v + "[aeiou]*",      // vowel sequence

			// Measure tests, compiled once instead of on every call.
			mgr0 = new RegExp("^(" + C + ")?" + V + C),                     // [C]VC... is m>0
			meq1 = new RegExp("^(" + C + ")?" + V + C + "(" + V + ")?$"),   // [C]VC[V] is m=1
			mgr1 = new RegExp("^(" + C + ")?" + V + C + V + C),             // [C]VCVC... is m>1
			s_v = new RegExp("^(" + C + ")?" + v),                          // vowel in stem

			doubleConsonant = new RegExp("([^aeiouylsz])\\1$"),            // doubled consonant other than l, s, z
			cvcEnding = new RegExp("^" + C + v + "[^aeiouwxy]$"),           // C v c where the final c is not w, x or y
			consonantY = new RegExp("^(.+" + c + ")y$"),                    // consonant followed by a final y

			// Suffix alternations use a real "|"; an escaped "\|" would only
			// match a literal pipe character and disable every step.
			step1aSses = /^(.+?)(ss|i)es$/,
			step1aS = /^(.+?)([^s])s$/,
			step1bEed = /^(.+?)eed$/,
			step1bEdIng = /^(.+?)(ed|ing)$/,
			step1bRestoreE = /(at|bl|iz)$/,
			step2re = /^(.+?)(ational|tional|enci|anci|izer|bli|alli|entli|eli|ousli|ization|ation|ator|alism|iveness|fulness|ousness|aliti|iviti|biliti|logi)$/,
			step3re = /^(.+?)(icate|ative|alize|iciti|ical|ful|ness)$/,
			step4re = /^(.+?)(al|ance|ence|er|ic|able|ible|ant|ement|ment|ent|ou|ism|ate|iti|ous|ive|ize)$/,
			step4Ion = /^(.+?)(s|t)(ion)$/,
			step5E = /^(.+?)e$/,
			lastChar = /.$/;

		/**
		 * Reduce an English word to its stem using the Porter steps, followed by
		 * whole-word exception tables and a few overstemming corrections.
		 *
		 * @param {string} w - the word to stem; words shorter than 3 characters are returned unchanged
		 * @returns {string} the stemmed word
		 */
		return function (w) {
			var stem,
				suffix,
				fp,
				firstch,
				origword = w;

			if (w.length < 3) { return w; }

			// Protect a leading "y" so the vowel classes do not treat it as a vowel.
			firstch = w.substr(0,1);
			if (firstch === "y") {
				w = firstch.toUpperCase() + w.substr(1);
			}

			// Step 1a
			if (step1aSses.test(w)) { w = w.replace(step1aSses,"$1$2"); }
			else if (step1aS.test(w)) { w = w.replace(step1aS,"$1$2"); }

			// Step 1b
			if (step1bEed.test(w)) {
				fp = step1bEed.exec(w);
				if (mgr0.test(fp[1])) {
					w = w.replace(lastChar,"");
				}
			} else if (step1bEdIng.test(w)) {
				fp = step1bEdIng.exec(w);
				stem = fp[1];
				if (s_v.test(stem)) {
					w = stem;
					if (step1bRestoreE.test(w)) { w = w + "e"; }
					else if (doubleConsonant.test(w)) { w = w.replace(lastChar,""); }
					else if (cvcEnding.test(w)) { w = w + "e"; }
				}
			}

			// Step 1c
			if (consonantY.test(w)) {
				fp = consonantY.exec(w);
				w = fp[1] + "i";
			}

			// Step 2
			if (step2re.test(w)) {
				fp = step2re.exec(w);
				stem = fp[1];
				suffix = fp[2];
				if (mgr0.test(stem)) {
					w = stem + step2list[suffix];
				}
			}

			// Step 3
			if (step3re.test(w)) {
				fp = step3re.exec(w);
				stem = fp[1];
				suffix = fp[2];
				if (mgr0.test(stem)) {
					w = stem + step3list[suffix];
				}
			}

			// Step 4
			if (step4re.test(w)) {
				fp = step4re.exec(w);
				stem = fp[1];
				if (mgr1.test(stem)) {
					w = stem;
				}
			} else if (step4Ion.test(w)) {
				fp = step4Ion.exec(w);
				stem = fp[1] + fp[2];
				if (mgr1.test(stem)) {
					w = stem;
				}
			}

			// Step 5
			if (step5E.test(w)) {
				fp = step5E.exec(w);
				stem = fp[1];
				if (mgr1.test(stem) || (meq1.test(stem) && !(cvcEnding.test(stem)))) {
					w = stem;
				}
			}

			if (/ll$/.test(w) && mgr1.test(w)) {
				w = w.replace(lastChar,"");
			}

			// and turn initial Y back to y
			if (firstch === "y") {
				w = firstch.toLowerCase() + w.substr(1);
			}

			// Whole-word exceptions. Both tables are matched against the complete
			// original word, never against a fragment of it.
			if (hasOwn.call(specialWords, origword)) {
				w = specialWords[origword];
			}

			if (hasOwn.call(invariantWords, origword)) {
				w = origword;
			}

			// Address words overstemmed as gener-
			if (/.*generate?s?d?(ing)?$/.test(origword)) {
				w = w + 'at';
			}
			if (/.*general(ly)?$/.test(origword)) {
				w = w + 'al';
			}
			if (/.*generic(ally)?$/.test(origword)) {
				w = w + 'ic';
			}
			if (/.*generous(ly)?$/.test(origword)) {
				w = w + 'ous';
			}
			// Address words overstemmed as commun-
			if (/.*communit(ies)?y?/.test(origword)) {
				w = w + 'iti';
			}

			return w;
		};
	})();