trunk/oak-solr-core/src/main/java/org/apache/jackrabbit/oak/plugins/index/solr/query/LMSEstimator.java - jackrabbit-oak - Git at Google

 /*
  * Licensed to the Apache Software Foundation (ASF) under one or more
  * contributor license agreements.  See the NOTICE file distributed with
  * this work for additional information regarding copyright ownership.
  * The ASF licenses this file to You under the Apache License, Version 2.0
  * (the "License"); you may not use this file except in compliance with
  * the License.  You may obtain a copy of the License at
  *
  *     http://www.apache.org/licenses/LICENSE-2.0
  *
  * Unless required by applicable law or agreed to in writing, software
  * distributed under the License is distributed on an "AS IS" BASIS,
  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
 package org.apache.jackrabbit.oak.plugins.index.solr.query;

 import java.util.Arrays;

 import org.apache.jackrabbit.oak.spi.query.Filter;
 import org.apache.solr.common.SolrDocumentList;

 /**
  * A very simple estimator for no. of entries in the index using least mean square update method for linear regression.
  */
 class LMSEstimator {

     private static final double DEFAULT_ALPHA = 0.03;
     private static final int DEFAULT_THRESHOLD = 5;

     private double[] weights;
     private final double alpha;
     private final long threshold;

     LMSEstimator(double alpha, double[] weights, long threshold) {
         this.alpha = alpha;
         this.weights = weights;
         this.threshold = threshold;
     }

     LMSEstimator(double[] weights) {
         this(DEFAULT_ALPHA, weights, DEFAULT_THRESHOLD);
     }

     LMSEstimator() {
         this(DEFAULT_ALPHA, new double[5], 5);
     }

     synchronized void update(Filter filter, SolrDocumentList docs) {
         double[] updatedWeights = new double[weights.length];

         // least mean square cost
         long estimate = estimate(filter);
         long numFound = docs.getNumFound();
         long residual = numFound - estimate;
         double delta = Math.pow(residual, 2);

         if (Math.abs(delta) > threshold) {
             for (int i = 0; i < updatedWeights.length; i++) {
                 updatedWeights[i] = weights[i] + alpha * residual * getInput(filter, i);
             }
             // weights updated
             weights = Arrays.copyOf(updatedWeights, 5);
         }
     }

     long estimate(Filter filter) {
         long estimatedEntryCount = 0;
         for (int i = 0; i < 5; i++) {
             estimatedEntryCount += weights[i] * getInput(filter, i);
         }
         if (Double.isInfinite(estimatedEntryCount) || Double.isNaN(estimatedEntryCount)) {
             // prevent over / under flow
             estimatedEntryCount = 1;
             weights = new double[5];
         }
         return Math.max(0, estimatedEntryCount);
     }

     /**
      * Get the input value for a certain feature (by index) in the given filter.
      * <p/>
      * A filter is represented as a vector in R^5 where
      * i_0 : no. of property restrictions
      * i_1 : 1 if any native constraint exists in the filter, 0 otherwise
      * i_2 : the path restriction ordinal
      * i_3 : the depth of the path restriction if set, 0 otherwise
      * i_4 : the precedence of the dominant full text constraint if present, 0 otherwise
      *
      * @param filter the filter
      * @param i      the index of the filter vector feature to retrieve
      * @return the feature value
      */
     private long getInput(Filter filter, int i) {
         assert i < 5;
         if (i == 0) {
             return filter.getPropertyRestrictions() != null ? filter.getPropertyRestrictions().size() : 0;
         } else if (i == 1) {
             return filter.containsNativeConstraint() ? 1 : 0;
         } else if (i == 2) {
             return filter.getPathRestriction() != null ? filter.getPathRestriction().ordinal() : 0;
         } else if (i == 3) {
             return filter.getPathRestriction() != null ? filter.getPathRestriction().toString().split("/").length : 0;
         } else if (i == 4) {
             return filter.getFullTextConstraint() != null ? filter.getFullTextConstraint().getPrecedence() : 0;
         }
         return 0;
     }
 }
	/*
	* Licensed to the Apache Software Foundation (ASF) under one or more
	* contributor license agreements. See the NOTICE file distributed with
	* this work for additional information regarding copyright ownership.
	* The ASF licenses this file to You under the Apache License, Version 2.0
	* (the "License"); you may not use this file except in compliance with
	* the License. You may obtain a copy of the License at
	*
	* http://www.apache.org/licenses/LICENSE-2.0
	*
	* Unless required by applicable law or agreed to in writing, software
	* distributed under the License is distributed on an "AS IS" BASIS,
	* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	* See the License for the specific language governing permissions and
	* limitations under the License.
	*/
	package org.apache.jackrabbit.oak.plugins.index.solr.query;

	import java.util.Arrays;

	import org.apache.jackrabbit.oak.spi.query.Filter;
	import org.apache.solr.common.SolrDocumentList;

	/**
	* A very simple estimator for no. of entries in the index using least mean square update method for linear regression.
	*/
	class LMSEstimator {

	private static final double DEFAULT_ALPHA = 0.03;
	private static final int DEFAULT_THRESHOLD = 5;

	private double[] weights;
	private final double alpha;
	private final long threshold;

	LMSEstimator(double alpha, double[] weights, long threshold) {
	this.alpha = alpha;
	this.weights = weights;
	this.threshold = threshold;
	}

	LMSEstimator(double[] weights) {
	this(DEFAULT_ALPHA, weights, DEFAULT_THRESHOLD);
	}

	LMSEstimator() {
	this(DEFAULT_ALPHA, new double[5], 5);
	}

	synchronized void update(Filter filter, SolrDocumentList docs) {
	double[] updatedWeights = new double[weights.length];

	// least mean square cost
	long estimate = estimate(filter);
	long numFound = docs.getNumFound();
	long residual = numFound - estimate;
	double delta = Math.pow(residual, 2);

	if (Math.abs(delta) > threshold) {
	for (int i = 0; i < updatedWeights.length; i++) {
	updatedWeights[i] = weights[i] + alpha * residual * getInput(filter, i);
	}
	// weights updated
	weights = Arrays.copyOf(updatedWeights, 5);
	}
	}

	long estimate(Filter filter) {
	long estimatedEntryCount = 0;
	for (int i = 0; i < 5; i++) {
	estimatedEntryCount += weights[i] * getInput(filter, i);
	}
	if (Double.isInfinite(estimatedEntryCount) \|\| Double.isNaN(estimatedEntryCount)) {
	// prevent over / under flow
	estimatedEntryCount = 1;
	weights = new double[5];
	}
	return Math.max(0, estimatedEntryCount);
	}

	/**
	* Get the input value for a certain feature (by index) in the given filter.
	* <p/>
	* A filter is represented as a vector in R^5 where
	* i_0 : no. of property restrictions
	* i_1 : 1 if any native constraint exists in the filter, 0 otherwise
	* i_2 : the path restriction ordinal
	* i_3 : the depth of the path restriction if set, 0 otherwise
	* i_4 : the precedence of the dominant full text constraint if present, 0 otherwise
	*
	* @param filter the filter
	* @param i the index of the filter vector feature to retrieve
	* @return the feature value
	*/
	private long getInput(Filter filter, int i) {
	assert i < 5;
	if (i == 0) {
	return filter.getPropertyRestrictions() != null ? filter.getPropertyRestrictions().size() : 0;
	} else if (i == 1) {
	return filter.containsNativeConstraint() ? 1 : 0;
	} else if (i == 2) {
	return filter.getPathRestriction() != null ? filter.getPathRestriction().ordinal() : 0;
	} else if (i == 3) {
	return filter.getPathRestriction() != null ? filter.getPathRestriction().toString().split("/").length : 0;
	} else if (i == 4) {
	return filter.getFullTextConstraint() != null ? filter.getFullTextConstraint().getPrecedence() : 0;
	}
	return 0;
	}
	}