java-base/src/notebooks/qf_probability_sum.py - datasketches-characterization - Git at Google

 # Licensed to the Apache Software Foundation (ASF) under one
 # or more contributor license agreements.  See the NOTICE file
 # distributed with this work for additional information
 # regarding copyright ownership.  The ASF licenses this file
 # to you under the Apache License, Version 2.0 (the
 # "License"); you may not use this file except in compliance
 # with the License.  You may obtain a copy of the License at
 #
 #   http://www.apache.org/licenses/LICENSE-2.0
 #
 # Unless required by applicable law or agreed to in writing,
 # software distributed under the License is distributed on an
 # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 # KIND, either express or implied.  See the License for the
 # specific language governing permissions and limitations
 # under the License.

 import numpy as np
 from scipy.stats import binom as binom_dist

 def qf_probability_sum(num_slots:int|np.int64, num_elements:int|np.int64, fingerprint_length:int|np.int64) -> float:
     """
         This sum is well approximated by alpha*2^(-f) for alpha = num_elements / num_slots
         :param num_slots: The number of slots in the filter
         :param num_elements: The number of elements to be inserted into the filter
         :param fingerprint_length: The length of the fingerprint used in the filter
         :return: The sum of probabilities
     """
     m = num_slots
     n = num_elements
     f = fingerprint_length
     assert isinstance(m, int) | isinstance(f, np.int64), "num_slots must be an integer"
     assert isinstance(n, int) | isinstance(f, np.int64), "num_elements must be an integer"
     assert isinstance(f, int) | isinstance(f, np.int64), "fingerprint_length must be an integer"
     dist = binom_dist(n, 1./m)
     probs = dist.pmf(np.arange(m+1))
     probs *= (1. - (1. - 2.**(-f))**np.arange(m+1))
     return np.sum(probs)
	# Licensed to the Apache Software Foundation (ASF) under one
	# or more contributor license agreements. See the NOTICE file
	# distributed with this work for additional information
	# regarding copyright ownership. The ASF licenses this file
	# to you under the Apache License, Version 2.0 (the
	# "License"); you may not use this file except in compliance
	# with the License. You may obtain a copy of the License at
	#
	# http://www.apache.org/licenses/LICENSE-2.0
	#
	# Unless required by applicable law or agreed to in writing,
	# software distributed under the License is distributed on an
	# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
	# KIND, either express or implied. See the License for the
	# specific language governing permissions and limitations
	# under the License.

	import numpy as np
	from scipy.stats import binom as binom_dist

	def qf_probability_sum(num_slots:int\|np.int64, num_elements:int\|np.int64, fingerprint_length:int\|np.int64) -> float:
	"""
	This sum is well approximated by alpha*2^(-f) for alpha = num_elements / num_slots
	:param num_slots: The number of slots in the filter
	:param num_elements: The number of elements to be inserted into the filter
	:param fingerprint_length: The length of the fingerprint used in the filter
	:return: The sum of probabilities
	"""
	m = num_slots
	n = num_elements
	f = fingerprint_length
	assert isinstance(m, int) \| isinstance(f, np.int64), "num_slots must be an integer"
	assert isinstance(n, int) \| isinstance(f, np.int64), "num_elements must be an integer"
	assert isinstance(f, int) \| isinstance(f, np.int64), "fingerprint_length must be an integer"
	dist = binom_dist(n, 1./m)
	probs = dist.pmf(np.arange(m+1))
	probs = (1. - (1. - 2.(-f))*np.arange(m+1))
	return np.sum(probs)