blob: 6a558f72837d35b5d9809be5618c13dbd6009f9c [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using Lucene.Net.Analysis;
using Lucene.Net.Documents;
using Lucene.Net.Analysis.Standard;
using Lucene.Net.Index;
using Lucene.Net.Search;
using Lucene.Net.QueryParsers;
using Lucene.Net.Store;
using Lucene.Net.Util;
using System.Threading;
#if !NET35
using System.Threading.Tasks;
#endif
/*
Suppose, we want a faceted search on fields f1 f2 f3,
and their values in index are
f1 f2 f3
-- -- --
doc1 A I 1
doc2 A I 2
doc3 A I 3
doc4 A J 1
doc5 A J 2
doc6 A J 3
doc7 B I 1
Algorithm:
1- Find all possible values for f1 which are (A,B) , for f2 which are (I,J) and for f3 which are (1,2,3)
2- Find Cartesian Product of (A,B)X(I,J)X(1,2,3). (12 possible groups)
3- Eliminate the ones that surely result in 0 hits. (for ex, B J 2. since they have no doc. in common)
*/
/*
TODO: Support for pre-built queries defining groups can be added
*/
namespace Lucene.Net.Search
{
public partial class SimpleFacetedSearch : IDisposable
{
public const int DefaultMaxDocPerGroup = 25;
public static int MAX_FACETS = 2048;
IndexReader _Reader;
List<KeyValuePair<List<string>, OpenBitSetDISI>> _Groups = new List<KeyValuePair<List<string>, OpenBitSetDISI>>();
public SimpleFacetedSearch(IndexReader reader, string groupByField) : this(reader, new string[] { groupByField })
{
}
public SimpleFacetedSearch(IndexReader reader, string[] groupByFields)
{
this._Reader = reader;
List<FieldValuesBitSets> fieldValuesBitSets = new List<FieldValuesBitSets>();
//STEP 1
//f1 = A, B
//f2 = I, J
//f3 = 1, 2, 3
int maxFacets = 1;
IList<IList<string>> inputToCP = new List<IList<string>>();
foreach (string field in groupByFields)
{
FieldValuesBitSets f = new FieldValuesBitSets(reader, field);
maxFacets *= f.FieldValueBitSetPair.Count;
if (maxFacets > MAX_FACETS) throw new Exception("Facet count exceeded " + MAX_FACETS);
fieldValuesBitSets.Add(f);
inputToCP.Add(f.FieldValueBitSetPair.Keys.ToList());
}
//STEP 2
// comb1: A I 1
// comb2: A I 2 etc.
var cp = inputToCP.CartesianProduct();
//SETP 3
//create a single BitSet for each combination
//BitSet1: A AND I AND 1
//BitSet2: A AND I AND 2 etc.
//and remove impossible comb's (for ex, B J 3) from list.
#if !NET35
Parallel.ForEach(cp, combinations =>
#else
foreach(var combinations in cp)
#endif
{
OpenBitSetDISI bitSet = new OpenBitSetDISI(_Reader.MaxDoc);
bitSet.Set(0, bitSet.Size());
List<string> comb = combinations.ToList();
for (int j = 0; j < comb.Count; j++)
{
bitSet.And(fieldValuesBitSets[j].FieldValueBitSetPair[comb[j]]);
}
//STEP 3
if (bitSet.Cardinality() > 0)
{
lock(_Groups)
_Groups.Add(new KeyValuePair<List<string>, OpenBitSetDISI>(comb, bitSet));
}
}
#if !NET35
);
#endif
//Now _Groups has 7 rows (as <List<string>, BitSet> pairs)
}
public Hits Search(Query query)
{
return Search(query, DefaultMaxDocPerGroup);
}
public Hits Search(Query query, int maxDocPerGroup)
{
var hitsPerGroup = new List<HitsPerFacet>();
DocIdSet queryDocidSet = new CachingWrapperFilter(new QueryWrapperFilter(query)).GetDocIdSet(_Reader);
var actions = new Action[_Groups.Count];
for (int i = 0; i < _Groups.Count; i++)
{
var h = new HitsPerFacet(new FacetName(_Groups[i].Key.ToArray()), _Reader, queryDocidSet, _Groups[i].Value, maxDocPerGroup);
hitsPerGroup.Add(h);
actions[i] = h.Calculate;
}
#if !NET35
Parallel.Invoke(actions);
#else
foreach (var action in actions)
action();
#endif
Hits hits = new Hits {HitsPerFacet = hitsPerGroup.ToArray()};
return hits;
}
public void Dispose()
{
}
}
}