blob: 9e2d616a72128b14dccb10f2e1afb3a0490533e6 [file] [log] [blame]
/*
* Copyright 2015 Webindex authors (see AUTHORS)
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
* in compliance with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software distributed under the License
* is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
* or implied. See the License for the specific language governing permissions and limitations under
* the License.
*/
package webindex.data.fluo;
import java.util.Collections;
import java.util.HashMap;
import java.util.Map;
import java.util.Optional;
import org.apache.accumulo.core.client.lexicoder.Lexicoder;
import org.apache.accumulo.core.client.lexicoder.ReverseLexicoder;
import org.apache.accumulo.core.client.lexicoder.ULongLexicoder;
import org.apache.commons.codec.binary.Hex;
import org.apache.fluo.api.data.Bytes;
import org.apache.fluo.api.data.Column;
import org.apache.fluo.api.data.RowColumn;
import org.apache.fluo.recipes.accumulo.export.DifferenceExport;
import webindex.core.Constants;
import webindex.core.models.URL;
import webindex.data.fluo.UriMap.UriInfo;
import webindex.data.util.FluoConstants;
/**
 * {@link DifferenceExport} specialisation keyed by page ID with {@link UriInfo} values.
 *
 * <p>For a {@link UriInfo} that is present and not equal to {@link UriInfo#ZERO},
 * {@link #generateData} produces three entries, each carrying the decimal text of
 * {@code linksTo} as its value:
 * <ul>
 * <li>row {@code t:<encoded linksTo>:<pageID>}, column {@link Column#EMPTY}</li>
 * <li>row {@code d:<reverse domain>:<encoded linksTo>:<pageID>}, column
 * {@code (Constants.RANK, "")}</li>
 * <li>row {@code p:<pageID>}, column {@link FluoConstants#PAGE_INCOUNT_COL}</li>
 * </ul>
 */
public class UriCountExport extends DifferenceExport<String, UriInfo> {

  /** No-argument constructor; relies on the superclass's no-argument constructor. */
  public UriCountExport() {}

  /**
   * Creates an export from a previous and a current value, both handed to the superclass.
   *
   * @param oldCount the earlier value, if any
   * @param newCount the later value, if any
   */
  public UriCountExport(Optional<UriInfo> oldCount, Optional<UriInfo> newCount) {
    super(oldCount, newCount);
  }

  /**
   * Builds the row/column entries for one page.
   *
   * @param pageID page identifier; used verbatim in every row key and parsed with
   *        {@link URL#fromPageID} to obtain the reverse domain
   * @param val the value to render; absent or equal to {@link UriInfo#ZERO} yields no entries
   * @return an empty map for an absent/zero value, otherwise a map with the three entries
   *         described on the class
   */
  @Override
  protected Map<RowColumn, Bytes> generateData(String pageID, Optional<UriInfo> val) {
    // Resolve the optional once: an absent value is treated exactly like ZERO.
    UriInfo info = val.orElse(UriInfo.ZERO);
    if (info.equals(UriInfo.ZERO)) {
      return Collections.emptyMap();
    }

    // Every entry stores the same value: the link count rendered as decimal text.
    Bytes countValue = Bytes.of(String.valueOf(info.linksTo));

    String totalRow = createTotalRow(pageID, info.linksTo);
    String reverseDomain = URL.fromPageID(pageID).getReverseDomain();
    String domainRankRow = encodeDomainRankPageId(reverseDomain, info.linksTo, pageID);
    String pageRow = "p:" + pageID;

    Map<RowColumn, Bytes> entries = new HashMap<>();
    entries.put(new RowColumn(totalRow, Column.EMPTY), countValue);
    entries.put(new RowColumn(domainRankRow, new Column(Constants.RANK, "")), countValue);
    entries.put(new RowColumn(pageRow, FluoConstants.PAGE_INCOUNT_COL), countValue);
    return entries;
  }

  /**
   * Encodes a number with a {@link ReverseLexicoder} wrapping a {@link ULongLexicoder} and returns
   * the resulting bytes as a hex string.
   *
   * <p>NOTE(review): presumably this makes larger numbers sort first when the string is used in a
   * row key -- confirm against the Accumulo lexicoder documentation.
   *
   * @param num the number to encode
   * @return hex text of the encoded bytes
   */
  public static String revEncodeLong(Long num) {
    Lexicoder<Long> reversed = new ReverseLexicoder<>(new ULongLexicoder());
    byte[] encoded = reversed.encode(num);
    return Hex.encodeHexString(encoded);
  }

  /**
   * Builds the row key {@code d:<domain>:<encoded linksTo>:<pageId>}.
   *
   * @param domain domain text placed after the {@code d:} prefix
   * @param linksTo link count, encoded via {@link #revEncodeLong(Long)}
   * @param pageId page identifier appended last
   * @return the assembled row key
   */
  public static String encodeDomainRankPageId(String domain, long linksTo, String pageId) {
    return String.join(":", "d", domain, revEncodeLong(linksTo), pageId);
  }

  /** Builds the row key {@code t:<encoded curr>:<uri>}. */
  private static String createTotalRow(String uri, long curr) {
    return String.join(":", "t", revEncodeLong(curr), uri);
  }
}