| /* |
| * Copyright 2015 Webindex authors (see AUTHORS) |
| * |
| * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except |
| * in compliance with the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software distributed under the License |
| * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express |
| * or implied. See the License for the specific language governing permissions and limitations under |
| * the License. |
| */ |
| |
| package webindex.data.fluo; |
| |
| import java.util.Collections; |
| import java.util.HashMap; |
| import java.util.Map; |
| import java.util.Optional; |
| |
| import org.apache.accumulo.core.client.lexicoder.Lexicoder; |
| import org.apache.accumulo.core.client.lexicoder.ReverseLexicoder; |
| import org.apache.accumulo.core.client.lexicoder.ULongLexicoder; |
| import org.apache.commons.codec.binary.Hex; |
| import org.apache.fluo.api.data.Bytes; |
| import org.apache.fluo.api.data.Column; |
| import org.apache.fluo.api.data.RowColumn; |
| import org.apache.fluo.recipes.accumulo.export.DifferenceExport; |
| import webindex.core.Constants; |
| import webindex.core.models.URL; |
| import webindex.data.fluo.UriMap.UriInfo; |
| import webindex.data.util.FluoConstants; |
| |
| public class UriCountExport extends DifferenceExport<String, UriInfo> { |
| |
| public UriCountExport() {} |
| |
| public UriCountExport(Optional<UriInfo> oldCount, Optional<UriInfo> newCount) { |
| super(oldCount, newCount); |
| } |
| |
| @Override |
| protected Map<RowColumn, Bytes> generateData(String pageID, Optional<UriInfo> val) { |
| if (val.orElse(UriInfo.ZERO).equals(UriInfo.ZERO)) { |
| return Collections.emptyMap(); |
| } |
| |
| UriInfo uriInfo = val.get(); |
| |
| Map<RowColumn, Bytes> rcMap = new HashMap<>(); |
| Bytes linksTo = Bytes.of("" + uriInfo.linksTo); |
| rcMap.put(new RowColumn(createTotalRow(pageID, uriInfo.linksTo), Column.EMPTY), linksTo); |
| String domain = URL.fromPageID(pageID).getReverseDomain(); |
| String domainRow = encodeDomainRankPageId(domain, uriInfo.linksTo, pageID); |
| rcMap.put(new RowColumn(domainRow, new Column(Constants.RANK, "")), linksTo); |
| rcMap.put(new RowColumn("p:" + pageID, FluoConstants.PAGE_INCOUNT_COL), linksTo); |
| return rcMap; |
| } |
| |
| public static String revEncodeLong(Long num) { |
| Lexicoder<Long> lexicoder = new ReverseLexicoder<>(new ULongLexicoder()); |
| return Hex.encodeHexString(lexicoder.encode(num)); |
| } |
| |
| public static String encodeDomainRankPageId(String domain, long linksTo, String pageId) { |
| return "d:" + domain + ":" + revEncodeLong(linksTo) + ":" + pageId; |
| } |
| |
| private static String createTotalRow(String uri, long curr) { |
| return "t:" + revEncodeLong(curr) + ":" + uri; |
| } |
| } |