blob: 863d27eea3420a17b164a760a954321450a628d1 [file] [log] [blame]
/*
* Licensed under the Apache License, Version 2.0 (the "License"); you
* may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.sdap.mudrod.weblog.structure.session;
import com.google.gson.Gson;
import com.google.gson.JsonElement;
import com.google.gson.JsonObject;
import org.apache.sdap.mudrod.discoveryengine.MudrodAbstract;
import org.apache.sdap.mudrod.driver.ESDriver;
import org.apache.sdap.mudrod.main.MudrodConstants;
import org.apache.sdap.mudrod.weblog.structure.log.RequestUrl;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.UnsupportedEncodingException;
import java.util.ArrayList;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.Properties;
import java.util.concurrent.ExecutionException;
/**
* ClassName: SessionTree Function: Convert request list in a session to a tree
*/
public class SessionTree extends MudrodAbstract {
/**
*
*/
private static final long serialVersionUID = 1L;
private static final Logger LOG = LoggerFactory.getLogger(SessionTree.class);
// size: node numbers in the session tree
public int size = 0;
// root: root node of session tree
protected SessionNode root = null;
// binsert: indicates inserting a node or not
public boolean binsert = false;
// tmpnode: tempt node
public SessionNode tmpnode;
// latestDatasetnode: the latest inserted node whose key is "dataset"
public SessionNode latestDatasetnode;
// sessionID: session ID
private String sessionID;
// cleanupType: session type in Elasticsearch
private String cleanupType;
/**
* Creates a new instance of SessionTree.
*
* @param props: the Mudrod configuration
* @param es: the Elasticsearch drive
* @param rootData: root node of the tree
* @param sessionID: session ID
* @param cleanupType: session type
*/
public SessionTree(Properties props, ESDriver es, SessionNode rootData, String sessionID, String cleanupType) {
super(props, es, null);
root = new SessionNode(props, "root", "root", "", "", 0);
tmpnode = root;
this.sessionID = sessionID;
this.cleanupType = cleanupType;
}
/**
* Creates a new instance of SessionTree.
*
* @param props: the Mudrod configuration
* @param es: the Elasticsearch drive
* @param sessionID: session ID
* @param cleanupType: session type
*/
public SessionTree(Properties props, ESDriver es, String sessionID, String cleanupType) {
super(props, es, null);
root = new SessionNode(props, "root", "root", "", "", 0);
root.setParent(root);
tmpnode = root;
this.sessionID = sessionID;
this.cleanupType = cleanupType;
}
/**
* insert: insert a node into the session tree.
*
* @param node {@link SessionNode}
* @return session node
*/
public SessionNode insert(SessionNode node) {
// begin with datasetlist
if (MudrodConstants.SEARCH_MARKER.equals(node.getKey())) {
this.binsert = true;
}
if (!this.binsert) {
return null;
}
// remove unrelated node
if (!MudrodConstants.SEARCH_MARKER.equals(node.getKey()) &&
!MudrodConstants.VIEW_MARKER.equals(node.getKey()) &&
!MudrodConstants.FTP_LOG.equals(node.getKey())) {
return null;
}
// remove dumplicated click
if (node.getRequest().equals(tmpnode.getRequest())) {
return null;
}
// search insert node
SessionNode parentnode = this.searchParentNode(node);
if (parentnode == null) {
return null;
}
node.setParent(parentnode);
parentnode.addChildren(node);
// record insert node
tmpnode = node;
if (MudrodConstants.VIEW_MARKER.equals(node.getKey())) {
latestDatasetnode = node;
}
size++;
return node;
}
/**
* printTree: Print session tree
*
* @param node root node of the session tree
*/
public void printTree(SessionNode node) {
LOG.info("node: {} \n", node.getRequest());
if (node.children.isEmpty()) {
for (int i = 0; i < node.children.size(); i++) {
printTree(node.children.get(i));
}
}
}
/**
* TreeToJson: Convert the session tree to Json object
*
* @param node node of the session tree
* @return tree content in Json format
*/
public JsonObject treeToJson(SessionNode node) {
Gson gson = new Gson();
JsonObject json = new JsonObject();
json.addProperty("seq", node.getSeq());
if ("datasetlist".equals(node.getKey())) {
json.addProperty("icon", "./resources/images/searching.png");
json.addProperty("name", node.getRequest());
} else if ("dataset".equals(node.getKey())) {
json.addProperty("icon", "./resources/images/viewing.png");
json.addProperty("name", node.getDatasetId());
} else if ("ftp".equals(node.getKey())) {
json.addProperty("icon", "./resources/images/downloading.png");
json.addProperty("name", node.getRequest());
} else if ("root".equals(node.getKey())) {
json.addProperty("name", "");
json.addProperty("icon", "./resources/images/users.png");
}
if (!node.children.isEmpty()) {
List<JsonObject> jsonChildren = new ArrayList<>();
for (int i = 0; i < node.children.size(); i++) {
JsonObject jsonChild = treeToJson(node.children.get(i));
jsonChildren.add(jsonChild);
}
JsonElement jsonElement = gson.toJsonTree(jsonChildren);
json.add("children", jsonElement);
}
return json;
}
/**
* getClickStreamList: Get click stream list in the session
* @param props populated {@link java.util.Properties} which includes the index name
*
* @return {@link ClickStream}
*/
public List<ClickStream> getClickStreamList(Properties props) {
List<ClickStream> clickthroughs = new ArrayList<>();
List<SessionNode> viewnodes = this.getViewNodes(this.root);
for (SessionNode viewnode : viewnodes) {
SessionNode parent = viewnode.getParent();
List<SessionNode> children = viewnode.getChildren();
if (!MudrodConstants.SEARCH_MARKER.equals(parent.getKey())) {
continue;
}
RequestUrl requestURL = new RequestUrl();
String viewquery = "";
try {
String infoStr = requestURL.getSearchInfo(viewnode.getRequest());
viewquery = es.customAnalyzing(props.getProperty(MudrodConstants.ES_INDEX_NAME), infoStr);
} catch (UnsupportedEncodingException | InterruptedException | ExecutionException e) {
LOG.warn("Exception getting search info. Ignoring...", e);
if (e instanceof InterruptedException) {
Thread.currentThread().interrupt();
}
}
String dataset = viewnode.getDatasetId();
boolean download = false;
for (SessionNode child : children) {
if ("ftp".equals(child.getKey())) {
download = true;
break;
}
}
if (viewquery != null && !"".equals(viewquery)) {
String[] queries = viewquery.trim().split(",");
if (queries.length > 0) {
for (String query : queries) {
ClickStream data = new ClickStream(query, dataset, download);
data.setSessionId(this.sessionID);
data.setType(this.cleanupType);
clickthroughs.add(data);
}
}
}
}
return clickthroughs;
}
/**
* searchParentNode:Get parent node of a session node
*
* @param node {@link SessionNode}
* @return node {@link SessionNode}
*/
private SessionNode searchParentNode(SessionNode node) {
String nodeKey = node.getKey();
if ("datasetlist".equals(nodeKey)) {
if ("-".equals(node.getReferer())) {
return root;
} else {
SessionNode tmp = this.findLatestRefer(tmpnode, node.getReferer());
if (tmp == null) {
return root;
} else {
return tmp;
}
}
} else if ("dataset".equals(nodeKey)) {
if ("-".equals(node.getReferer())) {
return null;
} else {
return this.findLatestRefer(tmpnode, node.getReferer());
}
} else if ("ftp".equals(nodeKey)) {
return latestDatasetnode;
}
return tmpnode;
}
/**
* findLatestRefer: Find parent node whose visiting url is equal to the refer
* url of a session node
*
* @param node: {@link SessionNode}
* @param refer: request url
* @return
*/
private SessionNode findLatestRefer(SessionNode node, String refer) {
while (true) {
if ("root".equals(node.getKey())) {
return null;
}
SessionNode parentNode = node.getParent();
if (refer.equals(parentNode.getRequest())) {
return parentNode;
}
SessionNode tmp = this.iterChild(parentNode, refer);
if (tmp == null) {
node = parentNode;
continue;
} else {
return tmp;
}
}
}
/**
* iterChild:
*
* @param start
* @param refer
* @return
*/
private SessionNode iterChild(SessionNode start, String refer) {
List<SessionNode> children = start.getChildren();
for (int i = children.size() - 1; i >= 0; i--) {
SessionNode tmp = children.get(i);
if (tmp.getChildren().isEmpty()) {
if (refer.equals(tmp.getRequest())) {
return tmp;
} else {
continue;
}
} else {
iterChild(tmp, refer);
}
}
return null;
}
/**
* check:
*
* @param children
* @param str
* @return
*/
private boolean check(List<SessionNode> children, String str) {
for (SessionNode aChildren : children) {
if (aChildren.key.equals(str)) {
return true;
}
}
return false;
}
/**
* insertHelperChildren:
*
* @param entry
* @param children
* @return
*/
private boolean insertHelperChildren(SessionNode entry, List<SessionNode> children) {
for (SessionNode aChildren : children) {
boolean result = insertHelper(entry, aChildren);
if (result) {
return result;
}
}
return false;
}
/**
* insertHelper:
*
* @param entry
* @param node
* @return
*/
private boolean insertHelper(SessionNode entry, SessionNode node) {
if ("datasetlist".equals(entry.key) || "dataset".equals(entry.key)) {
if ("datasetlist".equals(node.key)) {
if (node.children.isEmpty()) {
node.children.add(entry);
return true;
} else {
boolean flag = check(node.children, "datasetlist");
if (!flag) {
node.children.add(entry);
return true;
} else {
insertHelperChildren(entry, node.children);
}
}
} else {
insertHelperChildren(entry, node.children);
}
} else if ("ftp".equals(entry.key)) {
if ("dataset".equals(node.key)) {
if (node.children.isEmpty()) {
node.children.add(entry);
return true;
} else {
boolean flag = check(node.children, "dataset");
if (!flag) {
node.children.add(entry);
return true;
} else {
insertHelperChildren(entry, node.children);
}
}
} else {
insertHelperChildren(entry, node.children);
}
}
return false;
}
/**
* getViewNodes: Get a session node's child nodes whose key is "dataset".
*
* @param node
* @return a list of session node
*/
private List<SessionNode> getViewNodes(SessionNode node) {
List<SessionNode> viewnodes = new ArrayList<>();
if (MudrodConstants.VIEW_MARKER.equals(node.getKey())) {
viewnodes.add(node);
}
if (!node.children.isEmpty()) {
for (int i = 0; i < node.children.size(); i++) {
SessionNode childNode = node.children.get(i);
viewnodes.addAll(getViewNodes(childNode));
}
}
return viewnodes;
}
private List<SessionNode> getQueryNodes(SessionNode node) {
return this.getNodes(node, MudrodConstants.SEARCH_MARKER);
}
private List<SessionNode> getNodes(SessionNode node, String nodeKey) {
List<SessionNode> nodes = new ArrayList<>();
if (node.getKey().equals(nodeKey)) {
nodes.add(node);
}
if (!node.children.isEmpty()) {
for (int i = 0; i < node.children.size(); i++) {
SessionNode childNode = node.children.get(i);
nodes.addAll(getNodes(childNode, nodeKey));
}
}
return nodes;
}
/**
* Obtain the ranking training data.
*
* @param indexName the index from whcih to obtain the data
* @return {@link ClickStream}
* @throws UnsupportedEncodingException if there is an error whilst
* processing the ranking training data.
*/
public List<RankingTrainData> getRankingTrainData(String indexName) throws UnsupportedEncodingException {
List<RankingTrainData> trainDatas = new ArrayList<>();
List<SessionNode> queryNodes = this.getQueryNodes(this.root);
for (SessionNode querynode : queryNodes) {
List<SessionNode> children = querynode.getChildren();
LinkedHashMap<String, Boolean> datasetOpt = new LinkedHashMap<>();
int ndownload = 0;
for (SessionNode node : children) {
if ("dataset".equals(node.getKey())) {
Boolean bDownload = false;
List<SessionNode> nodeChildren = node.getChildren();
for (SessionNode aNodeChildren : nodeChildren) {
if ("ftp".equals(aNodeChildren.getKey())) {
bDownload = true;
ndownload += 1;
break;
}
}
datasetOpt.put(node.datasetId, bDownload);
}
}
// method 1: The priority of download data are higher
if (datasetOpt.size() > 1 && ndownload > 0) {
// query
RequestUrl requestURL = new RequestUrl();
String queryUrl = querynode.getRequest();
String infoStr = requestURL.getSearchInfo(queryUrl);
String query = null;
try {
query = es.customAnalyzing(props.getProperty(MudrodConstants.ES_INDEX_NAME), infoStr);
} catch (InterruptedException | ExecutionException e) {
if (e instanceof InterruptedException) {
Thread.currentThread().interrupt();
}
throw new RuntimeException("Error performing custom analyzing", e);
}
Map<String, String> filter = RequestUrl.getFilterInfo(queryUrl);
for (String datasetA : datasetOpt.keySet()) {
Boolean bDownloadA = datasetOpt.get(datasetA);
if (bDownloadA) {
for (String datasetB : datasetOpt.keySet()) {
Boolean bDownloadB = datasetOpt.get(datasetB);
if (!bDownloadB) {
String[] queries = query.split(",");
for (String query1 : queries) {
RankingTrainData trainData = new RankingTrainData(query1, datasetA, datasetB);
trainData.setSessionId(this.sessionID);
trainData.setIndex(indexName);
trainData.setFilter(filter);
trainDatas.add(trainData);
}
}
}
}
}
}
}
return trainDatas;
}
}