/*
 * blob: 863d27eea3420a17b164a760a954321450a628d1 [file] [log] [blame]
 *
 * Licensed under the Apache License, Version 2.0 (the "License"); you
 * may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.sdap.mudrod.weblog.structure.session;
import com.google.gson.Gson;
import com.google.gson.JsonElement;
import com.google.gson.JsonObject;
import org.apache.sdap.mudrod.discoveryengine.MudrodAbstract;
import org.apache.sdap.mudrod.driver.ESDriver;
import org.apache.sdap.mudrod.main.MudrodConstants;
import org.apache.sdap.mudrod.weblog.structure.log.RequestUrl;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.UnsupportedEncodingException;
import java.util.ArrayList;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.Properties;
import java.util.concurrent.ExecutionException;
/**
 * ClassName: SessionTree. Function: converts the request list of a session into a tree.
 */
public class SessionTree extends MudrodAbstract {
// serialVersionUID: version id used by Java serialization
private static final long serialVersionUID = 1L;
// LOG: SLF4J logger shared by all instances of this class
private static final Logger LOG = LoggerFactory.getLogger(SessionTree.class);
// size: number of nodes in the session tree (starts at 0)
public int size = 0;
// root: root node of the session tree; assigned by the constructors
protected SessionNode root = null;
// binsert: false until the first search-marker node reaches insert();
// while it is false, insert() rejects every node
public boolean binsert = false;
// tmpnode: the most recently accepted node (initially the root); used by
// insert() for duplicate detection and as the start point of parent lookup
public SessionNode tmpnode;
// latestDatasetnode: the latest inserted node whose key is "dataset"
public SessionNode latestDatasetnode;
// sessionID: session ID
private String sessionID;
// cleanupType: session type in Elasticsearch
private String cleanupType;
/**
 * Creates a new instance of SessionTree.
 *
 * @param props       the Mudrod configuration
 * @param es          the Elasticsearch driver
 * @param rootData    root node of the tree; this constructor does not read it
 *                    and always builds its own synthetic "root" node
 * @param sessionID   session ID
 * @param cleanupType session type
 */
public SessionTree(Properties props, ESDriver es, SessionNode rootData, String sessionID, String cleanupType) {
  // Same initialisation as the four-argument constructor; rootData is ignored.
  this(props, es, sessionID, cleanupType);
}
* Creates a new instance of SessionTree.
* @param props: the Mudrod configuration
* @param es: the Elasticsearch drive
* @param sessionID: session ID
* @param cleanupType: session type
public SessionTree(Properties props, ESDriver es, String sessionID, String cleanupType) {
super(props, es, null);
root = new SessionNode(props, "root", "root", "", "", 0);
tmpnode = root;
this.sessionID = sessionID;
this.cleanupType = cleanupType;
* insert: insert a node into the session tree.
* @param node {@link SessionNode}
* @return session node
public SessionNode insert(SessionNode node) {
// begin with datasetlist
if (MudrodConstants.SEARCH_MARKER.equals(node.getKey())) {
this.binsert = true;
if (!this.binsert) {
return null;
// remove unrelated node
if (!MudrodConstants.SEARCH_MARKER.equals(node.getKey()) &&
!MudrodConstants.VIEW_MARKER.equals(node.getKey()) &&
!MudrodConstants.FTP_LOG.equals(node.getKey())) {
return null;
// remove dumplicated click
if (node.getRequest().equals(tmpnode.getRequest())) {
return null;
// search insert node
SessionNode parentnode = this.searchParentNode(node);
if (parentnode == null) {
return null;
// record insert node
tmpnode = node;
if (MudrodConstants.VIEW_MARKER.equals(node.getKey())) {
latestDatasetnode = node;
return node;
* printTree: Print session tree
* @param node root node of the session tree
public void printTree(SessionNode node) {"node: {} \n", node.getRequest());
if (node.children.isEmpty()) {
for (int i = 0; i < node.children.size(); i++) {
* TreeToJson: Convert the session tree to Json object
* @param node node of the session tree
* @return tree content in Json format
public JsonObject treeToJson(SessionNode node) {
Gson gson = new Gson();
JsonObject json = new JsonObject();
json.addProperty("seq", node.getSeq());
if ("datasetlist".equals(node.getKey())) {
json.addProperty("icon", "./resources/images/searching.png");
json.addProperty("name", node.getRequest());
} else if ("dataset".equals(node.getKey())) {
json.addProperty("icon", "./resources/images/viewing.png");
json.addProperty("name", node.getDatasetId());
} else if ("ftp".equals(node.getKey())) {
json.addProperty("icon", "./resources/images/downloading.png");
json.addProperty("name", node.getRequest());
} else if ("root".equals(node.getKey())) {
json.addProperty("name", "");
json.addProperty("icon", "./resources/images/users.png");
if (!node.children.isEmpty()) {
List<JsonObject> jsonChildren = new ArrayList<>();
for (int i = 0; i < node.children.size(); i++) {
JsonObject jsonChild = treeToJson(node.children.get(i));
JsonElement jsonElement = gson.toJsonTree(jsonChildren);
json.add("children", jsonElement);
return json;
* getClickStreamList: Get click stream list in the session
* @param props populated {@link java.util.Properties} which includes the index name
* @return {@link ClickStream}
public List<ClickStream> getClickStreamList(Properties props) {
List<ClickStream> clickthroughs = new ArrayList<>();
List<SessionNode> viewnodes = this.getViewNodes(this.root);
for (SessionNode viewnode : viewnodes) {
SessionNode parent = viewnode.getParent();
List<SessionNode> children = viewnode.getChildren();
if (!MudrodConstants.SEARCH_MARKER.equals(parent.getKey())) {
RequestUrl requestURL = new RequestUrl();
String viewquery = "";
try {
String infoStr = requestURL.getSearchInfo(viewnode.getRequest());
viewquery = es.customAnalyzing(props.getProperty(MudrodConstants.ES_INDEX_NAME), infoStr);
} catch (UnsupportedEncodingException | InterruptedException | ExecutionException e) {
LOG.warn("Exception getting search info. Ignoring...", e);
if (e instanceof InterruptedException) {
String dataset = viewnode.getDatasetId();
boolean download = false;
for (SessionNode child : children) {
if ("ftp".equals(child.getKey())) {
download = true;
if (viewquery != null && !"".equals(viewquery)) {
String[] queries = viewquery.trim().split(",");
if (queries.length > 0) {
for (String query : queries) {
ClickStream data = new ClickStream(query, dataset, download);
return clickthroughs;
* searchParentNode:Get parent node of a session node
* @param node {@link SessionNode}
* @return node {@link SessionNode}
private SessionNode searchParentNode(SessionNode node) {
String nodeKey = node.getKey();
if ("datasetlist".equals(nodeKey)) {
if ("-".equals(node.getReferer())) {
return root;
} else {
SessionNode tmp = this.findLatestRefer(tmpnode, node.getReferer());
if (tmp == null) {
return root;
} else {
return tmp;
} else if ("dataset".equals(nodeKey)) {
if ("-".equals(node.getReferer())) {
return null;
} else {
return this.findLatestRefer(tmpnode, node.getReferer());
} else if ("ftp".equals(nodeKey)) {
return latestDatasetnode;
return tmpnode;
* findLatestRefer: Find parent node whose visiting url is equal to the refer
* url of a session node
* @param node: {@link SessionNode}
* @param refer: request url
* @return
private SessionNode findLatestRefer(SessionNode node, String refer) {
while (true) {
if ("root".equals(node.getKey())) {
return null;
SessionNode parentNode = node.getParent();
if (refer.equals(parentNode.getRequest())) {
return parentNode;
SessionNode tmp = this.iterChild(parentNode, refer);
if (tmp == null) {
node = parentNode;
} else {
return tmp;
* iterChild:
* @param start
* @param refer
* @return
private SessionNode iterChild(SessionNode start, String refer) {
List<SessionNode> children = start.getChildren();
for (int i = children.size() - 1; i >= 0; i--) {
SessionNode tmp = children.get(i);
if (tmp.getChildren().isEmpty()) {
if (refer.equals(tmp.getRequest())) {
return tmp;
} else {
} else {
iterChild(tmp, refer);
return null;
* check:
* @param children
* @param str
* @return
private boolean check(List<SessionNode> children, String str) {
for (SessionNode aChildren : children) {
if (aChildren.key.equals(str)) {
return true;
return false;
* insertHelperChildren:
* @param entry
* @param children
* @return
private boolean insertHelperChildren(SessionNode entry, List<SessionNode> children) {
for (SessionNode aChildren : children) {
boolean result = insertHelper(entry, aChildren);
if (result) {
return result;
return false;
* insertHelper:
* @param entry
* @param node
* @return
private boolean insertHelper(SessionNode entry, SessionNode node) {
if ("datasetlist".equals(entry.key) || "dataset".equals(entry.key)) {
if ("datasetlist".equals(node.key)) {
if (node.children.isEmpty()) {
return true;
} else {
boolean flag = check(node.children, "datasetlist");
if (!flag) {
return true;
} else {
insertHelperChildren(entry, node.children);
} else {
insertHelperChildren(entry, node.children);
} else if ("ftp".equals(entry.key)) {
if ("dataset".equals(node.key)) {
if (node.children.isEmpty()) {
return true;
} else {
boolean flag = check(node.children, "dataset");
if (!flag) {
return true;
} else {
insertHelperChildren(entry, node.children);
} else {
insertHelperChildren(entry, node.children);
return false;
* getViewNodes: Get a session node's child nodes whose key is "dataset".
* @param node
* @return a list of session node
private List<SessionNode> getViewNodes(SessionNode node) {
List<SessionNode> viewnodes = new ArrayList<>();
if (MudrodConstants.VIEW_MARKER.equals(node.getKey())) {
if (!node.children.isEmpty()) {
for (int i = 0; i < node.children.size(); i++) {
SessionNode childNode = node.children.get(i);
return viewnodes;
private List<SessionNode> getQueryNodes(SessionNode node) {
return this.getNodes(node, MudrodConstants.SEARCH_MARKER);
private List<SessionNode> getNodes(SessionNode node, String nodeKey) {
List<SessionNode> nodes = new ArrayList<>();
if (node.getKey().equals(nodeKey)) {
if (!node.children.isEmpty()) {
for (int i = 0; i < node.children.size(); i++) {
SessionNode childNode = node.children.get(i);
nodes.addAll(getNodes(childNode, nodeKey));
return nodes;
* Obtain the ranking training data.
* @param indexName the index from whcih to obtain the data
* @return {@link ClickStream}
* @throws UnsupportedEncodingException if there is an error whilst
* processing the ranking training data.
public List<RankingTrainData> getRankingTrainData(String indexName) throws UnsupportedEncodingException {
List<RankingTrainData> trainDatas = new ArrayList<>();
List<SessionNode> queryNodes = this.getQueryNodes(this.root);
for (SessionNode querynode : queryNodes) {
List<SessionNode> children = querynode.getChildren();
LinkedHashMap<String, Boolean> datasetOpt = new LinkedHashMap<>();
int ndownload = 0;
for (SessionNode node : children) {
if ("dataset".equals(node.getKey())) {
Boolean bDownload = false;
List<SessionNode> nodeChildren = node.getChildren();
for (SessionNode aNodeChildren : nodeChildren) {
if ("ftp".equals(aNodeChildren.getKey())) {
bDownload = true;
ndownload += 1;
datasetOpt.put(node.datasetId, bDownload);
// method 1: The priority of download data are higher
if (datasetOpt.size() > 1 && ndownload > 0) {
// query
RequestUrl requestURL = new RequestUrl();
String queryUrl = querynode.getRequest();
String infoStr = requestURL.getSearchInfo(queryUrl);
String query = null;
try {
query = es.customAnalyzing(props.getProperty(MudrodConstants.ES_INDEX_NAME), infoStr);
} catch (InterruptedException | ExecutionException e) {
if (e instanceof InterruptedException) {
throw new RuntimeException("Error performing custom analyzing", e);
Map<String, String> filter = RequestUrl.getFilterInfo(queryUrl);
for (String datasetA : datasetOpt.keySet()) {
Boolean bDownloadA = datasetOpt.get(datasetA);
if (bDownloadA) {
for (String datasetB : datasetOpt.keySet()) {
Boolean bDownloadB = datasetOpt.get(datasetB);
if (!bDownloadB) {
String[] queries = query.split(",");
for (String query1 : queries) {
RankingTrainData trainData = new RankingTrainData(query1, datasetA, datasetB);
return trainDatas;