blob: 4ca569871b99749b44bb7fc2247ddb5586ff5346 [file] [log] [blame]
/*
* Copyright (c) 2013 DataTorrent, Inc. ALL Rights Reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.datatorrent.lib.io;
import java.text.SimpleDateFormat;
import java.util.Date;
import java.util.Locale;
import java.util.Random;
import com.datatorrent.api.BaseOperator;
import com.datatorrent.api.Context.OperatorContext;
import com.datatorrent.api.DefaultOutputPort;
import com.datatorrent.api.InputOperator;
/**
* Generates apache server log entries. The apache access log has the following
* format
*
* %s %h %l %u %t "%r" %s %b "%{Referer}" "%{User-agent}"
*
* %s - server name - server0.mydomain.com:80 ....................... server9.mydomain.com:80
* %h - The ip address of the client
* %l - The identity of the client typically "-"
* %u - The username of the user if HTTP authentication was used otherwise "-"
* %t - The time the request was received e.g., [31/May/2013:08:03:46 -0700]
* %r - The HTTP request string e.g., "GET /favicon.ico HTTP/1.1"
* %s - The status code of the response e.g., 404
* %b - The number of bytes in the response
* %{Referer} - The referer web site reported by the client, "-" if there is none
* %{User-agent} - Unique string identifying the client browser e.g.,
* "Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/28.0.1468.0 Safari/537.36"
*
* Putting it all together a sample log string looks like :
* --------------------------------------------------------
* server0.mydomain.com:80 127.0.0.1 - - [31/May/2013:09:05:49 -0700] "GET /favicon.ico HTTP/1.1" 304 210 "-"
* "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.22 (KHTML, like Gecko) Ubuntu Chromium/25.0.1364.160 Chrome/25.0.1364.160 Safari/537.22"
*
* @displayName Generate Random Apache Logs
* @category Input
* @tags log, input operator, generate
*
* @since 0.3.2
*/
public class ApacheGenRandomLogs extends BaseOperator implements InputOperator
{
  /**
   * This is the output port which emits generated log strings.
   */
  public final transient DefaultOutputPort<String> outport = new DefaultOutputPort<String>();

  // Source of randomness for every generated field (server id, ip, url, status, sizes, counts).
  private Random rand = new Random();

  // Apache date format. Pinned to Locale.US so the month abbreviation ("MMM") is always the
  // English one regardless of the JVM default locale. SimpleDateFormat is not thread-safe and
  // this instance is shared by every operator instance in the JVM, so all access goes through
  // getTimeStamp(), which synchronizes on it.
  private static final SimpleDateFormat APACHE_DATE_FORMAT =
      new SimpleDateFormat("dd/MMM/yyyy:HH:mm:ss Z", Locale.US);

  // http status codes picked from for regular (non-attack) entries
  // NOTE(review): "301" is listed twice and "404" is not listed; kept as-is because
  // emitTuples() emits "404" explicitly for its attack entries - confirm this is intended.
  private static final String[] httpStatusCodes = {"100", "101", "200", "201", "202", "203", "204", "205", "206", "300", "301",
    "301", "302", "303", "304", "305", "306", "307", "400", "401", "402", "403",
    "405", "406", "407", "408", "409", "410", "411", "412", "413", "414",
    "415", "416", "417", "500", "501", "502", "503", "504", "505"};

  // possible url string formats; a "%d" placeholder, when present, receives a random id
  private static final String[] urlFormats = {
    "mydomain.com/home.php", "mydomain.com/products.php", "mydomain.com/products.php?productid=%d",
    "mydomain.com/solutions.php", "mydomain.com/solutions.php?solutionid=%d", "mydomain.com/support.php",
    "mydomain.com/about.php", "mydomain.com/contactus.php", "mydomain.com/services.php",
    "mydomain.com/services.php?serviceid=%d", "mydomain.com/partners.php", "mydomain.com/partners.php?partnerid=%d"
  };

  // browser id formats; the "%d" placeholder receives a random Gecko build number
  private static final String[] browserIds = {
    "Mozilla/5.0 (X11; Ubuntu; Linux i686; rv:20.0) Gecko/%d Firefox/20.0",
    "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:18.0) Gecko/%d Firefox/18.0",
    "Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.7.8) Gecko/%d Fedora/1.0.4-4 Firefox/1.0.",
    "Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.8.0.10) Gecko/%d CentOS/1.5.0.10-0.1.el4.centos Firefox/1.5.0.10"
  };

  /**
   * Picks a random server id.
   *
   * @return a value in the range 0..9
   */
  private int genServerId()
  {
    return rand.nextInt(10);
  }

  /**
   * Builds the server name for a server id.
   *
   * @param serverId id of the server
   * @return a name of the form "server&lt;id&gt;.mydomain.com:80"
   */
  private String genServerName(int serverId)
  {
    return "server" + serverId + ".mydomain.com:80";
  }

  /**
   * Generates a random dotted-quad IPv4 address.
   *
   * @param serverId not used by the generation; retained so existing call sites stay unchanged
   * @return four random octets joined by "."
   */
  private String genIpAddress(int serverId)
  {
    // nextInt's bound is exclusive, so 256 is needed to cover the full octet range 0..255
    return new StringBuilder()
        .append(rand.nextInt(256)).append(".")
        .append(rand.nextInt(256)).append(".")
        .append(rand.nextInt(256)).append(".")
        .append(rand.nextInt(256))
        .toString();
  }

  /**
   * Formats the current time as an Apache log time stamp.
   *
   * @return the current time wrapped in square brackets, e.g. [31/May/2013:08:03:46 -0700]
   */
  private String getTimeStamp()
  {
    String formatted;
    // the formatter is static and SimpleDateFormat keeps mutable state while formatting
    synchronized (APACHE_DATE_FORMAT)
    {
      formatted = APACHE_DATE_FORMAT.format(new Date());
    }
    return "[" + formatted + "]";
  }

  /**
   * Picks a random status code from {@link #httpStatusCodes}.
   *
   * @return the status code as a string
   */
  private String genHttpCode()
  {
    return httpStatusCodes[rand.nextInt(httpStatusCodes.length)];
  }

  /**
   * Picks a random url format and fills its optional "%d" placeholder with a random id below 100.
   *
   * @return the generated url
   */
  private String genUrl()
  {
    String format = urlFormats[rand.nextInt(urlFormats.length)];
    return String.format(format, rand.nextInt(100));
  }

  /**
   * Picks a random browser id format and fills its "%d" placeholder with a random number below 100000.
   *
   * @return the generated user-agent string
   */
  private String genBrowserId()
  {
    String format = browserIds[rand.nextInt(browserIds.length)];
    return String.format(format, rand.nextInt(100000));
  }

  /**
   * Builds a quoted request string for a freshly generated url.
   *
   * @return a string of the form "GET &lt;url&gt; HTTP/1.1", including the surrounding double quotes
   */
  private String genRequest()
  {
    return "\"GET " + genUrl() + " HTTP/1.1\"";
  }

  /**
   * Generates one log line. Every argument that is null is replaced by a randomly generated value;
   * the server name, time stamp and response size are always generated here.
   *
   * @param ipAddress client ip address, or null to generate one
   * @param browserId user-agent string, or null to generate one
   * @param httpCode response status code, or null to pick one at random
   * @param url already quoted request string, or null to generate one
   * @return the assembled log line
   */
  private String genLogString(String ipAddress, String browserId, String httpCode, String url)
  {
    // server/ipaddress
    int serverId = genServerId();
    String serverName = genServerName(serverId);
    if (ipAddress == null)
    {
      ipAddress = genIpAddress(serverId);
    }

    // time
    String logTime = getTimeStamp();

    // request
    if (url == null)
    {
      url = genRequest();
    }

    // http code
    if (httpCode == null)
    {
      httpCode = genHttpCode();
    }

    // number of bytes
    int numBytes = rand.nextInt(4000);

    // browser id
    if (browserId == null)
    {
      browserId = genBrowserId();
    }

    // assemble: server ip - - [time] "request" status bytes " " "browser"
    return new StringBuilder()
        .append(serverName).append(" ")
        .append(ipAddress).append(" - - ")
        .append(logTime).append(" ")
        .append(url).append(" ")
        .append(httpCode).append(" ")
        .append(numBytes).append(" \" \" \"")
        .append(browserId).append("\"")
        .toString();
  }

  @Override
  public void beginWindow(long windowId)
  {
    // nothing to do at the start of a window
  }

  @Override
  public void endWindow()
  {
    // nothing to do at the end of a window
  }

  // set to true in setup() and false in teardown(); not read anywhere in this class
  boolean genTuples;

  // number of emitTuples() calls remaining until the next burst of "404" entries
  int attackInterval;

  /**
   * Marks the operator as generating and draws the first attack interval (1..10 calls).
   *
   * @param context operator context, not used here
   */
  @Override
  public void setup(OperatorContext context)
  {
    genTuples = true;
    attackInterval = rand.nextInt(10) + 1;
  }

  @Override
  public void teardown()
  {
    genTuples = false;
  }

  /**
   * Emits a random number of log lines. When the attack countdown expires, a burst of "404"
   * entries sharing one generated ip address and browser id is emitted first, and the same
   * ip address and browser id are then used for the regular entries of this call.
   */
  @Override
  public void emitTuples()
  {
    attackInterval--;
    String browserId = null;
    String ipAddress = null;

    // "<= 0" rather than "== 0": if the countdown starts at or below zero (e.g. setup() has not
    // drawn an interval) an equality test would never fire
    if (attackInterval <= 0)
    {
      browserId = genBrowserId();
      ipAddress = genIpAddress(rand.nextInt(10));
      attackInterval = rand.nextInt(10) + 1;

      // NOTE(review): in all three loops below the bound is re-drawn on every iteration, so the
      // number of emitted tuples is not uniformly distributed; kept as-is because hoisting the
      // bound would change the volume emitted per call - confirm intent.
      for (int i = 0; i < rand.nextInt(3); i++)
      {
        // 404s against a different random url each time
        outport.emit(genLogString(ipAddress, browserId, "404", null));
      }

      String url = genRequest();
      for (int i = 0; i < rand.nextInt(3); i++)
      {
        // 404s against one fixed url
        outport.emit(genLogString(ipAddress, browserId, "404", url));
      }
    }

    // regular entries; ip address and browser id are random unless an attack happened above
    for (int i = 0; i < rand.nextInt(100000); i++)
    {
      outport.emit(genLogString(ipAddress, browserId, null, null));
    }
  }
}