blob: 4cbc5feb1520bd9cd980829ea9682b926a233ad9 [file] [log] [blame]
# Create the crawl schema and the account used to read and write it.
# IF NOT EXISTS keeps the script re-runnable: without it, a second run fails
# with "database exists" / "operation CREATE USER failed" errors.
CREATE DATABASE IF NOT EXISTS crawl;
# NOTE(review): the password is identical to the user name; replace it for
# anything other than a local or test installation.
CREATE USER IF NOT EXISTS 'crawler' IDENTIFIED BY 'crawler';
# All privileges on every object in the crawl schema, including the right to
# pass those privileges on to other accounts (WITH GRANT OPTION).
GRANT ALL PRIVILEGES ON crawl.* TO 'crawler' WITH GRANT OPTION;
FLUSH PRIVILEGES;
# (Re)create crawl.urls from scratch. Any existing table and its rows are
# discarded. IF EXISTS is required so the script also works on a fresh
# database, where an unconditional DROP TABLE fails with "Unknown table".
DROP TABLE IF EXISTS crawl.urls;
# One row per URL; the URL itself is the primary key, so it is limited to
# 255 characters.
# The secondary indexes keep their original names (b, t, h) and are declared
# together with the table instead of through separate ALTER TABLE statements.
CREATE TABLE crawl.urls (
    url           VARCHAR(255),
    status        VARCHAR(16) DEFAULT 'DISCOVERED',
    nextfetchdate TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
    metadata      TEXT,
    bucket        SMALLINT DEFAULT 0,
    host          VARCHAR(128),
    PRIMARY KEY (url),
    INDEX b (`bucket`),
    INDEX t (`nextfetchdate`),
    INDEX h (`host`)
);
# (Re)create crawl.metrics from scratch. Any existing table and its rows are
# discarded. IF EXISTS is required so the script also works on a fresh
# database, where an unconditional DROP TABLE fails with "Unknown table".
DROP TABLE IF EXISTS crawl.metrics;
# One row per reported metric value. The src* columns identify where the
# value came from (component id, task id, worker host and port).
# No primary key or index is declared on this table.
CREATE TABLE crawl.metrics (
    srcComponentId VARCHAR(128),
    srcTaskId      INT,
    srcWorkerHost  VARCHAR(128),
    srcWorkerPort  INT,
    name           VARCHAR(128),
    value          DOUBLE,
    timestamp      TIMESTAMP
);
# Read-only account for accessing the metrics: it is granted SELECT on
# crawl.metrics and nothing else.
# IF NOT EXISTS keeps the script re-runnable; a plain CREATE USER fails when
# the account is already present.
# NOTE(review): the password is identical to the user name; replace it for
# anything other than a local or test installation.
CREATE USER IF NOT EXISTS 'metricsReader' IDENTIFIED BY 'metricsReader';
GRANT SELECT ON crawl.metrics TO 'metricsReader';
FLUSH PRIVILEGES;