blob: 78c87915a6ac8e2a975ac968c889aace32584a46 [file] [log] [blame]
--
-- Licensed to the Apache Software Foundation (ASF) under one or more
-- contributor license agreements. See the NOTICE file distributed with
-- this work for additional information regarding copyright ownership.
-- The ASF licenses this file to You under the Apache License, Version 2.0
-- (the "License"); you may not use this file except in compliance with
-- the License. You may obtain a copy of the License at
--
-- http://www.apache.org/licenses/LICENSE-2.0
--
-- Unless required by applicable law or agreed to in writing, software
-- distributed under the License is distributed on an "AS IS" BASIS,
-- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-- See the License for the specific language governing permissions and
-- limitations under the License.
--
-- Create the `crawl` database and the 'crawler' account that is granted
-- full rights on it.
-- IF NOT EXISTS keeps the script re-runnable: without it, a second run
-- aborts on the first statement because the database (and the user)
-- already exist.
CREATE DATABASE IF NOT EXISTS crawl;

-- NOTE(review): the password equals the user name and no host is given;
-- change both before using this outside a local/test setup.
CREATE USER IF NOT EXISTS 'crawler' IDENTIFIED BY 'crawler';

-- All privileges on every object in `crawl`, including the right to
-- grant those privileges to other accounts.
GRANT ALL PRIVILEGES ON crawl.* TO 'crawler' WITH GRANT OPTION;

-- Kept from the original script; reloads the grant tables.
FLUSH PRIVILEGES;
-- (Re)create crawl.urls, which holds one row per URL (url is the primary key).
-- IF EXISTS is required so the script also succeeds on a fresh database:
-- a plain DROP TABLE raises an "unknown table" error on the first run.
-- WARNING: re-running this script discards all rows already stored.
DROP TABLE IF EXISTS crawl.urls;

CREATE TABLE crawl.urls (
    url           VARCHAR(255),
    status        VARCHAR(16) DEFAULT 'DISCOVERED',
    nextfetchdate TIMESTAMP   DEFAULT CURRENT_TIMESTAMP,
    metadata      TEXT,
    bucket        SMALLINT    DEFAULT 0,
    host          VARCHAR(128),
    PRIMARY KEY (url),
    -- Secondary indexes, declared inline instead of through separate
    -- ALTER TABLE statements; names and columns are unchanged.
    INDEX b (`bucket`),
    INDEX t (`nextfetchdate`),
    INDEX h (`host`)
);
-- (Re)create crawl.metrics, which stores named numeric metric values
-- together with the component/task/worker columns describing their source.
-- IF EXISTS is required so the script also succeeds on a fresh database:
-- a plain DROP TABLE raises an "unknown table" error on the first run.
-- WARNING: re-running this script discards all rows already stored.
DROP TABLE IF EXISTS crawl.metrics;

CREATE TABLE crawl.metrics (
    srcComponentId VARCHAR(128),
    srcTaskId      INT,
    srcWorkerHost  VARCHAR(128),
    srcWorkerPort  INT,
    name           VARCHAR(128),
    -- `value` and `timestamp` are SQL keywords; quoted so the column
    -- names can never be misparsed. The column names themselves are unchanged.
    `value`        DOUBLE,
    -- NOTE(review): no explicit DEFAULT is given; the effective default of
    -- this column depends on the server's TIMESTAMP settings -- confirm.
    `timestamp`    TIMESTAMP
);
-- Read-only user for accessing the metrics: it is granted SELECT on
-- crawl.metrics and nothing else.
-- IF NOT EXISTS keeps the script re-runnable; a plain CREATE USER fails
-- when the account already exists.
-- NOTE(review): the password equals the user name; change it before
-- using this outside a local/test setup.
CREATE USER IF NOT EXISTS 'metricsReader' IDENTIFIED BY 'metricsReader';
GRANT SELECT ON crawl.metrics TO 'metricsReader';

-- Kept from the original script; reloads the grant tables.
FLUSH PRIVILEGES;