| -- |
| -- Licensed to the Apache Software Foundation (ASF) under one or more |
| -- contributor license agreements. See the NOTICE file distributed with |
| -- this work for additional information regarding copyright ownership. |
| -- The ASF licenses this file to You under the Apache License, Version 2.0 |
| -- (the "License"); you may not use this file except in compliance with |
| -- the License. You may obtain a copy of the License at |
| -- |
| -- http://www.apache.org/licenses/LICENSE-2.0 |
| -- |
| -- Unless required by applicable law or agreed to in writing, software |
| -- distributed under the License is distributed on an "AS IS" BASIS, |
| -- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| -- See the License for the specific language governing permissions and |
| -- limitations under the License. |
| -- |
| |
| -- Create the crawl schema. IF NOT EXISTS lets the script be re-run |
| -- against a server where the database is already present. |
| CREATE DATABASE IF NOT EXISTS crawl; |
| |
| -- Account with full rights on every object in the crawl schema. |
| -- IF NOT EXISTS leaves an already existing account (and its password) |
| -- untouched instead of aborting the script. |
| -- NOTE(review): the password equals the user name and no host part is |
| -- given (MySQL then treats the account as 'crawler'@'%', i.e. any host); |
| -- tighten both before using this outside a development setup. |
| CREATE USER IF NOT EXISTS 'crawler' IDENTIFIED BY 'crawler'; |
| -- NOTE(review): WITH GRANT OPTION lets this account pass its privileges |
| -- on to other accounts; kept for compatibility, confirm it is needed. |
| GRANT ALL PRIVILEGES ON crawl.* TO 'crawler' WITH GRANT OPTION; |
| FLUSH PRIVILEGES; |
| |
| -- Start from a clean table. IF EXISTS keeps the script from failing |
| -- when the table has not been created yet (e.g. on a first run). |
| -- WARNING: this discards every row currently stored in crawl.urls. |
| DROP TABLE IF EXISTS crawl.urls; |
| |
| -- One row per URL; the URL string itself is the primary key. |
| CREATE TABLE crawl.urls ( |
|     -- Primary key, so values are limited to 255 characters. |
|     url VARCHAR(255), |
|     -- Rows inserted without a status start out as 'DISCOVERED'. |
|     status VARCHAR(16) DEFAULT 'DISCOVERED', |
|     -- Defaults to the insertion time when not supplied. |
|     nextfetchdate TIMESTAMP DEFAULT CURRENT_TIMESTAMP, |
|     metadata TEXT, |
|     -- NOTE(review): presumably a partition number for the URL; confirm |
|     -- against the code that writes it. |
|     bucket SMALLINT DEFAULT 0, |
|     host VARCHAR(128), |
|     PRIMARY KEY(url), |
|     -- Secondary indexes; the names b, t and h are unchanged from the |
|     -- former ALTER TABLE ... ADD INDEX statements. |
|     INDEX b (`bucket`), |
|     INDEX t (`nextfetchdate`), |
|     INDEX h (`host`) |
| ); |
| |
| -- Start from a clean table. IF EXISTS keeps the script from failing |
| -- when the table has not been created yet (e.g. on a first run). |
| -- WARNING: this discards every row currently stored in crawl.metrics. |
| DROP TABLE IF EXISTS crawl.metrics; |
| |
| -- One row per recorded metric value. The table declares no primary key |
| -- and no indexes. |
| CREATE TABLE crawl.metrics ( |
|     -- NOTE(review): the src* columns presumably identify the component, |
|     -- task and worker that reported the value; confirm against the writer. |
|     srcComponentId VARCHAR(128), |
|     srcTaskId INT, |
|     srcWorkerHost VARCHAR(128), |
|     srcWorkerPort INT, |
|     -- Backticks: these column names double as SQL keywords. |
|     `name` VARCHAR(128), |
|     `value` DOUBLE, |
|     -- NOTE(review): no DEFAULT or NULL-ability is declared; on servers |
|     -- running with explicit_defaults_for_timestamp disabled MySQL gives |
|     -- this column automatic CURRENT_TIMESTAMP defaults. Confirm that the |
|     -- writer always supplies the value explicitly. |
|     `timestamp` TIMESTAMP |
| ); |
| |
| -- Read-only account for accessing the metrics: it is granted SELECT on |
| -- crawl.metrics and nothing else. |
| -- IF NOT EXISTS leaves an already existing account (and its password) |
| -- untouched instead of aborting the script. |
| -- NOTE(review): the password equals the user name and no host part is |
| -- given (MySQL then treats the account as 'metricsReader'@'%'); tighten |
| -- both before using this outside a development setup. |
| CREATE USER IF NOT EXISTS 'metricsReader' IDENTIFIED BY 'metricsReader'; |
| GRANT SELECT ON crawl.metrics TO 'metricsReader'; |
| FLUSH PRIVILEGES; |