# blob: 8e3566b0fd3b8a0fcd477822f4be38eacf3ba9ff [file]
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# OpenAPI document version followed by the API-level metadata.
openapi: '3.1.2'
info:
  title: Apache Nutch REST API
  description: >-
    REST API for managing Apache Nutch crawl jobs, configurations,
    seed lists, database queries, and data readers.
  # Quoted so the version is always read as a string.
  version: '1.0.0'
  license:
    name: Apache 2.0
    identifier: Apache-2.0
  contact:
    name: Apache Nutch
    url: https://nutch.apache.org
# Single templated server entry; {protocol} and {port} are filled in from
# the variables declared below.
servers:
  - url: '{protocol}://localhost:{port}'
    description: Nutch REST server
    variables:
      protocol:
        default: http
        enum: [http, https]
        description: The protocol used to access the Nutch server.
      port:
        # Quoted so the default stays a string rather than an integer.
        default: '8081'
        description: >-
          The port the Nutch server listens on. Configurable via the
          --port command-line argument.
# Document-wide security requirement: the basicAuth scheme, with no scopes.
security:
  - basicAuth: []
# Tag catalogue used to group the operations declared under paths.
tags:
  - name: Admin
    description: Server administration operations

  - name: Configuration
    description: Manage Nutch configurations

  - name: Job
    description: Manage crawl jobs

  - name: Database
    description: Query the CrawlDB and FetchDB

  - name: Seed
    description: Manage seed URL lists

  - name: Reader
    description: Read sequence files and webgraph data

  - name: Services
    description: Auxiliary service operations such as CommonCrawl data dumps
paths:
# ---------------------------------------------------------------------------
# Admin
# ---------------------------------------------------------------------------
# Server status overview.
/admin/:
  get:
    tags: [Admin]
    summary: Get server status
    description: >-
      Returns the current status of the Nutch server including start
      date, known configurations, all jobs, and currently running jobs.
    operationId: getServerStatus
    responses:
      '200':
        description: Server status retrieved successfully.
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/NutchServerInfo'
      '401':
        $ref: '#/components/responses/Unauthorized'
      '500':
        $ref: '#/components/responses/InternalServerError'
# Server shutdown; the optional force flag controls handling of running jobs.
/admin/stop:
  get:
    tags: [Admin]
    summary: Stop the Nutch server
    description: >-
      Initiates a graceful shutdown of the Nutch server. If jobs are
      still running and force is not set, the server will refuse to stop.
    operationId: stopServer
    parameters:
      - name: force
        in: query
        required: false
        description: If true, kills any running jobs before stopping the server.
        schema:
          type: boolean
          default: false
    responses:
      '200':
        description: Shutdown status message.
        content:
          application/json:
            schema:
              type: string
            # One named example per possible status message.
            examples:
              stopping:
                value: 'Stopping in server on port 8081'
              busy:
                value: 'Jobs still running -- Cannot stop server now'
      '401':
        $ref: '#/components/responses/Unauthorized'
      '500':
        $ref: '#/components/responses/InternalServerError'
# ---------------------------------------------------------------------------
# Configuration
# ---------------------------------------------------------------------------
# Listing of every known configuration ID.
/config/:
  get:
    tags: [Configuration]
    summary: List all configuration IDs
    description: Returns the set of all known configuration identifiers.
    operationId: getConfigs
    responses:
      '200':
        description: A JSON array of configuration ID strings.
        content:
          application/json:
            schema:
              type: array
              items:
                type: string
              uniqueItems: true
            example: [default, my-custom-config]
      '401':
        $ref: '#/components/responses/Unauthorized'
      '500':
        $ref: '#/components/responses/InternalServerError'
# Read or delete one configuration, addressed by its ID.
/config/{configId}:
  get:
    tags: [Configuration]
    summary: Get configuration properties
    description: Returns all key-value properties for the specified configuration.
    operationId: getConfig
    parameters:
      - $ref: '#/components/parameters/configId'
    responses:
      '200':
        description: A JSON object of configuration property key-value pairs.
        content:
          application/json:
            schema:
              type: object
              additionalProperties:
                type: string
            example:
              http.agent.name: 'NutchBot'
              http.robots.agents: 'NutchBot,*'
      '401':
        $ref: '#/components/responses/Unauthorized'
      '404':
        $ref: '#/components/responses/NotFound'
      '500':
        $ref: '#/components/responses/InternalServerError'

  delete:
    tags: [Configuration]
    summary: Delete a configuration
    description: >-
      Removes the specified configuration from the list of
      known configurations.
    operationId: deleteConfig
    parameters:
      - $ref: '#/components/parameters/configId'
    responses:
      # Success carries no response body.
      '204':
        description: Configuration deleted successfully.
      '401':
        $ref: '#/components/responses/Unauthorized'
      '404':
        $ref: '#/components/responses/NotFound'
      '500':
        $ref: '#/components/responses/InternalServerError'
# Read or write a single property of one configuration as plain text.
/config/{configId}/{propertyId}:
  get:
    tags: [Configuration]
    summary: Get a single configuration property
    description: >-
      Returns the value of a specific property within the
      given configuration.
    operationId: getProperty
    parameters:
      - $ref: '#/components/parameters/configId'
      - $ref: '#/components/parameters/propertyId'
    responses:
      '200':
        description: The property value as plain text.
        content:
          text/plain:
            schema:
              type: string
            example: 'NutchBot'
      '401':
        $ref: '#/components/responses/Unauthorized'
      '404':
        $ref: '#/components/responses/NotFound'
      '500':
        $ref: '#/components/responses/InternalServerError'

  put:
    tags: [Configuration]
    summary: Update a configuration property
    description: >-
      Adds or updates the value of a property in the
      specified configuration.
    operationId: updateProperty
    parameters:
      - $ref: '#/components/parameters/configId'
      - $ref: '#/components/parameters/propertyId'
    requestBody:
      required: true
      description: The new property value as plain text.
      content:
        text/plain:
          schema:
            type: string
          example: 'MyNewBot'
    responses:
      '200':
        description: Property updated successfully.
      '400':
        $ref: '#/components/responses/BadRequest'
      '401':
        $ref: '#/components/responses/Unauthorized'
      '500':
        $ref: '#/components/responses/InternalServerError'
# Creation of a new configuration from a NutchConfig JSON body.
/config/create:
  post:
    tags: [Configuration]
    summary: Create a new configuration
    description: >-
      Creates a new Nutch configuration with the specified
      parameters. Returns the configuration ID on success.
    operationId: createConfig
    requestBody:
      required: true
      description: The configuration to create.
      content:
        application/json:
          schema:
            $ref: '#/components/schemas/NutchConfig'
    responses:
      '200':
        description: Configuration created. Returns the configuration ID.
        content:
          text/plain:
            schema:
              type: string
            example: 'my-custom-config'
      '400':
        $ref: '#/components/responses/BadRequest'
      '401':
        $ref: '#/components/responses/Unauthorized'
      '500':
        $ref: '#/components/responses/InternalServerError'
# ---------------------------------------------------------------------------
# Job
# ---------------------------------------------------------------------------
# Job history listing, optionally narrowed to one crawl.
/job/:
  get:
    tags: [Job]
    summary: List all jobs
    description: >-
      Returns job history for all jobs or filtered by crawl ID,
      regardless of job state.
    operationId: getJobs
    parameters:
      - name: crawlId
        in: query
        required: false
        description: Optional crawl ID to filter jobs by.
        schema:
          type: string
    responses:
      '200':
        description: A JSON array of job information objects.
        content:
          application/json:
            schema:
              type: array
              items:
                $ref: '#/components/schemas/JobInfo'
      '401':
        $ref: '#/components/responses/Unauthorized'
      '500':
        $ref: '#/components/responses/InternalServerError'
# Details of one job, addressed by its ID.
/job/{id}:
  get:
    tags: [Job]
    summary: Get job info
    description: Returns detailed information for a specific job.
    operationId: getJobInfo
    parameters:
      - $ref: '#/components/parameters/jobId'
      - name: crawlId
        in: query
        required: false
        description: The crawl ID associated with the job.
        schema:
          type: string
    responses:
      '200':
        description: Job details.
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/JobInfo'
      '401':
        $ref: '#/components/responses/Unauthorized'
      '404':
        $ref: '#/components/responses/NotFound'
      '500':
        $ref: '#/components/responses/InternalServerError'
# Graceful stop of one job; the response body is a JSON boolean.
/job/{id}/stop:
  get:
    tags: [Job]
    summary: Stop a running job
    description: Attempts to gracefully stop a running job.
    operationId: stopJob
    parameters:
      - $ref: '#/components/parameters/jobId'
      - name: crawlId
        in: query
        required: false
        description: The crawl ID associated with the job.
        schema:
          type: string
    responses:
      '200':
        description: Whether the job was successfully stopped.
        content:
          application/json:
            schema:
              type: boolean
            example: true
      '401':
        $ref: '#/components/responses/Unauthorized'
      '404':
        $ref: '#/components/responses/NotFound'
      '500':
        $ref: '#/components/responses/InternalServerError'
# Forced abort of one job; the response body is a JSON boolean.
/job/{id}/abort:
  get:
    tags: [Job]
    summary: Abort a job
    description: >-
      Forcefully aborts a job. Unlike stop, this kills the job
      immediately.
    operationId: abortJob
    parameters:
      - $ref: '#/components/parameters/jobId'
      - name: crawlId
        in: query
        required: false
        description: The crawl ID associated with the job.
        schema:
          type: string
    responses:
      '200':
        description: Whether the job was successfully aborted.
        content:
          application/json:
            schema:
              type: boolean
            example: true
      '401':
        $ref: '#/components/responses/Unauthorized'
      '404':
        $ref: '#/components/responses/NotFound'
      '500':
        $ref: '#/components/responses/InternalServerError'
# Job creation from a JobConfig JSON body; answers with the new JobInfo.
/job/create:
  post:
    tags: [Job]
    summary: Create a new job
    description: >-
      Creates and enqueues a new Nutch job (e.g., inject, generate,
      fetch, parse, updatedb, index).
    operationId: createJob
    requestBody:
      required: true
      description: The job configuration specifying type, crawl ID, and arguments.
      content:
        application/json:
          schema:
            $ref: '#/components/schemas/JobConfig'
    responses:
      '200':
        description: Job created. Returns the job information.
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/JobInfo'
      '400':
        $ref: '#/components/responses/BadRequest'
      '401':
        $ref: '#/components/responses/Unauthorized'
      '500':
        $ref: '#/components/responses/InternalServerError'
# ---------------------------------------------------------------------------
# Database
# ---------------------------------------------------------------------------
# CrawlDB query; the 200 response declares two alternative media types.
/db/crawldb:
  post:
    tags: [Database]
    summary: Query the CrawlDB
    description: >-
      Executes a query against the Nutch CrawlDB. The type field in
      the request body determines the operation: stats, dump, topN,
      or url. The stats and url types return JSON; dump and topN
      return binary octet-stream data.
    operationId: readCrawlDb
    requestBody:
      required: true
      description: The database query parameters.
      content:
        application/json:
          schema:
            $ref: '#/components/schemas/DbQuery'
    responses:
      '200':
        description: >-
          Query results. Content type varies by query type:
          application/json for stats and url queries;
          application/octet-stream for dump and topN queries.
        content:
          application/json:
            schema:
              type: object
              description: >-
                CrawlDB query result (returned for stats and url
                query types).
          application/octet-stream:
            schema:
              type: string
              format: binary
              description: >-
                Binary data stream (returned for dump and topN
                query types).
      '400':
        $ref: '#/components/responses/BadRequest'
      '401':
        $ref: '#/components/responses/Unauthorized'
      '500':
        $ref: '#/components/responses/InternalServerError'
# FetchDB entries for an index range given by the from/to query parameters.
/db/fetchdb:
  get:
    tags: [Database]
    summary: Get FetchDB node information
    description: >-
      Returns fetch node database entries for the specified index
      range. Both from and to default to 0; if to is 0 or exceeds
      the total number of entries, all entries from the starting
      index are returned.
    operationId: fetchDb
    parameters:
      - name: from
        in: query
        required: false
        description: Starting index (inclusive). Defaults to 0.
        schema:
          type: integer
          format: int32
          minimum: 0
          maximum: 2147483647
          default: 0
      - name: to
        in: query
        required: false
        description: Ending index (inclusive). Defaults to 0 (returns all).
        schema:
          type: integer
          format: int32
          minimum: 0
          maximum: 2147483647
          default: 0
    responses:
      '200':
        description: A JSON array of fetch node information objects.
        content:
          application/json:
            schema:
              type: array
              items:
                $ref: '#/components/schemas/FetchNodeDbInfo'
      '401':
        $ref: '#/components/responses/Unauthorized'
      '500':
        $ref: '#/components/responses/InternalServerError'
# ---------------------------------------------------------------------------
# Seed
# ---------------------------------------------------------------------------
# Map of every created seed list, keyed by name.
/seed/:
  get:
    tags: [Seed]
    summary: List all seed lists
    description: Returns a map of all created seed files keyed by name.
    operationId: getSeedLists
    responses:
      '200':
        description: A JSON object mapping seed list names to SeedList objects.
        content:
          application/json:
            schema:
              type: object
              additionalProperties:
                $ref: '#/components/schemas/SeedList'
      '401':
        $ref: '#/components/responses/Unauthorized'
      '500':
        $ref: '#/components/responses/InternalServerError'
# Seed file creation from a SeedList JSON body; answers with a plain-text path.
/seed/create:
  post:
    tags: [Seed]
    summary: Create a seed list file
    description: >-
      Creates a seed list file from the provided URLs and writes it
      to HDFS. Returns the path to the created seed file directory.
    operationId: createSeedFile
    requestBody:
      required: true
      description: The seed list containing URLs to write.
      content:
        application/json:
          schema:
            $ref: '#/components/schemas/SeedList'
    responses:
      '200':
        description: Path to the created seed file directory.
        content:
          text/plain:
            schema:
              type: string
            example: 'seedFiles/seed-1700000000000'
      '400':
        $ref: '#/components/responses/BadRequest'
      '401':
        $ref: '#/components/responses/Unauthorized'
      '500':
        $ref: '#/components/responses/InternalServerError'
# ---------------------------------------------------------------------------
# Reader
# ---------------------------------------------------------------------------
# Sequence file reader; rows come back as JSON, a row count as plain text.
/reader/sequence/read:
  post:
    tags: [Reader]
    summary: Read a sequence file
    description: >-
      Reads key-value pairs from a Hadoop sequence file. Supports
      reading all rows, a limited number of rows, a row range, or
      counting the total number of rows.
    operationId: seqRead
    parameters:
      - $ref: '#/components/parameters/nrows'
      - $ref: '#/components/parameters/start'
      - $ref: '#/components/parameters/end'
      - $ref: '#/components/parameters/count'
    requestBody:
      required: true
      description: Reader configuration specifying the file path.
      content:
        application/json:
          schema:
            $ref: '#/components/schemas/ReaderConfig'
    responses:
      '200':
        description: >-
          Sequence file data. Returns application/json when reading
          rows, or text/plain when count=true.
        content:
          application/json:
            schema:
              type: array
              items:
                type: object
          text/plain:
            schema:
              type: integer
              format: int32
              minimum: 0
              maximum: 2147483647
              description: Number of rows in the sequence file.
      '400':
        $ref: '#/components/responses/BadRequest'
      '401':
        $ref: '#/components/responses/Unauthorized'
      '500':
        $ref: '#/components/responses/InternalServerError'
# Field schema of link reader responses.
/reader/link:
  get:
    tags: [Reader]
    summary: Get link reader schema
    description: >-
      Returns the schema describing the fields in link reader
      responses.
    operationId: getLinkSchema
    responses:
      '200':
        description: Link reader response schema.
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/LinkSchema'
      '401':
        $ref: '#/components/responses/Unauthorized'
      '500':
        $ref: '#/components/responses/InternalServerError'
# Webgraph link reader; rows come back as JSON, a row count as plain text.
/reader/link/read:
  post:
    tags: [Reader]
    summary: Read link objects
    description: >-
      Reads link data (LinkDatum) from the Nutch webgraph. Supports reading
      all rows, a limited number of rows, a row range, or counting the
      total number of rows.
    operationId: linkRead
    parameters:
      - $ref: '#/components/parameters/nrows'
      - $ref: '#/components/parameters/start'
      - $ref: '#/components/parameters/end'
      - $ref: '#/components/parameters/count'
    requestBody:
      required: true
      description: Reader configuration specifying the file path.
      content:
        application/json:
          schema:
            $ref: '#/components/schemas/ReaderConfig'
    responses:
      '200':
        description: >-
          Link data. Returns application/json when reading rows,
          or text/plain when count=true.
        content:
          application/json:
            schema:
              type: array
              items:
                type: object
          text/plain:
            schema:
              type: integer
              format: int32
              minimum: 0
              maximum: 2147483647
              description: Number of link entries.
      '400':
        $ref: '#/components/responses/BadRequest'
      '401':
        $ref: '#/components/responses/Unauthorized'
      '500':
        $ref: '#/components/responses/InternalServerError'
# Field schema of node reader responses.
/reader/node:
  get:
    tags: [Reader]
    summary: Get node reader schema
    description: >-
      Returns the schema describing the fields in node reader
      responses.
    operationId: getNodeSchema
    responses:
      '200':
        description: Node reader response schema.
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/NodeSchema'
      '401':
        $ref: '#/components/responses/Unauthorized'
      '500':
        $ref: '#/components/responses/InternalServerError'
# Webgraph node reader; rows come back as JSON, a row count as plain text.
/reader/node/read:
  post:
    tags: [Reader]
    summary: Read node objects
    description: >-
      Reads Node objects from the Nutch webgraph. Supports reading
      all rows, a limited number of rows, a row range, or counting
      the total number of rows.
    operationId: nodeRead
    parameters:
      - $ref: '#/components/parameters/nrows'
      - $ref: '#/components/parameters/start'
      - $ref: '#/components/parameters/end'
      - $ref: '#/components/parameters/count'
    requestBody:
      required: true
      description: Reader configuration specifying the file path.
      content:
        application/json:
          schema:
            $ref: '#/components/schemas/ReaderConfig'
    responses:
      '200':
        description: >-
          Node data. Returns application/json when reading rows,
          or text/plain when count=true.
        content:
          application/json:
            schema:
              type: array
              items:
                type: object
          text/plain:
            schema:
              type: integer
              format: int32
              minimum: 0
              maximum: 2147483647
              description: Number of node entries.
      '400':
        $ref: '#/components/responses/BadRequest'
      '401':
        $ref: '#/components/responses/Unauthorized'
      '500':
        $ref: '#/components/responses/InternalServerError'
# ---------------------------------------------------------------------------
# Services
# ---------------------------------------------------------------------------
# Dump path listing for one crawl; crawlId is declared inline as a path param.
/services/commoncrawldump/{crawlId}:
  get:
    tags: [Services]
    summary: List CommonCrawl dump paths
    description: Lists the dump file paths for a given crawl ID.
    operationId: listDumpPaths
    parameters:
      - name: crawlId
        in: path
        required: true
        description: The crawl ID whose dump paths to list.
        schema:
          type: string
    responses:
      '200':
        description: Service information containing the list of dump paths.
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/ServiceInfo'
      '401':
        $ref: '#/components/responses/Unauthorized'
      '500':
        $ref: '#/components/responses/InternalServerError'
# CommonCrawl dump job from a ServiceConfig JSON body; answers with a path.
/services/commoncrawldump:
  post:
    tags: [Services]
    summary: Create a CommonCrawl data dump
    description: >-
      Executes a CommonCrawl data dump job for the specified crawl
      and returns the output directory path.
    operationId: commoncrawlDump
    requestBody:
      required: true
      description: Service configuration specifying crawl ID and arguments.
      content:
        application/json:
          schema:
            $ref: '#/components/schemas/ServiceConfig'
    responses:
      '200':
        description: The output directory path for the dump.
        content:
          text/plain:
            schema:
              type: string
            example: 'myCrawl/dump/commoncrawl-20260213120000'
      '401':
        $ref: '#/components/responses/Unauthorized'
      '500':
        $ref: '#/components/responses/InternalServerError'
# =============================================================================
# Components
# =============================================================================
components:
# ---------------------------------------------------------------------------
# Security Schemes
# ---------------------------------------------------------------------------
# The only declared scheme: HTTP authentication using the "basic" scheme.
securitySchemes:
  basicAuth:
    type: http
    scheme: basic
    description: HTTP Basic Authentication.
# ---------------------------------------------------------------------------
# Reusable Parameters
# ---------------------------------------------------------------------------
# Shared parameter definitions: three path identifiers followed by the four
# query parameters used by the reader endpoints.
parameters:
  # -- Path parameters --
  configId:
    name: configId
    in: path
    required: true
    description: The unique identifier for the configuration.
    schema:
      type: string

  propertyId:
    name: propertyId
    in: path
    required: true
    description: The name (key) of the configuration property.
    schema:
      type: string

  # Component key is jobId, but the parameter name on the wire is "id".
  jobId:
    name: id
    in: path
    required: true
    description: The unique identifier for the job.
    schema:
      type: string

  # -- Query parameters --
  nrows:
    name: nrows
    in: query
    required: false
    description: >-
      Number of rows to read. If not specified (or -1), all rows
      are returned.
    schema:
      type: integer
      format: int32
      minimum: -1
      maximum: 2147483647
      default: -1

  start:
    name: start
    in: query
    required: false
    description: Starting line number for a range read.
    schema:
      type: integer
      format: int32
      minimum: -1
      maximum: 2147483647
      default: -1

  # Unlike nrows and start, no default is declared and the minimum is 0.
  end:
    name: end
    in: query
    required: false
    description: Ending line number for a range read.
    schema:
      type: integer
      format: int32
      minimum: 0
      maximum: 2147483647

  count:
    name: count
    in: query
    required: false
    description: >-
      If true, returns the number of lines instead of the data
      itself. When set, the response content type is text/plain.
    schema:
      type: boolean
      default: false
# ---------------------------------------------------------------------------
# Schemas
# ---------------------------------------------------------------------------
schemas:
# -- Request Models -------------------------------------------------------
# Request body for creating a configuration.
NutchConfig:
  type: object
  description: Configuration for creating a new Nutch configuration.
  properties:
    configId:
      type: string
      description: The identifier for this configuration.
    force:
      type: boolean
      description: If true, overwrites an existing configuration with the same ID.
      default: false
    params:
      type: object
      # Free-form map; every value is a string.
      additionalProperties:
        type: string
      description: Key-value pairs of Nutch configuration properties.
  example:
    configId: 'my-config'
    force: false
    params:
      http.agent.name: 'MyBot'
      http.robots.agents: 'MyBot,*'
# Request body for creating a job; only type is mandatory.
JobConfig:
  type: object
  description: Configuration for creating a new crawl job.
  required: [type]
  properties:
    crawlId:
      type: string
      description: The crawl identifier.
    type:
      $ref: '#/components/schemas/JobType'
    confId:
      type: string
      description: >-
        The configuration ID to use for this job. Defaults to
        "default" if not specified.
    jobClassName:
      type: string
      description: Fully qualified class name when type is CLASS.
    args:
      type: object
      # Values of any type are accepted.
      additionalProperties: true
      description: Additional arguments for the job.
  example:
    crawlId: 'crawl-01'
    type: 'INJECT'
    confId: 'default'
    args:
      seedDir: 'seedFiles/seed-1700000000000'
# Request body for a CrawlDB query; crawlId and type are mandatory.
DbQuery:
  type: object
  description: Parameters for a CrawlDB query.
  required: [crawlId, type]
  properties:
    confId:
      type: string
      description: >-
        Configuration ID. Falls back to "default" if not
        provided.
    type:
      type: string
      description: The type of CrawlDB query to execute.
      enum: [stats, dump, topN, url]
    args:
      type: object
      # Free-form map; every value is a string.
      additionalProperties:
        type: string
      description: Additional arguments for the query.
    crawlId:
      type: string
      description: The crawl identifier.
  example:
    confId: 'default'
    type: 'stats'
    crawlId: 'crawl-01'
    args: {}
# Request body shared by the reader endpoints; path is mandatory.
ReaderConfig:
  type: object
  description: Configuration specifying a file path for reader operations.
  required: [path]
  properties:
    path:
      type: string
      description: >-
        The path to the sequence file, link data, or node data to
        read.
  example:
    path: 'crawl-01/crawldb/current/part-00000/data'
# Named collection of seed URLs; id and seedFilePath are marked read-only.
SeedList:
  type: object
  description: A named list of seed URLs.
  required: [seedUrls]
  properties:
    id:
      type: integer
      format: int64
      minimum: 0
      maximum: 9007199254740991
      description: The seed list identifier.
      readOnly: true
    name:
      type: string
      description: A human-readable name for this seed list.
    seedFilePath:
      type: string
      description: >-
        The HDFS path where the seed file is stored. Populated
        after creation.
      readOnly: true
    seedUrls:
      type: array
      items:
        $ref: '#/components/schemas/SeedUrl'
      description: The collection of seed URLs in this list.
  example:
    name: 'my-seeds'
    seedUrls:
      - url: 'https://example.com'
      - url: 'https://nutch.apache.org'
# One entry of a seed list; id is marked read-only.
SeedUrl:
  type: object
  description: A single seed URL entry.
  properties:
    id:
      type: integer
      format: int64
      minimum: 0
      maximum: 9007199254740991
      description: The seed URL identifier.
      readOnly: true
    url:
      type: string
      description: The seed URL.
  example:
    url: 'https://example.com'
# Request body for service operations; crawlId is mandatory.
ServiceConfig:
  type: object
  description: >-
    Configuration for service operations such as CommonCrawl data
    dumps.
  required: [crawlId]
  properties:
    crawlId:
      type: string
      description: The crawl identifier.
    confId:
      type: string
      description: The configuration ID.
    args:
      type: object
      # Values of any type are accepted.
      additionalProperties: true
      description: Additional arguments for the service operation.
  example:
    crawlId: 'crawl-01'
    confId: 'default'
    args: {}
# -- Response Models ------------------------------------------------------
# Server status payload; the three array fields are mandatory.
NutchServerInfo:
  type: object
  description: Status information about the running Nutch server.
  required: [configuration, jobs, runningJobs]
  properties:
    startDate:
      type: string
      format: date-time
      description: The date and time the server was started.
    configuration:
      type: array
      items:
        type: string
      uniqueItems: true
      description: Set of known configuration IDs.
    jobs:
      type: array
      items:
        $ref: '#/components/schemas/JobInfo'
      description: All jobs (any state).
    runningJobs:
      type: array
      items:
        $ref: '#/components/schemas/JobInfo'
      description: Currently running jobs.
# Job description payload; type and state are mandatory.
JobInfo:
  type: object
  description: Information about a crawl job.
  required: [type, state]
  properties:
    id:
      type: string
      description: The unique job identifier.
    type:
      $ref: '#/components/schemas/JobType'
    confId:
      type: string
      description: The configuration ID used for this job.
    # args and result are free-form maps accepting values of any type.
    args:
      type: object
      additionalProperties: true
      description: Arguments passed to the job.
    result:
      type: object
      additionalProperties: true
      description: Result data returned after job completion.
    state:
      $ref: '#/components/schemas/State'
    msg:
      type: string
      description: A human-readable status or error message.
    crawlId:
      type: string
      description: The crawl identifier associated with this job.
# One FetchDB entry together with its outlinks; children is mandatory.
FetchNodeDbInfo:
  type: object
  description: Information about a fetched node in the FetchDB.
  required: [children]
  properties:
    url:
      type: string
      description: The URL of the fetched node.
    status:
      type: integer
      format: int32
      minimum: 0
      maximum: 2147483647
      description: The HTTP status code of the fetch.
    numOfOutlinks:
      type: integer
      format: int32
      minimum: 0
      maximum: 2147483647
      description: The number of outgoing links discovered.
    children:
      type: array
      items:
        $ref: '#/components/schemas/ChildNode'
      description: The outgoing links from this node.
# Outlink entry: target URL plus anchor text.
ChildNode:
  type: object
  description: A child (outlink) of a fetched node.
  properties:
    childUrl:
      type: string
      description: The URL of the child node.
    anchorText:
      type: string
      description: The anchor text of the link.
# Service operation payload; dumpPaths is mandatory.
ServiceInfo:
  type: object
  description: Information returned by service operations.
  required: [dumpPaths]
  properties:
    dumpPaths:
      type: array
      items:
        type: string
      description: List of file paths for the dump output.
# -- Schema Objects (Reader) ----------------------------------------------
# Every property is a string; the example values are type names
# ("string", "int", "float").
LinkSchema:
  type: object
  description: Schema describing the fields in a link reader response.
  properties:
    key_url:
      type: string
      example: 'string'
    timestamp:
      type: string
      example: 'int'
    score:
      type: string
      example: 'float'
    anchor:
      type: string
      example: 'string'
    linktype:
      type: string
      example: 'string'
    url:
      type: string
      example: 'string'
# Every property is a string; the example values are type names
# ("string", "int", "float").
NodeSchema:
  type: object
  description: Schema describing the fields in a node reader response.
  properties:
    key_url:
      type: string
      example: 'string'
    num_inlinks:
      type: string
      example: 'int'
    num_outlinks:
      type: string
      example: 'int'
    inlink_score:
      type: string
      example: 'float'
    outlink_score:
      type: string
      example: 'float'
    metadata:
      type: string
      example: 'string'
# -- Enums ----------------------------------------------------------------
# Closed set of job types, referenced by JobConfig.type and JobInfo.type.
JobType:
  type: string
  description: The type of Nutch crawl job.
  enum:
    - INJECT
    - GENERATE
    - FETCH
    - PARSE
    - UPDATEDB
    - INDEX
    - READDB
    - CLASS
    - INVERTLINKS
    - DEDUP
# Closed set of job states, referenced by JobInfo.state.
State:
  type: string
  description: The current state of a job.
  enum:
    - IDLE
    - RUNNING
    - FINISHED
    - FAILED
    - KILLED
    - STOPPING
    - KILLING
    - ANY
# ---------------------------------------------------------------------------
# Reusable Responses
# ---------------------------------------------------------------------------
# Shared error responses. BadRequest and InternalServerError are plain text;
# Unauthorized and NotFound are JSON objects with a message field.
responses:
  BadRequest:
    description: >-
      Bad request. The request body is missing, malformed, or
      contains invalid parameters.
    content:
      text/plain:
        schema:
          type: string
        example: 'Nutch configuration cannot be empty!'

  Unauthorized:
    description: >-
      Unauthorized. Basic authentication credentials are missing
      or invalid.
    content:
      application/json:
        schema:
          type: object
          properties:
            message:
              type: string
        example:
          message: 'Authentication required.'

  NotFound:
    description: The requested resource was not found.
    content:
      application/json:
        schema:
          type: object
          properties:
            message:
              type: string
        example:
          message: 'Resource not found.'

  InternalServerError:
    description: An unexpected server error occurred.
    content:
      text/plain:
        schema:
          type: string
        example: 'Internal server error.'