# blob: fb93fe98ab37047b4f23d0c494bc4f837566153a [file] [log] [blame]
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# NOTE TO DEVELOPERS: Make sure this file passes linting tests
# by running https://github.com/replicatedhq/dockerfilelint
# BUILD_MODE selects which 'branch-version-<n>' stage the final image is
# built from. It can be either
# 0 == Nutch master branch source install with 'crawl' and 'nutch' scripts on PATH
# 1 == Same as mode 0 with addition of Nutch REST Server
# 2 == Same as mode 1 with addition of Nutch WebApp
# It is declared before the first FROM so that it can be referenced in a
# FROM line. Override the default with '--build-arg BUILD_MODE=<n>'.
ARG BUILD_MODE=0
# Stage 'base': Alpine image containing a source build of the Nutch master
# branch, with the 'nutch' and 'crawl' scripts linked into /usr/local/bin.
FROM alpine:3.13 AS base

# Default REST server / webapp settings. The branch-version-1 and
# branch-version-2 stages declare the same ARGs with the same defaults.
ARG SERVER_PORT=8081
ARG SERVER_HOST=0.0.0.0
ARG WEBAPP_PORT=8080

# Image metadata
LABEL maintainer="Apache Nutch Developers <dev@nutch.apache.org>" \
      org.opencontainers.image.authors="Apache Nutch Developers <dev@nutch.apache.org>" \
      org.opencontainers.image.description="Docker image for running Apache Nutch, a highly extensible and scalable open source web crawler software project. Visit the project website at https://nutch.apache.org" \
      org.opencontainers.image.documentation="https://hub.docker.com/r/apache/nutch" \
      org.opencontainers.image.licenses="Apache-2.0" \
      org.opencontainers.image.source="https://raw.githubusercontent.com/apache/nutch/master/docker/Dockerfile" \
      org.opencontainers.image.title="Apache Nutch 1.x Docker Image" \
      org.opencontainers.image.url="https://hub.docker.com/r/apache/nutch" \
      org.opencontainers.image.vendor="Apache Nutch https://nutch.apache.org"

WORKDIR /root/

# Install dependencies.
# '--no-cache' makes apk fetch a fresh package index for this command and
# keep no local cache, so a separate 'apk update' layer is unnecessary and
# would only store an index in the image.
RUN apk --no-cache add apache-ant bash git openjdk11 supervisor

# Establish environment variables.
# JAVA_HOME is also appended to .bashrc and .ashrc in the home directory.
RUN echo 'export JAVA_HOME=/usr/lib/jvm/java-11-openjdk' >> "$HOME/.bashrc" && \
    echo 'export JAVA_HOME=/usr/lib/jvm/java-11-openjdk' >> "$HOME/.ashrc"
ENV JAVA_HOME='/usr/lib/jvm/java-11-openjdk' \
    NUTCH_HOME='/root/nutch_source/runtime/local'

# Checkout and build the Nutch master branch (1.x).
# The build directory and the Ivy cache are removed in the same RUN so that
# they do not end up in the resulting layer.
RUN git clone https://github.com/apache/nutch.git nutch_source && \
    cd nutch_source && \
    ant runtime && \
    rm -rf build/ /root/.ivy2/

# Create symlinks for runtime/local/bin/nutch and runtime/local/bin/crawl
RUN ln -sf "$NUTCH_HOME/bin/nutch" "$NUTCH_HOME/bin/crawl" /usr/local/bin/
# Stage 'branch-version-0' (BUILD_MODE=0): the 'base' stage unchanged, apart
# from a build-time message describing the selected mode.
FROM base AS branch-version-0
RUN echo "Nutch master branch source install with 'crawl' and 'nutch' scripts on PATH"
# Stage 'branch-version-1' (BUILD_MODE=1): 'base' plus supervisord configured
# from config/supervisord_startserver.conf.
FROM base AS branch-version-1

# Declare the build arguments before the first instruction that uses them,
# so the values are defined in this stage regardless of whether the builder
# inherits ARGs from the parent stage. They are also exported as ENV so that
# they are present in the container environment.
ARG SERVER_PORT=8081
ARG SERVER_HOST=0.0.0.0
ENV SERVER_PORT=$SERVER_PORT \
    SERVER_HOST=$SERVER_HOST

RUN echo "Nutch master branch source install with 'crawl' and 'nutch' scripts on PATH and Nutch REST Server on $SERVER_HOST:$SERVER_PORT"

# Arrange necessary setup for supervisord
RUN mkdir -p /var/log/supervisord
COPY ./config/supervisord_startserver.conf /etc/supervisord.conf

# Expose port for server which can only be accessed if
# the same port is published when the container is run.
EXPOSE $SERVER_PORT

# Run supervisord in the foreground as the container's main process
ENTRYPOINT [ "supervisord", "--nodaemon", "--configuration", "/etc/supervisord.conf" ]
# Stage 'branch-version-2' (BUILD_MODE=2): 'base' plus a Maven build of the
# Nutch WebApp and supervisord configured from
# config/supervisord_startserver_webapp.conf.
FROM base AS branch-version-2

# Declare the build arguments before the first instruction that uses them,
# so the values are defined in this stage regardless of whether the builder
# inherits ARGs from the parent stage. They are also exported as ENV so that
# they are present in the container environment.
ARG SERVER_PORT=8081
ARG SERVER_HOST=0.0.0.0
ARG WEBAPP_PORT=8080
ENV SERVER_PORT=$SERVER_PORT \
    SERVER_HOST=$SERVER_HOST \
    WEBAPP_PORT=$WEBAPP_PORT

RUN echo "Nutch master branch source install with 'crawl' and 'nutch' scripts on PATH, Nutch REST Server on $SERVER_HOST:$SERVER_PORT and WebApp on this container port $WEBAPP_PORT"

# Install the webapp
RUN apk --no-cache add maven
RUN git clone https://github.com/apache/nutch-webapp.git nutch_webapp && \
    cd nutch_webapp && \
    mvn package

# Arrange necessary setup for supervisord
RUN mkdir -p /var/log/supervisord
COPY ./config/supervisord_startserver_webapp.conf /etc/supervisord.conf

# Expose ports for server and webapp, these can only be accessed if
# the same ports are published when the container is run.
EXPOSE $SERVER_PORT $WEBAPP_PORT

# Run supervisord in the foreground as the container's main process
ENTRYPOINT [ "supervisord", "--nodaemon", "--configuration", "/etc/supervisord.conf" ]
# Stage 'final': built from whichever 'branch-version-<n>' stage the global
# BUILD_MODE build argument names, then prints a pointer to the usage guide.
FROM branch-version-${BUILD_MODE} AS final
RUN echo "Successfully built image, see https://s.apache.org/m5933 for guidance on running a container instance."