blob: 74cba962267209d4ce0bbebc423ca21921dce459 [file] [log] [blame]
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.oodt.opendapps.extractors;
//JDK imports
import java.net.MalformedURLException;
import java.net.URL;
import java.util.UUID;
import java.util.logging.Level;
import java.util.logging.Logger;
//OODT imports
import org.apache.oodt.cas.metadata.Metadata;
import org.apache.oodt.opendapps.config.OpendapConfig;
import org.apache.oodt.opendapps.util.ProfileUtils;
//Spring imports
import org.springframework.util.StringUtils;
//THREDDS imports
import thredds.catalog.InvAccess;
import thredds.catalog.InvDataset;
import thredds.catalog.InvDocumentation;
import thredds.catalog.InvProperty;
import thredds.catalog.ServiceType;
import thredds.catalog.ThreddsMetadata.Contributor;
import thredds.catalog.ThreddsMetadata.GeospatialCoverage;
import thredds.catalog.ThreddsMetadata.Range;
import thredds.catalog.ThreddsMetadata.Source;
import thredds.catalog.ThreddsMetadata.Vocab;
import ucar.nc2.units.DateType;
import ucar.unidata.geoloc.LatLonRect;
/**
 * Implementation of {@link MetadataExtractor} that extracts metadata from a
 * THREDDS dataset.
 *
 * <p>
 * The dataset is supplied once, at construction time. Each call to
 * {@link #extract(Metadata, OpendapConfig)} reads the dataset's descriptive
 * fields (identification, people, dates, documentation, geospatial and time
 * coverage, properties and access URLs) and hands every value to
 * {@code ProfileUtils.addIfNotNull} under a fixed metadata key.
 * </p>
 *
 * @author Luca Cinquini
 *
 */
public class ThreddsMetadataExtractor implements MetadataExtractor {

  private static final Logger LOG = Logger
      .getLogger(ThreddsMetadataExtractor.class.getName());

  // constant missing for 4.2 version of NetCDF library
  public static final String SERVICE_TYPE_NCML = "NCML";

  /**
   * The source of metadata to be extracted.
   */
  private final InvDataset dataset;

  /**
   * Creates an extractor bound to a single THREDDS dataset.
   *
   * @param dataset
   *          the dataset to read metadata from; it is dereferenced without a
   *          null check by {@link #extract(Metadata, OpendapConfig)}
   */
  public ThreddsMetadataExtractor(final InvDataset dataset) {
    this.dataset = dataset;
  }

  /**
   * Copies the metadata of the wrapped dataset into {@code met}.
   *
   * <p>
   * Values are added in a fixed order, so that keys that receive several
   * values (for example {@code "Access"}) always list them in the same
   * sequence. Problems with individual values (unparsable catalog URL,
   * unconvertible date, incomplete access element) are logged as warnings and
   * the offending value is skipped; the remaining metadata is still
   * extracted.
   * </p>
   *
   * @param met
   *          the metadata container to populate
   * @param config
   *          the OPeNDAP configuration; not used by this implementation
   */
  public void extract(Metadata met, OpendapConfig config) {

    LOG.log(Level.INFO, "Crawling catalog URL=" + dataset.getCatalogUrl()
        + " dataset ID=" + dataset.getID());

    ProfileUtils.addIfNotNull(met, "Authority", dataset.getAuthority());
    ProfileUtils.addIfNotNull(met, "CatalogUrl", dataset.getCatalogUrl());
    this.addHost(met);
    ProfileUtils.addIfNotNull(met, "DatasetFullName", dataset.getFullName());

    if (dataset.getContributors() != null) {
      for (Contributor contributor : dataset.getContributors()) {
        ProfileUtils.addIfNotNull(met, "Contributor", contributor.getName());
      }
    }
    addSourceNames(met, "Creator", dataset.getCreators());

    if (dataset.getDataFormatType() != null) {
      ProfileUtils.addIfNotNull(met, "DataFormatType", dataset
          .getDataFormatType().toString());
    }
    if (dataset.getDataType() != null) {
      ProfileUtils.addIfNotNull(met, "DataType", dataset.getDataType()
          .toString());
    }

    if (dataset.getDates() != null) {
      for (DateType dateType : dataset.getDates()) {
        ProfileUtils.addIfNotNull(met, "Dates",
            toIso8601OrNull(dateType, "date"));
      }
    }

    this.addDocumentation(met);
    ProfileUtils.addIfNotNull(met, "FullName", dataset.getFullName());
    this.addGeospatialCoverage(met);

    ProfileUtils.addIfNotNull(met, "History", dataset.getHistory());
    addVocabTexts(met, "Keywords", dataset.getKeywords());
    ProfileUtils.addIfNotNull(met, "Name", dataset.getName());
    ProfileUtils.addIfNotNull(met, "Processing", dataset.getProcessing());
    addVocabTexts(met, "Projects", dataset.getProjects());

    if (dataset.getProperties() != null) {
      for (InvProperty prop : dataset.getProperties()) {
        // each dataset property becomes a metadata key of the same name
        ProfileUtils.addIfNotNull(met, prop.getName(), prop.getValue());
      }
    }

    // Note: use "Publisher" (singular) as from the OODT profile specification
    addSourceNames(met, "Publisher", dataset.getPublishers());

    ProfileUtils.addIfNotNull(met, "RestrictAccess",
        dataset.getRestrictAccess());

    this.addTimeCoverage(met);

    // dataset unique ID
    ProfileUtils.addIfNotNull(met, "UniqueID", dataset.getUniqueID());
    // dataset ID is typically not null
    ProfileUtils.addIfNotNull(met, "ID", dataset.getID());
    // generate a UUID for each dataset, to be used as profile ID
    ProfileUtils.addIfNotNull(met, "UUID", UUID.randomUUID().toString());

    this.addAccessUrls(met);

  }

  /**
   * Adds the host name of the catalog URL under the key "Host". Nothing is
   * added when the catalog URL is missing or cannot be parsed.
   */
  private void addHost(final Metadata met) {
    final String catalogUrl = dataset.getCatalogUrl();
    if (catalogUrl == null) {
      return;
    }
    try {
      ProfileUtils.addIfNotNull(met, "Host", (new URL(catalogUrl)).getHost());
    } catch (MalformedURLException e) {
      LOG.log(Level.WARNING, e.getMessage());
    }
  }

  /**
   * Adds inline documentation (keyed by its type, or "Documentation" when
   * the type is blank) and hyperlinked documentation (key "Xlink").
   */
  private void addDocumentation(final Metadata met) {
    if (dataset.getDocumentation() == null) {
      return;
    }
    for (InvDocumentation doc : dataset.getDocumentation()) {

      // textual documentation
      if (StringUtils.hasText(doc.getInlineContent())) {
        // use specific documentation type, when available,
        // otherwise use generic "Documentation" tag
        final String key = StringUtils.hasText(doc.getType()) ? doc.getType()
            : "Documentation";
        ProfileUtils.addIfNotNull(met, key, doc.getInlineContent());
      }

      // hyperlinked documentation
      if (StringUtils.hasText(doc.getXlinkHref())) {
        final String tuple = this.encodeXlinkTuple(doc.getXlinkHref(),
            doc.getXlinkTitle(), doc.getType());
        ProfileUtils.addIfNotNull(met, "Xlink", tuple);
      }

    }
  }

  /**
   * Adds the bounding box corners, the north/south and east/west ranges, the
   * lat/lon resolutions and limits, and the coverage names of the dataset's
   * geospatial coverage. Does nothing when the dataset has no coverage.
   */
  private void addGeospatialCoverage(final Metadata met) {
    final GeospatialCoverage geoCoverage = dataset.getGeospatialCoverage();
    if (geoCoverage == null) {
      return;
    }

    final LatLonRect bbox = geoCoverage.getBoundingBox();
    if (bbox != null) {
      ProfileUtils.addIfNotNull(met, "SouthwestBC", bbox.getLowerLeftPoint()
          .toString());
      ProfileUtils.addIfNotNull(met, "NorthwestBC", bbox.getUpperLeftPoint()
          .toString());
      ProfileUtils.addIfNotNull(met, "NortheastBC", bbox.getUpperRightPoint()
          .toString());
      ProfileUtils.addIfNotNull(met, "SoutheastBC", bbox.getLowerRightPoint()
          .toString());
    }

    // try north south, east west
    addRange(met, "NorthSouthRange", geoCoverage.getNorthSouthRange());
    addRange(met, "EastWestRange", geoCoverage.getEastWestRange());

    ProfileUtils.addIfNotNull(met, "GeospatialCoverageLatitudeResolution",
        String.valueOf(geoCoverage.getLatResolution()));
    ProfileUtils.addIfNotNull(met, "GeospatialCoverageLongitudeResolution",
        String.valueOf(geoCoverage.getLonResolution()));

    // add geo-spatial coverage alternative form
    ProfileUtils.addIfNotNull(met, "GeospatialCoverageLatSouth",
        String.valueOf(geoCoverage.getLatSouth()));
    ProfileUtils.addIfNotNull(met, "GeospatialCoverageLatNorth",
        String.valueOf(geoCoverage.getLatNorth()));
    ProfileUtils.addIfNotNull(met, "GeospatialCoverageLonWest",
        String.valueOf(geoCoverage.getLonWest()));
    ProfileUtils.addIfNotNull(met, "GeospatialCoverageLonEast",
        String.valueOf(geoCoverage.getLonEast()));

    addVocabTexts(met, "GeospatialCoverage", geoCoverage.getNames());
  }

  /**
   * Adds start, resolution, size, units and stop (start + size) of a
   * coverage range, using {@code keyPrefix} followed by "Start",
   * "Resolution", "Size", "Units" and "Stop" as keys. A null range is
   * ignored.
   */
  private static void addRange(final Metadata met, final String keyPrefix,
      final Range range) {
    if (range == null) {
      return;
    }
    ProfileUtils.addIfNotNull(met, keyPrefix + "Start",
        String.valueOf(range.getStart()));
    ProfileUtils.addIfNotNull(met, keyPrefix + "Resolution",
        String.valueOf(range.getResolution()));
    ProfileUtils.addIfNotNull(met, keyPrefix + "Size",
        String.valueOf(range.getSize()));
    ProfileUtils.addIfNotNull(met, keyPrefix + "Units", range.getUnits());
    ProfileUtils.addIfNotNull(met, keyPrefix + "Stop",
        String.valueOf(range.getStart() + range.getSize()));
  }

  /**
   * Adds "StartDateTime", "EndDateTime" and "TimeCoverageResolution" from
   * the dataset's time coverage. Start and end are converted independently,
   * so a problem with one of them does not discard the other.
   */
  private void addTimeCoverage(final Metadata met) {
    if (dataset.getTimeCoverage() == null) {
      return;
    }

    final String startDateTimeStr = toIso8601OrNull(dataset.getTimeCoverage()
        .getStart(), "start date time");
    final String endDateTimeStr = toIso8601OrNull(dataset.getTimeCoverage()
        .getEnd(), "end date time");
    ProfileUtils.addIfNotNull(met, "StartDateTime", startDateTimeStr);
    ProfileUtils.addIfNotNull(met, "EndDateTime", endDateTimeStr);

    if (dataset.getTimeCoverage().getResolution() != null) {
      ProfileUtils.addIfNotNull(met, "TimeCoverageResolution", dataset
          .getTimeCoverage().getResolution().getText());
    }
  }

  /**
   * Stores the access services (only the OpenDAP endpoint for now) followed
   * by the XML and HTML forms of the THREDDS catalog URL, all under the key
   * "Access"; the raw OpenDAP URL is also stored under "OpendapUrl".
   */
  private void addAccessUrls(final Metadata met) {

    if (dataset.getAccess() != null) {
      for (InvAccess access : dataset.getAccess()) {

        // An access element without a resolvable URI or without a typed
        // service cannot be classified: skip it rather than aborting the
        // extraction of the whole dataset with a NullPointerException.
        final java.net.URI standardUri = access.getStandardUri();
        if (standardUri == null || access.getService() == null
            || access.getService().getServiceType() == null) {
          LOG.log(Level.WARNING,
              "Skipping incomplete access element for dataset ID="
                  + dataset.getID());
          continue;
        }

        final String url = standardUri.toString();
        final String type = access.getService().getServiceType().toString();

        // add opendap access URL
        if (type.equalsIgnoreCase(ServiceType.OPENDAP.toString())) {
          // store opendap URL
          ProfileUtils.addIfNotNull(met, "OpendapUrl", url);
          // note: special processing of opendap endpoints since URL in
          // thredds catalog is unusable without a suffix
          ProfileUtils.addIfNotNull(met, "Access", this.encodeAccessTuple(url
              + ".html", ProfileUtils.MIME_TYPE_OPENDAP_HTML, type));
        }

      }
    }

    // add THREDDS XML catalog URL, and its HTML counterpart
    final String catalogUrl = dataset.getCatalogUrl(); // catalog_url.xml#dataset_id
    if (catalogUrl != null) {
      ProfileUtils.addIfNotNull(met, "Access", this.encodeAccessTuple(
          catalogUrl, ProfileUtils.MIME_TYPE_THREDDS, "Catalog/XML"));
      ProfileUtils.addIfNotNull(met, "Access", this.encodeAccessTuple(
          catalogUrl.replaceAll("\\.xml", ".html"),
          ProfileUtils.MIME_TYPE_HTML, "Catalog/HTML"));
    }

  }

  /**
   * Adds the text of every vocabulary term under the given key. A null
   * collection is ignored.
   */
  private static void addVocabTexts(final Metadata met, final String key,
      final Iterable<? extends Vocab> vocabs) {
    if (vocabs == null) {
      return;
    }
    for (Vocab vocab : vocabs) {
      ProfileUtils.addIfNotNull(met, key, vocab.getText());
    }
  }

  /**
   * Adds the name of every source (creator, publisher) under the given key.
   * A null collection is ignored.
   */
  private static void addSourceNames(final Metadata met, final String key,
      final Iterable<? extends Source> sources) {
    if (sources == null) {
      return;
    }
    for (Source source : sources) {
      ProfileUtils.addIfNotNull(met, key, source.getName());
    }
  }

  /**
   * Converts a date with {@code ProfileUtils.toISO8601}.
   *
   * @param dateType
   *          the date to convert, may be null
   * @param description
   *          short label of the value, used in the warning message only
   * @return the converted string, or null when {@code dateType} is null or
   *         the conversion fails (a warning is logged in the latter case)
   */
  private static String toIso8601OrNull(final DateType dateType,
      final String description) {
    if (dateType == null) {
      return null;
    }
    try {
      return ProfileUtils.toISO8601(dateType.getDate());
    } catch (Exception e) {
      LOG.log(Level.WARNING, "Error converting " + description + ": ["
          + dateType.getDate() + "]: Message: " + e.getMessage());
      return null;
    }
  }

  /**
   * Utility method that joins the parts of an xlink tuple (href, title, type)
   * with a delimiting character.
   *
   * @param href
   *          : the xlink URL, must be not null
   * @param title
   *          : the xlink title, may be null; "Reference" is used when blank
   * @param type
   *          : the xlink type, may be null; "HTML" is used when blank
   * @return the three parts joined by {@code ProfileUtils.CHAR}
   */
  private String encodeXlinkTuple(final String href, final String title,
      final String type) {
    final StringBuilder tuple = new StringBuilder();
    tuple.append(href)
        .append(ProfileUtils.CHAR)
        .append(StringUtils.hasText(title) ? title : "Reference")
        .append(ProfileUtils.CHAR)
        .append(StringUtils.hasText(type) ? type : "HTML");
    return tuple.toString();
  }

  /**
   * Utility method that joins the parts of a THREDDS access point (url,
   * service type, service name) with a delimiting character.
   *
   * @param url
   *          : the access URL
   * @param type
   *          : the service type, mapped to a mime type; empty when blank
   * @param name
   *          : the service name; empty when blank
   * @return the three parts joined by {@code ProfileUtils.CHAR}
   */
  private String encodeAccessTuple(final String url, final String type,
      final String name) {
    final StringBuilder tuple = new StringBuilder();
    tuple.append(url)
        .append(ProfileUtils.CHAR)
        .append(StringUtils.hasText(type) ? type : "")
        .append(ProfileUtils.CHAR)
        .append(StringUtils.hasText(name) ? name : "");
    return tuple.toString();
  }

}