| /* |
| * Licensed to the Apache Software Foundation (ASF) under one or more |
| * contributor license agreements. See the NOTICE file distributed with |
| * this work for additional information regarding copyright ownership. |
| * The ASF licenses this file to You under the Apache License, Version 2.0 |
| * (the "License"); you may not use this file except in compliance with |
| * the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| package org.apache.oodt.cas.filemgr.catalog.solr; |
| |
| import java.io.StringReader; |
| import java.util.ArrayList; |
| import java.util.Date; |
| import java.util.HashMap; |
| import java.util.List; |
| import java.util.Map; |
| |
| import javax.xml.parsers.DocumentBuilder; |
| import javax.xml.parsers.DocumentBuilderFactory; |
| |
| import org.apache.oodt.cas.filemgr.structs.Product; |
| import org.apache.oodt.cas.filemgr.structs.ProductType; |
| import org.apache.oodt.cas.filemgr.structs.Reference; |
| import org.apache.oodt.cas.filemgr.structs.exceptions.CatalogException; |
| import org.apache.oodt.cas.metadata.Metadata; |
| import org.w3c.dom.Document; |
| import org.w3c.dom.Element; |
| import org.w3c.dom.Node; |
| import org.w3c.dom.NodeList; |
| import org.xml.sax.InputSource; |
| |
| /** |
| * Default implementation of {@link ProductSerializer} |
| * that transforms a CAS product into a single Solr document based on the following rules: |
| * o) the core product attributes are used to generate Solr fields starting with "CAS...." |
| * o) the product references are converted to Solr fields starting with "CAS.Reference..." or "CAS.RootReference..." |
| * o) all other metadata fields are converted into Solr fields with the same name and number of values. |
| * Note that the field multiplicity must be consistent with its definition in the Solr schema.xml. |
| * |
| * This class generates all Solr documents in XML format. |
| * |
| * @author Luca Cinquini |
| * |
| */ |
| public class DefaultProductSerializer implements ProductSerializer { |
| |
| /** |
| * {@inheritDoc} |
| */ |
| @Override |
| public String getMimeType() { |
| return Parameters.MIME_TYPE_XML; |
| } |
| |
| /** |
| * {@inheritDoc} |
| */ |
| @Override |
| public List<String> serialize(Product product, boolean create) { |
| |
| Map<String, List<String>> fields = new HashMap<String, List<String>>(); |
| List<String> docs = new ArrayList<String>(); |
| |
| // add core product attributes to map |
| this.addKeyValueToMap(fields, Parameters.PRODUCT_ID, product.getProductId()); |
| this.addKeyValueToMap(fields, Parameters.PRODUCT_NAME, product.getProductName()); |
| this.addKeyValueToMap(fields, Parameters.PRODUCT_STRUCTURE, product.getProductStructure()); |
| this.addKeyValueToMap(fields, Parameters.PRODUCT_TRANSFER_STATUS, product.getTransferStatus()); |
| ProductType productType = product.getProductType(); |
| if (productType!=null) { |
| this.addKeyValueToMap(fields, Parameters.PRODUCT_TYPE_NAME, productType.getName()); |
| this.addKeyValueToMap(fields, Parameters.PRODUCT_TYPE_ID, productType.getProductTypeId()); |
| } |
| if (create) { |
| // only insert date/time when product is first created |
| Date productDateTime = new Date(); // current datetime |
| this.addKeyValueToMap(fields, Parameters.PRODUCT_RECEIVED_TIME, Parameters.SOLR_DATE_TIME_FORMATTER.format(productDateTime)); |
| } |
| |
| // create new product: use Solr id == CAS id |
| if (create) { |
| docs.add( this.generateInsertDocuments(product.getProductId(), fields) ); |
| |
| // update existing product |
| } else { |
| docs.addAll( this.generateUpdateDocuments(product.getProductId(), fields, true) ); // replace=true |
| |
| } |
| |
| return docs; |
| |
| } |
| |
| /** |
| * {@inheritDoc} |
| */ |
| public List<String> serialize(String productId, Reference rootReference, List<Reference> references, boolean replace) { |
| |
| Map<String, List<String>> fields = new HashMap<String, List<String>>(); |
| |
| // product root reference |
| if (rootReference!=null) { |
| |
| addKeyValueToMap(fields, Parameters.ROOT_REFERENCE_ORIGINAL, rootReference.getOrigReference()); |
| addKeyValueToMap(fields, Parameters.ROOT_REFERENCE_DATASTORE, rootReference.getDataStoreReference()); |
| addKeyValueToMap(fields, Parameters.ROOT_REFERENCE_FILESIZE, ""+rootReference.getFileSize()); |
| addKeyValueToMap(fields, Parameters.ROOT_REFERENCE_MIMETYPE, rootReference.getMimeType().toString()); |
| |
| } |
| |
| // all other product references |
| // note that Solr will preserve the indexing order. |
| for (Reference reference : references) { |
| |
| addKeyValueToMap(fields, Parameters.REFERENCE_ORIGINAL, reference.getOrigReference()); |
| addKeyValueToMap(fields, Parameters.REFERENCE_DATASTORE, reference.getDataStoreReference()); |
| addKeyValueToMap(fields, Parameters.REFERENCE_FILESIZE, ""+reference.getFileSize()); |
| addKeyValueToMap(fields, Parameters.REFERENCE_MIMETYPE, reference.getMimeType().toString()); |
| |
| } |
| |
| return generateUpdateDocuments(productId, fields, replace); |
| |
| } |
| |
| /** |
| * {@inheritDoc} |
| */ |
| public QueryResponse deserialize(String xml) throws CatalogException { |
| |
| try { |
| |
| QueryResponse queryResponse = new QueryResponse(); |
| |
| // parse XML into DOM |
| Document document = parseXml(xml); |
| |
| // extract information from DOM to Product |
| Element response = document.getDocumentElement(); |
| Node result = response.getElementsByTagName("result").item(0); |
| queryResponse.setNumFound( Integer.parseInt( ((Element)result).getAttribute("numFound") ) ); |
| queryResponse.setStart( Integer.parseInt( ((Element)result).getAttribute("start") ) ); |
| NodeList docs = result.getChildNodes(); |
| for (int i=0; i< docs.getLength(); i++) { |
| Node node = docs.item(i); |
| if (node.getNodeName().equals("doc")) { |
| Element doc = (Element)node; |
| CompleteProduct cp = this.deserialize(doc); |
| queryResponse.getCompleteProducts().add(cp); |
| } |
| } |
| return queryResponse; |
| |
| } catch(Exception e) { |
| e.printStackTrace(); |
| throw new CatalogException(e.getMessage()); |
| } |
| |
| } |
| |
| /** |
| * {@inheritDoc} |
| */ |
| public List<String> serialize(String productId, Metadata metadata, boolean replace) { |
| |
| Map<String, List<String>> fields = new HashMap<String, List<String>>(); |
| |
| for (String key : metadata.getKeys()) { |
| if (! (key.startsWith(Parameters.NS) // skip metadata keys starting with reserved namespace |
| || Parameters.PRODUCT_TYPE_NAME.indexOf(key)>=0 // skip 'ProductType' as already stored as 'CAS.ProductTypeName' |
| || Parameters.PRODUCT_STRUCTURE.indexOf(key)>=0)) { // skip 'ProductType' as already stored as 'CAS.ProductStructure' |
| for (String value : metadata.getAllMetadata(key)) { |
| this.addKeyValueToMap(fields, key, value); |
| } |
| } |
| } |
| |
| return this.generateUpdateDocuments(productId, fields, replace); |
| |
| } |
| |
| /** |
| * Method to add a (key, value) to a multi-valued map with appropriate checks. |
| * @param map |
| * @param key |
| * @param value |
| */ |
| protected void addKeyValueToMap(Map<String, List<String>> map, String key, String value) { |
| |
| if (!map.containsKey(key)) { |
| map.put(key, new ArrayList<String>()); |
| } |
| if (value!=null) { |
| map.get(key).add(value); |
| } else { |
| // use special value to trigger key removal |
| map.get(key).add(Parameters.NULL); |
| } |
| } |
| |
| /** |
| * Utility method to generate a Solr insert document. |
| * |
| * @param productId |
| * @param fields |
| * @return |
| */ |
| protected String generateInsertDocuments(String productId, Map<String,List<String>> fields) { |
| |
| StringBuilder doc = new StringBuilder(); |
| doc.append("<doc>"); |
| |
| // product Solr id field |
| doc.append( encodeIndexField(Parameters.ID, productId) ); |
| |
| // all other fields |
| for (String key : fields.keySet()) { |
| for (String value : fields.get(key)) { |
| doc.append( encodeIndexField(key, value) ); |
| } |
| } |
| |
| doc.append("</doc>"); |
| return doc.toString(); |
| } |
| |
| /** |
| * Utility method to generate Solr update documents. |
| * Note that the requests for setting/adding/deleting fields must be sent as separate documents to Solr |
| * @param productId |
| * @param fields |
| * @param replace |
| * @return |
| */ |
| protected List<String> generateUpdateDocuments(String productId, Map<String,List<String>> fields, boolean replace) { |
| |
| // list for different instruction types |
| List<String> setFields = new ArrayList<String>(); |
| List<String> addFields = new ArrayList<String>(); |
| List<String> delFields = new ArrayList<String>(); |
| |
| // encode update instructions |
| for (String key : fields.keySet()) { |
| |
| List<String> values = fields.get(key); |
| |
| if (replace) { |
| |
| if (values.isEmpty()) { |
| // use special value to flag removal |
| delFields.add( this.encodeUpdateField(key, Parameters.NULL, replace) ); |
| |
| } else { |
| for (String value : values) { |
| setFields.add( this.encodeUpdateField(key, value, replace) ); |
| } |
| } |
| |
| } else { |
| for (String value : values) { |
| addFields.add( this.encodeUpdateField(key, value, replace) ); |
| } |
| } |
| |
| } |
| |
| List<String> docs = new ArrayList<String>(); |
| if (!delFields.isEmpty()) docs.add( toDoc(productId, delFields) ); |
| if (!setFields.isEmpty()) docs.add( toDoc(productId, setFields) ); |
| if (!addFields.isEmpty()) docs.add( toDoc(productId, addFields) ); |
| return docs; |
| |
| } |
| |
| /** |
| * Utility method to merge field update instructions into a single document. |
| * @param productId |
| * @param updates |
| * @return |
| */ |
| private String toDoc(String productId, List<String> updates) { |
| |
| StringBuilder doc = new StringBuilder(); |
| doc.append("<doc>"); |
| |
| // reference product record id |
| doc.append( encodeIndexField(Parameters.ID, productId) ); |
| |
| // loop over field update instructions |
| for (String update : updates) { |
| doc.append(update); |
| } |
| |
| doc.append("</doc>"); |
| |
| return doc.toString(); |
| |
| } |
| |
| /** |
| * Method to encode a Solr field indexing instruction. |
| * If the value is null, the empty string is returned. |
| * @param key |
| * @param value |
| * @return |
| */ |
| protected String encodeIndexField(String key, String value) { |
| if (value==null || value.equals(Parameters.NULL)) { |
| return ""; |
| } else { |
| return "<field name=\""+key+"\">" + value + "</field>"; |
| } |
| } |
| |
| /** |
| * Method to encode a field update instruction for the three possible cases: |
| * add new values to a key (1), replace current values for a key (2), remove all values for a key (3). |
| * |
| * @param key |
| * @param value |
| * @param replace |
| * @return |
| */ |
| protected String encodeUpdateField(String key, String value, boolean replace) { |
| StringBuilder sb = new StringBuilder(); |
| sb.append("<field name=\""+key+"\""); |
| |
| if (replace) { |
| |
| if (value==null || value.equals(Parameters.NULL)) { |
| |
| // (3) remove all values for given key |
| sb.append(" update=\"set\" null=\"true\" />"); |
| |
| } else { |
| |
| // (2) replace existing values with new values |
| sb.append(" update=\"set\">" + value + "</field>"); |
| } |
| |
| } else { |
| |
| // (1) add new values to existing values |
| sb.append(" update=\"add\">"+value+"</field>"); |
| |
| } |
| |
| return sb.toString(); |
| } |
| |
| |
| |
| /** |
| * Method that parses a single Solr document snippet |
| * to extract Product and Metadata attributes. |
| * |
| * @param doc |
| * @return |
| */ |
| private CompleteProduct deserialize(Element doc) { |
| |
| CompleteProduct cp = new CompleteProduct(); |
| Product product = cp.getProduct(); |
| ProductType productType = product.getProductType(); |
| Metadata metadata = cp.getMetadata(); |
| List<Reference> references = product.getProductReferences(); |
| Reference rootReference = product.getRootRef(); |
| |
| NodeList children = doc.getChildNodes(); |
| for (int j=0; j<children.getLength(); j++) { |
| |
| Node child = children.item(j); |
| Element element = (Element)child; |
| String name = element.getAttribute("name"); |
| |
| /** |
| *<arr name="ScanPointingSource"> |
| * <str>G073.65+0.19</str> |
| * <str>J2015+3410</str> |
| * .......... |
| */ |
| if (child.getNodeName().equals("arr")) { |
| |
| NodeList values = element.getChildNodes(); |
| List<String> vals = new ArrayList<String>(); |
| for (int k=0; k<values.getLength(); k++) { |
| String value = ((Element)values.item(k)).getTextContent(); |
| vals.add(value); |
| } |
| // CAS.reference.... fields |
| if (name.startsWith(Parameters.NS)) { |
| for (int k=0; k<values.getLength(); k++) { |
| // create this reference |
| if (references.size()<=k) references.add(new Reference()); |
| if (name.equals(Parameters.REFERENCE_ORIGINAL)) { |
| references.get(k).setOrigReference(vals.get(k)); |
| } else if (name.equals(Parameters.REFERENCE_DATASTORE)) { |
| references.get(k).setDataStoreReference(vals.get(k)); |
| } else if (name.equals(Parameters.REFERENCE_FILESIZE)) { |
| references.get(k).setFileSize(Long.parseLong(vals.get(k))); |
| } else if (name.equals(Parameters.REFERENCE_MIMETYPE)) { |
| references.get(k).setMimeType(vals.get(k)); |
| } |
| } |
| // all other multi-valued fields |
| } else { |
| this.deserializeMultiValueField(name, vals, metadata); |
| } |
| |
| /** |
| * <str name="id">6684d79d-a011-4bc0-b3b3-4f11817091c8</str> |
| * <str name="CAS.ProductId">6684d79d-a011-4bc0-b3b3-4f11817091c8</str> |
| * <str name="CAS.ProductName">tns_br145x4_20</str> |
| * <str name="FileLocation">/usr/local/ska-dc/data/archive</str> |
| * ........... |
| */ |
| } else { |
| |
| String value = element.getTextContent(); |
| |
| // core CAS fields |
| if (name.startsWith(Parameters.NS)) { |
| if (name.equals(Parameters.PRODUCT_ID)) { |
| product.setProductId(value); |
| } else if (name.equals(Parameters.PRODUCT_NAME)) { |
| product.setProductName(value); |
| } else if (name.equals(Parameters.PRODUCT_STRUCTURE)) { |
| product.setProductStructure(value); |
| } else if (name.equals(Parameters.PRODUCT_TRANSFER_STATUS)) { |
| product.setTransferStatus(value); |
| } else if (name.equals(Parameters.PRODUCT_TYPE_NAME)) { |
| productType.setName(value); |
| } else if (name.equals(Parameters.PRODUCT_TYPE_ID)) { |
| productType.setProductTypeId(value); |
| } else if (name.equals(Parameters.PRODUCT_RECEIVED_TIME)) { |
| // ignore ? |
| |
| // CAS root reference |
| } else if (name.startsWith(Parameters.NS+Parameters.ROOT)) { |
| if (rootReference==null) rootReference = new Reference(); |
| if (name.equals(Parameters.ROOT_REFERENCE_ORIGINAL)) { |
| rootReference.setOrigReference(value); |
| } else if (name.equals(Parameters.ROOT_REFERENCE_DATASTORE)) { |
| rootReference.setDataStoreReference(value); |
| } else if (name.equals(Parameters.ROOT_REFERENCE_FILESIZE)) { |
| rootReference.setFileSize(Long.parseLong(value)); |
| } else if (name.equals(Parameters.ROOT_REFERENCE_MIMETYPE)) { |
| rootReference.setMimeType(value); |
| } |
| |
| } |
| |
| // non core single-valued fields |
| } else { |
| this.deserializeSingleValueField(name, value, metadata); |
| } // "CAS".... or not |
| |
| } // "arr" or anything else |
| |
| } // loop over <doc> children |
| |
| return cp; |
| |
| } |
| |
| private Document parseXml(String xml) throws Exception { |
| |
| DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance(); |
| DocumentBuilder parser = factory.newDocumentBuilder(); |
| Document document = parser.parse( new InputSource(new StringReader(xml)) ); |
| return document; |
| |
| } |
| |
| /** |
| * Method that deserializes a single-valued Solr field into a Metadata element. |
| * This method can be overridden by sub-classes to provide custom behavior. |
| * |
| * @param name : the Solr field name |
| * @param value : the Solr field single value |
| * @param metadata : the metadata container |
| */ |
| protected void deserializeSingleValueField(String name, String value, Metadata metadata) { |
| |
| if (name.equals(Parameters.ID)) { |
| // ignore Solr internal identifier (as it is duplicate information of CAS.ProductId) |
| } else { |
| metadata.addMetadata(name, value); |
| } |
| |
| } |
| |
| /** |
| * Method that deserializes a multi-valued Solr field into a Metadata element. |
| * This method can be overridden by sub-classes to provide custom behavior. |
| * |
| * @param name : the Solr field name |
| * @param values : the Solr field multiple values |
| * @param metadata : the metadata container |
| */ |
| protected void deserializeMultiValueField(String name, List<String> values, Metadata metadata) { |
| metadata.addMetadata(name, values); |
| } |
| |
| |
| } |