/**
 * Resolves a fully qualified Java class name to a class reference that can be
 * handed to Java APIs from this script.
 *
 * The Nashorn-style lookup (`Java.type`) is attempted first; if that throws
 * for any reason, the Rhino-style `Packages` tree is walked instead. Plain
 * property lookups are used rather than eval(), so `name` is never
 * interpreted as script source.
 *
 * @param {string} name Fully qualified class name, e.g. "java.lang.String".
 * @returns {*} `Java.type(name)["class"]` when the first lookup succeeds,
 *     otherwise the object reached by following each dot-separated segment
 *     of `name` starting from `Packages`.
 */
function get_class(name) {
  var class_name = String(name);

  try {
    // Java8 Nashorn. Bracket access is used because `class` is a reserved
    // word, which some parsers do not accept after a dot.
    return Java.type(class_name)["class"];
  } catch (e) {
    // Java7 Rhino: equivalent of the expression `Packages.<name>`, resolved
    // one segment at a time.
    var segments = class_name.split(".");
    var node = Packages;
    for (var i = 0; i < segments.length; i++) {
      node = node[segments[i]];
    }
    return node;
  }
}
| |
/**
 * Maps a content type (parameters already stripped) to a user-friendly
 * document type, so that e.g. image/jpeg and image/tiff both land in an
 * "image" facet.
 *
 * @param {string} ct Content type such as "image/jpeg".
 * @returns {string|undefined} The simplified type, or undefined when no
 *     pattern matches.
 */
function classify_content_type(ct) {
  // Checked in order; the first matching pattern wins.
  var rules = [
    [/^application\/rtf|wordprocessing/, "doc"],
    [/html/, "html"],
    [/^image\/.*/, "image"],
    [/presentation|powerpoint/, "presentation"],
    [/spreadsheet|excel/, "spreadsheet"],
    [/^application\/pdf/, "pdf"],
    [/^text\/plain/, "text"]
  ];

  for (var i = 0; i < rules.length; i++) {
    if (rules[i][0].test(ct)) {
      return rules[i][1];
    }
  }
  return undefined;
}

/**
 * Runs `content` through the index analyzer of the "text_email_url" field
 * type and adds every produced term to `doc` under a field named after the
 * token type: angle brackets removed, lower-cased, suffixed with "_ss".
 *
 * @param {*} doc Document exposing addField(name, value).
 * @param {*} content Text to analyze.
 */
function add_token_type_fields(doc, content) {
  var analyzer =
    req.getCore().getLatestSchema()
      .getFieldTypeByName("text_email_url")
      .getIndexAnalyzer();

  var token_stream = analyzer.tokenStream("content", content);
  try {
    var term_att = token_stream.getAttribute(get_class("org.apache.lucene.analysis.tokenattributes.CharTermAttribute"));
    var type_att = token_stream.getAttribute(get_class("org.apache.lucene.analysis.tokenattributes.TypeAttribute"));

    token_stream.reset();
    while (token_stream.incrementToken()) {
      // Coerce to a script string first so the regex-based replace below is
      // the JavaScript one even if type() hands back a host string object.
      var token_type = String(type_att.type()).replace(/<|>/g, "").toLowerCase();
      doc.addField(token_type + "_ss", term_att.toString());
    }
    token_stream.end();
  } finally {
    // Always release the stream, including when analysis throws part-way.
    token_stream.close();
  }
}

/**
 * Enriches the document carried by an add command.
 *
 * 1. Logs the document id.
 * 2. If "content_type" is present, drops everything from the first ';'
 *    onward, sets "doc_type" when the type is recognized, and sets
 *    "content_type_type_s" / "content_type_subtype_s" from the parts around
 *    the first '/'. A value without a '/' is logged and the two split fields
 *    are left unset.
 * 3. If "content" is present, adds one "<tokentype>_ss" value per token
 *    produced by the "text_email_url" index analyzer.
 *
 * @param {*} cmd Add command; its `solrDoc` property is the document to
 *     modify in place.
 */
function processAdd(cmd) {
  // Declared with `var` so the document does not leak into the global scope.
  var doc = cmd.solrDoc; // org.apache.solr.common.SolrInputDocument
  var id = doc.getFieldValue("id");
  logger.info("update-script#processAdd: id=" + id);

  var raw_ct = doc.getFieldValue("content_type");
  if (raw_ct) {
    var ct = String(raw_ct);

    // strip off semicolon onward
    var semicolon_index = ct.indexOf(";");
    if (semicolon_index !== -1) {
      ct = ct.substring(0, semicolon_index);
    }

    var doc_type = classify_content_type(ct);
    if (doc_type) {
      doc.setField("doc_type", doc_type);
    }

    // and split type/subtype, but only when there is a slash to split on
    var slash_index = ct.indexOf("/");
    if (slash_index !== -1) {
      doc.setField("content_type_type_s", ct.substring(0, slash_index));
      doc.setField("content_type_subtype_s", ct.substring(slash_index + 1));
    } else {
      logger.info("update-script#processAdd: id=" + id +
        " content_type without '/', skipping type/subtype: " + ct);
    }
  }

  var content = doc.getFieldValue("content");
  if (!content) {
    return; // No content found, so we are done here
  }

  add_token_type_fields(doc, content);
}
| |
/**
 * Delete hook. Deliberately empty: `cmd` is ignored and nothing is returned.
 * NOTE(review): presumably invoked by the hosting update-script processor
 * for delete commands — confirm against the host's documentation.
 *
 * @param {*} cmd Delete command (unused).
 */
function processDelete(cmd) {
  // Intentionally left blank.
}
| |
/**
 * Merge-indexes hook. Deliberately empty: `cmd` is ignored and nothing is
 * returned.
 * NOTE(review): presumably invoked by the hosting update-script processor
 * for merge-indexes commands — confirm against the host's documentation.
 *
 * @param {*} cmd Merge-indexes command (unused).
 */
function processMergeIndexes(cmd) {
  // Intentionally left blank.
}
| |
/**
 * Commit hook. Deliberately empty: `cmd` is ignored and nothing is returned.
 * NOTE(review): presumably invoked by the hosting update-script processor
 * for commit commands — confirm against the host's documentation.
 *
 * @param {*} cmd Commit command (unused).
 */
function processCommit(cmd) {
  // Intentionally left blank.
}
| |
/**
 * Rollback hook. Deliberately empty: `cmd` is ignored and nothing is
 * returned.
 * NOTE(review): presumably invoked by the hosting update-script processor
 * for rollback commands — confirm against the host's documentation.
 *
 * @param {*} cmd Rollback command (unused).
 */
function processRollback(cmd) {
  // Intentionally left blank.
}
| |
/**
 * Finish hook. Takes no arguments, does nothing and returns nothing.
 * NOTE(review): presumably invoked by the hosting update-script processor
 * once processing is complete — confirm against the host's documentation.
 */
function finish() {
  // Intentionally left blank.
}