blob: 54fab0f4f92067ffc29df2430d37750b3640bb81 [file] [log] [blame]
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* See the License for the specific language governing permissions and
* limitations under the License.
package org.apache.nlpcraft.model;
import com.fasterxml.jackson.databind.*;
import com.fasterxml.jackson.dataformat.yaml.*;
import org.apache.nlpcraft.common.*;
import org.apache.nlpcraft.common.util.*;
import org.apache.nlpcraft.model.impl.json.*;
import java.util.*;
* Adapter for data models that can load model configuration from external JSON/YAML file.
* <p>
* One of the use cases this adapter supports is the ability to load model configuration from an external
* JSON/YAML file and then update it in code. For example, a model can load its configuration
* from a JSON file and then add intents or synonyms loaded from a database to a certain model element.
* To support this usage all getters return internal mutable sets or maps, i.e. you can modify them in your sub-class
* constructors and those modifications will alter the model's configuration. The following getters return
* mutable collections that can be modified by the caller:
* <ul>
* <li>{@link #getSuspiciousWords()}</li>
* <li>{@link #getAdditionalStopWords()}</li>
* <li>{@link #getIntents()}</li>
* <li>{@link #getElements()}</li>
* <li>{@link #getEnabledBuiltInTokens()}</li>
* <li>{@link #getExcludedStopWords()}</li>
* <li>{@link #getParsers()}</li>
* <li>{@link #getMacros()}</li>
* <li>{@link #getMetadata()}</li>
* </ul>
* <p>
* Read full documentation in <a target=_ href="">Data Model</a> section and review
* <a target=_ href="">examples</a>.
* @see NCModelAdapter
abstract public class NCModelFileAdapter extends NCModelAdapter {
// Deserialized model configuration; the scalar getters of this class delegate to it.
private final NCModelJson proxy;
// Collections below are built once in the private constructor from the proxy's arrays/maps
// and are returned as-is (not copied) by the corresponding getters.
private final Set<String> suspWords;
private final Set<String> enabledToks;
private final Set<String> addStopwords;
private final Set<String> exclStopwords;
private final Set<String> intents;
private final Map<String, String> macros;
private final Map<String, Object> metadata;
private final Set<NCElement> elems;
private final List<NCCustomParser> parsers;
// Origin string passed to the private constructor: the file path or uri.toString().
private final String origin;
/** Gson instance shared by all adapters; used by readModel(...) to parse JSON configurations. */
private static final Gson GSON = new Gson();
* Creates new model loading its configuration from given file path. Only <code>.js</code>,
* <code>.json</code>, <code>.yml</code> and <code>.yaml</code> files are supported. File path can be
* classpath relative or absolute.
* @param filePath Classpath relative or absolute file path to load model configuration from.
* @throws NCException Thrown in case of any errors loading model configuration.
// Loads the configuration proxy from the given path via mkProxy(String) and delegates to the
// private constructor, passing the path itself as the origin string.
public NCModelFileAdapter(String filePath) {
this(mkProxy(filePath), filePath);
* Creates new model loading its configuration from given URI. Only <code>.js</code>,
* <code>.json</code>, <code>.yml</code> and <code>.yaml</code> resources are supported.
* @param uri URI to load model configuration from.
* @throws NCException Thrown in case of any errors loading model configuration.
// Loads the configuration proxy from the given URI via mkProxy(URI) and delegates to the
// private constructor, passing uri.toString() as the origin string.
public NCModelFileAdapter(URI uri) {
this(mkProxy(uri), uri.toString());
* @param proxy
* @param origin
* @throws NCException Thrown in case of any errors intents definition.
private NCModelFileAdapter(NCModelJson proxy, String origin) throws NCException {
super(proxy.getId(), proxy.getName(), proxy.getDescription());
this.proxy = proxy;
this.suspWords = convert(proxy.getSuspiciousWords(), null);
this.enabledToks = convert(proxy.getEnabledBuiltInTokens(), NCModelView.DFLT_ENABLED_BUILTIN_TOKENS);
this.addStopwords = convert(proxy.getAdditionalStopwords(), null);
this.exclStopwords = convert(proxy.getExcludedStopwords(), null);
this.elems = convertElements(proxy.getElements());
this.macros = convertMacros(proxy.getMacros());
this.metadata = convertMeta(proxy.getMetadata());
this.intents = convert(proxy.getIntents(), null);
this.parsers = convertParsers(proxy.getParsers());
// NOTE: we can only test/check this at this point. Downstream - this information is lost.
if (proxy.getIntents() != null && intents.size() != proxy.getIntents().length)
throw new NCException("Model contains duplicate intents: " + origin);
this.origin = origin;
* @param filePath
* @return
private static NCModelJson mkProxy(String filePath) {
// Try on-classpath first.
InputStream in = NCModelFileAdapter.class.getClassLoader().getResourceAsStream(filePath);
if (in == null)
try {
in = new FileInputStream(new File(filePath));
catch (FileNotFoundException e) {
// Ignore.
if (in == null)
throw new NCException("Model configuration file path not found: " + filePath);
return readModel(filePath, in, filePath.toLowerCase());
* @param uri
* @return
* @throws NCException
private static NCModelJson mkProxy(URI uri) {
try {
return readModel(uri.toString(), uri.toURL().openStream(), uri.getPath().toLowerCase());
catch (MalformedURLException e) {
throw new NCException("Malformed model configuration URI: " + uri.toString(), e);
catch (IOException e) {
throw new NCException("Failed to read model configuration: " + uri.toString(), e);
* @param path
* @param in
* @param pathLow
* @return
* @throws NCException
private static NCModelJson readModel(String path, InputStream in, String pathLow) {
if (pathLow.endsWith("yaml") || pathLow.endsWith("yml")) {
ObjectMapper mapper = new ObjectMapper(new YAMLFactory());
try {
return mapper.readValue(in, NCModelJson.class);
catch (Exception e) {
throw new NCException("Failed to load YAML: " + path, e);
else if (pathLow.endsWith("js") || pathLow.endsWith("json")) {
try (Reader reader = new BufferedReader(new InputStreamReader(in))) {
return GSON.fromJson(reader, NCModelJson.class);
catch (Exception e) {
throw new NCException("Failed to load JSON: " + path, e);
throw new NCException("Unsupported model configuration file type (.yaml, .yml, .js or .json only): " + path);
* @param arr
* @return
// Builds the list of custom parsers from the given array. A null array yields a new empty
// ArrayList; otherwise each entry is passed to NCUtils.mkObject(...), cast to NCCustomParser
// and collected into a list.
// NOTE(review): the non-null branch looks truncated in this copy - the stream source and the
// declaration of the lambda parameter 'p' are missing. Restore from upstream before compiling.
private static List<NCCustomParser> convertParsers(String[] arr) {
return arr == null ?
new ArrayList<>() : -> (NCCustomParser)NCUtils.mkObject(p)).collect(Collectors.toList());
* @param arr
* @return
private static Set<String> convert(String[] arr, Set<String> dflt) {
return arr != null ?
new HashSet<>(Arrays.asList(arr)) :
dflt != null ?
new HashSet<>(dflt) :
new HashSet<>();
* @param arr
* @return
// Converts the configured macros into a map; a null array yields a new empty HashMap.
// NOTE(review): this copy appears truncated - the leading 'return' and the beginning of the
// non-null branch (everything before ", NCMacroJson::getMacro))") are missing. Presumably the
// array is streamed and collected into a map whose values come from NCMacroJson::getMacro;
// the key mapper is not visible here - confirm against the upstream file.
private static Map<String, String> convertMacros(NCMacroJson[] arr) {
arr == null ?
new HashMap<>() :, NCMacroJson::getMacro));
* @param m
* @return
private static Map<String, Object> convertMeta(Map<String, Object> m) {
return m != null ? m : new HashMap<>();
* @param arr
* @return
// Wraps each configured element in an anonymous NCElement whose getters return values captured
// from the JSON object.
// NOTE(review): several expressions in this copy are truncated (the stream source before
// "-> {", the right-hand side of 'vals', the 'return' before 'new NCElement()', the else-branch
// of getValueLoader() and the final collector). Restore from upstream before compiling.
private static Set<NCElement> convertElements(NCElementJson[] arr) {
if (arr == null)
// NOTE(review): Collections.emptySet() is immutable, while the class documentation lists
// getElements() among the getters returning mutable collections - confirm whether a new
// HashSet should be returned instead.
return Collections.emptySet();
// Value loader instances keyed by the loader name from the configuration; filled lazily by
// getValueLoader() and shared by all elements created in this call.
Map<String, NCValueLoader> loaders = new HashMap<>();
return -> {
// NOTE(review): Arrays.asList(...) throws NullPointerException when given a null array -
// confirm that synonyms are mandatory in the configuration.
List<String> syns = Arrays.asList(js.getSynonyms());
List<NCValue> vals = ->
new NCValue() {
public String getName() {
return e.getName();
public List<String> getSynonyms() {
return Arrays.asList(e.getSynonyms());
// When no groups are configured the element's own ID is used as its single group.
List<String> groups = js.getGroups() == null ? Collections.singletonList(js.getId()) : Arrays.asList(js.getGroups());
Map<String, Object> md = js.getMetadata();
new NCElement() {
public String getId() {
return js.getId();
public List<String> getGroups() {
return groups;
public Map<String, Object> getMetadata() {
return md;
public String getDescription() {
return js.getDescription();
public List<NCValue> getValues() {
return vals;
public String getParentId() {
return js.getParentId();
public List<String> getSynonyms() {
return syns;
// Instantiates a value loader from its class name via NCUtils.mkObject(...).
private NCValueLoader mkLoader(String clsName) {
NCValueLoader ldr = NCUtils.mkObject(clsName);
return ldr;
public NCValueLoader getValueLoader() {
// Creates the loader on first use and reuses it for every later request with the same name.
return js.getValueLoader() != null ?
loaders.computeIfAbsent(js.getValueLoader(), this::mkLoader) :
* Gets this file model adapter origin (file path or URI).
* @return This file model adapter origin (file path or URI).
// Returns the origin string stored by the private constructor (the file path or uri.toString()).
public String getOrigin() {
return origin;
* Gets the set of intents declared in JSON/YAML model definition, if any.
* @return Set of intents declared in JSON/YAML model definition, potentially empty.
// Returns the internal set built in the constructor from the proxy's intents array (not a copy).
public Set<String> getIntents() {
return intents;
// Identity and descriptive attributes: each getter returns the loaded configuration proxy's
// value unchanged.
public String getId() {
return proxy.getId();
public String getName() {
return proxy.getName();
public String getVersion() {
return proxy.getVersion();
public String getDescription() {
return proxy.getDescription();
// Numeric limits: each getter returns the loaded configuration proxy's value unchanged.
public int getMaxUnknownWords() {
return proxy.getMaxUnknownWords();
public int getMaxFreeWords() {
return proxy.getMaxFreeWords();
public int getMaxSuspiciousWords() {
return proxy.getMaxSuspiciousWords();
public int getMinWords() {
return proxy.getMinWords();
public int getMaxWords() {
return proxy.getMaxWords();
public int getMinTokens() {
return proxy.getMinTokens();
public int getMaxTokens() {
return proxy.getMaxTokens();
public int getMinNonStopwords() {
return proxy.getMinNonStopwords();
// Boolean switches and remaining numeric settings: each getter returns the loaded
// configuration proxy's value unchanged.
public boolean isNonEnglishAllowed() {
return proxy.isNonEnglishAllowed();
public boolean isNotLatinCharsetAllowed() {
return proxy.isNotLatinCharsetAllowed();
public boolean isSwearWordsAllowed() {
return proxy.isSwearWordsAllowed();
public boolean isNoNounsAllowed() {
return proxy.isNoNounsAllowed();
public boolean isPermutateSynonyms() {
return proxy.isPermutateSynonyms();
public boolean isDupSynonymsAllowed() {
return proxy.isDupSynonymsAllowed();
public int getMaxTotalSynonyms() {
return proxy.getMaxTotalSynonyms();
public boolean isNoUserTokensAllowed() {
return proxy.isNoUserTokensAllowed();
public int getJiggleFactor() {
return proxy.getJiggleFactor();
// Collection getters: each returns the internal collection built in the private constructor,
// not a copy, so changes made by the caller alter this model's configuration.
public Map<String, Object> getMetadata() {
return metadata;
public Set<String> getAdditionalStopWords() {
return addStopwords;
public Set<String> getExcludedStopWords() {
return exclStopwords;
public Set<String> getSuspiciousWords() {
return suspWords;
public Map<String, String> getMacros() {
return macros;
// NOTE(review): when the configuration declares no elements, convertElements(...) returns
// Collections.emptySet(), which rejects modification - confirm this is intended.
public Set<NCElement> getElements() {
return elems;
public Set<String> getEnabledBuiltInTokens() {
return enabledToks;
public List<NCCustomParser> getParsers() {
return parsers;