Namecat Trainer now writes a zip package with the vocab files inside
diff --git a/tf-ner-poc/src/main/java/org/apache/opennlp/namecat/NameCategorizer.java b/tf-ner-poc/src/main/java/org/apache/opennlp/namecat/NameCategorizer.java
index 7ec98b3..2513c03 100644
--- a/tf-ner-poc/src/main/java/org/apache/opennlp/namecat/NameCategorizer.java
+++ b/tf-ner-poc/src/main/java/org/apache/opennlp/namecat/NameCategorizer.java
@@ -20,8 +20,8 @@
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
-import java.io.InputStreamReader;
import java.nio.charset.StandardCharsets;
+import java.nio.file.Files;
import java.nio.file.Path;
import java.util.Arrays;
import java.util.HashMap;
@@ -39,22 +39,21 @@
private final Map<Character, Integer> charMap = new HashMap<>();
private final Map<Integer, String> labelMap;
- public NameCategorizer(InputStream vocabChars, InputStream labelDict,
- InputStream modelZipPackage) throws IOException {
+ public NameCategorizer(InputStream modelZipPackage) throws IOException {
- try (BufferedReader in = new BufferedReader(new InputStreamReader(vocabChars,
- StandardCharsets.UTF_8))) {
+ Path tmpModelPath = ModelUtil.writeModelToTmpDir(modelZipPackage);
+
+ try (BufferedReader in = Files.newBufferedReader(
+ tmpModelPath.resolve("char_dict.txt"), StandardCharsets.UTF_8)) {
in.lines().forEach(ch -> charMap.put(ch.charAt(0), charMap.size()));
}
labelMap = new HashMap<>();
- try (BufferedReader in = new BufferedReader(new InputStreamReader(labelDict,
- StandardCharsets.UTF_8))) {
+ try (BufferedReader in = Files.newBufferedReader(
+ tmpModelPath.resolve("label_dict.txt"), StandardCharsets.UTF_8)) {
in.lines().forEach(label -> labelMap.put(labelMap.size(), label));
}
- Path tmpModelPath = ModelUtil.writeModelToTmpDir(modelZipPackage);
-
SavedModelBundle model = SavedModelBundle.load(tmpModelPath.toString(), "serve");
session = model.session();
}
diff --git a/tf-ner-poc/src/main/python/namecat/namecat.py b/tf-ner-poc/src/main/python/namecat/namecat.py
index c71a7b1..21c56ff 100644
--- a/tf-ner-poc/src/main/python/namecat/namecat.py
+++ b/tf-ner-poc/src/main/python/namecat/namecat.py
@@ -23,6 +23,9 @@
from math import floor
import numpy as np
import random
+import zipfile
+import os
+from tempfile import TemporaryDirectory
def load_data(file):
with open(file) as f:
@@ -148,9 +151,6 @@
char_dict = {k: v for v, k in enumerate(char_set)}
- write_mapping(label_dict, "label_dict.txt")
- write_mapping(char_dict, "char_dict.txt")
-
dropout_keep_prob, char_ids_ph, name_lengths_ph, y_ph = create_placeholders()
train_op, probs_op = create_graph(dropout_keep_prob, char_ids_ph, name_lengths_ph, y_ph, len(char_set), len(label_dict))
@@ -205,10 +205,22 @@
#print("Test acc: " + str(np.mean(acc_test)))
- saver = tf.train.Saver()
- builder = tf.saved_model.builder.SavedModelBuilder("./namecat_model" + str(epoch))
- builder.add_meta_graph_and_variables(sess, [tf.saved_model.tag_constants.SERVING])
- builder.save()
+ with TemporaryDirectory() as temp_dir:
+ temp_model_dir = temp_dir + "/model"
+
+ builder = tf.saved_model.builder.SavedModelBuilder(temp_model_dir)
+ builder.add_meta_graph_and_variables(sess, [tf.saved_model.tag_constants.SERVING])
+ builder.save()
+
+ write_mapping(label_dict, temp_model_dir + "/label_dict.txt")
+ write_mapping(char_dict, temp_model_dir + "/char_dict.txt")
+
+ with zipfile.ZipFile("namecat-" + str(epoch) + ".zip", 'w', zipfile.ZIP_DEFLATED) as zipf:
+
+ for root, dirs, files in os.walk(temp_model_dir):
+ for file in files:
+ modelFile = os.path.join(root, file)
+ zipf.write(modelFile, arcname=os.path.relpath(modelFile, temp_model_dir))
if __name__ == "__main__":
main()