Name Finder Trainer now writes zip package with vocab files inside
diff --git a/tf-ner-poc/src/main/java/org/apache/opennlp/namefinder/SequenceTagging.java b/tf-ner-poc/src/main/java/org/apache/opennlp/namefinder/SequenceTagging.java
index 2464445..23bd16c 100644
--- a/tf-ner-poc/src/main/java/org/apache/opennlp/namefinder/SequenceTagging.java
+++ b/tf-ner-poc/src/main/java/org/apache/opennlp/namefinder/SequenceTagging.java
@@ -20,6 +20,7 @@
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
+import java.nio.file.Files;
import java.nio.file.Path;
import java.util.Arrays;
import java.util.List;
@@ -49,14 +50,19 @@
this.indexTagger = new IndexTagger((new FileInputStream(config.getVocabTags())));
}
- public SequenceTagging(InputStream vocabWords, InputStream vocabChars,
- InputStream vocabTags, InputStream modelZipPackage) throws IOException {
-
- wordIndexer = new WordIndexer(vocabWords, vocabChars);
- indexTagger = new IndexTagger(vocabTags);
+ public SequenceTagging(InputStream modelZipPackage) throws IOException {
Path tmpDir = ModelUtil.writeModelToTmpDir(modelZipPackage);
+ try (InputStream wordsIn = Files.newInputStream(tmpDir.resolve("word_dict.txt"));
+ InputStream charsIn = Files.newInputStream(tmpDir.resolve("char_dict.txt"))) {
+ wordIndexer = new WordIndexer(wordsIn, charsIn);
+ }
+
+ try (InputStream in = Files.newInputStream(tmpDir.resolve("label_dict.txt"))) {
+ indexTagger = new IndexTagger(in);
+ }
+
model = SavedModelBundle.load(tmpDir.toString(), "serve");
session = model.session();
}
diff --git a/tf-ner-poc/src/main/python/namefinder/namefinder.py b/tf-ner-poc/src/main/python/namefinder/namefinder.py
index 9150bd1..b9eab50 100644
--- a/tf-ner-poc/src/main/python/namefinder/namefinder.py
+++ b/tf-ner-poc/src/main/python/namefinder/namefinder.py
@@ -25,6 +25,9 @@
import tensorflow as tf
import re
import numpy as np
+import zipfile
+import os
+from tempfile import TemporaryDirectory
# Parse the OpenNLP Name Finder format into begin, end, type triples
class NameSample:
@@ -371,10 +374,6 @@
embedding_ph, token_ids_ph, char_ids_ph, word_lengths_ph, sequence_lengths_ph, labels_ph, dropout_keep_prob, train_op \
= name_finder.create_graph(len(char_set | char_set_dev), embeddings)
- write_mapping(word_dict, 'word_dict.txt')
- write_mapping(name_finder.label_dict, "label_dict.txt")
- write_mapping(char_dict, "char_dict.txt")
-
sess = tf.Session(config=tf.ConfigProto(allow_soft_placement=True,
log_device_placement=True))
@@ -437,12 +436,28 @@
if (f1 > best_f1):
+
+
best_f1 = f1
no_improvement = 0
- saver = tf.train.Saver()
- builder = tf.saved_model.builder.SavedModelBuilder("./savedmodel/{}".format(epoch))
- builder.add_meta_graph_and_variables(sess, [tf.saved_model.tag_constants.SERVING])
- builder.save()
+
+ with TemporaryDirectory() as temp_dir:
+ temp_model_dir = temp_dir + "/model"
+
+ builder = tf.saved_model.builder.SavedModelBuilder(temp_model_dir)
+ builder.add_meta_graph_and_variables(sess, [tf.saved_model.tag_constants.SERVING])
+ builder.save()
+
+ write_mapping(word_dict, temp_model_dir + '/word_dict.txt')
+ write_mapping(name_finder.label_dict, temp_model_dir + "/label_dict.txt")
+ write_mapping(char_dict, temp_model_dir + "/char_dict.txt")
+
+ zipf = zipfile.ZipFile("namefinder-" + str(epoch) +".zip", 'w', zipfile.ZIP_DEFLATED)
+
+ for root, dirs, files in os.walk(temp_model_dir):
+ for file in files:
+ modelFile = os.path.join(root, file)
+ zipf.write(modelFile, arcname=os.path.relpath(modelFile, temp_model_dir))
else:
no_improvement += 1
@@ -453,5 +468,6 @@
print("No further improvement. Stopping.")
break
+
if __name__ == "__main__":
main()