| /* |
| * Licensed to the Apache Software Foundation (ASF) under one or more |
| * contributor license agreements. See the NOTICE file distributed with |
| * this work for additional information regarding copyright ownership. |
| * The ASF licenses this file to You under the Apache License, Version 2.0 |
| * (the "License"); you may not use this file except in compliance with |
| * the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| package org.apache.zeppelin.jupyter; |
| |
import com.google.gson.Gson;
import com.google.gson.GsonBuilder;
import com.google.gson.JsonArray;
import com.google.gson.JsonObject;
import com.google.gson.typeadapters.RuntimeTypeAdapterFactory;
import com.sun.org.apache.xerces.internal.impl.xpath.regex.RegularExpression;
import org.apache.commons.cli.CommandLine;
import org.apache.commons.cli.CommandLineParser;
import org.apache.commons.cli.DefaultParser;
import org.apache.commons.cli.HelpFormatter;
import org.apache.commons.cli.Options;
import org.apache.commons.cli.ParseException;
import org.apache.commons.lang3.StringUtils;
import org.apache.zeppelin.jupyter.nbformat.Cell;
import org.apache.zeppelin.jupyter.nbformat.CodeCell;
import org.apache.zeppelin.jupyter.nbformat.DisplayData;
import org.apache.zeppelin.jupyter.nbformat.Error;
import org.apache.zeppelin.jupyter.nbformat.ExecuteResult;
import org.apache.zeppelin.jupyter.nbformat.HeadingCell;
import org.apache.zeppelin.jupyter.nbformat.MarkdownCell;
import org.apache.zeppelin.jupyter.nbformat.Nbformat;
import org.apache.zeppelin.jupyter.nbformat.Output;
import org.apache.zeppelin.jupyter.nbformat.RawCell;
import org.apache.zeppelin.jupyter.nbformat.Stream;
import org.apache.zeppelin.jupyter.zformat.Note;
import org.apache.zeppelin.jupyter.zformat.Paragraph;
import org.apache.zeppelin.jupyter.zformat.Result;
import org.apache.zeppelin.jupyter.zformat.TypeData;
import org.apache.zeppelin.markdown.MarkdownParser;
import org.apache.zeppelin.markdown.PegdownParser;

import java.io.BufferedReader;
import java.io.FileReader;
import java.io.FileWriter;
import java.io.IOException;
import java.io.Reader;
import java.io.StringReader;
import java.io.Writer;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.ArrayList;
import java.util.List;
import java.util.regex.Pattern;
| |
| /** |
| * |
| */ |
| public class JupyterUtil { |
| |
| private static Gson Pretty_Gson = new GsonBuilder().setPrettyPrinting().create(); |
| |
| private final RuntimeTypeAdapterFactory<Cell> cellTypeFactory; |
| private final RuntimeTypeAdapterFactory<Output> outputTypeFactory; |
| |
| private final MarkdownParser markdownProcessor; |
| |
| public JupyterUtil() { |
| this.cellTypeFactory = RuntimeTypeAdapterFactory.of(Cell.class, "cell_type") |
| .registerSubtype(MarkdownCell.class, "markdown").registerSubtype(CodeCell.class, "code") |
| .registerSubtype(RawCell.class, "raw").registerSubtype(HeadingCell.class, "heading"); |
| this.outputTypeFactory = RuntimeTypeAdapterFactory.of(Output.class, "output_type") |
| .registerSubtype(ExecuteResult.class, "execute_result") |
| .registerSubtype(DisplayData.class, "display_data").registerSubtype(Stream.class, "stream") |
| .registerSubtype(Error.class, "error"); |
| this.markdownProcessor = new PegdownParser(); |
| } |
| |
| public Nbformat getNbformat(Reader in) { |
| return getNbformat(in, new GsonBuilder()); |
| } |
| |
| public Nbformat getNbformat(Reader in, GsonBuilder gsonBuilder) { |
| return getGson(gsonBuilder).fromJson(in, Nbformat.class); |
| } |
| |
| public Note getNote(Reader in, String id, String codeReplaced, String markdownReplaced) { |
| return getNote(in, id, new GsonBuilder(), codeReplaced, markdownReplaced); |
| } |
| |
| public Note getNote(Reader in, String id, GsonBuilder gsonBuilder, String codeReplaced, |
| String markdownReplaced) { |
| return getNote(getNbformat(in, gsonBuilder), id, codeReplaced, markdownReplaced); |
| } |
| |
| public Note getNote(Nbformat nbformat, String id, String codeReplaced, String markdownReplaced) { |
| Note note = new Note(); |
| |
| String name = nbformat.getMetadata().getTitle(); |
| if (null == name) { |
| name = "Note converted from Jupyter_" + id; |
| } |
| note.setName(name); |
| |
| String lineSeparator = System.lineSeparator(); |
| Paragraph paragraph; |
| List<Paragraph> paragraphs = new ArrayList<>(); |
| String interpreterName; |
| List<TypeData> typeDataList; |
| |
| for (Cell cell : nbformat.getCells()) { |
| String status = Result.SUCCESS; |
| paragraph = new Paragraph(); |
| typeDataList = new ArrayList<>(); |
| Object cellSource = cell.getSource(); |
| List<String> sourceRaws = new ArrayList<>(); |
| |
| if (cellSource instanceof String) { |
| sourceRaws.add((String) cellSource); |
| } else { |
| sourceRaws.addAll((List<String>) cellSource); |
| } |
| |
| List<String> source = Output.verifyEndOfLine(sourceRaws); |
| String codeText = StringUtils.join(source, ""); |
| |
| if (cell instanceof CodeCell) { |
| interpreterName = codeReplaced; |
| for (Output output : ((CodeCell) cell).getOutputs()) { |
| if (output instanceof Error) { |
| typeDataList.add(output.toZeppelinResult()); |
| } else { |
| typeDataList.add(output.toZeppelinResult()); |
| if (output instanceof Stream) { |
| Stream streamOutput = (Stream) output; |
| if (streamOutput.isError()) { |
| status = Result.ERROR; |
| } |
| } |
| } |
| } |
| } else if (cell instanceof MarkdownCell || cell instanceof HeadingCell) { |
| interpreterName = markdownReplaced; |
| String markdownContent = markdownProcessor.render(codeText); |
| typeDataList.add(new TypeData(TypeData.HTML, markdownContent)); |
| paragraph.setUpMarkdownConfig(true); |
| } else { |
| interpreterName = ""; |
| } |
| |
| paragraph.setText(interpreterName + lineSeparator + codeText); |
| paragraph.setResults(new Result(status, typeDataList)); |
| |
| paragraphs.add(paragraph); |
| } |
| |
| note.setParagraphs(paragraphs); |
| |
| return note; |
| } |
| |
| private Gson getGson(GsonBuilder gsonBuilder) { |
| return gsonBuilder.registerTypeAdapterFactory(cellTypeFactory) |
| .registerTypeAdapterFactory(outputTypeFactory).create(); |
| } |
| |
| public String getJson(String input, String id, String codeReplaced, String markdownReplaced ) { |
| Note note = getNote(new StringReader(input), id, codeReplaced, markdownReplaced); |
| return new Gson().toJson(note); |
| } |
| |
| public String getNbformat(String note) { |
| Note noteFormat = getGson(new GsonBuilder()).fromJson(note, Note.class); |
| |
| JsonObject nbformat = new JsonObject(); |
| JsonArray cells = new JsonArray(); |
| |
| RegularExpression MD = new RegularExpression("%md\\s"); |
| RegularExpression SQL = new RegularExpression("%sql\\s"); |
| RegularExpression UNKNOWN_MAGIC = new RegularExpression("%\\w+\\s"); |
| RegularExpression HTML = new RegularExpression("%html\\s"); |
| RegularExpression SPARK = new RegularExpression("%spark\\s"); |
| |
| int index = 0; |
| for (Paragraph paragraph : noteFormat.getParagraphs()) { |
| String code = StringUtils.stripStart(paragraph.getText(), " "); |
| JsonObject codeJson = new JsonObject(); |
| |
| if (code == null || code.trim().isEmpty()) |
| continue; |
| |
| if (MD.matches(code)) { |
| codeJson.addProperty("cell_type", "markdown"); |
| codeJson.add("metadata", new JsonObject()); |
| codeJson.addProperty("source", |
| StringUtils.stripStart(StringUtils.stripStart(code, "%md"), |
| "\n")); // remove '%md' |
| } else if (SQL.matches(code) || HTML.matches(code)) { |
| codeJson.addProperty("cell_type", "code"); |
| codeJson.addProperty("execution_count", index); |
| codeJson.add("metadata", new JsonObject()); |
| codeJson.add("outputs", new JsonArray()); |
| codeJson.addProperty("source", "%" + code); // add % to convert to cell magic |
| } else if (SPARK.matches(code)) { |
| codeJson.addProperty("cell_type", "code"); |
| codeJson.addProperty("execution_count", index); |
| JsonObject metadataJson = new JsonObject(); |
| metadataJson.addProperty("autoscroll", "auto"); |
| codeJson.add("metadata", metadataJson); |
| codeJson.add("outputs", new JsonArray()); |
| codeJson.addProperty("source", code); |
| } else if (UNKNOWN_MAGIC.matches(code)) { |
| // use raw cells for unknown magic |
| codeJson.addProperty("cell_type", "raw"); |
| JsonObject metadataJson = new JsonObject(); |
| metadataJson.addProperty("format", "text/plain"); |
| codeJson.add("metadata", metadataJson); |
| codeJson.addProperty("source", code); |
| } else { |
| codeJson.addProperty("cell_type", "code"); |
| codeJson.addProperty("execution_count", index); |
| JsonObject metadataJson = new JsonObject(); |
| metadataJson.addProperty("autoscroll", "auto"); |
| codeJson.add("metadata", metadataJson); |
| codeJson.add("outputs", new JsonArray()); |
| codeJson.addProperty("source", code); |
| } |
| |
| cells.add(codeJson); |
| |
| index++; |
| } |
| |
| JsonObject metadataJson = new JsonObject(); |
| |
| JsonObject kernelspecJson = new JsonObject(); |
| kernelspecJson.addProperty("language", "scala"); |
| kernelspecJson.addProperty("name", "spark2-scala"); |
| |
| JsonObject languageInfoJson = new JsonObject(); |
| languageInfoJson.addProperty("codemirror_mode", "text/x-scala"); |
| languageInfoJson.addProperty("file_extension", ".scala"); |
| languageInfoJson.addProperty("mimetype", "text/x-scala"); |
| languageInfoJson.addProperty("name", "scala"); |
| languageInfoJson.addProperty("pygments_lexer", "scala"); |
| |
| metadataJson.addProperty("name", noteFormat.getName()); |
| metadataJson.add("kernelspec", kernelspecJson); |
| metadataJson.add("language_info", languageInfoJson); |
| |
| nbformat.add("metadata", metadataJson); |
| nbformat.addProperty("nbformat", 4); |
| nbformat.addProperty("nbformat_minor", 2); |
| nbformat.add("cells", cells); |
| return Pretty_Gson.toJson(nbformat); |
| } |
| |
| public static void main(String[] args) throws ParseException, IOException { |
| Options options = new Options(); |
| options.addOption("i", true, "Jupyter notebook file"); |
| options.addOption("o", true, "Zeppelin note file. Default: note.json"); |
| |
| CommandLineParser parser = new DefaultParser(); |
| CommandLine cmd = parser.parse(options, args); |
| |
| if (!cmd.hasOption("i")) { |
| new HelpFormatter().printHelp("java " + JupyterUtil.class.getName(), options); |
| System.exit(1); |
| } |
| |
| Path jupyterPath = Paths.get(cmd.getOptionValue("i")); |
| Path zeppelinPath = Paths.get(cmd.hasOption("o") ? cmd.getOptionValue("o") : "note.json"); |
| |
| try (BufferedReader in = new BufferedReader(new FileReader(jupyterPath.toFile())); |
| FileWriter fw = new FileWriter(zeppelinPath.toFile())) { |
| Note note = new JupyterUtil().getNote(in, "id", "%python", "%md"); |
| Gson gson = new GsonBuilder().setPrettyPrinting().create(); |
| gson.toJson(note, fw); |
| } |
| } |
| } |