blob: 00e2e5160edbc9adb079c6671267ae33029b7866 [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.jackrabbit.oak.plugins.document.rdb;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.OutputStream;
import java.io.PrintStream;
import java.nio.charset.Charset;
import java.nio.charset.StandardCharsets;
import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.ResultSet;
import java.sql.ResultSetMetaData;
import java.sql.SQLException;
import java.sql.Statement;
import java.sql.Types;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashSet;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Set;

import org.apache.commons.io.FileUtils;
import org.apache.commons.io.IOUtils;
import org.apache.jackrabbit.oak.commons.json.JsopBuilder;
import org.apache.jackrabbit.oak.plugins.document.Collection;
import org.apache.jackrabbit.oak.plugins.document.Document;
import org.apache.jackrabbit.oak.plugins.document.DocumentStoreException;
import org.apache.jackrabbit.oak.plugins.document.NodeDocument;
import org.apache.jackrabbit.oak.plugins.document.memory.MemoryDocumentStore;
import org.jetbrains.annotations.NotNull;
import org.jetbrains.annotations.Nullable;

import static org.apache.jackrabbit.oak.plugins.document.util.Utils.getModuleVersion;
/**
 * Utility for dumping contents from {@link RDBDocumentStore}'s tables.
 * <p>
 * Data is read either directly over JDBC or from a DB2 "DEL" (delimited)
 * export file, and is written as one JSON document per line, as a single JSON
 * array, or as CSV.
 */
public class RDBExport {

    private static final Charset UTF8 = StandardCharsets.UTF_8;

    /** Supported output formats. */
    private enum Format {
        JSON, JSONARRAY, CSV
    }

    private static final RDBJSONSupport JSON = new RDBJSONSupport(false);

    /** Top-level keys excluded from serialization because they are emitted separately ("_id"). */
    private static final Set<String> EXCLUDE_COLUMNS = new HashSet<String>();
    static {
        EXCLUDE_COLUMNS.add(Document.ID);
    }

    /**
     * Command line entry point; see {@link #printHelp()} for the supported
     * options. Exits with status 2 when the invocation is invalid.
     *
     * @param args command line arguments
     */
    public static void main(String[] args) throws ClassNotFoundException, SQLException, IOException {
        String url = null, user = null, pw = null, table = "nodes", query = null, dumpfile = null, lobdir = null;
        List<String> fieldList = Collections.emptyList();
        Format format = Format.JSON;
        PrintStream out = System.out;
        RDBDocumentSerializer ser = new RDBDocumentSerializer(new MemoryDocumentStore());
        String columns = null;
        String param = null;
        try {
            for (int i = 0; i < args.length; i++) {
                param = args[i];
                if ("-u".equals(param) || "--username".equals(param)) {
                    user = args[++i];
                } else if ("-p".equals(param) || "--password".equals(param)) {
                    pw = args[++i];
                } else if ("-c".equals(param) || "--collection".equals(param)) {
                    table = args[++i];
                } else if ("-j".equals(param) || "--jdbc-url".equals(param)) {
                    url = args[++i];
                } else if ("-q".equals(param) || "--query".equals(param)) {
                    query = args[++i];
                } else if ("-o".equals(param) || "--out".equals(param)) {
                    OutputStream os = new FileOutputStream(args[++i]);
                    out = new PrintStream(os, true, "UTF-8");
                } else if ("--from-db2-dump".equals(param)) {
                    dumpfile = args[++i];
                } else if ("--lobdir".equals(param)) {
                    lobdir = args[++i];
                } else if ("--jsonArray".equals(param)) {
                    format = Format.JSONARRAY;
                } else if ("--csv".equals(param)) {
                    format = Format.CSV;
                } else if ("--columns".equals(param)) {
                    columns = args[++i];
                } else if ("--fields".equals(param)) {
                    String fields = args[++i];
                    fieldList = Arrays.asList(fields.split(","));
                } else if ("--version".equals(param)) {
                    System.out.println(RDBExport.class.getName() + " version " + getModuleVersion());
                    System.exit(0);
                } else if ("--help".equals(param)) {
                    printHelp();
                    System.exit(0);
                } else {
                    System.err.println(RDBExport.class.getName() + ": invalid parameter " + args[i]);
                    printUsage();
                    System.exit(2);
                }
            }
        } catch (IndexOutOfBoundsException ex) {
            // args[++i] ran past the end of the array: an option is missing its value
            System.err.println(RDBExport.class.getName() + ": value missing for parameter " + param);
            printUsage();
            System.exit(2);
        }
        if (format == Format.CSV && fieldList.isEmpty()) {
            System.err.println(RDBExport.class.getName() + ": csv output requires specification of field list");
            System.exit(2);
        }
        // for JSON output with an explicit field list, make sure "_id" is always dumped first
        if ((format == Format.JSON || format == Format.JSONARRAY) && !fieldList.isEmpty() && !fieldList.contains("_id")) {
            fieldList = new ArrayList<String>(fieldList);
            fieldList.add(0, "_id");
        }
        if (dumpfile == null && url == null) {
            System.err.println(RDBExport.class.getName() + ": must use either dump file or JDBC URL");
            printUsage();
            System.exit(2);
        } else if (dumpfile != null) {
            columns = (columns == null) ? "id, modified, hasbinary, deletedonce, cmodcount, modcount, dsize, data, bdata" : columns;
            List<String> columnList = Arrays.asList(columns.toLowerCase(Locale.ENGLISH).replace(" ", "").split(","));
            dumpFile(dumpfile, lobdir, format, out, fieldList, columnList, ser);
        } else {
            if (columns != null) {
                System.err.println(RDBExport.class.getName() + ": column names ignored when using JDBC");
            }
            dumpJDBC(url, user, pw, table, query, format, out, fieldList, ser);
        }
        out.flush();
        out.close();
    }

    /**
     * Dumps the contents of a DB2 DEL export file.
     *
     * @param filename name of the DEL export file
     * @param lobdir LOB directory ({@code null} → "lobdir" next to the dump file)
     * @param format output format
     * @param out output stream (not closed here)
     * @param fieldNames fields to project (empty → all)
     * @param columnNames lower-cased column names describing the dump file layout
     * @param ser serializer used to reconstruct documents
     * @throws IOException when the dump file cannot be read or required columns are absent
     */
    private static void dumpFile(String filename, String lobdir, Format format, PrintStream out, List<String> fieldNames,
            List<String> columnNames, RDBDocumentSerializer ser) throws IOException {
        File f = new File(filename);
        File lobDirectory = lobdir == null ? new File(f.getParentFile(), "lobdir") : new File(lobdir);
        int iId = columnNames.indexOf("id");
        int iModified = columnNames.indexOf("modified");
        int iHasBinary = columnNames.indexOf("hasbinary");
        int iDeletedOnce = columnNames.indexOf("deletedonce");
        int iModCount = columnNames.indexOf("modcount");
        int iCModCount = columnNames.indexOf("cmodcount");
        int iData = columnNames.indexOf("data");
        int iBData = columnNames.indexOf("bdata");
        if (iId < 0 || iModified < 0 || iHasBinary < 0 || iDeletedOnce < 0 || iModCount < 0 || iCModCount < 0 || iData < 0
                || iBData < 0) {
            throw new IOException("required columns: id, modified, hasbinary, deletedonce, modcount, cmodcount, data, bdata");
        }
        try (FileInputStream fis = new FileInputStream(f);
                InputStreamReader ir = new InputStreamReader(fis, UTF8);
                BufferedReader br = new BufferedReader(ir)) {
            if (format == Format.JSONARRAY) {
                out.println("[");
            } else if (format == Format.CSV) {
                out.println(dumpFieldNames(fieldNames));
            }
            // In JSONARRAY mode, hold each rendered document back one iteration so a
            // comma can be appended to every element except the last one (appending
            // unconditionally would emit a trailing comma and thus invalid JSON).
            String pending = null;
            String line = br.readLine();
            while (line != null) {
                List<String> fields = parseDel(line);
                String id = fields.get(iId);
                String smodified = fields.get(iModified);
                String shasbinary = fields.get(iHasBinary);
                String sdeletedonce = fields.get(iDeletedOnce);
                String smodcount = fields.get(iModCount);
                String scmodcount = fields.get(iCModCount);
                String sdata = fields.get(iData);
                String sbdata = fields.get(iBData);
                // a non-empty BDATA field is a locator pointing into the LOB directory
                byte[] bytes = sbdata.length() == 0 ? null : readLob(lobDirectory, sbdata);
                try {
                    RDBRow row = new RDBRow(id, "1".equals(shasbinary) ? 1L : 0L, "1".equals(sdeletedonce),
                            smodified.length() == 0 ? 0 : Long.parseLong(smodified), Long.parseLong(smodcount),
                            Long.parseLong(scmodcount), -1L, -1L, -1L, sdata, bytes);
                    StringBuilder fulljson = dumpRow(ser, id, row);
                    if (format == Format.CSV) {
                        out.println(asCSV(fieldNames, fulljson));
                    } else {
                        fulljson = asJSON(fieldNames, fulljson);
                        if (format == Format.JSONARRAY) {
                            if (pending != null) {
                                out.println(pending + ",");
                            }
                            pending = fulljson.toString();
                        } else {
                            out.println(fulljson);
                        }
                    }
                } catch (DocumentStoreException ex) {
                    System.err.println("Error: skipping line for ID " + id + " because of " + ex.getMessage());
                } catch (Exception e) {
                    System.err.println("Error reading fields: " + fields);
                    throw e;
                }
                line = br.readLine();
            }
            if (pending != null) {
                out.println(pending);
            }
            if (format == Format.JSONARRAY) {
                out.println("]");
            }
        }
    }

    /**
     * Resolves a BDATA LOB locator from a DEL export into the referenced bytes.
     * <p>
     * The locator either names a plain ".lob" file read in full, or carries a
     * trailing ".startpos.length" suffix selecting a slice of a shared LOB
     * file — presumably the DB2 DEL locator format; TODO confirm against the
     * DB2 export documentation.
     *
     * @param lobDirectory directory containing the exported LOB files
     * @param sbdata the raw locator string from the dump (slashes are stripped)
     * @return the LOB contents
     * @throws IOException when the referenced LOB file cannot be read
     */
    private static byte[] readLob(File lobDirectory, String sbdata) throws IOException {
        String lobfile = sbdata.replace("/", "");
        if (!lobfile.endsWith(".lob")) {
            // strip and parse the trailing ".startpos.length" suffix
            int lastdot = lobfile.lastIndexOf('.');
            String length = lobfile.substring(lastdot + 1);
            lobfile = lobfile.substring(0, lastdot);
            lastdot = lobfile.lastIndexOf('.');
            String startpos = lobfile.substring(lastdot + 1);
            lobfile = lobfile.substring(0, lastdot);
            int s = Integer.parseInt(startpos);
            int l = Integer.parseInt(length);
            byte[] bytes = new byte[l];
            try (InputStream is = new FileInputStream(new File(lobDirectory, lobfile))) {
                IOUtils.skip(is, s);
                IOUtils.read(is, bytes, 0, l);
            }
            return bytes;
        } else {
            return FileUtils.readFileToByteArray(new File(lobDirectory, lobfile));
        }
    }

    /**
     * Splits one line of a DB2 DEL (delimited) export into its fields.
     * <p>
     * Fields are comma-separated; a field may be enclosed in double quotes, in
     * which case an embedded quote is represented by two consecutive quotes.
     *
     * @param line the raw line read from the dump file
     * @return the field values with quoting removed
     */
    protected static List<String> parseDel(String line) {
        ArrayList<String> result = new ArrayList<String>();
        boolean inQuoted = false;
        char quotechar = '"';
        char fielddelim = ',';
        StringBuilder value = new StringBuilder();
        for (int i = 0; i < line.length(); i++) {
            char c = line.charAt(i);
            if (!inQuoted) {
                if (c == fielddelim) {
                    result.add(value.toString());
                    value = new StringBuilder();
                } else {
                    // a quote only opens quoting at the very start of a field
                    if (value.length() == 0 && c == quotechar) {
                        inQuoted = true;
                    } else {
                        value.append(c);
                    }
                }
            } else {
                if (c == quotechar) {
                    if (i + 1 != line.length() && line.charAt(i + 1) == quotechar) {
                        // doubled quote char: literal quote
                        value.append(c);
                        i += 1;
                    } else {
                        inQuoted = false;
                    }
                } else {
                    value.append(c);
                }
            }
        }
        result.add(value.toString());
        return result;
    }

    /**
     * Dumps the table contents directly over JDBC.
     *
     * @param url JDBC URL
     * @param user database user name (may be {@code null})
     * @param pw database password (may be {@code null})
     * @param table table name
     * @param query optional SQL "where" clause (without the keyword)
     * @param format output format
     * @param out output stream (flushed and closed by the caller)
     * @param fieldNames fields to project (empty → all)
     * @param ser serializer used to reconstruct documents
     * @throws SQLException on database errors
     */
    private static void dumpJDBC(String url, String user, String pw, String table, String query, Format format, PrintStream out,
            List<String> fieldNames, RDBDocumentSerializer ser) throws SQLException {
        String driver = RDBJDBCTools.driverForDBType(RDBJDBCTools.jdbctype(url));
        try {
            Class.forName(driver);
        } catch (ClassNotFoundException ex) {
            // not fatal: modern JDBC drivers register themselves via the service loader
            System.err.println(RDBExport.class.getName() + ":attempt to load class " + driver + " failed:" + ex.getMessage());
        }
        // NOTE(review): table and query are user-supplied and concatenated into the
        // statement; acceptable for a local export tool, but not for untrusted input
        // (table names cannot be bound as parameters anyway)
        String sql = "select ID, MODIFIED, MODCOUNT, CMODCOUNT, HASBINARY, DELETEDONCE, DATA, BDATA from " + table;
        if (query != null) {
            sql += " where " + query;
        }
        sql += " order by id";
        try (Connection c = DriverManager.getConnection(url, user, pw)) {
            c.setReadOnly(true);
            try (Statement stmt = c.createStatement(); ResultSet rs = stmt.executeQuery(sql)) {
                if (format == Format.JSONARRAY) {
                    out.println("[");
                } else if (format == Format.CSV) {
                    out.println(dumpFieldNames(fieldNames));
                }
                ResultSetMetaData rsm = null;
                boolean idIsAscii = true;
                // Hold each rendered document back one iteration so a comma is emitted
                // after every JSONARRAY element except the last. This also avoids
                // ResultSet.isLast(), which is optional for forward-only result sets.
                String pending = null;
                while (rs.next()) {
                    if (rsm == null) {
                        rsm = rs.getMetaData();
                        idIsAscii = !isBinaryType(rsm.getColumnType(1));
                    }
                    String id = idIsAscii ? rs.getString("ID") : new String(rs.getBytes("ID"), UTF8);
                    long modified = rs.getLong("MODIFIED");
                    long modcount = rs.getLong("MODCOUNT");
                    long cmodcount = rs.getLong("CMODCOUNT");
                    Long hasBinary = readLongOrNullFromResultSet(rs, "HASBINARY");
                    Boolean deletedOnce = readBooleanOrNullFromResultSet(rs, "DELETEDONCE");
                    String data = rs.getString("DATA");
                    byte[] bdata = rs.getBytes("BDATA");
                    RDBRow row = new RDBRow(id, hasBinary, deletedOnce, modified, modcount, cmodcount, -1L, -1L, -1L, data, bdata);
                    StringBuilder fulljson = dumpRow(ser, id, row);
                    if (format == Format.CSV) {
                        out.println(asCSV(fieldNames, fulljson));
                    } else {
                        fulljson = asJSON(fieldNames, fulljson);
                        if (format == Format.JSONARRAY) {
                            if (pending != null) {
                                out.println(pending + ",");
                            }
                            pending = fulljson.toString();
                        } else {
                            out.println(fulljson);
                        }
                    }
                }
                if (pending != null) {
                    out.println(pending);
                }
                if (format == Format.JSONARRAY) {
                    out.println("]");
                }
            }
        }
        // "out" is flushed and closed by the caller
    }

    /**
     * Reads a column as {@link Boolean}, mapping SQL NULL to {@code null}
     * and any nonzero value to {@code true}.
     */
    @Nullable
    private static Boolean readBooleanOrNullFromResultSet(ResultSet res, String field) throws SQLException {
        long v = res.getLong(field);
        return res.wasNull() ? null : Boolean.valueOf(v != 0);
    }

    /**
     * Reads a column as {@link Long}, mapping SQL NULL to {@code null}.
     */
    @Nullable
    private static Long readLongOrNullFromResultSet(ResultSet res, String field) throws SQLException {
        long v = res.getLong(field);
        return res.wasNull() ? null : Long.valueOf(v);
    }

    /**
     * Serializes one row as a JSON object whose first member is "_id",
     * followed by the members of the serialized {@link NodeDocument}.
     */
    @NotNull
    private static StringBuilder dumpRow(RDBDocumentSerializer ser, String id, RDBRow row) {
        NodeDocument doc = ser.fromRow(Collection.NODES, row);
        String docjson = ser.asString(doc, EXCLUDE_COLUMNS);
        StringBuilder fulljson = new StringBuilder();
        fulljson.append("{\"_id\":\"");
        JsopBuilder.escape(id, fulljson);
        fulljson.append("\",");
        // docjson starts with '{'; skip it so the two objects merge into one
        fulljson.append(docjson.substring(1));
        return fulljson;
    }

    /**
     * Renders the CSV header line from the requested field names.
     */
    @NotNull
    private static String dumpFieldNames(List<String> fieldNames) {
        StringBuilder result = new StringBuilder();
        for (String f : fieldNames) {
            if (result.length() != 0) {
                result.append(',');
            }
            result.append(f);
        }
        return result.toString();
    }

    /**
     * Projects the given JSON document onto the requested top-level fields;
     * returns the input unchanged when no fields were specified.
     */
    @NotNull
    @SuppressWarnings("unchecked")
    private static StringBuilder asJSON(List<String> fieldNames, StringBuilder fulljson) {
        if (fieldNames.isEmpty()) {
            return fulljson;
        } else {
            Map<String, Object> doc = (Map<String, Object>) JSON.parse(fulljson.toString());
            StringBuilder buf = new StringBuilder();
            buf.append('{');
            String delim = "";
            for (String field : fieldNames) {
                buf.append(delim);
                delim = ",";
                // only the part before the first '.' selects a top-level member here
                String[] fn = field.split("\\.");
                if (doc.containsKey(fn[0])) {
                    Object o = doc.get(fn[0]);
                    RDBJSONSupport.appendJsonMember(buf, fn[0], o);
                }
            }
            buf.append('}');
            return buf;
        }
    }

    /**
     * Renders the requested fields of the given JSON document as one CSV line;
     * "a.b" field names select member "b" of the nested map "a".
     */
    @NotNull
    @SuppressWarnings("unchecked")
    private static StringBuilder asCSV(List<String> csvFieldNames, StringBuilder fulljson) {
        Map<String, Object> doc = (Map<String, Object>) JSON.parse(fulljson.toString());
        StringBuilder buf = new StringBuilder();
        String delim = "";
        for (String field : csvFieldNames) {
            buf.append(delim);
            delim = ",";
            String[] fn = field.split("\\.");
            boolean checkMember = fn.length > 1;
            if (doc.containsKey(fn[0])) {
                Object o = doc.get(fn[0]);
                if (checkMember) {
                    if (o instanceof Map) {
                        Map<String, Object> m = (Map<String, Object>) o;
                        if (m.containsKey(fn[1])) {
                            dumpJsonValuetoCsv(buf, m.get(fn[1]));
                        }
                    }
                } else {
                    dumpJsonValuetoCsv(buf, o);
                }
            }
            // absent fields are rendered as an empty CSV cell
        }
        return buf;
    }

    /**
     * Appends one JSON value in CSV form: booleans and longs verbatim,
     * everything else double-quoted with embedded quotes doubled.
     */
    private static void dumpJsonValuetoCsv(StringBuilder buf, Object o) {
        if (o == null) {
            buf.append("null");
        } else if (o instanceof Boolean) {
            buf.append(o.toString());
        } else if (o instanceof Long) {
            buf.append(((Long) o).longValue());
        } else {
            buf.append('"');
            buf.append(o.toString().replace("\"", "\"\""));
            buf.append('"');
        }
    }

    /**
     * Returns {@code true} when the SQL type stores bytes rather than characters.
     */
    private static boolean isBinaryType(int sqlType) {
        return sqlType == Types.VARBINARY || sqlType == Types.BINARY || sqlType == Types.LONGVARBINARY;
    }

    /** Prints a short usage synopsis to stderr. */
    private static void printUsage() {
        System.err.println("Usage: " + RDBExport.class.getName()
                + " -j/--jdbc-url JDBC-URL [-u/--username username] [-p/--password password] [-c/--collection table] [-q/--query query] [-o/--out file] [--fields list] [--csv] [--jsonArray]");
        System.err.println(
                "Usage: " + RDBExport.class.getName() + " --from-db2-dump file [--lobdir lobdir] [-o/--out file] [--fields list] [--csv] [--jsonArray]");
        System.err.println("Usage: " + RDBExport.class.getName() + " --version");
        System.err.println("Usage: " + RDBExport.class.getName() + " --help");
    }

    /** Prints the full option help to stderr. */
    private static void printHelp() {
        System.err.println("Export Apache OAK RDB data to JSON files");
        System.err.println("");
        System.err.println("Generic options:");
        System.err.println(" --help produce this help message");
        System.err.println(" --version show version information");
        System.err.println("");
        System.err.println("JDBC options:");
        System.err.println(" -j/--jdbc-url JDBC-URL JDBC URL of database to connect to");
        System.err.println(" -u/--username username database username");
        System.err.println(" -p/--password password database password");
        System.err.println(" -c/--collection table table name (defaults to 'nodes')");
        System.err.println(" -q/--query query SQL where clause (minus 'where')");
        System.err.println("");
        System.err.println("Dump file options:");
        System.err.println(" --columns column-names column names (comma separated)");
        System.err.println(" --from-db2-dump file name of DB2 DEL export file");
        System.err.println(" --lobdir dir name of DB2 DEL export file LOB directory");
        System.err.println(" (defaults to ./lobdir under the dump file)");
        System.err.println("");
        System.err.println("Output options:");
        System.err.println(" -o/--out file Output to name file (instead of stdout)");
        System.err.println(" --jsonArray Output a JSON array (instead of one");
        System.err.println(" JSON doc per line)");
        System.err.println(" --csv Output in CSV format (requires --fields)");
        System.err.println(" --fields names field names (comma separated); required");
        System.err.println(" for CSV output");
    }
}