phoenix5-spark3/src/main/java/org/apache/phoenix/spark/sql/connector/writer/PhoenixDataWriter.java - phoenix-connectors - Git at Google

 /*
  * Licensed to the Apache Software Foundation (ASF) under one
  * or more contributor license agreements.  See the NOTICE file
  * distributed with this work for additional information
  * regarding copyright ownership.  The ASF licenses this file
  * to you under the Apache License, Version 2.0 (the
  * "License"); you may not use this file except in compliance
  * with the License.  You may obtain a copy of the License at
  *
  * http://www.apache.org/licenses/LICENSE-2.0
  *
  * Unless required by applicable law or agreed to in writing, software
  * distributed under the License is distributed on an "AS IS" BASIS,
  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
 package org.apache.phoenix.spark.sql.connector.writer;

 import java.io.IOException;
 import java.sql.Connection;
 import java.sql.DriverManager;
 import java.sql.PreparedStatement;
 import java.sql.SQLException;
 import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.List;
 import java.util.Properties;
 import java.util.stream.Collectors;


 import org.apache.phoenix.util.QueryUtil;
 import org.apache.phoenix.util.SchemaUtil;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 import org.apache.spark.sql.Row;
 import org.apache.spark.sql.catalyst.InternalRow;
 import org.apache.spark.sql.catalyst.encoders.ExpressionEncoder;
 import org.apache.spark.sql.catalyst.encoders.RowEncoder$;
 import org.apache.spark.sql.execution.datasources.SparkJdbcUtil;
 import org.apache.spark.sql.execution.datasources.jdbc.PhoenixJdbcDialect$;
 import org.apache.spark.sql.connector.write.DataWriter;
 import org.apache.spark.sql.connector.write.WriterCommitMessage;
 import org.apache.spark.sql.types.DataType;
 import org.apache.spark.sql.types.StructField;
 import org.apache.spark.sql.types.StructType;
 import org.apache.spark.sql.catalyst.analysis.SimpleAnalyzer$;
 import org.apache.spark.sql.catalyst.expressions.AttributeReference;
 import org.apache.spark.sql.catalyst.expressions.Attribute;

 import static org.apache.phoenix.mapreduce.util.PhoenixConfigurationUtil.DEFAULT_UPSERT_BATCH_SIZE;
 import static org.apache.phoenix.mapreduce.util.PhoenixConfigurationUtil.UPSERT_BATCH_SIZE;

 /**
  * {@link DataWriter} that upserts each {@link InternalRow} it receives into a Phoenix table
  * through one JDBC {@link Connection} and one prepared UPSERT statement.
  *
  * <p>Every row is executed immediately. When the configured batch size is positive, the
  * connection is committed every {@code batchSize} rows; otherwise the only commit happens in
  * {@link #commit()}.
  */
 public class PhoenixDataWriter implements DataWriter<InternalRow> {

     private static final Logger logger = LoggerFactory.getLogger(PhoenixDataWriter.class);
     private final StructType schema;
     private final Connection conn;
     private final PreparedStatement statement;
     private final long batchSize;
     private long numRecords = 0;
     private final ExpressionEncoder<Row> encoder;

     /**
      * Opens the JDBC connection and prepares the UPSERT statement for the target table.
      *
      * <p>If anything fails after the connection has been opened, the connection (and the
      * statement, if already prepared) is closed before the failure is propagated, so a failed
      * construction does not leak JDBC resources.
      *
      * @param schema  schema used to build the row encoder
      * @param options write options supplying the JDBC URL, connection properties, table name
      *                and the schema whose fields drive the statement parameters
      * @throws RuntimeException wrapping any {@link SQLException} raised while connecting or
      *                          preparing the statement
      */
     PhoenixDataWriter(StructType schema, PhoenixDataSourceWriteOptions options) {
         String jdbcUrl = options.getJdbcUrl();
         Properties connectionProps = options.getEffectiveProps();
         this.schema = options.getSchema();

         // NOTE(review): the encoder is bound to the constructor argument while write() iterates
         // over options.getSchema(); presumably both describe the same columns - confirm.
         List<Attribute> attrs = new ArrayList<>();
         for (AttributeReference ref : scala.collection.JavaConverters.seqAsJavaListConverter(schema.toAttributes()).asJava()) {
             attrs.add(ref.toAttribute());
         }
         this.encoder = RowEncoder$.MODULE$.apply(schema).resolveAndBind( scala.collection.JavaConverters.asScalaIteratorConverter(attrs.iterator()).asScala().toSeq(), SimpleAnalyzer$.MODULE$);

         // Locals are used so that the final fields are only assigned once everything succeeded.
         Connection connection = null;
         PreparedStatement upsert = null;
         long parsedBatchSize;
         try {
             connection = DriverManager.getConnection(jdbcUrl, connectionProps);
             List<String> colNames =  new ArrayList<>(Arrays.asList(options.getSchema().names()));
             if (!options.skipNormalizingIdentifier()){
                 colNames = colNames.stream().map(SchemaUtil::normalizeIdentifier).collect(Collectors.toList());
             }
             String upsertSql = QueryUtil.constructUpsertStatement(options.getTableName(), colNames, null);
             upsert = connection.prepareStatement(upsertSql);
             // Configure batch size to 0 or negative value to disable intermediate or batch commits in task.
             // So that commit can be called only once at the end to task execution.
             // This helps ensure consistent state of database when failures occurred and retried
             // mainly when transactions enabled.
             parsedBatchSize = Long.parseLong(connectionProps.getProperty(UPSERT_BATCH_SIZE,
                     String.valueOf(DEFAULT_UPSERT_BATCH_SIZE)));
         } catch (SQLException | RuntimeException e) {
             // Release whatever was opened; a secondary close failure must not hide the cause.
             SQLException closeFailure = closeAll(upsert, connection);
             if (closeFailure != null) {
                 e.addSuppressed(closeFailure);
             }
             if (e instanceof RuntimeException) {
                 throw (RuntimeException) e;
             }
             throw new RuntimeException(e);
         }
         this.conn = connection;
         this.statement = upsert;
         this.batchSize = parsedBatchSize;
     }

     /**
      * Closes the statement and then the connection, attempting both even if the first fails.
      *
      * @param stmt       statement to close, may be {@code null}
      * @param connection connection to close, may be {@code null}
      * @return the first {@link SQLException} raised (with any later one attached as
      *         suppressed), or {@code null} when both closed cleanly
      */
     private static SQLException closeAll(PreparedStatement stmt, Connection connection) {
         SQLException failure = null;
         if (stmt != null) {
             try {
                 stmt.close();
             } catch (SQLException e) {
                 failure = e;
             }
         }
         if (connection != null) {
             try {
                 connection.close();
             } catch (SQLException e) {
                 if (failure == null) {
                     failure = e;
                 } else {
                     failure.addSuppressed(e);
                 }
             }
         }
         return failure;
     }

     /**
      * Commits the rows executed so far on the underlying connection.
      *
      * @throws SQLException if the commit fails
      */
     void commitBatchUpdates() throws SQLException {
         conn.commit();
     }

     /**
      * Binds every field of the row to the prepared UPSERT statement and executes it.
      *
      * <p>Null fields are bound with {@code setNull} using the JDBC null type reported for the
      * field's Spark data type; other fields are bound through the setter obtained from
      * {@code SparkJdbcUtil.makeSetter}. After execution, an intermediate commit is issued when
      * the batch size is positive and the running record count is a multiple of it.
      *
      * @param internalRow row to upsert
      * @throws IOException wrapping any {@link SQLException} raised while binding, executing or
      *                     committing
      */
     @Override
     public void write(InternalRow internalRow) throws IOException {
         try {
             int i=0;
             Row row = SparkJdbcUtil.toRow(encoder, internalRow);
             for (StructField field : schema.fields()) {
                 DataType dataType = field.dataType();
                 if (internalRow.isNullAt(i)) {
                     // JDBC parameter indexes are 1-based, field positions are 0-based.
                     statement.setNull(i + 1, SparkJdbcUtil.getJdbcType(dataType,
                             PhoenixJdbcDialect$.MODULE$).jdbcNullType());
                 } else {
                     SparkJdbcUtil.makeSetter(conn, PhoenixJdbcDialect$.MODULE$, dataType).apply(statement, row, i);
                 }
                 ++i;
             }
             numRecords++;
             statement.execute();
             // Run batch wise commits only when the batch size is positive value.
             // Otherwise commit gets called at the end of task
             if (batchSize > 0 && numRecords % batchSize == 0) {
                 if (logger.isDebugEnabled()) {
                     logger.debug("commit called on a batch of size : " + batchSize);
                 }
                 commitBatchUpdates();
             }
         } catch (SQLException e) {
             throw new IOException("Exception while executing Phoenix prepared statement", e);
         }
     }

     /**
      * Commits the connection and then closes the statement and the connection.
      *
      * <p>Both resources are always closed, even when the commit or the first close fails. A
      * commit failure takes precedence; a close failure is attached to it as suppressed rather
      * than replacing it.
      *
      * @return always {@code null}; no commit message is produced
      * @throws RuntimeException wrapping the {@link SQLException} from the commit, or from
      *                          closing when the commit itself succeeded
      */
     @Override
     public WriterCommitMessage commit() {
         SQLException failure = null;
         try {
             conn.commit();
         } catch (SQLException e) {
             failure = e;
         } finally {
             SQLException closeFailure = closeAll(statement, conn);
             if (failure == null) {
                 failure = closeFailure;
             } else if (closeFailure != null) {
                 failure.addSuppressed(closeFailure);
             }
         }
         if (failure != null) {
             throw new RuntimeException(failure);
         }
         return null;
     }

     /**
      * Rolls back the connection.
      *
      * @throws RuntimeException wrapping any {@link SQLException} raised by the rollback
      */
     @Override
     public void abort() {
         try {
             // To rollback any ongoing transactions
             conn.rollback();
         } catch (SQLException ex) {
             throw new RuntimeException(ex);
         }
     }

     /**
      * Closes the prepared statement and the connection.
      *
      * <p>This may run after {@link #commit()} has already closed both; that relies on the JDBC
      * contract that closing an already-closed statement or connection is a no-op.
      *
      * @throws RuntimeException wrapping any {@link SQLException} raised while closing
      */
     @Override
     public void close() {
         SQLException failure = closeAll(statement, conn);
         if (failure != null) {
             throw new RuntimeException(failure);
         }
     }
 }
	/*
	* Licensed to the Apache Software Foundation (ASF) under one
	* or more contributor license agreements. See the NOTICE file
	* distributed with this work for additional information
	* regarding copyright ownership. The ASF licenses this file
	* to you under the Apache License, Version 2.0 (the
	* "License"); you may not use this file except in compliance
	* with the License. You may obtain a copy of the License at
	*
	* http://www.apache.org/licenses/LICENSE-2.0
	*
	* Unless required by applicable law or agreed to in writing, software
	* distributed under the License is distributed on an "AS IS" BASIS,
	* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	* See the License for the specific language governing permissions and
	* limitations under the License.
	*/
	package org.apache.phoenix.spark.sql.connector.writer;

	import java.io.IOException;
	import java.sql.Connection;
	import java.sql.DriverManager;
	import java.sql.PreparedStatement;
	import java.sql.SQLException;
	import java.util.ArrayList;
	import java.util.Arrays;
	import java.util.List;
	import java.util.Properties;
	import java.util.stream.Collectors;


	import org.apache.phoenix.util.QueryUtil;
	import org.apache.phoenix.util.SchemaUtil;
	import org.slf4j.Logger;
	import org.slf4j.LoggerFactory;
	import org.apache.spark.sql.Row;
	import org.apache.spark.sql.catalyst.InternalRow;
	import org.apache.spark.sql.catalyst.encoders.ExpressionEncoder;
	import org.apache.spark.sql.catalyst.encoders.RowEncoder$;
	import org.apache.spark.sql.execution.datasources.SparkJdbcUtil;
	import org.apache.spark.sql.execution.datasources.jdbc.PhoenixJdbcDialect$;
	import org.apache.spark.sql.connector.write.DataWriter;
	import org.apache.spark.sql.connector.write.WriterCommitMessage;
	import org.apache.spark.sql.types.DataType;
	import org.apache.spark.sql.types.StructField;
	import org.apache.spark.sql.types.StructType;
	import org.apache.spark.sql.catalyst.analysis.SimpleAnalyzer$;
	import org.apache.spark.sql.catalyst.expressions.AttributeReference;
	import org.apache.spark.sql.catalyst.expressions.Attribute;

	import static org.apache.phoenix.mapreduce.util.PhoenixConfigurationUtil.DEFAULT_UPSERT_BATCH_SIZE;
	import static org.apache.phoenix.mapreduce.util.PhoenixConfigurationUtil.UPSERT_BATCH_SIZE;

	/**
	 * {@link DataWriter} that upserts each {@link InternalRow} it receives into a Phoenix table
	 * through one JDBC {@link Connection} and one prepared UPSERT statement.
	 *
	 * <p>Every row is executed immediately. When the configured batch size is positive, the
	 * connection is committed every {@code batchSize} rows; otherwise the only commit happens in
	 * {@link #commit()}.
	 */
	public class PhoenixDataWriter implements DataWriter<InternalRow> {

	    private static final Logger logger = LoggerFactory.getLogger(PhoenixDataWriter.class);
	    private final StructType schema;
	    private final Connection conn;
	    private final PreparedStatement statement;
	    private final long batchSize;
	    private long numRecords = 0;
	    private final ExpressionEncoder<Row> encoder;

	    /**
	     * Opens the JDBC connection and prepares the UPSERT statement for the target table.
	     *
	     * <p>If anything fails after the connection has been opened, the connection (and the
	     * statement, if already prepared) is closed before the failure is propagated, so a failed
	     * construction does not leak JDBC resources.
	     *
	     * @param schema  schema used to build the row encoder
	     * @param options write options supplying the JDBC URL, connection properties, table name
	     *                and the schema whose fields drive the statement parameters
	     * @throws RuntimeException wrapping any {@link SQLException} raised while connecting or
	     *                          preparing the statement
	     */
	    PhoenixDataWriter(StructType schema, PhoenixDataSourceWriteOptions options) {
	        String jdbcUrl = options.getJdbcUrl();
	        Properties connectionProps = options.getEffectiveProps();
	        this.schema = options.getSchema();

	        // NOTE(review): the encoder is bound to the constructor argument while write() iterates
	        // over options.getSchema(); presumably both describe the same columns - confirm.
	        List<Attribute> attrs = new ArrayList<>();
	        for (AttributeReference ref : scala.collection.JavaConverters.seqAsJavaListConverter(schema.toAttributes()).asJava()) {
	            attrs.add(ref.toAttribute());
	        }
	        this.encoder = RowEncoder$.MODULE$.apply(schema).resolveAndBind( scala.collection.JavaConverters.asScalaIteratorConverter(attrs.iterator()).asScala().toSeq(), SimpleAnalyzer$.MODULE$);

	        // Locals are used so that the final fields are only assigned once everything succeeded.
	        Connection connection = null;
	        PreparedStatement upsert = null;
	        long parsedBatchSize;
	        try {
	            connection = DriverManager.getConnection(jdbcUrl, connectionProps);
	            List<String> colNames = new ArrayList<>(Arrays.asList(options.getSchema().names()));
	            if (!options.skipNormalizingIdentifier()){
	                colNames = colNames.stream().map(SchemaUtil::normalizeIdentifier).collect(Collectors.toList());
	            }
	            String upsertSql = QueryUtil.constructUpsertStatement(options.getTableName(), colNames, null);
	            upsert = connection.prepareStatement(upsertSql);
	            // Configure batch size to 0 or negative value to disable intermediate or batch commits in task.
	            // So that commit can be called only once at the end to task execution.
	            // This helps ensure consistent state of database when failures occurred and retried
	            // mainly when transactions enabled.
	            parsedBatchSize = Long.parseLong(connectionProps.getProperty(UPSERT_BATCH_SIZE,
	                    String.valueOf(DEFAULT_UPSERT_BATCH_SIZE)));
	        } catch (SQLException | RuntimeException e) {
	            // Release whatever was opened; a secondary close failure must not hide the cause.
	            SQLException closeFailure = closeAll(upsert, connection);
	            if (closeFailure != null) {
	                e.addSuppressed(closeFailure);
	            }
	            if (e instanceof RuntimeException) {
	                throw (RuntimeException) e;
	            }
	            throw new RuntimeException(e);
	        }
	        this.conn = connection;
	        this.statement = upsert;
	        this.batchSize = parsedBatchSize;
	    }

	    /**
	     * Closes the statement and then the connection, attempting both even if the first fails.
	     *
	     * @param stmt       statement to close, may be {@code null}
	     * @param connection connection to close, may be {@code null}
	     * @return the first {@link SQLException} raised (with any later one attached as
	     *         suppressed), or {@code null} when both closed cleanly
	     */
	    private static SQLException closeAll(PreparedStatement stmt, Connection connection) {
	        SQLException failure = null;
	        if (stmt != null) {
	            try {
	                stmt.close();
	            } catch (SQLException e) {
	                failure = e;
	            }
	        }
	        if (connection != null) {
	            try {
	                connection.close();
	            } catch (SQLException e) {
	                if (failure == null) {
	                    failure = e;
	                } else {
	                    failure.addSuppressed(e);
	                }
	            }
	        }
	        return failure;
	    }

	    /**
	     * Commits the rows executed so far on the underlying connection.
	     *
	     * @throws SQLException if the commit fails
	     */
	    void commitBatchUpdates() throws SQLException {
	        conn.commit();
	    }

	    /**
	     * Binds every field of the row to the prepared UPSERT statement and executes it.
	     *
	     * <p>Null fields are bound with {@code setNull} using the JDBC null type reported for the
	     * field's Spark data type; other fields are bound through the setter obtained from
	     * {@code SparkJdbcUtil.makeSetter}. After execution, an intermediate commit is issued when
	     * the batch size is positive and the running record count is a multiple of it.
	     *
	     * @param internalRow row to upsert
	     * @throws IOException wrapping any {@link SQLException} raised while binding, executing or
	     *                     committing
	     */
	    @Override
	    public void write(InternalRow internalRow) throws IOException {
	        try {
	            int i=0;
	            Row row = SparkJdbcUtil.toRow(encoder, internalRow);
	            for (StructField field : schema.fields()) {
	                DataType dataType = field.dataType();
	                if (internalRow.isNullAt(i)) {
	                    // JDBC parameter indexes are 1-based, field positions are 0-based.
	                    statement.setNull(i + 1, SparkJdbcUtil.getJdbcType(dataType,
	                            PhoenixJdbcDialect$.MODULE$).jdbcNullType());
	                } else {
	                    SparkJdbcUtil.makeSetter(conn, PhoenixJdbcDialect$.MODULE$, dataType).apply(statement, row, i);
	                }
	                ++i;
	            }
	            numRecords++;
	            statement.execute();
	            // Run batch wise commits only when the batch size is positive value.
	            // Otherwise commit gets called at the end of task
	            if (batchSize > 0 && numRecords % batchSize == 0) {
	                if (logger.isDebugEnabled()) {
	                    logger.debug("commit called on a batch of size : " + batchSize);
	                }
	                commitBatchUpdates();
	            }
	        } catch (SQLException e) {
	            throw new IOException("Exception while executing Phoenix prepared statement", e);
	        }
	    }

	    /**
	     * Commits the connection and then closes the statement and the connection.
	     *
	     * <p>Both resources are always closed, even when the commit or the first close fails. A
	     * commit failure takes precedence; a close failure is attached to it as suppressed rather
	     * than replacing it.
	     *
	     * @return always {@code null}; no commit message is produced
	     * @throws RuntimeException wrapping the {@link SQLException} from the commit, or from
	     *                          closing when the commit itself succeeded
	     */
	    @Override
	    public WriterCommitMessage commit() {
	        SQLException failure = null;
	        try {
	            conn.commit();
	        } catch (SQLException e) {
	            failure = e;
	        } finally {
	            SQLException closeFailure = closeAll(statement, conn);
	            if (failure == null) {
	                failure = closeFailure;
	            } else if (closeFailure != null) {
	                failure.addSuppressed(closeFailure);
	            }
	        }
	        if (failure != null) {
	            throw new RuntimeException(failure);
	        }
	        return null;
	    }

	    /**
	     * Rolls back the connection.
	     *
	     * @throws RuntimeException wrapping any {@link SQLException} raised by the rollback
	     */
	    @Override
	    public void abort() {
	        try {
	            // To rollback any ongoing transactions
	            conn.rollback();
	        } catch (SQLException ex) {
	            throw new RuntimeException(ex);
	        }
	    }

	    /**
	     * Closes the prepared statement and the connection.
	     *
	     * <p>This may run after {@link #commit()} has already closed both; that relies on the JDBC
	     * contract that closing an already-closed statement or connection is a no-op.
	     *
	     * @throws RuntimeException wrapping any {@link SQLException} raised while closing
	     */
	    @Override
	    public void close() {
	        SQLException failure = closeAll(statement, conn);
	        if (failure != null) {
	            throw new RuntimeException(failure);
	        }
	    }
	}