| /**************************************************************** |
| * Licensed to the Apache Software Foundation (ASF) under one * |
| * or more contributor license agreements. See the NOTICE file * |
| * distributed with this work for additional information * |
| * regarding copyright ownership. The ASF licenses this file * |
| * to you under the Apache License, Version 2.0 (the * |
| * "License"); you may not use this file except in compliance * |
| * with the License. You may obtain a copy of the License at * |
| * * |
| * http://www.apache.org/licenses/LICENSE-2.0 * |
| * * |
| * Unless required by applicable law or agreed to in writing, * |
| * software distributed under the License is distributed on an * |
| * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY * |
| * KIND, either express or implied. See the License for the * |
| * specific language governing permissions and limitations * |
| * under the License. * |
| ****************************************************************/ |
| |
| package org.apache.james.ai.classic; |
| |
| import java.sql.Connection; |
| import java.sql.DatabaseMetaData; |
| import java.sql.PreparedStatement; |
| import java.sql.ResultSet; |
| import java.sql.SQLException; |
| import java.util.HashMap; |
| import java.util.Map; |
| |
| import org.slf4j.Logger; |
| import org.slf4j.LoggerFactory; |
| import org.w3c.dom.Document; |
| |
| /** |
| * Manages the persistence of the spam bayesian analysis corpus using a JDBC |
| * database. |
| * |
| * <p> |
| * This class is concrete (it is not declared abstract); all of its logging |
| * goes through a single SLF4J logger. |
| * </p> |
| * |
| * @since 2.3.0 |
| */ |
| |
| public class JDBCBayesianAnalyzer extends BayesianAnalyzer { |
| private static final Logger LOGGER = LoggerFactory.getLogger(JDBCBayesianAnalyzer.class); |
| |
| /** Public object representing a lock on database activity. */ |
| public static final String DATABASE_LOCK = "database lock"; |
| |
| /** |
| * The JDBCUtil helper class |
| */ |
| private final JDBCUtil theJDBCUtil = new JDBCUtil(); |
| |
| /** Contains all of the sql strings for this component. */ |
| private final SqlResources sqlQueries = new SqlResources(); |
| |
| /** Holds value of property sqlFileName. */ |
| private String sqlFileName; |
| |
| /** Holds value of property sqlParameters. */ |
| private Map<String, String> sqlParameters = new HashMap<>(); |
| |
| /** Holds value of property lastDatabaseUpdateTime. */ |
| private static long lastDatabaseUpdateTime; |
| |
| /** |
| * Getter for static lastDatabaseUpdateTime. |
| * |
| * @return Value of property lastDatabaseUpdateTime. |
| */ |
| public static long getLastDatabaseUpdateTime() { |
| |
| return lastDatabaseUpdateTime; |
| } |
| |
| /** |
| * Sets static lastDatabaseUpdateTime to System.currentTimeMillis(). |
| */ |
| public static void touchLastDatabaseUpdateTime() { |
| |
| lastDatabaseUpdateTime = System.currentTimeMillis(); |
| } |
| |
| |
| public JDBCBayesianAnalyzer() { |
| super(); |
| } |
| |
| /** |
| * Getter for property sqlFileName. |
| * |
| * @return Value of property sqlFileName. |
| */ |
| public String getSqlFileName() { |
| |
| return this.sqlFileName; |
| } |
| |
| /** |
| * Setter for property sqlFileName. |
| * |
| * @param sqlFileName |
| * New value of property sqlFileName. |
| */ |
| public void setSqlFileName(String sqlFileName) { |
| |
| this.sqlFileName = sqlFileName; |
| } |
| |
| /** |
| * Getter for property sqlParameters. |
| * |
| * @return Value of property sqlParameters. |
| */ |
| public Map<String, String> getSqlParameters() { |
| |
| return this.sqlParameters; |
| } |
| |
| /** |
| * Setter for property sqlParameters. |
| * |
| * @param sqlParameters |
| * New value of property sqlParameters. |
| */ |
| public void setSqlParameters(Map<String, String> sqlParameters) { |
| |
| this.sqlParameters = sqlParameters; |
| } |
| |
| /** |
| * Loads the token frequencies from the database. |
| * |
| * @param conn |
| * The connection for accessing the database |
| * @throws SQLException |
| * If a database error occurs |
| */ |
| public void loadHamNSpam(Connection conn) throws java.sql.SQLException { |
| PreparedStatement pstmt = null; |
| ResultSet rs = null; |
| |
| try { |
| pstmt = conn.prepareStatement(sqlQueries.getSqlString("selectHamTokens", true)); |
| rs = pstmt.executeQuery(); |
| |
| Map<String, Integer> ham = getHamTokenCounts(); |
| while (rs.next()) { |
| String token = rs.getString(1); |
| int count = rs.getInt(2); |
| // to reduce memory, use the token only if the count is > 1 |
| if (count > 1) { |
| ham.put(token, count); |
| } |
| } |
| // Verbose. |
| LOGGER.debug("Ham tokens count: {}", ham.size()); |
| |
| rs.close(); |
| pstmt.close(); |
| |
| // Get the spam tokens/counts. |
| pstmt = conn.prepareStatement(sqlQueries.getSqlString("selectSpamTokens", true)); |
| rs = pstmt.executeQuery(); |
| |
| Map<String, Integer> spam = getSpamTokenCounts(); |
| while (rs.next()) { |
| String token = rs.getString(1); |
| int count = rs.getInt(2); |
| // to reduce memory, use the token only if the count is > 1 |
| if (count > 1) { |
| spam.put(token, count); |
| } |
| } |
| |
| // Verbose. |
| LOGGER.debug("Spam tokens count: {}", spam.size()); |
| |
| rs.close(); |
| pstmt.close(); |
| |
| // Get the ham/spam message counts. |
| pstmt = conn.prepareStatement(sqlQueries.getSqlString("selectMessageCounts", true)); |
| rs = pstmt.executeQuery(); |
| if (rs.next()) { |
| setHamMessageCount(rs.getInt(1)); |
| setSpamMessageCount(rs.getInt(2)); |
| } |
| |
| rs.close(); |
| pstmt.close(); |
| |
| } finally { |
| if (rs != null) { |
| try { |
| rs.close(); |
| } catch (SQLException se) { |
| LOGGER.error("Failed to close statement after selecting spam tokens.", se); |
| } |
| |
| rs = null; |
| } |
| |
| if (pstmt != null) { |
| try { |
| pstmt.close(); |
| } catch (SQLException se) { |
| LOGGER.error("Failed to close statement selecting message counts.", se); |
| } |
| |
| pstmt = null; |
| } |
| } |
| } |
| |
| /** |
| * Updates the database with new "ham" token frequencies. |
| * |
| * @param conn |
| * The connection for accessing the database |
| * @throws SQLException |
| * If a database error occurs |
| */ |
| public void updateHamTokens(Connection conn) throws java.sql.SQLException { |
| updateTokens(conn, getHamTokenCounts(), sqlQueries.getSqlString("insertHamToken", true), sqlQueries.getSqlString("updateHamToken", true)); |
| |
| setMessageCount(conn, sqlQueries.getSqlString("updateHamMessageCounts", true), getHamMessageCount()); |
| } |
| |
| /** |
| * Updates the database with new "spam" token frequencies. |
| * |
| * @param conn |
| * The connection for accessing the database |
| * @throws SQLException |
| * If a database error occurs |
| */ |
| public void updateSpamTokens(Connection conn) throws java.sql.SQLException { |
| updateTokens(conn, getSpamTokenCounts(), sqlQueries.getSqlString("insertSpamToken", true), sqlQueries.getSqlString("updateSpamToken", true)); |
| |
| setMessageCount(conn, sqlQueries.getSqlString("updateSpamMessageCounts", true), getSpamMessageCount()); |
| } |
| |
| /** |
| * Reset all trained data |
| * |
| * @param conn |
| * The connection for accessing the database |
| * @throws SQLException |
| * If a database error occours |
| */ |
| public void resetData(Connection conn) throws SQLException { |
| deleteData(conn, sqlQueries.getSqlString("deleteHamTokens", true)); |
| deleteData(conn, sqlQueries.getSqlString("deleteSpamTokens", true)); |
| deleteData(conn, sqlQueries.getSqlString("deleteMessageCounts", true)); |
| } |
| |
| private void setMessageCount(Connection conn, String sqlStatement, int count) throws java.sql.SQLException { |
| PreparedStatement init = null; |
| PreparedStatement update = null; |
| |
| try { |
| // set the ham/spam message counts. |
| init = conn.prepareStatement(sqlQueries.getSqlString("initializeMessageCounts", true)); |
| update = conn.prepareStatement(sqlStatement); |
| |
| update.setInt(1, count); |
| |
| if (update.executeUpdate() == 0) { |
| init.executeUpdate(); |
| update.executeUpdate(); |
| } |
| |
| } finally { |
| if (init != null) { |
| try { |
| init.close(); |
| } catch (SQLException ignore) { |
| LOGGER.error("Failed to close statement after initializing message count.", ignore); |
| } |
| } |
| if (update != null) { |
| try { |
| update.close(); |
| } catch (SQLException ignore) { |
| LOGGER.error("Failed to close statement after setting message count.", ignore); |
| } |
| } |
| } |
| } |
| |
| private void updateTokens(Connection conn, Map<String, Integer> tokens, String insertSqlStatement, String updateSqlStatement) throws java.sql.SQLException { |
| PreparedStatement insert = null; |
| PreparedStatement update = null; |
| |
| try { |
| // Used to insert new token entries. |
| insert = conn.prepareStatement(insertSqlStatement); |
| |
| // Used to update existing token entries. |
| update = conn.prepareStatement(updateSqlStatement); |
| |
| for (Map.Entry<String, Integer> entry : tokens.entrySet()) { |
| update.setInt(1, entry.getValue()); |
| update.setString(2, entry.getKey()); |
| |
| // If the update affected 0 (zero) rows, then the token hasn't |
| // been |
| // encountered before, and we need to add it to the corpus. |
| if (update.executeUpdate() == 0) { |
| insert.setString(1, entry.getKey()); |
| insert.setInt(2, entry.getValue()); |
| |
| insert.executeUpdate(); |
| } |
| } |
| } finally { |
| if (insert != null) { |
| try { |
| insert.close(); |
| } catch (SQLException ignore) { |
| LOGGER.error("Failed to close statement after updating tokens.", ignore); |
| } |
| |
| insert = null; |
| } |
| |
| if (update != null) { |
| try { |
| update.close(); |
| } catch (SQLException ignore) { |
| LOGGER.error("Failed to close statement after updating tokens.", ignore); |
| } |
| |
| update = null; |
| } |
| } |
| } |
| |
| /** |
| * Initializes the sql query environment from the SqlResources file. Will |
| * look for conf/sqlResources.xml. |
| * |
| * @param conn |
| * The connection for accessing the database |
| * @param sqlConfiguration |
| * The sqlResources configuration document |
| * @throws Exception |
| * If any error occurs |
| */ |
| public void initSqlQueries(Connection conn, Document sqlConfiguration) throws Exception { |
| try { |
| if (conn.getAutoCommit()) { |
| conn.setAutoCommit(false); |
| } |
| |
| sqlQueries.init(sqlConfiguration, JDBCBayesianAnalyzer.class.getName(), conn, getSqlParameters()); |
| |
| checkTables(conn); |
| } finally { |
| theJDBCUtil.closeJDBCConnection(conn); |
| } |
| } |
| |
| private void checkTables(Connection conn) throws SQLException { |
| // DatabaseMetaData dbMetaData = conn.getMetaData(); |
| // Need to ask in the case that identifiers are stored, ask the |
| // DatabaseMetaInfo. |
| // Try UPPER, lower, and MixedCase, to see if the table is there. |
| |
| boolean dbUpdated = false; |
| |
| dbUpdated = createTable(conn, "hamTableName", "createHamTable"); |
| |
| dbUpdated = createTable(conn, "spamTableName", "createSpamTable"); |
| |
| dbUpdated = createTable(conn, "messageCountsTableName", "createMessageCountsTable"); |
| |
| // Commit our changes if necessary. |
| if (conn != null && dbUpdated && !conn.getAutoCommit()) { |
| conn.commit(); |
| dbUpdated = false; |
| } |
| |
| } |
| |
| private boolean createTable(Connection conn, String tableNameSqlStringName, String createSqlStringName) throws SQLException { |
| String tableName = sqlQueries.getSqlString(tableNameSqlStringName, true); |
| |
| DatabaseMetaData dbMetaData = conn.getMetaData(); |
| |
| // Try UPPER, lower, and MixedCase, to see if the table is there. |
| if (theJDBCUtil.tableExists(dbMetaData, tableName)) { |
| return false; |
| } |
| |
| PreparedStatement createStatement = null; |
| |
| try { |
| createStatement = conn.prepareStatement(sqlQueries.getSqlString(createSqlStringName, true)); |
| createStatement.execute(); |
| |
| LOGGER.debug("Created table '{}' using sqlResources string '{}'.", tableName, createSqlStringName); |
| |
| } finally { |
| theJDBCUtil.closeJDBCStatement(createStatement); |
| } |
| |
| return true; |
| } |
| |
| private void deleteData(Connection conn, String deleteSqlStatement) throws SQLException { |
| PreparedStatement delete = null; |
| |
| try { |
| // Used to delete ham tokens |
| delete = conn.prepareStatement(deleteSqlStatement); |
| delete.executeUpdate(); |
| } finally { |
| if (delete != null) { |
| try { |
| delete.close(); |
| } catch (SQLException ignore) { |
| LOGGER.error("Failed to close statement after deleting ham statement. ", ignore); |
| } |
| |
| delete = null; |
| } |
| } |
| } |
| } |