blob: e151e6589808cd19adad32f79c52f21fa1fc1d08 [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.sis.internal.shapefile.jdbc;
import java.io.File;
import java.nio.charset.Charset;
import java.nio.charset.UnsupportedCharsetException;
import java.text.MessageFormat;
import java.util.Collections;
import java.util.Date;
import java.util.HashMap;
import java.util.Map;
import java.util.Objects;
import java.util.logging.Level;
/**
 * Base class of the dBASE III byte readers.
 *
 * <p>It declares the fields of the DBF file header and offers the conversions needed to
 * interpret them: binary code page value to {@link Charset}, and {@code YY MM DD} bytes to
 * {@link Date}. Nothing in this class reads the header from the file: apart from
 * {@link #setCharset(Charset)}, the fields are only declared here and returned by the getters.</p>
 *
 * @author  Marc Le Bihan
 * @version 0.5
 * @since   0.5
 * @module
 */
abstract class AbstractDbase3ByteReader extends CommonByteReader<SQLInvalidDbaseFileFormatException, SQLDbaseFileNotFoundException> implements Dbase3ByteReader {
    /**
     * Known conversions from the (unsigned) binary code page value of a DBF header to the name
     * of a Java charset. Built once and never modified afterwards.
     * Values come from http://trac.osgeo.org/gdal/ticket/2864
     */
    private static final Map<Integer, String> KNOWN_CODE_PAGES = createKnownCodePages();

    /** First data record position, in bytes. */
    protected short firstRecordPosition;

    /** Size of one record, in bytes. */
    protected short recordLength;

    /** Reserved (dBASE IV) Filled with 00h. */
    protected byte[] reservedFiller1 = new byte[2];

    /**
     * Reserved : Incomplete transaction (dBASE IV).
     * 00h : Transaction ended (or rolled back).
     * 01h : Transaction started.
     */
    protected byte reservedIncompleteTransaction;

    /**
     * Reserved : Encryption flag (dBASE IV).
     * 00h : Not encrypted.
     * 01h : Data encrypted.
     */
    protected byte reservedEncryptionFlag;

    /** Reserved : Free record thread (for LAN only). */
    protected byte[] reservedFreeRecordThread = new byte[4];

    /** Reserved : For multi-user (DBase 3+). */
    protected byte[] reservedMultiUser = new byte[8];

    /** Reserved : MDX flag (dBASE IV). */
    protected byte reservedMDXFlag;

    /** Binary code page value. */
    protected byte codePage;

    /** Reserved (dBASE IV) Filled with 00h. */
    protected byte[] reservedFiller2 = new byte[2];

    /** Marks the end of the descriptor : must be 0x0D. */
    protected byte descriptorTerminator;

    /** Valid dBASE III PLUS table file (03h without a memo .DBT file; 83h with a memo). */
    protected byte dbaseVersion;

    /** Number of records in the table. */
    protected int rowCount;

    /** Database charset. */
    protected Charset charset;

    /** Date of last update; in YYMMDD format. */
    protected byte[] dbaseLastUpdate = new byte[3];

    /**
     * Map a dbf file.
     * @param file Database file.
     * @throws SQLDbaseFileNotFoundException if the DBF file has not been found.
     * @throws SQLInvalidDbaseFileFormatException if the database has an invalid format.
     */
    public AbstractDbase3ByteReader(File file) throws SQLDbaseFileNotFoundException, SQLInvalidDbaseFileFormatException {
        super(file, SQLInvalidDbaseFileFormatException.class, SQLDbaseFileNotFoundException.class);
    }

    /**
     * Returns the charset.
     * @return Charset.
     */
    @Override public Charset getCharset() {
        return this.charset;
    }

    /**
     * Returns the database last update date, converted from the {@code YY MM DD} header bytes.
     * @return Date of the last update.
     */
    @Override public Date getDateOfLastUpdate() {
        return toDate(this.dbaseLastUpdate);
    }

    /**
     * Returns the first record position, in bytes, in the DBase file.
     * @return First record position.
     */
    @Override public short getFirstRecordPosition() {
        return this.firstRecordPosition;
    }

    /**
     * Returns the length (in bytes) of one record in this DBase file, including the delete flag.
     * @return Record length.
     */
    @Override public short getRecordLength() {
        return this.recordLength;
    }

    /**
     * Returns the record count.
     * @return Record count.
     */
    @Override public int getRowCount() {
        return this.rowCount;
    }

    /**
     * Convert the binary code page value of the Dbase 3 file to a recent Charset.
     *
     * <p>Every value absent from the known conversions table is rejected the same way. This
     * includes legacy code pages with no modern equivalent, such as 0x68 (Kamenicky, Czech MS-DOS),
     * 0x69 (Mazovia, Polish MS-DOS) and 0x96 to 0x98 (Russian, Eastern European and Greek Macintosh),
     * as well as values that are not code pages at all.</p>
     *
     * @param codePageBinaryValue page code binary value.
     * @return Charset.
     * @throws UnsupportedCharsetException if the code page has no representation in recent Charsets (legacy DOS or Macintosh charsets).
     * @throws RuntimeException if the charset name selected for a known code page is rejected by
     *         {@link Charset#forName(String)}; the original failure is attached as the cause.
     */
    protected Charset toCharset(byte codePageBinaryValue) throws UnsupportedCharsetException {
        // Attempt to find a known conversion.
        String dbfCodePage = toCodePage(codePageBinaryValue);

        // Without a match for a more recent Charset, we won't be able to handle this DBF.
        if (dbfCodePage == null) {
            String message = format(Level.WARNING, "excp.unsupported_codepage", codePageBinaryValue, getFile().getAbsolutePath());
            throw new UnsupportedCharsetException(message);
        }

        try {
            return Charset.forName(dbfCodePage);
        }
        catch (IllegalArgumentException e) {
            // The conversion table designated a charset name the platform refuses: keep the
            // underlying exception as the cause so that the failure can be diagnosed.
            String message = format(Level.WARNING, "assert.wrong_charset_selection", dbfCodePage, getFile().getAbsolutePath());
            throw new RuntimeException(message, e);
        }
    }

    /**
     * Return a Charset code page from a binary code page value.
     * @param pageCodeBinaryValue binary code page value.
     * @return Page code, or {@code null} if the value has no known conversion.
     */
    private String toCodePage(byte pageCodeBinaryValue) {
        // The table is keyed by unsigned values: 0xC8 must not be looked up as -56.
        return KNOWN_CODE_PAGES.get(Byte.toUnsignedInt(pageCodeBinaryValue));
    }

    /**
     * Builds the table of the known code page conversions.
     * @return Unmodifiable map from unsigned code page value to charset name.
     */
    private static Map<Integer, String> createKnownCodePages() {
        // From http://trac.osgeo.org/gdal/ticket/2864
        Map<Integer, String> conversions = new HashMap<>();
        conversions.put(0x01, "cp437");  // U.S. MS-DOS
        conversions.put(0x02, "cp850");  // International MS-DOS
        conversions.put(0x03, "cp1252"); // Windows ANSI
        conversions.put(0x08, "cp865");  // Danish OEM
        conversions.put(0x09, "cp437");  // Dutch OEM
        conversions.put(0x0a, "cp850");  // Dutch OEM*
        conversions.put(0x0b, "cp437");  // Finnish OEM
        conversions.put(0x0d, "cp437");  // French OEM
        conversions.put(0x0e, "cp850");  // French OEM*
        conversions.put(0x0f, "cp437");  // German OEM
        conversions.put(0x10, "cp850");  // German OEM*
        conversions.put(0x11, "cp437");  // Italian OEM
        conversions.put(0x12, "cp850");  // Italian OEM*
        conversions.put(0x13, "cp932");  // Japanese Shift-JIS
        conversions.put(0x14, "cp850");  // Spanish OEM*
        conversions.put(0x15, "cp437");  // Swedish OEM
        conversions.put(0x16, "cp850");  // Swedish OEM*
        conversions.put(0x17, "cp865");  // Norwegian OEM
        conversions.put(0x18, "cp437");  // Spanish OEM
        conversions.put(0x19, "cp437");  // English OEM (Britain)
        conversions.put(0x1a, "cp850");  // English OEM (Britain)*
        conversions.put(0x1b, "cp437");  // English OEM (U.S.)
        conversions.put(0x1c, "cp863");  // French OEM (Canada)
        conversions.put(0x1d, "cp850");  // French OEM*
        conversions.put(0x1f, "cp852");  // Czech OEM
        conversions.put(0x22, "cp852");  // Hungarian OEM
        conversions.put(0x23, "cp852");  // Polish OEM
        conversions.put(0x24, "cp860");  // Portuguese OEM
        conversions.put(0x25, "cp850");  // Portuguese OEM*
        conversions.put(0x26, "cp866");  // Russian OEM
        conversions.put(0x37, "cp850");  // English OEM (U.S.)*
        conversions.put(0x40, "cp852");  // Romanian OEM
        conversions.put(0x4d, "cp936");  // Chinese GBK (PRC)
        conversions.put(0x4e, "cp949");  // Korean (ANSI/OEM)
        conversions.put(0x4f, "cp950");  // Chinese Big5 (Taiwan)
        conversions.put(0x50, "cp874");  // Thai (ANSI/OEM)
        conversions.put(0x57, "cp1252"); // ANSI
        conversions.put(0x58, "cp1252"); // Western European ANSI
        conversions.put(0x59, "cp1252"); // Spanish ANSI
        conversions.put(0x64, "cp852");  // Eastern European MS-DOS
        conversions.put(0x65, "cp866");  // Russian MS-DOS
        conversions.put(0x66, "cp865");  // Nordic MS-DOS
        conversions.put(0x67, "cp861");  // Icelandic MS-DOS
        conversions.put(0x6a, "cp737");  // Greek MS-DOS (437G)
        conversions.put(0x6b, "cp857");  // Turkish MS-DOS
        conversions.put(0x6c, "cp863");  // French-Canadian MS-DOS
        conversions.put(0x78, "cp950");  // Taiwan Big 5
        conversions.put(0x79, "cp949");  // Hangul (Wansung)
        conversions.put(0x7a, "cp936");  // PRC GBK
        conversions.put(0x7b, "cp932");  // Japanese Shift-JIS
        conversions.put(0x7c, "cp874");  // Thai Windows/MS-DOS
        conversions.put(0x86, "cp737");  // Greek OEM
        conversions.put(0x87, "cp852");  // Slovenian OEM
        conversions.put(0x88, "cp857");  // Turkish OEM
        conversions.put(0xc8, "cp1250"); // Eastern European Windows
        conversions.put(0xc9, "cp1251"); // Russian Windows
        conversions.put(0xca, "cp1254"); // Turkish Windows
        conversions.put(0xcb, "cp1253"); // Greek Windows
        conversions.put(0xcc, "cp1257"); // Baltic Windows
        return Collections.unmodifiableMap(conversions);
    }

    /**
     * Set a charset.
     * @param cs Charset.
     */
    public void setCharset(Charset cs) {
        this.charset = cs;
    }

    /**
     * Return a date from a byte array.
     *
     * <p>The year byte is read as an unsigned offset from 1900; values below 70 are taken as
     * two-digit years of the 21st century (for example 15 gives 2015, 95 gives 1995 and 115 gives 2015).
     * The month byte is 1-based (1 = January). Fields are not range-checked: out-of-range
     * values are resolved leniently by {@link Date}.</p>
     *
     * @param yymmdd byte[3] with byte[0] = year (2 digits), [1] = month, [2] = day.
     * @return Date, at midnight in the default time zone.
     * @throws NullPointerException if the array is null.
     * @throws IllegalArgumentException if the array does not have exactly 3 bytes.
     */
    private Date toDate(byte[] yymmdd) {
        Objects.requireNonNull(yymmdd, "the yymmdd bytes cannot be null");

        if (yymmdd.length != 3) {
            throw new IllegalArgumentException(MessageFormat.format("Database:toDate() works only on a 3 bytes YY MM DD date. this array has {0} length", yymmdd.length));
        }

        // The deprecated Date constructor expects the year as an offset from 1900,
        // which is how it is used here. Read the byte unsigned so that values of 128
        // and above do not turn negative.
        int storedYear = Byte.toUnsignedInt(yymmdd[0]);
        int year = storedYear < 70 ? 100 + storedYear : storedYear;

        // The DBF month is 1-based whereas the Date constructor expects 0 (January) to 11 (December).
        int month = yymmdd[1] - 1;
        int day = yymmdd[2];

        @SuppressWarnings("deprecation") // But everything is deprecated in DBF files...
        Date date = new Date(year, month, day);
        return date;
    }
}