| <!-- |
| |
| $PostgreSQL: pgsql/doc/src/sgml/ref/copy.sgml,v 1.77.2.2 2007/04/18 02:29:39 momjian Exp $ |
| |
| PostgreSQL documentation |
| |
| --> |
| |
| |
| |
| |
| |
| <refentry id="SQL-COPY"> |
| |
| <refmeta> |
| |
| <refentrytitle id="sql-copy-title">COPY</refentrytitle> |
| |
| <refmiscinfo>SQL - Language Statements</refmiscinfo> |
| |
| </refmeta> |
| |
| |
| |
| <refnamediv> |
| |
| <refname>COPY</refname> |
| |
| <refpurpose>copy data between a file and a table</refpurpose> |
| |
| </refnamediv> |
| |
| |
| |
| <indexterm zone="sql-copy"> |
| |
| <primary>COPY</primary> |
| |
| </indexterm> |
| |
| |
| |
| <refsynopsisdiv> |
| |
| <synopsis> |
| |
| COPY table [(column [, ...])] FROM {'file' | STDIN} |
| [ [WITH] |
| [OIDS] |
| [HEADER] |
| [DELIMITER [ AS ] 'delimiter'] |
| [NULL [ AS ] 'null string'] |
| [ESCAPE [ AS ] 'escape' | 'OFF'] |
| [NEWLINE [ AS ] 'LF' | 'CR' | 'CRLF'] |
| [CSV [QUOTE [ AS ] 'quote'] |
| [FORCE NOT NULL column [, ...]] |
| [FILL MISSING FIELDS] |
| [ [LOG ERRORS INTO error_table] [KEEP] |
| SEGMENT REJECT LIMIT count [ROWS | PERCENT] ] |
| |
| |
| COPY {table [(column [, ...])] | (query)} TO {'file' | STDOUT} |
| [ [WITH] |
| [OIDS] |
| [HEADER] |
| [DELIMITER [ AS ] 'delimiter'] |
| [NULL [ AS ] 'null string'] |
| [ESCAPE [ AS ] 'escape' | 'OFF'] |
| [CSV [QUOTE [ AS ] 'quote'] |
| [FORCE QUOTE column [, ...]] ] |
| |
| </synopsis> |
| |
| </refsynopsisdiv> |
| |
| |
| |
| <refsect1> |
| |
| <title>Description</title> |
| |
| |
| |
| <para> |
| |
| <command>COPY</command> moves data between |
| |
| <productname>PostgreSQL</productname> tables and standard file-system |
| |
| files. <command>COPY TO</command> copies the contents of a table |
| |
| <emphasis>to</> a file, while <command>COPY FROM</command> copies |
| |
| data <emphasis>from</> a file to a table (appending the data to |
| |
| whatever is in the table already). <command>COPY TO</command> |
| |
| can also copy the results of a <command>SELECT</> query. |
| |
| </para> |
| |
| |
| |
| <para> |
| |
| If a list of columns is specified, <command>COPY</command> will |
| |
| only copy the data in the specified columns to or from the file. |
| |
| If there are any columns in the table that are not in the column list, |
| |
| <command>COPY FROM</command> will insert the default values for |
| |
| those columns. |
| |
| </para> |
| |
| |
| |
| <para> |
| |
| <command>COPY</command> with a file name instructs the |
| |
| <productname>PostgreSQL</productname> server to directly read from |
| |
| or write to a file. The file must be accessible to the server and |
| |
| the name must be specified from the viewpoint of the server. When |
| |
| <literal>STDIN</literal> or <literal>STDOUT</literal> is |
| |
| specified, data is transmitted via the connection between the |
| |
| client and the server. |
| |
| </para> |
| |
| </refsect1> |
| |
| |
| |
| <refsect1> |
| |
| <title>Parameters</title> |
| |
| |
| |
| <variablelist> |
| |
| <varlistentry> |
| |
| <term><replaceable class="parameter">tablename</replaceable></term> |
| |
| <listitem> |
| |
| <para> |
| |
| The name (optionally schema-qualified) of an existing table. |
| |
| </para> |
| |
| </listitem> |
| |
| </varlistentry> |
| |
| |
| |
| <varlistentry> |
| |
| <term><replaceable class="parameter">column</replaceable></term> |
| |
| <listitem> |
| |
| <para> |
| |
| An optional list of columns to be copied. If no column list is |
| |
| specified, all columns of the table will be copied. |
| |
| </para> |
| |
| </listitem> |
| |
| </varlistentry> |
| |
| |
| |
| <varlistentry> |
| |
| <term><replaceable class="parameter">query</replaceable></term> |
| |
| <listitem> |
| |
| <para> |
| |
| A <xref linkend="sql-select" endterm="sql-select-title"> or |
| |
| <xref linkend="sql-values" endterm="sql-values-title"> command |
| |
| whose results are to be copied. |
| |
| Note that parentheses are required around the query. |
| |
| </para> |
| |
| </listitem> |
| |
| </varlistentry> |
| |
| |
| |
| <varlistentry> |
| |
| <term><replaceable class="parameter">filename</replaceable></term> |
| |
| <listitem> |
| |
| <para> |
| |
| The absolute path name of the input or output file. Windows users |
| |
| might need to use an <literal>E''</> string and double backslashes |
| |
| used as path separators. |
| |
| </para> |
| |
| </listitem> |
| |
| </varlistentry> |
| |
| |
| |
| <varlistentry> |
| |
| <term><literal>STDIN</literal></term> |
| |
| <listitem> |
| |
| <para> |
| |
| Specifies that input comes from the client application. |
| |
| </para> |
| |
| </listitem> |
| |
| </varlistentry> |
| |
| |
| |
| <varlistentry> |
| |
| <term><literal>STDOUT</literal></term> |
| |
| <listitem> |
| |
| <para> |
| |
| Specifies that output goes to the client application. |
| |
| </para> |
| |
| </listitem> |
| |
| </varlistentry> |
| |
| |
| |
| <varlistentry> |
| |
| <term><literal>BINARY</literal></term> |
| |
| <listitem> |
| |
| <para> |
| |
| Causes all data to be stored or read in binary format rather |
| |
| than as text. You cannot specify the <option>DELIMITER</option>, |
| |
| <option>NULL</option>, or <option>CSV</> options in binary mode. |
| |
| </para> |
| |
| </listitem> |
| |
| </varlistentry> |
| |
| |
| |
| <varlistentry> |
| |
| <term><literal>OIDS</literal></term> |
| |
| <listitem> |
| |
| <para> |
| |
| Specifies copying the OID for each row. (An error is raised if |
| |
| <literal>OIDS</literal> is specified for a table that does not |
| |
| have OIDs, or in the case of copying a <replaceable |
| |
| class="parameter">query</replaceable>.) |
| |
| </para> |
| |
| </listitem> |
| |
| </varlistentry> |
| |
| |
| |
| <varlistentry> |
| |
| <term><replaceable class="parameter">delimiter</replaceable></term> |
| |
| <listitem> |
| |
| <para> |
| |
| The single ASCII character that separates columns within each row |
| |
| (line) of the file. The default is a tab character in text mode, |
| |
| a comma in <literal>CSV</> mode. |
| |
| </para> |
| |
| </listitem> |
| |
| </varlistentry> |
| |
| |
| |
| <varlistentry> |
| |
| <term><replaceable class="parameter">null string</replaceable></term> |
| |
| <listitem> |
| |
| <para> |
| |
| The string that represents a null value. The default is |
| |
| <literal>\N</literal> (backslash-N) in text mode, and a empty |
| |
| value with no quotes in <literal>CSV</> mode. You might prefer an |
| |
| empty string even in text mode for cases where you don't want to |
| |
| distinguish nulls from empty strings. |
| |
| </para> |
| |
| |
| |
| <note> |
| |
| <para> |
| |
| When using <command>COPY FROM</command>, any data item that matches |
| |
| this string will be stored as a null value, so you should make |
| |
| sure that you use the same string as you used with |
| |
| <command>COPY TO</command>. |
| |
| </para> |
| |
| </note> |
| |
| |
| |
| </listitem> |
| |
| </varlistentry> |
| |
| |
| |
| <varlistentry> |
| |
| <term><literal>CSV</literal></term> |
| |
| <listitem> |
| |
| <para> |
| |
| Selects Comma Separated Value (<literal>CSV</>) mode. |
| |
| </para> |
| |
| </listitem> |
| |
| </varlistentry> |
| |
| |
| |
| <varlistentry> |
| |
| <term><literal>HEADER</literal></term> |
| |
| <listitem> |
| |
| <para> |
| |
| Specifies the file contains a header line with the names of each |
| |
| column in the file. On output, the first line contains the column |
| |
| names from the table, and on input, the first line is ignored. |
| |
| </para> |
| |
| </listitem> |
| |
| </varlistentry> |
| |
| |
| |
| <varlistentry> |
| |
| <term><replaceable class="parameter">quote</replaceable></term> |
| |
| <listitem> |
| |
| <para> |
| |
| Specifies the ASCII quotation character in <literal>CSV</> mode. |
| |
| The default is double-quote. |
| |
| </para> |
| |
| </listitem> |
| |
| </varlistentry> |
| |
| |
| |
| <varlistentry> |
| |
| <term><replaceable class="parameter">escape</replaceable></term> |
| |
| <listitem> |
| |
| <para> |
| |
| Specifies the ASCII character that should appear before a |
| |
| <literal>QUOTE</> data character value in <literal>CSV</> mode. |
| |
| The default is the <literal>QUOTE</> value (usually double-quote). |
| |
| </para> |
| |
| </listitem> |
| |
| </varlistentry> |
| |
| |
| |
| <varlistentry> |
| |
| <term><literal>FORCE QUOTE</></term> |
| |
| <listitem> |
| |
| <para> |
| |
| In <literal>CSV</> <command>COPY TO</> mode, forces quoting to be |
| |
| used for all non-<literal>NULL</> values in each specified column. |
| |
| <literal>NULL</> output is never quoted. |
| |
| </para> |
| |
| </listitem> |
| |
| </varlistentry> |
| |
| |
| |
| <varlistentry> |
| |
| <term><literal>FORCE NOT NULL</></term> |
| |
| <listitem> |
| |
| <para> |
| |
| In <literal>CSV</> <command>COPY FROM</> mode, process each |
| |
| specified column as though it were quoted and hence not a |
| |
| <literal>NULL</> value. For the default null string in |
| |
| <literal>CSV</> mode (<literal>''</>), this causes missing |
| |
| values to be input as zero-length strings. |
| |
| </para> |
| |
| </listitem> |
| |
| </varlistentry> |
| |
| |
| |
| </variablelist> |
| |
| </refsect1> |
| |
| |
| |
| <refsect1> |
| |
| <title>Outputs</title> |
| |
| |
| |
| <para> |
| |
| On successful completion, a <command>COPY</> command returns a command |
| |
| tag of the form |
| |
| <screen> |
| |
| COPY <replaceable class="parameter">count</replaceable> |
| |
| </screen> |
| |
| The <replaceable class="parameter">count</replaceable> is the number |
| |
| of rows copied. |
| |
| </para> |
| |
| </refsect1> |
| |
| |
| |
| <refsect1> |
| |
| <title>Notes</title> |
| |
| |
| |
| <para> |
| |
| <command>COPY</command> can only be used with plain tables, not |
| |
| with views. However, you can write <literal>COPY (SELECT * FROM |
| |
| <replaceable class="parameter">viewname</replaceable>) TO ...</literal>. |
| |
| </para> |
| |
| |
| |
| <para> |
| |
| The <literal>BINARY</literal> key word causes all data to be |
| |
| stored/read as binary format rather than as text. It is |
| |
| somewhat faster than the normal text mode, but a binary-format |
| |
| file is less portable across machine architectures and |
| |
| <productname>PostgreSQL</productname> versions. |
| |
| </para> |
| |
| |
| |
| <para> |
| |
| You must have select privilege on the table |
| |
| whose values are read by <command>COPY TO</command>, and |
| |
| insert privilege on the table into which values |
| |
| are inserted by <command>COPY FROM</command>. |
| |
| </para> |
| |
| |
| |
| <para> |
| |
| Files named in a <command>COPY</command> command are read or written |
| |
| directly by the server, not by the client application. Therefore, |
| |
| they must reside on or be accessible to the database server machine, |
| |
| not the client. They must be accessible to and readable or writable |
| |
| by the <productname>PostgreSQL</productname> user (the user ID the |
| |
| server runs as), not the client. <command>COPY</command> naming a |
| |
| file is only allowed to database superusers, since it allows reading |
| |
| or writing any file that the server has privileges to access. |
| |
| </para> |
| |
| |
| |
| <para> |
| |
| Do not confuse <command>COPY</command> with the |
| |
| <application>psql</application> instruction |
| |
| <command>\copy</command>. <command>\copy</command> invokes |
| |
| <command>COPY FROM STDIN</command> or <command>COPY TO |
| |
| STDOUT</command>, and then fetches/stores the data in a file |
| |
| accessible to the <application>psql</application> client. Thus, |
| |
| file accessibility and access rights depend on the client rather |
| |
| than the server when <command>\copy</command> is used. |
| |
| </para> |
| |
| |
| |
| <para> |
| |
| It is recommended that the file name used in <command>COPY</command> |
| |
| always be specified as an absolute path. This is enforced by the |
| |
| server in the case of <command>COPY TO</command>, but for |
| |
| <command>COPY FROM</command> you do have the option of reading from |
| |
| a file specified by a relative path. The path will be interpreted |
| |
| relative to the working directory of the server process (normally |
| |
| the cluster's data directory), not the client's working directory. |
| |
| </para> |
| |
| |
| |
| <para> |
| |
| <command>COPY FROM</command> will invoke any triggers and check |
| |
| constraints on the destination table. However, it will not invoke rules. |
| |
| </para> |
| |
| |
| |
| <para> |
| |
| <command>COPY</command> input and output is affected by |
| |
| <varname>DateStyle</varname>. To ensure portability to other |
| |
| <productname>PostgreSQL</productname> installations that might use |
| |
| non-default <varname>DateStyle</varname> settings, |
| |
| <varname>DateStyle</varname> should be set to <literal>ISO</> before |
| |
| using <command>COPY TO</>. |
| |
| </para> |
| |
| |
| |
| <para> |
| |
| <command>COPY</command> stops operation at the first error. This |
| |
| should not lead to problems in the event of a <command>COPY |
| |
| TO</command>, but the target table will already have received |
| |
| earlier rows in a <command>COPY FROM</command>. These rows will not |
| |
| be visible or accessible, but they still occupy disk space. This may |
| |
| amount to a considerable amount of wasted disk space if the failure |
| |
| happened well into a large copy operation. You may wish to invoke |
| |
| <command>VACUUM</command> to recover the wasted space. |
| |
| </para> |
| |
| </refsect1> |
| |
| |
| |
| <refsect1> |
| |
| <title>File Formats</title> |
| |
| |
| |
| <refsect2> |
| |
| <title>Text Format</title> |
| |
| |
| |
| <para> |
| |
| When <command>COPY</command> is used without the <literal>BINARY</literal> |
| |
| or <literal>CSV</> options, |
| |
| the data read or written is a text file with one line per table row. |
| |
| Columns in a row are separated by the delimiter character. |
| |
| The column values themselves are strings generated by the |
| |
| output function, or acceptable to the input function, of each |
| |
| attribute's data type. The specified null string is used in |
| |
| place of columns that are null. |
| |
| <command>COPY FROM</command> will raise an error if any line of the |
| |
| input file contains more or fewer columns than are expected. |
| |
| If <literal>OIDS</literal> is specified, the OID is read or written as the first column, |
| |
| preceding the user data columns. |
| |
| </para> |
| |
| |
| |
| <para> |
| |
| End of data can be represented by a single line containing just |
| |
| backslash-period (<literal>\.</>). An end-of-data marker is |
| |
| not necessary when reading from a file, since the end of file |
| |
| serves perfectly well; it is needed only when copying data to or from |
| |
| client applications using pre-3.0 client protocol. |
| |
| </para> |
| |
| |
| |
| <para> |
| |
| Backslash characters (<literal>\</>) may be used in the |
| |
| <command>COPY</command> data to quote data characters that might |
| |
| otherwise be taken as row or column delimiters. In particular, the |
| |
| following characters <emphasis>must</> be preceded by a backslash if |
| |
| they appear as part of a column value: backslash itself, |
| |
| newline, carriage return, and the current delimiter character. |
| |
| </para> |
| |
| |
| |
| <para> |
| |
| The specified null string is sent by <command>COPY TO</command> without |
| |
| adding any backslashes; conversely, <command>COPY FROM</command> matches |
| |
| the input against the null string before removing backslashes. Therefore, |
| |
| a null string such as <literal>\N</literal> cannot be confused with |
| |
| the actual data value <literal>\N</literal> (which would be represented |
| |
| as <literal>\\N</literal>). |
| |
| </para> |
| |
| |
| |
| <para> |
| |
| The following special backslash sequences are recognized by |
| |
| <command>COPY FROM</command>: |
| |
| |
| |
| <informaltable> |
| |
| <tgroup cols="2"> |
| |
| <thead> |
| |
| <row> |
| |
| <entry>Sequence</entry> |
| |
| <entry>Represents</entry> |
| |
| </row> |
| |
| </thead> |
| |
| |
| |
| <tbody> |
| |
| <row> |
| |
| <entry><literal>\b</></entry> |
| |
| <entry>Backspace (ASCII 8)</entry> |
| |
| </row> |
| |
| <row> |
| |
| <entry><literal>\f</></entry> |
| |
| <entry>Form feed (ASCII 12)</entry> |
| |
| </row> |
| |
| <row> |
| |
| <entry><literal>\n</></entry> |
| |
| <entry>Newline (ASCII 10)</entry> |
| |
| </row> |
| |
| <row> |
| |
| <entry><literal>\r</></entry> |
| |
| <entry>Carriage return (ASCII 13)</entry> |
| |
| </row> |
| |
| <row> |
| |
| <entry><literal>\t</></entry> |
| |
| <entry>Tab (ASCII 9)</entry> |
| |
| </row> |
| |
| <row> |
| |
| <entry><literal>\v</></entry> |
| |
| <entry>Vertical tab (ASCII 11)</entry> |
| |
| </row> |
| |
| <row> |
| |
| <entry><literal>\</><replaceable>digits</></entry> |
| |
| <entry>Backslash followed by one to three octal digits specifies |
| |
| the character with that numeric code</entry> |
| |
| </row> |
| |
| <row> |
| |
| <entry><literal>\x</><replaceable>digits</></entry> |
| |
| <entry>Backslash <literal>x</> followed by one or two hex digits specifies |
| |
| the character with that numeric code</entry> |
| |
| </row> |
| |
| </tbody> |
| |
| </tgroup> |
| |
| </informaltable> |
| |
| |
| |
| Presently, <command>COPY TO</command> will never emit an octal or |
| |
| hex-digits backslash sequence, but it does use the other sequences |
| |
| listed above for those control characters. |
| |
| </para> |
| |
| |
| |
| <para> |
| |
| Any other backslashed character that is not mentioned in the above table |
| |
| will be taken to represent itself. However, beware of adding backslashes |
| |
| unnecessarily, since that might accidentally produce a string matching the |
| |
| end-of-data marker (<literal>\.</>) or the null string (<literal>\N</> by |
| |
| default). These strings will be recognized before any other backslash |
| |
| processing is done. |
| |
| </para> |
| |
| |
| |
| <para> |
| |
| It is strongly recommended that applications generating <command>COPY</command> data convert |
| |
| data newlines and carriage returns to the <literal>\n</> and |
| |
| <literal>\r</> sequences respectively. At present it is |
| |
| possible to represent a data carriage return by a backslash and carriage |
| |
| return, and to represent a data newline by a backslash and newline. |
| |
| However, these representations might not be accepted in future releases. |
| |
| They are also highly vulnerable to corruption if the <command>COPY</command> file is |
| |
| transferred across different machines (for example, from Unix to Windows |
| |
| or vice versa). |
| |
| </para> |
| |
| |
| |
| <para> |
| |
| <command>COPY TO</command> will terminate each row with a Unix-style |
| |
| newline (<quote><literal>\n</></>). Servers running on Microsoft Windows instead |
| |
| output carriage return/newline (<quote><literal>\r\n</></>), but only for |
| |
| <command>COPY</> to a server file; for consistency across platforms, |
| |
| <command>COPY TO STDOUT</> always sends <quote><literal>\n</></> |
| |
| regardless of server platform. |
| |
| <command>COPY FROM</command> can handle lines ending with newlines, |
| |
| carriage returns, or carriage return/newlines. To reduce the risk of |
| |
| error due to un-backslashed newlines or carriage returns that were |
| |
| meant as data, <command>COPY FROM</command> will complain if the line |
| |
| endings in the input are not all alike. |
| |
| </para> |
| |
| </refsect2> |
| |
| |
| |
| <refsect2> |
| |
| <title>CSV Format</title> |
| |
| |
| |
| <para> |
| |
| This format is used for importing and exporting the Comma |
| |
| Separated Value (<literal>CSV</>) file format used by many other |
| |
| programs, such as spreadsheets. Instead of the escaping used by |
| |
| <productname>PostgreSQL</productname>'s standard text mode, it |
| |
| produces and recognizes the common CSV escaping mechanism. |
| |
| </para> |
| |
| |
| |
| <para> |
| |
| The values in each record are separated by the <literal>DELIMITER</> |
| |
| character. If the value contains the delimiter character, the |
| |
| <literal>QUOTE</> character, the <literal>NULL</> string, a carriage |
| |
| return, or line feed character, then the whole value is prefixed and |
| |
| suffixed by the <literal>QUOTE</> character, and any occurrence |
| |
| within the value of a <literal>QUOTE</> character or the |
| |
| <literal>ESCAPE</> character is preceded by the escape character. |
| |
| You can also use <literal>FORCE QUOTE</> to force quotes when outputting |
| |
| non-<literal>NULL</> values in specific columns. |
| |
| </para> |
| |
| |
| |
| <para> |
| |
| The <literal>CSV</> format has no standard way to distinguish a |
| |
| <literal>NULL</> value from an empty string. |
| |
| <productname>PostgreSQL</>'s <command>COPY</> handles this by |
| |
| quoting. A <literal>NULL</> is output as the <literal>NULL</> |
| |
| string and is not quoted, while a data value matching the |
| |
| <literal>NULL</> string is quoted. Therefore, using the default |
| |
| settings, a <literal>NULL</> is written as an unquoted empty |
| |
| string, while an empty string is written with double quotes |
| |
| (<literal>""</>). Reading values follows similar rules. You can |
| |
| use <literal>FORCE NOT NULL</> to prevent <literal>NULL</> input |
| |
| comparisons for specific columns. |
| |
| </para> |
| |
| |
| |
| <para> |
| |
| Because backslash is not a special character in the <literal>CSV</> |
| |
| format, <literal>\.</>, the end-of-data marker, could also appear |
| |
| as a data value. To avoid any misinterpretation, a <literal>\.</> |
| |
| data value appearing as a lone entry on a line is automatically |
| |
| quoted on output, and on input, if quoted, is not interpreted as the |
| |
| end-of-data marker. If you are loading a file created by another |
| |
| application that has a single unquoted column and might have a |
| |
| value of <literal>\.</>, you might need to quote that value in the |
| |
| input file. |
| |
| </para> |
| |
| |
| |
| <note> |
| |
| <para> |
| |
| In <literal>CSV</> mode, all characters are significant. A quoted value |
| |
| surrounded by white space, or any characters other than |
| |
| <literal>DELIMITER</>, will include those characters. This can cause |
| |
| errors if you import data from a system that pads <literal>CSV</> |
| |
| lines with white space out to some fixed width. If such a situation |
| |
| arises you might need to preprocess the <literal>CSV</> file to remove |
| |
| the trailing white space, before importing the data into |
| |
| <productname>PostgreSQL</>. |
| |
| </para> |
| |
| </note> |
| |
| |
| |
| <note> |
| |
| <para> |
| |
| CSV mode will both recognize and produce CSV files with quoted |
| |
| values containing embedded carriage returns and line feeds. Thus |
| |
| the files are not strictly one line per table row like text-mode |
| |
| files. |
| |
| </para> |
| |
| </note> |
| |
| |
| |
| <note> |
| |
| <para> |
| |
| Many programs produce strange and occasionally perverse CSV files, |
| |
| so the file format is more a convention than a standard. Thus you |
| |
| might encounter some files that cannot be imported using this |
| |
| mechanism, and <command>COPY</> might produce files that other |
| |
| programs cannot process. |
| |
| </para> |
| |
| </note> |
| |
| |
| |
| </refsect2> |
| |
| |
| |
| <refsect2> |
| |
| <title>Binary Format</title> |
| |
| |
| |
| <para> |
| |
| The file format used for <command>COPY BINARY</command> changed in |
| |
| <productname>PostgreSQL</productname> 7.4. The new format consists |
| |
| of a file header, zero or more tuples containing the row data, and |
| |
| a file trailer. Headers and data are now in network byte order. |
| |
| </para> |
| |
| |
| |
| <refsect3> |
| |
| <title>File Header</title> |
| |
| |
| |
| <para> |
| |
| The file header consists of 15 bytes of fixed fields, followed |
| |
| by a variable-length header extension area. The fixed fields are: |
| |
| |
| |
| <variablelist> |
| |
| <varlistentry> |
| |
| <term>Signature</term> |
| |
| <listitem> |
| |
| <para> |
| |
| 11-byte sequence <literal>PGCOPY\n\377\r\n\0</> — note that the zero byte |
| |
| is a required part of the signature. (The signature is designed to allow |
| |
| easy identification of files that have been munged by a non-8-bit-clean |
| |
| transfer. This signature will be changed by end-of-line-translation |
| |
| filters, dropped zero bytes, dropped high bits, or parity changes.) |
| |
| </para> |
| |
| </listitem> |
| |
| </varlistentry> |
| |
| |
| |
| <varlistentry> |
| |
| <term>Flags field</term> |
| |
| <listitem> |
| |
| <para> |
| |
| 32-bit integer bit mask to denote important aspects of the file format. Bits |
| |
| are numbered from 0 (<acronym>LSB</>) to 31 (<acronym>MSB</>). Note that |
| |
| this field is stored in network byte order (most significant byte first), |
| |
| as are all the integer fields used in the file format. Bits |
| |
| 16-31 are reserved to denote critical file format issues; a reader |
| |
| should abort if it finds an unexpected bit set in this range. Bits 0-15 |
| |
| are reserved to signal backwards-compatible format issues; a reader |
| |
| should simply ignore any unexpected bits set in this range. Currently |
| |
| only one flag bit is defined, and the rest must be zero: |
| |
| <variablelist> |
| |
| <varlistentry> |
| |
| <term>Bit 16</term> |
| |
| <listitem> |
| |
| <para> |
| |
| if 1, OIDs are included in the data; if 0, not |
| |
| </para> |
| |
| </listitem> |
| |
| </varlistentry> |
| |
| </variablelist> |
| |
| </para> |
| |
| </listitem> |
| |
| </varlistentry> |
| |
| |
| |
| <varlistentry> |
| |
| <term>Header extension area length</term> |
| |
| <listitem> |
| |
| <para> |
| |
| 32-bit integer, length in bytes of remainder of header, not including self. |
| |
| Currently, this is zero, and the first tuple follows |
| |
| immediately. Future changes to the format might allow additional data |
| |
| to be present in the header. A reader should silently skip over any header |
| |
| extension data it does not know what to do with. |
| |
| </para> |
| |
| </listitem> |
| |
| </varlistentry> |
| |
| </variablelist> |
| |
| </para> |
| |
| |
| |
| <para> |
| |
| The header extension area is envisioned to contain a sequence of |
| |
| self-identifying chunks. The flags field is not intended to tell readers |
| |
| what is in the extension area. Specific design of header extension contents |
| |
| is left for a later release. |
| |
| </para> |
| |
| |
| |
| <para> |
| |
| This design allows for both backwards-compatible header additions (add |
| |
| header extension chunks, or set low-order flag bits) and |
| |
| non-backwards-compatible changes (set high-order flag bits to signal such |
| |
| changes, and add supporting data to the extension area if needed). |
| |
| </para> |
| |
| </refsect3> |
| |
| |
| |
| <refsect3> |
| |
| <title>Tuples</title> |
| |
| <para> |
| |
| Each tuple begins with a 16-bit integer count of the number of fields in the |
| |
| tuple. (Presently, all tuples in a table will have the same count, but that |
| |
| might not always be true.) Then, repeated for each field in the tuple, there |
| |
| is a 32-bit length word followed by that many bytes of field data. (The |
| |
| length word does not include itself, and can be zero.) As a special case, |
| |
| -1 indicates a NULL field value. No value bytes follow in the NULL case. |
| |
| </para> |
| |
| |
| |
| <para> |
| |
| There is no alignment padding or any other extra data between fields. |
| |
| </para> |
| |
| |
| |
| <para> |
| |
| Presently, all data values in a <command>COPY BINARY</command> file are |
| |
| assumed to be in binary format (format code one). It is anticipated that a |
| |
| future extension may add a header field that allows per-column format codes |
| |
| to be specified. |
| |
| </para> |
| |
| |
| |
| <para> |
| |
| To determine the appropriate binary format for the actual tuple data you |
| |
| should consult the <productname>PostgreSQL</productname> source, in |
| |
| particular the <function>*send</> and <function>*recv</> functions for |
| |
| each column's data type (typically these functions are found in the |
| |
| <filename>src/backend/utils/adt/</filename> directory of the source |
| |
| distribution). |
| |
| </para> |
| |
| |
| |
| <para> |
| |
| If OIDs are included in the file, the OID field immediately follows the |
| |
| field-count word. It is a normal field except that it's not included |
| |
| in the field-count. In particular it has a length word — this will allow |
| |
| handling of 4-byte vs. 8-byte OIDs without too much pain, and will allow |
| |
| OIDs to be shown as null if that ever proves desirable. |
| |
| </para> |
| |
| </refsect3> |
| |
| |
| |
| <refsect3> |
| |
| <title>File Trailer</title> |
| |
| |
| |
| <para> |
| |
| The file trailer consists of a 16-bit integer word containing -1. This |
| |
| is easily distinguished from a tuple's field-count word. |
| |
| </para> |
| |
| |
| |
| <para> |
| |
| A reader should report an error if a field-count word is neither -1 |
| |
| nor the expected number of columns. This provides an extra |
| |
| check against somehow getting out of sync with the data. |
| |
| </para> |
| |
| </refsect3> |
| |
| </refsect2> |
| |
| </refsect1> |
| |
| |
| |
| <refsect1> |
| |
| <title>Examples</title> |
| |
| |
| |
| <para> |
| |
| The following example copies a table to the client |
| |
| using the vertical bar (<literal>|</literal>) as the field delimiter: |
| |
| <programlisting> |
| |
| COPY country TO STDOUT WITH DELIMITER '|'; |
| |
| </programlisting> |
| |
| </para> |
| |
| |
| |
| <para> |
| |
| To copy data from a file into the <literal>country</> table: |
| |
| <programlisting> |
| |
| COPY country FROM '/usr1/proj/bray/sql/country_data'; |
| |
| </programlisting> |
| |
| </para> |
| |
| |
| |
| <para> |
| |
| To copy into a file just the countries whose names start with 'A': |
| |
| <programlisting> |
| |
| COPY (SELECT * FROM country WHERE country_name LIKE 'A%') TO '/usr1/proj/bray/sql/a_list_countries.copy'; |
| |
| </programlisting> |
| |
| </para> |
| |
| |
| |
| <para> |
| |
| Here is a sample of data suitable for copying into a table from |
| |
| <literal>STDIN</literal>: |
| |
| <programlisting> |
| |
| AF AFGHANISTAN |
| |
| AL ALBANIA |
| |
| DZ ALGERIA |
| |
| ZM ZAMBIA |
| |
| ZW ZIMBABWE |
| |
| </programlisting> |
| |
| Note that the white space on each line is actually a tab character. |
| |
| </para> |
| |
| |
| |
| <para> |
| |
| The following is the same data, output in binary format. |
| |
| The data is shown after filtering through the |
| |
| Unix utility <command>od -c</command>. The table has three columns; |
| |
| the first has type <type>char(2)</type>, the second has type <type>text</type>, |
| |
| and the third has type <type>integer</type>. All the rows have a null value |
| |
| in the third column. |
| |
| <programlisting> |
| |
| 0000000 P G C O P Y \n 377 \r \n \0 \0 \0 \0 \0 \0 |
| |
| 0000020 \0 \0 \0 \0 003 \0 \0 \0 002 A F \0 \0 \0 013 A |
| |
| 0000040 F G H A N I S T A N 377 377 377 377 \0 003 |
| |
| 0000060 \0 \0 \0 002 A L \0 \0 \0 007 A L B A N I |
| |
| 0000100 A 377 377 377 377 \0 003 \0 \0 \0 002 D Z \0 \0 \0 |
| |
| 0000120 007 A L G E R I A 377 377 377 377 \0 003 \0 \0 |
| |
| 0000140 \0 002 Z M \0 \0 \0 006 Z A M B I A 377 377 |
| |
| 0000160 377 377 \0 003 \0 \0 \0 002 Z W \0 \0 \0 \b Z I |
| |
| 0000200 M B A B W E 377 377 377 377 377 377 |
| |
| </programlisting> |
| |
| </para> |
| |
| </refsect1> |
| |
| |
| |
| <refsect1> |
| |
| <title>Compatibility</title> |
| |
| |
| |
| <para> |
| |
| There is no <command>COPY</command> statement in the SQL standard. |
| |
| </para> |
| |
| |
| |
| <para> |
| |
| The following syntax was used before <productname>PostgreSQL</> |
| |
| version 7.3 and is still supported: |
| |
| |
| |
| <synopsis> |
| |
| COPY [ BINARY ] <replaceable class="parameter">tablename</replaceable> [ WITH OIDS ] |
| |
| FROM { '<replaceable class="parameter">filename</replaceable>' | STDIN } |
| |
| [ [USING] DELIMITERS '<replaceable class="parameter">delimiter</replaceable>' ] |
| |
| [ WITH NULL AS '<replaceable class="parameter">null string</replaceable>' ] |
| |
| |
| |
| COPY [ BINARY ] <replaceable class="parameter">tablename</replaceable> [ WITH OIDS ] |
| |
| TO { '<replaceable class="parameter">filename</replaceable>' | STDOUT } |
| |
| [ [USING] DELIMITERS '<replaceable class="parameter">delimiter</replaceable>' ] |
| |
| [ WITH NULL AS '<replaceable class="parameter">null string</replaceable>' ] |
| |
| </synopsis> |
| |
| </para> |
| |
| </refsect1> |
| |
| </refentry> |
| |
| |
| |