2026-04-18 14:34:48 +00:00
|
|
|
filename: samples/sample_kitchensink.xpt
|
|
|
|
|
schemaname: public
|
|
|
|
|
tablename: kitchensink
|
|
|
|
|
|
|
|
|
|
# Optional. If set, only these columns are loaded. Mutually exclusive with exclude.
|
|
|
|
|
# include:
|
|
|
|
|
# - ID
|
|
|
|
|
# - INTCOL
|
|
|
|
|
# - DATECOL
|
|
|
|
|
|
|
|
|
|
# Optional. Columns to drop. Mutually exclusive with include.
|
|
|
|
|
# exclude:
|
|
|
|
|
# - ALLNULL
|
|
|
|
|
|
|
|
|
|
# What to do if the target table already exists: fail | replace | append
|
|
|
|
|
# Defaults to fail.
|
|
|
|
|
if_exists: append
|
2026-04-20 14:56:00 +00:00
|
|
|
|
2026-04-22 01:05:26 +00:00
|
|
|
# file_type: Type of data file to load. One of: sas | text. Default: sas.
|
|
|
|
|
# sas - SAS files (.sas7bdat, .xpt, .xport) read via pyreadstat
|
|
|
|
|
# text - Delimited text files (.txt, .csv, .tsv) read via pandas
|
|
|
|
|
# file_type: sas
|
|
|
|
|
|
|
|
|
|
# delimiter: Column delimiter for text files. Only used when file_type: text.
|
|
|
|
|
# Accepts: "," (comma, default), "tab" or "\t" (tab), "pipe" or "|" (pipe),
|
|
|
|
|
# or any single character.
|
|
|
|
|
# delimiter: ","
|
|
|
|
|
|
|
|
|
|
# text_encoding: Character encoding for text files. Default: utf-8.
|
|
|
|
|
# Common alternatives: latin-1, cp1252, iso-8859-1.
|
|
|
|
|
# text_encoding: utf-8
|
|
|
|
|
|
|
|
|
|
# quotechar: Quote character for text files. Default: '"' (double quote).
|
|
|
|
|
# quotechar: '"'
|
|
|
|
|
|
2026-04-20 14:56:00 +00:00
|
|
|
# partition_by: Partition the table by unique values of these columns.
|
|
|
|
|
# Columns are applied in cascading order (first column = top-level partition).
|
|
|
|
|
# Requires if_exists: replace or fail when the table is first created (append is not supported for initial creation).
|
|
|
|
|
# Single field:
|
|
|
|
|
# partition_by: state
|
|
|
|
|
# Multiple fields (cascading):
|
|
|
|
|
# partition_by:
|
|
|
|
|
# - state
|
|
|
|
|
# - zip
|
|
|
|
|
#
|
|
|
|
|
# max_partitions: Warning threshold for total partition count (default: 10000).
|
|
|
|
|
# If the number of partitions exceeds this, a warning is logged but loading continues.
|
|
|
|
|
# max_partitions: 10000
|
2026-04-20 15:18:09 +00:00
|
|
|
|
|
|
|
|
# indexes: Create B-tree indexes on these columns after data loading.
|
|
|
|
|
# Indexes are created with IF NOT EXISTS for safe use with append mode.
|
|
|
|
|
# Single column:
|
|
|
|
|
# indexes: state
|
|
|
|
|
# Multiple columns (one index per column):
|
|
|
|
|
# indexes:
|
|
|
|
|
# - state
|
|
|
|
|
# - zip
|
2026-04-21 17:14:44 +00:00
|
|
|
|
|
|
|
|
# column_types: Explicit {column_name: postgres_type} overrides that
|
|
|
|
|
# bypass automatic type inference for the listed columns. Useful when
|
|
|
|
|
# pyreadstat reports a column as NUM but you want it stored as TEXT
|
|
|
|
|
# (phone/ID columns that are conceptually strings), or when a column's
|
|
|
|
|
# inferred type is off for any other reason. Columns not listed here
|
|
|
|
|
# fall through to the normal inference path. Unless all_nullable is true, nullability is still
|
|
|
|
|
# computed from the data.
|
|
|
|
|
#
|
|
|
|
|
# column_types:
|
|
|
|
|
# RESP_PH_PREFIX_ID: TEXT
|
|
|
|
|
# SOMELONG_ID: BIGINT
|
2026-04-21 21:48:37 +00:00
|
|
|
|
|
|
|
|
# all_nullable: If true, every column is stamped nullable in the generated
|
|
|
|
|
# schema; NOT NULL inference is skipped entirely. Use this when the sampler
|
|
|
|
|
# wrongly concludes a column has no nulls (e.g. a dense sample followed by
|
|
|
|
|
# rare-null data downstream) and COPY fails mid-load on the first null
|
|
|
|
|
# it hits. Off by default. The CLI flag --all-nullable overrides this to
|
|
|
|
|
# true when set.
|
|
|
|
|
#
|
|
|
|
|
# all_nullable: false
|