# filename: Path to the data file to load (here a SAS .xpt file).
# schemaname / tablename: Identify the target table the data is loaded into
# (presumably the database schema and table name -- confirm against the loader).
filename: samples/sample_kitchensink.xpt
schemaname: public
tablename: kitchensink

# Optional. If set, only these columns are loaded. Mutually exclusive with exclude.
# include:
#   - ID
#   - INTCOL
#   - DATECOL

# Optional. Columns to drop. Mutually exclusive with include.
# exclude:
#   - ALLNULL

# if_exists: What to do if the target table already exists.
# One of: fail | replace | append. Default: fail.
# Note: partition_by (documented below) requires replace or fail when the
# table is first created, so change this value before enabling partition_by
# on a new table.
if_exists: append

# file_type: Type of data file to load. One of: sas | text. Default: sas.
#   sas  - SAS files (.sas7bdat, .xpt, .xport) read via pyreadstat
#   text - Delimited text files (.txt, .csv, .tsv) read via pandas
# file_type: sas

# delimiter: Column delimiter for text files. Only used when file_type: text.
# Accepts: "," (comma, default), "tab" or "\t" (tab), "pipe" or "|" (pipe),
# or any single character.
# delimiter: ","

# text_encoding: Character encoding for text files. Default: utf-8.
# Common alternatives: latin-1, cp1252, iso-8859-1.
# text_encoding: utf-8

# quotechar: Quote character for text files. Default: '"' (double quote).
# quotechar: '"'

# partition_by: Partition the table by unique values of these columns.
# Columns are applied in cascading order (first column = top-level partition).
# Initial creation of a partitioned table requires if_exists: replace or fail;
# append cannot be used to create it.
# Single field:
#   partition_by: state
# Multiple fields (cascading):
#   partition_by:
#     - state
#     - zip
#
# max_partitions: Warning threshold for total partition count (default: 10000).
# If the number of partitions exceeds this, a warning is logged but loading continues.
#   max_partitions: 10000

# indexes: Create B-tree indexes on these columns after data loading.
# Indexes are created with IF NOT EXISTS for safe use with append mode.
# Single column:
#   indexes: state
# Multiple columns (one index per column):
#   indexes:
#     - state
#     - zip

# column_types: Explicit {column_name: postgres_type} overrides that
# bypass automatic type inference for the listed columns. Useful when
# pyreadstat reports a column as NUM but you want it stored as TEXT
# (phone/ID columns that are conceptually strings), or when a column's
# inferred type is off for any other reason. Columns not listed here
# fall through to the normal inference path. Nullability is always
# computed from the data.
#
# column_types:
#   RESP_PH_PREFIX_ID: TEXT
#   SOMELONG_ID: BIGINT

# all_nullable: If true, every column is stamped nullable in the generated
# schema; NOT NULL inference is skipped entirely. Use this when the sampler
# wrongly concludes a column has no nulls (e.g. a dense sample followed by
# rare-null data downstream) and COPY blows up mid-load on the first null
# it hits. Off by default. The CLI flag --all-nullable overrides this to
# true when set.
#
# all_nullable: false