foxtrot/generic_loader/sample_config.yaml

53 lines
1.6 KiB
YAML
Raw Normal View History

2026-04-18 14:34:48 +00:00
# Input file to load (this sample points at an .xpt file).
filename: samples/sample_kitchensink.xpt
# Schema and name of the target table (see if_exists below).
schemaname: public
tablename: kitchensink
# include: Optional. If set, only these columns are loaded.
# Mutually exclusive with exclude.
# include:
#   - ID
#   - INTCOL
#   - DATECOL
# exclude: Optional. Columns to drop.
# exclude:
#   - ALLNULL
# if_exists: What to do if the target table already exists:
# fail | replace | append. Defaults to fail.
if_exists: append
2026-04-20 14:56:00 +00:00
# partition_by: Partition the table by the unique values of these columns.
# Columns are applied in cascading order (first column = top-level partition).
# Creating the partitioned table requires if_exists: replace or fail;
# append cannot be used for the initial creation. Note that this sample sets
# if_exists: append, so change it before enabling partition_by on a new table.
# Single field:
# partition_by: state
# Multiple fields (cascading):
# partition_by:
#   - state
#   - zip
#
# max_partitions: Warning threshold for the total partition count (default: 10000).
# If the number of partitions exceeds this, a warning is logged but loading continues.
# max_partitions: 10000
2026-04-20 15:18:09 +00:00
# indexes: Create B-tree indexes on these columns after the data is loaded.
# Indexes are created with IF NOT EXISTS, so this is safe to use with append mode.
# Single column:
# indexes: state
# Multiple columns (one index per column):
# indexes:
#   - state
#   - zip
# column_types: Explicit {column_name: postgres_type} overrides that
# bypass automatic type inference for the listed columns. Useful when
# pyreadstat reports a column as NUM but you want it stored as TEXT
# (e.g. phone/ID columns that are conceptually strings), or when a column's
# inferred type is wrong for any other reason. Columns not listed here
# fall through to the normal inference path. Nullability is always
# computed from the data.
#
# column_types:
#   RESP_PH_PREFIX_ID: TEXT
#   SOMELONG_ID: BIGINT