# Sample load configuration: data file samples/sample_kitchensink.xpt,
# target schema "public", target table "kitchensink".
# Active settings are uncommented; every optional setting is shown as a
# commented-out example directly below its description.

# Data file to load, and the schema/table it is loaded into.
filename: samples/sample_kitchensink.xpt
schemaname: public
tablename: kitchensink

# Optional. If set, only these columns are loaded.
# Mutually exclusive with exclude.
# include:
#   - ID
#   - INTCOL
#   - DATECOL

# Optional. Columns to drop.
# exclude:
#   - ALLNULL

# What to do if the target table already exists: fail | replace | append
# Defaults to fail.
if_exists: append

# file_type: Type of data file to load. One of: sas | text. Default: sas.
#   sas  - SAS files (.sas7bdat, .xpt, .xport) read via pyreadstat
#   text - Delimited text files (.txt, .csv, .tsv) read via pandas
# file_type: sas

# delimiter: Column delimiter for text files. Only used when file_type: text.
#   Accepts: "," (comma, default), "tab" or "\t" (tab), "pipe" or "|" (pipe),
#   or any single character.
# delimiter: ","

# text_encoding: Character encoding for text files. Default: utf-8.
#   Common alternatives: latin-1, cp1252, iso-8859-1.
# text_encoding: utf-8

# quotechar: Quote character for text files. Default: '"' (double quote).
# quotechar: '"'

# partition_by: Partition the table by unique values of these columns.
#   Columns are applied in cascading order
#   (first column = top-level partition).
#   Requires if_exists: replace or fail (not append for initial creation).
#   Single field:
# partition_by: state
#   Multiple fields (cascading):
# partition_by:
#   - state
#   - zip
#
# max_partitions: Warning threshold for total partition count
#   (default: 10000). If the number of partitions exceeds this, a warning
#   is logged but loading continues.
# max_partitions: 10000

# indexes: Create B-tree indexes on these columns after data loading.
#   Indexes are created with IF NOT EXISTS for safe use with append mode.
#   Single column:
# indexes: state
#   Multiple columns (one index per column):
# indexes:
#   - state
#   - zip

# column_types: Explicit {column_name: postgres_type} overrides that
# bypass automatic type inference for the listed columns. Useful when
# pyreadstat reports a column as NUM but you want it stored as TEXT
# (phone/ID columns that are conceptually strings), or when a column's
# inferred type is off for any other reason. Columns not listed here
# fall through to the normal inference path. Nullability is always
# computed from the data.
#
# column_types:
#   RESP_PH_PREFIX_ID: TEXT
#   SOMELONG_ID: BIGINT

# all_nullable: If true, every column is stamped nullable in the generated
# schema; NOT NULL inference is skipped entirely. Use this when the sampler
# wrongly concludes a column has no nulls (e.g. a dense sample followed by
# rare-null data downstream) and COPY blows up mid-load on the first null
# it hits. Off by default. The CLI flag --all-nullable overrides this to
# true when set.
#
# all_nullable: false