---
# Example folder-level loader config.
#
# Shape mirrors what `load_folder.py` expects:
#
#   python load_folder.py --config sample_folder_config.yaml --dry-run
#   python load_folder.py --config sample_folder_config.yaml
#
# Relative paths are resolved against this config file's directory first,
# falling back to the current working directory if that doesn't exist.

folder: samples/folder_test
schemaname: public

# Applied when creating the first file of each cluster.
# One of: fail | replace | append. Default: fail.
if_exists: replace

# When true (default), any file not matched by an explicit pattern below is
# auto-grouped with its peers by stripping trailing digits (and any trailing
# _ / -) from the file stem. Files with no trailing digits become their own
# singleton cluster.
auto_detect: true

# Folder-level column filter. Every file in every cluster passes through
# this filter. `include` and `exclude` are mutually exclusive. A cluster can
# override these via its own `include` / `exclude` keys.
#
# include:
#   - ID
#   - INTCOL
# exclude:
#   - ALLNULL

# Folder-level partition_by: Partition every cluster's table by unique values
# of these columns. Inherited by all clusters unless overridden per-cluster.
# Requires if_exists: replace or fail (not append for initial creation).
# Single field:
# partition_by: state
# Multiple fields (cascading):
# partition_by:
#   - state
#   - zip
#
# Folder-level max_partitions: Warning threshold for total partition count
# (default: 10000). Inherited by all clusters unless overridden per-cluster.
# max_partitions: 10000

# Folder-level indexes: Create B-tree indexes on these columns after data
# loading. Inherited by all clusters unless overridden per-cluster.
# Indexes are created with IF NOT EXISTS for safe use with append mode.
# Single column:
# indexes: state
# Multiple columns (one index per column):
# indexes:
#   - state
#   - zip

# Explicit cluster patterns. Each pattern is matched against the file
# *basename*. Files matched by a pattern are pulled out of the auto-detect
# pool, so explicit and auto clusters compose cleanly.
#
# `tablename` is required. `if_exists`, `include`, `exclude`,
# `partition_by`, `max_partitions`, and `indexes` are optional per-cluster
# overrides of the folder-level defaults above.
clusters:
  - pattern: '^group_a\d+\.xpt$'
    tablename: group_a

  # Example of an explicit override. Uncomment to force the group_b
  # cluster to append instead of replace even though the folder default
  # is "replace":
  #
  # - pattern: '^group_b\d+\.xpt$'
  #   tablename: group_b
  #   if_exists: append

  # Per-cluster partition_by / max_partitions override. These take
  # precedence over the folder-level defaults above.
  #
  # - pattern: '^group_c\d+\.xpt$'
  #   tablename: group_c
  #   partition_by:
  #     - region
  #     - year
  #   max_partitions: 500

  # Per-cluster indexes override. Takes precedence over the folder-level
  # indexes default above. An explicit empty list (`indexes: []`) disables
  # indexing for this cluster even when the folder default has indexes.
  #
  # - pattern: '^group_d\d+\.xpt$'
  #   tablename: group_d
  #   indexes:
  #     - region
  #     - year

# With only the group_a pattern explicit, auto_detect: true will still bucket
# group_b1.xpt + group_b2.xpt into a "group_b" cluster and the lone
# standalone.xpt into a "standalone" cluster. See generate_sample_folder.py
# for the fixture that exercises exactly this layout.