# Example folder-level loader config.
#
# Shape mirrors what `load_folder.py` expects:
#
#   python load_folder.py --config sample_folder_config.yaml --dry-run
#   python load_folder.py --config sample_folder_config.yaml
#
# Relative paths are resolved against this config file's directory first,
# falling back to the current working directory if the path does not exist
# there.

# Folder whose files are loaded. This is a relative path, so it is looked up
# next to this config file first and then in the current working directory.
folder: samples/folder_test
# NOTE(review): presumably the database schema the cluster tables are created
# in -- confirm against load_folder.py.
schemaname: public

# Folder-level default, applied when creating the first file of each cluster.
# One of: fail | replace | append. Default: fail.
# A cluster entry can override this with its own `if_exists` key.
if_exists: replace

# When true (default), any file not matched by an explicit pattern below is
# auto-grouped with its peers by stripping trailing digits (and any trailing
# `_` / `-`) from the file stem. Files with no trailing digits become their
# own singleton cluster.
auto_detect: true

# Folder-level column filter. Every file in every cluster passes through
# this filter. `include` and `exclude` are mutually exclusive, so uncomment
# at most one of the two examples below. A cluster can override these via its
# own `include` / `exclude` keys.
#
# include:
#   - ID
#   - INTCOL
# exclude:
#   - ALLNULL

# Folder-level partition_by: Partition every cluster's table by unique values
# of these columns. Inherited by all clusters unless overridden per-cluster.
# Requires if_exists: replace or fail (append cannot be used for the initial
# table creation).
# Single field:
#   partition_by: state
# Multiple fields (cascading):
#   partition_by:
#     - state
#     - zip
#
# Folder-level max_partitions: Warning threshold for total partition count
# (default: 10000). Inherited by all clusters unless overridden per-cluster.
#   max_partitions: 10000

# Folder-level indexes: Create B-tree indexes on these columns after data
# loading. Inherited by all clusters unless overridden per-cluster.
# Indexes are created with IF NOT EXISTS for safe use with append mode.
# Single column:
#   indexes: state
# Multiple columns (one index per column):
#   indexes:
#     - state
#     - zip

# Explicit cluster patterns. Each pattern is matched against the file
# *basename*. Files matched by a pattern are pulled out of the auto-detect
# pool, so explicit and auto clusters compose cleanly.
#
# `tablename` is required. `if_exists`, `include`, `exclude`, `partition_by`,
# `max_partitions`, and `indexes` are optional per-cluster overrides of the
# folder-level defaults above.
clusters:
  # Single-quoted so the backslashes in the pattern stay literal.
  - pattern: '^group_a\d+\.xpt$'
    # Required for every cluster entry; belongs to the same list item as
    # `pattern`.
    tablename: group_a

# Example of an explicit override. Uncomment to force the group_b cluster to
# append instead of replace even though the folder default is "replace":
#
#   - pattern: '^group_b\d+\.xpt$'
#     tablename: group_b
#     if_exists: append

# Per-cluster partition_by / max_partitions override. These take precedence
# over the folder-level defaults above.
#
#   - pattern: '^group_c\d+\.xpt$'
#     tablename: group_c
#     partition_by:
#       - region
#       - year
#     max_partitions: 500

# Per-cluster indexes override. Takes precedence over the folder-level
# indexes default above. An explicit empty list disables indexing for
# this cluster even when the folder default has indexes.
#
#   - pattern: '^group_d\d+\.xpt$'
#     tablename: group_d
#     indexes:
#       - region
#       - year

# With only the group_a pattern explicit, auto_detect: true will still bucket
# group_b1.xpt + group_b2.xpt into a "group_b" cluster and the lone
# standalone.xpt into a "standalone" cluster. See generate_sample_folder.py
# for the fixture that exercises exactly this layout.