advanced_analyzer #8

Merged
dp merged 23 commits from advanced_analyzer into main 2026-04-21 22:32:18 +00:00
Showing only changes of commit 5e347f50ef - Show all commits

View File

@ -977,6 +977,30 @@ def _normalize_type(pg_type: str) -> str:
return _TYPE_NORMALIZATION.get(stripped, stripped.lower())
# Widening pairs: (inferred_from_source, existing_in_target), both given as
# normalized type names. When the incoming spec is narrower than the target
# we accept it - every value is guaranteed to fit, and ``_prepare_for_copy``
# already emits ``COPY`` payloads that Postgres promotes to the wider column
# type.
#
# The INVERSE direction stays a hard failure: a BIGINT value does not fit in
# an INTEGER column, so a cluster whose first file had only small ints must
# not accept a later file carrying a value past int32.
#
# The accepted direction comes up most often on cluster loads where file 1
# pushed the target to BIGINT (a single value > 2_147_483_647) and file N
# happens to sit entirely within int32 range - strict equality would reject
# file N even though the copy is trivially safe.
#
# The table must stay transitively closed: if (a, b) and (b, c) are listed,
# (a, c) must be listed too, otherwise acceptance would depend on which
# intermediate type the inference happened to pick.
_WIDENING_COMPATIBLE: set = {
    # Integer family: each type's range is a strict subset of the next.
    ("smallint", "integer"),
    ("smallint", "bigint"),
    ("integer", "bigint"),
    # Floating-point family.
    ("real", "double precision"),
    # SMALLINT is 16 bits wide, so it is exact in both REAL (24-bit
    # significand) and DOUBLE PRECISION (53-bit significand).
    ("smallint", "real"),
    ("smallint", "double precision"),
    # INTEGER is exact in DOUBLE PRECISION. (integer, real) is deliberately
    # absent: int32 does not fit in REAL's 24-bit significand.
    ("integer", "double precision"),
    # BIGINT is exact in DOUBLE PRECISION only for magnitudes up to 2**53;
    # anything larger would be rounded silently.
    # NOTE(review): the original rationale for this pair was that pandas
    # never delivers larger Int64 values on this path - confirm upstream.
    ("bigint", "double precision"),
}
def _assert_schema_compatible(
conn, schema: str, table: str, columns: Dict[str, ColumnSpec]
) -> None:
@ -1003,11 +1027,22 @@ def _assert_schema_compatible(
inferred_norm = _normalize_type(spec.postgres_type)
target_norm = _normalize_type(target_type)
if inferred_norm != target_norm:
mismatches.append(
f"column {name!r}: inferred {spec.postgres_type} "
f"(normalized {inferred_norm!r}) but target is {target_type} "
f"(normalized {target_norm!r})"
)
if (inferred_norm, target_norm) in _WIDENING_COMPATIBLE:
# Narrower inferred type fits inside the wider target.
# Accept it, but record a warning so the operator knows
# the file came in with a smaller range than the
# cluster's target was sized for.
warnings.append(
f"column {name!r}: inferred {spec.postgres_type} "
f"(narrower than target {target_type}); accepting - "
f"values fit in the wider target type"
)
else:
mismatches.append(
f"column {name!r}: inferred {spec.postgres_type} "
f"(normalized {inferred_norm!r}) but target is {target_type} "
f"(normalized {target_norm!r})"
)
target_is_notnull = (target_nullable == "NO")
if spec.nullable and target_is_notnull:
warnings.append(