Commit each batch (chunk) as it is copied instead of leaving the commit to the caller
This commit is contained in:
parent
508cc974ea
commit
e39eb47a90
@ -385,9 +385,10 @@ def _infer_cluster_schema(path: Path, include, exclude):
|
|||||||
def load_cluster(conn, cluster: ClusterSpec, schemaname: str) -> int:
|
def load_cluster(conn, cluster: ClusterSpec, schemaname: str) -> int:
|
||||||
"""Load every file in ``cluster`` into one table. Returns total rows loaded.
|
"""Load every file in ``cluster`` into one table. Returns total rows loaded.
|
||||||
|
|
||||||
The caller owns transaction boundaries. This function does NOT commit or
|
Commits happen per chunk inside :func:`load_sas.copy_dataframes`. If a
|
||||||
roll back - :func:`main` does that per cluster so one bad cluster
|
file mid-cluster fails, earlier chunks - including chunks from earlier
|
||||||
doesn't poison the rest of the run.
|
files in the cluster - stay committed; only the in-flight chunk is
|
||||||
|
rolled back by :func:`main`.
|
||||||
"""
|
"""
|
||||||
if not cluster.files:
|
if not cluster.files:
|
||||||
return 0
|
return 0
|
||||||
@ -407,8 +408,8 @@ def load_cluster(conn, cluster: ClusterSpec, schemaname: str) -> int:
|
|||||||
for path in rest:
|
for path in rest:
|
||||||
columns = _infer_cluster_schema(path, cluster.include, cluster.exclude)
|
columns = _infer_cluster_schema(path, cluster.include, cluster.exclude)
|
||||||
# Uses the same check that if_exists=append runs. A type mismatch or
|
# Uses the same check that if_exists=append runs. A type mismatch or
|
||||||
# missing column aborts the cluster; the transaction rollback in
|
# missing column aborts the cluster; because chunks commit as they
|
||||||
# main() keeps the table from ending up half-loaded.
|
# load, earlier chunks in the cluster remain in the table.
|
||||||
assert_schema_compatible(conn, schemaname, cluster.tablename, columns)
|
assert_schema_compatible(conn, schemaname, cluster.tablename, columns)
|
||||||
total += _stream_file(
|
total += _stream_file(
|
||||||
conn, schemaname, cluster.tablename, path, columns,
|
conn, schemaname, cluster.tablename, path, columns,
|
||||||
|
|||||||
@ -194,8 +194,8 @@ will fail mid-stream and the whole transaction rolls back. Set
|
|||||||
matters more than speed.
|
matters more than speed.
|
||||||
|
|
||||||
Streaming loads use :func:`iter_sas_chunks` + :func:`copy_dataframes`, which
|
Streaming loads use :func:`iter_sas_chunks` + :func:`copy_dataframes`, which
|
||||||
share one cursor and transaction so a failure mid-file rolls back the whole
|
commit each chunk as it is copied so an interrupted load retains the rows
|
||||||
load.
|
that were already written.
|
||||||
|
|
||||||
7. Tunables
|
7. Tunables
|
||||||
-----------
|
-----------
|
||||||
@ -1032,10 +1032,12 @@ def copy_dataframes(
|
|||||||
dfs: Iterable[pd.DataFrame],
|
dfs: Iterable[pd.DataFrame],
|
||||||
columns: Dict[str, ColumnSpec],
|
columns: Dict[str, ColumnSpec],
|
||||||
) -> int:
|
) -> int:
|
||||||
"""Stream an iterable of DataFrames into one ``COPY`` session.
|
"""Stream an iterable of DataFrames into Postgres, committing each chunk.
|
||||||
|
|
||||||
All chunks share a cursor and transaction, so a failure mid-stream
|
Each non-empty chunk is copied via ``COPY ... FROM STDIN`` and committed
|
||||||
rolls back the whole load when the caller hasn't committed yet.
|
before the next chunk is processed, so an interrupted or failed load
|
||||||
|
retains the rows from previously committed chunks. The first chunk's
|
||||||
|
commit also persists any pending DDL (e.g. a preceding ``CREATE TABLE``).
|
||||||
Empty chunks are skipped. Returns the total rows inserted.
|
Empty chunks are skipped. Returns the total rows inserted.
|
||||||
"""
|
"""
|
||||||
col_list = ", ".join(_quote_ident(name) for name in columns.keys())
|
col_list = ", ".join(_quote_ident(name) for name in columns.keys())
|
||||||
@ -1060,6 +1062,7 @@ def copy_dataframes(
|
|||||||
)
|
)
|
||||||
buf.seek(0)
|
buf.seek(0)
|
||||||
cur.copy_expert(sql, buf)
|
cur.copy_expert(sql, buf)
|
||||||
|
conn.commit()
|
||||||
total += len(prepared)
|
total += len(prepared)
|
||||||
return total
|
return total
|
||||||
|
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user