COPY file to staging table. This assumes that, of course, you have a PC, which is a unique identifier for each line that does not mutate. I check the remaining columns and the same for the rows that you have already loaded into the destination table, and compare the source with the destination, this will detect updates, deletes new rows as well.
As you can see, I did not add any indexes or configure it in any other way. My goal was to make it function correctly.
-- Fixture setup: one schema per side of the comparison.
CREATE SCHEMA source;
CREATE SCHEMA destination;

-- Both tables start out identical: ten million rows that differ only in
-- employee_id (1..10000000). No keys or indexes are declared on either table.
CREATE TABLE source.employee AS
SELECT
    n AS employee_id,
    CAST('Bob' AS text) AS first_name,
    CAST('H' AS text) AS last_name,
    CAST(21 AS integer) AS age
FROM generate_series(1, 10000000) AS n;

CREATE TABLE destination.employee AS
SELECT
    n AS employee_id,
    CAST('Bob' AS text) AS first_name,
    CAST('H' AS text) AS last_name,
    CAST(21 AS integer) AS age
FROM generate_series(1, 10000000) AS n;
-- Audit destination.employee against source.employee, matched on employee_id.
-- Only rows that fail the audit are returned; AuditFailureType says why:
--   'Missing'  : the row is in source but has no match in destination
--   'Orphan'   : the row is in destination but has no match in source
--   'CHECKSUM' : the row is on both sides but its non-key columns differ
-- The checksum is md5 over the text form of ROW(first_name, last_name, age).
-- A bare first_name || last_name || age becomes NULL as soon as one column is
-- NULL (so the change is never reported) and has no field separator (so
-- ('ab', 'c') hashes like ('a', 'bc')); the row text keeps both distinctions.
-- That hash is never NULL, so the unmatched-row tests must come first.
SELECT
    d.*,
    s.*,
    CASE
        WHEN d.employee_id IS NULL THEN 'Missing'
        WHEN s.employee_id IS NULL THEN 'Orphan'
        WHEN md5(CAST(ROW(s.first_name, s.last_name, s.age) AS text))
            <> md5(CAST(ROW(d.first_name, d.last_name, d.age) AS text)) THEN 'CHECKSUM'
    END AS AuditFailureType
FROM destination.employee AS d
FULL OUTER JOIN source.employee AS s
    ON d.employee_id = s.employee_id
WHERE d.employee_id IS NULL
    OR s.employee_id IS NULL
    OR md5(CAST(ROW(s.first_name, s.last_name, s.age) AS text))
        <> md5(CAST(ROW(d.first_name, d.last_name, d.age) AS text));
-- Change a single source row so that it no longer matches its destination
-- counterpart.
UPDATE source.employee AS e
SET age = 99
WHERE e.employee_id = 45000;
-- Audit destination.employee against source.employee, matched on employee_id.
-- Only rows that fail the audit are returned; AuditFailureType says why:
--   'Missing'  : the row is in source but has no match in destination
--   'Orphan'   : the row is in destination but has no match in source
--   'CHECKSUM' : the row is on both sides but its non-key columns differ
-- The checksum is md5 over the text form of ROW(first_name, last_name, age).
-- A bare first_name || last_name || age becomes NULL as soon as one column is
-- NULL (so the change is never reported) and has no field separator (so
-- ('ab', 'c') hashes like ('a', 'bc')); the row text keeps both distinctions.
-- That hash is never NULL, so the unmatched-row tests must come first.
SELECT
    d.*,
    s.*,
    CASE
        WHEN d.employee_id IS NULL THEN 'Missing'
        WHEN s.employee_id IS NULL THEN 'Orphan'
        WHEN md5(CAST(ROW(s.first_name, s.last_name, s.age) AS text))
            <> md5(CAST(ROW(d.first_name, d.last_name, d.age) AS text)) THEN 'CHECKSUM'
    END AS AuditFailureType
FROM destination.employee AS d
FULL OUTER JOIN source.employee AS s
    ON d.employee_id = s.employee_id
WHERE d.employee_id IS NULL
    OR s.employee_id IS NULL
    OR md5(CAST(ROW(s.first_name, s.last_name, s.age) AS text))
        <> md5(CAST(ROW(d.first_name, d.last_name, d.age) AS text));
source
share