Applies to PostgreSQL versions 14 to 17.
Step 1: Create Non-Partitioned Source Table and Insert Sample Data
DROP TABLE IF EXISTS sales_raw;
CREATE TABLE sales_raw (
    id serial PRIMARY KEY,
    sale_date date NOT NULL,
    customer_name text,
    amount numeric
);
-- Insert sample data covering 2020 through 2024
INSERT INTO sales_raw (sale_date, customer_name, amount)
SELECT
    day,
    'Customer_' || (random()*100)::int,
    (random()*1000)::numeric(10,2)
FROM generate_series('2020-01-01'::date, '2024-12-31'::date, '1 day') day,
     generate_series(1, 50); -- 1,827 days x 50 = 91,350 rows
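To sanity-check the load before migrating:
SELECT count(*) AS total_rows,
       min(sale_date) AS first_day,
       max(sale_date) AS last_day
FROM sales_raw;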
Step 2: Create Partitioned Table with Composite Primary Key
PostgreSQL requires every unique constraint on a partitioned table to include the partition key, which is why the primary key here is (sale_date, id) rather than id alone.
CREATE TABLE sales_raw_new
(
    id int NOT NULL,
    sale_date date NOT NULL,
    customer_name text,
    amount numeric,
    PRIMARY KEY (sale_date, id)
)
PARTITION BY RANGE (sale_date);
Step 3: Create Yearly Partitions (2020–2024)
CREATE TABLE sales_raw_2020 PARTITION OF sales_raw_new
    FOR VALUES FROM ('2020-01-01') TO ('2021-01-01');
CREATE TABLE sales_raw_2021 PARTITION OF sales_raw_new
    FOR VALUES FROM ('2021-01-01') TO ('2022-01-01');
CREATE TABLE sales_raw_2022 PARTITION OF sales_raw_new
    FOR VALUES FROM ('2022-01-01') TO ('2023-01-01');
CREATE TABLE sales_raw_2023 PARTITION OF sales_raw_new
    FOR VALUES FROM ('2023-01-01') TO ('2024-01-01');
CREATE TABLE sales_raw_2024 PARTITION OF sales_raw_new
    FOR VALUES FROM ('2024-01-01') TO ('2025-01-01');
Note: PostgreSQL range partitions use half-open intervals, [from, to). The FROM bound is inclusive and the TO bound is exclusive, which is why each yearly partition ends at January 1 of the following year.
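Optionally (my own addition, not required by the steps that follow), a DEFAULT partition catches rows that fall outside all defined ranges instead of raising an error:
CREATE TABLE sales_raw_default PARTITION OF sales_raw_new DEFAULT;
Be aware that rows sitting in a default partition can block a later ATTACH of an overlapping range (Step 12), so empty or drop it before reattaching archived partitions.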
Step 4: Optional Indexes on Partitions
CREATE INDEX idx_sales_2020_id ON sales_raw_2020(id);
CREATE INDEX idx_sales_2021_id ON sales_raw_2021(id);
CREATE INDEX idx_sales_2022_id ON sales_raw_2022(id);
CREATE INDEX idx_sales_2023_id ON sales_raw_2023(id);
CREATE INDEX idx_sales_2024_id ON sales_raw_2024(id);
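Since PostgreSQL 11, one statement on the parent achieves the same thing: an index created on a partitioned table is cascaded to every partition, including ones added later. The index name idx_sales_id below is my own choice:
CREATE INDEX idx_sales_id ON sales_raw_new (id);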
Step 5: Create Backfill Log Table
DROP TABLE IF EXISTS backfill_log;
CREATE TABLE backfill_log (
    id serial PRIMARY KEY,
    batch_no int,
    last_id int,
    rows_copied int,
    started_at timestamp,
    ended_at timestamp,
    status text
);
Step 6: Run Batch Backfill with Logging (Idempotent, Safe to Re-run)
DO $$
DECLARE
    batch_size    int := 10000;
    last_id       int := 0;
    rows_selected int;
    rows_copied   int;
    batch_no      int := 0;
    batch_start   timestamp;
BEGIN
    LOOP
        batch_no := batch_no + 1;
        batch_start := clock_timestamp();

        WITH to_insert AS (
            SELECT id, sale_date, customer_name, amount
            FROM sales_raw
            WHERE id > last_id
              AND sale_date >= '2020-01-01' AND sale_date < '2025-01-01'
            ORDER BY id
            LIMIT batch_size
        ),
        inserted AS (
            INSERT INTO sales_raw_new (id, sale_date, customer_name, amount)
            SELECT * FROM to_insert
            ON CONFLICT (sale_date, id) DO NOTHING
            RETURNING id
        )
        SELECT (SELECT max(id)  FROM to_insert),
               (SELECT count(*) FROM to_insert),
               (SELECT count(*) FROM inserted)
        INTO last_id, rows_selected, rows_copied;

        -- Exit when the source is exhausted, not when a batch happens to be
        -- fully conflicting; otherwise a re-run would stop at the first
        -- already-copied batch instead of skipping past it.
        IF rows_selected = 0 THEN
            EXIT;
        END IF;

        INSERT INTO backfill_log (batch_no, last_id, rows_copied, started_at, ended_at, status)
        VALUES (batch_no, last_id, rows_copied, batch_start, clock_timestamp(), 'done');

        PERFORM pg_sleep(0.2); -- Optional pause to reduce I/O pressure
    END LOOP;
END $$;
Each batch advances by the highest id selected (keyset pagination), so an interrupted run can simply be restarted and will skip past rows that are already in place.
Step 7: Monitor Progress (in another session)
SELECT * FROM backfill_log ORDER BY id DESC LIMIT 10;
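For an overall total rather than per-batch rows:
SELECT sum(rows_copied) AS total_copied,
       max(ended_at) AS last_batch_finished
FROM backfill_log;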
Step 8: Set Up Live Trigger Sync (for changes during migration)
On a live system, create this trigger before running the Step 6 backfill so no concurrent change is missed; the ON CONFLICT clauses make the trigger and the backfill safe to overlap.
CREATE OR REPLACE FUNCTION trg_sync_sales_raw()
RETURNS TRIGGER AS $$
BEGIN
    IF TG_OP = 'INSERT' THEN
        INSERT INTO sales_raw_new (id, sale_date, customer_name, amount)
        VALUES (NEW.id, NEW.sale_date, NEW.customer_name, NEW.amount)
        ON CONFLICT (sale_date, id) DO NOTHING;
    ELSIF TG_OP = 'UPDATE' THEN
        -- Delete-then-insert handles updates that move a row to another partition
        DELETE FROM sales_raw_new WHERE sale_date = OLD.sale_date AND id = OLD.id;
        INSERT INTO sales_raw_new (id, sale_date, customer_name, amount)
        VALUES (NEW.id, NEW.sale_date, NEW.customer_name, NEW.amount)
        ON CONFLICT (sale_date, id) DO NOTHING;
    ELSIF TG_OP = 'DELETE' THEN
        DELETE FROM sales_raw_new WHERE sale_date = OLD.sale_date AND id = OLD.id;
    END IF;
    RETURN NULL; -- AFTER trigger; the return value is ignored
END;
$$ LANGUAGE plpgsql;
CREATE TRIGGER trg_sync_all
    AFTER INSERT OR UPDATE OR DELETE ON sales_raw
    FOR EACH ROW EXECUTE FUNCTION trg_sync_sales_raw();
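A quick way to confirm the sync works (the test row and name are mine; remove it afterwards):
INSERT INTO sales_raw (sale_date, customer_name, amount)
VALUES ('2024-06-01', 'Trigger_Test', 1.00);

SELECT * FROM sales_raw_new WHERE customer_name = 'Trigger_Test';

DELETE FROM sales_raw WHERE customer_name = 'Trigger_Test'; -- the trigger removes it from sales_raw_new too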
Step 9: Final Switchover
-- Keep the old table around under a different name
ALTER TABLE sales_raw RENAME TO sales_raw_old;
-- Activate the partitioned version under the original name
ALTER TABLE sales_raw_new RENAME TO sales_raw;
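A minimal sketch of a safer switchover, assuming the serial column's default sequence is named sales_raw_id_seq (PostgreSQL's standard naming). It drops the sync trigger, performs both renames atomically, and re-points the sequence so inserts into the new table keep generating ids:
BEGIN;

-- The sync trigger would follow the renamed table and start failing; drop it first
DROP TRIGGER trg_sync_all ON sales_raw;

ALTER TABLE sales_raw RENAME TO sales_raw_old;
ALTER TABLE sales_raw_new RENAME TO sales_raw;

-- The new table has no default for id; reuse the old serial sequence
ALTER TABLE sales_raw ALTER COLUMN id SET DEFAULT nextval('sales_raw_id_seq');
-- Re-own the sequence so dropping sales_raw_old (Step 10) does not drop it
ALTER SEQUENCE sales_raw_id_seq OWNED BY sales_raw.id;

COMMIT;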
Step 10: (Optional) Clean Up
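Before dropping anything, it is worth verifying that old and new agree. One simple check (my own, and slow on very large tables) compares row counts:
SELECT
    (SELECT count(*) FROM sales_raw_old) AS old_rows,
    (SELECT count(*) FROM sales_raw)     AS new_rows;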
-- Drop old table after verification
DROP TABLE sales_raw_old;
Step 11: Detach and Archive Old Partition (e.g., 2020)
Detaching a partition (e.g., sales_raw_2020) removes it from the main table but preserves the table and its data:
ALTER TABLE sales_raw DETACH PARTITION sales_raw_2020;
On PostgreSQL 14 and later, ALTER TABLE ... DETACH PARTITION ... CONCURRENTLY avoids blocking concurrent queries during the detach.
Now sales_raw_2020 is a standalone table.
Optional: Archive the Detached Table
You can export it using tools like:
a) pg_dump (custom-format dump, per the -F c flag):
pg_dump -t sales_raw_2020 -F c -f /home/postgres/backup/sales_raw_2020.backup your_db
b) CSV export:
COPY sales_raw_2020 TO '/home/postgres/backup/sales_raw_2020.csv' CSV HEADER;
(Server-side COPY writes the file as the database server's OS user; from a client machine, use psql's \copy instead.)
Then:
- Move the backup to AWS S3 or Glacier
- Drop the detached table locally (if desired):
DROP TABLE sales_raw_2020;
Step 12: ATTACH Old Partition (e.g., 2020)
Restore Later When Needed
If users request archived data, you can reattach it as a partition:
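If the table was dropped after archiving, first restore it from the custom-format dump taken in Step 11 (same hypothetical paths and database name):
pg_restore -d your_db /home/postgres/backup/sales_raw_2020.backup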
-- Ensure it still matches structure
ALTER TABLE sales_raw ATTACH PARTITION sales_raw_2020
    FOR VALUES FROM ('2020-01-01') TO ('2021-01-01');
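After reattaching, partition pruning should once again route 2020 queries to the restored partition; the plan should show only sales_raw_2020 being scanned:
EXPLAIN SELECT * FROM sales_raw
WHERE sale_date BETWEEN '2020-03-01' AND '2020-03-31';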
Thank you for reading.
PostgreSQL is open source and full of possibilities, and I'm happy to discuss its features further.
Disclaimer:
The information provided here is based on my personal knowledge, experience, and publicly
available sources.
https://www.linkedin.com/in/mariyanclement