#!/usr/bin/python3
"""Set ``omnivore.library_item.original_content`` to NULL in small batches.

Connection settings are read from the PG_HOST, PG_PORT, PG_USER,
PG_PASSWORD, PG_DB and PG_TIMEOUT environment variables; each falls back to
the default given below when unset.

Provenance: reconstructed from git patch 8bbf2c07 ("add script to update
original_content to NULL in db"), which adds this file as
packages/db/remove_original_content.py.
"""
import os
import sys

import psycopg2

PG_HOST = os.getenv('PG_HOST', 'localhost')
PG_PORT = os.getenv('PG_PORT', 5432)
PG_USER = os.getenv('PG_USER', 'app_user')
PG_PASSWORD = os.getenv('PG_PASSWORD', 'app_pass')
PG_DB = os.getenv('PG_DB', 'omnivore')
PG_TIMEOUT = os.getenv('PG_TIMEOUT', 10)

# Each pass nulls at most `batch_size` rows, selected by physical row id
# (ctid) among the rows that still have content.
_NULL_BATCH_SQL = """
    UPDATE omnivore.library_item
    SET original_content = NULL
    WHERE ctid IN (
        SELECT ctid
        FROM omnivore.library_item
        WHERE original_content IS NOT NULL
        LIMIT %s
    )
"""


def batch_update_library_items(conn, batch_size=100):
    """Null out ``original_content`` on every library item, batch by batch.

    Every batch is committed on its own, so an interruption keeps the work
    already done and a rerun simply continues with the remaining rows.

    Args:
        conn: An open psycopg2 connection.
        batch_size: Maximum number of rows to update per statement.

    Returns:
        The total number of rows updated.

    Raises:
        ValueError: If ``batch_size`` is smaller than 1.
    """
    if batch_size < 1:
        # LIMIT 0 would report zero rows and end the loop without any work.
        raise ValueError('batch_size must be a positive integer')

    total_updated = 0
    with conn.cursor() as cursor:
        while True:
            cursor.execute(_NULL_BATCH_SQL, (batch_size,))
            rows_updated = cursor.rowcount
            conn.commit()
            if rows_updated == 0:
                break
            total_updated += rows_updated
    return total_updated


def main():
    """Connect to Postgres, run the migration and return an exit status.

    Returns:
        0 when the migration completed, 1 when it failed.
    """
    # Keyword arguments let psycopg2 quote each value, so credentials that
    # contain spaces or quotes cannot corrupt the connection string.
    conn = psycopg2.connect(
        host=PG_HOST,
        port=PG_PORT,
        dbname=PG_DB,
        user=PG_USER,
        password=PG_PASSWORD,
        connect_timeout=PG_TIMEOUT,
    )
    print('Postgres connection:', conn.info)

    try:
        print('Starting migration')
        batch_update_library_items(conn)
        print('Migration complete')
    except Exception as err:
        # Top-level boundary: report the failure and signal it through the
        # exit status instead of finishing as if the run had succeeded.
        print('Migration error', err)
        return 1
    finally:
        print('Closing connections')
        conn.close()
    return 0


if __name__ == '__main__':
    sys.exit(main())