Clean up
This commit is contained in:
@ -90,13 +90,13 @@ def compute_interaction_score(user_id, item_features):
|
||||
'item_has_thumbnail': 1 if item_features.get('has_thumbnail') else 0,
|
||||
"item_has_site_icon": 1 if item_features.get('has_site_icon') else 0,
|
||||
|
||||
'item_word_count': item_features.get('words_count'),
|
||||
'item_word_count': item_features.get('words_count'),
|
||||
'is_subscription': 1 if item_features.get('is_subscription') else 0,
|
||||
'is_newsletter': 1 if item_features.get('is_newsletter') else 0,
|
||||
'is_newsletter': 1 if item_features.get('is_newsletter') else 0,
|
||||
'is_feed': 1 if item_features.get('is_feed') else 0,
|
||||
'days_since_subscribed': item_features.get('days_since_subscribed'),
|
||||
'subscription_count': item_features.get('subscription_count'),
|
||||
'subscription_auto_add_to_library': item_features.get('subscription_auto_add_to_library'),
|
||||
'days_since_subscribed': item_features.get('days_since_subscribed'),
|
||||
'subscription_count': item_features.get('subscription_count'),
|
||||
'subscription_auto_add_to_library': item_features.get('subscription_auto_add_to_library'),
|
||||
'subscription_fetch_content': item_features.get('subscription_fetch_content'),
|
||||
|
||||
'has_author': 1 if item_features.get('author') else 0,
|
||||
|
||||
@ -83,10 +83,6 @@ def parquet_to_dataframe(file_path):
|
||||
df = table.to_pandas()
|
||||
return df
|
||||
|
||||
def load_local_raw_library_items(
    local_file_path='/Users/jacksonh/Downloads/data_raw_library_items_2024-03-01.parquet',
):
    """Load a raw library-items parquet file from the local filesystem.

    Intended for local debugging, as an alternative to downloading the
    file from remote storage.

    Args:
        local_file_path: Path of the parquet file to read. The default is
            the developer-machine path that used to be hard-coded in the
            body, so existing zero-argument calls behave exactly as before;
            pass an explicit path to run on any other machine.

    Returns:
        The result of ``parquet_to_dataframe(local_file_path)``.
    """
    return parquet_to_dataframe(local_file_path)
|
||||
|
||||
def load_tables_from_pickle(pickle_file):
|
||||
with open(pickle_file, 'rb') as handle:
|
||||
@ -119,14 +115,6 @@ def load_feather_files(feature_directory):
|
||||
return dataframes
|
||||
|
||||
|
||||
# def save_tables_to_arrow_ipc(tables, output_file):
|
||||
# with pa.OSFile(output_file, 'wb') as sink:
|
||||
# with pa.ipc.new_stream(sink, tables[next(iter(tables))].schema) as writer:
|
||||
# for name, table in tables.items():
|
||||
# print("NAME:", name, "TABLE", table)
|
||||
# writer.write_table(table)
|
||||
|
||||
|
||||
def save_tables_to_arrow_ipc_with_schemas(tables, output_file):
|
||||
with pa.OSFile(output_file, 'wb') as sink:
|
||||
with pa.ipc.new_stream(sink, pa.schema([])) as writer:
|
||||
@ -153,7 +141,6 @@ def upload_to_gcs(bucket_name, source_file_name, destination_blob_name):
|
||||
|
||||
def generate_and_upload_user_history(execution_date, gcs_bucket_name):
|
||||
df = download_raw_library_items(execution_date, gcs_bucket_name)
|
||||
# df = load_local_raw_library_items()
|
||||
with tempfile.TemporaryDirectory() as tmpdir:
|
||||
user_preferences = aggregate_user_preferences(df, tmpdir)
|
||||
dataframes = load_feather_files(tmpdir)
|
||||
|
||||
Reference in New Issue
Block a user