From 49cce94b297e35c028786e0b5129daa218394f26 Mon Sep 17 00:00:00 2001
From: Jackson Harper <jacksonh@gmail.com>
Date: Fri, 21 Jun 2024 09:18:33 +0800
Subject: [PATCH] Linting cleanups: remove comments, stray blank lines, trailing whitespace

---
 ml/digest-score/train.py | 11 +----------
 1 file changed, 1 insertion(+), 10 deletions(-)

diff --git a/ml/digest-score/train.py b/ml/digest-score/train.py
index fdd4d0b52..db8deff84 100644
--- a/ml/digest-score/train.py
+++ b/ml/digest-score/train.py
@@ -76,14 +76,10 @@ def load_and_sample_library_items_from_parquet(raw_file_path, sample_size):
 
 
 def merge_user_preference_data(sampled_raw_df, feature_dict):
-    # Start with the sampled raw DataFrame
     merged_df = sampled_raw_df
 
-    # Iterate through the files in the feature directory
     for key in feature_dict.keys():
         user_preference_df = feature_dict[key]
-            
-        # Determine the dimension to join on
         if 'author' in key:
             merge_keys = ['user_id', 'author']
         elif 'site' in key:
@@ -95,13 +91,8 @@ def merge_user_preference_data(sampled_raw_df, feature_dict):
         else:
             print("skipping feature: ", key)
             continue  # Skip files that don't match expected patterns
-            
-        # Merge with the current user preference DataFrame
         merged_df = pd.merge(merged_df, user_preference_df, on=merge_keys, how='left')
-
-        # Optionally, fill NaNs after each merge step to avoid growing NaNs
         merged_df = merged_df.fillna(0)
-            
     return merged_df
 
 def prepare_data(df):
@@ -119,7 +110,7 @@ def prepare_data(df):
     df['days_since_subscribed'] = df['days_since_subscribed'].fillna(0).astype(int)
 
     df['is_feed'] = df['subscription_type'].apply(lambda x: 1 if x == 'RSS' else 0)
-    df['is_newsletter'] = df['subscription_type'].apply(lambda x: 1 if x == 'NEWSLETTER' else 0)    
+    df['is_newsletter'] = df['subscription_type'].apply(lambda x: 1 if x == 'NEWSLETTER' else 0)
 
     df = df.dropna(subset=['user_clicked'])