import pandas as pd
# Load the dataset
df = pd.read_csv("processed_cleveland_clean.csv")
# Preview the first rows (only rendered when this is the last expression of a notebook cell).
df.head()

# 1. Data Cleaning
# This notebook loads and cleans the heart disease dataset.

# Drop rows with missing values
# Drop rows with any missing values
df = df.dropna()
print(f"Remaining rows after dropping NA: {len(df)}")

# Convert `num` to binary target `target`
# If 'num' is still present, use it to create a binary target:
# any value above 0 becomes 1, everything else becomes 0.
if 'num' in df.columns:
    df['target'] = df['num'].apply(lambda x: 1 if x > 0 else 0)
    # The source column is removed once 'target' has been derived from it.
    df.drop('num', axis=1, inplace=True)
# Count of rows per target class (only rendered as a notebook cell result).
# NOTE(review): assumes a 'target' column already exists when 'num' is absent - confirm.
df['target'].value_counts()

# Save cleaned dataset
# Write the cleaned frame to disk; index=False keeps the row index out of the CSV.
df.to_csv(
    path_or_buf="cleaned_heart_data.csv",
    index=False,
)
# Report the output location on stdout.
print("✅ Cleaned data saved as 'cleaned_heart_data.csv'")