import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix
# Data file: breast_cancer_diagnosis.csv
# Prepare data
# Load the dataset into a DataFrame
df = pd.read_csv("breast_cancer_diagnosis.csv")
# Handle missing values: drop every row that contains at least one NaN
df.dropna(inplace=True)
# Check for missing values again: per-column NaN counts, expected to be all
# zero after the dropna above. The result is only displayed when this is the
# last expression of a notebook cell; in a plain script it is discarded.
df.isnull().sum()
# Drop non-numeric variables.
# `columns=` is required: without it DataFrame.drop defaults to axis=0 and
# looks the names up in the row index, raising KeyError instead of removing
# the columns.
# NOTE(review): the two names below look like placeholders -- confirm they
# match actual column names in the CSV, otherwise this still raises KeyError.
df.drop(columns=["non_numeric_variable1", "non_numeric_variable2"], inplace=True)