import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import os

from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.metrics import confusion_matrix
import seaborn as sns
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers

# Reproducibility: fix the hash seed, seed the random number generators and
# request deterministic TensorFlow ops.
# NOTE(review): PYTHONHASHSEED is read when the interpreter starts, so setting
# it here presumably only affects child processes -- confirm this is intended.
os.environ['PYTHONHASHSEED'] = '0'
tf.keras.utils.set_random_seed(0)
# NOTE(review): this seeds TensorFlow again with a different value than the
# call above -- confirm which seed is the intended one.
tf.random.set_seed(1234)
tf.config.experimental.enable_op_determinism()
os.environ['TF_DETERMINISTIC_OPS'] = '1'

# Constants
# Absolute project root; the dataset path below is derived from it.
DIR = "/home/ananyapam/Projects/Customer-Churn-Prediction-IIITD"
DATA_PATH = f"{DIR}/data/WA_Fn-UseC_-Telco-Customer-Churn.csv"
# All feature columns: the union of factor_columns and numeric_columns below.
PREDICTOR_COLUMNS = [
    "gender", "SeniorCitizen", "Partner", "Dependents", "tenure", 
    "PhoneService", "MultipleLines", "InternetService", "OnlineSecurity",
    "OnlineBackup", "DeviceProtection", "TechSupport", "StreamingTV",
    "StreamingMovies", "Contract", "PaperlessBilling", "PaymentMethod", 
    "MonthlyCharges", "TotalCharges"
]
# Name of the label column.
TARGET_COLUMN = 'Churn'
# Categorical feature columns.
# NOTE(review): PREDICTOR_COLUMNS, TARGET_COLUMN, factor_columns and
# numeric_columns are not referenced in the visible part of this file.
factor_columns = [
    "gender", "SeniorCitizen", "Partner", "Dependents", "PhoneService", "MultipleLines", "InternetService",
    "OnlineSecurity", "OnlineBackup", "DeviceProtection", "TechSupport", "StreamingTV", "StreamingMovies",
    "Contract", "PaperlessBilling", "PaymentMethod"
]
# Continuous feature columns.
numeric_columns = ["tenure", "MonthlyCharges", "TotalCharges"]

def compute_class_weights(data):
    """
    Compute inverse-frequency weights for the two classes of a binary target.

    Parameters:
    - data (array-like): Labels encoded as 0 and 1; must support element-wise
      comparison (e.g. a NumPy array or a pandas Series).

    Returns:
    - dict: Maps each label (0 and 1) to 1 / (number of samples with that label).
    """
    return {label: 1 / np.sum(data == label) for label in (0, 1)}

def build_model(input_shape):
    """
    Build and return the sequential model.

    The network is an input layer followed by ReLU dense layers of 64, 32 and
    8 units and a single sigmoid output unit. The model is returned without
    being compiled.

    Parameters:
    - input_shape (tuple): Shape of one input sample.

    Returns:
    - model: An uncompiled Keras Sequential model.
    """
    hidden_units = (64, 32, 8)

    stack = [layers.Input(shape=input_shape)]
    for units in hidden_units:
        stack.append(layers.Dense(units, activation='relu'))
    stack.append(layers.Dense(1, activation='sigmoid'))

    return keras.Sequential(stack)

def plot_history(history):
    """
    Plot training vs. validation curves for loss, precision and recall.

    Parameters:
    - history: Object whose ``history`` attribute maps metric names to
      per-epoch values. The keys 'loss', 'precision' and 'recall' and their
      'val_'-prefixed counterparts are read.
    """
    # (history key, label used in the title and on the y-axis), left to right.
    panels = (('loss', 'Loss'), ('precision', 'Precision'), ('recall', 'Recall'))

    plt.figure(figsize=(20, 5))

    for position, (metric, label) in enumerate(panels, start=1):
        plt.subplot(1, 3, position)
        plt.plot(history.history[metric])
        plt.plot(history.history['val_' + metric])
        plt.title('Model ' + label)
        plt.ylabel(label)
        plt.xlabel('Epoch')
        plt.legend(['Train', 'Test'], loc='upper left')

    plt.tight_layout()
    plt.show()

def load_data(path):
    """
    Read a CSV dataset into a DataFrame.

    Parameters:
    - path (str): Location of the CSV file; forwarded unchanged to
      ``pd.read_csv``.

    Returns:
    - DataFrame: The parsed dataset.
    """
    dataset = pd.read_csv(path)
    return dataset

def impute_total_charges(churn_df):
    """
    Handle missing values in the TotalCharges column.

    Entries equal to a single space are treated as missing, the column is
    cast to float, and each missing entry is filled with the MonthlyCharges
    value of the same row.

    Parameters:
    - churn_df (DataFrame): The dataset; must contain 'TotalCharges' and
      'MonthlyCharges'. The 'TotalCharges' column is overwritten in place.

    Returns:
    - DataFrame: The same frame, with a float 'TotalCharges' column.
    """
    # `np.nan` rather than `np.NaN`: the capitalised alias was removed in
    # NumPy 2.0 and raises AttributeError there.
    total_charges = churn_df['TotalCharges'].replace(' ', np.nan).astype(float)
    # fillna aligns on the index, so each gap receives its own row's charge.
    churn_df['TotalCharges'] = total_charges.fillna(churn_df['MonthlyCharges'])
    return churn_df

def manipulate_columns(churn_df):
    """
    Encode the categorical columns of the churn dataset as integers.

    Steps, in order: map SeniorCitizen 1/0 to 'Yes'/'No', fold the
    'No phone service' / 'No internet service' levels into 'No', label-encode
    the two-level columns (including the 'Churn' target), ordinally encode
    'InternetService' and 'Contract', one-hot encode 'PaymentMethod' with the
    first level dropped, and finally drop 'PaymentMethod' and 'customerID'.

    Parameters:
    - churn_df (DataFrame): The dataset. The label- and ordinal-encoded
      columns are overwritten on this frame before a new frame is built to
      hold the one-hot columns.

    Returns:
    - DataFrame: A new frame with all encoded columns and integer one-hot
      columns for the payment method.

    Raises:
    - ValueError: If 'InternetService' or 'Contract' contains a value that is
      not one of the expected levels.
    - KeyError: If an expected column is missing.
    """
    # Manipulate the SeniorCitizen column so it is encoded like the other
    # Yes/No columns below.
    churn_df['SeniorCitizen'] = churn_df['SeniorCitizen'].replace({1: 'Yes', 0: 'No'})

    # Merge the 'No Service' and 'No' counts for various columns
    columns_to_merge = ['MultipleLines', 'OnlineSecurity', 'OnlineBackup', 'DeviceProtection',
                        'TechSupport', 'StreamingTV', 'StreamingMovies']
    for col in columns_to_merge:
        churn_df[col] = churn_df[col].replace(['No phone service', 'No internet service'], 'No')

    # Encode binary columns
    binary_columns = ['gender', 'Partner', 'SeniorCitizen', 'Dependents', 'PhoneService', 'MultipleLines',
                      'OnlineSecurity', 'OnlineBackup', 'DeviceProtection', 'TechSupport',
                      'StreamingTV', 'StreamingMovies', 'PaperlessBilling', 'Churn']
    le = LabelEncoder()
    for column in binary_columns:
        churn_df[column] = le.fit_transform(churn_df[column])

    # Ordered encoding: the position in each list is the encoded value.
    internet_levels = ['No', 'DSL', 'Fiber optic']
    contract_levels = ['Month-to-month', 'One year', 'Two year']
    churn_df['InternetService'] = churn_df['InternetService'].apply(lambda x: internet_levels.index(x))
    churn_df['Contract'] = churn_df['Contract'].apply(lambda x: contract_levels.index(x))

    # One-hot encoding for 'PaymentMethod'. Requesting integer dummies here
    # avoids casting a hard-coded list of dummy column names afterwards, which
    # raised KeyError whenever the payment-method levels differed from that list.
    dummies_payment = pd.get_dummies(churn_df['PaymentMethod'], drop_first=True, dtype=int)
    churn_df = pd.concat([churn_df, dummies_payment], axis=1)

    # Drop the source column of the dummies and 'customerID', which is likely
    # a unique identifier.
    return churn_df.drop(columns=['PaymentMethod', 'customerID'])

# Load the data from the CSV file at DATA_PATH
churn = load_data(DATA_PATH)

# Perform data manipulations: fill TotalCharges first, then encode the
# categorical columns (manipulate_columns returns a new frame)
churn = impute_total_charges(churn)
churn = manipulate_columns(churn)

# Display the first few rows of the dataframe to check the transformations
# (a bare expression: only rendered when run as a notebook cell)
churn.head()

def prepare_data(churn_df):
    """
    Separate the target from the features and standardize every feature column.

    NOTE(review): the scaler is fitted on all rows passed in. This file calls
    the function before the train/test split, so test rows contribute to the
    scaling statistics.

    Parameters:
    - churn_df (DataFrame): The dataset, including the 'Churn' column.

    Returns:
    - tuple: (standardized feature array, 'Churn' column).
    """
    target = churn_df['Churn']
    features = churn_df.drop(columns='Churn')

    scaled_features = StandardScaler().fit_transform(features)

    return scaled_features, target

def split_data(X, y, test_size=0.2, random_state=42):
    """
    Split data into stratified training and testing sets.

    Parameters:
    - X (array-like): Features.
    - y (array-like): Target; also used to stratify the split.
    - test_size (float): Fraction of samples placed in the test set
      (default 0.2).
    - random_state (int): Seed passed to the splitter (default 42).

    Returns:
    - tuple: (X_train, X_test, y_train, y_test).
    """
    return train_test_split(
        X, y, test_size=test_size, stratify=y, random_state=random_state
    )

def build_model(input_shape):
    """
    Assemble and compile the feed-forward classifier.

    The network is an input layer, two ReLU dense layers of 50 units each and
    a single sigmoid output unit. It is compiled with the Adam optimizer,
    binary cross-entropy loss and an AUC metric named 'auc'.

    Parameters:
    - input_shape (tuple): Shape of the input data.

    Returns:
    - model: A compiled TensorFlow model.
    """
    network = tf.keras.Sequential()
    network.add(tf.keras.layers.InputLayer(input_shape=input_shape))
    for _ in range(2):
        network.add(tf.keras.layers.Dense(50, activation='relu'))
    network.add(tf.keras.layers.Dense(1, activation='sigmoid'))

    auc_metric = tf.keras.metrics.AUC(name='auc')
    network.compile(optimizer='adam', loss='binary_crossentropy', metrics=[auc_metric])

    return network

def _plot_train_val_curve(epochs_range, train_values, val_values, metric_label):
    """Draw one training/validation curve pair on the current axes."""
    plt.plot(epochs_range, train_values, label=f"Training {metric_label}", marker='o')
    plt.plot(epochs_range, val_values, label=f"Validation {metric_label}", marker='o')
    plt.title(f"Training and Validation {metric_label}", fontsize=16)
    plt.xlabel("Epoch", fontsize=14)
    plt.ylabel(metric_label, fontsize=14)
    plt.legend(fontsize=12)
    plt.xticks(fontsize=12)
    plt.yticks(fontsize=12)


def plot_training_history(history, epochs=None):
    """
    Plot training and validation loss and AUC.

    Parameters:
    - history: Training history; its ``history`` attribute must contain the
      keys 'loss', 'val_loss', 'auc' and 'val_auc'.
    - epochs (int, optional): Configured number of epochs. Accepted for
      backward compatibility only; the x-axis is sized from the number of
      epochs actually recorded in ``history``.
    """
    sns.set_style("whitegrid")
    plt.figure(figsize=(14, 6))

    recorded = history.history
    # Training may stop before the configured epoch count (e.g. through an
    # early-stopping callback), so the axis follows the recorded length;
    # using the configured count made plt.plot fail on mismatched lengths.
    epochs_range = range(1, len(recorded['loss']) + 1)

    # Plotting loss
    plt.subplot(1, 2, 1)
    _plot_train_val_curve(epochs_range, recorded['loss'], recorded['val_loss'], "Loss")

    # Plotting AUC
    plt.subplot(1, 2, 2)
    _plot_train_val_curve(epochs_range, recorded['auc'], recorded['val_auc'], "AUC")

    plt.tight_layout()
    plt.show()

def plot_prediction_histogram(X_test, model):
    """
    Show how the model's predicted probabilities are distributed.

    Parameters:
    - X_test: Inputs passed to ``model.predict``.
    - model: Object with a ``predict`` method whose output can be flattened
      to one dimension.

    A dashed red vertical line marks the 0.5 threshold.
    """
    probabilities = model.predict(X_test)
    flat_probabilities = probabilities.reshape(-1)

    _, axis = plt.subplots(figsize=(10, 6))
    axis.hist(flat_probabilities, bins=50, alpha=0.7, color='blue')
    axis.axvline(x=0.5, color='red', linestyle='--')
    axis.set_title('Histogram of Prediction Probabilities', fontsize=16)
    axis.set_xlabel('Prediction Probability', fontsize=14)
    axis.set_ylabel('Frequency', fontsize=14)
    plt.show()

# Prepare data: standardize the features, then split into train and test sets
X, y = prepare_data(churn)
X_train, X_test, y_train, y_test = split_data(X, y)

# Build model; the input shape is the number of feature columns
model = build_model((X_train.shape[1],))

# Prints the model object's repr, not a layer-by-layer summary
print(model)

# Define callbacks: stop once val_loss has gone 5 epochs without improving
# and restore the best weights seen
early_stopping = tf.keras.callbacks.EarlyStopping(
    monitor='val_loss', patience=5, verbose=0, mode='auto',
    restore_best_weights=True
)

# Train model
BATCH_SIZE = 100
EPOCHS = 10

# A further 20% of the training split is held out for validation, and the
# per-class weights from compute_class_weights are passed as class_weight.
history = model.fit(
    X_train, y_train,
    validation_split=0.20,
    class_weight= compute_class_weights(y_train),
    batch_size=BATCH_SIZE,
    epochs=EPOCHS,
    verbose=0,
    callbacks=[early_stopping]
)

<keras.src.engine.sequential.Sequential object at 0x7f562f474df0>

# Plot training history
# NOTE(review): EPOCHS is the configured maximum; if early stopping ended
# training sooner, the history holds fewer entries -- confirm the plot copes.
plot_training_history(history, EPOCHS)

# Histogram of the predicted probabilities on the test set
plot_prediction_histogram(X_test, model)

45/45 [==============================] - 0s 902us/step

# Threshold the predicted probabilities at 0.5 to obtain hard 0/1 labels
predictions = (model.predict(X_test) > 0.5).astype(int)
# Confusion matrix of true labels (y_test) against the thresholded predictions
conf_matrix = confusion_matrix(y_test, predictions)
print(conf_matrix)
# Computes the compiled loss and the 'auc' metric on the test set; the return
# value is not stored, so it is only shown when run as a notebook cell
model.evaluate(X_test, y_test)

45/45 [==============================] - 0s 2ms/step
[[742 293]
 [ 85 289]]
45/45 [==============================] - 0s 1ms/step - loss: 0.5134 - auc: 0.8300

[0.513434648513794, 0.8299555778503418]

Deep Learning Model for Customer Churn Prediction

Why Transformer-based Models Might Not Be Ideal for Customer Churn Prediction

1. No Need for Positional Embeddings:

2. Complexity:

3. Interpretability:

Key Observations in Model Training:

Visualization of Prediction Probabilities

Confusion Matrix Analysis

Classification Results:

Key Metrics:

	gender	Partner	tenure	PhoneService	InternetService	OnlineSecurity	OnlineBackup	...	Contract	PaperlessBilling	MonthlyCharges	TotalCharges	Churn	Electronic check	Mailed check
0	0	1	1	0	1	0	1	...	0	1	29.85	29.85	0	1	0
1	1	0	34	1	1	1	0	...	1	0	56.95	1889.50	0	0	1
2	1	0	2	1	1	1	1	...	0	1	53.85	108.15	1	0	1
3	1	0	45	0	1	1	0	...	1	0	42.30	1840.75	0	0	0
4	0	0	2	1	2	0	0	...	0	1	70.70	151.65	1	1	0