Implementing Single-Layer Perceptron for Binary Classification

Mathematical Formulation:

For input vector x, the perceptron computes:

  • Linear combination: z = w·x + b

  • Activation: a = σ(z) where σ is sigmoid function

  • Prediction: ŷ = 1 if a ≥ 0.5 else 0

  • Loss: binary cross-entropy, L = −[y·log(a) + (1 − y)·log(1 − a)]

The network learns by minimizing the loss through gradient descent, updating weights as:
w = w - η * ∂L/∂w
b = b - η * ∂L/∂b
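
For a sigmoid output trained with binary cross-entropy, the gradient ∂L/∂z simplifies to a − y, so a full training step takes only a few lines of NumPy. The sketch below is a standalone illustration (its names, such as gradient_step and lr, are not part of the implementation further down):

import numpy as np

def sigmoid(z):
    return 1 / (1 + np.exp(-z))

def gradient_step(X, y, w, b, lr=0.1):
    """One gradient-descent step on a batch X of shape (m, n) with labels y of shape (m,)."""
    m = X.shape[0]
    a = sigmoid(X @ w + b)                 # forward pass: a = σ(w·x + b)
    a = np.clip(a, 1e-15, 1 - 1e-15)       # avoid log(0)
    loss = -np.mean(y * np.log(a) + (1 - y) * np.log(1 - a))  # binary cross-entropy
    dz = a - y                             # ∂L/∂z for sigmoid + cross-entropy
    dw = X.T @ dz / m                      # ∂L/∂w
    db = np.mean(dz)                       # ∂L/∂b
    return w - lr * dw, b - lr * db, loss

Repeating this step over many epochs is exactly what the fit method in the implementation below does.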

This implementation provides a complete, working single-layer neural network for binary classification that can learn linear decision boundaries.

How to use 

# Create and train perceptron
perceptron = SingleLayerPerceptron(input_size=2, learning_rate=0.1, epochs=500)
perceptron.fit(X_train, y_train)
# Make predictions
predictions = perceptron.predict(X_test)
probabilities = perceptron.predict_proba(X_test)
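# Optionally, inspect the learned parameters and the final training loss
weights, bias = perceptron.get_parameters()
print(f"Weights: {weights}, Bias: {bias:.4f}, Final loss: {perceptron.loss_history[-1]:.4f}")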

Implementation 

import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report

class SingleLayerPerceptron:
    """
    Single-layer feedforward network (Perceptron) for binary classification
    """
   
    def __init__(self, input_size, learning_rate=0.01, epochs=1000):
        """
        Initialize the perceptron
       
        Parameters:
        - input_size: number of input features
        - learning_rate: learning rate for weight updates
        - epochs: number of training iterations
        """
        self.input_size = input_size
        self.learning_rate = learning_rate
        self.epochs = epochs
        self.weights = None
        self.bias = None
        self.loss_history = []
       
    def initialize_parameters(self):
        """Initialize weights and bias"""
        # Initialize weights with small random values
        self.weights = np.random.randn(self.input_size) * 0.01
        self.bias = np.random.randn() * 0.01
       
    def sigmoid(self, z):
        """Sigmoid activation function"""
        # Clip z to avoid overflow in np.exp for large-magnitude inputs
        z = np.clip(z, -500, 500)
        return 1 / (1 + np.exp(-z))
   
    def forward_propagation(self, X):
        """
        Forward pass through the network
       
        Returns:
        - z: linear combination of inputs
        - a: sigmoid activation (prediction)
        """
        z = np.dot(X, self.weights) + self.bias
        a = self.sigmoid(z)
        return z, a
   
    def compute_loss(self, y_pred, y_true):
        """Binary cross-entropy loss"""
        # Avoid log(0) by clipping predictions
        y_pred = np.clip(y_pred, 1e-15, 1 - 1e-15)
        loss = -np.mean(y_true * np.log(y_pred) + (1 - y_true) * np.log(1 - y_pred))
        return loss
   
    def backward_propagation(self, X, y_true, y_pred):
        """Compute gradients for weights and bias"""
        m = X.shape[0]  # number of samples
       
        # Gradient of loss w.r.t z
        dz = y_pred - y_true
       
        # Gradients for weights and bias
        dw = (1/m) * np.dot(X.T, dz)
        db = (1/m) * np.sum(dz)
       
        return dw, db
   
    def update_parameters(self, dw, db):
        """Update weights and bias using gradient descent"""
        self.weights -= self.learning_rate * dw
        self.bias -= self.learning_rate * db
   
    def fit(self, X, y):
        """
        Train the perceptron
       
        Parameters:
        - X: training features (n_samples, n_features)
        - y: training labels (n_samples,)
        """
        # Initialize parameters
        self.initialize_parameters()
       
        # Reset loss history
        self.loss_history = []
       
        for epoch in range(self.epochs):
            # Forward propagation
            _, y_pred = self.forward_propagation(X)
           
            # Compute loss
            loss = self.compute_loss(y_pred, y)
            self.loss_history.append(loss)
           
            # Backward propagation
            dw, db = self.backward_propagation(X, y, y_pred)
           
            # Update parameters
            self.update_parameters(dw, db)
           
            # Print progress every 100 epochs
            if epoch % 100 == 0:
                print(f"Epoch {epoch}, Loss: {loss:.4f}")
   
    def predict(self, X, threshold=0.5):
        """
        Make predictions
       
        Parameters:
        - X: input features
        - threshold: decision threshold
       
        Returns:
        - Binary predictions (0 or 1)
        """
        _, y_pred = self.forward_propagation(X)
        return (y_pred >= threshold).astype(int)
   
    def predict_proba(self, X):
        """Return probability predictions"""
        _, y_pred = self.forward_propagation(X)
        return y_pred
   
    def get_parameters(self):
        """Return weights and bias"""
        return self.weights, self.bias

# Example usage and testing
def test_perceptron():
    # Generate synthetic binary classification dataset
    X, y = make_classification(
        n_samples=1000,
        n_features=2,
        n_informative=2,
        n_redundant=0,
        n_clusters_per_class=1,
        random_state=42
    )
   
    # Split into train and test sets
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, random_state=42
    )
   
    # Normalize features for better convergence
    # (compute the statistics on the training set only, then apply them to both splits)
    X_mean = np.mean(X_train, axis=0)
    X_std = np.std(X_train, axis=0)
    X_train = (X_train - X_mean) / X_std
    X_test = (X_test - X_mean) / X_std
   
    # Initialize and train perceptron
    input_size = X_train.shape[1]
    perceptron = SingleLayerPerceptron(
        input_size=input_size,
        learning_rate=0.1,
        epochs=500
    )
   
    print("Training perceptron...")
    perceptron.fit(X_train, y_train)
    print("\nTraining completed!")
   
    # Make predictions
    y_pred = perceptron.predict(X_test)
    y_pred_proba = perceptron.predict_proba(X_test)
   
    # Evaluate performance
    accuracy = accuracy_score(y_test, y_pred)
    print(f"\nTest Accuracy: {accuracy:.4f}")
   
    # Confusion matrix
    cm = confusion_matrix(y_test, y_pred)
    print("\nConfusion Matrix:")
    print(cm)
   
    # Classification report
    print("\nClassification Report:")
    print(classification_report(y_test, y_pred))
   
    # Visualize results
    plot_results(perceptron, X_train, X_test, y_train, y_test)
   
    # Return the model together with the training-set normalization statistics
    # so that new data can be scaled consistently
    return perceptron, X_mean, X_std

def plot_results(model, X_train, X_test, y_train, y_test):
    """Visualize the perceptron's performance"""
    fig, axes = plt.subplots(1, 3, figsize=(15, 4))
   
    # 1. Plot training loss
    axes[0].plot(model.loss_history)
    axes[0].set_title('Training Loss')
    axes[0].set_xlabel('Epoch')
    axes[0].set_ylabel('Loss')
    axes[0].grid(True)
   
    # 2. Plot decision boundary on training data
    plot_decision_boundary(model, X_train, y_train, axes[1])
    axes[1].set_title('Decision Boundary (Training)')
    axes[1].set_xlabel('Feature 1')
    axes[1].set_ylabel('Feature 2')
   
    # 3. Plot decision boundary on test data
    plot_decision_boundary(model, X_test, y_test, axes[2])
    axes[2].set_title('Decision Boundary (Test)')
    axes[2].set_xlabel('Feature 1')
    axes[2].set_ylabel('Feature 2')
   
    plt.tight_layout()
    plt.show()

def plot_decision_boundary(model, X, y, ax):
    """Plot decision boundary for 2D data"""
    # Create mesh grid
    x_min, x_max = X[:, 0].min() - 1, X[:, 0].max() + 1
    y_min, y_max = X[:, 1].min() - 1, X[:, 1].max() + 1
    xx, yy = np.meshgrid(
        np.arange(x_min, x_max, 0.02),
        np.arange(y_min, y_max, 0.02)
    )
   
    # Predict for each point in mesh
    Z = model.predict(np.c_[xx.ravel(), yy.ravel()])
    Z = Z.reshape(xx.shape)
   
    # Plot contour and scatter
    ax.contourf(xx, yy, Z, alpha=0.3, cmap='RdYlBu')
    scatter = ax.scatter(X[:, 0], X[:, 1], c=y, edgecolors='k', cmap='RdYlBu')
    ax.set_xlim(xx.min(), xx.max())
    ax.set_ylim(yy.min(), yy.max())

# Alternative: Simple perceptron with step activation (without sigmoid)
class SimplePerceptron:
    """Simple perceptron with step activation (no probability outputs)"""
   
    def __init__(self, input_size, learning_rate=0.01, epochs=1000):
        self.input_size = input_size
        self.learning_rate = learning_rate
        self.epochs = epochs
        self.weights = np.zeros(input_size)
        self.bias = 0
       
    def activation(self, x):
        """Step activation function"""
        return 1 if x >= 0 else 0
   
    def fit(self, X, y):
        """Train using perceptron learning rule"""
        for epoch in range(self.epochs):
            errors = 0
            for xi, target in zip(X, y):
                # Forward pass
                linear_output = np.dot(xi, self.weights) + self.bias
                prediction = self.activation(linear_output)
               
                # Update if prediction is wrong
                update = self.learning_rate * (target - prediction)
                if update != 0:
                    self.weights += update * xi
                    self.bias += update
                    errors += 1
           
            # Stop early if no errors
            if errors == 0:
                print(f"Converged after {epoch + 1} epochs")
                break
   
    def predict(self, X):
        """Make binary predictions"""
        predictions = []
        for xi in X:
            linear_output = np.dot(xi, self.weights) + self.bias
            predictions.append(self.activation(linear_output))
        return np.array(predictions)
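
# Illustrative sketch (not called by default): how SimplePerceptron could be
# trained and evaluated on a synthetic dataset like the one used above.
def demo_simple_perceptron():
    X, y = make_classification(n_samples=200, n_features=2, n_informative=2,
                               n_redundant=0, n_clusters_per_class=1, random_state=0)
    model = SimplePerceptron(input_size=2, learning_rate=0.1, epochs=100)
    model.fit(X, y)
    print("SimplePerceptron training accuracy:", accuracy_score(y, model.predict(X)))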

if __name__ == "__main__":
    # Test the perceptron
    perceptron, X_mean, X_std = test_perceptron()
   
    # Example of using the trained model on new data
    print("\n" + "="*50)
    print("Example: Making predictions on new data")
    print("="*50)
   
    # Create some new test data and scale it with the training-set statistics
    new_data = np.array([[1.5, -0.5], [-1.0, 1.0], [0.0, 0.0]])
    new_data_normalized = (new_data - X_mean) / X_std
   
    predictions = perceptron.predict(new_data_normalized)
    probabilities = perceptron.predict_proba(new_data_normalized)
   
    for i, (pred, prob) in enumerate(zip(predictions, probabilities)):
        print(f"Sample {i+1}: Prediction = {pred}, Probability = {prob:.4f}")
