Implementing a Single-Layer Perceptron for Binary Classification
Mathematical Formulation:
For input vector x, the perceptron computes:
Linear combination: z = w·x + b
Activation: a = σ(z), where σ is the sigmoid function
Prediction: ŷ = 1 if a ≥ 0.5, else 0
Loss: binary cross-entropy
The network learns by minimizing the loss through gradient descent, updating the parameters as:
w = w - η * ∂L/∂w
b = b - η * ∂L/∂b
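As a concrete illustration, here is a minimal NumPy sketch of a single forward pass and one gradient-descent step on a toy sample (the values of x, w, b, y, and η below are made up purely for illustration):

import numpy as np
x = np.array([0.5, -1.0])          # one input sample (toy values)
w = np.array([0.2, 0.1])           # current weights
b, y, eta = 0.0, 1.0, 0.1          # bias, true label, learning rate
z = np.dot(w, x) + b               # linear combination -> 0.0
a = 1 / (1 + np.exp(-z))           # sigmoid activation -> 0.5
loss = -(y * np.log(a) + (1 - y) * np.log(1 - a))  # binary cross-entropy -> ~0.693
dz = a - y                         # dL/dz for sigmoid + cross-entropy
w = w - eta * dz * x               # weight update
b = b - eta * dz                   # bias update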
This implementation provides a complete, working single-layer neural network for binary classification that can learn linear decision boundaries.
How to use
# Create and train perceptron
perceptron = SingleLayerPerceptron(input_size=2, learning_rate=0.1, epochs=500)
perceptron.fit(X_train, y_train)
# Make predictions
predictions = perceptron.predict(X_test)
probabilities = perceptron.predict_proba(X_test)
Implementation
import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
class SingleLayerPerceptron:
"""
Single-layer feedforward network (Perceptron) for binary classification
"""
def __init__(self, input_size, learning_rate=0.01, epochs=1000):
"""
Initialize the perceptron
Parameters:
- input_size: number of input features
- learning_rate: learning rate for weight updates
- epochs: number of training iterations
"""
self.input_size = input_size
self.learning_rate = learning_rate
self.epochs = epochs
self.weights = None
self.bias = None
self.loss_history = []
def initialize_parameters(self):
"""Initialize weights and bias"""
# Initialize weights with small random values
self.weights = np.random.randn(self.input_size) * 0.01
self.bias = np.random.randn() * 0.01
    def sigmoid(self, z):
        """Numerically stable sigmoid activation function"""
        # Clip z so that np.exp(-z) cannot overflow for large negative inputs
        z = np.clip(z, -500, 500)
        return 1 / (1 + np.exp(-z))
def forward_propagation(self, X):
"""
Forward pass through the network
Returns:
- z: linear combination of inputs
- a: sigmoid activation (prediction)
"""
z = np.dot(X, self.weights) + self.bias
a = self.sigmoid(z)
return z, a
def compute_loss(self, y_pred, y_true):
"""Binary cross-entropy loss"""
# Avoid log(0) by clipping predictions
y_pred = np.clip(y_pred, 1e-15, 1 - 1e-15)
loss = -np.mean(y_true * np.log(y_pred) + (1 - y_true) * np.log(1 - y_pred))
return loss
def backward_propagation(self, X, y_true, y_pred):
"""Compute gradients for weights and bias"""
m = X.shape[0] # number of samples
# Gradient of loss w.r.t z
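        # For sigmoid activation with binary cross-entropy, dL/dz simplifies to
        # (y_pred - y_true); the sigmoid derivative cancels against the loss gradient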
dz = y_pred - y_true
# Gradients for weights and bias
dw = (1/m) * np.dot(X.T, dz)
db = (1/m) * np.sum(dz)
return dw, db
def update_parameters(self, dw, db):
"""Update weights and bias using gradient descent"""
self.weights -= self.learning_rate * dw
self.bias -= self.learning_rate * db
def fit(self, X, y):
"""
Train the perceptron
Parameters:
- X: training features (n_samples, n_features)
- y: training labels (n_samples,)
"""
# Initialize parameters
self.initialize_parameters()
# Reset loss history
self.loss_history = []
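        # Full-batch gradient descent: every epoch processes all training samples at once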
for epoch in range(self.epochs):
# Forward propagation
_, y_pred = self.forward_propagation(X)
# Compute loss
loss = self.compute_loss(y_pred, y)
self.loss_history.append(loss)
# Backward propagation
dw, db = self.backward_propagation(X, y, y_pred)
# Update parameters
self.update_parameters(dw, db)
# Print progress every 100 epochs
if epoch % 100 == 0:
print(f"Epoch {epoch}, Loss: {loss:.4f}")
def predict(self, X, threshold=0.5):
"""
Make predictions
Parameters:
- X: input features
- threshold: decision threshold
Returns:
- Binary predictions (0 or 1)
"""
_, y_pred = self.forward_propagation(X)
return (y_pred >= threshold).astype(int)
def predict_proba(self, X):
"""Return probability predictions"""
_, y_pred = self.forward_propagation(X)
return y_pred
def get_parameters(self):
"""Return weights and bias"""
return self.weights, self.bias
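# Usage hint (added note): after training, get_parameters() returns (weights, bias);
# the learned decision boundary is the set of points where weights·x + bias = 0,
# i.e. where the sigmoid output equals 0.5.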
# Example usage and testing
def test_perceptron():
# Generate synthetic binary classification dataset
X, y = make_classification(
n_samples=1000,
n_features=2,
n_informative=2,
n_redundant=0,
n_clusters_per_class=1,
random_state=42
)
# Split into train and test sets
X_train, X_test, y_train, y_test = train_test_split(
X, y, test_size=0.2, random_state=42
)
    # Standardize features using the training-set statistics for better convergence
    X_mean = np.mean(X_train, axis=0)
    X_std = np.std(X_train, axis=0)
    X_train = (X_train - X_mean) / X_std
    X_test = (X_test - X_mean) / X_std
# Initialize and train perceptron
input_size = X_train.shape[1]
perceptron = SingleLayerPerceptron(
input_size=input_size,
learning_rate=0.1,
epochs=500
)
print("Training perceptron...")
perceptron.fit(X_train, y_train)
print("\nTraining completed!")
# Make predictions
y_pred = perceptron.predict(X_test)
y_pred_proba = perceptron.predict_proba(X_test)
# Evaluate performance
accuracy = accuracy_score(y_test, y_pred)
print(f"\nTest Accuracy: {accuracy:.4f}")
# Confusion matrix
cm = confusion_matrix(y_test, y_pred)
print("\nConfusion Matrix:")
print(cm)
# Classification report
print("\nClassification Report:")
print(classification_report(y_test, y_pred))
# Visualize results
plot_results(perceptron, X_train, X_test, y_train, y_test)
return perceptron
def plot_results(model, X_train, X_test, y_train, y_test):
"""Visualize the perceptron's performance"""
fig, axes = plt.subplots(1, 3, figsize=(15, 4))
# 1. Plot training loss
axes[0].plot(model.loss_history)
axes[0].set_title('Training Loss')
axes[0].set_xlabel('Epoch')
axes[0].set_ylabel('Loss')
axes[0].grid(True)
# 2. Plot decision boundary on training data
plot_decision_boundary(model, X_train, y_train, axes[1])
axes[1].set_title('Decision Boundary (Training)')
axes[1].set_xlabel('Feature 1')
axes[1].set_ylabel('Feature 2')
# 3. Plot decision boundary on test data
plot_decision_boundary(model, X_test, y_test, axes[2])
axes[2].set_title('Decision Boundary (Test)')
axes[2].set_xlabel('Feature 1')
axes[2].set_ylabel('Feature 2')
plt.tight_layout()
plt.show()
def plot_decision_boundary(model, X, y, ax):
"""Plot decision boundary for 2D data"""
# Create mesh grid
x_min, x_max = X[:, 0].min() - 1, X[:, 0].max() + 1
y_min, y_max = X[:, 1].min() - 1, X[:, 1].max() + 1
xx, yy = np.meshgrid(
np.arange(x_min, x_max, 0.02),
np.arange(y_min, y_max, 0.02)
)
# Predict for each point in mesh
Z = model.predict(np.c_[xx.ravel(), yy.ravel()])
Z = Z.reshape(xx.shape)
# Plot contour and scatter
ax.contourf(xx, yy, Z, alpha=0.3, cmap='RdYlBu')
scatter = ax.scatter(X[:, 0], X[:, 1], c=y, edgecolors='k', cmap='RdYlBu')
ax.set_xlim(xx.min(), xx.max())
ax.set_ylim(yy.min(), yy.max())
# Alternative: Simple perceptron with step activation (without sigmoid)
class SimplePerceptron:
"""Simple perceptron with step activation (no probability outputs)"""
def __init__(self, input_size, learning_rate=0.01, epochs=1000):
self.input_size = input_size
self.learning_rate = learning_rate
self.epochs = epochs
self.weights = np.zeros(input_size)
self.bias = 0
def activation(self, x):
"""Step activation function"""
return 1 if x >= 0 else 0
def fit(self, X, y):
"""Train using perceptron learning rule"""
for epoch in range(self.epochs):
errors = 0
for xi, target in zip(X, y):
# Forward pass
linear_output = np.dot(xi, self.weights) + self.bias
prediction = self.activation(linear_output)
# Update if prediction is wrong
update = self.learning_rate * (target - prediction)
if update != 0:
self.weights += update * xi
self.bias += update
errors += 1
# Stop early if no errors
if errors == 0:
print(f"Converged after {epoch + 1} epochs")
break
def predict(self, X):
"""Make binary predictions"""
predictions = []
for xi in X:
linear_output = np.dot(xi, self.weights) + self.bias
predictions.append(self.activation(linear_output))
return np.array(predictions)
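# A minimal usage sketch for SimplePerceptron (assumed example, not part of the
# tests above): the step-activation perceptron converges on any linearly
# separable problem, such as the logical AND function.
def demo_simple_perceptron():
    X_and = np.array([[0, 0], [0, 1], [1, 0], [1, 1]])
    y_and = np.array([0, 0, 0, 1])
    simple = SimplePerceptron(input_size=2, learning_rate=0.1, epochs=100)
    simple.fit(X_and, y_and)
    print("AND predictions:", simple.predict(X_and))  # expected: [0 0 0 1]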
if __name__ == "__main__":
# Test the perceptron
perceptron = test_perceptron()
# Example of using the trained model on new data
print("\n" + "="*50)
print("Example: Making predictions on new data")
print("="*50)
# Create some new test data
new_data = np.array([[1.5, -0.5], [-1.0, 1.0], [0.0, 0.0]])
    # Note: in practice new samples should be standardized with the training-set
    # mean and std; scaling by the new data's own statistics is only a shortcut for this demo
    new_data_normalized = (new_data - np.mean(new_data, axis=0)) / np.std(new_data, axis=0)
predictions = perceptron.predict(new_data_normalized)
probabilities = perceptron.predict_proba(new_data_normalized)
for i, (pred, prob) in enumerate(zip(predictions, probabilities)):
print(f"Sample {i+1}: Prediction = {pred}, Probability = {prob:.4f}")