
Logistic Regression with L2 Regularization¶

Implementation of classic logistic regression for binary class labels, trained with an L2 penalty on the model parameters.
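For reference, the cost minimized below is the sum-reduced binary cross-entropy plus an L2 penalty on both the weights and the bias (this matches the manual-regularization cell further down, with $\lambda$ corresponding to LAMBDA in the code and $\sigma$ denoting the logistic sigmoid):

$$\mathcal{L}(\mathbf{w}, b) = -\sum_{i=1}^{n} \Big[ y^{(i)} \log \sigma\big(z^{(i)}\big) + \big(1 - y^{(i)}\big) \log\big(1 - \sigma\big(z^{(i)}\big)\big) \Big] + \frac{\lambda}{2} \lVert \mathbf{w} \rVert_2^2 + \frac{\lambda}{2} b^2, \qquad z^{(i)} = \mathbf{w}^\top \mathbf{x}^{(i)} + b.$$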

Imports¶
In [ ]:
%matplotlib inline
import matplotlib.pyplot as plt
import numpy as np

import torch
import torch.nn.functional as F

device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
LAMBDA = 2  # L2 regularization strength (lambda)

Preparing a toy dataset¶
In [ ]:
##########################
### DATASET
##########################

data = np.genfromtxt('data/toydata.txt', delimiter='\t')
x = data[:, :2].astype(np.float32)
y = data[:, 2].astype(np.int64)

np.random.seed(123)
idx = np.arange(y.shape[0])
np.random.shuffle(idx)
X_test, y_test = x[idx[:25]], y[idx[:25]]
X_train, y_train = x[idx[25:]], y[idx[25:]]
mu, std = np.mean(X_train, axis=0), np.std(X_train, axis=0)
X_train, X_test = (X_train - mu) / std, (X_test - mu) / std

fig, ax = plt.subplots(1, 2, figsize=(7, 2.5))
ax[0].scatter(X_train[y_train == 1, 0], X_train[y_train == 1, 1])
ax[0].scatter(X_train[y_train == 0, 0], X_train[y_train == 0, 1])
ax[1].scatter(X_test[y_test == 1, 0], X_test[y_test == 1, 1])
ax[1].scatter(X_test[y_test == 0, 0], X_test[y_test == 0, 1])
plt.xlim([x[:, 0].min()-0.5, x[:, 0].max()+0.5])
plt.ylim([x[:, 1].min()-0.5, x[:, 1].max()+0.5])
plt.show()

L2-Regularized Logistic Regression via weight_decay¶
In [ ]:
def custom_where(cond, x_1, x_2):
    return (cond * x_1) + ((1-cond) * x_2)


class LogisticRegression(torch.nn.Module):

    def __init__(self, num_features):
        super(LogisticRegression, self).__init__()
        self.linear = torch.nn.Linear(num_features, 1)
        # initialize weights to zeros here,
        # since we used zero weights in the
        # manual approach

        self.linear.weight.detach().zero_()
        self.linear.bias.detach().zero_()
        # Note: the trailing underscore
        # means "in-place operation" in the context
        # of PyTorch

    def forward(self, x):
        logits = self.linear(x)
        probas = torch.sigmoid(logits)
        return probas

model = LogisticRegression(num_features=2).to(device)

#########################################################
## Apply L2 regularization
optimizer = torch.optim.SGD(model.parameters(),
                            lr=0.1,
                            weight_decay=LAMBDA)
#-------------------------------------------------------

def comp_accuracy(label_var, pred_probas):
    pred_labels = custom_where((pred_probas > 0.5).float(), 1, 0).view(-1)
    acc = torch.sum(pred_labels == label_var.view(-1)).float() / label_var.size(0)
    return acc

num_epochs = 30

X_train_tensor = torch.tensor(X_train, dtype=torch.float32, device=device)
y_train_tensor = torch.tensor(y_train, dtype=torch.float32, device=device).view(-1, 1)

for epoch in range(num_epochs):

    #### Compute outputs ####
    out = model(X_train_tensor)

    #### Compute gradients ####
    cost = F.binary_cross_entropy(out, y_train_tensor, reduction='sum')
    optimizer.zero_grad()
    cost.backward()

    #### Update weights ####
    optimizer.step()

    #### Logging ####
    pred_probas = model(X_train_tensor)
    acc = comp_accuracy(y_train_tensor, pred_probas)
    print('Epoch: %03d' % (epoch + 1), end="")
    print(' | Train ACC: %.3f' % acc, end="")
    print(' | Cost: %.3f' % F.binary_cross_entropy(pred_probas, y_train_tensor))

print('\nModel parameters:')
print('  Weights: %s' % model.linear.weight)
print('  Bias: %s' % model.linear.bias)

X_test_tensor = torch.tensor(X_test, dtype=torch.float32, device=device)
y_test_tensor = torch.tensor(y_test, dtype=torch.float32, device=device)

pred_probas = model(X_test_tensor)
test_acc = comp_accuracy(y_test_tensor, pred_probas)

print('\n\nTest set accuracy: %.2f%%' % (test_acc*100))
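Why does weight_decay=LAMBDA correspond to an L2 penalty? With plain SGD (no momentum), setting weight_decay to $\lambda$ changes the update for every parameter $\theta$ (weights and bias alike) to

$$\theta \leftarrow \theta - \eta \big( \nabla_\theta \mathcal{L}(\theta) + \lambda \theta \big),$$

which is exactly the gradient step for the augmented cost $\mathcal{L}(\theta) + \frac{\lambda}{2} \lVert \theta \rVert_2^2$. The next section adds this $\frac{\lambda}{2} \lVert \theta \rVert_2^2$ term to the cost explicitly (for both the weights and the bias) and should therefore produce the same parameters.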

L2-Regularized Logistic Regression via Manual Regularization¶
In [ ]:
model = LogisticRegression(num_features=2).to(device)
optimizer = torch.optim.SGD(model.parameters(), lr=0.1)

for epoch in range(num_epochs):

    #### Compute outputs ####
    out = model(X_train_tensor)

    #### Compute gradients ####

    #########################################################
    ## Apply L2 regularization (weight decay)
    cost = F.binary_cross_entropy(out, y_train_tensor, reduction='sum')
    cost = cost + 0.5 * LAMBDA * torch.mm(model.linear.weight,
                                          model.linear.weight.t())

    # note that PyTorch also regularizes the bias, hence, if we want
    # to reproduce the behavior of SGD's "weight_decay" param, we have to add
    # the bias term as well:
    cost = cost + 0.5 * LAMBDA * model.linear.bias**2
    #-------------------------------------------------------

    optimizer.zero_grad()
    cost.backward()

    #### Update weights ####
    optimizer.step()

    #### Logging ####
    pred_probas = model(X_train_tensor)
    acc = comp_accuracy(y_train_tensor, pred_probas)
    print('Epoch: %03d' % (epoch + 1), end="")
    print(' | Train ACC: %.3f' % acc, end="")
    print(' | Cost: %.3f' % F.binary_cross_entropy(pred_probas, y_train_tensor))

print('\nModel parameters:')
print('  Weights: %s' % model.linear.weight)
print('  Bias: %s' % model.linear.bias)

X_test_tensor = torch.tensor(X_test, dtype=torch.float32, device=device)
y_test_tensor = torch.tensor(y_test, dtype=torch.float32, device=device)

pred_probas = model(X_test_tensor)
test_acc = comp_accuracy(y_test_tensor, pred_probas)

print('\n\nTest set accuracy: %.2f%%' % (test_acc*100))

Note: for easier comparison, we printed the regular (unregularized) cost above, not the regularized cost. Strictly speaking, tracking the regularized cost is more informative, since the regular cost may not decrease monotonically even while the optimizer is making progress on the regularized objective.
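If you prefer to log the regularized cost itself, a minimal sketch (reusing model, LAMBDA, F, X_train_tensor, and y_train_tensor defined above; the variable names introduced here are illustrative only) could look like this:

with torch.no_grad():
    probas = model(X_train_tensor)
    bce = F.binary_cross_entropy(probas, y_train_tensor, reduction='sum')
    # same penalty as in the manual-regularization cell: 0.5 * lambda * (||w||^2 + b^2)
    l2_penalty = 0.5 * LAMBDA * ((model.linear.weight**2).sum() + (model.linear.bias**2).sum())
    reg_cost = bce + l2_penalty
print('Regularized cost: %.3f' % reg_cost)

This could replace the per-epoch cost printout in either training loop if you want the logged value to match the quantity the optimizer is actually minimizing.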