# Split the data into train, val, and test sets. In addition we will
# create a small development set as a subset of the training data;
# we can use this for development so our code runs faster.
num_training = 49000
num_validation = 1000
num_test = 1000
num_dev = 500
# Our validation set will be num_validation points from the original
# training set.
mask = range(num_training, num_training + num_validation)
X_val = X_train[mask]
y_val = y_train[mask]
# Our training set will be the first num_training points from the original
# training set.
mask = range(num_training)
X_train = X_train[mask]
y_train = y_train[mask]
# We will also make a development set, which is a small subset of
# the training set.
mask = np.random.choice(num_training, num_dev, replace=False)
X_dev = X_train[mask]
y_dev = y_train[mask]
# We use the first num_test points of the original test set as our
# test set.
mask = range(num_test)
X_test = X_test[mask]
y_test = y_test[mask]
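A quick sanity check, not part of the original notebook, can catch off-by-one mistakes in the masks above. Note that the validation rows are sliced out before X_train is truncated, so the two never overlap, and the dev set is drawn from the already-truncated training data:

# Sanity-check the splits against the size constants.
assert X_train.shape[0] == num_training
assert X_val.shape[0] == num_validation
assert X_test.shape[0] == num_test
assert X_dev.shape[0] == num_dev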
# Preprocessing: reshape the image data into rows
# (each 32x32x3 image becomes a single row of 32*32*3 = 3072 values)
X_train = np.reshape(X_train, (X_train.shape[0], -1))
X_val = np.reshape(X_val, (X_val.shape[0], -1))
X_test = np.reshape(X_test, (X_test.shape[0], -1))
X_dev = np.reshape(X_dev, (X_dev.shape[0], -1))
print('Train data shape: ', X_train.shape)
print('Train labels shape: ', y_train.shape)
print('Validation data shape: ', X_val.shape)
print('Validation labels shape: ', y_val.shape)
print('Test data shape: ', X_test.shape)
print('Test labels shape: ', y_test.shape)
print('dev data shape: ', X_dev.shape)
Train data shape:  (49000, 3072)
Train labels shape:  (49000,)
Validation data shape:  (1000, 3072)
Validation labels shape:  (1000,)
Test data shape:  (1000, 3072)
Test labels shape:  (1000,)
dev data shape:  (500, 3072)
# Preprocessing: subtract the mean image
# first: compute the image mean based on the training data
mean_image = np.mean(X_train, axis=0)
print(mean_image[:10])  # print a few of the elements
plt.figure(figsize=(4, 4))
plt.imshow(mean_image.reshape((32, 32, 3)).astype('uint8'))  # visualize the mean image
plt.show()
# second: subtract the mean image from train and test data
# (subtract the mean to center the data; the mean is computed from the
# training set only, so no information leaks in from val/test)
X_train -= mean_image
X_val -= mean_image
X_test -= mean_image
X_dev -= mean_image
# third: append the bias dimension of ones (i.e. bias trick) so that our SVM
# only has to worry about optimizing a single weight matrix W.
X_train = np.hstack([X_train, np.ones((X_train.shape[0], 1))])
X_val = np.hstack([X_val, np.ones((X_val.shape[0], 1))])
X_test = np.hstack([X_test, np.ones((X_test.shape[0], 1))])
X_dev = np.hstack([X_dev, np.ones((X_dev.shape[0], 1))])
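The bias trick works because appending a constant 1 to every input folds the bias vector into an extra row of W: scores = x·W + b equals x'·W' when W' stacks b under W and x' appends a 1. A minimal sketch with toy shapes (all names here are illustrative, not from the assignment):

import numpy as np

x = np.array([2.0, -1.0])     # one input with D = 2 features
W = np.random.randn(2, 3)     # weights, shape (D, C) for C = 3 classes
b = np.random.randn(3)        # per-class biases

x_ext = np.append(x, 1.0)     # append the constant 1
W_ext = np.vstack([W, b])     # stack the biases as an extra row

# The extended matrix reproduces scores = x @ W + b exactly.
assert np.allclose(x @ W + b, x_ext @ W_ext)

This is why the weight matrix later has 3073 rows: 3072 pixel dimensions plus the bias row.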
def svm_loss_naive(W, X, y, reg):
    """
    Structured SVM loss function, naive implementation (with loops).

    Inputs have dimension D, there are C classes, and we operate on minibatches
    of N examples.

    Inputs:
    - W: A numpy array of shape (D, C) containing weights.
    - X: A numpy array of shape (N, D) containing a minibatch of data.
    - y: A numpy array of shape (N,) containing training labels; y[i] = c means
      that X[i] has label c, where 0 <= c < C.
    - reg: (float) regularization strength

    Returns a tuple of:
    - loss as single float
    - gradient with respect to weights W; an array of same shape as W
    """
    import torch as t
    dW = np.zeros(W.shape)  # initialize the gradient as zero

    # Wrap the inputs in torch tensors; here the gradient is computed with
    # PyTorch autograd instead of by hand.
    X = t.from_numpy(X)
    W = t.from_numpy(W)
    W.requires_grad = True
    y = t.from_numpy(y)

    # compute the loss and the gradient
    num_classes = W.shape[1]
    num_train = X.shape[0]
    loss = t.tensor([0.0], requires_grad=True)
    for i in range(num_train):
        scores = t.mm(X[i].unsqueeze(0), W)
        correct_class_score = scores[0][y[i]]
        for j in range(num_classes):
            if j == y[i]:
                continue
            margin = scores[0][j] - correct_class_score + 1  # note delta = 1
            if margin > 0:
                loss = margin + loss
    # Right now the loss is a sum over all training examples, but we want it
    # to be an average instead so we divide by num_train.
    loss = loss / num_train
    # Add regularization to the loss.
    loss = reg * (W * W).sum() + loss
    #############################################################################
    # TODO:                                                                     #
    # Compute the gradient of the loss function and store it in dW.             #
    # Rather than first computing the loss and then computing the derivative,   #
    # it may be simpler to compute the derivative at the same time that the     #
    # loss is being computed. As a result you may need to modify some of the    #
    # code above to compute the gradient.                                       #
    #############################################################################
    # *****START OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****
    loss.backward()
    dW = W.grad.numpy()  # convert back to numpy for the caller
    # *****END OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****

    return loss.item(), dW
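The version above leans on PyTorch autograd, which works but sidesteps what the assignment is after: computing the gradient analytically inside the same loops. Each violated margin max(0, s_j - s_{y_i} + 1) contributes X[i] to column j of dW and -X[i] to the correct-class column, and the regularization term reg * sum(W*W) contributes 2 * reg * W. A pure-numpy sketch of that intended approach (same interface; the function name is mine):

import numpy as np

def svm_loss_naive_numpy(W, X, y, reg):
    """Analytic-gradient version of the naive SVM loss (numpy only)."""
    dW = np.zeros(W.shape)
    num_classes = W.shape[1]
    num_train = X.shape[0]
    loss = 0.0
    for i in range(num_train):
        scores = X[i].dot(W)
        correct_class_score = scores[y[i]]
        for j in range(num_classes):
            if j == y[i]:
                continue
            margin = scores[j] - correct_class_score + 1  # delta = 1
            if margin > 0:
                loss += margin
                dW[:, j] += X[i]      # d(margin)/dW[:, j]    =  X[i]
                dW[:, y[i]] -= X[i]   # d(margin)/dW[:, y[i]] = -X[i]

    # Average over the batch, then add regularization and its gradient.
    loss = loss / num_train + reg * np.sum(W * W)
    dW = dW / num_train + 2 * reg * W
    return loss, dW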
Let's try calling it:
# Evaluate the naive implementation of the loss we provided for you:
from cs231n.classifiers.linear_svm import svm_loss_naive
import time
# generate a random SVM weight matrix of small numbers
# (3073 rows = 3072 pixel dimensions + 1 bias dimension)
W = np.random.randn(3073, 10) * 0.0001
# Once you've implemented the gradient, recompute it with the code below
# and gradient check it with the function we provided for you
# Compute the loss and its gradient at W.
loss, grad = svm_loss_naive(W, X_dev, y_dev, 0.0)
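A useful sanity check at this point (my addition, not in the original cell): with such a tiny random W all scores are close to zero, so each of the 9 incorrect classes contributes a margin of roughly delta = 1, and the loss should come out near 9.0:

print('loss: %f' % loss)  # expect roughly 9.0 with a near-zero random W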
# Numerically compute the gradient along several randomly chosen dimensions, and
# compare them with your analytically computed gradient. The numbers should match
# almost exactly along all dimensions.
from cs231n.gradient_check import grad_check_sparse
f = lambda w: svm_loss_naive(w, X_dev, y_dev, 0.0)[0]
grad_numerical = grad_check_sparse(f, W, grad)
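grad_check_sparse samples a handful of coordinates and compares the analytic gradient against a numerical estimate. Under the hood this is a centered difference; a minimal standalone version (illustrative only, not the course's actual implementation) might look like:

import numpy as np

def numeric_grad_at(f, W, idx, h=1e-5):
    """Centered-difference estimate of df/dW[idx] at a single coordinate."""
    old = W[idx]
    W[idx] = old + h
    fxph = f(W)        # f evaluated at W + h along this coordinate
    W[idx] = old - h
    fxmh = f(W)        # f evaluated at W - h
    W[idx] = old       # restore the original value
    return (fxph - fxmh) / (2 * h)

The check then reports a relative error of the form |g_num - g_ana| / (|g_num| + |g_ana|); values around 1e-7 or smaller are typically fine, though the kinks of max(0, ·) in the SVM loss can occasionally produce a larger discrepancy at an unlucky sampled point.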
# do the gradient check once again with regularization turned on
# you didn't forget the regularization gradient did you?
loss, grad = svm_loss_naive(W, X_dev, y_dev, 5e1)
f = lambda w: svm_loss_naive(w, X_dev, y_dev, 5e1)[0]
grad_numerical = grad_check_sparse(f, W, grad)
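In the autograd version above the regularization gradient comes for free, because loss.backward() differentiates the full expression including reg * (W * W).sum(); in the analytic numpy sketch it is the explicit 2 * reg * W term. If the check passes with reg = 0.0 but fails with reg = 5e1, that term is the first place to look.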