import numpy as np

def logistic_regression_SG(feature_matrix, sentiment, initial_coefficients, step_size, batch_size, max_iter):
    log_likelihood_all = []

    # Work on a copy so the caller's initial coefficients are not mutated.
    coefficients = np.array(initial_coefficients)

    # Shuffle the data once before the first pass (seeded for reproducibility).
    np.random.seed(seed=1)
    permutation = np.random.permutation(len(feature_matrix))
    feature_matrix = feature_matrix[permutation, :]
    sentiment = sentiment[permutation]

    i = 0  # start index of the current batch
    for itr in range(max_iter):
        # Predict P(y_i = +1 | x_i, w) for the current batch.
        predictions = predict_probability(feature_matrix[i:i+batch_size, :], coefficients)

        # Indicator of y_i = +1, and per-example prediction errors.
        indicator = (sentiment[i:i+batch_size] == +1)
        errors = indicator - predictions

        for j in range(len(coefficients)):
            # Derivative of the batch log likelihood w.r.t. coefficient j;
            # dividing by batch_size gives the average gradient.
            derivative = feature_derivative(errors, feature_matrix[i:i+batch_size, j])
            coefficients[j] += step_size * derivative / batch_size

        # Track the average log likelihood over the current batch.
        lp = compute_avg_log_likelihood(feature_matrix[i:i+batch_size, :],
                                        sentiment[i:i+batch_size], coefficients)
        log_likelihood_all.append(lp)

        # Report progress on a logarithmic schedule.
        if itr <= 15 or (itr <= 1000 and itr % 100 == 0) or (itr <= 10000 and itr % 1000 == 0) \
                or itr % 10000 == 0 or itr == max_iter - 1:
            data_size = len(feature_matrix)
            print('Iteration %*d: Average log likelihood (of data points [%0*d:%0*d]) = %.8f' %
                  (int(np.ceil(np.log10(max_iter))), itr,
                   int(np.ceil(np.log10(data_size))), i,
                   int(np.ceil(np.log10(data_size))), i + batch_size, lp))

        # Advance to the next batch; when the data is exhausted, reshuffle and wrap around.
        i += batch_size
        if i + batch_size > len(feature_matrix):
            permutation = np.random.permutation(len(feature_matrix))
            feature_matrix = feature_matrix[permutation, :]
            sentiment = sentiment[permutation]
            i = 0

    return coefficients, log_likelihood_all
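The function above relies on three helpers (predict_probability, feature_derivative, compute_avg_log_likelihood) defined earlier in the assignment and not shown in this excerpt. A minimal sketch, assuming the standard logistic regression formulation and consistent with how they are called above:

def predict_probability(feature_matrix, coefficients):
    # P(y = +1 | x, w) = 1 / (1 + exp(-w'h(x)))
    score = np.dot(feature_matrix, coefficients)
    return 1. / (1. + np.exp(-score))

def feature_derivative(errors, feature):
    # Sum over data points of (indicator - prediction) * feature value.
    return np.dot(errors, feature)

def compute_avg_log_likelihood(feature_matrix, sentiment, coefficients):
    indicator = (sentiment == +1).astype(float)
    scores = np.dot(feature_matrix, coefficients)
    # log(1 + exp(-s)), with a guard against overflow for very negative scores:
    logexp = np.log(1. + np.exp(-scores))
    mask = np.isinf(logexp)        # entries where exp(-s) overflowed
    logexp[mask] = -scores[mask]   # log(1 + exp(-s)) ~ -s for very negative s
    return np.sum((indicator - 1) * scores - logexp) / len(feature_matrix)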
# Sanity check on a tiny two-example dataset. The original call is truncated after
# "step_si"; the hyperparameter values below are assumed for illustration.
sample_feature_matrix = np.array([[1., 2., -1.], [1., 0., 1.]])
sample_sentiment = np.array([+1, -1])
coefficients, log_likelihood = logistic_regression_SG(sample_feature_matrix, sample_sentiment, np.zeros(3),
                                                      step_size=1., batch_size=2, max_iter=2)
###
Now run batch gradient ascent over feature_matrix_train for 200 iterations. Setting batch_size equal to the full size of the training data makes every update use all N data points, so the stochastic gradient ascent function above performs exact batch gradient ascent. Use:
initial_coefficients = np.zeros(194)
step_size = 5e-1
batch_size = len(feature_matrix_train)
max_iter = 200
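For reference, the "average log likelihood" printed at each iteration is computed over the current batch, which here is the full training set of N points. Assuming the standard logistic regression likelihood (the quantity compute_avg_log_likelihood returns), this is

$$\ell\ell_A(\mathbf{w}) = \frac{1}{N}\sum_{i=1}^{N}\Big(\mathbf{1}[y_i = +1]\,\ln P(y_i = +1 \mid \mathbf{x}_i, \mathbf{w}) + \mathbf{1}[y_i = -1]\,\ln P(y_i = -1 \mid \mathbf{x}_i, \mathbf{w})\Big)$$

with $P(y_i = +1 \mid \mathbf{x}_i, \mathbf{w}) = 1/(1 + e^{-\mathbf{w}^\top h(\mathbf{x}_i)})$.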
coefficients_batch, log_likelihood_batch = logistic_regression_SG(feature_matrix_train, sentiment_train,
                                                                  initial_coefficients=np.zeros(194),
                                                                  step_size=5e-1,
                                                                  batch_size=len(feature_matrix_train),
                                                                  max_iter=200)
Iteration 0: Average log likelihood (of data points [00000:47780]) = -0.68308119
Iteration 1: Average log likelihood (of data points [00000:47780]) = -0.67394599
Iteration 2: Average log likelihood (of data points [00000:47780]) = -0.66555129
Iteration 3: Average log likelihood (of data points [00000:47780]) = -0.65779626
Iteration 4: Average log likelihood (of data points [00000:47780]) = -0.65060701
Iteration 5: Average log likelihood (of data points [00000:47780]) = -0.64392241
Iteration 6: Average log likelihood (of data points [00000:47780]) = -0.63769009
Iteration 7: Average log likelihood (of data points [00000:47780]) = -0.63186462
Iteration 8: Average log likelihood (of data points [00000:47780]) = -0.62640636
Iteration 9: Average log likelihood (of data points [00000:47780]) = -0.62128063
Iteration 10: Average log likelihood (of data points [00000:47780]) = -0.61645691
Iteration 11: Average log likelihood (of data points [00000:47780]) = -0.61190832
Iteration 12: Average log likelihood (of data points [00000:47780]) = -0.60761103
Iteration 13: Average log likelihood (of data points [00000:47780]) = -0.60354390
Iteration 14: Average log likelihood (of data points [00000:47780]) = -0.59968811
Iteration 15: Average log likelihood (of data points [00000:47780]) = -0.59602682
Iteration 100: Average log likelihood (of data points [00000:47780]) = -0.49520194
Iteration 199: Average log likelihood (of data points [00000:47780]) = -0.47126953
import matplotlib.pyplot as plt

# Plot the average log likelihood per iteration of batch gradient ascent.
plt.plot(log_likelihood_batch)
plt.show()