def do_rnn_wordbag(trainX, testX, trainY, testY):
    """Train and evaluate a word-bag LSTM classifier with tflearn.

    Args:
        trainX, testX: integer-encoded sequences (expected already padded
            to length 100 — assumes the caller did pad_sequences; TODO confirm).
        trainY, testY: binary class labels (0/1).

    Prints a classification report, confusion matrix, and the true vs.
    predicted labels for the test set.
    """
    y_test = testY
    # Convert integer labels to one-hot vectors for the 2-way softmax output.
    trainY = to_categorical(trainY, nb_classes=2)
    testY = to_categorical(testY, nb_classes=2)

    # Network: embedding -> LSTM -> 2-way softmax.
    net = tflearn.input_data([None, 100])
    net = tflearn.embedding(net, input_dim=1000, output_dim=128)
    net = tflearn.lstm(net, 128, dropout=0.1)
    net = tflearn.fully_connected(net, 2, activation='softmax')
    net = tflearn.regression(net, optimizer='adam', learning_rate=0.005,
                             loss='categorical_crossentropy')

    # Training
    model = tflearn.DNN(net, tensorboard_verbose=0)
    model.fit(trainX, trainY, validation_set=0.1, show_metric=True,
              batch_size=1, run_id="uba", n_epoch=10)

    # Threshold the softmax probability of class 0 at 0.5 to get hard labels.
    y_predict_list = model.predict(testX)
    y_predict = [0 if probs[0] >= 0.5 else 1 for probs in y_predict_list]

    print(classification_report(y_test, y_predict))
    print(metrics.confusion_matrix(y_test, y_predict))
    # BUG FIX: the original printed the undefined name `y_train` here
    # (NameError) and spelled the label "ture".
    print("true")
    print(y_test)
    print("pre")
    print(y_predict)
# Traditional method: Naive Bayes
def do_nb(x_train, x_test, y_train, y_test):
    """Train a Gaussian Naive Bayes classifier and report test-set metrics.

    Args:
        x_train, y_train: training features and labels.
        x_test, y_test: held-out features and true labels.

    Prints a classification report and confusion matrix.
    """
    gnb = GaussianNB()
    gnb.fit(x_train, y_train)
    y_pred = gnb.predict(x_test)
    print(classification_report(y_test, y_pred))
    # Normalized to print() — the original mixed a Python 2 print statement
    # with the print() call above.
    print(metrics.confusion_matrix(y_test, y_pred))
# Traditional method: HMM
def do_hmm(trainX, testX, trainY, testY, threshold=-580):
    """Train a 2-state Gaussian HMM and flag low-likelihood test sequences.

    Args:
        trainX: iterable of variable-length 1-D observation sequences.
        testX: iterable of sequences to score against the trained model.
        trainY: unused — kept for signature parity with the other models.
        testY: true binary labels for testX (1 = anomalous).
        threshold: log-likelihood cutoff; a sequence scoring below it is
            predicted as class 1 (anomaly). Default preserves the original
            hard-coded value of -580.

    Prints a classification report, confusion matrix, and the true vs.
    predicted labels.
    """
    n_components = 2
    print(len(trainX))

    # Stack every training sequence into one (n_samples, 1) array and record
    # each sequence's length, as hmmlearn's fit(X, lengths) expects.
    # BUG FIX: the original seeded X with a phantom observation [[0]] and
    # lengths with [1], so the model was trained on a sample that does not
    # exist in trainX; it also called np.concatenate inside the loop
    # (quadratic). Collect first, concatenate once.
    sequences = []
    lengths = []
    for seq in trainX:
        sequences.append(np.array([[obs] for obs in seq]))
        lengths.append(len(seq))
    X = np.concatenate(sequences)

    remodel = hmm.GaussianHMM(n_components=n_components,
                              covariance_type="full", n_iter=100)
    remodel.fit(X, lengths)

    # Score each test sequence; low log-likelihood => anomalous (class 1).
    y_predict = []
    for seq in testX:
        score = remodel.score([[obs] for obs in seq])
        print(score)
        y_predict.append(1 if score < threshold else 0)
    y_predict = np.array(y_predict)

    print(classification_report(testY, y_predict))
    print(metrics.confusion_matrix(testY, y_predict))
    print(testY)
    print(y_predict)