-
Book Overview & Buying
-
Table Of Contents
-
Feedback & Rating

Deep Learning for Natural Language Processing
By :

Solution:
from keras.preprocessing.text import Tokenizer
from keras.models import Sequential
from keras import layers
from keras.preprocessing.sequence import pad_sequences
import numpy as np
import pandas as pd
epochs = 20
maxlen = 100
embedding_dim = 50
num_filters = 64
kernel_size = 5
batch_size = 32
data = pd.read_csv('data/sentiment labelled sentences/yelp_labelled.txt',names=['sentence', 'label'], sep='\t')
data.head()
Printing this out on a Jupyter notebook should display:
sentences=data['sentence'].values
labels=data['label'].values
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(
sentences, labels, test_size=0.30, random_state=1000)
tokenizer = Tokenizer(num_words=5000)
tokenizer.fit_on_texts(X_train)
X_train = tokenizer.texts_to_sequences(X_train)
X_test = tokenizer.texts_to_sequences(X_test)
vocab_size = len(tokenizer.word_index) + 1 #The vocabulary size has an additional 1 due to the 0 reserved index
X_train = pad_sequences(X_train, padding='post', maxlen=maxlen)
X_test = pad_sequences(X_test, padding='post', maxlen=maxlen)
model = Sequential()
model.add(layers.Embedding(vocab_size, embedding_dim, input_length=maxlen))
model.add(layers.Conv1D(num_filters, kernel_size, activation='relu'))
model.add(layers.GlobalMaxPooling1D())
model.add(layers.Dense(10, activation='relu'))
model.add(layers.Dense(1, activation='sigmoid'))
model.compile(optimizer='adam',
loss='binary_crossentropy',
metrics=['accuracy'])
model.summary()
The above code should yield
The model can be visualized as follows as well:
model.fit(X_train, y_train,
epochs=epochs,
verbose=False,
validation_data=(X_test, y_test),
batch_size=batch_size)
loss, accuracy = model.evaluate(X_train, y_train, verbose=False)
print("Training Accuracy: {:.4f}".format(accuracy))
loss, accuracy = model.evaluate(X_test, y_test, verbose=False)
print("Testing Accuracy: {:.4f}".format(accuracy))
The accuracy output should be as follows:
Change the font size
Change margin width
Change background colour