Deep Learning for Natural Language Processing

By: Karthiek Reddy Bokka, Shubhangi Hora, Tanuj Jain, Monicah Wambugu

Overview of this book

Applying deep learning approaches to various NLP tasks can take your computational algorithms to a completely new level in terms of speed and accuracy. Deep Learning for Natural Language Processing starts by highlighting the basic building blocks of the natural language processing domain. The book goes on to introduce the problems that you can solve using state-of-the-art neural network models. After this, delving into the various neural network architectures and their specific areas of application will help you to understand how to select the best model to suit your needs. As you advance through this deep learning book, you'll study convolutional, recurrent, and recursive neural networks, in addition to covering long short-term memory networks (LSTMs). Understanding these networks will help you to implement their models using Keras. In later chapters, you will develop a trigger word detection application using NLP techniques such as attention models and beam search. By the end of this book, you will not only have sound knowledge of natural language processing, but will also be able to select the best text preprocessing and neural network models to solve a number of NLP problems.

Chapter 8: State of the art in Natural Language Processing

Activity 11: Build a Text Summarization Model

Solution:

  1. Import the necessary Python packages and classes.

    import os
    import re
    import pdb
    import string
    import numpy as np
    import pandas as pd
    from keras.utils import to_categorical
    import matplotlib.pyplot as plt
    %matplotlib inline
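
    The model-building and training steps later in this solution also use Keras layers, the Keras backend, and the Adam optimizer, which are not imported in the snippet above. A minimal set of additional imports, assuming the standalone Keras 2.x API used throughout this chapter, would be:

    # Additional imports assumed by the later steps (standalone Keras 2.x)
    from keras import backend as K
    from keras.layers import Input, Dense, Activation, Dot, Concatenate, RepeatVector, LSTM, Bidirectional
    from keras.models import Model
    from keras.optimizers import Adam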

  2. Load the dataset and read the file.

    path_data = "news_summary_small.csv"
    df_text_file = pd.read_csv(path_data)
    df_text_file.headlines = df_text_file.headlines.str.lower()
    df_text_file.text = df_text_file.text.str.lower()
    lengths_text = df_text_file.text.apply(len)
    dataset = list(zip(df_text_file.text.values, df_text_file.headlines.values))
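
    The CSV is expected to contain at least the two columns used above, headlines and text; an optional quick inspection of the loaded frame confirms this:

    # Optional: inspect the columns used in the following steps
    print(df_text_file[['headlines', 'text']].head(2))
    print("number of articles:", len(df_text_file))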

  3. Make the vocab dictionaries.

    input_texts = []
    target_texts = []
    input_chars = set()
    target_chars = set()

    for line in dataset:
        input_text, target_text = list(line[0]), list(line[1])
        target_text = ['BEGIN_'] + target_text + ['_END']
        input_texts.append(input_text)
        target_texts.append(target_text)
        for character in input_text:
            if character not in input_chars:
                input_chars.add(character)
        for character in target_text:
            if character not in target_chars:
                target_chars.add(character)

    input_chars.add("<unk>")
    input_chars.add("<pad>")
    target_chars.add("<pad>")
    input_chars = sorted(input_chars)
    target_chars = sorted(target_chars)
    human_vocab = dict(zip(input_chars, range(len(input_chars))))
    machine_vocab = dict(zip(target_chars, range(len(target_chars))))
    inv_machine_vocab = dict(enumerate(sorted(machine_vocab)))

    def string_to_int(string_in, length, vocab):
        """
        Converts the input string into a list of integers representing the positions of its
        characters in "vocab".

        Arguments:
        string_in -- input string
        length -- the number of time steps you'd like; determines whether the output is padded or cut
        vocab -- vocabulary, a dictionary used to index every character of "string_in"

        Returns:
        rep -- list of integers of size "length", representing the position of each character of
               the string in the vocabulary (unknown characters map to the index of '<unk>')
        """

  4. Convert the string to lowercase to standardize it, then map its characters to vocabulary indices, padding or truncating to the required length. This code continues the body of string_to_int.

        string_in = string_in.lower()
        string_in = string_in.replace(',', '')
        if len(string_in) > length:
            string_in = string_in[:length]
        # Map unknown characters to the index of '<unk>'
        rep = list(map(lambda x: vocab.get(x, vocab['<unk>']), string_in))
        if len(string_in) < length:
            rep += [vocab['<pad>']] * (length - len(string_in))
        return rep

    def preprocess_data(dataset, human_vocab, machine_vocab, Tx, Ty):
        X, Y = zip(*dataset)
        X = np.array([string_to_int(i, Tx, human_vocab) for i in X])
        Y = [string_to_int(t, Ty, machine_vocab) for t in Y]
        print("X shape from preprocess: {}".format(X.shape))
        Xoh = np.array(list(map(lambda x: to_categorical(x, num_classes=len(human_vocab)), X)))
        Yoh = np.array(list(map(lambda x: to_categorical(x, num_classes=len(machine_vocab)), Y)))
        return X, np.array(Y), Xoh, Yoh

    def softmax(x, axis=1):
        """Softmax activation function.

        # Arguments
            x: Tensor.
            axis: Integer, axis along which the softmax normalization is applied.

        # Returns
            Tensor, output of softmax transformation.

        # Raises
            ValueError: In case `dim(x) == 1`.
        """
        ndim = K.ndim(x)
        if ndim == 2:
            return K.softmax(x)
        elif ndim > 2:
            e = K.exp(x - K.max(x, axis=axis, keepdims=True))
            s = K.sum(e, axis=axis, keepdims=True)
            return e / s
        else:
            raise ValueError('Cannot apply softmax to a tensor that is 1D')
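
    As a quick illustration of how string_to_int behaves, short strings are padded and long strings are truncated. The toy vocabulary below is invented purely for this example; the real human_vocab is built from the dataset.

    # Toy vocabulary, for illustration only
    toy_vocab = {'<pad>': 0, '<unk>': 1, 'a': 2, 'c': 3, 't': 4}
    print(string_to_int("cat", 5, toy_vocab))   # [3, 2, 4, 0, 0] -- padded to length 5
    print(string_to_int("cats", 3, toy_vocab))  # [3, 2, 4] -- truncated to length 3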

  5. Run the previous code snippets to load the data, build the vocab dictionaries, and define the utility functions used later. Then define the lengths of the input and output character sequences.

    Tx = 460
    Ty = 75
    X, Y, Xoh, Yoh = preprocess_data(dataset, human_vocab, machine_vocab, Tx, Ty)

    Define the shared model layers (repeator, concatenator, densors, activator, and dotor):

    # Define shared layers as global variables
    repeator = RepeatVector(Tx)
    concatenator = Concatenate(axis=-1)
    densor1 = Dense(10, activation="tanh")
    densor2 = Dense(1, activation="relu")
    activator = Activation(softmax, name='attention_weights')
    dotor = Dot(axes=1)

    Define the one_step_attention function:

    def one_step_attention(h, s_prev):
        """
        Performs one step of attention: outputs a context vector computed as a dot product of the
        attention weights "alphas" and the hidden states "h" of the Bi-LSTM.

        Arguments:
        h -- hidden state output of the Bi-LSTM, numpy-array of shape (m, Tx, 2*n_h)
        s_prev -- previous hidden state of the (post-attention) LSTM, numpy-array of shape (m, n_s)

        Returns:
        context -- context vector, input of the next (post-attention) LSTM cell
        """

  6. Use repeator to repeat s_prev so that it has shape (m, Tx, n_s) and can be concatenated with all the hidden states "h". This and the following steps form the body of one_step_attention.

        s_prev = repeator(s_prev)

  7. Use concatenator to concatenate h and s_prev on the last axis.

        concat = concatenator([h, s_prev])

  8. Use densor1 to propagate concat through a small fully connected neural network to compute the "intermediate energies" variable e.

        e = densor1(concat)

  9. Use densor2 to propagate e through a small fully connected neural network to compute the "energies" variable energies.

        energies = densor2(e)

  10. Use activator on energies to compute the attention weights "alphas".

        alphas = activator(energies)

  11. Use dotor together with "alphas" and "h" to compute the context vector to be passed to the next (post-attention) LSTM cell, and return it.

        context = dotor([alphas, h])
        return context
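
    Assembled from steps 6 to 11, the complete function reads as follows (this repeats the fragments above verbatim, shown together only for reference):

    def one_step_attention(h, s_prev):
        # Repeat s_prev to (m, Tx, n_s) and concatenate it with the Bi-LSTM hidden states
        s_prev = repeator(s_prev)
        concat = concatenator([h, s_prev])
        # Two small dense layers compute the energies; the softmax activator turns them into attention weights
        e = densor1(concat)
        energies = densor2(e)
        alphas = activator(energies)
        # The weighted sum of the hidden states is the context vector for the post-attention LSTM
        context = dotor([alphas, h])
        return context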

    Define the number of hidden states for the encoder and the decoder.

    n_h = 32
    n_s = 64
    post_activation_LSTM_cell = LSTM(n_s, return_state=True)
    output_layer = Dense(len(machine_vocab), activation=softmax)

    Define the model architecture and run it to obtain a model.

    def model(Tx, Ty, n_h, n_s, human_vocab_size, machine_vocab_size):
        """
        Arguments:
        Tx -- length of the input sequence
        Ty -- length of the output sequence
        n_h -- hidden state size of the Bi-LSTM
        n_s -- hidden state size of the post-attention LSTM
        human_vocab_size -- size of the Python dictionary "human_vocab"
        machine_vocab_size -- size of the Python dictionary "machine_vocab"

        Returns:
        model -- Keras model instance
        """

  12. Define the input of your model with shape (Tx, human_vocab_size). This and the following steps form the body of the model function.
  13. Define s0 and c0, the initial hidden state and cell state for the decoder LSTM, each of shape (n_s,).

        X = Input(shape=(Tx, human_vocab_size), name="input_first")
        s0 = Input(shape=(n_s,), name='s0')
        c0 = Input(shape=(n_s,), name='c0')
        s = s0
        c = c0

  14. Initialize an empty list of outputs.

        outputs = []

  15. Define your pre-attention Bi-LSTM; remember to use return_sequences=True. Its output is the "h" passed to one_step_attention at every decoder step.

        h = Bidirectional(LSTM(n_h, return_sequences=True))(X)

        # Iterate for Ty steps
        for t in range(Ty):
            # Perform one step of the attention mechanism to get the context vector at step t
            context = one_step_attention(h, s)

  16. Apply the post-attention LSTM cell to the "context" vector (still inside the loop).

            # Pass: initial_state = [hidden state, cell state]
            s, _, c = post_activation_LSTM_cell(context, initial_state=[s, c])

  17. Apply the Dense output layer to the hidden state output of the post-attention LSTM.

            out = output_layer(s)

  18. Append "out" to the "outputs" list.

            outputs.append(out)

  19. After the loop, create the model instance taking the three inputs and returning the list of outputs.

        model = Model(inputs=[X, s0, c0], outputs=outputs)
        return model

    model = model(Tx, Ty, n_h, n_s, len(human_vocab), len(machine_vocab))

    # Define the model loss function and other hyperparameters.
    # Also initialize the decoder state vectors.
    opt = Adam(lr=0.005, beta_1=0.9, beta_2=0.999, decay=0.01)
    model.compile(loss='categorical_crossentropy', optimizer=opt, metrics=['accuracy'])
    s0 = np.zeros((10000, n_s))
    c0 = np.zeros((10000, n_s))
    outputs = list(Yoh.swapaxes(0, 1))
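
    The model returns one output per decoder time step, so the targets are supplied as a list of Ty arrays rather than as a single tensor; this optional check (using the variables defined above) makes the shapes explicit:

    # Optional sanity check: Yoh has shape (m, Ty, len(machine_vocab));
    # after swapping axes, the targets are a list of Ty arrays of shape (m, len(machine_vocab))
    print(Yoh.shape)
    print(len(outputs), outputs[0].shape)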

    Fit the model to our data:

    model.fit([Xoh, s0, c0], outputs, epochs=1, batch_size=100)

    Run the inference step on new text:

    EXAMPLES = ["Last night a meteorite was seen flying near the earth's moon."]
    for example in EXAMPLES:
        source = string_to_int(example, Tx, human_vocab)
        source = np.array(list(map(lambda x: to_categorical(x, num_classes=len(human_vocab)), source)))
        source = source[np.newaxis, :]
        # Use zero initial decoder states with batch size 1 to match the single example
        s0_single = np.zeros((1, n_s))
        c0_single = np.zeros((1, n_s))
        prediction = model.predict([source, s0_single, c0_single])
        prediction = np.argmax(prediction, axis=-1)
        output = [inv_machine_vocab[int(i)] for i in prediction]
        print("source:", example)
        print("output:", ''.join(output))

    The output is as follows:

Figure 8.18: Text summarization model output
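
Because the target sequences are padded to the fixed length Ty, the decoded output may contain '<pad>' tokens; if so, they can be stripped before display with a small optional post-processing step, using the padding token defined earlier:

    # Optional: drop everything from the first padding token onwards
    summary = ''.join(output).split('<pad>')[0]
    print("cleaned output:", summary)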
