# eye_for_blind / inference_script.py
# Source: krishnapal2308 — commit 7301eb7 ("pipeline to manual"), 4.68 kB
import numpy as np
import tensorflow as tf
import keras
from keras.models import Model
import warnings
warnings.filterwarnings('ignore')
class Encoder(Model):
    """Projects pre-extracted CNN image features into the caption embedding space.

    A single dense layer followed by ReLU; no convolution happens here —
    feature extraction is done by a separate InceptionV3 model.
    """

    def __init__(self, embed_dim):
        super(Encoder, self).__init__()
        # One linear projection to embed_dim; activation applied in call().
        self.dense = tf.keras.layers.Dense(embed_dim)

    def call(self, features):
        projected = self.dense(features)
        return tf.keras.activations.relu(projected)
class Attention_model(Model):
    """Additive (Bahdanau-style) attention over image feature locations.

    Scores each spatial feature against the decoder hidden state and
    returns the attention-weighted context vector plus the weights.
    """

    def __init__(self, units):
        super(Attention_model, self).__init__()
        self.W1 = tf.keras.layers.Dense(units)  # projects image features
        self.W2 = tf.keras.layers.Dense(units)  # projects decoder hidden state
        self.V = tf.keras.layers.Dense(1)       # collapses to a scalar score
        self.units = units

    def call(self, features, hidden):
        # Insert a time axis so the hidden state broadcasts across locations.
        expanded_hidden = hidden[:, tf.newaxis]
        score = tf.keras.activations.tanh(
            self.W1(features) + self.W2(expanded_hidden)
        )
        # Normalise scores over the location axis (axis=1).
        attention_weights = tf.keras.activations.softmax(self.V(score), axis=1)
        # Weighted sum of features -> context vector.
        context_vector = tf.reduce_sum(attention_weights * features, axis=1)
        return context_vector, attention_weights
class Decoder(Model):
    """GRU caption decoder with additive attention.

    Each call consumes one token, attends over the encoded image features,
    and emits logits over the vocabulary plus the new GRU state.
    """

    def __init__(self, embed_dim, units, vocab_size):
        super(Decoder, self).__init__()
        self.units = units
        self.attention = Attention_model(self.units)
        self.embed = tf.keras.layers.Embedding(vocab_size, embed_dim)
        self.gru = tf.keras.layers.GRU(self.units, return_sequences=True, return_state=True,
                                       recurrent_initializer='glorot_uniform')
        self.d1 = tf.keras.layers.Dense(self.units)
        self.d2 = tf.keras.layers.Dense(vocab_size)  # vocabulary logits

    def call(self, x, features, hidden):
        """Decode one step.

        x: token ids for the current step; features: encoder output;
        hidden: previous GRU state (used here only by attention).
        Returns (logits, new_state, attention_weights).
        """
        context_vector, attention_weights = self.attention(features, hidden)
        embed = self.embed(x)
        # Prepend the attention context to the token embedding.
        embed = tf.concat([tf.expand_dims(context_vector, 1), embed], axis=-1)
        # NOTE(review): `hidden` is not passed as initial_state to the GRU —
        # this mirrors the original/tutorial implementation, so it is kept as-is.
        output, state = self.gru(embed)
        output = self.d1(output)
        output = tf.reshape(output, (-1, output.shape[2]))
        output = self.d2(output)
        return output, state, attention_weights

    def init_state(self, batch_size):
        """Return a zeroed GRU state of shape (batch_size, units)."""
        return tf.zeros((batch_size, self.units))

    def reset_state(self, batch_size):
        """Alias kept for backward compatibility; delegates to init_state."""
        return self.init_state(batch_size)
# Restore the fitted Keras tokenizer (word <-> integer id mappings) from JSON.
with open("efb-requirements/tokenizer.json", 'r', encoding='utf-8') as tokenizer_file:
    loaded_tokenizer_json = tokenizer_file.read()
tokenizer = tf.keras.preprocessing.text.tokenizer_from_json(loaded_tokenizer_json)
def load_and_process_image(image, target_size=(299, 299)):
    """Resize an image and apply InceptionV3 preprocessing.

    image: array-like image data (HWC); target_size: spatial size expected
    by InceptionV3. Returns a float tensor scaled by preprocess_input.
    NOTE(review): the uint8 cast before resize mirrors the original
    pipeline — presumably matches training-time preprocessing; confirm.
    """
    tensor = tf.convert_to_tensor(image)
    tensor = tf.cast(tensor, tf.uint8)
    tensor = tf.image.resize(tensor, target_size)
    return tf.keras.applications.inception_v3.preprocess_input(tensor)
# Frozen InceptionV3 feature extractor (outputs spatial CNN features).
image_features_extract_model = keras.models.load_model("efb-requirements/inception_v3.h5")
# Hyperparameters — must match the values the saved weights were trained with.
embedding_dim = 256
units = 512
vocab_size = 5001
encoder = Encoder(embedding_dim)
decoder = Decoder(embedding_dim, units, vocab_size)
# Creating dummy inputs
# Subclassed Keras models have no weights until called; this warm-up forward
# pass builds both models so load_weights() below can succeed. The dummy
# shape (32, 64, 2048) presumably matches InceptionV3's flattened 8x8x2048
# feature map — TODO confirm against the extractor's actual output.
dummy_img_input = tf.ones((32, 64, 2048))
features = encoder(dummy_img_input)
hidden = decoder.init_state(32)
dec_input = tf.expand_dims([tokenizer.word_index['<start>']] * 32, 1)
dec = decoder(dec_input, features, hidden)
# Loading saved models
# Weights can only be loaded after the warm-up calls above built the layers.
encoder.load_weights("efb-requirements/encoder_50epoch_weights.h5")
decoder.load_weights("efb-requirements/decoder_50epoch_weights.h5")
def evaluate(image):
    """Greedily decode a caption for *image*.

    Returns a list of token strings; includes '<end>' as the final token
    when the model emits it before max_length steps.

    Fixes over the original: removes the dead `predictions = ''`
    initializer and the dead attention_plot trim (the plot is never
    returned), avoids a double dict lookup per step, and guards against a
    KeyError when argmax yields an id with no vocabulary entry (e.g. the
    padding id 0) by substituting '<unk>'.
    """
    max_length = 39
    # 64 = number of spatial feature locations attended over.
    attention_plot = np.zeros((max_length, 64))
    hidden = decoder.reset_state(batch_size=1)

    # Preprocess the raw image and extract CNN features.
    temp_input = tf.expand_dims(load_and_process_image(image), 0)
    img_tensor_val = image_features_extract_model(temp_input)
    # Flatten the spatial grid: (1, H, W, C) -> (1, H*W, C).
    img_tensor_val = tf.reshape(
        img_tensor_val, (img_tensor_val.shape[0], -1, img_tensor_val.shape[3])
    )
    cnn_features = encoder(img_tensor_val)

    decoder_input = tf.expand_dims([tokenizer.word_index['<start>']], 0)
    result = []

    for i in range(max_length):
        predictions, hidden, attention_weights = decoder(decoder_input, cnn_features, hidden)
        attention_plot[i] = tf.reshape(attention_weights, (-1,)).numpy()
        predicted_id = tf.argmax(predictions[0]).numpy()
        # .get() guards against ids absent from the vocab (e.g. padding id 0),
        # which would otherwise raise KeyError mid-caption.
        word = tokenizer.index_word.get(predicted_id, '<unk>')
        result.append(word)
        if word == '<end>':
            return result
        # Feed the prediction back in as the next decoder input (greedy decoding).
        decoder_input = tf.expand_dims([predicted_id], 0)

    return result