carlosabadia committed
Commit 90ecf2e · Parent: 57a938e

Upload 6 files

Files changed (4)
  1. app.py +10 -10
  2. class_names.txt +32 -101
  3. model.py +53 -20
  4. model_1.pth +3 -0
app.py CHANGED
@@ -3,7 +3,7 @@ import gradio as gr
  import os
  import torch
 
- from model import create_vit16_model
+ from model import create_model
  from timeit import default_timer as timer
  from typing import Tuple, Dict
 
@@ -14,14 +14,14 @@ with open("class_names.txt", "r") as f: # reading them in from class_names.txt
  ### 2. Model and transforms preparation ###
 
  # Create model
- vit16, vit16_transforms = create_vit16_model(
-     num_classes=101, # could also use len(class_names)
+ model_created, model_transforms = create_model(
+     num_classes=len(class_names),
  )
 
 
- vit16.load_state_dict(
+ model_created.load_state_dict(
      torch.load(
-         f="model_food101_20_percent.pth",
+         f="model_1.pth",
          map_location=torch.device("cpu"), # load to CPU
      )
  )
@@ -38,13 +38,13 @@ def predict(img) -> Tuple[Dict, float]:
      start_time = timer()
 
      # Transform the target image and add a batch dimension
-     img = vit16_transforms(img).unsqueeze(0)
+     img = model_transforms(img).unsqueeze(0)
 
      # Put model into evaluation mode and turn on inference mode
-     vit16.eval()
+     model_created.eval()
      with torch.inference_mode():
          # Pass the transformed image through the model and turn the prediction logits into prediction probabilities
-         pred_probs = torch.softmax(vit16(img), dim=1)
+         pred_probs = torch.softmax(model_created(img), dim=1)
 
      # Create a prediction label and prediction probability dictionary for each prediction class (this is the required format for Gradio's output parameter)
      pred_labels_and_probs = {class_names[i]: float(pred_probs[0][i]) for i in range(len(class_names))}
@@ -58,8 +58,8 @@ def predict(img) -> Tuple[Dict, float]:
  ### 4. Gradio app ###
 
  # Create title, description and article strings
- title = "FoodVision ViT 🍔👁"
- description = "A ViT_B_16 feature extractor computer vision model to classify images of food into 101 different classes using 20% of the data."
+ title = "World Puzzle Solver"
+ description = "A World Puzzle Solver app that uses a PyTorch model to predict the letters in a target image."
  article = ""
 
  # Create examples list from "examples/" directory
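The Gradio wiring below this hunk is unchanged and therefore not shown in the diff. For context, a minimal sketch of how predict(), the examples list, and the title/description strings presumably plug into the interface; the component names and arguments here are assumptions, not the committed code:

# Sketch only: the Gradio wiring is not part of this diff
import os
import gradio as gr

# Build the examples list the trailing comment refers to
example_list = [["examples/" + example] for example in os.listdir("examples")]

# Image in; class probabilities and prediction time out (matching predict()'s return)
demo = gr.Interface(fn=predict,
                    inputs=gr.Image(type="pil"),
                    outputs=[gr.Label(num_top_classes=5, label="Predictions"),
                             gr.Number(label="Prediction time (s)")],
                    examples=example_list,
                    title=title,
                    description=description,
                    article=article)

demo.launch()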
class_names.txt CHANGED
@@ -1,101 +1,32 @@
- apple_pie
- baby_back_ribs
- baklava
- beef_carpaccio
- beef_tartare
- beet_salad
- beignets
- bibimbap
- bread_pudding
- breakfast_burrito
- bruschetta
- caesar_salad
- cannoli
- caprese_salad
- carrot_cake
- ceviche
- cheese_plate
- cheesecake
- chicken_curry
- chicken_quesadilla
- chicken_wings
- chocolate_cake
- chocolate_mousse
- churros
- clam_chowder
- club_sandwich
- crab_cakes
- creme_brulee
- croque_madame
- cup_cakes
- deviled_eggs
- donuts
- dumplings
- edamame
- eggs_benedict
- escargots
- falafel
- filet_mignon
- fish_and_chips
- foie_gras
- french_fries
- french_onion_soup
- french_toast
- fried_calamari
- fried_rice
- frozen_yogurt
- garlic_bread
- gnocchi
- greek_salad
- grilled_cheese_sandwich
- grilled_salmon
- guacamole
- gyoza
- hamburger
- hot_and_sour_soup
- hot_dog
- huevos_rancheros
- hummus
- ice_cream
- lasagna
- lobster_bisque
- lobster_roll_sandwich
- macaroni_and_cheese
- macarons
- miso_soup
- mussels
- nachos
- omelette
- onion_rings
- oysters
- pad_thai
- paella
- pancakes
- panna_cotta
- peking_duck
- pho
- pizza
- pork_chop
- poutine
- prime_rib
- pulled_pork_sandwich
- ramen
- ravioli
- red_velvet_cake
- risotto
- samosa
- sashimi
- scallops
- seaweed_salad
- shrimp_and_grits
- spaghetti_bolognese
- spaghetti_carbonara
- spring_rolls
- steak
- strawberry_shortcake
- sushi
- tacos
- takoyaki
- tiramisu
- tuna_tartare
- waffles
+ A
+ B
+ C
+ D
+ E
+ F
+ G
+ H
+ I
+ J
+ K
+ L
+ M
+ N
+ O
+ P
+ Q
+ R
+ S
+ T
+ U
+ V
+ W
+ X
+ Y
+ Z
+ Á
+ É
+ Í
+ Ñ
+ Ó
+ Ú
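The app.py hunk header above shows only the `with open("class_names.txt", "r") as f:` line; the loop body is outside the diff. A minimal sketch of the assumed loading step that turns this file into the class_names list:

# Assumed body of the loading step (only the open() line is visible in the diff)
with open("class_names.txt", "r") as f:
    class_names = [letter.strip() for letter in f.readlines()]  # 32 entries: A-Z plus Á, É, Í, Ñ, Ó, Ú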
model.py CHANGED
@@ -1,34 +1,67 @@
  import torch
  import torchvision
- 
  from torch import nn
 
 
- def create_vit16_model(num_classes:int=101,
-                        seed:int=42):
-     """Creates an vit16 feature extractor model and transforms.
+ def create_model(num_classes: int = 32,
+                  seed: int = 42):
+     """Creates a letter-classification model and transforms.
 
      Args:
-         num_classes (int, optional): number of classes in the classifier head.
-             Defaults to 3.
+         num_classes (int, optional): number of classes in the classifier head.
+             Defaults to 32.
          seed (int, optional): random seed value. Defaults to 42.
 
      Returns:
-         model (torch.nn.Module): vit feature extractor model.
-         transforms (torchvision.transforms): vit image transforms.
+         model (torch.nn.Module): convolutional letter-classification model.
+         transforms (torchvision.transforms): image transforms.
      """
-     # Create vit pretrained weights, transforms and model
-     weights = torchvision.models.ViT_B_16_Weights.DEFAULT;
-     transforms = weights.transforms()
-     model = torchvision.models.vit_b_16(weights=weights)
- 
-     # Freeze all layers in base model
-     for param in model.parameters():
-         param.requires_grad = False
- 
-     # Change classifier head with random seed for reproducibility
-     torch.manual_seed(seed)
-     model.heads = nn.Sequential(nn.Linear(in_features=768, # keep this the same as original model
-                                           out_features=num_classes)) # update to reflect target number of classes
- 
-     return model, transforms
+     # Build the image transforms (fully qualified: the bare name `transforms` isn't imported)
+     IMG_SIZE = 28
+     transforms = torchvision.transforms.Compose([
+         torchvision.transforms.Resize((IMG_SIZE, IMG_SIZE)),
+         torchvision.transforms.Grayscale(num_output_channels=1),
+         torchvision.transforms.ToTensor()])
+ 
+     # Create a convolutional neural network
+     class Model(nn.Module):
+         def __init__(self, input_shape: int, hidden_units: int, output_shape: int):
+             super().__init__()
+             self.block_1 = nn.Sequential(
+                 nn.Conv2d(in_channels=input_shape,
+                           out_channels=hidden_units,
+                           kernel_size=3, # how big is the square that's going over the image?
+                           stride=1, # default
+                           padding=1), # options = "valid" (no padding), "same" (output keeps input shape) or an int
+                 nn.ReLU(),
+                 nn.Conv2d(in_channels=hidden_units,
+                           out_channels=hidden_units,
+                           kernel_size=3,
+                           stride=1,
+                           padding=1),
+                 nn.ReLU(),
+                 nn.MaxPool2d(kernel_size=2,
+                              stride=2) # default stride value is same as kernel_size
+             )
+             self.block_2 = nn.Sequential(
+                 nn.Conv2d(hidden_units, hidden_units, 3, padding=1),
+                 nn.ReLU(),
+                 nn.Conv2d(hidden_units, hidden_units, 3, padding=1),
+                 nn.ReLU(),
+                 nn.MaxPool2d(2)
+             )
+             self.classifier = nn.Sequential(
+                 nn.Flatten(),
+                 nn.Linear(in_features=hidden_units*7*7, # 28x28 input halved by two max-pools -> 7x7
+                           out_features=output_shape)
+             )
+ 
+         def forward(self, x: torch.Tensor):
+             return self.classifier(self.block_2(self.block_1(x)))
+ 
+     # Instantiate with a fixed seed for reproducibility, so load_state_dict() in app.py
+     # receives a model instance rather than the class itself
+     torch.manual_seed(seed)
+     model = Model(input_shape=1, # grayscale input
+                   hidden_units=10, # assumed value; must match the trained checkpoint
+                   output_shape=num_classes)
+     return model, transforms
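A quick shape check clarifies the hidden_units*7*7 in the classifier: the two MaxPool2d layers halve the 28x28 input twice, leaving a 7x7 feature map. A sketch, assuming create_model returns an instantiated model as in the corrected return above:

# Shape-check sketch for the classifier's in_features arithmetic
import torch
from model import create_model

model, model_transforms = create_model(num_classes=32)
dummy = torch.randn(1, 1, 28, 28)  # [batch, channels, height, width]
with torch.inference_mode():
    logits = model(dummy)
print(logits.shape)  # torch.Size([1, 32]); 28 -> 14 -> 7 across the two max-pools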
model_1.pth ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:09d5f31bc58b2ae0b7b58d3730491a2708db1245fbaf688d1d6a3cb1b613ba3d
+ size 77575
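This is a Git LFS pointer file: the commit stores only the hash and size, and the referenced checkpoint weighs 77,575 bytes. A hedged sanity check that the shipped defaults match the checkpoint, assuming the instantiation sketch above:

# Sanity-check sketch: load the checkpoint into a freshly created model
import torch
from model import create_model

model, _ = create_model(num_classes=32)
state_dict = torch.load("model_1.pth", map_location=torch.device("cpu"))
model.load_state_dict(state_dict)  # raises RuntimeError if e.g. hidden_units differs from training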