Spaces:

Ravenok
/

statosphere-backend

Running on Zero

App Files Files Community

Lord-Raven committed on Aug 11, 2024

Commit

0cca822

1 Parent(s): 19acbf6

Trying to use ONNX model.

Browse files

Files changed (2) hide show

app.py +2 -68
requirements.txt +1 -0

app.py CHANGED Viewed

@@ -1,69 +1,13 @@
 import gradio
 import json
 import torch
-from transformers import pipeline
 from transformers import AutoTokenizer
 from fastapi import FastAPI
 from fastapi.middleware.cors import CORSMiddleware
-from onnxruntime import (
-    InferenceSession, SessionOptions, GraphOptimizationLevel
-)
-from transformers import (
-    TokenClassificationPipeline, AutoTokenizer, AutoModelForTokenClassification
-)
 class OnnxTokenClassificationPipeline(TokenClassificationPipeline):
-    def __init__(self, *args, **kwargs):
-        super().__init__(*args, **kwargs)
-    def _forward(self, model_inputs):
-        """
-        Forward pass through the model. This method is not to be called by the user directly and is only used
-        by the pipeline to perform the actual predictions.
-        This is where we will define the actual process to do inference with the ONNX model and the session created
-        before.
-        """
-        # This comes from the original implementation of the pipeline
-        special_tokens_mask = model_inputs.pop("special_tokens_mask")
-        offset_mapping = model_inputs.pop("offset_mapping", None)
-        sentence = model_inputs.pop("sentence")
-        inputs = {k: v.cpu().detach().numpy() for k, v in model_inputs.items()} # dict of numpy arrays
-        outputs_name = session.get_outputs()[0].name # get the name of the output tensor
-        logits = session.run(output_names=[outputs_name], input_feed=inputs)[0] # run the session
-        logits = torch.tensor(logits) # convert to torch tensor to be compatible with the original implementation
-        return {
-            "logits": logits,
-            "special_tokens_mask": special_tokens_mask,
-            "offset_mapping": offset_mapping,
-            "sentence": sentence,
-            **model_inputs,
-        }
-    # We need to override the preprocess method because the onnx model is waiting for the attention masks as inputs
-    # along with the embeddings.
-    def preprocess(self, sentence, offset_mapping=None):
-        truncation = True if self.tokenizer.model_max_length and self.tokenizer.model_max_length > 0 else False
-        model_inputs = self.tokenizer(
-            sentence,
-            return_attention_mask=True, # This is the only difference from the original implementation
-            return_tensors=self.framework,
-            truncation=truncation,
-            return_special_tokens_mask=True,
-            return_offsets_mapping=self.tokenizer.is_fast,
-        )
-        if offset_mapping:
-            model_inputs["offset_mapping"] = offset_mapping
-        model_inputs["sentence"] = sentence
-        return model_inputs
 # CORS Config
 app = FastAPI()
@@ -75,19 +19,9 @@ app.add_middleware(
     allow_headers=["*"],
 )
-options = SessionOptions()
-options.graph_optimization_level = GraphOptimizationLevel.ORT_ENABLE_ALL
-session = InferenceSession("onnx/model.onnx", sess_options=options, providers=["CPUExecutionProvider"])
-session.disable_fallback()
 model_name = "xenova/mobilebert-uncased-mnli"
-tokenizer = AutoTokenizer.from_pretrained(model_name)
-model = AutoModelForTokenClassification.from_pretrained(model_name)
-classifier = OnnxTokenClassificationPipeline(task="zero-shot-classification", model=model, tokenizer=tokenizer, framework="pt", aggregation_strategy="simple")
 def zero_shot_classification(data_string):
     print(data_string)

 import gradio
 import json
 import torch
 from transformers import AutoTokenizer
 from fastapi import FastAPI
 from fastapi.middleware.cors import CORSMiddleware
+from onnx_transformers import pipeline
 class OnnxTokenClassificationPipeline(TokenClassificationPipeline):
 # CORS Config
 app = FastAPI()
     allow_headers=["*"],
 )
 model_name = "xenova/mobilebert-uncased-mnli"
+classifier = pipeline(task="zero-shot-classification", model=model_name, onnx=True)
 def zero_shot_classification(data_string):
     print(data_string)

requirements.txt CHANGED Viewed

@@ -2,6 +2,7 @@ fastapi==0.88.0
 json5==0.9.10
 numpy==1.23.4
 onnxruntime==1.18.1
 torch==1.12.1
 torchvision==0.13.1
 transformers==4.44.0

 json5==0.9.10
 numpy==1.23.4
 onnxruntime==1.18.1
+onnx_transformers
 torch==1.12.1
 torchvision==0.13.1
 transformers==4.44.0