Update app.py
app.py CHANGED
@@ -11,37 +11,38 @@ model_name = "ContactDoctor/Bio-Medical-MultiModal-Llama-3-8B-V1"
 
 # Load the Hugging Face model and tokenizer with required arguments
 tokenizer = AutoTokenizer.from_pretrained(
-    model_name,
-    use_auth_token=api_token,
-    trust_remote_code=True
+    model_name,
+    token=api_token,  # Use `token` instead of `use_auth_token`
+    trust_remote_code=True
 )
 model = AutoModelForCausalLM.from_pretrained(
-    model_name,
-    use_auth_token=api_token,
-    trust_remote_code=True
+    model_name,
+    token=api_token,
+    trust_remote_code=True,
+    device_map="auto",  # Efficiently allocate resources
+    torch_dtype=torch.float16  # Use half precision for faster inference
 )
 
-
 # Define the function to process user input
 def generate_response(input_text):
     try:
         # Tokenize the input text
         inputs = tokenizer(input_text, return_tensors="pt")
-
+
         # Generate a response using the model
         outputs = model.generate(
             inputs["input_ids"],
-            max_length=256,
-            num_return_sequences=1,
-            temperature=0.7,
-            top_p=0.9,
-            top_k=50
+            max_length=256,
+            num_return_sequences=1,
+            temperature=0.7,
+            top_p=0.9,
+            top_k=50
         )
-
+
         # Decode and return the generated text
         response = tokenizer.decode(outputs[0], skip_special_tokens=True)
         return response
-
+
     except Exception as e:
         return f"Error: {str(e)}"
 
@@ -52,7 +53,7 @@ iface = gr.Interface(
     outputs="text",
     title="ContactDoctor Medical Assistant",
     description="Provide input symptoms or queries and get AI-powered medical advice.",
-    enable_api=True
+    enable_api=True
 )
 
 # Launch the Gradio app
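For context, here is a minimal runnable sketch of the full app.py as it stands after this commit. Only the hunks above come from the commit itself; the imports, the source of api_token (assumed here to be an HF_TOKEN Space secret), and the gr.Interface block are reconstructed from the context lines and are assumptions. The sketch also departs from the commit in three small, flagged ways: it adds do_sample=True (model.generate ignores temperature, top_p, and top_k under its default greedy decoding), it moves the tokenized inputs onto the model's device to match device_map="auto", and it omits enable_api=True, which recent gr.Interface signatures do not appear to accept.

import os

import gradio as gr
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

# Assumption: the token is supplied via the Space's secrets as HF_TOKEN.
api_token = os.environ.get("HF_TOKEN")

model_name = "ContactDoctor/Bio-Medical-MultiModal-Llama-3-8B-V1"

# Load the Hugging Face model and tokenizer with required arguments
tokenizer = AutoTokenizer.from_pretrained(
    model_name,
    token=api_token,  # `token` replaces the deprecated `use_auth_token`
    trust_remote_code=True,
)
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    token=api_token,
    trust_remote_code=True,
    device_map="auto",          # let accelerate place layers on available devices
    torch_dtype=torch.float16,  # half precision: less memory, faster inference
)

# Define the function to process user input
def generate_response(input_text):
    try:
        # Tokenize the input and move it to the device the model was placed on
        inputs = tokenizer(input_text, return_tensors="pt").to(model.device)

        # Sample a response; do_sample=True is added here (not in the commit)
        # so that temperature/top_p/top_k actually take effect
        outputs = model.generate(
            inputs["input_ids"],
            max_length=256,
            num_return_sequences=1,
            do_sample=True,
            temperature=0.7,
            top_p=0.9,
            top_k=50,
        )

        # Decode and return the generated text
        response = tokenizer.decode(outputs[0], skip_special_tokens=True)
        return response
    except Exception as e:
        return f"Error: {str(e)}"

# Assumption: the interface mirrors the context lines of the second hunk;
# enable_api=True is omitted here (see the note above)
iface = gr.Interface(
    fn=generate_response,
    inputs="text",
    outputs="text",
    title="ContactDoctor Medical Assistant",
    description="Provide input symptoms or queries and get AI-powered medical advice.",
)

# Launch the Gradio app
if __name__ == "__main__":
    iface.launch()

The device_map="auto" plus torch.float16 pair is what makes an 8B-parameter model practical on a single mid-size GPU, roughly halving memory versus float32. On a CPU-only Space, swapping in torch.float32 is a common fallback, since half-precision ops are poorly supported on CPU.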