Spaces:

darylalim
/

granite-document-summarization

Running on Zero

App Files Community

Daryl Lim committed 29 days ago

Commit

d700fcc

1 Parent(s): 55a0a7d

Update app.py

Browse files

Files changed (1) hide show

app.py +27 -11

app.py CHANGED Viewed

@@ -22,7 +22,7 @@ from langchain.schema import Document
 # Transformers imports for IBM Granite model
 import spaces
-from transformers import AutoTokenizer, AutoModelForCausalLM
 # Initialize IBM Granite model and tokenizer
 print("Loading Granite model and tokenizer...")
@@ -31,12 +31,17 @@ model_name = "ibm-granite/granite-3.3-8b-instruct"
 # Load tokenizer
 tokenizer = AutoTokenizer.from_pretrained(model_name)
 # Load model with optimization for GPU
 model = AutoModelForCausalLM.from_pretrained(
     model_name,
     device_map="auto",
-    torch_dtype=torch.bfloat16,
-    load_in_8bit=True  # Use 8-bit quantization for memory efficiency
 )
 print("Model loaded successfully!")
@@ -198,6 +203,10 @@ def process_document_chunks(texts, batch_size=8):
     except Exception as e:
         print(f"Error in document processing: {str(e)}")
         # Fallback to basic processing if optimization fails
         return FAISS.from_documents(texts, embeddings)
 # Main function to process document and generate summary
@@ -359,10 +368,11 @@ def create_gradio_interface():
         # Add interactivity to show/hide appropriate count selector
         def update_count_visibility(length_type):
-            return {
-                sentence_count: length_type == "Sentences",
-                paragraph_count: length_type == "Paragraphs"
-            }
         length_type.change(
             fn=update_count_visibility,
@@ -370,15 +380,21 @@ def create_gradio_interface():
             outputs=[sentence_count, paragraph_count]
         )
-        # Function to convert paragraph count from string to int and handle capitalized length types
         def process_document_wrapper(file, length_type, sentence_count, paragraph_count):
             # Convert capitalized length_type to lowercase for processing
             length_type_lower = length_type.lower()
             if length_type_lower == "sentences":
-                return process_document(file, length_type_lower, int(sentence_count))
             else:
-                return process_document(file, length_type_lower, int(paragraph_count))
         submit_btn.click(
             fn=process_document_wrapper,
@@ -402,4 +418,4 @@ def create_gradio_interface():
 # Launch the application
 if __name__ == "__main__":
     app = create_gradio_interface()
-    app.launch()

 # Transformers imports for IBM Granite model
 import spaces
+from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
 # Initialize IBM Granite model and tokenizer
 print("Loading Granite model and tokenizer...")
 # Load tokenizer
 tokenizer = AutoTokenizer.from_pretrained(model_name)
+# Create quantization config
+quantization_config = BitsAndBytesConfig(
+    load_in_4bit=True,  # Use 4-bit quantization for better memory efficiency
+    bnb_4bit_compute_dtype=torch.bfloat16  # Use bfloat16 for computation with 4-bit quantization
+)
 # Load model with optimization for GPU
 model = AutoModelForCausalLM.from_pretrained(
     model_name,
     device_map="auto",
+    quantization_config=quantization_config
 )
 print("Model loaded successfully!")
     except Exception as e:
         print(f"Error in document processing: {str(e)}")
         # Fallback to basic processing if optimization fails
+        embeddings = HuggingFaceEmbeddings(
+            model_name="nomic-ai/nomic-embed-text-v1",
+            model_kwargs={'trust_remote_code': True}
+        )
         return FAISS.from_documents(texts, embeddings)
 # Main function to process document and generate summary
         # Add interactivity to show/hide appropriate count selector
         def update_count_visibility(length_type):
+            is_sentences = length_type == "Sentences"
+            return [
+                gr.update(visible=is_sentences),  # For sentence_count
+                gr.update(visible=not is_sentences)  # For paragraph_count
+            ]
         length_type.change(
             fn=update_count_visibility,
             outputs=[sentence_count, paragraph_count]
         )
+        # Function to handle form submission properly
         def process_document_wrapper(file, length_type, sentence_count, paragraph_count):
             # Convert capitalized length_type to lowercase for processing
             length_type_lower = length_type.lower()
             if length_type_lower == "sentences":
+                count = int(sentence_count)
             else:
+                # Handle potential type issues with paragraph_count
+                if isinstance(paragraph_count, bool):
+                    count = 1  # Default if boolean
+                else:
+                    count = int(paragraph_count)
+            return process_document(file, length_type_lower, count)
         submit_btn.click(
             fn=process_document_wrapper,
 # Launch the application
 if __name__ == "__main__":
     app = create_gradio_interface()
+    app.launch()