Update app.py
app.py
CHANGED
@@ -14,7 +14,6 @@ from langchain_core.output_parsers import StrOutputParser
 from langchain_core.prompts import ChatPromptTemplate
 from transformers import pipeline
 
-
 # Load environment variables
 load_dotenv()
 
@@ -48,7 +47,6 @@ tone_categories = {
 }
 
 # Frame categories for fallback method
-
 # AI-Expanded Frame Categories for More Precise Categorization
 # Expanded Frame Categories for Better Categorization
 frame_categories = {
@@ -58,15 +56,8 @@ frame_categories = {
         "Civil Liberties": ["freedom", "expression", "privacy", "rights violations", "censorship", "surveillance", "press freedom", "free speech", "whistleblower"],
         "State Repression & Human Rights Abuses": ["police brutality", "enforced disappearances", "political prisoners", "arbitrary arrests", "martial law", "crackdowns"],
         "Women's Rights": [
-            "gender equality", "women's empowerment", "reproductive rights",
-            "gender-based violence", "sexual harassment", "domestic violence",
-            "equal pay", "education for women", "child marriage", "women's health",
-            "maternity leave", "women in leadership", "honor killings",
-            "karo kari", "patriarchal oppression", "honor-based violence",
-            "marital violence", "violence against women", "justice for women",
-            "reclaiming women's rights", "female autonomy", "societal control over women",
-            "women's freedom of choice", "women’s bodies, women’s rights",
-            "end honor killings", "violence against women must stop", "say no to patriarchy"]
+            "gender equality", "women's empowerment", "reproductive rights", "gender-based violence", "sexual harassment", "domestic violence", "equal pay", "education for women", "child marriage", "women's health", "maternity leave", "women in leadership", "honor killings", "karo kari", "patriarchal oppression", "honor-based violence", "marital violence", "violence against women", "justice for women", "reclaiming women's rights", "female autonomy", "societal control over women", "women's freedom of choice", "women’s bodies, women’s rights", "end honor killings", "violence against women must stop", "say no to patriarchy"
+        ]
     },
     "Political & State Accountability": {
         "Corruption & Governance": ["corruption", "government", "policy", "accountability", "transparency", "bribery", "misuse of power", "scandal", "nepotism", "tax fraud"],
@@ -159,8 +150,10 @@ def detect_language(text):
 # Extract tone using Groq API (or fallback method)
 def extract_tone(text):
     try:
-        response = llm.chat([
-
+        response = llm.chat([
+            {"role": "system", "content": "Analyze the tone of the following text and provide descriptive tone labels."},
+            {"role": "user", "content": text}
+        ])
         return response["choices"][0]["message"]["content"].split(", ")
     except Exception as e:
         logging.error(f"Groq API error: {e}")
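The rewritten extract_tone assumes the client returns an OpenAI-style completion dict whose message content is a comma-separated list of tone labels, which `.split(", ")` turns into a Python list. A minimal sketch of that parsing contract; the response dict here is a hand-built stand-in, not the Groq client's documented return value:

    # Hedged sketch: hand-built stand-in for whatever llm.chat() actually returns.
    # Only the parsing line mirrors extract_tone().
    response = {"choices": [{"message": {"content": "hopeful, defiant, urgent"}}]}

    tones = response["choices"][0]["message"]["content"].split(", ")
    print(tones)  # ['hopeful', 'defiant', 'urgent']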
@@ -183,9 +176,7 @@ def extract_hashtags(text):
 def categorize_frames(frame_list):
     frame_counter = Counter(frame_list)
     categorized_frames = {"Major Focus": [], "Significant Focus": [], "Minor Mention": []}
-
     sorted_frames = sorted(frame_counter.items(), key=lambda x: x[1], reverse=True)
-
     for i, (frame, count) in enumerate(sorted_frames):
         if i == 0:  # Highest frequency frame
             categorized_frames["Major Focus"].append(frame)
@@ -193,24 +184,18 @@ def categorize_frames(frame_list):
             categorized_frames["Significant Focus"].append(frame)
         else:
             categorized_frames["Minor Mention"].append(frame)
-
     return categorized_frames
 
 # Extract frames using keyword matching and categorize
 def extract_frames_fallback(text):
     detected_frames = []
     text_lower = text.lower()
-
     # Iterate through the activism topics to match keywords
     for main_category, subcategories in frame_categories.items():
         for subcategory, keywords in subcategories.items():
-            # Check how many keywords from the subcategory are present in the text
             keyword_count = sum(1 for word in keywords if word in text_lower)
             if keyword_count > 0:
-                # Append a tuple with main category and subcategory
                 detected_frames.append((main_category, subcategory))
-
-    # Categorize detected frames based on their frequency
     return categorize_frames(detected_frames)
 
 # Extract captions from DOCX
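Note that within a single caption each (main category, subcategory) pair is appended at most once, so every Counter value is 1 and the Major/Significant/Minor split in categorize_frames effectively follows insertion order. A self-contained sketch of the matching step; the trimmed keyword dict is illustrative, not the app's full frame_categories:

    # Illustrative subset of frame_categories; the real dict in app.py is far larger.
    frame_categories = {
        "Human Rights & Social Justice": {
            "Civil Liberties": ["censorship", "free speech"],
            "Women's Rights": ["equal pay", "honor killings"],
        },
    }

    def extract_frames_fallback(text):
        detected = []
        text_lower = text.lower()
        for main_category, subcategories in frame_categories.items():
            for subcategory, keywords in subcategories.items():
                # Presence check: at most one tuple per subcategory per text
                if sum(1 for word in keywords if word in text_lower) > 0:
                    detected.append((main_category, subcategory))
        return detected

    print(extract_frames_fallback("A rally against censorship and for equal pay."))
    # [('Human Rights & Social Justice', 'Civil Liberties'),
    #  ('Human Rights & Social Justice', "Women's Rights")]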
@@ -244,40 +229,78 @@ def merge_metadata_with_generated_data(generated_data, excel_metadata):
         if post_number in generated_data:
             generated_data[post_number].update(post_data)
         else:
-            generated_data[post_number] = post_data
+            generated_data[post_number] = post_data
     return generated_data
 
 # Create DOCX file matching the uploaded format
 def create_docx_from_data(extracted_data):
     doc = Document()
-
     for post_number, data in extracted_data.items():
         doc.add_heading(post_number, level=1)
-
         ordered_keys = [
             "Post Number", "Date of Post", "Media Type", "Number of Pictures",
-            "Number of Videos", "Number of Audios", "Likes", "Comments",
-            "Full Caption", "Language", "Tone", "Hashtags", "Frames"
+            "Number of Videos", "Number of Audios", "Likes", "Comments",
+            "Tagged Audience", "Full Caption", "Language", "Tone", "Hashtags", "Frames"
         ]
-
         for key in ordered_keys:
             value = data.get(key, "N/A")
-
             if key in ["Tone", "Hashtags"]:
                 value = ", ".join(value) if isinstance(value, list) else value
             elif key == "Frames" and isinstance(value, dict):
-                frame_text = "\n".join([f"
+                frame_text = "\n".join([f"  {category}: {', '.join([' → '.join(frame) for frame in frames])}" for category, frames in value.items() if frames])
                 value = f"\n{frame_text}" if frame_text else "N/A"
-
             doc.add_paragraph(f"**{key}:** {value}")
-
         doc.add_paragraph("\n")
+    return doc
 
+# --------------------------
+# New functions for Frame Analysis
+# --------------------------
+
+# Aggregate frames from all posts into a simple dictionary (Frame 1: category, etc.)
+def aggregate_frames(output_data):
+    aggregated = {}
+    counter = 1
+    for post_data in output_data.values():
+        frames = post_data.get("Frames")
+        if frames and isinstance(frames, dict):
+            for category in ["Major Focus", "Significant Focus", "Minor Mention"]:
+                if category in frames and frames[category]:
+                    for frame in frames[category]:
+                        if isinstance(frame, tuple):
+                            frame_str = " → ".join(frame)
+                        else:
+                            frame_str = str(frame)
+                        aggregated[f"Frame {counter}"] = category
+                        counter += 1
+    return aggregated
+
+# Create a DOCX file for frame analysis with a table
+def create_frame_analysis_docx(frames_data):
+    doc = Document()
+    doc.add_heading("Frame Analysis", level=1)
+    table = doc.add_table(rows=1, cols=5)
+    table.style = 'Table Grid'
+    hdr_cells = table.rows[0].cells
+    hdr_cells[0].text = "Frame"
+    hdr_cells[1].text = "Major Focus"
+    hdr_cells[2].text = "Significant Focus"
+    hdr_cells[3].text = "Minor Mention"
+    hdr_cells[4].text = "Not Applicable"
+    for frame, category in frames_data.items():
+        row_cells = table.add_row().cells
+        row_cells[0].text = frame
+        row_cells[1].text = "✔ Major Focus" if category == "Major Focus" else "Major Focus"
+        row_cells[2].text = "✔ Significant Focus" if category == "Significant Focus" else "Significant Focus"
+        row_cells[3].text = "✔ Minor Mention" if category == "Minor Mention" else "Minor Mention"
+        row_cells[4].text = "✔ Not Applicable" if category == "Not Applicable" else "Not Applicable"
     return doc
 
-#
-
+# --------------------------
+# Streamlit App
+# --------------------------
 
+st.title("AI-Powered Coding Sheet Generator")
 st.write("Enter text or upload a DOCX/Excel file for analysis:")
 
 input_text = st.text_area("Input Text", height=200)
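As committed, aggregate_frames keys each row by a running counter and stores only the focus category; the frame_str it builds from the tuple is computed but never stored, so the table's first column reads "Frame 1", "Frame 2", and so on rather than the frame names. A condensed sketch of the resulting shape; the sample output_data is invented to mirror what extract_frames_fallback produces:

    # Invented sample mirroring the structure extract_frames_fallback() returns.
    output_data = {
        "Post 1": {
            "Frames": {
                "Major Focus": [("Human Rights & Social Justice", "Civil Liberties")],
                "Significant Focus": [],
                "Minor Mention": [("Political & State Accountability", "Corruption & Governance")],
            }
        }
    }

    # Condensed from the committed aggregate_frames (the unused frame_str is dropped).
    def aggregate_frames(output_data):
        aggregated = {}
        counter = 1
        for post_data in output_data.values():
            frames = post_data.get("Frames")
            if frames and isinstance(frames, dict):
                for category in ["Major Focus", "Significant Focus", "Minor Mention"]:
                    for frame in frames.get(category) or []:
                        aggregated[f"Frame {counter}"] = category
                        counter += 1
        return aggregated

    print(aggregate_frames(output_data))
    # {'Frame 1': 'Major Focus', 'Frame 2': 'Minor Mention'}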
@@ -285,7 +308,6 @@ uploaded_docx = st.file_uploader("Upload a DOCX file", type=["docx"])
 uploaded_excel = st.file_uploader("Upload an Excel file", type=["xlsx"])
 
 output_data = {}
-
 if input_text:
     output_data["Manual Input"] = {
         "Full Caption": input_text,
@@ -317,10 +339,20 @@ if output_data:
         for key, value in data.items():
             st.write(f"**{key}:** {value}")
 
-
+    # Create and offer download for merged analysis DOCX
     docx_output = create_docx_from_data(output_data)
     docx_io = io.BytesIO()
     docx_output.save(docx_io)
     docx_io.seek(0)
     st.download_button("Download Merged Analysis as DOCX", data=docx_io, file_name="coding_sheet.docx")
 
+    # Aggregate frames and create frame analysis DOCX
+    frames_data = aggregate_frames(output_data)
+    if frames_data:
+        frame_docx = create_frame_analysis_docx(frames_data)
+        frame_docx_io = io.BytesIO()
+        frame_docx.save(frame_docx_io)
+        frame_docx_io.seek(0)
+        st.download_button("Download Frame Analysis DOCX", data=frame_docx_io, file_name="frame_analysis.docx")
+
+
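Both download buttons reuse the same in-memory round trip: save the python-docx Document into a BytesIO buffer, rewind it, and hand the buffer to st.download_button. A standalone sketch of that pattern outside Streamlit; the output file name is illustrative:

    import io
    from docx import Document  # python-docx

    doc = Document()
    doc.add_heading("Frame Analysis", level=1)

    buffer = io.BytesIO()
    doc.save(buffer)   # python-docx writes the .docx zip container into the buffer
    buffer.seek(0)     # rewind so the next reader starts at byte 0

    # In the app this buffer goes straight to st.download_button(...); here we
    # persist it to disk to show the bytes form a complete .docx file.
    with open("frame_analysis_demo.docx", "wb") as f:
        f.write(buffer.getvalue())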