|
import streamlit as st |
|
import requests |
|
import json |
|
import os |
|
import csv |
|
from openpyxl import Workbook |
|
from openpyxl.styles import PatternFill |
|
import pandas as pd |
|
from datetime import datetime |
|
import time |
|
|
|
|
|
def parse_response_to_json(response):
    """Extract the model's structured answer from an Ollama HTTP reply.

    The reply body is decoded with ``response.json()``. Its ``'response'``
    entry carries the model output as a JSON-encoded string, which is decoded
    a second time and returned.

    Args:
        response: Object exposing a ``json()`` method, e.g. a
            ``requests.Response``.

    Returns:
        The value obtained by JSON-decoding the ``'response'`` string.

    Raises:
        KeyError: If the decoded reply has no ``'response'`` entry.
        json.JSONDecodeError: If the ``'response'`` string is not valid JSON.
    """
    parsed_response = response.json()
    try:
        json_response_string = parsed_response['response']
    except KeyError:
        # Same exception type as before, but say what actually came back so
        # the failure can be diagnosed from the message alone.
        raise KeyError(
            f"Ollama reply has no 'response' field; got keys: {sorted(parsed_response)}"
        ) from None
    return json.loads(json_response_string)
|
|
|
|
|
def process_files_with_ollama(uploaded_files, url, model, prompt_template, schema, output_file_path, request_timeout=600):
    """Send each uploaded text file to Ollama and record the answers in a CSV.

    The CSV at ``output_file_path`` is (re)created with the header
    ``Input, Sentiment, Reasoning``. For every file, its UTF-8 text is
    substituted into ``prompt_template`` (as ``{input}``), posted to ``url``
    and, on HTTP 200, the ``sentiment`` and ``reasoning`` entries of the
    parsed answer are appended as one row. Progress, per-file errors, the
    elapsed time and a final status message are reported through Streamlit.

    A failure on one file (network error, non-200 status, undecodable text,
    bad prompt template, malformed answer) is reported with ``st.error`` and
    the remaining files are still processed.

    Args:
        uploaded_files: Sequence of binary file-like objects that have a
            ``name`` attribute and support ``seek``/``read``.
        url: Ollama endpoint to POST to.
        model: Model name placed in the request payload.
        prompt_template: ``str.format`` template containing ``{input}``.
        schema: Value sent as the payload's ``format`` entry.
        output_file_path: Path of the CSV file to write.
        request_timeout: Seconds to wait for each HTTP request before giving
            up on that file. Defaults to a generous 600 s.
    """
    total_files = len(uploaded_files)
    progress_bar = st.progress(0)
    failures = 0
    start_time = time.time()

    # Keep one handle open for the whole run instead of reopening per row;
    # flush after every write so partial results survive an interruption.
    with open(output_file_path, mode="w", newline="", encoding="utf-8") as file:
        writer = csv.writer(file)
        writer.writerow(["Input", "Sentiment", "Reasoning"])
        file.flush()

        for i, uploaded_file in enumerate(uploaded_files):
            st.write(f"Processing file {i + 1}/{total_files}: {uploaded_file.name}")

            try:
                # Rewind first so a buffer that was already read (e.g. on a
                # previous click) is not decoded as empty text.
                uploaded_file.seek(0)
                content = uploaded_file.read().decode("utf-8")

                payload = {
                    "model": model,
                    "prompt": prompt_template.format(input=content),
                    "stream": False,
                    "format": schema,
                }

                # Without a timeout a stalled server would block the app forever.
                response = requests.post(
                    url,
                    json=payload,
                    headers={"Content-Type": "application/json"},
                    timeout=request_timeout,
                )

                if response.status_code == 200:
                    json_response = parse_response_to_json(response)
                    sentiment = json_response['sentiment']
                    reasoning = json_response['reasoning']
                    writer.writerow([content, sentiment, reasoning])
                    file.flush()
                else:
                    failures += 1
                    st.error(f"Error processing file {uploaded_file.name}: {response.status_code}")
            except (requests.RequestException, KeyError, IndexError, ValueError) as exc:
                # RequestException: connection/timeout problems.
                # ValueError: undecodable bytes, invalid JSON, bad template syntax.
                # KeyError/IndexError: unknown template fields or missing answer keys.
                failures += 1
                st.error(f"Error processing file {uploaded_file.name}: {exc}")

            progress_bar.progress((i + 1) / total_files)

    elapsed_time = time.time() - start_time
    st.sidebar.write(f"Processing time: {elapsed_time:.2f} seconds")

    # Only claim success when every file actually produced a row.
    if failures:
        st.warning(f"Finished with errors: {failures} of {total_files} files could not be processed.")
    else:
        st.success("All files processed successfully!")
|
|
|
|
|
# Page title and a short usage guide rendered at the top of the app.
st.title("Text File Sentiment Analysis with Ollama")
st.write("""
This app allows you to analyze the sentiment of text files using Ollama. Follow these steps:
1. **Upload text files**: Drag and drop your text files.
2. **Configure Ollama**: Set the API URL, model, prompt template, and schema in the sidebar.
3. **Analyze Sentiment**: Click the "Analyze Sentiment" button to process the files.
4. **Download Results**: Download the results as a CSV file.
5. **Highlight Mismatches**: Use the options in the sidebar to highlight mismatches in the results.
""")
|
|
|
|
|
# Sidebar: connection settings and prompt used for every request.
st.sidebar.header("Ollama Configuration")
url = st.sidebar.text_input("Ollama API URL", value="http://localhost:11434/api/generate")
model = st.sidebar.text_input("Model", value="llama3.2:latest")
# NOTE(review): the default prompt asks only for POSITIVE or NEGATIVE, while
# the default schema below also permits NEUTRAL — confirm which is intended.
prompt_template = st.sidebar.text_area(
    "Prompt Template",
    value="Do a sentiment analysis for the following text and return POSITIVE or NEGATIVE and your reasoning: {input}",
    height=100
)

# Default structured-output schema: a sentiment label plus free-text reasoning.
default_schema = {
    "type": "object",
    "properties": {
        "sentiment": {"enum": ["POSITIVE", "NEUTRAL", "NEGATIVE"]},
        "reasoning": {"type": "string"}
    },
    "required": ["sentiment", "reasoning"]
}
schema_input = st.sidebar.text_area(
    "Schema (JSON format)",
    value=json.dumps(default_schema, indent=2),
    height=400
)

# Parse the user-edited schema; on invalid JSON report it and keep working
# with the default schema so the rest of the page still renders.
try:
    schema = json.loads(schema_input)
except json.JSONDecodeError:
    st.error("Invalid JSON schema. Please check your input.")
    schema = default_schema
|
|
|
|
|
# Sidebar: how mismatching rows are marked in the exported workbook.
st.sidebar.header("Highlighting Configuration")
highlight_whole_row = st.sidebar.checkbox("Highlight the whole row", value=True)
highlight_color = st.sidebar.color_picker("Choose a highlight color", "#FF0000")

# Text files to analyze (several may be selected at once).
uploaded_files = st.file_uploader("Upload text files", type=["txt"], accept_multiple_files=True)

# Session-state slots that must exist before the tabs read them; each starts
# as None and is filled in later by the tab code.
for _state_key in ("results_df", "output_file_name", "uploaded_csv_df"):
    if _state_key not in st.session_state:
        st.session_state[_state_key] = None
|
|
|
|
|
tab1, tab2 = st.tabs(["Analyze", "Highlight Mismatches"])

# Tab 1: run the analysis on the uploaded text files and offer the CSV.
with tab1:
    if uploaded_files:
        # The output name is chosen once and then kept in session state, so
        # later script runs keep pointing at the same CSV file.
        if st.session_state.output_file_name is None:
            st.session_state.output_file_name = f"output_{datetime.now().strftime('%Y%m%d_%H%M%S')}.csv"
        output_path = st.session_state.output_file_name

        if st.button("Analyze Sentiment"):
            with st.spinner("Processing files..."):
                process_files_with_ollama(
                    uploaded_files, url, model, prompt_template, schema, output_path
                )

            # Keep the results in session state so they survive reruns.
            st.session_state.results_df = pd.read_csv(output_path)

        if st.session_state.results_df is not None:
            st.write("Sentiment Analysis Results:")
            st.dataframe(st.session_state.results_df)

            # The file name is assigned before any analysis has run, so the
            # CSV may not exist yet; only offer the download once it does.
            if os.path.exists(output_path):
                with open(output_path, "rb") as file:
                    st.download_button(
                        label="Download Results CSV",
                        data=file,
                        file_name=output_path,
                        mime="text/csv",
                    )
|
|
|
# Tab 2: compare one column against a constant and export an Excel workbook
# with the mismatching rows highlighted, plus a workbook of only those rows.
with tab2:
    uploaded_csv = st.file_uploader("Upload your own CSV file (optional)", type=["csv"])

    # Pick the table to inspect: a fresh upload first, then an earlier upload,
    # then the results produced in the Analyze tab (as the warning promises).
    table = None
    if uploaded_csv is not None:
        try:
            # Rewind in case the buffer was already consumed on an earlier run.
            uploaded_csv.seek(0)
            st.session_state.uploaded_csv_df = pd.read_csv(uploaded_csv)
        except (pd.errors.EmptyDataError, pd.errors.ParserError, UnicodeDecodeError) as exc:
            st.error(f"Could not read the uploaded CSV file: {exc}")
        else:
            table = st.session_state.uploaded_csv_df
            st.write("Using the uploaded CSV file for highlighting mismatches.")
    elif st.session_state.uploaded_csv_df is not None:
        table = st.session_state.uploaded_csv_df
        st.write("Using the previously uploaded CSV file for highlighting mismatches.")
    elif st.session_state.results_df is not None:
        table = st.session_state.results_df
        st.write("Using the results from the Analyze tab for highlighting mismatches.")
    else:
        st.warning("No CSV file available. Please analyze text files in Tab 1 or upload a CSV file.")

    if table is not None and len(table.columns) > 0:
        st.write("### Sentiment Analysis Results")
        st.dataframe(table)

        st.write("### Highlight Mismatches in Results")
        columns = table.columns
        column_to_check = st.selectbox(
            "Select the column to check (e.g., Sentiment)",
            options=columns,
            # Prefer the second column, but stay in range for one-column files.
            index=min(1, len(columns) - 1),
        )
        constant_value = st.text_input(
            "Enter the constant value to compare against (e.g., POSITIVE)",
            value="POSITIVE",
        )

        # Identifies the inputs a set of generated files belongs to, so stale
        # files are not offered after the user changes any of them.
        run_signature = (
            str(column_to_check),
            constant_value,
            highlight_whole_row,
            highlight_color,
            table.shape,
        )

        if st.button("Highlight Mismatches"):
            # One mask drives both the highlighting and the mismatch export.
            mismatch_mask = table[column_to_check] != constant_value

            fill_hex = highlight_color.lstrip("#")
            highlight_fill = PatternFill(start_color=fill_hex, end_color=fill_hex, fill_type="solid")
            checked_col_idx = columns.get_loc(column_to_check) + 1

            wb = Workbook()
            ws = wb.active

            # Header row.
            for col_idx, header in enumerate(columns, start=1):
                ws.cell(row=1, column=col_idx, value=header)

            # Data rows start at Excel row 2. Rows are numbered by position,
            # not by index label, so the numbers always match the worksheet.
            mismatched_rows = []
            data_rows = zip(mismatch_mask, table.itertuples(index=False, name=None))
            for excel_row, (is_mismatch, values) in enumerate(data_rows, start=2):
                for col_idx, value in enumerate(values, start=1):
                    # Missing values become empty cells instead of NaN.
                    ws.cell(row=excel_row, column=col_idx, value=None if pd.isna(value) else value)

                if is_mismatch:
                    mismatched_rows.append(excel_row)
                    if highlight_whole_row:
                        cols_to_fill = range(1, len(values) + 1)
                    else:
                        cols_to_fill = (checked_col_idx,)
                    for col_idx in cols_to_fill:
                        ws.cell(row=excel_row, column=col_idx).fill = highlight_fill

            timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
            highlighted_output_file = f"highlighted_results_{timestamp}.xlsx"
            wb.save(highlighted_output_file)

            mismatched_output_file = f"mismatched_results_{timestamp}.xlsx"
            table[mismatch_mask].to_excel(mismatched_output_file, index=False)

            # Remember the outcome: clicking a download button reruns the
            # script with this button reported as not pressed, and without
            # this the summary and the second download would disappear.
            st.session_state["highlight_outputs"] = {
                "signature": run_signature,
                "highlighted_file": highlighted_output_file,
                "mismatched_file": mismatched_output_file,
                "rows": mismatched_rows,
            }

        last_run = st.session_state.get("highlight_outputs")
        if (
            last_run is not None
            and last_run["signature"] == run_signature
            and os.path.exists(last_run["highlighted_file"])
            and os.path.exists(last_run["mismatched_file"])
        ):
            mismatched_rows = last_run["rows"]
            st.write(f"**Total mismatches found:** {len(mismatched_rows)}")
            if mismatched_rows:
                st.write(f"**Mismatches found in rows (referring to the Excel file):** {', '.join(map(str, mismatched_rows))}")

            with open(last_run["highlighted_file"], "rb") as file:
                st.download_button(
                    label="Download Highlighted Results",
                    data=file,
                    file_name=last_run["highlighted_file"],
                    mime="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
                )

            with open(last_run["mismatched_file"], "rb") as file:
                st.download_button(
                    label="Download Mismatched Rows",
                    data=file,
                    file_name=last_run["mismatched_file"],
                    mime="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
                )

            st.success("Mismatches highlighted! Click the buttons above to download.")