OSINT_Tool / app.py
Canstralian's picture
Update app.py
e511bc5 verified
raw
history blame
5.04 kB
import re
from urllib.parse import quote

import pandas as pd
import requests
import streamlit as st
import torch
from datasets import Dataset
from transformers import AutoModelForSequenceClassification, AutoTokenizer
# Page heading followed by a short markdown blurb describing the app.
st.title("OSINT Tool 🏢")
_intro_markdown = (
    "\n"
    "This tool performs **Open Source Intelligence (OSINT)** analysis on GitHub repositories and fetches titles from URLs.\n"
    "It also allows uploading datasets (CSV format) for fine-tuning models like **DistilBERT**.\n"
)
st.markdown(_intro_markdown)

# The sidebar radio selects which of the three tools the rest of the script
# renders; its current value is kept in ``app_mode``.
_sidebar = st.sidebar
_sidebar.title("Navigation")
_mode_choices = [
    "GitHub Repository Analysis",
    "URL Title Fetcher",
    "Dataset Upload & Fine-Tuning",
]
app_mode = _sidebar.radio("Choose the mode", _mode_choices)
# Seconds to wait on any outbound HTTP request. ``requests`` applies no
# timeout unless one is passed, so without this an unresponsive host would
# stall the whole script run.
_REQUEST_TIMEOUT = 10

# Matches the first <title> element. Tolerates attributes on the opening tag,
# any letter case, and title text that spans several lines.
_TITLE_PATTERN = re.compile(r"<title[^>]*>(.*?)</title>", re.IGNORECASE | re.DOTALL)


def _extract_title(html):
    """Return the whitespace-normalised ``<title>`` text of *html*.

    Returns ``None`` when the document has no ``<title>`` element. An empty
    title element yields an empty string, not ``None``.
    """
    match = _TITLE_PATTERN.search(html)
    if match is None:
        return None
    # Collapse newlines/indentation inside the tag into single spaces.
    return " ".join(match.group(1).split())


def _render_github_analysis() -> None:
    """Render the GitHub mode: query api.github.com for one repository and show its stats."""
    st.header("GitHub Repository Analysis")
    repo_owner = st.text_input("Enter GitHub Repository Owner", "huggingface").strip()
    repo_name = st.text_input("Enter GitHub Repository Name", "transformers").strip()

    if not st.button("Analyze Repository"):
        return
    if not (repo_owner and repo_name):
        st.warning("Please enter both repository owner and name.")
        return

    # Percent-encode the user-supplied path segments so characters such as
    # "/", "?" or "#" cannot redirect the request to a different endpoint.
    owner_segment = quote(repo_owner, safe="")
    name_segment = quote(repo_name, safe="")
    api_url = f"https://api.github.com/repos/{owner_segment}/{name_segment}"

    try:
        response = requests.get(api_url, timeout=_REQUEST_TIMEOUT)
        data = response.json()
    except (requests.RequestException, ValueError) as e:
        # RequestException: connection/timeout problems.
        # ValueError: the response body was not valid JSON.
        st.error(f"Error occurred: {e}")
        return

    if response.status_code != 200:
        st.error(f"Error: {data.get('message', 'Something went wrong with the request')}")
        return

    st.subheader("Repository Details")
    st.write(f"**Name**: {data['name']}")
    st.write(f"**Owner**: {data['owner']['login']}")
    st.write(f"**Stars**: {data['stargazers_count']}")
    st.write(f"**Forks**: {data['forks_count']}")
    st.write(f"**Language**: {data['language']}")
    st.write(f"**Description**: {data['description']}")


def _render_url_title_fetcher() -> None:
    """Render the URL mode: download a page and display the text of its ``<title>`` tag."""
    st.header("URL Title Fetcher")
    url = st.text_input("Enter URL", "https://www.huggingface.co").strip()

    if not st.button("Fetch Title"):
        return
    if not url:
        st.warning("Please enter a valid URL.")
        return

    # NOTE(review): this fetches an arbitrary user-supplied URL from the
    # server side; consider restricting schemes/hosts if deployed publicly.
    try:
        response = requests.get(url, timeout=_REQUEST_TIMEOUT)
    except requests.RequestException as e:
        # Covers malformed URLs, DNS/connection failures and timeouts.
        st.error(f"Error occurred: {e}")
        return

    if response.status_code != 200:
        st.error(f"Failed to retrieve the page. Status code: {response.status_code}")
        return

    title = _extract_title(response.text)
    if title is None:
        st.warning("Title tag not found in the page")
    else:
        st.write(f"**Page Title**: {title}")


def _render_fine_tuning() -> None:
    """Render the dataset mode: preview an uploaded CSV and run the demo tokenization step.

    No training is performed; the model and tokenizer are loaded and the
    dataset is tokenized, then a demo success message is shown.
    """
    st.header("Dataset Upload & Fine-Tuning")
    uploaded_file = st.file_uploader("Upload a CSV file for fine-tuning", type="csv")
    if uploaded_file is None:
        st.warning("Please upload a dataset.")
        return

    # Load the CSV into a pandas DataFrame; report unreadable uploads in the
    # UI instead of letting the parser error abort the script run.
    try:
        df = pd.read_csv(uploaded_file)
    except (pd.errors.EmptyDataError, pd.errors.ParserError, UnicodeDecodeError) as e:
        st.error(f"Could not read the uploaded CSV: {e}")
        return

    st.subheader("Dataset Preview")
    st.write(df.head())

    # Convert the DataFrame to Hugging Face dataset format.
    dataset = Dataset.from_pandas(df)

    model_name = st.selectbox("Select model for fine-tuning", ["distilbert-base-uncased"])
    if not st.button("Fine-tune Model"):
        return
    if not model_name:
        st.warning("Please select a model for fine-tuning.")
        return
    # The tokenization step below reads the 'text' column; check for it up
    # front so the user gets a clear message before any model is downloaded.
    if "text" not in df.columns:
        st.error("The dataset must contain a 'text' column to be tokenized.")
        return

    try:
        model = AutoModelForSequenceClassification.from_pretrained(model_name)
        tokenizer = AutoTokenizer.from_pretrained(model_name)

        def preprocess_function(examples):
            """Tokenize one batch of rows from the 'text' column."""
            return tokenizer(examples['text'], truncation=True, padding=True)

        tokenized_datasets = dataset.map(preprocess_function, batched=True)

        # Placeholder training configuration. ``model``, ``tokenized_datasets``
        # and ``train_args`` are scaffolding: actual fine-tuning would need
        # the Hugging Face Trainer, which is not wired up here.
        train_args = {
            "output_dir": "./results",
            "num_train_epochs": 3,
            "per_device_train_batch_size": 16,
            "logging_dir": "./logs",
        }
        st.success("Fine-tuning started (demo)!")
    except Exception as e:
        # UI boundary: model download and tokenization can fail in many
        # library-specific ways, so report whatever went wrong to the user.
        st.error(f"Error during fine-tuning: {e}")


# Render whichever tool is selected in the sidebar.
if app_mode == "GitHub Repository Analysis":
    _render_github_analysis()
elif app_mode == "URL Title Fetcher":
    _render_url_title_fetcher()
elif app_mode == "Dataset Upload & Fine-Tuning":
    _render_fine_tuning()