Sam Chaudry committed
Commit 4d234fe · 1 Parent(s): 1139f00

Initial commit with project files

Files changed (3)
  1. app.py +20 -0
  2. media_trust.py +196 -0
  3. requirements.txt +147 -0
app.py ADDED
@@ -0,0 +1,20 @@
+ import gradio as gr
+ import pandas as pd
+ from media_trust import query, process_data, analyse_sentiment, add_bias_annotation, set_article_extremity, add_article_summaries
+
+ def process_news(topic):
+     # Run the full pipeline: fetch -> clean -> sentiment -> bias -> extremity -> summaries.
+     raw_df = query(topic)
+     processed_df = process_data(raw_df)
+     sentiment_df = analyse_sentiment(processed_df)
+     bias_df = add_bias_annotation(sentiment_df)
+     extremity_df = set_article_extremity(bias_df)
+     final_df = add_article_summaries(extremity_df)
+     return final_df[['title', 'summary', 'bias_label', 'extremity_pct', 'source']]
+
+ with gr.Blocks() as interface:
+     with gr.Column():
+         topic_input = gr.Textbox(label="Enter a topic", placeholder="e.g., Tesla")
+         output_table = gr.DataFrame(headers=["Title", "Summary", "Bias", "Extremity %", "Source"], interactive=False)
+         topic_input.submit(process_news, inputs=topic_input, outputs=output_table)
+
+ interface.launch()
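
Note: app.py relies on media_trust.py, which reads a NewsAPI key from the environment at import time. A minimal local setup sketch (the key value is a placeholder, and 7860 is Gradio's usual default port):

    # .env — placeholder value, substitute a real NewsAPI key
    API_KEY=your-newsapi-key

    # then launch the app
    python app.py   # serves the Gradio UI, by default at http://127.0.0.1:7860
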
media_trust.py ADDED
@@ -0,0 +1,196 @@
+ import os
+ import requests
+ import pandas as pd
+ import nltk
+ from datetime import datetime, timedelta
+ from nltk.sentiment.vader import SentimentIntensityAnalyzer
+ from dotenv import load_dotenv
+ from transformers import pipeline
+
+ nltk.download('vader_lexicon', quiet=True)
+
+ # Load the summarisation model once at import time.
+ summarizer = pipeline("summarization", model="sshleifer/distilbart-cnn-12-6")
+
+ load_dotenv()
+ api_key = os.getenv("API_KEY")
+ if not api_key:
+     raise ValueError("API_KEY not found. Make sure to set it in the .env file.")
+
+ SOURCE_BIAS_MAP = {
+     "fox news": "right",
+     "breitbart": "right",
+     "new york post": "right",
+     "the wall street journal": "center-right",
+     "reuters": "center",
+     "associated press": "center",
+     "bloomberg": "center",
+     "npr": "center-left",
+     "cnn": "left",
+     "msnbc": "left",
+     "the new york times": "left",
+     "the washington post": "left",
+     "the guardian": "left",
+     "bbc news": "center",
+     "sky news": "center-right",
+     "the telegraph": "right",
+     "the times": "center-right",
+     "daily mail": "right",
+     "the independent": "center-left",
+     "the sun": "right",
+     "financial times": "center",
+ }
+
+ BIAS_SCORE_MAP = {
+     "left": -1,
+     "center-left": -0.5,
+     "center": 0,
+     "center-right": 0.5,
+     "right": 1,
+     "unknown": 0,
+ }
+
+ def query(topic, sort_by="popularity"):
+     """Fetch recent articles about `topic` from NewsAPI and return them as a DataFrame."""
+     if not topic:
+         print("Topic needs to be passed in")
+         return pd.DataFrame()
+
+     # Restrict the search to a 20-day window ending today.
+     today = datetime.today()
+     window_start = today - timedelta(days=20)
+     from_date = window_start.strftime('%Y-%m-%d')
+     to_date = today.strftime('%Y-%m-%d')
+
+     base_url = "https://newsapi.org/v2/everything"
+     url = f"{base_url}?q={topic}&from={from_date}&to={to_date}&sortBy={sort_by}&apiKey={api_key}"
+
+     news = None
+     try:
+         news_response = requests.get(url, timeout=10)
+         if news_response.status_code == 200:
+             news = news_response.json()
+         else:
+             print("API error has occurred:", news_response.status_code)
+     except requests.RequestException as e:
+         print(f"Request failed: {e}")
+
+     # Return an empty frame instead of crashing when the request failed.
+     if news is None:
+         return pd.DataFrame()
+
+     extracted_data = []
+     for article in news.get("articles", []):
+         extracted_data.append({
+             "title": article.get("title", "N/A"),
+             "description": article.get("description", "N/A"),
+             "source_name": article.get("source", {}).get("name", "N/A"),
+             "url": article.get("url", "N/A"),
+             "publishedAt": article.get("publishedAt", "N/A"),
+         })
+
+     return pd.DataFrame(extracted_data)
+
+
+ def process_data(df):
+     """Drop empty and duplicate rows, then build a combined lower-cased text field."""
+     if df.empty:
+         return df
+     df_cleaned = df.dropna(subset=["title", "description"])
+     df_cleaned = df_cleaned[df_cleaned["title"].str.strip() != ""]
+     df_cleaned = df_cleaned[df_cleaned["description"].str.strip() != ""]
+     df_cleaned = df_cleaned.drop_duplicates(subset=["title", "url"])
+     df_cleaned["text"] = (df_cleaned["title"] + ". " + df_cleaned["description"]).str.lower()
+     return df_cleaned
+
+ def analyse_sentiment(df):
+     """Score each article's text with VADER and attach a coarse sentiment label."""
+     analyser = SentimentIntensityAnalyzer()
+
+     # Score each text once, then unpack the component scores.
+     scores = df['text'].apply(analyser.polarity_scores)
+     df['compound'] = scores.apply(lambda s: s['compound'])
+     df['neg'] = scores.apply(lambda s: s['neg'])
+     df['neu'] = scores.apply(lambda s: s['neu'])
+     df['pos'] = scores.apply(lambda s: s['pos'])
+
+     def label_sentiment(score):
+         if score >= 0.05:
+             return "positive"
+         elif score <= -0.05:
+             return "negative"
+         return "neutral"
+
+     df['sentiment_label'] = df['compound'].apply(label_sentiment)
+     return df
+
+ def get_bias_label(source_name):
+     source = source_name.strip().lower()
+     return SOURCE_BIAS_MAP.get(source, "unknown")
+
+ def add_bias_annotation(df):
+     df['bias_label'] = df['source_name'].apply(get_bias_label)
+     return df
+
+ def set_article_extremity(df, top_n=5):
+     """Combine sentiment strength and distance from the political center into an extremity score."""
+     df['bias_score'] = df['bias_label'].map(lambda label: BIAS_SCORE_MAP.get(label, 0))
+
+     # Both terms lie in [0, 1], so the raw score lies in [0, 2]; scale it to a percentage.
+     df['extremity_score'] = df['compound'].abs() + df['bias_score'].abs()
+     df['extremity_pct'] = ((df['extremity_score'] / 2) * 100).round(1)
+
+     # Flag the top_n most extreme articles.
+     df = df.sort_values(by='extremity_score', ascending=False)
+     df['extreme'] = False
+     df.loc[df.index[:top_n], 'extreme'] = True
+
+     return df
+
+ def summarise_text(row, max_tokens=512):
+     """Summarise one article's text; fall back to a placeholder on any error."""
+     try:
+         text = row['text'] if 'text' in row and pd.notna(row['text']) else ''
+         source_name = row['source_name'] if 'source_name' in row and pd.notna(row['source_name']) else 'unknown'
+
+         # Pick summary bounds relative to the input length so short texts still work.
+         input_length = len(text.split())
+         if input_length < 40:
+             max_length = max(10, input_length // 2)
+         else:
+             max_length = min(input_length - 10, max_tokens)
+         min_length = max(10, max_length - 10)
+
+         summary = summarizer(text, max_length=max_length, min_length=min_length, do_sample=False)
+         summary_text = summary[0]['summary_text']
+
+         return pd.Series({
+             'summary': summary_text,
+             'source': source_name,
+         })
+
+     except Exception as e:
+         print(f"Error summarising row: {e}")
+         return pd.Series({
+             'summary': 'Summary unavailable',
+             'source': 'unknown',
+         })
+
+ def add_article_summaries(df, max_tokens=512):
+     # Note: the bias label and numeric bias score are already set upstream by
+     # add_bias_annotation and set_article_extremity, so only summary and source
+     # are produced here (avoiding the earlier clash where the numeric bias_score
+     # column was overwritten with a label string).
+     summary_df = df.apply(summarise_text, axis=1, max_tokens=max_tokens)
+     df[['summary', 'source']] = summary_df
+     return df
+
+ def main():
+     raw_df = query("Tesla")
+     processed_df = process_data(raw_df)
+     sentiment_df = analyse_sentiment(processed_df)
+     bias_df = add_bias_annotation(sentiment_df)
+     extremity_df = set_article_extremity(bias_df)
+     final_df = add_article_summaries(extremity_df)
+     print(final_df[['title', 'summary', 'bias_label', 'extremity_pct', 'source']].head())
+
+ if __name__ == "__main__":
+     main()
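
As a sanity check on the scoring in set_article_extremity, a worked example with illustrative values (not real API output): an article whose source maps to "right" (bias_score 1) and whose VADER compound is -0.8 gets

    extremity_score = abs(-0.8) + abs(1)          # 0.8 + 1 = 1.8
    extremity_pct = round((1.8 / 2) * 100, 1)     # 90.0, near the top of the 0-2 range
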
requirements.txt ADDED
@@ -0,0 +1,147 @@
+ aiofiles==24.1.0
+ annotated-types==0.7.0
+ anyio==4.9.0
+ appnope==0.1.4
+ argon2-cffi==23.1.0
+ argon2-cffi-bindings==21.2.0
+ arrow==1.3.0
+ asttokens==3.0.0
+ async-lru==2.0.4
+ attrs==25.1.0
+ babel==2.17.0
+ beautifulsoup4==4.13.3
+ bleach==6.2.0
+ certifi==2025.4.26
+ cffi==1.17.1
+ charset-normalizer==3.4.2
+ click==8.2.0
+ comm==0.2.2
+ debugpy==1.8.13
+ decorator==5.2.1
+ defusedxml==0.7.1
+ executing==2.2.0
+ fastapi==0.115.12
+ fastjsonschema==2.21.1
+ ffmpy==0.5.0
+ filelock==3.18.0
+ fqdn==1.5.1
+ fsspec==2025.3.2
+ gradio==5.29.0
+ gradio_client==1.10.0
+ groovy==0.1.2
+ h11==0.16.0
+ hf-xet==1.1.0
+ httpcore==1.0.9
+ httpx==0.28.1
+ huggingface-hub==0.31.1
+ idna==3.10
+ ipykernel==6.29.5
+ ipython==9.0.2
+ ipython_pygments_lexers==1.1.1
+ ipywidgets==8.1.5
+ isoduration==20.11.0
+ jedi==0.19.2
+ Jinja2==3.1.6
+ joblib==1.5.0
+ json5==0.10.0
+ jsonpointer==3.0.0
+ jsonschema==4.23.0
+ jsonschema-specifications==2024.10.1
+ jupyter==1.1.1
+ jupyter-console==6.6.3
+ jupyter-events==0.12.0
+ jupyter-lsp==2.2.5
+ jupyter_client==8.6.3
+ jupyter_core==5.7.2
+ jupyter_server==2.15.0
+ jupyter_server_terminals==0.5.3
+ jupyterlab==4.3.5
+ jupyterlab_pygments==0.3.0
+ jupyterlab_server==2.27.3
+ jupyterlab_widgets==3.0.13
+ markdown-it-py==3.0.0
+ MarkupSafe==3.0.2
+ matplotlib-inline==0.1.7
+ mdurl==0.1.2
+ mistune==3.1.2
+ mpmath==1.3.0
+ nbclient==0.10.2
+ nbconvert==7.16.6
+ nbformat==5.10.4
+ nest-asyncio==1.6.0
+ networkx==3.4.2
+ nltk==3.9.1
+ notebook==7.3.2
+ notebook_shim==0.2.4
+ numpy==2.2.5
+ orjson==3.10.18
+ overrides==7.7.0
+ packaging==25.0
+ pandas==2.2.3
+ pandocfilters==1.5.1
+ parso==0.8.4
+ pexpect==4.9.0
+ pillow==11.2.1
+ platformdirs==4.3.6
+ prometheus_client==0.21.1
+ prompt_toolkit==3.0.50
+ psutil==7.0.0
+ ptyprocess==0.7.0
+ pure_eval==0.2.3
+ pycparser==2.22
+ pydantic==2.11.4
+ pydantic_core==2.33.2
+ pydub==0.25.1
+ Pygments==2.19.1
+ python-dateutil==2.9.0.post0
+ python-dotenv==1.1.0
+ python-json-logger==3.3.0
+ python-multipart==0.0.20
+ pytz==2025.2
+ PyYAML==6.0.2
+ pyzmq==26.2.1
+ referencing==0.36.2
+ regex==2024.11.6
+ requests==2.32.3
+ rfc3339-validator==0.1.4
+ rfc3986-validator==0.1.1
+ rich==14.0.0
+ rpds-py==0.23.1
+ ruff==0.11.9
+ safehttpx==0.1.6
+ safetensors==0.5.3
+ scikit-learn==1.6.1
+ scipy==1.15.2
+ semantic-version==2.10.0
+ Send2Trash==1.8.3
+ shellingham==1.5.4
+ six==1.17.0
+ sniffio==1.3.1
+ soupsieve==2.6
+ stack-data==0.6.3
+ starlette==0.46.2
+ sympy==1.14.0
+ terminado==0.18.1
+ threadpoolctl==3.5.0
+ tinycss2==1.4.0
+ tokenizers==0.21.1
+ tomlkit==0.13.2
+ torch==2.7.0
+ tornado==6.4.2
+ tqdm==4.67.1
+ traitlets==5.14.3
+ transformers==4.51.3
+ typer==0.15.3
+ types-python-dateutil==2.9.0.20241206
+ typing-inspection==0.4.0
+ typing_extensions==4.13.2
+ tzdata==2025.2
+ uri-template==1.3.0
+ urllib3==2.4.0
+ uvicorn==0.34.2
+ wcwidth==0.2.13
+ webcolors==24.11.1
+ webencodings==0.5.1
+ websocket-client==1.8.0
+ websockets==15.0.1
+ widgetsnbextension==4.0.13