File size: 2,534 Bytes
b11c8cd
0658229
2e937f5
 
0658229
50639ab
2e937f5
b11c8cd
2e937f5
 
 
 
b11c8cd
57d46c6
 
 
 
b11c8cd
 
2e937f5
 
 
 
 
b11c8cd
2e937f5
 
b11c8cd
 
 
 
 
21a1796
 
 
 
 
 
 
2e937f5
b11c8cd
0658229
 
 
2e937f5
 
 
0658229
21a1796
0658229
2e937f5
0658229
 
2e937f5
 
0658229
 
2e937f5
 
 
 
0658229
 
2e937f5
 
 
 
 
 
 
 
0658229
2e937f5
 
 
 
0658229
 
 
 
b11c8cd
2e937f5
 
2f7d2fd
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
import requests
from bs4 import BeautifulSoup
import gradio as gr
import os 


# Hugging Face Inference API token, read from the environment.
# May be None if TOKEN is unset -- requests are then sent unauthenticated.
api_token = os.environ.get("TOKEN")
# Hosted inference endpoint for the Llama-3 8B instruct model.
API_URL = "https://api-inference.huggingface.co/models/meta-llama/Meta-Llama-3-8B-Instruct"
headers = {"Authorization": f"Bearer {api_token}"}

# Page whose posts are scraped and classified by scrape_and_analyze().
url = "https://huggingface.co/posts" 


def query(payload):
    """POST *payload* to the Hugging Face Inference API and return the decoded JSON.

    Parameters
    ----------
    payload : dict
        JSON-serialisable request body, e.g. ``{"inputs": "..."}``.

    Returns
    -------
    The JSON-decoded response: a list of generations on success, or a dict
    describing an error returned by the API.
    """
    # A timeout keeps a stalled API from hanging the whole run forever;
    # the original call had none.
    response = requests.post(API_URL, headers=headers, json=payload, timeout=60)
    return response.json()

def analyze_sentiment(text):
    """Classify *text* as "chat bot", "AI dev" or "autre" using the LLM.

    Parameters
    ----------
    text : str
        The post content to classify.

    Returns
    -------
    str
        One of ``"chat bot"``, ``"AI dev"`` or ``"autre"`` (also returned
        when the API response has an unexpected shape).
    """
    prompt = f'''
system
you are going to analyse the prompt that I'll give to you and tell me if they are either talking about "chat bot", "AI dev", or something else.

user
{text}

assistant
'''
    output = query({"inputs": prompt})

    if isinstance(output, list) and len(output) > 0:
        generated = output[0].get('generated_text', '')
        # BUG FIX: by default the Inference API echoes the prompt at the
        # start of `generated_text`. The system instruction itself contains
        # the words "chat bot", so matching against the full text classified
        # every single post as "chat bot". Strip the echoed prompt first.
        if generated.startswith(prompt):
            generated = generated[len(prompt):]
        response = generated.strip().lower()

        if "chat bot" in response:
            return "chat bot"
        elif "ai dev" in response:
            return "AI dev"
        else:
            return "autre"
    # Error payloads from the API arrive as a dict, not a list.
    return "autre"

def scrape_and_analyze(url):
    """Scrape the posts on *url* and classify each one with the LLM.

    Parameters
    ----------
    url : str
        Page to scrape; posts are located via the CSS classes currently
        used by huggingface.co/posts.

    Returns
    -------
    str
        A human-readable (French) report: one line per post with its
        category, then per-category totals — or an error message string
        if the request or parsing fails.
    """
    try:
        # Timeout so a stalled server cannot hang the scrape (original
        # call had none).
        response = requests.get(url, timeout=30)
        if response.status_code != 200:
            return f"Erreur lors de la requête : {response.status_code}"

        soup = BeautifulSoup(response.text, 'html.parser')

        # Adjust this selector to match the target site's structure.
        posts = soup.find_all('div', class_='cursor-pointer')

        categories = {"chat bot": 0, "AI dev": 0, "autre": 0}
        total_posts = 0
        # Collect report lines and join once at the end instead of the
        # original quadratic `result += ...` accumulation.
        lines = []

        for post in posts:
            total_posts += 1
            # Look the content node up once (the original called find() twice).
            content_node = post.find('div', class_='relative')
            content = content_node.text.strip() if content_node else "Pas de contenu"

            # Classify the post text.
            category = analyze_sentiment(content)
            categories[category] += 1

            # Live progress output.
            print(f"Post {total_posts} analysé. Catégorie : {category}")
            print(f"Compteurs actuels : {categories}")
            print("---")

            # Append this post's result to the final report.
            lines.append(f"Post {total_posts} : Catégorie {category}\n")

        # Final summary.
        lines.append(f"\nTotal des posts analysés : {total_posts}\n")
        for cat, count in categories.items():
            lines.append(f"{cat} : {count}\n")

        return "".join(lines)
    except Exception as e:
        # Broad catch is deliberate: this is the script's top-level
        # boundary and any failure should be reported, not crash the run.
        return f"Une erreur s'est produite : {str(e)}"

if __name__ == "__main__":
    # Guard the entry point so importing this module does not immediately
    # fire network requests (the original ran unconditionally on import).
    result = scrape_and_analyze(url)
    print(result)