File size: 1,962 Bytes
83f1514
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
import os

from openai import OpenAI

from src.nlp.data.test_texts import TEXTS

class ModelName:
    """Namespace of model identifier strings used as the ``model`` argument.

    Each attribute is a plain ``str``; all of them point at ``:free`` model
    variants.
    """

    # Qwen 2.5 coder, 32B instruct variant.
    QWEN_CODER_INSTRUCT = "qwen/qwen-2.5-coder-32b-instruct:free"

    # DeepSeek R1 and its Llama-70B distillation.
    DEEP_SEEK_R1 = "deepseek/deepseek-r1:free"
    DEEP_SEEK_R1_DISTILL_LLAMA = "deepseek/deepseek-r1-distill-llama-70b:free"

# Shared API client: the OpenAI SDK pointed at the OpenRouter endpoint.
#
# The API key is read from the OPENROUTER_API_KEY environment variable so that
# no credential is stored in the source tree.
# NOTE: the key that used to be hard-coded here must be considered leaked and
# should be revoked/rotated in the OpenRouter dashboard.
_openrouter_api_key = os.environ.get("OPENROUTER_API_KEY")
if not _openrouter_api_key:
    # Fail early with an actionable message instead of an opaque auth error
    # on the first request.
    raise RuntimeError(
        "OPENROUTER_API_KEY is not set; export your OpenRouter API key "
        "before running this module."
    )

client = OpenAI(
    base_url="https://openrouter.ai/api/v1",
    api_key=_openrouter_api_key,
)

# Fixed (German) instruction that precedes the input text in every request.
# It asks for the event data found in the text, laid out in the JSON shape
# shown below. The literal is kept byte-for-byte, including its indentation,
# because it is sent to the model verbatim.
# NOTE(review): the template has no comma after the "adress" object and spells
# the key "adress"; left untouched since consumers of the model output may
# rely on that key - confirm before changing.
_EXTRACTION_PROMPT = """
            Extrahiere die Veranstaltungsdaten (wenn vorhanden) aus dem Text in folgendem JSON Format:
            { 
                "title": String,
                "start_date": String,
                "end_date": String | None,
                "start_time": String | None,
                "end_time": String | None,
                "admittance_time": String | None,
                "location_name": String | None,
                "adress": {
                    "street": String | None,
                    "housenumber": String | None,
                    "postal_code": String | None,
                    "city": String | None,
                }
                "categories": Array<String> | None,
                "organizers": Array<String> | None,
            }
            
            Text:
          """


def deep_seek_extraction(text, model_name: str):
    """Request an event-data extraction for *text* from the given model.

    The input is appended to a fixed prompt and sent as a single ``user``
    message through the module-level ``client``.

    Args:
        text: Text to extract event data from; concatenated onto the prompt
            with ``+``, so it has to be a ``str``.
        model_name: Model identifier forwarded as the ``model`` argument
            (e.g. one of the ``ModelName`` constants).

    Returns:
        The object returned by ``client.chat.completions.create``.
    """
    user_message = {
        "role": "user",
        "content": _EXTRACTION_PROMPT + text,
    }
    # No extra headers or body fields are sent. OpenRouter's optional
    # "HTTP-Referer" (site URL) and "X-Title" (site title) ranking headers
    # would go into ``extra_headers``.
    return client.chat.completions.create(
        extra_headers={},
        extra_body={},
        model=model_name,
        messages=[user_message],
    )

# Demo run: send every sample text to the Qwen coder model and print the input
# followed by the model's answer, framed by rows of 100 asterisks.
# NOTE(review): this runs at import time and issues one API request per text.
_separator = "*" * 100
for text in TEXTS:
    print(_separator)
    print("TEXT", text, sep="\n")
    completion = deep_seek_extraction(text, ModelName.QWEN_CODER_INSTRUCT)
    print("DATA:")
    # Only the first choice of the completion is shown.
    answer = completion.choices[0].message.content
    print(answer)
    print(_separator)