Oleh Kuznetsov committed on
Commit
e7fa297
·
1 Parent(s): 4825b08

feat(rec): Finish feedback + nickname; small fixes

Browse files
Files changed (1) hide show
  1. app.py +85 -12
app.py CHANGED
@@ -1,17 +1,22 @@
1
  import json
2
  import os
3
  import random
 
 
4
  import urllib.parse
 
5
  from pathlib import Path
6
  from typing import Optional
 
7
 
8
  import gradio as gr
9
  import numpy as np
10
  import pandas as pd
11
- from dotenv import load_dotenv
12
  from fastembed import SparseEmbedding, SparseTextEmbedding
13
  from google import genai
14
  from google.genai import types
 
15
  from pydantic import BaseModel, Field
16
  from qdrant_client import QdrantClient
17
  from qdrant_client import models as qmodels
@@ -19,7 +24,7 @@ from sentence_transformers import CrossEncoder, SentenceTransformer
19
  from vllm import LLM, SamplingParams
20
  from vllm.sampling_params import GuidedDecodingParams
21
 
22
- load_dotenv()
23
 
24
  HF_TOKEN = os.getenv("HF_TOKEN")
25
 
@@ -33,6 +38,21 @@ GEMINI_API_KEY = os.getenv("GEMINI_API_KEY")
33
  DATA_PATH = Path(os.getenv("DATA_PATH"))
34
  DB_PATH = DATA_PATH / "db"
35
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
36
  client = QdrantClient(path=str(DB_PATH))
37
  collection_name = "knowledge_cards"
38
  num_chunks_base = 500
@@ -64,12 +84,21 @@ def generate_recommendation_string(ranking: dict[str, float]) -> str:
64
  for idx, (genre, score) in enumerate(ranking.items(), start=1):
65
  youtube_link = youtube_search_link_for_genre(genre=genre)
66
  recommendation_string += (
67
- f"{idx}. **{genre.replace('_', ' ').capitalize()}** ({score:.2f}); "
68
  f"[YouTube link]({youtube_link})\n"
69
  )
70
  return recommendation_string
71
 
72
 
 
 
 
 
 
 
 
 
 
73
  # -------------------------------- Data Models -------------------------------
74
  class StructuredQueryRewriteResponse(BaseModel):
75
  general: str | None
@@ -396,11 +425,13 @@ def recommend_gemini(query: str):
396
  # -------------------------------------- INTERFACE -----------------------------
397
  pipelines = {
398
  "sadaimrec": recommend_sadaimrec,
399
- "chatgpt": recommend_gemini,
400
  }
401
 
402
 
403
  def generate_responses(query):
 
 
404
  # Randomize model order
405
  pipeline_names = list(pipelines.keys())
406
  random.shuffle(pipeline_names)
@@ -414,13 +445,39 @@ def generate_responses(query):
414
 
415
 
416
  # Callback to capture vote
417
- def handle_vote(selected, label1, label2, resp1, resp2):
418
- chosen_name = label1 if selected == "Option 1" else label2
419
- chosen_resp = resp1 if selected == "Option 1" else resp2
420
- print(f"User voted for {chosen_name}: '{chosen_resp}'")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
421
  return (
422
- "Thank you for your vote! Restarting in 2 seconds...",
423
  gr.update(active=True),
 
424
  )
425
 
426
 
@@ -432,6 +489,8 @@ def reset_ui():
432
  gr.update(visible=False), # hide vote button
433
  gr.update(value="**Generating...**"), # clear Option 1 text
434
  gr.update(value="**Generating...**"), # clear Option 2 text
 
 
435
  gr.update(value=""), # clear result
436
  gr.update(active=False),
437
  )
@@ -445,13 +504,17 @@ with gr.Blocks(
445
  gr.Markdown(app_description)
446
  with gr.Accordion("Detailed usage instructions", open=False):
447
  gr.Markdown(app_instructions)
 
 
 
 
448
  query = gr.Textbox(
449
  label="Your Query",
450
  placeholder="Calming, music for deep relaxation with echoing sounds and deep bass",
451
  )
452
  submit_btn = gr.Button("Submit")
453
  # timer that resets ui after feedback is sent
454
- reset_timer = gr.Timer(value=2.0, active=False)
455
 
456
  # Hidden components to store model responses and names
457
  with gr.Row(visible=False) as response_row:
@@ -489,8 +552,16 @@ with gr.Blocks(
489
  # Feedback handling
490
  vote_btn.click(
491
  fn=handle_vote,
492
- inputs=[vote, model_label_1, model_label_2, response_1, response_2],
493
- outputs=[result, reset_timer],
 
 
 
 
 
 
 
 
494
  )
495
  reset_timer.tick(
496
  fn=reset_ui,
@@ -502,6 +573,8 @@ with gr.Blocks(
502
  vote_btn,
503
  response_1,
504
  response_2,
 
 
505
  result,
506
  reset_timer,
507
  ],
 
1
  import json
2
  import os
3
  import random
4
+ import signal
5
+ import sys
6
  import urllib.parse
7
+ from datetime import datetime
8
  from pathlib import Path
9
  from typing import Optional
10
+ from uuid import uuid4
11
 
12
  import gradio as gr
13
  import numpy as np
14
  import pandas as pd
15
+ # from dotenv import load_dotenv
16
  from fastembed import SparseEmbedding, SparseTextEmbedding
17
  from google import genai
18
  from google.genai import types
19
+ from huggingface_hub import CommitScheduler
20
  from pydantic import BaseModel, Field
21
  from qdrant_client import QdrantClient
22
  from qdrant_client import models as qmodels
 
24
  from vllm import LLM, SamplingParams
25
  from vllm.sampling_params import GuidedDecodingParams
26
 
27
+ # load_dotenv()
28
 
29
  HF_TOKEN = os.getenv("HF_TOKEN")
30
 
 
38
  DATA_PATH = Path(os.getenv("DATA_PATH"))
39
  DB_PATH = DATA_PATH / "db"
40
 
41
# ----------------------------- Feedback persistence ---------------------------
# Votes are appended to a local JSONL file and pushed to a Hugging Face dataset
# repository in the background by a CommitScheduler.
FEEDBACK_REPO = os.getenv("FEEDBACK_REPO")
if not FEEDBACK_REPO:
    # Fail fast with a readable message instead of handing `None` to the
    # scheduler as a repository id.
    raise RuntimeError(
        "FEEDBACK_REPO environment variable is not set; "
        "it must name the dataset repository that receives the feedback."
    )

FEEDBACK_DIR = DATA_PATH / "feedback"
FEEDBACK_DIR.mkdir(parents=True, exist_ok=True)
# A fresh, uniquely named file per process start, so separate runs never
# append to (or overwrite) each other's votes inside the shared folder.
FEEDBACK_FILE = FEEDBACK_DIR / f"votes_{uuid4()}.jsonl"

scheduler = CommitScheduler(
    repo_id=FEEDBACK_REPO,
    repo_type="dataset",
    folder_path=FEEDBACK_DIR,
    path_in_repo="data",
    every=5,  # minutes between background commits
    token=HF_TOKEN,
    private=True,
)
55
+
56
  client = QdrantClient(path=str(DB_PATH))
57
  collection_name = "knowledge_cards"
58
  num_chunks_base = 500
 
84
  for idx, (genre, score) in enumerate(ranking.items(), start=1):
85
  youtube_link = youtube_search_link_for_genre(genre=genre)
86
  recommendation_string += (
87
+ f"{idx}. **{genre.replace('_', ' ').capitalize()}**; "
88
  f"[YouTube link]({youtube_link})\n"
89
  )
90
  return recommendation_string
91
 
92
 
93
def graceful_shutdown(signum, frame):
    """Push pending feedback to the Hub, then terminate the process.

    Installed below as the handler for SIGTERM and SIGINT.

    Args:
        signum: Number of the signal that triggered the handler.
        frame: Current stack frame supplied by the ``signal`` module (unused).

    Raises:
        SystemExit: Always, with exit status 0.
    """
    print(f"{signum} received - flushing feedback …", flush=True)
    try:
        # trigger() schedules an immediate push; result() blocks until the
        # upload has finished.
        scheduler.trigger().result()
    except Exception as exc:
        # A failed upload (e.g. no network) must not prevent the shutdown:
        # report it and fall through to the exit below.
        print(f"Flushing feedback failed: {exc!r}", flush=True)
    sys.exit(0)


signal.signal(signal.SIGTERM, graceful_shutdown)
signal.signal(signal.SIGINT, graceful_shutdown)
101
+
102
  # -------------------------------- Data Models -------------------------------
103
  class StructuredQueryRewriteResponse(BaseModel):
104
  general: str | None
 
425
  # -------------------------------------- INTERFACE -----------------------------
426
  pipelines = {
427
  "sadaimrec": recommend_sadaimrec,
428
+ "gemini": recommend_gemini,
429
  }
430
 
431
 
432
  def generate_responses(query):
433
+ if not query.strip():
434
+ raise gr.Error("Please enter a query before submitting.")
435
  # Randomize model order
436
  pipeline_names = list(pipelines.keys())
437
  random.shuffle(pipeline_names)
 
445
 
446
 
447
  # Callback to capture vote
448
def handle_vote(nickname, query, selected, label1, label2, resp1, resp2):
    """Record the user's vote and prepare the UI for the delayed reset.

    The vote is logged to stdout and appended as one JSON line to
    ``FEEDBACK_FILE``.

    Args:
        nickname: Text from the nickname box; a random 8-character hex id is
            generated when it is empty or missing.
        query: The query the two responses were generated for.
        selected: Chosen option; ``"Option 1 (left)"`` makes the first
            label/response the winner, any other non-empty value the second.
        label1: Pipeline name behind option 1.
        label2: Pipeline name behind option 2.
        resp1: Response text shown as option 1.
        resp2: Response text shown as option 2.

    Returns:
        A 3-tuple: the thank-you message, an update activating the reset
        timer, and an update writing the effective nickname back to its box.

    Raises:
        gr.Error: If no option was selected.
    """
    # Local import: the module only imports `datetime` from `datetime`.
    from datetime import timezone

    if not selected:
        # Without this guard an empty selection would silently be stored as a
        # win for option 2.
        raise gr.Error("Please select an option before voting.")

    # The textbox value may be None; treat that like an empty nickname.
    nick = (nickname or "").strip() or uuid4().hex[:8]

    if selected == "Option 1 (left)":
        winner_name, loser_name = label1, label2
        winner_resp, loser_resp = resp1, resp2
    else:
        winner_name, loser_name = label2, label1
        winner_resp, loser_resp = resp2, resp1

    print(
        (
            f"User voted:\nwinner = {winner_name}: {winner_resp};"
            f" loser = {loser_name}: {loser_resp}"
        ),
        flush=True,
    )

    # ---------- persist feedback locally ----------
    entry = {
        # Real UTC time: the trailing "Z" is only correct for UTC, not for the
        # server's local clock.
        "ts": datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ"),
        "nickname": nick,
        "query": query,
        "winner": winner_name,
        "loser": loser_name,
        "winner_response": winner_resp,
        "loser_response": loser_resp,
    }
    # Hold the scheduler's lock while writing so that a background commit
    # never uploads a partially written line.
    with scheduler.lock:
        with FEEDBACK_FILE.open("a", encoding="utf-8") as f:
            f.write(json.dumps(entry) + "\n")

    return (
        f"Thank you for your vote! Winner: {winner_name}. Restarting in 3 seconds...",
        gr.update(active=True),
        gr.update(value=nick),
    )
482
 
483
 
 
489
  gr.update(visible=False), # hide vote button
490
  gr.update(value="**Generating...**"), # clear Option 1 text
491
  gr.update(value="**Generating...**"), # clear Option 2 text
492
+ gr.update(value=""), # clear Model Label 1 text
493
+ gr.update(value=""), # clear Model Label 2 text
494
  gr.update(value=""), # clear result
495
  gr.update(active=False),
496
  )
 
504
  gr.Markdown(app_description)
505
  with gr.Accordion("Detailed usage instructions", open=False):
506
  gr.Markdown(app_instructions)
507
+ nickname = gr.Textbox(
508
+ label="Your nickname",
509
+ placeholder="Leave empty to generate a random nickname on first vote within session",
510
+ )
511
  query = gr.Textbox(
512
  label="Your Query",
513
  placeholder="Calming, music for deep relaxation with echoing sounds and deep bass",
514
  )
515
  submit_btn = gr.Button("Submit")
516
  # timer that resets ui after feedback is sent
517
+ reset_timer = gr.Timer(value=3.0, active=False)
518
 
519
  # Hidden components to store model responses and names
520
  with gr.Row(visible=False) as response_row:
 
552
  # Feedback handling
553
  vote_btn.click(
554
  fn=handle_vote,
555
+ inputs=[
556
+ nickname,
557
+ query,
558
+ vote,
559
+ model_label_1,
560
+ model_label_2,
561
+ response_1,
562
+ response_2,
563
+ ],
564
+ outputs=[result, reset_timer, nickname],
565
  )
566
  reset_timer.tick(
567
  fn=reset_ui,
 
573
  vote_btn,
574
  response_1,
575
  response_2,
576
+ model_label_1,
577
+ model_label_2,
578
  result,
579
  reset_timer,
580
  ],