shigureui committed on
Commit
65cd580
·
1 Parent(s): 985d556
Files changed (1) hide show
  1. app.py +50 -35
app.py CHANGED
@@ -4,42 +4,45 @@ import base64
4
  from Crypto.Cipher import AES
5
  from Crypto.Util.Padding import unpad
6
 
 
7
  def decrypt_file(input_path, key):
8
  # 读取加密文件
9
- with open(input_path, 'rb') as f:
10
  encrypted_data = base64.b64decode(f.read())
11
-
12
- key = key.ljust(32, '0')[:32].encode('utf-8')
13
  iv = encrypted_data[:16]
14
  ciphertext = encrypted_data[16:]
15
 
16
  cipher = AES.new(key, AES.MODE_CBC, iv)
17
  plaintext = unpad(cipher.decrypt(ciphertext), AES.block_size)
18
-
19
- return plaintext.decode('utf-8')
20
 
21
 
22
- llm = llama_cpp.Llama.from_pretrained(repo_id="mradermacher/bge-large-zh-v1.5-GGUF", filename="bge-large-zh-v1.5.Q4_K_M.gguf", embedding=True)
 
 
 
 
23
 
24
  # embedding_1 = llm.create_embedding("Hello, world!")
25
  # embedding_2 = llm.create_embedding("你好, 世界!") # type(embedding_1['data'][0]['embedding']) list
26
 
27
  from pymilvus import MilvusClient
 
28
  client = MilvusClient("./books.db")
29
 
30
- client.create_collection(
31
- collection_name="collection_1",
32
- dimension=1024
33
- )
34
 
35
  import os, json
36
- aeskey = os.getenv('aeskey')
37
- decrypted_content = decrypt_file('encrypted.txt', aeskey)
38
- raw_jsons = json.loads(decrypted_content)
39
 
 
 
 
40
 
41
 
42
- with open('embeddings.json', mode='r') as embedding_file:
43
  all_embs = json.load(embedding_file)
44
 
45
 
@@ -51,14 +54,16 @@ for vhjx_index, vhjx_item in enumerate(raw_jsons):
51
  for jvvi_item in vhjx_item[1:]:
52
  content = jvvi_item["原文"]
53
  docs.append(content)
54
- metas.append({
55
- "index": jvvi_item["index"],
56
- "text": content,
57
- "annotation": jvvi_item.get("注释", ""),
58
- "critique": jvvi_item.get("批判", ""),
59
- "chapter": chapter
60
- })
61
-
 
 
62
  # 一个章节一次
63
  # 批量生成 embeddings(每个为 list[float])
64
  # emb_result = llm.create_embedding(docs)
@@ -68,30 +73,40 @@ for vhjx_index, vhjx_item in enumerate(raw_jsons):
68
  milvus_data = []
69
  for i, emb in enumerate(embeddings):
70
  item = metas[i]
71
- milvus_data.append({
72
- "id" : vhjx_index * 100 + i,
73
- "index": item["index"],
74
- "vector": emb,
75
- "text": item["text"],
76
- "annotation": item["annotation"],
77
- "critique": item["critique"],
78
- "chapter": item["chapter"]
79
- })
 
 
80
  print(f"✅ 共 {len(milvus_data)} 条数据")
81
 
82
  # 插入数据
83
  client.insert(collection_name="collection_1", data=milvus_data)
84
  print(f"✅ 插入完成:共 {len(milvus_data)} 条数据")
85
 
 
86
  def greet(name):
87
  embeddings = llm.create_embedding(name)
88
  res = client.search(
89
  collection_name="collection_1",
90
- data=[embeddings['data'][0]['embedding']],
91
- limit=2,
92
- output_fields=["text", "id"],
93
  )
94
  return res
95
 
96
- demo = gr.Interface(fn=greet, inputs="text", outputs=gr.JSON(label="查询结果"))
 
 
 
 
 
 
 
97
  demo.launch(mcp_server=True)
 
4
  from Crypto.Cipher import AES
5
  from Crypto.Util.Padding import unpad
6
 
7
+
def decrypt_file(input_path, key):
    """Decrypt a Base64-encoded AES-CBC file and return its text.

    After Base64 decoding, the first 16 bytes of the file are used as the
    IV and the remainder as the ciphertext, which is unpadded to the AES
    block size after decryption.

    Args:
        input_path: Path of the Base64-encoded encrypted file.
        key: Passphrase as ``str``. It is UTF-8 encoded and then
            right-padded with ASCII ``"0"`` or truncated to exactly
            32 bytes.

    Returns:
        The decrypted plaintext decoded as UTF-8.

    Raises:
        ValueError: If ``key`` is ``None``; also propagated from the
            cipher/unpad calls when the data cannot be decrypted.
    """
    if key is None:
        # Fail before any I/O with a message that names the actual problem
        # (typically an unset environment variable at the call site).
        raise ValueError("AES key is missing (got None)")

    # Pad/truncate on bytes, not on str: a passphrase containing multi-byte
    # characters would otherwise encode to more than 32 bytes and be
    # rejected by AES.new. For ASCII passphrases the result is unchanged.
    key_bytes = key.encode("utf-8").ljust(32, b"0")[:32]

    # Read the encrypted file
    with open(input_path, "rb") as f:
        encrypted_data = base64.b64decode(f.read())

    iv = encrypted_data[:16]
    ciphertext = encrypted_data[16:]

    cipher = AES.new(key_bytes, AES.MODE_CBC, iv)
    plaintext = unpad(cipher.decrypt(ciphertext), AES.block_size)

    return plaintext.decode("utf-8")
21
 
22
 
# Load the Q4_K_M-quantized bge-large-zh-v1.5 GGUF file from the given model
# repository with embedding=True; `llm` is used below to embed query text.
23
+ llm = llama_cpp.Llama.from_pretrained(
24
+ repo_id="mradermacher/bge-large-zh-v1.5-GGUF",
25
+ filename="bge-large-zh-v1.5.Q4_K_M.gguf",
26
+ embedding=True,
27
+ )
28
 
29
  # embedding_1 = llm.create_embedding("Hello, world!")
30
  # embedding_2 = llm.create_embedding("你好, 世界!") # type(embedding_1['data'][0]['embedding']) list
31
 
32
  from pymilvus import MilvusClient
33
+
# Open the vector store backed by the local file ./books.db.
34
  client = MilvusClient("./books.db")
35
 
# Create the collection that later inserts and searches refer to.
# NOTE(review): dimension=1024 presumably matches the embedding size of the
# model loaded above — confirm if the model is ever swapped.
36
+ client.create_collection(collection_name="collection_1", dimension=1024)
 
 
 
37
 
38
  import os, json
 
 
 
39
 
# The AES passphrase comes from the environment. Stop with a clear message
# when it is absent instead of failing later inside decrypt_file.
aeskey = os.getenv("aeskey")
if aeskey is None:
    raise RuntimeError("environment variable 'aeskey' is not set")

# Decrypt the source corpus and parse it as JSON.
decrypted_content = decrypt_file("encrypted.txt", aeskey)
raw_jsons = json.loads(decrypted_content)

# Read the precomputed embeddings. The encoding is pinned to UTF-8 so the
# result does not depend on the platform's default locale encoding.
with open("embeddings.json", mode="r", encoding="utf-8") as embedding_file:
    all_embs = json.load(embedding_file)
47
 
48
 
 
54
  for jvvi_item in vhjx_item[1:]:
55
  content = jvvi_item["原文"]
56
  docs.append(content)
57
+ metas.append(
58
+ {
59
+ "index": jvvi_item["index"],
60
+ "text": content,
61
+ "annotation": jvvi_item.get("注释", ""),
62
+ "critique": jvvi_item.get("批判", ""),
63
+ "chapter": chapter,
64
+ }
65
+ )
66
+
67
  # 一个章节一次
68
  # 批量生成 embeddings(每个为 list[float])
69
  # emb_result = llm.create_embedding(docs)
 
73
  milvus_data = []
74
  for i, emb in enumerate(embeddings):
75
  item = metas[i]
76
+ milvus_data.append(
77
+ {
78
+ "id": vhjx_index * 100 + i,
79
+ "index": item["index"],
80
+ "vector": emb,
81
+ "text": item["text"],
82
+ "annotation": item["annotation"],
83
+ "critique": item["critique"],
84
+ "chapter": item["chapter"],
85
+ }
86
+ )
87
  print(f"✅ 共 {len(milvus_data)} 条数据")
88
 
89
  # 插入数据
90
  client.insert(collection_name="collection_1", data=milvus_data)
91
  print(f"✅ 插入完成:共 {len(milvus_data)} 条数据")
92
 
93
+
def greet(name):
    """Search the collection for passages closest to the query text.

    Args:
        name: Query text, e.g. a fragment of an original passage.

    Returns:
        The raw result of ``client.search`` limited to five hits, with the
        ``index``, ``text``, ``annotation`` and ``critique`` fields
        requested as output. For a blank or ``None`` query an empty list
        is returned without calling the model or the vector store.
    """
    # A blank query carries no meaning to embed; answer with "no results"
    # rather than returning arbitrary nearest neighbours.
    if name is None or not name.strip():
        return []

    embeddings = llm.create_embedding(name)
    res = client.search(
        collection_name="collection_1",
        data=[embeddings["data"][0]["embedding"]],
        limit=5,
        output_fields=["index", "text", "annotation", "critique"],
    )
    return res
103
 
104
+
# Build the Gradio interface around greet(): one labelled text box as input
# and a JSON component showing the search result as output.
105
+ demo = gr.Interface(
106
+ fn=greet,
107
+ inputs=gr.Textbox(label="输入部分原文句子"),
108
+ outputs=gr.JSON(label="查询结果"),
109
+ title="论语批判MCP (Embedding版本)",
110
+ description="输入模糊的论语原文,可以向量检索到对应的批判内容。",
111
+ )
# Start the app; mcp_server=True is passed so that, per the Gradio docs, the
# interface is also served as an MCP server.
112
  demo.launch(mcp_server=True)