Spaces:

shigureui
/

BookSearch

Sleeping

App Files Community

shigureui committed on 24 days ago

Commit

65cd580

1 Parent(s): 985d556

test

Browse files

Files changed (1) hide show

app.py +50 -35

app.py CHANGED Viewed

@@ -4,42 +4,45 @@ import base64
 from Crypto.Cipher import AES
 from Crypto.Util.Padding import unpad
 def decrypt_file(input_path, key):
     # 读取加密文件
-    with open(input_path, 'rb') as f:
         encrypted_data = base64.b64decode(f.read())
-    key = key.ljust(32, '0')[:32].encode('utf-8')
     iv = encrypted_data[:16]
     ciphertext = encrypted_data[16:]
     cipher = AES.new(key, AES.MODE_CBC, iv)
     plaintext = unpad(cipher.decrypt(ciphertext), AES.block_size)
-    return plaintext.decode('utf-8')
-llm = llama_cpp.Llama.from_pretrained(repo_id="mradermacher/bge-large-zh-v1.5-GGUF", filename="bge-large-zh-v1.5.Q4_K_M.gguf", embedding=True)
 # embedding_1 = llm.create_embedding("Hello, world!")
 # embedding_2 = llm.create_embedding("你好, 世界!") # type(embedding_1['data'][0]['embedding']) list
 from pymilvus import MilvusClient
 client = MilvusClient("./books.db")
-client.create_collection(
-    collection_name="collection_1",
-    dimension=1024
-)
 import os, json
-aeskey = os.getenv('aeskey')
-decrypted_content = decrypt_file('encrypted.txt', aeskey)
-raw_jsons = json.loads(decrypted_content)
-with open('embeddings.json', mode='r') as embedding_file:
     all_embs = json.load(embedding_file)
@@ -51,14 +54,16 @@ for vhjx_index, vhjx_item in enumerate(raw_jsons):
     for jvvi_item in vhjx_item[1:]:
         content = jvvi_item["原文"]
         docs.append(content)
-        metas.append({
-            "index": jvvi_item["index"],
-            "text": content,
-            "annotation": jvvi_item.get("注释", ""),
-            "critique": jvvi_item.get("批判", ""),
-            "chapter": chapter
-        })
     # 一个章节一次
     # 批量生成 embeddings（每个为 list[float]）
     # emb_result = llm.create_embedding(docs)
@@ -68,30 +73,40 @@ for vhjx_index, vhjx_item in enumerate(raw_jsons):
     milvus_data = []
     for i, emb in enumerate(embeddings):
         item = metas[i]
-        milvus_data.append({
-            "id" : vhjx_index * 100 + i,
-            "index": item["index"],
-            "vector": emb,
-            "text": item["text"],
-            "annotation": item["annotation"],
-            "critique": item["critique"],
-            "chapter": item["chapter"]
-        })
     print(f"✅ 共 {len(milvus_data)} 条数据")
     # 插入数据
     client.insert(collection_name="collection_1", data=milvus_data)
     print(f"✅ 插入完成：共 {len(milvus_data)} 条数据")
 def greet(name):
     embeddings = llm.create_embedding(name)
     res = client.search(
         collection_name="collection_1",
-        data=[embeddings['data'][0]['embedding']],
-        limit=2,
-        output_fields=["text", "id"],
     )
     return res
-demo = gr.Interface(fn=greet, inputs="text", outputs=gr.JSON(label="查询结果"))
 demo.launch(mcp_server=True)

 from Crypto.Cipher import AES
 from Crypto.Util.Padding import unpad
 def decrypt_file(input_path, key):
     # 读取加密文件
+    with open(input_path, "rb") as f:
         encrypted_data = base64.b64decode(f.read())
+    key = key.ljust(32, "0")[:32].encode("utf-8")
     iv = encrypted_data[:16]
     ciphertext = encrypted_data[16:]
     cipher = AES.new(key, AES.MODE_CBC, iv)
     plaintext = unpad(cipher.decrypt(ciphertext), AES.block_size)
+    return plaintext.decode("utf-8")
+llm = llama_cpp.Llama.from_pretrained(
+    repo_id="mradermacher/bge-large-zh-v1.5-GGUF",
+    filename="bge-large-zh-v1.5.Q4_K_M.gguf",
+    embedding=True,
+)
 # embedding_1 = llm.create_embedding("Hello, world!")
 # embedding_2 = llm.create_embedding("你好, 世界!") # type(embedding_1['data'][0]['embedding']) list
 from pymilvus import MilvusClient
 client = MilvusClient("./books.db")
+client.create_collection(collection_name="collection_1", dimension=1024)
 import os, json
+aeskey = os.getenv("aeskey")
+decrypted_content = decrypt_file("encrypted.txt", aeskey)
+raw_jsons = json.loads(decrypted_content)
+with open("embeddings.json", mode="r") as embedding_file:
     all_embs = json.load(embedding_file)
     for jvvi_item in vhjx_item[1:]:
         content = jvvi_item["原文"]
         docs.append(content)
+        metas.append(
+            {
+                "index": jvvi_item["index"],
+                "text": content,
+                "annotation": jvvi_item.get("注释", ""),
+                "critique": jvvi_item.get("批判", ""),
+                "chapter": chapter,
+            }
+        )
     # 一个章节一次
     # 批量生成 embeddings（每个为 list[float]）
     # emb_result = llm.create_embedding(docs)
     milvus_data = []
     for i, emb in enumerate(embeddings):
         item = metas[i]
+        milvus_data.append(
+            {
+                "id": vhjx_index * 100 + i,
+                "index": item["index"],
+                "vector": emb,
+                "text": item["text"],
+                "annotation": item["annotation"],
+                "critique": item["critique"],
+                "chapter": item["chapter"],
+            }
+        )
     print(f"✅ 共 {len(milvus_data)} 条数据")
     # 插入数据
     client.insert(collection_name="collection_1", data=milvus_data)
     print(f"✅ 插入完成：共 {len(milvus_data)} 条数据")
 def greet(name):
     embeddings = llm.create_embedding(name)
     res = client.search(
         collection_name="collection_1",
+        data=[embeddings["data"][0]["embedding"]],
+        limit=5,
+        output_fields=["index", "text", "annotation", "critique"],
     )
     return res
+demo = gr.Interface(
+    fn=greet,
+    inputs=gr.Textbox(label="输入部分原文句子"),
+    outputs=gr.JSON(label="查询结果"),
+    title="论语批判MCP (Embedding版本)",
+    description="输入模糊的论语原文，可以向量检索到对应的批判内容。",
+)
 demo.launch(mcp_server=True)