georad committed
Commit a9a9d00 · verified · 1 Parent(s): 49287f4

Update app.py

Files changed (1)
  1. app.py +1 -181
app.py CHANGED
@@ -36,184 +36,4 @@ about_page = st.Page(
  #pg = st.navigation(pages=[home_page, type_text_page, upload_file_page, about_page]) # WITHOUT SECTIONS
  pg = st.navigation({"Home": [home_page], "Demo": [type_text_page, upload_file_page], "About": [about_page]}) # WITH SECTIONS
 
- pg.run()
-
- #import pandas as pd
- #from io import StringIO
- #import json
- #import torch
- #from transformers import pipeline, AutoTokenizer, AutoModelForCausalLM #AutoModelForTokenClassification
- #from sentence_transformers import SentenceTransformer, util
- #import lmdeploy
- #import turbomind as tm
-
- #from backend.utils import get_current_ram_usage, ga
- #import backend.aragpt
- #import backend.home
- #import backend.processor
- #import backend.sa
- #import backend.qa
-
- #st.set_page_config(
- #    page_title="TEST", page_icon="📖", initial_sidebar_state="expanded", layout="wide"
- #)
-
- #ga(st.__file__)
-
- #PAGES = {
- #    "Home": backend.home,
- #    "Demo": Demo,
- #    "About": backend.home
- #}
-
- #st.sidebar.title("SBSmapper")
- #selection = st.sidebar.radio("Pages", list(PAGES.keys()))
-
- #page = PAGES[selection]
- # with st.spinner(f"Loading {selection} ..."):
- #    ast.shared.components.write_page(page)
-
- #st.sidebar.header("Info")
- #st.sidebar.write("Project by JA RAD")
- #st.sidebar.write(
- #    "Pre-trained models are available on [HF Hub](https://huggingface.co/)"
- #)
- #st.sidebar.write(
- #    "Models source code available on [GitHub](https://github.com/)"
- #)
- #st.sidebar.write(
- #    "App source code available on [GitHub](https://github.com/)"
- #)
- #if st.sidebar.checkbox("Show RAM usage"):
- #    ram = get_current_ram_usage()
- #    st.sidebar.write("Ram usage: {:.2f}/{:.2f} GB".format(ram[0], ram[1]))
-
- """
- import os
- os.getenv("HF_TOKEN")
-
- def on_click():
-     st.session_state.user_input = ""
-
- #@st.cache
- def convert_df(df:pd.DataFrame):
-     return df.to_csv(index=False).encode('utf-8')
-
- #@st.cache
- def convert_json(df:pd.DataFrame):
-     result = df.to_json(orient="index")
-     parsed = json.loads(result)
-     json_string = json.dumps(parsed)
-     #st.json(json_string, expanded=True)
-     return json_string
-
- #st.title("📘SBS mapper")
-
- INTdesc_input = st.text_input("Type internal description and hit Enter", key="user_input")
-
- createSBScodes, right_column = st.columns(2)
- createSBScodes_clicked = createSBScodes.button("Map to SBS codes", key="user_createSBScodes")
- right_column.button("Reset", on_click=on_click)
-
- numMAPPINGS_input = 5
- #numMAPPINGS_input = st.text_input("Type number of mappings and hit Enter", key="user_input_numMAPPINGS")
- #st.button("Clear text", on_click=on_click)
-
-
- model = SentenceTransformer('all-MiniLM-L6-v2') # fastest
- #model = SentenceTransformer('all-mpnet-base-v2') # best performance
- #model = SentenceTransformers('all-distilroberta-v1')
- #model = SentenceTransformer('sentence-transformers/msmarco-bert-base-dot-v5')
- #model = SentenceTransformer('clips/mfaq')
-
- INTdesc_embedding = model.encode(INTdesc_input)
-
- # Semantic search, Compute cosine similarity between all pairs of SBS descriptions
-
- #df_SBS = pd.read_csv("SBS_V2_Table.csv", index_col="SBS_Code", usecols=["Long_Description"]) # na_values=['NA']
- #df_SBS = pd.read_csv("SBS_V2_Table.csv", usecols=["SBS_Code_Hyphenated","Long_Description"])
- from_line = 7727 # Imaging services chapter start, adjust as needed
- to_line = 8239 # Imaging services chapter end, adjust as needed
- nrows = to_line - from_line + 1
- skiprows = list(range(1,from_line - 1))
- df_SBS = pd.read_csv("SBS_V2_Table.csv", header=0, skip_blank_lines=False, skiprows=skiprows, nrows=nrows)
- #st.write(df_SBS.head(5))
-
- SBScorpus = df_SBS['Long_Description'].values.tolist()
- SBScorpus_embeddings = model.encode(SBScorpus)
-
- #my_model_results = pipeline("ner", model= "checkpoint-92")
- HF_model_results = util.semantic_search(INTdesc_embedding, SBScorpus_embeddings)
- HF_model_results_sorted = sorted(HF_model_results, key=lambda x: x[1], reverse=True)
- HF_model_results_displayed = HF_model_results_sorted[0:numMAPPINGS_input]
-
- model_id = "meta-llama/Llama-3.2-1B-Instruct"
- pipe = pipeline("text-generation", model=model_id, device_map="auto",) # torch_dtype=torch.bfloat16
-
-
- col1, col2, col3 = st.columns([1,1,2.5])
- col1.subheader("Score")
- col2.subheader("SBS code")
- col3.subheader("SBS description V2.0")
-
- dictA = {"Score": [], "SBS Code": [], "SBS Description V2.0": []}
-
- if INTdesc_input is not None and createSBScodes_clicked == True:
-     #for i, result in enumerate(HF_model_results_displayed):
-     for result in HF_model_results_displayed:
-         with st.container():
-             col1.write("%.4f" % result[0]["score"])
-             col2.write(df_SBS.loc[df_SBS["Long_Description"] == SBScorpus[result[0]["corpus_id"]],"SBS_Code_Hyphenated"].values[0])
-             col3.write(SBScorpus[result[0]["corpus_id"]])
-             dictA["Score"].append("%.4f" % result[0]["score"]), dictA["SBS Code"].append(df_SBS.loc[df_SBS["Long_Description"] == SBScorpus[result[0]["corpus_id"]],"SBS_Code_Hyphenated"].values[0]), dictA["SBS Description V2.0"].append(SBScorpus[result[0]["corpus_id"]])
-
-             col1.write("%.4f" % result[1]["score"])
-             col2.write(df_SBS.loc[df_SBS["Long_Description"] == SBScorpus[result[1]["corpus_id"]],"SBS_Code_Hyphenated"].values[0])
-             col3.write(SBScorpus[result[1]["corpus_id"]])
-             dictA["Score"].append("%.4f" % result[1]["score"]), dictA["SBS Code"].append(df_SBS.loc[df_SBS["Long_Description"] == SBScorpus[result[1]["corpus_id"]],"SBS_Code_Hyphenated"].values[0]), dictA["SBS Description V2.0"].append(SBScorpus[result[1]["corpus_id"]])
-
-             col1.write("%.4f" % result[2]["score"])
-             col2.write(df_SBS.loc[df_SBS["Long_Description"] == SBScorpus[result[2]["corpus_id"]],"SBS_Code_Hyphenated"].values[0])
-             col3.write(SBScorpus[result[2]["corpus_id"]])
-             dictA["Score"].append("%.4f" % result[2]["score"]), dictA["SBS Code"].append(df_SBS.loc[df_SBS["Long_Description"] == SBScorpus[result[2]["corpus_id"]],"SBS_Code_Hyphenated"].values[0]), dictA["SBS Description V2.0"].append(SBScorpus[result[2]["corpus_id"]])
-
-             col1.write("%.4f" % result[3]["score"])
-             col2.write(df_SBS.loc[df_SBS["Long_Description"] == SBScorpus[result[3]["corpus_id"]],"SBS_Code_Hyphenated"].values[0])
-             col3.write(SBScorpus[result[3]["corpus_id"]])
-             dictA["Score"].append("%.4f" % result[3]["score"]), dictA["SBS Code"].append(df_SBS.loc[df_SBS["Long_Description"] == SBScorpus[result[3]["corpus_id"]],"SBS_Code_Hyphenated"].values[0]), dictA["SBS Description V2.0"].append(SBScorpus[result[3]["corpus_id"]])
-
-             col1.write("%.4f" % result[4]["score"])
-             col2.write(df_SBS.loc[df_SBS["Long_Description"] == SBScorpus[result[4]["corpus_id"]],"SBS_Code_Hyphenated"].values[0])
-             col3.write(SBScorpus[result[4]["corpus_id"]])
-             dictA["Score"].append("%.4f" % result[4]["score"]), dictA["SBS Code"].append(df_SBS.loc[df_SBS["Long_Description"] == SBScorpus[result[4]["corpus_id"]],"SBS_Code_Hyphenated"].values[0]), dictA["SBS Description V2.0"].append(SBScorpus[result[4]["corpus_id"]])
-
-     dfA = pd.DataFrame.from_dict(dictA)
-
-     display_format = "ask REASONING MODEL: Which, if any, of the above Saudi Billing System descriptions corresponds best to " + INTdesc_input +"? "
-     st.write(display_format)
-     question = "Which, if any, of the below Saudi Billing System descriptions corresponds best to " + INTdesc_input +"? "
-     shortlist = [SBScorpus[result[0]["corpus_id"]], SBScorpus[result[1]["corpus_id"]], SBScorpus[result[2]["corpus_id"]], SBScorpus[result[3]["corpus_id"]], SBScorpus[result[4]["corpus_id"]]]
-     prompt = [question + " " + shortlist[0] + " " + shortlist[1] + " " + shortlist[2] + " " + shortlist[3] + " " + shortlist[4]]
-     #st.write(prompt)
-
-     messages = [
-         {"role": "system", "content": "You are a knowledgable AI assistant who always answers truthfully and precisely!"},
-         {"role": "user", "content": prompt},
-     ]
-     outputs = pipe(
-         messages,
-         max_new_tokens=256,
-     )
-     st.write(outputs[0]["generated_text"][-1]["content"])
-
-     bs, b1, b2, b3, bLast = st.columns([0.75, 1.5, 1.5, 1.5, 0.75])
-     with b1:
-         #csvbutton = download_button(results, "results.csv", "📥 Download .csv")
-         csvbutton = st.download_button(label="📥 Download .csv", data=convert_df(dfA), file_name= "results.csv", mime='text/csv', key='csv_b')
-     with b2:
-         #textbutton = download_button(results, "results.txt", "📥 Download .txt")
-         textbutton = st.download_button(label="📥 Download .txt", data=convert_df(dfA), file_name= "results.text", mime='text/plain', key='text_b')
-     with b3:
-         #jsonbutton = download_button(results, "results.json", "📥 Download .json")
-         jsonbutton = st.download_button(label="📥 Download .json", data=convert_json(dfA), file_name= "results.json", mime='application/json', key='json_b')
- """
+ pg.run()
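
After this commit, app.py reduces to the four retained lines: the sectioned st.navigation call plus pg.run(). For context, a minimal sketch of the resulting file, assuming hypothetical page-script paths and titles (the real st.Page definitions sit above the hunk, as its "about_page = st.Page(" context shows):

import streamlit as st

# Hypothetical page sources/titles; the actual st.Page definitions are above the hunk.
home_page = st.Page("page_home.py", title="Home")
type_text_page = st.Page("page_type_text.py", title="Type text")
upload_file_page = st.Page("page_upload_file.py", title="Upload file")
about_page = st.Page("page_about.py", title="About")

#pg = st.navigation(pages=[home_page, type_text_page, upload_file_page, about_page]) # WITHOUT SECTIONS
pg = st.navigation({"Home": [home_page], "Demo": [type_text_page, upload_file_page], "About": [about_page]}) # WITH SECTIONS
pg.run()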
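
The deleted block was an inactive prototype (commented out or wrapped in a triple-quoted string): SentenceTransformer retrieval over SBS long descriptions, followed by a Llama-3.2 prompt to pick the best match. For reference, a minimal runnable sketch of just the retrieval step, using a hypothetical query and corpus (the app loaded its corpus from SBS_V2_Table.csv). util.semantic_search returns, per query, a list of {"corpus_id", "score"} hits sorted best-first, which is why the deleted code indexed result[0] through result[4] for the top five:

from sentence_transformers import SentenceTransformer, util

model = SentenceTransformer("all-MiniLM-L6-v2")  # same model the deleted code selected

# Hypothetical inputs for illustration only.
query = "CT scan of the chest with contrast"
corpus = ["Chest X-ray, two views", "CT chest with IV contrast", "MRI brain without contrast"]

query_embedding = model.encode(query)
corpus_embeddings = model.encode(corpus)

# One query in -> hits[0] is that query's ranked hit list.
hits = util.semantic_search(query_embedding, corpus_embeddings, top_k=5)
for hit in hits[0]:
    print(f'{hit["score"]:.4f}  {corpus[hit["corpus_id"]]}')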