Spaces:
Running
Running
theme_to_description
Browse files- app.py +7 -4
- theme_to_descripiton.json +49 -0
app.py
CHANGED
@@ -61,6 +61,9 @@ def load_assets():
|
|
61 |
base_path = Path(__file__).parent
|
62 |
with open(base_path/"label_to_theme.json") as f:
|
63 |
label_to_theme = json.load(f)
|
|
|
|
|
|
|
64 |
|
65 |
class_weights = torch.load(f"{base_path}/class_weights.pth").to(device)
|
66 |
|
@@ -69,7 +72,7 @@ def load_assets():
|
|
69 |
model.eval()
|
70 |
|
71 |
tokenizer = DebertaV2Tokenizer.from_pretrained(MODEL_NAME)
|
72 |
-
return model, tokenizer, label_to_theme,
|
73 |
|
74 |
|
75 |
def preprocess_text(text, tokenizer, max_length=MAX_LENGTH):
|
@@ -115,7 +118,7 @@ def main():
|
|
115 |
st.title("Paper Classification App")
|
116 |
st.write("Classify research papers using DeBERTa model")
|
117 |
|
118 |
-
model, tokenizer, label_to_theme,
|
119 |
|
120 |
title = st.text_input("Title")
|
121 |
abstract = st.text_area("Abstract")
|
@@ -136,13 +139,13 @@ def main():
|
|
136 |
probabilities = predict(text, model, tokenizer, device)
|
137 |
themes = get_themes(probabilities, label_to_theme)
|
138 |
|
139 |
-
st.success("
|
140 |
# for theme, prob in themes:
|
141 |
# st.write(f"- {theme}: {prob:.2%}")
|
142 |
|
143 |
for theme, prob in themes:
|
144 |
with st.expander(f"{theme} ({prob:.1%})"):
|
145 |
-
st.markdown(f"**Description**: ")
|
146 |
|
147 |
|
148 |
if __name__ == "__main__":
|
|
|
61 |
base_path = Path(__file__).parent
|
62 |
with open(base_path/"label_to_theme.json") as f:
|
63 |
label_to_theme = json.load(f)
|
64 |
+
|
65 |
+
with open(base_path/"theme_to_descripiton.json") as f:
|
66 |
+
theme_to_description = json.load(f)
|
67 |
|
68 |
class_weights = torch.load(f"{base_path}/class_weights.pth").to(device)
|
69 |
|
|
|
72 |
model.eval()
|
73 |
|
74 |
tokenizer = DebertaV2Tokenizer.from_pretrained(MODEL_NAME)
|
75 |
+
return model, tokenizer, device, label_to_theme, theme_to_description
|
76 |
|
77 |
|
78 |
def preprocess_text(text, tokenizer, max_length=MAX_LENGTH):
|
|
|
118 |
st.title("Paper Classification App")
|
119 |
st.write("Classify research papers using DeBERTa model")
|
120 |
|
121 |
+
model, tokenizer, device, label_to_theme, theme_to_description = load_assets()
|
122 |
|
123 |
title = st.text_input("Title")
|
124 |
abstract = st.text_area("Abstract")
|
|
|
139 |
probabilities = predict(text, model, tokenizer, device)
|
140 |
themes = get_themes(probabilities, label_to_theme)
|
141 |
|
142 |
+
st.success("Predicted themes (click to expand):")
|
143 |
# for theme, prob in themes:
|
144 |
# st.write(f"- {theme}: {prob:.2%}")
|
145 |
|
146 |
for theme, prob in themes:
|
147 |
with st.expander(f"{theme} ({prob:.1%})"):
|
148 |
+
st.markdown(f"**Description**: {theme_to_description[theme]}")
|
149 |
|
150 |
|
151 |
if __name__ == "__main__":
|
theme_to_descripiton.json
ADDED
@@ -0,0 +1,49 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"cs.AI": "Artificial Intelligence: Focuses on creating intelligent systems capable of reasoning, learning, and problem-solving through techniques like machine learning and knowledge representation.",
|
3 |
+
"physics.soc-ph": "Physics and Society: Applies physics methodologies to social systems, analyzing phenomena like opinion dynamics, urban growth, and network behavior using statistical physics approaches.",
|
4 |
+
"stat.ML": "Machine Learning (Statistics): Develops statistical frameworks and algorithms for pattern recognition, predictive modeling, and data-driven decision making.",
|
5 |
+
"cs.CE": "Computational Engineering: Uses numerical methods and algorithms to solve complex problems in engineering design, financial modeling, and scientific simulations.",
|
6 |
+
"cs.DB": "Database Systems: Studies the design, implementation, and optimization of systems for efficient data storage, retrieval, and management.",
|
7 |
+
"cs.CL": "Natural Language Processing: Focuses on computational understanding and generation of human language, including machine translation and text analysis.",
|
8 |
+
"cs.NA": "Numerical Analysis: Develops algorithms for solving mathematical problems numerically, with emphasis on stability, accuracy, and computational efficiency.",
|
9 |
+
"cs.CY": "Computers and Society: Examines ethical implications, digital privacy, and societal impacts of computing technologies.",
|
10 |
+
"cs.GT": "Game Theory: Analyzes strategic decision-making in competitive situations through mathematical models of conflict and cooperation.",
|
11 |
+
"cs.SI": "Social Networks: Investigates structure and dynamics of online communities, information diffusion, and network-based prediction models.",
|
12 |
+
"stat.AP": "Applied Statistics: Implements statistical methods to solve practical problems in fields like medicine, economics, and environmental science.",
|
13 |
+
"cs.DL": "Digital Libraries: Covers the design, construction, and use of digital libraries, including document creation, metadata, indexing, archiving, and scholarly communication.",
|
14 |
+
"math.ST": "Statistical Theory: Develops fundamental principles of statistical inference, probability theory, and hypothesis testing.",
|
15 |
+
"nlin.AO": "Complex Systems: Studies emergent behavior in nonlinear systems, including biological networks and self-organizing phenomena.",
|
16 |
+
"cs.LO": "Formal Logic: Applies mathematical logic to computer science problems including program verification and automated theorem proving.",
|
17 |
+
"cs.MM": "Multimedia Computing: Handles processing and analysis of images, video, audio, and other rich media formats.",
|
18 |
+
"cond-mat.dis-nn": "Disordered Systems: Investigates materials with irregular structures and their computational analogs in neural network models.",
|
19 |
+
"cs.DM": "Discrete Mathematics: Focuses on mathematical structures like graphs and combinatorics with computer science applications.",
|
20 |
+
"cs.CC": "Computational Complexity: Classifies problems by their inherent difficulty and resource requirements for computation.",
|
21 |
+
"stat.CO": "Statistical Computing: Develops numerical algorithms and computational methods for statistical analysis.",
|
22 |
+
"cs.DC": "Distributed Computing: Coordinates computation across multiple networked devices for scalable problem solving.",
|
23 |
+
"cs.IT": "Information Theory: Quantifies information transmission and processing, including error-correcting codes and data compression.",
|
24 |
+
"cs.DS": "Data Structures: Designs efficient methods for organizing and manipulating data collections in computer systems.",
|
25 |
+
"cs.SY": "Control Systems: Develops theory and applications for automated control of dynamic systems in engineering and robotics.",
|
26 |
+
"q-bio.QM": "Quantitative Biology: Applies mathematical modeling and computational methods to biological systems analysis.",
|
27 |
+
"cs.PL": "Programming Languages: Creates and analyzes language constructs for efficient and secure software development.",
|
28 |
+
"cs.RO": "Robotics: Integrates sensing, actuation, and control algorithms for autonomous machine operation.",
|
29 |
+
"cs.NE": "Neurocomputing: Develops brain-inspired algorithms including neural networks and evolutionary optimization methods.",
|
30 |
+
"cs.CR": "Cybersecurity: Protects information systems through cryptography, secure protocols, and vulnerability analysis.",
|
31 |
+
"cs.MA": "Multi-Agent Systems: Coordinates intelligent agents in distributed environments for collaborative problem solving.",
|
32 |
+
"q-bio.NC": "Neural Cognition: Studies brain mechanisms underlying perception, learning, and decision-making processes.",
|
33 |
+
"cs.LG": "Machine Learning: Creates algorithms that automatically improve through experience using statistical techniques.",
|
34 |
+
"cs.GR": "Computer Graphics: Generates and manipulates visual content through rendering techniques and geometric modeling.",
|
35 |
+
"physics.data-an": "Physics Data Analysis: Develops specialized methods for processing experimental data in physical sciences.",
|
36 |
+
"quant-ph": "Quantum Physics: Explores quantum computing, quantum information theory, and quantum system implementations.",
|
37 |
+
"cs.IR": "Information Retrieval: Designs systems for effective search and organization of large-scale document collections.",
|
38 |
+
"math.NA": "Numerical Mathematics: Analyzes mathematical foundations of numerical algorithms for differential equations and linear algebra.",
|
39 |
+
"math.PR": "Probability Theory: Studies random phenomena, stochastic processes, and uncertainty quantification.",
|
40 |
+
"stat.ME": "Statistical Methods: Develops new techniques for experimental design, data collection, and analysis methodologies.",
|
41 |
+
"cs.SE": "Software Engineering: Addresses systematic approaches to software development, maintenance, and quality assurance.",
|
42 |
+
"math.OC": "Optimization Theory: Formulates and solves mathematical optimization problems with applications in logistics and operations research.",
|
43 |
+
"math.IT": "Mathematical Information Theory: Provides rigorous foundations for data transmission, compression, and coding theory.",
|
44 |
+
"cs.HC": "Human-Computer Interaction: Designs intuitive interfaces and studies user behavior with computing systems.",
|
45 |
+
"stat.TH": "Theoretical Statistics: Establishes mathematical foundations for statistical inference and decision-making processes.",
|
46 |
+
"cs.NI": "Network Systems: Designs communication protocols and internet infrastructure for reliable data transmission.",
|
47 |
+
"cs.CV": "Computer Vision: Enables machines to interpret and understand visual information from images and video.",
|
48 |
+
"cs.SD": "Sound Processing: Analyzes and synthesizes audio signals for applications in speech recognition and music information retrieval."
|
49 |
+
}
|