bvd757 committed on
Commit
6222f2c
·
1 Parent(s): c8c5d3d

theme_to_description

Browse files
Files changed (2) hide show
  1. app.py +7 -4
  2. theme_to_descripiton.json +49 -0
app.py CHANGED
@@ -61,6 +61,9 @@ def load_assets():
61
  base_path = Path(__file__).parent
62
  with open(base_path/"label_to_theme.json") as f:
63
  label_to_theme = json.load(f)
 
 
 
64
 
65
  class_weights = torch.load(f"{base_path}/class_weights.pth").to(device)
66
 
@@ -69,7 +72,7 @@ def load_assets():
69
  model.eval()
70
 
71
  tokenizer = DebertaV2Tokenizer.from_pretrained(MODEL_NAME)
72
- return model, tokenizer, label_to_theme, device
73
 
74
 
75
  def preprocess_text(text, tokenizer, max_length=MAX_LENGTH):
@@ -115,7 +118,7 @@ def main():
115
  st.title("Paper Classification App")
116
  st.write("Classify research papers using DeBERTa model")
117
 
118
- model, tokenizer, label_to_theme, device = load_assets()
119
 
120
  title = st.text_input("Title")
121
  abstract = st.text_area("Abstract")
@@ -136,13 +139,13 @@ def main():
136
  probabilities = predict(text, model, tokenizer, device)
137
  themes = get_themes(probabilities, label_to_theme)
138
 
139
- st.success("Classification results:")
140
  # for theme, prob in themes:
141
  # st.write(f"- {theme}: {prob:.2%}")
142
 
143
  for theme, prob in themes:
144
  with st.expander(f"{theme} ({prob:.1%})"):
145
- st.markdown(f"**Description**: ")
146
 
147
 
148
  if __name__ == "__main__":
 
61
  base_path = Path(__file__).parent
62
  with open(base_path/"label_to_theme.json") as f:
63
  label_to_theme = json.load(f)
64
+
65
+ with open(base_path/"theme_to_descripiton.json") as f:
66
+ theme_to_description = json.load(f)
67
 
68
  class_weights = torch.load(f"{base_path}/class_weights.pth").to(device)
69
 
 
72
  model.eval()
73
 
74
  tokenizer = DebertaV2Tokenizer.from_pretrained(MODEL_NAME)
75
+ return model, tokenizer, device, label_to_theme, theme_to_description
76
 
77
 
78
  def preprocess_text(text, tokenizer, max_length=MAX_LENGTH):
 
118
  st.title("Paper Classification App")
119
  st.write("Classify research papers using DeBERTa model")
120
 
121
+ model, tokenizer, device, label_to_theme, theme_to_description = load_assets()
122
 
123
  title = st.text_input("Title")
124
  abstract = st.text_area("Abstract")
 
139
  probabilities = predict(text, model, tokenizer, device)
140
  themes = get_themes(probabilities, label_to_theme)
141
 
142
+ st.success("Predicted themes (click to expand):")
143
  # for theme, prob in themes:
144
  # st.write(f"- {theme}: {prob:.2%}")
145
 
146
  for theme, prob in themes:
147
  with st.expander(f"{theme} ({prob:.1%})"):
148
+ st.markdown(f"**Description**: {theme_to_description[theme]}")
149
 
150
 
151
  if __name__ == "__main__":
theme_to_descripiton.json ADDED
@@ -0,0 +1,49 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cs.AI": "Artificial Intelligence: Focuses on creating intelligent systems capable of reasoning, learning, and problem-solving through techniques like machine learning and knowledge representation.",
3
+ "physics.soc-ph": "Physics and Society: Applies physics methodologies to social systems, analyzing phenomena like opinion dynamics, urban growth, and network behavior using statistical physics approaches.",
4
+ "stat.ML": "Machine Learning (Statistics): Develops statistical frameworks and algorithms for pattern recognition, predictive modeling, and data-driven decision making.",
5
+ "cs.CE": "Computational Engineering: Uses numerical methods and algorithms to solve complex problems in engineering design, financial modeling, and scientific simulations.",
6
+ "cs.DB": "Database Systems: Studies the design, implementation, and optimization of systems for efficient data storage, retrieval, and management.",
7
+ "cs.CL": "Natural Language Processing: Focuses on computational understanding and generation of human language, including machine translation and text analysis.",
8
+ "cs.NA": "Numerical Analysis: Develops algorithms for solving mathematical problems numerically, with emphasis on stability, accuracy, and computational efficiency.",
9
+ "cs.CY": "Computers and Society: Examines ethical implications, digital privacy, and societal impacts of computing technologies.",
10
+ "cs.GT": "Game Theory: Analyzes strategic decision-making in competitive situations through mathematical models of conflict and cooperation.",
11
+ "cs.SI": "Social Networks: Investigates structure and dynamics of online communities, information diffusion, and network-based prediction models.",
12
+ "stat.AP": "Applied Statistics: Implements statistical methods to solve practical problems in fields like medicine, economics, and environmental science.",
13
+ "cs.DL": "Digital Libraries: Covers the design, creation, and use of digital document collections, including metadata, archiving, citation analysis, and scholarly communication.",
14
+ "math.ST": "Statistical Theory: Develops fundamental principles of statistical inference, probability theory, and hypothesis testing.",
15
+ "nlin.AO": "Complex Systems: Studies emergent behavior in nonlinear systems, including biological networks and self-organizing phenomena.",
16
+ "cs.LO": "Formal Logic: Applies mathematical logic to computer science problems including program verification and automated theorem proving.",
17
+ "cs.MM": "Multimedia Computing: Handles processing and analysis of images, video, audio, and other rich media formats.",
18
+ "cond-mat.dis-nn": "Disordered Systems: Investigates materials with irregular structures and their computational analogs in neural network models.",
19
+ "cs.DM": "Discrete Mathematics: Focuses on mathematical structures like graphs and combinatorics with computer science applications.",
20
+ "cs.CC": "Computational Complexity: Classifies problems by their inherent difficulty and resource requirements for computation.",
21
+ "stat.CO": "Statistical Computing: Develops numerical algorithms and computational methods for statistical analysis.",
22
+ "cs.DC": "Distributed Computing: Coordinates computation across multiple networked devices for scalable problem solving.",
23
+ "cs.IT": "Information Theory: Quantifies information transmission and processing, including error-correcting codes and data compression.",
24
+ "cs.DS": "Data Structures: Designs efficient methods for organizing and manipulating data collections in computer systems.",
25
+ "cs.SY": "Control Systems: Develops theory and applications for automated control of dynamic systems in engineering and robotics.",
26
+ "q-bio.QM": "Quantitative Biology: Applies mathematical modeling and computational methods to biological systems analysis.",
27
+ "cs.PL": "Programming Languages: Creates and analyzes language constructs for efficient and secure software development.",
28
+ "cs.RO": "Robotics: Integrates sensing, actuation, and control algorithms for autonomous machine operation.",
29
+ "cs.NE": "Neurocomputing: Develops brain-inspired algorithms including neural networks and evolutionary optimization methods.",
30
+ "cs.CR": "Cybersecurity: Protects information systems through cryptography, secure protocols, and vulnerability analysis.",
31
+ "cs.MA": "Multi-Agent Systems: Coordinates intelligent agents in distributed environments for collaborative problem solving.",
32
+ "q-bio.NC": "Neural Cognition: Studies brain mechanisms underlying perception, learning, and decision-making processes.",
33
+ "cs.LG": "Machine Learning: Creates algorithms that automatically improve through experience using statistical techniques.",
34
+ "cs.GR": "Computer Graphics: Generates and manipulates visual content through rendering techniques and geometric modeling.",
35
+ "physics.data-an": "Physics Data Analysis: Develops specialized methods for processing experimental data in physical sciences.",
36
+ "quant-ph": "Quantum Physics: Explores quantum computing, quantum information theory, and quantum system implementations.",
37
+ "cs.IR": "Information Retrieval: Designs systems for effective search and organization of large-scale document collections.",
38
+ "math.NA": "Numerical Mathematics: Analyzes mathematical foundations of numerical algorithms for differential equations and linear algebra.",
39
+ "math.PR": "Probability Theory: Studies random phenomena, stochastic processes, and uncertainty quantification.",
40
+ "stat.ME": "Statistical Methods: Develops new techniques for experimental design, data collection, and analysis methodologies.",
41
+ "cs.SE": "Software Engineering: Addresses systematic approaches to software development, maintenance, and quality assurance.",
42
+ "math.OC": "Optimization Theory: Formulates and solves mathematical optimization problems with applications in logistics and operations research.",
43
+ "math.IT": "Mathematical Information Theory: Provides rigorous foundations for data transmission, compression, and coding theory.",
44
+ "cs.HC": "Human-Computer Interaction: Designs intuitive interfaces and studies user behavior with computing systems.",
45
+ "stat.TH": "Theoretical Statistics: Establishes mathematical foundations for statistical inference and decision-making processes.",
46
+ "cs.NI": "Network Systems: Designs communication protocols and internet infrastructure for reliable data transmission.",
47
+ "cs.CV": "Computer Vision: Enables machines to interpret and understand visual information from images and video.",
48
+ "cs.SD": "Sound Processing: Analyzes and synthesizes audio signals for applications in speech recognition and music information retrieval."
49
+ }