Spaces: Running on Zero
Update app.py
app.py CHANGED
@@ -19,58 +19,25 @@ tokenizer = None
 def fetch_arxiv_paper(arxiv_input):
     """Fetch paper details from arXiv URL or ID using requests."""
     try:
-        # Extract arXiv ID from URL or use directly
         if 'arxiv.org' in arxiv_input:
             parsed = urlparse(arxiv_input)
-            arxiv_id = path.split('/')[-1].replace('.pdf', '')
+            arxiv_id = parsed.path.split('/')[-1].replace('.pdf', '')
         else:
             arxiv_id = arxiv_input.strip()
-
-        # Fetch metadata using arXiv API
         api_url = f'http://export.arxiv.org/api/query?id_list={arxiv_id}'
         response = requests.get(api_url)
-
         if response.status_code != 200:
-            return {
-                "title": "",
-                "abstract": "",
-                "success": False,
-                "message": "Error fetching paper from arXiv API"
-            }
-
-        # Parse the response XML
+            return {"title": "", "abstract": "", "success": False, "message": "Error fetching paper from arXiv API"}
         root = ET.fromstring(response.text)
-
-        # ArXiv API uses namespaces
         ns = {'arxiv': 'http://www.w3.org/2005/Atom'}
-
-        # Extract title and abstract
         entry = root.find('.//arxiv:entry', ns)
         if entry is None:
-            return {
-                "title": "",
-                "abstract": "",
-                "success": False,
-                "message": "Paper not found"
-            }
-
+            return {"title": "", "abstract": "", "success": False, "message": "Paper not found"}
         title = entry.find('arxiv:title', ns).text.strip()
         abstract = entry.find('arxiv:summary', ns).text.strip()
-
-        return {
-            "title": title,
-            "abstract": abstract,
-            "success": True,
-            "message": "Paper fetched successfully!"
-        }
+        return {"title": title, "abstract": abstract, "success": True, "message": "Paper fetched successfully!"}
     except Exception as e:
-        return {
-            "title": "",
-            "abstract": "",
-            "success": False,
-            "message": f"Error fetching paper: {str(e)}"
-        }
+        return {"title": "", "abstract": "", "success": False, "message": f"Error fetching paper: {e}"}
 
 @spaces.GPU(duration=60, enable_queue=True)
 def predict(title, abstract):
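For context, a minimal usage sketch of the rewritten fetch_arxiv_paper. The arXiv ID below is only an example; the function itself relies on the requests, urllib.parse.urlparse, and xml.etree.ElementTree imports already present at the top of app.py.

result = fetch_arxiv_paper("https://arxiv.org/abs/1706.03762")
if result["success"]:
    print(result["title"])            # title from the arXiv Atom feed
    print(result["abstract"][:80])    # first characters of the abstract
else:
    print(result["message"])          # e.g. "Paper not found"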
@@ -78,50 +45,48 @@ def predict(title, abstract):
     abstract = abstract.replace("\n", " ").strip().replace("''", "'")
     global model, tokenizer
     if model is None:
+        # 1) Load everything in float32
         try:
-            # Always load in full float32 precision
             model = AutoModelForSequenceClassification.from_pretrained(
                 model_path,
                 num_labels=1,
                 device_map=None,
-                torch_dtype=torch.float32
+                torch_dtype=torch.float32,
+                load_in_8bit=False,
+                load_in_4bit=False,
+                low_cpu_mem_usage=False
             )
-            # Explicitly move the model onto the device
-            model.to(device)
         except Exception as e:
-            print(f"
-            # Fallback: basic loading, also float32
+            print(f"First load failed, retrying: {e}")
             model = AutoModelForSequenceClassification.from_pretrained(
                 model_path,
                 num_labels=1,
                 torch_dtype=torch.float32
             )
+        # 2) Try to move the model onto the device (ignore unsupported-operation errors)
+        try:
             model.to(device)
+        except ValueError as e:
+            print(f"model.to() ignored: {e}")
         tokenizer = AutoTokenizer.from_pretrained(model_path)
         model.eval()
 
     text = (
         f"Given a certain paper, Title: {title}\n"
         f"Abstract: {abstract}.\n"
         "Predict its normalized academic impact (between 0 and 1):"
     )
-
     try:
         inputs = tokenizer(text, return_tensors="pt")
-        # Move inputs onto the device
         inputs = {k: v.to(device) for k, v in inputs.items()}
-
         with torch.no_grad():
             outputs = model(**inputs)
-
-        # Small upward correction
-        score = min(1.0, probability + 0.05)
+        prob = torch.sigmoid(outputs.logits).item()
+        score = min(1.0, prob + 0.05)
         return round(score, 4)
-
     except Exception as e:
-        print(f"Prediction error: {str(e)}")
+        print(f"Prediction error: {e}")
         return 0.0
 
 def get_grade_and_emoji(score):
     if score >= 0.900: return "AAA 🌟"
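The scoring change above makes the probability explicit: the single regression logit is squashed with a sigmoid, nudged up by 0.05, and capped at 1.0. A small self-contained sketch of that arithmetic (the logit value is made up for illustration):

import torch

logit = torch.tensor([[1.2]])        # stand-in for outputs.logits, shape [1, 1]
prob = torch.sigmoid(logit).item()   # ~0.7685
score = min(1.0, prob + 0.05)        # small upward correction, capped at 1.0
print(round(score, 4))               # ~0.8185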
@@ -158,46 +123,97 @@ example_papers = [
 def validate_input(title, abstract):
     title = title.replace("\n", " ").strip().replace("''", "'")
     abstract = abstract.replace("\n", " ").strip().replace("''", "'")
-
-    non_latin_in_title = non_latin_pattern.findall(title)
-    non_latin_in_abstract = non_latin_pattern.findall(abstract)
-
-    if len(title.strip().split(' ')) < 3:
+    non_latin = re.compile(r'[^\u0000-\u007F]')
+    if len(title.split()) < 3:
         return False, "The title must be at least 3 words long."
-    if len(abstract.strip().split(' ')) < 50:
+    if len(abstract.split()) < 50:
         return False, "The abstract must be at least 50 words long."
-    if non_latin_in_abstract:
-        return False, f"The abstract contains invalid characters: {', '.join(non_latin_in_abstract)}. Only English letters and special symbols are allowed."
-
+    if non_latin.search(title):
+        return False, "The title contains non-English characters."
+    if non_latin.search(abstract):
+        return False, "The abstract contains non-English characters."
     return True, "Inputs are valid!"
 
 def update_button_status(title, abstract):
-    valid,
+    valid, msg = validate_input(title, abstract)
     if not valid:
-        return gr.update(value="Error: " +
-        return gr.update(value=
+        return gr.update(value="Error: " + msg), gr.update(interactive=False)
+    return gr.update(value=msg), gr.update(interactive=True)
 
 def process_arxiv_input(arxiv_input):
-    """Process arXiv input and update title/abstract fields."""
     if not arxiv_input.strip():
         return "", "", "Please enter an arXiv URL or ID"
-
     result = fetch_arxiv_paper(arxiv_input)
     if result["success"]:
         return result["title"], result["abstract"], result["message"]
-
     return "", "", result["message"]
 
 css = """
 .gradio-container {
     font-family: 'Arial', sans-serif;
 }
-
+.main-title {
+    text-align: center;
+    color: #2563eb;
+    font-size: 2.5rem !important;
+    margin-bottom: 1rem !important;
+    background: linear-gradient(45deg, #2563eb, #1d4ed8);
+    -webkit-background-clip: text;
+    -webkit-text-fill-color: transparent;
+}
+.sub-title {
+    text-align: center;
+    color: #4b5563;
+    font-size: 1.5rem !important;
+    margin-bottom: 2rem !important;
+}
+.input-section {
+    background: white;
+    padding: 2rem;
+    border-radius: 1rem;
+    box-shadow: 0 4px 6px -1px rgb(0 0 0 / 0.1);
+}
+.result-section {
+    background: #f8fafc;
+    padding: 2rem;
+    border-radius: 1rem;
+    margin-top: 2rem;
+}
+.methodology-section {
+    background: #ecfdf5;
+    padding: 2rem;
+    border-radius: 1rem;
+    margin-top: 2rem;
+}
+.example-section {
+    background: #fff7ed;
+    padding: 2rem;
+    border-radius: 1rem;
+    margin-top: 2rem;
+}
+.grade-display {
+    font-size: 3rem;
+    text-align: center;
+    margin: 1rem 0;
+}
+.arxiv-input {
+    margin-bottom: 1.5rem;
+    padding: 1rem;
+    background: #f3f4f6;
+    border-radius: 0.5rem;
+}
+.arxiv-link {
+    color: #2563eb;
+    text-decoration: underline;
+    font-size: 0.9em;
+    margin-top: 0.5em;
+}
+.arxiv-note {
+    color: #666;
+    font-size: 0.9em;
+    margin-top: 0.5em;
+    margin-bottom: 0.5em;
+}
 """
 
 with gr.Blocks(theme=gr.themes.Default(), css=css) as iface:
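The new validate_input compiles the non-Latin pattern locally instead of relying on a module-level non_latin_pattern. A quick illustration of how that ASCII-only check behaves (the example strings are arbitrary):

import re

non_latin = re.compile(r'[^\u0000-\u007F]')
print(bool(non_latin.search("Attention Is All You Need")))  # False: plain ASCII passes
print(bool(non_latin.search("Ein schönes Papier")))          # True: non-ASCII characters fail validation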
@@ -291,21 +307,9 @@ with gr.Blocks(theme=gr.themes.Default(), css=css) as iface:
     """
     )
 
-    title_input.change(
-        update_button_status,
-        inputs=[title_input, abstract_input],
-        outputs=[validation_status, submit_button]
-    )
-    abstract_input.change(
-        update_button_status,
-        inputs=[title_input, abstract_input],
-        outputs=[validation_status, submit_button]
-    )
-    fetch_button.click(
-        process_arxiv_input,
-        inputs=[arxiv_input],
-        outputs=[title_input, abstract_input, validation_status]
-    )
+    title_input.change(update_button_status, [title_input, abstract_input], [validation_status, submit_button])
+    abstract_input.change(update_button_status, [title_input, abstract_input], [validation_status, submit_button])
+    fetch_button.click(process_arxiv_input, [arxiv_input], [title_input, abstract_input, validation_status])
 
     def process_prediction(title, abstract):
         score = predict(title, abstract)
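The one-line wiring passes inputs and outputs positionally and behaves the same as the keyword form it replaces. For example, the first listener could equivalently be written as the sketch below, assuming the same component names:

title_input.change(
    fn=update_button_status,
    inputs=[title_input, abstract_input],
    outputs=[validation_status, submit_button],
)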