Spaces: Running on Zero
Update app.py
app.py CHANGED
@@ -19,58 +19,25 @@ tokenizer = None
 def fetch_arxiv_paper(arxiv_input):
     """Fetch paper details from arXiv URL or ID using requests."""
     try:
-        # Extract arXiv ID from URL or use directly
         if 'arxiv.org' in arxiv_input:
             parsed = urlparse(arxiv_input)
-            arxiv_id = path.split('/')[-1].replace('.pdf', '')
+            arxiv_id = parsed.path.split('/')[-1].replace('.pdf', '')
         else:
             arxiv_id = arxiv_input.strip()
-
-        # Fetch metadata using arXiv API
         api_url = f'http://export.arxiv.org/api/query?id_list={arxiv_id}'
         response = requests.get(api_url)
-
         if response.status_code != 200:
-            return {
-                "title": "",
-                "abstract": "",
-                "success": False,
-                "message": "Error fetching paper from arXiv API"
-            }
-
-        # Parse the response XML
+            return {"title": "", "abstract": "", "success": False, "message": "Error fetching paper from arXiv API"}
         root = ET.fromstring(response.text)
-
-        # ArXiv API uses namespaces
         ns = {'arxiv': 'http://www.w3.org/2005/Atom'}
-
-        # Extract title and abstract
         entry = root.find('.//arxiv:entry', ns)
         if entry is None:
-            return {
-                "title": "",
-                "abstract": "",
-                "success": False,
-                "message": "Paper not found"
-            }
-
+            return {"title": "", "abstract": "", "success": False, "message": "Paper not found"}
         title = entry.find('arxiv:title', ns).text.strip()
         abstract = entry.find('arxiv:summary', ns).text.strip()
-
-        return {
-            "title": title,
-            "abstract": abstract,
-            "success": True,
-            "message": "Paper fetched successfully!"
-        }
+        return {"title": title, "abstract": abstract, "success": True, "message": "Paper fetched successfully!"}
     except Exception as e:
-        return {
-            "title": "",
-            "abstract": "",
-            "success": False,
-            "message": f"Error fetching paper: {str(e)}"
-        }
+        return {"title": "", "abstract": "", "success": False, "message": f"Error fetching paper: {e}"}
 
 @spaces.GPU(duration=60, enable_queue=True)
 def predict(title, abstract):
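For context, a minimal usage sketch of the rewritten fetch_arxiv_paper. The arXiv ID below is only an example; the function itself relies on the requests, urllib.parse.urlparse, and xml.etree.ElementTree imports already present at the top of app.py.

result = fetch_arxiv_paper("https://arxiv.org/abs/1706.03762")
if result["success"]:
    print(result["title"])            # title from the arXiv Atom feed
    print(result["abstract"][:80])    # first characters of the abstract
else:
    print(result["message"])          # e.g. "Paper not found"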
@@ -78,50 +45,48 @@ def predict(title, abstract):
     abstract = abstract.replace("\n", " ").strip().replace("''", "'")
     global model, tokenizer
     if model is None:
+        # 1) Load everything in float32
         try:
-            # Always load in full float32 precision
             model = AutoModelForSequenceClassification.from_pretrained(
                 model_path,
                 num_labels=1,
                 device_map=None,
-                torch_dtype=torch.float32
+                torch_dtype=torch.float32,
+                load_in_8bit=False,
+                load_in_4bit=False,
+                low_cpu_mem_usage=False
             )
-            # Explicitly move the model onto the device
-            model.to(device)
         except Exception as e:
-            print(f"
-            # Fallback: basic loading, also float32
+            print(f"First load failed, retrying: {e}")
             model = AutoModelForSequenceClassification.from_pretrained(
                 model_path,
                 num_labels=1,
                 torch_dtype=torch.float32
             )
+        # 2) Try to move the model onto the device (ignore unsupported-operation errors)
+        try:
             model.to(device)
+        except ValueError as e:
+            print(f"model.to() ignored: {e}")
         tokenizer = AutoTokenizer.from_pretrained(model_path)
         model.eval()
 
     text = (
         f"Given a certain paper, Title: {title}\n"
         f"Abstract: {abstract}.\n"
         "Predict its normalized academic impact (between 0 and 1):"
     )
-
     try:
         inputs = tokenizer(text, return_tensors="pt")
-        # Move inputs onto the device
         inputs = {k: v.to(device) for k, v in inputs.items()}
-
         with torch.no_grad():
             outputs = model(**inputs)
-
-        # Small upward correction
-        score = min(1.0, probability + 0.05)
+        prob = torch.sigmoid(outputs.logits).item()
+        score = min(1.0, prob + 0.05)
         return round(score, 4)
-
     except Exception as e:
-        print(f"Prediction error: {str(e)}")
+        print(f"Prediction error: {e}")
         return 0.0
 
 def get_grade_and_emoji(score):
     if score >= 0.900: return "AAA 🌟"
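The scoring change above makes the probability explicit: the single regression logit is squashed with a sigmoid, nudged up by 0.05, and capped at 1.0. A small self-contained sketch of that arithmetic (the logit value is made up for illustration):

import torch

logit = torch.tensor([[1.2]])        # stand-in for outputs.logits, shape [1, 1]
prob = torch.sigmoid(logit).item()   # ~0.7685
score = min(1.0, prob + 0.05)        # small upward correction, capped at 1.0
print(round(score, 4))               # ~0.8185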
@@ -158,46 +123,97 @@ example_papers = [
 def validate_input(title, abstract):
     title = title.replace("\n", " ").strip().replace("''", "'")
     abstract = abstract.replace("\n", " ").strip().replace("''", "'")
-
-    non_latin_in_title = non_latin_pattern.findall(title)
-    non_latin_in_abstract = non_latin_pattern.findall(abstract)
-
-    if len(title.strip().split(' ')) < 3:
+    non_latin = re.compile(r'[^\u0000-\u007F]')
+    if len(title.split()) < 3:
         return False, "The title must be at least 3 words long."
-    if len(abstract.strip().split(' ')) < 50:
+    if len(abstract.split()) < 50:
         return False, "The abstract must be at least 50 words long."
-    if non_latin_in_abstract:
-        return False, f"The abstract contains invalid characters: {', '.join(non_latin_in_abstract)}. Only English letters and special symbols are allowed."
-
+    if non_latin.search(title):
+        return False, "The title contains non-English characters."
+    if non_latin.search(abstract):
+        return False, "The abstract contains non-English characters."
     return True, "Inputs are valid!"
 
 def update_button_status(title, abstract):
-    valid,
+    valid, msg = validate_input(title, abstract)
     if not valid:
-        return gr.update(value="Error: " +
-        return gr.update(value=
+        return gr.update(value="Error: " + msg), gr.update(interactive=False)
+    return gr.update(value=msg), gr.update(interactive=True)
 
 def process_arxiv_input(arxiv_input):
-    """Process arXiv input and update title/abstract fields."""
     if not arxiv_input.strip():
         return "", "", "Please enter an arXiv URL or ID"
-
     result = fetch_arxiv_paper(arxiv_input)
     if result["success"]:
         return result["title"], result["abstract"], result["message"]
-
     return "", "", result["message"]
 
 css = """
 .gradio-container {
     font-family: 'Arial', sans-serif;
 }
-
+.main-title {
+    text-align: center;
+    color: #2563eb;
+    font-size: 2.5rem !important;
+    margin-bottom: 1rem !important;
+    background: linear-gradient(45deg, #2563eb, #1d4ed8);
+    -webkit-background-clip: text;
+    -webkit-text-fill-color: transparent;
+}
+.sub-title {
+    text-align: center;
+    color: #4b5563;
+    font-size: 1.5rem !important;
+    margin-bottom: 2rem !important;
+}
+.input-section {
+    background: white;
+    padding: 2rem;
+    border-radius: 1rem;
+    box-shadow: 0 4px 6px -1px rgb(0 0 0 / 0.1);
+}
+.result-section {
+    background: #f8fafc;
+    padding: 2rem;
+    border-radius: 1rem;
+    margin-top: 2rem;
+}
+.methodology-section {
+    background: #ecfdf5;
+    padding: 2rem;
+    border-radius: 1rem;
+    margin-top: 2rem;
+}
+.example-section {
+    background: #fff7ed;
+    padding: 2rem;
+    border-radius: 1rem;
+    margin-top: 2rem;
+}
+.grade-display {
+    font-size: 3rem;
+    text-align: center;
+    margin: 1rem 0;
+}
+.arxiv-input {
+    margin-bottom: 1.5rem;
+    padding: 1rem;
+    background: #f3f4f6;
+    border-radius: 0.5rem;
+}
+.arxiv-link {
+    color: #2563eb;
+    text-decoration: underline;
+    font-size: 0.9em;
+    margin-top: 0.5em;
+}
+.arxiv-note {
+    color: #666;
+    font-size: 0.9em;
+    margin-top: 0.5em;
+    margin-bottom: 0.5em;
+}
 """
 
 with gr.Blocks(theme=gr.themes.Default(), css=css) as iface:
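The new validate_input compiles the non-Latin pattern locally instead of relying on a module-level non_latin_pattern. A quick illustration of how that ASCII-only check behaves (the example strings are arbitrary):

import re

non_latin = re.compile(r'[^\u0000-\u007F]')
print(bool(non_latin.search("Attention Is All You Need")))  # False: plain ASCII passes
print(bool(non_latin.search("Ein schönes Papier")))          # True: non-ASCII characters fail validation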
@@ -291,21 +307,9 @@ with gr.Blocks(theme=gr.themes.Default(), css=css) as iface:
     """
     )
 
-    title_input.change(
-        update_button_status,
-        inputs=[title_input, abstract_input],
-        outputs=[validation_status, submit_button]
-    )
-    abstract_input.change(
-        update_button_status,
-        inputs=[title_input, abstract_input],
-        outputs=[validation_status, submit_button]
-    )
-    fetch_button.click(
-        process_arxiv_input,
-        inputs=[arxiv_input],
-        outputs=[title_input, abstract_input, validation_status]
-    )
+    title_input.change(update_button_status, [title_input, abstract_input], [validation_status, submit_button])
+    abstract_input.change(update_button_status, [title_input, abstract_input], [validation_status, submit_button])
+    fetch_button.click(process_arxiv_input, [arxiv_input], [title_input, abstract_input, validation_status])
 
     def process_prediction(title, abstract):
         score = predict(title, abstract)
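The one-line wiring passes inputs and outputs positionally and behaves the same as the keyword form it replaces. For example, the first listener could equivalently be written as the sketch below, assuming the same component names:

title_input.change(
    fn=update_button_status,
    inputs=[title_input, abstract_input],
    outputs=[validation_status, submit_button],
)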