pay

Sleeping

Starchik1 committed on Mar 26

Commit

716efc1

verified ·

1 Parent(s): 7668236

Update main.py

Files changed (1) hide show

main.py CHANGED Viewed

@@ -90,11 +90,29 @@ def proxy(path):
         # Check if response is HTML and filter content if needed
         content_type = resp.headers.get('Content-Type', '')
-        if 'text/html' in content_type:
             # Parse HTML content
-            html_content = resp.content.decode('utf-8', errors='ignore')
             soup = BeautifulSoup(html_content, 'html.parser')
             # Filter out "Полная версия ETKA"
             for element in soup.find_all(string=re.compile('Полная версия ETKA')):
                 # Replace the text with empty string

         # Check if response is HTML and filter content if needed
         content_type = resp.headers.get('Content-Type', '')
+        if 'text/html' in content_type or resp.content.strip().startswith(b'<'):
+            # Try to determine the correct encoding
+            encoding = resp.encoding or 'utf-8'
+            try:
+                html_content = resp.content.decode(encoding, errors='ignore')
+            except UnicodeDecodeError:
+                html_content = resp.content.decode('utf-8', errors='ignore')
             # Parse HTML content
             soup = BeautifulSoup(html_content, 'html.parser')
+            # Ensure basic HTML structure exists
+            if not soup.html:
+                html_tag = soup.new_tag('html')
+                soup.append(html_tag)
+            if not soup.html.head:
+                head_tag = soup.new_tag('head')
+                soup.html.insert(0, head_tag)
+            if not soup.html.body:
+                body_tag = soup.new_tag('body')
+                soup.html.append(body_tag)
             # Filter out "Полная версия ETKA"
             for element in soup.find_all(string=re.compile('Полная версия ETKA')):
                 # Replace the text with empty string