Starchik1 committed on
Commit
716efc1
·
verified ·
1 Parent(s): 7668236

Update main.py

Browse files
Files changed (1) hide show
  1. main.py +20 -2
main.py CHANGED
@@ -90,11 +90,29 @@ def proxy(path):
90
 
91
  # Check if response is HTML and filter content if needed
92
  content_type = resp.headers.get('Content-Type', '')
93
- if 'text/html' in content_type:
 
 
 
 
 
 
94
  # Parse HTML content
95
- html_content = resp.content.decode('utf-8', errors='ignore')
96
  soup = BeautifulSoup(html_content, 'html.parser')
97
 
 
 
 
 
 
 
 
 
 
 
 
 
 
98
  # Filter out "Полная версия ETKA"
99
  for element in soup.find_all(string=re.compile('Полная версия ETKA')):
100
  # Replace the text with empty string
 
90
 
91
  # Check if response is HTML and filter content if needed
92
  content_type = resp.headers.get('Content-Type', '')
93
+ if 'text/html' in content_type or resp.content.strip().startswith(b'<'):
94
+ # Try to determine the correct encoding
95
+ encoding = resp.encoding or 'utf-8'
96
+ try:
97
+ html_content = resp.content.decode(encoding, errors='ignore')
98
+ except UnicodeDecodeError:
99
+ html_content = resp.content.decode('utf-8', errors='ignore')
100
  # Parse HTML content
 
101
  soup = BeautifulSoup(html_content, 'html.parser')
102
 
103
+ # Ensure basic HTML structure exists
104
+ if not soup.html:
105
+ html_tag = soup.new_tag('html')
106
+ soup.append(html_tag)
107
+
108
+ if not soup.html.head:
109
+ head_tag = soup.new_tag('head')
110
+ soup.html.insert(0, head_tag)
111
+
112
+ if not soup.html.body:
113
+ body_tag = soup.new_tag('body')
114
+ soup.html.append(body_tag)
115
+
116
  # Filter out "Полная версия ETKA"
117
  for element in soup.find_all(string=re.compile('Полная версия ETKA')):
118
  # Replace the text with empty string