# Python dependencies for this project.
# One requirement specifier per line, which is the layout `pip install -r` expects.

# NOTE(review): presumably listed because newspaper3k relies on lxml.html.clean,
# which newer lxml releases distribute as the separate lxml_html_clean package -- confirm.
lxml_html_clean
gradio
# Pinned to an exact release.
transformers==4.38.2
# Lower bound only: any torch release from 2.1.0 onward satisfies this.
torch>=2.1.0
newspaper3k
PyMuPDF
bert-extractive-summarizer