# Source code for scibib.abstract_collector
"""
--------------------------------
The abstract_collector module
--------------------------------
This module defines the main_paragraph function, which returns the longest
paragraph found in a web page.
"""
from urllib import request
import bs4
def main_paragraph(url: str) -> str:
    """From a web page, return the paragraph with the biggest length.

    The text of every ``<p>`` element is whitespace-normalized (runs of
    whitespace collapsed to a single space, leading/trailing whitespace
    removed) before its length is measured.

    Args:
        url (str): the url of the web page to treat.

    Returns:
        str: the longest normalized paragraph text. When several paragraphs
        share the maximum length, the first one in document order is
        returned. An empty string is returned when the page contains no
        ``<p>`` element with text.

    Raises:
        Any exception raised by ``urllib.request.urlopen`` (for example
        ``urllib.error.URLError``) propagates unchanged.
    """
    # Parse inside the ``with`` block: BeautifulSoup reads the response
    # while it is still open, and the connection is closed afterwards even
    # if parsing raises.
    with request.urlopen(url) as response:
        soup = bs4.BeautifulSoup(response, "html.parser")

    # ``split()`` + ``join`` collapses newlines/tabs/multiple spaces so that
    # markup indentation does not inflate a paragraph's length.
    normalized = (" ".join(p.text.split()) for p in soup.find_all("p"))

    # ``max`` keeps the first maximal item, matching a strict ``>`` scan;
    # ``default`` covers pages without any paragraph.
    return max(normalized, key=len, default="")