# Source code for scibib.abstract_collector

"""
--------------------------------
The abstract_collector module
--------------------------------
This module defines the main_paragraph function, which returns the longest
paragraph of text found on a web page.

"""


from urllib import request

import bs4
    


def main_paragraph(url: str) -> str:
    """From a web page, return the paragraph with the biggest length.

    The page is downloaded, every ``<p>`` element is extracted, runs of
    whitespace inside each paragraph are collapsed to single spaces, and
    the longest resulting text is returned. If several paragraphs share
    the maximum length, the first one in document order is returned.

    Args:
        url (str): the url of the web page to treat.

    Returns:
        str: the whitespace-normalized text of the longest paragraph, or
        an empty string when the page has no non-empty ``<p>`` element.

    Raises:
        urllib.error.URLError: propagated from ``urlopen`` when the page
            cannot be fetched.
    """
    # Close the HTTP response deterministically instead of leaking the
    # socket; the markup is consumed while the soup is being built, so the
    # response is not needed once the ``with`` block ends.
    with request.urlopen(url) as response:
        soup = bs4.BeautifulSoup(response, "html.parser")

    # split()/join collapses newlines, tabs and repeated spaces so that the
    # length comparison reflects visible text rather than HTML formatting.
    texts = (
        " ".join(paragraph.text.split())
        for paragraph in soup.find_all("p")
    )
    # max() returns the first of equally long candidates, matching a strict
    # ">" scan; default="" covers pages without any <p> element.
    return max(texts, key=len, default="")