How to fix this error during scraping using BeautifulSoup?

January 4, 2023

I am trying to do web scraping using the BeautifulSoup and requests Python libraries. I want to filter the news titles from the Hacker News website, but it's showing an error when I run the code.

import requests
from bs4 import BeautifulSoup

# Download the Hacker News front page and parse the returned HTML.
res = requests.get('https://news.ycombinator.com/news')
soup = BeautifulSoup(res.text, 'html.parser')
# Descendant selector: matches every <a> anywhere inside a .titleline element.
# NOTE(review): if a title line contains more than one <a>, this list ends up
# longer than `subtext` below -- confirm against the page markup.
links = soup.select('.titleline a')
subtext = soup.select('.subtext')


def create_custom_hn(links, subtext):
    """Build a list of title/href dicts for links whose subtext shows a score.

    Args:
        links: Sequence of parsed link elements (the caller passes the result
            of ``soup.select('.titleline a')``).
        subtext: Sequence of parsed elements (the caller passes the result of
            ``soup.select('.subtext')``); indexed in lockstep with ``links``.

    Returns:
        A list of ``{'title': ..., 'href': ...}`` dicts, one for each link
        whose same-index ``subtext`` entry contains a ``.score`` element.

    Raises:
        IndexError: If ``links`` has more entries than ``subtext``.
    """
    hn = []
    for index, item in enumerate(links):
        title = links[index].getText()
        href = links[index].get('href', None)
        # Positional pairing: treats links[i] and subtext[i] as the same story.
        # This lookup raises IndexError once `index` reaches len(subtext).
        votes = subtext[index].select('.score')
        if len(votes):
            # Strip the " points" suffix so the remaining text parses as an int.
            points = int(votes[0].getText().replace(' points', ''))
            print(points)
            # Note: `points` is only printed; it is not stored in the result.
            hn.append({'title': title, 'href': href})
    return hn


# Run the extraction on the selected elements and print the resulting list.
print(create_custom_hn(links, subtext))

The error says

votes = subtext[index].select('.score')
            ~~~~~~~^^^^^^^
IndexError: list index out of range

>Solution :

Here is a fixed version of the code from the question:

import requests
from bs4 import BeautifulSoup

# Fetch the Hacker News front page. The timeout keeps the script from hanging
# on a stalled connection, and raise_for_status() stops on an HTTP error
# instead of silently parsing an error page.
res = requests.get("https://news.ycombinator.com/news", timeout=10)
res.raise_for_status()
soup = BeautifulSoup(res.text, "html.parser")
# Child combinator: only <a> elements that are direct children of .titleline.
links = soup.select(".titleline > a")


def create_custom_hn(links):
    """Collect the title, URL and score for each story link.

    Args:
        links: Iterable of parsed story ``<a>`` elements (the caller passes
            the result of ``soup.select(".titleline > a")``).

    Returns:
        A list of dicts with the keys ``"title"``, ``"href"`` and
        ``"points"``. ``points`` is ``0`` for an entry whose subtext block
        contains no score element.
    """
    hn = []
    for link in links:
        # Restrict the score lookup to this story's own subtext block. A bare
        # find_next(class_="score") scans the rest of the document, so a story
        # without a score would pick up the following story's score (or get
        # None at the end of the page and crash on .getText()).
        subtext = link.find_next(class_="subtext")
        score = subtext.find(class_="score") if subtext is not None else None

        if score is None:
            points = 0
        else:
            # Take the leading number so both "1 point" and "97 points" parse.
            points = int(score.getText().split()[0])

        hn.append(
            {
                "title": link.getText(),
                "href": link.get("href", None),
                "points": points,
            }
        )
    return hn


# Print the list of extracted stories.
print(create_custom_hn(links))

Prints:

[
    {
        "title": "Urllib3 in 2022",
        "href": "https://sethmlarson.dev/urllib3-in-2022",
        "points": 97,
    },
    {
        "title": "First public release of Pushup: a new compiler for making web apps in Go",
        "href": "https://github.com/adhocteam/pushup",
        "points": 18,
    },
    {
        "title": "Intelligence – A good collection of great OSINT Resources",
        "href": "https://github.com/ARPSyndicate/awesome-intelligence",
        "points": 113,
    },
    {
        "title": "Microsoft is preparing to add ChatGPT to Bing",
        "href": "https://www.bloomberg.com/news/articles/2023-01-04/microsoft-hopes-openai-s-chatbot-will-make-bing-smarter",
        "points": 760,
    },

...and so on.