Sorting Inside a List in Python

January 11, 2023

# Scrapes a paginated listing: for every listing page, visits each linked
# detail page and records (heading, movie) pairs, then dumps them to list.csv.
# `url`, `headers`, `requests`, `BeautifulSoup`, `pd` and `urljoin` are not
# defined in this snippet and must already be in scope.

# Accumulates one (heading, movie) tuple per appended movie node.
data = []

# Walk the listing pages until no "next" link is found (see the break below).
while True:
    print(url)
    response = requests.get(url, headers=headers)
    soup = BeautifulSoup(response.content, 'html.parser')
    # NOTE(review): this value is never read; the for loop below rebinds
    # `links` on its first iteration.
    links = soup.select_one('li.page-item.nb.active')
    
    # Each matching <h6> carries an <a> whose href points at a detail page.
    for links in soup.find_all("h6", {"class": "text-primary title"}):
        sublink = links.find("a").get("href")
        # "LINK" is a literal prefix here; presumably a placeholder for the
        # site's base URL - TODO confirm.
        new_link = "LINK" + sublink
        # Unlike the listing request above, no headers are passed here.
        response2 = requests.get(new_link)
        soup2 = BeautifulSoup(response2.content, 'html.parser')
        
        # print('-------------------')
        heading = soup2.find('h1').text
        print(heading)

        # Only the first <tbody> of the detail page is used; indexing [0]
        # raises IndexError when the page has none.
        table = soup2.find_all('tbody')[0]
        for i in table.find_all('td', class_='title'):
            movies = i.find('a', class_="text-primary")
            # `movies` is a single <a> tag (or None), not a list. Iterating a
            # bs4 Tag walks its child nodes, so every child becomes its own
            # (heading, child) row - this is why the CSV ends up with one
            # line per movie instead of one line per heading.
            for movie in movies:
                data.append((heading,movie))
                
        # The whole CSV is rewritten from `data` after every detail page.
        df = pd.DataFrame(data=data)
        df.to_csv('list.csv', index=False, encoding='utf-8')

    # Follow the pagination "next" link, resolved against the current URL;
    # stop when the page has no such link.
    next_page = soup.select_one('li.page-item.next>a')
    if next_page:
        next_url = next_page.get('href')
        url = urljoin(url, next_url)
    else:
        break

Hello guys! How can I arrange the results in the CSV like the example below? I tried my best to sort them, but as a beginner it is very hard for me to do.

EXAMPLE

Column1 | Column2  
James | Movie1, Movie2, Movie3
Peter | Movie1, Movie2, Movie3

What I am getting right now is

Column1 | Column2
James, movie 1
James, movie 2
James, movie 3

>Solution :

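Instead of appending one row per movie, collect all the movies for a heading in a list and combine them with ", ".join(movies), so that each heading produces a single row: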

# Scrapes a paginated listing and writes one CSV row per detail page:
# (heading, "Movie1, Movie2, ..."). `url`, `headers`, `requests`,
# `BeautifulSoup`, `pd` and `urljoin` must already be in scope.

# Each entry is one finished CSV row: (heading, comma-separated movie titles).
data = []

# Walk the listing pages until no "next" link is found (see the break below).
while True:
    print(url)
    response = requests.get(url, headers=headers)
    soup = BeautifulSoup(response.content, 'html.parser')

    # Each matching <h6> carries an <a> whose href points at a detail page.
    for title_tag in soup.find_all("h6", {"class": "text-primary title"}):
        sublink = title_tag.find("a").get("href")
        # "LINK" is a literal prefix; presumably a placeholder for the site's
        # base URL - TODO confirm.
        new_link = "LINK" + sublink
        response2 = requests.get(new_link)
        soup2 = BeautifulSoup(response2.content, 'html.parser')

        heading = soup2.find('h1').text
        print(heading)

        # Only the first <tbody> of the detail page holds the movie rows.
        table = soup2.find_all('tbody')[0]
        movies = []
        for cell in table.find_all('td', class_='title'):
            anchor = cell.find('a', class_="text-primary")
            # A cell without a matching link yields None; skip it rather than
            # crash. Take the anchor's text explicitly: extending the list
            # with the Tag itself would add its child nodes, which breaks
            # str.join as soon as a child is a nested tag instead of text.
            if anchor is not None:
                movies.append(anchor.get_text())
        # One row per heading, with all of its movies joined into one cell.
        data.append((heading, ", ".join(movies)))

        # Rewriting the file after each detail page keeps the rows collected
        # so far on disk if a later request fails.
        df = pd.DataFrame(data=data)
        df.to_csv('list.csv', index=False, encoding='utf-8')

    # Follow the pagination "next" link, resolved against the current URL;
    # stop when the page has no such link.
    next_page = soup.select_one('li.page-item.next>a')
    if next_page:
        next_url = next_page.get('href')
        url = urljoin(url, next_url)
    else:
        break