Advertisements
Functioning code:
import requests
from bs4 import BeautifulSoup
import pyautogui
import csv
# Fetch the listing page and collect the link of every post on it.
url = 'https://url~~'
res = requests.get(url)
html = res.text
soup = BeautifulSoup(html, 'html.parser')
total = soup.select('.gall_title')

# Build the absolute URL of each post from its relative href.
searchList = ["https://url~~" + link.attrs['href'] for link in total]

# Visit every post and keep the matched title <span> tags.
contentList = []
for post_url in searchList:
    res2 = requests.get(post_url)
    html2 = res2.text
    soup2 = BeautifulSoup(html2, 'html.parser')
    # select() returns a list of Tag objects, so each entry of
    # contentList is itself a list (one element per matched tag).
    content_h = soup2.select('h3 > span.title_subject')
    contentList.append(content_h)
print(contentList)

# save csv
# newline='' lets the csv module control line endings itself; the
# with-block closes the file even if writing a row raises.
with open('1.csv', 'w', encoding='utf-8', newline='') as f:
    csvWriter = csv.writer(f)
    for row in contentList:
        # Each row is a list of tags, so every tag becomes one cell.
        csvWriter.writerow(row)
★Result★
print(contentList):
# [[<span class="title_subject">tomato</span>], [<span class="title_subject">apple</span>]]
Image:
enter image description here
Non-functioning code:
import requests
from bs4 import BeautifulSoup
import pyautogui
import csv
# Fetch the listing page and collect the link of every post on it.
url = 'https://url~~'
res = requests.get(url)
html = res.text
soup = BeautifulSoup(html, 'html.parser')
total = soup.select('.gall_title')

# Build the absolute URL of each post from its relative href.
searchList = ["https://url~~" + link.attrs['href'] for link in total]

# Visit every post and keep the matched title <span> tags.
contentList = []
for post_url in searchList:
    res2 = requests.get(post_url)
    html2 = res2.text
    soup2 = BeautifulSoup(html2, 'html.parser')
    # only changed: str() collapses the whole result list into ONE string.
    content_h = str(soup2.select('h3 > span.title_subject'))
    contentList.append(content_h)
print(contentList)

# save csv
# newline='' lets the csv module control line endings itself; the
# with-block closes the file even if writing a row raises.
with open('1.csv', 'w', encoding='utf-8', newline='') as f:
    csvWriter = csv.writer(f)
    for row in contentList:
        # NOTE: row is a str here, and writerow() iterates whatever it is
        # given, so every character of the string becomes its own cell.
        # This is the behaviour the question is asking about.
        csvWriter.writerow(row)
★Result★
print(contentList):
['[<span class="title_subject">tomato</span>]', '[<span class="title_subject">apple</span>]']
Image:
enter image description here
How can I fix the problem where each string is written to the '.csv' file one character per cell?
>Solution :
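`soup2.select('h3 > span.title_subject')` gives you a list of whole HTML tags, not their text. Wrapping that list in `str()` turns it into a single string, and `csv.writer.writerow()` iterates a string character by character, which is why every character ends up in its own cell. Extract each tag's string value with `.string` instead: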
# Keep only the text inside each matched <span>, not the Tag object.
strings = [
    span.string
    for span in soup2.select('h3 > span.title_subject')
]