I am trying to save the data tables to a txt file instead of showing the result on the console. However, my code writes "None" to the txt file instead of the tables… Can someone please help me?
Result that I would like to save to the txt file:
from bs4 import BeautifulSoup
import requests
import re
import json
# Input
thislist = ["AAPL"]
for symbol in thislist:
    print('Getting data for ' + symbol + '...\n')

    # Set up scraper -- both endpoints return JSONP of the form xxx({...});
    # the regex strips the xxx( ... ) wrapper so the payload can be JSON-parsed
    # and its 'componentData' HTML handed to BeautifulSoup.
    url1 = ("http://financials.morningstar.com/finan/financials/getFinancePart.html?&callback=xxx&t=" + symbol.lower())
    url2 = ("http://financials.morningstar.com/finan/financials/getKeyStatPart.html?&callback=xxx&t=" + symbol.lower())
    soup1 = BeautifulSoup(json.loads(re.findall(r'xxx\((.*)\)', requests.get(url1).text)[0])['componentData'], 'lxml')
    soup2 = BeautifulSoup(json.loads(re.findall(r'xxx\((.*)\)', requests.get(url2).text)[0])['componentData'], 'lxml')

    def print_table(soup):
        """Print every table row of *soup* as pipe-terminated fixed-width columns."""
        for row in soup.select('tr'):
            texts = [cell.text for cell in row.select('td, th') if cell.text]
            if not texts:
                continue
            # Short rows get a placeholder first column so columns stay aligned.
            if len(texts) < 12:
                texts = ['X'] + texts
            # First column right-aligned to 30 chars, the rest centred in 12.
            columns = ['{: >30}'.format(texts[0])]
            columns.extend('{: ^12}'.format(value) for value in texts[1:])
            print('|'.join(columns) + '|')

    print_table(soup1)
    print()
    print_table(soup2)
So I have written the following code to save it to a txt file, but the file just contains "None" even though the tables are still printed to the console:
from bs4 import BeautifulSoup
import requests
import re
import json
# Input
# NOTE(review): the indentation of this snippet was lost when it was pasted;
# everything below the 'for' line belongs inside the loop body.
thislist = ["AAPL"]
for symbol in thislist:
print ('Getting data for ' + symbol + '...\n')
# Set up scraper
url1 = ("http://financials.morningstar.com/finan/financials/getFinancePart.html?&callback=xxx&t=" + symbol.lower())
url2 = ("http://financials.morningstar.com/finan/financials/getKeyStatPart.html?&callback=xxx&t=" + symbol.lower())
soup1 = BeautifulSoup(json.loads(re.findall(r'xxx\((.*)\)', requests.get(url1).text)[0])['componentData'], 'lxml')
soup2 = BeautifulSoup(json.loads(re.findall(r'xxx\((.*)\)', requests.get(url2).text)[0])['componentData'], 'lxml')
# BUG: print_table sends every formatted cell straight to stdout with print()
# and has no return statement, so every call to it evaluates to None.
def print_table(soup):
for i, tr in enumerate(soup.select('tr')):
row_data = [td.text for td in tr.select('td, th') if td.text]
if not row_data:
continue
# Short rows get a placeholder first column so columns stay aligned.
if len(row_data) < 12:
row_data = ['X'] + row_data
for j, td in enumerate(row_data):
if j==0:
print('{: >30}'.format(td), end='|')
else:
print('{: ^12}'.format(td), end='|')
print()
# Save the data tables to a TXT file
# Open a file with access mode 'a'
file_object = open('testing aapl data.txt', 'a')
# Append data at the end of file
# BUG: print_table(...) prints the table to the console and returns None,
# so str(None) -> "None" is what actually gets written to the file.
file_object.write(str(print_table(soup1))+"\n")
file_object.write(str(print_table(soup2))+"\n")
# Close the file
file_object.close()
> Solution:
Your function, print_table, formats everything on the fly and prints it straight to the console; nothing is kept or returned to be written to a file. Since print_table doesn’t return anything, it evaluates to None, and that is what ends up in the file.
A solution to this is to create a string, finaltext, and concatenate everything that would be printed into it after formatting. Then print finaltext and return it so it can be used elsewhere (e.g. written to a file).
The following script implements that solution, along with some other improvements:
from bs4 import BeautifulSoup
import requests
import re
import json
# Defined once at module level instead of being re-created on every loop iteration.
def print_table(soup):
    """Format every table row of *soup* as pipe-terminated fixed-width columns.

    The table is printed to the console and the same text is RETURNED, so the
    caller can also write it to a file. (A function without a return statement
    yields None -- that was the bug in the question.)
    """
    lines = []
    for tr in soup.select('tr'):
        row_data = [td.text for td in tr.select('td, th') if td.text]
        if not row_data:
            continue
        # Short rows get a placeholder first column so columns stay aligned.
        if len(row_data) < 12:
            row_data = ['X'] + row_data
        # First column right-aligned to 30 chars, the rest centred in 12.
        cells = ['{: >30}'.format(row_data[0])]
        cells.extend('{: ^12}'.format(td) for td in row_data[1:])
        lines.append('|'.join(cells) + '|')
    # join() builds the text in O(n); repeated 'finaltext +=' is quadratic.
    finaltext = '\n'.join(lines) + '\n' if lines else ''
    print(finaltext)
    return finaltext

# Input
thislist = ["AAPL"]

# Guarding the scraping loop lets the module be imported (e.g. for testing)
# without firing network requests; running it as a script behaves as before.
if __name__ == '__main__':
    for symbol in thislist:
        print('Getting data for ' + symbol + '...\n')
        # Set up scraper -- both endpoints return JSONP of the form xxx({...});
        # the regex strips the wrapper so the payload can be JSON-parsed.
        url1 = ("http://financials.morningstar.com/finan/financials/getFinancePart.html?&callback=xxx&t=" + symbol.lower())
        url2 = ("http://financials.morningstar.com/finan/financials/getKeyStatPart.html?&callback=xxx&t=" + symbol.lower())
        soup1 = BeautifulSoup(json.loads(re.findall(r'xxx\((.*)\)', requests.get(url1).text)[0])['componentData'], 'lxml')
        soup2 = BeautifulSoup(json.loads(re.findall(r'xxx\((.*)\)', requests.get(url2).text)[0])['componentData'], 'lxml')
        # Append both tables; 'with' guarantees the file is closed even if a
        # write raises (the original open/close pair leaked on error).
        with open('testing_aapl_data.txt', 'a', encoding='utf-8') as outfile:
            # print_table already returns a str, so no str() wrapper is needed.
            outfile.write(print_table(soup1) + "\n")
            outfile.write(print_table(soup2) + "\n")