Consume data from Yahoo Screener via requests

Advertisements

I ran a query on the Yahoo Screener at:

https://finance.yahoo.com/screener/equity/new

DevTools shows that the data came back as JSON via:

https://query2.finance.yahoo.com/v1/finance/screener?crumb=u0eNvTHfT6U&lang=en-US&region=US&formatted=true&corsDomain=finance.yahoo.com

So I tried to manually request the data with:

import json
import requests
# Screener endpoint. NOTE: no query string is attached here and no `params`
# are passed to requests.post() below, so the crumb only ever appears inside
# the "path" header value, never on the requested URL itself.
url = "https://query2.finance.yahoo.com/v1/finance/screener"

# Screener query: US region AND (market cap < 2B OR market cap between 2B
# and 10B), first 25 rows sorted by intraday market cap, descending.
payload = {
    "size": 25,
    "offset": 0,
    "sortField": "intradaymarketcap",
    "sortType": "DESC",
    "quoteType": "EQUITY",
    "topOperator": "AND",
    "query": {
        "operator": "AND",
        "operands": [
            {
                "operator": "or",
                "operands": [{"operator": "EQ", "operands": ["region", "us"]}],
            },
            {
                "operator": "or",
                "operands": [
                    {"operator": "LT", "operands": ["intradaymarketcap", 2000000000]},
                    {
                        "operator": "BTWN",
                        "operands": ["intradaymarketcap", 2000000000, 10000000000],
                    },
                ],
            },
        ],
    },
    "userId": "",
    "userIdType": "guid",
}

# Headers copied from the browser request. The first four entries look like
# the pseudo-header fields DevTools displays for the request; here they are
# sent as ordinary header fields.
header = {
    "authority": "query2.finance.yahoo.com",
    "method": "POST",
    "path": "/v1/finance/screener?crumb=umZV3T8[ETC...]&lang=en-US&region=US&formatted=true&corsDomain=finance.yahoo.com",
    "scheme": "https",
    "Accept": "*/*",
    "Accept-Encoding": "gzip, deflate, br",
    "Accept-Language": "en-US,en;q=0.9",
    "Access-Control-Request-Headers": "content-type",
    "Access-Control-Request-Method": "POST",
    "Cache-Control": "no-cache",
    "Content-Type": "application/json",
    "Cookie": "tbla_id=33c52a3f-2fd9-41[ETC...]",
    "Origin": "https://finance.yahoo.com",
    "Pragma": "no-cache",
    "Referer": "https://finance.yahoo.com/screener/equity/new",
    "Sec-Ch-Ua": '"Chromium";v="116","Google Chrome";v="116"',
    "Sec-Ch-Ua-Platform": "Windows",
    "Sec-Fetch-Dest": "empty",
    "Sec-Fetch-Mode": "cors",
    "Sec-Fetch-Site": "same-site",
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/116.0.0.0 Safari/537.36",
}

# Serialize the query by hand and POST it as the raw request body.
body = json.dumps(payload)
response = requests.post(url=url, headers=header, data=body, timeout=30)

# Decode the raw response bytes as JSON.
data_json = json.loads(response.content)

Even if I use the cookie and crumb from the original request header, I get this error:

{'code': 'Unauthorized', 'description': 'Invalid Crumb'}

Is this even possible via requests?

Solution:

It seems you need to set an additional cookie, A1, to get a correct response. You can find the cookie's value in your browser's Web Developer Tools:

import requests

# Screener endpoint; the crumb and locale options are sent as query
# parameters (see `params` below), the query itself as a JSON body.
api_url = "https://query1.finance.yahoo.com/v1/finance/screener"

# Screener query: all US-region equities, first 25 rows sorted by intraday
# market cap, descending.
payload = {
    "offset": 0,
    "query": {
        "operands": [
            {
                "operands": [{"operands": ["region", "us"], "operator": "EQ"}],
                "operator": "or",
            }
        ],
        "operator": "AND",
    },
    "quoteType": "EQUITY",
    "size": 25,
    "sortField": "intradaymarketcap",
    "sortType": "DESC",
    "topOperator": "AND",
    "userId": "",
    "userIdType": "guid",
}

# Query-string parameters. The crumb is a value copied from the browser's
# request; replace it with the one from your own session.
params = {
    "crumb": "EwuCwsPbKM2",
    "lang": "en-US",
    "region": "US",
    "formatted": "true",
    "corsDomain": "finance.yahoo.com",
}

headers = {
    "User-Agent": "Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:109.0) Gecko/20100101 Firefox/117.0",
}

# Use a Session as a context manager so the connection pool is closed on exit.
with requests.Session() as s:
    # The A1 cookie is what makes the server accept the crumb; copy its value
    # from the browser's developer tools alongside the crumb above.
    s.cookies[
        "A1"
    ] = "d=AQABBK8KXmQCEA8-VE0dBLqG5QEpQ7OglmEFEgABCAFH_2QyZfNtb2UB9qMAAAcIqgpeZJj7vK8&S=AQAAAnAOty-NkkMJle5hzDjUjSQ"

    # `json=` serializes the payload and sets the Content-Type header.
    # Always pass a timeout: without one, requests can wait forever on an
    # unresponsive server.
    data = s.post(
        api_url,
        params=params,
        json=payload,
        headers=headers,
        timeout=30,
    ).json()
    print(data)

Prints:

{
    "finance": {
        "result": [
            {
                "start": 0,
                "count": 25,
                "total": 14459,
                "quotes": [
                    {
                        "symbol": "AAPL",
                        "twoHundredDayAverageChangePercent": {
                            "raw": 0.07432321,
                            "fmt": "7.43%",
                        },
                        "dividendDate": {
                            "raw": 1692230400,
                            "fmt": "2023-08-16",
                            "longFmt": "2023-08-16T20:00",
                        },

...

Leave a Reply / Cancel reply