Follow

Keep Up to Date with the Most Important News

By pressing the Subscribe button, you confirm that you have read and are agreeing to our Privacy Policy and Terms of Use
Contact

Extract html script variables into JSON

Here is a section of an HTML page that I'm interested in extracting data from.

<script id="P4EPconfig" type="text/javascript">
  REA = REA || {};

    REA.propertyId = "2879292";
    REA.state = "Vic";
    REA.suburb = "Gladstone Park";
    REA.channel = "property";
    REA.suburbForAds = "gladstonepark";
    REA.rawSuburb = "gladstone park";
    REA.postcode = "3043";
    REA.fullSuburb = "Gladstone Park, Vic 3043";
      REA.marketFlags = [];
    REA.buildingType = "house";
    REA.longStreetAddress = "1 Adam Court";
    REA.longStreetAddressWithSuburb = "1 Adam Court, Gladstone Park, Vic 3043";
    REA.lat = "-37.687113";
    REA.lon = "144.899982";
    REA.allImages = [{"name":"photo","server":"https://i2.au.reastatic.net","uri":"/2ecfc132e00792c90e1a5eb569d249672e0e9d9ec60b364d1f482fc2477b66b6/main.jpg","caption":"Listed November 2010"},{"name":"photo","server":"https://i2.au.reastatic.net","uri":"/ad424079d8c83822637f96cab86994ef0bb6fe6abb4b5a442e48b0df5e41c69b/image2.jpg","caption":"Listed November 2010"},{"name":"photo","server":"https://i2.au.reastatic.net","uri":"/539aa0167f2d1e04a4ab47cddc90f6cf2aa007c5bc0dbc925487742966596db7/image3.jpg","caption":"Listed November 2010"},{"name":"photo","server":"https://i2.au.reastatic.net","uri":"/dc017c3e0e9d28471cdc0586d3e1b01e5860fee6863847f26a832775b5751f35/image4.jpg","caption":"Listed November 2010"},{"name":"photo","server":"https://i2.au.reastatic.net","uri":"/35d03effe995ca4ca78b4846b137dc6d85259fc41d0f70b6acc5fd38a4714d09/image5.jpg","caption":"Listed November 2010"}];
    REA.propertyType = "house";
    REA.pluralPropertyType = "houses";
    REA.bedrooms = "3";
    REA.bathrooms = "2";
    REA.carSpaces = "2";
    REA.yearBuilt = "";
    REA.landArea = "551";
    REA.landAreaDisplay = "551 m²";
    REA.floorArea = "";
    REA.floorAreaDisplay = "-";
    REA.rawBedrooms = "3";
    REA.rawBathrooms = "2";
    REA.offMarket = true;
    REA.avmData = {"confidence":"high","range":{"text":"$710,000 - $850,000","min":710000,"max":850000},"value":"NzgwOTQ0","lastUpdated":"30 May, 2022"};
    REA.powerProfile = null;
    REA.propertyListing = null;
    REA.findAgentsURI ="https://www.realestate.com.au/find-agent/gladstone-park-vic-3043";
    REA.propertyMarketTrends = {"propertyType":"house","bedrooms":"3","medianSoldPrice":733000.0,"medianRentalPrice":400.0,"annualGrowth":0.057,"soldProperties":88,"rentalProperties":68,"soldDataIngestDateDisplay":"3 June 2022","rentDataIngestDateDisplay":"5 June 2022","trends":{"medianSoldPrice":[{"bedrooms":"ALL","yearly":[{"intervalStart":"2013-01-01","intervalEnd":"2013-12-31","value":392500.0,"count":99},{"intervalStart":"2014-01-01","intervalEnd":"2014-12-31","value":400000.0,"count":127},{"intervalStart":"2015-01-01","intervalEnd":"2015-12-31","value":460000.0,"count":142},{"intervalStart":"2016-01-01","intervalEnd":"2016-12-31","value":525000.0,"count":119},{"intervalStart":"2017-01-01","intervalEnd":"2017-12-31","value":645000.0,"count":105},{"intervalStart":"2018-01-01","intervalEnd":"2018-12-31","value":630000.0,"count":104},{"intervalStart":"2019-01-01","intervalEnd":"2019-12-31","value":640000.0,"count":93},{"intervalStart":"2020-01-01","intervalEnd":"2020-12-31","value":640000.0,"count":85},{"intervalStart":"2021-01-01","intervalEnd":"2021-12-31","value":725000.0,"count":144}],"monthly":[{"intervalStart":"2020-07-01","intervalEnd":"2021-06-30","value":692500.0,"count":118},{"intervalStart":"2020-08-01","intervalEnd":"2021-07-31","value":700000.0,"count":119},{"intervalStart":"2020-09-01","intervalEnd":"2021-08-31","value":700500.0,"count":120},{"intervalStart":"2020-10-01","intervalEnd":"2021-09-30","value":705000.0,"count":127},{"intervalStart":"2020-11-01","intervalEnd":"2021-10-31","value":713000.0,"count":130},{"intervalStart":"2020-12-01","intervalEnd":"2021-11-30","value":720000.0,"count":139},{"intervalStart":"2021-01-01","intervalEnd":"2021-12-31","value":725000.0,"count":144},{"intervalStart":"2021-02-01","intervalEnd":"2022-01-31","value":726000.0,"count":143},{"intervalStart":"2021-03-01","intervalEnd":"2022-02-28","value":732000.0,"count":143},{"intervalStart":"2021-04-01","intervalEnd":"2022-03-31","value":736000.0,"count":138},{"intervalS
tart":"2021-05-01","intervalEnd":"2022-04-30","value":740000.0,"count":139},{"intervalStart":"2021-06-01","intervalEnd":"2022-05-31","value":745000.0,"count":119}]},{"bedrooms":"3","yearly":[{"intervalStart":"2013-01-01","intervalEnd":"2013-12-31","value":385750.0,"count":74},{"intervalStart":"2014-01-01","intervalEnd":"2014-12-31","value":398000.0,"count":106},{"intervalStart":"2015-01-01","intervalEnd":"2015-12-31","value":455000.0,"count":109},{"intervalStart":"2016-01-01","intervalEnd":"2016-12-31","value":520000.0,"count":93},{"intervalStart":"2017-01-01","intervalEnd":"2017-12-31","value":635000.0,"count":84},{"intervalStart":"2018-01-01","intervalEnd":"2018-12-31","value":620000.0,"count":80},{"intervalStart":"2019-01-01","intervalEnd":"2019-12-31","value":625000.0,"count":67},{"intervalStart":"2020-01-01","intervalEnd":"2020-12-31","value":631000.0,"count":60},{"intervalStart":"2021-01-01","intervalEnd":"2021-12-31","value":714000.0,"count":102}],"monthly":[{"intervalStart":"2020-07-01","intervalEnd":"2021-06-30","value":677000.0,"count":81},{"intervalStart":"2020-08-01","intervalEnd":"2021-07-31","value":680000.0,"count":83},{"intervalStart":"2020-09-01","intervalEnd":"2021-08-31","value":685000.0,"count":85},{"intervalStart":"2020-10-01","intervalEnd":"2021-09-30","value":700000.0,"count":89},{"intervalStart":"2020-11-01","intervalEnd":"2021-10-31","value":705000.0,"count":89},{"intervalStart":"2020-12-01","intervalEnd":"2021-11-30","value":710250.0,"count":98},{"intervalStart":"2021-01-01","intervalEnd":"2021-12-31","value":714000.0,"count":102},{"intervalStart":"2021-02-01","intervalEnd":"2022-01-31","value":715000.0,"count":103},{"intervalStart":"2021-03-01","intervalEnd":"2022-02-28","value":720000.0,"count":102},{"intervalStart":"2021-04-01","intervalEnd":"2022-03-31","value":725500.0,"count":98},{"intervalStart":"2021-05-01","intervalEnd":"2022-04-30","value":732000.0,"count":103},{"intervalStart":"2021-06-01","intervalEnd":"2022-05-31","value":73300
0.0,"count":88}]}],"medianRentalPrice":[{"bedrooms":"ALL","yearly":[{"intervalStart":"2012-01-01","intervalEnd":"2012-12-31","value":330.0,"count":63},{"intervalStart":"2013-01-01","intervalEnd":"2013-12-31","value":330.0,"count":43},{"intervalStart":"2014-01-01","intervalEnd":"2014-12-31","value":350.0,"count":104},{"intervalStart":"2015-01-01","intervalEnd":"2015-12-31","value":350.0,"count":116},{"intervalStart":"2016-01-01","intervalEnd":"2016-12-31","value":360.0,"count":79},{"intervalStart":"2017-01-01","intervalEnd":"2017-12-31","value":390.5,"count":66},{"intervalStart":"2018-01-01","intervalEnd":"2018-12-31","value":400.0,"count":86},{"intervalStart":"2019-01-01","intervalEnd":"2019-12-31","value":400.0,"count":100},{"intervalStart":"2020-01-01","intervalEnd":"2020-12-31","value":400.5,"count":92},{"intervalStart":"2021-01-01","intervalEnd":"2021-12-31","value":410.0,"count":105}],"monthly":[{"intervalStart":"2020-06-01","intervalEnd":"2021-05-31","value":400.0,"count":105},{"intervalStart":"2020-07-01","intervalEnd":"2021-06-30","value":400.0,"count":102},{"intervalStart":"2020-08-01","intervalEnd":"2021-07-31","value":400.0,"count":103},{"intervalStart":"2020-09-01","intervalEnd":"2021-08-31","value":400.5,"count":106},{"intervalStart":"2020-10-01","intervalEnd":"2021-09-30","value":400.0,"count":109},{"intervalStart":"2020-11-01","intervalEnd":"2021-10-31","value":410.0,"count":110},{"intervalStart":"2020-12-01","intervalEnd":"2021-11-30","value":410.0,"count":110},{"intervalStart":"2021-01-01","intervalEnd":"2021-12-31","value":410.0,"count":105},{"intervalStart":"2021-02-01","intervalEnd":"2022-01-31","value":410.0,"count":104},{"intervalStart":"2021-03-01","intervalEnd":"2022-02-28","value":410.0,"count":97},{"intervalStart":"2021-04-01","intervalEnd":"2022-03-31","value":410.0,"count":98},{"intervalStart":"2021-05-01","intervalEnd":"2022-04-30","value":415.0,"count":104}]},{"bedrooms":"3","yearly":[{"intervalStart":"2012-01-01","intervalEnd":"2012-12
-31","value":330.0,"count":46},{"intervalStart":"2013-01-01","intervalEnd":"2013-12-31","value":320.0,"count":32},{"intervalStart":"2014-01-01","intervalEnd":"2014-12-31","value":337.5,"count":68},{"intervalStart":"2015-01-01","intervalEnd":"2015-12-31","value":340.0,"count":94},{"intervalStart":"2016-01-01","intervalEnd":"2016-12-31","value":360.0,"count":59},{"intervalStart":"2017-01-01","intervalEnd":"2017-12-31","value":380.0,"count":43},{"intervalStart":"2018-01-01","intervalEnd":"2018-12-31","value":387.5,"count":60},{"intervalStart":"2019-01-01","intervalEnd":"2019-12-31","value":400.0,"count":66},{"intervalStart":"2020-01-01","intervalEnd":"2020-12-31","value":400.0,"count":62},{"intervalStart":"2021-01-01","intervalEnd":"2021-12-31","value":400.0,"count":72}],"monthly":[{"intervalStart":"2020-06-01","intervalEnd":"2021-05-31","value":393.0,"count":70},{"intervalStart":"2020-07-01","intervalEnd":"2021-06-30","value":395.0,"count":67},{"intervalStart":"2020-08-01","intervalEnd":"2021-07-31","value":397.5,"count":70},{"intervalStart":"2020-09-01","intervalEnd":"2021-08-31","value":397.5,"count":70},{"intervalStart":"2020-10-01","intervalEnd":"2021-09-30","value":395.0,"count":73},{"intervalStart":"2020-11-01","intervalEnd":"2021-10-31","value":400.0,"count":73},{"intervalStart":"2020-12-01","intervalEnd":"2021-11-30","value":400.0,"count":76},{"intervalStart":"2021-01-01","intervalEnd":"2021-12-31","value":400.0,"count":72},{"intervalStart":"2021-02-01","intervalEnd":"2022-01-31","value":397.5,"count":70},{"intervalStart":"2021-03-01","intervalEnd":"2022-02-28","value":400.0,"count":68},{"intervalStart":"2021-04-01","intervalEnd":"2022-03-31","value":400.0,"count":69},{"intervalStart":"2021-05-01","intervalEnd":"2022-04-30","value":400.0,"count":72}]}]}};
    REA.leadGen = {"actionUrl":"https://property.value.realestate.com.au","data":{"listingCompany":{"id":"JR…
</script>

What would be a good method for formatting all these variables into a JSON format as shown below:

{
  "propertyId": "2879292",
  "state": "Vic",
  "suburb": "Gladstone Park"
  .
  .
  .
  "propertyMarketTrends": {...}
}

I can think of doing it with regular expressions, but that may be a little tedious. Is there an easier way of parsing this data structure into JSON?

MEDevel.com: Open-source for Healthcare and Education

Collecting and validating open-source software for healthcare, education, enterprise, development, medical imaging, medical records, and digital pathology.

Visit Medevel

Solution:

If html_string contains the data from your question you can try this example to parse it (but beware, there are many corner cases):

import re
import json

# Matches one `REA.<name> = <value>;` assignment that occupies a whole line.
# - Anchored with ^/$ (MULTILINE) so a stray "REA." inside another statement
#   or inside a string value is not mistaken for an assignment.
# - The key is restricted to identifier characters (\w+).
# - The value is everything up to the final ";" on that line; ";" characters
#   inside the value are allowed because the match must end at end-of-line.
_REA_ASSIGNMENT = re.compile(
    r"^[ \t]*REA\.(\w+)[ \t]*=[ \t]*(.*?);[ \t\r]*$",
    re.MULTILINE,
)

# Collect every assignment whose right-hand side is a valid JSON literal
# (string, number, true/false/null, array, object).
d = {}
for k, v in _REA_ASSIGNMENT.findall(html_string):
    try:
        d[k] = json.loads(v)
    except json.JSONDecodeError:
        # The right-hand side is a JavaScript expression rather than JSON
        # (e.g. a function call or another variable); skip it instead of
        # aborting the whole extraction.
        continue

# Serialize with json.dumps so the output is real JSON (true/false/null and
# double-quoted strings) rather than the Python dict repr; ensure_ascii=False
# keeps characters such as "²" readable.
print(json.dumps(d, indent=4, ensure_ascii=False))

Prints:

{
    "propertyId": "2879292",
    "state": "Vic",
    "suburb": "Gladstone Park",
    "channel": "property",
    "suburbForAds": "gladstonepark",
    "rawSuburb": "gladstone park",
    "postcode": "3043",
    "fullSuburb": "Gladstone Park, Vic 3043",
    "marketFlags": [],
    "buildingType": "house",
    "longStreetAddress": "1 Adam Court",
    "longStreetAddressWithSuburb": "1 Adam Court, Gladstone Park, Vic 3043",
    "lat": "-37.687113",
    "lon": "144.899982",
    "allImages": [
        {
            "name": "photo",
            "server": "https://i2.au.reastatic.net",
            "uri": "/2ecfc132e00792c90e1a5eb569d249672e0e9d9ec60b364d1f482fc2477b66b6/main.jpg",
            "caption": "Listed November 2010"
        },
        {
            "name": "photo",
            "server": "https://i2.au.reastatic.net",
            "uri": "/ad424079d8c83822637f96cab86994ef0bb6fe6abb4b5a442e48b0df5e41c69b/image2.jpg",
            "caption": "Listed November 2010"
        },

...and so on.
Add a comment

Leave a Reply

Keep Up to Date with the Most Important News

By pressing the Subscribe button, you confirm that you have read and are agreeing to our Privacy Policy and Terms of Use

Discover more from Dev solutions

Subscribe now to keep reading and get access to the full archive.

Continue reading