3.4. Read JSON

  • File paths also work with URLs

  • File can be compressed with .gz, .bz2, .zip, .xz

3.4.1. SetUp

>>> import pandas as pd
>>>
>>> pd.set_option('display.max_columns', 50)
>>> pd.set_option('display.max_rows', 200)
>>> pd.set_option('display.width', 500)
>>> pd.set_option('display.memory_usage', 'deep')
>>> pd.set_option('display.precision', 4)

3.4.2. Example

>>> DATA = 'https://python3.info/_static/example.json'
>>>
>>>
>>> pd.read_json(DATA)
  firstname    lastname  age                email   lastlogin  is_active              groups
0     Alice     Apricot   30    alice@example.com  2000-01-01       True         users;staff
1       Bob  Blackthorn   31      bob@example.com  2000-01-02       True         users;staff
2     Carol        Corn   32    carol@example.com  2000-01-03       True               users
3      Dave      Durian   33     dave@example.org  2000-01-04       True               users
4       Eve  Elderberry   34      eve@example.org  2000-01-05       True  users;staff;admins
5   Mallory       Melon   15  mallory@example.net         NaN      False                 NaN

3.4.3. Parameters

  • path_or_buf - str, path object or file-like object

  • encoding - str, default None. If None, defaults to 'utf-8'.

  • compression - str or dict, default 'infer'. If 'infer' and path_or_buf is path-like, then detect compression from the following extensions: '.gz', '.bz2', '.zip', or '.xz' (otherwise no compression).

>>> def read_json(
...     path_or_buf: FilePath | ReadBuffer[str] | ReadBuffer[bytes],
...     *,
...     orient: str | None = ...,
...     typ: Literal["frame"] = ...,
...     dtype: DtypeArg | None = ...,
...     convert_axes: bool | None = ...,
...     convert_dates: bool | list[str] = ...,
...     keep_default_dates: bool = ...,
...     precise_float: bool = ...,
...     date_unit: str | None = ...,
...     encoding: str | None = ...,
...     encoding_errors: str | None = ...,
...     lines: bool = ...,
...     chunksize: int,
...     compression: CompressionOptions = ...,
...     nrows: int | None = ...,
...     storage_options: StorageOptions = ...,
...     dtype_backend: DtypeBackend | lib.NoDefault = ...,
...     engine: JSONEngine = ...,
... ) -> JsonReader[Literal["frame"]]: ...

3.4.4. Compressed

  • pd.read_json('myfile.zip', compression='zip')

  • If the extension is .gz, .bz2, .zip, or .xz, the corresponding compression method is automatically selected

>>> df = pd.read_json('myfile.zip', compression='zip')
>>> df = pd.read_json('myfile.gz', compression='infer')

3.4.5. JSON Pretty-Printing

  • python -m json.tool (Python 3.14+ also accepts the shorter python -m json)

$ curl https://python3.info/_static/example.json
[{"firstname":"Alice","lastname":"Apricot","age":30,"email":"alice@example.com","lastlogin":"2000-01-01","is_active":true,"groups":"users;staff"},{"firstname":"Bob","lastname":"Blackthorn","age":31,"email":"bob@example.com","lastlogin":"2000-01-02","is_active":true,"groups":"users;staff"},{"firstname":"Carol","lastname":"Corn","age":32,"email":"carol@example.com","lastlogin":"2000-01-03","is_active":true,"groups":"users"},{"firstname":"Dave","lastname":"Durian","age":33,"email":"dave@example.org","lastlogin":"2000-01-04","is_active":true,"groups":"users"},{"firstname":"Eve","lastname":"Elderberry","age":34,"email":"eve@example.org","lastlogin":"2000-01-05","is_active":true,"groups":"users;staff;admins"},{"firstname":"Mallory","lastname":"Melon","age":15,"email":"mallory@example.net","lastlogin":null,"is_active":false,"groups":null}]
$ curl https://python3.info/_static/example.json | python3 -m json.tool
[
    {
        "firstname": "Alice",
        "lastname": "Apricot",
        "age": 30,
        "email": "alice@example.com",
        "lastlogin": "2000-01-01",
        "is_active": true,
        "groups": "users;staff"
    },
    {
        "firstname": "Bob",
        "lastname": "Blackthorn",
        "age": 31,
        "email": "bob@example.com",
        "lastlogin": "2000-01-02",
        "is_active": true,
        "groups": "users;staff"
    },
    {
        "firstname": "Carol",
        "lastname": "Corn",
        "age": 32,
        "email": "carol@example.com",
        "lastlogin": "2000-01-03",
        "is_active": true,
        "groups": "users"
    },
    {
        "firstname": "Dave",
        "lastname": "Durian",
        "age": 33,
        "email": "dave@example.org",
        "lastlogin": "2000-01-04",
        "is_active": true,
        "groups": "users"
    },
    {
        "firstname": "Eve",
        "lastname": "Elderberry",
        "age": 34,
        "email": "eve@example.org",
        "lastlogin": "2000-01-05",
        "is_active": true,
        "groups": "users;staff;admins"
    },
    {
        "firstname": "Mallory",
        "lastname": "Melon",
        "age": 15,
        "email": "mallory@example.net",
        "lastlogin": null,
        "is_active": false,
        "groups": null
    }
]

3.4.6. Assignments

# %% About
# - Name: Pandas ReadJSON Data
# - Difficulty: easy
# - Lines: 1
# - Minutes: 2

# %% License
# - Copyright 2025, Matt Harasymczuk <matt@python3.info>
# - This code can be used only for learning by humans
# - This code cannot be used for teaching others
# - This code cannot be used for teaching LLMs and AI algorithms
# - This code cannot be used in commercial or proprietary products
# - This code cannot be distributed in any form
# - This code cannot be changed in any form outside of training course
# - This code cannot have its license changed
# - If you use this code in your product, you must open-source it under GPLv2
# - Exception can be granted only by the author

# %% English
# 1. Read data `DATA` in JSON format to Pandas DataFrame
# 2. Define variable `result` with the solution
# 3. Run doctests - all must succeed

# %% Polish
# 1. Wczytaj dane `DATA` w formacie JSON do Pandas DataFrame
# 2. Zdefiniuj zmienną `result` z rozwiązaniem
# 3. Uruchom doctesty - wszystkie muszą się powieść

# %% Expected
# >>> result
#   firstname    lastname  age                email   lastlogin  is_active              groups
# 0     Alice     Apricot   30    alice@example.com  2000-01-01       True         users;staff
# 1       Bob  Blackthorn   31      bob@example.com  2000-01-02       True         users;staff
# 2     Carol        Corn   32    carol@example.com  2000-01-03       True               users
# 3      Dave      Durian   33     dave@example.org  2000-01-04       True               users
# 4       Eve  Elderberry   34      eve@example.org  2000-01-05       True  users;staff;admins
# 5   Mallory       Melon   15  mallory@example.net         NaN      False                 NaN

# %% Hints
# - `pd.read_json()`
# - `read_json` is a module-level pandas function, not a `DataFrame` method

# %% Doctests
# The module docstring below is the doctest suite for this assignment;
# it checks that `result` is defined, is not the `...` placeholder,
# is exactly a `pd.DataFrame`, and prints as the table shown.
"""
>>> import sys; sys.tracebacklimit = 0

>>> assert sys.version_info >= (3, 9), \
'Python has an invalid version; expected: `3.9` or newer.'

>>> assert 'result' in globals(), \
'Variable `result` is not defined; assign result of your program to it.'

>>> assert result is not Ellipsis, \
'Variable `result` has an invalid value; assign result of your program to it.'

>>> assert type(result) is pd.DataFrame, \
'Variable `result` has an invalid type; expected: `pd.DataFrame`.'

>>> pd.set_option('display.max_columns', 50)
>>> pd.set_option('display.max_rows', 200)
>>> pd.set_option('display.width', 500)
>>> pd.set_option('display.memory_usage', 'deep')
>>> pd.set_option('display.precision', 4)

>>> result  # doctest: +NORMALIZE_WHITESPACE
  firstname    lastname  age                email   lastlogin  is_active              groups
0     Alice     Apricot   30    alice@example.com  2000-01-01       True         users;staff
1       Bob  Blackthorn   31      bob@example.com  2000-01-02       True         users;staff
2     Carol        Corn   32    carol@example.com  2000-01-03       True               users
3      Dave      Durian   33     dave@example.org  2000-01-04       True               users
4       Eve  Elderberry   34      eve@example.org  2000-01-05       True  users;staff;admins
5   Mallory       Melon   15  mallory@example.net         NaN      False                 NaN
"""

# %% Run
# - PyCharm: right-click in the editor and `Run Doctest in ...`
# - PyCharm: keyboard shortcut `Control + Shift + F10`
# - Terminal: `python -m doctest -f -v myfile.py`

# %% Imports
import pandas as pd

# %% Types
result: pd.DataFrame

# %% Data
DATA = 'https://python3.info/_static/example.json'

# %% Result
# Placeholder: replace `...` with the solution (the doctests reject Ellipsis).
result = ...

# %% About
# - Name: Pandas ReadJSON StorageOptions
# - Difficulty: easy
# - Lines: 1
# - Minutes: 2

# %% License
# - Copyright 2025, Matt Harasymczuk <matt@python3.info>
# - This code can be used only for learning by humans
# - This code cannot be used for teaching others
# - This code cannot be used for teaching LLMs and AI algorithms
# - This code cannot be used in commercial or proprietary products
# - This code cannot be distributed in any form
# - This code cannot be changed in any form outside of training course
# - This code cannot have its license changed
# - If you use this code in your product, you must open-source it under GPLv2
# - Exception can be granted only by the author

# %% English
# 1. Read data `DATA` in JSON format to Pandas DataFrame
# 2. Using `storage_options` set `HEADERS` for the HTTP request
# 3. Define variable `result` with the solution
# 4. Run doctests - all must succeed

# %% Polish
# 1. Wczytaj dane `DATA` w formacie JSON do Pandas DataFrame
# 2. Używając `storage_options` ustaw `HEADERS` dla zapytania HTTP
# 3. Zdefiniuj zmienną `result` z rozwiązaniem
# 4. Uruchom doctesty - wszystkie muszą się powieść

# %% Expected
# >>> result
#   firstname    lastname  age                email   lastlogin  is_active              groups
# 0     Alice     Apricot   30    alice@example.com  2000-01-01       True         users;staff
# 1       Bob  Blackthorn   31      bob@example.com  2000-01-02       True         users;staff
# 2     Carol        Corn   32    carol@example.com  2000-01-03       True               users
# 3      Dave      Durian   33     dave@example.org  2000-01-04       True               users
# 4       Eve  Elderberry   34      eve@example.org  2000-01-05       True  users;staff;admins
# 5   Mallory       Melon   15  mallory@example.net         NaN      False                 NaN

# %% Hints
# - `pd.read_json(storage_options=...)`
# - `read_json` is a module-level pandas function, not a `DataFrame` method

# %% Doctests
# The module docstring below is the doctest suite for this assignment;
# it checks that `result` is defined, is not the `...` placeholder,
# is exactly a `pd.DataFrame`, and prints as the table shown.
"""
>>> import sys; sys.tracebacklimit = 0

>>> assert sys.version_info >= (3, 9), \
'Python has an invalid version; expected: `3.9` or newer.'

>>> assert 'result' in globals(), \
'Variable `result` is not defined; assign result of your program to it.'

>>> assert result is not Ellipsis, \
'Variable `result` has an invalid value; assign result of your program to it.'

>>> assert type(result) is pd.DataFrame, \
'Variable `result` has an invalid type; expected: `pd.DataFrame`.'

>>> pd.set_option('display.max_columns', 50)
>>> pd.set_option('display.max_rows', 200)
>>> pd.set_option('display.width', 500)
>>> pd.set_option('display.memory_usage', 'deep')
>>> pd.set_option('display.precision', 4)

>>> result  # doctest: +NORMALIZE_WHITESPACE
  firstname    lastname  age                email   lastlogin  is_active              groups
0     Alice     Apricot   30    alice@example.com  2000-01-01       True         users;staff
1       Bob  Blackthorn   31      bob@example.com  2000-01-02       True         users;staff
2     Carol        Corn   32    carol@example.com  2000-01-03       True               users
3      Dave      Durian   33     dave@example.org  2000-01-04       True               users
4       Eve  Elderberry   34      eve@example.org  2000-01-05       True  users;staff;admins
5   Mallory       Melon   15  mallory@example.net         NaN      False                 NaN
"""

# %% Run
# - PyCharm: right-click in the editor and `Run Doctest in ...`
# - PyCharm: keyboard shortcut `Control + Shift + F10`
# - Terminal: `python -m doctest -f -v myfile.py`

# %% Imports
import pandas as pd

# %% Types
result: pd.DataFrame

# %% Data
DATA = 'https://python3.info/_static/example.json'

# HTTP request headers; the task asks for these to be passed via `storage_options`.
HEADERS = {
    'User-Agent': 'My User-Agent',
    'Accept': 'application/json',
}


# %% Result
# Placeholder: replace `...` with the solution (the doctests reject Ellipsis).
result = ...

# %% About
# - Name: Pandas ReadJSON Compression
# - Difficulty: easy
# - Lines: 1
# - Minutes: 2

# %% License
# - Copyright 2025, Matt Harasymczuk <matt@python3.info>
# - This code can be used only for learning by humans
# - This code cannot be used for teaching others
# - This code cannot be used for teaching LLMs and AI algorithms
# - This code cannot be used in commercial or proprietary products
# - This code cannot be distributed in any form
# - This code cannot be changed in any form outside of training course
# - This code cannot have its license changed
# - If you use this code in your product, you must open-source it under GPLv2
# - Exception can be granted only by the author

# %% English
# 1. Read data `DATA` in JSON format to Pandas DataFrame
# 2. Use `infer` compression when reading the data
# 3. Define variable `result` with the solution
# 4. Run doctests - all must succeed

# %% Polish
# 1. Wczytaj dane `DATA` w formacie JSON do Pandas DataFrame
# 2. Ustaw kompresję `infer` podczas wczytywania danych
# 3. Zdefiniuj zmienną `result` z rozwiązaniem
# 4. Uruchom doctesty - wszystkie muszą się powieść

# %% Expected
# >>> result
#   firstname    lastname  age                email   lastlogin  is_active              groups
# 0     Alice     Apricot   30    alice@example.com  2000-01-01       True         users;staff
# 1       Bob  Blackthorn   31      bob@example.com  2000-01-02       True         users;staff
# 2     Carol        Corn   32    carol@example.com  2000-01-03       True               users
# 3      Dave      Durian   33     dave@example.org  2000-01-04       True               users
# 4       Eve  Elderberry   34      eve@example.org  2000-01-05       True  users;staff;admins
# 5   Mallory       Melon   15  mallory@example.net         NaN      False                 NaN

# %% Hints
# - `pd.read_json(compression=...)`
# - `read_json` is a module-level pandas function, not a `DataFrame` method

# %% Doctests
# The module docstring below is the doctest suite for this assignment;
# it checks that `result` is defined, is not the `...` placeholder,
# is exactly a `pd.DataFrame`, and prints as the table shown.
"""
>>> import sys; sys.tracebacklimit = 0

>>> assert sys.version_info >= (3, 9), \
'Python has an invalid version; expected: `3.9` or newer.'

>>> assert 'result' in globals(), \
'Variable `result` is not defined; assign result of your program to it.'

>>> assert result is not Ellipsis, \
'Variable `result` has an invalid value; assign result of your program to it.'

>>> assert type(result) is pd.DataFrame, \
'Variable `result` has an invalid type; expected: `pd.DataFrame`.'

>>> pd.set_option('display.max_columns', 50)
>>> pd.set_option('display.max_rows', 200)
>>> pd.set_option('display.width', 500)
>>> pd.set_option('display.memory_usage', 'deep')
>>> pd.set_option('display.precision', 4)

>>> result  # doctest: +NORMALIZE_WHITESPACE
  firstname    lastname  age                email   lastlogin  is_active              groups
0     Alice     Apricot   30    alice@example.com  2000-01-01       True         users;staff
1       Bob  Blackthorn   31      bob@example.com  2000-01-02       True         users;staff
2     Carol        Corn   32    carol@example.com  2000-01-03       True               users
3      Dave      Durian   33     dave@example.org  2000-01-04       True               users
4       Eve  Elderberry   34      eve@example.org  2000-01-05       True  users;staff;admins
5   Mallory       Melon   15  mallory@example.net         NaN      False                 NaN
"""

# %% Run
# - PyCharm: right-click in the editor and `Run Doctest in ...`
# - PyCharm: keyboard shortcut `Control + Shift + F10`
# - Terminal: `python -m doctest -f -v myfile.py`

# %% Imports
import pandas as pd

# %% Types
result: pd.DataFrame

# %% Data
# NOTE(review): the `.tar.gz` suffix presumably relies on pandas inferring
# tar compression from the extension (believed to need pandas 1.5+) — confirm
# the minimum pandas version expected for this course.
DATA = 'https://python3.info/_static/example.tar.gz'

# %% Result
# Placeholder: replace `...` with the solution (the doctests reject Ellipsis).
result = ...