3.3. Read Excel

  • File paths works also with URLs

3.3.1. SetUp

>>> import pandas as pd
>>>
>>> pd.set_option('display.max_columns', 50)
>>> pd.set_option('display.max_rows', 200)
>>> pd.set_option('display.width', 500)
>>> pd.set_option('display.memory_usage', 'deep')
>>> pd.set_option('display.precision', 4)

3.3.2. Example

  • io - Filename, path, or URL of the Excel file to read.

  • DATA = 'https://python3.info/_static/example.xlsx'

>>> DATA = 'https://python3.info/_static/example.xlsx'
>>>
>>> pd.read_excel(DATA)
       users  Unnamed: 1 Unnamed: 2           Unnamed: 3  Unnamed: 4 Unnamed: 5          Unnamed: 6
0  firstname    lastname        age                email   lastlogin  is_active              groups
1      Alice     Apricot         30    alice@example.com  2000-01-01       True         users;staff
2        Bob  Blackthorn         31      bob@example.com  2000-01-02       True         users;staff
3      Carol        Corn         32    carol@example.com  2000-01-03       True               users
4       Dave      Durian         33     dave@example.org  2000-01-04       True               users
5        Eve  Elderberry         34      eve@example.org  2000-01-05       True  users;staff;admins
6    Mallory       Melon         15  mallory@example.net         NaN      False                 NaN

3.3.3. Parameters

  • io

  • sheet_name

  • header

  • index_col

>>> def read_excel(io: Any,
...                sheet_name: str | int | list[IntStrT] | None = 0,
...                *,
...                header: int | Sequence[int] | None = 0,
...                names: SequenceNotStr[Hashable] | range | None = None,
...                index_col: int | str | Sequence[int] | None = None,
...                usecols: int | str | Sequence[int] | Sequence[str] | (HashableT) | bool | None = None,
...                dtype: ExtensionDtype | str | dtype[Any] | type[str] | type[complex] | type[bool] | type[object] | Mapping[Hashable, ExtensionDtype | str | dtype[Any] | type[str] | type[complex] | type[bool] | type[object]] | None = None,
...                engine: Literal["xlrd", "openpyxl", "odf", "pyxlsb", "calamine"] | None = None,
...                converters: dict[str, (...) | Any] | dict[int, (...) | Any] | None = None,
...                true_values: Iterable[Hashable] | None = None,
...                false_values: Iterable[Hashable] | None = None,
...                skiprows: Sequence[int] | int | (int) | object | None = None,
...                nrows: int | None = None,
...                na_values: Any = None,
...                keep_default_na: bool = True,
...                na_filter: bool = True,
...                verbose: bool = False,
...                parse_dates: list | dict | bool = False,
...                date_format: dict[Hashable, str] | str | None = None,
...                thousands: str | None = None,
...                decimal: str = ".",
...                comment: str | None = None,
...                skipfooter: int = 0,
...                storage_options: dict[str, Any] | None = None,
...                dtype_backend: Literal["pyarrow", "numpy_nullable"] | Any = 'lib.no_default',
...                engine_kwargs: dict | None = None) -> DataFrame | dict[IntStrT, DataFrame]:
...     ...

3.3.4. Header Row

  • DATA = 'https://python3.info/_static/example.xlsx'

>>> DATA = 'https://python3.info/_static/example.xlsx'
>>>
>>> pd.read_excel(DATA, header=1)
  firstname    lastname  age                email   lastlogin  is_active              groups
0     Alice     Apricot   30    alice@example.com  2000-01-01       True         users;staff
1       Bob  Blackthorn   31      bob@example.com  2000-01-02       True         users;staff
2     Carol        Corn   32    carol@example.com  2000-01-03       True               users
3      Dave      Durian   33     dave@example.org  2000-01-04       True               users
4       Eve  Elderberry   34      eve@example.org  2000-01-05       True  users;staff;admins
5   Mallory       Melon   15  mallory@example.net         NaN      False                 NaN

3.3.5. Sheet Name

  • DATA = 'https://python3.info/_static/example.xlsx'

  • sheet_name='users', sheet_name='groups'

  • Sheet names are case sensitive

>>> DATA = 'https://python3.info/_static/example.xlsx'
>>>
>>>
>>> pd.read_excel(DATA, header=1, sheet_name='users')
  firstname    lastname  age                email   lastlogin  is_active              groups
0     Alice     Apricot   30    alice@example.com  2000-01-01       True         users;staff
1       Bob  Blackthorn   31      bob@example.com  2000-01-02       True         users;staff
2     Carol        Corn   32    carol@example.com  2000-01-03       True               users
3      Dave      Durian   33     dave@example.org  2000-01-04       True               users
4       Eve  Elderberry   34      eve@example.org  2000-01-05       True  users;staff;admins
5   Mallory       Melon   15  mallory@example.net         NaN      False                 NaN
>>>
>>> pd.read_excel(DATA, header=1, sheet_name='groups')
   gid    name
0    1   users
1    2   staff
2    3  admins
>>>
>>>
>>> pd.read_excel(DATA, header=1, sheet_name='Groups')
Traceback (most recent call last):
ValueError: Worksheet named 'Groups' not found

3.3.6. Index Column

  • DATA = 'https://python3.info/_static/example.xlsx'

>>> DATA = 'https://python3.info/_static/example.xlsx'
>>>
>>> pd.read_excel(DATA, header=1, index_col='email')
                    firstname    lastname  age   lastlogin  is_active              groups
email
alice@example.com       Alice     Apricot   30  2000-01-01       True         users;staff
bob@example.com           Bob  Blackthorn   31  2000-01-02       True         users;staff
carol@example.com       Carol        Corn   32  2000-01-03       True               users
dave@example.org         Dave      Durian   33  2000-01-04       True               users
eve@example.org           Eve  Elderberry   34  2000-01-05       True  users;staff;admins
mallory@example.net   Mallory       Melon   15         NaN      False                 NaN

3.3.7. Use Case - 1

>>> import pandas as pd
>>>
>>>
>>> DATA = 'https://python3.info/_static/astro-trl.xlsx'
>>>
>>> df = pd.read_excel(
...     io=DATA,
...     sheet_name='Polish',
...     header=1,
...     index_col=0,
... )

3.3.8. Use Case - 2

>>> import pandas as pd
>>>
>>>
>>> DATA = 'https://python3.info/_static/aatc-mission-exp12.xlsx'
>>>
>>> df = pd.read_excel(
...     io=DATA,
...     sheet_name='Luminance',
...     header=1,
...     parse_dates=['datetime', 'date', 'time'],
...     index_col='datetime',
... )

3.3.9. Assignments

# %% About
# - Name: Pandas ReadExcel Data
# - Difficulty: easy
# - Lines: 1
# - Minutes: 2

# %% License
# - Copyright 2025, Matt Harasymczuk <matt@python3.info>
# - This code can be used only for learning by humans
# - This code cannot be used for teaching others
# - This code cannot be used for teaching LLMs and AI algorithms
# - This code cannot be used in commercial or proprietary products
# - This code cannot be distributed in any form
# - This code cannot be changed in any form outside of training course
# - This code cannot have its license changed
# - If you use this code in your product, you must open-source it under GPLv2
# - Exception can be granted only by the author

# %% English
# 1. Read data `DATA` in Excel format to Pandas DataFrame
# 2. Define variable `result` with the solution
# 3. Run doctests - all must succeed

# %% Polish
# 1. Wczytaj dane `DATA` w formacie Excel do Pandas DataFrame
# 2. Zdefiniuj zmienną `result` z rozwiązaniem
# 3. Uruchom doctesty - wszystkie muszą się powieść

# %% Expected
# >>> result
#        users  Unnamed: 1 Unnamed: 2           Unnamed: 3  Unnamed: 4 Unnamed: 5          Unnamed: 6
# 0  firstname    lastname        age                email   lastlogin  is_active              groups
# 1      Alice     Apricot         30    alice@example.com  2000-01-01       True         users;staff
# 2        Bob  Blackthorn         31      bob@example.com  2000-01-02       True         users;staff
# 3      Carol        Corn         32    carol@example.com  2000-01-03       True               users
# 4       Dave      Durian         33     dave@example.org  2000-01-04       True               users
# 5        Eve  Elderberry         34      eve@example.org  2000-01-05       True  users;staff;admins
# 6    Mallory       Melon         15  mallory@example.net         NaN      False                 NaN

# %% Hints
# - `DataFrame.read_excel()`

# %% Doctests
"""
>>> import sys; sys.tracebacklimit = 0

>>> assert sys.version_info >= (3, 9), \
'Python has an is invalid version; expected: `3.9` or newer.'

>>> assert 'result' in globals(), \
'Variable `result` is not defined; assign result of your program to it.'

>>> assert result is not Ellipsis, \
'Variable `result` has an invalid value; assign result of your program to it.'

>>> assert type(result) is pd.DataFrame, \
'Variable `result` has an invalid type; expected: `pd.DataFrame`.'

>>> pd.set_option('display.max_columns', 50)
>>> pd.set_option('display.max_rows', 200)
>>> pd.set_option('display.width', 500)
>>> pd.set_option('display.memory_usage', 'deep')
>>> pd.set_option('display.precision', 4)

>>> result  # doctest: +NORMALIZE_WHITESPACE
       users  Unnamed: 1 Unnamed: 2           Unnamed: 3  Unnamed: 4 Unnamed: 5          Unnamed: 6
0  firstname    lastname        age                email   lastlogin  is_active              groups
1      Alice     Apricot         30    alice@example.com  2000-01-01       True         users;staff
2        Bob  Blackthorn         31      bob@example.com  2000-01-02       True         users;staff
3      Carol        Corn         32    carol@example.com  2000-01-03       True               users
4       Dave      Durian         33     dave@example.org  2000-01-04       True               users
5        Eve  Elderberry         34      eve@example.org  2000-01-05       True  users;staff;admins
6    Mallory       Melon         15  mallory@example.net         NaN      False                 NaN
"""

# %% Run
# - PyCharm: right-click in the editor and `Run Doctest in ...`
# - PyCharm: keyboard shortcut `Control + Shift + F10`
# - Terminal: `python -m doctest -f -v myfile.py`

# %% Imports
import pandas as pd

# %% Types
result: pd.DataFrame

# %% Data
DATA = 'https://python3.info/_static/example.xlsx'

# %% Result
result = ...

# %% About
# - Name: Pandas ReadExcel Sheet Name
# - Difficulty: easy
# - Lines: 1
# - Minutes: 2

# %% License
# - Copyright 2025, Matt Harasymczuk <matt@python3.info>
# - This code can be used only for learning by humans
# - This code cannot be used for teaching others
# - This code cannot be used for teaching LLMs and AI algorithms
# - This code cannot be used in commercial or proprietary products
# - This code cannot be distributed in any form
# - This code cannot be changed in any form outside of training course
# - This code cannot have its license changed
# - If you use this code in your product, you must open-source it under GPLv2
# - Exception can be granted only by the author

# %% English
# 1. Read data `DATA` in Excel format to Pandas DataFrame
# 2. Read the sheet named `users`
# 2. Define variable `result` with the solution
# 3. Run doctests - all must succeed

# %% Polish
# 1. Wczytaj dane `DATA` w formacie Excel do Pandas DataFrame
# 2. Wczytaj arkusz o nazwie `users`
# 3. Zdefiniuj zmienną `result` z rozwiązaniem
# 4. Uruchom doctesty - wszystkie muszą się powieść

# %% Expected
# >>> result
#        users  Unnamed: 1 Unnamed: 2           Unnamed: 3  Unnamed: 4 Unnamed: 5          Unnamed: 6
# 0  firstname    lastname        age                email   lastlogin  is_active              groups
# 1      Alice     Apricot         30    alice@example.com  2000-01-01       True         users;staff
# 2        Bob  Blackthorn         31      bob@example.com  2000-01-02       True         users;staff
# 3      Carol        Corn         32    carol@example.com  2000-01-03       True               users
# 4       Dave      Durian         33     dave@example.org  2000-01-04       True               users
# 5        Eve  Elderberry         34      eve@example.org  2000-01-05       True  users;staff;admins
# 6    Mallory       Melon         15  mallory@example.net         NaN      False                 NaN

# %% Hints
# - `DataFrame.read_excel(sheet_name=...)`

# %% Doctests
"""
>>> import sys; sys.tracebacklimit = 0

>>> assert sys.version_info >= (3, 9), \
'Python has an is invalid version; expected: `3.9` or newer.'

>>> assert 'result' in globals(), \
'Variable `result` is not defined; assign result of your program to it.'

>>> assert result is not Ellipsis, \
'Variable `result` has an invalid value; assign result of your program to it.'

>>> assert type(result) is pd.DataFrame, \
'Variable `result` has an invalid type; expected: `pd.DataFrame`.'

>>> pd.set_option('display.max_columns', 50)
>>> pd.set_option('display.max_rows', 200)
>>> pd.set_option('display.width', 500)
>>> pd.set_option('display.memory_usage', 'deep')
>>> pd.set_option('display.precision', 4)

>>> result  # doctest: +NORMALIZE_WHITESPACE
       users  Unnamed: 1 Unnamed: 2           Unnamed: 3  Unnamed: 4 Unnamed: 5          Unnamed: 6
0  firstname    lastname        age                email   lastlogin  is_active              groups
1      Alice     Apricot         30    alice@example.com  2000-01-01       True         users;staff
2        Bob  Blackthorn         31      bob@example.com  2000-01-02       True         users;staff
3      Carol        Corn         32    carol@example.com  2000-01-03       True               users
4       Dave      Durian         33     dave@example.org  2000-01-04       True               users
5        Eve  Elderberry         34      eve@example.org  2000-01-05       True  users;staff;admins
6    Mallory       Melon         15  mallory@example.net         NaN      False                 NaN
"""

# %% Run
# - PyCharm: right-click in the editor and `Run Doctest in ...`
# - PyCharm: keyboard shortcut `Control + Shift + F10`
# - Terminal: `python -m doctest -f -v myfile.py`

# %% Imports
import pandas as pd

# %% Types
result: pd.DataFrame

# %% Data
DATA = 'https://python3.info/_static/example.xlsx'

# %% Result
result = ...

# %% About
# - Name: Pandas ReadExcel Complex
# - Difficulty: medium
# - Lines: 1
# - Minutes: 3

# %% License
# - Copyright 2025, Matt Harasymczuk <matt@python3.info>
# - This code can be used only for learning by humans
# - This code cannot be used for teaching others
# - This code cannot be used for teaching LLMs and AI algorithms
# - This code cannot be used in commercial or proprietary products
# - This code cannot be distributed in any form
# - This code cannot be changed in any form outside of training course
# - This code cannot have its license changed
# - If you use this code in your product, you must open-source it under GPLv2
# - Exception can be granted only by the author

# %% English
# 1. Read data `DATA` in Excel format to Pandas DataFrame
# 2. Read the sheet named `users`
# 3. Use `NAMES` as column names
# 4. Read only columns: 'firstname', 'lastname', 'age', 'email'
# 5. Set `email` as index column
# 6. Skip first row
# 7. Define variable `result` with the solution
# 8. Run doctests - all must succeed

# %% Polish
# 1. Wczytaj dane `DATA` w formacie Excel do Pandas DataFrame
# 2. Wczytaj arkusz o nazwie `users`
# 3. Użyj `NAMES` jako nazw kolumn
# 4. Wczytaj tylko kolumny: 'firstname', 'lastname', 'age', 'email'
# 5. Ustaw `email` jako kolumnę indeksu
# 6. Pomiń pierwszy wiersz
# 7. Zdefiniuj zmienną `result` z rozwiązaniem
# 8. Uruchom doctesty - wszystkie muszą się powieść

# %% Expected
# >>> result
#                     firstname    lastname  age
# email
# alice@example.com       Alice     Apricot   30
# bob@example.com           Bob  Blackthorn   31
# carol@example.com       Carol        Corn   32
# dave@example.org         Dave      Durian   33
# eve@example.org           Eve  Elderberry   34
# mallory@example.net   Mallory       Melon   15

# %% Hints
# - `DataFrame.read_csv()`

# %% Doctests
"""
>>> import sys; sys.tracebacklimit = 0

>>> assert sys.version_info >= (3, 9), \
'Python has an is invalid version; expected: `3.9` or newer.'

>>> assert 'result' in globals(), \
'Variable `result` is not defined; assign result of your program to it.'

>>> assert result is not Ellipsis, \
'Variable `result` has an invalid value; assign result of your program to it.'

>>> assert type(result) is pd.DataFrame, \
'Variable `result` has an invalid type; expected: `pd.DataFrame`.'

>>> pd.set_option('display.max_columns', 50)
>>> pd.set_option('display.max_rows', 200)
>>> pd.set_option('display.width', 500)
>>> pd.set_option('display.memory_usage', 'deep')
>>> pd.set_option('display.precision', 4)

>>> result  # doctest: +NORMALIZE_WHITESPACE
                    firstname    lastname  age
email
alice@example.com       Alice     Apricot   30
bob@example.com           Bob  Blackthorn   31
carol@example.com       Carol        Corn   32
dave@example.org         Dave      Durian   33
eve@example.org           Eve  Elderberry   34
mallory@example.net   Mallory       Melon   15
"""

# %% Run
# - PyCharm: right-click in the editor and `Run Doctest in ...`
# - PyCharm: keyboard shortcut `Control + Shift + F10`
# - Terminal: `python -m doctest -f -v myfile.py`

# %% Imports
import pandas as pd

# %% Types
result: pd.DataFrame

# %% Data
DATA = 'https://python3.info/_static/example.xlsx'

NAMES = [
    'firstname',
    'lastname',
    'age',
    'email',
    'lastlogin',
    'is_active',
    'groups',
]

# %% Result
result = ...