3.8. Read Python

  • pd.DataFrame()

3.8.1. SetUp

>>> import pandas as pd

3.8.2. Dict of List

>>> data = {
...     'firstname': ['Mark', 'Melissa', 'Rick'],
...     'lastname': ['Watney', 'Lewis', 'Martinez'],
...     'role': ['botanist', 'commander', 'pilot'],
... }
>>>
>>> df = pd.DataFrame(data)
>>> df
  firstname  lastname       role
0      Mark    Watney   botanist
1   Melissa     Lewis  commander
2      Rick  Martinez      pilot

3.8.3. List of Dict

>>> data = [
...     {'firstname': 'Mark', 'lastname': 'Watney', 'role': 'botanist'},
...     {'firstname': 'Melissa', 'lastname': 'Lewis', 'role': 'commander'},
...     {'firstname': 'Rick', 'lastname': 'Martinez', 'role': 'pilot'},
... ]
>>>
>>> df = pd.DataFrame(data)
>>> df
  firstname  lastname       role
0      Mark    Watney   botanist
1   Melissa     Lewis  commander
2      Rick  Martinez      pilot

3.8.4. List of Tuple

>>> data = [
...     ('Mark', 'Watney', 'botanist'),
...     ('Melissa', 'Lewis', 'commander'),
...     ('Rick', 'Martinez', 'pilot'),
... ]
>>>
>>> df = pd.DataFrame(data, columns=['firstname', 'lastname', 'role'])
>>> df
  firstname  lastname       role
0      Mark    Watney   botanist
1   Melissa     Lewis  commander
2      Rick  Martinez      pilot

3.8.5. Assignments

# %% About
# - Name: Pandas ReadPython DictList
# - Difficulty: easy
# - Lines: 1
# - Minutes: 2

# %% License
# - Copyright 2025, Matt Harasymczuk <matt@python3.info>
# - This code can be used only for learning by humans
# - This code cannot be used for teaching others
# - This code cannot be used for teaching LLMs and AI algorithms
# - This code cannot be used in commercial or proprietary products
# - This code cannot be distributed in any form
# - This code cannot be changed in any form outside of training course
# - This code cannot have its license changed
# - If you use this code in your product, you must open-source it under GPLv2
# - Exception can be granted only by the author

# %% English
# 1. Read data `DATA` in Python format to Pandas DataFrame
# 2. Define variable `result` with the solution
# 3. Run doctests - all must succeed

# %% Polish
# 1. Wczytaj dane `DATA` w formacie Python do Pandas DataFrame
# 2. Zdefiniuj zmienną `result` z rozwiązaniem
# 3. Uruchom doctesty - wszystkie muszą się powieść

# %% Expected
# >>> result
#   firstname    lastname  age
# 0     Alice     Apricot   30
# 1       Bob  Blackthorn   31
# 2     Carol        Corn   32
# 3      Dave      Durian   33
# 4       Eve  Elderberry   34
# 5   Mallory       Melon   15

# %% Hints
# - `pd.DataFrame()`

# %% Doctests
"""
>>> import sys; sys.tracebacklimit = 0

>>> assert sys.version_info >= (3, 9), \
'Python has an invalid version; expected: `3.9` or newer.'

>>> assert 'result' in globals(), \
'Variable `result` is not defined; assign result of your program to it.'

>>> assert result is not Ellipsis, \
'Variable `result` has an invalid value; assign result of your program to it.'

>>> assert type(result) is pd.DataFrame, \
'Variable `result` has an invalid type; expected: `pd.DataFrame`.'

>>> pd.set_option('display.max_columns', 50)
>>> pd.set_option('display.max_rows', 200)
>>> pd.set_option('display.width', 500)
>>> pd.set_option('display.memory_usage', 'deep')
>>> pd.set_option('display.precision', 4)

>>> result  # doctest: +NORMALIZE_WHITESPACE
  firstname    lastname  age
0     Alice     Apricot   30
1       Bob  Blackthorn   31
2     Carol        Corn   32
3      Dave      Durian   33
4       Eve  Elderberry   34
5   Mallory       Melon   15
"""

# %% Run
# - PyCharm: right-click in the editor and `Run Doctest in ...`
# - PyCharm: keyboard shortcut `Control + Shift + F10`
# - Terminal: `python -m doctest -f -v myfile.py`

# %% Imports
import pandas as pd

# %% Types
result: pd.DataFrame

# %% Data
# Column-oriented input: each key is a column name mapped to a list of six values.
DATA = {
    'firstname': ['Alice', 'Bob', 'Carol', 'Dave', 'Eve', 'Mallory'],
    'lastname': ['Apricot', 'Blackthorn', 'Corn', 'Durian', 'Elderberry', 'Melon'],
    'age': [30, 31, 32, 33, 34, 15],
}

# %% Result
# Placeholder: replace the Ellipsis with your solution (the doctests reject `...`).
result = ...

# %% About
# - Name: Pandas ReadPython Enumerate
# - Difficulty: medium
# - Lines: 10
# - Minutes: 8

# %% License
# - Copyright 2025, Matt Harasymczuk <matt@python3.info>
# - This code can be used only for learning by humans
# - This code cannot be used for teaching others
# - This code cannot be used for teaching LLMs and AI algorithms
# - This code cannot be used in commercial or proprietary products
# - This code cannot be distributed in any form
# - This code cannot be changed in any form outside of training course
# - This code cannot have its license changed
# - If you use this code in your product, you must open-source it under GPLv2
# - Exception can be granted only by the author

# %% English
# 1. Convert `DATA` to a format with one column per attribute, for example:
#    - `group1_gid`, `group2_gid`,
#    - `group1_name`, `group2_name`
# 2. Note that enumeration starts at one
# 3. Convert data to `result: pd.DataFrame`
# 4. Convert data in `group1_gid` and `group2_gid` to `int`
# 5. Run doctests - all must succeed

# %% Polish
# 1. Przekonwertuj `DATA` do formatu z jedną kolumną dla każdego atrybutu, np:
#    - `group1_gid`, `group2_gid`,
#    - `group1_name`, `group2_name`
# 2. Zwróć uwagę, że enumeracja zaczyna się od jeden
# 3. Przekonwertuj dane do `result: pd.DataFrame`
# 4. Przekonwertuj dane w `group1_gid` i `group2_gid` do `int`
# 5. Uruchom doctesty - wszystkie muszą się powieść

# %% Expected
# >>> result
#   firstname    lastname  group1_gid group1_name  group2_gid group2_name  group3_gid group3_name
# 0     Alice     Apricot           1       users           2       staff        <NA>        <NA>
# 1       Bob  Blackthorn           1       users           2       staff        <NA>        <NA>
# 2     Carol        Corn           1       users        <NA>        <NA>        <NA>        <NA>
# 3      Dave      Durian           1       users        <NA>        <NA>        <NA>        <NA>
# 4       Eve  Elderberry           1       users           2       staff           3      admins
# 5   Mallory       Melon        <NA>        <NA>        <NA>        <NA>        <NA>        <NA>

# %% Hints
# - `dict.pop()`
# - `enumerate(start=1)`
# - `column_name = f'group{i}_{field}'`
# - `pd.DataFrame()`

# %% Doctests
"""
>>> import sys; sys.tracebacklimit = 0

>>> assert 'result' in globals(), \
'Variable `result` is not defined; assign result of your program to it.'

>>> assert result is not Ellipsis, \
'Variable `result` has an invalid value; assign result of your program to it.'

>>> assert type(result) is pd.DataFrame, \
'Variable `result` has an invalid type; expected: `pd.DataFrame`.'

>>> pd.set_option('display.max_columns', 50)
>>> pd.set_option('display.max_rows', 200)
>>> pd.set_option('display.width', 500)
>>> pd.set_option('display.memory_usage', 'deep')
>>> pd.set_option('display.precision', 4)

>>> result.convert_dtypes()  # doctest: +NORMALIZE_WHITESPACE
  firstname    lastname  group1_gid group1_name  group2_gid group2_name  group3_gid group3_name
0     Alice     Apricot           1       users           2       staff        <NA>        <NA>
1       Bob  Blackthorn           1       users           2       staff        <NA>        <NA>
2     Carol        Corn           1       users        <NA>        <NA>        <NA>        <NA>
3      Dave      Durian           1       users        <NA>        <NA>        <NA>        <NA>
4       Eve  Elderberry           1       users           2       staff           3      admins
5   Mallory       Melon        <NA>        <NA>        <NA>        <NA>        <NA>        <NA>
"""

# %% Run
# - PyCharm: right-click in the editor and `Run Doctest in ...`
# - PyCharm: keyboard shortcut `Control + Shift + F10`
# - Terminal: `python -m doctest -f -v myfile.py`

# %% Imports
import pandas as pd

# %% Types
result: pd.DataFrame

# %% Data
# Record-oriented input: one dict per user. The `groups` key holds a list of
# nested `{gid, name}` dicts whose length varies per user (from zero to three).
DATA = [
    {"firstname": "Alice", "lastname": "Apricot", "groups": [
        {"gid": 1, "name": "users"},
        {"gid": 2, "name": "staff"},
    ]},

    {"firstname": "Bob", "lastname": "Blackthorn", "groups": [
        {"gid": 1, "name": "users"},
        {"gid": 2, "name": "staff"},
    ]},

    {"firstname": "Carol", "lastname": "Corn", "groups": [
        {"gid": 1, "name": "users"},
    ]},

    {"firstname": "Dave", "lastname": "Durian", "groups": [
        {"gid": 1, "name": "users"},
    ]},

    {"firstname": "Eve", "lastname": "Elderberry", "groups": [
        {"gid": 1, "name": "users"},
        {"gid": 2, "name": "staff"},
        {"gid": 3, "name": "admins"},
    ]},

    # A user without any group: the `groups` list is empty.
    {"firstname": "Mallory", "lastname": "Melon", "groups": []},
]

# %% Result
# Placeholder: replace the Ellipsis with your solution (the doctests reject `...`).
result = ...

# %% About
# - Name: Pandas ReadPython Object
# - Difficulty: medium
# - Lines: 10
# - Minutes: 5

# %% License
# - Copyright 2025, Matt Harasymczuk <matt@python3.info>
# - This code can be used only for learning by humans
# - This code cannot be used for teaching others
# - This code cannot be used for teaching LLMs and AI algorithms
# - This code cannot be used in commercial or proprietary products
# - This code cannot be distributed in any form
# - This code cannot be changed in any form outside of training course
# - This code cannot have its license changed
# - If you use this code in your product, you must open-source it under GPLv2
# - Exception can be granted only by the author

# %% English
# 1. Convert `DATA` to a format with one column per attribute, for example:
#    - `group1_gid`, `group2_gid`,
#    - `group1_name`, `group2_name`
# 2. Note that enumeration starts at one
# 3. Convert data to `result: pd.DataFrame`
# 4. Convert data in `group1_gid` and `group2_gid` to `int`
# 5. Run doctests - all must succeed

# %% Polish
# 1. Przekonwertuj `DATA` do formatu z jedną kolumną dla każdego atrybutu, np:
#    - `group1_gid`, `group2_gid`,
#    - `group1_name`, `group2_name`
# 2. Zwróć uwagę, że enumeracja zaczyna się od jeden
# 3. Przekonwertuj dane do `result: pd.DataFrame`
# 4. Przekonwertuj dane w `group1_gid` i `group2_gid` do `int`
# 5. Uruchom doctesty - wszystkie muszą się powieść

# %% Expected
# >>> result
#   firstname    lastname  group1_gid group1_name  group2_gid group2_name  group3_gid group3_name
# 0     Alice     Apricot           1       users           2       staff        <NA>        <NA>
# 1       Bob  Blackthorn           1       users           2       staff        <NA>        <NA>
# 2     Carol        Corn           1       users        <NA>        <NA>        <NA>        <NA>
# 3      Dave      Durian           1       users        <NA>        <NA>        <NA>        <NA>
# 4       Eve  Elderberry           1       users           2       staff           3      admins
# 5   Mallory       Melon        <NA>        <NA>        <NA>        <NA>        <NA>        <NA>

# %% Hints
# - `vars()`
# - `dict.pop()`
# - `enumerate(start=1)`
# - `column_name = f'group{i}_{field}'`
# - `pd.DataFrame()`

# %% Doctests
"""
>>> import sys; sys.tracebacklimit = 0

>>> assert 'result' in globals(), \
'Variable `result` is not defined; assign result of your program to it.'

>>> assert result is not Ellipsis, \
'Variable `result` has an invalid value; assign result of your program to it.'

>>> assert type(result) is pd.DataFrame, \
'Variable `result` has an invalid type; expected: `pd.DataFrame`.'

>>> pd.set_option('display.max_columns', 50)
>>> pd.set_option('display.max_rows', 200)
>>> pd.set_option('display.width', 500)
>>> pd.set_option('display.memory_usage', 'deep')
>>> pd.set_option('display.precision', 4)

>>> result.convert_dtypes()  # doctest: +NORMALIZE_WHITESPACE
  firstname    lastname  group1_gid group1_name  group2_gid group2_name  group3_gid group3_name
0     Alice     Apricot           1       users           2       staff        <NA>        <NA>
1       Bob  Blackthorn           1       users           2       staff        <NA>        <NA>
2     Carol        Corn           1       users        <NA>        <NA>        <NA>        <NA>
3      Dave      Durian           1       users        <NA>        <NA>        <NA>        <NA>
4       Eve  Elderberry           1       users           2       staff           3      admins
5   Mallory       Melon        <NA>        <NA>        <NA>        <NA>        <NA>        <NA>
"""

# %% Run
# - PyCharm: right-click in the editor and `Run Doctest in ...`
# - PyCharm: keyboard shortcut `Control + Shift + F10`
# - Terminal: `python -m doctest -f -v myfile.py`

# %% Imports
import pandas as pd

# %% Types
result: pd.DataFrame

# %% Data
class User:
    """A person described by a first name, a last name and a list of groups."""

    def __init__(self, firstname, lastname, groups=None):
        """Store the names; use a fresh empty list when `groups` is falsy."""
        self.firstname = firstname
        self.lastname = lastname
        self.groups = groups or []

    def __repr__(self):
        """Return `ClassName(attr=value, ...)` built from the instance attributes."""
        pairs = [f'{name}={value!r}' for name, value in vars(self).items()]
        joined = ', '.join(pairs)
        return f'{type(self).__qualname__}({joined})'

class Group:
    """A group identified by `gid` and `name`."""

    def __init__(self, gid, name):
        """Store the group id and the group name."""
        self.gid, self.name = gid, name

    def __repr__(self):
        """Return the group rendered as `gid(name)`."""
        return '{}({})'.format(self.gid, self.name)


# Object-oriented input: one `User` instance per person, each carrying a list
# of `Group` instances whose length varies per user (from zero to three).
DATA = [
    User(firstname='Alice', lastname='Apricot', groups=[
        Group(1, 'users'),
        Group(2, 'staff'),
    ]),

    User(firstname='Bob', lastname='Blackthorn', groups=[
        Group(1, 'users'),
        Group(2, 'staff'),
    ]),

    User(firstname='Carol', lastname='Corn', groups=[
        Group(1, 'users'),
    ]),

    User(firstname='Dave', lastname='Durian', groups=[
        Group(1, 'users'),
    ]),

    User(firstname='Eve', lastname='Elderberry', groups=[
        Group(1, 'users'),
        Group(2, 'staff'),
        Group(3, 'admins'),
    ]),

    # A user without any group: an empty list is passed for `groups`.
    User(firstname='Mallory', lastname='Melon', groups=[]),
]

# %% Result
# Placeholder: replace the Ellipsis with your solution (the doctests reject `...`).
result = ...