3.8. Read Python
pd.DataFrame()
3.8.1. SetUp
>>> import pandas as pd
3.8.2. Dict of List
>>> data = {
... 'firstname': ['Mark', 'Melissa', 'Rick'],
... 'lastname': ['Watney', 'Lewis', 'Martinez'],
... 'role': ['botanist', 'commander', 'pilot'],
... }
>>>
>>> df = pd.DataFrame(data)
>>> df
firstname lastname role
0 Mark Watney botanist
1 Melissa Lewis commander
2 Rick Martinez pilot
3.8.3. List of Dict
>>> data = [
... {'firstname': 'Mark', 'lastname': 'Watney', 'role': 'botanist'},
... {'firstname': 'Melissa', 'lastname': 'Lewis', 'role': 'commander'},
... {'firstname': 'Rick', 'lastname': 'Martinez', 'role': 'pilot'},
... ]
>>>
>>> df = pd.DataFrame(data)
>>> df
firstname lastname role
0 Mark Watney botanist
1 Melissa Lewis commander
2 Rick Martinez pilot
3.8.4. List of Tuple
>>> data = [
... ('Mark', 'Watney', 'botanist'),
... ('Melissa', 'Lewis', 'commander'),
... ('Rick', 'Martinez', 'pilot'),
... ]
>>>
>>> df = pd.DataFrame(data, columns=['firstname', 'lastname', 'role'])
>>> df
firstname lastname role
0 Mark Watney botanist
1 Melissa Lewis commander
2 Rick Martinez pilot
3.8.5. Assignments
# %% About
# - Name: Pandas ReadPython DictList
# - Difficulty: easy
# - Lines: 1
# - Minutes: 2
# %% License
# - Copyright 2025, Matt Harasymczuk <matt@python3.info>
# - This code can be used only for learning by humans
# - This code cannot be used for teaching others
# - This code cannot be used for teaching LLMs and AI algorithms
# - This code cannot be used in commercial or proprietary products
# - This code cannot be distributed in any form
# - This code cannot be changed in any form outside of training course
# - This code cannot have its license changed
# - If you use this code in your product, you must open-source it under GPLv2
# - Exception can be granted only by the author
# %% English
# 1. Read data `DATA` in Python format to Pandas DataFrame
# 2. Define variable `result` with the solution
# 3. Run doctests - all must succeed
# %% Polish
# 1. Wczytaj dane `DATA` w formacie Python do Pandas DataFrame
# 2. Zdefiniuj zmienną `result` z rozwiązaniem
# 3. Uruchom doctesty - wszystkie muszą się powieść
# %% Expected
# >>> result
# firstname lastname age
# 0 Alice Apricot 30
# 1 Bob Blackthorn 31
# 2 Carol Corn 32
# 3 Dave Durian 33
# 4 Eve Elderberry 34
# 5 Mallory Melon 15
# %% Hints
# - `pd.DataFrame()`
# %% Doctests
"""
>>> import sys; sys.tracebacklimit = 0
>>> assert sys.version_info >= (3, 9), \
'Python has an invalid version; expected: `3.9` or newer.'
>>> assert 'result' in globals(), \
'Variable `result` is not defined; assign result of your program to it.'
>>> assert result is not Ellipsis, \
'Variable `result` has an invalid value; assign result of your program to it.'
>>> assert type(result) is pd.DataFrame, \
'Variable `result` has an invalid type; expected: `pd.DataFrame`.'
>>> pd.set_option('display.max_columns', 50)
>>> pd.set_option('display.max_rows', 200)
>>> pd.set_option('display.width', 500)
>>> pd.set_option('display.memory_usage', 'deep')
>>> pd.set_option('display.precision', 4)
>>> result # doctest: +NORMALIZE_WHITESPACE
firstname lastname age
0 Alice Apricot 30
1 Bob Blackthorn 31
2 Carol Corn 32
3 Dave Durian 33
4 Eve Elderberry 34
5 Mallory Melon 15
"""
# %% Run
# - PyCharm: right-click in the editor and `Run Doctest in ...`
# - PyCharm: keyboard shortcut `Control + Shift + F10`
# - Terminal: `python -m doctest -f -v myfile.py`
# %% Imports
import pandas as pd
# %% Types
# Declares the type the solution is expected to have; the value itself is
# assigned in the Result cell below.
result: pd.DataFrame
# %% Data
# Column-oriented input: each key is a column name mapped to a list holding
# one value per row (six rows in total).
DATA = {
    'firstname': ['Alice', 'Bob', 'Carol', 'Dave', 'Eve', 'Mallory'],
    'lastname': ['Apricot', 'Blackthorn', 'Corn', 'Durian', 'Elderberry', 'Melon'],
    'age': [30, 31, 32, 33, 34, 15],
}
# %% Result
# Placeholder to be replaced with the solution; the doctests above reject a
# value that is still `Ellipsis`.
result = ...
# %% About
# - Name: Pandas ReadPython Enumerate
# - Difficulty: medium
# - Lines: 10
# - Minutes: 8
# %% License
# - Copyright 2025, Matt Harasymczuk <matt@python3.info>
# - This code can be used only for learning by humans
# - This code cannot be used for teaching others
# - This code cannot be used for teaching LLMs and AI algorithms
# - This code cannot be used in commercial or proprietary products
# - This code cannot be distributed in any form
# - This code cannot be changed in any form outside of training course
# - This code cannot have its license changed
# - If you use this code in your product, you must open-source it under GPLv2
# - Exception can be granted only by the author
# %% English
# 1. Convert `DATA` to a format with one column per attribute, for example:
# - `group1_gid`, `group2_gid`,
# - `group1_name`, `group2_name`
# 2. Note that enumeration starts at one
# 3. Convert data to `result: pd.DataFrame`
# 4. Convert data in `group1_gid` and `group2_gid` to `int`
# 5. Run doctests - all must succeed
# %% Polish
# 1. Przekonwertuj `DATA` do formatu z jedną kolumną dla każdego atrybutu, np:
# - `group1_gid`, `group2_gid`,
# - `group1_name`, `group2_name`
# 2. Zwróć uwagę, że enumeracja zaczyna się od jeden
# 3. Przekonwertuj dane do `result: pd.DataFrame`
# 4. Przekonwertuj dane w `group1_gid` i `group2_gid` do `int`
# 5. Uruchom doctesty - wszystkie muszą się powieść
# %% Expected
# >>> result
# firstname lastname group1_gid group1_name group2_gid group2_name group3_gid group3_name
# 0 Alice Apricot 1 users 2 staff <NA> <NA>
# 1 Bob Blackthorn 1 users 2 staff <NA> <NA>
# 2 Carol Corn 1 users <NA> <NA> <NA> <NA>
# 3 Dave Durian 1 users <NA> <NA> <NA> <NA>
# 4 Eve Elderberry 1 users 2 staff 3 admins
# 5 Mallory Melon <NA> <NA> <NA> <NA> <NA> <NA>
# %% Hints
# - `dict.pop()`
# - `enumerate(start=1)`
# - `column_name = f'group{i}_{field}'`
# - `pd.DataFrame()`
# %% Doctests
"""
>>> import sys; sys.tracebacklimit = 0
>>> assert 'result' in globals(), \
'Variable `result` is not defined; assign result of your program to it.'
>>> assert result is not Ellipsis, \
'Variable `result` has an invalid value; assign result of your program to it.'
>>> assert type(result) is pd.DataFrame, \
'Variable `result` has an invalid type; expected: `pd.DataFrame`.'
>>> pd.set_option('display.max_columns', 50)
>>> pd.set_option('display.max_rows', 200)
>>> pd.set_option('display.width', 500)
>>> pd.set_option('display.memory_usage', 'deep')
>>> pd.set_option('display.precision', 4)
>>> result.convert_dtypes() # doctest: +NORMALIZE_WHITESPACE
firstname lastname group1_gid group1_name group2_gid group2_name group3_gid group3_name
0 Alice Apricot 1 users 2 staff <NA> <NA>
1 Bob Blackthorn 1 users 2 staff <NA> <NA>
2 Carol Corn 1 users <NA> <NA> <NA> <NA>
3 Dave Durian 1 users <NA> <NA> <NA> <NA>
4 Eve Elderberry 1 users 2 staff 3 admins
5 Mallory Melon <NA> <NA> <NA> <NA> <NA> <NA>
"""
# %% Run
# - PyCharm: right-click in the editor and `Run Doctest in ...`
# - PyCharm: keyboard shortcut `Control + Shift + F10`
# - Terminal: `python -m doctest -f -v myfile.py`
# %% Imports
import pandas as pd
# %% Types
# Declares the type the solution is expected to have; the value itself is
# assigned in the Result cell below.
result: pd.DataFrame
# %% Data
# Record-oriented input: one dict per user. Each record carries `firstname`,
# `lastname` and `groups`, a list of dicts with `gid` and `name` keys whose
# length varies between records (from empty up to three entries).
DATA = [
    {"firstname": "Alice", "lastname": "Apricot", "groups": [
        {"gid": 1, "name": "users"},
        {"gid": 2, "name": "staff"},
    ]},
    {"firstname": "Bob", "lastname": "Blackthorn", "groups": [
        {"gid": 1, "name": "users"},
        {"gid": 2, "name": "staff"},
    ]},
    {"firstname": "Carol", "lastname": "Corn", "groups": [
        {"gid": 1, "name": "users"},
    ]},
    {"firstname": "Dave", "lastname": "Durian", "groups": [
        {"gid": 1, "name": "users"},
    ]},
    {"firstname": "Eve", "lastname": "Elderberry", "groups": [
        {"gid": 1, "name": "users"},
        {"gid": 2, "name": "staff"},
        {"gid": 3, "name": "admins"},
    ]},
    {"firstname": "Mallory", "lastname": "Melon", "groups": []},
]
# %% Result
# Placeholder to be replaced with the solution; the doctests above reject a
# value that is still `Ellipsis`.
result = ...
# %% About
# - Name: Pandas ReadPython Object
# - Difficulty: medium
# - Lines: 10
# - Minutes: 5
# %% License
# - Copyright 2025, Matt Harasymczuk <matt@python3.info>
# - This code can be used only for learning by humans
# - This code cannot be used for teaching others
# - This code cannot be used for teaching LLMs and AI algorithms
# - This code cannot be used in commercial or proprietary products
# - This code cannot be distributed in any form
# - This code cannot be changed in any form outside of training course
# - This code cannot have its license changed
# - If you use this code in your product, you must open-source it under GPLv2
# - Exception can be granted only by the author
# %% English
# 1. Convert `DATA` to a format with one column per attribute, for example:
# - `group1_gid`, `group2_gid`,
# - `group1_name`, `group2_name`
# 2. Note that enumeration starts at one
# 3. Convert data to `result: pd.DataFrame`
# 4. Convert data in `group1_gid` and `group2_gid` to `int`
# 5. Run doctests - all must succeed
# %% Polish
# 1. Przekonwertuj `DATA` do formatu z jedną kolumną dla każdego atrybutu, np:
# - `group1_gid`, `group2_gid`,
# - `group1_name`, `group2_name`
# 2. Zwróć uwagę, że enumeracja zaczyna się od jeden
# 3. Przekonwertuj dane do `result: pd.DataFrame`
# 4. Przekonwertuj dane w `group1_gid` i `group2_gid` do `int`
# 5. Uruchom doctesty - wszystkie muszą się powieść
# %% Expected
# >>> result
# firstname lastname group1_gid group1_name group2_gid group2_name group3_gid group3_name
# 0 Alice Apricot 1 users 2 staff <NA> <NA>
# 1 Bob Blackthorn 1 users 2 staff <NA> <NA>
# 2 Carol Corn 1 users <NA> <NA> <NA> <NA>
# 3 Dave Durian 1 users <NA> <NA> <NA> <NA>
# 4 Eve Elderberry 1 users 2 staff 3 admins
# 5 Mallory Melon <NA> <NA> <NA> <NA> <NA> <NA>
# %% Hints
# - `vars()`
# - `dict.pop()`
# - `enumerate(start=1)`
# - `column_name = f'group{i}_{field}'`
# - `pd.DataFrame()`
# %% Doctests
"""
>>> import sys; sys.tracebacklimit = 0
>>> assert 'result' in globals(), \
'Variable `result` is not defined; assign result of your program to it.'
>>> assert result is not Ellipsis, \
'Variable `result` has an invalid value; assign result of your program to it.'
>>> assert type(result) is pd.DataFrame, \
'Variable `result` has an invalid type; expected: `pd.DataFrame`.'
>>> pd.set_option('display.max_columns', 50)
>>> pd.set_option('display.max_rows', 200)
>>> pd.set_option('display.width', 500)
>>> pd.set_option('display.memory_usage', 'deep')
>>> pd.set_option('display.precision', 4)
>>> result.convert_dtypes() # doctest: +NORMALIZE_WHITESPACE
firstname lastname group1_gid group1_name group2_gid group2_name group3_gid group3_name
0 Alice Apricot 1 users 2 staff <NA> <NA>
1 Bob Blackthorn 1 users 2 staff <NA> <NA>
2 Carol Corn 1 users <NA> <NA> <NA> <NA>
3 Dave Durian 1 users <NA> <NA> <NA> <NA>
4 Eve Elderberry 1 users 2 staff 3 admins
5 Mallory Melon <NA> <NA> <NA> <NA> <NA> <NA>
"""
# %% Run
# - PyCharm: right-click in the editor and `Run Doctest in ...`
# - PyCharm: keyboard shortcut `Control + Shift + F10`
# - Terminal: `python -m doctest -f -v myfile.py`
# %% Imports
import pandas as pd
# %% Types
# Declares the type the solution is expected to have; this bare annotation
# does not bind a value to `result`.
result: pd.DataFrame
# %% Data
class User:
    """Person record with a first name, a last name and a list of groups."""

    def __init__(self, firstname, lastname, groups=None):
        """Store the names and the groups.

        A falsy ``groups`` argument (``None`` or an empty list) is replaced
        with a fresh empty list, so instances never share a default list.
        """
        self.firstname = firstname
        self.lastname = lastname
        self.groups = groups or []

    def __repr__(self):
        """Return ``ClassName(attr=value, ...)`` for every instance attribute."""
        pairs = [f'{name}={value!r}' for name, value in vars(self).items()]
        return f"{self.__class__.__qualname__}({', '.join(pairs)})"
class Group:
    """Group record holding a ``gid`` and a ``name``."""

    def __init__(self, gid, name):
        """Store the group id and the group name unchanged."""
        self.gid, self.name = gid, name

    def __repr__(self):
        """Return the compact form ``gid(name)``, e.g. ``1(users)``."""
        return '{}({})'.format(self.gid, self.name)
# Object-oriented input: one `User` per person, each constructed with a list
# of `Group(gid, name)` instances whose length varies between users (from
# empty up to three entries).
DATA = [
    User(firstname='Alice', lastname='Apricot', groups=[
        Group(1, 'users'),
        Group(2, 'staff'),
    ]),
    User(firstname='Bob', lastname='Blackthorn', groups=[
        Group(1, 'users'),
        Group(2, 'staff'),
    ]),
    User(firstname='Carol', lastname='Corn', groups=[
        Group(1, 'users'),
    ]),
    User(firstname='Dave', lastname='Durian', groups=[
        Group(1, 'users'),
    ]),
    User(firstname='Eve', lastname='Elderberry', groups=[
        Group(1, 'users'),
        Group(2, 'staff'),
        Group(3, 'admins'),
    ]),
    User(firstname='Mallory', lastname='Melon', groups=[]),
]
# %% Result
# Placeholder to be replaced with the solution; the doctests above reject a
# value that is still `Ellipsis`.
result = ...