12.4. Serialization Normalize
12.4.1. Sequence
>>> DATA = ('Alice', 'Apricot', '30')
>>> SCHEMA = (str, str, int)
>>>
>>>
>>> def normalize(data, schema):
...     return tuple(t(v) for t,v in zip(schema,data))
>>>
>>>
>>> normalize(DATA, SCHEMA)
('Alice', 'Apricot', 30)
12.4.2. List of Sequences
>>> DATA = [
... ('firstname', 'lastname', 'age'),
... ('Alice', 'Apricot', '30'),
... ('Bob', 'Blackthorn', '31'),
... ('Carol', 'Corn', '32'),
... ('Dave', 'Durian', '33'),
... ('Eve', 'Elderberry', '34'),
... ('Mallory', 'Melon', '15'),
... ]
>>>
>>> SCHEMA = (str, str, int)
>>>
>>>
>>> def normalize(data, schema):
...     header, *rows = data
...     values = [tuple(t(v) for t,v in zip(schema,row)) for row in rows]
...     return [header] + values
>>>
>>>
>>> normalize(DATA, SCHEMA)
[('firstname', 'lastname', 'age'),
('Alice', 'Apricot', 30),
('Bob', 'Blackthorn', 31),
('Carol', 'Corn', 32),
('Dave', 'Durian', 33),
('Eve', 'Elderberry', 34),
('Mallory', 'Melon', 15)]
12.4.3. Mapping
>>> DATA = {'firstname': 'Alice', 'lastname': 'Apricot', 'age': '30'}
>>> SCHEMA = {'firstname': str, 'lastname': str, 'age': int}
>>>
>>>
>>> def normalize(data, schema):
...     return {k:schema[k](v) for k,v in data.items()}
>>>
>>>
>>> normalize(DATA, SCHEMA)
{'firstname': 'Alice', 'lastname': 'Apricot', 'age': 30}
12.4.4. List of Mappings
>>> DATA = [
... {'firstname': 'Alice', 'lastname': 'Apricot', 'age': '30'},
... {'firstname': 'Bob', 'lastname': 'Blackthorn', 'age': '31'},
... {'firstname': 'Carol', 'lastname': 'Corn', 'age': '32'},
... {'firstname': 'Dave', 'lastname': 'Durian', 'age': '33'},
... {'firstname': 'Eve', 'lastname': 'Elderberry', 'age': '34'},
... {'firstname': 'Mallory', 'lastname': 'Melon', 'age': '15'},
... ]
>>>
>>> SCHEMA = {'firstname': str, 'lastname': str, 'age': int}
>>>
>>> def normalize(data, schema):
...     return [{k:schema[k](v) for k,v in row.items()} for row in data]
>>>
>>>
>>> normalize(DATA, SCHEMA)
[{'firstname': 'Alice', 'lastname': 'Apricot', 'age': 30},
{'firstname': 'Bob', 'lastname': 'Blackthorn', 'age': 31},
{'firstname': 'Carol', 'lastname': 'Corn', 'age': 32},
{'firstname': 'Dave', 'lastname': 'Durian', 'age': 33},
{'firstname': 'Eve', 'lastname': 'Elderberry', 'age': 34},
{'firstname': 'Mallory', 'lastname': 'Melon', 'age': 15}]
12.4.5. List of Objects
>>> from inspect import get_annotations
>>>
>>>
>>> class User:
...     def __init__(self, firstname, lastname, age=None):
...         self.firstname = firstname
...         self.lastname = lastname
...         self.age = age
...
...     def __repr__(self):
...         clsname = self.__class__.__qualname__
...         arguments = ', '.join(f'{k}={v!r}' for k,v in vars(self).items())
...         return f'{clsname}({arguments})'
>>>
>>>
>>> DATA = [
... User('Alice', 'Apricot', age='30'),
... User('Bob', 'Blackthorn', age='31'),
... User('Carol', 'Corn', age='32'),
... User('Dave', 'Durian', age='33'),
... User('Eve', 'Elderberry', age='34'),
... User('Mallory', 'Melon', age='15'),
... ]
>>>
>>>
>>> class Schema:
...     firstname: str
...     lastname: str
...     age: int
>>>
>>>
>>> def normalize(data, schema):
...     schema = get_annotations(schema)
...     fields = [{k:schema[k](v) for k,v in vars(row).items()} for row in data]
...     return [User(**f) for f in fields]
>>>
>>>
>>> normalize(DATA, Schema)
[User(firstname='Alice', lastname='Apricot', age=30),
User(firstname='Bob', lastname='Blackthorn', age=31),
User(firstname='Carol', lastname='Corn', age=32),
User(firstname='Dave', lastname='Durian', age=33),
User(firstname='Eve', lastname='Elderberry', age=34),
User(firstname='Mallory', lastname='Melon', age=15)]
12.4.6. Assignments
# %% About
# - Name: Serialization Normalize Sequence
# - Difficulty: easy
# - Lines: 3
# - Minutes: 3
# %% License
# - Copyright 2025, Matt Harasymczuk <matt@python3.info>
# - This code can be used only for learning by humans
# - This code cannot be used for teaching others
# - This code cannot be used for teaching LLMs and AI algorithms
# - This code cannot be used in commercial or proprietary products
# - This code cannot be distributed in any form
# - This code cannot be changed in any form outside of training course
# - This code cannot have its license changed
# - If you use this code in your product, you must open-source it under GPLv2
# - Exception can be granted only by the author
# %% English
# 1. Normalize ``DATA`` according to ``SCHEMA``
# 2. Run doctests - all must succeed
# %% Polish
# 1. Znormalizuj ``DATA`` zgodnie ze ``SCHEMA``
# 2. Uruchom doctesty - wszystkie muszą się powieść
# %% Example
# >>> result
# ('Alice', 'Apricot', 30)
# %% Hints
# - `[x for x in data]`
# - `str.join()`
# %% Doctests
"""
>>> import sys; sys.tracebacklimit = 0
>>> assert sys.version_info >= (3, 12), \
'Python 3.12+ required'
>>> assert result is not Ellipsis, \
'Assign result to variable: `result`'
>>> assert type(result) is tuple, \
'Variable `result` has invalid type, should be tuple'
>>> print(result)
('Alice', 'Apricot', 30)
"""
# %% Run
# - PyCharm: right-click in the editor and `Run Doctest in ...`
# - PyCharm: keyboard shortcut `Control + Shift + F10`
# - Terminal: `python -m doctest -f -v myfile.py`
# %% Imports
# %% Types
from typing import Callable
# Type aliases for the record before (all strings) and after normalization
# (last field converted to int).
type args = tuple[str,str,str]
type ret = tuple[str,str,int]
# Annotation only: states the expected signature of `normalize`;
# no function object is created by this line.
normalize: Callable[[args], ret]
# %% Data
# Raw record: every field, including the age, is a string.
DATA = ('Alice', 'Apricot', '30')
# One converter per position in DATA.
SCHEMA = (str, str, int)
# %% Result
# Placeholder: the doctests above reject Ellipsis and expect the tuple
# ('Alice', 'Apricot', 30).
result = ...
# %% About
# - Name: Serialization Normalize List of Sequences
# - Difficulty: easy
# - Lines: 5
# - Minutes: 5
# %% License
# - Copyright 2025, Matt Harasymczuk <matt@python3.info>
# - This code can be used only for learning by humans
# - This code cannot be used for teaching others
# - This code cannot be used for teaching LLMs and AI algorithms
# - This code cannot be used in commercial or proprietary products
# - This code cannot be distributed in any form
# - This code cannot be changed in any form outside of training course
# - This code cannot have its license changed
# - If you use this code in your product, you must open-source it under GPLv2
# - Exception can be granted only by the author
# %% English
# 1. Normalize ``DATA`` according to ``SCHEMA``
# 2. Run doctests - all must succeed
# %% Polish
# 1. Znormalizuj ``DATA`` zgodnie ze ``SCHEMA``
# 2. Uruchom doctesty - wszystkie muszą się powieść
# %% Example
# >>> result
# [('firstname', 'lastname', 'age'),
# ('Alice', 'Apricot', 30),
# ('Bob', 'Blackthorn', 31),
# ('Carol', 'Corn', 32),
# ('Dave', 'Durian', 33),
# ('Eve', 'Elderberry', 34),
# ('Mallory', 'Melon', 15)]
# %% Hints
# - `[x for x in data]`
# - `str.join()`
# %% Doctests
"""
>>> import sys; sys.tracebacklimit = 0
>>> assert sys.version_info >= (3, 12), \
'Python 3.12+ required'
>>> assert result is not Ellipsis, \
'Assign result to variable: `result`'
>>> assert type(result) is list, \
'Variable `result` has invalid type, should be list'
>>> print(result) # doctest: +NORMALIZE_WHITESPACE
[('firstname', 'lastname', 'age'),
('Alice', 'Apricot', 30),
('Bob', 'Blackthorn', 31),
('Carol', 'Corn', 32),
('Dave', 'Durian', 33),
('Eve', 'Elderberry', 34),
('Mallory', 'Melon', 15)]
"""
# %% Run
# - PyCharm: right-click in the editor and `Run Doctest in ...`
# - PyCharm: keyboard shortcut `Control + Shift + F10`
# - Terminal: `python -m doctest -f -v myfile.py`
# %% Imports
# %% Types
from typing import Callable
# Type aliases for one data row before and after normalization.
# NOTE(review): these describe a single 3-field row, while DATA below is a
# list of rows preceded by a header row - confirm whether the aliases were
# meant to cover the whole list.
type args = tuple[str,str,str]
type ret = tuple[str,str,int]
# Annotation only: states the expected signature of `normalize`;
# no function object is created by this line.
normalize: Callable[[args], ret]
# %% Data
# First row is the header (column names); the remaining rows are records
# whose fields, including the age, are all strings.
DATA = [
('firstname', 'lastname', 'age'),
('Alice', 'Apricot', '30'),
('Bob', 'Blackthorn', '31'),
('Carol', 'Corn', '32'),
('Dave', 'Durian', '33'),
('Eve', 'Elderberry', '34'),
('Mallory', 'Melon', '15'),
]
# One converter per column, in column order.
SCHEMA = (str, str, int)
# %% Result
# Placeholder: the doctests above reject Ellipsis and expect a list whose
# header row is unchanged and whose record rows have the age as int.
result = ...
# %% About
# - Name: Serialization Normalize Mapping
# - Difficulty: easy
# - Lines: 7
# - Minutes: 5
# %% License
# - Copyright 2025, Matt Harasymczuk <matt@python3.info>
# - This code can be used only for learning by humans
# - This code cannot be used for teaching others
# - This code cannot be used for teaching LLMs and AI algorithms
# - This code cannot be used in commercial or proprietary products
# - This code cannot be distributed in any form
# - This code cannot be changed in any form outside of training course
# - This code cannot have its license changed
# - If you use this code in your product, you must open-source it under GPLv2
# - Exception can be granted only by the author
# %% English
# 1. Normalize ``DATA`` according to ``SCHEMA``
# 2. Run doctests - all must succeed
# %% Polish
# 1. Znormalizuj ``DATA`` zgodnie ze ``SCHEMA``
# 2. Uruchom doctesty - wszystkie muszą się powieść
# %% Example
# >>> result
# {'firstname': 'Alice', 'lastname': 'Apricot', 'age': 30}
# %% Hints
# - `[x for x in data]`
# - `str.join()`
# %% Doctests
"""
>>> import sys; sys.tracebacklimit = 0
>>> assert sys.version_info >= (3, 12), \
'Python 3.12+ required'
>>> assert result is not Ellipsis, \
'Assign result to variable: `result`'
>>> assert type(result) is dict, \
'Variable `result` has invalid type, should be dict'
>>> print(result)
{'firstname': 'Alice', 'lastname': 'Apricot', 'age': 30}
"""
# %% Run
# - PyCharm: right-click in the editor and `Run Doctest in ...`
# - PyCharm: keyboard shortcut `Control + Shift + F10`
# - Terminal: `python -m doctest -f -v myfile.py`
# %% Imports
# %% Types
from typing import Callable
# Type aliases carried by this template.
# NOTE(review): these are tuple types, while DATA below is a dict keyed by
# field name - confirm whether the aliases should describe a mapping.
type args = tuple[str,str,str]
type ret = tuple[str,str,int]
# Annotation only: states the expected signature of `normalize`;
# no function object is created by this line.
normalize: Callable[[args], ret]
# %% Data
# Raw record: every value, including the age, is a string.
DATA = {'firstname': 'Alice', 'lastname': 'Apricot', 'age': '30'}
# Converter for each field, keyed by the same names as DATA.
SCHEMA = {'firstname': str, 'lastname': str, 'age': int}
# %% Result
# Placeholder: the doctests above reject Ellipsis and expect a dict with
# the same keys and the age converted to int.
result = ...
# %% About
# - Name: Serialization Normalize List of Mappings
# - Difficulty: medium
# - Lines: 7
# - Minutes: 8
# %% License
# - Copyright 2025, Matt Harasymczuk <matt@python3.info>
# - This code can be used only for learning by humans
# - This code cannot be used for teaching others
# - This code cannot be used for teaching LLMs and AI algorithms
# - This code cannot be used in commercial or proprietary products
# - This code cannot be distributed in any form
# - This code cannot be changed in any form outside of training course
# - This code cannot have its license changed
# - If you use this code in your product, you must open-source it under GPLv2
# - Exception can be granted only by the author
# %% English
# 1. Normalize ``DATA`` according to ``SCHEMA``
# 2. Run doctests - all must succeed
# %% Polish
# 1. Znormalizuj ``DATA`` zgodnie ze ``SCHEMA``
# 2. Uruchom doctesty - wszystkie muszą się powieść
# %% Example
# >>> result
# [{'firstname': 'Alice', 'lastname': 'Apricot', 'age': 30},
# {'firstname': 'Bob', 'lastname': 'Blackthorn', 'age': 31},
# {'firstname': 'Carol', 'lastname': 'Corn', 'age': 32},
# {'firstname': 'Dave', 'lastname': 'Durian', 'age': 33},
# {'firstname': 'Eve', 'lastname': 'Elderberry', 'age': 34},
# {'firstname': 'Mallory', 'lastname': 'Melon', 'age': 15}]
# %% Hints
# - `[x for x in data]`
# - `str.join()`
# %% Doctests
"""
>>> import sys; sys.tracebacklimit = 0
>>> assert sys.version_info >= (3, 12), \
'Python 3.12+ required'
>>> assert result is not Ellipsis, \
'Assign result to variable: `result`'
>>> assert type(result) is list, \
'Variable `result` has invalid type, should be list'
>>> print(result) # doctest: +NORMALIZE_WHITESPACE
[{'firstname': 'Alice', 'lastname': 'Apricot', 'age': 30},
{'firstname': 'Bob', 'lastname': 'Blackthorn', 'age': 31},
{'firstname': 'Carol', 'lastname': 'Corn', 'age': 32},
{'firstname': 'Dave', 'lastname': 'Durian', 'age': 33},
{'firstname': 'Eve', 'lastname': 'Elderberry', 'age': 34},
{'firstname': 'Mallory', 'lastname': 'Melon', 'age': 15}]
"""
# %% Run
# - PyCharm: right-click in the editor and `Run Doctest in ...`
# - PyCharm: keyboard shortcut `Control + Shift + F10`
# - Terminal: `python -m doctest -f -v myfile.py`
# %% Imports
# %% Types
from typing import Callable
# Type aliases carried by this template.
# NOTE(review): these are tuple types, while DATA below is a list of dicts
# keyed by field name - confirm whether the aliases should describe a list
# of mappings.
type args = tuple[str,str,str]
type ret = tuple[str,str,int]
# Annotation only: states the expected signature of `normalize`;
# no function object is created by this line.
normalize: Callable[[args], ret]
# %% Data
# Raw records: every value, including the age, is a string.
DATA = [
{'firstname': 'Alice', 'lastname': 'Apricot', 'age': '30'},
{'firstname': 'Bob', 'lastname': 'Blackthorn', 'age': '31'},
{'firstname': 'Carol', 'lastname': 'Corn', 'age': '32'},
{'firstname': 'Dave', 'lastname': 'Durian', 'age': '33'},
{'firstname': 'Eve', 'lastname': 'Elderberry', 'age': '34'},
{'firstname': 'Mallory', 'lastname': 'Melon', 'age': '15'},
]
# Converter for each field, keyed by the same names as the records.
SCHEMA = {'firstname': str, 'lastname': str, 'age': int}
# %% Result
# Placeholder: the doctests above reject Ellipsis and expect a list of
# dicts with the same keys and the age converted to int.
result = ...
# %% About
# - Name: Serialization Normalize List of Objects
# - Difficulty: hard
# - Lines: 10
# - Minutes: 13
# %% License
# - Copyright 2025, Matt Harasymczuk <matt@python3.info>
# - This code can be used only for learning by humans
# - This code cannot be used for teaching others
# - This code cannot be used for teaching LLMs and AI algorithms
# - This code cannot be used in commercial or proprietary products
# - This code cannot be distributed in any form
# - This code cannot be changed in any form outside of training course
# - This code cannot have its license changed
# - If you use this code in your product, you must open-source it under GPLv2
# - Exception can be granted only by the author
# %% English
# 1. Normalize ``DATA`` according to ``SCHEMA``
# 2. Run doctests - all must succeed
# %% Polish
# 1. Znormalizuj ``DATA`` zgodnie ze ``SCHEMA``
# 2. Uruchom doctesty - wszystkie muszą się powieść
# %% Example
# >>> result
# [User(firstname='Alice', lastname='Apricot', age=30),
# User(firstname='Bob', lastname='Blackthorn', age=31),
# User(firstname='Carol', lastname='Corn', age=32),
# User(firstname='Dave', lastname='Durian', age=33),
# User(firstname='Eve', lastname='Elderberry', age=34),
# User(firstname='Mallory', lastname='Melon', age=15)]
# %% Hints
# - `[x for x in data]`
# - `str.join()`
# - `get_annotations()`
# %% Doctests
"""
>>> import sys; sys.tracebacklimit = 0
>>> assert sys.version_info >= (3, 12), \
'Python 3.12+ required'
>>> assert result is not Ellipsis, \
'Assign result to variable: `result`'
>>> assert type(result) is list, \
'Variable `result` has invalid type, should be list'
>>> print(result) # doctest: +NORMALIZE_WHITESPACE
[User(firstname='Alice', lastname='Apricot', age=30),
User(firstname='Bob', lastname='Blackthorn', age=31),
User(firstname='Carol', lastname='Corn', age=32),
User(firstname='Dave', lastname='Durian', age=33),
User(firstname='Eve', lastname='Elderberry', age=34),
User(firstname='Mallory', lastname='Melon', age=15)]
"""
# %% Run
# - PyCharm: right-click in the editor and `Run Doctest in ...`
# - PyCharm: keyboard shortcut `Control + Shift + F10`
# - Terminal: `python -m doctest -f -v myfile.py`
# %% Imports
from inspect import get_annotations
# %% Types
from typing import Callable
# Type aliases carried by this template.
# NOTE(review): these are tuple types, while DATA in this template is a
# list of User objects - confirm whether the aliases should describe that.
type args = tuple[str,str,str]
type ret = tuple[str,str,int]
# Annotation only: states the expected signature of `normalize`;
# no function object is created by this line.
normalize: Callable[[args], ret]
# %% Data
class User:
    """Plain record holding a person's first name, last name and age.

    The constructor stores its arguments unchanged; no type conversion
    or validation is performed here.
    """

    def __init__(self, firstname, lastname, age=None):
        """Store the given values as instance attributes.

        Args:
            firstname: Value kept as ``self.firstname``.
            lastname: Value kept as ``self.lastname``.
            age: Value kept as ``self.age``; defaults to ``None``.
        """
        self.firstname = firstname
        self.lastname = lastname
        self.age = age

    def __repr__(self):
        """Return ``ClassName(attr=value, ...)`` built from instance attributes.

        Attributes are listed in the order they appear in ``vars(self)``,
        each value rendered with ``repr``.
        """
        clsname = self.__class__.__qualname__
        arguments = ', '.join(f'{k}={v!r}' for k, v in vars(self).items())
        return f'{clsname}({arguments})'
# Raw records: each User is created with its age passed as a string.
DATA = [
User('Alice', 'Apricot', age='30'),
User('Bob', 'Blackthorn', age='31'),
User('Carol', 'Corn', age='32'),
User('Dave', 'Durian', age='33'),
User('Eve', 'Elderberry', age='34'),
User('Mallory', 'Melon', age='15'),
]
class Schema:
    """Declares the target type of each field through class annotations.

    The class has no behavior of its own; only its annotations
    (field name -> type) carry information.
    """

    firstname: str
    lastname: str
    age: int
# %% Result
# Placeholder: the doctests above reject Ellipsis and expect a list of
# User objects whose age is an int.
result = ...