12.4. Serialization Normalize

12.4.1. Sequence

>>> DATA = ('Alice', 'Apricot', '30')
>>> SCHEMA = (str, str, int)
>>>
>>>
>>> def normalize(data, schema):
...     return tuple(t(v) for t,v in zip(schema,data))
>>>
>>>
>>> normalize(DATA, SCHEMA)
('Alice', 'Apricot', 30)

12.4.2. List of Sequences

>>> DATA = [
...     ('firstname', 'lastname', 'age'),
...     ('Alice', 'Apricot', '30'),
...     ('Bob', 'Blackthorn', '31'),
...     ('Carol', 'Corn', '32'),
...     ('Dave', 'Durian', '33'),
...     ('Eve', 'Elderberry', '34'),
...     ('Mallory', 'Melon', '15'),
... ]
>>>
>>> SCHEMA = (str, str, int)
>>>
>>>
>>> def normalize(data, schema):
...     header, *rows = data
...     values = [tuple(t(v) for t,v in zip(schema,row)) for row in rows]
...     return [header] + values
>>>
>>>
>>> normalize(DATA, SCHEMA)
[('firstname', 'lastname', 'age'),
 ('Alice', 'Apricot', 30),
 ('Bob', 'Blackthorn', 31),
 ('Carol', 'Corn', 32),
 ('Dave', 'Durian', 33),
 ('Eve', 'Elderberry', 34),
 ('Mallory', 'Melon', 15)]

12.4.3. Mapping

>>> DATA = {'firstname': 'Alice', 'lastname': 'Apricot', 'age': '30'}
>>> SCHEMA = {'firstname': str, 'lastname': str, 'age': int}
>>>
>>>
>>> def normalize(data, schema):
...     return {k:schema[k](v) for k,v in data.items()}
>>>
>>>
>>> normalize(DATA, SCHEMA)
{'firstname': 'Alice', 'lastname': 'Apricot', 'age': 30}

12.4.4. List of Mappings

>>> DATA = [
...     {'firstname': 'Alice', 'lastname': 'Apricot', 'age': '30'},
...     {'firstname': 'Bob', 'lastname': 'Blackthorn', 'age': '31'},
...     {'firstname': 'Carol', 'lastname': 'Corn', 'age': '32'},
...     {'firstname': 'Dave', 'lastname': 'Durian', 'age': '33'},
...     {'firstname': 'Eve', 'lastname': 'Elderberry', 'age': '34'},
...     {'firstname': 'Mallory', 'lastname': 'Melon', 'age': '15'},
... ]
>>>
>>> SCHEMA = {'firstname': str, 'lastname': str, 'age': int}
>>>
>>> def normalize(data, schema):
...     return [{k:schema[k](v) for k,v in row.items()} for row in data]
>>>
>>>
>>> normalize(DATA, SCHEMA)
[{'firstname': 'Alice', 'lastname': 'Apricot', 'age': 30},
 {'firstname': 'Bob', 'lastname': 'Blackthorn', 'age': 31},
 {'firstname': 'Carol', 'lastname': 'Corn', 'age': 32},
 {'firstname': 'Dave', 'lastname': 'Durian', 'age': 33},
 {'firstname': 'Eve', 'lastname': 'Elderberry', 'age': 34},
 {'firstname': 'Mallory', 'lastname': 'Melon', 'age': 15}]

12.4.5. List of Objects

>>> from inspect import get_annotations
>>>
>>>
>>> class User:
...     def __init__(self, firstname, lastname, age=None):
...         self.firstname = firstname
...         self.lastname = lastname
...         self.age = age
...
...     def __repr__(self):
...         clsname = self.__class__.__qualname__
...         arguments = ', '.join(f'{k}={v!r}' for k,v in vars(self).items())
...         return f'{clsname}({arguments})'
>>>
>>>
>>> DATA = [
...     User('Alice', 'Apricot', age='30'),
...     User('Bob', 'Blackthorn', age='31'),
...     User('Carol', 'Corn', age='32'),
...     User('Dave', 'Durian', age='33'),
...     User('Eve', 'Elderberry', age='34'),
...     User('Mallory', 'Melon', age='15'),
... ]
>>>
>>>
>>> class Schema:
...     firstname: str
...     lastname: str
...     age: int
>>>
>>>
>>> def normalize(data, schema):
...     schema = get_annotations(schema)
...     fields = [{k:schema[k](v) for k,v in vars(row).items()} for row in data]
...     return [User(**f) for f in fields]
>>>
>>>
>>> normalize(DATA, Schema)
[User(firstname='Alice', lastname='Apricot', age=30),
 User(firstname='Bob', lastname='Blackthorn', age=31),
 User(firstname='Carol', lastname='Corn', age=32),
 User(firstname='Dave', lastname='Durian', age=33),
 User(firstname='Eve', lastname='Elderberry', age=34),
 User(firstname='Mallory', lastname='Melon', age=15)]

12.4.6. Assignments

# %% About
# - Name: Serialization Normalize Sequence
# - Difficulty: easy
# - Lines: 3
# - Minutes: 3

# %% License
# - Copyright 2025, Matt Harasymczuk <matt@python3.info>
# - This code can be used only for learning by humans
# - This code cannot be used for teaching others
# - This code cannot be used for teaching LLMs and AI algorithms
# - This code cannot be used in commercial or proprietary products
# - This code cannot be distributed in any form
# - This code cannot be changed in any form outside of training course
# - This code cannot have its license changed
# - If you use this code in your product, you must open-source it under GPLv2
# - Exception can be granted only by the author

# %% English
# 1. Normalize ``DATA`` according to ``SCHEMA``
# 2. Run doctests - all must succeed

# %% Polish
# 1. Znormalizuj ``DATA`` zgodnie ze ``SCHEMA``
# 2. Uruchom doctesty - wszystkie muszą się powieść

# %% Example
# >>> result
# ('Alice', 'Apricot', 30)

# %% Hints
# - `[x for x in data]`
# - `str.join()`

# %% Doctests
"""
>>> import sys; sys.tracebacklimit = 0
>>> assert sys.version_info >= (3, 12), \
'Python 3.12+ required'

>>> assert result is not Ellipsis, \
'Assign result to variable: `result`'

>>> assert type(result) is tuple, \
'Variable `result` has invalid type, should be tuple'

>>> print(result)
('Alice', 'Apricot', 30)
"""

# %% Run
# - PyCharm: right-click in the editor and `Run Doctest in ...`
# - PyCharm: keyboard shortcut `Control + Shift + F10`
# - Terminal: `python -m doctest -f -v myfile.py`

# %% Imports

# %% Types
from typing import Callable
# Input record: three string fields, the same shape as DATA below.
type args = tuple[str,str,str]
# Normalized record: the last field becomes an int, as in the expected result.
type ret = tuple[str,str,int]
# Annotation only: this line declares a type and does not create `normalize`.
# NOTE(review): the declared callable takes a single argument - confirm whether
# the intended solution should also receive the schema as a parameter.
normalize: Callable[[args], ret]

# %% Data
# Raw values; the age is given as the string '30'.
DATA = ('Alice', 'Apricot', '30')
# One target type per position of DATA.
SCHEMA = (str, str, int)

# %% Result
# Placeholder to be replaced with the normalized tuple; the doctests reject `...`.
result = ...

# %% About
# - Name: Serialization Normalize List of Sequences
# - Difficulty: easy
# - Lines: 5
# - Minutes: 5

# %% License
# - Copyright 2025, Matt Harasymczuk <matt@python3.info>
# - This code can be used only for learning by humans
# - This code cannot be used for teaching others
# - This code cannot be used for teaching LLMs and AI algorithms
# - This code cannot be used in commercial or proprietary products
# - This code cannot be distributed in any form
# - This code cannot be changed in any form outside of training course
# - This code cannot have its license changed
# - If you use this code in your product, you must open-source it under GPLv2
# - Exception can be granted only by the author

# %% English
# 1. Normalize ``DATA`` according to ``SCHEMA``
# 2. Run doctests - all must succeed

# %% Polish
# 1. Znormalizuj ``DATA`` zgodnie ze ``SCHEMA``
# 2. Uruchom doctesty - wszystkie muszą się powieść

# %% Example
# >>> result
# [('firstname', 'lastname', 'age'),
#  ('Alice', 'Apricot', 30),
#  ('Bob', 'Blackthorn', 31),
#  ('Carol', 'Corn', 32),
#  ('Dave', 'Durian', 33),
#  ('Eve', 'Elderberry', 34),
#  ('Mallory', 'Melon', 15)]

# %% Hints
# - `[x for x in data]`
# - `str.join()`

# %% Doctests
"""
>>> import sys; sys.tracebacklimit = 0
>>> assert sys.version_info >= (3, 12), \
'Python 3.12+ required'

>>> assert result is not Ellipsis, \
'Assign result to variable: `result`'

>>> assert type(result) is list, \
'Variable `result` has invalid type, should be list'

>>> print(result)  # doctest: +NORMALIZE_WHITESPACE
[('firstname', 'lastname', 'age'),
 ('Alice', 'Apricot', 30),
 ('Bob', 'Blackthorn', 31),
 ('Carol', 'Corn', 32),
 ('Dave', 'Durian', 33),
 ('Eve', 'Elderberry', 34),
 ('Mallory', 'Melon', 15)]
"""

# %% Run
# - PyCharm: right-click in the editor and `Run Doctest in ...`
# - PyCharm: keyboard shortcut `Control + Shift + F10`
# - Terminal: `python -m doctest -f -v myfile.py`

# %% Imports

# %% Types
from typing import Callable
type args = tuple[str,str,str]
type ret = tuple[str,str,int]
normalize: Callable[[args], ret]

# %% Data
DATA = [
    ('firstname', 'lastname', 'age'),
    ('Alice', 'Apricot', '30'),
    ('Bob', 'Blackthorn', '31'),
    ('Carol', 'Corn', '32'),
    ('Dave', 'Durian', '33'),
    ('Eve', 'Elderberry', '34'),
    ('Mallory', 'Melon', '15'),
]

SCHEMA = (str, str, int)

# %% Result
result = ...

# %% About
# - Name: Serialization Normalize Mapping
# - Difficulty: easy
# - Lines: 7
# - Minutes: 5

# %% License
# - Copyright 2025, Matt Harasymczuk <matt@python3.info>
# - This code can be used only for learning by humans
# - This code cannot be used for teaching others
# - This code cannot be used for teaching LLMs and AI algorithms
# - This code cannot be used in commercial or proprietary products
# - This code cannot be distributed in any form
# - This code cannot be changed in any form outside of training course
# - This code cannot have its license changed
# - If you use this code in your product, you must open-source it under GPLv2
# - Exception can be granted only by the author

# %% English
# 1. Normalize ``DATA`` according to ``SCHEMA``
# 2. Run doctests - all must succeed

# %% Polish
# 1. Znormalizuj ``DATA`` zgodnie ze ``SCHEMA``
# 2. Uruchom doctesty - wszystkie muszą się powieść

# %% Example
# >>> result
# {'firstname': 'Alice', 'lastname': 'Apricot', 'age': 30}

# %% Hints
# - `[x for x in data]`
# - `str.join()`

# %% Doctests
"""
>>> import sys; sys.tracebacklimit = 0
>>> assert sys.version_info >= (3, 12), \
'Python 3.12+ required'

>>> assert result is not Ellipsis, \
'Assign result to variable: `result`'

>>> assert type(result) is dict, \
'Variable `result` has invalid type, should be dict'

>>> print(result)
{'firstname': 'Alice', 'lastname': 'Apricot', 'age': 30}
"""

# %% Run
# - PyCharm: right-click in the editor and `Run Doctest in ...`
# - PyCharm: keyboard shortcut `Control + Shift + F10`
# - Terminal: `python -m doctest -f -v myfile.py`

# %% Imports

# %% Types
from typing import Callable
type args = tuple[str,str,str]
type ret = tuple[str,str,int]
normalize: Callable[[args], ret]

# %% Data
DATA = {'firstname': 'Alice', 'lastname': 'Apricot', 'age': '30'}
SCHEMA = {'firstname': str, 'lastname': str, 'age': int}

# %% Result
result = ...

# %% About
# - Name: Serialization Normalize List of Mappings
# - Difficulty: medium
# - Lines: 7
# - Minutes: 8

# %% License
# - Copyright 2025, Matt Harasymczuk <matt@python3.info>
# - This code can be used only for learning by humans
# - This code cannot be used for teaching others
# - This code cannot be used for teaching LLMs and AI algorithms
# - This code cannot be used in commercial or proprietary products
# - This code cannot be distributed in any form
# - This code cannot be changed in any form outside of training course
# - This code cannot have its license changed
# - If you use this code in your product, you must open-source it under GPLv2
# - Exception can be granted only by the author

# %% English
# 1. Normalize ``DATA`` according to ``SCHEMA``
# 2. Run doctests - all must succeed

# %% Polish
# 1. Znormalizuj ``DATA`` zgodnie ze ``SCHEMA``
# 2. Uruchom doctesty - wszystkie muszą się powieść

# %% Example
# >>> result
# [{'firstname': 'Alice', 'lastname': 'Apricot', 'age': 30},
#  {'firstname': 'Bob', 'lastname': 'Blackthorn', 'age': 31},
#  {'firstname': 'Carol', 'lastname': 'Corn', 'age': 32},
#  {'firstname': 'Dave', 'lastname': 'Durian', 'age': 33},
#  {'firstname': 'Eve', 'lastname': 'Elderberry', 'age': 34},
#  {'firstname': 'Mallory', 'lastname': 'Melon', 'age': 15}]

# %% Hints
# - `[x for x in data]`
# - `str.join()`

# %% Doctests
"""
>>> import sys; sys.tracebacklimit = 0
>>> assert sys.version_info >= (3, 12), \
'Python 3.12+ required'

>>> assert result is not Ellipsis, \
'Assign result to variable: `result`'

>>> assert type(result) is list, \
'Variable `result` has invalid type, should be list'

>>> print(result)  # doctest: +NORMALIZE_WHITESPACE
[{'firstname': 'Alice', 'lastname': 'Apricot', 'age': 30},
 {'firstname': 'Bob', 'lastname': 'Blackthorn', 'age': 31},
 {'firstname': 'Carol', 'lastname': 'Corn', 'age': 32},
 {'firstname': 'Dave', 'lastname': 'Durian', 'age': 33},
 {'firstname': 'Eve', 'lastname': 'Elderberry', 'age': 34},
 {'firstname': 'Mallory', 'lastname': 'Melon', 'age': 15}]
"""

# %% Run
# - PyCharm: right-click in the editor and `Run Doctest in ...`
# - PyCharm: keyboard shortcut `Control + Shift + F10`
# - Terminal: `python -m doctest -f -v myfile.py`

# %% Imports

# %% Types
from typing import Callable
type args = tuple[str,str,str]
type ret = tuple[str,str,int]
normalize: Callable[[args], ret]

# %% Data
DATA = [
    {'firstname': 'Alice', 'lastname': 'Apricot', 'age': '30'},
    {'firstname': 'Bob', 'lastname': 'Blackthorn', 'age': '31'},
    {'firstname': 'Carol', 'lastname': 'Corn', 'age': '32'},
    {'firstname': 'Dave', 'lastname': 'Durian', 'age': '33'},
    {'firstname': 'Eve', 'lastname': 'Elderberry', 'age': '34'},
    {'firstname': 'Mallory', 'lastname': 'Melon', 'age': '15'},
]

SCHEMA = {'firstname': str, 'lastname': str, 'age': int}

# %% Result
result = ...

# %% About
# - Name: Serialization Normalize List of Objects
# - Difficulty: hard
# - Lines: 10
# - Minutes: 13

# %% License
# - Copyright 2025, Matt Harasymczuk <matt@python3.info>
# - This code can be used only for learning by humans
# - This code cannot be used for teaching others
# - This code cannot be used for teaching LLMs and AI algorithms
# - This code cannot be used in commercial or proprietary products
# - This code cannot be distributed in any form
# - This code cannot be changed in any form outside of training course
# - This code cannot have its license changed
# - If you use this code in your product, you must open-source it under GPLv2
# - Exception can be granted only by the author

# %% English
# 1. Normalize ``DATA`` according to ``SCHEMA``
# 2. Run doctests - all must succeed

# %% Polish
# 1. Znormalizuj ``DATA`` zgodnie ze ``SCHEMA``
# 2. Uruchom doctesty - wszystkie muszą się powieść

# %% Example
# >>> result
# [User(firstname='Alice', lastname='Apricot', age=30),
#  User(firstname='Bob', lastname='Blackthorn', age=31),
#  User(firstname='Carol', lastname='Corn', age=32),
#  User(firstname='Dave', lastname='Durian', age=33),
#  User(firstname='Eve', lastname='Elderberry', age=34),
#  User(firstname='Mallory', lastname='Melon', age=15)]

# %% Hints
# - `[x for x in data]`
# - `str.join()`
# - `get_annotations()`

# %% Doctests
"""
>>> import sys; sys.tracebacklimit = 0
>>> assert sys.version_info >= (3, 12), \
'Python 3.12+ required'

>>> assert result is not Ellipsis, \
'Assign result to variable: `result`'

>>> assert type(result) is list, \
'Variable `result` has invalid type, should be list'

>>> print(result)  # doctest: +NORMALIZE_WHITESPACE
[User(firstname='Alice', lastname='Apricot', age=30),
 User(firstname='Bob', lastname='Blackthorn', age=31),
 User(firstname='Carol', lastname='Corn', age=32),
 User(firstname='Dave', lastname='Durian', age=33),
 User(firstname='Eve', lastname='Elderberry', age=34),
 User(firstname='Mallory', lastname='Melon', age=15)]
"""

# %% Run
# - PyCharm: right-click in the editor and `Run Doctest in ...`
# - PyCharm: keyboard shortcut `Control + Shift + F10`
# - Terminal: `python -m doctest -f -v myfile.py`

# %% Imports
from inspect import get_annotations

# %% Types
from typing import Callable
type args = tuple[str,str,str]
type ret = tuple[str,str,int]
normalize: Callable[[args], ret]

# %% Data
class User:
    """Simple record holding a first name, a last name and an optional age."""

    def __init__(self, firstname, lastname, age=None):
        # Targets are assigned left to right, so the attribute order seen by
        # vars() stays firstname, lastname, age.
        self.firstname, self.lastname, self.age = firstname, lastname, age

    def __repr__(self):
        """Return ``ClassName(attr=value, ...)`` built from the instance attributes."""
        parts = []
        for name, value in vars(self).items():
            parts.append(f'{name}={value!r}')
        owner = self.__class__.__qualname__
        joined = ', '.join(parts)
        return f'{owner}({joined})'


# Raw users to normalize: every age is passed as a string, and
# User.__init__ stores it unchanged.
DATA = [
    User('Alice', 'Apricot', age='30'),
    User('Bob', 'Blackthorn', age='31'),
    User('Carol', 'Corn', age='32'),
    User('Dave', 'Durian', age='33'),
    User('Eve', 'Elderberry', age='34'),
    User('Mallory', 'Melon', age='15'),
]

# Annotation-only class: it declares a target type for each User attribute
# and defines no values or methods.
class Schema:
    firstname: str
    lastname: str
    age: int  # DATA holds ages as strings; the expected result holds ints

# %% Result
# Placeholder to be replaced with the normalized list of User objects;
# the doctests reject `...`.
result = ...