12.4. Serialization Normalize

12.4.1. Typing

  • type soft keyword - Since Python 3.12

  • type T = tuple[str,str,int]

  • typing.get_origin(T.__value__)

  • typing.get_args(T.__value__)

>>> from typing import get_origin, get_args
>>>
>>>
>>> type T = tuple[str,str,int]
>>>
>>> get_origin(T.__value__)
<class 'tuple'>
>>>
>>> get_args(T.__value__)
(<class 'str'>, <class 'str'>, <class 'int'>)

12.4.2. Sequence

  • def normalize(types, data)

  • normalize(TYPES, DATA)

>>> DATA = ('Alice', 'Apricot', '30')
>>> TYPES = (str, str, int)
>>>
>>>
>>> def normalize(types, data):
...     origin = type(types)
...     args = zip(types, data, strict=True)
...     return origin(t(d) for t,d in args)
>>>
>>>
>>> normalize(TYPES, DATA)
('Alice', 'Apricot', 30)

12.4.3. List of Sequences

  • def normalize(types, data)

  • normalize(TYPES, DATA)

>>> DATA = [
...     ('firstname', 'lastname', 'age'),
...     ('Alice', 'Apricot', '30'),
...     ('Bob', 'Blackthorn', '31'),
...     ('Carol', 'Corn', '32'),
...     ('Dave', 'Durian', '33'),
...     ('Eve', 'Elderberry', '34'),
...     ('Mallory', 'Melon', '15'),
... ]
>>>
>>> TYPES = [
...     (str, str, str),
...     (str, str, int),
...     (str, str, int),
...     (str, str, int),
...     (str, str, int),
...     (str, str, int),
...     (str, str, int),
... ]
>>>
>>>
>>> def normalize(types, data):
...     origin = type(types)
...     args = []
...     for rowtypes, rowdata in zip(types, data, strict=True):
...         roworigin = type(rowtypes)
...         rowargs = zip(rowtypes, rowdata, strict=True)
...         result = roworigin(t(d) for t,d in rowargs)
...         args.append(result)
...     return origin(args)
>>>
>>>
>>> normalize(TYPES, DATA)
[('firstname', 'lastname', 'age'),
 ('Alice', 'Apricot', 30),
 ('Bob', 'Blackthorn', 31),
 ('Carol', 'Corn', 32),
 ('Dave', 'Durian', 33),
 ('Eve', 'Elderberry', 34),
 ('Mallory', 'Melon', 15)]

12.4.4. Mapping

  • def normalize(types, data)

  • normalize(TYPES, DATA)

>>> DATA = {'firstname': 'Alice', 'lastname': 'Apricot', 'age': '30'}
>>> TYPES = {'firstname': str, 'lastname': str, 'age': int}
>>>
>>>
>>> def normalize(types, data):
...     origin = type(types)
...     args = {k: t(data[k]) for k,t in types.items()}
...     return origin(args)
>>>
>>>
>>> normalize(TYPES, DATA)
{'firstname': 'Alice', 'lastname': 'Apricot', 'age': 30}

12.4.5. List of Mappings

  • def normalize(types, data)

  • normalize(TYPES, DATA)

>>> DATA = [
...     {'firstname': 'Alice', 'lastname': 'Apricot', 'age': '30'},
...     {'firstname': 'Bob', 'lastname': 'Blackthorn', 'age': '31'},
...     {'firstname': 'Carol', 'lastname': 'Corn', 'age': '32'},
...     {'firstname': 'Dave', 'lastname': 'Durian', 'age': '33'},
...     {'firstname': 'Eve', 'lastname': 'Elderberry', 'age': '34'},
...     {'firstname': 'Mallory', 'lastname': 'Melon', 'age': '15'},
... ]
>>>
>>> TYPES = [
...     {'firstname': str, 'lastname': str, 'age': int},
...     {'firstname': str, 'lastname': str, 'age': int},
...     {'firstname': str, 'lastname': str, 'age': int},
...     {'firstname': str, 'lastname': str, 'age': int},
...     {'firstname': str, 'lastname': str, 'age': int},
...     {'firstname': str, 'lastname': str, 'age': int},
... ]
>>>
>>> def normalize(types, data):
...     origin = type(types)
...     args = []
...     for rowtypes, rowdata in zip(types, data, strict=True):
...         row = {k: t(rowdata[k]) for k,t in rowtypes.items()}
...         args.append(row)
...     return origin(args)
>>>
>>>
>>> normalize(TYPES, DATA)
[{'firstname': 'Alice', 'lastname': 'Apricot', 'age': 30},
 {'firstname': 'Bob', 'lastname': 'Blackthorn', 'age': 31},
 {'firstname': 'Carol', 'lastname': 'Corn', 'age': 32},
 {'firstname': 'Dave', 'lastname': 'Durian', 'age': 33},
 {'firstname': 'Eve', 'lastname': 'Elderberry', 'age': 34},
 {'firstname': 'Mallory', 'lastname': 'Melon', 'age': 15}]

12.4.6. List of Objects

  • annotationlib.get_annotations() since Python 3.14

  • inspect.get_annotations() for Python 3.13 and earlier

  • def normalize(types, data)

  • normalize(Types, DATA)

>>> from annotationlib import get_annotations
>>>
>>>
>>> class User:
...     def __init__(self, firstname, lastname, age=None):
...         self.firstname = firstname
...         self.lastname = lastname
...         self.age = age
...
...     def __repr__(self):
...         clsname = self.__class__.__qualname__
...         arguments = ', '.join(f'{k}={v!r}' for k,v in vars(self).items())
...         return f'{clsname}({arguments})'
>>>
>>>
>>> DATA = [
...     User('Alice', 'Apricot', age='30'),
...     User('Bob', 'Blackthorn', age='31'),
...     User('Carol', 'Corn', age='32'),
...     User('Dave', 'Durian', age='33'),
...     User('Eve', 'Elderberry', age='34'),
...     User('Mallory', 'Melon', age='15'),
... ]
>>>
>>>
>>> class Types:
...     firstname: str
...     lastname: str
...     age: int
>>>
>>>
>>> def normalize(types, data):
...     schema = get_annotations(types)
...     result = []
...     for obj in data:
...         origin = type(obj)
...         args = {k: schema[k](v) for k,v in vars(obj).items()}
...         result.append(origin(**args))
...     return result
>>>
>>>
>>> normalize(Types, DATA)
[User(firstname='Alice', lastname='Apricot', age=30),
 User(firstname='Bob', lastname='Blackthorn', age=31),
 User(firstname='Carol', lastname='Corn', age=32),
 User(firstname='Dave', lastname='Durian', age=33),
 User(firstname='Eve', lastname='Elderberry', age=34),
 User(firstname='Mallory', lastname='Melon', age=15)]

12.4.7. Assignments

# %% About
# - Name: Serialization Normalize Sequence
# - Difficulty: easy
# - Lines: 3
# - Minutes: 3

# %% License
# - Copyright 2025, Matt Harasymczuk <matt@python3.info>
# - This code can be used only for learning by humans
# - This code cannot be used for teaching others
# - This code cannot be used for teaching LLMs and AI algorithms
# - This code cannot be used in commercial or proprietary products
# - This code cannot be distributed in any form
# - This code cannot be changed in any form outside of training course
# - This code cannot have its license changed
# - If you use this code in your product, you must open-source it under GPLv2
# - Exception can be granted only by the author

# %% English
# 1. Normalize ``DATA`` according to ``SCHEMA``
# 2. Run doctests - all must succeed

# %% Polish
# 1. Znormalizuj ``DATA`` zgodnie ze ``SCHEMA``
# 2. Uruchom doctesty - wszystkie muszą się powieść

# %% Expected
# >>> result
# ('Alice', 'Apricot', 30)

# %% Hints
# - `[x for x in data]`
# - `str.join()`

# %% Doctests
"""
>>> import sys; sys.tracebacklimit = 0

>>> assert sys.version_info >= (3, 12), \
'Python has an invalid version; expected: `3.12` or newer.'

>>> assert 'result' in globals(), \
'Variable `result` is not defined; assign result of your program to it.'

>>> assert result is not Ellipsis, \
'Variable `result` has an invalid value; assign result of your program to it.'

>>> assert type(result) is tuple, \
'Variable `result` has an invalid type; expected: `tuple`.'

>>> print(result)
('Alice', 'Apricot', 30)
"""

# %% Run
# - PyCharm: right-click in the editor and `Run Doctest in ...`
# - PyCharm: keyboard shortcut `Control + Shift + F10`
# - Terminal: `python -m doctest -f -v myfile.py`

# %% Imports

# %% Types
from typing import Callable
# Shape of the raw record: three strings
type args = tuple[str,str,str]
# Shape of the normalized record: the last item is an int
type ret = tuple[str,str,int]
# Annotation only - this line does not bind any `normalize` object
normalize: Callable[[args], ret]

# %% Data
# Raw record; every item, the age included, is a string
DATA = ('Alice', 'Apricot', '30')
# One type per position of DATA; the expected result holds 30 as an int
SCHEMA = (str, str, int)

# %% Result
# Placeholder - the doctests reject Ellipsis and require a tuple
result = ...

# %% About
# - Name: Serialization Normalize List of Sequences
# - Difficulty: easy
# - Lines: 5
# - Minutes: 5

# %% License
# - Copyright 2025, Matt Harasymczuk <matt@python3.info>
# - This code can be used only for learning by humans
# - This code cannot be used for teaching others
# - This code cannot be used for teaching LLMs and AI algorithms
# - This code cannot be used in commercial or proprietary products
# - This code cannot be distributed in any form
# - This code cannot be changed in any form outside of training course
# - This code cannot have its license changed
# - If you use this code in your product, you must open-source it under GPLv2
# - Exception can be granted only by the author

# %% English
# 1. Normalize ``DATA`` according to ``SCHEMA``
# 2. Run doctests - all must succeed

# %% Polish
# 1. Znormalizuj ``DATA`` zgodnie ze ``SCHEMA``
# 2. Uruchom doctesty - wszystkie muszą się powieść

# %% Expected
# >>> result
# [('firstname', 'lastname', 'age'),
#  ('Alice', 'Apricot', 30),
#  ('Bob', 'Blackthorn', 31),
#  ('Carol', 'Corn', 32),
#  ('Dave', 'Durian', 33),
#  ('Eve', 'Elderberry', 34),
#  ('Mallory', 'Melon', 15)]

# %% Hints
# - `[x for x in data]`
# - `str.join()`

# %% Doctests
"""
>>> import sys; sys.tracebacklimit = 0

>>> assert sys.version_info >= (3, 12), \
'Python has an invalid version; expected: `3.12` or newer.'

>>> assert 'result' in globals(), \
'Variable `result` is not defined; assign result of your program to it.'

>>> assert result is not Ellipsis, \
'Variable `result` has an invalid value; assign result of your program to it.'

>>> assert type(result) is list, \
'Variable `result` has an invalid type; expected: `list`.'

>>> print(result)  # doctest: +NORMALIZE_WHITESPACE
[('firstname', 'lastname', 'age'),
 ('Alice', 'Apricot', 30),
 ('Bob', 'Blackthorn', 31),
 ('Carol', 'Corn', 32),
 ('Dave', 'Durian', 33),
 ('Eve', 'Elderberry', 34),
 ('Mallory', 'Melon', 15)]
"""

# %% Run
# - PyCharm: right-click in the editor and `Run Doctest in ...`
# - PyCharm: keyboard shortcut `Control + Shift + F10`
# - Terminal: `python -m doctest -f -v myfile.py`

# %% Imports

# %% Types
from typing import Callable
# NOTE(review): these aliases describe a single row, not the whole list held
# in DATA - presumably `normalize` is meant to work row by row; confirm
type args = tuple[str,str,str]
type ret = tuple[str,str,int]
# Annotation only - this line does not bind any `normalize` object
normalize: Callable[[args], ret]

# %% Data
# First row holds the column names, the remaining rows are records whose
# items are all strings
DATA = [
    ('firstname', 'lastname', 'age'),
    ('Alice', 'Apricot', '30'),
    ('Bob', 'Blackthorn', '31'),
    ('Carol', 'Corn', '32'),
    ('Dave', 'Durian', '33'),
    ('Eve', 'Elderberry', '34'),
    ('Mallory', 'Melon', '15'),
]

# One type per column. The expected result keeps the header row unchanged,
# and int('age') would raise ValueError, so SCHEMA fits only the record rows
SCHEMA = (str, str, int)

# %% Result
# Placeholder - the doctests reject Ellipsis and require a list
result = ...

# %% About
# - Name: Serialization Normalize Mapping
# - Difficulty: easy
# - Lines: 7
# - Minutes: 5

# %% License
# - Copyright 2025, Matt Harasymczuk <matt@python3.info>
# - This code can be used only for learning by humans
# - This code cannot be used for teaching others
# - This code cannot be used for teaching LLMs and AI algorithms
# - This code cannot be used in commercial or proprietary products
# - This code cannot be distributed in any form
# - This code cannot be changed in any form outside of training course
# - This code cannot have its license changed
# - If you use this code in your product, you must open-source it under GPLv2
# - Exception can be granted only by the author

# %% English
# 1. Normalize ``DATA`` according to ``SCHEMA``
# 2. Run doctests - all must succeed

# %% Polish
# 1. Znormalizuj ``DATA`` zgodnie ze ``SCHEMA``
# 2. Uruchom doctesty - wszystkie muszą się powieść

# %% Expected
# >>> result
# {'firstname': 'Alice', 'lastname': 'Apricot', 'age': 30}

# %% Hints
# - `[x for x in data]`
# - `str.join()`

# %% Doctests
"""
>>> import sys; sys.tracebacklimit = 0

>>> assert sys.version_info >= (3, 12), \
'Python has an invalid version; expected: `3.12` or newer.'

>>> assert 'result' in globals(), \
'Variable `result` is not defined; assign result of your program to it.'

>>> assert result is not Ellipsis, \
'Variable `result` has an invalid value; assign result of your program to it.'

>>> assert type(result) is dict, \
'Variable `result` has an invalid type; expected: `dict`.'

>>> print(result)
{'firstname': 'Alice', 'lastname': 'Apricot', 'age': 30}
"""

# %% Run
# - PyCharm: right-click in the editor and `Run Doctest in ...`
# - PyCharm: keyboard shortcut `Control + Shift + F10`
# - Terminal: `python -m doctest -f -v myfile.py`

# %% Imports

# %% Types
from typing import Callable
type args = tuple[str,str,str]
type ret = tuple[str,str,int]
normalize: Callable[[args], ret]

# %% Data
DATA = {'firstname': 'Alice', 'lastname': 'Apricot', 'age': '30'}
SCHEMA = {'firstname': str, 'lastname': str, 'age': int}

# %% Result
result = ...

# %% About
# - Name: Serialization Normalize List of Mappings
# - Difficulty: medium
# - Lines: 7
# - Minutes: 8

# %% License
# - Copyright 2025, Matt Harasymczuk <matt@python3.info>
# - This code can be used only for learning by humans
# - This code cannot be used for teaching others
# - This code cannot be used for teaching LLMs and AI algorithms
# - This code cannot be used in commercial or proprietary products
# - This code cannot be distributed in any form
# - This code cannot be changed in any form outside of training course
# - This code cannot have its license changed
# - If you use this code in your product, you must open-source it under GPLv2
# - Exception can be granted only by the author

# %% English
# 1. Normalize ``DATA`` according to ``SCHEMA``
# 2. Run doctests - all must succeed

# %% Polish
# 1. Znormalizuj ``DATA`` zgodnie ze ``SCHEMA``
# 2. Uruchom doctesty - wszystkie muszą się powieść

# %% Expected
# >>> result
# [{'firstname': 'Alice', 'lastname': 'Apricot', 'age': 30},
#  {'firstname': 'Bob', 'lastname': 'Blackthorn', 'age': 31},
#  {'firstname': 'Carol', 'lastname': 'Corn', 'age': 32},
#  {'firstname': 'Dave', 'lastname': 'Durian', 'age': 33},
#  {'firstname': 'Eve', 'lastname': 'Elderberry', 'age': 34},
#  {'firstname': 'Mallory', 'lastname': 'Melon', 'age': 15}]

# %% Hints
# - `[x for x in data]`
# - `str.join()`

# %% Doctests
"""
>>> import sys; sys.tracebacklimit = 0

>>> assert sys.version_info >= (3, 12), \
'Python has an invalid version; expected: `3.12` or newer.'

>>> assert 'result' in globals(), \
'Variable `result` is not defined; assign result of your program to it.'

>>> assert result is not Ellipsis, \
'Variable `result` has an invalid value; assign result of your program to it.'

>>> assert type(result) is list, \
'Variable `result` has an invalid type; expected: `list`.'

>>> print(result)  # doctest: +NORMALIZE_WHITESPACE
[{'firstname': 'Alice', 'lastname': 'Apricot', 'age': 30},
 {'firstname': 'Bob', 'lastname': 'Blackthorn', 'age': 31},
 {'firstname': 'Carol', 'lastname': 'Corn', 'age': 32},
 {'firstname': 'Dave', 'lastname': 'Durian', 'age': 33},
 {'firstname': 'Eve', 'lastname': 'Elderberry', 'age': 34},
 {'firstname': 'Mallory', 'lastname': 'Melon', 'age': 15}]
"""

# %% Run
# - PyCharm: right-click in the editor and `Run Doctest in ...`
# - PyCharm: keyboard shortcut `Control + Shift + F10`
# - Terminal: `python -m doctest -f -v myfile.py`

# %% Imports

# %% Types
from typing import Callable
type args = tuple[str,str,str]
type ret = tuple[str,str,int]
normalize: Callable[[args], ret]

# %% Data
DATA = [
    {'firstname': 'Alice', 'lastname': 'Apricot', 'age': '30'},
    {'firstname': 'Bob', 'lastname': 'Blackthorn', 'age': '31'},
    {'firstname': 'Carol', 'lastname': 'Corn', 'age': '32'},
    {'firstname': 'Dave', 'lastname': 'Durian', 'age': '33'},
    {'firstname': 'Eve', 'lastname': 'Elderberry', 'age': '34'},
    {'firstname': 'Mallory', 'lastname': 'Melon', 'age': '15'},
]

SCHEMA = {'firstname': str, 'lastname': str, 'age': int}

# %% Result
result = ...

# %% About
# - Name: Serialization Normalize List of Objects
# - Difficulty: hard
# - Lines: 10
# - Minutes: 13

# %% License
# - Copyright 2025, Matt Harasymczuk <matt@python3.info>
# - This code can be used only for learning by humans
# - This code cannot be used for teaching others
# - This code cannot be used for teaching LLMs and AI algorithms
# - This code cannot be used in commercial or proprietary products
# - This code cannot be distributed in any form
# - This code cannot be changed in any form outside of training course
# - This code cannot have its license changed
# - If you use this code in your product, you must open-source it under GPLv2
# - Exception can be granted only by the author

# %% English
# 1. Normalize ``DATA`` according to ``SCHEMA``
# 2. Run doctests - all must succeed

# %% Polish
# 1. Znormalizuj ``DATA`` zgodnie ze ``SCHEMA``
# 2. Uruchom doctesty - wszystkie muszą się powieść

# %% Expected
# >>> result
# [User(firstname='Alice', lastname='Apricot', age=30),
#  User(firstname='Bob', lastname='Blackthorn', age=31),
#  User(firstname='Carol', lastname='Corn', age=32),
#  User(firstname='Dave', lastname='Durian', age=33),
#  User(firstname='Eve', lastname='Elderberry', age=34),
#  User(firstname='Mallory', lastname='Melon', age=15)]

# %% Hints
# - `[x for x in data]`
# - `str.join()`
# - `get_annotations()`

# %% Doctests
"""
>>> import sys; sys.tracebacklimit = 0

>>> assert sys.version_info >= (3, 12), \
'Python has an invalid version; expected: `3.12` or newer.'

>>> assert 'result' in globals(), \
'Variable `result` is not defined; assign result of your program to it.'

>>> assert result is not Ellipsis, \
'Variable `result` has an invalid value; assign result of your program to it.'

>>> assert type(result) is list, \
'Variable `result` has an invalid type; expected: `list`.'

>>> print(result)  # doctest: +NORMALIZE_WHITESPACE
[User(firstname='Alice', lastname='Apricot', age=30),
 User(firstname='Bob', lastname='Blackthorn', age=31),
 User(firstname='Carol', lastname='Corn', age=32),
 User(firstname='Dave', lastname='Durian', age=33),
 User(firstname='Eve', lastname='Elderberry', age=34),
 User(firstname='Mallory', lastname='Melon', age=15)]
"""

# %% Run
# - PyCharm: right-click in the editor and `Run Doctest in ...`
# - PyCharm: keyboard shortcut `Control + Shift + F10`
# - Terminal: `python -m doctest -f -v myfile.py`

# %% Imports
from inspect import get_annotations

# %% Types
from typing import Callable
type args = tuple[str,str,str]
type ret = tuple[str,str,int]
normalize: Callable[[args], ret]

# %% Data
class User:
    """Plain record with a first name, a last name and an optional age."""

    def __init__(self, firstname, lastname, age=None):
        # Assigned left to right, which fixes the attribute order seen by repr()
        self.firstname, self.lastname, self.age = firstname, lastname, age

    def __repr__(self):
        # Render as ClassName(attr=value, ...) from the instance attributes
        pairs = [f'{name}={value!r}' for name, value in self.__dict__.items()]
        return '{}({})'.format(type(self).__qualname__, ', '.join(pairs))


# Raw records: each age is passed as a string (e.g. '30'), while the expected
# result shows it as an int
DATA = [
    User('Alice', 'Apricot', age='30'),
    User('Bob', 'Blackthorn', age='31'),
    User('Carol', 'Corn', age='32'),
    User('Dave', 'Durian', age='33'),
    User('Eve', 'Elderberry', age='34'),
    User('Mallory', 'Melon', age='15'),
]

class Schema:
    """Annotation-only class naming the target type of each field.

    It assigns no attributes and defines no methods; the field-to-type
    mapping lives entirely in the class annotations.
    """
    firstname: str
    lastname: str
    age: int

# %% Result
# Placeholder - the doctests reject Ellipsis and require a list
result = ...