4.5. To SQL

  • File paths works also with DATAs

  • SQL functions uses SQLAlchemy, which supports many RDBMS

4.5.1. SetUp

>>> import pandas as pd
>>>
>>> pd.set_option('display.max_columns', 50)
>>> pd.set_option('display.max_rows', 200)
>>> pd.set_option('display.width', 500)
>>> pd.set_option('display.memory_usage', 'deep')
>>> pd.set_option('display.precision', 4)
>>>
>>>
>>> data = pd.DataFrame([
...     {'firstname': 'Alice', 'lastname': 'Apricot', 'age': 30, 'lastlogin': pd.Timestamp('2000-01-01'), 'is_active': True},
...     {'firstname': 'Bob', 'lastname': 'Blackthorn', 'age': 31, 'lastlogin': pd.Timestamp('2000-01-02'), 'is_active': True},
...     {'firstname': 'Carol', 'lastname': 'Corn', 'age': 32, 'lastlogin': pd.Timestamp('2000-01-03'), 'is_active': False},
...     {'firstname': 'Dave', 'lastname': 'Durian', 'age': 33, 'lastlogin': pd.Timestamp('2000-01-04'), 'is_active': False},
...     {'firstname': 'Eve', 'lastname': 'Elderberry', 'age': 34, 'lastlogin': pd.Timestamp('2000-01-05'), 'is_active': True},
...     {'firstname': 'Mallory', 'lastname': 'Melon', 'age': 15, 'lastlogin': pd.NaT, 'is_active': None},
... ])
>>>
>>> data
  firstname    lastname  age  lastlogin is_active
0     Alice     Apricot   30 2000-01-01      True
1       Bob  Blackthorn   31 2000-01-02      True
2     Carol        Corn   32 2000-01-03     False
3      Dave      Durian   33 2000-01-04     False
4       Eve  Elderberry   34 2000-01-05      True
5   Mallory       Melon   15        NaT      None

4.5.2. SQLite3

  • data.to_sql('users', db, if_exists='replace')

  • if_exists parameter can be used to control the behavior when the table already exists. Options are: 'fail', 'replace', 'append'.

>>> import sqlite3
>>>
>>> with sqlite3.connect('/tmp/myfile.db') as db:
...     data.to_sql('users', db, if_exists='replace')
6
$ sqlite3 /tmp/myfile.db 'SELECT * FROM users'
0|Alice|Apricot|30|2000-01-01 00:00:00|1
1|Bob|Blackthorn|31|2000-01-02 00:00:00|1
2|Carol|Corn|32|2000-01-03 00:00:00|0
3|Dave|Durian|33|2000-01-04 00:00:00|0
4|Eve|Elderberry|34|2000-01-05 00:00:00|1
5|Mallory|Melon|15||

4.5.3. SQLAlchemy

>>> from sqlalchemy import create_engine
>>>
>>> engine = create_engine('sqlite:////tmp/myfile.db')
>>>
>>> with engine.connect() as db:
...     data.to_sql('users', db, if_exists='replace')
6
$ sqlite3 /tmp/myfile.db 'SELECT * FROM users'
0|Alice|Apricot|30|2000-01-01 00:00:00.000000|1
1|Bob|Blackthorn|31|2000-01-02 00:00:00.000000|1
2|Carol|Corn|32|2000-01-03 00:00:00.000000|0
3|Dave|Durian|33|2000-01-04 00:00:00.000000|0
4|Eve|Elderberry|34|2000-01-05 00:00:00.000000|1
5|Mallory|Melon|15||

4.5.4. Assignments

# %% About
# - Name: Pandas To SQL
# - Difficulty: easy
# - Lines: 2
# - Minutes: 3

# %% License
# - Copyright 2025, Matt Harasymczuk <matt@python3.info>
# - This code can be used only for learning by humans
# - This code cannot be used for teaching others
# - This code cannot be used for teaching LLMs and AI algorithms
# - This code cannot be used in commercial or proprietary products
# - This code cannot be distributed in any form
# - This code cannot be changed in any form outside of training course
# - This code cannot have its license changed
# - If you use this code in your product, you must open-source it under GPLv2
# - Exception can be granted only by the author

# %% English
# 1. Export data from `data` to file `FILE`
# 2. Data has to be in SQL format (table `users`)
# 3. Use sqlite3 module
# 4. Run doctests - all must succeed

# %% Polish
# 1. Wyeksportuj dane z `data` do pliku `FILE`
# 2. Dane mają być w formacie SQL (tabela `users`)
# 3. Użyj modułu sqlite3
# 4. Uruchom doctesty - wszystkie muszą się powieść

# %% Expected
# >>> result
#    firstname    lastname  age                email   lastlogin  is_active              groups
# id
# 1      Alice     Apricot   30    alice@example.com  2000-01-01          1         users;staff
# 2        Bob  Blackthorn   31      bob@example.com  2000-01-02          1         users;staff
# 3      Carol        Corn   32    carol@example.com  2000-01-03          1               users
# 4       Dave      Durian   33     dave@example.org  2000-01-04          1               users
# 5        Eve  Elderberry   34      eve@example.org  2000-01-05          1  users;staff;admins
# 6    Mallory       Melon   15  mallory@example.net         NaN          0                 NaN

# %% Doctests
"""
>>> import sys; sys.tracebacklimit = 0

>>> with sqlite3.connect(FILE) as db:
...     result = pd.read_sql_query('SELECT * FROM users', db, index_col='id')

>>> assert 'result' in globals(), \
'Variable `result` is not defined; assign result of your program to it.'

>>> assert result is not Ellipsis, \
'Variable `result` has an invalid value; assign result of your program to it.'

>>> pd.set_option('display.max_columns', 50)
>>> pd.set_option('display.max_rows', 200)
>>> pd.set_option('display.width', 500)
>>> pd.set_option('display.memory_usage', 'deep')
>>> pd.set_option('display.precision', 4)

>>> result  # doctest: +NORMALIZE_WHITESPACE
   firstname    lastname  age                email   lastlogin  is_active              groups
id
1      Alice     Apricot   30    alice@example.com  2000-01-01          1         users;staff
2        Bob  Blackthorn   31      bob@example.com  2000-01-02          1         users;staff
3      Carol        Corn   32    carol@example.com  2000-01-03          1               users
4       Dave      Durian   33     dave@example.org  2000-01-04          1               users
5        Eve  Elderberry   34      eve@example.org  2000-01-05          1  users;staff;admins
6    Mallory       Melon   15  mallory@example.net         NaN          0                 NaN

>>> from os import remove
>>> remove(FILE)
"""

# %% Run
# - PyCharm: right-click in the editor and `Run Doctest in ...`
# - PyCharm: keyboard shortcut `Control + Shift + F10`
# - Terminal: `python -m doctest -f -v myfile.py`

# %% Imports
from urllib.request import urlopen
import sqlite3
import pandas as pd

# %% Types

# %% Data
DATA = 'https://python3.info/_static/example.db'
FILE = r'_temporary.db'

with urlopen(DATA) as response, open(FILE, mode='wb') as database:
    database.write(response.read())

with sqlite3.connect(FILE) as db:
    data = pd.read_sql_query('SELECT * FROM users', db, index_col='id')

# %% Result