4.5. To SQL
File paths works also with DATAs
SQL functions uses SQLAlchemy, which supports many RDBMS
4.5.1. SetUp
>>> import pandas as pd
>>>
>>> pd.set_option('display.max_columns', 50)
>>> pd.set_option('display.max_rows', 200)
>>> pd.set_option('display.width', 500)
>>> pd.set_option('display.memory_usage', 'deep')
>>> pd.set_option('display.precision', 4)
>>>
>>>
>>> data = pd.DataFrame([
... {'firstname': 'Alice', 'lastname': 'Apricot', 'age': 30, 'lastlogin': pd.Timestamp('2000-01-01'), 'is_active': True},
... {'firstname': 'Bob', 'lastname': 'Blackthorn', 'age': 31, 'lastlogin': pd.Timestamp('2000-01-02'), 'is_active': True},
... {'firstname': 'Carol', 'lastname': 'Corn', 'age': 32, 'lastlogin': pd.Timestamp('2000-01-03'), 'is_active': False},
... {'firstname': 'Dave', 'lastname': 'Durian', 'age': 33, 'lastlogin': pd.Timestamp('2000-01-04'), 'is_active': False},
... {'firstname': 'Eve', 'lastname': 'Elderberry', 'age': 34, 'lastlogin': pd.Timestamp('2000-01-05'), 'is_active': True},
... {'firstname': 'Mallory', 'lastname': 'Melon', 'age': 15, 'lastlogin': pd.NaT, 'is_active': None},
... ])
>>>
>>> data
firstname lastname age lastlogin is_active
0 Alice Apricot 30 2000-01-01 True
1 Bob Blackthorn 31 2000-01-02 True
2 Carol Corn 32 2000-01-03 False
3 Dave Durian 33 2000-01-04 False
4 Eve Elderberry 34 2000-01-05 True
5 Mallory Melon 15 NaT None
4.5.2. SQLite3
data.to_sql('users', db, if_exists='replace')if_existsparameter can be used to control the behavior when the table already exists. Options are: 'fail', 'replace', 'append'.
>>> import sqlite3
>>>
>>> with sqlite3.connect('/tmp/myfile.db') as db:
... data.to_sql('users', db, if_exists='replace')
6
$ sqlite3 /tmp/myfile.db 'SELECT * FROM users'
0|Alice|Apricot|30|2000-01-01 00:00:00|1
1|Bob|Blackthorn|31|2000-01-02 00:00:00|1
2|Carol|Corn|32|2000-01-03 00:00:00|0
3|Dave|Durian|33|2000-01-04 00:00:00|0
4|Eve|Elderberry|34|2000-01-05 00:00:00|1
5|Mallory|Melon|15||
4.5.3. SQLAlchemy
>>> from sqlalchemy import create_engine
>>>
>>> engine = create_engine('sqlite:////tmp/myfile.db')
>>>
>>> with engine.connect() as db:
... data.to_sql('users', db, if_exists='replace')
6
$ sqlite3 /tmp/myfile.db 'SELECT * FROM users'
0|Alice|Apricot|30|2000-01-01 00:00:00.000000|1
1|Bob|Blackthorn|31|2000-01-02 00:00:00.000000|1
2|Carol|Corn|32|2000-01-03 00:00:00.000000|0
3|Dave|Durian|33|2000-01-04 00:00:00.000000|0
4|Eve|Elderberry|34|2000-01-05 00:00:00.000000|1
5|Mallory|Melon|15||
4.5.4. Assignments
# %% About
# - Name: Pandas To SQL
# - Difficulty: easy
# - Lines: 2
# - Minutes: 3
# %% License
# - Copyright 2025, Matt Harasymczuk <matt@python3.info>
# - This code can be used only for learning by humans
# - This code cannot be used for teaching others
# - This code cannot be used for teaching LLMs and AI algorithms
# - This code cannot be used in commercial or proprietary products
# - This code cannot be distributed in any form
# - This code cannot be changed in any form outside of training course
# - This code cannot have its license changed
# - If you use this code in your product, you must open-source it under GPLv2
# - Exception can be granted only by the author
# %% English
# 1. Export data from `data` to file `FILE`
# 2. Data has to be in SQL format (table `users`)
# 3. Use sqlite3 module
# 4. Run doctests - all must succeed
# %% Polish
# 1. Wyeksportuj dane z `data` do pliku `FILE`
# 2. Dane mają być w formacie SQL (tabela `users`)
# 3. Użyj modułu sqlite3
# 4. Uruchom doctesty - wszystkie muszą się powieść
# %% Expected
# >>> result
# firstname lastname age email lastlogin is_active groups
# id
# 1 Alice Apricot 30 alice@example.com 2000-01-01 1 users;staff
# 2 Bob Blackthorn 31 bob@example.com 2000-01-02 1 users;staff
# 3 Carol Corn 32 carol@example.com 2000-01-03 1 users
# 4 Dave Durian 33 dave@example.org 2000-01-04 1 users
# 5 Eve Elderberry 34 eve@example.org 2000-01-05 1 users;staff;admins
# 6 Mallory Melon 15 mallory@example.net NaN 0 NaN
# %% Doctests
"""
>>> import sys; sys.tracebacklimit = 0
>>> with sqlite3.connect(FILE) as db:
... result = pd.read_sql_query('SELECT * FROM users', db, index_col='id')
>>> assert 'result' in globals(), \
'Variable `result` is not defined; assign result of your program to it.'
>>> assert result is not Ellipsis, \
'Variable `result` has an invalid value; assign result of your program to it.'
>>> pd.set_option('display.max_columns', 50)
>>> pd.set_option('display.max_rows', 200)
>>> pd.set_option('display.width', 500)
>>> pd.set_option('display.memory_usage', 'deep')
>>> pd.set_option('display.precision', 4)
>>> result # doctest: +NORMALIZE_WHITESPACE
firstname lastname age email lastlogin is_active groups
id
1 Alice Apricot 30 alice@example.com 2000-01-01 1 users;staff
2 Bob Blackthorn 31 bob@example.com 2000-01-02 1 users;staff
3 Carol Corn 32 carol@example.com 2000-01-03 1 users
4 Dave Durian 33 dave@example.org 2000-01-04 1 users
5 Eve Elderberry 34 eve@example.org 2000-01-05 1 users;staff;admins
6 Mallory Melon 15 mallory@example.net NaN 0 NaN
>>> from os import remove
>>> remove(FILE)
"""
# %% Run
# - PyCharm: right-click in the editor and `Run Doctest in ...`
# - PyCharm: keyboard shortcut `Control + Shift + F10`
# - Terminal: `python -m doctest -f -v myfile.py`
# %% Imports
from urllib.request import urlopen
import sqlite3
import pandas as pd
# %% Types
# %% Data
DATA = 'https://python3.info/_static/example.db'
FILE = r'_temporary.db'
with urlopen(DATA) as response, open(FILE, mode='wb') as database:
database.write(response.read())
with sqlite3.connect(FILE) as db:
data = pd.read_sql_query('SELECT * FROM users', db, index_col='id')
# %% Result