15.5. CSV Non-Standard
15.5.1. Custom Header
Read data from a CSV file using csv.DictReader(). Note that when giving custom
field names, the first line (typically a header) will be treated like regular data.
Therefore we skip it using old_header = next(reader):
>>> import csv
>>> from pprint import pprint
>>>
>>>
>>> fieldnames = ['fname', 'lname', 'age']
>>>
>>> with open('/tmp/myfile.csv', mode='rt') as file:
... reader = csv.DictReader(file, fieldnames)
... old_header = next(reader)
... result = list(reader)
Result:
>>> pprint(result, sort_dicts=False)
[{'fname': 'Alice', 'lname': 'Apricot', 'age': '30'},
{'fname': 'Bob', 'lname': 'Blackthorn', 'age': '31'},
{'fname': 'Carol', 'lname': 'Corn', 'age': '32'},
{'fname': 'Dave', 'lname': 'Durian', 'age': '33'},
{'fname': 'Eve', 'lname': 'Elderberry', 'age': '34'},
{'fname': 'Mallory', 'lname': 'Melon', 'age': '15'}]
15.5.2. Ini
setup.cfg
delimiter='='
key=MP
name=MyProject
language=py
encoding=UTF-8
verbose=true
>>> delimiter = '='
>>> result = [row.split(delimiter) for row in DATA.splitlines()]
15.5.3. Config
/etc/postgresql/*/main/postgresql.conf
delimiter=' = '
listen_addresses = 'localhost'
port = 5432
max_connections = 100
ssl = on
password_encryption = on
db_user_namespace = off
>>> delimiter = ' = '
>>> result = [row.split(delimiter) for row in DATA.splitlines()]
15.5.4. Toml
pyproject.toml
delimiter='='
namespace_packages = false
explicit_package_bases = false
ignore_missing_imports = false
follow_imports = "normal"
follow_imports_for_stubs = false
no_site_packages = false
no_silence_site_packages = false
# Platform configuration
python_version = "3.13"
platform = "linux-64"
15.5.5. Passwd
/etc/passwd
delimiter=':'
root:x:0:0:root:/root:/bin/bash
daemon:x:1:1:daemon:/usr/sbin:/usr/sbin/nologin
bin:x:2:2:bin:/bin:/usr/sbin/nologin
sys:x:3:3:sys:/dev:/usr/sbin/nologin
alice:x:1000:1000:Alice:/home/alice:/bin/bash
bob:x:1001:1001:Bob:/home/bob:/bin/bash
carol:x:1002:1002:Carol:/home/carol:/bin/bash
dave:x:1003:1003:Dave:/home/dave:/bin/bash
eve:x:1004:1004:Eve:/home/eve:/bin/bash
mallory:x:1005:1005:Mallory:/home/mallory:/bin/bash
nobody:x:65534:65534:nobody:/nonexistent:/usr/sbin/nologin
>>> delimiter = ':'
>>> result = [row.split(delimiter) for row in DATA.splitlines()]
15.5.6. SSHd Config
/etc/ssh/sshd_config
delimiter=' '
ChrootDirectory none
ClientAliveCountMax 3
ClientAliveInterval 0
Compression delayed
MaxStartups 10:30:100
PidFile /var/run/sshd.pid
X11Forwarding no
X11UseLocalhost yes
>>> delimiter = ' '
>>> result = [row.split(delimiter) for row in DATA.splitlines()]
15.5.7. Hosts
delimiter='\s+'
##
# File: /etc/hosts
# - ip: internet protocol address (IPv4 or IPv6)
# - hosts: host names
##
127.0.0.1 localhost
127.0.0.1 mycomputer
172.16.0.1 example.com
192.168.0.1 example.edu example.org
10.0.0.1 example.net
255.255.255.255 broadcasthost
::1 localhost
15.5.8. Crontab
/etc/crontab
delimiter='\s+'
# [Minute] [Hour] [Day_of_the_Month] [Month_of_the_Year] [Day_of_the_Week] [command]
*/5 * * * * /usr/bin/python3 /home/myapp/run-5min.py 1>/dev/null
* * * * * /usr/bin/python3 /home/myapp/run-1min.py 1>/dev/null
00 * * * * /home/myapp/run.py 1>/dev/null
* * * jan,may,aug * /home/myapp/run.py
0 17 * * sun,fri /home/myapp/run.py
0 */4 * * * /home/myapp/run.py
0 4,17 * * sun,mon /home/myapp/run.py
15.5.9. Key-Value
/etc/locate.rc
delimiter='='
TMPDIR="/tmp"
FCODES="/var/db/locate.database"
SEARCHPATHS="/"
PRUNEPATHS="/tmp /var/tmp"
# temp directory
TMPDIR="/tmp"
# the actual database
#FCODES="/var/db/locate.database"
# directories to be put in the database
SEARCHPATHS="/"
# directories unwanted in output
#PRUNEPATHS="/tmp /var/tmp"
15.5.10. Docker
.env from Docker
delimiter='='
DATABASE_ENGINE=postgresql
DATABASE_SERVER=localhost
DATABASE_PORT=5432
DATABASE_NAME=mydatabase
DATABASE_USERNAME=myusername
DATABASE_PASSWORD=mypassword
15.5.11. Sensors
delimiter=';'
Name, Long, Lat, ModuleType
"ESA EAC", 50.8524881, 7.1315254, Indoor
Date, Time, Temperature, Humidity, CO2, Noise, Pressure
"2000-01-01", "00:00:00", 22.6, 46, 981, 32, 1019.1
"2000-01-01", "00:05:00", 22.6, 46, 981, 31, 1019.1
"2000-01-01", "00:10:00", 22.6, 46, 968, 32, 1019.1
Name;Long;Lat;ModuleName;ModuleType
"European Astronaut Centre";50.8524881,7.1315254;;Indoor
Timestamp;"Timezone : Europe/Berlin";Temperature;Humidity;CO2;Noise;Pressure
1622498702;"2021/06/01 00:05:02";22.6;46;981;32;1019.1
1622499004;"2021/06/01 00:10:04";22.6;46;981;31;1019.1
1622499306;"2021/06/01 00:15:06";22.6;46;968;32;1019.1
1622499608;"2021/06/01 00:20:08";22.5;46;940;31;1019.1
1622499912;"2021/06/01 00:25:12";22.5;46;907;32;1019
1622500214;"2021/06/01 00:30:14";22.5;46;877;31;1019
1622500517;"2021/06/01 00:35:17";22.4;46;873;32;1019
>>> DATA= """Name;Long;Lat;ModuleName;ModuleType
... "European Astronaut Centre";50.8524881,7.1315254;;Indoor
... Timestamp;"Timezone : Europe/Berlin";Temperature;Humidity;CO2;Noise;Pressure
... 1622498702;"2021/06/01 00:05:02";22.6;46;981;32;1019.1
... 1622499004;"2021/06/01 00:10:04";22.6;46;981;31;1019.1
... 1622499306;"2021/06/01 00:15:06";22.6;46;968;32;1019.1
... 1622499608;"2021/06/01 00:20:08";22.5;46;940;31;1019.1
... 1622499912;"2021/06/01 00:25:12";22.5;46;907;32;1019
... 1622500214;"2021/06/01 00:30:14";22.5;46;877;31;1019
... 1622500517;"2021/06/01 00:35:17";22.4;46;873;32;1019"""
>>>
>>>
>>> metadata_header, metadata_values, data_header, *data_values = DATA.splitlines()
>>>
>>> metadata_header = metadata_header.split(';')
>>> metadata_values = metadata_values.split(';')
>>> data_header = data_header.split(';')
>>> data_values = [line.split(';') for line in data_values]
>>>
>>>
>>> metadata_header
['Name', 'Long', 'Lat', 'ModuleName', 'ModuleType']
>>>
>>> metadata_values
['"European Astronaut Centre"', '50.8524881,7.1315254', '', 'Indoor']
>>>
>>> data_header
['Timestamp', '"Timezone : Europe/Berlin"', 'Temperature', 'Humidity', 'CO2', 'Noise', 'Pressure']
>>>
>>> data_values
[['1622498702', '"2021/06/01 00:05:02"', '22.6', '46', '981', '32', '1019.1'],
['1622499004', '"2021/06/01 00:10:04"', '22.6', '46', '981', '31', '1019.1'],
['1622499306', '"2021/06/01 00:15:06"', '22.6', '46', '968', '32', '1019.1'],
['1622499608', '"2021/06/01 00:20:08"', '22.5', '46', '940', '31', '1019.1'],
['1622499912', '"2021/06/01 00:25:12"', '22.5', '46', '907', '32', '1019'],
['1622500214', '"2021/06/01 00:30:14"', '22.5', '46', '877', '31', '1019'],
['1622500517', '"2021/06/01 00:35:17"', '22.4', '46', '873', '32', '1019']]
15.5.12. Use Case - 1
sepal_length,sepal_width,petal_length,petal_width,species
5.8,2.7,5.1,1.9,virginica
5.1,3.5,1.4,0.2,setosa
5.7,2.8,4.1,1.3,versicolor
>>> import csv
>>> from pathlib import Path
>>> from pprint import pprint
>>>
>>>
>>> DATA = """sepal_length,sepal_width,petal_length,petal_width,species
... 5.8,2.7,5.1,1.9,virginica
... 5.1,3.5,1.4,0.2,setosa
... 5.7,2.8,4.1,1.3,versicolor
... """
>>>
>>> _ = Path('/tmp/myfile.csv').write_text(DATA)
>>>
>>>
>>> def clean(row: dict) -> dict:
... return {
... 'sepal_length': float(row['sepal_length']),
... 'sepal_width': float(row['sepal_width']),
... 'petal_length': float(row['petal_length']),
... 'petal_width': float(row['petal_width']),
... 'species': row['species']
... }
>>>
>>>
>>> with open('/tmp/myfile.csv') as file:
... reader = csv.DictReader(file)
... result = map(clean, reader)
... result = list(result)
>>>
>>> pprint(result, sort_dicts=False)
[{'sepal_length': 5.8,
'sepal_width': 2.7,
'petal_length': 5.1,
'petal_width': 1.9,
'species': 'virginica'},
{'sepal_length': 5.1,
'sepal_width': 3.5,
'petal_length': 1.4,
'petal_width': 0.2,
'species': 'setosa'},
{'sepal_length': 5.7,
'sepal_width': 2.8,
'petal_length': 4.1,
'petal_width': 1.3,
'species': 'versicolor'}]