15.5. CSV Non-Standard

15.5.1. Custom Header

Read data from a CSV file using csv.DictReader(). Note that when custom field names are given, the first line (typically a header) will be treated like ordinary data. Therefore we skip it using old_header = next(reader):

>>> import csv
>>> from pprint import pprint
>>>
>>>
>>> fieldnames = ['fname', 'lname', 'age']
>>>
>>> with open('/tmp/myfile.csv', mode='rt') as file:
...     reader = csv.DictReader(file, fieldnames)
...     old_header = next(reader)
...     result = list(reader)

Result:

>>> pprint(result, sort_dicts=False)
[{'fname': 'Alice', 'lname': 'Apricot', 'age': '30'},
 {'fname': 'Bob', 'lname': 'Blackthorn', 'age': '31'},
 {'fname': 'Carol', 'lname': 'Corn', 'age': '32'},
 {'fname': 'Dave', 'lname': 'Durian', 'age': '33'},
 {'fname': 'Eve', 'lname': 'Elderberry', 'age': '34'},
 {'fname': 'Mallory', 'lname': 'Melon', 'age': '15'}]

15.5.2. Ini

  • setup.cfg

  • delimiter='='

key=MP
name=MyProject
language=py
encoding=UTF-8
verbose=true
>>> delimiter = '='
>>> result = [row.split(delimiter) for row in DATA.splitlines()]

15.5.3. Config

  • /etc/postgresql/*/main/postgresql.conf

  • delimiter=' = '

listen_addresses = 'localhost'
port = 5432
max_connections = 100
ssl = on
password_encryption = on
db_user_namespace = off
>>> delimiter = ' = '
>>> result = [row.split(delimiter) for row in DATA.splitlines()]

15.5.4. Toml

  • pyproject.toml

  • delimiter='='

namespace_packages = false
explicit_package_bases = false
ignore_missing_imports = false
follow_imports = "normal"
follow_imports_for_stubs = false
no_site_packages = false
no_silence_site_packages = false
# Platform configuration
python_version = "3.13"
platform = "linux-64"

15.5.5. Passwd

  • /etc/passwd

  • delimiter=':'

root:x:0:0:root:/root:/bin/bash
daemon:x:1:1:daemon:/usr/sbin:/usr/sbin/nologin
bin:x:2:2:bin:/bin:/usr/sbin/nologin
sys:x:3:3:sys:/dev:/usr/sbin/nologin
alice:x:1000:1000:Alice:/home/alice:/bin/bash
bob:x:1001:1001:Bob:/home/bob:/bin/bash
carol:x:1002:1002:Carol:/home/carol:/bin/bash
dave:x:1003:1003:Dave:/home/dave:/bin/bash
eve:x:1004:1004:Eve:/home/eve:/bin/bash
mallory:x:1005:1005:Mallory:/home/mallory:/bin/bash
nobody:x:65534:65534:nobody:/nonexistent:/usr/sbin/nologin
>>> delimiter = ':'
>>> result = [row.split(delimiter) for row in DATA.splitlines()]

15.5.6. SSHd Config

  • /etc/ssh/sshd_config

  • delimiter=' '

ChrootDirectory none
ClientAliveCountMax 3
ClientAliveInterval 0
Compression delayed
MaxStartups 10:30:100
PidFile /var/run/sshd.pid
X11Forwarding no
X11UseLocalhost yes
>>> delimiter = ' '
>>> result = [row.split(delimiter) for row in DATA.splitlines()]

15.5.7. Hosts

  • delimiter='\s+'

##
# File: /etc/hosts
# - ip: internet protocol address (IPv4 or IPv6)
# - hosts: host names
##

127.0.0.1       localhost
127.0.0.1       mycomputer
172.16.0.1      example.com
192.168.0.1     example.edu example.org
10.0.0.1        example.net
255.255.255.255 broadcasthost
::1             localhost

15.5.8. Crontab

  • /etc/crontab

  • delimiter='\s+'

# [Minute] [Hour] [Day_of_the_Month] [Month_of_the_Year] [Day_of_the_Week] [command]
*/5 * * * *          /usr/bin/python3 /home/myapp/run-5min.py 1>/dev/null
* * * * *            /usr/bin/python3 /home/myapp/run-1min.py 1>/dev/null
00 * * * *           /home/myapp/run.py 1>/dev/null
* * * jan,may,aug *  /home/myapp/run.py
0 17 * * sun,fri     /home/myapp/run.py
0 */4 * * *          /home/myapp/run.py
0 4,17 * * sun,mon   /home/myapp/run.py

15.5.9. Key-Value

  • /etc/locate.rc

  • delimiter='='

TMPDIR="/tmp"
FCODES="/var/db/locate.database"
SEARCHPATHS="/"
PRUNEPATHS="/tmp /var/tmp"
# temp directory
TMPDIR="/tmp"

# the actual database
#FCODES="/var/db/locate.database"

# directories to be put in the database
SEARCHPATHS="/"

# directories unwanted in output
#PRUNEPATHS="/tmp /var/tmp"

15.5.10. Docker

  • .env from Docker

  • delimiter='='

DATABASE_ENGINE=postgresql
DATABASE_SERVER=localhost
DATABASE_PORT=5432
DATABASE_NAME=mydatabase
DATABASE_USERNAME=myusername
DATABASE_PASSWORD=mypassword

15.5.11. Sensors

  • delimiter=';'

Name,         Long,       Lat,        ModuleType
"ESA EAC",    50.8524881, 7.1315254,  Indoor

Date,         Time,       Temperature, Humidity, CO2, Noise, Pressure
"2000-01-01", "00:00:00", 22.6,        46,       981, 32,    1019.1
"2000-01-01", "00:05:00", 22.6,        46,       981, 31,    1019.1
"2000-01-01", "00:10:00", 22.6,        46,       968, 32,    1019.1
Name;Long;Lat;ModuleName;ModuleType
"European Astronaut Centre";50.8524881,7.1315254;;Indoor
Timestamp;"Timezone : Europe/Berlin";Temperature;Humidity;CO2;Noise;Pressure
1622498702;"2021/06/01 00:05:02";22.6;46;981;32;1019.1
1622499004;"2021/06/01 00:10:04";22.6;46;981;31;1019.1
1622499306;"2021/06/01 00:15:06";22.6;46;968;32;1019.1
1622499608;"2021/06/01 00:20:08";22.5;46;940;31;1019.1
1622499912;"2021/06/01 00:25:12";22.5;46;907;32;1019
1622500214;"2021/06/01 00:30:14";22.5;46;877;31;1019
1622500517;"2021/06/01 00:35:17";22.4;46;873;32;1019
>>> DATA= """Name;Long;Lat;ModuleName;ModuleType
... "European Astronaut Centre";50.8524881,7.1315254;;Indoor
... Timestamp;"Timezone : Europe/Berlin";Temperature;Humidity;CO2;Noise;Pressure
... 1622498702;"2021/06/01 00:05:02";22.6;46;981;32;1019.1
... 1622499004;"2021/06/01 00:10:04";22.6;46;981;31;1019.1
... 1622499306;"2021/06/01 00:15:06";22.6;46;968;32;1019.1
... 1622499608;"2021/06/01 00:20:08";22.5;46;940;31;1019.1
... 1622499912;"2021/06/01 00:25:12";22.5;46;907;32;1019
... 1622500214;"2021/06/01 00:30:14";22.5;46;877;31;1019
... 1622500517;"2021/06/01 00:35:17";22.4;46;873;32;1019"""
>>>
>>>
>>> metadata_header, metadata_values, data_header, *data_values = DATA.splitlines()
>>>
>>> metadata_header = metadata_header.split(';')
>>> metadata_values = metadata_values.split(';')
>>> data_header = data_header.split(';')
>>> data_values = [line.split(';') for line in data_values]
>>>
>>>
>>> metadata_header
['Name', 'Long', 'Lat', 'ModuleName', 'ModuleType']
>>>
>>> metadata_values
['"European Astronaut Centre"', '50.8524881,7.1315254', '', 'Indoor']
>>>
>>> data_header
['Timestamp', '"Timezone : Europe/Berlin"', 'Temperature', 'Humidity', 'CO2', 'Noise', 'Pressure']
>>>
>>> data_values
[['1622498702', '"2021/06/01 00:05:02"', '22.6', '46', '981', '32', '1019.1'],
 ['1622499004', '"2021/06/01 00:10:04"', '22.6', '46', '981', '31', '1019.1'],
 ['1622499306', '"2021/06/01 00:15:06"', '22.6', '46', '968', '32', '1019.1'],
 ['1622499608', '"2021/06/01 00:20:08"', '22.5', '46', '940', '31', '1019.1'],
 ['1622499912', '"2021/06/01 00:25:12"', '22.5', '46', '907', '32', '1019'],
 ['1622500214', '"2021/06/01 00:30:14"', '22.5', '46', '877', '31', '1019'],
 ['1622500517', '"2021/06/01 00:35:17"', '22.4', '46', '873', '32', '1019']]

15.5.12. Use Case - 1

sepal_length,sepal_width,petal_length,petal_width,species
5.8,2.7,5.1,1.9,virginica
5.1,3.5,1.4,0.2,setosa
5.7,2.8,4.1,1.3,versicolor
>>> import csv
>>> from pathlib import Path
>>> from pprint import pprint
>>>
>>>
>>> DATA = """sepal_length,sepal_width,petal_length,petal_width,species
... 5.8,2.7,5.1,1.9,virginica
... 5.1,3.5,1.4,0.2,setosa
... 5.7,2.8,4.1,1.3,versicolor
... """
>>>
>>> _ = Path('/tmp/myfile.csv').write_text(DATA)
>>>
>>>
>>> def clean(row: dict) -> dict:
...     return {
...         'sepal_length': float(row['sepal_length']),
...         'sepal_width': float(row['sepal_width']),
...         'petal_length': float(row['petal_length']),
...         'petal_width': float(row['petal_width']),
...         'species': row['species']
...     }
>>>
>>>
>>> with open('/tmp/myfile.csv') as file:
...     reader = csv.DictReader(file)
...     result = map(clean, reader)
...     result = list(result)
>>>
>>> pprint(result, sort_dicts=False)
[{'sepal_length': 5.8,
  'sepal_width': 2.7,
  'petal_length': 5.1,
  'petal_width': 1.9,
  'species': 'virginica'},
 {'sepal_length': 5.1,
  'sepal_width': 3.5,
  'petal_length': 1.4,
  'petal_width': 0.2,
  'species': 'setosa'},
 {'sepal_length': 5.7,
  'sepal_width': 2.8,
  'petal_length': 4.1,
  'petal_width': 1.3,
  'species': 'versicolor'}]