Use Python to speed things up

2026-03-03 06:27:23 +01:00 · 2017-06-07 15:06:11 +02:00
parent 8b83833074
commit daa737fc18
2 changed files with 38 additions and 19 deletions
--- a/README.md
+++ b/README.md
@@ -10,6 +10,6 @@ your InfluxDb.
 * delete all lines that you already have in your DB (keep the header)
 * export settings as environment vars in case they differ from the defaults
  (INFLUXDB_HOST, INFLUXDB_PORT, INFLUXDB_DATABASE, SENSOR_ID)
-* run `cat <csv.file> | ./toLineProtocol.sh | ./toInfluxDb.sh`
+* run `cat <csv.file> | ./toLineProtocol.py | ./toInfluxDb.sh`
 I advise you to back up the DB first :stuck_out_tongue:
--- a/toLineProtocol.py
+++ b/toLineProtocol.py
@@ -1,10 +1,17 @@
-#!/usr/bin/env bash
+#!/usr/bin/env python3
 #
 # Converts CSV exports from https://www.madavi.de/sensor/csvfiles.php to
 # InfluxDB LineProtocol https://docs.influxdata.com/influxdb/v1.2/write_protocols/line_protocol_reference/
 #
 # Note: timestamps are in seconds, therefore precision "s" needs to be set
 # when writing, see https://docs.influxdata.com/influxdb/v1.2/tools/api/#write
 #
 # Settings:
 #
 # Please set
 #   * SENSOR_ID
 #   * INFLUXDB_DATABASE
 # in your environment
 # CSV file specs
 #
@@ -32,26 +39,38 @@
 # |             |          | 20 Signal             | -91
 # | node        | tag      | -- --                 | e.g. esp8266-16229960
 #
 # TODO:
 #   - (resolved) spawning "date" to parse the UTC date for each line was
 #     super slow; timestamps are now parsed in-process with Python's datetime.
-set -e
+import os
 import sys
 import csv
 from datetime import datetime, timedelta
-SRC_FILE=${1:-/dev/stdin}
+def getTimestamp(timestr):
    # Convert a 'YYYY/MM/DD HH:MM:SS' time string into whole seconds elapsed
    # since 1970-01-01 00:00:00. No timezone conversion is applied, so the
    # input is effectively treated as UTC.
    # strptime raises ValueError if timestr does not match the format.
    # NOTE(review): the trailing ';' on the next line is redundant in Python.
    naiveDt = datetime.strptime(timestr, '%Y/%m/%d %H:%M:%S');
    # Dividing the offset from the epoch by a one-second timedelta yields the
    # number of seconds as a float.
    utcTimestamp = (naiveDt - datetime(1970, 1, 1)) / timedelta(seconds=1)
    # The format has no sub-second field, so int() only changes the type.
    return int(utcTimestamp)
-DATABASE=${INFLUXDB_DATABASE:-feinstaub}
+sensor_id = os.environ.get('SENSOR_ID', '16229960')
-SENSOR_ID=${SENSOR_ID:-16229960}
+database = os.environ.get('INFLUXDB_DATABASE', 'feinstaub')
-NODE=esp8266-$SENSOR_ID
+node = 'esp8266-' + sensor_id
 outline = '{database},node={node} SDS_P1={sds_p1},SDS_P2={sds_p2},humidity={humidity},min_micro={min_micro},max_micro={max_micro},samples={samples},temperature={temperature} {timestamp}'
-cat $SRC_FILE                                                       \
+reader = csv.reader(sys.stdin, delimiter=';')
-    | gawk -v db="$DATABASE" -v node="$NODE"                        \
+for row in reader:
-        'BEGIN { FS = ";" } ;                                       \
+    if (row[0] == 'Time'):
-        {   if ($1 != "Time") {                                     \
+        continue
                convertDate = "date -u --date=\""$1"\" +%s";        \
                convertDate| getline timestamp;                     \
                close(convertDate);                                 \
                print db",node="node" SDS_P1="$8",SDS_P2="$9",humidity="$11",min_micro="$18",max_micro="$19",samples="$17",temperature="$10" "timestamp } \
            }'
    values = {}
    values['database'] = database
    values['node'] = node
    values['timestamp'] = getTimestamp(row[0])
    values['sds_p1'] = row[7]
    values['sds_p2'] = row[8]
    values['temperature'] = row[9]
    values['humidity'] = row[10]
    values['samples'] = row[16]
    values['min_micro'] = row[17]
    values['max_micro'] = row[18]
    print(outline.format(**values))