Skip to content

Commit

Permalink
wip
Browse files Browse the repository at this point in the history
  • Loading branch information
rdmolony committed Jan 8, 2024
1 parent c7b9019 commit 68218cc
Show file tree
Hide file tree
Showing 8 changed files with 66 additions and 81 deletions.
2 changes: 1 addition & 1 deletion core/urls.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@


urlpatterns = [
path('', lambda request: redirect('sensor/')),
path('', lambda request: redirect('sensor:root')),

path('admin/', admin.site.urls),
path('api/', include('core.api_urls')),
Expand Down
10 changes: 5 additions & 5 deletions sensor/migrations/0003_filetype.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,11 +16,11 @@ class Migration(migrations.Migration):
fields=[
('id', models.BigAutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
('name', models.TextField()),
('na_values', django.contrib.postgres.fields.ArrayField(base_field=models.CharField(max_length=10), default=['NaN'], help_text='A list of strings to recognise as empty values. <br>\n\n Default: ["NaN"] <br>\n\n Note: "" is also included by default <br>\n\n Example: ["NAN", "-9999", "-9999.0"]\n', size=None)),
('delimiter', models.CharField(default=',', help_text='The character used to separate fields in the file. <br>\n\n Default: "," <br>\n\n Examples: "," or ";" or "\\s+" for whitespace or "\\t" for tabs\n', max_length=5)),
('datetime_fieldnames', django.contrib.postgres.fields.ArrayField(base_field=models.CharField(max_length=50), default=['Tmstamp'], help_text='A list of datetime field names. <br>\n\n Examples: <br>\n\n 1) Data has a single datetime field named "Tmstamp" which has values like\n \'2021-06-29 00:00:00.000\': ["Tmstamp"] <br>\n\n 2) Data has two datetime fields named "Date" and "Time" which have values\n like \'01.01.1999\' and \'00:00\' respectively: ["Date","Time"] <br>\n', size=None)),
('encoding', models.CharField(default='utf-8', help_text='The encoding of the file. <br>\n\n Default: "utf-8" <br>\n\n Examples: utf-8 or latin-1 or cp1252\n', max_length=25)),
('datetime_formats', django.contrib.postgres.fields.ArrayField(base_field=models.CharField(max_length=25), default=['%Y-%m-%d %H:%M:%S'], help_text='The datetime format of `datetime_columns`. <br>\n\n See https://docs.python.org/3/library/datetime.html#strftime-and-strptime-format-codes\n for format codes\n\n Default: "%Y-%m-%d %H:%M:%S" <br>\n\n Examples: "%Y-%m-%d %H:%M:%S" for "2021-03-01 00:00:00"\n', size=None)),
('na_values', django.contrib.postgres.fields.ArrayField(base_field=models.CharField(max_length=10), default=['NaN'], help_text='A list of strings to recognise as empty values.\n\n Default: ["NaN"]\n\n Note: "" is also included by default\n\n Example: ["NAN", "-9999", "-9999.0"]\n', size=None)),
('delimiter', models.CharField(default=',', help_text='The character used to separate fields in the file.\n\n Default: ","\n\n Examples: "," or ";" or "\\s+" for whitespace or "\\t" for tabs\n', max_length=5)),
('datetime_fieldnames', django.contrib.postgres.fields.ArrayField(base_field=models.CharField(max_length=50), default=['Tmstamp'], help_text='A list of datetime field names.\n\n Examples:\n\n 1) Data has a single datetime field named "Tmstamp" which has values like\n \'2021-06-29 00:00:00.000\': ["Tmstamp"]\n\n 2) Data has two datetime fields named "Date" and "Time" which have values\n like \'01.01.1999\' and \'00:00\' respectively: ["Date","Time"]\n', size=None)),
('encoding', models.CharField(default='utf-8', help_text='The encoding of the file.\n\n Default: "utf-8"\n\n Examples: utf-8 or latin-1 or cp1252\n', max_length=25)),
('datetime_formats', django.contrib.postgres.fields.ArrayField(base_field=models.CharField(max_length=25), default=['%Y-%m-%d %H:%M:%S'], help_text='The datetime format of `datetime_columns`.\n\n See https://docs.python.org/3/library/datetime.html#strftime-and-strptime-format-codes\n for format codes\n\n Default: "%Y-%m-%d %H:%M:%S"\n\n Examples: "%Y-%m-%d %H:%M:%S" for "2021-03-01 00:00:00"\n', size=None)),
],
),
]
52 changes: 0 additions & 52 deletions sensor/migrations/0005_source_alter_file_parsed_at_and_more.py

This file was deleted.

Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
# Generated by Django 5.0 on 2024-01-05 18:16

import django.contrib.postgres.fields
from django.db import migrations, models


class Migration(migrations.Migration):
    """Create the database table for the ``Source`` model.

    Auto-generated migration; edit with care.
    """

    # NOTE(review): this migration only creates ``Source``. If
    # ``File.parsed_at`` was made nullable in the same change, confirm that
    # ``manage.py makemigrations --check`` reports nothing pending.

    # Runs after the sensor app's ``0004_file_type`` migration.
    dependencies = [
        ('sensor', '0004_file_type'),
    ]

    operations = [
        migrations.CreateModel(
            name='Source',
            fields=[
                ('id', models.BigAutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
                ('name', models.TextField()),
            ],
        )
    ]
46 changes: 26 additions & 20 deletions sensor/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,10 +13,12 @@


class Source(models.Model):
    """A named source record; its only declared field is a free-text ``name``."""

    name = models.TextField()


class FileType(models.Model):

name = models.TextField()
na_values = ArrayField(
base_field=models.CharField(max_length=10),
Expand Down Expand Up @@ -90,10 +92,11 @@ class FileType(models.Model):


class File(models.Model):

file = models.FileField(upload_to="readings/", blank=False, null=False)
uploaded_at = models.DateTimeField(auto_now_add=True)
type = models.ForeignKey(FileType, on_delete=models.RESTRICT)
parsed_at = models.DateTimeField(blank=False, null=False)
parsed_at = models.DateTimeField(blank=True, null=True)
parse_error = models.TextField(blank=True, null=True)
hash = models.TextField(blank=True, null=True)

Expand All @@ -120,6 +123,7 @@ def import_to_db(self):


with self.file.open(mode="rb") as f:

reading_objs = (
Reading(
timestamp=r["timestamp"],
Expand All @@ -135,29 +139,31 @@ def import_to_db(self):
)
)

batch_size = 1_000
batch_size = 1_000

try:
with transaction.atomic():
while True:
batch = list(islice(reading_objs, batch_size))
if not batch:
break
Reading.objects.bulk_create(batch, batch_size)

except Exception as e:
self.parsed_at = None
self.parse_error = str(e)
self.save()
raise e

else:
self.parsed_at = datetime.now(timezone.utc)
self.parse_error = None
self.save()
try:
    # A single transaction wraps every batch, so a failure part-way
    # through rolls back the rows already inserted for this file.
    with transaction.atomic():
        # Drain `reading_objs` in chunks of `batch_size` so the whole file
        # is never materialised in memory at once.
        # (assumes `reading_objs` is a one-shot iterator — TODO confirm)
        while True:
            batch = list(islice(reading_objs, batch_size))
            if not batch:
                break
            Reading.objects.bulk_create(batch, batch_size)

except Exception as e:
    # Record the failure on this File row, then re-raise so the caller
    # still sees the original error. No debugger hook here: a stray
    # `breakpoint()` would block any non-interactive process.
    self.parsed_at = None
    self.parse_error = str(e)
    self.save()
    raise

else:
    # Success: stamp the parse time and clear any earlier error message.
    self.parsed_at = datetime.now(timezone.utc)
    self.parse_error = None
    self.save()


class Reading(models.Model):

file = models.ForeignKey(File, on_delete=models.RESTRICT)
timestamp = models.DateTimeField(blank=False, null=False, primary_key=True)
sensor_name = models.TextField(blank=False, null=False)
Expand Down
3 changes: 1 addition & 2 deletions sensor/urls.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
from django.shortcuts import redirect
from django.urls import path

from . import views
Expand All @@ -8,7 +7,7 @@


urlpatterns = [
path('', views.index),
path('', views.index, name="root"),

path('create-file-type/', views.create_file_type, name="create-file-type"),
path('upload-file/', views.upload_file, name="upload-file"),
Expand Down
10 changes: 10 additions & 0 deletions tests/conftest.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
import pytest
from django.conf import settings


@pytest.fixture(scope='function', autouse=True)
def media_root(tmp_path):
    """Redirect ``settings.MEDIA_ROOT`` to a per-test temporary directory.

    Applied automatically to every test (``autouse=True``). Yields the
    temporary media root, then puts the previous value back once the test
    has finished.
    """
    previous_root = settings.MEDIA_ROOT
    settings.MEDIA_ROOT = tmp_path / 'media'

    yield settings.MEDIA_ROOT

    # Teardown: restore whatever MEDIA_ROOT was configured before the test.
    settings.MEDIA_ROOT = previous_root
3 changes: 2 additions & 1 deletion tests/sensor/test_models.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,8 +59,9 @@ def test_import_to_db(
)
file = ContentFile(b"\n".join(l for l in lines), name="sensor-readings.txt")
file_obj = File(file=file, type=file_type_obj)
file_obj.save()

file_obj.import_to_db(file_obj)
file_obj.import_to_db()

output = Reading.objects.all()
assert output == snapshot

0 comments on commit 68218cc

Please sign in to comment.