From a293a7a87ff3610df39a0b507e063b64d0cbb4a6 Mon Sep 17 00:00:00 2001 From: James Murty Date: Mon, 29 May 2017 15:35:32 +1000 Subject: [PATCH 1/2] #14 #15 Serialize EDTFField values into DB Improve performance of EDTFField when populating model instances from DB values by storing EDTF data in pickled format, not as string values that need to be re-parsed every time they are loaded. This implementation is naive and could be improved with a more sophisticated serialization approach for EDTF fields, though it works for now. This change also handles the issue in #15 where already-parsed EDTF field values are re-parsed when an instance is saved, unless there is a `natural_text_field` value present to override and reset the EDTF field value. --- edtf/fields.py | 44 +++++++++++++++++++++++++++++++++++--------- 1 file changed, 35 insertions(+), 9 deletions(-) diff --git a/edtf/fields.py b/edtf/fields.py index 4d3535f..97c4bfd 100644 --- a/edtf/fields.py +++ b/edtf/fields.py @@ -1,3 +1,8 @@ +try: + import cPickle as pickle +except: + import pickle + from django.db import models from edtf import parse_edtf, EDTFObject @@ -22,7 +27,7 @@ def __init__( upper_fuzzy_field=None, **kwargs ): - kwargs['max_length'] = 255 + kwargs['max_length'] = 1000 self.natural_text_field, self.lower_strict_field, \ self.upper_strict_field, self.lower_fuzzy_field, \ self.upper_fuzzy_field = natural_text_field, lower_strict_field, \ @@ -48,6 +53,12 @@ def deconstruct(self): def from_db_value(self, value, expression, connection, context): # Converting values to Python objects + if not value: + return None + try: + return pickle.loads(str(value)) + except: + pass return parse_edtf(value, fail_silently=True) def to_python(self, value): @@ -59,11 +70,16 @@ def to_python(self, value): return parse_edtf(value, fail_silently=True) + def get_db_prep_save(self, value, connection): + if value: + return pickle.dumps(value) + return super(EDTFField, self).get_db_prep_save(value, connection) + def 
get_prep_value(self, value): # convert python objects to query values value = super(EDTFField, self).get_prep_value(value) if isinstance(value, EDTFObject): - return unicode(value) + return pickle.dumps(value) return value def pre_save(self, instance, add): @@ -74,19 +90,29 @@ def pre_save(self, instance, add): if not self.natural_text_field or self.attname not in instance.__dict__: return + edtf = getattr(instance, self.attname) + + # Update EDTF field based on latest natural text value, if any natural_text = getattr(instance, self.natural_text_field) if natural_text: - n = text_to_edtf(natural_text) - setattr(instance, self.attname, n) + edtf = text_to_edtf(natural_text) - e = parse_edtf(getattr(instance, self.attname), fail_silently=True) - if e: + # TODO If `natural_text_field` becomes cleared the derived EDTF field + # value should also be cleared, rather than left at original value? + # TODO Handle case where EDTF field is set to a string directly, not + # via `natural_text_field` (this is a slightly unexpected use-case, but + # is a very efficient way to set EDTF values in situations like for API + # imports so we probably want to continue to support it?) + if edtf and not isinstance(edtf, EDTFObject): + edtf = parse_edtf(edtf, fail_silently=True) + + setattr(instance, self.attname, edtf) + if edtf: # set related date fields on the instance for attr in DATE_ATTRS: field_attr = "%s_field" % attr g = getattr(self, field_attr, None) if g: - setattr(instance, g, getattr(e, attr)()) - - return unicode(e) + setattr(instance, g, getattr(edtf, attr)()) + return edtf From 846de98e94e162ef9dc62536b725f0bd462bc815 Mon Sep 17 00:00:00 2001 From: James Murty Date: Mon, 5 Jun 2017 16:50:05 +1000 Subject: [PATCH 2/2] #14 Increase DB column size for "serializing" EDTF fields Bump the character field size up to 2000 to avoid hitting character limits with this temporary fix, until we have a better serialization mechanism. 
--- edtf/fields.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/edtf/fields.py b/edtf/fields.py index 97c4bfd..da02f01 100644 --- a/edtf/fields.py +++ b/edtf/fields.py @@ -27,7 +27,7 @@ def __init__( upper_fuzzy_field=None, **kwargs ): - kwargs['max_length'] = 1000 + kwargs['max_length'] = 2000 self.natural_text_field, self.lower_strict_field, \ self.upper_strict_field, self.lower_fuzzy_field, \ self.upper_fuzzy_field = natural_text_field, lower_strict_field, \