Skip to content

Commit

Permalink
Merge pull request #6 from nueces/master
Browse files Browse the repository at this point in the history
working branch.
  • Loading branch information
hvelarde committed Apr 25, 2012
2 parents d23f569 + 40dc44d commit c415cf4
Show file tree
Hide file tree
Showing 3 changed files with 110 additions and 23 deletions.
2 changes: 1 addition & 1 deletion src/transmogrify/nitf/migrator.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ def render(self):

def transmogrify(self, context):
self.transmogrifier = Transmogrifier(context)
self.transmogrifier("nitfmigrator")
self.transmogrifier("nitfxmlimport")


class NewsItemSource(object):
Expand Down
40 changes: 35 additions & 5 deletions src/transmogrify/nitf/xmlimport.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -3,12 +3,17 @@ pipeline =
sourcedirectory
xmlprocessor
path-update
logger
folderarchive
foldertitle
excludefromnav
atimage
constructor
schemaupdater
schemaupdater-dexterity
schemaupdater-atct
state-inserter
workflowupdater
reindexobject
logger

[sourcedirectory]
blueprint = transmogrify.nitf.import.sourcedirectory
Expand All @@ -17,22 +22,44 @@ suffix = xml

[xmlprocessor]
blueprint = transmogrify.nitf.xmlsource.xmlprocessor
datestorage = True
directory = transmogrify.nitf:data/images

[path-update]
# Rewrite _path so NITF items and images are filed under an archive folder
# built from the item's effective date: /articulos/<year>/<month>/<day>/<path>.
# Note: %m is the month; %M would insert the minute of the timestamp.
blueprint = collective.transmogrifier.sections.inserter
key = string:_path
value = python:"/articulos/{0}/{1}".format(item['effective'].strftime("%Y/%m/%d"), item['_path'])
condition = python: item['_type'] in ['collective.nitf.content', 'Image']

[folderarchive]
blueprint = collective.transmogrifier.sections.folders

[foldertitle]
blueprint = collective.transmogrifier.sections.inserter
key = string:title
value = python:item['_path'].split('/')[-1]
condition = python: item['_type']=='Folder'

[excludefromnav]
blueprint = collective.transmogrifier.sections.inserter
key = string:excludeFromNav
value = python:True
condition = python: item['_type']=='Folder'

[atimage]
blueprint = plone.app.transmogrifier.mimeencapsulator
field = string:image
mimetype = python: item['_mimetype']
condition = python: item['_type']=='Image'

[constructor]
blueprint = collective.transmogrifier.sections.constructor

[schemaupdater]
[schemaupdater-dexterity]
blueprint = transmogrify.dexterity.schemaupdater

[schemaupdater-atct]
blueprint = plone.app.transmogrifier.atschemaupdater

[state-inserter]
blueprint = collective.transmogrifier.sections.inserter
key = string:_transitions
Expand All @@ -41,8 +68,11 @@ value = string:publish
[workflowupdater]
blueprint = plone.app.transmogrifier.workflowupdater

[reindexobject]
blueprint = plone.app.transmogrifier.reindexobject

[logger]
blueprint = collective.transmogrifier.sections.logger
name = logger
name = process item:
level = INFO
key = _path
91 changes: 74 additions & 17 deletions src/transmogrify/nitf/xmlsource.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,14 @@
# -*- coding: utf-8 -*-
from isodate import parse_datetime
import logging
import urlparse
import xml.etree.ElementTree as etree
from isodate import parse_datetime
from urllib2 import urlopen, URLError

from zope.interface import classProvides, implements
from collective.transmogrifier.interfaces import ISection
from collective.transmogrifier.interfaces import ISectionBlueprint

#from collective.nitf.content import genre_default_value
#from collective.nitf.content import section_default_value
#from collective.nitf.content import urgency_default_value
from collective.transmogrifier.utils import resolvePackageReferenceOrFile


def get_text(dom, subelemet, attribute=None):
Expand All @@ -33,6 +33,12 @@ class XMLSource(object):

def __init__(self, transmogrifier, name, options, previous):
    """Set up the section from its pipeline options.

    Keeps a reference to the ``previous`` iterable, resolves the optional
    ``directory`` option through ``resolvePackageReferenceOrFile`` (the
    attribute is ``None`` when the option is absent) and creates a logger
    named after ``name``.
    """
    self.previous = previous
    # Only resolve the directory when the option was actually supplied.
    self.directory = (
        resolvePackageReferenceOrFile(options['directory'])
        if 'directory' in options else None)

    self.logger = logging.getLogger(name)

def __iter__(self):
for data in self.previous:
Expand All @@ -46,18 +52,14 @@ def __iter__(self):
'effective': None, 'expires': None,
# plone.app.dexterity.behaviours.metadata.IOwnership
'creators': [], 'contributors': [], 'rights': None,
# TODO: How does the standard manage references and related items?
# TODO: How does the standard manage references and related items?
# plone.app.referenceablebehavior.referenceable.IReferenceable
#'_plone.uuid': '',
# plone.app.relationfield.behavior.IRelatedItems
# 'relatedItems': (),
# collective.nitf.content.INITF
'subtitle': '', 'byline': '', 'text': '', 'genre': '',
'section': '', 'urgency': '', 'location': '',
# objects that should be created inside of the current
# NITF object.
'_media': {'images': [],
'videos': []}
}

dom = etree.fromstring(data)
Expand All @@ -77,6 +79,7 @@ def __iter__(self):
if sdate:
item['expires'] = parse_datetime(sdate)

# This field is not implemented in collective.nitf.
#sdate = get_text(head, 'docdata/date.issue', 'norm')
#if sdate:
# item['issue'] = parse_datetime(sdate)
Expand All @@ -91,19 +94,73 @@ def __iter__(self):
item['subtitle'] = get_text(body, 'body.head/hedline/hl2')
item['byline'] = get_text(body, 'body.head/byline/person')

# The list of media items to yield, like atimages objects and video
# references.
media_items = []
for elem in list(body.find('body.content')):
if elem.tag == 'media' and elem.get('media-type') == 'image':
image = elem.find('media-reference').attrib
image['media-caption'] = get_text(elem, 'media-caption')
item['_media']['images'].append(image)
# media-type image list of attributes:
# - mime-type, source, alternate-text, height, width.
image = {'_path': None,
'_type': 'Image',
'title': None,
'description': None,
'image': None,
'_data': None,
'_mimetype': None,
}

media = elem.find('media-reference')
src = media.get('source', None)
path = media.get('alternate-text', None)
image['title'] = media.get('alternate-text', None)
image['_mimetype'] = media.get('mime-type')
image['description'] = get_text(elem, 'media-caption')

if None in (src, path, image['_mimetype']):
self.logger.debug(
"item path: {0}, incomplete data image src: {1}"
.format(item['_path']), path)
continue

if self.directory is not None:
# Change the URL scheme to retrieve the file from the
# filesystem and insert the source directory path.
url = urlparse.urlparse(src)
sdir = urlparse.urlparse(self.directory)
src = urlparse.urlunsplit(('file',
"{0}/{1}".format(sdir.path, url.netloc),
url.path, url.query, url.fragment))

try:
fd = urlopen(src)
except URLError:
self.logger.debug(
"item path: {0}, can't retrieve image from url: {1}"
.format(item['_path']), src)
continue

image['_data'] = fd.read()
fd.close()
image['_path'] = "{0}/{1}".format(item['_path'], path)
# HACK: This is to support folder archive based on the
# effective date (original publication date).
image['effective'] = item['effective']

media_items.append(image)

elif elem.tag == 'media' and elem.get('media-type') == 'video':
video = elem.find('media-reference').attrib
video['media-caption'] = get_text(elem, 'media-caption')
item['_media']['videos'].append(video)

# TODO: manage video references.
# media-type video list of attributes:
# - media-type, source, alternate-text.
video = {}
# media_items.append(video)
else: # other tag are considered part of the body text and
# should be preserved.
item['text'] += etree.tostring(elem)

# First we need to create the NITF object.
yield item
# Media items should be created after the nitf object.
for media_item in media_items:
yield media_item

0 comments on commit c415cf4

Please sign in to comment.