[refactoring] Changes in the queue code and the IHarvester interface"

This commit is contained in:
Adrià Mercader 2011-04-07 16:59:11 +01:00
parent 90ae9d27db
commit b38fc57ec5
2 changed files with 31 additions and 21 deletions

View File

@ -23,7 +23,8 @@ class IHarvester(Interface):
responsible for: responsible for:
- gathering all the necessary objects to fetch on a later. - gathering all the necessary objects to fetch on a later.
stage (e.g. for a CSW server, perform a GetRecords request) stage (e.g. for a CSW server, perform a GetRecords request)
- creating the necessary HarvestObjects in the database. - creating the necessary HarvestObjects in the database, specifying
the guid and a reference to its source and job.
- creating and storing any suitable HarvestGatherErrors that may - creating and storing any suitable HarvestGatherErrors that may
occur. occur.
- returning a list with all the ids of the created HarvestObjects. - returning a list with all the ids of the created HarvestObjects.
@ -39,8 +40,6 @@ class IHarvester(Interface):
- getting the contents of the remote object (e.g. for a CSW server, - getting the contents of the remote object (e.g. for a CSW server,
perform a GetRecordById request). perform a GetRecordById request).
- saving the content in the provided HarvestObject. - saving the content in the provided HarvestObject.
- update the fetch_started, fetch_finished and retry_times as
necessary.
- creating and storing any suitable HarvestObjectErrors that may - creating and storing any suitable HarvestObjectErrors that may
occur. occur.
- returning True if everything went as expected, False otherwise. - returning True if everything went as expected, False otherwise.
@ -55,10 +54,10 @@ class IHarvester(Interface):
responsible for: responsible for:
- performing any necessary action with the fetched object (e.g - performing any necessary action with the fetched object (e.g
create a CKAN package). create a CKAN package).
- creatingg the HarvestObject - Package relation (if necessary) - creating the HarvestObject - Package relation (if necessary)
- creating and storing any suitable HarvestObjectErrors that may - creating and storing any suitable HarvestObjectErrors that may
occur. occur.
- returning True if everything went as expected, False otherwisie. - returning True if everything went as expected, False otherwise.
:param harvest_object: HarvestObject object :param harvest_object: HarvestObject object
:returns: True if everything went right, False if errors were found :returns: True if everything went right, False if errors were found

View File

@ -1,4 +1,5 @@
import logging import logging
import datetime
from carrot.connection import BrokerConnection from carrot.connection import BrokerConnection
from carrot.messaging import Publisher from carrot.messaging import Publisher
@ -57,17 +58,20 @@ def get_consumer(queue_name, routing_key):
exchange_type=EXCHANGE_TYPE, exchange_type=EXCHANGE_TYPE,
durable=True, auto_delete=False) durable=True, auto_delete=False)
def gather_callback(message_data,message): def gather_callback(message_data,message):
try: try:
id = message_data['harvest_job_id'] id = message_data['harvest_job_id']
log.info('Received harvest job id: %s' % id) log.debug('Received harvest job id: %s' % id)
# Get a publisher for the fetch queue # Get a publisher for the fetch queue
publisher = get_fetch_publisher() publisher = get_fetch_publisher()
try: try:
job = HarvestJob.get(id) job = HarvestJob.get(id)
except:
log.error('Harvest job does not exist: %s' % id)
else:
# Send the harvest job to the plugins that implement # Send the harvest job to the plugins that implement
# the Harvester interface, only if the source type # the Harvester interface, only if the source type
# matches # matches
@ -76,15 +80,15 @@ def gather_callback(message_data,message):
# Get a list of harvest object ids from the plugin # Get a list of harvest object ids from the plugin
harvest_object_ids = harvester.gather_stage(job) harvest_object_ids = harvester.gather_stage(job)
log.debug('Received from plugin''s gather_stage: %r' % harvest_object_ids)
if len(harvest_object_ids) > 0: if harvest_object_ids and len(harvest_object_ids) > 0:
for id in harvest_object_ids: for id in harvest_object_ids:
# Send the id to the fetch queue # Send the id to the fetch queue
publisher.send({'harvest_object_id':id}) publisher.send({'harvest_object_id':id})
log.info('Sent object %s to the fetch queue' % id) log.debug('Sent object %s to the fetch queue' % id)
except: job.status = u'Finished'
log.error('Harvest job does not exist: %s' % id) job.save()
finally: finally:
publisher.close() publisher.close()
@ -102,7 +106,9 @@ def fetch_callback(message_data,message):
try: try:
obj = HarvestObject.get(id) obj = HarvestObject.get(id)
except:
log.error('Harvest object does not exist: %s' % id)
else:
# Send the harvest object to the plugins that implement # Send the harvest object to the plugins that implement
# the Harvester interface, only if the source type # the Harvester interface, only if the source type
# matches # matches
@ -110,14 +116,16 @@ def fetch_callback(message_data,message):
if harvester.get_type() == obj.source.type: if harvester.get_type() == obj.source.type:
# See if the plugin can fetch the harvest object # See if the plugin can fetch the harvest object
obj.fetch_started = datetime.datetime.now()
success = harvester.fetch_stage(obj) success = harvester.fetch_stage(obj)
obj.fetch_finished = datetime.datetime.now()
obj.save()
#TODO: retry times?
if success: if success:
# If no errors where found, call the import method # If no errors where found, call the import method
harvester.import_stage(obj) harvester.import_stage(obj)
except:
log.error('Harvest object does not exist: %s' % id)
except KeyError: except KeyError:
log.error('No harvest object id received') log.error('No harvest object id received')
@ -140,3 +148,6 @@ def get_gather_publisher():
def get_fetch_publisher(): def get_fetch_publisher():
return get_publisher('harvest_object_id') return get_publisher('harvest_object_id')
# Get a publisher for the fetch queue
#fetch_publisher = get_fetch_publisher()