[refactoring] Changes in the queue code and the IHarvester interface"
This commit is contained in:
parent
90ae9d27db
commit
b38fc57ec5
|
@ -23,7 +23,8 @@ class IHarvester(Interface):
|
||||||
responsible for:
|
responsible for:
|
||||||
- gathering all the necessary objects to fetch on a later.
|
- gathering all the necessary objects to fetch on a later.
|
||||||
stage (e.g. for a CSW server, perform a GetRecords request)
|
stage (e.g. for a CSW server, perform a GetRecords request)
|
||||||
- creating the necessary HarvestObjects in the database.
|
- creating the necessary HarvestObjects in the database, specifying
|
||||||
|
the guid and a reference to its source and job.
|
||||||
- creating and storing any suitable HarvestGatherErrors that may
|
- creating and storing any suitable HarvestGatherErrors that may
|
||||||
occur.
|
occur.
|
||||||
- returning a list with all the ids of the created HarvestObjects.
|
- returning a list with all the ids of the created HarvestObjects.
|
||||||
|
@ -39,8 +40,6 @@ class IHarvester(Interface):
|
||||||
- getting the contents of the remote object (e.g. for a CSW server,
|
- getting the contents of the remote object (e.g. for a CSW server,
|
||||||
perform a GetRecordById request).
|
perform a GetRecordById request).
|
||||||
- saving the content in the provided HarvestObject.
|
- saving the content in the provided HarvestObject.
|
||||||
- update the fetch_started, fetch_finished and retry_times as
|
|
||||||
necessary.
|
|
||||||
- creating and storing any suitable HarvestObjectErrors that may
|
- creating and storing any suitable HarvestObjectErrors that may
|
||||||
occur.
|
occur.
|
||||||
- returning True if everything went as expected, False otherwise.
|
- returning True if everything went as expected, False otherwise.
|
||||||
|
@ -55,10 +54,10 @@ class IHarvester(Interface):
|
||||||
responsible for:
|
responsible for:
|
||||||
- performing any necessary action with the fetched object (e.g
|
- performing any necessary action with the fetched object (e.g
|
||||||
create a CKAN package).
|
create a CKAN package).
|
||||||
- creatingg the HarvestObject - Package relation (if necessary)
|
- creating the HarvestObject - Package relation (if necessary)
|
||||||
- creating and storing any suitable HarvestObjectErrors that may
|
- creating and storing any suitable HarvestObjectErrors that may
|
||||||
occur.
|
occur.
|
||||||
- returning True if everything went as expected, False otherwisie.
|
- returning True if everything went as expected, False otherwise.
|
||||||
|
|
||||||
:param harvest_object: HarvestObject object
|
:param harvest_object: HarvestObject object
|
||||||
:returns: True if everything went right, False if errors were found
|
:returns: True if everything went right, False if errors were found
|
||||||
|
|
|
@ -1,4 +1,5 @@
|
||||||
import logging
|
import logging
|
||||||
|
import datetime
|
||||||
|
|
||||||
from carrot.connection import BrokerConnection
|
from carrot.connection import BrokerConnection
|
||||||
from carrot.messaging import Publisher
|
from carrot.messaging import Publisher
|
||||||
|
@ -57,17 +58,20 @@ def get_consumer(queue_name, routing_key):
|
||||||
exchange_type=EXCHANGE_TYPE,
|
exchange_type=EXCHANGE_TYPE,
|
||||||
durable=True, auto_delete=False)
|
durable=True, auto_delete=False)
|
||||||
|
|
||||||
|
|
||||||
def gather_callback(message_data,message):
|
def gather_callback(message_data,message):
|
||||||
try:
|
try:
|
||||||
id = message_data['harvest_job_id']
|
id = message_data['harvest_job_id']
|
||||||
log.info('Received harvest job id: %s' % id)
|
log.debug('Received harvest job id: %s' % id)
|
||||||
|
|
||||||
# Get a publisher for the fetch queue
|
# Get a publisher for the fetch queue
|
||||||
publisher = get_fetch_publisher()
|
publisher = get_fetch_publisher()
|
||||||
|
|
||||||
try:
|
try:
|
||||||
job = HarvestJob.get(id)
|
job = HarvestJob.get(id)
|
||||||
|
except:
|
||||||
|
log.error('Harvest job does not exist: %s' % id)
|
||||||
|
else:
|
||||||
# Send the harvest job to the plugins that implement
|
# Send the harvest job to the plugins that implement
|
||||||
# the Harvester interface, only if the source type
|
# the Harvester interface, only if the source type
|
||||||
# matches
|
# matches
|
||||||
|
@ -76,15 +80,15 @@ def gather_callback(message_data,message):
|
||||||
|
|
||||||
# Get a list of harvest object ids from the plugin
|
# Get a list of harvest object ids from the plugin
|
||||||
harvest_object_ids = harvester.gather_stage(job)
|
harvest_object_ids = harvester.gather_stage(job)
|
||||||
|
log.debug('Received from plugin''s gather_stage: %r' % harvest_object_ids)
|
||||||
if len(harvest_object_ids) > 0:
|
if harvest_object_ids and len(harvest_object_ids) > 0:
|
||||||
for id in harvest_object_ids:
|
for id in harvest_object_ids:
|
||||||
# Send the id to the fetch queue
|
# Send the id to the fetch queue
|
||||||
publisher.send({'harvest_object_id':id})
|
publisher.send({'harvest_object_id':id})
|
||||||
log.info('Sent object %s to the fetch queue' % id)
|
log.debug('Sent object %s to the fetch queue' % id)
|
||||||
|
|
||||||
except:
|
job.status = u'Finished'
|
||||||
log.error('Harvest job does not exist: %s' % id)
|
job.save()
|
||||||
|
|
||||||
finally:
|
finally:
|
||||||
publisher.close()
|
publisher.close()
|
||||||
|
@ -102,7 +106,9 @@ def fetch_callback(message_data,message):
|
||||||
|
|
||||||
try:
|
try:
|
||||||
obj = HarvestObject.get(id)
|
obj = HarvestObject.get(id)
|
||||||
|
except:
|
||||||
|
log.error('Harvest object does not exist: %s' % id)
|
||||||
|
else:
|
||||||
# Send the harvest object to the plugins that implement
|
# Send the harvest object to the plugins that implement
|
||||||
# the Harvester interface, only if the source type
|
# the Harvester interface, only if the source type
|
||||||
# matches
|
# matches
|
||||||
|
@ -110,14 +116,16 @@ def fetch_callback(message_data,message):
|
||||||
if harvester.get_type() == obj.source.type:
|
if harvester.get_type() == obj.source.type:
|
||||||
|
|
||||||
# See if the plugin can fetch the harvest object
|
# See if the plugin can fetch the harvest object
|
||||||
|
obj.fetch_started = datetime.datetime.now()
|
||||||
success = harvester.fetch_stage(obj)
|
success = harvester.fetch_stage(obj)
|
||||||
|
obj.fetch_finished = datetime.datetime.now()
|
||||||
|
obj.save()
|
||||||
|
#TODO: retry times?
|
||||||
if success:
|
if success:
|
||||||
# If no errors where found, call the import method
|
# If no errors where found, call the import method
|
||||||
harvester.import_stage(obj)
|
harvester.import_stage(obj)
|
||||||
|
|
||||||
|
|
||||||
except:
|
|
||||||
log.error('Harvest object does not exist: %s' % id)
|
|
||||||
|
|
||||||
except KeyError:
|
except KeyError:
|
||||||
log.error('No harvest object id received')
|
log.error('No harvest object id received')
|
||||||
|
@ -140,3 +148,6 @@ def get_gather_publisher():
|
||||||
def get_fetch_publisher():
|
def get_fetch_publisher():
|
||||||
return get_publisher('harvest_object_id')
|
return get_publisher('harvest_object_id')
|
||||||
|
|
||||||
|
# Get a publisher for the fetch queue
|
||||||
|
#fetch_publisher = get_fetch_publisher()
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue