2015-10-21 18:26:57 +02:00
|
|
|
try:
|
|
|
|
from ckan.tests.helpers import reset_db
|
|
|
|
except ImportError:
|
|
|
|
from ckan.new_tests.helpers import reset_db
|
2012-10-24 12:58:00 +02:00
|
|
|
import ckanext.harvest.model as harvest_model
|
2012-10-25 20:01:54 +02:00
|
|
|
from ckanext.harvest.model import HarvestObject, HarvestObjectExtra
|
2012-10-24 12:58:00 +02:00
|
|
|
from ckanext.harvest.interfaces import IHarvester
|
|
|
|
import ckanext.harvest.queue as queue
|
|
|
|
from ckan.plugins.core import SingletonPlugin, implements
|
|
|
|
import json
|
|
|
|
import ckan.logic as logic
|
|
|
|
from ckan import model
|
2015-11-17 12:09:24 +01:00
|
|
|
from nose.tools import assert_equal
|
2012-10-24 12:58:00 +02:00
|
|
|
|
|
|
|
|
2015-11-03 01:22:53 +01:00
|
|
|
class MockHarvester(SingletonPlugin):
    """Minimal ``IHarvester`` implementation used to drive the queue tests.

    The harvester creates three harvest objects for any source whose URL
    starts with ``basic_test`` and turns each of them into a dataset named
    after the object's guid. The object with guid ``test_to_delete`` has its
    dataset flagged as deleted when it is imported a second time.
    """
    implements(IHarvester)

    def info(self):
        """Return the static description dict of this harvester."""
        return {'name': 'test', 'title': 'test', 'description': 'test'}

    def gather_stage(self, harvest_job):
        """Create the harvest objects for ``harvest_job``.

        Returns the ids of the three created objects when the job's source
        URL starts with ``basic_test``; otherwise returns an empty list.
        """
        if harvest_job.source.url.startswith('basic_test'):
            obj = HarvestObject(guid='test1', job=harvest_job)
            obj.extras.append(HarvestObjectExtra(key='key', value='value'))
            obj2 = HarvestObject(guid='test2', job=harvest_job)
            obj3 = HarvestObject(guid='test_to_delete', job=harvest_job)
            obj.add()
            obj2.add()
            # save() commits, which also persists the two objects that were
            # only add()-ed above.
            obj3.save()
            return [obj.id, obj2.id, obj3.id]

        return []

    def fetch_stage(self, harvest_object):
        """Store a JSON document ``{'name': <guid>}`` as the object content.

        Asserts that the object is in the ``FETCH`` state and that its
        ``fetch_started`` value has been set. Always returns ``True``.
        """
        assert_equal(harvest_object.state, "FETCH")
        assert harvest_object.fetch_started is not None
        harvest_object.content = json.dumps({'name': harvest_object.guid})
        harvest_object.save()
        return True

    def import_stage(self, harvest_object):
        """Create or update the dataset described by the object content.

        Asserts that the object is in the ``IMPORT`` state and that its
        ``fetch_finished`` and ``import_started`` values have been set.
        Always returns ``True``.
        """
        assert_equal(harvest_object.state, "IMPORT")
        assert harvest_object.fetch_finished is not None
        assert harvest_object.import_started is not None

        user = logic.get_action('get_site_user')(
            {'model': model, 'ignore_auth': True}, {}
        )['name']

        package = json.loads(harvest_object.content)
        name = package['name']

        # An existing package with this name means this is a re-harvest.
        package_object = model.Package.get(name)
        if package_object:
            logic_function = 'package_update'
        else:
            logic_function = 'package_create'

        package_dict = logic.get_action(logic_function)(
            {'model': model, 'session': model.Session,
             'user': user, 'api_version': 3, 'ignore_auth': True},
            package
        )

        # set previous objects to not current
        # NOTE: `== True` builds a query filter expression here and must not
        # be rewritten as `is True`.
        previous_object = model.Session.query(HarvestObject) \
            .filter(HarvestObject.guid == harvest_object.guid) \
            .filter(HarvestObject.current == True) \
            .first()  # noqa: E712
        if previous_object:
            previous_object.current = False
            previous_object.save()

        harvest_object.package_id = package_dict['id']
        harvest_object.current = True

        # delete test_to_delete package on second run
        if package_dict['name'] == 'test_to_delete' and package_object:
            harvest_object.current = False
            package_object.state = 'deleted'
            package_object.save()

        harvest_object.save()
        return True
|
|
|
|
|
|
|
|
|
|
|
|
class TestHarvestQueue(object):
    """End-to-end test of the gather/fetch queue callbacks.

    A harvest source of type ``test`` is harvested twice; after each run the
    harvest objects, datasets, job stats and source status are checked.
    """

    # Names of the queues the test consumers are declared on. The same names
    # are used for purging and for reading, so the test never polls a queue
    # other than the one it created and emptied.
    _GATHER_QUEUE = 'ckan.harvest.test.gather'
    _FETCH_QUEUE = 'ckan.harvest.test.fetch'

    @classmethod
    def setup_class(cls):
        """Reset the database and set up the harvest model."""
        reset_db()
        harvest_model.setup()

    def _create_running_job(self, context, source_id):
        """Create a job with ``run`` set and check it is shown as Running.

        Returns the dict produced by ``harvest_job_create``.
        """
        harvest_job = logic.get_action('harvest_job_create')(
            context,
            {'source_id': source_id, 'run': True}
        )
        shown_job = logic.get_action('harvest_job_show')(
            context,
            {'id': harvest_job['id']}
        )
        assert shown_job['status'] == u'Running'
        return harvest_job

    def _run_gather(self, consumer):
        """Pop one item off the gather queue and run the gather callback."""
        reply = consumer.basic_get(queue=self._GATHER_QUEUE)
        queue.gather_callback(consumer, *reply)

    def _run_fetch(self, consumer_fetch, times):
        """Pop ``times`` items off the fetch queue, running the callback."""
        for _ in range(times):
            reply = consumer_fetch.basic_get(queue=self._FETCH_QUEUE)
            queue.fetch_callback(consumer_fetch, *reply)

    def _rerun_and_show_job(self, context, source_id, job_id):
        """Fire ``harvest_jobs_run`` for the source and return the job dict."""
        logic.get_action('harvest_jobs_run')(
            context,
            {'source_id': source_id}
        )
        return logic.get_action('harvest_job_show')(
            context,
            {'id': job_id}
        )

    def test_01_basic_harvester(self):
        """Harvest the test source twice and verify the results of each run."""
        # make sure queues/exchanges are created first and are empty
        consumer = queue.get_consumer(self._GATHER_QUEUE,
                                      queue.get_gather_routing_key())
        consumer_fetch = queue.get_consumer(self._FETCH_QUEUE,
                                            queue.get_fetch_routing_key())
        consumer.queue_purge(queue=self._GATHER_QUEUE)
        consumer_fetch.queue_purge(queue=self._FETCH_QUEUE)

        user = logic.get_action('get_site_user')(
            {'model': model, 'ignore_auth': True}, {}
        )['name']

        context = {'model': model, 'session': model.Session,
                   'user': user, 'api_version': 3, 'ignore_auth': True}

        source_dict = {
            'title': 'Test Source',
            'name': 'test-source',
            'url': 'basic_test',
            'source_type': 'test',
        }

        harvest_source = logic.get_action('harvest_source_create')(
            context,
            source_dict
        )

        assert harvest_source['source_type'] == 'test', harvest_source
        assert harvest_source['url'] == 'basic_test', harvest_source

        source_id = harvest_source['id']

        # ---------------- First run ----------------
        harvest_job = self._create_running_job(context, source_id)
        job_id = harvest_job['id']

        assert harvest_job['source_id'] == source_id, harvest_job
        assert harvest_job['status'] == u'Running'

        # pop one item off the queue and run the callback
        self._run_gather(consumer)

        all_objects = model.Session.query(HarvestObject).all()

        assert len(all_objects) == 3
        for harvest_object in all_objects:
            assert harvest_object.state == 'WAITING'

        assert len(model.Session.query(HarvestObject).all()) == 3
        assert len(model.Session.query(HarvestObjectExtra).all()) == 1

        # do three times as three harvest objects
        self._run_fetch(consumer_fetch, 3)

        count = model.Session.query(model.Package) \
            .filter(model.Package.type == 'dataset') \
            .count()
        assert count == 3

        all_objects = model.Session.query(HarvestObject) \
            .filter_by(current=True).all()

        assert_equal(len(all_objects), 3)
        for harvest_object in all_objects:
            assert_equal(harvest_object.state, 'COMPLETE')
            assert_equal(harvest_object.report_status, 'added')

        # fire run again to check if job is set to Finished
        harvest_job = self._rerun_and_show_job(context, source_id, job_id)

        first_run_stats = {'added': 3, 'updated': 0, 'not modified': 0,
                           'errored': 0, 'deleted': 0}
        assert_equal(harvest_job['status'], u'Finished')
        assert_equal(harvest_job['stats'], first_run_stats)

        harvest_source_dict = logic.get_action('harvest_source_show')(
            context,
            {'id': source_id}
        )

        source_status = harvest_source_dict['status']
        assert_equal(source_status['last_job']['stats'], first_run_stats)
        assert_equal(source_status['total_datasets'], 3)
        assert_equal(source_status['job_count'], 1)

        # ---------------- Second run ----------------
        harvest_job = self._create_running_job(context, source_id)
        job_id = harvest_job['id']

        # pop one item off the queue and run the callback
        self._run_gather(consumer)

        all_objects = model.Session.query(HarvestObject).all()

        assert len(all_objects) == 6

        self._run_fetch(consumer_fetch, 3)

        count = model.Session.query(model.Package) \
            .filter(model.Package.type == 'dataset') \
            .count()
        assert_equal(count, 3)

        # Objects from both runs are counted here: three were added in the
        # first run, two updated and one deleted in the second.
        expected_by_status = (('added', 3), ('updated', 2), ('deleted', 1))
        for report_status, expected in expected_by_status:
            all_objects = model.Session.query(HarvestObject) \
                .filter_by(report_status=report_status).all()
            assert_equal(len(all_objects), expected)

        # run to make sure job is marked as finished
        harvest_job = self._rerun_and_show_job(context, source_id, job_id)

        second_run_stats = {'added': 0, 'updated': 2, 'not modified': 0,
                            'errored': 0, 'deleted': 1}
        assert_equal(harvest_job['stats'], second_run_stats)

        harvest_source_dict = logic.get_action('harvest_source_show')(
            context,
            {'id': source_id}
        )

        source_status = harvest_source_dict['status']
        assert_equal(source_status['last_job']['stats'], second_run_stats)
        assert_equal(source_status['total_datasets'], 2)
        assert_equal(source_status['job_count'], 2)