diff --git a/ckanext/harvest/logic/dictization.py b/ckanext/harvest/logic/dictization.py index ece370a..c38559b 100644 --- a/ckanext/harvest/logic/dictization.py +++ b/ckanext/harvest/logic/dictization.py @@ -1,4 +1,5 @@ from sqlalchemy import distinct, func +import ckan.logic as logic from ckan.model import Package,Group from ckanext.harvest.model import HarvestSource, HarvestJob, HarvestObject, \ @@ -77,7 +78,7 @@ def _get_source_status(source, context): 'job_count': 0, 'next_harvest':'', 'last_harvest_request':'', - 'last_harvest_statistics':{'added':0,'updated':0,'errors':0}, + 'last_harvest_statistics':{'added':0,'updated':0,'errors':0,'deleted':0}, 'last_harvest_errors':{'gather':[],'object':[]}, 'overall_statistics':{'added':0, 'errors':0}, 'packages':[]} @@ -103,38 +104,21 @@ def _get_source_status(source, context): #TODO: Should we encode the dates as strings? out['last_harvest_request'] = str(last_job.gather_finished) - #Get HarvestObjects from last job whit links to packages if detailed: - last_objects = [obj for obj in last_job.objects if obj.package is not None] - - if len(last_objects) == 0: + harvest_job_dict = harvest_job_dictize(last_job, context) # No packages added or updated - out['last_harvest_statistics']['added'] = 0 - out['last_harvest_statistics']['updated'] = 0 - else: - # Check wether packages were added or updated - for last_object in last_objects: - # Check if the same package had been linked before - previous_objects = model.Session.query(HarvestObject) \ - .filter(HarvestObject.package==last_object.package) \ - .count() + statistics = out['last_harvest_statistics'] + statistics['added'] = harvest_job_dict['stats'].get('new',0) + statistics['updated'] = harvest_job_dict['stats'].get('updated',0) + statistics['deleted'] = harvest_job_dict['stats'].get('deleted',0) + statistics['errors'] = (harvest_job_dict['stats'].get('errored',0) + + len(last_job.gather_errors)) - if previous_objects == 1: - # It didn't previously exist, it has been 
added - out['last_harvest_statistics']['added'] += 1 - else: - # Pacakge already existed, but it has been updated - out['last_harvest_statistics']['updated'] += 1 - - # Last harvest errors - # We have the gathering errors in last_job.gather_errors, so let's also - # get also the object errors. - object_errors = model.Session.query(HarvestObjectError).join(HarvestObject) \ - .filter(HarvestObject.job==last_job) - - out['last_harvest_statistics']['errors'] = len(last_job.gather_errors) \ - + object_errors.count() if detailed: + # We have the gathering errors in last_job.gather_errors, so let's also + # get the object errors. + object_errors = model.Session.query(HarvestObjectError).join(HarvestObject) \ + .filter(HarvestObject.job==last_job) for gather_error in last_job.gather_errors: out['last_harvest_errors']['gather'].append(gather_error.message) diff --git a/ckanext/harvest/tests/test_queue.py b/ckanext/harvest/tests/test_queue.py index e894fd2..dfa4c5b 100644 --- a/ckanext/harvest/tests/test_queue.py +++ b/ckanext/harvest/tests/test_queue.py @@ -63,6 +63,8 @@ class TestHarvester(SingletonPlugin): harvest_object.current = True if package_dict['name'] == 'test_to_delete' and package_object: harvest_object.current = False + package_object.state = 'deleted' + package_object.save() harvest_object.save() return True @@ -175,6 +177,17 @@ class TestHarvestQueue(object): assert harvest_job['status'] == u'Finished' assert harvest_job['stats'] == {'new': 3} + context['detailed'] = True + + harvest_source_dict = logic.get_action('harvest_source_show')( + context, + {'id': harvest_source['id']} + ) + + assert harvest_source_dict['status']['last_harvest_statistics'] == {'updated': 0, 'added': 3, 'deleted': 0, 'errors': 0L} + assert harvest_source_dict['status']['overall_statistics'] == {'added': 3L, 'errors': 0L} + + ########### Second run ######################## harvest_job = logic.get_action('harvest_job_create')( @@ -215,9 +228,26 @@ class TestHarvestQueue(object): 
all_objects = model.Session.query(HarvestObject).filter_by(report_status='deleted').all() assert len(all_objects) == 1, len(all_objects) + # run to make sure job is marked as finished + try: + logic.get_action('harvest_jobs_run')( + context, + {'source_id':harvest_source['id']} + ) + except Exception, e: + assert 'There are no new harvesting jobs' in str(e) + harvest_job = logic.get_action('harvest_job_show')( context, {'id': job_id} ) assert harvest_job['stats'] == {'updated': 2, 'deleted': 1} + context['detailed'] = True + harvest_source_dict = logic.get_action('harvest_source_show')( + context, + {'id': harvest_source['id']} + ) + + assert harvest_source_dict['status']['last_harvest_statistics'] == {'updated': 2, 'added': 0, 'deleted': 1, 'errors': 0L} + assert harvest_source_dict['status']['overall_statistics'] == {'added': 2L, 'errors': 0L}