2020-10-01 21:48:59 +02:00
|
|
|
from datetime import datetime, timedelta
|
|
|
|
from nose.tools import assert_equal, assert_in
|
|
|
|
import pytest
|
|
|
|
from ckan.tests import factories as ckan_factories
|
|
|
|
from ckan import model
|
|
|
|
from ckan.lib.base import config
|
|
|
|
from ckan.plugins.toolkit import get_action
|
|
|
|
from ckanext.harvest.tests import factories as harvest_factories
|
|
|
|
from ckanext.harvest.logic import HarvestJobExists
|
|
|
|
|
|
|
|
|
|
|
|
@pytest.mark.usefixtures('with_plugins', 'clean_db', 'harvest_setup', 'clean_queues')
@pytest.mark.ckan_config('ckan.plugins', 'harvest test_action_harvester')
class TestModelFunctions:
    """Tests for harvest job time tracking and timeout handling.

    Covers ``get_last_finished_object``, ``get_last_gathered_object`` and
    ``get_last_action_time`` on a harvest job, and the timeout behaviour of
    the ``harvest_jobs_run`` action.
    """

    # Used by add_object() to give every dataset it creates a distinct name.
    dataset_counter = 0

    def test_timeout_jobs(self):
        """Create a harvest source, job and objects, then force a timeout.

        Validate we read the last object finished time.
        Validate a timeout error is recorded by the harvest_jobs_run action.
        """
        source, job = self.get_source()

        self.add_object(job=job, source=source, state='COMPLETE', minutes_ago=10)
        ob2 = self.add_object(job=job, source=source, state='COMPLETE', minutes_ago=5)
        self.add_object(job=job, source=source, state='COMPLETE', minutes_ago=15)

        # ob2 has the most recent import_finished (5 minutes ago)
        assert job.get_last_finished_object() == ob2
        assert job.get_last_action_time() == ob2.import_finished

        # the timeout (3) is lower than the age of the last action (5 minutes)
        gather_errors = self.run(timeout=3, source=source, job=job)
        assert len(gather_errors) == 1
        assert job.status == 'Finished'
        gather_error = gather_errors[0]
        assert 'timeout' in gather_error.message

    def test_no_timeout_jobs(self):
        """Test a job that does not raise a timeout."""
        source, job = self.get_source()

        self.add_object(job=job, source=source, state='COMPLETE', minutes_ago=10)
        ob2 = self.add_object(job=job, source=source, state='COMPLETE', minutes_ago=5)
        self.add_object(job=job, source=source, state='COMPLETE', minutes_ago=15)

        assert job.get_last_finished_object() == ob2
        assert job.get_last_action_time() == ob2.import_finished

        # the timeout (7) is higher than the age of the last action (5 minutes)
        gather_errors = self.run(timeout=7, source=source, job=job)
        assert len(gather_errors) == 0
        assert job.status == 'Finished'

    def test_no_objects_job(self):
        """Test the last action time of a gathered job without objects.

        With no harvest objects the last action time is expected to be
        the job's ``gather_finished`` value.
        """
        _, job = self.get_source()

        job.gather_finished = datetime.utcnow()
        job.save()

        assert job.get_last_finished_object() is None
        assert job.get_last_action_time() == job.gather_finished

    def test_no_gathered_job(self):
        """Test the last action time of a job that has not gathered yet.

        With no harvest objects and no ``gather_finished`` value the last
        action time is expected to be the job's ``created`` value.
        """
        _, job = self.get_source()

        job.gather_finished = None
        job.save()

        assert job.get_last_finished_object() is None
        assert job.get_last_action_time() == job.created

    def test_gather_get_last_action_time(self):
        """Test get_last_action_time at gather stage."""
        source, job = self.get_source()

        self.add_object(job=job, source=source, state='WAITING')
        self.add_object(job=job, source=source, state='WAITING')
        ob3 = self.add_object(job=job, source=source, state='WAITING')

        # ob3 is the object created last
        assert job.get_last_gathered_object() == ob3
        assert job.get_last_action_time() == ob3.gathered

    def run(self, timeout, source, job):
        """Run the harvest_jobs_run action and return the job's gather errors.

        :param timeout: value stored in the ``ckan.harvest.timeout`` setting
        :param source: harvest source the job belongs to
        :param job: harvest job to run
        :returns: the result of ``job.get_gather_errors()``
        """
        # context and parameters for the action call
        context = {'model': model, 'session': model.Session,
                   'ignore_auth': True, 'user': ''}

        data_dict = {
            'guid': 'guid',
            'content': 'content',
            'job_id': job.id,
            'source_id': source.id,
        }

        # prepare the job to run: mark the gather stage as just finished
        job.gather_finished = datetime.utcnow()
        job.save()

        # NOTE(review): this changes the shared config object; presumably the
        # CKAN test fixtures restore it between tests -- confirm.
        config['ckan.harvest.timeout'] = timeout
        harvest_jobs_run_action = get_action('harvest_jobs_run')
        harvest_jobs_run_action(context, data_dict)

        return job.get_gather_errors()

    def get_source(self):
        """Create a harvest source with a job in 'Running' status.

        :returns: a ``(source, job)`` tuple
        """
        source_dict = {
            "url": "http://test.timeout.com",
            "name": "test-source-timeout",
            "title": "Test source timeout",
            "notes": "Notes source timeout",
            "source_type": "test-for-action",
            "frequency": "MANUAL",
        }
        source = harvest_factories.HarvestSourceObj(**source_dict)
        try:
            job = harvest_factories.HarvestJobObj(source=source)
        except HarvestJobExists:
            # A job may already exist for this source (the original author
            # noted the reason is unclear); reuse the existing one.
            job = source.get_jobs()[0]

        job.status = 'Running'
        job.save()

        jobs = source.get_jobs(status='Running')
        assert job in jobs

        return source, job

    def add_object(self, job, source, state, minutes_ago=0):
        """Create a dataset and a harvest object linked to it.

        :param job: harvest job the object belongs to
        :param source: harvest source the object belongs to
        :param state: state assigned to the harvest object
        :param minutes_ago: when greater than 0, ``import_finished`` is set
            to that many minutes before the current UTC time
        :returns: the saved harvest object
        """
        now = datetime.utcnow()
        self.dataset_counter += 1
        name = 'dataset-{}-{}'.format(state.lower(), self.dataset_counter)
        dataset = ckan_factories.Dataset(name=name)
        obj = harvest_factories.HarvestObjectObj(
            job=job,
            source=source,
            package_id=dataset['id'],
            guid=dataset['id'],
            content='{}',
            # state=state is not passed here: per the original author's note
            # the created object always ends up WAITING, so it is set below.
        )

        obj.state = state
        if minutes_ago > 0:
            obj.import_finished = now - timedelta(minutes=minutes_ago)
        obj.save()
        return obj
|