# harvester-d4science/ckanext/harvest/tests/test_timeouts.py
#
# 151 lines
# 5.2 KiB
# Python
# Raw Normal View History
#
# 2020-10-01 21:48:59 +02:00
from datetime import datetime, timedelta
from nose.tools import assert_equal, assert_in
import pytest
from ckan.tests import factories as ckan_factories
from ckan import model
from ckan.lib.base import config
from ckan.plugins.toolkit import get_action
from ckanext.harvest.tests import factories as harvest_factories
from ckanext.harvest.logic import HarvestJobExists
@pytest.mark.usefixtures('with_plugins', 'clean_db', 'harvest_setup', 'clean_queues')
@pytest.mark.ckan_config('ckan.plugins', 'harvest test_action_harvester')
class TestModelFunctions:
    """Tests for harvest job timeouts and the job "last action" helpers.

    The tests build a harvest source with a running job, attach harvest
    objects to it and then check ``get_last_finished_object``,
    ``get_last_gathered_object`` and ``get_last_action_time`` on the job, as
    well as the gather errors left behind by the ``harvest_jobs_run`` action.
    """

    # Counter used by ``add_object`` to build a distinct dataset name for
    # every harvest object created through the same test instance.
    dataset_counter = 0

    def test_timeout_jobs(self):
        """Check that a job whose last object finished too long ago times out.

        Create a harvest source, a job and three objects, then:

        * validate that the job reports the most recently finished object,
        * validate that ``harvest_jobs_run`` records a timeout gather error.
        """
        source, job = self.get_source()
        self.add_object(job=job, source=source, state='COMPLETE', minutes_ago=10)
        ob2 = self.add_object(job=job, source=source, state='COMPLETE', minutes_ago=5)
        self.add_object(job=job, source=source, state='COMPLETE', minutes_ago=15)

        # ob2 has the most recent ``import_finished`` (5 minutes ago).
        assert job.get_last_finished_object() == ob2
        assert job.get_last_action_time() == ob2.import_finished

        # The timeout (3) is smaller than the age of the last action (5).
        gather_errors = self.run(timeout=3, source=source, job=job)

        assert len(gather_errors) == 1
        assert job.status == 'Finished'
        gather_error = gather_errors[0]
        assert 'timeout' in gather_error.message

    def test_no_timeout_jobs(self):
        """Check that a job within the configured timeout raises no error."""
        source, job = self.get_source()
        self.add_object(job=job, source=source, state='COMPLETE', minutes_ago=10)
        ob2 = self.add_object(job=job, source=source, state='COMPLETE', minutes_ago=5)
        self.add_object(job=job, source=source, state='COMPLETE', minutes_ago=15)

        assert job.get_last_finished_object() == ob2
        assert job.get_last_action_time() == ob2.import_finished

        # The timeout (7) is larger than the age of the last action (5).
        gather_errors = self.run(timeout=7, source=source, job=job)

        assert len(gather_errors) == 0
        assert job.status == 'Finished'

    def test_no_objects_job(self):
        """Check the last action time of a gathered job without objects.

        With no harvest objects there is no last finished object, and the
        last action time is the moment the gather stage finished.
        """
        _, job = self.get_source()
        job.gather_finished = datetime.utcnow()
        job.save()

        assert job.get_last_finished_object() is None
        assert job.get_last_action_time() == job.gather_finished

    def test_no_gathered_job(self):
        """Check the last action time of a job that has not gathered yet.

        With neither objects nor a ``gather_finished`` timestamp, the last
        action time is the creation time of the job.
        """
        _, job = self.get_source()
        job.gather_finished = None
        job.save()

        assert job.get_last_finished_object() is None
        assert job.get_last_action_time() == job.created

    def test_gather_get_last_action_time(self):
        """Check ``get_last_action_time`` while objects are still WAITING."""
        source, job = self.get_source()
        self.add_object(job=job, source=source, state='WAITING')
        self.add_object(job=job, source=source, state='WAITING')
        ob3 = self.add_object(job=job, source=source, state='WAITING')

        # ob3 was created last, so it is expected to be the last gathered one.
        assert job.get_last_gathered_object() == ob3
        assert job.get_last_action_time() == ob3.gathered

    def run(self, timeout, source, job):
        """Run the ``harvest_jobs_run`` action and return the gather errors.

        :param timeout: value stored in the ``ckan.harvest.timeout`` config
            option before running. Presumably expressed in minutes, given
            the ``minutes_ago`` values used by the callers -- TODO confirm.
        :param source: harvest source whose id is passed to the action.
        :param job: harvest job to run; its ``gather_finished`` is set to
            the current UTC time before the action is called.
        :returns: the result of ``job.get_gather_errors()`` after the run.
        """
        context = {'model': model, 'session': model.Session,
                   'ignore_auth': True, 'user': ''}
        data_dict = {
            'guid': 'guid',
            'content': 'content',
            'job_id': job.id,
            'source_id': source.id
        }

        # Prepare the job to run: mark the gather stage as just finished.
        job.gather_finished = datetime.utcnow()
        job.save()

        # Run the jobs; whether a timeout is recorded depends on ``timeout``.
        # NOTE(review): the config is changed in place and not restored here;
        # presumably the CKAN test fixtures reset it between tests -- confirm.
        config['ckan.harvest.timeout'] = timeout
        harvest_jobs_run_action = get_action('harvest_jobs_run')
        harvest_jobs_run_action(context, data_dict)

        return job.get_gather_errors()

    def get_source(self):
        """Create a test harvest source and return it with a running job.

        :returns: a ``(source, job)`` tuple where ``job`` belongs to
            ``source`` and has its status set to ``'Running'``.
        """
        source_dict = {
            "url": "http://test.timeout.com",
            "name": "test-source-timeout",
            "title": "Test source timeout",
            "notes": "Notes source timeout",
            "source_type": "test-for-action",
            "frequency": "MANUAL"
        }
        source = harvest_factories.HarvestSourceObj(**source_dict)
        try:
            job = harvest_factories.HarvestJobObj(source=source)
        except HarvestJobExists:
            # A job may already exist for the source (the original author
            # noted the cause is unclear); reuse the first one in that case.
            job = source.get_jobs()[0]

        job.status = 'Running'
        job.save()

        # Sanity check: the job must be listed among the running jobs.
        jobs = source.get_jobs(status='Running')
        assert job in jobs
        return source, job

    def add_object(self, job, source, state, minutes_ago=0):
        """Create a dataset plus a harvest object attached to ``job``.

        :param job: harvest job the new object belongs to.
        :param source: harvest source the new object belongs to.
        :param state: state assigned to the object after creation.
        :param minutes_ago: when greater than zero, ``import_finished`` is
            set to that many minutes before the current UTC time; otherwise
            ``import_finished`` is left untouched.
        :returns: the saved harvest object.
        """
        now = datetime.utcnow()
        self.dataset_counter += 1
        name = 'dataset-{}-{}'.format(state.lower(), self.dataset_counter)
        dataset = ckan_factories.Dataset(name=name)
        obj = harvest_factories.HarvestObjectObj(
            job=job,
            source=source,
            package_id=dataset['id'],
            guid=dataset['id'],
            content='{}',
        )
        # The state is assigned after creation rather than passed to the
        # factory: per the original author's note, objects built by the
        # factory always end up in the WAITING state.
        obj.state = state
        if minutes_ago > 0:
            obj.import_finished = now - timedelta(minutes=minutes_ago)

        obj.save()
        return obj