Allow filtering by organisation for harvest_source_list

This commit allows users of the harvest_source_list endpoint to
return harvest sources by a specific organisation by using a query param
`organization_id`. Also this endpoint won't
return more than 100 harvest sources as we were having timing out issues
due to the amount being previously returned (500+).

I've also removed the check_access method as this currently wasn't doing
anything as you didn't need to be authorized to access this endpoint.

Trello:
https://trello.com/c/iAWEYNOv/1946-5-dgu-add-ability-for-harvestsourcelist-to-filter-by-organisation%F0%9F%8D%90

Zendesk:
https://govuk.zendesk.com/agent/tickets/4034244
This commit is contained in:
Peter Hartshorn 2020-05-19 16:30:39 +01:00
parent 19da5c5768
commit fa5987625c
2 changed files with 102 additions and 5 deletions

View File

@ -1,11 +1,13 @@
import logging
from ckan.lib.base import config
from sqlalchemy import or_
from ckan.model import User
from ckan.model import User, Package
import datetime
from ckan import logic
from ckan.plugins import PluginImplementations
from ckanext.harvest.interfaces import IHarvester
from ckan.common import request
import ckan.plugins as p
from ckan.logic import NotFound, check_access, side_effect_free
@ -124,9 +126,10 @@ def harvest_source_list(context, data_dict):
TODO: Use package search
'''
check_access('harvest_source_list', context, data_dict)
organization_id = request.params.get('organization_id')
limit = config.get('ckan.harvest.harvest_source_limit', 100)
sources = _get_sources_for_user(context, data_dict)
sources = _get_sources_for_user(context, data_dict, organization_id=organization_id, limit=limit)
last_job_status = p.toolkit.asbool(data_dict.get('return_last_job_status', False))
@ -361,7 +364,7 @@ def harvest_log_list(context, data_dict):
return out
def _get_sources_for_user(context, data_dict):
def _get_sources_for_user(context, data_dict, organization_id=None, limit=None):
session = context['session']
user = context.get('user', '')
@ -372,6 +375,11 @@ def _get_sources_for_user(context, data_dict):
query = session.query(HarvestSource) \
.order_by(HarvestSource.created.desc())
if organization_id:
query = query.join(
Package, HarvestSource.id == Package.id
).filter(Package.owner_org == organization_id)
if only_active:
query = query.filter(
HarvestSource.active == True # noqa: E712
@ -406,6 +414,6 @@ def _get_sources_for_user(context, data_dict):
log.debug('User %s with publishers %r has Harvest Sources: %r',
user, publishers_for_the_user, [(hs.id, hs.url) for hs in query])
sources = query.all()
sources = query.limit(limit).all() if limit else query.all()
return sources

View File

@ -11,6 +11,7 @@ from ckantoolkit.tests.helpers import _get_test_app, reset_db, FunctionalTestBas
from ckan import plugins as p
from ckan.plugins import toolkit
from ckan import model
from ckan.lib.base import config
from ckanext.harvest.interfaces import IHarvester
import ckanext.harvest.model as harvest_model
@ -258,6 +259,94 @@ class TestHarvestSourceActionCreate(HarvestSourceActionBase):
assert 'url' in result
assert u'There already is a Harvest Source for this URL' in result['url'][0]
class TestHarvestSourceActionList(FunctionalTestBase):
def test_list_with_organization(self):
organization = ckan_factories.Organization.create()
harvest_data = {
"owner_org": organization["id"],
"type": "harvest",
"url": "https://www.gov.uk/random",
"source_type": "test-nose"
}
other_harvest_data = {
"type": "harvest",
"url": "https://www.gov.uk/other-path",
"source_type": "test-nose"
}
harvest = ckan_factories.Dataset.create(**harvest_data)
harvest_source = factories.HarvestSource.create(id=harvest["id"])
other_harvest = ckan_factories.Dataset.create(**other_harvest_data)
other_harvest_source = factories.HarvestSource.create(id=other_harvest["id"])
app = _get_test_app()
response = app.get('/api/action/{0}'.format('harvest_source_list'),
params={"organization_id":organization["id"]},
status=200)
results = response.json['result']
result_harvest = model.Session.query(model.Package).get(results[0]["id"])
result_organization_id = result_harvest.owner_org
assert response.json['success'] is True
assert 1 is len(results)
assert_equal(organization["id"], result_organization_id)
def test_list_without_organization(self):
harvest_data = {
"type": "harvest",
"url": "https://www.gov.uk/random",
"source_type": "test-nose"
}
other_harvest_data = {
"type": "harvest",
"url": "https://www.gov.uk/other-path",
"source_type": "test-nose"
}
harvest_source = factories.HarvestSource.create()
other_harvest_source = factories.HarvestSource.create()
app = _get_test_app()
response = app.get('/api/action/{0}'.format('harvest_source_list'), status=200)
results = response.json['result']
assert response.json['success'] is True
assert 2 is len(results)
@patch.dict('ckanext.harvest.logic.action.get.config',
{'ckan.harvest.harvest_source_limit': 1})
def test_list_with_limit(self):
harvest_data = {
"type": "harvest",
"url": "https://www.gov.uk/random",
"source_type": "test-nose"
}
other_harvest_data = {
"type": "harvest",
"url": "https://www.gov.uk/other-path",
"source_type": "test-nose"
}
harvest_source = factories.HarvestSource.create()
other_harvest_source = factories.HarvestSource.create()
app = _get_test_app()
response = app.get('/api/action/{0}'.format('harvest_source_list'), status=200)
results = response.json['result']
assert response.json['success'] is True
assert 1 is len(results)
class HarvestSourceFixtureMixin(object):
def _get_source_dict(self):