From 2c41293c9cd5826b5a9a41abfde7a4c52b2c1b5a Mon Sep 17 00:00:00 2001 From: Mark Winterbottom Date: Thu, 29 Oct 2015 18:30:51 +0000 Subject: [PATCH] Updated the validator to check for unique sets as well as URL. --- ckanext/harvest/logic/validators.py | 32 +++++++++++++++++++++++------ 1 file changed, 26 insertions(+), 6 deletions(-) diff --git a/ckanext/harvest/logic/validators.py b/ckanext/harvest/logic/validators.py index 369caa0..65a11ce 100644 --- a/ckanext/harvest/logic/validators.py +++ b/ckanext/harvest/logic/validators.py @@ -60,6 +60,11 @@ def _normalize_url(url): def harvest_source_url_validator(key, data, errors, context): + """Validate the provided harvest source URL. + + Checks that the URL is not already existing with the same config. + """ + package = context.get("package") if package: @@ -67,21 +72,36 @@ def harvest_source_url_validator(key, data, errors, context): else: package_id = data.get(key[:-1] + ("id",)) - new_url = _normalize_url(data[key]) - # pkg_id = data.get(('id',),'') + try: + new_config = data.get(key[:-1] + ('config',)) + new_config_dict = json.loads(new_config) + new_config_set = new_config_dict.get('set', None) + except: + new_config_set = None - q = model.Session.query(model.Package.url, model.Package.state) \ + new_url = _normalize_url(data[key]) + + # q = model.Session.query(model.Package.url, model.Package.state) \ + q = model.Session.query(HarvestSource.url, HarvestSource.config) \ .filter(model.Package.type == DATASET_TYPE_NAME) if package_id: - # When editing a source we need to avoid its own URL + # When editing a source we need to avoid its own URL. q = q.filter(model.Package.id != package_id) existing_sources = q.all() - for url, state in existing_sources: + for url, conf in existing_sources: url = _normalize_url(url) - if url == new_url: + try: + config_dict = json.loads(conf) + config_set = config_dict.get('set', None) + except: + config_set = None + + if url == new_url and config_set == new_config_set: + # You can have a duplicate URL if it's pointing to a unique + # set as it will be harvesting unique datasets. raise Invalid( 'There already is a Harvest Source for this URL: %s' % data[key]