diff --git a/README.rst b/README.rst index 0f91ab4..0a6466b 100644 --- a/README.rst +++ b/README.rst @@ -278,7 +278,8 @@ following methods:: - performing any necessary action with the fetched object (e.g create a CKAN package). Note: if this stage creates or updates a package, a reference - to the package should be added to the HarvestObject. + to the package must be added to the HarvestObject. + Additionally, the HarvestObject must be flagged as current. - creating the HarvestObject - Package relation (if necessary) - creating and storing any suitable HarvestObjectErrors that may occur. diff --git a/ckanext/harvest/commands/harvester.py b/ckanext/harvest/commands/harvester.py index 1c9c673..46fd36e 100644 --- a/ckanext/harvest/commands/harvester.py +++ b/ckanext/harvest/commands/harvester.py @@ -216,7 +216,8 @@ class Harvester(CkanCommand): source_id = unicode(self.args[1]) else: source_id = None - import_last_objects(source_id) + objs = import_last_objects(source_id) + print '%s objects reimported' % len(objs) def create_harvest_job_all(self): jobs = create_harvest_job_all() diff --git a/ckanext/harvest/controllers/view.py b/ckanext/harvest/controllers/view.py index b146c88..2589159 100644 --- a/ckanext/harvest/controllers/view.py +++ b/ckanext/harvest/controllers/view.py @@ -42,7 +42,7 @@ class ViewController(BaseController): errors = errors or {} error_summary = error_summary or {} vars = {'data': data, 'errors': errors, 'error_summary': error_summary, 'harvesters': get_registered_harvesters_info()} - + c.form = render('source/new_source_form.html', extra_vars=vars) return render('source/new.html') @@ -80,9 +80,9 @@ class ViewController(BaseController): data = data or old_data errors = errors or {} error_summary = error_summary or {} - #TODO: Use new description interface to build the types select and descriptions + vars = {'data': data, 'errors': errors, 'error_summary': error_summary, 'harvesters': get_registered_harvesters_info()} - + c.form = render('source/new_source_form.html', extra_vars=vars) return render('source/edit.html') diff --git a/ckanext/harvest/harvesters/base.py b/ckanext/harvest/harvesters/base.py index 3f01f6b..865a06f 100644 --- a/ckanext/harvest/harvesters/base.py +++ b/ckanext/harvest/harvesters/base.py @@ -1,6 +1,8 @@ import logging import re +from sqlalchemy.sql import update,and_, bindparam + from ckan import model from ckan.model import Session, Package from ckan.logic import ValidationError, NotFound, get_action @@ -145,10 +147,8 @@ class HarvesterBase(SingletonPlugin): log.info('Package with GUID %s exists and needs to be updated' % harvest_object.guid) # Update package context.update({'id':package_dict['id']}) - updated_package = get_action('package_update_rest')(context, package_dict) + new_package = get_action('package_update_rest')(context, package_dict) - harvest_object.package_id = updated_package['id'] - harvest_object.save() else: log.info('Package with GUID %s not updated, skipping...' % harvest_object.guid) @@ -161,7 +161,21 @@ class HarvesterBase(SingletonPlugin): log.info('Package with GUID %s does not exist, let\'s create it' % harvest_object.guid) new_package = get_action('package_create_rest')(context, package_dict) harvest_object.package_id = new_package['id'] - harvest_object.save() + + # Flag the other objects linking to this package as not current anymore + from ckanext.harvest.model import harvest_object_table + conn = Session.connection() + u = update(harvest_object_table) \ + .where(harvest_object_table.c.package_id==bindparam('b_package_id')) \ + .values(current=False) + conn.execute(u, b_package_id=new_package['id']) + Session.commit() + + # Flag this as the current harvest object + + harvest_object.package_id = new_package['id'] + harvest_object.current = True + harvest_object.save() return True diff --git a/ckanext/harvest/lib/__init__.py b/ckanext/harvest/lib/__init__.py index d1da001..aafe09f 100644 --- a/ckanext/harvest/lib/__init__.py +++ b/ckanext/harvest/lib/__init__.py @@ -87,8 +87,9 @@ def _get_source_status(source, detailed=True): # Overall statistics packages = Session.query(distinct(HarvestObject.package_id),Package.name) \ - .join(Package).join(HarvestJob).join(HarvestSource) \ - .filter(HarvestJob.source==source) \ + .join(Package).join(HarvestSource) \ + .filter(HarvestObject.source==source) \ + .filter(HarvestObject.current==True) \ .filter(Package.state==u'active') out['overall_statistics']['added'] = packages.count() @@ -110,8 +111,6 @@ def _get_source_status(source, detailed=True): return out - - def _source_as_dict(source, detailed=True): out = source.as_dict() out['jobs'] = [] @@ -153,42 +152,6 @@ def _object_as_dict(obj): return out -def _url_exists(url): - new_url = _normalize_url(url) - - existing_sources = get_harvest_sources() - - for existing_source in existing_sources: - existing_url = _normalize_url(existing_source['url']) - if existing_url == new_url and existing_source['active'] == True: - return existing_source - return False - -def _normalize_url(url): - o = urlparse.urlparse(url) - - # Normalize port - if ':' in o.netloc: - parts = o.netloc.split(':') - if (o.scheme == 'http' and parts[1] == '80') or \ - (o.scheme == 'https' and parts[1] == '443'): - netloc = parts[0] - else: - netloc = ':'.join(parts) - else: - netloc = o.netloc - - # Remove trailing slash - path = o.path.rstrip('/') - - check_url = urlparse.urlunparse(( - o.scheme, - netloc, - path, - None,None,None)) - - return check_url - def _prettify(field_name): field_name = re.sub('(?Harvesting Sources + harvest + diff --git a/ckanext/harvest/templates/source/new_source_form.html b/ckanext/harvest/templates/source/new_source_form.html index 72c94e8..d3c5adb 100644 --- a/ckanext/harvest/templates/source/new_source_form.html +++ b/ckanext/harvest/templates/source/new_source_form.html @@ -35,11 +35,32 @@ + +
+
+
${errors.get('title', '')}
+
This will be shown as the datasets source.
+
You can add your own notes here about what the URL above represents to remind you later.
+ +
+
+ + +
This harvest source is Active
+
+ +
This harvest source is Inactive
+
+ +
or Return to the harvest sources list diff --git a/ckanext/harvest/templates/source/read.html b/ckanext/harvest/templates/source/read.html index dc01327..3ca8348 100644 --- a/ckanext/harvest/templates/source/read.html +++ b/ckanext/harvest/templates/source/read.html @@ -33,6 +33,11 @@ Active ${c.source.active} + + Title + ${c.source.title} + + Description ${c.source.description}