diff --git a/README.rst b/README.rst
index 0f91ab4..0a6466b 100644
--- a/README.rst
+++ b/README.rst
@@ -278,7 +278,8 @@ following methods::
- performing any necessary action with the fetched object (e.g
create a CKAN package).
Note: if this stage creates or updates a package, a reference
- to the package should be added to the HarvestObject.
+ to the package must be added to the HarvestObject.
+ Additionally, the HarvestObject must be flagged as current.
- creating the HarvestObject - Package relation (if necessary)
- creating and storing any suitable HarvestObjectErrors that may
occur.
diff --git a/ckanext/harvest/commands/harvester.py b/ckanext/harvest/commands/harvester.py
index 1c9c673..46fd36e 100644
--- a/ckanext/harvest/commands/harvester.py
+++ b/ckanext/harvest/commands/harvester.py
@@ -216,7 +216,8 @@ class Harvester(CkanCommand):
source_id = unicode(self.args[1])
else:
source_id = None
- import_last_objects(source_id)
+ objs = import_last_objects(source_id)
+ print '%s objects reimported' % len(objs)
def create_harvest_job_all(self):
jobs = create_harvest_job_all()
diff --git a/ckanext/harvest/controllers/view.py b/ckanext/harvest/controllers/view.py
index b146c88..2589159 100644
--- a/ckanext/harvest/controllers/view.py
+++ b/ckanext/harvest/controllers/view.py
@@ -42,7 +42,7 @@ class ViewController(BaseController):
errors = errors or {}
error_summary = error_summary or {}
vars = {'data': data, 'errors': errors, 'error_summary': error_summary, 'harvesters': get_registered_harvesters_info()}
-
+
c.form = render('source/new_source_form.html', extra_vars=vars)
return render('source/new.html')
@@ -80,9 +80,9 @@ class ViewController(BaseController):
data = data or old_data
errors = errors or {}
error_summary = error_summary or {}
- #TODO: Use new description interface to build the types select and descriptions
+
vars = {'data': data, 'errors': errors, 'error_summary': error_summary, 'harvesters': get_registered_harvesters_info()}
-
+
c.form = render('source/new_source_form.html', extra_vars=vars)
return render('source/edit.html')
diff --git a/ckanext/harvest/harvesters/base.py b/ckanext/harvest/harvesters/base.py
index 3f01f6b..865a06f 100644
--- a/ckanext/harvest/harvesters/base.py
+++ b/ckanext/harvest/harvesters/base.py
@@ -1,6 +1,8 @@
import logging
import re
+from sqlalchemy.sql import update,and_, bindparam
+
from ckan import model
from ckan.model import Session, Package
from ckan.logic import ValidationError, NotFound, get_action
@@ -145,10 +147,8 @@ class HarvesterBase(SingletonPlugin):
log.info('Package with GUID %s exists and needs to be updated' % harvest_object.guid)
# Update package
context.update({'id':package_dict['id']})
- updated_package = get_action('package_update_rest')(context, package_dict)
+ new_package = get_action('package_update_rest')(context, package_dict)
- harvest_object.package_id = updated_package['id']
- harvest_object.save()
else:
log.info('Package with GUID %s not updated, skipping...' % harvest_object.guid)
@@ -161,7 +161,21 @@ class HarvesterBase(SingletonPlugin):
log.info('Package with GUID %s does not exist, let\'s create it' % harvest_object.guid)
new_package = get_action('package_create_rest')(context, package_dict)
harvest_object.package_id = new_package['id']
- harvest_object.save()
+
+ # Flag the other objects linking to this package as not current anymore
+ from ckanext.harvest.model import harvest_object_table
+ conn = Session.connection()
+ u = update(harvest_object_table) \
+ .where(harvest_object_table.c.package_id==bindparam('b_package_id')) \
+ .values(current=False)
+ conn.execute(u, b_package_id=new_package['id'])
+ Session.commit()
+
+ # Flag this as the current harvest object
+
+ harvest_object.package_id = new_package['id']
+ harvest_object.current = True
+ harvest_object.save()
return True
diff --git a/ckanext/harvest/lib/__init__.py b/ckanext/harvest/lib/__init__.py
index d1da001..aafe09f 100644
--- a/ckanext/harvest/lib/__init__.py
+++ b/ckanext/harvest/lib/__init__.py
@@ -87,8 +87,9 @@ def _get_source_status(source, detailed=True):
# Overall statistics
packages = Session.query(distinct(HarvestObject.package_id),Package.name) \
- .join(Package).join(HarvestJob).join(HarvestSource) \
- .filter(HarvestJob.source==source) \
+ .join(Package).join(HarvestSource) \
+ .filter(HarvestObject.source==source) \
+ .filter(HarvestObject.current==True) \
.filter(Package.state==u'active')
out['overall_statistics']['added'] = packages.count()
@@ -110,8 +111,6 @@ def _get_source_status(source, detailed=True):
return out
-
-
def _source_as_dict(source, detailed=True):
out = source.as_dict()
out['jobs'] = []
@@ -153,42 +152,6 @@ def _object_as_dict(obj):
return out
-def _url_exists(url):
- new_url = _normalize_url(url)
-
- existing_sources = get_harvest_sources()
-
- for existing_source in existing_sources:
- existing_url = _normalize_url(existing_source['url'])
- if existing_url == new_url and existing_source['active'] == True:
- return existing_source
- return False
-
-def _normalize_url(url):
- o = urlparse.urlparse(url)
-
- # Normalize port
- if ':' in o.netloc:
- parts = o.netloc.split(':')
- if (o.scheme == 'http' and parts[1] == '80') or \
- (o.scheme == 'https' and parts[1] == '443'):
- netloc = parts[0]
- else:
- netloc = ':'.join(parts)
- else:
- netloc = o.netloc
-
- # Remove trailing slash
- path = o.path.rstrip('/')
-
- check_url = urlparse.urlunparse((
- o.scheme,
- netloc,
- path,
- None,None,None))
-
- return check_url
-
def _prettify(field_name):
field_name = re.sub('(?Harvesting Sources
+