From 85a013f2c91ee2a0e3ecbe0d8717b7fb2efe6877 Mon Sep 17 00:00:00 2001 From: Sean Hammond Date: Fri, 31 May 2013 14:56:53 +0200 Subject: [PATCH 1/2] [#42] Remove non-string extras from packages Remove extras whose values are not strings (e.g. dicts, lists, etc.) from packages before attempting to create or update the packages on the target site. In CKAN 1 it was possible for the values of extras to be other types, but in CKAN 2 they must be strings, so when harvesting from a CKAN 1 site into a CKAN 2 site SQLAlchemy would crash when trying to create packages with non-string extras. The fix in this commit is to simply remove any non-string extras from the harvested package. (Alternatively, we could try to convert them to a string using JSON.) Fixes #42. --- ckanext/harvest/harvesters/ckanharvester.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/ckanext/harvest/harvesters/ckanharvester.py b/ckanext/harvest/harvesters/ckanharvester.py index 9ba05db..9fc41b3 100644 --- a/ckanext/harvest/harvesters/ckanharvester.py +++ b/ckanext/harvest/harvesters/ckanharvester.py @@ -254,6 +254,12 @@ class CKANHarvester(HarvesterBase): package_dict['groups'] = [] package_dict['groups'].extend([g for g in default_groups if g not in package_dict['groups']]) + # Delete any extras whose values are not strings, as these cause + # errors from CKAN when trying to create/update the package. + for key in package_dict['extras'].keys(): + if not isinstance(package_dict['extras'][key], basestring): + del package_dict['extras'][key] + # Set default extras if needed default_extras = self.config.get('default_extras',{}) if default_extras: From 01df3a1db48703e1f046f2ce17184a49c44b2fe1 Mon Sep 17 00:00:00 2001 From: Sean Hammond Date: Fri, 31 May 2013 20:35:06 +0200 Subject: [PATCH 2/2] [#42] Dump non-string extras with json Convert any non-string extra values to strings using json.dumps(), instead of just deleting them. 
--- ckanext/harvest/harvesters/ckanharvester.py | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/ckanext/harvest/harvesters/ckanharvester.py b/ckanext/harvest/harvesters/ckanharvester.py index 9fc41b3..568602e 100644 --- a/ckanext/harvest/harvesters/ckanharvester.py +++ b/ckanext/harvest/harvesters/ckanharvester.py @@ -254,11 +254,17 @@ class CKANHarvester(HarvesterBase): package_dict['groups'] = [] package_dict['groups'].extend([g for g in default_groups if g not in package_dict['groups']]) - # Delete any extras whose values are not strings, as these cause - # errors from CKAN when trying to create/update the package. + # Find any extras whose values are not strings and try to convert + # them to strings, as non-string extras are not allowed anymore in + # CKAN 2.0. for key in package_dict['extras'].keys(): if not isinstance(package_dict['extras'][key], basestring): - del package_dict['extras'][key] + try: + package_dict['extras'][key] = json.dumps( + package_dict['extras'][key]) + except TypeError: + # If converting to a string fails, just delete it. + del package_dict['extras'][key] # Set default extras if needed default_extras = self.config.get('default_extras',{})