diff --git a/ckanext/harvest/harvesters/ckanharvester.py b/ckanext/harvest/harvesters/ckanharvester.py
index 23ab27f..835d1d9 100644
--- a/ckanext/harvest/harvesters/ckanharvester.py
+++ b/ckanext/harvest/harvesters/ckanharvester.py
@@ -18,6 +18,7 @@ class CKANHarvester(HarvesterBase):
     '''
     A Harvester for CKAN instances
     '''
+    config = None  # parsed source config; assigned by _set_config()
 
     #TODO: check different API versions
     api_version = '2'
@@ -40,6 +41,14 @@ class CKANHarvester(HarvesterBase):
         except Exception, e:
             raise e
 
+    def _set_config(self,config_str):
+        '''Parse the JSON string config_str into self.config ({} if empty).'''
+        if config_str:
+            self.config = json.loads(config_str)
+            log.debug('Using config: %r', self.config)
+        else:
+            self.config = {}
+
     def info(self):
         return {
             'name': 'ckan',
@@ -62,6 +71,9 @@ class CKANHarvester(HarvesterBase):
         get_all_packages = True
         package_ids = []
 
+        # Reload on every call: this harvester object may serve several sources
+        self._set_config(harvest_job.source.config)
+
         # Check if this source has been harvested before
         previous_job = Session.query(HarvestJob) \
                         .filter(HarvestJob.source==harvest_job.source) \
@@ -145,6 +157,10 @@ class CKANHarvester(HarvesterBase):
 
     def fetch_stage(self,harvest_object):
         log.debug('In CKANHarvester fetch_stage')
+
+        # Reload on every call: this harvester object may serve several sources
+        self._set_config(harvest_object.job.source.config)
+
         # Get source URL
         url = harvest_object.source.url.rstrip('/')
         url = url + self._get_rest_api_offset() + '/package/' + harvest_object.guid
@@ -173,6 +189,9 @@ class CKANHarvester(HarvesterBase):
                     harvest_object, 'Import')
             return False
 
+        # Reload on every call: this harvester object may serve several sources
+        self._set_config(harvest_object.job.source.config)
+
         try:
             package_dict = json.loads(harvest_object.content)
             return self._create_or_update_package(package_dict,harvest_object)