Load config in the CKAN harvester

This commit is contained in:
Adrià Mercader 2011-06-07 13:35:11 +01:00
parent 6e75d362e3
commit 98bfd50f47
1 changed file with 18 additions and 0 deletions

View File

@ -18,6 +18,7 @@ class CKANHarvester(HarvesterBase):
''' '''
A Harvester for CKAN instances A Harvester for CKAN instances
''' '''
config = None
#TODO: check different API versions #TODO: check different API versions
api_version = '2' api_version = '2'
@ -40,6 +41,13 @@ class CKANHarvester(HarvesterBase):
except Exception, e: except Exception, e:
raise e raise e
def _set_config(self,config_str):
if config_str:
self.config = json.loads(config_str)
log.debug('Using config: %r', self.config)
else:
self.config = {}
def info(self): def info(self):
return { return {
'name': 'ckan', 'name': 'ckan',
@ -62,6 +70,9 @@ class CKANHarvester(HarvesterBase):
get_all_packages = True get_all_packages = True
package_ids = [] package_ids = []
if not self.config:
self._set_config(harvest_job.source.config)
# Check if this source has been harvested before # Check if this source has been harvested before
previous_job = Session.query(HarvestJob) \ previous_job = Session.query(HarvestJob) \
.filter(HarvestJob.source==harvest_job.source) \ .filter(HarvestJob.source==harvest_job.source) \
@ -145,6 +156,10 @@ class CKANHarvester(HarvesterBase):
def fetch_stage(self,harvest_object): def fetch_stage(self,harvest_object):
log.debug('In CKANHarvester fetch_stage') log.debug('In CKANHarvester fetch_stage')
if not self.config:
self._set_config(harvest_object.job.source.config)
# Get source URL # Get source URL
url = harvest_object.source.url.rstrip('/') url = harvest_object.source.url.rstrip('/')
url = url + self._get_rest_api_offset() + '/package/' + harvest_object.guid url = url + self._get_rest_api_offset() + '/package/' + harvest_object.guid
@ -173,6 +188,9 @@ class CKANHarvester(HarvesterBase):
harvest_object, 'Import') harvest_object, 'Import')
return False return False
if not self.config:
self._set_config(harvest_object.job.source.config)
try: try:
package_dict = json.loads(harvest_object.content) package_dict = json.loads(harvest_object.content)
return self._create_or_update_package(package_dict,harvest_object) return self._create_or_update_package(package_dict,harvest_object)