Merge branch '2.0-dataset-sources' into 7-harvest-source-templates

This commit is contained in:
amercader 2013-03-01 17:55:16 +00:00
commit 574c69fa9c
4 changed files with 26 additions and 16 deletions

View File

@ -251,7 +251,7 @@ class Harvester(CkanCommand):
self.print_harvest_job(job) self.print_harvest_job(job)
jobs = get_action('harvest_job_list')(context,{'status':u'New'}) jobs = get_action('harvest_job_list')(context,{'status':u'New'})
self.print_there_are('harvest jobs', jobs, condition=u'New') self.print_there_are('harvest job', jobs, condition=u'New')
def list_harvest_jobs(self): def list_harvest_jobs(self):
context = {'model': model, 'user': self.admin_user['name'], 'session':model.Session} context = {'model': model, 'user': self.admin_user['name'], 'session':model.Session}
@ -319,11 +319,11 @@ class Harvester(CkanCommand):
print ' Job id: %s' % job['id'] print ' Job id: %s' % job['id']
print ' status: %s' % job['status'] print ' status: %s' % job['status']
print ' source: %s' % job['source_id'] print ' source: %s' % job['source_id']
print ' objects: %s' % len(job['objects']) print ' objects: %s' % len(job.get('objects', []))
print 'gather_errors: %s' % len(job['gather_errors']) print 'gather_errors: %s' % len(job.get('gather_errors', []))
if (len(job['gather_errors']) > 0): if (len(job['gather_errors']) > 0):
for error in job['gather_errors']: for error in job.get('gather_errors', []):
print ' %s' % error['message'] print ' %s' % error['message']
print '' print ''

View File

@ -265,6 +265,9 @@ class ViewController(BaseController):
etree.fromstring(re.sub('<\?xml(.*)\?>','',content)) etree.fromstring(re.sub('<\?xml(.*)\?>','',content))
response.content_type = 'application/xml; charset=utf-8' response.content_type = 'application/xml; charset=utf-8'
if not '<?xml' in content.split('\n')[0]:
content = u'<?xml version="1.0" encoding="UTF-8"?>\n' + content
except XMLSyntaxError: except XMLSyntaxError:
try: try:
json.loads(obj['content']) json.loads(obj['content'])
@ -274,7 +277,7 @@ class ViewController(BaseController):
pass pass
response.headers['Content-Length'] = len(content) response.headers['Content-Length'] = len(content)
return content return content.encode('utf-8')
except NotFound: except NotFound:
abort(404,_('Harvest object not found')) abort(404,_('Harvest object not found'))
except NotAuthorized,e: except NotAuthorized,e:

View File

@ -5,6 +5,7 @@ import uuid
from sqlalchemy.sql import update,and_, bindparam from sqlalchemy.sql import update,and_, bindparam
from sqlalchemy.exc import InvalidRequestError from sqlalchemy.exc import InvalidRequestError
from ckan import plugins as p
from ckan import model from ckan import model
from ckan.model import Session, Package from ckan.model import Session, Package
from ckan.logic import ValidationError, NotFound, get_action from ckan.logic import ValidationError, NotFound, get_action
@ -173,8 +174,17 @@ class HarvesterBase(SingletonPlugin):
package_dict['name'] = self._gen_new_name(package_dict['title']) package_dict['name'] = self._gen_new_name(package_dict['title'])
log.info('Package with GUID %s does not exist, let\'s create it' % harvest_object.guid) log.info('Package with GUID %s does not exist, let\'s create it' % harvest_object.guid)
harvest_object.current = True
harvest_object.package_id = package_dict['id']
# Defer constraints and flush so the dataset can be indexed with
# the harvest object id (on the after_show hook from the harvester
# plugin)
harvest_object.add()
model.Session.execute('SET CONSTRAINTS harvest_object_package_id_fkey DEFERRED')
model.Session.flush()
new_package = get_action('package_create_rest')(context, package_dict) new_package = get_action('package_create_rest')(context, package_dict)
harvest_object.package_id = new_package['id']
# Flag the other objects linking to this package as not current anymore # Flag the other objects linking to this package as not current anymore
from ckanext.harvest.model import harvest_object_table from ckanext.harvest.model import harvest_object_table

View File

@ -35,15 +35,12 @@ class CKANHarvester(HarvesterBase):
url = url, url = url,
) )
try:
api_key = self.config.get('api_key',None) api_key = self.config.get('api_key',None)
if api_key: if api_key:
http_request.add_header('Authorization',api_key) http_request.add_header('Authorization',api_key)
http_response = urllib2.urlopen(http_request) http_response = urllib2.urlopen(http_request)
return http_response.read() return http_response.read()
except Exception, e:
raise e
def _set_config(self,config_str): def _set_config(self,config_str):
if config_str: if config_str:
@ -135,7 +132,7 @@ class CKANHarvester(HarvesterBase):
get_all_packages = False get_all_packages = False
# Request only the packages modified since last harvest job # Request only the packages modified since last harvest job
last_time = harvest_job.gather_started.isoformat() last_time = previous_job.gather_finished.isoformat()
url = base_search_url + '/revision?since_time=%s' % last_time url = base_search_url + '/revision?since_time=%s' % last_time
try: try:
@ -152,7 +149,7 @@ class CKANHarvester(HarvesterBase):
continue continue
revision = json.loads(content) revision = json.loads(content)
for package_id in revision.packages: for package_id in revision['packages']:
if not package_id in package_ids: if not package_id in package_ids:
package_ids.append(package_id) package_ids.append(package_id)
else: else: