Merge branch '2.0-dataset-sources' into source_extra_config_validation
This commit is contained in:
commit
23aa45cc71
|
@ -251,7 +251,7 @@ class Harvester(CkanCommand):
|
||||||
|
|
||||||
self.print_harvest_job(job)
|
self.print_harvest_job(job)
|
||||||
jobs = get_action('harvest_job_list')(context,{'status':u'New'})
|
jobs = get_action('harvest_job_list')(context,{'status':u'New'})
|
||||||
self.print_there_are('harvest jobs', jobs, condition=u'New')
|
self.print_there_are('harvest job', jobs, condition=u'New')
|
||||||
|
|
||||||
def list_harvest_jobs(self):
|
def list_harvest_jobs(self):
|
||||||
context = {'model': model, 'user': self.admin_user['name'], 'session':model.Session}
|
context = {'model': model, 'user': self.admin_user['name'], 'session':model.Session}
|
||||||
|
@ -319,11 +319,10 @@ class Harvester(CkanCommand):
|
||||||
print ' Job id: %s' % job['id']
|
print ' Job id: %s' % job['id']
|
||||||
print ' status: %s' % job['status']
|
print ' status: %s' % job['status']
|
||||||
print ' source: %s' % job['source_id']
|
print ' source: %s' % job['source_id']
|
||||||
print ' objects: %s' % len(job['objects'])
|
print ' objects: %s' % len(job.get('objects', []))
|
||||||
|
|
||||||
print 'gather_errors: %s' % len(job['gather_errors'])
|
print 'gather_errors: %s' % len(job.get('gather_errors', []))
|
||||||
if (len(job['gather_errors']) > 0):
|
for error in job.get('gather_errors', []):
|
||||||
for error in job['gather_errors']:
|
|
||||||
print ' %s' % error['message']
|
print ' %s' % error['message']
|
||||||
|
|
||||||
print ''
|
print ''
|
||||||
|
|
|
@ -265,6 +265,9 @@ class ViewController(BaseController):
|
||||||
|
|
||||||
etree.fromstring(re.sub('<\?xml(.*)\?>','',content))
|
etree.fromstring(re.sub('<\?xml(.*)\?>','',content))
|
||||||
response.content_type = 'application/xml; charset=utf-8'
|
response.content_type = 'application/xml; charset=utf-8'
|
||||||
|
if not '<?xml' in content.split('\n')[0]:
|
||||||
|
content = u'<?xml version="1.0" encoding="UTF-8"?>\n' + content
|
||||||
|
|
||||||
except XMLSyntaxError:
|
except XMLSyntaxError:
|
||||||
try:
|
try:
|
||||||
json.loads(obj['content'])
|
json.loads(obj['content'])
|
||||||
|
@ -274,7 +277,7 @@ class ViewController(BaseController):
|
||||||
pass
|
pass
|
||||||
|
|
||||||
response.headers['Content-Length'] = len(content)
|
response.headers['Content-Length'] = len(content)
|
||||||
return content
|
return content.encode('utf-8')
|
||||||
except NotFound:
|
except NotFound:
|
||||||
abort(404,_('Harvest object not found'))
|
abort(404,_('Harvest object not found'))
|
||||||
except NotAuthorized,e:
|
except NotAuthorized,e:
|
||||||
|
|
|
@ -5,6 +5,7 @@ import uuid
|
||||||
from sqlalchemy.sql import update,and_, bindparam
|
from sqlalchemy.sql import update,and_, bindparam
|
||||||
from sqlalchemy.exc import InvalidRequestError
|
from sqlalchemy.exc import InvalidRequestError
|
||||||
|
|
||||||
|
from ckan import plugins as p
|
||||||
from ckan import model
|
from ckan import model
|
||||||
from ckan.model import Session, Package
|
from ckan.model import Session, Package
|
||||||
from ckan.logic import ValidationError, NotFound, get_action
|
from ckan.logic import ValidationError, NotFound, get_action
|
||||||
|
@ -173,8 +174,17 @@ class HarvesterBase(SingletonPlugin):
|
||||||
package_dict['name'] = self._gen_new_name(package_dict['title'])
|
package_dict['name'] = self._gen_new_name(package_dict['title'])
|
||||||
|
|
||||||
log.info('Package with GUID %s does not exist, let\'s create it' % harvest_object.guid)
|
log.info('Package with GUID %s does not exist, let\'s create it' % harvest_object.guid)
|
||||||
|
harvest_object.current = True
|
||||||
|
harvest_object.package_id = package_dict['id']
|
||||||
|
# Defer constraints and flush so the dataset can be indexed with
|
||||||
|
# the harvest object id (on the after_show hook from the harvester
|
||||||
|
# plugin)
|
||||||
|
harvest_object.add()
|
||||||
|
|
||||||
|
model.Session.execute('SET CONSTRAINTS harvest_object_package_id_fkey DEFERRED')
|
||||||
|
model.Session.flush()
|
||||||
|
|
||||||
new_package = get_action('package_create_rest')(context, package_dict)
|
new_package = get_action('package_create_rest')(context, package_dict)
|
||||||
harvest_object.package_id = new_package['id']
|
|
||||||
|
|
||||||
# Flag the other objects linking to this package as not current anymore
|
# Flag the other objects linking to this package as not current anymore
|
||||||
from ckanext.harvest.model import harvest_object_table
|
from ckanext.harvest.model import harvest_object_table
|
||||||
|
|
|
@ -35,15 +35,12 @@ class CKANHarvester(HarvesterBase):
|
||||||
url = url,
|
url = url,
|
||||||
)
|
)
|
||||||
|
|
||||||
try:
|
|
||||||
api_key = self.config.get('api_key',None)
|
api_key = self.config.get('api_key',None)
|
||||||
if api_key:
|
if api_key:
|
||||||
http_request.add_header('Authorization',api_key)
|
http_request.add_header('Authorization',api_key)
|
||||||
http_response = urllib2.urlopen(http_request)
|
http_response = urllib2.urlopen(http_request)
|
||||||
|
|
||||||
return http_response.read()
|
return http_response.read()
|
||||||
except Exception, e:
|
|
||||||
raise e
|
|
||||||
|
|
||||||
def _set_config(self,config_str):
|
def _set_config(self,config_str):
|
||||||
if config_str:
|
if config_str:
|
||||||
|
@ -135,7 +132,7 @@ class CKANHarvester(HarvesterBase):
|
||||||
get_all_packages = False
|
get_all_packages = False
|
||||||
|
|
||||||
# Request only the packages modified since last harvest job
|
# Request only the packages modified since last harvest job
|
||||||
last_time = harvest_job.gather_started.isoformat()
|
last_time = previous_job.gather_finished.isoformat()
|
||||||
url = base_search_url + '/revision?since_time=%s' % last_time
|
url = base_search_url + '/revision?since_time=%s' % last_time
|
||||||
|
|
||||||
try:
|
try:
|
||||||
|
@ -152,7 +149,7 @@ class CKANHarvester(HarvesterBase):
|
||||||
continue
|
continue
|
||||||
|
|
||||||
revision = json.loads(content)
|
revision = json.loads(content)
|
||||||
for package_id in revision.packages:
|
for package_id in revision['packages']:
|
||||||
if not package_id in package_ids:
|
if not package_id in package_ids:
|
||||||
package_ids.append(package_id)
|
package_ids.append(package_id)
|
||||||
else:
|
else:
|
||||||
|
|
|
@ -143,6 +143,7 @@ def gather_callback(channel, method, header, body):
|
||||||
except KeyError:
|
except KeyError:
|
||||||
log.error('No harvest job id received')
|
log.error('No harvest job id received')
|
||||||
finally:
|
finally:
|
||||||
|
model.Session.remove()
|
||||||
channel.basic_ack(method.delivery_tag)
|
channel.basic_ack(method.delivery_tag)
|
||||||
|
|
||||||
|
|
||||||
|
@ -214,7 +215,7 @@ def fetch_callback(channel, method, header, body):
|
||||||
else:
|
else:
|
||||||
obj.report_status = 'added'
|
obj.report_status = 'added'
|
||||||
obj.save()
|
obj.save()
|
||||||
|
model.Session.remove()
|
||||||
channel.basic_ack(method.delivery_tag)
|
channel.basic_ack(method.delivery_tag)
|
||||||
|
|
||||||
def get_gather_consumer():
|
def get_gather_consumer():
|
||||||
|
|
Loading…
Reference in New Issue