Merge branch '2.0-dataset-sources' into 7-harvest-source-templates

This commit is contained in:
amercader 2013-03-01 17:55:16 +00:00
commit 574c69fa9c
4 changed files with 26 additions and 16 deletions

View File

@ -251,7 +251,7 @@ class Harvester(CkanCommand):
self.print_harvest_job(job)
jobs = get_action('harvest_job_list')(context,{'status':u'New'})
self.print_there_are('harvest jobs', jobs, condition=u'New')
self.print_there_are('harvest job', jobs, condition=u'New')
def list_harvest_jobs(self):
context = {'model': model, 'user': self.admin_user['name'], 'session':model.Session}
@ -319,11 +319,11 @@ class Harvester(CkanCommand):
print ' Job id: %s' % job['id']
print ' status: %s' % job['status']
print ' source: %s' % job['source_id']
print ' objects: %s' % len(job['objects'])
print ' objects: %s' % len(job.get('objects', []))
print 'gather_errors: %s' % len(job['gather_errors'])
print 'gather_errors: %s' % len(job.get('gather_errors', []))
if (len(job['gather_errors']) > 0):
for error in job['gather_errors']:
for error in job.get('gather_errors', []):
print ' %s' % error['message']
print ''

View File

@ -265,6 +265,9 @@ class ViewController(BaseController):
etree.fromstring(re.sub('<\?xml(.*)\?>','',content))
response.content_type = 'application/xml; charset=utf-8'
if not '<?xml' in content.split('\n')[0]:
content = u'<?xml version="1.0" encoding="UTF-8"?>\n' + content
except XMLSyntaxError:
try:
json.loads(obj['content'])
@ -274,7 +277,7 @@ class ViewController(BaseController):
pass
response.headers['Content-Length'] = len(content)
return content
return content.encode('utf-8')
except NotFound:
abort(404,_('Harvest object not found'))
except NotAuthorized,e:

View File

@ -5,6 +5,7 @@ import uuid
from sqlalchemy.sql import update,and_, bindparam
from sqlalchemy.exc import InvalidRequestError
from ckan import plugins as p
from ckan import model
from ckan.model import Session, Package
from ckan.logic import ValidationError, NotFound, get_action
@ -173,8 +174,17 @@ class HarvesterBase(SingletonPlugin):
package_dict['name'] = self._gen_new_name(package_dict['title'])
log.info('Package with GUID %s does not exist, let\'s create it' % harvest_object.guid)
harvest_object.current = True
harvest_object.package_id = package_dict['id']
# Defer constraints and flush so the dataset can be indexed with
# the harvest object id (on the after_show hook from the harvester
# plugin)
harvest_object.add()
model.Session.execute('SET CONSTRAINTS harvest_object_package_id_fkey DEFERRED')
model.Session.flush()
new_package = get_action('package_create_rest')(context, package_dict)
harvest_object.package_id = new_package['id']
# Flag the other objects linking to this package as not current anymore
from ckanext.harvest.model import harvest_object_table

View File

@ -35,15 +35,12 @@ class CKANHarvester(HarvesterBase):
url = url,
)
try:
api_key = self.config.get('api_key',None)
if api_key:
http_request.add_header('Authorization',api_key)
http_response = urllib2.urlopen(http_request)
return http_response.read()
except Exception, e:
raise e
def _set_config(self,config_str):
if config_str:
@ -135,7 +132,7 @@ class CKANHarvester(HarvesterBase):
get_all_packages = False
# Request only the packages modified since last harvest job
last_time = harvest_job.gather_started.isoformat()
last_time = previous_job.gather_finished.isoformat()
url = base_search_url + '/revision?since_time=%s' % last_time
try:
@ -152,7 +149,7 @@ class CKANHarvester(HarvesterBase):
continue
revision = json.loads(content)
for package_id in revision.packages:
for package_id in revision['packages']:
if not package_id in package_ids:
package_ids.append(package_id)
else: