Merge branch '2.0-dataset-sources' of github.com:okfn/ckanext-harvest into 2.0-dataset-sources
This commit is contained in:
commit
3b6468b181
|
@ -251,7 +251,7 @@ class Harvester(CkanCommand):
|
||||||
|
|
||||||
self.print_harvest_job(job)
|
self.print_harvest_job(job)
|
||||||
jobs = get_action('harvest_job_list')(context,{'status':u'New'})
|
jobs = get_action('harvest_job_list')(context,{'status':u'New'})
|
||||||
self.print_there_are('harvest jobs', jobs, condition=u'New')
|
self.print_there_are('harvest job', jobs, condition=u'New')
|
||||||
|
|
||||||
def list_harvest_jobs(self):
|
def list_harvest_jobs(self):
|
||||||
context = {'model': model, 'user': self.admin_user['name'], 'session':model.Session}
|
context = {'model': model, 'user': self.admin_user['name'], 'session':model.Session}
|
||||||
|
@ -319,11 +319,11 @@ class Harvester(CkanCommand):
|
||||||
print ' Job id: %s' % job['id']
|
print ' Job id: %s' % job['id']
|
||||||
print ' status: %s' % job['status']
|
print ' status: %s' % job['status']
|
||||||
print ' source: %s' % job['source_id']
|
print ' source: %s' % job['source_id']
|
||||||
print ' objects: %s' % len(job['objects'])
|
print ' objects: %s' % len(job.get('objects', []))
|
||||||
|
|
||||||
print 'gather_errors: %s' % len(job['gather_errors'])
|
print 'gather_errors: %s' % len(job.get('gather_errors', []))
|
||||||
if (len(job['gather_errors']) > 0):
|
if (len(job['gather_errors']) > 0):
|
||||||
for error in job['gather_errors']:
|
for error in job.get('gather_errors', []):
|
||||||
print ' %s' % error['message']
|
print ' %s' % error['message']
|
||||||
|
|
||||||
print ''
|
print ''
|
||||||
|
|
|
@ -5,6 +5,7 @@ import uuid
|
||||||
from sqlalchemy.sql import update,and_, bindparam
|
from sqlalchemy.sql import update,and_, bindparam
|
||||||
from sqlalchemy.exc import InvalidRequestError
|
from sqlalchemy.exc import InvalidRequestError
|
||||||
|
|
||||||
|
from ckan import plugins as p
|
||||||
from ckan import model
|
from ckan import model
|
||||||
from ckan.model import Session, Package
|
from ckan.model import Session, Package
|
||||||
from ckan.logic import ValidationError, NotFound, get_action
|
from ckan.logic import ValidationError, NotFound, get_action
|
||||||
|
@ -173,8 +174,17 @@ class HarvesterBase(SingletonPlugin):
|
||||||
package_dict['name'] = self._gen_new_name(package_dict['title'])
|
package_dict['name'] = self._gen_new_name(package_dict['title'])
|
||||||
|
|
||||||
log.info('Package with GUID %s does not exist, let\'s create it' % harvest_object.guid)
|
log.info('Package with GUID %s does not exist, let\'s create it' % harvest_object.guid)
|
||||||
|
harvest_object.current = True
|
||||||
|
harvest_object.package_id = package_dict['id']
|
||||||
|
# Defer constraints and flush so the dataset can be indexed with
|
||||||
|
# the harvest object id (on the after_show hook from the harvester
|
||||||
|
# plugin)
|
||||||
|
harvest_object.add()
|
||||||
|
|
||||||
|
model.Session.execute('SET CONSTRAINTS harvest_object_package_id_fkey DEFERRED')
|
||||||
|
model.Session.flush()
|
||||||
|
|
||||||
new_package = get_action('package_create_rest')(context, package_dict)
|
new_package = get_action('package_create_rest')(context, package_dict)
|
||||||
harvest_object.package_id = new_package['id']
|
|
||||||
|
|
||||||
# Flag the other objects linking to this package as not current anymore
|
# Flag the other objects linking to this package as not current anymore
|
||||||
from ckanext.harvest.model import harvest_object_table
|
from ckanext.harvest.model import harvest_object_table
|
||||||
|
|
|
@ -35,15 +35,12 @@ class CKANHarvester(HarvesterBase):
|
||||||
url = url,
|
url = url,
|
||||||
)
|
)
|
||||||
|
|
||||||
try:
|
|
||||||
api_key = self.config.get('api_key',None)
|
api_key = self.config.get('api_key',None)
|
||||||
if api_key:
|
if api_key:
|
||||||
http_request.add_header('Authorization',api_key)
|
http_request.add_header('Authorization',api_key)
|
||||||
http_response = urllib2.urlopen(http_request)
|
http_response = urllib2.urlopen(http_request)
|
||||||
|
|
||||||
return http_response.read()
|
return http_response.read()
|
||||||
except Exception, e:
|
|
||||||
raise e
|
|
||||||
|
|
||||||
def _set_config(self,config_str):
|
def _set_config(self,config_str):
|
||||||
if config_str:
|
if config_str:
|
||||||
|
@ -135,7 +132,7 @@ class CKANHarvester(HarvesterBase):
|
||||||
get_all_packages = False
|
get_all_packages = False
|
||||||
|
|
||||||
# Request only the packages modified since last harvest job
|
# Request only the packages modified since last harvest job
|
||||||
last_time = harvest_job.gather_started.isoformat()
|
last_time = previous_job.gather_finished.isoformat()
|
||||||
url = base_search_url + '/revision?since_time=%s' % last_time
|
url = base_search_url + '/revision?since_time=%s' % last_time
|
||||||
|
|
||||||
try:
|
try:
|
||||||
|
@ -152,7 +149,7 @@ class CKANHarvester(HarvesterBase):
|
||||||
continue
|
continue
|
||||||
|
|
||||||
revision = json.loads(content)
|
revision = json.loads(content)
|
||||||
for package_id in revision.packages:
|
for package_id in revision['packages']:
|
||||||
if not package_id in package_ids:
|
if not package_id in package_ids:
|
||||||
package_ids.append(package_id)
|
package_ids.append(package_id)
|
||||||
else:
|
else:
|
||||||
|
|
Loading…
Reference in New Issue