Merge branch 'change_logic_schema' of https://code-repo.d4science.org/CatalogueRevamping/harvester-d4science into change_logic_schema

This commit is contained in:
Alessio Fabrizio 2024-12-20 09:14:34 +01:00
commit fe29e1c7db
52 changed files with 550 additions and 644 deletions

View File

@ -4,10 +4,10 @@ jobs:
lint:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v2
- uses: actions/setup-python@v2
- uses: actions/checkout@v3
- uses: actions/setup-python@v4
with:
python-version: '3.6'
python-version: '3.8'
- name: Install requirements
run: pip install flake8 pycodestyle
- name: Check syntax
@ -19,7 +19,7 @@ jobs:
needs: lint
strategy:
matrix:
ckan-version: [master, 2.9, 2.9-py2, 2.8, 2.7]
ckan-version: ["2.10", 2.9]
fail-fast: false
name: CKAN ${{ matrix.ckan-version }}
@ -28,7 +28,7 @@ jobs:
image: openknowledge/ckan-dev:${{ matrix.ckan-version }}
services:
solr:
image: ckan/ckan-solr-dev:${{ matrix.ckan-version }}
image: ckan/ckan-solr:${{ matrix.ckan-version }}
postgres:
image: ckan/ckan-postgres-dev:${{ matrix.ckan-version }}
env:
@ -49,20 +49,14 @@ jobs:
- uses: actions/checkout@v2
- name: Install requirements
run: |
pip install -r pip-requirements.txt
pip install -r requirements.txt
pip install -r dev-requirements.txt
pip install -e .
# Replace default path to CKAN core config file with the one on the container
sed -i -e 's/use = config:.*/use = config:\/srv\/app\/src\/ckan\/test-core.ini/' test.ini
- name: Setup extension (CKAN >= 2.9)
if: ${{ matrix.ckan-version != '2.7' && matrix.ckan-version != '2.8' }}
run: |
ckan -c test.ini db init
ckan -c test.ini harvester initdb
- name: Setup extension (CKAN < 2.9)
if: ${{ matrix.ckan-version == '2.7' || matrix.ckan-version == '2.8' }}
run: |
paster --plugin=ckan db init -c test.ini
paster --plugin=ckanext-harvest harvester initdb -c test.ini
- name: Run tests
run: pytest --ckan-ini=test.ini --cov=ckanext.harvest --disable-warnings ckanext/harvest/tests

4
.gitignore vendored
View File

@ -13,4 +13,6 @@ development.ini
node_modules
*.project
.eggs
.vscode/
.idea/
.vscode/

View File

@ -8,9 +8,123 @@ The format is based on `Keep a Changelog <http://keepachangelog.com>`_
and this project adheres to `Semantic Versioning <http://semver.org/>`_
***********
Unreleased_
1.5.6_ - 2023-06-26
***********
Fixed
-------
- Fix url endpoint for job_show #534
***********
1.5.5_ - 2023-06-05
***********
Fixed
-------
- Fix display of harvest job errors #533
***********
1.5.4_ - 2023-05-23
***********
Fixed
-------
- Fix a problem with data-dictization when using sqlalchemy 1.4+ #529
***********
1.5.3_ - 2023-04-03
***********
Fixed
-------
- Fix asset path in MANIFEST.in #525
***********
1.5.2_ - 2023-03-28
***********
Fixed
-------
- Fix URL endpoints: from ``harvest.object_show`` to ``harvester.object_show`` #524
***********
1.5.1_ - 2023-03-22
***********
Fixed
-------
- Fix ``url_for`` routing to point to harvester blueprint #523
***********
1.5.0_ - 2023-03-16
***********
Changed
-------
- Added unescape for email text body to avoid encoded characters #517
- Pick the right harvest_object_id if there are multiple #519
- Do not duplicate harvest_extras if exist in root schema #521
- Use 403 when actions are forbidden, not 401 #522
- Drop support for old versions #520
Breaking Changes
-------
- ``h.bootstrap_version()`` no longer exists, since it is no longer needed to inject CSS classes
- Support for the old Pylons route syntax has been removed. Example: calling ``url_for("harvest_read")`` will no longer work. URLs for ``ckanext-harvest`` need to follow Flask's syntax: ``url_for("harvest.read")``, etc
***********
1.4.2_ - 2023-01-12
***********
Changed
-------
- Add DB index harvest_error_harvest_object_id_idx #514
- Remove pyopenssl requirement c87309a
- Add CSRF protection to new source form #516
***********
1.4.1_ - 2022-09-20
***********
Changed
-------
- Use requirements.txt instead of pip-requirements.txt (still working via symlink) 8ed1eca
Fixed
-----
- Bump pyopenssl requirement to avoid requirements error on install 98edcd3
- Fixes unicode error in Python 2 #502
- Fixes in email notification sending #499, #505
- Fix pagination for Dataset list on source page #504
***********
1.4.0_ - 2022-04-20
***********
Changed
-------
- Add ckan.harvest.not_overwrite_fields #472
- Support for Bootstrap 5 templates #490
- Support for CKAN 2.10 #492 #496
Fixed
-----
- Fix JSONDecode error #489
- Check if email exists before sending notification #498
***********
1.3.4_ - 2022-01-24
***********
@ -299,7 +413,19 @@ Categories
- ``Fixed`` for any bug fixes.
- ``Security`` to invite users to upgrade in case of vulnerabilities.
.. _Unreleased: https://github.com/ckan/ckanext-harvest/compare/v1.3.2...HEAD
.. _Unreleased: https://github.com/ckan/ckanext-harvest/compare/v1.5.6...HEAD
.. _1.5.6: https://github.com/ckan/ckanext-harvest/compare/v1.5.5...v1.5.6
.. _1.5.5: https://github.com/ckan/ckanext-harvest/compare/v1.5.4...v1.5.5
.. _1.5.4: https://github.com/ckan/ckanext-harvest/compare/v1.5.3...v1.5.4
.. _1.5.3: https://github.com/ckan/ckanext-harvest/compare/v1.5.2...v1.5.3
.. _1.5.2: https://github.com/ckan/ckanext-harvest/compare/v1.5.1...v1.5.2
.. _1.5.1: https://github.com/ckan/ckanext-harvest/compare/v1.5.0...v1.5.1
.. _1.5.0: https://github.com/ckan/ckanext-harvest/compare/v1.4.2...v1.5.0
.. _1.4.2: https://github.com/ckan/ckanext-harvest/compare/v1.4.1...v1.4.2
.. _1.4.1: https://github.com/ckan/ckanext-harvest/compare/v1.4.0...v1.4.1
.. _1.4.0: https://github.com/ckan/ckanext-harvest/compare/v1.3.4...v1.4.0
.. _1.3.4: https://github.com/ckan/ckanext-harvest/compare/v1.3.3...v1.3.4
.. _1.3.3: https://github.com/ckan/ckanext-harvest/compare/v1.3.2...v1.3.3
.. _1.3.2: https://github.com/ckan/ckanext-harvest/compare/v1.3.1...v1.3.2
.. _1.3.1: https://github.com/ckan/ckanext-harvest/compare/v1.3.0...v1.3.1
.. _1.3.0: https://github.com/ckan/ckanext-harvest/compare/v1.2.1...v1.3.0

View File

@ -1,4 +1,4 @@
recursive-include ckanext/harvest/templates *
recursive-include ckanext/harvest/fanstatic_library *
recursive-include ckanext/harvest/assets *
recursive-include ckanext/harvest/public *
recursive-include ckanext/harvest/i18n *

View File

@ -49,7 +49,7 @@ running a version lower than 2.0.
4. Install the python modules required by the extension (adjusting the path according to where ckanext-harvest was installed in the previous step)::
(pyenv) $ cd /usr/lib/ckan/default/src/ckanext-harvest/
(pyenv) $ pip install -r pip-requirements.txt
(pyenv) $ pip install -r requirements.txt
5. Make sure the CKAN configuration ini file contains the harvest main plugin, as
well as the harvester for CKAN instances if you need it (included with the extension)::
@ -94,14 +94,8 @@ Configuration
Run the following command to create the necessary tables in the database (ensuring the pyenv is activated):
ON CKAN >= 2.9::
(pyenv) $ ckan --config=/etc/ckan/default/ckan.ini harvester initdb
ON CKAN <= 2.8::
(pyenv) $ paster --plugin=ckanext-harvest harvester initdb --config=/etc/ckan/default/production.ini
Finally, restart CKAN to have the changes take effect::
sudo service apache2 restart
@ -213,7 +207,7 @@ IF you want to set a timeout for harvest jobs, you can add this configuration op
ckan.harvest.timeout = 1440
The timeout value is in minutes, so 1440 represents 24 hours.
The timeout value is in minutes, so 1440 represents 24 hours.
Any jobs which are timed out will create an error message for the user to see.
If you don't specify this setting, the default will be False and there will be no timeout on harvest jobs.
@ -289,9 +283,9 @@ The following operations can be run from the command line as described underneat
import) without involving the web UI or the queue backends. This is
useful for testing a harvester without having to fire up
gather/fetch_consumer processes, as is done in production.
harvester run-test {source-id/name} force-import=guid1,guid2...
- In order to force an import of particular datasets, useful to
- In order to force an import of particular datasets, useful to
target a dataset for dev purposes or when forcing imports on other environments.
harvester gather-consumer
@ -335,22 +329,17 @@ The following operations can be run from the command line as described underneat
The commands should be run with the pyenv activated and refer to your CKAN configuration file:
ON CKAN >= 2.9::
(pyenv) $ ckan --config=/etc/ckan/default/ckan.ini harvester --help
(pyenv) $ ckan --config=/etc/ckan/default/ckan.ini harvester sources
ON CKAN <= 2.8::
(pyenv) $ paster --plugin=ckanext-harvest harvester sources --config=/etc/ckan/default/production.ini
**Note that on CKAN >= 2.9 all commands with an underscore in their name changed.** They now use a hyphen instead of an underscore (e.g. ``gather_consumer`` changed to ``gather-consumer``).
Authorization
=============
Starting from CKAN 2.0, harvest sources behave exactly the same as datasets
Harvest sources behave exactly the same as datasets
(they are actually internally implemented as a dataset type). That means they
can be searched and faceted, and that the same authorization rules can be
applied to them. The default authorization settings are based on organizations.
@ -700,10 +689,10 @@ harvester run-test
You can run a harvester simply using the ``run-test`` command. This is handy
for running a harvest with one command in the console and see all the output
in-line. It runs the gather, fetch and import stages all in the same process.
You must ensure that you have pip installed ``dev-requirements.txt``
You must ensure that you have pip installed ``dev-requirements.txt``
in ``/home/ckan/ckan/lib/default/src/ckanext-harvest`` before using the
``run-test`` command.
This is useful for developing a harvester because you can insert break-points
in your harvester, and rerun a harvest without having to restart the
gather_consumer and fetch_consumer processes each time. In addition, because it
@ -727,35 +716,17 @@ handles the gathering and another one that handles the fetching and importing.
To start the consumers run the following command (make sure you have your
python environment activated):
ON CKAN >= 2.9::
(pyenv) $ ckan --config=/etc/ckan/default/ckan.ini harvester gather_consumer
ON CKAN <= 2.8::
(pyenv) $ paster --plugin=ckanext-harvest harvester gather_consumer --config=/etc/ckan/default/production.ini
(pyenv) $ ckan --config=/etc/ckan/default/ckan.ini harvester gather-consumer
On another terminal, run the following command:
ON CKAN >= 2.9::
(pyenv) $ ckan --config=/etc/ckan/default/ckan.ini harvester fetch_consumer
ON CKAN <= 2.8::
(pyenv) $ paster --plugin=ckanext-harvest harvester fetch_consumer --config=/etc/ckan/default/production.ini
(pyenv) $ ckan --config=/etc/ckan/default/ckan.ini harvester fetch-consumer
Finally, on a third console, run the following command to start any
pending harvesting jobs:
ON CKAN >= 2.9::
(pyenv) $ ckan --config=/etc/ckan/default/ckan.ini harvester run
ON CKAN <= 2.8::
(pyenv) $ paster --plugin=ckanext-harvest harvester run --config=/etc/ckan/default/production.ini
The ``run`` command not only starts any pending harvesting jobs, but also
flags those that are finished, allowing new jobs to be created on that particular
source and refreshing the source statistics. That means that you will need to run
@ -771,13 +742,7 @@ circumstance, ensure that the gather & fetch consumers are running and have
nothing more to consume, and then run this abort command with the name or id of
the harvest source:
ON CKAN >= 2.9::
(pyenv) $ ckan --config=/etc/ckan/default/ckan.ini harvester job_abort {source-id/name}
ON CKAN <= 2.8::
(pyenv) $ paster --plugin=ckanext-harvest harvester job_abort {source-id/name} --config=/etc/ckan/default/production.ini
(pyenv) $ ckan --config=/etc/ckan/default/ckan.ini harvester job-abort {source-id/name}
Setting up the harvesters on a production server
@ -828,7 +793,7 @@ following steps with the one you are using.
[program:ckan_gather_consumer]
command=/usr/lib/ckan/default/bin/ckan --config=/etc/ckan/default/ckan.ini harvester gather_consumer
command=/usr/lib/ckan/default/bin/ckan --config=/etc/ckan/default/ckan.ini harvester gather-consumer
; user that owns virtual environment.
user=ckan
@ -842,43 +807,7 @@ following steps with the one you are using.
[program:ckan_fetch_consumer]
command=/usr/lib/ckan/default/bin/ckan --config=/etc/ckan/default/ckan.ini harvester fetch_consumer
; user that owns virtual environment.
user=ckan
numprocs=1
stdout_logfile=/var/log/ckan/std/fetch_consumer.log
stderr_logfile=/var/log/ckan/std/fetch_consumer.log
autostart=true
autorestart=true
startsecs=10
ON CKAN <= 2.8::
; ===============================
; ckan harvester
; ===============================
[program:ckan_gather_consumer]
command=/usr/lib/ckan/default/bin/paster --plugin=ckanext-harvest harvester gather_consumer --config=/etc/ckan/default/production.ini
; user that owns virtual environment.
user=ckan
numprocs=1
stdout_logfile=/var/log/ckan/std/gather_consumer.log
stderr_logfile=/var/log/ckan/std/gather_consumer.log
autostart=true
autorestart=true
startsecs=10
[program:ckan_fetch_consumer]
command=/usr/lib/ckan/default/bin/paster --plugin=ckanext-harvest harvester fetch_consumer --config=/etc/ckan/default/production.ini
command=/usr/lib/ckan/default/bin/ckan --config=/etc/ckan/default/ckan.ini harvester fetch-consumer
; user that owns virtual environment.
user=ckan
@ -952,16 +881,9 @@ following steps with the one you are using.
Paste this line into your crontab, again replacing the paths to ckan and
the ini file with yours:
ON CKAN >= 2.9::
# m h dom mon dow command
*/15 * * * * /usr/lib/ckan/default/bin/ckan -c /etc/ckan/default/ckan.ini harvester run
ON CKAN <= 2.8::
# m h dom mon dow command
*/15 * * * * /usr/lib/ckan/default/bin/paster --plugin=ckanext-harvest harvester run --config=/etc/ckan/default/production.ini
This particular example will check for pending jobs every fifteen minutes.
You can of course modify this periodicity, this `Wikipedia page <http://en.wikipedia.org/wiki/Cron#CRON_expression>`_
has a good overview of the crontab syntax.
@ -973,15 +895,8 @@ following steps with the one you are using.
Paste this line into your crontab, again replacing the paths to ckan and
the ini file with yours:
ON CKAN >= 2.9::
# m h dom mon dow command
0 5 * * * /usr/lib/ckan/default/bin/ckan -c /etc/ckan/default/ckan.ini harvester clean_harvest_log
ON CKAN <= 2.8::
# m h dom mon dow command
0 5 * * * /usr/lib/ckan/default/bin/paster --plugin=ckanext-harvest harvester clean_harvest_log --config=/etc/ckan/default/production.ini
0 5 * * * /usr/lib/ckan/default/bin/ckan -c /etc/ckan/default/ckan.ini harvester clean-harvest-log
This particular example will perform clean-up each day at 05 AM.
You can tweak the value according to your needs.
@ -992,17 +907,17 @@ Extensible actions
Recipients on harvest jobs notifications
----------------------------------------
:code:`harvest_get_notifications_recipients`: you can *chain* this action from another extension to change
:code:`harvest_get_notifications_recipients`: you can *chain* this action from another extension to change
the recipients for harvest jobs notifications.
.. code-block:: python
@toolkit.chained_action
def harvest_get_notifications_recipients(up_func, context, data_dict):
""" Harvester plugin notify by default about harvest jobs only to
""" Harvester plugin notify by default about harvest jobs only to
admin users of the related organization.
Also allow to add custom recipients with this function.
Return a list of dicts with name and email like
{'name': 'John', 'email': 'john@source.com'} """
@ -1021,7 +936,7 @@ Tests
You can run the tests like this::
cd ckanext-harvest
nosetests --reset-db --ckan --with-pylons=test-core.ini ckanext/harvest/tests
pytest --ckan-ini=test.ini ckanext/harvest/tests
Here are some common errors and solutions:

View File

@ -1,15 +1,13 @@
from __future__ import print_function
import sys
import six
from ckan import model
from ckan.logic import get_action, ValidationError
from ckantoolkit import CkanCommand
import ckanext.harvest.utils as utils
from ckanext.harvest.logic.schema import unicode_safe
class Harvester(CkanCommand):
@ -275,23 +273,23 @@ class Harvester(CkanCommand):
def create_harvest_source(self):
if len(self.args) >= 2:
name = six.text_type(self.args[1])
name = unicode_safe(self.args[1])
else:
print("Please provide a source name")
sys.exit(1)
if len(self.args) >= 3:
url = six.text_type(self.args[2])
url = unicode_safe(self.args[2])
else:
print("Please provide a source URL")
sys.exit(1)
if len(self.args) >= 4:
type = six.text_type(self.args[3])
type = unicode_safe(self.args[3])
else:
print("Please provide a source type")
sys.exit(1)
if len(self.args) >= 5:
title = six.text_type(self.args[4])
title = unicode_safe(self.args[4])
else:
title = None
if len(self.args) >= 6:
@ -301,17 +299,17 @@ class Harvester(CkanCommand):
else:
active = True
if len(self.args) >= 7:
owner_org = six.text_type(self.args[6])
owner_org = unicode_safe(self.args[6])
else:
owner_org = None
if len(self.args) >= 8:
frequency = six.text_type(self.args[7])
frequency = unicode_safe(self.args[7])
if not frequency:
frequency = "MANUAL"
else:
frequency = "MANUAL"
if len(self.args) >= 9:
source_config = six.text_type(self.args[8])
source_config = unicode_safe(self.args[8])
else:
source_config = None
try:
@ -329,14 +327,14 @@ class Harvester(CkanCommand):
keep_current = bool(self.options.keep_current)
source_id = None
if len(self.args) >= 2:
source_id = six.text_type(self.args[1])
source_id = unicode_safe(self.args[1])
print(utils.clear_harvest_source_history(source_id, keep_current))
def show_harvest_source(self):
if len(self.args) >= 2:
source_id_or_name = six.text_type(self.args[1])
source_id_or_name = unicode_safe(self.args[1])
else:
print("Please provide a source name")
sys.exit(1)
@ -344,7 +342,7 @@ class Harvester(CkanCommand):
def remove_harvest_source(self):
if len(self.args) >= 2:
source_id_or_name = six.text_type(self.args[1])
source_id_or_name = unicode_safe(self.args[1])
else:
print("Please provide a source id")
sys.exit(1)
@ -352,7 +350,7 @@ class Harvester(CkanCommand):
def clear_harvest_source(self):
if len(self.args) >= 2:
source_id_or_name = six.text_type(self.args[1])
source_id_or_name = unicode_safe(self.args[1])
else:
print("Please provide a source id")
sys.exit(1)
@ -368,7 +366,7 @@ class Harvester(CkanCommand):
def create_harvest_job(self):
if len(self.args) >= 2:
source_id_or_name = six.text_type(self.args[1])
source_id_or_name = unicode_safe(self.args[1])
else:
print("Please provide a source id")
sys.exit(1)
@ -379,7 +377,7 @@ class Harvester(CkanCommand):
def job_abort(self):
if len(self.args) >= 2:
job_or_source_id_or_name = six.text_type(self.args[1])
job_or_source_id_or_name = unicode_safe(self.args[1])
else:
print("Please provide a job id or source name/id")
sys.exit(1)
@ -394,7 +392,7 @@ class Harvester(CkanCommand):
if len(self.args) >= 2:
if len(self.args) >= 3 and self.args[2].startswith('force-import='):
force_import = self.args[2].split('=')[-1]
source_id_or_name = six.text_type(self.args[1])
source_id_or_name = unicode_safe(self.args[1])
else:
print("Please provide a source id")
sys.exit(1)
@ -404,7 +402,7 @@ class Harvester(CkanCommand):
def import_stage(self):
if len(self.args) >= 2:
source_id_or_name = six.text_type(self.args[1])
source_id_or_name = unicode_safe(self.args[1])
context = {
"model": model,
"session": model.Session,
@ -440,7 +438,7 @@ class Harvester(CkanCommand):
def abort_failed_jobs(self):
job_life_span = False
if len(self.args) >= 2:
job_life_span = six.text_type(self.args[1])
job_life_span = unicode_safe(self.args[1])
utils.abort_failed_jobs(
job_life_span,

View File

@ -3,7 +3,6 @@
import logging
import re
import uuid
import six
from sqlalchemy import exists, and_
from sqlalchemy.sql import update, bindparam
@ -17,7 +16,7 @@ from ckan.model import Session, Package, PACKAGE_NAME_MAX_LENGTH
from ckan.logic.schema import default_create_package_schema
from ckan.lib.navl.validators import ignore_missing, ignore
from ckan.lib.munge import munge_title_to_name, substitute_ascii_equivalents
from ckan.lib.munge import munge_title_to_name, munge_tag
from ckanext.harvest.model import (HarvestObject, HarvestGatherError,
HarvestObjectError, HarvestJob)
@ -106,6 +105,8 @@ else:
tag = re.sub(r'[^a-zA-Z0-9\- ]', '', tag).replace(' ', '-')
tag = _munge_to_length(tag, model.MIN_TAG_LENGTH, model.MAX_TAG_LENGTH)
return tag
from ckanext.harvest.logic.schema import unicode_safe
log = logging.getLogger(__name__)
@ -340,7 +341,7 @@ class HarvesterBase(SingletonPlugin):
try:
# Change default schema
schema = default_create_package_schema()
schema['id'] = [ignore_missing, six.text_type]
schema['id'] = [ignore_missing, unicode_safe]
schema['__junk'] = [ignore]
#use custom function

View File

@ -1,12 +1,10 @@
from __future__ import absolute_import
import six
import requests
from requests.exceptions import HTTPError, RequestException
import datetime
from urllib3.contrib import pyopenssl
from six.moves.urllib.parse import urlencode
from urllib.parse import urlencode
from ckan import model
from ckan.logic import ValidationError, NotFound, get_action
from ckan.lib.helpers import json
@ -41,8 +39,6 @@ class CKANHarvester(HarvesterBase):
if api_key:
headers['Authorization'] = api_key
pyopenssl.inject_into_urllib3()
try:
http_request = requests.get(url, headers=headers)
except HTTPError as e:
@ -122,8 +118,7 @@ class CKANHarvester(HarvesterBase):
raise ValueError('default_groups must be a *list* of group'
' names/ids')
if config_obj['default_groups'] and \
not isinstance(config_obj['default_groups'][0],
six.string_types):
not isinstance(config_obj['default_groups'][0], str):
raise ValueError('default_groups must be a list of group '
'names/ids (i.e. strings)')
@ -523,7 +518,7 @@ class CKANHarvester(HarvesterBase):
if existing_extra:
package_dict['extras'].remove(existing_extra)
# Look for replacement strings
if isinstance(value, six.string_types):
if isinstance(value, str):
value = value.format(
harvest_source_id=harvest_object.job.source.id,
harvest_source_url=harvest_object.job.source.url.strip('/'),

View File

@ -36,7 +36,7 @@ def package_list_for_source(source_id):
It calls the package_list snippet and the pager.
'''
limit = 20
page = int(request.params.get('page', 1))
page = int(request.args.get('page', 1))
fq = '+harvest_source_id:"{0}"'.format(source_id)
search_dict = {
'fq': fq,
@ -56,7 +56,10 @@ def package_list_for_source(source_id):
query = logic.get_action('package_search')(context, search_dict)
base_url = h.url_for('{0}_read'.format(DATASET_TYPE_NAME), id=source_id)
base_url = h.url_for(
'{0}.read'.format(DATASET_TYPE_NAME),
id=harvest_source['name']
)
def pager_url(q=None, page=None):
url = base_url
@ -121,7 +124,7 @@ def link_for_harvest_object(id=None, guid=None, text=None):
obj = logic.get_action('harvest_object_show')(context, {'id': guid, 'attr': 'guid'})
id = obj.id
url = h.url_for('harvest_object_show', id=id)
url = h.url_for('harvester.object_show', id=id)
text = text or guid or id
link = '<a href="{url}">{text}</a>'.format(url=url, text=text)
@ -135,13 +138,3 @@ def harvest_source_extra_fields():
continue
fields[harvester.info()['name']] = list(harvester.extra_schema().keys())
return fields
def bootstrap_version():
if p.toolkit.check_ckan_version(max_version='2.7.99'):
return 'bs2'
else:
return (
'bs2' if
p.toolkit.config.get('ckan.base_public_folder') == 'public-bs2'
else 'bs3')

View File

@ -267,8 +267,9 @@ def harvest_object_show(context, data_dict):
obj = model.Session.query(HarvestObject) \
.filter(HarvestObject.package_id == pkg.id) \
.filter(
HarvestObject.current == True # noqa: E712
).first()
HarvestObject.current == True # noqa: E712
).order_by(HarvestObject.import_finished.desc()) \
.first()
else:
raise p.toolkit.ValidationError(
'Please provide either an "id" or a "dataset_id" parameter')
@ -436,11 +437,14 @@ def harvest_get_notifications_recipients(context, data_dict):
model.User.sysadmin == True # noqa: E712
).all()
# Send mail to all sysadmins with a non-empty email address
for sysadmin in sysadmins:
recipients.append({
'name': sysadmin.name,
'email': sysadmin.email
})
email_address = sysadmin.email
if email_address and email_address.strip():
recipients.append({
'name': sysadmin.name,
'email': email_address,
})
# gather organization-admins
if source.get('organization'):
@ -450,14 +454,17 @@ def harvest_get_notifications_recipients(context, data_dict):
'capacity': 'admin'
})
# Get access to email address by running action as admin user
context['user'] = p.toolkit.get_action('get_site_user')({'ignore_auth': True})['name']
for member in members:
member_details = p.toolkit.get_action(
'user_show')(context, {'id': member[0]})
if member_details['email']:
email_address = member_details.get('email', None)
if email_address and email_address.strip():
recipients.append({
'name': member_details['name'],
'email': member_details['email']
'email': email_address
})
return recipients

View File

@ -1,15 +1,15 @@
# -*- coding: utf-8 -*-
import hashlib
import html
import json
import six
import logging
import datetime
from ckantoolkit import config
from sqlalchemy import and_, or_
from six.moves.urllib.parse import urljoin
from urllib.parse import urljoin
from ckan.lib.search.index import PackageSearchIndex
from ckan.plugins import toolkit, PluginImplementations
@ -17,12 +17,6 @@ from ckan.logic import get_action
from ckanext.harvest.interfaces import IHarvester
from ckan.lib.search.common import SearchIndexError, make_connection
# Use render_jinja2 when work with CKAN core <= 2.9.4
try:
from ckan.lib.base import render_jinja2 as render
except ImportError:
from ckan.lib.base import render
from ckan.model import Package
from ckan import logic
@ -44,6 +38,8 @@ from ckanext.harvest.logic.action.get import (
import ckan.lib.mailer as mailer
from itertools import islice
from ckan.plugins.toolkit import render
log = logging.getLogger(__name__)
@ -550,8 +546,10 @@ def harvest_objects_import(context, data_dict):
last_objects_count = 0
for obj_id in last_objects_ids:
if segments and \
str(hashlib.md5(six.ensure_binary(obj_id[0])).hexdigest())[0] not in segments:
_id = obj_id[0]
if isinstance(_id, str):
_id = _id.encode()
if segments and str(hashlib.md5(_id).hexdigest())[0] not in segments:
continue
obj = session.query(HarvestObject).get(obj_id)
@ -760,7 +758,7 @@ def get_mail_extra_vars(context, source_id, status):
errors = job_errors + obj_errors
site_url = config.get('ckan.site_url')
job_url = toolkit.url_for('harvest_job_show', source=source['id'], id=last_job['id'])
job_url = toolkit.url_for('harvester.job_show', source=source['id'], id=last_job['id'])
full_job_url = urljoin(site_url, job_url)
extra_vars = {
'organization': organization,
@ -799,6 +797,8 @@ def prepare_summary_mail(context, source_id, status):
def prepare_error_mail(context, source_id, status):
extra_vars = get_mail_extra_vars(context, source_id, status)
body = render('emails/error_email.txt', extra_vars)
body = html.unescape(body)
subject = '{} - Harvesting Job - Error Notification'\
.format(config.get('ckan.site_title'))

View File

@ -68,7 +68,7 @@ def harvest_job_dictize(job, context):
.group_by(HarvestObjectError.message) \
.order_by(text('error_count desc')) \
.limit(context.get('error_summmary_limit', 20))
out['object_error_summary'] = q.all()
out['object_error_summary'] = harvest_error_dictize(q.all(), context)
q = model.Session.query(
HarvestGatherError.message,
func.count(HarvestGatherError.message).label('error_count')) \
@ -76,7 +76,8 @@ def harvest_job_dictize(job, context):
.group_by(HarvestGatherError.message) \
.order_by(text('error_count desc')) \
.limit(context.get('error_summmary_limit', 20))
out['gather_error_summary'] = q.all()
out['gather_error_summary'] = harvest_error_dictize(q.all(), context)
return out
@ -106,6 +107,13 @@ def harvest_log_dictize(obj, context):
return out
def harvest_error_dictize(obj, context):
    """Convert error-summary query rows into plain dictionaries.

    :param obj: iterable of result rows; every element must provide an
        ``_asdict()`` method (as named tuples do).
    :param context: accepted so the signature matches the other
        ``*_dictize`` helpers in this module; it is not used here.
    :returns: a new list holding one dict per row, in input order.
    """
    # Build the list in a single pass instead of a manual append loop.
    return [elem._asdict() for elem in obj]
def _get_source_status(source, context):
'''
TODO: Deprecated, use harvest_source_show_status instead

View File

@ -1,6 +1,5 @@
# -*- coding: utf-8 -*-
import ckan.plugins.toolkit as tk
import ckan.plugins as p
from ckan.logic.schema import default_extras_schema
from ckan.logic.validators import (package_id_exists,
@ -10,30 +9,25 @@ from ckan.logic.validators import (package_id_exists,
boolean_validator,
)
from ckan.logic.converters import convert_to_extras, convert_from_extras
from ckantoolkit import unicode_safe #added because needed for dcat
from ckantoolkit import unicode_safe
#from ckan.lib.navl.validators import (ignore_missing,
# not_empty,
# ignore,
# if_empty_same_as,
# )
from ckanext.harvest.logic.validators import (
harvest_source_url_validator,
harvest_source_type_exists,
harvest_source_config_validator,
harvest_source_extra_validator,
harvest_source_frequency_exists,
dataset_type_exists,
harvest_source_convert_from_config,
harvest_source_id_exists,
harvest_job_exists,
harvest_object_extras_validator,
)
ignore_missing = tk.get_validator("ignore_missing")
not_empty = tk.get_validator("not_empty")
ignore = tk.get_validator("ignore")
if_empty_same_as = tk.get_validator("if_empty_same_as")
from ckanext.harvest.logic.validators import (harvest_source_url_validator,
harvest_source_type_exists,
harvest_source_config_validator,
harvest_source_extra_validator,
harvest_source_frequency_exists,
dataset_type_exists,
harvest_source_convert_from_config,
harvest_source_id_exists,
harvest_job_exists,
harvest_object_extras_validator,
)
def harvest_source_schema():
@ -59,10 +53,6 @@ def harvest_source_schema():
schema['extras'] = extras_schema
if p.toolkit.check_ckan_version('2.2'):
from ckan.logic.validators import datasets_with_no_organization_cannot_be_private
schema['private'].append(datasets_with_no_organization_cannot_be_private)
return schema
@ -80,7 +70,6 @@ def harvest_source_update_package_schema():
schema = harvest_source_create_package_schema()
schema['owner_org'] = [ignore_missing, owner_org_validator, unicode_safe]
return schema

View File

@ -12,11 +12,7 @@ from ckanext.harvest.utils import (
)
from ckanext.harvest.model import HarvestSource, UPDATE_FREQUENCIES, HarvestJob
from ckanext.harvest.interfaces import IHarvester
import six
from six.moves.urllib.parse import (
urlparse, urlunparse
)
from urllib.parse import (urlparse, urlunparse)
log = logging.getLogger(__name__)
@ -233,7 +229,7 @@ def harvest_source_convert_from_config(key, data, errors, context):
def harvest_source_active_validator(value, context):
if isinstance(value, six.string_types):
if isinstance(value, str):
if value.lower() == 'true':
return True
else:
@ -259,6 +255,6 @@ def harvest_object_extras_validator(value, context):
if not isinstance(value, dict):
raise Invalid('extras must be a dict')
for v in value.values():
if not isinstance(v, six.string_types):
if not isinstance(v, str):
raise Invalid('extras must be a dict of strings')
return value

View File

@ -97,6 +97,11 @@ def setup():
log.debug('Creating index for harvest_object_extra')
Index("harvest_object_id_idx", harvest_object_extra_table.c.harvest_object_id).create()
index_names = [index['name'] for index in inspector.get_indexes("harvest_object_error")]
if "harvest_error_harvest_object_id_idx" not in index_names:
log.debug('Creating index for harvest_object_error')
Index("harvest_error_harvest_object_id_idx", harvest_object_error_table.c.harvest_object_id).create()
class HarvestError(Exception):
pass
@ -398,6 +403,7 @@ def define_harvester_tables():
Column('stage', types.UnicodeText),
Column('line', types.Integer),
Column('created', types.DateTime, default=datetime.datetime.utcnow),
Index('harvest_error_harvest_object_id_idx', 'harvest_object_id'),
)
# Harvest Log table
harvest_log_table = Table(

View File

@ -4,7 +4,6 @@ import os
import json
from logging import getLogger
from six import string_types, text_type
from collections import OrderedDict
from ckan import logic
@ -12,13 +11,10 @@ from ckan import model
import ckan.plugins as p
from ckan.lib.plugins import DefaultDatasetForm
try:
from ckan.lib.plugins import DefaultTranslation
except ImportError:
class DefaultTranslation():
pass
from ckan.lib.plugins import DefaultTranslation
import ckanext.harvest
from ckanext.harvest import cli, views
from ckanext.harvest.model import setup as model_setup
from ckanext.harvest.model import HarvestSource, HarvestJob, HarvestObject
from ckanext.harvest.log import DBLogHandler
@ -27,17 +23,13 @@ from ckanext.harvest.utils import (
DATASET_TYPE_NAME
)
if p.toolkit.check_ckan_version(min_version='2.9.0'):
from ckanext.harvest.plugin.flask_plugin import MixinPlugin
else:
from ckanext.harvest.plugin.pylons_plugin import MixinPlugin
log = getLogger(__name__)
assert not log.disabled
class Harvest(MixinPlugin, p.SingletonPlugin, DefaultDatasetForm, DefaultTranslation):
class Harvest(p.SingletonPlugin, DefaultDatasetForm, DefaultTranslation):
p.implements(p.IClick)
p.implements(p.IBlueprint)
p.implements(p.IConfigurable)
p.implements(p.IConfigurer, inherit=True)
p.implements(p.IActions)
@ -46,11 +38,20 @@ class Harvest(MixinPlugin, p.SingletonPlugin, DefaultDatasetForm, DefaultTransla
p.implements(p.IPackageController, inherit=True)
p.implements(p.ITemplateHelpers)
p.implements(p.IFacets, inherit=True)
if p.toolkit.check_ckan_version(min_version='2.5.0'):
p.implements(p.ITranslation, inherit=True)
p.implements(p.ITranslation, inherit=True)
startup = False
# IClick
def get_commands(self):
return cli.get_commands()
# IBlueprint
def get_blueprint(self):
return views.get_blueprints()
# ITranslation
def i18n_directory(self):
u'''Change the directory of the .mo translation files'''
@ -61,108 +62,155 @@ class Harvest(MixinPlugin, p.SingletonPlugin, DefaultDatasetForm, DefaultTransla
# IPackageController
# CKAN < 2.10 hooks
def after_create(self, context, data_dict):
if 'type' in data_dict and data_dict['type'] == DATASET_TYPE_NAME and not self.startup:
return self.after_dataset_create(context, data_dict)
def after_update(self, context, data_dict):
return self.after_dataset_update(context, data_dict)
def after_delete(self, context, data_dict):
return self.after_dataset_delete(context, data_dict)
def before_search(self, search_params):
return self.before_dataset_search(search_params)
def before_index(self, pkg_dict):
return self.before_dataset_index(pkg_dict)
def after_show(self, context, data_dict):
return self.after_dataset_show(context, data_dict)
# CKAN >= 2.10 hooks
def after_dataset_create(self, context, data_dict):
    """Create the HarvestSource object backing a newly created dataset.

    Nothing happens unless ``data_dict`` has a ``type`` equal to
    ``DATASET_TYPE_NAME``; the call is also skipped while ``self.startup``
    is set.

    :param context: action context, forwarded unchanged to the helper
    :param data_dict: dict of the dataset that has just been created
    :returns: None
    """
    if "type" not in data_dict:
        return
    if data_dict["type"] != DATASET_TYPE_NAME:
        return
    if self.startup:
        return

    # Create an actual HarvestSource object
    _create_harvest_source_object(context, data_dict)
def after_update(self, context, data_dict):
if 'type' in data_dict and data_dict['type'] == DATASET_TYPE_NAME:
def after_dataset_update(self, context, data_dict):
if "type" in data_dict and data_dict["type"] == DATASET_TYPE_NAME:
# Edit the actual HarvestSource object
_update_harvest_source_object(context, data_dict)
def after_delete(self, context, data_dict):
def after_dataset_delete(self, context, data_dict):
package_dict = p.toolkit.get_action('package_show')(context, {'id': data_dict['id']})
package_dict = p.toolkit.get_action("package_show")(
context, {"id": data_dict["id"]}
)
if 'type' in package_dict and package_dict['type'] == DATASET_TYPE_NAME:
if "type" in package_dict and package_dict["type"] == DATASET_TYPE_NAME:
# Delete the actual HarvestSource object
_delete_harvest_source_object(context, package_dict)
def before_search(self, search_params):
'''Prevents the harvesters being shown in dataset search results.'''
def before_dataset_search(self, search_params):
"""Prevents the harvesters being shown in dataset search results."""
fq = search_params.get('fq', '')
if 'dataset_type:harvest' not in fq:
fq = u"{0} -dataset_type:harvest".format(search_params.get('fq', ''))
search_params.update({'fq': fq})
fq = search_params.get("fq", "")
if "dataset_type:harvest" not in fq:
fq = "{0} -dataset_type:harvest".format(fq)
search_params.update({"fq": fq})
return search_params
def before_index(self, pkg_dict):
def _add_or_update_harvest_metadata(self, key, value, data_dict):
"""Adds extras fields or updates them if already exist."""
if not data_dict.get("extras"):
data_dict["extras"] = []
for e in data_dict.get("extras"):
if e.get("key") == key:
e.update({"value": value})
break
else:
data_dict["extras"].append({"key": key, "value": value})
def before_dataset_index(self, pkg_dict):
"""Adds harvest metadata to the extra field of the dataset.
This method will add or update harvest related metadata in `pkg_dict`,
`data_dict` and `validated_data_dict` so it can be obtained when
calling package_show API (that depends on Solr data). This metadata will
be stored in the `extras` field of the dictionaries ONLY if it does not
already exist in the root schema.
Note: If another extension adds any harvest extra to the `package_show`
schema then this method will not add them again in the `extras` field to avoid
validation errors when updating a package.
If the harvest extra has been added to the root schema, then we will not update
them since it is responsibility of the package validators to do it.
"""
# Fix to support Solr8
if isinstance(pkg_dict.get('status'), dict):
try:
pkg_dict['status'] = json.dumps(pkg_dict['status'])
except ValueError:
pkg_dict.pop('status', None)
harvest_object = model.Session.query(HarvestObject) \
.filter(HarvestObject.package_id == pkg_dict['id']) \
.filter(HarvestObject.current == True).first() # noqa
.filter(HarvestObject.package_id == pkg_dict["id"]) \
.filter(
HarvestObject.current == True # noqa
).order_by(HarvestObject.import_finished.desc()) \
.first()
if harvest_object:
if not harvest_object:
return pkg_dict
data_dict = json.loads(pkg_dict['data_dict'])
harvest_extras = [
("harvest_object_id", harvest_object.id),
("harvest_source_id", harvest_object.source.id),
("harvest_source_title", harvest_object.source.title),
]
validated_data_dict = json.loads(pkg_dict['validated_data_dict'])
data_dict = json.loads(pkg_dict["data_dict"])
for key, value in harvest_extras:
if key in data_dict.keys():
data_dict[key] = value
continue
self._add_or_update_harvest_metadata(key, value, data_dict)
harvest_extras = [
('harvest_object_id', harvest_object.id),
('harvest_source_id', harvest_object.source.id),
('harvest_source_title', harvest_object.source.title),
]
validated_data_dict = json.loads(pkg_dict["validated_data_dict"])
for key, value in harvest_extras:
if key in validated_data_dict.keys():
validated_data_dict[key] = value
continue
self._add_or_update_harvest_metadata(key, value, validated_data_dict)
for key, value in harvest_extras:
# If the harvest extras are there, update them. This can
# happen eg when calling package_update or resource_update,
# which call package_show
harvest_not_found = True
harvest_not_found_validated = True
if not data_dict.get('extras'):
data_dict['extras'] = []
for e in data_dict.get('extras'):
if e.get('key') == key:
e.update({'value': value})
harvest_not_found = False
if(harvest_not_found):
data_dict['extras'].append({'key': key, 'value': value})
if not validated_data_dict.get('extras'):
validated_data_dict['extras'] = []
for e in validated_data_dict.get('extras'):
if e.get('key') == key:
e.update({'value': value})
harvest_not_found_validated = False
if(harvest_not_found_validated):
validated_data_dict['extras'].append({'key': key, 'value': value})
# The commented line isn't cataloged correctly, if we pass the
# basic key the extras are prepended and the system works as
# expected.
# pkg_dict['extras_{0}'.format(key)] = value
# Add harvest extras to main indexed pkg_dict
for key, value in harvest_extras:
if key not in pkg_dict.keys():
pkg_dict[key] = value
pkg_dict['data_dict'] = json.dumps(data_dict)
pkg_dict['validated_data_dict'] = json.dumps(validated_data_dict)
pkg_dict["data_dict"] = json.dumps(data_dict)
pkg_dict["validated_data_dict"] = json.dumps(validated_data_dict)
return pkg_dict
def after_show(self, context, data_dict):
def after_dataset_show(self, context, data_dict):
if 'type' in data_dict and data_dict['type'] == DATASET_TYPE_NAME:
if "type" in data_dict and data_dict["type"] == DATASET_TYPE_NAME:
# This is a harvest source dataset, add extra info from the
# HarvestSource object
source = HarvestSource.get(data_dict['id'])
source = HarvestSource.get(data_dict["id"])
if not source:
log.error('Harvest source not found for dataset {0}'.format(data_dict['id']))
log.error(
"Harvest source not found for dataset {0}".format(data_dict["id"])
)
return data_dict
st_action_name = 'harvest_source_show_status'
st_action_name = "harvest_source_show_status"
try:
status_action = p.toolkit.get_action(st_action_name)
except KeyError:
logic.clear_actions_cache()
status_action = p.toolkit.get_action(st_action_name)
data_dict['status'] = status_action(context, {'id': source.id})
data_dict["status"] = status_action(context, {"id": source.id})
return data_dict
@ -198,11 +246,10 @@ class Harvest(MixinPlugin, p.SingletonPlugin, DefaultDatasetForm, DefaultTransla
Returns the schema for mapping package data from a form to a format
suitable for the database.
'''
from ckanext.harvest.logic.schema import harvest_source_create_package_schema
from ckanext.harvest.logic.schema import harvest_source_create_package_schema, unicode_safe
schema = harvest_source_create_package_schema()
if self.startup:
schema['id'] = [text_type]
schema['id'] = [unicode_safe]
return schema
def update_package_schema(self):
@ -237,38 +284,10 @@ class Harvest(MixinPlugin, p.SingletonPlugin, DefaultDatasetForm, DefaultTransla
self.startup = False
def update_config(self, config):
if not p.toolkit.check_ckan_version(min_version='2.0'):
assert 0, 'CKAN before 2.0 not supported by ckanext-harvest - '\
'genshi templates not supported any more'
if p.toolkit.asbool(config.get('ckan.legacy_templates', False)):
log.warn('Old genshi templates not supported any more by '
'ckanext-harvest so you should set ckan.legacy_templates '
'option to True any more.')
p.toolkit.add_template_directory(config, '../templates')
p.toolkit.add_public_directory(config, '../public')
p.toolkit.add_resource('../fanstatic_library', 'ckanext-harvest')
p.toolkit.add_resource('../public/ckanext/harvest/javascript', 'harvest-extra-field')
if p.toolkit.check_ckan_version(min_version='2.9.0'):
mappings = config.get('ckan.legacy_route_mappings') or {}
if mappings and isinstance(mappings, string_types):
mappings = json.loads(mappings)
mappings.update({
'harvest_read': 'harvest.read',
'harvest_edit': 'harvest.edit',
})
bp_routes = [
"delete", "refresh", "admin", "about",
"clear", "job_list", "job_show_last", "job_show",
"job_abort", "object_show"
]
mappings.update({
'harvest_' + route: 'harvester.' + route
for route in bp_routes
})
# https://github.com/ckan/ckan/pull/4521
config['ckan.legacy_route_mappings'] = json.dumps(mappings)
p.toolkit.add_template_directory(config, 'templates')
p.toolkit.add_public_directory(config, 'public')
p.toolkit.add_resource('assets', 'ckanext-harvest')
p.toolkit.add_resource('public/ckanext/harvest/javascript', 'harvest-extra-field')
# IActions
@ -300,7 +319,6 @@ class Harvest(MixinPlugin, p.SingletonPlugin, DefaultDatasetForm, DefaultTransla
'harvest_frequencies': harvest_helpers.harvest_frequencies,
'link_for_harvest_object': harvest_helpers.link_for_harvest_object,
'harvest_source_extra_fields': harvest_helpers.harvest_source_extra_fields,
'bootstrap_version': harvest_helpers.bootstrap_version,
'get_harvest_source': harvest_helpers.get_harvest_source,
}

View File

@ -1,20 +0,0 @@
# -*- coding: utf-8 -*-
import ckan.plugins as p
import ckanext.harvest.cli as cli
import ckanext.harvest.views as views
class MixinPlugin(p.SingletonPlugin):
p.implements(p.IClick)
p.implements(p.IBlueprint)
# IClick
def get_commands(self):
return cli.get_commands()
# IBlueprint
def get_blueprint(self):
return views.get_blueprints()

View File

@ -1,88 +0,0 @@
# -*- coding: utf-8 -*-
import ckan.plugins as p
from ckanext.harvest.utils import DATASET_TYPE_NAME
class MixinPlugin(p.SingletonPlugin):
p.implements(p.IRoutes, inherit=True)
# IRoutes
def before_map(self, map):
# Most of the routes are defined via the IDatasetForm interface
# (ie they are the ones for a package type)
controller = "ckanext.harvest.controllers.view:ViewController"
map.connect(
"{0}_delete".format(DATASET_TYPE_NAME),
"/" + DATASET_TYPE_NAME + "/delete/:id",
controller=controller,
action="delete",
)
map.connect(
"{0}_refresh".format(DATASET_TYPE_NAME),
"/" + DATASET_TYPE_NAME + "/refresh/:id",
controller=controller,
action="refresh",
)
map.connect(
"{0}_admin".format(DATASET_TYPE_NAME),
"/" + DATASET_TYPE_NAME + "/admin/:id",
controller=controller,
action="admin",
)
map.connect(
"{0}_about".format(DATASET_TYPE_NAME),
"/" + DATASET_TYPE_NAME + "/about/:id",
controller=controller,
action="about",
)
map.connect(
"{0}_clear".format(DATASET_TYPE_NAME),
"/" + DATASET_TYPE_NAME + "/clear/:id",
controller=controller,
action="clear",
)
map.connect(
"harvest_job_list",
"/" + DATASET_TYPE_NAME + "/{source}/job",
controller=controller,
action="list_jobs",
)
map.connect(
"harvest_job_show_last",
"/" + DATASET_TYPE_NAME + "/{source}/job/last",
controller=controller,
action="show_last_job",
)
map.connect(
"harvest_job_show",
"/" + DATASET_TYPE_NAME + "/{source}/job/{id}",
controller=controller,
action="show_job",
)
map.connect(
"harvest_job_abort",
"/" + DATASET_TYPE_NAME + "/{source}/job/{id}/abort",
controller=controller,
action="abort_job",
)
map.connect(
"harvest_object_show",
"/" + DATASET_TYPE_NAME + "/object/:id",
controller=controller,
action="show_object",
)
map.connect(
"harvest_object_for_dataset_show",
"/dataset/harvest_object/:id",
controller=controller,
action="show_object",
ref_type="dataset",
)
return map

View File

@ -2,8 +2,5 @@
{% block styles %}
{{ super() }}
{% set type = 'asset' if h.ckan_version().split('.')|map('int')|list >= [2, 9, 0] else 'resource' %}
{% include 'harvest/snippets/harvest_' ~ type ~ '.html' %}
{% asset 'ckanext-harvest/harvest_css' %}
{% endblock %}

View File

@ -1 +0,0 @@
{% asset 'ckanext-harvest/harvest_css' %}

View File

@ -1 +0,0 @@
{% asset 'harvest-extra-field/main' %}

View File

@ -1 +0,0 @@
{% resource 'harvest-extra-field/main' %}

View File

@ -1 +0,0 @@
{% resource 'ckanext-harvest/styles/harvest.css' %}

View File

@ -1,7 +1,7 @@
{% set authorized_user = h.check_access('harvest_source_create') %}
{% if authorized_user %}
<a href="{{ h.url_for('{0}_new'.format(dataset_type)) }}" class="btn btn-primary">
<a href="{{ h.url_for('{0}.new'.format(dataset_type)) }}" class="btn btn-primary">
<i class="fa fa-plus-square icon-plus-sign-alt"></i>
{{ _('Add Harvest Source') }}
</a>

View File

@ -1,7 +1,7 @@
{#
Displays a table with a summary of the most common errors for a job
error_summary - List of tuples with (message, count)
error_summary - List of dicts with message and error_count
Example:
@ -22,8 +22,8 @@ Example:
<tbody>
{% for error in summary %}
<tr>
<td class="count">{{ error[1] }}</td>
<td>{{ error[0] }}</td>
<td class="count">{{ error["error_count"] }}</td>
<td>{{ error["message"] }}</td>
</tr>
{% endfor %}
</tbody>

View File

@ -14,16 +14,21 @@ Example:
{% snippet 'snippets/source_item.html', source=sources[0] %}
#}
{% set ckan_version = h.ckan_version().split('.')[1] %}
{% set truncate = truncate or 180 %}
{% set truncate_title = truncate_title or 80 %}
{% set title = source.title or source.name %}
{% set source_type = h.get_pkg_dict_extra(source, 'source_type') %}
{% set url = h.url_for('harvest_admin', id=source.name) if within_organization else h.url_for('harvest_read', id=source.name) %}
{# Link target for the item title: the harvester blueprint's admin page when listed within an organization, otherwise the source's own `harvest.read` page. #}
{% set url = h.url_for('harvester.admin', id=source.name) if within_organization else h.url_for('harvest.read', id=source.name) %}
<li class="{{ item_class or "dataset-item" }}">
<div class="dataset-content">
<h3 class="dataset-heading">
{{ h.link_to(title|truncate(truncate_title), url) }}
{% if ckan_version | int >= 9 %}
{{ h.link_to(title|truncate(truncate_title), url) }}
{% else %}
{{ h.link_to(h.truncate(title, truncate_title), url) }}
{% endif %}
{# Look the state up by its string key: a bare `state` name is not set anywhere in this snippet, so the lookup would fall back to '' and neither label would render. #}
{% if source.get('state', '').startswith('draft') %}
<span class="label label-info">{{ _('Draft') }}</span>
{% elif source.get('state', '').startswith('deleted') %}
@ -42,7 +47,7 @@ Example:
{{ _('Datasets') }}: {{ source.status.total_datasets }}
{% endif %}
{% if not within_organization and source.organization %}
&mdash; {{ _('Organization') }}: {{ h.link_to(source.organization.title or source.organization.name, h.url_for('organization_read', id=source.organization.name)) }}</a>
&mdash; {{ _('Organization') }}: {{ h.link_to(source.organization.title or source.organization.name, h.url_for('organization.read', id=source.organization.name)) }}</a>
{% endif %}
</p>

View File

@ -6,7 +6,7 @@
{% if harvest_source.status and harvest_source.status.last_job %}
{% snippet "snippets/job_details.html", job=harvest_source.status.last_job %}
<div class="form-actions">
<a href="{{ h.url_for('harvest_job_show_last', source=harvest_source.name) }}" class="btn pull-right btn-default">
<a href="{{ h.url_for('harvester.job_show_last', source=harvest_source.name) }}" class="btn pull-right btn-default">
<i class="fa fa-briefcase icon-briefcase"></i>
{{ _('View full job report') }}
</a>

View File

@ -7,58 +7,38 @@
<li class="active"><a href="">{{ _('Admin') }}</a></li>
{% endblock %}
{% block action_links %}
{% block content_action %}
<div class="content_action btn-group">
{% if harvest_source.status and harvest_source.status.last_job and (harvest_source.status.last_job.status == 'New' or harvest_source.status.last_job.status == 'Running') %}
<a class="btn btn-default disabled" rel="tooltip" title="There already is an unrun job for this source"><i class="fa fa-lg fa-refresh icon-refresh icon-large"></i> Reharvest</a>
{% else %}
{% set locale = h.dump_json({'content': _('This will re-run the harvesting for this source. Any updates at the source will overwrite the local datasets. Sources with a large number of datasets may take a significant amount of time to finish harvesting. Please confirm you would like us to start reharvesting.')}) %}
<a href="{{ h.url_for('harvest_refresh', id=harvest_source.id) }}" class="btn btn-default" data-module="confirm-action" data-module-i18n="{{ locale }}"
<a href="{{ h.url_for('harvester.refresh', id=harvest_source.id) }}" class="btn btn-default" data-module="confirm-action" data-module-i18n="{{ locale }}"
title="{{ _('Start a new harvesting job for this harvest source now') }}">
<i class="fa fa-refresh icon-refresh"></i>
{{ _('Reharvest') }}
</a>
{% endif %}
{% if harvest_source.status and harvest_source.status.last_job and (harvest_source.status.last_job.status == 'Running') %}
<a href="{{ h.url_for('harvest_job_abort', source=harvest_source.name, id=harvest_source.status.last_job.id) }}" class="btn btn-default" title="Stop this Job">
<a href="{{ h.url_for('harvester.job_abort', source=harvest_source.name, id=harvest_source.status.last_job.id) }}" class="btn btn-default" title="Stop this Job">
<i class="fa fa-ban icon-ban-circle"></i>
{{ _('Stop') }}
</a>
{% endif %}
{% set locale = h.dump_json({'content': _('Warning: This will remove all datasets for this source, as well as all previous job reports. Are you sure you want to continue?')}) %}
<a href="{{ h.url_for('harvest_clear', id=harvest_source.id) }}" class="btn btn-default" data-module="confirm-action" data-module-i18n="{{ locale }}"
<a href="{{ h.url_for('harvester.clear', id=harvest_source.id) }}" class="btn btn-default" data-module="confirm-action" data-module-i18n="{{ locale }}"
title="{{ _('Delete all harvest jobs and existing datasets from this source') }}">
{{ _('Clear') }}
</a>
<a href="{{ h.url_for('{0}_read'.format(c.dataset_type), id=harvest_source.id) }}" class="btn btn-default">
<a href="{{ h.url_for('{0}.read'.format(c.dataset_type), id=harvest_source.id) }}" class="btn btn-default">
<i class="fa fa-eye eye-open"></i>
{{ _('View harvest source') }}
</a>
{% endblock %}
{# CKAN 2.0 #}
{% block actions_content %}
{{ self.action_links() }}
{% endblock %}
{# CKAN 2.1 #}
{% block content_action %}
<div class="content_action btn-group">
{{ self.action_links() }}
</div>
</div>
{% endblock %}
{% block page_header_tabs %}
{{ h.build_nav_icon('{0}_admin'.format(c.dataset_type), _('Dashboard'), id=harvest_source.name, icon='dashboard') }}
{{ h.build_nav_icon('harvest_job_list'.format(c.dataset_type), _('Jobs'), source=harvest_source.name, icon='reorder') }}
{{ h.build_nav_icon('{0}_edit'.format(c.dataset_type), _('Edit'), id=harvest_source.name, icon='edit') }}
{{ h.build_nav_icon('harvester.admin', _('Dashboard'), id=harvest_source.name, icon='dashboard') }}
{{ h.build_nav_icon('harvester.job_list', _('Jobs'), source=harvest_source.name, icon='reorder') }}
{{ h.build_nav_icon(c.dataset_type ~ '.edit', _('Edit'), id=harvest_source.name, icon='edit') }}
{% endblock %}

View File

@ -6,12 +6,7 @@
<div class="module-content">
{% block form %}
{% if c.form %}
{# CKAN < 2.3 #}
{{ c.form | safe }}
{% else %}
{{- h.snippet(form_snippet, c=c, **form_vars) -}}
{% endif %}
{{- h.snippet(form_snippet, c=c, **form_vars) -}}
{% endblock %}
</div>
{% endblock %}

View File

@ -16,7 +16,7 @@
<li class="dataset-item">
<div class="dataset-content">
<h3 class="dataset-heading">
<a href="{{ h.url_for('harvest_job_show', source=harvest_source.name, id=job.id) }}">
<a href="{{ h.url_for('harvester.job_show', source=harvest_source.name, id=job.id) }}">
{{ _('Job: ') }} {{ job.id }}
</a>
{% if job.status != 'Finished' %}

View File

@ -6,7 +6,7 @@
<div class="module-content">
<p class="pull-right">
{{ h.nav_link(_('Back to job list'), named_route='harvest_job_list', source=harvest_source.name, class_='btn btn-default', icon='arrow-left')}}
{{ h.nav_link(_('Back to job list'), named_route='harvester.job_list', source=harvest_source.name, class_='btn btn-default', icon='arrow-left')}}
</p>
<h1>{{ _('Job Report') }}</h1>
@ -69,7 +69,7 @@
{{ _('Remote content') }}
</a>
{% endif %}
<a href="{{ h.url_for('harvest_object_show', id=harvest_object_id) }}" class="btn btn-small">
<a href="{{ h.url_for('harvester.object_show', id=harvest_object_id) }}" class="btn btn-small">
{{ _('Local content') }}
</a>

View File

@ -13,12 +13,7 @@
{% block primary_content %}
<section class="module">
<div class="module-content">
{% if c.form %}
{# CKAN < 2.3 #}
{{ c.form | safe }}
{% else %}
{{- h.snippet(form_snippet, c=c, **form_vars) -}}
{% endif %}
{{- h.snippet(form_snippet, c=c, **form_vars) -}}
</div>
</section>
{% endblock %}

View File

@ -1,10 +1,8 @@
{% import 'macros/form.html' as form %}
{% asset 'harvest-extra-field/main' %}
{% set type = 'asset' if h.ckan_version().split('.')|map('int')|list >= [2, 9, 0] else 'resource' %}
{% include 'harvest/snippets/harvest_extra_field_' ~ type ~ '.html' %}
<form id="source-new" class="form-horizontal dataset-form {{ h.bootstrap_version() }}" method="post" >
<form id="source-new" class="form-horizontal" method="post" >
{% block errors %}{{ form.errors(error_summary) }}{% endblock %}
@ -14,10 +12,12 @@
</span>
{% endcall %}
{{ h.csrf_input() if 'csrf_input' in h }}
{{ form.input('title', id='field-title', label=_('Title'), placeholder=_('eg. A descriptive title'), value=data.title, error=errors.title, classes=['control-full'], attrs={'data-module': 'slug-preview-target'}) }}
{% set prefix = 'harvest' %}
{% set domain = h.url_for('{0}_read'.format(c.dataset_type), id='', qualified=true) %}
{% set domain = h.url_for('{0}.read'.format(c.dataset_type), id='', qualified=true) %}
{% set domain = domain|replace("http://", "")|replace("https://", "") %}
{% set attrs = {'data-module': 'slug-preview-slug', 'data-module-prefix': domain, 'data-module-placeholder': '<harvest-source>'} %}

View File

@ -1,27 +1,13 @@
{% extends "source/base.html" %}
{% block admin_link %}
{% if h.check_access('harvest_source_update', {'id':harvest_source.id }) %}
{{ h.nav_link(_('Admin'), named_route='{0}_admin'.format(c.dataset_type), id=harvest_source.name, class_='btn btn-primary', icon='wrench')}}
{% endif %}
{% endblock %}
{# CKAN 2.0 #}
{% block actions_content %}
{% if authorized_user %}
<li>{{ self.admin_link() }}</li>
{% endif %}
{% endblock %}
{# TODO: once #354 is merged in CKAN core .profile-info doesn't exist #}
{% block secondary_content %}
<div class="module context-info profile-info">
<div class="module context-info">
<section class="module-content">
<h1 class="heading">{{ harvest_source.title }}</h1>
{% if harvest_source.notes %}
<p>
{{ h.markdown_extract(harvest_source.notes, 180) }}
{{ h.nav_link(_('read more'), named_route='{0}_about'.format(c.dataset_type), id=harvest_source.name) }}
{{ h.nav_link(_('read more'), named_route='harvester.about', id=harvest_source.name) }}
</p>
{% else %}
<p class="empty">{{ _('There is no description for this harvest source') }}</p>
@ -37,18 +23,20 @@
{% endblock %}
{% block primary_content %}
<article class="module prose">
<article class="module">
{% block page_header %}
<header class="module-content page-header">
{% block content_action %}
<div class="content_action">
{{ self.admin_link() }}
{% if h.check_access('harvest_source_update', {'id':harvest_source.id }) %}
{{ h.nav_link(_('Admin'), named_route='harvester.admin', id=harvest_source.name, class_='btn btn-primary', icon='wrench')}}
{% endif %}
</div>
{% endblock %}
<ul class="nav nav-tabs">
{% block page_header_tabs %}
{{ h.build_nav_icon('{0}_read'.format(c.dataset_type), _('Datasets'), id=harvest_source.name, icon='sitemap') }}
{{ h.build_nav_icon('{0}_about'.format(c.dataset_type), _('About'), id=harvest_source.name, icon='info-sign') }}
{{ h.build_nav_icon(c.dataset_type ~ '.read', _('Datasets'), id=harvest_source.name, icon='sitemap') }}
{{ h.build_nav_icon('harvester.about', _('About'), id=harvest_source.name, icon='info-sign') }}
{% endblock %}
</ul>
</header>

View File

@ -8,23 +8,7 @@
{% endblock %}
{% if g.ckan_base_version.startswith('2.0') %}
{# CKAN 2.0 #}
{% block add_action_content %}
{{ h.snippet('snippets/add_source_button.html', dataset_type=c.dataset_type) }}
{% endblock %}
{% endif %}
{% block primary_content %}
{% if g.ckan_base_version.startswith('2.0') %}
{# CKAN 2.0 #}
{% include 'source/search_2.0.html' %}
{% else %}
{# > CKAN 2.0 #}
<section class="module">
<div class="module-content">
{% block page_primary_action %}
@ -47,7 +31,7 @@
(_('Last Modified'), 'metadata_modified desc'),
(_('Popular'), 'views_recent desc') if g.tracking_enabled else (false, false) ]
%}
{% snippet 'snippets/search_form.html', type='harvest', query=c.q, sorting=sorting, sorting_selected=c.sort_by_selected, count=c.page.item_count, facets=facets, show_empty=request.params, error=c.query_error, placeholder=_("Search harvest sources...") %}
{% snippet 'snippets/search_form.html', type='harvest', query=c.q, sorting=sorting, sorting_selected=c.sort_by_selected, count=c.page.item_count, facets=facets, show_empty=request.args, error=c.query_error, placeholder=_("Search harvest sources...") %}
{{ h.snippet('snippets/source_list.html', sources=c.page.items, show_organization=true) }}
@ -56,14 +40,12 @@
{{ c.page.pager(q=c.q) }}
</section>
{% endif %}
{% endblock %}
{% endblock %}
{% block secondary_content %}
{% for facet in c.facet_titles %}
{{ h.snippet('snippets/facet_list.html', title=c.facet_titles[facet], name=facet, alternative_url=h.url_for('{0}_search'.format(c.dataset_type))) }}
{{ h.snippet('snippets/facet_list.html', title=c.facet_titles[facet], name=facet, alternative_url=h.url_for('{0}.search'.format(c.dataset_type))) }}
{% endfor %}
{% endblock %}

View File

@ -3,17 +3,12 @@ from __future__ import print_function
import json
import re
import copy
import six
from six.moves.urllib.parse import unquote_plus
from urllib.parse import unquote_plus
from threading import Thread
if six.PY2:
from SimpleHTTPServer import SimpleHTTPRequestHandler
from SocketServer import TCPServer
else:
from http.server import SimpleHTTPRequestHandler
from socketserver import TCPServer
from http.server import SimpleHTTPRequestHandler
from socketserver import TCPServer
PORT = 8998
@ -171,10 +166,7 @@ class MockCkanHandler(SimpleHTTPRequestHandler):
def get_url_params(self):
params_str = self.path.split('?')[-1]
if six.PY2:
params_unicode = unquote_plus(params_str).decode('utf8')
else:
params_unicode = unquote_plus(params_str)
params_unicode = unquote_plus(params_str)
params = params_unicode.split('&')
return dict([param.split('=') for param in params])

View File

@ -1,7 +1,11 @@
import re
import pytest
from mock import patch
try:
from unittest.mock import patch
except ImportError:
from mock import patch
from ckanext.harvest.harvesters.base import HarvesterBase, munge_tag
from ckantoolkit.tests import factories

View File

@ -2,7 +2,10 @@ from __future__ import absolute_import
import copy
import json
from mock import patch, MagicMock, Mock
try:
from unittest.mock import patch, MagicMock, Mock
except ImportError:
from mock import patch, MagicMock, Mock
import pytest
from requests.exceptions import HTTPError, RequestException
@ -320,11 +323,10 @@ class TestCkanHarvester(object):
config=json.dumps(config))
assert 'default_extras must be a dictionary' in str(harvest_context.value)
@patch('ckanext.harvest.harvesters.ckanharvester.pyopenssl.inject_into_urllib3')
@patch('ckanext.harvest.harvesters.ckanharvester.CKANHarvester.config')
@patch('ckanext.harvest.harvesters.ckanharvester.requests.get', side_effect=RequestException('Test.value'))
def test_get_content_handles_request_exception(
self, mock_requests_get, mock_config, mock_pyopenssl_inject
self, mock_requests_get, mock_config
):
mock_config.return_value = {}
@ -342,11 +344,10 @@ class TestCkanHarvester(object):
self.request = Mock()
self.request.url = "http://test.example.gov.uk"
@patch('ckanext.harvest.harvesters.ckanharvester.pyopenssl.inject_into_urllib3')
@patch('ckanext.harvest.harvesters.ckanharvester.CKANHarvester.config')
@patch('ckanext.harvest.harvesters.ckanharvester.requests.get', side_effect=MockHTTPError())
def test_get_content_handles_http_error(
self, mock_requests_get, mock_config, mock_pyopenssl_inject
self, mock_requests_get, mock_config
):
mock_config.return_value = {}

View File

@ -81,7 +81,7 @@ class HarvestSourceActionBase():
with pytest.raises(ValidationError) as e:
helpers.call_action(self.action, **source_dict)
for key in ('name', 'title', 'url', 'source_type'):
for key in ('name', 'url', 'source_type'):
assert e.value.error_dict[key] == [u'Missing value']
def test_invalid_unknown_type(self):
@ -180,7 +180,6 @@ class TestHarvestSourceActionUpdate(HarvestSourceFixtureMixin,
"frequency": "MONTHLY",
"config": json.dumps({"custom_option": ["c", "d"]})
})
result = helpers.call_action(
'harvest_source_update', **source_dict)
@ -756,3 +755,35 @@ class TestActions():
assert job['status'] == 'Running'
assert job['gather_started'] is None
assert 'stats' in job.keys()
def test_harvest_source_show_status(self):
# Checks the output of the `harvest_source_show_status` action for a source
# whose only job has one gather error and one object error recorded.
source = factories.HarvestSourceObj(**SOURCE_DICT.copy())
job = factories.HarvestJobObj(source=source)
dataset = ckan_factories.Dataset()
obj = factories.HarvestObjectObj(
job=job, source=source, package_id=dataset['id'])
# One error attached to the job itself ...
harvest_gather_error = harvest_model.HarvestGatherError(message="Unexpected gather error", job=job)
harvest_gather_error.save()
# ... and one attached to the harvest object created above.
harvest_object_error = harvest_model.HarvestObjectError(message="Unexpected object error", object=obj)
harvest_object_error.save()
context = {'model': model}
data_dict = {'id': source.id}
source_status = get_action('harvest_source_show_status')(context, data_dict)
# Verify that the response is dictized properly: json.dumps raises if the
# result still contains values that cannot be serialized to JSON.
json.dumps(source_status)
last_job = source_status['last_job']
assert last_job['source_id'] == source.id
assert last_job['status'] == 'New'
# Both errors saved above are expected to be counted as errored.
assert last_job['stats']['errored'] == 2
# Each summary is a list of dicts carrying `message` and `error_count`.
assert len(last_job['object_error_summary']) == 1
assert last_job['object_error_summary'][0]['message'] == harvest_object_error.message
assert last_job['object_error_summary'][0]['error_count'] == 1
assert len(last_job['gather_error_summary']) == 1
assert last_job['gather_error_summary'][0]['message'] == harvest_gather_error.message
assert last_job['gather_error_summary'][0]['error_count'] == 1

View File

@ -1,4 +1,3 @@
import six
import pytest
from ckantoolkit import url_for
@ -6,20 +5,9 @@ from ckantoolkit.tests import factories
from ckanext.harvest.tests import factories as harvest_factories
def _assert_in_body(string, response):
if six.PY2:
assert string in response.body.decode('utf8')
else:
assert string in response.body
@pytest.mark.usefixtures('clean_db', 'clean_index', 'harvest_setup')
class TestBlueprint():
def setup(self):
sysadmin = factories.Sysadmin()
self.extra_environ = {'REMOTE_USER': sysadmin['name'].encode('ascii')}
def test_index_page_is_rendered(self, app):
source1 = harvest_factories.HarvestSource()
@ -27,87 +15,106 @@ class TestBlueprint():
response = app.get(u'/harvest')
_assert_in_body(source1['title'], response)
_assert_in_body(source2['title'], response)
assert source1['title'] in response.body
assert source2['title'] in response.body
def test_new_form_is_rendered(self, app):
url = url_for('harvest_new')
url = url_for('harvest.new')
sysadmin = factories.Sysadmin()
env = {"REMOTE_USER": sysadmin['name'].encode('ascii')}
response = app.get(url, extra_environ=self.extra_environ)
response = app.get(url, extra_environ=env)
_assert_in_body('<form id="source-new"', response)
assert '<form id="source-new"' in response.body
def test_edit_form_is_rendered(self, app):
source = harvest_factories.HarvestSource()
url = url_for('harvest_edit', id=source['id'])
url = url_for('harvest.edit', id=source['id'])
sysadmin = factories.Sysadmin()
env = {"REMOTE_USER": sysadmin['name'].encode('ascii')}
response = app.get(url, extra_environ=self.extra_environ)
response = app.get(url, extra_environ=env)
_assert_in_body('<form id="source-new"', response)
assert '<form id="source-new"' in response.body
def test_source_page_rendered(self, app):
source = harvest_factories.HarvestSource()
url = url_for('harvest_read', id=source['name'])
url = url_for('harvest.read', id=source['name'])
sysadmin = factories.Sysadmin()
env = {"REMOTE_USER": sysadmin['name'].encode('ascii')}
response = app.get(url, extra_environ=self.extra_environ)
response = app.get(url, extra_environ=env)
_assert_in_body(source['name'], response)
assert source['name'] in response.body
def test_admin_page_rendered(self, app):
source_obj = harvest_factories.HarvestSourceObj()
job = harvest_factories.HarvestJob(source=source_obj)
url = url_for('harvest_admin', id=source_obj.id)
sysadmin = factories.Sysadmin()
env = {"REMOTE_USER": sysadmin['name'].encode('ascii')}
response = app.get(url, extra_environ=self.extra_environ)
url = url_for('harvester.admin', id=source_obj.id)
_assert_in_body(source_obj.title, response)
response = app.get(url, extra_environ=env)
_assert_in_body(job['id'], response)
assert source_obj.title in response.body
assert job['id'] in response.body
def test_about_page_rendered(self, app):
source = harvest_factories.HarvestSource()
url = url_for('harvest_about', id=source['name'])
url = url_for('harvester.about', id=source['name'])
sysadmin = factories.Sysadmin()
env = {"REMOTE_USER": sysadmin['name'].encode('ascii')}
response = app.get(url, extra_environ=self.extra_environ)
response = app.get(url, extra_environ=env)
_assert_in_body(source['name'], response)
assert source['name'] in response.body
def test_job_page_rendered(self, app):
job = harvest_factories.HarvestJob()
url = url_for('harvest_job_list', source=job['source_id'])
sysadmin = factories.Sysadmin()
env = {"REMOTE_USER": sysadmin['name'].encode('ascii')}
response = app.get(url, extra_environ=self.extra_environ)
url = url_for('harvester.job_list', source=job['source_id'])
_assert_in_body(job['id'], response)
response = app.get(url, extra_environ=env)
assert job['id'] in response.body
def test_job_show_last_page_rendered(self, app):
job = harvest_factories.HarvestJob()
url = url_for('harvest_job_show_last', source=job['source_id'])
sysadmin = factories.Sysadmin()
env = {"REMOTE_USER": sysadmin['name'].encode('ascii')}
response = app.get(url, extra_environ=self.extra_environ)
url = url_for('harvester.job_show_last', source=job['source_id'])
_assert_in_body(job['id'], response)
response = app.get(url, extra_environ=env)
assert job['id'] in response.body
def test_job_show_page_rendered(self, app):
job = harvest_factories.HarvestJob()
url = url_for(
'harvest_job_show', source=job['source_id'], id=job['id'])
'harvester.job_show', source=job['source_id'], id=job['id'])
sysadmin = factories.Sysadmin()
env = {"REMOTE_USER": sysadmin['name'].encode('ascii')}
response = app.get(url, extra_environ=self.extra_environ)
response = app.get(url, extra_environ=env)
_assert_in_body(job['id'], response)
assert job['id'] in response.body

View File

@ -1,5 +1,8 @@
import pytest
from mock import patch
try:
from unittest.mock import patch
except ImportError:
from mock import patch
from ckanext.harvest.model import HarvestObject, HarvestObjectExtra
from ckanext.harvest.interfaces import IHarvester

View File

@ -10,22 +10,12 @@ import xml.etree.ElementTree as etree
import ckan.lib.helpers as h
import ckan.plugins.toolkit as tk
import six
from ckan import model
from ckantoolkit import _
from six import StringIO
from io import StringIO
from ckanext.harvest.logic import HarvestJobExists, HarvestSourceInactiveError
try:
# Python 2.7
xml_parser_exception = etree.ParseError
except AttributeError:
# Python 2.6
from xml.parsers import expat
xml_parser_exception = expat.ExpatError
log = logging.getLogger(__name__)
@ -394,8 +384,7 @@ def run_test_harvester(source_id_or_name, force_import):
if running_jobs:
print('\nSource "{0}" apparently has a "Running" job:\n{1}'.format(
source.get("name") or source["id"], running_jobs))
resp = six.moves.input("Abort it? (y/n)")
resp = input("Abort it? (y/n)")
if not resp.lower().startswith("y"):
sys.exit(1)
job_dict = tk.get_action("harvest_job_abort")(
@ -517,7 +506,7 @@ def _get_source_for_job(source_id):
except tk.ObjectNotFound:
return tk.abort(404, _('Harvest source not found'))
except tk.NotAuthorized:
return tk.abort(401, _not_auth_message())
return tk.abort(403, _not_auth_message())
except Exception as e:
msg = 'An error occurred: [%s]' % str(e)
return tk.abort(500, msg)
@ -537,7 +526,7 @@ def admin_view(id):
except tk.ObjectNotFound:
return tk.abort(404, _('Harvest source not found'))
except tk.NotAuthorized:
return tk.abort(401, _not_auth_message())
return tk.abort(403, _not_auth_message())
def job_show_last_view(source):
@ -579,7 +568,7 @@ def job_show_view(id, source_dict=False, is_last=False):
except tk.ObjectNotFound:
return tk.abort(404, _('Harvest job not found'))
except tk.NotAuthorized:
return tk.abort(401, _not_auth_message())
return tk.abort(403, _not_auth_message())
except Exception as e:
msg = 'An error occurred: [%s]' % str(e)
return tk.abort(500, msg)
@ -607,7 +596,7 @@ def job_list_view(source):
except tk.ObjectNotFound:
return tk.abort(404, _('Harvest source not found'))
except tk.NotAuthorized:
return tk.abort(401, _not_auth_message())
return tk.abort(403, _not_auth_message())
except Exception as e:
msg = 'An error occurred: [%s]' % str(e)
return tk.abort(500, msg)
@ -625,7 +614,7 @@ def about_view(id):
except tk.ObjectNotFound:
return tk.abort(404, _('Harvest source not found'))
except tk.NotAuthorized:
return tk.abort(401, _not_auth_message())
return tk.abort(403, _not_auth_message())
def job_abort_view(source, id):
@ -638,13 +627,13 @@ def job_abort_view(source, id):
except tk.ObjectNotFound:
return tk.abort(404, _('Harvest job not found'))
except tk.NotAuthorized:
return tk.abort(401, _not_auth_message())
return tk.abort(403, _not_auth_message())
except Exception as e:
msg = 'An error occurred: [%s]' % str(e)
return tk.abort(500, msg)
return h.redirect_to(
h.url_for('{0}_admin'.format(DATASET_TYPE_NAME), id=source))
h.url_for('harvester.admin', id=source))
def refresh_view(id):
@ -659,7 +648,7 @@ def refresh_view(id):
except tk.ObjectNotFound:
return tk.abort(404, _('Harvest source not found'))
except tk.NotAuthorized:
return tk.abort(401, _not_auth_message())
return tk.abort(403, _not_auth_message())
except HarvestSourceInactiveError:
h.flash_error(
_('Cannot create new harvest jobs on inactive '
@ -674,7 +663,7 @@ def refresh_view(id):
h.flash_error(msg)
return h.redirect_to(
h.url_for('{0}_admin'.format(DATASET_TYPE_NAME), id=id))
h.url_for('harvester.admin', id=id))
def clear_view(id):
@ -685,24 +674,20 @@ def clear_view(id):
except tk.ObjectNotFound:
return tk.abort(404, _('Harvest source not found'))
except tk.NotAuthorized:
return tk.abort(401, _not_auth_message())
return tk.abort(403, _not_auth_message())
except Exception as e:
msg = 'An error occurred: [%s]' % str(e)
h.flash_error(msg)
return h.redirect_to(
h.url_for('{0}_admin'.format(DATASET_TYPE_NAME), id=id))
h.url_for('harvester.admin', id=id))
def delete_view(id):
try:
context = {'model': model, 'user': tk.c.user}
context['clear_source'] = tk.request.params.get('clear',
'').lower() in (
u'true',
u'1',
)
clear = tk.request.args.get('clear', '').lower()
context['clear_source'] = clear in ('true', '1', )
tk.get_action('harvest_source_delete')(context, {'id': id})
@ -712,11 +697,11 @@ def delete_view(id):
h.flash_success(_('Harvesting source successfully inactivated'))
return h.redirect_to(
h.url_for('{0}_admin'.format(DATASET_TYPE_NAME), id=id))
h.url_for('harvester.admin', id=id))
except tk.ObjectNotFound:
return tk.abort(404, _('Harvest source not found'))
except tk.NotAuthorized:
return tk.abort(401, _not_auth_message())
return tk.abort(403, _not_auth_message())
def object_show_view(id, ref_type, response):
@ -748,7 +733,7 @@ def object_show_view(id, ref_type, response):
if '<?xml' not in content.split('\n')[0]:
content = u'<?xml version="1.0" encoding="UTF-8"?>\n' + content
except xml_parser_exception:
except etree.ParseError:
try:
json.loads(obj['content'])
response.content_type = 'application/json; charset=utf-8'
@ -757,12 +742,16 @@ def object_show_view(id, ref_type, response):
pass
response.headers['Content-Length'] = len(content)
return (response, six.ensure_str(content))
if isinstance(content, bytes):
content = content.decode("utf-8")
return (response, content)
except tk.ObjectNotFound as e:
return tk.abort(404, _(str(e)))
except tk.NotAuthorized:
return tk.abort(401, _not_auth_message())
return tk.abort(403, _not_auth_message())
except Exception as e:
msg = 'An error occurred: [%s]' % str(e)
return tk.abort(500, msg)

View File

@ -5,6 +5,8 @@ from flask import Blueprint, make_response
import ckanext.harvest.utils as utils
# IDatasetForm provides a "harvest" blueprint for the package type harvest.
# We name the extension blueprint "harvester" to avoid a name clash.
harvester = Blueprint("harvester", __name__)

View File

@ -1,6 +0,0 @@
ckantoolkit==0.0.3
pika>=1.1.0
pyOpenSSL==18.0.0
redis
requests>=2.11.1
six>=1.12.0

1
pip-requirements.txt Symbolic link
View File

@ -0,0 +1 @@
requirements.txt

4
requirements.txt Normal file
View File

@ -0,0 +1,4 @@
ckantoolkit>=0.0.7
pika>=1.1.0,<1.3.0
redis
requests>=2.11.1

View File

@ -1,6 +1,6 @@
from setuptools import setup, find_packages
version = '1.3.4'
version = '1.5.6'
setup(
name='ckanext-harvest',
@ -19,7 +19,7 @@ setup(
include_package_data=True,
zip_safe=False,
install_requires=[
# dependencies are specified in pip-requirements.txt
# dependencies are specified in requirements.txt
# instead of here
],
tests_require=[