Merge branch 'master' into dev-v2.0

This commit is contained in:
Adrià Mercader 2023-01-25 10:48:26 +01:00 committed by GitHub
commit 57e2ea40ad
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
6 changed files with 25 additions and 5 deletions

View File

@ -8,7 +8,7 @@ jobs:
- uses: actions/checkout@v2
- uses: actions/setup-python@v2
with:
python-version: '3.7'
python-version: '3.8'
- name: Install requirements
run: pip install flake8 pycodestyle
- name: Check syntax

View File

@ -235,6 +235,16 @@ apache = parse.SkipTo(parse.CaselessLiteral("<a href="), include=True).suppress
,adjacent=False, joinString=' ').setResultsName('date')
)
# Grammar for a single link entry, registered under the 'nginx' scraper key.
# Reading the expression left to right, it:
#   1. skips everything up to and including a case-insensitive `<a href=`;
#   2. captures the quoted string that follows as `url`, quotes removed;
#   3. skips through the closing `</a>`;
#   4. optionally consumes a literal `</td><td align="right">`;
#   5. optionally captures two words as `date`, joined by a single space:
#      the first made of alphanumerics and '-', the second of alphanumerics
#      and ':' (adjacent=False lets the words be separated in the input).
# NOTE(review): presumably targets nginx directory-index pages whose date
# looks like `DD-Mon-YYYY HH:MM` -- confirm against a real listing.
nginx = parse.SkipTo(parse.CaselessLiteral("<a href="), include=True).suppress() \
+ parse.quotedString.setParseAction(parse.removeQuotes).setResultsName('url') \
+ parse.SkipTo("</a>", include=True).suppress() \
+ parse.Optional(parse.Literal('</td><td align="right">')).suppress() \
+ parse.Optional(parse.Combine(
parse.Word(parse.alphanums+'-') +
parse.Word(parse.alphanums+':')
,adjacent=False, joinString=' ').setResultsName('date')
)
iis = parse.SkipTo("<br>").suppress() \
+ parse.OneOrMore("<br>").suppress() \
+ parse.Optional(parse.Combine(
@ -252,12 +262,15 @@ other = parse.SkipTo(parse.CaselessLiteral("<a href="), include=True).suppress()
# Registry of listing grammars keyed by scraper name. Each value matches one
# or more grouped occurrences of the corresponding per-entry expression.
scrapers = {
    name: parse.OneOrMore(parse.Group(entry))
    for name, entry in (
        ('apache', apache),
        ('nginx', nginx),
        ('other', other),
        ('iis', iis),
    )
}
def _get_scraper(server):
if not server or 'apache' in server.lower():
return 'apache'
if 'nginx' in server.lower():
return 'nginx'
if server == 'Microsoft-IIS/7.5':
return 'iis'
else:

View File

@ -56,6 +56,13 @@ class TestHarvestedMetadataAPI(SpatialTestBase):
assert r.headers["Content-Type"] == "application/xml; charset=utf-8"
assert r.body == '<?xml version="1.0" encoding="UTF-8"?>\n<xml>Content 1</xml>'
# Access human-readable view of content
url = "/harvest/object/{0}/html".format(object_id_1)
r = app.get(url, status=200)
assert(
r.headers["Content-Type"] == "text/html; charset=utf-8"
)
# Access original content in object extra (if present)
url = "/harvest/object/{0}/original".format(object_id_1)
r = app.get(url, status=404)

View File

@ -191,11 +191,11 @@ def get_harvest_object_content(id):
return None
def _transform_to_html(content, xslt_package=None, xslt_path=None):
def transform_to_html(content, xslt_package=None, xslt_path=None):
xslt_package = xslt_package or __name__
xslt_path = xslt_path or \
'../templates/ckanext/spatial/gemini2-html-stylesheet.xsl'
'templates/ckanext/spatial/gemini2-html-stylesheet.xsl'
# optimise -- read transform only once and compile rather
# than at each request

View File

@ -7,4 +7,4 @@ argparse
pyparsing>=2.1.10
requests>=1.1.0
six
geojson==2.5.0
geojson==2.5.0

View File

@ -7,4 +7,4 @@ argparse
pyparsing>=2.1.10
requests>=1.1.0
six
geojson==2.5.0
geojson==2.5.0