added scraper for production gateways

This commit is contained in:
root 2021-01-12 10:26:39 +01:00
parent 26437ea437
commit 53d6d773aa
1 changed files with 94 additions and 0 deletions

View File

@ -0,0 +1,94 @@
declare variable $url := 'https://services.d4science.org/thematic-gateways';

(:~
 : Lists the gateway addresses advertised on the page at $url.
 :
 : Sends an HTTP GET to $url, collects the href attribute of every a element
 : whose class attribute equals 'entry-link', and removes each occurrence of
 : '/explore' from the attribute value.
 :
 : @return one string per matching link
 :)
declare function local:list() {
  let $response := http:send-request(<http:request method="get"/>, $url)
  for $href in $response//a[@class = 'entry-link']/@href
  return replace(data($href), '/explore', '')
};
(:~
 : Extracts the leading part of the page title.
 :
 : @param $page parsed HTML document (document node with an html root)
 : @return the text of html/head/title that precedes the first " -";
 :         the empty string when the title does not contain " -"
 :)
declare %basex:inline function local:get-title($page){
  let $full-title := $page/html/head/title
  return substring-before($full-title, " -")
};
(:~
 : Reads the favicon address declared in the page head.
 :
 : @param $page parsed HTML document
 : @return the href value (as a string) of each html/head/link element whose
 :         rel attribute equals "Shortcut Icon" (case-sensitive comparison);
 :         empty sequence when no such link exists
 :)
declare %basex:inline function local:get-favicon-url($page){
  for $link in $page/html/head/link
  where $link/@rel = "Shortcut Icon"
  return string($link/@href)
};
(:~
 : Placeholder extractor for the background image: nothing is read from the
 : page and the empty sequence is always returned.
 :
 : @param $page parsed HTML document (unused)
 : @return the empty sequence
 :)
declare %basex:inline function local:get-background-image($page){
  ()
};
(:~
 : Finds the source address of the site logo.
 :
 : @param $page parsed HTML document
 : @return the src value (as a string) of every img located below an h1 whose
 :         class attribute contains "site-title"; an img without src yields ""
 :)
declare %basex:inline function local:get-logo-url($page){
  let $site-headings := $page/html/body//h1[contains(@class, "site-title")]
  return $site-headings//img/string(@src)
};
(:~
 : Finds the alternative text of the site logo.
 :
 : @param $page parsed HTML document
 : @return the alt value (as a string) of every img located below an h1 whose
 :         class attribute contains "site-title"; an img without alt yields ""
 :)
declare %basex:inline function local:get-logo-alt($page){
  let $site-headings := $page/html/body//h1[contains(@class, "site-title")]
  return $site-headings//img/string(@alt)
};
(:~
 : Reports whether the page shows an image inside a "poweredBy-link" div.
 :
 : @param $page parsed HTML document
 : @return "yes" when at least one img exists below a div whose class
 :         attribute contains "poweredBy-link", otherwise "no"
 :)
declare %basex:inline function local:get-infrastructure-logo($page){
  let $powered-by-images :=
    $page/html/body//div[contains(@class, "poweredBy-link")]//img
  return
    if (empty($powered-by-images)) then "no" else "yes"
};
(:~
 : Locates the footer link to the terms-of-use page.
 :
 : @param $page parsed HTML document
 : @return the href attribute node(s) with the exact value "/terms-of-use"
 :         found on a elements below any footer; empty sequence when absent
 :)
declare %basex:inline function local:get-terms-url($page){
  $page/html/body//footer//a/@href[. = "/terms-of-use"]
};
(:~
 : Locates the footer link to the cookie-policy page.
 :
 : @param $page parsed HTML document
 : @return the href attribute node(s) with the exact value "/cookie-policy"
 :         found on a elements below any footer; empty sequence when absent
 :)
declare %basex:inline function local:get-cookiepolicy-url($page){
  $page/html/body//footer//a/@href[. = "/cookie-policy"]
};
(:~
 : Locates the footer link labelled "Privacy Policy".
 :
 : @param $page parsed HTML document
 : @return the href attribute node(s) of a elements below any footer that
 :         have a text node child equal to "Privacy Policy"; empty sequence
 :         when no such link exists
 :)
declare %basex:inline function local:get-privacypolicy-url($page){
  let $privacy-links := $page/html/body//footer//a[text() = "Privacy Policy"]
  return $privacy-links/@href
};
(:~
 : Picks the project link from the custom footer container.
 :
 : @param $page parsed HTML document
 : @return the href attribute node of the last a child of each div that is a
 :         direct child of a footer and whose class attribute contains
 :         "custom-footer-container"; empty sequence when nothing matches
 :)
declare %basex:inline function local:get-project-url($page){
  let $containers :=
    $page/html/body//footer/div[contains(@class, "custom-footer-container")]
  return $containers/a[last()]/@href
};
(:~
 : Placeholder extractor for the project description: nothing is read from
 : the page and the empty sequence is always returned.
 :
 : @param $page parsed HTML document (unused)
 : @return the empty sequence
 :)
declare %basex:inline function local:get-project-description($page){
  ()
};
(:~
 : Reports whether the footer links to the Horizon 2020 programme page
 : outside of the custom footer container.
 :
 : @param $page parsed HTML document
 : @return "yes" when a div that is a direct child of a footer, and whose
 :         class attribute does not contain "custom-footer-container", has an
 :         a child with href "http://ec.europa.eu/programmes/horizon2020/";
 :         otherwise "no"
 :)
declare %basex:inline function local:get-ec-logo($page){
  let $standard-footer-divs :=
    $page/html/body//footer/div[not(contains(@class, "custom-footer-container"))]
  let $ec-links :=
    $standard-footer-divs/a[@href = "http://ec.europa.eu/programmes/horizon2020/"]
  return
    if (empty($ec-links)) then "no" else "yes"
};
(:~
 : Flattens the footer text into a single string.
 :
 : Every text node below html/body/footer/div/div is taken in document order,
 : each single quote in it is doubled, and the pieces are joined with the
 : literal separator "<br/>".
 :
 : NOTE(review): this path requires footer to be a direct child of body,
 : whereas the other footer extractors in this file search body//footer;
 : confirm that the difference is intended.
 :
 : @param $page parsed HTML document
 : @return the joined footer text; "" when no text node matches
 :)
declare %basex:inline function local:get-footer($page){
  let $fragments :=
    for $text in $page/html/body/footer/div/div//text()
    return replace($text, "'", "''")
  return string-join($fragments, "<br/>")
};
(:~
 : Renders a parameter map as a "keycloak-action.sh inject-theme" command line.
 :
 : Each map entry becomes one argument of the form -e 'key="value"'; the
 : arguments follow the command prefix, separated by single spaces, in the
 : order in which map:keys returns the keys. An entry whose value is the
 : empty sequence is rendered with an empty value.
 :
 : NOTE(review): values are inserted verbatim. A value containing a single
 : or double quote changes the quoting of the generated command; confirm
 : that callers only pass values that are safe to embed.
 :
 : @param $params map from parameter name to at most one atomizable value
 : @return the command line as a single string
 :)
declare %basex:inline function local:to-ansible-call($params as map(*)){
  let $extra-vars :=
    for $key in map:keys($params)
    return "-e '" || $key || "=&quot;" || $params($key) || "&quot;'"
  return string-join(("./keycloak-action.sh inject-theme", $extra-vars), " ")
};
(:~
 : Scrapes one gateway page and renders the matching inject-theme command.
 :
 : The page is downloaded with fetch:text, parsed with html:parse, passed
 : through the extractor functions of this file, and the collected values are
 : handed to local:to-ansible-call.
 :
 : The logo, terms and cookie-policy values are prefixed with $page-addr; the
 : privacy-policy and project values are used as found, only converted to a
 : string. The theme name is the part of $page-addr after "://".
 :
 : NOTE(review): because of the unconditional prefixing, terms_url and
 : cookie_policy_url equal $page-addr when the page has no such link.
 : Confirm whether that is the desired fallback.
 :
 : @param $page-addr address of the gateway page, including the scheme
 : @return the command line produced by local:to-ansible-call
 :)
declare %basex:inline function local:transform($page-addr){
  let $doc         := html:parse(fetch:text($page-addr))
  let $theme       := substring-after($page-addr, "://")
  let $logo-url    := $page-addr || local:get-logo-url($doc)
  let $terms-url   := $page-addr || local:get-terms-url($doc)
  let $cookie-url  := $page-addr || local:get-cookiepolicy-url($doc)
  (: Concatenating with "" turns the attribute node (or empty sequence) into a string. :)
  let $privacy-url := "" || local:get-privacypolicy-url($doc)
  let $project-url := "" || local:get-project-url($doc)
  let $settings :=
    map {
      "theme"               : $theme,
      "title_tag"           : local:get-title($doc),
      "favicon_url"         : local:get-favicon-url($doc),
      "logo_url"            : $logo-url,
      "logo_alt"            : local:get-logo-alt($doc),
      "infrastructure_logo" : local:get-infrastructure-logo($doc),
      "background_image"    : local:get-background-image($doc),
      "terms_url"           : $terms-url,
      "cookie_policy_url"   : $cookie-url,
      "privacy_policy_url"  : $privacy-url,
      "project_url"         : $project-url,
      "project_description" : local:get-project-description($doc),
      "EC_logo"             : local:get-ec-logo($doc),
      "footer"              : local:get-footer($doc)
    }
  return local:to-ansible-call($settings)
};
(: Main expression: emit one inject-theme command line per listed gateway.

   xquery:fork-join evaluates the functions it receives in parallel and
   returns their results in the order of the supplied functions. Calling it
   once per gateway with a single function runs the pages one after another,
   so all closures are built first and passed to a single call. The output
   sequence stays in the order of local:list(). :)
xquery:fork-join(
  for $page-addr in local:list()
  return function() { local:transform($page-addr) }
)