From 0f4b2094271294306d896216537cf43a5a275b95 Mon Sep 17 00:00:00 2001 From: George Kalampokis Date: Wed, 25 May 2022 17:37:20 +0300 Subject: [PATCH] Add {query} field for external urls with configurable queries and make ORCID query more strict (similar to orcid's website) (ref #7798) --- .../eudat/logic/proxy/config/QueryConfig.java | 33 +++++++++++++++ .../logic/proxy/config/UrlConfiguration.java | 14 +++++++ .../logic/proxy/fetching/RemoteFetcher.java | 42 ++++++++++++++----- .../proxy/fetching/RemoteFetcherUtils.java | 2 +- .../resources/externalUrls/ExternalUrls.xml | 20 ++++++++- 5 files changed, 98 insertions(+), 13 deletions(-) create mode 100644 dmp-backend/web/src/main/java/eu/eudat/logic/proxy/config/QueryConfig.java diff --git a/dmp-backend/web/src/main/java/eu/eudat/logic/proxy/config/QueryConfig.java b/dmp-backend/web/src/main/java/eu/eudat/logic/proxy/config/QueryConfig.java new file mode 100644 index 000000000..2e6223ab3 --- /dev/null +++ b/dmp-backend/web/src/main/java/eu/eudat/logic/proxy/config/QueryConfig.java @@ -0,0 +1,33 @@ +package eu.eudat.logic.proxy.config; + +public class QueryConfig { + + private String condition; + private String separator; + private String value; + + + public String getCondition() { + return condition; + } + + public void setCondition(String condition) { + this.condition = condition; + } + + public String getSeparator() { + return separator; + } + + public void setSeparator(String separator) { + this.separator = separator; + } + + public String getValue() { + return value; + } + + public void setValue(String value) { + this.value = value; + } +} diff --git a/dmp-backend/web/src/main/java/eu/eudat/logic/proxy/config/UrlConfiguration.java b/dmp-backend/web/src/main/java/eu/eudat/logic/proxy/config/UrlConfiguration.java index edbd176f9..e2ca66c48 100644 --- a/dmp-backend/web/src/main/java/eu/eudat/logic/proxy/config/UrlConfiguration.java +++ b/dmp-backend/web/src/main/java/eu/eudat/logic/proxy/config/UrlConfiguration.java @@ -2,6 +2,8 @@ package eu.eudat.logic.proxy.config; import javax.xml.bind.annotation.XmlElement; +import javax.xml.bind.annotation.XmlElementWrapper; +import java.util.List; public class UrlConfiguration { @@ -19,6 +21,8 @@ public class UrlConfiguration { private String requestBody = ""; private String filterType = "local"; + private List queries; + public String getKey() { return key; } @@ -120,4 +124,14 @@ public class UrlConfiguration { public void setFilterType(String filterType) { this.filterType = filterType; } + + public List getQueries() { + return queries; + } + + @XmlElementWrapper + @XmlElement(name = "query") + public void setQueries(List queries) { + this.queries = queries; + } } diff --git a/dmp-backend/web/src/main/java/eu/eudat/logic/proxy/fetching/RemoteFetcher.java b/dmp-backend/web/src/main/java/eu/eudat/logic/proxy/fetching/RemoteFetcher.java index 92ae7012d..9c1e3ab86 100644 --- a/dmp-backend/web/src/main/java/eu/eudat/logic/proxy/fetching/RemoteFetcher.java +++ b/dmp-backend/web/src/main/java/eu/eudat/logic/proxy/fetching/RemoteFetcher.java @@ -5,10 +5,7 @@ import com.fasterxml.jackson.databind.JsonNode; import com.fasterxml.jackson.databind.ObjectMapper; import com.jayway.jsonpath.DocumentContext; import com.jayway.jsonpath.JsonPath; -import eu.eudat.logic.proxy.config.DataUrlConfiguration; -import eu.eudat.logic.proxy.config.ExternalUrlCriteria; -import eu.eudat.logic.proxy.config.FetchStrategy; -import eu.eudat.logic.proxy.config.UrlConfiguration; +import eu.eudat.logic.proxy.config.*; import eu.eudat.logic.proxy.config.configloaders.ConfigLoader; import eu.eudat.logic.proxy.config.entities.GenericUrls; import eu.eudat.logic.proxy.config.exceptions.HugeResultSet; @@ -197,7 +194,7 @@ public class RemoteFetcher { ifFunderQueryExist(urlConfiguration, externalUrlCriteria); if (urlConfiguration.getType() == null || urlConfiguration.getType().equals("External")) { try { - results.addAll(getAllResultsFromUrl(urlConfiguration.getUrl(), fetchStrategy, urlConfiguration.getData(), urlConfiguration.getPaginationPath(), externalUrlCriteria, urlConfiguration.getLabel(), urlConfiguration.getKey(), urlConfiguration.getContentType(), urlConfiguration.getFirstpage(), urlConfiguration.getRequestBody(), urlConfiguration.getRequestType(), urlConfiguration.getFilterType())); + results.addAll(getAllResultsFromUrl(urlConfiguration.getUrl(), fetchStrategy, urlConfiguration.getData(), urlConfiguration.getPaginationPath(), externalUrlCriteria, urlConfiguration.getLabel(), urlConfiguration.getKey(), urlConfiguration.getContentType(), urlConfiguration.getFirstpage(), urlConfiguration.getRequestBody(), urlConfiguration.getRequestType(), urlConfiguration.getFilterType(), urlConfiguration.getQueries())); } catch (Exception e) { logger.error(e.getLocalizedMessage(), e); } @@ -227,13 +224,37 @@ public class RemoteFetcher { } } - protected String replaceCriteriaOnUrl(String path, ExternalUrlCriteria externalUrlCriteria, String firstPage) { + private String calculateQuery(ExternalUrlCriteria externalUrlCriteria, List queryConfigs) { + String finalQuery = ""; + QueryConfig queryConfig = queryConfigs.stream().filter(queryConfigl -> externalUrlCriteria.getLike().matches(queryConfigl.getCondition())) + .findFirst().orElse(null); + if (queryConfig != null) { + if (queryConfig.getSeparator() != null) { + String[] likes = externalUrlCriteria.getLike().split(queryConfig.getSeparator()); + finalQuery = queryConfig.getValue(); + for (int i = 0; i < likes.length; i++) { + finalQuery = finalQuery.replaceAll("\\{like" + (i+1) + "}", likes[i]); + } + } else { + finalQuery = queryConfig.getValue().replaceAll("\\{like}", externalUrlCriteria.getLike()); + } + + } + return finalQuery; + } + + protected String replaceCriteriaOnUrl(String path, ExternalUrlCriteria externalUrlCriteria, String firstPage, List queries) { String completedPath = path; if (externalUrlCriteria.getLike() != null) { if ((path.contains("openaire") || path.contains("orcid") || path.contains("ror")) && externalUrlCriteria.getLike().equals("")) { completedPath = completedPath.replaceAll("\\{like}", "*"); + completedPath = completedPath.replaceAll("\\{query}", "*"); } else { - completedPath = completedPath.replaceAll("\\{like}", externalUrlCriteria.getLike()); + if (completedPath.contains("{query}")) { + completedPath = completedPath.replaceAll("\\{query}", this.calculateQuery(externalUrlCriteria, queries)); + } else { + completedPath = completedPath.replaceAll("\\{like}", externalUrlCriteria.getLike()); + } } } else { completedPath = completedPath.replace("{like}", ""); @@ -274,14 +295,15 @@ public class RemoteFetcher { } else { completedPath = completedPath.replace("{path}", ""); } + logger.info(completedPath); return completedPath; } - private List> getAllResultsFromUrl(String path, FetchStrategy fetchStrategy, final DataUrlConfiguration jsonDataPath, final String jsonPaginationPath, ExternalUrlCriteria externalUrlCriteria, String tag, String key, String contentType, String firstPage, String requestBody, String requestType, String filterType) throws Exception { + private List> getAllResultsFromUrl(String path, FetchStrategy fetchStrategy, final DataUrlConfiguration jsonDataPath, final String jsonPaginationPath, ExternalUrlCriteria externalUrlCriteria, String tag, String key, String contentType, String firstPage, String requestBody, String requestType, String filterType, List queries) throws Exception { Set pages = new HashSet<>(); - String replacedPath = replaceCriteriaOnUrl(path, externalUrlCriteria, firstPage); - String replacedBody = replaceCriteriaOnUrl(requestBody, externalUrlCriteria, firstPage); + String replacedPath = replaceCriteriaOnUrl(path, externalUrlCriteria, firstPage, queries); + String replacedBody = replaceCriteriaOnUrl(requestBody, externalUrlCriteria, firstPage, queries); Results results = getResultsFromUrl(replacedPath, jsonDataPath, jsonPaginationPath, contentType, replacedBody, requestType); if(filterType != null && filterType.equals("local") && (externalUrlCriteria.getLike() != null && !externalUrlCriteria.getLike().isEmpty())){ diff --git a/dmp-backend/web/src/main/java/eu/eudat/logic/proxy/fetching/RemoteFetcherUtils.java b/dmp-backend/web/src/main/java/eu/eudat/logic/proxy/fetching/RemoteFetcherUtils.java index 6d2d2a281..56f256254 100644 --- a/dmp-backend/web/src/main/java/eu/eudat/logic/proxy/fetching/RemoteFetcherUtils.java +++ b/dmp-backend/web/src/main/java/eu/eudat/logic/proxy/fetching/RemoteFetcherUtils.java @@ -31,7 +31,7 @@ public class RemoteFetcherUtils { ExternalUrlCriteria externalUrlCriteria = new ExternalUrlCriteria(); externalUrlCriteria.setPath(result.get("path")); externalUrlCriteria.setHost(result.get("host")); - String replacedPath = remoteFetcher.replaceCriteriaOnUrl(jsonDataPath.getUrlConfiguration().getUrl(), externalUrlCriteria, jsonDataPath.getUrlConfiguration().getFirstpage()); + String replacedPath = remoteFetcher.replaceCriteriaOnUrl(jsonDataPath.getUrlConfiguration().getUrl(), externalUrlCriteria, jsonDataPath.getUrlConfiguration().getFirstpage(), jsonDataPath.getUrlConfiguration().getQueries()); return remoteFetcher.getResultsFromUrl(replacedPath, jsonDataPath.getUrlConfiguration().getData(), jsonDataPath.getUrlConfiguration().getData().getPath(), jsonDataPath.getUrlConfiguration().getContentType(), requestBody, requestType); }).filter(Objects::nonNull).map(results1 -> results1.getResults().get(0)).collect(Collectors.toList()); return new Results(multiResults, new HashMap<>(1, 1)); diff --git a/dmp-backend/web/src/main/resources/externalUrls/ExternalUrls.xml b/dmp-backend/web/src/main/resources/externalUrls/ExternalUrls.xml index b8917c5ab..ffedc38ff 100644 --- a/dmp-backend/web/src/main/resources/externalUrls/ExternalUrls.xml +++ b/dmp-backend/web/src/main/resources/externalUrls/ExternalUrls.xml @@ -873,18 +873,34 @@ but not 2 External - https://pub.sandbox.orcid.org/v3.0/expanded-search/?q={like}&start={page}&rows={pageSize} + https://pub.sandbox.orcid.org/v3.0/expanded-search/?q={query}&start={page}&rows={pageSize} 0 application/json; charset=UTF-8 + + + ^[A-Za-z0-9]+ [A-Za-z0-9]+$ + + ((given-names:{like1}+AND+family-name:{like2})+OR+(given-names:{like2}+AND+family-name:{like1})) + + + \d{4}-\d{4}-\d{4}-\d{4} + orcid:{like} + + + .+ + given-names:{like}+OR+family-name:{like} + + $['expanded-result'][*] 'orcid-id' 'given-names' - 'family-names' + 'family-name' 'name' + api $['num-found']