Add {query} field for external urls with configurable queries and make ORCID query more strict (similar to orcid's website) (ref #7798)

This commit is contained in:
George Kalampokis 2022-05-25 17:37:20 +03:00
parent 3aff6ac90e
commit 0f4b209427
5 changed files with 98 additions and 13 deletions

View File

@ -0,0 +1,33 @@
package eu.eudat.logic.proxy.config;
/**
 * One configurable query rule of an external url configuration.
 *
 * <p>A rule is selected when the user's search text matches {@link #getCondition()};
 * {@link #getValue()} is then used as the template for the generated query, and
 * {@link #getSeparator()}, when present, is used to split the search text into
 * numbered parts before substitution.
 */
public class QueryConfig {

    /** Regular expression the search text is matched against to select this rule. */
    private String condition;

    /** Optional regular expression used to split the search text into parts; may be {@code null}. */
    private String separator;

    /** Query template containing {@code {like}} or {@code {like1}}, {@code {like2}}, ... placeholders. */
    private String value;

    /** @return the regular expression that selects this rule */
    public String getCondition() {
        return condition;
    }

    /** @param condition the regular expression that selects this rule */
    public void setCondition(String condition) {
        this.condition = condition;
    }

    /** @return the separator expression, or {@code null} when the search text is not split */
    public String getSeparator() {
        return separator;
    }

    /** @param separator the separator expression, or {@code null} to leave the search text unsplit */
    public void setSeparator(String separator) {
        this.separator = separator;
    }

    /** @return the query template */
    public String getValue() {
        return value;
    }

    /** @param value the query template */
    public void setValue(String value) {
        this.value = value;
    }
}

View File

@ -2,6 +2,8 @@ package eu.eudat.logic.proxy.config;
import javax.xml.bind.annotation.XmlElement;
import javax.xml.bind.annotation.XmlElementWrapper;
import java.util.List;
public class UrlConfiguration {
@ -19,6 +21,8 @@ public class UrlConfiguration {
private String requestBody = "";
private String filterType = "local";
private List<QueryConfig> queries;
/**
 * @return the key of this url configuration
 */
public String getKey() {
return key;
}
@ -120,4 +124,14 @@ public class UrlConfiguration {
/**
 * Sets the filter type of this url configuration.
 *
 * @param filterType the filter type to store; the field defaults to {@code "local"} when never set
 */
public void setFilterType(String filterType) {
this.filterType = filterType;
}
/**
 * @return the configured query rules; {@code null} if none were ever set,
 *         because the field has no initializer
 */
public List<QueryConfig> getQueries() {
return queries;
}
/**
 * Sets the query rules of this url configuration.
 *
 * <p>Bound through JAXB as a wrapper element whose children are {@code query} elements.
 *
 * @param queries the query rules to store
 */
@XmlElementWrapper
@XmlElement(name = "query")
public void setQueries(List<QueryConfig> queries) {
this.queries = queries;
}
}

View File

@ -5,10 +5,7 @@ import com.fasterxml.jackson.databind.JsonNode;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.jayway.jsonpath.DocumentContext;
import com.jayway.jsonpath.JsonPath;
import eu.eudat.logic.proxy.config.DataUrlConfiguration;
import eu.eudat.logic.proxy.config.ExternalUrlCriteria;
import eu.eudat.logic.proxy.config.FetchStrategy;
import eu.eudat.logic.proxy.config.UrlConfiguration;
import eu.eudat.logic.proxy.config.*;
import eu.eudat.logic.proxy.config.configloaders.ConfigLoader;
import eu.eudat.logic.proxy.config.entities.GenericUrls;
import eu.eudat.logic.proxy.config.exceptions.HugeResultSet;
@ -197,7 +194,7 @@ public class RemoteFetcher {
ifFunderQueryExist(urlConfiguration, externalUrlCriteria);
if (urlConfiguration.getType() == null || urlConfiguration.getType().equals("External")) {
try {
results.addAll(getAllResultsFromUrl(urlConfiguration.getUrl(), fetchStrategy, urlConfiguration.getData(), urlConfiguration.getPaginationPath(), externalUrlCriteria, urlConfiguration.getLabel(), urlConfiguration.getKey(), urlConfiguration.getContentType(), urlConfiguration.getFirstpage(), urlConfiguration.getRequestBody(), urlConfiguration.getRequestType(), urlConfiguration.getFilterType()));
results.addAll(getAllResultsFromUrl(urlConfiguration.getUrl(), fetchStrategy, urlConfiguration.getData(), urlConfiguration.getPaginationPath(), externalUrlCriteria, urlConfiguration.getLabel(), urlConfiguration.getKey(), urlConfiguration.getContentType(), urlConfiguration.getFirstpage(), urlConfiguration.getRequestBody(), urlConfiguration.getRequestType(), urlConfiguration.getFilterType(), urlConfiguration.getQueries()));
} catch (Exception e) {
logger.error(e.getLocalizedMessage(), e);
}
@ -227,14 +224,38 @@ public class RemoteFetcher {
}
}
protected String replaceCriteriaOnUrl(String path, ExternalUrlCriteria externalUrlCriteria, String firstPage) {
/**
 * Builds the text that replaces the {@code {query}} placeholder of an external url.
 *
 * <p>The first rule whose condition matches the whole search text is used. Without a
 * separator, every {@code {like}} in the rule's value is replaced by the search text.
 * With a separator, the search text is split by it and the parts replace
 * {@code {like1}}, {@code {like2}}, ... in order.
 *
 * @param externalUrlCriteria criteria carrying the user's search text
 * @param queryConfigs        the rules configured for the url; may be {@code null}
 * @return the generated query, or an empty string when no usable rule matches
 */
private String calculateQuery(ExternalUrlCriteria externalUrlCriteria, List<QueryConfig> queryConfigs) {
    String like = externalUrlCriteria.getLike();
    // A url may use {query} without declaring any <queries>; there is nothing to build then.
    if (queryConfigs == null || like == null) {
        return "";
    }

    QueryConfig queryConfig = queryConfigs.stream()
            // Rules without a condition can never be selected; skipping them avoids an NPE in matches().
            .filter(candidate -> candidate.getCondition() != null && like.matches(candidate.getCondition()))
            .findFirst()
            .orElse(null);
    if (queryConfig == null || queryConfig.getValue() == null) {
        return "";
    }

    // String.replace substitutes literally. replaceAll would treat '$' and '\' in the
    // user's text as group references / escapes and throw or corrupt the query.
    if (queryConfig.getSeparator() == null) {
        return queryConfig.getValue().replace("{like}", like);
    }

    String[] likes = like.split(queryConfig.getSeparator());
    String finalQuery = queryConfig.getValue();
    for (int i = 0; i < likes.length; i++) {
        finalQuery = finalQuery.replace("{like" + (i + 1) + "}", likes[i]);
    }
    return finalQuery;
}
protected String replaceCriteriaOnUrl(String path, ExternalUrlCriteria externalUrlCriteria, String firstPage, List<QueryConfig> queries) {
String completedPath = path;
if (externalUrlCriteria.getLike() != null) {
if ((path.contains("openaire") || path.contains("orcid") || path.contains("ror")) && externalUrlCriteria.getLike().equals("")) {
completedPath = completedPath.replaceAll("\\{like}", "*");
completedPath = completedPath.replaceAll("\\{query}", "*");
} else {
if (completedPath.contains("{query}")) {
completedPath = completedPath.replaceAll("\\{query}", this.calculateQuery(externalUrlCriteria, queries));
} else {
completedPath = completedPath.replaceAll("\\{like}", externalUrlCriteria.getLike());
}
}
} else {
completedPath = completedPath.replace("{like}", "");
}
@ -274,14 +295,15 @@ public class RemoteFetcher {
} else {
completedPath = completedPath.replace("{path}", "");
}
logger.info(completedPath);
return completedPath;
}
private List<Map<String, String>> getAllResultsFromUrl(String path, FetchStrategy fetchStrategy, final DataUrlConfiguration jsonDataPath, final String jsonPaginationPath, ExternalUrlCriteria externalUrlCriteria, String tag, String key, String contentType, String firstPage, String requestBody, String requestType, String filterType) throws Exception {
private List<Map<String, String>> getAllResultsFromUrl(String path, FetchStrategy fetchStrategy, final DataUrlConfiguration jsonDataPath, final String jsonPaginationPath, ExternalUrlCriteria externalUrlCriteria, String tag, String key, String contentType, String firstPage, String requestBody, String requestType, String filterType, List<QueryConfig> queries) throws Exception {
Set<Integer> pages = new HashSet<>();
String replacedPath = replaceCriteriaOnUrl(path, externalUrlCriteria, firstPage);
String replacedBody = replaceCriteriaOnUrl(requestBody, externalUrlCriteria, firstPage);
String replacedPath = replaceCriteriaOnUrl(path, externalUrlCriteria, firstPage, queries);
String replacedBody = replaceCriteriaOnUrl(requestBody, externalUrlCriteria, firstPage, queries);
Results results = getResultsFromUrl(replacedPath, jsonDataPath, jsonPaginationPath, contentType, replacedBody, requestType);
if(filterType != null && filterType.equals("local") && (externalUrlCriteria.getLike() != null && !externalUrlCriteria.getLike().isEmpty())){

View File

@ -31,7 +31,7 @@ public class RemoteFetcherUtils {
ExternalUrlCriteria externalUrlCriteria = new ExternalUrlCriteria();
externalUrlCriteria.setPath(result.get("path"));
externalUrlCriteria.setHost(result.get("host"));
String replacedPath = remoteFetcher.replaceCriteriaOnUrl(jsonDataPath.getUrlConfiguration().getUrl(), externalUrlCriteria, jsonDataPath.getUrlConfiguration().getFirstpage());
String replacedPath = remoteFetcher.replaceCriteriaOnUrl(jsonDataPath.getUrlConfiguration().getUrl(), externalUrlCriteria, jsonDataPath.getUrlConfiguration().getFirstpage(), jsonDataPath.getUrlConfiguration().getQueries());
return remoteFetcher.getResultsFromUrl(replacedPath, jsonDataPath.getUrlConfiguration().getData(), jsonDataPath.getUrlConfiguration().getData().getPath(), jsonDataPath.getUrlConfiguration().getContentType(), requestBody, requestType);
}).filter(Objects::nonNull).map(results1 -> results1.getResults().get(0)).collect(Collectors.toList());
return new Results(multiResults, new HashMap<>(1, 1));

View File

@ -873,18 +873,34 @@ but not
<label>ORCID</label>
<ordinal>2</ordinal>
<type>External</type>
<url>https://pub.sandbox.orcid.org/v3.0/expanded-search/?q={like}&amp;start={page}&amp;rows={pageSize}</url>
<url>https://pub.sandbox.orcid.org/v3.0/expanded-search/?q={query}&amp;start={page}&amp;rows={pageSize}</url>
<firstPage>0</firstPage>
<contenttype>application/json; charset=UTF-8</contenttype>
<queries>
<query>
<condition>^[A-Za-z0-9]+ [A-Za-z0-9]+$</condition>
<separator> </separator>
<value>((given-names:{like1}+AND+family-name:{like2})+OR+(given-names:{like2}+AND+family-name:{like1}))</value>
</query>
<query>
<!-- The last character of an ORCID iD is a checksum that can be a digit or the letter X. -->
<condition>\d{4}-\d{4}-\d{4}-\d{3}[\dX]</condition>
<value>orcid:{like}</value>
</query>
<query>
<condition>.+</condition>
<value>given-names:{like}+OR+family-name:{like}</value>
</query>
</queries>
<data>
<path>$['expanded-result'][*]</path>
<fields>
<id>'orcid-id'</id>
<firstName>'given-names'</firstName>
<lastName>'family-names'</lastName>
<lastName>'family-name'</lastName>
<name>'name'</name>
</fields>
</data>
<filterType>api</filterType>
<paginationpath>$['num-found']</paginationpath>
</urlConfig>
<!-- <urlConfig>