argos/dmp-backend/web/src/main/java/eu/eudat/logic/proxy/fetching/RemoteFetcher.java

241 lines
12 KiB
Java
Raw Normal View History

2018-06-27 12:29:21 +02:00
package eu.eudat.logic.proxy.fetching;
import com.jayway.jsonpath.DocumentContext;
import com.jayway.jsonpath.JsonPath;
2018-06-27 12:29:21 +02:00
import eu.eudat.configurations.dynamicproject.DynamicProjectConfiguration;
2018-06-29 10:29:43 +02:00
import eu.eudat.logic.proxy.config.DataUrlConfiguration;
2018-06-27 12:29:21 +02:00
import eu.eudat.logic.proxy.config.FetchStrategy;
2018-06-29 10:29:43 +02:00
import eu.eudat.logic.proxy.config.UrlConfiguration;
2018-06-27 12:29:21 +02:00
import eu.eudat.logic.proxy.config.configloaders.ConfigLoader;
import eu.eudat.logic.proxy.config.exceptions.HugeResultSet;
import eu.eudat.logic.proxy.config.exceptions.NoURLFound;
2018-02-16 11:34:02 +01:00
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.cache.annotation.Cacheable;
import org.springframework.stereotype.Service;
import java.io.IOException;
import java.net.HttpURLConnection;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.*;
2018-05-28 11:50:42 +02:00
import java.util.stream.Collectors;
2017-11-22 09:57:51 +01:00
@Service
public class RemoteFetcher {
2018-01-17 16:06:35 +01:00
private ConfigLoader configLoader;
2018-03-28 15:24:47 +02:00
private DynamicProjectConfiguration dynamicProjectConfiguration;
2018-01-17 16:06:35 +01:00
2018-03-28 15:24:47 +02:00
/**
 * Creates the fetcher with the two configuration sources it reads from.
 *
 * @param configLoader                supplies the external URL configuration used by the lookup methods
 * @param dynamicProjectConfiguration supplies the URL configuration used by {@code getProjects}
 */
@Autowired
public RemoteFetcher(ConfigLoader configLoader, DynamicProjectConfiguration dynamicProjectConfiguration) {
    this.dynamicProjectConfiguration = dynamicProjectConfiguration;
    this.configLoader = configLoader;
}
2018-01-17 16:06:35 +01:00
@Cacheable("repositories")
2018-05-28 11:50:42 +02:00
public List<Map<String, String>> getRepositories(String query, String key) throws NoURLFound, HugeResultSet {
2018-06-29 10:29:43 +02:00
List<UrlConfiguration> urlConfigs =
key != null && !key.isEmpty() ? configLoader.getExternalUrls().getRepositories().getUrls().stream().filter(item -> item.getKey().equals(key)).collect(Collectors.toList())
2018-05-28 11:50:42 +02:00
: configLoader.getExternalUrls().getRepositories().getUrls();
2018-01-17 16:06:35 +01:00
FetchStrategy fetchStrategy = configLoader.getExternalUrls().getRepositories().getFetchMode();
return getAll(urlConfigs, fetchStrategy, query);
}
@Cacheable("projects")
public List<Map<String, String>> getProjects(String query) throws NoURLFound, HugeResultSet {
2018-06-29 10:29:43 +02:00
List<UrlConfiguration> urlConfigs = Arrays.asList(this.dynamicProjectConfiguration.getConfiguration().getMainExternalField().getUrlConfig());
2018-01-17 16:06:35 +01:00
FetchStrategy fetchStrategy = configLoader.getExternalUrls().getProjects().getFetchMode();
return getAll(urlConfigs, fetchStrategy, query);
}
@Cacheable("organisations")
2018-05-28 11:50:42 +02:00
public List<Map<String, String>> getOrganisations(String query, String key) throws NoURLFound, HugeResultSet {
2018-06-29 10:29:43 +02:00
List<UrlConfiguration> urlConfigs =
key != null && !key.isEmpty() ? configLoader.getExternalUrls().getOrganisations().getUrls().stream().filter(item -> item.getKey().equals(key)).collect(Collectors.toList())
2018-05-28 11:50:42 +02:00
: configLoader.getExternalUrls().getOrganisations().getUrls();
2018-01-17 16:06:35 +01:00
FetchStrategy fetchStrategy = configLoader.getExternalUrls().getOrganisations().getFetchMode();
return getAll(urlConfigs, fetchStrategy, query);
}
@Cacheable("registries")
2018-05-28 11:50:42 +02:00
public List<Map<String, String>> getRegistries(String query, String key) throws NoURLFound, HugeResultSet {
2018-06-29 10:29:43 +02:00
List<UrlConfiguration> urlConfigs =
key != null && !key.isEmpty() ? configLoader.getExternalUrls().getRegistries().getUrls().stream().filter(item -> item.getKey().equals(key)).collect(Collectors.toList())
2018-05-28 11:50:42 +02:00
: configLoader.getExternalUrls().getRegistries().getUrls();
2018-01-17 16:06:35 +01:00
FetchStrategy fetchStrategy = configLoader.getExternalUrls().getRegistries().getFetchMode();
return getAll(urlConfigs, fetchStrategy, query);
}
@Cacheable("services")
2018-06-29 10:29:43 +02:00
public List<Map<String, String>> getServices(String query, String key) throws NoURLFound, HugeResultSet {
List<UrlConfiguration> urlConfigs =
key != null && !key.isEmpty() ? configLoader.getExternalUrls().getServices().getUrls().stream().filter(item -> item.getKey().equals(key)).collect(Collectors.toList())
2018-05-28 11:50:42 +02:00
: configLoader.getExternalUrls().getServices().getUrls();
2018-01-17 16:06:35 +01:00
FetchStrategy fetchStrategy = configLoader.getExternalUrls().getServices().getFetchMode();
return getAll(urlConfigs, fetchStrategy, query);
}
@Cacheable("researchers")
2018-06-29 10:29:43 +02:00
public List<Map<String, String>> getResearchers(String query, String key) throws NoURLFound, HugeResultSet {
List<UrlConfiguration> urlConfigs =
key != null && !key.isEmpty() ? configLoader.getExternalUrls().getResearchers().getUrls().stream().filter(item -> item.getKey().equals(key)).collect(Collectors.toList())
2018-05-28 11:50:42 +02:00
: configLoader.getExternalUrls().getResearchers().getUrls();
2018-01-17 16:06:35 +01:00
FetchStrategy fetchStrategy = configLoader.getExternalUrls().getResearchers().getFetchMode();
return getAll(urlConfigs, fetchStrategy, query);
}
2018-07-11 15:47:36 +02:00
/**
 * Looks up tags in the configured external sources.
 *
 * @param query optional search text forwarded to the remote endpoints; may be null or empty
 * @param key   when non-empty, only the configured sources whose key equals this value are queried;
 *              when null or empty, every configured tag source is queried
 * @return one map per remote entry, as produced by {@code getAll}
 * @throws NoURLFound    if no source URL is configured (or none matches {@code key})
 * @throws HugeResultSet if a source reports more results than the configured maximum
 */
@Cacheable("tags")
public List<Map<String, String>> getTags(String query, String key) throws NoURLFound, HugeResultSet {
    List<UrlConfiguration> urlConfigs = configLoader.getExternalUrls().getTags().getUrls();
    // Filter only when there is something to filter: a missing URL list must reach getAll,
    // which reports it as NoURLFound instead of failing here with a NullPointerException.
    if (urlConfigs != null && key != null && !key.isEmpty()) {
        urlConfigs = urlConfigs.stream()
                // key is known to be non-null here, so a source without a key is simply skipped
                .filter(item -> key.equals(item.getKey()))
                .collect(Collectors.toList());
    }
    FetchStrategy fetchStrategy = configLoader.getExternalUrls().getTags().getFetchMode();
    return getAll(urlConfigs, fetchStrategy, query);
}
2018-01-17 16:06:35 +01:00
@Cacheable("datasets")
2018-06-29 10:29:43 +02:00
public List<Map<String, String>> getDatasets(String query, String key) throws NoURLFound, HugeResultSet {
List<UrlConfiguration> urlConfigs =
key != null && !key.isEmpty() ? configLoader.getExternalUrls().getDatasets().getUrls().stream().filter(item -> item.getKey().equals(key)).collect(Collectors.toList())
2018-05-28 11:50:42 +02:00
: configLoader.getExternalUrls().getDatasets().getUrls();
2018-01-17 16:06:35 +01:00
FetchStrategy fetchStrategy = configLoader.getExternalUrls().getDatasets().getFetchMode();
return getAll(urlConfigs, fetchStrategy, query);
}
2018-06-29 10:29:43 +02:00
private List<Map<String, String>> getAll(List<UrlConfiguration> urlConfigs, FetchStrategy fetchStrategy, String query) throws NoURLFound, HugeResultSet {
2018-01-17 16:06:35 +01:00
if (urlConfigs == null || urlConfigs.isEmpty())
throw new NoURLFound("No Repository urls found in configuration");
2018-06-29 10:29:43 +02:00
Collections.sort(urlConfigs, Comparator.comparing(UrlConfiguration::getOrdinal));
List<Map<String, String>> results = new LinkedList<>();
for (UrlConfiguration urlConfig : urlConfigs) {
results.addAll(getAllResultsFromUrl(urlConfig.getUrl(), fetchStrategy, urlConfig.getData(), urlConfig.getPaginationPath(), query, urlConfig.getLabel()));
}
return results;
2018-01-17 16:06:35 +01:00
}
2018-06-29 10:29:43 +02:00
private List<Map<String, String>> getAllResultsFromUrl(String path, FetchStrategy fetchStrategy, final DataUrlConfiguration jsonDataPath, final String jsonPaginationPath, String query, String key) throws HugeResultSet {
2018-01-17 16:06:35 +01:00
Set<Integer> pages = new HashSet<Integer>();
final String searchQuery = (query != null) && !query.isEmpty() ? "&search=" + query : "";
Results results = getResultsFromUrl(path + "?page=1" + searchQuery, jsonDataPath, jsonPaginationPath);
if (fetchStrategy == FetchStrategy.FIRST)
2018-06-29 10:29:43 +02:00
return results == null ? new LinkedList<>() : results.getResults().stream().map(x -> {
x.put("tag", key);
return x;
}).collect(Collectors.toList());
2018-01-17 16:06:35 +01:00
if (results.getPagination() != null && results.getPagination().get("pages") != null) //if has more pages, add them to the pages set
for (int i = 2; i <= results.getPagination().get("pages"); i++)
pages.add(i);
Long maxResults = configLoader.getExternalUrls().getMaxresults();
if ((maxResults > 0) && (results.getPagination().get("count") > maxResults))
throw new HugeResultSet("The submitted search query " + query + " is about to return " + results.getPagination().get("count") + " results... Please submit a more detailed search query");
Optional<Results> optionalResults = pages.parallelStream()
.map(page -> getResultsFromUrl(path + "?page=" + page + searchQuery, jsonDataPath, jsonPaginationPath))
.reduce((result1, result2) -> {
result1.getResults().addAll(result2.getResults());
return result1;
});
Results remainingResults = optionalResults.isPresent() ? optionalResults.get() : new Results();
remainingResults.getResults().addAll(results.getResults());
2018-06-29 10:29:43 +02:00
return remainingResults.getResults().stream().map(x -> {
x.put("tag", key);
return x;
}).collect(Collectors.toList());
2018-01-17 16:06:35 +01:00
}
2018-06-29 10:29:43 +02:00
private Results getResultsFromUrl(String urlString, DataUrlConfiguration jsonDataPath, String jsonPaginationPath) {
2018-01-17 16:06:35 +01:00
try {
URL url = new URL(urlString);
HttpURLConnection con = (HttpURLConnection) url.openConnection();
con.setRequestMethod("GET");
con.setRequestProperty("Accept", "application/vnd.api+json; charset=utf-8");
int responseCode = con.getResponseCode();
if (responseCode == HttpURLConnection.HTTP_OK) { // success
//do here all the parsing
DocumentContext jsonContext = JsonPath.parse(con.getInputStream());
2018-06-29 10:29:43 +02:00
Results results = new Results(jsonContext.read(jsonDataPath.getPath()
+ "[" + jsonDataPath.getFieldsUrlConfiguration().getName() + "," + jsonDataPath.getFieldsUrlConfiguration().getDescription()
+ "," + jsonDataPath.getFieldsUrlConfiguration().getUri() + "," + jsonDataPath.getFieldsUrlConfiguration().getId() + "]"),
jsonContext.read(jsonPaginationPath));
results.results = results.results.stream().map(e -> e.entrySet().stream().collect(Collectors.toMap(x -> this.transformKey(jsonDataPath,x.getKey()), Map.Entry::getValue)))
.collect(Collectors.toList());
2018-01-17 16:06:35 +01:00
return results;
}
} catch (MalformedURLException e1) {
2018-06-29 10:29:43 +02:00
e1.printStackTrace();
2018-01-17 16:06:35 +01:00
} //maybe print smth...
catch (IOException e2) {
2018-06-29 10:29:43 +02:00
e2.printStackTrace();
} //maybe print smth...
catch (Exception exception) {
exception.printStackTrace();
2018-01-17 16:06:35 +01:00
} //maybe print smth...
finally {
}
return null;
}
2018-06-29 10:29:43 +02:00
/**
 * Translates a field name of the remote source into the key used in the returned maps.
 * The configured field names are compared with their single quotes removed; the id field
 * is checked first, then description, uri and name.
 *
 * @param dataUrlConfiguration provides the configured field names of the source
 * @param key                  field name found in the response
 * @return {@code "pid"}, {@code "description"}, {@code "uri"} or {@code "name"};
 *         {@code null} if {@code key} matches none of the configured fields
 */
private String transformKey(DataUrlConfiguration dataUrlConfiguration, String key) {
    String canonical = null;
    if (key.equals(dataUrlConfiguration.getFieldsUrlConfiguration().getId().replace("'", ""))) {
        canonical = "pid";
    } else if (key.equals(dataUrlConfiguration.getFieldsUrlConfiguration().getDescription().replace("'", ""))) {
        canonical = "description";
    } else if (key.equals(dataUrlConfiguration.getFieldsUrlConfiguration().getUri().replace("'", ""))) {
        canonical = "uri";
    } else if (key.equals(dataUrlConfiguration.getFieldsUrlConfiguration().getName().replace("'", ""))) {
        canonical = "name";
    }
    return canonical;
}
2018-01-17 16:06:35 +01:00
/**
 * Mutable holder for one fetched page: the entries (one map per entry) and the
 * pagination data that came with them.
 */
class Results {
    /** Entries of the page, one map per entry. */
    List<Map<String, String>> results;
    /** Pagination data of the page. */
    Map<String, Integer> pagination;

    /** Creates a holder with an empty, modifiable entry list and empty pagination data. */
    public Results() {
        this(new ArrayList<>(), new HashMap<>());
    }

    /** Creates a holder around the given objects; no copies are made. */
    public Results(List<Map<String, String>> results, Map<String, Integer> pagination) {
        this.pagination = pagination;
        this.results = results;
    }

    /** @return the pagination data, exactly as stored */
    public Map<String, Integer> getPagination() {
        return this.pagination;
    }

    /** @param pagination the pagination data to store */
    public void setPagination(Map<String, Integer> pagination) {
        this.pagination = pagination;
    }

    /** @return the entry list, exactly as stored */
    public List<Map<String, String>> getResults() {
        return this.results;
    }

    /** @param results the entry list to store */
    public void setResults(List<Map<String, String>> results) {
        this.results = results;
    }
}
}