cleaning up: let's remove the classes we do not need anymore, like Virtuoso-related classes and specific plugins that we have created for Parthenos

This commit is contained in:
Alessia Bardi 2019-12-16 18:34:08 +01:00
parent 5087750f96
commit c770fc40e0
22 changed files with 134 additions and 1893 deletions

View File

@ -1,122 +0,0 @@
package eu.dnetlib.ariadneplus.workflows.nodes;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.List;
import eu.dnetlib.clients.enabling.ISLookUpClient;
import eu.dnetlib.enabling.locators.UniqueServiceLocator;
import eu.dnetlib.msro.workflows.graph.Arc;
import eu.dnetlib.msro.workflows.nodes.AsyncJobNode;
import eu.dnetlib.msro.workflows.procs.Env;
import eu.dnetlib.msro.workflows.util.WorkflowsConstants;
import eu.dnetlib.rmi.datasource.DatasourceManagerService;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.springframework.beans.factory.annotation.Autowired;
/**
 * Workflow node that scans a local folder of Clarin mapping files.
 * <p>
 * The folder tree is walked with a {@link ClarinFileVisitor}; the counters collected by the
 * visitor are then stored in the workflow environment under keys prefixed with
 * {@link WorkflowsConstants#MAIN_LOG_PREFIX}.
 * <p>
 * Created by Alessia Bardi on 12/01/2018.
 *
 * @author Alessia Bardi
 */
public class ClarinAPIGeneratorJobNode extends AsyncJobNode {

    private static final Log log = LogFactory.getLog(ClarinAPIGeneratorJobNode.class);

    /** Path to a local folder containing the subfolders with Clarin mapping files. */
    private String folderPath;
    /** Path to a local folder containing the subfolders with Clarin input files. */
    private String inputBaseUrlPrefix;
    /** Value handed to the visitor, which stores it in the "metadata_identifier_path" extra field of new interfaces. */
    private String metadataIdentifierPath;
    /** Identifier of the Clarin datasource profile; used to look up the existing interfaces. */
    private String clarinDatasourceProfileID;
    /** Original identifier of the Clarin datasource; handed to the visitor to build interface ids. */
    private String clarinDatasourceOriginalId;

    @Autowired
    private UniqueServiceLocator serviceLocator;
    @Autowired
    private ISLookUpClient isLookupClient;

    /**
     * Walks {@link #getFolderPath()} with a {@link ClarinFileVisitor} and publishes the visitor's
     * counters (visitedFiles, createdTDS, updatedTDS, visitedFolders, createdInterfaces) in the environment.
     *
     * @param env the workflow environment receiving the counters
     * @return {@link Arc#DEFAULT_ARC}
     * @throws IllegalArgumentException if the configured folder path is not a directory
     * @throws Exception                on lookup or file system errors
     */
    @Override
    protected String execute(final Env env) throws Exception {
        final Path folder = Paths.get(getFolderPath());
        if (!Files.isDirectory(folder)) {
            throw new IllegalArgumentException(getFolderPath() + " must be a directory");
        }
        // The DatasourceManagerService is not needed here: the visitor obtains it from the locator itself.
        // Ids of the interfaces already declared in the datasource profile.
        final List<String> apiIds = getIsLookupClient().search("//RESOURCE_PROFILE[./HEADER/RESOURCE_IDENTIFIER/@value='" + getClarinDatasourceProfileID() + "']//INTERFACE/@id/string()");
        final ClarinFileVisitor visitor = new ClarinFileVisitor(getClarinDatasourceProfileID(), getClarinDatasourceOriginalId(),
                getInputBaseUrlPrefix(), getMetadataIdentifierPath(), apiIds, getServiceLocator(), getIsLookupClient());
        Files.walkFileTree(folder, visitor);

        env.setAttribute(WorkflowsConstants.MAIN_LOG_PREFIX + "visitedFiles", visitor.getCountVisitedFiles());
        env.setAttribute(WorkflowsConstants.MAIN_LOG_PREFIX + "createdTDS", visitor.getCountCreatedTDS());
        env.setAttribute(WorkflowsConstants.MAIN_LOG_PREFIX + "updatedTDS", visitor.getCountUpdatedTDS());
        env.setAttribute(WorkflowsConstants.MAIN_LOG_PREFIX + "visitedFolders", visitor.getCountVisitedFolders());
        env.setAttribute(WorkflowsConstants.MAIN_LOG_PREFIX + "createdInterfaces", visitor.getCountCreatedInterfaces());
        return Arc.DEFAULT_ARC;
    }

    public String getFolderPath() {
        return folderPath;
    }

    public void setFolderPath(final String folderPath) {
        this.folderPath = folderPath;
    }

    public String getInputBaseUrlPrefix() {
        return inputBaseUrlPrefix;
    }

    public void setInputBaseUrlPrefix(final String inputBaseUrlPrefix) {
        this.inputBaseUrlPrefix = inputBaseUrlPrefix;
    }

    public String getMetadataIdentifierPath() {
        return metadataIdentifierPath;
    }

    public void setMetadataIdentifierPath(final String metadataIdentifierPath) {
        this.metadataIdentifierPath = metadataIdentifierPath;
    }

    public String getClarinDatasourceProfileID() {
        return clarinDatasourceProfileID;
    }

    public void setClarinDatasourceProfileID(final String clarinDatasourceProfileID) {
        this.clarinDatasourceProfileID = clarinDatasourceProfileID;
    }

    public String getClarinDatasourceOriginalId() {
        return clarinDatasourceOriginalId;
    }

    public void setClarinDatasourceOriginalId(final String clarinDatasourceOriginalId) {
        this.clarinDatasourceOriginalId = clarinDatasourceOriginalId;
    }

    public UniqueServiceLocator getServiceLocator() {
        return serviceLocator;
    }

    public void setServiceLocator(final UniqueServiceLocator serviceLocator) {
        this.serviceLocator = serviceLocator;
    }

    public ISLookUpClient getIsLookupClient() {
        return isLookupClient;
    }

    public void setIsLookupClient(final ISLookUpClient isLookupClient) {
        this.isLookupClient = isLookupClient;
    }
}

View File

@ -1,263 +0,0 @@
package eu.dnetlib.ariadneplus.workflows.nodes;
import java.io.IOException;
import java.nio.charset.Charset;
import java.nio.file.FileVisitResult;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.SimpleFileVisitor;
import java.nio.file.attribute.BasicFileAttributes;
import java.util.List;
import java.util.Map;
import java.util.stream.Collectors;
import com.google.common.collect.Lists;
import com.google.common.collect.Maps;
import eu.dnetlib.clients.enabling.ISLookUpClient;
import eu.dnetlib.enabling.locators.UniqueServiceLocator;
import eu.dnetlib.miscutils.datetime.DateUtils;
import eu.dnetlib.rmi.datasource.DatasourceManagerService;
import eu.dnetlib.rmi.datasource.DatasourceManagerServiceException;
import eu.dnetlib.rmi.datasource.IfaceDesc;
import eu.dnetlib.rmi.enabling.ISLookUpException;
import eu.dnetlib.rmi.enabling.ISRegistryException;
import eu.dnetlib.rmi.enabling.ISRegistryService;
import org.antlr.stringtemplate.StringTemplate;
import org.apache.commons.io.IOUtils;
import org.apache.commons.lang3.StringUtils;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import static java.nio.file.FileVisitResult.CONTINUE;
/**
 * File visitor that registers D-Net resources for a folder tree of Clarin mapping files.
 * <p>
 * For every visited directory (except {@code x3ml-mappings}) an interface is added to the Clarin
 * datasource when its id is not in the list of known interfaces. For every visited file a
 * transformation rule (TDS) profile titled after the file name is registered, or its code is
 * updated when a profile with that title already exists. The actions performed are counted and
 * exposed through the {@code getCount*} accessors.
 * <p>
 * Created by Alessia Bardi on 12/01/2018.
 *
 * @author Alessia Bardi
 */
public class ClarinFileVisitor extends SimpleFileVisitor<Path> {

    private static final Log log = LogFactory.getLog(ClarinFileVisitor.class);

    /** Prefix of the interface identifiers built by this visitor. */
    private static final String API_PREFIX = "api_________::";
    /** Classpath location of the StringTemplate used to create new TDS profiles. */
    private static final String TDS_TEMPLATE = "/eu/dnetlib/ariadneplus/workflows/nodes/clarin_tds.xml.st";

    /** Identifiers of the interfaces that already exist for the datasource. */
    private List<String> interfaces = Lists.newArrayList();
    private String clarinDatasourceProfileID;
    private String clarinDatasourceOriginalId;
    private DatasourceManagerService dsMan = null;
    private String inputBaseUrlPrefix;
    private String metadataIdentifierPath;
    private ISLookUpClient lookupClient;
    private ISRegistryService registryService;

    private int countVisitedFiles = 0;
    private int countCreatedTDS = 0;
    private int countUpdatedTDS = 0;
    private int countVisitedFolders = 0;
    private int countCreatedInterfaces = 0;

    /**
     * Creates an inactive "filesystem" interface for the visited directory unless an interface with
     * the corresponding id is already known. Failures to add the interface are logged and the walk continues.
     *
     * @param dir   the directory about to be visited
     * @param attrs the directory attributes (unused)
     * @return always {@link FileVisitResult#CONTINUE}
     */
    @Override
    public FileVisitResult preVisitDirectory(final Path dir, final BasicFileAttributes attrs) throws IOException {
        log.info("Processing " + dir.toString());
        countVisitedFolders++;
        String dirName = dir.getFileName().toString();
        // the folder holding the x3ml mappings does not get an interface of its own
        if (dirName.equalsIgnoreCase("x3ml-mappings")) {
            return CONTINUE;
        }
        String apiId = API_PREFIX + getClarinDatasourceOriginalId() + "::" + dirName;
        if (interfaces.contains(apiId)) {
            if (log.isDebugEnabled()) {
                log.debug("Interface " + apiId + " already exists");
            }
            return CONTINUE;
        }
        //API TO BE CREATED
        IfaceDesc iface = new IfaceDesc();
        iface.setActive(false);
        iface.setCompliance("metadata");
        iface.setContentDescription("metadata");
        iface.setId(apiId);
        iface.setRemovable(true);
        iface.setTypology("dnet:repository::clarin");
        iface.setAccessProtocol("filesystem");
        Map<String, String> accessParams = Maps.newHashMap();
        accessParams.put("extensions", "xml");
        iface.setAccessParams(accessParams);
        iface.setBaseUrl(StringUtils.appendIfMissing(getInputBaseUrlPrefix(), "/") + dirName);
        Map<String, String> extraFields = Maps.newHashMap();
        extraFields.put("metadata_identifier_path", getMetadataIdentifierPath());
        iface.setExtraFields(extraFields);
        try {
            dsMan.addInterface(getClarinDatasourceProfileID(), iface);
            countCreatedInterfaces++;
            log.info("CREATED NEW INTERFACE " + iface.getId() + " for " + getClarinDatasourceOriginalId() + "(" + getClarinDatasourceProfileID() + ")");
        } catch (DatasourceManagerServiceException e) {
            log.error("Can't add interface " + iface.getId() + " to " + getClarinDatasourceOriginalId() + "(" + getClarinDatasourceProfileID() + ")", e);
        }
        return CONTINUE;
    }

    /**
     * Registers a new TDS profile for the visited file, or updates the code of the existing profile
     * whose title equals the file name. Lookup and registry failures are logged and the walk continues.
     *
     * @param file  the visited file
     * @param attrs the file attributes (unused)
     * @return always {@link FileVisitResult#CONTINUE}
     * @throws IOException if the file or the profile template cannot be read
     */
    @Override
    public FileVisitResult visitFile(final Path file, final BasicFileAttributes attrs) throws IOException {
        String filename = file.getFileName().toString();
        log.info("Processing " + file.toString());
        countVisitedFiles++;
        String tdsTitle = filename;
        // All lines are joined without separators and the XML declaration is dropped.
        // Files.readAllLines closes the file once read; the stream returned by Files.lines
        // was never closed and leaked one file handle per visited file.
        String updatedCode = String.join("", Files.readAllLines(file)).replace("<?xml version=\"1.0\" encoding=\"UTF-8\" standalone=\"yes\"?>", "");
        // NOTE(review): updatedCode is wrapped in CDATA below; a mapping file containing "]]>" would break the profile — confirm inputs.
        try {
            List<String> res = this.lookupClient.search("//RESOURCE_PROFILE[.//RESOURCE_TYPE/@value=\"TransformationRuleDSResourceType\" and .//CONFIGURATION/SCRIPT/TITLE/string()=\"" + tdsTitle + "\"]/HEADER/RESOURCE_IDENTIFIER/@value/string()");
            if (res == null || res.isEmpty()) {
                log.debug("Creating new TDS profile for " + filename);
                // The URL overload opens and closes the resource stream itself.
                final String template = IOUtils.toString(getClass().getResource(TDS_TEMPLATE), Charset.forName("UTF-8"));
                final StringTemplate st = new StringTemplate(template);
                st.setAttribute("date", DateUtils.calculate_ISO8601(DateUtils.now()));
                st.setAttribute("title", tdsTitle);
                st.setAttribute("mapping", "<![CDATA[" + updatedCode + "]]>");
                String profId = this.registryService.registerProfile(st.toString());
                countCreatedTDS++;
                log.info("REGISTERED NEW TDS FOR " + filename + ": " + profId);
            } else {
                String tdsProfileId = res.get(0);
                log.debug("Updating TDS profile " + tdsProfileId + " for " + filename);
                boolean done = this.registryService.updateProfileNode(tdsProfileId, "//CONFIGURATION/SCRIPT/CODE", "<CODE><![CDATA[" + updatedCode + "]]></CODE>");
                if (done) {
                    log.info("TDS PROFILE " + tdsProfileId + " UPDATED with contents from " + filename);
                    countUpdatedTDS++;
                } else {
                    log.error("!!! TDS PROFILE " + tdsProfileId + " COULD NOT BE UPDATED with contents from " + filename);
                }
            }
        } catch (ISLookUpException | ISRegistryException e) {
            log.error("CANNOT UPDATE/CREATE TDS PROFILE FOR " + filename, e);
        }
        return CONTINUE;
    }

    /** Creates a visitor without collaborators; they must be provided through the setters. */
    protected ClarinFileVisitor() {
        super();
    }

    /**
     * Creates a fully configured visitor.
     *
     * @param clarinDatasourceProfileID  identifier of the Clarin datasource profile
     * @param clarinDatasourceOriginalId original identifier of the Clarin datasource, used to build interface ids
     * @param inputBaseUrlPrefix         prefix of the base URL assigned to new interfaces
     * @param metadataIdentifierPath     value of the "metadata_identifier_path" extra field of new interfaces
     * @param interfaces                 identifiers of the interfaces that already exist
     * @param locator                    locator used to obtain the datasource manager and the registry service
     * @param lookupClient               client used to search for existing TDS profiles
     */
    public ClarinFileVisitor(final String clarinDatasourceProfileID, final String clarinDatasourceOriginalId,
            final String inputBaseUrlPrefix, final String metadataIdentifierPath, final List<String> interfaces,
            final UniqueServiceLocator locator, final ISLookUpClient lookupClient) {
        super();
        this.clarinDatasourceOriginalId = clarinDatasourceOriginalId;
        this.clarinDatasourceProfileID = clarinDatasourceProfileID;
        this.inputBaseUrlPrefix = inputBaseUrlPrefix;
        this.metadataIdentifierPath = metadataIdentifierPath;
        this.interfaces = interfaces;
        this.dsMan = locator.getService(DatasourceManagerService.class);
        this.registryService = locator.getService(ISRegistryService.class);
        this.lookupClient = lookupClient;
    }

    public List<String> getInterfaces() {
        return interfaces;
    }

    public void setInterfaces(final List<String> interfaces) {
        this.interfaces = interfaces;
    }

    public String getClarinDatasourceOriginalId() {
        return clarinDatasourceOriginalId;
    }

    public void setClarinDatasourceOriginalId(final String clarinDatasourceOriginalId) {
        this.clarinDatasourceOriginalId = clarinDatasourceOriginalId;
    }

    public DatasourceManagerService getDsMan() {
        return dsMan;
    }

    public void setDsMan(final DatasourceManagerService dsMan) {
        this.dsMan = dsMan;
    }

    public String getInputBaseUrlPrefix() {
        return inputBaseUrlPrefix;
    }

    public void setInputBaseUrlPrefix(final String inputBaseUrlPrefix) {
        this.inputBaseUrlPrefix = inputBaseUrlPrefix;
    }

    public String getMetadataIdentifierPath() {
        return metadataIdentifierPath;
    }

    public void setMetadataIdentifierPath(final String metadataIdentifierPath) {
        this.metadataIdentifierPath = metadataIdentifierPath;
    }

    public String getClarinDatasourceProfileID() {
        return clarinDatasourceProfileID;
    }

    public void setClarinDatasourceProfileID(final String clarinDatasourceProfileID) {
        this.clarinDatasourceProfileID = clarinDatasourceProfileID;
    }

    public ISLookUpClient getLookupClient() {
        return lookupClient;
    }

    public void setLookupClient(final ISLookUpClient lookupClient) {
        this.lookupClient = lookupClient;
    }

    public ISRegistryService getRegistryService() {
        return registryService;
    }

    public void setRegistryService(final ISRegistryService registryService) {
        this.registryService = registryService;
    }

    public int getCountVisitedFiles() {
        return countVisitedFiles;
    }

    public void setCountVisitedFiles(final int countVisitedFiles) {
        this.countVisitedFiles = countVisitedFiles;
    }

    public int getCountCreatedTDS() {
        return countCreatedTDS;
    }

    public void setCountCreatedTDS(final int countCreatedTDS) {
        this.countCreatedTDS = countCreatedTDS;
    }

    public int getCountUpdatedTDS() {
        return countUpdatedTDS;
    }

    public void setCountUpdatedTDS(final int countUpdatedTDS) {
        this.countUpdatedTDS = countUpdatedTDS;
    }

    public int getCountVisitedFolders() {
        return countVisitedFolders;
    }

    public void setCountVisitedFolders(final int countVisitedFolders) {
        this.countVisitedFolders = countVisitedFolders;
    }

    public int getCountCreatedInterfaces() {
        return countCreatedInterfaces;
    }

    public void setCountCreatedInterfaces(final int countCreatedInterfaces) {
        this.countCreatedInterfaces = countCreatedInterfaces;
    }
}

View File

@ -1,51 +0,0 @@
package eu.dnetlib.ariadneplus.workflows.nodes;
import eu.dnetlib.msro.workflows.graph.Arc;
import eu.dnetlib.msro.workflows.procs.Env;
import eu.dnetlib.rmi.common.ResultSet;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.springframework.web.client.RestTemplate;
/**
 * Variant of {@link ReadVirtuosoJobNode} that reads only the subjects of one type, identified by
 * a type namespace and a type name.
 * <p>
 * Created by Alessia Bardi on 26/01/2018.
 *
 * @author Alessia Bardi
 */
public class ReadVirtuosoByTypeJobNode extends ReadVirtuosoJobNode {

    private static final Log log = LogFactory.getLog(ReadVirtuosoByTypeJobNode.class);

    private String typeNamespace;
    private String typeName;

    /**
     * Builds a {@link VirtuosoAriadnePlusByTypeIterator}, wraps it in a result set and stores the
     * result set in the environment under the {@code virtuoso_rs} attribute.
     *
     * @param env the workflow environment
     * @return {@link Arc#DEFAULT_ARC}
     */
    @Override
    protected String execute(final Env env) {
        log.info("Using virtuoso reader at : " + getVirtuosoReaderAPIUrl());
        final RestTemplate template = new RestTemplate(getClientHttpRequestFactory());
        final VirtuosoAriadnePlusByTypeIterator byTypeIterator = new VirtuosoAriadnePlusByTypeIterator()
                .typeNamespace(typeNamespace)
                .typeName(typeName)
                .datasourceInterface(getDatasourceInterface())
                .virtuosoReaderAPIUrl(getVirtuosoReaderAPIUrl())
                .datasourceName(getDatasourceName())
                .restTemplate(template);
        log.debug(String.format("Created iterator for %s:%s : ", typeNamespace, typeName));
        final ResultSet<String> resultSet = getRsFactory().createResultSet(() -> byTypeIterator);
        env.setAttribute("virtuoso_rs", resultSet);
        return Arc.DEFAULT_ARC;
    }

    public String getTypeNamespace() {
        return typeNamespace;
    }

    public void setTypeNamespace(final String typeNamespace) {
        this.typeNamespace = typeNamespace;
    }

    public String getTypeName() {
        return typeName;
    }

    public void setTypeName(final String typeName) {
        this.typeName = typeName;
    }
}

View File

@ -1,87 +0,0 @@
package eu.dnetlib.ariadneplus.workflows.nodes;
import eu.dnetlib.enabling.resultset.factory.ResultSetFactory;
import eu.dnetlib.msro.workflows.graph.Arc;
import eu.dnetlib.msro.workflows.nodes.AsyncJobNode;
import eu.dnetlib.msro.workflows.procs.Env;
import eu.dnetlib.rmi.common.ResultSet;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.http.client.ClientHttpRequestFactory;
import org.springframework.http.client.HttpComponentsClientHttpRequestFactory;
import org.springframework.web.client.RestTemplate;
/**
 * Workflow node that exposes the records read from a Virtuoso reader API as a result set.
 * <p>
 * Created by Alessia Bardi on 26/01/2018.
 *
 * @author Alessia Bardi
 */
public class ReadVirtuosoJobNode extends AsyncJobNode {

    private static final Log log = LogFactory.getLog(ReadVirtuosoJobNode.class);

    private String datasourceName;
    private String datasourceInterface;
    private String virtuosoReaderAPIUrl;
    /** Read timeout applied to the HTTP request factory (passed to {@code setReadTimeout}). */
    private int readTimeout = 30000;

    @Autowired
    private ResultSetFactory rsFactory;

    /**
     * Builds a {@link VirtuosoAriadnePlusIterator}, wraps it in a result set and stores the
     * result set in the environment under the {@code virtuoso_rs} attribute.
     *
     * @param env the workflow environment
     * @return {@link Arc#DEFAULT_ARC}
     */
    @Override
    protected String execute(final Env env) {
        log.info("Using virtuoso reader at : " + getVirtuosoReaderAPIUrl());
        final RestTemplate template = new RestTemplate(getClientHttpRequestFactory());
        final VirtuosoAriadnePlusIterator recordIterator = new VirtuosoAriadnePlusIterator()
                .datasourceInterface(getDatasourceInterface())
                .virtuosoReaderAPIUrl(getVirtuosoReaderAPIUrl())
                .datasourceName(getDatasourceName())
                .restTemplate(template);
        final ResultSet<String> resultSet = rsFactory.createResultSet(() -> recordIterator);
        env.setAttribute("virtuoso_rs", resultSet);
        return Arc.DEFAULT_ARC;
    }

    /**
     * Creates the HTTP request factory used by the REST template; only the read timeout is configured.
     *
     * @return a request factory with {@link #getReadTimeout()} as read timeout
     */
    protected ClientHttpRequestFactory getClientHttpRequestFactory() {
        final HttpComponentsClientHttpRequestFactory requestFactory = new HttpComponentsClientHttpRequestFactory();
        requestFactory.setReadTimeout(readTimeout);
        return requestFactory;
    }

    public String getDatasourceName() {
        return datasourceName;
    }

    public void setDatasourceName(final String datasourceName) {
        this.datasourceName = datasourceName;
    }

    public String getDatasourceInterface() {
        return datasourceInterface;
    }

    public void setDatasourceInterface(final String datasourceInterface) {
        this.datasourceInterface = datasourceInterface;
    }

    public String getVirtuosoReaderAPIUrl() {
        return virtuosoReaderAPIUrl;
    }

    public void setVirtuosoReaderAPIUrl(final String virtuosoReaderAPIUrl) {
        this.virtuosoReaderAPIUrl = virtuosoReaderAPIUrl;
    }

    public int getReadTimeout() {
        return readTimeout;
    }

    public void setReadTimeout(final int readTimeout) {
        this.readTimeout = readTimeout;
    }

    public ResultSetFactory getRsFactory() {
        return rsFactory;
    }
}

View File

@ -1,108 +0,0 @@
package eu.dnetlib.ariadneplus.workflows.nodes;
import java.nio.charset.Charset;
import com.google.common.base.Joiner;
import eu.dnetlib.clients.enabling.ISLookUpClient;
import eu.dnetlib.enabling.locators.UniqueServiceLocator;
import eu.dnetlib.msro.workflows.graph.Arc;
import eu.dnetlib.msro.workflows.nodes.SimpleJobNode;
import eu.dnetlib.msro.workflows.procs.Env;
import eu.dnetlib.msro.workflows.procs.ProcessAware;
import eu.dnetlib.msro.workflows.procs.WorkflowProcess;
import eu.dnetlib.msro.workflows.util.WorkflowsConstants;
import eu.dnetlib.rmi.enabling.ISLookUpException;
import eu.dnetlib.rmi.enabling.ISRegistryService;
import org.antlr.stringtemplate.StringTemplate;
import org.apache.commons.io.IOUtils;
import org.apache.commons.lang3.StringEscapeUtils;
import org.apache.commons.lang3.StringUtils;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.springframework.beans.factory.annotation.Autowired;
/**
 * Copied from eu.dnetlib.msro.workflows.nodes.repohi.RegisterWorkflowFromTemplateJobNode
 *
 * Clarin specific implementation for setting the transformation rules automatically: the
 * workflow profile is generated from a classpath template and registered, with the
 * {@code tdsCsv} attribute set to the comma-separated ids of the matching transformation rules.
 *
 * @author alessia
 */
public class RegisterClarinWorkflowFromTemplateJobNode extends SimpleJobNode implements ProcessAware {

    private static final Log log = LogFactory.getLog(RegisterClarinWorkflowFromTemplateJobNode.class);

    private String wfName;
    /** Classpath location of the StringTemplate used to generate the workflow profile. */
    private String wfTemplate;
    private String description;
    private WorkflowProcess process;

    @Autowired
    private UniqueServiceLocator serviceLocator;
    @Autowired
    private ISLookUpClient isLookUpClient;

    public String getWfName() {
        return this.wfName;
    }

    public void setWfName(final String wfName) {
        this.wfName = wfName;
    }

    public String getWfTemplate() {
        return this.wfTemplate;
    }

    public void setWfTemplate(final String wfTemplate) {
        this.wfTemplate = wfTemplate;
    }

    public String getDescription() {
        return this.description;
    }

    public void setDescription(final String description) {
        this.description = description;
    }

    /**
     * Fills the workflow template with the datasource information of the current process,
     * registers the resulting profile and stores its id in the {@code repoWfId} attribute.
     *
     * @param env the workflow environment receiving the id of the registered profile
     * @return {@link Arc#DEFAULT_ARC}
     * @throws Exception if the template cannot be read or the lookup/registration fails
     */
    @Override
    protected String execute(final Env env) throws Exception {
        final String dsId = this.process.getDsId();
        final String ifaceId = this.process.getDsInterface();
        final String dsName = this.process.getDsName();

        // The URL overload of IOUtils.toString opens and closes the resource stream itself;
        // the stream obtained with getResourceAsStream was never closed.
        final String templateText = IOUtils.toString(getClass().getResource(getWfTemplate()), Charset.forName("UTF-8"));
        final StringTemplate profTemplate = new StringTemplate(templateText);

        profTemplate.setAttribute("name", StringEscapeUtils.escapeXml11(this.wfName));
        profTemplate.setAttribute("desc", StringEscapeUtils.escapeXml11(this.description));
        profTemplate.setAttribute("priority", WorkflowsConstants.DEFAULT_WF_PRIORITY);
        profTemplate.setAttribute("dsId", StringEscapeUtils.escapeXml11(dsId));
        profTemplate.setAttribute("interface", StringEscapeUtils.escapeXml11(ifaceId));
        profTemplate.setAttribute("dsName", StringEscapeUtils.escapeXml11(dsName));
        profTemplate.setAttribute("tdsCsv", getListOfTDS(ifaceId));

        final String profId = this.serviceLocator.getService(ISRegistryService.class).registerProfile(profTemplate.toString());
        env.setAttribute("repoWfId", profId);
        log.info("A new repo wf has been registered, id: " + profId);
        return Arc.DEFAULT_ARC;
    }

    /**
     * Looks up the transformation rules whose title starts with the last "::"-separated part of the interface id.
     *
     * @param ifaceId the interface identifier
     * @return the identifiers of the matching transformation rule profiles, joined with commas
     * @throws ISLookUpException if the lookup fails
     */
    private String getListOfTDS(final String ifaceId) throws ISLookUpException {
        String lastAPIPart = StringUtils.substringAfterLast(ifaceId, "::");
        // NOTE(review): lastAPIPart is concatenated into the query as-is; an id containing an apostrophe would break it — confirm ids cannot contain one.
        String query = "for $x in collection('/db/DRIVER/TransformationRuleDSResources/TransformationRuleDSResourceType') return $x[starts-with(.//TITLE/text() , '" + lastAPIPart + "')]//RESOURCE_IDENTIFIER/@value/string()";
        return Joiner.on(',').join(isLookUpClient.search(query));
    }

    public WorkflowProcess getProcess() {
        return this.process;
    }

    @Override
    public void setProcess(final WorkflowProcess process) {
        this.process = process;
    }
}

View File

@ -1,50 +0,0 @@
package eu.dnetlib.ariadneplus.workflows.nodes;
import java.util.Map;
import com.google.gson.Gson;
import eu.dnetlib.msro.workflows.graph.Arc;
import eu.dnetlib.msro.workflows.nodes.AsyncJobNode;
import eu.dnetlib.msro.workflows.nodes.SimpleJobNode;
import eu.dnetlib.msro.workflows.procs.Env;
import eu.dnetlib.msro.workflows.util.WorkflowsConstants;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
/**
 * Workflow node that serializes a report map found in the environment to JSON and stores the
 * JSON string back in the environment under a key prefixed with
 * {@link WorkflowsConstants#MAIN_LOG_PREFIX}.
 * <p>
 * Created by Alessia Bardi on 05/04/17.
 *
 * @author Alessia Bardi
 */
public class ReportReaderJobNode extends AsyncJobNode {

    private static final Log log = LogFactory.getLog(ReportReaderJobNode.class);

    /** Name of the environment attribute holding the report map. */
    private String reportEnvParam = "validationReport";
    /** Suffix of the environment attribute receiving the JSON report; initialised to the value of reportEnvParam. */
    private String outputParam = reportEnvParam;

    /**
     * Reads the report map, converts it to JSON and publishes it in the environment.
     *
     * @param env the workflow environment
     * @return {@link Arc#DEFAULT_ARC}
     */
    @Override
    protected String execute(final Env env) throws Exception {
        final Map<String, String> reportMap = env.getAttribute(reportEnvParam, Map.class);
        final String theReport = new Gson().toJson(reportMap);
        final String outputKey = WorkflowsConstants.MAIN_LOG_PREFIX + outputParam;
        env.setAttribute(outputKey, theReport);
        log.debug("Found the report: " + theReport);
        return Arc.DEFAULT_ARC;
    }

    public String getReportEnvParam() {
        return reportEnvParam;
    }

    public void setReportEnvParam(final String reportEnvParam) {
        this.reportEnvParam = reportEnvParam;
    }

    public String getOutputParam() {
        return outputParam;
    }

    public void setOutputParam(final String outputParam) {
        this.outputParam = outputParam;
    }
}

View File

@ -1,91 +0,0 @@
package eu.dnetlib.ariadneplus.workflows.nodes;
import java.net.URI;
import java.net.URISyntaxException;
import java.util.Map;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.http.client.utils.URIBuilder;
import org.springframework.web.client.RestTemplate;
/**
 * Iterator variant that asks the Virtuoso reader API only for the subjects of one type.
 * <p>
 * The subject list is requested from {@code /apiSubjectsWithType} with the {@code typeNamespace}
 * and {@code typeName} parameters, and {@code typeName} is also sent with every RDF request.
 * <p>
 * Created by Alessia Bardi on 01/03/2018.
 *
 * @author Alessia Bardi
 */
public class VirtuosoAriadnePlusByTypeIterator extends VirtuosoAriadnePlusIterator {

    private static final Log log = LogFactory.getLog(VirtuosoAriadnePlusByTypeIterator.class);

    private String typeName;
    private String typeNamespace;

    /**
     * Builds the URI of one page of the typed subject list.
     *
     * @param offset offset of the first subject of the page
     * @return the request URI
     * @throws URISyntaxException if the configured API URL is not a valid URI
     */
    @Override
    protected URI getURIForSubjectList(final int offset) throws URISyntaxException {
        return new URIBuilder(getVirtuosoReaderAPIUrl() + "/apiSubjectsWithType")
                .addParameter("api", getDatasourceInterface())
                .addParameter("typeNamespace", typeNamespace)
                .addParameter("typeName", typeName)
                .addParameter("limit", Integer.toString(LIMIT))
                .addParameter("offset", Integer.toString(offset))
                .build();
    }

    /**
     * Builds the URI used to fetch the RDF of one subject.
     *
     * @param subjectURL the URL of the subject
     * @return the request URI
     * @throws URISyntaxException if the configured API URL is not a valid URI
     */
    @Override
    protected URI getURIForRDFRequest(final String subjectURL) throws URISyntaxException {
        final URIBuilder rdfRequest = new URIBuilder(getVirtuosoReaderAPIUrl() + "/subject");
        rdfRequest.addParameter("subjectURL", subjectURL);
        rdfRequest.addParameter("typeName", typeName);
        rdfRequest.addParameter("timeout", ANY_TIME_QUERY_MS);
        return rdfRequest.build();
    }

    public String getTypeName() {
        return typeName;
    }

    public void setTypeName(final String typeName) {
        this.typeName = typeName;
    }

    public String getTypeNamespace() {
        return typeNamespace;
    }

    public void setTypeNamespace(final String typeNamespace) {
        this.typeNamespace = typeNamespace;
    }

    // Fluent setters: each one stores the value and returns this iterator.

    public VirtuosoAriadnePlusByTypeIterator typeName(final String typeName) {
        this.typeName = typeName;
        return this;
    }

    public VirtuosoAriadnePlusByTypeIterator typeNamespace(final String typeNamespace) {
        this.typeNamespace = typeNamespace;
        return this;
    }

    // The overrides below narrow the return type so that calls can be chained on this subclass.

    @Override
    public VirtuosoAriadnePlusByTypeIterator restTemplate(final RestTemplate restTemplate) {
        setRestTemplate(restTemplate);
        return this;
    }

    @Override
    public VirtuosoAriadnePlusByTypeIterator datasourceInterface(final String datasourceInterface) {
        setDatasourceInterface(datasourceInterface);
        return this;
    }

    @Override
    public VirtuosoAriadnePlusByTypeIterator virtuosoReaderAPIUrl(final String virtuosoReaderAPIUrl) {
        setVirtuosoReaderAPIUrl(virtuosoReaderAPIUrl);
        return this;
    }

    @Override
    public VirtuosoAriadnePlusByTypeIterator datasourceName(final String datasourceName) {
        setDatasourceName(datasourceName);
        return this;
    }

    @Override
    public VirtuosoAriadnePlusByTypeIterator errors(final Map<String, Integer> errors) {
        setErrors(errors);
        return this;
    }
}

View File

@ -1,24 +0,0 @@
package eu.dnetlib.ariadneplus.workflows.nodes;
/**
 * Checked exception raised when reading from the Virtuoso reader API fails.
 * <p>
 * Created by Alessia Bardi on 17/02/2018.
 *
 * @author Alessia Bardi
 */
public class VirtuosoAriadnePlusException extends Exception {

    /** Creates an exception with neither a message nor a cause. */
    public VirtuosoAriadnePlusException() {
        super();
    }

    /**
     * Creates an exception with a message.
     *
     * @param message the detail message
     */
    public VirtuosoAriadnePlusException(final String message) {
        super(message);
    }

    /**
     * Creates an exception with a message and a cause.
     *
     * @param message the detail message
     * @param cause   the underlying failure
     */
    public VirtuosoAriadnePlusException(final String message, final Throwable cause) {
        super(message, cause);
    }

    /**
     * Creates an exception that wraps a cause.
     *
     * @param cause the underlying failure
     */
    public VirtuosoAriadnePlusException(final Throwable cause) {
        super(cause);
    }
}

View File

@ -1,352 +0,0 @@
package eu.dnetlib.ariadneplus.workflows.nodes;
import java.net.URI;
import java.net.URISyntaxException;
import java.util.List;
import java.util.Map;
import java.util.NoSuchElementException;
import java.util.concurrent.BlockingQueue;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.TimeUnit;
import com.google.common.collect.Lists;
import com.google.common.collect.Maps;
import com.google.common.collect.Queues;
import eu.dnetlib.data.collector.ThreadSafeIterator;
import eu.dnetlib.rmi.data.CollectorServiceRuntimeException;
import org.apache.commons.lang3.StringEscapeUtils;
import org.apache.commons.lang3.StringUtils;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.http.client.utils.URIBuilder;
import org.springframework.http.*;
import org.springframework.web.client.ResourceAccessException;
import org.springframework.web.client.RestClientException;
import org.springframework.web.client.RestTemplate;
/**
* Created by Alessia Bardi on 31/01/2018.
*
* @author Alessia Bardi
*/
public class VirtuosoAriadnePlusIterator extends ThreadSafeIterator {
private static final Log log = LogFactory.getLog(VirtuosoAriadnePlusIterator.class);
// Value of the "timeout" parameter sent with each RDF request, in milliseconds.
protected static final String ANY_TIME_QUERY_MS = "1800000"; // 1,800,000 ms == 30 minutes
// Maximum time, in seconds, the producer waits when offering an element to the queue.
protected static final int QUEUE_TIMEOUT_SECONDS = 600;
// Marker enqueued by the producer after the last record.
public final static String TERMINATOR = "ARNOLD";
// Marker enqueued by the producer when it stops because of an exception.
public final static String ERROR_TERMINATOR = "SCHWARZ";
// Pause, in milliseconds, before an RDF request is retried.
protected final static int SLEEP_MS = 5000;
// Number of attempts granted to retried operations.
protected final static int MAX_RETRIES = 3;
// Page size used when requesting the subject list.
protected final static int LIMIT = 100;
private String datasourceName;
private String datasourceInterface;
private String virtuosoReaderAPIUrl;
// Set once the background producer has been launched.
private boolean started = false;
// Counters of the errors met while fetching, keyed by a short description.
private Map<String, Integer> errors = Maps.newHashMap();
// Only its runtime class is used, as the response type of the subject list request.
private List<String> listForClass = Lists.newArrayList();
// Bounded hand-off queue (capacity 10) between the producer and this iterator.
private BlockingQueue<String> elements = Queues.newArrayBlockingQueue(10);
private String currentElement = null;
// Single-thread executor running the producer task.
private ExecutorService executor = Executors.newSingleThreadExecutor();
private RestTemplate restTemplate;
/**
 * Launches the producer and loads the first element on the first call; later calls do nothing.
 */
private synchronized void verifyStarted() {
    if (this.started) {
        return;
    }
    this.started = true;
    fillQueue();
    getNextElement(MAX_RETRIES);
}
/**
 * Submits the producer task to the executor and then shuts the executor down so that no
 * further task is accepted.
 * <p>
 * The task pages through the subject list, fetches the RDF of every subject and offers the
 * resulting records to the queue, followed by {@link #TERMINATOR}. On any exception it offers
 * {@link #ERROR_TERMINATOR} instead. A record that cannot be enqueued within
 * {@link #QUEUE_TIMEOUT_SECONDS} is reported and counted in the error map instead of being
 * dropped silently.
 */
protected void fillQueue() {
    log.info("Virtuoso reader at : " + getVirtuosoReaderAPIUrl());
    getExecutor().submit(() -> {
        try {
            int offset = 0;
            boolean again;
            do {
                List<String> subjectList = getSubjectList(offset);
                for (String subject : subjectList) {
                    String xmlFile = tryGetRDF(subject, MAX_RETRIES);
                    if (StringUtils.isBlank(xmlFile)) {
                        log.warn("Skipping blank RDF for " + subject);
                    } else if (!getElements().offer(xmlFile, QUEUE_TIMEOUT_SECONDS, TimeUnit.SECONDS)) {
                        // offer() returns false when the queue stayed full for the whole timeout: the record is lost
                        log.error("Queue still full after " + QUEUE_TIMEOUT_SECONDS + " seconds, dropping RDF for " + subject);
                        errors.merge("Queue offer timeout", 1, Integer::sum);
                    }
                }
                // a full page means there may be more subjects to read
                again = subjectList.size() == LIMIT;
                offset += LIMIT;
            } while (again);
            log.debug("End of subject list, adding terminator to the queue");
            if (!getElements().offer(TERMINATOR, QUEUE_TIMEOUT_SECONDS, TimeUnit.SECONDS)) {
                log.error("Queue still full after " + QUEUE_TIMEOUT_SECONDS + " seconds, terminator could not be added");
            }
        } catch (Exception e) {
            // keep the stack trace: the message alone is often null or not informative
            log.error(e.getMessage(), e);
            try {
                if (!getElements().offer(ERROR_TERMINATOR, QUEUE_TIMEOUT_SECONDS, TimeUnit.SECONDS)) {
                    log.error("Queue still full after " + QUEUE_TIMEOUT_SECONDS + " seconds, error terminator could not be added");
                }
            } catch (InterruptedException e1) {
                log.error(e1.getMessage(), e1);
                // restore the interrupt status for the executor thread
                Thread.currentThread().interrupt();
            }
        }
    });
    getExecutor().shutdown();
}
/**
 * Fetches the RDF of a subject and wraps it in a record, retrying after a pause on request
 * timeouts, blank responses and 5xx responses.
 *
 * @param subjectURL the URL of the subject; subjects not starting with "http" are skipped
 * @param attempt    the number of remaining attempts
 * @return the record XML, or {@code null} when the subject is skipped, the attempts are
 *         exhausted or the server answers with a non-2xx, non-5xx status
 * @throws URISyntaxException   if the request URI cannot be built
 * @throws InterruptedException if the thread is interrupted while pausing before a retry
 */
protected String tryGetRDF(final String subjectURL, int attempt) throws URISyntaxException, InterruptedException, VirtuosoAriadnePlusException {
    //this is something George said: if it has no http URI, then it is not to be considered relevant by itself
    if (!subjectURL.startsWith("http")) {
        log.debug("Skipping as non-http url: "+subjectURL);
        errors.merge("Non-http URLs", 1, Integer::sum);
        return null;
    }
    log.debug("Querying Api, remaining attempts: "+attempt);
    if (attempt <= 0) {
        errors.merge("Failed tryGetRDF", 1, Integer::sum);
        return null;
    }

    final ResponseEntity<String> rdfResponse;
    try {
        rdfResponse = getRDF(subjectURL);
    } catch (ResourceAccessException timeout) {
        //request timed out --> sleep and try again
        log.warn("Request timeout for "+subjectURL+": I'll sleep and then try again");
        return pauseThenRetryGetRDF(subjectURL, attempt);
    }

    final HttpStatus status = rdfResponse.getStatusCode();
    if (status.is2xxSuccessful()) {
        final String rdfBody = rdfResponse.getBody();
        if (StringUtils.isBlank(rdfBody)) {
            log.warn("Got blank RDF for "+subjectURL+" , let's try again...");
            return pauseThenRetryGetRDF(subjectURL, attempt);
        }
        return completeXML(rdfBody, subjectURL);
    }
    if (status.is5xxServerError()) {
        //sleep for a while and re-try
        log.warn("HTTP ERROR: "+status.value() + ": " + status.getReasonPhrase()+": I'll sleep and then try again");
        return pauseThenRetryGetRDF(subjectURL, attempt);
    }
    log.error("ERROR: Can't get the RDF for " + subjectURL + " " + status.value() + ": " + status.getReasonPhrase());
    errors.merge(status.value() + ": " + status.getReasonPhrase(), 1, Integer::sum);
    return null;
}

/** Sleeps for SLEEP_MS and then calls tryGetRDF again with one attempt less. */
private String pauseThenRetryGetRDF(final String subjectURL, final int attempt) throws URISyntaxException, InterruptedException, VirtuosoAriadnePlusException {
    Thread.sleep(SLEEP_MS);
    return tryGetRDF(subjectURL, attempt - 1);
}
/**
 * Builds the URI of one page of the subject list of the datasource interface.
 *
 * @param offset offset of the first subject of the page
 * @return the request URI
 * @throws URISyntaxException if the configured API URL is not a valid URI
 */
protected URI getURIForSubjectList(final int offset) throws URISyntaxException {
    return new URIBuilder(getVirtuosoReaderAPIUrl() + "/apiSubjects")
            .addParameter("api", getDatasourceInterface())
            .addParameter("limit", Integer.toString(LIMIT))
            .addParameter("offset", Integer.toString(offset))
            .build();
}
/**
 * Requests one page of the subject list from the reader API.
 *
 * @param offset offset of the first subject of the page
 * @return the subjects returned by the API
 * @throws URISyntaxException           if the request URI cannot be built
 * @throws VirtuosoAriadnePlusException if the REST call fails
 */
protected List<String> getSubjectList(final int offset) throws URISyntaxException, VirtuosoAriadnePlusException {
    final URI subjectsUri = getURIForSubjectList(offset);
    log.debug("fillQueue -- Calling for subject list: " + subjectsUri.toString());
    try {
        return getRestTemplate().getForObject(subjectsUri, getListForClass().getClass());
    } catch (RestClientException rce) {
        throw new VirtuosoAriadnePlusException(rce);
    }
}
/**
 * Builds the URI used to fetch the RDF of one subject.
 *
 * @param subjectURL the URL of the subject
 * @return the request URI
 * @throws URISyntaxException if the configured API URL is not a valid URI
 */
protected URI getURIForRDFRequest(final String subjectURL) throws URISyntaxException {
    final URIBuilder rdfRequest = new URIBuilder(getVirtuosoReaderAPIUrl() + "/subject");
    rdfRequest.addParameter("subjectURL", subjectURL);
    rdfRequest.addParameter("timeout", ANY_TIME_QUERY_MS);
    return rdfRequest.build();
}
/**
 * Performs the GET request for the RDF of one subject, asking for an XML response.
 *
 * @param subjectURL URL of the subject whose RDF is requested
 * @return the full HTTP response, with the body as a string
 * @throws URISyntaxException if the request URI cannot be built
 */
protected ResponseEntity<String> getRDF(final String subjectURL) throws URISyntaxException {
    final URI rdfUri = getURIForRDFRequest(subjectURL);
    log.debug("fillQueue -- Calling for subject RDF: " + rdfUri.toString());

    // Accept: application/xml
    final HttpHeaders acceptXml = new HttpHeaders();
    acceptXml.setAccept(Lists.newArrayList(MediaType.APPLICATION_XML));

    final HttpEntity<String> request = new HttpEntity<>("parameters", acceptXml);
    return restTemplate.exchange(rdfUri, HttpMethod.GET, request, String.class);
}
/**
 * Wraps an RDF/XML payload in an OAI {@code record} envelope whose header carries the subject URL,
 * the datasource interface and the datasource name.
 * <p>
 * Every value written as element content is XML-escaped: the URL was already escaped, and the
 * datasource interface and name now are too, so that a {@code &} or {@code <} in either of them
 * cannot produce a malformed record.
 *
 * @param rdfFile the RDF/XML payload; its XML declaration, if any, is stripped before embedding
 * @param url the subject URL, written to {@code dri:objIdentifier}
 * @return the complete record as an XML string
 */
public String completeXML(final String rdfFile, final String url) {
    final String xmlEscapedURL = StringEscapeUtils.escapeXml11(url);
    final String xmlEscapedInterface = StringEscapeUtils.escapeXml11(datasourceInterface);
    final String xmlEscapedName = StringEscapeUtils.escapeXml11(datasourceName);
    // the payload is nested inside <metadata>, where an XML declaration is not allowed
    final String rdfFileNoXmlDecl = rdfFile.replaceAll("\\<\\?xml(.+?)\\?\\>", "").trim();
    return "<?xml version=\"1.0\" encoding=\"UTF-8\"?><record xmlns=\"http://www.openarchives.org/OAI/2.0/\"><header xmlns:dri=\"http://www.driver-repository.eu/namespace/dri\"><dri:objIdentifier>"
            + xmlEscapedURL + "</dri:objIdentifier><dri:datasourceapi>" + xmlEscapedInterface + "</dri:datasourceapi><dri:datasourcename>" + xmlEscapedName
            + "</dri:datasourcename></header><metadata>" + rdfFileNoXmlDecl + "</metadata></record>";
}
/**
 * Tells whether the current element is a real record.
 * <p>
 * Returns {@code false} (and shuts the executor down if it is not terminated) when the current element
 * is {@code TERMINATOR}; throws when it is {@code ERROR_TERMINATOR} or when {@code verifyStarted()} fails.
 *
 * @return {@code true} if the current element is neither of the two terminator markers
 * @throws CollectorServiceRuntimeException if start verification fails or the error marker was reached
 */
@Override
public boolean doHasNext() {
    try {
        verifyStarted();
    } catch (Exception e) {
        getExecutor().shutdownNow();
        throw new CollectorServiceRuntimeException(e);
    }

    final String current = currentElement;
    if (current.equals(TERMINATOR)) {
        if (!executor.isTerminated()) {
            executor.shutdownNow();
        }
        return false;
    }
    if (current.equals(ERROR_TERMINATOR)) {
        executor.shutdownNow();
        throw new CollectorServiceRuntimeException("Error getting elements from virtuoso");
    }
    return true;
}
/**
 * Returns the current element and advances to the next one taken from the queue.
 *
 * @return the element that was current when the method was called
 * @throws NoSuchElementException if {@code hasNext()} is false or the current element is a terminator marker
 */
@Override
public String doNext() {
    if (!hasNext()) {
        log.error("Next called but hasNext is false", new NoSuchElementException());
        throw new NoSuchElementException();
    }

    final String current = currentElement;
    if (current.equals(TERMINATOR) || current.equals(ERROR_TERMINATOR)) {
        executor.shutdownNow();
        throw new NoSuchElementException();
    }

    // move forward before handing the element out
    getNextElement(MAX_RETRIES);
    return current;
}
/**
 * Replaces the current element with the next one taken from the queue, blocking until one is available.
 * <p>
 * The current element becomes {@code ERROR_TERMINATOR} when {@code attempt} is not positive or when the
 * waiting thread is interrupted; in the latter case the executor is shut down and the interrupt status
 * of the thread is restored so that callers can still observe the interruption.
 *
 * @param attempt number of remaining attempts; a value {@code <= 0} ends the iteration with an error
 */
private void getNextElement(int attempt){
    log.debug("polling from queue, remaining attempts: "+attempt);
    if (attempt <= 0) {
        currentElement = ERROR_TERMINATOR;
        return;
    }
    try {
        currentElement = elements.take();
    } catch (InterruptedException e) {
        // current thread interrupted. Let's end, without hiding the interruption from the caller.
        Thread.currentThread().interrupt();
        currentElement = ERROR_TERMINATOR;
        executor.shutdownNow();
    }
}
/**
 * Removal is not supported by this iterator.
 *
 * @throws UnsupportedOperationException always
 */
@Override
public void remove() {
throw new UnsupportedOperationException();
}
/**
 * Fluent setter for the datasource interface (sent as the {@code api} parameter of the subject-list call).
 *
 * @param datasourceInterface the value to store
 * @return this iterator, to allow call chaining
 */
public VirtuosoAriadnePlusIterator datasourceInterface(final String datasourceInterface) {
this.datasourceInterface = datasourceInterface;
return this;
}
/**
 * Fluent setter for the base URL of the Virtuoso reader API.
 *
 * @param virtuosoReaderAPIUrl the value to store
 * @return this iterator, to allow call chaining
 */
public VirtuosoAriadnePlusIterator virtuosoReaderAPIUrl(final String virtuosoReaderAPIUrl) {
this.virtuosoReaderAPIUrl = virtuosoReaderAPIUrl;
return this;
}
/**
 * Fluent setter for the datasource name (written to the header of the generated records).
 *
 * @param datasourceName the value to store
 * @return this iterator, to allow call chaining
 */
public VirtuosoAriadnePlusIterator datasourceName(final String datasourceName) {
this.datasourceName = datasourceName;
return this;
}
/**
 * Fluent setter for the map in which errors are counted.
 *
 * @param errors the map to store
 * @return this iterator, to allow call chaining
 */
public VirtuosoAriadnePlusIterator errors(final Map<String, Integer> errors) {
this.errors = errors;
return this;
}
/** @return the datasource interface */
public String getDatasourceInterface() {
return datasourceInterface;
}
/** @return the base URL of the Virtuoso reader API */
public String getVirtuosoReaderAPIUrl() {
return virtuosoReaderAPIUrl;
}
/** @return the map of error counters, keyed by "status code: reason phrase" */
public Map<String, Integer> getErrors() {
return errors;
}
/** @return the queue from which the iterator takes its elements */
public BlockingQueue<String> getElements() {
return elements;
}
/** @return the REST template used for the HTTP calls */
public RestTemplate getRestTemplate() {
return restTemplate;
}
/**
 * Fluent setter for the REST template.
 *
 * @param restTemplate the value to store
 * @return this iterator, to allow call chaining
 */
public VirtuosoAriadnePlusIterator restTemplate(final RestTemplate restTemplate) {
this.restTemplate = restTemplate;
return this;
}
/** @return the datasource name */
public String getDatasourceName() {
return datasourceName;
}
/** @return the value of the {@code started} flag */
public boolean isStarted() {
return started;
}
/** @return the list instance whose runtime class is used as response type of the subject-list call */
public List<String> getListForClass() {
return listForClass;
}
/** @return the element the iterator is currently positioned on (possibly a terminator marker) */
public String getCurrentElement() {
return currentElement;
}
/** @return the executor service held by this iterator */
public ExecutorService getExecutor() {
return executor;
}
/** @param datasourceName the datasource name to store */
public void setDatasourceName(final String datasourceName) {
this.datasourceName = datasourceName;
}
/** @param datasourceInterface the datasource interface to store */
public void setDatasourceInterface(final String datasourceInterface) {
this.datasourceInterface = datasourceInterface;
}
/** @param virtuosoReaderAPIUrl the base URL of the Virtuoso reader API to store */
public void setVirtuosoReaderAPIUrl(final String virtuosoReaderAPIUrl) {
this.virtuosoReaderAPIUrl = virtuosoReaderAPIUrl;
}
/** @param started the new value of the {@code started} flag */
public void setStarted(final boolean started) {
this.started = started;
}
/** @param errors the map in which errors are counted */
public void setErrors(final Map<String, Integer> errors) {
this.errors = errors;
}
/** @param listForClass the list instance whose runtime class is used as response type of the subject-list call */
public void setListForClass(final List<String> listForClass) {
this.listForClass = listForClass;
}
/** @param elements the queue from which the iterator takes its elements */
public void setElements(final BlockingQueue<String> elements) {
this.elements = elements;
}
/** @param currentElement the element the iterator is positioned on */
public void setCurrentElement(final String currentElement) {
this.currentElement = currentElement;
}
/** @param executor the executor service to store */
public void setExecutor(final ExecutorService executor) {
this.executor = executor;
}
/** @param restTemplate the REST template used for the HTTP calls */
public void setRestTemplate(final RestTemplate restTemplate) {
this.restTemplate = restTemplate;
}
}

View File

@ -1,41 +0,0 @@
package eu.dnetlib.data.collector.plugins.ariadneplus.ehri;
import eu.dnetlib.rmi.data.CollectorServiceException;
import eu.dnetlib.rmi.data.InterfaceDescriptor;
import eu.dnetlib.rmi.data.plugin.AbstractCollectorPlugin;
/**
 * Collector plugin for the EHRI portal: records are downloaded from export URLs of the form
 * https://portal.ehri-project.eu/units/&lt;ID&gt;/export, where the unit IDs come from the EHRI GraphQL API
 * (docs at https://portal.ehri-project.eu/api/graphql).
 * <p>
 * Created by Alessia Bardi on 19/04/2017.
 *
 * @author Alessia Bardi
 */
public class EHRICollectorPlugin extends AbstractCollectorPlugin {

    /** Creates the iterator over the records; read each time the returned iterable is iterated. */
    private EHRIIteratorFactory ehriIteratorFactory;

    /**
     * Reads the collection parameters from the interface descriptor and returns a lazy iterable over the records.
     *
     * @param interfaceDescriptor provides the base URL and the params {@code suffixToBaseUrl},
     *                            {@code graphQLURL} and {@code graphQLQuery}
     * @param fromDate not used by this plugin
     * @param untilDate not used by this plugin
     * @return an iterable that asks the factory for a new iterator on each iteration
     */
    @Override
    public Iterable<String> collect(final InterfaceDescriptor interfaceDescriptor, final String fromDate, final String untilDate)
            throws CollectorServiceException {
        // where and how to ask for the identifiers (GraphQL API)
        final String idsEndpoint = interfaceDescriptor.getParams().get("graphQLURL");
        final String idsQuery = interfaceDescriptor.getParams().get("graphQLQuery");
        // e.g. https://portal.ehri-project.eu/units
        final String unitsUrl = interfaceDescriptor.getBaseUrl();
        // e.g. export
        final String exportSuffix = interfaceDescriptor.getParams().get("suffixToBaseUrl");

        return () -> ehriIteratorFactory.newIterator(idsEndpoint, idsQuery, unitsUrl, exportSuffix);
    }

    /** @return the factory used to create the record iterators */
    public EHRIIteratorFactory getEhriIteratorFactory() {
        return ehriIteratorFactory;
    }

    /** @param ehriIteratorFactory the factory used to create the record iterators */
    public void setEhriIteratorFactory(final EHRIIteratorFactory ehriIteratorFactory) {
        this.ehriIteratorFactory = ehriIteratorFactory;
    }
}

View File

@ -1,77 +0,0 @@
package eu.dnetlib.data.collector.plugins.ariadneplus.ehri;
import java.io.IOException;
import java.io.InputStreamReader;
import java.net.URI;
import java.net.URISyntaxException;
import java.util.Iterator;
import com.google.common.collect.Iterables;
import com.google.common.collect.Lists;
import com.google.gson.JsonArray;
import com.google.gson.JsonObject;
import com.google.gson.JsonParser;
import eu.dnetlib.rmi.data.CollectorServiceException;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.http.HttpEntity;
import org.apache.http.client.methods.CloseableHttpResponse;
import org.apache.http.client.methods.HttpPost;
import org.apache.http.entity.StringEntity;
import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.impl.client.HttpClients;
import org.apache.http.util.EntityUtils;
/**
 * Client that retrieves the identifiers of the top-level documentary units from the EHRI GraphQL API,
 * see docs at https://portal.ehri-project.eu/api/graphql
 *
 * Created by Alessia Bardi on 19/04/2017.
 *
 * @author Alessia Bardi
 */
public class EHRIGraphQLClient {

    private static final Log log = LogFactory.getLog(EHRIGraphQLClient.class);

    /**
     * Posts the given GraphQL query and returns the identifiers found in the response.
     * <p>
     * The HTTP client and the response are both closed before the method returns; this is safe because
     * {@link #getIdentifiers(InputStreamReader)} materializes the identifiers in a list before returning.
     *
     * @param baseURL URL of the GraphQL endpoint
     * @param graphQLQuery JSON body of the request, e.g. {@code {"query":"{topLevelDocumentaryUnits{items{id}}}"}}
     * @return an iterator over the identifiers
     * @throws URISyntaxException if {@code baseURL} is not a valid URI
     * @throws IOException on communication errors
     * @throws CollectorServiceException if the server answers with a status different from 200
     */
    public Iterator<String> collect(final String baseURL, final String graphQLQuery) throws URISyntaxException, IOException, CollectorServiceException {
        /*
        The curl equivalent:
        curl --header X-Stream:true \
        --header Content-type:application/json \
        https://portal.ehri-project.eu/api/graphql \
        --data-binary '{"query":"{topLevelDocumentaryUnits{items{id}}}"}'
        */
        // build the request first, so that an invalid URL cannot leave an open client behind
        HttpPost httpPost = new HttpPost();
        httpPost.setURI(new URI(baseURL));
        httpPost.setHeader("X-Stream", "true");
        httpPost.setHeader("Content-type", "application/json" );
        log.info(graphQLQuery);
        httpPost.setEntity(new StringEntity(graphQLQuery));

        try (CloseableHttpClient httpClient = HttpClients.createDefault()) {
            HttpEntity entity = null;
            try (CloseableHttpResponse response = httpClient.execute(httpPost)) {
                switch (response.getStatusLine().getStatusCode()) {
                case 200:
                    entity = response.getEntity();
                    // JSON is exchanged as UTF-8: do not depend on the platform default charset
                    InputStreamReader reader = new InputStreamReader(entity.getContent(), "UTF-8");
                    return getIdentifiers(reader);
                default:
                    log.error(httpPost);
                    log.error(response.getStatusLine());
                    throw new CollectorServiceException(response.getStatusLine().toString());
                }
            } finally {
                if (entity != null) EntityUtils.consume(entity);
            }
        }
    }

    /**
     * Extracts the {@code id} of every element of {@code data.topLevelDocumentaryUnits.items} from the given JSON.
     *
     * @param input reader over the JSON response
     * @return an iterator over a list of the identifiers, built eagerly
     */
    protected Iterator<String> getIdentifiers(final InputStreamReader input){
        JsonObject jsonObject = new JsonParser().parse(input).getAsJsonObject();
        JsonArray items = jsonObject.getAsJsonObject("data").getAsJsonObject("topLevelDocumentaryUnits").getAsJsonArray("items");
        log.debug(items);
        return Lists.newArrayList(Iterables.transform(items, jelem -> jelem.getAsJsonObject().get("id").getAsString())).iterator();
    }
}

View File

@ -1,107 +0,0 @@
package eu.dnetlib.data.collector.plugins.ariadneplus.ehri;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.net.URL;
import java.util.Iterator;
import com.ximpleware.*;
import eu.dnetlib.data.collector.ThreadSafeIterator;
import eu.dnetlib.rmi.data.CollectorServiceRuntimeException;
import org.apache.commons.io.IOUtils;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
/**
 * Iterator that downloads one EHRI record per identifier from {@code baseUrl/<id>/suffix} and strips
 * the default namespace declaration from the root {@code ead} element.
 * <p>
 * Created by Alessia Bardi on 03/05/2017.
 *
 * @author Alessia Bardi
 */
public class EHRIIterator extends ThreadSafeIterator {

    private static final Log log = LogFactory.getLog(EHRIIterator.class);

    /** Number of failed downloads tolerated before the iteration is aborted. */
    private static final int MAX_FAILED = 100;

    private Iterator<String> identifiers;
    private String baseUrl;
    private String suffix;
    /** Number of downloads failed so far. */
    private int failed = 0;

    /**
     * @param idIterator iterator over the identifiers of the records to download
     * @param baseUrl first part of the download URL
     * @param suffix last path segment of the download URL
     */
    public EHRIIterator(final Iterator<String> idIterator, final String baseUrl, final String suffix){
        this.identifiers = idIterator;
        this.baseUrl = baseUrl;
        this.suffix = suffix;
    }

    @Override
    public boolean doHasNext() {
        return identifiers.hasNext();
    }

    /**
     * Downloads the record of the next identifier.
     * <p>
     * When the download fails the record is skipped and the following one is returned instead; an empty
     * string is returned if there is none left.
     *
     * @return the record without the default EAD namespace, or {@code ""} if the last download failed
     * @throws CollectorServiceRuntimeException when more than {@code MAX_FAILED} downloads have failed
     */
    @Override
    public String doNext() {
        String target = baseUrl+"/"+identifiers.next()+"/"+suffix;
        log.debug("Getting "+target);
        try {
            URL url = new URL(target);
            String record = IOUtils.toString(url, "UTF-8");
            return removeDefaultEADNamespace(record);
        } catch (IOException e) {
            log.error("Unable to get "+target, e);
            failed++;
            if (failed > MAX_FAILED) {
                // keep the last failure as cause, it is the best hint about what is going wrong
                throw new CollectorServiceRuntimeException("Could not download more than "+MAX_FAILED+" documents from EHRI. Stopping.", e);
            }
            if (this.hasNext()) {
                return this.next();
            }
            return "";
        }
    }

    /**
     * Removes the {@code xmlns} attribute from the root {@code ead} element.
     * <p>
     * The record was decoded as UTF-8 by {@link #doNext()}, hence it is converted to and from bytes with
     * the same charset instead of the platform default.
     *
     * @param xml the record to process
     * @return the record without the default namespace declaration on {@code ead}
     * @throws CollectorServiceRuntimeException if the record cannot be parsed or modified
     */
    protected String removeDefaultEADNamespace(final String xml) {
        try {
            VTDGen vg = new VTDGen();
            ByteArrayOutputStream baos = new ByteArrayOutputStream();
            vg.setDoc(xml.getBytes("UTF-8"));
            vg.parse(false); // turn off namespace awareness so that xmlns can be selected like any other attribute
            VTDNav vn = vg.getNav();
            AutoPilot ap = new AutoPilot(vn);
            XMLModifier xm = new XMLModifier(vn);
            ap.selectXPath("/ead/@xmlns");
            while (ap.evalXPath() != -1) {
                xm.remove();
            }
            xm.output(baos);
            return baos.toString("UTF-8");
        } catch (Exception e) {
            log.error("Cannot remove default namespace from ead element: "+xml);
            throw new CollectorServiceRuntimeException("Cannot remove default namespace from ead element", e);
        }
    }

    public Iterator<String> getIdentifiers() {
        return identifiers;
    }

    public void setIdentifiers(final Iterator<String> identifiers) {
        this.identifiers = identifiers;
    }

    public String getBaseUrl() {
        return baseUrl;
    }

    public void setBaseUrl(final String baseUrl) {
        this.baseUrl = baseUrl;
    }

    public String getSuffix() {
        return suffix;
    }

    public void setSuffix(final String suffix) {
        this.suffix = suffix;
    }
}

View File

@ -1,38 +0,0 @@
package eu.dnetlib.data.collector.plugins.ariadneplus.ehri;
import java.io.IOException;
import java.net.URISyntaxException;
import java.util.Iterator;
import eu.dnetlib.rmi.data.CollectorServiceException;
import org.springframework.beans.factory.annotation.Autowired;
/**
 * Factory of {@link EHRIIterator}s: it first asks the GraphQL client for the identifiers, then
 * builds the iterator that downloads the corresponding records.
 * <p>
 * Created by Alessia Bardi on 03/05/2017.
 *
 * @author Alessia Bardi
 */
public class EHRIIteratorFactory {

    @Autowired
    private EHRIGraphQLClient ehriGraphQLClient;

    /**
     * Creates a new iterator over the EHRI records.
     *
     * @param baseURLIdentifiers URL of the GraphQL endpoint that returns the identifiers
     * @param queryIdentifiers GraphQL query that returns the identifiers
     * @param baseUrl first part of the record download URL
     * @param suffix last path segment of the record download URL
     * @return the iterator over the records
     * @throws RuntimeException wrapping any checked exception raised while collecting the identifiers
     */
    public Iterator<String> newIterator(final String baseURLIdentifiers, final String queryIdentifiers,
            final String baseUrl,
            final String suffix) {
        final Iterator<String> unitIds;
        try {
            unitIds = ehriGraphQLClient.collect(baseURLIdentifiers, queryIdentifiers);
        } catch (CollectorServiceException | IOException | URISyntaxException e) {
            throw new RuntimeException(e);
        }
        return new EHRIIterator(unitIds, baseUrl, suffix);
    }

    /** @return the GraphQL client used to collect the identifiers */
    public EHRIGraphQLClient getEhriGraphQLClient() {
        return ehriGraphQLClient;
    }

    /** @param ehriGraphQLClient the GraphQL client used to collect the identifiers */
    public void setEhriGraphQLClient(final EHRIGraphQLClient ehriGraphQLClient) {
        this.ehriGraphQLClient = ehriGraphQLClient;
    }
}

View File

@ -1,53 +0,0 @@
package eu.dnetlib.data.collector.plugins.ariadneplus.isidore;
import eu.dnetlib.miscutils.functional.xml.SaxonHelper;
import eu.dnetlib.rmi.data.CollectorServiceException;
import eu.dnetlib.rmi.data.InterfaceDescriptor;
import eu.dnetlib.rmi.data.plugin.AbstractCollectorPlugin;
import org.apache.commons.lang3.StringUtils;
import org.springframework.beans.factory.annotation.Autowired;
/**
 * Collector plugin that reads its paging and XPath settings from the interface descriptor and
 * delegates the actual collection to an {@link IsidoreIterator}.
 *
 * @author alessia bardi
 *
 */
public class IsidoreCollectorPlugin extends AbstractCollectorPlugin {

    @Autowired
    private SaxonHelper saxonHelper;

    /**
     * Validates the parameters and returns a lazy iterable over the collected records.
     * All parameters except {@code queryParams} must be non-blank.
     *
     * @param ifDescriptor provides the base URL and the collection params
     * @param arg1 not used by this plugin
     * @param arg2 not used by this plugin
     * @return an iterable creating a new {@link IsidoreIterator} on each iteration
     * @throws CollectorServiceException if a mandatory parameter is null or blank
     */
    @Override
    public Iterable<String> collect(InterfaceDescriptor ifDescriptor, String arg1, String arg2)
            throws CollectorServiceException {
        final String baseUrl = ifDescriptor.getBaseUrl();
        final String queryParams = param(ifDescriptor, "queryParams");
        final String pageParam = param(ifDescriptor, "pageParam");
        final String startFromPage = param(ifDescriptor, "startFromPage");
        final String nextPagePath = param(ifDescriptor, "nextPagePath");
        final String pageSizeParam = param(ifDescriptor, "pageSizeParam");
        final String pageSize = param(ifDescriptor, "pageSize");
        final String resultTotalXpath = param(ifDescriptor, "resultTotalXpath");
        final String entityXpath = param(ifDescriptor, "entityXpath");

        requireNotBlank("baseUrl", baseUrl);
        requireNotBlank("pageParam", pageParam);
        requireNotBlank("startFromPage", startFromPage);
        requireNotBlank("nextPagePath", nextPagePath);
        requireNotBlank("pageSizeParam", pageSizeParam);
        requireNotBlank("pageSize", pageSize);
        requireNotBlank("resultTotalXpath", resultTotalXpath);
        requireNotBlank("entityXpath", entityXpath);

        // the numeric params are parsed only when the iterable is actually iterated
        return () -> new IsidoreIterator(
                saxonHelper,
                baseUrl,
                queryParams,
                pageParam,
                Integer.parseInt(startFromPage),
                nextPagePath,
                pageSizeParam,
                Integer.parseInt(pageSize),
                resultTotalXpath,
                entityXpath);
    }

    /** Reads one entry of the descriptor params. */
    private static String param(final InterfaceDescriptor descriptor, final String name) {
        return descriptor.getParams().get(name);
    }

    /** Fails with the plugin's standard message when the given value is null or blank. */
    private static void requireNotBlank(final String name, final String value) throws CollectorServiceException {
        if (StringUtils.isBlank(value)) {
            throw new CollectorServiceException("Param '" + name + "' is null or empty");
        }
    }
}

View File

@ -1,169 +0,0 @@
package eu.dnetlib.data.collector.plugins.ariadneplus.isidore; /**
* log.debug(...) equal to log.trace(...) in the application-logs
* <p>
* known bug: at resumptionType 'discover' if the (resultTotal % resultSizeValue) == 0 the collecting fails -> change the resultSizeValue
*/
import java.io.InputStream;
import java.net.URL;
import java.util.Iterator;
import java.util.Queue;
import java.util.concurrent.PriorityBlockingQueue;
import com.google.common.collect.Maps;
import eu.dnetlib.miscutils.functional.xml.SaxonHelper;
import eu.dnetlib.rmi.data.CollectorServiceException;
import net.sf.saxon.s9api.*;
import org.apache.commons.io.IOUtils;
import org.apache.commons.lang3.StringUtils;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
/**
 * Iterator over the entities returned by a paged XML API: each page is downloaded on demand, its
 * entities are queued, and the URL of the following page is derived from the page itself.
 *
 * @author Jochen Schirrwagen, Aenne Loehden, Andreas Czerniak, Alessia Bardi
 * @date 2018-09-03
 *
 */
public class IsidoreIterator implements Iterator<String> {

    private static final Log log = LogFactory.getLog(IsidoreIterator.class); // NOPMD by marko on 11/24/08 5:02 PM

    private SaxonHelper saxonHelper;
    /** Records of the pages downloaded so far and not yet returned; also used as lock by hasNext/next. */
    private Queue<String> recordQueue = new PriorityBlockingQueue<String>();
    /** URL of the next page to download; empty when there is no further page. */
    private String query;
    private String querySize;
    private String queryPage;
    private int resultTotal = -1;
    private String baseUrl;
    private String queryParams;
    private String pageParam;
    private int page;
    private String pageSizeParam;
    private int pageSize;
    private XPathSelector xprEntity;
    private XPathSelector xprResultTotalPath;
    private XPathSelector xprNextPagePath;

    /**
     * @param saxonHelper helper used to evaluate the XPaths and serialize the entities
     * @param baseUrl URL of the API, without query string
     * @param queryParams query-string fragment added to every request
     * @param pageParam name of the query parameter carrying the page
     * @param startFromPage first page to request
     * @param nextPagePath XPath returning the value of the next page; blank result means last page
     * @param pageSizeParam name of the query parameter carrying the page size
     * @param pageSize page size to request
     * @param resultTotalXpath XPath returning the total number of results
     * @param entityXpath XPath selecting the entities of a page
     * @throws IllegalStateException if the XPath selectors cannot be prepared
     */
    public IsidoreIterator(
            final SaxonHelper saxonHelper,
            final String baseUrl,
            final String queryParams,
            final String pageParam,
            final int startFromPage,
            final String nextPagePath,
            final String pageSizeParam,
            final int pageSize,
            final String resultTotalXpath,
            final String entityXpath
    ) {
        this.saxonHelper = saxonHelper;
        this.baseUrl = baseUrl;
        this.queryParams = queryParams;
        this.pageParam = pageParam;
        this.page = startFromPage;
        this.queryPage = this.pageParam+"="+this.page;
        this.pageSizeParam = pageSizeParam;
        this.pageSize = pageSize;
        this.querySize = this.pageSizeParam+"="+this.pageSize;
        try {
            initXpathSelector(resultTotalXpath, nextPagePath, entityXpath);
        } catch (Exception e) {
            // keep the cause: the message alone does not tell which XPath is wrong
            throw new IllegalStateException("xml transformation init failed: " + e.getMessage(), e);
        }
        initQuery();
    }

    private void initXpathSelector(final String resultTotalXpath,final String nextPagePath, final String entityXpath)
            throws SaxonApiException {
        xprResultTotalPath = this.saxonHelper.help().prepareXPathSelector(resultTotalXpath, Maps.newHashMap());
        xprNextPagePath = this.saxonHelper.help().prepareXPathSelector(nextPagePath, Maps.newHashMap());
        xprEntity = this.saxonHelper.help().prepareXPathSelector(entityXpath, Maps.newHashMap());
    }

    private void initQuery() {
        query = baseUrl + "?" + queryParams +"&"+ querySize +"&"+ queryPage;
    }

    /**
     * Tells whether another record is available, downloading further pages if the queue is empty.
     * Pages are fetched here, and not only in {@link #next()}, so that a page without entities cannot
     * make this method answer {@code true} while {@code next()} has nothing to return.
     *
     * @see java.util.Iterator#hasNext()
     */
    @Override
    public boolean hasNext() {
        synchronized (recordQueue) {
            fetchUntilRecordOrEnd();
            return !recordQueue.isEmpty();
        }
    }

    /**
     * Returns the head of the record queue, downloading further pages if the queue is empty.
     *
     * @return the next record, or {@code null} when there is none left
     * @see java.util.Iterator#next()
     */
    @Override
    public String next() {
        synchronized (recordQueue) {
            fetchUntilRecordOrEnd();
            return recordQueue.poll();
        }
    }

    /** Downloads pages until at least one record is queued or there is no further page. */
    private void fetchUntilRecordOrEnd() {
        while (recordQueue.isEmpty() && !query.isEmpty()) {
            try {
                log.info("get Query: " + query);
                query = downloadPage(query);
                log.debug("next queryURL from downloadPage(): " + query);
            } catch (CollectorServiceException e) {
                log.debug("CollectorPlugin.next()-Exception: " + e);
                throw new RuntimeException(e);
            }
        }
    }

    /*
     * download page and return nextQuery
     */
    private String downloadPage(String query) throws CollectorServiceException {
        String nextQuery = "";
        XdmValue nodeList;
        try {
            final String resultPage;
            // the stream is closed as soon as the page has been read
            // NOTE(review): assumes the service answers in UTF-8 — confirm
            try (InputStream resultStream = new URL(query).openStream()) {
                resultPage = IOUtils.toString(resultStream, "UTF-8");
            }
            nodeList = this.saxonHelper.help().evaluate(resultPage, xprEntity);
            log.debug("nodeList.size: " + nodeList.size());
            for (XdmItem entity : nodeList) {
                recordQueue.add(this.saxonHelper.help().serialize((XdmNode) entity));
            }
            String nextPage = this.saxonHelper.help().evaluateSingleAsString(resultPage, xprNextPagePath);
            if (StringUtils.isBlank(nextPage)) {
                log.info("No next page available, we reached the end");
            } else {
                this.queryPage = pageParam+"="+nextPage;
                return baseUrl + "?" + queryParams +"&"+ querySize +"&"+ queryPage;
            }
            // reached only on the last page
            if (resultTotal == -1) {
                String tot = this.saxonHelper.help().evaluateSingleAsString(resultPage, xprResultTotalPath);
                resultTotal = Integer.parseInt(tot);
                log.info("resultTotal was -1 is now: " + resultTotal);
            }
            log.info("resultTotal: " + resultTotal);
            log.debug("nextQueryUrl: " + nextQuery);
            return nextQuery;
        } catch (Exception e) {
            log.error("Collection failed for query " + query, e);
            throw new IllegalStateException("collection failed: " + e.getMessage(), e);
        }
    }
}

View File

@ -5,15 +5,11 @@
xsi:schemaLocation="http://www.springframework.org/schema/beans http://www.springframework.org/schema/beans/spring-beans.xsd
http://cxf.apache.org/transports/http/configuration http://cxf.apache.org/schemas/configuration/http-conf.xsd">
<bean id="wfNodeReportReader" class="eu.dnetlib.ariadneplus.workflows.nodes.ReportReaderJobNode" scope="prototype"/>
<bean id="wfNodePublishGraphDB" class="eu.dnetlib.ariadneplus.workflows.nodes.PublishGraphDBJobNode" scope="prototype"/>
<bean id="wfNodeUnpublishGraphDB" class="eu.dnetlib.ariadneplus.workflows.nodes.UnpublishGraphDBJobNode" scope="prototype"/>
<bean id="wfNodeReadVirtuoso" class="eu.dnetlib.ariadneplus.workflows.nodes.ReadVirtuosoJobNode" scope="prototype"/>
<bean id="wfNodeReadVirtuosoByType" class="eu.dnetlib.ariadneplus.workflows.nodes.ReadVirtuosoByTypeJobNode" scope="prototype"/>
<bean id="wfNodeX3MTransformAriadnePlus" class="eu.dnetlib.ariadneplus.workflows.nodes.X3MTransformAriadnePlusJobNode" scope="prototype"/>
<bean id="wfNodeElasticSearchIndex" class="eu.dnetlib.ariadneplus.workflows.nodes.ElasticSearchIndexJobNode" scope="prototype"/>

View File

@ -1,69 +0,0 @@
package eu.dnetlib.ariadneplus.workflows.nodes;
import java.util.Iterator;
import java.util.Map;
import java.util.Map.Entry;
import org.junit.Before;
import org.junit.Test;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertTrue;
/**
 * Tests of {@link VirtuosoAriadnePlusIterator}, run against the mock iterator so that no remote call is needed.
 * <p>
 * Created by Alessia Bardi on 09/02/2018.
 *
 * @author Alessia Bardi
 */
public class VirtuosoAriadnePlusIteratorTest {

    private VirtuosoAriadnePlusIterator it;

    @Before
    public void setup(){
        it = new VirtuosoAriadnePlusMockIterator();
    }

    /** Fills the queue, prints every element up to the terminator, then prints the error counters. */
    @Test
    public void fillQueueTest() throws Exception {
        it.fillQueue();
        for (String s = it.getElements().take(); !s.equals(VirtuosoAriadnePlusIterator.TERMINATOR); s = it.getElements().take()) {
            System.out.println(s);
        }
        for (Entry<String, Integer> counter : it.getErrors().entrySet()) {
            System.out.println(counter.getKey() +" - "+counter.getValue());
        }
    }

    @Test
    public void testHasNext(){
        assertTrue(it.hasNext());
    }

    /** Prints every element of the iterator. */
    @Test
    public void testIterate(){
        for (; it.hasNext(); ) {
            System.out.println(it.next());
        }
    }

    /** Prints the record built around an empty RDF document. */
    @Test
    public void testCompleteXml(){
        final String rdf = "<?xml version=\"1.0\" encoding=\"utf-8\" ?><rdf:RDF/>";
        System.out.println(it.completeXML(rdf, "http://ariadneplus.d4science.org/handle/AriadnePlus/REG/Thing/Appellation/urp%40regione.umbria.it"));
    }

    /** The XML declaration is removed when present; the input is only trimmed otherwise. */
    @Test
    public void testRemoveXmlDeclaration(){
        final String xmlDeclaration = "\\<\\?xml(.+?)\\?\\>";

        final String withDeclaration = "<?xml version=\"1.0\" encoding=\"utf-8\" ?><rdf:RDF";
        assertEquals("<rdf:RDF", withDeclaration.replaceAll(xmlDeclaration, "").trim());

        final String withoutDeclaration = "<rdf:RDF ";
        assertEquals("<rdf:RDF", withoutDeclaration.replaceAll(xmlDeclaration, "").trim());
    }
}

View File

@ -1,36 +0,0 @@
package eu.dnetlib.ariadneplus.workflows.nodes;
import java.util.List;
import java.util.concurrent.TimeUnit;
import com.google.common.collect.Lists;
/**
 * Mock iterator whose queue is filled with six fixed elements instead of data read from a remote service.
 * <p>
 * Created by Alessia Bardi on 12/02/2018.
 *
 * @author Alessia Bardi
 */
public class VirtuosoAriadnePlusMockIterator extends VirtuosoAriadnePlusIterator {

    List<String> elements = Lists.newArrayList("rdf1", "rdf2", "rdf3", "rdf4", "rdf5", "rdf6");

    /**
     * Starts a thread that offers the fixed elements to the queue and, in any case, the terminator at the end.
     */
    @Override
    protected void fillQueue() {
        final Runnable producer = new Runnable() {
            @Override
            public void run() {
                try {
                    for (String mockRecord : elements) {
                        getElements().offer(mockRecord, QUEUE_TIMEOUT_SECONDS, TimeUnit.SECONDS);
                    }
                } catch (InterruptedException ee) {
                    ee.printStackTrace();
                } finally {
                    // the consumer must always see the end of the data
                    try {
                        getElements().offer(TERMINATOR, QUEUE_TIMEOUT_SECONDS, TimeUnit.SECONDS);
                    } catch (InterruptedException ee) {
                        System.out.println(ee);
                    }
                }
            }
        };
        new Thread(producer).start();
    }
}

View File

@ -0,0 +1,134 @@
package eu.dnetlib.ariadneplus.workflows.nodes;
import org.junit.Test;
import java.time.Duration;
import java.time.LocalDateTime;
/**
* Created by Alessia Bardi on 13/04/2017.
*
* @author Alessia Bardi
*/
public class X3MTransformAriadnePlusJobNodeTest {
private X3MTransformAriadnePlusJobNode transformJob = new X3MTransformAriadnePlusJobNode();
private String header = "<oai:header xmlns:dri=\"http://www.driver-repository.eu/namespace/dri\">\n"
+ " <dri:objIdentifier>ariadne_mock::0000023f507999464aa2b78875b7e5d6</dri:objIdentifier>\n"
+ " <dri:recordIdentifier>2420500</dri:recordIdentifier>\n"
+ " <dri:dateOfCollection>2017-04-10T18:44:46.85+02:00</dri:dateOfCollection>\n"
+ " <dri:datasourceprefix>ariadne_mock</dri:datasourceprefix>\n"
+ " <dri:datasourcename>Ariadne Mock</dri:datasourcename>\n"
+ " <dri:dateOfTransformation>2017-04-12T16:31:45.766</dri:dateOfTransformation>\n"
+ " <dri:invalid value=\"true\">\n"
+ " <dri:error vocabularies=\"dnet:languages\" xpath=\"//*[local-name()='P72_has_language']\"\n"
+ " term=\"en\"/>\n"
+ " </dri:invalid>\n"
+ " </oai:header>";
private String footer = "<oai:about xmlns:dri=\"http://www.driver-repository.eu/namespace/dri\">\n"
+ " <provenance xmlns=\"http://www.openarchives.org/OAI/2.0/provenance\"\n"
+ " xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\"\n"
+ " xsi:schemaLocation=\"http://www.openarchives.org/OAI/2.0/provenance http://www.openarchives.org/OAI/2.0/provenance.xsd\">\n"
+ " <originDescription xmlns=\"\" altered=\"true\" harvestDate=\"2017-04-10T18:44:46.85+02:00\">\n"
+ " <baseURL>sftp%3A%2F%2Fariadne2.isti.cnr.it%2F..%2F..%2Fdata%2Ftransform%2Facdm_correct</baseURL>\n"
+ " <identifier/>\n"
+ " <datestamp/>\n"
+ " <metadataNamespace/>\n"
+ " </originDescription>\n"
+ " </provenance>\n"
+ " </oai:about>";
private String metadata=" <rdf:RDF xmlns:rdf=\"http://www.w3.org/1999/02/22-rdf-syntax-ns#\"\n"
+ " xmlns:dbpedia-owl=\"http://dbpedia.org/ontology/\"\n"
+ " xmlns:acdm=\"http://registry.ariadne-infrastructure.eu/\"\n"
+ " xmlns:xsd=\"http://www.w3.org/2001/XMLSchema#\"\n"
+ " xmlns:skos=\"http://www.w3.org/2004/02/skos/core#\"\n"
+ " xmlns:rdfs=\"http://www.w3.org/2000/01/rdf-schema#\"\n"
+ " xmlns:frbr=\"http://www.cidoc-crm.org/frbroo/\" xmlns:dcterms=\"http://purl.org/dc/terms/\"\n"
+ " xmlns:dcat=\"http://www.w3.org/ns/dcat#\" xmlns:foaf=\"http://xmlns.com/foaf/0.1/\"\n"
+ " xmlns:crm=\"http://www.cidoc-crm.org/cidoc-crm/\"\n"
+ " xmlns:dc=\"http://purl.org/dc/elements/1.1/\">\n"
+ " <frbr:F30_Publication_Event rdf:about=\"uuid:AAAA\"/>\n"
+ " <crm:E65_Creation rdf:about=\"uuid:AAAB\"/>\n"
+ " <crm:E73_Information_Object\n"
+ " rdf:about=\"http://registry.ariadne-infrastructure.eu/dataset/2420500\">\n"
+ " <crm:P129_is_about>\n"
+ " <crm:E73_Information_Object\n"
+ " rdf:about=\"http://registry.ariadne-infrastructure.eu/subject/CAIRN\">\n"
+ " <crm:P129_is_about>CAIRN</crm:P129_is_about>\n"
+ " </crm:E73_Information_Object>\n"
+ " </crm:P129_is_about>\n"
+ " <crm:P129_is_about>\n"
+ " <crm:E73_Information_Object\n"
+ " rdf:about=\"http://registry.ariadne-infrastructure.eu/subject/HUT%20CIRCLE\">\n"
+ " <crm:P129_is_about>HUT CIRCLE</crm:P129_is_about>\n"
+ " </crm:E73_Information_Object>\n"
+ " </crm:P129_is_about>\n"
+ " <crm:P104_is_subject_to>ADS Terms and Conditions</crm:P104_is_subject_to>\n"
+ " <crm:P129_is_about>\n"
+ " <crm:E73_Information_Object\n"
+ " rdf:about=\"http://registry.ariadne-infrastructure.eu/subject/SHEEPFOLD\">\n"
+ " <crm:P129_is_about>SHEEPFOLD</crm:P129_is_about>\n"
+ " </crm:E73_Information_Object>\n"
+ " </crm:P129_is_about>\n"
+ " <crm:P129_is_about>\n"
+ " <crm:E73_Information_Object\n"
+ " rdf:about=\"http://registry.ariadne-infrastructure.eu/subject/FIELD%20SYSTEM\">\n"
+ " <crm:P129_is_about>FIELD SYSTEM</crm:P129_is_about>\n"
+ " </crm:E73_Information_Object>\n"
+ " </crm:P129_is_about>\n"
+ " <crm:P102_has_title>MID GLEN CROE</crm:P102_has_title>\n"
+ " <crm:P165_incorporates>\n"
+ " <crm:E33_Linguistic_Object rdf:about=\"uuid:AAAG\">\n"
+ " <crm:P72_has_language>en</crm:P72_has_language>\n"
+ " </crm:E33_Linguistic_Object>\n"
+ " </crm:P165_incorporates>\n"
+ " <crm:P67_refers_to>\n"
+ " <crm:E1_CRM_Entity rdf:about=\"uuid:AAAH\">\n"
+ " <crm:P2_has_type>Sites and monuments databases or\n"
+ " inventories</crm:P2_has_type>\n"
+ " </crm:E1_CRM_Entity>\n"
+ " </crm:P67_refers_to>\n"
+ " <crm:P93i_was_taken_out_of_existence_by>\n"
+ " <crm:E6_Destruction rdf:about=\"uuid:AAAE\">\n"
+ " <crm:P4_has_time-span>\n"
+ " <crm:E52_Time-Span rdf:about=\"uuid:AAAF\">\n"
+ " <crm:P81_ongoing_throughout>2013-12-09\n"
+ " 00:00:00.0</crm:P81_ongoing_throughout>\n"
+ " </crm:E52_Time-Span>\n"
+ " </crm:P4_has_time-span>\n"
+ " </crm:E6_Destruction>\n"
+ " </crm:P93i_was_taken_out_of_existence_by>\n"
+ " <crm:P94i_was_created_by>\n"
+ " <frbr:F30_Publication_Event rdf:about=\"uuid:AAAC\">\n"
+ " <crm:P4_has_time-span>\n"
+ " <crm:E52_Time-Span rdf:about=\"uuid:AAAD\">\n"
+ " <crm:P81_ongoing_throughout>2013-12-09\n"
+ " 00:00:00.0</crm:P81_ongoing_throughout>\n"
+ " </crm:E52_Time-Span>\n"
+ " </crm:P4_has_time-span>\n"
+ " </frbr:F30_Publication_Event>\n"
+ " </crm:P94i_was_created_by>\n"
+ " <crm:P129_is_about>\n"
+ " <crm:E73_Information_Object\n"
+ " rdf:about=\"http://registry.ariadne-infrastructure.eu/subject/BUILDING\">\n"
+ " <crm:P129_is_about>BUILDING</crm:P129_is_about>\n"
+ " </crm:E73_Information_Object>\n"
+ " </crm:P129_is_about>\n"
+ " <crm:P1_is_identified_by>2420500</crm:P1_is_identified_by>\n"
+ " <crm:P106i_forms_part_of>http://registry.ariadne-infrastructure.eu/collection/22721290</crm:P106i_forms_part_of>\n"
+ " <crm:P3_has_note>Multiple instances of: SHEEPFOLD&lt;br /&gt;&lt;br /&gt;Multiple\n"
+ " instances of: BUILDING&lt;br /&gt;Multiple instances of: FIELD SYSTEM&lt;br\n"
+ " /&gt;Possible instance of: CAIRN&lt;br /&gt;Multiple instances of: HUT\n"
+ " CIRCLE&lt;br /&gt;Possible instance of: HUT CIRCLE</crm:P3_has_note>\n"
+ " </crm:E73_Information_Object>\n"
+ " </rdf:RDF>";
/** Builds a record from the fixture header, metadata and footer, then prints the elapsed milliseconds and the result. */
@Test
public void testBuildXML(){
    final LocalDateTime start = LocalDateTime.now();
    final String xml = transformJob.buildXML(header, start.toString(), metadata, footer);
    final long elapsedMs = Duration.between(start, LocalDateTime.now()).toMillis();
    System.out.println("Building XML took:"+ elapsedMs);
    System.out.println(xml);
}
}

View File

@ -1,72 +0,0 @@
package eu.dnetlib.data.collector.plugins.ariadneplus.ehri;
import java.io.IOException;
import java.io.InputStreamReader;
import java.net.URISyntaxException;
import java.net.URL;
import java.util.Iterator;
import eu.dnetlib.rmi.data.CollectorServiceException;
import org.apache.commons.io.IOUtils;
import org.junit.Ignore;
import org.junit.Test;
/**
 * Tests of {@link EHRIGraphQLClient}; the tests that call the remote EHRI services are ignored.
 * <p>
 * Created by Alessia Bardi on 19/04/2017.
 *
 * @author Alessia Bardi
 */
public class EHRIGraphQLClientTest {

    private EHRIGraphQLClient client = new EHRIGraphQLClient();
    private String baseUrl = "https://portal.ehri-project.eu/api/graphql";
    private String query = "{\"query\":\"{topLevelDocumentaryUnits{items{id}}}\"}";
    /** Sample response with two items. */
    private String simpleOutput = "{\n"
            + " \"data\" : {\n"
            + " \"topLevelDocumentaryUnits\" : {\n"
            + " \"items\" : [ {\n"
            + " \"id\" : \"at-001985-ikg_ar_1\"\n"
            + " }, {\n"
            + " \"id\" : \"at-001985-ikg_ar_11\"\n"
            + " }]\n"
            + " }\n"
            + " }\n"
            + "}";

    /** Prints the identifiers parsed from the sample response. */
    @Test
    public void testGetIdentifiers() throws IOException {
        final InputStreamReader sample = new InputStreamReader(IOUtils.toInputStream(simpleOutput, "UTF-8"));
        client.getIdentifiers(sample).forEachRemaining(System.out::println);
    }

    /** Prints at most the first five identifiers returned by the remote API. */
    @Ignore
    @Test
    public void testRemoteCollect() throws CollectorServiceException, IOException, URISyntaxException {
        final Iterator<String> ids = client.collect(baseUrl, query);
        final int stopAt = 5;
        int printed = 0;
        while (printed < stopAt && ids.hasNext()) {
            System.out.println(ids.next());
            printed++;
        }
    }

    /** Downloads and prints the export of one unit. */
    @Ignore
    @Test
    public void testGetResource() throws IOException {
        final String id = "at-001985-ikg_ar_11";
        final URL exportUrl = new URL("https://portal.ehri-project.eu/units/"+id+"/export");
        System.out.println(IOUtils.toString(exportUrl, "UTF-8"));
    }

    // @Test
    // public void testDotNotation(){
    // JsonObject jsonObject = new JsonParser().parse(simpleOutput).getAsJsonObject();
    // JsonArray items = jsonObject.getAsJsonArray("data.topLevelDocumentaryUnits.items");
    // for(JsonElement id : items){
    // System.out.println(id.getAsJsonObject().get("id"));
    // }
    // }
}

View File

@ -1,32 +0,0 @@
package eu.dnetlib.data.collector.plugins.ariadneplus.ehri;
import org.junit.Test;
/**
 * Smoke test for {@link EHRIIterator}: prints the outcome of
 * {@code removeDefaultEADNamespace} applied to a small EAD snippet. Nothing is asserted.
 * <p>
 * Created by Alessia Bardi on 13/06/2017.
 *
 * @author Alessia Bardi
 */
public class EHRIIteratorTest {

    /** EAD fragment declaring the EAD namespace as the default namespace on its root element. */
    final String test = "<ead xmlns=\"urn:isbn:1-931666-22-9\"\n"
            + "\txmlns:xlink=\"http://www.w3.org/1999/xlink\"\n"
            + "\txmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\"\n"
            + "\txsi:schemaLocation=\"urn:isbn:1-931666-22-9 http://www.loc.gov/ead/ead.xsd\">\n"
            + " <eadheader xmlns=\"\"\n"
            + "\tcountryencoding=\"iso3166-1\"\n"
            + "\tdateencoding=\"iso8601\"\n"
            + "\trelatedencoding=\"DC\"\n"
            + "\trepositoryencoding=\"iso15511\"\n"
            + "\tscriptencoding=\"iso15924\">\n"
            + " <eadid>us-005578-irn516886</eadid>\n"
            + "\t</eadheader>\n"
            + "\t</ead>";

    /** Instance under test; all three constructor arguments are passed as null. */
    private EHRIIterator iteratorUnderTest = new EHRIIterator(null, null, null);

    /** Prints the snippet after it has gone through {@code removeDefaultEADNamespace}. */
    @Test
    public void testRemoveDefaultEADNamespace() {
        System.out.println(iteratorUnderTest.removeDefaultEADNamespace(test));
    }
}

View File

@ -1,47 +0,0 @@
package eu.dnetlib.data.collector.plugins.ariadneplus.isidore;
import javax.xml.transform.TransformerFactory;
import eu.dnetlib.miscutils.functional.xml.SaxonHelper;
import org.junit.Before;
import org.junit.Ignore;
import org.junit.Test;
/**
 * Manual smoke test for {@link IsidoreIterator}: walks the records returned by the Isidore
 * search API and prints each one. The only test is ignored, as it contacts the remote API.
 * <p>
 * Created by Alessia Bardi on 17/11/2018.
 *
 * @author Alessia Bardi
 */
public class IsidoreIteratorTest {

    TransformerFactory tf = TransformerFactory.newInstance();
    SaxonHelper saxonHelper = new SaxonHelper();

    // Endpoint and query string handed to the iterator.
    private String baseUrl = "https://api.rechercheisidore.fr/resource/search";
    private String queryParams = "source_tree=10670/3.ji0kr6|10670/2.rao8tk";

    // Paging settings handed to the iterator.
    private String pageParam = "page";
    private int startFromPage = 1;
    private String nextPagePath = "//page/@next";
    private String pageSizeParam = "size";
    private int pageSize = 10;

    // XPath expressions handed to the iterator.
    private String resultTotalXpath = "//replies/meta/@items";
    private String entityXpath = "//reply";

    /** Hands the transformer factory to the Saxon helper before each test. */
    @Before
    public void setup() {
        saxonHelper.setSaxonTransformerFactory(tf);
    }

    /** Builds the iterator from the fields above and prints every record until it is exhausted. */
    @Ignore
    @Test
    public void test() {
        final IsidoreIterator records = new IsidoreIterator(
                saxonHelper,
                baseUrl,
                queryParams,
                pageParam,
                startFromPage,
                nextPagePath,
                pageSizeParam,
                pageSize,
                resultTotalXpath,
                entityXpath);
        while (records.hasNext()) {
            System.out.println(records.next());
        }
    }
}