cleaning up: let's remove the classes we do not need anymore, like Virtuoso-related classes and specific plugins that we have created for Parthenos
This commit is contained in:
parent
5087750f96
commit
c770fc40e0
|
@ -1,122 +0,0 @@
|
|||
package eu.dnetlib.ariadneplus.workflows.nodes;
|
||||
|
||||
import java.nio.file.Files;
|
||||
import java.nio.file.Path;
|
||||
import java.nio.file.Paths;
|
||||
import java.util.List;
|
||||
|
||||
import eu.dnetlib.clients.enabling.ISLookUpClient;
|
||||
import eu.dnetlib.enabling.locators.UniqueServiceLocator;
|
||||
import eu.dnetlib.msro.workflows.graph.Arc;
|
||||
import eu.dnetlib.msro.workflows.nodes.AsyncJobNode;
|
||||
import eu.dnetlib.msro.workflows.procs.Env;
|
||||
import eu.dnetlib.msro.workflows.util.WorkflowsConstants;
|
||||
import eu.dnetlib.rmi.datasource.DatasourceManagerService;
|
||||
import org.apache.commons.logging.Log;
|
||||
import org.apache.commons.logging.LogFactory;
|
||||
import org.springframework.beans.factory.annotation.Autowired;
|
||||
|
||||
/**
|
||||
* Created by Alessia Bardi on 12/01/2018.
|
||||
*
|
||||
* @author Alessia Bardi
|
||||
*/
|
||||
public class ClarinAPIGeneratorJobNode extends AsyncJobNode {
|
||||
|
||||
private static final Log log = LogFactory.getLog(ClarinAPIGeneratorJobNode.class);
|
||||
|
||||
/** Path to a local folder containing the subfolders with Clarin mapping files **/
|
||||
private String folderPath;
|
||||
/** Path to a local folder containing the subfolders with Clarin input files **/
|
||||
private String inputBaseUrlPrefix;
|
||||
|
||||
private String metadataIdentifierPath;
|
||||
|
||||
private String clarinDatasourceProfileID;
|
||||
private String clarinDatasourceOriginalId;
|
||||
|
||||
@Autowired
|
||||
private UniqueServiceLocator serviceLocator;
|
||||
@Autowired
|
||||
private ISLookUpClient isLookupClient;
|
||||
|
||||
|
||||
@Override
|
||||
protected String execute(final Env env) throws Exception {
|
||||
Path folder = Paths.get(getFolderPath());
|
||||
if (!Files.isDirectory(folder)){
|
||||
throw new IllegalArgumentException(getFolderPath()+" must be a directory");
|
||||
}
|
||||
final DatasourceManagerService dsMan = getServiceLocator().getService(DatasourceManagerService.class);
|
||||
|
||||
List<String> apiIds = getIsLookupClient().search("//RESOURCE_PROFILE[./HEADER/RESOURCE_IDENTIFIER/@value='"+getClarinDatasourceProfileID()+"']//INTERFACE/@id/string()");
|
||||
|
||||
ClarinFileVisitor visitor = new ClarinFileVisitor(getClarinDatasourceProfileID(), getClarinDatasourceOriginalId(),
|
||||
getInputBaseUrlPrefix(), getMetadataIdentifierPath(), apiIds, getServiceLocator(), getIsLookupClient());
|
||||
Files.walkFileTree(folder, visitor);
|
||||
|
||||
env.setAttribute(WorkflowsConstants.MAIN_LOG_PREFIX +"visitedFiles", visitor.getCountVisitedFiles());
|
||||
env.setAttribute(WorkflowsConstants.MAIN_LOG_PREFIX +"createdTDS", visitor.getCountCreatedTDS());
|
||||
env.setAttribute(WorkflowsConstants.MAIN_LOG_PREFIX +"updatedTDS", visitor.getCountUpdatedTDS());
|
||||
env.setAttribute(WorkflowsConstants.MAIN_LOG_PREFIX +"visitedFolders", visitor.getCountVisitedFolders());
|
||||
env.setAttribute(WorkflowsConstants.MAIN_LOG_PREFIX +"createdInterfaces", visitor.getCountCreatedInterfaces());
|
||||
return Arc.DEFAULT_ARC;
|
||||
|
||||
}
|
||||
|
||||
public String getFolderPath() {
|
||||
return folderPath;
|
||||
}
|
||||
|
||||
public void setFolderPath(final String folderPath) {
|
||||
this.folderPath = folderPath;
|
||||
}
|
||||
|
||||
public String getInputBaseUrlPrefix() {
|
||||
return inputBaseUrlPrefix;
|
||||
}
|
||||
|
||||
public void setInputBaseUrlPrefix(final String inputBaseUrlPrefix) {
|
||||
this.inputBaseUrlPrefix = inputBaseUrlPrefix;
|
||||
}
|
||||
|
||||
public String getMetadataIdentifierPath() {
|
||||
return metadataIdentifierPath;
|
||||
}
|
||||
|
||||
public void setMetadataIdentifierPath(final String metadataIdentifierPath) {
|
||||
this.metadataIdentifierPath = metadataIdentifierPath;
|
||||
}
|
||||
|
||||
public String getClarinDatasourceProfileID() {
|
||||
return clarinDatasourceProfileID;
|
||||
}
|
||||
|
||||
public void setClarinDatasourceProfileID(final String clarinDatasourceProfileID) {
|
||||
this.clarinDatasourceProfileID = clarinDatasourceProfileID;
|
||||
}
|
||||
|
||||
public String getClarinDatasourceOriginalId() {
|
||||
return clarinDatasourceOriginalId;
|
||||
}
|
||||
|
||||
public void setClarinDatasourceOriginalId(final String clarinDatasourceOriginalId) {
|
||||
this.clarinDatasourceOriginalId = clarinDatasourceOriginalId;
|
||||
}
|
||||
|
||||
public UniqueServiceLocator getServiceLocator() {
|
||||
return serviceLocator;
|
||||
}
|
||||
|
||||
public void setServiceLocator(final UniqueServiceLocator serviceLocator) {
|
||||
this.serviceLocator = serviceLocator;
|
||||
}
|
||||
|
||||
public ISLookUpClient getIsLookupClient() {
|
||||
return isLookupClient;
|
||||
}
|
||||
|
||||
public void setIsLookupClient(final ISLookUpClient isLookupClient) {
|
||||
this.isLookupClient = isLookupClient;
|
||||
}
|
||||
}
|
|
@ -1,263 +0,0 @@
|
|||
package eu.dnetlib.ariadneplus.workflows.nodes;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.nio.charset.Charset;
|
||||
import java.nio.file.FileVisitResult;
|
||||
import java.nio.file.Files;
|
||||
import java.nio.file.Path;
|
||||
import java.nio.file.SimpleFileVisitor;
|
||||
import java.nio.file.attribute.BasicFileAttributes;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
import com.google.common.collect.Lists;
|
||||
import com.google.common.collect.Maps;
|
||||
import eu.dnetlib.clients.enabling.ISLookUpClient;
|
||||
import eu.dnetlib.enabling.locators.UniqueServiceLocator;
|
||||
import eu.dnetlib.miscutils.datetime.DateUtils;
|
||||
import eu.dnetlib.rmi.datasource.DatasourceManagerService;
|
||||
import eu.dnetlib.rmi.datasource.DatasourceManagerServiceException;
|
||||
import eu.dnetlib.rmi.datasource.IfaceDesc;
|
||||
import eu.dnetlib.rmi.enabling.ISLookUpException;
|
||||
import eu.dnetlib.rmi.enabling.ISRegistryException;
|
||||
import eu.dnetlib.rmi.enabling.ISRegistryService;
|
||||
import org.antlr.stringtemplate.StringTemplate;
|
||||
import org.apache.commons.io.IOUtils;
|
||||
import org.apache.commons.lang3.StringUtils;
|
||||
import org.apache.commons.logging.Log;
|
||||
import org.apache.commons.logging.LogFactory;
|
||||
|
||||
import static java.nio.file.FileVisitResult.CONTINUE;
|
||||
|
||||
/**
|
||||
* Created by Alessia Bardi on 12/01/2018.
|
||||
*
|
||||
* @author Alessia Bardi
|
||||
*/
|
||||
public class ClarinFileVisitor extends SimpleFileVisitor<Path> {
|
||||
|
||||
private static final Log log = LogFactory.getLog(ClarinFileVisitor.class);
|
||||
private static final String API_PREFIX = "api_________::";
|
||||
private static final String TDS_TEMPLATE ="/eu/dnetlib/ariadneplus/workflows/nodes/clarin_tds.xml.st";
|
||||
|
||||
private List<String> interfaces = Lists.newArrayList();
|
||||
private String clarinDatasourceProfileID;
|
||||
private String clarinDatasourceOriginalId;
|
||||
private DatasourceManagerService dsMan = null;
|
||||
private String inputBaseUrlPrefix;
|
||||
private String metadataIdentifierPath;
|
||||
private ISLookUpClient lookupClient;
|
||||
private ISRegistryService registryService;
|
||||
|
||||
private int countVisitedFiles = 0;
|
||||
private int countCreatedTDS = 0;
|
||||
private int countUpdatedTDS = 0;
|
||||
private int countVisitedFolders = 0;
|
||||
private int countCreatedInterfaces = 0;
|
||||
|
||||
|
||||
@Override
|
||||
public FileVisitResult preVisitDirectory(final Path dir, final BasicFileAttributes attrs) throws IOException {
|
||||
log.info("Processing " + dir.toString());
|
||||
countVisitedFolders++;
|
||||
String dirName = dir.getFileName().toString();
|
||||
if(dirName.equalsIgnoreCase("x3ml-mappings")) return CONTINUE;
|
||||
String apiId = API_PREFIX + getClarinDatasourceOriginalId() + "::" + dirName;
|
||||
if (!interfaces.contains(apiId)) {
|
||||
//API TO BE CREATED
|
||||
IfaceDesc iface = new IfaceDesc();
|
||||
iface.setActive(false);
|
||||
iface.setCompliance("metadata");
|
||||
iface.setContentDescription("metadata");
|
||||
iface.setId(apiId);
|
||||
iface.setRemovable(true);
|
||||
iface.setTypology("dnet:repository::clarin");
|
||||
iface.setAccessProtocol("filesystem");
|
||||
Map<String, String> accessParams = Maps.newHashMap();
|
||||
accessParams.put("extensions", "xml");
|
||||
iface.setAccessParams(accessParams);
|
||||
iface.setBaseUrl(StringUtils.appendIfMissing(getInputBaseUrlPrefix(), "/") + dirName);
|
||||
Map<String, String> extraFields = Maps.newHashMap();
|
||||
extraFields.put("metadata_identifier_path", getMetadataIdentifierPath());
|
||||
iface.setExtraFields(extraFields);
|
||||
try {
|
||||
dsMan.addInterface(getClarinDatasourceProfileID(), iface);
|
||||
countCreatedInterfaces++;
|
||||
log.info("CREATED NEW INTERFACE " + iface.getId() + " for " + getClarinDatasourceOriginalId() + "(" + getClarinDatasourceProfileID() + ")");
|
||||
} catch (DatasourceManagerServiceException e) {
|
||||
log.error("Can't add interface " + iface.getId() + " to " + getClarinDatasourceOriginalId() + "(" + getClarinDatasourceProfileID() + ")", e);
|
||||
return CONTINUE;
|
||||
}
|
||||
} else {
|
||||
if (log.isDebugEnabled()) {
|
||||
log.debug("Interface " + apiId + " already exists");
|
||||
}
|
||||
}
|
||||
return CONTINUE;
|
||||
}
|
||||
|
||||
@Override
|
||||
public FileVisitResult visitFile(final Path file, final BasicFileAttributes attrs) throws IOException {
|
||||
String filename = file.getFileName().toString();
|
||||
log.info("Processing " + file.toString());
|
||||
countVisitedFiles++;
|
||||
String tdsTitle = filename;
|
||||
//call Files.lines which will use a stream to iterate over each line of the file.
|
||||
//Next we will convert the stream to a string by calling Collectors.joining() which will join all the strings together.
|
||||
String updatedCode = Files.lines(file).collect(Collectors.joining()).replace("<?xml version=\"1.0\" encoding=\"UTF-8\" standalone=\"yes\"?>", "");
|
||||
try {
|
||||
List<String> res = this.lookupClient.search("//RESOURCE_PROFILE[.//RESOURCE_TYPE/@value=\"TransformationRuleDSResourceType\" and .//CONFIGURATION/SCRIPT/TITLE/string()=\""+tdsTitle+"\"]/HEADER/RESOURCE_IDENTIFIER/@value/string()");
|
||||
if(res == null || res.isEmpty()){
|
||||
log.debug("Creating new TDS profile for "+filename);
|
||||
final String template = IOUtils.toString(getClass().getResourceAsStream(TDS_TEMPLATE), Charset.forName("UTF-8"));
|
||||
final StringTemplate st = new StringTemplate(template);
|
||||
st.setAttribute("date", DateUtils.calculate_ISO8601(DateUtils.now()));
|
||||
st.setAttribute("title", tdsTitle);
|
||||
st.setAttribute("mapping", "<![CDATA["+updatedCode+"]]>");
|
||||
String profId = this.registryService.registerProfile(st.toString());
|
||||
countCreatedTDS++;
|
||||
log.info("REGISTERED NEW TDS FOR "+filename+": "+profId);
|
||||
}
|
||||
else{
|
||||
String tdsProfileId = res.get(0);
|
||||
log.debug("Updating TDS profile "+tdsProfileId+"for "+filename);
|
||||
|
||||
boolean done = this.registryService.updateProfileNode(tdsProfileId, "//CONFIGURATION/SCRIPT/CODE", "<CODE><![CDATA["+updatedCode+"]]></CODE>");
|
||||
if(done){
|
||||
log.info("TDS PROFILE "+tdsProfileId+" UPDATED with contents from "+filename);
|
||||
countUpdatedTDS++;
|
||||
}
|
||||
if(!done){
|
||||
log.error("!!! TDS PROFILE "+tdsProfileId+" COULD NOT BE UPDATED with contents from "+filename);
|
||||
}
|
||||
}
|
||||
} catch (ISLookUpException | ISRegistryException e) {
|
||||
log.error("CANNOT UPDATE/CREATE TDS PROFILE FOR "+filename, e);
|
||||
}
|
||||
|
||||
return CONTINUE;
|
||||
}
|
||||
|
||||
protected ClarinFileVisitor() {
|
||||
super();
|
||||
}
|
||||
|
||||
public ClarinFileVisitor(final String clarinDatasourceProfileID, final String clarinDatasourceOriginalId,
|
||||
final String inputBaseUrlPrefix, final String metadataIdentifierPath, final List<String> interfaces,
|
||||
final UniqueServiceLocator locator, final ISLookUpClient lookupClient) {
|
||||
super();
|
||||
this.clarinDatasourceOriginalId = clarinDatasourceOriginalId;
|
||||
this.clarinDatasourceProfileID = clarinDatasourceProfileID;
|
||||
this.inputBaseUrlPrefix = inputBaseUrlPrefix;
|
||||
this.metadataIdentifierPath = metadataIdentifierPath;
|
||||
this.interfaces = interfaces;
|
||||
this.dsMan = locator.getService(DatasourceManagerService.class);
|
||||
this.registryService = locator.getService(ISRegistryService.class);
|
||||
this.lookupClient = lookupClient;
|
||||
}
|
||||
|
||||
public List<String> getInterfaces() {
|
||||
return interfaces;
|
||||
}
|
||||
|
||||
public void setInterfaces(final List<String> interfaces) {
|
||||
this.interfaces = interfaces;
|
||||
}
|
||||
|
||||
public String getClarinDatasourceOriginalId() {
|
||||
return clarinDatasourceOriginalId;
|
||||
}
|
||||
|
||||
public void setClarinDatasourceOriginalId(final String clarinDatasourceOriginalId) {
|
||||
this.clarinDatasourceOriginalId = clarinDatasourceOriginalId;
|
||||
}
|
||||
|
||||
public DatasourceManagerService getDsMan() {
|
||||
return dsMan;
|
||||
}
|
||||
|
||||
public void setDsMan(final DatasourceManagerService dsMan) {
|
||||
this.dsMan = dsMan;
|
||||
}
|
||||
|
||||
public String getInputBaseUrlPrefix() {
|
||||
return inputBaseUrlPrefix;
|
||||
}
|
||||
|
||||
public void setInputBaseUrlPrefix(final String inputBaseUrlPrefix) {
|
||||
this.inputBaseUrlPrefix = inputBaseUrlPrefix;
|
||||
}
|
||||
|
||||
public String getMetadataIdentifierPath() {
|
||||
return metadataIdentifierPath;
|
||||
}
|
||||
|
||||
public void setMetadataIdentifierPath(final String metadataIdentifierPath) {
|
||||
this.metadataIdentifierPath = metadataIdentifierPath;
|
||||
}
|
||||
|
||||
public String getClarinDatasourceProfileID() {
|
||||
return clarinDatasourceProfileID;
|
||||
}
|
||||
|
||||
public void setClarinDatasourceProfileID(final String clarinDatasourceProfileID) {
|
||||
this.clarinDatasourceProfileID = clarinDatasourceProfileID;
|
||||
}
|
||||
|
||||
public ISLookUpClient getLookupClient() {
|
||||
return lookupClient;
|
||||
}
|
||||
|
||||
public void setLookupClient(final ISLookUpClient lookupClient) {
|
||||
this.lookupClient = lookupClient;
|
||||
}
|
||||
|
||||
public ISRegistryService getRegistryService() {
|
||||
return registryService;
|
||||
}
|
||||
|
||||
public void setRegistryService(final ISRegistryService registryService) {
|
||||
this.registryService = registryService;
|
||||
}
|
||||
|
||||
public int getCountVisitedFiles() {
|
||||
return countVisitedFiles;
|
||||
}
|
||||
|
||||
public void setCountVisitedFiles(final int countVisitedFiles) {
|
||||
this.countVisitedFiles = countVisitedFiles;
|
||||
}
|
||||
|
||||
public int getCountCreatedTDS() {
|
||||
return countCreatedTDS;
|
||||
}
|
||||
|
||||
public void setCountCreatedTDS(final int countCreatedTDS) {
|
||||
this.countCreatedTDS = countCreatedTDS;
|
||||
}
|
||||
|
||||
public int getCountUpdatedTDS() {
|
||||
return countUpdatedTDS;
|
||||
}
|
||||
|
||||
public void setCountUpdatedTDS(final int countUpdatedTDS) {
|
||||
this.countUpdatedTDS = countUpdatedTDS;
|
||||
}
|
||||
|
||||
public int getCountVisitedFolders() {
|
||||
return countVisitedFolders;
|
||||
}
|
||||
|
||||
public void setCountVisitedFolders(final int countVisitedFolders) {
|
||||
this.countVisitedFolders = countVisitedFolders;
|
||||
}
|
||||
|
||||
public int getCountCreatedInterfaces() {
|
||||
return countCreatedInterfaces;
|
||||
}
|
||||
|
||||
public void setCountCreatedInterfaces(final int countCreatedInterfaces) {
|
||||
this.countCreatedInterfaces = countCreatedInterfaces;
|
||||
}
|
||||
}
|
|
@ -1,51 +0,0 @@
|
|||
package eu.dnetlib.ariadneplus.workflows.nodes;
|
||||
|
||||
import eu.dnetlib.msro.workflows.graph.Arc;
|
||||
import eu.dnetlib.msro.workflows.procs.Env;
|
||||
import eu.dnetlib.rmi.common.ResultSet;
|
||||
import org.apache.commons.logging.Log;
|
||||
import org.apache.commons.logging.LogFactory;
|
||||
import org.springframework.web.client.RestTemplate;
|
||||
|
||||
/**
|
||||
* Created by Alessia Bardi on 26/01/2018.
|
||||
*
|
||||
* @author Alessia Bardi
|
||||
*/
|
||||
public class ReadVirtuosoByTypeJobNode extends ReadVirtuosoJobNode {
|
||||
|
||||
private static final Log log = LogFactory.getLog(ReadVirtuosoByTypeJobNode.class);
|
||||
|
||||
private String typeNamespace;
|
||||
private String typeName;
|
||||
|
||||
@Override
|
||||
protected String execute(final Env env){
|
||||
log.info("Using virtuoso reader at : " + getVirtuosoReaderAPIUrl());
|
||||
RestTemplate restTemplate = new RestTemplate(getClientHttpRequestFactory());
|
||||
|
||||
VirtuosoAriadnePlusByTypeIterator iterator = new VirtuosoAriadnePlusByTypeIterator().typeNamespace(typeNamespace).typeName(typeName).datasourceInterface(getDatasourceInterface()).virtuosoReaderAPIUrl(getVirtuosoReaderAPIUrl()).datasourceName(getDatasourceName()).restTemplate(restTemplate);
|
||||
log.debug(String.format("Created iterator for %s:%s : ", typeNamespace, typeName ));
|
||||
ResultSet<String> rs = getRsFactory().createResultSet(() -> iterator);
|
||||
|
||||
env.setAttribute("virtuoso_rs", rs);
|
||||
|
||||
return Arc.DEFAULT_ARC;
|
||||
}
|
||||
|
||||
public String getTypeNamespace() {
|
||||
return typeNamespace;
|
||||
}
|
||||
|
||||
public void setTypeNamespace(final String typeNamespace) {
|
||||
this.typeNamespace = typeNamespace;
|
||||
}
|
||||
|
||||
public String getTypeName() {
|
||||
return typeName;
|
||||
}
|
||||
|
||||
public void setTypeName(final String typeName) {
|
||||
this.typeName = typeName;
|
||||
}
|
||||
}
|
|
@ -1,87 +0,0 @@
|
|||
package eu.dnetlib.ariadneplus.workflows.nodes;
|
||||
|
||||
import eu.dnetlib.enabling.resultset.factory.ResultSetFactory;
|
||||
import eu.dnetlib.msro.workflows.graph.Arc;
|
||||
import eu.dnetlib.msro.workflows.nodes.AsyncJobNode;
|
||||
import eu.dnetlib.msro.workflows.procs.Env;
|
||||
import eu.dnetlib.rmi.common.ResultSet;
|
||||
import org.apache.commons.logging.Log;
|
||||
import org.apache.commons.logging.LogFactory;
|
||||
import org.springframework.beans.factory.annotation.Autowired;
|
||||
import org.springframework.http.client.ClientHttpRequestFactory;
|
||||
import org.springframework.http.client.HttpComponentsClientHttpRequestFactory;
|
||||
import org.springframework.web.client.RestTemplate;
|
||||
|
||||
/**
|
||||
* Created by Alessia Bardi on 26/01/2018.
|
||||
*
|
||||
* @author Alessia Bardi
|
||||
*/
|
||||
public class ReadVirtuosoJobNode extends AsyncJobNode {
|
||||
|
||||
private static final Log log = LogFactory.getLog(ReadVirtuosoJobNode.class);
|
||||
|
||||
private String datasourceName;
|
||||
private String datasourceInterface;
|
||||
private String virtuosoReaderAPIUrl;
|
||||
private int readTimeout = 30000;
|
||||
|
||||
@Autowired
|
||||
private ResultSetFactory rsFactory;
|
||||
|
||||
@Override
|
||||
protected String execute(final Env env){
|
||||
log.info("Using virtuoso reader at : " + getVirtuosoReaderAPIUrl());
|
||||
RestTemplate restTemplate = new RestTemplate(getClientHttpRequestFactory());
|
||||
|
||||
VirtuosoAriadnePlusIterator iterator = new VirtuosoAriadnePlusIterator().datasourceInterface(getDatasourceInterface()).virtuosoReaderAPIUrl(getVirtuosoReaderAPIUrl()).datasourceName(getDatasourceName()).restTemplate(restTemplate);
|
||||
ResultSet<String> rs = rsFactory.createResultSet(() -> iterator);
|
||||
|
||||
env.setAttribute("virtuoso_rs", rs);
|
||||
|
||||
return Arc.DEFAULT_ARC;
|
||||
}
|
||||
|
||||
|
||||
protected ClientHttpRequestFactory getClientHttpRequestFactory() {
|
||||
HttpComponentsClientHttpRequestFactory clientHttpRequestFactory = new HttpComponentsClientHttpRequestFactory();
|
||||
clientHttpRequestFactory.setReadTimeout(readTimeout);
|
||||
return clientHttpRequestFactory;
|
||||
}
|
||||
|
||||
public String getDatasourceName() {
|
||||
return datasourceName;
|
||||
}
|
||||
|
||||
public void setDatasourceName(final String datasourceName) {
|
||||
this.datasourceName = datasourceName;
|
||||
}
|
||||
|
||||
public String getDatasourceInterface() {
|
||||
return datasourceInterface;
|
||||
}
|
||||
|
||||
public void setDatasourceInterface(final String datasourceInterface) {
|
||||
this.datasourceInterface = datasourceInterface;
|
||||
}
|
||||
|
||||
public String getVirtuosoReaderAPIUrl() {
|
||||
return virtuosoReaderAPIUrl;
|
||||
}
|
||||
|
||||
public void setVirtuosoReaderAPIUrl(final String virtuosoReaderAPIUrl) {
|
||||
this.virtuosoReaderAPIUrl = virtuosoReaderAPIUrl;
|
||||
}
|
||||
|
||||
public int getReadTimeout() {
|
||||
return readTimeout;
|
||||
}
|
||||
|
||||
public void setReadTimeout(final int readTimeout) {
|
||||
this.readTimeout = readTimeout;
|
||||
}
|
||||
|
||||
public ResultSetFactory getRsFactory() {
|
||||
return rsFactory;
|
||||
}
|
||||
}
|
|
@ -1,108 +0,0 @@
|
|||
package eu.dnetlib.ariadneplus.workflows.nodes;
|
||||
|
||||
import java.nio.charset.Charset;
|
||||
|
||||
import com.google.common.base.Joiner;
|
||||
import eu.dnetlib.clients.enabling.ISLookUpClient;
|
||||
import eu.dnetlib.enabling.locators.UniqueServiceLocator;
|
||||
import eu.dnetlib.msro.workflows.graph.Arc;
|
||||
import eu.dnetlib.msro.workflows.nodes.SimpleJobNode;
|
||||
import eu.dnetlib.msro.workflows.procs.Env;
|
||||
import eu.dnetlib.msro.workflows.procs.ProcessAware;
|
||||
import eu.dnetlib.msro.workflows.procs.WorkflowProcess;
|
||||
import eu.dnetlib.msro.workflows.util.WorkflowsConstants;
|
||||
import eu.dnetlib.rmi.enabling.ISLookUpException;
|
||||
import eu.dnetlib.rmi.enabling.ISRegistryService;
|
||||
import org.antlr.stringtemplate.StringTemplate;
|
||||
import org.apache.commons.io.IOUtils;
|
||||
import org.apache.commons.lang3.StringEscapeUtils;
|
||||
import org.apache.commons.lang3.StringUtils;
|
||||
import org.apache.commons.logging.Log;
|
||||
import org.apache.commons.logging.LogFactory;
|
||||
import org.springframework.beans.factory.annotation.Autowired;
|
||||
|
||||
/**
|
||||
* Copied from eu.dnetlib.msro.workflows.nodes.repohi.RegisterWorkflowFromTemplateJobNode
|
||||
*
|
||||
* Clarin specific implementation for setting the transformation rules automatically.
|
||||
*
|
||||
* @author alessia
|
||||
*/
|
||||
public class RegisterClarinWorkflowFromTemplateJobNode extends SimpleJobNode implements ProcessAware {
|
||||
|
||||
private static final Log log = LogFactory.getLog(RegisterClarinWorkflowFromTemplateJobNode.class);
|
||||
private String wfName;
|
||||
private String wfTemplate;
|
||||
private String description;
|
||||
private WorkflowProcess process;
|
||||
@Autowired
|
||||
private UniqueServiceLocator serviceLocator;
|
||||
@Autowired
|
||||
private ISLookUpClient isLookUpClient;
|
||||
|
||||
public String getWfName() {
|
||||
return this.wfName;
|
||||
}
|
||||
|
||||
public void setWfName(final String wfName) {
|
||||
this.wfName = wfName;
|
||||
}
|
||||
|
||||
public String getWfTemplate() {
|
||||
return this.wfTemplate;
|
||||
}
|
||||
|
||||
public void setWfTemplate(final String wfTemplate) {
|
||||
this.wfTemplate = wfTemplate;
|
||||
}
|
||||
|
||||
public String getDescription() {
|
||||
return this.description;
|
||||
}
|
||||
|
||||
public void setDescription(final String description) {
|
||||
this.description = description;
|
||||
}
|
||||
|
||||
@Override
|
||||
protected String execute(final Env env) throws Exception {
|
||||
final String dsId = this.process.getDsId();
|
||||
final String ifaceId = this.process.getDsInterface();
|
||||
final String dsName = this.process.getDsName();
|
||||
|
||||
final StringTemplate profTemplate = new StringTemplate(IOUtils.toString(getClass().getResourceAsStream(getWfTemplate()), Charset.forName("UTF-8")));
|
||||
profTemplate.setAttribute("name", StringEscapeUtils.escapeXml11(this.wfName));
|
||||
profTemplate.setAttribute("desc", StringEscapeUtils.escapeXml11(this.description));
|
||||
profTemplate.setAttribute("priority", WorkflowsConstants.DEFAULT_WF_PRIORITY);
|
||||
profTemplate.setAttribute("dsId", StringEscapeUtils.escapeXml11(dsId));
|
||||
profTemplate.setAttribute("interface", StringEscapeUtils.escapeXml11(ifaceId));
|
||||
profTemplate.setAttribute("dsName", StringEscapeUtils.escapeXml11(dsName));
|
||||
profTemplate.setAttribute("tdsCsv", getListOfTDS(ifaceId));
|
||||
|
||||
final String profId = this.serviceLocator.getService(ISRegistryService.class).registerProfile(profTemplate.toString());
|
||||
|
||||
env.setAttribute("repoWfId", profId);
|
||||
|
||||
log.info("A new repo wf has been registered, id: " + profId);
|
||||
|
||||
return Arc.DEFAULT_ARC;
|
||||
}
|
||||
|
||||
private String getListOfTDS(final String ifaceId) throws ISLookUpException {
|
||||
String lastAPIPart = StringUtils.substringAfterLast(ifaceId, "::");
|
||||
String query = "for $x in collection('/db/DRIVER/TransformationRuleDSResources/TransformationRuleDSResourceType') return $x[starts-with(.//TITLE/text() , '"+lastAPIPart+"')]//RESOURCE_IDENTIFIER/@value/string()";
|
||||
return Joiner.on(',').join(isLookUpClient.search(query));
|
||||
}
|
||||
|
||||
public WorkflowProcess getProcess() {
|
||||
return this.process;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void setProcess(final WorkflowProcess process) {
|
||||
this.process = process;
|
||||
}
|
||||
|
||||
|
||||
|
||||
}
|
|
@ -1,50 +0,0 @@
|
|||
package eu.dnetlib.ariadneplus.workflows.nodes;
|
||||
|
||||
import java.util.Map;
|
||||
|
||||
import com.google.gson.Gson;
|
||||
import eu.dnetlib.msro.workflows.graph.Arc;
|
||||
import eu.dnetlib.msro.workflows.nodes.AsyncJobNode;
|
||||
import eu.dnetlib.msro.workflows.nodes.SimpleJobNode;
|
||||
import eu.dnetlib.msro.workflows.procs.Env;
|
||||
import eu.dnetlib.msro.workflows.util.WorkflowsConstants;
|
||||
import org.apache.commons.logging.Log;
|
||||
import org.apache.commons.logging.LogFactory;
|
||||
|
||||
/**
|
||||
* Created by Alessia Bardi on 05/04/17.
|
||||
*
|
||||
* @author Alessia Bardi
|
||||
*/
|
||||
public class ReportReaderJobNode extends AsyncJobNode {
|
||||
|
||||
private static final Log log = LogFactory.getLog(ReportReaderJobNode.class);
|
||||
private String reportEnvParam = "validationReport";
|
||||
private String outputParam = reportEnvParam;
|
||||
|
||||
@Override
|
||||
protected String execute(final Env env) throws Exception {
|
||||
Map<String, String> report = env.getAttribute(reportEnvParam, Map.class);
|
||||
Gson gson = new Gson();
|
||||
String theReport = gson.toJson(report);
|
||||
env.setAttribute( WorkflowsConstants.MAIN_LOG_PREFIX+outputParam, theReport);
|
||||
log.debug("Found the report: "+theReport);
|
||||
return Arc.DEFAULT_ARC;
|
||||
}
|
||||
|
||||
public String getReportEnvParam() {
|
||||
return reportEnvParam;
|
||||
}
|
||||
|
||||
public void setReportEnvParam(final String reportEnvParam) {
|
||||
this.reportEnvParam = reportEnvParam;
|
||||
}
|
||||
|
||||
public String getOutputParam() {
|
||||
return outputParam;
|
||||
}
|
||||
|
||||
public void setOutputParam(final String outputParam) {
|
||||
this.outputParam = outputParam;
|
||||
}
|
||||
}
|
|
@ -1,91 +0,0 @@
|
|||
package eu.dnetlib.ariadneplus.workflows.nodes;
|
||||
|
||||
import java.net.URI;
|
||||
import java.net.URISyntaxException;
|
||||
import java.util.Map;
|
||||
|
||||
import org.apache.commons.logging.Log;
|
||||
import org.apache.commons.logging.LogFactory;
|
||||
import org.apache.http.client.utils.URIBuilder;
|
||||
import org.springframework.web.client.RestTemplate;
|
||||
|
||||
/**
|
||||
* Created by Alessia Bardi on 01/03/2018.
|
||||
*
|
||||
* @author Alessia Bardi
|
||||
*/
|
||||
public class VirtuosoAriadnePlusByTypeIterator extends VirtuosoAriadnePlusIterator {
|
||||
|
||||
private static final Log log = LogFactory.getLog(VirtuosoAriadnePlusByTypeIterator.class);
|
||||
private String typeName;
|
||||
private String typeNamespace;
|
||||
|
||||
@Override
|
||||
protected URI getURIForSubjectList(final int offset) throws URISyntaxException {
|
||||
URIBuilder builder = new URIBuilder(getVirtuosoReaderAPIUrl() + "/apiSubjectsWithType");
|
||||
builder.addParameter("api", getDatasourceInterface());
|
||||
builder.addParameter("typeNamespace", typeNamespace);
|
||||
builder.addParameter("typeName", typeName);
|
||||
builder.addParameter("limit", Integer.toString(LIMIT));
|
||||
builder.addParameter("offset", Integer.toString(offset));
|
||||
return builder.build();
|
||||
}
|
||||
|
||||
@Override
|
||||
protected URI getURIForRDFRequest(final String subjectURL) throws URISyntaxException {
|
||||
URIBuilder builder = new URIBuilder(getVirtuosoReaderAPIUrl() + "/subject").addParameter("subjectURL", subjectURL).addParameter("typeName", typeName).addParameter("timeout", ANY_TIME_QUERY_MS);
|
||||
return builder.build();
|
||||
}
|
||||
|
||||
public String getTypeName() {
|
||||
return typeName;
|
||||
}
|
||||
|
||||
public void setTypeName(final String typeName) {
|
||||
this.typeName = typeName;
|
||||
}
|
||||
|
||||
public String getTypeNamespace() {
|
||||
return typeNamespace;
|
||||
}
|
||||
|
||||
public void setTypeNamespace(final String typeNamespace) {
|
||||
this.typeNamespace = typeNamespace;
|
||||
}
|
||||
|
||||
public VirtuosoAriadnePlusByTypeIterator typeName(final String typeName) {
|
||||
this.typeName = typeName;
|
||||
return this;
|
||||
}
|
||||
|
||||
public VirtuosoAriadnePlusByTypeIterator typeNamespace(final String typeNamespace) {
|
||||
this.typeNamespace = typeNamespace;
|
||||
return this;
|
||||
}
|
||||
|
||||
@Override
|
||||
public VirtuosoAriadnePlusByTypeIterator restTemplate(final RestTemplate restTemplate) {
|
||||
setRestTemplate(restTemplate);
|
||||
return this;
|
||||
}
|
||||
@Override
|
||||
public VirtuosoAriadnePlusByTypeIterator datasourceInterface(final String datasourceInterface) {
|
||||
setDatasourceInterface(datasourceInterface);
|
||||
return this;
|
||||
}
|
||||
@Override
|
||||
public VirtuosoAriadnePlusByTypeIterator virtuosoReaderAPIUrl(final String virtuosoReaderAPIUrl) {
|
||||
setVirtuosoReaderAPIUrl(virtuosoReaderAPIUrl);
|
||||
return this;
|
||||
}
|
||||
@Override
|
||||
public VirtuosoAriadnePlusByTypeIterator datasourceName(final String datasourceName) {
|
||||
setDatasourceName(datasourceName);
|
||||
return this;
|
||||
}
|
||||
@Override
|
||||
public VirtuosoAriadnePlusByTypeIterator errors(final Map<String, Integer> errors) {
|
||||
setErrors(errors);
|
||||
return this;
|
||||
}
|
||||
}
|
|
@ -1,24 +0,0 @@
|
|||
package eu.dnetlib.ariadneplus.workflows.nodes;
|
||||
|
||||
/**
 * Checked exception of the AriadnePlus virtuoso workflow nodes.
 *
 * <p>It adds nothing to {@link Exception}: every constructor delegates to the matching
 * superclass constructor.</p>
 *
 * @author Alessia Bardi
 */
public class VirtuosoAriadnePlusException extends Exception {

    /** Creates an exception with neither message nor cause. */
    public VirtuosoAriadnePlusException() {
        super();
    }

    /**
     * Creates an exception with a detail message.
     *
     * @param message the detail message
     */
    public VirtuosoAriadnePlusException(final String message) {
        super(message);
    }

    /**
     * Creates an exception with a detail message and a cause.
     *
     * @param message the detail message
     * @param cause the underlying failure
     */
    public VirtuosoAriadnePlusException(final String message, final Throwable cause) {
        super(message, cause);
    }

    /**
     * Creates an exception wrapping a cause.
     *
     * @param cause the underlying failure
     */
    public VirtuosoAriadnePlusException(final Throwable cause) {
        super(cause);
    }
}
|
|
@ -1,352 +0,0 @@
|
|||
package eu.dnetlib.ariadneplus.workflows.nodes;
|
||||
|
||||
import java.net.URI;
|
||||
import java.net.URISyntaxException;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.NoSuchElementException;
|
||||
import java.util.concurrent.BlockingQueue;
|
||||
import java.util.concurrent.ExecutorService;
|
||||
import java.util.concurrent.Executors;
|
||||
import java.util.concurrent.TimeUnit;
|
||||
|
||||
import com.google.common.collect.Lists;
|
||||
import com.google.common.collect.Maps;
|
||||
import com.google.common.collect.Queues;
|
||||
import eu.dnetlib.data.collector.ThreadSafeIterator;
|
||||
import eu.dnetlib.rmi.data.CollectorServiceRuntimeException;
|
||||
import org.apache.commons.lang3.StringEscapeUtils;
|
||||
import org.apache.commons.lang3.StringUtils;
|
||||
import org.apache.commons.logging.Log;
|
||||
import org.apache.commons.logging.LogFactory;
|
||||
import org.apache.http.client.utils.URIBuilder;
|
||||
import org.springframework.http.*;
|
||||
import org.springframework.web.client.ResourceAccessException;
|
||||
import org.springframework.web.client.RestClientException;
|
||||
import org.springframework.web.client.RestTemplate;
|
||||
|
||||
/**
|
||||
* Created by Alessia Bardi on 31/01/2018.
|
||||
*
|
||||
* @author Alessia Bardi
|
||||
*/
|
||||
public class VirtuosoAriadnePlusIterator extends ThreadSafeIterator {
|
||||
|
||||
private static final Log log = LogFactory.getLog(VirtuosoAriadnePlusIterator.class);
|
||||
protected static final String ANY_TIME_QUERY_MS = "1800000"; //1800000 == 3 mins
|
||||
protected static final int QUEUE_TIMEOUT_SECONDS = 600;
|
||||
public final static String TERMINATOR = "ARNOLD";
|
||||
public final static String ERROR_TERMINATOR = "SCHWARZ";
|
||||
protected final static int SLEEP_MS = 5000;
|
||||
protected final static int MAX_RETRIES = 3;
|
||||
protected final static int LIMIT = 100;
|
||||
|
||||
private String datasourceName;
|
||||
private String datasourceInterface;
|
||||
private String virtuosoReaderAPIUrl;
|
||||
private boolean started = false;
|
||||
private Map<String, Integer> errors = Maps.newHashMap();
|
||||
private List<String> listForClass = Lists.newArrayList();
|
||||
private BlockingQueue<String> elements = Queues.newArrayBlockingQueue(10);
|
||||
|
||||
private String currentElement = null;
|
||||
private ExecutorService executor = Executors.newSingleThreadExecutor();
|
||||
|
||||
private RestTemplate restTemplate;
|
||||
|
||||
|
||||
private synchronized void verifyStarted(){
|
||||
if (!this.started) {
|
||||
this.started = true;
|
||||
fillQueue();
|
||||
getNextElement(MAX_RETRIES);
|
||||
}
|
||||
}
|
||||
|
||||
protected void fillQueue(){
|
||||
log.info("Virtuoso reader at : " + getVirtuosoReaderAPIUrl());
|
||||
getExecutor().submit(() -> {
|
||||
try {
|
||||
int offset = 0;
|
||||
boolean again;
|
||||
do {
|
||||
List<String> subjectList = getSubjectList(offset);
|
||||
for (String subject : subjectList) {
|
||||
String xmlFile = tryGetRDF(subject, MAX_RETRIES);
|
||||
if (StringUtils.isBlank(xmlFile)) {
|
||||
log.warn("Skipping blank RDF for " + subject);
|
||||
} else {
|
||||
getElements().offer(xmlFile, QUEUE_TIMEOUT_SECONDS, TimeUnit.SECONDS);
|
||||
}
|
||||
}
|
||||
again = subjectList.size() == LIMIT;
|
||||
offset += LIMIT;
|
||||
} while(again);
|
||||
log.debug("End of subject list, adding terminator to the queue");
|
||||
getElements().offer(TERMINATOR, QUEUE_TIMEOUT_SECONDS, TimeUnit.SECONDS);
|
||||
} catch (Exception e) {
|
||||
log.error(e.getMessage());
|
||||
try {
|
||||
getElements().offer(ERROR_TERMINATOR, QUEUE_TIMEOUT_SECONDS, TimeUnit.SECONDS);
|
||||
} catch (InterruptedException e1) {
|
||||
log.error(e1.getMessage());
|
||||
}
|
||||
}
|
||||
|
||||
});
|
||||
getExecutor().shutdown();
|
||||
}
|
||||
|
||||
|
||||
protected String tryGetRDF(final String subjectURL, int attempt) throws URISyntaxException, InterruptedException, VirtuosoAriadnePlusException {
|
||||
//this is something George said: if it has no http URI, then it is not to be considered relevant by itself
|
||||
if(!subjectURL.startsWith("http")){
|
||||
log.debug("Skipping as non-http url: "+subjectURL);
|
||||
errors.merge("Non-http URLs", 1, Integer::sum);
|
||||
return null;
|
||||
}
|
||||
log.debug("Querying Api, remaining attempts: "+attempt);
|
||||
if (attempt <= 0) {
|
||||
errors.merge("Failed tryGetRDF", 1, Integer::sum);
|
||||
return null;
|
||||
}
|
||||
ResponseEntity<String> response = null;
|
||||
try {
|
||||
response = getRDF(subjectURL);
|
||||
}catch(ResourceAccessException e){
|
||||
//request timed out --> sleep and try again
|
||||
log.warn("Request timeout for "+subjectURL+": I'll sleep and then try again");
|
||||
Thread.sleep(SLEEP_MS);
|
||||
return tryGetRDF(subjectURL, --attempt);
|
||||
}
|
||||
HttpStatus responseStatus = response.getStatusCode();
|
||||
if (responseStatus.is2xxSuccessful()) {
|
||||
String rdfFile = response.getBody();
|
||||
if(StringUtils.isBlank(rdfFile)){
|
||||
log.warn("Got blank RDF for "+subjectURL+" , let's try again...");
|
||||
Thread.sleep(SLEEP_MS);
|
||||
return tryGetRDF(subjectURL, --attempt);
|
||||
}
|
||||
else {
|
||||
final String xmlFile = completeXML(rdfFile, subjectURL);
|
||||
return xmlFile;
|
||||
}
|
||||
} else {
|
||||
if (responseStatus.is5xxServerError()) {
|
||||
//sleep for a while and re-try
|
||||
log.warn("HTTP ERROR: "+responseStatus.value() + ": " + responseStatus.getReasonPhrase()+": I'll sleep and then try again");
|
||||
Thread.sleep(SLEEP_MS);
|
||||
return tryGetRDF(subjectURL, --attempt);
|
||||
} else {
|
||||
log.error("ERROR: Can't get the RDF for " + subjectURL + " " + responseStatus.value() + ": " + responseStatus.getReasonPhrase());
|
||||
errors.merge(responseStatus.value() + ": " + responseStatus.getReasonPhrase(), 1, Integer::sum);
|
||||
}
|
||||
}
|
||||
return null;
|
||||
}
|
||||
|
||||
protected URI getURIForSubjectList(final int offset) throws URISyntaxException {
|
||||
URIBuilder builder = new URIBuilder(getVirtuosoReaderAPIUrl() + "/apiSubjects");
|
||||
builder.addParameter("api", getDatasourceInterface());
|
||||
builder.addParameter("limit", Integer.toString(LIMIT));
|
||||
builder.addParameter("offset", Integer.toString(offset));
|
||||
return builder.build();
|
||||
}
|
||||
|
||||
protected List<String> getSubjectList(final int offset) throws URISyntaxException, VirtuosoAriadnePlusException {
|
||||
URI uri = getURIForSubjectList(offset);
|
||||
log.debug("fillQueue -- Calling for subject list: " + uri.toString());
|
||||
List<String> subjectList;
|
||||
try {
|
||||
subjectList = getRestTemplate().getForObject(uri, getListForClass().getClass());
|
||||
}catch(RestClientException rce){
|
||||
throw new VirtuosoAriadnePlusException(rce);
|
||||
}
|
||||
return subjectList;
|
||||
}
|
||||
|
||||
protected URI getURIForRDFRequest(final String subjectURL) throws URISyntaxException {
|
||||
URIBuilder builder = new URIBuilder(getVirtuosoReaderAPIUrl() + "/subject").addParameter("subjectURL", subjectURL).addParameter("timeout", ANY_TIME_QUERY_MS);
|
||||
return builder.build();
|
||||
}
|
||||
|
||||
protected ResponseEntity<String> getRDF(final String subjectURL) throws URISyntaxException {
|
||||
HttpHeaders headers = new HttpHeaders();
|
||||
headers.setAccept(Lists.newArrayList(MediaType.APPLICATION_XML));
|
||||
URI uri = getURIForRDFRequest(subjectURL);
|
||||
log.debug("fillQueue -- Calling for subject RDF: " + uri.toString());
|
||||
HttpEntity<String> entity = new HttpEntity<>("parameters", headers);
|
||||
return restTemplate.exchange(uri, HttpMethod.GET, entity, String.class);
|
||||
}
|
||||
|
||||
|
||||
public String completeXML(final String rdfFile, final String url) {
|
||||
String xmlEscapedURL = StringEscapeUtils.escapeXml11(url);
|
||||
String rdfFileNoXmlDecl = rdfFile.replaceAll("\\<\\?xml(.+?)\\?\\>", "").trim();
|
||||
return "<?xml version=\"1.0\" encoding=\"UTF-8\"?><record xmlns=\"http://www.openarchives.org/OAI/2.0/\"><header xmlns:dri=\"http://www.driver-repository.eu/namespace/dri\"><dri:objIdentifier>"
|
||||
+ xmlEscapedURL + "</dri:objIdentifier><dri:datasourceapi>" + datasourceInterface + "</dri:datasourceapi><dri:datasourcename>" + datasourceName
|
||||
+ "</dri:datasourcename></header><metadata>" + rdfFileNoXmlDecl + "</metadata></record>";
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean doHasNext() {
|
||||
try {
|
||||
verifyStarted();
|
||||
} catch (Exception e) {
|
||||
getExecutor().shutdownNow();
|
||||
throw new CollectorServiceRuntimeException(e);
|
||||
}
|
||||
switch(currentElement){
|
||||
case TERMINATOR:
|
||||
if(!executor.isTerminated()) executor.shutdownNow();
|
||||
return false;
|
||||
case ERROR_TERMINATOR:
|
||||
executor.shutdownNow();
|
||||
throw new CollectorServiceRuntimeException("Error getting elements from virtuoso");
|
||||
default:
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public String doNext() {
|
||||
if(!hasNext()) {
|
||||
log.error("Next called but hasNext is false", new NoSuchElementException());
|
||||
throw new NoSuchElementException();
|
||||
}
|
||||
switch(currentElement){
|
||||
case TERMINATOR:
|
||||
case ERROR_TERMINATOR:
|
||||
executor.shutdownNow();
|
||||
throw new NoSuchElementException();
|
||||
default:
|
||||
String res = currentElement;
|
||||
getNextElement(MAX_RETRIES);
|
||||
return res;
|
||||
}
|
||||
}
|
||||
|
||||
private void getNextElement(int attempt){
|
||||
log.debug("polling from queue, remaining attempts: "+attempt);
|
||||
if(attempt <= 0) currentElement = ERROR_TERMINATOR;
|
||||
else{
|
||||
try {
|
||||
currentElement = elements.take();
|
||||
} catch (InterruptedException e) {
|
||||
//current thread interrupted. Let's end.
|
||||
currentElement = ERROR_TERMINATOR;
|
||||
executor.shutdownNow();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public void remove() {
|
||||
throw new UnsupportedOperationException();
|
||||
}
|
||||
|
||||
public VirtuosoAriadnePlusIterator datasourceInterface(final String datasourceInterface) {
|
||||
this.datasourceInterface = datasourceInterface;
|
||||
return this;
|
||||
}
|
||||
|
||||
public VirtuosoAriadnePlusIterator virtuosoReaderAPIUrl(final String virtuosoReaderAPIUrl) {
|
||||
this.virtuosoReaderAPIUrl = virtuosoReaderAPIUrl;
|
||||
return this;
|
||||
}
|
||||
|
||||
public VirtuosoAriadnePlusIterator datasourceName(final String datasourceName) {
|
||||
this.datasourceName = datasourceName;
|
||||
return this;
|
||||
}
|
||||
|
||||
public VirtuosoAriadnePlusIterator errors(final Map<String, Integer> errors) {
|
||||
this.errors = errors;
|
||||
return this;
|
||||
}
|
||||
|
||||
public String getDatasourceInterface() {
|
||||
return datasourceInterface;
|
||||
}
|
||||
|
||||
public String getVirtuosoReaderAPIUrl() {
|
||||
return virtuosoReaderAPIUrl;
|
||||
}
|
||||
|
||||
public Map<String, Integer> getErrors() {
|
||||
return errors;
|
||||
}
|
||||
|
||||
public BlockingQueue<String> getElements() {
|
||||
return elements;
|
||||
}
|
||||
|
||||
public RestTemplate getRestTemplate() {
|
||||
return restTemplate;
|
||||
}
|
||||
|
||||
public VirtuosoAriadnePlusIterator restTemplate(final RestTemplate restTemplate) {
|
||||
this.restTemplate = restTemplate;
|
||||
return this;
|
||||
}
|
||||
|
||||
public String getDatasourceName() {
|
||||
return datasourceName;
|
||||
}
|
||||
|
||||
public boolean isStarted() {
|
||||
return started;
|
||||
}
|
||||
|
||||
public List<String> getListForClass() {
|
||||
return listForClass;
|
||||
}
|
||||
|
||||
public String getCurrentElement() {
|
||||
return currentElement;
|
||||
}
|
||||
|
||||
public ExecutorService getExecutor() {
|
||||
return executor;
|
||||
}
|
||||
|
||||
public void setDatasourceName(final String datasourceName) {
|
||||
this.datasourceName = datasourceName;
|
||||
}
|
||||
|
||||
public void setDatasourceInterface(final String datasourceInterface) {
|
||||
this.datasourceInterface = datasourceInterface;
|
||||
}
|
||||
|
||||
public void setVirtuosoReaderAPIUrl(final String virtuosoReaderAPIUrl) {
|
||||
this.virtuosoReaderAPIUrl = virtuosoReaderAPIUrl;
|
||||
}
|
||||
|
||||
public void setStarted(final boolean started) {
|
||||
this.started = started;
|
||||
}
|
||||
|
||||
public void setErrors(final Map<String, Integer> errors) {
|
||||
this.errors = errors;
|
||||
}
|
||||
|
||||
public void setListForClass(final List<String> listForClass) {
|
||||
this.listForClass = listForClass;
|
||||
}
|
||||
|
||||
public void setElements(final BlockingQueue<String> elements) {
|
||||
this.elements = elements;
|
||||
}
|
||||
|
||||
public void setCurrentElement(final String currentElement) {
|
||||
this.currentElement = currentElement;
|
||||
}
|
||||
|
||||
public void setExecutor(final ExecutorService executor) {
|
||||
this.executor = executor;
|
||||
}
|
||||
|
||||
public void setRestTemplate(final RestTemplate restTemplate) {
|
||||
this.restTemplate = restTemplate;
|
||||
}
|
||||
}
|
|
@ -1,41 +0,0 @@
|
|||
package eu.dnetlib.data.collector.plugins.ariadneplus.ehri;
|
||||
|
||||
import eu.dnetlib.rmi.data.CollectorServiceException;
|
||||
import eu.dnetlib.rmi.data.InterfaceDescriptor;
|
||||
import eu.dnetlib.rmi.data.plugin.AbstractCollectorPlugin;
|
||||
|
||||
/**
|
||||
* This collector plugin collects metadata records from the EHRI portal export URL in the form https://portal.ehri-project.eu/units/<ID>/export.
|
||||
* IDs of units are retrieved from the EHRI GraphQL API, see docs at https://portal.ehri-project.eu/api/graphql
|
||||
* Created by Alessia Bardi on 19/04/2017.
|
||||
*
|
||||
* @author Alessia Bardi
|
||||
*/
|
||||
public class EHRICollectorPlugin extends AbstractCollectorPlugin {
|
||||
|
||||
private EHRIIteratorFactory ehriIteratorFactory;
|
||||
|
||||
@Override
|
||||
public Iterable<String> collect(final InterfaceDescriptor interfaceDescriptor, final String fromDate, final String untilDate)
|
||||
throws CollectorServiceException {
|
||||
|
||||
// https://portal.ehri-project.eu/units
|
||||
final String baseUrl = interfaceDescriptor.getBaseUrl();
|
||||
// export
|
||||
final String suffixToUrl = interfaceDescriptor.getParams().get("suffixToBaseUrl");
|
||||
|
||||
//info to get the identifiers from the graphQL API:
|
||||
final String graphQLURL = interfaceDescriptor.getParams().get("graphQLURL");
|
||||
final String graphQLQuery = interfaceDescriptor.getParams().get("graphQLQuery");
|
||||
|
||||
return () -> ehriIteratorFactory.newIterator(graphQLURL, graphQLQuery, baseUrl, suffixToUrl);
|
||||
}
|
||||
|
||||
public EHRIIteratorFactory getEhriIteratorFactory() {
|
||||
return ehriIteratorFactory;
|
||||
}
|
||||
|
||||
public void setEhriIteratorFactory(final EHRIIteratorFactory ehriIteratorFactory) {
|
||||
this.ehriIteratorFactory = ehriIteratorFactory;
|
||||
}
|
||||
}
|
|
@ -1,77 +0,0 @@
|
|||
package eu.dnetlib.data.collector.plugins.ariadneplus.ehri;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.io.InputStreamReader;
|
||||
import java.net.URI;
|
||||
import java.net.URISyntaxException;
|
||||
import java.util.Iterator;
|
||||
|
||||
import com.google.common.collect.Iterables;
|
||||
import com.google.common.collect.Lists;
|
||||
import com.google.gson.JsonArray;
|
||||
import com.google.gson.JsonObject;
|
||||
import com.google.gson.JsonParser;
|
||||
import eu.dnetlib.rmi.data.CollectorServiceException;
|
||||
import org.apache.commons.logging.Log;
|
||||
import org.apache.commons.logging.LogFactory;
|
||||
import org.apache.http.HttpEntity;
|
||||
import org.apache.http.client.methods.CloseableHttpResponse;
|
||||
import org.apache.http.client.methods.HttpPost;
|
||||
import org.apache.http.entity.StringEntity;
|
||||
import org.apache.http.impl.client.CloseableHttpClient;
|
||||
import org.apache.http.impl.client.HttpClients;
|
||||
import org.apache.http.util.EntityUtils;
|
||||
|
||||
/**
|
||||
* IDs of units are retrieved from the EHRI GraphQL API, see docs at https://portal.ehri-project.eu/api/graphql
|
||||
*
|
||||
* Created by Alessia Bardi on 19/04/2017.
|
||||
*
|
||||
* @author Alessia Bardi
|
||||
*/
|
||||
public class EHRIGraphQLClient {
|
||||
|
||||
private static final Log log = LogFactory.getLog(EHRIGraphQLClient.class);
|
||||
|
||||
public Iterator<String> collect(final String baseURL, final String graphQLQuery) throws URISyntaxException, IOException, CollectorServiceException {
|
||||
/*
|
||||
The curl equivalent:
|
||||
curl --header X-Stream:true \
|
||||
--header Content-type:application/json \
|
||||
https://portal.ehri-project.eu/api/graphql \
|
||||
--data-binary '{"query":"{topLevelDocumentaryUnits{items{id}}}"}'
|
||||
*/
|
||||
CloseableHttpClient httpClient = HttpClients.createDefault();
|
||||
URI baseURI = new URI(baseURL);
|
||||
HttpPost httpPost = new HttpPost();
|
||||
httpPost.setURI(baseURI);
|
||||
httpPost.setHeader("X-Stream", "true");
|
||||
httpPost.setHeader("Content-type", "application/json" );
|
||||
log.info(graphQLQuery);
|
||||
StringEntity postQuery = new StringEntity(graphQLQuery);
|
||||
httpPost.setEntity(postQuery);
|
||||
HttpEntity entity = null;
|
||||
try(CloseableHttpResponse response = httpClient.execute(httpPost)) {
|
||||
switch(response.getStatusLine().getStatusCode()){
|
||||
case 200:
|
||||
entity = response.getEntity();
|
||||
InputStreamReader reader = new InputStreamReader(entity.getContent());
|
||||
return getIdentifiers(reader);
|
||||
default:
|
||||
log.error(httpPost);
|
||||
log.error(response.getStatusLine());
|
||||
throw new CollectorServiceException(response.getStatusLine().toString());
|
||||
}
|
||||
} finally {
|
||||
if(entity != null) EntityUtils.consume(entity);
|
||||
}
|
||||
}
|
||||
|
||||
protected Iterator<String> getIdentifiers(final InputStreamReader input){
|
||||
JsonObject jsonObject = new JsonParser().parse(input).getAsJsonObject();
|
||||
JsonArray items = jsonObject.getAsJsonObject("data").getAsJsonObject("topLevelDocumentaryUnits").getAsJsonArray("items");
|
||||
log.debug(items);
|
||||
return Lists.newArrayList(Iterables.transform(items, jelem -> jelem.getAsJsonObject().get("id").getAsString())).iterator();
|
||||
}
|
||||
|
||||
}
|
|
@ -1,107 +0,0 @@
|
|||
package eu.dnetlib.data.collector.plugins.ariadneplus.ehri;
|
||||
|
||||
import java.io.ByteArrayOutputStream;
|
||||
import java.io.IOException;
|
||||
import java.net.URL;
|
||||
import java.util.Iterator;
|
||||
|
||||
import com.ximpleware.*;
|
||||
import eu.dnetlib.data.collector.ThreadSafeIterator;
|
||||
import eu.dnetlib.rmi.data.CollectorServiceRuntimeException;
|
||||
import org.apache.commons.io.IOUtils;
|
||||
import org.apache.commons.logging.Log;
|
||||
import org.apache.commons.logging.LogFactory;
|
||||
|
||||
/**
|
||||
* Created by Alessia Bardi on 03/05/2017.
|
||||
*
|
||||
* @author Alessia Bardi
|
||||
*/
|
||||
public class EHRIIterator extends ThreadSafeIterator {
|
||||
|
||||
private static final Log log = LogFactory.getLog(EHRIIterator.class);
|
||||
private static int MAX_FAILED = 100;
|
||||
private Iterator<String> identifiers;
|
||||
private String baseUrl;
|
||||
private String suffix;
|
||||
private int failed = 0;
|
||||
|
||||
|
||||
public EHRIIterator(final Iterator<String> idIterator, final String baseUrl, final String suffix){
|
||||
this.identifiers = idIterator;
|
||||
this.baseUrl = baseUrl;
|
||||
this.suffix = suffix;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean doHasNext() {
|
||||
return identifiers.hasNext();
|
||||
}
|
||||
|
||||
@Override
|
||||
public String doNext() {
|
||||
String target = baseUrl+"/"+identifiers.next()+"/"+suffix;
|
||||
log.debug("Getting "+target);
|
||||
try {
|
||||
URL url = new URL(target);
|
||||
String record = IOUtils.toString(url, "UTF-8");
|
||||
return removeDefaultEADNamespace(record);
|
||||
} catch (IOException e) {
|
||||
log.error("Unable to get "+target, e);
|
||||
failed++;
|
||||
if(failed > MAX_FAILED){
|
||||
throw new CollectorServiceRuntimeException("Could not download more than "+MAX_FAILED+"documents from EHRI. Stopping.");
|
||||
}
|
||||
if(this.hasNext()){
|
||||
return this.next();
|
||||
}
|
||||
else return "";
|
||||
}
|
||||
}
|
||||
|
||||
protected String removeDefaultEADNamespace(final String xml) {
|
||||
try {
|
||||
VTDGen vg = new VTDGen();
|
||||
ByteArrayOutputStream baos = new ByteArrayOutputStream();
|
||||
vg.setDoc(xml.getBytes());
|
||||
vg.parse(false); // turn off namespace awareness so that
|
||||
VTDNav vn = vg.getNav();
|
||||
AutoPilot ap = new AutoPilot(vn);
|
||||
XMLModifier xm = new XMLModifier(vn);
|
||||
ap.selectXPath("/ead/@xmlns");
|
||||
int i = 0;
|
||||
while ((i = ap.evalXPath()) != -1) {
|
||||
xm.remove();
|
||||
}
|
||||
xm.output(baos);
|
||||
return baos.toString();
|
||||
}catch(Exception e){
|
||||
log.error("Cannot remove default namespace from ead element: "+xml);
|
||||
throw new CollectorServiceRuntimeException("Cannot remove default namespace from ead element", e);
|
||||
}
|
||||
}
|
||||
|
||||
public Iterator<String> getIdentifiers() {
|
||||
return identifiers;
|
||||
}
|
||||
|
||||
public void setIdentifiers(final Iterator<String> identifiers) {
|
||||
this.identifiers = identifiers;
|
||||
}
|
||||
|
||||
public String getBaseUrl() {
|
||||
return baseUrl;
|
||||
}
|
||||
|
||||
public void setBaseUrl(final String baseUrl) {
|
||||
this.baseUrl = baseUrl;
|
||||
}
|
||||
|
||||
public String getSuffix() {
|
||||
return suffix;
|
||||
}
|
||||
|
||||
public void setSuffix(final String suffix) {
|
||||
this.suffix = suffix;
|
||||
}
|
||||
}
|
|
@ -1,38 +0,0 @@
|
|||
package eu.dnetlib.data.collector.plugins.ariadneplus.ehri;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.net.URISyntaxException;
|
||||
import java.util.Iterator;
|
||||
|
||||
import eu.dnetlib.rmi.data.CollectorServiceException;
|
||||
import org.springframework.beans.factory.annotation.Autowired;
|
||||
|
||||
/**
|
||||
* Created by Alessia Bardi on 03/05/2017.
|
||||
*
|
||||
* @author Alessia Bardi
|
||||
*/
|
||||
public class EHRIIteratorFactory {
|
||||
|
||||
@Autowired
|
||||
private EHRIGraphQLClient ehriGraphQLClient;
|
||||
|
||||
public Iterator<String> newIterator(final String baseURLIdentifiers, final String queryIdentifiers,
|
||||
final String baseUrl,
|
||||
final String suffix) {
|
||||
try {
|
||||
Iterator<String> ids = ehriGraphQLClient.collect(baseURLIdentifiers, queryIdentifiers);
|
||||
return new EHRIIterator(ids, baseUrl, suffix);
|
||||
} catch (CollectorServiceException | IOException | URISyntaxException e) {
|
||||
throw new RuntimeException(e);
|
||||
}
|
||||
}
|
||||
|
||||
public EHRIGraphQLClient getEhriGraphQLClient() {
|
||||
return ehriGraphQLClient;
|
||||
}
|
||||
|
||||
public void setEhriGraphQLClient(final EHRIGraphQLClient ehriGraphQLClient) {
|
||||
this.ehriGraphQLClient = ehriGraphQLClient;
|
||||
}
|
||||
}
|
|
@ -1,53 +0,0 @@
|
|||
package eu.dnetlib.data.collector.plugins.ariadneplus.isidore;
|
||||
|
||||
import eu.dnetlib.miscutils.functional.xml.SaxonHelper;
|
||||
import eu.dnetlib.rmi.data.CollectorServiceException;
|
||||
import eu.dnetlib.rmi.data.InterfaceDescriptor;
|
||||
import eu.dnetlib.rmi.data.plugin.AbstractCollectorPlugin;
|
||||
import org.apache.commons.lang3.StringUtils;
|
||||
import org.springframework.beans.factory.annotation.Autowired;
|
||||
|
||||
/**
|
||||
* @author alessia bardi
|
||||
*
|
||||
*/
|
||||
public class IsidoreCollectorPlugin extends AbstractCollectorPlugin {
|
||||
|
||||
@Autowired
|
||||
private SaxonHelper saxonHelper;
|
||||
|
||||
@Override
|
||||
public Iterable<String> collect(InterfaceDescriptor ifDescriptor, String arg1, String arg2)
|
||||
throws CollectorServiceException {
|
||||
final String baseUrl = ifDescriptor.getBaseUrl();
|
||||
final String queryParams = ifDescriptor.getParams().get("queryParams");
|
||||
final String pageParam = ifDescriptor.getParams().get("pageParam");
|
||||
final String startFromPage = ifDescriptor.getParams().get("startFromPage");
|
||||
final String nextPagePath = ifDescriptor.getParams().get("nextPagePath");
|
||||
final String pageSizeParam = ifDescriptor.getParams().get("pageSizeParam");
|
||||
final String pageSize = ifDescriptor.getParams().get("pageSize");
|
||||
final String resultTotalXpath = ifDescriptor.getParams().get("resultTotalXpath");
|
||||
final String entityXpath = ifDescriptor.getParams().get("entityXpath");
|
||||
|
||||
if (StringUtils.isBlank(baseUrl)) {throw new CollectorServiceException("Param 'baseUrl' is null or empty");}
|
||||
if (StringUtils.isBlank(pageParam)) {throw new CollectorServiceException("Param 'pageParam' is null or empty");}
|
||||
if (StringUtils.isBlank(startFromPage)) {throw new CollectorServiceException("Param 'startFromPage' is null or empty");}
|
||||
if (StringUtils.isBlank(nextPagePath)) {throw new CollectorServiceException("Param 'nextPagePath' is null or empty");}
|
||||
if (StringUtils.isBlank(pageSizeParam)) {throw new CollectorServiceException("Param 'pageSizeParam' is null or empty");}
|
||||
if (StringUtils.isBlank(pageSize)) {throw new CollectorServiceException("Param 'pageSize' is null or empty");}
|
||||
if (StringUtils.isBlank(resultTotalXpath)) {throw new CollectorServiceException("Param 'resultTotalXpath' is null or empty");}
|
||||
if (StringUtils.isBlank(entityXpath)) {throw new CollectorServiceException("Param 'entityXpath' is null or empty");}
|
||||
|
||||
return () -> new IsidoreIterator(
|
||||
saxonHelper,
|
||||
baseUrl,
|
||||
queryParams,
|
||||
pageParam,
|
||||
Integer.parseInt(startFromPage),
|
||||
nextPagePath,
|
||||
pageSizeParam,
|
||||
Integer.parseInt(pageSize),
|
||||
resultTotalXpath,
|
||||
entityXpath);
|
||||
}
|
||||
}
|
|
@ -1,169 +0,0 @@
|
|||
package eu.dnetlib.data.collector.plugins.ariadneplus.isidore; /**
|
||||
* log.debug(...) equal to log.trace(...) in the application-logs
|
||||
* <p>
|
||||
* known bug: at resumptionType 'discover' if the (resultTotal % resultSizeValue) == 0 the collecting fails -> change the resultSizeValue
|
||||
*/
|
||||
|
||||
import java.io.InputStream;
|
||||
import java.net.URL;
|
||||
import java.util.Iterator;
|
||||
import java.util.Queue;
|
||||
import java.util.concurrent.PriorityBlockingQueue;
|
||||
|
||||
import com.google.common.collect.Maps;
|
||||
import eu.dnetlib.miscutils.functional.xml.SaxonHelper;
|
||||
import eu.dnetlib.rmi.data.CollectorServiceException;
|
||||
import net.sf.saxon.s9api.*;
|
||||
import org.apache.commons.io.IOUtils;
|
||||
import org.apache.commons.lang3.StringUtils;
|
||||
import org.apache.commons.logging.Log;
|
||||
import org.apache.commons.logging.LogFactory;
|
||||
|
||||
/**
|
||||
* @author Jochen Schirrwagen, Aenne Loehden, Andreas Czerniak, Alessia Bardi
|
||||
* @date 2018-09-03
|
||||
*
|
||||
*/
|
||||
public class IsidoreIterator implements Iterator<String> {
|
||||
|
||||
private static final Log log = LogFactory.getLog(IsidoreIterator.class); // NOPMD by marko on 11/24/08 5:02 PM
|
||||
|
||||
private SaxonHelper saxonHelper;
|
||||
private Queue<String> recordQueue = new PriorityBlockingQueue<String>();
|
||||
private InputStream resultStream;
|
||||
private String query;
|
||||
private String querySize;
|
||||
private String queryPage;
|
||||
private int resultTotal = -1;
|
||||
|
||||
private String baseUrl;
|
||||
private String queryParams;
|
||||
private String pageParam;
|
||||
private int page;
|
||||
private String pageSizeParam;
|
||||
private int pageSize;
|
||||
|
||||
private XPathSelector xprEntity;
|
||||
private XPathSelector xprResultTotalPath;
|
||||
private XPathSelector xprNextPagePath;
|
||||
|
||||
public IsidoreIterator(
|
||||
final SaxonHelper saxonHelper,
|
||||
final String baseUrl,
|
||||
final String queryParams,
|
||||
final String pageParam,
|
||||
final int startFromPage,
|
||||
final String nextPagePath,
|
||||
final String pageSizeParam,
|
||||
final int pageSize,
|
||||
final String resultTotalXpath,
|
||||
final String entityXpath
|
||||
) {
|
||||
this.saxonHelper = saxonHelper;
|
||||
this.baseUrl = baseUrl;
|
||||
this.queryParams = queryParams;
|
||||
this.pageParam = pageParam;
|
||||
this.page = startFromPage;
|
||||
this.queryPage = this.pageParam+"="+this.page;
|
||||
this.pageSizeParam = pageSizeParam;
|
||||
this.pageSize = pageSize;
|
||||
this.querySize = this.pageSizeParam+"="+this.pageSize;
|
||||
try {
|
||||
initXpathSelector(resultTotalXpath, nextPagePath, entityXpath);
|
||||
} catch (Exception e) {
|
||||
throw new IllegalStateException("xml transformation init failed: " + e.getMessage());
|
||||
}
|
||||
initQuery();
|
||||
}
|
||||
|
||||
private void initXpathSelector(final String resultTotalXpath,final String nextPagePath, final String entityXpath)
|
||||
throws SaxonApiException {
|
||||
xprResultTotalPath = this.saxonHelper.help().prepareXPathSelector(resultTotalXpath, Maps.newHashMap());
|
||||
xprNextPagePath = this.saxonHelper.help().prepareXPathSelector(nextPagePath, Maps.newHashMap());
|
||||
xprEntity = this.saxonHelper.help().prepareXPathSelector(entityXpath, Maps.newHashMap());
|
||||
}
|
||||
|
||||
private void initQuery() {
|
||||
query = baseUrl + "?" + queryParams +"&"+ querySize +"&"+ queryPage;
|
||||
}
|
||||
|
||||
private void disconnect() {
|
||||
// TODO close inputstream
|
||||
}
|
||||
|
||||
/* (non-Javadoc)
|
||||
* @see java.util.Iterator#hasNext()
|
||||
*/
|
||||
@Override
|
||||
public boolean hasNext() {
|
||||
if (recordQueue.isEmpty() && query.isEmpty()) {
|
||||
disconnect();
|
||||
return false;
|
||||
} else {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
/* (non-Javadoc)
|
||||
* @see java.util.Iterator#next()
|
||||
*/
|
||||
@Override
|
||||
public String next() {
|
||||
synchronized (recordQueue) {
|
||||
while (recordQueue.isEmpty() && !query.isEmpty()) {
|
||||
try {
|
||||
log.info("get Query: " + query);
|
||||
query = downloadPage(query);
|
||||
log.debug("next queryURL from downloadPage(): " + query);
|
||||
} catch (CollectorServiceException e) {
|
||||
log.debug("CollectorPlugin.next()-Exception: " + e);
|
||||
throw new RuntimeException(e);
|
||||
}
|
||||
}
|
||||
return recordQueue.poll();
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* download page and return nextQuery
|
||||
*/
|
||||
private String downloadPage(String query) throws CollectorServiceException {
|
||||
String nextQuery = "";
|
||||
XdmValue nodeList;
|
||||
|
||||
try {
|
||||
URL qUrl = new URL(query);
|
||||
resultStream = qUrl.openStream();
|
||||
String resultPage = IOUtils.toString(resultStream);
|
||||
nodeList = this.saxonHelper.help().evaluate(resultPage, xprEntity);
|
||||
log.debug("nodeList.size: " + nodeList.size());
|
||||
for(XdmItem entity : nodeList){
|
||||
recordQueue.add(this.saxonHelper.help().serialize((XdmNode) entity));
|
||||
}
|
||||
|
||||
String nextPage = this.saxonHelper.help().evaluateSingleAsString(resultPage, xprNextPagePath);
|
||||
if(StringUtils.isBlank(nextPage)){
|
||||
log.info("No next page available, we reached the end");
|
||||
}
|
||||
else{
|
||||
this.queryPage = pageParam+"="+nextPage;
|
||||
return baseUrl + "?" + queryParams +"&"+ querySize +"&"+ queryPage;
|
||||
}
|
||||
|
||||
if (resultTotal == -1) {
|
||||
String tot = this.saxonHelper.help().evaluateSingleAsString(resultPage, xprResultTotalPath);
|
||||
resultTotal = Integer.parseInt(tot);
|
||||
log.info("resultTotal was -1 is now: " + resultTotal);
|
||||
}
|
||||
log.info("resultTotal: " + resultTotal);
|
||||
log.debug("nextQueryUrl: " + nextQuery);
|
||||
return nextQuery;
|
||||
|
||||
} catch (Exception e) {
|
||||
log.error(e);
|
||||
throw new IllegalStateException("collection failed: " + e.getMessage());
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
}
|
|
@ -5,15 +5,11 @@
|
|||
xsi:schemaLocation="http://www.springframework.org/schema/beans http://www.springframework.org/schema/beans/spring-beans.xsd
|
||||
http://cxf.apache.org/transports/http/configuration http://cxf.apache.org/schemas/configuration/http-conf.xsd">
|
||||
|
||||
<bean id="wfNodeReportReader" class="eu.dnetlib.ariadneplus.workflows.nodes.ReportReaderJobNode" scope="prototype"/>
|
||||
|
||||
<bean id="wfNodePublishGraphDB" class="eu.dnetlib.ariadneplus.workflows.nodes.PublishGraphDBJobNode" scope="prototype"/>
|
||||
|
||||
<bean id="wfNodeUnpublishGraphDB" class="eu.dnetlib.ariadneplus.workflows.nodes.UnpublishGraphDBJobNode" scope="prototype"/>
|
||||
|
||||
<bean id="wfNodeReadVirtuoso" class="eu.dnetlib.ariadneplus.workflows.nodes.ReadVirtuosoJobNode" scope="prototype"/>
|
||||
<bean id="wfNodeReadVirtuosoByType" class="eu.dnetlib.ariadneplus.workflows.nodes.ReadVirtuosoByTypeJobNode" scope="prototype"/>
|
||||
|
||||
<bean id="wfNodeX3MTransformAriadnePlus" class="eu.dnetlib.ariadneplus.workflows.nodes.X3MTransformAriadnePlusJobNode" scope="prototype"/>
|
||||
|
||||
<bean id="wfNodeElasticSearchIndex" class="eu.dnetlib.ariadneplus.workflows.nodes.ElasticSearchIndexJobNode" scope="prototype"/>
|
||||
|
|
|
@ -1,69 +0,0 @@
|
|||
package eu.dnetlib.ariadneplus.workflows.nodes;
|
||||
|
||||
import java.util.Iterator;
|
||||
import java.util.Map;
|
||||
import java.util.Map.Entry;
|
||||
|
||||
import org.junit.Before;
|
||||
import org.junit.Test;
|
||||
|
||||
import static org.junit.Assert.assertEquals;
|
||||
import static org.junit.Assert.assertTrue;
|
||||
|
||||
/**
|
||||
* Created by Alessia Bardi on 09/02/2018.
|
||||
*
|
||||
* @author Alessia Bardi
|
||||
*/
|
||||
public class VirtuosoAriadnePlusIteratorTest {
|
||||
|
||||
private VirtuosoAriadnePlusIterator it;
|
||||
|
||||
@Before
|
||||
public void setup(){
|
||||
it = new VirtuosoAriadnePlusMockIterator();
|
||||
}
|
||||
|
||||
@Test
|
||||
public void fillQueueTest() throws Exception {
|
||||
it.fillQueue();
|
||||
String s = it.getElements().take();
|
||||
while(!s.equals(VirtuosoAriadnePlusIterator.TERMINATOR)){
|
||||
System.out.println(s);
|
||||
s = it.getElements().take();
|
||||
}
|
||||
Map<String, Integer> errors = it.getErrors();
|
||||
for (Iterator<Entry<String, Integer>> it1 = errors.entrySet().iterator(); it1.hasNext(); ) {
|
||||
final Entry e = it1.next();
|
||||
System.out.println(e.getKey() +" - "+e.getValue());
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testHasNext(){
|
||||
assertTrue(it.hasNext());
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testIterate(){
|
||||
while(it.hasNext())
|
||||
System.out.println(it.next());
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testCompleteXml(){
|
||||
String s = "<?xml version=\"1.0\" encoding=\"utf-8\" ?><rdf:RDF/>";
|
||||
String completed = it.completeXML(s, "http://ariadneplus.d4science.org/handle/AriadnePlus/REG/Thing/Appellation/urp%40regione.umbria.it");
|
||||
System.out.println(completed);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testRemoveXmlDeclaration(){
|
||||
String s = "<?xml version=\"1.0\" encoding=\"utf-8\" ?><rdf:RDF";
|
||||
String res = s.replaceAll("\\<\\?xml(.+?)\\?\\>", "").trim();
|
||||
assertEquals("<rdf:RDF", res);
|
||||
s = "<rdf:RDF ";
|
||||
res = s.replaceAll("\\<\\?xml(.+?)\\?\\>", "").trim();
|
||||
assertEquals("<rdf:RDF", res);
|
||||
}
|
||||
}
|
|
@ -1,36 +0,0 @@
|
|||
package eu.dnetlib.ariadneplus.workflows.nodes;
|
||||
|
||||
import java.util.List;
|
||||
import java.util.concurrent.TimeUnit;
|
||||
|
||||
import com.google.common.collect.Lists;
|
||||
|
||||
/**
|
||||
* Created by Alessia Bardi on 12/02/2018.
|
||||
*
|
||||
* @author Alessia Bardi
|
||||
*/
|
||||
public class VirtuosoAriadnePlusMockIterator extends VirtuosoAriadnePlusIterator {
|
||||
|
||||
List<String> elements = Lists.newArrayList("rdf1", "rdf2", "rdf3", "rdf4", "rdf5", "rdf6");
|
||||
|
||||
@Override
|
||||
protected void fillQueue() {
|
||||
new Thread(() -> {
|
||||
try {
|
||||
for (String e : elements) {
|
||||
|
||||
getElements().offer(e, QUEUE_TIMEOUT_SECONDS, TimeUnit.SECONDS);
|
||||
}
|
||||
} catch (InterruptedException ee) {
|
||||
ee.printStackTrace();
|
||||
} finally {
|
||||
try {
|
||||
getElements().offer(TERMINATOR, QUEUE_TIMEOUT_SECONDS, TimeUnit.SECONDS);
|
||||
} catch (InterruptedException ee) {
|
||||
System.out.println(ee);
|
||||
}
|
||||
}
|
||||
}).start();
|
||||
}
|
||||
}
|
|
@ -0,0 +1,134 @@
|
|||
package eu.dnetlib.ariadneplus.workflows.nodes;
|
||||
|
||||
import org.junit.Test;
|
||||
|
||||
import java.time.Duration;
|
||||
import java.time.LocalDateTime;
|
||||
|
||||
/**
|
||||
* Created by Alessia Bardi on 13/04/2017.
|
||||
*
|
||||
* @author Alessia Bardi
|
||||
*/
|
||||
public class X3MTransformAriadnePlusJobNodeTest {
|
||||
|
||||
private X3MTransformAriadnePlusJobNode transformJob = new X3MTransformAriadnePlusJobNode();
|
||||
private String header = "<oai:header xmlns:dri=\"http://www.driver-repository.eu/namespace/dri\">\n"
|
||||
+ " <dri:objIdentifier>ariadne_mock::0000023f507999464aa2b78875b7e5d6</dri:objIdentifier>\n"
|
||||
+ " <dri:recordIdentifier>2420500</dri:recordIdentifier>\n"
|
||||
+ " <dri:dateOfCollection>2017-04-10T18:44:46.85+02:00</dri:dateOfCollection>\n"
|
||||
+ " <dri:datasourceprefix>ariadne_mock</dri:datasourceprefix>\n"
|
||||
+ " <dri:datasourcename>Ariadne Mock</dri:datasourcename>\n"
|
||||
+ " <dri:dateOfTransformation>2017-04-12T16:31:45.766</dri:dateOfTransformation>\n"
|
||||
+ " <dri:invalid value=\"true\">\n"
|
||||
+ " <dri:error vocabularies=\"dnet:languages\" xpath=\"//*[local-name()='P72_has_language']\"\n"
|
||||
+ " term=\"en\"/>\n"
|
||||
+ " </dri:invalid>\n"
|
||||
+ " </oai:header>";
|
||||
private String footer = "<oai:about xmlns:dri=\"http://www.driver-repository.eu/namespace/dri\">\n"
|
||||
+ " <provenance xmlns=\"http://www.openarchives.org/OAI/2.0/provenance\"\n"
|
||||
+ " xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\"\n"
|
||||
+ " xsi:schemaLocation=\"http://www.openarchives.org/OAI/2.0/provenance http://www.openarchives.org/OAI/2.0/provenance.xsd\">\n"
|
||||
+ " <originDescription xmlns=\"\" altered=\"true\" harvestDate=\"2017-04-10T18:44:46.85+02:00\">\n"
|
||||
+ " <baseURL>sftp%3A%2F%2Fariadne2.isti.cnr.it%2F..%2F..%2Fdata%2Ftransform%2Facdm_correct</baseURL>\n"
|
||||
+ " <identifier/>\n"
|
||||
+ " <datestamp/>\n"
|
||||
+ " <metadataNamespace/>\n"
|
||||
+ " </originDescription>\n"
|
||||
+ " </provenance>\n"
|
||||
+ " </oai:about>";
|
||||
private String metadata=" <rdf:RDF xmlns:rdf=\"http://www.w3.org/1999/02/22-rdf-syntax-ns#\"\n"
|
||||
+ " xmlns:dbpedia-owl=\"http://dbpedia.org/ontology/\"\n"
|
||||
+ " xmlns:acdm=\"http://registry.ariadne-infrastructure.eu/\"\n"
|
||||
+ " xmlns:xsd=\"http://www.w3.org/2001/XMLSchema#\"\n"
|
||||
+ " xmlns:skos=\"http://www.w3.org/2004/02/skos/core#\"\n"
|
||||
+ " xmlns:rdfs=\"http://www.w3.org/2000/01/rdf-schema#\"\n"
|
||||
+ " xmlns:frbr=\"http://www.cidoc-crm.org/frbroo/\" xmlns:dcterms=\"http://purl.org/dc/terms/\"\n"
|
||||
+ " xmlns:dcat=\"http://www.w3.org/ns/dcat#\" xmlns:foaf=\"http://xmlns.com/foaf/0.1/\"\n"
|
||||
+ " xmlns:crm=\"http://www.cidoc-crm.org/cidoc-crm/\"\n"
|
||||
+ " xmlns:dc=\"http://purl.org/dc/elements/1.1/\">\n"
|
||||
+ " <frbr:F30_Publication_Event rdf:about=\"uuid:AAAA\"/>\n"
|
||||
+ " <crm:E65_Creation rdf:about=\"uuid:AAAB\"/>\n"
|
||||
+ " <crm:E73_Information_Object\n"
|
||||
+ " rdf:about=\"http://registry.ariadne-infrastructure.eu/dataset/2420500\">\n"
|
||||
+ " <crm:P129_is_about>\n"
|
||||
+ " <crm:E73_Information_Object\n"
|
||||
+ " rdf:about=\"http://registry.ariadne-infrastructure.eu/subject/CAIRN\">\n"
|
||||
+ " <crm:P129_is_about>CAIRN</crm:P129_is_about>\n"
|
||||
+ " </crm:E73_Information_Object>\n"
|
||||
+ " </crm:P129_is_about>\n"
|
||||
+ " <crm:P129_is_about>\n"
|
||||
+ " <crm:E73_Information_Object\n"
|
||||
+ " rdf:about=\"http://registry.ariadne-infrastructure.eu/subject/HUT%20CIRCLE\">\n"
|
||||
+ " <crm:P129_is_about>HUT CIRCLE</crm:P129_is_about>\n"
|
||||
+ " </crm:E73_Information_Object>\n"
|
||||
+ " </crm:P129_is_about>\n"
|
||||
+ " <crm:P104_is_subject_to>ADS Terms and Conditions</crm:P104_is_subject_to>\n"
|
||||
+ " <crm:P129_is_about>\n"
|
||||
+ " <crm:E73_Information_Object\n"
|
||||
+ " rdf:about=\"http://registry.ariadne-infrastructure.eu/subject/SHEEPFOLD\">\n"
|
||||
+ " <crm:P129_is_about>SHEEPFOLD</crm:P129_is_about>\n"
|
||||
+ " </crm:E73_Information_Object>\n"
|
||||
+ " </crm:P129_is_about>\n"
|
||||
+ " <crm:P129_is_about>\n"
|
||||
+ " <crm:E73_Information_Object\n"
|
||||
+ " rdf:about=\"http://registry.ariadne-infrastructure.eu/subject/FIELD%20SYSTEM\">\n"
|
||||
+ " <crm:P129_is_about>FIELD SYSTEM</crm:P129_is_about>\n"
|
||||
+ " </crm:E73_Information_Object>\n"
|
||||
+ " </crm:P129_is_about>\n"
|
||||
+ " <crm:P102_has_title>MID GLEN CROE</crm:P102_has_title>\n"
|
||||
+ " <crm:P165_incorporates>\n"
|
||||
+ " <crm:E33_Linguistic_Object rdf:about=\"uuid:AAAG\">\n"
|
||||
+ " <crm:P72_has_language>en</crm:P72_has_language>\n"
|
||||
+ " </crm:E33_Linguistic_Object>\n"
|
||||
+ " </crm:P165_incorporates>\n"
|
||||
+ " <crm:P67_refers_to>\n"
|
||||
+ " <crm:E1_CRM_Entity rdf:about=\"uuid:AAAH\">\n"
|
||||
+ " <crm:P2_has_type>Sites and monuments databases or\n"
|
||||
+ " inventories</crm:P2_has_type>\n"
|
||||
+ " </crm:E1_CRM_Entity>\n"
|
||||
+ " </crm:P67_refers_to>\n"
|
||||
+ " <crm:P93i_was_taken_out_of_existence_by>\n"
|
||||
+ " <crm:E6_Destruction rdf:about=\"uuid:AAAE\">\n"
|
||||
+ " <crm:P4_has_time-span>\n"
|
||||
+ " <crm:E52_Time-Span rdf:about=\"uuid:AAAF\">\n"
|
||||
+ " <crm:P81_ongoing_throughout>2013-12-09\n"
|
||||
+ " 00:00:00.0</crm:P81_ongoing_throughout>\n"
|
||||
+ " </crm:E52_Time-Span>\n"
|
||||
+ " </crm:P4_has_time-span>\n"
|
||||
+ " </crm:E6_Destruction>\n"
|
||||
+ " </crm:P93i_was_taken_out_of_existence_by>\n"
|
||||
+ " <crm:P94i_was_created_by>\n"
|
||||
+ " <frbr:F30_Publication_Event rdf:about=\"uuid:AAAC\">\n"
|
||||
+ " <crm:P4_has_time-span>\n"
|
||||
+ " <crm:E52_Time-Span rdf:about=\"uuid:AAAD\">\n"
|
||||
+ " <crm:P81_ongoing_throughout>2013-12-09\n"
|
||||
+ " 00:00:00.0</crm:P81_ongoing_throughout>\n"
|
||||
+ " </crm:E52_Time-Span>\n"
|
||||
+ " </crm:P4_has_time-span>\n"
|
||||
+ " </frbr:F30_Publication_Event>\n"
|
||||
+ " </crm:P94i_was_created_by>\n"
|
||||
+ " <crm:P129_is_about>\n"
|
||||
+ " <crm:E73_Information_Object\n"
|
||||
+ " rdf:about=\"http://registry.ariadne-infrastructure.eu/subject/BUILDING\">\n"
|
||||
+ " <crm:P129_is_about>BUILDING</crm:P129_is_about>\n"
|
||||
+ " </crm:E73_Information_Object>\n"
|
||||
+ " </crm:P129_is_about>\n"
|
||||
+ " <crm:P1_is_identified_by>2420500</crm:P1_is_identified_by>\n"
|
||||
+ " <crm:P106i_forms_part_of>http://registry.ariadne-infrastructure.eu/collection/22721290</crm:P106i_forms_part_of>\n"
|
||||
+ " <crm:P3_has_note>Multiple instances of: SHEEPFOLD<br /><br />Multiple\n"
|
||||
+ " instances of: BUILDING<br />Multiple instances of: FIELD SYSTEM<br\n"
|
||||
+ " />Possible instance of: CAIRN<br />Multiple instances of: HUT\n"
|
||||
+ " CIRCLE<br />Possible instance of: HUT CIRCLE</crm:P3_has_note>\n"
|
||||
+ " </crm:E73_Information_Object>\n"
|
||||
+ " </rdf:RDF>";
|
||||
|
||||
@Test
|
||||
public void testBuildXML(){
|
||||
LocalDateTime now = LocalDateTime.now();
|
||||
String res = transformJob.buildXML(header, now.toString(), metadata, footer);
|
||||
LocalDateTime end = LocalDateTime.now();
|
||||
System.out.println("Building XML took:"+ Duration.between(now, end).toMillis());
|
||||
System.out.println(res);
|
||||
}
|
||||
}
|
|
@ -1,72 +0,0 @@
|
|||
package eu.dnetlib.data.collector.plugins.ariadneplus.ehri;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.io.InputStreamReader;
|
||||
import java.net.URISyntaxException;
|
||||
import java.net.URL;
|
||||
import java.util.Iterator;
|
||||
|
||||
import eu.dnetlib.rmi.data.CollectorServiceException;
|
||||
import org.apache.commons.io.IOUtils;
|
||||
import org.junit.Ignore;
|
||||
import org.junit.Test;
|
||||
|
||||
/**
|
||||
* Created by Alessia Bardi on 19/04/2017.
|
||||
*
|
||||
* @author Alessia Bardi
|
||||
*/
|
||||
public class EHRIGraphQLClientTest {
|
||||
|
||||
private EHRIGraphQLClient client = new EHRIGraphQLClient();
|
||||
private String baseUrl = "https://portal.ehri-project.eu/api/graphql";
|
||||
private String query = "{\"query\":\"{topLevelDocumentaryUnits{items{id}}}\"}";
|
||||
|
||||
private String simpleOutput = "{\n"
|
||||
+ " \"data\" : {\n"
|
||||
+ " \"topLevelDocumentaryUnits\" : {\n"
|
||||
+ " \"items\" : [ {\n"
|
||||
+ " \"id\" : \"at-001985-ikg_ar_1\"\n"
|
||||
+ " }, {\n"
|
||||
+ " \"id\" : \"at-001985-ikg_ar_11\"\n"
|
||||
+ " }]\n"
|
||||
+ " }\n"
|
||||
+ " }\n"
|
||||
+ "}";
|
||||
|
||||
@Test
|
||||
public void testGetIdentifiers() throws IOException {
|
||||
Iterator<String> it = client.getIdentifiers(new InputStreamReader(IOUtils.toInputStream(simpleOutput, "UTF-8")));
|
||||
while(it.hasNext())
|
||||
System.out.println(it.next());
|
||||
}
|
||||
|
||||
@Ignore
|
||||
@Test
|
||||
public void testRemoteCollect() throws CollectorServiceException, IOException, URISyntaxException {
|
||||
Iterator<String> it =client.collect(baseUrl, query);
|
||||
int stopAt = 5;
|
||||
for(int i = 0; i < stopAt && it.hasNext(); i++){
|
||||
System.out.println(it.next());
|
||||
}
|
||||
}
|
||||
|
||||
@Ignore
|
||||
@Test
|
||||
public void testGetResource() throws IOException {
|
||||
String id = "at-001985-ikg_ar_11";
|
||||
String target = "https://portal.ehri-project.eu/units/"+id+"/export";
|
||||
URL url = new URL(target);
|
||||
String res = IOUtils.toString(url, "UTF-8");
|
||||
System.out.println(res);
|
||||
}
|
||||
|
||||
// @Test
|
||||
// public void testDotNotation(){
|
||||
// JsonObject jsonObject = new JsonParser().parse(simpleOutput).getAsJsonObject();
|
||||
// JsonArray items = jsonObject.getAsJsonArray("data.topLevelDocumentaryUnits.items");
|
||||
// for(JsonElement id : items){
|
||||
// System.out.println(id.getAsJsonObject().get("id"));
|
||||
// }
|
||||
// }
|
||||
}
|
|
@ -1,32 +0,0 @@
|
|||
package eu.dnetlib.data.collector.plugins.ariadneplus.ehri;
|
||||
|
||||
import org.junit.Test;
|
||||
|
||||
/**
|
||||
* Created by Alessia Bardi on 13/06/2017.
|
||||
*
|
||||
* @author Alessia Bardi
|
||||
*/
|
||||
public class EHRIIteratorTest {
|
||||
|
||||
|
||||
private EHRIIterator ehriIterator = new EHRIIterator(null, null, null);
|
||||
final String test = "<ead xmlns=\"urn:isbn:1-931666-22-9\"\n"
|
||||
+ "\txmlns:xlink=\"http://www.w3.org/1999/xlink\"\n"
|
||||
+ "\txmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\"\n"
|
||||
+ "\txsi:schemaLocation=\"urn:isbn:1-931666-22-9 http://www.loc.gov/ead/ead.xsd\">\n"
|
||||
+ " <eadheader xmlns=\"\"\n"
|
||||
+ "\tcountryencoding=\"iso3166-1\"\n"
|
||||
+ "\tdateencoding=\"iso8601\"\n"
|
||||
+ "\trelatedencoding=\"DC\"\n"
|
||||
+ "\trepositoryencoding=\"iso15511\"\n"
|
||||
+ "\tscriptencoding=\"iso15924\">\n"
|
||||
+ " <eadid>us-005578-irn516886</eadid>\n"
|
||||
+ "\t</eadheader>\n"
|
||||
+ "\t</ead>";
|
||||
|
||||
@Test
|
||||
public void testRemoveDefaultEADNamespace(){
|
||||
System.out.println(ehriIterator.removeDefaultEADNamespace(test));
|
||||
}
|
||||
}
|
|
@ -1,47 +0,0 @@
|
|||
package eu.dnetlib.data.collector.plugins.ariadneplus.isidore;
|
||||
|
||||
import javax.xml.transform.TransformerFactory;
|
||||
|
||||
import eu.dnetlib.miscutils.functional.xml.SaxonHelper;
|
||||
import org.junit.Before;
|
||||
import org.junit.Ignore;
|
||||
import org.junit.Test;
|
||||
|
||||
/**
|
||||
* Created by Alessia Bardi on 17/11/2018.
|
||||
*
|
||||
* @author Alessia Bardi
|
||||
*/
|
||||
|
||||
public class IsidoreIteratorTest {
|
||||
|
||||
TransformerFactory tf = TransformerFactory.newInstance();
|
||||
SaxonHelper saxonHelper = new SaxonHelper();
|
||||
|
||||
private String baseUrl = "https://api.rechercheisidore.fr/resource/search";
|
||||
private String queryParams = "source_tree=10670/3.ji0kr6|10670/2.rao8tk";
|
||||
private String pageParam = "page";
|
||||
private int startFromPage = 1;
|
||||
private String nextPagePath = "//page/@next";
|
||||
private String pageSizeParam = "size";
|
||||
private int pageSize = 10;
|
||||
private String resultTotalXpath = "//replies/meta/@items";
|
||||
private String entityXpath = "//reply";
|
||||
|
||||
@Before
|
||||
public void setup(){
|
||||
saxonHelper.setSaxonTransformerFactory(tf);
|
||||
}
|
||||
|
||||
@Ignore
|
||||
@Test
|
||||
public void test(){
|
||||
IsidoreIterator rip = new IsidoreIterator(saxonHelper, baseUrl, queryParams,pageParam, startFromPage,
|
||||
nextPagePath, pageSizeParam, pageSize, resultTotalXpath, entityXpath);
|
||||
while(rip.hasNext()){
|
||||
String record = rip.next();
|
||||
System.out.println(record);
|
||||
}
|
||||
}
|
||||
|
||||
}
|
Loading…
Reference in New Issue