This commit is contained in:
Miriam Baglioni 2019-10-23 16:15:13 +03:00
commit 1dab575b58
5 changed files with 93 additions and 73 deletions

View File

@ -1,5 +1,6 @@
package eu.dnetlib.ariadneplus.workflows.nodes;
import java.net.URL;
import java.time.Duration;
import java.time.Instant;
import java.time.LocalDateTime;
@ -73,7 +74,7 @@ public class X3MTransformAriadnePlusJobNode extends X3MTransformJobNode {
log.info("Mapping profile ids read from node configuration: " + mappingProfileIds);
log.info("Mapping Policy profile id read from node configuration: " + mappingPolicyProfileId);
log.info("Mapping url read from node configuration: " + mappingUrl);
final String[] mappings = getMappingsCode(mappingProfileIds.split(","));
final URL mappingURL = new URL(mappingUrl);
final String policy = getProfileCode(mappingPolicyProfileId);
LocalDateTime now = LocalDateTime.now();
@ -88,7 +89,7 @@ public class X3MTransformAriadnePlusJobNode extends X3MTransformJobNode {
String objIdentifier = extractFromRecord(record, xpathSelectorObjIdentifier);
log.debug("Transforming record objIdentifier: " + objIdentifier);
}
ApplyX3Mapping mappingFunction = new ApplyX3Mapping(mappings, policy, verboseLogging);
ApplyX3Mapping mappingFunction = new ApplyX3Mapping(mappingURL, policy, verboseLogging);
String toTransform = record;
Instant startExtraction = Instant.now();

View File

@ -1,27 +1,67 @@
package eu.dnetlib.data.collector.plugins.ariadneplus.ads;
import java.io.File;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.Iterator;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import com.google.common.collect.Iterators;
import eu.dnetlib.data.collector.plugins.FileCollectorPlugin;
import eu.dnetlib.data.collector.plugins.filesystem.FileSystemIterator;
import eu.dnetlib.rmi.data.CollectorServiceException;
import eu.dnetlib.rmi.data.InterfaceDescriptor;
/**
 * Collector plugin for ADS records that extends {@link FileCollectorPlugin} and overrides
 * {@code collect}.
 *
 * NOTE(review): as shown here, the body appears to carry both the removed and the added lines
 * of a change side by side (statements follow directly after a {@code return} and after a
 * {@code throw} in the same block), so it is not compilable as listed. Confirm against the
 * actual file before relying on the comments below.
 */
public class ADSCollectorPlugin extends FileCollectorPlugin {
// Instance state: assigned only inside the file loop of collect() and handed to ADSIterable at the end.
// It is never reset at the start of collect(), so it keeps whatever value it had before the call
// (null on a fresh instance) when the loop body never assigns it.
private Iterator<String> recordIterator;
// URL built from "file://" + the interface base url; only its path component is used below.
private URL basePath;
/** The Constant log. */
private static final Log log = LogFactory.getLog(ADSCollectorPlugin.class);
/**
 * Collects records from the location given by the interface descriptor's base url.
 *
 * @param interfaceDescriptor source of the base url and of the "suffixToBaseUrl" parameter;
 *        its base url is overwritten inside the loop via setBaseUrl
 * @param fromDate passed through unchanged to {@code super.collect}
 * @param untilDate passed through unchanged to {@code super.collect}
 * @return an iterable over the collected records
 * @throws CollectorServiceException if the base url is null or empty, cannot be parsed as a URL,
 *         or points to a path that does not exist
 */
@Override
public Iterable<String> collect(final InterfaceDescriptor interfaceDescriptor, final String fromDate, final String untilDate)
throws CollectorServiceException {
final String baseUrl = interfaceDescriptor.getBaseUrl();
if (baseUrl == null || baseUrl.isEmpty()) { throw new CollectorServiceException("Param 'baseurl' is null or empty"); }
// The "file://" scheme is prepended unconditionally.
// NOTE(review): if baseUrl already starts with "file://" this produces a doubled prefix — confirm the expected form.
String url = "file://".concat(baseUrl);
try {
basePath = new URL(url);
} catch (MalformedURLException mue) {
log.error("Failed collecting from base url " + url, mue);
throw new CollectorServiceException(mue);
}
File baseDir = new File(basePath.getPath());
// NOTE(review): "ULR" in the message below looks like a typo for "URL".
if (!baseDir.exists()) { throw new CollectorServiceException(String.format("The base ULR %s, does not exist", basePath.getPath())); }
final String suffixToUrl = interfaceDescriptor.getParams().get("suffixToBaseUrl");
log.debug("Start collecting from folder " + baseDir + " ...");
// Iterates over paths under baseDir; the "xml" argument is presumably a file-extension filter — TODO confirm.
final FileSystemIterator fsi = new FileSystemIterator(baseDir.getAbsolutePath(), "xml");
return () -> {
// True until the first per-file iterator has been stored in recordIterator.
boolean emptyIterator = true;
while (fsi.hasNext()) {
String nextFilePath = fsi.next();
// Point the shared descriptor at the current file so that super.collect reads that file.
interfaceDescriptor.setBaseUrl("file://".concat(nextFilePath));
try {
return new ADSIterator(super.collect(interfaceDescriptor, fromDate, untilDate).iterator(), baseUrl, suffixToUrl);
log.debug("Add iterator from " + nextFilePath);
if (emptyIterator) {
// First file: start the chain with this file's iterator.
recordIterator = new ADSIterator(super.collect(interfaceDescriptor, fromDate, untilDate).iterator(), null, null);
emptyIterator = false;
}
else {
// Later files: append this file's iterator to the chain built so far.
recordIterator = Iterators.concat(recordIterator, new ADSIterator(super.collect(interfaceDescriptor, fromDate, untilDate).iterator(), null, null));
}
} catch (CollectorServiceException e) {
throw new RuntimeException(e);
log.error("Failed collecting from path: " + nextFilePath, e);
}
} ;
}
// Wraps whatever recordIterator currently holds; see the field comment for the no-file case.
return new ADSIterable(recordIterator);
}
}

View File

@ -0,0 +1,41 @@
package eu.dnetlib.data.collector.plugins.ariadneplus.ads;
import java.util.Iterator;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
/**
 * An {@link Iterable} of records backed by a single, pre-built {@link Iterator}.
 *
 * <p>The wrapped iterator is handed out as-is by {@link #iterator()}, so every call returns the
 * same instance and the records can be traversed only once. When no iterator has been set
 * ({@code null}), {@link #iterator()} returns an empty iterator instead of {@code null}, so that
 * a for-each loop over this object does not fail with a {@link NullPointerException}.
 */
public class ADSIterable implements Iterable<String> {

    /** The wrapped record iterator; may be {@code null} when there is nothing to iterate. */
    private Iterator<String> recordIterator;

    /**
     * Creates an iterable over the given iterator.
     *
     * @param recordIterator the iterator to expose, or {@code null} for "no records"
     */
    public ADSIterable(Iterator<String> recordIterator) {
        this.recordIterator = recordIterator;
    }

    /**
     * {@inheritDoc}
     *
     * @return the wrapped iterator (the same instance on every call), or an empty iterator when
     *         the wrapped iterator is {@code null}
     * @see java.lang.Iterable#iterator()
     */
    @Override
    public Iterator<String> iterator() {
        if (recordIterator == null) {
            // Fully qualified so that no additional import is required.
            return java.util.Collections.<String>emptyIterator();
        }
        return recordIterator;
    }

    /**
     * Returns the wrapped iterator exactly as stored.
     *
     * @return the wrapped iterator, possibly {@code null}
     */
    public Iterator<String> getRecordIterator() {
        return recordIterator;
    }

    /**
     * Replaces the wrapped iterator.
     *
     * @param recordIterator the new iterator to expose, or {@code null} for "no records"
     */
    public void setRecordIterator(Iterator<String> recordIterator) {
        this.recordIterator = recordIterator;
    }
}

View File

@ -4,7 +4,7 @@
<RESOURCE_TYPE value="RepositoryServiceResourceType"/>
<RESOURCE_KIND value="RepositoryServiceResources"/>
<RESOURCE_URI value=""/>
<DATE_OF_CREATION value="2019-09-22T19:16:54+02:00"/>
<DATE_OF_CREATION value="2019-10-22T15:27:44+02:00"/>
<PROTOCOL/>
</HEADER>
<BODY>
@ -33,9 +33,9 @@
<ADMIN_INFO>julian.richards@york.ac.uk</ADMIN_INFO>
<INTERFACES>
<INTERFACE active="true" compliance="metadata" contentDescription="metadata" id="api_________::ariadne_plus::ads::1" label="dnet:repository (metadata)" removable="false" typology="dnet:repository">
<ACCESS_PROTOCOL splitOnElement="record" suffixToBaseUrl="aaa">ads</ACCESS_PROTOCOL>
<BASE_URL>file:///var/lib/dnet/ariadne/ariadne_398_part1.xml</BASE_URL>
<INTERFACE_EXTRA_FIELD name="metadata_identifier_path">//*[local-name()='record']/*[namespace-uri()='http://purl.org/dc/elements/1.1/' and local-name()='identifier'][2]</INTERFACE_EXTRA_FIELD>
<ACCESS_PROTOCOL splitOnElement="record">ads</ACCESS_PROTOCOL>
<BASE_URL>file:///var/lib/dnet/ariadne/398</BASE_URL>
<INTERFACE_EXTRA_FIELD name="metadata_identifier_path">//*[local-name()='record']/*[namespace-uri()='http://purl.org/dc/elements/1.1/' and local-name()='identifier'][not(contains(text(), ' '))]</INTERFACE_EXTRA_FIELD>
</INTERFACE>
</INTERFACES>
<EXTRA_FIELDS>
@ -43,14 +43,6 @@
<key>NamespacePrefix</key>
<value>ads_________</value>
</FIELD>
<FIELD>
<key>dateOfCollection</key>
<value>Fri Sep 20 11:37:39 CEST 2019</value>
</FIELD>
<FIELD>
<key>dateOfValidation</key>
<value>null</value>
</FIELD>
</EXTRA_FIELDS>
<REGISTERED_BY/>
</CONFIGURATION>

View File

@ -1,54 +0,0 @@
<?xml version="1.0" encoding="UTF-8"?>
<beans xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xmlns:p="http://www.springframework.org/schema/p"
xmlns:http="http://cxf.apache.org/transports/http/configuration"
xmlns="http://www.springframework.org/schema/beans"
xsi:schemaLocation="http://www.springframework.org/schema/beans http://www.springframework.org/schema/beans/spring-beans.xsd">
<bean id="ehriCollectorPlugin" class="eu.dnetlib.data.collector.plugins.ariadneplus.ehri.EHRICollectorPlugin" p:ehriIteratorFactory-ref="ehriIteratorFactory">
<property name="protocolDescriptor">
<bean class="eu.dnetlib.rmi.data.ProtocolDescriptor" p:name="ehri">
<property name="params">
<list>
<bean class="eu.dnetlib.rmi.data.ProtocolParameter"
p:name="suffixToBaseUrl"/>
<bean class="eu.dnetlib.rmi.data.ProtocolParameter"
p:name="graphQLURL"/>
<bean class="eu.dnetlib.rmi.data.ProtocolParameter"
p:name="graphQLQuery"/>
</list>
</property>
</bean>
</property>
</bean>
<bean id="ehriIteratorFactory" class="eu.dnetlib.data.collector.plugins.ariadneplus.ehri.EHRIIteratorFactory"/>
<bean id="ehriGraphQLClient" class="eu.dnetlib.data.collector.plugins.ariadneplus.ehri.EHRIGraphQLClient"/>
<bean id="isidoreCollectorPlugin" class="eu.dnetlib.data.collector.plugins.ariadneplus.isidore.IsidoreCollectorPlugin">
<property name="protocolDescriptor">
<bean class="eu.dnetlib.rmi.data.ProtocolDescriptor" p:name="isidore">
<property name="params">
<list>
<bean class="eu.dnetlib.rmi.data.ProtocolParameter"
p:name="queryParams"/>
<bean class="eu.dnetlib.rmi.data.ProtocolParameter"
p:name="pageParam"/>
<bean class="eu.dnetlib.rmi.data.ProtocolParameter"
p:name="startFromPage"/>
<bean class="eu.dnetlib.rmi.data.ProtocolParameter"
p:name="nextPagePath"/>
<bean class="eu.dnetlib.rmi.data.ProtocolParameter"
p:name="pageSizeParam"/>
<bean class="eu.dnetlib.rmi.data.ProtocolParameter"
p:name="pageSize"/>
<bean class="eu.dnetlib.rmi.data.ProtocolParameter"
p:name="resultTotalXpath"/>
<bean class="eu.dnetlib.rmi.data.ProtocolParameter"
p:name="entityXpath"/>
</list>
</property>
</bean>
</property>
</bean>
</beans>