merge with upstream

This commit is contained in:
Miriam Baglioni 2020-04-30 11:15:54 +02:00
commit 3abb76ff7a
42 changed files with 783 additions and 819 deletions

View File

@@ -12,6 +12,8 @@
 	<artifactId>dhp-build-assembly-resources</artifactId>
 	<packaging>jar</packaging>
+
+	<description>This module contains a set of scripts supporting the build lifecycle for the dnet-hadoop project</description>
 	<build>
 		<plugins>
 			<plugin>

View File

@@ -12,22 +12,29 @@
 	<artifactId>dhp-build-properties-maven-plugin</artifactId>
 	<packaging>maven-plugin</packaging>
+
+	<description>This module is a maven plugin implementing custom properties substitutions in the build lifecycle</description>
 	<dependencies>
 		<dependency>
 			<groupId>org.apache.maven</groupId>
 			<artifactId>maven-plugin-api</artifactId>
-			<version>2.0</version>
+			<version>3.6.3</version>
 		</dependency>
 		<dependency>
 			<groupId>org.apache.maven</groupId>
 			<artifactId>maven-project</artifactId>
-			<version>2.0</version>
+			<version>2.2.1</version>
 		</dependency>
+		<dependency>
+			<groupId>org.apache.maven</groupId>
+			<artifactId>maven-artifact</artifactId>
+			<version>2.2.1</version>
+		</dependency>
 		<dependency>
 			<groupId>org.kuali.maven.plugins</groupId>
 			<artifactId>properties-maven-plugin</artifactId>
-			<version>1.3.2</version>
+			<version>${properties.maven.plugin.version}</version>
 		</dependency>
 		<dependency>
 			<groupId>com.google.code.findbugs</groupId>
@@ -73,44 +80,10 @@
 				<artifactId>maven-javadoc-plugin</artifactId>
 				<configuration>
 					<detectLinks>true</detectLinks>
+					<doclint>none</doclint>
 				</configuration>
 			</plugin>
 		</plugins>
-		<pluginManagement>
-			<plugins>
-				<!--This plugin's configuration is used to store Eclipse m2e settings only. It has no influence on the Maven build itself.-->
-				<plugin>
-					<groupId>org.eclipse.m2e</groupId>
-					<artifactId>lifecycle-mapping</artifactId>
-					<version>1.0.0</version>
-					<configuration>
-						<lifecycleMappingMetadata>
-							<pluginExecutions>
-								<pluginExecution>
-									<pluginExecutionFilter>
-										<groupId>
-											org.apache.maven.plugins
-										</groupId>
-										<artifactId>
-											maven-plugin-plugin
-										</artifactId>
-										<versionRange>
-											[3.2,)
-										</versionRange>
-										<goals>
-											<goal>descriptor</goal>
-										</goals>
-									</pluginExecutionFilter>
-									<action>
-										<ignore />
-									</action>
-								</pluginExecution>
-							</pluginExecutions>
-						</lifecycleMappingMetadata>
-					</configuration>
-				</plugin>
-			</plugins>
-		</pluginManagement>
 	</build>
 </project>
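
For context, the `${properties.maven.plugin.version}` placeholder introduced above has to be resolved by a `<properties>` entry, typically in the parent pom. A minimal sketch; the property name comes from this diff, while the 2.0.1 value is an assumption inferred from the documentation link updated in the next file:

	<!-- hypothetical parent-pom snippet (value inferred, not shown in this diff) -->
	<properties>
		<properties.maven.plugin.version>2.0.1</properties.maven.plugin.version>
	</properties>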

View File

@@ -40,7 +40,7 @@ import edu.umd.cs.findbugs.annotations.SuppressFBWarnings;
 /**
  * Writes project properties for the keys listed in specified properties files. Based on:
- * http://site.kuali.org/maven/plugins/properties-maven-plugin/1.3.2/write-project-properties-mojo.html
+ * http://site.kuali.org/maven/plugins/properties-maven-plugin/2.0.1/write-project-properties-mojo.html
  *
  * @author mhorst
  * @goal write-project-properties

View File

@@ -11,7 +11,7 @@ import org.junit.jupiter.api.Test;
 /** @author mhorst, claudio.atzori */
 public class GenerateOoziePropertiesMojoTest {
-	private GenerateOoziePropertiesMojo mojo = new GenerateOoziePropertiesMojo();
+	private final GenerateOoziePropertiesMojo mojo = new GenerateOoziePropertiesMojo();
 	@BeforeEach
 	public void clearSystemProperties() {

View File

@@ -366,7 +366,7 @@ public class WritePredefinedProjectPropertiesTest {
 	}
 	private Properties getStoredProperties(File testFolder)
-		throws FileNotFoundException, IOException {
+		throws IOException {
 		Properties properties = new Properties();
 		properties.load(new FileInputStream(getPropertiesFileLocation(testFolder)));
 		return properties;
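
Dropping `FileNotFoundException` from the throws clause is safe because it is a subclass of `IOException`, so the narrower clause covers it. As a side note, the `FileInputStream` opened here is never closed; a minimal try-with-resources sketch (not part of this commit, and assuming a `java.io.InputStream` import) that keeps the same signature:

	private Properties getStoredProperties(File testFolder) throws IOException {
		Properties properties = new Properties();
		try (InputStream in = new FileInputStream(getPropertiesFileLocation(testFolder))) {
			properties.load(in); // the stream is closed even if load() throws
		}
		return properties;
	}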

View File

@@ -11,6 +11,38 @@
 	<packaging>jar</packaging>
+
+	<description>This module contains resources supporting common code style conventions</description>
+
+	<distributionManagement>
+		<snapshotRepository>
+			<id>dnet45-snapshots</id>
+			<name>DNet45 Snapshots</name>
+			<url>http://maven.research-infrastructures.eu/nexus/content/repositories/dnet45-snapshots</url>
+			<layout>default</layout>
+		</snapshotRepository>
+		<repository>
+			<id>dnet45-releases</id>
+			<url>http://maven.research-infrastructures.eu/nexus/content/repositories/dnet45-releases</url>
+		</repository>
+	</distributionManagement>
+
+	<build>
+		<pluginManagement>
+			<plugins>
+				<plugin>
+					<groupId>org.apache.maven.plugins</groupId>
+					<artifactId>maven-project-info-reports-plugin</artifactId>
+					<version>3.0.0</version>
+				</plugin>
+				<plugin>
+					<groupId>org.apache.maven.plugins</groupId>
+					<artifactId>maven-site-plugin</artifactId>
+					<version>3.7.1</version>
+				</plugin>
+			</plugins>
+		</pluginManagement>
+	</build>
 	<properties>
 		<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
 	</properties>

View File

@@ -8,6 +8,9 @@
 	</parent>
 	<artifactId>dhp-build</artifactId>
 	<packaging>pom</packaging>
+
+	<description>This module is a container for the build tools used in dnet-hadoop</description>
+
 	<modules>
 		<module>dhp-code-style</module>
 		<module>dhp-build-assembly-resources</module>

View File

@@ -12,6 +12,8 @@
 	<artifactId>dhp-common</artifactId>
 	<packaging>jar</packaging>
+
+	<description>This module contains common utilities meant to be used across the dnet-hadoop submodules</description>
 	<dependencies>
 		<dependency>

View File

@@ -21,7 +21,7 @@ public class DHPUtils {
 	public static String md5(final String s) {
 		try {
 			final MessageDigest md = MessageDigest.getInstance("MD5");
-			md.update(s.getBytes("UTF-8"));
+			md.update(s.getBytes(StandardCharsets.UTF_8));
 			return new String(Hex.encodeHex(md.digest()));
 		} catch (final Exception e) {
 			System.err.println("Error creating id");

View File

@@ -17,7 +17,7 @@ public class NormalizeDate extends AbstractExtensionFunction {
 		"yyyy-MM-dd'T'hh:mm:ss", "yyyy-MM-dd", "yyyy/MM/dd", "yyyy"
 	};
-	private static final String normalizeOutFormat = new String("yyyy-MM-dd'T'hh:mm:ss'Z'");
+	private static final String normalizeOutFormat = "yyyy-MM-dd'T'hh:mm:ss'Z'";
 	@Override
 	public String getName() {
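
Replacing `new String("…")` with the bare literal avoids a pointless copy, since string literals are already interned. For orientation, a hypothetical helper showing how a format list like the one above is typically applied, trying each input pattern and re-serializing with the output pattern (a sketch only, not the actual Saxon extension function):

	import java.text.ParseException;
	import java.text.SimpleDateFormat;
	import java.util.Date;

	public class NormalizeDateSketch {
		static String normalize(String value, String[] inFormats, String outFormat) {
			for (String f : inFormats) {
				try {
					final Date d = new SimpleDateFormat(f).parse(value.trim());
					return new SimpleDateFormat(outFormat).format(d);
				} catch (ParseException e) {
					// try the next input pattern
				}
			}
			return null; // no pattern matched
		}
	}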

View File

@@ -21,7 +21,7 @@ public class MessageManager {
 	private Connection connection;
-	private Map<String, Channel> channels = new HashMap<>();
+	private final Map<String, Channel> channels = new HashMap<>();
 	private boolean durable;

View File

@@ -12,7 +12,7 @@
 	<artifactId>dhp-schemas</artifactId>
 	<packaging>jar</packaging>
-
+	<description>This module contains common schema classes meant to be used across the dnet-hadoop submodules</description>
 	<dependencies>

View File

@@ -16,7 +16,7 @@ public class AtomicActionDeserializer extends JsonDeserializer {
 	@Override
 	public Object deserialize(JsonParser jp, DeserializationContext ctxt)
-		throws IOException, JsonProcessingException {
+		throws IOException {
 		JsonNode node = jp.getCodec().readTree(jp);
 		String classTag = node.get("clazz").asText();
 		JsonNode payload = node.get("payload");

View File

@@ -9,190 +9,195 @@ import java.util.function.Function;
 /** Oaf model utility methods. */
 public class ModelSupport {

 	/** Defines the mapping between the actual entity type and the main entity type */
-	private static Map<EntityType, MainEntityType> entityMapping = Maps.newHashMap();
+	private static final Map<EntityType, MainEntityType> entityMapping = Maps.newHashMap();

 	static {
 		entityMapping.put(EntityType.publication, MainEntityType.result);
 		entityMapping.put(EntityType.dataset, MainEntityType.result);
 		entityMapping.put(EntityType.otherresearchproduct, MainEntityType.result);
 		entityMapping.put(EntityType.software, MainEntityType.result);
 		entityMapping.put(EntityType.datasource, MainEntityType.datasource);
 		entityMapping.put(EntityType.organization, MainEntityType.organization);
 		entityMapping.put(EntityType.project, MainEntityType.project);
 	}

 	/**
 	 * Defines the mapping between the actual entity types and the relative classes implementing them
 	 */
 	public static final Map<EntityType, Class> entityTypes = Maps.newHashMap();

 	static {
 		entityTypes.put(EntityType.datasource, Datasource.class);
 		entityTypes.put(EntityType.organization, Organization.class);
 		entityTypes.put(EntityType.project, Project.class);
 		entityTypes.put(EntityType.dataset, Dataset.class);
 		entityTypes.put(EntityType.otherresearchproduct, OtherResearchProduct.class);
 		entityTypes.put(EntityType.software, Software.class);
 		entityTypes.put(EntityType.publication, Publication.class);
 	}

 	public static final Map<String, Class> oafTypes = Maps.newHashMap();

 	static {
 		oafTypes.put("datasource", Datasource.class);
 		oafTypes.put("organization", Organization.class);
 		oafTypes.put("project", Project.class);
 		oafTypes.put("dataset", Dataset.class);
 		oafTypes.put("otherresearchproduct", OtherResearchProduct.class);
 		oafTypes.put("software", Software.class);
 		oafTypes.put("publication", Publication.class);
 		oafTypes.put("relation", Relation.class);
 	}

 	private static final String schemeTemplate = "dnet:%s_%s_relations";

-	private ModelSupport() {}
+	private ModelSupport() {
+	}

 	/**
 	 * Checks subclass-superclass relationship.
 	 *
 	 * @param subClazzObject Subclass object instance
 	 * @param superClazzObject Superclass object instance
 	 * @param <X> Subclass type
 	 * @param <Y> Superclass type
 	 * @return True if X is a subclass of Y
 	 */
 	public static <X extends Oaf, Y extends Oaf> Boolean isSubClass(
 		X subClazzObject, Y superClazzObject) {
 		return isSubClass(subClazzObject.getClass(), superClazzObject.getClass());
 	}

 	/**
 	 * Checks subclass-superclass relationship.
 	 *
 	 * @param subClazzObject Subclass object instance
 	 * @param superClazz Superclass class
 	 * @param <X> Subclass type
 	 * @param <Y> Superclass type
 	 * @return True if X is a subclass of Y
 	 */
 	public static <X extends Oaf, Y extends Oaf> Boolean isSubClass(
 		X subClazzObject, Class<Y> superClazz) {
 		return isSubClass(subClazzObject.getClass(), superClazz);
 	}

 	/**
 	 * Checks subclass-superclass relationship.
 	 *
 	 * @param subClazz Subclass class
 	 * @param superClazz Superclass class
 	 * @param <X> Subclass type
 	 * @param <Y> Superclass type
 	 * @return True if X is a subclass of Y
 	 */
 	public static <X extends Oaf, Y extends Oaf> Boolean isSubClass(
 		Class<X> subClazz, Class<Y> superClazz) {
 		return superClazz.isAssignableFrom(subClazz);
 	}

 	/**
 	 * Lists all the OAF model classes
 	 *
 	 * @param <T>
 	 * @return
 	 */
 	public static <T extends Oaf> Class<T>[] getOafModelClasses() {
 		return new Class[] {
 			Author.class,
 			Context.class,
 			Country.class,
 			DataInfo.class,
 			Dataset.class,
 			Datasource.class,
 			ExternalReference.class,
 			ExtraInfo.class,
 			Field.class,
 			GeoLocation.class,
 			Instance.class,
 			Journal.class,
 			KeyValue.class,
 			Oaf.class,
 			OafEntity.class,
 			OAIProvenance.class,
 			Organization.class,
 			OriginDescription.class,
 			OtherResearchProduct.class,
 			Project.class,
 			Publication.class,
 			Qualifier.class,
 			Relation.class,
 			Result.class,
 			Software.class,
 			StructuredProperty.class
 		};
 	}

 	public static String getMainType(final EntityType type) {
 		return entityMapping.get(type).name();
 	}

 	public static boolean isResult(EntityType type) {
 		return MainEntityType.result.name().equals(getMainType(type));
 	}

 	public static String getScheme(final String sourceType, final String targetType) {
-		return String.format(
-			schemeTemplate,
-			entityMapping.get(EntityType.valueOf(sourceType)).name(),
-			entityMapping.get(EntityType.valueOf(targetType)).name());
+		return String
+			.format(
+				schemeTemplate,
+				entityMapping.get(EntityType.valueOf(sourceType)).name(),
+				entityMapping.get(EntityType.valueOf(targetType)).name());
 	}

 	public static <T extends Oaf> Function<T, String> idFn() {
 		return x -> {
 			if (isSubClass(x, Relation.class)) {
 				return idFnForRelation(x);
 			}
 			return idFnForOafEntity(x);
 		};
 	}

 	private static <T extends Oaf> String idFnForRelation(T t) {
 		Relation r = (Relation) t;
-		return Optional.ofNullable(r.getSource())
-			.map(
-				source ->
-					Optional.ofNullable(r.getTarget())
-						.map(
-							target ->
-								Optional.ofNullable(r.getRelType())
-									.map(
-										relType ->
-											Optional.ofNullable(r.getSubRelType())
-												.map(
-													subRelType ->
-														Optional.ofNullable(r.getRelClass())
-															.map(
-																relClass ->
-																	String.join(
-																		source,
-																		target,
-																		relType,
-																		subRelType,
-																		relClass))
-															.orElse(
-																String.join(
-																	source,
-																	target,
-																	relType,
-																	subRelType)))
-												.orElse(String.join(source, target, relType)))
-									.orElse(String.join(source, target)))
-						.orElse(source))
-			.orElse(null);
+		return Optional
+			.ofNullable(r.getSource())
+			.map(
+				source -> Optional
+					.ofNullable(r.getTarget())
+					.map(
+						target -> Optional
+							.ofNullable(r.getRelType())
+							.map(
+								relType -> Optional
+									.ofNullable(r.getSubRelType())
+									.map(
+										subRelType -> Optional
+											.ofNullable(r.getRelClass())
+											.map(
+												relClass -> String
+													.join(
+														source,
+														target,
+														relType,
+														subRelType,
+														relClass))
+											.orElse(
+												String
+													.join(
+														source,
+														target,
+														relType,
+														subRelType)))
+									.orElse(String.join(source, target, relType)))
+							.orElse(String.join(source, target)))
+					.orElse(source))
+			.orElse(null);
 	}

 	private static <T extends Oaf> String idFnForOafEntity(T t) {
 		return ((OafEntity) t).getId();
 	}
 }
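
A short usage sketch of the helpers above (the package names follow the dnet-hadoop layout and are assumptions here, as are the sample identifiers):

	import eu.dnetlib.dhp.schema.common.ModelSupport;
	import eu.dnetlib.dhp.schema.oaf.Oaf;
	import eu.dnetlib.dhp.schema.oaf.Relation;

	public class ModelSupportExample {
		public static void main(String[] args) {
			final Relation rel = new Relation();
			rel.setSource("10|ds________::1");
			rel.setTarget("20|org_______::2");

			System.out.println(ModelSupport.isSubClass(rel, Oaf.class)); // true: Relation extends Oaf
			System.out.println(ModelSupport.getScheme("publication", "project")); // dnet:result_project_relations
			System.out.println(ModelSupport.<Relation>idFn().apply(rel)); // id derived from the relation fields
		}
	}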

View File

@@ -32,7 +32,7 @@ public class ISClient implements Serializable {
 	private static final String INPUT_ACTION_SET_ID_SEPARATOR = ",";
-	private ISLookUpService isLookup;
+	private final ISLookUpService isLookup;
 	public ISClient(String isLookupUrl) {
 		isLookup = ISLookupClientFactory.getLookUpService(isLookupUrl);

View File

@@ -123,10 +123,10 @@ public class PromoteActionPayloadFunctions {
 	 * @param <G> Type of graph table row
 	 */
 	public static class TableAggregator<G extends Oaf> extends Aggregator<G, G, G> {
-		private SerializableSupplier<G> zeroFn;
-		private SerializableSupplier<BiFunction<G, G, G>> mergeAndGetFn;
-		private SerializableSupplier<Function<G, Boolean>> isNotZeroFn;
-		private Class<G> rowClazz;
+		private final SerializableSupplier<G> zeroFn;
+		private final SerializableSupplier<BiFunction<G, G, G>> mergeAndGetFn;
+		private final SerializableSupplier<Function<G, Boolean>> isNotZeroFn;
+		private final Class<G> rowClazz;
 		public TableAggregator(
 			SerializableSupplier<G> zeroFn,
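
Making these suppliers `final` is safe because they are assigned once in the constructor and the aggregator is then serialized out to executors. For orientation, a simplified, hypothetical aggregator showing where such suppliers plug into Spark's `Aggregator<IN, BUF, OUT>` contract; `SerializableSupplier` stands in for the project's serializable `Supplier` variant (import assumed), everything else is standard Spark API:

	import java.util.function.BiFunction;

	import org.apache.spark.sql.Encoder;
	import org.apache.spark.sql.Encoders;
	import org.apache.spark.sql.expressions.Aggregator;

	class MergeAggregator<G> extends Aggregator<G, G, G> {
		private final SerializableSupplier<G> zeroFn;
		private final SerializableSupplier<BiFunction<G, G, G>> mergeFn;
		private final Class<G> clazz;

		MergeAggregator(SerializableSupplier<G> zeroFn,
			SerializableSupplier<BiFunction<G, G, G>> mergeFn, Class<G> clazz) {
			this.zeroFn = zeroFn;
			this.mergeFn = mergeFn;
			this.clazz = clazz;
		}

		@Override
		public G zero() {
			return zeroFn.get(); // neutral element for the fold
		}

		@Override
		public G reduce(G buffer, G row) {
			return mergeFn.get().apply(buffer, row); // fold one row into the buffer
		}

		@Override
		public G merge(G b1, G b2) {
			return mergeFn.get().apply(b1, b2); // combine partial buffers
		}

		@Override
		public G finish(G reduction) {
			return reduction;
		}

		@Override
		public Encoder<G> bufferEncoder() {
			return Encoders.kryo(clazz);
		}

		@Override
		public Encoder<G> outputEncoder() {
			return Encoders.kryo(clazz);
		}
	}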

View File

@@ -20,7 +20,7 @@ public class DnetCollectorWorkerApplication {
 	private static final Logger log = LoggerFactory.getLogger(DnetCollectorWorkerApplication.class);
-	private static CollectorPluginFactory collectorPluginFactory = new CollectorPluginFactory();
+	private static final CollectorPluginFactory collectorPluginFactory = new CollectorPluginFactory();
 	private static ArgumentApplicationParser argumentParser;

View File

@@ -9,7 +9,7 @@ public class CollectorPluginErrorLogList extends LinkedList<String> {
 	@Override
 	public String toString() {
-		String log = new String();
+		String log = "";
 		int index = 0;
 		for (final String errorMessage : this) {
 			log += String.format("Retry #%s: %s / ", index++, errorMessage);
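
Initializing with `""` instead of `new String()` skips a needless allocation. Since `+=` in a loop re-copies the accumulated string on every pass, an equivalent `StringBuilder` variant (a sketch, not part of this commit) does the same work in linear time:

	@Override
	public String toString() {
		final StringBuilder sb = new StringBuilder();
		int index = 0;
		for (final String errorMessage : this) {
			sb.append(String.format("Retry #%s: %s / ", index++, errorMessage));
		}
		return sb.toString();
	}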

View File

@@ -11,22 +11,22 @@ import java.util.regex.Pattern;
 public class XmlCleaner {
 	/** Pattern for numeric entities. */
-	private static Pattern validCharacterEntityPattern = Pattern.compile("^&#x?\\d{2,4};"); // $NON-NLS-1$
+	private static final Pattern validCharacterEntityPattern = Pattern.compile("^&#x?\\d{2,4};"); // $NON-NLS-1$
 	// private static Pattern validCharacterEntityPattern = Pattern.compile("^&#?\\d{2,4};");
 	// //$NON-NLS-1$
 	// see https://www.w3.org/TR/REC-xml/#charsets , not only limited to &#11;
-	private static Pattern invalidControlCharPattern = Pattern.compile("&#x?1[0-9a-fA-F];");
+	private static final Pattern invalidControlCharPattern = Pattern.compile("&#x?1[0-9a-fA-F];");
 	/**
 	 * Pattern that negates the allowable XML 4 byte unicode characters. Valid are: #x9 | #xA | #xD | [#x20-#xD7FF] |
 	 * [#xE000-#xFFFD] | [#x10000-#x10FFFF]
 	 */
-	private static Pattern invalidCharacterPattern = Pattern.compile("[^\t\r\n\u0020-\uD7FF\uE000-\uFFFD]"); // $NON-NLS-1$
+	private static final Pattern invalidCharacterPattern = Pattern.compile("[^\t\r\n\u0020-\uD7FF\uE000-\uFFFD]"); // $NON-NLS-1$
 	// Map entities to their unicode equivalent
-	private static Set<String> goodEntities = new HashSet<>();
-	private static Map<String, String> badEntities = new HashMap<>();
+	private static final Set<String> goodEntities = new HashSet<>();
+	private static final Map<String, String> badEntities = new HashMap<>();
 	static {
 		// pre-defined XML entities
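
A minimal, self-contained sketch of how a pattern like `invalidCharacterPattern` above is typically applied, stripping characters that XML 1.0 forbids (class and method names are illustrative, not taken from XmlCleaner):

	import java.util.regex.Pattern;

	public class StripInvalidXmlChars {
		private static final Pattern INVALID = Pattern.compile("[^\t\r\n\u0020-\uD7FF\uE000-\uFFFD]");

		static String clean(String s) {
			return INVALID.matcher(s).replaceAll(""); // drop disallowed code points
		}

		public static void main(String[] args) {
			System.out.println(clean("ok\u0001text")); // prints "oktext"
		}
	}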

View File

@@ -21,8 +21,8 @@ import eu.dnetlib.message.MessageManager;
 public class DnetCollectorWorkerApplicationTests {
-	private ArgumentApplicationParser argumentParser = mock(ArgumentApplicationParser.class);
-	private MessageManager messageManager = mock(MessageManager.class);
+	private final ArgumentApplicationParser argumentParser = mock(ArgumentApplicationParser.class);
+	private final MessageManager messageManager = mock(MessageManager.class);
 	private DnetCollectorWorker worker;

View File

@@ -2,6 +2,7 @@
 package eu.dnetlib.dhp.oa.dedup;
 import java.io.StringReader;
+import java.nio.charset.StandardCharsets;
 import java.security.MessageDigest;
 import java.text.Normalizer;
 import java.util.*;
@@ -73,7 +74,7 @@
 	public static String md5(final String s) {
 		try {
 			final MessageDigest md = MessageDigest.getInstance("MD5");
-			md.update(s.getBytes("UTF-8"));
+			md.update(s.getBytes(StandardCharsets.UTF_8));
 			return new String(Hex.encodeHex(md.digest()));
 		} catch (final Exception e) {
 			System.err.println("Error creating id");

View File

@@ -15,7 +15,7 @@ public class SparkReporter implements Serializable, Reporter {
 	private final List<Tuple2<String, String>> relations = new ArrayList<>();
-	private Map<String, LongAccumulator> accumulators;
+	private final Map<String, LongAccumulator> accumulators;
 	public SparkReporter(Map<String, LongAccumulator> accumulators) {
 		this.accumulators = accumulators;

View File

@@ -106,7 +106,7 @@ public class DedupUtility {
 	public static String md5(final String s) {
 		try {
 			final MessageDigest md = MessageDigest.getInstance("MD5");
-			md.update(s.getBytes("UTF-8"));
+			md.update(s.getBytes(StandardCharsets.UTF_8));
 			return new String(Hex.encodeHex(md.digest()));
 		} catch (final Exception e) {
 			System.err.println("Error creating id");

View File

@@ -410,14 +410,10 @@ public abstract class AbstractMdRecordToOafMapper {
 		final String identifier = n.valueOf("./*[local-name()='identifier']");
 		final String baseURL = n.valueOf("./*[local-name()='baseURL']");
-		;
 		final String metadataNamespace = n.valueOf("./*[local-name()='metadataNamespace']");
-		;
 		final boolean altered = n.valueOf("@altered").equalsIgnoreCase("true");
 		final String datestamp = n.valueOf("./*[local-name()='datestamp']");
-		;
 		final String harvestDate = n.valueOf("@harvestDate");
-		;
 		return oaiIProvenance(identifier, baseURL, metadataNamespace, altered, datestamp, harvestDate);
 	}

View File

@@ -51,497 +51,458 @@ import eu.dnetlib.dhp.schema.oaf.Software;
 import eu.dnetlib.dhp.schema.oaf.StructuredProperty;
 public class MigrateDbEntitiesApplication extends AbstractMigrationApplication
 		implements Closeable {
 	private static final Log log = LogFactory.getLog(MigrateDbEntitiesApplication.class);
 	private final DbClient dbClient;
 	private final long lastUpdateTimestamp;

[this hunk re-formats the whole class with the project code style; the side-by-side columns collapsed in extraction, so the post-merge version is shown below — the statements are unchanged except where noted inline]

	public static void main(final String[] args) throws Exception {
		final ArgumentApplicationParser parser = new ArgumentApplicationParser(
			IOUtils
				.toString(MigrateDbEntitiesApplication.class
					.getResourceAsStream("/eu/dnetlib/dhp/oa/graph/migrate_db_entities_parameters.json")));

		parser.parseArgument(args);

		final String dbUrl = parser.get("postgresUrl");
		final String dbUser = parser.get("postgresUser");
		final String dbPassword = parser.get("postgresPassword");

		final String hdfsPath = parser.get("hdfsPath");

		final boolean processClaims = parser.get("action") != null && parser.get("action").equalsIgnoreCase("claims");

		try (final MigrateDbEntitiesApplication smdbe = new MigrateDbEntitiesApplication(hdfsPath, dbUrl, dbUser,
			dbPassword)) {
			if (processClaims) {
				log.info("Processing claims...");
				smdbe.execute("queryClaims.sql", smdbe::processClaims);
			} else {
				log.info("Processing datasources...");
				smdbe.execute("queryDatasources.sql", smdbe::processDatasource);

				log.info("Processing projects...");
				smdbe.execute("queryProjects.sql", smdbe::processProject);

				log.info("Processing orgs...");
				smdbe.execute("queryOrganizations.sql", smdbe::processOrganization);

				log.info("Processing relations ds <-> orgs ...");
				smdbe.execute("queryDatasourceOrganization.sql", smdbe::processDatasourceOrganization);

				log.info("Processing projects <-> orgs ...");
				smdbe.execute("queryProjectOrganization.sql", smdbe::processProjectOrganization);
			}
			log.info("All done.");
		}
	}

	protected MigrateDbEntitiesApplication() { // ONLY FOR UNIT TEST
		super();
		this.dbClient = null;
		this.lastUpdateTimestamp = new Date().getTime();
	}

	public MigrateDbEntitiesApplication(
		final String hdfsPath, final String dbUrl, final String dbUser, final String dbPassword)
		throws Exception {
		super(hdfsPath);
		this.dbClient = new DbClient(dbUrl, dbUser, dbPassword);
		this.lastUpdateTimestamp = new Date().getTime();
	}

	public void execute(final String sqlFile, final Function<ResultSet, List<Oaf>> producer)
		throws Exception {
		final String sql = IOUtils.toString(getClass().getResourceAsStream("/eu/dnetlib/dhp/oa/graph/sql/" + sqlFile));

		final Consumer<ResultSet> consumer = rs -> producer.apply(rs).forEach(oaf -> emitOaf(oaf));

		dbClient.processResults(sql, consumer);
	}

	public List<Oaf> processDatasource(final ResultSet rs) {

		try {

			final DataInfo info = prepareDataInfo(rs);

			final Datasource ds = new Datasource();

			ds.setId(createOpenaireId(10, rs.getString("datasourceid"), true));
			ds.setOriginalId(Arrays.asList(rs.getString("datasourceid")));
			ds
				.setCollectedfrom(listKeyValues(createOpenaireId(10, rs.getString("collectedfromid"), true), rs.getString("collectedfromname")));
			ds.setPid(new ArrayList<>());
			ds.setDateofcollection(asString(rs.getDate("dateofcollection")));
			ds.setDateoftransformation(null); // Value not returned by the SQL query
			ds.setExtraInfo(new ArrayList<>()); // Values not present in the DB
			ds.setOaiprovenance(null); // Values not present in the DB
			ds.setDatasourcetype(prepareQualifierSplitting(rs.getString("datasourcetype")));
			ds.setOpenairecompatibility(prepareQualifierSplitting(rs.getString("openairecompatibility")));
			ds.setOfficialname(field(rs.getString("officialname"), info));
			ds.setEnglishname(field(rs.getString("englishname"), info));
			ds.setWebsiteurl(field(rs.getString("websiteurl"), info));
			ds.setLogourl(field(rs.getString("logourl"), info));
			ds.setContactemail(field(rs.getString("contactemail"), info));
			ds.setNamespaceprefix(field(rs.getString("namespaceprefix"), info));
			ds.setLatitude(field(Double.toString(rs.getDouble("latitude")), info));
			ds.setLongitude(field(Double.toString(rs.getDouble("longitude")), info));
			ds.setDateofvalidation(field(asString(rs.getDate("dateofvalidation")), info));
			ds.setDescription(field(rs.getString("description"), info));
			ds.setSubjects(prepareListOfStructProps(rs.getArray("subjects"), info));
			ds.setOdnumberofitems(field(Double.toString(rs.getInt("odnumberofitems")), info));
			ds.setOdnumberofitemsdate(field(asString(rs.getDate("odnumberofitemsdate")), info));
			ds.setOdpolicies(field(rs.getString("odpolicies"), info));
			ds.setOdlanguages(prepareListFields(rs.getArray("odlanguages"), info));
			ds.setOdcontenttypes(prepareListFields(rs.getArray("odcontenttypes"), info));
			ds.setAccessinfopackage(prepareListFields(rs.getArray("accessinfopackage"), info));
			ds.setReleasestartdate(field(asString(rs.getDate("releasestartdate")), info));
			ds.setReleaseenddate(field(asString(rs.getDate("releaseenddate")), info));
			ds.setMissionstatementurl(field(rs.getString("missionstatementurl"), info));
			ds.setDataprovider(field(rs.getBoolean("dataprovider"), info));
			ds.setServiceprovider(field(rs.getBoolean("serviceprovider"), info));
			ds.setDatabaseaccesstype(field(rs.getString("databaseaccesstype"), info));
			ds.setDatauploadtype(field(rs.getString("datauploadtype"), info));
			ds.setDatabaseaccessrestriction(field(rs.getString("databaseaccessrestriction"), info));
			ds.setDatauploadrestriction(field(rs.getString("datauploadrestriction"), info));
			ds.setVersioning(field(rs.getBoolean("versioning"), info));
			ds.setCitationguidelineurl(field(rs.getString("citationguidelineurl"), info));
			ds.setQualitymanagementkind(field(rs.getString("qualitymanagementkind"), info));
			ds.setPidsystems(field(rs.getString("pidsystems"), info));
			ds.setCertificates(field(rs.getString("certificates"), info));
			ds.setPolicies(new ArrayList<>()); // The sql query returns an empty array
			ds
				.setJournal(prepareJournal(rs.getString("officialname"), rs.getString("journal"), info)); // Journal
			ds.setDataInfo(info);
			ds.setLastupdatetimestamp(lastUpdateTimestamp);

			return Arrays.asList(ds);
		} catch (final Exception e) {
			throw new RuntimeException(e);
		}
	}

	public List<Oaf> processProject(final ResultSet rs) {
		try {

			final DataInfo info = prepareDataInfo(rs);

			final Project p = new Project();

			p.setId(createOpenaireId(40, rs.getString("projectid"), true));
			p.setOriginalId(Arrays.asList(rs.getString("projectid")));
			p
				.setCollectedfrom(listKeyValues(createOpenaireId(10, rs.getString("collectedfromid"), true), rs.getString("collectedfromname")));
			p.setPid(new ArrayList<>());
			p.setDateofcollection(asString(rs.getDate("dateofcollection")));
			p.setDateoftransformation(asString(rs.getDate("dateoftransformation")));
			p.setExtraInfo(new ArrayList<>()); // Values not present in the DB
			p.setOaiprovenance(null); // Values not present in the DB
			p.setWebsiteurl(field(rs.getString("websiteurl"), info));
			p.setCode(field(rs.getString("code"), info));
			p.setAcronym(field(rs.getString("acronym"), info));
			p.setTitle(field(rs.getString("title"), info));
			p.setStartdate(field(asString(rs.getDate("startdate")), info));
			p.setEnddate(field(asString(rs.getDate("enddate")), info));
			p.setCallidentifier(field(rs.getString("callidentifier"), info));
			p.setKeywords(field(rs.getString("keywords"), info));
			p.setDuration(field(Integer.toString(rs.getInt("duration")), info));
			p.setEcsc39(field(Boolean.toString(rs.getBoolean("ecsc39")), info));
			p
				.setOamandatepublications(field(Boolean.toString(rs.getBoolean("oamandatepublications")), info));
			p.setEcarticle29_3(field(Boolean.toString(rs.getBoolean("ecarticle29_3")), info));
			p.setSubjects(prepareListOfStructProps(rs.getArray("subjects"), info));
			p.setFundingtree(prepareListFields(rs.getArray("fundingtree"), info));
			p.setContracttype(prepareQualifierSplitting(rs.getString("contracttype")));
			p.setOptional1(field(rs.getString("optional1"), info));
			p.setOptional2(field(rs.getString("optional2"), info));
			p.setJsonextrainfo(field(rs.getString("jsonextrainfo"), info));
			p.setContactfullname(field(rs.getString("contactfullname"), info));
			p.setContactfax(field(rs.getString("contactfax"), info));
			p.setContactphone(field(rs.getString("contactphone"), info));
			p.setContactemail(field(rs.getString("contactemail"), info));
			p.setSummary(field(rs.getString("summary"), info));
			p.setCurrency(field(rs.getString("currency"), info));
			p.setTotalcost(new Float(rs.getDouble("totalcost")));
			p.setFundedamount(new Float(rs.getDouble("fundedamount")));
			p.setDataInfo(info);
			p.setLastupdatetimestamp(lastUpdateTimestamp);

			return Arrays.asList(p);

		} catch (final Exception e) {
			throw new RuntimeException(e);
		}
	}

	public List<Oaf> processOrganization(final ResultSet rs) {
		try {

			final DataInfo info = prepareDataInfo(rs);

			final Organization o = new Organization();

			o.setId(createOpenaireId(20, rs.getString("organizationid"), true));
			o.setOriginalId(Arrays.asList(rs.getString("organizationid")));
			o
				.setCollectedfrom(listKeyValues(createOpenaireId(10, rs.getString("collectedfromid"), true), rs.getString("collectedfromname")));
			o.setPid(new ArrayList<>());
			o.setDateofcollection(asString(rs.getDate("dateofcollection")));
			o.setDateoftransformation(asString(rs.getDate("dateoftransformation")));
			o.setExtraInfo(new ArrayList<>()); // Values not present in the DB
			o.setOaiprovenance(null); // Values not present in the DB
			o.setLegalshortname(field(rs.getString("legalshortname"), info));
			o.setLegalname(field(rs.getString("legalname"), info));
			o.setAlternativeNames(new ArrayList<>()); // Values not returned by the SQL query
			o.setWebsiteurl(field(rs.getString("websiteurl"), info));
			o.setLogourl(field(rs.getString("logourl"), info));
			o.setEclegalbody(field(Boolean.toString(rs.getBoolean("eclegalbody")), info));
			o.setEclegalperson(field(Boolean.toString(rs.getBoolean("eclegalperson")), info));
			o.setEcnonprofit(field(Boolean.toString(rs.getBoolean("ecnonprofit")), info));
			o
				.setEcresearchorganization(field(Boolean.toString(rs.getBoolean("ecresearchorganization")), info));
			o.setEchighereducation(field(Boolean.toString(rs.getBoolean("echighereducation")), info));
			o
				.setEcinternationalorganizationeurinterests(field(Boolean.toString(rs.getBoolean("ecinternationalorganizationeurinterests")), info));
			o
				.setEcinternationalorganization(field(Boolean.toString(rs.getBoolean("ecinternationalorganization")), info));
			o.setEcenterprise(field(Boolean.toString(rs.getBoolean("ecenterprise")), info));
			o.setEcsmevalidated(field(Boolean.toString(rs.getBoolean("ecsmevalidated")), info));
			o.setEcnutscode(field(Boolean.toString(rs.getBoolean("ecnutscode")), info));
			o.setCountry(prepareQualifierSplitting(rs.getString("country")));
			o.setDataInfo(info);
			o.setLastupdatetimestamp(lastUpdateTimestamp);

			return Arrays.asList(o);
		} catch (final Exception e) {
			throw new RuntimeException(e);
		}
	}

	public List<Oaf> processDatasourceOrganization(final ResultSet rs) {
		try {
			final DataInfo info = prepareDataInfo(rs);
			final String orgId = createOpenaireId(20, rs.getString("organization"), true);
			final String dsId = createOpenaireId(10, rs.getString("datasource"), true);
			final List<KeyValue> collectedFrom = listKeyValues(createOpenaireId(10, rs.getString("collectedfromid"), true), rs.getString("collectedfromname"));

			final Relation r1 = new Relation();
			r1.setRelType("datasourceOrganization");
			r1.setSubRelType("provision");
			r1.setRelClass("isProvidedBy");
			r1.setSource(dsId);
			r1.setTarget(orgId);
			r1.setCollectedfrom(collectedFrom);
			r1.setDataInfo(info);
			r1.setLastupdatetimestamp(lastUpdateTimestamp);

			final Relation r2 = new Relation();
			r2.setRelType("datasourceOrganization");
			r2.setSubRelType("provision");
			r2.setRelClass("provides");
			r2.setSource(orgId);
			r2.setTarget(dsId);
			r2.setCollectedfrom(collectedFrom);
			r2.setDataInfo(info);
			r2.setLastupdatetimestamp(lastUpdateTimestamp);

			return Arrays.asList(r1, r2);
		} catch (final Exception e) {
			throw new RuntimeException(e);
		}
	}

	public List<Oaf> processProjectOrganization(final ResultSet rs) {
		try {
			final DataInfo info = prepareDataInfo(rs);
			final String orgId = createOpenaireId(20, rs.getString("resporganization"), true);
			final String projectId = createOpenaireId(40, rs.getString("project"), true);
			final List<KeyValue> collectedFrom = listKeyValues(createOpenaireId(10, rs.getString("collectedfromid"), true), rs.getString("collectedfromname"));

			final Relation r1 = new Relation();
			r1.setRelType("projectOrganization");
			r1.setSubRelType("participation");
			r1.setRelClass("hasParticipant"); // was "isParticipant" before this merge
			r1.setSource(projectId);
			r1.setTarget(orgId);
			r1.setCollectedfrom(collectedFrom);
			r1.setDataInfo(info);
			r1.setLastupdatetimestamp(lastUpdateTimestamp);

			final Relation r2 = new Relation();
			r2.setRelType("projectOrganization");
			r2.setSubRelType("participation");
			r2.setRelClass("isParticipant"); // was "hasParticipant" before this merge
			r2.setSource(orgId);
			r2.setTarget(projectId);
			r2.setCollectedfrom(collectedFrom);
			r2.setDataInfo(info);
			r2.setLastupdatetimestamp(lastUpdateTimestamp);

			return Arrays.asList(r1, r2);
		} catch (final Exception e) {
			throw new RuntimeException(e);
		}
	}

	public List<Oaf> processClaims(final ResultSet rs) {

		final DataInfo info =
			dataInfo(false, null, false, false, qualifier("user:claim", "user:claim", "dnet:provenanceActions", "dnet:provenanceActions"), "0.9");

		final List<KeyValue> collectedFrom = listKeyValues(createOpenaireId(10, "infrastruct_::openaire", true), "OpenAIRE");

		try {

			if (rs.getString("source_type").equals("context")) {
				final Result r;

				if (rs.getString("target_type").equals("dataset")) {
					r = new Dataset();
					r.setResulttype(MigrationConstants.DATASET_RESULTTYPE_QUALIFIER);
				} else if (rs.getString("target_type").equals("software")) {
					r = new Software();
					r.setResulttype(MigrationConstants.SOFTWARE_RESULTTYPE_QUALIFIER);
				} else if (rs.getString("target_type").equals("other")) {
					r = new OtherResearchProduct();
					r.setResulttype(MigrationConstants.OTHER_RESULTTYPE_QUALIFIER);
				} else {
					r = new Publication();
					r.setResulttype(MigrationConstants.PUBLICATION_RESULTTYPE_QUALIFIER);
				}
				r.setId(createOpenaireId(50, rs.getString("target_id"), false));
				r.setLastupdatetimestamp(lastUpdateTimestamp);
				r.setContext(prepareContext(rs.getString("source_id"), info));
				r.setDataInfo(info);
				r.setCollectedfrom(collectedFrom);

				return Arrays.asList(r);
			} else {
				final String sourceId = createOpenaireId(rs.getString("source_type"), rs.getString("source_id"), false);
				final String targetId = createOpenaireId(rs.getString("target_type"), rs.getString("target_id"), false);

				final Relation r1 = new Relation();
				final Relation r2 = new Relation();

				if (rs.getString("source_type").equals("project")) {
					r1.setCollectedfrom(collectedFrom);
					r1.setRelType("resultProject");
					r1.setSubRelType("outcome");
					r1.setRelClass("produces");

					r2.setCollectedfrom(collectedFrom);
					r2.setRelType("resultProject");
					r2.setSubRelType("outcome");
					r2.setRelClass("isProducedBy");
				} else {
					r1.setCollectedfrom(collectedFrom);
					r1.setRelType("resultResult");
					r1.setSubRelType("relationship");
					r1.setRelClass("isRelatedTo");

					r2.setCollectedfrom(collectedFrom);
					r2.setRelType("resultResult");
					r2.setSubRelType("relationship");
					r2.setRelClass("isRelatedTo");
				}

				r1.setSource(sourceId);
				r1.setTarget(targetId);
				r1.setDataInfo(info);
				r1.setLastupdatetimestamp(lastUpdateTimestamp);

				r2.setSource(targetId);
				r2.setTarget(sourceId);
				r2.setDataInfo(info);
				r2.setLastupdatetimestamp(lastUpdateTimestamp);

				return Arrays.asList(r1, r2);
			}

		} catch (final Exception e) {
			throw new RuntimeException(e);
		}
	}

	private List<Context> prepareContext(final String id, final DataInfo dataInfo) {
		final Context context = new Context();
		context.setId(id);
		context.setDataInfo(Arrays.asList(dataInfo));
		return Arrays.asList(context);
	}

	private DataInfo prepareDataInfo(final ResultSet rs) throws SQLException {
		final Boolean deletedbyinference = rs.getBoolean("deletedbyinference");
		final String inferenceprovenance = rs.getString("inferenceprovenance");
		final Boolean inferred = rs.getBoolean("inferred");
		final String trust = rs.getString("trust");
		return dataInfo(deletedbyinference, inferenceprovenance, inferred, false, MigrationConstants.ENTITYREGISTRY_PROVENANCE_ACTION, trust);
	}

	private Qualifier prepareQualifierSplitting(final String s) {
		if (StringUtils.isBlank(s)) { return null; }
		final String[] arr = s.split("@@@");
		return arr.length == 4 ? qualifier(arr[0], arr[1], arr[2], arr[3]) : null;
	}

	private List<Field<String>> prepareListFields(final Array array, final DataInfo info) {
		try {
			return array != null ? listFields(info, (String[]) array.getArray()) : new ArrayList<>();
		} catch (final SQLException e) {
			throw new RuntimeException("Invalid SQL array", e);
		}
	}

	private StructuredProperty prepareStructProp(final String s, final DataInfo dataInfo) {
		if (StringUtils.isBlank(s)) { return null; }
		final String[] parts = s.split("###");
		if (parts.length == 2) {
			final String value = parts[0];
			final String[] arr = parts[1].split("@@@");
			if (arr.length == 4) { return structuredProperty(value, arr[0], arr[1], arr[2], arr[3], dataInfo); }
		}
		return null;
	}

	private List<StructuredProperty> prepareListOfStructProps(
		final Array array,
		final DataInfo dataInfo) throws SQLException {
		final List<StructuredProperty> res = new ArrayList<>();
		if (array != null) {
			for (final String s : (String[]) array.getArray()) {
				final StructuredProperty sp = prepareStructProp(s, dataInfo);
				if (sp != null) {
					res.add(sp);
				}
			}
		}

		return res;
	}

	private Journal prepareJournal(final String name, final String sj, final DataInfo info) {
} catch (final SQLException e) { if (StringUtils.isNotBlank(sj)) {
throw new RuntimeException("Invalid SQL array", e); final String[] arr = sj.split("@@@");
} if (arr.length == 3) {
} final String issn = StringUtils.isNotBlank(arr[0]) ? arr[0].trim() : null;
final String eissn = StringUtils.isNotBlank(arr[1]) ? arr[1].trim() : null;;
private StructuredProperty prepareStructProp(final String s, final DataInfo dataInfo) { final String lissn = StringUtils.isNotBlank(arr[2]) ? arr[2].trim() : null;;
if (StringUtils.isBlank(s)) { if (issn != null || eissn != null || lissn != null) {
return null; return journal(name, issn, eissn, eissn, null, null, null, null, null, null, null, info);
} }
final String[] parts = s.split("###"); }
if (parts.length == 2) { }
final String value = parts[0]; return null;
final String[] arr = parts[1].split("@@@"); }
if (arr.length == 4) {
return structuredProperty(value, arr[0], arr[1], arr[2], arr[3], dataInfo); @Override
} public void close() throws IOException {
} super.close();
return null; dbClient.close();
} }
private List<StructuredProperty> prepareListOfStructProps(
final Array array, final DataInfo dataInfo) throws SQLException {
final List<StructuredProperty> res = new ArrayList<>();
if (array != null) {
for (final String s : (String[]) array.getArray()) {
final StructuredProperty sp = prepareStructProp(s, dataInfo);
if (sp != null) {
res.add(sp);
}
}
}
return res;
}
private Journal prepareJournal(final String name, final String sj, final DataInfo info) {
if (StringUtils.isNotBlank(sj)) {
final String[] arr = sj.split("@@@");
if (arr.length == 3) {
final String issn = StringUtils.isNotBlank(arr[0]) ? arr[0] : null;
final String eissn = StringUtils.isNotBlank(arr[1]) ? arr[1] : null;
;
final String lissn = StringUtils.isNotBlank(arr[2]) ? arr[2] : null;
;
if (issn != null || eissn != null || lissn != null) {
return journal(name, issn, eissn, eissn, null, null, null, null, null, null, null, info);
}
}
}
return null;
}
@Override
public void close() throws IOException {
super.close();
dbClient.close();
}
} }
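For orientation, a minimal sketch of the delimiter conventions the helper methods above decode; the sample identifier values are hypothetical, only the "@@@"/"###" delimiters and token counts are taken from the parsers:

public class DelimiterDemo {
	public static void main(String[] args) {
		// Hypothetical encoded values; only the delimiters and arities mirror the parsers above.
		final String qualifierField = "doi@@@doi@@@dnet:pid_types@@@dnet:pid_types";
		final String structPropField = "10.1234/example###doi@@@doi@@@dnet:pid_types@@@dnet:pid_types";

		// prepareQualifierSplitting expects exactly 4 "@@@" tokens: classid, classname, schemeid, schemename
		System.out.println(qualifierField.split("@@@").length); // 4

		// prepareStructProp expects value "###" qualifier, the qualifier again being 4 "@@@" tokens
		final String[] parts = structPropField.split("###");
		System.out.println(parts[0]); // 10.1234/example
		System.out.println(parts[1].split("@@@").length); // 4
	}
}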

View File

@ -14,7 +14,7 @@ public class DbClient implements Closeable {
private static final Log log = LogFactory.getLog(DbClient.class);
private Connection connection; private final Connection connection;
public DbClient(final String address, final String login, final String password) {

View File

@ -2,6 +2,7 @@
package eu.dnetlib.dhp.oa.graph.raw.common;
import java.nio.charset.Charset;
import java.nio.charset.StandardCharsets;
import java.text.Normalizer;
import java.util.HashSet;
import java.util.List;
@ -141,7 +142,7 @@ public class PacePerson {
public String hash() {
return Hashing
.murmur3_128()
.hashString(getNormalisedFullname(), Charset.forName(UTF8)) .hashString(getNormalisedFullname(), StandardCharsets.UTF_8)
.toString();
}
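For context, a standalone sketch of the hashing call above (Guava); the full name here is made up. StandardCharsets.UTF_8 avoids the runtime charset lookup behind Charset.forName(UTF8):

import java.nio.charset.StandardCharsets;
import com.google.common.hash.Hashing;

public class HashDemo {
	public static void main(String[] args) {
		// Hypothetical normalised full name; murmur3_128 yields a stable 32-char hex digest.
		final String key = Hashing
			.murmur3_128()
			.hashString("doe, john", StandardCharsets.UTF_8)
			.toString();
		System.out.println(key);
	}
}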

View File

@ -25,7 +25,7 @@ public abstract class AbstractScholexplorerParser {
protected static final Log log = LogFactory.getLog(AbstractScholexplorerParser.class);
static final Pattern pattern = Pattern.compile("10\\.\\d{4,9}/[-._;()/:A-Z0-9]+$", Pattern.CASE_INSENSITIVE);
private List<String> datasetSubTypes = Arrays private final List<String> datasetSubTypes = Arrays
.asList(
"dataset",
"software",

View File

@ -108,7 +108,7 @@ SELECT
ELSE 'Other'
END || '@@@dnet:datasource_typologies@@@dnet:datasource_typologies' AS datasourcetype,
'sysimport:crosswalk:entityregistry@@@sysimport:crosswalk:entityregistry@@@dnet:provenance_actions@@@dnet:provenance_actions' AS provenanceaction,
CONCAT(d.issn, '@@@', d.eissn, '@@@', d.lissn) AS journal CONCAT(d.issn, ' @@@ ', d.eissn, ' @@@ ', d.lissn) AS journal
FROM dsm_datasources d
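The padding added around the separators appears deliberate, and would also explain the .trim() calls in prepareJournal above: PostgreSQL's CONCAT renders NULL as an empty string, and Java's String.split drops trailing empty tokens, so an unpadded journal value with NULL eissn/lissn would fail the arr.length == 3 check. A sketch with a made-up ISSN:

public class JournalSplitDemo {
	public static void main(String[] args) {
		final String unpadded = "0123-456X@@@@@@"; // '@@@' separators, NULL eissn/lissn
		final String padded = "0123-456X @@@  @@@ "; // ' @@@ ' separators, NULL eissn/lissn
		System.out.println(unpadded.split("@@@").length); // 1 -> length check fails, journal silently dropped
		System.out.println(padded.split("@@@").length); // 3 -> issn survives, tokens trimmed on the Java side
	}
}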

View File

@ -18,7 +18,7 @@ import eu.dnetlib.dhp.utils.DHPUtils;
public class CrossRefParserJSON {
private static List<ScholixCollectedFrom> collectedFrom = generateCrossrefCollectedFrom("complete"); private static final List<ScholixCollectedFrom> collectedFrom = generateCrossrefCollectedFrom("complete");
public static ScholixResource parseRecord(final String record) {
if (record == null)

View File

@ -16,7 +16,7 @@ public class DataciteClient {
private String host;
private String index = "datacite";
private String indexType = "dump";
private Datacite2Scholix d2s; private final Datacite2Scholix d2s;
public DataciteClient(String host) {
this.host = host;

View File

@ -12,7 +12,7 @@ import eu.dnetlib.dhp.oa.provision.model.SortableRelation;
*/
public class RelationPartitioner extends Partitioner {
private int numPartitions; private final int numPartitions;
public RelationPartitioner(int numPartitions) {
this.numPartitions = numPartitions;

View File

@ -46,7 +46,7 @@ public class StreamingInputDocumentFactory {
private static final String INDEX_RECORD_ID = INDEX_FIELD_PREFIX + "indexrecordidentifier";
private static final String outFormat = new String("yyyy-MM-dd'T'hh:mm:ss'Z'"); private static final String outFormat = "yyyy-MM-dd'T'hh:mm:ss'Z'";
private static final List<String> dateFormats = Arrays
.asList("yyyy-MM-dd'T'hh:mm:ss", "yyyy-MM-dd", "dd-MM-yyyy", "dd/MM/yyyy", "yyyy");
@ -61,15 +61,18 @@ public class StreamingInputDocumentFactory {
private static final int MAX_FIELD_LENGTH = 25000;
private ThreadLocal<XMLInputFactory> inputFactory = ThreadLocal.withInitial(() -> XMLInputFactory.newInstance()); private final ThreadLocal<XMLInputFactory> inputFactory = ThreadLocal.withInitial(() -> XMLInputFactory.newInstance());
private ThreadLocal<XMLOutputFactory> outputFactory = ThreadLocal.withInitial(() -> XMLOutputFactory.newInstance()); private final ThreadLocal<XMLOutputFactory> outputFactory = ThreadLocal.withInitial(() -> XMLOutputFactory.newInstance());
private ThreadLocal<XMLEventFactory> eventFactory = ThreadLocal.withInitial(() -> XMLEventFactory.newInstance()); private final ThreadLocal<XMLEventFactory> eventFactory = ThreadLocal.withInitial(() -> XMLEventFactory.newInstance());
private String version; private final String version;
private String dsId; private final String dsId;
private String resultName = DEFAULTDNETRESULT;
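One plausible reading of the ThreadLocal wrapping above (the diff itself does not say so): the StAX factories are not guaranteed to be thread-safe, so each worker thread lazily builds its own instance. A minimal sketch:

import javax.xml.stream.XMLInputFactory;

public class FactoryPerThreadDemo {
	// One factory per thread; withInitial defers construction to the first get().
	private static final ThreadLocal<XMLInputFactory> INPUT_FACTORY = ThreadLocal
		.withInitial(XMLInputFactory::newInstance);

	public static void main(String[] args) throws InterruptedException {
		final Runnable task = () -> System.out
			.println(Thread.currentThread().getName() + " -> " + INPUT_FACTORY.get());
		final Thread t1 = new Thread(task);
		final Thread t2 = new Thread(task);
		t1.start();
		t2.start();
		t1.join();
		t2.join(); // each thread prints a distinct factory instance
	}
}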

View File

@ -17,7 +17,7 @@ import eu.dnetlib.dhp.schema.oaf.OafEntity;
public class TemplateFactory {
private TemplateResources resources; private final TemplateResources resources;
private static final char DELIMITER = '$';

View File

@ -8,17 +8,17 @@ import com.google.common.io.Resources;
public class TemplateResources {
private String record = read("eu/dnetlib/dhp/oa/provision/template/record.st"); private final String record = read("eu/dnetlib/dhp/oa/provision/template/record.st");
private String instance = read("eu/dnetlib/dhp/oa/provision/template/instance.st"); private final String instance = read("eu/dnetlib/dhp/oa/provision/template/instance.st");
private String rel = read("eu/dnetlib/dhp/oa/provision/template/rel.st"); private final String rel = read("eu/dnetlib/dhp/oa/provision/template/rel.st");
private String webresource = read("eu/dnetlib/dhp/oa/provision/template/webresource.st"); private final String webresource = read("eu/dnetlib/dhp/oa/provision/template/webresource.st");
private String child = read("eu/dnetlib/dhp/oa/provision/template/child.st"); private final String child = read("eu/dnetlib/dhp/oa/provision/template/child.st");
private String entity = read("eu/dnetlib/dhp/oa/provision/template/entity.st"); private final String entity = read("eu/dnetlib/dhp/oa/provision/template/entity.st");
private static String read(final String classpathResource) throws IOException {
return Resources.toString(Resources.getResource(classpathResource), StandardCharsets.UTF_8);

View File

@ -48,13 +48,13 @@ import eu.dnetlib.dhp.schema.oaf.Result;
public class XmlRecordFactory implements Serializable {
public static final String REL_SUBTYPE_DEDUP = "dedup";
private Map<String, LongAccumulator> accumulators; private final Map<String, LongAccumulator> accumulators;
private Set<String> specialDatasourceTypes; private final Set<String> specialDatasourceTypes;
private ContextMapper contextMapper; private final ContextMapper contextMapper;
private String schemaLocation; private final String schemaLocation;
private boolean indent = false;

View File

@ -41,7 +41,7 @@ public class XmlSerializationUtils {
public static String mapStructuredProperty(String name, StructuredProperty t) {
return asXmlElement(
name, t.getValue(), t.getQualifier(), t.getDataInfo() != null ? t.getDataInfo() : null); name, t.getValue(), t.getQualifier(), t.getDataInfo());
}
public static String mapQualifier(String name, Qualifier q) {

View File

@ -9,7 +9,7 @@ import org.junit.jupiter.api.BeforeEach;
public class GraphJoinerTest {
private ClassLoader cl = getClass().getClassLoader(); private final ClassLoader cl = getClass().getClassLoader();
private Path workingDir;
private Path inputDir;
private Path outputDir;

View File

@ -22,6 +22,7 @@
<plugin>
<groupId>pl.project13.maven</groupId>
<artifactId>git-commit-id-plugin</artifactId>
<version>2.1.11</version>
<configuration>
<failOnNoGitDirectory>false</failOnNoGitDirectory>
</configuration>

View File

@ -3,13 +3,27 @@
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<parent>
<artifactId>dhp</artifactId> <artifactId>dhp-workflows</artifactId>
<groupId>eu.dnetlib.dhp</groupId>
<version>1.1.7-SNAPSHOT</version>
</parent>
<modelVersion>4.0.0</modelVersion>
<artifactId>dhp-worfklow-profiles</artifactId>
<packaging>jar</packaging>
<!--
<build>
<pluginManagement>
<plugins>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-site-plugin</artifactId>
<version>3.7.1</version>
</plugin>
</plugins>
</pluginManagement>
</build>
-->
</project>

View File

@ -13,6 +13,8 @@
<artifactId>dhp-workflows</artifactId>
<packaging>pom</packaging>
<description>This module is the container for the oozie workflow definitions in dnet-hadoop project</description>
<modules>
<module>dhp-worfklow-profiles</module>
<module>dhp-aggregation</module>
@ -184,7 +186,7 @@
<plugin>
<groupId>org.kuali.maven.plugins</groupId>
<artifactId>properties-maven-plugin</artifactId>
<version>1.3.2</version> <version>${properties.maven.plugin.version}</version>
<dependencies>
<dependency>
<groupId>eu.dnetlib.dhp</groupId>
@ -564,60 +566,4 @@
</profile>
</profiles>
<build>
<pluginManagement>
<plugins>
<!--This plugin's configuration is used to store Eclipse m2e settings only. It has no influence on the Maven build itself. -->
<plugin>
<groupId>org.eclipse.m2e</groupId>
<artifactId>lifecycle-mapping</artifactId>
<version>1.0.0</version>
<configuration>
<lifecycleMappingMetadata>
<pluginExecutions>
<pluginExecution>
<pluginExecutionFilter>
<groupId>
org.kuali.maven.plugins
</groupId>
<artifactId>
properties-maven-plugin
</artifactId>
<versionRange>
[1.3.2,)
</versionRange>
<goals>
<goal>
read-project-properties
</goal>
<goal>
write-project-properties
</goal>
</goals>
</pluginExecutionFilter>
<action>
<ignore />
</action>
</pluginExecution>
<!-- copy-dependency plugin -->
<pluginExecution>
<pluginExecutionFilter>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-dependency-plugin</artifactId>
<versionRange>[1.0.0,)</versionRange>
<goals>
<goal>copy-dependencies</goal>
</goals>
</pluginExecutionFilter>
<action>
<ignore />
</action>
</pluginExecution>
</pluginExecutions>
</lifecycleMappingMetadata>
</configuration>
</plugin>
</plugins>
</pluginManagement>
</build>
</project>

pom.xml
View File

@ -6,14 +6,14 @@
<version>1.1.7-SNAPSHOT</version>
<packaging>pom</packaging>
<url>http://www.d-net.research-infrastructures.eu</url>
<licenses>
<license>
<name>The Apache Software License, Version 2.0</name> <name>GNU Affero General Public License v3.0 or later</name>
<url>http://www.apache.org/licenses/LICENSE-2.0.txt</url> <url>https://spdx.org/licenses/AGPL-3.0-or-later.html#licenseText</url>
<distribution>repo</distribution>
<comments>A business-friendly OSS license</comments> <comments>This program is free software: you can redistribute it and/or modify it under the terms of the
GNU Affero General Public License as published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.</comments>
</license>
</licenses>
@ -41,6 +41,8 @@
<tag>HEAD</tag>
</scm>
<description>This module is the root descriptor for the dnet-hadoop project</description>
<pluginRepositories>
</pluginRepositories>
@ -406,6 +408,18 @@
<testOutputDirectory>target/test-classes</testOutputDirectory>
<pluginManagement>
<plugins>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-project-info-reports-plugin</artifactId>
<version>3.0.0</version>
</plugin>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-site-plugin</artifactId>
<version>3.7.1</version>
</plugin>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-compiler-plugin</artifactId>
@ -449,9 +463,10 @@
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-javadoc-plugin</artifactId>
<version>2.10.4</version> <version>3.2.0</version>
<configuration>
<detectLinks>true</detectLinks>
<doclint>none</doclint>
</configuration>
</plugin>
<plugin>
@ -475,6 +490,14 @@
</plugins>
</pluginManagement>
<plugins>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-site-plugin</artifactId>
</plugin>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-project-info-reports-plugin</artifactId>
</plugin>
<plugin>
<groupId>net.revelc.code.formatter</groupId>
<artifactId>formatter-maven-plugin</artifactId>
@ -569,9 +592,9 @@
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-javadoc-plugin</artifactId>
<version>2.10.4</version>
<configuration>
<detectLinks>true</detectLinks>
<doclint>none</doclint>
</configuration>
</plugin>
</plugins>
@ -582,6 +605,7 @@
<project.reporting.outputEncoding>UTF-8</project.reporting.outputEncoding>
<maven.compiler.plugin.version>3.6.0</maven.compiler.plugin.version>
<maven.failsave.plugin.version>2.22.2</maven.failsave.plugin.version>
<properties.maven.plugin.version>2.0.1</properties.maven.plugin.version>
<dhp.cdh.version>cdh5.9.2</dhp.cdh.version>
<dhp.hadoop.version>2.6.0-${dhp.cdh.version}</dhp.hadoop.version>
<dhp.oozie.version>4.1.0-${dhp.cdh.version}</dhp.oozie.version>