diff --git a/dhp-build/dhp-build-assembly-resources/pom.xml b/dhp-build/dhp-build-assembly-resources/pom.xml
index c837cd5386..9b03536ddd 100644
--- a/dhp-build/dhp-build-assembly-resources/pom.xml
+++ b/dhp-build/dhp-build-assembly-resources/pom.xml
@@ -12,6 +12,8 @@
dhp-build-assembly-resources
jar
+ This module contains a set of scripts supporting the build lifecycle for the dnet-hadoop project
+
diff --git a/dhp-build/dhp-build-properties-maven-plugin/pom.xml b/dhp-build/dhp-build-properties-maven-plugin/pom.xml
index df5045fcb6..4d40edd997 100644
--- a/dhp-build/dhp-build-properties-maven-plugin/pom.xml
+++ b/dhp-build/dhp-build-properties-maven-plugin/pom.xml
@@ -12,22 +12,29 @@
dhp-build-properties-maven-plugin
maven-plugin
+ This module is a maven plugin implementing custom properties substitutions in the build lifecycle
org.apache.maven
maven-plugin-api
- 2.0
+ 3.6.3
org.apache.maven
maven-project
- 2.0
+ 2.2.1
+
+ org.apache.maven
+ maven-artifact
+ 2.2.1
+
+
org.kuali.maven.plugins
properties-maven-plugin
- 1.3.2
+ ${properties.maven.plugin.version}
com.google.code.findbugs
@@ -73,44 +80,10 @@
maven-javadoc-plugin
true
+ none
-
-
-
-
- org.eclipse.m2e
- lifecycle-mapping
- 1.0.0
-
-
-
-
-
-
- org.apache.maven.plugins
-
-
- maven-plugin-plugin
-
-
- [3.2,)
-
-
- descriptor
-
-
-
-
-
-
-
-
-
-
-
-
diff --git a/dhp-build/dhp-build-properties-maven-plugin/src/main/java/eu/dnetlib/maven/plugin/properties/WritePredefinedProjectProperties.java b/dhp-build/dhp-build-properties-maven-plugin/src/main/java/eu/dnetlib/maven/plugin/properties/WritePredefinedProjectProperties.java
index c1c567f954..d195ca86e4 100644
--- a/dhp-build/dhp-build-properties-maven-plugin/src/main/java/eu/dnetlib/maven/plugin/properties/WritePredefinedProjectProperties.java
+++ b/dhp-build/dhp-build-properties-maven-plugin/src/main/java/eu/dnetlib/maven/plugin/properties/WritePredefinedProjectProperties.java
@@ -40,7 +40,7 @@ import edu.umd.cs.findbugs.annotations.SuppressFBWarnings;
/**
* Writes project properties for the keys listed in specified properties files. Based on:
- * http://site.kuali.org/maven/plugins/properties-maven-plugin/1.3.2/write-project-properties-mojo.html
+ * http://site.kuali.org/maven/plugins/properties-maven-plugin/2.0.1/write-project-properties-mojo.html
*
* @author mhorst
* @goal write-project-properties
diff --git a/dhp-build/dhp-build-properties-maven-plugin/src/test/java/eu/dnetlib/maven/plugin/properties/GenerateOoziePropertiesMojoTest.java b/dhp-build/dhp-build-properties-maven-plugin/src/test/java/eu/dnetlib/maven/plugin/properties/GenerateOoziePropertiesMojoTest.java
index b8075ba5dd..4bfcd3b33e 100644
--- a/dhp-build/dhp-build-properties-maven-plugin/src/test/java/eu/dnetlib/maven/plugin/properties/GenerateOoziePropertiesMojoTest.java
+++ b/dhp-build/dhp-build-properties-maven-plugin/src/test/java/eu/dnetlib/maven/plugin/properties/GenerateOoziePropertiesMojoTest.java
@@ -11,7 +11,7 @@ import org.junit.jupiter.api.Test;
/** @author mhorst, claudio.atzori */
public class GenerateOoziePropertiesMojoTest {
- private GenerateOoziePropertiesMojo mojo = new GenerateOoziePropertiesMojo();
+ private final GenerateOoziePropertiesMojo mojo = new GenerateOoziePropertiesMojo();
@BeforeEach
public void clearSystemProperties() {
diff --git a/dhp-build/dhp-build-properties-maven-plugin/src/test/java/eu/dnetlib/maven/plugin/properties/WritePredefinedProjectPropertiesTest.java b/dhp-build/dhp-build-properties-maven-plugin/src/test/java/eu/dnetlib/maven/plugin/properties/WritePredefinedProjectPropertiesTest.java
index e0b2eff37b..0b3ea9653b 100644
--- a/dhp-build/dhp-build-properties-maven-plugin/src/test/java/eu/dnetlib/maven/plugin/properties/WritePredefinedProjectPropertiesTest.java
+++ b/dhp-build/dhp-build-properties-maven-plugin/src/test/java/eu/dnetlib/maven/plugin/properties/WritePredefinedProjectPropertiesTest.java
@@ -366,7 +366,7 @@ public class WritePredefinedProjectPropertiesTest {
}
private Properties getStoredProperties(File testFolder)
- throws FileNotFoundException, IOException {
+ throws IOException {
Properties properties = new Properties();
properties.load(new FileInputStream(getPropertiesFileLocation(testFolder)));
return properties;
diff --git a/dhp-build/dhp-code-style/pom.xml b/dhp-build/dhp-code-style/pom.xml
index 830723c9f6..5e896e7a5c 100644
--- a/dhp-build/dhp-code-style/pom.xml
+++ b/dhp-build/dhp-code-style/pom.xml
@@ -11,6 +11,38 @@
jar
+ This module contains resources supporting common code style conventions
+
+
+
+ dnet45-snapshots
+ DNet45 Snapshots
+ http://maven.research-infrastructures.eu/nexus/content/repositories/dnet45-snapshots
+ default
+
+
+ dnet45-releases
+ http://maven.research-infrastructures.eu/nexus/content/repositories/dnet45-releases
+
+
+
+
+
+
+
+ org.apache.maven.plugins
+ maven-project-info-reports-plugin
+ 3.0.0
+
+
+ org.apache.maven.plugins
+ maven-site-plugin
+ 3.7.1
+
+
+
+
+
UTF-8
diff --git a/dhp-build/pom.xml b/dhp-build/pom.xml
index ff6a858bba..041641fcfb 100644
--- a/dhp-build/pom.xml
+++ b/dhp-build/pom.xml
@@ -8,6 +8,9 @@
dhp-build
pom
+
+ This module is a container for the build tools used in dnet-hadoop
+
dhp-code-style
dhp-build-assembly-resources
diff --git a/dhp-common/pom.xml b/dhp-common/pom.xml
index 7b073397f0..51af8d954b 100644
--- a/dhp-common/pom.xml
+++ b/dhp-common/pom.xml
@@ -12,6 +12,8 @@
dhp-common
jar
+ This module contains common utilities meant to be used across the dnet-hadoop submodules
+
diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/utils/DHPUtils.java b/dhp-common/src/main/java/eu/dnetlib/dhp/utils/DHPUtils.java
index 18e489a21c..dfbaf3a6ca 100644
--- a/dhp-common/src/main/java/eu/dnetlib/dhp/utils/DHPUtils.java
+++ b/dhp-common/src/main/java/eu/dnetlib/dhp/utils/DHPUtils.java
@@ -21,7 +21,7 @@ public class DHPUtils {
public static String md5(final String s) {
try {
final MessageDigest md = MessageDigest.getInstance("MD5");
- md.update(s.getBytes("UTF-8"));
+ md.update(s.getBytes(StandardCharsets.UTF_8));
return new String(Hex.encodeHex(md.digest()));
} catch (final Exception e) {
System.err.println("Error creating id");
diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/utils/saxon/NormalizeDate.java b/dhp-common/src/main/java/eu/dnetlib/dhp/utils/saxon/NormalizeDate.java
index 4a719909a5..9fb60e1452 100644
--- a/dhp-common/src/main/java/eu/dnetlib/dhp/utils/saxon/NormalizeDate.java
+++ b/dhp-common/src/main/java/eu/dnetlib/dhp/utils/saxon/NormalizeDate.java
@@ -17,7 +17,7 @@ public class NormalizeDate extends AbstractExtensionFunction {
"yyyy-MM-dd'T'hh:mm:ss", "yyyy-MM-dd", "yyyy/MM/dd", "yyyy"
};
- private static final String normalizeOutFormat = new String("yyyy-MM-dd'T'hh:mm:ss'Z'");
+ private static final String normalizeOutFormat = "yyyy-MM-dd'T'hh:mm:ss'Z'";
@Override
public String getName() {
diff --git a/dhp-common/src/main/java/eu/dnetlib/message/MessageManager.java b/dhp-common/src/main/java/eu/dnetlib/message/MessageManager.java
index 4c5c48c559..5ca79f3cc6 100644
--- a/dhp-common/src/main/java/eu/dnetlib/message/MessageManager.java
+++ b/dhp-common/src/main/java/eu/dnetlib/message/MessageManager.java
@@ -21,7 +21,7 @@ public class MessageManager {
private Connection connection;
- private Map channels = new HashMap<>();
+ private final Map channels = new HashMap<>();
private boolean durable;
diff --git a/dhp-schemas/pom.xml b/dhp-schemas/pom.xml
index 8deb2eab23..4a123cedad 100644
--- a/dhp-schemas/pom.xml
+++ b/dhp-schemas/pom.xml
@@ -12,7 +12,7 @@
dhp-schemas
jar
-
+ This module contains common schema classes meant to be used across the dnet-hadoop submodules
diff --git a/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/action/AtomicActionDeserializer.java b/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/action/AtomicActionDeserializer.java
index a9543d27a1..7b88e9c7eb 100644
--- a/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/action/AtomicActionDeserializer.java
+++ b/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/action/AtomicActionDeserializer.java
@@ -16,7 +16,7 @@ public class AtomicActionDeserializer extends JsonDeserializer {
@Override
public Object deserialize(JsonParser jp, DeserializationContext ctxt)
- throws IOException, JsonProcessingException {
+ throws IOException {
JsonNode node = jp.getCodec().readTree(jp);
String classTag = node.get("clazz").asText();
JsonNode payload = node.get("payload");
diff --git a/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/common/ModelSupport.java b/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/common/ModelSupport.java
index 0054e6d6fd..7838cc0cff 100644
--- a/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/common/ModelSupport.java
+++ b/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/common/ModelSupport.java
@@ -9,190 +9,195 @@ import java.util.function.Function;
/** Oaf model utility methods. */
public class ModelSupport {
- /** Defines the mapping between the actual entity type and the main entity type */
- private static Map entityMapping = Maps.newHashMap();
+ /** Defines the mapping between the actual entity type and the main entity type */
+ private static final Map entityMapping = Maps.newHashMap();
- static {
- entityMapping.put(EntityType.publication, MainEntityType.result);
- entityMapping.put(EntityType.dataset, MainEntityType.result);
- entityMapping.put(EntityType.otherresearchproduct, MainEntityType.result);
- entityMapping.put(EntityType.software, MainEntityType.result);
- entityMapping.put(EntityType.datasource, MainEntityType.datasource);
- entityMapping.put(EntityType.organization, MainEntityType.organization);
- entityMapping.put(EntityType.project, MainEntityType.project);
- }
+ static {
+ entityMapping.put(EntityType.publication, MainEntityType.result);
+ entityMapping.put(EntityType.dataset, MainEntityType.result);
+ entityMapping.put(EntityType.otherresearchproduct, MainEntityType.result);
+ entityMapping.put(EntityType.software, MainEntityType.result);
+ entityMapping.put(EntityType.datasource, MainEntityType.datasource);
+ entityMapping.put(EntityType.organization, MainEntityType.organization);
+ entityMapping.put(EntityType.project, MainEntityType.project);
+ }
- /**
- * Defines the mapping between the actual entity types and the relative classes implementing them
- */
- public static final Map entityTypes = Maps.newHashMap();
+ /**
+ * Defines the mapping between the actual entity types and the relative classes implementing them
+ */
+ public static final Map entityTypes = Maps.newHashMap();
- static {
- entityTypes.put(EntityType.datasource, Datasource.class);
- entityTypes.put(EntityType.organization, Organization.class);
- entityTypes.put(EntityType.project, Project.class);
- entityTypes.put(EntityType.dataset, Dataset.class);
- entityTypes.put(EntityType.otherresearchproduct, OtherResearchProduct.class);
- entityTypes.put(EntityType.software, Software.class);
- entityTypes.put(EntityType.publication, Publication.class);
- }
+ static {
+ entityTypes.put(EntityType.datasource, Datasource.class);
+ entityTypes.put(EntityType.organization, Organization.class);
+ entityTypes.put(EntityType.project, Project.class);
+ entityTypes.put(EntityType.dataset, Dataset.class);
+ entityTypes.put(EntityType.otherresearchproduct, OtherResearchProduct.class);
+ entityTypes.put(EntityType.software, Software.class);
+ entityTypes.put(EntityType.publication, Publication.class);
+ }
- public static final Map oafTypes = Maps.newHashMap();
+ public static final Map oafTypes = Maps.newHashMap();
- static {
- oafTypes.put("datasource", Datasource.class);
- oafTypes.put("organization", Organization.class);
- oafTypes.put("project", Project.class);
- oafTypes.put("dataset", Dataset.class);
- oafTypes.put("otherresearchproduct", OtherResearchProduct.class);
- oafTypes.put("software", Software.class);
- oafTypes.put("publication", Publication.class);
- oafTypes.put("relation", Relation.class);
- }
+ static {
+ oafTypes.put("datasource", Datasource.class);
+ oafTypes.put("organization", Organization.class);
+ oafTypes.put("project", Project.class);
+ oafTypes.put("dataset", Dataset.class);
+ oafTypes.put("otherresearchproduct", OtherResearchProduct.class);
+ oafTypes.put("software", Software.class);
+ oafTypes.put("publication", Publication.class);
+ oafTypes.put("relation", Relation.class);
+ }
- private static final String schemeTemplate = "dnet:%s_%s_relations";
+ private static final String schemeTemplate = "dnet:%s_%s_relations";
- private ModelSupport() {}
+ private ModelSupport() {
+ }
- /**
- * Checks subclass-superclass relationship.
- *
- * @param subClazzObject Subclass object instance
- * @param superClazzObject Superclass object instance
- * @param Subclass type
- * @param Superclass type
- * @return True if X is a subclass of Y
- */
- public static Boolean isSubClass(
- X subClazzObject, Y superClazzObject) {
- return isSubClass(subClazzObject.getClass(), superClazzObject.getClass());
- }
+ /**
+ * Checks subclass-superclass relationship.
+ *
+ * @param subClazzObject Subclass object instance
+ * @param superClazzObject Superclass object instance
+ * @param Subclass type
+ * @param Superclass type
+ * @return True if X is a subclass of Y
+ */
+ public static Boolean isSubClass(
+ X subClazzObject, Y superClazzObject) {
+ return isSubClass(subClazzObject.getClass(), superClazzObject.getClass());
+ }
- /**
- * Checks subclass-superclass relationship.
- *
- * @param subClazzObject Subclass object instance
- * @param superClazz Superclass class
- * @param Subclass type
- * @param Superclass type
- * @return True if X is a subclass of Y
- */
- public static Boolean isSubClass(
- X subClazzObject, Class superClazz) {
- return isSubClass(subClazzObject.getClass(), superClazz);
- }
+ /**
+ * Checks subclass-superclass relationship.
+ *
+ * @param subClazzObject Subclass object instance
+ * @param superClazz Superclass class
+ * @param Subclass type
+ * @param Superclass type
+ * @return True if X is a subclass of Y
+ */
+ public static Boolean isSubClass(
+ X subClazzObject, Class superClazz) {
+ return isSubClass(subClazzObject.getClass(), superClazz);
+ }
- /**
- * Checks subclass-superclass relationship.
- *
- * @param subClazz Subclass class
- * @param superClazz Superclass class
- * @param Subclass type
- * @param Superclass type
- * @return True if X is a subclass of Y
- */
- public static Boolean isSubClass(
- Class subClazz, Class superClazz) {
- return superClazz.isAssignableFrom(subClazz);
- }
+ /**
+ * Checks subclass-superclass relationship.
+ *
+ * @param subClazz Subclass class
+ * @param superClazz Superclass class
+ * @param Subclass type
+ * @param Superclass type
+ * @return True if X is a subclass of Y
+ */
+ public static Boolean isSubClass(
+ Class subClazz, Class superClazz) {
+ return superClazz.isAssignableFrom(subClazz);
+ }
- /**
- * Lists all the OAF model classes
- *
- * @param
- * @return
- */
- public static Class[] getOafModelClasses() {
- return new Class[] {
- Author.class,
- Context.class,
- Country.class,
- DataInfo.class,
- Dataset.class,
- Datasource.class,
- ExternalReference.class,
- ExtraInfo.class,
- Field.class,
- GeoLocation.class,
- Instance.class,
- Journal.class,
- KeyValue.class,
- Oaf.class,
- OafEntity.class,
- OAIProvenance.class,
- Organization.class,
- OriginDescription.class,
- OtherResearchProduct.class,
- Project.class,
- Publication.class,
- Qualifier.class,
- Relation.class,
- Result.class,
- Software.class,
- StructuredProperty.class
- };
- }
+ /**
+ * Lists all the OAF model classes
+ *
+ * @param
+ * @return
+ */
+ public static Class[] getOafModelClasses() {
+ return new Class[] {
+ Author.class,
+ Context.class,
+ Country.class,
+ DataInfo.class,
+ Dataset.class,
+ Datasource.class,
+ ExternalReference.class,
+ ExtraInfo.class,
+ Field.class,
+ GeoLocation.class,
+ Instance.class,
+ Journal.class,
+ KeyValue.class,
+ Oaf.class,
+ OafEntity.class,
+ OAIProvenance.class,
+ Organization.class,
+ OriginDescription.class,
+ OtherResearchProduct.class,
+ Project.class,
+ Publication.class,
+ Qualifier.class,
+ Relation.class,
+ Result.class,
+ Software.class,
+ StructuredProperty.class
+ };
+ }
- public static String getMainType(final EntityType type) {
- return entityMapping.get(type).name();
- }
+ public static String getMainType(final EntityType type) {
+ return entityMapping.get(type).name();
+ }
- public static boolean isResult(EntityType type) {
- return MainEntityType.result.name().equals(getMainType(type));
- }
+ public static boolean isResult(EntityType type) {
+ return MainEntityType.result.name().equals(getMainType(type));
+ }
- public static String getScheme(final String sourceType, final String targetType) {
- return String.format(
- schemeTemplate,
- entityMapping.get(EntityType.valueOf(sourceType)).name(),
- entityMapping.get(EntityType.valueOf(targetType)).name());
- }
+ public static String getScheme(final String sourceType, final String targetType) {
+ return String
+ .format(
+ schemeTemplate,
+ entityMapping.get(EntityType.valueOf(sourceType)).name(),
+ entityMapping.get(EntityType.valueOf(targetType)).name());
+ }
- public static Function idFn() {
- return x -> {
- if (isSubClass(x, Relation.class)) {
- return idFnForRelation(x);
- }
- return idFnForOafEntity(x);
- };
- }
+ public static Function idFn() {
+ return x -> {
+ if (isSubClass(x, Relation.class)) {
+ return idFnForRelation(x);
+ }
+ return idFnForOafEntity(x);
+ };
+ }
- private static String idFnForRelation(T t) {
- Relation r = (Relation) t;
- return Optional.ofNullable(r.getSource())
- .map(
- source ->
- Optional.ofNullable(r.getTarget())
- .map(
- target ->
- Optional.ofNullable(r.getRelType())
- .map(
- relType ->
- Optional.ofNullable(r.getSubRelType())
- .map(
- subRelType ->
- Optional.ofNullable(r.getRelClass())
- .map(
- relClass ->
- String.join(
- source,
- target,
- relType,
- subRelType,
- relClass))
- .orElse(
- String.join(
- source,
- target,
- relType,
- subRelType)))
- .orElse(String.join(source, target, relType)))
- .orElse(String.join(source, target)))
- .orElse(source))
- .orElse(null);
- }
+ private static String idFnForRelation(T t) {
+ Relation r = (Relation) t;
+ return Optional
+ .ofNullable(r.getSource())
+ .map(
+ source -> Optional
+ .ofNullable(r.getTarget())
+ .map(
+ target -> Optional
+ .ofNullable(r.getRelType())
+ .map(
+ relType -> Optional
+ .ofNullable(r.getSubRelType())
+ .map(
+ subRelType -> Optional
+ .ofNullable(r.getRelClass())
+ .map(
+ relClass -> String
+ .join(
+ source,
+ target,
+ relType,
+ subRelType,
+ relClass))
+ .orElse(
+ String
+ .join(
+ source,
+ target,
+ relType,
+ subRelType)))
+ .orElse(String.join(source, target, relType)))
+ .orElse(String.join(source, target)))
+ .orElse(source))
+ .orElse(null);
+ }
+
+ private static String idFnForOafEntity(T t) {
+ return ((OafEntity) t).getId();
+ }
- private static String idFnForOafEntity(T t) {
- return ((OafEntity) t).getId();
- }
}
diff --git a/dhp-workflows/dhp-actionmanager/src/main/java/eu/dnetlib/dhp/actionmanager/ISClient.java b/dhp-workflows/dhp-actionmanager/src/main/java/eu/dnetlib/dhp/actionmanager/ISClient.java
index 0914381954..0f0d21e11f 100644
--- a/dhp-workflows/dhp-actionmanager/src/main/java/eu/dnetlib/dhp/actionmanager/ISClient.java
+++ b/dhp-workflows/dhp-actionmanager/src/main/java/eu/dnetlib/dhp/actionmanager/ISClient.java
@@ -32,7 +32,7 @@ public class ISClient implements Serializable {
private static final String INPUT_ACTION_SET_ID_SEPARATOR = ",";
- private ISLookUpService isLookup;
+ private final ISLookUpService isLookup;
public ISClient(String isLookupUrl) {
isLookup = ISLookupClientFactory.getLookUpService(isLookupUrl);
diff --git a/dhp-workflows/dhp-actionmanager/src/main/java/eu/dnetlib/dhp/actionmanager/promote/PromoteActionPayloadFunctions.java b/dhp-workflows/dhp-actionmanager/src/main/java/eu/dnetlib/dhp/actionmanager/promote/PromoteActionPayloadFunctions.java
index ffde658bd4..56c8dd05a1 100644
--- a/dhp-workflows/dhp-actionmanager/src/main/java/eu/dnetlib/dhp/actionmanager/promote/PromoteActionPayloadFunctions.java
+++ b/dhp-workflows/dhp-actionmanager/src/main/java/eu/dnetlib/dhp/actionmanager/promote/PromoteActionPayloadFunctions.java
@@ -123,10 +123,10 @@ public class PromoteActionPayloadFunctions {
* @param Type of graph table row
*/
public static class TableAggregator extends Aggregator {
- private SerializableSupplier zeroFn;
- private SerializableSupplier> mergeAndGetFn;
- private SerializableSupplier> isNotZeroFn;
- private Class rowClazz;
+ private final SerializableSupplier zeroFn;
+ private final SerializableSupplier> mergeAndGetFn;
+ private final SerializableSupplier> isNotZeroFn;
+ private final Class rowClazz;
public TableAggregator(
SerializableSupplier zeroFn,
diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/worker/DnetCollectorWorkerApplication.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/worker/DnetCollectorWorkerApplication.java
index cda07d1515..da30e87937 100644
--- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/worker/DnetCollectorWorkerApplication.java
+++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/worker/DnetCollectorWorkerApplication.java
@@ -20,7 +20,7 @@ public class DnetCollectorWorkerApplication {
private static final Logger log = LoggerFactory.getLogger(DnetCollectorWorkerApplication.class);
- private static CollectorPluginFactory collectorPluginFactory = new CollectorPluginFactory();
+ private static final CollectorPluginFactory collectorPluginFactory = new CollectorPluginFactory();
private static ArgumentApplicationParser argumentParser;
diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/worker/utils/CollectorPluginErrorLogList.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/worker/utils/CollectorPluginErrorLogList.java
index 6ee8a8b496..dcaf0ea562 100644
--- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/worker/utils/CollectorPluginErrorLogList.java
+++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/worker/utils/CollectorPluginErrorLogList.java
@@ -9,7 +9,7 @@ public class CollectorPluginErrorLogList extends LinkedList {
@Override
public String toString() {
- String log = new String();
+ String log = "";
int index = 0;
for (final String errorMessage : this) {
log += String.format("Retry #%s: %s / ", index++, errorMessage);
diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/worker/utils/XmlCleaner.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/worker/utils/XmlCleaner.java
index 32eeeab4b8..44aeb4d029 100644
--- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/worker/utils/XmlCleaner.java
+++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/worker/utils/XmlCleaner.java
@@ -11,22 +11,22 @@ import java.util.regex.Pattern;
public class XmlCleaner {
/** Pattern for numeric entities. */
- private static Pattern validCharacterEntityPattern = Pattern.compile("^?\\d{2,4};"); // $NON-NLS-1$
+ private static final Pattern validCharacterEntityPattern = Pattern.compile("^?\\d{2,4};"); // $NON-NLS-1$
// private static Pattern validCharacterEntityPattern = Pattern.compile("^?\\d{2,4};");
// //$NON-NLS-1$
// see https://www.w3.org/TR/REC-xml/#charsets , not only limited to
- private static Pattern invalidControlCharPattern = Pattern.compile("?1[0-9a-fA-F];");
+ private static final Pattern invalidControlCharPattern = Pattern.compile("?1[0-9a-fA-F];");
/**
* Pattern that negates the allowable XML 4 byte unicode characters. Valid are: #x9 | #xA | #xD | [#x20-#xD7FF] |
* [#xE000-#xFFFD] | [#x10000-#x10FFFF]
*/
- private static Pattern invalidCharacterPattern = Pattern.compile("[^\t\r\n\u0020-\uD7FF\uE000-\uFFFD]"); // $NON-NLS-1$
+ private static final Pattern invalidCharacterPattern = Pattern.compile("[^\t\r\n\u0020-\uD7FF\uE000-\uFFFD]"); // $NON-NLS-1$
// Map entities to their unicode equivalent
- private static Set goodEntities = new HashSet<>();
- private static Map badEntities = new HashMap<>();
+ private static final Set goodEntities = new HashSet<>();
+ private static final Map badEntities = new HashMap<>();
static {
// pre-defined XML entities
diff --git a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/collector/worker/DnetCollectorWorkerApplicationTests.java b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/collector/worker/DnetCollectorWorkerApplicationTests.java
index 1a4fafb66c..87bd3be3d8 100644
--- a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/collector/worker/DnetCollectorWorkerApplicationTests.java
+++ b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/collector/worker/DnetCollectorWorkerApplicationTests.java
@@ -21,8 +21,8 @@ import eu.dnetlib.message.MessageManager;
public class DnetCollectorWorkerApplicationTests {
- private ArgumentApplicationParser argumentParser = mock(ArgumentApplicationParser.class);
- private MessageManager messageManager = mock(MessageManager.class);
+ private final ArgumentApplicationParser argumentParser = mock(ArgumentApplicationParser.class);
+ private final MessageManager messageManager = mock(MessageManager.class);
private DnetCollectorWorker worker;
diff --git a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/DedupUtility.java b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/DedupUtility.java
index 4f797f7f77..d3ae8ee4f9 100644
--- a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/DedupUtility.java
+++ b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/DedupUtility.java
@@ -2,6 +2,7 @@
package eu.dnetlib.dhp.oa.dedup;
import java.io.StringReader;
+import java.nio.charset.StandardCharsets;
import java.security.MessageDigest;
import java.text.Normalizer;
import java.util.*;
@@ -73,7 +74,7 @@ public class DedupUtility {
public static String md5(final String s) {
try {
final MessageDigest md = MessageDigest.getInstance("MD5");
- md.update(s.getBytes("UTF-8"));
+ md.update(s.getBytes(StandardCharsets.UTF_8));
return new String(Hex.encodeHex(md.digest()));
} catch (final Exception e) {
System.err.println("Error creating id");
diff --git a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/SparkReporter.java b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/SparkReporter.java
index 7100c90372..005e65ddf0 100644
--- a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/SparkReporter.java
+++ b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/SparkReporter.java
@@ -15,7 +15,7 @@ public class SparkReporter implements Serializable, Reporter {
private final List> relations = new ArrayList<>();
- private Map accumulators;
+ private final Map accumulators;
public SparkReporter(Map accumulators) {
this.accumulators = accumulators;
diff --git a/dhp-workflows/dhp-dedup-scholexplorer/src/main/java/eu/dnetlib/dedup/DedupUtility.java b/dhp-workflows/dhp-dedup-scholexplorer/src/main/java/eu/dnetlib/dedup/DedupUtility.java
index 70a2e35917..364b49c16e 100644
--- a/dhp-workflows/dhp-dedup-scholexplorer/src/main/java/eu/dnetlib/dedup/DedupUtility.java
+++ b/dhp-workflows/dhp-dedup-scholexplorer/src/main/java/eu/dnetlib/dedup/DedupUtility.java
@@ -106,7 +106,7 @@ public class DedupUtility {
public static String md5(final String s) {
try {
final MessageDigest md = MessageDigest.getInstance("MD5");
- md.update(s.getBytes("UTF-8"));
+ md.update(s.getBytes(StandardCharsets.UTF_8));
return new String(Hex.encodeHex(md.digest()));
} catch (final Exception e) {
System.err.println("Error creating id");
diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/AbstractMdRecordToOafMapper.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/AbstractMdRecordToOafMapper.java
index e20d1eb796..82f5cbfd0d 100644
--- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/AbstractMdRecordToOafMapper.java
+++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/AbstractMdRecordToOafMapper.java
@@ -410,14 +410,10 @@ public abstract class AbstractMdRecordToOafMapper {
final String identifier = n.valueOf("./*[local-name()='identifier']");
final String baseURL = n.valueOf("./*[local-name()='baseURL']");
- ;
final String metadataNamespace = n.valueOf("./*[local-name()='metadataNamespace']");
- ;
final boolean altered = n.valueOf("@altered").equalsIgnoreCase("true");
final String datestamp = n.valueOf("./*[local-name()='datestamp']");
- ;
final String harvestDate = n.valueOf("@harvestDate");
- ;
return oaiIProvenance(identifier, baseURL, metadataNamespace, altered, datestamp, harvestDate);
}
diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/MigrateDbEntitiesApplication.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/MigrateDbEntitiesApplication.java
index aa63f9ebc0..997cb8f03f 100644
--- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/MigrateDbEntitiesApplication.java
+++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/MigrateDbEntitiesApplication.java
@@ -51,497 +51,458 @@ import eu.dnetlib.dhp.schema.oaf.Software;
import eu.dnetlib.dhp.schema.oaf.StructuredProperty;
public class MigrateDbEntitiesApplication extends AbstractMigrationApplication
- implements Closeable {
-
- private static final Log log = LogFactory.getLog(MigrateDbEntitiesApplication.class);
-
- private final DbClient dbClient;
-
- private final long lastUpdateTimestamp;
-
- public static void main(final String[] args) throws Exception {
- final ArgumentApplicationParser parser =
- new ArgumentApplicationParser(
- IOUtils.toString(
- MigrateDbEntitiesApplication.class.getResourceAsStream(
- "/eu/dnetlib/dhp/oa/graph/migrate_db_entities_parameters.json")));
-
- parser.parseArgument(args);
-
- final String dbUrl = parser.get("postgresUrl");
- final String dbUser = parser.get("postgresUser");
- final String dbPassword = parser.get("postgresPassword");
-
- final String hdfsPath = parser.get("hdfsPath");
-
- final boolean processClaims =
- parser.get("action") != null && parser.get("action").equalsIgnoreCase("claims");
-
- try (final MigrateDbEntitiesApplication smdbe =
- new MigrateDbEntitiesApplication(hdfsPath, dbUrl, dbUser, dbPassword)) {
- if (processClaims) {
- log.info("Processing claims...");
- smdbe.execute("queryClaims.sql", smdbe::processClaims);
- } else {
- log.info("Processing datasources...");
- smdbe.execute("queryDatasources.sql", smdbe::processDatasource);
-
- log.info("Processing projects...");
- smdbe.execute("queryProjects.sql", smdbe::processProject);
-
- log.info("Processing orgs...");
- smdbe.execute("queryOrganizations.sql", smdbe::processOrganization);
-
- log.info("Processing relations ds <-> orgs ...");
- smdbe.execute("queryDatasourceOrganization.sql", smdbe::processDatasourceOrganization);
-
- log.info("Processing projects <-> orgs ...");
- smdbe.execute("queryProjectOrganization.sql", smdbe::processProjectOrganization);
- }
- log.info("All done.");
- }
- }
-
- protected MigrateDbEntitiesApplication() { // ONLY FOR UNIT TEST
- super();
- this.dbClient = null;
- this.lastUpdateTimestamp = new Date().getTime();
- }
-
- public MigrateDbEntitiesApplication(
- final String hdfsPath, final String dbUrl, final String dbUser, final String dbPassword)
- throws Exception {
- super(hdfsPath);
- this.dbClient = new DbClient(dbUrl, dbUser, dbPassword);
- this.lastUpdateTimestamp = new Date().getTime();
- }
-
- public void execute(final String sqlFile, final Function> producer)
- throws Exception {
- final String sql =
- IOUtils.toString(getClass().getResourceAsStream("/eu/dnetlib/dhp/oa/graph/sql/" + sqlFile));
-
- final Consumer consumer = rs -> producer.apply(rs).forEach(oaf -> emitOaf(oaf));
-
- dbClient.processResults(sql, consumer);
- }
-
- public List processDatasource(final ResultSet rs) {
-
- try {
-
- final DataInfo info = prepareDataInfo(rs);
-
- final Datasource ds = new Datasource();
-
- ds.setId(createOpenaireId(10, rs.getString("datasourceid"), true));
- ds.setOriginalId(Arrays.asList(rs.getString("datasourceid")));
- ds.setCollectedfrom(
- listKeyValues(
- createOpenaireId(10, rs.getString("collectedfromid"), true),
- rs.getString("collectedfromname")));
- ds.setPid(new ArrayList<>());
- ds.setDateofcollection(asString(rs.getDate("dateofcollection")));
- ds.setDateoftransformation(null); // Value not returned by the SQL query
- ds.setExtraInfo(new ArrayList<>()); // Values not present in the DB
- ds.setOaiprovenance(null); // Values not present in the DB
- ds.setDatasourcetype(prepareQualifierSplitting(rs.getString("datasourcetype")));
- ds.setOpenairecompatibility(prepareQualifierSplitting(rs.getString("openairecompatibility")));
- ds.setOfficialname(field(rs.getString("officialname"), info));
- ds.setEnglishname(field(rs.getString("englishname"), info));
- ds.setWebsiteurl(field(rs.getString("websiteurl"), info));
- ds.setLogourl(field(rs.getString("logourl"), info));
- ds.setContactemail(field(rs.getString("contactemail"), info));
- ds.setNamespaceprefix(field(rs.getString("namespaceprefix"), info));
- ds.setLatitude(field(Double.toString(rs.getDouble("latitude")), info));
- ds.setLongitude(field(Double.toString(rs.getDouble("longitude")), info));
- ds.setDateofvalidation(field(asString(rs.getDate("dateofvalidation")), info));
- ds.setDescription(field(rs.getString("description"), info));
- ds.setSubjects(prepareListOfStructProps(rs.getArray("subjects"), info));
- ds.setOdnumberofitems(field(Double.toString(rs.getInt("odnumberofitems")), info));
- ds.setOdnumberofitemsdate(field(asString(rs.getDate("odnumberofitemsdate")), info));
- ds.setOdpolicies(field(rs.getString("odpolicies"), info));
- ds.setOdlanguages(prepareListFields(rs.getArray("odlanguages"), info));
- ds.setOdcontenttypes(prepareListFields(rs.getArray("odcontenttypes"), info));
- ds.setAccessinfopackage(prepareListFields(rs.getArray("accessinfopackage"), info));
- ds.setReleasestartdate(field(asString(rs.getDate("releasestartdate")), info));
- ds.setReleaseenddate(field(asString(rs.getDate("releaseenddate")), info));
- ds.setMissionstatementurl(field(rs.getString("missionstatementurl"), info));
- ds.setDataprovider(field(rs.getBoolean("dataprovider"), info));
- ds.setServiceprovider(field(rs.getBoolean("serviceprovider"), info));
- ds.setDatabaseaccesstype(field(rs.getString("databaseaccesstype"), info));
- ds.setDatauploadtype(field(rs.getString("datauploadtype"), info));
- ds.setDatabaseaccessrestriction(field(rs.getString("databaseaccessrestriction"), info));
- ds.setDatauploadrestriction(field(rs.getString("datauploadrestriction"), info));
- ds.setVersioning(field(rs.getBoolean("versioning"), info));
- ds.setCitationguidelineurl(field(rs.getString("citationguidelineurl"), info));
- ds.setQualitymanagementkind(field(rs.getString("qualitymanagementkind"), info));
- ds.setPidsystems(field(rs.getString("pidsystems"), info));
- ds.setCertificates(field(rs.getString("certificates"), info));
- ds.setPolicies(new ArrayList<>()); // The sql query returns an empty array
- ds.setJournal(
- prepareJournal(rs.getString("officialname"), rs.getString("journal"), info)); // Journal
- ds.setDataInfo(info);
- ds.setLastupdatetimestamp(lastUpdateTimestamp);
-
- return Arrays.asList(ds);
- } catch (final Exception e) {
- throw new RuntimeException(e);
- }
- }
-
- public List processProject(final ResultSet rs) {
- try {
-
- final DataInfo info = prepareDataInfo(rs);
-
- final Project p = new Project();
-
- p.setId(createOpenaireId(40, rs.getString("projectid"), true));
- p.setOriginalId(Arrays.asList(rs.getString("projectid")));
- p.setCollectedfrom(
- listKeyValues(
- createOpenaireId(10, rs.getString("collectedfromid"), true),
- rs.getString("collectedfromname")));
- p.setPid(new ArrayList<>());
- p.setDateofcollection(asString(rs.getDate("dateofcollection")));
- p.setDateoftransformation(asString(rs.getDate("dateoftransformation")));
- p.setExtraInfo(new ArrayList<>()); // Values not present in the DB
- p.setOaiprovenance(null); // Values not present in the DB
- p.setWebsiteurl(field(rs.getString("websiteurl"), info));
- p.setCode(field(rs.getString("code"), info));
- p.setAcronym(field(rs.getString("acronym"), info));
- p.setTitle(field(rs.getString("title"), info));
- p.setStartdate(field(asString(rs.getDate("startdate")), info));
- p.setEnddate(field(asString(rs.getDate("enddate")), info));
- p.setCallidentifier(field(rs.getString("callidentifier"), info));
- p.setKeywords(field(rs.getString("keywords"), info));
- p.setDuration(field(Integer.toString(rs.getInt("duration")), info));
- p.setEcsc39(field(Boolean.toString(rs.getBoolean("ecsc39")), info));
- p.setOamandatepublications(
- field(Boolean.toString(rs.getBoolean("oamandatepublications")), info));
- p.setEcarticle29_3(field(Boolean.toString(rs.getBoolean("ecarticle29_3")), info));
- p.setSubjects(prepareListOfStructProps(rs.getArray("subjects"), info));
- p.setFundingtree(prepareListFields(rs.getArray("fundingtree"), info));
- p.setContracttype(prepareQualifierSplitting(rs.getString("contracttype")));
- p.setOptional1(field(rs.getString("optional1"), info));
- p.setOptional2(field(rs.getString("optional2"), info));
- p.setJsonextrainfo(field(rs.getString("jsonextrainfo"), info));
- p.setContactfullname(field(rs.getString("contactfullname"), info));
- p.setContactfax(field(rs.getString("contactfax"), info));
- p.setContactphone(field(rs.getString("contactphone"), info));
- p.setContactemail(field(rs.getString("contactemail"), info));
- p.setSummary(field(rs.getString("summary"), info));
- p.setCurrency(field(rs.getString("currency"), info));
- p.setTotalcost(new Float(rs.getDouble("totalcost")));
- p.setFundedamount(new Float(rs.getDouble("fundedamount")));
- p.setDataInfo(info);
- p.setLastupdatetimestamp(lastUpdateTimestamp);
-
- return Arrays.asList(p);
-
- } catch (final Exception e) {
- throw new RuntimeException(e);
- }
- }
-
- public List processOrganization(final ResultSet rs) {
-
- try {
-
- final DataInfo info = prepareDataInfo(rs);
-
- final Organization o = new Organization();
-
- o.setId(createOpenaireId(20, rs.getString("organizationid"), true));
- o.setOriginalId(Arrays.asList(rs.getString("organizationid")));
- o.setCollectedfrom(
- listKeyValues(
- createOpenaireId(10, rs.getString("collectedfromid"), true),
- rs.getString("collectedfromname")));
- o.setPid(new ArrayList<>());
- o.setDateofcollection(asString(rs.getDate("dateofcollection")));
- o.setDateoftransformation(asString(rs.getDate("dateoftransformation")));
- o.setExtraInfo(new ArrayList<>()); // Values not present in the DB
- o.setOaiprovenance(null); // Values not present in the DB
- o.setLegalshortname(field(rs.getString("legalshortname"), info));
- o.setLegalname(field(rs.getString("legalname"), info));
- o.setAlternativeNames(new ArrayList<>()); // Values not returned by the SQL query
- o.setWebsiteurl(field(rs.getString("websiteurl"), info));
- o.setLogourl(field(rs.getString("logourl"), info));
- o.setEclegalbody(field(Boolean.toString(rs.getBoolean("eclegalbody")), info));
- o.setEclegalperson(field(Boolean.toString(rs.getBoolean("eclegalperson")), info));
- o.setEcnonprofit(field(Boolean.toString(rs.getBoolean("ecnonprofit")), info));
- o.setEcresearchorganization(
- field(Boolean.toString(rs.getBoolean("ecresearchorganization")), info));
- o.setEchighereducation(field(Boolean.toString(rs.getBoolean("echighereducation")), info));
- o.setEcinternationalorganizationeurinterests(
- field(Boolean.toString(rs.getBoolean("ecinternationalorganizationeurinterests")), info));
- o.setEcinternationalorganization(
- field(Boolean.toString(rs.getBoolean("ecinternationalorganization")), info));
- o.setEcenterprise(field(Boolean.toString(rs.getBoolean("ecenterprise")), info));
- o.setEcsmevalidated(field(Boolean.toString(rs.getBoolean("ecsmevalidated")), info));
- o.setEcnutscode(field(Boolean.toString(rs.getBoolean("ecnutscode")), info));
- o.setCountry(prepareQualifierSplitting(rs.getString("country")));
- o.setDataInfo(info);
- o.setLastupdatetimestamp(lastUpdateTimestamp);
-
- return Arrays.asList(o);
- } catch (final Exception e) {
- throw new RuntimeException(e);
- }
- }
-
- public List processDatasourceOrganization(final ResultSet rs) {
- try {
- final DataInfo info = prepareDataInfo(rs);
- final String orgId = createOpenaireId(20, rs.getString("organization"), true);
- final String dsId = createOpenaireId(10, rs.getString("datasource"), true);
- final List collectedFrom =
- listKeyValues(
- createOpenaireId(10, rs.getString("collectedfromid"), true),
- rs.getString("collectedfromname"));
-
- final Relation r1 = new Relation();
- r1.setRelType("datasourceOrganization");
- r1.setSubRelType("provision");
- r1.setRelClass("isProvidedBy");
- r1.setSource(dsId);
- r1.setTarget(orgId);
- r1.setCollectedfrom(collectedFrom);
- r1.setDataInfo(info);
- r1.setLastupdatetimestamp(lastUpdateTimestamp);
-
- final Relation r2 = new Relation();
- r2.setRelType("datasourceOrganization");
- r2.setSubRelType("provision");
- r2.setRelClass("provides");
- r2.setSource(orgId);
- r2.setTarget(dsId);
- r2.setCollectedfrom(collectedFrom);
- r2.setDataInfo(info);
- r2.setLastupdatetimestamp(lastUpdateTimestamp);
-
- return Arrays.asList(r1, r2);
- } catch (final Exception e) {
- throw new RuntimeException(e);
- }
- }
-
- public List processProjectOrganization(final ResultSet rs) {
- try {
- final DataInfo info = prepareDataInfo(rs);
- final String orgId = createOpenaireId(20, rs.getString("resporganization"), true);
- final String projectId = createOpenaireId(40, rs.getString("project"), true);
- final List collectedFrom =
- listKeyValues(
- createOpenaireId(10, rs.getString("collectedfromid"), true),
- rs.getString("collectedfromname"));
-
- final Relation r1 = new Relation();
- r1.setRelType("projectOrganization");
- r1.setSubRelType("participation");
- r1.setRelClass("isParticipant");
- r1.setSource(projectId);
- r1.setTarget(orgId);
- r1.setCollectedfrom(collectedFrom);
- r1.setDataInfo(info);
- r1.setLastupdatetimestamp(lastUpdateTimestamp);
-
- final Relation r2 = new Relation();
- r2.setRelType("projectOrganization");
- r2.setSubRelType("participation");
- r2.setRelClass("hasParticipant");
- r2.setSource(orgId);
- r2.setTarget(projectId);
- r2.setCollectedfrom(collectedFrom);
- r2.setDataInfo(info);
- r2.setLastupdatetimestamp(lastUpdateTimestamp);
-
- return Arrays.asList(r1, r2);
- } catch (final Exception e) {
- throw new RuntimeException(e);
- }
- }
-
- public List processClaims(final ResultSet rs) {
-
- final DataInfo info =
- dataInfo(
- false,
- null,
- false,
- false,
- qualifier(
- "user:claim", "user:claim", "dnet:provenanceActions", "dnet:provenanceActions"),
- "0.9");
-
- final List collectedFrom =
- listKeyValues(createOpenaireId(10, "infrastruct_::openaire", true), "OpenAIRE");
-
- try {
-
- if (rs.getString("source_type").equals("context")) {
- final Result r;
-
- if (rs.getString("target_type").equals("dataset")) {
- r = new Dataset();
- r.setResulttype(MigrationConstants.DATASET_RESULTTYPE_QUALIFIER);
- } else if (rs.getString("target_type").equals("software")) {
- r = new Software();
- r.setResulttype(MigrationConstants.SOFTWARE_RESULTTYPE_QUALIFIER);
- } else if (rs.getString("target_type").equals("other")) {
- r = new OtherResearchProduct();
- r.setResulttype(MigrationConstants.OTHER_RESULTTYPE_QUALIFIER);
- } else {
- r = new Publication();
- r.setResulttype(MigrationConstants.PUBLICATION_RESULTTYPE_QUALIFIER);
- }
- r.setId(createOpenaireId(50, rs.getString("target_id"), false));
- r.setLastupdatetimestamp(lastUpdateTimestamp);
- r.setContext(prepareContext(rs.getString("source_id"), info));
- r.setDataInfo(info);
- r.setCollectedfrom(collectedFrom);
-
- return Arrays.asList(r);
- } else {
- final String sourceId =
- createOpenaireId(rs.getString("source_type"), rs.getString("source_id"), false);
- final String targetId =
- createOpenaireId(rs.getString("target_type"), rs.getString("target_id"), false);
-
- final Relation r1 = new Relation();
- final Relation r2 = new Relation();
-
- if (rs.getString("source_type").equals("project")) {
- r1.setCollectedfrom(collectedFrom);
- r1.setRelType("resultProject");
- r1.setSubRelType("outcome");
- r1.setRelClass("produces");
-
- r2.setCollectedfrom(collectedFrom);
- r2.setRelType("resultProject");
- r2.setSubRelType("outcome");
- r2.setRelClass("isProducedBy");
- } else {
- r1.setCollectedfrom(collectedFrom);
- r1.setRelType("resultResult");
- r1.setSubRelType("relationship");
- r1.setRelClass("isRelatedTo");
-
- r2.setCollectedfrom(collectedFrom);
- r2.setRelType("resultResult");
- r2.setSubRelType("relationship");
- r2.setRelClass("isRelatedTo");
- }
-
- r1.setSource(sourceId);
- r1.setTarget(targetId);
- r1.setDataInfo(info);
- r1.setLastupdatetimestamp(lastUpdateTimestamp);
-
- r2.setSource(targetId);
- r2.setTarget(sourceId);
- r2.setDataInfo(info);
- r2.setLastupdatetimestamp(lastUpdateTimestamp);
-
- return Arrays.asList(r1, r2);
- }
-
- } catch (final Exception e) {
- throw new RuntimeException(e);
- }
- }
-
- private List prepareContext(final String id, final DataInfo dataInfo) {
- final Context context = new Context();
- context.setId(id);
- context.setDataInfo(Arrays.asList(dataInfo));
- return Arrays.asList(context);
- }
-
- private DataInfo prepareDataInfo(final ResultSet rs) throws SQLException {
- final Boolean deletedbyinference = rs.getBoolean("deletedbyinference");
- final String inferenceprovenance = rs.getString("inferenceprovenance");
- final Boolean inferred = rs.getBoolean("inferred");
- final String trust = rs.getString("trust");
- return dataInfo(
- deletedbyinference,
- inferenceprovenance,
- inferred,
- false,
- MigrationConstants.ENTITYREGISTRY_PROVENANCE_ACTION,
- trust);
- }
-
- private Qualifier prepareQualifierSplitting(final String s) {
- if (StringUtils.isBlank(s)) {
- return null;
- }
- final String[] arr = s.split("@@@");
- return arr.length == 4 ? qualifier(arr[0], arr[1], arr[2], arr[3]) : null;
- }
-
- private List> prepareListFields(final Array array, final DataInfo info) {
- try {
- return array != null ? listFields(info, (String[]) array.getArray()) : new ArrayList<>();
- } catch (final SQLException e) {
- throw new RuntimeException("Invalid SQL array", e);
- }
- }
-
- private StructuredProperty prepareStructProp(final String s, final DataInfo dataInfo) {
- if (StringUtils.isBlank(s)) {
- return null;
- }
- final String[] parts = s.split("###");
- if (parts.length == 2) {
- final String value = parts[0];
- final String[] arr = parts[1].split("@@@");
- if (arr.length == 4) {
- return structuredProperty(value, arr[0], arr[1], arr[2], arr[3], dataInfo);
- }
- }
- return null;
- }
-
- private List prepareListOfStructProps(
- final Array array, final DataInfo dataInfo) throws SQLException {
- final List res = new ArrayList<>();
- if (array != null) {
- for (final String s : (String[]) array.getArray()) {
- final StructuredProperty sp = prepareStructProp(s, dataInfo);
- if (sp != null) {
- res.add(sp);
- }
- }
- }
-
- return res;
- }
-
- private Journal prepareJournal(final String name, final String sj, final DataInfo info) {
- if (StringUtils.isNotBlank(sj)) {
- final String[] arr = sj.split("@@@");
- if (arr.length == 3) {
- final String issn = StringUtils.isNotBlank(arr[0]) ? arr[0] : null;
- final String eissn = StringUtils.isNotBlank(arr[1]) ? arr[1] : null;
- ;
- final String lissn = StringUtils.isNotBlank(arr[2]) ? arr[2] : null;
- ;
- if (issn != null || eissn != null || lissn != null) {
- return journal(name, issn, eissn, eissn, null, null, null, null, null, null, null, info);
- }
- }
- }
- return null;
- }
-
- @Override
- public void close() throws IOException {
- super.close();
- dbClient.close();
- }
-
+ implements Closeable {
+
+ private static final Log log = LogFactory.getLog(MigrateDbEntitiesApplication.class);
+
+ private final DbClient dbClient;
+
+ private final long lastUpdateTimestamp;
+
+ public static void main(final String[] args) throws Exception {
+ final ArgumentApplicationParser parser = new ArgumentApplicationParser(
+ IOUtils
+ .toString(MigrateDbEntitiesApplication.class
+ .getResourceAsStream("/eu/dnetlib/dhp/oa/graph/migrate_db_entities_parameters.json")));
+
+ parser.parseArgument(args);
+
+ final String dbUrl = parser.get("postgresUrl");
+ final String dbUser = parser.get("postgresUser");
+ final String dbPassword = parser.get("postgresPassword");
+
+ final String hdfsPath = parser.get("hdfsPath");
+
+ final boolean processClaims = parser.get("action") != null && parser.get("action").equalsIgnoreCase("claims");
+
+ try (final MigrateDbEntitiesApplication smdbe = new MigrateDbEntitiesApplication(hdfsPath, dbUrl, dbUser,
+ dbPassword)) {
+ if (processClaims) {
+ log.info("Processing claims...");
+ smdbe.execute("queryClaims.sql", smdbe::processClaims);
+ } else {
+ log.info("Processing datasources...");
+ smdbe.execute("queryDatasources.sql", smdbe::processDatasource);
+
+ log.info("Processing projects...");
+ smdbe.execute("queryProjects.sql", smdbe::processProject);
+
+ log.info("Processing orgs...");
+ smdbe.execute("queryOrganizations.sql", smdbe::processOrganization);
+
+ log.info("Processing relations ds <-> orgs ...");
+ smdbe.execute("queryDatasourceOrganization.sql", smdbe::processDatasourceOrganization);
+
+ log.info("Processing projects <-> orgs ...");
+ smdbe.execute("queryProjectOrganization.sql", smdbe::processProjectOrganization);
+ }
+ log.info("All done.");
+ }
+ }
+
+ protected MigrateDbEntitiesApplication() { // ONLY FOR UNIT TEST
+ super();
+ this.dbClient = null;
+ this.lastUpdateTimestamp = new Date().getTime();
+ }
+
+ public MigrateDbEntitiesApplication(
+ final String hdfsPath, final String dbUrl, final String dbUser, final String dbPassword)
+ throws Exception {
+ super(hdfsPath);
+ this.dbClient = new DbClient(dbUrl, dbUser, dbPassword);
+ this.lastUpdateTimestamp = new Date().getTime();
+ }
+
+ public void execute(final String sqlFile, final Function> producer)
+ throws Exception {
+ final String sql = IOUtils.toString(getClass().getResourceAsStream("/eu/dnetlib/dhp/oa/graph/sql/" + sqlFile));
+
+ final Consumer consumer = rs -> producer.apply(rs).forEach(oaf -> emitOaf(oaf));
+
+ dbClient.processResults(sql, consumer);
+ }
+
+ public List processDatasource(final ResultSet rs) {
+
+ try {
+
+ final DataInfo info = prepareDataInfo(rs);
+
+ final Datasource ds = new Datasource();
+
+ ds.setId(createOpenaireId(10, rs.getString("datasourceid"), true));
+ ds.setOriginalId(Arrays.asList(rs.getString("datasourceid")));
+ ds
+ .setCollectedfrom(listKeyValues(createOpenaireId(10, rs.getString("collectedfromid"), true), rs.getString("collectedfromname")));
+ ds.setPid(new ArrayList<>());
+ ds.setDateofcollection(asString(rs.getDate("dateofcollection")));
+ ds.setDateoftransformation(null); // Value not returned by the SQL query
+ ds.setExtraInfo(new ArrayList<>()); // Values not present in the DB
+ ds.setOaiprovenance(null); // Values not present in the DB
+ ds.setDatasourcetype(prepareQualifierSplitting(rs.getString("datasourcetype")));
+ ds.setOpenairecompatibility(prepareQualifierSplitting(rs.getString("openairecompatibility")));
+ ds.setOfficialname(field(rs.getString("officialname"), info));
+ ds.setEnglishname(field(rs.getString("englishname"), info));
+ ds.setWebsiteurl(field(rs.getString("websiteurl"), info));
+ ds.setLogourl(field(rs.getString("logourl"), info));
+ ds.setContactemail(field(rs.getString("contactemail"), info));
+ ds.setNamespaceprefix(field(rs.getString("namespaceprefix"), info));
+ ds.setLatitude(field(Double.toString(rs.getDouble("latitude")), info));
+ ds.setLongitude(field(Double.toString(rs.getDouble("longitude")), info));
+ ds.setDateofvalidation(field(asString(rs.getDate("dateofvalidation")), info));
+ ds.setDescription(field(rs.getString("description"), info));
+ ds.setSubjects(prepareListOfStructProps(rs.getArray("subjects"), info));
+ ds.setOdnumberofitems(field(Double.toString(rs.getInt("odnumberofitems")), info));
+ ds.setOdnumberofitemsdate(field(asString(rs.getDate("odnumberofitemsdate")), info));
+ ds.setOdpolicies(field(rs.getString("odpolicies"), info));
+ ds.setOdlanguages(prepareListFields(rs.getArray("odlanguages"), info));
+ ds.setOdcontenttypes(prepareListFields(rs.getArray("odcontenttypes"), info));
+ ds.setAccessinfopackage(prepareListFields(rs.getArray("accessinfopackage"), info));
+ ds.setReleasestartdate(field(asString(rs.getDate("releasestartdate")), info));
+ ds.setReleaseenddate(field(asString(rs.getDate("releaseenddate")), info));
+ ds.setMissionstatementurl(field(rs.getString("missionstatementurl"), info));
+ ds.setDataprovider(field(rs.getBoolean("dataprovider"), info));
+ ds.setServiceprovider(field(rs.getBoolean("serviceprovider"), info));
+ ds.setDatabaseaccesstype(field(rs.getString("databaseaccesstype"), info));
+ ds.setDatauploadtype(field(rs.getString("datauploadtype"), info));
+ ds.setDatabaseaccessrestriction(field(rs.getString("databaseaccessrestriction"), info));
+ ds.setDatauploadrestriction(field(rs.getString("datauploadrestriction"), info));
+ ds.setVersioning(field(rs.getBoolean("versioning"), info));
+ ds.setCitationguidelineurl(field(rs.getString("citationguidelineurl"), info));
+ ds.setQualitymanagementkind(field(rs.getString("qualitymanagementkind"), info));
+ ds.setPidsystems(field(rs.getString("pidsystems"), info));
+ ds.setCertificates(field(rs.getString("certificates"), info));
+ ds.setPolicies(new ArrayList<>()); // The sql query returns an empty array
+ ds
+ .setJournal(prepareJournal(rs.getString("officialname"), rs.getString("journal"), info)); // Journal
+ ds.setDataInfo(info);
+ ds.setLastupdatetimestamp(lastUpdateTimestamp);
+
+ return Arrays.asList(ds);
+ } catch (final Exception e) {
+ throw new RuntimeException(e);
+ }
+ }
+
+ public List processProject(final ResultSet rs) {
+ try {
+
+ final DataInfo info = prepareDataInfo(rs);
+
+ final Project p = new Project();
+
+ p.setId(createOpenaireId(40, rs.getString("projectid"), true));
+ p.setOriginalId(Arrays.asList(rs.getString("projectid")));
+ p
+ .setCollectedfrom(listKeyValues(createOpenaireId(10, rs.getString("collectedfromid"), true), rs.getString("collectedfromname")));
+ p.setPid(new ArrayList<>());
+ p.setDateofcollection(asString(rs.getDate("dateofcollection")));
+ p.setDateoftransformation(asString(rs.getDate("dateoftransformation")));
+ p.setExtraInfo(new ArrayList<>()); // Values not present in the DB
+ p.setOaiprovenance(null); // Values not present in the DB
+ p.setWebsiteurl(field(rs.getString("websiteurl"), info));
+ p.setCode(field(rs.getString("code"), info));
+ p.setAcronym(field(rs.getString("acronym"), info));
+ p.setTitle(field(rs.getString("title"), info));
+ p.setStartdate(field(asString(rs.getDate("startdate")), info));
+ p.setEnddate(field(asString(rs.getDate("enddate")), info));
+ p.setCallidentifier(field(rs.getString("callidentifier"), info));
+ p.setKeywords(field(rs.getString("keywords"), info));
+ p.setDuration(field(Integer.toString(rs.getInt("duration")), info));
+ p.setEcsc39(field(Boolean.toString(rs.getBoolean("ecsc39")), info));
+ p
+ .setOamandatepublications(field(Boolean.toString(rs.getBoolean("oamandatepublications")), info));
+ p.setEcarticle29_3(field(Boolean.toString(rs.getBoolean("ecarticle29_3")), info));
+ p.setSubjects(prepareListOfStructProps(rs.getArray("subjects"), info));
+ p.setFundingtree(prepareListFields(rs.getArray("fundingtree"), info));
+ p.setContracttype(prepareQualifierSplitting(rs.getString("contracttype")));
+ p.setOptional1(field(rs.getString("optional1"), info));
+ p.setOptional2(field(rs.getString("optional2"), info));
+ p.setJsonextrainfo(field(rs.getString("jsonextrainfo"), info));
+ p.setContactfullname(field(rs.getString("contactfullname"), info));
+ p.setContactfax(field(rs.getString("contactfax"), info));
+ p.setContactphone(field(rs.getString("contactphone"), info));
+ p.setContactemail(field(rs.getString("contactemail"), info));
+ p.setSummary(field(rs.getString("summary"), info));
+ p.setCurrency(field(rs.getString("currency"), info));
+ p.setTotalcost(new Float(rs.getDouble("totalcost")));
+ p.setFundedamount(new Float(rs.getDouble("fundedamount")));
+ p.setDataInfo(info);
+ p.setLastupdatetimestamp(lastUpdateTimestamp);
+
+ return Arrays.asList(p);
+
+ } catch (final Exception e) {
+ throw new RuntimeException(e);
+ }
+ }
+
+ public List processOrganization(final ResultSet rs) {
+
+ try {
+
+ final DataInfo info = prepareDataInfo(rs);
+
+ final Organization o = new Organization();
+
+ o.setId(createOpenaireId(20, rs.getString("organizationid"), true));
+ o.setOriginalId(Arrays.asList(rs.getString("organizationid")));
+ o
+ .setCollectedfrom(listKeyValues(createOpenaireId(10, rs.getString("collectedfromid"), true), rs.getString("collectedfromname")));
+ o.setPid(new ArrayList<>());
+ o.setDateofcollection(asString(rs.getDate("dateofcollection")));
+ o.setDateoftransformation(asString(rs.getDate("dateoftransformation")));
+ o.setExtraInfo(new ArrayList<>()); // Values not present in the DB
+ o.setOaiprovenance(null); // Values not present in the DB
+ o.setLegalshortname(field(rs.getString("legalshortname"), info));
+ o.setLegalname(field(rs.getString("legalname"), info));
+ o.setAlternativeNames(new ArrayList<>()); // Values not returned by the SQL query
+ o.setWebsiteurl(field(rs.getString("websiteurl"), info));
+ o.setLogourl(field(rs.getString("logourl"), info));
+ o.setEclegalbody(field(Boolean.toString(rs.getBoolean("eclegalbody")), info));
+ o.setEclegalperson(field(Boolean.toString(rs.getBoolean("eclegalperson")), info));
+ o.setEcnonprofit(field(Boolean.toString(rs.getBoolean("ecnonprofit")), info));
+ o
+ .setEcresearchorganization(field(Boolean.toString(rs.getBoolean("ecresearchorganization")), info));
+ o.setEchighereducation(field(Boolean.toString(rs.getBoolean("echighereducation")), info));
+ o
+ .setEcinternationalorganizationeurinterests(field(Boolean.toString(rs.getBoolean("ecinternationalorganizationeurinterests")), info));
+ o
+ .setEcinternationalorganization(field(Boolean.toString(rs.getBoolean("ecinternationalorganization")), info));
+ o.setEcenterprise(field(Boolean.toString(rs.getBoolean("ecenterprise")), info));
+ o.setEcsmevalidated(field(Boolean.toString(rs.getBoolean("ecsmevalidated")), info));
+ o.setEcnutscode(field(Boolean.toString(rs.getBoolean("ecnutscode")), info));
+ o.setCountry(prepareQualifierSplitting(rs.getString("country")));
+ o.setDataInfo(info);
+ o.setLastupdatetimestamp(lastUpdateTimestamp);
+
+ return Arrays.asList(o);
+ } catch (final Exception e) {
+ throw new RuntimeException(e);
+ }
+ }
+
+ public List processDatasourceOrganization(final ResultSet rs) {
+ try {
+ final DataInfo info = prepareDataInfo(rs);
+ final String orgId = createOpenaireId(20, rs.getString("organization"), true);
+ final String dsId = createOpenaireId(10, rs.getString("datasource"), true);
+ final List collectedFrom = listKeyValues(createOpenaireId(10, rs.getString("collectedfromid"), true), rs.getString("collectedfromname"));
+
+ final Relation r1 = new Relation();
+ r1.setRelType("datasourceOrganization");
+ r1.setSubRelType("provision");
+ r1.setRelClass("isProvidedBy");
+ r1.setSource(dsId);
+ r1.setTarget(orgId);
+ r1.setCollectedfrom(collectedFrom);
+ r1.setDataInfo(info);
+ r1.setLastupdatetimestamp(lastUpdateTimestamp);
+
+ final Relation r2 = new Relation();
+ r2.setRelType("datasourceOrganization");
+ r2.setSubRelType("provision");
+ r2.setRelClass("provides");
+ r2.setSource(orgId);
+ r2.setTarget(dsId);
+ r2.setCollectedfrom(collectedFrom);
+ r2.setDataInfo(info);
+ r2.setLastupdatetimestamp(lastUpdateTimestamp);
+
+ return Arrays.asList(r1, r2);
+ } catch (final Exception e) {
+ throw new RuntimeException(e);
+ }
+ }
+
+ public List processProjectOrganization(final ResultSet rs) {
+ try {
+ final DataInfo info = prepareDataInfo(rs);
+ final String orgId = createOpenaireId(20, rs.getString("resporganization"), true);
+ final String projectId = createOpenaireId(40, rs.getString("project"), true);
+ final List collectedFrom = listKeyValues(createOpenaireId(10, rs.getString("collectedfromid"), true), rs.getString("collectedfromname"));
+
+ final Relation r1 = new Relation();
+ r1.setRelType("projectOrganization");
+ r1.setSubRelType("participation");
+ r1.setRelClass("hasParticipant");
+ r1.setSource(projectId);
+ r1.setTarget(orgId);
+ r1.setCollectedfrom(collectedFrom);
+ r1.setDataInfo(info);
+ r1.setLastupdatetimestamp(lastUpdateTimestamp);
+
+ final Relation r2 = new Relation();
+ r2.setRelType("projectOrganization");
+ r2.setSubRelType("participation");
+ r2.setRelClass("isParticipant");
+ r2.setSource(orgId);
+ r2.setTarget(projectId);
+ r2.setCollectedfrom(collectedFrom);
+ r2.setDataInfo(info);
+ r2.setLastupdatetimestamp(lastUpdateTimestamp);
+
+ return Arrays.asList(r1, r2);
+ } catch (final Exception e) {
+ throw new RuntimeException(e);
+ }
+ }
+
+ /**
+  * Converts the claim stored in the current row of {@code rs} into graph objects.
+  * <p>
+  * When {@code source_type} is {@code context} the claim yields a single result whose concrete class is
+  * chosen by {@code target_type} ({@code dataset}, {@code software}, {@code other}; anything else is a
+  * publication) and which carries the context identified by {@code source_id}.
+  * <p>
+  * For every other {@code source_type} the claim yields two mirrored relations between source and target:
+  * {@code produces} / {@code isProducedBy} when the source is a {@code project}, {@code isRelatedTo} in
+  * both directions otherwise.
+  * <p>
+  * All produced objects share a {@code user:claim} data info with trust {@code 0.9} and are marked as
+  * collected from OpenAIRE.
+  *
+  * @param rs result set whose current row is converted
+  * @return a one-element list (context claim) or a two-element list (relation claim)
+  * @throws RuntimeException wrapping any exception raised while reading the row or building the objects
+  */
+ public List processClaims(final ResultSet rs) {
+
+     final DataInfo info = dataInfo(
+         false,
+         null,
+         false,
+         false,
+         qualifier("user:claim", "user:claim", "dnet:provenanceActions", "dnet:provenanceActions"),
+         "0.9");
+
+     final List collectedFrom = listKeyValues(createOpenaireId(10, "infrastruct_::openaire", true), "OpenAIRE");
+
+     try {
+         final String sourceType = rs.getString("source_type");
+
+         if (sourceType.equals("context")) {
+             final String targetType = rs.getString("target_type");
+             final Result r;
+
+             switch (targetType) {
+             case "dataset":
+                 r = new Dataset();
+                 r.setResulttype(MigrationConstants.DATASET_RESULTTYPE_QUALIFIER);
+                 break;
+             case "software":
+                 r = new Software();
+                 r.setResulttype(MigrationConstants.SOFTWARE_RESULTTYPE_QUALIFIER);
+                 break;
+             case "other":
+                 r = new OtherResearchProduct();
+                 r.setResulttype(MigrationConstants.OTHER_RESULTTYPE_QUALIFIER);
+                 break;
+             default:
+                 r = new Publication();
+                 r.setResulttype(MigrationConstants.PUBLICATION_RESULTTYPE_QUALIFIER);
+                 break;
+             }
+
+             r.setId(createOpenaireId(50, rs.getString("target_id"), false));
+             r.setContext(prepareContext(rs.getString("source_id"), info));
+             r.setCollectedfrom(collectedFrom);
+             r.setDataInfo(info);
+             r.setLastupdatetimestamp(lastUpdateTimestamp);
+
+             return Arrays.asList(r);
+         }
+
+         // Relation claim: the two endpoints are resolved first, then described in both directions.
+         final String sourceId = createOpenaireId(sourceType, rs.getString("source_id"), false);
+         final String targetId = createOpenaireId(rs.getString("target_type"), rs.getString("target_id"), false);
+
+         final boolean projectClaim = sourceType.equals("project");
+         final String relType = projectClaim ? "resultProject" : "resultResult";
+         final String subRelType = projectClaim ? "outcome" : "relationship";
+         final String directClass = projectClaim ? "produces" : "isRelatedTo";
+         final String inverseClass = projectClaim ? "isProducedBy" : "isRelatedTo";
+
+         final Relation direct = new Relation();
+         direct.setCollectedfrom(collectedFrom);
+         direct.setRelType(relType);
+         direct.setSubRelType(subRelType);
+         direct.setRelClass(directClass);
+         direct.setSource(sourceId);
+         direct.setTarget(targetId);
+         direct.setDataInfo(info);
+         direct.setLastupdatetimestamp(lastUpdateTimestamp);
+
+         final Relation inverse = new Relation();
+         inverse.setCollectedfrom(collectedFrom);
+         inverse.setRelType(relType);
+         inverse.setSubRelType(subRelType);
+         inverse.setRelClass(inverseClass);
+         inverse.setSource(targetId);
+         inverse.setTarget(sourceId);
+         inverse.setDataInfo(info);
+         inverse.setLastupdatetimestamp(lastUpdateTimestamp);
+
+         return Arrays.asList(direct, inverse);
+
+     } catch (final Exception e) {
+         throw new RuntimeException(e);
+     }
+ }
+
+ /**
+  * Wraps a context identifier into a single-element list of {@code Context}.
+  *
+  * @param id identifier assigned to the new context
+  * @param dataInfo data info attached to the context, itself wrapped in a single-element list
+  * @return a list containing only the new context
+  */
+ private List prepareContext(final String id, final DataInfo dataInfo) {
+     final Context ctx = new Context();
+     ctx.setDataInfo(Arrays.asList(dataInfo));
+     ctx.setId(id);
+     return Arrays.asList(ctx);
+ }
+
+ /**
+  * Builds the {@code DataInfo} for the current row of {@code rs}.
+  * <p>
+  * The columns {@code deletedbyinference}, {@code inferenceprovenance}, {@code inferred} and {@code trust}
+  * are read in that order. The fourth argument handed to {@code dataInfo} is always {@code false} and the
+  * provenance action is always {@code MigrationConstants.ENTITYREGISTRY_PROVENANCE_ACTION}.
+  *
+  * @param rs result set whose current row is read
+  * @return the data info built from the row
+  * @throws SQLException if one of the columns cannot be read
+  */
+ private DataInfo prepareDataInfo(final ResultSet rs) throws SQLException {
+     final Boolean isDeletedByInference = rs.getBoolean("deletedbyinference");
+     final String provenance = rs.getString("inferenceprovenance");
+     final Boolean isInferred = rs.getBoolean("inferred");
+     final String trustLevel = rs.getString("trust");
+
+     return dataInfo(
+         isDeletedByInference,
+         provenance,
+         isInferred,
+         false,
+         MigrationConstants.ENTITYREGISTRY_PROVENANCE_ACTION,
+         trustLevel);
+ }
+
+ /**
+  * Parses a qualifier encoded as four fields separated by {@code @@@}.
+  *
+  * @param s encoded qualifier; may be null or blank
+  * @return the qualifier built from the four fields, or {@code null} when {@code s} is blank or does not
+  *         split into exactly four parts
+  */
+ private Qualifier prepareQualifierSplitting(final String s) {
+     if (StringUtils.isBlank(s)) {
+         return null;
+     }
+
+     final String[] tokens = s.split("@@@");
+     if (tokens.length != 4) {
+         return null;
+     }
+     return qualifier(tokens[0], tokens[1], tokens[2], tokens[3]);
+ }
+
+ /**
+  * Converts an SQL array of strings into a list of fields sharing the same data info.
+  *
+  * @param array SQL array whose content is cast to {@code String[]}; may be {@code null}
+  * @param info data info handed to {@code listFields} together with the values
+  * @return the fields built by {@code listFields}, or a new empty list when {@code array} is {@code null}
+  * @throws RuntimeException with message "Invalid SQL array" when the array content cannot be retrieved
+  */
+ private List> prepareListFields(final Array array, final DataInfo info) {
+     if (array == null) {
+         return new ArrayList<>();
+     }
+     try {
+         final String[] values = (String[]) array.getArray();
+         return listFields(info, values);
+     } catch (final SQLException e) {
+         throw new RuntimeException("Invalid SQL array", e);
+     }
+ }
+
+ /**
+  * Parses a structured property encoded as {@code value###f1@@@f2@@@f3@@@f4}.
+  * <p>
+  * The part before {@code ###} is the property value; the part after it must split on {@code @@@} into
+  * exactly four fields, which are forwarded to {@code structuredProperty} in order.
+  *
+  * @param s encoded property; may be null or blank
+  * @param dataInfo data info attached to the property
+  * @return the parsed property, or {@code null} when {@code s} is blank or does not match the expected layout
+  */
+ private StructuredProperty prepareStructProp(final String s, final DataInfo dataInfo) {
+     if (StringUtils.isBlank(s)) {
+         return null;
+     }
+
+     final String[] valueAndQualifier = s.split("###");
+     if (valueAndQualifier.length != 2) {
+         return null;
+     }
+
+     final String[] fields = valueAndQualifier[1].split("@@@");
+     if (fields.length != 4) {
+         return null;
+     }
+
+     return structuredProperty(valueAndQualifier[0], fields[0], fields[1], fields[2], fields[3], dataInfo);
+ }
+
+ /**
+  * Parses every element of an SQL string array with {@code prepareStructProp}, keeping only the elements
+  * that could be parsed.
+  *
+  * @param array SQL array whose content is cast to {@code String[]}; may be {@code null}
+  * @param dataInfo data info attached to each parsed property
+  * @return a new list with the successfully parsed properties, empty when {@code array} is {@code null}
+  * @throws SQLException if the array content cannot be retrieved
+  */
+ private List prepareListOfStructProps(
+     final Array array,
+     final DataInfo dataInfo) throws SQLException {
+     final List props = new ArrayList<>();
+     if (array == null) {
+         return props;
+     }
+
+     final String[] encoded = (String[]) array.getArray();
+     for (int i = 0; i < encoded.length; i++) {
+         final StructuredProperty parsed = prepareStructProp(encoded[i], dataInfo);
+         if (parsed == null) {
+             // Unparseable entries are dropped rather than stored as null.
+             continue;
+         }
+         props.add(parsed);
+     }
+
+     return props;
+ }
+
+ /**
+  * Builds a {@code Journal} from a journal name and a string encoding its three ISSNs.
+  * <p>
+  * {@code sj} is expected in the form {@code issn@@@eissn@@@lissn}. Each part is trimmed, and a blank part
+  * is treated as a missing identifier.
+  *
+  * @param name journal name, forwarded unchanged
+  * @param sj encoded ISSN triple; may be null or blank
+  * @param info data info attached to the journal
+  * @return the journal, or {@code null} when {@code sj} is blank, does not split into exactly three parts,
+  *         or none of the three identifiers has a value
+  */
+ private Journal prepareJournal(final String name, final String sj, final DataInfo info) {
+     if (StringUtils.isNotBlank(sj)) {
+         final String[] arr = sj.split("@@@");
+         if (arr.length == 3) {
+             final String issn = StringUtils.isNotBlank(arr[0]) ? arr[0].trim() : null;
+             final String eissn = StringUtils.isNotBlank(arr[1]) ? arr[1].trim() : null;
+             final String lissn = StringUtils.isNotBlank(arr[2]) ? arr[2].trim() : null;
+             if (issn != null || eissn != null || lissn != null) {
+                 // Each identifier goes to its own argument; lissn must not be replaced by a second eissn,
+                 // otherwise a journal that only has a linking ISSN is created without any identifier.
+                 // NOTE(review): assumes journal(...) takes (name, issn, eissn, lissn, ...) in this order.
+                 return journal(name, issn, eissn, lissn, null, null, null, null, null, null, null, info);
+             }
+         }
+     }
+     return null;
+ }
+
+ /**
+  * Closes the parent resources and then the database client.
+  * <p>
+  * The client is closed in a {@code finally} block so that it is released even when {@code super.close()}
+  * throws.
+  *
+  * @throws IOException if closing the parent or the database client fails
+  */
+ @Override
+ public void close() throws IOException {
+     try {
+         super.close();
+     } finally {
+         dbClient.close();
+     }
+ }
}
diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/common/DbClient.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/common/DbClient.java
index 121df81316..94f17aad52 100644
--- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/common/DbClient.java
+++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/common/DbClient.java
@@ -14,7 +14,7 @@ public class DbClient implements Closeable {
private static final Log log = LogFactory.getLog(DbClient.class);
- private Connection connection;
+ private final Connection connection;
public DbClient(final String address, final String login, final String password) {
diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/common/PacePerson.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/common/PacePerson.java
index 8adcd565b7..d1c615dcdd 100644
--- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/common/PacePerson.java
+++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/common/PacePerson.java
@@ -2,6 +2,7 @@
package eu.dnetlib.dhp.oa.graph.raw.common;
import java.nio.charset.Charset;
+import java.nio.charset.StandardCharsets;
import java.text.Normalizer;
import java.util.HashSet;
import java.util.List;
@@ -141,7 +142,7 @@ public class PacePerson {
public String hash() {
return Hashing
.murmur3_128()
- .hashString(getNormalisedFullname(), Charset.forName(UTF8))
+ .hashString(getNormalisedFullname(), StandardCharsets.UTF_8)
.toString();
}
diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph/parser/AbstractScholexplorerParser.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph/parser/AbstractScholexplorerParser.java
index c97753fdc7..0db2b26884 100644
--- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph/parser/AbstractScholexplorerParser.java
+++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph/parser/AbstractScholexplorerParser.java
@@ -25,7 +25,7 @@ public abstract class AbstractScholexplorerParser {
protected static final Log log = LogFactory.getLog(AbstractScholexplorerParser.class);
static final Pattern pattern = Pattern.compile("10\\.\\d{4,9}/[-._;()/:A-Z0-9]+$", Pattern.CASE_INSENSITIVE);
- private List datasetSubTypes = Arrays
+ private final List datasetSubTypes = Arrays
.asList(
"dataset",
"software",
diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/sql/queryDatasources.sql b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/sql/queryDatasources.sql
index 9a0a45f78c..ff1178c711 100644
--- a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/sql/queryDatasources.sql
+++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/sql/queryDatasources.sql
@@ -108,7 +108,7 @@ SELECT
ELSE 'Other'
END || '@@@dnet:datasource_typologies@@@dnet:datasource_typologies' AS datasourcetype,
'sysimport:crosswalk:entityregistry@@@sysimport:crosswalk:entityregistry@@@dnet:provenance_actions@@@dnet:provenance_actions' AS provenanceaction,
- CONCAT(d.issn, '@@@', d.eissn, '@@@', d.lissn) AS journal
+ CONCAT(d.issn, ' @@@ ', d.eissn, ' @@@ ', d.lissn) AS journal
FROM dsm_datasources d
diff --git a/dhp-workflows/dhp-graph-provision-scholexplorer/src/main/java/eu/dnetlib/dhp/provision/update/CrossRefParserJSON.java b/dhp-workflows/dhp-graph-provision-scholexplorer/src/main/java/eu/dnetlib/dhp/provision/update/CrossRefParserJSON.java
index bc9562e08b..a172ef698c 100644
--- a/dhp-workflows/dhp-graph-provision-scholexplorer/src/main/java/eu/dnetlib/dhp/provision/update/CrossRefParserJSON.java
+++ b/dhp-workflows/dhp-graph-provision-scholexplorer/src/main/java/eu/dnetlib/dhp/provision/update/CrossRefParserJSON.java
@@ -18,7 +18,7 @@ import eu.dnetlib.dhp.utils.DHPUtils;
public class CrossRefParserJSON {
- private static List collectedFrom = generateCrossrefCollectedFrom("complete");
+ private static final List collectedFrom = generateCrossrefCollectedFrom("complete");
public static ScholixResource parseRecord(final String record) {
if (record == null)
diff --git a/dhp-workflows/dhp-graph-provision-scholexplorer/src/main/java/eu/dnetlib/dhp/provision/update/DataciteClient.java b/dhp-workflows/dhp-graph-provision-scholexplorer/src/main/java/eu/dnetlib/dhp/provision/update/DataciteClient.java
index e84ec4376e..9e9f0d5c99 100644
--- a/dhp-workflows/dhp-graph-provision-scholexplorer/src/main/java/eu/dnetlib/dhp/provision/update/DataciteClient.java
+++ b/dhp-workflows/dhp-graph-provision-scholexplorer/src/main/java/eu/dnetlib/dhp/provision/update/DataciteClient.java
@@ -16,7 +16,7 @@ public class DataciteClient {
private String host;
private String index = "datacite";
private String indexType = "dump";
- private Datacite2Scholix d2s;
+ private final Datacite2Scholix d2s;
public DataciteClient(String host) {
this.host = host;
diff --git a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/RelationPartitioner.java b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/RelationPartitioner.java
index bac2278e6a..a09a278372 100644
--- a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/RelationPartitioner.java
+++ b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/RelationPartitioner.java
@@ -12,7 +12,7 @@ import eu.dnetlib.dhp.oa.provision.model.SortableRelation;
*/
public class RelationPartitioner extends Partitioner {
- private int numPartitions;
+ private final int numPartitions;
public RelationPartitioner(int numPartitions) {
this.numPartitions = numPartitions;
diff --git a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/StreamingInputDocumentFactory.java b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/StreamingInputDocumentFactory.java
index de221b2ee3..3e8abbd9f1 100644
--- a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/StreamingInputDocumentFactory.java
+++ b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/StreamingInputDocumentFactory.java
@@ -46,7 +46,7 @@ public class StreamingInputDocumentFactory {
private static final String INDEX_RECORD_ID = INDEX_FIELD_PREFIX + "indexrecordidentifier";
- private static final String outFormat = new String("yyyy-MM-dd'T'hh:mm:ss'Z'");
+ private static final String outFormat = "yyyy-MM-dd'T'hh:mm:ss'Z'";
private static final List dateFormats = Arrays
.asList("yyyy-MM-dd'T'hh:mm:ss", "yyyy-MM-dd", "dd-MM-yyyy", "dd/MM/yyyy", "yyyy");
@@ -61,15 +61,18 @@ public class StreamingInputDocumentFactory {
private static final int MAX_FIELD_LENGTH = 25000;
- private ThreadLocal inputFactory = ThreadLocal.withInitial(() -> XMLInputFactory.newInstance());
+ private final ThreadLocal inputFactory = ThreadLocal
+ .withInitial(() -> XMLInputFactory.newInstance());
- private ThreadLocal outputFactory = ThreadLocal.withInitial(() -> XMLOutputFactory.newInstance());
+ private final ThreadLocal outputFactory = ThreadLocal
+ .withInitial(() -> XMLOutputFactory.newInstance());
- private ThreadLocal eventFactory = ThreadLocal.withInitial(() -> XMLEventFactory.newInstance());
+ private final ThreadLocal eventFactory = ThreadLocal
+ .withInitial(() -> XMLEventFactory.newInstance());
- private String version;
+ private final String version;
- private String dsId;
+ private final String dsId;
private String resultName = DEFAULTDNETRESULT;
diff --git a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/TemplateFactory.java b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/TemplateFactory.java
index 3d9cf1ae78..6cb025b4fd 100644
--- a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/TemplateFactory.java
+++ b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/TemplateFactory.java
@@ -17,7 +17,7 @@ import eu.dnetlib.dhp.schema.oaf.OafEntity;
public class TemplateFactory {
- private TemplateResources resources;
+ private final TemplateResources resources;
private static final char DELIMITER = '$';
diff --git a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/TemplateResources.java b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/TemplateResources.java
index 746f8ebe68..878a582bff 100644
--- a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/TemplateResources.java
+++ b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/TemplateResources.java
@@ -8,17 +8,17 @@ import com.google.common.io.Resources;
public class TemplateResources {
- private String record = read("eu/dnetlib/dhp/oa/provision/template/record.st");
+ private final String record = read("eu/dnetlib/dhp/oa/provision/template/record.st");
- private String instance = read("eu/dnetlib/dhp/oa/provision/template/instance.st");
+ private final String instance = read("eu/dnetlib/dhp/oa/provision/template/instance.st");
- private String rel = read("eu/dnetlib/dhp/oa/provision/template/rel.st");
+ private final String rel = read("eu/dnetlib/dhp/oa/provision/template/rel.st");
- private String webresource = read("eu/dnetlib/dhp/oa/provision/template/webresource.st");
+ private final String webresource = read("eu/dnetlib/dhp/oa/provision/template/webresource.st");
- private String child = read("eu/dnetlib/dhp/oa/provision/template/child.st");
+ private final String child = read("eu/dnetlib/dhp/oa/provision/template/child.st");
- private String entity = read("eu/dnetlib/dhp/oa/provision/template/entity.st");
+ private final String entity = read("eu/dnetlib/dhp/oa/provision/template/entity.st");
private static String read(final String classpathResource) throws IOException {
return Resources.toString(Resources.getResource(classpathResource), StandardCharsets.UTF_8);
diff --git a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/XmlRecordFactory.java b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/XmlRecordFactory.java
index f667d9f3cb..2cff2124e1 100644
--- a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/XmlRecordFactory.java
+++ b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/XmlRecordFactory.java
@@ -48,13 +48,13 @@ import eu.dnetlib.dhp.schema.oaf.Result;
public class XmlRecordFactory implements Serializable {
public static final String REL_SUBTYPE_DEDUP = "dedup";
- private Map accumulators;
+ private final Map accumulators;
- private Set specialDatasourceTypes;
+ private final Set specialDatasourceTypes;
- private ContextMapper contextMapper;
+ private final ContextMapper contextMapper;
- private String schemaLocation;
+ private final String schemaLocation;
private boolean indent = false;
diff --git a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/XmlSerializationUtils.java b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/XmlSerializationUtils.java
index bc3b3107d8..8195467b16 100644
--- a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/XmlSerializationUtils.java
+++ b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/XmlSerializationUtils.java
@@ -41,7 +41,7 @@ public class XmlSerializationUtils {
public static String mapStructuredProperty(String name, StructuredProperty t) {
return asXmlElement(
- name, t.getValue(), t.getQualifier(), t.getDataInfo() != null ? t.getDataInfo() : null);
+ name, t.getValue(), t.getQualifier(), t.getDataInfo());
}
public static String mapQualifier(String name, Qualifier q) {
diff --git a/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/GraphJoinerTest.java b/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/GraphJoinerTest.java
index 8afe03d6d2..1336a1cf7c 100644
--- a/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/GraphJoinerTest.java
+++ b/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/GraphJoinerTest.java
@@ -9,7 +9,7 @@ import org.junit.jupiter.api.BeforeEach;
public class GraphJoinerTest {
- private ClassLoader cl = getClass().getClassLoader();
+ private final ClassLoader cl = getClass().getClassLoader();
private Path workingDir;
private Path inputDir;
private Path outputDir;
diff --git a/dhp-workflows/dhp-stats-update/pom.xml b/dhp-workflows/dhp-stats-update/pom.xml
index 4d6318c3e1..0f5e180824 100644
--- a/dhp-workflows/dhp-stats-update/pom.xml
+++ b/dhp-workflows/dhp-stats-update/pom.xml
@@ -22,6 +22,7 @@
pl.project13.maven
git-commit-id-plugin
+ 2.1.11
false
diff --git a/dhp-workflows/dhp-worfklow-profiles/pom.xml b/dhp-workflows/dhp-worfklow-profiles/pom.xml
index df90014ba7..bad72a9ef0 100644
--- a/dhp-workflows/dhp-worfklow-profiles/pom.xml
+++ b/dhp-workflows/dhp-worfklow-profiles/pom.xml
@@ -3,13 +3,27 @@
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
- dhp
+ dhp-workflows
eu.dnetlib.dhp
1.1.7-SNAPSHOT
4.0.0
dhp-worfklow-profiles
+ jar
+
\ No newline at end of file
diff --git a/dhp-workflows/pom.xml b/dhp-workflows/pom.xml
index faa5d65ddd..a13df1de65 100644
--- a/dhp-workflows/pom.xml
+++ b/dhp-workflows/pom.xml
@@ -13,6 +13,8 @@
dhp-workflows
pom
+ This module is the container for the Oozie workflow definitions in the dnet-hadoop project
+
dhp-worfklow-profiles
dhp-aggregation
@@ -184,7 +186,7 @@
org.kuali.maven.plugins
properties-maven-plugin
- 1.3.2
+ ${properties.maven.plugin.version}
eu.dnetlib.dhp
@@ -564,60 +566,4 @@
-
-
-
-
-
- org.eclipse.m2e
- lifecycle-mapping
- 1.0.0
-
-
-
-
-
-
- org.kuali.maven.plugins
-
-
- properties-maven-plugin
-
-
- [1.3.2,)
-
-
-
- read-project-properties
-
-
- write-project-properties
-
-
-
-
-
-
-
-
-
-
- org.apache.maven.plugins
- maven-dependency-plugin
- [1.0.0,)
-
- copy-dependencies
-
-
-
-
-
-
-
-
-
-
-
-
-
diff --git a/pom.xml b/pom.xml
index bb48c04a1e..4838732190 100644
--- a/pom.xml
+++ b/pom.xml
@@ -6,14 +6,14 @@
1.1.7-SNAPSHOT
pom
- http://www.d-net.research-infrastructures.eu
-
- The Apache Software License, Version 2.0
- http://www.apache.org/licenses/LICENSE-2.0.txt
+ GNU Affero General Public License v3.0 or later
+ https://spdx.org/licenses/AGPL-3.0-or-later.html#licenseText
repo
- A business-friendly OSS license
+ This program is free software: you can redistribute it and/or modify it under the terms of the
+ GNU Affero General Public License as published by the Free Software Foundation, either version 3 of the
+ License, or (at your option) any later version.
@@ -41,6 +41,8 @@
HEAD
+ This module is the root descriptor for the dnet-hadoop project
+
@@ -406,6 +408,18 @@
target/test-classes
+
+
+ org.apache.maven.plugins
+ maven-project-info-reports-plugin
+ 3.0.0
+
+
+ org.apache.maven.plugins
+ maven-site-plugin
+ 3.7.1
+
+
org.apache.maven.plugins
maven-compiler-plugin
@@ -449,9 +463,10 @@
org.apache.maven.plugins
maven-javadoc-plugin
- 2.10.4
+ 3.2.0
true
+ none
@@ -475,6 +490,14 @@
+
+ org.apache.maven.plugins
+ maven-site-plugin
+
+
+ org.apache.maven.plugins
+ maven-project-info-reports-plugin
+
net.revelc.code.formatter
formatter-maven-plugin
@@ -569,9 +592,9 @@
org.apache.maven.plugins
maven-javadoc-plugin
- 2.10.4
true
+ none
@@ -582,6 +605,7 @@
UTF-8
3.6.0
2.22.2
+ 2.0.1
cdh5.9.2
2.6.0-${dhp.cdh.version}
4.1.0-${dhp.cdh.version}