diff --git a/dhp-build/dhp-build-assembly-resources/pom.xml b/dhp-build/dhp-build-assembly-resources/pom.xml index c837cd538..9b03536dd 100644 --- a/dhp-build/dhp-build-assembly-resources/pom.xml +++ b/dhp-build/dhp-build-assembly-resources/pom.xml @@ -12,6 +12,8 @@ dhp-build-assembly-resources jar + This module contains a set of scripts supporting the build lifecycle for the dnet-hadoop project + diff --git a/dhp-build/dhp-build-properties-maven-plugin/pom.xml b/dhp-build/dhp-build-properties-maven-plugin/pom.xml index df5045fcb..4d40edd99 100644 --- a/dhp-build/dhp-build-properties-maven-plugin/pom.xml +++ b/dhp-build/dhp-build-properties-maven-plugin/pom.xml @@ -12,22 +12,29 @@ dhp-build-properties-maven-plugin maven-plugin + This module is a maven plugin implementing custom properties substitutions in the build lifecycle org.apache.maven maven-plugin-api - 2.0 + 3.6.3 org.apache.maven maven-project - 2.0 + 2.2.1 + + org.apache.maven + maven-artifact + 2.2.1 + + org.kuali.maven.plugins properties-maven-plugin - 1.3.2 + ${properties.maven.plugin.version} com.google.code.findbugs @@ -73,44 +80,10 @@ maven-javadoc-plugin true + none - - - - - org.eclipse.m2e - lifecycle-mapping - 1.0.0 - - - - - - - org.apache.maven.plugins - - - maven-plugin-plugin - - - [3.2,) - - - descriptor - - - - - - - - - - - - diff --git a/dhp-build/dhp-build-properties-maven-plugin/src/main/java/eu/dnetlib/maven/plugin/properties/WritePredefinedProjectProperties.java b/dhp-build/dhp-build-properties-maven-plugin/src/main/java/eu/dnetlib/maven/plugin/properties/WritePredefinedProjectProperties.java index c1c567f95..d195ca86e 100644 --- a/dhp-build/dhp-build-properties-maven-plugin/src/main/java/eu/dnetlib/maven/plugin/properties/WritePredefinedProjectProperties.java +++ b/dhp-build/dhp-build-properties-maven-plugin/src/main/java/eu/dnetlib/maven/plugin/properties/WritePredefinedProjectProperties.java @@ -40,7 +40,7 @@ import edu.umd.cs.findbugs.annotations.SuppressFBWarnings; /** * Writes project properties for the keys listed in specified properties files. Based on: - * http://site.kuali.org/maven/plugins/properties-maven-plugin/1.3.2/write-project-properties-mojo.html + * http://site.kuali.org/maven/plugins/properties-maven-plugin/2.0.1/write-project-properties-mojo.html * * @author mhorst * @goal write-project-properties diff --git a/dhp-build/dhp-build-properties-maven-plugin/src/test/java/eu/dnetlib/maven/plugin/properties/GenerateOoziePropertiesMojoTest.java b/dhp-build/dhp-build-properties-maven-plugin/src/test/java/eu/dnetlib/maven/plugin/properties/GenerateOoziePropertiesMojoTest.java index b8075ba5d..4bfcd3b33 100644 --- a/dhp-build/dhp-build-properties-maven-plugin/src/test/java/eu/dnetlib/maven/plugin/properties/GenerateOoziePropertiesMojoTest.java +++ b/dhp-build/dhp-build-properties-maven-plugin/src/test/java/eu/dnetlib/maven/plugin/properties/GenerateOoziePropertiesMojoTest.java @@ -11,7 +11,7 @@ import org.junit.jupiter.api.Test; /** @author mhorst, claudio.atzori */ public class GenerateOoziePropertiesMojoTest { - private GenerateOoziePropertiesMojo mojo = new GenerateOoziePropertiesMojo(); + private final GenerateOoziePropertiesMojo mojo = new GenerateOoziePropertiesMojo(); @BeforeEach public void clearSystemProperties() { diff --git a/dhp-build/dhp-build-properties-maven-plugin/src/test/java/eu/dnetlib/maven/plugin/properties/WritePredefinedProjectPropertiesTest.java b/dhp-build/dhp-build-properties-maven-plugin/src/test/java/eu/dnetlib/maven/plugin/properties/WritePredefinedProjectPropertiesTest.java index e0b2eff37..0b3ea9653 100644 --- a/dhp-build/dhp-build-properties-maven-plugin/src/test/java/eu/dnetlib/maven/plugin/properties/WritePredefinedProjectPropertiesTest.java +++ b/dhp-build/dhp-build-properties-maven-plugin/src/test/java/eu/dnetlib/maven/plugin/properties/WritePredefinedProjectPropertiesTest.java @@ -366,7 +366,7 @@ public class WritePredefinedProjectPropertiesTest { } private Properties getStoredProperties(File testFolder) - throws FileNotFoundException, IOException { + throws IOException { Properties properties = new Properties(); properties.load(new FileInputStream(getPropertiesFileLocation(testFolder))); return properties; diff --git a/dhp-build/dhp-code-style/pom.xml b/dhp-build/dhp-code-style/pom.xml index 830723c9f..5e896e7a5 100644 --- a/dhp-build/dhp-code-style/pom.xml +++ b/dhp-build/dhp-code-style/pom.xml @@ -11,6 +11,38 @@ jar + This module contains resources supporting common code style conventions + + + + dnet45-snapshots + DNet45 Snapshots + http://maven.research-infrastructures.eu/nexus/content/repositories/dnet45-snapshots + default + + + dnet45-releases + http://maven.research-infrastructures.eu/nexus/content/repositories/dnet45-releases + + + + + + + + org.apache.maven.plugins + maven-project-info-reports-plugin + 3.0.0 + + + org.apache.maven.plugins + maven-site-plugin + 3.7.1 + + + + + UTF-8 diff --git a/dhp-build/pom.xml b/dhp-build/pom.xml index ff6a858bb..041641fcf 100644 --- a/dhp-build/pom.xml +++ b/dhp-build/pom.xml @@ -8,6 +8,9 @@ dhp-build pom + + This module is a container for the build tools used in dnet-hadoop + dhp-code-style dhp-build-assembly-resources diff --git a/dhp-common/pom.xml b/dhp-common/pom.xml index 7b073397f..51af8d954 100644 --- a/dhp-common/pom.xml +++ b/dhp-common/pom.xml @@ -12,6 +12,8 @@ dhp-common jar + This module contains common utilities meant to be used across the dnet-hadoop submodules + diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/utils/DHPUtils.java b/dhp-common/src/main/java/eu/dnetlib/dhp/utils/DHPUtils.java index 18e489a21..dfbaf3a6c 100644 --- a/dhp-common/src/main/java/eu/dnetlib/dhp/utils/DHPUtils.java +++ b/dhp-common/src/main/java/eu/dnetlib/dhp/utils/DHPUtils.java @@ -21,7 +21,7 @@ public class DHPUtils { public static String md5(final String s) { try { final MessageDigest md = MessageDigest.getInstance("MD5"); - md.update(s.getBytes("UTF-8")); + md.update(s.getBytes(StandardCharsets.UTF_8)); return new String(Hex.encodeHex(md.digest())); } catch (final Exception e) { System.err.println("Error creating id"); diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/utils/saxon/NormalizeDate.java b/dhp-common/src/main/java/eu/dnetlib/dhp/utils/saxon/NormalizeDate.java index 4a719909a..9fb60e145 100644 --- a/dhp-common/src/main/java/eu/dnetlib/dhp/utils/saxon/NormalizeDate.java +++ b/dhp-common/src/main/java/eu/dnetlib/dhp/utils/saxon/NormalizeDate.java @@ -17,7 +17,7 @@ public class NormalizeDate extends AbstractExtensionFunction { "yyyy-MM-dd'T'hh:mm:ss", "yyyy-MM-dd", "yyyy/MM/dd", "yyyy" }; - private static final String normalizeOutFormat = new String("yyyy-MM-dd'T'hh:mm:ss'Z'"); + private static final String normalizeOutFormat = "yyyy-MM-dd'T'hh:mm:ss'Z'"; @Override public String getName() { diff --git a/dhp-common/src/main/java/eu/dnetlib/message/MessageManager.java b/dhp-common/src/main/java/eu/dnetlib/message/MessageManager.java index 4c5c48c55..5ca79f3cc 100644 --- a/dhp-common/src/main/java/eu/dnetlib/message/MessageManager.java +++ b/dhp-common/src/main/java/eu/dnetlib/message/MessageManager.java @@ -21,7 +21,7 @@ public class MessageManager { private Connection connection; - private Map channels = new HashMap<>(); + private final Map channels = new HashMap<>(); private boolean durable; diff --git a/dhp-schemas/pom.xml b/dhp-schemas/pom.xml index 8deb2eab2..4a123ceda 100644 --- a/dhp-schemas/pom.xml +++ b/dhp-schemas/pom.xml @@ -12,7 +12,7 @@ dhp-schemas jar - + This module contains common schema classes meant to be used across the dnet-hadoop submodules diff --git a/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/action/AtomicActionDeserializer.java b/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/action/AtomicActionDeserializer.java index a9543d27a..7b88e9c7e 100644 --- a/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/action/AtomicActionDeserializer.java +++ b/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/action/AtomicActionDeserializer.java @@ -16,7 +16,7 @@ public class AtomicActionDeserializer extends JsonDeserializer { @Override public Object deserialize(JsonParser jp, DeserializationContext ctxt) - throws IOException, JsonProcessingException { + throws IOException { JsonNode node = jp.getCodec().readTree(jp); String classTag = node.get("clazz").asText(); JsonNode payload = node.get("payload"); diff --git a/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/common/ModelSupport.java b/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/common/ModelSupport.java index 0054e6d6f..7838cc0cf 100644 --- a/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/common/ModelSupport.java +++ b/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/common/ModelSupport.java @@ -9,190 +9,195 @@ import java.util.function.Function; /** Oaf model utility methods. */ public class ModelSupport { - /** Defines the mapping between the actual entity type and the main entity type */ - private static Map entityMapping = Maps.newHashMap(); + /** Defines the mapping between the actual entity type and the main entity type */ + private static final Map entityMapping = Maps.newHashMap(); - static { - entityMapping.put(EntityType.publication, MainEntityType.result); - entityMapping.put(EntityType.dataset, MainEntityType.result); - entityMapping.put(EntityType.otherresearchproduct, MainEntityType.result); - entityMapping.put(EntityType.software, MainEntityType.result); - entityMapping.put(EntityType.datasource, MainEntityType.datasource); - entityMapping.put(EntityType.organization, MainEntityType.organization); - entityMapping.put(EntityType.project, MainEntityType.project); - } + static { + entityMapping.put(EntityType.publication, MainEntityType.result); + entityMapping.put(EntityType.dataset, MainEntityType.result); + entityMapping.put(EntityType.otherresearchproduct, MainEntityType.result); + entityMapping.put(EntityType.software, MainEntityType.result); + entityMapping.put(EntityType.datasource, MainEntityType.datasource); + entityMapping.put(EntityType.organization, MainEntityType.organization); + entityMapping.put(EntityType.project, MainEntityType.project); + } - /** - * Defines the mapping between the actual entity types and the relative classes implementing them - */ - public static final Map entityTypes = Maps.newHashMap(); + /** + * Defines the mapping between the actual entity types and the relative classes implementing them + */ + public static final Map entityTypes = Maps.newHashMap(); - static { - entityTypes.put(EntityType.datasource, Datasource.class); - entityTypes.put(EntityType.organization, Organization.class); - entityTypes.put(EntityType.project, Project.class); - entityTypes.put(EntityType.dataset, Dataset.class); - entityTypes.put(EntityType.otherresearchproduct, OtherResearchProduct.class); - entityTypes.put(EntityType.software, Software.class); - entityTypes.put(EntityType.publication, Publication.class); - } + static { + entityTypes.put(EntityType.datasource, Datasource.class); + entityTypes.put(EntityType.organization, Organization.class); + entityTypes.put(EntityType.project, Project.class); + entityTypes.put(EntityType.dataset, Dataset.class); + entityTypes.put(EntityType.otherresearchproduct, OtherResearchProduct.class); + entityTypes.put(EntityType.software, Software.class); + entityTypes.put(EntityType.publication, Publication.class); + } - public static final Map oafTypes = Maps.newHashMap(); + public static final Map oafTypes = Maps.newHashMap(); - static { - oafTypes.put("datasource", Datasource.class); - oafTypes.put("organization", Organization.class); - oafTypes.put("project", Project.class); - oafTypes.put("dataset", Dataset.class); - oafTypes.put("otherresearchproduct", OtherResearchProduct.class); - oafTypes.put("software", Software.class); - oafTypes.put("publication", Publication.class); - oafTypes.put("relation", Relation.class); - } + static { + oafTypes.put("datasource", Datasource.class); + oafTypes.put("organization", Organization.class); + oafTypes.put("project", Project.class); + oafTypes.put("dataset", Dataset.class); + oafTypes.put("otherresearchproduct", OtherResearchProduct.class); + oafTypes.put("software", Software.class); + oafTypes.put("publication", Publication.class); + oafTypes.put("relation", Relation.class); + } - private static final String schemeTemplate = "dnet:%s_%s_relations"; + private static final String schemeTemplate = "dnet:%s_%s_relations"; - private ModelSupport() {} + private ModelSupport() { + } - /** - * Checks subclass-superclass relationship. - * - * @param subClazzObject Subclass object instance - * @param superClazzObject Superclass object instance - * @param Subclass type - * @param Superclass type - * @return True if X is a subclass of Y - */ - public static Boolean isSubClass( - X subClazzObject, Y superClazzObject) { - return isSubClass(subClazzObject.getClass(), superClazzObject.getClass()); - } + /** + * Checks subclass-superclass relationship. + * + * @param subClazzObject Subclass object instance + * @param superClazzObject Superclass object instance + * @param Subclass type + * @param Superclass type + * @return True if X is a subclass of Y + */ + public static Boolean isSubClass( + X subClazzObject, Y superClazzObject) { + return isSubClass(subClazzObject.getClass(), superClazzObject.getClass()); + } - /** - * Checks subclass-superclass relationship. - * - * @param subClazzObject Subclass object instance - * @param superClazz Superclass class - * @param Subclass type - * @param Superclass type - * @return True if X is a subclass of Y - */ - public static Boolean isSubClass( - X subClazzObject, Class superClazz) { - return isSubClass(subClazzObject.getClass(), superClazz); - } + /** + * Checks subclass-superclass relationship. + * + * @param subClazzObject Subclass object instance + * @param superClazz Superclass class + * @param Subclass type + * @param Superclass type + * @return True if X is a subclass of Y + */ + public static Boolean isSubClass( + X subClazzObject, Class superClazz) { + return isSubClass(subClazzObject.getClass(), superClazz); + } - /** - * Checks subclass-superclass relationship. - * - * @param subClazz Subclass class - * @param superClazz Superclass class - * @param Subclass type - * @param Superclass type - * @return True if X is a subclass of Y - */ - public static Boolean isSubClass( - Class subClazz, Class superClazz) { - return superClazz.isAssignableFrom(subClazz); - } + /** + * Checks subclass-superclass relationship. + * + * @param subClazz Subclass class + * @param superClazz Superclass class + * @param Subclass type + * @param Superclass type + * @return True if X is a subclass of Y + */ + public static Boolean isSubClass( + Class subClazz, Class superClazz) { + return superClazz.isAssignableFrom(subClazz); + } - /** - * Lists all the OAF model classes - * - * @param - * @return - */ - public static Class[] getOafModelClasses() { - return new Class[] { - Author.class, - Context.class, - Country.class, - DataInfo.class, - Dataset.class, - Datasource.class, - ExternalReference.class, - ExtraInfo.class, - Field.class, - GeoLocation.class, - Instance.class, - Journal.class, - KeyValue.class, - Oaf.class, - OafEntity.class, - OAIProvenance.class, - Organization.class, - OriginDescription.class, - OtherResearchProduct.class, - Project.class, - Publication.class, - Qualifier.class, - Relation.class, - Result.class, - Software.class, - StructuredProperty.class - }; - } + /** + * Lists all the OAF model classes + * + * @param + * @return + */ + public static Class[] getOafModelClasses() { + return new Class[] { + Author.class, + Context.class, + Country.class, + DataInfo.class, + Dataset.class, + Datasource.class, + ExternalReference.class, + ExtraInfo.class, + Field.class, + GeoLocation.class, + Instance.class, + Journal.class, + KeyValue.class, + Oaf.class, + OafEntity.class, + OAIProvenance.class, + Organization.class, + OriginDescription.class, + OtherResearchProduct.class, + Project.class, + Publication.class, + Qualifier.class, + Relation.class, + Result.class, + Software.class, + StructuredProperty.class + }; + } - public static String getMainType(final EntityType type) { - return entityMapping.get(type).name(); - } + public static String getMainType(final EntityType type) { + return entityMapping.get(type).name(); + } - public static boolean isResult(EntityType type) { - return MainEntityType.result.name().equals(getMainType(type)); - } + public static boolean isResult(EntityType type) { + return MainEntityType.result.name().equals(getMainType(type)); + } - public static String getScheme(final String sourceType, final String targetType) { - return String.format( - schemeTemplate, - entityMapping.get(EntityType.valueOf(sourceType)).name(), - entityMapping.get(EntityType.valueOf(targetType)).name()); - } + public static String getScheme(final String sourceType, final String targetType) { + return String + .format( + schemeTemplate, + entityMapping.get(EntityType.valueOf(sourceType)).name(), + entityMapping.get(EntityType.valueOf(targetType)).name()); + } - public static Function idFn() { - return x -> { - if (isSubClass(x, Relation.class)) { - return idFnForRelation(x); - } - return idFnForOafEntity(x); - }; - } + public static Function idFn() { + return x -> { + if (isSubClass(x, Relation.class)) { + return idFnForRelation(x); + } + return idFnForOafEntity(x); + }; + } - private static String idFnForRelation(T t) { - Relation r = (Relation) t; - return Optional.ofNullable(r.getSource()) - .map( - source -> - Optional.ofNullable(r.getTarget()) - .map( - target -> - Optional.ofNullable(r.getRelType()) - .map( - relType -> - Optional.ofNullable(r.getSubRelType()) - .map( - subRelType -> - Optional.ofNullable(r.getRelClass()) - .map( - relClass -> - String.join( - source, - target, - relType, - subRelType, - relClass)) - .orElse( - String.join( - source, - target, - relType, - subRelType))) - .orElse(String.join(source, target, relType))) - .orElse(String.join(source, target))) - .orElse(source)) - .orElse(null); - } + private static String idFnForRelation(T t) { + Relation r = (Relation) t; + return Optional + .ofNullable(r.getSource()) + .map( + source -> Optional + .ofNullable(r.getTarget()) + .map( + target -> Optional + .ofNullable(r.getRelType()) + .map( + relType -> Optional + .ofNullable(r.getSubRelType()) + .map( + subRelType -> Optional + .ofNullable(r.getRelClass()) + .map( + relClass -> String + .join( + source, + target, + relType, + subRelType, + relClass)) + .orElse( + String + .join( + source, + target, + relType, + subRelType))) + .orElse(String.join(source, target, relType))) + .orElse(String.join(source, target))) + .orElse(source)) + .orElse(null); + } + + private static String idFnForOafEntity(T t) { + return ((OafEntity) t).getId(); + } - private static String idFnForOafEntity(T t) { - return ((OafEntity) t).getId(); - } } diff --git a/dhp-workflows/dhp-actionmanager/src/main/java/eu/dnetlib/dhp/actionmanager/ISClient.java b/dhp-workflows/dhp-actionmanager/src/main/java/eu/dnetlib/dhp/actionmanager/ISClient.java index 091438195..0f0d21e11 100644 --- a/dhp-workflows/dhp-actionmanager/src/main/java/eu/dnetlib/dhp/actionmanager/ISClient.java +++ b/dhp-workflows/dhp-actionmanager/src/main/java/eu/dnetlib/dhp/actionmanager/ISClient.java @@ -32,7 +32,7 @@ public class ISClient implements Serializable { private static final String INPUT_ACTION_SET_ID_SEPARATOR = ","; - private ISLookUpService isLookup; + private final ISLookUpService isLookup; public ISClient(String isLookupUrl) { isLookup = ISLookupClientFactory.getLookUpService(isLookupUrl); diff --git a/dhp-workflows/dhp-actionmanager/src/main/java/eu/dnetlib/dhp/actionmanager/promote/PromoteActionPayloadFunctions.java b/dhp-workflows/dhp-actionmanager/src/main/java/eu/dnetlib/dhp/actionmanager/promote/PromoteActionPayloadFunctions.java index ffde658bd..56c8dd05a 100644 --- a/dhp-workflows/dhp-actionmanager/src/main/java/eu/dnetlib/dhp/actionmanager/promote/PromoteActionPayloadFunctions.java +++ b/dhp-workflows/dhp-actionmanager/src/main/java/eu/dnetlib/dhp/actionmanager/promote/PromoteActionPayloadFunctions.java @@ -123,10 +123,10 @@ public class PromoteActionPayloadFunctions { * @param Type of graph table row */ public static class TableAggregator extends Aggregator { - private SerializableSupplier zeroFn; - private SerializableSupplier> mergeAndGetFn; - private SerializableSupplier> isNotZeroFn; - private Class rowClazz; + private final SerializableSupplier zeroFn; + private final SerializableSupplier> mergeAndGetFn; + private final SerializableSupplier> isNotZeroFn; + private final Class rowClazz; public TableAggregator( SerializableSupplier zeroFn, diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/worker/DnetCollectorWorkerApplication.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/worker/DnetCollectorWorkerApplication.java index cda07d151..da30e8793 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/worker/DnetCollectorWorkerApplication.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/worker/DnetCollectorWorkerApplication.java @@ -20,7 +20,7 @@ public class DnetCollectorWorkerApplication { private static final Logger log = LoggerFactory.getLogger(DnetCollectorWorkerApplication.class); - private static CollectorPluginFactory collectorPluginFactory = new CollectorPluginFactory(); + private static final CollectorPluginFactory collectorPluginFactory = new CollectorPluginFactory(); private static ArgumentApplicationParser argumentParser; diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/worker/utils/CollectorPluginErrorLogList.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/worker/utils/CollectorPluginErrorLogList.java index 6ee8a8b49..dcaf0ea56 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/worker/utils/CollectorPluginErrorLogList.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/worker/utils/CollectorPluginErrorLogList.java @@ -9,7 +9,7 @@ public class CollectorPluginErrorLogList extends LinkedList { @Override public String toString() { - String log = new String(); + String log = ""; int index = 0; for (final String errorMessage : this) { log += String.format("Retry #%s: %s / ", index++, errorMessage); diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/worker/utils/XmlCleaner.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/worker/utils/XmlCleaner.java index 32eeeab4b..44aeb4d02 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/worker/utils/XmlCleaner.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/worker/utils/XmlCleaner.java @@ -11,22 +11,22 @@ import java.util.regex.Pattern; public class XmlCleaner { /** Pattern for numeric entities. */ - private static Pattern validCharacterEntityPattern = Pattern.compile("^&#x?\\d{2,4};"); // $NON-NLS-1$ + private static final Pattern validCharacterEntityPattern = Pattern.compile("^&#x?\\d{2,4};"); // $NON-NLS-1$ // private static Pattern validCharacterEntityPattern = Pattern.compile("^&#?\\d{2,4};"); // //$NON-NLS-1$ // see https://www.w3.org/TR/REC-xml/#charsets , not only limited to - private static Pattern invalidControlCharPattern = Pattern.compile("&#x?1[0-9a-fA-F];"); + private static final Pattern invalidControlCharPattern = Pattern.compile("&#x?1[0-9a-fA-F];"); /** * Pattern that negates the allowable XML 4 byte unicode characters. Valid are: #x9 | #xA | #xD | [#x20-#xD7FF] | * [#xE000-#xFFFD] | [#x10000-#x10FFFF] */ - private static Pattern invalidCharacterPattern = Pattern.compile("[^\t\r\n\u0020-\uD7FF\uE000-\uFFFD]"); // $NON-NLS-1$ + private static final Pattern invalidCharacterPattern = Pattern.compile("[^\t\r\n\u0020-\uD7FF\uE000-\uFFFD]"); // $NON-NLS-1$ // Map entities to their unicode equivalent - private static Set goodEntities = new HashSet<>(); - private static Map badEntities = new HashMap<>(); + private static final Set goodEntities = new HashSet<>(); + private static final Map badEntities = new HashMap<>(); static { // pre-defined XML entities diff --git a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/collector/worker/DnetCollectorWorkerApplicationTests.java b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/collector/worker/DnetCollectorWorkerApplicationTests.java index 1a4fafb66..87bd3be3d 100644 --- a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/collector/worker/DnetCollectorWorkerApplicationTests.java +++ b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/collector/worker/DnetCollectorWorkerApplicationTests.java @@ -21,8 +21,8 @@ import eu.dnetlib.message.MessageManager; public class DnetCollectorWorkerApplicationTests { - private ArgumentApplicationParser argumentParser = mock(ArgumentApplicationParser.class); - private MessageManager messageManager = mock(MessageManager.class); + private final ArgumentApplicationParser argumentParser = mock(ArgumentApplicationParser.class); + private final MessageManager messageManager = mock(MessageManager.class); private DnetCollectorWorker worker; diff --git a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/DedupUtility.java b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/DedupUtility.java index 4f797f7f7..d3ae8ee4f 100644 --- a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/DedupUtility.java +++ b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/DedupUtility.java @@ -2,6 +2,7 @@ package eu.dnetlib.dhp.oa.dedup; import java.io.StringReader; +import java.nio.charset.StandardCharsets; import java.security.MessageDigest; import java.text.Normalizer; import java.util.*; @@ -73,7 +74,7 @@ public class DedupUtility { public static String md5(final String s) { try { final MessageDigest md = MessageDigest.getInstance("MD5"); - md.update(s.getBytes("UTF-8")); + md.update(s.getBytes(StandardCharsets.UTF_8)); return new String(Hex.encodeHex(md.digest())); } catch (final Exception e) { System.err.println("Error creating id"); diff --git a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/SparkReporter.java b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/SparkReporter.java index 7100c9037..005e65ddf 100644 --- a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/SparkReporter.java +++ b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/SparkReporter.java @@ -15,7 +15,7 @@ public class SparkReporter implements Serializable, Reporter { private final List> relations = new ArrayList<>(); - private Map accumulators; + private final Map accumulators; public SparkReporter(Map accumulators) { this.accumulators = accumulators; diff --git a/dhp-workflows/dhp-dedup-scholexplorer/src/main/java/eu/dnetlib/dedup/DedupUtility.java b/dhp-workflows/dhp-dedup-scholexplorer/src/main/java/eu/dnetlib/dedup/DedupUtility.java index 70a2e3591..364b49c16 100644 --- a/dhp-workflows/dhp-dedup-scholexplorer/src/main/java/eu/dnetlib/dedup/DedupUtility.java +++ b/dhp-workflows/dhp-dedup-scholexplorer/src/main/java/eu/dnetlib/dedup/DedupUtility.java @@ -106,7 +106,7 @@ public class DedupUtility { public static String md5(final String s) { try { final MessageDigest md = MessageDigest.getInstance("MD5"); - md.update(s.getBytes("UTF-8")); + md.update(s.getBytes(StandardCharsets.UTF_8)); return new String(Hex.encodeHex(md.digest())); } catch (final Exception e) { System.err.println("Error creating id"); diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/AbstractMdRecordToOafMapper.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/AbstractMdRecordToOafMapper.java index e20d1eb79..82f5cbfd0 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/AbstractMdRecordToOafMapper.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/AbstractMdRecordToOafMapper.java @@ -410,14 +410,10 @@ public abstract class AbstractMdRecordToOafMapper { final String identifier = n.valueOf("./*[local-name()='identifier']"); final String baseURL = n.valueOf("./*[local-name()='baseURL']"); - ; final String metadataNamespace = n.valueOf("./*[local-name()='metadataNamespace']"); - ; final boolean altered = n.valueOf("@altered").equalsIgnoreCase("true"); final String datestamp = n.valueOf("./*[local-name()='datestamp']"); - ; final String harvestDate = n.valueOf("@harvestDate"); - ; return oaiIProvenance(identifier, baseURL, metadataNamespace, altered, datestamp, harvestDate); } diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/MigrateDbEntitiesApplication.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/MigrateDbEntitiesApplication.java index aa63f9ebc..997cb8f03 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/MigrateDbEntitiesApplication.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/MigrateDbEntitiesApplication.java @@ -51,497 +51,458 @@ import eu.dnetlib.dhp.schema.oaf.Software; import eu.dnetlib.dhp.schema.oaf.StructuredProperty; public class MigrateDbEntitiesApplication extends AbstractMigrationApplication - implements Closeable { - - private static final Log log = LogFactory.getLog(MigrateDbEntitiesApplication.class); - - private final DbClient dbClient; - - private final long lastUpdateTimestamp; - - public static void main(final String[] args) throws Exception { - final ArgumentApplicationParser parser = - new ArgumentApplicationParser( - IOUtils.toString( - MigrateDbEntitiesApplication.class.getResourceAsStream( - "/eu/dnetlib/dhp/oa/graph/migrate_db_entities_parameters.json"))); - - parser.parseArgument(args); - - final String dbUrl = parser.get("postgresUrl"); - final String dbUser = parser.get("postgresUser"); - final String dbPassword = parser.get("postgresPassword"); - - final String hdfsPath = parser.get("hdfsPath"); - - final boolean processClaims = - parser.get("action") != null && parser.get("action").equalsIgnoreCase("claims"); - - try (final MigrateDbEntitiesApplication smdbe = - new MigrateDbEntitiesApplication(hdfsPath, dbUrl, dbUser, dbPassword)) { - if (processClaims) { - log.info("Processing claims..."); - smdbe.execute("queryClaims.sql", smdbe::processClaims); - } else { - log.info("Processing datasources..."); - smdbe.execute("queryDatasources.sql", smdbe::processDatasource); - - log.info("Processing projects..."); - smdbe.execute("queryProjects.sql", smdbe::processProject); - - log.info("Processing orgs..."); - smdbe.execute("queryOrganizations.sql", smdbe::processOrganization); - - log.info("Processing relations ds <-> orgs ..."); - smdbe.execute("queryDatasourceOrganization.sql", smdbe::processDatasourceOrganization); - - log.info("Processing projects <-> orgs ..."); - smdbe.execute("queryProjectOrganization.sql", smdbe::processProjectOrganization); - } - log.info("All done."); - } - } - - protected MigrateDbEntitiesApplication() { // ONLY FOR UNIT TEST - super(); - this.dbClient = null; - this.lastUpdateTimestamp = new Date().getTime(); - } - - public MigrateDbEntitiesApplication( - final String hdfsPath, final String dbUrl, final String dbUser, final String dbPassword) - throws Exception { - super(hdfsPath); - this.dbClient = new DbClient(dbUrl, dbUser, dbPassword); - this.lastUpdateTimestamp = new Date().getTime(); - } - - public void execute(final String sqlFile, final Function> producer) - throws Exception { - final String sql = - IOUtils.toString(getClass().getResourceAsStream("/eu/dnetlib/dhp/oa/graph/sql/" + sqlFile)); - - final Consumer consumer = rs -> producer.apply(rs).forEach(oaf -> emitOaf(oaf)); - - dbClient.processResults(sql, consumer); - } - - public List processDatasource(final ResultSet rs) { - - try { - - final DataInfo info = prepareDataInfo(rs); - - final Datasource ds = new Datasource(); - - ds.setId(createOpenaireId(10, rs.getString("datasourceid"), true)); - ds.setOriginalId(Arrays.asList(rs.getString("datasourceid"))); - ds.setCollectedfrom( - listKeyValues( - createOpenaireId(10, rs.getString("collectedfromid"), true), - rs.getString("collectedfromname"))); - ds.setPid(new ArrayList<>()); - ds.setDateofcollection(asString(rs.getDate("dateofcollection"))); - ds.setDateoftransformation(null); // Value not returned by the SQL query - ds.setExtraInfo(new ArrayList<>()); // Values not present in the DB - ds.setOaiprovenance(null); // Values not present in the DB - ds.setDatasourcetype(prepareQualifierSplitting(rs.getString("datasourcetype"))); - ds.setOpenairecompatibility(prepareQualifierSplitting(rs.getString("openairecompatibility"))); - ds.setOfficialname(field(rs.getString("officialname"), info)); - ds.setEnglishname(field(rs.getString("englishname"), info)); - ds.setWebsiteurl(field(rs.getString("websiteurl"), info)); - ds.setLogourl(field(rs.getString("logourl"), info)); - ds.setContactemail(field(rs.getString("contactemail"), info)); - ds.setNamespaceprefix(field(rs.getString("namespaceprefix"), info)); - ds.setLatitude(field(Double.toString(rs.getDouble("latitude")), info)); - ds.setLongitude(field(Double.toString(rs.getDouble("longitude")), info)); - ds.setDateofvalidation(field(asString(rs.getDate("dateofvalidation")), info)); - ds.setDescription(field(rs.getString("description"), info)); - ds.setSubjects(prepareListOfStructProps(rs.getArray("subjects"), info)); - ds.setOdnumberofitems(field(Double.toString(rs.getInt("odnumberofitems")), info)); - ds.setOdnumberofitemsdate(field(asString(rs.getDate("odnumberofitemsdate")), info)); - ds.setOdpolicies(field(rs.getString("odpolicies"), info)); - ds.setOdlanguages(prepareListFields(rs.getArray("odlanguages"), info)); - ds.setOdcontenttypes(prepareListFields(rs.getArray("odcontenttypes"), info)); - ds.setAccessinfopackage(prepareListFields(rs.getArray("accessinfopackage"), info)); - ds.setReleasestartdate(field(asString(rs.getDate("releasestartdate")), info)); - ds.setReleaseenddate(field(asString(rs.getDate("releaseenddate")), info)); - ds.setMissionstatementurl(field(rs.getString("missionstatementurl"), info)); - ds.setDataprovider(field(rs.getBoolean("dataprovider"), info)); - ds.setServiceprovider(field(rs.getBoolean("serviceprovider"), info)); - ds.setDatabaseaccesstype(field(rs.getString("databaseaccesstype"), info)); - ds.setDatauploadtype(field(rs.getString("datauploadtype"), info)); - ds.setDatabaseaccessrestriction(field(rs.getString("databaseaccessrestriction"), info)); - ds.setDatauploadrestriction(field(rs.getString("datauploadrestriction"), info)); - ds.setVersioning(field(rs.getBoolean("versioning"), info)); - ds.setCitationguidelineurl(field(rs.getString("citationguidelineurl"), info)); - ds.setQualitymanagementkind(field(rs.getString("qualitymanagementkind"), info)); - ds.setPidsystems(field(rs.getString("pidsystems"), info)); - ds.setCertificates(field(rs.getString("certificates"), info)); - ds.setPolicies(new ArrayList<>()); // The sql query returns an empty array - ds.setJournal( - prepareJournal(rs.getString("officialname"), rs.getString("journal"), info)); // Journal - ds.setDataInfo(info); - ds.setLastupdatetimestamp(lastUpdateTimestamp); - - return Arrays.asList(ds); - } catch (final Exception e) { - throw new RuntimeException(e); - } - } - - public List processProject(final ResultSet rs) { - try { - - final DataInfo info = prepareDataInfo(rs); - - final Project p = new Project(); - - p.setId(createOpenaireId(40, rs.getString("projectid"), true)); - p.setOriginalId(Arrays.asList(rs.getString("projectid"))); - p.setCollectedfrom( - listKeyValues( - createOpenaireId(10, rs.getString("collectedfromid"), true), - rs.getString("collectedfromname"))); - p.setPid(new ArrayList<>()); - p.setDateofcollection(asString(rs.getDate("dateofcollection"))); - p.setDateoftransformation(asString(rs.getDate("dateoftransformation"))); - p.setExtraInfo(new ArrayList<>()); // Values not present in the DB - p.setOaiprovenance(null); // Values not present in the DB - p.setWebsiteurl(field(rs.getString("websiteurl"), info)); - p.setCode(field(rs.getString("code"), info)); - p.setAcronym(field(rs.getString("acronym"), info)); - p.setTitle(field(rs.getString("title"), info)); - p.setStartdate(field(asString(rs.getDate("startdate")), info)); - p.setEnddate(field(asString(rs.getDate("enddate")), info)); - p.setCallidentifier(field(rs.getString("callidentifier"), info)); - p.setKeywords(field(rs.getString("keywords"), info)); - p.setDuration(field(Integer.toString(rs.getInt("duration")), info)); - p.setEcsc39(field(Boolean.toString(rs.getBoolean("ecsc39")), info)); - p.setOamandatepublications( - field(Boolean.toString(rs.getBoolean("oamandatepublications")), info)); - p.setEcarticle29_3(field(Boolean.toString(rs.getBoolean("ecarticle29_3")), info)); - p.setSubjects(prepareListOfStructProps(rs.getArray("subjects"), info)); - p.setFundingtree(prepareListFields(rs.getArray("fundingtree"), info)); - p.setContracttype(prepareQualifierSplitting(rs.getString("contracttype"))); - p.setOptional1(field(rs.getString("optional1"), info)); - p.setOptional2(field(rs.getString("optional2"), info)); - p.setJsonextrainfo(field(rs.getString("jsonextrainfo"), info)); - p.setContactfullname(field(rs.getString("contactfullname"), info)); - p.setContactfax(field(rs.getString("contactfax"), info)); - p.setContactphone(field(rs.getString("contactphone"), info)); - p.setContactemail(field(rs.getString("contactemail"), info)); - p.setSummary(field(rs.getString("summary"), info)); - p.setCurrency(field(rs.getString("currency"), info)); - p.setTotalcost(new Float(rs.getDouble("totalcost"))); - p.setFundedamount(new Float(rs.getDouble("fundedamount"))); - p.setDataInfo(info); - p.setLastupdatetimestamp(lastUpdateTimestamp); - - return Arrays.asList(p); - - } catch (final Exception e) { - throw new RuntimeException(e); - } - } - - public List processOrganization(final ResultSet rs) { - - try { - - final DataInfo info = prepareDataInfo(rs); - - final Organization o = new Organization(); - - o.setId(createOpenaireId(20, rs.getString("organizationid"), true)); - o.setOriginalId(Arrays.asList(rs.getString("organizationid"))); - o.setCollectedfrom( - listKeyValues( - createOpenaireId(10, rs.getString("collectedfromid"), true), - rs.getString("collectedfromname"))); - o.setPid(new ArrayList<>()); - o.setDateofcollection(asString(rs.getDate("dateofcollection"))); - o.setDateoftransformation(asString(rs.getDate("dateoftransformation"))); - o.setExtraInfo(new ArrayList<>()); // Values not present in the DB - o.setOaiprovenance(null); // Values not present in the DB - o.setLegalshortname(field(rs.getString("legalshortname"), info)); - o.setLegalname(field(rs.getString("legalname"), info)); - o.setAlternativeNames(new ArrayList<>()); // Values not returned by the SQL query - o.setWebsiteurl(field(rs.getString("websiteurl"), info)); - o.setLogourl(field(rs.getString("logourl"), info)); - o.setEclegalbody(field(Boolean.toString(rs.getBoolean("eclegalbody")), info)); - o.setEclegalperson(field(Boolean.toString(rs.getBoolean("eclegalperson")), info)); - o.setEcnonprofit(field(Boolean.toString(rs.getBoolean("ecnonprofit")), info)); - o.setEcresearchorganization( - field(Boolean.toString(rs.getBoolean("ecresearchorganization")), info)); - o.setEchighereducation(field(Boolean.toString(rs.getBoolean("echighereducation")), info)); - o.setEcinternationalorganizationeurinterests( - field(Boolean.toString(rs.getBoolean("ecinternationalorganizationeurinterests")), info)); - o.setEcinternationalorganization( - field(Boolean.toString(rs.getBoolean("ecinternationalorganization")), info)); - o.setEcenterprise(field(Boolean.toString(rs.getBoolean("ecenterprise")), info)); - o.setEcsmevalidated(field(Boolean.toString(rs.getBoolean("ecsmevalidated")), info)); - o.setEcnutscode(field(Boolean.toString(rs.getBoolean("ecnutscode")), info)); - o.setCountry(prepareQualifierSplitting(rs.getString("country"))); - o.setDataInfo(info); - o.setLastupdatetimestamp(lastUpdateTimestamp); - - return Arrays.asList(o); - } catch (final Exception e) { - throw new RuntimeException(e); - } - } - - public List processDatasourceOrganization(final ResultSet rs) { - try { - final DataInfo info = prepareDataInfo(rs); - final String orgId = createOpenaireId(20, rs.getString("organization"), true); - final String dsId = createOpenaireId(10, rs.getString("datasource"), true); - final List collectedFrom = - listKeyValues( - createOpenaireId(10, rs.getString("collectedfromid"), true), - rs.getString("collectedfromname")); - - final Relation r1 = new Relation(); - r1.setRelType("datasourceOrganization"); - r1.setSubRelType("provision"); - r1.setRelClass("isProvidedBy"); - r1.setSource(dsId); - r1.setTarget(orgId); - r1.setCollectedfrom(collectedFrom); - r1.setDataInfo(info); - r1.setLastupdatetimestamp(lastUpdateTimestamp); - - final Relation r2 = new Relation(); - r2.setRelType("datasourceOrganization"); - r2.setSubRelType("provision"); - r2.setRelClass("provides"); - r2.setSource(orgId); - r2.setTarget(dsId); - r2.setCollectedfrom(collectedFrom); - r2.setDataInfo(info); - r2.setLastupdatetimestamp(lastUpdateTimestamp); - - return Arrays.asList(r1, r2); - } catch (final Exception e) { - throw new RuntimeException(e); - } - } - - public List processProjectOrganization(final ResultSet rs) { - try { - final DataInfo info = prepareDataInfo(rs); - final String orgId = createOpenaireId(20, rs.getString("resporganization"), true); - final String projectId = createOpenaireId(40, rs.getString("project"), true); - final List collectedFrom = - listKeyValues( - createOpenaireId(10, rs.getString("collectedfromid"), true), - rs.getString("collectedfromname")); - - final Relation r1 = new Relation(); - r1.setRelType("projectOrganization"); - r1.setSubRelType("participation"); - r1.setRelClass("isParticipant"); - r1.setSource(projectId); - r1.setTarget(orgId); - r1.setCollectedfrom(collectedFrom); - r1.setDataInfo(info); - r1.setLastupdatetimestamp(lastUpdateTimestamp); - - final Relation r2 = new Relation(); - r2.setRelType("projectOrganization"); - r2.setSubRelType("participation"); - r2.setRelClass("hasParticipant"); - r2.setSource(orgId); - r2.setTarget(projectId); - r2.setCollectedfrom(collectedFrom); - r2.setDataInfo(info); - r2.setLastupdatetimestamp(lastUpdateTimestamp); - - return Arrays.asList(r1, r2); - } catch (final Exception e) { - throw new RuntimeException(e); - } - } - - public List processClaims(final ResultSet rs) { - - final DataInfo info = - dataInfo( - false, - null, - false, - false, - qualifier( - "user:claim", "user:claim", "dnet:provenanceActions", "dnet:provenanceActions"), - "0.9"); - - final List collectedFrom = - listKeyValues(createOpenaireId(10, "infrastruct_::openaire", true), "OpenAIRE"); - - try { - - if (rs.getString("source_type").equals("context")) { - final Result r; - - if (rs.getString("target_type").equals("dataset")) { - r = new Dataset(); - r.setResulttype(MigrationConstants.DATASET_RESULTTYPE_QUALIFIER); - } else if (rs.getString("target_type").equals("software")) { - r = new Software(); - r.setResulttype(MigrationConstants.SOFTWARE_RESULTTYPE_QUALIFIER); - } else if (rs.getString("target_type").equals("other")) { - r = new OtherResearchProduct(); - r.setResulttype(MigrationConstants.OTHER_RESULTTYPE_QUALIFIER); - } else { - r = new Publication(); - r.setResulttype(MigrationConstants.PUBLICATION_RESULTTYPE_QUALIFIER); - } - r.setId(createOpenaireId(50, rs.getString("target_id"), false)); - r.setLastupdatetimestamp(lastUpdateTimestamp); - r.setContext(prepareContext(rs.getString("source_id"), info)); - r.setDataInfo(info); - r.setCollectedfrom(collectedFrom); - - return Arrays.asList(r); - } else { - final String sourceId = - createOpenaireId(rs.getString("source_type"), rs.getString("source_id"), false); - final String targetId = - createOpenaireId(rs.getString("target_type"), rs.getString("target_id"), false); - - final Relation r1 = new Relation(); - final Relation r2 = new Relation(); - - if (rs.getString("source_type").equals("project")) { - r1.setCollectedfrom(collectedFrom); - r1.setRelType("resultProject"); - r1.setSubRelType("outcome"); - r1.setRelClass("produces"); - - r2.setCollectedfrom(collectedFrom); - r2.setRelType("resultProject"); - r2.setSubRelType("outcome"); - r2.setRelClass("isProducedBy"); - } else { - r1.setCollectedfrom(collectedFrom); - r1.setRelType("resultResult"); - r1.setSubRelType("relationship"); - r1.setRelClass("isRelatedTo"); - - r2.setCollectedfrom(collectedFrom); - r2.setRelType("resultResult"); - r2.setSubRelType("relationship"); - r2.setRelClass("isRelatedTo"); - } - - r1.setSource(sourceId); - r1.setTarget(targetId); - r1.setDataInfo(info); - r1.setLastupdatetimestamp(lastUpdateTimestamp); - - r2.setSource(targetId); - r2.setTarget(sourceId); - r2.setDataInfo(info); - r2.setLastupdatetimestamp(lastUpdateTimestamp); - - return Arrays.asList(r1, r2); - } - - } catch (final Exception e) { - throw new RuntimeException(e); - } - } - - private List prepareContext(final String id, final DataInfo dataInfo) { - final Context context = new Context(); - context.setId(id); - context.setDataInfo(Arrays.asList(dataInfo)); - return Arrays.asList(context); - } - - private DataInfo prepareDataInfo(final ResultSet rs) throws SQLException { - final Boolean deletedbyinference = rs.getBoolean("deletedbyinference"); - final String inferenceprovenance = rs.getString("inferenceprovenance"); - final Boolean inferred = rs.getBoolean("inferred"); - final String trust = rs.getString("trust"); - return dataInfo( - deletedbyinference, - inferenceprovenance, - inferred, - false, - MigrationConstants.ENTITYREGISTRY_PROVENANCE_ACTION, - trust); - } - - private Qualifier prepareQualifierSplitting(final String s) { - if (StringUtils.isBlank(s)) { - return null; - } - final String[] arr = s.split("@@@"); - return arr.length == 4 ? qualifier(arr[0], arr[1], arr[2], arr[3]) : null; - } - - private List> prepareListFields(final Array array, final DataInfo info) { - try { - return array != null ? listFields(info, (String[]) array.getArray()) : new ArrayList<>(); - } catch (final SQLException e) { - throw new RuntimeException("Invalid SQL array", e); - } - } - - private StructuredProperty prepareStructProp(final String s, final DataInfo dataInfo) { - if (StringUtils.isBlank(s)) { - return null; - } - final String[] parts = s.split("###"); - if (parts.length == 2) { - final String value = parts[0]; - final String[] arr = parts[1].split("@@@"); - if (arr.length == 4) { - return structuredProperty(value, arr[0], arr[1], arr[2], arr[3], dataInfo); - } - } - return null; - } - - private List prepareListOfStructProps( - final Array array, final DataInfo dataInfo) throws SQLException { - final List res = new ArrayList<>(); - if (array != null) { - for (final String s : (String[]) array.getArray()) { - final StructuredProperty sp = prepareStructProp(s, dataInfo); - if (sp != null) { - res.add(sp); - } - } - } - - return res; - } - - private Journal prepareJournal(final String name, final String sj, final DataInfo info) { - if (StringUtils.isNotBlank(sj)) { - final String[] arr = sj.split("@@@"); - if (arr.length == 3) { - final String issn = StringUtils.isNotBlank(arr[0]) ? arr[0] : null; - final String eissn = StringUtils.isNotBlank(arr[1]) ? arr[1] : null; - ; - final String lissn = StringUtils.isNotBlank(arr[2]) ? arr[2] : null; - ; - if (issn != null || eissn != null || lissn != null) { - return journal(name, issn, eissn, eissn, null, null, null, null, null, null, null, info); - } - } - } - return null; - } - - @Override - public void close() throws IOException { - super.close(); - dbClient.close(); - } - + implements Closeable { + + private static final Log log = LogFactory.getLog(MigrateDbEntitiesApplication.class); + + private final DbClient dbClient; + + private final long lastUpdateTimestamp; + + public static void main(final String[] args) throws Exception { + final ArgumentApplicationParser parser = new ArgumentApplicationParser( + IOUtils + .toString(MigrateDbEntitiesApplication.class + .getResourceAsStream("/eu/dnetlib/dhp/oa/graph/migrate_db_entities_parameters.json"))); + + parser.parseArgument(args); + + final String dbUrl = parser.get("postgresUrl"); + final String dbUser = parser.get("postgresUser"); + final String dbPassword = parser.get("postgresPassword"); + + final String hdfsPath = parser.get("hdfsPath"); + + final boolean processClaims = parser.get("action") != null && parser.get("action").equalsIgnoreCase("claims"); + + try (final MigrateDbEntitiesApplication smdbe = new MigrateDbEntitiesApplication(hdfsPath, dbUrl, dbUser, + dbPassword)) { + if (processClaims) { + log.info("Processing claims..."); + smdbe.execute("queryClaims.sql", smdbe::processClaims); + } else { + log.info("Processing datasources..."); + smdbe.execute("queryDatasources.sql", smdbe::processDatasource); + + log.info("Processing projects..."); + smdbe.execute("queryProjects.sql", smdbe::processProject); + + log.info("Processing orgs..."); + smdbe.execute("queryOrganizations.sql", smdbe::processOrganization); + + log.info("Processing relations ds <-> orgs ..."); + smdbe.execute("queryDatasourceOrganization.sql", smdbe::processDatasourceOrganization); + + log.info("Processing projects <-> orgs ..."); + smdbe.execute("queryProjectOrganization.sql", smdbe::processProjectOrganization); + } + log.info("All done."); + } + } + + protected MigrateDbEntitiesApplication() { // ONLY FOR UNIT TEST + super(); + this.dbClient = null; + this.lastUpdateTimestamp = new Date().getTime(); + } + + public MigrateDbEntitiesApplication( + final String hdfsPath, final String dbUrl, final String dbUser, final String dbPassword) + throws Exception { + super(hdfsPath); + this.dbClient = new DbClient(dbUrl, dbUser, dbPassword); + this.lastUpdateTimestamp = new Date().getTime(); + } + + public void execute(final String sqlFile, final Function> producer) + throws Exception { + final String sql = IOUtils.toString(getClass().getResourceAsStream("/eu/dnetlib/dhp/oa/graph/sql/" + sqlFile)); + + final Consumer consumer = rs -> producer.apply(rs).forEach(oaf -> emitOaf(oaf)); + + dbClient.processResults(sql, consumer); + } + + public List processDatasource(final ResultSet rs) { + + try { + + final DataInfo info = prepareDataInfo(rs); + + final Datasource ds = new Datasource(); + + ds.setId(createOpenaireId(10, rs.getString("datasourceid"), true)); + ds.setOriginalId(Arrays.asList(rs.getString("datasourceid"))); + ds + .setCollectedfrom(listKeyValues(createOpenaireId(10, rs.getString("collectedfromid"), true), rs.getString("collectedfromname"))); + ds.setPid(new ArrayList<>()); + ds.setDateofcollection(asString(rs.getDate("dateofcollection"))); + ds.setDateoftransformation(null); // Value not returned by the SQL query + ds.setExtraInfo(new ArrayList<>()); // Values not present in the DB + ds.setOaiprovenance(null); // Values not present in the DB + ds.setDatasourcetype(prepareQualifierSplitting(rs.getString("datasourcetype"))); + ds.setOpenairecompatibility(prepareQualifierSplitting(rs.getString("openairecompatibility"))); + ds.setOfficialname(field(rs.getString("officialname"), info)); + ds.setEnglishname(field(rs.getString("englishname"), info)); + ds.setWebsiteurl(field(rs.getString("websiteurl"), info)); + ds.setLogourl(field(rs.getString("logourl"), info)); + ds.setContactemail(field(rs.getString("contactemail"), info)); + ds.setNamespaceprefix(field(rs.getString("namespaceprefix"), info)); + ds.setLatitude(field(Double.toString(rs.getDouble("latitude")), info)); + ds.setLongitude(field(Double.toString(rs.getDouble("longitude")), info)); + ds.setDateofvalidation(field(asString(rs.getDate("dateofvalidation")), info)); + ds.setDescription(field(rs.getString("description"), info)); + ds.setSubjects(prepareListOfStructProps(rs.getArray("subjects"), info)); + ds.setOdnumberofitems(field(Double.toString(rs.getInt("odnumberofitems")), info)); + ds.setOdnumberofitemsdate(field(asString(rs.getDate("odnumberofitemsdate")), info)); + ds.setOdpolicies(field(rs.getString("odpolicies"), info)); + ds.setOdlanguages(prepareListFields(rs.getArray("odlanguages"), info)); + ds.setOdcontenttypes(prepareListFields(rs.getArray("odcontenttypes"), info)); + ds.setAccessinfopackage(prepareListFields(rs.getArray("accessinfopackage"), info)); + ds.setReleasestartdate(field(asString(rs.getDate("releasestartdate")), info)); + ds.setReleaseenddate(field(asString(rs.getDate("releaseenddate")), info)); + ds.setMissionstatementurl(field(rs.getString("missionstatementurl"), info)); + ds.setDataprovider(field(rs.getBoolean("dataprovider"), info)); + ds.setServiceprovider(field(rs.getBoolean("serviceprovider"), info)); + ds.setDatabaseaccesstype(field(rs.getString("databaseaccesstype"), info)); + ds.setDatauploadtype(field(rs.getString("datauploadtype"), info)); + ds.setDatabaseaccessrestriction(field(rs.getString("databaseaccessrestriction"), info)); + ds.setDatauploadrestriction(field(rs.getString("datauploadrestriction"), info)); + ds.setVersioning(field(rs.getBoolean("versioning"), info)); + ds.setCitationguidelineurl(field(rs.getString("citationguidelineurl"), info)); + ds.setQualitymanagementkind(field(rs.getString("qualitymanagementkind"), info)); + ds.setPidsystems(field(rs.getString("pidsystems"), info)); + ds.setCertificates(field(rs.getString("certificates"), info)); + ds.setPolicies(new ArrayList<>()); // The sql query returns an empty array + ds + .setJournal(prepareJournal(rs.getString("officialname"), rs.getString("journal"), info)); // Journal + ds.setDataInfo(info); + ds.setLastupdatetimestamp(lastUpdateTimestamp); + + return Arrays.asList(ds); + } catch (final Exception e) { + throw new RuntimeException(e); + } + } + + public List processProject(final ResultSet rs) { + try { + + final DataInfo info = prepareDataInfo(rs); + + final Project p = new Project(); + + p.setId(createOpenaireId(40, rs.getString("projectid"), true)); + p.setOriginalId(Arrays.asList(rs.getString("projectid"))); + p + .setCollectedfrom(listKeyValues(createOpenaireId(10, rs.getString("collectedfromid"), true), rs.getString("collectedfromname"))); + p.setPid(new ArrayList<>()); + p.setDateofcollection(asString(rs.getDate("dateofcollection"))); + p.setDateoftransformation(asString(rs.getDate("dateoftransformation"))); + p.setExtraInfo(new ArrayList<>()); // Values not present in the DB + p.setOaiprovenance(null); // Values not present in the DB + p.setWebsiteurl(field(rs.getString("websiteurl"), info)); + p.setCode(field(rs.getString("code"), info)); + p.setAcronym(field(rs.getString("acronym"), info)); + p.setTitle(field(rs.getString("title"), info)); + p.setStartdate(field(asString(rs.getDate("startdate")), info)); + p.setEnddate(field(asString(rs.getDate("enddate")), info)); + p.setCallidentifier(field(rs.getString("callidentifier"), info)); + p.setKeywords(field(rs.getString("keywords"), info)); + p.setDuration(field(Integer.toString(rs.getInt("duration")), info)); + p.setEcsc39(field(Boolean.toString(rs.getBoolean("ecsc39")), info)); + p + .setOamandatepublications(field(Boolean.toString(rs.getBoolean("oamandatepublications")), info)); + p.setEcarticle29_3(field(Boolean.toString(rs.getBoolean("ecarticle29_3")), info)); + p.setSubjects(prepareListOfStructProps(rs.getArray("subjects"), info)); + p.setFundingtree(prepareListFields(rs.getArray("fundingtree"), info)); + p.setContracttype(prepareQualifierSplitting(rs.getString("contracttype"))); + p.setOptional1(field(rs.getString("optional1"), info)); + p.setOptional2(field(rs.getString("optional2"), info)); + p.setJsonextrainfo(field(rs.getString("jsonextrainfo"), info)); + p.setContactfullname(field(rs.getString("contactfullname"), info)); + p.setContactfax(field(rs.getString("contactfax"), info)); + p.setContactphone(field(rs.getString("contactphone"), info)); + p.setContactemail(field(rs.getString("contactemail"), info)); + p.setSummary(field(rs.getString("summary"), info)); + p.setCurrency(field(rs.getString("currency"), info)); + p.setTotalcost(new Float(rs.getDouble("totalcost"))); + p.setFundedamount(new Float(rs.getDouble("fundedamount"))); + p.setDataInfo(info); + p.setLastupdatetimestamp(lastUpdateTimestamp); + + return Arrays.asList(p); + + } catch (final Exception e) { + throw new RuntimeException(e); + } + } + + public List processOrganization(final ResultSet rs) { + + try { + + final DataInfo info = prepareDataInfo(rs); + + final Organization o = new Organization(); + + o.setId(createOpenaireId(20, rs.getString("organizationid"), true)); + o.setOriginalId(Arrays.asList(rs.getString("organizationid"))); + o + .setCollectedfrom(listKeyValues(createOpenaireId(10, rs.getString("collectedfromid"), true), rs.getString("collectedfromname"))); + o.setPid(new ArrayList<>()); + o.setDateofcollection(asString(rs.getDate("dateofcollection"))); + o.setDateoftransformation(asString(rs.getDate("dateoftransformation"))); + o.setExtraInfo(new ArrayList<>()); // Values not present in the DB + o.setOaiprovenance(null); // Values not present in the DB + o.setLegalshortname(field(rs.getString("legalshortname"), info)); + o.setLegalname(field(rs.getString("legalname"), info)); + o.setAlternativeNames(new ArrayList<>()); // Values not returned by the SQL query + o.setWebsiteurl(field(rs.getString("websiteurl"), info)); + o.setLogourl(field(rs.getString("logourl"), info)); + o.setEclegalbody(field(Boolean.toString(rs.getBoolean("eclegalbody")), info)); + o.setEclegalperson(field(Boolean.toString(rs.getBoolean("eclegalperson")), info)); + o.setEcnonprofit(field(Boolean.toString(rs.getBoolean("ecnonprofit")), info)); + o + .setEcresearchorganization(field(Boolean.toString(rs.getBoolean("ecresearchorganization")), info)); + o.setEchighereducation(field(Boolean.toString(rs.getBoolean("echighereducation")), info)); + o + .setEcinternationalorganizationeurinterests(field(Boolean.toString(rs.getBoolean("ecinternationalorganizationeurinterests")), info)); + o + .setEcinternationalorganization(field(Boolean.toString(rs.getBoolean("ecinternationalorganization")), info)); + o.setEcenterprise(field(Boolean.toString(rs.getBoolean("ecenterprise")), info)); + o.setEcsmevalidated(field(Boolean.toString(rs.getBoolean("ecsmevalidated")), info)); + o.setEcnutscode(field(Boolean.toString(rs.getBoolean("ecnutscode")), info)); + o.setCountry(prepareQualifierSplitting(rs.getString("country"))); + o.setDataInfo(info); + o.setLastupdatetimestamp(lastUpdateTimestamp); + + return Arrays.asList(o); + } catch (final Exception e) { + throw new RuntimeException(e); + } + } + + public List processDatasourceOrganization(final ResultSet rs) { + try { + final DataInfo info = prepareDataInfo(rs); + final String orgId = createOpenaireId(20, rs.getString("organization"), true); + final String dsId = createOpenaireId(10, rs.getString("datasource"), true); + final List collectedFrom = listKeyValues(createOpenaireId(10, rs.getString("collectedfromid"), true), rs.getString("collectedfromname")); + + final Relation r1 = new Relation(); + r1.setRelType("datasourceOrganization"); + r1.setSubRelType("provision"); + r1.setRelClass("isProvidedBy"); + r1.setSource(dsId); + r1.setTarget(orgId); + r1.setCollectedfrom(collectedFrom); + r1.setDataInfo(info); + r1.setLastupdatetimestamp(lastUpdateTimestamp); + + final Relation r2 = new Relation(); + r2.setRelType("datasourceOrganization"); + r2.setSubRelType("provision"); + r2.setRelClass("provides"); + r2.setSource(orgId); + r2.setTarget(dsId); + r2.setCollectedfrom(collectedFrom); + r2.setDataInfo(info); + r2.setLastupdatetimestamp(lastUpdateTimestamp); + + return Arrays.asList(r1, r2); + } catch (final Exception e) { + throw new RuntimeException(e); + } + } + + public List processProjectOrganization(final ResultSet rs) { + try { + final DataInfo info = prepareDataInfo(rs); + final String orgId = createOpenaireId(20, rs.getString("resporganization"), true); + final String projectId = createOpenaireId(40, rs.getString("project"), true); + final List collectedFrom = listKeyValues(createOpenaireId(10, rs.getString("collectedfromid"), true), rs.getString("collectedfromname")); + + final Relation r1 = new Relation(); + r1.setRelType("projectOrganization"); + r1.setSubRelType("participation"); + r1.setRelClass("hasParticipant"); + r1.setSource(projectId); + r1.setTarget(orgId); + r1.setCollectedfrom(collectedFrom); + r1.setDataInfo(info); + r1.setLastupdatetimestamp(lastUpdateTimestamp); + + final Relation r2 = new Relation(); + r2.setRelType("projectOrganization"); + r2.setSubRelType("participation"); + r2.setRelClass("isParticipant"); + r2.setSource(orgId); + r2.setTarget(projectId); + r2.setCollectedfrom(collectedFrom); + r2.setDataInfo(info); + r2.setLastupdatetimestamp(lastUpdateTimestamp); + + return Arrays.asList(r1, r2); + } catch (final Exception e) { + throw new RuntimeException(e); + } + } + + public List processClaims(final ResultSet rs) { + + final DataInfo info = + dataInfo(false, null, false, false, qualifier("user:claim", "user:claim", "dnet:provenanceActions", "dnet:provenanceActions"), "0.9"); + + final List collectedFrom = listKeyValues(createOpenaireId(10, "infrastruct_::openaire", true), "OpenAIRE"); + + try { + + if (rs.getString("source_type").equals("context")) { + final Result r; + + if (rs.getString("target_type").equals("dataset")) { + r = new Dataset(); + r.setResulttype(MigrationConstants.DATASET_RESULTTYPE_QUALIFIER); + } else if (rs.getString("target_type").equals("software")) { + r = new Software(); + r.setResulttype(MigrationConstants.SOFTWARE_RESULTTYPE_QUALIFIER); + } else if (rs.getString("target_type").equals("other")) { + r = new OtherResearchProduct(); + r.setResulttype(MigrationConstants.OTHER_RESULTTYPE_QUALIFIER); + } else { + r = new Publication(); + r.setResulttype(MigrationConstants.PUBLICATION_RESULTTYPE_QUALIFIER); + } + r.setId(createOpenaireId(50, rs.getString("target_id"), false)); + r.setLastupdatetimestamp(lastUpdateTimestamp); + r.setContext(prepareContext(rs.getString("source_id"), info)); + r.setDataInfo(info); + r.setCollectedfrom(collectedFrom); + + return Arrays.asList(r); + } else { + final String sourceId = createOpenaireId(rs.getString("source_type"), rs.getString("source_id"), false); + final String targetId = createOpenaireId(rs.getString("target_type"), rs.getString("target_id"), false); + + final Relation r1 = new Relation(); + final Relation r2 = new Relation(); + + if (rs.getString("source_type").equals("project")) { + r1.setCollectedfrom(collectedFrom); + r1.setRelType("resultProject"); + r1.setSubRelType("outcome"); + r1.setRelClass("produces"); + + r2.setCollectedfrom(collectedFrom); + r2.setRelType("resultProject"); + r2.setSubRelType("outcome"); + r2.setRelClass("isProducedBy"); + } else { + r1.setCollectedfrom(collectedFrom); + r1.setRelType("resultResult"); + r1.setSubRelType("relationship"); + r1.setRelClass("isRelatedTo"); + + r2.setCollectedfrom(collectedFrom); + r2.setRelType("resultResult"); + r2.setSubRelType("relationship"); + r2.setRelClass("isRelatedTo"); + } + + r1.setSource(sourceId); + r1.setTarget(targetId); + r1.setDataInfo(info); + r1.setLastupdatetimestamp(lastUpdateTimestamp); + + r2.setSource(targetId); + r2.setTarget(sourceId); + r2.setDataInfo(info); + r2.setLastupdatetimestamp(lastUpdateTimestamp); + + return Arrays.asList(r1, r2); + } + + } catch (final Exception e) { + throw new RuntimeException(e); + } + } + + private List prepareContext(final String id, final DataInfo dataInfo) { + final Context context = new Context(); + context.setId(id); + context.setDataInfo(Arrays.asList(dataInfo)); + return Arrays.asList(context); + } + + private DataInfo prepareDataInfo(final ResultSet rs) throws SQLException { + final Boolean deletedbyinference = rs.getBoolean("deletedbyinference"); + final String inferenceprovenance = rs.getString("inferenceprovenance"); + final Boolean inferred = rs.getBoolean("inferred"); + final String trust = rs.getString("trust"); + return dataInfo(deletedbyinference, inferenceprovenance, inferred, false, MigrationConstants.ENTITYREGISTRY_PROVENANCE_ACTION, trust); + } + + private Qualifier prepareQualifierSplitting(final String s) { + if (StringUtils.isBlank(s)) { return null; } + final String[] arr = s.split("@@@"); + return arr.length == 4 ? qualifier(arr[0], arr[1], arr[2], arr[3]) : null; + } + + private List> prepareListFields(final Array array, final DataInfo info) { + try { + return array != null ? listFields(info, (String[]) array.getArray()) : new ArrayList<>(); + } catch (final SQLException e) { + throw new RuntimeException("Invalid SQL array", e); + } + } + + private StructuredProperty prepareStructProp(final String s, final DataInfo dataInfo) { + if (StringUtils.isBlank(s)) { return null; } + final String[] parts = s.split("###"); + if (parts.length == 2) { + final String value = parts[0]; + final String[] arr = parts[1].split("@@@"); + if (arr.length == 4) { return structuredProperty(value, arr[0], arr[1], arr[2], arr[3], dataInfo); } + } + return null; + } + + private List prepareListOfStructProps( + final Array array, + final DataInfo dataInfo) throws SQLException { + final List res = new ArrayList<>(); + if (array != null) { + for (final String s : (String[]) array.getArray()) { + final StructuredProperty sp = prepareStructProp(s, dataInfo); + if (sp != null) { + res.add(sp); + } + } + } + + return res; + } + + private Journal prepareJournal(final String name, final String sj, final DataInfo info) { + if (StringUtils.isNotBlank(sj)) { + final String[] arr = sj.split("@@@"); + if (arr.length == 3) { + final String issn = StringUtils.isNotBlank(arr[0]) ? arr[0].trim() : null; + final String eissn = StringUtils.isNotBlank(arr[1]) ? arr[1].trim() : null;; + final String lissn = StringUtils.isNotBlank(arr[2]) ? arr[2].trim() : null;; + if (issn != null || eissn != null || lissn != null) { + return journal(name, issn, eissn, eissn, null, null, null, null, null, null, null, info); + } + } + } + return null; + } + + @Override + public void close() throws IOException { + super.close(); + dbClient.close(); + } } diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/common/DbClient.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/common/DbClient.java index 121df8131..94f17aad5 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/common/DbClient.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/common/DbClient.java @@ -14,7 +14,7 @@ public class DbClient implements Closeable { private static final Log log = LogFactory.getLog(DbClient.class); - private Connection connection; + private final Connection connection; public DbClient(final String address, final String login, final String password) { diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/common/PacePerson.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/common/PacePerson.java index 8adcd565b..d1c615dcd 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/common/PacePerson.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/common/PacePerson.java @@ -2,6 +2,7 @@ package eu.dnetlib.dhp.oa.graph.raw.common; import java.nio.charset.Charset; +import java.nio.charset.StandardCharsets; import java.text.Normalizer; import java.util.HashSet; import java.util.List; @@ -141,7 +142,7 @@ public class PacePerson { public String hash() { return Hashing .murmur3_128() - .hashString(getNormalisedFullname(), Charset.forName(UTF8)) + .hashString(getNormalisedFullname(), StandardCharsets.UTF_8) .toString(); } diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph/parser/AbstractScholexplorerParser.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph/parser/AbstractScholexplorerParser.java index c97753fdc..0db2b2688 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph/parser/AbstractScholexplorerParser.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph/parser/AbstractScholexplorerParser.java @@ -25,7 +25,7 @@ public abstract class AbstractScholexplorerParser { protected static final Log log = LogFactory.getLog(AbstractScholexplorerParser.class); static final Pattern pattern = Pattern.compile("10\\.\\d{4,9}/[-._;()/:A-Z0-9]+$", Pattern.CASE_INSENSITIVE); - private List datasetSubTypes = Arrays + private final List datasetSubTypes = Arrays .asList( "dataset", "software", diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/sql/queryDatasources.sql b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/sql/queryDatasources.sql index 9a0a45f78..ff1178c71 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/sql/queryDatasources.sql +++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/sql/queryDatasources.sql @@ -108,7 +108,7 @@ SELECT ELSE 'Other' END || '@@@dnet:datasource_typologies@@@dnet:datasource_typologies' AS datasourcetype, 'sysimport:crosswalk:entityregistry@@@sysimport:crosswalk:entityregistry@@@dnet:provenance_actions@@@dnet:provenance_actions' AS provenanceaction, - CONCAT(d.issn, '@@@', d.eissn, '@@@', d.lissn) AS journal + CONCAT(d.issn, ' @@@ ', d.eissn, ' @@@ ', d.lissn) AS journal FROM dsm_datasources d diff --git a/dhp-workflows/dhp-graph-provision-scholexplorer/src/main/java/eu/dnetlib/dhp/provision/update/CrossRefParserJSON.java b/dhp-workflows/dhp-graph-provision-scholexplorer/src/main/java/eu/dnetlib/dhp/provision/update/CrossRefParserJSON.java index bc9562e08..a172ef698 100644 --- a/dhp-workflows/dhp-graph-provision-scholexplorer/src/main/java/eu/dnetlib/dhp/provision/update/CrossRefParserJSON.java +++ b/dhp-workflows/dhp-graph-provision-scholexplorer/src/main/java/eu/dnetlib/dhp/provision/update/CrossRefParserJSON.java @@ -18,7 +18,7 @@ import eu.dnetlib.dhp.utils.DHPUtils; public class CrossRefParserJSON { - private static List collectedFrom = generateCrossrefCollectedFrom("complete"); + private static final List collectedFrom = generateCrossrefCollectedFrom("complete"); public static ScholixResource parseRecord(final String record) { if (record == null) diff --git a/dhp-workflows/dhp-graph-provision-scholexplorer/src/main/java/eu/dnetlib/dhp/provision/update/DataciteClient.java b/dhp-workflows/dhp-graph-provision-scholexplorer/src/main/java/eu/dnetlib/dhp/provision/update/DataciteClient.java index e84ec4376..9e9f0d5c9 100644 --- a/dhp-workflows/dhp-graph-provision-scholexplorer/src/main/java/eu/dnetlib/dhp/provision/update/DataciteClient.java +++ b/dhp-workflows/dhp-graph-provision-scholexplorer/src/main/java/eu/dnetlib/dhp/provision/update/DataciteClient.java @@ -16,7 +16,7 @@ public class DataciteClient { private String host; private String index = "datacite"; private String indexType = "dump"; - private Datacite2Scholix d2s; + private final Datacite2Scholix d2s; public DataciteClient(String host) { this.host = host; diff --git a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/RelationPartitioner.java b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/RelationPartitioner.java index bac2278e6..a09a27837 100644 --- a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/RelationPartitioner.java +++ b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/RelationPartitioner.java @@ -12,7 +12,7 @@ import eu.dnetlib.dhp.oa.provision.model.SortableRelation; */ public class RelationPartitioner extends Partitioner { - private int numPartitions; + private final int numPartitions; public RelationPartitioner(int numPartitions) { this.numPartitions = numPartitions; diff --git a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/StreamingInputDocumentFactory.java b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/StreamingInputDocumentFactory.java index de221b2ee..3e8abbd9f 100644 --- a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/StreamingInputDocumentFactory.java +++ b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/StreamingInputDocumentFactory.java @@ -46,7 +46,7 @@ public class StreamingInputDocumentFactory { private static final String INDEX_RECORD_ID = INDEX_FIELD_PREFIX + "indexrecordidentifier"; - private static final String outFormat = new String("yyyy-MM-dd'T'hh:mm:ss'Z'"); + private static final String outFormat = "yyyy-MM-dd'T'hh:mm:ss'Z'"; private static final List dateFormats = Arrays .asList("yyyy-MM-dd'T'hh:mm:ss", "yyyy-MM-dd", "dd-MM-yyyy", "dd/MM/yyyy", "yyyy"); @@ -61,15 +61,18 @@ public class StreamingInputDocumentFactory { private static final int MAX_FIELD_LENGTH = 25000; - private ThreadLocal inputFactory = ThreadLocal.withInitial(() -> XMLInputFactory.newInstance()); + private final ThreadLocal inputFactory = ThreadLocal + .withInitial(() -> XMLInputFactory.newInstance()); - private ThreadLocal outputFactory = ThreadLocal.withInitial(() -> XMLOutputFactory.newInstance()); + private final ThreadLocal outputFactory = ThreadLocal + .withInitial(() -> XMLOutputFactory.newInstance()); - private ThreadLocal eventFactory = ThreadLocal.withInitial(() -> XMLEventFactory.newInstance()); + private final ThreadLocal eventFactory = ThreadLocal + .withInitial(() -> XMLEventFactory.newInstance()); - private String version; + private final String version; - private String dsId; + private final String dsId; private String resultName = DEFAULTDNETRESULT; diff --git a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/TemplateFactory.java b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/TemplateFactory.java index 3d9cf1ae7..6cb025b4f 100644 --- a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/TemplateFactory.java +++ b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/TemplateFactory.java @@ -17,7 +17,7 @@ import eu.dnetlib.dhp.schema.oaf.OafEntity; public class TemplateFactory { - private TemplateResources resources; + private final TemplateResources resources; private static final char DELIMITER = '$'; diff --git a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/TemplateResources.java b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/TemplateResources.java index 746f8ebe6..878a582bf 100644 --- a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/TemplateResources.java +++ b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/TemplateResources.java @@ -8,17 +8,17 @@ import com.google.common.io.Resources; public class TemplateResources { - private String record = read("eu/dnetlib/dhp/oa/provision/template/record.st"); + private final String record = read("eu/dnetlib/dhp/oa/provision/template/record.st"); - private String instance = read("eu/dnetlib/dhp/oa/provision/template/instance.st"); + private final String instance = read("eu/dnetlib/dhp/oa/provision/template/instance.st"); - private String rel = read("eu/dnetlib/dhp/oa/provision/template/rel.st"); + private final String rel = read("eu/dnetlib/dhp/oa/provision/template/rel.st"); - private String webresource = read("eu/dnetlib/dhp/oa/provision/template/webresource.st"); + private final String webresource = read("eu/dnetlib/dhp/oa/provision/template/webresource.st"); - private String child = read("eu/dnetlib/dhp/oa/provision/template/child.st"); + private final String child = read("eu/dnetlib/dhp/oa/provision/template/child.st"); - private String entity = read("eu/dnetlib/dhp/oa/provision/template/entity.st"); + private final String entity = read("eu/dnetlib/dhp/oa/provision/template/entity.st"); private static String read(final String classpathResource) throws IOException { return Resources.toString(Resources.getResource(classpathResource), StandardCharsets.UTF_8); diff --git a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/XmlRecordFactory.java b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/XmlRecordFactory.java index f667d9f3c..2cff2124e 100644 --- a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/XmlRecordFactory.java +++ b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/XmlRecordFactory.java @@ -48,13 +48,13 @@ import eu.dnetlib.dhp.schema.oaf.Result; public class XmlRecordFactory implements Serializable { public static final String REL_SUBTYPE_DEDUP = "dedup"; - private Map accumulators; + private final Map accumulators; - private Set specialDatasourceTypes; + private final Set specialDatasourceTypes; - private ContextMapper contextMapper; + private final ContextMapper contextMapper; - private String schemaLocation; + private final String schemaLocation; private boolean indent = false; diff --git a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/XmlSerializationUtils.java b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/XmlSerializationUtils.java index bc3b3107d..8195467b1 100644 --- a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/XmlSerializationUtils.java +++ b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/XmlSerializationUtils.java @@ -41,7 +41,7 @@ public class XmlSerializationUtils { public static String mapStructuredProperty(String name, StructuredProperty t) { return asXmlElement( - name, t.getValue(), t.getQualifier(), t.getDataInfo() != null ? t.getDataInfo() : null); + name, t.getValue(), t.getQualifier(), t.getDataInfo()); } public static String mapQualifier(String name, Qualifier q) { diff --git a/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/GraphJoinerTest.java b/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/GraphJoinerTest.java index 8afe03d6d..1336a1cf7 100644 --- a/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/GraphJoinerTest.java +++ b/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/GraphJoinerTest.java @@ -9,7 +9,7 @@ import org.junit.jupiter.api.BeforeEach; public class GraphJoinerTest { - private ClassLoader cl = getClass().getClassLoader(); + private final ClassLoader cl = getClass().getClassLoader(); private Path workingDir; private Path inputDir; private Path outputDir; diff --git a/dhp-workflows/dhp-stats-update/pom.xml b/dhp-workflows/dhp-stats-update/pom.xml index 4d6318c3e..0f5e18082 100644 --- a/dhp-workflows/dhp-stats-update/pom.xml +++ b/dhp-workflows/dhp-stats-update/pom.xml @@ -22,6 +22,7 @@ pl.project13.maven git-commit-id-plugin + 2.1.11 false diff --git a/dhp-workflows/dhp-worfklow-profiles/pom.xml b/dhp-workflows/dhp-worfklow-profiles/pom.xml index df90014ba..bad72a9ef 100644 --- a/dhp-workflows/dhp-worfklow-profiles/pom.xml +++ b/dhp-workflows/dhp-worfklow-profiles/pom.xml @@ -3,13 +3,27 @@ xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd"> - dhp + dhp-workflows eu.dnetlib.dhp 1.1.7-SNAPSHOT 4.0.0 dhp-worfklow-profiles + jar + \ No newline at end of file diff --git a/dhp-workflows/pom.xml b/dhp-workflows/pom.xml index faa5d65dd..a13df1de6 100644 --- a/dhp-workflows/pom.xml +++ b/dhp-workflows/pom.xml @@ -13,6 +13,8 @@ dhp-workflows pom + This module is the container for the oozie workflow definitions in dnet-hadoop project + dhp-worfklow-profiles dhp-aggregation @@ -184,7 +186,7 @@ org.kuali.maven.plugins properties-maven-plugin - 1.3.2 + ${properties.maven.plugin.version} eu.dnetlib.dhp @@ -564,60 +566,4 @@ - - - - - - org.eclipse.m2e - lifecycle-mapping - 1.0.0 - - - - - - - org.kuali.maven.plugins - - - properties-maven-plugin - - - [1.3.2,) - - - - read-project-properties - - - write-project-properties - - - - - - - - - - - org.apache.maven.plugins - maven-dependency-plugin - [1.0.0,) - - copy-dependencies - - - - - - - - - - - - - diff --git a/pom.xml b/pom.xml index bb48c04a1..483873219 100644 --- a/pom.xml +++ b/pom.xml @@ -6,14 +6,14 @@ 1.1.7-SNAPSHOT pom - http://www.d-net.research-infrastructures.eu - - The Apache Software License, Version 2.0 - http://www.apache.org/licenses/LICENSE-2.0.txt + GNU Affero General Public License v3.0 or later + https://spdx.org/licenses/AGPL-3.0-or-later.html#licenseText repo - A business-friendly OSS license + This program is free software: you can redistribute it and/or modify it under the terms of the + GNU Affero General Public License as published by the Free Software Foundation, either version 3 of the + License, or (at your option) any later version. @@ -41,6 +41,8 @@ HEAD + This module is the root descriptor for the dnet-hadoop project + @@ -406,6 +408,18 @@ target/test-classes + + + org.apache.maven.plugins + maven-project-info-reports-plugin + 3.0.0 + + + org.apache.maven.plugins + maven-site-plugin + 3.7.1 + + org.apache.maven.plugins maven-compiler-plugin @@ -449,9 +463,10 @@ org.apache.maven.plugins maven-javadoc-plugin - 2.10.4 + 3.2.0 true + none @@ -475,6 +490,14 @@ + + org.apache.maven.plugins + maven-site-plugin + + + org.apache.maven.plugins + maven-project-info-reports-plugin + net.revelc.code.formatter formatter-maven-plugin @@ -569,9 +592,9 @@ org.apache.maven.plugins maven-javadoc-plugin - 2.10.4 true + none @@ -582,6 +605,7 @@ UTF-8 3.6.0 2.22.2 + 2.0.1 cdh5.9.2 2.6.0-${dhp.cdh.version} 4.1.0-${dhp.cdh.version}