merge with upstream

This commit is contained in:
Miriam Baglioni 2020-04-30 11:15:54 +02:00
commit 3abb76ff7a
42 changed files with 783 additions and 819 deletions

View File

@ -12,6 +12,8 @@
<artifactId>dhp-build-assembly-resources</artifactId>
<packaging>jar</packaging>
<description>This module contains a set of scripts supporting the build lifecycle for the dnet-hadoop project</description>
<build>
<plugins>
<plugin>

View File

@ -12,22 +12,29 @@
<artifactId>dhp-build-properties-maven-plugin</artifactId>
<packaging>maven-plugin</packaging>
<description>This module is a maven plugin implementing custom properties substitutions in the build lifecycle</description>
<dependencies>
<dependency>
<groupId>org.apache.maven</groupId>
<artifactId>maven-plugin-api</artifactId>
<version>2.0</version>
<version>3.6.3</version>
</dependency>
<dependency>
<groupId>org.apache.maven</groupId>
<artifactId>maven-project</artifactId>
<version>2.0</version>
<version>2.2.1</version>
</dependency>
<dependency>
<groupId>org.apache.maven</groupId>
<artifactId>maven-artifact</artifactId>
<version>2.2.1</version>
</dependency>
<dependency>
<groupId>org.kuali.maven.plugins</groupId>
<artifactId>properties-maven-plugin</artifactId>
<version>1.3.2</version>
<version>${properties.maven.plugin.version}</version>
</dependency>
<dependency>
<groupId>com.google.code.findbugs</groupId>
@ -73,44 +80,10 @@
<artifactId>maven-javadoc-plugin</artifactId>
<configuration>
<detectLinks>true</detectLinks>
<doclint>none</doclint>
</configuration>
</plugin>
</plugins>
<pluginManagement>
<plugins>
<!--This plugin's configuration is used to store Eclipse m2e settings only. It has no influence on the Maven build itself.-->
<plugin>
<groupId>org.eclipse.m2e</groupId>
<artifactId>lifecycle-mapping</artifactId>
<version>1.0.0</version>
<configuration>
<lifecycleMappingMetadata>
<pluginExecutions>
<pluginExecution>
<pluginExecutionFilter>
<groupId>
org.apache.maven.plugins
</groupId>
<artifactId>
maven-plugin-plugin
</artifactId>
<versionRange>
[3.2,)
</versionRange>
<goals>
<goal>descriptor</goal>
</goals>
</pluginExecutionFilter>
<action>
<ignore />
</action>
</pluginExecution>
</pluginExecutions>
</lifecycleMappingMetadata>
</configuration>
</plugin>
</plugins>
</pluginManagement>
</build>
</project>

View File

@ -40,7 +40,7 @@ import edu.umd.cs.findbugs.annotations.SuppressFBWarnings;
/**
* Writes project properties for the keys listed in specified properties files. Based on:
* http://site.kuali.org/maven/plugins/properties-maven-plugin/1.3.2/write-project-properties-mojo.html
* http://site.kuali.org/maven/plugins/properties-maven-plugin/2.0.1/write-project-properties-mojo.html
*
* @author mhorst
* @goal write-project-properties

View File

@ -11,7 +11,7 @@ import org.junit.jupiter.api.Test;
/** @author mhorst, claudio.atzori */
public class GenerateOoziePropertiesMojoTest {
private GenerateOoziePropertiesMojo mojo = new GenerateOoziePropertiesMojo();
private final GenerateOoziePropertiesMojo mojo = new GenerateOoziePropertiesMojo();
@BeforeEach
public void clearSystemProperties() {

View File

@ -366,7 +366,7 @@ public class WritePredefinedProjectPropertiesTest {
}
private Properties getStoredProperties(File testFolder)
throws FileNotFoundException, IOException {
throws IOException {
Properties properties = new Properties();
properties.load(new FileInputStream(getPropertiesFileLocation(testFolder)));
return properties;

View File

@ -11,6 +11,38 @@
<packaging>jar</packaging>
<description>This module contains resources supporting common code style conventions</description>
<distributionManagement>
<snapshotRepository>
<id>dnet45-snapshots</id>
<name>DNet45 Snapshots</name>
<url>http://maven.research-infrastructures.eu/nexus/content/repositories/dnet45-snapshots</url>
<layout>default</layout>
</snapshotRepository>
<repository>
<id>dnet45-releases</id>
<url>http://maven.research-infrastructures.eu/nexus/content/repositories/dnet45-releases</url>
</repository>
</distributionManagement>
<build>
<pluginManagement>
<plugins>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-project-info-reports-plugin</artifactId>
<version>3.0.0</version>
</plugin>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-site-plugin</artifactId>
<version>3.7.1</version>
</plugin>
</plugins>
</pluginManagement>
</build>
<properties>
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
</properties>

View File

@ -8,6 +8,9 @@
</parent>
<artifactId>dhp-build</artifactId>
<packaging>pom</packaging>
<description>This module is a container for the build tools used in dnet-hadoop</description>
<modules>
<module>dhp-code-style</module>
<module>dhp-build-assembly-resources</module>

View File

@ -12,6 +12,8 @@
<artifactId>dhp-common</artifactId>
<packaging>jar</packaging>
<description>This module contains common utilities meant to be used across the dnet-hadoop submodules</description>
<dependencies>
<dependency>

View File

@ -21,7 +21,7 @@ public class DHPUtils {
public static String md5(final String s) {
try {
final MessageDigest md = MessageDigest.getInstance("MD5");
md.update(s.getBytes("UTF-8"));
md.update(s.getBytes(StandardCharsets.UTF_8));
return new String(Hex.encodeHex(md.digest()));
} catch (final Exception e) {
System.err.println("Error creating id");

View File

@ -17,7 +17,7 @@ public class NormalizeDate extends AbstractExtensionFunction {
"yyyy-MM-dd'T'hh:mm:ss", "yyyy-MM-dd", "yyyy/MM/dd", "yyyy"
};
private static final String normalizeOutFormat = new String("yyyy-MM-dd'T'hh:mm:ss'Z'");
private static final String normalizeOutFormat = "yyyy-MM-dd'T'hh:mm:ss'Z'";
@Override
public String getName() {

View File

@ -21,7 +21,7 @@ public class MessageManager {
private Connection connection;
private Map<String, Channel> channels = new HashMap<>();
private final Map<String, Channel> channels = new HashMap<>();
private boolean durable;

View File

@ -12,7 +12,7 @@
<artifactId>dhp-schemas</artifactId>
<packaging>jar</packaging>
<description>This module contains common schema classes meant to be used across the dnet-hadoop submodules</description>
<dependencies>

View File

@ -16,7 +16,7 @@ public class AtomicActionDeserializer extends JsonDeserializer {
@Override
public Object deserialize(JsonParser jp, DeserializationContext ctxt)
throws IOException, JsonProcessingException {
throws IOException {
JsonNode node = jp.getCodec().readTree(jp);
String classTag = node.get("clazz").asText();
JsonNode payload = node.get("payload");

View File

@ -9,190 +9,195 @@ import java.util.function.Function;
/** Oaf model utility methods. */
public class ModelSupport {
/** Defines the mapping between the actual entity type and the main entity type */
private static Map<EntityType, MainEntityType> entityMapping = Maps.newHashMap();
/** Defines the mapping between the actual entity type and the main entity type */
private static final Map<EntityType, MainEntityType> entityMapping = Maps.newHashMap();
static {
entityMapping.put(EntityType.publication, MainEntityType.result);
entityMapping.put(EntityType.dataset, MainEntityType.result);
entityMapping.put(EntityType.otherresearchproduct, MainEntityType.result);
entityMapping.put(EntityType.software, MainEntityType.result);
entityMapping.put(EntityType.datasource, MainEntityType.datasource);
entityMapping.put(EntityType.organization, MainEntityType.organization);
entityMapping.put(EntityType.project, MainEntityType.project);
}
static {
entityMapping.put(EntityType.publication, MainEntityType.result);
entityMapping.put(EntityType.dataset, MainEntityType.result);
entityMapping.put(EntityType.otherresearchproduct, MainEntityType.result);
entityMapping.put(EntityType.software, MainEntityType.result);
entityMapping.put(EntityType.datasource, MainEntityType.datasource);
entityMapping.put(EntityType.organization, MainEntityType.organization);
entityMapping.put(EntityType.project, MainEntityType.project);
}
/**
* Defines the mapping between the actual entity types and the relative classes implementing them
*/
public static final Map<EntityType, Class> entityTypes = Maps.newHashMap();
/**
* Defines the mapping between the actual entity types and the relative classes implementing them
*/
public static final Map<EntityType, Class> entityTypes = Maps.newHashMap();
static {
entityTypes.put(EntityType.datasource, Datasource.class);
entityTypes.put(EntityType.organization, Organization.class);
entityTypes.put(EntityType.project, Project.class);
entityTypes.put(EntityType.dataset, Dataset.class);
entityTypes.put(EntityType.otherresearchproduct, OtherResearchProduct.class);
entityTypes.put(EntityType.software, Software.class);
entityTypes.put(EntityType.publication, Publication.class);
}
static {
entityTypes.put(EntityType.datasource, Datasource.class);
entityTypes.put(EntityType.organization, Organization.class);
entityTypes.put(EntityType.project, Project.class);
entityTypes.put(EntityType.dataset, Dataset.class);
entityTypes.put(EntityType.otherresearchproduct, OtherResearchProduct.class);
entityTypes.put(EntityType.software, Software.class);
entityTypes.put(EntityType.publication, Publication.class);
}
public static final Map<String, Class> oafTypes = Maps.newHashMap();
public static final Map<String, Class> oafTypes = Maps.newHashMap();
static {
oafTypes.put("datasource", Datasource.class);
oafTypes.put("organization", Organization.class);
oafTypes.put("project", Project.class);
oafTypes.put("dataset", Dataset.class);
oafTypes.put("otherresearchproduct", OtherResearchProduct.class);
oafTypes.put("software", Software.class);
oafTypes.put("publication", Publication.class);
oafTypes.put("relation", Relation.class);
}
static {
oafTypes.put("datasource", Datasource.class);
oafTypes.put("organization", Organization.class);
oafTypes.put("project", Project.class);
oafTypes.put("dataset", Dataset.class);
oafTypes.put("otherresearchproduct", OtherResearchProduct.class);
oafTypes.put("software", Software.class);
oafTypes.put("publication", Publication.class);
oafTypes.put("relation", Relation.class);
}
private static final String schemeTemplate = "dnet:%s_%s_relations";
private static final String schemeTemplate = "dnet:%s_%s_relations";
private ModelSupport() {}
private ModelSupport() {
}
/**
* Checks subclass-superclass relationship.
*
* @param subClazzObject Subclass object instance
* @param superClazzObject Superclass object instance
* @param <X> Subclass type
* @param <Y> Superclass type
* @return True if X is a subclass of Y
*/
public static <X extends Oaf, Y extends Oaf> Boolean isSubClass(
X subClazzObject, Y superClazzObject) {
return isSubClass(subClazzObject.getClass(), superClazzObject.getClass());
}
/**
* Checks subclass-superclass relationship.
*
* @param subClazzObject Subclass object instance
* @param superClazzObject Superclass object instance
* @param <X> Subclass type
* @param <Y> Superclass type
* @return True if X is a subclass of Y
*/
public static <X extends Oaf, Y extends Oaf> Boolean isSubClass(
X subClazzObject, Y superClazzObject) {
return isSubClass(subClazzObject.getClass(), superClazzObject.getClass());
}
/**
* Checks subclass-superclass relationship.
*
* @param subClazzObject Subclass object instance
* @param superClazz Superclass class
* @param <X> Subclass type
* @param <Y> Superclass type
* @return True if X is a subclass of Y
*/
public static <X extends Oaf, Y extends Oaf> Boolean isSubClass(
X subClazzObject, Class<Y> superClazz) {
return isSubClass(subClazzObject.getClass(), superClazz);
}
/**
* Checks subclass-superclass relationship.
*
* @param subClazzObject Subclass object instance
* @param superClazz Superclass class
* @param <X> Subclass type
* @param <Y> Superclass type
* @return True if X is a subclass of Y
*/
public static <X extends Oaf, Y extends Oaf> Boolean isSubClass(
X subClazzObject, Class<Y> superClazz) {
return isSubClass(subClazzObject.getClass(), superClazz);
}
/**
* Checks subclass-superclass relationship.
*
* @param subClazz Subclass class
* @param superClazz Superclass class
* @param <X> Subclass type
* @param <Y> Superclass type
* @return True if X is a subclass of Y
*/
public static <X extends Oaf, Y extends Oaf> Boolean isSubClass(
Class<X> subClazz, Class<Y> superClazz) {
return superClazz.isAssignableFrom(subClazz);
}
/**
* Checks subclass-superclass relationship.
*
* @param subClazz Subclass class
* @param superClazz Superclass class
* @param <X> Subclass type
* @param <Y> Superclass type
* @return True if X is a subclass of Y
*/
public static <X extends Oaf, Y extends Oaf> Boolean isSubClass(
Class<X> subClazz, Class<Y> superClazz) {
return superClazz.isAssignableFrom(subClazz);
}
/**
* Lists all the OAF model classes
*
* @param <T>
* @return
*/
public static <T extends Oaf> Class<T>[] getOafModelClasses() {
return new Class[] {
Author.class,
Context.class,
Country.class,
DataInfo.class,
Dataset.class,
Datasource.class,
ExternalReference.class,
ExtraInfo.class,
Field.class,
GeoLocation.class,
Instance.class,
Journal.class,
KeyValue.class,
Oaf.class,
OafEntity.class,
OAIProvenance.class,
Organization.class,
OriginDescription.class,
OtherResearchProduct.class,
Project.class,
Publication.class,
Qualifier.class,
Relation.class,
Result.class,
Software.class,
StructuredProperty.class
};
}
/**
* Lists all the OAF model classes
*
* @param <T>
* @return
*/
public static <T extends Oaf> Class<T>[] getOafModelClasses() {
return new Class[] {
Author.class,
Context.class,
Country.class,
DataInfo.class,
Dataset.class,
Datasource.class,
ExternalReference.class,
ExtraInfo.class,
Field.class,
GeoLocation.class,
Instance.class,
Journal.class,
KeyValue.class,
Oaf.class,
OafEntity.class,
OAIProvenance.class,
Organization.class,
OriginDescription.class,
OtherResearchProduct.class,
Project.class,
Publication.class,
Qualifier.class,
Relation.class,
Result.class,
Software.class,
StructuredProperty.class
};
}
public static String getMainType(final EntityType type) {
return entityMapping.get(type).name();
}
public static String getMainType(final EntityType type) {
return entityMapping.get(type).name();
}
public static boolean isResult(EntityType type) {
return MainEntityType.result.name().equals(getMainType(type));
}
public static boolean isResult(EntityType type) {
return MainEntityType.result.name().equals(getMainType(type));
}
public static String getScheme(final String sourceType, final String targetType) {
return String.format(
schemeTemplate,
entityMapping.get(EntityType.valueOf(sourceType)).name(),
entityMapping.get(EntityType.valueOf(targetType)).name());
}
public static String getScheme(final String sourceType, final String targetType) {
return String
.format(
schemeTemplate,
entityMapping.get(EntityType.valueOf(sourceType)).name(),
entityMapping.get(EntityType.valueOf(targetType)).name());
}
public static <T extends Oaf> Function<T, String> idFn() {
return x -> {
if (isSubClass(x, Relation.class)) {
return idFnForRelation(x);
}
return idFnForOafEntity(x);
};
}
public static <T extends Oaf> Function<T, String> idFn() {
return x -> {
if (isSubClass(x, Relation.class)) {
return idFnForRelation(x);
}
return idFnForOafEntity(x);
};
}
private static <T extends Oaf> String idFnForRelation(T t) {
Relation r = (Relation) t;
return Optional.ofNullable(r.getSource())
.map(
source ->
Optional.ofNullable(r.getTarget())
.map(
target ->
Optional.ofNullable(r.getRelType())
.map(
relType ->
Optional.ofNullable(r.getSubRelType())
.map(
subRelType ->
Optional.ofNullable(r.getRelClass())
.map(
relClass ->
String.join(
source,
target,
relType,
subRelType,
relClass))
.orElse(
String.join(
source,
target,
relType,
subRelType)))
.orElse(String.join(source, target, relType)))
.orElse(String.join(source, target)))
.orElse(source))
.orElse(null);
}
private static <T extends Oaf> String idFnForRelation(T t) {
Relation r = (Relation) t;
return Optional
.ofNullable(r.getSource())
.map(
source -> Optional
.ofNullable(r.getTarget())
.map(
target -> Optional
.ofNullable(r.getRelType())
.map(
relType -> Optional
.ofNullable(r.getSubRelType())
.map(
subRelType -> Optional
.ofNullable(r.getRelClass())
.map(
relClass -> String
.join(
source,
target,
relType,
subRelType,
relClass))
.orElse(
String
.join(
source,
target,
relType,
subRelType)))
.orElse(String.join(source, target, relType)))
.orElse(String.join(source, target)))
.orElse(source))
.orElse(null);
}
private static <T extends Oaf> String idFnForOafEntity(T t) {
return ((OafEntity) t).getId();
}
private static <T extends Oaf> String idFnForOafEntity(T t) {
return ((OafEntity) t).getId();
}
}

View File

@ -32,7 +32,7 @@ public class ISClient implements Serializable {
private static final String INPUT_ACTION_SET_ID_SEPARATOR = ",";
private ISLookUpService isLookup;
private final ISLookUpService isLookup;
public ISClient(String isLookupUrl) {
isLookup = ISLookupClientFactory.getLookUpService(isLookupUrl);

View File

@ -123,10 +123,10 @@ public class PromoteActionPayloadFunctions {
* @param <G> Type of graph table row
*/
public static class TableAggregator<G extends Oaf> extends Aggregator<G, G, G> {
private SerializableSupplier<G> zeroFn;
private SerializableSupplier<BiFunction<G, G, G>> mergeAndGetFn;
private SerializableSupplier<Function<G, Boolean>> isNotZeroFn;
private Class<G> rowClazz;
private final SerializableSupplier<G> zeroFn;
private final SerializableSupplier<BiFunction<G, G, G>> mergeAndGetFn;
private final SerializableSupplier<Function<G, Boolean>> isNotZeroFn;
private final Class<G> rowClazz;
public TableAggregator(
SerializableSupplier<G> zeroFn,

View File

@ -20,7 +20,7 @@ public class DnetCollectorWorkerApplication {
private static final Logger log = LoggerFactory.getLogger(DnetCollectorWorkerApplication.class);
private static CollectorPluginFactory collectorPluginFactory = new CollectorPluginFactory();
private static final CollectorPluginFactory collectorPluginFactory = new CollectorPluginFactory();
private static ArgumentApplicationParser argumentParser;

View File

@ -9,7 +9,7 @@ public class CollectorPluginErrorLogList extends LinkedList<String> {
@Override
public String toString() {
String log = new String();
String log = "";
int index = 0;
for (final String errorMessage : this) {
log += String.format("Retry #%s: %s / ", index++, errorMessage);

View File

@ -11,22 +11,22 @@ import java.util.regex.Pattern;
public class XmlCleaner {
/** Pattern for numeric entities. */
private static Pattern validCharacterEntityPattern = Pattern.compile("^&#x?\\d{2,4};"); // $NON-NLS-1$
private static final Pattern validCharacterEntityPattern = Pattern.compile("^&#x?\\d{2,4};"); // $NON-NLS-1$
// private static Pattern validCharacterEntityPattern = Pattern.compile("^&#?\\d{2,4};");
// //$NON-NLS-1$
// see https://www.w3.org/TR/REC-xml/#charsets , not only limited to &#11;
private static Pattern invalidControlCharPattern = Pattern.compile("&#x?1[0-9a-fA-F];");
private static final Pattern invalidControlCharPattern = Pattern.compile("&#x?1[0-9a-fA-F];");
/**
* Pattern that negates the allowable XML 4 byte unicode characters. Valid are: #x9 | #xA | #xD | [#x20-#xD7FF] |
* [#xE000-#xFFFD] | [#x10000-#x10FFFF]
*/
private static Pattern invalidCharacterPattern = Pattern.compile("[^\t\r\n\u0020-\uD7FF\uE000-\uFFFD]"); // $NON-NLS-1$
private static final Pattern invalidCharacterPattern = Pattern.compile("[^\t\r\n\u0020-\uD7FF\uE000-\uFFFD]"); // $NON-NLS-1$
// Map entities to their unicode equivalent
private static Set<String> goodEntities = new HashSet<>();
private static Map<String, String> badEntities = new HashMap<>();
private static final Set<String> goodEntities = new HashSet<>();
private static final Map<String, String> badEntities = new HashMap<>();
static {
// pre-defined XML entities

View File

@ -21,8 +21,8 @@ import eu.dnetlib.message.MessageManager;
public class DnetCollectorWorkerApplicationTests {
private ArgumentApplicationParser argumentParser = mock(ArgumentApplicationParser.class);
private MessageManager messageManager = mock(MessageManager.class);
private final ArgumentApplicationParser argumentParser = mock(ArgumentApplicationParser.class);
private final MessageManager messageManager = mock(MessageManager.class);
private DnetCollectorWorker worker;

View File

@ -2,6 +2,7 @@
package eu.dnetlib.dhp.oa.dedup;
import java.io.StringReader;
import java.nio.charset.StandardCharsets;
import java.security.MessageDigest;
import java.text.Normalizer;
import java.util.*;
@ -73,7 +74,7 @@ public class DedupUtility {
public static String md5(final String s) {
try {
final MessageDigest md = MessageDigest.getInstance("MD5");
md.update(s.getBytes("UTF-8"));
md.update(s.getBytes(StandardCharsets.UTF_8));
return new String(Hex.encodeHex(md.digest()));
} catch (final Exception e) {
System.err.println("Error creating id");

View File

@ -15,7 +15,7 @@ public class SparkReporter implements Serializable, Reporter {
private final List<Tuple2<String, String>> relations = new ArrayList<>();
private Map<String, LongAccumulator> accumulators;
private final Map<String, LongAccumulator> accumulators;
public SparkReporter(Map<String, LongAccumulator> accumulators) {
this.accumulators = accumulators;

View File

@ -106,7 +106,7 @@ public class DedupUtility {
public static String md5(final String s) {
try {
final MessageDigest md = MessageDigest.getInstance("MD5");
md.update(s.getBytes("UTF-8"));
md.update(s.getBytes(StandardCharsets.UTF_8));
return new String(Hex.encodeHex(md.digest()));
} catch (final Exception e) {
System.err.println("Error creating id");

View File

@ -410,14 +410,10 @@ public abstract class AbstractMdRecordToOafMapper {
final String identifier = n.valueOf("./*[local-name()='identifier']");
final String baseURL = n.valueOf("./*[local-name()='baseURL']");
;
final String metadataNamespace = n.valueOf("./*[local-name()='metadataNamespace']");
;
final boolean altered = n.valueOf("@altered").equalsIgnoreCase("true");
final String datestamp = n.valueOf("./*[local-name()='datestamp']");
;
final String harvestDate = n.valueOf("@harvestDate");
;
return oaiIProvenance(identifier, baseURL, metadataNamespace, altered, datestamp, harvestDate);
}

View File

@ -51,497 +51,458 @@ import eu.dnetlib.dhp.schema.oaf.Software;
import eu.dnetlib.dhp.schema.oaf.StructuredProperty;
public class MigrateDbEntitiesApplication extends AbstractMigrationApplication
implements Closeable {
private static final Log log = LogFactory.getLog(MigrateDbEntitiesApplication.class);
private final DbClient dbClient;
private final long lastUpdateTimestamp;
public static void main(final String[] args) throws Exception {
final ArgumentApplicationParser parser =
new ArgumentApplicationParser(
IOUtils.toString(
MigrateDbEntitiesApplication.class.getResourceAsStream(
"/eu/dnetlib/dhp/oa/graph/migrate_db_entities_parameters.json")));
parser.parseArgument(args);
final String dbUrl = parser.get("postgresUrl");
final String dbUser = parser.get("postgresUser");
final String dbPassword = parser.get("postgresPassword");
final String hdfsPath = parser.get("hdfsPath");
final boolean processClaims =
parser.get("action") != null && parser.get("action").equalsIgnoreCase("claims");
try (final MigrateDbEntitiesApplication smdbe =
new MigrateDbEntitiesApplication(hdfsPath, dbUrl, dbUser, dbPassword)) {
if (processClaims) {
log.info("Processing claims...");
smdbe.execute("queryClaims.sql", smdbe::processClaims);
} else {
log.info("Processing datasources...");
smdbe.execute("queryDatasources.sql", smdbe::processDatasource);
log.info("Processing projects...");
smdbe.execute("queryProjects.sql", smdbe::processProject);
log.info("Processing orgs...");
smdbe.execute("queryOrganizations.sql", smdbe::processOrganization);
log.info("Processing relations ds <-> orgs ...");
smdbe.execute("queryDatasourceOrganization.sql", smdbe::processDatasourceOrganization);
log.info("Processing projects <-> orgs ...");
smdbe.execute("queryProjectOrganization.sql", smdbe::processProjectOrganization);
}
log.info("All done.");
}
}
protected MigrateDbEntitiesApplication() { // ONLY FOR UNIT TEST
super();
this.dbClient = null;
this.lastUpdateTimestamp = new Date().getTime();
}
public MigrateDbEntitiesApplication(
final String hdfsPath, final String dbUrl, final String dbUser, final String dbPassword)
throws Exception {
super(hdfsPath);
this.dbClient = new DbClient(dbUrl, dbUser, dbPassword);
this.lastUpdateTimestamp = new Date().getTime();
}
public void execute(final String sqlFile, final Function<ResultSet, List<Oaf>> producer)
throws Exception {
final String sql =
IOUtils.toString(getClass().getResourceAsStream("/eu/dnetlib/dhp/oa/graph/sql/" + sqlFile));
final Consumer<ResultSet> consumer = rs -> producer.apply(rs).forEach(oaf -> emitOaf(oaf));
dbClient.processResults(sql, consumer);
}
public List<Oaf> processDatasource(final ResultSet rs) {
try {
final DataInfo info = prepareDataInfo(rs);
final Datasource ds = new Datasource();
ds.setId(createOpenaireId(10, rs.getString("datasourceid"), true));
ds.setOriginalId(Arrays.asList(rs.getString("datasourceid")));
ds.setCollectedfrom(
listKeyValues(
createOpenaireId(10, rs.getString("collectedfromid"), true),
rs.getString("collectedfromname")));
ds.setPid(new ArrayList<>());
ds.setDateofcollection(asString(rs.getDate("dateofcollection")));
ds.setDateoftransformation(null); // Value not returned by the SQL query
ds.setExtraInfo(new ArrayList<>()); // Values not present in the DB
ds.setOaiprovenance(null); // Values not present in the DB
ds.setDatasourcetype(prepareQualifierSplitting(rs.getString("datasourcetype")));
ds.setOpenairecompatibility(prepareQualifierSplitting(rs.getString("openairecompatibility")));
ds.setOfficialname(field(rs.getString("officialname"), info));
ds.setEnglishname(field(rs.getString("englishname"), info));
ds.setWebsiteurl(field(rs.getString("websiteurl"), info));
ds.setLogourl(field(rs.getString("logourl"), info));
ds.setContactemail(field(rs.getString("contactemail"), info));
ds.setNamespaceprefix(field(rs.getString("namespaceprefix"), info));
ds.setLatitude(field(Double.toString(rs.getDouble("latitude")), info));
ds.setLongitude(field(Double.toString(rs.getDouble("longitude")), info));
ds.setDateofvalidation(field(asString(rs.getDate("dateofvalidation")), info));
ds.setDescription(field(rs.getString("description"), info));
ds.setSubjects(prepareListOfStructProps(rs.getArray("subjects"), info));
ds.setOdnumberofitems(field(Double.toString(rs.getInt("odnumberofitems")), info));
ds.setOdnumberofitemsdate(field(asString(rs.getDate("odnumberofitemsdate")), info));
ds.setOdpolicies(field(rs.getString("odpolicies"), info));
ds.setOdlanguages(prepareListFields(rs.getArray("odlanguages"), info));
ds.setOdcontenttypes(prepareListFields(rs.getArray("odcontenttypes"), info));
ds.setAccessinfopackage(prepareListFields(rs.getArray("accessinfopackage"), info));
ds.setReleasestartdate(field(asString(rs.getDate("releasestartdate")), info));
ds.setReleaseenddate(field(asString(rs.getDate("releaseenddate")), info));
ds.setMissionstatementurl(field(rs.getString("missionstatementurl"), info));
ds.setDataprovider(field(rs.getBoolean("dataprovider"), info));
ds.setServiceprovider(field(rs.getBoolean("serviceprovider"), info));
ds.setDatabaseaccesstype(field(rs.getString("databaseaccesstype"), info));
ds.setDatauploadtype(field(rs.getString("datauploadtype"), info));
ds.setDatabaseaccessrestriction(field(rs.getString("databaseaccessrestriction"), info));
ds.setDatauploadrestriction(field(rs.getString("datauploadrestriction"), info));
ds.setVersioning(field(rs.getBoolean("versioning"), info));
ds.setCitationguidelineurl(field(rs.getString("citationguidelineurl"), info));
ds.setQualitymanagementkind(field(rs.getString("qualitymanagementkind"), info));
ds.setPidsystems(field(rs.getString("pidsystems"), info));
ds.setCertificates(field(rs.getString("certificates"), info));
ds.setPolicies(new ArrayList<>()); // The sql query returns an empty array
ds.setJournal(
prepareJournal(rs.getString("officialname"), rs.getString("journal"), info)); // Journal
ds.setDataInfo(info);
ds.setLastupdatetimestamp(lastUpdateTimestamp);
return Arrays.asList(ds);
} catch (final Exception e) {
throw new RuntimeException(e);
}
}
public List<Oaf> processProject(final ResultSet rs) {
try {
final DataInfo info = prepareDataInfo(rs);
final Project p = new Project();
p.setId(createOpenaireId(40, rs.getString("projectid"), true));
p.setOriginalId(Arrays.asList(rs.getString("projectid")));
p.setCollectedfrom(
listKeyValues(
createOpenaireId(10, rs.getString("collectedfromid"), true),
rs.getString("collectedfromname")));
p.setPid(new ArrayList<>());
p.setDateofcollection(asString(rs.getDate("dateofcollection")));
p.setDateoftransformation(asString(rs.getDate("dateoftransformation")));
p.setExtraInfo(new ArrayList<>()); // Values not present in the DB
p.setOaiprovenance(null); // Values not present in the DB
p.setWebsiteurl(field(rs.getString("websiteurl"), info));
p.setCode(field(rs.getString("code"), info));
p.setAcronym(field(rs.getString("acronym"), info));
p.setTitle(field(rs.getString("title"), info));
p.setStartdate(field(asString(rs.getDate("startdate")), info));
p.setEnddate(field(asString(rs.getDate("enddate")), info));
p.setCallidentifier(field(rs.getString("callidentifier"), info));
p.setKeywords(field(rs.getString("keywords"), info));
p.setDuration(field(Integer.toString(rs.getInt("duration")), info));
p.setEcsc39(field(Boolean.toString(rs.getBoolean("ecsc39")), info));
p.setOamandatepublications(
field(Boolean.toString(rs.getBoolean("oamandatepublications")), info));
p.setEcarticle29_3(field(Boolean.toString(rs.getBoolean("ecarticle29_3")), info));
p.setSubjects(prepareListOfStructProps(rs.getArray("subjects"), info));
p.setFundingtree(prepareListFields(rs.getArray("fundingtree"), info));
p.setContracttype(prepareQualifierSplitting(rs.getString("contracttype")));
p.setOptional1(field(rs.getString("optional1"), info));
p.setOptional2(field(rs.getString("optional2"), info));
p.setJsonextrainfo(field(rs.getString("jsonextrainfo"), info));
p.setContactfullname(field(rs.getString("contactfullname"), info));
p.setContactfax(field(rs.getString("contactfax"), info));
p.setContactphone(field(rs.getString("contactphone"), info));
p.setContactemail(field(rs.getString("contactemail"), info));
p.setSummary(field(rs.getString("summary"), info));
p.setCurrency(field(rs.getString("currency"), info));
p.setTotalcost(new Float(rs.getDouble("totalcost")));
p.setFundedamount(new Float(rs.getDouble("fundedamount")));
p.setDataInfo(info);
p.setLastupdatetimestamp(lastUpdateTimestamp);
return Arrays.asList(p);
} catch (final Exception e) {
throw new RuntimeException(e);
}
}
public List<Oaf> processOrganization(final ResultSet rs) {
try {
final DataInfo info = prepareDataInfo(rs);
final Organization o = new Organization();
o.setId(createOpenaireId(20, rs.getString("organizationid"), true));
o.setOriginalId(Arrays.asList(rs.getString("organizationid")));
o.setCollectedfrom(
listKeyValues(
createOpenaireId(10, rs.getString("collectedfromid"), true),
rs.getString("collectedfromname")));
o.setPid(new ArrayList<>());
o.setDateofcollection(asString(rs.getDate("dateofcollection")));
o.setDateoftransformation(asString(rs.getDate("dateoftransformation")));
o.setExtraInfo(new ArrayList<>()); // Values not present in the DB
o.setOaiprovenance(null); // Values not present in the DB
o.setLegalshortname(field(rs.getString("legalshortname"), info));
o.setLegalname(field(rs.getString("legalname"), info));
o.setAlternativeNames(new ArrayList<>()); // Values not returned by the SQL query
o.setWebsiteurl(field(rs.getString("websiteurl"), info));
o.setLogourl(field(rs.getString("logourl"), info));
o.setEclegalbody(field(Boolean.toString(rs.getBoolean("eclegalbody")), info));
o.setEclegalperson(field(Boolean.toString(rs.getBoolean("eclegalperson")), info));
o.setEcnonprofit(field(Boolean.toString(rs.getBoolean("ecnonprofit")), info));
o.setEcresearchorganization(
field(Boolean.toString(rs.getBoolean("ecresearchorganization")), info));
o.setEchighereducation(field(Boolean.toString(rs.getBoolean("echighereducation")), info));
o.setEcinternationalorganizationeurinterests(
field(Boolean.toString(rs.getBoolean("ecinternationalorganizationeurinterests")), info));
o.setEcinternationalorganization(
field(Boolean.toString(rs.getBoolean("ecinternationalorganization")), info));
o.setEcenterprise(field(Boolean.toString(rs.getBoolean("ecenterprise")), info));
o.setEcsmevalidated(field(Boolean.toString(rs.getBoolean("ecsmevalidated")), info));
o.setEcnutscode(field(Boolean.toString(rs.getBoolean("ecnutscode")), info));
o.setCountry(prepareQualifierSplitting(rs.getString("country")));
o.setDataInfo(info);
o.setLastupdatetimestamp(lastUpdateTimestamp);
return Arrays.asList(o);
} catch (final Exception e) {
throw new RuntimeException(e);
}
}
public List<Oaf> processDatasourceOrganization(final ResultSet rs) {
try {
final DataInfo info = prepareDataInfo(rs);
final String orgId = createOpenaireId(20, rs.getString("organization"), true);
final String dsId = createOpenaireId(10, rs.getString("datasource"), true);
final List<KeyValue> collectedFrom =
listKeyValues(
createOpenaireId(10, rs.getString("collectedfromid"), true),
rs.getString("collectedfromname"));
final Relation r1 = new Relation();
r1.setRelType("datasourceOrganization");
r1.setSubRelType("provision");
r1.setRelClass("isProvidedBy");
r1.setSource(dsId);
r1.setTarget(orgId);
r1.setCollectedfrom(collectedFrom);
r1.setDataInfo(info);
r1.setLastupdatetimestamp(lastUpdateTimestamp);
final Relation r2 = new Relation();
r2.setRelType("datasourceOrganization");
r2.setSubRelType("provision");
r2.setRelClass("provides");
r2.setSource(orgId);
r2.setTarget(dsId);
r2.setCollectedfrom(collectedFrom);
r2.setDataInfo(info);
r2.setLastupdatetimestamp(lastUpdateTimestamp);
return Arrays.asList(r1, r2);
} catch (final Exception e) {
throw new RuntimeException(e);
}
}
public List<Oaf> processProjectOrganization(final ResultSet rs) {
try {
final DataInfo info = prepareDataInfo(rs);
final String orgId = createOpenaireId(20, rs.getString("resporganization"), true);
final String projectId = createOpenaireId(40, rs.getString("project"), true);
final List<KeyValue> collectedFrom =
listKeyValues(
createOpenaireId(10, rs.getString("collectedfromid"), true),
rs.getString("collectedfromname"));
final Relation r1 = new Relation();
r1.setRelType("projectOrganization");
r1.setSubRelType("participation");
r1.setRelClass("isParticipant");
r1.setSource(projectId);
r1.setTarget(orgId);
r1.setCollectedfrom(collectedFrom);
r1.setDataInfo(info);
r1.setLastupdatetimestamp(lastUpdateTimestamp);
final Relation r2 = new Relation();
r2.setRelType("projectOrganization");
r2.setSubRelType("participation");
r2.setRelClass("hasParticipant");
r2.setSource(orgId);
r2.setTarget(projectId);
r2.setCollectedfrom(collectedFrom);
r2.setDataInfo(info);
r2.setLastupdatetimestamp(lastUpdateTimestamp);
return Arrays.asList(r1, r2);
} catch (final Exception e) {
throw new RuntimeException(e);
}
}
public List<Oaf> processClaims(final ResultSet rs) {
final DataInfo info =
dataInfo(
false,
null,
false,
false,
qualifier(
"user:claim", "user:claim", "dnet:provenanceActions", "dnet:provenanceActions"),
"0.9");
final List<KeyValue> collectedFrom =
listKeyValues(createOpenaireId(10, "infrastruct_::openaire", true), "OpenAIRE");
try {
if (rs.getString("source_type").equals("context")) {
final Result r;
if (rs.getString("target_type").equals("dataset")) {
r = new Dataset();
r.setResulttype(MigrationConstants.DATASET_RESULTTYPE_QUALIFIER);
} else if (rs.getString("target_type").equals("software")) {
r = new Software();
r.setResulttype(MigrationConstants.SOFTWARE_RESULTTYPE_QUALIFIER);
} else if (rs.getString("target_type").equals("other")) {
r = new OtherResearchProduct();
r.setResulttype(MigrationConstants.OTHER_RESULTTYPE_QUALIFIER);
} else {
r = new Publication();
r.setResulttype(MigrationConstants.PUBLICATION_RESULTTYPE_QUALIFIER);
}
r.setId(createOpenaireId(50, rs.getString("target_id"), false));
r.setLastupdatetimestamp(lastUpdateTimestamp);
r.setContext(prepareContext(rs.getString("source_id"), info));
r.setDataInfo(info);
r.setCollectedfrom(collectedFrom);
return Arrays.asList(r);
} else {
final String sourceId =
createOpenaireId(rs.getString("source_type"), rs.getString("source_id"), false);
final String targetId =
createOpenaireId(rs.getString("target_type"), rs.getString("target_id"), false);
final Relation r1 = new Relation();
final Relation r2 = new Relation();
if (rs.getString("source_type").equals("project")) {
r1.setCollectedfrom(collectedFrom);
r1.setRelType("resultProject");
r1.setSubRelType("outcome");
r1.setRelClass("produces");
r2.setCollectedfrom(collectedFrom);
r2.setRelType("resultProject");
r2.setSubRelType("outcome");
r2.setRelClass("isProducedBy");
} else {
r1.setCollectedfrom(collectedFrom);
r1.setRelType("resultResult");
r1.setSubRelType("relationship");
r1.setRelClass("isRelatedTo");
r2.setCollectedfrom(collectedFrom);
r2.setRelType("resultResult");
r2.setSubRelType("relationship");
r2.setRelClass("isRelatedTo");
}
r1.setSource(sourceId);
r1.setTarget(targetId);
r1.setDataInfo(info);
r1.setLastupdatetimestamp(lastUpdateTimestamp);
r2.setSource(targetId);
r2.setTarget(sourceId);
r2.setDataInfo(info);
r2.setLastupdatetimestamp(lastUpdateTimestamp);
return Arrays.asList(r1, r2);
}
} catch (final Exception e) {
throw new RuntimeException(e);
}
}
private List<Context> prepareContext(final String id, final DataInfo dataInfo) {
final Context context = new Context();
context.setId(id);
context.setDataInfo(Arrays.asList(dataInfo));
return Arrays.asList(context);
}
private DataInfo prepareDataInfo(final ResultSet rs) throws SQLException {
final Boolean deletedbyinference = rs.getBoolean("deletedbyinference");
final String inferenceprovenance = rs.getString("inferenceprovenance");
final Boolean inferred = rs.getBoolean("inferred");
final String trust = rs.getString("trust");
return dataInfo(
deletedbyinference,
inferenceprovenance,
inferred,
false,
MigrationConstants.ENTITYREGISTRY_PROVENANCE_ACTION,
trust);
}
private Qualifier prepareQualifierSplitting(final String s) {
if (StringUtils.isBlank(s)) {
return null;
}
final String[] arr = s.split("@@@");
return arr.length == 4 ? qualifier(arr[0], arr[1], arr[2], arr[3]) : null;
}
private List<Field<String>> prepareListFields(final Array array, final DataInfo info) {
try {
return array != null ? listFields(info, (String[]) array.getArray()) : new ArrayList<>();
} catch (final SQLException e) {
throw new RuntimeException("Invalid SQL array", e);
}
}
private StructuredProperty prepareStructProp(final String s, final DataInfo dataInfo) {
if (StringUtils.isBlank(s)) {
return null;
}
final String[] parts = s.split("###");
if (parts.length == 2) {
final String value = parts[0];
final String[] arr = parts[1].split("@@@");
if (arr.length == 4) {
return structuredProperty(value, arr[0], arr[1], arr[2], arr[3], dataInfo);
}
}
return null;
}
private List<StructuredProperty> prepareListOfStructProps(
final Array array, final DataInfo dataInfo) throws SQLException {
final List<StructuredProperty> res = new ArrayList<>();
if (array != null) {
for (final String s : (String[]) array.getArray()) {
final StructuredProperty sp = prepareStructProp(s, dataInfo);
if (sp != null) {
res.add(sp);
}
}
}
return res;
}
private Journal prepareJournal(final String name, final String sj, final DataInfo info) {
if (StringUtils.isNotBlank(sj)) {
final String[] arr = sj.split("@@@");
if (arr.length == 3) {
final String issn = StringUtils.isNotBlank(arr[0]) ? arr[0] : null;
final String eissn = StringUtils.isNotBlank(arr[1]) ? arr[1] : null;
;
final String lissn = StringUtils.isNotBlank(arr[2]) ? arr[2] : null;
;
if (issn != null || eissn != null || lissn != null) {
return journal(name, issn, eissn, eissn, null, null, null, null, null, null, null, info);
}
}
}
return null;
}
@Override
public void close() throws IOException {
super.close();
dbClient.close();
}
implements Closeable {
private static final Log log = LogFactory.getLog(MigrateDbEntitiesApplication.class);
private final DbClient dbClient;
private final long lastUpdateTimestamp;
public static void main(final String[] args) throws Exception {
final ArgumentApplicationParser parser = new ArgumentApplicationParser(
IOUtils
.toString(MigrateDbEntitiesApplication.class
.getResourceAsStream("/eu/dnetlib/dhp/oa/graph/migrate_db_entities_parameters.json")));
parser.parseArgument(args);
final String dbUrl = parser.get("postgresUrl");
final String dbUser = parser.get("postgresUser");
final String dbPassword = parser.get("postgresPassword");
final String hdfsPath = parser.get("hdfsPath");
final boolean processClaims = parser.get("action") != null && parser.get("action").equalsIgnoreCase("claims");
try (final MigrateDbEntitiesApplication smdbe = new MigrateDbEntitiesApplication(hdfsPath, dbUrl, dbUser,
dbPassword)) {
if (processClaims) {
log.info("Processing claims...");
smdbe.execute("queryClaims.sql", smdbe::processClaims);
} else {
log.info("Processing datasources...");
smdbe.execute("queryDatasources.sql", smdbe::processDatasource);
log.info("Processing projects...");
smdbe.execute("queryProjects.sql", smdbe::processProject);
log.info("Processing orgs...");
smdbe.execute("queryOrganizations.sql", smdbe::processOrganization);
log.info("Processing relations ds <-> orgs ...");
smdbe.execute("queryDatasourceOrganization.sql", smdbe::processDatasourceOrganization);
log.info("Processing projects <-> orgs ...");
smdbe.execute("queryProjectOrganization.sql", smdbe::processProjectOrganization);
}
log.info("All done.");
}
}
protected MigrateDbEntitiesApplication() { // ONLY FOR UNIT TEST
super();
this.dbClient = null;
this.lastUpdateTimestamp = new Date().getTime();
}
public MigrateDbEntitiesApplication(
final String hdfsPath, final String dbUrl, final String dbUser, final String dbPassword)
throws Exception {
super(hdfsPath);
this.dbClient = new DbClient(dbUrl, dbUser, dbPassword);
this.lastUpdateTimestamp = new Date().getTime();
}
public void execute(final String sqlFile, final Function<ResultSet, List<Oaf>> producer)
throws Exception {
final String sql = IOUtils.toString(getClass().getResourceAsStream("/eu/dnetlib/dhp/oa/graph/sql/" + sqlFile));
final Consumer<ResultSet> consumer = rs -> producer.apply(rs).forEach(oaf -> emitOaf(oaf));
dbClient.processResults(sql, consumer);
}
public List<Oaf> processDatasource(final ResultSet rs) {
try {
final DataInfo info = prepareDataInfo(rs);
final Datasource ds = new Datasource();
ds.setId(createOpenaireId(10, rs.getString("datasourceid"), true));
ds.setOriginalId(Arrays.asList(rs.getString("datasourceid")));
ds
.setCollectedfrom(listKeyValues(createOpenaireId(10, rs.getString("collectedfromid"), true), rs.getString("collectedfromname")));
ds.setPid(new ArrayList<>());
ds.setDateofcollection(asString(rs.getDate("dateofcollection")));
ds.setDateoftransformation(null); // Value not returned by the SQL query
ds.setExtraInfo(new ArrayList<>()); // Values not present in the DB
ds.setOaiprovenance(null); // Values not present in the DB
ds.setDatasourcetype(prepareQualifierSplitting(rs.getString("datasourcetype")));
ds.setOpenairecompatibility(prepareQualifierSplitting(rs.getString("openairecompatibility")));
ds.setOfficialname(field(rs.getString("officialname"), info));
ds.setEnglishname(field(rs.getString("englishname"), info));
ds.setWebsiteurl(field(rs.getString("websiteurl"), info));
ds.setLogourl(field(rs.getString("logourl"), info));
ds.setContactemail(field(rs.getString("contactemail"), info));
ds.setNamespaceprefix(field(rs.getString("namespaceprefix"), info));
ds.setLatitude(field(Double.toString(rs.getDouble("latitude")), info));
ds.setLongitude(field(Double.toString(rs.getDouble("longitude")), info));
ds.setDateofvalidation(field(asString(rs.getDate("dateofvalidation")), info));
ds.setDescription(field(rs.getString("description"), info));
ds.setSubjects(prepareListOfStructProps(rs.getArray("subjects"), info));
ds.setOdnumberofitems(field(Double.toString(rs.getInt("odnumberofitems")), info));
ds.setOdnumberofitemsdate(field(asString(rs.getDate("odnumberofitemsdate")), info));
ds.setOdpolicies(field(rs.getString("odpolicies"), info));
ds.setOdlanguages(prepareListFields(rs.getArray("odlanguages"), info));
ds.setOdcontenttypes(prepareListFields(rs.getArray("odcontenttypes"), info));
ds.setAccessinfopackage(prepareListFields(rs.getArray("accessinfopackage"), info));
ds.setReleasestartdate(field(asString(rs.getDate("releasestartdate")), info));
ds.setReleaseenddate(field(asString(rs.getDate("releaseenddate")), info));
ds.setMissionstatementurl(field(rs.getString("missionstatementurl"), info));
ds.setDataprovider(field(rs.getBoolean("dataprovider"), info));
ds.setServiceprovider(field(rs.getBoolean("serviceprovider"), info));
ds.setDatabaseaccesstype(field(rs.getString("databaseaccesstype"), info));
ds.setDatauploadtype(field(rs.getString("datauploadtype"), info));
ds.setDatabaseaccessrestriction(field(rs.getString("databaseaccessrestriction"), info));
ds.setDatauploadrestriction(field(rs.getString("datauploadrestriction"), info));
ds.setVersioning(field(rs.getBoolean("versioning"), info));
ds.setCitationguidelineurl(field(rs.getString("citationguidelineurl"), info));
ds.setQualitymanagementkind(field(rs.getString("qualitymanagementkind"), info));
ds.setPidsystems(field(rs.getString("pidsystems"), info));
ds.setCertificates(field(rs.getString("certificates"), info));
ds.setPolicies(new ArrayList<>()); // The sql query returns an empty array
ds
.setJournal(prepareJournal(rs.getString("officialname"), rs.getString("journal"), info)); // Journal
ds.setDataInfo(info);
ds.setLastupdatetimestamp(lastUpdateTimestamp);
return Arrays.asList(ds);
} catch (final Exception e) {
throw new RuntimeException(e);
}
}
public List<Oaf> processProject(final ResultSet rs) {
try {
final DataInfo info = prepareDataInfo(rs);
final Project p = new Project();
p.setId(createOpenaireId(40, rs.getString("projectid"), true));
p.setOriginalId(Arrays.asList(rs.getString("projectid")));
p
.setCollectedfrom(listKeyValues(createOpenaireId(10, rs.getString("collectedfromid"), true), rs.getString("collectedfromname")));
p.setPid(new ArrayList<>());
p.setDateofcollection(asString(rs.getDate("dateofcollection")));
p.setDateoftransformation(asString(rs.getDate("dateoftransformation")));
p.setExtraInfo(new ArrayList<>()); // Values not present in the DB
p.setOaiprovenance(null); // Values not present in the DB
p.setWebsiteurl(field(rs.getString("websiteurl"), info));
p.setCode(field(rs.getString("code"), info));
p.setAcronym(field(rs.getString("acronym"), info));
p.setTitle(field(rs.getString("title"), info));
p.setStartdate(field(asString(rs.getDate("startdate")), info));
p.setEnddate(field(asString(rs.getDate("enddate")), info));
p.setCallidentifier(field(rs.getString("callidentifier"), info));
p.setKeywords(field(rs.getString("keywords"), info));
p.setDuration(field(Integer.toString(rs.getInt("duration")), info));
p.setEcsc39(field(Boolean.toString(rs.getBoolean("ecsc39")), info));
p
.setOamandatepublications(field(Boolean.toString(rs.getBoolean("oamandatepublications")), info));
p.setEcarticle29_3(field(Boolean.toString(rs.getBoolean("ecarticle29_3")), info));
p.setSubjects(prepareListOfStructProps(rs.getArray("subjects"), info));
p.setFundingtree(prepareListFields(rs.getArray("fundingtree"), info));
p.setContracttype(prepareQualifierSplitting(rs.getString("contracttype")));
p.setOptional1(field(rs.getString("optional1"), info));
p.setOptional2(field(rs.getString("optional2"), info));
p.setJsonextrainfo(field(rs.getString("jsonextrainfo"), info));
p.setContactfullname(field(rs.getString("contactfullname"), info));
p.setContactfax(field(rs.getString("contactfax"), info));
p.setContactphone(field(rs.getString("contactphone"), info));
p.setContactemail(field(rs.getString("contactemail"), info));
p.setSummary(field(rs.getString("summary"), info));
p.setCurrency(field(rs.getString("currency"), info));
p.setTotalcost(new Float(rs.getDouble("totalcost")));
p.setFundedamount(new Float(rs.getDouble("fundedamount")));
p.setDataInfo(info);
p.setLastupdatetimestamp(lastUpdateTimestamp);
return Arrays.asList(p);
} catch (final Exception e) {
throw new RuntimeException(e);
}
}
public List<Oaf> processOrganization(final ResultSet rs) {
try {
final DataInfo info = prepareDataInfo(rs);
final Organization o = new Organization();
o.setId(createOpenaireId(20, rs.getString("organizationid"), true));
o.setOriginalId(Arrays.asList(rs.getString("organizationid")));
o
.setCollectedfrom(listKeyValues(createOpenaireId(10, rs.getString("collectedfromid"), true), rs.getString("collectedfromname")));
o.setPid(new ArrayList<>());
o.setDateofcollection(asString(rs.getDate("dateofcollection")));
o.setDateoftransformation(asString(rs.getDate("dateoftransformation")));
o.setExtraInfo(new ArrayList<>()); // Values not present in the DB
o.setOaiprovenance(null); // Values not present in the DB
o.setLegalshortname(field(rs.getString("legalshortname"), info));
o.setLegalname(field(rs.getString("legalname"), info));
o.setAlternativeNames(new ArrayList<>()); // Values not returned by the SQL query
o.setWebsiteurl(field(rs.getString("websiteurl"), info));
o.setLogourl(field(rs.getString("logourl"), info));
o.setEclegalbody(field(Boolean.toString(rs.getBoolean("eclegalbody")), info));
o.setEclegalperson(field(Boolean.toString(rs.getBoolean("eclegalperson")), info));
o.setEcnonprofit(field(Boolean.toString(rs.getBoolean("ecnonprofit")), info));
o
.setEcresearchorganization(field(Boolean.toString(rs.getBoolean("ecresearchorganization")), info));
o.setEchighereducation(field(Boolean.toString(rs.getBoolean("echighereducation")), info));
o
.setEcinternationalorganizationeurinterests(field(Boolean.toString(rs.getBoolean("ecinternationalorganizationeurinterests")), info));
o
.setEcinternationalorganization(field(Boolean.toString(rs.getBoolean("ecinternationalorganization")), info));
o.setEcenterprise(field(Boolean.toString(rs.getBoolean("ecenterprise")), info));
o.setEcsmevalidated(field(Boolean.toString(rs.getBoolean("ecsmevalidated")), info));
o.setEcnutscode(field(Boolean.toString(rs.getBoolean("ecnutscode")), info));
o.setCountry(prepareQualifierSplitting(rs.getString("country")));
o.setDataInfo(info);
o.setLastupdatetimestamp(lastUpdateTimestamp);
return Arrays.asList(o);
} catch (final Exception e) {
throw new RuntimeException(e);
}
}
public List<Oaf> processDatasourceOrganization(final ResultSet rs) {
try {
final DataInfo info = prepareDataInfo(rs);
final String orgId = createOpenaireId(20, rs.getString("organization"), true);
final String dsId = createOpenaireId(10, rs.getString("datasource"), true);
final List<KeyValue> collectedFrom = listKeyValues(createOpenaireId(10, rs.getString("collectedfromid"), true), rs.getString("collectedfromname"));
final Relation r1 = new Relation();
r1.setRelType("datasourceOrganization");
r1.setSubRelType("provision");
r1.setRelClass("isProvidedBy");
r1.setSource(dsId);
r1.setTarget(orgId);
r1.setCollectedfrom(collectedFrom);
r1.setDataInfo(info);
r1.setLastupdatetimestamp(lastUpdateTimestamp);
final Relation r2 = new Relation();
r2.setRelType("datasourceOrganization");
r2.setSubRelType("provision");
r2.setRelClass("provides");
r2.setSource(orgId);
r2.setTarget(dsId);
r2.setCollectedfrom(collectedFrom);
r2.setDataInfo(info);
r2.setLastupdatetimestamp(lastUpdateTimestamp);
return Arrays.asList(r1, r2);
} catch (final Exception e) {
throw new RuntimeException(e);
}
}
public List<Oaf> processProjectOrganization(final ResultSet rs) {
try {
final DataInfo info = prepareDataInfo(rs);
final String orgId = createOpenaireId(20, rs.getString("resporganization"), true);
final String projectId = createOpenaireId(40, rs.getString("project"), true);
final List<KeyValue> collectedFrom = listKeyValues(createOpenaireId(10, rs.getString("collectedfromid"), true), rs.getString("collectedfromname"));
final Relation r1 = new Relation();
r1.setRelType("projectOrganization");
r1.setSubRelType("participation");
r1.setRelClass("hasParticipant");
r1.setSource(projectId);
r1.setTarget(orgId);
r1.setCollectedfrom(collectedFrom);
r1.setDataInfo(info);
r1.setLastupdatetimestamp(lastUpdateTimestamp);
final Relation r2 = new Relation();
r2.setRelType("projectOrganization");
r2.setSubRelType("participation");
r2.setRelClass("isParticipant");
r2.setSource(orgId);
r2.setTarget(projectId);
r2.setCollectedfrom(collectedFrom);
r2.setDataInfo(info);
r2.setLastupdatetimestamp(lastUpdateTimestamp);
return Arrays.asList(r1, r2);
} catch (final Exception e) {
throw new RuntimeException(e);
}
}
public List<Oaf> processClaims(final ResultSet rs) {
final DataInfo info =
dataInfo(false, null, false, false, qualifier("user:claim", "user:claim", "dnet:provenanceActions", "dnet:provenanceActions"), "0.9");
final List<KeyValue> collectedFrom = listKeyValues(createOpenaireId(10, "infrastruct_::openaire", true), "OpenAIRE");
try {
if (rs.getString("source_type").equals("context")) {
final Result r;
if (rs.getString("target_type").equals("dataset")) {
r = new Dataset();
r.setResulttype(MigrationConstants.DATASET_RESULTTYPE_QUALIFIER);
} else if (rs.getString("target_type").equals("software")) {
r = new Software();
r.setResulttype(MigrationConstants.SOFTWARE_RESULTTYPE_QUALIFIER);
} else if (rs.getString("target_type").equals("other")) {
r = new OtherResearchProduct();
r.setResulttype(MigrationConstants.OTHER_RESULTTYPE_QUALIFIER);
} else {
r = new Publication();
r.setResulttype(MigrationConstants.PUBLICATION_RESULTTYPE_QUALIFIER);
}
r.setId(createOpenaireId(50, rs.getString("target_id"), false));
r.setLastupdatetimestamp(lastUpdateTimestamp);
r.setContext(prepareContext(rs.getString("source_id"), info));
r.setDataInfo(info);
r.setCollectedfrom(collectedFrom);
return Arrays.asList(r);
} else {
final String sourceId = createOpenaireId(rs.getString("source_type"), rs.getString("source_id"), false);
final String targetId = createOpenaireId(rs.getString("target_type"), rs.getString("target_id"), false);
final Relation r1 = new Relation();
final Relation r2 = new Relation();
if (rs.getString("source_type").equals("project")) {
r1.setCollectedfrom(collectedFrom);
r1.setRelType("resultProject");
r1.setSubRelType("outcome");
r1.setRelClass("produces");
r2.setCollectedfrom(collectedFrom);
r2.setRelType("resultProject");
r2.setSubRelType("outcome");
r2.setRelClass("isProducedBy");
} else {
r1.setCollectedfrom(collectedFrom);
r1.setRelType("resultResult");
r1.setSubRelType("relationship");
r1.setRelClass("isRelatedTo");
r2.setCollectedfrom(collectedFrom);
r2.setRelType("resultResult");
r2.setSubRelType("relationship");
r2.setRelClass("isRelatedTo");
}
r1.setSource(sourceId);
r1.setTarget(targetId);
r1.setDataInfo(info);
r1.setLastupdatetimestamp(lastUpdateTimestamp);
r2.setSource(targetId);
r2.setTarget(sourceId);
r2.setDataInfo(info);
r2.setLastupdatetimestamp(lastUpdateTimestamp);
return Arrays.asList(r1, r2);
}
} catch (final Exception e) {
throw new RuntimeException(e);
}
}
private List<Context> prepareContext(final String id, final DataInfo dataInfo) {
final Context context = new Context();
context.setId(id);
context.setDataInfo(Arrays.asList(dataInfo));
return Arrays.asList(context);
}
private DataInfo prepareDataInfo(final ResultSet rs) throws SQLException {
final Boolean deletedbyinference = rs.getBoolean("deletedbyinference");
final String inferenceprovenance = rs.getString("inferenceprovenance");
final Boolean inferred = rs.getBoolean("inferred");
final String trust = rs.getString("trust");
return dataInfo(deletedbyinference, inferenceprovenance, inferred, false, MigrationConstants.ENTITYREGISTRY_PROVENANCE_ACTION, trust);
}
private Qualifier prepareQualifierSplitting(final String s) {
if (StringUtils.isBlank(s)) { return null; }
final String[] arr = s.split("@@@");
return arr.length == 4 ? qualifier(arr[0], arr[1], arr[2], arr[3]) : null;
}
private List<Field<String>> prepareListFields(final Array array, final DataInfo info) {
try {
return array != null ? listFields(info, (String[]) array.getArray()) : new ArrayList<>();
} catch (final SQLException e) {
throw new RuntimeException("Invalid SQL array", e);
}
}
private StructuredProperty prepareStructProp(final String s, final DataInfo dataInfo) {
if (StringUtils.isBlank(s)) { return null; }
final String[] parts = s.split("###");
if (parts.length == 2) {
final String value = parts[0];
final String[] arr = parts[1].split("@@@");
if (arr.length == 4) { return structuredProperty(value, arr[0], arr[1], arr[2], arr[3], dataInfo); }
}
return null;
}
private List<StructuredProperty> prepareListOfStructProps(
final Array array,
final DataInfo dataInfo) throws SQLException {
final List<StructuredProperty> res = new ArrayList<>();
if (array != null) {
for (final String s : (String[]) array.getArray()) {
final StructuredProperty sp = prepareStructProp(s, dataInfo);
if (sp != null) {
res.add(sp);
}
}
}
return res;
}
private Journal prepareJournal(final String name, final String sj, final DataInfo info) {
if (StringUtils.isNotBlank(sj)) {
final String[] arr = sj.split("@@@");
if (arr.length == 3) {
final String issn = StringUtils.isNotBlank(arr[0]) ? arr[0].trim() : null;
final String eissn = StringUtils.isNotBlank(arr[1]) ? arr[1].trim() : null;;
final String lissn = StringUtils.isNotBlank(arr[2]) ? arr[2].trim() : null;;
if (issn != null || eissn != null || lissn != null) {
return journal(name, issn, eissn, eissn, null, null, null, null, null, null, null, info);
}
}
}
return null;
}
@Override
public void close() throws IOException {
super.close();
dbClient.close();
}
}

View File

@ -14,7 +14,7 @@ public class DbClient implements Closeable {
private static final Log log = LogFactory.getLog(DbClient.class);
private Connection connection;
private final Connection connection;
public DbClient(final String address, final String login, final String password) {

View File

@ -2,6 +2,7 @@
package eu.dnetlib.dhp.oa.graph.raw.common;
import java.nio.charset.Charset;
import java.nio.charset.StandardCharsets;
import java.text.Normalizer;
import java.util.HashSet;
import java.util.List;
@ -141,7 +142,7 @@ public class PacePerson {
public String hash() {
return Hashing
.murmur3_128()
.hashString(getNormalisedFullname(), Charset.forName(UTF8))
.hashString(getNormalisedFullname(), StandardCharsets.UTF_8)
.toString();
}

View File

@ -25,7 +25,7 @@ public abstract class AbstractScholexplorerParser {
protected static final Log log = LogFactory.getLog(AbstractScholexplorerParser.class);
static final Pattern pattern = Pattern.compile("10\\.\\d{4,9}/[-._;()/:A-Z0-9]+$", Pattern.CASE_INSENSITIVE);
private List<String> datasetSubTypes = Arrays
private final List<String> datasetSubTypes = Arrays
.asList(
"dataset",
"software",

View File

@ -108,7 +108,7 @@ SELECT
ELSE 'Other'
END || '@@@dnet:datasource_typologies@@@dnet:datasource_typologies' AS datasourcetype,
'sysimport:crosswalk:entityregistry@@@sysimport:crosswalk:entityregistry@@@dnet:provenance_actions@@@dnet:provenance_actions' AS provenanceaction,
CONCAT(d.issn, '@@@', d.eissn, '@@@', d.lissn) AS journal
CONCAT(d.issn, ' @@@ ', d.eissn, ' @@@ ', d.lissn) AS journal
FROM dsm_datasources d

View File

@ -18,7 +18,7 @@ import eu.dnetlib.dhp.utils.DHPUtils;
public class CrossRefParserJSON {
private static List<ScholixCollectedFrom> collectedFrom = generateCrossrefCollectedFrom("complete");
private static final List<ScholixCollectedFrom> collectedFrom = generateCrossrefCollectedFrom("complete");
public static ScholixResource parseRecord(final String record) {
if (record == null)

View File

@ -16,7 +16,7 @@ public class DataciteClient {
private String host;
private String index = "datacite";
private String indexType = "dump";
private Datacite2Scholix d2s;
private final Datacite2Scholix d2s;
public DataciteClient(String host) {
this.host = host;

View File

@ -12,7 +12,7 @@ import eu.dnetlib.dhp.oa.provision.model.SortableRelation;
*/
public class RelationPartitioner extends Partitioner {
private int numPartitions;
private final int numPartitions;
public RelationPartitioner(int numPartitions) {
this.numPartitions = numPartitions;

View File

@ -46,7 +46,7 @@ public class StreamingInputDocumentFactory {
private static final String INDEX_RECORD_ID = INDEX_FIELD_PREFIX + "indexrecordidentifier";
private static final String outFormat = new String("yyyy-MM-dd'T'hh:mm:ss'Z'");
private static final String outFormat = "yyyy-MM-dd'T'hh:mm:ss'Z'";
private static final List<String> dateFormats = Arrays
.asList("yyyy-MM-dd'T'hh:mm:ss", "yyyy-MM-dd", "dd-MM-yyyy", "dd/MM/yyyy", "yyyy");
@ -61,15 +61,18 @@ public class StreamingInputDocumentFactory {
private static final int MAX_FIELD_LENGTH = 25000;
private ThreadLocal<XMLInputFactory> inputFactory = ThreadLocal.withInitial(() -> XMLInputFactory.newInstance());
private final ThreadLocal<XMLInputFactory> inputFactory = ThreadLocal
.withInitial(() -> XMLInputFactory.newInstance());
private ThreadLocal<XMLOutputFactory> outputFactory = ThreadLocal.withInitial(() -> XMLOutputFactory.newInstance());
private final ThreadLocal<XMLOutputFactory> outputFactory = ThreadLocal
.withInitial(() -> XMLOutputFactory.newInstance());
private ThreadLocal<XMLEventFactory> eventFactory = ThreadLocal.withInitial(() -> XMLEventFactory.newInstance());
private final ThreadLocal<XMLEventFactory> eventFactory = ThreadLocal
.withInitial(() -> XMLEventFactory.newInstance());
private String version;
private final String version;
private String dsId;
private final String dsId;
private String resultName = DEFAULTDNETRESULT;

View File

@ -17,7 +17,7 @@ import eu.dnetlib.dhp.schema.oaf.OafEntity;
public class TemplateFactory {
private TemplateResources resources;
private final TemplateResources resources;
private static final char DELIMITER = '$';

View File

@ -8,17 +8,17 @@ import com.google.common.io.Resources;
public class TemplateResources {
private String record = read("eu/dnetlib/dhp/oa/provision/template/record.st");
private final String record = read("eu/dnetlib/dhp/oa/provision/template/record.st");
private String instance = read("eu/dnetlib/dhp/oa/provision/template/instance.st");
private final String instance = read("eu/dnetlib/dhp/oa/provision/template/instance.st");
private String rel = read("eu/dnetlib/dhp/oa/provision/template/rel.st");
private final String rel = read("eu/dnetlib/dhp/oa/provision/template/rel.st");
private String webresource = read("eu/dnetlib/dhp/oa/provision/template/webresource.st");
private final String webresource = read("eu/dnetlib/dhp/oa/provision/template/webresource.st");
private String child = read("eu/dnetlib/dhp/oa/provision/template/child.st");
private final String child = read("eu/dnetlib/dhp/oa/provision/template/child.st");
private String entity = read("eu/dnetlib/dhp/oa/provision/template/entity.st");
private final String entity = read("eu/dnetlib/dhp/oa/provision/template/entity.st");
private static String read(final String classpathResource) throws IOException {
return Resources.toString(Resources.getResource(classpathResource), StandardCharsets.UTF_8);

View File

@ -48,13 +48,13 @@ import eu.dnetlib.dhp.schema.oaf.Result;
public class XmlRecordFactory implements Serializable {
public static final String REL_SUBTYPE_DEDUP = "dedup";
private Map<String, LongAccumulator> accumulators;
private final Map<String, LongAccumulator> accumulators;
private Set<String> specialDatasourceTypes;
private final Set<String> specialDatasourceTypes;
private ContextMapper contextMapper;
private final ContextMapper contextMapper;
private String schemaLocation;
private final String schemaLocation;
private boolean indent = false;

View File

@ -41,7 +41,7 @@ public class XmlSerializationUtils {
public static String mapStructuredProperty(String name, StructuredProperty t) {
return asXmlElement(
name, t.getValue(), t.getQualifier(), t.getDataInfo() != null ? t.getDataInfo() : null);
name, t.getValue(), t.getQualifier(), t.getDataInfo());
}
public static String mapQualifier(String name, Qualifier q) {

View File

@ -9,7 +9,7 @@ import org.junit.jupiter.api.BeforeEach;
public class GraphJoinerTest {
private ClassLoader cl = getClass().getClassLoader();
private final ClassLoader cl = getClass().getClassLoader();
private Path workingDir;
private Path inputDir;
private Path outputDir;

View File

@ -22,6 +22,7 @@
<plugin>
<groupId>pl.project13.maven</groupId>
<artifactId>git-commit-id-plugin</artifactId>
<version>2.1.11</version>
<configuration>
<failOnNoGitDirectory>false</failOnNoGitDirectory>
</configuration>

View File

@ -3,13 +3,27 @@
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<parent>
<artifactId>dhp</artifactId>
<artifactId>dhp-workflows</artifactId>
<groupId>eu.dnetlib.dhp</groupId>
<version>1.1.7-SNAPSHOT</version>
</parent>
<modelVersion>4.0.0</modelVersion>
<artifactId>dhp-worfklow-profiles</artifactId>
<packaging>jar</packaging>
<!--
<build>
<pluginManagement>
<plugins>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-site-plugin</artifactId>
<version>3.7.1</version>
</plugin>
</plugins>
</pluginManagement>
</build>
-->
</project>

View File

@ -13,6 +13,8 @@
<artifactId>dhp-workflows</artifactId>
<packaging>pom</packaging>
<description>This module is the container for the oozie workflow definitions in dnet-hadoop project</description>
<modules>
<module>dhp-worfklow-profiles</module>
<module>dhp-aggregation</module>
@ -184,7 +186,7 @@
<plugin>
<groupId>org.kuali.maven.plugins</groupId>
<artifactId>properties-maven-plugin</artifactId>
<version>1.3.2</version>
<version>${properties.maven.plugin.version}</version>
<dependencies>
<dependency>
<groupId>eu.dnetlib.dhp</groupId>
@ -564,60 +566,4 @@
</profile>
</profiles>
<build>
<pluginManagement>
<plugins>
<!--This plugin's configuration is used to store Eclipse m2e settings only. It has no influence on the Maven build itself. -->
<plugin>
<groupId>org.eclipse.m2e</groupId>
<artifactId>lifecycle-mapping</artifactId>
<version>1.0.0</version>
<configuration>
<lifecycleMappingMetadata>
<pluginExecutions>
<pluginExecution>
<pluginExecutionFilter>
<groupId>
org.kuali.maven.plugins
</groupId>
<artifactId>
properties-maven-plugin
</artifactId>
<versionRange>
[1.3.2,)
</versionRange>
<goals>
<goal>
read-project-properties
</goal>
<goal>
write-project-properties
</goal>
</goals>
</pluginExecutionFilter>
<action>
<ignore />
</action>
</pluginExecution>
<!-- copy-dependency plugin -->
<pluginExecution>
<pluginExecutionFilter>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-dependency-plugin</artifactId>
<versionRange>[1.0.0,)</versionRange>
<goals>
<goal>copy-dependencies</goal>
</goals>
</pluginExecutionFilter>
<action>
<ignore />
</action>
</pluginExecution>
</pluginExecutions>
</lifecycleMappingMetadata>
</configuration>
</plugin>
</plugins>
</pluginManagement>
</build>
</project>

38
pom.xml
View File

@ -6,14 +6,14 @@
<version>1.1.7-SNAPSHOT</version>
<packaging>pom</packaging>
<url>http://www.d-net.research-infrastructures.eu</url>
<licenses>
<license>
<name>The Apache Software License, Version 2.0</name>
<url>http://www.apache.org/licenses/LICENSE-2.0.txt</url>
<name>GNU Affero General Public License v3.0 or later</name>
<url>https://spdx.org/licenses/AGPL-3.0-or-later.html#licenseText</url>
<distribution>repo</distribution>
<comments>A business-friendly OSS license</comments>
<comments>This program is free software: you can redistribute it and/or modify it under the terms of the
GNU Affero General Public License as published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.</comments>
</license>
</licenses>
@ -41,6 +41,8 @@
<tag>HEAD</tag>
</scm>
<description>This module is the root descriptor for the dnet-hadoop project</description>
<pluginRepositories>
</pluginRepositories>
@ -406,6 +408,18 @@
<testOutputDirectory>target/test-classes</testOutputDirectory>
<pluginManagement>
<plugins>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-project-info-reports-plugin</artifactId>
<version>3.0.0</version>
</plugin>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-site-plugin</artifactId>
<version>3.7.1</version>
</plugin>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-compiler-plugin</artifactId>
@ -449,9 +463,10 @@
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-javadoc-plugin</artifactId>
<version>2.10.4</version>
<version>3.2.0</version>
<configuration>
<detectLinks>true</detectLinks>
<doclint>none</doclint>
</configuration>
</plugin>
<plugin>
@ -475,6 +490,14 @@
</plugins>
</pluginManagement>
<plugins>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-site-plugin</artifactId>
</plugin>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-project-info-reports-plugin</artifactId>
</plugin>
<plugin>
<groupId>net.revelc.code.formatter</groupId>
<artifactId>formatter-maven-plugin</artifactId>
@ -569,9 +592,9 @@
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-javadoc-plugin</artifactId>
<version>2.10.4</version>
<configuration>
<detectLinks>true</detectLinks>
<doclint>none</doclint>
</configuration>
</plugin>
</plugins>
@ -582,6 +605,7 @@
<project.reporting.outputEncoding>UTF-8</project.reporting.outputEncoding>
<maven.compiler.plugin.version>3.6.0</maven.compiler.plugin.version>
<maven.failsave.plugin.version>2.22.2</maven.failsave.plugin.version>
<properties.maven.plugin.version>2.0.1</properties.maven.plugin.version>
<dhp.cdh.version>cdh5.9.2</dhp.cdh.version>
<dhp.hadoop.version>2.6.0-${dhp.cdh.version}</dhp.hadoop.version>
<dhp.oozie.version>4.1.0-${dhp.cdh.version}</dhp.oozie.version>