master #11

Manually merged
claudio.atzori merged 275 commits from :master into enrichment_wfs 2020-05-11 15:14:56 +02:00
42 changed files with 783 additions and 819 deletions
Showing only changes of commit 3abb76ff7a - Show all commits

View File

@ -12,6 +12,8 @@
<artifactId>dhp-build-assembly-resources</artifactId>
<packaging>jar</packaging>
<description>This module contains a set of scripts supporting the build lifecycle for the dnet-hadoop project</description>
<build>
<plugins>
<plugin>

View File

@ -12,22 +12,29 @@
<artifactId>dhp-build-properties-maven-plugin</artifactId>
<packaging>maven-plugin</packaging>
<description>This module is a maven plugin implementing custom properties substitutions in the build lifecycle</description>
<dependencies>
<dependency>
<groupId>org.apache.maven</groupId>
<artifactId>maven-plugin-api</artifactId>
<version>2.0</version>
<version>3.6.3</version>
</dependency>
<dependency>
<groupId>org.apache.maven</groupId>
<artifactId>maven-project</artifactId>
<version>2.0</version>
<version>2.2.1</version>
</dependency>
<dependency>
<groupId>org.apache.maven</groupId>
<artifactId>maven-artifact</artifactId>
<version>2.2.1</version>
</dependency>
<dependency>
<groupId>org.kuali.maven.plugins</groupId>
<artifactId>properties-maven-plugin</artifactId>
<version>1.3.2</version>
<version>${properties.maven.plugin.version}</version>
</dependency>
<dependency>
<groupId>com.google.code.findbugs</groupId>
@ -73,44 +80,10 @@
<artifactId>maven-javadoc-plugin</artifactId>
<configuration>
<detectLinks>true</detectLinks>
<doclint>none</doclint>
</configuration>
</plugin>
</plugins>
<pluginManagement>
<plugins>
<!--This plugin's configuration is used to store Eclipse m2e settings only. It has no influence on the Maven build itself.-->
<plugin>
<groupId>org.eclipse.m2e</groupId>
<artifactId>lifecycle-mapping</artifactId>
<version>1.0.0</version>
<configuration>
<lifecycleMappingMetadata>
<pluginExecutions>
<pluginExecution>
<pluginExecutionFilter>
<groupId>
org.apache.maven.plugins
</groupId>
<artifactId>
maven-plugin-plugin
</artifactId>
<versionRange>
[3.2,)
</versionRange>
<goals>
<goal>descriptor</goal>
</goals>
</pluginExecutionFilter>
<action>
<ignore />
</action>
</pluginExecution>
</pluginExecutions>
</lifecycleMappingMetadata>
</configuration>
</plugin>
</plugins>
</pluginManagement>
</build>
</project>

View File

@ -40,7 +40,7 @@ import edu.umd.cs.findbugs.annotations.SuppressFBWarnings;
/**
* Writes project properties for the keys listed in specified properties files. Based on:
* http://site.kuali.org/maven/plugins/properties-maven-plugin/1.3.2/write-project-properties-mojo.html
* http://site.kuali.org/maven/plugins/properties-maven-plugin/2.0.1/write-project-properties-mojo.html
*
* @author mhorst
* @goal write-project-properties

View File

@ -11,7 +11,7 @@ import org.junit.jupiter.api.Test;
/** @author mhorst, claudio.atzori */
public class GenerateOoziePropertiesMojoTest {
private GenerateOoziePropertiesMojo mojo = new GenerateOoziePropertiesMojo();
private final GenerateOoziePropertiesMojo mojo = new GenerateOoziePropertiesMojo();
@BeforeEach
public void clearSystemProperties() {

View File

@ -366,7 +366,7 @@ public class WritePredefinedProjectPropertiesTest {
}
private Properties getStoredProperties(File testFolder)
throws FileNotFoundException, IOException {
throws IOException {
Properties properties = new Properties();
properties.load(new FileInputStream(getPropertiesFileLocation(testFolder)));
return properties;

View File

@ -11,6 +11,38 @@
<packaging>jar</packaging>
<description>This module contains resources supporting common code style conventions</description>
<distributionManagement>
<snapshotRepository>
<id>dnet45-snapshots</id>
<name>DNet45 Snapshots</name>
<url>http://maven.research-infrastructures.eu/nexus/content/repositories/dnet45-snapshots</url>
<layout>default</layout>
</snapshotRepository>
<repository>
<id>dnet45-releases</id>
<url>http://maven.research-infrastructures.eu/nexus/content/repositories/dnet45-releases</url>
</repository>
</distributionManagement>
<build>
<pluginManagement>
<plugins>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-project-info-reports-plugin</artifactId>
<version>3.0.0</version>
</plugin>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-site-plugin</artifactId>
<version>3.7.1</version>
</plugin>
</plugins>
</pluginManagement>
</build>
<properties>
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
</properties>

View File

@ -8,6 +8,9 @@
</parent>
<artifactId>dhp-build</artifactId>
<packaging>pom</packaging>
<description>This module is a container for the build tools used in dnet-hadoop</description>
<modules>
<module>dhp-code-style</module>
<module>dhp-build-assembly-resources</module>

View File

@ -12,6 +12,8 @@
<artifactId>dhp-common</artifactId>
<packaging>jar</packaging>
<description>This module contains common utilities meant to be used across the dnet-hadoop submodules</description>
<dependencies>
<dependency>

View File

@ -21,7 +21,7 @@ public class DHPUtils {
public static String md5(final String s) {
try {
final MessageDigest md = MessageDigest.getInstance("MD5");
md.update(s.getBytes("UTF-8"));
md.update(s.getBytes(StandardCharsets.UTF_8));
return new String(Hex.encodeHex(md.digest()));
} catch (final Exception e) {
System.err.println("Error creating id");

View File

@ -17,7 +17,7 @@ public class NormalizeDate extends AbstractExtensionFunction {
"yyyy-MM-dd'T'hh:mm:ss", "yyyy-MM-dd", "yyyy/MM/dd", "yyyy"
};
private static final String normalizeOutFormat = new String("yyyy-MM-dd'T'hh:mm:ss'Z'");
private static final String normalizeOutFormat = "yyyy-MM-dd'T'hh:mm:ss'Z'";
@Override
public String getName() {

View File

@ -21,7 +21,7 @@ public class MessageManager {
private Connection connection;
private Map<String, Channel> channels = new HashMap<>();
private final Map<String, Channel> channels = new HashMap<>();
private boolean durable;

View File

@ -12,7 +12,7 @@
<artifactId>dhp-schemas</artifactId>
<packaging>jar</packaging>
<description>This module contains common schema classes meant to be used across the dnet-hadoop submodules</description>
<dependencies>

View File

@ -16,7 +16,7 @@ public class AtomicActionDeserializer extends JsonDeserializer {
@Override
public Object deserialize(JsonParser jp, DeserializationContext ctxt)
throws IOException, JsonProcessingException {
throws IOException {
JsonNode node = jp.getCodec().readTree(jp);
String classTag = node.get("clazz").asText();
JsonNode payload = node.get("payload");

View File

@ -10,7 +10,7 @@ import java.util.function.Function;
public class ModelSupport {
/** Defines the mapping between the actual entity type and the main entity type */
private static Map<EntityType, MainEntityType> entityMapping = Maps.newHashMap();
private static final Map<EntityType, MainEntityType> entityMapping = Maps.newHashMap();
static {
entityMapping.put(EntityType.publication, MainEntityType.result);
@ -52,7 +52,8 @@ public class ModelSupport {
private static final String schemeTemplate = "dnet:%s_%s_relations";
private ModelSupport() {}
private ModelSupport() {
}
/**
* Checks subclass-superclass relationship.
@ -142,7 +143,8 @@ public class ModelSupport {
}
public static String getScheme(final String sourceType, final String targetType) {
return String.format(
return String
.format(
schemeTemplate,
entityMapping.get(EntityType.valueOf(sourceType)).name(),
entityMapping.get(EntityType.valueOf(targetType)).name());
@ -159,29 +161,31 @@ public class ModelSupport {
private static <T extends Oaf> String idFnForRelation(T t) {
Relation r = (Relation) t;
return Optional.ofNullable(r.getSource())
return Optional
.ofNullable(r.getSource())
.map(
source ->
Optional.ofNullable(r.getTarget())
source -> Optional
.ofNullable(r.getTarget())
.map(
target ->
Optional.ofNullable(r.getRelType())
target -> Optional
.ofNullable(r.getRelType())
.map(
relType ->
Optional.ofNullable(r.getSubRelType())
relType -> Optional
.ofNullable(r.getSubRelType())
.map(
subRelType ->
Optional.ofNullable(r.getRelClass())
subRelType -> Optional
.ofNullable(r.getRelClass())
.map(
relClass ->
String.join(
relClass -> String
.join(
source,
target,
relType,
subRelType,
relClass))
.orElse(
String.join(
String
.join(
source,
target,
relType,
@ -195,4 +199,5 @@ public class ModelSupport {
private static <T extends Oaf> String idFnForOafEntity(T t) {
return ((OafEntity) t).getId();
}
}

View File

@ -32,7 +32,7 @@ public class ISClient implements Serializable {
private static final String INPUT_ACTION_SET_ID_SEPARATOR = ",";
private ISLookUpService isLookup;
private final ISLookUpService isLookup;
public ISClient(String isLookupUrl) {
isLookup = ISLookupClientFactory.getLookUpService(isLookupUrl);

View File

@ -123,10 +123,10 @@ public class PromoteActionPayloadFunctions {
* @param <G> Type of graph table row
*/
public static class TableAggregator<G extends Oaf> extends Aggregator<G, G, G> {
private SerializableSupplier<G> zeroFn;
private SerializableSupplier<BiFunction<G, G, G>> mergeAndGetFn;
private SerializableSupplier<Function<G, Boolean>> isNotZeroFn;
private Class<G> rowClazz;
private final SerializableSupplier<G> zeroFn;
private final SerializableSupplier<BiFunction<G, G, G>> mergeAndGetFn;
private final SerializableSupplier<Function<G, Boolean>> isNotZeroFn;
private final Class<G> rowClazz;
public TableAggregator(
SerializableSupplier<G> zeroFn,

View File

@ -20,7 +20,7 @@ public class DnetCollectorWorkerApplication {
private static final Logger log = LoggerFactory.getLogger(DnetCollectorWorkerApplication.class);
private static CollectorPluginFactory collectorPluginFactory = new CollectorPluginFactory();
private static final CollectorPluginFactory collectorPluginFactory = new CollectorPluginFactory();
private static ArgumentApplicationParser argumentParser;

View File

@ -9,7 +9,7 @@ public class CollectorPluginErrorLogList extends LinkedList<String> {
@Override
public String toString() {
String log = new String();
String log = "";
int index = 0;
for (final String errorMessage : this) {
log += String.format("Retry #%s: %s / ", index++, errorMessage);

View File

@ -11,22 +11,22 @@ import java.util.regex.Pattern;
public class XmlCleaner {
/** Pattern for numeric entities. */
private static Pattern validCharacterEntityPattern = Pattern.compile("^&#x?\\d{2,4};"); // $NON-NLS-1$
private static final Pattern validCharacterEntityPattern = Pattern.compile("^&#x?\\d{2,4};"); // $NON-NLS-1$
// private static Pattern validCharacterEntityPattern = Pattern.compile("^&#?\\d{2,4};");
// //$NON-NLS-1$
// see https://www.w3.org/TR/REC-xml/#charsets , not only limited to &#11;
private static Pattern invalidControlCharPattern = Pattern.compile("&#x?1[0-9a-fA-F];");
private static final Pattern invalidControlCharPattern = Pattern.compile("&#x?1[0-9a-fA-F];");
/**
* Pattern that negates the allowable XML 4 byte unicode characters. Valid are: #x9 | #xA | #xD | [#x20-#xD7FF] |
* [#xE000-#xFFFD] | [#x10000-#x10FFFF]
*/
private static Pattern invalidCharacterPattern = Pattern.compile("[^\t\r\n\u0020-\uD7FF\uE000-\uFFFD]"); // $NON-NLS-1$
private static final Pattern invalidCharacterPattern = Pattern.compile("[^\t\r\n\u0020-\uD7FF\uE000-\uFFFD]"); // $NON-NLS-1$
// Map entities to their unicode equivalent
private static Set<String> goodEntities = new HashSet<>();
private static Map<String, String> badEntities = new HashMap<>();
private static final Set<String> goodEntities = new HashSet<>();
private static final Map<String, String> badEntities = new HashMap<>();
static {
// pre-defined XML entities

View File

@ -21,8 +21,8 @@ import eu.dnetlib.message.MessageManager;
public class DnetCollectorWorkerApplicationTests {
private ArgumentApplicationParser argumentParser = mock(ArgumentApplicationParser.class);
private MessageManager messageManager = mock(MessageManager.class);
private final ArgumentApplicationParser argumentParser = mock(ArgumentApplicationParser.class);
private final MessageManager messageManager = mock(MessageManager.class);
private DnetCollectorWorker worker;

View File

@ -2,6 +2,7 @@
package eu.dnetlib.dhp.oa.dedup;
import java.io.StringReader;
import java.nio.charset.StandardCharsets;
import java.security.MessageDigest;
import java.text.Normalizer;
import java.util.*;
@ -73,7 +74,7 @@ public class DedupUtility {
public static String md5(final String s) {
try {
final MessageDigest md = MessageDigest.getInstance("MD5");
md.update(s.getBytes("UTF-8"));
md.update(s.getBytes(StandardCharsets.UTF_8));
return new String(Hex.encodeHex(md.digest()));
} catch (final Exception e) {
System.err.println("Error creating id");

View File

@ -15,7 +15,7 @@ public class SparkReporter implements Serializable, Reporter {
private final List<Tuple2<String, String>> relations = new ArrayList<>();
private Map<String, LongAccumulator> accumulators;
private final Map<String, LongAccumulator> accumulators;
public SparkReporter(Map<String, LongAccumulator> accumulators) {
this.accumulators = accumulators;

View File

@ -106,7 +106,7 @@ public class DedupUtility {
public static String md5(final String s) {
try {
final MessageDigest md = MessageDigest.getInstance("MD5");
md.update(s.getBytes("UTF-8"));
md.update(s.getBytes(StandardCharsets.UTF_8));
return new String(Hex.encodeHex(md.digest()));
} catch (final Exception e) {
System.err.println("Error creating id");

View File

@ -410,14 +410,10 @@ public abstract class AbstractMdRecordToOafMapper {
final String identifier = n.valueOf("./*[local-name()='identifier']");
final String baseURL = n.valueOf("./*[local-name()='baseURL']");
;
final String metadataNamespace = n.valueOf("./*[local-name()='metadataNamespace']");
;
final boolean altered = n.valueOf("@altered").equalsIgnoreCase("true");
final String datestamp = n.valueOf("./*[local-name()='datestamp']");
;
final String harvestDate = n.valueOf("@harvestDate");
;
return oaiIProvenance(identifier, baseURL, metadataNamespace, altered, datestamp, harvestDate);
}

View File

@ -60,11 +60,10 @@ public class MigrateDbEntitiesApplication extends AbstractMigrationApplication
private final long lastUpdateTimestamp;
public static void main(final String[] args) throws Exception {
final ArgumentApplicationParser parser =
new ArgumentApplicationParser(
IOUtils.toString(
MigrateDbEntitiesApplication.class.getResourceAsStream(
"/eu/dnetlib/dhp/oa/graph/migrate_db_entities_parameters.json")));
final ArgumentApplicationParser parser = new ArgumentApplicationParser(
IOUtils
.toString(MigrateDbEntitiesApplication.class
.getResourceAsStream("/eu/dnetlib/dhp/oa/graph/migrate_db_entities_parameters.json")));
parser.parseArgument(args);
@ -74,11 +73,10 @@ public class MigrateDbEntitiesApplication extends AbstractMigrationApplication
final String hdfsPath = parser.get("hdfsPath");
final boolean processClaims =
parser.get("action") != null && parser.get("action").equalsIgnoreCase("claims");
final boolean processClaims = parser.get("action") != null && parser.get("action").equalsIgnoreCase("claims");
try (final MigrateDbEntitiesApplication smdbe =
new MigrateDbEntitiesApplication(hdfsPath, dbUrl, dbUser, dbPassword)) {
try (final MigrateDbEntitiesApplication smdbe = new MigrateDbEntitiesApplication(hdfsPath, dbUrl, dbUser,
dbPassword)) {
if (processClaims) {
log.info("Processing claims...");
smdbe.execute("queryClaims.sql", smdbe::processClaims);
@ -118,8 +116,7 @@ public class MigrateDbEntitiesApplication extends AbstractMigrationApplication
public void execute(final String sqlFile, final Function<ResultSet, List<Oaf>> producer)
throws Exception {
final String sql =
IOUtils.toString(getClass().getResourceAsStream("/eu/dnetlib/dhp/oa/graph/sql/" + sqlFile));
final String sql = IOUtils.toString(getClass().getResourceAsStream("/eu/dnetlib/dhp/oa/graph/sql/" + sqlFile));
final Consumer<ResultSet> consumer = rs -> producer.apply(rs).forEach(oaf -> emitOaf(oaf));
@ -136,10 +133,8 @@ public class MigrateDbEntitiesApplication extends AbstractMigrationApplication
ds.setId(createOpenaireId(10, rs.getString("datasourceid"), true));
ds.setOriginalId(Arrays.asList(rs.getString("datasourceid")));
ds.setCollectedfrom(
listKeyValues(
createOpenaireId(10, rs.getString("collectedfromid"), true),
rs.getString("collectedfromname")));
ds
.setCollectedfrom(listKeyValues(createOpenaireId(10, rs.getString("collectedfromid"), true), rs.getString("collectedfromname")));
ds.setPid(new ArrayList<>());
ds.setDateofcollection(asString(rs.getDate("dateofcollection")));
ds.setDateoftransformation(null); // Value not returned by the SQL query
@ -179,8 +174,8 @@ public class MigrateDbEntitiesApplication extends AbstractMigrationApplication
ds.setPidsystems(field(rs.getString("pidsystems"), info));
ds.setCertificates(field(rs.getString("certificates"), info));
ds.setPolicies(new ArrayList<>()); // The sql query returns an empty array
ds.setJournal(
prepareJournal(rs.getString("officialname"), rs.getString("journal"), info)); // Journal
ds
.setJournal(prepareJournal(rs.getString("officialname"), rs.getString("journal"), info)); // Journal
ds.setDataInfo(info);
ds.setLastupdatetimestamp(lastUpdateTimestamp);
@ -199,10 +194,8 @@ public class MigrateDbEntitiesApplication extends AbstractMigrationApplication
p.setId(createOpenaireId(40, rs.getString("projectid"), true));
p.setOriginalId(Arrays.asList(rs.getString("projectid")));
p.setCollectedfrom(
listKeyValues(
createOpenaireId(10, rs.getString("collectedfromid"), true),
rs.getString("collectedfromname")));
p
.setCollectedfrom(listKeyValues(createOpenaireId(10, rs.getString("collectedfromid"), true), rs.getString("collectedfromname")));
p.setPid(new ArrayList<>());
p.setDateofcollection(asString(rs.getDate("dateofcollection")));
p.setDateoftransformation(asString(rs.getDate("dateoftransformation")));
@ -218,8 +211,8 @@ public class MigrateDbEntitiesApplication extends AbstractMigrationApplication
p.setKeywords(field(rs.getString("keywords"), info));
p.setDuration(field(Integer.toString(rs.getInt("duration")), info));
p.setEcsc39(field(Boolean.toString(rs.getBoolean("ecsc39")), info));
p.setOamandatepublications(
field(Boolean.toString(rs.getBoolean("oamandatepublications")), info));
p
.setOamandatepublications(field(Boolean.toString(rs.getBoolean("oamandatepublications")), info));
p.setEcarticle29_3(field(Boolean.toString(rs.getBoolean("ecarticle29_3")), info));
p.setSubjects(prepareListOfStructProps(rs.getArray("subjects"), info));
p.setFundingtree(prepareListFields(rs.getArray("fundingtree"), info));
@ -255,10 +248,8 @@ public class MigrateDbEntitiesApplication extends AbstractMigrationApplication
o.setId(createOpenaireId(20, rs.getString("organizationid"), true));
o.setOriginalId(Arrays.asList(rs.getString("organizationid")));
o.setCollectedfrom(
listKeyValues(
createOpenaireId(10, rs.getString("collectedfromid"), true),
rs.getString("collectedfromname")));
o
.setCollectedfrom(listKeyValues(createOpenaireId(10, rs.getString("collectedfromid"), true), rs.getString("collectedfromname")));
o.setPid(new ArrayList<>());
o.setDateofcollection(asString(rs.getDate("dateofcollection")));
o.setDateoftransformation(asString(rs.getDate("dateoftransformation")));
@ -272,13 +263,13 @@ public class MigrateDbEntitiesApplication extends AbstractMigrationApplication
o.setEclegalbody(field(Boolean.toString(rs.getBoolean("eclegalbody")), info));
o.setEclegalperson(field(Boolean.toString(rs.getBoolean("eclegalperson")), info));
o.setEcnonprofit(field(Boolean.toString(rs.getBoolean("ecnonprofit")), info));
o.setEcresearchorganization(
field(Boolean.toString(rs.getBoolean("ecresearchorganization")), info));
o
.setEcresearchorganization(field(Boolean.toString(rs.getBoolean("ecresearchorganization")), info));
o.setEchighereducation(field(Boolean.toString(rs.getBoolean("echighereducation")), info));
o.setEcinternationalorganizationeurinterests(
field(Boolean.toString(rs.getBoolean("ecinternationalorganizationeurinterests")), info));
o.setEcinternationalorganization(
field(Boolean.toString(rs.getBoolean("ecinternationalorganization")), info));
o
.setEcinternationalorganizationeurinterests(field(Boolean.toString(rs.getBoolean("ecinternationalorganizationeurinterests")), info));
o
.setEcinternationalorganization(field(Boolean.toString(rs.getBoolean("ecinternationalorganization")), info));
o.setEcenterprise(field(Boolean.toString(rs.getBoolean("ecenterprise")), info));
o.setEcsmevalidated(field(Boolean.toString(rs.getBoolean("ecsmevalidated")), info));
o.setEcnutscode(field(Boolean.toString(rs.getBoolean("ecnutscode")), info));
@ -297,10 +288,7 @@ public class MigrateDbEntitiesApplication extends AbstractMigrationApplication
final DataInfo info = prepareDataInfo(rs);
final String orgId = createOpenaireId(20, rs.getString("organization"), true);
final String dsId = createOpenaireId(10, rs.getString("datasource"), true);
final List<KeyValue> collectedFrom =
listKeyValues(
createOpenaireId(10, rs.getString("collectedfromid"), true),
rs.getString("collectedfromname"));
final List<KeyValue> collectedFrom = listKeyValues(createOpenaireId(10, rs.getString("collectedfromid"), true), rs.getString("collectedfromname"));
final Relation r1 = new Relation();
r1.setRelType("datasourceOrganization");
@ -333,15 +321,12 @@ public class MigrateDbEntitiesApplication extends AbstractMigrationApplication
final DataInfo info = prepareDataInfo(rs);
final String orgId = createOpenaireId(20, rs.getString("resporganization"), true);
final String projectId = createOpenaireId(40, rs.getString("project"), true);
final List<KeyValue> collectedFrom =
listKeyValues(
createOpenaireId(10, rs.getString("collectedfromid"), true),
rs.getString("collectedfromname"));
final List<KeyValue> collectedFrom = listKeyValues(createOpenaireId(10, rs.getString("collectedfromid"), true), rs.getString("collectedfromname"));
final Relation r1 = new Relation();
r1.setRelType("projectOrganization");
r1.setSubRelType("participation");
r1.setRelClass("isParticipant");
r1.setRelClass("hasParticipant");
r1.setSource(projectId);
r1.setTarget(orgId);
r1.setCollectedfrom(collectedFrom);
@ -351,7 +336,7 @@ public class MigrateDbEntitiesApplication extends AbstractMigrationApplication
final Relation r2 = new Relation();
r2.setRelType("projectOrganization");
r2.setSubRelType("participation");
r2.setRelClass("hasParticipant");
r2.setRelClass("isParticipant");
r2.setSource(orgId);
r2.setTarget(projectId);
r2.setCollectedfrom(collectedFrom);
@ -367,17 +352,9 @@ public class MigrateDbEntitiesApplication extends AbstractMigrationApplication
public List<Oaf> processClaims(final ResultSet rs) {
final DataInfo info =
dataInfo(
false,
null,
false,
false,
qualifier(
"user:claim", "user:claim", "dnet:provenanceActions", "dnet:provenanceActions"),
"0.9");
dataInfo(false, null, false, false, qualifier("user:claim", "user:claim", "dnet:provenanceActions", "dnet:provenanceActions"), "0.9");
final List<KeyValue> collectedFrom =
listKeyValues(createOpenaireId(10, "infrastruct_::openaire", true), "OpenAIRE");
final List<KeyValue> collectedFrom = listKeyValues(createOpenaireId(10, "infrastruct_::openaire", true), "OpenAIRE");
try {
@ -405,10 +382,8 @@ public class MigrateDbEntitiesApplication extends AbstractMigrationApplication
return Arrays.asList(r);
} else {
final String sourceId =
createOpenaireId(rs.getString("source_type"), rs.getString("source_id"), false);
final String targetId =
createOpenaireId(rs.getString("target_type"), rs.getString("target_id"), false);
final String sourceId = createOpenaireId(rs.getString("source_type"), rs.getString("source_id"), false);
final String targetId = createOpenaireId(rs.getString("target_type"), rs.getString("target_id"), false);
final Relation r1 = new Relation();
final Relation r2 = new Relation();
@ -465,19 +440,11 @@ public class MigrateDbEntitiesApplication extends AbstractMigrationApplication
final String inferenceprovenance = rs.getString("inferenceprovenance");
final Boolean inferred = rs.getBoolean("inferred");
final String trust = rs.getString("trust");
return dataInfo(
deletedbyinference,
inferenceprovenance,
inferred,
false,
MigrationConstants.ENTITYREGISTRY_PROVENANCE_ACTION,
trust);
return dataInfo(deletedbyinference, inferenceprovenance, inferred, false, MigrationConstants.ENTITYREGISTRY_PROVENANCE_ACTION, trust);
}
/**
 * Builds a {@link Qualifier} from a single string whose fields are separated by {@code "@@@"}.
 *
 * @param s the encoded qualifier; may be null or blank
 * @return the qualifier built from the four fields, or {@code null} when {@code s} is blank or does not split
 *         into exactly four fields
 */
private Qualifier prepareQualifierSplitting(final String s) {
	if (StringUtils.isBlank(s)) {
		return null;
	}
	// String.split drops trailing empty fields, so an input ending in "@@@" yields fewer than four parts
	// and is rejected here as well.
	final String[] arr = s.split("@@@");
	return arr.length == 4 ? qualifier(arr[0], arr[1], arr[2], arr[3]) : null;
}
@ -491,22 +458,19 @@ public class MigrateDbEntitiesApplication extends AbstractMigrationApplication
}
/**
 * Builds a {@link StructuredProperty} from a string of the form {@code value###f1@@@f2@@@f3@@@f4}.
 *
 * @param s the encoded property; may be null or blank
 * @param dataInfo the data info passed through to the created property
 * @return the structured property, or {@code null} when {@code s} is blank, does not split into exactly two
 *         parts on {@code "###"}, or its second part does not split into exactly four fields on {@code "@@@"}
 */
private StructuredProperty prepareStructProp(final String s, final DataInfo dataInfo) {
	if (StringUtils.isBlank(s)) {
		return null;
	}
	final String[] parts = s.split("###");
	if (parts.length == 2) {
		final String value = parts[0];
		// The second part carries the four "@@@"-separated fields handed to structuredProperty.
		final String[] arr = parts[1].split("@@@");
		if (arr.length == 4) {
			return structuredProperty(value, arr[0], arr[1], arr[2], arr[3], dataInfo);
		}
	}
	return null;
}
private List<StructuredProperty> prepareListOfStructProps(
final Array array, final DataInfo dataInfo) throws SQLException {
final Array array,
final DataInfo dataInfo) throws SQLException {
final List<StructuredProperty> res = new ArrayList<>();
if (array != null) {
for (final String s : (String[]) array.getArray()) {
@ -524,11 +488,9 @@ public class MigrateDbEntitiesApplication extends AbstractMigrationApplication
if (StringUtils.isNotBlank(sj)) {
final String[] arr = sj.split("@@@");
if (arr.length == 3) {
final String issn = StringUtils.isNotBlank(arr[0]) ? arr[0] : null;
final String eissn = StringUtils.isNotBlank(arr[1]) ? arr[1] : null;
;
final String lissn = StringUtils.isNotBlank(arr[2]) ? arr[2] : null;
;
final String issn = StringUtils.isNotBlank(arr[0]) ? arr[0].trim() : null;
final String eissn = StringUtils.isNotBlank(arr[1]) ? arr[1].trim() : null;;
final String lissn = StringUtils.isNotBlank(arr[2]) ? arr[2].trim() : null;;
if (issn != null || eissn != null || lissn != null) {
return journal(name, issn, eissn, eissn, null, null, null, null, null, null, null, info);
}
@ -543,5 +505,4 @@ public class MigrateDbEntitiesApplication extends AbstractMigrationApplication
dbClient.close();
}
}

View File

@ -14,7 +14,7 @@ public class DbClient implements Closeable {
private static final Log log = LogFactory.getLog(DbClient.class);
private Connection connection;
private final Connection connection;
public DbClient(final String address, final String login, final String password) {

View File

@ -2,6 +2,7 @@
package eu.dnetlib.dhp.oa.graph.raw.common;
import java.nio.charset.Charset;
import java.nio.charset.StandardCharsets;
import java.text.Normalizer;
import java.util.HashSet;
import java.util.List;
@ -141,7 +142,7 @@ public class PacePerson {
public String hash() {
return Hashing
.murmur3_128()
.hashString(getNormalisedFullname(), Charset.forName(UTF8))
.hashString(getNormalisedFullname(), StandardCharsets.UTF_8)
.toString();
}

View File

@ -25,7 +25,7 @@ public abstract class AbstractScholexplorerParser {
protected static final Log log = LogFactory.getLog(AbstractScholexplorerParser.class);
static final Pattern pattern = Pattern.compile("10\\.\\d{4,9}/[-._;()/:A-Z0-9]+$", Pattern.CASE_INSENSITIVE);
private List<String> datasetSubTypes = Arrays
private final List<String> datasetSubTypes = Arrays
.asList(
"dataset",
"software",

View File

@ -108,7 +108,7 @@ SELECT
ELSE 'Other'
END || '@@@dnet:datasource_typologies@@@dnet:datasource_typologies' AS datasourcetype,
'sysimport:crosswalk:entityregistry@@@sysimport:crosswalk:entityregistry@@@dnet:provenance_actions@@@dnet:provenance_actions' AS provenanceaction,
CONCAT(d.issn, '@@@', d.eissn, '@@@', d.lissn) AS journal
CONCAT(d.issn, ' @@@ ', d.eissn, ' @@@ ', d.lissn) AS journal
FROM dsm_datasources d

View File

@ -18,7 +18,7 @@ import eu.dnetlib.dhp.utils.DHPUtils;
public class CrossRefParserJSON {
private static List<ScholixCollectedFrom> collectedFrom = generateCrossrefCollectedFrom("complete");
private static final List<ScholixCollectedFrom> collectedFrom = generateCrossrefCollectedFrom("complete");
public static ScholixResource parseRecord(final String record) {
if (record == null)

View File

@ -16,7 +16,7 @@ public class DataciteClient {
private String host;
private String index = "datacite";
private String indexType = "dump";
private Datacite2Scholix d2s;
private final Datacite2Scholix d2s;
public DataciteClient(String host) {
this.host = host;

View File

@ -12,7 +12,7 @@ import eu.dnetlib.dhp.oa.provision.model.SortableRelation;
*/
public class RelationPartitioner extends Partitioner {
private int numPartitions;
private final int numPartitions;
public RelationPartitioner(int numPartitions) {
this.numPartitions = numPartitions;

View File

@ -46,7 +46,7 @@ public class StreamingInputDocumentFactory {
private static final String INDEX_RECORD_ID = INDEX_FIELD_PREFIX + "indexrecordidentifier";
private static final String outFormat = new String("yyyy-MM-dd'T'hh:mm:ss'Z'");
private static final String outFormat = "yyyy-MM-dd'T'hh:mm:ss'Z'";
private static final List<String> dateFormats = Arrays
.asList("yyyy-MM-dd'T'hh:mm:ss", "yyyy-MM-dd", "dd-MM-yyyy", "dd/MM/yyyy", "yyyy");
@ -61,15 +61,18 @@ public class StreamingInputDocumentFactory {
private static final int MAX_FIELD_LENGTH = 25000;
private ThreadLocal<XMLInputFactory> inputFactory = ThreadLocal.withInitial(() -> XMLInputFactory.newInstance());
private final ThreadLocal<XMLInputFactory> inputFactory = ThreadLocal
.withInitial(() -> XMLInputFactory.newInstance());
private ThreadLocal<XMLOutputFactory> outputFactory = ThreadLocal.withInitial(() -> XMLOutputFactory.newInstance());
private final ThreadLocal<XMLOutputFactory> outputFactory = ThreadLocal
.withInitial(() -> XMLOutputFactory.newInstance());
private ThreadLocal<XMLEventFactory> eventFactory = ThreadLocal.withInitial(() -> XMLEventFactory.newInstance());
private final ThreadLocal<XMLEventFactory> eventFactory = ThreadLocal
.withInitial(() -> XMLEventFactory.newInstance());
private String version;
private final String version;
private String dsId;
private final String dsId;
private String resultName = DEFAULTDNETRESULT;

View File

@ -17,7 +17,7 @@ import eu.dnetlib.dhp.schema.oaf.OafEntity;
public class TemplateFactory {
private TemplateResources resources;
private final TemplateResources resources;
private static final char DELIMITER = '$';

View File

@ -8,17 +8,17 @@ import com.google.common.io.Resources;
public class TemplateResources {
private String record = read("eu/dnetlib/dhp/oa/provision/template/record.st");
private final String record = read("eu/dnetlib/dhp/oa/provision/template/record.st");
private String instance = read("eu/dnetlib/dhp/oa/provision/template/instance.st");
private final String instance = read("eu/dnetlib/dhp/oa/provision/template/instance.st");
private String rel = read("eu/dnetlib/dhp/oa/provision/template/rel.st");
private final String rel = read("eu/dnetlib/dhp/oa/provision/template/rel.st");
private String webresource = read("eu/dnetlib/dhp/oa/provision/template/webresource.st");
private final String webresource = read("eu/dnetlib/dhp/oa/provision/template/webresource.st");
private String child = read("eu/dnetlib/dhp/oa/provision/template/child.st");
private final String child = read("eu/dnetlib/dhp/oa/provision/template/child.st");
private String entity = read("eu/dnetlib/dhp/oa/provision/template/entity.st");
private final String entity = read("eu/dnetlib/dhp/oa/provision/template/entity.st");
private static String read(final String classpathResource) throws IOException {
return Resources.toString(Resources.getResource(classpathResource), StandardCharsets.UTF_8);

View File

@ -48,13 +48,13 @@ import eu.dnetlib.dhp.schema.oaf.Result;
public class XmlRecordFactory implements Serializable {
public static final String REL_SUBTYPE_DEDUP = "dedup";
private Map<String, LongAccumulator> accumulators;
private final Map<String, LongAccumulator> accumulators;
private Set<String> specialDatasourceTypes;
private final Set<String> specialDatasourceTypes;
private ContextMapper contextMapper;
private final ContextMapper contextMapper;
private String schemaLocation;
private final String schemaLocation;
private boolean indent = false;

View File

@ -41,7 +41,7 @@ public class XmlSerializationUtils {
public static String mapStructuredProperty(String name, StructuredProperty t) {
return asXmlElement(
name, t.getValue(), t.getQualifier(), t.getDataInfo() != null ? t.getDataInfo() : null);
name, t.getValue(), t.getQualifier(), t.getDataInfo());
}
public static String mapQualifier(String name, Qualifier q) {

View File

@ -9,7 +9,7 @@ import org.junit.jupiter.api.BeforeEach;
public class GraphJoinerTest {
private ClassLoader cl = getClass().getClassLoader();
private final ClassLoader cl = getClass().getClassLoader();
private Path workingDir;
private Path inputDir;
private Path outputDir;

View File

@ -22,6 +22,7 @@
<plugin>
<groupId>pl.project13.maven</groupId>
<artifactId>git-commit-id-plugin</artifactId>
<version>2.1.11</version>
<configuration>
<failOnNoGitDirectory>false</failOnNoGitDirectory>
</configuration>

View File

@ -3,13 +3,27 @@
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<parent>
<artifactId>dhp</artifactId>
<artifactId>dhp-workflows</artifactId>
<groupId>eu.dnetlib.dhp</groupId>
<version>1.1.7-SNAPSHOT</version>
</parent>
<modelVersion>4.0.0</modelVersion>
<artifactId>dhp-worfklow-profiles</artifactId>
<packaging>jar</packaging>
<!--
<build>
<pluginManagement>
<plugins>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-site-plugin</artifactId>
<version>3.7.1</version>
</plugin>
</plugins>
</pluginManagement>
</build>
-->
</project>

View File

@ -13,6 +13,8 @@
<artifactId>dhp-workflows</artifactId>
<packaging>pom</packaging>
<description>This module is the container for the oozie workflow definitions in dnet-hadoop project</description>
<modules>
<module>dhp-worfklow-profiles</module>
<module>dhp-aggregation</module>
@ -184,7 +186,7 @@
<plugin>
<groupId>org.kuali.maven.plugins</groupId>
<artifactId>properties-maven-plugin</artifactId>
<version>1.3.2</version>
<version>${properties.maven.plugin.version}</version>
<dependencies>
<dependency>
<groupId>eu.dnetlib.dhp</groupId>
@ -564,60 +566,4 @@
</profile>
</profiles>
<build>
<pluginManagement>
<plugins>
<!--This plugin's configuration is used to store Eclipse m2e settings only. It has no influence on the Maven build itself. -->
<plugin>
<groupId>org.eclipse.m2e</groupId>
<artifactId>lifecycle-mapping</artifactId>
<version>1.0.0</version>
<configuration>
<lifecycleMappingMetadata>
<pluginExecutions>
<pluginExecution>
<pluginExecutionFilter>
<groupId>
org.kuali.maven.plugins
</groupId>
<artifactId>
properties-maven-plugin
</artifactId>
<versionRange>
[1.3.2,)
</versionRange>
<goals>
<goal>
read-project-properties
</goal>
<goal>
write-project-properties
</goal>
</goals>
</pluginExecutionFilter>
<action>
<ignore />
</action>
</pluginExecution>
<!-- copy-dependency plugin -->
<pluginExecution>
<pluginExecutionFilter>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-dependency-plugin</artifactId>
<versionRange>[1.0.0,)</versionRange>
<goals>
<goal>copy-dependencies</goal>
</goals>
</pluginExecutionFilter>
<action>
<ignore />
</action>
</pluginExecution>
</pluginExecutions>
</lifecycleMappingMetadata>
</configuration>
</plugin>
</plugins>
</pluginManagement>
</build>
</project>

38
pom.xml
View File

@ -6,14 +6,14 @@
<version>1.1.7-SNAPSHOT</version>
<packaging>pom</packaging>
<url>http://www.d-net.research-infrastructures.eu</url>
<licenses>
<license>
<name>The Apache Software License, Version 2.0</name>
<url>http://www.apache.org/licenses/LICENSE-2.0.txt</url>
<name>GNU Affero General Public License v3.0 or later</name>
<url>https://spdx.org/licenses/AGPL-3.0-or-later.html#licenseText</url>
<distribution>repo</distribution>
<comments>A business-friendly OSS license</comments>
<comments>This program is free software: you can redistribute it and/or modify it under the terms of the
GNU Affero General Public License as published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.</comments>
</license>
</licenses>
@ -41,6 +41,8 @@
<tag>HEAD</tag>
</scm>
<description>This module is the root descriptor for the dnet-hadoop project</description>
<pluginRepositories>
</pluginRepositories>
@ -406,6 +408,18 @@
<testOutputDirectory>target/test-classes</testOutputDirectory>
<pluginManagement>
<plugins>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-project-info-reports-plugin</artifactId>
<version>3.0.0</version>
</plugin>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-site-plugin</artifactId>
<version>3.7.1</version>
</plugin>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-compiler-plugin</artifactId>
@ -449,9 +463,10 @@
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-javadoc-plugin</artifactId>
<version>2.10.4</version>
<version>3.2.0</version>
<configuration>
<detectLinks>true</detectLinks>
<doclint>none</doclint>
</configuration>
</plugin>
<plugin>
@ -475,6 +490,14 @@
</plugins>
</pluginManagement>
<plugins>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-site-plugin</artifactId>
</plugin>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-project-info-reports-plugin</artifactId>
</plugin>
<plugin>
<groupId>net.revelc.code.formatter</groupId>
<artifactId>formatter-maven-plugin</artifactId>
@ -569,9 +592,9 @@
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-javadoc-plugin</artifactId>
<version>2.10.4</version>
<configuration>
<detectLinks>true</detectLinks>
<doclint>none</doclint>
</configuration>
</plugin>
</plugins>
@ -582,6 +605,7 @@
<project.reporting.outputEncoding>UTF-8</project.reporting.outputEncoding>
<maven.compiler.plugin.version>3.6.0</maven.compiler.plugin.version>
<maven.failsave.plugin.version>2.22.2</maven.failsave.plugin.version>
<properties.maven.plugin.version>2.0.1</properties.maven.plugin.version>
<dhp.cdh.version>cdh5.9.2</dhp.cdh.version>
<dhp.hadoop.version>2.6.0-${dhp.cdh.version}</dhp.hadoop.version>
<dhp.oozie.version>4.1.0-${dhp.cdh.version}</dhp.oozie.version>