Manually merging PR D-Net/dnet-hadoop#149

remotes/1693145428336165040/master
Claudio Atzori 3 years ago
commit 83c90c7180

@ -8,8 +8,6 @@ import java.util.List;
import org.apache.commons.lang.ArrayUtils; import org.apache.commons.lang.ArrayUtils;
import org.apache.commons.lang.StringUtils; import org.apache.commons.lang.StringUtils;
import org.apache.maven.plugin.AbstractMojo; import org.apache.maven.plugin.AbstractMojo;
import org.apache.maven.plugin.MojoExecutionException;
import org.apache.maven.plugin.MojoFailureException;
/** /**
* Generates oozie properties which were not provided from commandline. * Generates oozie properties which were not provided from commandline.
@ -27,7 +25,7 @@ public class GenerateOoziePropertiesMojo extends AbstractMojo {
}; };
@Override @Override
public void execute() throws MojoExecutionException, MojoFailureException { public void execute() {
if (System.getProperties().containsKey(PROPERTY_NAME_WF_SOURCE_DIR) if (System.getProperties().containsKey(PROPERTY_NAME_WF_SOURCE_DIR)
&& !System.getProperties().containsKey(PROPERTY_NAME_SANDBOX_NAME)) { && !System.getProperties().containsKey(PROPERTY_NAME_SANDBOX_NAME)) {
String generatedSandboxName = generateSandboxName( String generatedSandboxName = generateSandboxName(
@ -46,24 +44,24 @@ public class GenerateOoziePropertiesMojo extends AbstractMojo {
/** /**
* Generates sandbox name from workflow source directory. * Generates sandbox name from workflow source directory.
* *
* @param wfSourceDir * @param wfSourceDir workflow source directory
* @return generated sandbox name * @return generated sandbox name
*/ */
private String generateSandboxName(String wfSourceDir) { private String generateSandboxName(String wfSourceDir) {
// utilize all dir names until finding one of the limiters // utilize all dir names until finding one of the limiters
List<String> sandboxNameParts = new ArrayList<String>(); List<String> sandboxNameParts = new ArrayList<>();
String[] tokens = StringUtils.split(wfSourceDir, File.separatorChar); String[] tokens = StringUtils.split(wfSourceDir, File.separatorChar);
ArrayUtils.reverse(tokens); ArrayUtils.reverse(tokens);
if (tokens.length > 0) { if (tokens.length > 0) {
for (String token : tokens) { for (String token : tokens) {
for (String limiter : limiters) { for (String limiter : limiters) {
if (limiter.equals(token)) { if (limiter.equals(token)) {
return sandboxNameParts.size() > 0 return !sandboxNameParts.isEmpty()
? StringUtils.join(sandboxNameParts.toArray()) ? StringUtils.join(sandboxNameParts.toArray())
: null; : null;
} }
} }
if (sandboxNameParts.size() > 0) { if (!sandboxNameParts.isEmpty()) {
sandboxNameParts.add(0, File.separator); sandboxNameParts.add(0, File.separator);
} }
sandboxNameParts.add(0, token); sandboxNameParts.add(0, token);

@ -16,6 +16,7 @@ import java.io.File;
import java.io.FileInputStream; import java.io.FileInputStream;
import java.io.IOException; import java.io.IOException;
import java.io.InputStream; import java.io.InputStream;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.Arrays; import java.util.Arrays;
import java.util.Collections; import java.util.Collections;
@ -289,7 +290,7 @@ public class WritePredefinedProjectProperties extends AbstractMojo {
*/ */
protected List<String> getEscapeChars(String escapeChars) { protected List<String> getEscapeChars(String escapeChars) {
List<String> tokens = getListFromCSV(escapeChars); List<String> tokens = getListFromCSV(escapeChars);
List<String> realTokens = new ArrayList<String>(); List<String> realTokens = new ArrayList<>();
for (String token : tokens) { for (String token : tokens) {
String realToken = getRealToken(token); String realToken = getRealToken(token);
realTokens.add(realToken); realTokens.add(realToken);
@ -324,7 +325,7 @@ public class WritePredefinedProjectProperties extends AbstractMojo {
* @return content * @return content
*/ */
protected String getContent(String comment, Properties properties, List<String> escapeTokens) { protected String getContent(String comment, Properties properties, List<String> escapeTokens) {
List<String> names = new ArrayList<String>(properties.stringPropertyNames()); List<String> names = new ArrayList<>(properties.stringPropertyNames());
Collections.sort(names); Collections.sort(names);
StringBuilder sb = new StringBuilder(); StringBuilder sb = new StringBuilder();
if (!StringUtils.isBlank(comment)) { if (!StringUtils.isBlank(comment)) {
@ -352,7 +353,7 @@ public class WritePredefinedProjectProperties extends AbstractMojo {
throws MojoExecutionException { throws MojoExecutionException {
try { try {
String content = getContent(comment, properties, escapeTokens); String content = getContent(comment, properties, escapeTokens);
FileUtils.writeStringToFile(file, content, ENCODING_UTF8); FileUtils.writeStringToFile(file, content, StandardCharsets.UTF_8);
} catch (IOException e) { } catch (IOException e) {
throw new MojoExecutionException("Error creating properties file", e); throw new MojoExecutionException("Error creating properties file", e);
} }
@ -399,9 +400,9 @@ public class WritePredefinedProjectProperties extends AbstractMojo {
*/ */
protected static final List<String> getListFromCSV(String csv) { protected static final List<String> getListFromCSV(String csv) {
if (StringUtils.isBlank(csv)) { if (StringUtils.isBlank(csv)) {
return new ArrayList<String>(); return new ArrayList<>();
} }
List<String> list = new ArrayList<String>(); List<String> list = new ArrayList<>();
String[] tokens = StringUtils.split(csv, ","); String[] tokens = StringUtils.split(csv, ",");
for (String token : tokens) { for (String token : tokens) {
list.add(token.trim()); list.add(token.trim());

@ -9,18 +9,18 @@ import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test; import org.junit.jupiter.api.Test;
/** @author mhorst, claudio.atzori */ /** @author mhorst, claudio.atzori */
public class GenerateOoziePropertiesMojoTest { class GenerateOoziePropertiesMojoTest {
private final GenerateOoziePropertiesMojo mojo = new GenerateOoziePropertiesMojo(); private final GenerateOoziePropertiesMojo mojo = new GenerateOoziePropertiesMojo();
@BeforeEach @BeforeEach
public void clearSystemProperties() { void clearSystemProperties() {
System.clearProperty(PROPERTY_NAME_SANDBOX_NAME); System.clearProperty(PROPERTY_NAME_SANDBOX_NAME);
System.clearProperty(PROPERTY_NAME_WF_SOURCE_DIR); System.clearProperty(PROPERTY_NAME_WF_SOURCE_DIR);
} }
@Test @Test
public void testExecuteEmpty() throws Exception { void testExecuteEmpty() throws Exception {
// execute // execute
mojo.execute(); mojo.execute();
@ -29,7 +29,7 @@ public class GenerateOoziePropertiesMojoTest {
} }
@Test @Test
public void testExecuteSandboxNameAlreadySet() throws Exception { void testExecuteSandboxNameAlreadySet() throws Exception {
// given // given
String workflowSourceDir = "eu/dnetlib/dhp/wf/transformers"; String workflowSourceDir = "eu/dnetlib/dhp/wf/transformers";
String sandboxName = "originalSandboxName"; String sandboxName = "originalSandboxName";
@ -44,7 +44,7 @@ public class GenerateOoziePropertiesMojoTest {
} }
@Test @Test
public void testExecuteEmptyWorkflowSourceDir() throws Exception { void testExecuteEmptyWorkflowSourceDir() throws Exception {
// given // given
String workflowSourceDir = ""; String workflowSourceDir = "";
System.setProperty(PROPERTY_NAME_WF_SOURCE_DIR, workflowSourceDir); System.setProperty(PROPERTY_NAME_WF_SOURCE_DIR, workflowSourceDir);
@ -57,7 +57,7 @@ public class GenerateOoziePropertiesMojoTest {
} }
@Test @Test
public void testExecuteNullSandboxNameGenerated() throws Exception { void testExecuteNullSandboxNameGenerated() throws Exception {
// given // given
String workflowSourceDir = "eu/dnetlib/dhp/"; String workflowSourceDir = "eu/dnetlib/dhp/";
System.setProperty(PROPERTY_NAME_WF_SOURCE_DIR, workflowSourceDir); System.setProperty(PROPERTY_NAME_WF_SOURCE_DIR, workflowSourceDir);
@ -70,7 +70,7 @@ public class GenerateOoziePropertiesMojoTest {
} }
@Test @Test
public void testExecute() throws Exception { void testExecute() throws Exception {
// given // given
String workflowSourceDir = "eu/dnetlib/dhp/wf/transformers"; String workflowSourceDir = "eu/dnetlib/dhp/wf/transformers";
System.setProperty(PROPERTY_NAME_WF_SOURCE_DIR, workflowSourceDir); System.setProperty(PROPERTY_NAME_WF_SOURCE_DIR, workflowSourceDir);
@ -83,7 +83,7 @@ public class GenerateOoziePropertiesMojoTest {
} }
@Test @Test
public void testExecuteWithoutRoot() throws Exception { void testExecuteWithoutRoot() throws Exception {
// given // given
String workflowSourceDir = "wf/transformers"; String workflowSourceDir = "wf/transformers";
System.setProperty(PROPERTY_NAME_WF_SOURCE_DIR, workflowSourceDir); System.setProperty(PROPERTY_NAME_WF_SOURCE_DIR, workflowSourceDir);

@ -20,7 +20,7 @@ import org.mockito.junit.jupiter.MockitoExtension;
/** @author mhorst, claudio.atzori */ /** @author mhorst, claudio.atzori */
@ExtendWith(MockitoExtension.class) @ExtendWith(MockitoExtension.class)
public class WritePredefinedProjectPropertiesTest { class WritePredefinedProjectPropertiesTest {
@Mock @Mock
private MavenProject mavenProject; private MavenProject mavenProject;
@ -39,7 +39,7 @@ public class WritePredefinedProjectPropertiesTest {
// ----------------------------------- TESTS --------------------------------------------- // ----------------------------------- TESTS ---------------------------------------------
@Test @Test
public void testExecuteEmpty() throws Exception { void testExecuteEmpty() throws Exception {
// execute // execute
mojo.execute(); mojo.execute();
@ -50,7 +50,7 @@ public class WritePredefinedProjectPropertiesTest {
} }
@Test @Test
public void testExecuteWithProjectProperties() throws Exception { void testExecuteWithProjectProperties() throws Exception {
// given // given
String key = "projectPropertyKey"; String key = "projectPropertyKey";
String value = "projectPropertyValue"; String value = "projectPropertyValue";
@ -70,7 +70,7 @@ public class WritePredefinedProjectPropertiesTest {
} }
@Test() @Test()
public void testExecuteWithProjectPropertiesAndInvalidOutputFile(@TempDir File testFolder) { void testExecuteWithProjectPropertiesAndInvalidOutputFile(@TempDir File testFolder) {
// given // given
String key = "projectPropertyKey"; String key = "projectPropertyKey";
String value = "projectPropertyValue"; String value = "projectPropertyValue";
@ -84,7 +84,7 @@ public class WritePredefinedProjectPropertiesTest {
} }
@Test @Test
public void testExecuteWithProjectPropertiesExclusion(@TempDir File testFolder) throws Exception { void testExecuteWithProjectPropertiesExclusion(@TempDir File testFolder) throws Exception {
// given // given
String key = "projectPropertyKey"; String key = "projectPropertyKey";
String value = "projectPropertyValue"; String value = "projectPropertyValue";
@ -108,7 +108,7 @@ public class WritePredefinedProjectPropertiesTest {
} }
@Test @Test
public void testExecuteWithProjectPropertiesInclusion(@TempDir File testFolder) throws Exception { void testExecuteWithProjectPropertiesInclusion(@TempDir File testFolder) throws Exception {
// given // given
String key = "projectPropertyKey"; String key = "projectPropertyKey";
String value = "projectPropertyValue"; String value = "projectPropertyValue";
@ -132,7 +132,7 @@ public class WritePredefinedProjectPropertiesTest {
} }
@Test @Test
public void testExecuteIncludingPropertyKeysFromFile(@TempDir File testFolder) throws Exception { void testExecuteIncludingPropertyKeysFromFile(@TempDir File testFolder) throws Exception {
// given // given
String key = "projectPropertyKey"; String key = "projectPropertyKey";
String value = "projectPropertyValue"; String value = "projectPropertyValue";
@ -164,7 +164,7 @@ public class WritePredefinedProjectPropertiesTest {
} }
@Test @Test
public void testExecuteIncludingPropertyKeysFromClasspathResource(@TempDir File testFolder) void testExecuteIncludingPropertyKeysFromClasspathResource(@TempDir File testFolder)
throws Exception { throws Exception {
// given // given
String key = "projectPropertyKey"; String key = "projectPropertyKey";
@ -194,7 +194,7 @@ public class WritePredefinedProjectPropertiesTest {
} }
@Test @Test
public void testExecuteIncludingPropertyKeysFromBlankLocation() { void testExecuteIncludingPropertyKeysFromBlankLocation() {
// given // given
String key = "projectPropertyKey"; String key = "projectPropertyKey";
String value = "projectPropertyValue"; String value = "projectPropertyValue";
@ -214,7 +214,7 @@ public class WritePredefinedProjectPropertiesTest {
} }
@Test @Test
public void testExecuteIncludingPropertyKeysFromXmlFile(@TempDir File testFolder) void testExecuteIncludingPropertyKeysFromXmlFile(@TempDir File testFolder)
throws Exception { throws Exception {
// given // given
String key = "projectPropertyKey"; String key = "projectPropertyKey";
@ -247,7 +247,7 @@ public class WritePredefinedProjectPropertiesTest {
} }
@Test @Test
public void testExecuteIncludingPropertyKeysFromInvalidXmlFile(@TempDir File testFolder) void testExecuteIncludingPropertyKeysFromInvalidXmlFile(@TempDir File testFolder)
throws Exception { throws Exception {
// given // given
String key = "projectPropertyKey"; String key = "projectPropertyKey";
@ -273,7 +273,7 @@ public class WritePredefinedProjectPropertiesTest {
} }
@Test @Test
public void testExecuteWithQuietModeOn(@TempDir File testFolder) throws Exception { void testExecuteWithQuietModeOn(@TempDir File testFolder) throws Exception {
// given // given
mojo.setQuiet(true); mojo.setQuiet(true);
mojo.setIncludePropertyKeysFromFiles(new String[] { mojo.setIncludePropertyKeysFromFiles(new String[] {
@ -290,7 +290,7 @@ public class WritePredefinedProjectPropertiesTest {
} }
@Test @Test
public void testExecuteIncludingPropertyKeysFromInvalidFile() { void testExecuteIncludingPropertyKeysFromInvalidFile() {
// given // given
mojo.setIncludePropertyKeysFromFiles(new String[] { mojo.setIncludePropertyKeysFromFiles(new String[] {
"invalid location" "invalid location"
@ -301,7 +301,7 @@ public class WritePredefinedProjectPropertiesTest {
} }
@Test @Test
public void testExecuteWithEnvironmentProperties(@TempDir File testFolder) throws Exception { void testExecuteWithEnvironmentProperties(@TempDir File testFolder) throws Exception {
// given // given
mojo.setIncludeEnvironmentVariables(true); mojo.setIncludeEnvironmentVariables(true);
@ -318,7 +318,7 @@ public class WritePredefinedProjectPropertiesTest {
} }
@Test @Test
public void testExecuteWithSystemProperties(@TempDir File testFolder) throws Exception { void testExecuteWithSystemProperties(@TempDir File testFolder) throws Exception {
// given // given
String key = "systemPropertyKey"; String key = "systemPropertyKey";
String value = "systemPropertyValue"; String value = "systemPropertyValue";
@ -337,7 +337,7 @@ public class WritePredefinedProjectPropertiesTest {
} }
@Test @Test
public void testExecuteWithSystemPropertiesAndEscapeChars(@TempDir File testFolder) void testExecuteWithSystemPropertiesAndEscapeChars(@TempDir File testFolder)
throws Exception { throws Exception {
// given // given
String key = "systemPropertyKey "; String key = "systemPropertyKey ";

@ -117,6 +117,11 @@
<groupId>eu.dnetlib.dhp</groupId> <groupId>eu.dnetlib.dhp</groupId>
<artifactId>dhp-schemas</artifactId> <artifactId>dhp-schemas</artifactId>
</dependency> </dependency>
<dependency>
<groupId>com.opencsv</groupId>
<artifactId>opencsv</artifactId>
</dependency>
</dependencies> </dependencies>
</project> </project>

@ -1,14 +0,0 @@
package eu.dnetlib.dhp.application;
import java.io.*;
import java.util.Map;
import java.util.Properties;
import org.apache.hadoop.conf.Configuration;
import com.google.common.collect.Maps;
// Empty placeholder class: declares no members, so the imports above are
// currently unused. NOTE(review): appears to be removed entirely by this
// merge (deletion hunk) — confirm no remaining callers before relying on it.
public class ApplicationUtils {
}

@ -56,13 +56,13 @@ public class ArgumentApplicationParser implements Serializable {
final StringWriter stringWriter = new StringWriter(); final StringWriter stringWriter = new StringWriter();
IOUtils.copy(gis, stringWriter); IOUtils.copy(gis, stringWriter);
return stringWriter.toString(); return stringWriter.toString();
} catch (Throwable e) { } catch (IOException e) {
log.error("Wrong value to decompress:" + abstractCompressed); log.error("Wrong value to decompress: {}", abstractCompressed);
throw new RuntimeException(e); throw new IllegalArgumentException(e);
} }
} }
public static String compressArgument(final String value) throws Exception { public static String compressArgument(final String value) throws IOException {
ByteArrayOutputStream out = new ByteArrayOutputStream(); ByteArrayOutputStream out = new ByteArrayOutputStream();
GZIPOutputStream gzip = new GZIPOutputStream(out); GZIPOutputStream gzip = new GZIPOutputStream(out);
gzip.write(value.getBytes()); gzip.write(value.getBytes());

@ -9,9 +9,6 @@ public class OptionsParameter {
private boolean paramRequired; private boolean paramRequired;
private boolean compressed; private boolean compressed;
public OptionsParameter() {
}
public String getParamName() { public String getParamName() {
return paramName; return paramName;
} }

@ -34,7 +34,7 @@ public class ApiDescriptor {
return params; return params;
} }
public void setParams(final HashMap<String, String> params) { public void setParams(final Map<String, String> params) {
this.params = params; this.params = params;
} }

@ -12,6 +12,9 @@ public class Constants {
public static String COAR_ACCESS_RIGHT_SCHEMA = "http://vocabularies.coar-repositories.org/documentation/access_rights/"; public static String COAR_ACCESS_RIGHT_SCHEMA = "http://vocabularies.coar-repositories.org/documentation/access_rights/";
private Constants() {
}
static { static {
accessRightsCoarMap.put("OPEN", "c_abf2"); accessRightsCoarMap.put("OPEN", "c_abf2");
accessRightsCoarMap.put("RESTRICTED", "c_16ec"); accessRightsCoarMap.put("RESTRICTED", "c_16ec");
@ -49,4 +52,10 @@ public class Constants {
public static final String CONTENT_INVALIDRECORDS = "InvalidRecords"; public static final String CONTENT_INVALIDRECORDS = "InvalidRecords";
public static final String CONTENT_TRANSFORMEDRECORDS = "transformedItems"; public static final String CONTENT_TRANSFORMEDRECORDS = "transformedItems";
// IETF Draft and used by Repositories like ZENODO , not included in APACHE HTTP java packages
// see https://ietf-wg-httpapi.github.io/ratelimit-headers/draft-ietf-httpapi-ratelimit-headers.html
public static final String HTTPHEADER_IETF_DRAFT_RATELIMIT_LIMIT = "X-RateLimit-Limit";
public static final String HTTPHEADER_IETF_DRAFT_RATELIMIT_REMAINING = "X-RateLimit-Remaining";
public static final String HTTPHEADER_IETF_DRAFT_RATELIMIT_RESET = "X-RateLimit-Reset";
} }

@ -84,7 +84,7 @@ public class GraphResultMapper implements Serializable {
.setDocumentationUrl( .setDocumentationUrl(
value value
.stream() .stream()
.map(v -> v.getValue()) .map(Field::getValue)
.collect(Collectors.toList()))); .collect(Collectors.toList())));
Optional Optional
@ -100,20 +100,20 @@ public class GraphResultMapper implements Serializable {
.setContactgroup( .setContactgroup(
Optional Optional
.ofNullable(ir.getContactgroup()) .ofNullable(ir.getContactgroup())
.map(value -> value.stream().map(cg -> cg.getValue()).collect(Collectors.toList())) .map(value -> value.stream().map(Field::getValue).collect(Collectors.toList()))
.orElse(null)); .orElse(null));
out out
.setContactperson( .setContactperson(
Optional Optional
.ofNullable(ir.getContactperson()) .ofNullable(ir.getContactperson())
.map(value -> value.stream().map(cp -> cp.getValue()).collect(Collectors.toList())) .map(value -> value.stream().map(Field::getValue).collect(Collectors.toList()))
.orElse(null)); .orElse(null));
out out
.setTool( .setTool(
Optional Optional
.ofNullable(ir.getTool()) .ofNullable(ir.getTool())
.map(value -> value.stream().map(t -> t.getValue()).collect(Collectors.toList())) .map(value -> value.stream().map(Field::getValue).collect(Collectors.toList()))
.orElse(null)); .orElse(null));
out.setType(ModelConstants.ORP_DEFAULT_RESULTTYPE.getClassname()); out.setType(ModelConstants.ORP_DEFAULT_RESULTTYPE.getClassname());
@ -123,7 +123,8 @@ public class GraphResultMapper implements Serializable {
Optional Optional
.ofNullable(input.getAuthor()) .ofNullable(input.getAuthor())
.ifPresent(ats -> out.setAuthor(ats.stream().map(at -> getAuthor(at)).collect(Collectors.toList()))); .ifPresent(
ats -> out.setAuthor(ats.stream().map(GraphResultMapper::getAuthor).collect(Collectors.toList())));
// I do not map Access Right UNKNOWN or OTHER // I do not map Access Right UNKNOWN or OTHER
@ -210,7 +211,7 @@ public class GraphResultMapper implements Serializable {
if (oInst.isPresent()) { if (oInst.isPresent()) {
out out
.setInstance( .setInstance(
oInst.get().stream().map(i -> getInstance(i)).collect(Collectors.toList())); oInst.get().stream().map(GraphResultMapper::getInstance).collect(Collectors.toList()));
} }
@ -230,7 +231,7 @@ public class GraphResultMapper implements Serializable {
.stream() .stream()
.filter(t -> t.getQualifier().getClassid().equalsIgnoreCase("main title")) .filter(t -> t.getQualifier().getClassid().equalsIgnoreCase("main title"))
.collect(Collectors.toList()); .collect(Collectors.toList());
if (iTitle.size() > 0) { if (!iTitle.isEmpty()) {
out.setMaintitle(iTitle.get(0).getValue()); out.setMaintitle(iTitle.get(0).getValue());
} }
@ -239,7 +240,7 @@ public class GraphResultMapper implements Serializable {
.stream() .stream()
.filter(t -> t.getQualifier().getClassid().equalsIgnoreCase("subtitle")) .filter(t -> t.getQualifier().getClassid().equalsIgnoreCase("subtitle"))
.collect(Collectors.toList()); .collect(Collectors.toList());
if (iTitle.size() > 0) { if (!iTitle.isEmpty()) {
out.setSubtitle(iTitle.get(0).getValue()); out.setSubtitle(iTitle.get(0).getValue());
} }

@ -14,38 +14,33 @@ public class MakeTarArchive implements Serializable {
private static TarArchiveOutputStream getTar(FileSystem fileSystem, String outputPath) throws IOException { private static TarArchiveOutputStream getTar(FileSystem fileSystem, String outputPath) throws IOException {
Path hdfsWritePath = new Path(outputPath); Path hdfsWritePath = new Path(outputPath);
FSDataOutputStream fsDataOutputStream = null;
if (fileSystem.exists(hdfsWritePath)) { if (fileSystem.exists(hdfsWritePath)) {
fileSystem.delete(hdfsWritePath, true); fileSystem.delete(hdfsWritePath, true);
} }
fsDataOutputStream = fileSystem.create(hdfsWritePath); return new TarArchiveOutputStream(fileSystem.create(hdfsWritePath).getWrappedStream());
return new TarArchiveOutputStream(fsDataOutputStream.getWrappedStream());
} }
private static void write(FileSystem fileSystem, String inputPath, String outputPath, String dir_name) private static void write(FileSystem fileSystem, String inputPath, String outputPath, String dir_name)
throws IOException { throws IOException {
Path hdfsWritePath = new Path(outputPath); Path hdfsWritePath = new Path(outputPath);
FSDataOutputStream fsDataOutputStream = null;
if (fileSystem.exists(hdfsWritePath)) { if (fileSystem.exists(hdfsWritePath)) {
fileSystem.delete(hdfsWritePath, true); fileSystem.delete(hdfsWritePath, true);
} }
fsDataOutputStream = fileSystem.create(hdfsWritePath); try (TarArchiveOutputStream ar = new TarArchiveOutputStream(
fileSystem.create(hdfsWritePath).getWrappedStream())) {
TarArchiveOutputStream ar = new TarArchiveOutputStream(fsDataOutputStream.getWrappedStream()); RemoteIterator<LocatedFileStatus> iterator = fileSystem
.listFiles(
new Path(inputPath), true);
RemoteIterator<LocatedFileStatus> fileStatusListIterator = fileSystem while (iterator.hasNext()) {
.listFiles( writeCurrentFile(fileSystem, dir_name, iterator, ar, 0);
new Path(inputPath), true); }
while (fileStatusListIterator.hasNext()) {
writeCurrentFile(fileSystem, dir_name, fileStatusListIterator, ar, 0);
} }
ar.close();
} }
public static void tarMaxSize(FileSystem fileSystem, String inputPath, String outputPath, String dir_name, public static void tarMaxSize(FileSystem fileSystem, String inputPath, String outputPath, String dir_name,

@ -10,8 +10,6 @@ import java.util.Optional;
import java.util.stream.StreamSupport; import java.util.stream.StreamSupport;
import org.apache.commons.lang3.StringUtils; import org.apache.commons.lang3.StringUtils;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.bson.Document; import org.bson.Document;
import org.slf4j.Logger; import org.slf4j.Logger;
import org.slf4j.LoggerFactory; import org.slf4j.LoggerFactory;
@ -21,6 +19,7 @@ import com.mongodb.BasicDBObject;
import com.mongodb.MongoClient; import com.mongodb.MongoClient;
import com.mongodb.MongoClientURI; import com.mongodb.MongoClientURI;
import com.mongodb.QueryBuilder; import com.mongodb.QueryBuilder;
import com.mongodb.client.FindIterable;
import com.mongodb.client.MongoCollection; import com.mongodb.client.MongoCollection;
import com.mongodb.client.MongoDatabase; import com.mongodb.client.MongoDatabase;
@ -46,7 +45,7 @@ public class MdstoreClient implements Closeable {
final String currentId = Optional final String currentId = Optional
.ofNullable(getColl(db, COLL_METADATA_MANAGER, true).find(query)) .ofNullable(getColl(db, COLL_METADATA_MANAGER, true).find(query))
.map(r -> r.first()) .map(FindIterable::first)
.map(d -> d.getString("currentId")) .map(d -> d.getString("currentId"))
.orElseThrow(() -> new IllegalArgumentException("cannot find current mdstore id for: " + mdId)); .orElseThrow(() -> new IllegalArgumentException("cannot find current mdstore id for: " + mdId));
@ -84,7 +83,7 @@ public class MdstoreClient implements Closeable {
if (!Iterables.contains(client.listDatabaseNames(), dbName)) { if (!Iterables.contains(client.listDatabaseNames(), dbName)) {
final String err = String.format("Database '%s' not found in %s", dbName, client.getAddress()); final String err = String.format("Database '%s' not found in %s", dbName, client.getAddress());
log.warn(err); log.warn(err);
throw new RuntimeException(err); throw new IllegalArgumentException(err);
} }
return client.getDatabase(dbName); return client.getDatabase(dbName);
} }
@ -97,7 +96,7 @@ public class MdstoreClient implements Closeable {
String.format("Missing collection '%s' in database '%s'", collName, db.getName())); String.format("Missing collection '%s' in database '%s'", collName, db.getName()));
log.warn(err); log.warn(err);
if (abortIfMissing) { if (abortIfMissing) {
throw new RuntimeException(err); throw new IllegalArgumentException(err);
} else { } else {
return null; return null;
} }

@ -24,7 +24,6 @@ import com.google.common.hash.Hashing;
*/ */
public class PacePerson { public class PacePerson {
private static final String UTF8 = "UTF-8";
private List<String> name = Lists.newArrayList(); private List<String> name = Lists.newArrayList();
private List<String> surname = Lists.newArrayList(); private List<String> surname = Lists.newArrayList();
private List<String> fullname = Lists.newArrayList(); private List<String> fullname = Lists.newArrayList();

@ -1,5 +1,5 @@
package eu.dnetlib.dhp.aggregation.common; package eu.dnetlib.dhp.common.aggregation;
import java.io.Closeable; import java.io.Closeable;
import java.io.IOException; import java.io.IOException;
@ -11,8 +11,6 @@ import java.util.Objects;
import org.slf4j.Logger; import org.slf4j.Logger;
import org.slf4j.LoggerFactory; import org.slf4j.LoggerFactory;
import com.google.gson.Gson;
import eu.dnetlib.dhp.message.MessageSender; import eu.dnetlib.dhp.message.MessageSender;
import eu.dnetlib.dhp.utils.DHPUtils; import eu.dnetlib.dhp.utils.DHPUtils;
@ -20,12 +18,12 @@ public class AggregatorReport extends LinkedHashMap<String, String> implements C
private static final Logger log = LoggerFactory.getLogger(AggregatorReport.class); private static final Logger log = LoggerFactory.getLogger(AggregatorReport.class);
private MessageSender messageSender; private transient MessageSender messageSender;
public AggregatorReport() { public AggregatorReport() {
} }
public AggregatorReport(MessageSender messageSender) throws IOException { public AggregatorReport(MessageSender messageSender) {
this.messageSender = messageSender; this.messageSender = messageSender;
} }

@ -5,6 +5,9 @@ import java.io.*;
import java.io.IOException; import java.io.IOException;
import java.util.concurrent.TimeUnit; import java.util.concurrent.TimeUnit;
import org.apache.http.HttpHeaders;
import org.apache.http.entity.ContentType;
import com.google.gson.Gson; import com.google.gson.Gson;
import eu.dnetlib.dhp.common.api.zenodo.ZenodoModel; import eu.dnetlib.dhp.common.api.zenodo.ZenodoModel;
@ -43,7 +46,7 @@ public class ZenodoAPIClient implements Serializable {
this.deposition_id = deposition_id; this.deposition_id = deposition_id;
} }
public ZenodoAPIClient(String urlString, String access_token) throws IOException { public ZenodoAPIClient(String urlString, String access_token) {
this.urlString = urlString; this.urlString = urlString;
this.access_token = access_token; this.access_token = access_token;
@ -63,8 +66,8 @@ public class ZenodoAPIClient implements Serializable {
Request request = new Request.Builder() Request request = new Request.Builder()
.url(urlString) .url(urlString)
.addHeader("Content-Type", "application/json") // add request headers .addHeader(HttpHeaders.CONTENT_TYPE, ContentType.APPLICATION_JSON.toString()) // add request headers
.addHeader("Authorization", "Bearer " + access_token) .addHeader(HttpHeaders.AUTHORIZATION, "Bearer " + access_token)
.post(body) .post(body)
.build(); .build();
@ -103,8 +106,8 @@ public class ZenodoAPIClient implements Serializable {
Request request = new Request.Builder() Request request = new Request.Builder()
.url(bucket + "/" + file_name) .url(bucket + "/" + file_name)
.addHeader("Content-Type", "application/zip") // add request headers .addHeader(HttpHeaders.CONTENT_TYPE, "application/zip") // add request headers
.addHeader("Authorization", "Bearer " + access_token) .addHeader(HttpHeaders.AUTHORIZATION, "Bearer " + access_token)
.put(InputStreamRequestBody.create(MEDIA_TYPE_ZIP, is, len)) .put(InputStreamRequestBody.create(MEDIA_TYPE_ZIP, is, len))
.build(); .build();
@ -130,8 +133,8 @@ public class ZenodoAPIClient implements Serializable {
Request request = new Request.Builder() Request request = new Request.Builder()
.url(urlString + "/" + deposition_id) .url(urlString + "/" + deposition_id)
.addHeader("Content-Type", "application/json") // add request headers .addHeader(HttpHeaders.CONTENT_TYPE, ContentType.APPLICATION_JSON.toString()) // add request headers
.addHeader("Authorization", "Bearer " + access_token) .addHeader(HttpHeaders.AUTHORIZATION, "Bearer " + access_token)
.put(body) .put(body)
.build(); .build();
@ -197,7 +200,7 @@ public class ZenodoAPIClient implements Serializable {
Request request = new Request.Builder() Request request = new Request.Builder()
.url(urlString + "/" + deposition_id + "/actions/newversion") .url(urlString + "/" + deposition_id + "/actions/newversion")
.addHeader("Authorization", "Bearer " + access_token) .addHeader(HttpHeaders.AUTHORIZATION, "Bearer " + access_token)
.post(body) .post(body)
.build(); .build();
@ -270,8 +273,8 @@ public class ZenodoAPIClient implements Serializable {
Request request = new Request.Builder() Request request = new Request.Builder()
.url(urlString) .url(urlString)
.addHeader("Content-Type", "application/json") // add request headers .addHeader(HttpHeaders.CONTENT_TYPE, ContentType.APPLICATION_JSON.toString()) // add request headers
.addHeader("Authorization", "Bearer " + access_token) .addHeader(HttpHeaders.AUTHORIZATION, "Bearer " + access_token)
.get() .get()
.build(); .build();
@ -293,8 +296,8 @@ public class ZenodoAPIClient implements Serializable {
Request request = new Request.Builder() Request request = new Request.Builder()
.url(url) .url(url)
.addHeader("Content-Type", "application/json") // add request headers .addHeader(HttpHeaders.CONTENT_TYPE, ContentType.APPLICATION_JSON.toString()) // add request headers
.addHeader("Authorization", "Bearer " + access_token) .addHeader(HttpHeaders.AUTHORIZATION, "Bearer " + access_token)
.get() .get()
.build(); .build();

@ -32,13 +32,13 @@ public class Creator {
public static Creator newInstance(String name, String affiliation, String orcid) { public static Creator newInstance(String name, String affiliation, String orcid) {
Creator c = new Creator(); Creator c = new Creator();
if (!(name == null)) { if (name != null) {
c.name = name; c.name = name;
} }
if (!(affiliation == null)) { if (affiliation != null) {
c.affiliation = affiliation; c.affiliation = affiliation;
} }
if (!(orcid == null)) { if (orcid != null) {
c.orcid = orcid; c.orcid = orcid;
} }

@ -3,17 +3,12 @@ package eu.dnetlib.dhp.common.api.zenodo;
import java.io.Serializable; import java.io.Serializable;
import net.minidev.json.annotate.JsonIgnore;
public class File implements Serializable { public class File implements Serializable {
private String checksum; private String checksum;
private String filename; private String filename;
private long filesize; private long filesize;
private String id; private String id;
@JsonIgnore
// private Links links;
public String getChecksum() { public String getChecksum() {
return checksum; return checksum;
} }
@ -46,13 +41,4 @@ public class File implements Serializable {
this.id = id; this.id = id;
} }
// @JsonIgnore
// public Links getLinks() {
// return links;
// }
//
// @JsonIgnore
// public void setLinks(Links links) {
// this.links = links;
// }
} }

@ -1,5 +1,5 @@
package eu.dnetlib.dhp.collection; package eu.dnetlib.dhp.common.collection;
public class CollectorException extends Exception { public class CollectorException extends Exception {

@ -0,0 +1,56 @@
package eu.dnetlib.dhp.common.collection;
import java.io.*;
import java.nio.charset.StandardCharsets;
import java.util.List;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.opencsv.bean.CsvToBeanBuilder;
public class GetCSV {
public static final char DEFAULT_DELIMITER = ',';
private GetCSV() {
}
public static void getCsv(FileSystem fileSystem, BufferedReader reader, String hdfsPath,
String modelClass) throws IOException, ClassNotFoundException {
getCsv(fileSystem, reader, hdfsPath, modelClass, DEFAULT_DELIMITER);
}
public static void getCsv(FileSystem fileSystem, Reader reader, String hdfsPath,
String modelClass, char delimiter) throws IOException, ClassNotFoundException {
Path hdfsWritePath = new Path(hdfsPath);
FSDataOutputStream fsDataOutputStream = null;
if (fileSystem.exists(hdfsWritePath)) {
fileSystem.delete(hdfsWritePath, false);
}
fsDataOutputStream = fileSystem.create(hdfsWritePath);
try (BufferedWriter writer = new BufferedWriter(
new OutputStreamWriter(fsDataOutputStream, StandardCharsets.UTF_8))) {
final ObjectMapper mapper = new ObjectMapper();
@SuppressWarnings("unchecked")
final List lines = new CsvToBeanBuilder(reader)
.withType(Class.forName(modelClass))
.withSeparator(delimiter)
.build()
.parse();
for (Object line : lines) {
writer.write(mapper.writeValueAsString(line));
writer.newLine();
}
}
}
}

@ -1,5 +1,5 @@
package eu.dnetlib.dhp.collection; package eu.dnetlib.dhp.common.collection;
/** /**
* Bundles the http connection parameters driving the client behaviour. * Bundles the http connection parameters driving the client behaviour.

@ -1,5 +1,5 @@
package eu.dnetlib.dhp.collection; package eu.dnetlib.dhp.common.collection;
import static eu.dnetlib.dhp.utils.DHPUtils.*; import static eu.dnetlib.dhp.utils.DHPUtils.*;
@ -15,12 +15,13 @@ import org.apache.http.HttpHeaders;
import org.slf4j.Logger; import org.slf4j.Logger;
import org.slf4j.LoggerFactory; import org.slf4j.LoggerFactory;
import eu.dnetlib.dhp.aggregation.common.AggregatorReport; import eu.dnetlib.dhp.common.Constants;
import eu.dnetlib.dhp.common.aggregation.AggregatorReport;
/** /**
* Migrated from https://svn.driver.research-infrastructures.eu/driver/dnet45/modules/dnet-modular-collector-service/trunk/src/main/java/eu/dnetlib/data/collector/plugins/HttpConnector.java * Migrated from https://svn.driver.research-infrastructures.eu/driver/dnet45/modules/dnet-modular-collector-service/trunk/src/main/java/eu/dnetlib/data/collector/plugins/HttpConnector.java
* *
* @author jochen, michele, andrea, alessia, claudio * @author jochen, michele, andrea, alessia, claudio, andreas
*/ */
public class HttpConnector2 { public class HttpConnector2 {
@ -32,7 +33,7 @@ public class HttpConnector2 {
private String responseType = null; private String responseType = null;
private final String userAgent = "Mozilla/5.0 (compatible; OAI; +http://www.openaire.eu)"; private static final String userAgent = "Mozilla/5.0 (compatible; OAI; +http://www.openaire.eu)";
public HttpConnector2() { public HttpConnector2() {
this(new HttpClientParams()); this(new HttpClientParams());
@ -112,6 +113,17 @@ public class HttpConnector2 {
} }
int retryAfter = obtainRetryAfter(urlConn.getHeaderFields()); int retryAfter = obtainRetryAfter(urlConn.getHeaderFields());
String rateLimit = urlConn.getHeaderField(Constants.HTTPHEADER_IETF_DRAFT_RATELIMIT_LIMIT);
String rateRemaining = urlConn.getHeaderField(Constants.HTTPHEADER_IETF_DRAFT_RATELIMIT_REMAINING);
if ((rateLimit != null) && (rateRemaining != null) && (Integer.parseInt(rateRemaining) < 2)) {
if (retryAfter > 0) {
backoffAndSleep(retryAfter);
} else {
backoffAndSleep(1000);
}
}
if (is2xx(urlConn.getResponseCode())) { if (is2xx(urlConn.getResponseCode())) {
input = urlConn.getInputStream(); input = urlConn.getInputStream();
responseType = urlConn.getContentType(); responseType = urlConn.getContentType();
@ -120,7 +132,7 @@ public class HttpConnector2 {
if (is3xx(urlConn.getResponseCode())) { if (is3xx(urlConn.getResponseCode())) {
// REDIRECTS // REDIRECTS
final String newUrl = obtainNewLocation(urlConn.getHeaderFields()); final String newUrl = obtainNewLocation(urlConn.getHeaderFields());
log.info(String.format("The requested url has been moved to %s", newUrl)); log.info("The requested url has been moved to {}", newUrl);
report report
.put( .put(
REPORT_PREFIX + urlConn.getResponseCode(), REPORT_PREFIX + urlConn.getResponseCode(),
@ -140,14 +152,14 @@ public class HttpConnector2 {
if (retryAfter > 0) { if (retryAfter > 0) {
log log
.warn( .warn(
requestUrl + " - waiting and repeating request after suggested retry-after " "{} - waiting and repeating request after suggested retry-after {} sec.",
+ retryAfter + " sec."); requestUrl, retryAfter);
backoffAndSleep(retryAfter * 1000); backoffAndSleep(retryAfter * 1000);
} else { } else {
log log
.warn( .warn(
requestUrl + " - waiting and repeating request after default delay of " "{} - waiting and repeating request after default delay of {} sec.",
+ getClientParams().getRetryDelay() + " sec."); requestUrl, getClientParams().getRetryDelay());
backoffAndSleep(retryNumber * getClientParams().getRetryDelay() * 1000); backoffAndSleep(retryNumber * getClientParams().getRetryDelay() * 1000);
} }
report.put(REPORT_PREFIX + urlConn.getResponseCode(), requestUrl); report.put(REPORT_PREFIX + urlConn.getResponseCode(), requestUrl);
@ -181,12 +193,12 @@ public class HttpConnector2 {
} }
private void logHeaderFields(final HttpURLConnection urlConn) throws IOException { private void logHeaderFields(final HttpURLConnection urlConn) throws IOException {
log.debug("StatusCode: " + urlConn.getResponseMessage()); log.debug("StatusCode: {}", urlConn.getResponseMessage());
for (Map.Entry<String, List<String>> e : urlConn.getHeaderFields().entrySet()) { for (Map.Entry<String, List<String>> e : urlConn.getHeaderFields().entrySet()) {
if (e.getKey() != null) { if (e.getKey() != null) {
for (String v : e.getValue()) { for (String v : e.getValue()) {
log.debug(" key: " + e.getKey() + " - value: " + v); log.debug(" key: {} - value: {}", e.getKey(), v);
} }
} }
} }
@ -204,7 +216,7 @@ public class HttpConnector2 {
private int obtainRetryAfter(final Map<String, List<String>> headerMap) { private int obtainRetryAfter(final Map<String, List<String>> headerMap) {
for (String key : headerMap.keySet()) { for (String key : headerMap.keySet()) {
if ((key != null) && key.equalsIgnoreCase(HttpHeaders.RETRY_AFTER) && (headerMap.get(key).size() > 0) if ((key != null) && key.equalsIgnoreCase(HttpHeaders.RETRY_AFTER) && (!headerMap.get(key).isEmpty())
&& NumberUtils.isCreatable(headerMap.get(key).get(0))) { && NumberUtils.isCreatable(headerMap.get(key).get(0))) {
return Integer.parseInt(headerMap.get(key).get(0)) + 10; return Integer.parseInt(headerMap.get(key).get(0)) + 10;
} }

@ -1,11 +1,11 @@
package eu.dnetlib.dhp.common.rest; package eu.dnetlib.dhp.common.rest;
import java.io.IOException;
import java.util.Arrays; import java.util.Arrays;
import java.util.stream.Collectors; import java.util.stream.Collectors;
import org.apache.commons.io.IOUtils; import org.apache.commons.io.IOUtils;
import org.apache.http.client.methods.CloseableHttpResponse;
import org.apache.http.client.methods.HttpGet; import org.apache.http.client.methods.HttpGet;
import org.apache.http.client.methods.HttpPost; import org.apache.http.client.methods.HttpPost;
import org.apache.http.client.methods.HttpUriRequest; import org.apache.http.client.methods.HttpUriRequest;
@ -23,17 +23,20 @@ public class DNetRestClient {
private static final ObjectMapper mapper = new ObjectMapper(); private static final ObjectMapper mapper = new ObjectMapper();
private DNetRestClient() {
}
public static <T> T doGET(final String url, Class<T> clazz) throws Exception { public static <T> T doGET(final String url, Class<T> clazz) throws Exception {
final HttpGet httpGet = new HttpGet(url); final HttpGet httpGet = new HttpGet(url);
return doHTTPRequest(httpGet, clazz); return doHTTPRequest(httpGet, clazz);
} }
public static String doGET(final String url) throws Exception { public static String doGET(final String url) throws IOException {
final HttpGet httpGet = new HttpGet(url); final HttpGet httpGet = new HttpGet(url);
return doHTTPRequest(httpGet); return doHTTPRequest(httpGet);
} }
public static <V> String doPOST(final String url, V objParam) throws Exception { public static <V> String doPOST(final String url, V objParam) throws IOException {
final HttpPost httpPost = new HttpPost(url); final HttpPost httpPost = new HttpPost(url);
if (objParam != null) { if (objParam != null) {
@ -45,25 +48,25 @@ public class DNetRestClient {
return doHTTPRequest(httpPost); return doHTTPRequest(httpPost);
} }
public static <T, V> T doPOST(final String url, V objParam, Class<T> clazz) throws Exception { public static <T, V> T doPOST(final String url, V objParam, Class<T> clazz) throws IOException {
return mapper.readValue(doPOST(url, objParam), clazz); return mapper.readValue(doPOST(url, objParam), clazz);
} }
private static String doHTTPRequest(final HttpUriRequest r) throws Exception { private static String doHTTPRequest(final HttpUriRequest r) throws IOException {
CloseableHttpClient client = HttpClients.createDefault(); try (CloseableHttpClient client = HttpClients.createDefault()) {
log.info("performing HTTP request, method {} on URI {}", r.getMethod(), r.getURI().toString()); log.info("performing HTTP request, method {} on URI {}", r.getMethod(), r.getURI().toString());
log log
.info( .info(
"request headers: {}", "request headers: {}",
Arrays Arrays
.asList(r.getAllHeaders()) .asList(r.getAllHeaders())
.stream() .stream()
.map(h -> h.getName() + ":" + h.getValue()) .map(h -> h.getName() + ":" + h.getValue())
.collect(Collectors.joining(","))); .collect(Collectors.joining(",")));
CloseableHttpResponse response = client.execute(r); return IOUtils.toString(client.execute(r).getEntity().getContent());
return IOUtils.toString(response.getEntity().getContent()); }
} }
private static <T> T doHTTPRequest(final HttpUriRequest r, Class<T> clazz) throws Exception { private static <T> T doHTTPRequest(final HttpUriRequest r, Class<T> clazz) throws Exception {

@ -46,7 +46,7 @@ public class Vocabulary implements Serializable {
} }
public VocabularyTerm getTerm(final String id) { public VocabularyTerm getTerm(final String id) {
return Optional.ofNullable(id).map(s -> s.toLowerCase()).map(s -> terms.get(s)).orElse(null); return Optional.ofNullable(id).map(String::toLowerCase).map(terms::get).orElse(null);
} }
protected void addTerm(final String id, final String name) { protected void addTerm(final String id, final String name) {
@ -81,7 +81,6 @@ public class Vocabulary implements Serializable {
.ofNullable(getTermBySynonym(syn)) .ofNullable(getTermBySynonym(syn))
.map(term -> getTermAsQualifier(term.getId())) .map(term -> getTermAsQualifier(term.getId()))
.orElse(null); .orElse(null);
// .orElse(OafMapperUtils.unknown(getId(), getName()));
} }
} }

@ -46,7 +46,6 @@ public class VocabularyGroup implements Serializable {
} }
vocs.addTerm(vocId, termId, termName); vocs.addTerm(vocId, termId, termName);
// vocs.addSynonyms(vocId, termId, termId);
} }
} }
@ -58,7 +57,6 @@ public class VocabularyGroup implements Serializable {
final String syn = arr[2].trim(); final String syn = arr[2].trim();
vocs.addSynonyms(vocId, termId, syn); vocs.addSynonyms(vocId, termId, syn);
// vocs.addSynonyms(vocId, termId, termId);
} }
} }
@ -98,7 +96,7 @@ public class VocabularyGroup implements Serializable {
.getTerms() .getTerms()
.values() .values()
.stream() .stream()
.map(t -> t.getId()) .map(VocabularyTerm::getId)
.collect(Collectors.toCollection(HashSet::new)); .collect(Collectors.toCollection(HashSet::new));
} }
@ -154,16 +152,19 @@ public class VocabularyGroup implements Serializable {
return Optional return Optional
.ofNullable(vocId) .ofNullable(vocId)
.map(String::toLowerCase) .map(String::toLowerCase)
.map(id -> vocs.containsKey(id)) .map(vocs::containsKey)
.orElse(false); .orElse(false);
} }
private void addSynonyms(final String vocId, final String termId, final String syn) { private void addSynonyms(final String vocId, final String termId, final String syn) {
String id = Optional String id = Optional
.ofNullable(vocId) .ofNullable(vocId)
.map(s -> s.toLowerCase()) .map(String::toLowerCase)
.orElseThrow( .orElseThrow(
() -> new IllegalArgumentException(String.format("empty vocabulary id for [term:%s, synonym:%s]"))); () -> new IllegalArgumentException(
String
.format(
"empty vocabulary id for [term:%s, synonym:%s]", termId, syn)));
Optional Optional
.ofNullable(vocs.get(id)) .ofNullable(vocs.get(id))
.orElseThrow(() -> new IllegalArgumentException("missing vocabulary id: " + vocId)) .orElseThrow(() -> new IllegalArgumentException("missing vocabulary id: " + vocId))

@ -2,7 +2,6 @@
package eu.dnetlib.dhp.message; package eu.dnetlib.dhp.message;
import java.io.Serializable; import java.io.Serializable;
import java.util.HashMap;
import java.util.LinkedHashMap; import java.util.LinkedHashMap;
import java.util.Map; import java.util.Map;
@ -10,8 +9,8 @@ public class Message implements Serializable {
private static final long serialVersionUID = 401753881204524893L; private static final long serialVersionUID = 401753881204524893L;
public static String CURRENT_PARAM = "current"; public static final String CURRENT_PARAM = "current";
public static String TOTAL_PARAM = "total"; public static final String TOTAL_PARAM = "total";
private MessageType messageType; private MessageType messageType;

@ -4,7 +4,6 @@ package eu.dnetlib.dhp.oa.merge;
import java.text.Normalizer; import java.text.Normalizer;
import java.util.*; import java.util.*;
import java.util.stream.Collectors; import java.util.stream.Collectors;
import java.util.stream.Stream;
import org.apache.commons.lang3.StringUtils; import org.apache.commons.lang3.StringUtils;
@ -19,6 +18,9 @@ public class AuthorMerger {
private static final Double THRESHOLD = 0.95; private static final Double THRESHOLD = 0.95;
private AuthorMerger() {
}
public static List<Author> merge(List<List<Author>> authors) { public static List<Author> merge(List<List<Author>> authors) {
authors.sort((o1, o2) -> -Integer.compare(countAuthorsPids(o1), countAuthorsPids(o2))); authors.sort((o1, o2) -> -Integer.compare(countAuthorsPids(o1), countAuthorsPids(o2)));
@ -36,7 +38,8 @@ public class AuthorMerger {
public static List<Author> mergeAuthor(final List<Author> a, final List<Author> b, Double threshold) { public static List<Author> mergeAuthor(final List<Author> a, final List<Author> b, Double threshold) {
int pa = countAuthorsPids(a); int pa = countAuthorsPids(a);
int pb = countAuthorsPids(b); int pb = countAuthorsPids(b);
List<Author> base, enrich; List<Author> base;
List<Author> enrich;
int sa = authorsSize(a); int sa = authorsSize(a);
int sb = authorsSize(b); int sb = authorsSize(b);
@ -62,7 +65,7 @@ public class AuthorMerger {
// <pidComparableString, Author> (if an Author has more than 1 pid, it appears 2 times in the list) // <pidComparableString, Author> (if an Author has more than 1 pid, it appears 2 times in the list)
final Map<String, Author> basePidAuthorMap = base final Map<String, Author> basePidAuthorMap = base
.stream() .stream()
.filter(a -> a.getPid() != null && a.getPid().size() > 0) .filter(a -> a.getPid() != null && !a.getPid().isEmpty())
.flatMap( .flatMap(
a -> a a -> a
.getPid() .getPid()
@ -74,7 +77,7 @@ public class AuthorMerger {
// <pid, Author> (list of pid that are missing in the other list) // <pid, Author> (list of pid that are missing in the other list)
final List<Tuple2<StructuredProperty, Author>> pidToEnrich = enrich final List<Tuple2<StructuredProperty, Author>> pidToEnrich = enrich
.stream() .stream()
.filter(a -> a.getPid() != null && a.getPid().size() > 0) .filter(a -> a.getPid() != null && !a.getPid().isEmpty())
.flatMap( .flatMap(
a -> a a -> a
.getPid() .getPid()
@ -117,9 +120,9 @@ public class AuthorMerger {
} }
public static String pidToComparableString(StructuredProperty pid) { public static String pidToComparableString(StructuredProperty pid) {
return (pid.getQualifier() != null final String classid = pid.getQualifier().getClassid() != null ? pid.getQualifier().getClassid().toLowerCase()
? pid.getQualifier().getClassid() != null ? pid.getQualifier().getClassid().toLowerCase() : "" : "";
: "") return (pid.getQualifier() != null ? classid : "")
+ (pid.getValue() != null ? pid.getValue().toLowerCase() : ""); + (pid.getValue() != null ? pid.getValue().toLowerCase() : "");
} }

@ -12,6 +12,9 @@ import com.ximpleware.VTDNav;
/** Created by sandro on 9/29/16. */ /** Created by sandro on 9/29/16. */
public class VtdUtilityParser { public class VtdUtilityParser {
private VtdUtilityParser() {
}
public static List<Node> getTextValuesWithAttributes( public static List<Node> getTextValuesWithAttributes(
final AutoPilot ap, final VTDNav vn, final String xpath, final List<String> attributes) final AutoPilot ap, final VTDNav vn, final String xpath, final List<String> attributes)
throws VtdException { throws VtdException {

@ -284,7 +284,7 @@ public class GraphCleaningFunctions extends CleaningFunctions {
r r
.getAuthor() .getAuthor()
.stream() .stream()
.filter(a -> Objects.nonNull(a)) .filter(Objects::nonNull)
.filter(a -> StringUtils.isNotBlank(a.getFullname())) .filter(a -> StringUtils.isNotBlank(a.getFullname()))
.filter(a -> StringUtils.isNotBlank(a.getFullname().replaceAll("[\\W]", ""))) .filter(a -> StringUtils.isNotBlank(a.getFullname().replaceAll("[\\W]", "")))
.collect(Collectors.toList())); .collect(Collectors.toList()));

@ -17,13 +17,16 @@ import eu.dnetlib.dhp.schema.oaf.*;
public class OafMapperUtils { public class OafMapperUtils {
private OafMapperUtils() {
}
public static Oaf merge(final Oaf left, final Oaf right) { public static Oaf merge(final Oaf left, final Oaf right) {
if (ModelSupport.isSubClass(left, OafEntity.class)) { if (ModelSupport.isSubClass(left, OafEntity.class)) {
return mergeEntities((OafEntity) left, (OafEntity) right); return mergeEntities((OafEntity) left, (OafEntity) right);
} else if (ModelSupport.isSubClass(left, Relation.class)) { } else if (ModelSupport.isSubClass(left, Relation.class)) {
((Relation) left).mergeFrom((Relation) right); ((Relation) left).mergeFrom((Relation) right);
} else { } else {
throw new RuntimeException("invalid Oaf type:" + left.getClass().getCanonicalName()); throw new IllegalArgumentException("invalid Oaf type:" + left.getClass().getCanonicalName());
} }
return left; return left;
} }
@ -38,7 +41,7 @@ public class OafMapperUtils {
} else if (ModelSupport.isSubClass(left, Project.class)) { } else if (ModelSupport.isSubClass(left, Project.class)) {
left.mergeFrom(right); left.mergeFrom(right);
} else { } else {
throw new RuntimeException("invalid OafEntity subtype:" + left.getClass().getCanonicalName()); throw new IllegalArgumentException("invalid OafEntity subtype:" + left.getClass().getCanonicalName());
} }
return left; return left;
} }
@ -62,7 +65,7 @@ public class OafMapperUtils {
public static List<KeyValue> listKeyValues(final String... s) { public static List<KeyValue> listKeyValues(final String... s) {
if (s.length % 2 > 0) { if (s.length % 2 > 0) {
throw new RuntimeException("Invalid number of parameters (k,v,k,v,....)"); throw new IllegalArgumentException("Invalid number of parameters (k,v,k,v,....)");
} }
final List<KeyValue> list = new ArrayList<>(); final List<KeyValue> list = new ArrayList<>();
@ -88,7 +91,7 @@ public class OafMapperUtils {
.stream(values) .stream(values)
.map(v -> field(v, info)) .map(v -> field(v, info))
.filter(Objects::nonNull) .filter(Objects::nonNull)
.filter(distinctByKey(f -> f.getValue())) .filter(distinctByKey(Field::getValue))
.collect(Collectors.toList()); .collect(Collectors.toList());
} }
@ -97,7 +100,7 @@ public class OafMapperUtils {
.stream() .stream()
.map(v -> field(v, info)) .map(v -> field(v, info))
.filter(Objects::nonNull) .filter(Objects::nonNull)
.filter(distinctByKey(f -> f.getValue())) .filter(distinctByKey(Field::getValue))
.collect(Collectors.toList()); .collect(Collectors.toList());
} }
@ -342,10 +345,10 @@ public class OafMapperUtils {
if (instanceList != null) { if (instanceList != null) {
final Optional<AccessRight> min = instanceList final Optional<AccessRight> min = instanceList
.stream() .stream()
.map(i -> i.getAccessright()) .map(Instance::getAccessright)
.min(new AccessRightComparator<>()); .min(new AccessRightComparator<>());
final Qualifier rights = min.isPresent() ? qualifier(min.get()) : new Qualifier(); final Qualifier rights = min.map(OafMapperUtils::qualifier).orElseGet(Qualifier::new);
if (StringUtils.isBlank(rights.getClassid())) { if (StringUtils.isBlank(rights.getClassid())) {
rights.setClassid(UNKNOWN); rights.setClassid(UNKNOWN);

@ -34,6 +34,9 @@ public class DHPUtils {
private static final Logger log = LoggerFactory.getLogger(DHPUtils.class); private static final Logger log = LoggerFactory.getLogger(DHPUtils.class);
private DHPUtils() {
}
public static Seq<String> toSeq(List<String> list) { public static Seq<String> toSeq(List<String> list) {
return JavaConverters.asScalaIteratorConverter(list.iterator()).asScala().toSeq(); return JavaConverters.asScalaIteratorConverter(list.iterator()).asScala().toSeq();
} }
@ -44,7 +47,7 @@ public class DHPUtils {
md.update(s.getBytes(StandardCharsets.UTF_8)); md.update(s.getBytes(StandardCharsets.UTF_8));
return new String(Hex.encodeHex(md.digest())); return new String(Hex.encodeHex(md.digest()));
} catch (final Exception e) { } catch (final Exception e) {
System.err.println("Error creating id"); log.error("Error creating id from {}", s);
return null; return null;
} }
} }
@ -53,33 +56,6 @@ public class DHPUtils {
return String.format("%s::%s", nsPrefix, DHPUtils.md5(originalId)); return String.format("%s::%s", nsPrefix, DHPUtils.md5(originalId));
} }
public static String compressString(final String input) {
try (ByteArrayOutputStream out = new ByteArrayOutputStream();
Base64OutputStream b64os = new Base64OutputStream(out)) {
GZIPOutputStream gzip = new GZIPOutputStream(b64os);
gzip.write(input.getBytes(StandardCharsets.UTF_8));
gzip.close();
return out.toString();
} catch (Throwable e) {
return null;
}
}
public static String decompressString(final String input) {
byte[] byteArray = Base64.decodeBase64(input.getBytes());
int len;
try (GZIPInputStream gis = new GZIPInputStream(new ByteArrayInputStream((byteArray)));
ByteArrayOutputStream bos = new ByteArrayOutputStream(byteArray.length)) {
byte[] buffer = new byte[1024];
while ((len = gis.read(buffer)) != -1) {
bos.write(buffer, 0, len);
}
return bos.toString();
} catch (Exception e) {
return null;
}
}
public static String getJPathString(final String jsonPath, final String json) { public static String getJPathString(final String jsonPath, final String json) {
try { try {
Object o = JsonPath.read(json, jsonPath); Object o = JsonPath.read(json, jsonPath);

@ -18,13 +18,16 @@ public class ISLookupClientFactory {
private static final int requestTimeout = 60000 * 10; private static final int requestTimeout = 60000 * 10;
private static final int connectTimeout = 60000 * 10; private static final int connectTimeout = 60000 * 10;
private ISLookupClientFactory() {
}
public static ISLookUpService getLookUpService(final String isLookupUrl) { public static ISLookUpService getLookUpService(final String isLookupUrl) {
return getServiceStub(ISLookUpService.class, isLookupUrl); return getServiceStub(ISLookUpService.class, isLookupUrl);
} }
@SuppressWarnings("unchecked") @SuppressWarnings("unchecked")
private static <T> T getServiceStub(final Class<T> clazz, final String endpoint) { private static <T> T getServiceStub(final Class<T> clazz, final String endpoint) {
log.info(String.format("creating %s stub from %s", clazz.getName(), endpoint)); log.info("creating {} stub from {}", clazz.getName(), endpoint);
final JaxWsProxyFactoryBean jaxWsProxyFactory = new JaxWsProxyFactoryBean(); final JaxWsProxyFactoryBean jaxWsProxyFactory = new JaxWsProxyFactoryBean();
jaxWsProxyFactory.setServiceClass(clazz); jaxWsProxyFactory.setServiceClass(clazz);
jaxWsProxyFactory.setAddress(endpoint); jaxWsProxyFactory.setAddress(endpoint);
@ -38,12 +41,10 @@ public class ISLookupClientFactory {
log log
.info( .info(
String "setting connectTimeout to {}, requestTimeout to {} for service {}",
.format( connectTimeout,
"setting connectTimeout to %s, requestTimeout to %s for service %s", requestTimeout,
connectTimeout, clazz.getCanonicalName());
requestTimeout,
clazz.getCanonicalName()));
policy.setConnectionTimeout(connectTimeout); policy.setConnectionTimeout(connectTimeout);
policy.setReceiveTimeout(requestTimeout); policy.setReceiveTimeout(requestTimeout);

@ -10,7 +10,7 @@ import net.sf.saxon.trans.XPathException;
public abstract class AbstractExtensionFunction extends ExtensionFunctionDefinition { public abstract class AbstractExtensionFunction extends ExtensionFunctionDefinition {
public static String DEFAULT_SAXON_EXT_NS_URI = "http://www.d-net.research-infrastructures.eu/saxon-extension"; public static final String DEFAULT_SAXON_EXT_NS_URI = "http://www.d-net.research-infrastructures.eu/saxon-extension";
public abstract String getName(); public abstract String getName();

@ -26,7 +26,7 @@ public class ExtractYear extends AbstractExtensionFunction {
@Override @Override
public Sequence doCall(XPathContext context, Sequence[] arguments) throws XPathException { public Sequence doCall(XPathContext context, Sequence[] arguments) throws XPathException {
if (arguments == null | arguments.length == 0) { if (arguments == null || arguments.length == 0) {
return new StringValue(""); return new StringValue("");
} }
final Item item = arguments[0].head(); final Item item = arguments[0].head();
@ -63,8 +63,7 @@ public class ExtractYear extends AbstractExtensionFunction {
for (String format : dateFormats) { for (String format : dateFormats) {
try { try {
c.setTime(new SimpleDateFormat(format).parse(s)); c.setTime(new SimpleDateFormat(format).parse(s));
String year = String.valueOf(c.get(Calendar.YEAR)); return String.valueOf(c.get(Calendar.YEAR));
return year;
} catch (ParseException e) { } catch (ParseException e) {
} }
} }

@ -30,7 +30,7 @@ public class NormalizeDate extends AbstractExtensionFunction {
@Override @Override
public Sequence doCall(XPathContext context, Sequence[] arguments) throws XPathException { public Sequence doCall(XPathContext context, Sequence[] arguments) throws XPathException {
if (arguments == null | arguments.length == 0) { if (arguments == null || arguments.length == 0) {
return new StringValue(BLANK); return new StringValue(BLANK);
} }
String s = arguments[0].head().getStringValue(); String s = arguments[0].head().getStringValue();

@ -1,6 +1,8 @@
package eu.dnetlib.dhp.utils.saxon; package eu.dnetlib.dhp.utils.saxon;
import static org.apache.commons.lang3.StringUtils.isNotBlank;
import org.apache.commons.lang3.StringUtils; import org.apache.commons.lang3.StringUtils;
import net.sf.saxon.expr.XPathContext; import net.sf.saxon.expr.XPathContext;
@ -26,7 +28,8 @@ public class PickFirst extends AbstractExtensionFunction {
final String s1 = getValue(arguments[0]); final String s1 = getValue(arguments[0]);
final String s2 = getValue(arguments[1]); final String s2 = getValue(arguments[1]);
return new StringValue(StringUtils.isNotBlank(s1) ? s1 : StringUtils.isNotBlank(s2) ? s2 : ""); final String value = isNotBlank(s1) ? s1 : isNotBlank(s2) ? s2 : "";
return new StringValue(value);
} }
private String getValue(final Sequence arg) throws XPathException { private String getValue(final Sequence arg) throws XPathException {

@ -12,6 +12,9 @@ import net.sf.saxon.TransformerFactoryImpl;
public class SaxonTransformerFactory { public class SaxonTransformerFactory {
private SaxonTransformerFactory() {
}
/** /**
* Creates the index record transformer from the given XSLT * Creates the index record transformer from the given XSLT
* *

@ -7,10 +7,10 @@ import static org.junit.jupiter.api.Assertions.assertNotNull;
import org.apache.commons.io.IOUtils; import org.apache.commons.io.IOUtils;
import org.junit.jupiter.api.Test; import org.junit.jupiter.api.Test;
public class ArgumentApplicationParserTest { class ArgumentApplicationParserTest {
@Test @Test
public void testParseParameter() throws Exception { void testParseParameter() throws Exception {
final String jsonConfiguration = IOUtils final String jsonConfiguration = IOUtils
.toString( .toString(
this.getClass().getResourceAsStream("/eu/dnetlib/application/parameters.json")); this.getClass().getResourceAsStream("/eu/dnetlib/application/parameters.json"));

@ -21,13 +21,13 @@ public class HdfsSupportTest {
class Remove { class Remove {
@Test @Test
public void shouldThrowARuntimeExceptionOnError() { void shouldThrowARuntimeExceptionOnError() {
// when // when
assertThrows(RuntimeException.class, () -> HdfsSupport.remove(null, new Configuration())); assertThrows(RuntimeException.class, () -> HdfsSupport.remove(null, new Configuration()));
} }
@Test @Test
public void shouldRemoveADirFromHDFS(@TempDir Path tempDir) { void shouldRemoveADirFromHDFS(@TempDir Path tempDir) {
// when // when
HdfsSupport.remove(tempDir.toString(), new Configuration()); HdfsSupport.remove(tempDir.toString(), new Configuration());
@ -36,7 +36,7 @@ public class HdfsSupportTest {
} }
@Test @Test
public void shouldRemoveAFileFromHDFS(@TempDir Path tempDir) throws IOException { void shouldRemoveAFileFromHDFS(@TempDir Path tempDir) throws IOException {
// given // given
Path file = Files.createTempFile(tempDir, "p", "s"); Path file = Files.createTempFile(tempDir, "p", "s");
@ -52,13 +52,13 @@ public class HdfsSupportTest {
class ListFiles { class ListFiles {
@Test @Test
public void shouldThrowARuntimeExceptionOnError() { void shouldThrowARuntimeExceptionOnError() {
// when // when
assertThrows(RuntimeException.class, () -> HdfsSupport.listFiles(null, new Configuration())); assertThrows(RuntimeException.class, () -> HdfsSupport.listFiles(null, new Configuration()));
} }
@Test @Test
public void shouldListFilesLocatedInPath(@TempDir Path tempDir) throws IOException { void shouldListFilesLocatedInPath(@TempDir Path tempDir) throws IOException {
Path subDir1 = Files.createTempDirectory(tempDir, "list_me"); Path subDir1 = Files.createTempDirectory(tempDir, "list_me");
Path subDir2 = Files.createTempDirectory(tempDir, "list_me"); Path subDir2 = Files.createTempDirectory(tempDir, "list_me");

@ -5,10 +5,10 @@ import static org.junit.jupiter.api.Assertions.*;
import org.junit.jupiter.api.Test; import org.junit.jupiter.api.Test;
public class PacePersonTest { class PacePersonTest {
@Test @Test
public void pacePersonTest1() { void pacePersonTest1() {
PacePerson p = new PacePerson("Artini, Michele", false); PacePerson p = new PacePerson("Artini, Michele", false);
assertEquals("Artini", p.getSurnameString()); assertEquals("Artini", p.getSurnameString());
@ -17,7 +17,7 @@ public class PacePersonTest {
} }
@Test @Test
public void pacePersonTest2() { void pacePersonTest2() {
PacePerson p = new PacePerson("Michele G. Artini", false); PacePerson p = new PacePerson("Michele G. Artini", false);
assertEquals("Artini, Michele G.", p.getNormalisedFullname()); assertEquals("Artini, Michele G.", p.getNormalisedFullname());
assertEquals("Michele G", p.getNameString()); assertEquals("Michele G", p.getNameString());

@ -18,7 +18,8 @@ public class SparkSessionSupportTest {
class RunWithSparkSession { class RunWithSparkSession {
@Test @Test
public void shouldExecuteFunctionAndNotStopSparkSessionWhenSparkSessionIsNotManaged() @SuppressWarnings("unchecked")
void shouldExecuteFunctionAndNotStopSparkSessionWhenSparkSessionIsNotManaged()
throws Exception { throws Exception {
// given // given
SparkSession spark = mock(SparkSession.class); SparkSession spark = mock(SparkSession.class);
@ -37,7 +38,8 @@ public class SparkSessionSupportTest {
} }
@Test @Test
public void shouldExecuteFunctionAndStopSparkSessionWhenSparkSessionIsManaged() @SuppressWarnings("unchecked")
void shouldExecuteFunctionAndStopSparkSessionWhenSparkSessionIsManaged()
throws Exception { throws Exception {
// given // given
SparkSession spark = mock(SparkSession.class); SparkSession spark = mock(SparkSession.class);

@ -12,7 +12,7 @@ import org.junit.jupiter.api.Disabled;
import org.junit.jupiter.api.Test; import org.junit.jupiter.api.Test;
@Disabled @Disabled
public class ZenodoAPIClientTest { class ZenodoAPIClientTest {
private final String URL_STRING = "https://sandbox.zenodo.org/api/deposit/depositions"; private final String URL_STRING = "https://sandbox.zenodo.org/api/deposit/depositions";
private final String ACCESS_TOKEN = ""; private final String ACCESS_TOKEN = "";
@ -22,7 +22,7 @@ public class ZenodoAPIClientTest {
private final String depositionId = "674915"; private final String depositionId = "674915";
@Test @Test
public void testUploadOldDeposition() throws IOException, MissingConceptDoiException { void testUploadOldDeposition() throws IOException, MissingConceptDoiException {
ZenodoAPIClient client = new ZenodoAPIClient(URL_STRING, ZenodoAPIClient client = new ZenodoAPIClient(URL_STRING,
ACCESS_TOKEN); ACCESS_TOKEN);
Assertions.assertEquals(200, client.uploadOpenDeposition(depositionId)); Assertions.assertEquals(200, client.uploadOpenDeposition(depositionId));
@ -44,7 +44,7 @@ public class ZenodoAPIClientTest {
} }
@Test @Test
public void testNewDeposition() throws IOException { void testNewDeposition() throws IOException {
ZenodoAPIClient client = new ZenodoAPIClient(URL_STRING, ZenodoAPIClient client = new ZenodoAPIClient(URL_STRING,
ACCESS_TOKEN); ACCESS_TOKEN);
@ -67,7 +67,7 @@ public class ZenodoAPIClientTest {
} }
@Test @Test
public void testNewVersionNewName() throws IOException, MissingConceptDoiException { void testNewVersionNewName() throws IOException, MissingConceptDoiException {
ZenodoAPIClient client = new ZenodoAPIClient(URL_STRING, ZenodoAPIClient client = new ZenodoAPIClient(URL_STRING,
ACCESS_TOKEN); ACCESS_TOKEN);
@ -87,7 +87,7 @@ public class ZenodoAPIClientTest {
} }
@Test @Test
public void testNewVersionOldName() throws IOException, MissingConceptDoiException { void testNewVersionOldName() throws IOException, MissingConceptDoiException {
ZenodoAPIClient client = new ZenodoAPIClient(URL_STRING, ZenodoAPIClient client = new ZenodoAPIClient(URL_STRING,
ACCESS_TOKEN); ACCESS_TOKEN);

@ -21,7 +21,7 @@ import eu.dnetlib.dhp.schema.oaf.StructuredProperty;
import eu.dnetlib.pace.util.MapDocumentUtil; import eu.dnetlib.pace.util.MapDocumentUtil;
import scala.Tuple2; import scala.Tuple2;
public class AuthorMergerTest { class AuthorMergerTest {
private String publicationsBasePath; private String publicationsBasePath;
@ -43,7 +43,7 @@ public class AuthorMergerTest {
} }
@Test @Test
public void mergeTest() { // used in the dedup: threshold set to 0.95 void mergeTest() { // used in the dedup: threshold set to 0.95
for (List<Author> authors1 : authors) { for (List<Author> authors1 : authors) {
System.out.println("List " + (authors.indexOf(authors1) + 1)); System.out.println("List " + (authors.indexOf(authors1) + 1));

@ -21,7 +21,7 @@ import eu.dnetlib.dhp.schema.oaf.Publication;
import eu.dnetlib.dhp.schema.oaf.Result; import eu.dnetlib.dhp.schema.oaf.Result;
import me.xuender.unidecode.Unidecode; import me.xuender.unidecode.Unidecode;
public class OafMapperUtilsTest { class OafMapperUtilsTest {
private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper() private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper()
.configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES, false); .configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES, false);
@ -42,7 +42,7 @@ public class OafMapperUtilsTest {
} }
@Test @Test
public void testDateValidation() { void testDateValidation() {
assertTrue(GraphCleaningFunctions.doCleanDate("2016-05-07T12:41:19.202Z ").isPresent()); assertTrue(GraphCleaningFunctions.doCleanDate("2016-05-07T12:41:19.202Z ").isPresent());
assertTrue(GraphCleaningFunctions.doCleanDate("2020-09-10 11:08:52 ").isPresent()); assertTrue(GraphCleaningFunctions.doCleanDate("2020-09-10 11:08:52 ").isPresent());
@ -147,44 +147,46 @@ public class OafMapperUtilsTest {
} }
@Test @Test
public void testDate() { void testDate() {
System.out.println(GraphCleaningFunctions.cleanDate("23-FEB-1998")); final String date = GraphCleaningFunctions.cleanDate("23-FEB-1998");
assertNotNull(date);
System.out.println(date);
} }
@Test @Test
public void testMergePubs() throws IOException { void testMergePubs() throws IOException {
Publication p1 = read("publication_1.json", Publication.class); Publication p1 = read("publication_1.json", Publication.class);
Publication p2 = read("publication_2.json", Publication.class); Publication p2 = read("publication_2.json", Publication.class);
Dataset d1 = read("dataset_1.json", Dataset.class); Dataset d1 = read("dataset_1.json", Dataset.class);
Dataset d2 = read("dataset_2.json", Dataset.class); Dataset d2 = read("dataset_2.json", Dataset.class);
assertEquals(p1.getCollectedfrom().size(), 1); assertEquals(1, p1.getCollectedfrom().size());
assertEquals(p1.getCollectedfrom().get(0).getKey(), ModelConstants.CROSSREF_ID); assertEquals(ModelConstants.CROSSREF_ID, p1.getCollectedfrom().get(0).getKey());
assertEquals(d2.getCollectedfrom().size(), 1); assertEquals(1, d2.getCollectedfrom().size());
assertFalse(cfId(d2.getCollectedfrom()).contains(ModelConstants.CROSSREF_ID)); assertFalse(cfId(d2.getCollectedfrom()).contains(ModelConstants.CROSSREF_ID));
assertTrue( assertEquals(
ModelConstants.PUBLICATION_RESULTTYPE_CLASSID,
OafMapperUtils OafMapperUtils
.mergeResults(p1, d2) .mergeResults(p1, d2)
.getResulttype() .getResulttype()
.getClassid() .getClassid());
.equals(ModelConstants.PUBLICATION_RESULTTYPE_CLASSID));
assertEquals(p2.getCollectedfrom().size(), 1); assertEquals(1, p2.getCollectedfrom().size());
assertFalse(cfId(p2.getCollectedfrom()).contains(ModelConstants.CROSSREF_ID)); assertFalse(cfId(p2.getCollectedfrom()).contains(ModelConstants.CROSSREF_ID));
assertEquals(d1.getCollectedfrom().size(), 1); assertEquals(1, d1.getCollectedfrom().size());
assertTrue(cfId(d1.getCollectedfrom()).contains(ModelConstants.CROSSREF_ID)); assertTrue(cfId(d1.getCollectedfrom()).contains(ModelConstants.CROSSREF_ID));
assertTrue( assertEquals(
ModelConstants.DATASET_RESULTTYPE_CLASSID,
OafMapperUtils OafMapperUtils
.mergeResults(p2, d1) .mergeResults(p2, d1)
.getResulttype() .getResulttype()
.getClassid() .getClassid());
.equals(ModelConstants.DATASET_RESULTTYPE_CLASSID));
} }
protected HashSet<String> cfId(List<KeyValue> collectedfrom) { protected HashSet<String> cfId(List<KeyValue> collectedfrom) {
return collectedfrom.stream().map(c -> c.getKey()).collect(Collectors.toCollection(HashSet::new)); return collectedfrom.stream().map(KeyValue::getKey).collect(Collectors.toCollection(HashSet::new));
} }
protected <T extends Result> T read(String filename, Class<T> clazz) throws IOException { protected <T extends Result> T read(String filename, Class<T> clazz) throws IOException {

@ -3,10 +3,10 @@ package eu.dnetlib.scholexplorer.relation;
import org.junit.jupiter.api.Test; import org.junit.jupiter.api.Test;
public class RelationMapperTest { class RelationMapperTest {
@Test @Test
public void testLoadRels() throws Exception { void testLoadRels() throws Exception {
RelationMapper relationMapper = RelationMapper.load(); RelationMapper relationMapper = RelationMapper.load();
relationMapper.keySet().forEach(System.out::println); relationMapper.keySet().forEach(System.out::println);

@ -3,40 +3,37 @@ package eu.dnetlib.dhp.actionmanager;
import java.io.Serializable; import java.io.Serializable;
import java.io.StringReader; import java.io.StringReader;
import java.util.*; import java.util.List;
import java.util.NoSuchElementException;
import java.util.Optional;
import java.util.Set;
import java.util.stream.Collectors; import java.util.stream.Collectors;
import org.apache.commons.lang3.tuple.Triple; import org.apache.commons.lang3.tuple.Triple;
import org.dom4j.Document; import org.dom4j.Document;
import org.dom4j.DocumentException; import org.dom4j.DocumentException;
import org.dom4j.Element;
import org.dom4j.io.SAXReader; import org.dom4j.io.SAXReader;
import org.jetbrains.annotations.NotNull;
import org.slf4j.Logger; import org.slf4j.Logger;
import org.slf4j.LoggerFactory; import org.slf4j.LoggerFactory;
import org.xml.sax.SAXException;
import com.google.common.base.Joiner; import com.google.common.base.Joiner;
import com.google.common.base.Splitter; import com.google.common.base.Splitter;
import com.google.common.collect.Iterables; import com.google.common.collect.Iterables;
import com.google.common.collect.Lists;
import com.google.common.collect.Sets; import com.google.common.collect.Sets;
import eu.dnetlib.actionmanager.rmi.ActionManagerException; import eu.dnetlib.actionmanager.rmi.ActionManagerException;
import eu.dnetlib.actionmanager.set.ActionManagerSet;
import eu.dnetlib.actionmanager.set.ActionManagerSet.ImpactTypes;
import eu.dnetlib.dhp.actionmanager.partition.PartitionActionSetsByPayloadTypeJob;
import eu.dnetlib.dhp.utils.ISLookupClientFactory; import eu.dnetlib.dhp.utils.ISLookupClientFactory;
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException; import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException;
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService; import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService;
import scala.Tuple2;
public class ISClient implements Serializable { public class ISClient implements Serializable {
private static final Logger log = LoggerFactory.getLogger(PartitionActionSetsByPayloadTypeJob.class); private static final Logger log = LoggerFactory.getLogger(ISClient.class);
private static final String INPUT_ACTION_SET_ID_SEPARATOR = ","; private static final String INPUT_ACTION_SET_ID_SEPARATOR = ",";
private final ISLookUpService isLookup; private final transient ISLookUpService isLookup;
public ISClient(String isLookupUrl) { public ISClient(String isLookupUrl) {
isLookup = ISLookupClientFactory.getLookUpService(isLookupUrl); isLookup = ISLookupClientFactory.getLookUpService(isLookupUrl);
@ -63,7 +60,7 @@ public class ISClient implements Serializable {
.map( .map(
sets -> sets sets -> sets
.stream() .stream()
.map(set -> parseSetInfo(set)) .map(ISClient::parseSetInfo)
.filter(t -> ids.contains(t.getLeft())) .filter(t -> ids.contains(t.getLeft()))
.map(t -> buildDirectory(basePath, t)) .map(t -> buildDirectory(basePath, t))
.collect(Collectors.toList())) .collect(Collectors.toList()))
@ -73,15 +70,17 @@ public class ISClient implements Serializable {
} }
} }
private Triple<String, String, String> parseSetInfo(String set) { private static Triple<String, String, String> parseSetInfo(String set) {
try { try {
Document doc = new SAXReader().read(new StringReader(set)); final SAXReader reader = new SAXReader();
reader.setFeature("http://apache.org/xml/features/disallow-doctype-decl", true);
Document doc = reader.read(new StringReader(set));
return Triple return Triple
.of( .of(
doc.valueOf("//SET/@id"), doc.valueOf("//SET/@id"),
doc.valueOf("//SET/@directory"), doc.valueOf("//SET/@directory"),
doc.valueOf("//SET/@latest")); doc.valueOf("//SET/@latest"));
} catch (DocumentException e) { } catch (DocumentException | SAXException e) {
throw new IllegalStateException(e); throw new IllegalStateException(e);
} }
} }
@ -99,7 +98,7 @@ public class ISClient implements Serializable {
final String q = "for $x in /RESOURCE_PROFILE[.//RESOURCE_TYPE/@value='ActionManagerServiceResourceType'] return $x//SERVICE_PROPERTIES/PROPERTY[./@ key='" final String q = "for $x in /RESOURCE_PROFILE[.//RESOURCE_TYPE/@value='ActionManagerServiceResourceType'] return $x//SERVICE_PROPERTIES/PROPERTY[./@ key='"
+ propertyName + propertyName
+ "']/@value/string()"; + "']/@value/string()";
log.debug("quering for service property: " + q); log.debug("quering for service property: {}", q);
try { try {
final List<String> value = isLookup.quickSearchProfile(q); final List<String> value = isLookup.quickSearchProfile(q);
return Iterables.getOnlyElement(value); return Iterables.getOnlyElement(value);

@ -62,6 +62,7 @@ public class MergeAndGet {
x.getClass().getCanonicalName(), y.getClass().getCanonicalName())); x.getClass().getCanonicalName(), y.getClass().getCanonicalName()));
} }
@SuppressWarnings("unchecked")
private static <G extends Oaf, A extends Oaf> G selectNewerAndGet(G x, A y) { private static <G extends Oaf, A extends Oaf> G selectNewerAndGet(G x, A y) {
if (x.getClass().equals(y.getClass()) if (x.getClass().equals(y.getClass())
&& x.getLastupdatetimestamp() > y.getLastupdatetimestamp()) { && x.getLastupdatetimestamp() > y.getLastupdatetimestamp()) {

@ -74,7 +74,9 @@ public class PromoteActionPayloadForGraphTableJob {
.orElse(true); .orElse(true);
logger.info("shouldGroupById: {}", shouldGroupById); logger.info("shouldGroupById: {}", shouldGroupById);
@SuppressWarnings("unchecked")
Class<? extends Oaf> rowClazz = (Class<? extends Oaf>) Class.forName(graphTableClassName); Class<? extends Oaf> rowClazz = (Class<? extends Oaf>) Class.forName(graphTableClassName);
@SuppressWarnings("unchecked")
Class<? extends Oaf> actionPayloadClazz = (Class<? extends Oaf>) Class.forName(actionPayloadClassName); Class<? extends Oaf> actionPayloadClazz = (Class<? extends Oaf>) Class.forName(actionPayloadClassName);
throwIfGraphTableClassIsNotSubClassOfActionPayloadClass(rowClazz, actionPayloadClazz); throwIfGraphTableClassIsNotSubClassOfActionPayloadClass(rowClazz, actionPayloadClazz);
@ -152,7 +154,7 @@ public class PromoteActionPayloadForGraphTableJob {
return spark return spark
.read() .read()
.parquet(path) .parquet(path)
.map((MapFunction<Row, String>) value -> extractPayload(value), Encoders.STRING()) .map((MapFunction<Row, String>) PromoteActionPayloadForGraphTableJob::extractPayload, Encoders.STRING())
.map( .map(
(MapFunction<String, A>) value -> decodePayload(actionPayloadClazz, value), (MapFunction<String, A>) value -> decodePayload(actionPayloadClazz, value),
Encoders.bean(actionPayloadClazz)); Encoders.bean(actionPayloadClazz));

@ -80,7 +80,7 @@ public class PartitionActionSetsByPayloadTypeJobTest {
private ISClient isClient; private ISClient isClient;
@Test @Test
public void shouldPartitionActionSetsByPayloadType(@TempDir Path workingDir) throws Exception { void shouldPartitionActionSetsByPayloadType(@TempDir Path workingDir) throws Exception {
// given // given
Path inputActionSetsBaseDir = workingDir.resolve("input").resolve("action_sets"); Path inputActionSetsBaseDir = workingDir.resolve("input").resolve("action_sets");
Path outputDir = workingDir.resolve("output"); Path outputDir = workingDir.resolve("output");

@ -20,7 +20,7 @@ public class MergeAndGetTest {
class MergeFromAndGetStrategy { class MergeFromAndGetStrategy {
@Test @Test
public void shouldThrowForOafAndOaf() { void shouldThrowForOafAndOaf() {
// given // given
Oaf a = mock(Oaf.class); Oaf a = mock(Oaf.class);
Oaf b = mock(Oaf.class); Oaf b = mock(Oaf.class);
@ -33,7 +33,7 @@ public class MergeAndGetTest {
} }
@Test @Test
public void shouldThrowForOafAndRelation() { void shouldThrowForOafAndRelation() {
// given // given
Oaf a = mock(Oaf.class); Oaf a = mock(Oaf.class);
Relation b = mock(Relation.class); Relation b = mock(Relation.class);
@ -46,7 +46,7 @@ public class MergeAndGetTest {
} }
@Test @Test
public void shouldThrowForOafAndOafEntity() { void shouldThrowForOafAndOafEntity() {
// given // given
Oaf a = mock(Oaf.class); Oaf a = mock(Oaf.class);
OafEntity b = mock(OafEntity.class); OafEntity b = mock(OafEntity.class);
@ -59,7 +59,7 @@ public class MergeAndGetTest {
} }
@Test @Test
public void shouldThrowForRelationAndOaf() { void shouldThrowForRelationAndOaf() {
// given // given
Relation a = mock(Relation.class); Relation a = mock(Relation.class);
Oaf b = mock(Oaf.class); Oaf b = mock(Oaf.class);
@ -72,7 +72,7 @@ public class MergeAndGetTest {
} }
@Test @Test
public void shouldThrowForRelationAndOafEntity() { void shouldThrowForRelationAndOafEntity() {
// given // given
Relation a = mock(Relation.class); Relation a = mock(Relation.class);
OafEntity b = mock(OafEntity.class); OafEntity b = mock(OafEntity.class);
@ -85,7 +85,7 @@ public class MergeAndGetTest {
} }
@Test @Test
public void shouldBehaveProperlyForRelationAndRelation() { void shouldBehaveProperlyForRelationAndRelation() {
// given // given
Relation a = mock(Relation.class); Relation a = mock(Relation.class);
Relation b = mock(Relation.class); Relation b = mock(Relation.class);
@ -101,7 +101,7 @@ public class MergeAndGetTest {
} }
@Test @Test
public void shouldThrowForOafEntityAndOaf() { void shouldThrowForOafEntityAndOaf() {
// given // given
OafEntity a = mock(OafEntity.class); OafEntity a = mock(OafEntity.class);
Oaf b = mock(Oaf.class); Oaf b = mock(Oaf.class);
@ -114,7 +114,7 @@ public class MergeAndGetTest {
} }
@Test @Test
public void shouldThrowForOafEntityAndRelation() { void shouldThrowForOafEntityAndRelation() {
// given // given
OafEntity a = mock(OafEntity.class); OafEntity a = mock(OafEntity.class);
Relation b = mock(Relation.class); Relation b = mock(Relation.class);
@ -127,7 +127,7 @@ public class MergeAndGetTest {
} }
@Test @Test
public void shouldThrowForOafEntityAndOafEntityButNotSubclasses() { void shouldThrowForOafEntityAndOafEntityButNotSubclasses() {
// given // given
class OafEntitySub1 extends OafEntity { class OafEntitySub1 extends OafEntity {
} }
@ -145,7 +145,7 @@ public class MergeAndGetTest {
} }
@Test @Test
public void shouldBehaveProperlyForOafEntityAndOafEntity() { void shouldBehaveProperlyForOafEntityAndOafEntity() {
// given // given
OafEntity a = mock(OafEntity.class); OafEntity a = mock(OafEntity.class);
OafEntity b = mock(OafEntity.class); OafEntity b = mock(OafEntity.class);
@ -165,7 +165,7 @@ public class MergeAndGetTest {
class SelectNewerAndGetStrategy { class SelectNewerAndGetStrategy {
@Test @Test
public void shouldThrowForOafEntityAndRelation() { void shouldThrowForOafEntityAndRelation() {
// given // given
OafEntity a = mock(OafEntity.class); OafEntity a = mock(OafEntity.class);
Relation b = mock(Relation.class); Relation b = mock(Relation.class);
@ -178,7 +178,7 @@ public class MergeAndGetTest {
} }
@Test @Test
public void shouldThrowForRelationAndOafEntity() { void shouldThrowForRelationAndOafEntity() {
// given // given
Relation a = mock(Relation.class); Relation a = mock(Relation.class);
OafEntity b = mock(OafEntity.class); OafEntity b = mock(OafEntity.class);
@ -191,7 +191,7 @@ public class MergeAndGetTest {
} }
@Test @Test
public void shouldThrowForOafEntityAndResult() { void shouldThrowForOafEntityAndResult() {
// given // given
OafEntity a = mock(OafEntity.class); OafEntity a = mock(OafEntity.class);
Result b = mock(Result.class); Result b = mock(Result.class);
@ -204,7 +204,7 @@ public class MergeAndGetTest {
} }
@Test @Test
public void shouldThrowWhenSuperTypeIsNewerForResultAndOafEntity() { void shouldThrowWhenSuperTypeIsNewerForResultAndOafEntity() {
// given // given
// real types must be used because subclass-superclass resolution does not work for // real types must be used because subclass-superclass resolution does not work for
// mocks // mocks
@ -221,7 +221,7 @@ public class MergeAndGetTest {
} }
@Test @Test
public void shouldShouldReturnLeftForOafEntityAndOafEntity() { void shouldShouldReturnLeftForOafEntityAndOafEntity() {
// given // given
OafEntity a = mock(OafEntity.class); OafEntity a = mock(OafEntity.class);
when(a.getLastupdatetimestamp()).thenReturn(1L); when(a.getLastupdatetimestamp()).thenReturn(1L);
@ -238,7 +238,7 @@ public class MergeAndGetTest {
} }
@Test @Test
public void shouldShouldReturnRightForOafEntityAndOafEntity() { void shouldShouldReturnRightForOafEntityAndOafEntity() {
// given // given
OafEntity a = mock(OafEntity.class); OafEntity a = mock(OafEntity.class);
when(a.getLastupdatetimestamp()).thenReturn(2L); when(a.getLastupdatetimestamp()).thenReturn(2L);

@ -77,7 +77,7 @@ public class PromoteActionPayloadForGraphTableJobTest {
class Main { class Main {
@Test @Test
public void shouldThrowWhenGraphTableClassIsNotASubClassOfActionPayloadClass() { void shouldThrowWhenGraphTableClassIsNotASubClassOfActionPayloadClass() {
// given // given
Class<Relation> rowClazz = Relation.class; Class<Relation> rowClazz = Relation.class;
Class<OafEntity> actionPayloadClazz = OafEntity.class; Class<OafEntity> actionPayloadClazz = OafEntity.class;
@ -116,7 +116,7 @@ public class PromoteActionPayloadForGraphTableJobTest {
@ParameterizedTest(name = "strategy: {0}, graph table: {1}, action payload: {2}") @ParameterizedTest(name = "strategy: {0}, graph table: {1}, action payload: {2}")
@MethodSource("eu.dnetlib.dhp.actionmanager.promote.PromoteActionPayloadForGraphTableJobTest#promoteJobTestParams") @MethodSource("eu.dnetlib.dhp.actionmanager.promote.PromoteActionPayloadForGraphTableJobTest#promoteJobTestParams")
public void shouldPromoteActionPayloadForGraphTable( void shouldPromoteActionPayloadForGraphTable(
MergeAndGet.Strategy strategy, MergeAndGet.Strategy strategy,
Class<? extends Oaf> rowClazz, Class<? extends Oaf> rowClazz,
Class<? extends Oaf> actionPayloadClazz) Class<? extends Oaf> actionPayloadClazz)

@ -44,7 +44,7 @@ public class PromoteActionPayloadFunctionsTest {
class JoinTableWithActionPayloadAndMerge { class JoinTableWithActionPayloadAndMerge {
@Test @Test
public void shouldThrowWhenTableTypeIsNotSubtypeOfActionPayloadType() { void shouldThrowWhenTableTypeIsNotSubtypeOfActionPayloadType() {
// given // given
class OafImpl extends Oaf { class OafImpl extends Oaf {
} }
@ -58,7 +58,7 @@ public class PromoteActionPayloadFunctionsTest {
} }
@Test @Test
public void shouldRunProperlyWhenActionPayloadTypeAndTableTypeAreTheSame() { void shouldRunProperlyWhenActionPayloadTypeAndTableTypeAreTheSame() {
// given // given
String id0 = "id0"; String id0 = "id0";
String id1 = "id1"; String id1 = "id1";
@ -138,7 +138,7 @@ public class PromoteActionPayloadFunctionsTest {
} }
@Test @Test
public void shouldRunProperlyWhenActionPayloadTypeIsSuperTypeOfTableType() { void shouldRunProperlyWhenActionPayloadTypeIsSuperTypeOfTableType() {
// given // given
String id0 = "id0"; String id0 = "id0";
String id1 = "id1"; String id1 = "id1";
@ -218,7 +218,7 @@ public class PromoteActionPayloadFunctionsTest {
class GroupTableByIdAndMerge { class GroupTableByIdAndMerge {
@Test @Test
public void shouldRunProperly() { void shouldRunProperly() {
// given // given
String id1 = "id1"; String id1 = "id1";
String id2 = "id2"; String id2 = "id2";

@ -84,14 +84,6 @@
<artifactId>json</artifactId> <artifactId>json</artifactId>
</dependency> </dependency>
<!-- https://mvnrepository.com/artifact/org.apache.commons/commons-csv -->
<dependency>
<groupId>org.apache.commons</groupId>
<artifactId>commons-csv</artifactId>
<version>1.8</version>
</dependency>
<!-- https://mvnrepository.com/artifact/org.apache.poi/poi-ooxml --> <!-- https://mvnrepository.com/artifact/org.apache.poi/poi-ooxml -->
<dependency> <dependency>
<groupId>org.apache.poi</groupId> <groupId>org.apache.poi</groupId>

@ -4,6 +4,7 @@ package eu.dnetlib.dhp.actionmanager.bipfinder;
import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession; import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;
import java.io.Serializable; import java.io.Serializable;
import java.util.Objects;
import java.util.Optional; import java.util.Optional;
import org.apache.commons.io.IOUtils; import org.apache.commons.io.IOUtils;
@ -28,15 +29,16 @@ import eu.dnetlib.dhp.schema.oaf.Result;
public class CollectAndSave implements Serializable { public class CollectAndSave implements Serializable {
private static final Logger log = LoggerFactory.getLogger(CollectAndSave.class); private static final Logger log = LoggerFactory.getLogger(CollectAndSave.class);
private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();
public static <I extends Result> void main(String[] args) throws Exception { public static void main(String[] args) throws Exception {
String jsonConfiguration = IOUtils String jsonConfiguration = IOUtils
.toString( .toString(
CollectAndSave.class Objects
.getResourceAsStream( .requireNonNull(
"/eu/dnetlib/dhp/actionmanager/bipfinder/input_actionset_parameter.json")); CollectAndSave.class
.getResourceAsStream(
"/eu/dnetlib/dhp/actionmanager/bipfinder/input_actionset_parameter.json")));
final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration); final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration);

@ -87,7 +87,7 @@ public class SparkAtomicActionScoreJob implements Serializable {
private static <I extends Result> void prepareResults(SparkSession spark, String inputPath, String outputPath, private static <I extends Result> void prepareResults(SparkSession spark, String inputPath, String outputPath,
String bipScorePath, Class<I> inputClazz) { String bipScorePath, Class<I> inputClazz) {
final JavaSparkContext sc = new JavaSparkContext(spark.sparkContext()); final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
JavaRDD<BipDeserialize> bipDeserializeJavaRDD = sc JavaRDD<BipDeserialize> bipDeserializeJavaRDD = sc
.textFile(bipScorePath) .textFile(bipScorePath)
@ -101,8 +101,6 @@ public class SparkAtomicActionScoreJob implements Serializable {
return bs; return bs;
}).collect(Collectors.toList()).iterator()).rdd(), Encoders.bean(BipScore.class)); }).collect(Collectors.toList()).iterator()).rdd(), Encoders.bean(BipScore.class));
System.out.println(bipScores.count());
Dataset<I> results = readPath(spark, inputPath, inputClazz); Dataset<I> results = readPath(spark, inputPath, inputClazz);
results.createOrReplaceTempView("result"); results.createOrReplaceTempView("result");
@ -124,7 +122,7 @@ public class SparkAtomicActionScoreJob implements Serializable {
ret.setId(value._2().getId()); ret.setId(value._2().getId());
return ret; return ret;
}, Encoders.bean(BipScore.class)) }, Encoders.bean(BipScore.class))
.groupByKey((MapFunction<BipScore, String>) value -> value.getId(), Encoders.STRING()) .groupByKey((MapFunction<BipScore, String>) BipScore::getId, Encoders.STRING())
.mapGroups((MapGroupsFunction<String, BipScore, Result>) (k, it) -> { .mapGroups((MapGroupsFunction<String, BipScore, Result>) (k, it) -> {
Result ret = new Result(); Result ret = new Result();
ret.setDataInfo(getDataInfo()); ret.setDataInfo(getDataInfo());

@ -8,14 +8,15 @@ import org.apache.http.impl.client.{HttpClientBuilder, HttpClients}
import java.io.IOException import java.io.IOException
abstract class AbstractRestClient extends Iterator[String]{
abstract class AbstractRestClient extends Iterator[String] {
var buffer: List[String] = List() var buffer: List[String] = List()
var current_index:Int = 0 var current_index: Int = 0
var scroll_value: Option[String] = None var scroll_value: Option[String] = None
var complete:Boolean = false var complete: Boolean = false
def extractInfo(input: String): Unit def extractInfo(input: String): Unit
@ -23,13 +24,13 @@ abstract class AbstractRestClient extends Iterator[String]{
protected def getBufferData(): Unit protected def getBufferData(): Unit
def doHTTPGETRequest(url:String): String = { def doHTTPGETRequest(url: String): String = {
val httpGet = new HttpGet(url) val httpGet = new HttpGet(url)
doHTTPRequest(httpGet) doHTTPRequest(httpGet)
} }
def doHTTPPOSTRequest(url:String, json:String): String = { def doHTTPPOSTRequest(url: String, json: String): String = {
val httpPost = new HttpPost(url) val httpPost = new HttpPost(url)
if (json != null) { if (json != null) {
val entity = new StringEntity(json) val entity = new StringEntity(json)
@ -46,7 +47,7 @@ abstract class AbstractRestClient extends Iterator[String]{
override def next(): String = { override def next(): String = {
val next_item:String = buffer(current_index) val next_item: String = buffer(current_index)
current_index = current_index + 1 current_index = current_index + 1
if (current_index == buffer.size) if (current_index == buffer.size)
getBufferData() getBufferData()
@ -54,17 +55,16 @@ abstract class AbstractRestClient extends Iterator[String]{
} }
private def doHTTPRequest[A <: HttpUriRequest](r: A): String = {
private def doHTTPRequest[A <: HttpUriRequest](r: A) :String ={
val timeout = 60; // seconds val timeout = 60; // seconds
val config = RequestConfig.custom() val config = RequestConfig.custom()
.setConnectTimeout(timeout * 1000) .setConnectTimeout(timeout * 1000)
.setConnectionRequestTimeout(timeout * 1000) .setConnectionRequestTimeout(timeout * 1000)
.setSocketTimeout(timeout * 1000).build() .setSocketTimeout(timeout * 1000).build()
val client =HttpClientBuilder.create().setDefaultRequestConfig(config).build() val client = HttpClientBuilder.create().setDefaultRequestConfig(config).build()
var tries = 4 try {
while (tries > 0) { var tries = 4
while (tries > 0) {
println(s"requesting ${r.getURI}") println(s"requesting ${r.getURI}")
try { try {
val response = client.execute(r) val response = client.execute(r)
@ -78,10 +78,15 @@ abstract class AbstractRestClient extends Iterator[String]{
case e: Throwable => case e: Throwable =>
println(s"Error on requesting ${r.getURI}") println(s"Error on requesting ${r.getURI}")
e.printStackTrace() e.printStackTrace()
tries-=1 tries -= 1
} }
} }
"" ""
} } finally {
if (client != null)
client.close()
}
}
getBufferData() getBufferData()
} }

@ -0,0 +1,53 @@
package eu.dnetlib.dhp.actionmanager.datacite
import eu.dnetlib.dhp.application.ArgumentApplicationParser
import eu.dnetlib.dhp.schema.oaf.{Oaf, Result}
import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.fs.LocalFileSystem
import org.apache.hadoop.hdfs.DistributedFileSystem
import org.apache.spark.SparkConf
import org.apache.spark.sql.{Encoder, Encoders, SparkSession}
import org.apache.spark.sql.functions.max
import org.slf4j.{Logger, LoggerFactory}
import java.text.SimpleDateFormat
import java.util.{Date, Locale}
import scala.io.Source
/**
 * Spark job that inspects the Datacite working path and derives the timestamp of the
 * most recent collection date, as a preliminary step for downloading updates.
 */
object SparkDownloadUpdateDatacite {

  val log: Logger = LoggerFactory.getLogger(getClass)

  def main(args: Array[String]): Unit = {
    val sparkConf = new SparkConf
    val argumentParser = new ArgumentApplicationParser(Source.fromInputStream(getClass.getResourceAsStream("/eu/dnetlib/dhp/actionmanager/datacite/generate_dataset_params.json")).mkString)
    argumentParser.parseArgument(args)

    val master = argumentParser.get("master")
    val sourcePath = argumentParser.get("sourcePath")
    val workingPath = argumentParser.get("workingPath")
    val hdfsuri = argumentParser.get("namenode")
    log.info(s"namenode is $hdfsuri")

    val spark: SparkSession = SparkSession
      .builder()
      .config(sparkConf)
      .master(master)
      .appName(getClass.getSimpleName)
      .getOrCreate()

    implicit val oafEncoder: Encoder[Oaf] = Encoders.kryo[Oaf]
    implicit val resEncoder: Encoder[Result] = Encoders.kryo[Result]
    import spark.implicits._

    // latest "dateofcollection" value among the Result records stored under workingPath
    val maxDate: String = spark.read
      .load(workingPath)
      .as[Oaf]
      .filter(o => o.isInstanceOf[Result])
      .map(o => o.asInstanceOf[Result].getDateofcollection)
      .select(max("value"))
      .first()
      .getString(0)

    val iso8601Format = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ssZ", Locale.US)
    val parsedMaxDate = iso8601Format.parse(maxDate)
    // NOTE(review): sourcePath and ts are computed but not consumed below — confirm intended
    val ts = parsedMaxDate.getTime
  }
}

@ -20,7 +20,7 @@ import org.slf4j.LoggerFactory;
import com.fasterxml.jackson.databind.ObjectMapper; import com.fasterxml.jackson.databind.ObjectMapper;
import eu.dnetlib.dhp.actionmanager.project.utils.CSVProgramme; import eu.dnetlib.dhp.actionmanager.project.utils.model.CSVProgramme;
import eu.dnetlib.dhp.application.ArgumentApplicationParser; import eu.dnetlib.dhp.application.ArgumentApplicationParser;
import eu.dnetlib.dhp.common.HdfsSupport; import eu.dnetlib.dhp.common.HdfsSupport;
import scala.Tuple2; import scala.Tuple2;
@ -171,26 +171,23 @@ public class PrepareProgramme {
} }
private static CSVProgramme groupProgrammeByCode(CSVProgramme a, CSVProgramme b) { private static CSVProgramme groupProgrammeByCode(CSVProgramme a, CSVProgramme b) {
if (!a.getLanguage().equals("en")) { if (!a.getLanguage().equals("en") && b.getLanguage().equalsIgnoreCase("en")) {
if (b.getLanguage().equalsIgnoreCase("en")) { a.setTitle(b.getTitle());
a.setTitle(b.getTitle()); a.setLanguage(b.getLanguage());
a.setLanguage(b.getLanguage());
}
} }
if (StringUtils.isEmpty(a.getShortTitle())) { if (StringUtils.isEmpty(a.getShortTitle()) && !StringUtils.isEmpty(b.getShortTitle())) {
if (!StringUtils.isEmpty(b.getShortTitle())) { a.setShortTitle(b.getShortTitle());
a.setShortTitle(b.getShortTitle());
}
} }
return a; return a;
} }
@SuppressWarnings("unchecked")
private static List<CSVProgramme> prepareClassification(JavaRDD<CSVProgramme> h2020Programmes) { private static List<CSVProgramme> prepareClassification(JavaRDD<CSVProgramme> h2020Programmes) {
Object[] codedescription = h2020Programmes Object[] codedescription = h2020Programmes
.map( .map(
value -> new Tuple2<>(value.getCode(), value -> new Tuple2<>(value.getCode(),
new Tuple2<String, String>(value.getTitle(), value.getShortTitle()))) new Tuple2<>(value.getTitle(), value.getShortTitle())))
.collect() .collect()
.toArray(); .toArray();
@ -216,7 +213,7 @@ public class PrepareProgramme {
String[] tmp = ent.split("\\."); String[] tmp = ent.split("\\.");
if (tmp.length <= 2) { if (tmp.length <= 2) {
if (StringUtils.isEmpty(entry._2()._2())) { if (StringUtils.isEmpty(entry._2()._2())) {
map.put(entry._1(), new Tuple2<String, String>(entry._2()._1(), entry._2()._1())); map.put(entry._1(), new Tuple2<>(entry._2()._1(), entry._2()._1()));
} else { } else {
map.put(entry._1(), entry._2()); map.put(entry._1(), entry._2());
} }

@ -18,7 +18,7 @@ import org.slf4j.LoggerFactory;
import com.fasterxml.jackson.databind.ObjectMapper; import com.fasterxml.jackson.databind.ObjectMapper;
import eu.dnetlib.dhp.actionmanager.project.utils.CSVProject; import eu.dnetlib.dhp.actionmanager.project.utils.model.CSVProject;
import eu.dnetlib.dhp.application.ArgumentApplicationParser; import eu.dnetlib.dhp.application.ArgumentApplicationParser;
import eu.dnetlib.dhp.common.HdfsSupport; import eu.dnetlib.dhp.common.HdfsSupport;
import scala.Tuple2; import scala.Tuple2;
@ -29,7 +29,7 @@ import scala.Tuple2;
*/ */
public class PrepareProjects { public class PrepareProjects {
private static final Logger log = LoggerFactory.getLogger(PrepareProgramme.class); private static final Logger log = LoggerFactory.getLogger(PrepareProjects.class);
private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper(); private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();
public static void main(String[] args) throws Exception { public static void main(String[] args) throws Exception {

@ -31,15 +31,16 @@ import eu.dnetlib.dhp.common.DbClient;
*/ */
public class ReadProjectsFromDB implements Closeable { public class ReadProjectsFromDB implements Closeable {
private final DbClient dbClient;
private static final Log log = LogFactory.getLog(ReadProjectsFromDB.class); private static final Log log = LogFactory.getLog(ReadProjectsFromDB.class);
private static final String query = "SELECT code " +
"from projects where id like 'corda__h2020%' ";
private final DbClient dbClient;
private final Configuration conf; private final Configuration conf;
private final BufferedWriter writer; private final BufferedWriter writer;
private final ObjectMapper OBJECT_MAPPER = new ObjectMapper(); private final ObjectMapper OBJECT_MAPPER = new ObjectMapper();
private final static String query = "SELECT code " +
"from projects where id like 'corda__h2020%' ";
public static void main(final String[] args) throws Exception { public static void main(final String[] args) throws Exception {
final ArgumentApplicationParser parser = new ArgumentApplicationParser( final ArgumentApplicationParser parser = new ArgumentApplicationParser(
IOUtils IOUtils
@ -65,9 +66,9 @@ public class ReadProjectsFromDB implements Closeable {
} }
} }
public void execute(final String sql, final Function<ResultSet, List<ProjectSubset>> producer) throws Exception { public void execute(final String sql, final Function<ResultSet, List<ProjectSubset>> producer) {
final Consumer<ResultSet> consumer = rs -> producer.apply(rs).forEach(r -> writeProject(r)); final Consumer<ResultSet> consumer = rs -> producer.apply(rs).forEach(this::writeProject);
dbClient.processResults(sql, consumer); dbClient.processResults(sql, consumer);
} }
@ -94,20 +95,20 @@ public class ReadProjectsFromDB implements Closeable {
public ReadProjectsFromDB( public ReadProjectsFromDB(
final String hdfsPath, String hdfsNameNode, final String dbUrl, final String dbUser, final String dbPassword) final String hdfsPath, String hdfsNameNode, final String dbUrl, final String dbUser, final String dbPassword)
throws Exception { throws IOException {
this.dbClient = new DbClient(dbUrl, dbUser, dbPassword); this.dbClient = new DbClient(dbUrl, dbUser, dbPassword);
this.conf = new Configuration(); this.conf = new Configuration();
this.conf.set("fs.defaultFS", hdfsNameNode); this.conf.set("fs.defaultFS", hdfsNameNode);
FileSystem fileSystem = FileSystem.get(this.conf); FileSystem fileSystem = FileSystem.get(this.conf);
Path hdfsWritePath = new Path(hdfsPath); Path hdfsWritePath = new Path(hdfsPath);
FSDataOutputStream fsDataOutputStream = null;
if (fileSystem.exists(hdfsWritePath)) { if (fileSystem.exists(hdfsWritePath)) {
fileSystem.delete(hdfsWritePath, false); fileSystem.delete(hdfsWritePath, false);
} }
fsDataOutputStream = fileSystem.create(hdfsWritePath); FSDataOutputStream fos = fileSystem.create(hdfsWritePath);
this.writer = new BufferedWriter(new OutputStreamWriter(fsDataOutputStream, StandardCharsets.UTF_8)); this.writer = new BufferedWriter(new OutputStreamWriter(fos, StandardCharsets.UTF_8));
} }
@Override @Override

@ -4,7 +4,6 @@ package eu.dnetlib.dhp.actionmanager.project;
import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession; import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;
import java.util.Arrays; import java.util.Arrays;
import java.util.HashMap;
import java.util.Objects; import java.util.Objects;
import java.util.Optional; import java.util.Optional;
@ -22,15 +21,16 @@ import org.slf4j.LoggerFactory;
import com.fasterxml.jackson.databind.ObjectMapper; import com.fasterxml.jackson.databind.ObjectMapper;
import eu.dnetlib.dhp.actionmanager.project.utils.CSVProgramme; import eu.dnetlib.dhp.actionmanager.project.utils.model.CSVProgramme;
import eu.dnetlib.dhp.actionmanager.project.utils.CSVProject; import eu.dnetlib.dhp.actionmanager.project.utils.model.CSVProject;
import eu.dnetlib.dhp.actionmanager.project.utils.EXCELTopic; import eu.dnetlib.dhp.actionmanager.project.utils.model.EXCELTopic;
import eu.dnetlib.dhp.application.ArgumentApplicationParser; import eu.dnetlib.dhp.application.ArgumentApplicationParser;
import eu.dnetlib.dhp.common.HdfsSupport; import eu.dnetlib.dhp.common.HdfsSupport;
import eu.dnetlib.dhp.schema.action.AtomicAction; import eu.dnetlib.dhp.schema.action.AtomicAction;
import eu.dnetlib.dhp.schema.common.ModelSupport; import eu.dnetlib.dhp.schema.common.ModelSupport;
import eu.dnetlib.dhp.schema.oaf.H2020Classification; import eu.dnetlib.dhp.schema.oaf.H2020Classification;
import eu.dnetlib.dhp.schema.oaf.H2020Programme; import eu.dnetlib.dhp.schema.oaf.H2020Programme;
import eu.dnetlib.dhp.schema.oaf.OafEntity;
import eu.dnetlib.dhp.schema.oaf.Project; import eu.dnetlib.dhp.schema.oaf.Project;
import eu.dnetlib.dhp.utils.DHPUtils; import eu.dnetlib.dhp.utils.DHPUtils;
import scala.Tuple2; import scala.Tuple2;
@ -47,13 +47,10 @@ import scala.Tuple2;
* *
* To produce one single entry for each project code a step of grouping is needed: each project can be associated to more * To produce one single entry for each project code a step of grouping is needed: each project can be associated to more
* than one programme. * than one programme.
*
*
*/ */
public class SparkAtomicActionJob { public class SparkAtomicActionJob {
private static final Logger log = LoggerFactory.getLogger(SparkAtomicActionJob.class); private static final Logger log = LoggerFactory.getLogger(SparkAtomicActionJob.class);
private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper(); private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();
private static final HashMap<String, String> programmeMap = new HashMap<>();
public static void main(String[] args) throws Exception { public static void main(String[] args) throws Exception {
@ -137,7 +134,6 @@ public class SparkAtomicActionJob {
h2020classification.setClassification(csvProgramme.getClassification()); h2020classification.setClassification(csvProgramme.getClassification());
h2020classification.setH2020Programme(pm); h2020classification.setH2020Programme(pm);
setLevelsandProgramme(h2020classification, csvProgramme.getClassification_short()); setLevelsandProgramme(h2020classification, csvProgramme.getClassification_short());
// setProgramme(h2020classification, ocsvProgramme.get().getClassification());
pp.setH2020classification(Arrays.asList(h2020classification)); pp.setH2020classification(Arrays.asList(h2020classification));
return pp; return pp;
@ -152,20 +148,16 @@ public class SparkAtomicActionJob {
.map((MapFunction<Tuple2<Project, EXCELTopic>, Project>) p -> { .map((MapFunction<Tuple2<Project, EXCELTopic>, Project>) p -> {
Optional<EXCELTopic> op = Optional.ofNullable(p._2()); Optional<EXCELTopic> op = Optional.ofNullable(p._2());
Project rp = p._1(); Project rp = p._1();
if (op.isPresent()) { op.ifPresent(excelTopic -> rp.setH2020topicdescription(excelTopic.getTitle()));
rp.setH2020topicdescription(op.get().getTitle());
}
return rp; return rp;
}, Encoders.bean(Project.class)) }, Encoders.bean(Project.class))
.filter(Objects::nonNull) .filter(Objects::nonNull)
.groupByKey( .groupByKey(
(MapFunction<Project, String>) p -> p.getId(), (MapFunction<Project, String>) OafEntity::getId,
Encoders.STRING()) Encoders.STRING())
.mapGroups((MapGroupsFunction<String, Project, Project>) (s, it) -> { .mapGroups((MapGroupsFunction<String, Project, Project>) (s, it) -> {
Project first = it.next(); Project first = it.next();
it.forEachRemaining(p -> { it.forEachRemaining(first::mergeFrom);
first.mergeFrom(p);
});
return first; return first;
}, Encoders.bean(Project.class)) }, Encoders.bean(Project.class))
.toJavaRDD() .toJavaRDD()
@ -189,12 +181,6 @@ public class SparkAtomicActionJob {
h2020Classification.getH2020Programme().setDescription(tmp[tmp.length - 1]); h2020Classification.getH2020Programme().setDescription(tmp[tmp.length - 1]);
} }
// private static void setProgramme(H2020Classification h2020Classification, String classification) {
// String[] tmp = classification.split(" \\| ");
//
// h2020Classification.getH2020Programme().setDescription(tmp[tmp.length - 1]);
// }
public static <R> Dataset<R> readPath( public static <R> Dataset<R> readPath(
SparkSession spark, String inputPath, Class<R> clazz) { SparkSession spark, String inputPath, Class<R> clazz) {
return spark return spark

@ -1,40 +0,0 @@
package eu.dnetlib.dhp.actionmanager.project.utils;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.Set;
import org.apache.commons.csv.CSVFormat;
import org.apache.commons.csv.CSVRecord;
import org.apache.commons.lang.reflect.FieldUtils;
/**
 * Reads a generic csv and maps it into classes that mirror its schema.
 * <p>
 * The csv content is expected to be EXCEL-flavoured, semicolon-delimited, double-quoted
 * and to carry a header row; each header name must match a writable field of the target class.
 */
public class CSVParser {

	/**
	 * Parses the given csv content and maps every record onto a fresh instance of the given class,
	 * writing each column value into the field named after its header.
	 *
	 * @param csvFile the full csv content, header row included
	 * @param classForName fully qualified name of the bean class to instantiate per record
	 * @param <R> the bean type returned to the caller
	 * @return one populated bean per csv record
	 * @throws ClassNotFoundException when classForName cannot be resolved
	 * @throws IOException when the csv content cannot be parsed
	 * @throws IllegalAccessException when a field cannot be written reflectively
	 * @throws InstantiationException when the bean class cannot be instantiated
	 */
	@SuppressWarnings("unchecked")
	public <R> List<R> parse(String csvFile, String classForName)
		throws ClassNotFoundException, IOException, IllegalAccessException, InstantiationException {
		final CSVFormat format = CSVFormat.EXCEL
			.withHeader()
			.withDelimiter(';')
			.withQuote('"')
			.withTrim();
		final List<R> ret = new ArrayList<>();
		// resolve the target class once, not per record
		final Class<?> clazz = Class.forName(classForName);
		// the commons-csv parser is Closeable: close it to release the underlying reader
		try (final org.apache.commons.csv.CSVParser parser = org.apache.commons.csv.CSVParser.parse(csvFile, format)) {
			final Set<String> headers = parser.getHeaderMap().keySet();
			for (CSVRecord csvRecord : parser.getRecords()) {
				final Object cc = clazz.newInstance();
				for (String header : headers) {
					FieldUtils.writeField(cc, header, csvRecord.get(header), true);
				}
				ret.add((R) cc);
			}
		}
		return ret;
	}
}

@ -1,200 +0,0 @@
package eu.dnetlib.dhp.actionmanager.project.utils;
import java.io.Serializable;
/**
 * Model for one row of the projects csv file: a plain serializable bean with one
 * String field per csv column, populated reflectively by the csv parser.
 */
public class CSVProject implements Serializable {
	// project identifiers and lifecycle status
	private String rcn;
	private String id;
	private String acronym;
	private String status;
	// programme / topic classification of the project
	private String programme;
	private String topics;
	private String frameworkProgramme;
	// descriptive fields
	private String title;
	private String startDate;
	private String endDate;
	private String projectUrl;
	private String objective;
	// funding figures and call information
	private String totalCost;
	private String ecMaxContribution;
	private String call;
	private String fundingScheme;
	// coordinator and participant information
	private String coordinator;
	private String coordinatorCountry;
	private String participants;
	private String participantCountries;
	private String subjects;

	public String getRcn() {
		return rcn;
	}

	public void setRcn(String rcn) {
		this.rcn = rcn;
	}

	public String getId() {
		return id;
	}

	public void setId(String id) {
		this.id = id;
	}

	public String getAcronym() {
		return acronym;
	}

	public void setAcronym(String acronym) {
		this.acronym = acronym;
	}

	public String getStatus() {
		return status;
	}

	public void setStatus(String status) {
		this.status = status;
	}

	public String getProgramme() {
		return programme;
	}

	public void setProgramme(String programme) {
		this.programme = programme;
	}

	public String getTopics() {
		return topics;
	}

	public void setTopics(String topics) {
		this.topics = topics;
	}

	public String getFrameworkProgramme() {
		return frameworkProgramme;
	}

	public void setFrameworkProgramme(String frameworkProgramme) {
		this.frameworkProgramme = frameworkProgramme;
	}

	public String getTitle() {
		return title;
	}

	public void setTitle(String title) {
		this.title = title;
	}

	public String getStartDate() {
		return startDate;
	}

	public void setStartDate(String startDate) {
		this.startDate = startDate;
	}

	public String getEndDate() {
		return endDate;
	}

	public void setEndDate(String endDate) {
		this.endDate = endDate;
	}

	public String getProjectUrl() {
		return projectUrl;
	}

	public void setProjectUrl(String projectUrl) {
		this.projectUrl = projectUrl;
	}

	public String getObjective() {
		return objective;
	}

	public void setObjective(String objective) {
		this.objective = objective;
	}

	public String getTotalCost() {
		return totalCost;
	}

	public void setTotalCost(String totalCost) {
		this.totalCost = totalCost;
	}

	public String getEcMaxContribution() {
		return ecMaxContribution;
	}

	public void setEcMaxContribution(String ecMaxContribution) {
		this.ecMaxContribution = ecMaxContribution;
	}

	public String getCall() {
		return call;
	}

	public void setCall(String call) {
		this.call = call;
	}

	public String getFundingScheme() {
		return fundingScheme;
	}

	public void setFundingScheme(String fundingScheme) {
		this.fundingScheme = fundingScheme;
	}

	public String getCoordinator() {
		return coordinator;
	}

	public void setCoordinator(String coordinator) {
		this.coordinator = coordinator;
	}

	public String getCoordinatorCountry() {
		return coordinatorCountry;
	}

	public void setCoordinatorCountry(String coordinatorCountry) {
		this.coordinatorCountry = coordinatorCountry;
	}

	public String getParticipants() {
		return participants;
	}

	public void setParticipants(String participants) {
		this.participants = participants;
	}

	public String getParticipantCountries() {
		return participantCountries;
	}

	public void setParticipantCountries(String participantCountries) {
		this.participantCountries = participantCountries;
	}

	public String getSubjects() {
		return subjects;
	}

	public void setSubjects(String subjects) {
		this.subjects = subjects;
	}
}

@ -17,6 +17,8 @@ import org.apache.poi.ss.usermodel.Row;
import org.apache.poi.xssf.usermodel.XSSFSheet; import org.apache.poi.xssf.usermodel.XSSFSheet;
import org.apache.poi.xssf.usermodel.XSSFWorkbook; import org.apache.poi.xssf.usermodel.XSSFWorkbook;
import eu.dnetlib.dhp.actionmanager.project.utils.model.EXCELTopic;
/** /**
* Reads a generic excel file and maps it into classes that mirror its schema * Reads a generic excel file and maps it into classes that mirror its schema
*/ */
@ -26,52 +28,52 @@ public class EXCELParser {
throws ClassNotFoundException, IOException, IllegalAccessException, InstantiationException, throws ClassNotFoundException, IOException, IllegalAccessException, InstantiationException,
InvalidFormatException { InvalidFormatException {
OPCPackage pkg = OPCPackage.open(file); try (OPCPackage pkg = OPCPackage.open(file); XSSFWorkbook wb = new XSSFWorkbook(pkg)) {
XSSFWorkbook wb = new XSSFWorkbook(pkg);
XSSFSheet sheet = wb.getSheet(sheetName); XSSFSheet sheet = wb.getSheet(sheetName);
if (sheetName == null) { if (sheet == null) {
throw new RuntimeException("Sheet name " + sheetName + " not present in current file"); throw new IllegalArgumentException("Sheet name " + sheetName + " not present in current file");
} }
List<R> ret = new ArrayList<>(); List<R> ret = new ArrayList<>();
DataFormatter dataFormatter = new DataFormatter(); DataFormatter dataFormatter = new DataFormatter();
Iterator<Row> rowIterator = sheet.rowIterator(); Iterator<Row> rowIterator = sheet.rowIterator();
List<String> headers = new ArrayList<>(); List<String> headers = new ArrayList<>();
int count = 0; int count = 0;
while (rowIterator.hasNext()) { while (rowIterator.hasNext()) {
Row row = rowIterator.next(); Row row = rowIterator.next();
if (count == 0) { if (count == 0) {
Iterator<Cell> cellIterator = row.cellIterator(); Iterator<Cell> cellIterator = row.cellIterator();
while (cellIterator.hasNext()) { while (cellIterator.hasNext()) {
Cell cell = cellIterator.next(); Cell cell = cellIterator.next();
headers.add(dataFormatter.formatCellValue(cell)); headers.add(dataFormatter.formatCellValue(cell));
} }
} else { } else {
Class<?> clazz = Class.forName(classForName); Class<?> clazz = Class.forName(classForName);
final Object cc = clazz.newInstance(); final Object cc = clazz.newInstance();
for (int i = 0; i < headers.size(); i++) { for (int i = 0; i < headers.size(); i++) {
Cell cell = row.getCell(i); Cell cell = row.getCell(i);
FieldUtils.writeField(cc, headers.get(i), dataFormatter.formatCellValue(cell), true); FieldUtils.writeField(cc, headers.get(i), dataFormatter.formatCellValue(cell), true);
} }
EXCELTopic et = (EXCELTopic) cc;
if (StringUtils.isNotBlank(et.getRcn())) {
ret.add((R) cc);
}
EXCELTopic et = (EXCELTopic) cc;
if (StringUtils.isNotBlank(et.getRcn())) {
ret.add((R) cc);
} }
count += 1;
} }
count += 1; return ret;
} }
return ret;
} }
} }

@ -1,34 +1,21 @@
package eu.dnetlib.dhp.actionmanager.project.utils; package eu.dnetlib.dhp.actionmanager.project.utils;
import java.io.BufferedWriter; import java.io.*;
import java.io.Closeable; import java.util.Optional;
import java.io.IOException;
import java.io.OutputStreamWriter;
import java.nio.charset.StandardCharsets;
import org.apache.commons.io.IOUtils; import org.apache.commons.io.IOUtils;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import com.fasterxml.jackson.databind.ObjectMapper;
import eu.dnetlib.dhp.application.ArgumentApplicationParser; import eu.dnetlib.dhp.application.ArgumentApplicationParser;
import eu.dnetlib.dhp.collection.HttpConnector2; import eu.dnetlib.dhp.common.collection.GetCSV;
import eu.dnetlib.dhp.common.collection.HttpConnector2;
/** /**
* Applies the parsing of a csv file and writes the Serialization of it in hdfs * Applies the parsing of a csv file and writes the Serialization of it in hdfs
*/ */
public class ReadCSV implements Closeable { public class ReadCSV {
private static final Log log = LogFactory.getLog(ReadCSV.class);
private final Configuration conf;
private final BufferedWriter writer;
private final ObjectMapper OBJECT_MAPPER = new ObjectMapper();
private final String csvFile;
public static void main(final String[] args) throws Exception { public static void main(final String[] args) throws Exception {
final ArgumentApplicationParser parser = new ArgumentApplicationParser( final ArgumentApplicationParser parser = new ArgumentApplicationParser(
@ -44,56 +31,22 @@ public class ReadCSV implements Closeable {
final String hdfsPath = parser.get("hdfsPath"); final String hdfsPath = parser.get("hdfsPath");
final String hdfsNameNode = parser.get("hdfsNameNode"); final String hdfsNameNode = parser.get("hdfsNameNode");
final String classForName = parser.get("classForName"); final String classForName = parser.get("classForName");
Optional<String> delimiter = Optional.ofNullable(parser.get("delimiter"));
char del = ';';
if (delimiter.isPresent())
del = delimiter.get().charAt(0);
try (final ReadCSV readCSV = new ReadCSV(hdfsPath, hdfsNameNode, fileURL)) { Configuration conf = new Configuration();
conf.set("fs.defaultFS", hdfsNameNode);
log.info("Getting CSV file...");
readCSV.execute(classForName);
} FileSystem fileSystem = FileSystem.get(conf);
} BufferedReader reader = new BufferedReader(
new InputStreamReader(new HttpConnector2().getInputSourceAsStream(fileURL)));
public void execute(final String classForName) throws Exception { GetCSV.getCsv(fileSystem, reader, hdfsPath, classForName, del);
CSVParser csvParser = new CSVParser();
csvParser
.parse(csvFile, classForName)
.stream()
.forEach(p -> write(p));
}
@Override reader.close();
public void close() throws IOException {
writer.close();
}
public ReadCSV(
final String hdfsPath,
final String hdfsNameNode,
final String fileURL)
throws Exception {
this.conf = new Configuration();
this.conf.set("fs.defaultFS", hdfsNameNode);
HttpConnector2 httpConnector = new HttpConnector2();
FileSystem fileSystem = FileSystem.get(this.conf);
Path hdfsWritePath = new Path(hdfsPath);
FSDataOutputStream fsDataOutputStream = null;
if (fileSystem.exists(hdfsWritePath)) {
fileSystem.delete(hdfsWritePath, false);
}
fsDataOutputStream = fileSystem.create(hdfsWritePath);
this.writer = new BufferedWriter(new OutputStreamWriter(fsDataOutputStream, StandardCharsets.UTF_8));
this.csvFile = httpConnector.getInputSource(fileURL);
}
protected void write(final Object p) {
try {
writer.write(OBJECT_MAPPER.writeValueAsString(p));
writer.newLine();
} catch (final Exception e) {
throw new RuntimeException(e);
}
} }
} }

@ -11,18 +11,20 @@ import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataOutputStream; import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path; import org.apache.hadoop.fs.Path;
import org.apache.poi.openxml4j.exceptions.InvalidFormatException;
import com.fasterxml.jackson.databind.ObjectMapper; import com.fasterxml.jackson.databind.ObjectMapper;
import eu.dnetlib.dhp.application.ArgumentApplicationParser; import eu.dnetlib.dhp.application.ArgumentApplicationParser;
import eu.dnetlib.dhp.collection.HttpConnector2; import eu.dnetlib.dhp.common.collection.CollectorException;
import eu.dnetlib.dhp.common.collection.HttpConnector2;
/** /**
* Applies the parsing of an excel file and writes the Serialization of it in hdfs * Applies the parsing of an excel file and writes the Serialization of it in hdfs
*/ */
public class ReadExcel implements Closeable { public class ReadExcel implements Closeable {
private static final Log log = LogFactory.getLog(ReadCSV.class); private static final Log log = LogFactory.getLog(ReadExcel.class);
private final Configuration conf;
private final BufferedWriter writer; private final BufferedWriter writer;
private final ObjectMapper OBJECT_MAPPER = new ObjectMapper(); private final ObjectMapper OBJECT_MAPPER = new ObjectMapper();
private final InputStream excelFile; private final InputStream excelFile;
@ -31,7 +33,7 @@ public class ReadExcel implements Closeable {
final ArgumentApplicationParser parser = new ArgumentApplicationParser( final ArgumentApplicationParser parser = new ArgumentApplicationParser(
IOUtils IOUtils
.toString( .toString(
ReadCSV.class ReadExcel.class
.getResourceAsStream( .getResourceAsStream(
"/eu/dnetlib/dhp/actionmanager/project/parameters.json"))); "/eu/dnetlib/dhp/actionmanager/project/parameters.json")));
@ -51,13 +53,15 @@ public class ReadExcel implements Closeable {
} }
} }
public void execute(final String classForName, final String sheetName) throws Exception { public void execute(final String classForName, final String sheetName)
throws IOException, ClassNotFoundException, InvalidFormatException, IllegalAccessException,
InstantiationException {
EXCELParser excelParser = new EXCELParser(); EXCELParser excelParser = new EXCELParser();
excelParser excelParser
.parse(excelFile, classForName, sheetName) .parse(excelFile, classForName, sheetName)
.stream() .stream()
.forEach(p -> write(p)); .forEach(this::write);
} }
@Override @Override
@ -68,20 +72,20 @@ public class ReadExcel implements Closeable {
public ReadExcel( public ReadExcel(
final String hdfsPath, final String hdfsPath,
final String hdfsNameNode, final String hdfsNameNode,
final String fileURL) final String fileURL) throws CollectorException, IOException {
throws Exception {
this.conf = new Configuration(); final Configuration conf = new Configuration();
this.conf.set("fs.defaultFS", hdfsNameNode); conf.set("fs.defaultFS", hdfsNameNode);
HttpConnector2 httpConnector = new HttpConnector2(); HttpConnector2 httpConnector = new HttpConnector2();
FileSystem fileSystem = FileSystem.get(this.conf); FileSystem fileSystem = FileSystem.get(conf);
Path hdfsWritePath = new Path(hdfsPath); Path hdfsWritePath = new Path(hdfsPath);
FSDataOutputStream fsDataOutputStream = null;
if (fileSystem.exists(hdfsWritePath)) { if (fileSystem.exists(hdfsWritePath)) {
fileSystem.delete(hdfsWritePath, false); fileSystem.delete(hdfsWritePath, false);
} }
fsDataOutputStream = fileSystem.create(hdfsWritePath); FSDataOutputStream fos = fileSystem.create(hdfsWritePath);
this.writer = new BufferedWriter(new OutputStreamWriter(fsDataOutputStream, StandardCharsets.UTF_8)); this.writer = new BufferedWriter(new OutputStreamWriter(fos, StandardCharsets.UTF_8));
this.excelFile = httpConnector.getInputSourceAsStream(fileURL); this.excelFile = httpConnector.getInputSourceAsStream(fileURL);
} }

@ -1,20 +1,32 @@
package eu.dnetlib.dhp.actionmanager.project.utils; package eu.dnetlib.dhp.actionmanager.project.utils.model;
import java.io.Serializable; import java.io.Serializable;
import com.opencsv.bean.CsvBindByName;
import com.opencsv.bean.CsvIgnore;
/** /**
* The model for the programme csv file * The model for the programme csv file
*/ */
public class CSVProgramme implements Serializable { public class CSVProgramme implements Serializable {
private String rcn; @CsvBindByName(column = "code")
private String code; private String code;
@CsvBindByName(column = "title")
private String title; private String title;
@CsvBindByName(column = "shortTitle")
private String shortTitle; private String shortTitle;
@CsvBindByName(column = "language")
private String language; private String language;
@CsvIgnore
private String classification; private String classification;
@CsvIgnore
private String classification_short; private String classification_short;
public String getClassification_short() { public String getClassification_short() {
@ -33,14 +45,6 @@ public class CSVProgramme implements Serializable {
this.classification = classification; this.classification = classification;
} }
public String getRcn() {
return rcn;
}
public void setRcn(String rcn) {
this.rcn = rcn;
}
public String getCode() { public String getCode() {
return code; return code;
} }
@ -73,5 +77,4 @@ public class CSVProgramme implements Serializable {
this.language = language; this.language = language;
} }
//
} }

@ -0,0 +1,46 @@
package eu.dnetlib.dhp.actionmanager.project.utils.model;
import java.io.Serializable;
import com.opencsv.bean.CsvBindByName;
/**
 * Model for one row of the projects csv file; only the columns declared below are
 * bound (by header name, via opencsv) — any other csv column is ignored.
 */
public class CSVProject implements Serializable {

	// bound to the "id" csv column
	@CsvBindByName(column = "id")
	private String id;

	// bound to the "programme" csv column
	@CsvBindByName(column = "programme")
	private String programme;

	// bound to the "topics" csv column
	@CsvBindByName(column = "topics")
	private String topics;

	public String getId() {
		return id;
	}

	public void setId(String id) {
		this.id = id;
	}

	public String getProgramme() {
		return programme;
	}

	public void setProgramme(String programme) {
		this.programme = programme;
	}

	public String getTopics() {
		return topics;
	}

	public void setTopics(String topics) {
		this.topics = topics;
	}
}

@ -1,5 +1,5 @@
package eu.dnetlib.dhp.actionmanager.project.utils; package eu.dnetlib.dhp.actionmanager.project.utils.model;
import java.io.Serializable; import java.io.Serializable;

@ -9,6 +9,7 @@ import static eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils.listKeyValues;
import static eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils.qualifier; import static eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils.qualifier;
import static eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils.structuredProperty; import static eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils.structuredProperty;
import java.io.IOException;
import java.io.InputStream; import java.io.InputStream;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.Arrays; import java.util.Arrays;
@ -74,7 +75,7 @@ public class GenerateRorActionSetJob {
final String jsonConfiguration = IOUtils final String jsonConfiguration = IOUtils
.toString( .toString(
SparkAtomicActionJob.class GenerateRorActionSetJob.class
.getResourceAsStream("/eu/dnetlib/dhp/actionmanager/ror/action_set_parameters.json")); .getResourceAsStream("/eu/dnetlib/dhp/actionmanager/ror/action_set_parameters.json"));
final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration); final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration);
@ -108,7 +109,7 @@ public class GenerateRorActionSetJob {
private static void processRorOrganizations(final SparkSession spark, private static void processRorOrganizations(final SparkSession spark,
final String inputPath, final String inputPath,
final String outputPath) throws Exception { final String outputPath) throws IOException {
readInputPath(spark, inputPath) readInputPath(spark, inputPath)
.map( .map(
@ -203,7 +204,7 @@ public class GenerateRorActionSetJob {
private static Dataset<RorOrganization> readInputPath( private static Dataset<RorOrganization> readInputPath(
final SparkSession spark, final SparkSession spark,
final String path) throws Exception { final String path) throws IOException {
try (final FileSystem fileSystem = FileSystem.get(new Configuration()); try (final FileSystem fileSystem = FileSystem.get(new Configuration());
final InputStream is = fileSystem.open(new Path(path))) { final InputStream is = fileSystem.open(new Path(path))) {

@ -7,6 +7,8 @@ import com.fasterxml.jackson.annotation.JsonProperty;
public class Address implements Serializable { public class Address implements Serializable {
private static final long serialVersionUID = 2444635485253443195L;
@JsonProperty("lat") @JsonProperty("lat")
private Float lat; private Float lat;
@ -37,8 +39,6 @@ public class Address implements Serializable {
@JsonProperty("line") @JsonProperty("line")
private String line; private String line;
private final static long serialVersionUID = 2444635485253443195L;
public Float getLat() { public Float getLat() {
return lat; return lat;
} }

@ -7,14 +7,14 @@ import com.fasterxml.jackson.annotation.JsonProperty;
public class Country implements Serializable { public class Country implements Serializable {
private static final long serialVersionUID = 4357848706229493627L;
@JsonProperty("country_code") @JsonProperty("country_code")
private String countryCode; private String countryCode;
@JsonProperty("country_name") @JsonProperty("country_name")
private String countryName; private String countryName;
private final static long serialVersionUID = 4357848706229493627L;
public String getCountryCode() { public String getCountryCode() {
return countryCode; return countryCode;
} }

@ -13,7 +13,7 @@ public class ExternalIdType implements Serializable {
private String preferred; private String preferred;
private final static long serialVersionUID = 2616688352998387611L; private static final long serialVersionUID = 2616688352998387611L;
public ExternalIdType() { public ExternalIdType() {
} }

@ -15,8 +15,7 @@ import com.fasterxml.jackson.databind.JsonNode;
public class ExternalIdTypeDeserializer extends JsonDeserializer<ExternalIdType> { public class ExternalIdTypeDeserializer extends JsonDeserializer<ExternalIdType> {
@Override @Override
public ExternalIdType deserialize(final JsonParser p, final DeserializationContext ctxt) public ExternalIdType deserialize(final JsonParser p, final DeserializationContext ctxt) throws IOException {
throws IOException, JsonProcessingException {
final ObjectCodec oc = p.getCodec(); final ObjectCodec oc = p.getCodec();
final JsonNode node = oc.readTree(p); final JsonNode node = oc.readTree(p);

@ -19,7 +19,7 @@ public class GeonamesAdmin implements Serializable {
@JsonProperty("code") @JsonProperty("code")
private String code; private String code;
private final static long serialVersionUID = 7294958526269195673L; private static final long serialVersionUID = 7294958526269195673L;
public String getAsciiName() { public String getAsciiName() {
return asciiName; return asciiName;

@ -31,7 +31,7 @@ public class GeonamesCity implements Serializable {
@JsonProperty("license") @JsonProperty("license")
private License license; private License license;
private final static long serialVersionUID = -8389480201526252955L; private static final long serialVersionUID = -8389480201526252955L;
public NameAndCode getNutsLevel2() { public NameAndCode getNutsLevel2() {
return nutsLevel2; return nutsLevel2;

@ -13,7 +13,7 @@ public class Label implements Serializable {
@JsonProperty("label") @JsonProperty("label")
private String label; private String label;
private final static long serialVersionUID = -6576156103297850809L; private static final long serialVersionUID = -6576156103297850809L;
public String getIso639() { public String getIso639() {
return iso639; return iso639;

@ -13,7 +13,7 @@ public class License implements Serializable {
@JsonProperty("license") @JsonProperty("license")
private String license; private String license;
private final static long serialVersionUID = -194308261058176439L; private static final long serialVersionUID = -194308261058176439L;
public String getAttribution() { public String getAttribution() {
return attribution; return attribution;

@ -7,14 +7,14 @@ import com.fasterxml.jackson.annotation.JsonProperty;
public class NameAndCode implements Serializable { public class NameAndCode implements Serializable {
private static final long serialVersionUID = 5459836979206140843L;
@JsonProperty("name") @JsonProperty("name")
private String name; private String name;
@JsonProperty("code") @JsonProperty("code")
private String code; private String code;
private final static long serialVersionUID = 5459836979206140843L;
public String getName() { public String getName() {
return name; return name;
} }

@ -7,6 +7,8 @@ import com.fasterxml.jackson.annotation.JsonProperty;
public class Relationship implements Serializable { public class Relationship implements Serializable {
private static final long serialVersionUID = 7847399503395576960L;
@JsonProperty("type") @JsonProperty("type")
private String type; private String type;
@ -16,8 +18,6 @@ public class Relationship implements Serializable {
@JsonProperty("label") @JsonProperty("label")
private String label; private String label;
private final static long serialVersionUID = 7847399503395576960L;
public String getType() { public String getType() {
return type; return type;
} }

@ -11,6 +11,8 @@ import com.fasterxml.jackson.annotation.JsonProperty;
public class RorOrganization implements Serializable { public class RorOrganization implements Serializable {
private static final long serialVersionUID = -2658312087616043225L;
@JsonProperty("ip_addresses") @JsonProperty("ip_addresses")
private List<String> ipAddresses = new ArrayList<>(); private List<String> ipAddresses = new ArrayList<>();
@ -59,8 +61,6 @@ public class RorOrganization implements Serializable {
@JsonProperty("status") @JsonProperty("status")
private String status; private String status;
private final static long serialVersionUID = -2658312087616043225L;
public List<String> getIpAddresses() { public List<String> getIpAddresses() {
return ipAddresses; return ipAddresses;
} }

@ -6,6 +6,8 @@ import java.util.concurrent.Executors;
import java.util.concurrent.ScheduledExecutorService; import java.util.concurrent.ScheduledExecutorService;
import java.util.concurrent.TimeUnit; import java.util.concurrent.TimeUnit;
import eu.dnetlib.dhp.common.aggregation.AggregatorReport;
public abstract class ReportingJob { public abstract class ReportingJob {
/** /**
@ -22,7 +24,7 @@ public abstract class ReportingJob {
protected final AggregatorReport report; protected final AggregatorReport report;
public ReportingJob(AggregatorReport report) { protected ReportingJob(AggregatorReport report) {
this.report = report; this.report = report;
} }

@ -25,7 +25,7 @@ public class MDStoreActionNode {
NEW_VERSION, ROLLBACK, COMMIT, READ_LOCK, READ_UNLOCK NEW_VERSION, ROLLBACK, COMMIT, READ_LOCK, READ_UNLOCK
} }
public static String NEW_VERSION_URI = "%s/mdstore/%s/newVersion"; public static final String NEW_VERSION_URI = "%s/mdstore/%s/newVersion";
public static final String COMMIT_VERSION_URL = "%s/version/%s/commit/%s"; public static final String COMMIT_VERSION_URL = "%s/version/%s/commit/%s";
public static final String ROLLBACK_VERSION_URL = "%s/version/%s/abort"; public static final String ROLLBACK_VERSION_URL = "%s/version/%s/abort";
@ -70,7 +70,7 @@ public class MDStoreActionNode {
if (StringUtils.isBlank(hdfsuri)) { if (StringUtils.isBlank(hdfsuri)) {
throw new IllegalArgumentException("missing or empty argument namenode"); throw new IllegalArgumentException("missing or empty argument namenode");
} }
final String mdStoreVersion_params = argumentParser.get("mdStoreVersion"); final String mdStoreVersion_params = argumentParser.get(MDSTOREVERSIONPARAM);
final MDStoreVersion mdStoreVersion = MAPPER.readValue(mdStoreVersion_params, MDStoreVersion.class); final MDStoreVersion mdStoreVersion = MAPPER.readValue(mdStoreVersion_params, MDStoreVersion.class);
if (StringUtils.isBlank(mdStoreVersion.getId())) { if (StringUtils.isBlank(mdStoreVersion.getId())) {
@ -94,7 +94,7 @@ public class MDStoreActionNode {
break; break;
} }
case ROLLBACK: { case ROLLBACK: {
final String mdStoreVersion_params = argumentParser.get("mdStoreVersion"); final String mdStoreVersion_params = argumentParser.get(MDSTOREVERSIONPARAM);
final MDStoreVersion mdStoreVersion = MAPPER.readValue(mdStoreVersion_params, MDStoreVersion.class); final MDStoreVersion mdStoreVersion = MAPPER.readValue(mdStoreVersion_params, MDStoreVersion.class);
if (StringUtils.isBlank(mdStoreVersion.getId())) { if (StringUtils.isBlank(mdStoreVersion.getId())) {

@ -16,7 +16,6 @@ import org.apache.hadoop.io.compress.DeflateCodec;
import org.slf4j.Logger; import org.slf4j.Logger;
import org.slf4j.LoggerFactory; import org.slf4j.LoggerFactory;
import eu.dnetlib.dhp.aggregation.common.AggregatorReport;
import eu.dnetlib.dhp.aggregation.common.ReporterCallback; import eu.dnetlib.dhp.aggregation.common.ReporterCallback;
import eu.dnetlib.dhp.aggregation.common.ReportingJob; import eu.dnetlib.dhp.aggregation.common.ReportingJob;
import eu.dnetlib.dhp.collection.plugin.CollectorPlugin; import eu.dnetlib.dhp.collection.plugin.CollectorPlugin;
@ -24,6 +23,9 @@ import eu.dnetlib.dhp.collection.plugin.mongodb.MDStoreCollectorPlugin;
import eu.dnetlib.dhp.collection.plugin.mongodb.MongoDbDumpCollectorPlugin; import eu.dnetlib.dhp.collection.plugin.mongodb.MongoDbDumpCollectorPlugin;
import eu.dnetlib.dhp.collection.plugin.oai.OaiCollectorPlugin; import eu.dnetlib.dhp.collection.plugin.oai.OaiCollectorPlugin;
import eu.dnetlib.dhp.collection.plugin.rest.RestCollectorPlugin; import eu.dnetlib.dhp.collection.plugin.rest.RestCollectorPlugin;
import eu.dnetlib.dhp.common.aggregation.AggregatorReport;
import eu.dnetlib.dhp.common.collection.CollectorException;
import eu.dnetlib.dhp.common.collection.HttpClientParams;
import eu.dnetlib.dhp.schema.mdstore.MDStoreVersion; import eu.dnetlib.dhp.schema.mdstore.MDStoreVersion;
public class CollectorWorker extends ReportingJob { public class CollectorWorker extends ReportingJob {
@ -116,7 +118,7 @@ public class CollectorWorker extends ReportingJob {
final CollectorPlugin.NAME.OTHER_NAME plugin = Optional final CollectorPlugin.NAME.OTHER_NAME plugin = Optional
.ofNullable(api.getParams().get("other_plugin_type")) .ofNullable(api.getParams().get("other_plugin_type"))
.map(CollectorPlugin.NAME.OTHER_NAME::valueOf) .map(CollectorPlugin.NAME.OTHER_NAME::valueOf)
.get(); .orElseThrow(() -> new IllegalArgumentException("invalid other_plugin_type"));
switch (plugin) { switch (plugin) {
case mdstore_mongodb_dump: case mdstore_mongodb_dump:

@ -13,8 +13,10 @@ import org.apache.hadoop.fs.FileSystem;
import org.slf4j.Logger; import org.slf4j.Logger;
import org.slf4j.LoggerFactory; import org.slf4j.LoggerFactory;
import eu.dnetlib.dhp.aggregation.common.AggregatorReport;
import eu.dnetlib.dhp.application.ArgumentApplicationParser; import eu.dnetlib.dhp.application.ArgumentApplicationParser;
import eu.dnetlib.dhp.common.aggregation.AggregatorReport;
import eu.dnetlib.dhp.common.collection.CollectorException;
import eu.dnetlib.dhp.common.collection.HttpClientParams;
import eu.dnetlib.dhp.message.MessageSender; import eu.dnetlib.dhp.message.MessageSender;
import eu.dnetlib.dhp.schema.mdstore.MDStoreVersion; import eu.dnetlib.dhp.schema.mdstore.MDStoreVersion;

@ -207,6 +207,7 @@ public class GenerateNativeStoreSparkJob {
totalItems.add(1); totalItems.add(1);
try { try {
SAXReader reader = new SAXReader(); SAXReader reader = new SAXReader();
reader.setFeature("http://apache.org/xml/features/disallow-doctype-decl", true);
Document document = reader.read(new ByteArrayInputStream(input.getBytes(StandardCharsets.UTF_8))); Document document = reader.read(new ByteArrayInputStream(input.getBytes(StandardCharsets.UTF_8)));
Node node = document.selectSingleNode(xpath); Node node = document.selectSingleNode(xpath);
final String originalIdentifier = node.getText(); final String originalIdentifier = node.getText();

@ -3,9 +3,9 @@ package eu.dnetlib.dhp.collection.plugin;
import java.util.stream.Stream; import java.util.stream.Stream;
import eu.dnetlib.dhp.aggregation.common.AggregatorReport;
import eu.dnetlib.dhp.collection.ApiDescriptor; import eu.dnetlib.dhp.collection.ApiDescriptor;
import eu.dnetlib.dhp.collection.CollectorException; import eu.dnetlib.dhp.common.aggregation.AggregatorReport;
import eu.dnetlib.dhp.common.collection.CollectorException;
public interface CollectorPlugin { public interface CollectorPlugin {

@ -11,15 +11,13 @@ import org.bson.Document;
import org.slf4j.Logger; import org.slf4j.Logger;
import org.slf4j.LoggerFactory; import org.slf4j.LoggerFactory;
import com.mongodb.MongoClient;
import com.mongodb.MongoClientURI;
import com.mongodb.client.MongoCollection; import com.mongodb.client.MongoCollection;
import eu.dnetlib.dhp.aggregation.common.AggregatorReport;
import eu.dnetlib.dhp.collection.ApiDescriptor; import eu.dnetlib.dhp.collection.ApiDescriptor;
import eu.dnetlib.dhp.collection.CollectorException;
import eu.dnetlib.dhp.collection.plugin.CollectorPlugin; import eu.dnetlib.dhp.collection.plugin.CollectorPlugin;
import eu.dnetlib.dhp.common.MdstoreClient; import eu.dnetlib.dhp.common.MdstoreClient;
import eu.dnetlib.dhp.common.aggregation.AggregatorReport;
import eu.dnetlib.dhp.common.collection.CollectorException;
public class MDStoreCollectorPlugin implements CollectorPlugin { public class MDStoreCollectorPlugin implements CollectorPlugin {

@ -12,10 +12,10 @@ import java.util.zip.GZIPInputStream;
import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path; import org.apache.hadoop.fs.Path;
import eu.dnetlib.dhp.aggregation.common.AggregatorReport;
import eu.dnetlib.dhp.collection.ApiDescriptor; import eu.dnetlib.dhp.collection.ApiDescriptor;
import eu.dnetlib.dhp.collection.CollectorException;
import eu.dnetlib.dhp.collection.plugin.CollectorPlugin; import eu.dnetlib.dhp.collection.plugin.CollectorPlugin;
import eu.dnetlib.dhp.common.aggregation.AggregatorReport;
import eu.dnetlib.dhp.common.collection.CollectorException;
import eu.dnetlib.dhp.utils.DHPUtils; import eu.dnetlib.dhp.utils.DHPUtils;
public class MongoDbDumpCollectorPlugin implements CollectorPlugin { public class MongoDbDumpCollectorPlugin implements CollectorPlugin {
@ -23,7 +23,7 @@ public class MongoDbDumpCollectorPlugin implements CollectorPlugin {
public static final String PATH_PARAM = "path"; public static final String PATH_PARAM = "path";
public static final String BODY_JSONPATH = "$.body"; public static final String BODY_JSONPATH = "$.body";
public FileSystem fileSystem; private final FileSystem fileSystem;
public MongoDbDumpCollectorPlugin(FileSystem fileSystem) { public MongoDbDumpCollectorPlugin(FileSystem fileSystem) {
this.fileSystem = fileSystem; this.fileSystem = fileSystem;

@ -13,11 +13,11 @@ import com.google.common.base.Splitter;
import com.google.common.collect.Iterators; import com.google.common.collect.Iterators;
import com.google.common.collect.Lists; import com.google.common.collect.Lists;
import eu.dnetlib.dhp.aggregation.common.AggregatorReport;
import eu.dnetlib.dhp.collection.ApiDescriptor; import eu.dnetlib.dhp.collection.ApiDescriptor;
import eu.dnetlib.dhp.collection.CollectorException;
import eu.dnetlib.dhp.collection.HttpClientParams;
import eu.dnetlib.dhp.collection.plugin.CollectorPlugin; import eu.dnetlib.dhp.collection.plugin.CollectorPlugin;
import eu.dnetlib.dhp.common.aggregation.AggregatorReport;
import eu.dnetlib.dhp.common.collection.CollectorException;
import eu.dnetlib.dhp.common.collection.HttpClientParams;
public class OaiCollectorPlugin implements CollectorPlugin { public class OaiCollectorPlugin implements CollectorPlugin {
@ -66,11 +66,11 @@ public class OaiCollectorPlugin implements CollectorPlugin {
} }
if (fromDate != null && !fromDate.matches(DATE_REGEX) && !fromDate.matches(UTC_DATETIME_REGEX)) { if (fromDate != null && !fromDate.matches(DATE_REGEX) && !fromDate.matches(UTC_DATETIME_REGEX)) {
throw new CollectorException("Invalid date (YYYY-MM-DD): " + fromDate); throw new CollectorException("Invalid date (YYYY-MM-DD or YYYY-MM-DDT00:00:00Z): " + fromDate);
} }
if (untilDate != null && !untilDate.matches(DATE_REGEX) && !untilDate.matches(UTC_DATETIME_REGEX)) { if (untilDate != null && !untilDate.matches(DATE_REGEX) && !untilDate.matches(UTC_DATETIME_REGEX)) {
throw new CollectorException("Invalid date (YYYY-MM-DD): " + untilDate); throw new CollectorException("Invalid date (YYYY-MM-DD or YYYY-MM-DDT00:00:00Z): " + untilDate);
} }
final Iterator<Iterator<String>> iters = sets final Iterator<Iterator<String>> iters = sets

@ -2,7 +2,6 @@
package eu.dnetlib.dhp.collection.plugin.oai; package eu.dnetlib.dhp.collection.plugin.oai;
import java.io.IOException; import java.io.IOException;
import java.io.StringReader;
import java.io.StringWriter; import java.io.StringWriter;
import java.io.UnsupportedEncodingException; import java.io.UnsupportedEncodingException;
import java.net.URLEncoder; import java.net.URLEncoder;
@ -16,21 +15,21 @@ import org.dom4j.DocumentException;
import org.dom4j.DocumentHelper; import org.dom4j.DocumentHelper;
import org.dom4j.Node; import org.dom4j.Node;
import org.dom4j.io.OutputFormat; import org.dom4j.io.OutputFormat;
import org.dom4j.io.SAXReader;
import org.dom4j.io.XMLWriter; import org.dom4j.io.XMLWriter;
import org.slf4j.Logger; import org.slf4j.Logger;
import org.slf4j.LoggerFactory; import org.slf4j.LoggerFactory;
import eu.dnetlib.dhp.aggregation.common.AggregatorReport;
import eu.dnetlib.dhp.collection.CollectorException;
import eu.dnetlib.dhp.collection.HttpConnector2;
import eu.dnetlib.dhp.collection.XmlCleaner; import eu.dnetlib.dhp.collection.XmlCleaner;
import eu.dnetlib.dhp.common.aggregation.AggregatorReport;
import eu.dnetlib.dhp.common.collection.CollectorException;
import eu.dnetlib.dhp.common.collection.HttpConnector2;
public class OaiIterator implements Iterator<String> { public class OaiIterator implements Iterator<String> {
private static final Logger log = LoggerFactory.getLogger(OaiIterator.class); private static final Logger log = LoggerFactory.getLogger(OaiIterator.class);
private final static String REPORT_PREFIX = "oai:"; private static final String REPORT_PREFIX = "oai:";
public static final String UTF_8 = "UTF-8";
private final Queue<String> queue = new PriorityBlockingQueue<>(); private final Queue<String> queue = new PriorityBlockingQueue<>();
@ -68,7 +67,7 @@ public class OaiIterator implements Iterator<String> {
try { try {
this.token = firstPage(); this.token = firstPage();
} catch (final CollectorException e) { } catch (final CollectorException e) {
throw new RuntimeException(e); throw new IllegalStateException(e);
} }
} }
} }
@ -90,7 +89,7 @@ public class OaiIterator implements Iterator<String> {
try { try {
token = otherPages(token); token = otherPages(token);
} catch (final CollectorException e) { } catch (final CollectorException e) {
throw new RuntimeException(e); throw new IllegalStateException(e);
} }
} }
return res; return res;
@ -99,23 +98,24 @@ public class OaiIterator implements Iterator<String> {
@Override @Override
public void remove() { public void remove() {
throw new UnsupportedOperationException();
} }
private String firstPage() throws CollectorException { private String firstPage() throws CollectorException {
try { try {
String url = baseUrl + "?verb=ListRecords&metadataPrefix=" + URLEncoder.encode(mdFormat, "UTF-8"); String url = baseUrl + "?verb=ListRecords&metadataPrefix=" + URLEncoder.encode(mdFormat, UTF_8);
if (set != null && !set.isEmpty()) { if (set != null && !set.isEmpty()) {
url += "&set=" + URLEncoder.encode(set, "UTF-8"); url += "&set=" + URLEncoder.encode(set, UTF_8);
} }
if (fromDate != null && (fromDate.matches(OaiCollectorPlugin.DATE_REGEX) if (fromDate != null && (fromDate.matches(OaiCollectorPlugin.DATE_REGEX)
|| fromDate.matches(OaiCollectorPlugin.UTC_DATETIME_REGEX))) { || fromDate.matches(OaiCollectorPlugin.UTC_DATETIME_REGEX))) {
url += "&from=" + URLEncoder.encode(fromDate, "UTF-8"); url += "&from=" + URLEncoder.encode(fromDate, UTF_8);
} }
if (untilDate != null && (untilDate.matches(OaiCollectorPlugin.DATE_REGEX) if (untilDate != null && (untilDate.matches(OaiCollectorPlugin.DATE_REGEX)
|| untilDate.matches(OaiCollectorPlugin.UTC_DATETIME_REGEX))) { || untilDate.matches(OaiCollectorPlugin.UTC_DATETIME_REGEX))) {
url += "&until=" + URLEncoder.encode(untilDate, "UTF-8"); url += "&until=" + URLEncoder.encode(untilDate, UTF_8);
} }
log.info("Start harvesting using url: " + url); log.info("Start harvesting using url: {}", url);
return downloadPage(url); return downloadPage(url);
} catch (final UnsupportedEncodingException e) { } catch (final UnsupportedEncodingException e) {
@ -143,7 +143,7 @@ public class OaiIterator implements Iterator<String> {
return downloadPage( return downloadPage(
baseUrl baseUrl
+ "?verb=ListRecords&resumptionToken=" + "?verb=ListRecords&resumptionToken="
+ URLEncoder.encode(resumptionToken, "UTF-8")); + URLEncoder.encode(resumptionToken, UTF_8));
} catch (final UnsupportedEncodingException e) { } catch (final UnsupportedEncodingException e) {
report.put(e.getClass().getName(), e.getMessage()); report.put(e.getClass().getName(), e.getMessage());
throw new CollectorException(e); throw new CollectorException(e);

@ -3,9 +3,9 @@ package eu.dnetlib.dhp.collection.plugin.oai;
import java.util.Iterator; import java.util.Iterator;
import eu.dnetlib.dhp.aggregation.common.AggregatorReport; import eu.dnetlib.dhp.common.aggregation.AggregatorReport;
import eu.dnetlib.dhp.collection.HttpClientParams; import eu.dnetlib.dhp.common.collection.HttpClientParams;
import eu.dnetlib.dhp.collection.HttpConnector2; import eu.dnetlib.dhp.common.collection.HttpConnector2;
public class OaiIteratorFactory { public class OaiIteratorFactory {

@ -9,11 +9,11 @@ import java.util.stream.StreamSupport;
import org.apache.commons.lang3.StringUtils; import org.apache.commons.lang3.StringUtils;
import eu.dnetlib.dhp.aggregation.common.AggregatorReport;
import eu.dnetlib.dhp.collection.ApiDescriptor; import eu.dnetlib.dhp.collection.ApiDescriptor;
import eu.dnetlib.dhp.collection.CollectorException;
import eu.dnetlib.dhp.collection.HttpClientParams;
import eu.dnetlib.dhp.collection.plugin.CollectorPlugin; import eu.dnetlib.dhp.collection.plugin.CollectorPlugin;
import eu.dnetlib.dhp.common.aggregation.AggregatorReport;
import eu.dnetlib.dhp.common.collection.CollectorException;
import eu.dnetlib.dhp.common.collection.HttpClientParams;
/** /**
* TODO: delegate HTTP requests to the common HttpConnector2 implementation. * TODO: delegate HTTP requests to the common HttpConnector2 implementation.

@ -30,9 +30,9 @@ import org.w3c.dom.Node;
import org.w3c.dom.NodeList; import org.w3c.dom.NodeList;
import org.xml.sax.InputSource; import org.xml.sax.InputSource;
import eu.dnetlib.dhp.collection.CollectorException;
import eu.dnetlib.dhp.collection.HttpClientParams;
import eu.dnetlib.dhp.collection.JsonUtils; import eu.dnetlib.dhp.collection.JsonUtils;
import eu.dnetlib.dhp.common.collection.CollectorException;
import eu.dnetlib.dhp.common.collection.HttpClientParams;
/** /**
* log.info(...) equal to log.trace(...) in the application-logs * log.info(...) equal to log.trace(...) in the application-logs
@ -131,7 +131,8 @@ public class RestIterator implements Iterator<String> {
private void initXmlTransformation(String resultTotalXpath, String resumptionXpath, String entityXpath) private void initXmlTransformation(String resultTotalXpath, String resumptionXpath, String entityXpath)
throws TransformerConfigurationException, XPathExpressionException { throws TransformerConfigurationException, XPathExpressionException {
transformer = TransformerFactory.newInstance().newTransformer(); final TransformerFactory factory = TransformerFactory.newInstance();
transformer = factory.newTransformer();
transformer.setOutputProperty(OutputKeys.INDENT, "yes"); transformer.setOutputProperty(OutputKeys.INDENT, "yes");
transformer.setOutputProperty("{http://xml.apache.org/xslt}indent-amount", "3"); transformer.setOutputProperty("{http://xml.apache.org/xslt}indent-amount", "3");
xpath = XPathFactory.newInstance().newXPath(); xpath = XPathFactory.newInstance().newXPath();
@ -142,7 +143,7 @@ public class RestIterator implements Iterator<String> {
private void initQueue() { private void initQueue() {
query = baseUrl + "?" + queryParams + querySize + queryFormat; query = baseUrl + "?" + queryParams + querySize + queryFormat;
log.info("REST calls starting with " + query); log.info("REST calls starting with {}", query);
} }
private void disconnect() { private void disconnect() {
@ -174,7 +175,7 @@ public class RestIterator implements Iterator<String> {
try { try {
query = downloadPage(query); query = downloadPage(query);
} catch (CollectorException e) { } catch (CollectorException e) {
log.debug("CollectorPlugin.next()-Exception: " + e); log.debug("CollectorPlugin.next()-Exception: {}", e);
throw new RuntimeException(e); throw new RuntimeException(e);
} }
} }
@ -198,7 +199,7 @@ public class RestIterator implements Iterator<String> {
// check if cursor=* is initial set otherwise add it to the queryParam URL // check if cursor=* is initial set otherwise add it to the queryParam URL
if (resumptionType.equalsIgnoreCase("deep-cursor")) { if (resumptionType.equalsIgnoreCase("deep-cursor")) {
log.debug("check resumptionType deep-cursor and check cursor=*?" + query); log.debug("check resumptionType deep-cursor and check cursor=*?{}", query);
if (!query.contains("&cursor=")) { if (!query.contains("&cursor=")) {
query += "&cursor=*"; query += "&cursor=*";
} }
@ -208,16 +209,16 @@ public class RestIterator implements Iterator<String> {
log.info("requestig URL [{}]", query); log.info("requestig URL [{}]", query);
URL qUrl = new URL(query); URL qUrl = new URL(query);
log.debug("authMethod :" + authMethod); log.debug("authMethod: {}", authMethod);
if ("bearer".equalsIgnoreCase(this.authMethod)) { if ("bearer".equalsIgnoreCase(this.authMethod)) {
log.trace("authMethod before inputStream: " + resultXml); log.trace("authMethod before inputStream: {}", resultXml);
HttpURLConnection conn = (HttpURLConnection) qUrl.openConnection(); HttpURLConnection conn = (HttpURLConnection) qUrl.openConnection();
conn.setRequestProperty(HttpHeaders.AUTHORIZATION, "Bearer " + authToken); conn.setRequestProperty(HttpHeaders.AUTHORIZATION, "Bearer " + authToken);
conn.setRequestProperty(HttpHeaders.CONTENT_TYPE, ContentType.APPLICATION_JSON.getMimeType()); conn.setRequestProperty(HttpHeaders.CONTENT_TYPE, ContentType.APPLICATION_JSON.getMimeType());
conn.setRequestMethod("GET"); conn.setRequestMethod("GET");
theHttpInputStream = conn.getInputStream(); theHttpInputStream = conn.getInputStream();
} else if (BASIC.equalsIgnoreCase(this.authMethod)) { } else if (BASIC.equalsIgnoreCase(this.authMethod)) {
log.trace("authMethod before inputStream: " + resultXml); log.trace("authMethod before inputStream: {}", resultXml);
HttpURLConnection conn = (HttpURLConnection) qUrl.openConnection(); HttpURLConnection conn = (HttpURLConnection) qUrl.openConnection();
conn.setRequestProperty(HttpHeaders.AUTHORIZATION, "Basic " + authToken); conn.setRequestProperty(HttpHeaders.AUTHORIZATION, "Basic " + authToken);
conn.setRequestProperty(HttpHeaders.ACCEPT, ContentType.APPLICATION_XML.getMimeType()); conn.setRequestProperty(HttpHeaders.ACCEPT, ContentType.APPLICATION_XML.getMimeType());
@ -237,13 +238,13 @@ public class RestIterator implements Iterator<String> {
if (!(emptyXml).equalsIgnoreCase(resultXml)) { if (!(emptyXml).equalsIgnoreCase(resultXml)) {
resultNode = (Node) xpath.evaluate("/", new InputSource(resultStream), XPathConstants.NODE); resultNode = (Node) xpath.evaluate("/", new InputSource(resultStream), XPathConstants.NODE);
nodeList = (NodeList) xprEntity.evaluate(resultNode, XPathConstants.NODESET); nodeList = (NodeList) xprEntity.evaluate(resultNode, XPathConstants.NODESET);
log.debug("nodeList.length: " + nodeList.getLength()); log.debug("nodeList.length: {}", nodeList.getLength());
for (int i = 0; i < nodeList.getLength(); i++) { for (int i = 0; i < nodeList.getLength(); i++) {
StringWriter sw = new StringWriter(); StringWriter sw = new StringWriter();
transformer.transform(new DOMSource(nodeList.item(i)), new StreamResult(sw)); transformer.transform(new DOMSource(nodeList.item(i)), new StreamResult(sw));
String toEnqueue = sw.toString(); String toEnqueue = sw.toString();
if (toEnqueue == null || StringUtils.isBlank(toEnqueue) || emptyXml.equalsIgnoreCase(toEnqueue)) { if (toEnqueue == null || StringUtils.isBlank(toEnqueue) || emptyXml.equalsIgnoreCase(toEnqueue)) {
log.warn("The following record resulted in empty item for the feeding queue: " + resultXml); log.warn("The following record resulted in empty item for the feeding queue: {}", resultXml);
} else { } else {
recordQueue.add(sw.toString()); recordQueue.add(sw.toString());
} }
@ -274,9 +275,9 @@ public class RestIterator implements Iterator<String> {
String[] resumptionKeyValue = arrayUrlArgStr.split("="); String[] resumptionKeyValue = arrayUrlArgStr.split("=");
if (isInteger(resumptionKeyValue[1])) { if (isInteger(resumptionKeyValue[1])) {
urlOldResumptionSize = Integer.parseInt(resumptionKeyValue[1]); urlOldResumptionSize = Integer.parseInt(resumptionKeyValue[1]);
log.debug("discover OldResumptionSize from Url (int): " + urlOldResumptionSize); log.debug("discover OldResumptionSize from Url (int): {}", urlOldResumptionSize);
} else { } else {
log.debug("discover OldResumptionSize from Url (str): " + resumptionKeyValue[1]); log.debug("discover OldResumptionSize from Url (str): {}", resumptionKeyValue[1]);
} }
} }
} }
@ -295,7 +296,7 @@ public class RestIterator implements Iterator<String> {
discoverResultSize += nodeList.getLength(); discoverResultSize += nodeList.getLength();
} }
} }
log.info("discoverResultSize: {}", discoverResultSize); log.info("discoverResultSize: {}", discoverResultSize);
break; break;
case "pagination": case "pagination":

@ -7,6 +7,7 @@ import static eu.dnetlib.dhp.utils.DHPUtils.*;
import java.io.IOException; import java.io.IOException;
import java.util.Map; import java.util.Map;
import java.util.Objects;
import java.util.Optional; import java.util.Optional;
import org.apache.commons.io.IOUtils; import org.apache.commons.io.IOUtils;
@ -23,8 +24,8 @@ import org.slf4j.Logger;
import org.slf4j.LoggerFactory; import org.slf4j.LoggerFactory;
import eu.dnetlib.dhp.aggregation.common.AggregationCounter; import eu.dnetlib.dhp.aggregation.common.AggregationCounter;
import eu.dnetlib.dhp.aggregation.common.AggregatorReport;
import eu.dnetlib.dhp.application.ArgumentApplicationParser; import eu.dnetlib.dhp.application.ArgumentApplicationParser;
import eu.dnetlib.dhp.common.aggregation.AggregatorReport;
import eu.dnetlib.dhp.common.vocabulary.VocabularyGroup; import eu.dnetlib.dhp.common.vocabulary.VocabularyGroup;
import eu.dnetlib.dhp.message.MessageSender; import eu.dnetlib.dhp.message.MessageSender;
import eu.dnetlib.dhp.schema.mdstore.MDStoreVersion; import eu.dnetlib.dhp.schema.mdstore.MDStoreVersion;
@ -67,10 +68,10 @@ public class TransformSparkJobNode {
log.info("outputBasePath: {}", outputBasePath); log.info("outputBasePath: {}", outputBasePath);
final String isLookupUrl = parser.get("isLookupUrl"); final String isLookupUrl = parser.get("isLookupUrl");
log.info(String.format("isLookupUrl: %s", isLookupUrl)); log.info("isLookupUrl: {}", isLookupUrl);
final String dateOfTransformation = parser.get("dateOfTransformation"); final String dateOfTransformation = parser.get("dateOfTransformation");
log.info(String.format("dateOfTransformation: %s", dateOfTransformation)); log.info("dateOfTransformation: {}", dateOfTransformation);
final Integer rpt = Optional final Integer rpt = Optional
.ofNullable(parser.get("recordsPerTask")) .ofNullable(parser.get("recordsPerTask"))
@ -126,12 +127,13 @@ public class TransformSparkJobNode {
JavaRDD<MetadataRecord> mdstore = inputMDStore JavaRDD<MetadataRecord> mdstore = inputMDStore
.javaRDD() .javaRDD()
.repartition(getRepartitionNumber(totalInput, rpt)) .repartition(getRepartitionNumber(totalInput, rpt))
.map((Function<MetadataRecord, MetadataRecord>) x::call); .map((Function<MetadataRecord, MetadataRecord>) x::call)
.filter((Function<MetadataRecord, Boolean>) Objects::nonNull);
saveDataset(spark.createDataset(mdstore.rdd(), encoder), outputBasePath + MDSTORE_DATA_PATH); saveDataset(spark.createDataset(mdstore.rdd(), encoder), outputBasePath + MDSTORE_DATA_PATH);
log.info("Transformed item " + ct.getProcessedItems().count()); log.info("Transformed item {}", ct.getProcessedItems().count());
log.info("Total item " + ct.getTotalItems().count()); log.info("Total item {}", ct.getTotalItems().count());
log.info("Transformation Error item " + ct.getErrorItems().count()); log.info("Transformation Error item {}", ct.getErrorItems().count());
final long mdStoreSize = spark.read().load(outputBasePath + MDSTORE_DATA_PATH).count(); final long mdStoreSize = spark.read().load(outputBasePath + MDSTORE_DATA_PATH).count();
writeHdfsFile( writeHdfsFile(

@ -13,12 +13,18 @@ import eu.dnetlib.dhp.aggregation.common.AggregationCounter;
import eu.dnetlib.dhp.common.vocabulary.VocabularyGroup; import eu.dnetlib.dhp.common.vocabulary.VocabularyGroup;
import eu.dnetlib.dhp.schema.mdstore.MetadataRecord; import eu.dnetlib.dhp.schema.mdstore.MetadataRecord;
import eu.dnetlib.dhp.transformation.xslt.XSLTTransformationFunction; import eu.dnetlib.dhp.transformation.xslt.XSLTTransformationFunction;
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException;
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService; import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService;
public class TransformationFactory { public class TransformationFactory {
private static final Logger log = LoggerFactory.getLogger(TransformationFactory.class); private static final Logger log = LoggerFactory.getLogger(TransformationFactory.class);
public static final String TRULE_XQUERY = "for $x in collection('/db/DRIVER/TransformationRuleDSResources/TransformationRuleDSResourceType') where $x//RESOURCE_IDENTIFIER/@value = \"%s\" return $x//CODE/*[local-name() =\"stylesheet\"]"; public static final String TRULE_XQUERY = "for $x in collection('/db/DRIVER/TransformationRuleDSResources/TransformationRuleDSResourceType') "
+
"where $x//RESOURCE_IDENTIFIER/@value = \"%s\" return $x//CODE/*[local-name() =\"stylesheet\"]";
private TransformationFactory() {
}
public static MapFunction<MetadataRecord, MetadataRecord> getTransformationPlugin( public static MapFunction<MetadataRecord, MetadataRecord> getTransformationPlugin(
final Map<String, String> jobArgument, final AggregationCounter counters, final ISLookUpService isLookupService) final Map<String, String> jobArgument, final AggregationCounter counters, final ISLookUpService isLookupService)
@ -27,7 +33,7 @@ public class TransformationFactory {
try { try {
final String transformationPlugin = jobArgument.get("transformationPlugin"); final String transformationPlugin = jobArgument.get("transformationPlugin");
log.info("Transformation plugin required " + transformationPlugin); log.info("Transformation plugin required {}", transformationPlugin);
switch (transformationPlugin) { switch (transformationPlugin) {
case "XSLT_TRANSFORM": { case "XSLT_TRANSFORM": {
final String transformationRuleId = jobArgument.get("transformationRuleId"); final String transformationRuleId = jobArgument.get("transformationRuleId");
@ -38,7 +44,7 @@ public class TransformationFactory {
final String transformationRule = queryTransformationRuleFromIS( final String transformationRule = queryTransformationRuleFromIS(
transformationRuleId, isLookupService); transformationRuleId, isLookupService);
final long dateOfTransformation = new Long(jobArgument.get("dateOfTransformation")); final long dateOfTransformation = Long.parseLong(jobArgument.get("dateOfTransformation"));
return new XSLTTransformationFunction(counters, transformationRule, dateOfTransformation, return new XSLTTransformationFunction(counters, transformationRule, dateOfTransformation,
vocabularies); vocabularies);
@ -46,7 +52,6 @@ public class TransformationFactory {
default: default:
throw new DnetTransformationException( throw new DnetTransformationException(
"transformation plugin does not exists for " + transformationPlugin); "transformation plugin does not exists for " + transformationPlugin);
} }
} catch (Throwable e) { } catch (Throwable e) {
@ -55,9 +60,9 @@ public class TransformationFactory {
} }
private static String queryTransformationRuleFromIS(final String transformationRuleId, private static String queryTransformationRuleFromIS(final String transformationRuleId,
final ISLookUpService isLookUpService) throws Exception { final ISLookUpService isLookUpService) throws DnetTransformationException, ISLookUpException {
final String query = String.format(TRULE_XQUERY, transformationRuleId); final String query = String.format(TRULE_XQUERY, transformationRuleId);
System.out.println("asking query to IS: " + query); log.info("asking query to IS: {}", query);
List<String> result = isLookUpService.quickSearchProfile(query); List<String> result = isLookUpService.quickSearchProfile(query);
if (result == null || result.isEmpty()) if (result == null || result.isEmpty())

@ -4,11 +4,6 @@ package eu.dnetlib.dhp.transformation.xslt;
import static eu.dnetlib.dhp.transformation.xslt.XSLTTransformationFunction.QNAME_BASE_URI; import static eu.dnetlib.dhp.transformation.xslt.XSLTTransformationFunction.QNAME_BASE_URI;
import java.io.Serializable; import java.io.Serializable;
import java.time.LocalDate;
import java.time.format.DateTimeFormatter;
import java.util.*;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import eu.dnetlib.dhp.schema.oaf.utils.GraphCleaningFunctions; import eu.dnetlib.dhp.schema.oaf.utils.GraphCleaningFunctions;
import net.sf.saxon.s9api.*; import net.sf.saxon.s9api.*;

@ -28,22 +28,12 @@ public class PersonCleaner implements ExtensionFunction, Serializable {
private static final Set<String> particles = null; private static final Set<String> particles = null;
public PersonCleaner() {
}
private String normalize(String s) { private String normalize(String s) {
s = Normalizer.normalize(s, Normalizer.Form.NFD); // was NFD s = Normalizer.normalize(s, Normalizer.Form.NFD); // was NFD
s = s.replaceAll("\\(.+\\)", ""); s = s.replaceAll("\\(.+\\)", "");
s = s.replaceAll("\\[.+\\]", ""); s = s.replaceAll("\\[.+\\]", "");
s = s.replaceAll("\\{.+\\}", ""); s = s.replaceAll("\\{.+\\}", "");
s = s.replaceAll("\\s+-\\s+", "-"); s = s.replaceAll("\\s+-\\s+", "-");
// s = s.replaceAll("[\\W&&[^,-]]", " ");
// System.out.println("class Person: s: " + s);
// s = s.replaceAll("[\\p{InCombiningDiacriticalMarks}&&[^,-]]", " ");
s = s.replaceAll("[\\p{Punct}&&[^-,]]", " "); s = s.replaceAll("[\\p{Punct}&&[^-,]]", " ");
s = s.replace("\\d", " "); s = s.replace("\\d", " ");
s = s.replace("\\n", " "); s = s.replace("\\n", " ");
@ -51,8 +41,6 @@ public class PersonCleaner implements ExtensionFunction, Serializable {
s = s.replaceAll("\\s+", " "); s = s.replaceAll("\\s+", " ");
if (s.contains(",")) { if (s.contains(",")) {
// System.out.println("class Person: s: " + s);
String[] arr = s.split(","); String[] arr = s.split(",");
if (arr.length == 1) { if (arr.length == 1) {
@ -60,9 +48,6 @@ public class PersonCleaner implements ExtensionFunction, Serializable {
} else if (arr.length > 1) { } else if (arr.length > 1) {
surname = splitTerms(arr[0]); surname = splitTerms(arr[0]);
firstname = splitTermsFirstName(arr[1]); firstname = splitTermsFirstName(arr[1]);
// System.out.println("class Person: surname: " + surname);
// System.out.println("class Person: firstname: " + firstname);
fullname.addAll(surname); fullname.addAll(surname);
fullname.addAll(firstname); fullname.addAll(firstname);
} }
@ -82,7 +67,6 @@ public class PersonCleaner implements ExtensionFunction, Serializable {
} }
if (lastInitialPosition < fullname.size() - 1) { // Case: Michele G. Artini if (lastInitialPosition < fullname.size() - 1) { // Case: Michele G. Artini
firstname = fullname.subList(0, lastInitialPosition + 1); firstname = fullname.subList(0, lastInitialPosition + 1);
System.out.println("name: " + firstname);
surname = fullname.subList(lastInitialPosition + 1, fullname.size()); surname = fullname.subList(lastInitialPosition + 1, fullname.size());
} else if (hasSurnameInUpperCase) { // Case: Michele ARTINI } else if (hasSurnameInUpperCase) { // Case: Michele ARTINI
for (String term : fullname) { for (String term : fullname) {
@ -119,16 +103,9 @@ public class PersonCleaner implements ExtensionFunction, Serializable {
} }
private List<String> splitTerms(String s) { private List<String> splitTerms(String s) {
if (particles == null) {
// particles = NGramUtils.loadFromClasspath("/eu/dnetlib/pace/config/name_particles.txt");
}
List<String> list = Lists.newArrayList(); List<String> list = Lists.newArrayList();
for (String part : Splitter.on(" ").omitEmptyStrings().split(s)) { for (String part : Splitter.on(" ").omitEmptyStrings().split(s)) {
// if (!particles.contains(part.toLowerCase())) {
list.add(part); list.add(part);
// }
} }
return list; return list;
} }
@ -152,9 +129,6 @@ public class PersonCleaner implements ExtensionFunction, Serializable {
public String getNormalisedFullname() { public String getNormalisedFullname() {
return isAccurate() ? Joiner.on(" ").join(getSurname()) + ", " + Joiner.on(" ").join(getNameWithAbbreviations()) return isAccurate() ? Joiner.on(" ").join(getSurname()) + ", " + Joiner.on(" ").join(getNameWithAbbreviations())
: Joiner.on(" ").join(fullname); : Joiner.on(" ").join(fullname);
// return isAccurate() ?
// Joiner.on(" ").join(getCapitalSurname()) + ", " + Joiner.on(" ").join(getNameWithAbbreviations()) :
// Joiner.on(" ").join(fullname);
} }
public List<String> getCapitalSurname() { public List<String> getCapitalSurname() {

@ -1,7 +1,6 @@
package eu.dnetlib.dhp.transformation.xslt; package eu.dnetlib.dhp.transformation.xslt;
import java.io.ByteArrayInputStream;
import java.io.Serializable; import java.io.Serializable;
import java.io.StringWriter; import java.io.StringWriter;
import java.nio.charset.StandardCharsets; import java.nio.charset.StandardCharsets;
@ -18,11 +17,11 @@ import net.sf.saxon.s9api.*;
public class XSLTTransformationFunction implements MapFunction<MetadataRecord, MetadataRecord>, Serializable { public class XSLTTransformationFunction implements MapFunction<MetadataRecord, MetadataRecord>, Serializable {
public final static String QNAME_BASE_URI = "http://eu/dnetlib/transform"; public static final String QNAME_BASE_URI = "http://eu/dnetlib/transform";
private final static String DATASOURCE_ID_PARAM = "varDataSourceId"; private static final String DATASOURCE_ID_PARAM = "varDataSourceId";
private final static String DATASOURCE_NAME_PARAM = "varOfficialName"; private static final String DATASOURCE_NAME_PARAM = "varOfficialName";
private final AggregationCounter aggregationCounter; private final AggregationCounter aggregationCounter;
@ -38,8 +37,7 @@ public class XSLTTransformationFunction implements MapFunction<MetadataRecord, M
final AggregationCounter aggregationCounter, final AggregationCounter aggregationCounter,
final String transformationRule, final String transformationRule,
long dateOfTransformation, long dateOfTransformation,
final VocabularyGroup vocabularies) final VocabularyGroup vocabularies) {
throws Exception {
this.aggregationCounter = aggregationCounter; this.aggregationCounter = aggregationCounter;
this.transformationRule = transformationRule; this.transformationRule = transformationRule;
this.vocabularies = vocabularies; this.vocabularies = vocabularies;
@ -83,7 +81,8 @@ public class XSLTTransformationFunction implements MapFunction<MetadataRecord, M
return value; return value;
} catch (Throwable e) { } catch (Throwable e) {
aggregationCounter.getErrorItems().add(1); aggregationCounter.getErrorItems().add(1);
throw new RuntimeException(e); return null;
// throw new RuntimeException(e);
} }
} }

@ -1,8 +1,6 @@
package eu.dnetlib.dhp.transformation.xslt.utils; package eu.dnetlib.dhp.transformation.xslt.utils;
// import org.apache.commons.text.WordUtils;
// import org.apache.commons.text.WordUtils;
import com.google.common.base.Function; import com.google.common.base.Function;
public class Capitalize implements Function<String, String> { public class Capitalize implements Function<String, String> {

Some files were not shown because too many files have changed in this diff Show More

Loading…
Cancel
Save