Applied IntelliJ code cleanup

Claudio Atzori 2021-05-14 10:58:12 +02:00
parent 609eb711b3
commit 23b8883ab1
105 changed files with 352 additions and 290 deletions

View File

@@ -14,7 +14,7 @@ public class DbClient implements Closeable {
 private static final Log log = LogFactory.getLog(DbClient.class);
-private Connection connection;
+private final Connection connection;
 public DbClient(final String address, final String login, final String password) {
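
Most of the edits in this commit apply the same IntelliJ inspection: a field that is assigned exactly once, in its declaration or in every constructor, can be declared final, which documents the intent and lets the compiler reject later reassignment. A minimal sketch of the pattern on a hypothetical class (not the repository's DbClient):

import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.SQLException;

public class FinalFieldExample {

    // The field is written only here, in the constructor, so it can be final.
    private final Connection connection;

    public FinalFieldExample(final String address, final String login, final String password) throws SQLException {
        this.connection = DriverManager.getConnection(address, login, password);
    }
}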

View File

@@ -100,7 +100,7 @@ public class MakeTarArchive implements Serializable {
 BufferedInputStream bis = new BufferedInputStream(is);
 int count;
-byte data[] = new byte[1024];
+byte[] data = new byte[1024];
 while ((count = bis.read(data, 0, data.length)) != -1) {
 ar.write(data, 0, count);
 }

View File

@@ -13,9 +13,9 @@ import okio.Source;
 public class InputStreamRequestBody extends RequestBody {
-private InputStream inputStream;
-private MediaType mediaType;
-private long lenght;
+private final InputStream inputStream;
+private final MediaType mediaType;
+private final long lenght;
 public static RequestBody create(final MediaType mediaType, final InputStream inputStream, final long len) {

View File

@@ -21,7 +21,7 @@ public class DNetRestClient {
 private static final Logger log = LoggerFactory.getLogger(DNetRestClient.class);
-private static ObjectMapper mapper = new ObjectMapper();
+private static final ObjectMapper mapper = new ObjectMapper();
 public static <T> T doGET(final String url, Class<T> clazz) throws Exception {
 final HttpGet httpGet = new HttpGet(url);

View File

@@ -34,7 +34,7 @@ public class MessageSender {
 private final String workflowId;
-private ExecutorService executorService = Executors.newCachedThreadPool();
+private final ExecutorService executorService = Executors.newCachedThreadPool();
 public MessageSender(final String dnetMessageEndpoint, final String workflowId) {
 this.workflowId = workflowId;

View File

@@ -32,11 +32,11 @@ public class OafMapperUtils {
 if (ModelSupport.isSubClass(left, Result.class)) {
 return mergeResults((Result) left, (Result) right);
 } else if (ModelSupport.isSubClass(left, Datasource.class)) {
-((Datasource) left).mergeFrom((Datasource) right);
+left.mergeFrom(right);
 } else if (ModelSupport.isSubClass(left, Organization.class)) {
-((Organization) left).mergeFrom((Organization) right);
+left.mergeFrom(right);
 } else if (ModelSupport.isSubClass(left, Project.class)) {
-((Project) left).mergeFrom((Project) right);
+left.mergeFrom(right);
 } else {
 throw new RuntimeException("invalid OafEntity subtype:" + left.getClass().getCanonicalName());
 }
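
The casts dropped here are redundant only if mergeFrom is declared on the shared supertype and takes that supertype as its parameter, which is what the cleanup implies for OafEntity. A minimal sketch with hypothetical class names:

class Entity {
    void mergeFrom(final Entity other) {
        // base merge logic
    }
}

class DatasourceLike extends Entity {
    @Override
    void mergeFrom(final Entity other) {
        super.mergeFrom(other);
        // subtype-specific merge logic still runs via dynamic dispatch
    }
}

public class RedundantCastExample {
    static void merge(final Entity left, final Entity right) {
        // ((DatasourceLike) left).mergeFrom((DatasourceLike) right); // the casts add nothing here
        left.mergeFrom(right); // the override is selected at runtime anyway
    }

    public static void main(final String[] args) {
        merge(new DatasourceLike(), new DatasourceLike());
    }
}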

View File

@@ -15,8 +15,8 @@ public class ISLookupClientFactory {
 private static final Logger log = LoggerFactory.getLogger(ISLookupClientFactory.class);
-private static int requestTimeout = 60000 * 10;
-private static int connectTimeout = 60000 * 10;
+private static final int requestTimeout = 60000 * 10;
+private static final int connectTimeout = 60000 * 10;
 public static ISLookUpService getLookUpService(final String isLookupUrl) {
 return getServiceStub(ISLookUpService.class, isLookupUrl);

View File

@@ -22,7 +22,7 @@ import eu.dnetlib.dhp.schema.oaf.Result;
 public class OafMapperUtilsTest {
-private static ObjectMapper OBJECT_MAPPER = new ObjectMapper()
+private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper()
 .configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES, false);
 @Test

View File

@@ -160,9 +160,9 @@ public class PromoteActionPayloadForGraphTableJob {
 private static String extractPayload(Row value) {
 try {
-return value.<String> getAs("payload");
+return value.getAs("payload");
 } catch (IllegalArgumentException | ClassCastException e) {
-logger.error("cannot extract payload from action: {}", value.toString());
+logger.error("cannot extract payload from action: {}", value);
 throw e;
 }
 }
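
Two inspections meet in this hunk: the explicit <String> type witness on Row.getAs is unnecessary because the target type is inferred from the enclosing return, and the explicit value.toString() is unnecessary because SLF4J renders the {} placeholder argument itself, and only when the statement is actually logged. A small illustrative sketch (hypothetical class, not the project's job):

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

public class PlaceholderLoggingExample {

    private static final Logger logger = LoggerFactory.getLogger(PlaceholderLoggingExample.class);

    public static void main(final String[] args) {
        final Object value = new Object();
        // The framework calls toString() on the argument, and only if the error level is enabled.
        logger.error("cannot extract payload from action: {}", value);
    }
}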

View File

@@ -75,7 +75,6 @@ public class CollectAndSave implements Serializable {
 .union(sc.sequenceFile(inputPath + "/otherresearchproduct", Text.class, Text.class))
 .union(sc.sequenceFile(inputPath + "/software", Text.class, Text.class))
 .saveAsHadoopFile(outputPath, Text.class, Text.class, SequenceFileOutputFormat.class);
-;
 }
 private static void removeOutputDir(SparkSession spark, String path) {

View File

@@ -36,7 +36,7 @@ import scala.Tuple2;
 */
 public class SparkAtomicActionScoreJob implements Serializable {
-private static String DOI = "doi";
+private static final String DOI = "doi";
 private static final Logger log = LoggerFactory.getLogger(SparkAtomicActionScoreJob.class);
 private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();

View File

@@ -249,7 +249,7 @@ public class PrepareProgramme {
 parent = parent.substring(parent.lastIndexOf("|") + 1).trim();
 }
 if (current.trim().length() > parent.length()
-&& current.toLowerCase().trim().substring(0, parent.length()).equals(parent)) {
+&& current.toLowerCase().trim().startsWith(parent)) {
 current = current.substring(parent.length() + 1);
 if (current.trim().charAt(0) == '-' || current.trim().charAt(0) == '') {
 current = current.trim().substring(1).trim();
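
The rewrite relies on the fact that, once current is known to be at least as long as parent, current.substring(0, parent.length()).equals(parent) and current.startsWith(parent) return the same result, while startsWith avoids allocating the intermediate substring and never throws on short inputs. An illustrative check with made-up values:

public class StartsWithExample {
    public static void main(final String[] args) {
        final String parent = "h2020";          // hypothetical values, for illustration only
        final String current = "h2020-eu.1.1.";
        System.out.println(current.substring(0, parent.length()).equals(parent)); // true
        System.out.println(current.startsWith(parent));                           // true
    }
}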

View File

@@ -93,7 +93,7 @@ public class PrepareProjects {
 }
 private static FlatMapFunction<Tuple2<ProjectSubset, CSVProject>, CSVProject> getTuple2CSVProjectFlatMapFunction() {
-return (FlatMapFunction<Tuple2<ProjectSubset, CSVProject>, CSVProject>) value -> {
+return value -> {
 Optional<CSVProject> csvProject = Optional.ofNullable(value._2());
 List<CSVProject> csvProjectList = new ArrayList<>();
 if (csvProject.isPresent()) {

View File

@@ -28,7 +28,7 @@ public class ReadCSV implements Closeable {
 private final Configuration conf;
 private final BufferedWriter writer;
 private final ObjectMapper OBJECT_MAPPER = new ObjectMapper();
-private String csvFile;
+private final String csvFile;
 public static void main(final String[] args) throws Exception {
 final ArgumentApplicationParser parser = new ArgumentApplicationParser(
@@ -85,7 +85,6 @@ public class ReadCSV implements Closeable {
 this.writer = new BufferedWriter(new OutputStreamWriter(fsDataOutputStream, StandardCharsets.UTF_8));
 this.csvFile = httpConnector.getInputSource(fileURL);
-;
 }
 protected void write(final Object p) {

View File

@@ -25,7 +25,7 @@ public class ReadExcel implements Closeable {
 private final Configuration conf;
 private final BufferedWriter writer;
 private final ObjectMapper OBJECT_MAPPER = new ObjectMapper();
-private InputStream excelFile;
+private final InputStream excelFile;
 public static void main(final String[] args) throws Exception {
 final ArgumentApplicationParser parser = new ArgumentApplicationParser(
@@ -82,7 +82,6 @@ public class ReadExcel implements Closeable {
 this.writer = new BufferedWriter(new OutputStreamWriter(fsDataOutputStream, StandardCharsets.UTF_8));
 this.excelFile = httpConnector.getInputSourceAsStream(fileURL);
-;
 }
 protected void write(final Object p) {

View File

@@ -18,7 +18,7 @@ public abstract class ReportingJob {
 */
 public static final int INITIAL_DELAY = 2;
-private ScheduledExecutorService executor = Executors.newSingleThreadScheduledExecutor();
+private final ScheduledExecutorService executor = Executors.newSingleThreadScheduledExecutor();
 protected final AggregatorReport report;

View File

@@ -30,7 +30,7 @@ public class CollectorWorkerApplication {
 private static final Logger log = LoggerFactory.getLogger(CollectorWorkerApplication.class);
-private FileSystem fileSystem;
+private final FileSystem fileSystem;
 public CollectorWorkerApplication(FileSystem fileSystem) {
 this.fileSystem = fileSystem;

View File

@@ -32,7 +32,7 @@ public class HttpConnector2 {
 private String responseType = null;
-private String userAgent = "Mozilla/5.0 (compatible; OAI; +http://www.openaire.eu)";
+private final String userAgent = "Mozilla/5.0 (compatible; OAI; +http://www.openaire.eu)";
 public HttpConnector2() {
 this(new HttpClientParams());

View File

@@ -42,7 +42,7 @@ public class OaiIterator implements Iterator<String> {
 private String token;
 private boolean started;
 private final HttpConnector2 httpConnector;
-private AggregatorReport report;
+private final AggregatorReport report;
 public OaiIterator(
 final String baseUrl,

View File

@@ -26,7 +26,7 @@ public class RestCollectorPlugin implements CollectorPlugin {
 public static final String RESULT_SIZE_VALUE_DEFAULT = "100";
-private HttpClientParams clientParams;
+private final HttpClientParams clientParams;
 public RestCollectorPlugin(HttpClientParams clientParams) {
 this.clientParams = clientParams;

View File

@@ -48,18 +48,18 @@ public class RestIterator implements Iterator<String> {
 private static final Logger log = LoggerFactory.getLogger(RestIterator.class);
 public static final String UTF_8 = "UTF-8";
-private HttpClientParams clientParams;
+private final HttpClientParams clientParams;
 private final String BASIC = "basic";
-private JsonUtils jsonUtils;
-private String baseUrl;
-private String resumptionType;
-private String resumptionParam;
-private String resultFormatValue;
+private final JsonUtils jsonUtils;
+private final String baseUrl;
+private final String resumptionType;
+private final String resumptionParam;
+private final String resultFormatValue;
 private String queryParams;
-private int resultSizeValue;
+private final int resultSizeValue;
 private int resumptionInt = 0; // integer resumption token (first record to harvest)
 private int resultTotal = -1;
 private String resumptionStr = Integer.toString(resumptionInt); // string resumption token (first record to harvest
@@ -71,11 +71,11 @@ public class RestIterator implements Iterator<String> {
 private XPathExpression xprResultTotalPath;
 private XPathExpression xprResumptionPath;
 private XPathExpression xprEntity;
-private String queryFormat;
-private String querySize;
-private String authMethod;
-private String authToken;
-private Queue<String> recordQueue = new PriorityBlockingQueue<String>();
+private final String queryFormat;
+private final String querySize;
+private final String authMethod;
+private final String authToken;
+private final Queue<String> recordQueue = new PriorityBlockingQueue<String>();
 private int discoverResultSize = 0;
 private int pagination = 1;
 /*
@@ -83,7 +83,7 @@ public class RestIterator implements Iterator<String> {
 * json. useful for cases when the target API expects a resultFormatValue != json, but the results are returned in
 * json. An example is the EU Open Data Portal API: resultFormatValue=standard, results are in json format.
 */
-private String resultOutputFormat;
+private final String resultOutputFormat;
 /** RestIterator class
 * compatible to version 1.3.33
@@ -229,7 +229,7 @@ public class RestIterator implements Iterator<String> {
 resultStream = theHttpInputStream;
 if ("json".equals(resultOutputFormat)) {
-resultJson = IOUtils.toString(resultStream, UTF_8);
+resultJson = IOUtils.toString(resultStream, StandardCharsets.UTF_8);
 resultXml = jsonUtils.convertToXML(resultJson);
 resultStream = IOUtils.toInputStream(resultXml, UTF_8);
 }
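
The last hunk swaps the "UTF-8" charset name for the java.nio.charset.Charset constant StandardCharsets.UTF_8, which skips the per-call charset-name lookup and removes any possibility of an unsupported-encoding failure at runtime; note the following toInputStream call still uses the string constant. A self-contained sketch of the Charset-based overload (illustrative input only):

import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.nio.charset.StandardCharsets;

import org.apache.commons.io.IOUtils;

public class CharsetOverloadExample {
    public static void main(final String[] args) throws IOException {
        final InputStream in = new ByteArrayInputStream("{\"hits\":[]}".getBytes(StandardCharsets.UTF_8));
        // Charset-based overload: no charset-name parsing, no unchecked charset exceptions.
        final String json = IOUtils.toString(in, StandardCharsets.UTF_8);
        System.out.println(json);
    }
}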

View File

@@ -36,7 +36,7 @@ public class TransformSparkJobNode {
 private static final Logger log = LoggerFactory.getLogger(TransformSparkJobNode.class);
-private static int RECORDS_PER_TASK = 200;
+private static final int RECORDS_PER_TASK = 200;
 public static void main(String[] args) throws Exception {

View File

@@ -26,7 +26,7 @@ public class PersonCleaner implements ExtensionFunction, Serializable {
 private List<String> surname = Lists.newArrayList();
 private List<String> fullname = Lists.newArrayList();
-private static Set<String> particles = null;
+private static final Set<String> particles = null;
 public PersonCleaner() {

View File

@@ -20,7 +20,7 @@ import eu.dnetlib.dhp.collection.HttpConnector2;
 public class EXCELParserTest {
 private static Path workingDir;
-private HttpConnector2 httpConnector = new HttpConnector2();
+private final HttpConnector2 httpConnector = new HttpConnector2();
 private static final String URL = "http://cordis.europa.eu/data/reference/cordisref-H2020topics.xlsx";
 @BeforeAll

View File

@@ -25,22 +25,22 @@ public class RestCollectorPluginTest {
 private static final Logger log = LoggerFactory.getLogger(RestCollectorPluginTest.class);
-private String baseUrl = "https://share.osf.io/api/v2/search/creativeworks/_search";
-private String resumptionType = "count";
-private String resumptionParam = "from";
-private String entityXpath = "//hits/hits";
-private String resumptionXpath = "//hits";
-private String resultTotalXpath = "//hits/total";
-private String resultFormatParam = "format";
-private String resultFormatValue = "json";
-private String resultSizeParam = "size";
-private String resultSizeValue = "10";
+private final String baseUrl = "https://share.osf.io/api/v2/search/creativeworks/_search";
+private final String resumptionType = "count";
+private final String resumptionParam = "from";
+private final String entityXpath = "//hits/hits";
+private final String resumptionXpath = "//hits";
+private final String resultTotalXpath = "//hits/total";
+private final String resultFormatParam = "format";
+private final String resultFormatValue = "json";
+private final String resultSizeParam = "size";
+private final String resultSizeValue = "10";
 // private String query = "q=%28sources%3ASocArXiv+AND+type%3Apreprint%29";
-private String query = "q=%28sources%3AengrXiv+AND+type%3Apreprint%29";
+private final String query = "q=%28sources%3AengrXiv+AND+type%3Apreprint%29";
 // private String query = "=(sources:engrXiv AND type:preprint)";
-private String protocolDescriptor = "rest_json2xml";
-private ApiDescriptor api = new ApiDescriptor();
+private final String protocolDescriptor = "rest_json2xml";
+private final ApiDescriptor api = new ApiDescriptor();
 private RestCollectorPlugin rcp;
 @BeforeEach

View File

@@ -20,20 +20,20 @@ public class RestIteratorTest {
 private static final Logger log = LoggerFactory.getLogger(RestIteratorTest.class);
-private String baseUrl = "https://share.osf.io/api/v2/search/creativeworks/_search";
-private String resumptionType = "count";
-private String resumptionParam = "from";
-private String resumptionXpath = "";
-private String resultTotalXpath = "//hits/total";
-private String entityXpath = "//hits/hits";
-private String resultFormatParam = "format";
-private String resultFormatValue = "Json"; // Change from lowerCase to one UpperCase
-private String resultSizeParam = "size";
-private String resultSizeValue = "10";
-private String authMethod = "";
-private String authToken = "";
-private String resultOffsetParam = "cursor";
-private String query = "q=%28sources%3ASocArXiv+AND+type%3Apreprint%29";
+private final String baseUrl = "https://share.osf.io/api/v2/search/creativeworks/_search";
+private final String resumptionType = "count";
+private final String resumptionParam = "from";
+private final String resumptionXpath = "";
+private final String resultTotalXpath = "//hits/total";
+private final String entityXpath = "//hits/hits";
+private final String resultFormatParam = "format";
+private final String resultFormatValue = "Json"; // Change from lowerCase to one UpperCase
+private final String resultSizeParam = "size";
+private final String resultSizeValue = "10";
+private final String authMethod = "";
+private final String authToken = "";
+private final String resultOffsetParam = "cursor";
+private final String query = "q=%28sources%3ASocArXiv+AND+type%3Apreprint%29";
 @Disabled
 @Test

View File

@@ -4,6 +4,8 @@ package eu.dnetlib.dhp.broker.oa;
 import org.apache.commons.io.IOUtils;
 import org.apache.commons.lang.StringUtils;
 import org.apache.spark.SparkConf;
+import org.apache.spark.api.java.function.FilterFunction;
+import org.apache.spark.api.java.function.MapFunction;
 import org.apache.spark.sql.Encoder;
 import org.apache.spark.sql.Encoders;
 import org.apache.spark.sql.SaveMode;
@@ -47,26 +49,22 @@ public class CheckDuplictedIdsJob {
 final LongAccumulator total = spark.sparkContext().longAccumulator("invaild_event_id");
-final TypedColumn<Tuple2<String, Long>, Tuple2<String, Long>> agg = new CountAggregator().toColumn();
+final Encoder<Tuple2<String, Long>> encoder = Encoders.tuple(Encoders.STRING(), Encoders.LONG());
 ClusterUtils
 .readPath(spark, eventsPath, Event.class)
-.map(e -> new Tuple2<>(e.getEventId(), 1l), Encoders.tuple(Encoders.STRING(), Encoders.LONG()))
-.groupByKey(t -> t._1, Encoders.STRING())
-.agg(agg)
-.map(t -> t._2, Encoders.tuple(Encoders.STRING(), Encoders.LONG()))
-.filter(t -> t._2 > 1)
-.map(o -> ClusterUtils.incrementAccumulator(o, total), Encoders.tuple(Encoders.STRING(), Encoders.LONG()))
+.map((MapFunction<Event, Tuple2<String, Long>>) e -> new Tuple2<>(e.getEventId(), 1l), encoder)
+.groupByKey((MapFunction<Tuple2<String, Long>, String>) t -> t._1, Encoders.STRING())
+.agg(new CountAggregator().toColumn())
+.map((MapFunction<Tuple2<String, Tuple2<String, Long>>, Tuple2<String, Long>>) t -> t._2, encoder)
+.filter((FilterFunction<Tuple2<String, Long>>) t -> t._2 > 1)
+.map(
+(MapFunction<Tuple2<String, Long>, Tuple2<String, Long>>) o -> ClusterUtils
+.incrementAccumulator(o, total),
+encoder)
 .write()
 .mode(SaveMode.Overwrite)
 .option("compression", "gzip")
 .json(countPath);
-;
-}
-private static String eventAsJsonString(final Event f) throws JsonProcessingException {
-return new ObjectMapper().writeValueAsString(f);
 }
 }
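
The recurring change in this and the following broker jobs is the explicit cast of each lambda to Spark's Java functional interfaces. Dataset.map, flatMap, filter and groupByKey are overloaded for both Scala functions and MapFunction/FlatMapFunction/FilterFunction, so a bare lambda next to an Encoder can be ambiguous for the Java compiler; the cast pins the Java overload and names the element types. A small self-contained sketch of the idiom (local Spark session and toy data, not the project's job):

import java.util.Arrays;

import org.apache.spark.api.java.function.FilterFunction;
import org.apache.spark.api.java.function.MapFunction;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Encoders;
import org.apache.spark.sql.SparkSession;

public class TypedLambdaExample {
    public static void main(final String[] args) {
        final SparkSession spark = SparkSession.builder().master("local[*]").appName("typed-lambda").getOrCreate();

        final Dataset<String> words = spark.createDataset(Arrays.asList("alpha", "beta", "gamma"), Encoders.STRING());

        final Dataset<Integer> lengths = words
            .filter((FilterFunction<String>) w -> !w.isEmpty())                       // Java FilterFunction overload
            .map((MapFunction<String, Integer>) String::length, Encoders.INT());      // Java MapFunction overload

        lengths.show();
        spark.stop();
    }
}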

View File

@@ -12,6 +12,8 @@ import org.apache.commons.io.IOUtils;
 import org.apache.commons.lang3.StringUtils;
 import org.apache.spark.SparkConf;
 import org.apache.spark.SparkContext;
+import org.apache.spark.api.java.function.FlatMapFunction;
+import org.apache.spark.api.java.function.MapFunction;
 import org.apache.spark.sql.Dataset;
 import org.apache.spark.sql.Encoders;
 import org.apache.spark.util.LongAccumulator;
@@ -77,11 +79,11 @@ public class GenerateEventsJob {
 final Dataset<Event> dataset = groups
 .map(
-g -> EventFinder
+(MapFunction<ResultGroup, EventGroup>) g -> EventFinder
 .generateEvents(g, dsIdWhitelist, dsIdBlacklist, dsTypeWhitelist, topicWhitelist, accumulators),
 Encoders
 .bean(EventGroup.class))
-.flatMap(g -> g.getData().iterator(), Encoders.bean(Event.class));
+.flatMap((FlatMapFunction<EventGroup, Event>) g -> g.getData().iterator(), Encoders.bean(Event.class));
 ClusterUtils.save(dataset, eventsPath, Event.class, total);

View File

@@ -13,6 +13,7 @@ import org.apache.http.client.methods.HttpGet;
 import org.apache.http.impl.client.CloseableHttpClient;
 import org.apache.http.impl.client.HttpClients;
 import org.apache.spark.SparkConf;
+import org.apache.spark.api.java.function.MapFunction;
 import org.apache.spark.sql.Encoders;
 import org.apache.spark.sql.SaveMode;
 import org.apache.spark.sql.TypedColumn;
@@ -24,6 +25,7 @@ import eu.dnetlib.dhp.broker.model.Event;
 import eu.dnetlib.dhp.broker.oa.util.ClusterUtils;
 import eu.dnetlib.dhp.broker.oa.util.aggregators.stats.DatasourceStats;
 import eu.dnetlib.dhp.broker.oa.util.aggregators.stats.StatsAggregator;
+import scala.Tuple2;
 public class GenerateStatsJob {
@@ -71,9 +73,13 @@ public class GenerateStatsJob {
 ClusterUtils
 .readPath(spark, eventsPath, Event.class)
-.groupByKey(e -> e.getTopic() + "@@@" + e.getMap().getTargetDatasourceId(), Encoders.STRING())
+.groupByKey(
+(MapFunction<Event, String>) e -> e.getTopic() + "@@@" + e.getMap().getTargetDatasourceId(),
+Encoders.STRING())
 .agg(aggr)
-.map(t -> t._2, Encoders.bean(DatasourceStats.class))
+.map(
+(MapFunction<Tuple2<String, DatasourceStats>, DatasourceStats>) t -> t._2,
+Encoders.bean(DatasourceStats.class))
 .write()
 .mode(SaveMode.Overwrite)
 .jdbc(dbUrl, "oa_datasource_stats_temp", connectionProperties);

View File

@@ -13,6 +13,8 @@ import org.apache.http.impl.client.CloseableHttpClient;
 import org.apache.http.impl.client.HttpClients;
 import org.apache.spark.SparkConf;
 import org.apache.spark.api.java.JavaRDD;
+import org.apache.spark.api.java.function.FlatMapFunction;
+import org.apache.spark.api.java.function.MapFunction;
 import org.apache.spark.sql.Dataset;
 import org.apache.spark.sql.Encoders;
 import org.apache.spark.sql.SparkSession;
@@ -30,6 +32,7 @@ import eu.dnetlib.dhp.broker.model.Event;
 import eu.dnetlib.dhp.broker.oa.util.ClusterUtils;
 import eu.dnetlib.dhp.broker.oa.util.EventGroup;
 import eu.dnetlib.dhp.broker.oa.util.aggregators.subset.EventSubsetAggregator;
+import scala.Tuple2;
 public class IndexEventSubsetJob {
@@ -83,13 +86,15 @@ public class IndexEventSubsetJob {
 final Dataset<Event> subset = ClusterUtils
 .readPath(spark, eventsPath, Event.class)
-.groupByKey(e -> e.getTopic() + '@' + e.getMap().getTargetDatasourceId(), Encoders.STRING())
+.groupByKey(
+(MapFunction<Event, String>) e -> e.getTopic() + '@' + e.getMap().getTargetDatasourceId(),
+Encoders.STRING())
 .agg(aggr)
-.map(t -> t._2, Encoders.bean(EventGroup.class))
-.flatMap(g -> g.getData().iterator(), Encoders.bean(Event.class));
+.map((MapFunction<Tuple2<String, EventGroup>, EventGroup>) t -> t._2, Encoders.bean(EventGroup.class))
+.flatMap((FlatMapFunction<EventGroup, Event>) g -> g.getData().iterator(), Encoders.bean(Event.class));
 final JavaRDD<String> inputRdd = subset
-.map(e -> prepareEventForIndexing(e, now, total), Encoders.STRING())
+.map((MapFunction<Event, String>) e -> prepareEventForIndexing(e, now, total), Encoders.STRING())
 .javaRDD();
 final Map<String, String> esCfg = new HashMap<>();

View File

@@ -18,7 +18,10 @@ import org.apache.http.impl.client.CloseableHttpClient;
 import org.apache.http.impl.client.HttpClients;
 import org.apache.spark.SparkConf;
 import org.apache.spark.api.java.JavaRDD;
+import org.apache.spark.api.java.function.FlatMapFunction;
+import org.apache.spark.api.java.function.MapFunction;
 import org.apache.spark.sql.Dataset;
+import org.apache.spark.sql.Encoder;
 import org.apache.spark.sql.Encoders;
 import org.apache.spark.sql.SparkSession;
 import org.apache.spark.util.LongAccumulator;
@@ -89,13 +92,17 @@ public class IndexNotificationsJob {
 log.info("Number of subscriptions: " + subscriptions.size());
 if (subscriptions.size() > 0) {
+final Encoder<NotificationGroup> ngEncoder = Encoders.bean(NotificationGroup.class);
+final Encoder<Notification> nEncoder = Encoders.bean(Notification.class);
 final Dataset<Notification> notifications = ClusterUtils
 .readPath(spark, eventsPath, Event.class)
-.map(e -> generateNotifications(e, subscriptions, startTime), Encoders.bean(NotificationGroup.class))
-.flatMap(g -> g.getData().iterator(), Encoders.bean(Notification.class));
+.map(
+(MapFunction<Event, NotificationGroup>) e -> generateNotifications(e, subscriptions, startTime),
+ngEncoder)
+.flatMap((FlatMapFunction<NotificationGroup, Notification>) g -> g.getData().iterator(), nEncoder);
 final JavaRDD<String> inputRdd = notifications
-.map(n -> prepareForIndexing(n, total), Encoders.STRING())
+.map((MapFunction<Notification, String>) n -> prepareForIndexing(n, total), Encoders.STRING())
 .javaRDD();
 final Map<String, String> esCfg = new HashMap<>();
@@ -192,15 +199,11 @@ public class IndexNotificationsJob {
 return false;
 }
-if (conditions.containsKey("targetSubjects")
-&& !conditions
+return !conditions.containsKey("targetSubjects")
+|| conditions
 .get("targetSubjects")
 .stream()
-.allMatch(c -> SubscriptionUtils.verifyListExact(map.getTargetSubjects(), c.getValue()))) {
-return false;
-}
-return true;
+.allMatch(c -> SubscriptionUtils.verifyListExact(map.getTargetSubjects(), c.getValue()));
 }
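
The last hunk collapses "if (A && !B) return false; return true;" into the equivalent "return !A || B". A compact sketch of the equivalence with stand-in condition names:

import java.util.Arrays;
import java.util.Collections;
import java.util.List;
import java.util.Map;

public class SimplifiableBooleanExample {

    static boolean matches(final Map<String, List<String>> conditions, final List<String> targetSubjects) {
        // Equivalent to: if (conditions.containsKey(...) && !allMatch) return false; return true;
        return !conditions.containsKey("targetSubjects")
            || conditions.get("targetSubjects").stream().allMatch(targetSubjects::contains);
    }

    public static void main(final String[] args) {
        System.out.println(matches(Collections.emptyMap(), Collections.emptyList()));          // true: no condition set
        System.out.println(matches(Collections.singletonMap("targetSubjects", Arrays.asList("math")),
            Collections.emptyList()));                                                          // false: condition unmet
    }
}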

View File

@@ -7,6 +7,7 @@ import java.util.Optional;
 import org.apache.commons.io.IOUtils;
 import org.apache.spark.SparkConf;
+import org.apache.spark.api.java.function.MapFunction;
 import org.apache.spark.sql.Dataset;
 import org.apache.spark.sql.Encoders;
 import org.apache.spark.sql.TypedColumn;
@@ -67,9 +68,13 @@ public class JoinStep0Job {
 final Dataset<OaBrokerMainEntity> dataset = sources
 .joinWith(typedRels, sources.col("openaireId").equalTo(typedRels.col("source")), "left_outer")
-.groupByKey(t -> t._1.getOpenaireId(), Encoders.STRING())
+.groupByKey(
+(MapFunction<Tuple2<OaBrokerMainEntity, RelatedDatasource>, String>) t -> t._1.getOpenaireId(),
+Encoders.STRING())
 .agg(aggr)
-.map(t -> t._2, Encoders.bean(OaBrokerMainEntity.class));
+.map(
+(MapFunction<Tuple2<String, OaBrokerMainEntity>, OaBrokerMainEntity>) t -> t._2,
+Encoders.bean(OaBrokerMainEntity.class));
 ClusterUtils.save(dataset, joinedEntitiesPath, OaBrokerMainEntity.class, total);

View File

@@ -69,7 +69,9 @@ public class JoinStep1Job {
 (MapFunction<Tuple2<OaBrokerMainEntity, RelatedProject>, String>) t -> t._1.getOpenaireId(),
 Encoders.STRING())
 .agg(aggr)
-.map(t -> t._2, Encoders.bean(OaBrokerMainEntity.class));
+.map(
+(MapFunction<Tuple2<String, OaBrokerMainEntity>, OaBrokerMainEntity>) t -> t._2,
+Encoders.bean(OaBrokerMainEntity.class));
 ClusterUtils.save(dataset, joinedEntitiesPath, OaBrokerMainEntity.class, total);

View File

@@ -7,6 +7,7 @@ import java.util.Optional;
 import org.apache.commons.io.IOUtils;
 import org.apache.spark.SparkConf;
+import org.apache.spark.api.java.function.MapFunction;
 import org.apache.spark.sql.Dataset;
 import org.apache.spark.sql.Encoders;
 import org.apache.spark.sql.TypedColumn;
@@ -64,9 +65,13 @@ public class JoinStep2Job {
 final Dataset<OaBrokerMainEntity> dataset = sources
 .joinWith(typedRels, sources.col("openaireId").equalTo(typedRels.col("source")), "left_outer")
-.groupByKey(t -> t._1.getOpenaireId(), Encoders.STRING())
+.groupByKey(
+(MapFunction<Tuple2<OaBrokerMainEntity, RelatedSoftware>, String>) t -> t._1.getOpenaireId(),
+Encoders.STRING())
 .agg(aggr)
-.map(t -> t._2, Encoders.bean(OaBrokerMainEntity.class));
+.map(
+(MapFunction<Tuple2<String, OaBrokerMainEntity>, OaBrokerMainEntity>) t -> t._2,
+Encoders.bean(OaBrokerMainEntity.class));
 ClusterUtils.save(dataset, joinedEntitiesPath, OaBrokerMainEntity.class, total);

View File

@@ -69,7 +69,9 @@ public class JoinStep3Job {
 (MapFunction<Tuple2<OaBrokerMainEntity, RelatedDataset>, String>) t -> t._1.getOpenaireId(),
 Encoders.STRING())
 .agg(aggr)
-.map(t -> t._2, Encoders.bean(OaBrokerMainEntity.class));
+.map(
+(MapFunction<Tuple2<String, OaBrokerMainEntity>, OaBrokerMainEntity>) t -> t._2,
+Encoders.bean(OaBrokerMainEntity.class));
 ClusterUtils.save(dataset, joinedEntitiesPath, OaBrokerMainEntity.class, total);

View File

@@ -69,7 +69,9 @@ public class JoinStep4Job {
 (MapFunction<Tuple2<OaBrokerMainEntity, RelatedPublication>, String>) t -> t._1.getOpenaireId(),
 Encoders.STRING())
 .agg(aggr)
-.map(t -> t._2, Encoders.bean(OaBrokerMainEntity.class));
+.map(
+(MapFunction<Tuple2<String, OaBrokerMainEntity>, OaBrokerMainEntity>) t -> t._2,
+Encoders.bean(OaBrokerMainEntity.class));
 ClusterUtils.save(dataset, joinedEntitiesPath, OaBrokerMainEntity.class, total);

View File

@@ -7,6 +7,7 @@ import java.util.Optional;
 import org.apache.commons.io.IOUtils;
 import org.apache.spark.SparkConf;
+import org.apache.spark.api.java.function.FilterFunction;
 import org.apache.spark.api.java.function.MapFunction;
 import org.apache.spark.sql.Dataset;
 import org.apache.spark.sql.Encoders;
@@ -64,7 +65,7 @@ public class PrepareGroupsJob {
 final Dataset<Relation> mergedRels = ClusterUtils
 .loadRelations(graphPath, spark)
-.filter(r -> r.getRelClass().equals(BrokerConstants.IS_MERGED_IN_CLASS));
+.filter((FilterFunction<Relation>) r -> r.getRelClass().equals(BrokerConstants.IS_MERGED_IN_CLASS));
 final TypedColumn<Tuple2<OaBrokerMainEntity, Relation>, ResultGroup> aggr = new ResultAggregator()
 .toColumn();
@@ -75,8 +76,9 @@ public class PrepareGroupsJob {
 (MapFunction<Tuple2<OaBrokerMainEntity, Relation>, String>) t -> t._2.getTarget(),
 Encoders.STRING())
 .agg(aggr)
-.map(t -> t._2, Encoders.bean(ResultGroup.class))
-.filter(rg -> rg.getData().size() > 1);
+.map(
+(MapFunction<Tuple2<String, ResultGroup>, ResultGroup>) t -> t._2, Encoders.bean(ResultGroup.class))
+.filter((FilterFunction<ResultGroup>) rg -> rg.getData().size() > 1);
 ClusterUtils.save(dataset, groupsPath, ResultGroup.class, total);

View File

@@ -7,6 +7,8 @@ import java.util.Optional;
 import org.apache.commons.io.IOUtils;
 import org.apache.spark.SparkConf;
+import org.apache.spark.api.java.function.FilterFunction;
+import org.apache.spark.api.java.function.MapFunction;
 import org.apache.spark.sql.Dataset;
 import org.apache.spark.sql.Encoders;
 import org.apache.spark.util.LongAccumulator;
@@ -20,6 +22,7 @@ import eu.dnetlib.dhp.broker.oa.util.ConversionUtils;
 import eu.dnetlib.dhp.broker.oa.util.aggregators.withRels.RelatedDataset;
 import eu.dnetlib.dhp.schema.common.ModelConstants;
 import eu.dnetlib.dhp.schema.oaf.Relation;
+import scala.Tuple2;
 public class PrepareRelatedDatasetsJob {
@@ -58,20 +61,22 @@ public class PrepareRelatedDatasetsJob {
 final Dataset<OaBrokerRelatedDataset> datasets = ClusterUtils
 .readPath(spark, graphPath + "/dataset", eu.dnetlib.dhp.schema.oaf.Dataset.class)
-.filter(d -> !ClusterUtils.isDedupRoot(d.getId()))
-.map(ConversionUtils::oafDatasetToBrokerDataset, Encoders.bean(OaBrokerRelatedDataset.class));
+.filter((FilterFunction<eu.dnetlib.dhp.schema.oaf.Dataset>) d -> !ClusterUtils.isDedupRoot(d.getId()))
+.map(
+(MapFunction<eu.dnetlib.dhp.schema.oaf.Dataset, OaBrokerRelatedDataset>) ConversionUtils::oafDatasetToBrokerDataset,
+Encoders.bean(OaBrokerRelatedDataset.class));
 final Dataset<Relation> rels = ClusterUtils
 .loadRelations(graphPath, spark)
-.filter(r -> r.getDataInfo().getDeletedbyinference())
-.filter(r -> r.getRelType().equals(ModelConstants.RESULT_RESULT))
-.filter(r -> ClusterUtils.isValidResultResultClass(r.getRelClass()))
-.filter(r -> !ClusterUtils.isDedupRoot(r.getSource()))
-.filter(r -> !ClusterUtils.isDedupRoot(r.getTarget()));
+.filter((FilterFunction<Relation>) r -> r.getDataInfo().getDeletedbyinference())
+.filter((FilterFunction<Relation>) r -> r.getRelType().equals(ModelConstants.RESULT_RESULT))
+.filter((FilterFunction<Relation>) r -> ClusterUtils.isValidResultResultClass(r.getRelClass()))
+.filter((FilterFunction<Relation>) r -> !ClusterUtils.isDedupRoot(r.getSource()))
+.filter((FilterFunction<Relation>) r -> !ClusterUtils.isDedupRoot(r.getTarget()));
 final Dataset<RelatedDataset> dataset = rels
 .joinWith(datasets, datasets.col("openaireId").equalTo(rels.col("target")), "inner")
-.map(t -> {
+.map((MapFunction<Tuple2<Relation, OaBrokerRelatedDataset>, RelatedDataset>) t -> {
 final RelatedDataset rel = new RelatedDataset(t._1.getSource(),
 t._2);
 rel.getRelDataset().setRelType(t._1.getRelClass());

View File

@@ -7,6 +7,9 @@ import java.util.Optional;
 import org.apache.commons.io.IOUtils;
 import org.apache.spark.SparkConf;
+import org.apache.spark.api.java.function.FilterFunction;
+import org.apache.spark.api.java.function.FlatMapFunction;
+import org.apache.spark.api.java.function.MapFunction;
 import org.apache.spark.sql.Dataset;
 import org.apache.spark.sql.Encoders;
 import org.apache.spark.sql.SparkSession;
@@ -25,6 +28,7 @@ import eu.dnetlib.dhp.schema.oaf.OtherResearchProduct;
 import eu.dnetlib.dhp.schema.oaf.Publication;
 import eu.dnetlib.dhp.schema.oaf.Result;
 import eu.dnetlib.dhp.schema.oaf.Software;
+import scala.Tuple2;
 import scala.Tuple3;
 public class PrepareRelatedDatasourcesJob {
@@ -70,17 +74,20 @@ public class PrepareRelatedDatasourcesJob {
 final Dataset<OaBrokerRelatedDatasource> datasources = ClusterUtils
 .readPath(spark, graphPath + "/datasource", Datasource.class)
-.map(ConversionUtils::oafDatasourceToBrokerDatasource, Encoders.bean(OaBrokerRelatedDatasource.class));
+.map(
+(MapFunction<Datasource, OaBrokerRelatedDatasource>) ConversionUtils::oafDatasourceToBrokerDatasource,
+Encoders.bean(OaBrokerRelatedDatasource.class));
 final Dataset<RelatedDatasource> dataset = rels
 .joinWith(datasources, datasources.col("openaireId").equalTo(rels.col("_2")), "inner")
-.map(t -> {
-final RelatedDatasource r = new RelatedDatasource();
-r.setSource(t._1._1());
-r.setRelDatasource(t._2);
-r.getRelDatasource().setRelType(t._1._3());
-return r;
-}, Encoders.bean(RelatedDatasource.class));
+.map(
+(MapFunction<Tuple2<Tuple3<String, String, String>, OaBrokerRelatedDatasource>, RelatedDatasource>) t -> {
+final RelatedDatasource r = new RelatedDatasource();
+r.setSource(t._1._1());
+r.setRelDatasource(t._2);
+r.getRelDatasource().setRelType(t._1._3());
+return r;
+}, Encoders.bean(RelatedDatasource.class));
 ClusterUtils.save(dataset, relsPath, RelatedDatasource.class, total);
@@ -88,19 +95,22 @@ public class PrepareRelatedDatasourcesJob {
 }
-private static final Dataset<Tuple3<String, String, String>> prepareResultTuples(final SparkSession spark,
+private static final <T extends Result> Dataset<Tuple3<String, String, String>> prepareResultTuples(
+final SparkSession spark,
 final String graphPath,
-final Class<? extends Result> sourceClass) {
+final Class<T> sourceClass) {
 return ClusterUtils
 .readPath(spark, graphPath + "/" + sourceClass.getSimpleName().toLowerCase(), sourceClass)
-.filter(r -> !ClusterUtils.isDedupRoot(r.getId()))
-.filter(r -> r.getDataInfo().getDeletedbyinference())
+.filter((FilterFunction<T>) r -> !ClusterUtils.isDedupRoot(r.getId()))
+.filter((FilterFunction<T>) r -> r.getDataInfo().getDeletedbyinference())
 .map(
-r -> DatasourceRelationsAccumulator.calculateTuples(r),
+(MapFunction<T, DatasourceRelationsAccumulator>) r -> DatasourceRelationsAccumulator.calculateTuples(r),
 Encoders.bean(DatasourceRelationsAccumulator.class))
 .flatMap(
-acc -> acc.getRels().iterator(),
+(FlatMapFunction<DatasourceRelationsAccumulator, Tuple3<String, String, String>>) acc -> acc
+.getRels()
+.iterator(),
 Encoders.tuple(Encoders.STRING(), Encoders.STRING(), Encoders.STRING()));
 }
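
Turning prepareResultTuples into a generic method is what makes the typed lambdas above it possible: with Class<? extends Result> the element type is an unnamed wildcard capture, so a cast such as (FilterFunction<T>) has nothing to bind T to; declaring <T extends Result> gives that type a name. A simplified sketch of the same idea, using java.util.function.Predicate as a stand-in for Spark's FilterFunction:

import java.util.Arrays;
import java.util.List;
import java.util.function.Predicate;

public class GenericHelperExample {

    static class Result {
        private final String id;
        Result(final String id) { this.id = id; }
        String getId() { return id; }
    }

    // With List<? extends Result> the cast below could not name the element type.
    static <T extends Result> long countNonDedupRoots(final List<T> items) {
        return items.stream()
            .filter((Predicate<T>) r -> !r.getId().startsWith("dedup_"))
            .count();
    }

    public static void main(final String[] args) {
        System.out.println(countNonDedupRoots(Arrays.asList(new Result("a"), new Result("dedup_b")))); // 1
    }
}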

View File

@@ -7,6 +7,8 @@ import java.util.Optional;
 import org.apache.commons.io.IOUtils;
 import org.apache.spark.SparkConf;
+import org.apache.spark.api.java.function.FilterFunction;
+import org.apache.spark.api.java.function.MapFunction;
 import org.apache.spark.sql.Dataset;
 import org.apache.spark.sql.Encoders;
 import org.apache.spark.util.LongAccumulator;
@@ -22,6 +24,7 @@ import eu.dnetlib.dhp.broker.oa.util.aggregators.withRels.RelatedProject;
 import eu.dnetlib.dhp.schema.common.ModelConstants;
 import eu.dnetlib.dhp.schema.oaf.Project;
 import eu.dnetlib.dhp.schema.oaf.Relation;
+import scala.Tuple2;
 public class PrepareRelatedProjectsJob {
@@ -60,20 +63,25 @@ public class PrepareRelatedProjectsJob {
 final Dataset<OaBrokerProject> projects = ClusterUtils
 .readPath(spark, graphPath + "/project", Project.class)
-.filter(p -> !ClusterUtils.isDedupRoot(p.getId()))
-.map(ConversionUtils::oafProjectToBrokerProject, Encoders.bean(OaBrokerProject.class));
+.filter((FilterFunction<Project>) p -> !ClusterUtils.isDedupRoot(p.getId()))
+.map(
+(MapFunction<Project, OaBrokerProject>) ConversionUtils::oafProjectToBrokerProject,
+Encoders.bean(OaBrokerProject.class));
 final Dataset<Relation> rels = ClusterUtils
 .loadRelations(graphPath, spark)
-.filter(r -> r.getDataInfo().getDeletedbyinference())
-.filter(r -> r.getRelType().equals(ModelConstants.RESULT_PROJECT))
-.filter(r -> !r.getRelClass().equals(BrokerConstants.IS_MERGED_IN_CLASS))
-.filter(r -> !ClusterUtils.isDedupRoot(r.getSource()))
-.filter(r -> !ClusterUtils.isDedupRoot(r.getTarget()));
+.filter((FilterFunction<Relation>) r -> r.getDataInfo().getDeletedbyinference())
+.filter((FilterFunction<Relation>) r -> r.getRelType().equals(ModelConstants.RESULT_PROJECT))
+.filter((FilterFunction<Relation>) r -> !r.getRelClass().equals(BrokerConstants.IS_MERGED_IN_CLASS))
+.filter((FilterFunction<Relation>) r -> !ClusterUtils.isDedupRoot(r.getSource()))
+.filter((FilterFunction<Relation>) r -> !ClusterUtils.isDedupRoot(r.getTarget()));
 final Dataset<RelatedProject> dataset = rels
 .joinWith(projects, projects.col("openaireId").equalTo(rels.col("target")), "inner")
-.map(t -> new RelatedProject(t._1.getSource(), t._2), Encoders.bean(RelatedProject.class));
+.map(
+(MapFunction<Tuple2<Relation, OaBrokerProject>, RelatedProject>) t -> new RelatedProject(
+t._1.getSource(), t._2),
+Encoders.bean(RelatedProject.class));
 ClusterUtils.save(dataset, relsPath, RelatedProject.class, total);

View File

@@ -7,6 +7,8 @@ import java.util.Optional;
 import org.apache.commons.io.IOUtils;
 import org.apache.spark.SparkConf;
+import org.apache.spark.api.java.function.FilterFunction;
+import org.apache.spark.api.java.function.MapFunction;
 import org.apache.spark.sql.Dataset;
 import org.apache.spark.sql.Encoders;
 import org.apache.spark.util.LongAccumulator;
@@ -21,6 +23,7 @@ import eu.dnetlib.dhp.broker.oa.util.aggregators.withRels.RelatedPublication;
 import eu.dnetlib.dhp.schema.common.ModelConstants;
 import eu.dnetlib.dhp.schema.oaf.Publication;
 import eu.dnetlib.dhp.schema.oaf.Relation;
+import scala.Tuple2;
 public class PrepareRelatedPublicationsJob {
@@ -59,22 +62,22 @@ public class PrepareRelatedPublicationsJob {
 final Dataset<OaBrokerRelatedPublication> pubs = ClusterUtils
 .readPath(spark, graphPath + "/publication", Publication.class)
-.filter(p -> !ClusterUtils.isDedupRoot(p.getId()))
+.filter((FilterFunction<Publication>) p -> !ClusterUtils.isDedupRoot(p.getId()))
 .map(
-ConversionUtils::oafPublicationToBrokerPublication,
+(MapFunction<Publication, OaBrokerRelatedPublication>) ConversionUtils::oafPublicationToBrokerPublication,
 Encoders.bean(OaBrokerRelatedPublication.class));
 final Dataset<Relation> rels = ClusterUtils
 .loadRelations(graphPath, spark)
-.filter(r -> r.getDataInfo().getDeletedbyinference())
-.filter(r -> r.getRelType().equals(ModelConstants.RESULT_RESULT))
-.filter(r -> ClusterUtils.isValidResultResultClass(r.getRelClass()))
-.filter(r -> !ClusterUtils.isDedupRoot(r.getSource()))
-.filter(r -> !ClusterUtils.isDedupRoot(r.getTarget()));
+.filter((FilterFunction<Relation>) r -> r.getDataInfo().getDeletedbyinference())
+.filter((FilterFunction<Relation>) r -> r.getRelType().equals(ModelConstants.RESULT_RESULT))
+.filter((FilterFunction<Relation>) r -> ClusterUtils.isValidResultResultClass(r.getRelClass()))
+.filter((FilterFunction<Relation>) r -> !ClusterUtils.isDedupRoot(r.getSource()))
+.filter((FilterFunction<Relation>) r -> !ClusterUtils.isDedupRoot(r.getTarget()));
 final Dataset<RelatedPublication> dataset = rels
 .joinWith(pubs, pubs.col("openaireId").equalTo(rels.col("target")), "inner")
-.map(t -> {
+.map((MapFunction<Tuple2<Relation, OaBrokerRelatedPublication>, RelatedPublication>) t -> {
 final RelatedPublication rel = new RelatedPublication(
 t._1.getSource(), t._2);
 rel.getRelPublication().setRelType(t._1.getRelClass());

View File

@@ -7,7 +7,10 @@ import java.util.Optional;
 import org.apache.commons.io.IOUtils;
 import org.apache.spark.SparkConf;
+import org.apache.spark.api.java.function.FilterFunction;
+import org.apache.spark.api.java.function.MapFunction;
 import org.apache.spark.sql.Dataset;
+import org.apache.spark.sql.Encoder;
 import org.apache.spark.sql.Encoders;
 import org.apache.spark.util.LongAccumulator;
 import org.slf4j.Logger;
@@ -22,6 +25,7 @@ import eu.dnetlib.dhp.broker.oa.util.aggregators.withRels.RelatedSoftware;
 import eu.dnetlib.dhp.schema.common.ModelConstants;
 import eu.dnetlib.dhp.schema.oaf.Relation;
 import eu.dnetlib.dhp.schema.oaf.Software;
+import scala.Tuple2;
 public class PrepareRelatedSoftwaresJob {
@@ -58,22 +62,30 @@ public class PrepareRelatedSoftwaresJob {
 final LongAccumulator total = spark.sparkContext().longAccumulator("total_rels");
+final Encoder<OaBrokerRelatedSoftware> obrsEncoder = Encoders.bean(OaBrokerRelatedSoftware.class);
 final Dataset<OaBrokerRelatedSoftware> softwares = ClusterUtils
 .readPath(spark, graphPath + "/software", Software.class)
-.filter(sw -> !ClusterUtils.isDedupRoot(sw.getId()))
-.map(ConversionUtils::oafSoftwareToBrokerSoftware, Encoders.bean(OaBrokerRelatedSoftware.class));
+.filter((FilterFunction<Software>) sw -> !ClusterUtils.isDedupRoot(sw.getId()))
+.map(
+(MapFunction<Software, OaBrokerRelatedSoftware>) ConversionUtils::oafSoftwareToBrokerSoftware,
+obrsEncoder);
-final Dataset<Relation> rels = ClusterUtils
+final Dataset<Relation> rels;
+rels = ClusterUtils
 .loadRelations(graphPath, spark)
-.filter(r -> r.getDataInfo().getDeletedbyinference())
-.filter(r -> r.getRelType().equals(ModelConstants.RESULT_RESULT))
-.filter(r -> !r.getRelClass().equals(BrokerConstants.IS_MERGED_IN_CLASS))
-.filter(r -> !ClusterUtils.isDedupRoot(r.getSource()))
-.filter(r -> !ClusterUtils.isDedupRoot(r.getTarget()));
+.filter((FilterFunction<Relation>) r -> r.getDataInfo().getDeletedbyinference())
+.filter((FilterFunction<Relation>) r -> r.getRelType().equals(ModelConstants.RESULT_RESULT))
+.filter((FilterFunction<Relation>) r -> !r.getRelClass().equals(BrokerConstants.IS_MERGED_IN_CLASS))
+.filter((FilterFunction<Relation>) r -> !ClusterUtils.isDedupRoot(r.getSource()))
+.filter((FilterFunction<Relation>) r -> !ClusterUtils.isDedupRoot(r.getTarget()));
+final Encoder<RelatedSoftware> rsEncoder = Encoders.bean(RelatedSoftware.class);
 final Dataset<RelatedSoftware> dataset = rels
 .joinWith(softwares, softwares.col("openaireId").equalTo(rels.col("target")), "inner")
-.map(t -> new RelatedSoftware(t._1.getSource(), t._2), Encoders.bean(RelatedSoftware.class));
+.map(
+(MapFunction<Tuple2<Relation, OaBrokerRelatedSoftware>, RelatedSoftware>) t -> new RelatedSoftware(
+t._1.getSource(), t._2),
+rsEncoder);
 ClusterUtils.save(dataset, relsPath, RelatedSoftware.class, total);
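
Besides the typed lambdas, this hunk hoists the Encoders.bean(...) calls into local obrsEncoder and rsEncoder variables, which keeps the long map(...) call sites readable and avoids repeating the reflective encoder construction. A minimal illustration with a placeholder bean class:

import org.apache.spark.sql.Encoder;
import org.apache.spark.sql.Encoders;

public class EncoderReuseExample {

    // Placeholder JavaBean, standing in for OaBrokerRelatedSoftware / RelatedSoftware.
    public static class SomeBean {
        private String name;
        public String getName() { return name; }
        public void setName(final String name) { this.name = name; }
    }

    public static void main(final String[] args) {
        final Encoder<SomeBean> beanEncoder = Encoders.bean(SomeBean.class);
        System.out.println(beanEncoder.schema()); // built once, reusable at every map/flatMap call site
    }
}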

View File

@@ -7,7 +7,10 @@ import java.util.Optional;
 import org.apache.commons.io.IOUtils;
 import org.apache.spark.SparkConf;
+import org.apache.spark.api.java.function.FilterFunction;
+import org.apache.spark.api.java.function.MapFunction;
 import org.apache.spark.sql.Dataset;
+import org.apache.spark.sql.Encoder;
 import org.apache.spark.sql.Encoders;
 import org.apache.spark.sql.SparkSession;
 import org.apache.spark.util.LongAccumulator;
@@ -73,11 +76,12 @@ public class PrepareSimpleEntititiesJob {
 final String graphPath,
 final Class<SRC> sourceClass) {
+final Encoder<OaBrokerMainEntity> encoder = Encoders.bean(OaBrokerMainEntity.class);
 return ClusterUtils
 .readPath(spark, graphPath + "/" + sourceClass.getSimpleName().toLowerCase(), sourceClass)
-.filter(r -> !ClusterUtils.isDedupRoot(r.getId()))
-.filter(r -> r.getDataInfo().getDeletedbyinference())
-.map(ConversionUtils::oafResultToBrokerResult, Encoders.bean(OaBrokerMainEntity.class));
+.filter((FilterFunction<SRC>) r -> !ClusterUtils.isDedupRoot(r.getId()))
+.filter((FilterFunction<SRC>) r -> r.getDataInfo().getDeletedbyinference())
+.map((MapFunction<SRC, OaBrokerMainEntity>) ConversionUtils::oafResultToBrokerResult, encoder);
 }
 }
View File
@ -34,7 +34,7 @@ public class ClusterUtils {
public static Dataset<Relation> loadRelations(final String graphPath, final SparkSession spark) { public static Dataset<Relation> loadRelations(final String graphPath, final SparkSession spark) {
return ClusterUtils return ClusterUtils
.readPath(spark, graphPath + "/relation", Relation.class) .readPath(spark, graphPath + "/relation", Relation.class)
.map(r -> { .map((MapFunction<Relation, Relation>) r -> {
r.setSource(ConversionUtils.cleanOpenaireId(r.getSource())); r.setSource(ConversionUtils.cleanOpenaireId(r.getSource()));
r.setTarget(ConversionUtils.cleanOpenaireId(r.getTarget())); r.setTarget(ConversionUtils.cleanOpenaireId(r.getTarget()));
return r; return r;
@ -75,7 +75,7 @@ public class ClusterUtils {
final Class<T> clazz, final Class<T> clazz,
final LongAccumulator acc) { final LongAccumulator acc) {
dataset dataset
.map(o -> ClusterUtils.incrementAccumulator(o, acc), Encoders.bean(clazz)) .map((MapFunction<T, T>) o -> ClusterUtils.incrementAccumulator(o, acc), Encoders.bean(clazz))
.write() .write()
.mode(SaveMode.Overwrite) .mode(SaveMode.Overwrite)
.option("compression", "gzip") .option("compression", "gzip")
View File
@ -105,7 +105,6 @@ public final class UpdateInfo<T> {
.map(OaBrokerInstance::getUrl) .map(OaBrokerInstance::getUrl)
.findFirst() .findFirst()
.orElse(null); .orElse(null);
;
final OaBrokerProvenance provenance = new OaBrokerProvenance(provId, provRepo, provType, provUrl); final OaBrokerProvenance provenance = new OaBrokerProvenance(provId, provRepo, provType, provUrl);
View File
@ -44,7 +44,7 @@ public class GroupEntitiesSparkJob {
private final static String ID_JPATH = "$.id"; private final static String ID_JPATH = "$.id";
private static ObjectMapper OBJECT_MAPPER = new ObjectMapper() private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper()
.configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES, false); .configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES, false);
public static void main(String[] args) throws Exception { public static void main(String[] args) throws Exception {
View File
@ -11,7 +11,7 @@ import eu.dnetlib.dhp.schema.oaf.Relation;
public class RelationAggregator extends Aggregator<Relation, Relation, Relation> { public class RelationAggregator extends Aggregator<Relation, Relation, Relation> {
private static Relation ZERO = new Relation(); private static final Relation ZERO = new Relation();
@Override @Override
public Relation zero() { public Relation zero() {
View File
@ -161,7 +161,7 @@ public class SparkPropagateRelation extends AbstractSparkAction {
} }
private FilterFunction<Relation> getRelationFilterFunction() { private FilterFunction<Relation> getRelationFilterFunction() {
return (FilterFunction<Relation>) r -> StringUtils.isNotBlank(r.getSource()) || return r -> StringUtils.isNotBlank(r.getSource()) ||
StringUtils.isNotBlank(r.getTarget()) || StringUtils.isNotBlank(r.getTarget()) ||
StringUtils.isNotBlank(r.getRelClass()) || StringUtils.isNotBlank(r.getRelClass()) ||
StringUtils.isNotBlank(r.getSubRelType()) || StringUtils.isNotBlank(r.getSubRelType()) ||
View File
@ -30,7 +30,7 @@ public class EntityMergerTest implements Serializable {
private String testEntityBasePath; private String testEntityBasePath;
private DataInfo dataInfo; private DataInfo dataInfo;
private String dedupId = "00|dedup_id::1"; private final String dedupId = "00|dedup_id::1";
private Publication pub_top; private Publication pub_top;
@BeforeEach @BeforeEach
View File
@ -30,7 +30,7 @@ import scala.Tuple2;
@TestMethodOrder(MethodOrderer.OrderAnnotation.class) @TestMethodOrder(MethodOrderer.OrderAnnotation.class)
public class IdGeneratorTest { public class IdGeneratorTest {
private static ObjectMapper OBJECT_MAPPER = new ObjectMapper() private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper()
.configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES, false); .configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES, false);
private static List<Identifier<Publication>> bestIds; private static List<Identifier<Publication>> bestIds;
View File
@ -44,10 +44,10 @@ import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService;
@TestMethodOrder(MethodOrderer.OrderAnnotation.class) @TestMethodOrder(MethodOrderer.OrderAnnotation.class)
public class SparkOpenorgsDedupTest implements Serializable { public class SparkOpenorgsDedupTest implements Serializable {
private static String dbUrl = "jdbc:h2:mem:openorgs_test;DB_CLOSE_DELAY=-1;DATABASE_TO_UPPER=false"; private static final String dbUrl = "jdbc:h2:mem:openorgs_test;DB_CLOSE_DELAY=-1;DATABASE_TO_UPPER=false";
private static String dbUser = "sa"; private static final String dbUser = "sa";
private static String dbTable = "tmp_dedup_events"; private static final String dbTable = "tmp_dedup_events";
private static String dbPwd = ""; private static final String dbPwd = "";
@Mock(serializable = true) @Mock(serializable = true)
ISLookUpService isLookUpService; ISLookUpService isLookUpService;
View File
@ -101,9 +101,8 @@ public class CrossrefImporter {
int size = decompresser.inflate(buffer); int size = decompresser.inflate(buffer);
bos.write(buffer, 0, size); bos.write(buffer, 0, size);
} }
byte[] unzippeddata = bos.toByteArray();
decompresser.end(); decompresser.end();
return new String(unzippeddata); return bos.toString();
} catch (Throwable e) { } catch (Throwable e) {
throw new RuntimeException("Wrong record:" + blob, e); throw new RuntimeException("Wrong record:" + blob, e);
} }
View File
@ -113,7 +113,7 @@ public class ActivitiesDecompressor {
} }
} else { } else {
Log.warn("Data not retrievable [" + entry.getName() + "] " + buffer.toString()); Log.warn("Data not retrievable [" + entry.getName() + "] " + buffer);
xmlParserErrorFound += 1; xmlParserErrorFound += 1;
} }
} }
View File
@ -16,13 +16,13 @@ public class ExtractXMLActivitiesData extends OrcidDSManager {
private String outputWorksPath; private String outputWorksPath;
private String activitiesFileNameTarGz; private String activitiesFileNameTarGz;
public static void main(String[] args) throws IOException, Exception { public static void main(String[] args) throws Exception {
ExtractXMLActivitiesData extractXMLActivitiesData = new ExtractXMLActivitiesData(); ExtractXMLActivitiesData extractXMLActivitiesData = new ExtractXMLActivitiesData();
extractXMLActivitiesData.loadArgs(args); extractXMLActivitiesData.loadArgs(args);
extractXMLActivitiesData.extractWorks(); extractXMLActivitiesData.extractWorks();
} }
private void loadArgs(String[] args) throws IOException, Exception { private void loadArgs(String[] args) throws Exception {
final ArgumentApplicationParser parser = new ArgumentApplicationParser( final ArgumentApplicationParser parser = new ArgumentApplicationParser(
IOUtils IOUtils
.toString( .toString(
View File
@ -17,13 +17,13 @@ public class ExtractXMLSummariesData extends OrcidDSManager {
private String outputAuthorsPath; private String outputAuthorsPath;
private String summariesFileNameTarGz; private String summariesFileNameTarGz;
public static void main(String[] args) throws IOException, Exception { public static void main(String[] args) throws Exception {
ExtractXMLSummariesData extractXMLSummariesData = new ExtractXMLSummariesData(); ExtractXMLSummariesData extractXMLSummariesData = new ExtractXMLSummariesData();
extractXMLSummariesData.loadArgs(args); extractXMLSummariesData.loadArgs(args);
extractXMLSummariesData.extractAuthors(); extractXMLSummariesData.extractAuthors();
} }
private void loadArgs(String[] args) throws IOException, Exception { private void loadArgs(String[] args) throws Exception {
final ArgumentApplicationParser parser = new ArgumentApplicationParser( final ArgumentApplicationParser parser = new ArgumentApplicationParser(
IOUtils IOUtils
.toString( .toString(
View File
@ -16,7 +16,7 @@ public class OrcidAuthorsDOIsDataGen extends OrcidDSManager {
private String activitiesFileNameTarGz; private String activitiesFileNameTarGz;
private String outputAuthorsDOIsPath; private String outputAuthorsDOIsPath;
public static void main(String[] args) throws IOException, Exception { public static void main(String[] args) throws Exception {
OrcidAuthorsDOIsDataGen orcidAuthorsDOIsDataGen = new OrcidAuthorsDOIsDataGen(); OrcidAuthorsDOIsDataGen orcidAuthorsDOIsDataGen = new OrcidAuthorsDOIsDataGen();
orcidAuthorsDOIsDataGen.loadArgs(args); orcidAuthorsDOIsDataGen.loadArgs(args);
orcidAuthorsDOIsDataGen.generateAuthorsDOIsData(); orcidAuthorsDOIsDataGen.generateAuthorsDOIsData();
@ -30,7 +30,7 @@ public class OrcidAuthorsDOIsDataGen extends OrcidDSManager {
ActivitiesDecompressor.parseGzActivities(conf, tarGzUri, outputPath); ActivitiesDecompressor.parseGzActivities(conf, tarGzUri, outputPath);
} }
private void loadArgs(String[] args) throws IOException, Exception { private void loadArgs(String[] args) throws Exception {
final ArgumentApplicationParser parser = new ArgumentApplicationParser( final ArgumentApplicationParser parser = new ArgumentApplicationParser(
IOUtils IOUtils
.toString( .toString(
View File
@ -19,7 +19,7 @@ public class OrcidDSManager {
private String summariesFileNameTarGz; private String summariesFileNameTarGz;
private String outputAuthorsPath; private String outputAuthorsPath;
public static void main(String[] args) throws IOException, Exception { public static void main(String[] args) throws Exception {
OrcidDSManager orcidDSManager = new OrcidDSManager(); OrcidDSManager orcidDSManager = new OrcidDSManager();
orcidDSManager.loadArgs(args); orcidDSManager.loadArgs(args);
orcidDSManager.generateAuthors(); orcidDSManager.generateAuthors();
@ -56,7 +56,7 @@ public class OrcidDSManager {
return fs; return fs;
} }
private void loadArgs(String[] args) throws IOException, Exception { private void loadArgs(String[] args) throws Exception {
final ArgumentApplicationParser parser = new ArgumentApplicationParser( final ArgumentApplicationParser parser = new ArgumentApplicationParser(
IOUtils IOUtils
.toString( .toString(
View File
@ -46,7 +46,7 @@ public class SparkDownloadOrcidWorks {
public static final DateTimeFormatter ORCID_XML_DATETIMEFORMATTER = DateTimeFormatter public static final DateTimeFormatter ORCID_XML_DATETIMEFORMATTER = DateTimeFormatter
.ofPattern(ORCID_XML_DATETIME_FORMAT); .ofPattern(ORCID_XML_DATETIME_FORMAT);
public static void main(String[] args) throws IOException, Exception { public static void main(String[] args) throws Exception {
final ArgumentApplicationParser parser = new ArgumentApplicationParser( final ArgumentApplicationParser parser = new ArgumentApplicationParser(
IOUtils IOUtils
@ -246,6 +246,6 @@ public class SparkDownloadOrcidWorks {
return name.getAsString(); return name.getAsString();
} }
} }
return new String(""); return "";
} }
} }
View File
@ -33,7 +33,7 @@ public class SparkGenLastModifiedSeq {
private static String outputPath; private static String outputPath;
private static String lambdaFileName; private static String lambdaFileName;
public static void main(String[] args) throws IOException, Exception { public static void main(String[] args) throws Exception {
final ArgumentApplicationParser parser = new ArgumentApplicationParser( final ArgumentApplicationParser parser = new ArgumentApplicationParser(
IOUtils IOUtils
.toString( .toString(
View File
@ -36,7 +36,7 @@ import scala.Tuple2;
public class SparkGenerateDoiAuthorList { public class SparkGenerateDoiAuthorList {
public static void main(String[] args) throws IOException, Exception { public static void main(String[] args) throws Exception {
Logger logger = LoggerFactory.getLogger(SparkGenerateDoiAuthorList.class); Logger logger = LoggerFactory.getLogger(SparkGenerateDoiAuthorList.class);
logger.info("[ SparkGenerateDoiAuthorList STARTED]"); logger.info("[ SparkGenerateDoiAuthorList STARTED]");
View File
@ -38,7 +38,7 @@ public class SparkUpdateOrcidAuthors {
private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper() private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper()
.setSerializationInclusion(JsonInclude.Include.NON_NULL); .setSerializationInclusion(JsonInclude.Include.NON_NULL);
public static void main(String[] args) throws IOException, Exception { public static void main(String[] args) throws Exception {
Logger logger = LoggerFactory.getLogger(SparkUpdateOrcidAuthors.class); Logger logger = LoggerFactory.getLogger(SparkUpdateOrcidAuthors.class);
final ArgumentApplicationParser parser = new ArgumentApplicationParser( final ArgumentApplicationParser parser = new ArgumentApplicationParser(
@ -204,7 +204,7 @@ public class SparkUpdateOrcidAuthors {
a -> a._1().equals(authorSummary.getAuthorData().getOid()) && a -> a._1().equals(authorSummary.getAuthorData().getOid()) &&
a._2().equals(authorSummary.getDownloadDate())) a._2().equals(authorSummary.getDownloadDate()))
.count() == 1; .count() == 1;
return (oidFound && tsFound) || (!oidFound); return !oidFound || tsFound;
}); });
Dataset<AuthorSummary> cleanedDS = spark Dataset<AuthorSummary> cleanedDS = spark
View File
@ -38,7 +38,7 @@ public class SparkUpdateOrcidDatasets {
private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper() private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper()
.setSerializationInclusion(JsonInclude.Include.NON_NULL); .setSerializationInclusion(JsonInclude.Include.NON_NULL);
public static void main(String[] args) throws IOException, Exception { public static void main(String[] args) throws Exception {
Logger logger = LoggerFactory.getLogger(SparkUpdateOrcidDatasets.class); Logger logger = LoggerFactory.getLogger(SparkUpdateOrcidDatasets.class);
final ArgumentApplicationParser parser = new ArgumentApplicationParser( final ArgumentApplicationParser parser = new ArgumentApplicationParser(
View File
@ -35,7 +35,7 @@ public class SparkUpdateOrcidWorks {
private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper() private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper()
.setSerializationInclusion(JsonInclude.Include.NON_NULL); .setSerializationInclusion(JsonInclude.Include.NON_NULL);
public static void main(String[] args) throws IOException, Exception { public static void main(String[] args) throws Exception {
Logger logger = LoggerFactory.getLogger(SparkUpdateOrcidWorks.class); Logger logger = LoggerFactory.getLogger(SparkUpdateOrcidWorks.class);
final ArgumentApplicationParser parser = new ArgumentApplicationParser( final ArgumentApplicationParser parser = new ArgumentApplicationParser(
View File
@ -124,7 +124,7 @@ public class SummariesDecompressor {
} }
} else { } else {
Log.warn("Data not retrievable [" + entry.getName() + "] " + buffer.toString()); Log.warn("Data not retrievable [" + entry.getName() + "] " + buffer);
xmlParserErrorFound += 1; xmlParserErrorFound += 1;
} }
} }
View File
@ -35,7 +35,7 @@ public class HDFSUtil {
if (!fileSystem.exists(toReadPath)) { if (!fileSystem.exists(toReadPath)) {
throw new RuntimeException("File not exist: " + path); throw new RuntimeException("File not exist: " + path);
} }
logger.info("Last_update_path " + toReadPath.toString()); logger.info("Last_update_path " + toReadPath);
FSDataInputStream inputStream = new FSDataInputStream(fileSystem.open(toReadPath)); FSDataInputStream inputStream = new FSDataInputStream(fileSystem.open(toReadPath));
BufferedReader br = new BufferedReader(new InputStreamReader(inputStream)); BufferedReader br = new BufferedReader(new InputStreamReader(inputStream));
StringBuffer sb = new StringBuffer(); StringBuffer sb = new StringBuffer();
@ -60,7 +60,7 @@ public class HDFSUtil {
fileSystem.delete(toWritePath, true); fileSystem.delete(toWritePath, true);
} }
FSDataOutputStream os = fileSystem.create(toWritePath); FSDataOutputStream os = fileSystem.create(toWritePath);
BufferedWriter br = new BufferedWriter(new OutputStreamWriter(os, "UTF-8")); BufferedWriter br = new BufferedWriter(new OutputStreamWriter(os, StandardCharsets.UTF_8));
br.write(text); br.write(text);
br.close(); br.close();
} }
View File
@ -39,7 +39,7 @@ public class XMLRecordParser {
private static final String NS_ERROR = "error"; private static final String NS_ERROR = "error";
public static AuthorData VTDParseAuthorData(byte[] bytes) public static AuthorData VTDParseAuthorData(byte[] bytes)
throws VtdException, EncodingException, EOFException, EntityException, ParseException { throws VtdException, ParseException {
final VTDGen vg = new VTDGen(); final VTDGen vg = new VTDGen();
vg.setDoc(bytes); vg.setDoc(bytes);
vg.parse(true); vg.parse(true);
@ -134,7 +134,7 @@ public class XMLRecordParser {
} }
public static WorkData VTDParseWorkData(byte[] bytes) public static WorkData VTDParseWorkData(byte[] bytes)
throws VtdException, EncodingException, EOFException, EntityException, ParseException { throws VtdException, ParseException {
final VTDGen vg = new VTDGen(); final VTDGen vg = new VTDGen();
vg.setDoc(bytes); vg.setDoc(bytes);
vg.parse(true); vg.parse(true);
View File
@ -123,7 +123,7 @@ public class ActivitiesDumpReader {
} }
} else { } else {
Log.warn("Data not retrievable [" + entry.getName() + "] " + buffer.toString()); Log.warn("Data not retrievable [" + entry.getName() + "] " + buffer);
xmlParserErrorFound += 1; xmlParserErrorFound += 1;
} }
} }
View File
@ -22,7 +22,7 @@ public class GenOrcidAuthorWork extends OrcidDSManager {
private String activitiesFileNameTarGz; private String activitiesFileNameTarGz;
private String outputWorksPath; private String outputWorksPath;
public static void main(String[] args) throws IOException, Exception { public static void main(String[] args) throws Exception {
GenOrcidAuthorWork genOrcidAuthorWork = new GenOrcidAuthorWork(); GenOrcidAuthorWork genOrcidAuthorWork = new GenOrcidAuthorWork();
genOrcidAuthorWork.loadArgs(args); genOrcidAuthorWork.loadArgs(args);
genOrcidAuthorWork.generateAuthorsDOIsData(); genOrcidAuthorWork.generateAuthorsDOIsData();
@ -36,7 +36,7 @@ public class GenOrcidAuthorWork extends OrcidDSManager {
ActivitiesDumpReader.parseGzActivities(conf, tarGzUri, outputPath); ActivitiesDumpReader.parseGzActivities(conf, tarGzUri, outputPath);
} }
private void loadArgs(String[] args) throws IOException, Exception { private void loadArgs(String[] args) throws Exception {
final ArgumentApplicationParser parser = new ArgumentApplicationParser( final ArgumentApplicationParser parser = new ArgumentApplicationParser(
IOUtils IOUtils
.toString( .toString(
View File
@ -52,7 +52,7 @@ public class SparkGenEnrichedOrcidWorks {
private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper(); private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();
public static void main(String[] args) throws IOException, Exception { public static void main(String[] args) throws Exception {
final ArgumentApplicationParser parser = new ArgumentApplicationParser( final ArgumentApplicationParser parser = new ArgumentApplicationParser(
IOUtils IOUtils
@ -169,7 +169,7 @@ public class SparkGenEnrichedOrcidWorks {
oafPublicationRDD oafPublicationRDD
.mapToPair( .mapToPair(
p -> new Tuple2<>(p.getClass().toString(), p -> new Tuple2<>(p.getClass().toString(),
OBJECT_MAPPER.writeValueAsString(new AtomicAction<>(Publication.class, (Publication) p)))) OBJECT_MAPPER.writeValueAsString(new AtomicAction<>(Publication.class, p))))
.mapToPair(t -> new Tuple2(new Text(t._1()), new Text(t._2()))) .mapToPair(t -> new Tuple2(new Text(t._1()), new Text(t._2())))
.saveAsNewAPIHadoopFile( .saveAsNewAPIHadoopFile(
workingPath.concat(outputEnrichedWorksPath), workingPath.concat(outputEnrichedWorksPath),
View File
@ -16,7 +16,7 @@ import eu.dnetlib.doiboost.orcid.model.WorkData;
public class JsonWriter { public class JsonWriter {
public static final com.fasterxml.jackson.databind.ObjectMapper OBJECT_MAPPER = new ObjectMapper() public static final com.fasterxml.jackson.databind.ObjectMapper OBJECT_MAPPER = new ObjectMapper()
.setSerializationInclusion(JsonInclude.Include.NON_NULL);; .setSerializationInclusion(JsonInclude.Include.NON_NULL);
public static String create(AuthorData authorData) throws JsonProcessingException { public static String create(AuthorData authorData) throws JsonProcessingException {
return OBJECT_MAPPER.writeValueAsString(authorData); return OBJECT_MAPPER.writeValueAsString(authorData);

View File
this.dateOfCollection = null; this.dateOfCollection = null;
} }
private static Map<String, Pair<String, String>> datasources = new HashMap<String, Pair<String, String>>() { private static final Map<String, Pair<String, String>> datasources = new HashMap<String, Pair<String, String>>() {
{ {
put( put(
@ -99,7 +99,7 @@ public class PublicationToOaf implements Serializable {
}; };
// json external id will be mapped to oaf:pid/@classid Map to oaf:pid/@classname // json external id will be mapped to oaf:pid/@classid Map to oaf:pid/@classname
private static Map<String, Pair<String, String>> externalIds = new HashMap<String, Pair<String, String>>() { private static final Map<String, Pair<String, String>> externalIds = new HashMap<String, Pair<String, String>>() {
{ {
put("ark".toLowerCase(), new Pair<>("ark", "ark")); put("ark".toLowerCase(), new Pair<>("ark", "ark"));
@ -529,9 +529,7 @@ public class PublicationToOaf implements Serializable {
if (jsonArray.isJsonNull()) { if (jsonArray.isJsonNull()) {
return false; return false;
} }
if (jsonArray.get(0).isJsonNull()) { return !jsonArray.get(0).isJsonNull();
return false;
}
} }
return true; return true;
} }
View File
@ -142,7 +142,7 @@ public class AuthorMatcher {
public static String normalize(final String s) { public static String normalize(final String s) {
if (s == null) { if (s == null) {
return new String(""); return "";
} }
return nfd(s) return nfd(s)
.toLowerCase() .toLowerCase()
@ -189,16 +189,13 @@ public class AuthorMatcher {
} }
private static void updateRanks(List<Contributor> contributors) { private static void updateRanks(List<Contributor> contributors) {
boolean seqFound = false; boolean seqFound = contributors
if (contributors
.stream() .stream()
.filter( .filter(
c -> c.getRole() != null && c.getSequence() != null && c -> c.getRole() != null && c.getSequence() != null &&
c.getRole().equals("author") && (c.getSequence().equals("first") || c.getRole().equals("author") && (c.getSequence().equals("first") ||
c.getSequence().equals("additional"))) c.getSequence().equals("additional")))
.count() > 0) { .count() > 0;
seqFound = true;
}
if (!seqFound) { if (!seqFound) {
List<Integer> seqIds = Arrays.asList(0); List<Integer> seqIds = Arrays.asList(0);
contributors.forEach(c -> { contributors.forEach(c -> {
View File
@ -20,7 +20,7 @@ public class DumpToActionsUtility {
public static String getStringValue(final JsonObject root, final String key) { public static String getStringValue(final JsonObject root, final String key) {
if (root.has(key) && !root.get(key).isJsonNull()) if (root.has(key) && !root.get(key).isJsonNull())
return root.get(key).getAsString(); return root.get(key).getAsString();
return new String(""); return "";
} }
public static List<String> getArrayValues(final JsonObject root, final String key) { public static List<String> getArrayValues(final JsonObject root, final String key) {
View File
@ -3,9 +3,9 @@ package eu.dnetlib.doiboost.orcidnodoi.util;
public class Pair<K, V> { public class Pair<K, V> {
private K k; private final K k;
private V v; private final V v;
public Pair(K k, V v) { public Pair(K k, V v) {
this.k = k; this.k = k;

View File
private static final String NS_ERROR = "error"; private static final String NS_ERROR = "error";
public static WorkDetail VTDParseWorkData(byte[] bytes) public static WorkDetail VTDParseWorkData(byte[] bytes)
throws VtdException, EncodingException, EOFException, EntityException, ParseException, XPathParseException, throws VtdException, ParseException, XPathParseException,
NavException, XPathEvalException { NavException, XPathEvalException {
final VTDGen vg = new VTDGen(); final VTDGen vg = new VTDGen();
vg.setDoc(bytes); vg.setDoc(bytes);

File diff suppressed because one or more lines are too long
View File
@ -83,7 +83,7 @@ public class OrcidClientTest {
} catch (Throwable e) { } catch (Throwable e) {
e.printStackTrace(); e.printStackTrace();
} }
return new String(""); return "";
} }
// @Test // @Test
@ -142,7 +142,7 @@ public class OrcidClientTest {
} }
private void testDate(String value) throws ParseException { private void testDate(String value) throws ParseException {
System.out.println(value.toString()); System.out.println(value);
if (value.length() != 19) { if (value.length() != 19) {
value = value.substring(0, 19); value = value.substring(0, 19);
} }
@ -185,7 +185,7 @@ public class OrcidClientTest {
br = new BufferedReader(new InputStreamReader(input)); // Read directly from tarInput br = new BufferedReader(new InputStreamReader(input)); // Read directly from tarInput
String line; String line;
while ((line = br.readLine()) != null) { while ((line = br.readLine()) != null) {
String[] values = line.toString().split(","); String[] values = line.split(",");
List<String> recordInfo = Arrays.asList(values); List<String> recordInfo = Arrays.asList(values);
assertTrue(recordInfo.size() == 4); assertTrue(recordInfo.size() == 4);
String orcid = recordInfo.get(0); String orcid = recordInfo.get(0);
@ -260,7 +260,7 @@ public class OrcidClientTest {
} catch (Throwable e) { } catch (Throwable e) {
e.printStackTrace(); e.printStackTrace();
} }
return new String(""); return "";
} }
@Test @Test
View File
@ -44,7 +44,7 @@ public class XMLRecordParserTest {
XMLRecordParser p = new XMLRecordParser(); XMLRecordParser p = new XMLRecordParser();
AuthorData authorData = p.VTDParseAuthorData(xml.getBytes()); AuthorData authorData = XMLRecordParser.VTDParseAuthorData(xml.getBytes());
assertNotNull(authorData); assertNotNull(authorData);
assertNotNull(authorData.getName()); assertNotNull(authorData.getName());
System.out.println("name: " + authorData.getName()); System.out.println("name: " + authorData.getName());
@ -60,7 +60,7 @@ public class XMLRecordParserTest {
XMLRecordParser p = new XMLRecordParser(); XMLRecordParser p = new XMLRecordParser();
AuthorData authorData = p.VTDParseAuthorData(xml.getBytes()); AuthorData authorData = XMLRecordParser.VTDParseAuthorData(xml.getBytes());
assertNotNull(authorData); assertNotNull(authorData);
assertNotNull(authorData.getErrorCode()); assertNotNull(authorData.getErrorCode());
System.out.println("error: " + authorData.getErrorCode()); System.out.println("error: " + authorData.getErrorCode());
@ -75,7 +75,7 @@ public class XMLRecordParserTest {
XMLRecordParser p = new XMLRecordParser(); XMLRecordParser p = new XMLRecordParser();
WorkData workData = p.VTDParseWorkData(xml.getBytes()); WorkData workData = XMLRecordParser.VTDParseWorkData(xml.getBytes());
assertNotNull(workData); assertNotNull(workData);
assertNotNull(workData.getOid()); assertNotNull(workData.getOid());
System.out.println("oid: " + workData.getOid()); System.out.println("oid: " + workData.getOid());
View File
@ -50,7 +50,7 @@ public class OrcidNoDoiTest {
} }
WorkDetail workData = null; WorkDetail workData = null;
try { try {
workData = p.VTDParseWorkData(xml.getBytes()); workData = XMLRecordParserNoDoi.VTDParseWorkData(xml.getBytes());
} catch (Exception e) { } catch (Exception e) {
logger.error("parsing xml", e); logger.error("parsing xml", e);
} }
@ -107,7 +107,7 @@ public class OrcidNoDoiTest {
} }
WorkDetail workData = null; WorkDetail workData = null;
try { try {
workData = p.VTDParseWorkData(xml.getBytes()); workData = XMLRecordParserNoDoi.VTDParseWorkData(xml.getBytes());
} catch (Exception e) { } catch (Exception e) {
logger.error("parsing xml", e); logger.error("parsing xml", e);
} }
@ -138,7 +138,7 @@ public class OrcidNoDoiTest {
} }
WorkDetail workData = null; WorkDetail workData = null;
try { try {
workData = p.VTDParseWorkData(xml.getBytes()); workData = XMLRecordParserNoDoi.VTDParseWorkData(xml.getBytes());
} catch (Exception e) { } catch (Exception e) {
logger.error("parsing xml", e); logger.error("parsing xml", e);
} }
@ -181,7 +181,7 @@ public class OrcidNoDoiTest {
} }
WorkDetail workData = null; WorkDetail workData = null;
try { try {
workData = p.VTDParseWorkData(xml.getBytes()); workData = XMLRecordParserNoDoi.VTDParseWorkData(xml.getBytes());
} catch (Exception e) { } catch (Exception e) {
logger.error("parsing xml", e); logger.error("parsing xml", e);
} }
@ -217,16 +217,16 @@ public class OrcidNoDoiTest {
.stream() .stream()
.filter(c -> !StringUtils.isBlank(c.getCreditName())) .filter(c -> !StringUtils.isBlank(c.getCreditName()))
.forEach(c -> { .forEach(c -> {
if (am.simpleMatch(c.getCreditName(), author.getName()) || if (AuthorMatcher.simpleMatch(c.getCreditName(), author.getName()) ||
am.simpleMatch(c.getCreditName(), author.getSurname()) || AuthorMatcher.simpleMatch(c.getCreditName(), author.getSurname()) ||
am.simpleMatchOnOtherNames(c.getCreditName(), author.getOtherNames())) { AuthorMatcher.simpleMatchOnOtherNames(c.getCreditName(), author.getOtherNames())) {
matchCounters.set(0, matchCounters.get(0) + 1); matchCounters.set(0, matchCounters.get(0) + 1);
c.setSimpleMatch(true); c.setSimpleMatch(true);
} }
}); });
assertTrue(matchCounters.get(0) == 1); assertTrue(matchCounters.get(0) == 1);
am.updateAuthorsSimpleMatch(contributors, author); AuthorMatcher.updateAuthorsSimpleMatch(contributors, author);
assertTrue(contributors.get(0).getName().equals("Joe")); assertTrue(contributors.get(0).getName().equals("Joe"));
assertTrue(contributors.get(0).getSurname().equals("Dodge")); assertTrue(contributors.get(0).getSurname().equals("Dodge"));
assertTrue(contributors.get(0).getCreditName().equals("Joe Dodge")); assertTrue(contributors.get(0).getCreditName().equals("Joe Dodge"));
@ -249,9 +249,9 @@ public class OrcidNoDoiTest {
.stream() .stream()
.filter(c -> !StringUtils.isBlank(c.getCreditName())) .filter(c -> !StringUtils.isBlank(c.getCreditName()))
.forEach(c -> { .forEach(c -> {
if (am.simpleMatch(c.getCreditName(), authorX.getName()) || if (AuthorMatcher.simpleMatch(c.getCreditName(), authorX.getName()) ||
am.simpleMatch(c.getCreditName(), authorX.getSurname()) || AuthorMatcher.simpleMatch(c.getCreditName(), authorX.getSurname()) ||
am.simpleMatchOnOtherNames(c.getCreditName(), author.getOtherNames())) { AuthorMatcher.simpleMatchOnOtherNames(c.getCreditName(), author.getOtherNames())) {
int currentCounter = matchCounters2.get(0); int currentCounter = matchCounters2.get(0);
currentCounter += 1; currentCounter += 1;
matchCounters2.set(0, currentCounter); matchCounters2.set(0, currentCounter);
@ -268,7 +268,7 @@ public class OrcidNoDoiTest {
.filter(c -> c.isSimpleMatch()) .filter(c -> c.isSimpleMatch())
.filter(c -> !StringUtils.isBlank(c.getCreditName())) .filter(c -> !StringUtils.isBlank(c.getCreditName()))
.map(c -> { .map(c -> {
c.setScore(am.bestMatch(authorX.getName(), authorX.getSurname(), c.getCreditName())); c.setScore(AuthorMatcher.bestMatch(authorX.getName(), authorX.getSurname(), c.getCreditName()));
return c; return c;
}) })
.filter(c -> c.getScore() >= AuthorMatcher.threshold) .filter(c -> c.getScore() >= AuthorMatcher.threshold)
@ -280,7 +280,7 @@ public class OrcidNoDoiTest {
assertTrue(bestMatchContributor.getCreditName().equals("Abdel-Dayem Khai")); assertTrue(bestMatchContributor.getCreditName().equals("Abdel-Dayem Khai"));
assertTrue(contributorList.get(0).isBestMatch()); assertTrue(contributorList.get(0).isBestMatch());
assertTrue(!contributorList.get(1).isBestMatch()); assertTrue(!contributorList.get(1).isBestMatch());
am.updateAuthorsSimilarityMatch(contributorList, authorX); AuthorMatcher.updateAuthorsSimilarityMatch(contributorList, authorX);
assertTrue(contributorList.get(0).getName().equals(nameA)); assertTrue(contributorList.get(0).getName().equals(nameA));
assertTrue(contributorList.get(0).getSurname().equals(surnameA)); assertTrue(contributorList.get(0).getSurname().equals(surnameA));
assertTrue(contributorList.get(0).getCreditName().equals("Abdel-Dayem Khai")); assertTrue(contributorList.get(0).getCreditName().equals("Abdel-Dayem Khai"));
@ -310,7 +310,7 @@ public class OrcidNoDoiTest {
} }
WorkDetail workData = null; WorkDetail workData = null;
try { try {
workData = p.VTDParseWorkData(xml.getBytes()); workData = XMLRecordParserNoDoi.VTDParseWorkData(xml.getBytes());
} catch (Exception e) { } catch (Exception e) {
logger.error("parsing xml", e); logger.error("parsing xml", e);
} }
@ -331,8 +331,8 @@ public class OrcidNoDoiTest {
author.setName("Joe"); author.setName("Joe");
author.setSurname("Dodge"); author.setSurname("Dodge");
author.setOid("0000-1111-2222-3333"); author.setOid("0000-1111-2222-3333");
String otherName1 = new String("Joe Dr. Dodge"); String otherName1 = "Joe Dr. Dodge";
String otherName2 = new String("XY"); String otherName2 = "XY";
List<String> others = Lists.newArrayList(); List<String> others = Lists.newArrayList();
others.add(otherName1); others.add(otherName1);
others.add(otherName2); others.add(otherName2);
View File
@ -123,7 +123,7 @@ public class SparkBulkTagJob {
// TODO remove this hack as soon as the values fixed by this method will be provided as NON null // TODO remove this hack as soon as the values fixed by this method will be provided as NON null
private static <R extends Result> MapFunction<R, R> patchResult() { private static <R extends Result> MapFunction<R, R> patchResult() {
return (MapFunction<R, R>) r -> { return r -> {
if (r.getDataInfo().getDeletedbyinference() == null) { if (r.getDataInfo().getDeletedbyinference() == null) {
r.getDataInfo().setDeletedbyinference(false); r.getDataInfo().setDeletedbyinference(false);
} }
View File
@ -29,7 +29,7 @@ public class CommunityConfigurationFactory {
private static final Log log = LogFactory.getLog(CommunityConfigurationFactory.class); private static final Log log = LogFactory.getLog(CommunityConfigurationFactory.class);
private static VerbResolver resolver = VerbResolverFactory.newInstance(); private static final VerbResolver resolver = VerbResolverFactory.newInstance();
public static CommunityConfiguration newInstance(final String xml) throws DocumentException { public static CommunityConfiguration newInstance(final String xml) throws DocumentException {
@ -51,7 +51,7 @@ public class CommunityConfigurationFactory {
} }
log.info(String.format("loaded %s community configuration profiles", communities.size())); log.info(String.format("loaded %s community configuration profiles", communities.size()));
log.debug(String.format("loaded community configuration:\n%s", communities.toString())); log.debug(String.format("loaded community configuration:\n%s", communities));
return new CommunityConfiguration(communities); return new CommunityConfiguration(communities);
} }
View File
@ -40,7 +40,7 @@ public class VerbResolver implements Serializable {
.getParameterValues() .getParameterValues()
.get(0) .get(0)
.getValue(), .getValue(),
value -> (Class<Selection>) ((ClassInfo) value).loadClass())); value -> (Class<Selection>) value.loadClass()));
} catch (Exception e) { } catch (Exception e) {
e.printStackTrace(); e.printStackTrace();
} }
View File
@ -111,7 +111,7 @@ public class SparkCountryPropagationJob {
} }
private static <R extends Result> MapFunction<Tuple2<R, ResultCountrySet>, R> getCountryMergeFn() { private static <R extends Result> MapFunction<Tuple2<R, ResultCountrySet>, R> getCountryMergeFn() {
return (MapFunction<Tuple2<R, ResultCountrySet>, R>) t -> { return t -> {
Optional.ofNullable(t._2()).ifPresent(r -> { Optional.ofNullable(t._2()).ifPresent(r -> {
t._1().getCountry().addAll(merge(t._1().getCountry(), r.getCountrySet())); t._1().getCountry().addAll(merge(t._1().getCountry(), r.getCountrySet()));
}); });
View File
@ -105,7 +105,7 @@ public class SparkOrcidToResultFromSemRelJob {
} }
private static <R extends Result> MapFunction<Tuple2<R, ResultOrcidList>, R> authorEnrichFn() { private static <R extends Result> MapFunction<Tuple2<R, ResultOrcidList>, R> authorEnrichFn() {
return (MapFunction<Tuple2<R, ResultOrcidList>, R>) value -> { return value -> {
R ret = value._1(); R ret = value._1();
Optional<ResultOrcidList> rol = Optional.ofNullable(value._2()); Optional<ResultOrcidList> rol = Optional.ofNullable(value._2());
if (rol.isPresent()) { if (rol.isPresent()) {
@ -202,8 +202,8 @@ public class SparkOrcidToResultFromSemRelJob {
return false; return false;
} }
for (StructuredProperty pid : pids.get()) { for (StructuredProperty pid : pids.get()) {
if (ModelConstants.ORCID_PENDING.equals(pid.getQualifier().getClassid().toLowerCase()) || if (ModelConstants.ORCID_PENDING.equalsIgnoreCase(pid.getQualifier().getClassid()) ||
ModelConstants.ORCID.equals(pid.getQualifier().getClassid().toLowerCase())) { ModelConstants.ORCID.equalsIgnoreCase(pid.getQualifier().getClassid())) {
return true; return true;
} }
} }
View File
@ -94,7 +94,7 @@ public class SparkResultToProjectThroughSemRelJob {
} }
private static FlatMapFunction<Tuple2<ResultProjectSet, ResultProjectSet>, Relation> mapRelationRn() { private static FlatMapFunction<Tuple2<ResultProjectSet, ResultProjectSet>, Relation> mapRelationRn() {
return (FlatMapFunction<Tuple2<ResultProjectSet, ResultProjectSet>, Relation>) value -> { return value -> {
List<Relation> new_relations = new ArrayList<>(); List<Relation> new_relations = new ArrayList<>();
ResultProjectSet potential_update = value._1(); ResultProjectSet potential_update = value._1();
Optional<ResultProjectSet> already_linked = Optional.ofNullable(value._2()); Optional<ResultProjectSet> already_linked = Optional.ofNullable(value._2());
View File
@ -117,7 +117,7 @@ public class PrepareResultCommunitySet {
private static MapFunction<ResultOrganizations, ResultCommunityList> mapResultCommunityFn( private static MapFunction<ResultOrganizations, ResultCommunityList> mapResultCommunityFn(
OrganizationMap organizationMap) { OrganizationMap organizationMap) {
return (MapFunction<ResultOrganizations, ResultCommunityList>) value -> { return value -> {
String rId = value.getResultId(); String rId = value.getResultId();
Optional<List<String>> orgs = Optional.ofNullable(value.getMerges()); Optional<List<String>> orgs = Optional.ofNullable(value.getMerges());
String oTarget = value.getOrgId(); String oTarget = value.getOrgId();
View File
@ -98,7 +98,7 @@ public class SparkResultToCommunityFromOrganizationJob {
} }
private static <R extends Result> MapFunction<Tuple2<R, ResultCommunityList>, R> resultCommunityFn() { private static <R extends Result> MapFunction<Tuple2<R, ResultCommunityList>, R> resultCommunityFn() {
return (MapFunction<Tuple2<R, ResultCommunityList>, R>) value -> { return value -> {
R ret = value._1(); R ret = value._1();
Optional<ResultCommunityList> rcl = Optional.ofNullable(value._2()); Optional<ResultCommunityList> rcl = Optional.ofNullable(value._2());
if (rcl.isPresent()) { if (rcl.isPresent()) {
View File
@ -101,7 +101,7 @@ public class SparkResultToCommunityThroughSemRelJob {
} }
private static <R extends Result> MapFunction<Tuple2<R, ResultCommunityList>, R> contextUpdaterFn() { private static <R extends Result> MapFunction<Tuple2<R, ResultCommunityList>, R> contextUpdaterFn() {
return (MapFunction<Tuple2<R, ResultCommunityList>, R>) value -> { return value -> {
R ret = value._1(); R ret = value._1();
Optional<ResultCommunityList> rcl = Optional.ofNullable(value._2()); Optional<ResultCommunityList> rcl = Optional.ofNullable(value._2());
if (rcl.isPresent()) { if (rcl.isPresent()) {
View File
@ -124,7 +124,7 @@ public class SparkResultToOrganizationFromIstRepoJob {
} }
private static FlatMapFunction<Tuple2<ResultOrganizationSet, ResultOrganizationSet>, Relation> createRelationFn() { private static FlatMapFunction<Tuple2<ResultOrganizationSet, ResultOrganizationSet>, Relation> createRelationFn() {
return (FlatMapFunction<Tuple2<ResultOrganizationSet, ResultOrganizationSet>, Relation>) value -> { return value -> {
List<Relation> new_relations = new ArrayList<>(); List<Relation> new_relations = new ArrayList<>();
ResultOrganizationSet potential_update = value._1(); ResultOrganizationSet potential_update = value._1();
Optional<ResultOrganizationSet> already_linked = Optional.ofNullable(value._2()); Optional<ResultOrganizationSet> already_linked = Optional.ofNullable(value._2());
View File
@ -14,7 +14,7 @@ public class Constants {
public static final String HARVESTED = "Harvested"; public static final String HARVESTED = "Harvested";
public static final String DEFAULT_TRUST = "0.9"; public static final String DEFAULT_TRUST = "0.9";
public static final String USER_CLAIM = "Linked by user";; public static final String USER_CLAIM = "Linked by user";
public static String COAR_ACCESS_RIGHT_SCHEMA = "http://vocabularies.coar-repositories.org/documentation/access_rights/"; public static String COAR_ACCESS_RIGHT_SCHEMA = "http://vocabularies.coar-repositories.org/documentation/access_rights/";
@ -44,7 +44,7 @@ public class Constants {
public enum DUMPTYPE { public enum DUMPTYPE {
COMPLETE("complete"), COMMUNITY("community"), FUNDER("funder"); COMPLETE("complete"), COMMUNITY("community"), FUNDER("funder");
private String type; private final String type;
DUMPTYPE(String type) { DUMPTYPE(String type) {
this.type = type; this.type = type;
View File
@ -51,7 +51,7 @@ public class GraphHiveImporterJobTest {
conf conf
.set( .set(
"javax.jdo.option.ConnectionURL", "javax.jdo.option.ConnectionURL",
String.format(JDBC_DERBY_TEMPLATE, workingDir.resolve("warehouse").toString())); String.format(JDBC_DERBY_TEMPLATE, workingDir.resolve("warehouse")));
spark = SparkSession spark = SparkSession
.builder() .builder()
View File
@ -40,7 +40,7 @@ public class DumpJobTest {
private static final Logger log = LoggerFactory.getLogger(DumpJobTest.class); private static final Logger log = LoggerFactory.getLogger(DumpJobTest.class);
private static CommunityMap map = new CommunityMap(); private static final CommunityMap map = new CommunityMap();
static { static {
map.put("egi", "EGI Federation"); map.put("egi", "EGI Federation");
View File
@ -37,7 +37,7 @@ public class PrepareResultProjectJobTest {
private static final Logger log = LoggerFactory private static final Logger log = LoggerFactory
.getLogger(eu.dnetlib.dhp.oa.graph.dump.PrepareResultProjectJobTest.class); .getLogger(eu.dnetlib.dhp.oa.graph.dump.PrepareResultProjectJobTest.class);
private static HashMap<String, String> map = new HashMap<>(); private static final HashMap<String, String> map = new HashMap<>();
@BeforeAll @BeforeAll
public static void beforeAll() throws IOException { public static void beforeAll() throws IOException {
View File
@ -36,7 +36,7 @@ public class UpdateProjectInfoTest {
private static final Logger log = LoggerFactory.getLogger(eu.dnetlib.dhp.oa.graph.dump.UpdateProjectInfoTest.class); private static final Logger log = LoggerFactory.getLogger(eu.dnetlib.dhp.oa.graph.dump.UpdateProjectInfoTest.class);
private static HashMap<String, String> map = new HashMap<>(); private static final HashMap<String, String> map = new HashMap<>();
@BeforeAll @BeforeAll
public static void beforeAll() throws IOException { public static void beforeAll() throws IOException {
View File
@ -37,7 +37,7 @@ public class DumpOrganizationProjectDatasourceTest {
private static final Logger log = LoggerFactory private static final Logger log = LoggerFactory
.getLogger(DumpOrganizationProjectDatasourceTest.class); .getLogger(DumpOrganizationProjectDatasourceTest.class);
private static HashMap<String, String> map = new HashMap<>(); private static final HashMap<String, String> map = new HashMap<>();
@BeforeAll @BeforeAll
public static void beforeAll() throws IOException { public static void beforeAll() throws IOException {
View File
@ -36,7 +36,7 @@ public class DumpRelationTest {
private static final Logger log = LoggerFactory private static final Logger log = LoggerFactory
.getLogger(DumpRelationTest.class); .getLogger(DumpRelationTest.class);
private static HashMap<String, String> map = new HashMap<>(); private static final HashMap<String, String> map = new HashMap<>();
@BeforeAll @BeforeAll
public static void beforeAll() throws IOException { public static void beforeAll() throws IOException {
View File
@ -33,7 +33,7 @@ public class RelationFromOrganizationTest {
private static final Logger log = LoggerFactory private static final Logger log = LoggerFactory
.getLogger(RelationFromOrganizationTest.class); .getLogger(RelationFromOrganizationTest.class);
private static HashMap<String, String> map = new HashMap<>(); private static final HashMap<String, String> map = new HashMap<>();
String organizationCommunityMap = "{\"20|grid________::afaa39865943381c51f76c08725ffa75\":[\"mes\",\"euromarine\"], \"20|corda__h2020::e8dbe14cca9bf6fce09d468872f813f8\":[\"mes\",\"euromarine\"], \"20|snsf________::9b253f265e3bef5cae6d881fdf61aceb\":[\"mes\",\"euromarine\"],\"20|rcuk________::e054eea0a47665af8c3656b5785ccf76\":[\"mes\",\"euromarine\"],\"20|corda__h2020::edc18d67c9b11fb616ca9f6e1db1b151\":[\"mes\",\"euromarine\"],\"20|rcuk________::d5736d9da90521ddcdc7828a05a85e9a\":[\"mes\",\"euromarine\"],\"20|corda__h2020::f5d418d3aa1cf817ddefcc3fdc039f27\":[\"mes\",\"euromarine\"],\"20|snsf________::8fa091f8f25a846779acb4ea97b50aef\":[\"mes\",\"euromarine\"],\"20|corda__h2020::81e020977211c2c40fae2e1a50bffd71\":[\"mes\",\"euromarine\"],\"20|corda_______::81e020977211c2c40fae2e1a50bffd71\":[\"mes\",\"euromarine\"],\"20|snsf________::31d0a100e54e3cdb3c6f52d91e638c78\":[\"mes\",\"euromarine\"],\"20|corda__h2020::ea379ef91b8cc86f9ac5edc4169292db\":[\"mes\",\"euromarine\"],\"20|corda__h2020::f75ee2ee48e5cb0ec8c8d30aaa8fef70\":[\"mes\",\"euromarine\"],\"20|rcuk________::e16010089551a1a9182a94604fc0ea59\":[\"mes\",\"euromarine\"],\"20|corda__h2020::38531a2cce7c5c347ffc439b07c1f43b\":[\"mes\",\"euromarine\"],\"20|corda_______::38531a2cce7c5c347ffc439b07c1f43b\":[\"mes\",\"euromarine\"],\"20|grid________::b2cbbf5eadbbf87d534b022bad3191d7\":[\"mes\",\"euromarine\"],\"20|snsf________::74730ef1439d7f7636a8be58a6b471b8\":[\"mes\",\"euromarine\"],\"20|nsf_________::ad72e19043a5a467e35f9b444d11563e\":[\"mes\",\"euromarine\"],\"20|rcuk________::0fc3e92500290902a2d38ec2445e74c3\":[\"mes\",\"euromarine\"],\"20|grid________::ad2c29905da0eb3c06b3fa80cacd89ea\":[\"mes\",\"euromarine\"],\"20|corda__h2020::30b53e4d63d3724f00acb9cbaca40860\":[\"mes\",\"euromarine\"],\"20|corda__h2020::f60f84bee14ad93f0db0e49af1d5c317\":[\"mes\",\"euromarine\"], \"20|corda__h2020::7bf251ac3765b5e89d82270a1763d09f\":[\"mes\",\"euromarine\"], \"20|corda__h2020::65531bd11be9935948c7f2f4db1c1832\":[\"mes\",\"euromarine\"], \"20|corda__h2020::e0e98f86bbc76638bbb72a8fe2302946\":[\"mes\",\"euromarine\"], \"20|snsf________::3eb43582ac27601459a8d8b3e195724b\":[\"mes\",\"euromarine\"], \"20|corda__h2020::af2481dab65d06c8ea0ae02b5517b9b6\":[\"mes\",\"euromarine\"], \"20|corda__h2020::c19d05cfde69a50d3ebc89bd0ee49929\":[\"mes\",\"euromarine\"], \"20|corda__h2020::af0bfd9fc09f80d9488f56d71a9832f0\":[\"mes\",\"euromarine\"], \"20|rcuk________::f33c02afb0dc66c49d0ed97ca5dd5cb0\":[\"beopen\"], " String organizationCommunityMap = "{\"20|grid________::afaa39865943381c51f76c08725ffa75\":[\"mes\",\"euromarine\"], \"20|corda__h2020::e8dbe14cca9bf6fce09d468872f813f8\":[\"mes\",\"euromarine\"], 
\"20|snsf________::9b253f265e3bef5cae6d881fdf61aceb\":[\"mes\",\"euromarine\"],\"20|rcuk________::e054eea0a47665af8c3656b5785ccf76\":[\"mes\",\"euromarine\"],\"20|corda__h2020::edc18d67c9b11fb616ca9f6e1db1b151\":[\"mes\",\"euromarine\"],\"20|rcuk________::d5736d9da90521ddcdc7828a05a85e9a\":[\"mes\",\"euromarine\"],\"20|corda__h2020::f5d418d3aa1cf817ddefcc3fdc039f27\":[\"mes\",\"euromarine\"],\"20|snsf________::8fa091f8f25a846779acb4ea97b50aef\":[\"mes\",\"euromarine\"],\"20|corda__h2020::81e020977211c2c40fae2e1a50bffd71\":[\"mes\",\"euromarine\"],\"20|corda_______::81e020977211c2c40fae2e1a50bffd71\":[\"mes\",\"euromarine\"],\"20|snsf________::31d0a100e54e3cdb3c6f52d91e638c78\":[\"mes\",\"euromarine\"],\"20|corda__h2020::ea379ef91b8cc86f9ac5edc4169292db\":[\"mes\",\"euromarine\"],\"20|corda__h2020::f75ee2ee48e5cb0ec8c8d30aaa8fef70\":[\"mes\",\"euromarine\"],\"20|rcuk________::e16010089551a1a9182a94604fc0ea59\":[\"mes\",\"euromarine\"],\"20|corda__h2020::38531a2cce7c5c347ffc439b07c1f43b\":[\"mes\",\"euromarine\"],\"20|corda_______::38531a2cce7c5c347ffc439b07c1f43b\":[\"mes\",\"euromarine\"],\"20|grid________::b2cbbf5eadbbf87d534b022bad3191d7\":[\"mes\",\"euromarine\"],\"20|snsf________::74730ef1439d7f7636a8be58a6b471b8\":[\"mes\",\"euromarine\"],\"20|nsf_________::ad72e19043a5a467e35f9b444d11563e\":[\"mes\",\"euromarine\"],\"20|rcuk________::0fc3e92500290902a2d38ec2445e74c3\":[\"mes\",\"euromarine\"],\"20|grid________::ad2c29905da0eb3c06b3fa80cacd89ea\":[\"mes\",\"euromarine\"],\"20|corda__h2020::30b53e4d63d3724f00acb9cbaca40860\":[\"mes\",\"euromarine\"],\"20|corda__h2020::f60f84bee14ad93f0db0e49af1d5c317\":[\"mes\",\"euromarine\"], \"20|corda__h2020::7bf251ac3765b5e89d82270a1763d09f\":[\"mes\",\"euromarine\"], \"20|corda__h2020::65531bd11be9935948c7f2f4db1c1832\":[\"mes\",\"euromarine\"], \"20|corda__h2020::e0e98f86bbc76638bbb72a8fe2302946\":[\"mes\",\"euromarine\"], \"20|snsf________::3eb43582ac27601459a8d8b3e195724b\":[\"mes\",\"euromarine\"], \"20|corda__h2020::af2481dab65d06c8ea0ae02b5517b9b6\":[\"mes\",\"euromarine\"], \"20|corda__h2020::c19d05cfde69a50d3ebc89bd0ee49929\":[\"mes\",\"euromarine\"], \"20|corda__h2020::af0bfd9fc09f80d9488f56d71a9832f0\":[\"mes\",\"euromarine\"], \"20|rcuk________::f33c02afb0dc66c49d0ed97ca5dd5cb0\":[\"beopen\"], "
+ +
View File
@ -36,7 +36,7 @@ public class ResultLinkedToProjectTest {
private static final Logger log = LoggerFactory private static final Logger log = LoggerFactory
.getLogger(eu.dnetlib.dhp.oa.graph.dump.funderresult.ResultLinkedToProjectTest.class); .getLogger(eu.dnetlib.dhp.oa.graph.dump.funderresult.ResultLinkedToProjectTest.class);
private static HashMap<String, String> map = new HashMap<>(); private static final HashMap<String, String> map = new HashMap<>();
@BeforeAll @BeforeAll
public static void beforeAll() throws IOException { public static void beforeAll() throws IOException {
View File
@ -347,7 +347,7 @@ public class MigrateDbEntitiesApplicationTest {
} }
private List<String> getValueAsList(final String name, final List<TypedField> fields) { private List<String> getValueAsList(final String name, final List<TypedField> fields) {
return (List<String>) getValueAs(name, fields); return getValueAs(name, fields);
} }
} }
View File
@ -61,9 +61,8 @@ public class CrossrefClient {
int size = decompresser.inflate(buffer); int size = decompresser.inflate(buffer);
bos.write(buffer, 0, size); bos.write(buffer, 0, size);
} }
byte[] unzippeddata = bos.toByteArray();
decompresser.end(); decompresser.end();
return new String(unzippeddata); return bos.toString();
} catch (Throwable e) { } catch (Throwable e) {
throw new RuntimeException("Wrong record:" + blob, e); throw new RuntimeException("Wrong record:" + blob, e);
} }
Some files were not shown because too many files have changed in this diff