forked from D-Net/dnet-hadoop

commit 23b8883ab1 (parent 609eb711b3)

    applied intellij code cleanup
@@ -14,7 +14,7 @@ public class DbClient implements Closeable {

 	private static final Log log = LogFactory.getLog(DbClient.class);

-	private Connection connection;
+	private final Connection connection;

 	public DbClient(final String address, final String login, final String password) {
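Most hunks in this cleanup apply the same IntelliJ inspection: a field that is assigned exactly once — at its declaration or in every constructor — can be marked final, so the compiler rejects any later reassignment. A minimal sketch of the idea, using a hypothetical Client class rather than the real DbClient:

    import java.sql.Connection;

    public class Client {
    	// final documents (and enforces) that the reference is bound once
    	// in the constructor and never rebound afterwards.
    	private final Connection connection;

    	public Client(final Connection connection) {
    		this.connection = connection;
    	}

    	public void reset() {
    		// this.connection = null; // would not compile: cannot assign a final field
    	}
    }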
@@ -100,7 +100,7 @@ public class MakeTarArchive implements Serializable {
 		BufferedInputStream bis = new BufferedInputStream(is);

 		int count;
-		byte data[] = new byte[1024];
+		byte[] data = new byte[1024];
 		while ((count = bis.read(data, 0, data.length)) != -1) {
 			ar.write(data, 0, count);
 		}

@@ -13,9 +13,9 @@ import okio.Source;

 public class InputStreamRequestBody extends RequestBody {

-	private InputStream inputStream;
-	private MediaType mediaType;
-	private long lenght;
+	private final InputStream inputStream;
+	private final MediaType mediaType;
+	private final long lenght;

 	public static RequestBody create(final MediaType mediaType, final InputStream inputStream, final long len) {

@@ -21,7 +21,7 @@ public class DNetRestClient {

 	private static final Logger log = LoggerFactory.getLogger(DNetRestClient.class);

-	private static ObjectMapper mapper = new ObjectMapper();
+	private static final ObjectMapper mapper = new ObjectMapper();

 	public static <T> T doGET(final String url, Class<T> clazz) throws Exception {
 		final HttpGet httpGet = new HttpGet(url);

@@ -34,7 +34,7 @@ public class MessageSender {

 	private final String workflowId;

-	private ExecutorService executorService = Executors.newCachedThreadPool();
+	private final ExecutorService executorService = Executors.newCachedThreadPool();

 	public MessageSender(final String dnetMessageEndpoint, final String workflowId) {
 		this.workflowId = workflowId;

@@ -32,11 +32,11 @@ public class OafMapperUtils {
 		if (ModelSupport.isSubClass(left, Result.class)) {
 			return mergeResults((Result) left, (Result) right);
 		} else if (ModelSupport.isSubClass(left, Datasource.class)) {
-			((Datasource) left).mergeFrom((Datasource) right);
+			left.mergeFrom(right);
 		} else if (ModelSupport.isSubClass(left, Organization.class)) {
-			((Organization) left).mergeFrom((Organization) right);
+			left.mergeFrom(right);
 		} else if (ModelSupport.isSubClass(left, Project.class)) {
-			((Project) left).mergeFrom((Project) right);
+			left.mergeFrom(right);
 		} else {
 			throw new RuntimeException("invalid OafEntity subtype:" + left.getClass().getCanonicalName());
 		}

@@ -15,8 +15,8 @@ public class ISLookupClientFactory {

 	private static final Logger log = LoggerFactory.getLogger(ISLookupClientFactory.class);

-	private static int requestTimeout = 60000 * 10;
-	private static int connectTimeout = 60000 * 10;
+	private static final int requestTimeout = 60000 * 10;
+	private static final int connectTimeout = 60000 * 10;

 	public static ISLookUpService getLookUpService(final String isLookupUrl) {
 		return getServiceStub(ISLookUpService.class, isLookupUrl);

@@ -22,7 +22,7 @@ import eu.dnetlib.dhp.schema.oaf.Result;

 public class OafMapperUtilsTest {

-	private static ObjectMapper OBJECT_MAPPER = new ObjectMapper()
+	private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper()
 		.configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES, false);

 	@Test

@@ -160,9 +160,9 @@ public class PromoteActionPayloadForGraphTableJob {

 	private static String extractPayload(Row value) {
 		try {
-			return value.<String> getAs("payload");
+			return value.getAs("payload");
 		} catch (IllegalArgumentException | ClassCastException e) {
-			logger.error("cannot extract payload from action: {}", value.toString());
+			logger.error("cannot extract payload from action: {}", value);
 			throw e;
 		}
 	}
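The first change in the hunk above drops a redundant type witness: Row.getAs is generic (<T> T getAs(String fieldName)), so when the call appears in a context that already fixes T — here, the return statement of a method declared to return String — the compiler infers the type on its own. A small sketch with a hypothetical helper:

    import org.apache.spark.sql.Row;

    class PayloadExtractor {
    	static String extract(final Row row) {
    		// Equivalent to row.<String> getAs("payload"); T is inferred as String
    		// from the enclosing method's return type.
    		return row.getAs("payload");
    	}
    }

The second change relies on SLF4J-style parameterised loggers calling toString() on their arguments themselves, so passing value instead of value.toString() logs the same text and skips the conversion when the log level is disabled.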
@@ -75,7 +75,6 @@ public class CollectAndSave implements Serializable {
 			.union(sc.sequenceFile(inputPath + "/otherresearchproduct", Text.class, Text.class))
 			.union(sc.sequenceFile(inputPath + "/software", Text.class, Text.class))
 			.saveAsHadoopFile(outputPath, Text.class, Text.class, SequenceFileOutputFormat.class);
-		;
 	}

 	private static void removeOutputDir(SparkSession spark, String path) {

@@ -36,7 +36,7 @@ import scala.Tuple2;
 */
 public class SparkAtomicActionScoreJob implements Serializable {

-	private static String DOI = "doi";
+	private static final String DOI = "doi";
 	private static final Logger log = LoggerFactory.getLogger(SparkAtomicActionScoreJob.class);
 	private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();

@@ -249,7 +249,7 @@ public class PrepareProgramme {
 				parent = parent.substring(parent.lastIndexOf("|") + 1).trim();
 			}
 			if (current.trim().length() > parent.length()
-				&& current.toLowerCase().trim().substring(0, parent.length()).equals(parent)) {
+				&& current.toLowerCase().trim().startsWith(parent)) {
 				current = current.substring(parent.length() + 1);
 				if (current.trim().charAt(0) == '-' || current.trim().charAt(0) == '–') {
 					current = current.trim().substring(1).trim();
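The PrepareProgramme change swaps substring(0, parent.length()).equals(parent) for startsWith(parent). The two agree whenever the string is at least as long as the prefix (which the surrounding length check guarantees); startsWith also avoids allocating the intermediate substring and simply returns false on too-short input instead of throwing. A runnable sketch with made-up values:

    class StartsWithExample {
    	public static void main(String[] args) {
    		final String parent = "ict-2020";
    		final String current = "ICT-2020 Research";
    		final String s = current.toLowerCase().trim();
    		// Old form: allocates a substring, and throws StringIndexOutOfBoundsException
    		// when s is shorter than parent.
    		System.out.println(s.substring(0, parent.length()).equals(parent)); // true
    		// New form: same boolean, no allocation, false (not an exception) on short input.
    		System.out.println(s.startsWith(parent)); // true
    	}
    }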
@@ -93,7 +93,7 @@ public class PrepareProjects {
 	}

 	private static FlatMapFunction<Tuple2<ProjectSubset, CSVProject>, CSVProject> getTuple2CSVProjectFlatMapFunction() {
-		return (FlatMapFunction<Tuple2<ProjectSubset, CSVProject>, CSVProject>) value -> {
+		return value -> {
 			Optional<CSVProject> csvProject = Optional.ofNullable(value._2());
 			List<CSVProject> csvProjectList = new ArrayList<>();
 			if (csvProject.isPresent()) {

@@ -28,7 +28,7 @@ public class ReadCSV implements Closeable {
 	private final Configuration conf;
 	private final BufferedWriter writer;
 	private final ObjectMapper OBJECT_MAPPER = new ObjectMapper();
-	private String csvFile;
+	private final String csvFile;

 	public static void main(final String[] args) throws Exception {
 		final ArgumentApplicationParser parser = new ArgumentApplicationParser(

@@ -85,7 +85,6 @@ public class ReadCSV implements Closeable {

 		this.writer = new BufferedWriter(new OutputStreamWriter(fsDataOutputStream, StandardCharsets.UTF_8));
 		this.csvFile = httpConnector.getInputSource(fileURL);
-		;
 	}

 	protected void write(final Object p) {

@@ -25,7 +25,7 @@ public class ReadExcel implements Closeable {
 	private final Configuration conf;
 	private final BufferedWriter writer;
 	private final ObjectMapper OBJECT_MAPPER = new ObjectMapper();
-	private InputStream excelFile;
+	private final InputStream excelFile;

 	public static void main(final String[] args) throws Exception {
 		final ArgumentApplicationParser parser = new ArgumentApplicationParser(

@@ -82,7 +82,6 @@ public class ReadExcel implements Closeable {

 		this.writer = new BufferedWriter(new OutputStreamWriter(fsDataOutputStream, StandardCharsets.UTF_8));
 		this.excelFile = httpConnector.getInputSourceAsStream(fileURL);
-		;
 	}

 	protected void write(final Object p) {

@@ -18,7 +18,7 @@ public abstract class ReportingJob {
 	 */
 	public static final int INITIAL_DELAY = 2;

-	private ScheduledExecutorService executor = Executors.newSingleThreadScheduledExecutor();
+	private final ScheduledExecutorService executor = Executors.newSingleThreadScheduledExecutor();

 	protected final AggregatorReport report;

@@ -30,7 +30,7 @@ public class CollectorWorkerApplication {

 	private static final Logger log = LoggerFactory.getLogger(CollectorWorkerApplication.class);

-	private FileSystem fileSystem;
+	private final FileSystem fileSystem;

 	public CollectorWorkerApplication(FileSystem fileSystem) {
 		this.fileSystem = fileSystem;

@@ -32,7 +32,7 @@ public class HttpConnector2 {

 	private String responseType = null;

-	private String userAgent = "Mozilla/5.0 (compatible; OAI; +http://www.openaire.eu)";
+	private final String userAgent = "Mozilla/5.0 (compatible; OAI; +http://www.openaire.eu)";

 	public HttpConnector2() {
 		this(new HttpClientParams());

@@ -42,7 +42,7 @@ public class OaiIterator implements Iterator<String> {
 	private String token;
 	private boolean started;
 	private final HttpConnector2 httpConnector;
-	private AggregatorReport report;
+	private final AggregatorReport report;

 	public OaiIterator(
 		final String baseUrl,

@@ -26,7 +26,7 @@ public class RestCollectorPlugin implements CollectorPlugin {

 	public static final String RESULT_SIZE_VALUE_DEFAULT = "100";

-	private HttpClientParams clientParams;
+	private final HttpClientParams clientParams;

 	public RestCollectorPlugin(HttpClientParams clientParams) {
 		this.clientParams = clientParams;

@@ -48,18 +48,18 @@ public class RestIterator implements Iterator<String> {

 	private static final Logger log = LoggerFactory.getLogger(RestIterator.class);
 	public static final String UTF_8 = "UTF-8";

-	private HttpClientParams clientParams;
+	private final HttpClientParams clientParams;

 	private final String BASIC = "basic";

-	private JsonUtils jsonUtils;
+	private final JsonUtils jsonUtils;

-	private String baseUrl;
-	private String resumptionType;
-	private String resumptionParam;
-	private String resultFormatValue;
+	private final String baseUrl;
+	private final String resumptionType;
+	private final String resumptionParam;
+	private final String resultFormatValue;
 	private String queryParams;
-	private int resultSizeValue;
+	private final int resultSizeValue;
 	private int resumptionInt = 0; // integer resumption token (first record to harvest)
 	private int resultTotal = -1;
 	private String resumptionStr = Integer.toString(resumptionInt); // string resumption token (first record to harvest

@@ -71,11 +71,11 @@ public class RestIterator implements Iterator<String> {
 	private XPathExpression xprResultTotalPath;
 	private XPathExpression xprResumptionPath;
 	private XPathExpression xprEntity;
-	private String queryFormat;
-	private String querySize;
-	private String authMethod;
-	private String authToken;
-	private Queue<String> recordQueue = new PriorityBlockingQueue<String>();
+	private final String queryFormat;
+	private final String querySize;
+	private final String authMethod;
+	private final String authToken;
+	private final Queue<String> recordQueue = new PriorityBlockingQueue<String>();
 	private int discoverResultSize = 0;
 	private int pagination = 1;
 	/*

@@ -83,7 +83,7 @@ public class RestIterator implements Iterator<String> {
 	 * json. useful for cases when the target API expects a resultFormatValue != json, but the results are returned in
 	 * json. An example is the EU Open Data Portal API: resultFormatValue=standard, results are in json format.
 	 */
-	private String resultOutputFormat;
+	private final String resultOutputFormat;

 	/** RestIterator class
 	 *  compatible to version 1.3.33

@@ -229,7 +229,7 @@ public class RestIterator implements Iterator<String> {

 			resultStream = theHttpInputStream;
 			if ("json".equals(resultOutputFormat)) {
-				resultJson = IOUtils.toString(resultStream, UTF_8);
+				resultJson = IOUtils.toString(resultStream, StandardCharsets.UTF_8);
 				resultXml = jsonUtils.convertToXML(resultJson);
 				resultStream = IOUtils.toInputStream(resultXml, UTF_8);
 			}
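In the RestIterator hunk above, only the decoding call changes: the class constant UTF_8 is the string "UTF-8", while StandardCharsets.UTF_8 is a java.nio.charset.Charset, so the new code selects the IOUtils.toString(InputStream, Charset) overload. That overload skips the runtime charset-name lookup and cannot fail on a misspelled name. A small sketch (the stream argument is hypothetical):

    import java.io.IOException;
    import java.io.InputStream;
    import java.nio.charset.StandardCharsets;

    import org.apache.commons.io.IOUtils;

    class CharsetedRead {
    	static String read(final InputStream in) throws IOException {
    		// Charset-typed overload: no name lookup, no unsupported-encoding failure mode.
    		return IOUtils.toString(in, StandardCharsets.UTF_8);
    	}
    }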
@@ -36,7 +36,7 @@ public class TransformSparkJobNode {

 	private static final Logger log = LoggerFactory.getLogger(TransformSparkJobNode.class);

-	private static int RECORDS_PER_TASK = 200;
+	private static final int RECORDS_PER_TASK = 200;

 	public static void main(String[] args) throws Exception {

@@ -26,7 +26,7 @@ public class PersonCleaner implements ExtensionFunction, Serializable {
 	private List<String> surname = Lists.newArrayList();
 	private List<String> fullname = Lists.newArrayList();

-	private static Set<String> particles = null;
+	private static final Set<String> particles = null;

 	public PersonCleaner() {

@@ -20,7 +20,7 @@ import eu.dnetlib.dhp.collection.HttpConnector2;
 public class EXCELParserTest {

 	private static Path workingDir;
-	private HttpConnector2 httpConnector = new HttpConnector2();
+	private final HttpConnector2 httpConnector = new HttpConnector2();
 	private static final String URL = "http://cordis.europa.eu/data/reference/cordisref-H2020topics.xlsx";

 	@BeforeAll

@@ -25,22 +25,22 @@ public class RestCollectorPluginTest {

 	private static final Logger log = LoggerFactory.getLogger(RestCollectorPluginTest.class);

-	private String baseUrl = "https://share.osf.io/api/v2/search/creativeworks/_search";
-	private String resumptionType = "count";
-	private String resumptionParam = "from";
-	private String entityXpath = "//hits/hits";
-	private String resumptionXpath = "//hits";
-	private String resultTotalXpath = "//hits/total";
-	private String resultFormatParam = "format";
-	private String resultFormatValue = "json";
-	private String resultSizeParam = "size";
-	private String resultSizeValue = "10";
+	private final String baseUrl = "https://share.osf.io/api/v2/search/creativeworks/_search";
+	private final String resumptionType = "count";
+	private final String resumptionParam = "from";
+	private final String entityXpath = "//hits/hits";
+	private final String resumptionXpath = "//hits";
+	private final String resultTotalXpath = "//hits/total";
+	private final String resultFormatParam = "format";
+	private final String resultFormatValue = "json";
+	private final String resultSizeParam = "size";
+	private final String resultSizeValue = "10";
 	// private String query = "q=%28sources%3ASocArXiv+AND+type%3Apreprint%29";
-	private String query = "q=%28sources%3AengrXiv+AND+type%3Apreprint%29";
+	private final String query = "q=%28sources%3AengrXiv+AND+type%3Apreprint%29";
 	// private String query = "=(sources:engrXiv AND type:preprint)";

-	private String protocolDescriptor = "rest_json2xml";
-	private ApiDescriptor api = new ApiDescriptor();
+	private final String protocolDescriptor = "rest_json2xml";
+	private final ApiDescriptor api = new ApiDescriptor();
 	private RestCollectorPlugin rcp;

 	@BeforeEach

@@ -20,20 +20,20 @@ public class RestIteratorTest {

 	private static final Logger log = LoggerFactory.getLogger(RestIteratorTest.class);

-	private String baseUrl = "https://share.osf.io/api/v2/search/creativeworks/_search";
-	private String resumptionType = "count";
-	private String resumptionParam = "from";
-	private String resumptionXpath = "";
-	private String resultTotalXpath = "//hits/total";
-	private String entityXpath = "//hits/hits";
-	private String resultFormatParam = "format";
-	private String resultFormatValue = "Json"; // Change from lowerCase to one UpperCase
-	private String resultSizeParam = "size";
-	private String resultSizeValue = "10";
-	private String authMethod = "";
-	private String authToken = "";
-	private String resultOffsetParam = "cursor";
-	private String query = "q=%28sources%3ASocArXiv+AND+type%3Apreprint%29";
+	private final String baseUrl = "https://share.osf.io/api/v2/search/creativeworks/_search";
+	private final String resumptionType = "count";
+	private final String resumptionParam = "from";
+	private final String resumptionXpath = "";
+	private final String resultTotalXpath = "//hits/total";
+	private final String entityXpath = "//hits/hits";
+	private final String resultFormatParam = "format";
+	private final String resultFormatValue = "Json"; // Change from lowerCase to one UpperCase
+	private final String resultSizeParam = "size";
+	private final String resultSizeValue = "10";
+	private final String authMethod = "";
+	private final String authToken = "";
+	private final String resultOffsetParam = "cursor";
+	private final String query = "q=%28sources%3ASocArXiv+AND+type%3Apreprint%29";

 	@Disabled
 	@Test

@@ -4,6 +4,8 @@ package eu.dnetlib.dhp.broker.oa;
 import org.apache.commons.io.IOUtils;
 import org.apache.commons.lang.StringUtils;
 import org.apache.spark.SparkConf;
+import org.apache.spark.api.java.function.FilterFunction;
+import org.apache.spark.api.java.function.MapFunction;
 import org.apache.spark.sql.Encoder;
 import org.apache.spark.sql.Encoders;
 import org.apache.spark.sql.SaveMode;

@@ -47,26 +49,22 @@ public class CheckDuplictedIdsJob {

 		final LongAccumulator total = spark.sparkContext().longAccumulator("invaild_event_id");

-		final TypedColumn<Tuple2<String, Long>, Tuple2<String, Long>> agg = new CountAggregator().toColumn();
+		final Encoder<Tuple2<String, Long>> encoder = Encoders.tuple(Encoders.STRING(), Encoders.LONG());

 		ClusterUtils
 			.readPath(spark, eventsPath, Event.class)
-			.map(e -> new Tuple2<>(e.getEventId(), 1l), Encoders.tuple(Encoders.STRING(), Encoders.LONG()))
-			.groupByKey(t -> t._1, Encoders.STRING())
-			.agg(agg)
-			.map(t -> t._2, Encoders.tuple(Encoders.STRING(), Encoders.LONG()))
-			.filter(t -> t._2 > 1)
-			.map(o -> ClusterUtils.incrementAccumulator(o, total), Encoders.tuple(Encoders.STRING(), Encoders.LONG()))
+			.map((MapFunction<Event, Tuple2<String, Long>>) e -> new Tuple2<>(e.getEventId(), 1l), encoder)
+			.groupByKey((MapFunction<Tuple2<String, Long>, String>) t -> t._1, Encoders.STRING())
+			.agg(new CountAggregator().toColumn())
+			.map((MapFunction<Tuple2<String, Tuple2<String, Long>>, Tuple2<String, Long>>) t -> t._2, encoder)
+			.filter((FilterFunction<Tuple2<String, Long>>) t -> t._2 > 1)
+			.map(
+				(MapFunction<Tuple2<String, Long>, Tuple2<String, Long>>) o -> ClusterUtils
+					.incrementAccumulator(o, total),
+				encoder)
 			.write()
 			.mode(SaveMode.Overwrite)
 			.option("compression", "gzip")
 			.json(countPath);
-		;
 	}

 	private static String eventAsJsonString(final Event f) throws JsonProcessingException {
 		return new ObjectMapper().writeValueAsString(f);
 	}

 }
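This hunk — and most of the broker-job hunks that follow — applies one recurring pattern: every bare lambda handed to Dataset.map, flatMap, groupByKey or filter is cast to the matching org.apache.spark.api.java.function interface. The likely reason (an assumption on our side, consistent with Spark's Java API when built against Scala 2.12) is that these methods are overloaded with both a Scala-function variant and a Java MapFunction/FilterFunction variant, so an uncast lambda can be ambiguous to javac; the cast pins the Java overload. A minimal self-contained sketch:

    import java.util.Arrays;

    import org.apache.spark.api.java.function.MapFunction;
    import org.apache.spark.sql.Dataset;
    import org.apache.spark.sql.Encoders;
    import org.apache.spark.sql.SparkSession;

    class TypedLambdaExample {
    	public static void main(String[] args) {
    		final SparkSession spark = SparkSession.builder().master("local").getOrCreate();
    		final Dataset<String> words = spark.createDataset(Arrays.asList("alpha", "beta"), Encoders.STRING());

    		// The cast selects map(MapFunction<String, Integer>, Encoder<Integer>);
    		// without it the compiler may also consider the scala.Function1 overload.
    		final Dataset<Integer> lengths = words
    			.map((MapFunction<String, Integer>) String::length, Encoders.INT());

    		lengths.show();
    		spark.stop();
    	}
    }

The same hunk also hoists the repeated tuple encoder into a local variable and inlines the one-use aggregation column — both behaviour-preserving refactorings.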
@@ -12,6 +12,8 @@ import org.apache.commons.io.IOUtils;
 import org.apache.commons.lang3.StringUtils;
 import org.apache.spark.SparkConf;
 import org.apache.spark.SparkContext;
+import org.apache.spark.api.java.function.FlatMapFunction;
+import org.apache.spark.api.java.function.MapFunction;
 import org.apache.spark.sql.Dataset;
 import org.apache.spark.sql.Encoders;
 import org.apache.spark.util.LongAccumulator;

@@ -77,11 +79,11 @@ public class GenerateEventsJob {

 		final Dataset<Event> dataset = groups
 			.map(
-				g -> EventFinder
+				(MapFunction<ResultGroup, EventGroup>) g -> EventFinder
 					.generateEvents(g, dsIdWhitelist, dsIdBlacklist, dsTypeWhitelist, topicWhitelist, accumulators),
 				Encoders
 					.bean(EventGroup.class))
-			.flatMap(g -> g.getData().iterator(), Encoders.bean(Event.class));
+			.flatMap((FlatMapFunction<EventGroup, Event>) g -> g.getData().iterator(), Encoders.bean(Event.class));

 		ClusterUtils.save(dataset, eventsPath, Event.class, total);

@@ -13,6 +13,7 @@ import org.apache.http.client.methods.HttpGet;
 import org.apache.http.impl.client.CloseableHttpClient;
 import org.apache.http.impl.client.HttpClients;
 import org.apache.spark.SparkConf;
+import org.apache.spark.api.java.function.MapFunction;
 import org.apache.spark.sql.Encoders;
 import org.apache.spark.sql.SaveMode;
 import org.apache.spark.sql.TypedColumn;

@@ -24,6 +25,7 @@ import eu.dnetlib.dhp.broker.model.Event;
 import eu.dnetlib.dhp.broker.oa.util.ClusterUtils;
 import eu.dnetlib.dhp.broker.oa.util.aggregators.stats.DatasourceStats;
 import eu.dnetlib.dhp.broker.oa.util.aggregators.stats.StatsAggregator;
+import scala.Tuple2;

 public class GenerateStatsJob {

@@ -71,9 +73,13 @@ public class GenerateStatsJob {

 		ClusterUtils
 			.readPath(spark, eventsPath, Event.class)
-			.groupByKey(e -> e.getTopic() + "@@@" + e.getMap().getTargetDatasourceId(), Encoders.STRING())
+			.groupByKey(
+				(MapFunction<Event, String>) e -> e.getTopic() + "@@@" + e.getMap().getTargetDatasourceId(),
+				Encoders.STRING())
 			.agg(aggr)
-			.map(t -> t._2, Encoders.bean(DatasourceStats.class))
+			.map(
+				(MapFunction<Tuple2<String, DatasourceStats>, DatasourceStats>) t -> t._2,
+				Encoders.bean(DatasourceStats.class))
 			.write()
 			.mode(SaveMode.Overwrite)
 			.jdbc(dbUrl, "oa_datasource_stats_temp", connectionProperties);

@@ -13,6 +13,8 @@ import org.apache.http.impl.client.CloseableHttpClient;
 import org.apache.http.impl.client.HttpClients;
 import org.apache.spark.SparkConf;
 import org.apache.spark.api.java.JavaRDD;
+import org.apache.spark.api.java.function.FlatMapFunction;
+import org.apache.spark.api.java.function.MapFunction;
 import org.apache.spark.sql.Dataset;
 import org.apache.spark.sql.Encoders;
 import org.apache.spark.sql.SparkSession;

@@ -30,6 +32,7 @@ import eu.dnetlib.dhp.broker.model.Event;
 import eu.dnetlib.dhp.broker.oa.util.ClusterUtils;
 import eu.dnetlib.dhp.broker.oa.util.EventGroup;
 import eu.dnetlib.dhp.broker.oa.util.aggregators.subset.EventSubsetAggregator;
+import scala.Tuple2;

 public class IndexEventSubsetJob {

@@ -83,13 +86,15 @@ public class IndexEventSubsetJob {

 		final Dataset<Event> subset = ClusterUtils
 			.readPath(spark, eventsPath, Event.class)
-			.groupByKey(e -> e.getTopic() + '@' + e.getMap().getTargetDatasourceId(), Encoders.STRING())
+			.groupByKey(
+				(MapFunction<Event, String>) e -> e.getTopic() + '@' + e.getMap().getTargetDatasourceId(),
+				Encoders.STRING())
 			.agg(aggr)
-			.map(t -> t._2, Encoders.bean(EventGroup.class))
-			.flatMap(g -> g.getData().iterator(), Encoders.bean(Event.class));
+			.map((MapFunction<Tuple2<String, EventGroup>, EventGroup>) t -> t._2, Encoders.bean(EventGroup.class))
+			.flatMap((FlatMapFunction<EventGroup, Event>) g -> g.getData().iterator(), Encoders.bean(Event.class));

 		final JavaRDD<String> inputRdd = subset
-			.map(e -> prepareEventForIndexing(e, now, total), Encoders.STRING())
+			.map((MapFunction<Event, String>) e -> prepareEventForIndexing(e, now, total), Encoders.STRING())
 			.javaRDD();

 		final Map<String, String> esCfg = new HashMap<>();

@@ -18,7 +18,10 @@ import org.apache.http.impl.client.CloseableHttpClient;
 import org.apache.http.impl.client.HttpClients;
 import org.apache.spark.SparkConf;
 import org.apache.spark.api.java.JavaRDD;
+import org.apache.spark.api.java.function.FlatMapFunction;
+import org.apache.spark.api.java.function.MapFunction;
 import org.apache.spark.sql.Dataset;
+import org.apache.spark.sql.Encoder;
 import org.apache.spark.sql.Encoders;
 import org.apache.spark.sql.SparkSession;
 import org.apache.spark.util.LongAccumulator;

@@ -89,13 +92,17 @@ public class IndexNotificationsJob {
 		log.info("Number of subscriptions: " + subscriptions.size());

 		if (subscriptions.size() > 0) {
+			final Encoder<NotificationGroup> ngEncoder = Encoders.bean(NotificationGroup.class);
+			final Encoder<Notification> nEncoder = Encoders.bean(Notification.class);
 			final Dataset<Notification> notifications = ClusterUtils
 				.readPath(spark, eventsPath, Event.class)
-				.map(e -> generateNotifications(e, subscriptions, startTime), Encoders.bean(NotificationGroup.class))
-				.flatMap(g -> g.getData().iterator(), Encoders.bean(Notification.class));
+				.map(
+					(MapFunction<Event, NotificationGroup>) e -> generateNotifications(e, subscriptions, startTime),
+					ngEncoder)
+				.flatMap((FlatMapFunction<NotificationGroup, Notification>) g -> g.getData().iterator(), nEncoder);

 			final JavaRDD<String> inputRdd = notifications
-				.map(n -> prepareForIndexing(n, total), Encoders.STRING())
+				.map((MapFunction<Notification, String>) n -> prepareForIndexing(n, total), Encoders.STRING())
 				.javaRDD();

 			final Map<String, String> esCfg = new HashMap<>();

@@ -192,15 +199,11 @@ public class IndexNotificationsJob {
 			return false;
 		}

-		if (conditions.containsKey("targetSubjects")
-			&& !conditions
+		return !conditions.containsKey("targetSubjects")
+			|| conditions
 				.get("targetSubjects")
 				.stream()
-				.allMatch(c -> SubscriptionUtils.verifyListExact(map.getTargetSubjects(), c.getValue()))) {
-			return false;
-		}
-
-		return true;
+				.allMatch(c -> SubscriptionUtils.verifyListExact(map.getTargetSubjects(), c.getValue()));

 	}
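The control-flow rewrite above is a pure boolean simplification: "if (a && !b) { return false; } return true;" is "return !(a && !b);", which De Morgan's law turns into "return !a || b;". A sketch with hypothetical flags:

    class DeMorganCheck {
    	static boolean oldForm(final boolean hasKey, final boolean allMatch) {
    		if (hasKey && !allMatch) {
    			return false;
    		}
    		return true;
    	}

    	static boolean newForm(final boolean hasKey, final boolean allMatch) {
    		return !hasKey || allMatch; // == !(hasKey && !allMatch)
    	}

    	public static void main(String[] args) {
    		for (final boolean a : new boolean[] { false, true }) {
    			for (final boolean b : new boolean[] { false, true }) {
    				System.out.println(oldForm(a, b) == newForm(a, b)); // true, four times
    			}
    		}
    	}
    }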
@@ -7,6 +7,7 @@ import java.util.Optional;

 import org.apache.commons.io.IOUtils;
 import org.apache.spark.SparkConf;
+import org.apache.spark.api.java.function.MapFunction;
 import org.apache.spark.sql.Dataset;
 import org.apache.spark.sql.Encoders;
 import org.apache.spark.sql.TypedColumn;

@@ -67,9 +68,13 @@ public class JoinStep0Job {

 		final Dataset<OaBrokerMainEntity> dataset = sources
 			.joinWith(typedRels, sources.col("openaireId").equalTo(typedRels.col("source")), "left_outer")
-			.groupByKey(t -> t._1.getOpenaireId(), Encoders.STRING())
+			.groupByKey(
+				(MapFunction<Tuple2<OaBrokerMainEntity, RelatedDatasource>, String>) t -> t._1.getOpenaireId(),
+				Encoders.STRING())
 			.agg(aggr)
-			.map(t -> t._2, Encoders.bean(OaBrokerMainEntity.class));
+			.map(
+				(MapFunction<Tuple2<String, OaBrokerMainEntity>, OaBrokerMainEntity>) t -> t._2,
+				Encoders.bean(OaBrokerMainEntity.class));

 		ClusterUtils.save(dataset, joinedEntitiesPath, OaBrokerMainEntity.class, total);

@@ -69,7 +69,9 @@ public class JoinStep1Job {
 				(MapFunction<Tuple2<OaBrokerMainEntity, RelatedProject>, String>) t -> t._1.getOpenaireId(),
 				Encoders.STRING())
 			.agg(aggr)
-			.map(t -> t._2, Encoders.bean(OaBrokerMainEntity.class));
+			.map(
+				(MapFunction<Tuple2<String, OaBrokerMainEntity>, OaBrokerMainEntity>) t -> t._2,
+				Encoders.bean(OaBrokerMainEntity.class));

 		ClusterUtils.save(dataset, joinedEntitiesPath, OaBrokerMainEntity.class, total);

@@ -7,6 +7,7 @@ import java.util.Optional;

 import org.apache.commons.io.IOUtils;
 import org.apache.spark.SparkConf;
+import org.apache.spark.api.java.function.MapFunction;
 import org.apache.spark.sql.Dataset;
 import org.apache.spark.sql.Encoders;
 import org.apache.spark.sql.TypedColumn;

@@ -64,9 +65,13 @@ public class JoinStep2Job {

 		final Dataset<OaBrokerMainEntity> dataset = sources
 			.joinWith(typedRels, sources.col("openaireId").equalTo(typedRels.col("source")), "left_outer")
-			.groupByKey(t -> t._1.getOpenaireId(), Encoders.STRING())
+			.groupByKey(
+				(MapFunction<Tuple2<OaBrokerMainEntity, RelatedSoftware>, String>) t -> t._1.getOpenaireId(),
+				Encoders.STRING())
 			.agg(aggr)
-			.map(t -> t._2, Encoders.bean(OaBrokerMainEntity.class));
+			.map(
+				(MapFunction<Tuple2<String, OaBrokerMainEntity>, OaBrokerMainEntity>) t -> t._2,
+				Encoders.bean(OaBrokerMainEntity.class));

 		ClusterUtils.save(dataset, joinedEntitiesPath, OaBrokerMainEntity.class, total);

@@ -69,7 +69,9 @@ public class JoinStep3Job {
 				(MapFunction<Tuple2<OaBrokerMainEntity, RelatedDataset>, String>) t -> t._1.getOpenaireId(),
 				Encoders.STRING())
 			.agg(aggr)
-			.map(t -> t._2, Encoders.bean(OaBrokerMainEntity.class));
+			.map(
+				(MapFunction<Tuple2<String, OaBrokerMainEntity>, OaBrokerMainEntity>) t -> t._2,
+				Encoders.bean(OaBrokerMainEntity.class));

 		ClusterUtils.save(dataset, joinedEntitiesPath, OaBrokerMainEntity.class, total);

@@ -69,7 +69,9 @@ public class JoinStep4Job {
 				(MapFunction<Tuple2<OaBrokerMainEntity, RelatedPublication>, String>) t -> t._1.getOpenaireId(),
 				Encoders.STRING())
 			.agg(aggr)
-			.map(t -> t._2, Encoders.bean(OaBrokerMainEntity.class));
+			.map(
+				(MapFunction<Tuple2<String, OaBrokerMainEntity>, OaBrokerMainEntity>) t -> t._2,
+				Encoders.bean(OaBrokerMainEntity.class));

 		ClusterUtils.save(dataset, joinedEntitiesPath, OaBrokerMainEntity.class, total);

@@ -7,6 +7,7 @@ import java.util.Optional;

 import org.apache.commons.io.IOUtils;
 import org.apache.spark.SparkConf;
+import org.apache.spark.api.java.function.FilterFunction;
 import org.apache.spark.api.java.function.MapFunction;
 import org.apache.spark.sql.Dataset;
 import org.apache.spark.sql.Encoders;

@@ -64,7 +65,7 @@ public class PrepareGroupsJob {

 		final Dataset<Relation> mergedRels = ClusterUtils
 			.loadRelations(graphPath, spark)
-			.filter(r -> r.getRelClass().equals(BrokerConstants.IS_MERGED_IN_CLASS));
+			.filter((FilterFunction<Relation>) r -> r.getRelClass().equals(BrokerConstants.IS_MERGED_IN_CLASS));

 		final TypedColumn<Tuple2<OaBrokerMainEntity, Relation>, ResultGroup> aggr = new ResultAggregator()
 			.toColumn();

@@ -75,8 +76,9 @@ public class PrepareGroupsJob {
 				(MapFunction<Tuple2<OaBrokerMainEntity, Relation>, String>) t -> t._2.getTarget(),
 				Encoders.STRING())
 			.agg(aggr)
-			.map(t -> t._2, Encoders.bean(ResultGroup.class))
-			.filter(rg -> rg.getData().size() > 1);
+			.map(
+				(MapFunction<Tuple2<String, ResultGroup>, ResultGroup>) t -> t._2, Encoders.bean(ResultGroup.class))
+			.filter((FilterFunction<ResultGroup>) rg -> rg.getData().size() > 1);

 		ClusterUtils.save(dataset, groupsPath, ResultGroup.class, total);
@@ -7,6 +7,8 @@ import java.util.Optional;

 import org.apache.commons.io.IOUtils;
 import org.apache.spark.SparkConf;
+import org.apache.spark.api.java.function.FilterFunction;
+import org.apache.spark.api.java.function.MapFunction;
 import org.apache.spark.sql.Dataset;
 import org.apache.spark.sql.Encoders;
 import org.apache.spark.util.LongAccumulator;

@@ -20,6 +22,7 @@ import eu.dnetlib.dhp.broker.oa.util.ConversionUtils;
 import eu.dnetlib.dhp.broker.oa.util.aggregators.withRels.RelatedDataset;
 import eu.dnetlib.dhp.schema.common.ModelConstants;
 import eu.dnetlib.dhp.schema.oaf.Relation;
+import scala.Tuple2;

 public class PrepareRelatedDatasetsJob {

@@ -58,20 +61,22 @@ public class PrepareRelatedDatasetsJob {

 		final Dataset<OaBrokerRelatedDataset> datasets = ClusterUtils
 			.readPath(spark, graphPath + "/dataset", eu.dnetlib.dhp.schema.oaf.Dataset.class)
-			.filter(d -> !ClusterUtils.isDedupRoot(d.getId()))
-			.map(ConversionUtils::oafDatasetToBrokerDataset, Encoders.bean(OaBrokerRelatedDataset.class));
+			.filter((FilterFunction<eu.dnetlib.dhp.schema.oaf.Dataset>) d -> !ClusterUtils.isDedupRoot(d.getId()))
+			.map(
+				(MapFunction<eu.dnetlib.dhp.schema.oaf.Dataset, OaBrokerRelatedDataset>) ConversionUtils::oafDatasetToBrokerDataset,
+				Encoders.bean(OaBrokerRelatedDataset.class));

 		final Dataset<Relation> rels = ClusterUtils
 			.loadRelations(graphPath, spark)
-			.filter(r -> r.getDataInfo().getDeletedbyinference())
-			.filter(r -> r.getRelType().equals(ModelConstants.RESULT_RESULT))
-			.filter(r -> ClusterUtils.isValidResultResultClass(r.getRelClass()))
-			.filter(r -> !ClusterUtils.isDedupRoot(r.getSource()))
-			.filter(r -> !ClusterUtils.isDedupRoot(r.getTarget()));
+			.filter((FilterFunction<Relation>) r -> r.getDataInfo().getDeletedbyinference())
+			.filter((FilterFunction<Relation>) r -> r.getRelType().equals(ModelConstants.RESULT_RESULT))
+			.filter((FilterFunction<Relation>) r -> ClusterUtils.isValidResultResultClass(r.getRelClass()))
+			.filter((FilterFunction<Relation>) r -> !ClusterUtils.isDedupRoot(r.getSource()))
+			.filter((FilterFunction<Relation>) r -> !ClusterUtils.isDedupRoot(r.getTarget()));

 		final Dataset<RelatedDataset> dataset = rels
 			.joinWith(datasets, datasets.col("openaireId").equalTo(rels.col("target")), "inner")
-			.map(t -> {
+			.map((MapFunction<Tuple2<Relation, OaBrokerRelatedDataset>, RelatedDataset>) t -> {
 				final RelatedDataset rel = new RelatedDataset(t._1.getSource(),
 					t._2);
 				rel.getRelDataset().setRelType(t._1.getRelClass());

@@ -7,6 +7,9 @@ import java.util.Optional;

 import org.apache.commons.io.IOUtils;
 import org.apache.spark.SparkConf;
+import org.apache.spark.api.java.function.FilterFunction;
+import org.apache.spark.api.java.function.FlatMapFunction;
+import org.apache.spark.api.java.function.MapFunction;
 import org.apache.spark.sql.Dataset;
 import org.apache.spark.sql.Encoders;
 import org.apache.spark.sql.SparkSession;

@@ -25,6 +28,7 @@ import eu.dnetlib.dhp.schema.oaf.OtherResearchProduct;
 import eu.dnetlib.dhp.schema.oaf.Publication;
 import eu.dnetlib.dhp.schema.oaf.Result;
 import eu.dnetlib.dhp.schema.oaf.Software;
+import scala.Tuple2;
 import scala.Tuple3;

 public class PrepareRelatedDatasourcesJob {

@@ -70,17 +74,20 @@ public class PrepareRelatedDatasourcesJob {

 		final Dataset<OaBrokerRelatedDatasource> datasources = ClusterUtils
 			.readPath(spark, graphPath + "/datasource", Datasource.class)
-			.map(ConversionUtils::oafDatasourceToBrokerDatasource, Encoders.bean(OaBrokerRelatedDatasource.class));
+			.map(
+				(MapFunction<Datasource, OaBrokerRelatedDatasource>) ConversionUtils::oafDatasourceToBrokerDatasource,
+				Encoders.bean(OaBrokerRelatedDatasource.class));

 		final Dataset<RelatedDatasource> dataset = rels
 			.joinWith(datasources, datasources.col("openaireId").equalTo(rels.col("_2")), "inner")
-			.map(t -> {
-				final RelatedDatasource r = new RelatedDatasource();
-				r.setSource(t._1._1());
-				r.setRelDatasource(t._2);
-				r.getRelDatasource().setRelType(t._1._3());
-				return r;
-			}, Encoders.bean(RelatedDatasource.class));
+			.map(
+				(MapFunction<Tuple2<Tuple3<String, String, String>, OaBrokerRelatedDatasource>, RelatedDatasource>) t -> {
+					final RelatedDatasource r = new RelatedDatasource();
+					r.setSource(t._1._1());
+					r.setRelDatasource(t._2);
+					r.getRelDatasource().setRelType(t._1._3());
+					return r;
+				}, Encoders.bean(RelatedDatasource.class));

 		ClusterUtils.save(dataset, relsPath, RelatedDatasource.class, total);

@@ -88,19 +95,22 @@ public class PrepareRelatedDatasourcesJob {

 	}

-	private static final Dataset<Tuple3<String, String, String>> prepareResultTuples(final SparkSession spark,
+	private static final <T extends Result> Dataset<Tuple3<String, String, String>> prepareResultTuples(
+		final SparkSession spark,
 		final String graphPath,
-		final Class<? extends Result> sourceClass) {
+		final Class<T> sourceClass) {

 		return ClusterUtils
 			.readPath(spark, graphPath + "/" + sourceClass.getSimpleName().toLowerCase(), sourceClass)
-			.filter(r -> !ClusterUtils.isDedupRoot(r.getId()))
-			.filter(r -> r.getDataInfo().getDeletedbyinference())
+			.filter((FilterFunction<T>) r -> !ClusterUtils.isDedupRoot(r.getId()))
+			.filter((FilterFunction<T>) r -> r.getDataInfo().getDeletedbyinference())
 			.map(
-				r -> DatasourceRelationsAccumulator.calculateTuples(r),
+				(MapFunction<T, DatasourceRelationsAccumulator>) r -> DatasourceRelationsAccumulator.calculateTuples(r),
 				Encoders.bean(DatasourceRelationsAccumulator.class))
 			.flatMap(
-				acc -> acc.getRels().iterator(),
+				(FlatMapFunction<DatasourceRelationsAccumulator, Tuple3<String, String, String>>) acc -> acc
+					.getRels()
+					.iterator(),
 				Encoders.tuple(Encoders.STRING(), Encoders.STRING(), Encoders.STRING()));
 	}
@@ -7,6 +7,8 @@ import java.util.Optional;

 import org.apache.commons.io.IOUtils;
 import org.apache.spark.SparkConf;
+import org.apache.spark.api.java.function.FilterFunction;
+import org.apache.spark.api.java.function.MapFunction;
 import org.apache.spark.sql.Dataset;
 import org.apache.spark.sql.Encoders;
 import org.apache.spark.util.LongAccumulator;

@@ -22,6 +24,7 @@ import eu.dnetlib.dhp.broker.oa.util.aggregators.withRels.RelatedProject;
 import eu.dnetlib.dhp.schema.common.ModelConstants;
 import eu.dnetlib.dhp.schema.oaf.Project;
 import eu.dnetlib.dhp.schema.oaf.Relation;
+import scala.Tuple2;

 public class PrepareRelatedProjectsJob {

@@ -60,20 +63,25 @@ public class PrepareRelatedProjectsJob {

 		final Dataset<OaBrokerProject> projects = ClusterUtils
 			.readPath(spark, graphPath + "/project", Project.class)
-			.filter(p -> !ClusterUtils.isDedupRoot(p.getId()))
-			.map(ConversionUtils::oafProjectToBrokerProject, Encoders.bean(OaBrokerProject.class));
+			.filter((FilterFunction<Project>) p -> !ClusterUtils.isDedupRoot(p.getId()))
+			.map(
+				(MapFunction<Project, OaBrokerProject>) ConversionUtils::oafProjectToBrokerProject,
+				Encoders.bean(OaBrokerProject.class));

 		final Dataset<Relation> rels = ClusterUtils
 			.loadRelations(graphPath, spark)
-			.filter(r -> r.getDataInfo().getDeletedbyinference())
-			.filter(r -> r.getRelType().equals(ModelConstants.RESULT_PROJECT))
-			.filter(r -> !r.getRelClass().equals(BrokerConstants.IS_MERGED_IN_CLASS))
-			.filter(r -> !ClusterUtils.isDedupRoot(r.getSource()))
-			.filter(r -> !ClusterUtils.isDedupRoot(r.getTarget()));
+			.filter((FilterFunction<Relation>) r -> r.getDataInfo().getDeletedbyinference())
+			.filter((FilterFunction<Relation>) r -> r.getRelType().equals(ModelConstants.RESULT_PROJECT))
+			.filter((FilterFunction<Relation>) r -> !r.getRelClass().equals(BrokerConstants.IS_MERGED_IN_CLASS))
+			.filter((FilterFunction<Relation>) r -> !ClusterUtils.isDedupRoot(r.getSource()))
+			.filter((FilterFunction<Relation>) r -> !ClusterUtils.isDedupRoot(r.getTarget()));

 		final Dataset<RelatedProject> dataset = rels
 			.joinWith(projects, projects.col("openaireId").equalTo(rels.col("target")), "inner")
-			.map(t -> new RelatedProject(t._1.getSource(), t._2), Encoders.bean(RelatedProject.class));
+			.map(
+				(MapFunction<Tuple2<Relation, OaBrokerProject>, RelatedProject>) t -> new RelatedProject(
+					t._1.getSource(), t._2),
+				Encoders.bean(RelatedProject.class));

 		ClusterUtils.save(dataset, relsPath, RelatedProject.class, total);

@@ -7,6 +7,8 @@ import java.util.Optional;

 import org.apache.commons.io.IOUtils;
 import org.apache.spark.SparkConf;
+import org.apache.spark.api.java.function.FilterFunction;
+import org.apache.spark.api.java.function.MapFunction;
 import org.apache.spark.sql.Dataset;
 import org.apache.spark.sql.Encoders;
 import org.apache.spark.util.LongAccumulator;

@@ -21,6 +23,7 @@ import eu.dnetlib.dhp.broker.oa.util.aggregators.withRels.RelatedPublication;
 import eu.dnetlib.dhp.schema.common.ModelConstants;
 import eu.dnetlib.dhp.schema.oaf.Publication;
 import eu.dnetlib.dhp.schema.oaf.Relation;
+import scala.Tuple2;

 public class PrepareRelatedPublicationsJob {

@@ -59,22 +62,22 @@ public class PrepareRelatedPublicationsJob {

 		final Dataset<OaBrokerRelatedPublication> pubs = ClusterUtils
 			.readPath(spark, graphPath + "/publication", Publication.class)
-			.filter(p -> !ClusterUtils.isDedupRoot(p.getId()))
+			.filter((FilterFunction<Publication>) p -> !ClusterUtils.isDedupRoot(p.getId()))
 			.map(
-				ConversionUtils::oafPublicationToBrokerPublication,
+				(MapFunction<Publication, OaBrokerRelatedPublication>) ConversionUtils::oafPublicationToBrokerPublication,
 				Encoders.bean(OaBrokerRelatedPublication.class));

 		final Dataset<Relation> rels = ClusterUtils
 			.loadRelations(graphPath, spark)
-			.filter(r -> r.getDataInfo().getDeletedbyinference())
-			.filter(r -> r.getRelType().equals(ModelConstants.RESULT_RESULT))
-			.filter(r -> ClusterUtils.isValidResultResultClass(r.getRelClass()))
-			.filter(r -> !ClusterUtils.isDedupRoot(r.getSource()))
-			.filter(r -> !ClusterUtils.isDedupRoot(r.getTarget()));
+			.filter((FilterFunction<Relation>) r -> r.getDataInfo().getDeletedbyinference())
+			.filter((FilterFunction<Relation>) r -> r.getRelType().equals(ModelConstants.RESULT_RESULT))
+			.filter((FilterFunction<Relation>) r -> ClusterUtils.isValidResultResultClass(r.getRelClass()))
+			.filter((FilterFunction<Relation>) r -> !ClusterUtils.isDedupRoot(r.getSource()))
+			.filter((FilterFunction<Relation>) r -> !ClusterUtils.isDedupRoot(r.getTarget()));

 		final Dataset<RelatedPublication> dataset = rels
 			.joinWith(pubs, pubs.col("openaireId").equalTo(rels.col("target")), "inner")
-			.map(t -> {
+			.map((MapFunction<Tuple2<Relation, OaBrokerRelatedPublication>, RelatedPublication>) t -> {
 				final RelatedPublication rel = new RelatedPublication(
 					t._1.getSource(), t._2);
 				rel.getRelPublication().setRelType(t._1.getRelClass());

@@ -7,7 +7,10 @@ import java.util.Optional;

 import org.apache.commons.io.IOUtils;
 import org.apache.spark.SparkConf;
+import org.apache.spark.api.java.function.FilterFunction;
+import org.apache.spark.api.java.function.MapFunction;
 import org.apache.spark.sql.Dataset;
+import org.apache.spark.sql.Encoder;
 import org.apache.spark.sql.Encoders;
 import org.apache.spark.util.LongAccumulator;
 import org.slf4j.Logger;

@@ -22,6 +25,7 @@ import eu.dnetlib.dhp.broker.oa.util.aggregators.withRels.RelatedSoftware;
 import eu.dnetlib.dhp.schema.common.ModelConstants;
 import eu.dnetlib.dhp.schema.oaf.Relation;
 import eu.dnetlib.dhp.schema.oaf.Software;
+import scala.Tuple2;

 public class PrepareRelatedSoftwaresJob {

@@ -58,22 +62,30 @@ public class PrepareRelatedSoftwaresJob {

 		final LongAccumulator total = spark.sparkContext().longAccumulator("total_rels");

+		final Encoder<OaBrokerRelatedSoftware> obrsEncoder = Encoders.bean(OaBrokerRelatedSoftware.class);
 		final Dataset<OaBrokerRelatedSoftware> softwares = ClusterUtils
 			.readPath(spark, graphPath + "/software", Software.class)
-			.filter(sw -> !ClusterUtils.isDedupRoot(sw.getId()))
-			.map(ConversionUtils::oafSoftwareToBrokerSoftware, Encoders.bean(OaBrokerRelatedSoftware.class));
+			.filter((FilterFunction<Software>) sw -> !ClusterUtils.isDedupRoot(sw.getId()))
+			.map(
+				(MapFunction<Software, OaBrokerRelatedSoftware>) ConversionUtils::oafSoftwareToBrokerSoftware,
+				obrsEncoder);

-		final Dataset<Relation> rels = ClusterUtils
+		final Dataset<Relation> rels;
+		rels = ClusterUtils
 			.loadRelations(graphPath, spark)
-			.filter(r -> r.getDataInfo().getDeletedbyinference())
-			.filter(r -> r.getRelType().equals(ModelConstants.RESULT_RESULT))
-			.filter(r -> !r.getRelClass().equals(BrokerConstants.IS_MERGED_IN_CLASS))
-			.filter(r -> !ClusterUtils.isDedupRoot(r.getSource()))
-			.filter(r -> !ClusterUtils.isDedupRoot(r.getTarget()));
+			.filter((FilterFunction<Relation>) r -> r.getDataInfo().getDeletedbyinference())
+			.filter((FilterFunction<Relation>) r -> r.getRelType().equals(ModelConstants.RESULT_RESULT))
+			.filter((FilterFunction<Relation>) r -> !r.getRelClass().equals(BrokerConstants.IS_MERGED_IN_CLASS))
+			.filter((FilterFunction<Relation>) r -> !ClusterUtils.isDedupRoot(r.getSource()))
+			.filter((FilterFunction<Relation>) r -> !ClusterUtils.isDedupRoot(r.getTarget()));

+		final Encoder<RelatedSoftware> rsEncoder = Encoders.bean(RelatedSoftware.class);
 		final Dataset<RelatedSoftware> dataset = rels
 			.joinWith(softwares, softwares.col("openaireId").equalTo(rels.col("target")), "inner")
-			.map(t -> new RelatedSoftware(t._1.getSource(), t._2), Encoders.bean(RelatedSoftware.class));
+			.map(
+				(MapFunction<Tuple2<Relation, OaBrokerRelatedSoftware>, RelatedSoftware>) t -> new RelatedSoftware(
+					t._1.getSource(), t._2),
+				rsEncoder);

 		ClusterUtils.save(dataset, relsPath, RelatedSoftware.class, total);
@@ -7,7 +7,10 @@ import java.util.Optional;

 import org.apache.commons.io.IOUtils;
 import org.apache.spark.SparkConf;
+import org.apache.spark.api.java.function.FilterFunction;
+import org.apache.spark.api.java.function.MapFunction;
 import org.apache.spark.sql.Dataset;
+import org.apache.spark.sql.Encoder;
 import org.apache.spark.sql.Encoders;
 import org.apache.spark.sql.SparkSession;
 import org.apache.spark.util.LongAccumulator;

@@ -73,11 +76,12 @@ public class PrepareSimpleEntititiesJob {
 		final String graphPath,
 		final Class<SRC> sourceClass) {

+		final Encoder<OaBrokerMainEntity> encoder = Encoders.bean(OaBrokerMainEntity.class);
 		return ClusterUtils
 			.readPath(spark, graphPath + "/" + sourceClass.getSimpleName().toLowerCase(), sourceClass)
-			.filter(r -> !ClusterUtils.isDedupRoot(r.getId()))
-			.filter(r -> r.getDataInfo().getDeletedbyinference())
-			.map(ConversionUtils::oafResultToBrokerResult, Encoders.bean(OaBrokerMainEntity.class));
+			.filter((FilterFunction<SRC>) r -> !ClusterUtils.isDedupRoot(r.getId()))
+			.filter((FilterFunction<SRC>) r -> r.getDataInfo().getDeletedbyinference())
+			.map((MapFunction<SRC, OaBrokerMainEntity>) ConversionUtils::oafResultToBrokerResult, encoder);
 	}

 }

@@ -34,7 +34,7 @@ public class ClusterUtils {
 	public static Dataset<Relation> loadRelations(final String graphPath, final SparkSession spark) {
 		return ClusterUtils
 			.readPath(spark, graphPath + "/relation", Relation.class)
-			.map(r -> {
+			.map((MapFunction<Relation, Relation>) r -> {
 				r.setSource(ConversionUtils.cleanOpenaireId(r.getSource()));
 				r.setTarget(ConversionUtils.cleanOpenaireId(r.getTarget()));
 				return r;

@@ -75,7 +75,7 @@ public class ClusterUtils {
 		final Class<T> clazz,
 		final LongAccumulator acc) {
 		dataset
-			.map(o -> ClusterUtils.incrementAccumulator(o, acc), Encoders.bean(clazz))
+			.map((MapFunction<T, T>) o -> ClusterUtils.incrementAccumulator(o, acc), Encoders.bean(clazz))
 			.write()
 			.mode(SaveMode.Overwrite)
 			.option("compression", "gzip")

@@ -105,7 +105,6 @@ public final class UpdateInfo<T> {
 			.map(OaBrokerInstance::getUrl)
 			.findFirst()
 			.orElse(null);
-		;

 		final OaBrokerProvenance provenance = new OaBrokerProvenance(provId, provRepo, provType, provUrl);

@@ -44,7 +44,7 @@ public class GroupEntitiesSparkJob {

 	private final static String ID_JPATH = "$.id";

-	private static ObjectMapper OBJECT_MAPPER = new ObjectMapper()
+	private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper()
 		.configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES, false);

 	public static void main(String[] args) throws Exception {

@@ -11,7 +11,7 @@ import eu.dnetlib.dhp.schema.oaf.Relation;

 public class RelationAggregator extends Aggregator<Relation, Relation, Relation> {

-	private static Relation ZERO = new Relation();
+	private static final Relation ZERO = new Relation();

 	@Override
 	public Relation zero() {

@@ -161,7 +161,7 @@ public class SparkPropagateRelation extends AbstractSparkAction {
 	}

 	private FilterFunction<Relation> getRelationFilterFunction() {
-		return (FilterFunction<Relation>) r -> StringUtils.isNotBlank(r.getSource()) ||
+		return r -> StringUtils.isNotBlank(r.getSource()) ||
 			StringUtils.isNotBlank(r.getTarget()) ||
 			StringUtils.isNotBlank(r.getRelClass()) ||
 			StringUtils.isNotBlank(r.getSubRelType()) ||

@@ -30,7 +30,7 @@ public class EntityMergerTest implements Serializable {

 	private String testEntityBasePath;
 	private DataInfo dataInfo;
-	private String dedupId = "00|dedup_id::1";
+	private final String dedupId = "00|dedup_id::1";
 	private Publication pub_top;

 	@BeforeEach

@@ -30,7 +30,7 @@ import scala.Tuple2;
 @TestMethodOrder(MethodOrderer.OrderAnnotation.class)
 public class IdGeneratorTest {

-	private static ObjectMapper OBJECT_MAPPER = new ObjectMapper()
+	private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper()
 		.configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES, false);

 	private static List<Identifier<Publication>> bestIds;

@@ -44,10 +44,10 @@ import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService;
 @TestMethodOrder(MethodOrderer.OrderAnnotation.class)
 public class SparkOpenorgsDedupTest implements Serializable {

-	private static String dbUrl = "jdbc:h2:mem:openorgs_test;DB_CLOSE_DELAY=-1;DATABASE_TO_UPPER=false";
-	private static String dbUser = "sa";
-	private static String dbTable = "tmp_dedup_events";
-	private static String dbPwd = "";
+	private static final String dbUrl = "jdbc:h2:mem:openorgs_test;DB_CLOSE_DELAY=-1;DATABASE_TO_UPPER=false";
+	private static final String dbUser = "sa";
+	private static final String dbTable = "tmp_dedup_events";
+	private static final String dbPwd = "";

 	@Mock(serializable = true)
 	ISLookUpService isLookUpService;

@@ -101,9 +101,8 @@ public class CrossrefImporter {
 				int size = decompresser.inflate(buffer);
 				bos.write(buffer, 0, size);
 			}
-			byte[] unzippeddata = bos.toByteArray();
 			decompresser.end();
-			return new String(unzippeddata);
+			return bos.toString();
 		} catch (Throwable e) {
 			throw new RuntimeException("Wrong record:" + blob, e);
 		}
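ByteArrayOutputStream.toString() decodes the stream's internal buffer with the platform default charset — exactly what new String(bos.toByteArray()) does — but without materialising the intermediate byte[] copy. (If a specific encoding were intended, bos.toString("UTF-8") would be the explicit spelling.) A tiny sketch:

    import java.io.ByteArrayOutputStream;

    class BosToString {
    	public static void main(String[] args) {
    		final ByteArrayOutputStream bos = new ByteArrayOutputStream();
    		bos.write('h');
    		bos.write('i');
    		// Same text, one fewer buffer copy than new String(bos.toByteArray()).
    		System.out.println(bos.toString().equals(new String(bos.toByteArray()))); // true
    	}
    }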
@ -113,7 +113,7 @@ public class ActivitiesDecompressor {
|
|||
}
|
||||
|
||||
} else {
|
||||
Log.warn("Data not retrievable [" + entry.getName() + "] " + buffer.toString());
|
||||
Log.warn("Data not retrievable [" + entry.getName() + "] " + buffer);
|
||||
xmlParserErrorFound += 1;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -16,13 +16,13 @@ public class ExtractXMLActivitiesData extends OrcidDSManager {
|
|||
private String outputWorksPath;
|
||||
private String activitiesFileNameTarGz;
|
||||
|
||||
public static void main(String[] args) throws IOException, Exception {
|
||||
public static void main(String[] args) throws Exception {
|
||||
ExtractXMLActivitiesData extractXMLActivitiesData = new ExtractXMLActivitiesData();
|
||||
extractXMLActivitiesData.loadArgs(args);
|
||||
extractXMLActivitiesData.extractWorks();
|
||||
}
|
||||
|
||||
private void loadArgs(String[] args) throws IOException, Exception {
|
||||
private void loadArgs(String[] args) throws Exception {
|
||||
final ArgumentApplicationParser parser = new ArgumentApplicationParser(
|
||||
IOUtils
|
||||
.toString(
|
||||
|
|
|
@ -17,13 +17,13 @@ public class ExtractXMLSummariesData extends OrcidDSManager {
|
|||
private String outputAuthorsPath;
|
||||
private String summariesFileNameTarGz;
|
||||
|
||||
public static void main(String[] args) throws IOException, Exception {
|
||||
public static void main(String[] args) throws Exception {
|
||||
ExtractXMLSummariesData extractXMLSummariesData = new ExtractXMLSummariesData();
|
||||
extractXMLSummariesData.loadArgs(args);
|
||||
extractXMLSummariesData.extractAuthors();
|
||||
}
|
||||
|
||||
private void loadArgs(String[] args) throws IOException, Exception {
|
||||
private void loadArgs(String[] args) throws Exception {
|
||||
final ArgumentApplicationParser parser = new ArgumentApplicationParser(
|
||||
IOUtils
|
||||
.toString(
|
||||
|
|
|
@ -16,7 +16,7 @@ public class OrcidAuthorsDOIsDataGen extends OrcidDSManager {
|
|||
private String activitiesFileNameTarGz;
|
||||
private String outputAuthorsDOIsPath;
|
||||
|
||||
public static void main(String[] args) throws IOException, Exception {
|
||||
public static void main(String[] args) throws Exception {
|
||||
OrcidAuthorsDOIsDataGen orcidAuthorsDOIsDataGen = new OrcidAuthorsDOIsDataGen();
|
||||
orcidAuthorsDOIsDataGen.loadArgs(args);
|
||||
orcidAuthorsDOIsDataGen.generateAuthorsDOIsData();
|
||||
|
@ -30,7 +30,7 @@ public class OrcidAuthorsDOIsDataGen extends OrcidDSManager {
|
|||
ActivitiesDecompressor.parseGzActivities(conf, tarGzUri, outputPath);
|
||||
}
|
||||
|
||||
private void loadArgs(String[] args) throws IOException, Exception {
|
||||
private void loadArgs(String[] args) throws Exception {
|
||||
final ArgumentApplicationParser parser = new ArgumentApplicationParser(
|
||||
IOUtils
|
||||
.toString(
|
||||
|
|
|
@ -19,7 +19,7 @@ public class OrcidDSManager {
|
|||
private String summariesFileNameTarGz;
|
||||
private String outputAuthorsPath;
|
||||
|
||||
public static void main(String[] args) throws IOException, Exception {
|
||||
public static void main(String[] args) throws Exception {
|
||||
OrcidDSManager orcidDSManager = new OrcidDSManager();
|
||||
orcidDSManager.loadArgs(args);
|
||||
orcidDSManager.generateAuthors();
|
||||
|
@ -56,7 +56,7 @@ public class OrcidDSManager {
|
|||
return fs;
|
||||
}
|
||||
|
||||
private void loadArgs(String[] args) throws IOException, Exception {
|
||||
private void loadArgs(String[] args) throws Exception {
|
||||
final ArgumentApplicationParser parser = new ArgumentApplicationParser(
|
||||
IOUtils
|
||||
.toString(
|
||||
|
|
|
@@ -46,7 +46,7 @@ public class SparkDownloadOrcidWorks {
     public static final DateTimeFormatter ORCID_XML_DATETIMEFORMATTER = DateTimeFormatter
         .ofPattern(ORCID_XML_DATETIME_FORMAT);

-    public static void main(String[] args) throws IOException, Exception {
+    public static void main(String[] args) throws Exception {

         final ArgumentApplicationParser parser = new ArgumentApplicationParser(
             IOUtils
@@ -246,6 +246,6 @@ public class SparkDownloadOrcidWorks {
                 return name.getAsString();
             }
         }
-        return new String("");
+        return "";
     }
 }

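Note: another recurring fix replaces `new String("")` with the literal `""`. The String copy constructor allocates a fresh object on every call, while the literal is interned and shared. A minimal sketch of the difference (illustrative only):

    public class StringLiteralDemo {
        public static void main(String[] args) {
            String a = "";               // interned constant, shared JVM-wide
            String b = new String("");   // needless fresh allocation
            System.out.println(a == b);      // false: two distinct objects
            System.out.println(a.equals(b)); // true: identical content
        }
    }
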
@@ -33,7 +33,7 @@ public class SparkGenLastModifiedSeq {
     private static String outputPath;
     private static String lambdaFileName;

-    public static void main(String[] args) throws IOException, Exception {
+    public static void main(String[] args) throws Exception {
         final ArgumentApplicationParser parser = new ArgumentApplicationParser(
             IOUtils
                 .toString(

@@ -36,7 +36,7 @@ import scala.Tuple2;

 public class SparkGenerateDoiAuthorList {

-    public static void main(String[] args) throws IOException, Exception {
+    public static void main(String[] args) throws Exception {
         Logger logger = LoggerFactory.getLogger(SparkGenerateDoiAuthorList.class);
         logger.info("[ SparkGenerateDoiAuthorList STARTED]");

@@ -38,7 +38,7 @@ public class SparkUpdateOrcidAuthors {
     private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper()
         .setSerializationInclusion(JsonInclude.Include.NON_NULL);

-    public static void main(String[] args) throws IOException, Exception {
+    public static void main(String[] args) throws Exception {
         Logger logger = LoggerFactory.getLogger(SparkUpdateOrcidAuthors.class);

         final ArgumentApplicationParser parser = new ArgumentApplicationParser(
@@ -204,7 +204,7 @@ public class SparkUpdateOrcidAuthors {
                         a -> a._1().equals(authorSummary.getAuthorData().getOid()) &&
                             a._2().equals(authorSummary.getDownloadDate()))
                     .count() == 1;
-                return (oidFound && tsFound) || (!oidFound);
+                return !oidFound || tsFound;
             });

         Dataset<AuthorSummary> cleanedDS = spark

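Note: the rewrite of `(oidFound && tsFound) || (!oidFound)` to `!oidFound || tsFound` is a standard Boolean algebra step: `(A && B) || !A` distributes to `(A || !A) && (!A || B)`, which reduces to `!A || B`. A quick exhaustive check (illustrative only):

    public class BoolSimplifyDemo {
        public static void main(String[] args) {
            boolean[] values = { false, true };
            for (boolean oidFound : values) {
                for (boolean tsFound : values) {
                    boolean before = (oidFound && tsFound) || (!oidFound);
                    boolean after = !oidFound || tsFound;
                    // Prints true for all four combinations.
                    System.out.println(before == after);
                }
            }
        }
    }
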
@@ -38,7 +38,7 @@ public class SparkUpdateOrcidDatasets {
     private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper()
         .setSerializationInclusion(JsonInclude.Include.NON_NULL);

-    public static void main(String[] args) throws IOException, Exception {
+    public static void main(String[] args) throws Exception {
         Logger logger = LoggerFactory.getLogger(SparkUpdateOrcidDatasets.class);

         final ArgumentApplicationParser parser = new ArgumentApplicationParser(

@@ -35,7 +35,7 @@ public class SparkUpdateOrcidWorks {
     private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper()
         .setSerializationInclusion(JsonInclude.Include.NON_NULL);

-    public static void main(String[] args) throws IOException, Exception {
+    public static void main(String[] args) throws Exception {
         Logger logger = LoggerFactory.getLogger(SparkUpdateOrcidWorks.class);

         final ArgumentApplicationParser parser = new ArgumentApplicationParser(

@@ -124,7 +124,7 @@ public class SummariesDecompressor {
             }

         } else {
-            Log.warn("Data not retrievable [" + entry.getName() + "] " + buffer.toString());
+            Log.warn("Data not retrievable [" + entry.getName() + "] " + buffer);
             xmlParserErrorFound += 1;
         }
     }

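Note: dropping the explicit `.toString()` inside string concatenation is behavior-preserving. The `+` operator already applies `String.valueOf(...)` to its operands, which calls `toString()` on non-null references and renders a null reference as "null" instead of throwing. Sketch (illustrative only):

    public class ConcatDemo {
        public static void main(String[] args) {
            StringBuilder buffer = new StringBuilder("details");
            // Both lines print the same text; the second is simpler
            // and also null-safe if buffer were ever null.
            System.out.println("Data not retrievable " + buffer.toString());
            System.out.println("Data not retrievable " + buffer);
        }
    }
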
@@ -35,7 +35,7 @@ public class HDFSUtil {
         if (!fileSystem.exists(toReadPath)) {
             throw new RuntimeException("File not exist: " + path);
         }
-        logger.info("Last_update_path " + toReadPath.toString());
+        logger.info("Last_update_path " + toReadPath);
         FSDataInputStream inputStream = new FSDataInputStream(fileSystem.open(toReadPath));
         BufferedReader br = new BufferedReader(new InputStreamReader(inputStream));
         StringBuffer sb = new StringBuffer();
@@ -60,7 +60,7 @@ public class HDFSUtil {
             fileSystem.delete(toWritePath, true);
         }
         FSDataOutputStream os = fileSystem.create(toWritePath);
-        BufferedWriter br = new BufferedWriter(new OutputStreamWriter(os, "UTF-8"));
+        BufferedWriter br = new BufferedWriter(new OutputStreamWriter(os, StandardCharsets.UTF_8));
         br.write(text);
         br.close();
     }

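Note: replacing the charset name `"UTF-8"` with `StandardCharsets.UTF_8` removes a runtime string lookup and the checked `UnsupportedEncodingException` path; the constant is resolved at compile time. A sketch of the two forms, assuming a plain `OutputStream` (illustrative only):

    import java.io.*;
    import java.nio.charset.StandardCharsets;

    public class CharsetDemo {

        static Writer before(OutputStream os) throws UnsupportedEncodingException {
            return new OutputStreamWriter(os, "UTF-8"); // string lookup, checked exception
        }

        static Writer after(OutputStream os) {
            return new OutputStreamWriter(os, StandardCharsets.UTF_8); // no checked exception
        }
    }
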
@@ -39,7 +39,7 @@ public class XMLRecordParser {
     private static final String NS_ERROR = "error";

     public static AuthorData VTDParseAuthorData(byte[] bytes)
-        throws VtdException, EncodingException, EOFException, EntityException, ParseException {
+        throws VtdException, ParseException {
         final VTDGen vg = new VTDGen();
         vg.setDoc(bytes);
         vg.parse(true);
@@ -134,7 +134,7 @@ public class XMLRecordParser {
     }

     public static WorkData VTDParseWorkData(byte[] bytes)
-        throws VtdException, EncodingException, EOFException, EntityException, ParseException {
+        throws VtdException, ParseException {
         final VTDGen vg = new VTDGen();
         vg.setDoc(bytes);
         vg.parse(true);

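Note: this pruning appears to rely on the narrower VTD-XML exceptions (`EncodingException`, `EOFException`, `EntityException`) being subclasses of `ParseException` in VTD-XML's hierarchy, so the shorter clause declares the same contract. The general rule, with an illustrative hierarchy of my own (not VTD-XML's actual classes):

    public class ThrowsPruneDemo {

        static class Base extends Exception {}
        static class Sub extends Base {}

        // Declaring the superclass alone covers every subclass.
        static void parse() throws Base {
            throw new Sub();
        }
    }
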
@@ -123,7 +123,7 @@ public class ActivitiesDumpReader {
             }

         } else {
-            Log.warn("Data not retrievable [" + entry.getName() + "] " + buffer.toString());
+            Log.warn("Data not retrievable [" + entry.getName() + "] " + buffer);
             xmlParserErrorFound += 1;
         }
     }

@@ -22,7 +22,7 @@ public class GenOrcidAuthorWork extends OrcidDSManager {
     private String activitiesFileNameTarGz;
     private String outputWorksPath;

-    public static void main(String[] args) throws IOException, Exception {
+    public static void main(String[] args) throws Exception {
         GenOrcidAuthorWork genOrcidAuthorWork = new GenOrcidAuthorWork();
         genOrcidAuthorWork.loadArgs(args);
         genOrcidAuthorWork.generateAuthorsDOIsData();
@@ -36,7 +36,7 @@ public class GenOrcidAuthorWork extends OrcidDSManager {
         ActivitiesDumpReader.parseGzActivities(conf, tarGzUri, outputPath);
     }

-    private void loadArgs(String[] args) throws IOException, Exception {
+    private void loadArgs(String[] args) throws Exception {
         final ArgumentApplicationParser parser = new ArgumentApplicationParser(
             IOUtils
                 .toString(

@@ -52,7 +52,7 @@ public class SparkGenEnrichedOrcidWorks {

     private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();

-    public static void main(String[] args) throws IOException, Exception {
+    public static void main(String[] args) throws Exception {

         final ArgumentApplicationParser parser = new ArgumentApplicationParser(
             IOUtils
@@ -169,7 +169,7 @@ public class SparkGenEnrichedOrcidWorks {
         oafPublicationRDD
             .mapToPair(
                 p -> new Tuple2<>(p.getClass().toString(),
-                    OBJECT_MAPPER.writeValueAsString(new AtomicAction<>(Publication.class, (Publication) p))))
+                    OBJECT_MAPPER.writeValueAsString(new AtomicAction<>(Publication.class, p))))
             .mapToPair(t -> new Tuple2(new Text(t._1()), new Text(t._2())))
             .saveAsNewAPIHadoopFile(
                 workingPath.concat(outputEnrichedWorksPath),

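Note: the cast `(Publication) p` was redundant because `p` is already statically typed as `Publication` at that point; removing such casts changes nothing in the emitted bytecode. A trivial sketch of the pattern (illustrative only):

    import java.util.ArrayList;
    import java.util.List;

    public class RedundantCastDemo {
        public static void main(String[] args) {
            List<String> items = new ArrayList<>();
            items.add("x");
            for (String s : items) {
                String a = (String) s; // redundant: s is already statically a String
                String b = s;          // identical without the cast
                System.out.println(a.equals(b));
            }
        }
    }
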
@@ -16,7 +16,7 @@ import eu.dnetlib.doiboost.orcid.model.WorkData;
 public class JsonWriter {

     public static final com.fasterxml.jackson.databind.ObjectMapper OBJECT_MAPPER = new ObjectMapper()
-        .setSerializationInclusion(JsonInclude.Include.NON_NULL);;
+        .setSerializationInclusion(JsonInclude.Include.NON_NULL);

     public static String create(AuthorData authorData) throws JsonProcessingException {
         return OBJECT_MAPPER.writeValueAsString(authorData);

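Note: the stray second semicolon was an empty statement, so deleting it is purely syntactic. The `NON_NULL` inclusion itself means null fields are omitted from the serialized JSON. A sketch of the effect, with a hypothetical bean of my own (not a class from this codebase):

    import com.fasterxml.jackson.annotation.JsonInclude;
    import com.fasterxml.jackson.databind.ObjectMapper;

    public class NonNullDemo {

        // Hypothetical bean with one null field.
        public static class Author {
            public String name = "Ada";
            public String surname = null;
        }

        public static void main(String[] args) throws Exception {
            ObjectMapper mapper = new ObjectMapper()
                .setSerializationInclusion(JsonInclude.Include.NON_NULL);
            // Prints {"name":"Ada"} - the null surname is omitted.
            System.out.println(mapper.writeValueAsString(new Author()));
        }
    }
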
@@ -88,7 +88,7 @@ public class PublicationToOaf implements Serializable {
         this.dateOfCollection = null;
     }

-    private static Map<String, Pair<String, String>> datasources = new HashMap<String, Pair<String, String>>() {
+    private static final Map<String, Pair<String, String>> datasources = new HashMap<String, Pair<String, String>>() {

         {
             put(
@@ -99,7 +99,7 @@ public class PublicationToOaf implements Serializable {
     };

     // json external id will be mapped to oaf:pid/@classid Map to oaf:pid/@classname
-    private static Map<String, Pair<String, String>> externalIds = new HashMap<String, Pair<String, String>>() {
+    private static final Map<String, Pair<String, String>> externalIds = new HashMap<String, Pair<String, String>>() {

         {
             put("ark".toLowerCase(), new Pair<>("ark", "ark"));
@@ -529,9 +529,7 @@ public class PublicationToOaf implements Serializable {
         if (jsonArray.isJsonNull()) {
             return false;
         }
-        if (jsonArray.get(0).isJsonNull()) {
-            return false;
-        }
-        return true;
+        return !jsonArray.get(0).isJsonNull();
     }

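Note: marking the lookup maps `final` documents that the references never change. The double-brace initialization still in place does create an anonymous HashMap subclass per map; a plain `static` initializer block (or `Map.of` on Java 9+) would avoid that extra class. A sketch of the alternative, with illustrative keys:

    import java.util.HashMap;
    import java.util.Map;

    public class StaticMapDemo {

        private static final Map<String, String> EXTERNAL_IDS = new HashMap<>();

        static {
            EXTERNAL_IDS.put("ark", "ark");
            EXTERNAL_IDS.put("doi", "Digital Object Identifier");
        }
    }
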
@@ -142,7 +142,7 @@ public class AuthorMatcher {

     public static String normalize(final String s) {
         if (s == null) {
-            return new String("");
+            return "";
         }
         return nfd(s)
             .toLowerCase()
@@ -189,16 +189,13 @@ public class AuthorMatcher {
     }

     private static void updateRanks(List<Contributor> contributors) {
-        boolean seqFound = false;
-        if (contributors
+        boolean seqFound = contributors
             .stream()
             .filter(
                 c -> c.getRole() != null && c.getSequence() != null &&
                     c.getRole().equals("author") && (c.getSequence().equals("first") ||
                         c.getSequence().equals("additional")))
-            .count() > 0) {
-            seqFound = true;
-        }
+            .count() > 0;
         if (!seqFound) {
             List<Integer> seqIds = Arrays.asList(0);
             contributors.forEach(c -> {

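Note: inlining the flag assignment removes the mutable `seqFound` variable. A further idiomatic step, not taken in this commit, would be `anyMatch`, which also short-circuits instead of counting every element. A sketch with a hypothetical minimal stand-in for the real Contributor bean:

    import java.util.List;

    public class AnyMatchDemo {

        // Hypothetical stand-in; the real Contributor bean has getters instead.
        static class Contributor {
            String role;
            String sequence;
        }

        static boolean seqFound(List<Contributor> contributors) {
            return contributors
                .stream()
                .anyMatch(c -> c.role != null && c.sequence != null
                    && c.role.equals("author")
                    && (c.sequence.equals("first") || c.sequence.equals("additional")));
        }
    }
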
@@ -20,7 +20,7 @@ public class DumpToActionsUtility {
     public static String getStringValue(final JsonObject root, final String key) {
         if (root.has(key) && !root.get(key).isJsonNull())
             return root.get(key).getAsString();
-        return new String("");
+        return "";
     }

     public static List<String> getArrayValues(final JsonObject root, final String key) {

@@ -3,9 +3,9 @@ package eu.dnetlib.doiboost.orcidnodoi.util;

 public class Pair<K, V> {

-    private K k;
+    private final K k;

-    private V v;
+    private final V v;

     public Pair(K k, V v) {
         this.k = k;

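Note: final fields assigned once in the constructor make `Pair` shallowly immutable: the compiler rejects any later reassignment, and the Java memory model guarantees the values are safely visible to other threads after construction. A sketch of the resulting shape (illustrative class name):

    public class ImmutablePairDemo<K, V> {

        private final K k; // assigned exactly once, in the constructor
        private final V v;

        public ImmutablePairDemo(K k, V v) {
            this.k = k;
            this.v = v;
        }

        public K getKey() { return k; }
        public V getValue() { return v; }
    }
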
@@ -43,7 +43,7 @@ public class XMLRecordParserNoDoi {
     private static final String NS_ERROR = "error";

     public static WorkDetail VTDParseWorkData(byte[] bytes)
-        throws VtdException, EncodingException, EOFException, EntityException, ParseException, XPathParseException,
+        throws VtdException, ParseException, XPathParseException,
         NavException, XPathEvalException {
         final VTDGen vg = new VTDGen();
         vg.setDoc(bytes);

File diff suppressed because one or more lines are too long
@@ -83,7 +83,7 @@ public class OrcidClientTest {
         } catch (Throwable e) {
             e.printStackTrace();
         }
-        return new String("");
+        return "";
     }

     // @Test
@@ -142,7 +142,7 @@ public class OrcidClientTest {
     }

     private void testDate(String value) throws ParseException {
-        System.out.println(value.toString());
+        System.out.println(value);
         if (value.length() != 19) {
             value = value.substring(0, 19);
         }
@@ -185,7 +185,7 @@ public class OrcidClientTest {
         br = new BufferedReader(new InputStreamReader(input)); // Read directly from tarInput
         String line;
         while ((line = br.readLine()) != null) {
-            String[] values = line.toString().split(",");
+            String[] values = line.split(",");
             List<String> recordInfo = Arrays.asList(values);
             assertTrue(recordInfo.size() == 4);
             String orcid = recordInfo.get(0);
@@ -260,7 +260,7 @@ public class OrcidClientTest {
         } catch (Throwable e) {
             e.printStackTrace();
         }
-        return new String("");
+        return "";
     }

     @Test

@@ -44,7 +44,7 @@ public class XMLRecordParserTest {

         XMLRecordParser p = new XMLRecordParser();

-        AuthorData authorData = p.VTDParseAuthorData(xml.getBytes());
+        AuthorData authorData = XMLRecordParser.VTDParseAuthorData(xml.getBytes());
         assertNotNull(authorData);
         assertNotNull(authorData.getName());
         System.out.println("name: " + authorData.getName());
@@ -60,7 +60,7 @@ public class XMLRecordParserTest {

         XMLRecordParser p = new XMLRecordParser();

-        AuthorData authorData = p.VTDParseAuthorData(xml.getBytes());
+        AuthorData authorData = XMLRecordParser.VTDParseAuthorData(xml.getBytes());
         assertNotNull(authorData);
         assertNotNull(authorData.getErrorCode());
         System.out.println("error: " + authorData.getErrorCode());
@@ -75,7 +75,7 @@ public class XMLRecordParserTest {

         XMLRecordParser p = new XMLRecordParser();

-        WorkData workData = p.VTDParseWorkData(xml.getBytes());
+        WorkData workData = XMLRecordParser.VTDParseWorkData(xml.getBytes());
         assertNotNull(workData);
         assertNotNull(workData.getOid());
         System.out.println("oid: " + workData.getOid());

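Note: calling a static method through an instance (`p.VTDParseAuthorData(...)`) compiles, but it hides the fact that no instance state is involved, and the call is resolved at compile time against the declared type rather than the runtime object. Qualifying by class name makes the call site honest. Sketch (illustrative only):

    public class StaticAccessDemo {

        static int parse() {
            return 42;
        }

        public static void main(String[] args) {
            StaticAccessDemo p = new StaticAccessDemo();
            int a = p.parse();                // legal, but misleading
            int b = StaticAccessDemo.parse(); // preferred: the call site names the class
            System.out.println(a == b);
        }
    }
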
@@ -50,7 +50,7 @@ public class OrcidNoDoiTest {
         }
         WorkDetail workData = null;
         try {
-            workData = p.VTDParseWorkData(xml.getBytes());
+            workData = XMLRecordParserNoDoi.VTDParseWorkData(xml.getBytes());
         } catch (Exception e) {
             logger.error("parsing xml", e);
         }
@@ -107,7 +107,7 @@ public class OrcidNoDoiTest {
         }
         WorkDetail workData = null;
         try {
-            workData = p.VTDParseWorkData(xml.getBytes());
+            workData = XMLRecordParserNoDoi.VTDParseWorkData(xml.getBytes());
         } catch (Exception e) {
             logger.error("parsing xml", e);
         }
@@ -138,7 +138,7 @@ public class OrcidNoDoiTest {
         }
         WorkDetail workData = null;
         try {
-            workData = p.VTDParseWorkData(xml.getBytes());
+            workData = XMLRecordParserNoDoi.VTDParseWorkData(xml.getBytes());
         } catch (Exception e) {
             logger.error("parsing xml", e);
         }
@@ -181,7 +181,7 @@ public class OrcidNoDoiTest {
         }
         WorkDetail workData = null;
         try {
-            workData = p.VTDParseWorkData(xml.getBytes());
+            workData = XMLRecordParserNoDoi.VTDParseWorkData(xml.getBytes());
         } catch (Exception e) {
             logger.error("parsing xml", e);
         }
@@ -217,16 +217,16 @@ public class OrcidNoDoiTest {
             .stream()
             .filter(c -> !StringUtils.isBlank(c.getCreditName()))
             .forEach(c -> {
-                if (am.simpleMatch(c.getCreditName(), author.getName()) ||
-                    am.simpleMatch(c.getCreditName(), author.getSurname()) ||
-                    am.simpleMatchOnOtherNames(c.getCreditName(), author.getOtherNames())) {
+                if (AuthorMatcher.simpleMatch(c.getCreditName(), author.getName()) ||
+                    AuthorMatcher.simpleMatch(c.getCreditName(), author.getSurname()) ||
+                    AuthorMatcher.simpleMatchOnOtherNames(c.getCreditName(), author.getOtherNames())) {
                     matchCounters.set(0, matchCounters.get(0) + 1);
                     c.setSimpleMatch(true);
                 }
             });

         assertTrue(matchCounters.get(0) == 1);
-        am.updateAuthorsSimpleMatch(contributors, author);
+        AuthorMatcher.updateAuthorsSimpleMatch(contributors, author);
         assertTrue(contributors.get(0).getName().equals("Joe"));
         assertTrue(contributors.get(0).getSurname().equals("Dodge"));
         assertTrue(contributors.get(0).getCreditName().equals("Joe Dodge"));
@@ -249,9 +249,9 @@ public class OrcidNoDoiTest {
             .stream()
             .filter(c -> !StringUtils.isBlank(c.getCreditName()))
             .forEach(c -> {
-                if (am.simpleMatch(c.getCreditName(), authorX.getName()) ||
-                    am.simpleMatch(c.getCreditName(), authorX.getSurname()) ||
-                    am.simpleMatchOnOtherNames(c.getCreditName(), author.getOtherNames())) {
+                if (AuthorMatcher.simpleMatch(c.getCreditName(), authorX.getName()) ||
+                    AuthorMatcher.simpleMatch(c.getCreditName(), authorX.getSurname()) ||
+                    AuthorMatcher.simpleMatchOnOtherNames(c.getCreditName(), author.getOtherNames())) {
                     int currentCounter = matchCounters2.get(0);
                     currentCounter += 1;
                     matchCounters2.set(0, currentCounter);
@@ -268,7 +268,7 @@ public class OrcidNoDoiTest {
             .filter(c -> c.isSimpleMatch())
             .filter(c -> !StringUtils.isBlank(c.getCreditName()))
             .map(c -> {
-                c.setScore(am.bestMatch(authorX.getName(), authorX.getSurname(), c.getCreditName()));
+                c.setScore(AuthorMatcher.bestMatch(authorX.getName(), authorX.getSurname(), c.getCreditName()));
                 return c;
             })
             .filter(c -> c.getScore() >= AuthorMatcher.threshold)
@@ -280,7 +280,7 @@ public class OrcidNoDoiTest {
         assertTrue(bestMatchContributor.getCreditName().equals("Abdel-Dayem Khai"));
         assertTrue(contributorList.get(0).isBestMatch());
         assertTrue(!contributorList.get(1).isBestMatch());
-        am.updateAuthorsSimilarityMatch(contributorList, authorX);
+        AuthorMatcher.updateAuthorsSimilarityMatch(contributorList, authorX);
         assertTrue(contributorList.get(0).getName().equals(nameA));
         assertTrue(contributorList.get(0).getSurname().equals(surnameA));
         assertTrue(contributorList.get(0).getCreditName().equals("Abdel-Dayem Khai"));
@@ -310,7 +310,7 @@ public class OrcidNoDoiTest {
         }
         WorkDetail workData = null;
         try {
-            workData = p.VTDParseWorkData(xml.getBytes());
+            workData = XMLRecordParserNoDoi.VTDParseWorkData(xml.getBytes());
         } catch (Exception e) {
             logger.error("parsing xml", e);
         }
@@ -331,8 +331,8 @@ public class OrcidNoDoiTest {
         author.setName("Joe");
         author.setSurname("Dodge");
         author.setOid("0000-1111-2222-3333");
-        String otherName1 = new String("Joe Dr. Dodge");
-        String otherName2 = new String("XY");
+        String otherName1 = "Joe Dr. Dodge";
+        String otherName2 = "XY";
         List<String> others = Lists.newArrayList();
         others.add(otherName1);
         others.add(otherName2);

@@ -123,7 +123,7 @@ public class SparkBulkTagJob {

     // TODO remove this hack as soon as the values fixed by this method will be provided as NON null
     private static <R extends Result> MapFunction<R, R> patchResult() {
-        return (MapFunction<R, R>) r -> {
+        return r -> {
             if (r.getDataInfo().getDeletedbyinference() == null) {
                 r.getDataInfo().setDeletedbyinference(false);
             }

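Note: the cast in front of the lambda, repeated across several Spark jobs below, is unnecessary because the method's declared return type already fixes the lambda's target type. Such casts are only needed when the context is ambiguous, e.g. overloads accepting different functional interfaces. Sketch (illustrative only):

    import java.util.function.Function;

    public class TargetTypeDemo {

        // The return type gives the lambda its target type; no cast needed.
        static Function<String, Integer> length() {
            return s -> s.length();
        }

        public static void main(String[] args) {
            System.out.println(length().apply("hello")); // 5
        }
    }
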
@@ -29,7 +29,7 @@ public class CommunityConfigurationFactory {

     private static final Log log = LogFactory.getLog(CommunityConfigurationFactory.class);

-    private static VerbResolver resolver = VerbResolverFactory.newInstance();
+    private static final VerbResolver resolver = VerbResolverFactory.newInstance();

     public static CommunityConfiguration newInstance(final String xml) throws DocumentException {

@@ -51,7 +51,7 @@ public class CommunityConfigurationFactory {
         }

         log.info(String.format("loaded %s community configuration profiles", communities.size()));
-        log.debug(String.format("loaded community configuration:\n%s", communities.toString()));
+        log.debug(String.format("loaded community configuration:\n%s", communities));

         return new CommunityConfiguration(communities);
     }

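Note: many of the `static` to `static final` changes in this commit follow the same rationale: `final` prevents accidental reassignment of a field that is only ever set once. It is worth remembering that `final` pins the reference, not the object; mutation through the reference is still allowed. Sketch (illustrative only):

    public class FinalFieldDemo {

        private static final StringBuilder LOG = new StringBuilder();

        static void append(String s) {
            LOG.append(s);                 // mutating the referenced object is still fine
            // LOG = new StringBuilder(); // but reassignment no longer compiles
        }
    }
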
@@ -40,7 +40,7 @@ public class VerbResolver implements Serializable {
                         .getParameterValues()
                         .get(0)
                         .getValue(),
-                    value -> (Class<Selection>) ((ClassInfo) value).loadClass()));
+                    value -> (Class<Selection>) value.loadClass()));
         } catch (Exception e) {
             e.printStackTrace();
         }

@@ -111,7 +111,7 @@ public class SparkCountryPropagationJob {
     }

     private static <R extends Result> MapFunction<Tuple2<R, ResultCountrySet>, R> getCountryMergeFn() {
-        return (MapFunction<Tuple2<R, ResultCountrySet>, R>) t -> {
+        return t -> {
             Optional.ofNullable(t._2()).ifPresent(r -> {
                 t._1().getCountry().addAll(merge(t._1().getCountry(), r.getCountrySet()));
             });

@@ -105,7 +105,7 @@ public class SparkOrcidToResultFromSemRelJob {
     }

     private static <R extends Result> MapFunction<Tuple2<R, ResultOrcidList>, R> authorEnrichFn() {
-        return (MapFunction<Tuple2<R, ResultOrcidList>, R>) value -> {
+        return value -> {
             R ret = value._1();
             Optional<ResultOrcidList> rol = Optional.ofNullable(value._2());
             if (rol.isPresent()) {
@@ -202,8 +202,8 @@ public class SparkOrcidToResultFromSemRelJob {
             return false;
         }
         for (StructuredProperty pid : pids.get()) {
-            if (ModelConstants.ORCID_PENDING.equals(pid.getQualifier().getClassid().toLowerCase()) ||
-                ModelConstants.ORCID.equals(pid.getQualifier().getClassid().toLowerCase())) {
+            if (ModelConstants.ORCID_PENDING.equalsIgnoreCase(pid.getQualifier().getClassid()) ||
+                ModelConstants.ORCID.equalsIgnoreCase(pid.getQualifier().getClassid())) {
                 return true;
             }
         }

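Note: `equalsIgnoreCase` avoids allocating a lowercased copy and, unlike no-argument `toLowerCase()`, is not sensitive to the JVM's default locale. The classic pitfall is the Turkish dotless i, where lowercasing "I" does not yield "i". Sketch (illustrative only):

    import java.util.Locale;

    public class CaseCompareDemo {
        public static void main(String[] args) {
            Locale.setDefault(new Locale("tr", "TR"));
            String id = "ORCID";
            System.out.println("orcid".equals(id.toLowerCase())); // false under tr_TR
            System.out.println("orcid".equalsIgnoreCase(id));     // true, locale-independent
        }
    }
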
@@ -94,7 +94,7 @@ public class SparkResultToProjectThroughSemRelJob {
     }

     private static FlatMapFunction<Tuple2<ResultProjectSet, ResultProjectSet>, Relation> mapRelationRn() {
-        return (FlatMapFunction<Tuple2<ResultProjectSet, ResultProjectSet>, Relation>) value -> {
+        return value -> {
             List<Relation> new_relations = new ArrayList<>();
             ResultProjectSet potential_update = value._1();
             Optional<ResultProjectSet> already_linked = Optional.ofNullable(value._2());

@@ -117,7 +117,7 @@ public class PrepareResultCommunitySet {

     private static MapFunction<ResultOrganizations, ResultCommunityList> mapResultCommunityFn(
         OrganizationMap organizationMap) {
-        return (MapFunction<ResultOrganizations, ResultCommunityList>) value -> {
+        return value -> {
             String rId = value.getResultId();
             Optional<List<String>> orgs = Optional.ofNullable(value.getMerges());
             String oTarget = value.getOrgId();

@@ -98,7 +98,7 @@ public class SparkResultToCommunityFromOrganizationJob {
     }

     private static <R extends Result> MapFunction<Tuple2<R, ResultCommunityList>, R> resultCommunityFn() {
-        return (MapFunction<Tuple2<R, ResultCommunityList>, R>) value -> {
+        return value -> {
             R ret = value._1();
             Optional<ResultCommunityList> rcl = Optional.ofNullable(value._2());
             if (rcl.isPresent()) {

@@ -101,7 +101,7 @@ public class SparkResultToCommunityThroughSemRelJob {
     }

     private static <R extends Result> MapFunction<Tuple2<R, ResultCommunityList>, R> contextUpdaterFn() {
-        return (MapFunction<Tuple2<R, ResultCommunityList>, R>) value -> {
+        return value -> {
             R ret = value._1();
             Optional<ResultCommunityList> rcl = Optional.ofNullable(value._2());
             if (rcl.isPresent()) {

@@ -124,7 +124,7 @@ public class SparkResultToOrganizationFromIstRepoJob {
     }

     private static FlatMapFunction<Tuple2<ResultOrganizationSet, ResultOrganizationSet>, Relation> createRelationFn() {
-        return (FlatMapFunction<Tuple2<ResultOrganizationSet, ResultOrganizationSet>, Relation>) value -> {
+        return value -> {
             List<Relation> new_relations = new ArrayList<>();
             ResultOrganizationSet potential_update = value._1();
             Optional<ResultOrganizationSet> already_linked = Optional.ofNullable(value._2());

@@ -14,7 +14,7 @@ public class Constants {

     public static final String HARVESTED = "Harvested";
     public static final String DEFAULT_TRUST = "0.9";
-    public static final String USER_CLAIM = "Linked by user";;
+    public static final String USER_CLAIM = "Linked by user";

     public static String COAR_ACCESS_RIGHT_SCHEMA = "http://vocabularies.coar-repositories.org/documentation/access_rights/";

@@ -44,7 +44,7 @@ public class Constants {
     public enum DUMPTYPE {
         COMPLETE("complete"), COMMUNITY("community"), FUNDER("funder");

-        private String type;
+        private final String type;

         DUMPTYPE(String type) {
             this.type = type;

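Note: enum constants are instantiated exactly once, so a field assigned only in the constructor can always be `final`, making the per-constant state explicitly immutable. A sketch of the resulting shape (illustrative enum name):

    public enum DumpTypeDemo {
        COMPLETE("complete"), COMMUNITY("community"), FUNDER("funder");

        private final String type; // one immutable value per constant

        DumpTypeDemo(String type) {
            this.type = type;
        }

        public String getType() {
            return type;
        }
    }
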
@@ -51,7 +51,7 @@ public class GraphHiveImporterJobTest {
         conf
             .set(
                 "javax.jdo.option.ConnectionURL",
-                String.format(JDBC_DERBY_TEMPLATE, workingDir.resolve("warehouse").toString()));
+                String.format(JDBC_DERBY_TEMPLATE, workingDir.resolve("warehouse")));

         spark = SparkSession
             .builder()

@@ -40,7 +40,7 @@ public class DumpJobTest {

     private static final Logger log = LoggerFactory.getLogger(DumpJobTest.class);

-    private static CommunityMap map = new CommunityMap();
+    private static final CommunityMap map = new CommunityMap();

     static {
         map.put("egi", "EGI Federation");

@@ -37,7 +37,7 @@ public class PrepareResultProjectJobTest {
     private static final Logger log = LoggerFactory
         .getLogger(eu.dnetlib.dhp.oa.graph.dump.PrepareResultProjectJobTest.class);

-    private static HashMap<String, String> map = new HashMap<>();
+    private static final HashMap<String, String> map = new HashMap<>();

     @BeforeAll
     public static void beforeAll() throws IOException {

@@ -36,7 +36,7 @@ public class UpdateProjectInfoTest {

     private static final Logger log = LoggerFactory.getLogger(eu.dnetlib.dhp.oa.graph.dump.UpdateProjectInfoTest.class);

-    private static HashMap<String, String> map = new HashMap<>();
+    private static final HashMap<String, String> map = new HashMap<>();

     @BeforeAll
     public static void beforeAll() throws IOException {

@@ -37,7 +37,7 @@ public class DumpOrganizationProjectDatasourceTest {
     private static final Logger log = LoggerFactory
         .getLogger(DumpOrganizationProjectDatasourceTest.class);

-    private static HashMap<String, String> map = new HashMap<>();
+    private static final HashMap<String, String> map = new HashMap<>();

     @BeforeAll
     public static void beforeAll() throws IOException {

@@ -36,7 +36,7 @@ public class DumpRelationTest {
     private static final Logger log = LoggerFactory
         .getLogger(DumpRelationTest.class);

-    private static HashMap<String, String> map = new HashMap<>();
+    private static final HashMap<String, String> map = new HashMap<>();

     @BeforeAll
     public static void beforeAll() throws IOException {

@@ -33,7 +33,7 @@ public class RelationFromOrganizationTest {
     private static final Logger log = LoggerFactory
         .getLogger(RelationFromOrganizationTest.class);

-    private static HashMap<String, String> map = new HashMap<>();
+    private static final HashMap<String, String> map = new HashMap<>();

     String organizationCommunityMap = "{\"20|grid________::afaa39865943381c51f76c08725ffa75\":[\"mes\",\"euromarine\"], \"20|corda__h2020::e8dbe14cca9bf6fce09d468872f813f8\":[\"mes\",\"euromarine\"], \"20|snsf________::9b253f265e3bef5cae6d881fdf61aceb\":[\"mes\",\"euromarine\"],\"20|rcuk________::e054eea0a47665af8c3656b5785ccf76\":[\"mes\",\"euromarine\"],\"20|corda__h2020::edc18d67c9b11fb616ca9f6e1db1b151\":[\"mes\",\"euromarine\"],\"20|rcuk________::d5736d9da90521ddcdc7828a05a85e9a\":[\"mes\",\"euromarine\"],\"20|corda__h2020::f5d418d3aa1cf817ddefcc3fdc039f27\":[\"mes\",\"euromarine\"],\"20|snsf________::8fa091f8f25a846779acb4ea97b50aef\":[\"mes\",\"euromarine\"],\"20|corda__h2020::81e020977211c2c40fae2e1a50bffd71\":[\"mes\",\"euromarine\"],\"20|corda_______::81e020977211c2c40fae2e1a50bffd71\":[\"mes\",\"euromarine\"],\"20|snsf________::31d0a100e54e3cdb3c6f52d91e638c78\":[\"mes\",\"euromarine\"],\"20|corda__h2020::ea379ef91b8cc86f9ac5edc4169292db\":[\"mes\",\"euromarine\"],\"20|corda__h2020::f75ee2ee48e5cb0ec8c8d30aaa8fef70\":[\"mes\",\"euromarine\"],\"20|rcuk________::e16010089551a1a9182a94604fc0ea59\":[\"mes\",\"euromarine\"],\"20|corda__h2020::38531a2cce7c5c347ffc439b07c1f43b\":[\"mes\",\"euromarine\"],\"20|corda_______::38531a2cce7c5c347ffc439b07c1f43b\":[\"mes\",\"euromarine\"],\"20|grid________::b2cbbf5eadbbf87d534b022bad3191d7\":[\"mes\",\"euromarine\"],\"20|snsf________::74730ef1439d7f7636a8be58a6b471b8\":[\"mes\",\"euromarine\"],\"20|nsf_________::ad72e19043a5a467e35f9b444d11563e\":[\"mes\",\"euromarine\"],\"20|rcuk________::0fc3e92500290902a2d38ec2445e74c3\":[\"mes\",\"euromarine\"],\"20|grid________::ad2c29905da0eb3c06b3fa80cacd89ea\":[\"mes\",\"euromarine\"],\"20|corda__h2020::30b53e4d63d3724f00acb9cbaca40860\":[\"mes\",\"euromarine\"],\"20|corda__h2020::f60f84bee14ad93f0db0e49af1d5c317\":[\"mes\",\"euromarine\"], \"20|corda__h2020::7bf251ac3765b5e89d82270a1763d09f\":[\"mes\",\"euromarine\"], \"20|corda__h2020::65531bd11be9935948c7f2f4db1c1832\":[\"mes\",\"euromarine\"], \"20|corda__h2020::e0e98f86bbc76638bbb72a8fe2302946\":[\"mes\",\"euromarine\"], \"20|snsf________::3eb43582ac27601459a8d8b3e195724b\":[\"mes\",\"euromarine\"], \"20|corda__h2020::af2481dab65d06c8ea0ae02b5517b9b6\":[\"mes\",\"euromarine\"], \"20|corda__h2020::c19d05cfde69a50d3ebc89bd0ee49929\":[\"mes\",\"euromarine\"], \"20|corda__h2020::af0bfd9fc09f80d9488f56d71a9832f0\":[\"mes\",\"euromarine\"], \"20|rcuk________::f33c02afb0dc66c49d0ed97ca5dd5cb0\":[\"beopen\"], "
         +

@@ -36,7 +36,7 @@ public class ResultLinkedToProjectTest {
     private static final Logger log = LoggerFactory
         .getLogger(eu.dnetlib.dhp.oa.graph.dump.funderresult.ResultLinkedToProjectTest.class);

-    private static HashMap<String, String> map = new HashMap<>();
+    private static final HashMap<String, String> map = new HashMap<>();

     @BeforeAll
     public static void beforeAll() throws IOException {

@@ -347,7 +347,7 @@ public class MigrateDbEntitiesApplicationTest {
     }

     private List<String> getValueAsList(final String name, final List<TypedField> fields) {
-        return (List<String>) getValueAs(name, fields);
+        return getValueAs(name, fields);
     }
 }

@@ -61,9 +61,8 @@ public class CrossrefClient {
                 int size = decompresser.inflate(buffer);
                 bos.write(buffer, 0, size);
             }
-            byte[] unzippeddata = bos.toByteArray();
             decompresser.end();
-            return new String(unzippeddata);
+            return bos.toString();
         } catch (Throwable e) {
             throw new RuntimeException("Wrong record:" + blob, e);
         }

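Note: `new String(bos.toByteArray())` and `bos.toString()` both decode with the platform default charset, so the rewrite is behavior-preserving while skipping the intermediate array copy. If the payload is known to be UTF-8, passing the charset explicitly would be the safer variant. Sketch (illustrative only):

    import java.io.ByteArrayOutputStream;
    import java.nio.charset.StandardCharsets;

    public class BosToStringDemo {
        public static void main(String[] args) {
            ByteArrayOutputStream bos = new ByteArrayOutputStream();
            bos.write('h');
            bos.write('i');
            String a = new String(bos.toByteArray());                         // default charset, extra copy
            String b = bos.toString();                                        // default charset, no copy
            String c = new String(bos.toByteArray(), StandardCharsets.UTF_8); // explicit charset
            System.out.println(a.equals(b) && b.equals(c));
        }
    }
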
Some files were not shown because too many files have changed in this diff.