Merge pull request 'Minor fixes' (#496) from beta_fixes_oct into beta

Reviewed-on: #496
This commit is contained in:
Claudio Atzori 2024-10-25 10:09:56 +02:00
commit 8172bee8c8
12 changed files with 27 additions and 20 deletions

View File

@ -179,7 +179,7 @@ class OafMapperUtilsTest {
assertEquals(
ModelConstants.DATASET_RESULTTYPE_CLASSID,
((Result) MergeUtils
.merge(p2, d1))
.merge(p2, d1, true))
.getResulttype()
.getClassid());
}

View File

@ -11,7 +11,6 @@ import org.junit.jupiter.api.Disabled;
import org.junit.jupiter.api.Test;
import eu.dnetlib.pace.model.Person;
import jdk.nashorn.internal.ir.annotations.Ignore;
public class UtilTest {

View File

@ -11,6 +11,7 @@ import java.util.stream.Collectors;
import org.apache.commons.cli.ParseException;
import org.apache.commons.io.IOUtils;
import org.apache.commons.lang3.StringUtils;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.compress.BZip2Codec;
import org.apache.hadoop.mapred.SequenceFileOutputFormat;
@ -20,7 +21,6 @@ import org.apache.spark.sql.*;
import org.jetbrains.annotations.NotNull;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.spark_project.jetty.util.StringUtil;
import com.fasterxml.jackson.databind.ObjectMapper;
@ -317,13 +317,13 @@ public class ExtractPerson implements Serializable {
"0.91"),
null);
if (Optional.ofNullable(row.getStartDate()).isPresent() && StringUtil.isNotBlank(row.getStartDate())) {
if (Optional.ofNullable(row.getStartDate()).isPresent() && StringUtils.isNotBlank(row.getStartDate())) {
KeyValue kv = new KeyValue();
kv.setKey("startDate");
kv.setValue(row.getStartDate());
properties.add(kv);
}
if (Optional.ofNullable(row.getEndDate()).isPresent() && StringUtil.isNotBlank(row.getEndDate())) {
if (Optional.ofNullable(row.getEndDate()).isPresent() && StringUtils.isNotBlank(row.getEndDate())) {
KeyValue kv = new KeyValue();
kv.setKey("endDate");
kv.setValue(row.getEndDate());

View File

@ -14,7 +14,7 @@ import eu.dnetlib.dhp.schema.oaf.utils.{
PidType
}
import eu.dnetlib.dhp.utils.DHPUtils
import org.apache.commons.lang.StringUtils
import org.apache.commons.lang3.StringUtils
import org.apache.spark.sql.Row
import org.json4s
import org.json4s.DefaultFormats

View File

@ -7,7 +7,7 @@ import eu.dnetlib.dhp.schema.oaf.utils.{GraphCleaningFunctions, IdentifierFactor
import eu.dnetlib.dhp.utils.DHPUtils
import eu.dnetlib.doiboost.DoiBoostMappingUtil
import eu.dnetlib.doiboost.DoiBoostMappingUtil._
import org.apache.commons.lang.StringUtils
import org.apache.commons.lang3.StringUtils
import org.json4s
import org.json4s.DefaultFormats
import org.json4s.JsonAST._

View File

@ -6,7 +6,7 @@ import eu.dnetlib.dhp.schema.oaf.utils.IdentifierFactory
import eu.dnetlib.dhp.schema.oaf.{Author, DataInfo, Publication}
import eu.dnetlib.doiboost.DoiBoostMappingUtil
import eu.dnetlib.doiboost.DoiBoostMappingUtil.{createSP, generateDataInfo}
import org.apache.commons.lang.StringUtils
import org.apache.commons.lang3.StringUtils
import org.json4s
import org.json4s.DefaultFormats
import org.json4s.JsonAST._

View File

@ -6,11 +6,11 @@ import java.io.Serializable;
import java.util.*;
import java.util.stream.Collectors;
import org.apache.commons.lang3.StringUtils;
import org.jetbrains.annotations.NotNull;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.amazonaws.util.StringUtils;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.google.common.collect.Maps;
@ -81,7 +81,7 @@ public class Utils implements Serializable {
Community c = new Community();
c.setId(cm.getId());
c.setZenodoCommunities(cm.getOtherZenodoCommunities());
if (!StringUtils.isNullOrEmpty(cm.getZenodoCommunity()))
if (StringUtils.isNotBlank(cm.getZenodoCommunity()))
c.getZenodoCommunities().add(cm.getZenodoCommunity());
c.setSubjects(cm.getSubjects());
c.getSubjects().addAll(cm.getFos());

View File

@ -4,7 +4,7 @@ package eu.dnetlib.dhp.bulktag.community;
import java.io.Serializable;
import java.lang.reflect.InvocationTargetException;
import org.apache.htrace.fasterxml.jackson.annotation.JsonIgnore;
import com.fasterxml.jackson.annotation.JsonIgnore;
import eu.dnetlib.dhp.bulktag.criteria.Selection;
import eu.dnetlib.dhp.bulktag.criteria.VerbResolver;

View File

@ -72,9 +72,9 @@ public class GraphHiveTableImporterJob {
final Encoder<T> clazzEncoder = Encoders.bean(clazz);
Dataset<Row> dataset = spark
.read()
.schema(clazzEncoder.schema())
.json(inputPath);
.read()
.schema(clazzEncoder.schema())
.json(inputPath);
if (numPartitions > 0) {
log.info("repartitioning {} to {} partitions", clazz.getSimpleName(), numPartitions);

View File

@ -22,5 +22,11 @@
"paramLongName": "targetPath",
"paramDescription": "the output path of the graph enriched",
"paramRequired": true
},
{
"paramName": "wp",
"paramLongName": "workingDir",
"paramDescription": "the working dir",
"paramRequired": true
}
]

View File

@ -47,13 +47,15 @@ class SparkEnrichGraphWithOrcidAuthors(propertyPath: String, args: Array[String]
log.info(s"orcidPath is '$orcidPath'")
val targetPath = parser.get("targetPath")
log.info(s"targetPath is '$targetPath'")
val workingDir = parser.get("workingDir")
log.info(s"targetPath is '$workingDir'")
createTemporaryData(graphPath, orcidPath, targetPath)
analisys(targetPath)
generateGraph(graphPath, targetPath)
createTemporaryData(graphPath, orcidPath, workingDir)
analisys(workingDir)
generateGraph(graphPath, workingDir, targetPath)
}
private def generateGraph(graphPath: String, targetPath: String): Unit = {
private def generateGraph(graphPath: String, workingDir: String, targetPath: String): Unit = {
ModelSupport.entityTypes.asScala
.filter(e => ModelSupport.isResult(e._1))
@ -63,7 +65,7 @@ class SparkEnrichGraphWithOrcidAuthors(propertyPath: String, args: Array[String]
val matched = spark.read
.schema(Encoders.bean(classOf[ORCIDAuthorEnricherResult]).schema)
.parquet(s"${targetPath}/${resultType}_matched")
.parquet(s"${workingDir}/${resultType}_matched")
.selectExpr("id", "enriched_author")
spark.read

View File

@ -3,8 +3,8 @@ package eu.dnetlib.dhp.swh.models;
import java.io.Serializable;
import com.cloudera.com.fasterxml.jackson.annotation.JsonProperty;
import com.fasterxml.jackson.annotation.JsonIgnoreProperties;
import com.fasterxml.jackson.annotation.JsonProperty;
@JsonIgnoreProperties(ignoreUnknown = true)
public class LastVisitData implements Serializable {