Merge pull request 'Minor fixes' (#496) from beta_fixes_oct into beta
Reviewed-on: #496
This commit is contained in:
commit
8172bee8c8
|
@ -179,7 +179,7 @@ class OafMapperUtilsTest {
|
|||
assertEquals(
|
||||
ModelConstants.DATASET_RESULTTYPE_CLASSID,
|
||||
((Result) MergeUtils
|
||||
.merge(p2, d1))
|
||||
.merge(p2, d1, true))
|
||||
.getResulttype()
|
||||
.getClassid());
|
||||
}
|
||||
|
|
|
@ -11,7 +11,6 @@ import org.junit.jupiter.api.Disabled;
|
|||
import org.junit.jupiter.api.Test;
|
||||
|
||||
import eu.dnetlib.pace.model.Person;
|
||||
import jdk.nashorn.internal.ir.annotations.Ignore;
|
||||
|
||||
public class UtilTest {
|
||||
|
||||
|
|
|
@ -11,6 +11,7 @@ import java.util.stream.Collectors;
|
|||
|
||||
import org.apache.commons.cli.ParseException;
|
||||
import org.apache.commons.io.IOUtils;
|
||||
import org.apache.commons.lang3.StringUtils;
|
||||
import org.apache.hadoop.io.Text;
|
||||
import org.apache.hadoop.io.compress.BZip2Codec;
|
||||
import org.apache.hadoop.mapred.SequenceFileOutputFormat;
|
||||
|
@ -20,7 +21,6 @@ import org.apache.spark.sql.*;
|
|||
import org.jetbrains.annotations.NotNull;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
import org.spark_project.jetty.util.StringUtil;
|
||||
|
||||
import com.fasterxml.jackson.databind.ObjectMapper;
|
||||
|
||||
|
@ -317,13 +317,13 @@ public class ExtractPerson implements Serializable {
|
|||
"0.91"),
|
||||
null);
|
||||
|
||||
if (Optional.ofNullable(row.getStartDate()).isPresent() && StringUtil.isNotBlank(row.getStartDate())) {
|
||||
if (Optional.ofNullable(row.getStartDate()).isPresent() && StringUtils.isNotBlank(row.getStartDate())) {
|
||||
KeyValue kv = new KeyValue();
|
||||
kv.setKey("startDate");
|
||||
kv.setValue(row.getStartDate());
|
||||
properties.add(kv);
|
||||
}
|
||||
if (Optional.ofNullable(row.getEndDate()).isPresent() && StringUtil.isNotBlank(row.getEndDate())) {
|
||||
if (Optional.ofNullable(row.getEndDate()).isPresent() && StringUtils.isNotBlank(row.getEndDate())) {
|
||||
KeyValue kv = new KeyValue();
|
||||
kv.setKey("endDate");
|
||||
kv.setValue(row.getEndDate());
|
||||
|
|
|
@ -14,7 +14,7 @@ import eu.dnetlib.dhp.schema.oaf.utils.{
|
|||
PidType
|
||||
}
|
||||
import eu.dnetlib.dhp.utils.DHPUtils
|
||||
import org.apache.commons.lang.StringUtils
|
||||
import org.apache.commons.lang3.StringUtils
|
||||
import org.apache.spark.sql.Row
|
||||
import org.json4s
|
||||
import org.json4s.DefaultFormats
|
||||
|
|
|
@ -7,7 +7,7 @@ import eu.dnetlib.dhp.schema.oaf.utils.{GraphCleaningFunctions, IdentifierFactor
|
|||
import eu.dnetlib.dhp.utils.DHPUtils
|
||||
import eu.dnetlib.doiboost.DoiBoostMappingUtil
|
||||
import eu.dnetlib.doiboost.DoiBoostMappingUtil._
|
||||
import org.apache.commons.lang.StringUtils
|
||||
import org.apache.commons.lang3.StringUtils
|
||||
import org.json4s
|
||||
import org.json4s.DefaultFormats
|
||||
import org.json4s.JsonAST._
|
||||
|
|
|
@ -6,7 +6,7 @@ import eu.dnetlib.dhp.schema.oaf.utils.IdentifierFactory
|
|||
import eu.dnetlib.dhp.schema.oaf.{Author, DataInfo, Publication}
|
||||
import eu.dnetlib.doiboost.DoiBoostMappingUtil
|
||||
import eu.dnetlib.doiboost.DoiBoostMappingUtil.{createSP, generateDataInfo}
|
||||
import org.apache.commons.lang.StringUtils
|
||||
import org.apache.commons.lang3.StringUtils
|
||||
import org.json4s
|
||||
import org.json4s.DefaultFormats
|
||||
import org.json4s.JsonAST._
|
||||
|
|
|
@ -6,11 +6,11 @@ import java.io.Serializable;
|
|||
import java.util.*;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
import org.apache.commons.lang3.StringUtils;
|
||||
import org.jetbrains.annotations.NotNull;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
import com.amazonaws.util.StringUtils;
|
||||
import com.fasterxml.jackson.databind.ObjectMapper;
|
||||
import com.google.common.collect.Maps;
|
||||
|
||||
|
@ -81,7 +81,7 @@ public class Utils implements Serializable {
|
|||
Community c = new Community();
|
||||
c.setId(cm.getId());
|
||||
c.setZenodoCommunities(cm.getOtherZenodoCommunities());
|
||||
if (!StringUtils.isNullOrEmpty(cm.getZenodoCommunity()))
|
||||
if (StringUtils.isNotBlank(cm.getZenodoCommunity()))
|
||||
c.getZenodoCommunities().add(cm.getZenodoCommunity());
|
||||
c.setSubjects(cm.getSubjects());
|
||||
c.getSubjects().addAll(cm.getFos());
|
||||
|
|
|
@ -4,7 +4,7 @@ package eu.dnetlib.dhp.bulktag.community;
|
|||
import java.io.Serializable;
|
||||
import java.lang.reflect.InvocationTargetException;
|
||||
|
||||
import org.apache.htrace.fasterxml.jackson.annotation.JsonIgnore;
|
||||
import com.fasterxml.jackson.annotation.JsonIgnore;
|
||||
|
||||
import eu.dnetlib.dhp.bulktag.criteria.Selection;
|
||||
import eu.dnetlib.dhp.bulktag.criteria.VerbResolver;
|
||||
|
|
|
@ -72,9 +72,9 @@ public class GraphHiveTableImporterJob {
|
|||
final Encoder<T> clazzEncoder = Encoders.bean(clazz);
|
||||
|
||||
Dataset<Row> dataset = spark
|
||||
.read()
|
||||
.schema(clazzEncoder.schema())
|
||||
.json(inputPath);
|
||||
.read()
|
||||
.schema(clazzEncoder.schema())
|
||||
.json(inputPath);
|
||||
|
||||
if (numPartitions > 0) {
|
||||
log.info("repartitioning {} to {} partitions", clazz.getSimpleName(), numPartitions);
|
||||
|
|
|
@ -22,5 +22,11 @@
|
|||
"paramLongName": "targetPath",
|
||||
"paramDescription": "the output path of the graph enriched",
|
||||
"paramRequired": true
|
||||
},
|
||||
{
|
||||
"paramName": "wp",
|
||||
"paramLongName": "workingDir",
|
||||
"paramDescription": "the working dir",
|
||||
"paramRequired": true
|
||||
}
|
||||
]
|
|
@ -47,13 +47,15 @@ class SparkEnrichGraphWithOrcidAuthors(propertyPath: String, args: Array[String]
|
|||
log.info(s"orcidPath is '$orcidPath'")
|
||||
val targetPath = parser.get("targetPath")
|
||||
log.info(s"targetPath is '$targetPath'")
|
||||
val workingDir = parser.get("workingDir")
|
||||
log.info(s"targetPath is '$workingDir'")
|
||||
|
||||
createTemporaryData(graphPath, orcidPath, targetPath)
|
||||
analisys(targetPath)
|
||||
generateGraph(graphPath, targetPath)
|
||||
createTemporaryData(graphPath, orcidPath, workingDir)
|
||||
analisys(workingDir)
|
||||
generateGraph(graphPath, workingDir, targetPath)
|
||||
}
|
||||
|
||||
private def generateGraph(graphPath: String, targetPath: String): Unit = {
|
||||
private def generateGraph(graphPath: String, workingDir: String, targetPath: String): Unit = {
|
||||
|
||||
ModelSupport.entityTypes.asScala
|
||||
.filter(e => ModelSupport.isResult(e._1))
|
||||
|
@ -63,7 +65,7 @@ class SparkEnrichGraphWithOrcidAuthors(propertyPath: String, args: Array[String]
|
|||
|
||||
val matched = spark.read
|
||||
.schema(Encoders.bean(classOf[ORCIDAuthorEnricherResult]).schema)
|
||||
.parquet(s"${targetPath}/${resultType}_matched")
|
||||
.parquet(s"${workingDir}/${resultType}_matched")
|
||||
.selectExpr("id", "enriched_author")
|
||||
|
||||
spark.read
|
||||
|
|
|
@ -3,8 +3,8 @@ package eu.dnetlib.dhp.swh.models;
|
|||
|
||||
import java.io.Serializable;
|
||||
|
||||
import com.cloudera.com.fasterxml.jackson.annotation.JsonProperty;
|
||||
import com.fasterxml.jackson.annotation.JsonIgnoreProperties;
|
||||
import com.fasterxml.jackson.annotation.JsonProperty;
|
||||
|
||||
@JsonIgnoreProperties(ignoreUnknown = true)
|
||||
public class LastVisitData implements Serializable {
|
||||
|
|
Loading…
Reference in New Issue