Minor fixes #496
|
@ -179,7 +179,7 @@ class OafMapperUtilsTest {
|
||||||
assertEquals(
|
assertEquals(
|
||||||
ModelConstants.DATASET_RESULTTYPE_CLASSID,
|
ModelConstants.DATASET_RESULTTYPE_CLASSID,
|
||||||
((Result) MergeUtils
|
((Result) MergeUtils
|
||||||
.merge(p2, d1))
|
.merge(p2, d1, true))
|
||||||
.getResulttype()
|
.getResulttype()
|
||||||
.getClassid());
|
.getClassid());
|
||||||
}
|
}
|
||||||
|
|
|
@ -11,7 +11,6 @@ import org.junit.jupiter.api.Disabled;
|
||||||
import org.junit.jupiter.api.Test;
|
import org.junit.jupiter.api.Test;
|
||||||
|
|
||||||
import eu.dnetlib.pace.model.Person;
|
import eu.dnetlib.pace.model.Person;
|
||||||
import jdk.nashorn.internal.ir.annotations.Ignore;
|
|
||||||
|
|
||||||
public class UtilTest {
|
public class UtilTest {
|
||||||
|
|
||||||
|
|
|
@ -11,6 +11,7 @@ import java.util.stream.Collectors;
|
||||||
|
|
||||||
import org.apache.commons.cli.ParseException;
|
import org.apache.commons.cli.ParseException;
|
||||||
import org.apache.commons.io.IOUtils;
|
import org.apache.commons.io.IOUtils;
|
||||||
|
import org.apache.commons.lang3.StringUtils;
|
||||||
import org.apache.hadoop.io.Text;
|
import org.apache.hadoop.io.Text;
|
||||||
import org.apache.hadoop.io.compress.BZip2Codec;
|
import org.apache.hadoop.io.compress.BZip2Codec;
|
||||||
import org.apache.hadoop.mapred.SequenceFileOutputFormat;
|
import org.apache.hadoop.mapred.SequenceFileOutputFormat;
|
||||||
|
@ -20,7 +21,6 @@ import org.apache.spark.sql.*;
|
||||||
import org.jetbrains.annotations.NotNull;
|
import org.jetbrains.annotations.NotNull;
|
||||||
import org.slf4j.Logger;
|
import org.slf4j.Logger;
|
||||||
import org.slf4j.LoggerFactory;
|
import org.slf4j.LoggerFactory;
|
||||||
import org.spark_project.jetty.util.StringUtil;
|
|
||||||
|
|
||||||
import com.fasterxml.jackson.databind.ObjectMapper;
|
import com.fasterxml.jackson.databind.ObjectMapper;
|
||||||
|
|
||||||
|
@ -317,13 +317,13 @@ public class ExtractPerson implements Serializable {
|
||||||
"0.91"),
|
"0.91"),
|
||||||
null);
|
null);
|
||||||
|
|
||||||
if (Optional.ofNullable(row.getStartDate()).isPresent() && StringUtil.isNotBlank(row.getStartDate())) {
|
if (Optional.ofNullable(row.getStartDate()).isPresent() && StringUtils.isNotBlank(row.getStartDate())) {
|
||||||
KeyValue kv = new KeyValue();
|
KeyValue kv = new KeyValue();
|
||||||
kv.setKey("startDate");
|
kv.setKey("startDate");
|
||||||
kv.setValue(row.getStartDate());
|
kv.setValue(row.getStartDate());
|
||||||
properties.add(kv);
|
properties.add(kv);
|
||||||
}
|
}
|
||||||
if (Optional.ofNullable(row.getEndDate()).isPresent() && StringUtil.isNotBlank(row.getEndDate())) {
|
if (Optional.ofNullable(row.getEndDate()).isPresent() && StringUtils.isNotBlank(row.getEndDate())) {
|
||||||
KeyValue kv = new KeyValue();
|
KeyValue kv = new KeyValue();
|
||||||
kv.setKey("endDate");
|
kv.setKey("endDate");
|
||||||
kv.setValue(row.getEndDate());
|
kv.setValue(row.getEndDate());
|
||||||
|
|
|
@ -14,7 +14,7 @@ import eu.dnetlib.dhp.schema.oaf.utils.{
|
||||||
PidType
|
PidType
|
||||||
}
|
}
|
||||||
import eu.dnetlib.dhp.utils.DHPUtils
|
import eu.dnetlib.dhp.utils.DHPUtils
|
||||||
import org.apache.commons.lang.StringUtils
|
import org.apache.commons.lang3.StringUtils
|
||||||
import org.apache.spark.sql.Row
|
import org.apache.spark.sql.Row
|
||||||
import org.json4s
|
import org.json4s
|
||||||
import org.json4s.DefaultFormats
|
import org.json4s.DefaultFormats
|
||||||
|
|
|
@ -7,7 +7,7 @@ import eu.dnetlib.dhp.schema.oaf.utils.{GraphCleaningFunctions, IdentifierFactor
|
||||||
import eu.dnetlib.dhp.utils.DHPUtils
|
import eu.dnetlib.dhp.utils.DHPUtils
|
||||||
import eu.dnetlib.doiboost.DoiBoostMappingUtil
|
import eu.dnetlib.doiboost.DoiBoostMappingUtil
|
||||||
import eu.dnetlib.doiboost.DoiBoostMappingUtil._
|
import eu.dnetlib.doiboost.DoiBoostMappingUtil._
|
||||||
import org.apache.commons.lang.StringUtils
|
import org.apache.commons.lang3.StringUtils
|
||||||
import org.json4s
|
import org.json4s
|
||||||
import org.json4s.DefaultFormats
|
import org.json4s.DefaultFormats
|
||||||
import org.json4s.JsonAST._
|
import org.json4s.JsonAST._
|
||||||
|
|
|
@ -6,7 +6,7 @@ import eu.dnetlib.dhp.schema.oaf.utils.IdentifierFactory
|
||||||
import eu.dnetlib.dhp.schema.oaf.{Author, DataInfo, Publication}
|
import eu.dnetlib.dhp.schema.oaf.{Author, DataInfo, Publication}
|
||||||
import eu.dnetlib.doiboost.DoiBoostMappingUtil
|
import eu.dnetlib.doiboost.DoiBoostMappingUtil
|
||||||
import eu.dnetlib.doiboost.DoiBoostMappingUtil.{createSP, generateDataInfo}
|
import eu.dnetlib.doiboost.DoiBoostMappingUtil.{createSP, generateDataInfo}
|
||||||
import org.apache.commons.lang.StringUtils
|
import org.apache.commons.lang3.StringUtils
|
||||||
import org.json4s
|
import org.json4s
|
||||||
import org.json4s.DefaultFormats
|
import org.json4s.DefaultFormats
|
||||||
import org.json4s.JsonAST._
|
import org.json4s.JsonAST._
|
||||||
|
|
|
@ -6,11 +6,11 @@ import java.io.Serializable;
|
||||||
import java.util.*;
|
import java.util.*;
|
||||||
import java.util.stream.Collectors;
|
import java.util.stream.Collectors;
|
||||||
|
|
||||||
|
import org.apache.commons.lang3.StringUtils;
|
||||||
import org.jetbrains.annotations.NotNull;
|
import org.jetbrains.annotations.NotNull;
|
||||||
import org.slf4j.Logger;
|
import org.slf4j.Logger;
|
||||||
import org.slf4j.LoggerFactory;
|
import org.slf4j.LoggerFactory;
|
||||||
|
|
||||||
import com.amazonaws.util.StringUtils;
|
|
||||||
import com.fasterxml.jackson.databind.ObjectMapper;
|
import com.fasterxml.jackson.databind.ObjectMapper;
|
||||||
import com.google.common.collect.Maps;
|
import com.google.common.collect.Maps;
|
||||||
|
|
||||||
|
@ -81,7 +81,7 @@ public class Utils implements Serializable {
|
||||||
Community c = new Community();
|
Community c = new Community();
|
||||||
c.setId(cm.getId());
|
c.setId(cm.getId());
|
||||||
c.setZenodoCommunities(cm.getOtherZenodoCommunities());
|
c.setZenodoCommunities(cm.getOtherZenodoCommunities());
|
||||||
if (!StringUtils.isNullOrEmpty(cm.getZenodoCommunity()))
|
if (StringUtils.isNotBlank(cm.getZenodoCommunity()))
|
||||||
c.getZenodoCommunities().add(cm.getZenodoCommunity());
|
c.getZenodoCommunities().add(cm.getZenodoCommunity());
|
||||||
c.setSubjects(cm.getSubjects());
|
c.setSubjects(cm.getSubjects());
|
||||||
c.getSubjects().addAll(cm.getFos());
|
c.getSubjects().addAll(cm.getFos());
|
||||||
|
|
|
@ -4,7 +4,7 @@ package eu.dnetlib.dhp.bulktag.community;
|
||||||
import java.io.Serializable;
|
import java.io.Serializable;
|
||||||
import java.lang.reflect.InvocationTargetException;
|
import java.lang.reflect.InvocationTargetException;
|
||||||
|
|
||||||
import org.apache.htrace.fasterxml.jackson.annotation.JsonIgnore;
|
import com.fasterxml.jackson.annotation.JsonIgnore;
|
||||||
|
|
||||||
import eu.dnetlib.dhp.bulktag.criteria.Selection;
|
import eu.dnetlib.dhp.bulktag.criteria.Selection;
|
||||||
import eu.dnetlib.dhp.bulktag.criteria.VerbResolver;
|
import eu.dnetlib.dhp.bulktag.criteria.VerbResolver;
|
||||||
|
|
|
@ -22,5 +22,11 @@
|
||||||
"paramLongName": "targetPath",
|
"paramLongName": "targetPath",
|
||||||
"paramDescription": "the output path of the graph enriched",
|
"paramDescription": "the output path of the graph enriched",
|
||||||
"paramRequired": true
|
"paramRequired": true
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"paramName": "wp",
|
||||||
|
"paramLongName": "workingDir",
|
||||||
|
"paramDescription": "the working dir",
|
||||||
|
"paramRequired": true
|
||||||
}
|
}
|
||||||
]
|
]
|
|
@ -47,13 +47,15 @@ class SparkEnrichGraphWithOrcidAuthors(propertyPath: String, args: Array[String]
|
||||||
log.info(s"orcidPath is '$orcidPath'")
|
log.info(s"orcidPath is '$orcidPath'")
|
||||||
val targetPath = parser.get("targetPath")
|
val targetPath = parser.get("targetPath")
|
||||||
log.info(s"targetPath is '$targetPath'")
|
log.info(s"targetPath is '$targetPath'")
|
||||||
|
val workingDir = parser.get("workingDir")
|
||||||
|
log.info(s"targetPath is '$workingDir'")
|
||||||
|
|
||||||
createTemporaryData(graphPath, orcidPath, targetPath)
|
createTemporaryData(graphPath, orcidPath, workingDir)
|
||||||
analisys(targetPath)
|
analisys(workingDir)
|
||||||
generateGraph(graphPath, targetPath)
|
generateGraph(graphPath, workingDir, targetPath)
|
||||||
}
|
}
|
||||||
|
|
||||||
private def generateGraph(graphPath: String, targetPath: String): Unit = {
|
private def generateGraph(graphPath: String, workingDir: String, targetPath: String): Unit = {
|
||||||
|
|
||||||
ModelSupport.entityTypes.asScala
|
ModelSupport.entityTypes.asScala
|
||||||
.filter(e => ModelSupport.isResult(e._1))
|
.filter(e => ModelSupport.isResult(e._1))
|
||||||
|
@ -63,7 +65,7 @@ class SparkEnrichGraphWithOrcidAuthors(propertyPath: String, args: Array[String]
|
||||||
|
|
||||||
val matched = spark.read
|
val matched = spark.read
|
||||||
.schema(Encoders.bean(classOf[ORCIDAuthorEnricherResult]).schema)
|
.schema(Encoders.bean(classOf[ORCIDAuthorEnricherResult]).schema)
|
||||||
.parquet(s"${targetPath}/${resultType}_matched")
|
.parquet(s"${workingDir}/${resultType}_matched")
|
||||||
.selectExpr("id", "enriched_author")
|
.selectExpr("id", "enriched_author")
|
||||||
|
|
||||||
spark.read
|
spark.read
|
||||||
|
|
|
@ -3,8 +3,8 @@ package eu.dnetlib.dhp.swh.models;
|
||||||
|
|
||||||
import java.io.Serializable;
|
import java.io.Serializable;
|
||||||
|
|
||||||
import com.cloudera.com.fasterxml.jackson.annotation.JsonProperty;
|
|
||||||
import com.fasterxml.jackson.annotation.JsonIgnoreProperties;
|
import com.fasterxml.jackson.annotation.JsonIgnoreProperties;
|
||||||
|
import com.fasterxml.jackson.annotation.JsonProperty;
|
||||||
|
|
||||||
@JsonIgnoreProperties(ignoreUnknown = true)
|
@JsonIgnoreProperties(ignoreUnknown = true)
|
||||||
public class LastVisitData implements Serializable {
|
public class LastVisitData implements Serializable {
|
||||||
|
|
Loading…
Reference in New Issue