using com.fasterxml.jackson.databind.ObjectMapper instead of org.codehaus.jackson.map.ObjectMapper

Claudio Atzori 2020-12-23 16:59:52 +01:00
parent 60649ac7d2
commit 28460c2cd1
16 changed files with 32 additions and 49 deletions
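Besides swapping the ObjectMapper import, the diff drops a few unused imports, fixes SLF4J placeholder strings, and replaces DoiBoostMappingUtil.getUnknownCountry() with a new ModelConstants.UNKNOWN_COUNTRY constant. For the serialization calls used in these classes, the Jackson 2 mapper is a drop-in replacement for the Jackson 1 (org.codehaus) one. A minimal sketch of the equivalent usage, for illustration only (not code from this commit; Publication is one of the dhp schema beans touched below):

// Hypothetical example: the readValue/writeValueAsString calls stay the same, only the import changes.
import com.fasterxml.jackson.databind.ObjectMapper;
import eu.dnetlib.dhp.schema.oaf.Publication;

public class JacksonMigrationSketch {

    private static final ObjectMapper MAPPER = new ObjectMapper();

    public static String toJson(Publication p) throws Exception {
        // serialize a schema bean exactly as org.codehaus.jackson.map.ObjectMapper did
        return MAPPER.writeValueAsString(p);
    }

    public static Publication fromJson(String json) throws Exception {
        // deserialize back into the bean
        return MAPPER.readValue(json, Publication.class);
    }
}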

View File

@@ -9,11 +9,12 @@ import java.util.ArrayList;
 import java.util.List;
 import java.util.stream.Collectors;
-import org.codehaus.jackson.map.ObjectMapper;
 import org.junit.jupiter.api.Assertions;
 import org.junit.jupiter.api.BeforeEach;
 import org.junit.jupiter.api.Test;
+import com.fasterxml.jackson.databind.ObjectMapper;
 import eu.dnetlib.dhp.schema.oaf.Author;
 import eu.dnetlib.dhp.schema.oaf.Publication;
 import eu.dnetlib.dhp.schema.oaf.StructuredProperty;

View File

@@ -105,6 +105,8 @@ public class ModelConstants {
     public static final KeyValue UNKNOWN_REPOSITORY = keyValue(
         "10|openaire____::55045bd2a65019fd8e6741a755395c8c", "Unknown Repository");
+    public static final Qualifier UNKNOWN_COUNTRY = qualifier(UNKNOWN, "Unknown", DNET_COUNTRY_TYPE, DNET_COUNTRY_TYPE);
     private static Qualifier qualifier(
         final String classid,
         final String classname,

View File

@@ -10,10 +10,11 @@ import java.io.Serializable;
 import java.nio.file.Paths;
 import java.util.*;
-import org.codehaus.jackson.map.ObjectMapper;
 import org.junit.jupiter.api.BeforeEach;
 import org.junit.jupiter.api.Test;
+import com.fasterxml.jackson.databind.ObjectMapper;
 import eu.dnetlib.dhp.oa.merge.AuthorMerger;
 import eu.dnetlib.dhp.schema.oaf.*;
 import eu.dnetlib.pace.util.MapDocumentUtil;

View File

@@ -7,7 +7,6 @@ import java.util.List;
 import org.apache.commons.io.IOUtils;
 import org.apache.spark.api.java.JavaPairRDD;
 import org.apache.spark.api.java.JavaRDD;
-import org.apache.spark.api.java.JavaSparkContext;
 import org.apache.spark.api.java.function.FlatMapFunction;
 import org.apache.spark.api.java.function.MapFunction;
 import org.apache.spark.api.java.function.PairFunction;
@@ -16,8 +15,8 @@ import org.apache.spark.rdd.RDD;
 import org.apache.spark.sql.Dataset;
 import org.apache.spark.sql.Encoders;
 import org.apache.spark.sql.SparkSession;
-import org.codehaus.jackson.map.ObjectMapper;
+import com.fasterxml.jackson.databind.ObjectMapper;
 import com.google.common.hash.Hashing;
 import eu.dnetlib.dedup.graph.ConnectedComponent;

View File

@@ -10,7 +10,8 @@ import org.apache.spark.api.java.JavaSparkContext;
 import org.apache.spark.api.java.function.MapFunction;
 import org.apache.spark.sql.Encoders;
 import org.apache.spark.sql.SparkSession;
-import org.codehaus.jackson.map.ObjectMapper;
+import com.fasterxml.jackson.databind.ObjectMapper;
 import eu.dnetlib.dhp.application.ArgumentApplicationParser;
 import eu.dnetlib.dhp.schema.oaf.Oaf;

View File

@@ -4,14 +4,13 @@ import eu.dnetlib.dhp.schema.action.AtomicAction
 import eu.dnetlib.dhp.schema.oaf.{DataInfo, Dataset, Field, Instance, KeyValue, Oaf, Organization, Publication, Qualifier, Relation, Result, StructuredProperty}
 import eu.dnetlib.dhp.utils.DHPUtils
 import org.apache.commons.lang3.StringUtils
-import org.codehaus.jackson.map.ObjectMapper
+import com.fasterxml.jackson.databind.ObjectMapper
 import org.json4s
 import org.json4s.DefaultFormats
 import org.json4s.jackson.JsonMethods.parse
 import org.slf4j.{Logger, LoggerFactory}
 import scala.collection.JavaConverters._
-import scala.io.Source
 case class HostedByItemType(id: String, officialname: String, issn: String, eissn: String, lissn: String, openAccess: Boolean) {}
@@ -19,17 +18,11 @@ case class HostedByItemType(id: String, officialname: String, issn: String, eiss
 case class DoiBoostAffiliation(PaperId:Long, AffiliationId:Long, GridId:Option[String], OfficialPage:Option[String], DisplayName:Option[String]){}
 object DoiBoostMappingUtil {
-  def getUnknownCountry(): Qualifier = {
-    createQualifier("UNKNOWN","UNKNOWN","dnet:countries","dnet:countries")
-  }
   def generateMAGAffiliationId(affId: String): String = {
     s"20|microsoft___$SEPARATOR${DHPUtils.md5(affId)}"
   }
   val logger: Logger = LoggerFactory.getLogger(getClass)
   //STATIC STRING

View File

@@ -2,6 +2,7 @@ package eu.dnetlib.doiboost
 import eu.dnetlib.dhp.application.ArgumentApplicationParser
 import eu.dnetlib.dhp.oa.merge.AuthorMerger
+import eu.dnetlib.dhp.schema.common.ModelConstants
 import eu.dnetlib.dhp.schema.oaf.{Organization, Publication, Relation, Dataset => OafDataset}
 import eu.dnetlib.doiboost.mag.ConversionUtil
 import org.apache.commons.io.IOUtils
@@ -132,7 +133,7 @@ object SparkGenerateDoiBoost {
         o.setLegalname(DoiBoostMappingUtil.asField(affiliation.DisplayName.get))
         if (affiliation.OfficialPage.isDefined)
           o.setWebsiteurl(DoiBoostMappingUtil.asField(affiliation.OfficialPage.get))
-        o.setCountry(DoiBoostMappingUtil.getUnknownCountry())
+        o.setCountry(ModelConstants.UNKNOWN_COUNTRY)
         o
       }
       else

View File

@@ -1,11 +1,11 @@
 package eu.dnetlib.doiboost.orcid
+import com.fasterxml.jackson.databind.ObjectMapper
 import eu.dnetlib.dhp.schema.oaf.{Author, DataInfo, Publication}
 import eu.dnetlib.dhp.schema.orcid.OrcidDOI
 import eu.dnetlib.doiboost.DoiBoostMappingUtil
 import eu.dnetlib.doiboost.DoiBoostMappingUtil.{ORCID, PID_TYPES, createSP, generateDataInfo, generateIdentifier}
 import org.apache.commons.lang.StringUtils
-import org.codehaus.jackson.map.ObjectMapper
 import org.slf4j.{Logger, LoggerFactory}
 import scala.collection.JavaConverters._
@@ -18,7 +18,7 @@ case class ORCIDItem(oid:String,name:String,surname:String,creditName:String,err
 case class ORCIDElement(doi:String, authors:List[ORCIDItem]) {}
 object ORCIDToOAF {
   val logger: Logger = LoggerFactory.getLogger(ORCIDToOAF.getClass)
-  val mapper = new ObjectMapper
+  val mapper = new ObjectMapper()
   def isJsonValid(inputStr: String): Boolean = {
     import java.io.IOException

View File

@@ -3,10 +3,8 @@ package eu.dnetlib.doiboost.orcid;
 import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;
-import java.io.IOException;
 import java.text.SimpleDateFormat;
 import java.util.Date;
-import java.util.List;
 import java.util.Optional;
 import org.apache.commons.io.IOUtils;
@@ -18,11 +16,9 @@ import org.apache.http.impl.client.CloseableHttpClient;
 import org.apache.http.impl.client.HttpClients;
 import org.apache.spark.SparkConf;
 import org.apache.spark.api.java.JavaPairRDD;
-import org.apache.spark.api.java.JavaRDD;
 import org.apache.spark.api.java.JavaSparkContext;
 import org.apache.spark.api.java.function.Function;
 import org.apache.spark.util.LongAccumulator;
-import org.mortbay.log.Log;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
@@ -36,7 +32,7 @@ public class SparkDownloadOrcidAuthors {
     static final String DATE_FORMAT = "yyyy-MM-dd HH:mm:ss";
     static final String lastUpdate = "2020-09-29 00:00:00";
-    public static void main(String[] args) throws IOException, Exception {
+    public static void main(String[] args) throws Exception {
         final ArgumentApplicationParser parser = new ArgumentApplicationParser(
             IOUtils
@@ -51,12 +47,12 @@ public class SparkDownloadOrcidAuthors {
             .orElse(Boolean.TRUE);
         logger.info("isSparkSessionManaged: {}", isSparkSessionManaged);
         final String workingPath = parser.get("workingPath");
-        logger.info("workingPath: ", workingPath);
+        logger.info("workingPath: {}", workingPath);
         final String outputPath = parser.get("outputPath");
-        logger.info("outputPath: ", outputPath);
+        logger.info("outputPath: {}", outputPath);
         final String token = parser.get("token");
         final String lambdaFileName = parser.get("lambdaFileName");
-        logger.info("lambdaFileName: ", lambdaFileName);
+        logger.info("lambdaFileName: {}", lambdaFileName);
         SparkConf conf = new SparkConf();
         runWithSparkSession(
@@ -171,8 +167,8 @@ public class SparkDownloadOrcidAuthors {
     }
     private static boolean isModified(String orcidId, String modifiedDate) {
-        Date modifiedDateDt = null;
-        Date lastUpdateDt = null;
+        Date modifiedDateDt;
+        Date lastUpdateDt;
         try {
             if (modifiedDate.length() != 19) {
                 modifiedDate = modifiedDate.substring(0, 19);

View File

@@ -1,9 +1,9 @@
 package eu.dnetlib.doiboost.orcid
+import com.fasterxml.jackson.databind.ObjectMapper
 import eu.dnetlib.dhp.schema.oaf.Publication
 import eu.dnetlib.doiboost.orcid.SparkConvertORCIDToOAF.getClass
 import org.apache.spark.sql.{Encoder, Encoders, SparkSession}
-import org.codehaus.jackson.map.ObjectMapper
 import org.junit.jupiter.api.Assertions._
 import org.junit.jupiter.api.Test
 import org.slf4j.{Logger, LoggerFactory}

View File

@@ -89,7 +89,7 @@ public class CleaningFunctions {
         } else if (value instanceof Organization) {
             Organization o = (Organization) value;
             if (Objects.isNull(o.getCountry()) || StringUtils.isBlank(o.getCountry().getClassid())) {
-                o.setCountry(qualifier(ModelConstants.UNKNOWN, "Unknown", ModelConstants.DNET_COUNTRY_TYPE));
+                o.setCountry(ModelConstants.UNKNOWN_COUNTRY);
             }
         } else if (value instanceof Relation) {
             // nothing to clean here

View File

@@ -11,7 +11,8 @@ import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.io.SequenceFile;
 import org.apache.hadoop.io.Text;
-import org.codehaus.jackson.map.ObjectMapper;
+import com.fasterxml.jackson.databind.ObjectMapper;
 import eu.dnetlib.dhp.schema.oaf.Oaf;

View File

@@ -1,15 +1,15 @@
 package eu.dnetlib.dhp.export
+import com.fasterxml.jackson.databind.ObjectMapper
 import java.time.LocalDateTime
 import java.time.format.DateTimeFormatter
 import eu.dnetlib.dhp.common.PacePerson
 import eu.dnetlib.dhp.schema.action.AtomicAction
 import eu.dnetlib.dhp.schema.oaf.{Author, Dataset, ExternalReference, Field, Instance, KeyValue, Oaf, Publication, Qualifier, Relation, Result, StructuredProperty}
 import eu.dnetlib.dhp.schema.scholexplorer.{DLIDataset, DLIPublication}
 import eu.dnetlib.dhp.utils.DHPUtils
 import org.apache.commons.lang3.StringUtils
-import org.codehaus.jackson.map.ObjectMapper
 import eu.dnetlib.dhp.schema.scholexplorer.OafUtils._
 import scala.collection.JavaConverters._

View File

@@ -1,27 +1,21 @@
 package eu.dnetlib.dhp.`export`
 import eu.dnetlib.dhp.application.ArgumentApplicationParser
-import eu.dnetlib.dhp.schema.oaf.{Instance, Publication, Relation, Dataset => OafDataset}
+import eu.dnetlib.dhp.schema.oaf.{Publication, Relation, Dataset => OafDataset}
 import eu.dnetlib.dhp.schema.scholexplorer.{DLIDataset, DLIPublication}
 import org.apache.commons.io.IOUtils
 import org.apache.hadoop.io.Text
 import org.apache.hadoop.io.compress.GzipCodec
 import org.apache.hadoop.mapred.SequenceFileOutputFormat
-import org.apache.spark.rdd.RDD
 import org.apache.spark.sql.{Dataset, Encoder, Encoders, SaveMode, SparkSession}
 import org.apache.spark.sql.functions._
 import org.apache.spark.sql.expressions.Window
-import org.apache.spark.{SparkConf, SparkContext}
-import org.codehaus.jackson.map.ObjectMapper
+import org.apache.spark.SparkConf
 import scala.collection.mutable.ArrayBuffer
-import scala.collection.JavaConverters._
 object SparkExportContentForOpenAire {
   def main(args: Array[String]): Unit = {
     val conf: SparkConf = new SparkConf()
     val parser = new ArgumentApplicationParser(IOUtils.toString(SparkExportContentForOpenAire.getClass.getResourceAsStream("input_export_content_parameters.json")))
@@ -178,11 +172,4 @@ object SparkExportContentForOpenAire {
     fRels.union(fpubs).union(fdats).rdd.map(s => (new Text(s._1), new Text(s._2))).saveAsHadoopFile(s"$workingPath/export/rawset", classOf[Text], classOf[Text], classOf[SequenceFileOutputFormat[Text,Text]], classOf[GzipCodec])
   }
 }

View File

@@ -10,7 +10,8 @@ import org.apache.http.client.methods.HttpPut;
 import org.apache.http.entity.StringEntity;
 import org.apache.http.impl.client.CloseableHttpClient;
 import org.apache.http.impl.client.HttpClients;
-import org.codehaus.jackson.map.ObjectMapper;
+import com.fasterxml.jackson.databind.ObjectMapper;
 import eu.dnetlib.dhp.application.ArgumentApplicationParser;

View File

@@ -1,5 +1,6 @@
 package eu.dnetlib.dhp.provision
+import com.fasterxml.jackson.databind.ObjectMapper
 import eu.dnetlib.dhp.application.ArgumentApplicationParser
 import eu.dnetlib.dhp.provision.scholix.Scholix
 import eu.dnetlib.dhp.provision.scholix.summary.ScholixSummary
@@ -7,7 +8,6 @@ import org.apache.commons.io.IOUtils
 import org.apache.hadoop.io.compress.GzipCodec
 import org.apache.spark.SparkConf
 import org.apache.spark.sql.{Encoder, Encoders, SparkSession}
-import org.codehaus.jackson.map.ObjectMapper
 object SparkConvertDatasetToJson {