
code formatting

Claudio Atzori 2024-12-06 13:58:39 +01:00
parent 5c7f7fb3b8
commit e4b814b3f1
6 changed files with 97 additions and 84 deletions

View File

@@ -7,7 +7,6 @@ import java.io.IOException;
 import java.util.Optional;
 import java.util.concurrent.atomic.AtomicInteger;
-import eu.dnetlib.dhp.collection.plugin.zenodo.CollectZenodoDumpCollectorPlugin;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.io.IntWritable;
@@ -29,6 +28,7 @@ import eu.dnetlib.dhp.collection.plugin.mongodb.MongoDbDumpCollectorPlugin;
 import eu.dnetlib.dhp.collection.plugin.oai.OaiCollectorPlugin;
 import eu.dnetlib.dhp.collection.plugin.osf.OsfPreprintsCollectorPlugin;
 import eu.dnetlib.dhp.collection.plugin.rest.RestCollectorPlugin;
+import eu.dnetlib.dhp.collection.plugin.zenodo.CollectZenodoDumpCollectorPlugin;
 import eu.dnetlib.dhp.common.aggregation.AggregatorReport;
 import eu.dnetlib.dhp.common.collection.CollectorException;
 import eu.dnetlib.dhp.common.collection.HttpClientParams;

View File

@@ -503,7 +503,6 @@ case object Crossref2Oaf {
         )
       }
     if (doi.startsWith("10.3410") || doi.startsWith("10.12703"))
       instance.setHostedby(
         OafMapperUtils.keyValue(OafMapperUtils.createOpenaireId(10, "openaire____::H1Connect", true), "H1Connect")
@@ -556,14 +555,18 @@
     result
   }

   def generateIdentifier(oaf: Result, doi: String): String = {
     val id = DHPUtils.md5(doi.toLowerCase)
     s"50|doiboost____|$id"
   }

-  private def generateAuthor(given: String, family: String, orcid: String, index: Int, affiliation: Option[List[mappingAffiliation]]): Author = {
+  private def generateAuthor(
+    given: String,
+    family: String,
+    orcid: String,
+    index: Int,
+    affiliation: Option[List[mappingAffiliation]]
+  ): Author = {
     val a = new Author
     a.setName(given)
     a.setSurname(family)
@@ -700,7 +703,6 @@
     if (objectType == null)
       return resultList
     // If the item has a relations is-review-of, then we force it to a peer-review
     val is_review = json \ "relation" \ "is-review-of" \ "id"
     var force_to_review = false
@@ -713,7 +715,6 @@
     if (typology == null)
       return List()
     val result = generateItemFromType(typology._2)
     if (result == null)
       return List()

View File

@@ -28,17 +28,21 @@ class CrossrefMappingTest extends AbstractVocabularyTest {
     val input =
       IOUtils.toString(getClass.getResourceAsStream("/eu/dnetlib/dhp/collection/crossref/issn_pub.json"), "utf-8")
-    Crossref2Oaf.convert(input, vocabularies, TransformationType.All).foreach(record => {
+    Crossref2Oaf
+      .convert(input, vocabularies, TransformationType.All)
+      .foreach(record => {
       Assertions.assertNotNull(record)
     })
   }

   @Test
   def mappingAffiliation(): Unit = {
     val input =
-      IOUtils.toString(getClass.getResourceAsStream("/eu/dnetlib/dhp/collection/crossref/affiliationTest.json"), "utf-8")
+      IOUtils.toString(
+        getClass.getResourceAsStream("/eu/dnetlib/dhp/collection/crossref/affiliationTest.json"),
+        "utf-8"
+      )
     val data = Crossref2Oaf.convert(input, vocabularies, TransformationType.OnlyResult)
     data.foreach(record => {
       Assertions.assertNotNull(record)
@@ -46,7 +50,7 @@
       val publication = record.asInstanceOf[Publication]
       publication.getAuthor.asScala.foreach(author => {
         Assertions.assertNotNull(author.getRawAffiliationString)
-        Assertions.assertTrue(author.getRawAffiliationString.size()>0)
+        Assertions.assertTrue(author.getRawAffiliationString.size() > 0)
       })
     })

View File

@@ -1,16 +1,16 @@
 package eu.dnetlib.dhp.resulttocommunityfromsemrel;

-import static java.lang.String.join;
 import static eu.dnetlib.dhp.PropagationConstant.*;
 import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkHiveSession;
+import static java.lang.String.join;

 import java.io.IOException;
 import java.util.Arrays;
 import java.util.Collections;
 import java.util.List;

-import eu.dnetlib.dhp.schema.common.ModelConstants;
 import org.apache.commons.io.IOUtils;
 import org.apache.spark.SparkConf;
 import org.apache.spark.sql.*;
@@ -22,6 +22,7 @@ import com.google.gson.Gson;
 import eu.dnetlib.dhp.api.Utils;
 import eu.dnetlib.dhp.application.ArgumentApplicationParser;
 import eu.dnetlib.dhp.resulttocommunityfromorganization.ResultCommunityList;
+import eu.dnetlib.dhp.schema.common.ModelConstants;
 import eu.dnetlib.dhp.schema.oaf.Relation;
 import eu.dnetlib.dhp.schema.oaf.Result;
 import eu.dnetlib.dhp.utils.ISLookupClientFactory;
@@ -37,8 +38,7 @@ public class PrepareResultCommunitySetStep1 {
	 * relation
	 */
	// TODO
-	private static final String RESULT_CONTEXT_QUERY_TEMPLATE =
-		"select target resultId, community_context "
+	private static final String RESULT_CONTEXT_QUERY_TEMPLATE = "select target resultId, community_context "
		+ "from (select id, collect_set(co.id) community_context "
		+ "       from  result "
		+ "       lateral view explode (context) c as co "
@@ -60,8 +60,8 @@ public class PrepareResultCommunitySetStep1 {
		+ "where length(co) > 0 "
		+ "group by resultId";

-	private static final String RESULT_CONTEXT_QUERY_TEMPLATE_IS_RELATED_TO =
-		"select target as resultId, community_context " +
+	private static final String RESULT_CONTEXT_QUERY_TEMPLATE_IS_RELATED_TO = "select target as resultId, community_context "
+		+
		"from resultWithContext rwc " +
		"join relatedToRelations r " +
		"join patents p " +
@@ -107,17 +107,25 @@ public class PrepareResultCommunitySetStep1 {
		SparkConf conf = new SparkConf();
		conf.set("hive.metastore.uris", parser.get("hive_metastore_uris"));

-		final String allowedsemrel ="(" + join(",",
-			Arrays.asList(parser.get("allowedsemrels").split(";")).stream().map(value -> "'" + value.toLowerCase() + "'")
-			.toArray(String[]::new)) + ")";
+		final String allowedsemrel = "(" + join(
+			",",
+			Arrays
+				.asList(parser.get("allowedsemrels").split(";"))
+				.stream()
+				.map(value -> "'" + value.toLowerCase() + "'")
+				.toArray(String[]::new))
+			+ ")";
		log.info("allowedSemRel: {}", allowedsemrel);

		final String baseURL = parser.get("baseURL");
		log.info("baseURL: {}", baseURL);

-		final String communityIdList = "(" + join(",", getCommunityList(baseURL).stream()
+		final String communityIdList = "(" + join(
+			",", getCommunityList(baseURL)
+				.stream()
			.map(value -> "'" + value.toLowerCase() + "'")
-			.toArray(String[]::new)) + ")";
+			.toArray(String[]::new))
+			+ ")";

		final String resultType = resultClassName.substring(resultClassName.lastIndexOf(".") + 1).toLowerCase();
		log.info("resultType: {}", resultType);
@@ -167,7 +175,6 @@ public class PrepareResultCommunitySetStep1 {
		final String outputResultPath = outputPath + "/" + resultType;
		log.info("writing output results to: {}", outputResultPath);
		String resultContextQuery = String
			.format(
				RESULT_CONTEXT_QUERY_TEMPLATE,
@@ -183,8 +190,7 @@ public class PrepareResultCommunitySetStep1 {
		patents.createOrReplaceTempView("patents");
		relatedToRelations.createOrReplaceTempView("relatedTorelations");

-		result_context = result_context.unionAll( spark.sql(RESULT_CONTEXT_QUERY_TEMPLATE_IS_RELATED_TO));
+		result_context = result_context.unionAll(spark.sql(RESULT_CONTEXT_QUERY_TEMPLATE_IS_RELATED_TO));

		result_context.createOrReplaceTempView("result_context");

View File

@@ -10,7 +10,6 @@ import java.util.ArrayList;
 import java.util.List;
 import java.util.stream.Collectors;
-import eu.dnetlib.dhp.resulttocommunityfromorganization.ResultCommunityList;
 import org.apache.commons.io.FileUtils;
 import org.apache.spark.SparkConf;
 import org.apache.spark.api.java.JavaRDD;
@@ -27,6 +26,7 @@ import org.slf4j.LoggerFactory;
 import com.fasterxml.jackson.databind.ObjectMapper;

+import eu.dnetlib.dhp.resulttocommunityfromorganization.ResultCommunityList;
 import eu.dnetlib.dhp.schema.oaf.Dataset;
 import scala.collection.Seq;
@@ -279,16 +279,10 @@ public class ResultToCommunityJobTest {
	@Test
	public void prepareStep1Test() throws Exception {
		/*
-		final String allowedsemrel = join(",", Arrays.stream(parser.get("allowedsemrels").split(";"))
-			.map(value -> "'" + value.toLowerCase() + "'")
-			.toArray(String[]::new));
-		log.info("allowedSemRel: {}", new Gson().toJson(allowedsemrel));
-		final String baseURL = parser.get("baseURL");
-		log.info("baseURL: {}", baseURL);
+		 * final String allowedsemrel = join(",", Arrays.stream(parser.get("allowedsemrels").split(";")) .map(value ->
+		 * "'" + value.toLowerCase() + "'") .toArray(String[]::new)); log.info("allowedSemRel: {}", new
+		 * Gson().toJson(allowedsemrel)); final String baseURL = parser.get("baseURL"); log.info("baseURL: {}",
+		 * baseURL);
		 */
		PrepareResultCommunitySetStep1
			.main(
@@ -300,32 +294,40 @@ public class ResultToCommunityJobTest {
					"-hive_metastore_uris", "",
					"-resultTableName", "eu.dnetlib.dhp.schema.oaf.Publication",
					"-outputPath", workingDir.toString() + "/preparedInfo",
-					"-allowedsemrels","issupplementto;issupplementedby",
-					"-baseURL","https://dev-openaire.d4science.org/openaire/community/"
+					"-allowedsemrels", "issupplementto;issupplementedby",
+					"-baseURL", "https://dev-openaire.d4science.org/openaire/community/"
				});

-		org.apache.spark.sql.Dataset<ResultCommunityList> resultCommunityList = spark.read().schema(Encoders.bean(ResultCommunityList.class).schema())
+		org.apache.spark.sql.Dataset<ResultCommunityList> resultCommunityList = spark
+			.read()
+			.schema(Encoders.bean(ResultCommunityList.class).schema())
			.json(workingDir.toString() + "/preparedInfo/publication")
			.as(Encoders.bean(ResultCommunityList.class));

		Assertions.assertEquals(2, resultCommunityList.count());
-		Assertions.assertEquals(1,resultCommunityList.filter("resultId = '50|dedup_wf_001::06e51d2bf295531b2d2e7a1b55500783'").count());
-		Assertions.assertEquals(1,resultCommunityList.filter("resultId = '50|pending_org_::82f63b2d21ae88596b9d8991780e9888'").count());
+		Assertions
+			.assertEquals(
+				1,
+				resultCommunityList.filter("resultId = '50|dedup_wf_001::06e51d2bf295531b2d2e7a1b55500783'").count());
+		Assertions
+			.assertEquals(
+				1,
+				resultCommunityList.filter("resultId = '50|pending_org_::82f63b2d21ae88596b9d8991780e9888'").count());

		ArrayList<String> communities = resultCommunityList
			.filter("resultId = '50|dedup_wf_001::06e51d2bf295531b2d2e7a1b55500783'")
-			.first().getCommunityList();
+			.first()
+			.getCommunityList();
		Assertions.assertEquals(2, communities.size());
		Assertions.assertTrue(communities.stream().anyMatch(cid -> "beopen".equals(cid)));
		Assertions.assertTrue(communities.stream().anyMatch(cid -> "dh-ch".equals(cid)));

		communities = resultCommunityList
			.filter("resultId = '50|pending_org_::82f63b2d21ae88596b9d8991780e9888'")
-			.first().getCommunityList();
+			.first()
+			.getCommunityList();
		Assertions.assertEquals(1, communities.size());
		Assertions.assertEquals("dh-ch", communities.get(0));
	}
 }