Master branch updates from beta September 2023 #337

Manually merged
claudio.atzori merged 1271 commits from beta into master 2023-09-06 11:31:09 +02:00
2 changed files with 43 additions and 40 deletions
Showing only changes of commit 8dd5517548 - Show all commits

View File

@@ -61,17 +61,21 @@ public class GenerateRorActionSetJob {
private static final String ROR_NS_PREFIX = "ror_________"; private static final String ROR_NS_PREFIX = "ror_________";
private static final List<KeyValue> ROR_COLLECTED_FROM = listKeyValues("10|openaire____::993a7ae7a863813cf95028b50708e222", "ROR"); private static final List<KeyValue> ROR_COLLECTED_FROM = listKeyValues(
"10|openaire____::993a7ae7a863813cf95028b50708e222", "ROR");
private static final DataInfo ROR_DATA_INFO = dataInfo(false, "", false, false, ENTITYREGISTRY_PROVENANCE_ACTION, "0.92"); private static final DataInfo ROR_DATA_INFO = dataInfo(
false, "", false, false, ENTITYREGISTRY_PROVENANCE_ACTION, "0.92");
private static final Qualifier ROR_PID_TYPE = qualifier("ROR", "ROR", ModelConstants.DNET_PID_TYPES, ModelConstants.DNET_PID_TYPES); private static final Qualifier ROR_PID_TYPE = qualifier(
"ROR", "ROR", ModelConstants.DNET_PID_TYPES, ModelConstants.DNET_PID_TYPES);
public static void main(final String[] args) throws Exception { public static void main(final String[] args) throws Exception {
final String jsonConfiguration = IOUtils final String jsonConfiguration = IOUtils
.toString(GenerateRorActionSetJob.class .toString(
.getResourceAsStream("/eu/dnetlib/dhp/actionmanager/ror/action_set_parameters.json")); GenerateRorActionSetJob.class
.getResourceAsStream("/eu/dnetlib/dhp/actionmanager/ror/action_set_parameters.json"));
final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration); final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration);
@@ -109,8 +113,9 @@ public class GenerateRorActionSetJob {
readInputPath(spark, inputPath) readInputPath(spark, inputPath)
.map(GenerateRorActionSetJob::convertRorOrg) .map(GenerateRorActionSetJob::convertRorOrg)
.flatMap(List::iterator) .flatMap(List::iterator)
.mapToPair(aa -> new Tuple2<>(new Text(aa.getClazz().getCanonicalName()), .mapToPair(
new Text(OBJECT_MAPPER.writeValueAsString(aa)))) aa -> new Tuple2<>(new Text(aa.getClazz().getCanonicalName()),
new Text(OBJECT_MAPPER.writeValueAsString(aa))))
.saveAsHadoopFile(outputPath, Text.class, Text.class, SequenceFileOutputFormat.class); .saveAsHadoopFile(outputPath, Text.class, Text.class, SequenceFileOutputFormat.class);
} }
@@ -145,9 +150,12 @@ public class GenerateRorActionSetJob {
o.setEcnutscode(null); o.setEcnutscode(null);
if (r.getCountry() != null) { if (r.getCountry() != null) {
o o
.setCountry(qualifier(r.getCountry().getCountryCode(), r .setCountry(
.getCountry() qualifier(
.getCountryName(), ModelConstants.DNET_COUNTRY_TYPE, ModelConstants.DNET_COUNTRY_TYPE)); r.getCountry().getCountryCode(), r
.getCountry()
.getCountryName(),
ModelConstants.DNET_COUNTRY_TYPE, ModelConstants.DNET_COUNTRY_TYPE));
} else { } else {
o.setCountry(null); o.setCountry(null);
} }
@@ -173,7 +181,8 @@ public class GenerateRorActionSetJob {
final String type = e.getKey(); final String type = e.getKey();
final List<String> all = e.getValue().getAll(); final List<String> all = e.getValue().getAll();
if (all != null) { if (all != null) {
final Qualifier qualifier = qualifier(type, type, ModelConstants.DNET_PID_TYPES, ModelConstants.DNET_PID_TYPES); final Qualifier qualifier = qualifier(
type, type, ModelConstants.DNET_PID_TYPES, ModelConstants.DNET_PID_TYPES);
for (final String pid : all) { for (final String pid : all) {
pids pids
.add(structuredProperty(pid, qualifier, ROR_DATA_INFO)); .add(structuredProperty(pid, qualifier, ROR_DATA_INFO));

View File

@@ -177,43 +177,37 @@ object SparkGenerateDoiBoost {
.map(DoiBoostMappingUtil.fixPublication) .map(DoiBoostMappingUtil.fixPublication)
.map(p => (p.getId, p)) .map(p => (p.getId, p))
.groupByKey(_._1) .groupByKey(_._1)
.reduceGroups((left, right) => .reduceGroups((left, right) => {
{ //Check left is not null
//Check left is not null if (left != null && left._1 != null) {
if (left != null && left._1 != null) //If right is null then return left
{ if (right == null || right._2 == null)
//If right is null then return left left
if (right == null || right._2 == null)
left
else {
// Here Left and Right are not null
// So we have to merge
val b1 = left._2
val b2 = right._2
b1.mergeFrom(b2)
b1.mergeOAFDataInfo(b2)
val authors = AuthorMerger.mergeAuthor(b1.getAuthor, b2.getAuthor)
b1.setAuthor(authors)
if (b2.getId != null && b2.getId.nonEmpty)
b1.setId(b2.getId)
//Return publication Merged
(b1.getId, b1)
}
}
else { else {
// Left is Null so we return right // Here Left and Right are not null
right // So we have to merge
val b1 = left._2
val b2 = right._2
b1.mergeFrom(b2)
b1.mergeOAFDataInfo(b2)
val authors = AuthorMerger.mergeAuthor(b1.getAuthor, b2.getAuthor)
b1.setAuthor(authors)
if (b2.getId != null && b2.getId.nonEmpty)
b1.setId(b2.getId)
//Return publication Merged
(b1.getId, b1)
} }
} else {
// Left is Null so we return right
right
} }
})
) .filter(s => s != null && s._2 != null)
.filter(s => s!= null && s._2!=null)
.map(s => s._2._2) .map(s => s._2._2)
.write .write
.mode(SaveMode.Overwrite) .mode(SaveMode.Overwrite)
.save(s"$workingDirPath/doiBoostPublicationFiltered") .save(s"$workingDirPath/doiBoostPublicationFiltered")
val affiliationPath = parser.get("affiliationPath") val affiliationPath = parser.get("affiliationPath")
val paperAffiliationPath = parser.get("paperAffiliationPath") val paperAffiliationPath = parser.get("paperAffiliationPath")