Master branch updates from beta September 2023 #337
|
@ -61,17 +61,21 @@ public class GenerateRorActionSetJob {
|
||||||
|
|
||||||
private static final String ROR_NS_PREFIX = "ror_________";
|
private static final String ROR_NS_PREFIX = "ror_________";
|
||||||
|
|
||||||
private static final List<KeyValue> ROR_COLLECTED_FROM = listKeyValues("10|openaire____::993a7ae7a863813cf95028b50708e222", "ROR");
|
private static final List<KeyValue> ROR_COLLECTED_FROM = listKeyValues(
|
||||||
|
"10|openaire____::993a7ae7a863813cf95028b50708e222", "ROR");
|
||||||
|
|
||||||
private static final DataInfo ROR_DATA_INFO = dataInfo(false, "", false, false, ENTITYREGISTRY_PROVENANCE_ACTION, "0.92");
|
private static final DataInfo ROR_DATA_INFO = dataInfo(
|
||||||
|
false, "", false, false, ENTITYREGISTRY_PROVENANCE_ACTION, "0.92");
|
||||||
|
|
||||||
private static final Qualifier ROR_PID_TYPE = qualifier("ROR", "ROR", ModelConstants.DNET_PID_TYPES, ModelConstants.DNET_PID_TYPES);
|
private static final Qualifier ROR_PID_TYPE = qualifier(
|
||||||
|
"ROR", "ROR", ModelConstants.DNET_PID_TYPES, ModelConstants.DNET_PID_TYPES);
|
||||||
|
|
||||||
public static void main(final String[] args) throws Exception {
|
public static void main(final String[] args) throws Exception {
|
||||||
|
|
||||||
final String jsonConfiguration = IOUtils
|
final String jsonConfiguration = IOUtils
|
||||||
.toString(GenerateRorActionSetJob.class
|
.toString(
|
||||||
.getResourceAsStream("/eu/dnetlib/dhp/actionmanager/ror/action_set_parameters.json"));
|
GenerateRorActionSetJob.class
|
||||||
|
.getResourceAsStream("/eu/dnetlib/dhp/actionmanager/ror/action_set_parameters.json"));
|
||||||
|
|
||||||
final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration);
|
final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration);
|
||||||
|
|
||||||
|
@ -109,8 +113,9 @@ public class GenerateRorActionSetJob {
|
||||||
readInputPath(spark, inputPath)
|
readInputPath(spark, inputPath)
|
||||||
.map(GenerateRorActionSetJob::convertRorOrg)
|
.map(GenerateRorActionSetJob::convertRorOrg)
|
||||||
.flatMap(List::iterator)
|
.flatMap(List::iterator)
|
||||||
.mapToPair(aa -> new Tuple2<>(new Text(aa.getClazz().getCanonicalName()),
|
.mapToPair(
|
||||||
new Text(OBJECT_MAPPER.writeValueAsString(aa))))
|
aa -> new Tuple2<>(new Text(aa.getClazz().getCanonicalName()),
|
||||||
|
new Text(OBJECT_MAPPER.writeValueAsString(aa))))
|
||||||
.saveAsHadoopFile(outputPath, Text.class, Text.class, SequenceFileOutputFormat.class);
|
.saveAsHadoopFile(outputPath, Text.class, Text.class, SequenceFileOutputFormat.class);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -145,9 +150,12 @@ public class GenerateRorActionSetJob {
|
||||||
o.setEcnutscode(null);
|
o.setEcnutscode(null);
|
||||||
if (r.getCountry() != null) {
|
if (r.getCountry() != null) {
|
||||||
o
|
o
|
||||||
.setCountry(qualifier(r.getCountry().getCountryCode(), r
|
.setCountry(
|
||||||
.getCountry()
|
qualifier(
|
||||||
.getCountryName(), ModelConstants.DNET_COUNTRY_TYPE, ModelConstants.DNET_COUNTRY_TYPE));
|
r.getCountry().getCountryCode(), r
|
||||||
|
.getCountry()
|
||||||
|
.getCountryName(),
|
||||||
|
ModelConstants.DNET_COUNTRY_TYPE, ModelConstants.DNET_COUNTRY_TYPE));
|
||||||
} else {
|
} else {
|
||||||
o.setCountry(null);
|
o.setCountry(null);
|
||||||
}
|
}
|
||||||
|
@ -173,7 +181,8 @@ public class GenerateRorActionSetJob {
|
||||||
final String type = e.getKey();
|
final String type = e.getKey();
|
||||||
final List<String> all = e.getValue().getAll();
|
final List<String> all = e.getValue().getAll();
|
||||||
if (all != null) {
|
if (all != null) {
|
||||||
final Qualifier qualifier = qualifier(type, type, ModelConstants.DNET_PID_TYPES, ModelConstants.DNET_PID_TYPES);
|
final Qualifier qualifier = qualifier(
|
||||||
|
type, type, ModelConstants.DNET_PID_TYPES, ModelConstants.DNET_PID_TYPES);
|
||||||
for (final String pid : all) {
|
for (final String pid : all) {
|
||||||
pids
|
pids
|
||||||
.add(structuredProperty(pid, qualifier, ROR_DATA_INFO));
|
.add(structuredProperty(pid, qualifier, ROR_DATA_INFO));
|
||||||
|
|
|
@ -177,43 +177,37 @@ object SparkGenerateDoiBoost {
|
||||||
.map(DoiBoostMappingUtil.fixPublication)
|
.map(DoiBoostMappingUtil.fixPublication)
|
||||||
.map(p => (p.getId, p))
|
.map(p => (p.getId, p))
|
||||||
.groupByKey(_._1)
|
.groupByKey(_._1)
|
||||||
.reduceGroups((left, right) =>
|
.reduceGroups((left, right) => {
|
||||||
{
|
//Check left is not null
|
||||||
//Check left is not null
|
if (left != null && left._1 != null) {
|
||||||
if (left != null && left._1 != null)
|
//If right is null then return left
|
||||||
{
|
if (right == null || right._2 == null)
|
||||||
//If right is null then return left
|
left
|
||||||
if (right == null || right._2 == null)
|
|
||||||
left
|
|
||||||
else {
|
|
||||||
// Here Left and Right are not null
|
|
||||||
// So we have to merge
|
|
||||||
val b1 = left._2
|
|
||||||
val b2 = right._2
|
|
||||||
b1.mergeFrom(b2)
|
|
||||||
b1.mergeOAFDataInfo(b2)
|
|
||||||
val authors = AuthorMerger.mergeAuthor(b1.getAuthor, b2.getAuthor)
|
|
||||||
b1.setAuthor(authors)
|
|
||||||
if (b2.getId != null && b2.getId.nonEmpty)
|
|
||||||
b1.setId(b2.getId)
|
|
||||||
//Return publication Merged
|
|
||||||
(b1.getId, b1)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
else {
|
else {
|
||||||
// Left is Null so we return right
|
// Here Left and Right are not null
|
||||||
right
|
// So we have to merge
|
||||||
|
val b1 = left._2
|
||||||
|
val b2 = right._2
|
||||||
|
b1.mergeFrom(b2)
|
||||||
|
b1.mergeOAFDataInfo(b2)
|
||||||
|
val authors = AuthorMerger.mergeAuthor(b1.getAuthor, b2.getAuthor)
|
||||||
|
b1.setAuthor(authors)
|
||||||
|
if (b2.getId != null && b2.getId.nonEmpty)
|
||||||
|
b1.setId(b2.getId)
|
||||||
|
//Return publication Merged
|
||||||
|
(b1.getId, b1)
|
||||||
}
|
}
|
||||||
|
} else {
|
||||||
|
// Left is Null so we return right
|
||||||
|
right
|
||||||
}
|
}
|
||||||
|
})
|
||||||
)
|
.filter(s => s != null && s._2 != null)
|
||||||
.filter(s => s!= null && s._2!=null)
|
|
||||||
.map(s => s._2._2)
|
.map(s => s._2._2)
|
||||||
.write
|
.write
|
||||||
.mode(SaveMode.Overwrite)
|
.mode(SaveMode.Overwrite)
|
||||||
.save(s"$workingDirPath/doiBoostPublicationFiltered")
|
.save(s"$workingDirPath/doiBoostPublicationFiltered")
|
||||||
|
|
||||||
|
|
||||||
val affiliationPath = parser.get("affiliationPath")
|
val affiliationPath = parser.get("affiliationPath")
|
||||||
val paperAffiliationPath = parser.get("paperAffiliationPath")
|
val paperAffiliationPath = parser.get("paperAffiliationPath")
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue