This commit is contained in:
parent
9a4c2aff07
commit
38065d6ed6
|
@@ -210,11 +210,16 @@ public class DoiBoostAuthorMerger {
|
|||
enrich.setPid(new ArrayList<>());
|
||||
}
|
||||
Set<String> aPids = enrich.getPid().stream().map(p -> pidToComparableString(p)).collect(Collectors.toSet());
|
||||
ArrayList<StructuredProperty> newPids = new ArrayList<>();
|
||||
newPids.addAll(enrich.getPid());
|
||||
enriching.getPid().forEach(p -> {
|
||||
if (!aPids.contains(pidToComparableString(p))) {
|
||||
enrich.getPid().add(p);
|
||||
String pidToComparableString = pidToComparableString(p);
|
||||
if (!aPids.contains(pidToComparableString)) {
|
||||
newPids.add(p);
|
||||
aPids.add(pidToComparableString);
|
||||
}
|
||||
});
|
||||
enrich.setPid(newPids);
|
||||
if (enrich.getAffiliation() == null) {
|
||||
if (enriching.getAffiliation() != null) {
|
||||
enrich.setAffiliation(enriching.getAffiliation());
|
||||
|
|
|
@@ -86,7 +86,7 @@
|
|||
<case to="ProcessUW">${wf:conf('resumeFrom') eq 'PreprocessUW'}</case>
|
||||
<case to="ProcessORCID">${wf:conf('resumeFrom') eq 'ProcessORCID'}</case>
|
||||
<case to="CreateDOIBoost">${wf:conf('resumeFrom') eq 'CreateDOIBoost'}</case>
|
||||
<case to="GenerateActionSet">${wf:conf('resumeFrom') eq 'GenerateActionSet'}</case>
|
||||
<!-- <case to="GenerateActionSet">${wf:conf('resumeFrom') eq 'GenerateActionSet'}</case>-->
|
||||
<default to="ConvertCrossrefToOAF"/>
|
||||
</switch>
|
||||
</decision>
|
||||
|
@@ -226,40 +226,40 @@
|
|||
<arg>--workingPath</arg><arg>${workingPath}</arg>
|
||||
<arg>--master</arg><arg>yarn-cluster</arg>
|
||||
</spark>
|
||||
<ok to="GenerateActionSet"/>
|
||||
<error to="Kill"/>
|
||||
</action>
|
||||
|
||||
|
||||
<action name="GenerateActionSet">
|
||||
<spark xmlns="uri:oozie:spark-action:0.2">
|
||||
<master>yarn-cluster</master>
|
||||
<mode>cluster</mode>
|
||||
<name>Generate DOIBoost ActionSet</name>
|
||||
<class>eu.dnetlib.doiboost.SparkGenerateDOIBoostActionSet</class>
|
||||
<jar>dhp-doiboost-${projectVersion}.jar</jar>
|
||||
<spark-opts>
|
||||
--executor-memory=${sparkExecutorMemory}
|
||||
--executor-cores=${sparkExecutorCores}
|
||||
--driver-memory=${sparkDriverMemory}
|
||||
--conf spark.sql.shuffle.partitions=3840
|
||||
--conf spark.extraListeners=${spark2ExtraListeners}
|
||||
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
|
||||
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
|
||||
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
|
||||
</spark-opts>
|
||||
<arg>--dbPublicationPath</arg><arg>${workingPath}/doiBoostPublicationFiltered</arg>
|
||||
<arg>--dbDatasetPath</arg><arg>${workingPath}/crossrefDataset</arg>
|
||||
<arg>--crossRefRelation</arg><arg>${workingPath}/crossrefRelation</arg>
|
||||
<arg>--dbaffiliationRelationPath</arg><arg>${workingPath}/doiBoostPublicationAffiliation</arg>
|
||||
<arg>--dbOrganizationPath</arg><arg>${workingPath}/doiBoostOrganization</arg>
|
||||
<arg>--targetPath</arg><arg>${workingPath}/actionDataSet</arg>
|
||||
<arg>--sFilePath</arg><arg>${outputPath}</arg>
|
||||
<arg>--master</arg><arg>yarn-cluster</arg>
|
||||
</spark>
|
||||
<ok to="End"/>
|
||||
<error to="Kill"/>
|
||||
</action>
|
||||
|
||||
|
||||
<!-- <action name="GenerateActionSet">-->
|
||||
<!-- <spark xmlns="uri:oozie:spark-action:0.2">-->
|
||||
<!-- <master>yarn-cluster</master>-->
|
||||
<!-- <mode>cluster</mode>-->
|
||||
<!-- <name>Generate DOIBoost ActionSet</name>-->
|
||||
<!-- <class>eu.dnetlib.doiboost.SparkGenerateDOIBoostActionSet</class>-->
|
||||
<!-- <jar>dhp-doiboost-${projectVersion}.jar</jar>-->
|
||||
<!-- <spark-opts>-->
|
||||
<!-- --executor-memory=${sparkExecutorMemory}-->
|
||||
<!-- --executor-cores=${sparkExecutorCores}-->
|
||||
<!-- --driver-memory=${sparkDriverMemory}-->
|
||||
<!-- --conf spark.sql.shuffle.partitions=3840-->
|
||||
<!-- --conf spark.extraListeners=${spark2ExtraListeners}-->
|
||||
<!-- --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}-->
|
||||
<!-- --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}-->
|
||||
<!-- --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}-->
|
||||
<!-- </spark-opts>-->
|
||||
<!-- <arg>--dbPublicationPath</arg><arg>${workingPath}/doiBoostPublicationFiltered</arg>-->
|
||||
<!-- <arg>--dbDatasetPath</arg><arg>${workingPath}/crossrefDataset</arg>-->
|
||||
<!-- <arg>--crossRefRelation</arg><arg>${workingPath}/crossrefRelation</arg>-->
|
||||
<!-- <arg>--dbaffiliationRelationPath</arg><arg>${workingPath}/doiBoostPublicationAffiliation</arg>-->
|
||||
<!-- <arg>--dbOrganizationPath</arg><arg>${workingPath}/doiBoostOrganization</arg>-->
|
||||
<!-- <arg>--targetPath</arg><arg>${workingPath}/actionDataSet</arg>-->
|
||||
<!-- <arg>--sFilePath</arg><arg>${outputPath}</arg>-->
|
||||
<!-- <arg>--master</arg><arg>yarn-cluster</arg>-->
|
||||
<!-- </spark>-->
|
||||
<!-- <ok to="End"/>-->
|
||||
<!-- <error to="Kill"/>-->
|
||||
<!-- </action>-->
|
||||
|
||||
<end name="End"/>
|
||||
</workflow-app>
|
|
@@ -225,28 +225,26 @@ public class GraphCleaningFunctionsTest {
|
|||
GraphCleaningFunctionsTest.class.getResourceAsStream("/eu/dnetlib/dhp/oa/graph/clean/synonyms.txt"));
|
||||
}
|
||||
|
||||
|
||||
|
||||
@Test
|
||||
public void testCleanDoiBoost() throws IOException {
|
||||
String json = IOUtils.toString(getClass().getResourceAsStream("/eu/dnetlib/dhp/oa/graph/clean/doiboostpub.json"));
|
||||
String json = IOUtils
|
||||
.toString(getClass().getResourceAsStream("/eu/dnetlib/dhp/oa/graph/clean/doiboostpub.json"));
|
||||
Publication p_in = MAPPER.readValue(json, Publication.class);
|
||||
Publication p_out = OafCleaner.apply(GraphCleaningFunctions.fixVocabularyNames(p_in), mapping);
|
||||
Publication cleaned = GraphCleaningFunctions.cleanup(p_out);
|
||||
|
||||
|
||||
Assertions.assertEquals(true,GraphCleaningFunctions.filter(cleaned) );
|
||||
Assertions.assertEquals(true, GraphCleaningFunctions.filter(cleaned));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testCleanDoiBoost2() throws IOException {
|
||||
String json = IOUtils.toString(getClass().getResourceAsStream("/eu/dnetlib/dhp/oa/graph/clean/doiboostpub2.json"));
|
||||
String json = IOUtils
|
||||
.toString(getClass().getResourceAsStream("/eu/dnetlib/dhp/oa/graph/clean/doiboostpub2.json"));
|
||||
Publication p_in = MAPPER.readValue(json, Publication.class);
|
||||
Publication p_out = OafCleaner.apply(GraphCleaningFunctions.fixVocabularyNames(p_in), mapping);
|
||||
Publication cleaned = GraphCleaningFunctions.cleanup(p_out);
|
||||
|
||||
|
||||
Assertions.assertEquals(true,GraphCleaningFunctions.filter(cleaned) );
|
||||
Assertions.assertEquals(true, GraphCleaningFunctions.filter(cleaned));
|
||||
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue