This commit is contained in:
parent
9a4c2aff07
commit
38065d6ed6
|
@ -210,11 +210,16 @@ public class DoiBoostAuthorMerger {
|
||||||
enrich.setPid(new ArrayList<>());
|
enrich.setPid(new ArrayList<>());
|
||||||
}
|
}
|
||||||
Set<String> aPids = enrich.getPid().stream().map(p -> pidToComparableString(p)).collect(Collectors.toSet());
|
Set<String> aPids = enrich.getPid().stream().map(p -> pidToComparableString(p)).collect(Collectors.toSet());
|
||||||
|
ArrayList<StructuredProperty> newPids = new ArrayList<>();
|
||||||
|
newPids.addAll(enrich.getPid());
|
||||||
enriching.getPid().forEach(p -> {
|
enriching.getPid().forEach(p -> {
|
||||||
if (!aPids.contains(pidToComparableString(p))) {
|
String pidToComparableString = pidToComparableString(p);
|
||||||
enrich.getPid().add(p);
|
if (!aPids.contains(pidToComparableString)) {
|
||||||
|
newPids.add(p);
|
||||||
|
aPids.add(pidToComparableString);
|
||||||
}
|
}
|
||||||
});
|
});
|
||||||
|
enrich.setPid(newPids);
|
||||||
if (enrich.getAffiliation() == null) {
|
if (enrich.getAffiliation() == null) {
|
||||||
if (enriching.getAffiliation() != null) {
|
if (enriching.getAffiliation() != null) {
|
||||||
enrich.setAffiliation(enriching.getAffiliation());
|
enrich.setAffiliation(enriching.getAffiliation());
|
||||||
|
|
|
@ -86,7 +86,7 @@
|
||||||
<case to="ProcessUW">${wf:conf('resumeFrom') eq 'PreprocessUW'}</case>
|
<case to="ProcessUW">${wf:conf('resumeFrom') eq 'PreprocessUW'}</case>
|
||||||
<case to="ProcessORCID">${wf:conf('resumeFrom') eq 'ProcessORCID'}</case>
|
<case to="ProcessORCID">${wf:conf('resumeFrom') eq 'ProcessORCID'}</case>
|
||||||
<case to="CreateDOIBoost">${wf:conf('resumeFrom') eq 'CreateDOIBoost'}</case>
|
<case to="CreateDOIBoost">${wf:conf('resumeFrom') eq 'CreateDOIBoost'}</case>
|
||||||
<case to="GenerateActionSet">${wf:conf('resumeFrom') eq 'GenerateActionSet'}</case>
|
<!-- <case to="GenerateActionSet">${wf:conf('resumeFrom') eq 'GenerateActionSet'}</case>-->
|
||||||
<default to="ConvertCrossrefToOAF"/>
|
<default to="ConvertCrossrefToOAF"/>
|
||||||
</switch>
|
</switch>
|
||||||
</decision>
|
</decision>
|
||||||
|
@ -226,40 +226,40 @@
|
||||||
<arg>--workingPath</arg><arg>${workingPath}</arg>
|
<arg>--workingPath</arg><arg>${workingPath}</arg>
|
||||||
<arg>--master</arg><arg>yarn-cluster</arg>
|
<arg>--master</arg><arg>yarn-cluster</arg>
|
||||||
</spark>
|
</spark>
|
||||||
<ok to="GenerateActionSet"/>
|
|
||||||
<error to="Kill"/>
|
|
||||||
</action>
|
|
||||||
|
|
||||||
|
|
||||||
<action name="GenerateActionSet">
|
|
||||||
<spark xmlns="uri:oozie:spark-action:0.2">
|
|
||||||
<master>yarn-cluster</master>
|
|
||||||
<mode>cluster</mode>
|
|
||||||
<name>Generate DOIBoost ActionSet</name>
|
|
||||||
<class>eu.dnetlib.doiboost.SparkGenerateDOIBoostActionSet</class>
|
|
||||||
<jar>dhp-doiboost-${projectVersion}.jar</jar>
|
|
||||||
<spark-opts>
|
|
||||||
--executor-memory=${sparkExecutorMemory}
|
|
||||||
--executor-cores=${sparkExecutorCores}
|
|
||||||
--driver-memory=${sparkDriverMemory}
|
|
||||||
--conf spark.sql.shuffle.partitions=3840
|
|
||||||
--conf spark.extraListeners=${spark2ExtraListeners}
|
|
||||||
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
|
|
||||||
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
|
|
||||||
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
|
|
||||||
</spark-opts>
|
|
||||||
<arg>--dbPublicationPath</arg><arg>${workingPath}/doiBoostPublicationFiltered</arg>
|
|
||||||
<arg>--dbDatasetPath</arg><arg>${workingPath}/crossrefDataset</arg>
|
|
||||||
<arg>--crossRefRelation</arg><arg>${workingPath}/crossrefRelation</arg>
|
|
||||||
<arg>--dbaffiliationRelationPath</arg><arg>${workingPath}/doiBoostPublicationAffiliation</arg>
|
|
||||||
<arg>--dbOrganizationPath</arg><arg>${workingPath}/doiBoostOrganization</arg>
|
|
||||||
<arg>--targetPath</arg><arg>${workingPath}/actionDataSet</arg>
|
|
||||||
<arg>--sFilePath</arg><arg>${outputPath}</arg>
|
|
||||||
<arg>--master</arg><arg>yarn-cluster</arg>
|
|
||||||
</spark>
|
|
||||||
<ok to="End"/>
|
<ok to="End"/>
|
||||||
<error to="Kill"/>
|
<error to="Kill"/>
|
||||||
</action>
|
</action>
|
||||||
|
|
||||||
|
|
||||||
|
<!-- <action name="GenerateActionSet">-->
|
||||||
|
<!-- <spark xmlns="uri:oozie:spark-action:0.2">-->
|
||||||
|
<!-- <master>yarn-cluster</master>-->
|
||||||
|
<!-- <mode>cluster</mode>-->
|
||||||
|
<!-- <name>Generate DOIBoost ActionSet</name>-->
|
||||||
|
<!-- <class>eu.dnetlib.doiboost.SparkGenerateDOIBoostActionSet</class>-->
|
||||||
|
<!-- <jar>dhp-doiboost-${projectVersion}.jar</jar>-->
|
||||||
|
<!-- <spark-opts>-->
|
||||||
|
<!-- --executor-memory=${sparkExecutorMemory}-->
|
||||||
|
<!-- --executor-cores=${sparkExecutorCores}-->
|
||||||
|
<!-- --driver-memory=${sparkDriverMemory}-->
|
||||||
|
<!-- --conf spark.sql.shuffle.partitions=3840-->
|
||||||
|
<!-- --conf spark.extraListeners=${spark2ExtraListeners}-->
|
||||||
|
<!-- --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}-->
|
||||||
|
<!-- --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}-->
|
||||||
|
<!-- --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}-->
|
||||||
|
<!-- </spark-opts>-->
|
||||||
|
<!-- <arg>--dbPublicationPath</arg><arg>${workingPath}/doiBoostPublicationFiltered</arg>-->
|
||||||
|
<!-- <arg>--dbDatasetPath</arg><arg>${workingPath}/crossrefDataset</arg>-->
|
||||||
|
<!-- <arg>--crossRefRelation</arg><arg>${workingPath}/crossrefRelation</arg>-->
|
||||||
|
<!-- <arg>--dbaffiliationRelationPath</arg><arg>${workingPath}/doiBoostPublicationAffiliation</arg>-->
|
||||||
|
<!-- <arg>--dbOrganizationPath</arg><arg>${workingPath}/doiBoostOrganization</arg>-->
|
||||||
|
<!-- <arg>--targetPath</arg><arg>${workingPath}/actionDataSet</arg>-->
|
||||||
|
<!-- <arg>--sFilePath</arg><arg>${outputPath}</arg>-->
|
||||||
|
<!-- <arg>--master</arg><arg>yarn-cluster</arg>-->
|
||||||
|
<!-- </spark>-->
|
||||||
|
<!-- <ok to="End"/>-->
|
||||||
|
<!-- <error to="Kill"/>-->
|
||||||
|
<!-- </action>-->
|
||||||
|
|
||||||
<end name="End"/>
|
<end name="End"/>
|
||||||
</workflow-app>
|
</workflow-app>
|
|
@ -225,28 +225,26 @@ public class GraphCleaningFunctionsTest {
|
||||||
GraphCleaningFunctionsTest.class.getResourceAsStream("/eu/dnetlib/dhp/oa/graph/clean/synonyms.txt"));
|
GraphCleaningFunctionsTest.class.getResourceAsStream("/eu/dnetlib/dhp/oa/graph/clean/synonyms.txt"));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testCleanDoiBoost() throws IOException {
|
public void testCleanDoiBoost() throws IOException {
|
||||||
String json = IOUtils.toString(getClass().getResourceAsStream("/eu/dnetlib/dhp/oa/graph/clean/doiboostpub.json"));
|
String json = IOUtils
|
||||||
|
.toString(getClass().getResourceAsStream("/eu/dnetlib/dhp/oa/graph/clean/doiboostpub.json"));
|
||||||
Publication p_in = MAPPER.readValue(json, Publication.class);
|
Publication p_in = MAPPER.readValue(json, Publication.class);
|
||||||
Publication p_out = OafCleaner.apply(GraphCleaningFunctions.fixVocabularyNames(p_in), mapping);
|
Publication p_out = OafCleaner.apply(GraphCleaningFunctions.fixVocabularyNames(p_in), mapping);
|
||||||
Publication cleaned = GraphCleaningFunctions.cleanup(p_out);
|
Publication cleaned = GraphCleaningFunctions.cleanup(p_out);
|
||||||
|
|
||||||
|
Assertions.assertEquals(true, GraphCleaningFunctions.filter(cleaned));
|
||||||
Assertions.assertEquals(true,GraphCleaningFunctions.filter(cleaned) );
|
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testCleanDoiBoost2() throws IOException {
|
public void testCleanDoiBoost2() throws IOException {
|
||||||
String json = IOUtils.toString(getClass().getResourceAsStream("/eu/dnetlib/dhp/oa/graph/clean/doiboostpub2.json"));
|
String json = IOUtils
|
||||||
|
.toString(getClass().getResourceAsStream("/eu/dnetlib/dhp/oa/graph/clean/doiboostpub2.json"));
|
||||||
Publication p_in = MAPPER.readValue(json, Publication.class);
|
Publication p_in = MAPPER.readValue(json, Publication.class);
|
||||||
Publication p_out = OafCleaner.apply(GraphCleaningFunctions.fixVocabularyNames(p_in), mapping);
|
Publication p_out = OafCleaner.apply(GraphCleaningFunctions.fixVocabularyNames(p_in), mapping);
|
||||||
Publication cleaned = GraphCleaningFunctions.cleanup(p_out);
|
Publication cleaned = GraphCleaningFunctions.cleanup(p_out);
|
||||||
|
|
||||||
|
Assertions.assertEquals(true, GraphCleaningFunctions.filter(cleaned));
|
||||||
Assertions.assertEquals(true,GraphCleaningFunctions.filter(cleaned) );
|
|
||||||
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue