forked from D-Net/dnet-hadoop
Fixed wrong import of unresolved relations in OpenAIRE
This commit is contained in:
parent
3762b17f7b
commit
9f8a80deb7
|
@@ -70,7 +70,7 @@
|
|||
<arg>--sourcePath</arg><arg>${mainPath}/datacite_dump</arg>
|
||||
<arg>--targetPath</arg><arg>${mainPath}/datacite_oaf</arg>
|
||||
<arg>--isLookupUrl</arg><arg>${isLookupUrl}</arg>
|
||||
<arg>--exportLinks</arg><arg>true</arg>
|
||||
<arg>--exportLinks</arg><arg>false</arg>
|
||||
<arg>--master</arg><arg>yarn-cluster</arg>
|
||||
</spark>
|
||||
<ok to="End"/>
|
||||
|
|
|
@@ -131,6 +131,7 @@ public class PrepareRelationsJob {
|
|||
Set<String> relationFilter, int sourceMaxRelations, int targetMaxRelations, int relPartitions) {
|
||||
|
||||
JavaRDD<Relation> rels = readPathRelationRDD(spark, inputRelationsPath)
|
||||
.filter(rel -> !(rel.getSource().startsWith("unresolved") || rel.getTarget().startsWith("unresolved")))
|
||||
.filter(rel -> !rel.getDataInfo().getDeletedbyinference())
|
||||
.filter(rel -> !relationFilter.contains(StringUtils.lowerCase(rel.getRelClass())));
|
||||
|
||||
|
|
Loading…
Reference in New Issue