forked from D-Net/dnet-hadoop
Fixed wrong import of unresolved relations in OpenAIRE
This commit is contained in:
parent
3762b17f7b
commit
9f8a80deb7
|
@ -70,7 +70,7 @@
|
||||||
<arg>--sourcePath</arg><arg>${mainPath}/datacite_dump</arg>
|
<arg>--sourcePath</arg><arg>${mainPath}/datacite_dump</arg>
|
||||||
<arg>--targetPath</arg><arg>${mainPath}/datacite_oaf</arg>
|
<arg>--targetPath</arg><arg>${mainPath}/datacite_oaf</arg>
|
||||||
<arg>--isLookupUrl</arg><arg>${isLookupUrl}</arg>
|
<arg>--isLookupUrl</arg><arg>${isLookupUrl}</arg>
|
||||||
<arg>--exportLinks</arg><arg>true</arg>
|
<arg>--exportLinks</arg><arg>false</arg>
|
||||||
<arg>--master</arg><arg>yarn-cluster</arg>
|
<arg>--master</arg><arg>yarn-cluster</arg>
|
||||||
</spark>
|
</spark>
|
||||||
<ok to="End"/>
|
<ok to="End"/>
|
||||||
|
|
|
@ -131,6 +131,7 @@ public class PrepareRelationsJob {
|
||||||
Set<String> relationFilter, int sourceMaxRelations, int targetMaxRelations, int relPartitions) {
|
Set<String> relationFilter, int sourceMaxRelations, int targetMaxRelations, int relPartitions) {
|
||||||
|
|
||||||
JavaRDD<Relation> rels = readPathRelationRDD(spark, inputRelationsPath)
|
JavaRDD<Relation> rels = readPathRelationRDD(spark, inputRelationsPath)
|
||||||
|
.filter(rel -> !(rel.getSource().startsWith("unresolved") || rel.getTarget().startsWith("unresolved")))
|
||||||
.filter(rel -> !rel.getDataInfo().getDeletedbyinference())
|
.filter(rel -> !rel.getDataInfo().getDeletedbyinference())
|
||||||
.filter(rel -> !relationFilter.contains(StringUtils.lowerCase(rel.getRelClass())));
|
.filter(rel -> !relationFilter.contains(StringUtils.lowerCase(rel.getRelClass())));
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue