1
0
Fork 0

relations for openorgs: now it chooses only one master

This commit is contained in:
miconis 2020-11-05 15:48:42 +01:00
parent c4a59d1b9a
commit 1699d41d39
1 changed files with 26 additions and 24 deletions

View File

@ -1,13 +1,9 @@
package eu.dnetlib.dhp.oa.dedup; package eu.dnetlib.dhp.oa.dedup;
import com.google.common.collect.Lists; import java.io.IOException;
import eu.dnetlib.dhp.application.ArgumentApplicationParser; import java.util.*;
import eu.dnetlib.dhp.oa.dedup.model.OrgSimRel;
import eu.dnetlib.dhp.schema.common.ModelSupport;
import eu.dnetlib.dhp.schema.oaf.Organization;
import eu.dnetlib.dhp.schema.oaf.Relation;
import eu.dnetlib.dhp.utils.ISLookupClientFactory;
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService;
import org.apache.commons.io.IOUtils; import org.apache.commons.io.IOUtils;
import org.apache.spark.SparkConf; import org.apache.spark.SparkConf;
import org.apache.spark.api.java.function.MapFunction; import org.apache.spark.api.java.function.MapFunction;
@ -17,12 +13,19 @@ import org.apache.spark.sql.SaveMode;
import org.apache.spark.sql.SparkSession; import org.apache.spark.sql.SparkSession;
import org.slf4j.Logger; import org.slf4j.Logger;
import org.slf4j.LoggerFactory; import org.slf4j.LoggerFactory;
import com.google.common.collect.Lists;
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
import eu.dnetlib.dhp.oa.dedup.model.OrgSimRel;
import eu.dnetlib.dhp.schema.common.ModelSupport;
import eu.dnetlib.dhp.schema.oaf.Organization;
import eu.dnetlib.dhp.schema.oaf.Relation;
import eu.dnetlib.dhp.utils.ISLookupClientFactory;
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService;
import scala.Tuple2; import scala.Tuple2;
import scala.Tuple3; import scala.Tuple3;
import java.io.IOException;
import java.util.*;
public class SparkPrepareOrgRels extends AbstractSparkAction { public class SparkPrepareOrgRels extends AbstractSparkAction {
private static final Logger log = LoggerFactory.getLogger(SparkCreateDedupRecord.class); private static final Logger log = LoggerFactory.getLogger(SparkCreateDedupRecord.class);
@ -125,12 +128,11 @@ public class SparkPrepareOrgRels extends AbstractSparkAction {
List<String> ids = sortIds(l); List<String> ids = sortIds(l);
List<Tuple3<String, String, String>> rels = new ArrayList<>(); List<Tuple3<String, String, String>> rels = new ArrayList<>();
for (String source : ids) { String source = ids.get(0);
if (source.contains("openorgs____") || ids.indexOf(source) == 0)
for (String target : ids) { for (String target : ids) {
rels.add(new Tuple3<>(source, target, groupId)); rels.add(new Tuple3<>(source, target, groupId));
} }
}
return rels.iterator(); return rels.iterator();
}) })
.rdd(), .rdd(),