forked from D-Net/dnet-hadoop
fixed incremental indexing
This commit is contained in:
parent
82e8341f50
commit
c36239e693
|
@ -0,0 +1,15 @@
|
||||||
|
package eu.dnetlib.dhp.schema.scholexplorer;
|
||||||
|
|
||||||
|
import eu.dnetlib.dhp.schema.oaf.Relation;
|
||||||
|
|
||||||
|
public class DLIRelation extends Relation {
|
||||||
|
private String dateOfCollection;
|
||||||
|
|
||||||
|
public String getDateOfCollection() {
|
||||||
|
return dateOfCollection;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setDateOfCollection(String dateOfCollection) {
|
||||||
|
this.dateOfCollection = dateOfCollection;
|
||||||
|
}
|
||||||
|
}
|
|
@ -1,6 +1,7 @@
|
||||||
package eu.dnetlib.dhp.sx.graph;
|
package eu.dnetlib.dhp.sx.graph;
|
||||||
|
|
||||||
import eu.dnetlib.dhp.schema.oaf.Relation;
|
import eu.dnetlib.dhp.schema.oaf.Relation;
|
||||||
|
import eu.dnetlib.dhp.schema.scholexplorer.DLIRelation;
|
||||||
import eu.dnetlib.dhp.utils.DHPUtils;
|
import eu.dnetlib.dhp.utils.DHPUtils;
|
||||||
import org.apache.commons.lang3.StringUtils;
|
import org.apache.commons.lang3.StringUtils;
|
||||||
import org.apache.spark.api.java.JavaPairRDD;
|
import org.apache.spark.api.java.JavaPairRDD;
|
||||||
|
@ -49,15 +50,15 @@ public class SparkSXGeneratePidSimlarity {
|
||||||
.equalsIgnoreCase(StringUtils.substringAfter(t._2(), "::")))
|
.equalsIgnoreCase(StringUtils.substringAfter(t._2(), "::")))
|
||||||
.distinct();
|
.distinct();
|
||||||
|
|
||||||
JavaRDD<Relation> simRel = datasetSimRel.union(publicationSimRel).map(s -> {
|
JavaRDD<DLIRelation> simRel = datasetSimRel.union(publicationSimRel).map(s -> {
|
||||||
final Relation r = new Relation();
|
final DLIRelation r = new DLIRelation();
|
||||||
r.setSource(s._1());
|
r.setSource(s._1());
|
||||||
r.setTarget(s._2());
|
r.setTarget(s._2());
|
||||||
r.setRelType("similar");
|
r.setRelType("similar");
|
||||||
return r;
|
return r;
|
||||||
}
|
}
|
||||||
);
|
);
|
||||||
spark.createDataset(simRel.rdd(), Encoders.bean(Relation.class)).distinct().write()
|
spark.createDataset(simRel.rdd(), Encoders.bean(DLIRelation.class)).distinct().write()
|
||||||
.mode(SaveMode.Overwrite).save(targetPath+"/pid_simRel");
|
.mode(SaveMode.Overwrite).save(targetPath+"/pid_simRel");
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -7,6 +7,7 @@ import eu.dnetlib.dhp.application.ArgumentApplicationParser;
|
||||||
import eu.dnetlib.dhp.schema.oaf.Relation;
|
import eu.dnetlib.dhp.schema.oaf.Relation;
|
||||||
import eu.dnetlib.dhp.schema.scholexplorer.DLIDataset;
|
import eu.dnetlib.dhp.schema.scholexplorer.DLIDataset;
|
||||||
import eu.dnetlib.dhp.schema.scholexplorer.DLIPublication;
|
import eu.dnetlib.dhp.schema.scholexplorer.DLIPublication;
|
||||||
|
import eu.dnetlib.dhp.schema.scholexplorer.DLIRelation;
|
||||||
import eu.dnetlib.dhp.schema.scholexplorer.DLIUnknown;
|
import eu.dnetlib.dhp.schema.scholexplorer.DLIUnknown;
|
||||||
import eu.dnetlib.dhp.utils.DHPUtils;
|
import eu.dnetlib.dhp.utils.DHPUtils;
|
||||||
import net.minidev.json.JSONArray;
|
import net.minidev.json.JSONArray;
|
||||||
|
@ -135,19 +136,19 @@ public class SparkScholexplorerCreateRawGraphJob {
|
||||||
|
|
||||||
|
|
||||||
SparkSXGeneratePidSimlarity.generateDataFrame(spark, sc, inputPath.replace("/relation",""),targetPath.replace("/relation","") );
|
SparkSXGeneratePidSimlarity.generateDataFrame(spark, sc, inputPath.replace("/relation",""),targetPath.replace("/relation","") );
|
||||||
RDD<Relation> rdd = union.mapToPair((PairFunction<String, String, Relation>) f -> {
|
RDD<DLIRelation> rdd = union.mapToPair((PairFunction<String, String, DLIRelation>) f -> {
|
||||||
final String source = getJPathString(SOURCEJSONPATH, f);
|
final String source = getJPathString(SOURCEJSONPATH, f);
|
||||||
final String target = getJPathString(TARGETJSONPATH, f);
|
final String target = getJPathString(TARGETJSONPATH, f);
|
||||||
final String reltype = getJPathString(RELJSONPATH, f);
|
final String reltype = getJPathString(RELJSONPATH, f);
|
||||||
ObjectMapper mapper = new ObjectMapper();
|
ObjectMapper mapper = new ObjectMapper();
|
||||||
mapper.configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES, false);
|
mapper.configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES, false);
|
||||||
return new Tuple2<>(DHPUtils.md5(String.format("%s::%s::%s", source.toLowerCase(), reltype.toLowerCase(), target.toLowerCase())), mapper.readValue(f, Relation.class));
|
return new Tuple2<>(DHPUtils.md5(String.format("%s::%s::%s", source.toLowerCase(), reltype.toLowerCase(), target.toLowerCase())), mapper.readValue(f, DLIRelation.class));
|
||||||
}).reduceByKey((a, b) -> {
|
}).reduceByKey((a, b) -> {
|
||||||
a.mergeFrom(b);
|
a.mergeFrom(b);
|
||||||
return a;
|
return a;
|
||||||
}).map(Tuple2::_2).rdd();
|
}).map(Tuple2::_2).rdd();
|
||||||
|
|
||||||
spark.createDataset(rdd, Encoders.bean(Relation.class)).write().mode(SaveMode.Overwrite).save(targetPath);
|
spark.createDataset(rdd, Encoders.bean(DLIRelation.class)).write().mode(SaveMode.Overwrite).save(targetPath);
|
||||||
Dataset<Relation> rel_ds =spark.read().load(targetPath).as(Encoders.bean(Relation.class));
|
Dataset<Relation> rel_ds =spark.read().load(targetPath).as(Encoders.bean(Relation.class));
|
||||||
|
|
||||||
System.out.println("LOADING PATH :"+targetPath.replace("/relation","")+"/pid_simRel");
|
System.out.println("LOADING PATH :"+targetPath.replace("/relation","")+"/pid_simRel");
|
||||||
|
|
|
@ -2,10 +2,13 @@ package eu.dnetlib.dhp.sx.graph.parser;
|
||||||
|
|
||||||
|
|
||||||
import eu.dnetlib.dhp.parser.utility.VtdUtilityParser;
|
import eu.dnetlib.dhp.parser.utility.VtdUtilityParser;
|
||||||
import eu.dnetlib.dhp.schema.oaf.Oaf;
|
import eu.dnetlib.dhp.schema.oaf.*;
|
||||||
import eu.dnetlib.dhp.schema.oaf.Qualifier;
|
import eu.dnetlib.dhp.schema.scholexplorer.DLIDataset;
|
||||||
import eu.dnetlib.dhp.schema.oaf.StructuredProperty;
|
import eu.dnetlib.dhp.schema.scholexplorer.DLIRelation;
|
||||||
|
import eu.dnetlib.dhp.schema.scholexplorer.DLIUnknown;
|
||||||
|
import eu.dnetlib.dhp.schema.scholexplorer.ProvenaceInfo;
|
||||||
import eu.dnetlib.dhp.utils.DHPUtils;
|
import eu.dnetlib.dhp.utils.DHPUtils;
|
||||||
|
import eu.dnetlib.scholexplorer.relation.RelInfo;
|
||||||
import eu.dnetlib.scholexplorer.relation.RelationMapper;
|
import eu.dnetlib.scholexplorer.relation.RelationMapper;
|
||||||
import org.apache.commons.lang3.StringUtils;
|
import org.apache.commons.lang3.StringUtils;
|
||||||
import org.apache.commons.logging.Log;
|
import org.apache.commons.logging.Log;
|
||||||
|
@ -15,6 +18,7 @@ import javax.xml.stream.XMLStreamReader;
|
||||||
import java.util.*;
|
import java.util.*;
|
||||||
import java.util.regex.Matcher;
|
import java.util.regex.Matcher;
|
||||||
import java.util.regex.Pattern;
|
import java.util.regex.Pattern;
|
||||||
|
import java.util.stream.Collectors;
|
||||||
|
|
||||||
public abstract class AbstractScholexplorerParser {
|
public abstract class AbstractScholexplorerParser {
|
||||||
|
|
||||||
|
@ -104,6 +108,74 @@ public abstract class AbstractScholexplorerParser {
|
||||||
return type+ DHPUtils.md5(String.format("%s::%s", pid.toLowerCase().trim(), pidType.toLowerCase().trim()));
|
return type+ DHPUtils.md5(String.format("%s::%s", pid.toLowerCase().trim(), pidType.toLowerCase().trim()));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
protected DLIUnknown createUnknownObject(final String pid, final String pidType, final KeyValue cf, final DataInfo di, final String dateOfCollection) {
|
||||||
|
final DLIUnknown uk = new DLIUnknown();
|
||||||
|
uk.setId(generateId(pid, pidType, "unknown"));
|
||||||
|
ProvenaceInfo pi = new ProvenaceInfo();
|
||||||
|
pi.setId(cf.getKey());
|
||||||
|
pi.setName(cf.getValue());
|
||||||
|
pi.setCompletionStatus("incomplete");
|
||||||
|
uk.setDataInfo(di);
|
||||||
|
uk.setDlicollectedfrom(Collections.singletonList(pi));
|
||||||
|
final StructuredProperty sourcePid = new StructuredProperty();
|
||||||
|
sourcePid.setValue(pid);
|
||||||
|
final Qualifier pt = new Qualifier();
|
||||||
|
pt.setClassname(pidType);
|
||||||
|
pt.setClassid(pidType);
|
||||||
|
pt.setSchemename("dnet:pid_types");
|
||||||
|
pt.setSchemeid("dnet:pid_types");
|
||||||
|
sourcePid.setQualifier(pt);
|
||||||
|
uk.setPid(Collections.singletonList(sourcePid));
|
||||||
|
uk.setDateofcollection(dateOfCollection);
|
||||||
|
return uk;
|
||||||
|
}
|
||||||
|
|
||||||
|
protected void generateRelations(RelationMapper relationMapper, Result parsedObject, List<Oaf> result, DataInfo di, String dateOfCollection, List<VtdUtilityParser.Node> relatedIdentifiers) {
|
||||||
|
if(relatedIdentifiers!= null) {
|
||||||
|
result.addAll(relatedIdentifiers.stream()
|
||||||
|
.flatMap(n -> {
|
||||||
|
final List<DLIRelation> rels = new ArrayList<>();
|
||||||
|
DLIRelation r = new DLIRelation();
|
||||||
|
r.setSource(parsedObject.getId());
|
||||||
|
final String relatedPid = n.getTextValue();
|
||||||
|
final String relatedPidType = n.getAttributes().get("relatedIdentifierType");
|
||||||
|
final String relatedType = n.getAttributes().getOrDefault("entityType", "unknown");
|
||||||
|
String relationSemantic = n.getAttributes().get("relationType");
|
||||||
|
String inverseRelation;
|
||||||
|
final String targetId = generateId(relatedPid, relatedPidType, relatedType);
|
||||||
|
r.setDateOfCollection(dateOfCollection);
|
||||||
|
if (relationMapper.containsKey(relationSemantic.toLowerCase()))
|
||||||
|
{
|
||||||
|
RelInfo relInfo = relationMapper.get(relationSemantic.toLowerCase());
|
||||||
|
relationSemantic = relInfo.getOriginal();
|
||||||
|
inverseRelation = relInfo.getInverse();
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
relationSemantic = "Unknown";
|
||||||
|
inverseRelation = "Unknown";
|
||||||
|
}
|
||||||
|
r.setTarget(targetId);
|
||||||
|
r.setRelType(relationSemantic);
|
||||||
|
r.setRelClass("datacite");
|
||||||
|
r.setCollectedFrom(parsedObject.getCollectedfrom());
|
||||||
|
r.setDataInfo(di);
|
||||||
|
rels.add(r);
|
||||||
|
r = new DLIRelation();
|
||||||
|
r.setDataInfo(di);
|
||||||
|
r.setSource(targetId);
|
||||||
|
r.setTarget(parsedObject.getId());
|
||||||
|
r.setRelType(inverseRelation);
|
||||||
|
r.setRelClass("datacite");
|
||||||
|
r.setCollectedFrom(parsedObject.getCollectedfrom());
|
||||||
|
r.setDateOfCollection(dateOfCollection);
|
||||||
|
rels.add(r);
|
||||||
|
if("unknown".equalsIgnoreCase(relatedType))
|
||||||
|
result.add(createUnknownObject(relatedPid, relatedPidType, parsedObject.getCollectedfrom().get(0), di, dateOfCollection));
|
||||||
|
return rels.stream();
|
||||||
|
}).collect(Collectors.toList()));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -42,7 +42,8 @@ public class DatasetScholexplorerParser extends AbstractScholexplorerParser {
|
||||||
parsedObject.setOriginalId(Collections.singletonList(VtdUtilityParser.getSingleValue(ap, vn, "//*[local-name()='recordIdentifier']")));
|
parsedObject.setOriginalId(Collections.singletonList(VtdUtilityParser.getSingleValue(ap, vn, "//*[local-name()='recordIdentifier']")));
|
||||||
|
|
||||||
parsedObject.setOriginalObjIdentifier(VtdUtilityParser.getSingleValue(ap, vn, "//*[local-name()='objIdentifier']"));
|
parsedObject.setOriginalObjIdentifier(VtdUtilityParser.getSingleValue(ap, vn, "//*[local-name()='objIdentifier']"));
|
||||||
parsedObject.setDateofcollection(VtdUtilityParser.getSingleValue(ap, vn, "//*[local-name()='dateOfCollection']"));
|
String dateOfCollection = VtdUtilityParser.getSingleValue(ap, vn, "//*[local-name()='dateOfCollection']");
|
||||||
|
parsedObject.setDateofcollection(dateOfCollection);
|
||||||
|
|
||||||
final String resolvedDate = VtdUtilityParser.getSingleValue(ap, vn, "//*[local-name()='resolvedDate']");
|
final String resolvedDate = VtdUtilityParser.getSingleValue(ap, vn, "//*[local-name()='resolvedDate']");
|
||||||
|
|
||||||
|
@ -123,7 +124,7 @@ public class DatasetScholexplorerParser extends AbstractScholexplorerParser {
|
||||||
List<String> descs = VtdUtilityParser.getTextValue(ap, vn, "//*[local-name()='description']");
|
List<String> descs = VtdUtilityParser.getTextValue(ap, vn, "//*[local-name()='description']");
|
||||||
if (descs != null && descs.size() > 0)
|
if (descs != null && descs.size() > 0)
|
||||||
parsedObject.setDescription(descs.stream()
|
parsedObject.setDescription(descs.stream()
|
||||||
.map(it -> it.length() < 512 ? it : it.substring(0, 512))
|
.map(it -> it.length() < 10000 ? it : it.substring(0, 10000))
|
||||||
.map(it -> {
|
.map(it -> {
|
||||||
final Field<String> d = new Field<>();
|
final Field<String> d = new Field<>();
|
||||||
d.setValue(it);
|
d.setValue(it);
|
||||||
|
@ -137,48 +138,7 @@ public class DatasetScholexplorerParser extends AbstractScholexplorerParser {
|
||||||
Arrays.asList("relatedIdentifierType", "relationType", "entityType", "inverseRelationType"));
|
Arrays.asList("relatedIdentifierType", "relationType", "entityType", "inverseRelationType"));
|
||||||
|
|
||||||
|
|
||||||
if(relatedIdentifiers!= null) {
|
generateRelations(relationMapper, parsedObject, result, di, dateOfCollection, relatedIdentifiers);
|
||||||
result.addAll(relatedIdentifiers.stream()
|
|
||||||
.flatMap(n -> {
|
|
||||||
final List<Relation> rels = new ArrayList<>();
|
|
||||||
Relation r = new Relation();
|
|
||||||
r.setSource(parsedObject.getId());
|
|
||||||
final String relatedPid = n.getTextValue();
|
|
||||||
final String relatedPidType = n.getAttributes().get("relatedIdentifierType");
|
|
||||||
final String relatedType = n.getAttributes().getOrDefault("entityType", "unknown");
|
|
||||||
String relationSemantic = n.getAttributes().get("relationType");
|
|
||||||
String inverseRelation = n.getAttributes().get("inverseRelationType");
|
|
||||||
final String targetId = generateId(relatedPid, relatedPidType, relatedType);
|
|
||||||
|
|
||||||
if (relationMapper.containsKey(relationSemantic.toLowerCase()))
|
|
||||||
{
|
|
||||||
RelInfo relInfo = relationMapper.get(relationSemantic.toLowerCase());
|
|
||||||
relationSemantic = relInfo.getOriginal();
|
|
||||||
inverseRelation = relInfo.getInverse();
|
|
||||||
}
|
|
||||||
else {
|
|
||||||
relationSemantic = "Unknown";
|
|
||||||
inverseRelation = "Unknown";
|
|
||||||
}
|
|
||||||
r.setTarget(targetId);
|
|
||||||
r.setRelType(relationSemantic);
|
|
||||||
r.setRelClass("datacite");
|
|
||||||
r.setCollectedFrom(parsedObject.getCollectedfrom());
|
|
||||||
r.setDataInfo(di);
|
|
||||||
rels.add(r);
|
|
||||||
r = new Relation();
|
|
||||||
r.setDataInfo(di);
|
|
||||||
r.setSource(targetId);
|
|
||||||
r.setTarget(parsedObject.getId());
|
|
||||||
r.setRelType(inverseRelation);
|
|
||||||
r.setRelClass("datacite");
|
|
||||||
r.setCollectedFrom(parsedObject.getCollectedfrom());
|
|
||||||
rels.add(r);
|
|
||||||
if("unknown".equalsIgnoreCase(relatedType))
|
|
||||||
result.add(createUnknownObject(relatedPid, relatedPidType, parsedObject.getCollectedfrom().get(0), di));
|
|
||||||
return rels.stream();
|
|
||||||
}).collect(Collectors.toList()));
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
final List<Node> hostedBy =
|
final List<Node> hostedBy =
|
||||||
|
@ -199,7 +159,7 @@ public class DatasetScholexplorerParser extends AbstractScholexplorerParser {
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
List<StructuredProperty> subjects = extractSubject(VtdUtilityParser.getTextValuesWithAttributes(ap, vn, "//*[local-name()='resource']//*[local-name()='subject']", Arrays.asList("subjectScheme")));
|
List<StructuredProperty> subjects = extractSubject(VtdUtilityParser.getTextValuesWithAttributes(ap, vn, "//*[local-name()='resource']//*[local-name()='subject']", Collections.singletonList("subjectScheme")));
|
||||||
|
|
||||||
parsedObject.setSubject(subjects);
|
parsedObject.setSubject(subjects);
|
||||||
|
|
||||||
|
@ -265,24 +225,6 @@ public class DatasetScholexplorerParser extends AbstractScholexplorerParser {
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
private DLIUnknown createUnknownObject(final String pid, final String pidType, final KeyValue cf, final DataInfo di) {
|
|
||||||
final DLIUnknown uk = new DLIUnknown();
|
|
||||||
uk.setId(generateId(pid, pidType, "unknown"));
|
|
||||||
ProvenaceInfo pi = new ProvenaceInfo();
|
|
||||||
pi.setId(cf.getKey());
|
|
||||||
pi.setName(cf.getValue());
|
|
||||||
pi.setCompletionStatus("incomplete");
|
|
||||||
uk.setDataInfo(di);
|
|
||||||
uk.setDlicollectedfrom(Collections.singletonList(pi));
|
|
||||||
final StructuredProperty sourcePid = new StructuredProperty();
|
|
||||||
sourcePid.setValue(pid);
|
|
||||||
final Qualifier pt = new Qualifier();
|
|
||||||
pt.setClassname(pidType);
|
|
||||||
pt.setClassid(pidType);
|
|
||||||
pt.setSchemename("dnet:pid_types");
|
|
||||||
pt.setSchemeid("dnet:pid_types");
|
|
||||||
sourcePid.setQualifier(pt);
|
|
||||||
uk.setPid(Collections.singletonList(sourcePid));
|
|
||||||
return uk;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -38,7 +38,8 @@ public class PublicationScholexplorerParser extends AbstractScholexplorerParser
|
||||||
di.setDeletedbyinference(false);
|
di.setDeletedbyinference(false);
|
||||||
di.setInvisible(false);
|
di.setInvisible(false);
|
||||||
|
|
||||||
parsedObject.setDateofcollection(VtdUtilityParser.getSingleValue(ap, vn, "//*[local-name()='dateOfCollection']"));
|
String dateOfCollection = VtdUtilityParser.getSingleValue(ap, vn, "//*[local-name()='dateOfCollection']");
|
||||||
|
parsedObject.setDateofcollection(dateOfCollection);
|
||||||
|
|
||||||
final String resolvedDate = VtdUtilityParser.getSingleValue(ap, vn, "//*[local-name()='resolvedDate']");
|
final String resolvedDate = VtdUtilityParser.getSingleValue(ap, vn, "//*[local-name()='resolvedDate']");
|
||||||
parsedObject.setOriginalId(Collections.singletonList(VtdUtilityParser.getSingleValue(ap, vn, "//*[local-name()='recordIdentifier']")));
|
parsedObject.setOriginalId(Collections.singletonList(VtdUtilityParser.getSingleValue(ap, vn, "//*[local-name()='recordIdentifier']")));
|
||||||
|
@ -118,48 +119,7 @@ public class PublicationScholexplorerParser extends AbstractScholexplorerParser
|
||||||
final List<Node> relatedIdentifiers =
|
final List<Node> relatedIdentifiers =
|
||||||
VtdUtilityParser.getTextValuesWithAttributes(ap, vn, "//*[local-name()='relatedIdentifier']",
|
VtdUtilityParser.getTextValuesWithAttributes(ap, vn, "//*[local-name()='relatedIdentifier']",
|
||||||
Arrays.asList("relatedIdentifierType", "relationType", "entityType", "inverseRelationType"));
|
Arrays.asList("relatedIdentifierType", "relationType", "entityType", "inverseRelationType"));
|
||||||
|
generateRelations(relationMapper, parsedObject, result, di, dateOfCollection, relatedIdentifiers);
|
||||||
|
|
||||||
if (relatedIdentifiers != null) {
|
|
||||||
result.addAll(relatedIdentifiers.stream()
|
|
||||||
.flatMap(n -> {
|
|
||||||
final List<Relation> rels = new ArrayList<>();
|
|
||||||
Relation r = new Relation();
|
|
||||||
r.setSource(parsedObject.getId());
|
|
||||||
final String relatedPid = n.getTextValue();
|
|
||||||
final String relatedPidType = n.getAttributes().get("relatedIdentifierType");
|
|
||||||
final String relatedType = n.getAttributes().getOrDefault("entityType", "unknown");
|
|
||||||
String relationSemantic = n.getAttributes().get("relationType");
|
|
||||||
String inverseRelation = "Unknown";
|
|
||||||
final String targetId = generateId(relatedPid, relatedPidType, relatedType);
|
|
||||||
|
|
||||||
if (relationMapper.containsKey(relationSemantic.toLowerCase()))
|
|
||||||
{
|
|
||||||
RelInfo relInfo = relationMapper.get(relationSemantic.toLowerCase());
|
|
||||||
relationSemantic = relInfo.getOriginal();
|
|
||||||
inverseRelation = relInfo.getInverse();
|
|
||||||
}
|
|
||||||
else {
|
|
||||||
relationSemantic = "Unknown";
|
|
||||||
}
|
|
||||||
r.setTarget(targetId);
|
|
||||||
r.setRelType(relationSemantic);
|
|
||||||
r.setCollectedFrom(parsedObject.getCollectedfrom());
|
|
||||||
r.setRelClass("datacite");
|
|
||||||
r.setDataInfo(di);
|
|
||||||
rels.add(r);
|
|
||||||
r = new Relation();
|
|
||||||
r.setDataInfo(di);
|
|
||||||
r.setSource(targetId);
|
|
||||||
r.setTarget(parsedObject.getId());
|
|
||||||
r.setRelType(inverseRelation);
|
|
||||||
r.setRelClass("datacite");
|
|
||||||
r.setCollectedFrom(parsedObject.getCollectedfrom());
|
|
||||||
rels.add(r);
|
|
||||||
|
|
||||||
return rels.stream();
|
|
||||||
}).collect(Collectors.toList()));
|
|
||||||
}
|
|
||||||
|
|
||||||
final List<Node> hostedBy =
|
final List<Node> hostedBy =
|
||||||
VtdUtilityParser.getTextValuesWithAttributes(ap, vn, "//*[local-name()='hostedBy']", Arrays.asList("id", "name"));
|
VtdUtilityParser.getTextValuesWithAttributes(ap, vn, "//*[local-name()='hostedBy']", Arrays.asList("id", "name"));
|
||||||
|
@ -206,8 +166,8 @@ public class PublicationScholexplorerParser extends AbstractScholexplorerParser
|
||||||
|
|
||||||
description.setValue(VtdUtilityParser.getSingleValue(ap, vn, "//*[local-name()='description']"));
|
description.setValue(VtdUtilityParser.getSingleValue(ap, vn, "//*[local-name()='description']"));
|
||||||
|
|
||||||
if (StringUtils.isNotBlank(description.getValue()) && description.getValue().length() > 512) {
|
if (StringUtils.isNotBlank(description.getValue()) && description.getValue().length() > 10000) {
|
||||||
description.setValue(description.getValue().substring(0, 512));
|
description.setValue(description.getValue().substring(0, 10000));
|
||||||
}
|
}
|
||||||
|
|
||||||
parsedObject.setDescription(Collections.singletonList(description));
|
parsedObject.setDescription(Collections.singletonList(description));
|
||||||
|
|
|
@ -69,6 +69,11 @@
|
||||||
|
|
||||||
</dependency>
|
</dependency>
|
||||||
|
|
||||||
|
<dependency>
|
||||||
|
<groupId>org.apache.httpcomponents</groupId>
|
||||||
|
<artifactId>httpclient</artifactId>
|
||||||
|
</dependency>
|
||||||
|
|
||||||
</dependencies>
|
</dependencies>
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -3,7 +3,6 @@ package eu.dnetlib.dhp.provision.scholix;
|
||||||
import eu.dnetlib.dhp.provision.scholix.summary.ScholixSummary;
|
import eu.dnetlib.dhp.provision.scholix.summary.ScholixSummary;
|
||||||
|
|
||||||
import java.io.Serializable;
|
import java.io.Serializable;
|
||||||
import java.util.Arrays;
|
|
||||||
import java.util.Collections;
|
import java.util.Collections;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
import java.util.stream.Collectors;
|
import java.util.stream.Collectors;
|
||||||
|
@ -20,10 +19,6 @@ public class ScholixResource implements Serializable {
|
||||||
private List<ScholixEntityId> publisher;
|
private List<ScholixEntityId> publisher;
|
||||||
private List<ScholixCollectedFrom> collectedFrom;
|
private List<ScholixCollectedFrom> collectedFrom;
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
public static ScholixResource fromSummary(ScholixSummary summary) {
|
public static ScholixResource fromSummary(ScholixSummary summary) {
|
||||||
|
|
||||||
final ScholixResource resource = new ScholixResource();
|
final ScholixResource resource = new ScholixResource();
|
||||||
|
@ -38,7 +33,7 @@ public class ScholixResource implements Serializable {
|
||||||
resource.setObjectType(summary.getTypology().toString());
|
resource.setObjectType(summary.getTypology().toString());
|
||||||
|
|
||||||
|
|
||||||
if (summary.getTitle() != null && summary.getTitle().size()>0)
|
if (summary.getTitle() != null && summary.getTitle().size() > 0)
|
||||||
resource.setTitle(summary.getTitle().get(0));
|
resource.setTitle(summary.getTitle().get(0));
|
||||||
|
|
||||||
if (summary.getAuthor() != null)
|
if (summary.getAuthor() != null)
|
||||||
|
@ -47,7 +42,7 @@ public class ScholixResource implements Serializable {
|
||||||
.collect(Collectors.toList())
|
.collect(Collectors.toList())
|
||||||
);
|
);
|
||||||
|
|
||||||
if (summary.getDate() != null && summary.getDate().size()>0)
|
if (summary.getDate() != null && summary.getDate().size() > 0)
|
||||||
resource.setPublicationDate(summary.getDate().get(0));
|
resource.setPublicationDate(summary.getDate().get(0));
|
||||||
if (summary.getPublisher() != null)
|
if (summary.getPublisher() != null)
|
||||||
resource.setPublisher(summary.getPublisher().stream()
|
resource.setPublisher(summary.getPublisher().stream()
|
||||||
|
@ -65,6 +60,7 @@ public class ScholixResource implements Serializable {
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
public List<ScholixIdentifier> getIdentifier() {
|
public List<ScholixIdentifier> getIdentifier() {
|
||||||
return identifier;
|
return identifier;
|
||||||
}
|
}
|
||||||
|
|
|
@ -165,7 +165,7 @@ public class Datacite2Scholix {
|
||||||
return res;
|
return res;
|
||||||
}
|
}
|
||||||
|
|
||||||
protected String generateId(final String pid, final String pidType, final String entityType) {
|
public static String generateId(final String pid, final String pidType, final String entityType) {
|
||||||
String type;
|
String type;
|
||||||
switch (entityType){
|
switch (entityType){
|
||||||
case "publication":
|
case "publication":
|
||||||
|
|
|
@ -4,18 +4,25 @@ import com.fasterxml.jackson.databind.ObjectMapper;
|
||||||
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
|
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
|
||||||
import eu.dnetlib.dhp.provision.scholix.Scholix;
|
import eu.dnetlib.dhp.provision.scholix.Scholix;
|
||||||
import eu.dnetlib.dhp.provision.scholix.ScholixIdentifier;
|
import eu.dnetlib.dhp.provision.scholix.ScholixIdentifier;
|
||||||
|
import eu.dnetlib.dhp.provision.scholix.ScholixRelationship;
|
||||||
import eu.dnetlib.dhp.provision.scholix.ScholixResource;
|
import eu.dnetlib.dhp.provision.scholix.ScholixResource;
|
||||||
import eu.dnetlib.dhp.utils.DHPUtils;
|
import eu.dnetlib.dhp.utils.DHPUtils;
|
||||||
|
import eu.dnetlib.scholexplorer.relation.RelationMapper;
|
||||||
import org.apache.commons.io.IOUtils;
|
import org.apache.commons.io.IOUtils;
|
||||||
|
import org.apache.commons.lang3.StringUtils;
|
||||||
import org.apache.hadoop.io.IntWritable;
|
import org.apache.hadoop.io.IntWritable;
|
||||||
import org.apache.hadoop.io.Text;
|
import org.apache.hadoop.io.Text;
|
||||||
import org.apache.spark.SparkConf;
|
import org.apache.spark.SparkConf;
|
||||||
import org.apache.spark.api.java.JavaSparkContext;
|
import org.apache.spark.api.java.JavaSparkContext;
|
||||||
|
import org.apache.spark.api.java.function.FlatMapFunction;
|
||||||
import org.apache.spark.api.java.function.MapFunction;
|
import org.apache.spark.api.java.function.MapFunction;
|
||||||
import org.apache.spark.sql.*;
|
import org.apache.spark.sql.*;
|
||||||
import scala.Tuple2;
|
import scala.Tuple2;
|
||||||
|
|
||||||
|
import java.util.ArrayList;
|
||||||
import java.util.Collections;
|
import java.util.Collections;
|
||||||
|
import java.util.List;
|
||||||
|
import java.util.stream.Collectors;
|
||||||
|
|
||||||
public class SparkResolveScholixTarget {
|
public class SparkResolveScholixTarget {
|
||||||
|
|
||||||
|
@ -29,8 +36,6 @@ public class SparkResolveScholixTarget {
|
||||||
final String sourcePath = parser.get("sourcePath");
|
final String sourcePath = parser.get("sourcePath");
|
||||||
final String workingDirPath= parser.get("workingDirPath");
|
final String workingDirPath= parser.get("workingDirPath");
|
||||||
final String indexHost= parser.get("indexHost");
|
final String indexHost= parser.get("indexHost");
|
||||||
|
|
||||||
|
|
||||||
try (SparkSession spark = getSession(conf, master)){
|
try (SparkSession spark = getSession(conf, master)){
|
||||||
|
|
||||||
final JavaSparkContext sc = new JavaSparkContext(spark.sparkContext());
|
final JavaSparkContext sc = new JavaSparkContext(spark.sparkContext());
|
||||||
|
@ -65,7 +70,55 @@ public class SparkResolveScholixTarget {
|
||||||
}, Encoders.bean(ScholixResource.class)).write().mode(SaveMode.Overwrite).save(workingDirPath+"/stepB");
|
}, Encoders.bean(ScholixResource.class)).write().mode(SaveMode.Overwrite).save(workingDirPath+"/stepB");
|
||||||
|
|
||||||
|
|
||||||
|
Dataset<ScholixResource> s2 = spark.read().load(workingDirPath+"/stepB").as(Encoders.bean(ScholixResource.class));
|
||||||
|
|
||||||
|
|
||||||
|
s1.joinWith(s2, s1.col("target.identifier.identifier").equalTo(s2.col("identifier.identifier")), "left")
|
||||||
|
|
||||||
|
.flatMap((FlatMapFunction<Tuple2<Scholix, ScholixResource>, Scholix>) f ->
|
||||||
|
{
|
||||||
|
|
||||||
|
final List<Scholix> res = new ArrayList<>();
|
||||||
|
final Scholix s = f._1();
|
||||||
|
final ScholixResource target = f._2();
|
||||||
|
if (StringUtils.isNotBlank(s.getIdentifier()))
|
||||||
|
res.add(s);
|
||||||
|
else if (target == null) {
|
||||||
|
ScholixResource currentTarget = s.getTarget();
|
||||||
|
currentTarget.setObjectType("unknown");
|
||||||
|
currentTarget.setDnetIdentifier(Datacite2Scholix.generateId(currentTarget.getIdentifier().get(0).getIdentifier(),currentTarget.getIdentifier().get(0).getSchema(), currentTarget.getObjectType()));
|
||||||
|
|
||||||
|
s.generateIdentifier();
|
||||||
|
res.add(s);
|
||||||
|
final Scholix inverse = new Scholix();
|
||||||
|
inverse.setTarget(s.getSource());
|
||||||
|
inverse.setSource(s.getTarget());
|
||||||
|
inverse.setLinkprovider(s.getLinkprovider());
|
||||||
|
inverse.setPublicationDate(s.getPublicationDate());
|
||||||
|
inverse.setPublisher(s.getPublisher());
|
||||||
|
inverse.setRelationship(new ScholixRelationship(s.getRelationship().getInverse(), s.getRelationship().getSchema(), s.getRelationship().getName()));
|
||||||
|
inverse.generateIdentifier();
|
||||||
|
res.add(inverse);
|
||||||
|
|
||||||
|
} else
|
||||||
|
{
|
||||||
|
target.setIdentifier(target.getIdentifier().stream().map(d -> new ScholixIdentifier(d.getIdentifier().toLowerCase(), d.getSchema().toLowerCase())).collect(Collectors.toList()));
|
||||||
|
s.setTarget(target);
|
||||||
|
s.generateIdentifier();
|
||||||
|
res.add(s);
|
||||||
|
final Scholix inverse = new Scholix();
|
||||||
|
inverse.setTarget(s.getSource());
|
||||||
|
inverse.setSource(s.getTarget());
|
||||||
|
inverse.setLinkprovider(s.getLinkprovider());
|
||||||
|
inverse.setPublicationDate(s.getPublicationDate());
|
||||||
|
inverse.setPublisher(s.getPublisher());
|
||||||
|
inverse.setRelationship(new ScholixRelationship(s.getRelationship().getInverse(), s.getRelationship().getSchema(), s.getRelationship().getName()));
|
||||||
|
inverse.generateIdentifier();
|
||||||
|
res.add(inverse);
|
||||||
|
}
|
||||||
|
|
||||||
|
return res.iterator();
|
||||||
|
}, Encoders.bean(Scholix.class)).javaRDD().map(s -> new ObjectMapper().writeValueAsString(s)).saveAsTextFile(workingDirPath+"/resolved_json");
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -0,0 +1,10 @@
|
||||||
|
<configuration>
|
||||||
|
<property>
|
||||||
|
<name>oozie.use.system.libpath</name>
|
||||||
|
<value>true</value>
|
||||||
|
</property>
|
||||||
|
<property>
|
||||||
|
<name>oozie.action.sharelib.for.spark</name>
|
||||||
|
<value>spark2</value>
|
||||||
|
</property>
|
||||||
|
</configuration>
|
|
@ -0,0 +1,68 @@
|
||||||
|
<workflow-app name="Index graph to ElasticSearch" xmlns="uri:oozie:workflow:0.5">
|
||||||
|
<parameters>
|
||||||
|
<property>
|
||||||
|
<name>workingDirPath</name>
|
||||||
|
<description>the working directory containing the summary and scholix_json folders to index</description>
|
||||||
|
</property>
|
||||||
|
<property>
|
||||||
|
<name>sparkDriverMemory</name>
|
||||||
|
<description>memory for driver process</description>
|
||||||
|
</property>
|
||||||
|
<property>
|
||||||
|
<name>sparkExecutorMemory</name>
|
||||||
|
<description>memory for individual executor</description>
|
||||||
|
</property>
|
||||||
|
<property>
|
||||||
|
<name>index</name>
|
||||||
|
<description>index name</description>
|
||||||
|
</property>
|
||||||
|
</parameters>
|
||||||
|
|
||||||
|
<start to="indexSummary"/>
|
||||||
|
|
||||||
|
<kill name="Kill">
|
||||||
|
<message>Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}]</message>
|
||||||
|
</kill>
|
||||||
|
|
||||||
|
<!-- Spark action "indexSummary": runs eu.dnetlib.dhp.provision.SparkIndexCollectionOnES with
     source path ${workingDirPath}/summary, index name ${index}_object, id path "id" and type "summary".
     Dynamic allocation is capped at 32 executors via spark-opts.
     Transitions: success goes to indexScholix, failure goes to Kill.
     NOTE(review): ${jobTracker}, ${nameNode}, ${projectVersion} and ${sparkExtraOPT} are not declared in
     this workflow's parameters block; presumably they come from the job properties. Confirm. -->
<action name="indexSummary">
|
||||||
|
<spark xmlns="uri:oozie:spark-action:0.2">
|
||||||
|
<job-tracker>${jobTracker}</job-tracker>
|
||||||
|
<name-node>${nameNode}</name-node>
|
||||||
|
<master>yarn-cluster</master>
|
||||||
|
<mode>cluster</mode>
|
||||||
|
<name>index Summary</name>
|
||||||
|
<class>eu.dnetlib.dhp.provision.SparkIndexCollectionOnES</class>
|
||||||
|
<jar>dhp-graph-provision-scholexplorer-${projectVersion}.jar</jar>
|
||||||
|
<spark-opts>--executor-memory ${sparkExecutorMemory} --driver-memory=${sparkDriverMemory} ${sparkExtraOPT} --conf spark.dynamicAllocation.maxExecutors="32" </spark-opts>
|
||||||
|
<arg>-mt</arg> <arg>yarn-cluster</arg>
|
||||||
|
<arg>--sourcePath</arg><arg>${workingDirPath}/summary</arg>
|
||||||
|
<arg>--index</arg><arg>${index}_object</arg>
|
||||||
|
<arg>--idPath</arg><arg>id</arg>
|
||||||
|
<arg>--type</arg><arg>summary</arg>
|
||||||
|
</spark>
|
||||||
|
<ok to="indexScholix"/>
|
||||||
|
<error to="Kill"/>
|
||||||
|
</action>
|
||||||
|
|
||||||
|
<!-- Spark action "indexScholix": runs eu.dnetlib.dhp.provision.SparkIndexCollectionOnES with
     source path ${workingDirPath}/scholix_json, index name ${index}_scholix, id path "identifier"
     and type "scholix". Dynamic allocation is capped at 8 executors via spark-opts.
     Transitions: success goes to End, failure goes to Kill. -->
<action name="indexScholix">
|
||||||
|
<spark xmlns="uri:oozie:spark-action:0.2">
|
||||||
|
<job-tracker>${jobTracker}</job-tracker>
|
||||||
|
<name-node>${nameNode}</name-node>
|
||||||
|
<master>yarn-cluster</master>
|
||||||
|
<mode>cluster</mode>
|
||||||
|
<name>index scholix</name>
|
||||||
|
<class>eu.dnetlib.dhp.provision.SparkIndexCollectionOnES</class>
|
||||||
|
<jar>dhp-graph-provision-scholexplorer-${projectVersion}.jar</jar>
|
||||||
|
<spark-opts>--executor-memory ${sparkExecutorMemory} --driver-memory=${sparkDriverMemory} ${sparkExtraOPT} --conf spark.dynamicAllocation.maxExecutors="8" </spark-opts>
|
||||||
|
<arg>-mt</arg> <arg>yarn-cluster</arg>
|
||||||
|
<arg>--sourcePath</arg><arg>${workingDirPath}/scholix_json</arg>
|
||||||
|
<arg>--index</arg><arg>${index}_scholix</arg>
|
||||||
|
<arg>--idPath</arg><arg>identifier</arg>
|
||||||
|
<arg>--type</arg><arg>scholix</arg>
|
||||||
|
</spark>
|
||||||
|
<ok to="End"/>
|
||||||
|
<error to="Kill"/>
|
||||||
|
</action>
|
||||||
|
|
||||||
|
<end name="End"/>
|
||||||
|
</workflow-app>
|
|
@ -0,0 +1,10 @@
|
||||||
|
<configuration>
|
||||||
|
<property>
|
||||||
|
<name>oozie.use.system.libpath</name>
|
||||||
|
<value>true</value>
|
||||||
|
</property>
|
||||||
|
<property>
|
||||||
|
<name>oozie.action.sharelib.for.spark</name>
|
||||||
|
<value>spark2</value>
|
||||||
|
</property>
|
||||||
|
</configuration>
|
|
@ -0,0 +1,97 @@
|
||||||
|
<workflow-app name="Keep On Synch datacite" xmlns="uri:oozie:workflow:0.5">
|
||||||
|
<parameters>
|
||||||
|
<property>
|
||||||
|
<name>workingDirPath</name>
|
||||||
|
<description>the working directory; its synch subfolder is reset and used for the incremental update</description>
|
||||||
|
</property>
|
||||||
|
<property>
|
||||||
|
<name>sparkDriverMemory</name>
|
||||||
|
<description>memory for driver process</description>
|
||||||
|
</property>
|
||||||
|
<property>
|
||||||
|
<name>sparkExecutorMemory</name>
|
||||||
|
<description>memory for individual executor</description>
|
||||||
|
</property>
|
||||||
|
<property>
|
||||||
|
<name>index</name>
|
||||||
|
<description>index name</description>
|
||||||
|
</property>
|
||||||
|
<property>
|
||||||
|
<name>timestamp</name>
|
||||||
|
<description>timestamp from incremental harvesting</description>
|
||||||
|
</property>
|
||||||
|
</parameters>
|
||||||
|
|
||||||
|
<start to="ResetWorkingPath"/>
|
||||||
|
|
||||||
|
<kill name="Kill">
|
||||||
|
<message>Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}]</message>
|
||||||
|
</kill>
|
||||||
|
|
||||||
|
|
||||||
|
<!-- FS action "ResetWorkingPath": deletes ${workingDirPath}/synch and recreates it empty, so each run
     starts from a clean synch folder.
     Transitions: success goes to ImportDataciteUpdate, failure goes to Kill. -->
<action name="ResetWorkingPath">
|
||||||
|
<fs>
|
||||||
|
<delete path='${workingDirPath}/synch'/>
|
||||||
|
<mkdir path='${workingDirPath}/synch'/>
|
||||||
|
</fs>
|
||||||
|
<ok to="ImportDataciteUpdate"/>
|
||||||
|
<error to="Kill"/>
|
||||||
|
</action>
|
||||||
|
|
||||||
|
|
||||||
|
<!-- Java action "ImportDataciteUpdate": runs eu.dnetlib.dhp.provision.update.RetrieveUpdateFromDatacite
     with target path ${workingDirPath}/synch/input_json (-t), the name node (-n), the harvesting
     timestamp (-ts), an index host (-ih) and the index name "datacite" (-in).
     Transitions: success goes to resolveScholix so the harvested records are resolved and then indexed;
     failure goes to Kill. The success transition previously pointed at End, which left the
     resolveScholix and indexScholix actions unreachable.
     NOTE(review): the index host is hard-coded here; consider exposing it as a workflow parameter. -->
<action name="ImportDataciteUpdate">
|
||||||
|
<java>
|
||||||
|
<job-tracker>${jobTracker}</job-tracker>
|
||||||
|
<name-node>${nameNode}</name-node>
|
||||||
|
<main-class>eu.dnetlib.dhp.provision.update.RetrieveUpdateFromDatacite</main-class>
|
||||||
|
<arg>-t</arg><arg>${workingDirPath}/synch/input_json</arg>
|
||||||
|
<arg>-n</arg><arg>${nameNode}</arg>
|
||||||
|
<arg>-ts</arg><arg>${timestamp}</arg>
|
||||||
|
<arg>-ih</arg><arg>ip-90-147-167-25.ct1.garrservices.it</arg>
|
||||||
|
<arg>-in</arg><arg>datacite</arg>
|
||||||
|
</java>
|
||||||
|
<ok to="resolveScholix"/>
|
||||||
|
<error to="Kill"/>
|
||||||
|
</action>
|
||||||
|
|
||||||
|
<!-- Spark action "resolveScholix": runs eu.dnetlib.dhp.provision.update.SparkResolveScholixTarget with
     source ${workingDirPath}/synch/input_json (-s), working directory ${workingDirPath}/synch (-w)
     and a hard-coded host (-h). Dynamic allocation is capped at 32 executors via spark-opts.
     Transitions: success goes to indexScholix, failure goes to Kill.
     NOTE(review): presumably the job writes resolved_json under the -w directory, which is the path
     the indexScholix action reads. Confirm against SparkResolveScholixTarget. -->
<action name="resolveScholix">
|
||||||
|
<spark xmlns="uri:oozie:spark-action:0.2">
|
||||||
|
<job-tracker>${jobTracker}</job-tracker>
|
||||||
|
<name-node>${nameNode}</name-node>
|
||||||
|
<master>yarn-cluster</master>
|
||||||
|
<mode>cluster</mode>
|
||||||
|
<name>Resolve and generate Scholix</name>
|
||||||
|
<class>eu.dnetlib.dhp.provision.update.SparkResolveScholixTarget</class>
|
||||||
|
<jar>dhp-graph-provision-scholexplorer-${projectVersion}.jar</jar>
|
||||||
|
<spark-opts>--executor-memory ${sparkExecutorMemory} --driver-memory=${sparkDriverMemory} ${sparkExtraOPT} --conf spark.dynamicAllocation.maxExecutors="32" </spark-opts>
|
||||||
|
<arg>-m</arg> <arg>yarn-cluster</arg>
|
||||||
|
<arg>-s</arg><arg>${workingDirPath}/synch/input_json</arg>
|
||||||
|
<arg>-w</arg><arg>${workingDirPath}/synch</arg>
|
||||||
|
<arg>-h</arg><arg>ip-90-147-167-25.ct1.garrservices.it</arg>
|
||||||
|
</spark>
|
||||||
|
<ok to="indexScholix"/>
|
||||||
|
<error to="Kill"/>
|
||||||
|
</action>
|
||||||
|
|
||||||
|
<!-- Spark action "indexScholix": runs eu.dnetlib.dhp.provision.SparkIndexCollectionOnES with
     source path ${workingDirPath}/synch/resolved_json, index name ${index}_scholix, id path
     "identifier" and type "scholix". Dynamic allocation is capped at 8 executors via spark-opts.
     Transitions: success goes to End, failure goes to Kill. -->
<action name="indexScholix">
|
||||||
|
<spark xmlns="uri:oozie:spark-action:0.2">
|
||||||
|
<job-tracker>${jobTracker}</job-tracker>
|
||||||
|
<name-node>${nameNode}</name-node>
|
||||||
|
<master>yarn-cluster</master>
|
||||||
|
<mode>cluster</mode>
|
||||||
|
<name>index scholix</name>
|
||||||
|
<class>eu.dnetlib.dhp.provision.SparkIndexCollectionOnES</class>
|
||||||
|
<jar>dhp-graph-provision-scholexplorer-${projectVersion}.jar</jar>
|
||||||
|
<spark-opts>--executor-memory ${sparkExecutorMemory} --driver-memory=${sparkDriverMemory} ${sparkExtraOPT} --conf spark.dynamicAllocation.maxExecutors="8" </spark-opts>
|
||||||
|
<arg>-mt</arg> <arg>yarn-cluster</arg>
|
||||||
|
<arg>--sourcePath</arg><arg>${workingDirPath}/synch/resolved_json</arg>
|
||||||
|
<arg>--index</arg><arg>${index}_scholix</arg>
|
||||||
|
<arg>--idPath</arg><arg>identifier</arg>
|
||||||
|
<arg>--type</arg><arg>scholix</arg>
|
||||||
|
</spark>
|
||||||
|
<ok to="End"/>
|
||||||
|
<error to="Kill"/>
|
||||||
|
</action>
|
||||||
|
|
||||||
|
<end name="End"/>
|
||||||
|
</workflow-app>
|
|
@ -3,24 +3,18 @@ package eu.dnetlib.dhp.provision;
|
||||||
import com.fasterxml.jackson.databind.ObjectMapper;
|
import com.fasterxml.jackson.databind.ObjectMapper;
|
||||||
import eu.dnetlib.dhp.provision.scholix.Scholix;
|
import eu.dnetlib.dhp.provision.scholix.Scholix;
|
||||||
import eu.dnetlib.dhp.provision.scholix.ScholixResource;
|
import eu.dnetlib.dhp.provision.scholix.ScholixResource;
|
||||||
import eu.dnetlib.dhp.provision.update.*;
|
import eu.dnetlib.dhp.provision.update.CrossrefClient;
|
||||||
|
import eu.dnetlib.dhp.provision.update.Datacite2Scholix;
|
||||||
|
import eu.dnetlib.dhp.provision.update.DataciteClient;
|
||||||
import eu.dnetlib.scholexplorer.relation.RelationMapper;
|
import eu.dnetlib.scholexplorer.relation.RelationMapper;
|
||||||
import org.apache.commons.io.IOUtils;
|
import org.apache.commons.io.IOUtils;
|
||||||
import org.apache.http.client.methods.CloseableHttpResponse;
|
|
||||||
import org.apache.http.client.methods.HttpPost;
|
|
||||||
import org.apache.http.entity.StringEntity;
|
|
||||||
import org.apache.http.impl.client.CloseableHttpClient;
|
|
||||||
import org.apache.http.impl.client.HttpClients;
|
|
||||||
import org.junit.jupiter.api.Assertions;
|
import org.junit.jupiter.api.Assertions;
|
||||||
import org.junit.jupiter.api.Test;
|
import org.junit.jupiter.api.Test;
|
||||||
|
|
||||||
import java.io.IOException;
|
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
|
|
||||||
|
|
||||||
public class DataciteClientTest {
|
public class DataciteClientTest {
|
||||||
|
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void dataciteSCholixTest() throws Exception {
|
public void dataciteSCholixTest() throws Exception {
|
||||||
final String json = IOUtils.toString(getClass().getResourceAsStream("datacite.json"));
|
final String json = IOUtils.toString(getClass().getResourceAsStream("datacite.json"));
|
||||||
|
@ -32,66 +26,18 @@ public class DataciteClientTest {
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
public void testClient() throws Exception {
|
|
||||||
RetrieveUpdateFromDatacite.main(new String[]{
|
|
||||||
"-n", "file:///data/new_s2.txt",
|
|
||||||
"-t", "/data/new_s2.txt",
|
|
||||||
"-ts", "1585760736",
|
|
||||||
"-ih", "ip-90-147-167-25.ct1.garrservices.it",
|
|
||||||
"-in", "datacite",
|
|
||||||
});
|
|
||||||
|
|
||||||
|
|
||||||
SparkResolveScholixTarget.main(new String[]{
|
|
||||||
"-s", "file:///data/new_s.txt",
|
|
||||||
"-m", "local[*]",
|
|
||||||
"-w", "/data/scholix/provision",
|
|
||||||
"-h", "ip-90-147-167-25.ct1.garrservices.it",
|
|
||||||
|
|
||||||
});
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
public void testResolveDataset() throws Exception {
|
public void testResolveDataset() throws Exception {
|
||||||
DataciteClient dc = new DataciteClient("ip-90-147-167-25.ct1.garrservices.it");
|
DataciteClient dc = new DataciteClient("ip-90-147-167-25.ct1.garrservices.it");
|
||||||
ScholixResource datasetByDOI = dc.getDatasetByDOI("10.17182/hepdata.15392.v1/t5");
|
ScholixResource datasetByDOI = dc.getDatasetByDOI("10.17182/hepdata.15392.v1/t5");
|
||||||
Assertions.assertNotNull(datasetByDOI);
|
Assertions.assertNotNull(datasetByDOI);
|
||||||
System.out.println(new ObjectMapper().writeValueAsString(datasetByDOI));
|
System.out.println(new ObjectMapper().writeValueAsString(datasetByDOI));
|
||||||
|
|
||||||
|
|
||||||
CrossrefClient cr = new CrossrefClient("ip-90-147-167-25.ct1.garrservices.it");
|
CrossrefClient cr = new CrossrefClient("ip-90-147-167-25.ct1.garrservices.it");
|
||||||
ScholixResource crossrefByDOI = cr.getResourceByDOI("10.26850/1678-4618eqj.v35.1.2010.p41-46");
|
ScholixResource crossrefByDOI = cr.getResourceByDOI("10.26850/1678-4618eqj.v35.1.2010.p41-46");
|
||||||
Assertions.assertNotNull(crossrefByDOI);
|
Assertions.assertNotNull(crossrefByDOI);
|
||||||
System.out.println(new ObjectMapper().writeValueAsString(crossrefByDOI));
|
System.out.println(new ObjectMapper().writeValueAsString(crossrefByDOI));
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
private String getResponse(final String url,final String json ) {
|
|
||||||
CloseableHttpClient client = HttpClients.createDefault();
|
|
||||||
try {
|
|
||||||
|
|
||||||
HttpPost httpPost = new HttpPost(url);
|
|
||||||
if (json!= null) {
|
|
||||||
StringEntity entity = new StringEntity(json);
|
|
||||||
httpPost.setEntity(entity);
|
|
||||||
httpPost.setHeader("Accept", "application/json");
|
|
||||||
httpPost.setHeader("Content-type", "application/json");
|
|
||||||
}
|
|
||||||
CloseableHttpResponse response = client.execute(httpPost);
|
|
||||||
|
|
||||||
return IOUtils.toString(response.getEntity().getContent());
|
|
||||||
} catch (Throwable e) {
|
|
||||||
throw new RuntimeException("Error on executing request ",e);
|
|
||||||
} finally {
|
|
||||||
try {
|
|
||||||
client.close();
|
|
||||||
} catch (IOException e) {
|
|
||||||
throw new RuntimeException("Unable to close client ",e);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue