Merge pull request 'betaFixPerson' (#505) from betaFixPerson into beta
Reviewed-on: #505
This commit is contained in:
commit
9f7a606ddd
|
@ -104,22 +104,22 @@ public class PrepareAffiliationRelations implements Serializable {
|
||||||
.listKeyValues(OPENAIRE_DATASOURCE_ID, OPENAIRE_DATASOURCE_NAME);
|
.listKeyValues(OPENAIRE_DATASOURCE_ID, OPENAIRE_DATASOURCE_NAME);
|
||||||
|
|
||||||
JavaPairRDD<Text, Text> crossrefRelations = prepareAffiliationRelationsNewModel(
|
JavaPairRDD<Text, Text> crossrefRelations = prepareAffiliationRelationsNewModel(
|
||||||
spark, crossrefInputPath, collectedfromOpenAIRE, BIP_INFERENCE_PROVENANCE + "::crossref");
|
spark, crossrefInputPath, collectedfromOpenAIRE, BIP_INFERENCE_PROVENANCE + ":crossref");
|
||||||
|
|
||||||
JavaPairRDD<Text, Text> pubmedRelations = prepareAffiliationRelations(
|
JavaPairRDD<Text, Text> pubmedRelations = prepareAffiliationRelations(
|
||||||
spark, pubmedInputPath, collectedfromOpenAIRE, BIP_INFERENCE_PROVENANCE + "::pubmed");
|
spark, pubmedInputPath, collectedfromOpenAIRE, BIP_INFERENCE_PROVENANCE + ":pubmed");
|
||||||
|
|
||||||
JavaPairRDD<Text, Text> openAPCRelations = prepareAffiliationRelationsNewModel(
|
JavaPairRDD<Text, Text> openAPCRelations = prepareAffiliationRelationsNewModel(
|
||||||
spark, openapcInputPath, collectedfromOpenAIRE, BIP_INFERENCE_PROVENANCE + "::openapc");
|
spark, openapcInputPath, collectedfromOpenAIRE, BIP_INFERENCE_PROVENANCE + ":openapc");
|
||||||
|
|
||||||
JavaPairRDD<Text, Text> dataciteRelations = prepareAffiliationRelationsNewModel(
|
JavaPairRDD<Text, Text> dataciteRelations = prepareAffiliationRelationsNewModel(
|
||||||
spark, dataciteInputPath, collectedfromOpenAIRE, BIP_INFERENCE_PROVENANCE + "::datacite");
|
spark, dataciteInputPath, collectedfromOpenAIRE, BIP_INFERENCE_PROVENANCE + ":datacite");
|
||||||
|
|
||||||
JavaPairRDD<Text, Text> webCrawlRelations = prepareAffiliationRelationsNewModel(
|
JavaPairRDD<Text, Text> webCrawlRelations = prepareAffiliationRelationsNewModel(
|
||||||
spark, webcrawlInputPath, collectedfromOpenAIRE, BIP_INFERENCE_PROVENANCE + "::rawaff");
|
spark, webcrawlInputPath, collectedfromOpenAIRE, BIP_INFERENCE_PROVENANCE + ":rawaff");
|
||||||
|
|
||||||
JavaPairRDD<Text, Text> publisherRelations = prepareAffiliationRelationFromPublisherNewModel(
|
JavaPairRDD<Text, Text> publisherRelations = prepareAffiliationRelationFromPublisherNewModel(
|
||||||
spark, publisherlInputPath, collectedfromOpenAIRE, BIP_INFERENCE_PROVENANCE + "::webcrawl");
|
spark, publisherlInputPath, collectedfromOpenAIRE, BIP_INFERENCE_PROVENANCE + ":webcrawl");
|
||||||
|
|
||||||
crossrefRelations
|
crossrefRelations
|
||||||
.union(pubmedRelations)
|
.union(pubmedRelations)
|
||||||
|
|
|
@ -15,6 +15,7 @@ import java.util.stream.Collectors;
|
||||||
|
|
||||||
import org.apache.commons.cli.ParseException;
|
import org.apache.commons.cli.ParseException;
|
||||||
import org.apache.commons.io.IOUtils;
|
import org.apache.commons.io.IOUtils;
|
||||||
|
import org.apache.commons.lang.StringUtils;
|
||||||
import org.apache.hadoop.conf.Configuration;
|
import org.apache.hadoop.conf.Configuration;
|
||||||
import org.apache.hadoop.fs.FSDataOutputStream;
|
import org.apache.hadoop.fs.FSDataOutputStream;
|
||||||
import org.apache.hadoop.fs.FileSystem;
|
import org.apache.hadoop.fs.FileSystem;
|
||||||
|
@ -193,8 +194,8 @@ public class ExtractPerson implements Serializable {
|
||||||
private static Relation getProjectRelation(String project, String orcid, String role) {
|
private static Relation getProjectRelation(String project, String orcid, String role) {
|
||||||
|
|
||||||
String source = PERSON_PREFIX + "::" + IdentifierFactory.md5(orcid);
|
String source = PERSON_PREFIX + "::" + IdentifierFactory.md5(orcid);
|
||||||
String target = PROJECT_ID_PREFIX + project.substring(0, 14)
|
String target = PROJECT_ID_PREFIX + StringUtils.substringBefore(project, "::") + "::"
|
||||||
+ IdentifierFactory.md5(project.substring(15));
|
+ IdentifierFactory.md5(StringUtils.substringAfter(project, "::"));
|
||||||
List<KeyValue> properties = new ArrayList<>();
|
List<KeyValue> properties = new ArrayList<>();
|
||||||
|
|
||||||
Relation relation = OafMapperUtils
|
Relation relation = OafMapperUtils
|
||||||
|
@ -345,7 +346,16 @@ public class ExtractPerson implements Serializable {
|
||||||
OafMapperUtils
|
OafMapperUtils
|
||||||
.structuredProperty(
|
.structuredProperty(
|
||||||
op.getOrcid(), ModelConstants.ORCID, ModelConstants.ORCID_CLASSNAME,
|
op.getOrcid(), ModelConstants.ORCID, ModelConstants.ORCID_CLASSNAME,
|
||||||
ModelConstants.DNET_PID_TYPES, ModelConstants.DNET_PID_TYPES, null));
|
ModelConstants.DNET_PID_TYPES, ModelConstants.DNET_PID_TYPES,
|
||||||
|
OafMapperUtils.dataInfo(false,
|
||||||
|
null,
|
||||||
|
false,
|
||||||
|
false,
|
||||||
|
OafMapperUtils.qualifier(ModelConstants.SYSIMPORT_CROSSWALK_ENTITYREGISTRY,
|
||||||
|
ModelConstants.SYSIMPORT_CROSSWALK_ENTITYREGISTRY,
|
||||||
|
ModelConstants.DNET_PID_TYPES,
|
||||||
|
ModelConstants.DNET_PID_TYPES),
|
||||||
|
"0.91")));
|
||||||
person.setDateofcollection(op.getLastModifiedDate());
|
person.setDateofcollection(op.getLastModifiedDate());
|
||||||
person.setOriginalId(Arrays.asList(op.getOrcid()));
|
person.setOriginalId(Arrays.asList(op.getOrcid()));
|
||||||
person.setDataInfo(ORCIDDATAINFO);
|
person.setDataInfo(ORCIDDATAINFO);
|
||||||
|
|
Loading…
Reference in New Issue