merging with branch beta
This commit is contained in:
commit
45605f93ae
|
@ -129,7 +129,8 @@ public class PrepareAffiliationRelations implements Serializable {
|
||||||
Dataset<Row> df = spark
|
Dataset<Row> df = spark
|
||||||
.read()
|
.read()
|
||||||
.schema("`DOI` STRING, `Matchings` ARRAY<STRUCT<`RORid`:STRING,`Confidence`:DOUBLE>>")
|
.schema("`DOI` STRING, `Matchings` ARRAY<STRUCT<`RORid`:STRING,`Confidence`:DOUBLE>>")
|
||||||
.json(inputPath);
|
.json(inputPath)
|
||||||
|
.where("DOI is not null");
|
||||||
|
|
||||||
// unroll nested arrays
|
// unroll nested arrays
|
||||||
df = df
|
df = df
|
||||||
|
|
|
@ -31,5 +31,11 @@ class ORCIDAuthorMatchersTest {
|
||||||
assertTrue(matchOrderedTokenAndAbbreviations("孙林 Sun Lin", "Sun Lin"))
|
assertTrue(matchOrderedTokenAndAbbreviations("孙林 Sun Lin", "Sun Lin"))
|
||||||
// assertTrue(AuthorsMatchRevised.compare("孙林 Sun Lin", "孙林")); // not yet implemented
|
// assertTrue(AuthorsMatchRevised.compare("孙林 Sun Lin", "孙林")); // not yet implemented
|
||||||
}
|
}
|
||||||
|
// Asserts that the matcher accepts a name written with middle initials
// against the same hyphenated surname written with a spelled-out middle name.
@Test def testDocumentationNames(): Unit = {
  val nameWithInitials = "James C. A. Miller-Jones"
  val nameSpelledOut = "James Antony Miller-Jones"
  assertTrue(matchOrderedTokenAndAbbreviations(nameWithInitials, nameSpelledOut))
}
|
||||||
|
|
||||||
|
// Asserts that the matcher accepts a name written with middle initials and a
// hyphenated surname against a spelled-out form whose surname is split by a space.
@Test def testDocumentationNames2(): Unit = {
  val nameWithInitials = "James C. A. Miller-Jones"
  val nameWithSplitSurname = "James Antony Miller Jones"
  assertTrue(matchOrderedTokenAndAbbreviations(nameWithInitials, nameWithSplitSurname))
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -698,6 +698,7 @@ public class ProvisionModelSupport {
|
||||||
.stream()
|
.stream()
|
||||||
.filter(s -> Objects.nonNull(s.getQualifier()))
|
.filter(s -> Objects.nonNull(s.getQualifier()))
|
||||||
.filter(s -> Objects.nonNull(s.getQualifier().getClassname()))
|
.filter(s -> Objects.nonNull(s.getQualifier().getClassname()))
|
||||||
|
.filter(ProvisionModelSupport::filterFosL1L2)
|
||||||
.map(
|
.map(
|
||||||
s -> Subject
|
s -> Subject
|
||||||
.newInstance(s.getValue(), s.getQualifier().getClassid(), s.getQualifier().getClassname()))
|
.newInstance(s.getValue(), s.getQualifier().getClassid(), s.getQualifier().getClassname()))
|
||||||
|
@ -720,6 +721,16 @@ public class ProvisionModelSupport {
|
||||||
.orElse(null);
|
.orElse(null);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public static boolean filterFosL1L2(StructuredProperty s) {
|
||||||
|
final String subjectType = Optional.ofNullable(s.getQualifier()).map(Qualifier::getClassid).orElse("");
|
||||||
|
if (ModelConstants.DNET_SUBJECT_FOS_CLASSID.equals(subjectType)) {
|
||||||
|
String code = StringUtils.substringBefore(s.getValue(), " ");
|
||||||
|
return code.matches("^\\d{2}$|^\\d{4}$");
|
||||||
|
}
|
||||||
|
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
private static Country asCountry(eu.dnetlib.dhp.schema.oaf.Qualifier country) {
|
private static Country asCountry(eu.dnetlib.dhp.schema.oaf.Qualifier country) {
|
||||||
return Optional
|
return Optional
|
||||||
.ofNullable(country)
|
.ofNullable(country)
|
||||||
|
|
|
@ -20,6 +20,7 @@ import javax.xml.transform.*;
|
||||||
import javax.xml.transform.dom.DOMSource;
|
import javax.xml.transform.dom.DOMSource;
|
||||||
import javax.xml.transform.stream.StreamResult;
|
import javax.xml.transform.stream.StreamResult;
|
||||||
|
|
||||||
|
import eu.dnetlib.dhp.oa.provision.model.*;
|
||||||
import org.apache.commons.lang3.StringUtils;
|
import org.apache.commons.lang3.StringUtils;
|
||||||
import org.apache.commons.lang3.tuple.ImmutablePair;
|
import org.apache.commons.lang3.tuple.ImmutablePair;
|
||||||
import org.apache.commons.lang3.tuple.Pair;
|
import org.apache.commons.lang3.tuple.Pair;
|
||||||
|
@ -41,10 +42,6 @@ import com.google.common.collect.Sets;
|
||||||
import com.mycila.xmltool.XMLDoc;
|
import com.mycila.xmltool.XMLDoc;
|
||||||
import com.mycila.xmltool.XMLTag;
|
import com.mycila.xmltool.XMLTag;
|
||||||
|
|
||||||
import eu.dnetlib.dhp.oa.provision.model.JoinedEntity;
|
|
||||||
import eu.dnetlib.dhp.oa.provision.model.RelatedEntity;
|
|
||||||
import eu.dnetlib.dhp.oa.provision.model.RelatedEntityWrapper;
|
|
||||||
import eu.dnetlib.dhp.oa.provision.model.XmlInstance;
|
|
||||||
import eu.dnetlib.dhp.schema.common.*;
|
import eu.dnetlib.dhp.schema.common.*;
|
||||||
import eu.dnetlib.dhp.schema.oaf.*;
|
import eu.dnetlib.dhp.schema.oaf.*;
|
||||||
import eu.dnetlib.dhp.schema.oaf.Result;
|
import eu.dnetlib.dhp.schema.oaf.Result;
|
||||||
|
@ -389,6 +386,7 @@ public class XmlRecordFactory implements Serializable {
|
||||||
.getSubject()
|
.getSubject()
|
||||||
.stream()
|
.stream()
|
||||||
.filter(Objects::nonNull)
|
.filter(Objects::nonNull)
|
||||||
|
.filter(ProvisionModelSupport::filterFosL1L2)
|
||||||
.map(s -> XmlSerializationUtils.mapStructuredProperty("subject", s))
|
.map(s -> XmlSerializationUtils.mapStructuredProperty("subject", s))
|
||||||
.collect(Collectors.toList()));
|
.collect(Collectors.toList()));
|
||||||
}
|
}
|
||||||
|
|
|
@ -5,11 +5,7 @@ import static eu.dnetlib.dhp.oa.provision.utils.GraphMappingUtils.removePrefix;
|
||||||
import static org.apache.commons.lang3.StringUtils.isBlank;
|
import static org.apache.commons.lang3.StringUtils.isBlank;
|
||||||
import static org.apache.commons.lang3.StringUtils.isNotBlank;
|
import static org.apache.commons.lang3.StringUtils.isNotBlank;
|
||||||
|
|
||||||
import java.util.HashSet;
|
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
import java.util.Optional;
|
|
||||||
import java.util.Set;
|
|
||||||
import java.util.stream.Collectors;
|
|
||||||
|
|
||||||
import org.apache.commons.lang3.StringUtils;
|
import org.apache.commons.lang3.StringUtils;
|
||||||
|
|
||||||
|
|
|
@ -97,6 +97,8 @@ public class XmlRecordFactoryTest {
|
||||||
assertEquals("bronze", doc.valueOf("//*[local-name() = 'result']/openaccesscolor/text()"));
|
assertEquals("bronze", doc.valueOf("//*[local-name() = 'result']/openaccesscolor/text()"));
|
||||||
assertEquals("true", doc.valueOf("//*[local-name() = 'result']/isindiamondjournal/text()"));
|
assertEquals("true", doc.valueOf("//*[local-name() = 'result']/isindiamondjournal/text()"));
|
||||||
assertEquals("true", doc.valueOf("//*[local-name() = 'result']/publiclyfunded/text()"));
|
assertEquals("true", doc.valueOf("//*[local-name() = 'result']/publiclyfunded/text()"));
|
||||||
|
|
||||||
|
assertEquals(15, doc.selectNodes("//*[local-name() = 'result']/*[local-name() = 'subject']").size());
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
|
|
|
@ -1886,12 +1886,34 @@
|
||||||
"trust": ""
|
"trust": ""
|
||||||
},
|
},
|
||||||
"qualifier": {
|
"qualifier": {
|
||||||
"classid": "keyword",
|
"classid": "FOS",
|
||||||
"classname": "keyword",
|
"classname": "Fields of Science and Technology classification",
|
||||||
"schemeid": "dnet:subject_classification_typologies",
|
"schemeid": "dnet:subject_classification_typologies",
|
||||||
"schemename": "dnet:subject_classification_typologies"
|
"schemename": "dnet:subject_classification_typologies"
|
||||||
},
|
},
|
||||||
"value": "Thermal conductivity"
|
"value": "0101 mathematics"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"dataInfo": {
|
||||||
|
"deletedbyinference": false,
|
||||||
|
"inferenceprovenance": "",
|
||||||
|
"inferred": false,
|
||||||
|
"invisible": false,
|
||||||
|
"provenanceaction": {
|
||||||
|
"classid": "",
|
||||||
|
"classname": "",
|
||||||
|
"schemeid": "",
|
||||||
|
"schemename": ""
|
||||||
|
},
|
||||||
|
"trust": ""
|
||||||
|
},
|
||||||
|
"qualifier": {
|
||||||
|
"classid": "FOS",
|
||||||
|
"classname": "Fields of Science and Technology classification",
|
||||||
|
"schemeid": "dnet:subject_classification_typologies",
|
||||||
|
"schemename": "dnet:subject_classification_typologies"
|
||||||
|
},
|
||||||
|
"value": "010101 applied mathematics"
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"title": [
|
"title": [
|
||||||
|
|
Loading…
Reference in New Issue