Merge branch 'master' of https://code-repo.d4science.org/D-Net/dnet-hadoop into orcid-no-doi
This commit is contained in:
commit
2b0c9bbb7e
|
@ -1,8 +1,10 @@
|
||||||
|
|
||||||
package eu.dnetlib.dhp.oa.graph.clean;
|
package eu.dnetlib.dhp.oa.graph.clean;
|
||||||
|
|
||||||
|
import java.util.HashSet;
|
||||||
import java.util.LinkedHashMap;
|
import java.util.LinkedHashMap;
|
||||||
import java.util.Objects;
|
import java.util.Objects;
|
||||||
|
import java.util.Set;
|
||||||
import java.util.function.Function;
|
import java.util.function.Function;
|
||||||
import java.util.stream.Collectors;
|
import java.util.stream.Collectors;
|
||||||
|
|
||||||
|
@ -17,7 +19,13 @@ import eu.dnetlib.dhp.schema.oaf.*;
|
||||||
public class CleaningFunctions {
|
public class CleaningFunctions {
|
||||||
|
|
||||||
public static final String ORCID_PREFIX_REGEX = "^http(s?):\\/\\/orcid\\.org\\/";
|
public static final String ORCID_PREFIX_REGEX = "^http(s?):\\/\\/orcid\\.org\\/";
|
||||||
public static final String NONE = "none";
|
|
||||||
|
public static final Set<String> PID_BLACKLIST = new HashSet<>();
|
||||||
|
|
||||||
|
static {
|
||||||
|
PID_BLACKLIST.add("none");
|
||||||
|
PID_BLACKLIST.add("na");
|
||||||
|
}
|
||||||
|
|
||||||
public static <T extends Oaf> T fixVocabularyNames(T value) {
|
public static <T extends Oaf> T fixVocabularyNames(T value) {
|
||||||
if (value instanceof Datasource) {
|
if (value instanceof Datasource) {
|
||||||
|
@ -114,7 +122,7 @@ public class CleaningFunctions {
|
||||||
.stream()
|
.stream()
|
||||||
.filter(Objects::nonNull)
|
.filter(Objects::nonNull)
|
||||||
.filter(sp -> StringUtils.isNotBlank(StringUtils.trim(sp.getValue())))
|
.filter(sp -> StringUtils.isNotBlank(StringUtils.trim(sp.getValue())))
|
||||||
.filter(sp -> !NONE.equalsIgnoreCase(sp.getValue().trim()))
|
.filter(sp -> !PID_BLACKLIST.contains(sp.getValue().trim().toLowerCase()))
|
||||||
.filter(sp -> Objects.nonNull(sp.getQualifier()))
|
.filter(sp -> Objects.nonNull(sp.getQualifier()))
|
||||||
.filter(sp -> StringUtils.isNotBlank(sp.getQualifier().getClassid()))
|
.filter(sp -> StringUtils.isNotBlank(sp.getQualifier().getClassid()))
|
||||||
.map(sp -> {
|
.map(sp -> {
|
||||||
|
|
20
pom.xml
20
pom.xml
|
@ -70,6 +70,26 @@
|
||||||
<enabled>false</enabled>
|
<enabled>false</enabled>
|
||||||
</snapshots>
|
</snapshots>
|
||||||
</repository>
|
</repository>
|
||||||
|
<repository>
|
||||||
|
<id>dnet45-releases-old</id>
|
||||||
|
<url>http://maven.research-infrastructures.eu/nexus/content/repositories/dnet45-releases</url>
|
||||||
|
<releases>
|
||||||
|
<enabled>false</enabled>
|
||||||
|
</releases>
|
||||||
|
<snapshots>
|
||||||
|
<enabled>false</enabled>
|
||||||
|
</snapshots>
|
||||||
|
</repository>
|
||||||
|
<repository>
|
||||||
|
<id>dnet45-snapshots-old</id>
|
||||||
|
<url>http://maven.research-infrastructures.eu/nexus/content/repositories/dnet45-snapshots</url>
|
||||||
|
<releases>
|
||||||
|
<enabled>false</enabled>
|
||||||
|
</releases>
|
||||||
|
<snapshots>
|
||||||
|
<enabled>false</enabled>
|
||||||
|
</snapshots>
|
||||||
|
</repository>
|
||||||
</repositories>
|
</repositories>
|
||||||
|
|
||||||
<dependencies>
|
<dependencies>
|
||||||
|
|
Loading…
Reference in New Issue