code formatting

This commit is contained in:
Claudio Atzori 2020-10-07 13:14:31 +02:00
parent 1abcabb6e6
commit 8958f20813
7 changed files with 72 additions and 68 deletions

View File

@ -1,13 +1,14 @@
package eu.dnetlib.dhp.schema.oaf.utils; package eu.dnetlib.dhp.schema.oaf.utils;
import java.io.Serializable;
import java.util.Objects;
import org.apache.commons.lang.StringUtils;
import eu.dnetlib.dhp.schema.oaf.OafEntity; import eu.dnetlib.dhp.schema.oaf.OafEntity;
import eu.dnetlib.dhp.schema.oaf.StructuredProperty; import eu.dnetlib.dhp.schema.oaf.StructuredProperty;
import eu.dnetlib.dhp.utils.DHPUtils; import eu.dnetlib.dhp.utils.DHPUtils;
import org.apache.commons.lang.StringUtils;
import java.io.Serializable;
import java.util.Objects;
/** /**
* Factory class for OpenAIRE identifiers in the Graph * Factory class for OpenAIRE identifiers in the Graph
@ -16,7 +17,8 @@ public class IdentifierFactory implements Serializable {
public static final String ID_SEPARATOR = "::"; public static final String ID_SEPARATOR = "::";
public static final String ID_PREFIX_SEPARATOR = "|"; public static final String ID_PREFIX_SEPARATOR = "|";
public final static String ID_REGEX = "^[0-9][0-9]\\"+ID_PREFIX_SEPARATOR+".{12}"+ID_SEPARATOR+"[a-zA-Z0-9]{32}$"; public final static String ID_REGEX = "^[0-9][0-9]\\" + ID_PREFIX_SEPARATOR + ".{12}" + ID_SEPARATOR
+ "[a-zA-Z0-9]{32}$";
public static final int ID_PREFIX_LEN = 12; public static final int ID_PREFIX_LEN = 12;
public static <T extends OafEntity> String createIdentifier(T entity) { public static <T extends OafEntity> String createIdentifier(T entity) {
@ -26,18 +28,18 @@ public class IdentifierFactory implements Serializable {
} }
return entity return entity
.getPid() .getPid()
.stream() .stream()
.filter(s -> Objects.nonNull(s.getQualifier())) .filter(s -> Objects.nonNull(s.getQualifier()))
.filter(s -> PidType.isValid(s.getQualifier().getClassid())) .filter(s -> PidType.isValid(s.getQualifier().getClassid()))
.min(new PidComparator<>(entity)) .min(new PidComparator<>(entity))
.map(s -> idFromPid(entity, s)) .map(s -> idFromPid(entity, s))
.map(IdentifierFactory::verifyIdSyntax) .map(IdentifierFactory::verifyIdSyntax)
.orElseGet(entity::getId); .orElseGet(entity::getId);
} }
private static String verifyIdSyntax(String s) { private static String verifyIdSyntax(String s) {
if(StringUtils.isBlank(s) || !s.matches(ID_REGEX)) { if (StringUtils.isBlank(s) || !s.matches(ID_REGEX)) {
throw new RuntimeException(String.format("malformed id: '%s'", s)); throw new RuntimeException(String.format("malformed id: '%s'", s));
} else { } else {
return s; return s;
@ -46,16 +48,16 @@ public class IdentifierFactory implements Serializable {
private static <T extends OafEntity> String idFromPid(T entity, StructuredProperty s) { private static <T extends OafEntity> String idFromPid(T entity, StructuredProperty s) {
return new StringBuilder() return new StringBuilder()
.append(StringUtils.substringBefore(entity.getId(), ID_PREFIX_SEPARATOR)) .append(StringUtils.substringBefore(entity.getId(), ID_PREFIX_SEPARATOR))
.append(ID_PREFIX_SEPARATOR) .append(ID_PREFIX_SEPARATOR)
.append(createPrefix(s.getQualifier().getClassid())) .append(createPrefix(s.getQualifier().getClassid()))
.append(ID_SEPARATOR) .append(ID_SEPARATOR)
.append(DHPUtils.md5(normalizePidValue(s.getValue()))) .append(DHPUtils.md5(normalizePidValue(s.getValue())))
.toString(); .toString();
} }
private static String normalizePidValue(String value) { private static String normalizePidValue(String value) {
//TODO more aggressive cleaning? keep only alphanum and punctuation? // TODO more aggressive cleaning? keep only alphanum and punctuation?
return value.toLowerCase().replaceAll(" ", ""); return value.toLowerCase().replaceAll(" ", "");
} }

View File

@ -1,17 +1,18 @@
package eu.dnetlib.dhp.schema.oaf.utils; package eu.dnetlib.dhp.schema.oaf.utils;
import org.apache.commons.lang3.EnumUtils; import org.apache.commons.lang3.EnumUtils;
public enum PidType { public enum PidType {
// Result // Result
doi, pmid, pmc, handle, arXiv, NCID, GBIF, nct, pdb, doi, pmid, pmc, handle, arXiv, NCID, GBIF, nct, pdb,
// Organization // Organization
GRID, mag_id, urn; GRID, mag_id, urn;
public static boolean isValid(String type) { public static boolean isValid(String type) {
return EnumUtils.isValidEnum(PidType.class, type); return EnumUtils.isValidEnum(PidType.class, type);
} }
} }

View File

@ -1,43 +1,47 @@
package eu.dnetlib.dhp.schema.oaf.utils; package eu.dnetlib.dhp.schema.oaf.utils;
import com.fasterxml.jackson.databind.DeserializationFeature;
import com.fasterxml.jackson.databind.ObjectMapper;
import eu.dnetlib.dhp.schema.oaf.Publication;
import eu.dnetlib.dhp.utils.DHPUtils;
import org.apache.commons.io.IOUtils;
import org.junit.jupiter.api.Test;
import java.io.IOException;
import static org.junit.jupiter.api.Assertions.*; import static org.junit.jupiter.api.Assertions.*;
import java.io.IOException;
import org.apache.commons.io.IOUtils;
import org.junit.jupiter.api.Test;
import com.fasterxml.jackson.databind.DeserializationFeature;
import com.fasterxml.jackson.databind.ObjectMapper;
import eu.dnetlib.dhp.schema.oaf.Publication;
import eu.dnetlib.dhp.utils.DHPUtils;
public class IdentifierFactoryTest { public class IdentifierFactoryTest {
private static ObjectMapper OBJECT_MAPPER = new ObjectMapper().configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES, false); private static ObjectMapper OBJECT_MAPPER = new ObjectMapper()
.configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES, false);
@Test @Test
public void testCreateIdentifierForPublication() throws IOException { public void testCreateIdentifierForPublication() throws IOException {
verifyIdentifier("publication_doi.json", "50|doi_________::" + DHPUtils.md5("10.1016/j.cmet.2011.03.013")); verifyIdentifier("publication_doi.json", "50|doi_________::" + DHPUtils.md5("10.1016/j.cmet.2011.03.013"));
verifyIdentifier("publication_pmc.json", "50|pmc_________::" + DHPUtils.md5("21459329")); verifyIdentifier("publication_pmc.json", "50|pmc_________::" + DHPUtils.md5("21459329"));
verifyIdentifier("publication_urn.json", "50|urn_________::" + DHPUtils.md5("urn:nbn:nl:ui:29-f3ed5f9e-edf6-457e-8848-61b58a4075e2")); verifyIdentifier(
"publication_urn.json",
"50|urn_________::" + DHPUtils.md5("urn:nbn:nl:ui:29-f3ed5f9e-edf6-457e-8848-61b58a4075e2"));
final String defaultID = "50|DansKnawCris::0829b5191605bdbea36d6502b8c1ce1f"; final String defaultID = "50|DansKnawCris::0829b5191605bdbea36d6502b8c1ce1f";
verifyIdentifier("publication_3.json", defaultID); verifyIdentifier("publication_3.json", defaultID);
verifyIdentifier("publication_4.json", defaultID); verifyIdentifier("publication_4.json", defaultID);
verifyIdentifier("publication_5.json", defaultID); verifyIdentifier("publication_5.json", defaultID);
} }
protected void verifyIdentifier(String filename, String expectedID) throws IOException { protected void verifyIdentifier(String filename, String expectedID) throws IOException {
final String json = IOUtils.toString(getClass().getResourceAsStream(filename)); final String json = IOUtils.toString(getClass().getResourceAsStream(filename));
final Publication pub = OBJECT_MAPPER.readValue(json, Publication.class); final Publication pub = OBJECT_MAPPER.readValue(json, Publication.class);
String id = IdentifierFactory.createIdentifier(pub); String id = IdentifierFactory.createIdentifier(pub);
assertNotNull(id);
assertEquals(expectedID, id);
}
assertNotNull(id);
assertEquals(expectedID, id);
}
} }

View File

@ -10,7 +10,6 @@ import java.util.Objects;
* - private String description to store the description of the programme * - private String description to store the description of the programme
*/ */
public class H2020Programme implements Serializable { public class H2020Programme implements Serializable {
private String code; private String code;
private String description; private String description;

View File

@ -1,14 +1,14 @@
package eu.dnetlib.dhp.schema.oaf; package eu.dnetlib.dhp.schema.oaf;
import eu.dnetlib.dhp.schema.common.LicenseComparator;
import java.io.Serializable; import java.io.Serializable;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.Comparator; import java.util.Comparator;
import java.util.List; import java.util.List;
import java.util.stream.Collectors; import java.util.stream.Collectors;
import eu.dnetlib.dhp.schema.common.LicenseComparator;
public class Result extends OafEntity implements Serializable { public class Result extends OafEntity implements Serializable {
private List<Measure> measures; private List<Measure> measures;
@ -247,7 +247,8 @@ public class Result extends OafEntity implements Serializable {
instance = mergeLists(instance, r.getInstance()); instance = mergeLists(instance, r.getInstance());
if (r.getBestaccessright() != null && new LicenseComparator().compare(r.getBestaccessright(), bestaccessright) < 0) if (r.getBestaccessright() != null
&& new LicenseComparator().compare(r.getBestaccessright(), bestaccessright) < 0)
bestaccessright = r.getBestaccessright(); bestaccessright = r.getBestaccessright();
if (r.getResulttype() != null && compareTrust(this, r) < 0) if (r.getResulttype() != null && compareTrust(this, r) < 0)

View File

@ -177,14 +177,12 @@ public class PrepareProgramme {
prepareClassification(h2020Programmes); prepareClassification(h2020Programmes);
h2020Programmes.map(csvProgramme -> OBJECT_MAPPER.writeValueAsString(csvProgramme)) h2020Programmes
.saveAsTextFile(outputPath); .map(csvProgramme -> OBJECT_MAPPER.writeValueAsString(csvProgramme))
.saveAsTextFile(outputPath);
} }
private static void prepareClassification(JavaRDD<CSVProgramme> h2020Programmes) { private static void prepareClassification(JavaRDD<CSVProgramme> h2020Programmes) {
Object[] codedescription = h2020Programmes Object[] codedescription = h2020Programmes
.map(value -> new Tuple2<>(value.getCode(), value.getTitle())) .map(value -> new Tuple2<>(value.getCode(), value.getTitle()))
@ -255,7 +253,7 @@ public class PrepareProgramme {
} }
h2020Programmes.foreach(csvProgramme -> { h2020Programmes.foreach(csvProgramme -> {
if (!csvProgramme.getCode().endsWith(".") && !csvProgramme.getCode().contains("Euratom") if (!csvProgramme.getCode().endsWith(".") && !csvProgramme.getCode().contains("Euratom")
&& !csvProgramme.getCode().equals("H2020-EC")) && !csvProgramme.getCode().equals("H2020-EC"))
csvProgramme.setClassification(map.get(csvProgramme.getCode() + ".")); csvProgramme.setClassification(map.get(csvProgramme.getCode() + "."));
else else
csvProgramme.setClassification(map.get(csvProgramme.getCode())); csvProgramme.setClassification(map.get(csvProgramme.getCode()));

View File

@ -10,7 +10,6 @@ public class ProjectSubset implements Serializable {
private String code; private String code;
public String getCode() { public String getCode() {
return code; return code;
} }