2023-06-09 16:47:25 +02:00
|
|
|
|
|
|
|
package eu.dnetlib.dhp.schema.oaf.utils;
|
|
|
|
|
|
|
|
import java.util.Locale;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
|
|
|
|
|
|
|
|
// https://ror.readme.io/docs/ror-identifier-pattern
|
|
|
|
/**
 * Normalizes ROR (Research Organization Registry) identifiers to their canonical URL form,
 * i.e. {@link #ROR_PREFIX} followed by the 9-character identifier.
 */
public class RorCleaningRule {

    /** URL prefix prepended to every cleaned identifier. */
    public static final String ROR_PREFIX = "https://ror.org/";

    /** Matches any single whitespace character; compiled once instead of on every call. */
    private static final Pattern WHITESPACE = Pattern.compile("\\s");

    /**
     * A leading {@code 0}, six characters from the lowercase alphanumeric set without
     * {@code i}, {@code l}, {@code o}, {@code u}, then two digits.
     * The character class deliberately contains no {@code |}: inside brackets a pipe is a
     * literal character, not an alternation, and would let strings such as
     * {@code 0||||||12} pass as identifiers.
     */
    private static final Pattern PATTERN = Pattern.compile("(?<ror>0[a-hj-km-np-tv-z0-9]{6}[0-9]{2})");

    /**
     * Extracts the first ROR identifier found in the given value and returns it in URL form.
     * All whitespace is removed and the value is lowercased before matching, so the
     * identifier may be embedded in a URL, upper-cased, or padded with blanks.
     *
     * @param ror the raw value, possibly {@code null}
     * @return {@code ROR_PREFIX + identifier} when an identifier is found; the empty string
     *         when the input is {@code null} or contains no identifier
     */
    public static String clean(String ror) {
        if (ror == null) {
            // Same "nothing usable" result as the no-match case, instead of an NPE.
            return "";
        }

        // Locale.ROOT keeps the lowercasing independent of the JVM default locale.
        String normalized = WHITESPACE
            .matcher(ror)
            .replaceAll("")
            .toLowerCase(Locale.ROOT);

        Matcher m = PATTERN.matcher(normalized);
        if (m.find()) {
            return ROR_PREFIX + m.group("ror");
        }
        return "";
    }

}
|