516 lines
16 KiB
Java
516 lines
16 KiB
Java
package eu.dnetlib.data.collector.plugins.schemaorg;
|
|
|
|
import org.json.JSONArray;
|
|
import org.json.JSONObject;
|
|
|
|
import java.util.ArrayList;
|
|
import java.util.List;
|
|
|
|
public class JSONLDUtils {
|
|
|
|
/**
 * Common read-only view over the principals modelled in this class
 * ({@link PersonInfo} and {@link OrganizationInfo}).
 */
public interface PrincipalInfo {

    /**
     * @return the name of the principal
     */
    String name();

    /**
     * @return the names of the organizations this principal is affiliated with;
     *         implementations in this file may return {@code null}
     */
    List<String> affiliationNames();
}
|
|
|
|
public static class OrganizationInfo implements PrincipalInfo{
|
|
public String name;
|
|
|
|
public String name(){return this.name;}
|
|
|
|
public List<String> affiliationNames(){
|
|
return null;
|
|
}
|
|
|
|
public OrganizationInfo(){}
|
|
|
|
public OrganizationInfo(String name){
|
|
this.name = name;
|
|
}
|
|
}
|
|
|
|
public static class PersonInfo implements PrincipalInfo{
|
|
public String name;
|
|
public List<OrganizationInfo> affiliations;
|
|
|
|
public String name(){return this.name;}
|
|
|
|
public List<String> affiliationNames(){
|
|
if(this.affiliations == null) return null;
|
|
List<String> curated = new ArrayList<>();
|
|
for(OrganizationInfo item : this.affiliations){
|
|
if(item == null || item.name == null || item.name.trim().length() == 0) continue;;
|
|
curated.add(item.name.trim());
|
|
}
|
|
return curated;
|
|
}
|
|
|
|
public PersonInfo(){}
|
|
|
|
public PersonInfo(String name){
|
|
this.name = name;
|
|
}
|
|
|
|
public PersonInfo(String name, List<OrganizationInfo> affiliations){
|
|
this.name = name;
|
|
this.affiliations = affiliations;
|
|
}
|
|
}
|
|
|
|
public static class LicenseInfo{
|
|
public String name;
|
|
public String url;
|
|
|
|
public LicenseInfo(){}
|
|
|
|
public LicenseInfo(String url){
|
|
this.url = url;
|
|
}
|
|
|
|
public LicenseInfo(String url, String name){
|
|
this.name = name;
|
|
this.url = url;
|
|
}
|
|
}
|
|
|
|
public static class CitationInfo{
|
|
public String url;
|
|
|
|
public CitationInfo(){}
|
|
|
|
public CitationInfo(String url){
|
|
this.url = url;
|
|
}
|
|
}
|
|
|
|
public static class IdentifierInfo{
|
|
public String value;
|
|
public String type;
|
|
|
|
public IdentifierInfo(){}
|
|
|
|
public IdentifierInfo(String value){
|
|
this.value = value;
|
|
}
|
|
|
|
public IdentifierInfo(String value, String type){
|
|
this.value = value;
|
|
this.type = type;
|
|
}
|
|
}
|
|
|
|
public static class GeoCoordinatesInfo{
|
|
public String latitude;
|
|
public String longitude;
|
|
|
|
public GeoCoordinatesInfo(){}
|
|
|
|
public GeoCoordinatesInfo(String latitude, String longitude){
|
|
this.latitude = latitude;
|
|
this.longitude = longitude;
|
|
}
|
|
}
|
|
|
|
public static class GeoShapeInfo{
|
|
public String box;
|
|
|
|
public GeoShapeInfo(){}
|
|
|
|
public GeoShapeInfo(String box){
|
|
this.box = box;
|
|
}
|
|
}
|
|
|
|
public static class PlaceInfo{
|
|
public String name;
|
|
public List<GeoCoordinatesInfo> geoCoordinates;
|
|
public List<GeoShapeInfo> geoShapes;
|
|
|
|
public PlaceInfo(){}
|
|
|
|
public PlaceInfo(String name, List<GeoCoordinatesInfo> geoCoordinates, List<GeoShapeInfo> geoShapes){
|
|
this.name = name;
|
|
this.geoCoordinates = geoCoordinates;
|
|
this.geoShapes = geoShapes;
|
|
}
|
|
}
|
|
|
|
private static PlaceInfo extractPlaceSingle(JSONObject document){
|
|
if(document == null || !"Place".equals(document.optString("@type"))) return null;
|
|
String name = document.optString("name");
|
|
List<GeoCoordinatesInfo> geoCoordinates = JSONLDUtils.extractGeoCoordinates(document, "geo");
|
|
List<GeoShapeInfo> geoShapes = JSONLDUtils.extractGeoShapes(document, "geo");
|
|
if((name==null || name.trim().length() == 0) &&
|
|
(geoCoordinates == null || geoCoordinates.size() == 0) &&
|
|
(geoShapes == null || geoShapes.size() == 0)) return null;
|
|
return new PlaceInfo(name, geoCoordinates, geoShapes);
|
|
}
|
|
|
|
public static List<PlaceInfo> extractPlaces(JSONObject document, String key) {
|
|
List<PlaceInfo> items = new ArrayList<>();
|
|
|
|
JSONArray array = document.optJSONArray(key);
|
|
JSONObject obj = document.optJSONObject(key);
|
|
|
|
if (array != null) {
|
|
for (int i = 0; i < array.length(); i += 1) {
|
|
PlaceInfo nfo = JSONLDUtils.extractPlaceSingle(array.optJSONObject(i));
|
|
if(nfo!=null) items.add(nfo);
|
|
}
|
|
}else if (obj!=null) {
|
|
PlaceInfo nfo = JSONLDUtils.extractPlaceSingle(obj);
|
|
if(nfo!=null) items.add(nfo);
|
|
}
|
|
|
|
return items;
|
|
}
|
|
|
|
private static GeoCoordinatesInfo extractGeoCoordinatesSingle(JSONObject document){
|
|
if(document == null || !"GeoCoordinates".equals(document.optString("@type"))) return null;
|
|
String latitude = document.optString("latitude");
|
|
String longitude = document.optString("longitude");
|
|
if(latitude==null || latitude.trim().length()==0 || longitude==null || longitude.trim().length()==0) return null;
|
|
return new GeoCoordinatesInfo(latitude, longitude);
|
|
}
|
|
|
|
private static List<GeoCoordinatesInfo> extractGeoCoordinates(JSONObject document, String key) {
|
|
List<GeoCoordinatesInfo> items = new ArrayList<>();
|
|
|
|
JSONArray array = document.optJSONArray(key);
|
|
JSONObject obj = document.optJSONObject(key);
|
|
|
|
if (array != null) {
|
|
for (int i = 0; i < array.length(); i += 1) {
|
|
GeoCoordinatesInfo nfo = JSONLDUtils.extractGeoCoordinatesSingle(array.optJSONObject(i));
|
|
if(nfo!=null) items.add(nfo);
|
|
}
|
|
}else if (obj!=null) {
|
|
GeoCoordinatesInfo nfo = JSONLDUtils.extractGeoCoordinatesSingle(obj);
|
|
if(nfo!=null) items.add(nfo);
|
|
}
|
|
|
|
return items;
|
|
}
|
|
|
|
private static GeoShapeInfo extractGeoShapeSingle(JSONObject document){
|
|
if(document == null || !"GeoShape".equals(document.optString("@type"))) return null;
|
|
String box = document.optString("box");
|
|
if(box==null || box.trim().length()==0 ) return null;
|
|
return new GeoShapeInfo(box);
|
|
}
|
|
|
|
private static List<GeoShapeInfo> extractGeoShapes(JSONObject document, String key) {
|
|
List<GeoShapeInfo> items = new ArrayList<>();
|
|
|
|
JSONArray array = document.optJSONArray(key);
|
|
JSONObject obj = document.optJSONObject(key);
|
|
|
|
if (array != null) {
|
|
for (int i = 0; i < array.length(); i += 1) {
|
|
GeoShapeInfo nfo = JSONLDUtils.extractGeoShapeSingle(array.optJSONObject(i));
|
|
if(nfo!=null) items.add(nfo);
|
|
}
|
|
}else if (obj!=null) {
|
|
GeoShapeInfo nfo = JSONLDUtils.extractGeoShapeSingle(obj);
|
|
if(nfo!=null) items.add(nfo);
|
|
}
|
|
|
|
return items;
|
|
}
|
|
|
|
private static OrganizationInfo extractOrganizationSingle(JSONObject document){
|
|
if(document == null || !"Organization".equals(document.optString("@type"))) return null;
|
|
String name = document.optString("name");
|
|
if(name==null || name.trim().length()==0) return null;
|
|
return new OrganizationInfo(name);
|
|
}
|
|
|
|
private static List<OrganizationInfo> extractOrganization(JSONObject document, String key) {
|
|
List<OrganizationInfo> items = new ArrayList<>();
|
|
|
|
JSONArray array = document.optJSONArray(key);
|
|
JSONObject obj = document.optJSONObject(key);
|
|
|
|
if (array != null) {
|
|
for (int i = 0; i < array.length(); i += 1) {
|
|
OrganizationInfo nfo = JSONLDUtils.extractOrganizationSingle(array.optJSONObject(i));
|
|
if(nfo!=null) items.add(nfo);
|
|
}
|
|
}else if (obj!=null) {
|
|
OrganizationInfo nfo = JSONLDUtils.extractOrganizationSingle(obj);
|
|
if(nfo!=null) items.add(nfo);
|
|
}
|
|
|
|
return items;
|
|
}
|
|
|
|
private static PersonInfo extractPersonSingle(JSONObject document) {
|
|
if(document == null || !"Person".equals(document.optString("@type"))) return null;
|
|
String name = document.optString("name");
|
|
String givenName = document.optString("givenName");
|
|
String familyName = document.optString("familyName");
|
|
if ((name == null || name.trim().length() == 0) && (givenName!=null || familyName !=null)) {
|
|
if(givenName !=null && familyName!=null) name = String.join(" ", familyName, givenName).trim();
|
|
else if (givenName == null) name = familyName;
|
|
else if (familyName == null) name = givenName;
|
|
}
|
|
if(name==null || name.trim().length()==0) return null;
|
|
List<OrganizationInfo> affiliations = JSONLDUtils.extractOrganization(document, "affiliation");
|
|
return new PersonInfo(name, affiliations);
|
|
}
|
|
|
|
private static List<PersonInfo> extractPerson(JSONObject document, String key) {
|
|
List<PersonInfo> items = new ArrayList<>();
|
|
|
|
JSONArray array = document.optJSONArray(key);
|
|
JSONObject obj = document.optJSONObject(key);
|
|
|
|
if (array != null) {
|
|
for (int i = 0; i < array.length(); i += 1) {
|
|
PersonInfo nfo = JSONLDUtils.extractPersonSingle(array.optJSONObject(i));
|
|
if(nfo!=null) items.add(nfo);
|
|
}
|
|
}else if (obj!=null) {
|
|
PersonInfo nfo = JSONLDUtils.extractPersonSingle(obj);
|
|
if(nfo!=null) items.add(nfo);
|
|
} else {
|
|
String value = document.optString(key);
|
|
if (value != null) items.add(new PersonInfo(value));
|
|
}
|
|
|
|
return items;
|
|
}
|
|
|
|
public static PrincipalInfo extractPrincipalSingle(JSONObject document) {
|
|
PrincipalInfo principal = JSONLDUtils.extractPersonSingle(document);
|
|
if(principal == null) principal = JSONLDUtils.extractOrganizationSingle(document);
|
|
return principal;
|
|
}
|
|
|
|
public static List<PrincipalInfo> extractPrincipal(JSONObject document, String key) {
|
|
List<PrincipalInfo> items = new ArrayList<>();
|
|
|
|
JSONArray array = document.optJSONArray(key);
|
|
JSONObject obj = document.optJSONObject(key);
|
|
|
|
if (array != null) {
|
|
for (int i = 0; i < array.length(); i += 1) {
|
|
PrincipalInfo nfo = JSONLDUtils.extractPrincipalSingle(array.optJSONObject(i));
|
|
if(nfo!=null) items.add(nfo);
|
|
}
|
|
}else if (obj!=null) {
|
|
PrincipalInfo nfo = JSONLDUtils.extractPrincipalSingle(obj);
|
|
if(nfo!=null) items.add(nfo);
|
|
} else {
|
|
String value = document.optString(key);
|
|
if (value != null) items.add(new PersonInfo(value));
|
|
}
|
|
|
|
return items;
|
|
}
|
|
|
|
public static List<String> extractString(JSONObject document, String key){
|
|
List<String> items = new ArrayList<>();
|
|
|
|
if (!document.has(key)) return items;
|
|
|
|
JSONArray array = document.optJSONArray(key);
|
|
JSONObject obj = document.optJSONObject(key);
|
|
if (array != null) {
|
|
for (int i = 0; i < array.length(); i += 1) {
|
|
JSONObject item = array.optJSONObject(i);
|
|
if(item != null) continue;
|
|
String value = array.optString(i);
|
|
if(value == null) continue;
|
|
items.add(value);
|
|
}
|
|
} else if (obj == null) {
|
|
String value = document.optString(key);
|
|
if(value != null) items.add(value);
|
|
}
|
|
|
|
return items;
|
|
|
|
}
|
|
|
|
public static List<String> extractSize(JSONObject document, String key){
|
|
List<String> items = new ArrayList<>();
|
|
|
|
JSONArray array = document.optJSONArray(key);
|
|
JSONObject obj = document.optJSONObject(key);
|
|
if (array != null) {
|
|
for (int i = 0; i < array.length(); i += 1) {
|
|
JSONObject item = array.optJSONObject(i);
|
|
if (item == null || !"DataDownload".equals((item.optString("@type")))) continue;
|
|
String size = item.optString("contentSize");
|
|
if (size != null) items.add(size);
|
|
}
|
|
} else if (obj != null) {
|
|
String size = obj.optString("contentSize");
|
|
if ("DataDownload".equals((obj.optString("@type"))) && size != null) {
|
|
items.add(size);
|
|
}
|
|
}
|
|
|
|
return items;
|
|
}
|
|
|
|
public static List<String> extractEncodingFormat(JSONObject document, String key){
|
|
List<String> items = new ArrayList<>();
|
|
|
|
JSONArray array = document.optJSONArray(key);
|
|
JSONObject obj = document.optJSONObject(key);
|
|
if (array != null) {
|
|
for (int i = 0; i < array.length(); i += 1) {
|
|
JSONObject item = array.optJSONObject(i);
|
|
if (item == null || !"DataDownload".equals((item.optString("@type")))) continue;
|
|
String encodingFormat = item.optString("encodingFormat");
|
|
if (encodingFormat != null) items.add(encodingFormat);
|
|
String fileFormat = item.optString("fileFormat");
|
|
if (fileFormat != null) items.add(fileFormat);
|
|
}
|
|
} else if (obj != null) {
|
|
if ("DataDownload".equals((obj.optString("@type")))) {
|
|
String encodingFormat = obj.optString("encodingFormat");
|
|
if (encodingFormat != null) items.add(encodingFormat);
|
|
String fileFormat = obj.optString("fileFormat");
|
|
if (fileFormat != null) items.add(fileFormat);
|
|
}
|
|
}
|
|
|
|
return items;
|
|
}
|
|
|
|
public static List<String> extractLanguage(JSONObject document, String key){
|
|
List<String> items = new ArrayList<>();
|
|
|
|
JSONArray array = document.optJSONArray(key);
|
|
JSONObject obj = document.optJSONObject(key);
|
|
if (array != null) {
|
|
for (int i = 0; i < array.length(); i += 1) {
|
|
JSONObject item = array.optJSONObject(i);
|
|
if (item == null) {
|
|
String value = array.optString(i);
|
|
if (value != null) items.add(value);
|
|
} else {
|
|
if (!"Language".equals((item.optString("@type")))) continue;
|
|
String name = item.optString("name");
|
|
if (name != null) items.add(name);
|
|
String alternateName = item.optString("alternateName");
|
|
if (alternateName != null) items.add(alternateName);
|
|
}
|
|
}
|
|
} else if (obj != null) {
|
|
if ("Language".equals((obj.optString("@type")))){
|
|
String name = obj.optString("name");
|
|
if (name != null) items.add(name);
|
|
String alternateName = obj.optString("alternateName");
|
|
if (alternateName != null) items.add(alternateName);
|
|
}
|
|
} else {
|
|
String value = document.optString(key);
|
|
if (value != null) items.add(value);
|
|
}
|
|
|
|
return items;
|
|
}
|
|
|
|
public static List<LicenseInfo> extractLicenses(JSONObject document, String key){
|
|
List<LicenseInfo> items = new ArrayList<>();
|
|
|
|
JSONArray array = document.optJSONArray(key);
|
|
JSONObject obj = document.optJSONObject(key);
|
|
if (array != null) {
|
|
for (int i = 0; i < array.length(); i += 1) {
|
|
JSONObject item = array.optJSONObject(i);
|
|
if (item == null) {
|
|
String value = array.optString(i);
|
|
if(value != null) items.add(new LicenseInfo(value));
|
|
} else {
|
|
if (!"CreativeWork".equals((item.optString("@type")))) continue;
|
|
String url = item.optString("url");
|
|
String name = item.optString("name");
|
|
if (url != null || name != null) items.add(new LicenseInfo(url, name));
|
|
}
|
|
}
|
|
} else if (obj != null) {
|
|
if("CreativeWork".equals((obj.optString("@type")))) {
|
|
String url = obj.optString("url");
|
|
String name = obj.optString("name");
|
|
if (url != null || name != null) items.add(new LicenseInfo(url, name));
|
|
}
|
|
} else {
|
|
String value = document.optString(key);
|
|
if (value != null) items.add(new LicenseInfo(value));
|
|
}
|
|
|
|
return items;
|
|
}
|
|
|
|
public static List<CitationInfo> extractCitations(JSONObject document, String key){
|
|
List<CitationInfo> items = new ArrayList<>();
|
|
|
|
JSONArray array = document.optJSONArray(key);
|
|
JSONObject obj = document.optJSONObject(key);
|
|
if (array != null) {
|
|
for (int i = 0; i < array.length(); i += 1) {
|
|
JSONObject item = array.optJSONObject(i);
|
|
if (item == null) {
|
|
String value = array.optString(i);
|
|
if(value != null) items.add(new CitationInfo(value));
|
|
} else {
|
|
if (!"CreativeWork".equals((item.optString("@type")))) continue;
|
|
String url = item.optString("url");
|
|
if (url != null) items.add(new CitationInfo(url));
|
|
}
|
|
}
|
|
} else if (obj != null) {
|
|
if("CreativeWork".equals((obj.optString("@type")))) {
|
|
String url = obj.optString("url");
|
|
if (url != null) items.add(new CitationInfo(url));
|
|
}
|
|
} else {
|
|
String value = document.optString(key);
|
|
if (value != null) items.add(new CitationInfo(value));
|
|
}
|
|
|
|
return items;
|
|
}
|
|
|
|
private static IdentifierInfo extractIdentifierSingle(JSONObject document){
|
|
if(document == null || !"PropertyValue".equals(document.optString("@type"))) return null;
|
|
String name = document.optString("name");
|
|
String value = document.optString("value");
|
|
if(value==null || value.trim().length()==0) return null;
|
|
return new IdentifierInfo(value, name);
|
|
}
|
|
|
|
public static List<IdentifierInfo> extractIdentifier(JSONObject document, String key) {
|
|
List<IdentifierInfo> items = new ArrayList<>();
|
|
|
|
JSONArray array = document.optJSONArray(key);
|
|
JSONObject obj = document.optJSONObject(key);
|
|
|
|
if (array != null) {
|
|
for (int i = 0; i < array.length(); i += 1) {
|
|
IdentifierInfo nfo = null;
|
|
if (array.optJSONObject(i) == null) {
|
|
String value = array.optString(i);
|
|
if (value != null) nfo = new IdentifierInfo(value);
|
|
}
|
|
if (nfo == null) nfo = JSONLDUtils.extractIdentifierSingle(array.optJSONObject(i));
|
|
if (nfo != null) items.add(nfo);
|
|
}
|
|
}else if (obj!=null) {
|
|
IdentifierInfo nfo = JSONLDUtils.extractIdentifierSingle(obj);
|
|
if (nfo != null) items.add(nfo);
|
|
} else {
|
|
String value = document.optString(key);
|
|
if (value != null) items.add(new IdentifierInfo(value));
|
|
}
|
|
|
|
return items;
|
|
}
|
|
}
|