/*
 * gFeed/oai-harvester/src/test/java/org/gcube/application/gfeed/oai/OAIClientTests.java
 *
 * Original listing: 147 lines, 4.6 KiB, Java
 */

package org.gcube.application.gfeed.oai;
import java.io.File;
import java.io.IOException;
import java.io.PrintWriter;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Locale;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Set;
import java.util.regex.Pattern;
import javax.xml.bind.JAXBContext;
import javax.xml.bind.JAXBException;
import javax.xml.bind.Marshaller;
import javax.xml.bind.Unmarshaller;
import org.gcube.data.publishing.gFeed.collectors.oai.OAIClient;
import org.gcube.data.publishing.gFeed.collectors.oai.model.DCRecordMetadata;
import org.gcube.data.publishing.gFeed.collectors.oai.model.MetadataHolder;
import org.gcube.data.publishing.gFeed.collectors.oai.model.OAIInteractionException;
import org.gcube.data.publishing.gFeed.collectors.oai.model.OAIMetadata;
import org.gcube.data.publishing.gFeed.collectors.oai.model.OAIRecord;
import org.gcube.data.publishing.gFeed.collectors.oai.model.OAI_PMH;
import org.gcube.data.publishing.gFeed.collectors.oai.model.OAI_PMH.ListRecords;
/**
 * Manual, network-dependent check of {@link OAIClient}.
 *
 * <p>Running {@link #main(String[])} harvests records from a hard-coded OAI endpoint,
 * dumps them to a temporary XML file, and prints two diagnostics on standard output:
 * duplicated record identifiers, and identifiers that become equal once normalised
 * into catalogue ("ckan") ids.
 */
public class OAIClientTests {

    /** Encoding used both for the JAXB output and for the writer that receives it. */
    private static final String XML_ENCODING = StandardCharsets.UTF_8.name();

    /**
     * Matches every character that is not a lowercase ASCII letter, a digit, '_' or '-'.
     * Compiled once because it is applied to every harvested record.
     */
    private static final Pattern INVALID_ID_CHARS = Pattern.compile("[^a-z0-9_-]");

    /**
     * Entry point: harvest, serialise, then report on identifiers.
     *
     * @param args ignored
     * @throws JAXBException if the JAXB context cannot be built or marshalling fails
     * @throws OAIInteractionException if the OAI client reports a failure while harvesting
     * @throws IOException if the temporary dump file cannot be created or written
     */
    public static void main(String[] args) throws JAXBException, OAIInteractionException, IOException {
        // FROM URL
        String baseUrl = "https://data.inrae.fr/oai";
        // String baseUrl="http://researchdata.cab.unipd.it/cgi/oai2";
        // NOTE(review): -1 presumably means "no limit" - confirm against OAIClient.setMaxItems.
        Integer maxItems = -1;
        Collection<OAIRecord> records = fromURL(baseUrl, maxItems);
        System.out.println("Serialized to : " + serialize(records));

        // FROM PATH
        // String path="/var/folders/21/65t9t22j3l3fk7s18m0t77rh0000gp/T/oai-records7470133448784207387.xml";
        // records=read(path);

        System.out.println("Records size = " + records.size());

        reportDuplicateIds(records);
        reportIdCollisions(records);

        // Use transformer
        // GCatTransformer tr=new GCatTransformer();
        // Set<GCatModel> translated=tr.transform(records);
        // System.out.println("Trasnlated size = "+translated.size());
        //
        // HashSet<String> ids =new HashSet<String>();
        // for(GCatModel model : translated)
        // ids.add(model.getItem().getName());
        //
        // System.out.println("IDS size = "+ids.size());
    }

    /** Prints every header identifier that occurs more than once, then the number of distinct ones. */
    private static void reportDuplicateIds(Collection<OAIRecord> records) {
        System.out.println("Checking IDS ..");
        Set<String> seen = new HashSet<String>();
        for (OAIRecord record : records) {
            String id = record.getHeader().getIdentifier();
            // Set.add returns false when the element was already present.
            if (!seen.add(id)) {
                System.out.println("Found duplicate id : " + id);
            }
        }
        System.out.println("IDS size is : " + seen.size());
    }

    /**
     * Groups the original identifiers by their normalised form and prints every group
     * holding more than one identifier, followed by the largest group.
     */
    private static void reportIdCollisions(Collection<OAIRecord> records) {
        Map<String, Set<String>> byNormalisedId = new HashMap<String, Set<String>>();
        for (OAIRecord record : records) {
            String original = record.getHeader().getIdentifier();
            String normalised = normaliseId(original);
            Set<String> originals = byNormalisedId.get(normalised);
            if (originals == null) {
                originals = new HashSet<String>();
                byNormalisedId.put(normalised, originals);
            }
            originals.add(original);
        }

        System.out.println("Got " + byNormalisedId.size() + " unique ckan ids, listing collisions");
        String maxCollided = null;
        int maxCollisions = 0;
        for (Entry<String, Set<String>> entry : byNormalisedId.entrySet()) {
            int groupSize = entry.getValue().size();
            if (groupSize > 1) {
                System.out.println("Collision from " + entry.getKey() + " to " + entry.getValue());
            }
            if (groupSize > maxCollisions) {
                maxCollisions = groupSize;
                maxCollided = entry.getKey();
            }
        }
        // With no records maxCollided stays null and this prints "null to null".
        System.out.println("Top hits from " + maxCollided + " to " + byNormalisedId.get(maxCollided));
    }

    /**
     * Normalises an OAI identifier: lowercases it and replaces every character outside
     * {@code [a-z0-9_-]} with {@code '_'}.
     *
     * <p>{@link Locale#ROOT} keeps the result independent of the JVM default locale.
     * The previous pattern was over-escaped and let backslashes through unchanged.
     * NOTE(review): this is assumed to mirror the id mapping used when publishing to
     * the catalogue - confirm against the transformer.
     *
     * @param identifier the identifier as returned by the OAI endpoint
     * @return the normalised identifier
     */
    private static String normaliseId(String identifier) {
        return INVALID_ID_CHARS.matcher(identifier.toLowerCase(Locale.ROOT)).replaceAll("_");
    }

    /**
     * Harvests records from the given endpoint using {@link OAIClient#DC_METADATA_PREFIX}.
     *
     * @param url base URL of the OAI endpoint
     * @param maxItems value handed to {@link OAIClient#setMaxItems}
     * @return the records returned by {@link OAIClient#getAll}
     * @throws JAXBException propagated from the client
     * @throws OAIInteractionException propagated from the client
     */
    private static final Collection<OAIRecord> fromURL(String url, Integer maxItems)
            throws JAXBException, OAIInteractionException {
        OAIClient client = new OAIClient(url);
        client.setMaxItems(maxItems);
        return client.getAll(OAIClient.DC_METADATA_PREFIX);
    }

    /**
     * Writes the records, wrapped in an {@link OAI_PMH} document, to a new temporary file.
     *
     * <p>The writer is opened with the same encoding JAXB is told to declare, is closed
     * even when marshalling fails, and its error flag is checked because
     * {@link PrintWriter} never throws on write failures.
     *
     * @param coll records to serialise
     * @return absolute path of the file that was written
     * @throws JAXBException if the context cannot be created or marshalling fails
     * @throws IOException if the temporary file cannot be created or written
     */
    private static String serialize(Collection<OAIRecord> coll) throws JAXBException, IOException {
        OAI_PMH toWrite = new OAI_PMH();
        // NOTE(review): the second ListRecords argument is left null as in the original;
        // its meaning is not visible from this file.
        toWrite.setResponseRecords(new ListRecords(new ArrayList<OAIRecord>(coll), null));

        // Build the marshaller first so a JAXB failure does not leave an empty temp file behind.
        Marshaller marshaller = newContext().createMarshaller();
        marshaller.setProperty(Marshaller.JAXB_ENCODING, XML_ENCODING);

        File outF = File.createTempFile("oai-records", ".xml");
        try (PrintWriter out = new PrintWriter(outF, XML_ENCODING)) {
            marshaller.marshal(toWrite, out);
            // checkError() flushes and reports any I/O failure the writer swallowed.
            if (out.checkError()) {
                throw new IOException("Unable to write records to " + outF.getAbsolutePath());
            }
        }
        return outF.getAbsolutePath();
    }

    /**
     * Reads back a document produced by {@link #serialize(Collection)}.
     * Only referenced from the commented-out "FROM PATH" section of {@link #main(String[])}.
     *
     * @param file path of the XML file to parse
     * @return the records contained in the parsed document
     * @throws JAXBException if the context cannot be created or the file cannot be unmarshalled
     */
    @SuppressWarnings("unused")
    private static Collection<OAIRecord> read(String file) throws JAXBException {
        Unmarshaller unmarshaller = newContext().createUnmarshaller();
        OAI_PMH parsed = (OAI_PMH) unmarshaller.unmarshal(new File(file));
        return parsed.getResponseRecords().getRecords();
    }

    /** Builds the JAXB context shared by {@link #serialize(Collection)} and {@link #read(String)}. */
    private static JAXBContext newContext() throws JAXBException {
        return JAXBContext.newInstance(OAIRecord.class,
                MetadataHolder.class,
                OAIMetadata.class,
                DCRecordMetadata.class,
                OAI_PMH.class);
    }
}