|
|
|
@ -1,10 +1,6 @@
|
|
|
|
|
package org.gcube.portlets.user.shareupdates.server.opengraph;
|
|
|
|
|
|
|
|
|
|
import org.htmlcleaner.HtmlCleaner;
|
|
|
|
|
import org.htmlcleaner.TagNode;
|
|
|
|
|
|
|
|
|
|
import java.io.BufferedReader;
|
|
|
|
|
import java.io.IOException;
|
|
|
|
|
import java.io.InputStreamReader;
|
|
|
|
|
import java.net.URL;
|
|
|
|
|
import java.net.URLConnection;
|
|
|
|
@ -14,15 +10,20 @@ import java.util.Hashtable;
|
|
|
|
|
import java.util.regex.Matcher;
|
|
|
|
|
import java.util.regex.Pattern;
|
|
|
|
|
|
|
|
|
|
import org.htmlcleaner.HtmlCleaner;
|
|
|
|
|
import org.htmlcleaner.TagNode;
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
* A Java object representation of an Open Graph enabled webpage.
|
|
|
|
|
* A simplified layer over a Hashtable.
|
|
|
|
|
*
|
|
|
|
|
* @author Callum Jones
|
|
|
|
|
* @author Massimiliano Assante
|
|
|
|
|
*/
|
|
|
|
|
public class OpenGraph
|
|
|
|
|
{
|
|
|
|
|
public class OpenGraph {
|
|
|
|
|
private String pageUrl;
|
|
|
|
|
|
|
|
|
|
private URL realURL;
|
|
|
|
|
private ArrayList<OpenGraphNamespace> pageNamespaces;
|
|
|
|
|
private Hashtable<String, ArrayList<MetaElement>> metaAttributes;
|
|
|
|
|
private String baseType;
|
|
|
|
@ -47,8 +48,7 @@ public class OpenGraph
|
|
|
|
|
/**
|
|
|
|
|
* Create an open graph representation for generating your own Open Graph object
|
|
|
|
|
*/
|
|
|
|
|
public OpenGraph()
|
|
|
|
|
{
|
|
|
|
|
public OpenGraph() {
|
|
|
|
|
pageNamespaces = new ArrayList<OpenGraphNamespace>();
|
|
|
|
|
metaAttributes = new Hashtable<String, ArrayList<MetaElement>>();
|
|
|
|
|
hasChanged = false;
|
|
|
|
@ -70,14 +70,13 @@ public class OpenGraph
|
|
|
|
|
// download the (X)HTML content, but only up to the closing head tag. We do not want to waste resources parsing irrelevant content
|
|
|
|
|
Charset charset = getConnectionCharset(siteConnection);
|
|
|
|
|
BufferedReader dis = new BufferedReader(new InputStreamReader(siteConnection.getInputStream(), charset));
|
|
|
|
|
this.realURL = siteConnection.getURL();
|
|
|
|
|
String inputLine;
|
|
|
|
|
StringBuffer headContents = new StringBuffer();
|
|
|
|
|
|
|
|
|
|
// Loop through each line, looking for the closing head element
|
|
|
|
|
while ((inputLine = dis.readLine()) != null)
|
|
|
|
|
{
|
|
|
|
|
if (inputLine.contains("</head>"))
|
|
|
|
|
{
|
|
|
|
|
while ((inputLine = dis.readLine()) != null) {
|
|
|
|
|
if (inputLine.contains("</head>")) {
|
|
|
|
|
inputLine = inputLine.substring(0, inputLine.indexOf("</head>") + 7);
|
|
|
|
|
inputLine = inputLine.concat("<body></body></html>");
|
|
|
|
|
headContents.append(inputLine + "\r\n");
|
|
|
|
@ -156,6 +155,10 @@ public class OpenGraph
|
|
|
|
|
pageUrl = realURL.toExternalForm();
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/**
 * Returns the URL held in the {@code realURL} field.
 * <p>
 * In the visible code this field is assigned from {@code siteConnection.getURL()}
 * while the page head is being downloaded.
 * NOTE(review): presumably this is the final address after any redirects, and it
 * may be null when no page has been fetched -- confirm against the constructors.
 *
 * @return the current value of {@code realURL}
 */
public URL getRealURL() {
|
|
|
|
|
return realURL;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
* Gets the charset for specified connection.
|
|
|
|
|
* Content Type header is parsed to get the charset name.
|
|
|
|
|