added user agent property to http requests to avoid getting 403 errors, refined the way to guess content and images when parsing HTML

git-svn-id: https://svn.research-infrastructures.eu/d4science/gcube/trunk/portlets/user/share-updates@93471 82a268e6-3cf1-43bd-a215-b396298e98cf
This commit is contained in:
Massimiliano Assante 2014-03-22 16:32:46 +00:00
parent bb6f5cb846
commit 91b8e92a47
9 changed files with 424 additions and 336 deletions

View File

@ -1,6 +1,6 @@
<?xml version="1.0" encoding="UTF-8"?> <?xml version="1.0" encoding="UTF-8"?>
<classpath> <classpath>
<classpathentry kind="src" output="target/share-updates-1.2.0-SNAPSHOT/WEB-INF/classes" path="src/main/java"> <classpathentry kind="src" output="target/share-updates-1.2.1-SNAPSHOT/WEB-INF/classes" path="src/main/java">
<attributes> <attributes>
<attribute name="optional" value="true"/> <attribute name="optional" value="true"/>
<attribute name="maven.pomderived" value="true"/> <attribute name="maven.pomderived" value="true"/>
@ -31,5 +31,5 @@
<attribute name="maven.pomderived" value="true"/> <attribute name="maven.pomderived" value="true"/>
</attributes> </attributes>
</classpathentry> </classpathentry>
<classpathentry kind="output" path="target/share-updates-1.2.0-SNAPSHOT/WEB-INF/classes"/> <classpathentry kind="output" path="target/share-updates-1.2.1-SNAPSHOT/WEB-INF/classes"/>
</classpath> </classpath>

View File

@ -1,5 +1,5 @@
eclipse.preferences.version=1 eclipse.preferences.version=1
jarsExcludedFromWebInfLib= jarsExcludedFromWebInfLib=
lastWarOutDir=/Users/massi/Documents/workspace/share-updates/target/share-updates-1.2.0-SNAPSHOT lastWarOutDir=/Users/massi/Documents/workspace/share-updates/target/share-updates-1.2.1-SNAPSHOT
warSrcDir=src/main/webapp warSrcDir=src/main/webapp
warSrcDirIsOutput=false warSrcDirIsOutput=false

View File

@ -4,9 +4,6 @@
<wb-resource deploy-path="/" source-path="/src/main/webapp" tag="defaultRootSource"/> <wb-resource deploy-path="/" source-path="/src/main/webapp" tag="defaultRootSource"/>
<wb-resource deploy-path="/WEB-INF/classes" source-path="/src/main/java"/> <wb-resource deploy-path="/WEB-INF/classes" source-path="/src/main/java"/>
<wb-resource deploy-path="/WEB-INF/classes" source-path="/target/generated-sources/gwt"/> <wb-resource deploy-path="/WEB-INF/classes" source-path="/target/generated-sources/gwt"/>
<dependent-module archiveName="fileupload-progress-bar-1.0.0-SNAPSHOT.jar" deploy-path="/WEB-INF/lib" handle="module:/resource/fileupload-progress-bar/fileupload-progress-bar">
<dependency-type>uses</dependency-type>
</dependent-module>
<property name="java-output-path" value="/${module}/target/www/WEB-INF/classes"/> <property name="java-output-path" value="/${module}/target/www/WEB-INF/classes"/>
<property name="context-root" value="share-updates"/> <property name="context-root" value="share-updates"/>
</wb-module> </wb-module>

View File

@ -13,7 +13,7 @@
<groupId>org.gcube.portlets.user</groupId> <groupId>org.gcube.portlets.user</groupId>
<artifactId>share-updates</artifactId> <artifactId>share-updates</artifactId>
<packaging>war</packaging> <packaging>war</packaging>
<version>1.2.0-SNAPSHOT</version> <version>1.2.1-SNAPSHOT</version>
<name>gCube Share Updates Portlet</name> <name>gCube Share Updates Portlet</name>
<description> <description>
@ -106,13 +106,13 @@
<groupId>org.gcube.contentmanagement</groupId> <groupId>org.gcube.contentmanagement</groupId>
<artifactId>storage-manager-core</artifactId> <artifactId>storage-manager-core</artifactId>
<version>[2.0.0-SNAPSHOT, 3.0.0-SNAPSHOT)</version> <version>[2.0.0-SNAPSHOT, 3.0.0-SNAPSHOT)</version>
<scope>compile</scope> <scope>provided</scope>
</dependency> </dependency>
<dependency> <dependency>
<groupId>org.gcube.contentmanagement</groupId> <groupId>org.gcube.contentmanagement</groupId>
<artifactId>storage-manager-wrapper</artifactId> <artifactId>storage-manager-wrapper</artifactId>
<version>[2.0.0-SNAPSHOT, 3.0.0-SNAPSHOT)</version> <version>[2.0.0-SNAPSHOT, 3.0.0-SNAPSHOT)</version>
<scope>compile</scope> <scope>provided</scope>
</dependency> </dependency>
<dependency> <dependency>
<groupId>org.gcube.applicationsupportlayer</groupId> <groupId>org.gcube.applicationsupportlayer</groupId>

View File

@ -10,9 +10,6 @@ import com.google.gwt.user.client.ui.RootPanel;
*/ */
public class ShareUpdates implements EntryPoint { public class ShareUpdates implements EntryPoint {
/**
* This is the entry point method.
*/
public void onModuleLoad() { public void onModuleLoad() {
RootPanel.get("shareUpdateDiv").add(new ShareUpdateForm()); RootPanel.get("shareUpdateDiv").add(new ShareUpdateForm());
} }

View File

@ -1,12 +1,17 @@
package org.gcube.portlets.user.shareupdates.server; package org.gcube.portlets.user.shareupdates.server;
import java.io.BufferedReader;
import java.io.ByteArrayInputStream;
import java.io.File; import java.io.File;
import java.io.IOException; import java.io.IOException;
import java.io.InputStream; import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.Reader;
import java.net.HttpURLConnection; import java.net.HttpURLConnection;
import java.net.MalformedURLException; import java.net.MalformedURLException;
import java.net.URL; import java.net.URL;
import java.net.URLConnection; import java.net.URLConnection;
import java.nio.charset.Charset;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.Date; import java.util.Date;
import java.util.HashMap; import java.util.HashMap;
@ -57,6 +62,7 @@ import org.gcube.portlets.user.shareupdates.client.ShareUpdateService;
import org.gcube.portlets.user.shareupdates.client.view.ShareUpdateForm; import org.gcube.portlets.user.shareupdates.client.view.ShareUpdateForm;
import org.gcube.portlets.user.shareupdates.server.metaseeker.MetaSeeker; import org.gcube.portlets.user.shareupdates.server.metaseeker.MetaSeeker;
import org.gcube.portlets.user.shareupdates.server.opengraph.OpenGraph; import org.gcube.portlets.user.shareupdates.server.opengraph.OpenGraph;
import org.gcube.portlets.user.shareupdates.server.opengraph.OpenGraphNamespace;
import org.gcube.portlets.user.shareupdates.shared.LinkPreview; import org.gcube.portlets.user.shareupdates.shared.LinkPreview;
import org.gcube.portlets.user.shareupdates.shared.UserSettings; import org.gcube.portlets.user.shareupdates.shared.UserSettings;
import org.gcube.portlets.widgets.pickuser.shared.PickingUser; import org.gcube.portlets.widgets.pickuser.shared.PickingUser;
@ -66,6 +72,8 @@ import org.gcube.vomanagement.usermanagement.impl.liferay.LiferayGroupManager;
import org.gcube.vomanagement.usermanagement.impl.liferay.LiferayUserManager; import org.gcube.vomanagement.usermanagement.impl.liferay.LiferayUserManager;
import org.gcube.vomanagement.usermanagement.model.GroupModel; import org.gcube.vomanagement.usermanagement.model.GroupModel;
import org.gcube.vomanagement.usermanagement.model.UserModel; import org.gcube.vomanagement.usermanagement.model.UserModel;
import org.htmlcleaner.HtmlCleaner;
import org.htmlcleaner.TagNode;
import org.htmlparser.beans.StringBean; import org.htmlparser.beans.StringBean;
import org.slf4j.Logger; import org.slf4j.Logger;
import org.slf4j.LoggerFactory; import org.slf4j.LoggerFactory;
@ -134,9 +142,9 @@ public class ShareUpdateServiceImpl extends RemoteServiceServlet implements Shar
if (user == null) { if (user == null) {
_log.warn("USER IS NULL setting test.user and Running OUTSIDE PORTAL"); _log.warn("USER IS NULL setting test.user and Running OUTSIDE PORTAL");
user = "test.user"; user = "test.user";
// user = "massimiliano.assante"; user = "massimiliano.assante";
// SessionManager.getInstance().getASLSession(sessionID, user).setScope("/gcube/devsec/devVRE"); SessionManager.getInstance().getASLSession(sessionID, user).setScope("/gcube/devsec/devVRE");
withinPortal = false; withinPortal = false;
} }
else { else {
withinPortal = true; withinPortal = true;
@ -176,12 +184,12 @@ public class ShareUpdateServiceImpl extends RemoteServiceServlet implements Shar
e.printStackTrace(); e.printStackTrace();
} }
} }
String linkTitle = preview.getTitle(); String linkTitle = preview.getTitle();
String linkDesc = preview.getDescription(); String linkDesc = preview.getDescription();
String host = preview.getHost(); String host = preview.getHost();
String url = preview.getUrl(); String url = preview.getUrl();
Date feedDate = new Date(); Date feedDate = new Date();
//this means the user has shared a file without text in it. //this means the user has shared a file without text in it.
String textToPost = ""; String textToPost = "";
@ -190,10 +198,10 @@ public class ShareUpdateServiceImpl extends RemoteServiceServlet implements Shar
} else { } else {
textToPost = transformUrls(escapedFeedText); textToPost = transformUrls(escapedFeedText);
} }
ScopeBean scope = new ScopeBean(session.getScope()); ScopeBean scope = new ScopeBean(session.getScope());
String vreId2Set = scope.is(Type.VRE) ? scope.toString() : ""; String vreId2Set = scope.is(Type.VRE) ? scope.toString() : "";
Feed toShare = new Feed(UUID.randomUUID().toString(), feedType, username, feedDate, Feed toShare = new Feed(UUID.randomUUID().toString(), feedType, username, feedDate,
vreId2Set, url, urlThumbnail, textToPost, pLevel, fullName, email, thumbnailURL, linkTitle, linkDesc, host); vreId2Set, url, urlThumbnail, textToPost, pLevel, fullName, email, thumbnailURL, linkTitle, linkDesc, host);
@ -233,14 +241,14 @@ public class ShareUpdateServiceImpl extends RemoteServiceServlet implements Shar
ClientFeed cf = new ClientFeed(toShare.getKey(), toShare.getType().toString(), username, feedDate, toShare.getUri(), ClientFeed cf = new ClientFeed(toShare.getKey(), toShare.getType().toString(), username, feedDate, toShare.getUri(),
replaceAmpersand(toShare.getDescription()), fullName, email, thumbnailURL, toShare.getLinkTitle(), toShare.getLinkDescription(), replaceAmpersand(toShare.getDescription()), fullName, email, thumbnailURL, toShare.getLinkTitle(), toShare.getLinkDescription(),
toShare.getUriThumbnail(), toShare.getLinkHost()); toShare.getUriThumbnail(), toShare.getLinkHost());
//send the notification about this posts to everyone in the group if notifyGroup is true //send the notification about this posts to everyone in the group if notifyGroup is true
if (pLevel == PrivacyLevel.SINGLE_VRE && vreId != null && vreId.compareTo("") != 0 && notifyGroup) { if (pLevel == PrivacyLevel.SINGLE_VRE && vreId != null && vreId.compareTo("") != 0 && notifyGroup) {
NotificationsManager nm = new ApplicationNotificationsManager(session, NEWS_FEED_PORTLET_CLASSNAME); NotificationsManager nm = new ApplicationNotificationsManager(session, NEWS_FEED_PORTLET_CLASSNAME);
Thread thread = new Thread(new PostNotificationsThread(toShare.getKey(), escapedFeedText, ""+session.getGroupId(), nm)); Thread thread = new Thread(new PostNotificationsThread(toShare.getKey(), escapedFeedText, ""+session.getGroupId(), nm));
thread.start(); thread.start();
} }
//send the notification to the mentioned users //send the notification to the mentioned users
if (mentionedUsers != null && mentionedUsers.size() > 0) { if (mentionedUsers != null && mentionedUsers.size() > 0) {
@ -248,7 +256,7 @@ public class ShareUpdateServiceImpl extends RemoteServiceServlet implements Shar
Thread thread = new Thread(new MentionNotificationsThread(toShare.getKey(), escapedFeedText, nm, mentionedUsers)); Thread thread = new Thread(new MentionNotificationsThread(toShare.getKey(), escapedFeedText, nm, mentionedUsers));
thread.start(); thread.start();
} }
//it means I also should upload a copy on the user's Workspace root folder //it means I also should upload a copy on the user's Workspace root folder
if (fileName != null && filePathOnServer != null) { if (fileName != null && filePathOnServer != null) {
//The workspace uploader Thread starts here asyncronously //The workspace uploader Thread starts here asyncronously
@ -278,10 +286,6 @@ public class ShareUpdateServiceImpl extends RemoteServiceServlet implements Shar
return escapedFeedText; return escapedFeedText;
} }
private UserSettings getUserSettingsFromSession() {
return (UserSettings) getASLSession().getAttribute(UserInfo.USER_INFO_ATTR);
}
private void setUserSettingsInSession(UserSettings user) { private void setUserSettingsInSession(UserSettings user) {
getASLSession().setAttribute(UserInfo.USER_INFO_ATTR, user); getASLSession().setAttribute(UserInfo.USER_INFO_ATTR, user);
} }
@ -330,7 +334,7 @@ public class ShareUpdateServiceImpl extends RemoteServiceServlet implements Shar
sb.append("<span style=\"color:gray; font-size:12px;\">shared </span><a class=\"link\" href=\"").append(url).append("\" target=\"_blank\">").append("a file.").append("</a> ").toString(); sb.append("<span style=\"color:gray; font-size:12px;\">shared </span><a class=\"link\" href=\"").append(url).append("\" target=\"_blank\">").append("a file.").append("</a> ").toString();
return sb.toString(); return sb.toString();
} }
@Override @Override
public UserSettings getUserSettings() { public UserSettings getUserSettings() {
try { try {
@ -357,13 +361,14 @@ public class ShareUpdateServiceImpl extends RemoteServiceServlet implements Shar
return toReturn; return toReturn;
} }
else { else {
_log.info("Returning test USER");
_log.info("Returning test USER = " + session.getUsername());
HashMap<String, String> fakeVreNames = new HashMap<String, String>(); HashMap<String, String> fakeVreNames = new HashMap<String, String>();
fakeVreNames.put("/gcube/devsec/devVRE","devVRE"); //fakeVreNames.put("/gcube/devsec/devVRE","devVRE");
//fakeVreNames.put("/gcube/devNext/NexNext","NexNext"); //fakeVreNames.put("/gcube/devNext/NexNext","NexNext");
UserInfo user = new UserInfo(getASLSession().getUsername(), fullName, thumbnailURL, email, "fakeAccountUrl", true, false, fakeVreNames); UserInfo user = new UserInfo(session.getUsername(), fullName, thumbnailURL, email, "fakeAccountUrl", true, false, fakeVreNames);
return new UserSettings(user, 0, session.getScopeName(), isInfrastructureScope()); return new UserSettings(user, 0, session.getScopeName(), false);
} }
} catch (Exception e) { } catch (Exception e) {
@ -385,11 +390,11 @@ public class ShareUpdateServiceImpl extends RemoteServiceServlet implements Shar
ScopeProvider.instance.set("/"+PortalContext.getConfiguration().getInfrastructureName()); ScopeProvider.instance.set("/"+PortalContext.getConfiguration().getInfrastructureName());
IClient storageClient = new StorageClient(STORAGE_OWNER, AccessType.SHARED, MemoryType.PERSISTENT).getClient(); IClient storageClient = new StorageClient(STORAGE_OWNER, AccessType.SHARED, MemoryType.PERSISTENT).getClient();
ScopeProvider.instance.set(currScope); ScopeProvider.instance.set(currScope);
String httpURL = ""; String httpURL = "";
//get the url to show, before actually uploading it //get the url to show, before actually uploading it
String smpURI = storageClient.getUrl().RFile(remoteFilePath); String smpURI = storageClient.getUrl().RFile(remoteFilePath);
//The storage uploader Thread starts here asyncronously //The storage uploader Thread starts here asyncronously
Thread thread = new Thread(new UploadToStorageThread(storageClient, fileName, fileabsolutePathOnServer, remoteFilePath)); Thread thread = new Thread(new UploadToStorageThread(storageClient, fileName, fileabsolutePathOnServer, remoteFilePath));
thread.start(); thread.start();
@ -426,7 +431,7 @@ public class ShareUpdateServiceImpl extends RemoteServiceServlet implements Shar
return FilePreviewer.getUnhandledTypePreview(fileName, fileabsolutePathOnServer, httpURL, mimeType); return FilePreviewer.getUnhandledTypePreview(fileName, fileabsolutePathOnServer, httpURL, mimeType);
} }
} catch (Exception e) { } catch (Exception e) {
_log.error("Error while resolving or previewing file"); _log.error("Error while resolving or previewing file");
e.printStackTrace(); e.printStackTrace();
@ -449,14 +454,14 @@ public class ShareUpdateServiceImpl extends RemoteServiceServlet implements Shar
*/ */
protected static String getMimeType(File file, String filenameWithExtension) throws IOException { protected static String getMimeType(File file, String filenameWithExtension) throws IOException {
TikaConfig config = TikaConfig.getDefaultConfig(); TikaConfig config = TikaConfig.getDefaultConfig();
Detector detector = config.getDetector(); Detector detector = config.getDetector();
TikaInputStream stream = TikaInputStream.get(file); TikaInputStream stream = TikaInputStream.get(file);
Metadata metadata = new Metadata(); Metadata metadata = new Metadata();
metadata.add(Metadata.RESOURCE_NAME_KEY, filenameWithExtension); metadata.add(Metadata.RESOURCE_NAME_KEY, filenameWithExtension);
MediaType mediaType = detector.detect(stream, metadata); MediaType mediaType = detector.detect(stream, metadata);
return mediaType.getBaseType().toString(); return mediaType.getBaseType().toString();
} }
/** /**
* return the id as key and the names as value of the vre a user is subscribed to * return the id as key and the names as value of the vre a user is subscribed to
* @param username * @param username
@ -564,11 +569,11 @@ public class ShareUpdateServiceImpl extends RemoteServiceServlet implements Shar
} }
String toReturn = html.replaceAll("&", "&amp;").replaceAll("<", "&lt;") String toReturn = html.replaceAll("&", "&amp;").replaceAll("<", "&lt;")
.replaceAll(">", "&gt;"); .replaceAll(">", "&gt;");
// then replace all the line breaks by <br/>, and all the double spaces by the html version &nbsp; // then replace all the line breaks by <br/>, and all the double spaces by the html version &nbsp;
toReturn = toReturn.replaceAll("(\r\n|\n)","<br />"); toReturn = toReturn.replaceAll("(\r\n|\n)","<br />");
toReturn = toReturn.replaceAll("\\s\\s","&nbsp;&nbsp;"); toReturn = toReturn.replaceAll("\\s\\s","&nbsp;&nbsp;");
return toReturn; return toReturn;
} }
@ -638,6 +643,9 @@ public class ShareUpdateServiceImpl extends RemoteServiceServlet implements Shar
_log.error("url is not reachable"); _log.error("url is not reachable");
return null; return null;
} }
//pretend you're a browser (make my request from Java more browsery-like.)
siteConnection.addRequestProperty("User-Agent", "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.11 (KHTML, like Gecko) Chrome/23.0.1271.95 Safari/537.11");
String title; String title;
String description; String description;
ArrayList<String> imageUrls = new ArrayList<String>(); ArrayList<String> imageUrls = new ArrayList<String>();
@ -651,7 +659,7 @@ public class ShareUpdateServiceImpl extends RemoteServiceServlet implements Shar
if (ogLink == null || ogLink.getContent("title") == null) { if (ogLink == null || ogLink.getContent("title") == null) {
//there is no OpenGraph for this link //there is no OpenGraph for this link
_log.info("No OpenGraph Found, going Best guess from page content") ; _log.info("No OpenGraph Found, going Best guess from page content") ;
toReturn = getInfoFromHTML(pageURL, linkToCheck, host); toReturn = getInfoFromHTML(siteConnection, pageURL, linkToCheck, host);
} else { } else {
//there is OpenGraph //there is OpenGraph
title = ogLink.getContent("title"); title = ogLink.getContent("title");
@ -661,7 +669,7 @@ public class ShareUpdateServiceImpl extends RemoteServiceServlet implements Shar
if (ogLink.getContent("image") != null) if (ogLink.getContent("image") != null)
imageUrls.add(ogLink.getContent("image")); imageUrls.add(ogLink.getContent("image"));
else { else {
ArrayList<String> images = getImagesFromHTML(pageURL); ArrayList<String> images = getImagesFromHTML(siteConnection, pageURL);
if (! images.isEmpty()) if (! images.isEmpty())
imageUrls = images; imageUrls = images;
} }
@ -681,11 +689,10 @@ public class ShareUpdateServiceImpl extends RemoteServiceServlet implements Shar
* @return a list of image url * @return a list of image url
* @throws IOException * @throws IOException
*/ */
private ArrayList<String> getImagesFromHTML(URL pageURL) throws IOException { private ArrayList<String> getImagesFromHTML(URLConnection connection, URL pageURL) throws IOException {
ArrayList<String> toReturn = new ArrayList<String>(); ArrayList<String> toReturn = new ArrayList<String>();
InputStream input = pageURL.openStream();
try { try {
Document document = new Tidy().parseDOM(input, null); Document document = new Tidy().parseDOM(pageURL.openStream(), null);
NodeList imgs = document.getElementsByTagName("img"); NodeList imgs = document.getElementsByTagName("img");
int upTo = (imgs.getLength() > 15) ? 15 : imgs.getLength(); int upTo = (imgs.getLength() > 15) ? 15 : imgs.getLength();
for (int i = 0; i < upTo; i++) { for (int i = 0; i < upTo; i++) {
@ -707,49 +714,126 @@ public class ShareUpdateServiceImpl extends RemoteServiceServlet implements Shar
* @return a LinPreview object instance filled with the extracted information * @return a LinPreview object instance filled with the extracted information
* @throws IOException * @throws IOException
*/ */
private LinkPreview getInfoFromHTML(URL pageUrl, String link, String host) throws Exception { private LinkPreview getInfoFromHTML(URLConnection connection, URL pageUrl, String link, String host) throws Exception {
LinkPreview toReturn = null; LinkPreview toReturn = null;
String title = ""; String title = "";
String description = ""; String description = "";
InputStream input = pageUrl.openStream(); URLConnection conn = pageUrl.openConnection();
Document document = new Tidy().parseDOM(input, null); //pretend you're a browser (make my request from Java more browsery-like.)
NodeList titles = document.getElementsByTagName("title"); conn.addRequestProperty("User-Agent", "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.11 (KHTML, like Gecko) Chrome/23.0.1271.95 Safari/537.11");
if (titles != null && titles.getLength()>0) {
if (titles.item(0).getFirstChild() == null || titles.item(0).getFirstChild().getNodeValue() == null) { MetaSeeker ms = null;
_log.error("[MANUAL-PARSE] Something wrong with the title element, returning ... "); try {
return toReturn; title = getTitleFromHeader(pageUrl);
} ms = new MetaSeeker(connection, pageUrl);
title = titles.item(0).getFirstChild().getNodeValue();
MetaSeeker ms = null;
try {
ms = new MetaSeeker(link);
} catch(Exception e) {
_log.error("[MANUAL-PARSE] Something wrong with the meta seeker returning ... ");
return toReturn;
}
//try the metadata, otherwise ask the guesser //try the metadata, otherwise ask the guesser
description = (ms.getContent("description") != null && ! ms.getContent("description").isEmpty()) ? ms.getContent("description") : createDescriptionFromContent(link); description = (ms.getContent("description") != null && ! ms.getContent("description").isEmpty()) ? ms.getContent("description") : createDescriptionFromContent(link);
ArrayList<String> images = new ArrayList<String>(); ArrayList<String> images = new ArrayList<String>();
NodeList imgs = document.getElementsByTagName("img"); images = getImagesWithCleaner(pageUrl);
int upTo = (imgs.getLength() > 15) ? 15 : imgs.getLength();
for (int i = 0; i < upTo; i++) {
String imageUrl = imgs.item(i).getAttributes().getNamedItem("src").getNodeValue();
if (imageUrl.startsWith("/"))
imageUrl = pageUrl.getProtocol()+"://"+pageUrl.getHost()+imageUrl;
else if (!imageUrl.contains("/")) { //then the image is probably in the same folder
// e.g. http://www.fao.org/docrep/018/i3328e/i3328e00.htm?utm_source
String imageFolder = pageUrl.toString().substring(0, pageUrl.toString().lastIndexOf("/"));
imageUrl= imageFolder + "/" + imageUrl;
}
images.add(imageUrl);
_log.trace("[FOUND image] " + imageUrl);
}
toReturn = new LinkPreview(title, description, link, host, images); toReturn = new LinkPreview(title, description, link, host, images);
} catch(Exception e) {
_log.error("[MANUAL-PARSE] Something wrong with the meta seeker returning ... ");
return toReturn;
} }
return toReturn; return toReturn;
} }
/**
 * Reads the page title from the head section of the given page.
 * <p>
 * The page is fetched with a browser-like User-Agent header and is read line by line
 * only up to the first line containing the closing <code>&lt;/head&gt;</code> tag; an
 * empty body is then appended so that HtmlCleaner parses a complete document.
 *
 * @param pageURL the URL of the page to read
 * @return the content of the first title element, or null if the page has no title
 *         element or the title element is empty
 * @throws IOException if the page cannot be opened or read
 */
private String getTitleFromHeader(URL pageURL) throws IOException {
	URLConnection conn = pageURL.openConnection();
	//pretend you're a browser (make my request from Java more browsery-like.)
	conn.addRequestProperty("User-Agent", "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.11 (KHTML, like Gecko) Chrome/23.0.1271.95 Safari/537.11");
	Charset charset = OpenGraph.getConnectionCharset(conn);
	StringBuilder headContents = new StringBuilder();
	BufferedReader dis = new BufferedReader(new InputStreamReader(conn.getInputStream(), charset));
	try {
		String inputLine;
		// Loop through each line, looking for the closing head element
		while ((inputLine = dis.readLine()) != null) {
			int headEnd = inputLine.indexOf("</head>");
			if (headEnd >= 0) {
				// keep everything up to and including "</head>" (7 chars), then close the document
				headContents.append(inputLine.substring(0, headEnd + 7))
					.append("<body></body></html>")
					.append("\r\n");
				break;
			}
			headContents.append(inputLine).append("\r\n");
		}
	} finally {
		// always release the connection stream, also when reading fails
		dis.close();
	}

	HtmlCleaner cleaner = new HtmlCleaner();
	// parse the string HTML
	TagNode pageData = cleaner.clean(headContents.toString());
	// open only the title tags
	TagNode[] title = pageData.getElementsByName("title", true);
	if (title != null && title.length > 0) {
		java.util.List<?> titleChildren = title[0].getChildren();
		if (titleChildren == null || titleChildren.isEmpty()) {
			// an empty <title></title> has no child to read
			return null;
		}
		String theTitle = titleChildren.get(0).toString();
		_log.debug("theTitle: " + theTitle);
		return theTitle;
	}
	return null;
}
/**
 * Reads the images of a page with the HtmlCleaner API (to be used if jTidy has problems).
 * <p>
 * The whole page is fetched with a browser-like User-Agent header and parsed; only the
 * first 15 img elements are considered, and those without a usable src attribute are
 * skipped. Each src value is resolved against the page URL, so absolute, host-relative
 * ("/a.png"), protocol-relative ("//host/a.png") and path-relative ("a.png", "img/a.png",
 * "../a.png") references all become absolute URLs.
 *
 * @param pageURL the URL of the page to read
 * @return the list of absolute image URLs found, possibly empty
 * @throws IOException if the page cannot be opened or read
 */
private ArrayList<String> getImagesWithCleaner(URL pageURL) throws IOException {
	ArrayList<String> images = new ArrayList<String>();
	URLConnection conn = pageURL.openConnection();
	//pretend you're a browser (make my request from Java more browsery-like.)
	conn.addRequestProperty("User-Agent", "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.11 (KHTML, like Gecko) Chrome/23.0.1271.95 Safari/537.11");
	Charset charset = OpenGraph.getConnectionCharset(conn);
	StringBuilder pageContents = new StringBuilder();
	BufferedReader dis = new BufferedReader(new InputStreamReader(conn.getInputStream(), charset));
	try {
		String inputLine;
		// read the whole page, the images are in the body
		while ((inputLine = dis.readLine()) != null) {
			pageContents.append(inputLine).append("\r\n");
		}
	} finally {
		// always release the connection stream, also when reading fails
		dis.close();
	}

	HtmlCleaner cleaner = new HtmlCleaner();
	// parse the string HTML
	TagNode pageData = cleaner.clean(pageContents.toString());
	// open only the img tags
	TagNode[] imgs = pageData.getElementsByName("img", true);
	int upTo = Math.min(imgs.length, 15);
	for (int i = 0; i < upTo; i++) {
		if (!imgs[i].hasAttribute("src")) {
			continue;
		}
		String src = imgs[i].getAttributeByName("src");
		if (src == null || src.trim().isEmpty()) {
			continue;
		}
		try {
			// let java.net.URL resolve the reference against the page URL instead of
			// concatenating strings: this keeps the port and handles "../", "//host/..."
			// and page URLs ending with a file name or a query string
			String imageUrl = new URL(pageURL, src.trim()).toExternalForm();
			images.add(imageUrl);
			_log.trace("[FOUND image] " + imageUrl);
		} catch (MalformedURLException e) {
			// e.g. inline "data:" images, there is no fetchable URL to return for them
			_log.debug("[SKIPPED image] cannot resolve src against " + pageURL);
		}
	}
	return images;
}
/** /**
* generate the description parsing the content (Best Guess) * generate the description parsing the content (Best Guess)
* @param link the link to check * @param link the link to check
@ -857,18 +941,5 @@ public class ShareUpdateServiceImpl extends RemoteServiceServlet implements Shar
} }
return portalUsers; return portalUsers;
} }
/**
*
* @return the workspace instance
* @throws InternalErrorException
* @throws HomeNotFoundException
* @throws WorkspaceFolderNotFoundException
*/
private Workspace getWorkspace() throws InternalErrorException, HomeNotFoundException, WorkspaceFolderNotFoundException {
final ASLSession session = getASLSession();
Workspace workspace = HomeLibrary.getUserWorkspace(session.getUsername());
return workspace;
}
} }

View File

@ -8,6 +8,8 @@ import java.util.Date;
import org.gcube.common.homelibrary.home.HomeLibrary; import org.gcube.common.homelibrary.home.HomeLibrary;
import org.gcube.common.homelibrary.home.workspace.Workspace; import org.gcube.common.homelibrary.home.workspace.Workspace;
import org.gcube.common.homelibrary.home.workspace.exceptions.ItemAlreadyExistException; import org.gcube.common.homelibrary.home.workspace.exceptions.ItemAlreadyExistException;
import org.gcube.common.portal.PortalContext;
import org.gcube.common.scope.api.ScopeProvider;
import org.slf4j.Logger; import org.slf4j.Logger;
import org.slf4j.LoggerFactory; import org.slf4j.LoggerFactory;
@ -46,26 +48,37 @@ public class UploadToWorkspaceThread implements Runnable {
@Override @Override
public void run() { public void run() {
try { try {
String currScope = ScopeProvider.instance.get();
ScopeProvider.instance.set("/"+PortalContext.getConfiguration().getInfrastructureName());
Workspace ws = HomeLibrary Workspace ws = HomeLibrary
.getHomeManagerFactory() .getHomeManagerFactory()
.getHomeManager() .getHomeManager()
.getHome(username).getWorkspace(); .getHome(username).getWorkspace();
_log.info("File to upload="+fileabsolutePathOnServer);
File file = new File(fileabsolutePathOnServer); File file = new File(fileabsolutePathOnServer);
String mimeType = ShareUpdateServiceImpl.getMimeType(file, fileName); String mimeType = ShareUpdateServiceImpl.getMimeType(file, fileName);
InputStream fileData = new FileInputStream(file); InputStream fileData = new FileInputStream(file);
String theId = ""; String theId = "";
_log.info("mimeType="+mimeType + " fileData null? " + (fileData == null) );
try { try {
theId = ws.createExternalFile(fileName ,"File added automatically by Share Updates" , mimeType ,fileData, ws.getRoot().getId()).getId(); theId = ws.createExternalFile(fileName ,"File added automatically by Share Updates" , mimeType ,fileData, ws.getRoot().getId()).getId();
} }
catch (NullPointerException exn) {
_log.warn("null pointer");
exn.printStackTrace();
}
catch (ItemAlreadyExistException ex) { catch (ItemAlreadyExistException ex) {
_log.warn("fileName " + fileName + " exists, appending timestamp"); _log.warn("fileName " + fileName + " exists, appending timestamp");
theId = ws.createExternalFile(fileName+" ("+ new Date()+")" ,"File added automatically by Share Updates" , mimeType ,fileData, ws.getRoot().getId()).getId(); theId = ws.createExternalFile(fileName+" ("+ new Date()+")" ,"File added automatically by Share Updates" , mimeType ,fileData, ws.getRoot().getId()).getId();
ex.printStackTrace();
} finally { } finally {
fileData.close(); fileData.close();
} }
fileData.close(); fileData.close();
_log.debug("Uploaded " + fileName + " - Returned Workspace id=" + theId); _log.debug("Uploaded " + fileName + " - Returned Workspace id=" + theId);
ScopeProvider.instance.set(currScope);
} }
catch (Exception e) { catch (Exception e) {

View File

@ -3,9 +3,12 @@ package org.gcube.portlets.user.shareupdates.server.metaseeker;
import java.io.BufferedReader; import java.io.BufferedReader;
import java.io.InputStreamReader; import java.io.InputStreamReader;
import java.net.URL; import java.net.URL;
import java.net.URLConnection;
import java.nio.charset.Charset;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.Hashtable; import java.util.Hashtable;
import org.gcube.portlets.user.shareupdates.server.opengraph.OpenGraph;
import org.htmlcleaner.HtmlCleaner; import org.htmlcleaner.HtmlCleaner;
import org.htmlcleaner.TagNode; import org.htmlcleaner.TagNode;
import org.slf4j.Logger; import org.slf4j.Logger;
@ -61,12 +64,11 @@ public class MetaSeeker {
* @param url The address to the web page to fetch the meta * @param url The address to the web page to fetch the meta
* @throws java.io.IOException If a network error occurs, the HTML parser will throw an IO Exception * @throws java.io.IOException If a network error occurs, the HTML parser will throw an IO Exception
*/ */
public MetaSeeker(String url) throws java.io.IOException, Exception { public MetaSeeker(URLConnection connection, URL httpURL) throws java.io.IOException, Exception {
this(); this();
isImported = true; isImported = true;
// download the (X)HTML content, but only up to the closing head tag. We do not want to waste resources parsing irrelevant content Charset charset = OpenGraph.getConnectionCharset(connection);
URL httpURL = new URL(url); BufferedReader dis = new BufferedReader(new InputStreamReader(connection.getInputStream(), charset));
BufferedReader dis = new BufferedReader(new InputStreamReader(httpURL.openStream()));
String inputLine; String inputLine;
StringBuffer headContents = new StringBuffer(); StringBuffer headContents = new StringBuffer();

View File

@ -4,6 +4,7 @@ import org.htmlcleaner.HtmlCleaner;
import org.htmlcleaner.TagNode; import org.htmlcleaner.TagNode;
import java.io.BufferedReader; import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader; import java.io.InputStreamReader;
import java.net.URL; import java.net.URL;
import java.net.URLConnection; import java.net.URLConnection;
@ -21,74 +22,74 @@ import java.util.regex.Pattern;
*/ */
public class OpenGraph public class OpenGraph
{ {
private String pageUrl; private String pageUrl;
private ArrayList<OpenGraphNamespace> pageNamespaces; private ArrayList<OpenGraphNamespace> pageNamespaces;
private Hashtable<String, ArrayList<MetaElement>> metaAttributes; private Hashtable<String, ArrayList<MetaElement>> metaAttributes;
private String baseType; private String baseType;
private boolean isImported; // determine if the object is a new incarnation or representation of a web page private boolean isImported; // determine if the object is a new incarnation or representation of a web page
private boolean hasChanged; // track if object has been changed private boolean hasChanged; // track if object has been changed
public final static String[] REQUIRED_META = new String[]{"title", "type", "image", "url" }; public final static String[] REQUIRED_META = new String[]{"title", "type", "image", "url" };
public final static Hashtable<String, String[]> BASE_TYPES = new Hashtable<String, String[]>(); public final static Hashtable<String, String[]> BASE_TYPES = new Hashtable<String, String[]>();
static static
{ {
BASE_TYPES.put("activity", new String[] {"activity", "sport"}); BASE_TYPES.put("activity", new String[] {"activity", "sport"});
BASE_TYPES.put("business", new String[] {"bar", "company", "cafe", "hotel", "restaurant"}); BASE_TYPES.put("business", new String[] {"bar", "company", "cafe", "hotel", "restaurant"});
BASE_TYPES.put("group", new String[] {"cause", "sports_league", "sports_team"}); BASE_TYPES.put("group", new String[] {"cause", "sports_league", "sports_team"});
BASE_TYPES.put("organization", new String[] {"band", "government", "non_profit", "school", "university"}); BASE_TYPES.put("organization", new String[] {"band", "government", "non_profit", "school", "university"});
BASE_TYPES.put("person", new String[] {"actor", "athlete", "author", "director", "musician", "politician", "profile", "public_figure"}); BASE_TYPES.put("person", new String[] {"actor", "athlete", "author", "director", "musician", "politician", "profile", "public_figure"});
BASE_TYPES.put("place", new String[] {"city", "country", "landmark", "state_province"}); BASE_TYPES.put("place", new String[] {"city", "country", "landmark", "state_province"});
BASE_TYPES.put("product", new String[] {"album", "book", "drink", "food", "game", "movie", "product", "song", "tv_show"}); BASE_TYPES.put("product", new String[] {"album", "book", "drink", "food", "game", "movie", "product", "song", "tv_show"});
BASE_TYPES.put("website", new String[] {"blog", "website", "article"}); BASE_TYPES.put("website", new String[] {"blog", "website", "article"});
} }
/** /**
* Create an open graph representation for generating your own Open Graph object * Create an open graph representation for generating your own Open Graph object
*/ */
public OpenGraph() public OpenGraph()
{ {
pageNamespaces = new ArrayList<OpenGraphNamespace>(); pageNamespaces = new ArrayList<OpenGraphNamespace>();
metaAttributes = new Hashtable<String, ArrayList<MetaElement>>(); metaAttributes = new Hashtable<String, ArrayList<MetaElement>>();
hasChanged = false; hasChanged = false;
isImported = false; isImported = false;
} }
/** /**
* Fetch the open graph representation from a web site * Fetch the open graph representation from a web site
* @param url The address to the web page to fetch Open Graph data * @param url The address to the web page to fetch Open Graph data
* @param ignoreSpecErrors Set this option to true if you don't wish to have an exception thrown if the page does not conform to the basic 4 attributes * @param ignoreSpecErrors Set this option to true if you don't wish to have an exception thrown if the page does not conform to the basic 4 attributes
* @throws java.io.IOException If a network error occurs, the HTML parser will throw an IO Exception * @throws java.io.IOException If a network error occurs, the HTML parser will throw an IO Exception
* @throws java.lang.Exception A generic exception is thrown if the specific page fails to conform to the basic Open Graph standard as defined by the constant REQUIRED_META * @throws java.lang.Exception A generic exception is thrown if the specific page fails to conform to the basic Open Graph standard as defined by the constant REQUIRED_META
*/ */
public OpenGraph(String url, boolean ignoreSpecErrors, URLConnection siteConnection) throws java.io.IOException, Exception { public OpenGraph(String url, boolean ignoreSpecErrors, URLConnection siteConnection) throws java.io.IOException, Exception {
this(); this();
isImported = true; isImported = true;
// download the (X)HTML content, but only up to the closing head tag. We do not want to waste resources parsing irrelevant content // download the (X)HTML content, but only up to the closing head tag. We do not want to waste resources parsing irrelevant content
Charset charset = getConnectionCharset(siteConnection); Charset charset = getConnectionCharset(siteConnection);
BufferedReader dis = new BufferedReader(new InputStreamReader(siteConnection.getInputStream(), charset)); BufferedReader dis = new BufferedReader(new InputStreamReader(siteConnection.getInputStream(), charset));
String inputLine; String inputLine;
StringBuffer headContents = new StringBuffer(); StringBuffer headContents = new StringBuffer();
// Loop through each line, looking for the closing head element // Loop through each line, looking for the closing head element
while ((inputLine = dis.readLine()) != null) while ((inputLine = dis.readLine()) != null)
{ {
if (inputLine.contains("</head>")) if (inputLine.contains("</head>"))
{ {
inputLine = inputLine.substring(0, inputLine.indexOf("</head>") + 7); inputLine = inputLine.substring(0, inputLine.indexOf("</head>") + 7);
inputLine = inputLine.concat("<body></body></html>"); inputLine = inputLine.concat("<body></body></html>");
headContents.append(inputLine + "\r\n"); headContents.append(inputLine + "\r\n");
break; break;
} }
headContents.append(inputLine + "\r\n"); headContents.append(inputLine + "\r\n");
} }
String headContentsStr = headContents.toString(); String headContentsStr = headContents.toString();
HtmlCleaner cleaner = new HtmlCleaner(); HtmlCleaner cleaner = new HtmlCleaner();
// parse the string HTML // parse the string HTML
TagNode pageData = cleaner.clean(headContentsStr); TagNode pageData = cleaner.clean(headContentsStr);
// read in the declared namespaces // read in the declared namespaces
boolean hasOGspec = false; boolean hasOGspec = false;
@ -100,12 +101,12 @@ public class OpenGraph
Matcher matcher = pattern.matcher(namespaceData); Matcher matcher = pattern.matcher(namespaceData);
while (matcher.find()) while (matcher.find())
{ {
String prefix = matcher.group(2); String prefix = matcher.group(2);
String documentURI = matcher.group(3); String documentURI = matcher.group(3);
pageNamespaces.add(new OpenGraphNamespace(prefix, documentURI)); pageNamespaces.add(new OpenGraphNamespace(prefix, documentURI));
if (prefix.equals("og")) if (prefix.equals("og"))
hasOGspec = true; hasOGspec = true;
} }
} }
// some pages do not include the new OG spec // some pages do not include the new OG spec
@ -113,17 +114,17 @@ public class OpenGraph
if (!hasOGspec) if (!hasOGspec)
pageNamespaces.add(new OpenGraphNamespace("og", "http:// ogp.me/ns#")); pageNamespaces.add(new OpenGraphNamespace("og", "http:// ogp.me/ns#"));
// open only the meta tags // open only the meta tags
TagNode[] metaData = pageData.getElementsByName("meta", true); TagNode[] metaData = pageData.getElementsByName("meta", true);
for (TagNode metaElement : metaData) for (TagNode metaElement : metaData)
{ {
for (OpenGraphNamespace namespace : pageNamespaces) for (OpenGraphNamespace namespace : pageNamespaces)
{ {
String target = null; String target = null;
if (metaElement.hasAttribute("property")) if (metaElement.hasAttribute("property"))
target = "property"; target = "property";
else if (metaElement.hasAttribute("name")) else if (metaElement.hasAttribute("name"))
target = "name"; target = "name";
if (target != null && metaElement.getAttributeByName(target).startsWith(namespace.getPrefix() + ":")) if (target != null && metaElement.getAttributeByName(target).startsWith(namespace.getPrefix() + ":"))
{ {
@ -131,134 +132,141 @@ public class OpenGraph
break; break;
} }
} }
} }
/** /**
* Check that page conforms to Open Graph protocol * Check that page conforms to Open Graph protocol
*/ */
if (!ignoreSpecErrors) if (!ignoreSpecErrors)
{ {
for (String req : REQUIRED_META) for (String req : REQUIRED_META)
{ {
if (!metaAttributes.containsKey(req)) if (!metaAttributes.containsKey(req))
throw new Exception("Does not conform to Open Graph protocol"); throw new Exception("Does not conform to Open Graph protocol");
} }
} }
/** /**
* Has conformed, now determine basic sub type. * Has conformed, now determine basic sub type.
*/ */
baseType = null; baseType = null;
String currentType = getContent("type"); String currentType = getContent("type");
// read the original page url // read the original page url
URL realURL = siteConnection.getURL(); URL realURL = siteConnection.getURL();
pageUrl = realURL.toExternalForm(); pageUrl = realURL.toExternalForm();
} }
/** /**
* Gets the charset for specified connection. * Gets the charset for specified connection.
* Content Type header is parsed to get the charset name. * Content Type header is parsed to get the charset name.
* *
* @param connection the connection. * @param connection the connection.
* @return the Charset object for response charset name; * @return the Charset object for response charset name;
* if it's not found then the default charset. * if it's not found then the default charset.
*/ */
private static Charset getConnectionCharset(URLConnection connection) public static Charset getConnectionCharset(URLConnection connection) {
{ String contentType = null;
String contentType = connection.getContentType(); try {
if (contentType != null && contentType.length() > 0) contentType = connection.getContentType();
{ }
contentType = contentType.toLowerCase(); catch (Exception e) {
String charsetName = extractCharsetName(contentType); // specified charset is not found,
if (charsetName != null && charsetName.length() > 0) // skip it to return the default one
{ return Charset.defaultCharset();
try }
{ if (contentType != null && contentType.length() > 0)
return Charset.forName(charsetName); {
} contentType = contentType.toLowerCase();
catch (Exception e) { String charsetName = extractCharsetName(contentType);
// specified charset is not found, if (charsetName != null && charsetName.length() > 0)
// skip it to return the default one {
} try
} {
} return Charset.forName(charsetName);
}
catch (Exception e) {
// specified charset is not found,
// skip it to return the default one
}
}
}
// return the default charset // return the default charset
return Charset.defaultCharset(); return Charset.defaultCharset();
} }
/** /**
* Extract the charset name from the content type string. * Extract the charset name from the content type string.
* Content type string is received from Content-Type header. * Content type string is received from Content-Type header.
* *
* @param contentType the content type string, must not be null. * @param contentType the content type string, must not be null.
* @return the found charset name or null if not found. * @return the found charset name or null if not found.
*/ */
private static String extractCharsetName(String contentType) private static String extractCharsetName(String contentType)
{
// split onto media types
final String[] mediaTypes = contentType.split(":");
if (mediaTypes.length > 0)
{
// use only the first one, and split it on parameters
final String[] params = mediaTypes[0].split(";");
// find the charset parameter and return it's value
for (String each : params)
{
each = each.trim();
if (each.startsWith("charset="))
{
// return the charset name
return each.substring(8).trim();
}
}
}
return null;
}
/**
* Get the basic type of the Open graph page as per the specification
* @return Base type as defined by specification, null otherwise
*/
public String getBaseType()
{ {
return baseType; // split onto media types
} final String[] mediaTypes = contentType.split(":");
if (mediaTypes.length > 0)
{
// use only the first one, and split it on parameters
final String[] params = mediaTypes[0].split(";");
/** // find the charset parameter and return it's value
* Get a value of a given Open Graph property for (String each : params)
* @param property The Open graph property key {
* @return Returns the value of the first property defined, null otherwise each = each.trim();
*/ if (each.startsWith("charset="))
public String getContent(String property) {
// return the charset name
return each.substring(8).trim();
}
}
}
return null;
}
/**
* Get the basic type of the Open graph page as per the specification
* @return Base type as defined by specification, null otherwise
*/
public String getBaseType()
{ {
if (metaAttributes.containsKey(property) && metaAttributes.get(property).size() > 0) return baseType;
}
/**
* Get a value of a given Open Graph property
* @param property The Open graph property key
* @return Returns the value of the first property defined, null otherwise
*/
public String getContent(String property)
{
if (metaAttributes.containsKey(property) && metaAttributes.get(property).size() > 0)
return metaAttributes.get(property).get(0).getContent(); return metaAttributes.get(property).get(0).getContent();
else else
return null; return null;
} }
/** /**
* Get all the defined properties of the Open Graph object * Get all the defined properties of the Open Graph object
* @return An array of all currently defined properties * @return An array of all currently defined properties
*/ */
public MetaElement[] getProperties() public MetaElement[] getProperties()
{ {
ArrayList<MetaElement> allElements = new ArrayList<MetaElement>(); ArrayList<MetaElement> allElements = new ArrayList<MetaElement>();
for (ArrayList<MetaElement> collection : metaAttributes.values()) for (ArrayList<MetaElement> collection : metaAttributes.values())
allElements.addAll(collection); allElements.addAll(collection);
return (MetaElement[]) allElements.toArray(new MetaElement[allElements.size()]); return (MetaElement[]) allElements.toArray(new MetaElement[allElements.size()]);
} }
/** /**
* Get all the defined properties of the Open Graph object * Get all the defined properties of the Open Graph object
* @param property The property to focus on * @param property The property to focus on
* @return An array of all currently defined properties * @return An array of all currently defined properties
*/ */
public MetaElement[] getProperties(String property) public MetaElement[] getProperties(String property)
{ {
if (metaAttributes.containsKey(property)) if (metaAttributes.containsKey(property))
{ {
@ -267,69 +275,69 @@ public class OpenGraph
} }
else else
return null; return null;
} }
/** /**
* Get the original URL the Open Graph page was obtained from * Get the original URL the Open Graph page was obtained from
* @return The address to the Open Graph object page * @return The address to the Open Graph object page
*/ */
public String getOriginalUrl() public String getOriginalUrl()
{ {
return pageUrl; return pageUrl;
} }
/** /**
* Get the HTML representation of the Open Graph data. * Get the HTML representation of the Open Graph data.
* @return An array of meta elements as Strings * @return An array of meta elements as Strings
*/ */
public String[] toHTML() public String[] toHTML()
{ {
// allocate the array // allocate the array
ArrayList<String> returnHTML = new ArrayList<String>(); ArrayList<String> returnHTML = new ArrayList<String>();
int index = 0; // keep track of the index to insert into int index = 0; // keep track of the index to insert into
for (ArrayList<MetaElement> elements : metaAttributes.values()) for (ArrayList<MetaElement> elements : metaAttributes.values())
{ {
for (MetaElement element : elements) for (MetaElement element : elements)
returnHTML.add("<meta property=\"" + element.getNamespace() + ":" + returnHTML.add("<meta property=\"" + element.getNamespace() + ":" +
element.getProperty() + "\" content=\"" + element.getContent() + "\" />"); element.getProperty() + "\" content=\"" + element.getContent() + "\" />");
} }
// return the array // return the array
return (String[]) returnHTML.toArray(); return (String[]) returnHTML.toArray();
} }
/** /**
* Get the XHTML representation of the Open Graph data. * Get the XHTML representation of the Open Graph data.
* @return An array of meta elements as Strings * @return An array of meta elements as Strings
*/ */
public String[] toXHTML() public String[] toXHTML()
{ {
// allocate the array // allocate the array
ArrayList<String> returnHTML = new ArrayList<String>(); ArrayList<String> returnHTML = new ArrayList<String>();
int index = 0; // keep track of the index to insert into int index = 0; // keep track of the index to insert into
for (ArrayList<MetaElement> elements : metaAttributes.values()) for (ArrayList<MetaElement> elements : metaAttributes.values())
{ {
for (MetaElement element : elements) for (MetaElement element : elements)
returnHTML.add("<meta name=\"" + element.getNamespace().getPrefix() + ":" + returnHTML.add("<meta name=\"" + element.getNamespace().getPrefix() + ":" +
element.getProperty() + "\" content=\"" + element.getContent() + "\" />"); element.getProperty() + "\" content=\"" + element.getContent() + "\" />");
} }
// return the array // return the array
return (String[]) returnHTML.toArray(); return (String[]) returnHTML.toArray();
} }
/** /**
* Set the Open Graph property to a specific value * Set the Open Graph property to a specific value
* @param namespace The OpenGraph namespace the content belongs to * @param namespace The OpenGraph namespace the content belongs to
* @param property The og:XXXX where XXXX is the property you wish to set * @param property The og:XXXX where XXXX is the property you wish to set
* @param content The value or contents of the property to be set * @param content The value or contents of the property to be set
*/ */
public void setProperty(OpenGraphNamespace namespace, String property, String content) public void setProperty(OpenGraphNamespace namespace, String property, String content)
{ {
if (!pageNamespaces.contains(namespace)) if (!pageNamespaces.contains(namespace))
pageNamespaces.add(namespace); pageNamespaces.add(namespace);
property = property.replaceAll(namespace.getPrefix() + ":", ""); property = property.replaceAll(namespace.getPrefix() + ":", "");
@ -338,41 +346,41 @@ public class OpenGraph
metaAttributes.put(property, new ArrayList<MetaElement>()); metaAttributes.put(property, new ArrayList<MetaElement>());
metaAttributes.get(property).add(element); metaAttributes.get(property).add(element);
} }
/** /**
* Remove a defined property * Remove a defined property
* @param property The og:XXXX where XXXX is the property you wish to remove * @param property The og:XXXX where XXXX is the property you wish to remove
*/ */
public void removeProperty(String property) public void removeProperty(String property)
{ {
metaAttributes.remove(property); metaAttributes.remove(property);
} }
/** /**
* Obtain the underlying HashTable * Obtain the underlying HashTable
* @return The underlying structure as a Hashtable * @return The underlying structure as a Hashtable
*/ */
public Hashtable<String, ArrayList<MetaElement>> exposeTable() { public Hashtable<String, ArrayList<MetaElement>> exposeTable() {
return metaAttributes; return metaAttributes;
} }
/** /**
* Test if the Open Graph object was initially a representation of a web page * Test if the Open Graph object was initially a representation of a web page
* @return True if the object is from a web page, false otherwise * @return True if the object is from a web page, false otherwise
*/ */
public boolean isFromWeb() public boolean isFromWeb()
{ {
return isImported; return isImported;
} }
/** /**
* Test if the object has been modified by setters/deleters. * Test if the object has been modified by setters/deleters.
* This is only relevant if this object initially represented a web page * This is only relevant if this object initially represented a web page
* @return True if the object has been modified, false otherwise * @return True if the object has been modified, false otherwise
*/ */
public boolean hasChanged() public boolean hasChanged()
{ {
return hasChanged; return hasChanged;
} }
} }